diff options
author | Arne H Juul <arnej27959@users.noreply.github.com> | 2021-03-03 15:26:33 +0100 |
---|---|---|
committer | GitHub <noreply@github.com> | 2021-03-03 15:26:33 +0100 |
commit | 8bba30dc48d18513532d0dbaf6fddebda2466644 (patch) | |
tree | dd9053038c6b4b97f7eac3a6112a339d2c3fdf5f | |
parent | 476ea0da8dee849a96c445e702dfa547ac524b5b (diff) | |
parent | d14f36d7f7d552f4b4dac8a93f7426df2ef57bd3 (diff) |
Merge pull request #16734 from vespa-engine/arnej/add-brainfloat16-type
add BrainFloat16 "float with less precision"
-rw-r--r-- | vespalib/CMakeLists.txt | 1 | ||||
-rw-r--r-- | vespalib/src/tests/util/bfloat16/CMakeLists.txt | 9 | ||||
-rw-r--r-- | vespalib/src/tests/util/bfloat16/bfloat16_test.cpp | 156 | ||||
-rw-r--r-- | vespalib/src/vespa/vespalib/cppunit/.gitignore | 3 | ||||
-rw-r--r-- | vespalib/src/vespa/vespalib/objects/nbostream.h | 3 | ||||
-rw-r--r-- | vespalib/src/vespa/vespalib/util/CMakeLists.txt | 1 | ||||
-rw-r--r-- | vespalib/src/vespa/vespalib/util/bfloat16.cpp | 3 | ||||
-rw-r--r-- | vespalib/src/vespa/vespalib/util/bfloat16.h | 131 |
8 files changed, 304 insertions, 3 deletions
diff --git a/vespalib/CMakeLists.txt b/vespalib/CMakeLists.txt index 2db3c89dfb5..c51e42176dc 100644 --- a/vespalib/CMakeLists.txt +++ b/vespalib/CMakeLists.txt @@ -130,6 +130,7 @@ vespa_define_module( src/tests/tutorial/simple src/tests/tutorial/threads src/tests/typify + src/tests/util/bfloat16 src/tests/util/generationhandler src/tests/util/generationhandler_stress src/tests/util/md5 diff --git a/vespalib/src/tests/util/bfloat16/CMakeLists.txt b/vespalib/src/tests/util/bfloat16/CMakeLists.txt new file mode 100644 index 00000000000..39a42e6f148 --- /dev/null +++ b/vespalib/src/tests/util/bfloat16/CMakeLists.txt @@ -0,0 +1,9 @@ +# Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +vespa_add_executable(vespalib_bfloat16_test_app TEST + SOURCES + bfloat16_test.cpp + DEPENDS + vespalib + GTest::GTest +) +vespa_add_test(NAME vespalib_bfloat16_test_app COMMAND vespalib_bfloat16_test_app) diff --git a/vespalib/src/tests/util/bfloat16/bfloat16_test.cpp b/vespalib/src/tests/util/bfloat16/bfloat16_test.cpp new file mode 100644 index 00000000000..4e4129feb78 --- /dev/null +++ b/vespalib/src/tests/util/bfloat16/bfloat16_test.cpp @@ -0,0 +1,156 @@ +// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+ +#include <vespa/vespalib/util/bfloat16.h> +#include <vespa/vespalib/objects/nbostream.h> +#include <vespa/vespalib/gtest/gtest.h> +#include <stdio.h> +#include <cmath> +#include <cmath> +#include <vector> + +using namespace vespalib; + +using Limits = std::numeric_limits<BFloat16>; + +static std::vector<float> simple_values = { + 0.0, 1.0, -1.0, -0.0, 1.75, 0x1.02p20, -0x1.02p-20, 0x3.0p-100, 0x7.0p100 +}; + +TEST(BFloat16Test, normal_usage) { + EXPECT_EQ(sizeof(float), 4); + EXPECT_EQ(sizeof(BFloat16), 2); + BFloat16 answer = 42; + double fortytwo = answer; + EXPECT_EQ(fortytwo, 42); + std::vector<BFloat16> vec; + for (float value : simple_values) { + BFloat16 b = value; + float recover = b; + EXPECT_EQ(value, recover); + } + BFloat16 b1 = 0x101; + EXPECT_EQ(float(b1), 0x100); + BFloat16 b2 = 0x111; + EXPECT_EQ(float(b2), 0x110); +} + +TEST(BFloat16Test, with_nbostream) { + nbostream buf; + for (BFloat16 value : simple_values) { + buf << value; + } + for (float value : simple_values) { + BFloat16 stored; + buf >> stored; + EXPECT_EQ(float(stored), value); + } +} + +TEST(BFloat16Test, constants_check) { + EXPECT_EQ(0x1.0p-7, (1.0/128.0)); + + float n_min = Limits::min(); + float d_min = Limits::denorm_min(); + float eps = Limits::epsilon(); + float big = Limits::max(); + float low = Limits::lowest(); + + EXPECT_EQ(n_min, 0x1.0p-126); + EXPECT_EQ(d_min, 0x1.0p-133); + EXPECT_EQ(eps, 0x1.0p-7); + EXPECT_EQ(big, 0x1.FEp127); + EXPECT_EQ(low, -big); + + EXPECT_EQ(n_min, std::numeric_limits<float>::min()); + EXPECT_EQ(d_min, n_min / 128.0); + EXPECT_GT(eps, std::numeric_limits<float>::epsilon()); + + BFloat16 try_epsilon = 1.0f + eps; + EXPECT_GT(try_epsilon.to_float(), 1.0f); + BFloat16 try_half_epsilon = 1.0f + (0.5f * eps); + EXPECT_EQ(try_half_epsilon.to_float(), 1.0f); + + EXPECT_LT(big, std::numeric_limits<float>::max()); + EXPECT_GT(low, std::numeric_limits<float>::lowest()); + + printf("bfloat16 epsilon: %.10g (float has %.20g)\n", eps, 
std::numeric_limits<float>::epsilon()); + printf("bfloat16 norm_min: %.20g (float has %.20g)\n", n_min, std::numeric_limits<float>::min()); + printf("bfloat16 denorm_min: %.20g (float has %.20g)\n", d_min, std::numeric_limits<float>::denorm_min()); + printf("bfloat16 max: %.20g (float has %.20g)\n", big, std::numeric_limits<float>::max()); + printf("bfloat16 lowest: %.20g (float has %.20g)\n", low, std::numeric_limits<float>::lowest()); +} + +TEST(BFloat16Test, traits_check) { + EXPECT_TRUE(std::is_trivially_constructible<BFloat16>::value); + EXPECT_TRUE(std::is_trivially_move_constructible<BFloat16>::value); + EXPECT_TRUE(std::is_trivially_default_constructible<BFloat16>::value); + EXPECT_TRUE((std::is_trivially_assignable<BFloat16,BFloat16>::value)); + EXPECT_TRUE(std::is_trivially_move_assignable<BFloat16>::value); + EXPECT_TRUE(std::is_trivially_copy_assignable<BFloat16>::value); + EXPECT_TRUE(std::is_trivially_copyable<BFloat16>::value); + EXPECT_TRUE(std::is_trivially_destructible<BFloat16>::value); + EXPECT_TRUE(std::is_trivial<BFloat16>::value); + EXPECT_TRUE(std::is_swappable<BFloat16>::value); + EXPECT_TRUE(std::has_unique_object_representations<BFloat16>::value); +} + +static std::string hexdump(const void *p, size_t sz) { + char tmpbuf[10]; + if (sz == 2) { + uint16_t bits; + memcpy(&bits, p, sz); + snprintf(tmpbuf, 10, "%04x", bits); + } else if (sz == 4) { + uint32_t bits; + memcpy(&bits, p, sz); + snprintf(tmpbuf, 10, "%08x", bits); + } else { + abort(); + } + return tmpbuf; +} +#define HEX_DUMP(arg) hexdump(&arg, sizeof(arg)).c_str() + +TEST(BFloat16Test, check_special_values) { + // we should not need to support HW without normal float support: + EXPECT_TRUE(std::numeric_limits<float>::has_quiet_NaN); + EXPECT_TRUE(std::numeric_limits<float>::has_signaling_NaN); + EXPECT_TRUE(std::numeric_limits<BFloat16>::has_quiet_NaN); + EXPECT_TRUE(std::numeric_limits<BFloat16>::has_signaling_NaN); + float f_inf = std::numeric_limits<float>::infinity(); + float 
f_neg = -f_inf; + float f_qnan = std::numeric_limits<float>::quiet_NaN(); + float f_snan = std::numeric_limits<float>::signaling_NaN(); + BFloat16 b_inf = std::numeric_limits<BFloat16>::infinity(); + BFloat16 b_qnan = std::numeric_limits<BFloat16>::quiet_NaN(); + BFloat16 b_snan = std::numeric_limits<BFloat16>::signaling_NaN(); + BFloat16 b_from_f_inf = f_inf; + BFloat16 b_from_f_neg = f_neg; + BFloat16 b_from_f_qnan = f_qnan; + BFloat16 b_from_f_snan = f_snan; + EXPECT_EQ(memcmp(&b_inf, &b_from_f_inf, sizeof(BFloat16)), 0); + EXPECT_EQ(memcmp(&b_qnan, &b_from_f_qnan, sizeof(BFloat16)), 0); + EXPECT_EQ(memcmp(&b_snan, &b_from_f_snan, sizeof(BFloat16)), 0); + printf("+inf float is '%s' / bf16 is '%s'\n", HEX_DUMP(f_inf), HEX_DUMP(b_from_f_inf)); + printf("-inf float is '%s' / bf16 is '%s'\n", HEX_DUMP(f_neg), HEX_DUMP(b_from_f_neg)); + printf("qNaN float is '%s' / bf16 is '%s'\n", HEX_DUMP(f_qnan), HEX_DUMP(b_from_f_qnan)); + printf("sNan float is '%s' / bf16 is '%s'\n", HEX_DUMP(f_snan), HEX_DUMP(b_from_f_snan)); + double d_inf = b_inf; + double d_neg = b_from_f_neg; + double d_qnan = b_qnan; + double d_snan = b_snan; + EXPECT_EQ(d_inf, std::numeric_limits<double>::infinity()); + EXPECT_EQ(d_neg, -std::numeric_limits<double>::infinity()); + EXPECT_TRUE(std::isnan(d_qnan)); + EXPECT_TRUE(std::isnan(d_snan)); + float f_from_b_inf = b_inf; + float f_from_b_neg = b_from_f_neg; + float f_from_b_qnan = b_qnan; + float f_from_b_snan = b_snan; + EXPECT_EQ(memcmp(&f_inf, &f_from_b_inf, sizeof(float)), 0); + EXPECT_EQ(memcmp(&f_neg, &f_from_b_neg, sizeof(float)), 0); + EXPECT_EQ(memcmp(&f_qnan, &f_from_b_qnan, sizeof(float)), 0); + EXPECT_EQ(memcmp(&f_snan, &f_from_b_snan, sizeof(float)), 0); +} + +GTEST_MAIN_RUN_ALL_TESTS() diff --git a/vespalib/src/vespa/vespalib/cppunit/.gitignore b/vespalib/src/vespa/vespalib/cppunit/.gitignore deleted file mode 100644 index 583460ae288..00000000000 --- a/vespalib/src/vespa/vespalib/cppunit/.gitignore +++ /dev/null @@ -1,3 +0,0 @@ -*.So 
-.depend -Makefile diff --git a/vespalib/src/vespa/vespalib/objects/nbostream.h b/vespalib/src/vespa/vespalib/objects/nbostream.h index b0823f76d71..7119645622f 100644 --- a/vespalib/src/vespa/vespalib/objects/nbostream.h +++ b/vespalib/src/vespa/vespalib/objects/nbostream.h @@ -5,6 +5,7 @@ #include <vespa/vespalib/stllike/string.h> #include <vespa/vespalib/util/array.h> #include <vespa/vespalib/util/buffer.h> +#include <vespa/vespalib/util/bfloat16.h> #include "nbo.h" namespace vespalib { @@ -38,6 +39,8 @@ public: nbostream & operator >> (double & v) { double n; read8(&n); v = nbo::n2h(n); return *this; } nbostream & operator << (float v) { float n(nbo::n2h(v)); write4(&n); return *this; } nbostream & operator >> (float & v) { float n; read4(&n); v = nbo::n2h(n); return *this; } + nbostream & operator << (BFloat16 v) { uint16_t n(nbo::n2h(v.get_bits())); write2(&n); return *this; } + nbostream & operator >> (BFloat16 & v) { uint16_t n; read2(&n); v.assign_bits(nbo::n2h(n)); return *this; } nbostream & operator << (int64_t v) { int64_t n(nbo::n2h(v)); write8(&n); return *this; } nbostream & operator >> (int64_t & v) { int64_t n; read8(&n); v = nbo::n2h(n); return *this; } nbostream & operator << (uint64_t v) { uint64_t n(nbo::n2h(v)); write8(&n); return *this; } diff --git a/vespalib/src/vespa/vespalib/util/CMakeLists.txt b/vespalib/src/vespa/vespalib/util/CMakeLists.txt index 64c27482e00..62d642b76b2 100644 --- a/vespalib/src/vespa/vespalib/util/CMakeLists.txt +++ b/vespalib/src/vespa/vespalib/util/CMakeLists.txt @@ -10,6 +10,7 @@ vespa_add_library(vespalib_vespalib_util OBJECT backtrace.cpp barrier.cpp benchmark_timer.cpp + bfloat16.cpp blockingthreadstackexecutor.cpp box.cpp child_process.cpp diff --git a/vespalib/src/vespa/vespalib/util/bfloat16.cpp b/vespalib/src/vespa/vespalib/util/bfloat16.cpp new file mode 100644 index 00000000000..5713ea66886 --- /dev/null +++ b/vespalib/src/vespa/vespalib/util/bfloat16.cpp @@ -0,0 +1,3 @@ +// Copyright Verizon Media. 
// Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
//
// (bfloat16.cpp consists only of the line: #include "bfloat16.h")
//
// ---- vespalib/src/vespa/vespalib/util/bfloat16.h (new file) ----
// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.

#pragma once

#include <bit>
#include <cstdint>
#include <cstring>
#include <limits>

namespace vespalib {

/**
 * Class holding 16-bit floating-point numbers.
 * Truncated version of normal 32-bit float; the sign and
 * exponent are kept as-is but the mantissa has only 8-bit
 * precision. Well suited for ML / AI, halving memory
 * requirements for large vectors and similar data.
 * Direct HW support possible (AVX-512 BF16 extension etc.)
 * See also:
 * https://en.wikipedia.org/wiki/Bfloat16_floating-point_format
 *
 * Conversion from float truncates (rounds toward zero); conversion
 * back to float is exact. The stored value is the upper 16 bits of
 * the float bit pattern.
 **/
class BFloat16 {
private:
    uint16_t _bits;

    /**
     * Returns the upper 16 bits of the bit pattern of 'value'.
     *
     * The float is copied into a 32-bit integer and shifted, which gives
     * the same result regardless of byte order. A NaN whose payload lives
     * only in the discarded low bits would otherwise turn into the bit
     * pattern of an infinity; in that case the lowest kept mantissa bit is
     * set so that the result is still a NaN with the same sign.
     *
     * NOTE: std::memcpy is not a constexpr function, so this helper cannot
     * actually be evaluated at compile time; std::bit_cast (C++20) would be
     * the way to lift that restriction.
     **/
    template<typename Wide = uint32_t>
    static constexpr uint16_t float_to_bits(float value) noexcept {
        static_assert(sizeof(Wide) == sizeof(float));
        Wide wide = 0;
        std::memcpy(&wide, &value, sizeof(float));
        uint16_t upper = static_cast<uint16_t>(wide >> 16);
        const Wide exponent_mask = 0x7F800000u;
        const Wide mantissa_mask = 0x007FFFFFu;
        const bool is_nan = ((wide & exponent_mask) == exponent_mask)
                            && ((wide & mantissa_mask) != 0);
        if (is_nan && ((upper & 0x007Fu) == 0)) {
            upper = static_cast<uint16_t>(upper | 0x0001u);
        }
        return upper;
    }

    /**
     * Returns the float whose upper 16 bits are 'bits' and whose
     * lower 16 bits are zero.
     **/
    template<typename Wide = uint32_t>
    static constexpr float bits_to_float(uint16_t bits) noexcept {
        static_assert(sizeof(Wide) == sizeof(float));
        const Wide wide = static_cast<Wide>(bits) << 16;
        float result = 0.0f;
        std::memcpy(&result, &wide, sizeof(float));
        return result;
    }
public:
    // Implicit on purpose: BFloat16 is meant to be used like a float.
    constexpr BFloat16(float value) noexcept : _bits(float_to_bits(value)) {}
    // Defaulted special members keep the type trivial; a default-constructed
    // object therefore has an indeterminate value.
    BFloat16() noexcept = default;
    ~BFloat16() noexcept = default;
    constexpr BFloat16(const BFloat16 &other) noexcept = default;
    constexpr BFloat16(BFloat16 &&other) noexcept = default;
    constexpr BFloat16& operator=(const BFloat16 &other) noexcept = default;
    constexpr BFloat16& operator=(BFloat16 &&other) noexcept = default;
    constexpr BFloat16& operator=(float value) noexcept {
        _bits = float_to_bits(value);
        return *this;
    }

    constexpr operator float () const noexcept { return bits_to_float(_bits); }

    constexpr float to_float() const noexcept { return bits_to_float(_bits); }
    constexpr void assign(float value) noexcept { _bits = float_to_bits(value); }

    // Raw access to the stored 16-bit pattern (used for serialization).
    constexpr uint16_t get_bits() const noexcept { return _bits; }
    constexpr void assign_bits(uint16_t value) noexcept { _bits = value; }
};

}

namespace std {
/**
 * Numeric limits for vespalib::BFloat16: same exponent range as float,
 * but only 8 bits of precision (7 stored mantissa bits).
 **/
template<> class numeric_limits<vespalib::BFloat16> {
public:
    static constexpr bool is_specialized = true;
    static constexpr bool is_signed = true;
    static constexpr bool is_integer = false;
    static constexpr bool is_exact = false;
    // infinity() below returns a real infinity, so this must be true.
    static constexpr bool has_infinity = true;
    static constexpr bool has_quiet_NaN = true;
    static constexpr bool has_signaling_NaN = true;
    static constexpr bool has_denorm = true;
    static constexpr bool has_denorm_loss = false;
    static constexpr bool is_iec559 = false;
    static constexpr bool is_bounded = true;
    static constexpr bool is_modulo = false;
    static constexpr bool traps = false;
    static constexpr bool tinyness_before = false;

    // Conversion from float drops the low mantissa bits.
    static constexpr std::float_round_style round_style = std::round_toward_zero;
    static constexpr int radix = 2;

    static constexpr int digits = 8;
    static constexpr int digits10 = 2;
    static constexpr int max_digits10 = 4;

    // Exponent range is identical to float: min() is 2^-126 (about 1.18e-38),
    // so the smallest normalized power of ten is 1e-37.
    static constexpr int min_exponent = -125;
    static constexpr int min_exponent10 = -37;

    static constexpr int max_exponent = 128;
    static constexpr int max_exponent10 = 38;

    static constexpr vespalib::BFloat16 denorm_min() noexcept { return 0x1.0p-133; }
    static constexpr vespalib::BFloat16 epsilon() noexcept { return 0x1.0p-7; }
    static constexpr vespalib::BFloat16 lowest() noexcept { return -0x1.FEp127; }
    static constexpr vespalib::BFloat16 max() noexcept { return 0x1.FEp127; }
    static constexpr vespalib::BFloat16 min() noexcept { return 0x1.0p-126; }
    static constexpr vespalib::BFloat16 round_error() noexcept { return 1.0; }
    static constexpr vespalib::BFloat16 infinity() noexcept {
        return std::numeric_limits<float>::infinity();
    }
    static constexpr vespalib::BFloat16 quiet_NaN() noexcept {
        return std::numeric_limits<float>::quiet_NaN();
    }
    static constexpr vespalib::BFloat16 signaling_NaN() noexcept {
        return std::numeric_limits<float>::signaling_NaN();
    }
};

}