Merge pull request #16734 from vespa-engine/arnej/add-brainfloat16-type

add BrainFloat16 "float with less precision"
author: Arne H Juul <arnej27959@users.noreply.github.com> 2021-03-03 15:26:33 +0100
committer: GitHub <noreply@github.com> 2021-03-03 15:26:33 +0100
commit: 8bba30dc48d18513532d0dbaf6fddebda2466644 (patch)
tree: dd9053038c6b4b97f7eac3a6112a339d2c3fdf5f
parent: 476ea0da8dee849a96c445e702dfa547ac524b5b (diff)
parent: d14f36d7f7d552f4b4dac8a93f7426df2ef57bd3 (diff)
8 files changed, 304 insertions, 3 deletions
diff --git a/vespalib/CMakeLists.txt b/vespalib/CMakeLists.txt
index 2db3c89dfb5..c51e42176dc 100644
--- a/vespalib/CMakeLists.txt
+++ b/vespalib/CMakeLists.txt
@@ -130,6 +130,7 @@ vespa_define_module(
     src/tests/tutorial/simple
     src/tests/tutorial/threads
     src/tests/typify
+    src/tests/util/bfloat16
     src/tests/util/generationhandler
     src/tests/util/generationhandler_stress
     src/tests/util/md5
diff --git a/vespalib/src/tests/util/bfloat16/CMakeLists.txt b/vespalib/src/tests/util/bfloat16/CMakeLists.txt
new file mode 100644
index 00000000000..39a42e6f148
--- /dev/null
+++ b/vespalib/src/tests/util/bfloat16/CMakeLists.txt
@@ -0,0 +1,9 @@
+# Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+vespa_add_executable(vespalib_bfloat16_test_app TEST
+    SOURCES
+    bfloat16_test.cpp
+    DEPENDS
+    vespalib
+    GTest::GTest
+)
+vespa_add_test(NAME vespalib_bfloat16_test_app COMMAND vespalib_bfloat16_test_app)
diff --git a/vespalib/src/tests/util/bfloat16/bfloat16_test.cpp b/vespalib/src/tests/util/bfloat16/bfloat16_test.cpp
new file mode 100644
index 00000000000..4e4129feb78
--- /dev/null
+++ b/vespalib/src/tests/util/bfloat16/bfloat16_test.cpp
@@ -0,0 +1,156 @@
+// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include <vespa/vespalib/util/bfloat16.h>
+#include <vespa/vespalib/objects/nbostream.h>
+#include <vespa/vespalib/gtest/gtest.h>
+#include <stdio.h>
+#include <cmath>
+#include <cstring>
+#include <vector>
+
+using namespace vespalib;
+
+using Limits = std::numeric_limits<BFloat16>;
+
+static const std::vector<float> simple_values = {  // read-only fixture; each value fits in 8 significant bits
+    0.0, 1.0, -1.0, -0.0, 1.75, 0x1.02p20, -0x1.02p-20, 0x3.0p-100, 0x7.0p100
+};
+
+TEST(BFloat16Test, normal_usage) {
+    EXPECT_EQ(sizeof(float), 4u);
+    EXPECT_EQ(sizeof(BFloat16), 2u);
+    BFloat16 answer = 42;
+    double fortytwo = answer;
+    EXPECT_EQ(fortytwo, 42);
+    // each simple value needs at most 8 significant bits, so the round trip must be lossless
+    for (float value : simple_values) {
+        BFloat16 b = value;
+        float recover = b;
+        EXPECT_EQ(value, recover);
+    }
+    BFloat16 b1 = 0x101;  // 9 significant bits: the lowest one is expected to be dropped
+    EXPECT_EQ(float(b1), 0x100);
+    BFloat16 b2 = 0x111;  // same here, 0x111 loses its lowest bit
+    EXPECT_EQ(float(b2), 0x110);
+}
+
+TEST(BFloat16Test, with_nbostream) {  // round-trips every simple value through an nbostream
+    nbostream buf;
+    for (BFloat16 value : simple_values) {  // each float is converted to BFloat16 before it is written
+        buf << value;
+    }
+    for (float value : simple_values) {  // values are read back in the order they were written
+        BFloat16 stored;
+        buf >> stored;
+        EXPECT_EQ(float(stored), value);
+    }
+}
+
+TEST(BFloat16Test, constants_check) {
+    using FloatLimits = std::numeric_limits<float>;
+    EXPECT_EQ(0x1.0p-7, (1.0/128.0));
+    // every limit is pulled through the implicit BFloat16 -> float conversion
+    const float norm_min = Limits::min();
+    const float denorm_min = Limits::denorm_min();
+    const float epsilon = Limits::epsilon();
+    const float largest = Limits::max();
+    const float lowest = Limits::lowest();
+    // exact expected values
+    EXPECT_EQ(norm_min, 0x1.0p-126);
+    EXPECT_EQ(denorm_min, 0x1.0p-133);
+    EXPECT_EQ(epsilon, 0x1.0p-7);
+    EXPECT_EQ(largest, 0x1.FEp127);
+    EXPECT_EQ(lowest, -largest);
+    // relation to the limits of 32-bit float
+    EXPECT_EQ(norm_min, FloatLimits::min());
+    EXPECT_EQ(denorm_min, norm_min / 128.0);
+    EXPECT_GT(epsilon, FloatLimits::epsilon());
+    // 1 + epsilon must be distinguishable from 1, while 1 + epsilon/2 must not be
+    BFloat16 one_plus_eps = 1.0f + epsilon;
+    EXPECT_GT(one_plus_eps.to_float(), 1.0f);
+    BFloat16 one_plus_half_eps = 1.0f + (0.5f * epsilon);
+    EXPECT_EQ(one_plus_half_eps.to_float(), 1.0f);
+    // the extremes lie strictly inside float's range
+    EXPECT_LT(largest, FloatLimits::max());
+    EXPECT_GT(lowest, FloatLimits::lowest());
+    printf("bfloat16 epsilon: %.10g (float has %.20g)\n", epsilon, FloatLimits::epsilon());
+    printf("bfloat16 norm_min: %.20g (float has %.20g)\n", norm_min, FloatLimits::min());
+    printf("bfloat16 denorm_min: %.20g (float has %.20g)\n", denorm_min, FloatLimits::denorm_min());
+    printf("bfloat16 max: %.20g (float has %.20g)\n", largest, FloatLimits::max());
+    printf("bfloat16 lowest: %.20g (float has %.20g)\n", lowest, FloatLimits::lowest());
+}
+
+TEST(BFloat16Test, traits_check) {  // BFloat16 is expected to stay a trivial, bit-copyable value type
+    EXPECT_TRUE(std::is_trivially_constructible_v<BFloat16>);
+    EXPECT_TRUE(std::is_trivially_move_constructible_v<BFloat16>);
+    EXPECT_TRUE(std::is_trivially_default_constructible_v<BFloat16>);
+    EXPECT_TRUE((std::is_trivially_assignable_v<BFloat16, BFloat16>));
+    EXPECT_TRUE(std::is_trivially_move_assignable_v<BFloat16>);
+    EXPECT_TRUE(std::is_trivially_copy_assignable_v<BFloat16>);
+    EXPECT_TRUE(std::is_trivially_copyable_v<BFloat16>);
+    EXPECT_TRUE(std::is_trivially_destructible_v<BFloat16>);
+    EXPECT_TRUE(std::is_trivial_v<BFloat16>);
+    EXPECT_TRUE(std::is_swappable_v<BFloat16>);
+    EXPECT_TRUE(std::has_unique_object_representations_v<BFloat16>);
+}
+
+// Hex text of a 2- or 4-byte object's native integer value; aborts on any other size.
+static std::string hexdump(const void *p, size_t sz) {
+    char text[10];  // room for 8 hex digits and the terminating NUL
+    if (sz == sizeof(uint16_t)) {
+        uint16_t half;
+        memcpy(&half, p, sizeof(half));
+        snprintf(text, sizeof(text), "%04x", half);
+        return text;
+    }
+    if (sz != sizeof(uint32_t)) abort();
+    uint32_t word;
+    memcpy(&word, p, sizeof(word));
+    snprintf(text, sizeof(text), "%08x", word);
+    return text;
+}
+#define HEX_DUMP(arg) hexdump(&arg, sizeof(arg)).c_str()
+
+TEST(BFloat16Test, check_special_values) {  // inf and NaN must survive float <-> BFloat16 conversion bit-for-bit
+    // we should not need to support HW without normal float support:
+    EXPECT_TRUE(std::numeric_limits<float>::has_quiet_NaN);
+    EXPECT_TRUE(std::numeric_limits<float>::has_signaling_NaN);
+    EXPECT_TRUE(std::numeric_limits<BFloat16>::has_quiet_NaN);
+    EXPECT_TRUE(std::numeric_limits<BFloat16>::has_signaling_NaN);
+    float f_inf = std::numeric_limits<float>::infinity();
+    float f_neg = -f_inf;
+    float f_qnan = std::numeric_limits<float>::quiet_NaN();
+    float f_snan = std::numeric_limits<float>::signaling_NaN();
+    BFloat16 b_inf = std::numeric_limits<BFloat16>::infinity();
+    BFloat16 b_qnan = std::numeric_limits<BFloat16>::quiet_NaN();
+    BFloat16 b_snan = std::numeric_limits<BFloat16>::signaling_NaN();
+    BFloat16 b_from_f_inf = f_inf;
+    BFloat16 b_from_f_neg = f_neg;
+    BFloat16 b_from_f_qnan = f_qnan;
+    BFloat16 b_from_f_snan = f_snan;
+    EXPECT_EQ(memcmp(&b_inf, &b_from_f_inf, sizeof(BFloat16)), 0);  // limits value and converted float must share one bit pattern
+    EXPECT_EQ(memcmp(&b_qnan, &b_from_f_qnan, sizeof(BFloat16)), 0);
+    EXPECT_EQ(memcmp(&b_snan, &b_from_f_snan, sizeof(BFloat16)), 0);
+    printf("+inf float is '%s' / bf16 is '%s'\n", HEX_DUMP(f_inf), HEX_DUMP(b_from_f_inf));  // informational output only
+    printf("-inf float is '%s' / bf16 is '%s'\n", HEX_DUMP(f_neg), HEX_DUMP(b_from_f_neg));
+    printf("qNaN float is '%s' / bf16 is '%s'\n", HEX_DUMP(f_qnan), HEX_DUMP(b_from_f_qnan));
+    printf("sNan float is '%s' / bf16 is '%s'\n", HEX_DUMP(f_snan), HEX_DUMP(b_from_f_snan));
+    double d_inf = b_inf;  // widening to double: specials must keep their meaning
+    double d_neg = b_from_f_neg;
+    double d_qnan = b_qnan;
+    double d_snan = b_snan;
+    EXPECT_EQ(d_inf, std::numeric_limits<double>::infinity());
+    EXPECT_EQ(d_neg, -std::numeric_limits<double>::infinity());
+    EXPECT_TRUE(std::isnan(d_qnan));
+    EXPECT_TRUE(std::isnan(d_snan));
+    float f_from_b_inf = b_inf;  // converting back must reproduce the original float bit patterns
+    float f_from_b_neg = b_from_f_neg;
+    float f_from_b_qnan = b_qnan;
+    float f_from_b_snan = b_snan;
+    EXPECT_EQ(memcmp(&f_inf, &f_from_b_inf, sizeof(float)), 0);
+    EXPECT_EQ(memcmp(&f_neg, &f_from_b_neg, sizeof(float)), 0);
+    EXPECT_EQ(memcmp(&f_qnan, &f_from_b_qnan, sizeof(float)), 0);
+    EXPECT_EQ(memcmp(&f_snan, &f_from_b_snan, sizeof(float)), 0);
+}
+
+GTEST_MAIN_RUN_ALL_TESTS()
diff --git a/vespalib/src/vespa/vespalib/cppunit/.gitignore b/vespalib/src/vespa/vespalib/cppunit/.gitignore
deleted file mode 100644
index 583460ae288..00000000000
--- a/vespalib/src/vespa/vespalib/cppunit/.gitignore
+++ /dev/null
@@ -1,3 +0,0 @@
-*.So
-.depend
-Makefile
diff --git a/vespalib/src/vespa/vespalib/objects/nbostream.h b/vespalib/src/vespa/vespalib/objects/nbostream.h
index b0823f76d71..7119645622f 100644
--- a/vespalib/src/vespa/vespalib/objects/nbostream.h
+++ b/vespalib/src/vespa/vespalib/objects/nbostream.h
@@ -5,6 +5,7 @@
 #include <vespa/vespalib/stllike/string.h>
 #include <vespa/vespalib/util/array.h>
 #include <vespa/vespalib/util/buffer.h>
+#include <vespa/vespalib/util/bfloat16.h>
 #include "nbo.h"
 
 namespace vespalib {
@@ -38,6 +39,8 @@ public:
     nbostream & operator >> (double & v)   { double n; read8(&n); v = nbo::n2h(n); return *this; }
     nbostream & operator << (float v)      { float n(nbo::n2h(v)); write4(&n); return *this; }
     nbostream & operator >> (float & v)    { float n; read4(&n); v = nbo::n2h(n); return *this; }
+    nbostream & operator << (BFloat16 v)   { uint16_t n(nbo::n2h(v.get_bits())); write2(&n); return *this; } // writes the raw 16-bit pattern after nbo::n2h
+    nbostream & operator >> (BFloat16 & v) { uint16_t n; read2(&n); v.assign_bits(nbo::n2h(n)); return *this; } // reads 2 bytes, nbo::n2h, stores as raw bits
     nbostream & operator << (int64_t v)    { int64_t n(nbo::n2h(v)); write8(&n); return *this; }
     nbostream & operator >> (int64_t & v)  { int64_t n; read8(&n); v = nbo::n2h(n); return *this; }
     nbostream & operator << (uint64_t v)   { uint64_t n(nbo::n2h(v)); write8(&n); return *this; }
diff --git a/vespalib/src/vespa/vespalib/util/CMakeLists.txt b/vespalib/src/vespa/vespalib/util/CMakeLists.txt
index 64c27482e00..62d642b76b2 100644
--- a/vespalib/src/vespa/vespalib/util/CMakeLists.txt
+++ b/vespalib/src/vespa/vespalib/util/CMakeLists.txt
@@ -10,6 +10,7 @@ vespa_add_library(vespalib_vespalib_util OBJECT
     backtrace.cpp
     barrier.cpp
     benchmark_timer.cpp
+    bfloat16.cpp
     blockingthreadstackexecutor.cpp
     box.cpp
     child_process.cpp
diff --git a/vespalib/src/vespa/vespalib/util/bfloat16.cpp b/vespalib/src/vespa/vespalib/util/bfloat16.cpp
new file mode 100644
index 00000000000..5713ea66886
--- /dev/null
+++ b/vespalib/src/vespa/vespalib/util/bfloat16.cpp
@@ -0,0 +1,3 @@
+// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include "bfloat16.h"
diff --git a/vespalib/src/vespa/vespalib/util/bfloat16.h b/vespalib/src/vespa/vespalib/util/bfloat16.h
new file mode 100644
index 00000000000..573f94ec89f
--- /dev/null
+++ b/vespalib/src/vespa/vespalib/util/bfloat16.h
@@ -0,0 +1,131 @@
+// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include <bit>
+#include <cstdint>
+#include <cstring>
+#include <limits>
+
+namespace vespalib {
+
+/**
+ * 16-bit floating-point number ("brain float") stored as the
+ * most significant half of a 32-bit float: sign and exponent
+ * are kept as-is, the mantissa is cut to 8 bits of precision.
+ * Converting from float discards the low 16 bits (no rounding);
+ * converting back to float fills them with zeros.  Well suited
+ * for ML / AI, halving memory use for large vectors.  Direct HW
+ * support possible (AVX-512 BF16 extension etc.)  See also:
+ * https://en.wikipedia.org/wiki/Bfloat16_floating-point_format
+ **/
+class BFloat16 {
+private:
+    uint16_t _bits;  // raw bfloat16 pattern; not set by default-initialization
+    struct TwoU16 {  // the two 16-bit halves of a float, in memory order
+        uint16_t u1;
+        uint16_t u2;
+    };
+    // Returns the most significant 16 bits of the float's object representation.
+    template<std::endian native_endian = std::endian::native>
+    static constexpr uint16_t float_to_bits(float value) noexcept {
+        TwoU16 both{0,0};
+        static_assert(sizeof(TwoU16) == sizeof(float));
+        memcpy(&both, &value, sizeof(float));  // NOTE(review): memcpy is not constexpr in the standard; std::bit_cast would allow compile-time use
+        if constexpr (native_endian == std::endian::big) {
+            return both.u1;  // big endian: high-order half comes first in memory
+        } else {
+            static_assert(native_endian == std::endian::little,
+                          "Unknown endian, cannot handle");
+            return both.u2;  // little endian: high-order half comes last in memory
+        }
+    }
+    // Widens a bfloat16 pattern to float; the low-order 16 bits stay zero.
+    template<std::endian native_endian = std::endian::native>
+    static constexpr float bits_to_float(uint16_t bits) noexcept {
+        TwoU16 both{0,0};
+        if constexpr (native_endian == std::endian::big) {
+            both.u1 = bits;
+        } else {
+            static_assert(native_endian == std::endian::little,
+                          "Unknown endian, cannot handle");
+            both.u2 = bits;
+        }
+        float result = 0.0;
+        static_assert(sizeof(TwoU16) == sizeof(float));
+        memcpy(&result, &both, sizeof(float));
+        return result;
+    }
+public:
+    constexpr BFloat16(float value) noexcept : _bits(float_to_bits(value)) {}  // intentionally implicit
+    BFloat16() noexcept = default;  // defaulted to keep the type trivial
+    ~BFloat16() noexcept = default;
+    constexpr BFloat16(const BFloat16 &other) noexcept = default;
+    constexpr BFloat16(BFloat16 &&other) noexcept = default;
+    constexpr BFloat16& operator=(const BFloat16 &other) noexcept = default;
+    constexpr BFloat16& operator=(BFloat16 &&other) noexcept = default;
+    constexpr BFloat16& operator=(float value) noexcept {
+        _bits = float_to_bits(value);
+        return *this;
+    }
+
+    constexpr operator float () const noexcept { return bits_to_float(_bits); }
+
+    constexpr float to_float() const noexcept { return bits_to_float(_bits); }
+    constexpr void assign(float value) noexcept { _bits = float_to_bits(value); }
+    // raw access to the stored pattern, e.g. for serialization
+    constexpr uint16_t get_bits() const noexcept { return _bits; }
+    constexpr void assign_bits(uint16_t value) noexcept { _bits = value; }
+};
+
+}
+
+namespace std {
+template<> class numeric_limits<vespalib::BFloat16> {  // limits of the 16-bit truncated-float format
+public:
+    static constexpr bool is_specialized = true;
+    static constexpr bool is_signed = true;
+    static constexpr bool is_integer = false;
+    static constexpr bool is_exact = false;
+    static constexpr bool has_infinity = true;  // infinity() below yields a genuine +inf
+    static constexpr bool has_quiet_NaN = true;
+    static constexpr bool has_signaling_NaN = true;
+    static constexpr bool has_denorm = true;
+    static constexpr bool has_denorm_loss = false;
+    static constexpr bool is_iec559 = false;
+    static constexpr bool is_bounded = true;
+    static constexpr bool is_modulo = false;
+    static constexpr bool traps = false;
+    static constexpr bool tinyness_before = false;
+
+    static constexpr std::float_round_style round_style = std::round_toward_zero;
+    static constexpr int radix = 2;
+
+    static constexpr int digits = 8;
+    static constexpr int digits10 = 2;
+    static constexpr int max_digits10 = 4;
+
+    static constexpr int min_exponent = -125;
+    static constexpr int min_exponent10 = -37;  // min() is 2^-126, about 1.18e-38, so 1e-37 is the smallest normal power of ten
+
+    static constexpr int max_exponent = 128;
+    static constexpr int max_exponent10 = 38;
+
+    static constexpr vespalib::BFloat16 denorm_min() noexcept { return 0x1.0p-133; }
+    static constexpr vespalib::BFloat16 epsilon() noexcept { return 0x1.0p-7; }
+    static constexpr vespalib::BFloat16 lowest() noexcept { return -0x1.FEp127; }
+    static constexpr vespalib::BFloat16 max() noexcept { return 0x1.FEp127; }
+    static constexpr vespalib::BFloat16 min() noexcept { return 0x1.0p-126; }
+    static constexpr vespalib::BFloat16 round_error() noexcept { return 1.0; }
+    static constexpr vespalib::BFloat16 infinity() noexcept {
+        return std::numeric_limits<float>::infinity();
+    }
+    static constexpr vespalib::BFloat16 quiet_NaN() noexcept {
+        return std::numeric_limits<float>::quiet_NaN();
+    }
+    static constexpr vespalib::BFloat16 signaling_NaN() noexcept {
+        return std::numeric_limits<float>::signaling_NaN();
+    }
+};
+
+}
author	Arne H Juul <arnej27959@users.noreply.github.com>	2021-03-03 15:26:33 +0100
committer	GitHub <noreply@github.com>	2021-03-03 15:26:33 +0100
commit	8bba30dc48d18513532d0dbaf6fddebda2466644 (patch)
tree	dd9053038c6b4b97f7eac3a6112a339d2c3fdf5f
parent	476ea0da8dee849a96c445e702dfa547ac524b5b (diff)
parent	d14f36d7f7d552f4b4dac8a93f7426df2ef57bd3 (diff)