summaryrefslogtreecommitdiffstats
path: root/vespalib
diff options
context:
space:
mode:
authorHåvard Pettersen <3535158+havardpe@users.noreply.github.com>2021-09-29 10:53:12 +0200
committerGitHub <noreply@github.com>2021-09-29 10:53:12 +0200
commit9417cd02a50bfd83e7b8aafa84d764a61e2680d7 (patch)
tree9c6e0c7e589e7a2a8478a46dda72922e602abee7 /vespalib
parent8923accf7e72d147d6d57185eecc4faf2b4adeb7 (diff)
parentabb465b7106ef1594903a9d34b86190cdf501eb2 (diff)
Merge pull request #19343 from vespa-engine/arnej/use-common-hamming-distance
Arnej/use common hamming distance
Diffstat (limited to 'vespalib')
-rw-r--r--vespalib/CMakeLists.txt1
-rw-r--r--vespalib/src/tests/util/hamming/CMakeLists.txt9
-rw-r--r--vespalib/src/tests/util/hamming/hamming_test.cpp80
-rw-r--r--vespalib/src/vespa/vespalib/util/CMakeLists.txt1
-rw-r--r--vespalib/src/vespa/vespalib/util/binary_hamming_distance.cpp33
-rw-r--r--vespalib/src/vespa/vespalib/util/binary_hamming_distance.h14
6 files changed, 138 insertions, 0 deletions
diff --git a/vespalib/CMakeLists.txt b/vespalib/CMakeLists.txt
index 5cf06093977..220c63c04a6 100644
--- a/vespalib/CMakeLists.txt
+++ b/vespalib/CMakeLists.txt
@@ -139,6 +139,7 @@ vespa_define_module(
src/tests/util/file_area_freelist
src/tests/util/generationhandler
src/tests/util/generationhandler_stress
+ src/tests/util/hamming
src/tests/util/md5
src/tests/util/mmap_file_allocator
src/tests/util/mmap_file_allocator_factory
diff --git a/vespalib/src/tests/util/hamming/CMakeLists.txt b/vespalib/src/tests/util/hamming/CMakeLists.txt
new file mode 100644
index 00000000000..24eafa16649
--- /dev/null
+++ b/vespalib/src/tests/util/hamming/CMakeLists.txt
@@ -0,0 +1,9 @@
+# Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+# Test executable for binary_hamming_distance(), built from hamming_test.cpp
+# and depending on vespalib and GoogleTest.
+vespa_add_executable(vespalib_hamming_test_app TEST
+ SOURCES
+ hamming_test.cpp
+ DEPENDS
+ vespalib
+ GTest::GTest
+)
+# Register the executable as a test under the same name.
+vespa_add_test(NAME vespalib_hamming_test_app COMMAND vespalib_hamming_test_app)
diff --git a/vespalib/src/tests/util/hamming/hamming_test.cpp b/vespalib/src/tests/util/hamming/hamming_test.cpp
new file mode 100644
index 00000000000..b63d32e4e96
--- /dev/null
+++ b/vespalib/src/tests/util/hamming/hamming_test.cpp
@@ -0,0 +1,80 @@
+// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include <vespa/vespalib/util/binary_hamming_distance.h>
+#include <vespa/vespalib/util/require.h>
+#include <vespa/vespalib/gtest/gtest.h>
+#include <cstdlib>
+#include <cstring>
+
+using namespace vespalib;
+
+constexpr size_t ALIGN = 8;
+constexpr size_t SZ = 64;
+
+/**
+ * Flip a single, randomly chosen bit in the SZ-byte buffer 'memory'.
+ *
+ * Only bit positions where 'memory' and 'other' currently hold the same
+ * value are eligible, so after the call the two buffers differ in that
+ * position. Candidate positions are drawn with random() until an
+ * eligible one is found; if the buffers already differ in every bit
+ * this never returns.
+ *
+ * @param memory buffer to modify (at least SZ bytes)
+ * @param other  buffer to compare against (at least SZ bytes), not modified
+ **/
+void flip_one_bit(void *memory, void *other) {
+    auto *target = static_cast<uint8_t *>(memory);
+    auto *reference = static_cast<uint8_t *>(other);
+    for (;;) {
+        // draw order (byte first, then bit) is kept so the random sequence is consumed identically
+        const size_t byte_pos = random() % SZ;
+        const size_t bit_pos = random() % 8;
+        const uint8_t mask = 1u << bit_pos;
+        const uint8_t old = target[byte_pos];
+        if (((old ^ reference[byte_pos]) & mask) != 0) {
+            continue; // the buffers already differ in this bit; draw again
+        }
+        const uint8_t new_val = old ^ mask;
+        REQUIRE(old != new_val);
+        target[byte_pos] = new_val;
+        return;
+    }
+}
+
+/**
+ * Allocate SZ*2 bytes aligned to ALIGN with posix_memalign() and return
+ * a pointer 'unalignment' bytes past the start of that allocation.
+ *
+ * The allocation is twice as large as the SZ bytes the tests use, which
+ * leaves room for the offset. Note that for a non-zero offset the
+ * returned pointer is not the one obtained from posix_memalign(), so it
+ * must not be handed to free(); the tests in this file never release
+ * the memory.
+ *
+ * @param unalignment byte offset added to the aligned start address
+ * @return pointer to the (possibly deliberately misaligned) buffer
+ **/
+void *my_alloc(int unalignment = 0) {
+    void *base;
+    int r = posix_memalign(&base, ALIGN, SZ*2);
+    REQUIRE_EQ(0, r);
+    return reinterpret_cast<void *>(reinterpret_cast<uintptr_t>(base) + unalignment);
+}
+
+/**
+ * Zero both SZ-byte buffers, then alternately flip one bit in mem_a and
+ * one bit in mem_b, checking after every flip that
+ * binary_hamming_distance() equals the number of flips made so far.
+ * The loop ends once the expected distance has reached 100.
+ *
+ * @param mem_a first buffer (at least SZ bytes)
+ * @param mem_b second buffer (at least SZ bytes)
+ **/
+void check_with_flipping(void *mem_a, void *mem_b) {
+    memset(mem_a, 0, SZ);
+    memset(mem_b, 0, SZ);
+    size_t dist = 0;
+    // Flip one bit in 'victim' where it still agrees with 'reference',
+    // then verify that the measured distance grew by exactly one.
+    auto flip_and_verify = [&](void *victim, void *reference) {
+        flip_one_bit(victim, reference);
+        ++dist;
+        EXPECT_EQ(binary_hamming_distance(mem_a, mem_b, SZ), dist);
+    };
+    EXPECT_EQ(binary_hamming_distance(mem_a, mem_b, SZ), dist);
+    while (dist < 100) {
+        flip_and_verify(mem_a, mem_b);
+        flip_and_verify(mem_b, mem_a);
+    }
+}
+
+// Both buffers start exactly on an ALIGN-byte boundary.
+TEST(BinaryHammingTest, aligned_usage) {
+    void *lhs = my_alloc();
+    void *rhs = my_alloc();
+    check_with_flipping(lhs, rhs);
+}
+
+// First buffer is offset 3 bytes from an aligned address, second is aligned.
+TEST(BinaryHammingTest, one_unaligned) {
+    void *lhs = my_alloc(3);
+    void *rhs = my_alloc();
+    check_with_flipping(lhs, rhs);
+}
+
+// First buffer is aligned, second is offset 7 bytes from an aligned address.
+TEST(BinaryHammingTest, other_unaligned) {
+    void *lhs = my_alloc();
+    void *rhs = my_alloc(7);
+    check_with_flipping(lhs, rhs);
+}
+
+// Both buffers are misaligned, by different offsets (2 and 6 bytes).
+TEST(BinaryHammingTest, both_unaligned) {
+    void *lhs = my_alloc(2);
+    void *rhs = my_alloc(6);
+    check_with_flipping(lhs, rhs);
+}
+
+GTEST_MAIN_RUN_ALL_TESTS()
diff --git a/vespalib/src/vespa/vespalib/util/CMakeLists.txt b/vespalib/src/vespa/vespalib/util/CMakeLists.txt
index 17fc14d0e9e..9a176acabd6 100644
--- a/vespalib/src/vespa/vespalib/util/CMakeLists.txt
+++ b/vespalib/src/vespa/vespalib/util/CMakeLists.txt
@@ -11,6 +11,7 @@ vespa_add_library(vespalib_vespalib_util OBJECT
barrier.cpp
benchmark_timer.cpp
bfloat16.cpp
+ binary_hamming_distance.cpp
blockingthreadstackexecutor.cpp
box.cpp
child_process.cpp
diff --git a/vespalib/src/vespa/vespalib/util/binary_hamming_distance.cpp b/vespalib/src/vespa/vespalib/util/binary_hamming_distance.cpp
new file mode 100644
index 00000000000..cbf940c9f0c
--- /dev/null
+++ b/vespalib/src/vespa/vespalib/util/binary_hamming_distance.cpp
@@ -0,0 +1,33 @@
+// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#include "binary_hamming_distance.h"
+#include <cstdint>
+
+namespace vespalib {
+
+/**
+ * Count the bit positions in which two equally sized byte blobs differ.
+ *
+ * When both pointers are 8-byte aligned the blobs are compared one
+ * 64-bit word at a time, and only the trailing (sz % 8) bytes are
+ * compared byte by byte. If either pointer is not 8-byte aligned the
+ * whole input is compared byte by byte.
+ *
+ * @param lhs first blob, at least sz readable bytes
+ * @param rhs second blob, at least sz readable bytes
+ * @param sz number of bytes to compare in each blob
+ * @return number of differing bits
+ **/
+size_t binary_hamming_distance(const void *lhs, const void *rhs, size_t sz) {
+    const uintptr_t addr_a = reinterpret_cast<uintptr_t>(lhs);
+    const uintptr_t addr_b = reinterpret_cast<uintptr_t>(rhs);
+    size_t sum = 0;
+    size_t i = 0; // number of complete 64-bit words already handled
+    static_assert(sizeof(uint64_t) == 8);
+    const bool aligned = ((addr_a & 0x7) == 0) && ((addr_b & 0x7) == 0);
+    if (__builtin_expect(aligned, true)) {
+        const uint64_t *words_a = static_cast<const uint64_t *>(lhs);
+        const uint64_t *words_b = static_cast<const uint64_t *>(rhs);
+        const size_t num_words = sz / 8;
+        for (; i < num_words; ++i) {
+            const uint64_t xor_bits = words_a[i] ^ words_b[i];
+            // popcountll: 'unsigned long long' is at least 64 bits on every
+            // target, whereas 'unsigned long' (popcountl) may be only 32
+            sum += __builtin_popcountll(xor_bits);
+        }
+    }
+    // bytes not covered by the word loop: the tail, or everything if unaligned
+    if (__builtin_expect((i * 8 < sz), false)) {
+        const uint8_t *bytes_a = static_cast<const uint8_t *>(lhs);
+        const uint8_t *bytes_b = static_cast<const uint8_t *>(rhs);
+        for (i *= 8; i < sz; ++i) {
+            const uint64_t xor_bits = bytes_a[i] ^ bytes_b[i];
+            sum += __builtin_popcountll(xor_bits);
+        }
+    }
+    return sum;
+}
+
+}
diff --git a/vespalib/src/vespa/vespalib/util/binary_hamming_distance.h b/vespalib/src/vespa/vespalib/util/binary_hamming_distance.h
new file mode 100644
index 00000000000..ce8c8dacdf9
--- /dev/null
+++ b/vespalib/src/vespa/vespalib/util/binary_hamming_distance.h
@@ -0,0 +1,14 @@
+// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#pragma once
+#include <cstddef>
+namespace vespalib {
+/**
+ * Compute Hamming distance between two binary blobs
+ *
+ * The pointers may have any alignment: the implementation compares
+ * 64-bit words when both are 8-byte aligned and falls back to comparing
+ * byte by byte otherwise. Exactly sz bytes are read from each blob.
+ *
+ * @param lhs a blob (to interpret as a bitvector with sz*8 bits)
+ * @param rhs a blob (to interpret as a bitvector with sz*8 bits)
+ * @param sz number of bytes in each blob
+ * @return number of bits that differ when comparing the two blobs
+ **/
+size_t binary_hamming_distance(const void *lhs, const void *rhs, size_t sz);
+}