diff options
author | Arne H Juul <arnej@yahooinc.com> | 2021-09-28 16:14:21 +0000 |
---|---|---|
committer | Arne H Juul <arnej@yahooinc.com> | 2021-09-28 16:14:21 +0000 |
commit | 433e18f5267d0b2dbaf2794b739ed7a42c0e155a (patch) | |
tree | 368ac9b872393ec208a163fe38fb530d4a19b83d /vespalib | |
parent | 1b40ad9707a389ec1910d7d643648f200805b304 (diff) |
add common binary_hamming_distance function
Diffstat (limited to 'vespalib')
-rw-r--r-- | vespalib/CMakeLists.txt | 1 | ||||
-rw-r--r-- | vespalib/src/tests/util/hamming/CMakeLists.txt | 9 | ||||
-rw-r--r-- | vespalib/src/tests/util/hamming/hamming_test.cpp | 80 | ||||
-rw-r--r-- | vespalib/src/vespa/vespalib/util/CMakeLists.txt | 1 | ||||
-rw-r--r-- | vespalib/src/vespa/vespalib/util/hamming_distance.cpp | 33 | ||||
-rw-r--r-- | vespalib/src/vespa/vespalib/util/hamming_distance.h | 14 |
6 files changed, 138 insertions, 0 deletions
diff --git a/vespalib/CMakeLists.txt b/vespalib/CMakeLists.txt index 5cf06093977..220c63c04a6 100644 --- a/vespalib/CMakeLists.txt +++ b/vespalib/CMakeLists.txt @@ -139,6 +139,7 @@ vespa_define_module( src/tests/util/file_area_freelist src/tests/util/generationhandler src/tests/util/generationhandler_stress + src/tests/util/hamming src/tests/util/md5 src/tests/util/mmap_file_allocator src/tests/util/mmap_file_allocator_factory diff --git a/vespalib/src/tests/util/hamming/CMakeLists.txt b/vespalib/src/tests/util/hamming/CMakeLists.txt new file mode 100644 index 00000000000..24eafa16649 --- /dev/null +++ b/vespalib/src/tests/util/hamming/CMakeLists.txt @@ -0,0 +1,9 @@ +# Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +vespa_add_executable(vespalib_hamming_test_app TEST + SOURCES + hamming_test.cpp + DEPENDS + vespalib + GTest::GTest +) +vespa_add_test(NAME vespalib_hamming_test_app COMMAND vespalib_hamming_test_app) diff --git a/vespalib/src/tests/util/hamming/hamming_test.cpp b/vespalib/src/tests/util/hamming/hamming_test.cpp new file mode 100644 index 00000000000..a955ccb7b7e --- /dev/null +++ b/vespalib/src/tests/util/hamming/hamming_test.cpp @@ -0,0 +1,80 @@ +// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#include <vespa/vespalib/util/hamming_distance.h> +#include <vespa/vespalib/util/require.h> +#include <vespa/vespalib/gtest/gtest.h> +#include <cstdlib> +#include <cstring> + +using namespace vespalib; + +constexpr size_t ALIGN = 8; +constexpr size_t SZ = 64; + +void flip_one_bit(void *memory, void *other) { + auto buf = (uint8_t *)memory; + auto cmp = (uint8_t *)other; + while (true) { + size_t byte_idx = random() % SZ; + size_t bit_idx = random() % 8; + uint8_t lookaside = cmp[byte_idx]; + uint8_t old = buf[byte_idx]; + uint8_t bit = 1u << bit_idx; + if ((old & bit) == (lookaside & bit)) { + uint8_t new_val = old ^ bit; + REQUIRE(old != new_val); + buf[byte_idx] = new_val; + return; + } + } +} + +void *my_alloc(int unalignment = 0) { + void *mem; + int r = posix_memalign(&mem, ALIGN, SZ*2); + REQUIRE_EQ(0, r); + uintptr_t addr = (uintptr_t) mem; + addr += unalignment; + return (void *)addr; +} + +void check_with_flipping(void *mem_a, void *mem_b) { + memset(mem_a, 0, SZ); + memset(mem_b, 0, SZ); + size_t dist = 0; + EXPECT_EQ(binary_hamming_distance(mem_a, mem_b, SZ), dist); + while (dist < 100) { + flip_one_bit(mem_a, mem_b); + ++dist; + EXPECT_EQ(binary_hamming_distance(mem_a, mem_b, SZ), dist); + flip_one_bit(mem_b, mem_a); + ++dist; + EXPECT_EQ(binary_hamming_distance(mem_a, mem_b, SZ), dist); + } +} + +TEST(BinaryHammingTest, aligned_usage) { + void *mem_a = my_alloc(0); + void *mem_b = my_alloc(0); + check_with_flipping(mem_a, mem_b); +} + +TEST(BinaryHammingTest, one_unaligned) { + void *mem_a = my_alloc(3); + void *mem_b = my_alloc(0); + check_with_flipping(mem_a, mem_b); +} + +TEST(BinaryHammingTest, other_unaligned) { + void *mem_a = my_alloc(0); + void *mem_b = my_alloc(7); + check_with_flipping(mem_a, mem_b); +} + +TEST(BinaryHammingTest, both_unaligned) { + void *mem_a = my_alloc(2); + void *mem_b = my_alloc(6); + check_with_flipping(mem_a, mem_b); +} + +GTEST_MAIN_RUN_ALL_TESTS() diff --git a/vespalib/src/vespa/vespalib/util/CMakeLists.txt b/vespalib/src/vespa/vespalib/util/CMakeLists.txt index 17fc14d0e9e..e02e1e73f94 100644 --- a/vespalib/src/vespa/vespalib/util/CMakeLists.txt +++ b/vespalib/src/vespa/vespalib/util/CMakeLists.txt @@ -26,6 +26,7 @@ vespa_add_library(vespalib_vespalib_util OBJECT gencnt.cpp generationhandler.cpp generationholder.cpp + hamming_distance.cpp hdr_abort.cpp host_name.cpp joinable.cpp diff --git a/vespalib/src/vespa/vespalib/util/hamming_distance.cpp b/vespalib/src/vespa/vespalib/util/hamming_distance.cpp new file mode 100644 index 00000000000..f783ef31374 --- /dev/null +++ b/vespalib/src/vespa/vespalib/util/hamming_distance.cpp @@ -0,0 +1,33 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +#include "hamming_distance.h" +#include <cstdint> + +namespace vespalib { + +size_t binary_hamming_distance(const void *lhs, const void *rhs, size_t sz) { + uintptr_t addr_a = (uintptr_t) lhs; + uintptr_t addr_b = (uintptr_t) rhs; + size_t sum = 0; + size_t i = 0; + static_assert(sizeof(uint64_t) == 8); + bool aligned = ((addr_a & 0x7) == 0) && ((addr_b & 0x7) == 0); + if (__builtin_expect(aligned, true)) { + const uint64_t *words_a = static_cast<const uint64_t *>(lhs); + const uint64_t *words_b = static_cast<const uint64_t *>(rhs); + for (; i * 8 + 7 < sz; ++i) { + uint64_t xor_bits = words_a[i] ^ words_b[i]; + sum += __builtin_popcountl(xor_bits); + } + } + if (__builtin_expect((i * 8 < sz), false)) { + const uint8_t *bytes_a = static_cast<const uint8_t *>(lhs); + const uint8_t *bytes_b = static_cast<const uint8_t *>(rhs); + for (i *= 8; i < sz; ++i) { + uint64_t xor_bits = bytes_a[i] ^ bytes_b[i]; + sum += __builtin_popcountl(xor_bits); + } + } + return sum; +}; + +} diff --git a/vespalib/src/vespa/vespalib/util/hamming_distance.h b/vespalib/src/vespa/vespalib/util/hamming_distance.h new file mode 100644 index 00000000000..ce8c8dacdf9 --- /dev/null +++ b/vespalib/src/vespa/vespalib/util/hamming_distance.h @@ -0,0 +1,14 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +#pragma once +#include <cstddef> +namespace vespalib { +/** + * Compute Hamming distance between two binary blobs + * + * @param lhs a blob (to interpret as a bitvector with sz*8 bits) + * @param rhs a blob (to interpret as a bitvector with sz*8 bits) + * @param sz number of bytes in each blob + * @return number of bits that differ when comparing the two blobs + **/ +size_t binary_hamming_distance(const void *lhs, const void *rhs, size_t sz); +} |