aboutsummaryrefslogtreecommitdiffstats
path: root/vespalib/src/vespa/vespalib/util/binary_hamming_distance.cpp
blob: 5f242059ccf9a2c9a59b8086d53a86163beedf03 (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
#include "binary_hamming_distance.h"
#include <cstdint>

namespace vespalib {

namespace {
    constexpr uint8_t WORD_SZ = sizeof (uint64_t);
    constexpr uint8_t UNROLL_CNT = 2;
    static_assert(sizeof(uint64_t) == 8);
}
size_t
binary_hamming_distance(const void *lhs, const void *rhs, size_t sz) noexcept {
    auto addr_a = (uintptr_t) lhs;
    auto addr_b = (uintptr_t) rhs;
    size_t sum = 0;
    size_t i = 0;
    bool aligned = ((addr_a & 0x7) == 0) && ((addr_b & 0x7) == 0);
    if (__builtin_expect(aligned, true)) {
        const auto *words_a = static_cast<const uint64_t *>(lhs);
        const auto *words_b = static_cast<const uint64_t *>(rhs);
        for (; (i+UNROLL_CNT) * WORD_SZ <= sz; i += UNROLL_CNT) {
            for (uint8_t j=0; j < UNROLL_CNT; j++) {
                sum += __builtin_popcountl(words_a[i+j] ^ words_b[i+j]);
            }
        }
        for (; (i + 1) * WORD_SZ <= sz; ++i) {
            sum += __builtin_popcountl(words_a[i] ^ words_b[i]);
        }
    }
    if (__builtin_expect((i * WORD_SZ < sz), false)) {
        const auto *bytes_a = static_cast<const uint8_t *>(lhs);
        const auto *bytes_b = static_cast<const uint8_t *>(rhs);
        for (i *= WORD_SZ; i < sz; ++i) {
            uint64_t xor_bits = bytes_a[i] ^ bytes_b[i];
            sum += __builtin_popcountl(xor_bits);
        }
    }
    return sum;
};

}