add Hamming Distance optimizer for vectors

author: Arne H Juul <arnej@yahooinc.com> 2021-09-24 09:46:10 +0000
committer: Arne H Juul <arnej@yahooinc.com> 2021-09-24 10:57:04 +0000
commit: 284dd3bab0268cd376fa7c49c567b5861c5f7fe4 (patch)
tree: 1b086b7b17e2c7efaa6cb0c71136259d9ae82f6c /eval
parent: 5f1f8f36cd26e5d8a5502d165e3760ab636f8979 (diff)
4 files changed, 153 insertions, 0 deletions
diff --git a/eval/src/vespa/eval/eval/optimize_tensor_function.cpp b/eval/src/vespa/eval/eval/optimize_tensor_function.cpp
index 64acbceff04..c2e8d886fde 100644
--- a/eval/src/vespa/eval/eval/optimize_tensor_function.cpp
+++ b/eval/src/vespa/eval/eval/optimize_tensor_function.cpp
@@ -28,6 +28,7 @@
 #include <vespa/eval/instruction/vector_from_doubles_function.h>
 #include <vespa/eval/instruction/dense_tensor_create_function.h>
 #include <vespa/eval/instruction/dense_tensor_peek_function.h>
+#include <vespa/eval/instruction/dense_hamming_distance.h>
 
 #include <vespa/log/log.h>
 LOG_SETUP(".eval.eval.optimize_tensor_function");
@@ -53,6 +54,7 @@ const TensorFunction &optimize_for_factory(const ValueBuilderFactory &, const Te
             child.set(DenseMatMulFunction::optimize(child.get(), stash));
             child.set(DenseMultiMatMulFunction::optimize(child.get(), stash));
             child.set(MixedInnerProductFunction::optimize(child.get(), stash));
+            child.set(DenseHammingDistance::optimize(child.get(), stash));
             nodes.pop_back();
         }
     }
diff --git a/eval/src/vespa/eval/instruction/CMakeLists.txt b/eval/src/vespa/eval/instruction/CMakeLists.txt
index 88e3272bb7c..3ed969c0a18 100644
--- a/eval/src/vespa/eval/instruction/CMakeLists.txt
+++ b/eval/src/vespa/eval/instruction/CMakeLists.txt
@@ -5,6 +5,7 @@ vespa_add_library(eval_instruction OBJECT
     add_trivial_dimension_optimizer.cpp
     dense_cell_range_function.cpp
     dense_dot_product_function.cpp
+    dense_hamming_distance.cpp
     dense_lambda_peek_function.cpp
     dense_lambda_peek_optimizer.cpp
     dense_matmul_function.cpp
diff --git a/eval/src/vespa/eval/instruction/dense_hamming_distance.cpp b/eval/src/vespa/eval/instruction/dense_hamming_distance.cpp
new file mode 100644
index 00000000000..d2aa7db0c8f
--- /dev/null
+++ b/eval/src/vespa/eval/instruction/dense_hamming_distance.cpp
@@ -0,0 +1,126 @@
+// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include "dense_hamming_distance.h"
+#include <vespa/eval/eval/operation.h>
+#include <vespa/eval/eval/value.h>
+#include <vespa/eval/eval/hamming_distance.h>
+
+#include <vespa/log/log.h>
+LOG_SETUP(".eval.instruction.dense_hamming_distance");
+
+namespace vespalib::eval {
+
+using namespace tensor_function;
+
+namespace {
+
+/**
+template <typename LCT, typename RCT>
+struct MyHammingDistance {
+    static double apply(const LCT * lhs, const RCT * rhs, size_t count) {
+        double result = 0.0;
+        for (size_t i = 0; i < count; ++i) {
+            result += hamming_distance(lhs[i], rhs[i]);
+        }
+        return result;
+    }
+};
+**/
+
+float binary_hamming_distance(const void *lhs, const void *rhs, size_t sz) {
+    const uint64_t *words_a = static_cast<const uint64_t *>(lhs);
+    const uint64_t *words_b = static_cast<const uint64_t *>(rhs);
+    size_t sum = 0;
+    size_t i = 0;
+    for (; i * 8 + 7 < sz; ++i) {
+        uint64_t xor_bits = words_a[i] ^ words_b[i];
+        sum += __builtin_popcountl(xor_bits);
+    }
+    if (__builtin_expect((i * 8 < sz), false)) {
+        const uint8_t *bytes_a = static_cast<const uint8_t *>(lhs);
+        const uint8_t *bytes_b = static_cast<const uint8_t *>(rhs);
+        for (i *= 8; i < sz; ++i) {
+            uint64_t xor_bits = bytes_a[i] ^ bytes_b[i];
+            sum += __builtin_popcountl(xor_bits);
+        }
+    }
+    return (float)sum;
+};
+
+struct DenseHammingDistanceParam {
+    ValueType res_type;
+    size_t vector_size;
+    size_t out_subspace_size;
+
+    DenseHammingDistanceParam(const ValueType &res_type_in,
+                              const ValueType &mix_type,
+                              const ValueType &vec_type)
+        : res_type(res_type_in),
+          vector_size(vec_type.dense_subspace_size()),
+          out_subspace_size(res_type.dense_subspace_size())
+    {
+        assert(vector_size * out_subspace_size == mix_type.dense_subspace_size());
+    }
+};
+
+void int8_hamming_to_double_op(InterpretedFunction::State &state, uint64_t vector_size) {
+    const auto &lhs = state.peek(1);
+    const auto &rhs = state.peek(0);
+    LOG_ASSERT(lhs.index().size() == 1);
+    LOG_ASSERT(rhs.index().size() == 1);
+    auto a = lhs.cells();
+    auto b = rhs.cells();
+    LOG_ASSERT(a.type == CellType::INT8);
+    LOG_ASSERT(b.type == CellType::INT8);
+    LOG_ASSERT(a.size == vector_size);
+    LOG_ASSERT(b.size == vector_size);
+    double result = binary_hamming_distance(a.data, b.data, vector_size);
+    state.pop_pop_push(state.stash.create<DoubleValue>(result));
+}
+
+} // namespace <unnamed>
+
+DenseHammingDistance::DenseHammingDistance(const ValueType &res_type_in,
+                                           const TensorFunction &dense_child,
+                                           const TensorFunction &vector_child)
+    : tensor_function::Op2(res_type_in, dense_child, vector_child)
+{
+}
+
+InterpretedFunction::Instruction
+DenseHammingDistance::compile_self(const ValueBuilderFactory &, Stash &) const
+{
+    auto op = int8_hamming_to_double_op;
+    const auto &lhs_type = lhs().result_type();
+    const auto &rhs_type = rhs().result_type();
+    LOG_ASSERT(lhs_type.dense_subspace_size() == rhs_type.dense_subspace_size());
+    return InterpretedFunction::Instruction(op, lhs_type.dense_subspace_size());
+}
+
+bool DenseHammingDistance::compatible_types(const ValueType &lhs, const ValueType &rhs) {
+    if (lhs.cell_type() != CellType::INT8) return false;
+    if (rhs.cell_type() != CellType::INT8) return false;
+    if (! lhs.is_dense()) return false;
+    if (! rhs.is_dense()) return false;
+    return (lhs.nontrivial_indexed_dimensions() == rhs.nontrivial_indexed_dimensions());
+}
+
+const TensorFunction &
+DenseHammingDistance::optimize(const TensorFunction &expr, Stash &stash)
+{
+    const auto & res_type = expr.result_type();
+    auto reduce = as<Reduce>(expr);
+    if (res_type.is_double() && reduce && (reduce->aggr() == Aggr::SUM)) {
+        auto join = as<Join>(reduce->child());
+        if (join && (join->function() == operation::Hamming::f)) {
+            const TensorFunction &lhs = join->lhs();
+            const TensorFunction &rhs = join->rhs();
+            if (compatible_types(lhs.result_type(), rhs.result_type())) {
+                return stash.create<DenseHammingDistance>(res_type, lhs, rhs);
+            }
+        }
+    }
+    return expr;
+}
+
+} // namespace
diff --git a/eval/src/vespa/eval/instruction/dense_hamming_distance.h b/eval/src/vespa/eval/instruction/dense_hamming_distance.h
new file mode 100644
index 00000000000..251e86d9075
--- /dev/null
+++ b/eval/src/vespa/eval/instruction/dense_hamming_distance.h
@@ -0,0 +1,24 @@
+// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include <vespa/eval/eval/tensor_function.h>
+
+namespace vespalib::eval {
+
+/**
+ * Tensor function for a hamming distance producing a scalar result.
+ **/
+class DenseHammingDistance : public tensor_function::Op2
+{
+public:
+    DenseHammingDistance(const ValueType &res_type_in,
+                         const TensorFunction &lhs_child,
+                         const TensorFunction &rhs_child);
+    InterpretedFunction::Instruction compile_self(const ValueBuilderFactory &factory, Stash &stash) const override;
+    bool result_is_mutable() const override { return false; }
+    static bool compatible_types(const ValueType &lhs, const ValueType &rhs);
+    static const TensorFunction &optimize(const TensorFunction &expr, Stash &stash);
+};
+
+} // namespace
author	Arne H Juul <arnej@yahooinc.com>	2021-09-24 09:46:10 +0000
committer	Arne H Juul <arnej@yahooinc.com>	2021-09-24 10:57:04 +0000
commit	284dd3bab0268cd376fa7c49c567b5861c5f7fe4 (patch)
tree	1b086b7b17e2c7efaa6cb0c71136259d9ae82f6c /eval
parent	5f1f8f36cd26e5d8a5502d165e3760ab636f8979 (diff)