summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorArne H Juul <arnej27959@users.noreply.github.com>2021-09-27 22:44:36 +0200
committerGitHub <noreply@github.com>2021-09-27 22:44:36 +0200
commit990421550d364fda626b4ba3e4c16730f38c74ae (patch)
tree37cfb849669c668405cc0739e7020bf740c7a783
parent624a9baecb46171757a118c85b6c11e25dc6cc51 (diff)
parentb81901517335f3c6b1ad0f477a828d771e7e5507 (diff)
Merge pull request #19282 from vespa-engine/arnej/hamming-distance-optimizer-1
Arnej/hamming distance optimizer 1
-rw-r--r--eval/CMakeLists.txt1
-rw-r--r--eval/src/tests/instruction/dense_hamming_distance/CMakeLists.txt9
-rw-r--r--eval/src/tests/instruction/dense_hamming_distance/dense_hamming_distance_test.cpp91
-rw-r--r--eval/src/vespa/eval/eval/optimize_tensor_function.cpp2
-rw-r--r--eval/src/vespa/eval/instruction/CMakeLists.txt1
-rw-r--r--eval/src/vespa/eval/instruction/dense_hamming_distance.cpp91
-rw-r--r--eval/src/vespa/eval/instruction/dense_hamming_distance.h22
7 files changed, 217 insertions, 0 deletions
diff --git a/eval/CMakeLists.txt b/eval/CMakeLists.txt
index 16dd729b1f1..8a7cd4f66bd 100644
--- a/eval/CMakeLists.txt
+++ b/eval/CMakeLists.txt
@@ -46,6 +46,7 @@ vespa_define_module(
src/tests/gp/ponder_nov2017
src/tests/instruction/add_trivial_dimension_optimizer
src/tests/instruction/dense_dot_product_function
+ src/tests/instruction/dense_hamming_distance
src/tests/instruction/dense_inplace_join_function
src/tests/instruction/dense_matmul_function
src/tests/instruction/dense_multi_matmul_function
diff --git a/eval/src/tests/instruction/dense_hamming_distance/CMakeLists.txt b/eval/src/tests/instruction/dense_hamming_distance/CMakeLists.txt
new file mode 100644
index 00000000000..3d18f9613b3
--- /dev/null
+++ b/eval/src/tests/instruction/dense_hamming_distance/CMakeLists.txt
@@ -0,0 +1,9 @@
+
+vespa_add_executable(eval_dense_hamming_distance_test_app TEST
+ SOURCES
+ dense_hamming_distance_test.cpp
+ DEPENDS
+ vespaeval
+ GTest::GTest
+)
+vespa_add_test(NAME eval_dense_hamming_distance_test_app COMMAND eval_dense_hamming_distance_test_app)
diff --git a/eval/src/tests/instruction/dense_hamming_distance/dense_hamming_distance_test.cpp b/eval/src/tests/instruction/dense_hamming_distance/dense_hamming_distance_test.cpp
new file mode 100644
index 00000000000..8eaa0e72ad5
--- /dev/null
+++ b/eval/src/tests/instruction/dense_hamming_distance/dense_hamming_distance_test.cpp
@@ -0,0 +1,91 @@
+// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include <vespa/eval/eval/fast_value.h>
+#include <vespa/eval/eval/tensor_function.h>
+#include <vespa/eval/eval/test/eval_fixture.h>
+#include <vespa/eval/eval/test/gen_spec.h>
+#include <vespa/eval/instruction/dense_hamming_distance.h>
+#include <vespa/vespalib/util/stash.h>
+#include <vespa/vespalib/util/stringfmt.h>
+
+#include <vespa/vespalib/util/require.h>
+#include <vespa/vespalib/gtest/gtest.h>
+
+#include <vespa/log/log.h>
+LOG_SETUP("dense_hamming_distance_function_test");
+
+using namespace vespalib;
+using namespace vespalib::eval;
+using namespace vespalib::eval::test;
+
+const ValueBuilderFactory &prod_factory = FastValueBuilderFactory::get();
+
+struct FunInfo {
+ using LookFor = DenseHammingDistance;
+ void verify(const LookFor &fun) const {
+ EXPECT_TRUE(fun.result_is_mutable());
+ }
+};
+
+void assertOptimized(const vespalib::string &expr) {
+ CellTypeSpace just_int8({CellType::INT8}, 2);
+ EvalFixture::verify<FunInfo>(expr, {FunInfo{}}, just_int8);
+ CellTypeSpace just_double({CellType::DOUBLE}, 2);
+ EvalFixture::verify<FunInfo>(expr, {}, just_double);
+}
+
+void assertNotOptimized(const vespalib::string &expr) {
+ CellTypeSpace just_int8({CellType::INT8}, 2);
+ EvalFixture::verify<FunInfo>(expr, {}, just_int8);
+}
+
+TEST(DenseHammingDistanceOptimizer, hamming_distance_works_with_tensor_function) {
+ assertOptimized("reduce(hamming(x5$1,x5$2),sum)");
+ assertOptimized("reduce(hamming(x5$1,x5$2),sum,x)");
+ assertOptimized("reduce(join(x5$1,x5$2,f(x,y)(hamming(x,y))),sum)");
+ assertOptimized("reduce(join(x5$1,x5$2,f(x,y)(hamming(x,y))),sum,x)");
+}
+
+TEST(DenseHammingDistanceOptimizer, hamming_distance_with_compatible_dimensions_is_optimized) {
+ // various vector sizes
+ assertOptimized("reduce(hamming(x1$1,x1$2),sum)");
+ assertOptimized("reduce(hamming(x3$1,x3$2),sum)");
+ assertOptimized("reduce(hamming(x7$1,x7$2),sum)");
+ assertOptimized("reduce(hamming(x8$1,x8$2),sum)");
+ assertOptimized("reduce(hamming(x9$1,x9$2),sum)");
+ assertOptimized("reduce(hamming(x17$1,x17$2),sum)");
+ // multiple dimensions
+ assertOptimized("reduce(hamming(x3y3$1,x3y3$2),sum)");
+ assertOptimized("reduce(hamming(x3y4$1,x3y4$2),sum)");
+ // with trivial dimensions
+ assertOptimized("reduce(hamming(a1x3$1,x3$2),sum)");
+ assertOptimized("reduce(hamming(x3$1z1,x3$2),sum)");
+ assertOptimized("reduce(hamming(a1x3$1,b1x3$2z1),sum)");
+}
+
+TEST(DenseHammingDistanceOptimizer, hamming_distance_with_mapped_dimensions_is_NOT_optimized) {
+ assertNotOptimized("reduce(hamming(x3_1$1,x3_1$2),sum)");
+ assertNotOptimized("reduce(hamming(x3_1y2$1,x3_1y2$2),sum)");
+}
+
+TEST(DenseHammingDistanceOptimizer, hamming_distance_with_incompatible_dimensions_is_NOT_optimized) {
+ assertNotOptimized("reduce(hamming(x3,y3),sum)");
+ assertNotOptimized("reduce(hamming(y3,x3),sum)");
+ assertNotOptimized("reduce(hamming(x3,x3y3),sum)");
+ assertNotOptimized("reduce(hamming(x3y3,x3),sum)");
+}
+
+TEST(DenseHammingDistanceOptimizer, expressions_similar_to_hamming_distance_are_not_optimized) {
+ assertNotOptimized("reduce(hamming(x3$1,x3$2),prod)");
+}
+
+TEST(DenseHammingDistanceOptimizer, result_must_be_double_to_trigger_optimization) {
+ assertOptimized("reduce(hamming(x3y3$1,x3y3$2),sum,x,y)");
+ assertNotOptimized("reduce(hamming(x3y3$1,x3y3$2),sum,x)");
+ assertNotOptimized("reduce(hamming(x3y3$1,x3y3$2),sum,y)");
+}
+
+//-----------------------------------------------------------------------------
+
+GTEST_MAIN_RUN_ALL_TESTS()
+
diff --git a/eval/src/vespa/eval/eval/optimize_tensor_function.cpp b/eval/src/vespa/eval/eval/optimize_tensor_function.cpp
index 64acbceff04..c2e8d886fde 100644
--- a/eval/src/vespa/eval/eval/optimize_tensor_function.cpp
+++ b/eval/src/vespa/eval/eval/optimize_tensor_function.cpp
@@ -28,6 +28,7 @@
#include <vespa/eval/instruction/vector_from_doubles_function.h>
#include <vespa/eval/instruction/dense_tensor_create_function.h>
#include <vespa/eval/instruction/dense_tensor_peek_function.h>
+#include <vespa/eval/instruction/dense_hamming_distance.h>
#include <vespa/log/log.h>
LOG_SETUP(".eval.eval.optimize_tensor_function");
@@ -53,6 +54,7 @@ const TensorFunction &optimize_for_factory(const ValueBuilderFactory &, const Te
child.set(DenseMatMulFunction::optimize(child.get(), stash));
child.set(DenseMultiMatMulFunction::optimize(child.get(), stash));
child.set(MixedInnerProductFunction::optimize(child.get(), stash));
+ child.set(DenseHammingDistance::optimize(child.get(), stash));
nodes.pop_back();
}
}
diff --git a/eval/src/vespa/eval/instruction/CMakeLists.txt b/eval/src/vespa/eval/instruction/CMakeLists.txt
index 88e3272bb7c..3ed969c0a18 100644
--- a/eval/src/vespa/eval/instruction/CMakeLists.txt
+++ b/eval/src/vespa/eval/instruction/CMakeLists.txt
@@ -5,6 +5,7 @@ vespa_add_library(eval_instruction OBJECT
add_trivial_dimension_optimizer.cpp
dense_cell_range_function.cpp
dense_dot_product_function.cpp
+ dense_hamming_distance.cpp
dense_lambda_peek_function.cpp
dense_lambda_peek_optimizer.cpp
dense_matmul_function.cpp
diff --git a/eval/src/vespa/eval/instruction/dense_hamming_distance.cpp b/eval/src/vespa/eval/instruction/dense_hamming_distance.cpp
new file mode 100644
index 00000000000..2a68663631a
--- /dev/null
+++ b/eval/src/vespa/eval/instruction/dense_hamming_distance.cpp
@@ -0,0 +1,91 @@
+// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include "dense_hamming_distance.h"
+#include <vespa/eval/eval/operation.h>
+#include <vespa/eval/eval/value.h>
+#include <vespa/eval/eval/hamming_distance.h>
+
+#include <vespa/log/log.h>
+LOG_SETUP(".eval.instruction.dense_hamming_distance");
+
+namespace vespalib::eval {
+
+using namespace tensor_function;
+
+namespace {
+
+
+size_t binary_hamming_distance(const void *lhs, const void *rhs, size_t sz) {
+ const uint64_t *words_a = static_cast<const uint64_t *>(lhs);
+ const uint64_t *words_b = static_cast<const uint64_t *>(rhs);
+ size_t sum = 0;
+ size_t i = 0;
+ for (; i * 8 + 7 < sz; ++i) {
+ uint64_t xor_bits = words_a[i] ^ words_b[i];
+ sum += __builtin_popcountl(xor_bits);
+ }
+ if (__builtin_expect((i * 8 < sz), false)) {
+ const uint8_t *bytes_a = static_cast<const uint8_t *>(lhs);
+ const uint8_t *bytes_b = static_cast<const uint8_t *>(rhs);
+ for (i *= 8; i < sz; ++i) {
+ uint64_t xor_bits = bytes_a[i] ^ bytes_b[i];
+ sum += __builtin_popcountl(xor_bits);
+ }
+ }
+ return sum;
+};
+
+void int8_hamming_to_double_op(InterpretedFunction::State &state, uint64_t vector_size) {
+ const auto &lhs = state.peek(1);
+ const auto &rhs = state.peek(0);
+ auto a = lhs.cells();
+ auto b = rhs.cells();
+ double result = binary_hamming_distance(a.data, b.data, vector_size);
+ state.pop_pop_push(state.stash.create<DoubleValue>(result));
+}
+
+bool compatible_types(const ValueType &lhs, const ValueType &rhs) {
+ return ((lhs.cell_type() == CellType::INT8) &&
+ (rhs.cell_type() == CellType::INT8) &&
+ lhs.is_dense() &&
+ rhs.is_dense() &&
+ (lhs.nontrivial_indexed_dimensions() == rhs.nontrivial_indexed_dimensions()));
+}
+
+} // namespace <unnamed>
+
+DenseHammingDistance::DenseHammingDistance(const TensorFunction &lhs_child,
+ const TensorFunction &rhs_child)
+ : tensor_function::Op2(ValueType::double_type(), lhs_child, rhs_child)
+{
+}
+
+InterpretedFunction::Instruction
+DenseHammingDistance::compile_self(const ValueBuilderFactory &, Stash &) const
+{
+ auto op = int8_hamming_to_double_op;
+ const auto &lhs_type = lhs().result_type();
+ const auto &rhs_type = rhs().result_type();
+ LOG_ASSERT(lhs_type.dense_subspace_size() == rhs_type.dense_subspace_size());
+ return InterpretedFunction::Instruction(op, lhs_type.dense_subspace_size());
+}
+
+const TensorFunction &
+DenseHammingDistance::optimize(const TensorFunction &expr, Stash &stash)
+{
+ const auto & res_type = expr.result_type();
+ auto reduce = as<Reduce>(expr);
+ if (res_type.is_double() && reduce && (reduce->aggr() == Aggr::SUM)) {
+ auto join = as<Join>(reduce->child());
+ if (join && (join->function() == operation::Hamming::f)) {
+ const TensorFunction &lhs = join->lhs();
+ const TensorFunction &rhs = join->rhs();
+ if (compatible_types(lhs.result_type(), rhs.result_type())) {
+ return stash.create<DenseHammingDistance>(lhs, rhs);
+ }
+ }
+ }
+ return expr;
+}
+
+} // namespace
diff --git a/eval/src/vespa/eval/instruction/dense_hamming_distance.h b/eval/src/vespa/eval/instruction/dense_hamming_distance.h
new file mode 100644
index 00000000000..efc70d74d21
--- /dev/null
+++ b/eval/src/vespa/eval/instruction/dense_hamming_distance.h
@@ -0,0 +1,22 @@
+// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include <vespa/eval/eval/tensor_function.h>
+
+namespace vespalib::eval {
+
+/**
+ * Tensor function for a hamming distance producing a scalar result.
+ **/
+class DenseHammingDistance : public tensor_function::Op2
+{
+public:
+ DenseHammingDistance(const TensorFunction &lhs_child,
+ const TensorFunction &rhs_child);
+ InterpretedFunction::Instruction compile_self(const ValueBuilderFactory &factory, Stash &stash) const override;
+ bool result_is_mutable() const override { return true; }
+ static const TensorFunction &optimize(const TensorFunction &expr, Stash &stash);
+};
+
+} // namespace