diff options
author | Arne H Juul <arnej27959@users.noreply.github.com> | 2021-09-27 22:44:36 +0200 |
---|---|---|
committer | GitHub <noreply@github.com> | 2021-09-27 22:44:36 +0200 |
commit | 990421550d364fda626b4ba3e4c16730f38c74ae (patch) | |
tree | 37cfb849669c668405cc0739e7020bf740c7a783 | |
parent | 624a9baecb46171757a118c85b6c11e25dc6cc51 (diff) | |
parent | b81901517335f3c6b1ad0f477a828d771e7e5507 (diff) |
Merge pull request #19282 from vespa-engine/arnej/hamming-distance-optimizer-1
Arnej/hamming distance optimizer 1
7 files changed, 217 insertions, 0 deletions
diff --git a/eval/CMakeLists.txt b/eval/CMakeLists.txt index 16dd729b1f1..8a7cd4f66bd 100644 --- a/eval/CMakeLists.txt +++ b/eval/CMakeLists.txt @@ -46,6 +46,7 @@ vespa_define_module( src/tests/gp/ponder_nov2017 src/tests/instruction/add_trivial_dimension_optimizer src/tests/instruction/dense_dot_product_function + src/tests/instruction/dense_hamming_distance src/tests/instruction/dense_inplace_join_function src/tests/instruction/dense_matmul_function src/tests/instruction/dense_multi_matmul_function diff --git a/eval/src/tests/instruction/dense_hamming_distance/CMakeLists.txt b/eval/src/tests/instruction/dense_hamming_distance/CMakeLists.txt new file mode 100644 index 00000000000..3d18f9613b3 --- /dev/null +++ b/eval/src/tests/instruction/dense_hamming_distance/CMakeLists.txt @@ -0,0 +1,9 @@ + +vespa_add_executable(eval_dense_hamming_distance_test_app TEST + SOURCES + dense_hamming_distance_test.cpp + DEPENDS + vespaeval + GTest::GTest +) +vespa_add_test(NAME eval_dense_hamming_distance_test_app COMMAND eval_dense_hamming_distance_test_app) diff --git a/eval/src/tests/instruction/dense_hamming_distance/dense_hamming_distance_test.cpp b/eval/src/tests/instruction/dense_hamming_distance/dense_hamming_distance_test.cpp new file mode 100644 index 00000000000..8eaa0e72ad5 --- /dev/null +++ b/eval/src/tests/instruction/dense_hamming_distance/dense_hamming_distance_test.cpp @@ -0,0 +1,91 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#include <vespa/eval/eval/fast_value.h> +#include <vespa/eval/eval/tensor_function.h> +#include <vespa/eval/eval/test/eval_fixture.h> +#include <vespa/eval/eval/test/gen_spec.h> +#include <vespa/eval/instruction/dense_hamming_distance.h> +#include <vespa/vespalib/util/stash.h> +#include <vespa/vespalib/util/stringfmt.h> + +#include <vespa/vespalib/util/require.h> +#include <vespa/vespalib/gtest/gtest.h> + +#include <vespa/log/log.h> +LOG_SETUP("dense_hamming_distance_function_test"); + +using namespace vespalib; +using namespace vespalib::eval; +using namespace vespalib::eval::test; + +const ValueBuilderFactory &prod_factory = FastValueBuilderFactory::get(); + +struct FunInfo { + using LookFor = DenseHammingDistance; + void verify(const LookFor &fun) const { + EXPECT_TRUE(fun.result_is_mutable()); + } +}; + +void assertOptimized(const vespalib::string &expr) { + CellTypeSpace just_int8({CellType::INT8}, 2); + EvalFixture::verify<FunInfo>(expr, {FunInfo{}}, just_int8); + CellTypeSpace just_double({CellType::DOUBLE}, 2); + EvalFixture::verify<FunInfo>(expr, {}, just_double); +} + +void assertNotOptimized(const vespalib::string &expr) { + CellTypeSpace just_int8({CellType::INT8}, 2); + EvalFixture::verify<FunInfo>(expr, {}, just_int8); +} + +TEST(DenseHammingDistanceOptimizer, hamming_distance_works_with_tensor_function) { + assertOptimized("reduce(hamming(x5$1,x5$2),sum)"); + assertOptimized("reduce(hamming(x5$1,x5$2),sum,x)"); + assertOptimized("reduce(join(x5$1,x5$2,f(x,y)(hamming(x,y))),sum)"); + assertOptimized("reduce(join(x5$1,x5$2,f(x,y)(hamming(x,y))),sum,x)"); +} + +TEST(DenseHammingDistanceOptimizer, hamming_distance_with_compatible_dimensions_is_optimized) { + // various vector sizes + assertOptimized("reduce(hamming(x1$1,x1$2),sum)"); + assertOptimized("reduce(hamming(x3$1,x3$2),sum)"); + assertOptimized("reduce(hamming(x7$1,x7$2),sum)"); + assertOptimized("reduce(hamming(x8$1,x8$2),sum)"); + assertOptimized("reduce(hamming(x9$1,x9$2),sum)"); + assertOptimized("reduce(hamming(x17$1,x17$2),sum)"); + // multiple dimensions + assertOptimized("reduce(hamming(x3y3$1,x3y3$2),sum)"); + assertOptimized("reduce(hamming(x3y4$1,x3y4$2),sum)"); + // with trivial dimensions + assertOptimized("reduce(hamming(a1x3$1,x3$2),sum)"); + assertOptimized("reduce(hamming(x3$1z1,x3$2),sum)"); + assertOptimized("reduce(hamming(a1x3$1,b1x3$2z1),sum)"); +} + +TEST(DenseHammingDistanceOptimizer, hamming_distance_with_mapped_dimensions_is_NOT_optimized) { + assertNotOptimized("reduce(hamming(x3_1$1,x3_1$2),sum)"); + assertNotOptimized("reduce(hamming(x3_1y2$1,x3_1y2$2),sum)"); +} + +TEST(DenseHammingDistanceOptimizer, hamming_distance_with_incompatible_dimensions_is_NOT_optimized) { + assertNotOptimized("reduce(hamming(x3,y3),sum)"); + assertNotOptimized("reduce(hamming(y3,x3),sum)"); + assertNotOptimized("reduce(hamming(x3,x3y3),sum)"); + assertNotOptimized("reduce(hamming(x3y3,x3),sum)"); +} + +TEST(DenseHammingDistanceOptimizer, expressions_similar_to_hamming_distance_are_not_optimized) { + assertNotOptimized("reduce(hamming(x3$1,x3$2),prod)"); +} + +TEST(DenseHammingDistanceOptimizer, result_must_be_double_to_trigger_optimization) { + assertOptimized("reduce(hamming(x3y3$1,x3y3$2),sum,x,y)"); + assertNotOptimized("reduce(hamming(x3y3$1,x3y3$2),sum,x)"); + assertNotOptimized("reduce(hamming(x3y3$1,x3y3$2),sum,y)"); +} + +//----------------------------------------------------------------------------- + +GTEST_MAIN_RUN_ALL_TESTS() + diff --git a/eval/src/vespa/eval/eval/optimize_tensor_function.cpp b/eval/src/vespa/eval/eval/optimize_tensor_function.cpp index 64acbceff04..c2e8d886fde 100644 --- a/eval/src/vespa/eval/eval/optimize_tensor_function.cpp +++ b/eval/src/vespa/eval/eval/optimize_tensor_function.cpp @@ -28,6 +28,7 @@ #include <vespa/eval/instruction/vector_from_doubles_function.h> #include <vespa/eval/instruction/dense_tensor_create_function.h> #include <vespa/eval/instruction/dense_tensor_peek_function.h> +#include <vespa/eval/instruction/dense_hamming_distance.h> #include <vespa/log/log.h> LOG_SETUP(".eval.eval.optimize_tensor_function"); @@ -53,6 +54,7 @@ const TensorFunction &optimize_for_factory(const ValueBuilderFactory &, const Te child.set(DenseMatMulFunction::optimize(child.get(), stash)); child.set(DenseMultiMatMulFunction::optimize(child.get(), stash)); child.set(MixedInnerProductFunction::optimize(child.get(), stash)); + child.set(DenseHammingDistance::optimize(child.get(), stash)); nodes.pop_back(); } } diff --git a/eval/src/vespa/eval/instruction/CMakeLists.txt b/eval/src/vespa/eval/instruction/CMakeLists.txt index 88e3272bb7c..3ed969c0a18 100644 --- a/eval/src/vespa/eval/instruction/CMakeLists.txt +++ b/eval/src/vespa/eval/instruction/CMakeLists.txt @@ -5,6 +5,7 @@ vespa_add_library(eval_instruction OBJECT add_trivial_dimension_optimizer.cpp dense_cell_range_function.cpp dense_dot_product_function.cpp + dense_hamming_distance.cpp dense_lambda_peek_function.cpp dense_lambda_peek_optimizer.cpp dense_matmul_function.cpp diff --git a/eval/src/vespa/eval/instruction/dense_hamming_distance.cpp b/eval/src/vespa/eval/instruction/dense_hamming_distance.cpp new file mode 100644 index 00000000000..2a68663631a --- /dev/null +++ b/eval/src/vespa/eval/instruction/dense_hamming_distance.cpp @@ -0,0 +1,91 @@ +// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#include "dense_hamming_distance.h" +#include <vespa/eval/eval/operation.h> +#include <vespa/eval/eval/value.h> +#include <vespa/eval/eval/hamming_distance.h> + +#include <vespa/log/log.h> +LOG_SETUP(".eval.instruction.dense_hamming_distance"); + +namespace vespalib::eval { + +using namespace tensor_function; + +namespace { + + +size_t binary_hamming_distance(const void *lhs, const void *rhs, size_t sz) { + const uint64_t *words_a = static_cast<const uint64_t *>(lhs); + const uint64_t *words_b = static_cast<const uint64_t *>(rhs); + size_t sum = 0; + size_t i = 0; + for (; i * 8 + 7 < sz; ++i) { + uint64_t xor_bits = words_a[i] ^ words_b[i]; + sum += __builtin_popcountl(xor_bits); + } + if (__builtin_expect((i * 8 < sz), false)) { + const uint8_t *bytes_a = static_cast<const uint8_t *>(lhs); + const uint8_t *bytes_b = static_cast<const uint8_t *>(rhs); + for (i *= 8; i < sz; ++i) { + uint64_t xor_bits = bytes_a[i] ^ bytes_b[i]; + sum += __builtin_popcountl(xor_bits); + } + } + return sum; +}; + +void int8_hamming_to_double_op(InterpretedFunction::State &state, uint64_t vector_size) { + const auto &lhs = state.peek(1); + const auto &rhs = state.peek(0); + auto a = lhs.cells(); + auto b = rhs.cells(); + double result = binary_hamming_distance(a.data, b.data, vector_size); + state.pop_pop_push(state.stash.create<DoubleValue>(result)); +} + +bool compatible_types(const ValueType &lhs, const ValueType &rhs) { + return ((lhs.cell_type() == CellType::INT8) && + (rhs.cell_type() == CellType::INT8) && + lhs.is_dense() && + rhs.is_dense() && + (lhs.nontrivial_indexed_dimensions() == rhs.nontrivial_indexed_dimensions())); +} + +} // namespace <unnamed> + +DenseHammingDistance::DenseHammingDistance(const TensorFunction &lhs_child, + const TensorFunction &rhs_child) + : tensor_function::Op2(ValueType::double_type(), lhs_child, rhs_child) +{ +} + +InterpretedFunction::Instruction +DenseHammingDistance::compile_self(const ValueBuilderFactory &, Stash &) const +{ + auto op = int8_hamming_to_double_op; + const auto &lhs_type = lhs().result_type(); + const auto &rhs_type = rhs().result_type(); + LOG_ASSERT(lhs_type.dense_subspace_size() == rhs_type.dense_subspace_size()); + return InterpretedFunction::Instruction(op, lhs_type.dense_subspace_size()); +} + +const TensorFunction & +DenseHammingDistance::optimize(const TensorFunction &expr, Stash &stash) +{ + const auto & res_type = expr.result_type(); + auto reduce = as<Reduce>(expr); + if (res_type.is_double() && reduce && (reduce->aggr() == Aggr::SUM)) { + auto join = as<Join>(reduce->child()); + if (join && (join->function() == operation::Hamming::f)) { + const TensorFunction &lhs = join->lhs(); + const TensorFunction &rhs = join->rhs(); + if (compatible_types(lhs.result_type(), rhs.result_type())) { + return stash.create<DenseHammingDistance>(lhs, rhs); + } + } + } + return expr; +} + +} // namespace diff --git a/eval/src/vespa/eval/instruction/dense_hamming_distance.h b/eval/src/vespa/eval/instruction/dense_hamming_distance.h new file mode 100644 index 00000000000..efc70d74d21 --- /dev/null +++ b/eval/src/vespa/eval/instruction/dense_hamming_distance.h @@ -0,0 +1,22 @@ +// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#pragma once + +#include <vespa/eval/eval/tensor_function.h> + +namespace vespalib::eval { + +/** + * Tensor function for a hamming distance producing a scalar result. + **/ +class DenseHammingDistance : public tensor_function::Op2 +{ +public: + DenseHammingDistance(const TensorFunction &lhs_child, + const TensorFunction &rhs_child); + InterpretedFunction::Instruction compile_self(const ValueBuilderFactory &factory, Stash &stash) const override; + bool result_is_mutable() const override { return true; } + static const TensorFunction &optimize(const TensorFunction &expr, Stash &stash); +}; + +} // namespace |