diff options
author | Arne H Juul <arnej@yahooinc.com> | 2021-09-24 09:46:10 +0000 |
---|---|---|
committer | Arne H Juul <arnej@yahooinc.com> | 2021-09-24 10:57:04 +0000 |
commit | 284dd3bab0268cd376fa7c49c567b5861c5f7fe4 (patch) | |
tree | 1b086b7b17e2c7efaa6cb0c71136259d9ae82f6c /eval | |
parent | 5f1f8f36cd26e5d8a5502d165e3760ab636f8979 (diff) |
add Hamming Distance optimizer for vectors
Diffstat (limited to 'eval')
4 files changed, 153 insertions, 0 deletions
diff --git a/eval/src/vespa/eval/eval/optimize_tensor_function.cpp b/eval/src/vespa/eval/eval/optimize_tensor_function.cpp index 64acbceff04..c2e8d886fde 100644 --- a/eval/src/vespa/eval/eval/optimize_tensor_function.cpp +++ b/eval/src/vespa/eval/eval/optimize_tensor_function.cpp @@ -28,6 +28,7 @@ #include <vespa/eval/instruction/vector_from_doubles_function.h> #include <vespa/eval/instruction/dense_tensor_create_function.h> #include <vespa/eval/instruction/dense_tensor_peek_function.h> +#include <vespa/eval/instruction/dense_hamming_distance.h> #include <vespa/log/log.h> LOG_SETUP(".eval.eval.optimize_tensor_function"); @@ -53,6 +54,7 @@ const TensorFunction &optimize_for_factory(const ValueBuilderFactory &, const Te child.set(DenseMatMulFunction::optimize(child.get(), stash)); child.set(DenseMultiMatMulFunction::optimize(child.get(), stash)); child.set(MixedInnerProductFunction::optimize(child.get(), stash)); + child.set(DenseHammingDistance::optimize(child.get(), stash)); nodes.pop_back(); } } diff --git a/eval/src/vespa/eval/instruction/CMakeLists.txt b/eval/src/vespa/eval/instruction/CMakeLists.txt index 88e3272bb7c..3ed969c0a18 100644 --- a/eval/src/vespa/eval/instruction/CMakeLists.txt +++ b/eval/src/vespa/eval/instruction/CMakeLists.txt @@ -5,6 +5,7 @@ vespa_add_library(eval_instruction OBJECT add_trivial_dimension_optimizer.cpp dense_cell_range_function.cpp dense_dot_product_function.cpp + dense_hamming_distance.cpp dense_lambda_peek_function.cpp dense_lambda_peek_optimizer.cpp dense_matmul_function.cpp diff --git a/eval/src/vespa/eval/instruction/dense_hamming_distance.cpp b/eval/src/vespa/eval/instruction/dense_hamming_distance.cpp new file mode 100644 index 00000000000..d2aa7db0c8f --- /dev/null +++ b/eval/src/vespa/eval/instruction/dense_hamming_distance.cpp @@ -0,0 +1,126 @@ +// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#include "dense_hamming_distance.h" +#include <vespa/eval/eval/operation.h> +#include <vespa/eval/eval/value.h> +#include <vespa/eval/eval/hamming_distance.h> + +#include <vespa/log/log.h> +LOG_SETUP(".eval.instruction.dense_hamming_distance"); + +namespace vespalib::eval { + +using namespace tensor_function; + +namespace { + +/** +template <typename LCT, typename RCT> +struct MyHammingDistance { + static double apply(const LCT * lhs, const RCT * rhs, size_t count) { + double result = 0.0; + for (size_t i = 0; i < count; ++i) { + result += hamming_distance(lhs[i], rhs[i]); + } + return result; + } +}; +**/ + +float binary_hamming_distance(const void *lhs, const void *rhs, size_t sz) { + const uint64_t *words_a = static_cast<const uint64_t *>(lhs); + const uint64_t *words_b = static_cast<const uint64_t *>(rhs); + size_t sum = 0; + size_t i = 0; + for (; i * 8 + 7 < sz; ++i) { + uint64_t xor_bits = words_a[i] ^ words_b[i]; + sum += __builtin_popcountl(xor_bits); + } + if (__builtin_expect((i * 8 < sz), false)) { + const uint8_t *bytes_a = static_cast<const uint8_t *>(lhs); + const uint8_t *bytes_b = static_cast<const uint8_t *>(rhs); + for (i *= 8; i < sz; ++i) { + uint64_t xor_bits = bytes_a[i] ^ bytes_b[i]; + sum += __builtin_popcountl(xor_bits); + } + } + return (float)sum; +}; + +struct DenseHammingDistanceParam { + ValueType res_type; + size_t vector_size; + size_t out_subspace_size; + + DenseHammingDistanceParam(const ValueType &res_type_in, + const ValueType &mix_type, + const ValueType &vec_type) + : res_type(res_type_in), + vector_size(vec_type.dense_subspace_size()), + out_subspace_size(res_type.dense_subspace_size()) + { + assert(vector_size * out_subspace_size == mix_type.dense_subspace_size()); + } +}; + +void int8_hamming_to_double_op(InterpretedFunction::State &state, uint64_t vector_size) { + const auto &lhs = state.peek(1); + const auto &rhs = state.peek(0); + LOG_ASSERT(lhs.index().size() == 1); + LOG_ASSERT(rhs.index().size() == 1); + auto a = lhs.cells(); + auto b = rhs.cells(); + LOG_ASSERT(a.type == CellType::INT8); + LOG_ASSERT(b.type == CellType::INT8); + LOG_ASSERT(a.size == vector_size); + LOG_ASSERT(b.size == vector_size); + double result = binary_hamming_distance(a.data, b.data, vector_size); + state.pop_pop_push(state.stash.create<DoubleValue>(result)); +} + +} // namespace <unnamed> + +DenseHammingDistance::DenseHammingDistance(const ValueType &res_type_in, + const TensorFunction &dense_child, + const TensorFunction &vector_child) + : tensor_function::Op2(res_type_in, dense_child, vector_child) +{ +} + +InterpretedFunction::Instruction +DenseHammingDistance::compile_self(const ValueBuilderFactory &, Stash &) const +{ + auto op = int8_hamming_to_double_op; + const auto &lhs_type = lhs().result_type(); + const auto &rhs_type = rhs().result_type(); + LOG_ASSERT(lhs_type.dense_subspace_size() == rhs_type.dense_subspace_size()); + return InterpretedFunction::Instruction(op, lhs_type.dense_subspace_size()); +} + +bool DenseHammingDistance::compatible_types(const ValueType &lhs, const ValueType &rhs) { + if (lhs.cell_type() != CellType::INT8) return false; + if (rhs.cell_type() != CellType::INT8) return false; + if (! lhs.is_dense()) return false; + if (! rhs.is_dense()) return false; + return (lhs.nontrivial_indexed_dimensions() == rhs.nontrivial_indexed_dimensions()); +} + +const TensorFunction & +DenseHammingDistance::optimize(const TensorFunction &expr, Stash &stash) +{ + const auto & res_type = expr.result_type(); + auto reduce = as<Reduce>(expr); + if (res_type.is_double() && reduce && (reduce->aggr() == Aggr::SUM)) { + auto join = as<Join>(reduce->child()); + if (join && (join->function() == operation::Hamming::f)) { + const TensorFunction &lhs = join->lhs(); + const TensorFunction &rhs = join->rhs(); + if (compatible_types(lhs.result_type(), rhs.result_type())) { + return stash.create<DenseHammingDistance>(res_type, lhs, rhs); + } + } + } + return expr; +} + +} // namespace diff --git a/eval/src/vespa/eval/instruction/dense_hamming_distance.h b/eval/src/vespa/eval/instruction/dense_hamming_distance.h new file mode 100644 index 00000000000..251e86d9075 --- /dev/null +++ b/eval/src/vespa/eval/instruction/dense_hamming_distance.h @@ -0,0 +1,24 @@ +// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#pragma once + +#include <vespa/eval/eval/tensor_function.h> + +namespace vespalib::eval { + +/** + * Tensor function for a hamming distance producing a scalar result. + **/ +class DenseHammingDistance : public tensor_function::Op2 +{ +public: + DenseHammingDistance(const ValueType &res_type_in, + const TensorFunction &lhs_child, + const TensorFunction &rhs_child); + InterpretedFunction::Instruction compile_self(const ValueBuilderFactory &factory, Stash &stash) const override; + bool result_is_mutable() const override { return false; } + static bool compatible_types(const ValueType &lhs, const ValueType &rhs); + static const TensorFunction &optimize(const TensorFunction &expr, Stash &stash); +}; + +} // namespace |