From 0d3163f0adc4d29bb815c4b19bfea4359ac24504 Mon Sep 17 00:00:00 2001 From: HÃ¥vard Pettersen Date: Fri, 3 Dec 2021 15:06:58 +0000 Subject: optimize squared euclidean distance between tensors --- eval/CMakeLists.txt | 1 + .../tests/instruction/l2_distance/CMakeLists.txt | 10 +++ .../instruction/l2_distance/l2_distance_test.cpp | 96 ++++++++++++++++++++++ .../vespa/eval/eval/optimize_tensor_function.cpp | 7 +- eval/src/vespa/eval/instruction/CMakeLists.txt | 1 + eval/src/vespa/eval/instruction/l2_distance.cpp | 96 ++++++++++++++++++++++ eval/src/vespa/eval/instruction/l2_distance.h | 21 +++++ 7 files changed, 231 insertions(+), 1 deletion(-) create mode 100644 eval/src/tests/instruction/l2_distance/CMakeLists.txt create mode 100644 eval/src/tests/instruction/l2_distance/l2_distance_test.cpp create mode 100644 eval/src/vespa/eval/instruction/l2_distance.cpp create mode 100644 eval/src/vespa/eval/instruction/l2_distance.h diff --git a/eval/CMakeLists.txt b/eval/CMakeLists.txt index 99c7e9c68b8..2e0af3acfa7 100644 --- a/eval/CMakeLists.txt +++ b/eval/CMakeLists.txt @@ -70,6 +70,7 @@ vespa_define_module( src/tests/instruction/index_lookup_table src/tests/instruction/inplace_map_function src/tests/instruction/join_with_number + src/tests/instruction/l2_distance src/tests/instruction/mixed_inner_product_function src/tests/instruction/mixed_simple_join_function src/tests/instruction/pow_as_map_optimizer diff --git a/eval/src/tests/instruction/l2_distance/CMakeLists.txt b/eval/src/tests/instruction/l2_distance/CMakeLists.txt new file mode 100644 index 00000000000..1e0fc69a3f9 --- /dev/null +++ b/eval/src/tests/instruction/l2_distance/CMakeLists.txt @@ -0,0 +1,10 @@ +# Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +vespa_add_executable(eval_l2_distance_test_app TEST + SOURCES + l2_distance_test.cpp + DEPENDS + vespaeval + GTest::GTest +) +vespa_add_test(NAME eval_l2_distance_test_app COMMAND eval_l2_distance_test_app) diff --git a/eval/src/tests/instruction/l2_distance/l2_distance_test.cpp b/eval/src/tests/instruction/l2_distance/l2_distance_test.cpp new file mode 100644 index 00000000000..2cba9dfb18e --- /dev/null +++ b/eval/src/tests/instruction/l2_distance/l2_distance_test.cpp @@ -0,0 +1,96 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +using namespace vespalib; +using namespace vespalib::eval; +using namespace vespalib::eval::test; + +const ValueBuilderFactory &prod_factory = FastValueBuilderFactory::get(); + +//----------------------------------------------------------------------------- + +void verify(const TensorSpec &a, const TensorSpec &b, const vespalib::string &expr, bool optimized = true) { + EvalFixture::ParamRepo param_repo; + param_repo.add("a", a).add("b", b); + EvalFixture fast_fixture(prod_factory, expr, param_repo, true); + EXPECT_EQ(fast_fixture.result(), EvalFixture::ref(expr, param_repo)); + EXPECT_EQ(fast_fixture.find_all().size(), optimized ? 1 : 0); +} + +void verify_cell_types(GenSpec a, GenSpec b, const vespalib::string &expr, bool optimized = true) { + for (CellType act : CellTypeUtils::list_types()) { + for (CellType bct : CellTypeUtils::list_types()) { + if (optimized && (act == bct) && (act != CellType::BFLOAT16)) { + verify(a.cpy().cells(act), b.cpy().cells(bct), expr, true); + } else { + verify(a.cpy().cells(act), b.cpy().cells(bct), expr, false); + } + } + } +} + +//----------------------------------------------------------------------------- + +GenSpec gen(const vespalib::string &desc, int bias) { + return GenSpec::from_desc(desc).cells(CellType::FLOAT).seq(N(bias)); +} + +//----------------------------------------------------------------------------- + +vespalib::string sq_l2 = "reduce((a-b)^2,sum)"; +vespalib::string alt_sq_l2 = "reduce(map((a-b),f(x)(x*x)),sum)"; + +//----------------------------------------------------------------------------- + +TEST(L2DistanceTest, squared_l2_distance_can_be_optimized) { + verify_cell_types(gen("x5", 3), gen("x5", 7), sq_l2); + verify_cell_types(gen("x5", 3), gen("x5", 7), alt_sq_l2); +} + +TEST(L2DistanceTest, trivial_dimensions_are_ignored) { + verify(gen("x5y1", 3), gen("x5", 7), sq_l2); + verify(gen("x5", 3), gen("x5y1", 7), sq_l2); +} + +TEST(L2DistanceTest, multiple_dimensions_can_be_used) { + verify(gen("x5y3", 3), gen("x5y3", 7), sq_l2); +} + +//----------------------------------------------------------------------------- + +TEST(L2DistanceTest, inputs_must_be_dense) { + verify(gen("x5_1", 3), gen("x5_1", 7), sq_l2, false); + verify(gen("x5_1y3", 3), gen("x5_1y3", 7), sq_l2, false); + verify(gen("x5", 3), GenSpec(7), sq_l2, false); + verify(GenSpec(3), gen("x5", 7), sq_l2, false); +} + +TEST(L2DistanceTest, result_must_be_double) { + verify(gen("x5y1", 3), gen("x5y1", 7), "reduce((a-b)^2,sum,x)", false); + verify(gen("x5y1_1", 3), gen("x5y1_1", 7), "reduce((a-b)^2,sum,x)", false); +} + +TEST(L2DistanceTest, dimensions_must_match) { + verify(gen("x5y3", 3), gen("x5", 7), sq_l2, false); + verify(gen("x5", 3), gen("x5y3", 7), sq_l2, false); +} + +TEST(L2DistanceTest, similar_expressions_are_not_optimized) { + verify(gen("x5", 3), gen("x5", 7), "reduce((a-b)^2,prod)", false); + verify(gen("x5", 3), gen("x5", 7), "reduce((a-b)^3,sum)", false); + verify(gen("x5", 3), gen("x5", 7), "reduce((a+b)^2,sum)", false); +} + +//----------------------------------------------------------------------------- + +GTEST_MAIN_RUN_ALL_TESTS() diff --git a/eval/src/vespa/eval/eval/optimize_tensor_function.cpp b/eval/src/vespa/eval/eval/optimize_tensor_function.cpp index 09814cc0b06..e1520d4deb2 100644 --- a/eval/src/vespa/eval/eval/optimize_tensor_function.cpp +++ b/eval/src/vespa/eval/eval/optimize_tensor_function.cpp @@ -30,6 +30,7 @@ #include #include #include +#include #include LOG_SETUP(".eval.eval.optimize_tensor_function"); @@ -54,6 +55,10 @@ void run_optimize_pass(const Child &root, Func&& optimize_node) { const TensorFunction &optimize_for_factory(const ValueBuilderFactory &, const TensorFunction &expr, Stash &stash) { Child root(expr); + run_optimize_pass(root, [&stash](const Child &child) + { + child.set(PowAsMapOptimizer::optimize(child.get(), stash)); + }); run_optimize_pass(root, [&stash](const Child &child) { child.set(SumMaxDotProductFunction::optimize(child.get(), stash)); @@ -61,6 +66,7 @@ const TensorFunction &optimize_for_factory(const ValueBuilderFactory &, const Te run_optimize_pass(root, [&stash](const Child &child) { child.set(BestSimilarityFunction::optimize(child.get(), stash)); + child.set(L2Distance::optimize(child.get(), stash)); }); run_optimize_pass(root, [&stash](const Child &child) { @@ -83,7 +89,6 @@ const TensorFunction &optimize_for_factory(const ValueBuilderFactory &, const Te child.set(DenseLambdaPeekOptimizer::optimize(child.get(), stash)); child.set(UnpackBitsFunction::optimize(child.get(), stash)); child.set(FastRenameOptimizer::optimize(child.get(), stash)); - child.set(PowAsMapOptimizer::optimize(child.get(), stash)); child.set(InplaceMapFunction::optimize(child.get(), stash)); child.set(MixedSimpleJoinFunction::optimize(child.get(), stash)); child.set(JoinWithNumberFunction::optimize(child.get(), stash)); diff --git a/eval/src/vespa/eval/instruction/CMakeLists.txt b/eval/src/vespa/eval/instruction/CMakeLists.txt index a462ece4734..56184c113d4 100644 --- a/eval/src/vespa/eval/instruction/CMakeLists.txt +++ b/eval/src/vespa/eval/instruction/CMakeLists.txt @@ -30,6 +30,7 @@ vespa_add_library(eval_instruction OBJECT index_lookup_table.cpp inplace_map_function.cpp join_with_number_function.cpp + l2_distance.cpp mixed_inner_product_function.cpp mixed_simple_join_function.cpp pow_as_map_optimizer.cpp diff --git a/eval/src/vespa/eval/instruction/l2_distance.cpp b/eval/src/vespa/eval/instruction/l2_distance.cpp new file mode 100644 index 00000000000..3f1e7632431 --- /dev/null +++ b/eval/src/vespa/eval/instruction/l2_distance.cpp @@ -0,0 +1,96 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#include "l2_distance.h" +#include +#include +#include +#include + +#include +LOG_SETUP(".eval.instruction.l2_distance"); + +namespace vespalib::eval { + +using namespace tensor_function; + +namespace { + +static const auto &hw = hwaccelrated::IAccelrated::getAccelerator(); + +template +double sq_l2(const Value &lhs, const Value &rhs, size_t len) { + return hw.squaredEuclideanDistance((const T *)lhs.cells().data, (const T *)rhs.cells().data, len); +} + +template <> +double sq_l2(const Value &lhs, const Value &rhs, size_t len) { + return sq_l2(lhs, rhs, len); +} + +template +void my_squared_l2_distance_op(InterpretedFunction::State &state, uint64_t vector_size) { + double result = sq_l2(state.peek(1), state.peek(0), vector_size); + state.pop_pop_push(state.stash.create(result)); +} + +struct SelectOp { + template + static InterpretedFunction::op_function invoke() { + constexpr bool is_bfloat16 = std::is_same_v; + if constexpr (!is_bfloat16) { + return my_squared_l2_distance_op; + } else { + abort(); + } + } +}; + +bool compatible_cell_types(CellType lhs, CellType rhs) { + return ((lhs == rhs) && ((lhs == CellType::INT8) || + (lhs == CellType::FLOAT) || + (lhs == CellType::DOUBLE))); +} + +bool compatible_types(const ValueType &lhs, const ValueType &rhs) { + return (compatible_cell_types(lhs.cell_type(), rhs.cell_type()) && + lhs.is_dense() && rhs.is_dense() && + (lhs.nontrivial_indexed_dimensions() == rhs.nontrivial_indexed_dimensions())); +} + +} // namespace + +L2Distance::L2Distance(const TensorFunction &lhs_in, const TensorFunction &rhs_in) + : tensor_function::Op2(ValueType::double_type(), lhs_in, rhs_in) +{ +} + +InterpretedFunction::Instruction +L2Distance::compile_self(const ValueBuilderFactory &, Stash &) const +{ + auto lhs_t = lhs().result_type(); + auto rhs_t = rhs().result_type(); + REQUIRE_EQ(lhs_t.cell_type(), rhs_t.cell_type()); + REQUIRE_EQ(lhs_t.dense_subspace_size(), rhs_t.dense_subspace_size()); + auto op = typify_invoke<1, TypifyCellType, SelectOp>(lhs_t.cell_type()); + return InterpretedFunction::Instruction(op, lhs_t.dense_subspace_size()); +} + +const TensorFunction & +L2Distance::optimize(const TensorFunction &expr, Stash &stash) +{ + auto reduce = as(expr); + if (reduce && (reduce->aggr() == Aggr::SUM) && expr.result_type().is_double()) { + auto map = as(reduce->child()); + if (map && (map->function() == operation::Square::f)) { + auto join = as(map->child()); + if (join && (join->function() == operation::Sub::f)) { + if (compatible_types(join->lhs().result_type(), join->rhs().result_type())) { + return stash.create(join->lhs(), join->rhs()); + } + } + } + } + return expr; +} + +} // namespace diff --git a/eval/src/vespa/eval/instruction/l2_distance.h b/eval/src/vespa/eval/instruction/l2_distance.h new file mode 100644 index 00000000000..95b11b6c229 --- /dev/null +++ b/eval/src/vespa/eval/instruction/l2_distance.h @@ -0,0 +1,21 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#pragma once + +#include + +namespace vespalib::eval { + +/** + * Tensor function for a squared euclidean distance producing a scalar result. + **/ +class L2Distance : public tensor_function::Op2 +{ +public: + L2Distance(const TensorFunction &lhs_in, const TensorFunction &rhs_in); + InterpretedFunction::Instruction compile_self(const ValueBuilderFactory &factory, Stash &stash) const override; + bool result_is_mutable() const override { return true; } + static const TensorFunction &optimize(const TensorFunction &expr, Stash &stash); +}; + +} // namespace -- cgit v1.2.3