diff options
author | Håvard Pettersen <havardpe@oath.com> | 2021-01-26 15:51:39 +0000 |
---|---|---|
committer | Håvard Pettersen <havardpe@oath.com> | 2021-01-26 17:21:07 +0000 |
commit | 8bafc67cc4c404cfa1453f67b409eb729608e6a8 (patch) | |
tree | c2a83dcb673df92ee5120256b0a6eda35583e838 /eval | |
parent | 11bd6fd448dbfd8b8575c0186db7df9062bb20a1 (diff) |
added sum_max_dot_product_function optimization
Diffstat (limited to 'eval')
7 files changed, 312 insertions, 0 deletions
diff --git a/eval/CMakeLists.txt b/eval/CMakeLists.txt index 1d9fa7478a0..239cf8f0f23 100644 --- a/eval/CMakeLists.txt +++ b/eval/CMakeLists.txt @@ -64,6 +64,7 @@ vespa_define_module( src/tests/instruction/mixed_simple_join_function src/tests/instruction/pow_as_map_optimizer src/tests/instruction/remove_trivial_dimension_optimizer + src/tests/instruction/sum_max_dot_product_function src/tests/instruction/vector_from_doubles_function src/tests/streamed/value src/tests/tensor/instruction_benchmark diff --git a/eval/src/tests/instruction/sum_max_dot_product_function/CMakeLists.txt b/eval/src/tests/instruction/sum_max_dot_product_function/CMakeLists.txt new file mode 100644 index 00000000000..d9276ec3431 --- /dev/null +++ b/eval/src/tests/instruction/sum_max_dot_product_function/CMakeLists.txt @@ -0,0 +1,9 @@ +# Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +vespa_add_executable(eval_sum_max_dot_product_function_test_app TEST + SOURCES + sum_max_dot_product_function_test.cpp + DEPENDS + vespaeval + GTest::GTest +) +vespa_add_test(NAME eval_sum_max_dot_product_function_test_app COMMAND eval_sum_max_dot_product_function_test_app) diff --git a/eval/src/tests/instruction/sum_max_dot_product_function/sum_max_dot_product_function_test.cpp b/eval/src/tests/instruction/sum_max_dot_product_function/sum_max_dot_product_function_test.cpp new file mode 100644 index 00000000000..4b89f30d879 --- /dev/null +++ b/eval/src/tests/instruction/sum_max_dot_product_function/sum_max_dot_product_function_test.cpp @@ -0,0 +1,120 @@ +// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#include <vespa/eval/eval/fast_value.h> +#include <vespa/eval/eval/tensor_function.h> +#include <vespa/eval/eval/test/eval_fixture.h> +#include <vespa/eval/eval/test/tensor_model.hpp> +#include <vespa/eval/instruction/sum_max_dot_product_function.h> +#include <vespa/vespalib/gtest/gtest.h> + +using namespace vespalib; +using namespace vespalib::eval; +using namespace vespalib::eval::test; + +const ValueBuilderFactory &prod_factory = FastValueBuilderFactory::get(); + +struct MyVecSeq : Sequence { + double bias; + double operator[](size_t i) const override { return (i + bias); } + MyVecSeq(double cellBias) : bias(cellBias) {} +}; + +//----------------------------------------------------------------------------- + +vespalib::string main_expr = "reduce(reduce(reduce(a*b,sum,z),max,y),sum,x)"; + +void assert_optimized(const TensorSpec &a, const TensorSpec &b, size_t dp_size) { + EvalFixture::ParamRepo param_repo; + param_repo.add("a", a); + param_repo.add("b", b); + EvalFixture slow_fixture(prod_factory, main_expr, param_repo, false); + EvalFixture fast_fixture(prod_factory, main_expr, param_repo, true); + EXPECT_EQ(slow_fixture.result(), EvalFixture::ref(main_expr, param_repo)); + EXPECT_EQ(fast_fixture.result(), EvalFixture::ref(main_expr, param_repo)); + auto info = fast_fixture.find_all<SumMaxDotProductFunction>(); + ASSERT_EQ(info.size(), 1u); + EXPECT_TRUE(info[0]->result_is_mutable()); + EXPECT_EQUAL(info[0]->dp_size(), dp_size); +} + +void assert_not_optimized(const TensorSpec &a, const TensorSpec &b, const vespalib::string &expr = main_expr) { + EvalFixture::ParamRepo param_repo; + param_repo.add("a", a); + param_repo.add("b", b); + EvalFixture slow_fixture(prod_factory, expr, param_repo, false); + EvalFixture fast_fixture(prod_factory, expr, param_repo, true); + EXPECT_EQ(slow_fixture.result(), EvalFixture::ref(expr, param_repo)); + EXPECT_EQ(fast_fixture.result(), EvalFixture::ref(expr, param_repo)); + auto info = fast_fixture.find_all<SumMaxDotProductFunction>(); + ASSERT_EQ(info.size(), 0u); +} + +//----------------------------------------------------------------------------- + +auto query = spec(float_cells({x({"0", "1", "2"}),z(5)}), MyVecSeq(0.5)); +auto document = spec(float_cells({y({"0", "1", "2", "3", "4", "5"}),z(5)}), MyVecSeq(2.5)); +auto empty_query = spec(float_cells({x({}),z(5)}), MyVecSeq(0.5)); +auto empty_document = spec(float_cells({y({}),z(5)}), MyVecSeq(2.5)); + +TEST(SumMaxDotProduct, expressions_can_be_optimized) +{ + assert_optimized(query, document, 5); + assert_optimized(document, query, 5); + assert_optimized(empty_query, document, 5); + assert_optimized(query, empty_document, 5); + assert_optimized(empty_query, empty_document, 5); +} + +TEST(SumMaxDotProduct, double_cells_are_not_optimized) { + auto double_query = spec({x({"0", "1", "2"}),z(5)}, MyVecSeq(0.5)); + auto double_document = spec({y({"0", "1", "2", "3", "4", "5"}),z(5)}, MyVecSeq(2.5)); + assert_not_optimized(query, double_document); + assert_not_optimized(double_query, document); + assert_not_optimized(double_query, double_document); +} + +TEST(SumMaxDotProduct, trivial_dot_product_is_not_optimized) { + auto trivial_query = spec(float_cells({x({"0", "1", "2"}),z(1)}), MyVecSeq(0.5)); + auto trivial_document = spec(float_cells({y({"0", "1", "2", "3", "4", "5"}),z(1)}), MyVecSeq(2.5)); + assert_not_optimized(trivial_query, trivial_document); +} + +TEST(SumMaxDotProduct, additional_dimensions_are_not_optimized) { + auto extra_sparse_query = spec(float_cells({Domain("a", {"0"}),x({"0", "1", "2"}),z(5)}), MyVecSeq(0.5)); + auto extra_dense_query = spec(float_cells({Domain("a", 1),x({"0", "1", "2"}),z(5)}), MyVecSeq(0.5)); + auto extra_sparse_document = spec(float_cells({Domain("a", {"0"}),y({"0", "1", "2", "3", "4", "5"}),z(5)}), MyVecSeq(2.5)); + auto extra_dense_document = spec(float_cells({Domain("a", 1),y({"0", "1", "2", "3", "4", "5"}),z(5)}), MyVecSeq(2.5)); + vespalib::string extra_sum_expr = "reduce(reduce(reduce(a*b,sum,z),max,y),sum,a,x)"; + vespalib::string extra_max_expr = "reduce(reduce(reduce(a*b,sum,z),max,a,y),sum,x)"; + assert_not_optimized(extra_sparse_query, document); + assert_not_optimized(extra_dense_query, document); + assert_not_optimized(query, extra_sparse_document); + assert_not_optimized(query, extra_dense_document); + assert_not_optimized(extra_sparse_query, document, extra_sum_expr); + assert_not_optimized(extra_dense_query, document, extra_sum_expr); + assert_not_optimized(query, extra_sparse_document, extra_max_expr); + assert_not_optimized(query, extra_dense_document, extra_max_expr); +} + +TEST(SumMaxDotProduct, more_dense_variants_are_not_optimized) { + auto dense_query = spec(float_cells({x(3),z(5)}), MyVecSeq(0.5)); + auto dense_document = spec(float_cells({y(5),z(5)}), MyVecSeq(2.5)); + assert_not_optimized(dense_query, document); + assert_not_optimized(query, dense_document); + assert_not_optimized(dense_query, dense_document); +} + +TEST(SumMaxDotProduct, similar_expressions_are_not_optimized) { + vespalib::string max_sum_expr = "reduce(reduce(reduce(a*b,sum,z),sum,y),max,x)"; + vespalib::string not_dp_expr1 = "reduce(reduce(reduce(a+b,sum,z),max,y),sum,x)"; + vespalib::string not_dp_expr2 = "reduce(reduce(reduce(a*b,min,z),max,y),sum,x)"; + vespalib::string sum_all_expr = "reduce(reduce(reduce(a*b,sum,z),max,y),sum)"; + assert_not_optimized(query, document, max_sum_expr); + assert_not_optimized(query, document, not_dp_expr1); + assert_not_optimized(query, document, not_dp_expr2); + assert_not_optimized(query, document, sum_all_expr); +} + +//----------------------------------------------------------------------------- + +GTEST_MAIN_RUN_ALL_TESTS() diff --git a/eval/src/vespa/eval/eval/optimize_tensor_function.cpp b/eval/src/vespa/eval/eval/optimize_tensor_function.cpp index ea63171b260..2e8c89f88fc 100644 --- a/eval/src/vespa/eval/eval/optimize_tensor_function.cpp +++ b/eval/src/vespa/eval/eval/optimize_tensor_function.cpp @@ -6,6 +6,7 @@ #include <vespa/eval/instruction/dense_dot_product_function.h> #include <vespa/eval/instruction/mixed_inner_product_function.h> +#include <vespa/eval/instruction/sum_max_dot_product_function.h> #include <vespa/eval/instruction/dense_xw_product_function.h> #include <vespa/eval/instruction/dense_matmul_function.h> #include <vespa/eval/instruction/dense_multi_matmul_function.h> @@ -44,6 +45,7 @@ const TensorFunction &optimize_for_factory(const ValueBuilderFactory &factory, c } while (!nodes.empty()) { const Child &child = nodes.back().get(); + child.set(SumMaxDotProductFunction::optimize(child.get(), stash)); child.set(DenseDotProductFunction::optimize(child.get(), stash)); child.set(DenseXWProductFunction::optimize(child.get(), stash)); child.set(DenseMatMulFunction::optimize(child.get(), stash)); diff --git a/eval/src/vespa/eval/instruction/CMakeLists.txt b/eval/src/vespa/eval/instruction/CMakeLists.txt index 113df255658..58d5290f5d9 100644 --- a/eval/src/vespa/eval/instruction/CMakeLists.txt +++ b/eval/src/vespa/eval/instruction/CMakeLists.txt @@ -32,5 +32,6 @@ vespa_add_library(eval_instruction OBJECT pow_as_map_optimizer.cpp remove_trivial_dimension_optimizer.cpp replace_type_function.cpp + sum_max_dot_product_function.cpp vector_from_doubles_function.cpp ) diff --git a/eval/src/vespa/eval/instruction/sum_max_dot_product_function.cpp b/eval/src/vespa/eval/instruction/sum_max_dot_product_function.cpp new file mode 100644 index 00000000000..4b541062007 --- /dev/null +++ b/eval/src/vespa/eval/instruction/sum_max_dot_product_function.cpp @@ -0,0 +1,130 @@ +// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#include "sum_max_dot_product_function.h" +#include <vespa/eval/eval/operation.h> +#include <vespa/eval/eval/value.h> +#include <cblas.h> + +namespace vespalib::eval { + +using namespace tensor_function; +using namespace operation; + +namespace { + +void my_sum_max_dot_product_op(InterpretedFunction::State &state, uint64_t dp_size) { + double result = 0.0; + auto query_cells = state.peek(1).cells().typify<float>(); + auto document_cells = state.peek(0).cells().typify<float>(); + if ((query_cells.size() > 0) && (document_cells.size() > 0)) { + for (const float *query = query_cells.begin(); query < query_cells.end(); query += dp_size) { + float max_dp = aggr::Max<float>::null_value(); + for (const float *document = document_cells.begin(); document < document_cells.end(); document += dp_size) { + max_dp = aggr::Max<float>::combine(max_dp, cblas_sdot(dp_size, query, 1, document, 1)); + } + result += max_dp; + } + } + state.pop_pop_push(state.stash.create<ScalarValue<double>>(result)); +} + +const Reduce *check_reduce(const TensorFunction &expr, Aggr aggr) { + if (auto reduce = as<Reduce>(expr)) { + if ((reduce->aggr() == aggr) && (reduce->dimensions().size() == 1)) { + return reduce; + } + } + return nullptr; +} + +const Join *check_mul(const TensorFunction &expr) { + if (auto join = as<Join>(expr)) { + if (join->function() == Mul::f) { + return join; + } + } + return nullptr; +} + +bool check_params(const ValueType &res_type, const ValueType &query, const ValueType &document, + const vespalib::string &sum_dim, const vespalib::string &max_dim, const vespalib::string &dp_dim) +{ + if (res_type.is_scalar() && (res_type.cell_type() == CellType::DOUBLE) && + (query.dimensions().size() == 2) && (query.cell_type() == CellType::FLOAT) && + (document.dimensions().size() == 2) && (document.cell_type() == CellType::FLOAT)) + { + size_t npos = ValueType::Dimension::npos; + size_t sum_idx = query.dimension_index(sum_dim); + size_t max_idx = document.dimension_index(max_dim); + size_t query_dp_idx = query.dimension_index(dp_dim); + size_t document_dp_idx = document.dimension_index(dp_dim); + if ((sum_idx != npos) && (max_idx != npos) && (query_dp_idx != npos) && (document_dp_idx != npos)) { + if (query.dimensions()[sum_idx].is_mapped() && document.dimensions()[max_idx].is_mapped() && + query.dimensions()[query_dp_idx].is_indexed() && !query.dimensions()[query_dp_idx].is_trivial()) + { + assert(query.dimensions()[query_dp_idx].size == document.dimensions()[document_dp_idx].size); + return true; + } + } + } + return false; +} + +size_t get_dim_size(const ValueType &type, const vespalib::string &dim) { + size_t npos = ValueType::Dimension::npos; + size_t idx = type.dimension_index(dim); + assert(idx != npos); + assert(type.dimensions()[idx].is_indexed()); + assert(!type.dimensions()[idx].is_trivial()); + return type.dimensions()[idx].size; +} + +} // namespace <unnamed> + +SumMaxDotProductFunction::SumMaxDotProductFunction(const ValueType &res_type_in, + const TensorFunction &query, + const TensorFunction &document, + size_t dp_size) + : tensor_function::Op2(res_type_in, query, document), + _dp_size(dp_size) +{ +} + +InterpretedFunction::Instruction +SumMaxDotProductFunction::compile_self(const ValueBuilderFactory &, Stash &) const +{ + return InterpretedFunction::Instruction(my_sum_max_dot_product_op, _dp_size); +} + +const TensorFunction & +SumMaxDotProductFunction::optimize(const TensorFunction &expr, Stash &stash) +{ + if (auto sum_reduce = check_reduce(expr, Aggr::SUM)) { + if (auto max_reduce = check_reduce(sum_reduce->child(), Aggr::MAX)) { + if (auto dp_sum = check_reduce(max_reduce->child(), Aggr::SUM)) { + if (auto dp_mul = check_mul(dp_sum->child())) { + const auto &sum_dim = sum_reduce->dimensions()[0]; + const auto &max_dim = max_reduce->dimensions()[0]; + const auto &dp_dim = dp_sum->dimensions()[0]; + const TensorFunction &lhs = dp_mul->lhs(); + const TensorFunction &rhs = dp_mul->rhs(); + if (check_params(expr.result_type(), lhs.result_type(), rhs.result_type(), + sum_dim, max_dim, dp_dim)) + { + size_t dp_size = get_dim_size(lhs.result_type(), dp_dim); + return stash.create<SumMaxDotProductFunction>(expr.result_type(), lhs, rhs, dp_size); + } + if (check_params(expr.result_type(), rhs.result_type(), lhs.result_type(), + sum_dim, max_dim, dp_dim)) + { + size_t dp_size = get_dim_size(rhs.result_type(), dp_dim); + return stash.create<SumMaxDotProductFunction>(expr.result_type(), rhs, lhs, dp_size); + } + } + } + } + } + return expr; +} + +} // namespace diff --git a/eval/src/vespa/eval/instruction/sum_max_dot_product_function.h b/eval/src/vespa/eval/instruction/sum_max_dot_product_function.h new file mode 100644 index 00000000000..955f014ad47 --- /dev/null +++ b/eval/src/vespa/eval/instruction/sum_max_dot_product_function.h @@ -0,0 +1,49 @@ +// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#pragma once + +#include <vespa/eval/eval/tensor_function.h> + +namespace vespalib::eval { + +/** + * Tensor function combining multiple dot products with multiple + * layers of aggregation, resulting in a single scalar result. + * + * inputs: + * query: tensor<float>(qt{},x[32]) + * document: tensor<float>(dt{},x[32]) + * + * expression: + * reduce( + * reduce( + * reduce(query * document, sum, x), + * max, dt + * ), + * sum, qt + * ) + * + * Both query and document contains a collection of vectors. For each + * query vector, take the dot product with all document vectors and + * select the maximum result. Sum these partial results into the final + * result value. + * + * Note that not all equivalent forms are matched by this function + * (initial matching will be very specific). + **/ +class SumMaxDotProductFunction : public tensor_function::Op2 +{ +private: + size_t _dp_size; +public: + SumMaxDotProductFunction(const ValueType &res_type_in, + const TensorFunction &query, + const TensorFunction &document, + size_t dp_size); + InterpretedFunction::Instruction compile_self(const ValueBuilderFactory &factory, Stash &stash) const override; + size_t dp_size() const { return _dp_size; } + bool result_is_mutable() const override { return true; } + static const TensorFunction &optimize(const TensorFunction &expr, Stash &stash); +}; + +} // namespace |