summaryrefslogtreecommitdiffstats
path: root/eval
diff options
context:
space:
mode:
authorHåvard Pettersen <havardpe@oath.com>2021-01-26 15:51:39 +0000
committerHåvard Pettersen <havardpe@oath.com>2021-01-26 17:21:07 +0000
commit8bafc67cc4c404cfa1453f67b409eb729608e6a8 (patch)
treec2a83dcb673df92ee5120256b0a6eda35583e838 /eval
parent11bd6fd448dbfd8b8575c0186db7df9062bb20a1 (diff)
added sum_max_dot_product_function optimization
Diffstat (limited to 'eval')
-rw-r--r--eval/CMakeLists.txt1
-rw-r--r--eval/src/tests/instruction/sum_max_dot_product_function/CMakeLists.txt9
-rw-r--r--eval/src/tests/instruction/sum_max_dot_product_function/sum_max_dot_product_function_test.cpp120
-rw-r--r--eval/src/vespa/eval/eval/optimize_tensor_function.cpp2
-rw-r--r--eval/src/vespa/eval/instruction/CMakeLists.txt1
-rw-r--r--eval/src/vespa/eval/instruction/sum_max_dot_product_function.cpp130
-rw-r--r--eval/src/vespa/eval/instruction/sum_max_dot_product_function.h49
7 files changed, 312 insertions, 0 deletions
diff --git a/eval/CMakeLists.txt b/eval/CMakeLists.txt
index 1d9fa7478a0..239cf8f0f23 100644
--- a/eval/CMakeLists.txt
+++ b/eval/CMakeLists.txt
@@ -64,6 +64,7 @@ vespa_define_module(
src/tests/instruction/mixed_simple_join_function
src/tests/instruction/pow_as_map_optimizer
src/tests/instruction/remove_trivial_dimension_optimizer
+ src/tests/instruction/sum_max_dot_product_function
src/tests/instruction/vector_from_doubles_function
src/tests/streamed/value
src/tests/tensor/instruction_benchmark
diff --git a/eval/src/tests/instruction/sum_max_dot_product_function/CMakeLists.txt b/eval/src/tests/instruction/sum_max_dot_product_function/CMakeLists.txt
new file mode 100644
index 00000000000..d9276ec3431
--- /dev/null
+++ b/eval/src/tests/instruction/sum_max_dot_product_function/CMakeLists.txt
@@ -0,0 +1,9 @@
+# Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+vespa_add_executable(eval_sum_max_dot_product_function_test_app TEST
+ SOURCES
+ sum_max_dot_product_function_test.cpp
+ DEPENDS
+ vespaeval
+ GTest::GTest
+)
+vespa_add_test(NAME eval_sum_max_dot_product_function_test_app COMMAND eval_sum_max_dot_product_function_test_app)
diff --git a/eval/src/tests/instruction/sum_max_dot_product_function/sum_max_dot_product_function_test.cpp b/eval/src/tests/instruction/sum_max_dot_product_function/sum_max_dot_product_function_test.cpp
new file mode 100644
index 00000000000..4b89f30d879
--- /dev/null
+++ b/eval/src/tests/instruction/sum_max_dot_product_function/sum_max_dot_product_function_test.cpp
@@ -0,0 +1,120 @@
+// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include <vespa/eval/eval/fast_value.h>
+#include <vespa/eval/eval/tensor_function.h>
+#include <vespa/eval/eval/test/eval_fixture.h>
+#include <vespa/eval/eval/test/tensor_model.hpp>
+#include <vespa/eval/instruction/sum_max_dot_product_function.h>
+#include <vespa/vespalib/gtest/gtest.h>
+
+using namespace vespalib;
+using namespace vespalib::eval;
+using namespace vespalib::eval::test;
+
+const ValueBuilderFactory &prod_factory = FastValueBuilderFactory::get();
+
+struct MyVecSeq : Sequence {
+ double bias;
+ double operator[](size_t i) const override { return (i + bias); }
+ MyVecSeq(double cellBias) : bias(cellBias) {}
+};
+
+//-----------------------------------------------------------------------------
+
+vespalib::string main_expr = "reduce(reduce(reduce(a*b,sum,z),max,y),sum,x)";
+
+void assert_optimized(const TensorSpec &a, const TensorSpec &b, size_t dp_size) {
+ EvalFixture::ParamRepo param_repo;
+ param_repo.add("a", a);
+ param_repo.add("b", b);
+ EvalFixture slow_fixture(prod_factory, main_expr, param_repo, false);
+ EvalFixture fast_fixture(prod_factory, main_expr, param_repo, true);
+ EXPECT_EQ(slow_fixture.result(), EvalFixture::ref(main_expr, param_repo));
+ EXPECT_EQ(fast_fixture.result(), EvalFixture::ref(main_expr, param_repo));
+ auto info = fast_fixture.find_all<SumMaxDotProductFunction>();
+ ASSERT_EQ(info.size(), 1u);
+ EXPECT_TRUE(info[0]->result_is_mutable());
+ EXPECT_EQUAL(info[0]->dp_size(), dp_size);
+}
+
+void assert_not_optimized(const TensorSpec &a, const TensorSpec &b, const vespalib::string &expr = main_expr) {
+ EvalFixture::ParamRepo param_repo;
+ param_repo.add("a", a);
+ param_repo.add("b", b);
+ EvalFixture slow_fixture(prod_factory, expr, param_repo, false);
+ EvalFixture fast_fixture(prod_factory, expr, param_repo, true);
+ EXPECT_EQ(slow_fixture.result(), EvalFixture::ref(expr, param_repo));
+ EXPECT_EQ(fast_fixture.result(), EvalFixture::ref(expr, param_repo));
+ auto info = fast_fixture.find_all<SumMaxDotProductFunction>();
+ ASSERT_EQ(info.size(), 0u);
+}
+
+//-----------------------------------------------------------------------------
+
+auto query = spec(float_cells({x({"0", "1", "2"}),z(5)}), MyVecSeq(0.5));
+auto document = spec(float_cells({y({"0", "1", "2", "3", "4", "5"}),z(5)}), MyVecSeq(2.5));
+auto empty_query = spec(float_cells({x({}),z(5)}), MyVecSeq(0.5));
+auto empty_document = spec(float_cells({y({}),z(5)}), MyVecSeq(2.5));
+
+TEST(SumMaxDotProduct, expressions_can_be_optimized)
+{
+ assert_optimized(query, document, 5);
+ assert_optimized(document, query, 5);
+ assert_optimized(empty_query, document, 5);
+ assert_optimized(query, empty_document, 5);
+ assert_optimized(empty_query, empty_document, 5);
+}
+
+TEST(SumMaxDotProduct, double_cells_are_not_optimized) {
+ auto double_query = spec({x({"0", "1", "2"}),z(5)}, MyVecSeq(0.5));
+ auto double_document = spec({y({"0", "1", "2", "3", "4", "5"}),z(5)}, MyVecSeq(2.5));
+ assert_not_optimized(query, double_document);
+ assert_not_optimized(double_query, document);
+ assert_not_optimized(double_query, double_document);
+}
+
+TEST(SumMaxDotProduct, trivial_dot_product_is_not_optimized) {
+ auto trivial_query = spec(float_cells({x({"0", "1", "2"}),z(1)}), MyVecSeq(0.5));
+ auto trivial_document = spec(float_cells({y({"0", "1", "2", "3", "4", "5"}),z(1)}), MyVecSeq(2.5));
+ assert_not_optimized(trivial_query, trivial_document);
+}
+
+TEST(SumMaxDotProduct, additional_dimensions_are_not_optimized) {
+ auto extra_sparse_query = spec(float_cells({Domain("a", {"0"}),x({"0", "1", "2"}),z(5)}), MyVecSeq(0.5));
+ auto extra_dense_query = spec(float_cells({Domain("a", 1),x({"0", "1", "2"}),z(5)}), MyVecSeq(0.5));
+ auto extra_sparse_document = spec(float_cells({Domain("a", {"0"}),y({"0", "1", "2", "3", "4", "5"}),z(5)}), MyVecSeq(2.5));
+ auto extra_dense_document = spec(float_cells({Domain("a", 1),y({"0", "1", "2", "3", "4", "5"}),z(5)}), MyVecSeq(2.5));
+ vespalib::string extra_sum_expr = "reduce(reduce(reduce(a*b,sum,z),max,y),sum,a,x)";
+ vespalib::string extra_max_expr = "reduce(reduce(reduce(a*b,sum,z),max,a,y),sum,x)";
+ assert_not_optimized(extra_sparse_query, document);
+ assert_not_optimized(extra_dense_query, document);
+ assert_not_optimized(query, extra_sparse_document);
+ assert_not_optimized(query, extra_dense_document);
+ assert_not_optimized(extra_sparse_query, document, extra_sum_expr);
+ assert_not_optimized(extra_dense_query, document, extra_sum_expr);
+ assert_not_optimized(query, extra_sparse_document, extra_max_expr);
+ assert_not_optimized(query, extra_dense_document, extra_max_expr);
+}
+
+TEST(SumMaxDotProduct, more_dense_variants_are_not_optimized) {
+ auto dense_query = spec(float_cells({x(3),z(5)}), MyVecSeq(0.5));
+ auto dense_document = spec(float_cells({y(5),z(5)}), MyVecSeq(2.5));
+ assert_not_optimized(dense_query, document);
+ assert_not_optimized(query, dense_document);
+ assert_not_optimized(dense_query, dense_document);
+}
+
+TEST(SumMaxDotProduct, similar_expressions_are_not_optimized) {
+ vespalib::string max_sum_expr = "reduce(reduce(reduce(a*b,sum,z),sum,y),max,x)";
+ vespalib::string not_dp_expr1 = "reduce(reduce(reduce(a+b,sum,z),max,y),sum,x)";
+ vespalib::string not_dp_expr2 = "reduce(reduce(reduce(a*b,min,z),max,y),sum,x)";
+ vespalib::string sum_all_expr = "reduce(reduce(reduce(a*b,sum,z),max,y),sum)";
+ assert_not_optimized(query, document, max_sum_expr);
+ assert_not_optimized(query, document, not_dp_expr1);
+ assert_not_optimized(query, document, not_dp_expr2);
+ assert_not_optimized(query, document, sum_all_expr);
+}
+
+//-----------------------------------------------------------------------------
+
+GTEST_MAIN_RUN_ALL_TESTS()
diff --git a/eval/src/vespa/eval/eval/optimize_tensor_function.cpp b/eval/src/vespa/eval/eval/optimize_tensor_function.cpp
index ea63171b260..2e8c89f88fc 100644
--- a/eval/src/vespa/eval/eval/optimize_tensor_function.cpp
+++ b/eval/src/vespa/eval/eval/optimize_tensor_function.cpp
@@ -6,6 +6,7 @@
#include <vespa/eval/instruction/dense_dot_product_function.h>
#include <vespa/eval/instruction/mixed_inner_product_function.h>
+#include <vespa/eval/instruction/sum_max_dot_product_function.h>
#include <vespa/eval/instruction/dense_xw_product_function.h>
#include <vespa/eval/instruction/dense_matmul_function.h>
#include <vespa/eval/instruction/dense_multi_matmul_function.h>
@@ -44,6 +45,7 @@ const TensorFunction &optimize_for_factory(const ValueBuilderFactory &factory, c
}
while (!nodes.empty()) {
const Child &child = nodes.back().get();
+ child.set(SumMaxDotProductFunction::optimize(child.get(), stash));
child.set(DenseDotProductFunction::optimize(child.get(), stash));
child.set(DenseXWProductFunction::optimize(child.get(), stash));
child.set(DenseMatMulFunction::optimize(child.get(), stash));
diff --git a/eval/src/vespa/eval/instruction/CMakeLists.txt b/eval/src/vespa/eval/instruction/CMakeLists.txt
index 113df255658..58d5290f5d9 100644
--- a/eval/src/vespa/eval/instruction/CMakeLists.txt
+++ b/eval/src/vespa/eval/instruction/CMakeLists.txt
@@ -32,5 +32,6 @@ vespa_add_library(eval_instruction OBJECT
pow_as_map_optimizer.cpp
remove_trivial_dimension_optimizer.cpp
replace_type_function.cpp
+ sum_max_dot_product_function.cpp
vector_from_doubles_function.cpp
)
diff --git a/eval/src/vespa/eval/instruction/sum_max_dot_product_function.cpp b/eval/src/vespa/eval/instruction/sum_max_dot_product_function.cpp
new file mode 100644
index 00000000000..4b541062007
--- /dev/null
+++ b/eval/src/vespa/eval/instruction/sum_max_dot_product_function.cpp
@@ -0,0 +1,130 @@
+// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include "sum_max_dot_product_function.h"
+#include <vespa/eval/eval/operation.h>
+#include <vespa/eval/eval/value.h>
+#include <cblas.h>
+
+namespace vespalib::eval {
+
+using namespace tensor_function;
+using namespace operation;
+
+namespace {
+
+void my_sum_max_dot_product_op(InterpretedFunction::State &state, uint64_t dp_size) {
+ double result = 0.0;
+ auto query_cells = state.peek(1).cells().typify<float>();
+ auto document_cells = state.peek(0).cells().typify<float>();
+ if ((query_cells.size() > 0) && (document_cells.size() > 0)) {
+ for (const float *query = query_cells.begin(); query < query_cells.end(); query += dp_size) {
+ float max_dp = aggr::Max<float>::null_value();
+ for (const float *document = document_cells.begin(); document < document_cells.end(); document += dp_size) {
+ max_dp = aggr::Max<float>::combine(max_dp, cblas_sdot(dp_size, query, 1, document, 1));
+ }
+ result += max_dp;
+ }
+ }
+ state.pop_pop_push(state.stash.create<ScalarValue<double>>(result));
+}
+
+const Reduce *check_reduce(const TensorFunction &expr, Aggr aggr) {
+ if (auto reduce = as<Reduce>(expr)) {
+ if ((reduce->aggr() == aggr) && (reduce->dimensions().size() == 1)) {
+ return reduce;
+ }
+ }
+ return nullptr;
+}
+
+const Join *check_mul(const TensorFunction &expr) {
+ if (auto join = as<Join>(expr)) {
+ if (join->function() == Mul::f) {
+ return join;
+ }
+ }
+ return nullptr;
+}
+
+bool check_params(const ValueType &res_type, const ValueType &query, const ValueType &document,
+ const vespalib::string &sum_dim, const vespalib::string &max_dim, const vespalib::string &dp_dim)
+{
+ if (res_type.is_scalar() && (res_type.cell_type() == CellType::DOUBLE) &&
+ (query.dimensions().size() == 2) && (query.cell_type() == CellType::FLOAT) &&
+ (document.dimensions().size() == 2) && (document.cell_type() == CellType::FLOAT))
+ {
+ size_t npos = ValueType::Dimension::npos;
+ size_t sum_idx = query.dimension_index(sum_dim);
+ size_t max_idx = document.dimension_index(max_dim);
+ size_t query_dp_idx = query.dimension_index(dp_dim);
+ size_t document_dp_idx = document.dimension_index(dp_dim);
+ if ((sum_idx != npos) && (max_idx != npos) && (query_dp_idx != npos) && (document_dp_idx != npos)) {
+ if (query.dimensions()[sum_idx].is_mapped() && document.dimensions()[max_idx].is_mapped() &&
+ query.dimensions()[query_dp_idx].is_indexed() && !query.dimensions()[query_dp_idx].is_trivial())
+ {
+ assert(query.dimensions()[query_dp_idx].size == document.dimensions()[document_dp_idx].size);
+ return true;
+ }
+ }
+ }
+ return false;
+}
+
+size_t get_dim_size(const ValueType &type, const vespalib::string &dim) {
+ size_t npos = ValueType::Dimension::npos;
+ size_t idx = type.dimension_index(dim);
+ assert(idx != npos);
+ assert(type.dimensions()[idx].is_indexed());
+ assert(!type.dimensions()[idx].is_trivial());
+ return type.dimensions()[idx].size;
+}
+
+} // namespace <unnamed>
+
+SumMaxDotProductFunction::SumMaxDotProductFunction(const ValueType &res_type_in,
+ const TensorFunction &query,
+ const TensorFunction &document,
+ size_t dp_size)
+ : tensor_function::Op2(res_type_in, query, document),
+ _dp_size(dp_size)
+{
+}
+
+InterpretedFunction::Instruction
+SumMaxDotProductFunction::compile_self(const ValueBuilderFactory &, Stash &) const
+{
+ return InterpretedFunction::Instruction(my_sum_max_dot_product_op, _dp_size);
+}
+
+const TensorFunction &
+SumMaxDotProductFunction::optimize(const TensorFunction &expr, Stash &stash)
+{
+ if (auto sum_reduce = check_reduce(expr, Aggr::SUM)) {
+ if (auto max_reduce = check_reduce(sum_reduce->child(), Aggr::MAX)) {
+ if (auto dp_sum = check_reduce(max_reduce->child(), Aggr::SUM)) {
+ if (auto dp_mul = check_mul(dp_sum->child())) {
+ const auto &sum_dim = sum_reduce->dimensions()[0];
+ const auto &max_dim = max_reduce->dimensions()[0];
+ const auto &dp_dim = dp_sum->dimensions()[0];
+ const TensorFunction &lhs = dp_mul->lhs();
+ const TensorFunction &rhs = dp_mul->rhs();
+ if (check_params(expr.result_type(), lhs.result_type(), rhs.result_type(),
+ sum_dim, max_dim, dp_dim))
+ {
+ size_t dp_size = get_dim_size(lhs.result_type(), dp_dim);
+ return stash.create<SumMaxDotProductFunction>(expr.result_type(), lhs, rhs, dp_size);
+ }
+ if (check_params(expr.result_type(), rhs.result_type(), lhs.result_type(),
+ sum_dim, max_dim, dp_dim))
+ {
+ size_t dp_size = get_dim_size(rhs.result_type(), dp_dim);
+ return stash.create<SumMaxDotProductFunction>(expr.result_type(), rhs, lhs, dp_size);
+ }
+ }
+ }
+ }
+ }
+ return expr;
+}
+
+} // namespace
diff --git a/eval/src/vespa/eval/instruction/sum_max_dot_product_function.h b/eval/src/vespa/eval/instruction/sum_max_dot_product_function.h
new file mode 100644
index 00000000000..955f014ad47
--- /dev/null
+++ b/eval/src/vespa/eval/instruction/sum_max_dot_product_function.h
@@ -0,0 +1,49 @@
+// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include <vespa/eval/eval/tensor_function.h>
+
+namespace vespalib::eval {
+
+/**
+ * Tensor function combining multiple dot products with multiple
+ * layers of aggregation, resulting in a single scalar result.
+ *
+ * inputs:
+ * query: tensor<float>(qt{},x[32])
+ * document: tensor<float>(dt{},x[32])
+ *
+ * expression:
+ * reduce(
+ * reduce(
+ * reduce(query * document, sum, x),
+ * max, dt
+ * ),
+ * sum, qt
+ * )
+ *
+ * Both query and document contains a collection of vectors. For each
+ * query vector, take the dot product with all document vectors and
+ * select the maximum result. Sum these partial results into the final
+ * result value.
+ *
+ * Note that not all equivalent forms are matched by this function
+ * (initial matching will be very specific).
+ **/
+class SumMaxDotProductFunction : public tensor_function::Op2
+{
+private:
+ size_t _dp_size;
+public:
+ SumMaxDotProductFunction(const ValueType &res_type_in,
+ const TensorFunction &query,
+ const TensorFunction &document,
+ size_t dp_size);
+ InterpretedFunction::Instruction compile_self(const ValueBuilderFactory &factory, Stash &stash) const override;
+ size_t dp_size() const { return _dp_size; }
+ bool result_is_mutable() const override { return true; }
+ static const TensorFunction &optimize(const TensorFunction &expr, Stash &stash);
+};
+
+} // namespace