added sum_max_dot_product_function optimization

author: Håvard Pettersen <havardpe@oath.com> 2021-01-26 15:51:39 +0000
committer: Håvard Pettersen <havardpe@oath.com> 2021-01-26 17:21:07 +0000
commit: 8bafc67cc4c404cfa1453f67b409eb729608e6a8 (patch)
tree: c2a83dcb673df92ee5120256b0a6eda35583e838 /eval
parent: 11bd6fd448dbfd8b8575c0186db7df9062bb20a1 (diff)
7 files changed, 312 insertions, 0 deletions
diff --git a/eval/CMakeLists.txt b/eval/CMakeLists.txt
index 1d9fa7478a0..239cf8f0f23 100644
--- a/eval/CMakeLists.txt
+++ b/eval/CMakeLists.txt
@@ -64,6 +64,7 @@ vespa_define_module(
     src/tests/instruction/mixed_simple_join_function
     src/tests/instruction/pow_as_map_optimizer
     src/tests/instruction/remove_trivial_dimension_optimizer
+    src/tests/instruction/sum_max_dot_product_function
     src/tests/instruction/vector_from_doubles_function
     src/tests/streamed/value
     src/tests/tensor/instruction_benchmark
diff --git a/eval/src/tests/instruction/sum_max_dot_product_function/CMakeLists.txt b/eval/src/tests/instruction/sum_max_dot_product_function/CMakeLists.txt
new file mode 100644
index 00000000000..d9276ec3431
--- /dev/null
+++ b/eval/src/tests/instruction/sum_max_dot_product_function/CMakeLists.txt
@@ -0,0 +1,9 @@
+# Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+vespa_add_executable(eval_sum_max_dot_product_function_test_app TEST
+    SOURCES
+    sum_max_dot_product_function_test.cpp
+    DEPENDS
+    vespaeval
+    GTest::GTest
+)
+vespa_add_test(NAME eval_sum_max_dot_product_function_test_app COMMAND eval_sum_max_dot_product_function_test_app)
diff --git a/eval/src/tests/instruction/sum_max_dot_product_function/sum_max_dot_product_function_test.cpp b/eval/src/tests/instruction/sum_max_dot_product_function/sum_max_dot_product_function_test.cpp
new file mode 100644
index 00000000000..4b89f30d879
--- /dev/null
+++ b/eval/src/tests/instruction/sum_max_dot_product_function/sum_max_dot_product_function_test.cpp
@@ -0,0 +1,120 @@
+// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include <vespa/eval/eval/fast_value.h>
+#include <vespa/eval/eval/tensor_function.h>
+#include <vespa/eval/eval/test/eval_fixture.h>
+#include <vespa/eval/eval/test/tensor_model.hpp>
+#include <vespa/eval/instruction/sum_max_dot_product_function.h>
+#include <vespa/vespalib/gtest/gtest.h>
+
+using namespace vespalib;
+using namespace vespalib::eval;
+using namespace vespalib::eval::test;
+
+const ValueBuilderFactory &prod_factory = FastValueBuilderFactory::get();
+
+struct MyVecSeq : Sequence {
+    double bias;
+    double operator[](size_t i) const override { return (i + bias); }
+    MyVecSeq(double cellBias) : bias(cellBias) {}
+};
+
+//-----------------------------------------------------------------------------
+
+vespalib::string main_expr = "reduce(reduce(reduce(a*b,sum,z),max,y),sum,x)";
+
+void assert_optimized(const TensorSpec &a, const TensorSpec &b, size_t dp_size) {
+    EvalFixture::ParamRepo param_repo;
+    param_repo.add("a", a);
+    param_repo.add("b", b);
+    EvalFixture slow_fixture(prod_factory, main_expr, param_repo, false);
+    EvalFixture fast_fixture(prod_factory, main_expr, param_repo, true);
+    EXPECT_EQ(slow_fixture.result(), EvalFixture::ref(main_expr, param_repo));
+    EXPECT_EQ(fast_fixture.result(), EvalFixture::ref(main_expr, param_repo));
+    auto info = fast_fixture.find_all<SumMaxDotProductFunction>();
+    ASSERT_EQ(info.size(), 1u);
+    EXPECT_TRUE(info[0]->result_is_mutable());
+    EXPECT_EQUAL(info[0]->dp_size(), dp_size);
+}
+
+void assert_not_optimized(const TensorSpec &a, const TensorSpec &b, const vespalib::string &expr = main_expr) {
+    EvalFixture::ParamRepo param_repo;
+    param_repo.add("a", a);
+    param_repo.add("b", b);
+    EvalFixture slow_fixture(prod_factory, expr, param_repo, false);
+    EvalFixture fast_fixture(prod_factory, expr, param_repo, true);
+    EXPECT_EQ(slow_fixture.result(), EvalFixture::ref(expr, param_repo));
+    EXPECT_EQ(fast_fixture.result(), EvalFixture::ref(expr, param_repo));
+    auto info = fast_fixture.find_all<SumMaxDotProductFunction>();
+    ASSERT_EQ(info.size(), 0u);
+}
+
+//-----------------------------------------------------------------------------
+
+auto query = spec(float_cells({x({"0", "1", "2"}),z(5)}), MyVecSeq(0.5));
+auto document = spec(float_cells({y({"0", "1", "2", "3", "4", "5"}),z(5)}), MyVecSeq(2.5));
+auto empty_query = spec(float_cells({x({}),z(5)}), MyVecSeq(0.5));
+auto empty_document = spec(float_cells({y({}),z(5)}), MyVecSeq(2.5));
+
+TEST(SumMaxDotProduct, expressions_can_be_optimized)
+{
+    assert_optimized(query, document, 5);
+    assert_optimized(document, query, 5);
+    assert_optimized(empty_query, document, 5);
+    assert_optimized(query, empty_document, 5);
+    assert_optimized(empty_query, empty_document, 5);
+}
+
+TEST(SumMaxDotProduct, double_cells_are_not_optimized) {
+    auto double_query = spec({x({"0", "1", "2"}),z(5)}, MyVecSeq(0.5));
+    auto double_document = spec({y({"0", "1", "2", "3", "4", "5"}),z(5)}, MyVecSeq(2.5));
+    assert_not_optimized(query, double_document);
+    assert_not_optimized(double_query, document);
+    assert_not_optimized(double_query, double_document);
+}
+
+TEST(SumMaxDotProduct, trivial_dot_product_is_not_optimized) {
+    auto trivial_query = spec(float_cells({x({"0", "1", "2"}),z(1)}), MyVecSeq(0.5));
+    auto trivial_document = spec(float_cells({y({"0", "1", "2", "3", "4", "5"}),z(1)}), MyVecSeq(2.5));
+    assert_not_optimized(trivial_query, trivial_document);
+}
+
+TEST(SumMaxDotProduct, additional_dimensions_are_not_optimized) {
+    auto extra_sparse_query = spec(float_cells({Domain("a", {"0"}),x({"0", "1", "2"}),z(5)}), MyVecSeq(0.5));
+    auto extra_dense_query = spec(float_cells({Domain("a", 1),x({"0", "1", "2"}),z(5)}), MyVecSeq(0.5));
+    auto extra_sparse_document = spec(float_cells({Domain("a", {"0"}),y({"0", "1", "2", "3", "4", "5"}),z(5)}), MyVecSeq(2.5));
+    auto extra_dense_document = spec(float_cells({Domain("a", 1),y({"0", "1", "2", "3", "4", "5"}),z(5)}), MyVecSeq(2.5));
+    vespalib::string extra_sum_expr = "reduce(reduce(reduce(a*b,sum,z),max,y),sum,a,x)";
+    vespalib::string extra_max_expr = "reduce(reduce(reduce(a*b,sum,z),max,a,y),sum,x)";
+    assert_not_optimized(extra_sparse_query, document);
+    assert_not_optimized(extra_dense_query, document);
+    assert_not_optimized(query, extra_sparse_document);
+    assert_not_optimized(query, extra_dense_document);
+    assert_not_optimized(extra_sparse_query, document, extra_sum_expr);
+    assert_not_optimized(extra_dense_query, document, extra_sum_expr);
+    assert_not_optimized(query, extra_sparse_document, extra_max_expr);
+    assert_not_optimized(query, extra_dense_document, extra_max_expr);
+}
+
+TEST(SumMaxDotProduct, more_dense_variants_are_not_optimized) {
+    auto dense_query = spec(float_cells({x(3),z(5)}), MyVecSeq(0.5));
+    auto dense_document = spec(float_cells({y(5),z(5)}), MyVecSeq(2.5));
+    assert_not_optimized(dense_query, document);
+    assert_not_optimized(query, dense_document);
+    assert_not_optimized(dense_query, dense_document);
+}
+
+TEST(SumMaxDotProduct, similar_expressions_are_not_optimized) {
+    vespalib::string max_sum_expr = "reduce(reduce(reduce(a*b,sum,z),sum,y),max,x)";
+    vespalib::string not_dp_expr1 = "reduce(reduce(reduce(a+b,sum,z),max,y),sum,x)";
+    vespalib::string not_dp_expr2 = "reduce(reduce(reduce(a*b,min,z),max,y),sum,x)";
+    vespalib::string sum_all_expr = "reduce(reduce(reduce(a*b,sum,z),max,y),sum)";
+    assert_not_optimized(query, document, max_sum_expr);
+    assert_not_optimized(query, document, not_dp_expr1);
+    assert_not_optimized(query, document, not_dp_expr2);
+    assert_not_optimized(query, document, sum_all_expr);
+}
+
+//-----------------------------------------------------------------------------
+
+GTEST_MAIN_RUN_ALL_TESTS()
diff --git a/eval/src/vespa/eval/eval/optimize_tensor_function.cpp b/eval/src/vespa/eval/eval/optimize_tensor_function.cpp
index ea63171b260..2e8c89f88fc 100644
--- a/eval/src/vespa/eval/eval/optimize_tensor_function.cpp
+++ b/eval/src/vespa/eval/eval/optimize_tensor_function.cpp
@@ -6,6 +6,7 @@
 
 #include <vespa/eval/instruction/dense_dot_product_function.h>
 #include <vespa/eval/instruction/mixed_inner_product_function.h>
+#include <vespa/eval/instruction/sum_max_dot_product_function.h>
 #include <vespa/eval/instruction/dense_xw_product_function.h>
 #include <vespa/eval/instruction/dense_matmul_function.h>
 #include <vespa/eval/instruction/dense_multi_matmul_function.h>
@@ -44,6 +45,7 @@ const TensorFunction &optimize_for_factory(const ValueBuilderFactory &factory, c
         }
         while (!nodes.empty()) {
             const Child &child = nodes.back().get();
+            child.set(SumMaxDotProductFunction::optimize(child.get(), stash));
             child.set(DenseDotProductFunction::optimize(child.get(), stash));
             child.set(DenseXWProductFunction::optimize(child.get(), stash));
             child.set(DenseMatMulFunction::optimize(child.get(), stash));
diff --git a/eval/src/vespa/eval/instruction/CMakeLists.txt b/eval/src/vespa/eval/instruction/CMakeLists.txt
index 113df255658..58d5290f5d9 100644
--- a/eval/src/vespa/eval/instruction/CMakeLists.txt
+++ b/eval/src/vespa/eval/instruction/CMakeLists.txt
@@ -32,5 +32,6 @@ vespa_add_library(eval_instruction OBJECT
     pow_as_map_optimizer.cpp
     remove_trivial_dimension_optimizer.cpp
     replace_type_function.cpp
+    sum_max_dot_product_function.cpp
     vector_from_doubles_function.cpp
 )
diff --git a/eval/src/vespa/eval/instruction/sum_max_dot_product_function.cpp b/eval/src/vespa/eval/instruction/sum_max_dot_product_function.cpp
new file mode 100644
index 00000000000..4b541062007
--- /dev/null
+++ b/eval/src/vespa/eval/instruction/sum_max_dot_product_function.cpp
@@ -0,0 +1,130 @@
+// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include "sum_max_dot_product_function.h"
+#include <vespa/eval/eval/operation.h>
+#include <vespa/eval/eval/value.h>
+#include <cblas.h>
+
+namespace vespalib::eval {
+
+using namespace tensor_function;
+using namespace operation;
+
+namespace {
+
+void my_sum_max_dot_product_op(InterpretedFunction::State &state, uint64_t dp_size) {
+    double result = 0.0;
+    auto query_cells = state.peek(1).cells().typify<float>();
+    auto document_cells = state.peek(0).cells().typify<float>();
+    if ((query_cells.size() > 0) && (document_cells.size() > 0)) {
+        for (const float *query = query_cells.begin(); query < query_cells.end(); query += dp_size) {
+            float max_dp = aggr::Max<float>::null_value();
+            for (const float *document = document_cells.begin(); document < document_cells.end(); document += dp_size) {
+                max_dp = aggr::Max<float>::combine(max_dp, cblas_sdot(dp_size, query, 1, document, 1));
+            }
+            result += max_dp;
+        }
+    }
+    state.pop_pop_push(state.stash.create<ScalarValue<double>>(result));
+}
+
+const Reduce *check_reduce(const TensorFunction &expr, Aggr aggr) {
+    if (auto reduce = as<Reduce>(expr)) {
+        if ((reduce->aggr() == aggr) && (reduce->dimensions().size() == 1)) {
+            return reduce;
+        }
+    }
+    return nullptr;
+}
+
+const Join *check_mul(const TensorFunction &expr) {
+    if (auto join = as<Join>(expr)) {
+        if (join->function() == Mul::f) {
+            return join;
+        }
+    }
+    return nullptr;
+}
+
+bool check_params(const ValueType &res_type, const ValueType &query, const ValueType &document,
+                  const vespalib::string &sum_dim, const vespalib::string &max_dim, const vespalib::string &dp_dim)
+{
+    if (res_type.is_scalar() && (res_type.cell_type() == CellType::DOUBLE) &&
+        (query.dimensions().size() == 2) && (query.cell_type() == CellType::FLOAT) &&
+        (document.dimensions().size() == 2) && (document.cell_type() == CellType::FLOAT))
+    {
+        size_t npos = ValueType::Dimension::npos;
+        size_t sum_idx = query.dimension_index(sum_dim);
+        size_t max_idx = document.dimension_index(max_dim);
+        size_t query_dp_idx = query.dimension_index(dp_dim);
+        size_t document_dp_idx = document.dimension_index(dp_dim);
+        if ((sum_idx != npos) && (max_idx != npos) && (query_dp_idx != npos) && (document_dp_idx != npos)) {
+            if (query.dimensions()[sum_idx].is_mapped() && document.dimensions()[max_idx].is_mapped() &&
+                query.dimensions()[query_dp_idx].is_indexed() && !query.dimensions()[query_dp_idx].is_trivial())
+            {
+                assert(query.dimensions()[query_dp_idx].size == document.dimensions()[document_dp_idx].size);
+                return true;
+            }
+        }
+    }
+    return false;
+}
+
+size_t get_dim_size(const ValueType &type, const vespalib::string &dim) {
+    size_t npos = ValueType::Dimension::npos;
+    size_t idx = type.dimension_index(dim);
+    assert(idx != npos);
+    assert(type.dimensions()[idx].is_indexed());
+    assert(!type.dimensions()[idx].is_trivial());
+    return type.dimensions()[idx].size;
+}
+
+} // namespace <unnamed>
+
+SumMaxDotProductFunction::SumMaxDotProductFunction(const ValueType &res_type_in,
+                                                   const TensorFunction &query,
+                                                   const TensorFunction &document,
+                                                   size_t dp_size)
+    : tensor_function::Op2(res_type_in, query, document),
+      _dp_size(dp_size)
+{
+}
+
+InterpretedFunction::Instruction
+SumMaxDotProductFunction::compile_self(const ValueBuilderFactory &, Stash &) const
+{
+    return InterpretedFunction::Instruction(my_sum_max_dot_product_op, _dp_size);
+}
+
+const TensorFunction &
+SumMaxDotProductFunction::optimize(const TensorFunction &expr, Stash &stash)
+{
+    if (auto sum_reduce = check_reduce(expr, Aggr::SUM)) {
+        if (auto max_reduce = check_reduce(sum_reduce->child(), Aggr::MAX)) {
+            if (auto dp_sum = check_reduce(max_reduce->child(), Aggr::SUM)) {
+                if (auto dp_mul = check_mul(dp_sum->child())) {
+                    const auto &sum_dim = sum_reduce->dimensions()[0];
+                    const auto &max_dim = max_reduce->dimensions()[0];
+                    const auto &dp_dim = dp_sum->dimensions()[0];
+                    const TensorFunction &lhs = dp_mul->lhs();
+                    const TensorFunction &rhs = dp_mul->rhs();
+                    if (check_params(expr.result_type(), lhs.result_type(), rhs.result_type(),
+                                     sum_dim, max_dim, dp_dim))
+                    {
+                        size_t dp_size = get_dim_size(lhs.result_type(), dp_dim);
+                        return stash.create<SumMaxDotProductFunction>(expr.result_type(), lhs, rhs, dp_size);
+                    }
+                    if (check_params(expr.result_type(), rhs.result_type(), lhs.result_type(),
+                                     sum_dim, max_dim, dp_dim))
+                    {
+                        size_t dp_size = get_dim_size(rhs.result_type(), dp_dim);
+                        return stash.create<SumMaxDotProductFunction>(expr.result_type(), rhs, lhs, dp_size);
+                    }
+                }
+            }
+        }
+    }
+    return expr;
+}
+
+} // namespace
diff --git a/eval/src/vespa/eval/instruction/sum_max_dot_product_function.h b/eval/src/vespa/eval/instruction/sum_max_dot_product_function.h
new file mode 100644
index 00000000000..955f014ad47
--- /dev/null
+++ b/eval/src/vespa/eval/instruction/sum_max_dot_product_function.h
@@ -0,0 +1,49 @@
+// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include <vespa/eval/eval/tensor_function.h>
+
+namespace vespalib::eval {
+
+/**
+ * Tensor function combining multiple dot products with multiple
+ * layers of aggregation, resulting in a single scalar result.
+ * 
+ * inputs:
+ *   query:    tensor<float>(qt{},x[32])
+ *   document: tensor<float>(dt{},x[32])
+ *
+ * expression:
+ *   reduce(
+ *     reduce(
+ *       reduce(query * document, sum, x),
+ *       max, dt
+ *     ),
+ *     sum, qt
+ *   )
+ *
+ * Both query and document contains a collection of vectors. For each
+ * query vector, take the dot product with all document vectors and
+ * select the maximum result. Sum these partial results into the final
+ * result value.
+ *
+ * Note that not all equivalent forms are matched by this function
+ * (initial matching will be very specific).
+ **/
+class SumMaxDotProductFunction : public tensor_function::Op2
+{
+private:
+    size_t _dp_size;
+public:
+    SumMaxDotProductFunction(const ValueType &res_type_in,
+                             const TensorFunction &query,
+                             const TensorFunction &document,
+                             size_t dp_size);
+    InterpretedFunction::Instruction compile_self(const ValueBuilderFactory &factory, Stash &stash) const override;
+    size_t dp_size() const { return _dp_size; }
+    bool result_is_mutable() const override { return true; }
+    static const TensorFunction &optimize(const TensorFunction &expr, Stash &stash);
+};
+
+} // namespace
author	Håvard Pettersen <havardpe@oath.com>	2021-01-26 15:51:39 +0000
committer	Håvard Pettersen <havardpe@oath.com>	2021-01-26 17:21:07 +0000
commit	8bafc67cc4c404cfa1453f67b409eb729608e6a8 (patch)
tree	c2a83dcb673df92ee5120256b0a6eda35583e838 /eval
parent	11bd6fd448dbfd8b8575c0186db7df9062bb20a1 (diff)