sparse dot product

author: Håvard Pettersen <havardpe@oath.com> 2021-02-01 15:13:22 +0000
committer: Håvard Pettersen <havardpe@oath.com> 2021-02-01 18:38:36 +0000
commit: 0261658338a6f7ad28bfca6f16f8a4b7c35d9cae (patch)
tree: 14f4494de17b6a64fc2f916fafcf66c1e723ec93 /eval
parent: fe6300b1e9b81c09aa0235b5049439198c6a2206 (diff)
10 files changed, 249 insertions, 11 deletions
diff --git a/eval/CMakeLists.txt b/eval/CMakeLists.txt
index fe621f6e9f0..23127cc12b5 100644
--- a/eval/CMakeLists.txt
+++ b/eval/CMakeLists.txt
@@ -65,6 +65,7 @@ vespa_define_module(
     src/tests/instruction/mixed_simple_join_function
     src/tests/instruction/pow_as_map_optimizer
     src/tests/instruction/remove_trivial_dimension_optimizer
+    src/tests/instruction/sparse_dot_product_function
     src/tests/instruction/sum_max_dot_product_function
     src/tests/instruction/vector_from_doubles_function
     src/tests/streamed/value
diff --git a/eval/src/tests/instruction/sparse_dot_product_function/CMakeLists.txt b/eval/src/tests/instruction/sparse_dot_product_function/CMakeLists.txt
new file mode 100644
index 00000000000..076f1d79796
--- /dev/null
+++ b/eval/src/tests/instruction/sparse_dot_product_function/CMakeLists.txt
@@ -0,0 +1,9 @@
+# Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+vespa_add_executable(eval_sparse_dot_product_function_test_app TEST
+    SOURCES
+    sparse_dot_product_function_test.cpp
+    DEPENDS
+    vespaeval
+    GTest::GTest
+)
+vespa_add_test(NAME eval_sparse_dot_product_function_test_app COMMAND eval_sparse_dot_product_function_test_app)
diff --git a/eval/src/tests/instruction/sparse_dot_product_function/sparse_dot_product_function_test.cpp b/eval/src/tests/instruction/sparse_dot_product_function/sparse_dot_product_function_test.cpp
new file mode 100644
index 00000000000..65eab2778aa
--- /dev/null
+++ b/eval/src/tests/instruction/sparse_dot_product_function/sparse_dot_product_function_test.cpp
@@ -0,0 +1,85 @@
+// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include <vespa/eval/eval/fast_value.h>
+#include <vespa/eval/eval/simple_value.h>
+#include <vespa/eval/instruction/sparse_dot_product_function.h>
+#include <vespa/eval/eval/test/eval_fixture.h>
+#include <vespa/eval/eval/test/gen_spec.h>
+#include <vespa/vespalib/gtest/gtest.h>
+
+using namespace vespalib::eval;
+using namespace vespalib::eval::test;
+
+const ValueBuilderFactory &prod_factory = FastValueBuilderFactory::get();
+const ValueBuilderFactory &test_factory = SimpleValueBuilderFactory::get();
+
+//-----------------------------------------------------------------------------
+
+EvalFixture::ParamRepo make_params() {
+    return EvalFixture::ParamRepo()
+        .add("v1_x",   GenSpec().map("x", 32, 1).seq_bias(3.0).gen())
+        .add("v1_x_f", GenSpec().map("x", 32, 1).seq_bias(3.0).cells_float().gen())
+        .add("v2_x",   GenSpec().map("x", 16, 2).seq_bias(7.0).gen())
+        .add("v2_x_f", GenSpec().map("x", 16, 2).seq_bias(7.0).cells_float().gen())
+        .add("v3_y",   GenSpec().map("y", 10, 1).gen())
+        .add("v4_xd",  GenSpec().idx("x", 10).gen())
+        .add("m1_xy",  GenSpec().map("x", 32, 1).map("y", 16, 2).seq_bias(3.0).gen())
+        .add("m2_xy",  GenSpec().map("x", 16, 2).map("y", 32, 1).seq_bias(7.0).gen())
+        .add("m3_xym", GenSpec().map("x", 8, 1).idx("y", 5).gen());
+}
+EvalFixture::ParamRepo param_repo = make_params();
+
+void assert_optimized(const vespalib::string &expr) {
+    EvalFixture fast_fixture(prod_factory, expr, param_repo, true);
+    EvalFixture test_fixture(test_factory, expr, param_repo, true);
+    EvalFixture slow_fixture(prod_factory, expr, param_repo, false);
+    EXPECT_EQ(fast_fixture.result(), EvalFixture::ref(expr, param_repo));
+    EXPECT_EQ(test_fixture.result(), EvalFixture::ref(expr, param_repo));
+    EXPECT_EQ(slow_fixture.result(), EvalFixture::ref(expr, param_repo));
+    EXPECT_EQ(fast_fixture.find_all<SparseDotProductFunction>().size(), 1u);
+    EXPECT_EQ(test_fixture.find_all<SparseDotProductFunction>().size(), 1u);
+    EXPECT_EQ(slow_fixture.find_all<SparseDotProductFunction>().size(), 0u);
+}
+
+void assert_not_optimized(const vespalib::string &expr) {
+    EvalFixture fast_fixture(prod_factory, expr, param_repo, true);
+    EXPECT_EQ(fast_fixture.result(), EvalFixture::ref(expr, param_repo));
+    EXPECT_EQ(fast_fixture.find_all<SparseDotProductFunction>().size(), 0u);
+}
+
+//-----------------------------------------------------------------------------
+
+TEST(SparseDotProduct, expression_can_be_optimized)
+{
+    assert_optimized("reduce(v1_x*v2_x,sum,x)");
+    assert_optimized("reduce(v2_x*v1_x,sum)");
+    assert_optimized("reduce(v1_x*v2_x_f,sum)");
+    assert_optimized("reduce(v1_x_f*v2_x,sum)");
+    assert_optimized("reduce(v1_x_f*v2_x_f,sum)");
+}
+
+TEST(SparseDotProduct, multi_dimensional_expression_can_be_optimized)
+{
+    assert_optimized("reduce(m1_xy*m2_xy,sum,x,y)");
+    assert_optimized("reduce(m1_xy*m2_xy,sum)");
+}
+
+TEST(SparseDotProduct, embedded_dot_product_is_not_optimized)
+{
+    assert_not_optimized("reduce(m1_xy*v1_x,sum,x)");
+    assert_not_optimized("reduce(v1_x*m1_xy,sum,x)");
+}
+
+TEST(SparseDotProduct, similar_expressions_are_not_optimized)
+{
+    assert_not_optimized("reduce(m1_xy*v1_x,sum)");
+    assert_not_optimized("reduce(v1_x*v3_y,sum)");    
+    assert_not_optimized("reduce(v2_x*v1_x,max)");
+    assert_not_optimized("reduce(v2_x+v1_x,sum)");
+    assert_not_optimized("reduce(v4_xd*v4_xd,sum)");
+    assert_not_optimized("reduce(m3_xym*m3_xym,sum)");
+}
+
+//-----------------------------------------------------------------------------
+
+GTEST_MAIN_RUN_ALL_TESTS()
diff --git a/eval/src/vespa/eval/eval/optimize_tensor_function.cpp b/eval/src/vespa/eval/eval/optimize_tensor_function.cpp
index 2e8c89f88fc..25612b8d5fd 100644
--- a/eval/src/vespa/eval/eval/optimize_tensor_function.cpp
+++ b/eval/src/vespa/eval/eval/optimize_tensor_function.cpp
@@ -5,6 +5,7 @@
 #include "simple_value.h"
 
 #include <vespa/eval/instruction/dense_dot_product_function.h>
+#include <vespa/eval/instruction/sparse_dot_product_function.h>
 #include <vespa/eval/instruction/mixed_inner_product_function.h>
 #include <vespa/eval/instruction/sum_max_dot_product_function.h>
 #include <vespa/eval/instruction/dense_xw_product_function.h>
@@ -31,11 +32,7 @@ namespace vespalib::eval {
 
 namespace {
 
-const TensorFunction &optimize_for_factory(const ValueBuilderFactory &factory, const TensorFunction &expr, Stash &stash) {
-    if (&factory == &SimpleValueBuilderFactory::get()) {
-        // never optimize simple value evaluation
-        return expr;
-    }
+const TensorFunction &optimize_for_factory(const ValueBuilderFactory &, const TensorFunction &expr, Stash &stash) {
     using Child = TensorFunction::Child;
     Child root(expr);
     {
@@ -47,6 +44,7 @@ const TensorFunction &optimize_for_factory(const ValueBuilderFactory &factory, c
             const Child &child = nodes.back().get();
             child.set(SumMaxDotProductFunction::optimize(child.get(), stash));
             child.set(DenseDotProductFunction::optimize(child.get(), stash));
+            child.set(SparseDotProductFunction::optimize(child.get(), stash));
             child.set(DenseXWProductFunction::optimize(child.get(), stash));
             child.set(DenseMatMulFunction::optimize(child.get(), stash));
             child.set(DenseMultiMatMulFunction::optimize(child.get(), stash));
diff --git a/eval/src/vespa/eval/eval/test/eval_fixture.cpp b/eval/src/vespa/eval/eval/test/eval_fixture.cpp
index 58d8905baf3..9b1789fbeea 100644
--- a/eval/src/vespa/eval/eval/test/eval_fixture.cpp
+++ b/eval/src/vespa/eval/eval/test/eval_fixture.cpp
@@ -28,7 +28,10 @@ NodeTypes get_types(const Function &function, const ParamRepo &param_repo) {
     std::vector<ValueType> param_types;
     for (size_t i = 0; i < function.num_params(); ++i) {
         auto pos = param_repo.map.find(function.param_name(i));
-        ASSERT_TRUE(pos != param_repo.map.end());
+        if (pos == param_repo.map.end()) {
+            TEST_STATE(fmt("param name: '%s'", function.param_name(i).data()).c_str());
+            ASSERT_TRUE(pos != param_repo.map.end());
+        }
         param_types.push_back(ValueType::from_spec(pos->second.value.type()));
         ASSERT_TRUE(!param_types.back().is_error());
     }
diff --git a/eval/src/vespa/eval/instruction/CMakeLists.txt b/eval/src/vespa/eval/instruction/CMakeLists.txt
index 58d5290f5d9..cac69d23640 100644
--- a/eval/src/vespa/eval/instruction/CMakeLists.txt
+++ b/eval/src/vespa/eval/instruction/CMakeLists.txt
@@ -32,6 +32,7 @@ vespa_add_library(eval_instruction OBJECT
     pow_as_map_optimizer.cpp
     remove_trivial_dimension_optimizer.cpp
     replace_type_function.cpp
+    sparse_dot_product_function.cpp
     sum_max_dot_product_function.cpp
     vector_from_doubles_function.cpp
 )
diff --git a/eval/src/vespa/eval/instruction/generic_join.cpp b/eval/src/vespa/eval/instruction/generic_join.cpp
index abe29b8228c..6d6f86b7c4d 100644
--- a/eval/src/vespa/eval/instruction/generic_join.cpp
+++ b/eval/src/vespa/eval/instruction/generic_join.cpp
@@ -308,6 +308,17 @@ SparseJoinPlan::SparseJoinPlan(const ValueType &lhs_type, const ValueType &rhs_t
                  [](const auto &a, const auto &b){ return (a.name < b.name); });
 }
 
+SparseJoinPlan::SparseJoinPlan(size_t num_mapped_dims)
+    : sources(num_mapped_dims, Source::BOTH), lhs_overlap(), rhs_overlap()
+{
+    lhs_overlap.reserve(num_mapped_dims);
+    rhs_overlap.reserve(num_mapped_dims);
+    for (size_t i = 0; i < num_mapped_dims; ++i) {
+        lhs_overlap.push_back(i);
+        rhs_overlap.push_back(i);
+    }
+}
+
 bool
 SparseJoinPlan::should_forward_lhs_index() const
 {
diff --git a/eval/src/vespa/eval/instruction/generic_join.h b/eval/src/vespa/eval/instruction/generic_join.h
index 1fcfcf416cc..026a2938971 100644
--- a/eval/src/vespa/eval/instruction/generic_join.h
+++ b/eval/src/vespa/eval/instruction/generic_join.h
@@ -58,6 +58,7 @@ struct SparseJoinPlan {
     bool should_forward_lhs_index() const;
     bool should_forward_rhs_index() const;
     SparseJoinPlan(const ValueType &lhs_type, const ValueType &rhs_type);
+    SparseJoinPlan(size_t num_mapped_dims); // full overlap plan
     ~SparseJoinPlan();
 };
 
@@ -70,15 +71,14 @@ struct SparseJoinState {
     const Value::Index                     &first_index;
     const Value::Index                     &second_index;
     const std::vector<size_t>              &second_view_dims;
-    std::vector<string_id>                    full_address;
-    std::vector<string_id*>                   first_address;
-    std::vector<const string_id*>             address_overlap;
-    std::vector<string_id*>                   second_only_address;
+    std::vector<string_id>                  full_address;
+    std::vector<string_id*>                 first_address;
+    std::vector<const string_id*>           address_overlap;
+    std::vector<string_id*>                 second_only_address;
     size_t                                  lhs_subspace;
     size_t                                  rhs_subspace;
     size_t                                 &first_subspace;
     size_t                                 &second_subspace;
-
     SparseJoinState(const SparseJoinPlan &plan, const Value::Index &lhs, const Value::Index &rhs);
     ~SparseJoinState();
 };
diff --git a/eval/src/vespa/eval/instruction/sparse_dot_product_function.cpp b/eval/src/vespa/eval/instruction/sparse_dot_product_function.cpp
new file mode 100644
index 00000000000..1c9e552521d
--- /dev/null
+++ b/eval/src/vespa/eval/instruction/sparse_dot_product_function.cpp
@@ -0,0 +1,107 @@
+// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include "sparse_dot_product_function.h"
+#include "generic_join.h"
+#include "detect_type.h"
+#include <vespa/eval/eval/fast_value.hpp>
+
+namespace vespalib::eval {
+
+using namespace tensor_function;
+using namespace operation;
+using namespace instruction;
+
+namespace {
+
+template <typename SCT, typename BCT>
+double my_fast_sparse_dot_product(const FastValueIndex &small_idx, const FastValueIndex &big_idx,
+                                  const SCT *small_cells, const BCT *big_cells)
+{
+    double result = 0.0;
+    small_idx.map.each_map_entry([&](auto small_subspace, auto hash) {
+                auto small_addr = small_idx.map.get_addr(small_subspace);
+                auto big_subspace = big_idx.map.lookup(small_addr, hash);
+                if (big_subspace != FastAddrMap::npos()) {
+                    result += (small_cells[small_subspace] * big_cells[big_subspace]);
+                }
+            });
+    return result;
+}
+
+template <typename LCT, typename RCT>
+void my_sparse_dot_product_op(InterpretedFunction::State &state, uint64_t num_mapped_dims) {
+    const auto &lhs_idx = state.peek(1).index();
+    const auto &rhs_idx = state.peek(0).index();
+    const LCT *lhs_cells = state.peek(1).cells().typify<LCT>().cbegin();
+    const RCT *rhs_cells = state.peek(0).cells().typify<RCT>().cbegin();
+    if (auto indexes = detect_type<FastValueIndex>(lhs_idx, rhs_idx)) {
+        [[likely]];
+        const auto &lhs_fast = indexes.get<0>();
+        const auto &rhs_fast = indexes.get<1>();
+        double result = (rhs_fast.map.size() < lhs_fast.map.size())
+                        ? my_fast_sparse_dot_product(rhs_fast, lhs_fast, rhs_cells, lhs_cells)
+                        : my_fast_sparse_dot_product(lhs_fast, rhs_fast, lhs_cells, rhs_cells);
+        state.pop_pop_push(state.stash.create<ScalarValue<double>>(result));
+    } else {
+        [[unlikely]];
+        double result = 0.0;
+        SparseJoinPlan plan(num_mapped_dims);
+        SparseJoinState sparse(plan, lhs_idx, rhs_idx);
+        auto outer = sparse.first_index.create_view({});
+        auto inner = sparse.second_index.create_view(sparse.second_view_dims);
+        outer->lookup({});
+        while (outer->next_result(sparse.first_address, sparse.first_subspace)) {
+            inner->lookup(sparse.address_overlap);
+            if (inner->next_result(sparse.second_only_address, sparse.second_subspace)) {
+                result += (lhs_cells[sparse.lhs_subspace] * rhs_cells[sparse.rhs_subspace]);
+            }
+        }
+        state.pop_pop_push(state.stash.create<ScalarValue<double>>(result));
+    }
+}
+
+struct MyGetFun {
+    template <typename LCT, typename RCT>
+    static auto invoke() { return my_sparse_dot_product_op<LCT,RCT>; }
+};
+
+} // namespace <unnamed>
+
+SparseDotProductFunction::SparseDotProductFunction(const TensorFunction &lhs_in,
+                                                   const TensorFunction &rhs_in)
+    : tensor_function::Op2(ValueType::make_type(CellType::DOUBLE, {}), lhs_in, rhs_in)
+{
+}
+
+InterpretedFunction::Instruction
+SparseDotProductFunction::compile_self(const ValueBuilderFactory &, Stash &) const
+{
+    auto op = typify_invoke<2,TypifyCellType,MyGetFun>(lhs().result_type().cell_type(), rhs().result_type().cell_type());
+    return InterpretedFunction::Instruction(op, lhs().result_type().count_mapped_dimensions());
+}
+
+bool
+SparseDotProductFunction::compatible_types(const ValueType &res, const ValueType &lhs, const ValueType &rhs)
+{
+    return (res.is_scalar() && (res.cell_type() == CellType::DOUBLE) &&
+            lhs.is_sparse() && (rhs.dimensions() == lhs.dimensions()));
+}
+
+const TensorFunction &
+SparseDotProductFunction::optimize(const TensorFunction &expr, Stash &stash)
+{
+    auto reduce = as<Reduce>(expr);
+    if (reduce && (reduce->aggr() == Aggr::SUM)) {
+        auto join = as<Join>(reduce->child());
+        if (join && (join->function() == Mul::f)) {
+            const TensorFunction &lhs = join->lhs();
+            const TensorFunction &rhs = join->rhs();
+            if (compatible_types(expr.result_type(), lhs.result_type(), rhs.result_type())) {
+                return stash.create<SparseDotProductFunction>(lhs, rhs);
+            }
+        }
+    }
+    return expr;
+}
+
+} // namespace
diff --git a/eval/src/vespa/eval/instruction/sparse_dot_product_function.h b/eval/src/vespa/eval/instruction/sparse_dot_product_function.h
new file mode 100644
index 00000000000..ccc7a61f5e8
--- /dev/null
+++ b/eval/src/vespa/eval/instruction/sparse_dot_product_function.h
@@ -0,0 +1,23 @@
+// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include <vespa/eval/eval/tensor_function.h>
+
+namespace vespalib::eval {
+
+/**
+ * Tensor function for a dot product between two sparse tensors.
+ */
+class SparseDotProductFunction : public tensor_function::Op2
+{
+public:
+    SparseDotProductFunction(const TensorFunction &lhs_in,
+                             const TensorFunction &rhs_in);
+    InterpretedFunction::Instruction compile_self(const ValueBuilderFactory &factory, Stash &stash) const override;
+    bool result_is_mutable() const override { return true; }
+    static bool compatible_types(const ValueType &res, const ValueType &lhs, const ValueType &rhs);
+    static const TensorFunction &optimize(const TensorFunction &expr, Stash &stash);
+};
+
+} // namespace
author	Håvard Pettersen <havardpe@oath.com>	2021-02-01 15:13:22 +0000
committer	Håvard Pettersen <havardpe@oath.com>	2021-02-01 18:38:36 +0000
commit	0261658338a6f7ad28bfca6f16f8a4b7c35d9cae (patch)
tree	14f4494de17b6a64fc2f916fafcf66c1e723ec93 /eval
parent	fe6300b1e9b81c09aa0235b5049439198c6a2206 (diff)