optimize singledim sparse lookup

author: Håvard Pettersen <havardpe@yahooinc.com> 2022-06-17 08:37:47 +0000
committer: Håvard Pettersen <havardpe@yahooinc.com> 2022-06-17 10:22:30 +0000
commit: 7fd09d198ff274109e4e95b0feda4662199fd723 (patch)
tree: 26d815aff6ff1d8ae1fb9d47bc05ad835230cd12 /eval
parent: a18e8e12faec6318c1d5768fd4edf42efd0ecb78 (diff)
9 files changed, 188 insertions, 7 deletions
diff --git a/eval/CMakeLists.txt b/eval/CMakeLists.txt
index 3dca80885f7..960f15eac27 100644
--- a/eval/CMakeLists.txt
+++ b/eval/CMakeLists.txt
@@ -82,6 +82,7 @@ vespa_define_module(
     src/tests/instruction/sparse_full_overlap_join_function
     src/tests/instruction/sparse_merge_function
     src/tests/instruction/sparse_no_overlap_join_function
+    src/tests/instruction/sparse_singledim_lookup
     src/tests/instruction/sum_max_dot_product_function
     src/tests/instruction/unpack_bits_function
     src/tests/instruction/vector_from_doubles_function
diff --git a/eval/src/tests/instruction/dense_tensor_peek_function/dense_tensor_peek_function_test.cpp b/eval/src/tests/instruction/dense_tensor_peek_function/dense_tensor_peek_function_test.cpp
index 11654a38342..5cd63ce0492 100644
--- a/eval/src/tests/instruction/dense_tensor_peek_function/dense_tensor_peek_function_test.cpp
+++ b/eval/src/tests/instruction/dense_tensor_peek_function/dense_tensor_peek_function_test.cpp
@@ -56,10 +56,10 @@ TEST("require that tensor peek can be optimized for dense tensors") {
     TEST_DO(verify("x3y2f{x:(a-1),y:(b)}", 0.0, 1, 0));
 }
 
-TEST("require that tensor peek is not optimized for sparse tensor") {
+TEST("require that tensor peek is optimized differently for sparse tensor") {
     TEST_DO(verify("xm{x:1}", 1.0, 0, 1));
-    TEST_DO(verify("xm{x:(c)}", 3.0, 0, 1));
-    TEST_DO(verify("xm{x:(c+1)}", 0.0, 0, 1));
+    TEST_DO(verify("xm{x:(c)}", 3.0, 0, 0));
+    TEST_DO(verify("xm{x:(c+1)}", 0.0, 0, 0));
 }
 
 TEST("require that tensor peek is not optimized for mixed tensor") {
@@ -71,10 +71,10 @@ TEST("require that tensor peek is not optimized for mixed tensor") {
 TEST("require that indexes are truncated when converted to integers") {
     TEST_DO(verify("x3{x:(a+0.7)}", 2.0, 1, 0));
     TEST_DO(verify("x3{x:(a+0.3)}", 2.0, 1, 0));
-    TEST_DO(verify("xm{x:(a+0.7)}", 1.0, 0, 1));
-    TEST_DO(verify("xm{x:(a+0.3)}", 1.0, 0, 1));
-    TEST_DO(verify("xm{x:(-a-0.7)}", 4.0, 0, 1));
-    TEST_DO(verify("xm{x:(-a-0.3)}", 4.0, 0, 1));
+    TEST_DO(verify("xm{x:(a+0.7)}", 1.0, 0, 0));
+    TEST_DO(verify("xm{x:(a+0.3)}", 1.0, 0, 0));
+    TEST_DO(verify("xm{x:(-a-0.7)}", 4.0, 0, 0));
+    TEST_DO(verify("xm{x:(-a-0.3)}", 4.0, 0, 0));
 }
 
 TEST_MAIN() { TEST_RUN_ALL(); }
diff --git a/eval/src/tests/instruction/sparse_singledim_lookup/CMakeLists.txt b/eval/src/tests/instruction/sparse_singledim_lookup/CMakeLists.txt
new file mode 100644
index 00000000000..983fd717540
--- /dev/null
+++ b/eval/src/tests/instruction/sparse_singledim_lookup/CMakeLists.txt
@@ -0,0 +1,9 @@
+# Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+vespa_add_executable(eval_sparse_singledim_lookup_test_app TEST
+    SOURCES
+    sparse_singledim_lookup_test.cpp
+    DEPENDS
+    vespaeval
+    GTest::GTest
+)
+vespa_add_test(NAME eval_sparse_singledim_lookup_test_app COMMAND eval_sparse_singledim_lookup_test_app)
diff --git a/eval/src/tests/instruction/sparse_singledim_lookup/sparse_singledim_lookup_test.cpp b/eval/src/tests/instruction/sparse_singledim_lookup/sparse_singledim_lookup_test.cpp
new file mode 100644
index 00000000000..7d4e985a5ce
--- /dev/null
+++ b/eval/src/tests/instruction/sparse_singledim_lookup/sparse_singledim_lookup_test.cpp
@@ -0,0 +1,59 @@
+// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include <vespa/eval/eval/fast_value.h>
+#include <vespa/eval/eval/simple_value.h>
+#include <vespa/eval/instruction/sparse_singledim_lookup.h>
+#include <vespa/eval/eval/test/eval_fixture.h>
+#include <vespa/eval/eval/test/gen_spec.h>
+#include <vespa/vespalib/util/stringfmt.h>
+#include <vespa/vespalib/gtest/gtest.h>
+
+using namespace vespalib::eval;
+using namespace vespalib::eval::test;
+
+using vespalib::make_string_short::fmt;
+
+//-----------------------------------------------------------------------------
+
+struct FunInfo {
+    using LookFor = SparseSingledimLookup;
+    void verify(const LookFor &fun) const {
+        EXPECT_TRUE(fun.result_is_mutable());
+    }
+};
+
+void verify_optimized(const vespalib::string &expr) {
+    CellTypeSpace type_space(CellTypeUtils::list_types(), 1);
+    EvalFixture::verify<FunInfo>(expr, {FunInfo()}, type_space);
+}
+
+void verify_not_optimized(const vespalib::string &expr) {
+    CellTypeSpace just_float({CellType::FLOAT}, 1);
+    EvalFixture::verify<FunInfo>(expr, {}, just_float);
+}
+
+//-----------------------------------------------------------------------------
+
+TEST(SparseSingledimLookup, expression_can_be_optimized) {
+    verify_optimized("x5_1{x:(1+2)}");
+}
+
+TEST(SparseSingledimLookup, optimized_expression_handles_failed_lookup) {
+    verify_optimized("x5_1{x:(5+5)}");
+    verify_optimized("x5_1{x:(5-10)}");
+}
+
+TEST(SparseSingledimLookup, verbatim_expression_is_not_optimized) {
+    verify_not_optimized("x5_1{x:3}");
+    verify_not_optimized("x5_1{x:(3)}");
+}
+
+TEST(SparseSingledimLookup, similar_expressions_are_not_optimized) {
+    verify_not_optimized("x5{x:(1+2)}");
+    verify_not_optimized("x5_1y3{x:(1+2)}");
+    verify_not_optimized("x5_1y3_1{x:(1+2)}");
+}
+
+//-----------------------------------------------------------------------------
+
+GTEST_MAIN_RUN_ALL_TESTS()
diff --git a/eval/src/vespa/eval/eval/optimize_tensor_function.cpp b/eval/src/vespa/eval/eval/optimize_tensor_function.cpp
index b6258acc9cb..45a4c79d2ed 100644
--- a/eval/src/vespa/eval/eval/optimize_tensor_function.cpp
+++ b/eval/src/vespa/eval/eval/optimize_tensor_function.cpp
@@ -9,6 +9,7 @@
 #include <vespa/eval/instruction/sparse_112_dot_product.h>
 #include <vespa/eval/instruction/mixed_112_dot_product.h>
 #include <vespa/eval/instruction/sparse_merge_function.h>
+#include <vespa/eval/instruction/sparse_singledim_lookup.h>
 #include <vespa/eval/instruction/sparse_no_overlap_join_function.h>
 #include <vespa/eval/instruction/sparse_full_overlap_join_function.h>
 #include <vespa/eval/instruction/mixed_inner_product_function.h>
@@ -100,6 +101,7 @@ const TensorFunction &optimize_for_factory(const ValueBuilderFactory &, const Te
                           child.set(SparseMergeFunction::optimize(child.get(), stash));
                           child.set(SparseNoOverlapJoinFunction::optimize(child.get(), stash));
                           child.set(SparseFullOverlapJoinFunction::optimize(child.get(), stash));
+                          child.set(SparseSingledimLookup::optimize(child.get(), stash));
                       });
     return root.get();
 }
diff --git a/eval/src/vespa/eval/eval/tensor_function.h b/eval/src/vespa/eval/eval/tensor_function.h
index c6700b0565a..c5cc99d9137 100644
--- a/eval/src/vespa/eval/eval/tensor_function.h
+++ b/eval/src/vespa/eval/eval/tensor_function.h
@@ -402,6 +402,7 @@ public:
     // mapping from dimension name to verbatim label or child index:
     using Spec = std::map<vespalib::string, LabelOrChildIndex>;
     Spec make_spec() const;
+    const TensorFunction &param() const { return _param.get(); }
     const ValueType &param_type() const { return _param.get().result_type(); }
     bool result_is_mutable() const override { return true; }
     InterpretedFunction::Instruction compile_self(const ValueBuilderFactory &factory, Stash &stash) const final override;
diff --git a/eval/src/vespa/eval/instruction/CMakeLists.txt b/eval/src/vespa/eval/instruction/CMakeLists.txt
index f1ec8aa49a9..c4fd0443cbb 100644
--- a/eval/src/vespa/eval/instruction/CMakeLists.txt
+++ b/eval/src/vespa/eval/instruction/CMakeLists.txt
@@ -42,6 +42,7 @@ vespa_add_library(eval_instruction OBJECT
     sparse_full_overlap_join_function.cpp
     sparse_merge_function.cpp
     sparse_no_overlap_join_function.cpp
+    sparse_singledim_lookup.cpp
     sum_max_dot_product_function.cpp
     unpack_bits_function.cpp
     vector_from_doubles_function.cpp
diff --git a/eval/src/vespa/eval/instruction/sparse_singledim_lookup.cpp b/eval/src/vespa/eval/instruction/sparse_singledim_lookup.cpp
new file mode 100644
index 00000000000..4dc453d2a58
--- /dev/null
+++ b/eval/src/vespa/eval/instruction/sparse_singledim_lookup.cpp
@@ -0,0 +1,84 @@
+// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include "sparse_singledim_lookup.h"
+#include <vespa/eval/eval/fast_value.hpp>
+#include <vespa/vespalib/util/typify.h>
+
+namespace vespalib::eval {
+
+using namespace tensor_function;
+using namespace operation;
+using namespace instruction;
+using Handle = SharedStringRepo::Handle;
+
+namespace {
+
+template <typename CT>
+double my_sparse_singledim_lookup_fallback(const Value::Index &idx, const CT *cells, string_id key) __attribute__((noinline));
+template <typename CT>
+double my_sparse_singledim_lookup_fallback(const Value::Index &idx, const CT *cells, string_id key) {
+    size_t subspace = 0;
+    const string_id *key_ref = &key;
+    auto view = idx.create_view(ConstArrayRef<size_t>{&subspace, 1});
+    view->lookup(ConstArrayRef<const string_id *>{&key_ref, 1});
+    if (!view->next_result({}, subspace)) {
+        return 0.0;
+    }
+    return cells[subspace];
+}
+
+template <typename CT>
+double my_fast_sparse_singledim_lookup(const FastAddrMap *map, const CT *cells, string_id key)
+{
+    auto subspace = map->lookup_singledim(key);
+    return (subspace != FastAddrMap::npos()) ? double(cells[subspace]) : 0.0;
+}
+
+template <typename CT>
+void my_sparse_singledim_lookup_op(InterpretedFunction::State &state, uint64_t) {
+    const auto &idx = state.peek(1).index();
+    const CT *cells = state.peek(1).cells().typify<CT>().cbegin();
+    int64_t number(state.peek(0).as_double());
+    Handle handle = Handle::handle_from_number(number);
+    double result = __builtin_expect(is_fast(idx), true)
+        ? my_fast_sparse_singledim_lookup<CT>(&as_fast(idx).map, cells, handle.id())
+        : my_sparse_singledim_lookup_fallback<CT>(idx, cells, handle.id());
+    state.pop_pop_push(state.stash.create<DoubleValue>(result));
+}
+
+struct MyGetFun {
+    template <typename CT>
+    static auto invoke() { return my_sparse_singledim_lookup_op<CT>; }
+};
+
+} // namespace <unnamed>
+
+SparseSingledimLookup::SparseSingledimLookup(const TensorFunction &tensor,
+                                             const TensorFunction &expr)
+    : tensor_function::Op2(ValueType::double_type(), tensor, expr)
+{
+}
+
+InterpretedFunction::Instruction
+SparseSingledimLookup::compile_self(const ValueBuilderFactory &, Stash &) const
+{
+    auto op = typify_invoke<1,TypifyCellType,MyGetFun>(lhs().result_type().cell_type());
+    return InterpretedFunction::Instruction(op);
+}
+
+const TensorFunction &
+SparseSingledimLookup::optimize(const TensorFunction &expr, Stash &stash)
+{
+    auto peek = as<Peek>(expr);
+    if (peek && peek->result_type().is_double() &&
+        peek->param_type().is_sparse() &&
+        (peek->param_type().dimensions().size() == 1) &&
+        (peek->map().size() == 1) &&
+        (std::holds_alternative<TensorFunction::Child>(peek->map().begin()->second)))
+    {
+        return stash.create<SparseSingledimLookup>(peek->param(), std::get<TensorFunction::Child>(peek->map().begin()->second).get());
+    }
+    return expr;
+}
+
+} // namespace
diff --git a/eval/src/vespa/eval/instruction/sparse_singledim_lookup.h b/eval/src/vespa/eval/instruction/sparse_singledim_lookup.h
new file mode 100644
index 00000000000..8c79860361a
--- /dev/null
+++ b/eval/src/vespa/eval/instruction/sparse_singledim_lookup.h
@@ -0,0 +1,24 @@
+// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include <vespa/eval/eval/tensor_function.h>
+
+namespace vespalib::eval {
+
+/**
+ * Look up the result of an expression (double->int64_t->label_enum)
+ * in a sparse tensor with a single dimension, resulting in a double
+ * result. If lookup keys are kept small [0,10000000> (to avoid label
+ * enumeration) this is a simple hashtable lookup with numeric keys.
+ **/
+class SparseSingledimLookup : public tensor_function::Op2
+{
+public:
+    SparseSingledimLookup(const TensorFunction &tensor, const TensorFunction &expr);
+    InterpretedFunction::Instruction compile_self(const ValueBuilderFactory &factory, Stash &stash) const override;
+    bool result_is_mutable() const override { return true; }
+    static const TensorFunction &optimize(const TensorFunction &expr, Stash &stash);
+};
+
+} // namespace
author	Håvard Pettersen <havardpe@yahooinc.com>	2022-06-17 08:37:47 +0000
committer	Håvard Pettersen <havardpe@yahooinc.com>	2022-06-17 10:22:30 +0000
commit	7fd09d198ff274109e4e95b0feda4662199fd723 (patch)
tree	26d815aff6ff1d8ae1fb9d47bc05ad835230cd12 /eval
parent	a18e8e12faec6318c1d5768fd4edf42efd0ecb78 (diff)