best similarity function

author: Håvard Pettersen <havardpe@oath.com> 2021-09-28 08:13:03 +0000
committer: Håvard Pettersen <havardpe@oath.com> 2021-09-30 09:20:27 +0000
commit: fff135ac0ccd2ae07edc49857abf7c305b2ac3a5 (patch)
tree: ac1889791678f06fd101c5cc3096dbdbfcb4515e /eval
parent: 2f5a11f868291b34a3aa2c28817b36c5d0ed3d52 (diff)
12 files changed, 524 insertions, 49 deletions
diff --git a/eval/CMakeLists.txt b/eval/CMakeLists.txt
index 8a7cd4f66bd..59ad1a530e1 100644
--- a/eval/CMakeLists.txt
+++ b/eval/CMakeLists.txt
@@ -45,6 +45,7 @@ vespa_define_module(
     src/tests/eval/value_type
     src/tests/gp/ponder_nov2017
     src/tests/instruction/add_trivial_dimension_optimizer
+    src/tests/instruction/best_similarity_function
     src/tests/instruction/dense_dot_product_function
     src/tests/instruction/dense_hamming_distance
     src/tests/instruction/dense_inplace_join_function
diff --git a/eval/src/tests/eval/tensor_function/tensor_function_test.cpp b/eval/src/tests/eval/tensor_function/tensor_function_test.cpp
index c457f68a614..3d4e2d41cb5 100644
--- a/eval/src/tests/eval/tensor_function/tensor_function_test.cpp
+++ b/eval/src/tests/eval/tensor_function/tensor_function_test.cpp
@@ -510,4 +510,18 @@ TEST("require that tensor function can be dumped for debugging") {
     fprintf(stderr, "function dump -->[[%s]]<-- function dump\n", root.as_string().c_str());
 }
 
+TEST("require that full tensor reduce expands dimension list") {
+    Stash stash;
+    const auto &num = inject(ValueType::from_spec("double"), 0, stash);
+    const auto &mat = inject(ValueType::from_spec("tensor(x[5],y[5])"), 1, stash);
+    const auto *reduce_num = as<Reduce>(reduce(num, Aggr::SUM, {}, stash));
+    const auto *reduce_mat = as<Reduce>(reduce(mat, Aggr::SUM, {}, stash));
+    ASSERT_TRUE(reduce_num);
+    ASSERT_TRUE(reduce_mat);
+    EXPECT_EQUAL(reduce_num->dimensions().size(), 0u);
+    ASSERT_EQUAL(reduce_mat->dimensions().size(), 2u);
+    EXPECT_EQUAL(reduce_mat->dimensions()[0], "x");
+    EXPECT_EQUAL(reduce_mat->dimensions()[1], "y");
+}
+
 TEST_MAIN() { TEST_RUN_ALL(); }
diff --git a/eval/src/tests/instruction/best_similarity_function/CMakeLists.txt b/eval/src/tests/instruction/best_similarity_function/CMakeLists.txt
new file mode 100644
index 00000000000..fbcf435aad1
--- /dev/null
+++ b/eval/src/tests/instruction/best_similarity_function/CMakeLists.txt
@@ -0,0 +1,9 @@
+# Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+vespa_add_executable(eval_best_similarity_function_test_app TEST
+    SOURCES
+    best_similarity_function_test.cpp
+    DEPENDS
+    vespaeval
+    GTest::GTest
+)
+vespa_add_test(NAME eval_best_similarity_function_test_app COMMAND eval_best_similarity_function_test_app)
diff --git a/eval/src/tests/instruction/best_similarity_function/best_similarity_function_test.cpp b/eval/src/tests/instruction/best_similarity_function/best_similarity_function_test.cpp
new file mode 100644
index 00000000000..058b0f82678
--- /dev/null
+++ b/eval/src/tests/instruction/best_similarity_function/best_similarity_function_test.cpp
@@ -0,0 +1,148 @@
+// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include <vespa/eval/eval/fast_value.h>
+#include <vespa/eval/eval/tensor_function.h>
+#include <vespa/eval/eval/test/eval_fixture.h>
+#include <vespa/eval/eval/test/gen_spec.h>
+#include <vespa/eval/instruction/best_similarity_function.h>
+#include <vespa/vespalib/gtest/gtest.h>
+
+using namespace vespalib;
+using namespace vespalib::eval;
+using namespace vespalib::eval::test;
+
+const ValueBuilderFactory &prod_factory = FastValueBuilderFactory::get();
+
+//-----------------------------------------------------------------------------
+
+void verify_impl(const TensorSpec &a, const TensorSpec &b, const vespalib::string &expr, bool optimized) {
+    EvalFixture::ParamRepo param_repo;
+    param_repo.add("a", a).add("b", b);
+    EvalFixture fast_fixture(prod_factory, expr, param_repo, true);
+    EXPECT_EQ(fast_fixture.result(), EvalFixture::ref(expr, param_repo));
+    EXPECT_EQ(fast_fixture.find_all<BestSimilarityFunction>().size(), optimized ? 1 : 0);
+}
+
+void verify(const TensorSpec &a, const TensorSpec &b, const vespalib::string &expr, bool optimized = true) {
+    verify_impl(a, b, expr, optimized);
+    verify_impl(b, a, expr, optimized);
+}
+
+//-----------------------------------------------------------------------------
+
+GenSpec gen_double(const vespalib::string &desc, int bias) {
+    return GenSpec::from_desc(desc).cells(CellType::DOUBLE).seq(N(bias));
+}
+
+GenSpec gen_float(const vespalib::string &desc, int bias) {
+    return GenSpec::from_desc(desc).cells(CellType::FLOAT).seq(N(bias));
+}
+
+GenSpec gen_int8(const vespalib::string &desc, int bias) {
+    return GenSpec::from_desc(desc).cells(CellType::INT8).seq(N(bias));
+}
+
+vespalib::string max_sim = "reduce(reduce(a*b,sum,d),max,b)";
+vespalib::string min_hamming = "reduce(reduce(hamming(a,b),sum,d),min,b)";
+
+//-----------------------------------------------------------------------------
+
+TEST(BestSimilarityFunctionTest, result_is_mutable) {
+    tensor_function::Inject child(ValueType::double_type(), 0);
+    BestSimilarityFunction node(ValueType::double_type(), child, child, nullptr, 1);
+    EXPECT_TRUE(node.result_is_mutable());
+}
+
+TEST(BestSimilarityFunctionTest, max_sim_can_be_optimized) {
+    verify(gen_float("A3_2B3d8", 3), gen_float("b5d8", 7), max_sim);
+    verify(gen_float("A3_2B3d8", 3), gen_float("b5_2d8", 7), max_sim);
+}
+
+TEST(BestSimilarityFunctionTest, min_hamming_can_be_optimized) {
+    verify(gen_int8("A3_2B3d8", 3), gen_int8("b5d8", 7), min_hamming);
+    verify(gen_int8("A3_2B3d8", 3), gen_int8("b5_2d8", 7), min_hamming);
+}
+
+TEST(BestSimilarityFunctionTest, result_can_be_sparse) {
+    verify(gen_float("A3_2d8", 3), gen_float("b5d8", 7), max_sim);
+    verify(gen_int8("A3_2d8", 3), gen_int8("b5_2d8", 7), min_hamming);
+}
+
+TEST(BestSimilarityFunctionTest, result_can_be_dense) {
+    verify(gen_float("B3d8", 3), gen_float("b5d8", 7), max_sim);
+    verify(gen_int8("B3d8", 3), gen_int8("b5_2d8", 7), min_hamming);
+}
+
+TEST(BestSimilarityFunctionTest, result_can_be_double) {
+    verify(gen_float("d8", 3), gen_float("b5d8", 7), max_sim);
+    verify(gen_int8("d8", 3), gen_int8("b5_2d8", 7), min_hamming);
+}
+
+TEST(BestSimilarityFunctionTest, primary_dimensions_can_be_trivial) {
+    verify(gen_float("d1", 3), gen_float("b1d1", 7), max_sim);
+    verify(gen_int8("d1", 3), gen_int8("b1d1", 7), min_hamming);
+}
+
+TEST(BestSimilarityFunctionTest, extra_trivial_dimensions_are_allowed) {
+    verify(gen_float("A1a1d8x1z1", 3), gen_float("a1b5c1d8x1y1", 7), max_sim);
+}
+
+TEST(BestSimilarityFunctionTest, allow_full_reduce_for_outer_dimension) {
+    vespalib::string my_max_sim = "reduce(reduce(a*b,sum,d),max)";
+    vespalib::string my_min_hamming = "reduce(reduce(hamming(a,b),sum,d),min)";
+    verify(gen_float("d8", 3), gen_float("b5d8", 7), my_max_sim);
+    verify(gen_int8("d8", 3), gen_int8("b5_2d8", 7), my_min_hamming);
+}
+
+//-----------------------------------------------------------------------------
+
+TEST(BestSimilarityFunctionTest, cell_type_must_match_operation) {
+    verify(gen_double("d8", 3), gen_double("b5d8", 7), max_sim, false);
+    verify(gen_float("d8", 3), gen_float("b5_2d8", 7), min_hamming, false);
+}
+
+vespalib::string max_sim_2d_dist = "reduce(reduce(a*b,sum,d,e),max,b)";
+
+TEST(BestSimilarityFunctionTest, similarity_must_use_1d_vector) {
+    verify(gen_float("d8_1", 3), gen_float("b5d8_1", 7), max_sim, false);
+    verify(gen_float("d8e1", 3), gen_float("b5d8e1", 7), max_sim_2d_dist, false);
+}
+
+vespalib::string inv_max_sim = "reduce(reduce(a*b,sum,b),max,d)";
+
+TEST(BestSimilarityFunctionTest, similarity_dimension_must_be_inner) {
+    verify(gen_float("d8e3", 3), gen_float("b5d8", 7), max_sim, false);
+    verify(gen_float("d8", 3), gen_float("b5d8", 7), inv_max_sim, false);
+}
+
+vespalib::string max_sim_2d_best = "reduce(reduce(a*b,sum,d),max,a,b)";
+
+TEST(BestSimilarityFunctionTest, alternatives_must_use_a_single_dimension) {
+    verify(gen_float("d8", 3), gen_float("a1b5d8", 7), max_sim_2d_best, false);
+}
+
+TEST(BestSimilarityFunctionTest, alternatives_dimension_can_not_be_common) {
+    verify(gen_float("b5d8", 3), gen_float("b5d8", 7), max_sim, false);
+}
+
+TEST(BestSimilarityFunctionTest, extra_common_nontrivial_dimensions_not_allowed) {
+    verify(gen_float("a3d8", 3), gen_float("a3b5d8", 7), max_sim, false);
+}
+
+TEST(BestSimilarityFunctionTest, secondary_tensor_must_not_contain_extra_nontrivial_dimensions) {
+    verify(gen_float("d8", 3), gen_float("a2b5d8", 7), max_sim, false);
+}
+
+//-----------------------------------------------------------------------------
+
+vespalib::string other_join = "reduce(reduce(a+b,sum,d),max,b)";
+vespalib::string mismatch_best = "reduce(reduce(a*b,sum,d),min,b)";
+
+TEST(BestSimilarityFunctionTest, similar_expressions_are_not_optimized) {
+    verify(gen_float("d8", 3), gen_float("b5d8", 7), other_join, false);
+    verify(gen_float("d8", 3), gen_float("b5d8", 7), mismatch_best, false);
+}
+
+//-----------------------------------------------------------------------------
+
+GTEST_MAIN_RUN_ALL_TESTS()
diff --git a/eval/src/tests/instruction/sum_max_dot_product_function/sum_max_dot_product_function_test.cpp b/eval/src/tests/instruction/sum_max_dot_product_function/sum_max_dot_product_function_test.cpp
index f68c089e784..4998885c6a6 100644
--- a/eval/src/tests/instruction/sum_max_dot_product_function/sum_max_dot_product_function_test.cpp
+++ b/eval/src/tests/instruction/sum_max_dot_product_function/sum_max_dot_product_function_test.cpp
@@ -115,11 +115,9 @@ TEST(SumMaxDotProduct, similar_expressions_are_not_optimized) {
     vespalib::string max_sum_expr = "reduce(reduce(reduce(a*b,sum,z),sum,y),max,x)";
     vespalib::string not_dp_expr1 = "reduce(reduce(reduce(a+b,sum,z),max,y),sum,x)";
     vespalib::string not_dp_expr2 = "reduce(reduce(reduce(a*b,min,z),max,y),sum,x)";
-    vespalib::string sum_all_expr = "reduce(reduce(reduce(a*b,sum,z),max,y),sum)";
     assert_not_optimized(query, document, max_sum_expr);
     assert_not_optimized(query, document, not_dp_expr1);
     assert_not_optimized(query, document, not_dp_expr2);
-    assert_not_optimized(query, document, sum_all_expr);
 }
 
 //-----------------------------------------------------------------------------
diff --git a/eval/src/vespa/eval/eval/optimize_tensor_function.cpp b/eval/src/vespa/eval/eval/optimize_tensor_function.cpp
index c2e8d886fde..90849e55a71 100644
--- a/eval/src/vespa/eval/eval/optimize_tensor_function.cpp
+++ b/eval/src/vespa/eval/eval/optimize_tensor_function.cpp
@@ -11,6 +11,7 @@
 #include <vespa/eval/instruction/sparse_full_overlap_join_function.h>
 #include <vespa/eval/instruction/mixed_inner_product_function.h>
 #include <vespa/eval/instruction/sum_max_dot_product_function.h>
+#include <vespa/eval/instruction/best_similarity_function.h>
 #include <vespa/eval/instruction/dense_xw_product_function.h>
 #include <vespa/eval/instruction/dense_matmul_function.h>
 #include <vespa/eval/instruction/dense_multi_matmul_function.h>
@@ -37,54 +38,59 @@ namespace vespalib::eval {
 
 namespace {
 
-const TensorFunction &optimize_for_factory(const ValueBuilderFactory &, const TensorFunction &expr, Stash &stash) {
-    using Child = TensorFunction::Child;
-    Child root(expr);
-    {
-        std::vector<Child::CREF> nodes({root});
-        for (size_t i = 0; i < nodes.size(); ++i) {
-            nodes[i].get().get().push_children(nodes);
-        }
-        while (!nodes.empty()) {
-            const Child &child = nodes.back().get();
-            child.set(SumMaxDotProductFunction::optimize(child.get(), stash));
-            child.set(DenseDotProductFunction::optimize(child.get(), stash));
-            child.set(SparseDotProductFunction::optimize(child.get(), stash));
-            child.set(DenseXWProductFunction::optimize(child.get(), stash));
-            child.set(DenseMatMulFunction::optimize(child.get(), stash));
-            child.set(DenseMultiMatMulFunction::optimize(child.get(), stash));
-            child.set(MixedInnerProductFunction::optimize(child.get(), stash));
-            child.set(DenseHammingDistance::optimize(child.get(), stash));
-            nodes.pop_back();
-        }
+using Child = TensorFunction::Child;
+
+void run_optimize_pass(const Child &root, auto optimize_node) {
+    std::vector<Child::CREF> nodes({root});
+    for (size_t i = 0; i < nodes.size(); ++i) {
+        nodes[i].get().get().push_children(nodes);
     }
-    {
-        std::vector<Child::CREF> nodes({root});
-        for (size_t i = 0; i < nodes.size(); ++i) {
-            nodes[i].get().get().push_children(nodes);
-        }
-        while (!nodes.empty()) {
-            const Child &child = nodes.back().get();
-            child.set(DenseSimpleExpandFunction::optimize(child.get(), stash));
-            child.set(AddTrivialDimensionOptimizer::optimize(child.get(), stash));
-            child.set(RemoveTrivialDimensionOptimizer::optimize(child.get(), stash));
-            child.set(VectorFromDoublesFunction::optimize(child.get(), stash));
-            child.set(DenseTensorCreateFunction::optimize(child.get(), stash));
-            child.set(DenseTensorPeekFunction::optimize(child.get(), stash));
-            child.set(DenseLambdaPeekOptimizer::optimize(child.get(), stash));
-            child.set(UnpackBitsFunction::optimize(child.get(), stash));
-            child.set(FastRenameOptimizer::optimize(child.get(), stash));
-            child.set(PowAsMapOptimizer::optimize(child.get(), stash));
-            child.set(InplaceMapFunction::optimize(child.get(), stash));
-            child.set(MixedSimpleJoinFunction::optimize(child.get(), stash));
-            child.set(JoinWithNumberFunction::optimize(child.get(), stash));
-            child.set(DenseSingleReduceFunction::optimize(child.get(), stash));
-            child.set(SparseMergeFunction::optimize(child.get(), stash));
-            child.set(SparseNoOverlapJoinFunction::optimize(child.get(), stash));
-            child.set(SparseFullOverlapJoinFunction::optimize(child.get(), stash));
-            nodes.pop_back();
-        }
+    while (!nodes.empty()) {
+        optimize_node(nodes.back().get());
+        nodes.pop_back();
     }
+}
+
+const TensorFunction &optimize_for_factory(const ValueBuilderFactory &, const TensorFunction &expr, Stash &stash) {
+    Child root(expr);
+    run_optimize_pass(root, [&stash](const Child &child)
+                      {
+                          child.set(SumMaxDotProductFunction::optimize(child.get(), stash));
+                      });
+    run_optimize_pass(root, [&stash](const Child &child)
+                      {
+                          child.set(BestSimilarityFunction::optimize(child.get(), stash));
+                      });
+    run_optimize_pass(root, [&stash](const Child &child)
+                      {
+                          child.set(DenseDotProductFunction::optimize(child.get(), stash));
+                          child.set(SparseDotProductFunction::optimize(child.get(), stash));
+                          child.set(DenseXWProductFunction::optimize(child.get(), stash));
+                          child.set(DenseMatMulFunction::optimize(child.get(), stash));
+                          child.set(DenseMultiMatMulFunction::optimize(child.get(), stash));
+                          child.set(MixedInnerProductFunction::optimize(child.get(), stash));
+                          child.set(DenseHammingDistance::optimize(child.get(), stash));
+                      });
+    run_optimize_pass(root, [&stash](const Child &child)
+                      {
+                          child.set(DenseSimpleExpandFunction::optimize(child.get(), stash));
+                          child.set(AddTrivialDimensionOptimizer::optimize(child.get(), stash));
+                          child.set(RemoveTrivialDimensionOptimizer::optimize(child.get(), stash));
+                          child.set(VectorFromDoublesFunction::optimize(child.get(), stash));
+                          child.set(DenseTensorCreateFunction::optimize(child.get(), stash));
+                          child.set(DenseTensorPeekFunction::optimize(child.get(), stash));
+                          child.set(DenseLambdaPeekOptimizer::optimize(child.get(), stash));
+                          child.set(UnpackBitsFunction::optimize(child.get(), stash));
+                          child.set(FastRenameOptimizer::optimize(child.get(), stash));
+                          child.set(PowAsMapOptimizer::optimize(child.get(), stash));
+                          child.set(InplaceMapFunction::optimize(child.get(), stash));
+                          child.set(MixedSimpleJoinFunction::optimize(child.get(), stash));
+                          child.set(JoinWithNumberFunction::optimize(child.get(), stash));
+                          child.set(DenseSingleReduceFunction::optimize(child.get(), stash));
+                          child.set(SparseMergeFunction::optimize(child.get(), stash));
+                          child.set(SparseNoOverlapJoinFunction::optimize(child.get(), stash));
+                          child.set(SparseFullOverlapJoinFunction::optimize(child.get(), stash));
+                      });
     return root.get();
 }
 
diff --git a/eval/src/vespa/eval/eval/tensor_function.cpp b/eval/src/vespa/eval/eval/tensor_function.cpp
index c0cd7280212..70797250c5a 100644
--- a/eval/src/vespa/eval/eval/tensor_function.cpp
+++ b/eval/src/vespa/eval/eval/tensor_function.cpp
@@ -443,7 +443,11 @@ const TensorFunction &inject(const ValueType &type, size_t param_idx, Stash &sta
 
 const TensorFunction &reduce(const TensorFunction &child, Aggr aggr, const std::vector<vespalib::string> &dimensions, Stash &stash) {
     ValueType result_type = child.result_type().reduce(dimensions);
-    return stash.create<Reduce>(result_type, child, aggr, dimensions);
+    if (dimensions.empty()) { // expand dimension list for full reduce
+        return stash.create<Reduce>(result_type, child, aggr, child.result_type().dimension_names());
+    } else {
+        return stash.create<Reduce>(result_type, child, aggr, dimensions);
+    }
 }
 
 const TensorFunction &map(const TensorFunction &child, map_fun_t function, Stash &stash) {
diff --git a/eval/src/vespa/eval/eval/value.cpp b/eval/src/vespa/eval/eval/value.cpp
index b799658cfae..e32e3152449 100644
--- a/eval/src/vespa/eval/eval/value.cpp
+++ b/eval/src/vespa/eval/eval/value.cpp
@@ -10,6 +10,11 @@ namespace eval {
 
 namespace {
 
+struct EmptyView : Value::Index::View {
+    void lookup(ConstArrayRef<const string_id*> ) override {}
+    bool next_result(ConstArrayRef<string_id*> , size_t &) override { return false; }
+};
+
 struct TrivialView : Value::Index::View {
     bool first = false;
     void lookup(ConstArrayRef<const string_id*> ) override { first = true; }
@@ -36,6 +41,20 @@ struct MySum {
 
 } // <unnamed>
 
+EmptyIndex::EmptyIndex() = default;
+EmptyIndex EmptyIndex::_index;
+
+size_t
+EmptyIndex::size() const
+{
+    return 0;
+}
+
+std::unique_ptr<Value::Index::View>
+EmptyIndex::create_view(ConstArrayRef<size_t>) const
+{
+    return std::make_unique<EmptyView>();
+}
 
 TrivialIndex::TrivialIndex() = default;
 TrivialIndex TrivialIndex::_index;
diff --git a/eval/src/vespa/eval/eval/value.h b/eval/src/vespa/eval/eval/value.h
index fcdaa7131c7..433ee36cb6a 100644
--- a/eval/src/vespa/eval/eval/value.h
+++ b/eval/src/vespa/eval/eval/value.h
@@ -64,6 +64,19 @@ struct Value {
 };
 
 /**
+ * Common empty index
+ **/
+class EmptyIndex : public Value::Index {
+private:
+    EmptyIndex();
+    static EmptyIndex _index;
+public:
+    static const EmptyIndex &get() { return _index; }
+    size_t size() const override;
+    std::unique_ptr<View> create_view(ConstArrayRef<size_t> dims) const override;
+};
+
+/**
  * Common index for values without any mapped dimensions.
  **/
 class TrivialIndex : public Value::Index {
diff --git a/eval/src/vespa/eval/instruction/CMakeLists.txt b/eval/src/vespa/eval/instruction/CMakeLists.txt
index 3ed969c0a18..90ab58827d1 100644
--- a/eval/src/vespa/eval/instruction/CMakeLists.txt
+++ b/eval/src/vespa/eval/instruction/CMakeLists.txt
@@ -3,6 +3,7 @@
 vespa_add_library(eval_instruction OBJECT
     SOURCES
     add_trivial_dimension_optimizer.cpp
+    best_similarity_function.cpp
     dense_cell_range_function.cpp
     dense_dot_product_function.cpp
     dense_hamming_distance.cpp
diff --git a/eval/src/vespa/eval/instruction/best_similarity_function.cpp b/eval/src/vespa/eval/instruction/best_similarity_function.cpp
new file mode 100644
index 00000000000..e8f401b90b5
--- /dev/null
+++ b/eval/src/vespa/eval/instruction/best_similarity_function.cpp
@@ -0,0 +1,224 @@
+// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include "best_similarity_function.h"
+#include <vespa/eval/eval/operation.h>
+#include <vespa/eval/eval/value.h>
+#include <vespa/vespalib/util/binary_hamming_distance.h>
+#include <cblas.h>
+
+namespace vespalib::eval {
+
+using namespace tensor_function;
+using namespace operation;
+
+namespace {
+
+struct BestSimParam {
+    ValueType res_type;
+    size_t inner_size;
+    BestSimParam(const ValueType &res_type_in, size_t inner_size_in)
+      : res_type(res_type_in), inner_size(inner_size_in) {}
+};
+
+struct UseDotProduct {
+    static float calc(const float *pri, const float *sec, size_t size) {
+        return cblas_sdot(size, pri, 1, sec, 1);
+    }
+};
+
+struct UseHammingDist {
+    static float calc(const Int8Float *pri, const Int8Float *sec, size_t size) {
+        return binary_hamming_distance(pri, sec, size);
+    }
+};
+
+template <typename CT, typename AGGR, typename DIST>
+float best_similarity(const CT *pri, ConstArrayRef<CT> sec_cells, size_t inner_size) {
+    AGGR aggr;
+    for (const CT *sec = sec_cells.begin(); sec < sec_cells.end(); sec += inner_size) {
+        aggr.sample(DIST::calc(pri, sec, inner_size));
+    }
+    return aggr.result();
+}
+
+template <bool is_double>
+const Value &create_empty_result(const ValueType &type, Stash &stash) {
+    if (is_double) {
+        return stash.create<DoubleValue>(0.0);
+    } else if (type.count_mapped_dimensions() == 0) {
+        auto zero_cells = stash.create_array<float>(type.dense_subspace_size());
+        return stash.create<ValueView>(type, TrivialIndex::get(), TypedCells(zero_cells));
+    } else {
+        return stash.create<ValueView>(type, EmptyIndex::get(), TypedCells(nullptr, CellType::FLOAT, 0));
+    }
+}
+
+template <bool is_double, typename CT, typename AGGR, typename DIST>
+void my_best_similarity_op(InterpretedFunction::State &state, uint64_t param) {
+    size_t inner_size = is_double ? param : unwrap_param<BestSimParam>(param).inner_size;
+    const ValueType &res_type = is_double ? DoubleValue::shared_type() : unwrap_param<BestSimParam>(param).res_type;
+    const Value &pri_value = state.peek(1);
+    auto pri_cells = pri_value.cells().typify<CT>();
+    auto sec_cells = state.peek(0).cells().typify<CT>();
+    if ((pri_cells.size() == 0) || (sec_cells.size() == 0)) {
+        return state.pop_pop_push(create_empty_result<is_double>(res_type, state.stash));
+    }
+    if (is_double) {
+        auto best_sim = best_similarity<CT, AGGR, DIST>(pri_cells.begin(), sec_cells, inner_size);
+        return state.pop_pop_push(state.stash.create<DoubleValue>(best_sim));
+    }
+    auto out_cells = state.stash.create_uninitialized_array<float>(pri_cells.size() / inner_size);
+    const CT *pri = pri_cells.begin();
+    for (auto &out: out_cells) {
+        out = best_similarity<CT, AGGR, DIST>(pri, sec_cells, inner_size);
+        pri += inner_size;
+    }
+    Value &result_ref = state.stash.create<ValueView>(res_type, pri_value.index(), TypedCells(out_cells));
+    state.pop_pop_push(result_ref);
+}
+
+//-----------------------------------------------------------------------------
+
+size_t stride(const ValueType &type, const vespalib::string &name) {
+    size_t stride = 0;
+    for (const auto &dim: type.dimensions()) {
+        if (dim.is_indexed()) {
+            if (dim.name == name) {
+                stride = 1;
+            } else {
+                stride *= dim.size;
+            }
+        }
+    }
+    return stride;
+}
+
+bool check_dims(const ValueType &pri, const ValueType &sec,
+                const vespalib::string &best, const vespalib::string &inner)
+{
+    if ((stride(pri, inner) != 1) || (stride(sec, inner) != 1)) {
+        return false;
+    }
+    if (pri.dimension_index(best) != ValueType::Dimension::npos) {
+        return false;
+    }
+    if (sec.dimension_index(best) == ValueType::Dimension::npos) {
+        return false;
+    }
+    for (auto &&type = sec.reduce({inner,best}); auto &&dim: type.dimensions()) {
+        if (!dim.is_trivial()) {
+            return false;
+        }
+    }
+    return true;
+}
+
+size_t get_dim_size(const ValueType &type, const vespalib::string &dim) {
+    size_t npos = ValueType::Dimension::npos;
+    size_t idx = type.dimension_index(dim);
+    assert(idx != npos);
+    assert(type.dimensions()[idx].is_indexed());
+    return type.dimensions()[idx].size;
+}
+
+const Reduce *check_reduce(const TensorFunction &expr, std::initializer_list<Aggr> allow) {
+    if (auto reduce = as<Reduce>(expr)) {
+        if (reduce->dimensions().size() == 1) {
+            if (std::find(allow.begin(), allow.end(), reduce->aggr()) != allow.end()) {
+                return reduce;
+            }
+        }
+    }
+    return nullptr;
+}
+
+const Join *check_join(const TensorFunction &expr, std::initializer_list<op2_t> allow) {
+    if (auto join = as<Join>(expr)) {
+        if (std::find(allow.begin(), allow.end(), join->function()) != allow.end()) {
+            return join;
+        }
+    }
+    return nullptr;
+}
+
+struct SelectFun {
+    const ValueType &res_type;
+    const ValueType &lhs_type;
+    const ValueType &rhs_type;
+    SelectFun(const auto &res, const auto &lhs, const auto &rhs)
+      : res_type(res.result_type()), lhs_type(lhs.result_type()), rhs_type(rhs.result_type()) {}
+    template <typename R1> static InterpretedFunction::op_function invoke(Aggr best_aggr, op2_t join_fun, CellType cell_types) {
+        if ((best_aggr == Aggr::MAX) && (join_fun == Mul::f) && (cell_types == CellType::FLOAT)) {
+            return my_best_similarity_op<R1::value, float, aggr::Max<float>, UseDotProduct>;
+        }
+        if ((best_aggr == Aggr::MIN) && (join_fun == Hamming::f) && (cell_types == CellType::INT8)) {
+            return my_best_similarity_op<R1::value, Int8Float, aggr::Min<float>, UseHammingDist>;
+        }
+        return nullptr;
+    }
+    InterpretedFunction::op_function operator()(Aggr best_aggr, op2_t join_fun) {
+        static_assert(std::is_same_v<float, CellValueType<CellType::FLOAT>>);
+        static_assert(std::is_same_v<Int8Float, CellValueType<CellType::INT8>>);
+        if (lhs_type.cell_type() != rhs_type.cell_type()) {
+            return nullptr;
+        }
+        return typify_invoke<1,TypifyBool,SelectFun>(res_type.is_double(), best_aggr, join_fun, lhs_type.cell_type());
+    }
+};
+
+} // namespace <unnamed>
+
+uint64_t
+BestSimilarityFunction::make_param(Stash &stash) const
+{
+    if (result_type().is_double()) {
+        return _inner_size;
+    }
+    return wrap_param<BestSimParam>(stash.create<BestSimParam>(result_type(), _inner_size));
+}
+
+BestSimilarityFunction::BestSimilarityFunction(const ValueType &res_type_in,
+                                               const TensorFunction &pri,
+                                               const TensorFunction &sec,
+                                               InterpretedFunction::op_function my_fun,
+                                               size_t inner_size)
+  : tensor_function::Op2(res_type_in, pri, sec),
+    _my_fun(my_fun),
+    _inner_size(inner_size)
+{
+}
+
+InterpretedFunction::Instruction
+BestSimilarityFunction::compile_self(const ValueBuilderFactory &, Stash &stash) const
+{
+    return InterpretedFunction::Instruction(_my_fun, make_param(stash));
+}
+
+const TensorFunction &
+BestSimilarityFunction::optimize(const TensorFunction &expr, Stash &stash)
+{
+    if (auto best_reduce = check_reduce(expr, {Aggr::MAX, Aggr::MIN})) {
+        if (auto sum_reduce = check_reduce(best_reduce->child(), {Aggr::SUM})) {
+            if (auto join = check_join(sum_reduce->child(), {Mul::f, Hamming::f})) {
+                SelectFun select_fun(expr, join->lhs(), join->rhs());
+                if (auto my_fun = select_fun(best_reduce->aggr(), join->function())) {
+                    const auto &best_dim = best_reduce->dimensions()[0];
+                    const auto &inner_dim = sum_reduce->dimensions()[0];
+                    const TensorFunction &lhs = join->lhs();
+                    const TensorFunction &rhs = join->rhs();
+                    if (check_dims(lhs.result_type(), rhs.result_type(), best_dim, inner_dim)) {
+                        size_t inner_size = get_dim_size(lhs.result_type(), inner_dim);
+                        return stash.create<BestSimilarityFunction>(expr.result_type(), lhs, rhs, my_fun, inner_size);
+                    }
+                    if (check_dims(rhs.result_type(), lhs.result_type(), best_dim, inner_dim)) {
+                        size_t inner_size = get_dim_size(rhs.result_type(), inner_dim);
+                        return stash.create<BestSimilarityFunction>(expr.result_type(), rhs, lhs, my_fun, inner_size);
+                    }
+                }
+            }
+        }
+    }
+    return expr;
+}
+
+} // namespace
diff --git a/eval/src/vespa/eval/instruction/best_similarity_function.h b/eval/src/vespa/eval/instruction/best_similarity_function.h
new file mode 100644
index 00000000000..25fd44e8f28
--- /dev/null
+++ b/eval/src/vespa/eval/instruction/best_similarity_function.h
@@ -0,0 +1,38 @@
+// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include <vespa/eval/eval/tensor_function.h>
+
+namespace vespalib::eval {
+
+/**
+ * Tensor function combining multiple vector-based similarity measures
+ * to find the best one. This function supports the following cases:
+ *
+ * - maximum dot product of vectors with float cell type (MaxSim)
+ * - minimum hamming distance of bitvectors with int8 cell type
+ *
+ * The vectors used to calculate the individual distance metrics must
+ * be the inner dense dimension of both inputs. The dimension reduced
+ * to find the best similarity measure must be the remaining dimension
+ * of one of the inputs.
+ **/
+class BestSimilarityFunction : public tensor_function::Op2
+{
+private:
+    InterpretedFunction::op_function _my_fun;
+    size_t _inner_size;
+    uint64_t make_param(Stash &stash) const;
+public:
+    BestSimilarityFunction(const ValueType &res_type_in,
+                           const TensorFunction &pri,
+                           const TensorFunction &sec,
+                           InterpretedFunction::op_function my_fun,
+                           size_t inner_size);
+    InterpretedFunction::Instruction compile_self(const ValueBuilderFactory &factory, Stash &stash) const override;
+    bool result_is_mutable() const override { return true; }
+    static const TensorFunction &optimize(const TensorFunction &expr, Stash &stash);
+};
+
+} // namespace
author	Håvard Pettersen <havardpe@oath.com>	2021-09-28 08:13:03 +0000
committer	Håvard Pettersen <havardpe@oath.com>	2021-09-30 09:20:27 +0000
commit	fff135ac0ccd2ae07edc49857abf7c305b2ac3a5 (patch)
tree	ac1889791678f06fd101c5cc3096dbdbfcb4515e /eval
parent	2f5a11f868291b34a3aa2c28817b36c5d0ed3d52 (diff)