10 files changed, 272 insertions, 108 deletions
diff --git a/eval/CMakeLists.txt b/eval/CMakeLists.txt
index 0cba519bf88..22750a27f20 100644
--- a/eval/CMakeLists.txt
+++ b/eval/CMakeLists.txt
@@ -67,6 +67,7 @@ vespa_define_module(
     src/tests/instruction/remove_trivial_dimension_optimizer
     src/tests/instruction/sparse_dot_product_function
     src/tests/instruction/sparse_merge_function
+    src/tests/instruction/sparse_no_overlap_join_function
     src/tests/instruction/sum_max_dot_product_function
     src/tests/instruction/vector_from_doubles_function
     src/tests/streamed/value
diff --git a/eval/src/tests/instruction/sparse_no_overlap_join_function/CMakeLists.txt b/eval/src/tests/instruction/sparse_no_overlap_join_function/CMakeLists.txt
new file mode 100644
index 00000000000..87b58360366
--- /dev/null
+++ b/eval/src/tests/instruction/sparse_no_overlap_join_function/CMakeLists.txt
@@ -0,0 +1,9 @@
+# Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+vespa_add_executable(eval_sparse_no_overlap_join_function_test_app TEST
+    SOURCES
+    sparse_no_overlap_join_function_test.cpp
+    DEPENDS
+    vespaeval
+    GTest::GTest
+)
+vespa_add_test(NAME eval_sparse_no_overlap_join_function_test_app COMMAND eval_sparse_no_overlap_join_function_test_app)
diff --git a/eval/src/tests/instruction/sparse_no_overlap_join_function/sparse_no_overlap_join_function_test.cpp b/eval/src/tests/instruction/sparse_no_overlap_join_function/sparse_no_overlap_join_function_test.cpp
new file mode 100644
index 00000000000..60755b50c1d
--- /dev/null
+++ b/eval/src/tests/instruction/sparse_no_overlap_join_function/sparse_no_overlap_join_function_test.cpp
@@ -0,0 +1,92 @@
+// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include <vespa/eval/eval/fast_value.h>
+#include <vespa/eval/eval/simple_value.h>
+#include <vespa/eval/instruction/sparse_no_overlap_join_function.h>
+#include <vespa/eval/eval/test/eval_fixture.h>
+#include <vespa/vespalib/gtest/gtest.h>
+
+using namespace vespalib::eval;
+using namespace vespalib::eval::test;
+
+const ValueBuilderFactory &prod_factory = FastValueBuilderFactory::get();
+const ValueBuilderFactory &test_factory = SimpleValueBuilderFactory::get();
+
+//-----------------------------------------------------------------------------
+
+EvalFixture::ParamRepo make_params() { // Parameters shared by all tests; map()/idx() presumably add mapped (sparse) / indexed (dense) dimensions -- confirm against GenSpec
+    return EvalFixture::ParamRepo()
+        .add_variants("v1_a", GenSpec(3.0).map("a", 8, 1)) // map dimension 'a' only; add_variants presumably also registers the "_f" names used further down -- confirm
+        .add_variants("v2_b", GenSpec(7.0).map("b", 4, 2)) // map dimension 'b' only
+        .add_variants("v2_b_trivial", GenSpec(7.0).map("b", 4, 2).idx("c", 1).idx("d", 1)) // same as v2_b plus two idx dimensions of size 1
+        .add("m1_ac",  GenSpec(3.0).map("a", 8, 1).map("c", 8, 1)) // two map dimensions; shares 'a' with v1_a
+        .add("m2_bd",  GenSpec(17.0).map("b", 4, 2).map("d", 4, 2)) // two map dimensions; shares 'b' with v2_b
+        .add("scalar", GenSpec(1.0)) // no dimensions at all
+        .add("dense_b",  GenSpec().idx("b", 5)) // idx dimension only
+        .add("mixed_bc", GenSpec().map("b", 5, 1).idx("c", 5)); // one map and one idx dimension
+}
+EvalFixture::ParamRepo param_repo = make_params();
+
+void assert_optimized(const vespalib::string &expr) { // every run must match the reference; runs built with 'true' must contain exactly one rewritten node
+    EvalFixture prod_opt(prod_factory, expr, param_repo, true);
+    EXPECT_EQ(prod_opt.result(), EvalFixture::ref(expr, param_repo));
+    EXPECT_EQ(prod_opt.find_all<SparseNoOverlapJoinFunction>().size(), 1u);
+    EvalFixture simple_opt(test_factory, expr, param_repo, true);
+    EXPECT_EQ(simple_opt.result(), EvalFixture::ref(expr, param_repo));
+    EXPECT_EQ(simple_opt.find_all<SparseNoOverlapJoinFunction>().size(), 1u);
+    EvalFixture prod_plain(prod_factory, expr, param_repo, false); // last argument false: presumably disables optimization -- confirm against EvalFixture
+    EXPECT_EQ(prod_plain.result(), EvalFixture::ref(expr, param_repo));
+    EXPECT_EQ(prod_plain.find_all<SparseNoOverlapJoinFunction>().size(), 0u);
+}
+
+void assert_not_optimized(const vespalib::string &expr) { // result must match the reference while no node is rewritten
+    EvalFixture fixture(prod_factory, expr, param_repo, true);
+    EXPECT_EQ(fixture.find_all<SparseNoOverlapJoinFunction>().size(), 0u);
+    EXPECT_EQ(fixture.result(), EvalFixture::ref(expr, param_repo));
+}
+
+//-----------------------------------------------------------------------------
+
+TEST(SparseNoOverlapJoin, expression_can_be_optimized)
+{
+    // Operand pairs whose dimension names do not overlap, in both operand orders,
+    // plus one join that uses an explicit lambda instead of multiplication.
+    const char *disjoint[] = {"v1_a*v2_b", "v2_b*v1_a",
+                              "m1_ac*m2_bd", "m2_bd*m1_ac",
+                              "m1_ac*v2_b", "m2_bd*v1_a",
+                              "join(v1_a,v2_b,f(x,y)(max(x,y)))"};
+    for (const char *expr : disjoint) { assert_optimized(expr); }
+}
+
+TEST(SparseNoOverlapJoin, trivial_dimensions_are_ignored)
+{
+    const char *with_trivial[] = {"v1_a*v2_b_trivial", "v2_b_trivial*v1_a"}; // v2_b_trivial adds size-1 idx dimensions to v2_b
+    for (const char *expr : with_trivial) { assert_optimized(expr); }
+}
+
+TEST(SparseNoOverlapJoin, overlapping_dimensions_are_not_optimized)
+{
+    // Both operands of every expression below carry dimension 'a'.
+    const char *overlapping[] = {"v1_a*v1_a", "v1_a*m1_ac", "m1_ac*v1_a"};
+    for (const char *expr : overlapping) { assert_not_optimized(expr); }
+}
+
+TEST(SparseNoOverlapJoin, both_values_must_be_sparse_tensors)
+{
+    // v1_a combined with the 'scalar', 'dense_b' and 'mixed_bc' parameters,
+    // each tried in both operand orders.
+    const char *not_sparse[] = {"v1_a*scalar", "scalar*v1_a",
+                                "v1_a*dense_b", "dense_b*v1_a",
+                                "v1_a*mixed_bc", "mixed_bc*v1_a"};
+    for (const char *expr : not_sparse) { assert_not_optimized(expr); }
+}
+
+TEST(SparseNoOverlapJoin, mixed_cell_types_are_not_optimized)
+{
+    const char *mixed_cells[] = {"v1_a*v2_b_f", "v1_a_f*v2_b"}; // "_f" names: presumably the float variants from add_variants -- confirm
+    for (const char *expr : mixed_cells) { assert_not_optimized(expr); }
+}
+
+//-----------------------------------------------------------------------------
+
+GTEST_MAIN_RUN_ALL_TESTS()
diff --git a/eval/src/vespa/eval/eval/fast_value.hpp b/eval/src/vespa/eval/eval/fast_value.hpp
index 6673494ccd2..d5cfc9c6368 100644
--- a/eval/src/vespa/eval/eval/fast_value.hpp
+++ b/eval/src/vespa/eval/eval/fast_value.hpp
@@ -135,8 +135,6 @@ struct FastIterateView : public Value::Index::View {
 
 //-----------------------------------------------------------------------------
 
-using JoinAddrSource = instruction::SparseJoinPlan::Source;
-
 // This is the class instructions will look for when optimizing sparse
 // operations by calling inline functions directly.
 struct FastValueIndex final : Value::Index {
@@ -149,12 +147,6 @@ struct FastValueIndex final : Value::Index {
                 const FastValueIndex &lhs, const FastValueIndex &rhs,
                 ConstArrayRef<LCT> lhs_cells, ConstArrayRef<RCT> rhs_cells, Stash &stash);
 
-    template <typename LCT, typename RCT, typename OCT, typename Fun>
-        static const Value &sparse_no_overlap_join(const ValueType &res_type, const Fun &fun,
-                const FastValueIndex &lhs, const FastValueIndex &rhs,
-                const std::vector<JoinAddrSource> &addr_sources,
-                ConstArrayRef<LCT> lhs_cells, ConstArrayRef<RCT> rhs_cells, Stash &stash);
-
     size_t size() const override { return map.size(); }
     std::unique_ptr<View> create_view(const std::vector<size_t> &dims) const override;
 };
@@ -373,54 +365,4 @@ FastValueIndex::sparse_full_overlap_join(const ValueType &res_type, const Fun &f
 
 //-----------------------------------------------------------------------------
 
-template <typename LCT, typename RCT, typename OCT, typename Fun>
-const Value &
-FastValueIndex::sparse_no_overlap_join(const ValueType &res_type, const Fun &fun,
-                                       const FastValueIndex &lhs, const FastValueIndex &rhs,
-                                       const std::vector<JoinAddrSource> &addr_sources,
-                                       ConstArrayRef<LCT> lhs_cells, ConstArrayRef<RCT> rhs_cells, Stash &stash)
-{
-    size_t num_mapped_dims = addr_sources.size();
-    auto &result = stash.create<FastValue<OCT,true>>(res_type, num_mapped_dims, 1, lhs.map.size()*rhs.map.size());
-    std::vector<string_id> output_addr(num_mapped_dims);
-    std::vector<size_t> store_lhs_idx;
-    std::vector<size_t> store_rhs_idx;
-    size_t out_idx = 0;
-    for (JoinAddrSource source : addr_sources) {
-        switch (source) {
-        case JoinAddrSource::LHS:
-            store_lhs_idx.push_back(out_idx++);
-            break;
-        case JoinAddrSource::RHS:
-            store_rhs_idx.push_back(out_idx++);
-            break;
-        default:
-            abort();
-        }
-    }
-    assert(out_idx == output_addr.size());
-    for (size_t lhs_subspace = 0; lhs_subspace < lhs.map.size(); ++lhs_subspace) {
-        auto l_addr = lhs.map.get_addr(lhs_subspace);
-        assert(l_addr.size() == store_lhs_idx.size());
-        for (size_t i = 0; i < store_lhs_idx.size(); ++i) {
-            size_t addr_idx = store_lhs_idx[i];
-            output_addr[addr_idx] = l_addr[i];
-        }
-        for (size_t rhs_subspace = 0; rhs_subspace < rhs.map.size(); ++rhs_subspace) {
-            auto r_addr = rhs.map.get_addr(rhs_subspace);
-            assert(r_addr.size() == store_rhs_idx.size());
-            for (size_t i = 0; i < store_rhs_idx.size(); ++i) {
-                size_t addr_idx = store_rhs_idx[i];
-                output_addr[addr_idx] = r_addr[i];
-            }
-            result.add_mapping(ConstArrayRef(output_addr));
-            auto cell_value = fun(lhs_cells[lhs_subspace], rhs_cells[rhs_subspace]);
-            result.my_cells.push_back_fast(cell_value);
-        }
-    }
-    return result;
-}
-
-//-----------------------------------------------------------------------------
-
 }
diff --git a/eval/src/vespa/eval/eval/optimize_tensor_function.cpp b/eval/src/vespa/eval/eval/optimize_tensor_function.cpp
index 196e8a98679..aef49a2c75b 100644
--- a/eval/src/vespa/eval/eval/optimize_tensor_function.cpp
+++ b/eval/src/vespa/eval/eval/optimize_tensor_function.cpp
@@ -7,6 +7,7 @@
 #include <vespa/eval/instruction/dense_dot_product_function.h>
 #include <vespa/eval/instruction/sparse_dot_product_function.h>
 #include <vespa/eval/instruction/sparse_merge_function.h>
+#include <vespa/eval/instruction/sparse_no_overlap_join_function.h>
 #include <vespa/eval/instruction/mixed_inner_product_function.h>
 #include <vespa/eval/instruction/sum_max_dot_product_function.h>
 #include <vespa/eval/instruction/dense_xw_product_function.h>
@@ -74,6 +75,7 @@ const TensorFunction &optimize_for_factory(const ValueBuilderFactory &, const Te
             child.set(JoinWithNumberFunction::optimize(child.get(), stash));
             child.set(DenseSingleReduceFunction::optimize(child.get(), stash));
             child.set(SparseMergeFunction::optimize(child.get(), stash));
+            child.set(SparseNoOverlapJoinFunction::optimize(child.get(), stash));
             nodes.pop_back();
         }
     }
diff --git a/eval/src/vespa/eval/instruction/CMakeLists.txt b/eval/src/vespa/eval/instruction/CMakeLists.txt
index 3def8907ac8..50f7dbe7005 100644
--- a/eval/src/vespa/eval/instruction/CMakeLists.txt
+++ b/eval/src/vespa/eval/instruction/CMakeLists.txt
@@ -34,6 +34,7 @@ vespa_add_library(eval_instruction OBJECT
     replace_type_function.cpp
     sparse_dot_product_function.cpp
     sparse_merge_function.cpp
+    sparse_no_overlap_join_function.cpp
     sum_max_dot_product_function.cpp
     vector_from_doubles_function.cpp
 )
diff --git a/eval/src/vespa/eval/instruction/generic_join.cpp b/eval/src/vespa/eval/instruction/generic_join.cpp
index 6d6f86b7c4d..4b3755509c7 100644
--- a/eval/src/vespa/eval/instruction/generic_join.cpp
+++ b/eval/src/vespa/eval/instruction/generic_join.cpp
@@ -19,8 +19,6 @@ using operation::SwapArgs2;
 using State = InterpretedFunction::State;
 using Instruction = InterpretedFunction::Instruction;
 
-namespace {
-
 //-----------------------------------------------------------------------------
 
 template <typename LCT, typename RCT, typename OCT, typename Fun>
@@ -56,6 +54,8 @@ generic_mixed_join(const Value &lhs, const Value &rhs, const JoinParam &param)
     return builder->build(std::move(builder));
 };
 
+namespace {
+
 template <typename LCT, typename RCT, typename OCT, typename Fun>
 void my_mixed_join_op(State &state, uint64_t param_in) {
     const auto &param = unwrap_param<JoinParam>(param_in);
@@ -70,48 +70,6 @@ void my_mixed_join_op(State &state, uint64_t param_in) {
 //-----------------------------------------------------------------------------
 
 template <typename LCT, typename RCT, typename OCT, typename Fun>
-void my_sparse_no_overlap_join_op(State &state, uint64_t param_in) {
-    const auto &param = unwrap_param<JoinParam>(param_in);
-    const Value &lhs = state.peek(1);
-    const Value &rhs = state.peek(0);
-    auto lhs_cells = lhs.cells().typify<LCT>();
-    auto rhs_cells = rhs.cells().typify<RCT>();
-    const Value::Index &lhs_index = lhs.index();
-    const Value::Index &rhs_index = rhs.index();
-    if (auto indexes = detect_type<FastValueIndex>(lhs_index, rhs_index)) {
-        const auto &lhs_fast = indexes.get<0>();
-        const auto &rhs_fast = indexes.get<1>();
-        return state.pop_pop_push(
-                FastValueIndex::sparse_no_overlap_join<LCT,RCT,OCT,Fun>
-                        (param.res_type, Fun(param.function),
-                         lhs_fast, rhs_fast,
-                         param.sparse_plan.sources,
-                         lhs_cells, rhs_cells, state.stash));
-    }
-    Fun fun(param.function);
-    SparseJoinState sparse(param.sparse_plan, lhs.index(), rhs.index());
-    auto guess = lhs.index().size() * rhs.index().size();
-    assert(param.dense_plan.out_size == 1);
-    auto builder = param.factory.create_transient_value_builder<OCT>(param.res_type, param.sparse_plan.sources.size(), 1, guess);
-    auto outer = sparse.first_index.create_view({});
-    assert(sparse.second_view_dims.empty());
-    auto inner = sparse.second_index.create_view({});
-    outer->lookup({});
-    while (outer->next_result(sparse.first_address, sparse.first_subspace)) {
-        inner->lookup({});
-        while (inner->next_result(sparse.second_only_address, sparse.second_subspace)) {
-            auto cell_value = fun(lhs_cells[sparse.lhs_subspace], rhs_cells[sparse.rhs_subspace]);
-            builder->add_subspace(sparse.full_address)[0] = cell_value;
-        }
-    }
-    auto &result = state.stash.create<std::unique_ptr<Value>>(builder->build(std::move(builder)));
-    const Value &result_ref = *(result.get());
-    state.pop_pop_push(result_ref);
-};
-
-//-----------------------------------------------------------------------------
-
-template <typename LCT, typename RCT, typename OCT, typename Fun>
 void my_sparse_full_overlap_join_op(State &state, uint64_t param_in) {
     const auto &param = unwrap_param<JoinParam>(param_in);
     const Value &lhs = state.peek(1);
@@ -222,12 +180,6 @@ struct SelectGenericJoinOp {
         {
             return my_sparse_full_overlap_join_op<LCT,RCT,OCT,Fun>;
         }
-        if ((param.dense_plan.out_size == 1) &&
-            (param.sparse_plan.lhs_overlap.size() == 0) &&
-            (param.sparse_plan.rhs_overlap.size() == 0))
-        {
-            return my_sparse_no_overlap_join_op<LCT,RCT,OCT,Fun>;
-        }
         return my_mixed_join_op<LCT,RCT,OCT,Fun>;
     }
 };
diff --git a/eval/src/vespa/eval/instruction/generic_join.h b/eval/src/vespa/eval/instruction/generic_join.h
index 026a2938971..d55718a0ff1 100644
--- a/eval/src/vespa/eval/instruction/generic_join.h
+++ b/eval/src/vespa/eval/instruction/generic_join.h
@@ -16,6 +16,11 @@ using join_fun_t = operation::op2_t;
 
 //-----------------------------------------------------------------------------
 
+struct JoinParam;
+// Declaration only; the definition stays in generic_join.cpp, so other files can only link against specializations instantiated there (NOTE(review): no explicit instantiation is visible in this change -- confirm).
+template <typename LCT, typename RCT, typename OCT, typename Fun>
+Value::UP generic_mixed_join(const Value &lhs, const Value &rhs, const JoinParam &param);
+
 struct GenericJoin {
     static InterpretedFunction::Instruction
     make_instruction(const ValueType &lhs_type, const ValueType &rhs_type,
diff --git a/eval/src/vespa/eval/instruction/sparse_no_overlap_join_function.cpp b/eval/src/vespa/eval/instruction/sparse_no_overlap_join_function.cpp
new file mode 100644
index 00000000000..a9f68c7314d
--- /dev/null
+++ b/eval/src/vespa/eval/instruction/sparse_no_overlap_join_function.cpp
@@ -0,0 +1,137 @@
+// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include "sparse_no_overlap_join_function.h"
+#include "generic_join.h"
+#include <vespa/eval/eval/fast_value.hpp>
+#include <vespa/vespalib/util/typify.h>
+
+namespace vespalib::eval {
+
+using namespace tensor_function;
+using namespace operation;
+using namespace instruction;
+
+namespace {
+
+// Join kernel for two FastValue-backed sparse operands without common mapped dimensions:
+// every lhs subspace is paired with every rhs subspace, the output address is assembled
+// as laid out by param.sparse_plan.sources, and each cell is fun(lhs_cell, rhs_cell).
+// Cells are looked up by subspace index; the result is created in 'stash'.
+template <typename CT, typename Fun>
+const Value &my_fast_no_overlap_sparse_join(const FastAddrMap &lhs_map, const FastAddrMap &rhs_map,
+                                            const CT *lhs_cells, const CT *rhs_cells,
+                                            const JoinParam &param, Stash &stash)
+{
+    const auto &sources = param.sparse_plan.sources;
+    const size_t addr_size = sources.size();
+    // Split the output label positions by the operand that supplies each of them.
+    std::vector<size_t> lhs_slots;
+    std::vector<size_t> rhs_slots;
+    for (size_t pos = 0; pos < addr_size; ++pos) {
+        if (sources[pos] == SparseJoinPlan::Source::LHS) {
+            lhs_slots.push_back(pos);
+        } else if (sources[pos] == SparseJoinPlan::Source::RHS) {
+            rhs_slots.push_back(pos);
+        } else {
+            abort(); // any other source kind is not supported by this kernel
+        }
+    }
+    assert((lhs_slots.size() + rhs_slots.size()) == addr_size);
+    Fun fun(param.function);
+    auto &result = stash.create<FastValue<CT,true>>(param.res_type, addr_size, 1, lhs_map.size() * rhs_map.size());
+    std::vector<string_id> label_buf(addr_size);
+    for (size_t l = 0; l < lhs_map.size(); ++l) {
+        auto lhs_labels = lhs_map.get_addr(l);
+        assert(lhs_labels.size() == lhs_slots.size());
+        for (size_t k = 0; k < lhs_slots.size(); ++k) {
+            label_buf[lhs_slots[k]] = lhs_labels[k];
+        }
+        for (size_t r = 0; r < rhs_map.size(); ++r) {
+            auto rhs_labels = rhs_map.get_addr(r);
+            assert(rhs_labels.size() == rhs_slots.size());
+            for (size_t k = 0; k < rhs_slots.size(); ++k) {
+                label_buf[rhs_slots[k]] = rhs_labels[k];
+            }
+            result.add_mapping(ConstArrayRef(label_buf));
+            CT cell = fun(lhs_cells[l], rhs_cells[r]);
+            result.my_cells.push_back_fast(cell);
+        }
+    }
+    return result;
+}
+
+// Interpreter op: joins the two values peeked from the top of the stack and replaces them with the result.
+// When are_fast() accepts both indexes the specialized kernel runs; otherwise generic_mixed_join is used.
+template <typename CT, typename Fun>
+void my_sparse_no_overlap_join_op(InterpretedFunction::State &state, uint64_t param_in) {
+    const auto &param = unwrap_param<JoinParam>(param_in);
+    const Value &lhs = state.peek(1);
+    const Value &rhs = state.peek(0);
+    const auto &lhs_index = lhs.index();
+    const auto &rhs_index = rhs.index();
+    if (__builtin_expect(!are_fast(lhs_index, rhs_index), false)) {
+        auto generic_result = generic_mixed_join<CT,CT,CT,Fun>(lhs, rhs, param);
+        return state.pop_pop_push(*state.stash.create<std::unique_ptr<Value>>(std::move(generic_result)));
+    }
+    state.pop_pop_push(my_fast_no_overlap_sparse_join<CT,Fun>(as_fast(lhs_index).map, as_fast(rhs_index).map,
+            lhs.cells().typify<CT>().cbegin(), rhs.cells().typify<CT>().cbegin(), param, state.stash));
+}
+
+// Selector handed to typify_invoke: resolves a (cell type, join function) pair to the matching op.
+struct SelectSparseNoOverlapJoinOp {
+    template <typename CT, typename Fun> static auto invoke() { return &my_sparse_no_overlap_join_op<CT,Fun>; }
+};
+
+using MyTypify = TypifyValue<TypifyCellType,operation::TypifyOp2>;
+
+bool is_sparse_like(const ValueType &type) { // true when the type has mapped dimensions and a dense subspace of exactly one cell
+    return (type.dense_subspace_size() == 1) && (type.count_mapped_dimensions() > 0);
+}
+
+} // namespace <unnamed>
+
+// Takes over result type, both children and the join function of 'original'.
+// The resulting types are expected to satisfy compatible_types(); this is only asserted.
+SparseNoOverlapJoinFunction::SparseNoOverlapJoinFunction(const tensor_function::Join &original)
+    : tensor_function::Join(original.result_type(), original.lhs(),
+                            original.rhs(), original.function())
+{
+    assert(compatible_types(result_type(), lhs().result_type(), rhs().result_type()));
+}
+
+InterpretedFunction::Instruction
+SparseNoOverlapJoinFunction::compile_self(const ValueBuilderFactory &factory, Stash &stash) const
+{   // the op is specialized on (result cell type, join function); its JoinParam is created in 'stash'
+    auto op = typify_invoke<2,MyTypify,SelectSparseNoOverlapJoinOp>(result_type().cell_type(), function());
+    const JoinParam &join_param = stash.create<JoinParam>(lhs().result_type(), rhs().result_type(), function(), factory);
+    return InterpretedFunction::Instruction(op, wrap_param<JoinParam>(join_param));
+}
+
+// Decides whether join(lhs, rhs) -> res can run on the specialized no-overlap kernel:
+// all three types must be sparse-like and share one cell type, and res must have as many
+// mapped dimensions as lhs and rhs together (for a join result: the inputs share none).
+// A res type that does not fit is rejected here rather than asserted on.
+bool
+SparseNoOverlapJoinFunction::compatible_types(const ValueType &res, const ValueType &lhs, const ValueType &rhs)
+{
+    const bool same_cell_type = (lhs.cell_type() == rhs.cell_type()) && (res.cell_type() == lhs.cell_type());
+    const bool all_sparse_like = is_sparse_like(lhs) && is_sparse_like(rhs) && is_sparse_like(res);
+    const bool no_overlap = (res.count_mapped_dimensions() ==
+                             (lhs.count_mapped_dimensions() + rhs.count_mapped_dimensions()));
+    return same_cell_type && all_sparse_like && no_overlap;
+}
+
+const TensorFunction &
+SparseNoOverlapJoinFunction::optimize(const TensorFunction &expr, Stash &stash)
+{
+    // Only Join nodes with compatible types are replaced; anything else is handed back untouched.
+    auto join_node = as<Join>(expr);
+    if (!join_node) {
+        return expr;
+    }
+    const bool eligible = compatible_types(expr.result_type(), join_node->lhs().result_type(), join_node->rhs().result_type());
+    if (!eligible) { return expr; }
+    return stash.create<SparseNoOverlapJoinFunction>(*join_node);
+}
+
+} // namespace
diff --git a/eval/src/vespa/eval/instruction/sparse_no_overlap_join_function.h b/eval/src/vespa/eval/instruction/sparse_no_overlap_join_function.h
new file mode 100644
index 00000000000..962e6dc1361
--- /dev/null
+++ b/eval/src/vespa/eval/instruction/sparse_no_overlap_join_function.h
@@ -0,0 +1,23 @@
+// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include <vespa/eval/eval/tensor_function.h>
+
+namespace vespalib::eval {
+
+/**
+ * Tensor function for joining sparse tensors with no overlapping
+ * dimensions. optimize() substitutes it for Join nodes accepted by compatible_types().
+ */
+class SparseNoOverlapJoinFunction : public tensor_function::Join
+{
+public:
+    explicit SparseNoOverlapJoinFunction(const tensor_function::Join &original); // explicit: a Join must never convert silently into this node
+    InterpretedFunction::Instruction compile_self(const ValueBuilderFactory &factory, Stash &stash) const override;
+    bool result_is_mutable() const override { return true; }
+    static bool compatible_types(const ValueType &res, const ValueType &lhs, const ValueType &rhs); // eligibility check used by optimize()
+    static const TensorFunction &optimize(const TensorFunction &expr, Stash &stash); // returns 'expr' itself when the rewrite does not apply
+};
+
+} // namespace