diff options
Diffstat (limited to 'eval')
8 files changed, 347 insertions, 4 deletions
diff --git a/eval/CMakeLists.txt b/eval/CMakeLists.txt index 9a7bd07534c..b68440795d4 100644 --- a/eval/CMakeLists.txt +++ b/eval/CMakeLists.txt @@ -36,6 +36,7 @@ vespa_define_module( src/tests/tensor/dense_inplace_join_function src/tests/tensor/dense_matmul_function src/tests/tensor/dense_multi_matmul_function + src/tests/tensor/dense_number_join_function src/tests/tensor/dense_remove_dimension_optimizer src/tests/tensor/dense_replace_type_function src/tests/tensor/dense_simple_join_function diff --git a/eval/src/tests/tensor/dense_inplace_join_function/dense_inplace_join_function_test.cpp b/eval/src/tests/tensor/dense_inplace_join_function/dense_inplace_join_function_test.cpp index ba2b1ba0023..3ce2e041997 100644 --- a/eval/src/tests/tensor/dense_inplace_join_function/dense_inplace_join_function_test.cpp +++ b/eval/src/tests/tensor/dense_inplace_join_function/dense_inplace_join_function_test.cpp @@ -98,9 +98,9 @@ TEST("require that self-join operations can be optimized") { TEST_DO(verify_p0_optimized("mut_x5_A+mut_x5_A")); } -TEST("require that join(tensor,scalar) operations are not optimized") { - TEST_DO(verify_not_optimized("mut_x5_A-mut_dbl_B")); - TEST_DO(verify_not_optimized("mut_dbl_A-mut_x5_B")); +TEST("require that join(tensor,scalar) operations are optimized") { + TEST_DO(verify_p0_optimized("mut_x5_A-mut_dbl_B")); + TEST_DO(verify_p1_optimized("mut_dbl_A-mut_x5_B")); } TEST("require that join with different tensor shapes are optimized") { @@ -118,7 +118,7 @@ TEST("require that non-mutable tensors are not optimized") { TEST_DO(verify_not_optimized("con_x5_A+con_x5_B")); } -TEST("require that scalar values are not optimized") { +TEST("require that scalar values are not optimized") { TEST_DO(verify_not_optimized("mut_dbl_A+mut_dbl_B")); TEST_DO(verify_not_optimized("mut_dbl_A+5")); TEST_DO(verify_not_optimized("5+mut_dbl_B")); diff --git a/eval/src/tests/tensor/dense_number_join_function/CMakeLists.txt 
b/eval/src/tests/tensor/dense_number_join_function/CMakeLists.txt new file mode 100644 index 00000000000..73c544cab38 --- /dev/null +++ b/eval/src/tests/tensor/dense_number_join_function/CMakeLists.txt @@ -0,0 +1,8 @@ +# Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +vespa_add_executable(eval_dense_number_join_function_test_app TEST + SOURCES + dense_number_join_function_test.cpp + DEPENDS + vespaeval +) +vespa_add_test(NAME eval_dense_number_join_function_test_app COMMAND eval_dense_number_join_function_test_app) diff --git a/eval/src/tests/tensor/dense_number_join_function/dense_number_join_function_test.cpp b/eval/src/tests/tensor/dense_number_join_function/dense_number_join_function_test.cpp new file mode 100644 index 00000000000..92a7661e359 --- /dev/null +++ b/eval/src/tests/tensor/dense_number_join_function/dense_number_join_function_test.cpp @@ -0,0 +1,119 @@ +// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+ +#include <vespa/vespalib/testkit/test_kit.h> +#include <vespa/eval/eval/tensor_function.h> +#include <vespa/eval/eval/simple_tensor.h> +#include <vespa/eval/eval/simple_tensor_engine.h> +#include <vespa/eval/tensor/default_tensor_engine.h> +#include <vespa/eval/tensor/dense/dense_number_join_function.h> +#include <vespa/eval/eval/test/eval_fixture.h> +#include <vespa/eval/eval/test/tensor_model.hpp> + +#include <vespa/vespalib/util/stringfmt.h> + +using namespace vespalib; +using namespace vespalib::eval; +using namespace vespalib::eval::test; +using namespace vespalib::tensor; +using namespace vespalib::eval::tensor_function; + +using vespalib::make_string_short::fmt; + +using Primary = DenseNumberJoinFunction::Primary; + +namespace vespalib::tensor { + +std::ostream &operator<<(std::ostream &os, Primary primary) +{ + switch(primary) { + case Primary::LHS: return os << "LHS"; + case Primary::RHS: return os << "RHS"; + } + abort(); +} + +} + +const TensorEngine &prod_engine = DefaultTensorEngine::ref(); + +EvalFixture::ParamRepo make_params() { + return EvalFixture::ParamRepo() + .add("a", spec(1.5)) + .add("number", spec(2.5)) + .add("sparse", spec({x({"a"})}, N())) + .add("dense", spec({y(5)}, N())) + .add("mixed", spec({x({"a"}),y(5)}, N())) + .add_matrix("x", 3, "y", 5); +} +EvalFixture::ParamRepo param_repo = make_params(); + +void verify_optimized(const vespalib::string &expr, Primary primary, bool inplace) { + EvalFixture slow_fixture(prod_engine, expr, param_repo, false); + EvalFixture fixture(prod_engine, expr, param_repo, true, true); + EXPECT_EQUAL(fixture.result(), EvalFixture::ref(expr, param_repo)); + EXPECT_EQUAL(fixture.result(), slow_fixture.result()); + auto info = fixture.find_all<DenseNumberJoinFunction>(); + ASSERT_EQUAL(info.size(), 1u); + EXPECT_TRUE(info[0]->result_is_mutable()); + EXPECT_EQUAL(info[0]->primary(), primary); + EXPECT_EQUAL(info[0]->inplace(), inplace); + int p_inplace = inplace ? ((primary == Primary::LHS) ? 
0 : 1) : -1; + EXPECT_TRUE((p_inplace == -1) || (fixture.num_params() > size_t(p_inplace))); + for (size_t i = 0; i < fixture.num_params(); ++i) { + if (i == size_t(p_inplace)) { + EXPECT_EQUAL(fixture.get_param(i), fixture.result()); + } else { + EXPECT_NOT_EQUAL(fixture.get_param(i), fixture.result()); + } + } +} + +void verify_not_optimized(const vespalib::string &expr) { + EvalFixture slow_fixture(prod_engine, expr, param_repo, false); + EvalFixture fixture(prod_engine, expr, param_repo, true); + EXPECT_EQUAL(fixture.result(), EvalFixture::ref(expr, param_repo)); + EXPECT_EQUAL(fixture.result(), slow_fixture.result()); + auto info = fixture.find_all<DenseNumberJoinFunction>(); + EXPECT_TRUE(info.empty()); +} + +TEST("require dense number join can be optimized") { + TEST_DO(verify_optimized("x3y5+a", Primary::LHS, false)); + TEST_DO(verify_optimized("a+x3y5", Primary::RHS, false)); + TEST_DO(verify_optimized("x3y5f*a", Primary::LHS, false)); + TEST_DO(verify_optimized("a*x3y5f", Primary::RHS, false)); +} + +TEST("require dense number join can be inplace") { + TEST_DO(verify_optimized("@x3y5*a", Primary::LHS, true)); + TEST_DO(verify_optimized("a*@x3y5", Primary::RHS, true)); + TEST_DO(verify_optimized("@x3y5f+a", Primary::LHS, true)); + TEST_DO(verify_optimized("a+@x3y5f", Primary::RHS, true)); +} + +TEST("require that asymmetric operations work") { + TEST_DO(verify_optimized("x3y5/a", Primary::LHS, false)); + TEST_DO(verify_optimized("a/x3y5", Primary::RHS, false)); + TEST_DO(verify_optimized("x3y5f-a", Primary::LHS, false)); + TEST_DO(verify_optimized("a-x3y5f", Primary::RHS, false)); +} + +TEST("require that inappropriate cases are not optimized") { + int optimized = 0; + for (vespalib::string lhs: {"number", "dense", "sparse", "mixed"}) { + for (vespalib::string rhs: {"number", "dense", "sparse", "mixed"}) { + if (((lhs == "number") && (rhs == "dense")) || + ((lhs == "dense") && (rhs == "number"))) + { + ++optimized; + } else { + auto expr = fmt("%s+%s", 
lhs.c_str(), rhs.c_str()); + TEST_STATE(expr.c_str()); + verify_not_optimized(expr); + } + } + } + EXPECT_EQUAL(optimized, 2); +} + +TEST_MAIN() { TEST_RUN_ALL(); } diff --git a/eval/src/vespa/eval/tensor/default_tensor_engine.cpp b/eval/src/vespa/eval/tensor/default_tensor_engine.cpp index d374848af64..d9fcbaa3e2a 100644 --- a/eval/src/vespa/eval/tensor/default_tensor_engine.cpp +++ b/eval/src/vespa/eval/tensor/default_tensor_engine.cpp @@ -19,6 +19,7 @@ #include "dense/dense_lambda_peek_optimizer.h" #include "dense/dense_lambda_function.h" #include "dense/dense_simple_join_function.h" +#include "dense/dense_number_join_function.h" #include "dense/dense_simple_map_function.h" #include "dense/vector_from_doubles_function.h" #include "dense/dense_tensor_create_function.h" @@ -294,6 +295,7 @@ DefaultTensorEngine::optimize(const TensorFunction &expr, Stash &stash) const child.set(DenseFastRenameOptimizer::optimize(child.get(), stash)); child.set(DenseSimpleMapFunction::optimize(child.get(), stash)); child.set(DenseSimpleJoinFunction::optimize(child.get(), stash)); + child.set(DenseNumberJoinFunction::optimize(child.get(), stash)); child.set(DenseSingleReduceFunction::optimize(child.get(), stash)); nodes.pop_back(); } diff --git a/eval/src/vespa/eval/tensor/dense/CMakeLists.txt b/eval/src/vespa/eval/tensor/dense/CMakeLists.txt index 7ababbee228..244e288b90a 100644 --- a/eval/src/vespa/eval/tensor/dense/CMakeLists.txt +++ b/eval/src/vespa/eval/tensor/dense/CMakeLists.txt @@ -11,6 +11,7 @@ vespa_add_library(eval_tensor_dense OBJECT dense_lambda_peek_optimizer.cpp dense_matmul_function.cpp dense_multi_matmul_function.cpp + dense_number_join_function.cpp dense_remove_dimension_optimizer.cpp dense_replace_type_function.cpp dense_simple_join_function.cpp diff --git a/eval/src/vespa/eval/tensor/dense/dense_number_join_function.cpp b/eval/src/vespa/eval/tensor/dense/dense_number_join_function.cpp new file mode 100644 index 00000000000..3f48607cef4 --- /dev/null +++ 
b/eval/src/vespa/eval/tensor/dense/dense_number_join_function.cpp @@ -0,0 +1,179 @@ +// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#include "dense_number_join_function.h" +#include "dense_tensor_view.h" +#include <vespa/eval/eval/value.h> +#include <vespa/eval/eval/operation.h> + +namespace vespalib::tensor { + +using vespalib::ArrayRef; + +using eval::Value; +using eval::ValueType; +using eval::TensorFunction; +using eval::TensorEngine; +using eval::as; + +using namespace eval::operation; +using namespace eval::tensor_function; + +using Primary = DenseNumberJoinFunction::Primary; + +using op_function = eval::InterpretedFunction::op_function; +using Instruction = eval::InterpretedFunction::Instruction; +using State = eval::InterpretedFunction::State; + +namespace { + +struct CallFun { + join_fun_t function; + CallFun(join_fun_t function_in) : function(function_in) {} + double eval(double a, double b) const { return function(a, b); } +}; + +struct AddFun { + AddFun(join_fun_t) {} + template <typename A, typename B> + auto eval(A a, B b) const { return (a + b); } +}; + +struct MulFun { + MulFun(join_fun_t) {} + template <typename A, typename B> + auto eval(A a, B b) const { return (a * b); } +}; + +// needed for asymmetric operations like Sub and Div +template <typename Fun> +struct SwapFun { + Fun fun; + SwapFun(join_fun_t function_in) : fun(function_in) {} + template <typename A, typename B> + auto eval(A a, B b) const { return fun.eval(b, a); } +}; + +template <typename CT, typename Fun> +void apply_fun_1_to_n(CT *dst, const CT *pri, CT sec, size_t n, const Fun &fun) { + for (size_t i = 0; i < n; ++i) { + dst[i] = fun.eval(pri[i], sec); + } +} + +template <typename CT, bool inplace> +ArrayRef<CT> make_dst_cells(ConstArrayRef<CT> src_cells, Stash &stash) { + if (inplace) { + return unconstify(src_cells); + } else { + return stash.create_array<CT>(src_cells.size()); + } +} + +template <typename 
CT, typename Fun, bool inplace, bool swap> +void my_number_join_op(State &state, uint64_t param) { + using OP = typename std::conditional<swap,SwapFun<Fun>,Fun>::type; + OP my_op((join_fun_t)param); + const Value &tensor = state.peek(swap ? 0 : 1); + CT number = state.peek(swap ? 1 : 0).as_double(); + auto src_cells = DenseTensorView::typify_cells<CT>(tensor); + auto dst_cells = make_dst_cells<CT, inplace>(src_cells, state.stash); + apply_fun_1_to_n(dst_cells.begin(), src_cells.begin(), number, dst_cells.size(), my_op); + if (inplace) { + state.pop_pop_push(tensor); + } else { + state.pop_pop_push(state.stash.create<DenseTensorView>(tensor.type(), TypedCells(dst_cells))); + } +} + +//----------------------------------------------------------------------------- + +template <typename Fun, bool inplace, bool swap> +struct MyNumberJoinOp { + template <typename CT> + static auto get_fun() { return my_number_join_op<CT,Fun,inplace,swap>; } +}; + +template <typename Fun, bool inplace> +op_function my_select_3(ValueType::CellType ct, Primary primary) { + switch (primary) { + case Primary::LHS: return select_1<MyNumberJoinOp<Fun,inplace,false>>(ct); + case Primary::RHS: return select_1<MyNumberJoinOp<Fun,inplace,true>>(ct); + } + abort(); +} + +template <typename Fun> +op_function my_select_2(ValueType::CellType ct, Primary primary, bool inplace) { + if (inplace) { + return my_select_3<Fun, true>(ct, primary); + } else { + return my_select_3<Fun, false>(ct, primary); + } +} + +op_function my_select(ValueType::CellType ct, Primary primary, bool inplace, join_fun_t fun_hint) { + if (fun_hint == Add::f) { + return my_select_2<AddFun>(ct, primary, inplace); + } else if (fun_hint == Mul::f) { + return my_select_2<MulFun>(ct, primary, inplace); + } else { + return my_select_2<CallFun>(ct, primary, inplace); + } +} + +bool is_dense(const TensorFunction &tf) { return tf.result_type().is_dense(); } +bool is_double(const TensorFunction &tf) { return tf.result_type().is_double(); } 
+ValueType::CellType cell_type(const TensorFunction &tf) { return tf.result_type().cell_type(); } + +} // namespace vespalib::tensor::<unnamed> + +//----------------------------------------------------------------------------- + +DenseNumberJoinFunction::DenseNumberJoinFunction(const ValueType &result_type, + const TensorFunction &lhs, + const TensorFunction &rhs, + join_fun_t function_in, + Primary primary_in) + : Join(result_type, lhs, rhs, function_in), + _primary(primary_in) +{ +} + +DenseNumberJoinFunction::~DenseNumberJoinFunction() = default; + +bool +DenseNumberJoinFunction::inplace() const +{ + if (_primary == Primary::LHS) { + return lhs().result_is_mutable(); + } else { + return rhs().result_is_mutable(); + } +} + +Instruction +DenseNumberJoinFunction::compile_self(const TensorEngine &, Stash &) const +{ + auto op = my_select(result_type().cell_type(), _primary, inplace(), function()); + static_assert(sizeof(uint64_t) == sizeof(function())); + return Instruction(op, (uint64_t)(function())); +} + +const TensorFunction & +DenseNumberJoinFunction::optimize(const TensorFunction &expr, Stash &stash) +{ + if (auto join = as<Join>(expr)) { + const TensorFunction &lhs = join->lhs(); + const TensorFunction &rhs = join->rhs(); + if (is_dense(lhs) && is_double(rhs)) { + assert(cell_type(expr) == cell_type(lhs)); + return stash.create<DenseNumberJoinFunction>(join->result_type(), lhs, rhs, join->function(), Primary::LHS); + } else if (is_double(lhs) && is_dense(rhs)) { + assert(cell_type(expr) == cell_type(rhs)); + return stash.create<DenseNumberJoinFunction>(join->result_type(), lhs, rhs, join->function(), Primary::RHS); + } + } + return expr; +} + +} // namespace vespalib::tensor diff --git a/eval/src/vespa/eval/tensor/dense/dense_number_join_function.h b/eval/src/vespa/eval/tensor/dense/dense_number_join_function.h new file mode 100644 index 00000000000..1a9e92b860f --- /dev/null +++ b/eval/src/vespa/eval/tensor/dense/dense_number_join_function.h @@ -0,0 +1,33 @@ 
+// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#pragma once + +#include <vespa/eval/eval/tensor_function.h> + +namespace vespalib::tensor { + +/** + * Tensor function for join operations between dense tensors and + * numbers. + **/ +class DenseNumberJoinFunction : public eval::tensor_function::Join +{ +public: + enum class Primary : uint8_t { LHS, RHS }; + using join_fun_t = ::vespalib::eval::tensor_function::join_fun_t; +private: + Primary _primary; +public: + DenseNumberJoinFunction(const eval::ValueType &result_type, + const TensorFunction &lhs, + const TensorFunction &rhs, + join_fun_t function_in, + Primary primary_in); + ~DenseNumberJoinFunction() override; + Primary primary() const { return _primary; } + bool inplace() const; + eval::InterpretedFunction::Instruction compile_self(const eval::TensorEngine &engine, Stash &stash) const override; + static const eval::TensorFunction &optimize(const eval::TensorFunction &expr, Stash &stash); +}; + +} // namespace vespalib::tensor