7 files changed, 305 insertions, 56 deletions
diff --git a/eval/src/tests/eval/tensor_lambda/tensor_lambda_test.cpp b/eval/src/tests/eval/tensor_lambda/tensor_lambda_test.cpp
index 3c35f90c521..c8091fd7c6e 100644
--- a/eval/src/tests/eval/tensor_lambda/tensor_lambda_test.cpp
+++ b/eval/src/tests/eval/tensor_lambda/tensor_lambda_test.cpp
@@ -8,6 +8,7 @@
 #include <vespa/eval/tensor/dense/dense_replace_type_function.h>
 #include <vespa/eval/tensor/dense/dense_cell_range_function.h>
 #include <vespa/eval/tensor/dense/dense_lambda_peek_function.h>
+#include <vespa/eval/tensor/dense/dense_lambda_function.h>
 #include <vespa/eval/tensor/dense/dense_fast_rename_optimizer.h>
 #include <vespa/eval/tensor/dense/dense_tensor.h>
 #include <vespa/eval/eval/test/tensor_model.hpp>
@@ -23,11 +24,27 @@ using namespace vespalib::eval::test;
 using namespace vespalib::tensor;
 using namespace vespalib::eval::tensor_function;
 
+using EvalMode = DenseLambdaFunction::EvalMode;
+
+namespace vespalib::tensor {
+
+std::ostream &operator<<(std::ostream &os, EvalMode eval_mode)
+{
+    // Stream the symbolic name of the evaluation mode.
+    if (eval_mode == EvalMode::COMPILED) { return os << "COMPILED"; }
+    if (eval_mode == EvalMode::INTERPRETED) { return os << "INTERPRETED"; }
+    // Any other value is not a declared enumerator; treat it as fatal.
+    abort();
+}
+
+}
+
 const TensorEngine &prod_engine = DefaultTensorEngine::ref();
 
 EvalFixture::ParamRepo make_params() {
     return EvalFixture::ParamRepo()
         .add("a", spec(1))
+        .add("b", spec(2))
         .add("x3", spec({x(3)}, N()))
         .add("x3f", spec(float_cells({x(3)}), N()))
         .add("x3m", spec({x({"0", "1", "2"})}, N()))
@@ -55,8 +72,14 @@ void verify_impl(const vespalib::string &expr, const vespalib::string &expect) {
     verify_impl<T>(expr, expect, [](const T*){});
 }
 
-void verify_generic(const vespalib::string &expr, const vespalib::string &expect) {
-    verify_impl<Lambda>(expr, expect);
+// Verify 'expr' against 'expect' through verify_impl<DenseLambdaFunction> and also
+// check that the inspected function reports the expected evaluation mode.
+void verify_generic(const vespalib::string &expr, const vespalib::string &expect,
+                    EvalMode expect_eval_mode)
+{
+    auto check_mode = [&](const DenseLambdaFunction *fun)
+                      { EXPECT_EQUAL(fun->eval_mode(), expect_eval_mode); };
+    verify_impl<DenseLambdaFunction>(expr, expect, check_mode);
 }
 
 void verify_reshape(const vespalib::string &expr, const vespalib::string &expect) {
@@ -67,8 +90,8 @@ void verify_range(const vespalib::string &expr, const vespalib::string &expect)
     verify_impl<DenseCellRangeFunction>(expr, expect);
 }
 
-void verify_compiled(const vespalib::string &expr, const vespalib::string &expect,
-                     const vespalib::string &expect_idx_fun)
+void verify_idx_fun(const vespalib::string &expr, const vespalib::string &expect,
+                    const vespalib::string &expect_idx_fun)
 {
     verify_impl<DenseLambdaPeekFunction>(expr, expect,
                                          [&](const DenseLambdaPeekFunction *info)
@@ -88,22 +111,32 @@ TEST("require that simple constant tensor lambda works") {
 }
 
 TEST("require that simple dynamic tensor lambda works") {
-    TEST_DO(verify_generic("tensor(x[3])(x+a)", "tensor(x[3]):[1,2,3]"));
+    TEST_DO(verify_generic("tensor(x[3])(x+a)", "tensor(x[3]):[1,2,3]", EvalMode::COMPILED));
+}
+
+TEST("require that compiled multi-dimensional multi-param dynamic tensor lambda works") { // params a=1 and b=2, so each cell is (b-a)+x+y = 1+x+y
+    TEST_DO(verify_generic("tensor(x[3],y[2])((b-a)+x+y)", "tensor(x[3],y[2]):[[1,2],[2,3],[3,4]]", EvalMode::COMPILED));
+    TEST_DO(verify_generic("tensor<float>(x[3],y[2])((b-a)+x+y)", "tensor<float>(x[3],y[2]):[[1,2],[2,3],[3,4]]", EvalMode::COMPILED));
+}
+
+TEST("require that interpreted multi-dimensional multi-param dynamic tensor lambda works") { // same cells as the compiled case, but the tensor peek x3{x:(a)} makes the expected mode INTERPRETED
+    TEST_DO(verify_generic("tensor(x[3],y[2])((x3{x:(a)}-a)+x+y)", "tensor(x[3],y[2]):[[1,2],[2,3],[3,4]]", EvalMode::INTERPRETED));
+    TEST_DO(verify_generic("tensor<float>(x[3],y[2])((x3{x:(a)}-a)+x+y)", "tensor<float>(x[3],y[2]):[[1,2],[2,3],[3,4]]", EvalMode::INTERPRETED));
 }
 
 TEST("require that tensor lambda can be used for tensor slicing") {
-    TEST_DO(verify_generic("tensor(x[2])(x3{x:(x+a)})", "tensor(x[2]):[2,3]"));
-    TEST_DO(verify_generic("tensor(x[2])(a+x3{x:(x)})", "tensor(x[2]):[2,3]"));
+    TEST_DO(verify_generic("tensor(x[2])(x3{x:(x+a)})", "tensor(x[2]):[2,3]", EvalMode::INTERPRETED));
+    TEST_DO(verify_generic("tensor(x[2])(a+x3{x:(x)})", "tensor(x[2]):[2,3]", EvalMode::INTERPRETED));
 }
 
 TEST("require that tensor lambda can be used for cell type casting") {
-    TEST_DO(verify_compiled("tensor(x[3])(x3f{x:(x)})", "tensor(x[3]):[1,2,3]", "f(x)(x)"));
-    TEST_DO(verify_compiled("tensor<float>(x[3])(x3{x:(x)})", "tensor<float>(x[3]):[1,2,3]", "f(x)(x)"));
+    TEST_DO(verify_idx_fun("tensor(x[3])(x3f{x:(x)})", "tensor(x[3]):[1,2,3]", "f(x)(x)"));
+    TEST_DO(verify_idx_fun("tensor<float>(x[3])(x3{x:(x)})", "tensor<float>(x[3]):[1,2,3]", "f(x)(x)"));
 }
 
 TEST("require that tensor lambda can be used to convert from sparse to dense tensors") {
-    TEST_DO(verify_generic("tensor(x[3])(x3m{x:(x)})", "tensor(x[3]):[1,2,3]"));
-    TEST_DO(verify_generic("tensor(x[2])(x3m{x:(x)})", "tensor(x[2]):[1,2]"));
+    TEST_DO(verify_generic("tensor(x[3])(x3m{x:(x)})", "tensor(x[3]):[1,2,3]", EvalMode::INTERPRETED));
+    TEST_DO(verify_generic("tensor(x[2])(x3m{x:(x)})", "tensor(x[2]):[1,2]", EvalMode::INTERPRETED));
 }
 
 TEST("require that constant nested tensor lambda using tensor peek works") {
@@ -111,7 +144,7 @@ TEST("require that constant nested tensor lambda using tensor peek works") {
 }
 
 TEST("require that dynamic nested tensor lambda using tensor peek works") {
-    TEST_DO(verify_generic("tensor(x[2])(tensor(y[2])((x+y)+a){y:(x)})", "tensor(x[2]):[1,3]"));
+    TEST_DO(verify_generic("tensor(x[2])(tensor(y[2])((x+y)+a){y:(x)})", "tensor(x[2]):[1,3]", EvalMode::INTERPRETED));
 }
 
 TEST("require that tensor reshape is optimized") {
@@ -121,10 +154,10 @@ TEST("require that tensor reshape is optimized") {
 }
 
 TEST("require that tensor reshape with non-matching cell type requires cell copy") {
-    TEST_DO(verify_compiled("tensor(x[15])(x3y5f{x:(x/5),y:(x%5)})", "x15", "f(x)((floor((x/5))*5)+(x%5))"));
-    TEST_DO(verify_compiled("tensor<float>(x[15])(x3y5{x:(x/5),y:(x%5)})", "x15f", "f(x)((floor((x/5))*5)+(x%5))"));
-    TEST_DO(verify_compiled("tensor(x[3],y[5])(x15f{x:(x*5+y)})", "x3y5", "f(x,y)((x*5)+y)"));
-    TEST_DO(verify_compiled("tensor<float>(x[3],y[5])(x15{x:(x*5+y)})", "x3y5f", "f(x,y)((x*5)+y)"));
+    TEST_DO(verify_idx_fun("tensor(x[15])(x3y5f{x:(x/5),y:(x%5)})", "x15", "f(x)((floor((x/5))*5)+(x%5))"));
+    TEST_DO(verify_idx_fun("tensor<float>(x[15])(x3y5{x:(x/5),y:(x%5)})", "x15f", "f(x)((floor((x/5))*5)+(x%5))"));
+    TEST_DO(verify_idx_fun("tensor(x[3],y[5])(x15f{x:(x*5+y)})", "x3y5", "f(x,y)((x*5)+y)"));
+    TEST_DO(verify_idx_fun("tensor<float>(x[3],y[5])(x15{x:(x*5+y)})", "x3y5f", "f(x,y)((x*5)+y)"));
 }
 
 TEST("require that tensor cell subrange view is optimized") {
@@ -135,22 +168,22 @@ TEST("require that tensor cell subrange view is optimized") {
 }
 
 TEST("require that tensor cell subrange with non-matching cell type requires cell copy") {
-    TEST_DO(verify_compiled("tensor(x[3])(x15f{x:(x+5)})", "tensor(x[3]):[6,7,8]", "f(x)(x+5)"));
-    TEST_DO(verify_compiled("tensor<float>(x[3])(x15{x:(x+5)})", "tensor<float>(x[3]):[6,7,8]", "f(x)(x+5)"));
+    TEST_DO(verify_idx_fun("tensor(x[3])(x15f{x:(x+5)})", "tensor(x[3]):[6,7,8]", "f(x)(x+5)"));
+    TEST_DO(verify_idx_fun("tensor<float>(x[3])(x15{x:(x+5)})", "tensor<float>(x[3]):[6,7,8]", "f(x)(x+5)"));
 }
 
 TEST("require that non-continuous cell extraction is optimized") {
-    TEST_DO(verify_compiled("tensor(x[3])(x3y5{x:(x),y:2})", "x3y5{y:2}", "f(x)((floor(x)*5)+2)"));
-    TEST_DO(verify_compiled("tensor(x[3])(x3y5f{x:(x),y:2})", "x3y5{y:2}", "f(x)((floor(x)*5)+2)"));
-    TEST_DO(verify_compiled("tensor<float>(x[3])(x3y5{x:(x),y:2})", "x3y5f{y:2}", "f(x)((floor(x)*5)+2)"));
-    TEST_DO(verify_compiled("tensor<float>(x[3])(x3y5f{x:(x),y:2})", "x3y5f{y:2}", "f(x)((floor(x)*5)+2)"));
+    TEST_DO(verify_idx_fun("tensor(x[3])(x3y5{x:(x),y:2})", "x3y5{y:2}", "f(x)((floor(x)*5)+2)"));
+    TEST_DO(verify_idx_fun("tensor(x[3])(x3y5f{x:(x),y:2})", "x3y5{y:2}", "f(x)((floor(x)*5)+2)"));
+    TEST_DO(verify_idx_fun("tensor<float>(x[3])(x3y5{x:(x),y:2})", "x3y5f{y:2}", "f(x)((floor(x)*5)+2)"));
+    TEST_DO(verify_idx_fun("tensor<float>(x[3])(x3y5f{x:(x),y:2})", "x3y5f{y:2}", "f(x)((floor(x)*5)+2)"));
 }
 
 TEST("require that out-of-bounds cell extraction is not optimized") {
-    TEST_DO(verify_generic("tensor(x[3])(x3y5{x:1,y:(x+3)})", "tensor(x[3]):[9,10,0]"));
-    TEST_DO(verify_generic("tensor(x[3])(x3y5{x:1,y:(x-1)})", "tensor(x[3]):[0,6,7]"));
-    TEST_DO(verify_generic("tensor(x[3])(x3y5{x:(x+1),y:(x)})", "tensor(x[3]):[6,12,0]"));
-    TEST_DO(verify_generic("tensor(x[3])(x3y5{x:(x-1),y:(x)})", "tensor(x[3]):[0,2,8]"));
+    TEST_DO(verify_generic("tensor(x[3])(x3y5{x:1,y:(x+3)})", "tensor(x[3]):[9,10,0]", EvalMode::INTERPRETED));
+    TEST_DO(verify_generic("tensor(x[3])(x3y5{x:1,y:(x-1)})", "tensor(x[3]):[0,6,7]", EvalMode::INTERPRETED));
+    TEST_DO(verify_generic("tensor(x[3])(x3y5{x:(x+1),y:(x)})", "tensor(x[3]):[6,12,0]", EvalMode::INTERPRETED));
+    TEST_DO(verify_generic("tensor(x[3])(x3y5{x:(x-1),y:(x)})", "tensor(x[3]):[0,2,8]", EvalMode::INTERPRETED));
 }
 
 TEST("require that non-double result from inner tensor lambda function fails type resolving") {
diff --git a/eval/src/vespa/eval/eval/tensor_function.cpp b/eval/src/vespa/eval/eval/tensor_function.cpp
index 2656e240a5b..1aa18417b87 100644
--- a/eval/src/vespa/eval/eval/tensor_function.cpp
+++ b/eval/src/vespa/eval/eval/tensor_function.cpp
@@ -134,9 +134,16 @@ void op_tensor_create(State &state, uint64_t param) {
     state.pop_n_push(i, result);
 }
 
+struct LambdaParams { // state handed to op_tensor_lambda through the instruction parameter
+    const Lambda &parent;    // lambda node whose create_spec() produces the result spec
+    InterpretedFunction fun; // interpreted form of the lambda's inner function
+    LambdaParams(const Lambda &parent_in, InterpretedFunction fun_in)
+        : parent(parent_in), fun(std::move(fun_in)) {}
+};
+
 void op_tensor_lambda(State &state, uint64_t param) {
-    const Lambda::Self &self = unwrap_param<Lambda::Self>(param);
-    TensorSpec spec = self.parent.create_spec(*state.params, self.fun);
+    const LambdaParams &params = unwrap_param<LambdaParams>(param); // 'param' was produced by wrap_param<LambdaParams> in Lambda::compile_self
+    TensorSpec spec = params.parent.create_spec(*state.params, params.fun); // build the result as a spec; the engine converts it below
     const Value &result = *state.stash.create<Value::UP>(state.engine.from_spec(spec));
     state.stack.emplace_back(result);
 }
@@ -439,13 +446,8 @@ InterpretedFunction::Instruction
 Lambda::compile_self(const TensorEngine &engine, Stash &stash) const
 {
     InterpretedFunction fun(engine, _lambda->root(), _lambda_types);
-    Self &self = stash.create<Self>(*this, std::move(fun));
-    return Instruction(op_tensor_lambda, wrap_param<Self>(self));
-}
-
-void
-Lambda::push_children(std::vector<Child::CREF> &) const
-{
+    LambdaParams &params = stash.create<LambdaParams>(*this, std::move(fun));
+    return Instruction(op_tensor_lambda, wrap_param<LambdaParams>(params));
 }
 
 void
diff --git a/eval/src/vespa/eval/eval/tensor_function.h b/eval/src/vespa/eval/eval/tensor_function.h
index e1961079017..6743f37eeb1 100644
--- a/eval/src/vespa/eval/eval/tensor_function.h
+++ b/eval/src/vespa/eval/eval/tensor_function.h
@@ -183,7 +183,7 @@ class ConstValue : public Leaf
 private:
     const Value &_value;
 public:
-    ConstValue(const Value &value_in) : Leaf(value_in.type()), _value(value_in) {}
+    ConstValue(const Value &value_in) : Super(value_in.type()), _value(value_in) {}
     const Value &value() const { return _value; }
     bool result_is_mutable() const override { return false; }
     InterpretedFunction::Instruction compile_self(const TensorEngine &engine, Stash &stash) const final override;
@@ -199,7 +199,7 @@ private:
     size_t _param_idx;
 public:
     Inject(const ValueType &result_type_in, size_t param_idx_in)
-        : Leaf(result_type_in), _param_idx(param_idx_in) {}
+        : Super(result_type_in), _param_idx(param_idx_in) {}
     size_t param_idx() const { return _param_idx; }
     bool result_is_mutable() const override { return false; }
     InterpretedFunction::Instruction compile_self(const TensorEngine &engine, Stash &stash) const final override;
@@ -219,7 +219,7 @@ public:
            const TensorFunction &child_in,
            Aggr aggr_in,
            const std::vector<vespalib::string> &dimensions_in)
-        : Op1(result_type_in, child_in), _aggr(aggr_in), _dimensions(dimensions_in) {}
+        : Super(result_type_in, child_in), _aggr(aggr_in), _dimensions(dimensions_in) {}
     Aggr aggr() const { return _aggr; }
     const std::vector<vespalib::string> &dimensions() const { return _dimensions; }
     bool result_is_mutable() const override { return true; }
@@ -238,7 +238,7 @@ public:
     Map(const ValueType &result_type_in,
         const TensorFunction &child_in,
         map_fun_t function_in)
-        : Op1(result_type_in, child_in), _function(function_in) {}
+        : Super(result_type_in, child_in), _function(function_in) {}
     map_fun_t function() const { return _function; }
     bool result_is_mutable() const override { return true; }
     InterpretedFunction::Instruction compile_self(const TensorEngine &engine, Stash &stash) const override;
@@ -257,7 +257,7 @@ public:
          const TensorFunction &lhs_in,
          const TensorFunction &rhs_in,
          join_fun_t function_in)
-        : Op2(result_type_in, lhs_in, rhs_in), _function(function_in) {}
+        : Super(result_type_in, lhs_in, rhs_in), _function(function_in) {}
     join_fun_t function() const { return _function; }
     bool result_is_mutable() const override { return true; }
     InterpretedFunction::Instruction compile_self(const TensorEngine &engine, Stash &stash) const override;
@@ -276,7 +276,7 @@ public:
           const TensorFunction &lhs_in,
           const TensorFunction &rhs_in,
           join_fun_t function_in)
-        : Op2(result_type_in, lhs_in, rhs_in), _function(function_in) {}
+        : Super(result_type_in, lhs_in, rhs_in), _function(function_in) {}
     join_fun_t function() const { return _function; }
     bool result_is_mutable() const override { return true; }
     InterpretedFunction::Instruction compile_self(const TensorEngine &engine, Stash &stash) const override;
@@ -295,7 +295,7 @@ public:
            const TensorFunction &lhs_in,
            const TensorFunction &rhs_in,
            const vespalib::string &dimension_in)
-        : Op2(result_type_in, lhs_in, rhs_in), _dimension(dimension_in) {}
+        : Super(result_type_in, lhs_in, rhs_in), _dimension(dimension_in) {}
     const vespalib::string &dimension() const { return _dimension; }
     bool result_is_mutable() const override { return true; }
     InterpretedFunction::Instruction compile_self(const TensorEngine &engine, Stash &stash) const final override;
@@ -311,7 +311,7 @@ private:
     std::map<TensorSpec::Address, Child> _spec;
 public:
     Create(const ValueType &result_type_in, const std::map<TensorSpec::Address, Node::CREF> &spec_in)
-        : Node(result_type_in), _spec()
+        : Super(result_type_in), _spec()
     {
         for (const auto &cell: spec_in) {
             _spec.emplace(cell.first, Child(cell.second));
@@ -326,23 +326,16 @@ public:
 
 //-----------------------------------------------------------------------------
 
-class Lambda : public Node
+class Lambda : public Leaf
 {
-    using Super = Node;
-public:
-    struct Self {
-        const Lambda &parent;
-        InterpretedFunction fun;
-        Self(const Lambda &parent_in, InterpretedFunction fun_in)
-            : parent(parent_in), fun(std::move(fun_in)) {}
-    };
+    using Super = Leaf;
 private:
     std::vector<size_t> _bindings;
     std::shared_ptr<Function const> _lambda;
     NodeTypes _lambda_types;
 public:
     Lambda(const ValueType &result_type_in, const std::vector<size_t> &bindings_in, const Function &lambda_in, NodeTypes lambda_types_in)
-        : Node(result_type_in), _bindings(bindings_in), _lambda(lambda_in.shared_from_this()), _lambda_types(std::move(lambda_types_in)) {}
+        : Super(result_type_in), _bindings(bindings_in), _lambda(lambda_in.shared_from_this()), _lambda_types(std::move(lambda_types_in)) {}
     const std::vector<size_t> &bindings() const { return _bindings; }
     const Function &lambda() const { return *_lambda; }
     const NodeTypes &types() const { return _lambda_types; }
@@ -352,7 +345,6 @@ public:
     }
     bool result_is_mutable() const override { return true; }
     InterpretedFunction::Instruction compile_self(const TensorEngine &engine, Stash &stash) const final override;
-    void push_children(std::vector<Child::CREF> &children) const final override;
     void visit_self(vespalib::ObjectVisitor &visitor) const override;
 };
 
@@ -369,7 +361,7 @@ private:
 public:
     Peek(const ValueType &result_type_in, const Node &param,
          const std::map<vespalib::string, std::variant<TensorSpec::Label, Node::CREF>> &spec)
-        : Node(result_type_in), _param(param), _spec()
+        : Super(result_type_in), _param(param), _spec()
     {
         for (const auto &dim: spec) {
             std::visit(vespalib::overload
@@ -404,7 +396,7 @@ public:
            const TensorFunction &child_in,
            const std::vector<vespalib::string> &from_in,
            const std::vector<vespalib::string> &to_in)
-        : Op1(result_type_in, child_in), _from(from_in), _to(to_in) {}
+        : Super(result_type_in, child_in), _from(from_in), _to(to_in) {}
     const std::vector<vespalib::string> &from() const { return _from; }
     const std::vector<vespalib::string> &to() const { return _to; }
     bool result_is_mutable() const override { return true; }
diff --git a/eval/src/vespa/eval/tensor/default_tensor_engine.cpp b/eval/src/vespa/eval/tensor/default_tensor_engine.cpp
index d28e420e6f2..d374848af64 100644
--- a/eval/src/vespa/eval/tensor/default_tensor_engine.cpp
+++ b/eval/src/vespa/eval/tensor/default_tensor_engine.cpp
@@ -17,6 +17,7 @@
 #include "dense/dense_single_reduce_function.h"
 #include "dense/dense_remove_dimension_optimizer.h"
 #include "dense/dense_lambda_peek_optimizer.h"
+#include "dense/dense_lambda_function.h"
 #include "dense/dense_simple_join_function.h"
 #include "dense/dense_simple_map_function.h"
 #include "dense/vector_from_doubles_function.h"
@@ -289,6 +290,7 @@ DefaultTensorEngine::optimize(const TensorFunction &expr, Stash &stash) const
             child.set(DenseTensorCreateFunction::optimize(child.get(), stash));
             child.set(DenseTensorPeekFunction::optimize(child.get(), stash));
             child.set(DenseLambdaPeekOptimizer::optimize(child.get(), stash));
+            child.set(DenseLambdaFunction::optimize(child.get(), stash));
             child.set(DenseFastRenameOptimizer::optimize(child.get(), stash));
             child.set(DenseSimpleMapFunction::optimize(child.get(), stash));
             child.set(DenseSimpleJoinFunction::optimize(child.get(), stash));
diff --git a/eval/src/vespa/eval/tensor/dense/CMakeLists.txt b/eval/src/vespa/eval/tensor/dense/CMakeLists.txt
index e0e73ecb3b8..7ababbee228 100644
--- a/eval/src/vespa/eval/tensor/dense/CMakeLists.txt
+++ b/eval/src/vespa/eval/tensor/dense/CMakeLists.txt
@@ -6,6 +6,7 @@ vespa_add_library(eval_tensor_dense OBJECT
     dense_dimension_combiner.cpp
     dense_dot_product_function.cpp
     dense_fast_rename_optimizer.cpp
+    dense_lambda_function.cpp
     dense_lambda_peek_function.cpp
     dense_lambda_peek_optimizer.cpp
     dense_matmul_function.cpp
diff --git a/eval/src/vespa/eval/tensor/dense/dense_lambda_function.cpp b/eval/src/vespa/eval/tensor/dense/dense_lambda_function.cpp
new file mode 100644
index 00000000000..b60d732d7a9
--- /dev/null
+++ b/eval/src/vespa/eval/tensor/dense/dense_lambda_function.cpp
@@ -0,0 +1,189 @@
+// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include "dense_lambda_function.h"
+#include "dense_tensor_view.h"
+#include <vespa/vespalib/objects/objectvisitor.h>
+#include <vespa/eval/tensor/default_tensor_engine.h>
+#include <vespa/eval/eval/llvm/compiled_function.h>
+#include <vespa/eval/eval/llvm/compile_cache.h>
+#include <assert.h>
+
+namespace vespalib::tensor {
+
+using eval::CompileCache;
+using eval::CompiledFunction;
+using eval::InterpretedFunction;
+using eval::LazyParams;
+using eval::PassParams;
+using eval::TensorEngine;
+using eval::TensorFunction;
+using eval::Value;
+using eval::DoubleValue;
+using eval::ValueType;
+using eval::as;
+using vespalib::Stash;
+
+using Instruction = InterpretedFunction::Instruction;
+using State = InterpretedFunction::State;
+
+using namespace eval::tensor_function;
+
+static const TensorEngine &prod_engine = DefaultTensorEngine::ref(); // file-local: a namespace-scope reference would otherwise have external linkage
+
+namespace {
+
+//-----------------------------------------------------------------------------
+
+bool step_labels(double *labels, const ValueType &type) { // odometer step over the cell address; false once every dimension has wrapped
+    const auto &dims = type.dimensions();
+    size_t pos = dims.size();
+    while (pos-- > 0) {
+        labels[pos] += 1.0;
+        if (labels[pos] < dims[pos].size) { return true; } // still inside this dimension
+        labels[pos] = 0.0; // wrapped: reset and carry into the preceding dimension
+    }
+    return false;
+}
+
+struct ParamProxy : public LazyParams { // presents [labels..., bound outer params...] as one parameter list
+    const std::vector<double> &labels;   // current cell address, one value per dimension
+    const LazyParams          &params;   // parameters of the enclosing evaluation
+    const std::vector<size_t> &bindings; // outer parameter index for each bound lambda parameter
+    ParamProxy(const std::vector<double> &labels_in, const LazyParams &params_in, const std::vector<size_t> &bindings_in)
+        : labels(labels_in), params(params_in), bindings(bindings_in) {}
+    const Value &resolve(size_t idx, Stash &stash) const override {
+        if (idx >= labels.size()) {
+            return params.resolve(bindings[idx - labels.size()], stash);
+        }
+        return stash.create<DoubleValue>(labels[idx]);
+    }
+};
+
+//-----------------------------------------------------------------------------
+
+struct CompiledParams { // per-instruction state for the compiled evaluation path
+    const ValueType &result_type;        // obtained from the Lambda, which must outlive this object
+    const std::vector<size_t> &bindings; // obtained from the Lambda as well
+    size_t num_cells;
+    CompileCache::Token::UP token;       // handle used to reach the compiled function (token->get())
+    explicit CompiledParams(const Lambda &lambda)
+        : result_type(lambda.result_type()),
+          bindings(lambda.bindings()),
+          num_cells(result_type.dense_subspace_size()),
+          token(CompileCache::compile(lambda.lambda(), PassParams::ARRAY))
+    {
+        assert(lambda.lambda().num_params() == (result_type.dimensions().size() + bindings.size()));
+    }
+};
+
+template <typename CT> // CT: cell type of the dense tensor being produced
+void my_compiled_lambda_op(eval::InterpretedFunction::State &state, uint64_t param) {
+    const CompiledParams &params = *reinterpret_cast<const CompiledParams *>(param);
+    std::vector<double> args(params.result_type.dimensions().size() + params.bindings.size(), 0.0); // [labels..., bound values...]
+    double *bind_next = args.data() + params.result_type.dimensions().size(); // data() avoids indexing one past the end when nothing is bound
+    for (size_t binding: params.bindings) {
+        *bind_next++ = state.params->resolve(binding, state.stash).as_double();
+    }
+    auto fun = params.token->get().get_function();
+    ArrayRef<CT> dst_cells = state.stash.create_array<CT>(params.num_cells);
+    CT *dst = &dst_cells[0];
+    do { // one call per cell; step_labels advances the label part of args
+        *dst++ = fun(args.data());
+    } while (step_labels(args.data(), params.result_type));
+    state.stack.push_back(state.stash.create<DenseTensorView>(params.result_type, TypedCells(dst_cells)));
+}
+
+struct MyCompiledLambdaOp { // selector passed to select_1: maps a cell type CT to my_compiled_lambda_op<CT>
+    template <typename CT>
+    static auto get_fun() { return my_compiled_lambda_op<CT>; }
+};
+
+//-----------------------------------------------------------------------------
+
+struct InterpretedParams { // per-instruction state for the interpreted evaluation path
+    const ValueType &result_type;        // obtained from the Lambda, which must outlive this object
+    const std::vector<size_t> &bindings; // obtained from the Lambda as well
+    size_t num_cells;
+    InterpretedFunction fun;             // lambda root prepared for interpretation with prod_engine
+    explicit InterpretedParams(const Lambda &lambda)
+        : result_type(lambda.result_type()),
+          bindings(lambda.bindings()),
+          num_cells(result_type.dense_subspace_size()),
+          fun(prod_engine, lambda.lambda().root(), lambda.types())
+    {
+        assert(lambda.lambda().num_params() == (result_type.dimensions().size() + bindings.size()));
+    }
+};
+
+template <typename CT> // CT: cell type of the dense tensor being produced
+void my_interpreted_lambda_op(eval::InterpretedFunction::State &state, uint64_t param) {
+    const InterpretedParams &params = *reinterpret_cast<const InterpretedParams *>(param);
+    std::vector<double> labels(params.result_type.dimensions().size(), 0.0); // current cell address, starts at all zeros
+    ParamProxy param_proxy(labels, *state.params, params.bindings);
+    InterpretedFunction::Context ctx(params.fun);
+    ArrayRef<CT> dst_cells = state.stash.create_array<CT>(params.num_cells);
+    CT *dst = &dst_cells[0];
+    do { // param_proxy holds 'labels' by reference, so each step changes what the lambda sees
+        *dst++ = params.fun.eval(ctx, param_proxy).as_double();
+    } while (step_labels(labels.data(), params.result_type));
+    state.stack.push_back(state.stash.create<DenseTensorView>(params.result_type, TypedCells(dst_cells)));
+}
+
+struct MyInterpretedLambdaOp { // selector passed to select_1: maps a cell type CT to my_interpreted_lambda_op<CT>
+    template <typename CT>
+    static auto get_fun() { return my_interpreted_lambda_op<CT>; }
+};
+
+//-----------------------------------------------------------------------------
+
+}
+
+DenseLambdaFunction::DenseLambdaFunction(const Lambda &lambda_in)
+    : Super(lambda_in.result_type()), // same result type as the wrapped Lambda
+      _lambda(lambda_in)              // stored by reference, not copied
+{
+}
+
+DenseLambdaFunction::~DenseLambdaFunction() = default; // declared in the header, defaulted here
+
+DenseLambdaFunction::EvalMode
+DenseLambdaFunction::eval_mode() const
+{
+    // COMPILED needs both: detect_issues() reports nothing for the lambda and
+    // all_types_are_double() holds for its types. Otherwise INTERPRETED.
+    if (CompiledFunction::detect_issues(_lambda.lambda())) {
+        return EvalMode::INTERPRETED;
+    }
+    const bool only_doubles = _lambda.types().all_types_are_double();
+    return only_doubles ? EvalMode::COMPILED : EvalMode::INTERPRETED;
+}
+
+Instruction
+DenseLambdaFunction::compile_self(const TensorEngine &engine, Stash &stash) const
+{
+    assert(&engine == &prod_engine); // InterpretedParams always evaluates with prod_engine
+    auto mode = eval_mode();
+    if (mode == EvalMode::COMPILED) {
+        CompiledParams &params = stash.create<CompiledParams>(_lambda);
+        auto op = select_1<MyCompiledLambdaOp>(result_type().cell_type());
+        static_assert(sizeof(&params) == sizeof(uint64_t));
+        return Instruction(op, reinterpret_cast<uint64_t>(&params)); // params object is allocated in 'stash'
+    } else {
+        assert(mode == EvalMode::INTERPRETED);
+        InterpretedParams &params = stash.create<InterpretedParams>(_lambda);
+        auto op = select_1<MyInterpretedLambdaOp>(result_type().cell_type());
+        static_assert(sizeof(&params) == sizeof(uint64_t));
+        return Instruction(op, reinterpret_cast<uint64_t>(&params)); // params object is allocated in 'stash'
+    }
+}
+
+const eval::TensorFunction &
+DenseLambdaFunction::optimize(const TensorFunction &expr, Stash &stash)
+{
+    // Only Lambda nodes are replaced; any other function is returned untouched.
+    auto lambda = as<Lambda>(expr);
+    if (!lambda) { return expr; }
+    return stash.create<DenseLambdaFunction>(*lambda);
+}
+
+}
diff --git a/eval/src/vespa/eval/tensor/dense/dense_lambda_function.h b/eval/src/vespa/eval/tensor/dense/dense_lambda_function.h
new file mode 100644
index 00000000000..a1b6e5a1551
--- /dev/null
+++ b/eval/src/vespa/eval/tensor/dense/dense_lambda_function.h
@@ -0,0 +1,30 @@
+// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include <vespa/eval/eval/tensor_function.h>
+
+namespace vespalib::tensor {
+
+/**
+ * Tensor function for generic tensor lambda producing dense tensor
+ * views directly. This is the catch-all fall-back used by the default
+ * (production) tensor engine to avoid having a TensorSpec as an
+ * intermediate result.
+ **/
+class DenseLambdaFunction : public eval::tensor_function::Leaf
+{
+    using Super = eval::tensor_function::Leaf;
+private:
+    const eval::tensor_function::Lambda &_lambda; // not owned; must outlive this object
+public:
+    enum class EvalMode : uint8_t { COMPILED, INTERPRETED }; // how the lambda is evaluated
+    explicit DenseLambdaFunction(const eval::tensor_function::Lambda &lambda_in);
+    ~DenseLambdaFunction() override;
+    bool result_is_mutable() const override { return true; }
+    EvalMode eval_mode() const; // mode that compile_self selects for the wrapped lambda
+    eval::InterpretedFunction::Instruction compile_self(const eval::TensorEngine &engine, Stash &stash) const override;
+    static const eval::TensorFunction &optimize(const eval::TensorFunction &expr, Stash &stash); // wraps Lambda nodes, passes others through
+};
+
+}