Merge pull request #4787 from vespa-engine/havardpe/use-flattened-tensor-function-in-interpreted-function

Havardpe/use flattened tensor function in interpreted function
author: Arne H Juul <arnej27959@users.noreply.github.com> 2018-01-26 14:48:07 +0100
committer: GitHub <noreply@github.com> 2018-01-26 14:48:07 +0100
commit: 6559e8a220f7cf7f435d8db2e597c3118ff2e356 (patch)
tree: 967b8a2b7111c9948696ffdd825de0e695e1f978
parent: 780264290b9e15f0594991b5dba8f1dc2021f92d (diff)
parent: 37139d2153a9735f9c835d8426c79d5ad1d372e2 (diff)
22 files changed, 346 insertions, 450 deletions
diff --git a/eval/src/apps/eval_expr/eval_expr.cpp b/eval/src/apps/eval_expr/eval_expr.cpp
index 91c669efe94..afddec40e48 100644
--- a/eval/src/apps/eval_expr/eval_expr.cpp
+++ b/eval/src/apps/eval_expr/eval_expr.cpp
@@ -3,7 +3,7 @@
 #include <vespa/eval/eval/function.h>
 #include <vespa/eval/eval/interpreted_function.h>
 #include <vespa/eval/eval/tensor_spec.h>
-
+#include <vespa/eval/eval/simple_tensor_engine.h>
 
 using namespace vespalib::eval;
 
diff --git a/eval/src/apps/tensor_conformance/tensor_conformance.cpp b/eval/src/apps/tensor_conformance/tensor_conformance.cpp
index 2d7cf9b5fa0..4130c75893b 100644
--- a/eval/src/apps/tensor_conformance/tensor_conformance.cpp
+++ b/eval/src/apps/tensor_conformance/tensor_conformance.cpp
@@ -113,9 +113,10 @@ TensorSpec eval_expr_tf(const Inspector &test, const TensorEngine &engine) {
     }
     SimpleObjectParams params(param_refs);
     NodeTypes types = NodeTypes(fun, get_types(param_values));
-    const auto &tfun = make_tensor_function(engine, fun.root(), types, stash);
-    const Value &result = tfun.eval(engine, params, stash);
-    ASSERT_EQUAL(result.type(), tfun.result_type());
+    const auto &plain_fun = make_tensor_function(engine, fun.root(), types, stash);
+    const auto &optimized = engine.optimize(plain_fun, stash);
+    const Value &result = optimized.eval(engine, params, stash);
+    ASSERT_EQUAL(result.type(), plain_fun.result_type());
     ASSERT_EQUAL(result.type(), types.get_type(fun.root()));
     return engine.to_spec(result);
 }
diff --git a/eval/src/tests/eval/function_speed/function_speed_test.cpp b/eval/src/tests/eval/function_speed/function_speed_test.cpp
index 65866de7ddd..178ab32d734 100644
--- a/eval/src/tests/eval/function_speed/function_speed_test.cpp
+++ b/eval/src/tests/eval/function_speed/function_speed_test.cpp
@@ -4,6 +4,7 @@
 #include <vespa/eval/eval/llvm/compiled_function.h>
 #include <vespa/vespalib/util/benchmark_timer.h>
 #include <vespa/eval/eval/interpreted_function.h>
+#include <vespa/eval/eval/simple_tensor_engine.h>
 #include <vespa/vespalib/util/benchmark_timer.h>
 #include <vespa/eval/tensor/default_tensor_engine.h>
 
diff --git a/eval/src/tests/eval/gbdt/gbdt_test.cpp b/eval/src/tests/eval/gbdt/gbdt_test.cpp
index af5935fbf1e..9cf5c31f76b 100644
--- a/eval/src/tests/eval/gbdt/gbdt_test.cpp
+++ b/eval/src/tests/eval/gbdt/gbdt_test.cpp
@@ -6,6 +6,7 @@
 #include <vespa/eval/eval/llvm/deinline_forest.h>
 #include <vespa/eval/eval/llvm/compiled_function.h>
 #include <vespa/eval/eval/interpreted_function.h>
+#include <vespa/eval/eval/simple_tensor_engine.h>
 #include <vespa/vespalib/util/stringfmt.h>
 #include "model.cpp"
 
diff --git a/eval/src/tests/eval/interpreted_function/interpreted_function_test.cpp b/eval/src/tests/eval/interpreted_function/interpreted_function_test.cpp
index 802f9555360..f0306e99a91 100644
--- a/eval/src/tests/eval/interpreted_function/interpreted_function_test.cpp
+++ b/eval/src/tests/eval/interpreted_function/interpreted_function_test.cpp
@@ -6,6 +6,7 @@
 #include <vespa/eval/eval/interpreted_function.h>
 #include <vespa/eval/eval/test/eval_spec.h>
 #include <vespa/eval/eval/basic_nodes.h>
+#include <vespa/eval/eval/simple_tensor_engine.h>
 #include <vespa/eval/tensor/default_tensor_engine.h>
 #include <vespa/vespalib/util/stringfmt.h>
 #include <vespa/vespalib/util/stash.h>
@@ -177,7 +178,7 @@ struct InnerProduct {
     InterpretedFunction interpreted;
     ~InnerProduct() {}
     InnerProduct(const vespalib::string &expr)
-        : engine(SimpleTensorEngine::ref()),
+        : engine(DefaultTensorEngine::ref()),
           function(Function::parse({"a", "b"}, expr)),
           a("null"), b("null"), expect("null"),
           types(),
@@ -186,10 +187,10 @@ struct InnerProduct {
               TensorSpec a_in,
               TensorSpec b_in,
               TensorSpec expect_in)
-        : engine(SimpleTensorEngine::ref()),
+        : engine(DefaultTensorEngine::ref()),
           function(Function::parse(expr)),
           a(a_in), b(b_in), expect(expect_in),
-          types(function, {ValueType::from_spec(a.type()), ValueType::from_spec(a.type())}),
+          types(function, {ValueType::from_spec(a.type()), ValueType::from_spec(b.type())}),
           interpreted(engine, function, types) {}
     void verify_optimized() const {
         EXPECT_EQUAL(1u, interpreted.program_size());
@@ -296,13 +297,13 @@ TEST("require that vector matrix multiplication works with tensor function") {
     TEST_DO(XW("reduce(join(b,a,f(x,y)(y*x)),sum,x)").verify_optimized());
 }
 
-TEST("require that matrix multiplication works with tensor function") {
-    TEST_DO(MatMul("reduce(a*b,sum,y)").verify_optimized());
-    TEST_DO(MatMul("reduce(join(a,b,f(x,y)(x*y)),sum,y)").verify_optimized());
-    TEST_DO(MatMul("reduce(b*a,sum,y)").verify_optimized());
-    TEST_DO(MatMul("reduce(join(b,a,f(x,y)(x*y)),sum,y)").verify_optimized());
-    TEST_DO(MatMul("reduce(join(a,b,f(x,y)(y*x)),sum,y)").verify_optimized());
-    TEST_DO(MatMul("reduce(join(b,a,f(x,y)(y*x)),sum,y)").verify_optimized());
+TEST("require that matrix multiplication is not optimized (yet)") {
+    TEST_DO(MatMul("reduce(a*b,sum,y)").verify_not_optimized());
+    TEST_DO(MatMul("reduce(join(a,b,f(x,y)(x*y)),sum,y)").verify_not_optimized());
+    TEST_DO(MatMul("reduce(b*a,sum,y)").verify_not_optimized());
+    TEST_DO(MatMul("reduce(join(b,a,f(x,y)(x*y)),sum,y)").verify_not_optimized());
+    TEST_DO(MatMul("reduce(join(a,b,f(x,y)(y*x)),sum,y)").verify_not_optimized());
+    TEST_DO(MatMul("reduce(join(b,a,f(x,y)(y*x)),sum,y)").verify_not_optimized());
 }
 
 TEST("require that expressions similar to inner product are not optimized") {
diff --git a/eval/src/tests/eval/tensor_function/tensor_function_test.cpp b/eval/src/tests/eval/tensor_function/tensor_function_test.cpp
index fb1ca3d18fe..b2df7eddd46 100644
--- a/eval/src/tests/eval/tensor_function/tensor_function_test.cpp
+++ b/eval/src/tests/eval/tensor_function/tensor_function_test.cpp
@@ -35,7 +35,7 @@ struct EvalCtx {
         return fun.eval(engine, SimpleObjectParams(params), stash);
     }
     const TensorFunction &compile(const tensor_function::Node &expr) {
-        return engine.compile(expr, stash);
+        return engine.optimize(expr, stash);
     }
     Value::UP make_true() {
         return engine.from_spec(TensorSpec("double").add({}, 1.0));
diff --git a/eval/src/vespa/eval/eval/CMakeLists.txt b/eval/src/vespa/eval/eval/CMakeLists.txt
index 3816780d4d9..0eabb4f4219 100644
--- a/eval/src/vespa/eval/eval/CMakeLists.txt
+++ b/eval/src/vespa/eval/eval/CMakeLists.txt
@@ -4,6 +4,7 @@ vespa_add_library(eval_eval OBJECT
     aggr.cpp
     basic_nodes.cpp
     call_nodes.cpp
+    compile_tensor_function.cpp
     delete_node.cpp
     function.cpp
     gbdt.cpp
diff --git a/eval/src/vespa/eval/eval/compile_tensor_function.cpp b/eval/src/vespa/eval/eval/compile_tensor_function.cpp
new file mode 100644
index 00000000000..ac36720895f
--- /dev/null
+++ b/eval/src/vespa/eval/eval/compile_tensor_function.cpp
@@ -0,0 +1,83 @@
+// Copyright 2018 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include "compile_tensor_function.h"
+#include "tensor_function.h"
+
+namespace vespalib::eval {
+
+namespace {
+
+using State = InterpretedFunction::State;
+using Instruction = InterpretedFunction::Instruction;
+
+void op_skip(State &state, uint64_t param) { // unconditional skip; ProgramCompiler::open emits it after the true branch of an If
+    state.program_offset += param; // advance the program offset by 'param' (the size of the false branch when emitted by open)
+}
+
+void op_skip_if_false(State &state, uint64_t param) { // conditional skip that consumes the tested value
+    ++state.if_cnt; // counts every executed condition, whichever way it goes
+    if (!state.peek(0).as_bool()) { // peek(0) is presumably the stack top (it is popped below) -- confirm in State
+        state.program_offset += param; // condition false: advance the program offset by 'param'
+    }
+    state.stack.pop_back(); // the condition value is removed in both cases
+}
+
+struct Frame { // traversal state for one node: the node itself plus a cursor into its children
+    const TensorFunction &node;
+    std::vector<TensorFunction::Child::CREF> children; // filled once by node.push_children in the constructor
+    size_t child_idx; // index of the next child to hand out
+    Frame(const TensorFunction &node_in) : node(node_in), children(), child_idx(0) { node.push_children(children); }
+    bool has_next_child() const { return (child_idx < children.size()); } // true while some child has not been handed out yet
+    const TensorFunction &next_child() { return children[child_idx++].get().get(); } // returns the current child and advances the cursor; no bounds check
+};
+
+struct ProgramCompiler { // flattens a TensorFunction tree into a linear instruction list using an explicit stack of Frames
+    Stash &stash;
+    std::vector<Frame> stack; // nodes that have been opened but not yet closed
+    std::vector<Instruction> prog; // instructions emitted so far
+    ProgramCompiler(Stash &stash_in) : stash(stash_in), stack(), prog() {}
+
+    void append(const std::vector<Instruction> &other_prog) { // copies 'other_prog' onto the end of 'prog'
+        prog.insert(prog.end(), other_prog.begin(), other_prog.end());
+    }
+
+    void open(const TensorFunction &node) { // begin processing 'node'
+        if (auto if_node = as<tensor_function::If>(node)) { // If nodes are compiled right here and never pushed on the stack
+            append(compile_tensor_function(if_node->cond(), stash)); // condition code comes first
+            auto true_prog = compile_tensor_function(if_node->true_child(), stash);
+            auto false_prog = compile_tensor_function(if_node->false_child(), stash);
+            true_prog.emplace_back(op_skip, false_prog.size()); // end of the true branch: skip over the false branch
+            prog.emplace_back(op_skip_if_false, true_prog.size()); // false condition: skip the true branch including its trailing op_skip
+            append(true_prog);
+            append(false_prog);
+        } else {
+            stack.emplace_back(node); // constructs a Frame; compile() walks its children before calling close()
+        }
+    }
+
+    void close(const TensorFunction &node) { // emit the instruction for 'node' itself
+        prog.push_back(node.compile_self(stash));
+    }
+
+    std::vector<Instruction> compile(const TensorFunction &function) { // entry point; a node is closed only after all its children were opened
+        open(function);
+        while (!stack.empty()) {
+            if (stack.back().has_next_child()) {
+                open(stack.back().next_child()); // descend; may push a new Frame on top of the stack
+            } else {
+                close(stack.back().node); // children exhausted: emit the node, then drop its Frame
+                stack.pop_back();
+            }
+        }
+        return std::move(prog); // 'prog' is a data member, so it is moved out explicitly
+    }
+};
+
+} // namespace vespalib::eval::<unnamed>
+
+std::vector<Instruction> compile_tensor_function(const TensorFunction &function, Stash &stash) {   // public entry point declared in compile_tensor_function.h
+    ProgramCompiler compiler(stash); // a fresh compiler per call; ProgramCompiler::open calls back into this function for If nodes
+    return compiler.compile(function); // the instruction list produced for 'function'
+}
+
+} // namespace vespalib::eval
diff --git a/eval/src/vespa/eval/eval/compile_tensor_function.h b/eval/src/vespa/eval/eval/compile_tensor_function.h
new file mode 100644
index 00000000000..bfac0e0f036
--- /dev/null
+++ b/eval/src/vespa/eval/eval/compile_tensor_function.h
@@ -0,0 +1,16 @@
+// Copyright 2018 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include "interpreted_function.h"
+#include <vector>
+
+namespace vespalib { class Stash; }
+
+namespace vespalib::eval {
+
+class TensorFunction;
+
+std::vector<InterpretedFunction::Instruction> compile_tensor_function(const TensorFunction &function, Stash &stash); // returns the instruction list for 'function'; 'stash' is passed on to each node's compile_self
+
+} // namespace vespalib::eval
diff --git a/eval/src/vespa/eval/eval/interpreted_function.cpp b/eval/src/vespa/eval/eval/interpreted_function.cpp
index 13ab6fe5676..28381030f24 100644
--- a/eval/src/vespa/eval/eval/interpreted_function.cpp
+++ b/eval/src/vespa/eval/eval/interpreted_function.cpp
@@ -6,434 +6,20 @@
 #include "check_type.h"
 #include "tensor_spec.h"
 #include "operation.h"
+#include "tensor_engine.h"
 #include <vespa/vespalib/util/classname.h>
 #include <vespa/eval/eval/llvm/compile_cache.h>
 #include <vespa/vespalib/util/benchmark_timer.h>
 #include <set>
 
+#include "make_tensor_function.h"
+#include "compile_tensor_function.h"
+
 namespace vespalib {
 namespace eval {
 
 namespace {
 
-using namespace nodes;
-using State = InterpretedFunction::State;
-using Instruction = InterpretedFunction::Instruction;
-using map_fun_t = double (*)(double);
-using join_fun_t = double (*)(double, double);
-
-//-----------------------------------------------------------------------------
-
-template <typename T, typename IN>
-uint64_t wrap_param(const IN &value_in) {
-    const T &value = value_in;
-    return (uint64_t)&value;
-}
-
-template <typename T>
-const T &unwrap_param(uint64_t param) { return *((const T *)param); }
-
-//-----------------------------------------------------------------------------
-
-uint64_t to_param(map_fun_t value) { return (uint64_t)value; }
-uint64_t to_param(join_fun_t value) { return (uint64_t)value; }
-map_fun_t to_map_fun(uint64_t param) { return (map_fun_t)param; }
-join_fun_t to_join_fun(uint64_t param) { return (join_fun_t)param; }
-
-//-----------------------------------------------------------------------------
-
-void op_load_const(State &state, uint64_t param) {
-    state.stack.push_back(unwrap_param<Value>(param));
-}
-
-void op_load_param(State &state, uint64_t param) {
-    state.stack.push_back(state.params->resolve(param, state.stash));
-}
-
-//-----------------------------------------------------------------------------
-
-void op_skip(State &state, uint64_t param) {
-    state.program_offset += param;
-}
-
-void op_skip_if_false(State &state, uint64_t param) {
-    ++state.if_cnt;
-    if (!state.peek(0).as_bool()) {
-        state.program_offset += param;
-    }
-    state.stack.pop_back();
-}
-
-//-----------------------------------------------------------------------------
-
-void op_double_map(State &state, uint64_t param) {
-    state.replace(1, state.stash.create<DoubleValue>(to_map_fun(param)(state.peek(0).as_double())));
-}
-
-void op_double_mul(State &state, uint64_t) {
-    state.replace(2, state.stash.create<DoubleValue>(state.peek(1).as_double() * state.peek(0).as_double()));
-}
-
-void op_double_add(State &state, uint64_t) {
-    state.replace(2, state.stash.create<DoubleValue>(state.peek(1).as_double() + state.peek(0).as_double()));
-}
-
-void op_double_join(State &state, uint64_t param) {
-    state.replace(2, state.stash.create<DoubleValue>(to_join_fun(param)(state.peek(1).as_double(), state.peek(0).as_double())));
-}
-
-//-----------------------------------------------------------------------------
-
-void op_tensor_map(State &state, uint64_t param) {
-    state.replace(1, state.engine.map(state.peek(0), to_map_fun(param), state.stash));
-}
-
-void op_tensor_join(State &state, uint64_t param) {
-    state.replace(2, state.engine.join(state.peek(1), state.peek(0), to_join_fun(param), state.stash));
-}
-
-using ReduceParams = std::pair<Aggr,std::vector<vespalib::string>>;
-void op_tensor_reduce(State &state, uint64_t param) {
-    const ReduceParams &params = unwrap_param<ReduceParams>(param);
-    state.replace(1, state.engine.reduce(state.peek(0), params.first, params.second, state.stash));
-}
-
-using RenameParams = std::pair<std::vector<vespalib::string>,std::vector<vespalib::string>>;
-void op_tensor_rename(State &state, uint64_t param) {
-    const RenameParams &params = unwrap_param<RenameParams>(param);
-    state.replace(1, state.engine.rename(state.peek(0), params.first, params.second, state.stash));
-}
-
-void op_tensor_concat(State &state, uint64_t param) {
-    const vespalib::string &dimension = unwrap_param<vespalib::string>(param);
-    state.replace(2, state.engine.concat(state.peek(1), state.peek(0), dimension, state.stash));
-}
-
-//-----------------------------------------------------------------------------
-
-void op_tensor_function(State &state, uint64_t param) {
-    const TensorFunction &fun = unwrap_param<TensorFunction>(param);
-    state.stack.push_back(fun.eval(state.engine, *state.params, state.stash));
-}
-
-//-----------------------------------------------------------------------------
-
-bool step_labels(std::vector<double> &labels, const ValueType &type) {
-    for (size_t idx = labels.size(); idx-- > 0; ) {
-        labels[idx] += 1.0;
-        if (size_t(labels[idx]) < type.dimensions()[idx].size) {
-            return true;
-        } else {
-            labels[idx] = 0.0;
-        }
-    }
-    return false;
-}
-
-//-----------------------------------------------------------------------------
-
-struct ProgramBuilder : public NodeVisitor, public NodeTraverser {
-    std::vector<Instruction> &program;
-    Stash                    &stash;
-    const TensorEngine       &tensor_engine;
-    const NodeTypes          &types;
-
-    ProgramBuilder(std::vector<Instruction> &program_in, Stash &stash_in, const TensorEngine &tensor_engine_in, const NodeTypes &types_in)
-        : program(program_in), stash(stash_in), tensor_engine(tensor_engine_in), types(types_in) {}
-
-    //-------------------------------------------------------------------------
-
-    bool is_mul_join(const Node &node) const {
-        if (auto join = as<TensorJoin>(node)) {
-            if (auto mul = as<Mul>(join->lambda().root())) {
-                auto sym1 = as<Symbol>(mul->lhs());
-                auto sym2 = as<Symbol>(mul->rhs());
-                return (sym1 && sym2 && (sym1->id() != sym2->id()));
-            }
-        }
-        return false;
-    }
-
-    bool is_mul(const Node &node) const {
-        auto mul = as<Mul>(node);
-        return (mul || is_mul_join(node));
-    }
-
-    bool is_typed_tensor(const Node &node) const {
-        const ValueType &type = types.get_type(node);
-        return (type.is_tensor() && !type.dimensions().empty());
-    }
-
-    bool is_typed_tensor_param(const Node &node) const {
-        auto sym = as<Symbol>(node);
-        return (sym && is_typed_tensor(node));
-    }
-
-    bool is_typed_tensor_product_of_params(const Node &node) const {
-        return (is_typed_tensor(node) && is_mul(node) &&
-                is_typed_tensor_param(node.get_child(0)) &&
-                is_typed_tensor_param(node.get_child(1)));
-    }
-
-    //-------------------------------------------------------------------------
-
-    void make_const_op(const Node &node, const Value &value) {
-        (void) node;
-        program.emplace_back(op_load_const, wrap_param<Value>(value));
-    }
-
-    void make_map_op(const Node &node, map_fun_t function) {
-        if (types.get_type(node).is_double()) {
-            program.emplace_back(op_double_map, to_param(function));
-        } else {
-            program.emplace_back(op_tensor_map, to_param(function));
-        }
-    }
-
-    void make_join_op(const Node &node, join_fun_t function) {
-        if (types.get_type(node).is_double()) {
-            if (function == operation::Mul::f) {
-                program.emplace_back(op_double_mul);
-            } else if (function == operation::Add::f) {
-                program.emplace_back(op_double_add);
-            } else {
-                program.emplace_back(op_double_join, to_param(function));
-            }
-        } else {
-            program.emplace_back(op_tensor_join, to_param(function));
-        }
-    }
-
-    //-------------------------------------------------------------------------
-
-    void visit(const Number &node) override {
-        make_const_op(node, stash.create<DoubleValue>(node.value()));
-    }
-    void visit(const Symbol &node) override {
-        program.emplace_back(op_load_param, node.id());
-    }
-    void visit(const String &node) override {
-        make_const_op(node, stash.create<DoubleValue>(node.hash()));
-    }
-    void visit(const In &node) override {
-        auto my_in = std::make_unique<In>(std::make_unique<Symbol>(0));
-        for (size_t i = 0; i < node.num_entries(); ++i) {
-            my_in->add_entry(std::make_unique<Number>(node.get_entry(i).get_const_value()));
-        }
-        Function my_fun(std::move(my_in), {"x"});
-        const auto &token = stash.create<CompileCache::Token::UP>(CompileCache::compile(my_fun, PassParams::SEPARATE));
-        make_map_op(node, token.get()->get().get_function<1>());
-    }
-    void visit(const Neg &node) override {
-        make_map_op(node, operation::Neg::f);
-    }
-    void visit(const Not &node) override {
-        make_map_op(node, operation::Not::f);
-    }
-    void visit(const If &node) override {
-        node.cond().traverse(*this);
-        size_t after_cond = program.size();
-        program.emplace_back(op_skip_if_false);
-        node.true_expr().traverse(*this);
-        size_t after_true = program.size();
-        program.emplace_back(op_skip);
-        node.false_expr().traverse(*this);
-        program[after_cond].update_param(after_true - after_cond);
-        program[after_true].update_param(program.size() - after_true - 1);
-    }
-    void visit(const Error &node) override {
-        make_const_op(node, ErrorValue::instance);
-    }
-    void visit(const TensorMap &node) override {
-        const auto &token = stash.create<CompileCache::Token::UP>(CompileCache::compile(node.lambda(), PassParams::SEPARATE));
-        make_map_op(node, token.get()->get().get_function<1>());
-    }
-    void visit(const TensorJoin &node) override {
-        const auto &token = stash.create<CompileCache::Token::UP>(CompileCache::compile(node.lambda(), PassParams::SEPARATE));
-        make_join_op(node, token.get()->get().get_function<2>());
-    }
-    void visit(const TensorReduce &node) override {
-        if ((node.aggr() == Aggr::SUM) && is_typed_tensor_product_of_params(node.get_child(0))) {
-            assert(program.size() >= 3); // load,load,mul
-            program.pop_back(); // mul
-            program.pop_back(); // load
-            program.pop_back(); // load
-            auto a = as<Symbol>(node.get_child(0).get_child(0));
-            auto b = as<Symbol>(node.get_child(0).get_child(1));
-            const auto &ir = tensor_function::reduce(tensor_function::join(
-                            tensor_function::inject(types.get_type(*a), a->id(), stash),
-                            tensor_function::inject(types.get_type(*b), b->id(), stash),
-                            operation::Mul::f, stash), node.aggr(), node.dimensions(), stash);
-            const auto &fun = tensor_engine.compile(ir, stash);
-            program.emplace_back(op_tensor_function, wrap_param<TensorFunction>(fun));
-        } else {
-            ReduceParams &params = stash.create<ReduceParams>(node.aggr(), node.dimensions());
-            program.emplace_back(op_tensor_reduce, wrap_param<ReduceParams>(params));
-        }
-    }
-    void visit(const TensorRename &node) override {
-        RenameParams &params = stash.create<RenameParams>(node.from(), node.to());
-        program.emplace_back(op_tensor_rename, wrap_param<RenameParams>(params));
-    }
-    void visit(const TensorLambda &node) override {
-        const auto &type = node.type();
-        TensorSpec spec(type.to_spec());
-        const auto &token = stash.create<CompileCache::Token::UP>(CompileCache::compile(node.lambda(), PassParams::ARRAY));
-        auto fun = token.get()->get().get_function();
-        std::vector<double> params(type.dimensions().size(), 0.0);
-        assert(token.get()->get().num_params() == params.size());
-        do {
-            TensorSpec::Address addr;
-            for (size_t i = 0; i < params.size(); ++i) {
-                addr.emplace(type.dimensions()[i].name, size_t(params[i]));
-            }
-            spec.add(addr, fun(&params[0]));
-        } while (step_labels(params, type));
-        make_const_op(node, *stash.create<Value::UP>(tensor_engine.from_spec(spec)));
-    }
-    void visit(const TensorConcat &node) override {
-        vespalib::string &dimension = stash.create<vespalib::string>(node.dimension());
-        program.emplace_back(op_tensor_concat, wrap_param<vespalib::string>(dimension));
-    }
-    void visit(const Add &node) override {
-        make_join_op(node, operation::Add::f);
-    }
-    void visit(const Sub &node) override {
-        make_join_op(node, operation::Sub::f);
-    }
-    void visit(const Mul &node) override {
-        make_join_op(node, operation::Mul::f);
-    }
-    void visit(const Div &node) override {
-        make_join_op(node, operation::Div::f);
-    }
-    void visit(const Mod &node) override {
-        make_join_op(node, operation::Mod::f);
-    }
-    void visit(const Pow &node) override {
-        make_join_op(node, operation::Pow::f);
-    }
-    void visit(const Equal &node) override {
-        make_join_op(node, operation::Equal::f);
-    }
-    void visit(const NotEqual &node) override {
-        make_join_op(node, operation::NotEqual::f);
-    }
-    void visit(const Approx &node) override {
-        make_join_op(node, operation::Approx::f);
-    }
-    void visit(const Less &node) override {
-        make_join_op(node, operation::Less::f);
-    }
-    void visit(const LessEqual &node) override {
-        make_join_op(node, operation::LessEqual::f);
-    }
-    void visit(const Greater &node) override {
-        make_join_op(node, operation::Greater::f);
-    }
-    void visit(const GreaterEqual &node) override {
-        make_join_op(node, operation::GreaterEqual::f);
-    }
-    void visit(const And &node) override {
-        make_join_op(node, operation::And::f);
-    }
-    void visit(const Or &node) override {
-        make_join_op(node, operation::Or::f);
-    }
-    void visit(const Cos &node) override {
-        make_map_op(node, operation::Cos::f);
-    }
-    void visit(const Sin &node) override {
-        make_map_op(node, operation::Sin::f);
-    }
-    void visit(const Tan &node) override {
-        make_map_op(node, operation::Tan::f);
-    }
-    void visit(const Cosh &node) override {
-        make_map_op(node, operation::Cosh::f);
-    }
-    void visit(const Sinh &node) override {
-        make_map_op(node, operation::Sinh::f);
-    }
-    void visit(const Tanh &node) override {
-        make_map_op(node, operation::Tanh::f);
-    }
-    void visit(const Acos &node) override {
-        make_map_op(node, operation::Acos::f);
-    }
-    void visit(const Asin &node) override {
-        make_map_op(node, operation::Asin::f);
-    }
-    void visit(const Atan &node) override {
-        make_map_op(node, operation::Atan::f);
-    }
-    void visit(const Exp &node) override {
-        make_map_op(node, operation::Exp::f);
-    }
-    void visit(const Log10 &node) override {
-        make_map_op(node, operation::Log10::f);
-    }
-    void visit(const Log &node) override {
-        make_map_op(node, operation::Log::f);
-    }
-    void visit(const Sqrt &node) override {
-        make_map_op(node, operation::Sqrt::f);
-    }
-    void visit(const Ceil &node) override {
-        make_map_op(node, operation::Ceil::f);
-    }
-    void visit(const Fabs &node) override {
-        make_map_op(node, operation::Fabs::f);
-    }
-    void visit(const Floor &node) override {
-        make_map_op(node, operation::Floor::f);
-    }
-    void visit(const Atan2 &node) override {
-        make_join_op(node, operation::Atan2::f);
-    }
-    void visit(const Ldexp &node) override {
-        make_join_op(node, operation::Ldexp::f);
-    }
-    void visit(const Pow2 &node) override {
-        make_join_op(node, operation::Pow::f);
-    }
-    void visit(const Fmod &node) override {
-        make_join_op(node, operation::Mod::f);
-    }
-    void visit(const Min &node) override {
-        make_join_op(node, operation::Min::f);
-    }
-    void visit(const Max &node) override {
-        make_join_op(node, operation::Max::f);
-    }
-    void visit(const IsNan &node) override {
-        make_map_op(node, operation::IsNan::f);
-    }
-    void visit(const Relu &node) override {
-        make_map_op(node, operation::Relu::f);
-    }
-    void visit(const Sigmoid &node) override {
-        make_map_op(node, operation::Sigmoid::f);
-    }
-    void visit(const Elu &node) override {
-        make_map_op(node, operation::Elu::f);
-    }
-
-    //-------------------------------------------------------------------------
-
-    bool open(const Node &node) override {
-        if (check_type<If>(node)) {
-            node.accept(*this);
-            return false;
-        }
-        return true;
-    }
-
-    void close(const Node &node) override {
-        node.accept(*this);
-    }
-};
-
 const Function *get_lambda(const nodes::Node &node) {
     if (auto ptr = as<nodes::TensorMap>(node)) {
         return &ptr->lambda();
@@ -489,8 +75,9 @@ InterpretedFunction::InterpretedFunction(const TensorEngine &engine, const nodes
       _num_params(num_params_in),
       _tensor_engine(engine)
 {
-    ProgramBuilder program_builder(_program, _stash, _tensor_engine, types);
-    root.traverse(program_builder);
+    const TensorFunction &plain_fun = make_tensor_function(engine, root, types, _stash);
+    const TensorFunction &optimized = engine.optimize(plain_fun, _stash);
+    _program = compile_tensor_function(optimized, _stash);
 }
 
 InterpretedFunction::~InterpretedFunction() {}
diff --git a/eval/src/vespa/eval/eval/interpreted_function.h b/eval/src/vespa/eval/eval/interpreted_function.h
index 2a52a5a8258..1c57b20682f 100644
--- a/eval/src/vespa/eval/eval/interpreted_function.h
+++ b/eval/src/vespa/eval/eval/interpreted_function.h
@@ -3,7 +3,6 @@
 #pragma once
 
 #include "function.h"
-#include "simple_tensor_engine.h"
 #include "node_types.h"
 #include "lazy_params.h"
 #include <vespa/vespalib/util/stash.h>
diff --git a/eval/src/vespa/eval/eval/make_tensor_function.cpp b/eval/src/vespa/eval/eval/make_tensor_function.cpp
index d28c4812a31..d84d9f53749 100644
--- a/eval/src/vespa/eval/eval/make_tensor_function.cpp
+++ b/eval/src/vespa/eval/eval/make_tensor_function.cpp
@@ -32,6 +32,21 @@ bool step_labels(std::vector<double> &labels, const ValueType &type) {
     return false;
 }
 
+// TODO(havardpe): generic function pointer resolving for all single
+//                 operation lambdas.
+
+template <typename OP2>
+bool is_op2(const Function &lambda) { // true when 'lambda' takes two parameters and its root is an OP2 over two different symbols
+    if (lambda.num_params() == 2) {
+        if (auto op2 = as<OP2>(lambda.root())) { // the root node must be of type OP2
+            auto sym1 = as<Symbol>(op2->lhs());
+            auto sym2 = as<Symbol>(op2->rhs());
+            return (sym1 && sym2 && (sym1->id() != sym2->id())); // both operands are Symbols and their ids differ
+        }
+    }
+    return false; // wrong parameter count or root is not an OP2
+}
+
 //-----------------------------------------------------------------------------
 
 struct TensorFunctionBuilder : public NodeVisitor, public NodeTraverser {
@@ -135,8 +150,14 @@ struct TensorFunctionBuilder : public NodeVisitor, public NodeTraverser {
         make_map(node, token.get()->get().get_function<1>());
     }
     void visit(const TensorJoin &node) override {
-        const auto &token = stash.create<CompileCache::Token::UP>(CompileCache::compile(node.lambda(), PassParams::SEPARATE));
-        make_join(node, token.get()->get().get_function<2>());
+        if (is_op2<Mul>(node.lambda())) {
+            make_join(node, operation::Mul::f);
+        } else if (is_op2<Add>(node.lambda())) {
+            make_join(node, operation::Add::f);
+        } else {
+            const auto &token = stash.create<CompileCache::Token::UP>(CompileCache::compile(node.lambda(), PassParams::SEPARATE));
+            make_join(node, token.get()->get().get_function<2>());
+        }
     }
     void visit(const TensorReduce &node) override {
         make_reduce(node, node.aggr(), node.dimensions());
diff --git a/eval/src/vespa/eval/eval/tensor_engine.h b/eval/src/vespa/eval/eval/tensor_engine.h
index 02a7f0c655a..a01a6f889fd 100644
--- a/eval/src/vespa/eval/eval/tensor_engine.h
+++ b/eval/src/vespa/eval/eval/tensor_engine.h
@@ -47,7 +47,7 @@ struct TensorEngine
     virtual void encode(const Value &value, nbostream &output) const = 0;
     virtual Value::UP decode(nbostream &input) const = 0;
 
-    virtual const TensorFunction &compile(const tensor_function::Node &expr, Stash &) const { return expr; }
+    virtual const TensorFunction &optimize(const TensorFunction &expr, Stash &) const { return expr; }
 
     virtual const Value &map(const Value &a, map_fun_t function, Stash &stash) const = 0;
     virtual const Value &join(const Value &a, const Value &b, join_fun_t function, Stash &stash) const = 0;
diff --git a/eval/src/vespa/eval/eval/tensor_function.cpp b/eval/src/vespa/eval/eval/tensor_function.cpp
index 8427cc53a16..62e547cbd7e 100644
--- a/eval/src/vespa/eval/eval/tensor_function.cpp
+++ b/eval/src/vespa/eval/eval/tensor_function.cpp
@@ -11,6 +11,86 @@ namespace vespalib {
 namespace eval {
 namespace tensor_function {
 
+namespace {
+
+using State = InterpretedFunction::State;
+using Instruction = InterpretedFunction::Instruction;
+
+//-----------------------------------------------------------------------------
+
+template <typename T, typename IN>
+uint64_t wrap_param(const IN &value_in) { // encode the address of value_in, viewed as a T, as a 64-bit instruction parameter
+    const T &value = value_in; // bind through const T& so the stored address is that of the T the reference refers to
+    return (uint64_t)&value; // only the address is stored; unwrap_param<T> dereferences it later, so the object must still be alive then
+}
+
+template <typename T>
+const T &unwrap_param(uint64_t param) { return *((const T *)param); } // inverse of wrap_param<T>: treat param as a const T pointer and dereference it
+
+//-----------------------------------------------------------------------------
+
+uint64_t to_param(map_fun_t value) { return (uint64_t)value; } // pack a map function into a 64-bit instruction parameter
+uint64_t to_param(join_fun_t value) { return (uint64_t)value; } // pack a join function into a 64-bit instruction parameter
+map_fun_t to_map_fun(uint64_t param) { return (map_fun_t)param; } // recover a map function packed by to_param
+join_fun_t to_join_fun(uint64_t param) { return (join_fun_t)param; } // recover a join function packed by to_param
+
+//-----------------------------------------------------------------------------
+
+void op_load_const(State &state, uint64_t param) { // instruction: push a constant; param is a Value address as produced by wrap_param<Value>
+    state.stack.push_back(unwrap_param<Value>(param)); // pushes a reference to the constant itself
+}
+
+void op_load_param(State &state, uint64_t param) { // instruction: push a function parameter; param is the parameter index
+    state.stack.push_back(state.params->resolve(param, state.stash)); // resolved through state.params at execution time
+}
+
+//-----------------------------------------------------------------------------
+
+void op_double_map(State &state, uint64_t param) { // instruction: apply the map function packed in param to a double operand
+    state.replace(1, state.stash.create<DoubleValue>(to_map_fun(param)(state.peek(0).as_double()))); // NOTE(review): presumably peek(0) is the stack top and replace(1, v) swaps it for v - confirm in InterpretedFunction::State
+}
+
+void op_double_mul(State &state, uint64_t) { // instruction: multiply two double operands; the parameter is unused
+    state.replace(2, state.stash.create<DoubleValue>(state.peek(1).as_double() * state.peek(0).as_double())); // peek(1) is the left operand, peek(0) the right; NOTE(review): presumably replace(2, v) swaps both for v - confirm
+}
+
+void op_double_add(State &state, uint64_t) { // instruction: add two double operands; the parameter is unused
+    state.replace(2, state.stash.create<DoubleValue>(state.peek(1).as_double() + state.peek(0).as_double())); // peek(1) is the left operand, peek(0) the right; NOTE(review): presumably replace(2, v) swaps both for v - confirm
+}
+
+void op_double_join(State &state, uint64_t param) { // instruction: combine two double operands with the join function packed in param
+    state.replace(2, state.stash.create<DoubleValue>(to_join_fun(param)(state.peek(1).as_double(), state.peek(0).as_double()))); // the function is called as f(peek(1), peek(0)); the result is a stash-created DoubleValue
+}
+
+//-----------------------------------------------------------------------------
+
+void op_tensor_map(State &state, uint64_t param) { // instruction: map via the tensor engine, using the map function packed in param
+    state.replace(1, state.engine.map(state.peek(0), to_map_fun(param), state.stash)); // the engine's result takes the place of the single operand
+}
+
+void op_tensor_join(State &state, uint64_t param) { // instruction: join via the tensor engine, using the join function packed in param
+    state.replace(2, state.engine.join(state.peek(1), state.peek(0), to_join_fun(param), state.stash)); // peek(1) is passed as the first operand, peek(0) as the second
+}
+
+using ReduceParams = std::pair<Aggr,std::vector<vespalib::string>>; // (aggregator, dimension names) as handed to engine.reduce
+void op_tensor_reduce(State &state, uint64_t param) { // instruction: reduce the operand via the tensor engine; param is a ReduceParams address
+    const ReduceParams &params = unwrap_param<ReduceParams>(param); // decode the address stored by Reduce::compile_self
+    state.replace(1, state.engine.reduce(state.peek(0), params.first, params.second, state.stash)); // first = aggregator, second = dimensions
+}
+
+using RenameParams = std::pair<std::vector<vespalib::string>,std::vector<vespalib::string>>; // (from names, to names) as handed to engine.rename
+void op_tensor_rename(State &state, uint64_t param) { // instruction: rename dimensions of the operand via the tensor engine; param is a RenameParams address
+    const RenameParams &params = unwrap_param<RenameParams>(param); // decode the address stored by Rename::compile_self
+    state.replace(1, state.engine.rename(state.peek(0), params.first, params.second, state.stash)); // first = from, second = to
+}
+
+void op_tensor_concat(State &state, uint64_t param) { // instruction: concatenate two operands via the tensor engine; param is the address of the dimension name
+    const vespalib::string &dimension = unwrap_param<vespalib::string>(param); // decode the address stored by Concat::compile_self
+    state.replace(2, state.engine.concat(state.peek(1), state.peek(0), dimension, state.stash)); // peek(1) is passed as the first operand, peek(0) as the second
+}
+
+} // namespace <unnamed>
+
 //-----------------------------------------------------------------------------
 
 void
@@ -39,6 +119,12 @@ ConstValue::eval(const TensorEngine &, const LazyParams &, Stash &) const
     return _value;
 }
 
+Instruction
+ConstValue::compile_self(Stash &) const // the stash is not needed: the instruction points at the existing value
+{
+    return Instruction(op_load_const, wrap_param<Value>(_value)); // only the address behind _value is stored, so that Value must stay alive while the instruction is used
+}
+
 //-----------------------------------------------------------------------------
 
 const Value &
@@ -47,6 +133,12 @@ Inject::eval(const TensorEngine &, const LazyParams &params, Stash &stash) const
     return params.resolve(_param_idx, stash);
 }
 
+Instruction
+Inject::compile_self(Stash &) const // the stash is not needed: the parameter index fits in the instruction parameter
+{
+    return Instruction(op_load_param, _param_idx); // op_load_param resolves this index against the run-time parameters
+}
+
 //-----------------------------------------------------------------------------
 
 const Value &
@@ -56,6 +148,13 @@ Reduce::eval(const TensorEngine &engine, const LazyParams &params, Stash &stash)
     return engine.reduce(a, _aggr, _dimensions, stash);
 }
 
+Instruction
+Reduce::compile_self(Stash &stash) const // builds the op_tensor_reduce instruction for this node
+{
+    ReduceParams &params = stash.create<ReduceParams>(_aggr, _dimensions); // bundle aggregator and dimensions into one stash-created object, since the instruction carries a single 64-bit parameter
+    return Instruction(op_tensor_reduce, wrap_param<ReduceParams>(params)); // NOTE(review): the bundle presumably lives as long as the stash - confirm Stash ownership rules
+}
+
 //-----------------------------------------------------------------------------
 
 const Value &
@@ -65,6 +164,15 @@ Map::eval(const TensorEngine &engine, const LazyParams &params, Stash &stash) co
     return engine.map(a, _function, stash);
 }
 
+Instruction
+Map::compile_self(Stash &) const // the stash is not needed: the function itself is packed into the instruction parameter
+{
+    if (result_type().is_double()) { // double result: use the instruction that works directly on doubles
+        return Instruction(op_double_map, to_param(_function));
+    }
+    return Instruction(op_tensor_map, to_param(_function)); // otherwise delegate to the tensor engine at run time
+}
+
 //-----------------------------------------------------------------------------
 
 const Value &
@@ -75,6 +183,21 @@ Join::eval(const TensorEngine &engine, const LazyParams &params, Stash &stash) c
     return engine.join(a, b, _function, stash);
 }
 
+Instruction
+Join::compile_self(Stash &) const // the stash is not needed: the function, when used, is packed into the instruction parameter
+{
+    if (result_type().is_double()) { // double result: use instructions that work directly on doubles
+        if (_function == operation::Mul::f) { // recognized multiplication: dedicated instruction, no function parameter
+            return Instruction(op_double_mul);
+        }
+        if (_function == operation::Add::f) { // recognized addition: dedicated instruction, no function parameter
+            return Instruction(op_double_add);
+        }
+        return Instruction(op_double_join, to_param(_function)); // any other function: call it through the packed parameter
+    }
+    return Instruction(op_tensor_join, to_param(_function)); // otherwise delegate to the tensor engine at run time
+}
+
 //-----------------------------------------------------------------------------
 
 const Value &
@@ -85,6 +208,12 @@ Concat::eval(const TensorEngine &engine, const LazyParams &params, Stash &stash)
     return engine.concat(a, b, _dimension, stash);
 }
 
+Instruction
+Concat::compile_self(Stash &) const // the stash is not needed: the instruction points at this node's own dimension name
+{
+    return Instruction(op_tensor_concat, wrap_param<vespalib::string>(_dimension)); // only the address of _dimension is stored, so this node must stay alive while the instruction is used
+}
+
 //-----------------------------------------------------------------------------
 
 const Value &
@@ -94,6 +223,13 @@ Rename::eval(const TensorEngine &engine, const LazyParams &params, Stash &stash)
     return engine.rename(a, _from, _to, stash);
 }
 
+Instruction
+Rename::compile_self(Stash &stash) const // builds the op_tensor_rename instruction for this node
+{
+    RenameParams &params = stash.create<RenameParams>(_from, _to); // bundle both name lists into one stash-created object, since the instruction carries a single 64-bit parameter
+    return Instruction(op_tensor_rename, wrap_param<RenameParams>(params)); // NOTE(review): the bundle presumably lives as long as the stash - confirm Stash ownership rules
+}
+
 //-----------------------------------------------------------------------------
 
 void
@@ -112,6 +248,14 @@ If::eval(const TensorEngine &engine, const LazyParams &params, Stash &stash) con
             : false_child().eval(engine, params, stash));
 }
 
+Instruction
+If::compile_self(Stash &) const // never expected to be called, see below
+{
+    // 'if' is handled directly by compile_tensor_function to enable
+    // lazy-evaluation of true/false sub-expressions.
+    abort(); // getting here means a caller did not special-case 'if'; there is no instruction to return
+}
+
 //-----------------------------------------------------------------------------
 
 const Node &const_value(const Value &value, Stash &stash) {
diff --git a/eval/src/vespa/eval/eval/tensor_function.h b/eval/src/vespa/eval/eval/tensor_function.h
index d9ee5cc068c..c739ea8cba9 100644
--- a/eval/src/vespa/eval/eval/tensor_function.h
+++ b/eval/src/vespa/eval/eval/tensor_function.h
@@ -12,6 +12,8 @@
 #include "value.h"
 #include "aggr.h"
 
+#include "interpreted_function.h"
+
 namespace vespalib {
 
 class Stash;
@@ -75,6 +77,17 @@ struct TensorFunction
     virtual void push_children(std::vector<Child::CREF> &children) const = 0;
 
     /**
+     * Compile this node into a single instruction that can be run by
+     * an interpreted function. Sub-expressions are compiled as
+     * separate instructions and their results will be available on
+     * the value stack during execution.
+     *
+     * @return instruction representing the operation of this node
+     * @param stash heterogeneous object store
+     **/
+    virtual InterpretedFunction::Instruction compile_self(Stash &stash) const = 0;
+
+    /**
      * Evaluate this tensor function based on the given
      * parameters. The given stash can be used to store temporary
      * objects that need to be kept alive for the return value to be
@@ -157,6 +170,7 @@ private:
 public:
     ConstValue(const Value &value_in) : Leaf(value_in.type()), _value(value_in) {}
     const Value &eval(const TensorEngine &engine, const LazyParams &params, Stash &) const final override;
+    InterpretedFunction::Instruction compile_self(Stash &stash) const final override;
 };
 
 //-----------------------------------------------------------------------------
@@ -170,6 +184,7 @@ public:
         : Leaf(result_type_in), _param_idx(param_idx_in) {}
     size_t param_idx() const { return _param_idx; }
     const Value &eval(const TensorEngine &engine, const LazyParams &params, Stash &) const final override;
+    InterpretedFunction::Instruction compile_self(Stash &stash) const final override;
 };
 
 //-----------------------------------------------------------------------------
@@ -188,6 +203,7 @@ public:
     Aggr aggr() const { return _aggr; }
     const std::vector<vespalib::string> &dimensions() const { return _dimensions; }
     const Value &eval(const TensorEngine &engine, const LazyParams &params, Stash &stash) const final override;
+    InterpretedFunction::Instruction compile_self(Stash &stash) const final override;
 };
 
 //-----------------------------------------------------------------------------
@@ -203,6 +219,7 @@ public:
         : Op1(result_type_in, child_in), _function(function_in) {}
     map_fun_t function() const { return _function; }
     const Value &eval(const TensorEngine &engine, const LazyParams &params, Stash &stash) const final override;
+    InterpretedFunction::Instruction compile_self(Stash &stash) const final override;
 };
 
 //-----------------------------------------------------------------------------
@@ -219,6 +236,7 @@ public:
         : Op2(result_type_in, lhs_in, rhs_in), _function(function_in) {}
     join_fun_t function() const { return _function; }
     const Value &eval(const TensorEngine &engine, const LazyParams &params, Stash &stash) const final override;
+    InterpretedFunction::Instruction compile_self(Stash &stash) const final override;
 };
 
 //-----------------------------------------------------------------------------
@@ -235,6 +253,7 @@ public:
         : Op2(result_type_in, lhs_in, rhs_in), _dimension(dimension_in) {}
     const vespalib::string &dimension() const { return _dimension; }
     const Value &eval(const TensorEngine &engine, const LazyParams &params, Stash &stash) const final override;
+    InterpretedFunction::Instruction compile_self(Stash &stash) const final override;
 };
 
 //-----------------------------------------------------------------------------
@@ -253,6 +272,7 @@ public:
     const std::vector<vespalib::string> &from() const { return _from; }
     const std::vector<vespalib::string> &to() const { return _to; }
     const Value &eval(const TensorEngine &engine, const LazyParams &params, Stash &stash) const final override;
+    InterpretedFunction::Instruction compile_self(Stash &stash) const final override;
 };
 
 //-----------------------------------------------------------------------------
@@ -274,6 +294,7 @@ public:
     const TensorFunction &false_child() const { return _false_child.get(); }
     void push_children(std::vector<Child::CREF> &children) const final override;    
     const Value &eval(const TensorEngine &engine, const LazyParams &params, Stash &stash) const final override;
+    InterpretedFunction::Instruction compile_self(Stash &stash) const final override;
 };
 
 //-----------------------------------------------------------------------------
diff --git a/eval/src/vespa/eval/eval/test/tensor_conformance.cpp b/eval/src/vespa/eval/eval/test/tensor_conformance.cpp
index 8fe0732f3c4..43ced9513f0 100644
--- a/eval/src/vespa/eval/eval/test/tensor_conformance.cpp
+++ b/eval/src/vespa/eval/eval/test/tensor_conformance.cpp
@@ -276,7 +276,7 @@ struct RetainedReduce : Eval {
         auto a_type = ValueType::from_spec(a.type());
         const auto &ir = tensor_function::reduce(tensor_function::inject(a_type, tensor_id_a, stash), aggr, dimensions, stash);
         ValueType expect_type = ir.result_type();
-        const auto &fun = engine.compile(ir, stash);
+        const auto &fun = engine.optimize(ir, stash);
         Input input(engine.from_spec(a));
         return Result(engine, check_type(fun.eval(engine, input.get(), stash), expect_type));
     }
@@ -291,7 +291,7 @@ struct RetainedMap : Eval {
         auto a_type = ValueType::from_spec(a.type());
         const auto &ir = tensor_function::map(tensor_function::inject(a_type, tensor_id_a, stash), function, stash);
         ValueType expect_type = ir.result_type();
-        const auto &fun = engine.compile(ir, stash);
+        const auto &fun = engine.optimize(ir, stash);
         Input input(engine.from_spec(a));
         return Result(engine, check_type(fun.eval(engine, input.get(), stash), expect_type));
     }
@@ -309,7 +309,7 @@ struct RetainedJoin : Eval {
                                                tensor_function::inject(b_type, tensor_id_b, stash),
                                                function, stash);
         ValueType expect_type = ir.result_type();
-        const auto &fun = engine.compile(ir, stash);
+        const auto &fun = engine.optimize(ir, stash);
         Input input(engine.from_spec(a), engine.from_spec(b));
         return Result(engine, check_type(fun.eval(engine, input.get(), stash), expect_type));
     }
diff --git a/eval/src/vespa/eval/tensor/default_tensor_engine.cpp b/eval/src/vespa/eval/tensor/default_tensor_engine.cpp
index c9f3be9d588..9477b36463a 100644
--- a/eval/src/vespa/eval/tensor/default_tensor_engine.cpp
+++ b/eval/src/vespa/eval/tensor/default_tensor_engine.cpp
@@ -206,17 +206,13 @@ DefaultTensorEngine::decode(nbostream &input) const
 //-----------------------------------------------------------------------------
 
 const TensorFunction &
-DefaultTensorEngine::compile(const eval::tensor_function::Node &expr, Stash &stash) const
+DefaultTensorEngine::optimize(const TensorFunction &expr, Stash &stash) const
 {
-    using Node = eval::tensor_function::Node;
-    using Child = Node::Child;
+    using Child = TensorFunction::Child;
     Child root(expr);
     std::vector<Child::CREF> nodes({root});
     for (size_t i = 0; i < nodes.size(); ++i) {
-        const Child &child = nodes[i];
-        const Node *node = dynamic_cast<const Node *>(&child.get());
-        assert(node != nullptr);
-        node->push_children(nodes);
+        nodes[i].get().get().push_children(nodes);
     }
     while (!nodes.empty()) {
         const Child &child = nodes.back();
diff --git a/eval/src/vespa/eval/tensor/default_tensor_engine.h b/eval/src/vespa/eval/tensor/default_tensor_engine.h
index 1cef4ba2d35..755bdcf6a9d 100644
--- a/eval/src/vespa/eval/tensor/default_tensor_engine.h
+++ b/eval/src/vespa/eval/tensor/default_tensor_engine.h
@@ -25,7 +25,7 @@ public:
     void encode(const Value &value, nbostream &output) const override;
     Value::UP decode(nbostream &input) const override;
 
-    const TensorFunction &compile(const eval::tensor_function::Node &expr, Stash &stash) const override;
+    const TensorFunction &optimize(const TensorFunction &expr, Stash &stash) const override;
 
     const Value &map(const Value &a, map_fun_t function, Stash &stash) const override;
     const Value &join(const Value &a, const Value &b, join_fun_t function, Stash &stash) const override;
diff --git a/eval/src/vespa/eval/tensor/dense/dense_dot_product_function.cpp b/eval/src/vespa/eval/tensor/dense/dense_dot_product_function.cpp
index 9f09940806b..0f395bd353b 100644
--- a/eval/src/vespa/eval/tensor/dense/dense_dot_product_function.cpp
+++ b/eval/src/vespa/eval/tensor/dense/dense_dot_product_function.cpp
@@ -26,6 +26,17 @@ getCellsRef(const eval::Value &value)
     return denseTensor.cellsRef();
 }
 
+void op_call_leaf_eval(eval::InterpretedFunction::State &state, uint64_t param) { // instruction: param is the address of a DenseDotProductFunction, as stored by its compile_self()
+    const DenseDotProductFunction *self = (const DenseDotProductFunction *)(param); // keep the pointee const: compile_self() is a const member and only the const eval() is called
+    state.stack.push_back(self->eval(state.engine, *state.params, state.stash)); // the node has no children (push_children is empty), so its result is simply pushed
+}
+
+}
+
+eval::InterpretedFunction::Instruction
+DenseDotProductFunction::compile_self(Stash &) const // the stash is not needed: the instruction points back at this node
+{
+    return eval::InterpretedFunction::Instruction(op_call_leaf_eval, (uint64_t)(this)); // stores this node's address; op_call_leaf_eval calls eval() on it, so the node must stay alive while the instruction is used
 }
 
 const eval::Value &
diff --git a/eval/src/vespa/eval/tensor/dense/dense_dot_product_function.h b/eval/src/vespa/eval/tensor/dense/dense_dot_product_function.h
index 4e3a54ca18d..d313602bd53 100644
--- a/eval/src/vespa/eval/tensor/dense/dense_dot_product_function.h
+++ b/eval/src/vespa/eval/tensor/dense/dense_dot_product_function.h
@@ -23,6 +23,7 @@ public:
     size_t rhsTensorId() const { return _rhsTensorId; }
     const eval::ValueType &result_type() const override { return eval::DoubleValue::double_type(); }
     void push_children(std::vector<Child::CREF> &) const override {}
+    eval::InterpretedFunction::Instruction compile_self(Stash &stash) const override;
     const eval::Value &eval(const eval::TensorEngine &engine, const eval::LazyParams &params, Stash &stash) const override;
 };
 
diff --git a/eval/src/vespa/eval/tensor/dense/dense_xw_product_function.cpp b/eval/src/vespa/eval/tensor/dense/dense_xw_product_function.cpp
index 50ab6efc931..a62dafb6831 100644
--- a/eval/src/vespa/eval/tensor/dense/dense_xw_product_function.cpp
+++ b/eval/src/vespa/eval/tensor/dense/dense_xw_product_function.cpp
@@ -69,8 +69,19 @@ getCellsRef(const eval::Value &value)
     return denseTensor.cellsRef();
 }
 
+void op_call_leaf_eval(eval::InterpretedFunction::State &state, uint64_t param) { // instruction: param is the address of a DenseXWProductFunction, as stored by its compile_self()
+    const DenseXWProductFunction *self = (const DenseXWProductFunction *)(param); // keep the pointee const: compile_self() is a const member and only the const eval() is called
+    state.stack.push_back(self->eval(state.engine, *state.params, state.stash)); // the node has no children (push_children is empty), so its result is simply pushed
+}
+
 } // namespace <unnamed>
 
+eval::InterpretedFunction::Instruction
+DenseXWProductFunction::compile_self(Stash &) const // the stash is not needed: the instruction points back at this node
+{
+    return eval::InterpretedFunction::Instruction(op_call_leaf_eval, (uint64_t)(this)); // stores this node's address; op_call_leaf_eval calls eval() on it, so the node must stay alive while the instruction is used
+}
+
 const eval::Value &
 DenseXWProductFunction::eval(const eval::TensorEngine &, const eval::LazyParams &params, Stash &stash) const
 {
diff --git a/eval/src/vespa/eval/tensor/dense/dense_xw_product_function.h b/eval/src/vespa/eval/tensor/dense/dense_xw_product_function.h
index c6a466dc527..4d2a85d96f7 100644
--- a/eval/src/vespa/eval/tensor/dense/dense_xw_product_function.h
+++ b/eval/src/vespa/eval/tensor/dense/dense_xw_product_function.h
@@ -47,6 +47,7 @@ public:
 
     const eval::ValueType &result_type() const override { return _resultType; }
     void push_children(std::vector<Child::CREF> &) const override {}
+    eval::InterpretedFunction::Instruction compile_self(Stash &stash) const override;
     const eval::Value &eval(const eval::TensorEngine &engine, const eval::LazyParams &params, Stash &stash) const override;
 };
author	Arne H Juul <arnej27959@users.noreply.github.com>	2018-01-26 14:48:07 +0100
committer	GitHub <noreply@github.com>	2018-01-26 14:48:07 +0100
commit	6559e8a220f7cf7f435d8db2e597c3118ff2e356 (patch)
tree	967b8a2b7111c9948696ffdd825de0e695e1f978
parent	780264290b9e15f0594991b5dba8f1dc2021f92d (diff)
parent	37139d2153a9735f9c835d8426c79d5ad1d372e2 (diff)