22 files changed, 504 insertions, 278 deletions
diff --git a/eval/src/tests/eval/compile_cache/compile_cache_test.cpp b/eval/src/tests/eval/compile_cache/compile_cache_test.cpp
index 1de56e605c9..a0dad889d9a 100644
--- a/eval/src/tests/eval/compile_cache/compile_cache_test.cpp
+++ b/eval/src/tests/eval/compile_cache/compile_cache_test.cpp
@@ -5,12 +5,16 @@
 #include <vespa/eval/eval/test/eval_spec.h>
 #include <vespa/vespalib/util/time.h>
 #include <vespa/vespalib/util/threadstackexecutor.h>
+#include <vespa/vespalib/util/blockingthreadstackexecutor.h>
+#include <vespa/vespalib/util/stringfmt.h>
 #include <thread>
 #include <set>
 
 using namespace vespalib;
 using namespace vespalib::eval;
 
+using vespalib::make_string_short::fmt;
+
 struct MyExecutor : public Executor {
     std::vector<Executor::Task::UP> tasks;
     Executor::Task::UP execute(Executor::Task::UP task) override {
@@ -157,7 +161,7 @@ TEST("require that cache usage works") {
 }
 
 TEST("require that async cache usage works") {
-    ThreadStackExecutor executor(8, 256*1024);
+    auto executor = std::make_shared<ThreadStackExecutor>(8, 256*1024);
     auto binding = CompileCache::bind(executor);
     CompileCache::Token::UP token_a = CompileCache::compile(*Function::parse("x+y"), PassParams::SEPARATE);
     EXPECT_EQUAL(5.0, token_a->get().get_function<2>()(2.0, 3.0));
@@ -166,7 +170,6 @@ TEST("require that async cache usage works") {
     CompileCache::Token::UP token_c = CompileCache::compile(*Function::parse("x+y"), PassParams::SEPARATE);
     EXPECT_EQUAL(5.0, token_c->get().get_function<2>()(2.0, 3.0));
     EXPECT_EQUAL(CompileCache::num_cached(), 2u);
-    executor.sync(); // wait for compile threads to drop all compile cache tokens
     token_a.reset();
     TEST_DO(verify_cache(2, 2));
     token_b.reset();
@@ -176,24 +179,24 @@ TEST("require that async cache usage works") {
 }
 
 TEST("require that compile tasks are run in the most recently bound executor") {
-    MyExecutor exe1;
-    MyExecutor exe2;
+    auto exe1 = std::make_shared<MyExecutor>();
+    auto exe2 = std::make_shared<MyExecutor>();
     auto token0 = CompileCache::compile(*Function::parse("a+b"), PassParams::SEPARATE);
     EXPECT_EQUAL(CompileCache::num_bound(), 0u);
-    EXPECT_EQUAL(exe1.tasks.size(), 0u);
-    EXPECT_EQUAL(exe2.tasks.size(), 0u);
+    EXPECT_EQUAL(exe1->tasks.size(), 0u);
+    EXPECT_EQUAL(exe2->tasks.size(), 0u);
     {
         auto bind1 = CompileCache::bind(exe1);
         auto token1 = CompileCache::compile(*Function::parse("a-b"), PassParams::SEPARATE);
         EXPECT_EQUAL(CompileCache::num_bound(), 1u);
-        EXPECT_EQUAL(exe1.tasks.size(), 1u);
-        EXPECT_EQUAL(exe2.tasks.size(), 0u);
+        EXPECT_EQUAL(exe1->tasks.size(), 1u);
+        EXPECT_EQUAL(exe2->tasks.size(), 0u);
         {
             auto bind2  = CompileCache::bind(exe2);
             auto token2 = CompileCache::compile(*Function::parse("a*b"), PassParams::SEPARATE);
             EXPECT_EQUAL(CompileCache::num_bound(), 2u);
-            EXPECT_EQUAL(exe1.tasks.size(), 1u);
-            EXPECT_EQUAL(exe2.tasks.size(), 1u);
+            EXPECT_EQUAL(exe1->tasks.size(), 1u);
+            EXPECT_EQUAL(exe2->tasks.size(), 1u);
         }
         EXPECT_EQUAL(CompileCache::num_bound(), 1u);
     }
@@ -201,9 +204,9 @@ TEST("require that compile tasks are run in the most recently bound executor") {
 }
 
 TEST("require that executors may be unbound in any order") {
-    MyExecutor exe1;
-    MyExecutor exe2;
-    MyExecutor exe3;
+    auto exe1 = std::make_shared<MyExecutor>();
+    auto exe2 = std::make_shared<MyExecutor>();
+    auto exe3 = std::make_shared<MyExecutor>();
     auto bind1 = CompileCache::bind(exe1);
     auto bind2 = CompileCache::bind(exe2);
     auto bind3 = CompileCache::bind(exe3);
@@ -213,13 +216,13 @@ TEST("require that executors may be unbound in any order") {
     bind3.reset();
     EXPECT_EQUAL(CompileCache::num_bound(), 1u);
     auto token = CompileCache::compile(*Function::parse("a+b"), PassParams::SEPARATE);
-    EXPECT_EQUAL(exe1.tasks.size(), 1u);
-    EXPECT_EQUAL(exe2.tasks.size(), 0u);
-    EXPECT_EQUAL(exe3.tasks.size(), 0u);
+    EXPECT_EQUAL(exe1->tasks.size(), 1u);
+    EXPECT_EQUAL(exe2->tasks.size(), 0u);
+    EXPECT_EQUAL(exe3->tasks.size(), 0u);
 }
 
 TEST("require that the same executor can be bound multiple times") {
-    MyExecutor exe1;
+    auto exe1 = std::make_shared<MyExecutor>();
     auto bind1 = CompileCache::bind(exe1);
     auto bind2 = CompileCache::bind(exe1);
     auto bind3 = CompileCache::bind(exe1);
@@ -230,7 +233,7 @@ TEST("require that the same executor can be bound multiple times") {
     EXPECT_EQUAL(CompileCache::num_bound(), 1u);
     auto token = CompileCache::compile(*Function::parse("a+b"), PassParams::SEPARATE);
     EXPECT_EQUAL(CompileCache::num_bound(), 1u);
-    EXPECT_EQUAL(exe1.tasks.size(), 1u);
+    EXPECT_EQUAL(exe1->tasks.size(), 1u);
 }
 
 struct CompileCheck : test::EvalSpec::EvalTest {
@@ -286,9 +289,9 @@ TEST_F("compile sequentially, then run all conformance tests", test::EvalSpec())
 
 TEST_F("compile concurrently (8 threads), then run all conformance tests", test::EvalSpec()) {
     f1.add_all_cases();
-    ThreadStackExecutor executor(8, 256*1024);
+    auto executor = std::make_shared<ThreadStackExecutor>(8, 256*1024);
     auto binding = CompileCache::bind(executor);
-    while (executor.num_idle_workers() < 8) {
+    while (executor->num_idle_workers() < 8) {
         std::this_thread::sleep_for(1ms);
     }
     for (size_t i = 0; i < 2; ++i) {
@@ -305,6 +308,43 @@ TEST_F("compile concurrently (8 threads), then run all conformance tests", test:
     }
 }
 
+struct MyCompileTask : public Executor::Task { // executor task that compiles 'loop' functions, one per consecutive constant
+    size_t seed; // first constant to compile
+    size_t loop; // how many consecutive constants to compile
+    MyCompileTask(size_t seed_in, size_t loop_in) : seed(seed_in), loop(loop_in) {}
+    void run() override {
+        for (size_t value = seed, remaining = loop; remaining > 0; ++value, --remaining) {
+            // a distinct constant per iteration makes a unique function that needs compilation
+            auto token = CompileCache::compile(*Function::parse(fmt("%zu", value)), PassParams::SEPARATE);
+        }
+    }
+};
+
+TEST_MT_FF("require that deadlock is avoided with blocking executor", 8, std::shared_ptr<Executor>(nullptr), TimeBomb(300)) {
+    size_t loop = 16; // number of functions compiled by each executor task and by each non-zero test thread
+    if (thread_id == 0) { // thread 0 creates and binds the executor, then feeds it compile tasks
+        auto t0 = steady_clock::now();
+        f1 = std::make_shared<BlockingThreadStackExecutor>(2, 256*1024, 3); // replaces the null executor passed in as fixture f1
+        auto binding = CompileCache::bind(f1); // registers f1 with the compile cache for the rest of this scope
+        TEST_BARRIER(); // #1: f1 is bound before the other threads start compiling
+        for (size_t i = 0; i < num_threads; ++i) {
+            f1->execute(std::make_unique<MyCompileTask>(i * loop, loop)); // seed i*loop keeps the constants of different tasks disjoint
+        }
+        TEST_BARRIER(); // #2: the other threads have finished their compile loops
+        auto t1 = steady_clock::now();
+        fprintf(stderr, "deadlock test took %" PRIu64 " ms\n", count_ms(t1 - t0)); // NOTE(review): PRIu64 expects an unsigned 64-bit value - confirm the return type of count_ms
+
+    } else { // all other threads compile directly from the test thread
+        TEST_BARRIER(); // #1: wait until thread 0 has bound the executor
+        size_t seed = (10000 + (thread_id * loop)); // offset of 10000 keeps these constants apart from the ones used by the executor tasks
+        for (size_t i = 0; i < loop; ++i) {
+            // use custom constant to make a unique function that needs compilation
+            auto token = CompileCache::compile(*Function::parse(fmt("%zu", seed + i)), PassParams::SEPARATE);
+        }
+        TEST_BARRIER(); // #2: signal that this thread is done compiling
+    }
+}
+
 //-----------------------------------------------------------------------------
 
 TEST_MAIN() { TEST_RUN_ALL(); }
diff --git a/eval/src/tests/eval/inline_operation/inline_operation_test.cpp b/eval/src/tests/eval/inline_operation/inline_operation_test.cpp
index 4520176e276..8895bd4bcbd 100644
--- a/eval/src/tests/eval/inline_operation/inline_operation_test.cpp
+++ b/eval/src/tests/eval/inline_operation/inline_operation_test.cpp
@@ -3,27 +3,29 @@
 #include <vespa/eval/eval/operation.h>
 #include <vespa/eval/eval/inline_operation.h>
 #include <vespa/eval/eval/function.h>
+#include <vespa/vespalib/util/typify.h>
 #include <vespa/vespalib/gtest/gtest.h>
 
+using vespalib::typify_invoke;
 using namespace vespalib::eval;
 using namespace vespalib::eval::operation;
 
-template <typename T> struct IsInlined { constexpr static bool value = true; };
-template <> struct IsInlined<CallOp1> { constexpr static bool value = false; };
-template <> struct IsInlined<CallOp2> { constexpr static bool value = false; };
+constexpr int my_value = 42; // value handed back by both invoke helpers below
+struct AsValue { template <typename> static auto invoke() -> int { return my_value; } }; // returns a copy of my_value
+struct AsRef { template <typename> static auto invoke() -> const int & { return my_value; } }; // returns a reference to my_value itself
 
-template <typename T> double test_op1(op1_t ref, double a, bool inlined) {
-    T op(ref);
-    EXPECT_EQ(IsInlined<T>::value, inlined);
-    EXPECT_EQ(op(a), ref(a));
-    return op(a);
+template <typename T> void test_op1(op1_t ref, double a, double expect) { // checks that both ref and wrapper type T map a to expect
+    bool need_ref = std::is_same_v<T,CallOp1>; // only CallOp1 is handed the real function pointer
+    T op = need_ref ? T(ref) : T(nullptr); // every other wrapper type is constructed from nullptr
+    EXPECT_DOUBLE_EQ(ref(a), expect);
+    EXPECT_DOUBLE_EQ(op(a), expect);
 };
 
-template <typename T> double test_op2(op2_t ref, double a, double b, bool inlined) {
-    T op(ref);
-    EXPECT_EQ(IsInlined<T>::value, inlined);
-    EXPECT_EQ(op(a,b), ref(a,b));
-    return op(a,b);
+template <typename T> void test_op2(op2_t ref, double a, double b, double expect) { // checks that both ref and wrapper type T map (a, b) to expect
+    bool need_ref = std::is_same_v<T,CallOp2>; // only CallOp2 is handed the real function pointer
+    T op = need_ref ? T(ref) : T(nullptr); // every other wrapper type is constructed from nullptr
+    EXPECT_DOUBLE_EQ(ref(a, b), expect);
+    EXPECT_DOUBLE_EQ(op(a, b), expect);
 };
 
 op1_t as_op1(const vespalib::string &str) {
@@ -41,33 +43,36 @@ op2_t as_op2(const vespalib::string &str) {
 }
 
 TEST(InlineOperationTest, op1_lambdas_are_recognized) {
-    EXPECT_EQ(as_op1("-a"),         Neg::f);
-    EXPECT_EQ(as_op1("!a"),         Not::f);
-    EXPECT_EQ(as_op1("cos(a)"),     Cos::f);
-    EXPECT_EQ(as_op1("sin(a)"),     Sin::f);
-    EXPECT_EQ(as_op1("tan(a)"),     Tan::f);
-    EXPECT_EQ(as_op1("cosh(a)"),    Cosh::f);
-    EXPECT_EQ(as_op1("sinh(a)"),    Sinh::f);
-    EXPECT_EQ(as_op1("tanh(a)"),    Tanh::f);
-    EXPECT_EQ(as_op1("acos(a)"),    Acos::f);
-    EXPECT_EQ(as_op1("asin(a)"),    Asin::f);
-    EXPECT_EQ(as_op1("atan(a)"),    Atan::f);
-    EXPECT_EQ(as_op1("exp(a)"),     Exp::f);
-    EXPECT_EQ(as_op1("log10(a)"),   Log10::f);
-    EXPECT_EQ(as_op1("log(a)"),     Log::f);
-    EXPECT_EQ(as_op1("sqrt(a)"),    Sqrt::f);
-    EXPECT_EQ(as_op1("ceil(a)"),    Ceil::f);
-    EXPECT_EQ(as_op1("fabs(a)"),    Fabs::f);
-    EXPECT_EQ(as_op1("floor(a)"),   Floor::f);
-    EXPECT_EQ(as_op1("isNan(a)"),   IsNan::f);
-    EXPECT_EQ(as_op1("relu(a)"),    Relu::f);
-    EXPECT_EQ(as_op1("sigmoid(a)"), Sigmoid::f);
-    EXPECT_EQ(as_op1("elu(a)"),     Elu::f);
+    EXPECT_EQ(as_op1("-a"),         &Neg::f);
+    EXPECT_EQ(as_op1("!a"),         &Not::f);
+    EXPECT_EQ(as_op1("cos(a)"),     &Cos::f);
+    EXPECT_EQ(as_op1("sin(a)"),     &Sin::f);
+    EXPECT_EQ(as_op1("tan(a)"),     &Tan::f);
+    EXPECT_EQ(as_op1("cosh(a)"),    &Cosh::f);
+    EXPECT_EQ(as_op1("sinh(a)"),    &Sinh::f);
+    EXPECT_EQ(as_op1("tanh(a)"),    &Tanh::f);
+    EXPECT_EQ(as_op1("acos(a)"),    &Acos::f);
+    EXPECT_EQ(as_op1("asin(a)"),    &Asin::f);
+    EXPECT_EQ(as_op1("atan(a)"),    &Atan::f);
+    EXPECT_EQ(as_op1("exp(a)"),     &Exp::f);
+    EXPECT_EQ(as_op1("log10(a)"),   &Log10::f);
+    EXPECT_EQ(as_op1("log(a)"),     &Log::f);
+    EXPECT_EQ(as_op1("sqrt(a)"),    &Sqrt::f);
+    EXPECT_EQ(as_op1("ceil(a)"),    &Ceil::f);
+    EXPECT_EQ(as_op1("fabs(a)"),    &Fabs::f);
+    EXPECT_EQ(as_op1("floor(a)"),   &Floor::f);
+    EXPECT_EQ(as_op1("isNan(a)"),   &IsNan::f);
+    EXPECT_EQ(as_op1("relu(a)"),    &Relu::f);
+    EXPECT_EQ(as_op1("sigmoid(a)"), &Sigmoid::f);
+    EXPECT_EQ(as_op1("elu(a)"),     &Elu::f);
+    //-------------------------------------------
+    EXPECT_EQ(as_op1("1/a"),        &Inv::f);
+    EXPECT_EQ(as_op1("1.0/a"),      &Inv::f);
 }
 
 TEST(InlineOperationTest, op1_lambdas_are_recognized_with_different_parameter_names) {
-    EXPECT_EQ(lookup_op1(*Function::parse({"x"}, "-x")).value(), Neg::f);
-    EXPECT_EQ(lookup_op1(*Function::parse({"x"}, "!x")).value(), Not::f);
+    EXPECT_EQ(lookup_op1(*Function::parse({"x"}, "-x")).value(), &Neg::f);
+    EXPECT_EQ(lookup_op1(*Function::parse({"x"}, "!x")).value(), &Not::f);
 }
 
 TEST(InlineOperationTest, non_op1_lambdas_are_not_recognized) {
@@ -76,32 +81,32 @@ TEST(InlineOperationTest, non_op1_lambdas_are_not_recognized) {
 }
 
 TEST(InlineOperationTest, op2_lambdas_are_recognized) {
-    EXPECT_EQ(as_op2("a+b"),        Add::f);
-    EXPECT_EQ(as_op2("a-b"),        Sub::f);
-    EXPECT_EQ(as_op2("a*b"),        Mul::f);
-    EXPECT_EQ(as_op2("a/b"),        Div::f);
-    EXPECT_EQ(as_op2("a%b"),        Mod::f);
-    EXPECT_EQ(as_op2("a^b"),        Pow::f);
-    EXPECT_EQ(as_op2("a==b"),       Equal::f);
-    EXPECT_EQ(as_op2("a!=b"),       NotEqual::f);
-    EXPECT_EQ(as_op2("a~=b"),       Approx::f);
-    EXPECT_EQ(as_op2("a<b"),        Less::f);
-    EXPECT_EQ(as_op2("a<=b"),       LessEqual::f);
-    EXPECT_EQ(as_op2("a>b"),        Greater::f);
-    EXPECT_EQ(as_op2("a>=b"),       GreaterEqual::f);
-    EXPECT_EQ(as_op2("a&&b"),       And::f);
-    EXPECT_EQ(as_op2("a||b"),       Or::f);
-    EXPECT_EQ(as_op2("atan2(a,b)"), Atan2::f);
-    EXPECT_EQ(as_op2("ldexp(a,b)"), Ldexp::f);
-    EXPECT_EQ(as_op2("pow(a,b)"),   Pow::f);
-    EXPECT_EQ(as_op2("fmod(a,b)"),  Mod::f);
-    EXPECT_EQ(as_op2("min(a,b)"),   Min::f);
-    EXPECT_EQ(as_op2("max(a,b)"),   Max::f);
+    EXPECT_EQ(as_op2("a+b"),        &Add::f);
+    EXPECT_EQ(as_op2("a-b"),        &Sub::f);
+    EXPECT_EQ(as_op2("a*b"),        &Mul::f);
+    EXPECT_EQ(as_op2("a/b"),        &Div::f);
+    EXPECT_EQ(as_op2("a%b"),        &Mod::f);
+    EXPECT_EQ(as_op2("a^b"),        &Pow::f);
+    EXPECT_EQ(as_op2("a==b"),       &Equal::f);
+    EXPECT_EQ(as_op2("a!=b"),       &NotEqual::f);
+    EXPECT_EQ(as_op2("a~=b"),       &Approx::f);
+    EXPECT_EQ(as_op2("a<b"),        &Less::f);
+    EXPECT_EQ(as_op2("a<=b"),       &LessEqual::f);
+    EXPECT_EQ(as_op2("a>b"),        &Greater::f);
+    EXPECT_EQ(as_op2("a>=b"),       &GreaterEqual::f);
+    EXPECT_EQ(as_op2("a&&b"),       &And::f);
+    EXPECT_EQ(as_op2("a||b"),       &Or::f);
+    EXPECT_EQ(as_op2("atan2(a,b)"), &Atan2::f);
+    EXPECT_EQ(as_op2("ldexp(a,b)"), &Ldexp::f);
+    EXPECT_EQ(as_op2("pow(a,b)"),   &Pow::f);
+    EXPECT_EQ(as_op2("fmod(a,b)"),  &Mod::f);
+    EXPECT_EQ(as_op2("min(a,b)"),   &Min::f);
+    EXPECT_EQ(as_op2("max(a,b)"),   &Max::f);
 }
 
 TEST(InlineOperationTest, op2_lambdas_are_recognized_with_different_parameter_names) {
-    EXPECT_EQ(lookup_op2(*Function::parse({"x", "y"}, "x+y")).value(), Add::f);
-    EXPECT_EQ(lookup_op2(*Function::parse({"x", "y"}, "x-y")).value(), Sub::f);
+    EXPECT_EQ(lookup_op2(*Function::parse({"x", "y"}, "x+y")).value(), &Add::f);
+    EXPECT_EQ(lookup_op2(*Function::parse({"x", "y"}, "x-y")).value(), &Sub::f);
 }
 
 TEST(InlineOperationTest, non_op2_lambdas_are_not_recognized) {
@@ -121,11 +126,37 @@ TEST(InlineOperationTest, generic_op2_wrapper_works) {
     EXPECT_EQ(op(3,7), 10);
 }
 
+TEST(InlineOperationTest, op1_typifier_forwards_return_value_correctly) {
+    auto a = typify_invoke<1,TypifyOp1,AsValue>(Neg::f);
+    auto b = typify_invoke<1,TypifyOp1,AsRef>(Neg::f);
+    EXPECT_EQ(a, my_value);
+    EXPECT_EQ(b, my_value);
+    bool same_memory = (&(typify_invoke<1,TypifyOp1,AsRef>(Neg::f)) == &my_value);
+    EXPECT_EQ(same_memory, true);
+}
+
+TEST(InlineOperationTest, op2_typifier_forwards_return_value_correctly) {
+    auto a = typify_invoke<1,TypifyOp2,AsValue>(Add::f);
+    auto b = typify_invoke<1,TypifyOp2,AsRef>(Add::f);
+    EXPECT_EQ(a, my_value);
+    EXPECT_EQ(b, my_value);
+    bool same_memory = (&(typify_invoke<1,TypifyOp2,AsRef>(Add::f)) == &my_value);
+    EXPECT_EQ(same_memory, true);
+}
+
+TEST(InlineOperationTest, inline_op1_example_works) {
+    op1_t ignored = nullptr;
+    InlineOp1<Inv> op(ignored);
+    EXPECT_EQ(op(2.0), 0.5);
+    EXPECT_EQ(op(4.0f), 0.25f);
+    EXPECT_EQ(op(8.0), 0.125);
+}
+
 TEST(InlineOperationTest, inline_op2_example_works) {
     op2_t ignored = nullptr;
     InlineOp2<Add> op(ignored);
-    EXPECT_EQ(op(2,3), 5);
-    EXPECT_EQ(op(3,7), 10);
+    EXPECT_EQ(op(2.0, 3.0), 5.0);
+    EXPECT_EQ(op(3.0, 7.0), 10.0);
 }
 
 TEST(InlineOperationTest, parameter_swap_wrapper_works) {
@@ -137,20 +168,151 @@ TEST(InlineOperationTest, parameter_swap_wrapper_works) {
     EXPECT_EQ(swap_op(3,7), 4);
 }
 
-TEST(InlineOperationTest, resolved_op1_works) {
-    auto a = TypifyOp1::resolve(Neg::f, [](auto t){ return test_op1<typename decltype(t)::type>(Neg::f, 2.0, false); });
-    // putting the lambda inside the EXPECT does not work
-    EXPECT_EQ(a, -2.0);
+//-----------------------------------------------------------------------------
+
+TEST(InlineOperationTest, op1_exp_is_inlined) {
+    TypifyOp1::resolve(Exp::f, [](auto t)
+                       {
+                           using T = typename decltype(t)::type;
+                           bool type_ok = std::is_same_v<T,InlineOp1<Exp>>;
+                           op1_t ref = Exp::f;
+                           EXPECT_TRUE(type_ok);
+                           test_op1<T>(ref, 2.0, std::exp(2.0));
+                           test_op1<T>(ref, 3.0, std::exp(3.0));
+                           test_op1<T>(ref, 7.0, std::exp(7.0));
+                       });
+}
+
+TEST(InlineOperationTest, op1_inv_is_inlined) {
+    TypifyOp1::resolve(Inv::f, [](auto t)
+                       {
+                           using T = typename decltype(t)::type;
+                           bool type_ok = std::is_same_v<T,InlineOp1<Inv>>;
+                           op1_t ref = Inv::f;
+                           EXPECT_TRUE(type_ok);
+                           test_op1<T>(ref, 2.0, 1.0/2.0);
+                           test_op1<T>(ref, 4.0, 1.0/4.0);
+                           test_op1<T>(ref, 8.0, 1.0/8.0);
+                       });
+}
+
+TEST(InlineOperationTest, op1_sqrt_is_inlined) {
+    TypifyOp1::resolve(Sqrt::f, [](auto t)
+                       {
+                           using T = typename decltype(t)::type; // wrapper type chosen by TypifyOp1 for Sqrt::f
+                           bool type_ok = std::is_same_v<T,InlineOp1<Sqrt>>;
+                           op1_t ref = Sqrt::f;
+                           EXPECT_TRUE(type_ok);
+                           test_op1<T>(ref, 2.0, std::sqrt(2.0)); // std::-qualified: a global ::sqrt is not guaranteed by <cmath>
+                           test_op1<T>(ref, 4.0, std::sqrt(4.0));
+                           test_op1<T>(ref, 64.0, std::sqrt(64.0));
+                       });
+}
+
+TEST(InlineOperationTest, op1_tanh_is_inlined) {
+    TypifyOp1::resolve(Tanh::f, [](auto t)
+                       {
+                           using T = typename decltype(t)::type;
+                           bool type_ok = std::is_same_v<T,InlineOp1<Tanh>>;
+                           op1_t ref = Tanh::f;
+                           EXPECT_TRUE(type_ok);
+                           test_op1<T>(ref, 0.1, std::tanh(0.1));
+                           test_op1<T>(ref, 0.3, std::tanh(0.3));
+                           test_op1<T>(ref, 0.7, std::tanh(0.7));
+                       });
+}
+
+TEST(InlineOperationTest, op1_neg_is_not_inlined) {
+    TypifyOp1::resolve(Neg::f, [](auto t)
+                       {
+                           using T = typename decltype(t)::type;
+                           bool type_ok = std::is_same_v<T,CallOp1>;
+                           op1_t ref = Neg::f;
+                           EXPECT_TRUE(type_ok);
+                           test_op1<T>(ref, 3.0, -3.0);
+                           test_op1<T>(ref, 5.0, -5.0);
+                           test_op1<T>(ref, -2.0, 2.0);
+                       });
+}
+
+//-----------------------------------------------------------------------------
+
+TEST(InlineOperationTest, op2_add_is_inlined) {
+    TypifyOp2::resolve(Add::f, [](auto t)
+                       {
+                           using T = typename decltype(t)::type;
+                           bool type_ok = std::is_same_v<T,InlineOp2<Add>>;
+                           op2_t ref = Add::f;
+                           EXPECT_TRUE(type_ok);
+                           test_op2<T>(ref, 2.0, 2.0,  4.0);
+                           test_op2<T>(ref, 3.0, 8.0, 11.0);
+                           test_op2<T>(ref, 7.0, 1.0,  8.0);
+                       });
+}
+
+TEST(InlineOperationTest, op2_div_is_inlined) {
+    TypifyOp2::resolve(Div::f, [](auto t)
+                       {
+                           using T = typename decltype(t)::type;
+                           bool type_ok = std::is_same_v<T,InlineOp2<Div>>;
+                           op2_t ref = Div::f;
+                           EXPECT_TRUE(type_ok);
+                           test_op2<T>(ref, 2.0, 2.0, 1.0);
+                           test_op2<T>(ref, 3.0, 8.0, 3.0 / 8.0);
+                           test_op2<T>(ref, 7.0, 5.0, 7.0 / 5.0);
+                       });
+}
+
+TEST(InlineOperationTest, op2_mul_is_inlined) {
+    TypifyOp2::resolve(Mul::f, [](auto t)
+                       {
+                           using T = typename decltype(t)::type;
+                           bool type_ok = std::is_same_v<T,InlineOp2<Mul>>;
+                           op2_t ref = Mul::f;
+                           EXPECT_TRUE(type_ok);
+                           test_op2<T>(ref, 2.0, 2.0, 4.0);
+                           test_op2<T>(ref, 3.0, 8.0, 24.0);
+                           test_op2<T>(ref, 7.0, 5.0, 35.0);
+                       });
+}
+
+TEST(InlineOperationTest, op2_pow_is_inlined) {
+    TypifyOp2::resolve(Pow::f, [](auto t)
+                       {
+                           using T = typename decltype(t)::type;
+                           bool type_ok = std::is_same_v<T,InlineOp2<Pow>>;
+                           op2_t ref = Pow::f;
+                           EXPECT_TRUE(type_ok);
+                           test_op2<T>(ref, 2.0, 2.0, std::pow(2.0, 2.0));
+                           test_op2<T>(ref, 3.0, 8.0, std::pow(3.0, 8.0));
+                           test_op2<T>(ref, 7.0, 5.0, std::pow(7.0, 5.0));
+                       });
+}
+
+TEST(InlineOperationTest, op2_sub_is_inlined) {
+    TypifyOp2::resolve(Sub::f, [](auto t)
+                       {
+                           using T = typename decltype(t)::type;
+                           bool type_ok = std::is_same_v<T,InlineOp2<Sub>>;
+                           op2_t ref = Sub::f;
+                           EXPECT_TRUE(type_ok);
+                           test_op2<T>(ref, 3.0, 2.0, 1.0);
+                           test_op2<T>(ref, 3.0, 8.0, -5.0);
+                           test_op2<T>(ref, 7.0, 5.0, 2.0);
+                       });
 }
 
-TEST(InlineOperationTest, resolved_op2_works) {
-    auto a = TypifyOp2::resolve(Add::f, [](auto t){ return test_op2<typename decltype(t)::type>(Add::f, 2.0, 5.0, true); });
-    auto b = TypifyOp2::resolve(Mul::f, [](auto t){ return test_op2<typename decltype(t)::type>(Mul::f, 5.0, 3.0, true); });
-    auto c = TypifyOp2::resolve(Sub::f, [](auto t){ return test_op2<typename decltype(t)::type>(Sub::f, 8.0, 5.0, false); });
-    // putting the lambda inside the EXPECT does not work
-    EXPECT_EQ(a, 7.0);
-    EXPECT_EQ(b, 15.0);
-    EXPECT_EQ(c, 3.0);
+TEST(InlineOperationTest, op2_mod_is_not_inlined) {
+    TypifyOp2::resolve(Mod::f, [](auto t)
+                       {
+                           using T = typename decltype(t)::type;
+                           bool type_ok = std::is_same_v<T,CallOp2>;
+                           op2_t ref = Mod::f;
+                           EXPECT_TRUE(type_ok);
+                           test_op2<T>(ref, 3.0, 2.0, std::fmod(3.0, 2.0));
+                           test_op2<T>(ref, 3.0, 8.0, std::fmod(3.0, 8.0));
+                           test_op2<T>(ref, 7.0, 5.0, std::fmod(7.0, 5.0));
+                       });
 }
 
 GTEST_MAIN_RUN_ALL_TESTS()
diff --git a/eval/src/vespa/eval/eval/aggr.cpp b/eval/src/vespa/eval/eval/aggr.cpp
index d10bbc4abb8..8efb0ec9fe7 100644
--- a/eval/src/vespa/eval/eval/aggr.cpp
+++ b/eval/src/vespa/eval/eval/aggr.cpp
@@ -71,15 +71,11 @@ Aggregator::~Aggregator()
 Aggregator &
 Aggregator::create(Aggr aggr, Stash &stash)
 {
-    switch (aggr) {
-    case Aggr::AVG:   return stash.create<Wrapper<aggr::Avg<double>>>();
-    case Aggr::COUNT: return stash.create<Wrapper<aggr::Count<double>>>();
-    case Aggr::PROD:  return stash.create<Wrapper<aggr::Prod<double>>>();
-    case Aggr::SUM:   return stash.create<Wrapper<aggr::Sum<double>>>();
-    case Aggr::MAX:   return stash.create<Wrapper<aggr::Max<double>>>();
-    case Aggr::MIN:   return stash.create<Wrapper<aggr::Min<double>>>();
-    }
-    LOG_ABORT("should not be reached");
+    return TypifyAggr::resolve(aggr, [&stash](auto t)->Aggregator& // explicit Aggregator& so the lambda returns a reference, not a deduced value type
+                               {
+                                   using T = typename decltype(t)::template templ<double>; // aggregator template selected by aggr, instantiated for double
+                                   return stash.create<Wrapper<T>>();
+                               });
 }
 
 std::vector<Aggr>
diff --git a/eval/src/vespa/eval/eval/aggr.h b/eval/src/vespa/eval/eval/aggr.h
index 8dea54d8abc..169f0b1d2af 100644
--- a/eval/src/vespa/eval/eval/aggr.h
+++ b/eval/src/vespa/eval/eval/aggr.h
@@ -2,6 +2,7 @@
 
 #pragma once
 
+#include <vespa/vespalib/util/typify.h>
 #include <vespa/vespalib/stllike/string.h>
 #include <vector>
 #include <map>
@@ -118,5 +119,21 @@ public:
 };
 
 } // namespave vespalib::eval::aggr
+
+struct TypifyAggr { // maps a run-time Aggr value to the matching aggr:: class template
+    template <template<typename> typename TT> using Result = TypifyResultSimpleTemplate<TT>;
+    template <typename F> static decltype(auto) resolve(Aggr aggr, F &&f) { // calls f with a Result tag for the template selected by aggr and forwards what f returns
+        switch (aggr) {
+        case Aggr::AVG:   return f(Result<aggr::Avg>());
+        case Aggr::COUNT: return f(Result<aggr::Count>());
+        case Aggr::PROD:  return f(Result<aggr::Prod>());
+        case Aggr::SUM:   return f(Result<aggr::Sum>());
+        case Aggr::MAX:   return f(Result<aggr::Max>());
+        case Aggr::MIN:   return f(Result<aggr::Min>());
+        }
+        abort(); // only reached if aggr holds a value not handled by the switch above
+    }
+};
+
 } // namespace vespalib::eval
 } // namespace vespalib
diff --git a/eval/src/vespa/eval/eval/inline_operation.h b/eval/src/vespa/eval/eval/inline_operation.h
index 493de9ea56c..fccf1874242 100644
--- a/eval/src/vespa/eval/eval/inline_operation.h
+++ b/eval/src/vespa/eval/eval/inline_operation.h
@@ -4,6 +4,7 @@
 
 #include "operation.h"
 #include <vespa/vespalib/util/typify.h>
+#include <cmath>
 
 namespace vespalib::eval::operation {
 
@@ -15,11 +16,38 @@ struct CallOp1 {
     double operator()(double a) const { return my_op1(a); }
 };
 
+template <typename T> struct InlineOp1; // primary template is only declared; each inlined operation below is a specialization
+template <> struct InlineOp1<Exp> {
+    InlineOp1(op1_t) {} // the function pointer argument is ignored
+    template <typename A> constexpr auto operator()(A a) const { return std::exp(a); } // std:: so the overload matching A is used, as for Sqrt and Tanh
+};
+template <> struct InlineOp1<Inv> {
+    InlineOp1(op1_t) {} // the function pointer argument is ignored
+    template <typename A> constexpr auto operator()(A a) const { return (A{1}/a); } // 1/a computed in the argument type A
+};
+template <> struct InlineOp1<Sqrt> {
+    InlineOp1(op1_t) {} // the function pointer argument is ignored
+    template <typename A> constexpr auto operator()(A a) const { return std::sqrt(a); }
+};
+template <> struct InlineOp1<Tanh> {
+    InlineOp1(op1_t) {} // the function pointer argument is ignored
+    template <typename A> constexpr auto operator()(A a) const { return std::tanh(a); }
+};
+
 struct TypifyOp1 {
     template <typename T> using Result = TypifyResultType<T>;
     template <typename F> static decltype(auto) resolve(op1_t value, F &&f) {
-        (void) value;
-        return f(Result<CallOp1>());
+        if (value == Exp::f) { // known operations are matched by function pointer and get an inlined wrapper type
+            return f(Result<InlineOp1<Exp>>());
+        } else if (value == Inv::f) {
+            return f(Result<InlineOp1<Inv>>());
+        } else if (value == Sqrt::f) {
+            return f(Result<InlineOp1<Sqrt>>());
+        } else if (value == Tanh::f) {
+            return f(Result<InlineOp1<Tanh>>());
+        } else { // anything else is invoked through the function pointer by CallOp1
+            return f(Result<CallOp1>());
+        }
     }
 };
 
@@ -44,18 +72,36 @@ template <> struct InlineOp2<Add> {
     InlineOp2(op2_t) {}
     template <typename A, typename B> constexpr auto operator()(A a, B b) const { return (a+b); }
 };
+template <> struct InlineOp2<Div> { // inlined division: a/b
+    InlineOp2(op2_t) {} // the function pointer argument is ignored
+    template <typename A, typename B> constexpr auto operator()(A a, B b) const { return (a/b); }
+};
 template <> struct InlineOp2<Mul> {
     InlineOp2(op2_t) {}
     template <typename A, typename B> constexpr auto operator()(A a, B b) const { return (a*b); }
 };
+template <> struct InlineOp2<Pow> { // inlined power: std::pow(a,b)
+    InlineOp2(op2_t) {} // the function pointer argument is ignored
+    template <typename A, typename B> constexpr auto operator()(A a, B b) const { return std::pow(a,b); }
+};
+template <> struct InlineOp2<Sub> { // inlined subtraction: a-b
+    InlineOp2(op2_t) {} // the function pointer argument is ignored
+    template <typename A, typename B> constexpr auto operator()(A a, B b) const { return (a-b); }
+};
 
 struct TypifyOp2 {
     template <typename T> using Result = TypifyResultType<T>;
     template <typename F> static decltype(auto) resolve(op2_t value, F &&f) {
         if (value == Add::f) {
             return f(Result<InlineOp2<Add>>());
+        } else if (value == Div::f) {
+            return f(Result<InlineOp2<Div>>());
         } else if (value == Mul::f) {
             return f(Result<InlineOp2<Mul>>());
+        } else if (value == Pow::f) {
+            return f(Result<InlineOp2<Pow>>());
+        } else if (value == Sub::f) {
+            return f(Result<InlineOp2<Sub>>());
         } else {
             return f(Result<CallOp2>());
         }
@@ -64,4 +110,27 @@ struct TypifyOp2 {
 
 //-----------------------------------------------------------------------------
 
+template <typename A, typename OP1>
+void apply_op1_vec(A *dst, const A *src, size_t n, OP1 &&f) { // dst[i] = f(src[i]) for every i in [0, n)
+    for (const A *end = src + n; src != end; ++src, ++dst) {
+        *dst = f(*src);
+    }
+}
+
+template <typename D, typename A, typename B, typename OP2>
+void apply_op2_vec_num(D *dst, const A *a, B b, size_t n, OP2 &&f) { // dst[i] = f(a[i], b) for every i in [0, n)
+    while (n-- > 0) {
+        *dst++ = f(*a++, b);
+    }
+}
+
+template <typename D, typename A, typename B, typename OP2>
+void apply_op2_vec_vec(D *dst, const A *a, const B *b, size_t n, OP2 &&f) { // dst[i] = f(a[i], b[i]) for every i in [0, n)
+    for (const A *end = a + n; a != end; ++a, ++b, ++dst) {
+        *dst = f(*a, *b);
+    }
+}
+
+//-----------------------------------------------------------------------------
+
 }
diff --git a/eval/src/vespa/eval/eval/llvm/compile_cache.cpp b/eval/src/vespa/eval/eval/llvm/compile_cache.cpp
index 4aa18d3bb65..e2674a6e4d6 100644
--- a/eval/src/vespa/eval/eval/llvm/compile_cache.cpp
+++ b/eval/src/vespa/eval/eval/llvm/compile_cache.cpp
@@ -10,14 +10,14 @@ namespace eval {
 std::mutex CompileCache::_lock{};
 CompileCache::Map CompileCache::_cached{};
 uint64_t CompileCache::_executor_tag{0};
-std::vector<std::pair<uint64_t,Executor*>> CompileCache::_executor_stack{};
+std::vector<std::pair<uint64_t,std::shared_ptr<Executor>>> CompileCache::_executor_stack{};
 
 const CompiledFunction &
 CompileCache::Value::wait_for_result()
 {
-    std::unique_lock<std::mutex> guard(_lock);
-    cond.wait(guard, [this](){ return bool(compiled_function); });
-    return *compiled_function;
+    std::unique_lock<std::mutex> guard(result->lock);
+    result->cond.wait(guard, [this](){ return bool(result->compiled_function); });
+    return *(result->compiled_function);
 }
 
 void
@@ -30,10 +30,10 @@ CompileCache::release(Map::iterator entry)
 }
 
 uint64_t
-CompileCache::attach_executor(Executor &executor)
+CompileCache::attach_executor(std::shared_ptr<Executor> executor)
 {
     std::lock_guard<std::mutex> guard(_lock);
-    _executor_stack.emplace_back(++_executor_tag, &executor);
+    _executor_stack.emplace_back(++_executor_tag, std::move(executor));
     return _executor_tag;
 }
 
@@ -52,6 +52,7 @@ CompileCache::compile(const Function &function, PassParams pass_params)
 {
     Token::UP token;
     Executor::Task::UP task;
+    std::shared_ptr<Executor> executor;
     vespalib::string key = gen_key(function, pass_params);
     {
         std::lock_guard<std::mutex> guard(_lock);
@@ -63,14 +64,15 @@ CompileCache::compile(const Function &function, PassParams pass_params)
             auto res = _cached.emplace(std::move(key), Value::ctor_tag());
             assert(res.second);
             token = std::make_unique<Token>(res.first, Token::ctor_tag());
-            ++(res.first->second.num_refs);
-            task = std::make_unique<CompileTask>(function, pass_params,
-                    std::make_unique<Token>(res.first, Token::ctor_tag()));
+            task = std::make_unique<CompileTask>(function, pass_params, res.first->second.result);
             if (!_executor_stack.empty()) {
-                task = _executor_stack.back().second->execute(std::move(task));
+                executor = _executor_stack.back().second;
             }
         }
     }
+    if (executor) {
+        task = executor->execute(std::move(task));
+    }
     if (task) {
         std::thread([&task](){ task.get()->run(); }).join();
     }
@@ -84,7 +86,7 @@ CompileCache::wait_pending()
     {
         std::lock_guard<std::mutex> guard(_lock);
         for (auto entry = _cached.begin(); entry != _cached.end(); ++entry) {
-            if (entry->second.compiled_function.get() == nullptr) {
+            if (entry->second.result->cf.load(std::memory_order_acquire) == nullptr) {
                 ++(entry->second.num_refs);
                 pending.push_back(std::make_unique<Token>(entry, Token::ctor_tag()));
             }
@@ -129,7 +131,7 @@ CompileCache::count_pending()
     std::lock_guard<std::mutex> guard(_lock);
     size_t pending = 0;
     for (const auto &entry: _cached) {
-        if (entry.second.compiled_function.get() == nullptr) {
+        if (entry.second.result->cf.load(std::memory_order_acquire) == nullptr) {
             ++pending;
         }
     }
@@ -139,12 +141,11 @@ CompileCache::count_pending()
 void
 CompileCache::CompileTask::run()
 {
-    auto &entry = token->_entry->second;
-    auto result = std::make_unique<CompiledFunction>(*function, pass_params);
-    std::lock_guard<std::mutex> guard(_lock);
-    entry.compiled_function = std::move(result);
-    entry.cf.store(entry.compiled_function.get(), std::memory_order_release);
-    entry.cond.notify_all();
+    auto compiled = std::make_unique<CompiledFunction>(*function, pass_params); // compile before taking the lock
+    std::lock_guard<std::mutex> guard(result->lock); // only the shared Result is touched; no cache token or _lock needed
+    result->compiled_function = std::move(compiled);
+    result->cf.store(result->compiled_function.get(), std::memory_order_release); // pairs with the acquire load in Value::get()
+    result->cond.notify_all(); // wake threads blocked in Value::wait_for_result()
 }
 
 } // namespace vespalib::eval
diff --git a/eval/src/vespa/eval/eval/llvm/compile_cache.h b/eval/src/vespa/eval/eval/llvm/compile_cache.h
index 09b5b2060f5..61d0cc83d94 100644
--- a/eval/src/vespa/eval/eval/llvm/compile_cache.h
+++ b/eval/src/vespa/eval/eval/llvm/compile_cache.h
@@ -23,16 +23,22 @@ class CompileCache
 {
 private:
     using Key = vespalib::string;
-    struct Value {
-        size_t num_refs;
+    struct Result { // compile outcome, held via shared_ptr by both the cache entry (Value) and its CompileTask
+        using SP = std::shared_ptr<Result>;
         std::atomic<const CompiledFunction *> cf;
+        std::mutex lock; // guards compiled_function; used together with cond
         std::condition_variable cond;
         CompiledFunction::UP compiled_function;
+        Result() : cf(nullptr), lock(), cond(), compiled_function(nullptr) {} // starts out pending: cf == nullptr
+    };
+    struct Value {
+        size_t num_refs;
+        Result::SP result;
         struct ctor_tag {};
-        Value(ctor_tag) : num_refs(1), cf(nullptr), cond(), compiled_function() {}
+        Value(ctor_tag) : num_refs(1), result(std::make_shared<Result>()) {}
         const CompiledFunction &wait_for_result();
         const CompiledFunction &get() {
-            const CompiledFunction *ptr = cf.load(std::memory_order_acquire);
+            const CompiledFunction *ptr = result->cf.load(std::memory_order_acquire);
             if (ptr == nullptr) {
                 return wait_for_result();
             }
@@ -43,10 +49,10 @@ private:
     static std::mutex _lock;
     static Map _cached;
     static uint64_t _executor_tag;
-    static std::vector<std::pair<uint64_t,Executor*>> _executor_stack;
+    static std::vector<std::pair<uint64_t,std::shared_ptr<Executor>>> _executor_stack;
 
     static void release(Map::iterator entry);
-    static uint64_t attach_executor(Executor &executor);
+    static uint64_t attach_executor(std::shared_ptr<Executor> executor);
     static void detach_executor(uint64_t tag);
 
 public:
@@ -54,7 +60,6 @@ public:
     {
     private:
         friend class CompileCache;
-        friend class CompileTask;
         struct ctor_tag {};
         CompileCache::Map::iterator _entry;
     public:
@@ -79,14 +84,15 @@ public:
         ExecutorBinding &operator=(ExecutorBinding &&) = delete;
         ExecutorBinding &operator=(const ExecutorBinding &) = delete;
         using UP = std::unique_ptr<ExecutorBinding>;
-        explicit ExecutorBinding(Executor &executor, ctor_tag) : _tag(attach_executor(executor)) {}
+        explicit ExecutorBinding(std::shared_ptr<Executor> executor, ctor_tag)
+            : _tag(attach_executor(std::move(executor))) {}
         ~ExecutorBinding() { detach_executor(_tag); }
     };
 
     static Token::UP compile(const Function &function, PassParams pass_params);
     static void wait_pending();
-    static ExecutorBinding::UP bind(Executor &executor) {
-        return std::make_unique<ExecutorBinding>(executor, ExecutorBinding::ctor_tag());
+    static ExecutorBinding::UP bind(std::shared_ptr<Executor> executor) {
+        return std::make_unique<ExecutorBinding>(std::move(executor), ExecutorBinding::ctor_tag());
     }
     static size_t num_cached();
     static size_t num_bound();
@@ -97,9 +103,9 @@ private:
     struct CompileTask : public Executor::Task {
         std::shared_ptr<Function const> function;
         PassParams pass_params;
-        Token::UP token;
-        CompileTask(const Function &function_in, PassParams pass_params_in, Token::UP token_in)
-            : function(function_in.shared_from_this()), pass_params(pass_params_in), token(std::move(token_in)) {}
+        Result::SP result;
+        CompileTask(const Function &function_in, PassParams pass_params_in, Result::SP result_in)
+            : function(function_in.shared_from_this()), pass_params(pass_params_in), result(std::move(result_in)) {}
         void run() override;
     };
 };
diff --git a/eval/src/vespa/eval/eval/operation.cpp b/eval/src/vespa/eval/eval/operation.cpp
index 581f65c0e31..bbd37ab68b2 100644
--- a/eval/src/vespa/eval/eval/operation.cpp
+++ b/eval/src/vespa/eval/eval/operation.cpp
@@ -49,6 +49,8 @@ double IsNan::f(double a) { return std::isnan(a) ? 1.0 : 0.0; }
 double Relu::f(double a) { return std::max(a, 0.0); }
 double Sigmoid::f(double a) { return 1.0 / (1.0 + std::exp(-1.0 * a)); }
 double Elu::f(double a) { return (a < 0) ? std::exp(a) - 1 : a; }
+//-----------------------------------------------------------------------------
+double Inv::f(double a) { return (1 / a); } // reciprocal of a; registered under "1/a" in make_op1_map()
 
 namespace {
 
@@ -102,6 +104,8 @@ std::map<vespalib::string,op1_t> make_op1_map() {
     add_op1(map, "relu(a)",    Relu::f);
     add_op1(map, "sigmoid(a)", Sigmoid::f);
     add_op1(map, "elu(a)",     Elu::f);
+    //-------------------------------------
+    add_op1(map, "1/a",        Inv::f);
     return map;
 }
 
diff --git a/eval/src/vespa/eval/eval/operation.h b/eval/src/vespa/eval/eval/operation.h
index a80193e704d..b00bb5e26fc 100644
--- a/eval/src/vespa/eval/eval/operation.h
+++ b/eval/src/vespa/eval/eval/operation.h
@@ -48,6 +48,8 @@ struct IsNan { static double f(double a); };
 struct Relu { static double f(double a); };
 struct Sigmoid { static double f(double a); };
 struct Elu { static double f(double a); };
+//-----------------------------------------------------------------------------
+struct Inv { static double f(double a); };
 
 using op1_t = double (*)(double);
 using op2_t = double (*)(double, double);
diff --git a/eval/src/vespa/eval/tensor/dense/dense_cell_range_function.cpp b/eval/src/vespa/eval/tensor/dense/dense_cell_range_function.cpp
index 9b93f5e7d72..84da53c8488 100644
--- a/eval/src/vespa/eval/tensor/dense/dense_cell_range_function.cpp
+++ b/eval/src/vespa/eval/tensor/dense/dense_cell_range_function.cpp
@@ -25,7 +25,7 @@ void my_cell_range_op(eval::InterpretedFunction::State &state, uint64_t param) {
 
 struct MyCellRangeOp {
     template <typename CT>
-    static auto get_fun() { return my_cell_range_op<CT>; }
+    static auto invoke() { return my_cell_range_op<CT>; }
 };
 
 } // namespace vespalib::tensor::<unnamed>
@@ -46,7 +46,9 @@ DenseCellRangeFunction::compile_self(const TensorEngine &, Stash &) const
 {
     static_assert(sizeof(uint64_t) == sizeof(this));
     assert(result_type().cell_type() == child().result_type().cell_type());
-    auto op = select_1<MyCellRangeOp>(result_type().cell_type());
+
+    using MyTypify = eval::TypifyCellType;
+    auto op = typify_invoke<1,MyTypify,MyCellRangeOp>(result_type().cell_type());
     return eval::InterpretedFunction::Instruction(op, (uint64_t)this);
 }
 
diff --git a/eval/src/vespa/eval/tensor/dense/dense_dot_product_function.cpp b/eval/src/vespa/eval/tensor/dense/dense_dot_product_function.cpp
index c9ff57e4a65..9e30451cd67 100644
--- a/eval/src/vespa/eval/tensor/dense/dense_dot_product_function.cpp
+++ b/eval/src/vespa/eval/tensor/dense/dense_dot_product_function.cpp
@@ -48,7 +48,7 @@ void my_cblas_float_dot_product_op(eval::InterpretedFunction::State &state, uint
 
 struct MyDotProductOp {
     template <typename LCT, typename RCT>
-    static auto get_fun() { return my_dot_product_op<LCT,RCT>; }
+    static auto invoke() { return my_dot_product_op<LCT,RCT>; }
 };
 
 eval::InterpretedFunction::op_function my_select(CellType lct, CellType rct) {
@@ -60,7 +60,8 @@ eval::InterpretedFunction::op_function my_select(CellType lct, CellType rct) {
             return my_cblas_float_dot_product_op;
         }
     }
-    return select_2<MyDotProductOp>(lct, rct);
+    using MyTypify = eval::TypifyCellType;
+    return typify_invoke<2,MyTypify,MyDotProductOp>(lct, rct);
 }
 
 } // namespace vespalib::tensor::<unnamed>
diff --git a/eval/src/vespa/eval/tensor/dense/dense_lambda_function.cpp b/eval/src/vespa/eval/tensor/dense/dense_lambda_function.cpp
index b60d732d7a9..e373ca09e11 100644
--- a/eval/src/vespa/eval/tensor/dense/dense_lambda_function.cpp
+++ b/eval/src/vespa/eval/tensor/dense/dense_lambda_function.cpp
@@ -95,7 +95,7 @@ void my_compiled_lambda_op(eval::InterpretedFunction::State &state, uint64_t par
 
 struct MyCompiledLambdaOp {
     template <typename CT>
-    static auto get_fun() { return my_compiled_lambda_op<CT>; }
+    static auto invoke() { return my_compiled_lambda_op<CT>; }
 };
 
 //-----------------------------------------------------------------------------
@@ -131,7 +131,7 @@ void my_interpreted_lambda_op(eval::InterpretedFunction::State &state, uint64_t
 
 struct MyInterpretedLambdaOp {
     template <typename CT>
-    static auto get_fun() { return my_interpreted_lambda_op<CT>; }
+    static auto invoke() { return my_interpreted_lambda_op<CT>; }
 };
 
 //-----------------------------------------------------------------------------
@@ -163,15 +163,16 @@ DenseLambdaFunction::compile_self(const TensorEngine &engine, Stash &stash) cons
 {
     assert(&engine == &prod_engine);
     auto mode = eval_mode();
+    using MyTypify = eval::TypifyCellType;
     if (mode == EvalMode::COMPILED) {
         CompiledParams &params = stash.create<CompiledParams>(_lambda);
-        auto op = select_1<MyCompiledLambdaOp>(result_type().cell_type());
+        auto op = typify_invoke<1,MyTypify,MyCompiledLambdaOp>(result_type().cell_type());
         static_assert(sizeof(&params) == sizeof(uint64_t));
         return Instruction(op, (uint64_t)(&params));
     } else {
         assert(mode == EvalMode::INTERPRETED);
         InterpretedParams &params = stash.create<InterpretedParams>(_lambda);
-        auto op = select_1<MyInterpretedLambdaOp>(result_type().cell_type());
+        auto op = typify_invoke<1,MyTypify,MyInterpretedLambdaOp>(result_type().cell_type());
         static_assert(sizeof(&params) == sizeof(uint64_t));
         return Instruction(op, (uint64_t)(&params));
     }
diff --git a/eval/src/vespa/eval/tensor/dense/dense_lambda_peek_function.cpp b/eval/src/vespa/eval/tensor/dense/dense_lambda_peek_function.cpp
index a5f532e643a..70bdc8ae7d6 100644
--- a/eval/src/vespa/eval/tensor/dense/dense_lambda_peek_function.cpp
+++ b/eval/src/vespa/eval/tensor/dense/dense_lambda_peek_function.cpp
@@ -45,7 +45,7 @@ void my_lambda_peek_op(InterpretedFunction::State &state, uint64_t param) {
 
 struct MyLambdaPeekOp {
     template <typename DST_CT, typename SRC_CT>
-    static auto get_fun() { return my_lambda_peek_op<DST_CT, SRC_CT>; }
+    static auto invoke() { return my_lambda_peek_op<DST_CT, SRC_CT>; }
 };
 
 } // namespace vespalib::tensor::<unnamed>
@@ -64,7 +64,8 @@ InterpretedFunction::Instruction
 DenseLambdaPeekFunction::compile_self(const TensorEngine &, Stash &stash) const
 {
     const Self &self = stash.create<Self>(result_type(), *_idx_fun);
-    auto op = select_2<MyLambdaPeekOp>(result_type().cell_type(), child().result_type().cell_type());
+    using MyTypify = eval::TypifyCellType;
+    auto op = typify_invoke<2,MyTypify,MyLambdaPeekOp>(result_type().cell_type(), child().result_type().cell_type());
     static_assert(sizeof(uint64_t) == sizeof(&self));
     assert(child().result_type().is_dense());
     return InterpretedFunction::Instruction(op, (uint64_t)&self);
diff --git a/eval/src/vespa/eval/tensor/dense/dense_matmul_function.cpp b/eval/src/vespa/eval/tensor/dense/dense_matmul_function.cpp
index 695e0fddd08..9c18cf285d4 100644
--- a/eval/src/vespa/eval/tensor/dense/dense_matmul_function.cpp
+++ b/eval/src/vespa/eval/tensor/dense/dense_matmul_function.cpp
@@ -80,47 +80,6 @@ void my_cblas_float_matmul_op(eval::InterpretedFunction::State &state, uint64_t
     state.pop_pop_push(state.stash.create<DenseTensorView>(self.result_type, TypedCells(dst_cells)));
 }
 
-template <bool lhs_common_inner, bool rhs_common_inner>
-struct MyMatMulOp {
-    template <typename LCT, typename RCT>
-    static auto get_fun() { return my_matmul_op<LCT,RCT,lhs_common_inner,rhs_common_inner>; }
-};
-
-template <bool lhs_common_inner, bool rhs_common_inner>
-eval::InterpretedFunction::op_function my_select3(CellType lct, CellType rct)
-{
-    if (lct == rct) {
-        if (lct == ValueType::CellType::DOUBLE) {
-            return my_cblas_double_matmul_op<lhs_common_inner,rhs_common_inner>;
-        }
-        if (lct == ValueType::CellType::FLOAT) {
-            return my_cblas_float_matmul_op<lhs_common_inner,rhs_common_inner>;
-        }
-    }
-    return select_2<MyMatMulOp<lhs_common_inner,rhs_common_inner>>(lct, rct);
-}
-
-template <bool lhs_common_inner>
-eval::InterpretedFunction::op_function my_select2(CellType lct, CellType rct,
-                                                  bool rhs_common_inner)
-{
-    if (rhs_common_inner) {
-        return my_select3<lhs_common_inner,true>(lct, rct);
-    } else {
-        return my_select3<lhs_common_inner,false>(lct, rct);
-    }
-}
-
-eval::InterpretedFunction::op_function my_select(CellType lct, CellType rct,
-                                                 bool lhs_common_inner, bool rhs_common_inner)
-{
-    if (lhs_common_inner) {
-        return my_select2<true>(lct, rct, rhs_common_inner);
-    } else {
-        return my_select2<false>(lct, rct, rhs_common_inner);
-    }
-}
-
 bool is_matrix(const ValueType &type) {
     return (type.is_dense() && (type.dimensions().size() == 2));
 }
@@ -160,6 +119,18 @@ const TensorFunction &create_matmul(const TensorFunction &a, const TensorFunctio
     }
 }
 
+struct MyGetFun { // typify_invoke target: R1/R2 = lhs/rhs cell types, R3/R4 = lhs/rhs common-inner flags (via ::value)
+    template<typename R1, typename R2, typename R3, typename R4> static auto invoke() { // returns the op function to run
+        if constexpr (std::is_same_v<R1,double> && std::is_same_v<R2,double>) { // compile-time choice: only the selected kernel is instantiated
+            return my_cblas_double_matmul_op<R3::value, R4::value>;
+        } else if constexpr (std::is_same_v<R1,float> && std::is_same_v<R2,float>) { // both sides float
+            return my_cblas_float_matmul_op<R3::value, R4::value>;
+        } else { // any other cell type combination: generic kernel
+            return my_matmul_op<R1, R2, R3::value, R4::value>;
+        }
+    }
+};
+
 } // namespace vespalib::tensor::<unnamed>
 
 DenseMatMulFunction::Self::Self(const eval::ValueType &result_type_in,
@@ -197,9 +168,11 @@ DenseMatMulFunction::~DenseMatMulFunction() = default;
 eval::InterpretedFunction::Instruction
 DenseMatMulFunction::compile_self(const TensorEngine &, Stash &stash) const
 {
+    using MyTypify = TypifyValue<eval::TypifyCellType,TypifyBool>;
     Self &self = stash.create<Self>(result_type(), _lhs_size, _common_size, _rhs_size);
-    auto op = my_select(lhs().result_type().cell_type(), rhs().result_type().cell_type(),
-                        _lhs_common_inner, _rhs_common_inner);
+    auto op = typify_invoke<4,MyTypify,MyGetFun>(
+            lhs().result_type().cell_type(), rhs().result_type().cell_type(),
+            _lhs_common_inner, _rhs_common_inner);
     return eval::InterpretedFunction::Instruction(op, (uint64_t)(&self));
 }
 
diff --git a/eval/src/vespa/eval/tensor/dense/dense_number_join_function.cpp b/eval/src/vespa/eval/tensor/dense/dense_number_join_function.cpp
index a28c8150d59..925627c5684 100644
--- a/eval/src/vespa/eval/tensor/dense/dense_number_join_function.cpp
+++ b/eval/src/vespa/eval/tensor/dense/dense_number_join_function.cpp
@@ -29,13 +29,6 @@ using State = eval::InterpretedFunction::State;
 
 namespace {
 
-template <typename CT, typename Fun>
-void apply_fun_1_to_n(CT *dst, const CT *pri, CT sec, size_t n, const Fun &fun) {
-    for (size_t i = 0; i < n; ++i) {
-        dst[i] = fun(pri[i], sec);
-    }
-}
-
 template <typename CT, bool inplace>
 ArrayRef<CT> make_dst_cells(ConstArrayRef<CT> src_cells, Stash &stash) {
     if (inplace) {
@@ -53,7 +46,7 @@ void my_number_join_op(State &state, uint64_t param) {
     CT number = state.peek(swap ? 1 : 0).as_double();
     auto src_cells = DenseTensorView::typify_cells<CT>(tensor);
     auto dst_cells = make_dst_cells<CT, inplace>(src_cells, state.stash);
-    apply_fun_1_to_n(dst_cells.begin(), src_cells.begin(), number, dst_cells.size(), my_op);
+    apply_op2_vec_num(dst_cells.begin(), src_cells.begin(), number, dst_cells.size(), my_op);
     if (inplace) {
         state.pop_pop_push(tensor);
     } else {
diff --git a/eval/src/vespa/eval/tensor/dense/dense_simple_join_function.cpp b/eval/src/vespa/eval/tensor/dense/dense_simple_join_function.cpp
index c358c9d618d..5f8fbcac9bb 100644
--- a/eval/src/vespa/eval/tensor/dense/dense_simple_join_function.cpp
+++ b/eval/src/vespa/eval/tensor/dense/dense_simple_join_function.cpp
@@ -53,20 +53,6 @@ struct JoinParams {
         : result_type(result_type_in), factor(factor_in), function(function_in) {}
 };
 
-template <typename OCT, typename PCT, typename SCT, typename Fun>
-void apply_fun_1_to_n(OCT *dst, const PCT *pri, SCT sec, size_t n, const Fun &fun) {
-    for (size_t i = 0; i < n; ++i) {
-        dst[i] = fun(pri[i], sec);
-    }
-}
-
-template <typename OCT, typename PCT, typename SCT, typename Fun>
-void apply_fun_n_to_n(OCT *dst, const PCT *pri, const SCT *sec, size_t n, const Fun &fun) {
-    for (size_t i = 0; i < n; ++i) {
-        dst[i] = fun(pri[i], sec[i]);
-    }
-}
-
 template <typename OCT, bool pri_mut, typename PCT>
 ArrayRef<OCT> make_dst_cells(ConstArrayRef<PCT> pri_cells, Stash &stash) {
     if constexpr (pri_mut && std::is_same<PCT,OCT>::value) {
@@ -88,12 +74,12 @@ void my_simple_join_op(State &state, uint64_t param) {
     auto sec_cells = DenseTensorView::typify_cells<SCT>(state.peek(swap ? 1 : 0));
     auto dst_cells = make_dst_cells<OCT, pri_mut>(pri_cells, state.stash);
     if (overlap == Overlap::FULL) {
-        apply_fun_n_to_n(dst_cells.begin(), pri_cells.begin(), sec_cells.begin(), dst_cells.size(), my_op);
+        apply_op2_vec_vec(dst_cells.begin(), pri_cells.begin(), sec_cells.begin(), dst_cells.size(), my_op);
     } else if (overlap == Overlap::OUTER) {
         size_t offset = 0;
         size_t factor = params.factor;
         for (SCT cell: sec_cells) {
-            apply_fun_1_to_n(dst_cells.begin() + offset, pri_cells.begin() + offset, cell, factor, my_op);
+            apply_op2_vec_num(dst_cells.begin() + offset, pri_cells.begin() + offset, cell, factor, my_op);
             offset += factor;
         }
     } else {
@@ -101,7 +87,7 @@ void my_simple_join_op(State &state, uint64_t param) {
         size_t offset = 0;
         size_t factor = params.factor;
         for (size_t i = 0; i < factor; ++i) {
-            apply_fun_n_to_n(dst_cells.begin() + offset, pri_cells.begin() + offset, sec_cells.begin(), sec_cells.size(), my_op);
+            apply_op2_vec_vec(dst_cells.begin() + offset, pri_cells.begin() + offset, sec_cells.begin(), sec_cells.size(), my_op);
             offset += sec_cells.size();
         }
     }
diff --git a/eval/src/vespa/eval/tensor/dense/dense_simple_map_function.cpp b/eval/src/vespa/eval/tensor/dense/dense_simple_map_function.cpp
index 784d356da39..b5f46fca70c 100644
--- a/eval/src/vespa/eval/tensor/dense/dense_simple_map_function.cpp
+++ b/eval/src/vespa/eval/tensor/dense/dense_simple_map_function.cpp
@@ -27,13 +27,6 @@ using State = eval::InterpretedFunction::State;
 
 namespace {
 
-template <typename CT, typename Fun>
-void apply_fun_to_n(CT *dst, const CT *src, size_t n, const Fun &fun) {
-    for (size_t i = 0; i < n; ++i) {
-        dst[i] = fun(src[i]);
-    }
-}
-
 template <typename CT, bool inplace>
 ArrayRef<CT> make_dst_cells(ConstArrayRef<CT> src_cells, Stash &stash) {
     if (inplace) {
@@ -49,7 +42,7 @@ void my_simple_map_op(State &state, uint64_t param) {
     auto const &child = state.peek(0);
     auto src_cells = DenseTensorView::typify_cells<CT>(child);
     auto dst_cells = make_dst_cells<CT, inplace>(src_cells, state.stash);
-    apply_fun_to_n(dst_cells.begin(), src_cells.begin(), dst_cells.size(), my_fun);
+    apply_op1_vec(dst_cells.begin(), src_cells.begin(), dst_cells.size(), my_fun);
     if (!inplace) {
         state.pop_push(state.stash.create<DenseTensorView>(child.type(), TypedCells(dst_cells)));
     }
diff --git a/eval/src/vespa/eval/tensor/dense/dense_single_reduce_function.cpp b/eval/src/vespa/eval/tensor/dense/dense_single_reduce_function.cpp
index 663993b6c26..571bcb79c9f 100644
--- a/eval/src/vespa/eval/tensor/dense/dense_single_reduce_function.cpp
+++ b/eval/src/vespa/eval/tensor/dense/dense_single_reduce_function.cpp
@@ -2,6 +2,7 @@
 
 #include "dense_single_reduce_function.h"
 #include "dense_tensor_view.h"
+#include <vespa/vespalib/util/typify.h>
 #include <vespa/eval/eval/value.h>
 
 namespace vespalib::tensor {
@@ -12,6 +13,8 @@ using eval::TensorEngine;
 using eval::TensorFunction;
 using eval::Value;
 using eval::ValueType;
+using eval::TypifyCellType;
+using eval::TypifyAggr;
 using eval::as;
 
 using namespace eval::tensor_function;
@@ -66,28 +69,13 @@ void my_single_reduce_op(InterpretedFunction::State &state, uint64_t param) {
     state.pop_push(state.stash.create<DenseTensorView>(params.result_type, TypedCells(dst_cells)));
 }
 
-template <typename CT>
-InterpretedFunction::op_function my_select_2(Aggr aggr) {
-    switch (aggr) {
-    case Aggr::AVG:   return my_single_reduce_op<CT, Avg<CT>>;
-    case Aggr::COUNT: return my_single_reduce_op<CT, Count<CT>>;
-    case Aggr::PROD:  return my_single_reduce_op<CT, Prod<CT>>;
-    case Aggr::SUM:   return my_single_reduce_op<CT, Sum<CT>>;
-    case Aggr::MAX:   return my_single_reduce_op<CT, Max<CT>>;
-    case Aggr::MIN:   return my_single_reduce_op<CT, Min<CT>>;
+struct MyGetFun { // typify_invoke target: R1 = cell type, R2 = typified Aggr value
+    template <typename R1, typename R2> static auto invoke() {
+        return my_single_reduce_op<R1, typename R2::template templ<R1>>; // aggregator template instantiated for cell type R1
     }
-    abort();
-}
+};
 
-InterpretedFunction::op_function my_select(CellType cell_type, Aggr aggr) {
-    if (cell_type == ValueType::CellType::DOUBLE) {
-        return my_select_2<double>(aggr);
-    }
-    if (cell_type == ValueType::CellType::FLOAT) {
-        return my_select_2<float>(aggr);
-    }
-    abort();
-}
+using MyTypify = TypifyValue<TypifyCellType,TypifyAggr>;
 
 bool check_input_type(const ValueType &type) {
     return (type.is_dense() && ((type.cell_type() == CellType::FLOAT) || (type.cell_type() == CellType::DOUBLE)));
@@ -109,7 +97,7 @@ DenseSingleReduceFunction::~DenseSingleReduceFunction() = default;
 InterpretedFunction::Instruction
 DenseSingleReduceFunction::compile_self(const TensorEngine &, Stash &stash) const
 {
-    auto op = my_select(result_type().cell_type(), _aggr);
+    auto op = typify_invoke<2,MyTypify,MyGetFun>(result_type().cell_type(), _aggr);
     auto &params = stash.create<Params>(result_type(), child().result_type(), _dim_idx);
     static_assert(sizeof(uint64_t) == sizeof(&params));
     return InterpretedFunction::Instruction(op, (uint64_t)&params);
diff --git a/eval/src/vespa/eval/tensor/dense/dense_tensor_create_function.cpp b/eval/src/vespa/eval/tensor/dense/dense_tensor_create_function.cpp
index 3533ab20175..7e887d4df34 100644
--- a/eval/src/vespa/eval/tensor/dense/dense_tensor_create_function.cpp
+++ b/eval/src/vespa/eval/tensor/dense/dense_tensor_create_function.cpp
@@ -34,7 +34,7 @@ void my_tensor_create_op(eval::InterpretedFunction::State &state, uint64_t param
 
 struct MyTensorCreateOp {
     template <typename CT>
-    static auto get_fun() { return my_tensor_create_op<CT>; }
+    static auto invoke() { return my_tensor_create_op<CT>; }
 };
 
 size_t get_index(const TensorSpec::Address &addr, const ValueType &type) {
@@ -72,7 +72,9 @@ eval::InterpretedFunction::Instruction
 DenseTensorCreateFunction::compile_self(const TensorEngine &, Stash &) const
 {
     static_assert(sizeof(uint64_t) == sizeof(&_self));
-    auto op = select_1<MyTensorCreateOp>(result_type().cell_type());
+
+    using MyTypify = eval::TypifyCellType;
+    auto op = typify_invoke<1,MyTypify,MyTensorCreateOp>(result_type().cell_type());
     return eval::InterpretedFunction::Instruction(op, (uint64_t)&_self);
 }
 
diff --git a/eval/src/vespa/eval/tensor/dense/dense_tensor_peek_function.cpp b/eval/src/vespa/eval/tensor/dense/dense_tensor_peek_function.cpp
index 5cb1cbfd88f..16c0b01b169 100644
--- a/eval/src/vespa/eval/tensor/dense/dense_tensor_peek_function.cpp
+++ b/eval/src/vespa/eval/tensor/dense/dense_tensor_peek_function.cpp
@@ -44,7 +44,7 @@ void my_tensor_peek_op(eval::InterpretedFunction::State &state, uint64_t param)
 
 struct MyTensorPeekOp {
     template <typename CT>
-    static auto get_fun() { return my_tensor_peek_op<CT>; }
+    static auto invoke() { return my_tensor_peek_op<CT>; }
 };
 
 } // namespace vespalib::tensor::<unnamed>
@@ -71,7 +71,8 @@ eval::InterpretedFunction::Instruction
 DenseTensorPeekFunction::compile_self(const TensorEngine &, Stash &) const
 {
     static_assert(sizeof(uint64_t) == sizeof(&_spec));
-    auto op = select_1<MyTensorPeekOp>(_children[0].get().result_type().cell_type());
+    using MyTypify = eval::TypifyCellType;
+    auto op = typify_invoke<1,MyTypify,MyTensorPeekOp>(_children[0].get().result_type().cell_type());
     return eval::InterpretedFunction::Instruction(op, (uint64_t)&_spec);
 }
 
diff --git a/eval/src/vespa/eval/tensor/dense/dense_xw_product_function.cpp b/eval/src/vespa/eval/tensor/dense/dense_xw_product_function.cpp
index a0d63a1ce1e..968308d69c9 100644
--- a/eval/src/vespa/eval/tensor/dense/dense_xw_product_function.cpp
+++ b/eval/src/vespa/eval/tensor/dense/dense_xw_product_function.cpp
@@ -76,33 +76,6 @@ void my_cblas_float_xw_product_op(eval::InterpretedFunction::State &state, uint6
     state.pop_pop_push(state.stash.create<DenseTensorView>(self.result_type, TypedCells(dst_cells)));
 }
 
-template <bool common_inner>
-struct MyXWProductOp {
-    template <typename LCT, typename RCT>
-    static auto get_fun() { return my_xw_product_op<LCT,RCT,common_inner>; }
-};
-
-template <bool common_inner>
-eval::InterpretedFunction::op_function my_select2(CellType lct, CellType rct) {
-    if (lct == rct) {
-        if (lct == ValueType::CellType::DOUBLE) {
-            return my_cblas_double_xw_product_op<common_inner>;
-        }
-        if (lct == ValueType::CellType::FLOAT) {
-            return my_cblas_float_xw_product_op<common_inner>;
-        }
-    }
-    return select_2<MyXWProductOp<common_inner>>(lct, rct);
-}
-
-eval::InterpretedFunction::op_function my_select(CellType lct, CellType rct, bool common_inner) {
-    if (common_inner) {
-        return my_select2<true>(lct, rct);
-    } else {
-        return my_select2<false>(lct, rct);
-    }
-}
-
 bool isDenseTensor(const ValueType &type, size_t d) {
     return (type.is_dense() && (type.dimensions().size() == d));
 }
@@ -132,6 +105,18 @@ const TensorFunction &createDenseXWProduct(const ValueType &res, const TensorFun
                                                 common_inner);
 }
 
+struct MyXWProductOp { // typify_invoke target: R1/R2 = lhs/rhs cell types, R3 = common-inner flag (via ::value)
+    template<typename R1, typename R2, typename R3> static auto invoke() { // returns the op function to run
+        if constexpr (std::is_same_v<R1,double> && std::is_same_v<R2,double>) { // compile-time choice: only the selected kernel is instantiated
+            return my_cblas_double_xw_product_op<R3::value>;
+        } else if constexpr (std::is_same_v<R1,float> && std::is_same_v<R2,float>) { // both sides float
+            return my_cblas_float_xw_product_op<R3::value>;
+        } else { // any other cell type combination: generic kernel
+            return my_xw_product_op<R1, R2, R3::value>;
+        }
+    }
+};
+
 } // namespace vespalib::tensor::<unnamed>
 
 DenseXWProductFunction::Self::Self(const eval::ValueType &result_type_in,
@@ -160,8 +145,10 @@ eval::InterpretedFunction::Instruction
 DenseXWProductFunction::compile_self(const TensorEngine &, Stash &stash) const
 {
     Self &self = stash.create<Self>(result_type(), _vector_size, _result_size);
-    auto op = my_select(lhs().result_type().cell_type(),
-                        rhs().result_type().cell_type(), _common_inner);
+    using MyTypify = TypifyValue<eval::TypifyCellType,vespalib::TypifyBool>;
+    auto op = typify_invoke<3,MyTypify,MyXWProductOp>(lhs().result_type().cell_type(),
+                                                      rhs().result_type().cell_type(),
+                                                      _common_inner);
     return eval::InterpretedFunction::Instruction(op, (uint64_t)(&self));
 }
 
diff --git a/eval/src/vespa/eval/tensor/dense/vector_from_doubles_function.cpp b/eval/src/vespa/eval/tensor/dense/vector_from_doubles_function.cpp
index 7a4b5917f00..57f727f7968 100644
--- a/eval/src/vespa/eval/tensor/dense/vector_from_doubles_function.cpp
+++ b/eval/src/vespa/eval/tensor/dense/vector_from_doubles_function.cpp
@@ -19,7 +19,7 @@ namespace {
 struct CallVectorFromDoubles {
     template <typename CT>
     static TypedCells
-    call(eval::InterpretedFunction::State &state, size_t numCells) {
+    invoke(eval::InterpretedFunction::State &state, size_t numCells) {
         ArrayRef<CT> outputCells = state.stash.create_array<CT>(numCells);
         for (size_t i = numCells; i-- > 0; ) {
             outputCells[i] = (CT) state.peek(0).as_double();
@@ -33,7 +33,8 @@ void my_vector_from_doubles_op(eval::InterpretedFunction::State &state, uint64_t
     const auto *self = (const VectorFromDoublesFunction::Self *)(param);
     CellType ct = self->resultType.cell_type();
     size_t numCells = self->resultSize;
-    TypedCells cells = dispatch_0<CallVectorFromDoubles>(ct, state, numCells);
+    using MyTypify = eval::TypifyCellType;
+    TypedCells cells = typify_invoke<1,MyTypify,CallVectorFromDoubles>(ct, state, numCells);
     const Value &result = state.stash.create<DenseTensorView>(self->resultType, cells);
     state.stack.emplace_back(result);
 }