diff options
Diffstat (limited to 'eval/src')
22 files changed, 504 insertions, 278 deletions
diff --git a/eval/src/tests/eval/compile_cache/compile_cache_test.cpp b/eval/src/tests/eval/compile_cache/compile_cache_test.cpp index 1de56e605c9..a0dad889d9a 100644 --- a/eval/src/tests/eval/compile_cache/compile_cache_test.cpp +++ b/eval/src/tests/eval/compile_cache/compile_cache_test.cpp @@ -5,12 +5,16 @@ #include <vespa/eval/eval/test/eval_spec.h> #include <vespa/vespalib/util/time.h> #include <vespa/vespalib/util/threadstackexecutor.h> +#include <vespa/vespalib/util/blockingthreadstackexecutor.h> +#include <vespa/vespalib/util/stringfmt.h> #include <thread> #include <set> using namespace vespalib; using namespace vespalib::eval; +using vespalib::make_string_short::fmt; + struct MyExecutor : public Executor { std::vector<Executor::Task::UP> tasks; Executor::Task::UP execute(Executor::Task::UP task) override { @@ -157,7 +161,7 @@ TEST("require that cache usage works") { } TEST("require that async cache usage works") { - ThreadStackExecutor executor(8, 256*1024); + auto executor = std::make_shared<ThreadStackExecutor>(8, 256*1024); auto binding = CompileCache::bind(executor); CompileCache::Token::UP token_a = CompileCache::compile(*Function::parse("x+y"), PassParams::SEPARATE); EXPECT_EQUAL(5.0, token_a->get().get_function<2>()(2.0, 3.0)); @@ -166,7 +170,6 @@ TEST("require that async cache usage works") { CompileCache::Token::UP token_c = CompileCache::compile(*Function::parse("x+y"), PassParams::SEPARATE); EXPECT_EQUAL(5.0, token_c->get().get_function<2>()(2.0, 3.0)); EXPECT_EQUAL(CompileCache::num_cached(), 2u); - executor.sync(); // wait for compile threads to drop all compile cache tokens token_a.reset(); TEST_DO(verify_cache(2, 2)); token_b.reset(); @@ -176,24 +179,24 @@ TEST("require that async cache usage works") { } TEST("require that compile tasks are run in the most recently bound executor") { - MyExecutor exe1; - MyExecutor exe2; + auto exe1 = std::make_shared<MyExecutor>(); + auto exe2 = std::make_shared<MyExecutor>(); auto token0 = 
CompileCache::compile(*Function::parse("a+b"), PassParams::SEPARATE); EXPECT_EQUAL(CompileCache::num_bound(), 0u); - EXPECT_EQUAL(exe1.tasks.size(), 0u); - EXPECT_EQUAL(exe2.tasks.size(), 0u); + EXPECT_EQUAL(exe1->tasks.size(), 0u); + EXPECT_EQUAL(exe2->tasks.size(), 0u); { auto bind1 = CompileCache::bind(exe1); auto token1 = CompileCache::compile(*Function::parse("a-b"), PassParams::SEPARATE); EXPECT_EQUAL(CompileCache::num_bound(), 1u); - EXPECT_EQUAL(exe1.tasks.size(), 1u); - EXPECT_EQUAL(exe2.tasks.size(), 0u); + EXPECT_EQUAL(exe1->tasks.size(), 1u); + EXPECT_EQUAL(exe2->tasks.size(), 0u); { auto bind2 = CompileCache::bind(exe2); auto token2 = CompileCache::compile(*Function::parse("a*b"), PassParams::SEPARATE); EXPECT_EQUAL(CompileCache::num_bound(), 2u); - EXPECT_EQUAL(exe1.tasks.size(), 1u); - EXPECT_EQUAL(exe2.tasks.size(), 1u); + EXPECT_EQUAL(exe1->tasks.size(), 1u); + EXPECT_EQUAL(exe2->tasks.size(), 1u); } EXPECT_EQUAL(CompileCache::num_bound(), 1u); } @@ -201,9 +204,9 @@ TEST("require that compile tasks are run in the most recently bound executor") { } TEST("require that executors may be unbound in any order") { - MyExecutor exe1; - MyExecutor exe2; - MyExecutor exe3; + auto exe1 = std::make_shared<MyExecutor>(); + auto exe2 = std::make_shared<MyExecutor>(); + auto exe3 = std::make_shared<MyExecutor>(); auto bind1 = CompileCache::bind(exe1); auto bind2 = CompileCache::bind(exe2); auto bind3 = CompileCache::bind(exe3); @@ -213,13 +216,13 @@ TEST("require that executors may be unbound in any order") { bind3.reset(); EXPECT_EQUAL(CompileCache::num_bound(), 1u); auto token = CompileCache::compile(*Function::parse("a+b"), PassParams::SEPARATE); - EXPECT_EQUAL(exe1.tasks.size(), 1u); - EXPECT_EQUAL(exe2.tasks.size(), 0u); - EXPECT_EQUAL(exe3.tasks.size(), 0u); + EXPECT_EQUAL(exe1->tasks.size(), 1u); + EXPECT_EQUAL(exe2->tasks.size(), 0u); + EXPECT_EQUAL(exe3->tasks.size(), 0u); } TEST("require that the same executor can be bound multiple times") { - 
MyExecutor exe1; + auto exe1 = std::make_shared<MyExecutor>(); auto bind1 = CompileCache::bind(exe1); auto bind2 = CompileCache::bind(exe1); auto bind3 = CompileCache::bind(exe1); @@ -230,7 +233,7 @@ TEST("require that the same executor can be bound multiple times") { EXPECT_EQUAL(CompileCache::num_bound(), 1u); auto token = CompileCache::compile(*Function::parse("a+b"), PassParams::SEPARATE); EXPECT_EQUAL(CompileCache::num_bound(), 1u); - EXPECT_EQUAL(exe1.tasks.size(), 1u); + EXPECT_EQUAL(exe1->tasks.size(), 1u); } struct CompileCheck : test::EvalSpec::EvalTest { @@ -286,9 +289,9 @@ TEST_F("compile sequentially, then run all conformance tests", test::EvalSpec()) TEST_F("compile concurrently (8 threads), then run all conformance tests", test::EvalSpec()) { f1.add_all_cases(); - ThreadStackExecutor executor(8, 256*1024); + auto executor = std::make_shared<ThreadStackExecutor>(8, 256*1024); auto binding = CompileCache::bind(executor); - while (executor.num_idle_workers() < 8) { + while (executor->num_idle_workers() < 8) { std::this_thread::sleep_for(1ms); } for (size_t i = 0; i < 2; ++i) { @@ -305,6 +308,43 @@ TEST_F("compile concurrently (8 threads), then run all conformance tests", test: } } +struct MyCompileTask : public Executor::Task { + size_t seed; + size_t loop; + MyCompileTask(size_t seed_in, size_t loop_in) : seed(seed_in), loop(loop_in) {} + void run() override { + for (size_t i = 0; i < loop; ++i) { + // use custom constant to make a unique function that needs compilation + auto token = CompileCache::compile(*Function::parse(fmt("%zu", seed + i)), PassParams::SEPARATE); + } + } +}; + +TEST_MT_FF("require that deadlock is avoided with blocking executor", 8, std::shared_ptr<Executor>(nullptr), TimeBomb(300)) { + size_t loop = 16; + if (thread_id == 0) { + auto t0 = steady_clock::now(); + f1 = std::make_shared<BlockingThreadStackExecutor>(2, 256*1024, 3); + auto binding = CompileCache::bind(f1); + TEST_BARRIER(); // #1 + for (size_t i = 0; i < num_threads; 
++i) { + f1->execute(std::make_unique<MyCompileTask>(i * loop, loop)); + } + TEST_BARRIER(); // #2 + auto t1 = steady_clock::now(); + fprintf(stderr, "deadlock test took %" PRIu64 " ms\n", count_ms(t1 - t0)); + + } else { + TEST_BARRIER(); // #1 + size_t seed = (10000 + (thread_id * loop)); + for (size_t i = 0; i < loop; ++i) { + // use custom constant to make a unique function that needs compilation + auto token = CompileCache::compile(*Function::parse(fmt("%zu", seed + i)), PassParams::SEPARATE); + } + TEST_BARRIER(); // #2 + } +} + //----------------------------------------------------------------------------- TEST_MAIN() { TEST_RUN_ALL(); } diff --git a/eval/src/tests/eval/inline_operation/inline_operation_test.cpp b/eval/src/tests/eval/inline_operation/inline_operation_test.cpp index 4520176e276..8895bd4bcbd 100644 --- a/eval/src/tests/eval/inline_operation/inline_operation_test.cpp +++ b/eval/src/tests/eval/inline_operation/inline_operation_test.cpp @@ -3,27 +3,29 @@ #include <vespa/eval/eval/operation.h> #include <vespa/eval/eval/inline_operation.h> #include <vespa/eval/eval/function.h> +#include <vespa/vespalib/util/typify.h> #include <vespa/vespalib/gtest/gtest.h> +using vespalib::typify_invoke; using namespace vespalib::eval; using namespace vespalib::eval::operation; -template <typename T> struct IsInlined { constexpr static bool value = true; }; -template <> struct IsInlined<CallOp1> { constexpr static bool value = false; }; -template <> struct IsInlined<CallOp2> { constexpr static bool value = false; }; +const int my_value = 42; +struct AsValue { template <typename T> static int invoke() { return my_value; } }; +struct AsRef { template <typename T> static const int &invoke() { return my_value; } }; -template <typename T> double test_op1(op1_t ref, double a, bool inlined) { - T op(ref); - EXPECT_EQ(IsInlined<T>::value, inlined); - EXPECT_EQ(op(a), ref(a)); - return op(a); +template <typename T> void test_op1(op1_t ref, double a, double expect) { + bool 
need_ref = std::is_same_v<T,CallOp1>; + T op = need_ref ? T(ref) : T(nullptr); + EXPECT_DOUBLE_EQ(ref(a), expect); + EXPECT_DOUBLE_EQ(op(a), expect); }; -template <typename T> double test_op2(op2_t ref, double a, double b, bool inlined) { - T op(ref); - EXPECT_EQ(IsInlined<T>::value, inlined); - EXPECT_EQ(op(a,b), ref(a,b)); - return op(a,b); +template <typename T> void test_op2(op2_t ref, double a, double b, double expect) { + bool need_ref = std::is_same_v<T,CallOp2>; + T op = need_ref ? T(ref) : T(nullptr); + EXPECT_DOUBLE_EQ(ref(a, b), expect); + EXPECT_DOUBLE_EQ(op(a, b), expect); }; op1_t as_op1(const vespalib::string &str) { @@ -41,33 +43,36 @@ op2_t as_op2(const vespalib::string &str) { } TEST(InlineOperationTest, op1_lambdas_are_recognized) { - EXPECT_EQ(as_op1("-a"), Neg::f); - EXPECT_EQ(as_op1("!a"), Not::f); - EXPECT_EQ(as_op1("cos(a)"), Cos::f); - EXPECT_EQ(as_op1("sin(a)"), Sin::f); - EXPECT_EQ(as_op1("tan(a)"), Tan::f); - EXPECT_EQ(as_op1("cosh(a)"), Cosh::f); - EXPECT_EQ(as_op1("sinh(a)"), Sinh::f); - EXPECT_EQ(as_op1("tanh(a)"), Tanh::f); - EXPECT_EQ(as_op1("acos(a)"), Acos::f); - EXPECT_EQ(as_op1("asin(a)"), Asin::f); - EXPECT_EQ(as_op1("atan(a)"), Atan::f); - EXPECT_EQ(as_op1("exp(a)"), Exp::f); - EXPECT_EQ(as_op1("log10(a)"), Log10::f); - EXPECT_EQ(as_op1("log(a)"), Log::f); - EXPECT_EQ(as_op1("sqrt(a)"), Sqrt::f); - EXPECT_EQ(as_op1("ceil(a)"), Ceil::f); - EXPECT_EQ(as_op1("fabs(a)"), Fabs::f); - EXPECT_EQ(as_op1("floor(a)"), Floor::f); - EXPECT_EQ(as_op1("isNan(a)"), IsNan::f); - EXPECT_EQ(as_op1("relu(a)"), Relu::f); - EXPECT_EQ(as_op1("sigmoid(a)"), Sigmoid::f); - EXPECT_EQ(as_op1("elu(a)"), Elu::f); + EXPECT_EQ(as_op1("-a"), &Neg::f); + EXPECT_EQ(as_op1("!a"), &Not::f); + EXPECT_EQ(as_op1("cos(a)"), &Cos::f); + EXPECT_EQ(as_op1("sin(a)"), &Sin::f); + EXPECT_EQ(as_op1("tan(a)"), &Tan::f); + EXPECT_EQ(as_op1("cosh(a)"), &Cosh::f); + EXPECT_EQ(as_op1("sinh(a)"), &Sinh::f); + EXPECT_EQ(as_op1("tanh(a)"), &Tanh::f); + 
EXPECT_EQ(as_op1("acos(a)"), &Acos::f); + EXPECT_EQ(as_op1("asin(a)"), &Asin::f); + EXPECT_EQ(as_op1("atan(a)"), &Atan::f); + EXPECT_EQ(as_op1("exp(a)"), &Exp::f); + EXPECT_EQ(as_op1("log10(a)"), &Log10::f); + EXPECT_EQ(as_op1("log(a)"), &Log::f); + EXPECT_EQ(as_op1("sqrt(a)"), &Sqrt::f); + EXPECT_EQ(as_op1("ceil(a)"), &Ceil::f); + EXPECT_EQ(as_op1("fabs(a)"), &Fabs::f); + EXPECT_EQ(as_op1("floor(a)"), &Floor::f); + EXPECT_EQ(as_op1("isNan(a)"), &IsNan::f); + EXPECT_EQ(as_op1("relu(a)"), &Relu::f); + EXPECT_EQ(as_op1("sigmoid(a)"), &Sigmoid::f); + EXPECT_EQ(as_op1("elu(a)"), &Elu::f); + //------------------------------------------- + EXPECT_EQ(as_op1("1/a"), &Inv::f); + EXPECT_EQ(as_op1("1.0/a"), &Inv::f); } TEST(InlineOperationTest, op1_lambdas_are_recognized_with_different_parameter_names) { - EXPECT_EQ(lookup_op1(*Function::parse({"x"}, "-x")).value(), Neg::f); - EXPECT_EQ(lookup_op1(*Function::parse({"x"}, "!x")).value(), Not::f); + EXPECT_EQ(lookup_op1(*Function::parse({"x"}, "-x")).value(), &Neg::f); + EXPECT_EQ(lookup_op1(*Function::parse({"x"}, "!x")).value(), &Not::f); } TEST(InlineOperationTest, non_op1_lambdas_are_not_recognized) { @@ -76,32 +81,32 @@ TEST(InlineOperationTest, non_op1_lambdas_are_not_recognized) { } TEST(InlineOperationTest, op2_lambdas_are_recognized) { - EXPECT_EQ(as_op2("a+b"), Add::f); - EXPECT_EQ(as_op2("a-b"), Sub::f); - EXPECT_EQ(as_op2("a*b"), Mul::f); - EXPECT_EQ(as_op2("a/b"), Div::f); - EXPECT_EQ(as_op2("a%b"), Mod::f); - EXPECT_EQ(as_op2("a^b"), Pow::f); - EXPECT_EQ(as_op2("a==b"), Equal::f); - EXPECT_EQ(as_op2("a!=b"), NotEqual::f); - EXPECT_EQ(as_op2("a~=b"), Approx::f); - EXPECT_EQ(as_op2("a<b"), Less::f); - EXPECT_EQ(as_op2("a<=b"), LessEqual::f); - EXPECT_EQ(as_op2("a>b"), Greater::f); - EXPECT_EQ(as_op2("a>=b"), GreaterEqual::f); - EXPECT_EQ(as_op2("a&&b"), And::f); - EXPECT_EQ(as_op2("a||b"), Or::f); - EXPECT_EQ(as_op2("atan2(a,b)"), Atan2::f); - EXPECT_EQ(as_op2("ldexp(a,b)"), Ldexp::f); - 
EXPECT_EQ(as_op2("pow(a,b)"), Pow::f); - EXPECT_EQ(as_op2("fmod(a,b)"), Mod::f); - EXPECT_EQ(as_op2("min(a,b)"), Min::f); - EXPECT_EQ(as_op2("max(a,b)"), Max::f); + EXPECT_EQ(as_op2("a+b"), &Add::f); + EXPECT_EQ(as_op2("a-b"), &Sub::f); + EXPECT_EQ(as_op2("a*b"), &Mul::f); + EXPECT_EQ(as_op2("a/b"), &Div::f); + EXPECT_EQ(as_op2("a%b"), &Mod::f); + EXPECT_EQ(as_op2("a^b"), &Pow::f); + EXPECT_EQ(as_op2("a==b"), &Equal::f); + EXPECT_EQ(as_op2("a!=b"), &NotEqual::f); + EXPECT_EQ(as_op2("a~=b"), &Approx::f); + EXPECT_EQ(as_op2("a<b"), &Less::f); + EXPECT_EQ(as_op2("a<=b"), &LessEqual::f); + EXPECT_EQ(as_op2("a>b"), &Greater::f); + EXPECT_EQ(as_op2("a>=b"), &GreaterEqual::f); + EXPECT_EQ(as_op2("a&&b"), &And::f); + EXPECT_EQ(as_op2("a||b"), &Or::f); + EXPECT_EQ(as_op2("atan2(a,b)"), &Atan2::f); + EXPECT_EQ(as_op2("ldexp(a,b)"), &Ldexp::f); + EXPECT_EQ(as_op2("pow(a,b)"), &Pow::f); + EXPECT_EQ(as_op2("fmod(a,b)"), &Mod::f); + EXPECT_EQ(as_op2("min(a,b)"), &Min::f); + EXPECT_EQ(as_op2("max(a,b)"), &Max::f); } TEST(InlineOperationTest, op2_lambdas_are_recognized_with_different_parameter_names) { - EXPECT_EQ(lookup_op2(*Function::parse({"x", "y"}, "x+y")).value(), Add::f); - EXPECT_EQ(lookup_op2(*Function::parse({"x", "y"}, "x-y")).value(), Sub::f); + EXPECT_EQ(lookup_op2(*Function::parse({"x", "y"}, "x+y")).value(), &Add::f); + EXPECT_EQ(lookup_op2(*Function::parse({"x", "y"}, "x-y")).value(), &Sub::f); } TEST(InlineOperationTest, non_op2_lambdas_are_not_recognized) { @@ -121,11 +126,37 @@ TEST(InlineOperationTest, generic_op2_wrapper_works) { EXPECT_EQ(op(3,7), 10); } +TEST(InlineOperationTest, op1_typifier_forwards_return_value_correctly) { + auto a = typify_invoke<1,TypifyOp1,AsValue>(Neg::f); + auto b = typify_invoke<1,TypifyOp1,AsRef>(Neg::f); + EXPECT_EQ(a, my_value); + EXPECT_EQ(b, my_value); + bool same_memory = (&(typify_invoke<1,TypifyOp1,AsRef>(Neg::f)) == &my_value); + EXPECT_EQ(same_memory, true); +} + +TEST(InlineOperationTest, 
op2_typifier_forwards_return_value_correctly) { + auto a = typify_invoke<1,TypifyOp2,AsValue>(Add::f); + auto b = typify_invoke<1,TypifyOp2,AsRef>(Add::f); + EXPECT_EQ(a, my_value); + EXPECT_EQ(b, my_value); + bool same_memory = (&(typify_invoke<1,TypifyOp2,AsRef>(Add::f)) == &my_value); + EXPECT_EQ(same_memory, true); +} + +TEST(InlineOperationTest, inline_op1_example_works) { + op1_t ignored = nullptr; + InlineOp1<Inv> op(ignored); + EXPECT_EQ(op(2.0), 0.5); + EXPECT_EQ(op(4.0f), 0.25f); + EXPECT_EQ(op(8.0), 0.125); +} + TEST(InlineOperationTest, inline_op2_example_works) { op2_t ignored = nullptr; InlineOp2<Add> op(ignored); - EXPECT_EQ(op(2,3), 5); - EXPECT_EQ(op(3,7), 10); + EXPECT_EQ(op(2.0, 3.0), 5.0); + EXPECT_EQ(op(3.0, 7.0), 10.0); } TEST(InlineOperationTest, parameter_swap_wrapper_works) { @@ -137,20 +168,151 @@ TEST(InlineOperationTest, parameter_swap_wrapper_works) { EXPECT_EQ(swap_op(3,7), 4); } -TEST(InlineOperationTest, resolved_op1_works) { - auto a = TypifyOp1::resolve(Neg::f, [](auto t){ return test_op1<typename decltype(t)::type>(Neg::f, 2.0, false); }); - // putting the lambda inside the EXPECT does not work - EXPECT_EQ(a, -2.0); +//----------------------------------------------------------------------------- + +TEST(InlineOperationTest, op1_exp_is_inlined) { + TypifyOp1::resolve(Exp::f, [](auto t) + { + using T = typename decltype(t)::type; + bool type_ok = std::is_same_v<T,InlineOp1<Exp>>; + op1_t ref = Exp::f; + EXPECT_TRUE(type_ok); + test_op1<T>(ref, 2.0, std::exp(2.0)); + test_op1<T>(ref, 3.0, std::exp(3.0)); + test_op1<T>(ref, 7.0, std::exp(7.0)); + }); +} + +TEST(InlineOperationTest, op1_inv_is_inlined) { + TypifyOp1::resolve(Inv::f, [](auto t) + { + using T = typename decltype(t)::type; + bool type_ok = std::is_same_v<T,InlineOp1<Inv>>; + op1_t ref = Inv::f; + EXPECT_TRUE(type_ok); + test_op1<T>(ref, 2.0, 1.0/2.0); + test_op1<T>(ref, 4.0, 1.0/4.0); + test_op1<T>(ref, 8.0, 1.0/8.0); + }); +} + +TEST(InlineOperationTest, 
op1_sqrt_is_inlined) { + TypifyOp1::resolve(Sqrt::f, [](auto t) + { + using T = typename decltype(t)::type; + bool type_ok = std::is_same_v<T,InlineOp1<Sqrt>>; + op1_t ref = Sqrt::f; + EXPECT_TRUE(type_ok); + test_op1<T>(ref, 2.0, sqrt(2.0)); + test_op1<T>(ref, 4.0, sqrt(4.0)); + test_op1<T>(ref, 64.0, sqrt(64.0)); + }); +} + +TEST(InlineOperationTest, op1_tanh_is_inlined) { + TypifyOp1::resolve(Tanh::f, [](auto t) + { + using T = typename decltype(t)::type; + bool type_ok = std::is_same_v<T,InlineOp1<Tanh>>; + op1_t ref = Tanh::f; + EXPECT_TRUE(type_ok); + test_op1<T>(ref, 0.1, std::tanh(0.1)); + test_op1<T>(ref, 0.3, std::tanh(0.3)); + test_op1<T>(ref, 0.7, std::tanh(0.7)); + }); +} + +TEST(InlineOperationTest, op1_neg_is_not_inlined) { + TypifyOp1::resolve(Neg::f, [](auto t) + { + using T = typename decltype(t)::type; + bool type_ok = std::is_same_v<T,CallOp1>; + op1_t ref = Neg::f; + EXPECT_TRUE(type_ok); + test_op1<T>(ref, 3.0, -3.0); + test_op1<T>(ref, 5.0, -5.0); + test_op1<T>(ref, -2.0, 2.0); + }); +} + +//----------------------------------------------------------------------------- + +TEST(InlineOperationTest, op2_add_is_inlined) { + TypifyOp2::resolve(Add::f, [](auto t) + { + using T = typename decltype(t)::type; + bool type_ok = std::is_same_v<T,InlineOp2<Add>>; + op2_t ref = Add::f; + EXPECT_TRUE(type_ok); + test_op2<T>(ref, 2.0, 2.0, 4.0); + test_op2<T>(ref, 3.0, 8.0, 11.0); + test_op2<T>(ref, 7.0, 1.0, 8.0); + }); +} + +TEST(InlineOperationTest, op2_div_is_inlined) { + TypifyOp2::resolve(Div::f, [](auto t) + { + using T = typename decltype(t)::type; + bool type_ok = std::is_same_v<T,InlineOp2<Div>>; + op2_t ref = Div::f; + EXPECT_TRUE(type_ok); + test_op2<T>(ref, 2.0, 2.0, 1.0); + test_op2<T>(ref, 3.0, 8.0, 3.0 / 8.0); + test_op2<T>(ref, 7.0, 5.0, 7.0 / 5.0); + }); +} + +TEST(InlineOperationTest, op2_mul_is_inlined) { + TypifyOp2::resolve(Mul::f, [](auto t) + { + using T = typename decltype(t)::type; + bool type_ok = std::is_same_v<T,InlineOp2<Mul>>; 
+ op2_t ref = Mul::f; + EXPECT_TRUE(type_ok); + test_op2<T>(ref, 2.0, 2.0, 4.0); + test_op2<T>(ref, 3.0, 8.0, 24.0); + test_op2<T>(ref, 7.0, 5.0, 35.0); + }); +} + +TEST(InlineOperationTest, op2_pow_is_inlined) { + TypifyOp2::resolve(Pow::f, [](auto t) + { + using T = typename decltype(t)::type; + bool type_ok = std::is_same_v<T,InlineOp2<Pow>>; + op2_t ref = Pow::f; + EXPECT_TRUE(type_ok); + test_op2<T>(ref, 2.0, 2.0, std::pow(2.0, 2.0)); + test_op2<T>(ref, 3.0, 8.0, std::pow(3.0, 8.0)); + test_op2<T>(ref, 7.0, 5.0, std::pow(7.0, 5.0)); + }); +} + +TEST(InlineOperationTest, op2_sub_is_inlined) { + TypifyOp2::resolve(Sub::f, [](auto t) + { + using T = typename decltype(t)::type; + bool type_ok = std::is_same_v<T,InlineOp2<Sub>>; + op2_t ref = Sub::f; + EXPECT_TRUE(type_ok); + test_op2<T>(ref, 3.0, 2.0, 1.0); + test_op2<T>(ref, 3.0, 8.0, -5.0); + test_op2<T>(ref, 7.0, 5.0, 2.0); + }); } -TEST(InlineOperationTest, resolved_op2_works) { - auto a = TypifyOp2::resolve(Add::f, [](auto t){ return test_op2<typename decltype(t)::type>(Add::f, 2.0, 5.0, true); }); - auto b = TypifyOp2::resolve(Mul::f, [](auto t){ return test_op2<typename decltype(t)::type>(Mul::f, 5.0, 3.0, true); }); - auto c = TypifyOp2::resolve(Sub::f, [](auto t){ return test_op2<typename decltype(t)::type>(Sub::f, 8.0, 5.0, false); }); - // putting the lambda inside the EXPECT does not work - EXPECT_EQ(a, 7.0); - EXPECT_EQ(b, 15.0); - EXPECT_EQ(c, 3.0); +TEST(InlineOperationTest, op2_mod_is_not_inlined) { + TypifyOp2::resolve(Mod::f, [](auto t) + { + using T = typename decltype(t)::type; + bool type_ok = std::is_same_v<T,CallOp2>; + op2_t ref = Mod::f; + EXPECT_TRUE(type_ok); + test_op2<T>(ref, 3.0, 2.0, std::fmod(3.0, 2.0)); + test_op2<T>(ref, 3.0, 8.0, std::fmod(3.0, 8.0)); + test_op2<T>(ref, 7.0, 5.0, std::fmod(7.0, 5.0)); + }); } GTEST_MAIN_RUN_ALL_TESTS() diff --git a/eval/src/vespa/eval/eval/aggr.cpp b/eval/src/vespa/eval/eval/aggr.cpp index d10bbc4abb8..8efb0ec9fe7 100644 --- 
a/eval/src/vespa/eval/eval/aggr.cpp +++ b/eval/src/vespa/eval/eval/aggr.cpp @@ -71,15 +71,11 @@ Aggregator::~Aggregator() Aggregator & Aggregator::create(Aggr aggr, Stash &stash) { - switch (aggr) { - case Aggr::AVG: return stash.create<Wrapper<aggr::Avg<double>>>(); - case Aggr::COUNT: return stash.create<Wrapper<aggr::Count<double>>>(); - case Aggr::PROD: return stash.create<Wrapper<aggr::Prod<double>>>(); - case Aggr::SUM: return stash.create<Wrapper<aggr::Sum<double>>>(); - case Aggr::MAX: return stash.create<Wrapper<aggr::Max<double>>>(); - case Aggr::MIN: return stash.create<Wrapper<aggr::Min<double>>>(); - } - LOG_ABORT("should not be reached"); + return TypifyAggr::resolve(aggr, [&stash](auto t)->Aggregator& + { + using T = typename decltype(t)::template templ<double>; + return stash.create<Wrapper<T>>(); + }); } std::vector<Aggr> diff --git a/eval/src/vespa/eval/eval/aggr.h b/eval/src/vespa/eval/eval/aggr.h index 8dea54d8abc..169f0b1d2af 100644 --- a/eval/src/vespa/eval/eval/aggr.h +++ b/eval/src/vespa/eval/eval/aggr.h @@ -2,6 +2,7 @@ #pragma once +#include <vespa/vespalib/util/typify.h> #include <vespa/vespalib/stllike/string.h> #include <vector> #include <map> @@ -118,5 +119,21 @@ public: }; } // namespave vespalib::eval::aggr + +struct TypifyAggr { + template <template<typename> typename TT> using Result = TypifyResultSimpleTemplate<TT>; + template <typename F> static decltype(auto) resolve(Aggr aggr, F &&f) { + switch (aggr) { + case Aggr::AVG: return f(Result<aggr::Avg>()); + case Aggr::COUNT: return f(Result<aggr::Count>()); + case Aggr::PROD: return f(Result<aggr::Prod>()); + case Aggr::SUM: return f(Result<aggr::Sum>()); + case Aggr::MAX: return f(Result<aggr::Max>()); + case Aggr::MIN: return f(Result<aggr::Min>()); + } + abort(); + } +}; + } // namespace vespalib::eval } // namespace vespalib diff --git a/eval/src/vespa/eval/eval/inline_operation.h b/eval/src/vespa/eval/eval/inline_operation.h index 493de9ea56c..fccf1874242 100644 --- 
a/eval/src/vespa/eval/eval/inline_operation.h +++ b/eval/src/vespa/eval/eval/inline_operation.h @@ -4,6 +4,7 @@ #include "operation.h" #include <vespa/vespalib/util/typify.h> +#include <cmath> namespace vespalib::eval::operation { @@ -15,11 +16,38 @@ struct CallOp1 { double operator()(double a) const { return my_op1(a); } }; +template <typename T> struct InlineOp1; +template <> struct InlineOp1<Exp> { + InlineOp1(op1_t) {} + template <typename A> constexpr auto operator()(A a) const { return exp(a); } +}; +template <> struct InlineOp1<Inv> { + InlineOp1(op1_t) {} + template <typename A> constexpr auto operator()(A a) const { return (A{1}/a); } +}; +template <> struct InlineOp1<Sqrt> { + InlineOp1(op1_t) {} + template <typename A> constexpr auto operator()(A a) const { return std::sqrt(a); } +}; +template <> struct InlineOp1<Tanh> { + InlineOp1(op1_t) {} + template <typename A> constexpr auto operator()(A a) const { return std::tanh(a); } +}; + struct TypifyOp1 { template <typename T> using Result = TypifyResultType<T>; template <typename F> static decltype(auto) resolve(op1_t value, F &&f) { - (void) value; - return f(Result<CallOp1>()); + if (value == Exp::f) { + return f(Result<InlineOp1<Exp>>()); + } else if (value == Inv::f) { + return f(Result<InlineOp1<Inv>>()); + } else if (value == Sqrt::f) { + return f(Result<InlineOp1<Sqrt>>()); + } else if (value == Tanh::f) { + return f(Result<InlineOp1<Tanh>>()); + } else { + return f(Result<CallOp1>()); + } } }; @@ -44,18 +72,36 @@ template <> struct InlineOp2<Add> { InlineOp2(op2_t) {} template <typename A, typename B> constexpr auto operator()(A a, B b) const { return (a+b); } }; +template <> struct InlineOp2<Div> { + InlineOp2(op2_t) {} + template <typename A, typename B> constexpr auto operator()(A a, B b) const { return (a/b); } +}; template <> struct InlineOp2<Mul> { InlineOp2(op2_t) {} template <typename A, typename B> constexpr auto operator()(A a, B b) const { return (a*b); } }; +template <> struct 
InlineOp2<Pow> { + InlineOp2(op2_t) {} + template <typename A, typename B> constexpr auto operator()(A a, B b) const { return std::pow(a,b); } +}; +template <> struct InlineOp2<Sub> { + InlineOp2(op2_t) {} + template <typename A, typename B> constexpr auto operator()(A a, B b) const { return (a-b); } +}; struct TypifyOp2 { template <typename T> using Result = TypifyResultType<T>; template <typename F> static decltype(auto) resolve(op2_t value, F &&f) { if (value == Add::f) { return f(Result<InlineOp2<Add>>()); + } else if (value == Div::f) { + return f(Result<InlineOp2<Div>>()); } else if (value == Mul::f) { return f(Result<InlineOp2<Mul>>()); + } else if (value == Pow::f) { + return f(Result<InlineOp2<Pow>>()); + } else if (value == Sub::f) { + return f(Result<InlineOp2<Sub>>()); } else { return f(Result<CallOp2>()); } @@ -64,4 +110,27 @@ struct TypifyOp2 { //----------------------------------------------------------------------------- +template <typename A, typename OP1> +void apply_op1_vec(A *dst, const A *src, size_t n, OP1 &&f) { + for (size_t i = 0; i < n; ++i) { + dst[i] = f(src[i]); + } +} + +template <typename D, typename A, typename B, typename OP2> +void apply_op2_vec_num(D *dst, const A *a, B b, size_t n, OP2 &&f) { + for (size_t i = 0; i < n; ++i) { + dst[i] = f(a[i], b); + } +} + +template <typename D, typename A, typename B, typename OP2> +void apply_op2_vec_vec(D *dst, const A *a, const B *b, size_t n, OP2 &&f) { + for (size_t i = 0; i < n; ++i) { + dst[i] = f(a[i], b[i]); + } +} + +//----------------------------------------------------------------------------- + } diff --git a/eval/src/vespa/eval/eval/llvm/compile_cache.cpp b/eval/src/vespa/eval/eval/llvm/compile_cache.cpp index 4aa18d3bb65..e2674a6e4d6 100644 --- a/eval/src/vespa/eval/eval/llvm/compile_cache.cpp +++ b/eval/src/vespa/eval/eval/llvm/compile_cache.cpp @@ -10,14 +10,14 @@ namespace eval { std::mutex CompileCache::_lock{}; CompileCache::Map CompileCache::_cached{}; uint64_t 
CompileCache::_executor_tag{0}; -std::vector<std::pair<uint64_t,Executor*>> CompileCache::_executor_stack{}; +std::vector<std::pair<uint64_t,std::shared_ptr<Executor>>> CompileCache::_executor_stack{}; const CompiledFunction & CompileCache::Value::wait_for_result() { - std::unique_lock<std::mutex> guard(_lock); - cond.wait(guard, [this](){ return bool(compiled_function); }); - return *compiled_function; + std::unique_lock<std::mutex> guard(result->lock); + result->cond.wait(guard, [this](){ return bool(result->compiled_function); }); + return *(result->compiled_function); } void @@ -30,10 +30,10 @@ CompileCache::release(Map::iterator entry) } uint64_t -CompileCache::attach_executor(Executor &executor) +CompileCache::attach_executor(std::shared_ptr<Executor> executor) { std::lock_guard<std::mutex> guard(_lock); - _executor_stack.emplace_back(++_executor_tag, &executor); + _executor_stack.emplace_back(++_executor_tag, std::move(executor)); return _executor_tag; } @@ -52,6 +52,7 @@ CompileCache::compile(const Function &function, PassParams pass_params) { Token::UP token; Executor::Task::UP task; + std::shared_ptr<Executor> executor; vespalib::string key = gen_key(function, pass_params); { std::lock_guard<std::mutex> guard(_lock); @@ -63,14 +64,15 @@ CompileCache::compile(const Function &function, PassParams pass_params) auto res = _cached.emplace(std::move(key), Value::ctor_tag()); assert(res.second); token = std::make_unique<Token>(res.first, Token::ctor_tag()); - ++(res.first->second.num_refs); - task = std::make_unique<CompileTask>(function, pass_params, - std::make_unique<Token>(res.first, Token::ctor_tag())); + task = std::make_unique<CompileTask>(function, pass_params, res.first->second.result); if (!_executor_stack.empty()) { - task = _executor_stack.back().second->execute(std::move(task)); + executor = _executor_stack.back().second; } } } + if (executor) { + task = executor->execute(std::move(task)); + } if (task) { std::thread([&task](){ task.get()->run(); 
}).join(); } @@ -84,7 +86,7 @@ CompileCache::wait_pending() { std::lock_guard<std::mutex> guard(_lock); for (auto entry = _cached.begin(); entry != _cached.end(); ++entry) { - if (entry->second.compiled_function.get() == nullptr) { + if (entry->second.result->cf.load(std::memory_order_acquire) == nullptr) { ++(entry->second.num_refs); pending.push_back(std::make_unique<Token>(entry, Token::ctor_tag())); } @@ -129,7 +131,7 @@ CompileCache::count_pending() std::lock_guard<std::mutex> guard(_lock); size_t pending = 0; for (const auto &entry: _cached) { - if (entry.second.compiled_function.get() == nullptr) { + if (entry.second.result->cf.load(std::memory_order_acquire) == nullptr) { ++pending; } } @@ -139,12 +141,11 @@ CompileCache::count_pending() void CompileCache::CompileTask::run() { - auto &entry = token->_entry->second; - auto result = std::make_unique<CompiledFunction>(*function, pass_params); - std::lock_guard<std::mutex> guard(_lock); - entry.compiled_function = std::move(result); - entry.cf.store(entry.compiled_function.get(), std::memory_order_release); - entry.cond.notify_all(); + auto compiled = std::make_unique<CompiledFunction>(*function, pass_params); + std::lock_guard<std::mutex> guard(result->lock); + result->compiled_function = std::move(compiled); + result->cf.store(result->compiled_function.get(), std::memory_order_release); + result->cond.notify_all(); } } // namespace vespalib::eval diff --git a/eval/src/vespa/eval/eval/llvm/compile_cache.h b/eval/src/vespa/eval/eval/llvm/compile_cache.h index 09b5b2060f5..61d0cc83d94 100644 --- a/eval/src/vespa/eval/eval/llvm/compile_cache.h +++ b/eval/src/vespa/eval/eval/llvm/compile_cache.h @@ -23,16 +23,22 @@ class CompileCache { private: using Key = vespalib::string; - struct Value { - size_t num_refs; + struct Result { + using SP = std::shared_ptr<Result>; std::atomic<const CompiledFunction *> cf; + std::mutex lock; std::condition_variable cond; CompiledFunction::UP compiled_function; + Result() : 
cf(nullptr), lock(), cond(), compiled_function(nullptr) {} + }; + struct Value { + size_t num_refs; + Result::SP result; struct ctor_tag {}; - Value(ctor_tag) : num_refs(1), cf(nullptr), cond(), compiled_function() {} + Value(ctor_tag) : num_refs(1), result(std::make_shared<Result>()) {} const CompiledFunction &wait_for_result(); const CompiledFunction &get() { - const CompiledFunction *ptr = cf.load(std::memory_order_acquire); + const CompiledFunction *ptr = result->cf.load(std::memory_order_acquire); if (ptr == nullptr) { return wait_for_result(); } @@ -43,10 +49,10 @@ private: static std::mutex _lock; static Map _cached; static uint64_t _executor_tag; - static std::vector<std::pair<uint64_t,Executor*>> _executor_stack; + static std::vector<std::pair<uint64_t,std::shared_ptr<Executor>>> _executor_stack; static void release(Map::iterator entry); - static uint64_t attach_executor(Executor &executor); + static uint64_t attach_executor(std::shared_ptr<Executor> executor); static void detach_executor(uint64_t tag); public: @@ -54,7 +60,6 @@ public: { private: friend class CompileCache; - friend class CompileTask; struct ctor_tag {}; CompileCache::Map::iterator _entry; public: @@ -79,14 +84,15 @@ public: ExecutorBinding &operator=(ExecutorBinding &&) = delete; ExecutorBinding &operator=(const ExecutorBinding &) = delete; using UP = std::unique_ptr<ExecutorBinding>; - explicit ExecutorBinding(Executor &executor, ctor_tag) : _tag(attach_executor(executor)) {} + explicit ExecutorBinding(std::shared_ptr<Executor> executor, ctor_tag) + : _tag(attach_executor(std::move(executor))) {} ~ExecutorBinding() { detach_executor(_tag); } }; static Token::UP compile(const Function &function, PassParams pass_params); static void wait_pending(); - static ExecutorBinding::UP bind(Executor &executor) { - return std::make_unique<ExecutorBinding>(executor, ExecutorBinding::ctor_tag()); + static ExecutorBinding::UP bind(std::shared_ptr<Executor> executor) { + return 
std::make_unique<ExecutorBinding>(std::move(executor), ExecutorBinding::ctor_tag()); } static size_t num_cached(); static size_t num_bound(); @@ -97,9 +103,9 @@ private: struct CompileTask : public Executor::Task { std::shared_ptr<Function const> function; PassParams pass_params; - Token::UP token; - CompileTask(const Function &function_in, PassParams pass_params_in, Token::UP token_in) - : function(function_in.shared_from_this()), pass_params(pass_params_in), token(std::move(token_in)) {} + Result::SP result; + CompileTask(const Function &function_in, PassParams pass_params_in, Result::SP result_in) + : function(function_in.shared_from_this()), pass_params(pass_params_in), result(std::move(result_in)) {} void run() override; }; }; diff --git a/eval/src/vespa/eval/eval/operation.cpp b/eval/src/vespa/eval/eval/operation.cpp index 581f65c0e31..bbd37ab68b2 100644 --- a/eval/src/vespa/eval/eval/operation.cpp +++ b/eval/src/vespa/eval/eval/operation.cpp @@ -49,6 +49,8 @@ double IsNan::f(double a) { return std::isnan(a) ? 1.0 : 0.0; } double Relu::f(double a) { return std::max(a, 0.0); } double Sigmoid::f(double a) { return 1.0 / (1.0 + std::exp(-1.0 * a)); } double Elu::f(double a) { return (a < 0) ? 
std::exp(a) - 1 : a; } +//----------------------------------------------------------------------------- +double Inv::f(double a) { return (1 / a); } namespace { @@ -102,6 +104,8 @@ std::map<vespalib::string,op1_t> make_op1_map() { add_op1(map, "relu(a)", Relu::f); add_op1(map, "sigmoid(a)", Sigmoid::f); add_op1(map, "elu(a)", Elu::f); + //------------------------------------- + add_op1(map, "1/a", Inv::f); return map; } diff --git a/eval/src/vespa/eval/eval/operation.h b/eval/src/vespa/eval/eval/operation.h index a80193e704d..b00bb5e26fc 100644 --- a/eval/src/vespa/eval/eval/operation.h +++ b/eval/src/vespa/eval/eval/operation.h @@ -48,6 +48,8 @@ struct IsNan { static double f(double a); }; struct Relu { static double f(double a); }; struct Sigmoid { static double f(double a); }; struct Elu { static double f(double a); }; +//----------------------------------------------------------------------------- +struct Inv { static double f(double a); }; using op1_t = double (*)(double); using op2_t = double (*)(double, double); diff --git a/eval/src/vespa/eval/tensor/dense/dense_cell_range_function.cpp b/eval/src/vespa/eval/tensor/dense/dense_cell_range_function.cpp index 9b93f5e7d72..84da53c8488 100644 --- a/eval/src/vespa/eval/tensor/dense/dense_cell_range_function.cpp +++ b/eval/src/vespa/eval/tensor/dense/dense_cell_range_function.cpp @@ -25,7 +25,7 @@ void my_cell_range_op(eval::InterpretedFunction::State &state, uint64_t param) { struct MyCellRangeOp { template <typename CT> - static auto get_fun() { return my_cell_range_op<CT>; } + static auto invoke() { return my_cell_range_op<CT>; } }; } // namespace vespalib::tensor::<unnamed> @@ -46,7 +46,9 @@ DenseCellRangeFunction::compile_self(const TensorEngine &, Stash &) const { static_assert(sizeof(uint64_t) == sizeof(this)); assert(result_type().cell_type() == child().result_type().cell_type()); - auto op = select_1<MyCellRangeOp>(result_type().cell_type()); + + using MyTypify = eval::TypifyCellType; + auto op = 
typify_invoke<1,MyTypify,MyCellRangeOp>(result_type().cell_type()); return eval::InterpretedFunction::Instruction(op, (uint64_t)this); } diff --git a/eval/src/vespa/eval/tensor/dense/dense_dot_product_function.cpp b/eval/src/vespa/eval/tensor/dense/dense_dot_product_function.cpp index c9ff57e4a65..9e30451cd67 100644 --- a/eval/src/vespa/eval/tensor/dense/dense_dot_product_function.cpp +++ b/eval/src/vespa/eval/tensor/dense/dense_dot_product_function.cpp @@ -48,7 +48,7 @@ void my_cblas_float_dot_product_op(eval::InterpretedFunction::State &state, uint struct MyDotProductOp { template <typename LCT, typename RCT> - static auto get_fun() { return my_dot_product_op<LCT,RCT>; } + static auto invoke() { return my_dot_product_op<LCT,RCT>; } }; eval::InterpretedFunction::op_function my_select(CellType lct, CellType rct) { @@ -60,7 +60,8 @@ eval::InterpretedFunction::op_function my_select(CellType lct, CellType rct) { return my_cblas_float_dot_product_op; } } - return select_2<MyDotProductOp>(lct, rct); + using MyTypify = eval::TypifyCellType; + return typify_invoke<2,MyTypify,MyDotProductOp>(lct, rct); } } // namespace vespalib::tensor::<unnamed> diff --git a/eval/src/vespa/eval/tensor/dense/dense_lambda_function.cpp b/eval/src/vespa/eval/tensor/dense/dense_lambda_function.cpp index b60d732d7a9..e373ca09e11 100644 --- a/eval/src/vespa/eval/tensor/dense/dense_lambda_function.cpp +++ b/eval/src/vespa/eval/tensor/dense/dense_lambda_function.cpp @@ -95,7 +95,7 @@ void my_compiled_lambda_op(eval::InterpretedFunction::State &state, uint64_t par struct MyCompiledLambdaOp { template <typename CT> - static auto get_fun() { return my_compiled_lambda_op<CT>; } + static auto invoke() { return my_compiled_lambda_op<CT>; } }; //----------------------------------------------------------------------------- @@ -131,7 +131,7 @@ void my_interpreted_lambda_op(eval::InterpretedFunction::State &state, uint64_t struct MyInterpretedLambdaOp { template <typename CT> - static auto get_fun() { 
return my_interpreted_lambda_op<CT>; } + static auto invoke() { return my_interpreted_lambda_op<CT>; } }; //----------------------------------------------------------------------------- @@ -163,15 +163,16 @@ DenseLambdaFunction::compile_self(const TensorEngine &engine, Stash &stash) cons { assert(&engine == &prod_engine); auto mode = eval_mode(); + using MyTypify = eval::TypifyCellType; if (mode == EvalMode::COMPILED) { CompiledParams &params = stash.create<CompiledParams>(_lambda); - auto op = select_1<MyCompiledLambdaOp>(result_type().cell_type()); + auto op = typify_invoke<1,MyTypify,MyCompiledLambdaOp>(result_type().cell_type()); static_assert(sizeof(&params) == sizeof(uint64_t)); return Instruction(op, (uint64_t)(&params)); } else { assert(mode == EvalMode::INTERPRETED); InterpretedParams &params = stash.create<InterpretedParams>(_lambda); - auto op = select_1<MyInterpretedLambdaOp>(result_type().cell_type()); + auto op = typify_invoke<1,MyTypify,MyInterpretedLambdaOp>(result_type().cell_type()); static_assert(sizeof(&params) == sizeof(uint64_t)); return Instruction(op, (uint64_t)(&params)); } diff --git a/eval/src/vespa/eval/tensor/dense/dense_lambda_peek_function.cpp b/eval/src/vespa/eval/tensor/dense/dense_lambda_peek_function.cpp index a5f532e643a..70bdc8ae7d6 100644 --- a/eval/src/vespa/eval/tensor/dense/dense_lambda_peek_function.cpp +++ b/eval/src/vespa/eval/tensor/dense/dense_lambda_peek_function.cpp @@ -45,7 +45,7 @@ void my_lambda_peek_op(InterpretedFunction::State &state, uint64_t param) { struct MyLambdaPeekOp { template <typename DST_CT, typename SRC_CT> - static auto get_fun() { return my_lambda_peek_op<DST_CT, SRC_CT>; } + static auto invoke() { return my_lambda_peek_op<DST_CT, SRC_CT>; } }; } // namespace vespalib::tensor::<unnamed> @@ -64,7 +64,8 @@ InterpretedFunction::Instruction DenseLambdaPeekFunction::compile_self(const TensorEngine &, Stash &stash) const { const Self &self = stash.create<Self>(result_type(), *_idx_fun); - auto op = 
select_2<MyLambdaPeekOp>(result_type().cell_type(), child().result_type().cell_type()); + using MyTypify = eval::TypifyCellType; + auto op = typify_invoke<2,MyTypify,MyLambdaPeekOp>(result_type().cell_type(), child().result_type().cell_type()); static_assert(sizeof(uint64_t) == sizeof(&self)); assert(child().result_type().is_dense()); return InterpretedFunction::Instruction(op, (uint64_t)&self); diff --git a/eval/src/vespa/eval/tensor/dense/dense_matmul_function.cpp b/eval/src/vespa/eval/tensor/dense/dense_matmul_function.cpp index 695e0fddd08..9c18cf285d4 100644 --- a/eval/src/vespa/eval/tensor/dense/dense_matmul_function.cpp +++ b/eval/src/vespa/eval/tensor/dense/dense_matmul_function.cpp @@ -80,47 +80,6 @@ void my_cblas_float_matmul_op(eval::InterpretedFunction::State &state, uint64_t state.pop_pop_push(state.stash.create<DenseTensorView>(self.result_type, TypedCells(dst_cells))); } -template <bool lhs_common_inner, bool rhs_common_inner> -struct MyMatMulOp { - template <typename LCT, typename RCT> - static auto get_fun() { return my_matmul_op<LCT,RCT,lhs_common_inner,rhs_common_inner>; } -}; - -template <bool lhs_common_inner, bool rhs_common_inner> -eval::InterpretedFunction::op_function my_select3(CellType lct, CellType rct) -{ - if (lct == rct) { - if (lct == ValueType::CellType::DOUBLE) { - return my_cblas_double_matmul_op<lhs_common_inner,rhs_common_inner>; - } - if (lct == ValueType::CellType::FLOAT) { - return my_cblas_float_matmul_op<lhs_common_inner,rhs_common_inner>; - } - } - return select_2<MyMatMulOp<lhs_common_inner,rhs_common_inner>>(lct, rct); -} - -template <bool lhs_common_inner> -eval::InterpretedFunction::op_function my_select2(CellType lct, CellType rct, - bool rhs_common_inner) -{ - if (rhs_common_inner) { - return my_select3<lhs_common_inner,true>(lct, rct); - } else { - return my_select3<lhs_common_inner,false>(lct, rct); - } -} - -eval::InterpretedFunction::op_function my_select(CellType lct, CellType rct, - bool lhs_common_inner, bool 
rhs_common_inner) -{ - if (lhs_common_inner) { - return my_select2<true>(lct, rct, rhs_common_inner); - } else { - return my_select2<false>(lct, rct, rhs_common_inner); - } -} - bool is_matrix(const ValueType &type) { return (type.is_dense() && (type.dimensions().size() == 2)); } @@ -160,6 +119,18 @@ const TensorFunction &create_matmul(const TensorFunction &a, const TensorFunctio } } +struct MyGetFun { + template<typename R1, typename R2, typename R3, typename R4> static auto invoke() { + if (std::is_same_v<R1,double> && std::is_same_v<R2,double>) { + return my_cblas_double_matmul_op<R3::value, R4::value>; + } else if (std::is_same_v<R1,float> && std::is_same_v<R2,float>) { + return my_cblas_float_matmul_op<R3::value, R4::value>; + } else { + return my_matmul_op<R1, R2, R3::value, R4::value>; + } + } +}; + } // namespace vespalib::tensor::<unnamed> DenseMatMulFunction::Self::Self(const eval::ValueType &result_type_in, @@ -197,9 +168,11 @@ DenseMatMulFunction::~DenseMatMulFunction() = default; eval::InterpretedFunction::Instruction DenseMatMulFunction::compile_self(const TensorEngine &, Stash &stash) const { + using MyTypify = TypifyValue<eval::TypifyCellType,TypifyBool>; Self &self = stash.create<Self>(result_type(), _lhs_size, _common_size, _rhs_size); - auto op = my_select(lhs().result_type().cell_type(), rhs().result_type().cell_type(), - _lhs_common_inner, _rhs_common_inner); + auto op = typify_invoke<4,MyTypify,MyGetFun>( + lhs().result_type().cell_type(), rhs().result_type().cell_type(), + _lhs_common_inner, _rhs_common_inner); return eval::InterpretedFunction::Instruction(op, (uint64_t)(&self)); } diff --git a/eval/src/vespa/eval/tensor/dense/dense_number_join_function.cpp b/eval/src/vespa/eval/tensor/dense/dense_number_join_function.cpp index a28c8150d59..925627c5684 100644 --- a/eval/src/vespa/eval/tensor/dense/dense_number_join_function.cpp +++ b/eval/src/vespa/eval/tensor/dense/dense_number_join_function.cpp @@ -29,13 +29,6 @@ using State = 
eval::InterpretedFunction::State; namespace { -template <typename CT, typename Fun> -void apply_fun_1_to_n(CT *dst, const CT *pri, CT sec, size_t n, const Fun &fun) { - for (size_t i = 0; i < n; ++i) { - dst[i] = fun(pri[i], sec); - } -} - template <typename CT, bool inplace> ArrayRef<CT> make_dst_cells(ConstArrayRef<CT> src_cells, Stash &stash) { if (inplace) { @@ -53,7 +46,7 @@ void my_number_join_op(State &state, uint64_t param) { CT number = state.peek(swap ? 1 : 0).as_double(); auto src_cells = DenseTensorView::typify_cells<CT>(tensor); auto dst_cells = make_dst_cells<CT, inplace>(src_cells, state.stash); - apply_fun_1_to_n(dst_cells.begin(), src_cells.begin(), number, dst_cells.size(), my_op); + apply_op2_vec_num(dst_cells.begin(), src_cells.begin(), number, dst_cells.size(), my_op); if (inplace) { state.pop_pop_push(tensor); } else { diff --git a/eval/src/vespa/eval/tensor/dense/dense_simple_join_function.cpp b/eval/src/vespa/eval/tensor/dense/dense_simple_join_function.cpp index c358c9d618d..5f8fbcac9bb 100644 --- a/eval/src/vespa/eval/tensor/dense/dense_simple_join_function.cpp +++ b/eval/src/vespa/eval/tensor/dense/dense_simple_join_function.cpp @@ -53,20 +53,6 @@ struct JoinParams { : result_type(result_type_in), factor(factor_in), function(function_in) {} }; -template <typename OCT, typename PCT, typename SCT, typename Fun> -void apply_fun_1_to_n(OCT *dst, const PCT *pri, SCT sec, size_t n, const Fun &fun) { - for (size_t i = 0; i < n; ++i) { - dst[i] = fun(pri[i], sec); - } -} - -template <typename OCT, typename PCT, typename SCT, typename Fun> -void apply_fun_n_to_n(OCT *dst, const PCT *pri, const SCT *sec, size_t n, const Fun &fun) { - for (size_t i = 0; i < n; ++i) { - dst[i] = fun(pri[i], sec[i]); - } -} - template <typename OCT, bool pri_mut, typename PCT> ArrayRef<OCT> make_dst_cells(ConstArrayRef<PCT> pri_cells, Stash &stash) { if constexpr (pri_mut && std::is_same<PCT,OCT>::value) { @@ -88,12 +74,12 @@ void my_simple_join_op(State &state, 
uint64_t param) { auto sec_cells = DenseTensorView::typify_cells<SCT>(state.peek(swap ? 1 : 0)); auto dst_cells = make_dst_cells<OCT, pri_mut>(pri_cells, state.stash); if (overlap == Overlap::FULL) { - apply_fun_n_to_n(dst_cells.begin(), pri_cells.begin(), sec_cells.begin(), dst_cells.size(), my_op); + apply_op2_vec_vec(dst_cells.begin(), pri_cells.begin(), sec_cells.begin(), dst_cells.size(), my_op); } else if (overlap == Overlap::OUTER) { size_t offset = 0; size_t factor = params.factor; for (SCT cell: sec_cells) { - apply_fun_1_to_n(dst_cells.begin() + offset, pri_cells.begin() + offset, cell, factor, my_op); + apply_op2_vec_num(dst_cells.begin() + offset, pri_cells.begin() + offset, cell, factor, my_op); offset += factor; } } else { @@ -101,7 +87,7 @@ void my_simple_join_op(State &state, uint64_t param) { size_t offset = 0; size_t factor = params.factor; for (size_t i = 0; i < factor; ++i) { - apply_fun_n_to_n(dst_cells.begin() + offset, pri_cells.begin() + offset, sec_cells.begin(), sec_cells.size(), my_op); + apply_op2_vec_vec(dst_cells.begin() + offset, pri_cells.begin() + offset, sec_cells.begin(), sec_cells.size(), my_op); offset += sec_cells.size(); } } diff --git a/eval/src/vespa/eval/tensor/dense/dense_simple_map_function.cpp b/eval/src/vespa/eval/tensor/dense/dense_simple_map_function.cpp index 784d356da39..b5f46fca70c 100644 --- a/eval/src/vespa/eval/tensor/dense/dense_simple_map_function.cpp +++ b/eval/src/vespa/eval/tensor/dense/dense_simple_map_function.cpp @@ -27,13 +27,6 @@ using State = eval::InterpretedFunction::State; namespace { -template <typename CT, typename Fun> -void apply_fun_to_n(CT *dst, const CT *src, size_t n, const Fun &fun) { - for (size_t i = 0; i < n; ++i) { - dst[i] = fun(src[i]); - } -} - template <typename CT, bool inplace> ArrayRef<CT> make_dst_cells(ConstArrayRef<CT> src_cells, Stash &stash) { if (inplace) { @@ -49,7 +42,7 @@ void my_simple_map_op(State &state, uint64_t param) { auto const &child = state.peek(0); auto 
src_cells = DenseTensorView::typify_cells<CT>(child); auto dst_cells = make_dst_cells<CT, inplace>(src_cells, state.stash); - apply_fun_to_n(dst_cells.begin(), src_cells.begin(), dst_cells.size(), my_fun); + apply_op1_vec(dst_cells.begin(), src_cells.begin(), dst_cells.size(), my_fun); if (!inplace) { state.pop_push(state.stash.create<DenseTensorView>(child.type(), TypedCells(dst_cells))); } diff --git a/eval/src/vespa/eval/tensor/dense/dense_single_reduce_function.cpp b/eval/src/vespa/eval/tensor/dense/dense_single_reduce_function.cpp index 663993b6c26..571bcb79c9f 100644 --- a/eval/src/vespa/eval/tensor/dense/dense_single_reduce_function.cpp +++ b/eval/src/vespa/eval/tensor/dense/dense_single_reduce_function.cpp @@ -2,6 +2,7 @@ #include "dense_single_reduce_function.h" #include "dense_tensor_view.h" +#include <vespa/vespalib/util/typify.h> #include <vespa/eval/eval/value.h> namespace vespalib::tensor { @@ -12,6 +13,8 @@ using eval::TensorEngine; using eval::TensorFunction; using eval::Value; using eval::ValueType; +using eval::TypifyCellType; +using eval::TypifyAggr; using eval::as; using namespace eval::tensor_function; @@ -66,28 +69,13 @@ void my_single_reduce_op(InterpretedFunction::State &state, uint64_t param) { state.pop_push(state.stash.create<DenseTensorView>(params.result_type, TypedCells(dst_cells))); } -template <typename CT> -InterpretedFunction::op_function my_select_2(Aggr aggr) { - switch (aggr) { - case Aggr::AVG: return my_single_reduce_op<CT, Avg<CT>>; - case Aggr::COUNT: return my_single_reduce_op<CT, Count<CT>>; - case Aggr::PROD: return my_single_reduce_op<CT, Prod<CT>>; - case Aggr::SUM: return my_single_reduce_op<CT, Sum<CT>>; - case Aggr::MAX: return my_single_reduce_op<CT, Max<CT>>; - case Aggr::MIN: return my_single_reduce_op<CT, Min<CT>>; +struct MyGetFun { + template <typename R1, typename R2> static auto invoke() { + return my_single_reduce_op<R1, typename R2::template templ<R1>>; } - abort(); -} +}; -InterpretedFunction::op_function 
my_select(CellType cell_type, Aggr aggr) { - if (cell_type == ValueType::CellType::DOUBLE) { - return my_select_2<double>(aggr); - } - if (cell_type == ValueType::CellType::FLOAT) { - return my_select_2<float>(aggr); - } - abort(); -} +using MyTypify = TypifyValue<TypifyCellType,TypifyAggr>; bool check_input_type(const ValueType &type) { return (type.is_dense() && ((type.cell_type() == CellType::FLOAT) || (type.cell_type() == CellType::DOUBLE))); @@ -109,7 +97,7 @@ DenseSingleReduceFunction::~DenseSingleReduceFunction() = default; InterpretedFunction::Instruction DenseSingleReduceFunction::compile_self(const TensorEngine &, Stash &stash) const { - auto op = my_select(result_type().cell_type(), _aggr); + auto op = typify_invoke<2,MyTypify,MyGetFun>(result_type().cell_type(), _aggr); auto &params = stash.create<Params>(result_type(), child().result_type(), _dim_idx); static_assert(sizeof(uint64_t) == sizeof(&params)); return InterpretedFunction::Instruction(op, (uint64_t)&params); diff --git a/eval/src/vespa/eval/tensor/dense/dense_tensor_create_function.cpp b/eval/src/vespa/eval/tensor/dense/dense_tensor_create_function.cpp index 3533ab20175..7e887d4df34 100644 --- a/eval/src/vespa/eval/tensor/dense/dense_tensor_create_function.cpp +++ b/eval/src/vespa/eval/tensor/dense/dense_tensor_create_function.cpp @@ -34,7 +34,7 @@ void my_tensor_create_op(eval::InterpretedFunction::State &state, uint64_t param struct MyTensorCreateOp { template <typename CT> - static auto get_fun() { return my_tensor_create_op<CT>; } + static auto invoke() { return my_tensor_create_op<CT>; } }; size_t get_index(const TensorSpec::Address &addr, const ValueType &type) { @@ -72,7 +72,9 @@ eval::InterpretedFunction::Instruction DenseTensorCreateFunction::compile_self(const TensorEngine &, Stash &) const { static_assert(sizeof(uint64_t) == sizeof(&_self)); - auto op = select_1<MyTensorCreateOp>(result_type().cell_type()); + + using MyTypify = eval::TypifyCellType; + auto op = 
typify_invoke<1,MyTypify,MyTensorCreateOp>(result_type().cell_type()); return eval::InterpretedFunction::Instruction(op, (uint64_t)&_self); } diff --git a/eval/src/vespa/eval/tensor/dense/dense_tensor_peek_function.cpp b/eval/src/vespa/eval/tensor/dense/dense_tensor_peek_function.cpp index 5cb1cbfd88f..16c0b01b169 100644 --- a/eval/src/vespa/eval/tensor/dense/dense_tensor_peek_function.cpp +++ b/eval/src/vespa/eval/tensor/dense/dense_tensor_peek_function.cpp @@ -44,7 +44,7 @@ void my_tensor_peek_op(eval::InterpretedFunction::State &state, uint64_t param) struct MyTensorPeekOp { template <typename CT> - static auto get_fun() { return my_tensor_peek_op<CT>; } + static auto invoke() { return my_tensor_peek_op<CT>; } }; } // namespace vespalib::tensor::<unnamed> @@ -71,7 +71,8 @@ eval::InterpretedFunction::Instruction DenseTensorPeekFunction::compile_self(const TensorEngine &, Stash &) const { static_assert(sizeof(uint64_t) == sizeof(&_spec)); - auto op = select_1<MyTensorPeekOp>(_children[0].get().result_type().cell_type()); + using MyTypify = eval::TypifyCellType; + auto op = typify_invoke<1,MyTypify,MyTensorPeekOp>(_children[0].get().result_type().cell_type()); return eval::InterpretedFunction::Instruction(op, (uint64_t)&_spec); } diff --git a/eval/src/vespa/eval/tensor/dense/dense_xw_product_function.cpp b/eval/src/vespa/eval/tensor/dense/dense_xw_product_function.cpp index a0d63a1ce1e..968308d69c9 100644 --- a/eval/src/vespa/eval/tensor/dense/dense_xw_product_function.cpp +++ b/eval/src/vespa/eval/tensor/dense/dense_xw_product_function.cpp @@ -76,33 +76,6 @@ void my_cblas_float_xw_product_op(eval::InterpretedFunction::State &state, uint6 state.pop_pop_push(state.stash.create<DenseTensorView>(self.result_type, TypedCells(dst_cells))); } -template <bool common_inner> -struct MyXWProductOp { - template <typename LCT, typename RCT> - static auto get_fun() { return my_xw_product_op<LCT,RCT,common_inner>; } -}; - -template <bool common_inner> 
-eval::InterpretedFunction::op_function my_select2(CellType lct, CellType rct) { - if (lct == rct) { - if (lct == ValueType::CellType::DOUBLE) { - return my_cblas_double_xw_product_op<common_inner>; - } - if (lct == ValueType::CellType::FLOAT) { - return my_cblas_float_xw_product_op<common_inner>; - } - } - return select_2<MyXWProductOp<common_inner>>(lct, rct); -} - -eval::InterpretedFunction::op_function my_select(CellType lct, CellType rct, bool common_inner) { - if (common_inner) { - return my_select2<true>(lct, rct); - } else { - return my_select2<false>(lct, rct); - } -} - bool isDenseTensor(const ValueType &type, size_t d) { return (type.is_dense() && (type.dimensions().size() == d)); } @@ -132,6 +105,18 @@ const TensorFunction &createDenseXWProduct(const ValueType &res, const TensorFun common_inner); } +struct MyXWProductOp { + template<typename R1, typename R2, typename R3> static auto invoke() { + if (std::is_same_v<R1,double> && std::is_same_v<R2,double>) { + return my_cblas_double_xw_product_op<R3::value>; + } else if (std::is_same_v<R1,float> && std::is_same_v<R2,float>) { + return my_cblas_float_xw_product_op<R3::value>; + } else { + return my_xw_product_op<R1, R2, R3::value>; + } + } +}; + } // namespace vespalib::tensor::<unnamed> DenseXWProductFunction::Self::Self(const eval::ValueType &result_type_in, @@ -160,8 +145,10 @@ eval::InterpretedFunction::Instruction DenseXWProductFunction::compile_self(const TensorEngine &, Stash &stash) const { Self &self = stash.create<Self>(result_type(), _vector_size, _result_size); - auto op = my_select(lhs().result_type().cell_type(), - rhs().result_type().cell_type(), _common_inner); + using MyTypify = TypifyValue<eval::TypifyCellType,vespalib::TypifyBool>; + auto op = typify_invoke<3,MyTypify,MyXWProductOp>(lhs().result_type().cell_type(), + rhs().result_type().cell_type(), + _common_inner); return eval::InterpretedFunction::Instruction(op, (uint64_t)(&self)); } diff --git 
a/eval/src/vespa/eval/tensor/dense/vector_from_doubles_function.cpp b/eval/src/vespa/eval/tensor/dense/vector_from_doubles_function.cpp index 7a4b5917f00..57f727f7968 100644 --- a/eval/src/vespa/eval/tensor/dense/vector_from_doubles_function.cpp +++ b/eval/src/vespa/eval/tensor/dense/vector_from_doubles_function.cpp @@ -19,7 +19,7 @@ namespace { struct CallVectorFromDoubles { template <typename CT> static TypedCells - call(eval::InterpretedFunction::State &state, size_t numCells) { + invoke(eval::InterpretedFunction::State &state, size_t numCells) { ArrayRef<CT> outputCells = state.stash.create_array<CT>(numCells); for (size_t i = numCells; i-- > 0; ) { outputCells[i] = (CT) state.peek(0).as_double(); @@ -33,7 +33,8 @@ void my_vector_from_doubles_op(eval::InterpretedFunction::State &state, uint64_t const auto *self = (const VectorFromDoublesFunction::Self *)(param); CellType ct = self->resultType.cell_type(); size_t numCells = self->resultSize; - TypedCells cells = dispatch_0<CallVectorFromDoubles>(ct, state, numCells); + using MyTypify = eval::TypifyCellType; + TypedCells cells = typify_invoke<1,MyTypify,CallVectorFromDoubles>(ct, state, numCells); const Value &result = state.stash.create<DenseTensorView>(self->resultType, cells); state.stack.emplace_back(result); } |