diff options
author | Haavard <havardpe@yahoo-inc.com> | 2017-01-23 12:14:40 +0000 |
---|---|---|
committer | Haavard <havardpe@yahoo-inc.com> | 2017-01-23 12:14:40 +0000 |
commit | 145659f1d677face587b710726285df872a319c0 (patch) | |
tree | 074eafbf9d3b9ee030ff2ec584667b0386f37618 /eval | |
parent | 31690a1baa64d046d7ba25510b4570aa20792134 (diff) |
move code
Diffstat (limited to 'eval')
228 files changed, 20031 insertions, 0 deletions
diff --git a/eval/src/apps/eval_expr/.gitignore b/eval/src/apps/eval_expr/.gitignore new file mode 100644 index 00000000000..04661a7889c --- /dev/null +++ b/eval/src/apps/eval_expr/.gitignore @@ -0,0 +1,4 @@ +/.depend +/Makefile +/eval_expr +vespalib_eval_expr_app diff --git a/eval/src/apps/eval_expr/CMakeLists.txt b/eval/src/apps/eval_expr/CMakeLists.txt new file mode 100644 index 00000000000..f576295717a --- /dev/null +++ b/eval/src/apps/eval_expr/CMakeLists.txt @@ -0,0 +1,9 @@ +# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +vespa_add_executable(vespalib_eval_expr_app + SOURCES + eval_expr.cpp + INSTALL bin + DEPENDS + vespalib +# vespalib_vespalib_eval +) diff --git a/eval/src/apps/eval_expr/eval_expr.cpp b/eval/src/apps/eval_expr/eval_expr.cpp new file mode 100644 index 00000000000..dc5274cde47 --- /dev/null +++ b/eval/src/apps/eval_expr/eval_expr.cpp @@ -0,0 +1,27 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+ +#include <vespa/fastos/fastos.h> +#include <vespa/vespalib/eval/function.h> +#include <vespa/vespalib/eval/interpreted_function.h> + +using namespace vespalib::eval; + +int main(int argc, char **argv) { + if (argc != 2) { + fprintf(stderr, "usage: %s <expr>\n", argv[0]); + fprintf(stderr, " the expression must be self-contained (no arguments)\n"); + fprintf(stderr, " quote the expression to make it a single parameter\n"); + fprintf(stderr, " use let to simulate parameters: let(x, 1, x + 3)\n"); + return 1; + } + Function function = Function::parse({}, argv[1]); + if (function.has_error()) { + fprintf(stderr, "expression error: %s\n", function.get_error().c_str()); + return 1; + } + InterpretedFunction::Context ctx; + InterpretedFunction interpreted(SimpleTensorEngine::ref(), function, NodeTypes()); + double result = interpreted.eval(ctx).as_double(); + fprintf(stdout, "%.32g\n", result); + return 0; +} diff --git a/eval/src/tests/eval/compile_cache/.gitignore b/eval/src/tests/eval/compile_cache/.gitignore new file mode 100644 index 00000000000..f7b5a4b55d5 --- /dev/null +++ b/eval/src/tests/eval/compile_cache/.gitignore @@ -0,0 +1 @@ +vespalib_compile_cache_test_app diff --git a/eval/src/tests/eval/compile_cache/CMakeLists.txt b/eval/src/tests/eval/compile_cache/CMakeLists.txt new file mode 100644 index 00000000000..5484ee9580b --- /dev/null +++ b/eval/src/tests/eval/compile_cache/CMakeLists.txt @@ -0,0 +1,9 @@ +# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+vespa_add_executable(vespalib_compile_cache_test_app TEST + SOURCES + compile_cache_test.cpp + DEPENDS + vespalib + vespalib_vespalib_eval_llvm +) +vespa_add_test(NAME vespalib_compile_cache_test_app COMMAND vespalib_compile_cache_test_app) diff --git a/eval/src/tests/eval/compile_cache/compile_cache_test.cpp b/eval/src/tests/eval/compile_cache/compile_cache_test.cpp new file mode 100644 index 00000000000..f80df8090d9 --- /dev/null +++ b/eval/src/tests/eval/compile_cache/compile_cache_test.cpp @@ -0,0 +1,148 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +#include <vespa/vespalib/testkit/test_kit.h> +#include <vespa/vespalib/eval/llvm/compile_cache.h> +#include <vespa/vespalib/eval/key_gen.h> +#include <vespa/vespalib/eval/test/eval_spec.h> +#include <set> + +using namespace vespalib::eval; + +//----------------------------------------------------------------------------- + +TEST("require that parameter passing selection affects function key") { + EXPECT_NOT_EQUAL(gen_key(Function::parse("a+b"), PassParams::SEPARATE), + gen_key(Function::parse("a+b"), PassParams::ARRAY)); +} + +TEST("require that the number of parameters affects function key") { + EXPECT_NOT_EQUAL(gen_key(Function::parse({"a", "b"}, "a+b"), PassParams::SEPARATE), + gen_key(Function::parse({"a", "b", "c"}, "a+b"), PassParams::SEPARATE)); + EXPECT_NOT_EQUAL(gen_key(Function::parse({"a", "b"}, "a+b"), PassParams::ARRAY), + gen_key(Function::parse({"a", "b", "c"}, "a+b"), PassParams::ARRAY)); +} + +TEST("require that implicit and explicit parameters give the same function key") { + EXPECT_EQUAL(gen_key(Function::parse({"a", "b"}, "a+b"), PassParams::SEPARATE), + gen_key(Function::parse("a+b"), PassParams::SEPARATE)); + EXPECT_EQUAL(gen_key(Function::parse({"a", "b"}, "a+b"), PassParams::ARRAY), + gen_key(Function::parse("a+b"), PassParams::ARRAY)); +} + +TEST("require that symbol names does not affect function key") { + 
EXPECT_EQUAL(gen_key(Function::parse("a+b"), PassParams::SEPARATE), + gen_key(Function::parse("x+y"), PassParams::SEPARATE)); + EXPECT_EQUAL(gen_key(Function::parse("a+b"), PassParams::ARRAY), + gen_key(Function::parse("x+y"), PassParams::ARRAY)); +} + +TEST("require that let bind names does not affect function key") { + EXPECT_EQUAL(gen_key(Function::parse("let(a,1,a+a)"), PassParams::SEPARATE), + gen_key(Function::parse("let(b,1,b+b)"), PassParams::SEPARATE)); + EXPECT_EQUAL(gen_key(Function::parse("let(a,1,a+a)"), PassParams::ARRAY), + gen_key(Function::parse("let(b,1,b+b)"), PassParams::ARRAY)); +} + +TEST("require that different values give different function keys") { + EXPECT_NOT_EQUAL(gen_key(Function::parse("1"), PassParams::SEPARATE), + gen_key(Function::parse("2"), PassParams::SEPARATE)); + EXPECT_NOT_EQUAL(gen_key(Function::parse("1"), PassParams::ARRAY), + gen_key(Function::parse("2"), PassParams::ARRAY)); +} + +TEST("require that different strings give different function keys") { + EXPECT_NOT_EQUAL(gen_key(Function::parse("\"a\""), PassParams::SEPARATE), + gen_key(Function::parse("\"b\""), PassParams::SEPARATE)); + EXPECT_NOT_EQUAL(gen_key(Function::parse("\"a\""), PassParams::ARRAY), + gen_key(Function::parse("\"b\""), PassParams::ARRAY)); +} + +//----------------------------------------------------------------------------- + +struct CheckKeys : test::EvalSpec::EvalTest { + bool failed = false; + std::set<vespalib::string> seen_keys; + bool check_key(const vespalib::string &key) { + bool seen = (seen_keys.count(key) > 0); + seen_keys.insert(key); + return seen; + } + virtual void next_expression(const std::vector<vespalib::string> ¶m_names, + const vespalib::string &expression) override + { + Function function = Function::parse(param_names, expression); + if (!CompiledFunction::detect_issues(function)) { + if (check_key(gen_key(function, PassParams::ARRAY)) || + check_key(gen_key(function, PassParams::SEPARATE))) + { + failed = true; + fprintf(stderr, 
"key collision for: %s\n", expression.c_str()); + } + } + } + virtual void handle_case(const std::vector<vespalib::string> &, + const std::vector<double> &, + const vespalib::string &, + double) override {} +}; + +TEST_FF("require that all conformance expressions have different function keys", + CheckKeys(), test::EvalSpec()) +{ + f2.add_all_cases(); + f2.each_case(f1); + EXPECT_TRUE(!f1.failed); + EXPECT_GREATER(f1.seen_keys.size(), 100u); +} + +//----------------------------------------------------------------------------- + +void verify_cache(size_t expect_cached, size_t expect_refs) { + EXPECT_EQUAL(expect_cached, CompileCache::num_cached()); + EXPECT_EQUAL(expect_refs, CompileCache::count_refs()); +} + +TEST("require that cache is initially empty") { + TEST_DO(verify_cache(0, 0)); +} + +TEST("require that unused functions are evicted from the cache") { + CompileCache::Token::UP token_a = CompileCache::compile(Function::parse("x+y"), PassParams::ARRAY); + TEST_DO(verify_cache(1, 1)); + token_a.reset(); + TEST_DO(verify_cache(0, 0)); +} + +TEST("require that agents can have separate functions in the cache") { + CompileCache::Token::UP token_a = CompileCache::compile(Function::parse("x+y"), PassParams::ARRAY); + CompileCache::Token::UP token_b = CompileCache::compile(Function::parse("x*y"), PassParams::ARRAY); + TEST_DO(verify_cache(2, 2)); +} + +TEST("require that agents can share functions in the cache") { + CompileCache::Token::UP token_a = CompileCache::compile(Function::parse("x+y"), PassParams::ARRAY); + CompileCache::Token::UP token_b = CompileCache::compile(Function::parse("x+y"), PassParams::ARRAY); + TEST_DO(verify_cache(1, 2)); +} + +TEST("require that cache usage works") { + TEST_DO(verify_cache(0, 0)); + CompileCache::Token::UP token_a = CompileCache::compile(Function::parse("x+y"), PassParams::SEPARATE); + EXPECT_EQUAL(5.0, token_a->get().get_function<2>()(2.0, 3.0)); + TEST_DO(verify_cache(1, 1)); + CompileCache::Token::UP token_b = 
CompileCache::compile(Function::parse("x*y"), PassParams::SEPARATE); + EXPECT_EQUAL(6.0, token_b->get().get_function<2>()(2.0, 3.0)); + TEST_DO(verify_cache(2, 2)); + CompileCache::Token::UP token_c = CompileCache::compile(Function::parse("x+y"), PassParams::SEPARATE); + EXPECT_EQUAL(5.0, token_c->get().get_function<2>()(2.0, 3.0)); + TEST_DO(verify_cache(2, 3)); + token_a.reset(); + TEST_DO(verify_cache(2, 2)); + token_b.reset(); + TEST_DO(verify_cache(1, 1)); + token_c.reset(); + TEST_DO(verify_cache(0, 0)); +} + +//----------------------------------------------------------------------------- + +TEST_MAIN() { TEST_RUN_ALL(); } diff --git a/eval/src/tests/eval/compiled_function/.gitignore b/eval/src/tests/eval/compiled_function/.gitignore new file mode 100644 index 00000000000..849f107211e --- /dev/null +++ b/eval/src/tests/eval/compiled_function/.gitignore @@ -0,0 +1 @@ +vespalib_compiled_function_test_app diff --git a/eval/src/tests/eval/compiled_function/CMakeLists.txt b/eval/src/tests/eval/compiled_function/CMakeLists.txt new file mode 100644 index 00000000000..c362811c93e --- /dev/null +++ b/eval/src/tests/eval/compiled_function/CMakeLists.txt @@ -0,0 +1,9 @@ +# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+vespa_add_executable(vespalib_compiled_function_test_app TEST + SOURCES + compiled_function_test.cpp + DEPENDS + vespalib + vespalib_vespalib_eval_llvm +) +vespa_add_test(NAME vespalib_compiled_function_test_app COMMAND vespalib_compiled_function_test_app) diff --git a/eval/src/tests/eval/compiled_function/FILES b/eval/src/tests/eval/compiled_function/FILES new file mode 100644 index 00000000000..44a9116c4fe --- /dev/null +++ b/eval/src/tests/eval/compiled_function/FILES @@ -0,0 +1 @@ +compiled_function_test.cpp diff --git a/eval/src/tests/eval/compiled_function/compiled_function_test.cpp b/eval/src/tests/eval/compiled_function/compiled_function_test.cpp new file mode 100644 index 00000000000..882dd1b330e --- /dev/null +++ b/eval/src/tests/eval/compiled_function/compiled_function_test.cpp @@ -0,0 +1,222 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +#include <vespa/vespalib/testkit/test_kit.h> +#include <vespa/vespalib/eval/function.h> +#include <vespa/vespalib/eval/llvm/compiled_function.h> +#include <vespa/vespalib/eval/test/eval_spec.h> +#include <vespa/vespalib/eval/basic_nodes.h> +#include <vespa/vespalib/util/stringfmt.h> +#include <cmath> +#include <vespa/vespalib/test/insertion_operators.h> +#include <iostream> + +using namespace vespalib::eval; + +//----------------------------------------------------------------------------- + +std::vector<vespalib::string> params_10({"p1", "p2", "p3", "p4", "p5", "p6", "p7", "p8", "p9", "p10"}); + +const char *expr_10 = "p1 + p2 + p3 + p4 + p5 + p6 + p7 + p8 + p9 + p10"; + +TEST("require that separate parameter passing works") { + CompiledFunction cf_10(Function::parse(params_10, expr_10), PassParams::SEPARATE); + auto fun_10 = cf_10.get_function<10>(); + EXPECT_EQUAL(10.0, fun_10(1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0)); + EXPECT_EQUAL(50.0, fun_10(5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0)); + EXPECT_EQUAL(45.0, fun_10(0.0, 1.0, 
2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0)); + EXPECT_EQUAL(45.0, fun_10(9.0, 8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0, 0.0)); +} + +TEST("require that array parameter passing works") { + CompiledFunction arr_cf(Function::parse(params_10, expr_10), PassParams::ARRAY); + auto arr_fun = arr_cf.get_function(); + EXPECT_EQUAL(10.0, arr_fun(&std::vector<double>({1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0})[0])); + EXPECT_EQUAL(50.0, arr_fun(&std::vector<double>({5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0})[0])); + EXPECT_EQUAL(45.0, arr_fun(&std::vector<double>({0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0})[0])); + EXPECT_EQUAL(45.0, arr_fun(&std::vector<double>({9.0, 8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0, 0.0})[0])); +} + +//----------------------------------------------------------------------------- + +std::vector<vespalib::string> unsupported = { + "sum(", + "map(", + "join(", + "reduce(", + "rename(", + "tensor(", + "concat(" +}; + +bool is_unsupported(const vespalib::string &expression) { + for (const auto &prefix: unsupported) { + if (starts_with(expression, prefix)) { + return true; + } + } + return false; +} + +//----------------------------------------------------------------------------- + +struct MyEvalTest : test::EvalSpec::EvalTest { + size_t pass_cnt = 0; + size_t fail_cnt = 0; + bool print_pass = false; + bool print_fail = false; + virtual void next_expression(const std::vector<vespalib::string> ¶m_names, + const vespalib::string &expression) override + { + Function function = Function::parse(param_names, expression); + ASSERT_TRUE(!function.has_error()); + bool is_supported = !is_unsupported(expression); + bool has_issues = CompiledFunction::detect_issues(function); + if (is_supported == has_issues) { + const char *supported_str = is_supported ? "supported" : "not supported"; + const char *issues_str = has_issues ? 
"has issues" : "does not have issues"; + print_fail && fprintf(stderr, "expression %s is %s, but %s\n", + expression.c_str(), supported_str, issues_str); + ++fail_cnt; + } + } + virtual void handle_case(const std::vector<vespalib::string> ¶m_names, + const std::vector<double> ¶m_values, + const vespalib::string &expression, + double expected_result) override + { + Function function = Function::parse(param_names, expression); + ASSERT_TRUE(!function.has_error()); + bool is_supported = !is_unsupported(expression); + bool has_issues = CompiledFunction::detect_issues(function); + if (is_supported && !has_issues) { + CompiledFunction cfun(function, PassParams::ARRAY); + auto fun = cfun.get_function(); + ASSERT_EQUAL(cfun.num_params(), param_values.size()); + double result = fun(¶m_values[0]); + if (is_same(expected_result, result)) { + print_pass && fprintf(stderr, "verifying: %s -> %g ... PASS\n", + as_string(param_names, param_values, expression).c_str(), + expected_result); + ++pass_cnt; + } else { + print_fail && fprintf(stderr, "verifying: %s -> %g ... 
FAIL: got %g\n", + as_string(param_names, param_values, expression).c_str(), + expected_result, result); + ++fail_cnt; + } + } + } +}; + +TEST_FF("require that compiled evaluation passes all conformance tests", MyEvalTest(), test::EvalSpec()) { + f1.print_fail = true; + f2.add_all_cases(); + f2.each_case(f1); + EXPECT_GREATER(f1.pass_cnt, 1000u); + EXPECT_EQUAL(0u, f1.fail_cnt); +} + +//----------------------------------------------------------------------------- + +TEST("require that large (plugin) set membership checks work") { + nodes::Array my_set; + for(size_t i = 1; i <= 100; ++i) { + my_set.add(nodes::Node_UP(new nodes::Number(i))); + } + nodes::DumpContext dump_ctx({}); + vespalib::string expr = vespalib::make_string("if(a in %s,1,0)", + my_set.dump(dump_ctx).c_str()); + // fprintf(stderr, "expression: %s\n", expr.c_str()); + CompiledFunction cf(Function::parse(expr), PassParams::SEPARATE); + CompiledFunction arr_cf(Function::parse(expr), PassParams::ARRAY); + auto fun = cf.get_function<1>(); + auto arr_fun = arr_cf.get_function(); + for (double value = 0.5; value <= 100.5; value += 0.5) { + if (std::round(value) == value) { + EXPECT_EQUAL(1.0, fun(value)); + EXPECT_EQUAL(1.0, arr_fun(&value)); + } else { + EXPECT_EQUAL(0.0, fun(value)); + EXPECT_EQUAL(0.0, arr_fun(&value)); + } + } +} + +//----------------------------------------------------------------------------- + +CompiledFunction pass_fun(CompiledFunction cf) { + auto fun = cf.get_function<2>(); + EXPECT_EQUAL(5.0, fun(2.0, 3.0)); + return cf; +} + +TEST("require that compiled expression can be passed (moved) around") { + CompiledFunction cf(Function::parse("a+b"), PassParams::SEPARATE); + auto fun = cf.get_function<2>(); + EXPECT_EQUAL(4.0, fun(2.0, 2.0)); + CompiledFunction cf2 = pass_fun(std::move(cf)); + EXPECT_TRUE(cf.get_function<2>() == nullptr); + auto fun2 = cf2.get_function<2>(); + EXPECT_TRUE(fun == fun2); + EXPECT_EQUAL(10.0, fun(3.0, 7.0)); +} + +TEST("require that expressions with 
constant sub-expressions evaluate correctly") { + CompiledFunction cf(Function::parse("if(1,2,10)+a+b+max(1,2)/1"), PassParams::SEPARATE); + auto fun = cf.get_function<2>(); + EXPECT_EQUAL(7.0, fun(1.0, 2.0)); + EXPECT_EQUAL(11.0, fun(3.0, 4.0)); +} + +TEST("dump ir code to verify lazy casting") { + CompiledFunction cf(Function::parse({"a", "b"}, "12==2+if(a==3&&a<10||b,10,5)"), PassParams::SEPARATE); + auto fun = cf.get_function<2>(); + EXPECT_EQUAL(0.0, fun(0.0, 0.0)); + EXPECT_EQUAL(1.0, fun(0.0, 1.0)); + EXPECT_EQUAL(1.0, fun(3.0, 0.0)); + cf.dump(); +} + +TEST_MT("require that multithreaded compilation works", 64) { + { + CompiledFunction cf(Function::parse({"x", "y", "z", "w"}, "((x+1)*(y-1))/((z+1)/(w-1))"), + PassParams::SEPARATE); + auto fun = cf.get_function<4>(); + EXPECT_EQUAL(1.0, fun(0.0, 2.0, 0.0, 2.0)); + } + { + CompiledFunction cf(Function::parse({"x", "y", "z", "w"}, "((x+1)*(y-1))/((z+1)/(w-1))"), + PassParams::SEPARATE); + auto fun = cf.get_function<4>(); + EXPECT_EQUAL(4.0, fun(1.0, 3.0, 0.0, 2.0)); + } + { + CompiledFunction cf(Function::parse({"x", "y", "z", "w"}, "((x+1)*(y-1))/((z+1)/(w-1))"), + PassParams::SEPARATE); + auto fun = cf.get_function<4>(); + EXPECT_EQUAL(2.0, fun(1.0, 3.0, 1.0, 2.0)); + } + { + CompiledFunction cf(Function::parse({"x", "y", "z", "w"}, "((x+1)*(y-1))/((z+1)/(w-1))"), + PassParams::SEPARATE); + auto fun = cf.get_function<4>(); + EXPECT_EQUAL(8.0, fun(1.0, 3.0, 1.0, 5.0)); + } +} + +//----------------------------------------------------------------------------- + +TEST("require that function issues can be detected") { + auto simple = Function::parse("a+b"); + auto complex = Function::parse("join(a,b,f(a,b)(a+b))"); + EXPECT_FALSE(simple.has_error()); + EXPECT_FALSE(complex.has_error()); + EXPECT_FALSE(CompiledFunction::detect_issues(simple)); + EXPECT_TRUE(CompiledFunction::detect_issues(complex)); + std::cerr << "Example function issues:" << std::endl + << CompiledFunction::detect_issues(complex).list + << 
std::endl; +} + +//----------------------------------------------------------------------------- + +TEST_MAIN() { TEST_RUN_ALL(); } diff --git a/eval/src/tests/eval/function/.gitignore b/eval/src/tests/eval/function/.gitignore new file mode 100644 index 00000000000..a8713610644 --- /dev/null +++ b/eval/src/tests/eval/function/.gitignore @@ -0,0 +1 @@ +vespalib_function_test_app diff --git a/eval/src/tests/eval/function/CMakeLists.txt b/eval/src/tests/eval/function/CMakeLists.txt new file mode 100644 index 00000000000..7c7268eb492 --- /dev/null +++ b/eval/src/tests/eval/function/CMakeLists.txt @@ -0,0 +1,8 @@ +# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +vespa_add_executable(vespalib_function_test_app TEST + SOURCES + function_test.cpp + DEPENDS + vespalib +) +vespa_add_test(NAME vespalib_function_test_app COMMAND vespalib_function_test_app) diff --git a/eval/src/tests/eval/function/FILES b/eval/src/tests/eval/function/FILES new file mode 100644 index 00000000000..a7421dd2605 --- /dev/null +++ b/eval/src/tests/eval/function/FILES @@ -0,0 +1 @@ +function_parser_test.cpp diff --git a/eval/src/tests/eval/function/function_test.cpp b/eval/src/tests/eval/function/function_test.cpp new file mode 100644 index 00000000000..d3cb55a1cef --- /dev/null +++ b/eval/src/tests/eval/function/function_test.cpp @@ -0,0 +1,901 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+#include <vespa/vespalib/testkit/test_kit.h> +#include <vespa/vespalib/eval/function.h> +#include <vespa/vespalib/eval/operator_nodes.h> +#include <vespa/vespalib/eval/node_traverser.h> +#include <set> +#include <vespa/vespalib/eval/test/eval_spec.h> +#include <vespa/vespalib/eval/check_type.h> + +using namespace vespalib::eval; +using namespace vespalib::eval::nodes; + +std::vector<vespalib::string> params({"x", "y", "z", "w"}); + +double as_number(const Function &f) { + auto number = as<Number>(f.root()); + if (number) { + return number->value(); + } else { + return error_value; + } +} + +vespalib::string as_string(const Function &f) { + auto string = as<String>(f.root()); + if (string) { + return string->value(); + } else { + return "<error>"; + } +} + +struct OperatorLayer { + Operator::Order order; + std::vector<vespalib::string> op_names; +}; + +Operator_UP create_op(vespalib::string name) { + Operator_UP op = OperatorRepo::instance().create(name); + ASSERT_TRUE(op.get() != nullptr); + EXPECT_EQUAL(name, op->op_str()); + return op; +} + +void verify_operator_binding_order(std::initializer_list<OperatorLayer> layers) { + std::set<vespalib::string> seen_names; + int layer_idx = 0; + for (OperatorLayer layer: layers) { + ++layer_idx; + for (vespalib::string op_name: layer.op_names) { + seen_names.insert(op_name); + int other_layer_idx = 0; + for (OperatorLayer other_layer: layers) { + ++other_layer_idx; + for (vespalib::string other_op_name: other_layer.op_names) { + Operator_UP op = create_op(op_name); + Operator_UP other_op = create_op(other_op_name); + bool do_op_before_other_op = (layer_idx < other_layer_idx) + || ((layer_idx == other_layer_idx) + && (layer.order == Operator::Order::LEFT)); + if (!EXPECT_EQUAL(do_op_before_other_op, op->do_before(*other_op))) { + fprintf(stderr, "error: left operator '%s' should %sbind before right operator '%s'\n", + op->op_str().c_str(), do_op_before_other_op? 
"" : "not ", other_op->op_str().c_str()); + } + } + } + } + } + auto all_names = OperatorRepo::instance().get_names(); + for (auto name: all_names) { + if (!EXPECT_EQUAL(1u, seen_names.count(name))) { + fprintf(stderr, "error: operator '%s' not verified by binding order test\n", name.c_str()); + } + } +} + +bool verify_string(const vespalib::string &str, const vespalib::string &expr) { + bool ok = true; + ok &= EXPECT_EQUAL(str, as_string(Function::parse(params, expr))); + ok &= EXPECT_EQUAL(expr, Function::parse(params, expr).dump()); + return ok; +} + + +TEST("require that scientific numbers can be parsed") { + EXPECT_EQUAL(1.0, as_number(Function::parse(params, "1"))); + EXPECT_EQUAL(2.5, as_number(Function::parse(params, "2.5"))); + EXPECT_EQUAL(100.0, as_number(Function::parse(params, "100"))); + EXPECT_EQUAL(0.01, as_number(Function::parse(params, "0.01"))); + EXPECT_EQUAL(1.05e5, as_number(Function::parse(params, "1.05e5"))); + EXPECT_EQUAL(3e7, as_number(Function::parse(params, "3e7"))); + EXPECT_EQUAL(1.05e5, as_number(Function::parse(params, "1.05e+5"))); + EXPECT_EQUAL(3e7, as_number(Function::parse(params, "3e+7"))); + EXPECT_EQUAL(1.05e-5, as_number(Function::parse(params, "1.05e-5"))); + EXPECT_EQUAL(3e-7, as_number(Function::parse(params, "3e-7"))); + EXPECT_EQUAL(1.05e5, as_number(Function::parse(params, "1.05E5"))); + EXPECT_EQUAL(3e7, as_number(Function::parse(params, "3E7"))); + EXPECT_EQUAL(1.05e5, as_number(Function::parse(params, "1.05E+5"))); + EXPECT_EQUAL(3e7, as_number(Function::parse(params, "3E+7"))); + EXPECT_EQUAL(1.05e-5, as_number(Function::parse(params, "1.05E-5"))); + EXPECT_EQUAL(3e-7, as_number(Function::parse(params, "3E-7"))); +} + +TEST("require that number parsing does not eat +/- operators") { + EXPECT_EQUAL("(((1+2)+3)+4)", Function::parse(params, "1+2+3+4").dump()); + EXPECT_EQUAL("(((1-2)-3)-4)", Function::parse(params, "1-2-3-4").dump()); + EXPECT_EQUAL("(((1+x)+3)+y)", Function::parse(params, "1+x+3+y").dump()); + 
EXPECT_EQUAL("(((1-x)-3)-y)", Function::parse(params, "1-x-3-y").dump()); +} + +TEST("require that symbols can be parsed") { + EXPECT_EQUAL("x", Function::parse(params, "x").dump()); + EXPECT_EQUAL("y", Function::parse(params, "y").dump()); + EXPECT_EQUAL("z", Function::parse(params, "z").dump()); +} + +TEST("require that parenthesis can be parsed") { + EXPECT_EQUAL("x", Function::parse(params, "(x)").dump()); + EXPECT_EQUAL("x", Function::parse(params, "((x))").dump()); + EXPECT_EQUAL("x", Function::parse(params, "(((x)))").dump()); +} + +TEST("require that strings are parsed and dumped correctly") { + EXPECT_TRUE(verify_string("foo", "\"foo\"")); + EXPECT_TRUE(verify_string("", "\"\"")); + EXPECT_TRUE(verify_string(" ", "\" \"")); + EXPECT_TRUE(verify_string(">\\<", "\">\\\\<\"")); + EXPECT_TRUE(verify_string(">\"<", "\">\\\"<\"")); + EXPECT_TRUE(verify_string(">\t<", "\">\\t<\"")); + EXPECT_TRUE(verify_string(">\n<", "\">\\n<\"")); + EXPECT_TRUE(verify_string(">\r<", "\">\\r<\"")); + EXPECT_TRUE(verify_string(">\f<", "\">\\f<\"")); + for (int c = 0; c < 256; ++c) { + vespalib::string raw_expr = vespalib::make_string("\"%c\"", c); + vespalib::string hex_expr = vespalib::make_string("\"\\x%02x\"", c); + vespalib::string raw_str = vespalib::make_string("%c", c); + EXPECT_EQUAL(raw_str, as_string(Function::parse(params, hex_expr))); + if (c != 0 && c != '\"' && c != '\\') { + EXPECT_EQUAL(raw_str, as_string(Function::parse(params, raw_expr))); + } else { + EXPECT_TRUE(Function::parse(params, raw_expr).has_error()); + } + if (c == '\\') { + EXPECT_EQUAL("\"\\\\\"", Function::parse(params, hex_expr).dump()); + } else if (c == '\"') { + EXPECT_EQUAL("\"\\\"\"", Function::parse(params, hex_expr).dump()); + } else if (c == '\t') { + EXPECT_EQUAL("\"\\t\"", Function::parse(params, hex_expr).dump()); + } else if (c == '\n') { + EXPECT_EQUAL("\"\\n\"", Function::parse(params, hex_expr).dump()); + } else if (c == '\r') { + EXPECT_EQUAL("\"\\r\"", Function::parse(params, 
hex_expr).dump()); + } else if (c == '\f') { + EXPECT_EQUAL("\"\\f\"", Function::parse(params, hex_expr).dump()); + } else if ((c >= 32) && (c <= 126)) { + if (c >= 'a' && c <= 'z' && c != 't' && c != 'n' && c != 'r' && c != 'f') { + EXPECT_TRUE(Function::parse(params, vespalib::make_string("\"\\%c\"", c)).has_error()); + } + EXPECT_EQUAL(raw_expr, Function::parse(params, hex_expr).dump()); + } else { + EXPECT_EQUAL(hex_expr, Function::parse(params, hex_expr).dump()); + } + } +} + +TEST("require that arrays can be parsed") { + EXPECT_EQUAL("[]", Function::parse(params, "[]").dump()); + EXPECT_EQUAL("[1,2,3]", Function::parse(params, "[1,2,3]").dump()); + EXPECT_EQUAL("[1,2,3]", Function::parse(params, "[ 1 , 2 , 3 ]").dump()); + EXPECT_EQUAL("[[x],[x,y],[1,2,[z,w]]]", Function::parse(params, "[[x],[x,y],[1,2,[z,w]]]").dump()); + EXPECT_EQUAL("[(x+1),(y-[3,7]),z,[]]", Function::parse(params, "[x+1,y-[3,7],z,[]]").dump()); +} + +TEST("require that negative values can be parsed") { + EXPECT_EQUAL("(-1)", Function::parse(params, "-1").dump()); + EXPECT_EQUAL("(-2.5)", Function::parse(params, "-2.5").dump()); + EXPECT_EQUAL("(-100)", Function::parse(params, "-100").dump()); +} + +TEST("require that negative symbols can be parsed") { + EXPECT_EQUAL("(-x)", Function::parse(params, "-x").dump()); + EXPECT_EQUAL("(-y)", Function::parse(params, "-y").dump()); + EXPECT_EQUAL("(-z)", Function::parse(params, "-z").dump()); + EXPECT_EQUAL("(-(-(-x)))", Function::parse(params, "---x").dump()); +} + +TEST("require that not can be parsed") { + EXPECT_EQUAL("(!x)", Function::parse(params, "!x").dump()); + EXPECT_EQUAL("(!(!x))", Function::parse(params, "!!x").dump()); + EXPECT_EQUAL("(!(!(!x)))", Function::parse(params, "!!!x").dump()); +} + +TEST("require that not/neg binds to next value") { + EXPECT_EQUAL("((!(!(-(-x))))^z)", Function::parse(params, "!!--x^z").dump()); + EXPECT_EQUAL("((-(-(!(!x))))^z)", Function::parse(params, "--!!x^z").dump()); + 
EXPECT_EQUAL("((!(-(-(!x))))^z)", Function::parse(params, "!--!x^z").dump()); + EXPECT_EQUAL("((-(!(!(-x))))^z)", Function::parse(params, "-!!-x^z").dump()); +} + +TEST("require that parenthesis resolves before not/neg") { + EXPECT_EQUAL("(!(x^z))", Function::parse(params, "!(x^z)").dump()); + EXPECT_EQUAL("(-(x^z))", Function::parse(params, "-(x^z)").dump()); +} + +TEST("require that operators have appropriate binding order") { + verify_operator_binding_order({ { Operator::Order::RIGHT, { "^" } }, + { Operator::Order::LEFT, { "*", "/" } }, + { Operator::Order::LEFT, { "+", "-" } }, + { Operator::Order::LEFT, { "==", "!=", "~=", "<", "<=", ">", ">=", "in" } }, + { Operator::Order::LEFT, { "&&" } }, + { Operator::Order::LEFT, { "||" } } }); +} + +TEST("require that operators binding left are calculated left to right") { + EXPECT_TRUE(create_op("+")->order() == Operator::Order::LEFT); + EXPECT_EQUAL("((x+y)+z)", Function::parse(params, "x+y+z").dump()); +} + +TEST("require that operators binding right are calculated right to left") { + EXPECT_TRUE(create_op("^")->order() == Operator::Order::RIGHT); + EXPECT_EQUAL("(x^(y^z))", Function::parse(params, "x^y^z").dump()); +} + +TEST("require that operators with higher precedence are resolved first") { + EXPECT_TRUE(create_op("*")->priority() > create_op("+")->priority()); + EXPECT_EQUAL("(x+(y*z))", Function::parse(params, "x+y*z").dump()); + EXPECT_EQUAL("((x*y)+z)", Function::parse(params, "x*y+z").dump()); +} + +TEST("require that multi-level operator precedence resolving works") { + EXPECT_TRUE(create_op("^")->priority() > create_op("*")->priority()); + EXPECT_TRUE(create_op("*")->priority() > create_op("+")->priority()); + EXPECT_EQUAL("(x+(y*(z^w)))", Function::parse(params, "x+y*z^w").dump()); + EXPECT_EQUAL("(x+((y^z)*w))", Function::parse(params, "x+y^z*w").dump()); + EXPECT_EQUAL("((x*y)+(z^w))", Function::parse(params, "x*y+z^w").dump()); + EXPECT_EQUAL("((x*(y^z))+w)", Function::parse(params, 
"x*y^z+w").dump()); + EXPECT_EQUAL("((x^y)+(z*w))", Function::parse(params, "x^y+z*w").dump()); + EXPECT_EQUAL("(((x^y)*z)+w)", Function::parse(params, "x^y*z+w").dump()); +} + +TEST("require that expressions are combined when parenthesis are closed") { + EXPECT_EQUAL("((x+(y+z))+w)", Function::parse(params, "x+(y+z)+w").dump()); +} + +TEST("require that operators can not bind out of parenthesis") { + EXPECT_TRUE(create_op("*")->priority() > create_op("+")->priority()); + EXPECT_EQUAL("((x+y)*(x+z))", Function::parse(params, "(x+y)*(x+z)").dump()); +} + +TEST("require that set membership constructs can be parsed") { + EXPECT_EQUAL("(x in [y,z,w])", Function::parse(params, "x in [y,z,w]").dump()); + EXPECT_EQUAL("(x in [y,z,w])", Function::parse(params, "x in[y,z,w]").dump()); + EXPECT_EQUAL("(x in [y,z,w])", Function::parse(params, "(x)in[y,z,w]").dump()); + EXPECT_EQUAL("((x+1) in [y,z,(w-1)])", Function::parse(params, "(x+1)in[y,z,(w-1)]").dump()); +} + +TEST("require that function calls can be parsed") { + EXPECT_EQUAL("min(max(x,y),sqrt(z))", Function::parse(params, "min(max(x,y),sqrt(z))").dump()); +} + +TEST("require that if expressions can be parsed") { + EXPECT_EQUAL("if(x,y,z)", Function::parse(params, "if(x,y,z)").dump()); + EXPECT_EQUAL("if(x,y,z)", Function::parse(params, "if (x,y,z)").dump()); + EXPECT_EQUAL("if(x,y,z)", Function::parse(params, " if ( x , y , z ) ").dump()); + EXPECT_EQUAL("if(((x>1)&&(y<3)),(y+1),(z-1))", Function::parse(params, "if(x>1&&y<3,y+1,z-1)").dump()); + EXPECT_EQUAL("if(if(x,y,z),if(x,y,z),if(x,y,z))", Function::parse(params, "if(if(x,y,z),if(x,y,z),if(x,y,z))").dump()); + EXPECT_EQUAL("if(x,y,z,0.25)", Function::parse(params, "if(x,y,z,0.25)").dump()); + EXPECT_EQUAL("if(x,y,z,0.75)", Function::parse(params, "if(x,y,z,0.75)").dump()); +} + +TEST("require that if probability can be inspected") { + Function fun_1 = Function::parse("if(x,y,z,0.25)"); + auto if_1 = as<If>(fun_1.root()); + ASSERT_TRUE(if_1); + EXPECT_EQUAL(0.25, 
if_1->p_true()); + Function fun_2 = Function::parse("if(x,y,z,0.75)"); + auto if_2 = as<If>(fun_2.root()); + ASSERT_TRUE(if_2); + EXPECT_EQUAL(0.75, if_2->p_true()); +} + +TEST("require that symbols can be implicit") { + EXPECT_EQUAL("x", Function::parse("x").dump()); + EXPECT_EQUAL("y", Function::parse("y").dump()); + EXPECT_EQUAL("z", Function::parse("z").dump()); +} + +TEST("require that implicit parameters are picket up left to right") { + Function fun1 = Function::parse("x+y+y"); + Function fun2 = Function::parse("y+y+x"); + EXPECT_EQUAL("((x+y)+y)", fun1.dump()); + EXPECT_EQUAL("((y+y)+x)", fun2.dump()); + ASSERT_EQUAL(2u, fun1.num_params()); + ASSERT_EQUAL(2u, fun2.num_params()); + EXPECT_EQUAL("x", fun1.param_name(0)); + EXPECT_EQUAL("x", fun2.param_name(1)); + EXPECT_EQUAL("y", fun1.param_name(1)); + EXPECT_EQUAL("y", fun2.param_name(0)); +} + +//----------------------------------------------------------------------------- + +TEST("require that leaf nodes have no children") { + EXPECT_TRUE(Function::parse("123").root().is_leaf()); + EXPECT_TRUE(Function::parse("x").root().is_leaf()); + EXPECT_TRUE(Function::parse("\"abc\"").root().is_leaf()); + EXPECT_EQUAL(0u, Function::parse("123").root().num_children()); + EXPECT_EQUAL(0u, Function::parse("x").root().num_children()); + EXPECT_EQUAL(0u, Function::parse("\"abc\"").root().num_children()); +} + +TEST("require that Array children can be accessed") { + Function f = Function::parse("[1,2,3]"); + const Node &root = f.root(); + EXPECT_TRUE(!root.is_leaf()); + ASSERT_EQUAL(3u, root.num_children()); + EXPECT_EQUAL(1.0, root.get_child(0).get_const_value()); + EXPECT_EQUAL(2.0, root.get_child(1).get_const_value()); + EXPECT_EQUAL(3.0, root.get_child(2).get_const_value()); +} + +TEST("require that Neg child can be accessed") { + Function f = Function::parse("-1"); + const Node &root = f.root(); + EXPECT_TRUE(!root.is_leaf()); + ASSERT_EQUAL(1u, root.num_children()); + EXPECT_EQUAL(1.0, 
root.get_child(0).get_const_value()); +} + +TEST("require that Not child can be accessed") { + Function f = Function::parse("!1"); + const Node &root = f.root(); + EXPECT_TRUE(!root.is_leaf()); + ASSERT_EQUAL(1u, root.num_children()); + EXPECT_EQUAL(1.0, root.get_child(0).get_const_value()); +} + +TEST("require that If children can be accessed") { + Function f = Function::parse("if(1,2,3)"); + const Node &root = f.root(); + EXPECT_TRUE(!root.is_leaf()); + ASSERT_EQUAL(3u, root.num_children()); + EXPECT_EQUAL(1.0, root.get_child(0).get_const_value()); + EXPECT_EQUAL(2.0, root.get_child(1).get_const_value()); + EXPECT_EQUAL(3.0, root.get_child(2).get_const_value()); +} + +TEST("require that Let children can be accessed") { + Function f = Function::parse("let(a,1,2)"); + const Node &root = f.root(); + EXPECT_TRUE(!root.is_leaf()); + ASSERT_EQUAL(2u, root.num_children()); + EXPECT_EQUAL(1.0, root.get_child(0).get_const_value()); + EXPECT_EQUAL(2.0, root.get_child(1).get_const_value()); +} + +TEST("require that Operator children can be accessed") { + Function f = Function::parse("1+2"); + const Node &root = f.root(); + EXPECT_TRUE(!root.is_leaf()); + ASSERT_EQUAL(2u, root.num_children()); + EXPECT_EQUAL(1.0, root.get_child(0).get_const_value()); + EXPECT_EQUAL(2.0, root.get_child(1).get_const_value()); +} + +TEST("require that Call children can be accessed") { + Function f = Function::parse("max(1,2)"); + const Node &root = f.root(); + EXPECT_TRUE(!root.is_leaf()); + ASSERT_EQUAL(2u, root.num_children()); + EXPECT_EQUAL(1.0, root.get_child(0).get_const_value()); + EXPECT_EQUAL(2.0, root.get_child(1).get_const_value()); +} + +struct MyNodeHandler : public NodeHandler { + std::vector<nodes::Node_UP> nodes; + virtual void handle(nodes::Node_UP node) { + if (node.get() != nullptr) { + nodes.push_back(std::move(node)); + } + } +}; + +size_t detach_from_root(const vespalib::string &expr) { + MyNodeHandler handler; + Function function = Function::parse(expr); + nodes::Node 
&mutable_root = const_cast<nodes::Node&>(function.root()); + mutable_root.detach_children(handler); + return handler.nodes.size(); +} + +TEST("require that children can be detached") { + EXPECT_EQUAL(0u, detach_from_root("1")); + EXPECT_EQUAL(0u, detach_from_root("a")); + EXPECT_EQUAL(1u, detach_from_root("-a")); + EXPECT_EQUAL(1u, detach_from_root("!a")); + EXPECT_EQUAL(3u, detach_from_root("if(1,2,3)")); + EXPECT_EQUAL(2u, detach_from_root("let(a,1,a)")); + EXPECT_EQUAL(5u, detach_from_root("[1,2,3,4,5]")); + EXPECT_EQUAL(2u, detach_from_root("a+b")); + EXPECT_EQUAL(1u, detach_from_root("isNan(a)")); + EXPECT_EQUAL(2u, detach_from_root("max(a,b)")); +} + +//----------------------------------------------------------------------------- + +struct MyTraverser : public NodeTraverser { + size_t open_true_cnt; + std::vector<std::pair<bool, const nodes::Node &> > history; + explicit MyTraverser(size_t open_true_cnt_in) + : open_true_cnt(open_true_cnt_in), history() {} + virtual bool open(const nodes::Node &node) override { + history.emplace_back(true, node); + if (open_true_cnt == 0) { + return false; + } + --open_true_cnt; + return true; + } + virtual void close(const nodes::Node &node) override { + history.emplace_back(false, node); + } + void verify(const nodes::Node &node, size_t &offset, size_t &open_cnt) { + ASSERT_TRUE(history.size() > offset); + EXPECT_TRUE(history[offset].first); + EXPECT_EQUAL(&node, &history[offset].second); + ++offset; + if (open_cnt == 0) { + return; + } + --open_cnt; + for (size_t i = 0; i < node.num_children(); ++i) { + verify(node.get_child(i), offset, open_cnt); + } + ASSERT_TRUE(history.size() > offset); + EXPECT_TRUE(!history[offset].first); + EXPECT_EQUAL(&node, &history[offset].second); + ++offset; + } +}; + +size_t verify_traversal(size_t open_true_cnt, const vespalib::string &expression) { + Function function = Function::parse(expression); + if (!EXPECT_TRUE(!function.has_error())) { + fprintf(stderr, "--> %s\n", 
function.get_error().c_str()); + } + MyTraverser traverser(open_true_cnt); + function.root().traverse(traverser); + size_t offset = 0; + size_t open_cnt = open_true_cnt; + traverser.verify(function.root(), offset, open_cnt); + EXPECT_EQUAL(offset, traverser.history.size()); + return offset; +} + +bool verify_expression_traversal(const vespalib::string &expression) { + for (size_t open_cnt = 0; true; ++open_cnt) { + size_t num_callbacks = verify_traversal(open_cnt, expression); + if (num_callbacks == (open_cnt * 2)) { // graph is now fully expanded + return EXPECT_EQUAL(open_cnt * 2, verify_traversal(open_cnt + 1, expression)); + } + } +} + +TEST("require that traversal works as expected") { + EXPECT_TRUE(verify_expression_traversal("1")); + EXPECT_TRUE(verify_expression_traversal("1+2")); + EXPECT_TRUE(verify_expression_traversal("1+2*3-4/5")); + EXPECT_TRUE(verify_expression_traversal("if(x,1+2*3,[a,b,c]/5)")); +} + +//----------------------------------------------------------------------------- + +TEST("require that node types can be checked") { + EXPECT_TRUE(nodes::check_type<nodes::Add>(Function::parse("1+2").root())); + EXPECT_TRUE(!nodes::check_type<nodes::Add>(Function::parse("1-2").root())); + EXPECT_TRUE(!nodes::check_type<nodes::Add>(Function::parse("1*2").root())); + EXPECT_TRUE(!nodes::check_type<nodes::Add>(Function::parse("1/2").root())); + EXPECT_TRUE((nodes::check_type<nodes::Add, nodes::Sub, nodes::Mul>(Function::parse("1+2").root()))); + EXPECT_TRUE((nodes::check_type<nodes::Add, nodes::Sub, nodes::Mul>(Function::parse("1-2").root()))); + EXPECT_TRUE((nodes::check_type<nodes::Add, nodes::Sub, nodes::Mul>(Function::parse("1*2").root()))); + EXPECT_TRUE((!nodes::check_type<nodes::Add, nodes::Sub, nodes::Mul>(Function::parse("1/2").root()))); +} + +//----------------------------------------------------------------------------- + +TEST("require that parameter is param, but not const") { + EXPECT_TRUE(Function::parse("x").root().is_param()); + 
EXPECT_TRUE(!Function::parse("x").root().is_const()); +} + +TEST("require that inverted parameter is not param") { + EXPECT_TRUE(!Function::parse("-x").root().is_param()); +} + +TEST("require that let references are not params") { + Function fun = Function::parse("let(foo,bar,foo)"); + auto let = as<Let>(fun.root()); + ASSERT_TRUE(let); + EXPECT_TRUE(let->value().is_param()); + EXPECT_TRUE(!let->expr().is_param()); +} + +TEST("require that number is const, but not param") { + EXPECT_TRUE(Function::parse("123").root().is_const()); + EXPECT_TRUE(!Function::parse("123").root().is_param()); +} + +TEST("require that string is const") { + EXPECT_TRUE(Function::parse("\"x\"").root().is_const()); +} + +TEST("require that array is const if all elements are const") { + EXPECT_TRUE(Function::parse("[1,2,3]").root().is_const()); + EXPECT_TRUE(!Function::parse("[x,2,3]").root().is_const()); + EXPECT_TRUE(!Function::parse("[1,y,3]").root().is_const()); + EXPECT_TRUE(!Function::parse("[1,2,z]").root().is_const()); + EXPECT_TRUE(!Function::parse("[x,y,z]").root().is_const()); +} + +TEST("require that neg is const if sub-expression is const") { + EXPECT_TRUE(Function::parse("-123").root().is_const()); + EXPECT_TRUE(!Function::parse("-x").root().is_const()); +} + +TEST("require that not is const if sub-expression is const") { + EXPECT_TRUE(Function::parse("!1").root().is_const()); + EXPECT_TRUE(!Function::parse("!x").root().is_const()); +} + +TEST("require that operators are cost if both children are const") { + EXPECT_TRUE(!Function::parse("x+y").root().is_const()); + EXPECT_TRUE(!Function::parse("1+y").root().is_const()); + EXPECT_TRUE(!Function::parse("x+2").root().is_const()); + EXPECT_TRUE(Function::parse("1+2").root().is_const()); +} + +TEST("require that set membership is const only if array elements are const") { + EXPECT_TRUE(!Function::parse("x in [x,y,z]").root().is_const()); + EXPECT_TRUE(!Function::parse("1 in [x,y,z]").root().is_const()); + 
EXPECT_TRUE(!Function::parse("1 in [1,y,z]").root().is_const()); + EXPECT_TRUE(Function::parse("1 in [1,2,3]").root().is_const()); +} + +TEST("require that calls are cost if all parameters are const") { + EXPECT_TRUE(!Function::parse("max(x,y)").root().is_const()); + EXPECT_TRUE(!Function::parse("max(1,y)").root().is_const()); + EXPECT_TRUE(!Function::parse("max(x,2)").root().is_const()); + EXPECT_TRUE(Function::parse("max(1,2)").root().is_const()); +} + +TEST("require that const let is not const") { + EXPECT_TRUE(!Function::parse("let(a,1,a)").root().is_const()); +} + +//----------------------------------------------------------------------------- + +TEST("require that feature less than constant is tree if children are trees or constants") { + EXPECT_TRUE(Function::parse("if (foo < 2, 3, 4)").root().is_tree()); + EXPECT_TRUE(Function::parse("if (foo < 2, if(bar < 3, 4, 5), 6)").root().is_tree()); + EXPECT_TRUE(Function::parse("if (foo < 2, if(bar < 3, 4, 5), if(baz < 6, 7, 8))").root().is_tree()); + EXPECT_TRUE(Function::parse("if (foo < 2, 3, if(baz < 4, 5, 6))").root().is_tree()); + EXPECT_TRUE(Function::parse("if (foo < max(1,2), 3, 4)").root().is_tree()); + EXPECT_TRUE(!Function::parse("if (2 < foo, 3, 4)").root().is_tree()); + EXPECT_TRUE(!Function::parse("if (foo < bar, 3, 4)").root().is_tree()); + EXPECT_TRUE(!Function::parse("if (1 < 2, 3, 4)").root().is_tree()); + EXPECT_TRUE(!Function::parse("if (foo <= 2, 3, 4)").root().is_tree()); + EXPECT_TRUE(!Function::parse("if (foo == 2, 3, 4)").root().is_tree()); + EXPECT_TRUE(!Function::parse("if (foo > 2, 3, 4)").root().is_tree()); + EXPECT_TRUE(!Function::parse("if (foo >= 2, 3, 4)").root().is_tree()); + EXPECT_TRUE(!Function::parse("if (foo ~= 2, 3, 4)").root().is_tree()); +} + +TEST("require that feature in set of constants is tree if children are trees or constants") { + EXPECT_TRUE(Function::parse("if (foo in [1, 2], 3, 4)").root().is_tree()); + EXPECT_TRUE(Function::parse("if (foo in [1, 2], if(bar < 3, 
4, 5), 6)").root().is_tree()); + EXPECT_TRUE(Function::parse("if (foo in [1, 2], if(bar < 3, 4, 5), if(baz < 6, 7, 8))").root().is_tree()); + EXPECT_TRUE(Function::parse("if (foo in [1, 2], 3, if(baz < 4, 5, 6))").root().is_tree()); + EXPECT_TRUE(Function::parse("if (foo in [min(1,2), max(1,2)], 3, 4)").root().is_tree()); + EXPECT_TRUE(!Function::parse("if (1 in [1, 2], 3, 4)").root().is_tree()); + EXPECT_TRUE(!Function::parse("if (1 in [foo, 2], 3, 4)").root().is_tree()); + EXPECT_TRUE(!Function::parse("if (foo in [bar, 2], 3, 4)").root().is_tree()); +} + +TEST("require that sums of trees and forests are forests") { + EXPECT_TRUE(Function::parse("if(foo<1,2,3) + if(bar<4,5,6)").root().is_forest()); + EXPECT_TRUE(Function::parse("if(foo<1,2,3) + if(bar<4,5,6) + if(bar<7,8,9)").root().is_forest()); + EXPECT_TRUE(!Function::parse("if(foo<1,2,3)").root().is_forest()); + EXPECT_TRUE(!Function::parse("if(foo<1,2,3) + 10").root().is_forest()); + EXPECT_TRUE(!Function::parse("10 + if(bar<4,5,6)").root().is_forest()); + EXPECT_TRUE(!Function::parse("if(foo<1,2,3) - if(bar<4,5,6)").root().is_forest()); + EXPECT_TRUE(!Function::parse("if(foo<1,2,3) * if(bar<4,5,6)").root().is_forest()); + EXPECT_TRUE(!Function::parse("if(foo<1,2,3) / if(bar<4,5,6)").root().is_forest()); + EXPECT_TRUE(!Function::parse("if(foo<1,2,3) ^ if(bar<4,5,6)").root().is_forest()); + EXPECT_TRUE(!Function::parse("if(foo<1,2,3) - if(bar<4,5,6) + if(bar<7,8,9)").root().is_forest()); + EXPECT_TRUE(!Function::parse("if(foo<1,2,3) * if(bar<4,5,6) + if(bar<7,8,9)").root().is_forest()); + EXPECT_TRUE(!Function::parse("if(foo<1,2,3) / if(bar<4,5,6) + if(bar<7,8,9)").root().is_forest()); + EXPECT_TRUE(!Function::parse("if(foo<1,2,3) ^ if(bar<4,5,6) + if(bar<7,8,9)").root().is_forest()); + EXPECT_TRUE(!Function::parse("if(foo<1,2,3) + if(bar<4,5,6) - if(bar<7,8,9)").root().is_forest()); + EXPECT_TRUE(!Function::parse("if(foo<1,2,3) + if(bar<4,5,6) * if(bar<7,8,9)").root().is_forest()); + 
EXPECT_TRUE(!Function::parse("if(foo<1,2,3) + if(bar<4,5,6) / if(bar<7,8,9)").root().is_forest()); + EXPECT_TRUE(!Function::parse("if(foo<1,2,3) + if(bar<4,5,6) ^ if(bar<7,8,9)").root().is_forest()); +} + +//----------------------------------------------------------------------------- + +struct UnWrapped { + vespalib::string wrapper; + vespalib::string body; + vespalib::string error; +}; + +UnWrapped unwrap(const vespalib::string &str) { + UnWrapped result; + bool ok = Function::unwrap(str, result.wrapper, result.body, result.error); + EXPECT_EQUAL(ok, result.error.empty()); + return result; +} + +TEST("require that unwrapping works") { + EXPECT_EQUAL("max", unwrap("max(x+y)").wrapper); + EXPECT_EQUAL("max", unwrap(" max(x+y)").wrapper); + EXPECT_EQUAL("max", unwrap(" max (x+y)").wrapper); + EXPECT_EQUAL("x+y", unwrap("max(x+y)").body); + EXPECT_EQUAL("x+y", unwrap("max(x+y) ").body); + EXPECT_EQUAL("max", unwrap("max()").wrapper); + EXPECT_EQUAL("", unwrap("max()").body); + EXPECT_EQUAL("", unwrap("max()").error); + EXPECT_EQUAL("could not extract wrapper name", unwrap("").error); + EXPECT_EQUAL("could not extract wrapper name", unwrap("(x+y)").error); + EXPECT_EQUAL("could not extract wrapper name", unwrap(" (x+y)").error); + EXPECT_EQUAL("could not match opening '('", unwrap("max").error); + EXPECT_EQUAL("could not match opening '('", unwrap("max)").error); + EXPECT_EQUAL("could not match opening '('", unwrap("max5(x+y)").error); + EXPECT_EQUAL("could not match opening '('", unwrap("max)x+y(").error); + EXPECT_EQUAL("could not match closing ')'", unwrap("max(x+y").error); + EXPECT_EQUAL("could not match closing ')'", unwrap("max(x+y)x").error); + EXPECT_EQUAL("could not match closing ')'", unwrap("max(").error); +} + +//----------------------------------------------------------------------------- + +struct MySymbolExtractor : SymbolExtractor { + std::vector<char> extra; + mutable size_t invoke_count; + bool is_extra(char c) const { + for (char extra_char: extra) 
{ + if (c == extra_char) { + return true; + } + } + return false; + } + MySymbolExtractor() : extra(), invoke_count() {} + explicit MySymbolExtractor(std::initializer_list<char> extra_in) : extra(extra_in), invoke_count() {} + virtual void extract_symbol(const char *pos_in, const char *end_in, + const char *&pos_out, vespalib::string &symbol_out) const + { + ++invoke_count; + for (; pos_in < end_in; ++pos_in) { + char c = *pos_in; + if ((c >= 'a' && c <= 'z') || is_extra(c)) { + symbol_out.push_back(c); + } else { + break; + } + } + pos_out = pos_in; + } +}; + +TEST("require that custom symbol extractor may be used") { + EXPECT_EQUAL("[x+]...[missing value]...[*y]", Function::parse(params, "x+*y").dump()); + EXPECT_EQUAL("[x+]...[missing value]...[*y]", Function::parse(params, "x+*y", MySymbolExtractor()).dump()); + EXPECT_EQUAL("[x+]...[unknown symbol: 'x+']...[*y]", Function::parse(params, "x+*y", MySymbolExtractor({'+'})).dump()); + EXPECT_EQUAL("[x+*y]...[unknown symbol: 'x+*y']...[]", Function::parse(params, "x+*y", MySymbolExtractor({'+', '*'})).dump()); +} + +TEST("require that unknown function works as expected with custom symbol extractor") { + EXPECT_EQUAL("[bogus(]...[unknown function: 'bogus']...[x)+y]", Function::parse(params, "bogus(x)+y").dump()); + EXPECT_EQUAL("[bogus]...[unknown symbol: 'bogus']...[(x)+y]", Function::parse(params, "bogus(x)+y", MySymbolExtractor()).dump()); + EXPECT_EQUAL("[bogus(x)]...[unknown symbol: 'bogus(x)']...[+y]", Function::parse(params, "bogus(x)+y", MySymbolExtractor({'(', ')'})).dump()); +} + +TEST("require that unknown function that is valid parameter works as expected with custom symbol extractor") { + EXPECT_EQUAL("[z(]...[unknown function: 'z']...[x)+y]", Function::parse(params, "z(x)+y").dump()); + EXPECT_EQUAL("[z]...[invalid operator: '(']...[(x)+y]", Function::parse(params, "z(x)+y", MySymbolExtractor()).dump()); + EXPECT_EQUAL("[z(x)]...[unknown symbol: 'z(x)']...[+y]", Function::parse(params, "z(x)+y", 
MySymbolExtractor({'(', ')'})).dump()); +} + +TEST("require that custom symbol extractor is only invoked for tokens that must be parameters") { + MySymbolExtractor my_extractor; + EXPECT_EQUAL(0u, Function::parse("max(1,2)", my_extractor).num_params()); + EXPECT_EQUAL(0u, Function::parse("max(let(a,1,a),2)", my_extractor).num_params()); + ASSERT_EQUAL(1u, Function::parse("max(let(a,1,b),2)", my_extractor).num_params()); + EXPECT_EQUAL(1u, my_extractor.invoke_count); + EXPECT_EQUAL("b", Function::parse("max(let(a,1,b),2)", my_extractor).param_name(0)); + EXPECT_EQUAL(2u, my_extractor.invoke_count); + EXPECT_EQUAL("[bogus]...[invalid operator: '(']...[(1,2)]", Function::parse("bogus(1,2)", my_extractor).dump()); + EXPECT_EQUAL(3u, my_extractor.invoke_count); +} + +//----------------------------------------------------------------------------- + +void verify_error(const vespalib::string &expr, const vespalib::string &expected_error) { + Function function = Function::parse(params, expr); + EXPECT_TRUE(function.has_error()); + EXPECT_EQUAL(expected_error, function.get_error()); +} + +TEST("require that valid function does not report parse error") { + Function function = Function::parse(params, "x + y"); + EXPECT_TRUE(!function.has_error()); + EXPECT_EQUAL("", function.get_error()); +} + +TEST("require that an invalid function with explicit paramers retain its parameters") { + Function function = Function::parse({"x", "y"}, "x & y"); + EXPECT_TRUE(function.has_error()); + ASSERT_EQUAL(2u, function.num_params()); + ASSERT_EQUAL("x", function.param_name(0)); + ASSERT_EQUAL("y", function.param_name(1)); +} + +TEST("require that an invalid function with implicit paramers has no parameters") { + Function function = Function::parse("x & y"); + EXPECT_TRUE(function.has_error()); + EXPECT_EQUAL(0u, function.num_params()); +} + +TEST("require that unknown operator gives parse error") { + verify_error("x&y", "[x]...[invalid operator: '&']...[&y]"); +} + +TEST("require that unknown 
symbol gives parse error") { + verify_error("x+a", "[x+a]...[unknown symbol: 'a']...[]"); +} + +TEST("require that missing value gives parse error") { + verify_error("x+", "[x+]...[missing value]...[]"); + verify_error("x++y", "[x+]...[missing value]...[+y]"); + verify_error("x+++y", "[x+]...[missing value]...[++y]"); + verify_error("x+(y+)+z", "[x+(y+]...[missing value]...[)+z]"); +} + +//----------------------------------------------------------------------------- + +TEST("require that tensor sum can be parsed") { + EXPECT_EQUAL("sum(a)", Function::parse("sum(a)").dump()); + EXPECT_EQUAL("sum(a)", Function::parse(" sum ( a ) ").dump()); + EXPECT_EQUAL("sum(a,dim)", Function::parse("sum(a,dim)").dump()); + EXPECT_EQUAL("sum(a,dim)", Function::parse(" sum ( a , dim ) ").dump()); +} + +TEST("require that tensor operations can be nested") { + EXPECT_EQUAL("sum(sum(sum(a)),dim)", Function::parse("sum(sum(sum(a)),dim)").dump()); +} + +//----------------------------------------------------------------------------- + +TEST("require that tensor map can be parsed") { + EXPECT_EQUAL("map(a,f(x)(x+1))", Function::parse("map(a,f(x)(x+1))").dump()); + EXPECT_EQUAL("map(a,f(x)(x+1))", Function::parse(" map ( a , f ( x ) ( x + 1 ) ) ").dump()); +} + +TEST("require that tensor join can be parsed") { + EXPECT_EQUAL("join(a,b,f(x,y)(x+y))", Function::parse("join(a,b,f(x,y)(x+y))").dump()); + EXPECT_EQUAL("join(a,b,f(x,y)(x+y))", Function::parse(" join ( a , b , f ( x , y ) ( x + y ) ) ").dump()); +} + +TEST("require that parenthesis are added around lambda expression when needed") { + EXPECT_EQUAL("f(x)(sin(x))", Function::parse("sin(x)").dump_as_lambda()); +} + +TEST("require that parse error inside a lambda fails the enclosing expression") { + verify_error("map(x,f(a)(b))", "[map(x,f(a)(b]...[unknown symbol: 'b']...[))]"); +} + +TEST("require that outer parameters are hidden within a lambda") { + verify_error("map(x,f(a)(y))", "[map(x,f(a)(y]...[unknown symbol: 'y']...[))]"); +} 
+ +TEST("require that outer let bindings are hidden within a lambda") { + verify_error("let(b,x,map(b,f(a)(b)))", "[let(b,x,map(b,f(a)(b]...[unknown symbol: 'b']...[)))]"); +} + +//----------------------------------------------------------------------------- + +TEST("require that tensor reduce can be parsed") { + EXPECT_EQUAL("reduce(x,sum,a,b)", Function::parse({"x"}, "reduce(x,sum,a,b)").dump()); + EXPECT_EQUAL("reduce(x,sum,a,b,c)", Function::parse({"x"}, "reduce(x,sum,a,b,c)").dump()); + EXPECT_EQUAL("reduce(x,sum,a,b,c)", Function::parse({"x"}, " reduce ( x , sum , a , b , c ) ").dump()); + EXPECT_EQUAL("reduce(x,avg)", Function::parse({"x"}, "reduce(x,avg)").dump()); + EXPECT_EQUAL("reduce(x,avg)", Function::parse({"x"}, "reduce( x , avg )").dump()); + EXPECT_EQUAL("reduce(x,count)", Function::parse({"x"}, "reduce(x,count)").dump()); + EXPECT_EQUAL("reduce(x,prod)", Function::parse({"x"}, "reduce(x,prod)").dump()); + EXPECT_EQUAL("reduce(x,min)", Function::parse({"x"}, "reduce(x,min)").dump()); + EXPECT_EQUAL("reduce(x,max)", Function::parse({"x"}, "reduce(x,max)").dump()); +} + +TEST("require that tensor reduce is mapped to tensor sum for all dimensions/single dimension") { + EXPECT_EQUAL("sum(x)", Function::parse({"x"}, "reduce(x,sum)").dump()); + EXPECT_EQUAL("sum(x,d)", Function::parse({"x"}, "reduce(x,sum,d)").dump()); +} + +TEST("require that tensor reduce with unknown aggregator fails") { + verify_error("reduce(x,bogus)", "[reduce(x,bogus]...[unknown aggregator: 'bogus']...[)]"); +} + +TEST("require that tensor reduce with duplicate dimensions fails") { + verify_error("reduce(x,sum,a,a)", "[reduce(x,sum,a,a]...[duplicate identifiers]...[)]"); +} + +//----------------------------------------------------------------------------- + +TEST("require that tensor rename can be parsed") { + EXPECT_EQUAL("rename(x,a,b)", Function::parse({"x"}, "rename(x,a,b)").dump()); + EXPECT_EQUAL("rename(x,a,b)", Function::parse({"x"}, "rename(x,(a),(b))").dump()); + 
EXPECT_EQUAL("rename(x,a,b)", Function::parse({"x"}, "rename(x,a,(b))").dump()); + EXPECT_EQUAL("rename(x,a,b)", Function::parse({"x"}, "rename(x,(a),b)").dump()); + EXPECT_EQUAL("rename(x,(a,b),(b,a))", Function::parse({"x"}, "rename(x,(a,b),(b,a))").dump()); + EXPECT_EQUAL("rename(x,a,b)", Function::parse({"x"}, "rename( x , a , b )").dump()); + EXPECT_EQUAL("rename(x,a,b)", Function::parse({"x"}, "rename( x , ( a ) , ( b ) )").dump()); + EXPECT_EQUAL("rename(x,(a,b),(b,a))", Function::parse({"x"}, "rename( x , ( a , b ) , ( b , a ) )").dump()); +} + +TEST("require that tensor rename dimension lists cannot be empty") { + verify_error("rename(x,,b)", "[rename(x,]...[missing identifier]...[,b)]"); + verify_error("rename(x,a,)", "[rename(x,a,]...[missing identifier]...[)]"); + verify_error("rename(x,(),b)", "[rename(x,()]...[missing identifiers]...[,b)]"); + verify_error("rename(x,a,())", "[rename(x,a,()]...[missing identifiers]...[)]"); +} + +TEST("require that tensor rename dimension lists cannot contain duplicates") { + verify_error("rename(x,(a,a),(b,a))", "[rename(x,(a,a)]...[duplicate identifiers]...[,(b,a))]"); + verify_error("rename(x,(a,b),(b,b))", "[rename(x,(a,b),(b,b)]...[duplicate identifiers]...[)]"); +} + +TEST("require that tensor rename dimension lists must have equal size") { + verify_error("rename(x,(a,b),(b))", "[rename(x,(a,b),(b)]...[dimension list size mismatch]...[)]"); + verify_error("rename(x,(a),(b,a))", "[rename(x,(a),(b,a)]...[dimension list size mismatch]...[)]"); +} + +//----------------------------------------------------------------------------- + +TEST("require that tensor lambda can be parsed") { + EXPECT_EQUAL("tensor(x[10])(x)", Function::parse({""}, "tensor(x[10])(x)").dump()); + EXPECT_EQUAL("tensor(x[10],y[10])(x==y)", Function::parse({""}, "tensor(x[10],y[10])(x==y)").dump()); + EXPECT_EQUAL("tensor(x[10],y[10])(x==y)", Function::parse({""}, " tensor ( x [ 10 ] , y [ 10 ] ) ( x == y ) ").dump()); +} + +TEST("require that 
tensor lambda requires appropriate tensor type") { + verify_error("tensor(x[10],y[])(x==y)", "[tensor(x[10],y[])]...[invalid tensor type]...[(x==y)]"); + verify_error("tensor(x[10],y{})(x==y)", "[tensor(x[10],y{})]...[invalid tensor type]...[(x==y)]"); + verify_error("tensor()(x==y)", "[tensor()]...[invalid tensor type]...[(x==y)]"); +} + +TEST("require that tensor lambda can only use dimension names") { + verify_error("tensor(x[10],y[10])(x==z)", "[tensor(x[10],y[10])(x==z]...[unknown symbol: 'z']...[)]"); +} + +//----------------------------------------------------------------------------- + +TEST("require that tensor concat can be parsed") { + EXPECT_EQUAL("concat(a,b,d)", Function::parse({"a", "b"}, "concat(a,b,d)").dump()); + EXPECT_EQUAL("concat(a,b,d)", Function::parse({"a", "b"}, " concat ( a , b , d ) ").dump()); +} + +//----------------------------------------------------------------------------- + +struct CheckExpressions : test::EvalSpec::EvalTest { + bool failed = false; + size_t seen_cnt = 0; + virtual void next_expression(const std::vector<vespalib::string> ¶m_names, + const vespalib::string &expression) override + { + Function function = Function::parse(param_names, expression); + if (function.has_error()) { + failed = true; + fprintf(stderr, "parse error: %s\n", function.get_error().c_str()); + } + ++seen_cnt; + } + virtual void handle_case(const std::vector<vespalib::string> &, + const std::vector<double> &, + const vespalib::string &, + double) override {} +}; + +TEST_FF("require that all conformance test expressions can be parsed", + CheckExpressions(), test::EvalSpec()) +{ + f2.add_all_cases(); + f2.each_case(f1); + EXPECT_TRUE(!f1.failed); + EXPECT_GREATER(f1.seen_cnt, 42u); +} + +//----------------------------------------------------------------------------- + +TEST_MAIN() { TEST_RUN_ALL(); } diff --git a/eval/src/tests/eval/function_speed/.gitignore b/eval/src/tests/eval/function_speed/.gitignore new file mode 100644 index 
00000000000..f9516af310f --- /dev/null +++ b/eval/src/tests/eval/function_speed/.gitignore @@ -0,0 +1 @@ +vespalib_function_speed_test_app diff --git a/eval/src/tests/eval/function_speed/CMakeLists.txt b/eval/src/tests/eval/function_speed/CMakeLists.txt new file mode 100644 index 00000000000..310de28cf43 --- /dev/null +++ b/eval/src/tests/eval/function_speed/CMakeLists.txt @@ -0,0 +1,9 @@ +# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +vespa_add_executable(vespalib_function_speed_test_app TEST + SOURCES + function_speed_test.cpp + DEPENDS + vespalib + vespalib_vespalib_eval_llvm +) +vespa_add_test(NAME vespalib_function_speed_test_app NO_VALGRIND COMMAND vespalib_function_speed_test_app) diff --git a/eval/src/tests/eval/function_speed/function_speed_test.cpp b/eval/src/tests/eval/function_speed/function_speed_test.cpp new file mode 100644 index 00000000000..44e05f264dd --- /dev/null +++ b/eval/src/tests/eval/function_speed/function_speed_test.cpp @@ -0,0 +1,132 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+#include <vespa/vespalib/testkit/test_kit.h> +#include <vespa/vespalib/eval/function.h> +#include <vespa/vespalib/eval/llvm/compiled_function.h> +#include <vespa/vespalib/util/benchmark_timer.h> +#include <vespa/vespalib/eval/interpreted_function.h> + +using namespace vespalib::eval; + +std::vector<vespalib::string> params_5({"p", "o", "q", "f", "w"}); + +double sum_sum = 0.0; + +const char *function_str = "(0.35*p + 0.15*o + 0.30*q + 0.20*f) * w"; +Function function_ast = Function::parse(params_5, function_str); +InterpretedFunction interpreted_function(SimpleTensorEngine::ref(), function_ast, NodeTypes()); +CompiledFunction compiled_function(function_ast, PassParams::SEPARATE); +auto jit_function = compiled_function.get_function<5>(); + +double gcc_function(double p, double o, double q, double f, double w) { + return (0.35*p + 0.15*o + 0.30*q + 0.20*f) * w; +} + +InterpretedFunction::Context icontext; + +double interpret_function(double p, double o, double q, double f, double w) { + icontext.clear_params(); + icontext.add_param(p); + icontext.add_param(o); + icontext.add_param(q); + icontext.add_param(f); + icontext.add_param(w); + return interpreted_function.eval(icontext).as_double(); +} + +//----------------------------------------------------------------------------- + +const char *big_function_str = "(0.35*p + 0.15*o + 0.30*q + 0.20*f) * w + " + "(0.35*p + 0.15*o + 0.30*q + 0.20*f) * w + " + "(0.35*p + 0.15*o + 0.30*q + 0.20*f) * w + " + "(0.35*p + 0.15*o + 0.30*q + 0.20*f) * w"; + +Function big_function_ast = Function::parse(params_5, big_function_str); +InterpretedFunction big_interpreted_function(SimpleTensorEngine::ref(), big_function_ast, NodeTypes()); +CompiledFunction big_compiled_function(big_function_ast, PassParams::SEPARATE); +auto big_jit_function = big_compiled_function.get_function<5>(); + +double big_gcc_function(double p, double o, double q, double f, double w) { + return (0.35*p + 0.15*o + 0.30*q + 0.20*f) * w + + (0.35*p + 0.15*o + 0.30*q 
+ 0.20*f) * w + + (0.35*p + 0.15*o + 0.30*q + 0.20*f) * w + + (0.35*p + 0.15*o + 0.30*q + 0.20*f) * w; +} + +InterpretedFunction::Context big_icontext; + +double big_interpret_function(double p, double o, double q, double f, double w) { + big_icontext.clear_params(); + big_icontext.add_param(p); + big_icontext.add_param(o); + big_icontext.add_param(q); + big_icontext.add_param(f); + big_icontext.add_param(w); + return big_interpreted_function.eval(big_icontext).as_double(); +} + +//----------------------------------------------------------------------------- + +double measure_best(CompiledFunction::expand<5>::type function) { + double sum = 0.0; + vespalib::BenchmarkTimer timer(1.0); + while (timer.has_budget()) { + timer.before(); + for (int p = 0; p < 10; ++p) { + for (int o = 0; o < 10; ++o) { + for (int q = 0; q < 10; ++q) { + for (int f = 0; f < 10; ++f) { + for (int w = 0; w < 10; ++w) { + sum += function(p, o, q, f, w); + } + } + } + } + } + timer.after(); + } + return (timer.min_time() * 1000.0); +} + +//----------------------------------------------------------------------------- + +TEST("require that small functions return the same result") { + EXPECT_EQUAL(interpret_function(1,2,3,4,5), jit_function(1,2,3,4,5)); + EXPECT_EQUAL(interpret_function(1,2,3,4,5), gcc_function(1,2,3,4,5)); + EXPECT_EQUAL(interpret_function(5,4,3,2,1), jit_function(5,4,3,2,1)); + EXPECT_EQUAL(interpret_function(5,4,3,2,1), gcc_function(5,4,3,2,1)); +} + +TEST("require that big functions return the same result") { + EXPECT_EQUAL(big_interpret_function(1,2,3,4,5), big_jit_function(1,2,3,4,5)); + EXPECT_EQUAL(big_interpret_function(1,2,3,4,5), big_gcc_function(1,2,3,4,5)); + EXPECT_EQUAL(big_interpret_function(5,4,3,2,1), big_jit_function(5,4,3,2,1)); + EXPECT_EQUAL(big_interpret_function(5,4,3,2,1), big_gcc_function(5,4,3,2,1)); +} + +TEST("measure small function eval/jit/gcc speed") { + double interpret_time = measure_best(interpret_function); + double jit_time = 
measure_best(jit_function); + double gcc_time = measure_best(gcc_function); + double jit_vs_interpret_speed = (1.0/jit_time)/(1.0/interpret_time); + double gcc_vs_jit_speed = (1.0/gcc_time)/(1.0/jit_time); + fprintf(stderr, "interpret: %g ms\n", interpret_time); + fprintf(stderr, "jit compiled: %g ms\n", jit_time); + fprintf(stderr, "gcc compiled: %g ms\n", gcc_time); + fprintf(stderr, "jit speed compared to interpret: %g\n", jit_vs_interpret_speed); + fprintf(stderr, "gcc speed compared to jit: %g\n", gcc_vs_jit_speed); +} + +TEST("measure big function eval/jit/gcc speed") { + double interpret_time = measure_best(big_interpret_function); + double jit_time = measure_best(big_jit_function); + double gcc_time = measure_best(big_gcc_function); + double jit_vs_interpret_speed = (1.0/jit_time)/(1.0/interpret_time); + double gcc_vs_jit_speed = (1.0/gcc_time)/(1.0/jit_time); + fprintf(stderr, "interpret: %g ms\n", interpret_time); + fprintf(stderr, "jit compiled: %g ms\n", jit_time); + fprintf(stderr, "gcc compiled: %g ms\n", gcc_time); + fprintf(stderr, "jit speed compared to interpret: %g\n", jit_vs_interpret_speed); + fprintf(stderr, "gcc speed compared to jit: %g\n", gcc_vs_jit_speed); +} + +TEST_MAIN() { TEST_RUN_ALL(); } diff --git a/eval/src/tests/eval/gbdt/.gitignore b/eval/src/tests/eval/gbdt/.gitignore new file mode 100644 index 00000000000..f2a7b65c2aa --- /dev/null +++ b/eval/src/tests/eval/gbdt/.gitignore @@ -0,0 +1,3 @@ +/gbdt_benchmark +vespalib_gbdt_test_app +vespalib_gbdt_benchmark_app diff --git a/eval/src/tests/eval/gbdt/CMakeLists.txt b/eval/src/tests/eval/gbdt/CMakeLists.txt new file mode 100644 index 00000000000..d6fc1c12e10 --- /dev/null +++ b/eval/src/tests/eval/gbdt/CMakeLists.txt @@ -0,0 +1,17 @@ +# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+vespa_add_executable(vespalib_gbdt_test_app TEST + SOURCES + gbdt_test.cpp + DEPENDS + vespalib + vespalib_vespalib_eval_llvm +) +vespa_add_test(NAME vespalib_gbdt_test_app COMMAND vespalib_gbdt_test_app) +vespa_add_executable(vespalib_gbdt_benchmark_app + SOURCES + gbdt_benchmark.cpp + DEPENDS + vespalib + vespalib_vespalib_eval_llvm +) +vespa_add_test(NAME vespalib_gbdt_benchmark_app COMMAND vespalib_gbdt_benchmark_app BENCHMARK) diff --git a/eval/src/tests/eval/gbdt/gbdt_benchmark.cpp b/eval/src/tests/eval/gbdt/gbdt_benchmark.cpp new file mode 100644 index 00000000000..ee25b68eb96 --- /dev/null +++ b/eval/src/tests/eval/gbdt/gbdt_benchmark.cpp @@ -0,0 +1,277 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +#include <vespa/vespalib/testkit/test_kit.h> +#include <vespa/vespalib/eval/gbdt.h> +#include <vespa/vespalib/eval/vm_forest.h> +#include <vespa/vespalib/eval/llvm/deinline_forest.h> +#include <vespa/vespalib/eval/llvm/compiled_function.h> +#include <vespa/vespalib/eval/function.h> +#include <vespa/vespalib/util/benchmark_timer.h> +#include <vespa/vespalib/util/stringfmt.h> +#include "model.cpp" + +using namespace vespalib::eval; +using namespace vespalib::eval::nodes; +using namespace vespalib::eval::gbdt; + +//----------------------------------------------------------------------------- + +struct CompileStrategy { + virtual const char *name() const = 0; + virtual const char *code_name() const = 0; + virtual CompiledFunction compile(const Function &function) const = 0; + bool is_same(const CompileStrategy &rhs) const { + return (this == &rhs); + } + virtual ~CompileStrategy() {} +}; + +struct NullStrategy : CompileStrategy { + virtual const char *name() const { + return "none"; + } + virtual const char *code_name() const { + return "Optimize::none"; + } + virtual CompiledFunction compile(const Function &function) const { + return CompiledFunction(function, PassParams::ARRAY, 
Optimize::none); + } +}; +NullStrategy none; + +struct VMForestStrategy : CompileStrategy { + virtual const char *name() const { + return "vm-forest"; + } + virtual const char *code_name() const { + return "VMForest::optimize_chain"; + } + virtual CompiledFunction compile(const Function &function) const { + return CompiledFunction(function, PassParams::ARRAY, VMForest::optimize_chain); + } +}; +VMForestStrategy vm_forest; + +struct DeinlineForestStrategy : CompileStrategy { + virtual const char *name() const { + return "deinline-forest"; + } + virtual const char *code_name() const { + return "DeinlineForest::optimize_chain"; + } + virtual CompiledFunction compile(const Function &function) const { + return CompiledFunction(function, PassParams::ARRAY, DeinlineForest::optimize_chain); + } +}; +DeinlineForestStrategy deinline_forest; + +//----------------------------------------------------------------------------- + +struct Option { + size_t id; + const CompileStrategy &strategy; + bool is_same(const Option &rhs) const { return strategy.is_same(rhs.strategy); } + const char *name() const { return strategy.name(); } + CompiledFunction compile(const Function &function) const { return strategy.compile(function); } + const char *code_name() const { return strategy.code_name(); } +}; + +std::vector<Option> all_options({{0, none},{1, vm_forest}}); + +//----------------------------------------------------------------------------- + +struct Result { + double us; + size_t opt_idx; + bool operator<(const Result &rhs) { + return (us < rhs.us); + } +}; + +struct Segment { + double min; + Option option; + vespalib::string build() const { + return vespalib::make_string("{%g, %zu}", min, option.id); + } +}; + +struct Plan { + std::vector<Segment> segments; + void add(const Segment &seg) { + if (segments.empty()) { + segments.push_back(seg); + } else { + if (!segments.back().option.is_same(seg.option)) { + segments.push_back(seg); + } + } + } + vespalib::string build() const { + 
vespalib::string plan; + plan.append("{"); + for (size_t i = 0; i < segments.size(); ++i) { + if (i > 0) { + plan.append(", "); + } + plan += segments[i].build(); + } + plan.append("}"); + return plan; + } +}; + +//----------------------------------------------------------------------------- + +bool crop(const std::vector<Option> &options, const Option &opt, size_t &end) { + for (size_t i = 0; i < end; ++i) { + if (options[i].is_same(opt)) { + end = i; + return true; + } + } + return false; +} + +std::vector<Option> keep_contested(const std::vector<Option> &a, + const std::vector<Option> &b) +{ + size_t end = b.size(); + std::vector<Option> ret; + for (size_t i = 0; (i < a.size()) && (end > 0); ++i) { + if (crop(b, a[i], end)) { + ret.push_back(a[i]); + } + } + return ret; +} + +std::vector<Option> find_order(const ForestParams ¶ms, + const std::vector<Option> &options, + size_t num_trees) +{ + std::vector<Result> results; + Function forest = make_forest(params, num_trees); + for (size_t i = 0; i < options.size(); ++i) { + CompiledFunction compiled_function = options[i].compile(forest); + std::vector<double> inputs(compiled_function.num_params(), 0.5); + results.push_back({compiled_function.estimate_cost_us(inputs), i}); + fprintf(stderr, " %20s@%6zu: %16g us (inputs: %zu)\n", + options[i].name(), num_trees, results.back().us, + inputs.size()); + } + std::sort(results.begin(), results.end()); + std::vector<Option> ret; + for (auto result: results) { + ret.push_back(options[result.opt_idx]); + } + return ret; +} + +double expected_path(const ForestParams ¶ms, size_t num_trees) { + return ForestStats(extract_trees(make_forest(params, num_trees).root())).total_expected_path_length; +} + +void explore_segment(const ForestParams ¶ms, + const std::vector<Option> &min_order, + const std::vector<Option> &max_order, + size_t min_trees, size_t max_trees, + Plan &plan_out) +{ + assert(min_trees != max_trees); + std::vector<Option> options = keep_contested(min_order, 
max_order); + assert(!options.empty()); + if (options.size() == 1) { + plan_out.add(Segment{expected_path(params, min_trees), options[0]}); + } else { + if ((max_trees - min_trees) == 1) { + plan_out.add(Segment{expected_path(params, min_trees), min_order[0]}); + plan_out.add(Segment{expected_path(params, max_trees), max_order[0]}); + } else { + size_t num_trees = (min_trees + max_trees) / 2; + std::vector<Option> order = find_order(params, options, num_trees); + explore_segment(params, min_order, order, min_trees, num_trees, plan_out); + explore_segment(params, order, max_order, num_trees, max_trees, plan_out); + } + } +} + +Plan find_plan(const ForestParams ¶ms, std::initializer_list<size_t> limits) { + Plan plan; + auto num_trees = limits.begin(); + size_t min_trees = *num_trees++; + std::vector<Option> min_order = find_order(params, all_options, min_trees); + while (num_trees != limits.end()) { + size_t max_trees = *num_trees++; + std::vector<Option> max_order = find_order(params, all_options, max_trees); + explore_segment(params, min_order, max_order, min_trees, max_trees, plan); + std::swap(min_trees, max_trees); + std::swap(min_order, max_order); + } + return plan; +} + +//----------------------------------------------------------------------------- + +void dump_options(const std::vector<Option> &options) { + fprintf(stdout, "std::vector<Optimize::Chain> options({"); + for (size_t i = 0; i < options.size(); ++i) { + if (i > 0) { + fprintf(stdout, ", "); + } + fprintf(stdout, "%s", options[i].code_name()); + } + fprintf(stdout, "});\n"); + fflush(stdout); +} + +void dump_param_values(const char *name, const std::vector<size_t> &values) { + fprintf(stdout, "std::vector<size_t> %s({", name); + for (size_t i = 0; i < values.size(); ++i) { + if (i > 0) { + fprintf(stdout, ", "); + } + fprintf(stdout, "%zu", values[i]); + } + fprintf(stdout, "});\n"); + fflush(stdout); +} + +void dump_plan(const ForestParams ¶ms, const Plan &plan) { + fprintf(stdout, "{{%zu, %zu}, 
%s}", + params.less_percent, params.tree_size, + plan.build().c_str()); +} + +//----------------------------------------------------------------------------- + +TEST("find optimization plans") { + std::vector<size_t> less_percent_values({90, 100}); + std::vector<size_t> tree_size_values( + {2, 3, 4, 5, 6, 7, 8, + 9, 10, 11, 12, 13, 14, 15, 16, + 18, 20, 22, 24, 26, 28, 30, 32, + 36, 40, 44, 48, 52, 56, 60, 64, + 72, 80, 88, 96, 104, 112, 120, 128}); + + dump_options(all_options); + dump_param_values("less_percent_values", less_percent_values); + dump_param_values("tree_size_values", tree_size_values); + + size_t num_plans = 0; + fprintf(stdout, "std::map<Params,Plan> plan_repo({"); + for (size_t less_percent: less_percent_values) { + for (size_t tree_size: tree_size_values) { + ForestParams params(1234u, less_percent, tree_size); + fprintf(stdout, "%s\n", (num_plans++ == 0) ? "" : ","); + fflush(stdout); + fprintf(stdout, " "); + Plan plan = find_plan(params, {8, 512}); + dump_plan(params, plan); + } + } + fprintf(stdout, "});\n"); +} + +//----------------------------------------------------------------------------- + +TEST_MAIN() { TEST_RUN_ALL(); } diff --git a/eval/src/tests/eval/gbdt/gbdt_test.cpp b/eval/src/tests/eval/gbdt/gbdt_test.cpp new file mode 100644 index 00000000000..195836d9827 --- /dev/null +++ b/eval/src/tests/eval/gbdt/gbdt_test.cpp @@ -0,0 +1,256 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+#include <vespa/vespalib/testkit/test_kit.h> +#include <vespa/vespalib/eval/gbdt.h> +#include <vespa/vespalib/eval/vm_forest.h> +#include <vespa/vespalib/eval/function.h> +#include <vespa/vespalib/eval/llvm/deinline_forest.h> +#include <vespa/vespalib/eval/llvm/compiled_function.h> +#include <vespa/vespalib/eval/interpreted_function.h> +#include <vespa/vespalib/util/stringfmt.h> +#include "model.cpp" + +using namespace vespalib::eval; +using namespace vespalib::eval::nodes; +using namespace vespalib::eval::gbdt; + +//----------------------------------------------------------------------------- + +double eval_double(const Function &function, const std::vector<double> ¶ms) { + InterpretedFunction ifun(SimpleTensorEngine::ref(), function, NodeTypes()); + InterpretedFunction::Context ctx; + for (double param: params) { + ctx.add_param(param); + } + return ifun.eval(ctx).as_double(); +} + +//----------------------------------------------------------------------------- + +TEST("require that tree stats can be calculated") { + for (size_t tree_size = 2; tree_size < 64; ++tree_size) { + EXPECT_EQUAL(tree_size, TreeStats(Function::parse(Model().make_tree(tree_size)).root()).size); + } + + TreeStats stats1(Function::parse("if((a<1),1.0,if((b in [1,2,3]),if((c in 1),2.0,3.0),4.0))").root()); + EXPECT_EQUAL(4u, stats1.size); + EXPECT_EQUAL(1u, stats1.num_less_checks); + EXPECT_EQUAL(2u, stats1.num_in_checks); + EXPECT_EQUAL(3u, stats1.max_set_size); + + TreeStats stats2(Function::parse("if((d in 1),10.0,if((e<1),20.0,30.0))").root()); + EXPECT_EQUAL(3u, stats2.size); + EXPECT_EQUAL(1u, stats2.num_less_checks); + EXPECT_EQUAL(1u, stats2.num_in_checks); + EXPECT_EQUAL(1u, stats2.max_set_size); +} + +TEST("require that trees can be extracted from forest") { + for (size_t tree_size = 10; tree_size < 20; ++tree_size) { + for (size_t forest_size = 10; forest_size < 20; ++forest_size) { + vespalib::string expression = Model().make_forest(forest_size, tree_size); + Function function = 
Function::parse(expression); + std::vector<const Node *> trees = extract_trees(function.root()); + EXPECT_EQUAL(forest_size, trees.size()); + for (const Node *tree: trees) { + EXPECT_EQUAL(tree_size, TreeStats(*tree).size); + } + } + } +} + +TEST("require that forest stats can be calculated") { + Function function = Function::parse("if((a<1),1.0,if((b in [1,2,3]),if((c in 1),2.0,3.0),4.0))+" + "if((d in 1),10.0,if((e<1),20.0,30.0))+" + "if((d in 1),10.0,if((e<1),20.0,30.0))"); + std::vector<const Node *> trees = extract_trees(function.root()); + ForestStats stats(trees); + EXPECT_EQUAL(3u, stats.num_trees); + EXPECT_EQUAL(10u, stats.total_size); + ASSERT_EQUAL(2u, stats.tree_sizes.size()); + EXPECT_EQUAL(3u, stats.tree_sizes[0].size); + EXPECT_EQUAL(2u, stats.tree_sizes[0].count); + EXPECT_EQUAL(4u, stats.tree_sizes[1].size); + EXPECT_EQUAL(1u, stats.tree_sizes[1].count); + EXPECT_EQUAL(3u, stats.total_less_checks); + EXPECT_EQUAL(4u, stats.total_in_checks); + EXPECT_EQUAL(3u, stats.max_set_size); +} + +double expected_path(const vespalib::string &forest) { + return ForestStats(extract_trees(Function::parse(forest).root())).total_expected_path_length; +} + +TEST("require that expected path length is calculated correctly") { + EXPECT_EQUAL(0.0, expected_path("1")); + EXPECT_EQUAL(0.0, expected_path("if(1,2,3)")); + EXPECT_EQUAL(1.0, expected_path("if(a<1,2,3)")); + EXPECT_EQUAL(1.0, expected_path("if(b in [1,2,3],2,3)")); + EXPECT_EQUAL(2.0, expected_path("if(a<1,2,3)+if(a<1,2,3)")); + EXPECT_EQUAL(3.0, expected_path("if(a<1,2,3)+if(a<1,2,3)+if(a<1,2,3)")); + EXPECT_EQUAL(0.50*1.0 + 0.50*2.0, expected_path("if(a<1,1,if(a<1,2,3))")); + EXPECT_EQUAL(0.25*1.0 + 0.75*2.0, expected_path("if(a<1,1,if(a<1,2,3),0.25)")); + EXPECT_EQUAL(0.75*1.0 + 0.25*2.0, expected_path("if(a<1,1,if(a<1,2,3),0.75)")); +} + +double average_path(const vespalib::string &forest) { + return ForestStats(extract_trees(Function::parse(forest).root())).total_average_path_length; +} + +TEST("require 
that average path length is calculated correctly") { + EXPECT_EQUAL(0.0, average_path("1")); + EXPECT_EQUAL(0.0, average_path("if(1,2,3)")); + EXPECT_EQUAL(1.0, average_path("if(a<1,2,3)")); + EXPECT_EQUAL(1.0, average_path("if(b in [1,2,3],2,3)")); + EXPECT_EQUAL(2.0, average_path("if(a<1,2,3)+if(a<1,2,3)")); + EXPECT_EQUAL(3.0, average_path("if(a<1,2,3)+if(a<1,2,3)+if(a<1,2,3)")); + EXPECT_EQUAL(5.0/3.0, average_path("if(a<1,1,if(a<1,2,3))")); + EXPECT_EQUAL(5.0/3.0, average_path("if(a<1,1,if(a<1,2,3),0.25)")); + EXPECT_EQUAL(5.0/3.0, average_path("if(a<1,1,if(a<1,2,3),0.75)")); +} + +double count_tuned(const vespalib::string &forest) { + return ForestStats(extract_trees(Function::parse(forest).root())).total_tuned_checks; +} + +TEST("require that tuned checks are counted correctly") { + EXPECT_EQUAL(0.0, count_tuned("if(a<1,2,3)")); + EXPECT_EQUAL(0.0, count_tuned("if(a<1,2,3,0.5)")); // NB: no explicit tuned flag + EXPECT_EQUAL(1.0, count_tuned("if(a<1,2,3,0.3)")); + EXPECT_EQUAL(1.0, count_tuned("if(b in [1,2,3],2,3,0.8)")); + EXPECT_EQUAL(2.0, count_tuned("if(a<1,2,3,0.3)+if(a<1,2,3,0.8)")); + EXPECT_EQUAL(3.0, count_tuned("if(a<1,2,3,0.3)+if(a<1,2,3,0.4)+if(a<1,2,3,0.9)")); + EXPECT_EQUAL(1.0, count_tuned("if(a<1,1,if(a<1,2,3),0.25)")); + EXPECT_EQUAL(2.0, count_tuned("if(a<1,1,if(a<1,2,3,0.2),0.25)")); +} + +//----------------------------------------------------------------------------- + +struct DummyForest1 : public Forest { + size_t num_trees; + explicit DummyForest1(size_t num_trees_in) : num_trees(num_trees_in) {} + static double eval(const Forest *forest, const double *) { + const DummyForest1 &self = *((const DummyForest1 *)forest); + return double(self.num_trees * 2); + } + static Optimize::Result optimize(const ForestStats &stats, + const std::vector<const nodes::Node *> &trees) + { + if (stats.num_trees < 50) { + return Optimize::Result(); + } + return Optimize::Result(Forest::UP(new DummyForest1(trees.size())), eval); + } +}; + +struct 
DummyForest2 : public Forest { + size_t num_trees; + explicit DummyForest2(size_t num_trees_in) : num_trees(num_trees_in) {} + static double eval(const Forest *forest, const double *) { + const DummyForest1 &self = *((const DummyForest1 *)forest); + return double(self.num_trees); + } + static Optimize::Result optimize(const ForestStats &stats, + const std::vector<const nodes::Node *> &trees) + { + if (stats.num_trees < 25) { + return Optimize::Result(); + } + return Optimize::Result(Forest::UP(new DummyForest2(trees.size())), eval); + } +}; + +//----------------------------------------------------------------------------- + +TEST("require that trees can be optimized by a forest optimizer") { + Optimize::Chain chain({DummyForest1::optimize, DummyForest2::optimize}); + size_t tree_size = 20; + for (size_t forest_size = 10; forest_size <= 100; forest_size += 10) { + vespalib::string expression = Model().make_forest(forest_size, tree_size); + Function function = Function::parse(expression); + CompiledFunction compiled_function(function, PassParams::ARRAY, chain); + std::vector<double> inputs(function.num_params(), 0.5); + if (forest_size < 25) { + EXPECT_EQUAL(eval_double(function, inputs), compiled_function.get_function()(&inputs[0])); + } else if (forest_size < 50) { + EXPECT_EQUAL(double(forest_size), compiled_function.get_function()(&inputs[0])); + } else { + EXPECT_EQUAL(double(2 * forest_size), compiled_function.get_function()(&inputs[0])); + } + } +} + +//----------------------------------------------------------------------------- + +Optimize::Chain less_only_vm_chain({VMForest::less_only_optimize}); +Optimize::Chain general_vm_chain({VMForest::general_optimize}); + +TEST("require that less only VM tree optimizer works") { + Function function = Function::parse("if((a<1),1.0,if((b<1),if((c<1),2.0,3.0),4.0))+" + "if((d<1),10.0,if((e<1),if((f<1),20.0,30.0),40.0))"); + CompiledFunction compiled_function(function, PassParams::SEPARATE, less_only_vm_chain); + auto f 
= compiled_function.get_function<6>(); + EXPECT_EQUAL(11.0, f(0.5, 0.0, 0.0, 0.5, 0.0, 0.0)); + EXPECT_EQUAL(22.0, f(1.5, 0.5, 0.5, 1.5, 0.5, 0.5)); + EXPECT_EQUAL(33.0, f(1.5, 0.5, 1.5, 1.5, 0.5, 1.5)); + EXPECT_EQUAL(44.0, f(1.5, 1.5, 0.0, 1.5, 1.5, 0.0)); +} + +TEST("require that models with in checks are rejected by less only vm optimizer") { + Function function = Function::parse(Model().less_percent(100).make_forest(300, 30)); + auto trees = extract_trees(function.root()); + ForestStats stats(trees); + EXPECT_TRUE(Optimize::apply_chain(less_only_vm_chain, stats, trees).valid()); + stats.total_in_checks = 1; + EXPECT_TRUE(!Optimize::apply_chain(less_only_vm_chain, stats, trees).valid()); +} + +TEST("require that general VM tree optimizer works") { + Function function = Function::parse("if((a<1),1.0,if((b in [1,2,3]),if((c in 1),2.0,3.0),4.0))+" + "if((d in 1),10.0,if((e<1),if((f<1),20.0,30.0),40.0))"); + CompiledFunction compiled_function(function, PassParams::SEPARATE, general_vm_chain); + auto f = compiled_function.get_function<6>(); + EXPECT_EQUAL(11.0, f(0.5, 0.0, 0.0, 1.0, 0.0, 0.0)); + EXPECT_EQUAL(22.0, f(1.5, 2.0, 1.0, 2.0, 0.5, 0.5)); + EXPECT_EQUAL(33.0, f(1.5, 2.0, 2.0, 2.0, 0.5, 1.5)); + EXPECT_EQUAL(44.0, f(1.5, 5.0, 0.0, 2.0, 1.5, 0.0)); +} + +TEST("require that models with too large sets are rejected by general vm optimizer") { + Function function = Function::parse(Model().less_percent(80).make_forest(300, 30)); + auto trees = extract_trees(function.root()); + ForestStats stats(trees); + EXPECT_TRUE(stats.total_in_checks > 0); + EXPECT_TRUE(Optimize::apply_chain(general_vm_chain, stats, trees).valid()); + stats.max_set_size = 256; + EXPECT_TRUE(!Optimize::apply_chain(general_vm_chain, stats, trees).valid()); +} + +//----------------------------------------------------------------------------- + +TEST("require that forests evaluate to approximately the same for all evaluation options") { + for (size_t tree_size: std::vector<size_t>({20})) { + for 
(size_t num_trees: std::vector<size_t>({50})) { + for (size_t less_percent: std::vector<size_t>({100, 80})) { + vespalib::string expression = Model().less_percent(less_percent).make_forest(num_trees, tree_size); + Function function = Function::parse(expression); + CompiledFunction none(function, PassParams::ARRAY, Optimize::none); + CompiledFunction deinline(function, PassParams::ARRAY, DeinlineForest::optimize_chain); + CompiledFunction vm_forest(function, PassParams::ARRAY, VMForest::optimize_chain); + EXPECT_EQUAL(0u, none.get_forests().size()); + ASSERT_EQUAL(1u, deinline.get_forests().size()); + EXPECT_TRUE(dynamic_cast<DeinlineForest*>(deinline.get_forests()[0].get()) != nullptr); + ASSERT_EQUAL(1u, vm_forest.get_forests().size()); + EXPECT_TRUE(dynamic_cast<VMForest*>(vm_forest.get_forests()[0].get()) != nullptr); + std::vector<double> inputs(function.num_params(), 0.5); + double expected = eval_double(function, inputs); + EXPECT_APPROX(expected, none.get_function()(&inputs[0]), 1e-6); + EXPECT_APPROX(expected, deinline.get_function()(&inputs[0]), 1e-6); + EXPECT_APPROX(expected, vm_forest.get_function()(&inputs[0]), 1e-6); + } + } + } +} + +//----------------------------------------------------------------------------- + +TEST_MAIN() { TEST_RUN_ALL(); } diff --git a/eval/src/tests/eval/gbdt/model.cpp b/eval/src/tests/eval/gbdt/model.cpp new file mode 100644 index 00000000000..e125d9e77d2 --- /dev/null +++ b/eval/src/tests/eval/gbdt/model.cpp @@ -0,0 +1,99 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+#pragma once + +#include <random> +#include <vespa/vespalib/eval/function.h> + +using vespalib::make_string; +using vespalib::eval::Function; + +//----------------------------------------------------------------------------- + +class Model +{ +private: + std::mt19937 _gen; + size_t _less_percent; + + size_t get_int(size_t min, size_t max) { + std::uniform_int_distribution<size_t> dist(min, max); + return dist(_gen); + } + + double get_real(double min, double max) { + std::uniform_real_distribution<double> dist(min, max); + return dist(_gen); + } + + std::string make_feature_name() { + size_t max_feature = 2; + while ((max_feature < 1024) && (get_int(0, 99) < 50)) { + max_feature *= 2; + } + return make_string("feature_%zu", get_int(1, max_feature)); + } + + std::string make_cond() { + if (get_int(1,100) > _less_percent) { + return make_string("(%s in [%g,%g,%g])", + make_feature_name().c_str(), + get_int(0, 4) / 4.0, + get_int(0, 4) / 4.0, + get_int(0, 4) / 4.0); + } else { + return make_string("(%s<%g)", + make_feature_name().c_str(), + get_real(0.0, 1.0)); + } + } + +public: + explicit Model(size_t seed = 5489u) : _gen(seed), _less_percent(80) {} + + Model &less_percent(size_t value) { + _less_percent = value; + return *this; + } + + std::string make_tree(size_t size) { + assert(size > 0); + if (size == 1) { + return make_string("%g", get_real(0.0, 1.0)); + } + size_t pivot = get_int(1, size - 1); + return make_string("if(%s,%s,%s)", + make_cond().c_str(), + make_tree(pivot).c_str(), + make_tree(size - pivot).c_str()); + } + + std::string make_forest(size_t num_trees, size_t tree_sizes) { + assert(num_trees > 0); + vespalib::string forest = make_tree(tree_sizes); + for (size_t i = 1; i < num_trees; ++i) { + forest.append("+"); + forest.append(make_tree(tree_sizes)); + } + return forest; + } +}; + +//----------------------------------------------------------------------------- + +struct ForestParams { + size_t model_seed; + size_t less_percent; + size_t 
tree_size; + ForestParams(size_t model_seed_in, size_t less_percent_in, size_t tree_size_in) + : model_seed(model_seed_in), less_percent(less_percent_in), tree_size(tree_size_in) {} +}; + +//----------------------------------------------------------------------------- + +Function make_forest(const ForestParams ¶ms, size_t num_trees) { + return Function::parse(Model(params.model_seed) + .less_percent(params.less_percent) + .make_forest(num_trees, params.tree_size)); +} + +//----------------------------------------------------------------------------- diff --git a/eval/src/tests/eval/interpreted_function/.gitignore b/eval/src/tests/eval/interpreted_function/.gitignore new file mode 100644 index 00000000000..0ac61ca2aa8 --- /dev/null +++ b/eval/src/tests/eval/interpreted_function/.gitignore @@ -0,0 +1 @@ +vespalib_interpreted_function_test_app diff --git a/eval/src/tests/eval/interpreted_function/CMakeLists.txt b/eval/src/tests/eval/interpreted_function/CMakeLists.txt new file mode 100644 index 00000000000..09d7ce7364b --- /dev/null +++ b/eval/src/tests/eval/interpreted_function/CMakeLists.txt @@ -0,0 +1,8 @@ +# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+vespa_add_executable(vespalib_interpreted_function_test_app TEST + SOURCES + interpreted_function_test.cpp + DEPENDS + vespalib +) +vespa_add_test(NAME vespalib_interpreted_function_test_app COMMAND vespalib_interpreted_function_test_app) diff --git a/eval/src/tests/eval/interpreted_function/FILES b/eval/src/tests/eval/interpreted_function/FILES new file mode 100644 index 00000000000..e046bd3ff35 --- /dev/null +++ b/eval/src/tests/eval/interpreted_function/FILES @@ -0,0 +1 @@ +interpreted_function_test.cpp diff --git a/eval/src/tests/eval/interpreted_function/interpreted_function_test.cpp b/eval/src/tests/eval/interpreted_function/interpreted_function_test.cpp new file mode 100644 index 00000000000..71aaaf0ec42 --- /dev/null +++ b/eval/src/tests/eval/interpreted_function/interpreted_function_test.cpp @@ -0,0 +1,248 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +#include <vespa/vespalib/testkit/test_kit.h> +#include <vespa/vespalib/eval/function.h> +#include <vespa/vespalib/eval/tensor_spec.h> +#include <vespa/vespalib/eval/interpreted_function.h> +#include <vespa/vespalib/eval/test/eval_spec.h> +#include <vespa/vespalib/eval/basic_nodes.h> +#include <vespa/vespalib/util/stringfmt.h> +#include <vespa/vespalib/util/stash.h> +#include <vespa/vespalib/test/insertion_operators.h> +#include <iostream> + +using namespace vespalib::eval; +using vespalib::Stash; + +//----------------------------------------------------------------------------- + +std::vector<vespalib::string> unsupported = { + "map(", + "join(", + "reduce(", + "rename(", + "tensor(", + "concat(" +}; + +bool is_unsupported(const vespalib::string &expression) { + if (expression == "reduce(a,sum)") { + return false; + } + for (const auto &prefix: unsupported) { + if (starts_with(expression, prefix)) { + return true; + } + } + return false; +} + +//----------------------------------------------------------------------------- + +struct 
MyEvalTest : test::EvalSpec::EvalTest { + size_t pass_cnt = 0; + size_t fail_cnt = 0; + bool print_pass = false; + bool print_fail = false; + virtual void next_expression(const std::vector<vespalib::string> ¶m_names, + const vespalib::string &expression) override + { + Function function = Function::parse(param_names, expression); + ASSERT_TRUE(!function.has_error()); + bool is_supported = !is_unsupported(expression); + bool has_issues = InterpretedFunction::detect_issues(function); + if (is_supported == has_issues) { + const char *supported_str = is_supported ? "supported" : "not supported"; + const char *issues_str = has_issues ? "has issues" : "does not have issues"; + print_fail && fprintf(stderr, "expression %s is %s, but %s\n", + expression.c_str(), supported_str, issues_str); + ++fail_cnt; + } + } + virtual void handle_case(const std::vector<vespalib::string> ¶m_names, + const std::vector<double> ¶m_values, + const vespalib::string &expression, + double expected_result) override + { + Function function = Function::parse(param_names, expression); + ASSERT_TRUE(!function.has_error()); + bool is_supported = !is_unsupported(expression); + bool has_issues = InterpretedFunction::detect_issues(function); + if (is_supported && !has_issues) { + InterpretedFunction ifun(SimpleTensorEngine::ref(), function, NodeTypes()); + ASSERT_EQUAL(ifun.num_params(), param_values.size()); + InterpretedFunction::Context ictx; + for (double param: param_values) { + ictx.add_param(param); + } + const Value &result_value = ifun.eval(ictx); + double result = result_value.as_double(); + if (result_value.is_double() && is_same(expected_result, result)) { + print_pass && fprintf(stderr, "verifying: %s -> %g ... PASS\n", + as_string(param_names, param_values, expression).c_str(), + expected_result); + ++pass_cnt; + } else { + print_fail && fprintf(stderr, "verifying: %s -> %g ... 
FAIL: got %g\n", + as_string(param_names, param_values, expression).c_str(), + expected_result, result); + ++fail_cnt; + } + } + } +}; + +TEST_FF("require that compiled evaluation passes all conformance tests", MyEvalTest(), test::EvalSpec()) { + f1.print_fail = true; + f2.add_all_cases(); + f2.each_case(f1); + EXPECT_GREATER(f1.pass_cnt, 1000u); + EXPECT_EQUAL(0u, f1.fail_cnt); +} + +//----------------------------------------------------------------------------- + +TEST("require that invalid function evaluates to a error") { + std::vector<vespalib::string> params({"x", "y", "z", "w"}); + Function function = Function::parse(params, "x & y"); + EXPECT_TRUE(function.has_error()); + InterpretedFunction ifun(SimpleTensorEngine::ref(), function, NodeTypes()); + InterpretedFunction::Context ctx; + ctx.add_param(1); + ctx.add_param(2); + ctx.add_param(3); + ctx.add_param(4); + const Value &result = ifun.eval(ctx); + EXPECT_TRUE(result.is_error()); + EXPECT_EQUAL(error_value, result.as_double()); +} + +//----------------------------------------------------------------------------- + +size_t count_ifs(const vespalib::string &expr, std::initializer_list<double> params_in) { + Function fun = Function::parse(expr); + InterpretedFunction ifun(SimpleTensorEngine::ref(), fun, NodeTypes()); + InterpretedFunction::Context ctx; + for (double param: params_in) { + ctx.add_param(param); + } + ifun.eval(ctx); + return ctx.if_cnt(); +} + +TEST("require that if_cnt in eval context is updated correctly") { + EXPECT_EQUAL(0u, count_ifs("1", {})); + EXPECT_EQUAL(1u, count_ifs("if(a<10,if(a<9,if(a<8,if(a<7,5,4),3),2),1)", {10})); + EXPECT_EQUAL(2u, count_ifs("if(a<10,if(a<9,if(a<8,if(a<7,5,4),3),2),1)", {9})); + EXPECT_EQUAL(3u, count_ifs("if(a<10,if(a<9,if(a<8,if(a<7,5,4),3),2),1)", {8})); + EXPECT_EQUAL(4u, count_ifs("if(a<10,if(a<9,if(a<8,if(a<7,5,4),3),2),1)", {7})); + EXPECT_EQUAL(4u, count_ifs("if(a<10,if(a<9,if(a<8,if(a<7,5,4),3),2),1)", {6})); +} + 
+//----------------------------------------------------------------------------- + +TEST("require that interpreted function instructions have expected size") { + EXPECT_EQUAL(sizeof(InterpretedFunction::Instruction), 16u); +} + +TEST("require that basic addition works") { + Function function = Function::parse("a+10"); + InterpretedFunction interpreted(SimpleTensorEngine::ref(), function, NodeTypes()); + InterpretedFunction::Context ctx; + ctx.add_param(20); + EXPECT_EQUAL(interpreted.eval(ctx).as_double(), 30.0); + ctx.clear_params(); + ctx.add_param(40); + EXPECT_EQUAL(interpreted.eval(ctx).as_double(), 50.0); +} + +//----------------------------------------------------------------------------- + +TEST("require that dot product like expression is not optimized for unknown types") { + const TensorEngine &engine = SimpleTensorEngine::ref(); + Function function = Function::parse("sum(a*b)"); + DoubleValue a(2.0); + DoubleValue b(3.0); + double expect = (2.0 * 3.0); + InterpretedFunction interpreted(engine, function, NodeTypes()); + EXPECT_EQUAL(4u, interpreted.program_size()); + InterpretedFunction::Context ctx; + ctx.add_param(a); + ctx.add_param(b); + const Value &result = interpreted.eval(ctx); + EXPECT_TRUE(result.is_double()); + EXPECT_EQUAL(expect, result.as_double()); +} + +TEST("require that dot product works with tensor function") { + const TensorEngine &engine = SimpleTensorEngine::ref(); + Function function = Function::parse("sum(a*b)"); + auto a = TensorSpec("tensor(x[3])") + .add({{"x", 0}}, 5.0) + .add({{"x", 1}}, 3.0) + .add({{"x", 2}}, 2.0); + auto b = TensorSpec("tensor(x[3])") + .add({{"x", 0}}, 7.0) + .add({{"x", 1}}, 11.0) + .add({{"x", 2}}, 13.0); + double expect = ((5.0 * 7.0) + (3.0 * 11.0) + (2.0 * 13.0)); + NodeTypes types(function, {ValueType::from_spec(a.type()), ValueType::from_spec(a.type())}); + InterpretedFunction interpreted(engine, function, types); + EXPECT_EQUAL(1u, interpreted.program_size()); + InterpretedFunction::Context ctx; + 
TensorValue va(engine.create(a)); + TensorValue vb(engine.create(b)); + ctx.add_param(va); + ctx.add_param(vb); + const Value &result = interpreted.eval(ctx); + EXPECT_TRUE(result.is_double()); + EXPECT_EQUAL(expect, result.as_double()); +} + +TEST("require that matrix multiplication works with tensor function") { + const TensorEngine &engine = SimpleTensorEngine::ref(); + Function function = Function::parse("sum(a*b,y)"); + auto a = TensorSpec("tensor(x[2],y[2])") + .add({{"x", 0},{"y", 0}}, 1.0) + .add({{"x", 0},{"y", 1}}, 2.0) + .add({{"x", 1},{"y", 0}}, 3.0) + .add({{"x", 1},{"y", 1}}, 5.0); + auto b = TensorSpec("tensor(y[2],z[2])") + .add({{"y", 0},{"z", 0}}, 7.0) + .add({{"y", 0},{"z", 1}}, 11.0) + .add({{"y", 1},{"z", 0}}, 13.0) + .add({{"y", 1},{"z", 1}}, 17.0); + auto expect = TensorSpec("tensor(x[2],z[2])") + .add({{"x", 0},{"z", 0}}, (1.0 * 7.0) + (2.0 * 13.0)) + .add({{"x", 0},{"z", 1}}, (1.0 * 11.0) + (2.0 * 17.0)) + .add({{"x", 1},{"z", 0}}, (3.0 * 7.0) + (5.0 * 13.0)) + .add({{"x", 1},{"z", 1}}, (3.0 * 11.0) + (5.0 * 17.0)); + NodeTypes types(function, {ValueType::from_spec(a.type()), ValueType::from_spec(a.type())}); + InterpretedFunction interpreted(engine, function, types); + EXPECT_EQUAL(1u, interpreted.program_size()); + InterpretedFunction::Context ctx; + TensorValue va(engine.create(a)); + TensorValue vb(engine.create(b)); + ctx.add_param(va); + ctx.add_param(vb); + const Value &result = interpreted.eval(ctx); + ASSERT_TRUE(result.is_tensor()); + EXPECT_EQUAL(expect, engine.to_spec(*result.as_tensor())); +} + +//----------------------------------------------------------------------------- + +TEST("require function issues can be detected") { + auto simple = Function::parse("a+b"); + auto complex = Function::parse("join(a,b,f(a,b)(a+b))"); + EXPECT_FALSE(simple.has_error()); + EXPECT_FALSE(complex.has_error()); + EXPECT_FALSE(InterpretedFunction::detect_issues(simple)); + EXPECT_TRUE(InterpretedFunction::detect_issues(complex)); + std::cerr << 
"Example function issues:" << std::endl + << InterpretedFunction::detect_issues(complex).list + << std::endl; +} + +//----------------------------------------------------------------------------- + +TEST_MAIN() { TEST_RUN_ALL(); } diff --git a/eval/src/tests/eval/node_types/.gitignore b/eval/src/tests/eval/node_types/.gitignore new file mode 100644 index 00000000000..bd793bfefcf --- /dev/null +++ b/eval/src/tests/eval/node_types/.gitignore @@ -0,0 +1 @@ +vespalib_node_types_test_app diff --git a/eval/src/tests/eval/node_types/CMakeLists.txt b/eval/src/tests/eval/node_types/CMakeLists.txt new file mode 100644 index 00000000000..2471815fa51 --- /dev/null +++ b/eval/src/tests/eval/node_types/CMakeLists.txt @@ -0,0 +1,8 @@ +# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +vespa_add_executable(vespalib_node_types_test_app TEST + SOURCES + node_types_test.cpp + DEPENDS + vespalib +) +vespa_add_test(NAME vespalib_node_types_test_app COMMAND vespalib_node_types_test_app) diff --git a/eval/src/tests/eval/node_types/node_types_test.cpp b/eval/src/tests/eval/node_types/node_types_test.cpp new file mode 100644 index 00000000000..5dd74e638d2 --- /dev/null +++ b/eval/src/tests/eval/node_types/node_types_test.cpp @@ -0,0 +1,311 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +#include <vespa/vespalib/testkit/test_kit.h> +#include <vespa/vespalib/eval/function.h> +#include <vespa/vespalib/eval/value_type.h> +#include <vespa/vespalib/eval/value_type_spec.h> +#include <vespa/vespalib/eval/node_types.h> + +using namespace vespalib::eval; + +/** + * Hack to avoid parse-conflict between tensor type expressions and + * lambda-generated tensors. This will patch leading identifier 'T' to + * 't' directly in the input stream after we have concluded that this + * is not a lambda-generated tensor in order to parse it out as a + * valid tensor type. 
This may be reverted later if we add support for + * parser rollback when we fail to parse a lambda-generated tensor. + **/ +void tensor_type_hack(const char *pos_in, const char *end_in) { + if ((pos_in < end_in) && (*pos_in == 'T')) { + const_cast<char *>(pos_in)[0] = 't'; + } +} + +struct TypeSpecExtractor : public vespalib::eval::SymbolExtractor { + void extract_symbol(const char *pos_in, const char *end_in, + const char *&pos_out, vespalib::string &symbol_out) const override + { + tensor_type_hack(pos_in, end_in); + ValueType type = value_type::parse_spec(pos_in, end_in, pos_out); + if (pos_out != nullptr) { + symbol_out = type.to_spec(); + } + } +}; + +void verify(const vespalib::string &type_expr_in, const vespalib::string &type_spec, bool replace = true) { + vespalib::string type_expr = type_expr_in; + if (replace) { + // replace 'tensor' with 'Tensor' in type expression, see hack above + for (size_t idx = type_expr.find("tensor"); + idx != type_expr.npos; + idx = type_expr.find("tensor")) + { + type_expr[idx] = 'T'; + } + } + Function function = Function::parse(type_expr, TypeSpecExtractor()); + if (!EXPECT_TRUE(!function.has_error())) { + fprintf(stderr, "parse error: %s\n", function.get_error().c_str()); + return; + } + std::vector<ValueType> input_types; + for (size_t i = 0; i < function.num_params(); ++i) { + input_types.push_back(ValueType::from_spec(function.param_name(i))); + } + NodeTypes types(function, input_types); + ValueType expected_type = ValueType::from_spec(type_spec); + ValueType actual_type = types.get_type(function.root()); + EXPECT_EQUAL(expected_type, actual_type); +} + +TEST("require that error nodes have error type") { + Function function = Function::parse("1 2 3 4 5", TypeSpecExtractor()); + EXPECT_TRUE(function.has_error()); + NodeTypes types(function, std::vector<ValueType>()); + ValueType expected_type = ValueType::from_spec("error"); + ValueType actual_type = types.get_type(function.root()); + EXPECT_EQUAL(expected_type, 
actual_type); +} + +TEST("require that leaf constants have appropriate type") { + TEST_DO(verify("123", "double")); + TEST_DO(verify("\"string values are hashed\"", "double")); +} + +TEST("require that input parameters preserve their type") { + TEST_DO(verify("any", "any")); + TEST_DO(verify("error", "error")); + TEST_DO(verify("double", "double")); + TEST_DO(verify("tensor", "tensor")); + TEST_DO(verify("tensor(x{},y[10],z[])", "tensor(x{},y[10],z[])")); +} + +TEST("require that arrays are double (size) unless they contain an error") { + TEST_DO(verify("[1,2,3]", "double")); + TEST_DO(verify("[any,tensor,double]", "double")); + TEST_DO(verify("[1,error,3]", "error")); +} + +TEST("require that if resolves to the appropriate type") { + TEST_DO(verify("if(error,1,2)", "error")); + TEST_DO(verify("if(1,error,2)", "error")); + TEST_DO(verify("if(1,2,error)", "error")); + TEST_DO(verify("if(any,1,2)", "double")); + TEST_DO(verify("if(double,1,2)", "double")); + TEST_DO(verify("if(tensor,1,2)", "double")); + TEST_DO(verify("if(double,tensor,tensor)", "tensor")); + TEST_DO(verify("if(double,any,any)", "any")); + TEST_DO(verify("if(double,tensor(a{}),tensor(a{}))", "tensor(a{})")); + TEST_DO(verify("if(double,tensor(a{}),tensor(b{}))", "tensor")); + TEST_DO(verify("if(double,tensor(a{}),tensor)", "tensor")); + TEST_DO(verify("if(double,tensor,tensor(a{}))", "tensor")); + TEST_DO(verify("if(double,tensor,any)", "any")); + TEST_DO(verify("if(double,any,tensor)", "any")); + TEST_DO(verify("if(double,tensor,double)", "any")); + TEST_DO(verify("if(double,double,tensor)", "any")); + TEST_DO(verify("if(double,double,any)", "any")); + TEST_DO(verify("if(double,any,double)", "any")); +} + +TEST("require that let expressions propagate type correctly") { + TEST_DO(verify("let(a,10,a)", "double")); + TEST_DO(verify("let(a,double,a)", "double")); + TEST_DO(verify("let(a,any,a)", "any")); + TEST_DO(verify("let(a,error,a)", "error")); + TEST_DO(verify("let(a,tensor,let(b,double,a))", 
"tensor")); + TEST_DO(verify("let(a,tensor,let(b,double,b))", "double")); + TEST_DO(verify("let(a,tensor,let(b,a,b))", "tensor")); +} + +TEST("require that set membership resolves to double unless error") { + TEST_DO(verify("1 in [1,2,3]", "double")); + TEST_DO(verify("1 in [tensor,tensor,tensor]", "double")); + TEST_DO(verify("1 in tensor", "double")); + TEST_DO(verify("tensor in 1", "double")); + TEST_DO(verify("tensor in [1,2,any]", "double")); + TEST_DO(verify("any in [1,tensor,any]", "double")); + TEST_DO(verify("error in [1,tensor,any]", "error")); + TEST_DO(verify("any in [tensor,error,any]", "error")); +} + +TEST("require that sum resolves correct type") { + TEST_DO(verify("sum(error)", "error")); + TEST_DO(verify("sum(tensor)", "double")); + TEST_DO(verify("sum(double)", "double")); + TEST_DO(verify("sum(any)", "any")); +} + +TEST("require that dimension sum resolves correct type") { + TEST_DO(verify("sum(error,x)", "error")); + TEST_DO(verify("sum(tensor,x)", "any")); + TEST_DO(verify("sum(any,x)", "any")); + TEST_DO(verify("sum(double,x)", "error")); + TEST_DO(verify("sum(tensor(x{},y{},z{}),y)", "tensor(x{},z{})")); + TEST_DO(verify("sum(tensor(x{},y{},z{}),w)", "error")); + TEST_DO(verify("sum(tensor(x{}),x)", "double")); +} + +TEST("require that reduce resolves correct type") { + TEST_DO(verify("reduce(error,sum)", "error")); + TEST_DO(verify("reduce(tensor,sum)", "double")); + TEST_DO(verify("reduce(tensor(x{}),sum)", "double")); + TEST_DO(verify("reduce(double,sum)", "double")); + TEST_DO(verify("reduce(any,sum)", "any")); + TEST_DO(verify("reduce(error,sum,x)", "error")); + TEST_DO(verify("reduce(tensor,sum,x)", "any")); + TEST_DO(verify("reduce(any,sum,x)", "any")); + TEST_DO(verify("reduce(double,sum,x)", "error")); + TEST_DO(verify("reduce(tensor(x{},y{},z{}),sum,y)", "tensor(x{},z{})")); + TEST_DO(verify("reduce(tensor(x{},y{},z{}),sum,x,z)", "tensor(y{})")); + TEST_DO(verify("reduce(tensor(x{},y{},z{}),sum,y,z,x)", "double")); + 
TEST_DO(verify("reduce(tensor(x{},y{},z{}),sum,w)", "error")); + TEST_DO(verify("reduce(tensor(x{}),sum,x)", "double")); +} + +TEST("require that rename resolves correct type") { + TEST_DO(verify("rename(error,x,y)", "error")); + TEST_DO(verify("rename(tensor,x,y)", "any")); + TEST_DO(verify("rename(double,x,y)", "error")); + TEST_DO(verify("rename(any,x,y)", "any")); + TEST_DO(verify("rename(tensor(x{},y[],z[5]),a,b)", "error")); + TEST_DO(verify("rename(tensor(x{},y[],z[5]),x,y)", "error")); + TEST_DO(verify("rename(tensor(x{},y[],z[5]),x,x)", "tensor(x{},y[],z[5])")); + TEST_DO(verify("rename(tensor(x{},y[],z[5]),x,w)", "tensor(w{},y[],z[5])")); + TEST_DO(verify("rename(tensor(x{},y[],z[5]),y,w)", "tensor(x{},w[],z[5])")); + TEST_DO(verify("rename(tensor(x{},y[],z[5]),z,w)", "tensor(x{},y[],w[5])")); + TEST_DO(verify("rename(tensor(x{},y[],z[5]),(x,y,z),(z,y,x))", "tensor(z{},y[],x[5])")); + TEST_DO(verify("rename(tensor(x{},y[],z[5]),(x,z),(z,x))", "tensor(z{},y[],x[5])")); + TEST_DO(verify("rename(tensor(x{},y[],z[5]),(x,y,z),(a,b,c))", "tensor(a{},b[],c[5])")); +} + +vespalib::string strfmt(const char *pattern, const char *a) { + return vespalib::make_string(pattern, a); +} + +vespalib::string strfmt(const char *pattern, const char *a, const char *b) { + return vespalib::make_string(pattern, a, b); +} + +void verify_op1(const char *pattern) { + TEST_DO(verify(strfmt(pattern, "error"), "error")); + TEST_DO(verify(strfmt(pattern, "any"), "any")); + TEST_DO(verify(strfmt(pattern, "double"), "double")); + TEST_DO(verify(strfmt(pattern, "tensor"), "tensor")); + TEST_DO(verify(strfmt(pattern, "tensor(x{},y[10],z[])"), "tensor(x{},y[10],z[])")); +} + +void verify_op2(const char *pattern) { + TEST_DO(verify(strfmt(pattern, "error", "error"), "error")); + TEST_DO(verify(strfmt(pattern, "any", "error"), "error")); + TEST_DO(verify(strfmt(pattern, "error", "any"), "error")); + TEST_DO(verify(strfmt(pattern, "double", "error"), "error")); + TEST_DO(verify(strfmt(pattern, 
"error", "double"), "error")); + TEST_DO(verify(strfmt(pattern, "tensor", "error"), "error")); + TEST_DO(verify(strfmt(pattern, "error", "tensor"), "error")); + TEST_DO(verify(strfmt(pattern, "any", "any"), "any")); + TEST_DO(verify(strfmt(pattern, "any", "double"), "any")); + TEST_DO(verify(strfmt(pattern, "double", "any"), "any")); + TEST_DO(verify(strfmt(pattern, "any", "tensor"), "any")); + TEST_DO(verify(strfmt(pattern, "tensor", "any"), "any")); + TEST_DO(verify(strfmt(pattern, "double", "double"), "double")); + TEST_DO(verify(strfmt(pattern, "tensor", "double"), "tensor")); + TEST_DO(verify(strfmt(pattern, "double", "tensor"), "tensor")); + TEST_DO(verify(strfmt(pattern, "tensor(x{})", "double"), "tensor(x{})")); + TEST_DO(verify(strfmt(pattern, "double", "tensor(x{})"), "tensor(x{})")); + TEST_DO(verify(strfmt(pattern, "tensor", "tensor"), "any")); + TEST_DO(verify(strfmt(pattern, "tensor(x{})", "tensor(x{})"), "tensor(x{})")); + TEST_DO(verify(strfmt(pattern, "tensor(x{})", "tensor(y{})"), "tensor(x{},y{})")); + TEST_DO(verify(strfmt(pattern, "tensor(x[3])", "tensor(x[5])"), "tensor(x[3])")); + TEST_DO(verify(strfmt(pattern, "tensor(x[])", "tensor(x[5])"), "tensor(x[])")); + TEST_DO(verify(strfmt(pattern, "tensor(x[5])", "tensor(x[3])"), "tensor(x[3])")); + TEST_DO(verify(strfmt(pattern, "tensor(x[5])", "tensor(x[])"), "tensor(x[])")); + TEST_DO(verify(strfmt(pattern, "tensor(x{})", "tensor(x[5])"), "error")); +} + +TEST("require that various operations resolve appropriate type") { + TEST_DO(verify_op1("-%s")); // Neg + TEST_DO(verify_op1("!%s")); // Not + TEST_DO(verify_op2("%s+%s")); // Add + TEST_DO(verify_op2("%s-%s")); // Sub + TEST_DO(verify_op2("%s*%s")); // Mul + TEST_DO(verify_op2("%s/%s")); // Div + TEST_DO(verify_op2("%s^%s")); // Pow + TEST_DO(verify_op2("%s==%s")); // Equal + TEST_DO(verify_op2("%s!=%s")); // NotEqual + TEST_DO(verify_op2("%s~=%s")); // Approx + TEST_DO(verify_op2("%s<%s")); // Less + TEST_DO(verify_op2("%s<=%s")); // 
LessEqual + TEST_DO(verify_op2("%s>%s")); // Greater + TEST_DO(verify_op2("%s>=%s")); // GreaterEqual + TEST_DO(verify_op2("%s&&%s")); // And + TEST_DO(verify_op2("%s||%s")); // Or + TEST_DO(verify_op1("cos(%s)")); // Cos + TEST_DO(verify_op1("sin(%s)")); // Sin + TEST_DO(verify_op1("tan(%s)")); // Tan + TEST_DO(verify_op1("cosh(%s)")); // Cosh + TEST_DO(verify_op1("sinh(%s)")); // Sinh + TEST_DO(verify_op1("tanh(%s)")); // Tanh + TEST_DO(verify_op1("acos(%s)")); // Acos + TEST_DO(verify_op1("asin(%s)")); // Asin + TEST_DO(verify_op1("atan(%s)")); // Atan + TEST_DO(verify_op1("exp(%s)")); // Exp + TEST_DO(verify_op1("log10(%s)")); // Log10 + TEST_DO(verify_op1("log(%s)")); // Log + TEST_DO(verify_op1("sqrt(%s)")); // Sqrt + TEST_DO(verify_op1("ceil(%s)")); // Ceil + TEST_DO(verify_op1("fabs(%s)")); // Fabs + TEST_DO(verify_op1("floor(%s)")); // Floor + TEST_DO(verify_op2("atan2(%s,%s)")); // Atan2 + TEST_DO(verify_op2("ldexp(%s,%s)")); // Ldexp + TEST_DO(verify_op2("pow(%s,%s)")); // Pow2 + TEST_DO(verify_op2("fmod(%s,%s)")); // Fmod + TEST_DO(verify_op2("min(%s,%s)")); // min + TEST_DO(verify_op2("max(%s,%s)")); // max + TEST_DO(verify_op1("isNan(%s)")); // IsNan + TEST_DO(verify_op1("relu(%s)")); // Relu + TEST_DO(verify_op1("sigmoid(%s)")); // Sigmoid +} + +TEST("require that map resolves correct type") { + TEST_DO(verify_op1("map(%s,f(x)(sin(x)))")); +} + +TEST("require that join resolves correct type") { + TEST_DO(verify_op2("join(%s,%s,f(x,y)(x+y))")); +} + +TEST("require that lambda tensor resolves correct type") { + TEST_DO(verify("tensor(x[5])(1.0)", "tensor(x[5])", false)); + TEST_DO(verify("tensor(x[5],y[10])(1.0)", "tensor(x[5],y[10])", false)); + TEST_DO(verify("tensor(x[5],y[10],z[15])(1.0)", "tensor(x[5],y[10],z[15])", false)); +} + +TEST("require that tensor concat resolves correct type") { + TEST_DO(verify("concat(double,double,x)", "tensor(x[2])")); + TEST_DO(verify("concat(tensor(x[2]),tensor(x[3]),x)", "tensor(x[5])")); + 
TEST_DO(verify("concat(tensor(x[2]),tensor(x[3]),y)", "tensor(x[2],y[2])")); + TEST_DO(verify("concat(tensor(x[2]),tensor(x{}),x)", "error")); + TEST_DO(verify("concat(tensor(x[2]),tensor(y{}),x)", "tensor(x[3],y{})")); +} + +TEST("require that double only expressions can be detected") { + Function plain_fun = Function::parse("1+2"); + Function complex_fun = Function::parse("sum(a)"); + NodeTypes plain_types(plain_fun, {}); + NodeTypes complex_types(complex_fun, {ValueType::tensor_type({})}); + EXPECT_TRUE(plain_types.get_type(plain_fun.root()).is_double()); + EXPECT_TRUE(complex_types.get_type(complex_fun.root()).is_double()); + EXPECT_TRUE(plain_types.all_types_are_double()); + EXPECT_FALSE(complex_types.all_types_are_double()); +} + +TEST("require that empty type repo works as expected") { + NodeTypes types; + Function function = Function::parse("1+2"); + EXPECT_FALSE(function.has_error()); + EXPECT_TRUE(types.get_type(function.root()).is_any()); + EXPECT_FALSE(types.all_types_are_double()); +} + +TEST_MAIN() { TEST_RUN_ALL(); } diff --git a/eval/src/tests/eval/simple_tensor/.gitignore b/eval/src/tests/eval/simple_tensor/.gitignore new file mode 100644 index 00000000000..f371f5c6c6d --- /dev/null +++ b/eval/src/tests/eval/simple_tensor/.gitignore @@ -0,0 +1 @@ +vespalib_simple_tensor_test_app diff --git a/eval/src/tests/eval/simple_tensor/CMakeLists.txt b/eval/src/tests/eval/simple_tensor/CMakeLists.txt new file mode 100644 index 00000000000..cbd65296abc --- /dev/null +++ b/eval/src/tests/eval/simple_tensor/CMakeLists.txt @@ -0,0 +1,8 @@ +# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+vespa_add_executable(vespalib_simple_tensor_test_app TEST + SOURCES + simple_tensor_test.cpp + DEPENDS + vespalib +) +vespa_add_test(NAME vespalib_simple_tensor_test_app COMMAND vespalib_simple_tensor_test_app) diff --git a/eval/src/tests/eval/simple_tensor/simple_tensor_test.cpp b/eval/src/tests/eval/simple_tensor/simple_tensor_test.cpp new file mode 100644 index 00000000000..36cb9f773c1 --- /dev/null +++ b/eval/src/tests/eval/simple_tensor/simple_tensor_test.cpp @@ -0,0 +1,166 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +#include <vespa/vespalib/testkit/test_kit.h> +#include <vespa/vespalib/eval/simple_tensor.h> +#include <vespa/vespalib/eval/simple_tensor_engine.h> +#include <vespa/vespalib/eval/operation.h> +#include <vespa/vespalib/util/stash.h> +#include <iostream> + +using namespace vespalib::eval; + +using Cell = SimpleTensor::Cell; +using Cells = SimpleTensor::Cells; +using Address = SimpleTensor::Address; +using Stash = vespalib::Stash; + +// need to specify numbers explicitly as size_t to avoid ambiguous behavior for 0 +constexpr size_t operator "" _z (unsigned long long int n) { return n; } + +const Tensor &unwrap(const Value &value) { + ASSERT_TRUE(value.is_tensor()); + return *value.as_tensor(); +} + +struct CellBuilder { + Cells cells; + CellBuilder &add(const Address &addr, double value) { + cells.emplace_back(addr, value); + return *this; + } + Cells build() { return cells; } +}; + +TEST("require that simple tensors can be built using tensor spec") { + TensorSpec spec("tensor(w{},x[2],y{},z[2])"); + spec.add({{"w", "xxx"}, {"x", 0}, {"y", "xxx"}, {"z", 0}}, 1.0) + .add({{"w", "xxx"}, {"x", 0}, {"y", "yyy"}, {"z", 1}}, 2.0) + .add({{"w", "yyy"}, {"x", 1}, {"y", "xxx"}, {"z", 0}}, 3.0) + .add({{"w", "yyy"}, {"x", 1}, {"y", "yyy"}, {"z", 1}}, 4.0); + auto tensor = SimpleTensorEngine::ref().create(spec); + TensorSpec full_spec("tensor(w{},x[2],y{},z[2])"); + full_spec + 
.add({{"w", "xxx"}, {"x", 0}, {"y", "xxx"}, {"z", 0}}, 1.0) + .add({{"w", "xxx"}, {"x", 0}, {"y", "xxx"}, {"z", 1}}, 0.0) + .add({{"w", "xxx"}, {"x", 0}, {"y", "yyy"}, {"z", 0}}, 0.0) + .add({{"w", "xxx"}, {"x", 0}, {"y", "yyy"}, {"z", 1}}, 2.0) + .add({{"w", "xxx"}, {"x", 1}, {"y", "xxx"}, {"z", 0}}, 0.0) + .add({{"w", "xxx"}, {"x", 1}, {"y", "xxx"}, {"z", 1}}, 0.0) + .add({{"w", "xxx"}, {"x", 1}, {"y", "yyy"}, {"z", 0}}, 0.0) + .add({{"w", "xxx"}, {"x", 1}, {"y", "yyy"}, {"z", 1}}, 0.0) + .add({{"w", "yyy"}, {"x", 0}, {"y", "xxx"}, {"z", 0}}, 0.0) + .add({{"w", "yyy"}, {"x", 0}, {"y", "xxx"}, {"z", 1}}, 0.0) + .add({{"w", "yyy"}, {"x", 0}, {"y", "yyy"}, {"z", 0}}, 0.0) + .add({{"w", "yyy"}, {"x", 0}, {"y", "yyy"}, {"z", 1}}, 0.0) + .add({{"w", "yyy"}, {"x", 1}, {"y", "xxx"}, {"z", 0}}, 3.0) + .add({{"w", "yyy"}, {"x", 1}, {"y", "xxx"}, {"z", 1}}, 0.0) + .add({{"w", "yyy"}, {"x", 1}, {"y", "yyy"}, {"z", 0}}, 0.0) + .add({{"w", "yyy"}, {"x", 1}, {"y", "yyy"}, {"z", 1}}, 4.0); + auto full_tensor = SimpleTensorEngine::ref().create(full_spec); + SimpleTensor expect_tensor(ValueType::from_spec("tensor(w{},x[2],y{},z[2])"), + CellBuilder() + .add({{"xxx"}, {0_z}, {"xxx"}, {0_z}}, 1.0) + .add({{"xxx"}, {0_z}, {"xxx"}, {1_z}}, 0.0) + .add({{"xxx"}, {0_z}, {"yyy"}, {0_z}}, 0.0) + .add({{"xxx"}, {0_z}, {"yyy"}, {1_z}}, 2.0) + .add({{"xxx"}, {1_z}, {"xxx"}, {0_z}}, 0.0) + .add({{"xxx"}, {1_z}, {"xxx"}, {1_z}}, 0.0) + .add({{"xxx"}, {1_z}, {"yyy"}, {0_z}}, 0.0) + .add({{"xxx"}, {1_z}, {"yyy"}, {1_z}}, 0.0) + .add({{"yyy"}, {0_z}, {"xxx"}, {0_z}}, 0.0) + .add({{"yyy"}, {0_z}, {"xxx"}, {1_z}}, 0.0) + .add({{"yyy"}, {0_z}, {"yyy"}, {0_z}}, 0.0) + .add({{"yyy"}, {0_z}, {"yyy"}, {1_z}}, 0.0) + .add({{"yyy"}, {1_z}, {"xxx"}, {0_z}}, 3.0) + .add({{"yyy"}, {1_z}, {"xxx"}, {1_z}}, 0.0) + .add({{"yyy"}, {1_z}, {"yyy"}, {0_z}}, 0.0) + .add({{"yyy"}, {1_z}, {"yyy"}, {1_z}}, 4.0) + .build()); + EXPECT_EQUAL(expect_tensor, *tensor); + EXPECT_EQUAL(expect_tensor, *full_tensor); + 
EXPECT_EQUAL(full_spec, tensor->engine().to_spec(*tensor)); +}; + +TEST("require that simple tensors can have their values negated") { + auto tensor = SimpleTensor::create( + TensorSpec("tensor(x{},y{})") + .add({{"x","1"},{"y","1"}}, 1) + .add({{"x","2"},{"y","1"}}, -3) + .add({{"x","1"},{"y","2"}}, 5)); + auto expect = SimpleTensor::create( + TensorSpec("tensor(x{},y{})") + .add({{"x","1"},{"y","1"}}, -1) + .add({{"x","2"},{"y","1"}}, 3) + .add({{"x","1"},{"y","2"}}, -5)); + auto result = SimpleTensor::map(operation::Neg(), *tensor); + EXPECT_EQUAL(*expect, *result); + Stash stash; + const Value &result2 = SimpleTensorEngine::ref().map(operation::Neg(), *tensor, stash); + EXPECT_EQUAL(*expect, unwrap(result2)); +} + +TEST("require that simple tensors can be multiplied with each other") { + auto lhs = SimpleTensor::create( + TensorSpec("tensor(x{},y{})") + .add({{"x","1"},{"y","1"}}, 1) + .add({{"x","2"},{"y","1"}}, 3) + .add({{"x","1"},{"y","2"}}, 5)); + auto rhs = SimpleTensor::create( + TensorSpec("tensor(y{},z{})") + .add({{"y","1"},{"z","1"}}, 7) + .add({{"y","2"},{"z","1"}}, 11) + .add({{"y","1"},{"z","2"}}, 13)); + auto expect = SimpleTensor::create( + TensorSpec("tensor(x{},y{},z{})") + .add({{"x","1"},{"y","1"},{"z","1"}}, 7) + .add({{"x","1"},{"y","1"},{"z","2"}}, 13) + .add({{"x","2"},{"y","1"},{"z","1"}}, 21) + .add({{"x","2"},{"y","1"},{"z","2"}}, 39) + .add({{"x","1"},{"y","2"},{"z","1"}}, 55)); + auto result = SimpleTensor::join(operation::Mul(), *lhs, *rhs); + EXPECT_EQUAL(*expect, *result); + Stash stash; + const Value &result2 = SimpleTensorEngine::ref().apply(operation::Mul(), *lhs, *rhs, stash); + EXPECT_EQUAL(*expect, unwrap(result2)); +} + +TEST("require that simple tensors support dimension reduction") { + auto tensor = SimpleTensor::create( + TensorSpec("tensor(x[3],y[2])") + .add({{"x",0},{"y",0}}, 1) + .add({{"x",1},{"y",0}}, 2) + .add({{"x",2},{"y",0}}, 3) + .add({{"x",0},{"y",1}}, 4) + .add({{"x",1},{"y",1}}, 5) + 
.add({{"x",2},{"y",1}}, 6)); + auto expect_sum_y = SimpleTensor::create( + TensorSpec("tensor(x[3])") + .add({{"x",0}}, 5) + .add({{"x",1}}, 7) + .add({{"x",2}}, 9)); + auto expect_sum_x = SimpleTensor::create( + TensorSpec("tensor(y[2])") + .add({{"y",0}}, 6) + .add({{"y",1}}, 15)); + auto expect_sum_all = SimpleTensor::create(TensorSpec("double").add({}, 21)); + auto result_sum_y = tensor->reduce(operation::Add(), {"y"}); + auto result_sum_x = tensor->reduce(operation::Add(), {"x"}); + auto result_sum_all = tensor->reduce(operation::Add(), {"x", "y"}); + EXPECT_EQUAL(*expect_sum_y, *result_sum_y); + EXPECT_EQUAL(*expect_sum_x, *result_sum_x); + EXPECT_EQUAL(*expect_sum_all, *result_sum_all); + Stash stash; + const Value &result_sum_y_2 = SimpleTensorEngine::ref().reduce(*tensor, operation::Add(), {"y"}, stash); + const Value &result_sum_x_2 = SimpleTensorEngine::ref().reduce(*tensor, operation::Add(), {"x"}, stash); + const Value &result_sum_all_2 = SimpleTensorEngine::ref().reduce(*tensor, operation::Add(), {"x", "y"}, stash); + const Value &result_sum_all_3 = SimpleTensorEngine::ref().reduce(*tensor, operation::Add(), {}, stash); + EXPECT_EQUAL(*expect_sum_y, unwrap(result_sum_y_2)); + EXPECT_EQUAL(*expect_sum_x, unwrap(result_sum_x_2)); + EXPECT_TRUE(result_sum_all_2.is_double()); + EXPECT_TRUE(result_sum_all_3.is_double()); + EXPECT_EQUAL(21, result_sum_all_2.as_double()); + EXPECT_EQUAL(21, result_sum_all_3.as_double()); + EXPECT_EQUAL(*result_sum_y, *result_sum_y); + EXPECT_NOT_EQUAL(*result_sum_y, *result_sum_x); +} + +TEST_MAIN() { TEST_RUN_ALL(); } diff --git a/eval/src/tests/eval/tensor_function/.gitignore b/eval/src/tests/eval/tensor_function/.gitignore new file mode 100644 index 00000000000..016f8b918ff --- /dev/null +++ b/eval/src/tests/eval/tensor_function/.gitignore @@ -0,0 +1 @@ +vespalib_eval_tensor_function_test_app diff --git a/eval/src/tests/eval/tensor_function/CMakeLists.txt b/eval/src/tests/eval/tensor_function/CMakeLists.txt new file mode 
100644 index 00000000000..cabe37160ac --- /dev/null +++ b/eval/src/tests/eval/tensor_function/CMakeLists.txt @@ -0,0 +1,8 @@ +# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +vespa_add_executable(vespalib_eval_tensor_function_test_app TEST + SOURCES + tensor_function_test.cpp + DEPENDS + vespalib +) +vespa_add_test(NAME vespalib_eval_tensor_function_test_app COMMAND vespalib_eval_tensor_function_test_app) diff --git a/eval/src/tests/eval/tensor_function/tensor_function_test.cpp b/eval/src/tests/eval/tensor_function/tensor_function_test.cpp new file mode 100644 index 00000000000..50aee92a17b --- /dev/null +++ b/eval/src/tests/eval/tensor_function/tensor_function_test.cpp @@ -0,0 +1,164 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +#include <vespa/vespalib/testkit/test_kit.h> +#include <vespa/vespalib/eval/operation.h> +#include <vespa/vespalib/eval/simple_tensor.h> +#include <vespa/vespalib/eval/simple_tensor_engine.h> +#include <vespa/vespalib/eval/tensor_function.h> +#include <vespa/vespalib/eval/value_type.h> +#include <vespa/vespalib/util/stash.h> +#include <map> + +using namespace vespalib; +using namespace vespalib::eval; +using namespace vespalib::eval::tensor_function; + +struct EvalCtx : TensorFunction::Input { + const TensorEngine &engine; + Stash stash; + operation::Neg neg; + ErrorValue error; + std::map<size_t, Value::UP> tensors; + EvalCtx(const TensorEngine &engine_in) + : engine(engine_in), stash(), neg(), error(), tensors() {} + void add_tensor(std::unique_ptr<Tensor> tensor, size_t id) { + tensors.emplace(id, std::make_unique<TensorValue>(std::move(tensor))); + } + const Value &get_tensor(size_t id) const override { + if (tensors.count(id) == 0) { + return error; + } + return *tensors.find(id)->second; + } + const UnaryOperation &get_map_operation(size_t id) const override { + ASSERT_EQUAL(42u, id); + return neg; 
+ } + const Value &eval(const TensorFunction &fun) { return fun.eval(*this, stash); } + const ValueType type(const Tensor &tensor) const { return engine.type_of(tensor); } + TensorFunction::UP compile(tensor_function::Node_UP expr) const { + return engine.compile(std::move(expr)); + } + std::unique_ptr<Tensor> make_tensor_inject() { + return engine.create( + TensorSpec("tensor(x[2],y[2])") + .add({{"x", 0}, {"y", 0}}, 1.0) + .add({{"x", 0}, {"y", 1}}, 2.0) + .add({{"x", 1}, {"y", 0}}, 3.0) + .add({{"x", 1}, {"y", 1}}, 4.0)); + } + std::unique_ptr<Tensor> make_tensor_reduce_input() { + return engine.create( + TensorSpec("tensor(x[3],y[2])") + .add({{"x",0},{"y",0}}, 1) + .add({{"x",1},{"y",0}}, 2) + .add({{"x",2},{"y",0}}, 3) + .add({{"x",0},{"y",1}}, 4) + .add({{"x",1},{"y",1}}, 5) + .add({{"x",2},{"y",1}}, 6)); + } + std::unique_ptr<Tensor> make_tensor_reduce_y_output() { + return engine.create( + TensorSpec("tensor(x[3])") + .add({{"x",0}}, 5) + .add({{"x",1}}, 7) + .add({{"x",2}}, 9)); + } + std::unique_ptr<Tensor> make_tensor_map_input() { + return engine.create( + TensorSpec("tensor(x{},y{})") + .add({{"x","1"},{"y","1"}}, 1) + .add({{"x","2"},{"y","1"}}, -3) + .add({{"x","1"},{"y","2"}}, 5)); + } + std::unique_ptr<Tensor> make_tensor_map_output() { + return engine.create( + TensorSpec("tensor(x{},y{})") + .add({{"x","1"},{"y","1"}}, -1) + .add({{"x","2"},{"y","1"}}, 3) + .add({{"x","1"},{"y","2"}}, -5)); + } + std::unique_ptr<Tensor> make_tensor_apply_lhs() { + return engine.create( + TensorSpec("tensor(x{},y{})") + .add({{"x","1"},{"y","1"}}, 1) + .add({{"x","2"},{"y","1"}}, 3) + .add({{"x","1"},{"y","2"}}, 5)); + } + std::unique_ptr<Tensor> make_tensor_apply_rhs() { + return engine.create( + TensorSpec("tensor(y{},z{})") + .add({{"y","1"},{"z","1"}}, 7) + .add({{"y","2"},{"z","1"}}, 11) + .add({{"y","1"},{"z","2"}}, 13)); + } + std::unique_ptr<Tensor> make_tensor_apply_output() { + return engine.create( + TensorSpec("tensor(x{},y{},z{})") + 
.add({{"x","1"},{"y","1"},{"z","1"}}, 7) + .add({{"x","1"},{"y","1"},{"z","2"}}, 13) + .add({{"x","2"},{"y","1"},{"z","1"}}, 21) + .add({{"x","2"},{"y","1"},{"z","2"}}, 39) + .add({{"x","1"},{"y","2"},{"z","1"}}, 55)); + } +}; + +void verify_equal(const Tensor &expect, const Value &value) { + const Tensor *tensor = value.as_tensor(); + ASSERT_TRUE(tensor != nullptr); + ASSERT_EQUAL(&expect.engine(), &tensor->engine()); + EXPECT_TRUE(expect.engine().equal(expect, *tensor)); +} + +TEST("require that tensor injection works") { + EvalCtx ctx(SimpleTensorEngine::ref()); + ctx.add_tensor(ctx.make_tensor_inject(), 1); + auto expect = ctx.make_tensor_inject(); + auto fun = inject(ValueType::from_spec("tensor(x[2],y[2])"), 1); + EXPECT_EQUAL(ctx.type(*expect), fun->result_type); + auto prog = ctx.compile(std::move(fun)); + TEST_DO(verify_equal(*expect, ctx.eval(*prog))); +} + +TEST("require that partial tensor reduction works") { + EvalCtx ctx(SimpleTensorEngine::ref()); + ctx.add_tensor(ctx.make_tensor_reduce_input(), 1); + auto expect = ctx.make_tensor_reduce_y_output(); + auto fun = reduce(inject(ValueType::from_spec("tensor(x[3],y[2])"), 1), operation::Add(), {"y"}); + EXPECT_EQUAL(ctx.type(*expect), fun->result_type); + auto prog = ctx.compile(std::move(fun)); + TEST_DO(verify_equal(*expect, ctx.eval(*prog))); +} + +TEST("require that full tensor reduction works") { + EvalCtx ctx(SimpleTensorEngine::ref()); + ctx.add_tensor(ctx.make_tensor_reduce_input(), 1); + auto fun = reduce(inject(ValueType::from_spec("tensor(x[3],y[2])"), 1), operation::Add(), {}); + EXPECT_EQUAL(ValueType::from_spec("double"), fun->result_type); + auto prog = ctx.compile(std::move(fun)); + EXPECT_EQUAL(21.0, ctx.eval(*prog).as_double()); +} + +TEST("require that tensor map works") { + EvalCtx ctx(SimpleTensorEngine::ref()); + ctx.add_tensor(ctx.make_tensor_map_input(), 1); + auto expect = ctx.make_tensor_map_output(); + auto fun = map(42, inject(ValueType::from_spec("tensor(x{},y{})"), 1)); + 
EXPECT_EQUAL(ctx.type(*expect), fun->result_type); + auto prog = ctx.compile(std::move(fun)); + TEST_DO(verify_equal(*expect, ctx.eval(*prog))); +} + +TEST("require that tensor apply works") { + EvalCtx ctx(SimpleTensorEngine::ref()); + ctx.add_tensor(ctx.make_tensor_apply_lhs(), 1); + ctx.add_tensor(ctx.make_tensor_apply_rhs(), 2); + auto expect = ctx.make_tensor_apply_output(); + auto fun = apply(operation::Mul(), + inject(ValueType::from_spec("tensor(x{},y{})"), 1), + inject(ValueType::from_spec("tensor(y{},z{})"), 2)); + EXPECT_EQUAL(ctx.type(*expect), fun->result_type); + auto prog = ctx.compile(std::move(fun)); + TEST_DO(verify_equal(*expect, ctx.eval(*prog))); +} + +TEST_MAIN() { TEST_RUN_ALL(); } diff --git a/eval/src/tests/eval/value_cache/.gitignore b/eval/src/tests/eval/value_cache/.gitignore new file mode 100644 index 00000000000..a2ea8716d0c --- /dev/null +++ b/eval/src/tests/eval/value_cache/.gitignore @@ -0,0 +1,2 @@ +/vespalib_value_cache_test_app +/vespalib_tensor_loader_test_app diff --git a/eval/src/tests/eval/value_cache/CMakeLists.txt b/eval/src/tests/eval/value_cache/CMakeLists.txt new file mode 100644 index 00000000000..6a752ae6b60 --- /dev/null +++ b/eval/src/tests/eval/value_cache/CMakeLists.txt @@ -0,0 +1,15 @@ +# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+vespa_add_executable(vespalib_value_cache_test_app TEST + SOURCES + value_cache_test.cpp + DEPENDS + vespalib +) +vespa_add_test(NAME vespalib_value_cache_test_app COMMAND vespalib_value_cache_test_app) +vespa_add_executable(vespalib_tensor_loader_test_app TEST + SOURCES + tensor_loader_test.cpp + DEPENDS + vespalib +) +vespa_add_test(NAME vespalib_tensor_loader_test_app COMMAND vespalib_tensor_loader_test_app) diff --git a/eval/src/tests/eval/value_cache/dense.json b/eval/src/tests/eval/value_cache/dense.json new file mode 100644 index 00000000000..2263053f01f --- /dev/null +++ b/eval/src/tests/eval/value_cache/dense.json @@ -0,0 +1,8 @@ +{ + "dimensions": ["x","y"], + "cells": [ + { "address": { "x": "0", "y": "0" }, "value": 1.0 }, + { "address": { "x": "0", "y": "1" }, "value": 2.0 }, + { "address": { "x": "1", "y": "0" }, "value": 3.0 }, + { "address": { "x": "1", "y": "1" }, "value": 4.0 }] +} diff --git a/eval/src/tests/eval/value_cache/invalid.json b/eval/src/tests/eval/value_cache/invalid.json new file mode 100644 index 00000000000..c232189106a --- /dev/null +++ b/eval/src/tests/eval/value_cache/invalid.json @@ -0,0 +1 @@ +this file does not contain valid json diff --git a/eval/src/tests/eval/value_cache/mixed.json b/eval/src/tests/eval/value_cache/mixed.json new file mode 100644 index 00000000000..74c840d83b8 --- /dev/null +++ b/eval/src/tests/eval/value_cache/mixed.json @@ -0,0 +1,6 @@ +{ + "dimensions": ["x","y"], + "cells": [ + { "address": { "x": "foo", "y": "0" }, "value": 1.0 }, + { "address": { "x": "foo", "y": "1" }, "value": 2.0 }] +} diff --git a/eval/src/tests/eval/value_cache/sparse.json b/eval/src/tests/eval/value_cache/sparse.json new file mode 100644 index 00000000000..a80e7906286 --- /dev/null +++ b/eval/src/tests/eval/value_cache/sparse.json @@ -0,0 +1,6 @@ +{ + "dimensions": ["x","y"], + "cells": [ + { "address": { "x": "foo", "y": "bar" }, "value": 1.0 }, + { "address": { "x": "bar", "y": "foo" }, "value": 2.0 }] +} diff --git 
a/eval/src/tests/eval/value_cache/tensor_loader_test.cpp b/eval/src/tests/eval/value_cache/tensor_loader_test.cpp new file mode 100644 index 00000000000..8725eab597b --- /dev/null +++ b/eval/src/tests/eval/value_cache/tensor_loader_test.cpp @@ -0,0 +1,73 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +#include <vespa/vespalib/testkit/test_kit.h> +#include <vespa/vespalib/eval/value_cache/constant_tensor_loader.h> +#include <vespa/vespalib/eval/simple_tensor_engine.h> +#include <vespa/vespalib/eval/tensor_spec.h> + +using namespace vespalib::eval; + +std::unique_ptr<Tensor> dense_tensor_nocells() { + return SimpleTensorEngine::ref() + .create(TensorSpec("tensor(x[2],y[2])")); +} + +std::unique_ptr<Tensor> make_nodim_tensor() { + return SimpleTensorEngine::ref() + .create(TensorSpec("double")); +} + +std::unique_ptr<Tensor> make_dense_tensor() { + return SimpleTensorEngine::ref() + .create(TensorSpec("tensor(x[2],y[2])") + .add({{"x", 0}, {"y", 0}}, 1.0) + .add({{"x", 0}, {"y", 1}}, 2.0) + .add({{"x", 1}, {"y", 0}}, 3.0) + .add({{"x", 1}, {"y", 1}}, 4.0)); +} + +std::unique_ptr<Tensor> make_sparse_tensor() { + return SimpleTensorEngine::ref() + .create(TensorSpec("tensor(x{},y{})") + .add({{"x", "foo"}, {"y", "bar"}}, 1.0) + .add({{"x", "bar"}, {"y", "foo"}}, 2.0)); +} + +std::unique_ptr<Tensor> make_mixed_tensor() { + return SimpleTensorEngine::ref() + .create(TensorSpec("tensor(x{},y[2])") + .add({{"x", "foo"}, {"y", 0}}, 1.0) + .add({{"x", "foo"}, {"y", 1}}, 2.0)); +} + +void verify_tensor(std::unique_ptr<Tensor> expect, ConstantValue::UP actual) { + const auto &engine = expect->engine(); + ASSERT_EQUAL(engine.type_of(*expect), actual->type()); + EXPECT_TRUE(&engine == &actual->value().as_tensor()->engine()); + EXPECT_TRUE(engine.equal(*expect, *actual->value().as_tensor())); +} + +TEST_F("require that invalid types loads an empty double", ConstantTensorLoader(SimpleTensorEngine::ref())) { + 
TEST_DO(verify_tensor(make_nodim_tensor(), f1.create(TEST_PATH("dense.json"), "invalid type spec"))); +} + +TEST_F("require that invalid file name loads an empty tensor", ConstantTensorLoader(SimpleTensorEngine::ref())) { + TEST_DO(verify_tensor(dense_tensor_nocells(), f1.create(TEST_PATH("missing_file.json"), "tensor(x[2],y[2])"))); +} + +TEST_F("require that invalid json loads an empty tensor", ConstantTensorLoader(SimpleTensorEngine::ref())) { + TEST_DO(verify_tensor(dense_tensor_nocells(), f1.create(TEST_PATH("invalid.json"), "tensor(x[2],y[2])"))); +} + +TEST_F("require that dense tensors can be loaded", ConstantTensorLoader(SimpleTensorEngine::ref())) { + TEST_DO(verify_tensor(make_dense_tensor(), f1.create(TEST_PATH("dense.json"), "tensor(x[2],y[2])"))); +} + +TEST_F("require that sparse tensors can be loaded", ConstantTensorLoader(SimpleTensorEngine::ref())) { + TEST_DO(verify_tensor(make_sparse_tensor(), f1.create(TEST_PATH("sparse.json"), "tensor(x{},y{})"))); +} + +TEST_F("require that mixed tensors can be loaded", ConstantTensorLoader(SimpleTensorEngine::ref())) { + TEST_DO(verify_tensor(make_mixed_tensor(), f1.create(TEST_PATH("mixed.json"), "tensor(x{},y[2])"))); +} + +TEST_MAIN() { TEST_RUN_ALL(); } diff --git a/eval/src/tests/eval/value_cache/value_cache_test.cpp b/eval/src/tests/eval/value_cache/value_cache_test.cpp new file mode 100644 index 00000000000..ff991382a3a --- /dev/null +++ b/eval/src/tests/eval/value_cache/value_cache_test.cpp @@ -0,0 +1,67 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+#include <vespa/vespalib/testkit/test_kit.h> +#include <vespa/vespalib/eval/value_cache/constant_value_cache.h> +#include <vespa/vespalib/eval/value_cache/constant_value.h> +#include <vespa/vespalib/eval/value.h> +#include <vespa/vespalib/eval/value_type.h> + +using namespace vespalib::eval; + +struct MyValue : ConstantValue { + DoubleValue my_value; + ValueType my_type; + MyValue(double val) : my_value(val), my_type(ValueType::double_type()) {} + const ValueType &type() const override { return my_type; } + const Value &value() const override { return my_value; } +}; + +struct MyFactory : ConstantValueFactory { + mutable size_t create_cnt = 0; + ConstantValue::UP create(const vespalib::string &path, const vespalib::string &) const override { + ++create_cnt; + return std::make_unique<MyValue>(double(atoi(path.c_str()))); + } +}; + +TEST_FF("require that values can be created", MyFactory(), ConstantValueCache(f1)) { + ConstantValue::UP res = f2.create("1", "type"); + EXPECT_TRUE(res->type().is_double()); + EXPECT_EQUAL(1.0, res->value().as_double()); + EXPECT_EQUAL(2.0, f2.create("2", "type")->value().as_double()); + EXPECT_EQUAL(3.0, f2.create("3", "type")->value().as_double()); + EXPECT_EQUAL(3, f1.create_cnt); +} + +TEST_FF("require that underlying values can be shared", MyFactory(), ConstantValueCache(f1)) { + auto res1 = f2.create("1", "type"); + auto res2 = f2.create("2", "type"); + auto res3 = f2.create("2", "type"); + auto res4 = f2.create("2", "type"); + EXPECT_EQUAL(1.0, res1->value().as_double()); + EXPECT_EQUAL(2.0, res2->value().as_double()); + EXPECT_EQUAL(2.0, res2->value().as_double()); + EXPECT_EQUAL(2.0, res2->value().as_double()); + EXPECT_EQUAL(2, f1.create_cnt); +} + +TEST_FF("require that unused values are evicted", MyFactory(), ConstantValueCache(f1)) { + EXPECT_EQUAL(1.0, f2.create("1", "type")->value().as_double()); + EXPECT_EQUAL(2.0, f2.create("2", "type")->value().as_double()); + EXPECT_EQUAL(2.0, f2.create("2", 
"type")->value().as_double()); + EXPECT_EQUAL(2.0, f2.create("2", "type")->value().as_double()); + EXPECT_EQUAL(4, f1.create_cnt); +} + +TEST_FF("require that type spec is part of cache key", MyFactory(), ConstantValueCache(f1)) { + auto res1 = f2.create("1", "type"); + auto res2 = f2.create("2", "type_a"); + auto res3 = f2.create("2", "type_b"); + auto res4 = f2.create("2", "type_b"); + EXPECT_EQUAL(1.0, res1->value().as_double()); + EXPECT_EQUAL(2.0, res2->value().as_double()); + EXPECT_EQUAL(2.0, res2->value().as_double()); + EXPECT_EQUAL(2.0, res2->value().as_double()); + EXPECT_EQUAL(3, f1.create_cnt); +} + +TEST_MAIN() { TEST_RUN_ALL(); } diff --git a/eval/src/tests/eval/value_type/.gitignore b/eval/src/tests/eval/value_type/.gitignore new file mode 100644 index 00000000000..e6842b91e98 --- /dev/null +++ b/eval/src/tests/eval/value_type/.gitignore @@ -0,0 +1 @@ +vespalib_value_type_test_app diff --git a/eval/src/tests/eval/value_type/CMakeLists.txt b/eval/src/tests/eval/value_type/CMakeLists.txt new file mode 100644 index 00000000000..c6ef9f61359 --- /dev/null +++ b/eval/src/tests/eval/value_type/CMakeLists.txt @@ -0,0 +1,8 @@ +# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +vespa_add_executable(vespalib_value_type_test_app TEST + SOURCES + value_type_test.cpp + DEPENDS + vespalib +) +vespa_add_test(NAME vespalib_value_type_test_app NO_VALGRIND COMMAND vespalib_value_type_test_app) diff --git a/eval/src/tests/eval/value_type/value_type_test.cpp b/eval/src/tests/eval/value_type/value_type_test.cpp new file mode 100644 index 00000000000..1a1f1ae6cca --- /dev/null +++ b/eval/src/tests/eval/value_type/value_type_test.cpp @@ -0,0 +1,432 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+#include <vespa/vespalib/testkit/test_kit.h> +#include <vespa/vespalib/eval/value_type.h> +#include <vespa/vespalib/eval/value_type_spec.h> +#include <vespa/vespalib/util/stringfmt.h> +#include <vespa/vespalib/test/insertion_operators.h> +#include <ostream> + +using namespace vespalib::eval; + +const size_t npos = ValueType::Dimension::npos; + +TEST("require that ANY value type can be created") { + ValueType t = ValueType::any_type(); + EXPECT_TRUE(t.type() == ValueType::Type::ANY); + EXPECT_EQUAL(t.dimensions().size(), 0u); +} + +TEST("require that ERROR value type can be created") { + ValueType t = ValueType::error_type(); + EXPECT_TRUE(t.type() == ValueType::Type::ERROR); + EXPECT_EQUAL(t.dimensions().size(), 0u); +} + +TEST("require that DOUBLE value type can be created") { + ValueType t = ValueType::double_type(); + EXPECT_TRUE(t.type() == ValueType::Type::DOUBLE); + EXPECT_EQUAL(t.dimensions().size(), 0u); +} + +TEST("require that TENSOR value type can be created") { + ValueType t = ValueType::tensor_type({{"x", 10},{"y"}}); + EXPECT_TRUE(t.type() == ValueType::Type::TENSOR); + ASSERT_EQUAL(t.dimensions().size(), 2u); + EXPECT_EQUAL(t.dimensions()[0].name, "x"); + EXPECT_EQUAL(t.dimensions()[0].size, 10u); + EXPECT_EQUAL(t.dimensions()[1].name, "y"); + EXPECT_EQUAL(t.dimensions()[1].size, npos); +} + +TEST("require that TENSOR value type sorts dimensions") { + ValueType t = ValueType::tensor_type({{"x", 10}, {"z", 30}, {"y"}}); + EXPECT_TRUE(t.type() == ValueType::Type::TENSOR); + ASSERT_EQUAL(t.dimensions().size(), 3u); + EXPECT_EQUAL(t.dimensions()[0].name, "x"); + EXPECT_EQUAL(t.dimensions()[0].size, 10u); + EXPECT_EQUAL(t.dimensions()[1].name, "y"); + EXPECT_EQUAL(t.dimensions()[1].size, npos); + EXPECT_EQUAL(t.dimensions()[2].name, "z"); + EXPECT_EQUAL(t.dimensions()[2].size, 30u); +} + +TEST("require that dimension names can be obtained") { + EXPECT_EQUAL(ValueType::double_type().dimension_names(), + std::vector<vespalib::string>({})); + 
EXPECT_EQUAL(ValueType::tensor_type({{"y", 10}, {"x", 30}}).dimension_names(), + std::vector<vespalib::string>({"x", "y"})); + EXPECT_EQUAL(ValueType::tensor_type({{"y", 10}, {"x", 30}, {"z"}}).dimension_names(), + std::vector<vespalib::string>({"x", "y", "z"})); +} + +TEST("require that dimension index can be obtained") { + EXPECT_EQUAL(ValueType::error_type().dimension_index("x"), ValueType::Dimension::npos); + EXPECT_EQUAL(ValueType::any_type().dimension_index("x"), ValueType::Dimension::npos); + EXPECT_EQUAL(ValueType::double_type().dimension_index("x"), ValueType::Dimension::npos); + EXPECT_EQUAL(ValueType::tensor_type({}).dimension_index("x"), ValueType::Dimension::npos); + auto my_type = ValueType::tensor_type({{"y", 10}, {"x"}, {"z", 0}}); + EXPECT_EQUAL(my_type.dimension_index("x"), 0); + EXPECT_EQUAL(my_type.dimension_index("y"), 1); + EXPECT_EQUAL(my_type.dimension_index("z"), 2); + EXPECT_EQUAL(my_type.dimension_index("w"), ValueType::Dimension::npos); +} + +void verify_equal(const ValueType &a, const ValueType &b) { + EXPECT_TRUE(a == b); + EXPECT_TRUE(b == a); + EXPECT_FALSE(a != b); + EXPECT_FALSE(b != a); +} + +void verify_not_equal(const ValueType &a, const ValueType &b) { + EXPECT_TRUE(a != b); + EXPECT_TRUE(b != a); + EXPECT_FALSE(a == b); + EXPECT_FALSE(b == a); +} + +TEST("require that value types can be compared") { + TEST_DO(verify_equal(ValueType::error_type(), ValueType::error_type())); + TEST_DO(verify_not_equal(ValueType::error_type(), ValueType::any_type())); + TEST_DO(verify_not_equal(ValueType::error_type(), ValueType::double_type())); + TEST_DO(verify_not_equal(ValueType::error_type(), ValueType::tensor_type({}))); + TEST_DO(verify_equal(ValueType::any_type(), ValueType::any_type())); + TEST_DO(verify_not_equal(ValueType::any_type(), ValueType::double_type())); + TEST_DO(verify_not_equal(ValueType::any_type(), ValueType::tensor_type({}))); + TEST_DO(verify_equal(ValueType::double_type(), ValueType::double_type())); + 
TEST_DO(verify_not_equal(ValueType::double_type(), ValueType::tensor_type({}))); + TEST_DO(verify_equal(ValueType::tensor_type({{"x"}, {"y"}}), ValueType::tensor_type({{"y"}, {"x"}}))); + TEST_DO(verify_not_equal(ValueType::tensor_type({{"x"}, {"y"}}), ValueType::tensor_type({{"x"}, {"y"}, {"z"}}))); + TEST_DO(verify_equal(ValueType::tensor_type({{"x", 10}, {"y", 20}}), ValueType::tensor_type({{"y", 20}, {"x", 10}}))); + TEST_DO(verify_not_equal(ValueType::tensor_type({{"x", 10}, {"y", 20}}), ValueType::tensor_type({{"x", 10}, {"y", 10}}))); + TEST_DO(verify_not_equal(ValueType::tensor_type({{"x", 10}}), ValueType::tensor_type({{"x"}}))); +} + +void verify_predicates(const ValueType &type, + bool expect_any, bool expect_error, bool expect_double, bool expect_tensor, + bool expect_maybe_tensor, bool expect_abstract, bool expect_unknown_dimensions) +{ + EXPECT_EQUAL(type.is_any(), expect_any); + EXPECT_EQUAL(type.is_error(), expect_error); + EXPECT_EQUAL(type.is_double(), expect_double); + EXPECT_EQUAL(type.is_tensor(), expect_tensor); + EXPECT_EQUAL(type.maybe_tensor(), expect_maybe_tensor); + EXPECT_EQUAL(type.is_abstract(), expect_abstract); + EXPECT_EQUAL(type.unknown_dimensions(), expect_unknown_dimensions); +} + +TEST("require that type-related predicate functions work as expected") { + TEST_DO(verify_predicates(ValueType::any_type(), + true, false, false, false, + true, true, true)); + TEST_DO(verify_predicates(ValueType::error_type(), + false, true, false, false, + false, false, false)); + TEST_DO(verify_predicates(ValueType::double_type(), + false, false, true, false, + false, false, false)); + TEST_DO(verify_predicates(ValueType::tensor_type({}), + false, false, false, true, + true, true, true)); + TEST_DO(verify_predicates(ValueType::tensor_type({{"x"}}), + false, false, false, true, + true, false, false)); + TEST_DO(verify_predicates(ValueType::tensor_type({{"x", 0}}), + false, false, false, true, + true, true, false)); +} + +TEST("require that dimension 
predicates work as expected") { + ValueType type = ValueType::tensor_type({{"x"}, {"y", 10}, {"z", 0}}); + ASSERT_EQUAL(3u, type.dimensions().size()); + EXPECT_TRUE(type.dimensions()[0].is_mapped()); + EXPECT_TRUE(!type.dimensions()[0].is_indexed()); + EXPECT_TRUE(!type.dimensions()[0].is_bound()); + EXPECT_TRUE(!type.dimensions()[1].is_mapped()); + EXPECT_TRUE(type.dimensions()[1].is_indexed()); + EXPECT_TRUE(type.dimensions()[1].is_bound()); + EXPECT_TRUE(!type.dimensions()[2].is_mapped()); + EXPECT_TRUE(type.dimensions()[2].is_indexed()); + EXPECT_TRUE(!type.dimensions()[2].is_bound()); +} + +TEST("require that duplicate dimension names result in error types") { + EXPECT_TRUE(ValueType::tensor_type({{"x"}, {"x"}}).is_error()); +} + +TEST("require that removing dimensions from non-abstract non-tensor types gives error type") { + EXPECT_TRUE(ValueType::error_type().reduce({"x"}).is_error()); + EXPECT_TRUE(ValueType::double_type().reduce({"x"}).is_error()); +} + +TEST("require that removing dimensions from abstract maybe-tensor types gives any type") { + EXPECT_TRUE(ValueType::any_type().reduce({"x"}).is_any()); + EXPECT_TRUE(ValueType::tensor_type({}).reduce({"x"}).is_any()); +} + +TEST("require that dimensions can be removed from tensor value types") { + ValueType type = ValueType::tensor_type({{"x", 10}, {"y", 20}, {"z", 30}}); + EXPECT_EQUAL(ValueType::tensor_type({{"y", 20}, {"z", 30}}), type.reduce({"x"})); + EXPECT_EQUAL(ValueType::tensor_type({{"x", 10}, {"z", 30}}), type.reduce({"y"})); + EXPECT_EQUAL(ValueType::tensor_type({{"x", 10}, {"y", 20}}), type.reduce({"z"})); + EXPECT_EQUAL(ValueType::tensor_type({{"y", 20}}), type.reduce({"x", "z"})); + EXPECT_EQUAL(ValueType::tensor_type({{"y", 20}}), type.reduce({"z", "x"})); +} + +TEST("require that removing an empty set of dimensions means removing them all") { + EXPECT_EQUAL(ValueType::tensor_type({{"x", 10}, {"y", 20}, {"z", 30}}).reduce({}), ValueType::double_type()); +} + +TEST("require that removing 
non-existing dimensions gives error type") { + EXPECT_TRUE(ValueType::tensor_type({{"y"}}).reduce({"x"}).is_error()); + EXPECT_TRUE(ValueType::tensor_type({{"y", 10}}).reduce({"x"}).is_error()); +} + +TEST("require that removing all dimensions gives double type") { + ValueType type = ValueType::tensor_type({{"x", 10}, {"y", 20}, {"z", 30}}); + EXPECT_EQUAL(ValueType::double_type(), type.reduce({"x", "y", "z"})); +} + +TEST("require that dimensions can be combined for tensor value types") { + ValueType tensor_type_xy = ValueType::tensor_type({{"x"}, {"y"}}); + ValueType tensor_type_yz = ValueType::tensor_type({{"y"}, {"z"}}); + ValueType tensor_type_xyz = ValueType::tensor_type({{"x"}, {"y"}, {"z"}}); + ValueType tensor_type_y = ValueType::tensor_type({{"y"}}); + EXPECT_EQUAL(ValueType::join(tensor_type_xy, tensor_type_yz), tensor_type_xyz); + EXPECT_EQUAL(ValueType::join(tensor_type_yz, tensor_type_xy), tensor_type_xyz); + EXPECT_EQUAL(ValueType::join(tensor_type_y, tensor_type_y), tensor_type_y); +} + +TEST("require that indexed dimensions combine to the minimal dimension size") { + ValueType tensor_0 = ValueType::tensor_type({{"x", 0}}); + ValueType tensor_10 = ValueType::tensor_type({{"x", 10}}); + ValueType tensor_20 = ValueType::tensor_type({{"x", 20}}); + EXPECT_EQUAL(ValueType::join(tensor_10, tensor_0), tensor_0); + EXPECT_EQUAL(ValueType::join(tensor_10, tensor_10), tensor_10); + EXPECT_EQUAL(ValueType::join(tensor_10, tensor_20), tensor_10); +} + +void verify_combinable(const ValueType &a, const ValueType &b) { + EXPECT_TRUE(!ValueType::join(a, b).is_error()); + EXPECT_TRUE(!ValueType::join(b, a).is_error()); + EXPECT_TRUE(!ValueType::join(a, b).is_any()); + EXPECT_TRUE(!ValueType::join(b, a).is_any()); +} + +void verify_not_combinable(const ValueType &a, const ValueType &b) { + EXPECT_TRUE(ValueType::join(a, b).is_error()); + EXPECT_TRUE(ValueType::join(b, a).is_error()); +} + +void verify_maybe_combinable(const ValueType &a, const ValueType &b) { + 
EXPECT_TRUE(ValueType::join(a, b).is_any()); + EXPECT_TRUE(ValueType::join(b, a).is_any()); +} + +TEST("require that mapped and indexed dimensions are not combinable") { + verify_not_combinable(ValueType::tensor_type({{"x", 10}}), ValueType::tensor_type({{"x"}})); +} + +TEST("require that dimension combining is only allowed (yes/no/maybe) for appropriate types") { + std::vector<ValueType> types = { ValueType::any_type(), ValueType::error_type(), ValueType::double_type(), + ValueType::tensor_type({}), ValueType::tensor_type({{"x"}}) }; + for (size_t a = 0; a < types.size(); ++a) { + for (size_t b = a; b < types.size(); ++b) { + TEST_STATE(vespalib::make_string("a='%s', b='%s'", types[a].to_spec().c_str(), types[b].to_spec().c_str()).c_str()); + if (types[a].is_error() || types[b].is_error()) { + verify_not_combinable(types[a], types[b]); + } else if (types[a].is_any() || types[b].is_any()) { + verify_maybe_combinable(types[a], types[b]); + } else if (types[a].is_double() || types[b].is_double()) { + verify_combinable(types[a], types[b]); + } else if (types[a].unknown_dimensions() || types[b].unknown_dimensions()) { + verify_maybe_combinable(types[a], types[b]); + } else { + verify_combinable(types[a], types[b]); + } + } + } +} + +TEST("require that value type can make spec") { + EXPECT_EQUAL("any", ValueType::any_type().to_spec()); + EXPECT_EQUAL("error", ValueType::error_type().to_spec()); + EXPECT_EQUAL("double", ValueType::double_type().to_spec()); + EXPECT_EQUAL("tensor", ValueType::tensor_type({}).to_spec()); + EXPECT_EQUAL("tensor(x{})", ValueType::tensor_type({{"x"}}).to_spec()); + EXPECT_EQUAL("tensor(y[10])", ValueType::tensor_type({{"y", 10}}).to_spec()); + EXPECT_EQUAL("tensor(z[])", ValueType::tensor_type({{"z", 0}}).to_spec()); + EXPECT_EQUAL("tensor(x{},y[10],z[])", ValueType::tensor_type({{"x"}, {"y", 10}, {"z", 0}}).to_spec()); +} + +TEST("require that value type spec can be parsed") { + EXPECT_EQUAL(ValueType::any_type(), 
ValueType::from_spec("any")); + EXPECT_EQUAL(ValueType::double_type(), ValueType::from_spec("double")); + EXPECT_EQUAL(ValueType::tensor_type({}), ValueType::from_spec("tensor")); + EXPECT_EQUAL(ValueType::tensor_type({}), ValueType::from_spec("tensor()")); + EXPECT_EQUAL(ValueType::tensor_type({{"x"}}), ValueType::from_spec("tensor(x{})")); + EXPECT_EQUAL(ValueType::tensor_type({{"y", 10}}), ValueType::from_spec("tensor(y[10])")); + EXPECT_EQUAL(ValueType::tensor_type({{"z", 0}}), ValueType::from_spec("tensor(z[])")); + EXPECT_EQUAL(ValueType::tensor_type({{"x"}, {"y", 10}, {"z", 0}}), ValueType::from_spec("tensor(x{},y[10],z[])")); +} + +TEST("require that value type spec can be parsed with extra whitespace") { + EXPECT_EQUAL(ValueType::any_type(), ValueType::from_spec(" any ")); + EXPECT_EQUAL(ValueType::double_type(), ValueType::from_spec(" double ")); + EXPECT_EQUAL(ValueType::tensor_type({}), ValueType::from_spec(" tensor ")); + EXPECT_EQUAL(ValueType::tensor_type({}), ValueType::from_spec(" tensor ( ) ")); + EXPECT_EQUAL(ValueType::tensor_type({{"x"}}), ValueType::from_spec(" tensor ( x { } ) ")); + EXPECT_EQUAL(ValueType::tensor_type({{"y", 10}}), ValueType::from_spec(" tensor ( y [ 10 ] ) ")); + EXPECT_EQUAL(ValueType::tensor_type({{"z", 0}}), ValueType::from_spec(" tensor ( z [ ] ) ")); + EXPECT_EQUAL(ValueType::tensor_type({{"x"}, {"y", 10}, {"z", 0}}), + ValueType::from_spec(" tensor ( x { } , y [ 10 ] , z [ ] ) ")); +} + +TEST("require that malformed value type spec is parsed as error") { + EXPECT_TRUE(ValueType::from_spec("").is_error()); + EXPECT_TRUE(ValueType::from_spec(" ").is_error()); + EXPECT_TRUE(ValueType::from_spec("error").is_error()); + EXPECT_TRUE(ValueType::from_spec("tensor tensor").is_error()); + EXPECT_TRUE(ValueType::from_spec("tensor(x{10})").is_error()); + EXPECT_TRUE(ValueType::from_spec("tensor(x{},)").is_error()); + EXPECT_TRUE(ValueType::from_spec("tensor(,x{})").is_error()); + 
EXPECT_TRUE(ValueType::from_spec("tensor(x{},,y{})").is_error()); + EXPECT_TRUE(ValueType::from_spec("tensor(x{} y{})").is_error()); + EXPECT_TRUE(ValueType::from_spec("tensor(x{}").is_error()); + EXPECT_TRUE(ValueType::from_spec("tensor(x{}),").is_error()); + EXPECT_TRUE(ValueType::from_spec("tensor(x[10)").is_error()); + EXPECT_TRUE(ValueType::from_spec("tensor(x[foo])").is_error()); + EXPECT_TRUE(ValueType::from_spec("tensor(x,y)").is_error()); + EXPECT_TRUE(ValueType::from_spec("tensor(x{},x{})").is_error()); + EXPECT_TRUE(ValueType::from_spec("tensor(x{},x[10])").is_error()); + EXPECT_TRUE(ValueType::from_spec("tensor(x{},x[])").is_error()); +} + +struct ParseResult { + vespalib::string spec; + const char *pos; + const char *end; + const char *after; + ValueType type; + ParseResult(const vespalib::string &spec_in) + : spec(spec_in), + pos(spec.data()), + end(pos + spec.size()), + after(nullptr), + type(value_type::parse_spec(pos, end, after)) {} + bool after_inside() const { return ((after > pos) && (after < end)); } +}; + +TEST("require that we can parse a partial string into a type with the low-level API") { + ParseResult result("tensor(a[]) , "); + EXPECT_EQUAL(result.type, ValueType::tensor_type({{"a", 0}})); + ASSERT_TRUE(result.after_inside()); + EXPECT_EQUAL(*result.after, ','); +} + +TEST("require that we can parse an abstract tensor type from a partial string") { + ParseResult result("tensor , "); + EXPECT_EQUAL(result.type, ValueType::tensor_type({})); + ASSERT_TRUE(result.after_inside()); + EXPECT_EQUAL(*result.after, ','); +} + +TEST("require that 'error' is the valid representation of the error type") { + ParseResult valid(" error "); + ParseResult invalid(" fubar "); + EXPECT_EQUAL(valid.type, ValueType::error_type()); + EXPECT_TRUE(valid.after == valid.end); // parse ok + EXPECT_EQUAL(invalid.type, ValueType::error_type()); + EXPECT_TRUE(invalid.after == nullptr); // parse not ok +} + +TEST("require that a sparse type must be a tensor with 
dimensions that all are mapped") { + EXPECT_TRUE(ValueType::from_spec("tensor(x{})").is_sparse()); + EXPECT_TRUE(ValueType::from_spec("tensor(x{},y{})").is_sparse()); + EXPECT_FALSE(ValueType::from_spec("tensor()").is_sparse()); + EXPECT_FALSE(ValueType::from_spec("tensor(x[])").is_sparse()); + EXPECT_FALSE(ValueType::from_spec("tensor(x{},y[])").is_sparse()); + EXPECT_FALSE(ValueType::from_spec("double").is_sparse()); + EXPECT_FALSE(ValueType::from_spec("any").is_sparse()); + EXPECT_FALSE(ValueType::from_spec("error").is_sparse()); +} + +TEST("require that a dense type must be a tensor with dimensions that all are indexed") { + EXPECT_TRUE(ValueType::from_spec("tensor(x[])").is_dense()); + EXPECT_TRUE(ValueType::from_spec("tensor(x[],y[])").is_dense()); + EXPECT_FALSE(ValueType::from_spec("tensor()").is_dense()); + EXPECT_FALSE(ValueType::from_spec("tensor(x{})").is_dense()); + EXPECT_FALSE(ValueType::from_spec("tensor(x[],y{})").is_dense()); + EXPECT_FALSE(ValueType::from_spec("double").is_dense()); + EXPECT_FALSE(ValueType::from_spec("any").is_dense()); + EXPECT_FALSE(ValueType::from_spec("error").is_dense()); +} + +TEST("require that tensor dimensions can be renamed") { + EXPECT_EQUAL(ValueType::from_spec("tensor(x{})").rename({"x"}, {"y"}), + ValueType::from_spec("tensor(y{})")); + EXPECT_EQUAL(ValueType::from_spec("tensor(x{},y[])").rename({"x","y"}, {"y","x"}), + ValueType::from_spec("tensor(y{},x[])")); + EXPECT_EQUAL(ValueType::from_spec("tensor(x{})").rename({"x"}, {"x"}), + ValueType::from_spec("tensor(x{})")); + EXPECT_EQUAL(ValueType::from_spec("tensor(x{})").rename({}, {}), ValueType::error_type()); + EXPECT_EQUAL(ValueType::double_type().rename({}, {}), ValueType::error_type()); + EXPECT_EQUAL(ValueType::from_spec("tensor(x{},y{})").rename({"x"}, {"y","z"}), ValueType::error_type()); + EXPECT_EQUAL(ValueType::from_spec("tensor(x{},y{})").rename({"x","y"}, {"z"}), ValueType::error_type()); + EXPECT_EQUAL(ValueType::tensor_type({}).rename({"x"}, 
{"y"}), ValueType::any_type()); + EXPECT_EQUAL(ValueType::any_type().rename({"x"}, {"y"}), ValueType::any_type()); + EXPECT_EQUAL(ValueType::double_type().rename({"a"}, {"b"}), ValueType::error_type()); + EXPECT_EQUAL(ValueType::error_type().rename({"a"}, {"b"}), ValueType::error_type()); +} + +TEST("require that types can be concatenated") { + ValueType error = ValueType::error_type(); + ValueType any = ValueType::any_type(); + ValueType tensor = ValueType::tensor_type({}); + ValueType scalar = ValueType::double_type(); + ValueType vx_2 = ValueType::from_spec("tensor(x[2])"); + ValueType vx_m = ValueType::from_spec("tensor(x{})"); + ValueType vx_3 = ValueType::from_spec("tensor(x[3])"); + ValueType vx_5 = ValueType::from_spec("tensor(x[5])"); + ValueType vx_any = ValueType::from_spec("tensor(x[])"); + ValueType vy_7 = ValueType::from_spec("tensor(y[7])"); + ValueType mxy_22 = ValueType::from_spec("tensor(x[2],y[2])"); + ValueType mxy_52 = ValueType::from_spec("tensor(x[5],y[2])"); + ValueType mxy_29 = ValueType::from_spec("tensor(x[2],y[9])"); + ValueType cxyz_572 = ValueType::from_spec("tensor(x[5],y[7],z[2])"); + ValueType cxyz_m72 = ValueType::from_spec("tensor(x{},y[7],z[2])"); + + EXPECT_EQUAL(ValueType::concat(error, vx_2, "x"), error); + EXPECT_EQUAL(ValueType::concat(vx_2, error, "x"), error); + EXPECT_EQUAL(ValueType::concat(error, any, "x"), error); + EXPECT_EQUAL(ValueType::concat(any, error, "x"), error); + EXPECT_EQUAL(ValueType::concat(vx_m, vx_2, "x"), error); + EXPECT_EQUAL(ValueType::concat(vx_2, vx_m, "x"), error); + EXPECT_EQUAL(ValueType::concat(vx_m, vx_m, "x"), error); + EXPECT_EQUAL(ValueType::concat(vx_m, scalar, "x"), error); + EXPECT_EQUAL(ValueType::concat(scalar, vx_m, "x"), error); + EXPECT_EQUAL(ValueType::concat(vy_7, vx_m, "z"), cxyz_m72); + EXPECT_EQUAL(ValueType::concat(tensor, vx_2, "x"), any); + EXPECT_EQUAL(ValueType::concat(vx_2, tensor, "x"), any); + EXPECT_EQUAL(ValueType::concat(any, vx_2, "x"), any); + 
EXPECT_EQUAL(ValueType::concat(vx_2, any, "x"), any); + EXPECT_EQUAL(ValueType::concat(any, tensor, "x"), any); + EXPECT_EQUAL(ValueType::concat(tensor, any, "x"), any); + EXPECT_EQUAL(ValueType::concat(scalar, scalar, "x"), vx_2); + EXPECT_EQUAL(ValueType::concat(vx_2, scalar, "x"), vx_3); + EXPECT_EQUAL(ValueType::concat(scalar, vx_2, "x"), vx_3); + EXPECT_EQUAL(ValueType::concat(vx_2, vx_3, "x"), vx_5); + EXPECT_EQUAL(ValueType::concat(vx_2, vx_any, "x"), vx_any); + EXPECT_EQUAL(ValueType::concat(vx_any, vx_2, "x"), vx_any); + EXPECT_EQUAL(ValueType::concat(scalar, vx_2, "y"), mxy_22); + EXPECT_EQUAL(ValueType::concat(vx_2, scalar, "y"), mxy_22); + EXPECT_EQUAL(ValueType::concat(vx_2, vx_3, "y"), mxy_22); + EXPECT_EQUAL(ValueType::concat(vx_3, vx_2, "y"), mxy_22); + EXPECT_EQUAL(ValueType::concat(mxy_22, vx_3, "x"), mxy_52); + EXPECT_EQUAL(ValueType::concat(vx_3, mxy_22, "x"), mxy_52); + EXPECT_EQUAL(ValueType::concat(mxy_22, vy_7, "y"), mxy_29); + EXPECT_EQUAL(ValueType::concat(vy_7, mxy_22, "y"), mxy_29); + EXPECT_EQUAL(ValueType::concat(vx_5, vy_7, "z"), cxyz_572); +} + +TEST_MAIN() { TEST_RUN_ALL(); } diff --git a/eval/src/tests/tensor/dense_dot_product_function/CMakeLists.txt b/eval/src/tests/tensor/dense_dot_product_function/CMakeLists.txt new file mode 100644 index 00000000000..d02f2cf7646 --- /dev/null +++ b/eval/src/tests/tensor/dense_dot_product_function/CMakeLists.txt @@ -0,0 +1,9 @@ +# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+vespa_add_executable(vespalib_dense_dot_product_function_test_app TEST + SOURCES + dense_dot_product_function_test.cpp + DEPENDS + vespalib + vespalib_vespalib_tensor +) +vespa_add_test(NAME vespalib_dense_dot_product_function_test_app COMMAND vespalib_dense_dot_product_function_test_app) diff --git a/eval/src/tests/tensor/dense_dot_product_function/FILES b/eval/src/tests/tensor/dense_dot_product_function/FILES new file mode 100644 index 00000000000..c79d4ae29de --- /dev/null +++ b/eval/src/tests/tensor/dense_dot_product_function/FILES @@ -0,0 +1 @@ +dense_dot_product_function_test.cpp diff --git a/eval/src/tests/tensor/dense_dot_product_function/dense_dot_product_function_test.cpp b/eval/src/tests/tensor/dense_dot_product_function/dense_dot_product_function_test.cpp new file mode 100644 index 00000000000..3ffcdd7a567 --- /dev/null +++ b/eval/src/tests/tensor/dense_dot_product_function/dense_dot_product_function_test.cpp @@ -0,0 +1,177 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+ +#include <vespa/log/log.h> +LOG_SETUP("dense_dot_product_function_test"); + +#include <vespa/vespalib/testkit/test_kit.h> +#include <vespa/vespalib/eval/tensor_function.h> +#include <vespa/vespalib/tensor/dense/dense_dot_product_function.h> +#include <vespa/vespalib/tensor/dense/dense_tensor.h> +#include <vespa/vespalib/tensor/dense/dense_tensor_builder.h> +#include <vespa/vespalib/tensor/dense/dense_tensor_view.h> +#include <vespa/vespalib/util/stringfmt.h> +#include <vespa/vespalib/util/stash.h> + +using namespace vespalib; +using namespace vespalib::eval; +using namespace vespalib::tensor; + +ValueType +makeType(size_t numCells) +{ + return ValueType::tensor_type({{"x", numCells}}); +} + +tensor::Tensor::UP +makeTensor(size_t numCells, double cellBias) +{ + DenseTensorBuilder builder; + DenseTensorBuilder::Dimension dim = builder.defineDimension("x", numCells); + for (size_t i = 0; i < numCells; ++i) { + builder.addLabel(dim, i).addCell(i + cellBias); + } + return builder.build(); +} + +double +calcDotProduct(const DenseTensor &lhs, const DenseTensor &rhs) +{ + size_t numCells = std::min(lhs.cells().size(), rhs.cells().size()); + double result = 0; + for (size_t i = 0; i < numCells; ++i) { + result += (lhs.cells()[i] * rhs.cells()[i]); + } + return result; +} + +const DenseTensor & +asDenseTensor(const tensor::Tensor &tensor) +{ + return dynamic_cast<const DenseTensor &>(tensor); +} + +class FunctionInput : public TensorFunction::Input +{ +private: + tensor::Tensor::UP _lhsTensor; + tensor::Tensor::UP _rhsTensor; + const DenseTensor &_lhsDenseTensor; + const DenseTensor &_rhsDenseTensor; + TensorValue _lhsValue; + TensorValue _rhsValue; + +public: + FunctionInput(size_t lhsNumCells, size_t rhsNumCells) + : _lhsTensor(makeTensor(lhsNumCells, 3.0)), + _rhsTensor(makeTensor(rhsNumCells, 5.0)), + _lhsDenseTensor(asDenseTensor(*_lhsTensor)), + _rhsDenseTensor(asDenseTensor(*_rhsTensor)), + _lhsValue(std::make_unique<DenseTensor>(_lhsDenseTensor.type(), + 
_lhsDenseTensor.cells())), + _rhsValue(std::make_unique<DenseTensor>(_rhsDenseTensor.type(), + _rhsDenseTensor.cells())) + {} + virtual const Value &get_tensor(size_t id) const override { + if (id == 0) { + return _lhsValue; + } else { + return _rhsValue; + } + } + virtual const UnaryOperation &get_map_operation(size_t) const override { + abort(); + } + double expectedDotProduct() const { + return calcDotProduct(_lhsDenseTensor, _rhsDenseTensor); + } +}; + +struct Fixture +{ + DenseDotProductFunction function; + FunctionInput input; + Fixture(size_t lhsNumCells, size_t rhsNumCells) + : function(0, 1), + input(lhsNumCells, rhsNumCells) + { + } + double eval() const { + Stash stash; + const Value &result = function.eval(input, stash); + ASSERT_TRUE(result.is_double()); + LOG(info, "eval(): (%s) * (%s) = %f", + input.get_tensor(0).type().to_spec().c_str(), + input.get_tensor(1).type().to_spec().c_str(), + result.as_double()); + return result.as_double(); + } +}; + +void +assertDotProduct(size_t numCells) +{ + Fixture f(numCells, numCells); + EXPECT_EQUAL(f.input.expectedDotProduct(), f.eval()); +} + +void +assertDotProduct(size_t lhsNumCells, size_t rhsNumCells) +{ + Fixture f(lhsNumCells, rhsNumCells); + EXPECT_EQUAL(f.input.expectedDotProduct(), f.eval()); +} + +TEST_F("require that empty dot product is correct", Fixture(0, 0)) +{ + EXPECT_EQUAL(0.0, f.eval()); +} + +TEST_F("require that basic dot product with equal sizes is correct", Fixture(2, 2)) +{ + EXPECT_EQUAL((3.0 * 5.0) + (4.0 * 6.0), f.eval()); +} + +TEST_F("require that basic dot product with un-equal sizes is correct", Fixture(2, 3)) +{ + EXPECT_EQUAL((3.0 * 5.0) + (4.0 * 6.0), f.eval()); +} + +TEST_F("require that basic dot product with un-equal sizes is correct", Fixture(3, 2)) +{ + EXPECT_EQUAL((3.0 * 5.0) + (4.0 * 6.0), f.eval()); +} + +TEST("require that dot product with equal sizes is correct") +{ + TEST_DO(assertDotProduct(8)); + TEST_DO(assertDotProduct(16)); + TEST_DO(assertDotProduct(32)); + 
TEST_DO(assertDotProduct(64)); + TEST_DO(assertDotProduct(128)); + TEST_DO(assertDotProduct(256)); + TEST_DO(assertDotProduct(512)); + TEST_DO(assertDotProduct(1024)); + + TEST_DO(assertDotProduct(8 + 3)); + TEST_DO(assertDotProduct(16 + 3)); + TEST_DO(assertDotProduct(32 + 3)); + TEST_DO(assertDotProduct(64 + 3)); + TEST_DO(assertDotProduct(128 + 3)); + TEST_DO(assertDotProduct(256 + 3)); + TEST_DO(assertDotProduct(512 + 3)); + TEST_DO(assertDotProduct(1024 + 3)); +} + +TEST("require that dot product with un-equal sizes is correct") +{ + TEST_DO(assertDotProduct(8, 8 + 3)); + TEST_DO(assertDotProduct(16, 16 + 3)); + TEST_DO(assertDotProduct(32, 32 + 3)); + TEST_DO(assertDotProduct(64, 64 + 3)); + TEST_DO(assertDotProduct(128, 128 + 3)); + TEST_DO(assertDotProduct(256, 256 + 3)); + TEST_DO(assertDotProduct(512, 512 + 3)); + TEST_DO(assertDotProduct(1024, 1024 + 3)); +} + +TEST_MAIN() { TEST_RUN_ALL(); } diff --git a/eval/src/tests/tensor/dense_tensor_address_combiner/CMakeLists.txt b/eval/src/tests/tensor/dense_tensor_address_combiner/CMakeLists.txt new file mode 100644 index 00000000000..65e7c711b19 --- /dev/null +++ b/eval/src/tests/tensor/dense_tensor_address_combiner/CMakeLists.txt @@ -0,0 +1,9 @@ +# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+vespa_add_executable(vespalib_dense_tensor_address_combiner_test_app TEST + SOURCES + dense_tensor_address_combiner_test.cpp + DEPENDS + vespalib + vespalib_vespalib_tensor +) +vespa_add_test(NAME vespalib_dense_tensor_address_combiner_test_app COMMAND vespalib_dense_tensor_address_combiner_test_app) diff --git a/eval/src/tests/tensor/dense_tensor_address_combiner/FILES b/eval/src/tests/tensor/dense_tensor_address_combiner/FILES new file mode 100644 index 00000000000..0a49bd4647b --- /dev/null +++ b/eval/src/tests/tensor/dense_tensor_address_combiner/FILES @@ -0,0 +1 @@ +dense_tensor_address_combiner_test.cpp diff --git a/eval/src/tests/tensor/dense_tensor_address_combiner/dense_tensor_address_combiner_test.cpp b/eval/src/tests/tensor/dense_tensor_address_combiner/dense_tensor_address_combiner_test.cpp new file mode 100644 index 00000000000..37f95172251 --- /dev/null +++ b/eval/src/tests/tensor/dense_tensor_address_combiner/dense_tensor_address_combiner_test.cpp @@ -0,0 +1,32 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+ +#include <vespa/vespalib/testkit/test_kit.h> +#include <vespa/vespalib/tensor/dense/dense_tensor_address_combiner.h> +#include <vespa/vespalib/test/insertion_operators.h> + +using namespace vespalib::tensor; +using vespalib::eval::ValueType; + +ValueType +combine(const std::vector<ValueType::Dimension> &lhs, + const std::vector<ValueType::Dimension> &rhs) +{ + return DenseTensorAddressCombiner::combineDimensions( + ValueType::tensor_type(lhs), + ValueType::tensor_type(rhs)); +} + +TEST("require that dimensions can be combined") +{ + EXPECT_EQUAL(ValueType::tensor_type({{"a", 3}, {"b", 5}}), combine({{"a", 3}}, {{"b", 5}})); + EXPECT_EQUAL(ValueType::tensor_type({{"a", 3}, {"b", 5}}), combine({{"a", 3}, {"b", 5}}, {{"b", 5}})); + EXPECT_EQUAL(ValueType::tensor_type({{"a", 3}, {"b", 5}}), combine({{"a", 3}, {"b", 7}}, {{"b", 5}})); + EXPECT_EQUAL(ValueType::tensor_type({{"a", 3}, {"b", 11}, {"c", 5}, {"d", 7}, {"e", 17}}), + combine({{"a", 3}, {"c", 5}, {"d", 7}}, + {{"b", 11}, {"c", 13}, {"e", 17}})); + EXPECT_EQUAL(ValueType::tensor_type({{"a", 3}, {"b", 11}, {"c", 5}, {"d", 7}, {"e", 17}}), + combine({{"b", 11}, {"c", 13}, {"e", 17}}, + {{"a", 3}, {"c", 5}, {"d", 7}})); +} + +TEST_MAIN() { TEST_RUN_ALL(); } diff --git a/eval/src/tests/tensor/dense_tensor_builder/.gitignore b/eval/src/tests/tensor/dense_tensor_builder/.gitignore new file mode 100644 index 00000000000..5b3598a205d --- /dev/null +++ b/eval/src/tests/tensor/dense_tensor_builder/.gitignore @@ -0,0 +1 @@ +vespalib_dense_tensor_builder_test_app diff --git a/eval/src/tests/tensor/dense_tensor_builder/CMakeLists.txt b/eval/src/tests/tensor/dense_tensor_builder/CMakeLists.txt new file mode 100644 index 00000000000..9028138ab87 --- /dev/null +++ b/eval/src/tests/tensor/dense_tensor_builder/CMakeLists.txt @@ -0,0 +1,9 @@ +# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+vespa_add_executable(vespalib_dense_tensor_builder_test_app TEST + SOURCES + dense_tensor_builder_test.cpp + DEPENDS + vespalib + vespalib_vespalib_tensor +) +vespa_add_test(NAME vespalib_dense_tensor_builder_test_app COMMAND vespalib_dense_tensor_builder_test_app) diff --git a/eval/src/tests/tensor/dense_tensor_builder/FILES b/eval/src/tests/tensor/dense_tensor_builder/FILES new file mode 100644 index 00000000000..448dd3c1e3c --- /dev/null +++ b/eval/src/tests/tensor/dense_tensor_builder/FILES @@ -0,0 +1 @@ +dense_tensor_builder_test.cpp diff --git a/eval/src/tests/tensor/dense_tensor_builder/dense_tensor_builder_test.cpp b/eval/src/tests/tensor/dense_tensor_builder/dense_tensor_builder_test.cpp new file mode 100644 index 00000000000..5036f247db3 --- /dev/null +++ b/eval/src/tests/tensor/dense_tensor_builder/dense_tensor_builder_test.cpp @@ -0,0 +1,251 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#include <vespa/vespalib/test/insertion_operators.h> +#include <vespa/vespalib/testkit/test_kit.h> +#include <vespa/vespalib/tensor/dense/dense_tensor_builder.h> +#include <vespa/vespalib/util/exceptions.h> + +using namespace vespalib::tensor; +using vespalib::IllegalArgumentException; +using Builder = DenseTensorBuilder; +using vespalib::eval::TensorSpec; +using vespalib::eval::ValueType; + +void +assertTensor(const std::vector<ValueType::Dimension> &expDims, + const DenseTensor::Cells &expCells, + const Tensor &tensor) +{ + const DenseTensor &realTensor = dynamic_cast<const DenseTensor &>(tensor); + EXPECT_EQUAL(ValueType::tensor_type(expDims), realTensor.type()); + EXPECT_EQUAL(expCells, realTensor.cells()); +} + +void +assertTensorSpec(const TensorSpec &expSpec, const Tensor &tensor) +{ + TensorSpec actSpec = tensor.toSpec(); + EXPECT_EQUAL(expSpec, actSpec); +} + +struct Fixture +{ + Builder builder; +}; + +Tensor::UP +build1DTensor(Builder &builder) +{ + Builder::Dimension dimX = 
builder.defineDimension("x", 3); + builder.addLabel(dimX, 0).addCell(10). + addLabel(dimX, 1).addCell(11). + addLabel(dimX, 2).addCell(12); + return builder.build(); +} + +TEST_F("require that 1d tensor can be constructed", Fixture) +{ + assertTensor({{"x",3}}, {10,11,12}, *build1DTensor(f.builder)); +} + +TEST_F("require that 1d tensor can be converted to tensor spec", Fixture) +{ + assertTensorSpec(TensorSpec("tensor(x[3])"). + add({{"x", 0}}, 10). + add({{"x", 1}}, 11). + add({{"x", 2}}, 12), + *build1DTensor(f.builder)); +} + +Tensor::UP +build2DTensor(Builder &builder) +{ + Builder::Dimension dimX = builder.defineDimension("x", 3); + Builder::Dimension dimY = builder.defineDimension("y", 2); + builder.addLabel(dimX, 0).addLabel(dimY, 0).addCell(10). + addLabel(dimX, 0).addLabel(dimY, 1).addCell(11). + addLabel(dimX, 1).addLabel(dimY, 0).addCell(12). + addLabel(dimX, 1).addLabel(dimY, 1).addCell(13). + addLabel(dimX, 2).addLabel(dimY, 0).addCell(14). + addLabel(dimX, 2).addLabel(dimY, 1).addCell(15); + return builder.build(); +} + +TEST_F("require that 2d tensor can be constructed", Fixture) +{ + assertTensor({{"x",3},{"y",2}}, {10,11,12,13,14,15}, *build2DTensor(f.builder)); +} + +TEST_F("require that 2d tensor can be converted to tensor spec", Fixture) +{ + assertTensorSpec(TensorSpec("tensor(x[3],y[2])"). + add({{"x", 0},{"y", 0}}, 10). + add({{"x", 0},{"y", 1}}, 11). + add({{"x", 1},{"y", 0}}, 12). + add({{"x", 1},{"y", 1}}, 13). + add({{"x", 2},{"y", 0}}, 14). + add({{"x", 2},{"y", 1}}, 15), + *build2DTensor(f.builder)); +} + +TEST_F("require that 3d tensor can be constructed", Fixture) +{ + Builder::Dimension dimX = f.builder.defineDimension("x", 3); + Builder::Dimension dimY = f.builder.defineDimension("y", 2); + Builder::Dimension dimZ = f.builder.defineDimension("z", 2); + f.builder.addLabel(dimX, 0).addLabel(dimY, 0).addLabel(dimZ, 0).addCell(10). + addLabel(dimX, 0).addLabel(dimY, 0).addLabel(dimZ, 1).addCell(11). 
+ addLabel(dimX, 0).addLabel(dimY, 1).addLabel(dimZ, 0).addCell(12). + addLabel(dimX, 0).addLabel(dimY, 1).addLabel(dimZ, 1).addCell(13). + addLabel(dimX, 1).addLabel(dimY, 0).addLabel(dimZ, 0).addCell(14). + addLabel(dimX, 1).addLabel(dimY, 0).addLabel(dimZ, 1).addCell(15). + addLabel(dimX, 1).addLabel(dimY, 1).addLabel(dimZ, 0).addCell(16). + addLabel(dimX, 1).addLabel(dimY, 1).addLabel(dimZ, 1).addCell(17). + addLabel(dimX, 2).addLabel(dimY, 0).addLabel(dimZ, 0).addCell(18). + addLabel(dimX, 2).addLabel(dimY, 0).addLabel(dimZ, 1).addCell(19). + addLabel(dimX, 2).addLabel(dimY, 1).addLabel(dimZ, 0).addCell(20). + addLabel(dimX, 2).addLabel(dimY, 1).addLabel(dimZ, 1).addCell(21); + assertTensor({{"x",3},{"y",2},{"z",2}}, + {10,11,12,13,14,15,16,17,18,19,20,21}, + *f.builder.build()); +} + +TEST_F("require that cells get default value 0 if not specified", Fixture) +{ + Builder::Dimension dimX = f.builder.defineDimension("x", 3); + f.builder.addLabel(dimX, 1).addCell(11); + assertTensor({{"x",3}}, {0,11,0}, + *f.builder.build()); +} + +TEST_F("require that labels can be added in arbitrarily order", Fixture) +{ + Builder::Dimension dimX = f.builder.defineDimension("x", 2); + Builder::Dimension dimY = f.builder.defineDimension("y", 3); + f.builder.addLabel(dimY, 0).addLabel(dimX, 1).addCell(10); + assertTensor({{"x",2},{"y",3}}, {0,0,0,10,0,0}, + *f.builder.build()); +} + +TEST_F("require that builder can be re-used", Fixture) +{ + { + Builder::Dimension dimX = f.builder.defineDimension("x", 2); + f.builder.addLabel(dimX, 0).addCell(10). + addLabel(dimX, 1).addCell(11); + assertTensor({{"x",2}}, {10,11}, + *f.builder.build()); + } + { + Builder::Dimension dimY = f.builder.defineDimension("y", 3); + f.builder.addLabel(dimY, 0).addCell(20). + addLabel(dimY, 1).addCell(21). 
+ addLabel(dimY, 2).addCell(22); + assertTensor({{"y",3}}, {20,21,22}, + *f.builder.build()); + } +} + +void +assertTensorCell(const std::vector<size_t> &expAddress, + double expCell, + const DenseTensor::CellsIterator &itr) +{ + EXPECT_TRUE(itr.valid()); + EXPECT_EQUAL(expAddress, itr.address()); + EXPECT_EQUAL(expCell, itr.cell()); +} + +TEST_F("require that dense tensor cells iterator works for 1d tensor", Fixture) +{ + Tensor::UP tensor; + { + Builder::Dimension dimX = f.builder.defineDimension("x", 2); + f.builder.addLabel(dimX, 0).addCell(2). + addLabel(dimX, 1).addCell(3); + tensor = f.builder.build(); + } + + const DenseTensor &denseTensor = dynamic_cast<const DenseTensor &>(*tensor); + DenseTensor::CellsIterator itr = denseTensor.cellsIterator(); + + assertTensorCell({0}, 2, itr); + itr.next(); + assertTensorCell({1}, 3, itr); + itr.next(); + EXPECT_FALSE(itr.valid()); +} + +TEST_F("require that dense tensor cells iterator works for 2d tensor", Fixture) +{ + Tensor::UP tensor; + { + Builder::Dimension dimX = f.builder.defineDimension("x", 2); + Builder::Dimension dimY = f.builder.defineDimension("y", 2); + f.builder.addLabel(dimX, 0).addLabel(dimY, 0).addCell(2). + addLabel(dimX, 0).addLabel(dimY, 1).addCell(3). + addLabel(dimX, 1).addLabel(dimY, 0).addCell(5). 
+ addLabel(dimX, 1).addLabel(dimY, 1).addCell(7); + tensor = f.builder.build(); + } + + const DenseTensor &denseTensor = dynamic_cast<const DenseTensor &>(*tensor); + DenseTensor::CellsIterator itr = denseTensor.cellsIterator(); + + assertTensorCell({0,0}, 2, itr); + itr.next(); + assertTensorCell({0,1}, 3, itr); + itr.next(); + assertTensorCell({1,0}, 5, itr); + itr.next(); + assertTensorCell({1,1}, 7, itr); + itr.next(); + EXPECT_FALSE(itr.valid()); +} + +TEST_F("require that undefined label for a dimension throws exception", Fixture) +{ + Builder::Dimension dimX = f.builder.defineDimension("x", 2); + f.builder.defineDimension("y", 3); + EXPECT_EXCEPTION(f.builder.addLabel(dimX, 0).addCell(10), + IllegalArgumentException, + "Label for dimension 'y' is undefined. Expected a value in the range [0, 3>"); +} + +TEST_F("require that label outside range throws exception", Fixture) +{ + Builder::Dimension dimX = f.builder.defineDimension("x", 2); + EXPECT_EXCEPTION(f.builder.addLabel(dimX, 2).addCell(10), + IllegalArgumentException, + "Label '2' for dimension 'x' is outside range [0, 2>"); +} + +TEST_F("require that already specified label throws exception", Fixture) +{ + Builder::Dimension dimX = f.builder.defineDimension("x", 2); + EXPECT_EXCEPTION(f.builder.addLabel(dimX, 0).addLabel(dimX, 1).addCell(10), + IllegalArgumentException, + "Label for dimension 'x' is already specified with value '0'"); +} + +TEST_F("require that dimensions are sorted", Fixture) +{ + Builder::Dimension dimY = f.builder.defineDimension("y", 3); + Builder::Dimension dimX = f.builder.defineDimension("x", 5); + f.builder.addLabel(dimX, 0).addLabel(dimY, 0).addCell(10); + f.builder.addLabel(dimX, 0).addLabel(dimY, 1).addCell(11); + f.builder.addLabel(dimX, 1).addLabel(dimY, 0).addCell(12); + std::unique_ptr<Tensor> tensor = f.builder.build(); + const DenseTensor &denseTensor = dynamic_cast<const DenseTensor &>(*tensor); + assertTensor({{"x", 5}, {"y", 3}}, + {10, 11, 0, 12, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0}, + denseTensor); + EXPECT_EQUAL("tensor(x[5],y[3])", denseTensor.getType().to_spec()); +} + + + + + + +TEST_MAIN() { TEST_RUN_ALL(); } diff --git a/eval/src/tests/tensor/dense_tensor_function_compiler/CMakeLists.txt b/eval/src/tests/tensor/dense_tensor_function_compiler/CMakeLists.txt new file mode 100644 index 00000000000..a34b39abb70 --- /dev/null +++ b/eval/src/tests/tensor/dense_tensor_function_compiler/CMakeLists.txt @@ -0,0 +1,9 @@ +# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +vespa_add_executable(vespalib_dense_tensor_function_compiler_test_app TEST + SOURCES + dense_tensor_function_compiler_test.cpp + DEPENDS + vespalib + vespalib_vespalib_tensor +) +vespa_add_test(NAME vespalib_dense_tensor_function_compiler_test_app COMMAND vespalib_dense_tensor_function_compiler_test_app) diff --git a/eval/src/tests/tensor/dense_tensor_function_compiler/FILES b/eval/src/tests/tensor/dense_tensor_function_compiler/FILES new file mode 100644 index 00000000000..3c4ec2f1753 --- /dev/null +++ b/eval/src/tests/tensor/dense_tensor_function_compiler/FILES @@ -0,0 +1 @@ +dense_tensor_function_compiler_test.cpp diff --git a/eval/src/tests/tensor/dense_tensor_function_compiler/dense_tensor_function_compiler_test.cpp b/eval/src/tests/tensor/dense_tensor_function_compiler/dense_tensor_function_compiler_test.cpp new file mode 100644 index 00000000000..c1420f2b8d2 --- /dev/null +++ b/eval/src/tests/tensor/dense_tensor_function_compiler/dense_tensor_function_compiler_test.cpp @@ -0,0 +1,65 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+ +#include <vespa/vespalib/testkit/test_kit.h> +#include <vespa/vespalib/tensor/dense/dense_dot_product_function.h> +#include <vespa/vespalib/tensor/dense/dense_tensor_function_compiler.h> + +using namespace vespalib::eval; +using namespace vespalib::eval::operation; +using namespace vespalib::eval::tensor_function; +using namespace vespalib::tensor; + +template <typename T> +const T *as(const TensorFunction &function) { return dynamic_cast<const T *>(&function); } + +TensorFunction::UP +compileDotProduct(const vespalib::string &lhsType, + const vespalib::string &rhsType) +{ + Node_UP reduceNode = reduce(apply(Mul(), + inject(ValueType::from_spec(lhsType), 1), + inject(ValueType::from_spec(rhsType), 3)), + Add(), {}); + return DenseTensorFunctionCompiler::compile(std::move(reduceNode)); +} + +void +assertCompiledDotProduct(const vespalib::string &lhsType, + const vespalib::string &rhsType) +{ + TensorFunction::UP func = compileDotProduct(lhsType, rhsType); + const DenseDotProductFunction *dotProduct = as<DenseDotProductFunction>(*func); + ASSERT_TRUE(dotProduct); + EXPECT_EQUAL(1u, dotProduct->lhsTensorId()); + EXPECT_EQUAL(3u, dotProduct->rhsTensorId()); +} + +void +assertNotCompiledDotProduct(const vespalib::string &lhsType, + const vespalib::string &rhsType) +{ + TensorFunction::UP func = compileDotProduct(lhsType, rhsType); + const Reduce *reduce = as<Reduce>(*func); + EXPECT_TRUE(reduce); +} + +TEST("require that dot product with compatible dimensions is compiled") +{ + TEST_DO(assertCompiledDotProduct("tensor(x[5])", "tensor(x[5])")); + TEST_DO(assertCompiledDotProduct("tensor(x[3])", "tensor(x[5])")); + TEST_DO(assertCompiledDotProduct("tensor(x[5])", "tensor(x[3])")); + TEST_DO(assertCompiledDotProduct("tensor(x[])", "tensor(x[5])")); + TEST_DO(assertCompiledDotProduct("tensor(x[5])", "tensor(x[])")); + TEST_DO(assertCompiledDotProduct("tensor(x[])", "tensor(x[])")); +} + +TEST("require that dot product with incompatible dimensions is NOT compiled") +{ + 
TEST_DO(assertNotCompiledDotProduct("tensor(x[5])", "tensor(y[5])")); + TEST_DO(assertNotCompiledDotProduct("tensor(y[5])", "tensor(x[5])")); + TEST_DO(assertNotCompiledDotProduct("tensor(y[])", "tensor(x[])")); + TEST_DO(assertNotCompiledDotProduct("tensor(x[5])", "tensor(x[5],y[7])")); + TEST_DO(assertNotCompiledDotProduct("tensor(x[5],y[7])", "tensor(x[5],y[7])")); +} + +TEST_MAIN() { TEST_RUN_ALL(); } diff --git a/eval/src/tests/tensor/sparse_tensor_builder/.gitignore b/eval/src/tests/tensor/sparse_tensor_builder/.gitignore new file mode 100644 index 00000000000..e0316d190bb --- /dev/null +++ b/eval/src/tests/tensor/sparse_tensor_builder/.gitignore @@ -0,0 +1 @@ +vespalib_sparse_tensor_builder_test_app diff --git a/eval/src/tests/tensor/sparse_tensor_builder/CMakeLists.txt b/eval/src/tests/tensor/sparse_tensor_builder/CMakeLists.txt new file mode 100644 index 00000000000..c8ae7ece908 --- /dev/null +++ b/eval/src/tests/tensor/sparse_tensor_builder/CMakeLists.txt @@ -0,0 +1,9 @@ +# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+vespa_add_executable(vespalib_sparse_tensor_builder_test_app TEST + SOURCES + sparse_tensor_builder_test.cpp + DEPENDS + vespalib + vespalib_vespalib_tensor +) +vespa_add_test(NAME vespalib_sparse_tensor_builder_test_app COMMAND vespalib_sparse_tensor_builder_test_app) diff --git a/eval/src/tests/tensor/sparse_tensor_builder/FILES b/eval/src/tests/tensor/sparse_tensor_builder/FILES new file mode 100644 index 00000000000..ad47666278e --- /dev/null +++ b/eval/src/tests/tensor/sparse_tensor_builder/FILES @@ -0,0 +1 @@ +sparse_tensor_builder_test.cpp diff --git a/eval/src/tests/tensor/sparse_tensor_builder/sparse_tensor_builder_test.cpp b/eval/src/tests/tensor/sparse_tensor_builder/sparse_tensor_builder_test.cpp new file mode 100644 index 00000000000..d1ad41e8a7e --- /dev/null +++ b/eval/src/tests/tensor/sparse_tensor_builder/sparse_tensor_builder_test.cpp @@ -0,0 +1,97 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#include <vespa/vespalib/testkit/test_kit.h> +#include <vespa/vespalib/tensor/sparse/sparse_tensor_builder.h> +#include <vespa/vespalib/test/insertion_operators.h> + +using namespace vespalib::tensor; +using vespalib::eval::TensorSpec; +using vespalib::eval::ValueType; + +void +assertCellValue(double expValue, const TensorAddress &address, + const ValueType &type, + const SparseTensor::Cells &cells) +{ + SparseTensorAddressBuilder addressBuilder; + auto dimsItr = type.dimensions().cbegin(); + auto dimsItrEnd = type.dimensions().cend(); + for (const auto &element : address.elements()) { + while ((dimsItr < dimsItrEnd) && (dimsItr->name < element.dimension())) { + addressBuilder.add(""); + ++dimsItr; + } + assert((dimsItr != dimsItrEnd) && (dimsItr->name == element.dimension())); + addressBuilder.add(element.label()); + ++dimsItr; + } + while (dimsItr < dimsItrEnd) { + addressBuilder.add(""); + ++dimsItr; + } + SparseTensorAddressRef addressRef(addressBuilder.getAddressRef()); + auto 
itr = cells.find(addressRef); + EXPECT_FALSE(itr == cells.end()); + EXPECT_EQUAL(expValue, itr->second); +} + +Tensor::UP +buildTensor() +{ + SparseTensorBuilder builder; + builder.define_dimension("c"); + builder.define_dimension("d"); + builder.define_dimension("a"); + builder.define_dimension("b"); + builder.add_label(builder.define_dimension("a"), "1"). + add_label(builder.define_dimension("b"), "2").add_cell(10). + add_label(builder.define_dimension("c"), "3"). + add_label(builder.define_dimension("d"), "4").add_cell(20); + return builder.build(); +} + +TEST("require that tensor can be constructed") +{ + Tensor::UP tensor = buildTensor(); + const SparseTensor &sparseTensor = dynamic_cast<const SparseTensor &>(*tensor); + const ValueType &type = sparseTensor.type(); + const SparseTensor::Cells &cells = sparseTensor.cells(); + EXPECT_EQUAL(2u, cells.size()); + assertCellValue(10, TensorAddress({{"a","1"},{"b","2"}}), + type, cells); + assertCellValue(20, TensorAddress({{"c","3"},{"d","4"}}), + type, cells); +} + +TEST("require that tensor can be converted to tensor spec") +{ + Tensor::UP tensor = buildTensor(); + TensorSpec expSpec("tensor(a{},b{},c{},d{})"); + expSpec.add({{"a", "1"}, {"b", "2"}, {"c", ""}, {"d", ""}}, 10). + add({{"a", ""},{"b",""},{"c", "3"}, {"d", "4"}}, 20); + TensorSpec actSpec = tensor->toSpec(); + EXPECT_EQUAL(expSpec, actSpec); +} + +TEST("require that dimensions are extracted") +{ + SparseTensorBuilder builder; + builder.define_dimension("c"); + builder.define_dimension("a"); + builder.define_dimension("b"); + builder. + add_label(builder.define_dimension("a"), "1"). + add_label(builder.define_dimension("b"), "2").add_cell(10). + add_label(builder.define_dimension("b"), "3"). 
+ add_label(builder.define_dimension("c"), "4").add_cell(20); + Tensor::UP tensor = builder.build(); + const SparseTensor &sparseTensor = dynamic_cast<const SparseTensor &>(*tensor); + const auto &dims = sparseTensor.type().dimensions(); + EXPECT_EQUAL(3u, dims.size()); + EXPECT_EQUAL("a", dims[0].name); + EXPECT_EQUAL("b", dims[1].name); + EXPECT_EQUAL("c", dims[2].name); + EXPECT_EQUAL("tensor(a{},b{},c{})", sparseTensor.getType().to_spec()); +} + +TEST_MAIN() { TEST_RUN_ALL(); } diff --git a/eval/src/tests/tensor/tensor_address/.gitignore b/eval/src/tests/tensor/tensor_address/.gitignore new file mode 100644 index 00000000000..189adb8710b --- /dev/null +++ b/eval/src/tests/tensor/tensor_address/.gitignore @@ -0,0 +1 @@ +vespalib_tensor_address_test_app diff --git a/eval/src/tests/tensor/tensor_address/CMakeLists.txt b/eval/src/tests/tensor/tensor_address/CMakeLists.txt new file mode 100644 index 00000000000..43c45f913a5 --- /dev/null +++ b/eval/src/tests/tensor/tensor_address/CMakeLists.txt @@ -0,0 +1,9 @@ +# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +vespa_add_executable(vespalib_tensor_address_test_app TEST + SOURCES + tensor_address_test.cpp + DEPENDS + vespalib + vespalib_vespalib_tensor +) +vespa_add_test(NAME vespalib_tensor_address_test_app COMMAND vespalib_tensor_address_test_app) diff --git a/eval/src/tests/tensor/tensor_address/FILES b/eval/src/tests/tensor/tensor_address/FILES new file mode 100644 index 00000000000..1d7d1c533a0 --- /dev/null +++ b/eval/src/tests/tensor/tensor_address/FILES @@ -0,0 +1 @@ +tensor_address_test.cpp diff --git a/eval/src/tests/tensor/tensor_address/tensor_address_test.cpp b/eval/src/tests/tensor/tensor_address/tensor_address_test.cpp new file mode 100644 index 00000000000..70f33bdf0c4 --- /dev/null +++ b/eval/src/tests/tensor/tensor_address/tensor_address_test.cpp @@ -0,0 +1,39 @@ +// Copyright 2016 Yahoo Inc. 
Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#include <vespa/vespalib/testkit/test_kit.h> +#include <vespa/vespalib/tensor/tensor_address.h> + +using namespace vespalib::tensor; + +void +assertSortOrder(const TensorAddress::Elements &exp, + const TensorAddress::Elements &input) +{ + TensorAddress address(input); + EXPECT_EQUAL(exp, address.elements()); +} + +TEST("require that elements are sorted in constructor") +{ + assertSortOrder({{"a","1"},{"b","1"},{"c","1"}}, + {{"c","1"},{"a","1"},{"b","1"}}); +} + +TEST("require that we can check whether a dimension is present") +{ + TensorAddress address({{"a","1"},{"b","1"}}); + EXPECT_TRUE(address.hasDimension("a")); + EXPECT_TRUE(address.hasDimension("b")); + EXPECT_FALSE(address.hasDimension("c")); +} + +TEST("require that tensor address sort order is defined") +{ + TensorAddress::Elements single = {{"a","1"}}; + EXPECT_LESS(TensorAddress(single), + TensorAddress({{"a","1"},{"b","1"}})); + EXPECT_LESS(TensorAddress({{"a","1"},{"b","1"}}), + TensorAddress({{"a","1"},{"c","1"}})); +} + +TEST_MAIN() { TEST_RUN_ALL(); } diff --git a/eval/src/tests/tensor/tensor_conformance/CMakeLists.txt b/eval/src/tests/tensor/tensor_conformance/CMakeLists.txt new file mode 100644 index 00000000000..0aaddb481cc --- /dev/null +++ b/eval/src/tests/tensor/tensor_conformance/CMakeLists.txt @@ -0,0 +1,9 @@ +# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+vespa_add_executable(vespalib_tensor_tensor_conformance_test_app TEST + SOURCES + tensor_conformance_test.cpp + DEPENDS + vespalib + vespalib_vespalib_tensor +) +vespa_add_test(NAME vespalib_tensor_tensor_conformance_test_app COMMAND vespalib_tensor_tensor_conformance_test_app) diff --git a/eval/src/tests/tensor/tensor_conformance/tensor_conformance_test.cpp b/eval/src/tests/tensor/tensor_conformance/tensor_conformance_test.cpp new file mode 100644 index 00000000000..238d0604ee7 --- /dev/null +++ b/eval/src/tests/tensor/tensor_conformance/tensor_conformance_test.cpp @@ -0,0 +1,19 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +#include <vespa/vespalib/testkit/test_kit.h> +#include <vespa/vespalib/eval/test/tensor_conformance.h> +#include <vespa/vespalib/eval/simple_tensor_engine.h> +#include <vespa/vespalib/tensor/default_tensor_engine.h> + +using vespalib::eval::SimpleTensorEngine; +using vespalib::eval::test::TensorConformance; +using vespalib::tensor::DefaultTensorEngine; + +TEST("require that reference tensor implementation passes all conformance tests") { + TEST_DO(TensorConformance::run_tests(SimpleTensorEngine::ref(), true)); +} + +IGNORE_TEST("require that production tensor implementation passes non-mixed conformance tests") { + TEST_DO(TensorConformance::run_tests(DefaultTensorEngine::ref(), false)); +} + +TEST_MAIN() { TEST_RUN_ALL(); } diff --git a/eval/src/tests/tensor/tensor_mapper/.gitignore b/eval/src/tests/tensor/tensor_mapper/.gitignore new file mode 100644 index 00000000000..8a312ff3157 --- /dev/null +++ b/eval/src/tests/tensor/tensor_mapper/.gitignore @@ -0,0 +1 @@ +vespalib_tensor_mapper_test_app diff --git a/eval/src/tests/tensor/tensor_mapper/CMakeLists.txt b/eval/src/tests/tensor/tensor_mapper/CMakeLists.txt new file mode 100644 index 00000000000..fb18883f7ef --- /dev/null +++ b/eval/src/tests/tensor/tensor_mapper/CMakeLists.txt @@ -0,0 +1,9 @@ +# Copyright 2016 Yahoo 
Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +vespa_add_executable(vespalib_tensor_mapper_test_app TEST + SOURCES + tensor_mapper_test.cpp + DEPENDS + vespalib + vespalib_vespalib_tensor +) +vespa_add_test(NAME vespalib_tensor_mapper_test_app COMMAND vespalib_tensor_mapper_test_app) diff --git a/eval/src/tests/tensor/tensor_mapper/FILES b/eval/src/tests/tensor/tensor_mapper/FILES new file mode 100644 index 00000000000..8678f175be1 --- /dev/null +++ b/eval/src/tests/tensor/tensor_mapper/FILES @@ -0,0 +1 @@ +tensor_mapper_test.cpp diff --git a/eval/src/tests/tensor/tensor_mapper/tensor_mapper_test.cpp b/eval/src/tests/tensor/tensor_mapper/tensor_mapper_test.cpp new file mode 100644 index 00000000000..f4edd8901e4 --- /dev/null +++ b/eval/src/tests/tensor/tensor_mapper/tensor_mapper_test.cpp @@ -0,0 +1,245 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#include <vespa/vespalib/testkit/test_kit.h> +#include <vespa/vespalib/util/stringfmt.h> +#include <vespa/vespalib/tensor/sparse/sparse_tensor.h> +#include <vespa/vespalib/tensor/sparse/sparse_tensor_builder.h> +#include <vespa/vespalib/tensor/dense/dense_tensor.h> +#include <vespa/vespalib/tensor/dense/dense_tensor_builder.h> +#include <vespa/vespalib/tensor/types.h> +#include <vespa/vespalib/tensor/tensor_factory.h> +#include <vespa/vespalib/tensor/tensor_mapper.h> +#include <vespa/vespalib/tensor/default_tensor.h> +#include <ostream> + +using vespalib::eval::ValueType; +using namespace vespalib::tensor; + +namespace vespalib { +namespace tensor { + +static bool operator==(const Tensor &lhs, const Tensor &rhs) +{ + return lhs.equals(rhs); +} + +} +} + +template <typename BuilderType> +bool defaultBuilder() { return false; } + +template <> +bool defaultBuilder<DefaultTensor::builder>() { return true; } + +template <typename BuilderType> +struct TensorTFromBuilder; + +template <> +struct 
TensorTFromBuilder<SparseTensorBuilder> { + using TensorT = SparseTensor; +}; + +template <typename BuilderType> +using TensorTFromBuilder_t = typename TensorTFromBuilder<BuilderType>::TensorT; + +struct FixtureBase +{ + Tensor::UP createDenseTensor(const DenseTensorCells &cells) { + return TensorFactory::createDense(cells); + } +}; + +template <typename BuilderType> +struct Fixture : public FixtureBase +{ + BuilderType _builder; + using TensorT = TensorTFromBuilder_t<BuilderType>; + Fixture() : FixtureBase(), _builder() {} + + Tensor::UP createTensor(const TensorCells &cells, + const TensorDimensions &dimensions) { + return TensorFactory::create(cells, dimensions, _builder); + } + + void assertSparseMapImpl(const Tensor &exp, + const ValueType &tensorType, + const Tensor &rhs, bool isDefaultBuilder) + { + EXPECT_TRUE(tensorType.is_sparse()); + if (isDefaultBuilder) { + TensorMapper mapper(tensorType); + std::unique_ptr<Tensor> mapped = mapper.map(rhs); + EXPECT_TRUE(!!mapped); + EXPECT_EQUAL(exp, *mapped); + } + std::unique_ptr<Tensor> mapped = + TensorMapper::mapToSparse<TensorT>(rhs, tensorType); + EXPECT_TRUE(!!mapped); + EXPECT_EQUAL(exp, *mapped); + } + + void assertDenseMapImpl(const Tensor &exp, + const ValueType &tensorType, + const Tensor &rhs) + { + EXPECT_TRUE(tensorType.is_dense()); + TensorMapper mapper(tensorType); + std::unique_ptr<Tensor> mapped = mapper.map(rhs); + EXPECT_TRUE(!!mapped); + EXPECT_EQUAL(exp, *mapped); + } + + void + assertSparseMap(const TensorCells &expTensor, + const TensorDimensions &expDimensions, + const vespalib::string &typeSpec, + const TensorCells &rhsTensor, + const TensorDimensions &rhsDimensions) + { + assertSparseMapImpl(*createTensor(expTensor, expDimensions), + ValueType::from_spec(typeSpec), + *createTensor(rhsTensor, rhsDimensions), + defaultBuilder<BuilderType>()); + } + + void + assertDenseMap(const DenseTensorCells &expTensor, + const vespalib::string &typeSpec, + const TensorCells &rhsTensor, + const 
TensorDimensions &rhsDimensions) + { + assertDenseMapImpl(*createDenseTensor(expTensor), + ValueType::from_spec(typeSpec), + *createTensor(rhsTensor, rhsDimensions)); + } +}; + +using SparseFixture = Fixture<SparseTensorBuilder>; + +template <typename FixtureType> +void +testTensorMapper(FixtureType &f) +{ + TEST_DO(f.assertSparseMap({ + {{{"y","1"}}, 4}, + {{{"y","2"}}, 12} + }, + { "y" }, + "tensor(y{})", + { + {{{"x","1"},{"y","1"}}, 1}, + {{{"x","2"},{"y","1"}}, 3}, + {{{"x","1"},{"y","2"}}, 5}, + {{{"x","2"},{"y","2"}}, 7} + }, + { "x", "y" })); + TEST_DO(f.assertSparseMap({ + {{{"x","1"}}, 6}, + {{{"x","2"}}, 10} + }, + { "x" }, + "tensor(x{})", + { + {{{"x","1"},{"y","1"}}, 1}, + {{{"x","2"},{"y","1"}}, 3}, + {{{"x","1"},{"y","2"}}, 5}, + {{{"x","2"},{"y","2"}}, 7} + }, + { "x", "y" })); + TEST_DO(f.assertDenseMap({ + {{{"y",0}}, 4}, + {{{"y",1}}, 12}, + {{{"y",2}}, 0} + }, + "tensor(y[3])", + { + {{{"x","1"},{"y","0"}}, 1}, + {{{"x","2"},{"y","0"}}, 3}, + {{{"x","1"},{"y","1"}}, 5}, + {{{"x","2"},{"y","1"}}, 7} + }, + { "x", "y" })); + TEST_DO(f.assertDenseMap({ + {{{"y",0}}, 3}, + {{{"y",1}}, 5}, + {{{"y",2}}, 0} + }, + "tensor(y[3])", + { + {{{"x","1"},{"y","0x"}}, 1}, + {{{"x","2"},{"y",""}}, 3}, + {{{"x","1"},{"y","1"}}, 5}, + {{{"x","2"},{"y","10"}}, 7} + }, + { "x", "y" })); + TEST_DO(f.assertDenseMap({ + {{{"x",0},{"y",0}}, 1}, + {{{"x",0},{"y",1}}, 5}, + {{{"x",0},{"y",2}}, 0}, + {{{"x",1},{"y",0}}, 3}, + {{{"x",1},{"y",1}}, 0}, + {{{"x",1},{"y",2}}, 0} + }, + "tensor(x[2], y[3])", + { + {{{"x","0"},{"y","0"}}, 1}, + {{{"x","1"},{"y","0"}}, 3}, + {{{"x","0"},{"y","1"}}, 5}, + {{{"x","10"},{"y","1"}}, 7} + }, + { "x", "y" })); + TEST_DO(f.assertDenseMap({ + {{{"x",0},{"y",0}}, 1}, + {{{"x",0},{"y",1}}, 5}, + {{{"x",1},{"y",0}}, 3}, + {{{"x",1},{"y",1}}, 0} + }, + "tensor(x[2], y[])", + { + {{{"x","0"},{"y","0"}}, 1}, + {{{"x","1"},{"y","0"}}, 3}, + {{{"x","0"},{"y","1"}}, 5}, + {{{"x","10"},{"y","1"}}, 7} + }, + { "x", "y" })); + 
TEST_DO(f.assertDenseMap({ + {{{"x",0},{"y",0}}, 1}, + {{{"x",0},{"y",1}}, 5}, + {{{"x",1},{"y",0}}, 3}, + {{{"x",1},{"y",1}}, 0}, + {{{"x",2},{"y",0}}, 7}, + {{{"x",2},{"y",1}}, 0} + }, + "tensor(x[], y[])", + { + {{{"x","0"},{"y","0"}}, 1}, + {{{"x","1"},{"y","0"}}, 3}, + {{{"x","0"},{"y","1"}}, 5}, + {{{"x","2"},{"y","0"}}, 7} + }, + { "x", "y" })); + TEST_DO(f.assertDenseMap({ + {{{"x",0},{"y",0}}, 1}, + {{{"x",0},{"y",1}}, 5}, + {{{"x",0},{"y",2}}, 0}, + {{{"x",1},{"y",0}}, 3}, + {{{"x",1},{"y",1}}, 0}, + {{{"x",1},{"y",2}}, 0} + }, + "tensor(x[], y[3])", + { + {{{"x","0"},{"y","0"}}, 1}, + {{{"x","1"},{"y","0"}}, 3}, + {{{"x","0"},{"y","1"}}, 5}, + {{{"x","10"},{"y","3"}}, 7} + }, + { "x", "y" })); +} + +TEST_F("test tensor mapper for SparseTensor", SparseFixture) +{ + testTensorMapper(f); +} + +TEST_MAIN() { TEST_RUN_ALL(); } diff --git a/eval/src/tests/tensor/tensor_performance/.gitignore b/eval/src/tests/tensor/tensor_performance/.gitignore new file mode 100644 index 00000000000..c9401246324 --- /dev/null +++ b/eval/src/tests/tensor/tensor_performance/.gitignore @@ -0,0 +1 @@ +vespalib_tensor_performance_test_app diff --git a/eval/src/tests/tensor/tensor_performance/CMakeLists.txt b/eval/src/tests/tensor/tensor_performance/CMakeLists.txt new file mode 100644 index 00000000000..a2f041db265 --- /dev/null +++ b/eval/src/tests/tensor/tensor_performance/CMakeLists.txt @@ -0,0 +1,13 @@ +# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+vespa_add_executable(vespalib_tensor_performance_test_app TEST + SOURCES + tensor_performance_test.cpp + DEPENDS + vespalib + vespalib_vespalib_tensor +) +vespa_add_test( + NAME vespalib_tensor_performance_test_app + COMMAND vespalib_tensor_performance_test_app + ENVIRONMENT "TEST_SUBSET=SMOKETEST" +) diff --git a/eval/src/tests/tensor/tensor_performance/FILES b/eval/src/tests/tensor/tensor_performance/FILES new file mode 100644 index 00000000000..4cec89055e5 --- /dev/null +++ b/eval/src/tests/tensor/tensor_performance/FILES @@ -0,0 +1 @@ +tensor_performance_test.cpp diff --git a/eval/src/tests/tensor/tensor_performance/tensor_performance_test.cpp b/eval/src/tests/tensor/tensor_performance/tensor_performance_test.cpp new file mode 100644 index 00000000000..8dc57bd0f71 --- /dev/null +++ b/eval/src/tests/tensor/tensor_performance/tensor_performance_test.cpp @@ -0,0 +1,378 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+#include <vespa/vespalib/testkit/test_kit.h> +#include <vespa/vespalib/eval/function.h> +#include <vespa/vespalib/eval/interpreted_function.h> +#include <vespa/vespalib/eval/tensor_nodes.h> +#include <vespa/vespalib/eval/tensor_spec.h> +#include <vespa/vespalib/tensor/sparse/sparse_tensor.h> +#include <vespa/vespalib/tensor/sparse/sparse_tensor_builder.h> +#include <vespa/vespalib/tensor/dense/dense_tensor_builder.h> +#include <vespa/vespalib/tensor/tensor.h> +#include <vespa/vespalib/tensor/tensor_builder.h> +#include <vespa/vespalib/util/benchmark_timer.h> +#include <vespa/vespalib/tensor/default_tensor_engine.h> + +using namespace vespalib; +using namespace vespalib::eval; +using namespace vespalib::tensor; + +//----------------------------------------------------------------------------- + +const vespalib::string dot_product_match_expr = "sum(query*document)"; +const vespalib::string dot_product_multiply_expr = "sum(query*document)"; +const vespalib::string model_match_expr = "sum((query*document)*model)"; +const vespalib::string matrix_product_expr = "sum(sum((query+document)*model,x))"; + +//----------------------------------------------------------------------------- + +Value::UP wrap(std::unique_ptr<eval::Tensor> tensor) { + return Value::UP(new TensorValue(std::move(tensor))); +} + +//----------------------------------------------------------------------------- + +struct Params { + std::map<vespalib::string, Value::UP> map; + Params &add(const vespalib::string &name, Value::UP value) { + map.emplace(name, std::move(value)); + return *this; + } + Params &add(const vespalib::string &name, std::unique_ptr<eval::Tensor> value) { + return add(name, wrap(std::move(value))); + } +}; + +void inject_params(const Function &function, const Params ¶ms, + InterpretedFunction::Context &ctx) +{ + ctx.clear_params(); + EXPECT_EQUAL(params.map.size(), function.num_params()); + for (size_t i = 0; i < function.num_params(); ++i) { + auto param = 
params.map.find(function.param_name(i)); + ASSERT_TRUE(param != params.map.end()); + ctx.add_param(*(param->second)); + } +} + +std::vector<ValueType> extract_param_types(const Function &function, const Params ¶ms) { + std::vector<ValueType> result; + EXPECT_EQUAL(params.map.size(), function.num_params()); + for (size_t i = 0; i < function.num_params(); ++i) { + auto param = params.map.find(function.param_name(i)); + ASSERT_TRUE(param != params.map.end()); + result.push_back(param->second->type()); + } + return result; +} + +double calculate_expression(const vespalib::string &expression, const Params ¶ms) { + const Function function = Function::parse(expression); + const NodeTypes types(function, extract_param_types(function, params)); + const InterpretedFunction interpreted(tensor::DefaultTensorEngine::ref(), function, types); + InterpretedFunction::Context context; + inject_params(function, params, context); + const Value &result = interpreted.eval(context); + EXPECT_TRUE(result.is_double()); + return result.as_double(); +} + +DoubleValue dummy_result(0.0); +const Value &dummy_ranking(InterpretedFunction::Context &) { return dummy_result; } + +double benchmark_expression_us(const vespalib::string &expression, const Params ¶ms) { + const Function function = Function::parse(expression); + const NodeTypes types(function, extract_param_types(function, params)); + const InterpretedFunction interpreted(tensor::DefaultTensorEngine::ref(), function, types); + InterpretedFunction::Context context; + inject_params(function, params, context); + auto ranking = [&](){ interpreted.eval(context); }; + auto baseline = [&](){ dummy_ranking(context); }; + return BenchmarkTimer::benchmark(ranking, baseline, 5.0) * 1000.0 * 1000.0; +} + +//----------------------------------------------------------------------------- + +tensor::Tensor::UP make_tensor(const TensorSpec &spec) { + auto tensor = DefaultTensorEngine::ref().create(spec); + return 
tensor::Tensor::UP(dynamic_cast<tensor::Tensor*>(tensor.release())); +} + +//----------------------------------------------------------------------------- + +TEST("SMOKETEST - require that dot product benchmark expressions produce expected results") { + Params params; + params.add("query", make_tensor(TensorSpec("tensor(x{})") + .add({{"x","0"}}, 1.0) + .add({{"x","1"}}, 2.0) + .add({{"x","2"}}, 3.0))); + params.add("document", make_tensor(TensorSpec("tensor(x{})") + .add({{"x","0"}}, 2.0) + .add({{"x","1"}}, 2.0) + .add({{"x","2"}}, 2.0))); + EXPECT_EQUAL(calculate_expression(dot_product_match_expr, params), 12.0); + EXPECT_EQUAL(calculate_expression(dot_product_multiply_expr, params), 12.0); +} + +TEST("SMOKETEST - require that model match benchmark expression produces expected result") { + Params params; + params.add("query", make_tensor(TensorSpec("tensor(x{})") + .add({{"x","0"}}, 1.0) + .add({{"x","1"}}, 2.0))); + params.add("document", make_tensor(TensorSpec("tensor(y{})") + .add({{"y","0"}}, 3.0) + .add({{"y","1"}}, 4.0))); + params.add("model", make_tensor(TensorSpec("tensor(x{},y{})") + .add({{"x","0"},{"y","0"}}, 2.0) + .add({{"x","0"},{"y","1"}}, 2.0) + .add({{"x","1"},{"y","0"}}, 2.0) + .add({{"x","1"},{"y","1"}}, 2.0))); + EXPECT_EQUAL(calculate_expression(model_match_expr, params), 42.0); +} + +TEST("SMOKETEST - require that matrix product benchmark expression produces expected result") { + Params params; + params.add("query", make_tensor(TensorSpec("tensor(x{})") + .add({{"x","0"}}, 1.0) + .add({{"x","1"}}, 0.0))); + params.add("document", make_tensor(TensorSpec("tensor(x{})") + .add({{"x","0"}}, 0.0) + .add({{"x","1"}}, 2.0))); + params.add("model", make_tensor(TensorSpec("tensor(x{},y{})") + .add({{"x","0"},{"y","0"}}, 1.0) + .add({{"x","0"},{"y","1"}}, 2.0) + .add({{"x","1"},{"y","0"}}, 3.0) + .add({{"x","1"},{"y","1"}}, 4.0))); + EXPECT_EQUAL(calculate_expression(matrix_product_expr, params), 17.0); +} + 
+//----------------------------------------------------------------------------- + +struct DummyBuilder : TensorBuilder { + Dimension define_dimension(const vespalib::string &) override { return 0; } + TensorBuilder &add_label(Dimension, const vespalib::string &) override { return *this; } + TensorBuilder &add_cell(double) override { return *this; } + tensor::Tensor::UP build() override { return tensor::Tensor::UP(); } +}; + + +struct DummyDenseTensorBuilder +{ + using Dimension = TensorBuilder::Dimension; + Dimension defineDimension(const vespalib::string &, size_t) { return 0; } + DummyDenseTensorBuilder &addLabel(Dimension, size_t) { return *this; } + DummyDenseTensorBuilder &addCell(double) { return *this; } + tensor::Tensor::UP build() { return tensor::Tensor::UP(); } +}; + +struct DimensionSpec { + vespalib::string name; + size_t count; + size_t offset; + DimensionSpec(const vespalib::string &name_in, size_t count_in, size_t offset_in = 0) + : name(name_in), count(count_in), offset(offset_in) {} +}; + +struct StringBinding { + TensorBuilder::Dimension dimension; + vespalib::string label; + StringBinding(TensorBuilder &builder, const DimensionSpec &dimension_in) + : dimension(builder.define_dimension(dimension_in.name)), + label() + { + } + void set_label(size_t id) { + label = vespalib::make_string("%zu", id); + } + static void add_cell(TensorBuilder &builder, double value) { + builder.add_cell(value); + } + void add_label(TensorBuilder &builder) const { + builder.add_label(dimension, label); + } +}; + +struct NumberBinding { + TensorBuilder::Dimension dimension; + size_t label; + template <typename Builder> + NumberBinding(Builder &builder, const DimensionSpec &dimension_in) + : dimension(builder.defineDimension(dimension_in.name, + dimension_in.offset + + dimension_in.count)), + label() + { + } + void set_label(size_t id) { + label = id; + } + template <typename Builder> + static void add_cell(Builder &builder, double value) { + builder.addCell(value); + } 
+ template <typename Builder> + void add_label(Builder &builder) const { + builder.addLabel(dimension, label); + } +}; + + +template <typename Builder, typename Binding> +void build_tensor(Builder &builder, const std::vector<DimensionSpec> &dimensions, + std::vector<Binding> &bindings) +{ + if (bindings.size() == dimensions.size()) { + for (const auto &bound: bindings) { + bound.add_label(builder); + } + Binding::add_cell(builder, 42); + } else { + const auto &spec = dimensions[bindings.size()]; + bindings.emplace_back(builder, spec); + for (size_t i = 0; i < spec.count; ++i) { + bindings.back().set_label(spec.offset + i); + build_tensor(builder, dimensions, bindings); + } + bindings.pop_back(); + } +} + +template <typename Builder, typename IBuilder, typename Binding> +tensor::Tensor::UP make_tensor_impl(const std::vector<DimensionSpec> &dimensions) { + Builder builder; + std::vector<Binding> bindings; + bindings.reserve(dimensions.size()); + build_tensor<IBuilder, Binding>(builder, dimensions, bindings); + return builder.build(); +} + +//----------------------------------------------------------------------------- + +enum class BuilderType { DUMMY, SPARSE, NUMBERDUMMY, + DENSE }; + +const BuilderType DUMMY = BuilderType::DUMMY; +const BuilderType SPARSE = BuilderType::SPARSE; +const BuilderType NUMBERDUMMY = BuilderType::NUMBERDUMMY; +const BuilderType DENSE = BuilderType::DENSE; + +const char *name(BuilderType type) { + switch (type) { + case BuilderType::DUMMY: return " dummy"; + case BuilderType::SPARSE: return "sparse"; + case BuilderType::NUMBERDUMMY: return "numberdummy"; + case BuilderType::DENSE: return "dense"; + } + abort(); +} + +tensor::Tensor::UP make_tensor(BuilderType type, const std::vector<DimensionSpec> &dimensions) { + switch (type) { + case BuilderType::DUMMY: + return make_tensor_impl<DummyBuilder, TensorBuilder, StringBinding> + (dimensions); + case BuilderType::SPARSE: + return make_tensor_impl<SparseTensorBuilder, TensorBuilder, + 
StringBinding>(dimensions); + case BuilderType::NUMBERDUMMY: + return make_tensor_impl<DummyDenseTensorBuilder, + DummyDenseTensorBuilder, NumberBinding>(dimensions); + case BuilderType::DENSE: + return make_tensor_impl<DenseTensorBuilder, DenseTensorBuilder, + NumberBinding>(dimensions); + } + abort(); +} + +//----------------------------------------------------------------------------- + +struct BuildTask { + BuilderType type; + std::vector<DimensionSpec> spec; + BuildTask(BuilderType type_in, const std::vector<DimensionSpec> &spec_in) : type(type_in), spec(spec_in) {} + void operator()() { tensor::Tensor::UP tensor = make_tensor(type, spec); } +}; + +double benchmark_build_us(BuilderType type, const std::vector<DimensionSpec> &spec) { + BuildTask build_task(type, spec); + BuildTask dummy_task((type == DENSE) ? NUMBERDUMMY : DUMMY, spec); + return BenchmarkTimer::benchmark(build_task, dummy_task, 5.0) * 1000.0 * 1000.0; +} + +TEST("benchmark create/destroy time for 1d tensors") { + for (size_t size: {5, 10, 25, 50, 100, 250, 500}) { + for (auto type: {SPARSE, DENSE}) { + double time_us = benchmark_build_us(type, {DimensionSpec("x", size)}); + fprintf(stderr, "-- 1d tensor create/destroy (%s) with size %zu: %g us\n", name(type), size, time_us); + } + } +} + +TEST("benchmark create/destroy time for 2d tensors") { + for (size_t size: {5, 10, 25, 50, 100}) { + for (auto type: {SPARSE, DENSE}) { + double time_us = benchmark_build_us(type, {DimensionSpec("x", size), DimensionSpec("y", size)}); + fprintf(stderr, "-- 2d tensor create/destroy (%s) with size %zux%zu: %g us\n", name(type), size, size, time_us); + } + } +} + +//----------------------------------------------------------------------------- + +TEST("benchmark dot product using match") { + for (size_t size: {10, 25, 50, 100, 250}) { + for (auto type: {SPARSE, DENSE}) { + Params params; + params.add("query", make_tensor(type, {DimensionSpec("x", size)})); + params.add("document", make_tensor(type, 
{DimensionSpec("x", size)})); + double time_us = benchmark_expression_us(dot_product_match_expr, params); + fprintf(stderr, "-- dot product (%s) using match %zu vs %zu: %g us\n", name(type), size, size, time_us); + } + } +} + +TEST("benchmark dot product using multiply") { + for (size_t size: {10, 25, 50, 100, 250}) { + for (auto type: {SPARSE, DENSE}) { + Params params; + params.add("query", make_tensor(type, {DimensionSpec("x", size)})); + params.add("document", make_tensor(type, {DimensionSpec("x", size)})); + double time_us = benchmark_expression_us(dot_product_multiply_expr, params); + fprintf(stderr, "-- dot product (%s) using multiply %zu vs %zu: %g us\n", name(type), size, size, time_us); + } + } +} + +TEST("benchmark model match") { + for (size_t model_size: {25, 50, 100}) { + for (size_t vector_size: {5, 10, 25, 50, 100}) { + if (vector_size <= model_size) { + for (auto type: {SPARSE}) { + Params params; + params.add("query", make_tensor(type, {DimensionSpec("x", vector_size)})); + params.add("document", make_tensor(type, {DimensionSpec("y", vector_size)})); + params.add("model", make_tensor(type, {DimensionSpec("x", model_size), DimensionSpec("y", model_size)})); + double time_us = benchmark_expression_us(model_match_expr, params); + fprintf(stderr, "-- model match (%s) %zu * %zu vs %zux%zu: %g us\n", name(type), vector_size, vector_size, model_size, model_size, time_us); + } + } + } + } +} + +TEST("benchmark matrix product") { + for (size_t vector_size: {5, 10, 25, 50}) { + size_t matrix_size = vector_size * 2; + for (auto type: {SPARSE, DENSE}) { + Params params; + params.add("query", make_tensor(type, {DimensionSpec("x", matrix_size)})); + params.add("document", make_tensor(type, {DimensionSpec("x", matrix_size)})); + params.add("model", make_tensor(type, {DimensionSpec("x", matrix_size), DimensionSpec("y", matrix_size)})); + double time_us = benchmark_expression_us(matrix_product_expr, params); + fprintf(stderr, "-- matrix product (%s) %zu + %zu vs 
%zux%zu: %g us\n", name(type), vector_size, vector_size, matrix_size, matrix_size, time_us); + } + } +} + +//----------------------------------------------------------------------------- + +TEST_MAIN() { TEST_RUN_ALL(); } diff --git a/eval/src/tests/tensor/tensor_serialization/.gitignore b/eval/src/tests/tensor/tensor_serialization/.gitignore new file mode 100644 index 00000000000..f8525561c6b --- /dev/null +++ b/eval/src/tests/tensor/tensor_serialization/.gitignore @@ -0,0 +1 @@ +vespalib_tensor_serialization_test_app diff --git a/eval/src/tests/tensor/tensor_serialization/CMakeLists.txt b/eval/src/tests/tensor/tensor_serialization/CMakeLists.txt new file mode 100644 index 00000000000..2fdf47d4738 --- /dev/null +++ b/eval/src/tests/tensor/tensor_serialization/CMakeLists.txt @@ -0,0 +1,9 @@ +# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +vespa_add_executable(vespalib_tensor_serialization_test_app TEST + SOURCES + tensor_serialization_test.cpp + DEPENDS + vespalib + vespalib_vespalib_tensor +) +vespa_add_test(NAME vespalib_tensor_serialization_test_app COMMAND vespalib_tensor_serialization_test_app) diff --git a/eval/src/tests/tensor/tensor_serialization/FILES b/eval/src/tests/tensor/tensor_serialization/FILES new file mode 100644 index 00000000000..882dd368f5c --- /dev/null +++ b/eval/src/tests/tensor/tensor_serialization/FILES @@ -0,0 +1 @@ +tensor_serialization_test.cpp diff --git a/eval/src/tests/tensor/tensor_serialization/tensor_serialization_test.cpp b/eval/src/tests/tensor/tensor_serialization/tensor_serialization_test.cpp new file mode 100644 index 00000000000..95d6a45f196 --- /dev/null +++ b/eval/src/tests/tensor/tensor_serialization/tensor_serialization_test.cpp @@ -0,0 +1,255 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+ +#include <vespa/vespalib/testkit/test_kit.h> +#include <vespa/vespalib/tensor/sparse/sparse_tensor.h> +#include <vespa/vespalib/tensor/sparse/sparse_tensor_builder.h> +#include <vespa/vespalib/tensor/types.h> +#include <vespa/vespalib/tensor/default_tensor.h> +#include <vespa/vespalib/tensor/tensor_factory.h> +#include <vespa/vespalib/tensor/serialization/typed_binary_format.h> +#include <vespa/vespalib/tensor/serialization/sparse_binary_format.h> +#include <vespa/vespalib/objects/nbostream.h> +#include <vespa/vespalib/objects/hexdump.h> +#include <ostream> + +using namespace vespalib::tensor; +using vespalib::nbostream; +using ExpBuffer = std::vector<uint8_t>; + +namespace std { + +bool operator==(const std::vector<uint8_t> &exp, const nbostream &stream) +{ + return ((exp.size() == stream.size()) && + (memcmp(&exp[0], stream.peek(), exp.size()) == 0)); +} + +std::ostream &operator<<(std::ostream &out, const std::vector<uint8_t> &rhs) +{ + out << vespalib::HexDump(&rhs[0], rhs.size()); + return out; +} + +} + +namespace vespalib { + +namespace tensor { + +static bool operator==(const Tensor &lhs, const Tensor &rhs) +{ + return lhs.equals(rhs); +} + +} +} + +template <class BuilderType> +void +checkDeserialize(vespalib::nbostream &stream, const Tensor &rhs) +{ + (void) stream; + (void) rhs; +} + +template <> +void +checkDeserialize<DefaultTensor::builder>(nbostream &stream, const Tensor &rhs) +{ + nbostream wrapStream(stream.peek(), stream.size()); + auto chk = TypedBinaryFormat::deserialize(wrapStream); + EXPECT_EQUAL(0u, wrapStream.size()); + EXPECT_EQUAL(*chk, rhs); +} + +template <typename BuilderType> +struct Fixture +{ + BuilderType _builder; + Fixture() : _builder() {} + + Tensor::UP createTensor(const TensorCells &cells) { + return vespalib::tensor::TensorFactory::create(cells, _builder); + } + Tensor::UP createTensor(const TensorCells &cells, const TensorDimensions &dimensions) { + return TensorFactory::create(cells, dimensions, _builder); + } + + void 
serialize(nbostream &stream, const Tensor &tensor) { + TypedBinaryFormat::serialize(stream, tensor); + } + Tensor::UP deserialize(nbostream &stream) { + BuilderType builder; + nbostream wrapStream(stream.peek(), stream.size()); + auto formatId = wrapStream.getInt1_4Bytes(); + ASSERT_EQUAL(formatId, 1); // sparse format + SparseBinaryFormat::deserialize(wrapStream, builder); + EXPECT_TRUE(wrapStream.size() == 0); + auto ret = builder.build(); + checkDeserialize<BuilderType>(stream, *ret); + stream.adjustReadPos(stream.size()); + return ret; + } + void assertSerialized(const ExpBuffer &exp, const TensorCells &rhs, + const TensorDimensions &rhsDimensions) { + Tensor::UP rhsTensor(createTensor(rhs, rhsDimensions)); + nbostream rhsStream; + serialize(rhsStream, *rhsTensor); + EXPECT_EQUAL(exp, rhsStream); + auto rhs2 = deserialize(rhsStream); + EXPECT_EQUAL(*rhs2, *rhsTensor); + } +}; + +using SparseFixture = Fixture<SparseTensorBuilder>; + + +template <typename FixtureType> +void +testTensorSerialization(FixtureType &f) +{ + TEST_DO(f.assertSerialized({ 0x01, 0x00, 0x00 }, {}, {})); + TEST_DO(f.assertSerialized({ 0x01, 0x01, 0x01, 0x78, 0x00 }, + {}, { "x" })); + TEST_DO(f.assertSerialized({ 0x01, 0x02, 0x01, 0x78, 0x01, 0x79, 0x00 }, + {}, { "x", "y" })); + TEST_DO(f.assertSerialized({ 0x01, 0x01, 0x01, 0x78, 0x01, 0x01, 0x31, 0x40, + 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 }, + { {{{"x","1"}}, 3} }, { "x" })); + TEST_DO(f.assertSerialized({ 0x01, 0x02, 0x01, 0x78, 0x01, 0x79, 0x01, 0x00, + 0x00, 0x40, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00 }, + { {{}, 3} }, { "x", "y"})); + TEST_DO(f.assertSerialized({ 0x01, 0x02, 0x01, 0x78, 0x01, 0x79, 0x01, 0x01, + 0x31, 0x00, 0x40, 0x08, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00 }, + { {{{"x","1"}}, 3} }, { "x", "y" })); + TEST_DO(f.assertSerialized({ 0x01, 0x02, 0x01, 0x78, 0x01, 0x79, 0x01, 0x00, + 0x01, 0x33, 0x40, 0x08, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00 }, + { {{{"y","3"}}, 3} }, { "x", "y" })); + 
TEST_DO(f.assertSerialized({ 0x01, 0x02, 0x01, 0x78, 0x01, 0x79, 0x01, 0x01, + 0x32, 0x01, 0x34, 0x40, 0x08, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00 }, + { {{{"x","2"}, {"y", "4"}}, 3} }, { "x", "y" })); + TEST_DO(f.assertSerialized({ 0x01, 0x02, 0x01, 0x78, 0x01, 0x79, + 0x01, 0x01, 0x31, 0x00, 0x40, 0x08, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 }, + { {{{"x","1"}}, 3} }, {"x", "y"})); +} + +TEST_F("test tensor serialization for SparseTensor", SparseFixture) +{ + testTensorSerialization(f); +} + + +struct DenseFixture +{ + Tensor::UP createTensor(const DenseTensorCells &cells) { + return TensorFactory::createDense(cells); + } + + void serialize(nbostream &stream, const Tensor &tensor) { + TypedBinaryFormat::serialize(stream, tensor); + } + + Tensor::UP deserialize(nbostream &stream) { + nbostream wrapStream(stream.peek(), stream.size()); + auto ret = TypedBinaryFormat::deserialize(wrapStream); + EXPECT_TRUE(wrapStream.size() == 0); + stream.adjustReadPos(stream.size()); + return ret; + } + void assertSerialized(const ExpBuffer &exp, const DenseTensorCells &rhs) { + Tensor::UP rhsTensor(createTensor(rhs)); + nbostream rhsStream; + serialize(rhsStream, *rhsTensor); + EXPECT_EQUAL(exp, rhsStream); + auto rhs2 = deserialize(rhsStream); + EXPECT_EQUAL(*rhs2, *rhsTensor); + } +}; + + +TEST_F("test tensor serialization for DenseTensor", DenseFixture) +{ + TEST_DO(f.assertSerialized({ 0x02, 0x00, + 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00}, + {})); + TEST_DO(f.assertSerialized({ 0x02, 0x01, 0x01, 0x78, 0x01, + 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00}, + { {{{"x",0}}, 0} })); + TEST_DO(f.assertSerialized({ 0x02, 0x02, 0x01, 0x78, 0x01, + 0x01, 0x79, 0x01, + 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00 }, + { {{{"x",0},{"y", 0}}, 0} })); + TEST_DO(f.assertSerialized({ 0x02, 0x01, 0x01, 0x78, 0x02, + 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, + 0x40, 0x08, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00 }, + { {{{"x",1}}, 3} })); + TEST_DO(f.assertSerialized({ 
0x02, 0x02, 0x01, 0x78, 0x01, + 0x01, 0x79, 0x01, + 0x40, 0x08, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00 }, + { {{{"x",0},{"y",0}}, 3} })); + TEST_DO(f.assertSerialized({ 0x02, 0x02, 0x01, 0x78, 0x02, + 0x01, 0x79, 0x01, + 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, + 0x40, 0x08, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00 }, + { {{{"x",1},{"y",0}}, 3} })); + TEST_DO(f.assertSerialized({ 0x02, 0x02, 0x01, 0x78, 0x01, + 0x01, 0x79, 0x04, + 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, + 0x40, 0x08, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00 }, + { {{{"x",0},{"y",3}}, 3} })); + TEST_DO(f.assertSerialized({ 0x02, 0x02, 0x01, 0x78, 0x03, + 0x01, 0x79, 0x05, + 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, + 0x40, 0x08, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00 }, + { {{{"x",2}, {"y",4}}, 3} })); +} + + +TEST_MAIN() { TEST_RUN_ALL(); } diff --git a/eval/src/tests/tensor/tensor_slime_serialization/.gitignore b/eval/src/tests/tensor/tensor_slime_serialization/.gitignore new file mode 100644 index 00000000000..9cb3b664d58 --- /dev/null +++ b/eval/src/tests/tensor/tensor_slime_serialization/.gitignore @@ -0,0 +1 @@ +vespalib_tensor_slime_serialization_test_app diff --git 
a/eval/src/tests/tensor/tensor_slime_serialization/CMakeLists.txt b/eval/src/tests/tensor/tensor_slime_serialization/CMakeLists.txt new file mode 100644 index 00000000000..a0323928fd3 --- /dev/null +++ b/eval/src/tests/tensor/tensor_slime_serialization/CMakeLists.txt @@ -0,0 +1,9 @@ +# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +vespa_add_executable(vespalib_tensor_slime_serialization_test_app TEST + SOURCES + tensor_slime_serialization_test.cpp + DEPENDS + vespalib + vespalib_vespalib_tensor +) +vespa_add_test(NAME vespalib_tensor_slime_serialization_test_app COMMAND vespalib_tensor_slime_serialization_test_app) diff --git a/eval/src/tests/tensor/tensor_slime_serialization/FILES b/eval/src/tests/tensor/tensor_slime_serialization/FILES new file mode 100644 index 00000000000..874f951beb5 --- /dev/null +++ b/eval/src/tests/tensor/tensor_slime_serialization/FILES @@ -0,0 +1 @@ +tensor_slime_serialization_test.cpp diff --git a/eval/src/tests/tensor/tensor_slime_serialization/tensor_slime_serialization_test.cpp b/eval/src/tests/tensor/tensor_slime_serialization/tensor_slime_serialization_test.cpp new file mode 100644 index 00000000000..f3005a21730 --- /dev/null +++ b/eval/src/tests/tensor/tensor_slime_serialization/tensor_slime_serialization_test.cpp @@ -0,0 +1,185 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+ +#include <vespa/vespalib/testkit/test_kit.h> +#include <vespa/vespalib/tensor/sparse/sparse_tensor.h> +#include <vespa/vespalib/tensor/sparse/sparse_tensor_builder.h> +#include <vespa/vespalib/tensor/types.h> +#include <vespa/vespalib/tensor/default_tensor.h> +#include <vespa/vespalib/tensor/tensor_factory.h> +#include <vespa/vespalib/tensor/serialization/typed_binary_format.h> +#include <vespa/vespalib/tensor/serialization/slime_binary_format.h> +#include <vespa/vespalib/data/slime/slime.h> +#include <iostream> + +using namespace vespalib::tensor; + +template <typename BuilderType> +struct Fixture +{ + BuilderType _builder; + Fixture() : _builder() {} + + Tensor::UP createTensor(const TensorCells &cells) { + return vespalib::tensor::TensorFactory::create(cells, _builder); + } + Tensor::UP createTensor(const TensorCells &cells, const TensorDimensions &dimensions) { + return TensorFactory::create(cells, dimensions, _builder); + } + + static inline uint32_t getTensorTypeId(); + + void assertSerialized(const vespalib::string &exp, const TensorCells &rhs, + const TensorDimensions &rhsDimensions) { + Tensor::UP rhsTensor(createTensor(rhs, rhsDimensions)); + auto slime = SlimeBinaryFormat::serialize(*rhsTensor); + vespalib::slime::Memory memory_exp(exp); + vespalib::Slime expSlime; + size_t used = vespalib::slime::JsonFormat::decode(memory_exp, expSlime); + EXPECT_EQUAL(used, memory_exp.size); + EXPECT_EQUAL(expSlime, *slime); + } +}; + +template <> +uint32_t +Fixture<SparseTensorBuilder>::getTensorTypeId() { return 2u; } + + +using SparseFixture = Fixture<SparseTensorBuilder>; + + +namespace { +vespalib::string twoCellsJson[3] = +{ + "{ dimensions: [ 'x', 'y' ]," + " cells: [" + "{ address: { y:'3'}, value: 4.0 }," + "{ address: { x:'1'}, value: 3.0 }" + "] }", + "{ dimensions: [ 'x', 'y' ]," + " cells: [" + "{ address: { x:'1'}, value: 3.0 }," + "{ address: { y:'3'}, value: 4.0 }" + "] }", + "{ dimensions: [ 'x', 'y' ]," + " cells: [" + "{ address: { x:'1'}, value: 
3.0 }," + "{ address: { y:'3'}, value: 4.0 }" + "] }", +}; +} + + +template <typename FixtureType> +void +testTensorSlimeSerialization(FixtureType &f) +{ + TEST_DO(f.assertSerialized("{ dimensions: [], cells: [] }", {}, {})); + TEST_DO(f.assertSerialized("{ dimensions: [ 'x' ], cells: [] }", + {}, { "x" })); + TEST_DO(f.assertSerialized("{ dimensions: [ 'x', 'y' ], cells: [] }", + {}, { "x", "y" })); + TEST_DO(f.assertSerialized("{ dimensions: [ 'x' ]," + "cells: [" + "{ address: { x: '1' }, value: 3.0 }" + "] }", + { {{{"x","1"}}, 3} }, { "x" })); + TEST_DO(f.assertSerialized("{ dimensions: [ 'x', 'y' ]," + " cells: [" + "{ address: { }, value: 3.0 }" + "] }", + { {{}, 3} }, { "x", "y"})); + TEST_DO(f.assertSerialized("{ dimensions: [ 'x', 'y' ]," + " cells: [" + "{ address: { x: '1' }, value: 3.0 }" + "] }", + { {{{"x","1"}}, 3} }, { "x", "y" })); + TEST_DO(f.assertSerialized("{ dimensions: [ 'x', 'y' ]," + " cells: [" + "{ address: { y: '3' }, value: 3.0 }" + "] }", + { {{{"y","3"}}, 3} }, { "x", "y" })); + TEST_DO(f.assertSerialized("{ dimensions: [ 'x', 'y' ]," + " cells: [" + "{ address: { x:'2', y:'4'}, value: 3.0 }" + "] }", + { {{{"x","2"}, {"y", "4"}}, 3} }, { "x", "y" })); + TEST_DO(f.assertSerialized("{ dimensions: [ 'x', 'y' ]," + " cells: [" + "{ address: { x:'1'}, value: 3.0 }" + "] }", + { {{{"x","1"}}, 3} }, {"x", "y"})); + TEST_DO(f.assertSerialized(twoCellsJson[FixtureType::getTensorTypeId()], + { {{{"x","1"}}, 3}, {{{"y","3"}}, 4} }, + {"x", "y"})); +} + +TEST_F("test tensor slime serialization for SparseTensor", SparseFixture) +{ + testTensorSlimeSerialization(f); +} + + +struct DenseFixture +{ + DenseFixture() {} + + Tensor::UP createTensor(const DenseTensorCells &cells) { + return vespalib::tensor::TensorFactory::createDense(cells); + } + + void assertSerialized(const vespalib::string &exp, + const DenseTensorCells &rhs) { + Tensor::UP rhsTensor(createTensor(rhs)); + auto slime = SlimeBinaryFormat::serialize(*rhsTensor); + 
vespalib::slime::Memory memory_exp(exp); + vespalib::Slime expSlime; + size_t used = vespalib::slime::JsonFormat::decode(memory_exp, expSlime); + EXPECT_EQUAL(used, memory_exp.size); + EXPECT_EQUAL(expSlime, *slime); + } +}; + + +TEST_F("test tensor slime serialization for DenseTensor", DenseFixture) +{ + TEST_DO(f.assertSerialized("{ dimensions: [], cells: [" + "{ address: { }, value: 0.0 }" + "] }", {})); + TEST_DO(f.assertSerialized("{ dimensions: [ 'x' ], cells: [" + "{ address: { x: '0' }, value: 0.0 }" + "] }", + { {{{"x",0}}, 0} })); + TEST_DO(f.assertSerialized("{ dimensions: [ 'x', 'y' ], cells: [" + "{ address: { x: '0', y: '0' }, value: 0.0 }" + "] }", + { {{{"x",0},{"y",0}}, 0} })); + TEST_DO(f.assertSerialized("{ dimensions: [ 'x' ]," + "cells: [" + "{ address: { x: '0' }, value: 0.0 }," + "{ address: { x: '1' }, value: 3.0 }" + "] }", + { {{{"x",1}}, 3} })); + TEST_DO(f.assertSerialized("{ dimensions: [ 'x', 'y' ]," + " cells: [" + "{ address: { x: '0', y: '0' }, value: 3.0 }" + "] }", + { {{{"x",0},{"y",0}}, 3} })); + TEST_DO(f.assertSerialized("{ dimensions: [ 'x', 'y' ]," + " cells: [" + "{ address: { x: '0', y: '0' }, value: 0.0 }," + "{ address: { x: '1', y: '0' }, value: 3.0 }" + "] }", + { {{{"x",1},{"y", 0}}, 3} })); + TEST_DO(f.assertSerialized("{ dimensions: [ 'x', 'y' ]," + " cells: [" + "{ address: { x: '0', y: '0' }, value: 0.0 }," + "{ address: { x: '0', y: '1' }, value: 0.0 }," + "{ address: { x: '0', y: '2' }, value: 0.0 }," + "{ address: { x: '0', y: '3' }, value: 3.0 }" + "] }", + { {{{"x",0},{"y",3}}, 3} })); +} + + +TEST_MAIN() { TEST_RUN_ALL(); } diff --git a/eval/src/vespa/eval/eval/CMakeLists.txt b/eval/src/vespa/eval/eval/CMakeLists.txt new file mode 100644 index 00000000000..2e28f3252bd --- /dev/null +++ b/eval/src/vespa/eval/eval/CMakeLists.txt @@ -0,0 +1,25 @@ +# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+vespa_add_library(vespalib_vespalib_eval OBJECT + SOURCES + basic_nodes.cpp + call_nodes.cpp + delete_node.cpp + function.cpp + gbdt.cpp + interpreted_function.cpp + key_gen.cpp + node_types.cpp + operation.cpp + operator_nodes.cpp + simple_tensor.cpp + simple_tensor_engine.cpp + tensor.cpp + tensor_engine.cpp + tensor_function.cpp + tensor_nodes.cpp + tensor_spec.cpp + value.cpp + value_type.cpp + value_type_spec.cpp + vm_forest.cpp +) diff --git a/eval/src/vespa/eval/eval/basic_nodes.cpp b/eval/src/vespa/eval/eval/basic_nodes.cpp new file mode 100644 index 00000000000..6d1a18dff03 --- /dev/null +++ b/eval/src/vespa/eval/eval/basic_nodes.cpp @@ -0,0 +1,129 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#include <vespa/fastos/fastos.h> +#include "basic_nodes.h" +#include "node_traverser.h" +#include "node_visitor.h" +#include "interpreted_function.h" +#include "simple_tensor_engine.h" + +namespace vespalib { +namespace eval { +namespace nodes { + +namespace { + +struct Frame { + const Node &node; + size_t child_idx; + explicit Frame(const Node &node_in) : node(node_in), child_idx(0) {} + bool has_next_child() const { return (child_idx < node.num_children()); } + const Node &next_child() { return node.get_child(child_idx++); } +}; + +} // namespace vespalib::eval::nodes::<unnamed> + +double +Node::get_const_value() const { + assert(is_const()); + InterpretedFunction function(SimpleTensorEngine::ref(), *this, 0, NodeTypes()); + InterpretedFunction::Context ctx; + return function.eval(ctx).as_double(); +} + +void +Node::traverse(NodeTraverser &traverser) const +{ + if (!traverser.open(*this)) { + return; + } + std::vector<Frame> stack({Frame(*this)}); + while (!stack.empty()) { + if (stack.back().has_next_child()) { + const Node &next_child = stack.back().next_child(); + if (traverser.open(next_child)) { + stack.emplace_back(next_child); + } + } else { + traverser.close(stack.back().node); + 
stack.pop_back(); + } + } +} + +void Number::accept(NodeVisitor &visitor) const { visitor.visit(*this); } +void Symbol::accept(NodeVisitor &visitor) const { visitor.visit(*this); } +void String::accept(NodeVisitor &visitor) const { visitor.visit(*this); } +void Array ::accept(NodeVisitor &visitor) const { visitor.visit(*this); } +void Neg ::accept(NodeVisitor &visitor) const { visitor.visit(*this); } +void Not ::accept(NodeVisitor &visitor) const { visitor.visit(*this); } +void If ::accept(NodeVisitor &visitor) const { visitor.visit(*this); } +void Let ::accept(NodeVisitor &visitor) const { visitor.visit(*this); } +void Error ::accept(NodeVisitor &visitor) const { visitor.visit(*this); } + +vespalib::string +String::dump(DumpContext &) const +{ + vespalib::string str; + str.push_back('"'); + for (uint32_t i = 0; i < _value.size(); ++i) { + char c = _value[i]; + switch (c) { + case '\\': + str.append("\\\\"); + break; + case '"': + str.append("\\\""); + break; + case '\t': + str.append("\\t"); + break; + case '\n': + str.append("\\n"); + break; + case '\r': + str.append("\\r"); + break; + case '\f': + str.append("\\f"); + break; + default: + if (static_cast<unsigned char>(c) >= 32 && + static_cast<unsigned char>(c) <= 126) + { + str.push_back(c); + } else { + const char *lookup = "0123456789abcdef"; + str.append("\\x"); + str.push_back(lookup[(c >> 4) & 0xf]); + str.push_back(lookup[c & 0xf]); + } + } + } + str.push_back('"'); + return str; +} + +If::If(Node_UP cond_in, Node_UP true_expr_in, Node_UP false_expr_in, double p_true_in) + : _cond(std::move(cond_in)), + _true_expr(std::move(true_expr_in)), + _false_expr(std::move(false_expr_in)), + _p_true(p_true_in), + _is_tree(false) +{ + auto less = as<Less>(cond()); + auto in = as<In>(cond()); + bool true_is_subtree = (true_expr().is_tree() || true_expr().is_const()); + bool false_is_subtree = (false_expr().is_tree() || false_expr().is_const()); + if (true_is_subtree && false_is_subtree) { + if (less) { + _is_tree = 
(less->lhs().is_param() && less->rhs().is_const()); + } else if (in) { + _is_tree = (in->lhs().is_param() && in->rhs().is_const()); + } + } +} + +} // namespace vespalib::eval::nodes +} // namespace vespalib::eval +} // namespace vespalib diff --git a/eval/src/vespa/eval/eval/basic_nodes.h b/eval/src/vespa/eval/eval/basic_nodes.h new file mode 100644 index 00000000000..2887856a66d --- /dev/null +++ b/eval/src/vespa/eval/eval/basic_nodes.h @@ -0,0 +1,344 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#pragma once + +#include <vespa/vespalib/stllike/string.h> +#include <vespa/vespalib/util/stringfmt.h> +#include <vespa/vespalib/util/string_hash.h> +#include <memory> +#include <map> +#include <vector> +#include <cassert> + +namespace vespalib { +namespace eval { + +namespace nodes { class Node; } + +struct NodeTraverser; +struct NodeVisitor; + +/** + * Simple interface for handing the ownership of an AST Node from one + * actor to another. + **/ +struct NodeHandler { + virtual void handle(std::unique_ptr<nodes::Node> node) = 0; + virtual ~NodeHandler() {} +}; + +namespace nodes { + +/** + * Context object used when dumping an AST to text to keep track of + * the names of bound values. + **/ +struct DumpContext { + const std::vector<vespalib::string> ¶m_names; + std::vector<vespalib::string> let_names; + DumpContext(const std::vector<vespalib::string> ¶m_names_in) + : param_names(param_names_in), let_names() {} +}; + +/** + * Abstract base class of all nodes in an AST. Each node in an AST has + * exclusive ownership of its children. 
+ **/ +struct Node { + virtual bool is_forest() const { return false; } + virtual bool is_tree() const { return false; } + virtual bool is_const() const { return false; } + virtual bool is_param() const { return false; } + virtual double get_const_value() const; + void traverse(NodeTraverser &traverser) const; + virtual vespalib::string dump(DumpContext &ctx) const = 0; + virtual void accept(NodeVisitor &visitor) const = 0; + virtual size_t num_children() const = 0; + virtual const Node &get_child(size_t idx) const = 0; + virtual void detach_children(NodeHandler &handler) = 0; + bool is_leaf() const { return (num_children() == 0); } + virtual ~Node() {} +}; +typedef std::unique_ptr<Node> Node_UP; + +/** + * Simple typecasting utility. Intended usage: + * <pre> + * auto number = as<Number>(node); + * if (number) { + * do_stuff(number->value()); + * } + * </pre> + **/ +template <typename T> +const T *as(const Node &node) { return dynamic_cast<const T *>(&node); } + +/** + * AST leaf nodes should inherit from this class to easy their API + * burden by not having to care about the concept of children. + **/ +struct Leaf : public Node { + size_t num_children() const override { return 0; } + const Node &get_child(size_t) const override { + abort(); + } + void detach_children(NodeHandler &) override {} +}; + +/** + * Helper class used to insert commas on the appropriate places in + * comma-separated textual lists. 
+ **/ +struct CommaTracker { + bool first; + CommaTracker() : first(true) {} + void maybe_comma(vespalib::string &dst) { + if (first) { + first = false; + } else { + dst.push_back(','); + } + } +}; + +class Number : public Leaf { +private: + double _value; +public: + Number(double value_in) : _value(value_in) {} + virtual bool is_const() const override { return true; } + virtual double get_const_value() const override { return value(); } + double value() const { return _value; } + virtual vespalib::string dump(DumpContext &) const { + return make_string("%g", _value); + } + virtual void accept(NodeVisitor &visitor) const override; +}; + +class Symbol : public Leaf { +private: + int _id; +public: + static const int UNDEF = std::numeric_limits<int>::max(); + explicit Symbol(int id_in) : _id(id_in) {} + int id() const { return _id; } + virtual bool is_param() const override { + return (_id >= 0); + } + virtual vespalib::string dump(DumpContext &ctx) const { + if (_id >= 0) { // param value + assert(size_t(_id) < ctx.param_names.size()); + return ctx.param_names[_id]; + } else { // let binding + int let_offset = -(_id + 1); + assert(size_t(let_offset) < ctx.let_names.size()); + return ctx.let_names[let_offset]; + } + } + virtual void accept(NodeVisitor &visitor) const override; +}; + +class String : public Leaf { +private: + vespalib::string _value; +public: + String(const vespalib::string &value_in) : _value(value_in) {} + virtual bool is_const() const override { return true; } + virtual double get_const_value() const override { return hash(); } + const vespalib::string value() const { return _value; } + uint32_t hash() const { return hash_code(_value.data(), _value.size()); } + virtual vespalib::string dump(DumpContext &ctx) const; + virtual void accept(NodeVisitor &visitor) const override; +}; + +class Array : public Node { +private: + std::vector<Node_UP> _nodes; + bool _is_const; +public: + Array() : _nodes(), _is_const(false) {} + virtual bool is_const() const 
override { return _is_const; } + size_t size() const { return _nodes.size(); } + const Node &get(size_t i) const { return *_nodes[i]; } + virtual size_t num_children() const override { return size(); } + virtual const Node &get_child(size_t idx) const override { return get(idx); } + virtual void detach_children(NodeHandler &handler) override { + for (size_t i = 0; i < _nodes.size(); ++i) { + handler.handle(std::move(_nodes[i])); + } + _nodes.clear(); + } + void add(Node_UP node) { + if (_nodes.empty()) { + _is_const = node->is_const(); + } else { + _is_const = (_is_const && node->is_const()); + } + _nodes.push_back(std::move(node)); + } + virtual vespalib::string dump(DumpContext &ctx) const { + vespalib::string str; + str += "["; + CommaTracker node_list; + for (const auto &node: _nodes) { + node_list.maybe_comma(str); + str += node->dump(ctx); + } + str += "]"; + return str; + } + virtual void accept(NodeVisitor &visitor) const override; +}; + +class Neg : public Node { +private: + Node_UP _child; + bool _is_const; +public: + Neg(Node_UP child_in) : _child(std::move(child_in)), _is_const(_child->is_const()) {} + virtual bool is_const() const override { return _is_const; } + const Node &child() const { return *_child; } + virtual size_t num_children() const override { return _child ? 
1 : 0; } + virtual const Node &get_child(size_t idx) const override { + (void) idx; + assert(idx == 0); + return child(); + } + virtual void detach_children(NodeHandler &handler) override { + handler.handle(std::move(_child)); + } + virtual vespalib::string dump(DumpContext &ctx) const { + vespalib::string str; + str += "(-"; + str += _child->dump(ctx); + str += ")"; + return str; + } + virtual void accept(NodeVisitor &visitor) const override; +}; + +class Not : public Node { +private: + Node_UP _child; + bool _is_const; +public: + Not(Node_UP child_in) : _child(std::move(child_in)), _is_const(_child->is_const()) {} + virtual bool is_const() const override { return _is_const; } + const Node &child() const { return *_child; } + virtual size_t num_children() const override { return _child ? 1 : 0; } + virtual const Node &get_child(size_t idx) const override { + (void) idx; + assert(idx == 0); + return child(); + } + virtual void detach_children(NodeHandler &handler) override { + handler.handle(std::move(_child)); + } + virtual vespalib::string dump(DumpContext &ctx) const { + vespalib::string str; + str += "(!"; + str += _child->dump(ctx); + str += ")"; + return str; + } + virtual void accept(NodeVisitor &visitor) const override; +}; + +class If : public Node { +private: + Node_UP _cond; + Node_UP _true_expr; + Node_UP _false_expr; + double _p_true; + bool _is_tree; +public: + If(Node_UP cond_in, Node_UP true_expr_in, Node_UP false_expr_in, double p_true_in); + const Node &cond() const { return *_cond; } + const Node &true_expr() const { return *_true_expr; } + const Node &false_expr() const { return *_false_expr; } + double p_true() const { return _p_true; } + virtual bool is_tree() const override { return _is_tree; } + virtual size_t num_children() const override { + return (_cond && _true_expr && _false_expr) ? 
3 : 0; + } + virtual const Node &get_child(size_t idx) const override { + assert(idx < 3); + if (idx == 0) { + return cond(); + } else if (idx == 1) { + return true_expr(); + } else { + return false_expr(); + } + } + virtual void detach_children(NodeHandler &handler) override { + handler.handle(std::move(_cond)); + handler.handle(std::move(_true_expr)); + handler.handle(std::move(_false_expr)); + } + virtual vespalib::string dump(DumpContext &ctx) const { + vespalib::string str; + str += "if("; + str += _cond->dump(ctx); + str += ","; + str += _true_expr->dump(ctx); + str += ","; + str += _false_expr->dump(ctx); + if (_p_true != 0.5) { + str += make_string(",%g", _p_true); + } + str += ")"; + return str; + } + virtual void accept(NodeVisitor &visitor) const override; +}; + +class Let : public Node { +private: + vespalib::string _name; + Node_UP _value; + Node_UP _expr; +public: + Let(const vespalib::string &name_in, Node_UP value_in, Node_UP expr_in) + : _name(name_in), _value(std::move(value_in)), _expr(std::move(expr_in)) {} + const vespalib::string &name() const { return _name; } + const Node &value() const { return *_value; } + const Node &expr() const { return *_expr; } + virtual size_t num_children() const override { return (_value && _expr) ? 2 : 0; } + virtual const Node &get_child(size_t idx) const override { + assert(idx < 2); + return (idx == 0) ? 
value() : expr(); + } + virtual void detach_children(NodeHandler &handler) override { + handler.handle(std::move(_value)); + handler.handle(std::move(_expr)); + } + virtual vespalib::string dump(DumpContext &ctx) const { + vespalib::string str; + str += "let("; + str += _name; + str += ","; + str += _value->dump(ctx); + str += ","; + ctx.let_names.push_back(_name); + str += _expr->dump(ctx); + ctx.let_names.pop_back(); + str += ")"; + return str; + } + virtual void accept(NodeVisitor &visitor) const override; +}; + +class Error : public Leaf { +private: + vespalib::string _message; +public: + Error(const vespalib::string &message_in) : _message(message_in) {} + const vespalib::string &message() const { return _message; } + virtual vespalib::string dump(DumpContext &) const { return _message; } + virtual void accept(NodeVisitor &visitor) const override; +}; + +} // namespace vespalib::eval::nodes +} // namespace vespalib::eval +} // namespace vespalib diff --git a/eval/src/vespa/eval/eval/call_nodes.cpp b/eval/src/vespa/eval/eval/call_nodes.cpp new file mode 100644 index 00000000000..8260ede54a0 --- /dev/null +++ b/eval/src/vespa/eval/eval/call_nodes.cpp @@ -0,0 +1,46 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+ +#include <vespa/fastos/fastos.h> +#include "call_nodes.h" +#include "node_visitor.h" + +namespace vespalib { +namespace eval { +namespace nodes { + +template <typename T> void CallHelper<T>::accept(NodeVisitor &visitor) const { + visitor.visit(static_cast<const T&>(*this)); +} + +CallRepo CallRepo::_instance; +CallRepo::CallRepo() : _map() { + add(nodes::Cos()); + add(nodes::Sin()); + add(nodes::Tan()); + add(nodes::Cosh()); + add(nodes::Sinh()); + add(nodes::Tanh()); + add(nodes::Acos()); + add(nodes::Asin()); + add(nodes::Atan()); + add(nodes::Exp()); + add(nodes::Log10()); + add(nodes::Log()); + add(nodes::Sqrt()); + add(nodes::Ceil()); + add(nodes::Fabs()); + add(nodes::Floor()); + add(nodes::Atan2()); + add(nodes::Ldexp()); + add(nodes::Pow2()); + add(nodes::Fmod()); + add(nodes::Min()); + add(nodes::Max()); + add(nodes::IsNan()); + add(nodes::Relu()); + add(nodes::Sigmoid()); +} + +} // namespace vespalib::eval::nodes +} // namespace vespalib::eval +} // namespace vespalib diff --git a/eval/src/vespa/eval/eval/call_nodes.h b/eval/src/vespa/eval/eval/call_nodes.h new file mode 100644 index 00000000000..70996e2f629 --- /dev/null +++ b/eval/src/vespa/eval/eval/call_nodes.h @@ -0,0 +1,144 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#pragma once + +#include <vespa/vespalib/stllike/string.h> +#include <vespa/vespalib/util/stringfmt.h> +#include "basic_nodes.h" +#include <map> +#include <cmath> + +namespace vespalib { +namespace eval { + +struct NodeVisitor; + +namespace nodes { + +/** + * Common superclass for AST nodes describing calls to built-in + * functions. A call has a (function) name and a pre-defined number of + * parameters that must be matched by the parsed expression. 
+ **/ +class Call : public Node { +private: + vespalib::string _name; + size_t _num_params; + std::vector<Node_UP> _args; + bool _is_const; +public: + Call(const vespalib::string &name_in, size_t num_params_in) + : _name(name_in), _num_params(num_params_in), _is_const(false) {} + virtual bool is_const() const override { return _is_const; } + const vespalib::string &name() const { return _name; } + size_t num_params() const { return _num_params; } + size_t num_args() const { return _args.size(); } + const Node &arg(size_t i) const { return *_args[i]; } + virtual size_t num_children() const override { return num_args(); } + virtual const Node &get_child(size_t idx) const override { return arg(idx); } + virtual void detach_children(NodeHandler &handler) override { + for (size_t i = 0; i < _args.size(); ++i) { + handler.handle(std::move(_args[i])); + } + _args.clear(); + } + virtual void bind_next(Node_UP arg_in) { + if (_args.empty()) { + _is_const = arg_in->is_const(); + } else { + _is_const = (_is_const && arg_in->is_const()); + } + _args.push_back(std::move(arg_in)); + } + virtual vespalib::string dump(DumpContext &ctx) const { + vespalib::string str; + str += _name; + str += "("; + for (size_t i = 0; i < _args.size(); ++i) { + if (i > 0) { + str += ","; + } + str += arg(i).dump(ctx); + } + str += ")"; + return str; + } +}; +typedef std::unique_ptr<Call> Call_UP; + +//----------------------------------------------------------------------------- + +/** + * Repository for known built-in functions. This is used by the parser + * to create appropriate call nodes by looking up function names. 
+ **/ +class CallRepo { +private: + static CallRepo _instance; + typedef nodes::Call_UP (*factory_type)(); + std::map<vespalib::string,factory_type> _map; + template <typename T> + void add(const T &op) { _map[op.name()] = T::create; } + CallRepo(); +public: + static const CallRepo &instance() { return _instance; } + nodes::Call_UP create(const vespalib::string &name) const { + auto result = _map.find(name); + if (result != _map.end()) { + return result->second(); + } + return nodes::Call_UP(nullptr); + } + std::vector<vespalib::string> get_names() const { + std::vector<vespalib::string> ret; + for (const auto &entry: _map) { + ret.push_back(entry.first); + } + return ret; + } +}; + +//----------------------------------------------------------------------------- + +template <typename T> +struct CallHelper : Call { + typedef CallHelper<T> Helper; + CallHelper(const vespalib::string &name_in, size_t num_params_in) + : Call(name_in, num_params_in) {} + virtual void accept(NodeVisitor &visitor) const override; + static Call_UP create() { return Call_UP(new T()); } +}; + +//----------------------------------------------------------------------------- + +struct Cos : CallHelper<Cos> { Cos() : Helper("cos", 1) {} }; +struct Sin : CallHelper<Sin> { Sin() : Helper("sin", 1) {} }; +struct Tan : CallHelper<Tan> { Tan() : Helper("tan", 1) {} }; +struct Cosh : CallHelper<Cosh> { Cosh() : Helper("cosh", 1) {} }; +struct Sinh : CallHelper<Sinh> { Sinh() : Helper("sinh", 1) {} }; +struct Tanh : CallHelper<Tanh> { Tanh() : Helper("tanh", 1) {} }; +struct Acos : CallHelper<Acos> { Acos() : Helper("acos", 1) {} }; +struct Asin : CallHelper<Asin> { Asin() : Helper("asin", 1) {} }; +struct Atan : CallHelper<Atan> { Atan() : Helper("atan", 1) {} }; +struct Exp : CallHelper<Exp> { Exp() : Helper("exp", 1) {} }; +struct Log10 : CallHelper<Log10> { Log10() : Helper("log10", 1) {} }; +struct Log : CallHelper<Log> { Log() : Helper("log", 1) {} }; +struct Sqrt : CallHelper<Sqrt> { Sqrt() : 
Helper("sqrt", 1) {} }; +struct Ceil : CallHelper<Ceil> { Ceil() : Helper("ceil", 1) {} }; +struct Fabs : CallHelper<Fabs> { Fabs() : Helper("fabs", 1) {} }; +struct Floor : CallHelper<Floor> { Floor() : Helper("floor", 1) {} }; +struct Atan2 : CallHelper<Atan2> { Atan2() : Helper("atan2", 2) {} }; +struct Ldexp : CallHelper<Ldexp> { Ldexp() : Helper("ldexp", 2) {} }; +struct Pow2 : CallHelper<Pow2> { Pow2() : Helper("pow", 2) {} }; +struct Fmod : CallHelper<Fmod> { Fmod() : Helper("fmod", 2) {} }; +struct Min : CallHelper<Min> { Min() : Helper("min", 2) {} }; +struct Max : CallHelper<Max> { Max() : Helper("max", 2) {} }; +struct IsNan : CallHelper<IsNan> { IsNan() : Helper("isNan", 1) {} }; +struct Relu : CallHelper<Relu> { Relu() : Helper("relu", 1) {} }; +struct Sigmoid : CallHelper<Sigmoid> { Sigmoid() : Helper("sigmoid", 1) {} }; + +//----------------------------------------------------------------------------- + +} // namespace vespalib::eval::nodes +} // namespace vespalib::eval +} // namespace vespalib diff --git a/eval/src/vespa/eval/eval/check_type.h b/eval/src/vespa/eval/eval/check_type.h new file mode 100644 index 00000000000..30ef3e69da3 --- /dev/null +++ b/eval/src/vespa/eval/eval/check_type.h @@ -0,0 +1,37 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#pragma once + +#include "node_visitor.h" + +namespace vespalib { +namespace eval { +namespace nodes { + +/** + * A templated visitor used to check if the visited node matches any + * of the given types. + **/ + +template <typename... TYPES> struct CheckTypeVisitor; + +template <> +struct CheckTypeVisitor<> : EmptyNodeVisitor { + bool result = false; +}; + +template <typename HEAD, typename... TAIL> +struct CheckTypeVisitor<HEAD, TAIL...> : CheckTypeVisitor<TAIL...> { + virtual void visit(const HEAD &) override { this->result = true; } +}; + +template <typename... 
TYPES> +bool check_type(const nodes::Node &node) { + CheckTypeVisitor<TYPES...> check; + node.accept(check); + return check.result; +} + +} // namespace vespalib::eval::nodes +} // namespace vespalib::eval +} // namespace vespalib diff --git a/eval/src/vespa/eval/eval/delete_node.cpp b/eval/src/vespa/eval/eval/delete_node.cpp new file mode 100644 index 00000000000..61204c890b2 --- /dev/null +++ b/eval/src/vespa/eval/eval/delete_node.cpp @@ -0,0 +1,34 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#include <vespa/fastos/fastos.h> +#include "key_gen.h" +#include "node_visitor.h" +#include "node_traverser.h" + +namespace vespalib { +namespace eval { + +namespace { + +struct ChildReaper : public NodeTraverser, public NodeHandler { + virtual void handle(nodes::Node_UP) override {} + virtual bool open(const nodes::Node &) override { return true; } + virtual void close(const nodes::Node &node) override { + nodes::Node &mutable_node = const_cast<nodes::Node&>(node); + mutable_node.detach_children(*this); + } +}; + +} // namespace vespalib::nodes::<unnamed> + +void +delete_node(nodes::Node_UP node) +{ + if (node) { + ChildReaper reaper; + node->traverse(reaper); + } +} + +} // namespace vespalib::eval +} // namespace vespalib diff --git a/eval/src/vespa/eval/eval/delete_node.h b/eval/src/vespa/eval/eval/delete_node.h new file mode 100644 index 00000000000..228e41ff9fd --- /dev/null +++ b/eval/src/vespa/eval/eval/delete_node.h @@ -0,0 +1,19 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#pragma once + +#include "basic_nodes.h" + +namespace vespalib { +namespace eval { + +/** + * Function used to delete an AST with arbitrary depth without + * overflowing the stack. This is needed because the AST is not + * compacted in any way and large expressions will produce very deep + * ASTs. 
+ **/ +void delete_node(nodes::Node_UP node); + +} // namespace vespalib::eval +} // namespace vespalib diff --git a/eval/src/vespa/eval/eval/function.cpp b/eval/src/vespa/eval/eval/function.cpp new file mode 100644 index 00000000000..c2dcadb3ef5 --- /dev/null +++ b/eval/src/vespa/eval/eval/function.cpp @@ -0,0 +1,910 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#include <vespa/fastos/fastos.h> +#include <cctype> +#include <map> +#include "function.h" +#include "basic_nodes.h" +#include "tensor_nodes.h" +#include "operator_nodes.h" +#include "call_nodes.h" +#include "delete_node.h" + +namespace vespalib { +namespace eval { + +using nodes::Node_UP; +using nodes::Operator_UP; +using nodes::Call_UP; + +namespace { + +bool has_duplicates(const std::vector<vespalib::string> &list) { + for (size_t i = 0; i < list.size(); ++i) { + for (size_t j = (i + 1); j < list.size(); ++j) { + if (list[i] == list[j]) { + return true; + } + } + } + return false; +} + +bool check_tensor_lambda_type(const ValueType &type) { + if (!type.is_tensor() || type.dimensions().empty()) { + return false; + } + for (const auto &dim: type.dimensions()) { + if (!dim.is_indexed() || !dim.is_bound()) { + return false; + } + } + return true; +} + +//----------------------------------------------------------------------------- + +class Params { +private: + std::map<vespalib::string,size_t> _params; +protected: + size_t lookup(vespalib::stringref token) const { + auto result = _params.find(token); + return (result == _params.end()) ? 
UNDEF : result->second; + } + size_t lookup_add(vespalib::stringref token) { + size_t result = lookup(token); + if (result == UNDEF) { + result = _params.size(); + _params[token] = result; + } + return result; + } +public: + static const size_t UNDEF = -1; + virtual bool implicit() const = 0; + virtual size_t resolve(vespalib::stringref token) const = 0; + std::vector<vespalib::string> extract() const { + std::vector<vespalib::string> params_out; + params_out.resize(_params.size()); + for (const auto &item: _params) { + params_out[item.second] = item.first; + } + return params_out; + } + virtual ~Params() {} +}; + +struct ExplicitParams : Params { + explicit ExplicitParams(const std::vector<vespalib::string> ¶ms_in) { + for (const auto ¶m: params_in) { + assert(lookup(param) == UNDEF); + lookup_add(param); + } + } + virtual bool implicit() const { return false; } + virtual size_t resolve(vespalib::stringref token) const override { + return lookup(token); + } +}; + +struct ImplicitParams : Params { + virtual bool implicit() const { return true; } + virtual size_t resolve(vespalib::stringref token) const override { + return const_cast<ImplicitParams*>(this)->lookup_add(token); + } +}; + +//----------------------------------------------------------------------------- + +class ResolveContext +{ +private: + const Params &_params; + const SymbolExtractor *_symbol_extractor; + std::vector<vespalib::string> _let_names; +public: + ResolveContext(const Params ¶ms, const SymbolExtractor *symbol_extractor) + : _params(params), _symbol_extractor(symbol_extractor), _let_names() {} + + void push_let_name(const vespalib::string &name) { + _let_names.push_back(name); + } + + void pop_let_name() { + assert(!_let_names.empty()); + _let_names.pop_back(); + } + + int resolve_let_name(const vespalib::string &name) const { + for (int i = (int(_let_names.size()) - 1); i >= 0; --i) { + if (name == _let_names[i]) { + return -(i + 1); + } + } + return nodes::Symbol::UNDEF; + } + + int 
resolve_param(const vespalib::string &name) const { + size_t param_id = _params.resolve(name); + if (param_id == Params::UNDEF) { + return nodes::Symbol::UNDEF; + } + return param_id; + } + + const SymbolExtractor *symbol_extractor() const { return _symbol_extractor; } +}; + +class ParseContext +{ +private: + const char *_begin; + const char *_pos; + const char *_end; + char _curr; + vespalib::string _scratch; + vespalib::string _failure; + std::vector<Node_UP> _expression_stack; + std::vector<Operator_UP> _operator_stack; + size_t _operator_mark; + std::vector<ResolveContext> _resolve_stack; + +public: + ParseContext(const Params ¶ms, const char *str, size_t len, + const SymbolExtractor *symbol_extractor) + : _begin(str), _pos(str), _end(str + len), _curr(0), + _scratch(), _failure(), + _expression_stack(), _operator_stack(), + _operator_mark(0), + _resolve_stack({ResolveContext(params, symbol_extractor)}) + { + if (_pos < _end) { + _curr = *_pos; + } + } + ~ParseContext() { + for (size_t i = 0; i < _expression_stack.size(); ++i) { + delete_node(std::move(_expression_stack[i])); + } + _expression_stack.clear(); + } + + ResolveContext &resolver() { + assert(!_resolve_stack.empty()); + return _resolve_stack.back(); + } + + const ResolveContext &resolver() const { + assert(!_resolve_stack.empty()); + return _resolve_stack.back(); + } + + void push_resolve_context(const Params ¶ms, const SymbolExtractor *symbol_extractor) { + _resolve_stack.emplace_back(params, symbol_extractor); + } + + void pop_resolve_context() { + assert(!_resolve_stack.empty()); + _resolve_stack.pop_back(); + } + + void fail(const vespalib::string &msg) { + if (_failure.empty()) { + _failure = msg; + _curr = 0; + } + } + bool failed() const { return !_failure.empty(); } + void next() { _curr = (_curr && (_pos < _end)) ? 
*(++_pos) : 0; } + + struct InputMark { + const char *pos; + char curr; + }; + + InputMark get_input_mark() const { return InputMark{_pos, _curr}; } + void restore_input_mark(InputMark mark) { + if ((_curr == 0) && (mark.curr != 0)) { + _failure.clear(); + } + _pos = mark.pos; + _curr = mark.curr; + } + + char get() const { return _curr; } + bool eos() const { return !_curr; } + void eat(char c) { + if (_curr == c) { + next(); + } else { + fail(make_string("expected '%c', but got '%c'", c, _curr)); + } + } + void skip_spaces() { + while (!eos() && isspace(_curr)) { + next(); + } + } + vespalib::string &scratch() { + _scratch.clear(); + return _scratch; + } + vespalib::string &peek(vespalib::string &str, size_t n) { + const char *p = _pos; + for (size_t i = 0; i < n; ++i, ++p) { + if (_curr != 0 && p < _end) { + str.push_back(*p); + } else { + str.push_back(0); + } + } + return str; + } + void skip(size_t n) { + for (size_t i = 0; i < n; ++i) { + next(); + } + } + + void push_let_binding(const vespalib::string &name) { + resolver().push_let_name(name); + } + + void pop_let_binding() { + resolver().pop_let_name(); + } + + int resolve_let_ref(const vespalib::string &name) const { + return resolver().resolve_let_name(name); + } + + int resolve_parameter(const vespalib::string &name) const { + return resolver().resolve_param(name); + } + + void extract_symbol(vespalib::string &symbol_out, InputMark before_symbol) { + const SymbolExtractor *symbol_extractor = resolver().symbol_extractor(); + if (symbol_extractor == nullptr) { + return; + } + symbol_out.clear(); + restore_input_mark(before_symbol); + if (!eos()) { + const char *new_pos = nullptr; + symbol_extractor->extract_symbol(_pos, _end, new_pos, symbol_out); + if ((new_pos != nullptr) && (new_pos > _pos) && (new_pos <= _end)) { + _pos = new_pos; + _curr = (_pos < _end) ? 
*_pos : 0; + } else { + symbol_out.clear(); + } + } + } + + Node_UP get_result() { + if (!eos() || (num_expressions() != 1) || (num_operators() > 0)) { + fail("incomplete parse"); + } + if (!_failure.empty()) { + vespalib::string before(_begin, (_pos - _begin)); + vespalib::string after(_pos, (_end - _pos)); + return Node_UP(new nodes::Error(make_string("[%s]...[%s]...[%s]", + before.c_str(), _failure.c_str(), after.c_str()))); + } + return pop_expression(); + } + + void apply_operator() { + Operator_UP op = pop_operator(); + Node_UP rhs = pop_expression(); + Node_UP lhs = pop_expression(); + op->bind(std::move(lhs), std::move(rhs)); + push_expression(std::move(op)); + } + size_t num_expressions() const { return _expression_stack.size(); } + void push_expression(Node_UP node) { + _expression_stack.push_back(std::move(node)); + } + Node_UP pop_expression() { + if (_expression_stack.empty()) { + fail("expression stack underflow"); + return Node_UP(new nodes::Number(0.0)); + } + Node_UP node = std::move(_expression_stack.back()); + _expression_stack.pop_back(); + return node; + } + size_t num_operators() const { return _operator_stack.size(); } + + size_t operator_mark() const { return _operator_mark; } + void operator_mark(size_t mark) { _operator_mark = mark; } + + void push_operator(Operator_UP node) { + while ((_operator_stack.size() > _operator_mark) && + (_operator_stack.back()->do_before(*node))) + { + apply_operator(); + } + _operator_stack.push_back(std::move(node)); + } + Operator_UP pop_operator() { + assert(!_operator_stack.empty()); + Operator_UP node = std::move(_operator_stack.back()); + _operator_stack.pop_back(); + return node; + } +}; + +//----------------------------------------------------------------------------- + +void parse_expression(ParseContext &ctx); + +int unhex(char c) { + if (c >= '0' && c <= '9') { + return (c - '0'); + } + if (c >= 'a' && c <= 'f') { + return ((c - 'a') + 10); + } + if (c >= 'A' && c <= 'F') { + return ((c - 'A') + 
10); + } + return -1; +} + +void parse_string(ParseContext &ctx) { + vespalib::string &str = ctx.scratch(); + ctx.eat('"'); + while (!ctx.eos() && ctx.get() != '"') { + if (ctx.get() == '\\') { + ctx.next(); + if (ctx.get() == 'x') { + ctx.next(); + int hex1 = unhex(ctx.get()); + ctx.next(); + int hex2 = unhex(ctx.get()); + if (hex1 < 0 || hex2 < 0) { + ctx.fail("bad hex quote"); + } + str.push_back((hex1 << 4) + hex2); + } else { + switch(ctx.get()) { + case '"': str.push_back('"'); break; + case '\\': str.push_back('\\'); break; + case 'f': str.push_back('\f'); break; + case 'n': str.push_back('\n'); break; + case 'r': str.push_back('\r'); break; + case 't': str.push_back('\t'); break; + default: ctx.fail("bad quote"); break; + } + } + } else { + str.push_back(ctx.get()); // default case + } + ctx.next(); + } + ctx.eat('"'); + ctx.push_expression(Node_UP(new nodes::String(str))); +} + +void parse_number(ParseContext &ctx) { + vespalib::string &str = ctx.scratch(); + str.push_back(ctx.get()); + ctx.next(); + while (ctx.get() >= '0' && ctx.get() <= '9') { + str.push_back(ctx.get()); + ctx.next(); + } + if (ctx.get() == '.') { + str.push_back(ctx.get()); + ctx.next(); + while (ctx.get() >= '0' && ctx.get() <= '9') { + str.push_back(ctx.get()); + ctx.next(); + } + } + if (ctx.get() == 'e' || ctx.get() == 'E') { + str.push_back(ctx.get()); + ctx.next(); + if (ctx.get() == '+' || ctx.get() == '-') { + str.push_back(ctx.get()); + ctx.next(); + } + while (ctx.get() >= '0' && ctx.get() <= '9') { + str.push_back(ctx.get()); + ctx.next(); + } + } + char *end = nullptr; + double value = strtod(str.c_str(), &end); + if (!str.empty() && end == str.data() + str.size()) { + ctx.push_expression(Node_UP(new nodes::Number(value))); + } else { + ctx.fail(make_string("invalid number: '%s'", str.c_str())); + } +} + +// NOTE: using non-standard definition of identifiers +// (to match ranking expression parser in Java) +bool is_ident(char c, bool first) { + return ((c >= 'a' && c <= 
'z') || + (c >= 'A' && c <= 'Z') || + (c >= '0' && c <= '9') || + (c == '_') || (c == '@') || + (c == '$' && !first)); +} + +vespalib::string get_ident(ParseContext &ctx, bool allow_empty) { + ctx.skip_spaces(); + vespalib::string ident; + if (is_ident(ctx.get(), true)) { + ident.push_back(ctx.get()); + for (ctx.next(); is_ident(ctx.get(), false); ctx.next()) { + ident.push_back(ctx.get()); + } + } + if (!allow_empty && ident.empty()) { + ctx.fail("missing identifier"); + } + return ident; +} + +void parse_if(ParseContext &ctx) { + parse_expression(ctx); + Node_UP cond = ctx.pop_expression(); + ctx.eat(','); + parse_expression(ctx); + Node_UP true_expr = ctx.pop_expression(); + ctx.eat(','); + parse_expression(ctx); + Node_UP false_expr = ctx.pop_expression(); + double p_true = 0.5; + if (ctx.get() == ',') { + ctx.eat(','); + parse_number(ctx); + Node_UP p_true_node = ctx.pop_expression(); + auto p_true_number = nodes::as<nodes::Number>(*p_true_node); + if (p_true_number) { + p_true = p_true_number->value(); + } + } + ctx.push_expression(Node_UP(new nodes::If(std::move(cond), std::move(true_expr), std::move(false_expr), p_true))); +} + +void parse_let(ParseContext &ctx) { + vespalib::string name = get_ident(ctx, false); + ctx.skip_spaces(); + ctx.eat(','); + parse_expression(ctx); + Node_UP value = ctx.pop_expression(); + ctx.eat(','); + ctx.push_let_binding(name); + parse_expression(ctx); + Node_UP expr = ctx.pop_expression(); + ctx.pop_let_binding(); + ctx.push_expression(Node_UP(new nodes::Let(name, std::move(value), std::move(expr)))); +} + +void parse_call(ParseContext &ctx, Call_UP call) { + for (size_t i = 0; i < call->num_params(); ++i) { + if (i > 0) { + ctx.eat(','); + } + parse_expression(ctx); + call->bind_next(ctx.pop_expression()); + } + ctx.push_expression(std::move(call)); +} + +// (a,b,c) wrapped +// ,a,b,c -> ) not wrapped +std::vector<vespalib::string> get_ident_list(ParseContext &ctx, bool wrapped) { + std::vector<vespalib::string> list; + if 
(wrapped) { + ctx.skip_spaces(); + ctx.eat('('); + } + for (ctx.skip_spaces(); !ctx.eos() && (ctx.get() != ')'); ctx.skip_spaces()) { + if (!list.empty() || !wrapped) { + ctx.eat(','); + } + list.push_back(get_ident(ctx, false)); + } + if (wrapped) { + ctx.eat(')'); + } + if (has_duplicates(list)) { + ctx.fail("duplicate identifiers"); + } + return list; +} + +// a +// (a,b,c) +// cannot be empty +std::vector<vespalib::string> get_idents(ParseContext &ctx) { + std::vector<vespalib::string> list; + ctx.skip_spaces(); + if (ctx.get() == '(') { + list = get_ident_list(ctx, true); + } else { + list.push_back(get_ident(ctx, false)); + } + if (list.empty()) { + ctx.fail("missing identifiers"); + } + return list; +} + +Function parse_lambda(ParseContext &ctx, size_t num_params) { + ctx.skip_spaces(); + ctx.eat('f'); + auto param_names = get_ident_list(ctx, true); + ExplicitParams params(param_names); + ctx.push_resolve_context(params, nullptr); + ctx.skip_spaces(); + ctx.eat('('); + parse_expression(ctx); + ctx.eat(')'); + ctx.skip_spaces(); + ctx.pop_resolve_context(); + Node_UP lambda_root = ctx.pop_expression(); + if (param_names.size() != num_params) { + ctx.fail(make_string("expected lambda with %zu parameter(s), was %zu", + num_params, param_names.size())); + } + return Function(std::move(lambda_root), std::move(param_names)); +} + +void parse_tensor_map(ParseContext &ctx) { + parse_expression(ctx); + Node_UP child = ctx.pop_expression(); + ctx.eat(','); + Function lambda = parse_lambda(ctx, 1); + ctx.push_expression(std::make_unique<nodes::TensorMap>(std::move(child), std::move(lambda))); +} + +void parse_tensor_join(ParseContext &ctx) { + parse_expression(ctx); + Node_UP lhs = ctx.pop_expression(); + ctx.eat(','); + parse_expression(ctx); + Node_UP rhs = ctx.pop_expression(); + ctx.eat(','); + Function lambda = parse_lambda(ctx, 2); + ctx.push_expression(std::make_unique<nodes::TensorJoin>(std::move(lhs), std::move(rhs), std::move(lambda))); +} + +void 
parse_tensor_reduce(ParseContext &ctx) { + parse_expression(ctx); + Node_UP child = ctx.pop_expression(); + ctx.eat(','); + auto aggr_name = get_ident(ctx, false); + auto maybe_aggr = nodes::AggrNames::from_name(aggr_name); + if (!maybe_aggr) { + ctx.fail(make_string("unknown aggregator: '%s'", aggr_name.c_str())); + return; + } + auto dimensions = get_ident_list(ctx, false); + if ((*maybe_aggr == nodes::Aggr::SUM) && dimensions.empty()) { + ctx.push_expression(std::make_unique<nodes::TensorSum>(std::move(child))); + } else if ((*maybe_aggr == nodes::Aggr::SUM) && (dimensions.size() == 1)) { + ctx.push_expression(std::make_unique<nodes::TensorSum>(std::move(child), dimensions[0])); + } else { + ctx.push_expression(std::make_unique<nodes::TensorReduce>(std::move(child), *maybe_aggr, std::move(dimensions))); + } +} + +void parse_tensor_rename(ParseContext &ctx) { + parse_expression(ctx); + Node_UP child = ctx.pop_expression(); + ctx.eat(','); + auto from = get_idents(ctx); + ctx.skip_spaces(); + ctx.eat(','); + auto to = get_idents(ctx); + if (from.size() != to.size()) { + ctx.fail("dimension list size mismatch"); + } else { + ctx.push_expression(std::make_unique<nodes::TensorRename>(std::move(child), std::move(from), std::move(to))); + } + ctx.skip_spaces(); +} + +void parse_tensor_lambda(ParseContext &ctx) { + vespalib::string type_spec("tensor("); + while(!ctx.eos() && (ctx.get() != ')')) { + type_spec.push_back(ctx.get()); + ctx.next(); + } + ctx.eat(')'); + type_spec.push_back(')'); + ValueType type = ValueType::from_spec(type_spec); + if (!check_tensor_lambda_type(type)) { + ctx.fail("invalid tensor type"); + return; + } + auto param_names = type.dimension_names(); + ExplicitParams params(param_names); + ctx.push_resolve_context(params, nullptr); + ctx.skip_spaces(); + ctx.eat('('); + parse_expression(ctx); + ctx.pop_resolve_context(); + Function lambda(ctx.pop_expression(), std::move(param_names)); + 
ctx.push_expression(std::make_unique<nodes::TensorLambda>(std::move(type), std::move(lambda))); +} + +void parse_tensor_concat(ParseContext &ctx) { + parse_expression(ctx); + Node_UP lhs = ctx.pop_expression(); + ctx.eat(','); + parse_expression(ctx); + Node_UP rhs = ctx.pop_expression(); + ctx.eat(','); + auto dimension = get_ident(ctx, false); + ctx.skip_spaces(); + ctx.push_expression(std::make_unique<nodes::TensorConcat>(std::move(lhs), std::move(rhs), dimension)); +} + +// to be replaced with more generic 'reduce' +void parse_tensor_sum(ParseContext &ctx) { + parse_expression(ctx); + Node_UP child = ctx.pop_expression(); + if (ctx.get() == ',') { + ctx.next(); + vespalib::string dimension = get_ident(ctx, false); + ctx.skip_spaces(); + ctx.push_expression(Node_UP(new nodes::TensorSum(std::move(child), dimension))); + } else { + ctx.push_expression(Node_UP(new nodes::TensorSum(std::move(child)))); + } +} + +bool try_parse_call(ParseContext &ctx, const vespalib::string &name) { + ctx.skip_spaces(); + if (ctx.get() == '(') { + ctx.eat('('); + if (name == "if") { + parse_if(ctx); + } else if (name == "let") { + parse_let(ctx); + } else { + Call_UP call = nodes::CallRepo::instance().create(name); + if (call.get() != nullptr) { + parse_call(ctx, std::move(call)); + } else if (name == "map") { + parse_tensor_map(ctx); + } else if (name == "join") { + parse_tensor_join(ctx); + } else if (name == "reduce") { + parse_tensor_reduce(ctx); + } else if (name == "rename") { + parse_tensor_rename(ctx); + } else if (name == "tensor") { + parse_tensor_lambda(ctx); + } else if (name == "concat") { + parse_tensor_concat(ctx); + } else if (name == "sum") { + parse_tensor_sum(ctx); + } else { + ctx.fail(make_string("unknown function: '%s'", name.c_str())); + return false; + } + } + ctx.eat(')'); + return true; + } + return false; +} + +int parse_symbol(ParseContext &ctx, vespalib::string &name, ParseContext::InputMark before_name) { + int id = ctx.resolve_let_ref(name); + if (id != 
nodes::Symbol::UNDEF) { + return id; + } + ctx.extract_symbol(name, before_name); + return ctx.resolve_parameter(name); +} + +void parse_symbol_or_call(ParseContext &ctx) { + ParseContext::InputMark before_name = ctx.get_input_mark(); + vespalib::string name = get_ident(ctx, true); + if (!try_parse_call(ctx, name)) { + int id = parse_symbol(ctx, name, before_name); + if (name.empty()) { + ctx.fail("missing value"); + } else if (id == nodes::Symbol::UNDEF) { + ctx.fail(make_string("unknown symbol: '%s'", name.c_str())); + } else { + ctx.push_expression(Node_UP(new nodes::Symbol(id))); + } + } +} + +void parse_array(ParseContext &ctx) { + std::unique_ptr<nodes::Array> array(new nodes::Array()); + ctx.eat('['); + ctx.skip_spaces(); + size_t size = 0; + while (!ctx.eos() && ctx.get() != ']') { + if (++size > 1) { + ctx.eat(','); + } + parse_expression(ctx); + array->add(ctx.pop_expression()); + } + ctx.eat(']'); + ctx.push_expression(std::move(array)); +} + +void parse_value(ParseContext &ctx) { + ctx.skip_spaces(); + if (ctx.get() == '-') { + ctx.next(); + parse_value(ctx); + ctx.push_expression(Node_UP(new nodes::Neg(ctx.pop_expression()))); + } else if (ctx.get() == '!') { + ctx.next(); + parse_value(ctx); + ctx.push_expression(Node_UP(new nodes::Not(ctx.pop_expression()))); + } else if (ctx.get() == '(') { + ctx.next(); + parse_expression(ctx); + ctx.eat(')'); + } else if (ctx.get() == '[') { + parse_array(ctx); + } else if (ctx.get() == '"') { + parse_string(ctx); + } else if (isdigit(ctx.get())) { + parse_number(ctx); + } else { + parse_symbol_or_call(ctx); + } +} + +void parse_operator(ParseContext &ctx) { + ctx.skip_spaces(); + vespalib::string &str = ctx.peek(ctx.scratch(), nodes::OperatorRepo::instance().max_size()); + Operator_UP op = nodes::OperatorRepo::instance().create(str); + if (op.get() != nullptr) { + ctx.push_operator(std::move(op)); + ctx.skip(str.size()); + } else { + ctx.fail(make_string("invalid operator: '%c'", ctx.get())); + } +} + +void 
parse_expression(ParseContext &ctx) { + size_t old_mark = ctx.operator_mark(); + ctx.operator_mark(ctx.num_operators()); + for (;;) { + parse_value(ctx); + ctx.skip_spaces(); + if (ctx.eos() || ctx.get() == ')' || ctx.get() == ',' || ctx.get() == ']') { + while (ctx.num_operators() > ctx.operator_mark()) { + ctx.apply_operator(); + } + ctx.operator_mark(old_mark); + return; + } + parse_operator(ctx); + } +} + +Function parse_function(const Params ¶ms, vespalib::stringref expression, + const SymbolExtractor *symbol_extractor) +{ + ParseContext ctx(params, expression.data(), expression.size(), symbol_extractor); + parse_expression(ctx); + if (ctx.failed() && params.implicit()) { + return Function(ctx.get_result(), std::vector<vespalib::string>()); + } + return Function(ctx.get_result(), params.extract()); +} + +} // namespace vespalib::<unnamed> + +//----------------------------------------------------------------------------- + +bool +Function::has_error() const +{ + auto error = nodes::as<nodes::Error>(*_root); + return error; +} + +vespalib::string +Function::get_error() const +{ + auto error = nodes::as<nodes::Error>(*_root); + return error ? 
error->message() : ""; +} + +Function +Function::parse(vespalib::stringref expression) +{ + return parse_function(ImplicitParams(), expression, nullptr); +} + +Function +Function::parse(vespalib::stringref expression, const SymbolExtractor &symbol_extractor) +{ + return parse_function(ImplicitParams(), expression, &symbol_extractor); +} + +Function +Function::parse(const std::vector<vespalib::string> ¶ms, vespalib::stringref expression) +{ + return parse_function(ExplicitParams(params), expression, nullptr); +} + +Function +Function::parse(const std::vector<vespalib::string> ¶ms, vespalib::stringref expression, + const SymbolExtractor &symbol_extractor) +{ + return parse_function(ExplicitParams(params), expression, &symbol_extractor); +} + +//----------------------------------------------------------------------------- + +vespalib::string +Function::dump_as_lambda() const +{ + vespalib::string lambda = "f("; + for (size_t i = 0; i < _params.size(); ++i) { + if (i > 0) { + lambda += ","; + } + lambda += _params[i]; + } + lambda += ")"; + vespalib::string expr = dump(); + if (starts_with(expr, "(")) { + lambda += expr; + } else { + lambda += "("; + lambda += expr; + lambda += ")"; + } + return lambda; +} + +bool +Function::unwrap(vespalib::stringref input, + vespalib::string &wrapper, + vespalib::string &body, + vespalib::string &error) +{ + size_t pos = 0; + for (; pos < input.size() && isspace(input[pos]); ++pos); + size_t wrapper_begin = pos; + for (; pos < input.size() && isalpha(input[pos]); ++pos); + size_t wrapper_end = pos; + if (wrapper_end == wrapper_begin) { + error = "could not extract wrapper name"; + return false; + } + for (; pos < input.size() && isspace(input[pos]); ++pos); + if (pos == input.size() || input[pos] != '(') { + error = "could not match opening '('"; + return false; + } + size_t body_begin = (pos + 1); + size_t body_end = (input.size() - 1); + for (; body_end > body_begin && isspace(input[body_end]); --body_end); + if (input[body_end] != 
')') { + error = "could not match closing ')'"; + return false; + } + assert(body_end >= body_begin); + wrapper = vespalib::stringref(input.data() + wrapper_begin, wrapper_end - wrapper_begin); + body = vespalib::stringref(input.data() + body_begin, body_end - body_begin); + return true; +} + +//----------------------------------------------------------------------------- + +} // namespace vespalib::eval +} // namespace vespalib diff --git a/eval/src/vespa/eval/eval/function.h b/eval/src/vespa/eval/eval/function.h new file mode 100644 index 00000000000..35a89ce6512 --- /dev/null +++ b/eval/src/vespa/eval/eval/function.h @@ -0,0 +1,84 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#pragma once + +#include <cmath> +#include <memory> +#include <vespa/vespalib/stllike/string.h> +#include <vespa/vespalib/util/stringfmt.h> +#include <map> +#include "basic_nodes.h" +#include "delete_node.h" +#include "value.h" + +namespace vespalib { +namespace eval { + +enum class PassParams { SEPARATE, ARRAY }; + +/** + * Interface used to perform custom symbol extraction. This is + * typically used by the ranking framework to extend what will be + * parsed as parameter names. + **/ +struct SymbolExtractor { + virtual void extract_symbol(const char *pos_in, const char *end_in, + const char *&pos_out, vespalib::string &symbol_out) const = 0; + virtual ~SymbolExtractor() {} +}; + +struct NodeVisitor; + +/** + * When you parse an expression you get a Function. It contains the + * AST root and the names of all parameters. A function can only be + * evaluated using the appropriate number of parameters. 
+ **/ +class Function +{ +private: + nodes::Node_UP _root; + std::vector<vespalib::string> _params; + +public: + Function() : _root(new nodes::Number(0.0)), _params() {} + Function(nodes::Node_UP root_in, std::vector<vespalib::string> &¶ms_in) + : _root(std::move(root_in)), _params(std::move(params_in)) {} + Function(Function &&rhs) : _root(std::move(rhs._root)), _params(std::move(rhs._params)) {} + ~Function() { delete_node(std::move(_root)); } + size_t num_params() const { return _params.size(); } + vespalib::stringref param_name(size_t idx) const { return _params[idx]; } + bool has_error() const; + vespalib::string get_error() const; + const nodes::Node &root() const { return *_root; } + static Function parse(vespalib::stringref expression); + static Function parse(vespalib::stringref expression, const SymbolExtractor &symbol_extractor); + static Function parse(const std::vector<vespalib::string> ¶ms, vespalib::stringref expression); + static Function parse(const std::vector<vespalib::string> ¶ms, vespalib::stringref expression, + const SymbolExtractor &symbol_extractor); + vespalib::string dump() const { + nodes::DumpContext dump_context(_params); + return _root->dump(dump_context); + } + vespalib::string dump_as_lambda() const; + // Utility function used to unwrap an expression contained inside + // a named wrapper. For example 'max(x+y)' -> 'max', 'x+y' + static bool unwrap(vespalib::stringref input, + vespalib::string &wrapper, + vespalib::string &body, + vespalib::string &error); + /** + * Issues is used to report issues relating to the function + * structure, typically to explain why a function cannot be + * evaluated in a specific context due to it using features not + * supported in that context. 
+ **/ + struct Issues { + std::vector<vespalib::string> list; + operator bool() const { return !list.empty(); } + Issues(std::vector<vespalib::string> &&list_in) : list(std::move(list_in)) {} + }; +}; + +} // namespace vespalib::eval +} // namespace vespalib diff --git a/eval/src/vespa/eval/eval/gbdt.cpp b/eval/src/vespa/eval/eval/gbdt.cpp new file mode 100644 index 00000000000..893692c6889 --- /dev/null +++ b/eval/src/vespa/eval/eval/gbdt.cpp @@ -0,0 +1,131 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#include <vespa/fastos/fastos.h> +#include "gbdt.h" +#include <vespa/vespalib/eval/basic_nodes.h> +#include <vespa/vespalib/eval/call_nodes.h> +#include <vespa/vespalib/eval/operator_nodes.h> +#include "vm_forest.h" + +namespace vespalib { +namespace eval { +namespace gbdt { + +//----------------------------------------------------------------------------- + +std::vector<const nodes::Node *> extract_trees(const nodes::Node &node) { + std::vector<const nodes::Node *> trees; + std::vector<const nodes::Node *> todo; + if (node.is_tree()) { + trees.push_back(&node); + } else if (node.is_forest()) { + todo.push_back(&node); + } + while (!todo.empty()) { + const nodes::Node &forest = *todo.back(); todo.pop_back(); + for (size_t i = 0; i < forest.num_children(); ++i) { + const nodes::Node &child = forest.get_child(i); + if (child.is_tree()) { + trees.push_back(&child); + } else if (child.is_forest()) { + todo.push_back(&child); + } + } + } + return trees; +} + +//----------------------------------------------------------------------------- + +TreeStats::TreeStats(const nodes::Node &tree) + : size(0), + num_less_checks(0), + num_in_checks(0), + num_tuned_checks(0), + max_set_size(0), + expected_path_length(0.0), + average_path_length(0.0) +{ + size_t sum_path = 0.0; + expected_path_length = traverse(tree, 0, sum_path); + average_path_length = double(sum_path) / double(size); +} + +double 
+TreeStats::traverse(const nodes::Node &node, size_t depth, size_t &sum_path) { + auto if_node = nodes::as<nodes::If>(node); + if (if_node) { + double p_true = if_node->p_true(); + if (p_true != 0.5) { + ++num_tuned_checks; + } + double true_path = traverse(if_node->true_expr(), depth + 1, sum_path); + double false_path = traverse(if_node->false_expr(), depth + 1, sum_path); + auto less = nodes::as<nodes::Less>(if_node->cond()); + auto in = nodes::as<nodes::In>(if_node->cond()); + if (less) { + ++num_less_checks; + } else { + assert(in); + ++num_in_checks; + auto array = nodes::as<nodes::Array>(in->rhs()); + size_t array_size = (array) ? array->size() : 1; + max_set_size = std::max(max_set_size, array_size); + } + return 1.0 + (p_true * true_path) + ((1.0 - p_true) * false_path); + } else { + ++size; + sum_path += depth; + return 0.0; + } +} + +ForestStats::ForestStats(const std::vector<const nodes::Node *> &trees) + : num_trees(trees.size()), + total_size(0), + tree_sizes(), + total_less_checks(0), + total_in_checks(0), + total_tuned_checks(0), + max_set_size(0), + total_expected_path_length(0.0), + total_average_path_length(0.0) +{ + std::map<size_t,size_t> size_map; + for (const nodes::Node *tree: trees) { + TreeStats stats(*tree); + total_size += stats.size; + ++size_map[stats.size]; + total_less_checks += stats.num_less_checks; + total_in_checks += stats.num_in_checks; + total_tuned_checks += stats.num_tuned_checks; + max_set_size = std::max(max_set_size, stats.max_set_size); + total_expected_path_length += stats.expected_path_length; + total_average_path_length += stats.average_path_length; + } + for (auto const &size: size_map) { + tree_sizes.push_back(TreeSize{size.first, size.second}); + } +} + +//----------------------------------------------------------------------------- + +Optimize::Result +Optimize::select_best(const ForestStats &stats, + const std::vector<const nodes::Node *> &trees) +{ + double path_len = stats.total_average_path_length; + if 
((stats.tree_sizes.back().size > 12) && (path_len > 2500.0)) { + return apply_chain(VMForest::optimize_chain, stats, trees); + } + return Optimize::Result(); +} + +Optimize::Chain Optimize::best({select_best}); +Optimize::Chain Optimize::none; + +//----------------------------------------------------------------------------- + +} // namespace vespalib::eval::gbdt +} // namespace vespalib::eval +} // namespace vespalib diff --git a/eval/src/vespa/eval/eval/gbdt.h b/eval/src/vespa/eval/eval/gbdt.h new file mode 100644 index 00000000000..c7ec59b603c --- /dev/null +++ b/eval/src/vespa/eval/eval/gbdt.h @@ -0,0 +1,115 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#pragma once + +#include <vector> + +namespace vespalib { +namespace eval { + +namespace nodes { class Node; } + +namespace gbdt { + +//----------------------------------------------------------------------------- + +/** + * Function used to map out individual GBDT trees from a GBDT forest. + **/ +std::vector<const nodes::Node *> extract_trees(const nodes::Node &node); + +/** + * Statistics for a single GBDT tree. + **/ +struct TreeStats { + size_t size; + size_t num_less_checks; + size_t num_in_checks; + size_t num_tuned_checks; + size_t max_set_size; + double expected_path_length; + double average_path_length; + explicit TreeStats(const nodes::Node &tree); +private: + double traverse(const nodes::Node &tree, size_t depth, size_t &sum_path); +}; + +/** + * Statistics for a GBDT forest. 
+ **/ +struct ForestStats { + struct TreeSize { + size_t size; + size_t count; + }; + size_t num_trees; + size_t total_size; + std::vector<TreeSize> tree_sizes; + size_t total_less_checks; + size_t total_in_checks; + size_t total_tuned_checks; + size_t max_set_size; + double total_expected_path_length; + double total_average_path_length; + explicit ForestStats(const std::vector<const nodes::Node *> &trees); +}; + +//----------------------------------------------------------------------------- + +/** + * A Forest object represents deletable custom prepared state that may + * be used to evaluate a GBDT forest from within LLVM generated + * machine code. It is very important that the evaluation function + * used is passed exactly the subclass of Forest it expects. This is + * why Optimize::Result bundles together both the prepared state + * (Forest object) and the evaluation function reference; they are + * chosen at the same time at the same place. + **/ +struct Forest { + using UP = std::unique_ptr<Forest>; + using eval_function = double (*)(const Forest *self, const double *args); + virtual ~Forest() {} +}; + +/** + * Definitions and helper functions related to custom GBDT forest + * optimization. The optimization chain named 'best' is used by + * default. The one named 'none' results in no special handling for + * GBDT forests. 
+ **/ +struct Optimize { + struct Result { + Forest::UP forest; + Forest::eval_function eval; + Result() : forest(nullptr), eval(nullptr) {} + Result(Forest::UP &&forest_in, Forest::eval_function eval_in) + : forest(std::move(forest_in)), eval(eval_in) {} + Result(Result &&rhs) : forest(std::move(rhs.forest)), eval(rhs.eval) {} + bool valid() const { return (forest.get() != nullptr); } + }; + using optimize_function = Result (*)(const ForestStats &stats, + const std::vector<const nodes::Node *> &trees); + using Chain = std::vector<optimize_function>; + static Result select_best(const ForestStats &stats, + const std::vector<const nodes::Node *> &trees); + static Chain best; + static Chain none; + static Result apply_chain(const Chain &chain, + const ForestStats &stats, + const std::vector<const nodes::Node *> &trees) { + for (optimize_function optimize: chain) { + Result result = optimize(stats, trees); + if (result.valid()) { + return result; + } + } + return Result(); + } + // Optimize() = delete; +}; + +//----------------------------------------------------------------------------- + +} // namespace vespalib::eval::gbdt +} // namespace vespalib::eval +} // namespace vespalib diff --git a/eval/src/vespa/eval/eval/interpreted_function.cpp b/eval/src/vespa/eval/eval/interpreted_function.cpp new file mode 100644 index 00000000000..47bd483bba4 --- /dev/null +++ b/eval/src/vespa/eval/eval/interpreted_function.cpp @@ -0,0 +1,496 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+ +#include <vespa/fastos/fastos.h> +#include "interpreted_function.h" +#include "node_visitor.h" +#include "node_traverser.h" +#include "check_type.h" +#include <cmath> +#include <vespa/vespalib/util/approx.h> +#include "operation.h" +#include <set> +#include "tensor_spec.h" +#include "simple_tensor_engine.h" +#include <vespa/vespalib/util/classname.h> + +namespace vespalib { +namespace eval { + +namespace { + +using namespace nodes; +using State = InterpretedFunction::State; +using Instruction = InterpretedFunction::Instruction; + +//----------------------------------------------------------------------------- + +template <typename T, typename IN> +uint64_t wrap_param(const IN &value_in) { + const T &value = value_in; + return (uint64_t)&value; +} + +template <typename T> +const T &unwrap_param(uint64_t param) { return *((const T *)param); } + +//----------------------------------------------------------------------------- + +void op_load_const(State &state, uint64_t param) { + state.stack.push_back(unwrap_param<Value>(param)); +} + +void op_load_param(State &state, uint64_t param) { + state.stack.push_back(state.params[param]); +} + +void op_load_let(State &state, uint64_t param) { + state.stack.push_back(state.let_values[param]); +} + +//----------------------------------------------------------------------------- + +template <typename OP1> +void op_unary(State &state, uint64_t) { + state.replace(1, OP1().perform(state.peek(0), state.stash)); +} + +template <typename OP2> +void op_binary(State &state, uint64_t) { + state.replace(2, OP2().perform(state.peek(1), state.peek(0), state.stash)); +} + +//----------------------------------------------------------------------------- + +void op_skip(State &state, uint64_t param) { + state.program_offset += param; +} + +void op_skip_if_false(State &state, uint64_t param) { + ++state.if_cnt; + if (!state.peek(0).as_bool()) { + state.program_offset += param; + } + state.stack.pop_back(); +} + 
+//----------------------------------------------------------------------------- + +void op_store_let(State &state, uint64_t) { + state.let_values.push_back(state.peek(0)); + state.stack.pop_back(); +} + +void op_evict_let(State &state, uint64_t) { + state.let_values.pop_back(); +} + +//----------------------------------------------------------------------------- + +// compare lhs with a set member, short-circuit if found +void op_check_member(State &state, uint64_t param) { + if (state.peek(1).equal(state.peek(0))) { + state.replace(2, state.stash.create<DoubleValue>(1.0)); + state.program_offset += param; + } else { + state.stack.pop_back(); + } +} + +// set member not found, replace lhs with false +void op_not_member(State &state, uint64_t) { + state.stack.pop_back(); + state.stack.push_back(state.stash.create<DoubleValue>(0.0)); +} + +//----------------------------------------------------------------------------- + +void op_tensor_sum(State &state, uint64_t) { + const eval::Tensor *tensor = state.peek(0).as_tensor(); + if (tensor != nullptr) { + state.replace(1, tensor->engine().reduce(*tensor, operation::Add(), {}, state.stash)); + } +} + +void op_tensor_sum_dimension(State &state, uint64_t param) { + const eval::Tensor *tensor = state.peek(0).as_tensor(); + if (tensor != nullptr) { + const vespalib::string &dimension = unwrap_param<vespalib::string>(param); + state.replace(1, tensor->engine().reduce(*tensor, operation::Add(), {dimension}, state.stash)); + } else { + state.replace(1, state.stash.create<ErrorValue>()); + } +} + +//----------------------------------------------------------------------------- + +template <typename T> +const T &undef_cref() { + const T *undef = nullptr; + assert(undef); + return *undef; +} + +struct TensorFunctionArgArgMeta { + TensorFunction::UP function; + size_t param1; + size_t param2; + TensorFunctionArgArgMeta(TensorFunction::UP function_in, size_t param1_in, size_t param2_in) + : function(std::move(function_in)), 
param1(param1_in), param2(param2_in) {} +}; + +struct ArgArgInput : TensorFunction::Input { + const TensorFunctionArgArgMeta &meta; + const State &state; + ArgArgInput(const TensorFunctionArgArgMeta &meta_in, const State &state_in) + : meta(meta_in), state(state_in) {} + const Value &get_tensor(size_t id) const override { + if (id == 0) { + return state.params[meta.param1]; + } else if (id == 1) { + return state.params[meta.param2]; + } + return undef_cref<Value>(); + } + const UnaryOperation &get_map_operation(size_t) const override { + return undef_cref<UnaryOperation>(); + } +}; + +void op_tensor_function_arg_arg(State &state, uint64_t param) { + const TensorFunctionArgArgMeta &meta = unwrap_param<TensorFunctionArgArgMeta>(param); + ArgArgInput input(meta, state); + state.stack.push_back(meta.function->eval(input, state.stash)); +} + +//----------------------------------------------------------------------------- + +struct ProgramBuilder : public NodeVisitor, public NodeTraverser { + std::vector<Instruction> &program; + Stash &stash; + const TensorEngine &tensor_engine; + const NodeTypes &types; + + ProgramBuilder(std::vector<Instruction> &program_in, Stash &stash_in, const TensorEngine &tensor_engine_in, const NodeTypes &types_in) + : program(program_in), stash(stash_in), tensor_engine(tensor_engine_in), types(types_in) {} + + //------------------------------------------------------------------------- + + bool is_typed_tensor(const Node &node) const { + const ValueType &type = types.get_type(node); + return (type.is_tensor() && !type.dimensions().empty()); + } + + bool is_typed(const Node &node) const { + return (types.get_type(node).is_double() || is_typed_tensor(node)); + } + + bool is_typed_tensor_param(const Node &node) const { + auto sym = as<Symbol>(node); + return (sym && (sym->id() >= 0) && is_typed_tensor(node)); + } + + bool is_typed_tensor_product_of_params(const Node &node) const { + auto mul = as<Mul>(node); + return (mul && is_typed_tensor(*mul) 
&& + is_typed_tensor_param(mul->lhs()) && + is_typed_tensor_param(mul->rhs())); + } + + //------------------------------------------------------------------------- + + virtual void visit(const Number &node) { + program.emplace_back(op_load_const, wrap_param<Value>(stash.create<DoubleValue>(node.value()))); + } + virtual void visit(const Symbol &node) { + if (node.id() >= 0) { // param value + program.emplace_back(op_load_param, node.id()); + } else { // let binding + int let_offset = -(node.id() + 1); + program.emplace_back(op_load_let, let_offset); + } + } + virtual void visit(const String &node) { + program.emplace_back(op_load_const, wrap_param<Value>(stash.create<DoubleValue>(node.hash()))); + } + virtual void visit(const Array &node) { + program.emplace_back(op_load_const, wrap_param<Value>(stash.create<DoubleValue>(node.size()))); + } + virtual void visit(const Neg &) { + program.emplace_back(op_unary<operation::Neg>); + } + virtual void visit(const Not &) { + program.emplace_back(op_unary<operation::Not>); + } + virtual void visit(const If &node) { + node.cond().traverse(*this); + size_t after_cond = program.size(); + program.emplace_back(op_skip_if_false); + node.true_expr().traverse(*this); + size_t after_true = program.size(); + program.emplace_back(op_skip); + node.false_expr().traverse(*this); + program[after_cond].update_param(after_true - after_cond); + program[after_true].update_param(program.size() - after_true - 1); + } + virtual void visit(const Let &node) { + node.value().traverse(*this); + program.emplace_back(op_store_let); + node.expr().traverse(*this); + program.emplace_back(op_evict_let); + } + virtual void visit(const Error &) { + program.emplace_back(op_load_const, wrap_param<Value>(stash.create<ErrorValue>())); + } + virtual void visit(const TensorSum &node) { + if (is_typed(node) && is_typed_tensor_product_of_params(node.get_child(0))) { + assert(program.size() >= 3); // load,load,mul + program.pop_back(); // mul + program.pop_back(); // 
load + program.pop_back(); // load + std::vector<vespalib::string> dim_list; + if (!node.dimension().empty()) { + dim_list.push_back(node.dimension()); + } + auto a = as<Symbol>(node.get_child(0).get_child(0)); + auto b = as<Symbol>(node.get_child(0).get_child(1)); + auto ir = tensor_function::reduce(tensor_function::apply(operation::Mul(), + tensor_function::inject(types.get_type(*a), 0), + tensor_function::inject(types.get_type(*b), 1)), operation::Add(), dim_list); + auto fun = tensor_engine.compile(std::move(ir)); + const auto &meta = stash.create<TensorFunctionArgArgMeta>(std::move(fun), a->id(), b->id()); + program.emplace_back(op_tensor_function_arg_arg, wrap_param<TensorFunctionArgArgMeta>(meta)); + } else if (node.dimension().empty()) { + program.emplace_back(op_tensor_sum); + } else { + program.emplace_back(op_tensor_sum_dimension, + wrap_param<vespalib::string>(stash.create<vespalib::string>(node.dimension()))); + } + } + virtual void visit(const TensorMap &) { + // TODO(havardpe): add actual evaluation + program.emplace_back(op_load_const, wrap_param<Value>(stash.create<ErrorValue>())); + } + virtual void visit(const TensorJoin &) { + // TODO(havardpe): add actual evaluation + program.emplace_back(op_load_const, wrap_param<Value>(stash.create<ErrorValue>())); + } + virtual void visit(const TensorReduce &) { + // TODO(havardpe): add actual evaluation + program.emplace_back(op_load_const, wrap_param<Value>(stash.create<ErrorValue>())); + } + virtual void visit(const TensorRename &) { + // TODO(havardpe): add actual evaluation + program.emplace_back(op_load_const, wrap_param<Value>(stash.create<ErrorValue>())); + } + virtual void visit(const TensorLambda &) { + // TODO(havardpe): add actual evaluation + program.emplace_back(op_load_const, wrap_param<Value>(stash.create<ErrorValue>())); + } + virtual void visit(const TensorConcat &) { + // TODO(havardpe): add actual evaluation + program.emplace_back(op_load_const, 
wrap_param<Value>(stash.create<ErrorValue>())); + } + virtual void visit(const Add &) { + program.emplace_back(op_binary<operation::Add>); + } + virtual void visit(const Sub &) { + program.emplace_back(op_binary<operation::Sub>); + } + virtual void visit(const Mul &) { + program.emplace_back(op_binary<operation::Mul>); + } + virtual void visit(const Div &) { + program.emplace_back(op_binary<operation::Div>); + } + virtual void visit(const Pow &) { + program.emplace_back(op_binary<operation::Pow>); + } + virtual void visit(const Equal &) { + program.emplace_back(op_binary<operation::Equal>); + } + virtual void visit(const NotEqual &) { + program.emplace_back(op_binary<operation::NotEqual>); + } + virtual void visit(const Approx &) { + program.emplace_back(op_binary<operation::Approx>); + } + virtual void visit(const Less &) { + program.emplace_back(op_binary<operation::Less>); + } + virtual void visit(const LessEqual &) { + program.emplace_back(op_binary<operation::LessEqual>); + } + virtual void visit(const Greater &) { + program.emplace_back(op_binary<operation::Greater>); + } + virtual void visit(const GreaterEqual &) { + program.emplace_back(op_binary<operation::GreaterEqual>); + } + virtual void visit(const In &node) { + std::vector<size_t> checks; + node.lhs().traverse(*this); + auto array = as<Array>(node.rhs()); + if (array) { + for (size_t i = 0; i < array->size(); ++i) { + array->get(i).traverse(*this); + checks.push_back(program.size()); + program.emplace_back(op_check_member); + } + } else { + node.rhs().traverse(*this); + checks.push_back(program.size()); + program.emplace_back(op_check_member); + } + for (size_t i = 0; i < checks.size(); ++i) { + program[checks[i]].update_param(program.size() - checks[i]); + } + program.emplace_back(op_not_member); + } + virtual void visit(const And &) { + program.emplace_back(op_binary<operation::And>); + } + virtual void visit(const Or &) { + program.emplace_back(op_binary<operation::Or>); + } + virtual void 
visit(const Cos &) { + program.emplace_back(op_unary<operation::Cos>); + } + virtual void visit(const Sin &) { + program.emplace_back(op_unary<operation::Sin>); + } + virtual void visit(const Tan &) { + program.emplace_back(op_unary<operation::Tan>); + } + virtual void visit(const Cosh &) { + program.emplace_back(op_unary<operation::Cosh>); + } + virtual void visit(const Sinh &) { + program.emplace_back(op_unary<operation::Sinh>); + } + virtual void visit(const Tanh &) { + program.emplace_back(op_unary<operation::Tanh>); + } + virtual void visit(const Acos &) { + program.emplace_back(op_unary<operation::Acos>); + } + virtual void visit(const Asin &) { + program.emplace_back(op_unary<operation::Asin>); + } + virtual void visit(const Atan &) { + program.emplace_back(op_unary<operation::Atan>); + } + virtual void visit(const Exp &) { + program.emplace_back(op_unary<operation::Exp>); + } + virtual void visit(const Log10 &) { + program.emplace_back(op_unary<operation::Log10>); + } + virtual void visit(const Log &) { + program.emplace_back(op_unary<operation::Log>); + } + virtual void visit(const Sqrt &) { + program.emplace_back(op_unary<operation::Sqrt>); + } + virtual void visit(const Ceil &) { + program.emplace_back(op_unary<operation::Ceil>); + } + virtual void visit(const Fabs &) { + program.emplace_back(op_unary<operation::Fabs>); + } + virtual void visit(const Floor &) { + program.emplace_back(op_unary<operation::Floor>); + } + virtual void visit(const Atan2 &) { + program.emplace_back(op_binary<operation::Atan2>); + } + virtual void visit(const Ldexp &) { + program.emplace_back(op_binary<operation::Ldexp>); + } + virtual void visit(const Pow2 &) { + program.emplace_back(op_binary<operation::Pow>); + } + virtual void visit(const Fmod &) { + program.emplace_back(op_binary<operation::Fmod>); + } + virtual void visit(const Min &) { + program.emplace_back(op_binary<operation::Min>); + } + virtual void visit(const Max &) { + 
program.emplace_back(op_binary<operation::Max>); + } + virtual void visit(const IsNan &) { + program.emplace_back(op_unary<operation::IsNan>); + } + virtual void visit(const Relu &) { + program.emplace_back(op_unary<operation::Relu>); + } + virtual void visit(const Sigmoid &) { + program.emplace_back(op_unary<operation::Sigmoid>); + } + + //------------------------------------------------------------------------- + + virtual bool open(const Node &node) { + if (check_type<Array, If, Let, In>(node)) { + node.accept(*this); + return false; + } + return true; + } + + virtual void close(const Node &node) { + node.accept(*this); + } +}; + +} // namespace vespalib::<unnamed> + +InterpretedFunction::InterpretedFunction(const TensorEngine &engine, const nodes::Node &root, size_t num_params_in, const NodeTypes &types) + : _program(), + _stash(), + _num_params(num_params_in), + _tensor_engine(engine) +{ + ProgramBuilder program_builder(_program, _stash, _tensor_engine, types); + root.traverse(program_builder); +} + +const Value & +InterpretedFunction::eval(Context &ctx) const +{ + State &state = ctx._state; + state.clear(); + assert(state.params.size() == _num_params); + while (state.program_offset < _program.size()) { + _program[state.program_offset++].perform(state); + } + if (state.stack.size() != 1) { + state.stack.push_back(state.stash.create<ErrorValue>()); + } + return state.stack.back(); +} + +Function::Issues +InterpretedFunction::detect_issues(const Function &function) +{ + struct NotSupported : NodeTraverser { + std::vector<vespalib::string> issues; + bool open(const nodes::Node &) override { return true; } + void close(const nodes::Node &node) override { + if (nodes::check_type<nodes::TensorMap, + nodes::TensorJoin, + nodes::TensorReduce, + nodes::TensorRename, + nodes::TensorLambda, + nodes::TensorConcat>(node)) { + issues.push_back(make_string("unsupported node type: %s", + getClassName(node).c_str())); + } + } + } checker; + function.root().traverse(checker); + 
return Function::Issues(std::move(checker.issues)); +} + +} // namespace vespalib::eval +} // namespace vespalib diff --git a/eval/src/vespa/eval/eval/interpreted_function.h b/eval/src/vespa/eval/eval/interpreted_function.h new file mode 100644 index 00000000000..fa1ea6580dd --- /dev/null +++ b/eval/src/vespa/eval/eval/interpreted_function.h @@ -0,0 +1,101 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#pragma once + +#include "function.h" +#include <vespa/vespalib/util/stash.h> +#include "simple_tensor_engine.h" +#include "node_types.h" + +namespace vespalib { +namespace eval { + +namespace nodes { class Node; } +class TensorEngine; + +/** + * A Function that has been prepared for execution. This will + * typically run slower than a compiled function but faster than + * evaluating the Function AST directly. The + * InterpretedFunction::Context class is used to keep track of the + * run-time state related to the evaluation of an interpreted + * function. The result of an evaluation is only valid until either + * the context is destructed or the context is re-used to perform + * another evaluation. 
+ **/ +class InterpretedFunction +{ +public: + struct State { + std::vector<Value::CREF> params; + Stash stash; + std::vector<Value::CREF> stack; + std::vector<Value::CREF> let_values; + uint32_t program_offset; + uint32_t if_cnt; + State() : params(), stash(), stack(), let_values(), program_offset(0) {} + void clear() { + stash.clear(); + stack.clear(); + let_values.clear(); + program_offset = 0; + if_cnt = 0; + } + const Value &peek(size_t ridx) const { + return stack[stack.size() - 1 - ridx]; + } + void replace(size_t prune_cnt, const Value &value) { + for (size_t i = 0; i < prune_cnt; ++i) { + stack.pop_back(); + } + stack.push_back(value); + } + }; + class Context { + friend class InterpretedFunction; + private: + State _state; + Stash _param_stash; + public: + void clear_params() { + _state.params.clear(); + _param_stash.clear(); + } + void add_param(const Value ¶m) { _state.params.push_back(param); } + void add_param(double param) { add_param(_param_stash.create<DoubleValue>(param)); } + uint32_t if_cnt() const { return _state.if_cnt; } + }; + using op_function = void (*)(State &, uint64_t); + class Instruction { + private: + op_function function; + uint64_t param; + public: + explicit Instruction(op_function function_in) + : function(function_in), param(0) {} + Instruction(op_function function_in, uint64_t param_in) + : function(function_in), param(param_in) {} + void update_param(uint64_t param_in) { param = param_in; } + void perform(State &state) const { function(state, param); } + }; + +private: + std::vector<Instruction> _program; + Stash _stash; + size_t _num_params; + const TensorEngine &_tensor_engine; + +public: + typedef std::unique_ptr<InterpretedFunction> UP; + InterpretedFunction(const TensorEngine &engine, const nodes::Node &root, size_t num_params_in, const NodeTypes &types); + InterpretedFunction(const TensorEngine &engine, const Function &function, const NodeTypes &types) + : InterpretedFunction(engine, function.root(), 
function.num_params(), types) {} + InterpretedFunction(InterpretedFunction &&rhs) = default; + size_t program_size() const { return _program.size(); } + size_t num_params() const { return _num_params; } + const Value &eval(Context &ctx) const; + static Function::Issues detect_issues(const Function &function); +}; + +} // namespace vespalib::eval +} // namespace vespalib diff --git a/eval/src/vespa/eval/eval/key_gen.cpp b/eval/src/vespa/eval/eval/key_gen.cpp new file mode 100644 index 00000000000..3d0f1f67e29 --- /dev/null +++ b/eval/src/vespa/eval/eval/key_gen.cpp @@ -0,0 +1,102 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#include <vespa/fastos/fastos.h> +#include "key_gen.h" +#include "node_visitor.h" +#include "node_traverser.h" +#include "function.h" + +namespace vespalib { +namespace eval { + +using namespace nodes; + +namespace { + +struct KeyGen : public NodeVisitor, public NodeTraverser { + vespalib::string key; + + // build + void add_double(double value) { key.append(&value, sizeof(value)); } + void add_size(size_t value) { key.append(&value, sizeof(value)); } + void add_int(int value) { key.append(&value, sizeof(value)); } + void add_hash(uint32_t value) { key.append(&value, sizeof(value)); } + void add_byte(uint8_t value) { key.append(&value, sizeof(value)); } + void add_bool(bool value) { key.push_back(value ? 
'1' : '0'); } + + // visit + virtual void visit(const Number &node) { add_byte( 1); add_double(node.value()); } + virtual void visit(const Symbol &node) { add_byte( 2); add_int(node.id()); } + virtual void visit(const String &node) { add_byte( 3); add_hash(node.hash()); } + virtual void visit(const Array &node) { add_byte( 4); add_size(node.size()); } + virtual void visit(const Neg &) { add_byte( 5); } + virtual void visit(const Not &) { add_byte( 6); } + virtual void visit(const If &node) { add_byte( 7); add_double(node.p_true()); } + virtual void visit(const Let &) { add_byte( 8); } + virtual void visit(const Error &) { add_byte( 9); } + virtual void visit(const TensorSum &) { add_byte(10); } // dimensions should be part of key + virtual void visit(const TensorMap &) { add_byte(11); } // lambda should be part of key + virtual void visit(const TensorJoin &) { add_byte(12); } // lambda should be part of key + virtual void visit(const TensorReduce &) { add_byte(13); } // aggr/dimensions should be part of key + virtual void visit(const TensorRename &) { add_byte(14); } // dimensions should be part of key + virtual void visit(const TensorLambda &) { add_byte(15); } // type/lambda should be part of key + virtual void visit(const TensorConcat &) { add_byte(16); } // dimension should be part of key + virtual void visit(const Add &) { add_byte(20); } + virtual void visit(const Sub &) { add_byte(21); } + virtual void visit(const Mul &) { add_byte(22); } + virtual void visit(const Div &) { add_byte(23); } + virtual void visit(const Pow &) { add_byte(24); } + virtual void visit(const Equal &) { add_byte(25); } + virtual void visit(const NotEqual &) { add_byte(26); } + virtual void visit(const Approx &) { add_byte(27); } + virtual void visit(const Less &) { add_byte(28); } + virtual void visit(const LessEqual &) { add_byte(29); } + virtual void visit(const Greater &) { add_byte(30); } + virtual void visit(const GreaterEqual &) { add_byte(31); } + virtual void visit(const In 
&) { add_byte(32); } + virtual void visit(const And &) { add_byte(33); } + virtual void visit(const Or &) { add_byte(34); } + virtual void visit(const Cos &) { add_byte(35); } + virtual void visit(const Sin &) { add_byte(36); } + virtual void visit(const Tan &) { add_byte(37); } + virtual void visit(const Cosh &) { add_byte(38); } + virtual void visit(const Sinh &) { add_byte(39); } + virtual void visit(const Tanh &) { add_byte(40); } + virtual void visit(const Acos &) { add_byte(41); } + virtual void visit(const Asin &) { add_byte(42); } + virtual void visit(const Atan &) { add_byte(43); } + virtual void visit(const Exp &) { add_byte(44); } + virtual void visit(const Log10 &) { add_byte(45); } + virtual void visit(const Log &) { add_byte(46); } + virtual void visit(const Sqrt &) { add_byte(47); } + virtual void visit(const Ceil &) { add_byte(48); } + virtual void visit(const Fabs &) { add_byte(49); } + virtual void visit(const Floor &) { add_byte(50); } + virtual void visit(const Atan2 &) { add_byte(51); } + virtual void visit(const Ldexp &) { add_byte(52); } + virtual void visit(const Pow2 &) { add_byte(53); } + virtual void visit(const Fmod &) { add_byte(54); } + virtual void visit(const Min &) { add_byte(55); } + virtual void visit(const Max &) { add_byte(56); } + virtual void visit(const IsNan &) { add_byte(57); } + virtual void visit(const Relu &) { add_byte(58); } + virtual void visit(const Sigmoid &) { add_byte(59); } + + // traverse + virtual bool open(const Node &node) { node.accept(*this); return true; } + virtual void close(const Node &) {} +}; + +} // namespace vespalib::eval::<unnamed> + +vespalib::string gen_key(const Function &function, PassParams pass_params) +{ + KeyGen key_gen; + key_gen.add_bool(pass_params == PassParams::ARRAY); + key_gen.add_size(function.num_params()); + function.root().traverse(key_gen); + return key_gen.key; +} + +} // namespace vespalib::eval +} // namespace vespalib diff --git a/eval/src/vespa/eval/eval/key_gen.h 
b/eval/src/vespa/eval/eval/key_gen.h new file mode 100644 index 00000000000..c8479b1b457 --- /dev/null +++ b/eval/src/vespa/eval/eval/key_gen.h @@ -0,0 +1,21 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#pragma once + +#include <vespa/vespalib/stllike/string.h> + +namespace vespalib { +namespace eval { + +class Function; +enum class PassParams; + +/** + * Function used to generate a binary key that may be used to query + * the compilation cache. + **/ +vespalib::string gen_key(const Function &function, PassParams pass_params); + +} // namespace vespalib::eval +} // namespace vespalib + diff --git a/eval/src/vespa/eval/eval/llvm/CMakeLists.txt b/eval/src/vespa/eval/eval/llvm/CMakeLists.txt new file mode 100644 index 00000000000..238dceb4026 --- /dev/null +++ b/eval/src/vespa/eval/eval/llvm/CMakeLists.txt @@ -0,0 +1,12 @@ +# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +vespa_add_library(vespalib_vespalib_eval_llvm + SOURCES + compile_cache.cpp + compiled_function.cpp + deinline_forest.cpp + llvm_wrapper.cpp + INSTALL lib64 + DEPENDS + vespalib + LLVM-3.4 +) diff --git a/eval/src/vespa/eval/eval/llvm/compile_cache.cpp b/eval/src/vespa/eval/eval/llvm/compile_cache.cpp new file mode 100644 index 00000000000..1f7731da403 --- /dev/null +++ b/eval/src/vespa/eval/eval/llvm/compile_cache.cpp @@ -0,0 +1,66 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+ +#include <vespa/fastos/fastos.h> +#include "compile_cache.h" +#include <vespa/vespalib/eval/key_gen.h> +#include <thread> + +namespace vespalib { +namespace eval { + +std::mutex CompileCache::_lock; +CompileCache::Map CompileCache::_cached; + +void +CompileCache::release(Map::iterator entry) +{ + std::lock_guard<std::mutex> guard(_lock); + if (--(entry->second.num_refs) == 0) { + _cached.erase(entry); + } +} + +CompileCache::Token::UP +CompileCache::compile(const Function &function, PassParams pass_params) +{ + std::lock_guard<std::mutex> guard(_lock); + CompileContext compile_ctx(function, pass_params); + std::thread thread(do_compile, std::ref(compile_ctx)); + thread.join(); + return std::move(compile_ctx.token); +} + +size_t +CompileCache::num_cached() +{ + std::lock_guard<std::mutex> guard(_lock); + return _cached.size(); +} + +size_t +CompileCache::count_refs() +{ + std::lock_guard<std::mutex> guard(_lock); + size_t refs = 0; + for (const auto &entry: _cached) { + refs += entry.second.num_refs; + } + return refs; +} + +void +CompileCache::do_compile(CompileContext &ctx) { + vespalib::string key = gen_key(ctx.function, ctx.pass_params); + auto pos = _cached.find(key); + if (pos != _cached.end()) { + ++(pos->second.num_refs); + ctx.token.reset(new Token(pos)); + } else { + auto res = _cached.emplace(std::move(key), Value(CompiledFunction(ctx.function, ctx.pass_params))); + assert(res.second); + ctx.token.reset(new Token(res.first)); + } +} + +} // namespace vespalib::eval +} // namespace vespalib diff --git a/eval/src/vespa/eval/eval/llvm/compile_cache.h b/eval/src/vespa/eval/eval/llvm/compile_cache.h new file mode 100644 index 00000000000..f137b37ba85 --- /dev/null +++ b/eval/src/vespa/eval/eval/llvm/compile_cache.h @@ -0,0 +1,68 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+ +#pragma once + +#include "compiled_function.h" +#include <mutex> + +namespace vespalib { +namespace eval { + +/** + * A compilation cache used to reduce application configuration cost + * by not having to compile equivalent expressions multiple times. The + * expression AST is used to produce a binary key that in turn is used + * to query the cache. The cache itself will not keep anything alive, + * but will let you find compiled functions that are currently in use + * by others. + **/ +class CompileCache +{ +private: + typedef vespalib::string Key; + struct Value { + size_t num_refs; + CompiledFunction cf; + Value(CompiledFunction &&cf_in) : num_refs(1), cf(std::move(cf_in)) {} + }; + typedef std::map<Key,Value> Map; + static std::mutex _lock; + static Map _cached; + + static void release(Map::iterator entry); + +public: + class Token + { + private: + friend class CompileCache; + CompileCache::Map::iterator entry; + explicit Token(CompileCache::Map::iterator entry_in) + : entry(entry_in) {} + public: + typedef std::unique_ptr<Token> UP; + const CompiledFunction &get() const { return entry->second.cf; } + ~Token() { CompileCache::release(entry); } + }; + static Token::UP compile(const Function &function, PassParams pass_params); + static size_t num_cached(); + static size_t count_refs(); + +private: + struct CompileContext { + const Function &function; + PassParams pass_params; + Token::UP token; + CompileContext(const Function &function_in, + PassParams pass_params_in) + : function(function_in), + pass_params(pass_params_in), + token() {} + }; + + static void do_compile(CompileContext &ctx); +}; + +} // namespace vespalib::eval +} // namespace vespalib + diff --git a/eval/src/vespa/eval/eval/llvm/compiled_function.cpp b/eval/src/vespa/eval/eval/llvm/compiled_function.cpp new file mode 100644 index 00000000000..5c6214c503a --- /dev/null +++ b/eval/src/vespa/eval/eval/llvm/compiled_function.cpp @@ -0,0 +1,77 @@ +// Copyright 2016 Yahoo Inc. 
Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#include <vespa/fastos/fastos.h> +#include "compiled_function.h" +#include <vespa/vespalib/util/benchmark_timer.h> +#include <vespa/vespalib/eval/node_traverser.h> +#include <vespa/vespalib/eval/check_type.h> +#include <vespa/vespalib/eval/tensor_nodes.h> +#include <vespa/vespalib/util/classname.h> + +namespace vespalib { +namespace eval { + +namespace { + +double empty_function(const double *) { return 0.0; } + +} // namespace vespalib::eval::<unnamed> + +CompiledFunction::CompiledFunction(const Function &function_in, PassParams pass_params_in, + const gbdt::Optimize::Chain &forest_optimizers) + : _llvm_wrapper(), + _address(nullptr), + _num_params(function_in.num_params()), + _pass_params(pass_params_in) +{ + _address = _llvm_wrapper.compile_function(function_in.num_params(), + (_pass_params == PassParams::ARRAY), + function_in.root(), + forest_optimizers); +} + +CompiledFunction::CompiledFunction(CompiledFunction &&rhs) + : _llvm_wrapper(std::move(rhs._llvm_wrapper)), + _address(rhs._address), + _num_params(rhs._num_params), + _pass_params(rhs._pass_params) +{ + rhs._address = nullptr; +} + +double +CompiledFunction::estimate_cost_us(const std::vector<double> ¶ms) const +{ + assert(_pass_params == PassParams::ARRAY); + assert(params.size() == _num_params); + auto function = get_function(); + auto actual = [&](){function(¶ms[0]);}; + auto baseline = [&](){empty_function(¶ms[0]);}; + return BenchmarkTimer::benchmark(actual, baseline, 4.0) * 1000.0 * 1000.0; +} + +Function::Issues +CompiledFunction::detect_issues(const Function &function) +{ + struct NotSupported : NodeTraverser { + std::vector<vespalib::string> issues; + bool open(const nodes::Node &) override { return true; } + void close(const nodes::Node &node) override { + if (nodes::check_type<nodes::TensorSum, + nodes::TensorMap, + nodes::TensorJoin, + nodes::TensorReduce, + nodes::TensorRename, + nodes::TensorLambda, + 
nodes::TensorConcat>(node)) { + issues.push_back(make_string("unsupported node type: %s", + getClassName(node).c_str())); + } + } + } checker; + function.root().traverse(checker); + return Function::Issues(std::move(checker.issues)); +} + +} // namespace vespalib::eval +} // namespace vespalib diff --git a/eval/src/vespa/eval/eval/llvm/compiled_function.h b/eval/src/vespa/eval/eval/llvm/compiled_function.h new file mode 100644 index 00000000000..3b10d9a20c5 --- /dev/null +++ b/eval/src/vespa/eval/eval/llvm/compiled_function.h @@ -0,0 +1,63 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#pragma once + +#include <vespa/vespalib/eval/function.h> +#include <vespa/vespalib/eval/gbdt.h> +#include "llvm_wrapper.h" + +namespace vespalib { +namespace eval { + +/** + * A Function that has been compiled to machine code using LLVM. Note + * that tensors are currently not supported for compiled functions. + **/ +class CompiledFunction +{ +public: + // expand<N>::type will resolve to the type of a function that + // takes N separate double parameters and returns double. + + // count down N and add a single double parameter to the list of parameters + template <size_t N, typename... T> struct expand : expand<N - 1, double, T...> {}; + // when N is 0; define 'type' with the list of collected parameters + template <typename... 
T> struct expand<0, T...> { using type = double(*)(T...); }; + + using array_function = double (*)(const double *); + +private: + LLVMWrapper _llvm_wrapper; + void *_address; + size_t _num_params; + PassParams _pass_params; + +public: + typedef std::unique_ptr<CompiledFunction> UP; + CompiledFunction(const Function &function_in, PassParams pass_params_in, + const gbdt::Optimize::Chain &forest_optimizers); + CompiledFunction(const Function &function_in, PassParams pass_params_in) + : CompiledFunction(function_in, pass_params_in, gbdt::Optimize::best) {} + CompiledFunction(CompiledFunction &&rhs); + size_t num_params() const { return _num_params; } + PassParams pass_params() const { return _pass_params; } + template <size_t NUM_PARAMS> + typename expand<NUM_PARAMS>::type get_function() const { + assert(_pass_params == PassParams::SEPARATE); + assert(_num_params == NUM_PARAMS); + return ((typename expand<NUM_PARAMS>::type)_address); + } + array_function get_function() const { + assert(_pass_params == PassParams::ARRAY); + return ((array_function)_address); + } + const std::vector<gbdt::Forest::UP> &get_forests() const { + return _llvm_wrapper.get_forests(); + } + void dump() const { _llvm_wrapper.dump(); } + double estimate_cost_us(const std::vector<double> ¶ms) const; + static Function::Issues detect_issues(const Function &function); +}; + +} // namespace vespalib::eval +} // namespace vespalib diff --git a/eval/src/vespa/eval/eval/llvm/deinline_forest.cpp b/eval/src/vespa/eval/eval/llvm/deinline_forest.cpp new file mode 100644 index 00000000000..b0b71ed1601 --- /dev/null +++ b/eval/src/vespa/eval/eval/llvm/deinline_forest.cpp @@ -0,0 +1,47 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+ +#include <vespa/fastos/fastos.h> +#include "deinline_forest.h" + +namespace vespalib { +namespace eval { +namespace gbdt { + +DeinlineForest::DeinlineForest(const std::vector<const nodes::Node *> &trees) +{ + size_t idx = 0; + while (idx < trees.size()) { + size_t fragment_size = 0; + std::vector<const nodes::Node *> fragment; + while ((idx < trees.size()) && (fragment_size < 256)) { + fragment_size += TreeStats(*trees[idx]).size; + fragment.push_back(trees[idx++]); + } + void *address = _llvm_wrapper.compile_forest_fragment(fragment); + _fragments.push_back((array_function)address); + } +} + +Optimize::Result +DeinlineForest::optimize(const ForestStats &, + const std::vector<const nodes::Node *> &trees) +{ + return Optimize::Result(Forest::UP(new DeinlineForest(trees)), eval); +} + +double +DeinlineForest::eval(const Forest *forest, const double *input) +{ + const DeinlineForest &self = *((const DeinlineForest *)forest); + double sum = 0.0; + for (auto fragment: self._fragments) { + sum += fragment(input); + } + return sum; +} + +Optimize::Chain DeinlineForest::optimize_chain({optimize}); + +} // namespace vespalib::eval::gbdt +} // namespace vespalib::eval +} // namespace vespalib diff --git a/eval/src/vespa/eval/eval/llvm/deinline_forest.h b/eval/src/vespa/eval/eval/llvm/deinline_forest.h new file mode 100644 index 00000000000..f44d1b3fc8b --- /dev/null +++ b/eval/src/vespa/eval/eval/llvm/deinline_forest.h @@ -0,0 +1,34 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#pragma once + +#include <vespa/vespalib/eval/gbdt.h> +#include "llvm_wrapper.h" + +namespace vespalib { +namespace eval { +namespace gbdt { + +/** + * GBDT forest optimizer performing automatic function de-inlining. 
+ **/ +class DeinlineForest : public Forest +{ +private: + using array_function = double (*)(const double *); + + LLVMWrapper _llvm_wrapper; + std::vector<array_function> _fragments; + +public: + explicit DeinlineForest(const std::vector<const nodes::Node *> &trees); + static Optimize::Result optimize(const ForestStats &stats, + const std::vector<const nodes::Node *> &trees); + static double eval(const Forest *forest, const double *input); + static Optimize::Chain optimize_chain; +}; + +} // namespace vespalib::eval::gbdt +} // namespace vespalib::eval +} // namespace vespalib + diff --git a/eval/src/vespa/eval/eval/llvm/llvm_wrapper.cpp b/eval/src/vespa/eval/eval/llvm/llvm_wrapper.cpp new file mode 100644 index 00000000000..1222eeae837 --- /dev/null +++ b/eval/src/vespa/eval/eval/llvm/llvm_wrapper.cpp @@ -0,0 +1,626 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#include <cmath> +#include "llvm_wrapper.h" +#include <vespa/vespalib/eval/node_visitor.h> +#include <vespa/vespalib/eval/node_traverser.h> +#include <llvm/Analysis/Verifier.h> +#include <llvm/IR/IRBuilder.h> +#include <llvm/IR/Intrinsics.h> +#include <llvm/ExecutionEngine/ExecutionEngine.h> +#include <llvm/Analysis/Passes.h> +#include <llvm/IR/DataLayout.h> +#include <llvm/Transforms/Scalar.h> +#include <llvm/LinkAllPasses.h> +#include <llvm/Transforms/IPO/PassManagerBuilder.h> +#include <vespa/vespalib/eval/check_type.h> +#include <vespa/vespalib/stllike/hash_set.h> +#include <vespa/vespalib/util/approx.h> + +double vespalib_eval_ldexp(double a, double b) { return std::ldexp(a, b); } +double vespalib_eval_min(double a, double b) { return std::min(a, b); } +double vespalib_eval_max(double a, double b) { return std::max(a, b); } +double vespalib_eval_isnan(double a) { return (std::isnan(a) ? 1.0 : 0.0); } +double vespalib_eval_approx(double a, double b) { return (vespalib::approx_equal(a, b) ? 
1.0 : 0.0); } +double vespalib_eval_relu(double a) { return std::max(a, 0.0); } +double vespalib_eval_sigmoid(double a) { return 1.0 / (1.0 + std::exp(-1.0 * a)); } + +namespace vespalib { +namespace eval { + +using namespace nodes; + +namespace { + +struct SetMemberHash : PluginState { + vespalib::hash_set<double> members; + explicit SetMemberHash(const Array &array) : members(array.size() * 3) { + for (size_t i = 0; i < array.size(); ++i) { + members.insert(array.get(i).get_const_value()); + } + } + static bool check_membership(const PluginState *state, double value) { + const SetMemberHash &my_state = *((const SetMemberHash *)state); + return (my_state.members.find(value) != my_state.members.end()); + } +}; + +struct FunctionBuilder : public NodeVisitor, public NodeTraverser { + + llvm::ExecutionEngine &engine; + llvm::LLVMContext &context; + llvm::Module &module; + llvm::IRBuilder<> builder; + std::vector<llvm::Value*> params; + std::vector<llvm::Value*> values; + std::vector<llvm::Value*> let_values; + llvm::Function *function; + bool use_array; + bool inside_forest; + const Node *forest_end; + const gbdt::Optimize::Chain &forest_optimizers; + std::vector<gbdt::Forest::UP> &forests; + std::vector<PluginState::UP> &plugin_state; + + FunctionBuilder(llvm::ExecutionEngine &engine_in, + llvm::LLVMContext &context_in, + llvm::Module &module_in, + const vespalib::string &name_in, + size_t num_params_in, + bool use_array_in, + const gbdt::Optimize::Chain &forest_optimizers_in, + std::vector<gbdt::Forest::UP> &forests_out, + std::vector<PluginState::UP> &plugin_state_out) + : engine(engine_in), + context(context_in), + module(module_in), + builder(context), + params(), + values(), + let_values(), + function(nullptr), + use_array(use_array_in), + inside_forest(false), + forest_end(nullptr), + forest_optimizers(forest_optimizers_in), + forests(forests_out), + plugin_state(plugin_state_out) + { + std::vector<llvm::Type*> param_types; + if (use_array_in) { + 
param_types.push_back(builder.getDoubleTy()->getPointerTo()); + } else { + param_types.resize(num_params_in, builder.getDoubleTy()); + } + llvm::FunctionType *function_type = llvm::FunctionType::get(builder.getDoubleTy(), param_types, false); + function = llvm::Function::Create(function_type, llvm::Function::ExternalLinkage, name_in.c_str(), &module); + function->addFnAttr(llvm::Attribute::AttrKind::NoInline); + llvm::BasicBlock *block = llvm::BasicBlock::Create(context, "entry", function); + builder.SetInsertPoint(block); + for (llvm::Function::arg_iterator itr = function->arg_begin(); itr != function->arg_end(); ++itr) { + params.push_back(itr); + } + } + + //------------------------------------------------------------------------- + + llvm::Value *get_param(size_t idx) { + if (!use_array) { + assert(idx < params.size()); + return params[idx]; + } + assert(params.size() == 1); + llvm::Value *param_array = params[0]; + llvm::Value *addr = builder.CreateGEP(param_array, builder.getInt64(idx)); + return builder.CreateLoad(addr); + } + + //------------------------------------------------------------------------- + + void push(llvm::Value *value) { + values.push_back(value); + } + + void discard() { + assert(!values.empty()); + values.pop_back(); + } + + llvm::Value *pop_bool() { + assert(!values.empty()); + llvm::Value *value = values.back(); + values.pop_back(); + if (value->getType()->isIntegerTy(1)) { + return value; + } + assert(value->getType()->isDoubleTy()); + return builder.CreateFCmpUNE(value, llvm::ConstantFP::get(context, llvm::APFloat(0.0)), "as_bool"); + } + + llvm::Value *pop_double() { + assert(!values.empty()); + llvm::Value *value = values.back(); + values.pop_back(); + if (value->getType()->isDoubleTy()) { + return value; + } + assert(value->getType()->isIntegerTy(1)); + return builder.CreateUIToFP(value, builder.getDoubleTy(), "as_double"); + } + + //------------------------------------------------------------------------- + + bool 
try_optimize_forest(const Node &item) { + auto trees = gbdt::extract_trees(item); + gbdt::ForestStats stats(trees); + auto optimize_result = gbdt::Optimize::apply_chain(forest_optimizers, stats, trees); + if (!optimize_result.valid()) { + return false; + } + forests.push_back(std::move(optimize_result.forest)); + void *eval_ptr = (void *) optimize_result.eval; + gbdt::Forest *forest = forests.back().get(); + std::vector<llvm::Type*> param_types; + param_types.push_back(builder.getVoidTy()->getPointerTo()); + param_types.push_back(builder.getDoubleTy()->getPointerTo()); + llvm::FunctionType *function_type = llvm::FunctionType::get(builder.getDoubleTy(), param_types, false); + llvm::PointerType *function_pointer_type = llvm::PointerType::get(function_type, 0); + llvm::Value *eval_fun = builder.CreateIntToPtr(builder.getInt64((uint64_t)eval_ptr), function_pointer_type, "inject_eval"); + llvm::Value *ctx = builder.CreateIntToPtr(builder.getInt64((uint64_t)forest), builder.getVoidTy()->getPointerTo(), "inject_ctx"); + push(builder.CreateCall2(eval_fun, ctx, function->arg_begin(), "call_eval")); + return true; + } + + //------------------------------------------------------------------------- + + bool open(const Node &node) { + if (node.is_const()) { + push_double(node.get_const_value()); + return false; + } + if (!inside_forest && use_array && node.is_forest()) { + if (try_optimize_forest(node)) { + return false; + } + inside_forest = true; + forest_end = &node; + } + if (check_type<Array, If, Let, In>(node)) { + node.accept(*this); + return false; + } + return true; + } + + void close(const Node &node) { + node.accept(*this); + if (inside_forest && (forest_end == &node)) { + inside_forest = false; + forest_end = nullptr; + } + } + + //------------------------------------------------------------------------- + + void build_root(const Node &node) { + node.traverse(*this); + } + + void build_forest_fragment(const std::vector<const Node *> &trees) { + inside_forest = true; 
+ assert(!trees.empty()); + llvm::Value *sum = nullptr; + for (auto tree: trees) { + tree->traverse(*this); + llvm::Value *tree_value = pop_double(); + sum = (sum) + ? builder.CreateFAdd(sum, tree_value, "add_tree") + : tree_value; + } + push(sum); + inside_forest = false; + } + + void *compile() { + builder.CreateRet(pop_double()); + assert(values.empty()); + llvm::verifyFunction(*function); + return engine.getPointerToFunction(function); + } + + //------------------------------------------------------------------------- + + void push_double(double value) { + push(llvm::ConstantFP::get(builder.getDoubleTy(), value)); + } + + void make_error(size_t num_children) { + for (size_t i = 0; i < num_children; ++i) { + discard(); + } + push_double(error_value); + } + + void make_call_1(llvm::Function *fun) { + if (fun == nullptr || fun->arg_size() != 1) { + return make_error(1); + } + llvm::Value *a = pop_double(); + push(builder.CreateCall(fun, a)); + } + void make_call_1(const llvm::Intrinsic::ID &id) { + make_call_1(llvm::Intrinsic::getDeclaration(&module, id, builder.getDoubleTy())); + } + void make_call_1(const char *name) { + make_call_1(dynamic_cast<llvm::Function*>(module.getOrInsertFunction(name, + builder.getDoubleTy(), + builder.getDoubleTy(), nullptr))); + } + + void make_call_2(llvm::Function *fun) { + if (fun == nullptr || fun->arg_size() != 2) { + return make_error(2); + } + llvm::Value *b = pop_double(); + llvm::Value *a = pop_double(); + push(builder.CreateCall2(fun, a, b)); + } + void make_call_2(const llvm::Intrinsic::ID &id) { + make_call_2(llvm::Intrinsic::getDeclaration(&module, id, builder.getDoubleTy())); + } + void make_call_2(const char *name) { + make_call_2(dynamic_cast<llvm::Function*>(module.getOrInsertFunction(name, + builder.getDoubleTy(), + builder.getDoubleTy(), + builder.getDoubleTy(), nullptr))); + } + + //------------------------------------------------------------------------- + + // basic nodes + + virtual void visit(const Number 
&item) { + push_double(item.value()); + } + virtual void visit(const Symbol &item) { + if (item.id() >= 0) { + push(get_param(item.id())); + } else { + int let_offset = -(item.id() + 1); + assert(size_t(let_offset) < let_values.size()); + push(let_values[let_offset]); + } + } + virtual void visit(const String &item) { + push_double(item.hash()); + } + virtual void visit(const Array &item) { + // NB: visit not open + push_double(item.size()); + } + virtual void visit(const Neg &) { + llvm::Value *child = pop_double(); + push(builder.CreateFNeg(child, "neg_res")); + } + virtual void visit(const Not &) { + llvm::Value *child = pop_bool(); + push(builder.CreateNot(child, "not_res")); + } + virtual void visit(const If &item) { + // NB: visit not open + llvm::BasicBlock *true_block = llvm::BasicBlock::Create(context, "true_block", function); + llvm::BasicBlock *false_block = llvm::BasicBlock::Create(context, "false_block", function); + llvm::BasicBlock *merge_block = llvm::BasicBlock::Create(context, "merge_block", function); + item.cond().traverse(*this); // NB: recursion + llvm::Value *cond = pop_bool(); + builder.CreateCondBr(cond, true_block, false_block); + // true block + builder.SetInsertPoint(true_block); + item.true_expr().traverse(*this); // NB: recursion + llvm::Value *true_res = pop_double(); + llvm::BasicBlock *true_end = builder.GetInsertBlock(); + builder.CreateBr(merge_block); + // false block + builder.SetInsertPoint(false_block); + item.false_expr().traverse(*this); // NB: recursion + llvm::Value *false_res = pop_double(); + llvm::BasicBlock *false_end = builder.GetInsertBlock(); + builder.CreateBr(merge_block); + // merge block + builder.SetInsertPoint(merge_block); + llvm::PHINode *phi = builder.CreatePHI(builder.getDoubleTy(), 2, "if_res"); + phi->addIncoming(true_res, true_end); + phi->addIncoming(false_res, false_end); + push(phi); + } + virtual void visit(const Let &item) { + // NB: visit not open + item.value().traverse(*this); // NB: recursion + 
let_values.push_back(pop_double()); + item.expr().traverse(*this); // NB: recursion + let_values.pop_back(); + } + virtual void visit(const Error &) { + make_error(0); + } + + // tensor nodes (not supported in compiled expressions) + + virtual void visit(const TensorSum &node) { + make_error(node.num_children()); + } + virtual void visit(const TensorMap &node) { + make_error(node.num_children()); + } + virtual void visit(const TensorJoin &node) { + make_error(node.num_children()); + } + virtual void visit(const TensorReduce &node) { + make_error(node.num_children()); + } + virtual void visit(const TensorRename &node) { + make_error(node.num_children()); + } + virtual void visit(const TensorLambda &node) { + make_error(node.num_children()); + } + virtual void visit(const TensorConcat &node) { + make_error(node.num_children()); + } + + // operator nodes + + virtual void visit(const Add &) { + llvm::Value *b = pop_double(); + llvm::Value *a = pop_double(); + push(builder.CreateFAdd(a, b, "add_res")); + } + virtual void visit(const Sub &) { + llvm::Value *b = pop_double(); + llvm::Value *a = pop_double(); + push(builder.CreateFSub(a, b, "sub_res")); + } + virtual void visit(const Mul &) { + llvm::Value *b = pop_double(); + llvm::Value *a = pop_double(); + push(builder.CreateFMul(a, b, "mul_res")); + } + virtual void visit(const Div &) { + llvm::Value *b = pop_double(); + llvm::Value *a = pop_double(); + push(builder.CreateFDiv(a, b, "div_res")); + } + virtual void visit(const Pow &) { + make_call_2(llvm::Intrinsic::pow); + } + virtual void visit(const Equal &) { + llvm::Value *b = pop_double(); + llvm::Value *a = pop_double(); + push(builder.CreateFCmpOEQ(a, b, "cmp_eq_res")); + } + virtual void visit(const NotEqual &) { + llvm::Value *b = pop_double(); + llvm::Value *a = pop_double(); + push(builder.CreateFCmpUNE(a, b, "cmp_ne_res")); + } + virtual void visit(const Approx &) { + make_call_2("vespalib_eval_approx"); + } + virtual void visit(const Less &) { + 
llvm::Value *b = pop_double(); + llvm::Value *a = pop_double(); + push(builder.CreateFCmpOLT(a, b, "cmp_lt_res")); + } + virtual void visit(const LessEqual &) { + llvm::Value *b = pop_double(); + llvm::Value *a = pop_double(); + push(builder.CreateFCmpOLE(a, b, "cmp_le_res")); + } + virtual void visit(const Greater &) { + llvm::Value *b = pop_double(); + llvm::Value *a = pop_double(); + push(builder.CreateFCmpOGT(a, b, "cmp_gt_res")); + } + virtual void visit(const GreaterEqual &) { + llvm::Value *b = pop_double(); + llvm::Value *a = pop_double(); + push(builder.CreateFCmpOGE(a, b, "cmp_ge_res")); + } + virtual void visit(const In &item) { + // NB: visit not open + item.lhs().traverse(*this); // NB: recursion + llvm::Value *lhs = pop_double(); + auto array = as<Array>(item.rhs()); + if (array) { + if (array->is_const() && array->size() > 8) { + // build call to hash lookup + plugin_state.emplace_back(new SetMemberHash(*array)); + void *call_ptr = (void *) SetMemberHash::check_membership; + PluginState *state = plugin_state.back().get(); + std::vector<llvm::Type*> param_types; + param_types.push_back(builder.getVoidTy()->getPointerTo()); + param_types.push_back(builder.getDoubleTy()); + llvm::FunctionType *function_type = llvm::FunctionType::get(builder.getInt1Ty(), param_types, false); + llvm::PointerType *function_pointer_type = llvm::PointerType::get(function_type, 0); + llvm::Value *call_fun = builder.CreateIntToPtr(builder.getInt64((uint64_t)call_ptr), function_pointer_type, "inject_call_addr"); + llvm::Value *ctx = builder.CreateIntToPtr(builder.getInt64((uint64_t)state), builder.getVoidTy()->getPointerTo(), "inject_ctx"); + push(builder.CreateCall2(call_fun, ctx, lhs, "call_check_membership")); + } else { + // build explicit code to check all set members + llvm::Value *found = builder.getFalse(); + for (size_t i = 0; i < array->size(); ++i) { + array->get(i).traverse(*this); // NB: recursion + llvm::Value *elem = pop_double(); + llvm::Value *elem_eq = 
builder.CreateFCmpOEQ(lhs, elem, "elem_eq"); + found = builder.CreateOr(found, elem_eq, "found"); + } + push(found); + } + } else { + item.rhs().traverse(*this); // NB: recursion + llvm::Value *rhs = pop_double(); + push(builder.CreateFCmpOEQ(lhs, rhs, "rhs_eq")); + } + } + virtual void visit(const And &) { + llvm::Value *b = pop_bool(); + llvm::Value *a = pop_bool(); + push(builder.CreateAnd(a, b, "and_res")); + } + virtual void visit(const Or &) { + llvm::Value *b = pop_bool(); + llvm::Value *a = pop_bool(); + push(builder.CreateOr(a, b, "or_res")); + } + + // call nodes + + virtual void visit(const Cos &) { + make_call_1(llvm::Intrinsic::cos); + } + virtual void visit(const Sin &) { + make_call_1(llvm::Intrinsic::sin); + } + virtual void visit(const Tan &) { + make_call_1("tan"); + } + virtual void visit(const Cosh &) { + make_call_1("cosh"); + } + virtual void visit(const Sinh &) { + make_call_1("sinh"); + } + virtual void visit(const Tanh &) { + make_call_1("tanh"); + } + virtual void visit(const Acos &) { + make_call_1("acos"); + } + virtual void visit(const Asin &) { + make_call_1("asin"); + } + virtual void visit(const Atan &) { + make_call_1("atan"); + } + virtual void visit(const Exp &) { + make_call_1(llvm::Intrinsic::exp); + } + virtual void visit(const Log10 &) { + make_call_1(llvm::Intrinsic::log10); + } + virtual void visit(const Log &) { + make_call_1(llvm::Intrinsic::log); + } + virtual void visit(const Sqrt &) { + make_call_1(llvm::Intrinsic::sqrt); + } + virtual void visit(const Ceil &) { + make_call_1(llvm::Intrinsic::ceil); + } + virtual void visit(const Fabs &) { + make_call_1(llvm::Intrinsic::fabs); + } + virtual void visit(const Floor &) { + make_call_1(llvm::Intrinsic::floor); + } + virtual void visit(const Atan2 &) { + make_call_2("atan2"); + } + virtual void visit(const Ldexp &) { + make_call_2("vespalib_eval_ldexp"); + } + virtual void visit(const Pow2 &) { + make_call_2(llvm::Intrinsic::pow); + } + virtual void visit(const Fmod &) { + 
make_call_2("fmod"); + } + virtual void visit(const Min &) { + make_call_2("vespalib_eval_min"); + } + virtual void visit(const Max &) { + make_call_2("vespalib_eval_max"); + } + virtual void visit(const IsNan &) { + make_call_1("vespalib_eval_isnan"); + } + virtual void visit(const Relu &) { + make_call_1("vespalib_eval_relu"); + } + virtual void visit(const Sigmoid &) { + make_call_1("vespalib_eval_sigmoid"); + } +}; + +} // namespace vespalib::eval::<unnamed> + +struct InitializeNativeTarget { + InitializeNativeTarget() { + LLVMInitializeNativeTarget(); + } +} initialize_native_target; + +std::recursive_mutex LLVMWrapper::_global_llvm_lock; + +LLVMWrapper::LLVMWrapper() + : _context(nullptr), + _module(nullptr), + _engine(nullptr), + _num_functions(0), + _forests(), + _plugin_state() +{ + std::lock_guard<std::recursive_mutex> guard(_global_llvm_lock); + _context = new llvm::LLVMContext(); + _module = new llvm::Module("LLVMWrapper", *_context); + _engine = llvm::EngineBuilder(_module).setOptLevel(llvm::CodeGenOpt::Aggressive).create(); + assert(_engine != nullptr && "llvm jit not available for your platform"); +} + +LLVMWrapper::LLVMWrapper(LLVMWrapper &&rhs) + : _context(rhs._context), + _module(rhs._module), + _engine(rhs._engine), + _num_functions(rhs._num_functions), + _forests(std::move(rhs._forests)), + _plugin_state(std::move(rhs._plugin_state)) +{ + rhs._context = nullptr; + rhs._module = nullptr; + rhs._engine = nullptr; +} + +void * +LLVMWrapper::compile_function(size_t num_params, bool use_array, const Node &root, + const gbdt::Optimize::Chain &forest_optimizers) +{ + std::lock_guard<std::recursive_mutex> guard(_global_llvm_lock); + FunctionBuilder builder(*_engine, *_context, *_module, + vespalib::make_string("f%zu", ++_num_functions), + num_params, use_array, + forest_optimizers, _forests, _plugin_state); + builder.build_root(root); + return builder.compile(); +} + +void * +LLVMWrapper::compile_forest_fragment(const std::vector<const Node *> 
&fragment) +{ + std::lock_guard<std::recursive_mutex> guard(_global_llvm_lock); + FunctionBuilder builder(*_engine, *_context, *_module, + vespalib::make_string("f%zu", ++_num_functions), + 0, true, + gbdt::Optimize::none, _forests, _plugin_state); + builder.build_forest_fragment(fragment); + return builder.compile(); +} + +LLVMWrapper::~LLVMWrapper() { + std::lock_guard<std::recursive_mutex> guard(_global_llvm_lock); + delete _engine; + // _module is owned by _engine + delete _context; +} + +} // namespace vespalib::eval +} // namespace vespalib diff --git a/eval/src/vespa/eval/eval/llvm/llvm_wrapper.h b/eval/src/vespa/eval/eval/llvm/llvm_wrapper.h new file mode 100644 index 00000000000..ffb4fbd4986 --- /dev/null +++ b/eval/src/vespa/eval/eval/llvm/llvm_wrapper.h @@ -0,0 +1,68 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#pragma once + +#include <vespa/vespalib/eval/function.h> +#include <vespa/vespalib/eval/gbdt.h> + +#include <llvm/IR/LLVMContext.h> +#include <llvm/IR/Module.h> +#include <llvm/ExecutionEngine/ExecutionEngine.h> +#include <llvm/PassManager.h> +#include <mutex> + +extern "C" { + double vespalib_eval_ldexp(double a, double b); + double vespalib_eval_min(double a, double b); + double vespalib_eval_max(double a, double b); + double vespalib_eval_isnan(double a); + double vespalib_eval_approx(double a, double b); + double vespalib_eval_relu(double a); + double vespalib_eval_sigmoid(double a); +}; + +namespace vespalib { +namespace eval { + +/** + * Simple interface used to track and clean up custom state. This is + * typically used to destruct native objects that are invoked from + * within the generated machine code as part of evaluation. An example + * is that large set membership checks against constant values will be + * transformed into lookups in a pre-generated hash table. 
+ **/ +struct PluginState { + using UP = std::unique_ptr<PluginState>; + virtual ~PluginState() {} +}; + +/** + * Stuff related to LLVM code generation is wrapped in this + * class. This is mostly used by the CompiledFunction class. + **/ +class LLVMWrapper +{ +private: + llvm::LLVMContext *_context; + llvm::Module *_module; // owned by engine + llvm::ExecutionEngine *_engine; + size_t _num_functions; + std::vector<gbdt::Forest::UP> _forests; + std::vector<PluginState::UP> _plugin_state; + + static std::recursive_mutex _global_llvm_lock; + +public: + LLVMWrapper(); + LLVMWrapper(LLVMWrapper &&rhs); + void *compile_function(size_t num_params, bool use_array, const nodes::Node &root, + const gbdt::Optimize::Chain &forest_optimizers); + void *compile_forest_fragment(const std::vector<const nodes::Node *> &fragment); + const std::vector<gbdt::Forest::UP> &get_forests() const { return _forests; } + void dump() const { _module->dump(); } + ~LLVMWrapper(); +}; + +} // namespace vespalib::eval +} // namespace vespalib + diff --git a/eval/src/vespa/eval/eval/node_traverser.h b/eval/src/vespa/eval/eval/node_traverser.h new file mode 100644 index 00000000000..0ba32a728c3 --- /dev/null +++ b/eval/src/vespa/eval/eval/node_traverser.h @@ -0,0 +1,28 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#pragma once + +#include "basic_nodes.h" + +namespace vespalib { +namespace eval { + +/** + * Interface used when traversing nodes. The open function is called + * before any children are traversed and the close function is called + * after all children are traversed. Children are traversed in the + * order defined by the Node::get_child function. If open returns + * false; no children of the node will be traversed and close will not + * be called for the node. 
 **/
struct NodeTraverser {

    // called before the children of a node are traversed
    // (NOTE(review): the bool return presumably gates traversal of the
    // children - confirm against Node::traverse)
    virtual bool open(const nodes::Node &) = 0;

    // called after the children of a node have been traversed
    virtual void close(const nodes::Node &) = 0;

    virtual ~NodeTraverser() {}
};

} // namespace vespalib::eval
} // namespace vespalib

diff --git a/eval/src/vespa/eval/eval/node_types.cpp b/eval/src/vespa/eval/eval/node_types.cpp
new file mode 100644
index 00000000000..4834f102790
--- /dev/null
+++ b/eval/src/vespa/eval/eval/node_types.cpp
@@ -0,0 +1,275 @@
// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.

#include <vespa/fastos/fastos.h>
#include "check_type.h"
#include "function.h"
#include "node_traverser.h"
#include "node_types.h"
#include "node_visitor.h"

namespace vespalib {
namespace eval {
namespace nodes {
namespace {

/**
 * Mutable state threaded through type resolution: a stack of partial
 * result types mirroring the depth-first traversal, a stack of active
 * 'let' binding types, and the output map from AST node to resolved
 * type.
 **/
class State
{
private:
    const std::vector<ValueType> &_params;        // function parameter types
    std::map<const Node *, ValueType> &_type_map; // output: resolved type per node
    std::vector<ValueType> _let_types;            // active let bindings (innermost last)
    std::vector<ValueType> _types;                // partial result type stack

public:
    State(const std::vector<ValueType> &params,
          std::map<const Node *, ValueType> &type_map)
        : _params(params), _type_map(type_map), _let_types(), _types() {}

    const ValueType &param_type(size_t idx) {
        assert(idx < _params.size());
        return _params[idx];
    }
    const ValueType &let_type(size_t idx) {
        assert(idx < _let_types.size());
        return _let_types[idx];
    }
    // inspect the type 'ridx' entries below the top of the result stack
    const ValueType &peek(size_t ridx) const {
        assert(_types.size() > ridx);
        return _types[_types.size() - 1 - ridx];
    }
    // pop the types of 'prune_cnt' children, push the resolved type of
    // 'node' and record it in the output map
    void bind(size_t prune_cnt, const ValueType &type_ref, const Node &node) {
        ValueType type = type_ref; // need copy since type_ref might be inside _types
        assert(_types.size() >= prune_cnt);
        for (size_t i = 0; i < prune_cnt; ++i) {
            _types.pop_back();
        }
        _types.push_back(type);
        _type_map.emplace(&node, type);
    }
    void push_let(const ValueType &type) {
        _let_types.push_back(type);
    }
    void pop_let() {
        assert(!_let_types.empty());
        _let_types.pop_back();
    }
    // after a complete traversal only the root result type may remain
    void assert_valid_end_state() const {
        assert(_let_types.empty());
        assert(_types.size() == 1);
    }
};

// deferred action: the value of a 'let' has just been resolved (it is
// on top of the stack); expose it as a let binding
void action_bind_let(State &state) {
    state.push_let(state.peek(0));
}

// deferred action: the body of a 'let' is done; retire the binding
void action_unbind_let(State &state) {
    state.pop_let();
}

/**
 * Resolves the result type of every node bottom-up: the traverser
 * callbacks drive a post-order walk, and each visit callback combines
 * the child types found on the state stack. 'let' scoping is handled
 * with deferred actions triggered when specific child nodes are
 * closed.
 **/
struct TypeResolver : public NodeVisitor, public NodeTraverser {
    State state;
    using action_function = void (*)(State &);
    // pending actions, keyed on the node whose close triggers them;
    // used as a stack (innermost pending action last)
    std::vector<std::pair<const Node *, action_function>> actions;
    TypeResolver(const std::vector<ValueType> &params_in,
                 std::map<const Node *, ValueType> &type_map_out)
        : state(params_in, type_map_out), actions() {}

    //-------------------------------------------------------------------------

    void assert_valid_end_state() const {
        assert(actions.empty());
        state.assert_valid_end_state();
    }

    void add_action(const Node &trigger, action_function action) {
        actions.emplace_back(&trigger, action);
    }

    // run the innermost pending action if 'node' is its trigger
    void check_actions(const Node &node) {
        if (!actions.empty() && (actions.back().first == &node)) {
            actions.back().second(state);
            actions.pop_back();
        }
    }

    //-------------------------------------------------------------------------

    void bind_type(const ValueType &type, const Node &node) {
        state.bind(node.num_children(), type, node);
    }

    // error types are contagious: if any child is an error, the node
    // is an error and the type-specific visit is skipped
    bool check_error(const Node &node) {
        for (size_t i = 0; i < node.num_children(); ++i) {
            if (state.peek(i).is_error()) {
                bind_type(ValueType::error_type(), node);
                return true;
            }
        }
        return false;
    }

    // unary op: result type equals operand type
    void resolve_op1(const Node &node) {
        bind_type(state.peek(0), node);
    }

    // binary op: result type is the join of both operand types
    void resolve_op2(const Node &node) {
        bind_type(ValueType::join(state.peek(1), state.peek(0)), node);
    }

    //-------------------------------------------------------------------------

    virtual void visit(const Number &node) {
        bind_type(ValueType::double_type(), node);
    }
    virtual void visit(const Symbol &node) {
        if (node.id() >= 0) { // param value
            bind_type(state.param_type(node.id()), node);
        } else { // let binding (negative ids index the let stack)
            int let_offset = -(node.id() + 1);
            bind_type(state.let_type(let_offset), node);
        }
    }
    virtual void visit(const String &node) {
        bind_type(ValueType::double_type(), node);
    }
    virtual void visit(const Array &node) {
        bind_type(ValueType::double_type(), node);
    }
    virtual void visit(const Neg &node) { resolve_op1(node); }
    virtual void visit(const Not &node) { resolve_op1(node); }
    virtual void visit(const If &node) {
        // branches of equal type keep that type; two tensors of
        // different types degrade to the generic tensor type;
        // otherwise the result is unknown
        ValueType true_type = state.peek(1);
        ValueType false_type = state.peek(0);
        if (true_type == false_type) {
            bind_type(true_type, node);
        } else if (true_type.is_tensor() && false_type.is_tensor()) {
            bind_type(ValueType::tensor_type({}), node);
        } else {
            bind_type(ValueType::any_type(), node);
        }
    }
    virtual void visit(const Let &node) {
        // a let expression has the type of its body (stack top)
        bind_type(state.peek(0), node);
    }
    virtual void visit(const Error &node) {
        bind_type(ValueType::error_type(), node);
    }
    virtual void visit(const TensorSum &node) {
        const ValueType &child = state.peek(0);
        if (node.dimension().empty()) { // sum over all dimensions
            bind_type(child.reduce({}), node);
        } else {
            bind_type(child.reduce({node.dimension()}), node);
        }
    }
    virtual void visit(const TensorMap &node) { resolve_op1(node); }
    virtual void visit(const TensorJoin &node) { resolve_op2(node); }
    virtual void visit(const TensorReduce &node) {
        const ValueType &child = state.peek(0);
        bind_type(child.reduce(node.dimensions()), node);
    }
    virtual void visit(const TensorRename &node) {
        const ValueType &child = state.peek(0);
        bind_type(child.rename(node.from(), node.to()), node);
    }
    virtual void visit(const TensorLambda &node) {
        // lambda-created tensors carry their type explicitly
        bind_type(node.type(), node);
    }
    virtual void visit(const TensorConcat &node) {
        bind_type(ValueType::concat(state.peek(1), state.peek(0), node.dimension()), node);
    }

    virtual void visit(const Add &node) { resolve_op2(node); }
    virtual void visit(const Sub &node) { resolve_op2(node); }
    virtual void visit(const Mul &node) { resolve_op2(node); }
    virtual void visit(const Div &node) { resolve_op2(node); }
    virtual void visit(const Pow &node) { resolve_op2(node); }
    virtual void visit(const Equal &node) { resolve_op2(node); }
    virtual void visit(const NotEqual &node) { resolve_op2(node); }
    virtual void visit(const Approx &node) { resolve_op2(node); }
    virtual void visit(const Less &node) { resolve_op2(node); }
    virtual void visit(const LessEqual &node) { resolve_op2(node); }
    virtual void visit(const Greater &node) { resolve_op2(node); }
    virtual void visit(const GreaterEqual &node) { resolve_op2(node); }
    virtual void visit(const In &node) {
        // membership test always yields a boolean-as-double
        bind_type(ValueType::double_type(), node);
    }
    virtual void visit(const And &node) { resolve_op2(node); }
    virtual void visit(const Or &node) { resolve_op2(node); }
    virtual void visit(const Cos &node) { resolve_op1(node); }
    virtual void visit(const Sin &node) { resolve_op1(node); }
    virtual void visit(const Tan &node) { resolve_op1(node); }
    virtual void visit(const Cosh &node) { resolve_op1(node); }
    virtual void visit(const Sinh &node) { resolve_op1(node); }
    virtual void visit(const Tanh &node) { resolve_op1(node); }
    virtual void visit(const Acos &node) { resolve_op1(node); }
    virtual void visit(const Asin &node) { resolve_op1(node); }
    virtual void visit(const Atan &node) { resolve_op1(node); }
    virtual void visit(const Exp &node) { resolve_op1(node); }
    virtual void visit(const Log10 &node) { resolve_op1(node); }
    virtual void visit(const Log &node) { resolve_op1(node); }
    virtual void visit(const Sqrt &node) { resolve_op1(node); }
    virtual void visit(const Ceil &node) { resolve_op1(node); }
    virtual void visit(const Fabs &node) { resolve_op1(node); }
    virtual void visit(const Floor &node) { resolve_op1(node); }
    virtual void visit(const Atan2 &node) { resolve_op2(node); }
    virtual void visit(const Ldexp &node) { resolve_op2(node); }
    virtual void visit(const Pow2 &node) { resolve_op2(node); }
    virtual void visit(const Fmod &node) { resolve_op2(node); }
    virtual void visit(const Min &node) { resolve_op2(node); }
    virtual void visit(const Max &node) { resolve_op2(node); }
    virtual void visit(const IsNan &node) { resolve_op1(node); }
    virtual void visit(const Relu &node) { resolve_op1(node); }
    virtual void visit(const Sigmoid &node) { resolve_op1(node); }

    //-------------------------------------------------------------------------

    virtual bool open(const Node &node) {
        // when entering a let, schedule binding of the value (after the
        // value child resolves) and unbinding (after the body resolves);
        // pushed in reverse trigger order since 'actions' is a stack
        auto let = as<Let>(node);
        if (let) {
            add_action(let->expr(), action_unbind_let);
            add_action(let->value(), action_bind_let);
        }
        return true;
    }

    virtual void close(const Node &node) {
        if (!check_error(node)) {
            node.accept(*this);
        }
        check_actions(node);
    }
};

} // namespace vespalib::eval::nodes::<unnamed>
} // namespace vespalib::eval::nodes

// default constructor: empty repo, unknown values resolve to 'any'
NodeTypes::NodeTypes()
    : _not_found(ValueType::any_type()),
      _type_map()
{
}

// resolve types for all nodes in 'function' given the parameter types;
// unknown nodes resolve to 'error' for a populated repo
NodeTypes::NodeTypes(const Function &function, const std::vector<ValueType> &input_types)
    : _not_found(ValueType::error_type()),
      _type_map()
{
    assert(input_types.size() == function.num_params());
    nodes::TypeResolver resolver(input_types, _type_map);
    function.root().traverse(resolver);
    resolver.assert_valid_end_state();
}

const ValueType &
NodeTypes::get_type(const nodes::Node &node) const
{
    auto pos = _type_map.find(&node);
    if (pos == _type_map.end()) {
        return _not_found;
    }
    return pos->second;
}

} // namespace vespalib::eval
} // namespace vespalib

diff --git a/eval/src/vespa/eval/eval/node_types.h b/eval/src/vespa/eval/eval/node_types.h
new file mode 100644
index 00000000000..ed33eb59447
--- /dev/null
+++ b/eval/src/vespa/eval/eval/node_types.h
@@ -0,0 +1,46 @@
// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+ +#pragma once + +#include "value_type.h" +#include <map> + +namespace vespalib { +namespace eval { + +namespace nodes { class Node; } +class Function; + +/** + * Class keeping track of the output type of all intermediate + * calculations for a single function. The constructor performs type + * resolution for each node in the AST based on the type of all + * function parameters. The default constructor creates an empty type + * repo representing an unknown number of unknown values. + **/ +class NodeTypes +{ +private: + ValueType _not_found; + std::map<const nodes::Node*,ValueType> _type_map; +public: + NodeTypes(); + NodeTypes(const Function &function, const std::vector<ValueType> &input_types); + const ValueType &get_type(const nodes::Node &node) const; + template <typename P> + bool check_types(const P &pred) const { + for (const auto &entry: _type_map) { + if (!pred(entry.second)) { + return false; + } + } + return (_type_map.size() > 0); + } + bool all_types_are_double() const { + return check_types([](const ValueType &type) + { return type.is_double(); }); + } +}; + +} // namespace vespalib::eval +} // namespace vespalib diff --git a/eval/src/vespa/eval/eval/node_visitor.h b/eval/src/vespa/eval/eval/node_visitor.h new file mode 100644 index 00000000000..3196a00820e --- /dev/null +++ b/eval/src/vespa/eval/eval/node_visitor.h @@ -0,0 +1,152 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#pragma once + +#include "basic_nodes.h" +#include "tensor_nodes.h" +#include "operator_nodes.h" +#include "call_nodes.h" + +namespace vespalib { +namespace eval { + +/** + * Interface implemented by Node visitors to resolve the actual type + * of an abstract Node. This is typically used when directly + * evaluating an AST, when creating a binary compile cache key or when + * compiling an AST to machine code using LLVM. 
+ **/ +struct NodeVisitor { + + // basic nodes + virtual void visit(const nodes::Number &) = 0; + virtual void visit(const nodes::Symbol &) = 0; + virtual void visit(const nodes::String &) = 0; + virtual void visit(const nodes::Array &) = 0; + virtual void visit(const nodes::Neg &) = 0; + virtual void visit(const nodes::Not &) = 0; + virtual void visit(const nodes::If &) = 0; + virtual void visit(const nodes::Let &) = 0; + virtual void visit(const nodes::Error &) = 0; + + // tensor nodes + virtual void visit(const nodes::TensorSum &) = 0; + virtual void visit(const nodes::TensorMap &) = 0; + virtual void visit(const nodes::TensorJoin &) = 0; + virtual void visit(const nodes::TensorReduce &) = 0; + virtual void visit(const nodes::TensorRename &) = 0; + virtual void visit(const nodes::TensorLambda &) = 0; + virtual void visit(const nodes::TensorConcat &) = 0; + + // operator nodes + virtual void visit(const nodes::Add &) = 0; + virtual void visit(const nodes::Sub &) = 0; + virtual void visit(const nodes::Mul &) = 0; + virtual void visit(const nodes::Div &) = 0; + virtual void visit(const nodes::Pow &) = 0; + virtual void visit(const nodes::Equal &) = 0; + virtual void visit(const nodes::NotEqual &) = 0; + virtual void visit(const nodes::Approx &) = 0; + virtual void visit(const nodes::Less &) = 0; + virtual void visit(const nodes::LessEqual &) = 0; + virtual void visit(const nodes::Greater &) = 0; + virtual void visit(const nodes::GreaterEqual &) = 0; + virtual void visit(const nodes::In &) = 0; + virtual void visit(const nodes::And &) = 0; + virtual void visit(const nodes::Or &) = 0; + + // call nodes + virtual void visit(const nodes::Cos &) = 0; + virtual void visit(const nodes::Sin &) = 0; + virtual void visit(const nodes::Tan &) = 0; + virtual void visit(const nodes::Cosh &) = 0; + virtual void visit(const nodes::Sinh &) = 0; + virtual void visit(const nodes::Tanh &) = 0; + virtual void visit(const nodes::Acos &) = 0; + virtual void visit(const nodes::Asin &) = 
0; + virtual void visit(const nodes::Atan &) = 0; + virtual void visit(const nodes::Exp &) = 0; + virtual void visit(const nodes::Log10 &) = 0; + virtual void visit(const nodes::Log &) = 0; + virtual void visit(const nodes::Sqrt &) = 0; + virtual void visit(const nodes::Ceil &) = 0; + virtual void visit(const nodes::Fabs &) = 0; + virtual void visit(const nodes::Floor &) = 0; + virtual void visit(const nodes::Atan2 &) = 0; + virtual void visit(const nodes::Ldexp &) = 0; + virtual void visit(const nodes::Pow2 &) = 0; + virtual void visit(const nodes::Fmod &) = 0; + virtual void visit(const nodes::Min &) = 0; + virtual void visit(const nodes::Max &) = 0; + virtual void visit(const nodes::IsNan &) = 0; + virtual void visit(const nodes::Relu &) = 0; + virtual void visit(const nodes::Sigmoid &) = 0; + + virtual ~NodeVisitor() {} +}; + +/** + * Node visitor helper class that can be subclassed to ignore handling + * of all types not specifically handled. + **/ +struct EmptyNodeVisitor : NodeVisitor { + virtual void visit(const nodes::Number &) {} + virtual void visit(const nodes::Symbol &) {} + virtual void visit(const nodes::String &) {} + virtual void visit(const nodes::Array &) {} + virtual void visit(const nodes::Neg &) {} + virtual void visit(const nodes::Not &) {} + virtual void visit(const nodes::If &) {} + virtual void visit(const nodes::Let &) {} + virtual void visit(const nodes::Error &) {} + virtual void visit(const nodes::TensorSum &) {} + virtual void visit(const nodes::TensorMap &) {} + virtual void visit(const nodes::TensorJoin &) {} + virtual void visit(const nodes::TensorReduce &) {} + virtual void visit(const nodes::TensorRename &) {} + virtual void visit(const nodes::TensorLambda &) {} + virtual void visit(const nodes::TensorConcat &) {} + virtual void visit(const nodes::Add &) {} + virtual void visit(const nodes::Sub &) {} + virtual void visit(const nodes::Mul &) {} + virtual void visit(const nodes::Div &) {} + virtual void visit(const nodes::Pow &) {} 
+ virtual void visit(const nodes::Equal &) {} + virtual void visit(const nodes::NotEqual &) {} + virtual void visit(const nodes::Approx &) {} + virtual void visit(const nodes::Less &) {} + virtual void visit(const nodes::LessEqual &) {} + virtual void visit(const nodes::Greater &) {} + virtual void visit(const nodes::GreaterEqual &) {} + virtual void visit(const nodes::In &) {} + virtual void visit(const nodes::And &) {} + virtual void visit(const nodes::Or &) {} + virtual void visit(const nodes::Cos &) {} + virtual void visit(const nodes::Sin &) {} + virtual void visit(const nodes::Tan &) {} + virtual void visit(const nodes::Cosh &) {} + virtual void visit(const nodes::Sinh &) {} + virtual void visit(const nodes::Tanh &) {} + virtual void visit(const nodes::Acos &) {} + virtual void visit(const nodes::Asin &) {} + virtual void visit(const nodes::Atan &) {} + virtual void visit(const nodes::Exp &) {} + virtual void visit(const nodes::Log10 &) {} + virtual void visit(const nodes::Log &) {} + virtual void visit(const nodes::Sqrt &) {} + virtual void visit(const nodes::Ceil &) {} + virtual void visit(const nodes::Fabs &) {} + virtual void visit(const nodes::Floor &) {} + virtual void visit(const nodes::Atan2 &) {} + virtual void visit(const nodes::Ldexp &) {} + virtual void visit(const nodes::Pow2 &) {} + virtual void visit(const nodes::Fmod &) {} + virtual void visit(const nodes::Min &) {} + virtual void visit(const nodes::Max &) {} + virtual void visit(const nodes::IsNan &) {} + virtual void visit(const nodes::Relu &) {} + virtual void visit(const nodes::Sigmoid &) {} +}; + +} // namespace vespalib::eval +} // namespace vespalib diff --git a/eval/src/vespa/eval/eval/operation.cpp b/eval/src/vespa/eval/eval/operation.cpp new file mode 100644 index 00000000000..72b48beb1a2 --- /dev/null +++ b/eval/src/vespa/eval/eval/operation.cpp @@ -0,0 +1,96 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
#include <vespa/fastos/fastos.h>
#include <cmath>
#include "operation.h"
#include "value.h"
#include "operation_visitor.h"

namespace vespalib {
namespace eval {

// dispatch a unary operation on a generic value: errors propagate,
// doubles are evaluated directly, other value types delegate to the
// value's own apply mechanism
const Value &
UnaryOperation::perform(const Value &lhs, Stash &stash) const {
    if (lhs.is_error()) {
        return stash.create<ErrorValue>();
    } else if (lhs.is_double()) {
        return stash.create<DoubleValue>(eval(lhs.as_double()));
    } else {
        return lhs.apply(*this, stash);
    }
}

// dispatch a binary operation on generic values; when exactly one side
// is a double it is bound into a temporary unary operation applied to
// the other (non-double) side
const Value &
BinaryOperation::perform(const Value &lhs, const Value &rhs, Stash &stash) const {
    if (lhs.is_error() || rhs.is_error()) {
        return stash.create<ErrorValue>();
    } else if (lhs.is_double() && rhs.is_double()) {
        return stash.create<DoubleValue>(eval(lhs.as_double(), rhs.as_double()));
    } else if (lhs.is_double()) {
        BindLeft unary_op(*this, lhs.as_double());
        return rhs.apply(unary_op, stash);
    } else if (rhs.is_double()) {
        BindRight unary_op(*this, rhs.as_double());
        return lhs.apply(unary_op, stash);
    } else {
        return lhs.apply(*this, rhs, stash);
    }
}

// CRTP plumbing: forward accept/clone to the concrete operation type
template <typename T> void Op1<T>::accept(OperationVisitor &visitor) const {
    visitor.visit(static_cast<const T&>(*this));
}

template <typename T> void Op2<T>::accept(OperationVisitor &visitor) const {
    visitor.visit(static_cast<const T&>(*this));
}

template <typename T> std::unique_ptr<BinaryOperation> Op2<T>::clone() const {
    return std::make_unique<T>();
}

// scalar semantics of each operation; booleans are encoded as 1.0/0.0
namespace operation {
double Neg::eval(double a) const { return -a; }
double Not::eval(double a) const { return (a != 0.0) ? 0.0 : 1.0; }
double Add::eval(double a, double b) const { return (a + b); }
double Sub::eval(double a, double b) const { return (a - b); }
double Mul::eval(double a, double b) const { return (a * b); }
double Div::eval(double a, double b) const { return (a / b); }
double Pow::eval(double a, double b) const { return std::pow(a, b); }
double Equal::eval(double a, double b) const { return (a == b) ? 1.0 : 0.0; }
double NotEqual::eval(double a, double b) const { return (a != b) ? 1.0 : 0.0; }
double Approx::eval(double a, double b) const { return approx_equal(a, b); }
double Less::eval(double a, double b) const { return (a < b) ? 1.0 : 0.0; }
double LessEqual::eval(double a, double b) const { return (a <= b) ? 1.0 : 0.0; }
double Greater::eval(double a, double b) const { return (a > b) ? 1.0 : 0.0; }
double GreaterEqual::eval(double a, double b) const { return (a >= b) ? 1.0 : 0.0; }
double And::eval(double a, double b) const { return ((a != 0.0) && (b != 0.0)) ? 1.0 : 0.0; }
double Or::eval(double a, double b) const { return ((a != 0.0) || (b != 0.0)) ? 1.0 : 0.0; }
double Cos::eval(double a) const { return std::cos(a); }
double Sin::eval(double a) const { return std::sin(a); }
double Tan::eval(double a) const { return std::tan(a); }
double Cosh::eval(double a) const { return std::cosh(a); }
double Sinh::eval(double a) const { return std::sinh(a); }
double Tanh::eval(double a) const { return std::tanh(a); }
double Acos::eval(double a) const { return std::acos(a); }
double Asin::eval(double a) const { return std::asin(a); }
double Atan::eval(double a) const { return std::atan(a); }
double Exp::eval(double a) const { return std::exp(a); }
double Log10::eval(double a) const { return std::log10(a); }
double Log::eval(double a) const { return std::log(a); }
double Sqrt::eval(double a) const { return std::sqrt(a); }
double Ceil::eval(double a) const { return std::ceil(a); }
double Fabs::eval(double a) const { return std::fabs(a); }
double Floor::eval(double a) const { return std::floor(a); }
double Atan2::eval(double a, double b) const { return std::atan2(a, b); }
double Ldexp::eval(double a, double b) const { return std::ldexp(a, b); }
double Fmod::eval(double a, double b) const { return std::fmod(a, b); }
double Min::eval(double a, double b) const { return std::min(a, b); }
double Max::eval(double a, double b) const { return std::max(a, b); }
double IsNan::eval(double a) const { return std::isnan(a) ? 1.0 : 0.0; }
double Relu::eval(double a) const { return std::max(a, 0.0); }
double Sigmoid::eval(double a) const { return 1.0 / (1.0 + std::exp(-1.0 * a)); }
} // namespace vespalib::eval::operation

} // namespace vespalib::eval
} // namespace vespalib

diff --git a/eval/src/vespa/eval/eval/operation.h b/eval/src/vespa/eval/eval/operation.h
new file mode 100644
index 00000000000..bd730b5bce6
--- /dev/null
+++ b/eval/src/vespa/eval/eval/operation.h
@@ -0,0 +1,150 @@
// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.

#pragma once

#include "value.h"
#include <cmath>
#include <vespa/vespalib/util/approx.h>
#include <vespa/vespalib/util/stash.h>

namespace vespalib {
namespace eval {

struct OperationVisitor;

/**
 * An Operation represents the action taken based on what is described
 * by an Operator or a Call AST node. All operations have underlying
 * numeric meaning (that can be overridden for complex value
 * types). They no longer have any textual counterpart and are only
 * separated by the number of values they operate on.
 **/
struct Operation {
    virtual void accept(OperationVisitor &visitor) const = 0;
    virtual ~Operation() {}
};

/**
 * Simple typecasting utility.
 */
template <typename T>
const T *as(const Operation &op) { return dynamic_cast<const T *>(&op); }

//-----------------------------------------------------------------------------

/**
 * An Operation performing a calculation based on a single input
 * value.
 **/
struct UnaryOperation : Operation {
    const Value &perform(const Value &a, Stash &stash) const;
    virtual double eval(double a) const = 0;
};

/**
 * An Operation performing a calculation based on two input values.
 **/
struct BinaryOperation : Operation {
    const Value &perform(const Value &a, const Value &b, Stash &stash) const;
    virtual double eval(double a, double b) const = 0;
    virtual std::unique_ptr<BinaryOperation> clone() const = 0;
};

//-----------------------------------------------------------------------------

// CRTP helpers giving each concrete operation its accept (and, for
// binary operations, clone) implementation; definitions live in
// operation.cpp

template <typename T>
struct Op1 : UnaryOperation {
    virtual void accept(OperationVisitor &visitor) const override;
};

template <typename T>
struct Op2 : BinaryOperation {
    virtual void accept(OperationVisitor &visitor) const override;
    virtual std::unique_ptr<BinaryOperation> clone() const override;
};

//-----------------------------------------------------------------------------

/**
 * A non-trivial custom unary operation. Typically used for closures
 * and lambdas.
 **/
struct CustomUnaryOperation : Op1<CustomUnaryOperation> {};

//-----------------------------------------------------------------------------

/**
 * This class binds the first parameter of a binary operation to a
 * numeric value, acting as a custom unary operation.
 **/
class BindLeft : public CustomUnaryOperation
{
private:
    const BinaryOperation &_op;
    double _a;
public:
    BindLeft(const BinaryOperation &op, double a) : _op(op), _a(a) {}
    double eval(double b) const override { return _op.eval(_a, b); }
};

/**
 * This class binds the second parameter of a binary operation to a
 * numeric value, acting as a custom unary operation.
 **/
class BindRight : public CustomUnaryOperation
{
private:
    const BinaryOperation &_op;
    double _b;
public:
    BindRight(const BinaryOperation &op, double b) : _op(op), _b(b) {}
    double eval(double a) const override { return _op.eval(a, _b); }
};

//-----------------------------------------------------------------------------

// the concrete operations; eval definitions live in operation.cpp
namespace operation {
struct Neg : Op1<Neg> { double eval(double a) const override; };
struct Not : Op1<Not> { double eval(double a) const override; };
struct Add : Op2<Add> { double eval(double a, double b) const override; };
struct Sub : Op2<Sub> { double eval(double a, double b) const override; };
struct Mul : Op2<Mul> { double eval(double a, double b) const override; };
struct Div : Op2<Div> { double eval(double a, double b) const override; };
struct Pow : Op2<Pow> { double eval(double a, double b) const override; };
struct Equal : Op2<Equal> { double eval(double a, double b) const override; };
struct NotEqual : Op2<NotEqual> { double eval(double a, double b) const override; };
struct Approx : Op2<Approx> { double eval(double a, double b) const override; };
struct Less : Op2<Less> { double eval(double a, double b) const override; };
struct LessEqual : Op2<LessEqual> { double eval(double a, double b) const override; };
struct Greater : Op2<Greater> { double eval(double a, double b) const override; };
struct GreaterEqual : Op2<GreaterEqual> { double eval(double a, double b) const override; };
struct And : Op2<And> { double eval(double a, double b) const override; };
struct Or : Op2<Or> { double eval(double a, double b) const override; };
struct Cos : Op1<Cos> { double eval(double a) const override; };
struct Sin : Op1<Sin> { double eval(double a) const override; };
struct Tan : Op1<Tan> { double eval(double a) const override; };
struct Cosh : Op1<Cosh> { double eval(double a) const override; };
struct Sinh : Op1<Sinh> { double eval(double a) const override; };
struct Tanh : Op1<Tanh> { double eval(double a) const override; };
struct Acos : Op1<Acos> { double eval(double a) const override; };
struct Asin : Op1<Asin> { double eval(double a) const override; };
struct Atan : Op1<Atan> { double eval(double a) const override; };
struct Exp : Op1<Exp> { double eval(double a) const override; };
struct Log10 : Op1<Log10> { double eval(double a) const override; };
struct Log : Op1<Log> { double eval(double a) const override; };
struct Sqrt : Op1<Sqrt> { double eval(double a) const override; };
struct Ceil : Op1<Ceil> { double eval(double a) const override; };
struct Fabs : Op1<Fabs> { double eval(double a) const override; };
struct Floor : Op1<Floor> { double eval(double a) const override; };
struct Atan2 : Op2<Atan2> { double eval(double a, double b) const override; };
struct Ldexp : Op2<Ldexp> { double eval(double a, double b) const override; };
struct Fmod : Op2<Fmod> { double eval(double a, double b) const override; };
struct Min : Op2<Min> { double eval(double a, double b) const override; };
struct Max : Op2<Max> { double eval(double a, double b) const override; };
struct IsNan : Op1<IsNan> { double eval(double a) const override; };
struct Relu : Op1<Relu> { double eval(double a) const override; };
struct Sigmoid : Op1<Sigmoid> { double eval(double a) const override; };
} // namespace vespalib::eval::operation

} // namespace vespalib::eval
} // namespace vespalib

diff --git a/eval/src/vespa/eval/eval/operation_visitor.h b/eval/src/vespa/eval/eval/operation_visitor.h
new file mode 100644
index 00000000000..00623a3a0e8
--- /dev/null
+++ b/eval/src/vespa/eval/eval/operation_visitor.h
@@ -0,0 +1,109 @@
// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.

#pragma once

#include "operation.h"

namespace vespalib {
namespace eval {

/**
 * Interface implemented by Operation visitors to resolve the actual
 * type of an abstract Operation.
+ **/ +struct OperationVisitor { + virtual void visit(const operation::Neg &) = 0; + virtual void visit(const operation::Not &) = 0; + virtual void visit(const operation::Add &) = 0; + virtual void visit(const operation::Sub &) = 0; + virtual void visit(const operation::Mul &) = 0; + virtual void visit(const operation::Div &) = 0; + virtual void visit(const operation::Pow &) = 0; + virtual void visit(const operation::Equal &) = 0; + virtual void visit(const operation::NotEqual &) = 0; + virtual void visit(const operation::Approx &) = 0; + virtual void visit(const operation::Less &) = 0; + virtual void visit(const operation::LessEqual &) = 0; + virtual void visit(const operation::Greater &) = 0; + virtual void visit(const operation::GreaterEqual &) = 0; + virtual void visit(const operation::And &) = 0; + virtual void visit(const operation::Or &) = 0; + virtual void visit(const operation::Cos &) = 0; + virtual void visit(const operation::Sin &) = 0; + virtual void visit(const operation::Tan &) = 0; + virtual void visit(const operation::Cosh &) = 0; + virtual void visit(const operation::Sinh &) = 0; + virtual void visit(const operation::Tanh &) = 0; + virtual void visit(const operation::Acos &) = 0; + virtual void visit(const operation::Asin &) = 0; + virtual void visit(const operation::Atan &) = 0; + virtual void visit(const operation::Exp &) = 0; + virtual void visit(const operation::Log10 &) = 0; + virtual void visit(const operation::Log &) = 0; + virtual void visit(const operation::Sqrt &) = 0; + virtual void visit(const operation::Ceil &) = 0; + virtual void visit(const operation::Fabs &) = 0; + virtual void visit(const operation::Floor &) = 0; + virtual void visit(const operation::Atan2 &) = 0; + virtual void visit(const operation::Ldexp &) = 0; + virtual void visit(const operation::Fmod &) = 0; + virtual void visit(const operation::Min &) = 0; + virtual void visit(const operation::Max &) = 0; + virtual void visit(const operation::IsNan &) = 0; + virtual void 
visit(const operation::Relu &) = 0; + virtual void visit(const operation::Sigmoid &) = 0; + virtual void visit(const CustomUnaryOperation &) = 0; + virtual ~OperationVisitor() {} +}; + +/** + * Operation visitor helper class that can be subclassed to implement + * common handling of all types not specifically handled. + **/ +struct DefaultOperationVisitor : OperationVisitor { + virtual void visitDefault(const Operation &) = 0; + virtual void visit(const operation::Neg &op) override { visitDefault(op); } + virtual void visit(const operation::Not &op) override { visitDefault(op); } + virtual void visit(const operation::Add &op) override { visitDefault(op); } + virtual void visit(const operation::Sub &op) override { visitDefault(op); } + virtual void visit(const operation::Mul &op) override { visitDefault(op); } + virtual void visit(const operation::Div &op) override { visitDefault(op); } + virtual void visit(const operation::Pow &op) override { visitDefault(op); } + virtual void visit(const operation::Equal &op) override { visitDefault(op); } + virtual void visit(const operation::NotEqual &op) override { visitDefault(op); } + virtual void visit(const operation::Approx &op) override { visitDefault(op); } + virtual void visit(const operation::Less &op) override { visitDefault(op); } + virtual void visit(const operation::LessEqual &op) override { visitDefault(op); } + virtual void visit(const operation::Greater &op) override { visitDefault(op); } + virtual void visit(const operation::GreaterEqual &op) override { visitDefault(op); } + virtual void visit(const operation::And &op) override { visitDefault(op); } + virtual void visit(const operation::Or &op) override { visitDefault(op); } + virtual void visit(const operation::Cos &op) override { visitDefault(op); } + virtual void visit(const operation::Sin &op) override { visitDefault(op); } + virtual void visit(const operation::Tan &op) override { visitDefault(op); } + virtual void visit(const operation::Cosh &op) override 
{ visitDefault(op); } + virtual void visit(const operation::Sinh &op) override { visitDefault(op); } + virtual void visit(const operation::Tanh &op) override { visitDefault(op); } + virtual void visit(const operation::Acos &op) override { visitDefault(op); } + virtual void visit(const operation::Asin &op) override { visitDefault(op); } + virtual void visit(const operation::Atan &op) override { visitDefault(op); } + virtual void visit(const operation::Exp &op) override { visitDefault(op); } + virtual void visit(const operation::Log10 &op) override { visitDefault(op); } + virtual void visit(const operation::Log &op) override { visitDefault(op); } + virtual void visit(const operation::Sqrt &op) override { visitDefault(op); } + virtual void visit(const operation::Ceil &op) override { visitDefault(op); } + virtual void visit(const operation::Fabs &op) override { visitDefault(op); } + virtual void visit(const operation::Floor &op) override { visitDefault(op); } + virtual void visit(const operation::Atan2 &op) override { visitDefault(op); } + virtual void visit(const operation::Ldexp &op) override { visitDefault(op); } + virtual void visit(const operation::Fmod &op) override { visitDefault(op); } + virtual void visit(const operation::Min &op) override { visitDefault(op); } + virtual void visit(const operation::Max &op) override { visitDefault(op); } + virtual void visit(const operation::IsNan &op) override { visitDefault(op); } + virtual void visit(const operation::Relu &op) override { visitDefault(op); } + virtual void visit(const operation::Sigmoid &op) override { visitDefault(op); } + virtual void visit(const CustomUnaryOperation &op) override { visitDefault(op); } +}; + +} // namespace vespalib::eval +} // namespace vespalib diff --git a/eval/src/vespa/eval/eval/operator_nodes.cpp b/eval/src/vespa/eval/eval/operator_nodes.cpp new file mode 100644 index 00000000000..ab6955a8248 --- /dev/null +++ b/eval/src/vespa/eval/eval/operator_nodes.cpp @@ -0,0 +1,48 @@ +// 
Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.

#include <vespa/fastos/fastos.h>
#include "operator_nodes.h"
#include "node_visitor.h"

namespace vespalib {
namespace eval {
namespace nodes {

// Forward to the NodeVisitor overload matching the concrete operator type T.
template <typename T> void OperatorHelper<T>::accept(NodeVisitor &visitor) const {
    visitor.visit(static_cast<const T&>(*this));
}

OperatorRepo OperatorRepo::_instance;

// Register all known infix operators. OperatorRepo::add also records the
// length of the longest operator token (_max_size) as a side effect.
OperatorRepo::OperatorRepo() : _map(), _max_size(0) {
    add(nodes::Add());
    add(nodes::Sub());
    add(nodes::Mul());
    add(nodes::Div());
    add(nodes::Pow());
    add(nodes::Equal());
    add(nodes::NotEqual());
    add(nodes::Approx());
    add(nodes::Less());
    add(nodes::LessEqual());
    add(nodes::Greater());
    add(nodes::GreaterEqual());
    add(nodes::In());
    add(nodes::And());
    add(nodes::Or());
}

// Custom dump for 'in': the operator token is alphabetic, so it is
// padded with spaces ("(a in b)") unlike the symbolic operators.
vespalib::string
In::dump(DumpContext &ctx) const
{
    vespalib::string str;
    str += "(";
    str += lhs().dump(ctx);
    str += " in ";
    str += rhs().dump(ctx);
    str += ")";
    return str;
}

} // namespace vespalib::eval::nodes
} // namespace vespalib::eval
} // namespace vespalib
diff --git a/eval/src/vespa/eval/eval/operator_nodes.h b/eval/src/vespa/eval/eval/operator_nodes.h
new file mode 100644
index 00000000000..34b1e60d571
--- /dev/null
+++ b/eval/src/vespa/eval/eval/operator_nodes.h
@@ -0,0 +1,178 @@
// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.

#pragma once

#include <cmath>
#include <memory>
#include <vespa/vespalib/stllike/string.h>
#include <vespa/vespalib/util/stringfmt.h>
#include "basic_nodes.h"
#include <map>

namespace vespalib {
namespace eval {

struct NodeVisitor;

namespace nodes {

/**
 * Common superclass for AST nodes describing infix operators. Each
 * operator has a left hand side expression and a right hand side
 * expression. The parser will use Operator instances to resolve
 * precedence.
 **/
class Operator : public Node {
public:
    enum Order { LEFT, RIGHT };

private:
    vespalib::string _op_str;   // operator token, e.g. "+" or "in"
    int              _priority; // higher value binds tighter
    Order            _order;    // associativity, used to break priority ties
    Node_UP          _lhs;
    Node_UP          _rhs;
    bool             _is_const; // true iff both bound children are const

public:
    Operator(const vespalib::string &op_str_in, int priority_in, Order order_in)
        : _op_str(op_str_in), _priority(priority_in), _order(order_in), _lhs(), _rhs(), _is_const(false) {}
    vespalib::string op_str() const { return _op_str; }
    int priority() const { return _priority; }
    Order order() const { return _order; }
    const Node &lhs() const { return *_lhs; }
    const Node &rhs() const { return *_rhs; }
    virtual bool is_const() const override { return _is_const; }
    // Reports 0 children until bind() has attached both operands.
    virtual size_t num_children() const override { return (_lhs && _rhs) ? 2 : 0; }
    virtual const Node &get_child(size_t idx) const override {
        assert(idx < 2);
        return (idx == 0) ? lhs() : rhs();
    }
    virtual void detach_children(NodeHandler &handler) override {
        handler.handle(std::move(_lhs));
        handler.handle(std::move(_rhs));
    }

    // Parser helper: should this operator be reduced before 'other'?
    // Higher priority wins; equal priority falls back to associativity
    // (LEFT-associative operators reduce left-to-right).
    bool do_before(const Operator &other) {
        if (priority() > other.priority()) {
            return true;
        }
        if (other.priority() > priority()) {
            return false;
        }
        assert(order() == other.order());
        return (order() == LEFT);
    }

    // Attach both operands; const-ness is derived from the children.
    virtual void bind(Node_UP lhs_in, Node_UP rhs_in) {
        _lhs = std::move(lhs_in);
        _rhs = std::move(rhs_in);
        _is_const = (_lhs->is_const() && _rhs->is_const());
    }

    virtual vespalib::string dump(DumpContext &ctx) const {
        vespalib::string str;
        str += "(";
        str += _lhs->dump(ctx);
        str += op_str();
        str += _rhs->dump(ctx);
        str += ")";
        return str;
    }
};
typedef std::unique_ptr<Operator> Operator_UP;

//-----------------------------------------------------------------------------

/**
 * Repository for known operators. This is used by the parser to
 * create appropriate operator nodes.
 **/
class OperatorRepo {
private:
    static OperatorRepo _instance;
    typedef nodes::Operator_UP (*factory_type)();
    std::map<vespalib::string,factory_type> _map;
    size_t _max_size; // length of the longest registered operator token
    template <typename T>
    void add(const T &op) {
        vespalib::string op_str = op.op_str();
        _max_size = std::max(_max_size, op_str.size());
        _map[op_str] = T::create;
    }
    OperatorRepo();
public:
    static const OperatorRepo &instance() { return _instance; }
    size_t max_size() const { return _max_size; }
    // Greedy longest-match lookup: 'tmp' is shortened one character at
    // a time until it names a known operator; returns nullptr when no
    // prefix matches. 'tmp' is left holding the matched token.
    nodes::Operator_UP create(vespalib::string &tmp) const {
        for (; !tmp.empty(); tmp.resize(tmp.size() - 1)) {
            auto result = _map.find(tmp);
            if (result != _map.end()) {
                return result->second();
            }
        }
        return nodes::Operator_UP(nullptr);
    }
    std::vector<vespalib::string> get_names() const {
        std::vector<vespalib::string> ret;
        for (const auto &entry: _map) {
            ret.push_back(entry.first);
        }
        return ret;
    }
};

//-----------------------------------------------------------------------------

/**
 * CRTP helper providing the visitor dispatch (accept) and the factory
 * function (create) for a concrete operator type T.
 **/
template <typename T>
struct OperatorHelper : Operator {
    using Helper = OperatorHelper<T>;
    OperatorHelper(const vespalib::string &op_str_in, int priority_in, Operator::Order order_in)
        : Operator(op_str_in, priority_in, order_in) {}
    virtual void accept(NodeVisitor &visitor) const override;
    static Operator_UP create() { return Operator_UP(new T()); }
};

//-----------------------------------------------------------------------------

class Add : public OperatorHelper<Add> {
private:
    bool _is_forest; // cached at bind time; see check_forest()
public:
    Add() : Helper("+", 101, LEFT), _is_forest(false) {}
    virtual bool is_forest() const override { return _is_forest; }
    // An Add is a forest when both children are trees or forests.
    bool check_forest() const {
        bool lhs_ok = (lhs().is_tree() || lhs().is_forest());
        bool rhs_ok = (rhs().is_tree() || rhs().is_forest());
        return (lhs_ok && rhs_ok);
    }
    virtual void bind(Node_UP lhs_in, Node_UP rhs_in) override {
        OperatorHelper<Add>::bind(std::move(lhs_in), std::move(rhs_in));
        _is_forest = check_forest();
    }
};

//-----------------------------------------------------------------------------

struct Sub : OperatorHelper<Sub> { Sub() : Helper("-", 101, LEFT) {}};
struct Mul : OperatorHelper<Mul> { Mul() : Helper("*", 102, LEFT) {}};
struct Div : OperatorHelper<Div> { Div() : Helper("/", 102, LEFT) {}};
struct Pow : OperatorHelper<Pow> { Pow() : Helper("^", 103, RIGHT) {}};
struct Equal : OperatorHelper<Equal> { Equal() : Helper("==", 10, LEFT) {}};
struct NotEqual : OperatorHelper<NotEqual> { NotEqual() : Helper("!=", 10, LEFT) {}};
struct Approx : OperatorHelper<Approx> { Approx() : Helper("~=", 10, LEFT) {}};
struct Less : OperatorHelper<Less> { Less() : Helper("<", 10, LEFT) {}};
struct LessEqual : OperatorHelper<LessEqual> { LessEqual() : Helper("<=", 10, LEFT) {}};
struct Greater : OperatorHelper<Greater> { Greater() : Helper(">", 10, LEFT) {}};
struct GreaterEqual : OperatorHelper<GreaterEqual> { GreaterEqual() : Helper(">=", 10, LEFT) {}};
struct In : OperatorHelper<In> { In() : Helper("in", 10, LEFT) {}
    virtual vespalib::string dump(DumpContext &ctx) const override;
};
struct And : OperatorHelper<And> { And() : Helper("&&", 2, LEFT) {}};
struct Or : OperatorHelper<Or> { Or() : Helper("||", 1, LEFT) {}};

//-----------------------------------------------------------------------------

} // namespace vespalib::eval::nodes
} // namespace vespalib::eval
} // namespace vespalib
diff --git a/eval/src/vespa/eval/eval/simple_tensor.cpp b/eval/src/vespa/eval/eval/simple_tensor.cpp
new file mode 100644
index 00000000000..5da5b8a2134
--- /dev/null
+++ b/eval/src/vespa/eval/eval/simple_tensor.cpp
@@ -0,0 +1,561 @@
// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
#include <vespa/fastos/fastos.h>
#include "simple_tensor.h"
#include "simple_tensor_engine.h"
#include "operation.h"
#include <algorithm>

namespace vespalib {
namespace eval {

using Address = SimpleTensor::Address;
using Cell = SimpleTensor::Cell;
using Cells = SimpleTensor::Cells;
using IndexList = std::vector<size_t>;
using Label = SimpleTensor::Label;
using CellRef = std::reference_wrapper<const Cell>;

namespace {

// Only concrete (non-abstract) double or tensor value types are supported.
void assert_type(const ValueType &type) {
    (void) type;
    assert(!type.is_abstract());
    assert(type.is_double() || type.is_tensor());
}

// Verify that 'address' is well-formed for 'type': one label per
// dimension, mapped labels for mapped dimensions and in-range indexes
// for indexed dimensions.
void assert_address(const Address &address, const ValueType &type) {
    assert(address.size() == type.dimensions().size());
    for (size_t i = 0; i < address.size(); ++i) {
        if (type.dimensions()[i].is_mapped()) {
            assert(address[i].is_mapped());
        } else {
            assert(address[i].is_indexed());
            assert(address[i].index < type.dimensions()[i].size);
        }
    }
}

// Project an address down to the dimensions listed in 'selector'.
Address select(const Address &address, const IndexList &selector) {
    Address result;
    for (size_t index: selector) {
        result.push_back(address[index]);
    }
    return result;
}

// Combine two addresses: selector entries below a.size() pick labels
// from 'a'; the rest pick from 'b' (offset by a.size()).
Address select(const Address &a, const Address &b, const IndexList &selector) {
    Address result;
    for (size_t index: selector) {
        if (index < a.size()) {
            result.push_back(a[index]);
        } else {
            result.push_back(b[index - a.size()]);
        }
    }
    return result;
}

// Size of an indexed dimension; npos means "no such dimension" (size 1).
size_t get_dimension_size(const ValueType &type, size_t dim_idx) {
    if (dim_idx == ValueType::Dimension::npos) {
        return 1;
    }
    return type.dimensions()[dim_idx].size;
}

// Index label of 'addr' in the given dimension; npos yields 0.
size_t get_dimension_index(const Address &addr, size_t dim_idx) {
    if (dim_idx == ValueType::Dimension::npos) {
        return 0;
    }
    return addr[dim_idx].index;
}

// Map a renamed dimension name back to its pre-rename name; names not
// present in 'to' are returned unchanged.
const vespalib::string &reverse_rename(const vespalib::string &name,
                                       const std::vector<vespalib::string> &from,
                                       const std::vector<vespalib::string> &to)
{
    assert(from.size() == to.size());
    for (size_t idx = 0; idx < to.size(); ++idx) {
        if (to[idx] == name) {
            return from[idx];
        }
    }
    return name;
}

/**
 * Helper class used when building SimpleTensors. While a tensor
 * in its final form simply contains a collection of cells, the
 * builder keeps track of cell values as a block map instead. Each
 * block is a dense multi-dimensional array that is addressed by
 * the combination of all mapped Labels in a cell address. The
 * indexed labels from the same cell address is used to address
 * the appropriate cell value within the block. The reason for
 * this is to make it easier to make sure that the indexed
 * dimensions have entries for all valid Labels (densify with 0.0
 * as default value).
 **/
class Builder {
private:
    class Block {
    private:
        const ValueType &_type;
        const IndexList &_indexed;
        std::vector<double> _values; // dense storage, zero-initialized
        // Row-major offset of 'address' within the dense block,
        // folding in one indexed dimension at a time.
        size_t offset_of(const Address &address) const {
            size_t offset = 0;
            for (size_t index: _indexed) {
                size_t label = address[index].index;
                size_t size = _type.dimensions()[index].size;
                offset = (offset * size) + label;
            }
            return offset;
        }
        // Recursively enumerate all index combinations for the indexed
        // dimensions, emitting one Cell per combination.
        void subconvert(Address &address, size_t n, Cells &cells_out) const {
            if (n < _indexed.size()) {
                Label &label = address[_indexed[n]];
                size_t size = _type.dimensions()[_indexed[n]].size;
                for (label.index = 0; label.index < size; ++label.index) {
                    subconvert(address, n + 1, cells_out);
                }
            } else {
                cells_out.emplace_back(address, _values[offset_of(address)]);
            }
        }
    public:
        Block(const ValueType &type, const IndexList &indexed, size_t num_values)
            : _type(type), _indexed(indexed), _values(num_values, 0.0) {}
        void set(const Address &address, double value) { _values[offset_of(address)] = value; }
        // Expand this dense block back into discrete cells; 'block_key'
        // supplies the labels for the mapped dimensions.
        void convert(const Address &block_key, const IndexList &mapped, Cells &cells_out) const {
            Address address(_type.dimensions().size(), Label(size_t(0)));
            for (size_t i = 0; i < mapped.size(); ++i) {
                address[mapped[i]] = block_key[i];
            }
            subconvert(address, 0, cells_out);
        }
    };
    using BlockMap = std::map<Address,Block>;
    ValueType _type;
    IndexList _mapped;     // dimension indexes that are mapped
    IndexList _indexed;    // dimension indexes that are indexed
    size_t    _block_size; // product of all indexed dimension sizes
    BlockMap  _blocks;     // one dense block per mapped-label combination
public:
    explicit Builder(const ValueType &type)
        : _type(type),
          _mapped(),
          _indexed(),
          _block_size(1),
          _blocks()
    {
        assert_type(_type);
        for (size_t i = 0; i < type.dimensions().size(); ++i) {
            const auto &dimension = _type.dimensions()[i];
            if (dimension.is_mapped()) {
                _mapped.push_back(i);
            } else {
                _block_size *= dimension.size;
                _indexed.push_back(i);
            }
        }
        // Purely indexed (or scalar) tensors always have exactly one
        // block, created eagerly so build() emits a fully dense result.
        if (_mapped.empty()) {
            _blocks.emplace(Address(), Block(_type, _indexed, _block_size));
        }
    }
    void set(const Address &address, double value) {
        assert_address(address, _type);
        Address block_key = select(address, _mapped);
        auto pos = _blocks.find(block_key);
        if (pos == _blocks.end()) {
            pos = _blocks.emplace(block_key, Block(_type, _indexed, _block_size)).first;
        }
        pos->second.set(address, value);
    }
    // Convenience overload translating a TensorSpec address (labels
    // keyed by dimension name) into a positional Address.
    void set(const TensorSpec::Address &label_map, double value) {
        Address address;
        for (const auto &dimension: _type.dimensions()) {
            auto pos = label_map.find(dimension.name);
            assert(pos != label_map.end());
            address.emplace_back(pos->second);
        }
        set(address, value);
    }
    std::unique_ptr<SimpleTensor> build() {
        Cells cells;
        for (const auto &entry: _blocks) {
            entry.second.convert(entry.first, _mapped, cells);
        }
        return std::make_unique<SimpleTensor>(_type, std::move(cells));
    }
};

/**
 * Helper class used to calculate which dimensions are shared between
 * types and which are not. Also calculates how address elements from
 * cells with the different types should be combined into a single
 * address.
 **/
struct TypeAnalyzer {
    static constexpr size_t npos = -1;
    IndexList only_a;    // dimensions present only in lhs
    IndexList overlap_a; // shared dimensions, positions in lhs
    IndexList overlap_b; // shared dimensions, positions in rhs
    IndexList only_b;    // dimensions present only in rhs
    IndexList combine;   // selector for building a combined address
    size_t ignored_a;    // position of 'ignore' in lhs (npos if absent)
    size_t ignored_b;    // position of 'ignore' in rhs (npos if absent)
    // Merge-walk over the two (name-sorted) dimension lists; a
    // dimension named 'ignore' is excluded from all output lists.
    TypeAnalyzer(const ValueType &lhs, const ValueType &rhs, const vespalib::string &ignore = "")
        : only_a(), overlap_a(), overlap_b(), only_b(), combine(), ignored_a(npos), ignored_b(npos)
    {
        const auto &a = lhs.dimensions();
        const auto &b = rhs.dimensions();
        size_t b_idx = 0;
        for (size_t a_idx = 0; a_idx < a.size(); ++a_idx) {
            while ((b_idx < b.size()) && (b[b_idx].name < a[a_idx].name)) {
                if (b[b_idx].name != ignore) {
                    only_b.push_back(b_idx);
                    combine.push_back(a.size() + b_idx);
                } else {
                    ignored_b = b_idx;
                }
                ++b_idx;
            }
            if ((b_idx < b.size()) && (b[b_idx].name == a[a_idx].name)) {
                if (a[a_idx].name != ignore) {
                    overlap_a.push_back(a_idx);
                    overlap_b.push_back(b_idx);
                    combine.push_back(a_idx);
                } else {
                    ignored_a = a_idx;
                    ignored_b = b_idx;
                }
                ++b_idx;
            } else {
                if (a[a_idx].name != ignore) {
                    only_a.push_back(a_idx);
                    combine.push_back(a_idx);
                } else {
                    ignored_a = a_idx;
                }
            }
        }
        while (b_idx < b.size()) {
            if (b[b_idx].name != ignore) {
                only_b.push_back(b_idx);
                combine.push_back(a.size() + b_idx);
            } else {
                ignored_b = b_idx;
            }
            ++b_idx;
        }
    }
};

/**
 * A view is a total ordering of cells from a SimpleTensor according
 * to a subset of the dimensions in the tensor type.
 **/
class View {
public:
    /**
     * A range of cells within a view with equal values for all labels
     * corresponding to the dimensions of the view.
     **/
    class EqualRange {
    private:
        const CellRef *_begin;
        const CellRef *_end;
    public:
        EqualRange(const CellRef *begin_in, const CellRef *end_in)
            : _begin(begin_in), _end(end_in) {}
        const CellRef *begin() const { return _begin; };
        const CellRef *end() const { return _end; }
        bool empty() const { return (_begin == _end); }
    };
private:
    /**
     * Address comparator only looking at a subset of the labels.
     **/
    struct Less {
        IndexList selector;
        explicit Less(const IndexList &selector_in) : selector(selector_in) {}
        bool operator()(const CellRef &a, const CellRef &b) const {
            for (size_t idx: selector) {
                if (a.get().address[idx] != b.get().address[idx]) {
                    return (a.get().address[idx] < b.get().address[idx]);
                }
            }
            return false;
        }
    };
    Less _less;
    std::vector<CellRef> _refs; // cells sorted by _less

    // Grow a range forward from 'begin' while the selected labels
    // compare equal (the refs are already sorted by _less).
    EqualRange make_range(const CellRef *begin) const {
        const CellRef *end = (begin < refs_end()) ? (begin + 1) : begin;
        while ((end < refs_end()) && !_less(*(end - 1), *end)) {
            ++end;
        }
        return EqualRange(begin, end);
    }

public:
    View(const SimpleTensor &tensor, const IndexList &selector)
        : _less(selector), _refs()
    {
        for (const auto &cell: tensor.cells()) {
            _refs.emplace_back(cell);
        }
        std::sort(_refs.begin(), _refs.end(), _less);
    }
    // Sub-view: re-order the cells of an existing EqualRange by a
    // (typically different) dimension subset.
    View(const EqualRange &range, const IndexList &selector)
        : _less(selector), _refs()
    {
        for (const auto &cell: range) {
            _refs.emplace_back(cell);
        }
        std::sort(_refs.begin(), _refs.end(), _less);
    }
    const IndexList &selector() const { return _less.selector; }
    const CellRef *refs_begin() const { return &_refs[0]; }
    const CellRef *refs_end() const { return (refs_begin() + _refs.size()); }
    EqualRange first_range() const { return make_range(refs_begin()); }
    EqualRange next_range(const EqualRange &prev) const { return make_range(prev.end()); }
};

/**
 * Helper class used to find matching EqualRanges from two different
 * SimpleTensor Views.
 **/
class ViewMatcher {
public:
    /**
     * Comparator used to cross-compare addresses across two different
     * views only looking at the overlapping dimensions between the
     * views.
     **/
    struct CrossCompare {
        enum class Result { LESS, EQUAL, GREATER };
        IndexList a_selector;
        IndexList b_selector;
        CrossCompare(const IndexList &a_selector_in, const IndexList &b_selector_in)
            : a_selector(a_selector_in), b_selector(b_selector_in)
        {
            assert(a_selector.size() == b_selector.size());
        }
        Result compare(const Cell &a, const Cell &b) const {
            for (size_t i = 0; i < a_selector.size(); ++i) {
                if (a.address[a_selector[i]] != b.address[b_selector[i]]) {
                    if (a.address[a_selector[i]] < b.address[b_selector[i]]) {
                        return Result::LESS;
                    } else {
                        return Result::GREATER;
                    }
                }
            }
            return Result::EQUAL;
        }
    };
    using EqualRange = View::EqualRange;

private:
    const View &_a;
    const View &_b;
    EqualRange _a_range;
    EqualRange _b_range;
    CrossCompare _cmp;

    bool has_a() const { return !_a_range.empty(); }
    bool has_b() const { return !_b_range.empty(); }
    void next_a() { _a_range = _a.next_range(_a_range); }
    void next_b() { _b_range = _b.next_range(_b_range); }

    // Merge-style scan: advance whichever side is behind until both
    // current ranges agree on the overlapping dimensions, or one side
    // runs out of ranges.
    void find_match() {
        while (valid()) {
            switch (_cmp.compare(*get_a().begin(), *get_b().begin())) {
            case CrossCompare::Result::LESS:
                next_a();
                break;
            case CrossCompare::Result::GREATER:
                next_b();
                break;
            case CrossCompare::Result::EQUAL:
                return;
            }
        }
    }

public:
    ViewMatcher(const View &a, const View &b)
        : _a(a), _b(b), _a_range(_a.first_range()), _b_range(b.first_range()),
          _cmp(a.selector(), b.selector())
    {
        find_match();
    }
    bool valid() const { return (has_a() && has_b()); }
    const EqualRange &get_a() const { return _a_range; }
    const EqualRange &get_b() const { return _b_range; }
    void next() {
        next_a();
        next_b();
        find_match();
    }
};

} // namespace vespalib::eval::<unnamed>

constexpr size_t TensorSpec::Label::npos;
constexpr size_t SimpleTensor::Label::npos;

// A double value behaves as a tensor with an empty type and one cell.
SimpleTensor::SimpleTensor(double value)
    : Tensor(SimpleTensorEngine::ref()),
      _type(ValueType::double_type()),
      _cells({Cell({},value)})
{
}

// Cells are validated against the type and kept sorted by address so
// Views can be built with a plain sort of references.
SimpleTensor::SimpleTensor(const ValueType &type_in, Cells &&cells_in)
    : Tensor(SimpleTensorEngine::ref()),
      _type(type_in),
      _cells(std::move(cells_in))
{
    assert_type(_type);
    for (const auto &cell: _cells) {
        assert_address(cell.address, _type);
    }
    std::sort(_cells.begin(), _cells.end(),
              [](const auto &a, const auto &b){ return (a.address < b.address); });
}

// Remove 'dimensions' from the type; cells that agree on all surviving
// dimensions are folded together with 'op'.
std::unique_ptr<SimpleTensor>
SimpleTensor::reduce(const BinaryOperation &op, const std::vector<vespalib::string> &dimensions) const
{
    ValueType result_type = _type.reduce(dimensions);
    Builder builder(result_type);
    IndexList selector = TypeAnalyzer(_type, result_type).overlap_a;
    View view(*this, selector);
    for (View::EqualRange range = view.first_range(); !range.empty(); range = view.next_range(range)) {
        auto pos = range.begin();
        double value = (pos++)->get().value;
        for (; pos != range.end(); ++pos) {
            value = op.eval(value, pos->get().value);
        }
        builder.set(select(range.begin()->get().address, selector), value);
    }
    return builder.build();
}

// Rename dimensions 'from' -> 'to'; cell labels are re-ordered to match
// the (sorted) dimension order of the renamed type.
std::unique_ptr<SimpleTensor>
SimpleTensor::rename(const std::vector<vespalib::string> &from, const std::vector<vespalib::string> &to) const
{
    ValueType result_type = _type.rename(from, to);
    Builder builder(result_type);
    IndexList selector;
    for (const auto &dim: result_type.dimensions()) {
        selector.push_back(_type.dimension_index(reverse_rename(dim.name, from, to)));
    }
    for (auto &cell: _cells) {
        builder.set(select(cell.address, selector), cell.value);
    }
    return builder.build();
}

std::unique_ptr<SimpleTensor>
SimpleTensor::create(const TensorSpec &spec)
{
    Builder builder(ValueType::from_spec(spec.type()));
    for (const auto &cell: spec.cells()) {
        builder.set(cell.first, cell.second);
    }
    return builder.build();
}

// Tensors are equal when they have the same type and the same cells in
// the same (sorted) order with identical values.
bool
SimpleTensor::equal(const SimpleTensor &a, const SimpleTensor &b)
{
    if (a.type() != b.type()) {
        return false;
    }
    TypeAnalyzer type_info(a.type(), b.type());
    View view_a(a, type_info.overlap_a);
    View view_b(b, type_info.overlap_b);
    const CellRef *pos_a = view_a.refs_begin();
    const CellRef *end_a = view_a.refs_end();
    const CellRef *pos_b = view_b.refs_begin();
    const CellRef *end_b = view_b.refs_end();
    ViewMatcher::CrossCompare cmp(view_a.selector(), view_b.selector());
    while ((pos_a != end_a) && (pos_b != end_b)) {
        if (cmp.compare(pos_a->get(), pos_b->get()) != ViewMatcher::CrossCompare::Result::EQUAL) {
            return false;
        }
        if (pos_a->get().value != pos_b->get().value) {
            return false;
        }
        ++pos_a;
        ++pos_b;
    }
    return ((pos_a == end_a) && (pos_b == end_b));
}

// Apply 'op' to each cell value; addresses are unchanged.
std::unique_ptr<SimpleTensor>
SimpleTensor::map(const UnaryOperation &op, const SimpleTensor &a)
{
    Cells cells(a.cells());
    for (auto &cell: cells) {
        cell.value = op.eval(cell.value);
    }
    return std::make_unique<SimpleTensor>(a.type(), std::move(cells));
}

// Tensor join: match cells on overlapping dimensions, combine values
// with 'op' and produce cells addressed by the union of dimensions.
std::unique_ptr<SimpleTensor>
SimpleTensor::join(const BinaryOperation &op, const SimpleTensor &a, const SimpleTensor &b)
{
    ValueType result_type = ValueType::join(a.type(), b.type());
    Builder builder(result_type);
    TypeAnalyzer type_info(a.type(), b.type());
    View view_a(a, type_info.overlap_a);
    View view_b(b, type_info.overlap_b);
    for (ViewMatcher matcher(view_a, view_b); matcher.valid(); matcher.next()) {
        for (const auto &ref_a: matcher.get_a()) {
            for (const auto &ref_b: matcher.get_b()) {
                builder.set(select(ref_a.get().address, ref_b.get().address, type_info.combine),
                            op.eval(ref_a.get().value, ref_b.get().value));
            }
        }
    }
    return builder.build();
}

// Concatenate 'a' and 'b' along 'dimension': cells from 'b' are placed
// after those from 'a' (offset by a's size in that dimension).
std::unique_ptr<SimpleTensor>
SimpleTensor::concat(const SimpleTensor &a, const SimpleTensor &b, const vespalib::string &dimension)
{
    ValueType result_type = ValueType::concat(a.type(), b.type(), dimension);
    Builder builder(result_type);
    TypeAnalyzer type_info(a.type(), b.type(), dimension);
    View view_a(a, type_info.overlap_a);
    View view_b(b, type_info.overlap_b);
    size_t cat_dim_idx = result_type.dimension_index(dimension);
    size_t cat_offset = get_dimension_size(a.type(), type_info.ignored_a);
    for (ViewMatcher matcher(view_a, view_b); matcher.valid(); matcher.next()) {
        View subview_a(matcher.get_a(), type_info.only_a);
        View subview_b(matcher.get_b(), type_info.only_b);
        for (auto range_a = subview_a.first_range(); !range_a.empty(); range_a = subview_a.next_range(range_a)) {
            for (auto range_b = subview_b.first_range(); !range_b.empty(); range_b = subview_b.next_range(range_b)) {
                Address addr = select(range_a.begin()->get().address, range_b.begin()->get().address, type_info.combine);
                addr.insert(addr.begin() + cat_dim_idx, Label(size_t(0)));
                for (const auto &ref: range_a) {
                    addr[cat_dim_idx].index = get_dimension_index(ref.get().address, type_info.ignored_a);
                    builder.set(addr, ref.get().value);
                }
                for (const auto &ref: range_b) {
                    addr[cat_dim_idx].index = cat_offset + get_dimension_index(ref.get().address, type_info.ignored_b);
                    builder.set(addr, ref.get().value);
                }
            }
        }
    }
    return builder.build();
}

} // namespace vespalib::eval
} // namespace vespalib
diff --git a/eval/src/vespa/eval/eval/simple_tensor.h b/eval/src/vespa/eval/eval/simple_tensor.h
new file mode 100644
index 00000000000..90c268466da
--- /dev/null
+++ b/eval/src/vespa/eval/eval/simple_tensor.h
@@ -0,0 +1,87 @@
// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
#pragma once

#include <vespa/vespalib/stllike/string.h>
#include <vespa/vespalib/util/stash.h>
#include <memory>
#include <map>
#include "value_type.h"
#include "tensor.h"
#include "tensor_spec.h"

namespace vespalib {
namespace eval {

struct UnaryOperation;
struct BinaryOperation;

/**
 * A tensor supporting a mix of indexed and mapped dimensions. The
 * goal for this class is to be a simple, complete and correct
 * reference implementation supporting all relevant tensor operations.
 **/
class SimpleTensor : public Tensor
{
public:
    /**
     * A label for a single dimension. This is either a string
     * (mapped) or an integer (indexed). A sequence of Labels form an
     * Address. The labels must have the same order as the dimensions
     * in the tensor type (which are sorted on dimension name). Labels
     * for mapped dimensions must be strings and labels for indexed
     * dimensions must be integers smaller than the dimension size.
     **/
    struct Label {
        size_t index;          // npos for mapped labels
        vespalib::string name;
        static constexpr size_t npos = -1;
        Label(const TensorSpec::Label &label)
            : index(label.index), name(label.name) {}
        // Total ordering: indexed part first, then mapped name.
        bool operator<(const Label &rhs) const {
            if (index != rhs.index) {
                return (index < rhs.index);
            }
            return (name < rhs.name);
        }
        bool operator==(const Label &rhs) const {
            return ((index == rhs.index) && (name == rhs.name));
        }
        bool operator!=(const Label &rhs) const { return !(*this == rhs); }
        bool is_mapped() const { return (index == npos); }
        bool is_indexed() const { return (index != npos); }
    };
    using Address = std::vector<Label>;

    /**
     * A tensor has a type and contains a collection of Cells. Each
     * cell has an Address and a value.
     **/
    struct Cell {
        Address address;
        double value;
        Cell(const Address &address_in, double value_in)
            : address(address_in), value(value_in) {}
    };
    using Cells = std::vector<Cell>;

private:
    ValueType _type;
    Cells _cells;

public:
    explicit SimpleTensor(double value);
    SimpleTensor(const ValueType &type_in, Cells &&cells_in);
    const ValueType &type() const { return _type; }
    const Cells &cells() const { return _cells; }
    std::unique_ptr<SimpleTensor> reduce(const BinaryOperation &op, const std::vector<vespalib::string> &dimensions) const;
    std::unique_ptr<SimpleTensor> rename(const std::vector<vespalib::string> &from, const std::vector<vespalib::string> &to) const;
    static std::unique_ptr<SimpleTensor> create(const TensorSpec &spec);
    static bool equal(const SimpleTensor &a, const SimpleTensor &b);
    static std::unique_ptr<SimpleTensor> map(const UnaryOperation &op, const SimpleTensor &a);
    static std::unique_ptr<SimpleTensor> join(const BinaryOperation &op, const SimpleTensor &a, const SimpleTensor &b);
    static std::unique_ptr<SimpleTensor> concat(const SimpleTensor &a, const SimpleTensor &b, const vespalib::string &dimension);
};

} // namespace vespalib::eval
} // namespace vespalib
diff --git a/eval/src/vespa/eval/eval/simple_tensor_engine.cpp b/eval/src/vespa/eval/eval/simple_tensor_engine.cpp
new file mode 100644
index 00000000000..62f2fa91cdf
--- /dev/null
+++ b/eval/src/vespa/eval/eval/simple_tensor_engine.cpp
@@ -0,0 +1,147 @@
// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
#include <vespa/fastos/fastos.h>
#include <vespa/vespalib/util/stringfmt.h>
#include "simple_tensor_engine.h"
#include "simple_tensor.h"
#include "operation.h"

namespace vespalib {
namespace eval {

const SimpleTensorEngine SimpleTensorEngine::_engine;

ValueType
SimpleTensorEngine::type_of(const Tensor &tensor) const
{
    // Tensors are only valid with the engine that created them.
    assert(&tensor.engine() == this);
    const SimpleTensor &simple_tensor = static_cast<const SimpleTensor&>(tensor);
    return simple_tensor.type();
}

bool
SimpleTensorEngine::equal(const Tensor &a, const Tensor &b) const
{
    assert(&a.engine() == this);
    assert(&b.engine() == this);
    const SimpleTensor &simple_a = static_cast<const SimpleTensor&>(a);
    const SimpleTensor &simple_b = static_cast<const SimpleTensor&>(b);
    return SimpleTensor::equal(simple_a, simple_b);
}

// Human-readable dump: one "[label,...]: value" line per cell.
vespalib::string
SimpleTensorEngine::to_string(const Tensor &tensor) const
{
    assert(&tensor.engine() == this);
    const SimpleTensor &simple_tensor = static_cast<const SimpleTensor&>(tensor);
    vespalib::string out = vespalib::make_string("simple(%s) {\n", simple_tensor.type().to_spec().c_str());
    for (const auto &cell: simple_tensor.cells()) {
        size_t n = 0;
        out.append("  [");
        for (const auto &label: cell.address) {
            if (n++) {
                out.append(",");
            }
            if (label.is_mapped()) {
                out.append(label.name);
            } else {
                out.append(vespalib::make_string("%zu", label.index));
            }
        }
        out.append(vespalib::make_string("]: %g\n", cell.value));
    }
    out.append("}");
    return out;
}

// Convert back to the engine-neutral TensorSpec representation.
TensorSpec
SimpleTensorEngine::to_spec(const Tensor &tensor) const
{
    assert(&tensor.engine() == this);
    const SimpleTensor &simple_tensor = static_cast<const SimpleTensor&>(tensor);
    ValueType type = simple_tensor.type();
    const auto &dimensions = type.dimensions();
    TensorSpec spec(type.to_spec());
    for (const auto &cell: simple_tensor.cells()) {
        TensorSpec::Address addr;
        assert(cell.address.size() == dimensions.size());
        for (size_t i = 0; i < cell.address.size(); ++i) {
            const auto &label = cell.address[i];
            if (label.is_mapped()) {
                addr.emplace(dimensions[i].name, TensorSpec::Label(label.name));
            } else {
                addr.emplace(dimensions[i].name, TensorSpec::Label(label.index));
            }
        }
        spec.add(addr, cell.value);
    }
    return spec;
}

// View a generic Value as a SimpleTensor; plain doubles are wrapped in
// a stash-allocated scalar tensor.
const SimpleTensor &to_simple(const Value &value, Stash &stash) {
    auto tensor = value.as_tensor();
    if (tensor) {
        assert(&tensor->engine() == &SimpleTensorEngine::ref());
        return static_cast<const SimpleTensor &>(*tensor);
    }
    return stash.create<SimpleTensor>(value.as_double());
}

std::unique_ptr<eval::Tensor>
SimpleTensorEngine::create(const TensorSpec &spec) const
{
    return SimpleTensor::create(spec);
}

// Reduce over the given dimensions; an empty list means "all
// dimensions". A fully reduced result collapses to a DoubleValue.
const Value &
SimpleTensorEngine::reduce(const eval::Tensor &tensor, const BinaryOperation &op, const std::vector<vespalib::string> &dimensions, Stash &stash) const
{
    assert(&tensor.engine() == this);
    const SimpleTensor &simple_tensor = static_cast<const SimpleTensor&>(tensor);
    auto result = simple_tensor.reduce(op, dimensions.empty() ? simple_tensor.type().dimension_names() : dimensions);
    if (result->type().is_double()) {
        assert(result->cells().size() == 1u);
        return stash.create<DoubleValue>(result->cells()[0].value);
    }
    return stash.create<TensorValue>(std::move(result));
}

const Value &
SimpleTensorEngine::map(const UnaryOperation &op, const eval::Tensor &a, Stash &stash) const
{
    assert(&a.engine() == this);
    const SimpleTensor &simple_a = static_cast<const SimpleTensor&>(a);
    auto result = SimpleTensor::map(op, simple_a);
    return stash.create<TensorValue>(std::move(result));
}

const Value &
SimpleTensorEngine::apply(const BinaryOperation &op, const eval::Tensor &a, const eval::Tensor &b, Stash &stash) const
{
    assert(&a.engine() == this);
    assert(&b.engine() == this);
    const SimpleTensor &simple_a = static_cast<const SimpleTensor&>(a);
    const SimpleTensor &simple_b = static_cast<const SimpleTensor&>(b);
    auto result = SimpleTensor::join(op, simple_a, simple_b);
    return stash.create<TensorValue>(std::move(result));
}

const Value &
SimpleTensorEngine::concat(const Value &a, const Value &b, const vespalib::string &dimension, Stash &stash) const
{
    const SimpleTensor &simple_a = to_simple(a, stash);
    const SimpleTensor &simple_b = to_simple(b, stash);
    auto result = SimpleTensor::concat(simple_a, simple_b, dimension);
    return stash.create<TensorValue>(std::move(result));
}

const Value &
SimpleTensorEngine::rename(const Value &a, const std::vector<vespalib::string> &from, const std::vector<vespalib::string> &to, Stash &stash) const
{
    const SimpleTensor &simple_a = to_simple(a, stash);
    auto result = simple_a.rename(from, to);
    return stash.create<TensorValue>(std::move(result));
}

} // namespace vespalib::eval
} // namespace vespalib
diff --git a/eval/src/vespa/eval/eval/simple_tensor_engine.h b/eval/src/vespa/eval/eval/simple_tensor_engine.h
new file mode 100644
index 00000000000..a8617ba6036
--- /dev/null
+++ 
b/eval/src/vespa/eval/eval/simple_tensor_engine.h @@ -0,0 +1,37 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#pragma once + +#include "tensor_engine.h" + +namespace vespalib { +namespace eval { + +/** + * This is a TensorEngine implementation for the SimpleTensor + * reference implementation. + **/ +class SimpleTensorEngine : public TensorEngine +{ +private: + SimpleTensorEngine() {} + static const SimpleTensorEngine _engine; +public: + static const TensorEngine &ref() { return _engine; }; + + ValueType type_of(const Tensor &tensor) const override; + bool equal(const Tensor &a, const Tensor &b) const override; + vespalib::string to_string(const Tensor &tensor) const override; + TensorSpec to_spec(const Tensor &tensor) const override; + + std::unique_ptr<Tensor> create(const TensorSpec &spec) const override; + const Value &reduce(const Tensor &tensor, const BinaryOperation &op, const std::vector<vespalib::string> &dimensions, Stash &stash) const override; + const Value &map(const UnaryOperation &op, const Tensor &a, Stash &stash) const override; + const Value &apply(const BinaryOperation &op, const Tensor &a, const Tensor &b, Stash &stash) const override; + + const Value &concat(const Value &a, const Value &b, const vespalib::string &dimension, Stash &stash) const override; + const Value &rename(const Value &a, const std::vector<vespalib::string> &from, const std::vector<vespalib::string> &to, Stash &stash) const override; +}; + +} // namespace vespalib::eval +} // namespace vespalib diff --git a/eval/src/vespa/eval/eval/tensor.cpp b/eval/src/vespa/eval/eval/tensor.cpp new file mode 100644 index 00000000000..f79f14e5013 --- /dev/null +++ b/eval/src/vespa/eval/eval/tensor.cpp @@ -0,0 +1,24 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+ +#include <vespa/fastos/fastos.h> +#include "tensor.h" +#include "tensor_engine.h" + +namespace vespalib { +namespace eval { + +bool +operator==(const Tensor &lhs, const Tensor &rhs) +{ + return ((&lhs.engine() == &rhs.engine()) && lhs.engine().equal(lhs, rhs)); +} + +std::ostream & +operator<<(std::ostream &out, const Tensor &tensor) +{ + out << tensor.engine().to_string(tensor); + return out; +} + +} // namespace vespalib::eval +} // namespace vespalib diff --git a/eval/src/vespa/eval/eval/tensor.h b/eval/src/vespa/eval/eval/tensor.h new file mode 100644 index 00000000000..9f32bf9b89c --- /dev/null +++ b/eval/src/vespa/eval/eval/tensor.h @@ -0,0 +1,41 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#pragma once + +#include "value_type.h" + +namespace vespalib { +namespace eval { + +class TensorEngine; + +/** + * Base class for all tensors. Tensor operations are defined by the + * TensorEngine interface. The Tensor class itself is used as a tagged + * transport mechanism. Each Tensor is connected to a distinct engine + * which can be used to operate on it. When operating on multiple + * tensors at the same time they all need to be connected to the same + * engine. TensorEngines should only have a single static instance per + * implementation. 
+ **/ +class Tensor +{ +private: + const TensorEngine &_engine; +protected: + explicit Tensor(const TensorEngine &engine_in) + : _engine(engine_in) {} +public: + Tensor(const Tensor &) = delete; + Tensor(Tensor &&) = delete; + Tensor &operator=(const Tensor &) = delete; + Tensor &operator=(Tensor &&) = delete; + const TensorEngine &engine() const { return _engine; } + virtual ~Tensor() {} +}; + +bool operator==(const Tensor &lhs, const Tensor &rhs); +std::ostream &operator<<(std::ostream &out, const Tensor &tensor); + +} // namespace vespalib::eval +} // namespace vespalib diff --git a/eval/src/vespa/eval/eval/tensor_engine.cpp b/eval/src/vespa/eval/eval/tensor_engine.cpp new file mode 100644 index 00000000000..6ca06e68618 --- /dev/null +++ b/eval/src/vespa/eval/eval/tensor_engine.cpp @@ -0,0 +1,10 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#include <vespa/fastos/fastos.h> +#include "tensor_engine.h" + +namespace vespalib { +namespace eval { + +} // namespace vespalib::eval +} // namespace vespalib diff --git a/eval/src/vespa/eval/eval/tensor_engine.h b/eval/src/vespa/eval/eval/tensor_engine.h new file mode 100644 index 00000000000..a7b29dbebf6 --- /dev/null +++ b/eval/src/vespa/eval/eval/tensor_engine.h @@ -0,0 +1,61 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#pragma once + +#include <memory> +#include <vector> +#include <vespa/vespalib/stllike/string.h> +#include "value_type.h" +#include "tensor_function.h" + +namespace vespalib { + +class Stash; + +namespace eval { + +class Value; +class Tensor; +class TensorSpec; +struct UnaryOperation; +struct BinaryOperation; + +/** + * Top-level API for a tensor implementation. All Tensor operations + * are defined by the TensorEngine interface. The Tensor class itself + * is used as a tagged transport mechanism. 
Each Tensor is connected + * to a distinct engine which can be used to operate on it. When + * operating on multiple tensors at the same time they all need to be + * connected to the same engine. TensorEngines should only have a + * single static instance per implementation. + **/ +struct TensorEngine +{ + using ValueType = eval::ValueType; + using Tensor = eval::Tensor; + using TensorSpec = eval::TensorSpec; + using Value = eval::Value; + using BinaryOperation = eval::BinaryOperation; + using UnaryOperation = eval::UnaryOperation; + + virtual ValueType type_of(const Tensor &tensor) const = 0; + virtual bool equal(const Tensor &a, const Tensor &b) const = 0; + virtual vespalib::string to_string(const Tensor &tensor) const = 0; + virtual TensorSpec to_spec(const Tensor &tensor) const = 0; + + virtual TensorFunction::UP compile(tensor_function::Node_UP expr) const { return std::move(expr); } + + virtual std::unique_ptr<Tensor> create(const TensorSpec &spec) const = 0; + virtual const Value &reduce(const Tensor &tensor, const BinaryOperation &op, const std::vector<vespalib::string> &dimensions, Stash &stash) const = 0; + virtual const Value &map(const UnaryOperation &op, const Tensor &a, Stash &stash) const = 0; + virtual const Value &apply(const BinaryOperation &op, const Tensor &a, const Tensor &b, Stash &stash) const = 0; + + // havardpe: new API, WIP + virtual const Value &concat(const Value &a, const Value &b, const vespalib::string &dimension, Stash &stash) const = 0; + virtual const Value &rename(const Value &a, const std::vector<vespalib::string> &from, const std::vector<vespalib::string> &to, Stash &stash) const = 0; + + virtual ~TensorEngine() {} +}; + +} // namespace vespalib::eval +} // namespace vespalib diff --git a/eval/src/vespa/eval/eval/tensor_function.cpp b/eval/src/vespa/eval/eval/tensor_function.cpp new file mode 100644 index 00000000000..5750d90059f --- /dev/null +++ b/eval/src/vespa/eval/eval/tensor_function.cpp @@ -0,0 +1,75 @@ +// Copyright 
2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#include <vespa/fastos/fastos.h> +#include "tensor_function.h" +#include "value.h" +#include "operation.h" +#include "tensor.h" +#include "tensor_engine.h" + +namespace vespalib { +namespace eval { +namespace tensor_function { + +void Inject::accept(TensorFunctionVisitor &visitor) const { visitor.visit(*this); } +void Reduce::accept(TensorFunctionVisitor &visitor) const { visitor.visit(*this); } +void Map ::accept(TensorFunctionVisitor &visitor) const { visitor.visit(*this); } +void Apply ::accept(TensorFunctionVisitor &visitor) const { visitor.visit(*this); } + +//----------------------------------------------------------------------------- + +const Value & +Inject::eval(const Input &input, Stash &) const +{ + return input.get_tensor(tensor_id); +} + +const Value & +Reduce::eval(const Input &input, Stash &stash) const +{ + const Tensor &a = *tensor->eval(input, stash).as_tensor(); + const TensorEngine &engine = a.engine(); + return engine.reduce(a, *op, dimensions, stash); +} + +const Value & +Map::eval(const Input &input, Stash &stash) const +{ + const Tensor &a = *tensor->eval(input, stash).as_tensor(); + const TensorEngine &engine = a.engine(); + return engine.map(input.get_map_operation(map_operation_id), a, stash); +} + +const Value & +Apply::eval(const Input &input, Stash &stash) const +{ + const Tensor &a = *lhs_tensor->eval(input, stash).as_tensor(); + const Tensor &b = *rhs_tensor->eval(input, stash).as_tensor(); + const TensorEngine &engine = a.engine(); + return engine.apply(*op, a, b, stash); +} + +//----------------------------------------------------------------------------- + +Node_UP inject(const ValueType &type, size_t tensor_id) { + return std::make_unique<Inject>(type, tensor_id); +} + +Node_UP reduce(Node_UP tensor, const BinaryOperation &op, const std::vector<vespalib::string> &dimensions) { + ValueType result_type = 
tensor->result_type.reduce(dimensions); + return std::make_unique<Reduce>(result_type, std::move(tensor), op.clone(), dimensions); +} + +Node_UP map(size_t map_operation_id, Node_UP tensor) { + ValueType result_type = tensor->result_type; + return std::make_unique<Map>(result_type, map_operation_id, std::move(tensor)); +} + +Node_UP apply(const BinaryOperation &op, Node_UP lhs_tensor, Node_UP rhs_tensor) { + ValueType result_type = ValueType::join(lhs_tensor->result_type, rhs_tensor->result_type); + return std::make_unique<Apply>(result_type, op.clone(), std::move(lhs_tensor), std::move(rhs_tensor)); +} + +} // namespace vespalib::eval::tensor_function +} // namespace vespalib::eval +} // namespace vespalib diff --git a/eval/src/vespa/eval/eval/tensor_function.h b/eval/src/vespa/eval/eval/tensor_function.h new file mode 100644 index 00000000000..37e17e64d8a --- /dev/null +++ b/eval/src/vespa/eval/eval/tensor_function.h @@ -0,0 +1,159 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#pragma once + +#include <memory> +#include <vector> +#include <vespa/vespalib/stllike/string.h> +#include "value_type.h" +#include "operation.h" + +namespace vespalib { + +class Stash; + +namespace eval { + +class Value; +class Tensor; + +//----------------------------------------------------------------------------- + +/** + * A tensor function that can be evaluated. A TensorFunction will + * typically be produced by an implementation-specific compile step + * that takes an implementation-independent intermediate + * representation of the tensor function as input (tree of + * tensor_function::Node objects). + **/ +struct TensorFunction +{ + typedef std::unique_ptr<TensorFunction> UP; + + /** + * Interface used to obtain input to a tensor function. 
+ **/ + struct Input { + virtual const Value &get_tensor(size_t id) const = 0; + virtual const UnaryOperation &get_map_operation(size_t id) const = 0; + virtual ~Input() {} + }; + + /** + * Evaluate this tensor function based on the given input. The + * given stash can be used to store temporary objects that need to + * be kept alive for the return value to be valid. The return + * value must conform to the result type indicated by the + * intermediate representation describing this tensor function. + * + * @return result of evaluating this tensor function + * @param input external stuff needed to evaluate this function + **/ + virtual const Value &eval(const Input &input, Stash &stash) const = 0; + + virtual ~TensorFunction() {} +}; + +//----------------------------------------------------------------------------- + +struct TensorFunctionVisitor; + +namespace tensor_function { + +/** + * Interface used to describe a tensor function as a tree of nodes + * with information about operation sequencing and intermediate result + * types. Each node in the tree will describe a single tensor + * operation. This is the intermediate representation of a tensor + * function. + * + * The intermediate representation of a tensor function can also be + * used to evaluate the tensor function it represents directly. This + * will invoke the immediate API on the tensor engine associated with + * the input tensors. In other words, the intermediate representation + * 'compiles to itself'. + **/ +struct Node : public TensorFunction +{ + ValueType result_type; + Node(const ValueType &result_type_in) : result_type(result_type_in) {} + virtual void accept(TensorFunctionVisitor &visitor) const = 0; + Node(const Node &) = delete; + Node &operator=(const Node &) = delete; + Node(Node &&) = delete; + Node &operator=(Node &&) = delete; +}; +using Node_UP = std::unique_ptr<Node>; + +/** + * Simple typecasting utility. 
+ */ +template <typename T> +const T *as(const Node &node) { return dynamic_cast<const T *>(&node); } + +struct Inject : Node { + size_t tensor_id; + Inject(const ValueType &result_type_in, + size_t tensor_id_in) + : Node(result_type_in), tensor_id(tensor_id_in) {} + void accept(TensorFunctionVisitor &visitor) const override; + const Value &eval(const Input &input, Stash &) const override; +}; + +struct Reduce : Node { + Node_UP tensor; + std::unique_ptr<BinaryOperation> op; + std::vector<vespalib::string> dimensions; + Reduce(const ValueType &result_type_in, + Node_UP tensor_in, + std::unique_ptr<BinaryOperation> op_in, + const std::vector<vespalib::string> &dimensions_in) + : Node(result_type_in), tensor(std::move(tensor_in)), op(std::move(op_in)), dimensions(dimensions_in) {} + void accept(TensorFunctionVisitor &visitor) const override; + const Value &eval(const Input &input, Stash &stash) const override; +}; + +struct Map : Node { + size_t map_operation_id; + Node_UP tensor; + Map(const ValueType &result_type_in, + size_t map_operation_id_in, + Node_UP tensor_in) + : Node(result_type_in), map_operation_id(map_operation_id_in), tensor(std::move(tensor_in)) {} + void accept(TensorFunctionVisitor &visitor) const override; + const Value &eval(const Input &input, Stash &stash) const override; +}; + +struct Apply : Node { + std::unique_ptr<BinaryOperation> op; + Node_UP lhs_tensor; + Node_UP rhs_tensor; + Apply(const ValueType &result_type_in, + std::unique_ptr<BinaryOperation> op_in, + Node_UP lhs_tensor_in, + Node_UP rhs_tensor_in) + : Node(result_type_in), op(std::move(op_in)), + lhs_tensor(std::move(lhs_tensor_in)), rhs_tensor(std::move(rhs_tensor_in)) {} + void accept(TensorFunctionVisitor &visitor) const override; + const Value &eval(const Input &input, Stash &stash) const override; +}; + +Node_UP inject(const ValueType &type, size_t tensor_id); +Node_UP reduce(Node_UP tensor, const BinaryOperation &op, const std::vector<vespalib::string> &dimensions); +Node_UP 
map(size_t map_operation_id, Node_UP tensor); +Node_UP apply(const BinaryOperation &op, Node_UP lhs_tensor, Node_UP rhs_tensor); + +} // namespace vespalib::eval::tensor_function + +struct TensorFunctionVisitor { + virtual void visit(const tensor_function::Inject &) = 0; + virtual void visit(const tensor_function::Reduce &) = 0; + virtual void visit(const tensor_function::Map &) = 0; + virtual void visit(const tensor_function::Apply &) = 0; + virtual ~TensorFunctionVisitor() {} +}; + +//----------------------------------------------------------------------------- + +} // namespace vespalib::eval +} // namespace vespalib diff --git a/eval/src/vespa/eval/eval/tensor_nodes.cpp b/eval/src/vespa/eval/eval/tensor_nodes.cpp new file mode 100644 index 00000000000..01a7f690642 --- /dev/null +++ b/eval/src/vespa/eval/eval/tensor_nodes.cpp @@ -0,0 +1,64 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#include <vespa/fastos/fastos.h> +#include "tensor_nodes.h" +#include "node_visitor.h" + +namespace vespalib { +namespace eval { +namespace nodes { + +void TensorSum ::accept(NodeVisitor &visitor) const { visitor.visit(*this); } +void TensorMap ::accept(NodeVisitor &visitor) const { visitor.visit(*this); } +void TensorJoin ::accept(NodeVisitor &visitor) const { visitor.visit(*this); } +void TensorReduce::accept(NodeVisitor &visitor) const { visitor.visit(*this); } +void TensorRename::accept(NodeVisitor &visitor) const { visitor.visit(*this); } +void TensorLambda::accept(NodeVisitor &visitor) const { visitor.visit(*this); } +void TensorConcat::accept(NodeVisitor &visitor) const { visitor.visit(*this); } + +const AggrNames AggrNames::_instance; + +void +AggrNames::add(Aggr aggr, const vespalib::string &name) +{ + _name_aggr_map[name] = aggr; + _aggr_name_map[aggr] = name; +} + +AggrNames::AggrNames() + : _name_aggr_map(), + _aggr_name_map() +{ + add(Aggr::AVG, "avg"); + add(Aggr::COUNT, "count"); + 
add(Aggr::PROD, "prod"); + add(Aggr::SUM, "sum"); + add(Aggr::MAX, "max"); + add(Aggr::MIN, "min"); +} + +const vespalib::string * +AggrNames::name_of(Aggr aggr) +{ + const auto &map = _instance._aggr_name_map; + auto result = map.find(aggr); + if (result == map.end()) { + return nullptr; + } + return &(result->second); +} + +const Aggr * +AggrNames::from_name(const vespalib::string &name) +{ + const auto &map = _instance._name_aggr_map; + auto result = map.find(name); + if (result == map.end()) { + return nullptr; + } + return &(result->second); +} + +} // namespace vespalib::eval::nodes +} // namespace vespalib::eval +} // namespace vespalib diff --git a/eval/src/vespa/eval/eval/tensor_nodes.h b/eval/src/vespa/eval/eval/tensor_nodes.h new file mode 100644 index 00000000000..461ea331170 --- /dev/null +++ b/eval/src/vespa/eval/eval/tensor_nodes.h @@ -0,0 +1,258 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#pragma once + +#include "basic_nodes.h" +#include "function.h" +#include <vespa/vespalib/stllike/string.h> +#include <map> + +namespace vespalib { +namespace eval { +namespace nodes { + +class TensorSum : public Node { +private: + Node_UP _child; + vespalib::string _dimension; +public: + TensorSum(Node_UP child) : _child(std::move(child)), _dimension() {} + TensorSum(Node_UP child, const vespalib::string &dimension_in) + : _child(std::move(child)), _dimension(dimension_in) {} + const vespalib::string &dimension() const { return _dimension; } + vespalib::string dump(DumpContext &ctx) const override { + vespalib::string str; + str += "sum("; + str += _child->dump(ctx); + if (!_dimension.empty()) { + str += ","; + str += _dimension; + } + str += ")"; + return str; + } + void accept(NodeVisitor &visitor) const override; + size_t num_children() const override { return 1; } + const Node &get_child(size_t idx) const override { + (void) idx; + assert(idx == 0); + return *_child; + } + void 
detach_children(NodeHandler &handler) override { + handler.handle(std::move(_child)); + } +}; + +class TensorMap : public Node { +private: + Node_UP _child; + Function _lambda; +public: + TensorMap(Node_UP child, Function lambda) + : _child(std::move(child)), _lambda(std::move(lambda)) {} + const Function &lambda() const { return _lambda; } + vespalib::string dump(DumpContext &ctx) const override { + vespalib::string str; + str += "map("; + str += _child->dump(ctx); + str += ","; + str += _lambda.dump_as_lambda(); + str += ")"; + return str; + } + void accept(NodeVisitor &visitor) const override; + size_t num_children() const override { return 1; } + const Node &get_child(size_t idx) const override { + (void) idx; + assert(idx == 0); + return *_child; + } + void detach_children(NodeHandler &handler) override { + handler.handle(std::move(_child)); + } +}; + +class TensorJoin : public Node { +private: + Node_UP _lhs; + Node_UP _rhs; + Function _lambda; +public: + TensorJoin(Node_UP lhs, Node_UP rhs, Function lambda) + : _lhs(std::move(lhs)), _rhs(std::move(rhs)), _lambda(std::move(lambda)) {} + const Function &lambda() const { return _lambda; } + vespalib::string dump(DumpContext &ctx) const override { + vespalib::string str; + str += "join("; + str += _lhs->dump(ctx); + str += ","; + str += _rhs->dump(ctx); + str += ","; + str += _lambda.dump_as_lambda(); + str += ")"; + return str; + } + void accept(NodeVisitor &visitor) const override ; + size_t num_children() const override { return 2; } + const Node &get_child(size_t idx) const override { + assert(idx < 2); + return (idx == 0) ? 
*_lhs : *_rhs; + } + void detach_children(NodeHandler &handler) override { + handler.handle(std::move(_lhs)); + handler.handle(std::move(_rhs)); + } +}; + +enum class Aggr { AVG, COUNT, PROD, SUM, MAX, MIN }; +class AggrNames { +private: + static const AggrNames _instance; + std::map<vespalib::string,Aggr> _name_aggr_map; + std::map<Aggr,vespalib::string> _aggr_name_map; + void add(Aggr aggr, const vespalib::string &name); + AggrNames(); +public: + static const vespalib::string *name_of(Aggr aggr); + static const Aggr *from_name(const vespalib::string &name); +}; + +class TensorReduce : public Node { +private: + Node_UP _child; + Aggr _aggr; + std::vector<vespalib::string> _dimensions; +public: + TensorReduce(Node_UP child, Aggr aggr_in, std::vector<vespalib::string> dimensions_in) + : _child(std::move(child)), _aggr(aggr_in), _dimensions(std::move(dimensions_in)) {} + const std::vector<vespalib::string> &dimensions() const { return _dimensions; } + Aggr aggr() const { return _aggr; } + vespalib::string dump(DumpContext &ctx) const override { + vespalib::string str; + str += "reduce("; + str += _child->dump(ctx); + str += ","; + str += *AggrNames::name_of(_aggr); + for (const auto &dimension: _dimensions) { + str += ","; + str += dimension; + } + str += ")"; + return str; + } + void accept(NodeVisitor &visitor) const override; + size_t num_children() const override { return 1; } + const Node &get_child(size_t idx) const override { + assert(idx == 0); + return *_child; + } + void detach_children(NodeHandler &handler) override { + handler.handle(std::move(_child)); + } +}; + +class TensorRename : public Node { +private: + Node_UP _child; + std::vector<vespalib::string> _from; + std::vector<vespalib::string> _to; + static vespalib::string flatten(const std::vector<vespalib::string> &list) { + if (list.size() == 1) { + return list[0]; + } + vespalib::string str = "("; + for (size_t i = 0; i < list.size(); ++i) { + if (i > 0) { + str += ","; + } + str += list[i]; + } + 
str += ")"; + return str; + } +public: + TensorRename(Node_UP child, std::vector<vespalib::string> from_in, std::vector<vespalib::string> to_in) + : _child(std::move(child)), _from(std::move(from_in)), _to(std::move(to_in)) {} + const std::vector<vespalib::string> &from() const { return _from; } + const std::vector<vespalib::string> &to() const { return _to; } + vespalib::string dump(DumpContext &ctx) const override { + vespalib::string str; + str += "rename("; + str += _child->dump(ctx); + str += ","; + str += flatten(_from); + str += ","; + str += flatten(_to); + str += ")"; + return str; + } + void accept(NodeVisitor &visitor) const override; + size_t num_children() const override { return 1; } + const Node &get_child(size_t idx) const override { + assert(idx == 0); + return *_child; + } + void detach_children(NodeHandler &handler) override { + handler.handle(std::move(_child)); + } +}; + +class TensorLambda : public Leaf { +private: + ValueType _type; + Function _lambda; +public: + TensorLambda(ValueType type_in, Function lambda) + : _type(std::move(type_in)), _lambda(std::move(lambda)) {} + const ValueType &type() const { return _type; } + const Function &lambda() const { return _lambda; } + vespalib::string dump(DumpContext &) const override { + vespalib::string str = _type.to_spec(); + vespalib::string expr = _lambda.dump(); + if (starts_with(expr, "(")) { + str += expr; + } else { + str += "("; + str += expr; + str += ")"; + } + return str; + } + void accept(NodeVisitor &visitor) const override; +}; + +class TensorConcat : public Node { +private: + Node_UP _lhs; + Node_UP _rhs; + vespalib::string _dimension; +public: + TensorConcat(Node_UP lhs, Node_UP rhs, const vespalib::string &dimension_in) + : _lhs(std::move(lhs)), _rhs(std::move(rhs)), _dimension(dimension_in) {} + const vespalib::string &dimension() const { return _dimension; } + vespalib::string dump(DumpContext &ctx) const override { + vespalib::string str; + str += "concat("; + str += 
_lhs->dump(ctx); + str += ","; + str += _rhs->dump(ctx); + str += ","; + str += _dimension; + str += ")"; + return str; + } + void accept(NodeVisitor &visitor) const override ; + size_t num_children() const override { return 2; } + const Node &get_child(size_t idx) const override { + assert(idx < 2); + return (idx == 0) ? *_lhs : *_rhs; + } + void detach_children(NodeHandler &handler) override { + handler.handle(std::move(_lhs)); + handler.handle(std::move(_rhs)); + } +}; + +} // namespace vespalib::eval::nodes +} // namespace vespalib::eval +} // namespace vespalib diff --git a/eval/src/vespa/eval/eval/tensor_spec.cpp b/eval/src/vespa/eval/eval/tensor_spec.cpp new file mode 100644 index 00000000000..eec930b8da4 --- /dev/null +++ b/eval/src/vespa/eval/eval/tensor_spec.cpp @@ -0,0 +1,49 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#include <vespa/fastos/fastos.h> +#include <vespa/vespalib/util/stringfmt.h> +#include "tensor_spec.h" +#include <iostream> + +namespace vespalib { +namespace eval { + +vespalib::string +TensorSpec::to_string() const +{ + vespalib::string out = vespalib::make_string("spec(%s) {\n", _type.c_str()); + for (const auto &cell: _cells) { + size_t n = 0; + out.append(" ["); + for (const auto &label: cell.first) { + if (n++) { + out.append(","); + } + if (label.second.is_mapped()) { + out.append(label.second.name); + } else { + out.append(vespalib::make_string("%zu", label.second.index)); + } + } + out.append(vespalib::make_string("]: %g\n", cell.second.value)); + } + out.append("}"); + return out; +} + +bool +operator==(const TensorSpec &lhs, const TensorSpec &rhs) +{ + return ((lhs.type() == rhs.type()) && + (lhs.cells() == rhs.cells())); +} + +std::ostream & +operator<<(std::ostream &out, const TensorSpec &spec) +{ + out << spec.to_string(); + return out; +} + +} // namespace vespalib::eval +} // namespace vespalib diff --git a/eval/src/vespa/eval/eval/tensor_spec.h 
b/eval/src/vespa/eval/eval/tensor_spec.h new file mode 100644 index 00000000000..06a9a3a2825 --- /dev/null +++ b/eval/src/vespa/eval/eval/tensor_spec.h @@ -0,0 +1,71 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#pragma once + +#include <vespa/vespalib/stllike/string.h> +#include <vespa/vespalib/util/approx.h> +#include <memory> +#include <map> + +namespace vespalib { +namespace eval { + +/** + * An implementation-independent specification of the type and + * contents of a tensor. + **/ +class TensorSpec +{ +public: + struct Label { + size_t index; + vespalib::string name; + static constexpr size_t npos = -1; + Label(size_t index_in) : index(index_in), name() {} + Label(const vespalib::string &name_in) : index(npos), name(name_in) {} + Label(const char *name_in) : index(npos), name(name_in) {} + bool is_mapped() const { return (index == npos); } + bool is_indexed() const { return (index != npos); } + bool operator==(const Label &rhs) const { + return ((index == rhs.index) && + (name == rhs.name)); + } + bool operator<(const Label &rhs) const { + if (index != rhs.index) { + return (index < rhs.index); + } + return (name < rhs.name); + } + }; + struct Value { + double value; + Value(double value_in) : value(value_in) {} + operator double() const { return value; } + static bool both_nan(double a, double b) { + return (std::isnan(a) && std::isnan(b)); + } + bool operator==(const Value &rhs) const { + return (both_nan(value, rhs.value) || approx_equal(value, rhs.value)); + } + }; + using Address = std::map<vespalib::string,Label>; + using Cells = std::map<Address,Value>; +private: + vespalib::string _type; + Cells _cells; +public: + TensorSpec(const vespalib::string &type_spec) : _type(type_spec), _cells() {} + TensorSpec &add(const Address &address, double value) { + _cells.emplace(address, value); + return *this; + } + const vespalib::string &type() const { return _type; } + const Cells 
&cells() const { return _cells; } + vespalib::string to_string() const; +}; + +bool operator==(const TensorSpec &lhs, const TensorSpec &rhs); +std::ostream &operator<<(std::ostream &out, const TensorSpec &tensor); + +} // namespace vespalib::eval +} // namespace vespalib diff --git a/eval/src/vespa/eval/eval/test/CMakeLists.txt b/eval/src/vespa/eval/eval/test/CMakeLists.txt new file mode 100644 index 00000000000..3d132b4d113 --- /dev/null +++ b/eval/src/vespa/eval/eval/test/CMakeLists.txt @@ -0,0 +1,7 @@ +# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +vespa_add_library(vespalib_vespalib_eval_test OBJECT + SOURCES + eval_spec.cpp + tensor_conformance.cpp + DEPENDS +) diff --git a/eval/src/vespa/eval/eval/test/eval_spec.cpp b/eval/src/vespa/eval/eval/test/eval_spec.cpp new file mode 100644 index 00000000000..482853fe8fa --- /dev/null +++ b/eval/src/vespa/eval/eval/test/eval_spec.cpp @@ -0,0 +1,372 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
#include <vespa/fastos/fastos.h>
#include "eval_spec.h"
#include <vespa/vespalib/util/stringfmt.h>
#include <vespa/vespalib/util/string_hash.h>
#include <cmath>

namespace vespalib {
namespace eval {
namespace test {

// Special values shared by the test cases below.
constexpr double my_nan = std::numeric_limits<double>::quiet_NaN();
constexpr double my_inf = std::numeric_limits<double>::infinity();
// Arbitrary magic number used as the "expected" result for expressions
// whose scalar evaluation has no meaningful value (e.g. rename on a double).
constexpr double my_error_value = 31212.0;

// Render one test case as "f(a=1, b=2) { expression }" for use in
// failure/progress messages.
vespalib::string
EvalSpec::EvalTest::as_string(const std::vector<vespalib::string> &param_names,
                              const std::vector<double> &param_values,
                              const vespalib::string &expression)
{
    assert(param_values.size() == param_names.size());
    vespalib::string str;
    str += "f(";
    for (size_t i = 0; i < param_names.size(); ++i) {
        if (i > 0) {
            str += ", ";
        }
        str += param_names[i];
        str += "=";
        str += make_string("%g", param_values[i]);
    }
    str += ") { ";
    str += expression;
    str += " }";
    return str;
}

// Result comparison; unlike operator==, NaN is considered equal to NaN
// so that expected-NaN cases can be verified.
bool
EvalSpec::EvalTest::is_same(double expected, double actual) {
    if (std::isnan(expected)) {
        return std::isnan(actual);
    }
    return (actual == expected);
}

// Literals and terminal symbols: numbers, a bare parameter, arrays and
// string literals. Note: an array used as a value evaluates to its size
// (all cases below are consistent with that), and a string literal
// evaluates to its vespalib::hash_code — TODO confirm against the parser.
void
EvalSpec::add_terminal_cases() {
    add_expression({}, "(-100)").add_case({}, -100.0);
    add_expression({}, "(-10)").add_case({}, -10.0);
    add_expression({}, "(-5.75)").add_case({}, -5.75);
    add_expression({}, "(-4.5)").add_case({}, -4.5);
    add_expression({}, "(-3)").add_case({}, -3.0);
    add_expression({}, "(-2)").add_case({}, -2.0);
    add_expression({}, "(-0.1)").add_case({}, -0.1);
    add_expression({}, "0").add_case({}, 0.0);
    add_expression({}, "0.1").add_case({}, 0.1);
    add_expression({}, "2").add_case({}, 2.0);
    add_expression({}, "3").add_case({}, 3.0);
    add_expression({}, "4.5").add_case({}, 4.5);
    add_expression({}, "5.75").add_case({}, 5.75);
    add_expression({}, "10").add_case({}, 10.0);
    add_expression({}, "100").add_case({}, 100.0);
    add_rule({"a", -5.0, 5.0}, "a", [](double a){ return a; });
    add_expression({}, "[]").add_case({}, 0.0);
    add_expression({}, "[1]").add_case({}, 1.0);
    add_expression({}, "[1,2]").add_case({}, 2.0);
    add_expression({}, "[1,2,3]").add_case({}, 3.0);
    add_expression({}, "[3,2,1]").add_case({}, 3.0);
    add_expression({}, "[1,1,1,1,1]").add_case({}, 5.0);
    add_expression({}, "\"\"").add_case({}, vespalib::hash_code(""));
    add_expression({}, "\"foo\"").add_case({}, vespalib::hash_code("foo"));
    add_expression({}, "\"foo bar baz\"").add_case({}, vespalib::hash_code("foo bar baz"));
    // escaped characters must be unescaped before hashing
    add_expression({}, "\">\\\\\\\"\\t\\n\\r\\f<\"").add_case({}, vespalib::hash_code(">\\\"\t\n\r\f<"));
    add_expression({}, "\">\\x08\\x10\\x12\\x14<\"").add_case({}, vespalib::hash_code(">\x08\x10\x12\x14<"));
}

// Basic arithmetic operators, sampled across their domains, plus one
// hand-picked composite expression.
void
EvalSpec::add_arithmetic_cases() {
    add_rule({"a", -5.0, 5.0}, "(-a)", [](double a){ return -a; });
    add_rule({"a", -5.0, 5.0}, {"b", -5.0, 5.0}, "(a+b)", [](double a, double b){ return (a + b); });
    add_rule({"a", -5.0, 5.0}, {"b", -5.0, 5.0}, "(a-b)", [](double a, double b){ return (a - b); });
    add_rule({"a", -5.0, 5.0}, {"b", -5.0, 5.0}, "(a*b)", [](double a, double b){ return (a * b); });
    add_rule({"a", -5.0, 5.0}, {"b", -5.0, 5.0}, "(a/b)", [](double a, double b){ return (a / b); });
    add_rule({"a", -5.0, 5.0}, {"b", -5.0, 5.0}, "(a^b)", [](double a, double b){ return std::pow(a,b); });
    add_expression({"a", "b", "c", "d"}, "(((a+1)*(b-1))/((c+1)/(d-1)))")
        .add_case({0.0, 2.0, 0.0, 2.0}, 1.0)
        .add_case({1.0, 3.0, 0.0, 2.0}, 4.0)
        .add_case({1.0, 3.0, 1.0, 2.0}, 2.0)
        .add_case({1.0, 3.0, 1.0, 5.0}, 8.0);
}

// Built-in function calls; each expression is paired with the
// corresponding <cmath> reference implementation.
void
EvalSpec::add_function_call_cases() {
    add_rule({"a", -1.0, 1.0}, "cos(a)", [](double a){ return std::cos(a); });
    add_rule({"a", -1.0, 1.0}, "sin(a)", [](double a){ return std::sin(a); });
    add_rule({"a", -1.0, 1.0}, "tan(a)", [](double a){ return std::tan(a); });
    add_rule({"a", -1.0, 1.0}, "cosh(a)", [](double a){ return std::cosh(a); });
    add_rule({"a", -1.0, 1.0}, "sinh(a)", [](double a){ return std::sinh(a); });
    add_rule({"a", -1.0, 1.0}, "tanh(a)", [](double a){ return std::tanh(a); });
    add_rule({"a", -1.0, 1.0}, "acos(a)", [](double a){ return std::acos(a); });
    add_rule({"a", -1.0, 1.0}, "asin(a)", [](double a){ return std::asin(a); });
    add_rule({"a", -1.0, 1.0}, "atan(a)", [](double a){ return std::atan(a); });
    add_rule({"a", -1.0, 1.0}, "exp(a)", [](double a){ return std::exp(a); });
    add_rule({"a", -1.0, 1.0}, "log10(a)", [](double a){ return std::log10(a); });
    add_rule({"a", -1.0, 1.0}, "log(a)", [](double a){ return std::log(a); });
    add_rule({"a", -1.0, 1.0}, "sqrt(a)", [](double a){ return std::sqrt(a); });
    add_rule({"a", -1.0, 1.0}, "ceil(a)", [](double a){ return std::ceil(a); });
    add_rule({"a", -1.0, 1.0}, "fabs(a)", [](double a){ return std::fabs(a); });
    add_rule({"a", -1.0, 1.0}, "floor(a)", [](double a){ return std::floor(a); });
    // isNan must detect NaN but not infinities
    add_expression({"a"}, "isNan(a)")
        .add_case({-1.0}, 0.0).add_case({-0.5}, 0.0).add_case({0.0}, 0.0).add_case({0.5}, 0.0).add_case({1.0}, 0.0)
        .add_case({my_nan}, 1.0).add_case({my_inf}, 0.0).add_case({-my_inf}, 0.0);
    add_rule({"a", -1.0, 1.0}, "relu(a)", [](double a){ return std::max(a, 0.0); });
    add_rule({"a", -1.0, 1.0}, "sigmoid(a)", [](double a){ return 1.0 / (1.0 + std::exp(-1.0 * a)); });
    add_rule({"a", -1.0, 1.0}, {"b", -1.0, 1.0}, "atan2(a,b)", [](double a, double b){ return std::atan2(a, b); });
    // NOTE(review): std::ldexp takes an int exponent; b is implicitly
    // truncated here — presumably intentional, matching engine behavior.
    add_rule({"a", -1.0, 1.0}, {"b", -1.0, 1.0}, "ldexp(a,b)", [](double a, double b){ return std::ldexp(a, b); });
    add_rule({"a", -1.0, 1.0}, {"b", -1.0, 1.0}, "pow(a,b)", [](double a, double b){ return std::pow(a, b); });
    add_rule({"a", -1.0, 1.0}, {"b", -1.0, 1.0}, "fmod(a,b)", [](double a, double b){ return std::fmod(a, b); });
    add_rule({"a", -1.0, 1.0}, {"b", -1.0, 1.0}, "min(a,b)", [](double a, double b){ return std::min(a, b); });
    add_rule({"a", -1.0, 1.0}, {"b", -1.0, 1.0}, "max(a,b)", [](double a, double b){ return std::max(a, b); });
}

// Tensor operations applied to plain doubles: map/join degenerate to the
// embedded lambda, reductions of a scalar are identity (count is 1),
// rename has no scalar meaning (my_error_value). Expressions added
// without cases are parse-only checks.
void
EvalSpec::add_tensor_operation_cases() {
    add_rule({"a", -1.0, 1.0}, "sum(a)", [](double a){ return a; });
    add_rule({"a", -1.0, 1.0}, "map(a,f(x)(sin(x)))", [](double x){ return std::sin(x); });
    add_rule({"a", -1.0, 1.0}, "map(a,f(x)(x+x*3))", [](double x){ return (x + (x * 3)); });
    add_rule({"a", -1.0, 1.0}, {"b", -1.0, 1.0}, "join(a,b,f(x,y)(x+y))", [](double x, double y){ return (x + y); });
    add_rule({"a", -1.0, 1.0}, {"b", -1.0, 1.0}, "join(a,b,f(x,y)(x+y*3))", [](double x, double y){ return (x + (y * 3)); });
    add_rule({"a", -1.0, 1.0}, "reduce(a,sum)", [](double a){ return a; });
    add_rule({"a", -1.0, 1.0}, "reduce(a,prod)", [](double a){ return a; });
    add_rule({"a", -1.0, 1.0}, "reduce(a,count)", [](double){ return 1.0; });
    add_rule({"a", -1.0, 1.0}, "rename(a,x,y)", [](double){ return my_error_value; });
    add_rule({"a", -1.0, 1.0}, "rename(a,(x,y),(y,x))", [](double){ return my_error_value; });
    add_expression({}, "tensor(x[10])(x)");
    add_expression({}, "tensor(x[10],y[10])(x==y)");
    add_expression({"a","b"}, "concat(a,b,x)");
    add_expression({"a","b"}, "concat(a,b,y)");
}

// Comparison operators. Any comparison involving NaN is false, which
// makes != true for NaN operands. "~=" is approximate equality: the
// 1e-10 cases are within tolerance, the 2e-7/5e-7 cases are outside —
// the implied relative tolerance sits between those magnitudes.
void
EvalSpec::add_comparison_cases() {
    add_expression({"a", "b"}, "(a==b)")
        .add_case({my_nan, 2.0}, 0.0)
        .add_case({2.0, my_nan}, 0.0)
        .add_case({my_nan, my_nan}, 0.0)
        .add_case({1.0, 2.0}, 0.0)
        .add_case({2.0 - 1e-10, 2.0}, 0.0)
        .add_case({2.0, 2.0}, 1.0)
        .add_case({2.0 + 1e-10, 2.0}, 0.0)
        .add_case({3.0, 2.0}, 0.0);

    add_expression({"a", "b"}, "(a!=b)")
        .add_case({my_nan, 2.0}, 1.0)
        .add_case({2.0, my_nan}, 1.0)
        .add_case({my_nan, my_nan}, 1.0)
        .add_case({1.0, 2.0}, 1.0)
        .add_case({2.0 - 1e-10, 2.0}, 1.0)
        .add_case({2.0, 2.0}, 0.0)
        .add_case({2.0 + 1e-10, 2.0}, 1.0)
        .add_case({3.0, 2.0}, 1.0);

    add_expression({"a", "b"}, "(a~=b)")
        .add_case({my_nan, 2.0}, 0.0)
        .add_case({2.0, my_nan}, 0.0)
        .add_case({my_nan, my_nan}, 0.0)
        .add_case({0.5, 0.5}, 1.0)
        .add_case({1.0, 2.0}, 0.0)
        .add_case({2.0, 2.0}, 1.0)
        .add_case({3.0, 2.0}, 0.0)
        .add_case({0.5 - 1e-10, 0.5}, 1.0)
        .add_case({0.5, 0.5 - 1e-10}, 1.0)
        .add_case({2.0 - 1e-10, 2.0}, 1.0)
        .add_case({2.0, 2.0 - 1e-10}, 1.0)
        .add_case({0.5 + 1e-10, 0.5}, 1.0)
        .add_case({0.5, 0.5 + 1e-10}, 1.0)
        .add_case({2.0 + 1e-10, 2.0}, 1.0)
        .add_case({2.0, 2.0 + 1e-10}, 1.0)
        .add_case({0.5 - 2e-7, 0.5}, 0.0)
        .add_case({0.5, 0.5 - 2e-7}, 0.0)
        .add_case({2.0 - 5e-7, 2.0}, 0.0)
        .add_case({2.0, 2.0 - 5e-7}, 0.0)
        .add_case({0.5 + 2e-7, 0.5}, 0.0)
        .add_case({0.5, 0.5 + 2e-7}, 0.0)
        .add_case({2.0 + 5e-7, 2.0}, 0.0)
        .add_case({2.0, 2.0 + 5e-7}, 0.0);

    add_expression({"a", "b"}, "(a<b)")
        .add_case({my_nan, 2.0}, 0.0)
        .add_case({2.0, my_nan}, 0.0)
        .add_case({my_nan, my_nan}, 0.0)
        .add_case({1.0, 2.0}, 1.0)
        .add_case({2.0 - 1e-10, 2.0}, 1.0)
        .add_case({2.0, 2.0}, 0.0)
        .add_case({2.0 + 1e-10, 2.0}, 0.0)
        .add_case({3.0, 2.0}, 0.0);

    add_expression({"a", "b"}, "(a<=b)")
        .add_case({my_nan, 2.0}, 0.0)
        .add_case({2.0, my_nan}, 0.0)
        .add_case({my_nan, my_nan}, 0.0)
        .add_case({1.0, 2.0}, 1.0)
        .add_case({2.0 - 1e-10, 2.0}, 1.0)
        .add_case({2.0, 2.0}, 1.0)
        .add_case({2.0 + 1e-10, 2.0}, 0.0)
        .add_case({3.0, 2.0}, 0.0);

    add_expression({"a", "b"}, "(a>b)")
        .add_case({my_nan, 2.0}, 0.0)
        .add_case({2.0, my_nan}, 0.0)
        .add_case({my_nan, my_nan}, 0.0)
        .add_case({1.0, 2.0}, 0.0)
        .add_case({2.0 - 1e-10, 2.0}, 0.0)
        .add_case({2.0, 2.0}, 0.0)
        .add_case({2.0 + 1e-10, 2.0}, 1.0)
        .add_case({3.0, 2.0}, 1.0);

    add_expression({"a", "b"}, "(a>=b)")
        .add_case({my_nan, 2.0}, 0.0)
        .add_case({2.0, my_nan}, 0.0)
        .add_case({my_nan, my_nan}, 0.0)
        .add_case({1.0, 2.0}, 0.0)
        .add_case({2.0 - 1e-10, 2.0}, 0.0)
        .add_case({2.0, 2.0}, 1.0)
        .add_case({2.0 + 1e-10, 2.0}, 1.0)
        .add_case({3.0, 2.0}, 1.0);
}

// Set membership ("in"). Nested arrays as set members evaluate to their
// size (see add_terminal_cases), so [[]] is the set {0} and [[[]]] is
// the set {1}. Exact (non-approximate) comparison is used; NaN is never
// a member of anything.
void
EvalSpec::add_set_membership_cases()
{
    add_expression({"a"}, "(a in [])")
        .add_case({0.0}, 0.0)
        .add_case({1.0}, 0.0)
        .add_case({2.0}, 0.0);

    add_expression({"a"}, "(a in [[]])")
        .add_case({0.0}, 1.0)
        .add_case({1.0}, 0.0)
        .add_case({2.0}, 0.0);

    add_expression({"a"}, "(a in [[[]]])")
        .add_case({0.0}, 0.0)
        .add_case({1.0}, 1.0)
        .add_case({2.0}, 0.0);

    add_expression({"a", "b"}, "(a in b)")
        .add_case({my_nan, 2.0}, 0.0)
        .add_case({2.0, my_nan}, 0.0)
        .add_case({my_nan, my_nan}, 0.0)
        .add_case({1.0, 2.0}, 0.0)
        .add_case({2.0 - 1e-10, 2.0}, 0.0)
        .add_case({2.0, 2.0}, 1.0)
        .add_case({2.0 + 1e-10, 2.0}, 0.0)
        .add_case({3.0, 2.0}, 0.0);

    add_expression({"a", "b"}, "(a in [b])")
        .add_case({my_nan, 2.0}, 0.0)
        .add_case({2.0, my_nan}, 0.0)
        .add_case({my_nan, my_nan}, 0.0)
        .add_case({1.0, 2.0}, 0.0)
        .add_case({2.0 - 1e-10, 2.0}, 0.0)
        .add_case({2.0, 2.0}, 1.0)
        .add_case({2.0 + 1e-10, 2.0}, 0.0)
        .add_case({3.0, 2.0}, 0.0);

    // [[b]] is the set {1} (size of [b]), so only a==1 matches
    add_expression({"a", "b"}, "(a in [[b]])")
        .add_case({1.0, 2.0}, 1.0)
        .add_case({2.0, 2.0}, 0.0);

    add_expression({"a", "b", "c", "d"}, "(a in [b,c,d])")
        .add_case({0.0, 10.0, 20.0, 30.0}, 0.0)
        .add_case({3.0, 10.0, 20.0, 30.0}, 0.0)
        .add_case({10.0, 10.0, 20.0, 30.0}, 1.0)
        .add_case({20.0, 10.0, 20.0, 30.0}, 1.0)
        .add_case({30.0, 10.0, 20.0, 30.0}, 1.0)
        .add_case({10.0, 30.0, 20.0, 10.0}, 1.0)
        .add_case({20.0, 30.0, 20.0, 10.0}, 1.0)
        .add_case({30.0, 30.0, 20.0, 10.0}, 1.0);
}

// Boolean operators over the full set of "interesting" doubles;
// truthiness follows C++ bool conversion (NaN is true, 0.0 is false).
void
EvalSpec::add_boolean_cases() {
    add_expression({"a"}, "(!a)")
        .add_cases({my_nan, -my_inf, -123.0, -1.0, -0.001, 0.0, 0.001, 1.0, 123.0, my_inf},
                   [](double a)->double{ return !bool(a); });

    add_expression({"a"}, "(!(!a))")
        .add_cases({my_nan, -my_inf, -123.0, -1.0, -0.001, 0.0, 0.001, 1.0, 123.0, my_inf},
                   [](double a)->double{ return bool(a); });

    add_expression({"a", "b"}, "(a&&b)")
        .add_cases({my_nan, -my_inf, -123.0, -1.0, -0.001, 0.0, 0.001, 1.0, 123.0, my_inf},
                   {my_nan, -my_inf, -123.0, -1.0, -0.001, 0.0, 0.001, 1.0, 123.0, my_inf},
                   [](double a, double b)->double{ return (bool(a) && bool(b)); });

    add_expression({"a", "b"}, "(a||b)")
        .add_cases({my_nan, -my_inf, -123.0, -1.0, -0.001, 0.0, 0.001, 1.0, 123.0, my_inf},
                   {my_nan, -my_inf, -123.0, -1.0, -0.001, 0.0, 0.001, 1.0, 123.0, my_inf},
                   [](double a, double b)->double{ return (bool(a) || bool(b)); });
}

// Conditionals, including nesting. The 4-argument form carries an extra
// constant (0.25/0.75) that must not change the result — presumably a
// true-branch probability hint; TODO confirm against the if() grammar.
void
EvalSpec::add_if_cases() {
    add_expression({"a"}, "if(a,1,0)")
        .add_cases({my_nan, -my_inf, -123.0, -1.0, -0.001, 0.0, 0.001, 1.0, 123.0, my_inf},
                   [](double a){ if (a) { return 1.0; } else { return 0.0; } });

    add_expression({"a", "b"}, "if(a,if(b,1,2),if(b,3,4))")
        .add_cases({my_nan, -my_inf, -123.0, -1.0, -0.001, 0.0, 0.001, 1.0, 123.0, my_inf},
                   {my_nan, -my_inf, -123.0, -1.0, -0.001, 0.0, 0.001, 1.0, 123.0, my_inf},
                   [](double a, double b)
                   {
                       if (a) {
                           if (b) {
                               return 1.0;
                           } else {
                               return 2.0;
                           }
                       } else {
                           if (b) {
                               return 3.0;
                           } else {
                               return 4.0;
                           }
                       }
                   });
    add_expression({"a"}, "if(a,1,0,0.25)")
        .add_cases({my_nan, -my_inf, -123.0, -1.0, -0.001, 0.0, 0.001, 1.0, 123.0, my_inf},
                   [](double a){ if (a) { return 1.0; } else { return 0.0; } });
    add_expression({"a"}, "if(a,1,0,0.75)")
        .add_cases({my_nan, -my_inf, -123.0, -1.0, -0.001, 0.0, 0.001, 1.0, 123.0, my_inf},
                   [](double a){ if (a) { return 1.0; } else { return 0.0; } });
}

// let-bindings, including shadowing of an outer parameter and
// sequential bindings that reference earlier ones.
void
EvalSpec::add_let_cases() {
    add_rule({"a", -10.0, 10.0}, "let(tmp,(a+1),(tmp*tmp))", [](double a){ return (a+1)*(a+1); });
    add_rule({"a", -10.0, 10.0}, "let(a,(a+1),((a*a)*a))", [](double a){ return (a+1)*(a+1)*(a+1); });
    add_rule({"a", -10.0, 10.0}, "let(a,(a+1),let(a,(a+1),let(b,2,let(a,(a+1),(a+b)))))", [](double a) { return (a + 5.0); });
    add_rule({"a", -10.0, 10.0}, {"b", -10.0, 10.0}, "let(a,(a*b),let(b,(b+a),(a*b)))",
             [](double a, double b)
             {
                 double let_a = (a * b);
                 double let_b = (b + let_a);
                 return (let_a * let_b);
             });
}

// Mixed-operator expressions: comparisons feeding boolean/arithmetic
// operators and vice versa.
void
EvalSpec::add_complex_cases() {
    add_expression({"a", "b"}, "((a<3)||b)")
        .add_cases({2.0, 4.0}, {0.0, 0.5, 1.0},
                   [](double a, double b)->double{ return ((a < 3) || bool(b)); });

    add_expression({"a", "b"}, "((a<3)==b)")
        .add_cases({2.0, 4.0}, {0.0, 0.5, 1.0},
                   [](double a, double b)->double{ return (double((a < 3)) == b); });

    add_expression({"a"}, "(!(-a))")
        .add_cases({my_nan, -my_inf, -123.0, -1.0, -0.001, 0.0, 0.001, 1.0, 123.0, my_inf},
                   [](double a)->double{ return !bool(-a); });

    add_expression({"a"}, "(-(!a))")
        .add_cases({my_nan, -my_inf, -123.0, -1.0, -0.001, 0.0, 0.001, 1.0, 123.0, my_inf},
                   [](double a)->double{ return -double(!bool(a)); });
}

} // namespace vespalib::eval::test
} // namespace vespalib::eval
} // namespace vespalib
diff --git a/eval/src/vespa/eval/eval/test/eval_spec.h b/eval/src/vespa/eval/eval/test/eval_spec.h
new file mode 100644
index 00000000000..582c3b1c1e5
--- /dev/null
+++ b/eval/src/vespa/eval/eval/test/eval_spec.h
@@ -0,0 +1,161 @@
// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.

#pragma once

#include <vespa/vespalib/stllike/string.h>
#include <initializer_list>
#include <cassert>

namespace vespalib {
namespace eval {
namespace test {

/**
 * A collection of expressions with parameter bindings and their
 * expected evaluation results. This is intended as the basis for
 * conformance testing of evaluation engines.
 **/
class EvalSpec
{
private:
    // reference implementations: plain C function pointers taking one or
    // two doubles (lambdas without captures convert implicitly)
    typedef double (*fun_1_ref)(double);
    typedef double (*fun_2_ref)(double, double);

    struct Expression {
        // one concrete evaluation: parameter values plus the expected result
        struct Case {
            std::vector<double> param_values;
            double expected_result;
            Case(std::initializer_list<double> param_values_in, double expected_result_in)
                : param_values(param_values_in), expected_result(expected_result_in) {}
        };
        std::vector<vespalib::string> param_names;
        vespalib::string expression;
        std::vector<Case> cases;
        Expression(std::initializer_list<vespalib::string> param_names_in, vespalib::string expression_in)
            : param_names(param_names_in), expression(expression_in) {}

        // add a single case; the value count must match the parameter count
        Expression &add_case(std::initializer_list<double> param_values, double expected_result) {
            assert(param_values.size() == param_names.size());
            cases.emplace_back(param_values, expected_result);
            return *this;
        }
        // add one case per value, with expected results from the reference function
        Expression &add_cases(std::initializer_list<double> a_values, fun_1_ref fun) {
            for (double a: a_values) {
                add_case({a}, fun(a));
            }
            return *this;
        }
        // add the full cross product of (a,b) values
        Expression &add_cases(std::initializer_list<double> a_values, std::initializer_list<double> b_values, fun_2_ref fun) {
            for (double a: a_values) {
                for (double b: b_values) {
                    add_case({a, b}, fun(a, b));
                }
            }
            return *this;
        }
    };
    std::vector<Expression> expressions;

    // register a new expression; the returned reference is only valid
    // until the next call (vector may reallocate)
    Expression &add_expression(std::initializer_list<vespalib::string> param_names, vespalib::string expression) {
        expressions.emplace_back(param_names, expression);
        return expressions.back();
    }

    // a named parameter with a value range to be sampled
    struct ParamSpec {
        vespalib::string name;
        double min;
        double max;
        // sample points: both endpoints, 0.0 if the range spans it, and
        // 'inner_samples' evenly spaced interior points (skipping 0.0 to
        // avoid duplicates)
        std::vector<double> expand(size_t inner_samples) const {
            std::vector<double> ret;
            ret.push_back(min);
            if (max == min) {
                return ret;
            }
            ret.push_back(max);
            if ((min < 0.0) && (max > 0.0)) {
                ret.push_back(0.0);
            }
            double delta = (max - min) / (inner_samples + 1);
            for(size_t i = 0; i < inner_samples; ++i) {
                double x = min + (delta * (i + 1));
                if (x != 0.0) {
                    ret.push_back(x);
                }
            }
            return ret;
        }
    };

    // expression with one parameter: 7 interior samples
    void add_rule(const ParamSpec &a_spec, const vespalib::string &expression, fun_1_ref ref) {
        Expression &expr = add_expression({a_spec.name}, expression);
        std::vector<double> a_values = a_spec.expand(7);
        for (double a: a_values) {
            expr.add_case({a}, ref(a));
        }
    }

    // expression with two parameters: 5x5 interior sample cross product
    void add_rule(const ParamSpec &a_spec, const ParamSpec &b_spec, const vespalib::string &expression, fun_2_ref ref) {
        Expression &expr = add_expression({a_spec.name, b_spec.name}, expression);
        std::vector<double> a_values = a_spec.expand(5);
        std::vector<double> b_values = b_spec.expand(5);
        for (double a: a_values) {
            for (double b: b_values) {
                expr.add_case({a, b}, ref(a, b));
            }
        }
    }

public:
    /**
     * Callback interface implemented by each conformance test driver;
     * each_case() feeds every registered expression and case through it.
     **/
    struct EvalTest {
        static vespalib::string as_string(const std::vector<vespalib::string> &param_names,
                                          const std::vector<double> &param_values,
                                          const vespalib::string &expression);
        // NaN-aware result comparison (NaN == NaN); defined in eval_spec.cpp
        bool is_same(double expected, double actual);
        virtual void next_expression(const std::vector<vespalib::string> &param_names,
                                     const vespalib::string &expression) = 0;
        virtual void handle_case(const std::vector<vespalib::string> &param_names,
                                 const std::vector<double> &param_values,
                                 const vespalib::string &expression,
                                 double expected_result) = 0;
        virtual ~EvalTest() {}
    };
    //-------------------------------------------------------------------------
    void add_terminal_cases();         // a, 1.0
    void add_arithmetic_cases();       // a + b, a ^ b
    void add_function_call_cases();    // cos(a), max(a, b)
    void add_tensor_operation_cases(); // map(a,f(x)(sin(x)))
    void add_comparison_cases();       // a < b, c != d
    void add_set_membership_cases();   // a in [x, y, z]
    void add_boolean_cases();          // 1.0 && 0.0
    void add_if_cases();               // if (a < b, a, b)
    void add_let_cases();              // let (a, b + 1, a * a)
    void add_complex_cases();          // ...
    //-------------------------------------------------------------------------
    // convenience: register every case category
    void add_all_cases() {
        add_terminal_cases();
        add_arithmetic_cases();
        add_function_call_cases();
        add_tensor_operation_cases();
        add_comparison_cases();
        add_set_membership_cases();
        add_boolean_cases();
        add_if_cases();
        add_let_cases();
        add_complex_cases();
    }
    //-------------------------------------------------------------------------
    // drive 'test' through all registered expressions and cases
    void each_case(EvalTest &test) const {
        for (const Expression &expr: expressions) {
            test.next_expression(expr.param_names, expr.expression);
            for (const Expression::Case &expr_case: expr.cases) {
                test.handle_case(expr.param_names, expr_case.param_values, expr.expression,
                                 expr_case.expected_result);
            }
        }
    }
};

} // namespace vespalib::eval::test
} // namespace vespalib::eval
} // namespace vespalib

diff --git a/eval/src/vespa/eval/eval/test/tensor_conformance.cpp b/eval/src/vespa/eval/eval/test/tensor_conformance.cpp
new file mode 100644
index 00000000000..d554de52865
--- /dev/null
+++ b/eval/src/vespa/eval/eval/test/tensor_conformance.cpp
@@ -0,0 +1,1128 @@
// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
#include <vespa/fastos/fastos.h>
#include <vespa/vespalib/testkit/test_kit.h>
#include "tensor_conformance.h"
#include <vespa/vespalib/util/stringfmt.h>
#include <vespa/vespalib/eval/simple_tensor_engine.h>
#include <vespa/vespalib/eval/tensor_spec.h>
#include <vespa/vespalib/eval/function.h>
#include <vespa/vespalib/eval/tensor_function.h>
#include <vespa/vespalib/eval/interpreted_function.h>

namespace vespalib {
namespace eval {
namespace test {
namespace {

// Random access sequence of numbers
struct Sequence {
    virtual double operator[](size_t i) const = 0;
    virtual ~Sequence() {}
};

// Sequence of natural numbers (starting at 1)
struct N : Sequence {
    double operator[](size_t i) const override { return (1.0 + i); }
};

// Sequence of another sequence divided by 10
struct Div10 : Sequence {
    const Sequence &seq; // non-owning; the wrapped sequence must outlive this
    Div10(const Sequence &seq_in) : seq(seq_in) {}
    double operator[](size_t i) const override { return (seq[i] / 10.0); }
};

// Sequence of another sequence minus 2
struct Sub2 : Sequence {
    const Sequence &seq;
    Sub2(const Sequence &seq_in) : seq(seq_in) {}
    double operator[](size_t i) const override { return (seq[i] - 2.0); }
};

// Sequence of a unary operator applied to a sequence
struct OpSeq : Sequence {
    const Sequence &seq;
    const UnaryOperation &op;
    OpSeq(const Sequence &seq_in, const UnaryOperation &op_in) : seq(seq_in), op(op_in) {}
    double operator[](size_t i) const override { return op.eval(seq[i]); }
};

// Sequence of applying sigmoid to another sequence
struct Sigmoid : Sequence {
    const Sequence &seq;
    Sigmoid(const Sequence &seq_in) : seq(seq_in) {}
    double operator[](size_t i) const override { return operation::Sigmoid().eval(seq[i]); }
};

// pre-defined sequence of numbers; indexing past the end is a test failure
struct Seq : Sequence {
    std::vector<double> seq;
    Seq() : seq() {}
    Seq(const std::vector<double> &seq_in) : seq(seq_in) {}
    double operator[](size_t i) const override {
        ASSERT_LESS(i, seq.size());
        return seq[i];
    }
};

// Random access bit mask
struct Mask {
    virtual bool operator[](size_t i) const = 0;
    virtual ~Mask() {}
};

// Mask with all bits set
struct All : Mask {
    bool operator[](size_t) const override { return true; }
};

// Mask with no bits set
struct None : Mask {
    bool operator[](size_t) const override { return false; }
};

// Mask with false for each Nth index
struct SkipNth : Mask {
    size_t n;
    SkipNth(size_t n_in) : n(n_in) {}
    bool operator[](size_t i) const override { return (i % n) != 0; }
};

// pre-defined mask; indexing past the end is a test failure
struct Bits : Mask {
    std::vector<bool> bits;
    Bits(const std::vector<bool> &bits_in) : bits(bits_in) {}
    bool operator[](size_t i) const override {
        ASSERT_LESS(i, bits.size());
        return bits[i];
    }
};

// A mask converted to a sequence of two unique values (mapped from true and false)
struct Mask2Seq : Sequence {
    const Mask &mask;
    double true_value;
    double false_value;
    Mask2Seq(const Mask &mask_in, double true_value_in = 1.0, double false_value_in = 0.0)
        : mask(mask_in), true_value(true_value_in), false_value(false_value_in) {}
    double operator[](size_t i) const override { return mask[i] ? true_value : false_value; }
};

// custom op1: f(a) = (a + 1) * 2
struct MyOp : CustomUnaryOperation {
    double eval(double a) const override { return ((a + 1) * 2); }
};

// A collection of labels for a single dimension;
// size > 0 means indexed, an empty size with keys means mapped
struct Domain {
    vespalib::string dimension;
    size_t size; // indexed
    std::vector<vespalib::string> keys; // mapped
    Domain(const vespalib::string &dimension_in, size_t size_in)
        : dimension(dimension_in), size(size_in), keys() {}
    Domain(const vespalib::string &dimension_in, const std::vector<vespalib::string> &keys_in)
        : dimension(dimension_in), size(0), keys(keys_in) {}
};
using Layout = std::vector<Domain>;

// shorthand factories for the dimensions used by the test cases
Domain x() { return Domain("x", {}); }
Domain x(size_t size) { return Domain("x", size); }
Domain x(const std::vector<vespalib::string> &keys) { return Domain("x", keys); }

Domain y() { return Domain("y", {}); }
Domain y(size_t size) { return Domain("y", size); }
Domain y(const std::vector<vespalib::string> &keys) { return Domain("y", keys); }

Domain z(size_t size) { return Domain("z", size); }
Domain z(const std::vector<vespalib::string> &keys) { return Domain("z", keys); }

// Infer the tensor type spanned by the given spaces;
// an empty layout denotes a plain double
vespalib::string infer_type(const Layout &layout) {
    if (layout.empty()) {
        return "double";
    }
    std::vector<ValueType::Dimension> dimensions;
    for (const auto &domain: layout) {
        if (domain.size == 0) {
            dimensions.emplace_back(domain.dimension); // mapped
        } else {
            dimensions.emplace_back(domain.dimension, domain.size); // indexed
        }
    }
    return ValueType::tensor_type(dimensions).to_spec();
}

// Wrapper for the things needed to generate a tensor
struct Source {
    using Address = TensorSpec::Address;

    const Layout &layout;
    const Sequence &seq;
    const Mask &mask;
    Source(const Layout &layout_in, const Sequence &seq_in, const Mask &mask_in)
        : layout(layout_in), seq(seq_in), mask(mask_in) {}
};

// Mix layout with a number sequence to make a tensor spec
class TensorSpecBuilder
{
private:
    using Label = TensorSpec::Label;
    using Address = TensorSpec::Address;

    Source _source;
    TensorSpec _spec;
    Address _addr;
    size_t _idx; // linear cell index; drives both seq values and mask filtering

    // recursively walk the cross product of all dimensions, adding one
    // cell per address unless masked out
    void generate(size_t layout_idx) {
        if (layout_idx == _source.layout.size()) {
            if (_source.mask[_idx]) {
                _spec.add(_addr, _source.seq[_idx]);
            }
            ++_idx;
        } else {
            const Domain &domain = _source.layout[layout_idx];
            if (domain.size > 0) { // indexed
                for (size_t i = 0; i < domain.size; ++i) {
                    // emplace does not overwrite an existing key, hence
                    // the explicit assignment of the returned entry
                    _addr.emplace(domain.dimension, Label(i)).first->second = Label(i);
                    generate(layout_idx + 1);
                }
            } else { // mapped
                for (const vespalib::string &key: domain.keys) {
                    _addr.emplace(domain.dimension, Label(key)).first->second = Label(key);
                    generate(layout_idx + 1);
                }
            }
        }
    }

public:
    TensorSpecBuilder(const Layout &layout, const Sequence &seq, const Mask &mask)
        : _source(layout, seq, mask), _spec(infer_type(layout)), _addr(), _idx(0) {}
    TensorSpec build() {
        generate(0);
        return _spec;
    }
};
// convenience spec() factories, from fully specified down to empty
TensorSpec spec(const Layout &layout, const Sequence &seq, const Mask &mask) {
    return TensorSpecBuilder(layout, seq, mask).build();
}
TensorSpec spec(const Layout &layout, const Sequence &seq) {
    return spec(layout, seq, All());
}
TensorSpec spec(const Layout &layout) {
    return spec(layout, Seq(), None());
}
TensorSpec spec(const Domain &domain, const Sequence &seq, const Mask &mask) {
    return spec(Layout({domain}), seq, mask);
}
TensorSpec spec(const Domain &domain, const Sequence &seq) {
    return spec(Layout({domain}), seq);
}
TensorSpec spec(const Domain &domain) {
    return spec(Layout({domain}));
}
TensorSpec spec(double value) {
    return spec(Layout({}), Seq({value}));
}
TensorSpec spec() {
    return spec(Layout({}));
}

// explicit cell list variant
TensorSpec spec(const vespalib::string &type,
                const std::vector<std::pair<TensorSpec::Address, TensorSpec::Value>> &cells) {
    TensorSpec spec("tensor(" + type + ")");

    for (const auto &cell : cells) {
        spec.add(cell.first, cell.second);
    }
    return spec;
}

// abstract evaluation wrapper
struct Eval {
    // typed result wrapper
    class Result {
    private:
        enum class Type { ERROR, NUMBER, TENSOR };
        Type _type;
        double _number;
        TensorSpec _tensor;
    public:
        // classify a Value; 'error_value' and the "error" spec are kept
        // when the value is neither a double nor a tensor
        Result(const Value &value) : _type(Type::ERROR), _number(error_value), _tensor("error") {
            if (value.is_double()) {
                _type = Type::NUMBER;
                _number = value.as_double();
                _tensor = TensorSpec("double").add({}, _number);
            } else if (value.is_tensor()) {
                _type = Type::TENSOR;
                _tensor = value.as_tensor()->engine().to_spec(*value.as_tensor());
                // a double-typed tensor also exposes its single cell as a
                // number, but _type remains TENSOR
                if (_tensor.type() == "double") {
                    _number = _tensor.cells().empty() ? 0.0 : _tensor.cells().begin()->second.value;
                }
            }
        }
        bool is_error() const { return (_type == Type::ERROR); }
        bool is_number() const { return (_type == Type::NUMBER); }
        bool is_tensor() const { return (_type == Type::TENSOR); }
        double number() const {
            EXPECT_TRUE(is_number());
            return _number;
        }
        const TensorSpec &tensor() const {
            EXPECT_TRUE(is_tensor());
            return _tensor;
        }
    };
    // default implementations flag a test error so that each concrete
    // Eval only needs to override the arity it supports
    virtual Result eval(const TensorEngine &) const {
        TEST_ERROR("wrong signature");
        return Result(ErrorValue());
    }
    virtual Result eval(const TensorEngine &, const TensorSpec &) const {
        TEST_ERROR("wrong signature");
        return Result(ErrorValue());
    }
    virtual Result eval(const TensorEngine &, const TensorSpec &, const TensorSpec &) const {
        TEST_ERROR("wrong signature");
        return Result(ErrorValue());
    }
    virtual ~Eval() {}
};

// catches exceptions trying to keep the test itself safe from eval side-effects
struct SafeEval : Eval {
    const Eval &unsafe; // non-owning; wrapped eval must outlive this
    SafeEval(const Eval &unsafe_in) : unsafe(unsafe_in) {}
    Result eval(const TensorEngine &engine) const override {
        try {
            return unsafe.eval(engine);
        } catch (std::exception &e) {
            TEST_ERROR(e.what());
            return Result(ErrorValue());
        }
    }
    Result eval(const TensorEngine &engine, const TensorSpec &a) const override {
        try {
            return unsafe.eval(engine, a);
        } catch (std::exception &e) {
            TEST_ERROR(e.what());
            return Result(ErrorValue());
        }

    }
    Result eval(const TensorEngine &engine, const TensorSpec &a, const TensorSpec &b) const override {
        try {
            return unsafe.eval(engine, a, b);
        } catch (std::exception &e) {
            TEST_ERROR(e.what());
            return Result(ErrorValue());
        }
    }
};
SafeEval safe(const Eval &eval) { return SafeEval(eval); }

// verify the type of a value before passing it through
const Value &check_type(const Value &value, const ValueType &expect_type) {
    EXPECT_EQUAL(value.type(), expect_type);
    return value;
}

// expression(void)
// NOTE(review): Expr_V/Expr_T hold the expression by reference while
// Expr_TT holds it by value — dangling risk if constructed from a
// temporary string; worth aligning on by-value.
struct Expr_V : Eval {
    const vespalib::string &expr;
    Expr_V(const vespalib::string &expr_in) : expr(expr_in) {}
    Result eval(const TensorEngine &engine) const override {
        Function fun = Function::parse(expr);
        NodeTypes types(fun, {});
        InterpretedFunction ifun(engine, fun, types);
        InterpretedFunction::Context ctx;
        return Result(check_type(ifun.eval(ctx), types.get_type(fun.root())));
    }
};

// expression(tensor)
struct Expr_T : Eval {
    const vespalib::string &expr;
    Expr_T(const vespalib::string &expr_in) : expr(expr_in) {}
    Result eval(const TensorEngine &engine, const TensorSpec &a) const override {
        Function fun = Function::parse(expr);
        auto a_type = ValueType::from_spec(a.type());
        NodeTypes types(fun, {a_type});
        InterpretedFunction ifun(engine, fun, types);
        InterpretedFunction::Context ctx;
        TensorValue va(engine.create(a));
        ctx.add_param(va);
        return Result(check_type(ifun.eval(ctx), types.get_type(fun.root())));
    }
};

// expression(tensor,tensor)
struct Expr_TT : Eval {
    vespalib::string expr;
    Expr_TT(const vespalib::string &expr_in) : expr(expr_in) {}
    Result eval(const TensorEngine &engine, const TensorSpec &a, const TensorSpec &b) const override {
        Function fun = Function::parse(expr);
        auto a_type = ValueType::from_spec(a.type());
        auto b_type = ValueType::from_spec(b.type());
        NodeTypes types(fun, {a_type, b_type});
        InterpretedFunction ifun(engine, fun, types);
        InterpretedFunction::Context ctx;
        TensorValue va(engine.create(a));
        TensorValue vb(engine.create(b));
        ctx.add_param(va);
        ctx.add_param(vb);
        return Result(check_type(ifun.eval(ctx), types.get_type(fun.root())));
    }
};

// wrap a spec as an engine Value; "double" specs become DoubleValue
// (empty cell set maps to 0.0), everything else becomes a TensorValue
const Value &make_value(const TensorEngine &engine, const TensorSpec &spec, Stash &stash) {
    if (spec.type() == "double") {
        double number = spec.cells().empty() ? 0.0 : spec.cells().begin()->second.value;
        return stash.create<DoubleValue>(number);
    }
    return stash.create<TensorValue>(engine.create(spec));
}

// evaluate tensor reduce operation using tensor engine immediate api;
// an empty dimension list means reduce over all dimensions
struct ImmediateReduce : Eval {
    const BinaryOperation &op;
    std::vector<vespalib::string> dimensions;
    ImmediateReduce(const BinaryOperation &op_in) : op(op_in), dimensions() {}
    ImmediateReduce(const BinaryOperation &op_in, const vespalib::string &dimension)
        : op(op_in), dimensions({dimension}) {}
    Result eval(const TensorEngine &engine, const TensorSpec &a) const override {
        Stash stash;
        return Result(engine.reduce(*engine.create(a), op, dimensions, stash));
    }
};

// evaluate tensor map operation using tensor engine immediate api
struct ImmediateMap : Eval {
    const UnaryOperation &op;
    ImmediateMap(const UnaryOperation &op_in) : op(op_in) {}
    Result eval(const TensorEngine &engine, const TensorSpec &a) const override {
        Stash stash;
        return Result(engine.map(op, *engine.create(a), stash));
    }
};

// evaluate tensor apply operation using tensor engine immediate api
struct ImmediateApply : Eval {
    const BinaryOperation &op;
    ImmediateApply(const BinaryOperation &op_in) : op(op_in) {}
    Result eval(const TensorEngine &engine, const TensorSpec &a, const TensorSpec &b) const override {
        Stash stash;
        return Result(engine.apply(op, *engine.create(a), *engine.create(b), stash));
    }
};

// evaluate tensor concat operation using tensor engine immediate api
struct ImmediateConcat : Eval {
    vespalib::string dimension;
    ImmediateConcat(const vespalib::string &dimension_in) : dimension(dimension_in) {}
    Result eval(const TensorEngine &engine, const TensorSpec &a, const TensorSpec &b) const override {
        Stash stash;
        const auto &lhs = make_value(engine, a, stash);
        const auto &rhs = make_value(engine, b, stash);
        return Result(engine.concat(lhs, rhs, dimension, stash));
    }
};

// evaluate tensor rename operation using tensor engine immediate api
struct ImmediateRename : Eval {
    std::vector<vespalib::string> from;
    std::vector<vespalib::string> to;
    ImmediateRename(const std::vector<vespalib::string> &from_in, const std::vector<vespalib::string> &to_in)
        : from(from_in), to(to_in) {}
    Result eval(const TensorEngine &engine, const TensorSpec &a) const override {
        Stash stash;
        const auto &lhs = make_value(engine, a, stash);
        return Result(engine.rename(lhs, from, to, stash));
    }
};

// arbitrary ids used when wiring up retained (compiled) tensor functions
const size_t tensor_id_a = 11;
const size_t tensor_id_b = 12;
const size_t map_operation_id = 22;

// input used when evaluating in retained mode; tensor ids are mapped to
// slots by their offset from tensor_id_a
struct Input : TensorFunction::Input {
    std::vector<TensorValue> tensors;
    const UnaryOperation *map_op; // only set for map operations
    Input(std::unique_ptr<Tensor> a) : tensors(), map_op(nullptr) {
        tensors.emplace_back(std::move(a));
    }
    Input(std::unique_ptr<Tensor> a, const UnaryOperation &op) : tensors(), map_op(&op) {
        tensors.emplace_back(std::move(a));
    }
    Input(std::unique_ptr<Tensor> a, std::unique_ptr<Tensor> b) : tensors(), map_op(nullptr) {
        tensors.emplace_back(std::move(a));
        tensors.emplace_back(std::move(b));
    }
    const Value &get_tensor(size_t id) const override {
        size_t offset = (id - tensor_id_a);
        ASSERT_GREATER(tensors.size(), offset);
        return tensors[offset];
    }
    // NOTE(review): declared without 'override' — presumably overrides a
    // virtual in TensorFunction::Input; confirm and add the keyword.
    const UnaryOperation &get_map_operation(size_t id) const {
        ASSERT_TRUE(map_op != nullptr);
        ASSERT_EQUAL(id, map_operation_id);
        return *map_op;
    }
};

// evaluate tensor reduce operation using tensor engine retained
api +struct RetainedReduce : Eval { + const BinaryOperation &op; + std::vector<vespalib::string> dimensions; + RetainedReduce(const BinaryOperation &op_in) : op(op_in), dimensions() {} + RetainedReduce(const BinaryOperation &op_in, const vespalib::string &dimension) + : op(op_in), dimensions({dimension}) {} + Result eval(const TensorEngine &engine, const TensorSpec &a) const override { + auto a_type = ValueType::from_spec(a.type()); + auto ir = tensor_function::reduce(tensor_function::inject(a_type, tensor_id_a), op, dimensions); + ValueType expect_type = ir->result_type; + auto fun = engine.compile(std::move(ir)); + Input input(engine.create(a)); + Stash stash; + return Result(check_type(fun->eval(input, stash), expect_type)); + } +}; + +// evaluate tensor map operation using tensor engine retained api +struct RetainedMap : Eval { + const UnaryOperation &op; + RetainedMap(const UnaryOperation &op_in) : op(op_in) {} + Result eval(const TensorEngine &engine, const TensorSpec &a) const override { + auto a_type = ValueType::from_spec(a.type()); + auto ir = tensor_function::map(map_operation_id, tensor_function::inject(a_type, tensor_id_a)); + ValueType expect_type = ir->result_type; + auto fun = engine.compile(std::move(ir)); + Input input(engine.create(a), op); + Stash stash; + return Result(check_type(fun->eval(input, stash), expect_type)); + } +}; + +// evaluate tensor apply operation using tensor engine retained api +struct RetainedApply : Eval { + const BinaryOperation &op; + RetainedApply(const BinaryOperation &op_in) : op(op_in) {} + Result eval(const TensorEngine &engine, const TensorSpec &a, const TensorSpec &b) const override { + auto a_type = ValueType::from_spec(a.type()); + auto b_type = ValueType::from_spec(b.type()); + auto ir = tensor_function::apply(op, tensor_function::inject(a_type, tensor_id_a), + tensor_function::inject(b_type, tensor_id_b)); + ValueType expect_type = ir->result_type; + auto fun = engine.compile(std::move(ir)); + Input 
input(engine.create(a), engine.create(b)); + Stash stash; + return Result(check_type(fun->eval(input, stash), expect_type)); + } +}; + +// placeholder used for unused values in a sequence +const double X = error_value; + +// NaN value +const double my_nan = std::numeric_limits<double>::quiet_NaN(); + + +// Test wrapper to avoid passing global test parameters around +struct TestContext { + + const TensorEngine &ref_engine; + const TensorEngine &engine; + bool test_mixed_cases; + size_t skip_count; + + TestContext(const TensorEngine &engine_in, bool test_mixed_cases_in) + : ref_engine(SimpleTensorEngine::ref()), engine(engine_in), + test_mixed_cases(test_mixed_cases_in), skip_count(0) {} + + std::unique_ptr<Tensor> tensor(const TensorSpec &spec) { + auto result = engine.create(spec); + EXPECT_EQUAL(spec.type(), engine.type_of(*result).to_spec()); + return result; + } + + bool mixed(size_t n) { + if (!test_mixed_cases) { + skip_count += n; + } + return test_mixed_cases; + } + + //------------------------------------------------------------------------- + + void verify_create_type(const vespalib::string &type_spec) { + auto tensor = engine.create(TensorSpec(type_spec)); + EXPECT_TRUE(&engine == &tensor->engine()); + EXPECT_EQUAL(type_spec, engine.type_of(*tensor).to_spec()); + } + + void test_tensor_create_type() { + TEST_DO(verify_create_type("double")); + TEST_DO(verify_create_type("tensor(x{})")); + TEST_DO(verify_create_type("tensor(x{},y{})")); + TEST_DO(verify_create_type("tensor(x[5])")); + TEST_DO(verify_create_type("tensor(x[5],y[10])")); + if (mixed(2)) { + TEST_DO(verify_create_type("tensor(x{},y[10])")); + TEST_DO(verify_create_type("tensor(x[5],y{})")); + } + } + + //------------------------------------------------------------------------- + + void verify_equal(const TensorSpec &a, const TensorSpec &b) { + auto ta = tensor(a); + auto tb = tensor(b); + EXPECT_EQUAL(a, b); + EXPECT_EQUAL(*ta, *tb); + TensorSpec spec = engine.to_spec(*ta); + TensorSpec 
ref_spec = ref_engine.to_spec(*ref_engine.create(a)); + EXPECT_EQUAL(spec, ref_spec); + } + + void test_tensor_equality() { + TEST_DO(verify_equal(spec(), spec())); + TEST_DO(verify_equal(spec(10.0), spec(10.0))); + TEST_DO(verify_equal(spec(x()), spec(x()))); + TEST_DO(verify_equal(spec(x({"a"}), Seq({1})), spec(x({"a"}), Seq({1})))); + TEST_DO(verify_equal(spec({x({"a"}),y({"a"})}, Seq({1})), spec({y({"a"}),x({"a"})}, Seq({1})))); + TEST_DO(verify_equal(spec(x(3)), spec(x(3)))); + TEST_DO(verify_equal(spec({x(1),y(1)}, Seq({1})), spec({y(1),x(1)}, Seq({1})))); + if (mixed(2)) { + TEST_DO(verify_equal(spec({x({"a"}),y(1)}, Seq({1})), spec({y(1),x({"a"})}, Seq({1})))); + TEST_DO(verify_equal(spec({y({"a"}),x(1)}, Seq({1})), spec({x(1),y({"a"})}, Seq({1})))); + } + } + + //------------------------------------------------------------------------- + + void verify_not_equal(const TensorSpec &a, const TensorSpec &b) { + auto ta = tensor(a); + auto tb = tensor(b); + EXPECT_NOT_EQUAL(a, b); + EXPECT_NOT_EQUAL(b, a); + EXPECT_NOT_EQUAL(*ta, *tb); + EXPECT_NOT_EQUAL(*tb, *ta); + } + + void test_tensor_inequality() { + TEST_DO(verify_not_equal(spec(1.0), spec(2.0))); + TEST_DO(verify_not_equal(spec(), spec(x()))); + TEST_DO(verify_not_equal(spec(), spec(x(1)))); + TEST_DO(verify_not_equal(spec(x()), spec(x(1)))); + TEST_DO(verify_not_equal(spec(x()), spec(y()))); + TEST_DO(verify_not_equal(spec(x(1)), spec(x(2)))); + TEST_DO(verify_not_equal(spec(x(1)), spec(y(1)))); + TEST_DO(verify_not_equal(spec(x({"a"}), Seq({1})), spec(x({"a"}), Seq({2})))); + TEST_DO(verify_not_equal(spec(x({"a"}), Seq({1})), spec(x({"b"}), Seq({1})))); + TEST_DO(verify_not_equal(spec(x({"a"}), Seq({1})), spec({x({"a"}),y({"a"})}, Seq({1})))); + TEST_DO(verify_not_equal(spec(x(1), Seq({1})), spec(x(1), Seq({2})))); + TEST_DO(verify_not_equal(spec(x(1), Seq({1})), spec(x(2), Seq({1}), Bits({1,0})))); + TEST_DO(verify_not_equal(spec(x(2), Seq({1,1}), Bits({1,0})), + spec(x(2), Seq({1,1}), Bits({0,1})))); 
+ TEST_DO(verify_not_equal(spec(x(1), Seq({1})), spec({x(1),y(1)}, Seq({1})))); + if (mixed(3)) { + TEST_DO(verify_not_equal(spec({x({"a"}),y(1)}, Seq({1})), spec({x({"a"}),y(1)}, Seq({2})))); + TEST_DO(verify_not_equal(spec({x({"a"}),y(1)}, Seq({1})), spec({x({"b"}),y(1)}, Seq({1})))); + TEST_DO(verify_not_equal(spec({x(2),y({"a"})}, Seq({1}), Bits({1,0})), + spec({x(2),y({"a"})}, Seq({X,1}), Bits({0,1})))); + } + } + + //------------------------------------------------------------------------- + + void verify_reduce_result(const Eval &eval, const TensorSpec &a, const Eval::Result &expect) { + if (expect.is_tensor()) { + EXPECT_EQUAL(eval.eval(engine, a).tensor(), expect.tensor()); + } else if (expect.is_number()) { + EXPECT_EQUAL(eval.eval(engine, a).number(), expect.number()); + } else { + TEST_FATAL("expected result should be valid"); + } + } + + void test_reduce_op(const vespalib::string &name, const BinaryOperation &op, const Sequence &seq) { + std::vector<Layout> layouts = { + {x(3)}, + {x(3),y(5)}, + {x(3),y(5),z(7)}, + {x({"a","b","c"})}, + {x({"a","b","c"}),y({"foo","bar"})}, + {x({"a","b","c"}),y({"foo","bar"}),z({"i","j","k","l"})} + }; + if (mixed(2 * 4)) { + layouts.push_back({x(3),y({"foo", "bar"}),z(7)}); + layouts.push_back({x({"a","b","c"}),y(5),z({"i","j","k","l"})}); + } + for (const Layout &layout: layouts) { + TensorSpec input = spec(layout, seq); + for (const Domain &domain: layout) { + Eval::Result expect = ImmediateReduce(op, domain.dimension).eval(ref_engine, input); + TEST_STATE(make_string("shape: %s, reduce dimension: %s", + infer_type(layout).c_str(), domain.dimension.c_str()).c_str()); + if (!name.empty()) { + vespalib::string expr = make_string("%s(a,%s)", name.c_str(), domain.dimension.c_str()); + TEST_DO(verify_reduce_result(Expr_T(expr), input, expect)); + } + TEST_DO(verify_reduce_result(ImmediateReduce(op, domain.dimension), input, expect)); + TEST_DO(verify_reduce_result(RetainedReduce(op, domain.dimension), input, expect)); + 
} + { + Eval::Result expect = ImmediateReduce(op).eval(ref_engine, input); + TEST_STATE(make_string("shape: %s, reduce all dimensions", + infer_type(layout).c_str()).c_str()); + if (!name.empty()) { + vespalib::string expr = make_string("%s(a)", name.c_str()); + TEST_DO(verify_reduce_result(Expr_T(expr), input, expect)); + } + TEST_DO(verify_reduce_result(ImmediateReduce(op), input, expect)); + TEST_DO(verify_reduce_result(RetainedReduce(op), input, expect)); + } + } + } + + void test_tensor_reduce() { + TEST_DO(test_reduce_op("sum", operation::Add(), N())); + TEST_DO(test_reduce_op("", operation::Mul(), Sigmoid(N()))); + TEST_DO(test_reduce_op("", operation::Min(), N())); + TEST_DO(test_reduce_op("", operation::Max(), N())); + } + + //------------------------------------------------------------------------- + + void test_map_op(const Eval &eval, const UnaryOperation &ref_op, const Sequence &seq) { + std::vector<Layout> layouts = { + {}, + {x(3)}, + {x(3),y(5)}, + {x(3),y(5),z(7)}, + {x({"a","b","c"})}, + {x({"a","b","c"}),y({"foo","bar"})}, + {x({"a","b","c"}),y({"foo","bar"}),z({"i","j","k","l"})} + }; + if (mixed(2)) { + layouts.push_back({x(3),y({"foo", "bar"}),z(7)}); + layouts.push_back({x({"a","b","c"}),y(5),z({"i","j","k","l"})}); + } + for (const Layout &layout: layouts) { + EXPECT_EQUAL(eval.eval(engine, spec(layout, seq)).tensor(), spec(layout, OpSeq(seq, ref_op))); + } + } + + void test_map_op(const vespalib::string &expr, const UnaryOperation &op, const Sequence &seq) { + TEST_DO(test_map_op(ImmediateMap(op), op, seq)); + TEST_DO(test_map_op(RetainedMap(op), op, seq)); + TEST_DO(test_map_op(Expr_T(expr), op, seq)); + } + + void test_tensor_map() { + TEST_DO(test_map_op("-a", operation::Neg(), Sub2(Div10(N())))); + TEST_DO(test_map_op("!a", operation::Not(), Mask2Seq(SkipNth(3)))); + TEST_DO(test_map_op("cos(a)", operation::Cos(), Div10(N()))); + TEST_DO(test_map_op("sin(a)", operation::Sin(), Div10(N()))); + TEST_DO(test_map_op("tan(a)", 
operation::Tan(), Div10(N()))); + TEST_DO(test_map_op("cosh(a)", operation::Cosh(), Div10(N()))); + TEST_DO(test_map_op("sinh(a)", operation::Sinh(), Div10(N()))); + TEST_DO(test_map_op("tanh(a)", operation::Tanh(), Div10(N()))); + TEST_DO(test_map_op("acos(a)", operation::Acos(), Sigmoid(Div10(N())))); + TEST_DO(test_map_op("asin(a)", operation::Asin(), Sigmoid(Div10(N())))); + TEST_DO(test_map_op("atan(a)", operation::Atan(), Div10(N()))); + TEST_DO(test_map_op("exp(a)", operation::Exp(), Div10(N()))); + TEST_DO(test_map_op("log10(a)", operation::Log10(), Div10(N()))); + TEST_DO(test_map_op("log(a)", operation::Log(), Div10(N()))); + TEST_DO(test_map_op("sqrt(a)", operation::Sqrt(), Div10(N()))); + TEST_DO(test_map_op("ceil(a)", operation::Ceil(), Div10(N()))); + TEST_DO(test_map_op("fabs(a)", operation::Fabs(), Div10(N()))); + TEST_DO(test_map_op("floor(a)", operation::Floor(), Div10(N()))); + TEST_DO(test_map_op("isNan(a)", operation::IsNan(), Mask2Seq(SkipNth(3), 1.0, my_nan))); + TEST_DO(test_map_op("relu(a)", operation::Relu(), Sub2(Div10(N())))); + TEST_DO(test_map_op("sigmoid(a)", operation::Sigmoid(), Sub2(Div10(N())))); + TEST_DO(test_map_op("(a+1)*2", MyOp(), Div10(N()))); + } + + //------------------------------------------------------------------------- + + void test_apply_op(const Eval &eval, + const TensorSpec &expect, + const TensorSpec &lhs, + const TensorSpec &rhs) { + EXPECT_EQUAL(safe(eval).eval(engine, lhs, rhs).tensor(), expect); + } + + void test_fixed_sparse_cases_apply_op(const Eval &eval, + const BinaryOperation &op) + { + TEST_DO(test_apply_op(eval, + spec("x{}", {}), + spec("x{}", { { {{"x","1"}}, 3 } }), + spec("x{}", { { {{"x","2"}}, 5 } }))); + TEST_DO(test_apply_op(eval, + spec("x{}", { { {{"x","1"}}, op.eval(3,5) } }), + spec("x{}", { { {{"x","1"}}, 3 } }), + spec("x{}", { { {{"x","1"}}, 5 } }))); + TEST_DO(test_apply_op(eval, + spec("x{}", { { {{"x","1"}}, op.eval(3,-5) } }), + spec("x{}", { { {{"x","1"}}, 3 } }), + spec("x{}", { 
{ {{"x","1"}}, -5 } }))); + TEST_DO(test_apply_op(eval, + spec("x{},y{},z{}", + { { {{"x","-"},{"y","2"},{"z","-"}}, + op.eval(5,7) }, + { {{"x","1"},{"y","-"},{"z","3"}}, + op.eval(3,11) } }), + spec("x{},y{}", + { { {{"x","-"},{"y","2"}}, 5 }, + { {{"x","1"},{"y","-"}}, 3 } }), + spec("y{},z{}", + { { {{"y","-"},{"z","3"}}, 11 }, + { {{"y","2"},{"z","-"}}, 7 } }))); + TEST_DO(test_apply_op(eval, + spec("x{},y{},z{}", + { { {{"x","-"},{"y","2"},{"z","-"}}, + op.eval(7,5) }, + { {{"x","1"},{"y","-"},{"z","3"}}, + op.eval(11,3) } }), + spec("y{},z{}", + { { {{"y","-"},{"z","3"}}, 11 }, + { {{"y","2"},{"z","-"}}, 7 } }), + spec("x{},y{}", + { { {{"x","-"},{"y","2"}}, 5 }, + { {{"x","1"},{"y","-"}}, 3 } }))); + TEST_DO(test_apply_op(eval, + spec("y{},z{}", + { { {{"y","2"},{"z","-"}}, + op.eval(5,7) } }), + spec("y{}", { { {{"y","2"}}, 5 } }), + spec("y{},z{}", + { { {{"y","-"},{"z","3"}}, 11 }, + { {{"y","2"},{"z","-"}}, 7 } }))); + TEST_DO(test_apply_op(eval, + spec("y{},z{}", + { { {{"y","2"},{"z","-"}}, + op.eval(7,5) } }), + spec("y{},z{}", + { { {{"y","-"},{"z","3"}}, 11 }, + { {{"y","2"},{"z","-"}}, 7 } }), + spec("y{}", { { {{"y","2"}}, 5 } }))); + TEST_DO(test_apply_op(eval, + spec("x{},y{}", + { { {{"x","-"},{"y","2"}}, + op.eval(5,7) } }), + spec("x{},y{}", + { { {{"x","-"},{"y","2"}}, 5 }, + { {{"x","1"},{"y","-"}}, 3 } }), + spec("y{}", { { {{"y","2"}}, 7 } }))); + TEST_DO(test_apply_op(eval, + spec("x{},y{}", + { { {{"x","-"},{"y","2"}}, + op.eval(7,5) } }), + spec("y{}", { { {{"y","2"}}, 7 } }), + spec("x{},y{}", + { { {{"x","-"},{"y","2"}}, 5 }, + { {{"x","1"},{"y","-"}}, 3 } }))); + TEST_DO(test_apply_op(eval, + spec("x{},z{}", + { { {{"x","1"},{"z","3"}}, + op.eval(3,11) } }), + spec("x{}", { { {{"x","1"}}, 3 } }), + spec("z{}", { { {{"z","3"}}, 11 } }))); + TEST_DO(test_apply_op(eval, + spec("x{},z{}", + { { {{"x","1"},{"z","3"}}, + op.eval(11,3) } }), + spec("z{}",{ { {{"z","3"}}, 11 } }), + spec("x{}",{ { {{"x","1"}}, 3 } }))); + 
TEST_DO(test_apply_op(eval, + spec("x{},y{}", + { { {{"x","1"},{"y","1"}}, + op.eval(3,5) }, + { {{"x","2"},{"y","1"}}, + op.eval(7,5) } }), + spec("x{}", + { { {{"x","1"}}, 3 }, + { {{"x","2"}}, 7 } }), + spec("y{}", + { { {{"y","1"}}, 5 } }))); + TEST_DO(test_apply_op(eval, + spec("x{},y{},z{}", + { { {{"x","1"},{"y","1"},{"z","1"}}, + op.eval(1,7) }, + { {{"x","1"},{"y","1"},{"z","2"}}, + op.eval(1,13) }, + { {{"x","1"},{"y","2"},{"z","1"}}, + op.eval(5,11) }, + { {{"x","2"},{"y","1"},{"z","1"}}, + op.eval(3,7) }, + { {{"x","2"},{"y","1"},{"z","2"}}, + op.eval(3,13) } }), + spec("x{},y{}", + { { {{"x","1"},{"y","1"}}, 1 }, + { {{"x","1"},{"y","2"}}, 5 }, + { {{"x","2"},{"y","1"}}, 3 } }), + spec("y{},z{}", + { { {{"y","1"},{"z","1"}}, 7 }, + { {{"y","1"},{"z","2"}}, 13 }, + { {{"y","2"},{"z","1"}}, 11 } }))); + TEST_DO(test_apply_op(eval, + spec("x{},y{},z{}", + { { {{"x","1"},{"y","1"},{"z","1"}}, + op.eval(1,7) } }), + spec("x{},y{}", + { { {{"x","1"},{"y","-"}}, 5 }, + { {{"x","1"},{"y","1"}}, 1 } }), + spec("y{},z{}", + { { {{"y","1"},{"z","1"}}, 7 } }))); + TEST_DO(test_apply_op(eval, + spec("x{},y{},z{}", + { { {{"x","1"},{"y","-"},{"z","1"}}, + op.eval(5,11) }, + { {{"x","1"},{"y","1"},{"z","1"}}, + op.eval(1,7) } }), + spec("x{},y{}", + { { {{"x","1"},{"y","-"}}, 5 }, + { {{"x","1"},{"y","1"}}, 1 } }), + spec("y{},z{}", + { { {{"y","-"},{"z","1"}}, 11 }, + { {{"y","1"},{"z","1"}}, 7 } }))); + TEST_DO(test_apply_op(eval, + spec("x{},y{},z{}", + { { {{"x","1"},{"y","1"},{"z","1"}}, + op.eval(1,7) } }), + spec("x{},y{}", + { { {{"x","-"},{"y","-"}}, 5 }, + { {{"x","1"},{"y","1"}}, 1 } }), + spec("y{},z{}", + { { {{"y","1"},{"z","1"}}, 7 } }))); + TEST_DO(test_apply_op(eval, + spec("x{},y{},z{}", + { { {{"x","-"},{"y","-"},{"z", "-"}}, + op.eval(5,11) }, + { {{"x","1"},{"y","1"},{"z","1"}}, + op.eval(1,7) } }), + spec("x{},y{}", + { { {{"x","-"},{"y","-"}}, 5 }, + { {{"x","1"},{"y","1"}}, 1 } }), + spec("y{},z{}", + { { {{"y","-"},{"z","-"}}, 11 }, + { 
{{"y","1"},{"z","1"}}, 7 } }))); + } + + void test_fixed_dense_cases_apply_op(const Eval &eval, + const BinaryOperation &op) + { + TEST_DO(test_apply_op(eval, + spec(op.eval(0,0)), spec(0.0), spec(0.0))); + TEST_DO(test_apply_op(eval, + spec(x(1), Seq({ op.eval(3,5) })), + spec(x(1), Seq({ 3 })), + spec(x(1), Seq({ 5 })))); + TEST_DO(test_apply_op(eval, + spec(x(1), Seq({ op.eval(3,-5) })), + spec(x(1), Seq({ 3 })), + spec(x(1), Seq({ -5 })))); + TEST_DO(test_apply_op(eval, + spec(x(2), Seq({ op.eval(3,7), op.eval(5,11) })), + spec(x(2), Seq({ 3, 5 })), + spec(x(2), Seq({ 7, 11 })))); + TEST_DO(test_apply_op(eval, + spec({x(1),y(1)}, Seq({ op.eval(3,5) })), + spec({x(1),y(1)}, Seq({ 3 })), + spec({x(1),y(1)}, Seq({ 5 })))); + TEST_DO(test_apply_op(eval, + spec(x(1), Seq({ op.eval(3, 0) })), + spec(x(1), Seq({ 3 })), + spec(x(2), Seq({ 0, 7 })))); + TEST_DO(test_apply_op(eval, + spec(x(1), Seq({ op.eval(0, 5) })), + spec(x(2), Seq({ 0, 3 })), + spec(x(1), Seq({ 5 })))); + TEST_DO(test_apply_op(eval, + spec({x(2),y(2),z(2)}, + Seq({ op.eval(1, 7), op.eval(1, 11), + op.eval(2, 13), op.eval(2, 17), + op.eval(3, 7), op.eval(3, 11), + op.eval(5, 13), op.eval(5, 17) + })), + spec({x(2),y(2)}, + Seq({ 1, 2, + 3, 5 })), + spec({y(2),z(2)}, + Seq({ 7, 11, + 13, 17 })))); + } + + void test_apply_op(const Eval &eval, const BinaryOperation &op, const Sequence &seq) { + std::vector<Layout> layouts = { + {}, {}, + {x(5)}, {x(5)}, + {x(5)}, {x(3)}, + {x(5)}, {y(5)}, + {x(5)}, {x(5),y(5)}, + {x(3),y(5)}, {x(4),y(4)}, + {x(3),y(5)}, {y(5),z(7)}, + {x({"a","b","c"})}, {x({"a","b","c"})}, + {x({"a","b","c"})}, {x({"a","b"})}, + {x({"a","b","c"})}, {y({"foo","bar","baz"})}, + {x({"a","b","c"})}, {x({"a","b","c"}),y({"foo","bar","baz"})}, + {x({"a","b"}),y({"foo","bar","baz"})}, {x({"a","b","c"}),y({"foo","bar"})}, + {x({"a","b"}),y({"foo","bar","baz"})}, {y({"foo","bar"}),z({"i","j","k","l"})} + }; + if (mixed(2)) { + layouts.push_back({x(3),y({"foo", "bar"})}); + 
layouts.push_back({y({"foo", "bar"}),z(7)}); + layouts.push_back({x({"a","b","c"}),y(5)}); + layouts.push_back({y(5),z({"i","j","k","l"})}); + } + ASSERT_TRUE((layouts.size() % 2) == 0); + for (size_t i = 0; i < layouts.size(); i += 2) { + TensorSpec lhs_input = spec(layouts[i], seq); + TensorSpec rhs_input = spec(layouts[i + 1], seq); + TEST_STATE(make_string("lhs shape: %s, rhs shape: %s", + lhs_input.type().c_str(), + rhs_input.type().c_str()).c_str()); + TensorSpec expect = ImmediateApply(op).eval(ref_engine, lhs_input, rhs_input).tensor(); + EXPECT_EQUAL(safe(eval).eval(engine, lhs_input, rhs_input).tensor(), expect); + } + TEST_DO(test_fixed_sparse_cases_apply_op(eval, op)); + TEST_DO(test_fixed_dense_cases_apply_op(eval, op)); + } + + void test_apply_op(const vespalib::string &expr, const BinaryOperation &op, const Sequence &seq) { + TEST_DO(test_apply_op(ImmediateApply(op), op, seq)); + TEST_DO(test_apply_op(RetainedApply(op), op, seq)); + TEST_DO(test_apply_op(Expr_TT(expr), op, seq)); + } + + void test_tensor_apply() { + TEST_DO(test_apply_op("a+b", operation::Add(), Div10(N()))); + TEST_DO(test_apply_op("a-b", operation::Sub(), Div10(N()))); + TEST_DO(test_apply_op("a*b", operation::Mul(), Div10(N()))); + TEST_DO(test_apply_op("a/b", operation::Div(), Div10(N()))); + TEST_DO(test_apply_op("a^b", operation::Pow(), Div10(N()))); + TEST_DO(test_apply_op("pow(a,b)", operation::Pow(), Div10(N()))); + TEST_DO(test_apply_op("a==b", operation::Equal(), Div10(N()))); + TEST_DO(test_apply_op("a!=b", operation::NotEqual(), Div10(N()))); + TEST_DO(test_apply_op("a~=b", operation::Approx(), Div10(N()))); + TEST_DO(test_apply_op("a<b", operation::Less(), Div10(N()))); + TEST_DO(test_apply_op("a<=b", operation::LessEqual(), Div10(N()))); + TEST_DO(test_apply_op("a>b", operation::Greater(), Div10(N()))); + TEST_DO(test_apply_op("a>=b", operation::GreaterEqual(), Div10(N()))); + TEST_DO(test_apply_op("a&&b", operation::And(), Mask2Seq(SkipNth(3)))); + 
TEST_DO(test_apply_op("a||b", operation::Or(), Mask2Seq(SkipNth(3)))); + TEST_DO(test_apply_op("atan2(a,b)", operation::Atan2(), Div10(N()))); + TEST_DO(test_apply_op("ldexp(a,b)", operation::Ldexp(), Div10(N()))); + TEST_DO(test_apply_op("fmod(a,b)", operation::Fmod(), Div10(N()))); + TEST_DO(test_apply_op("min(a,b)", operation::Min(), Div10(N()))); + TEST_DO(test_apply_op("max(a,b)", operation::Max(), Div10(N()))); + } + + //------------------------------------------------------------------------- + + void test_dot_product(double expect, + const TensorSpec &lhs, + const TensorSpec &rhs) + { + Expr_TT eval("sum(a*b)"); + EXPECT_EQUAL(expect, safe(eval).eval(engine, lhs, rhs).number()); + } + + void test_dot_product() { + TEST_DO(test_dot_product(((2 * 7) + (3 * 11) + (5 * 13)), + spec(x(3), Seq({ 2, 3, 5 })), + spec(x(3), Seq({ 7, 11, 13 })))); + TEST_DO(test_dot_product(((2 * 7) + (3 * 11)), + spec(x(2), Seq({ 2, 3 })), + spec(x(3), Seq({ 7, 11, 13 })))); + TEST_DO(test_dot_product(((2 * 7) + (3 * 11)), + spec(x(3), Seq({ 2, 3, 5 })), + spec(x(2), Seq({ 7, 11 })))); + } + + //------------------------------------------------------------------------- + + void test_concat(const TensorSpec &expect, + const TensorSpec &a, + const TensorSpec &b, + const vespalib::string &dimension) + { + ImmediateConcat eval(dimension); + EXPECT_EQUAL(eval.eval(engine, a, b).tensor(), expect); + } + + void test_concat() { + TEST_DO(test_concat(spec(x(2), Seq({10.0, 20.0})), spec(10.0), spec(20.0), "x")); + TEST_DO(test_concat(spec(x(2), Seq({10.0, 20.0})), spec(x(1), Seq({10.0})), spec(20.0), "x")); + TEST_DO(test_concat(spec(x(2), Seq({10.0, 20.0})), spec(10.0), spec(x(1), Seq({20.0})), "x")); + TEST_DO(test_concat(spec(x(5), Seq({1.0, 2.0, 3.0, 4.0, 5.0})), + spec(x(3), Seq({1.0, 2.0, 3.0})), + spec(x(2), Seq({4.0, 5.0})), "x")); + TEST_DO(test_concat(spec({x(2),y(4)}, Seq({1.0, 2.0, 5.0, 6.0, 3.0, 4.0, 5.0, 6.0})), + spec({x(2),y(2)}, Seq({1.0, 2.0, 3.0, 4.0})), + spec(y(2), 
Seq({5.0, 6.0})), "y")); + TEST_DO(test_concat(spec({x(4),y(2)}, Seq({1.0, 2.0, 3.0, 4.0, 5.0, 5.0, 6.0, 6.0})), + spec({x(2),y(2)}, Seq({1.0, 2.0, 3.0, 4.0})), + spec(x(2), Seq({5.0, 6.0})), "x")); + TEST_DO(test_concat(spec({x(2),y(2),z(3)}, Seq({1.0, 2.0, 3.0, 1.0, 2.0, 3.0, 4.0, 4.0, 4.0, 5.0, 5.0, 5.0})), + spec(z(3), Seq({1.0, 2.0, 3.0})), + spec(y(2), Seq({4.0, 5.0})), "x")); + TEST_DO(test_concat(spec({x(2), y(2)}, Seq({1.0, 2.0, 4.0, 5.0})), + spec(y(3), Seq({1.0, 2.0, 3.0})), + spec(y(2), Seq({4.0, 5.0})), "x")); + } + + //------------------------------------------------------------------------- + + void test_rename(const TensorSpec &expect, + const TensorSpec &input, + const std::vector<vespalib::string> &from, + const std::vector<vespalib::string> &to) + { + ImmediateRename eval(from, to); + EXPECT_EQUAL(eval.eval(engine, input).tensor(), expect); + } + + void test_rename() { + TEST_DO(test_rename(spec(y(5), N()), spec(x(5), N()), {"x"}, {"y"})); + TEST_DO(test_rename(spec({x(5),z(5)}, N()), spec({y(5),z(5)}, N()), {"y"}, {"x"})); + TEST_DO(test_rename(spec({y(5),x(5)}, N()), spec({y(5),z(5)}, N()), {"z"}, {"x"})); + TEST_DO(test_rename(spec({z(5),y(5)}, N()), spec({x(5),y(5)}, N()), {"x"}, {"z"})); + TEST_DO(test_rename(spec({x(5),z(5)}, N()), spec({x(5),y(5)}, N()), {"y"}, {"z"})); + TEST_DO(test_rename(spec({y(5),x(5)}, N()), spec({x(5),y(5)}, N()), {"x","y"}, {"y","x"})); + } + + //------------------------------------------------------------------------- + + void run_tests() { + TEST_DO(test_tensor_create_type()); + TEST_DO(test_tensor_equality()); + TEST_DO(test_tensor_inequality()); + TEST_DO(test_tensor_reduce()); + TEST_DO(test_tensor_map()); + TEST_DO(test_tensor_apply()); + TEST_DO(test_dot_product()); + TEST_DO(test_concat()); + TEST_DO(test_rename()); + } +}; + +} // namespace vespalib::eval::test::<unnamed> + +void +TensorConformance::run_tests(const TensorEngine &engine, bool test_mixed_cases) +{ + TestContext ctx(engine, 
test_mixed_cases); + ctx.run_tests(); + if (ctx.skip_count > 0) { + fprintf(stderr, "WARNING: skipped %zu mixed test cases\n", ctx.skip_count); + } +} + +} // namespace vespalib::eval::test +} // namespace vespalib::eval +} // namespace vespalib diff --git a/eval/src/vespa/eval/eval/test/tensor_conformance.h b/eval/src/vespa/eval/eval/test/tensor_conformance.h new file mode 100644 index 00000000000..ed1ff618f49 --- /dev/null +++ b/eval/src/vespa/eval/eval/test/tensor_conformance.h @@ -0,0 +1,21 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#pragma once + +#include <vespa/vespalib/eval/tensor_engine.h> + +namespace vespalib { +namespace eval { +namespace test { + +/** + * A collection of tensor-related tests that can be run for various + * implementations of the TensorEngine interface. + **/ +struct TensorConformance { + static void run_tests(const TensorEngine &engine, bool test_mixed_cases); +}; + +} // namespace vespalib::eval::test +} // namespace vespalib::eval +} // namespace vespalib diff --git a/eval/src/vespa/eval/eval/value.cpp b/eval/src/vespa/eval/eval/value.cpp new file mode 100644 index 00000000000..3db6f500e10 --- /dev/null +++ b/eval/src/vespa/eval/eval/value.cpp @@ -0,0 +1,52 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+ +#include <vespa/fastos/fastos.h> +#include "value.h" +#include "operation_visitor.h" +#include "tensor_engine.h" + +namespace vespalib { +namespace eval { + +const Value & +Value::apply(const UnaryOperation &, Stash &stash) const +{ + return stash.create<ErrorValue>(); +} + +const Value & +Value::apply(const BinaryOperation &, const Value &, Stash &stash) const +{ + return stash.create<ErrorValue>(); +} + +bool +TensorValue::equal(const Value &rhs) const +{ + return (rhs.is_tensor() && _tensor->engine().equal(*_tensor, *rhs.as_tensor())); +} + +const Value & +TensorValue::apply(const UnaryOperation &op, Stash &stash) const +{ + return _tensor->engine().map(op, *_tensor, stash); +} + +const Value & +TensorValue::apply(const BinaryOperation &op, const Value &rhs, Stash &stash) const +{ + const Tensor *other = rhs.as_tensor(); + if ((other == nullptr) || (&other->engine() != &_tensor->engine())) { + return stash.create<ErrorValue>(); + } + return _tensor->engine().apply(op, *_tensor, *other, stash); +} + +ValueType +TensorValue::type() const +{ + return _tensor->engine().type_of(*_tensor); +} + +} // namespace vespalib::eval +} // namespace vespalib diff --git a/eval/src/vespa/eval/eval/value.h b/eval/src/vespa/eval/eval/value.h new file mode 100644 index 00000000000..e8b682e84b5 --- /dev/null +++ b/eval/src/vespa/eval/eval/value.h @@ -0,0 +1,82 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#pragma once + +#include <vespa/vespalib/stllike/string.h> +#include <memory> +#include <vespa/vespalib/util/stash.h> +#include "tensor.h" +#include "value_type.h" + +namespace vespalib { +namespace eval { + +class Tensor; + +constexpr double error_value = 31212.0; + +struct UnaryOperation; +struct BinaryOperation; + +/** + * An abstract Value. Calculation using abstract values should be done + * using the perform function on the appropriate Operation. 
+ **/ +struct Value { + typedef std::unique_ptr<Value> UP; + typedef std::reference_wrapper<const Value> CREF; + virtual bool is_error() const { return false; } + virtual bool is_double() const { return false; } + virtual bool is_tensor() const { return false; } + virtual double as_double() const { return 0.0; } + virtual bool as_bool() const { return false; } + virtual const Tensor *as_tensor() const { return nullptr; } + virtual bool equal(const Value &rhs) const = 0; + virtual const Value &apply(const UnaryOperation &op, Stash &stash) const; + virtual const Value &apply(const BinaryOperation &op, const Value &rhs, Stash &stash) const; + virtual ValueType type() const = 0; + virtual ~Value() {} +}; + +struct ErrorValue : public Value { + virtual bool is_error() const override { return true; } + virtual double as_double() const { return error_value; } + virtual bool equal(const Value &) const override { return false; } + ValueType type() const override { return ValueType::error_type(); } +}; + +class DoubleValue : public Value +{ +private: + double _value; +public: + DoubleValue(double value) : _value(value) {} + bool is_double() const override { return true; } + double as_double() const override { return _value; } + bool as_bool() const override { return (_value != 0.0); } + bool equal(const Value &rhs) const override { + return (rhs.is_double() && (_value == rhs.as_double())); + } + ValueType type() const override { return ValueType::double_type(); } +}; + +class TensorValue : public Value +{ +private: + const Tensor *_tensor; + std::unique_ptr<Tensor> _stored; +public: + TensorValue(const Tensor &value) : _tensor(&value), _stored() {} + TensorValue(std::unique_ptr<Tensor> value) : _tensor(value.get()), _stored(std::move(value)) {} + bool is_tensor() const override { return true; } + const Tensor *as_tensor() const override { return _tensor; } + bool equal(const Value &rhs) const override; + const Value &apply(const UnaryOperation &op, Stash &stash) const 
override; + const Value &apply(const BinaryOperation &op, const Value &rhs, Stash &stash) const override; + ValueType type() const override; +}; + +} // namespace vespalib::eval +} // namespace vespalib + +VESPA_CAN_SKIP_DESTRUCTION(::vespalib::eval::DoubleValue); diff --git a/eval/src/vespa/eval/eval/value_cache/CMakeLists.txt b/eval/src/vespa/eval/eval/value_cache/CMakeLists.txt new file mode 100644 index 00000000000..62cb89a9d0e --- /dev/null +++ b/eval/src/vespa/eval/eval/value_cache/CMakeLists.txt @@ -0,0 +1,7 @@ +# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +vespa_add_library(vespalib_vespalib_eval_value_cache OBJECT + SOURCES + constant_value_cache.cpp + constant_tensor_loader.cpp + DEPENDS +) diff --git a/eval/src/vespa/eval/eval/value_cache/constant_tensor_loader.cpp b/eval/src/vespa/eval/eval/value_cache/constant_tensor_loader.cpp new file mode 100644 index 00000000000..eedccd3a33e --- /dev/null +++ b/eval/src/vespa/eval/eval/value_cache/constant_tensor_loader.cpp @@ -0,0 +1,109 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+ +#include <vespa/fastos/fastos.h> +#include <vespa/log/log.h> + +#include "constant_tensor_loader.h" +#include <set> +#include <vespa/vespalib/data/slime/slime.h> +#include <vespa/vespalib/eval/tensor.h> +#include <vespa/vespalib/eval/tensor_engine.h> +#include <vespa/vespalib/eval/tensor_spec.h> + +LOG_SETUP(".vespalib.eval.value_cache.constant_tensor_loader"); + +namespace vespalib { +namespace eval { + +using Memory = slime::Memory; +using Inspector = slime::Inspector; +using ObjectTraverser = slime::ObjectTraverser; + +namespace { + +struct File { + int file; + char *data; + size_t size; + File(const std::string &file_name) : file(open(file_name.c_str(), O_RDONLY)), data((char*)MAP_FAILED), size(0) { + struct stat info; + if ((file != -1) && (fstat(file, &info) == 0)) { + data = (char*)mmap(0, info.st_size, PROT_READ, MAP_SHARED, file, 0); + if (data != MAP_FAILED) { + size = info.st_size; + } + } + } + bool valid() const { return (data != MAP_FAILED); } + ~File() { + if (valid()) { + munmap(data, size); + } + if (file != -1) { + close(file); + } + } +}; + +struct AddressExtractor : ObjectTraverser { + const std::set<vespalib::string> &indexed; + TensorSpec::Address &address; + AddressExtractor(const std::set<vespalib::string> &indexed_in, + TensorSpec::Address &address_out) + : indexed(indexed_in), address(address_out) {} + void field(const Memory &symbol, const Inspector &inspector) override { + vespalib::string dimension = symbol.make_string(); + vespalib::string label = inspector.asString().make_string(); + if (dimension.empty() || label.empty()) { + return; + } + if (indexed.find(dimension) == indexed.end()) { + address.emplace(dimension, TensorSpec::Label(label)); + } else { + size_t index = strtoull(label.c_str(), nullptr, 10); + address.emplace(dimension, TensorSpec::Label(index)); + } + } +}; + +} // namespace vespalib::eval::<unnamed> + +using ErrorConstant = SimpleConstantValue<ErrorValue>; +using TensorConstant = SimpleConstantValue<TensorValue>; 
+ +ConstantValue::UP +ConstantTensorLoader::create(const vespalib::string &path, const vespalib::string &type) const +{ + ValueType value_type = ValueType::from_spec(type); + if (value_type.is_error()) { + LOG(warning, "invalid type specification: %s", type.c_str()); + auto tensor = _engine.create(TensorSpec("double")); + return std::make_unique<TensorConstant>(_engine.type_of(*tensor), std::move(tensor)); + } + Slime slime; + File file(path); + if (!file.valid()) { + LOG(warning, "could not read file: %s", path.c_str()); + } else if (slime::JsonFormat::decode(Memory(file.data, file.size), slime) == 0) { + LOG(warning, "file contains invalid json: %s", path.c_str()); + } + std::set<vespalib::string> indexed; + for (const auto &dimension: value_type.dimensions()) { + if (dimension.is_indexed()) { + indexed.insert(dimension.name); + } + } + TensorSpec spec(type); + const Inspector &cells = slime.get()["cells"]; + for (size_t i = 0; i < cells.entries(); ++i) { + TensorSpec::Address address; + AddressExtractor extractor(indexed, address); + cells[i]["address"].traverse(extractor); + spec.add(address, cells[i]["value"].asDouble()); + } + auto tensor = _engine.create(spec); + return std::make_unique<TensorConstant>(_engine.type_of(*tensor), std::move(tensor)); +} + +} // namespace vespalib::eval +} // namespace vespalib diff --git a/eval/src/vespa/eval/eval/value_cache/constant_tensor_loader.h b/eval/src/vespa/eval/eval/value_cache/constant_tensor_loader.h new file mode 100644 index 00000000000..66fb3fad882 --- /dev/null +++ b/eval/src/vespa/eval/eval/value_cache/constant_tensor_loader.h @@ -0,0 +1,29 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+ +#pragma once + +#include "constant_value.h" +#include <vespa/vespalib/eval/tensor_engine.h> +#include <vespa/vespalib/stllike/string.h> + +namespace vespalib { +namespace eval { + +/** + * A ConstantValueFactory that will load constant tensor values from + * file. The file is expected to be in json format with the same + * structure used when feeding. The tensor is created by first + * building a generic TensorSpec object and then converting it to a + * specific tensor using the TensorEngine interface. + **/ +class ConstantTensorLoader : public ConstantValueFactory +{ +private: + const TensorEngine &_engine; +public: + ConstantTensorLoader(const TensorEngine &engine) : _engine(engine) {} + ConstantValue::UP create(const vespalib::string &path, const vespalib::string &type) const override; +}; + +} // namespace vespalib::eval +} // namespace vespalib diff --git a/eval/src/vespa/eval/eval/value_cache/constant_value.h b/eval/src/vespa/eval/eval/value_cache/constant_value.h new file mode 100644 index 00000000000..570276a50ab --- /dev/null +++ b/eval/src/vespa/eval/eval/value_cache/constant_value.h @@ -0,0 +1,50 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#pragma once + +#include <memory> +#include <vespa/vespalib/eval/value.h> +#include <vespa/vespalib/eval/value_type.h> + +namespace vespalib { +namespace eval { + +/** + * Abstract wrapper of a typed constant value. The lifetime of the + * wrapper controls the lifetime of the underlying type and value as + * well. + **/ +struct ConstantValue { + virtual const ValueType &type() const = 0; + virtual const Value &value() const = 0; + using UP = std::unique_ptr<ConstantValue>; + virtual ~ConstantValue() {} +}; + +/** + * A simple implementation of a constant value that bundles together a + * ValueType instance with a specific Value subclass instance. 
+ **/ +template <typename VALUE> +struct SimpleConstantValue : ConstantValue { + ValueType my_type; + VALUE my_value; + template <typename... Args> + SimpleConstantValue(const ValueType &type_in, Args &&...args) + : my_type(type_in), my_value(std::forward<Args>(args)...) {} + const ValueType &type() const override { return my_type; } + const Value &value() const override { return my_value; } +}; + +/** + * An abstract factory of constant values. The typical use-case for + * this will be to load constant values from file with a cache on top + * to share constants among users. + **/ +struct ConstantValueFactory { + virtual ConstantValue::UP create(const vespalib::string &path, const vespalib::string &type) const = 0; + virtual ~ConstantValueFactory() {} +}; + +} // namespace vespalib::eval +} // namespace vespalib diff --git a/eval/src/vespa/eval/eval/value_cache/constant_value_cache.cpp b/eval/src/vespa/eval/eval/value_cache/constant_value_cache.cpp new file mode 100644 index 00000000000..fdda42fd0a5 --- /dev/null +++ b/eval/src/vespa/eval/eval/value_cache/constant_value_cache.cpp @@ -0,0 +1,40 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+ +#include <vespa/fastos/fastos.h> +#include "constant_value_cache.h" + +namespace vespalib { +namespace eval { + +ConstantValueCache::Token::~Token() +{ + std::lock_guard<std::mutex> guard(cache->lock); + if (--(entry->second.num_refs) == 0) { + cache->cached.erase(entry); + } +} + +ConstantValueCache::ConstantValueCache(const ConstantValueFactory &factory) + : _factory(factory), + _cache(std::make_shared<Cache>()) +{ +} + +ConstantValue::UP +ConstantValueCache::create(const vespalib::string &path, const vespalib::string &type) const +{ + Cache::Key key = std::make_pair(path, type); + std::lock_guard<std::mutex> guard(_cache->lock); + auto pos = _cache->cached.find(key); + if (pos != _cache->cached.end()) { + ++(pos->second.num_refs); + return std::make_unique<Token>(_cache, pos); + } else { + auto res = _cache->cached.emplace(std::move(key), _factory.create(path, type)); + assert(res.second); + return std::make_unique<Token>(_cache, res.first); + } +} + +} // namespace vespalib::eval +} // namespace vespalib diff --git a/eval/src/vespa/eval/eval/value_cache/constant_value_cache.h b/eval/src/vespa/eval/eval/value_cache/constant_value_cache.h new file mode 100644 index 00000000000..d025aaf713f --- /dev/null +++ b/eval/src/vespa/eval/eval/value_cache/constant_value_cache.h @@ -0,0 +1,56 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#pragma once + +#include "constant_value.h" + +#include <map> +#include <memory> +#include <mutex> + +namespace vespalib { +namespace eval { + +/** + * A cache enabling clients to share the constant values created by an + * underlying factory. The returned wrappers are used to ensure + * appropriate lifetime of created values. Used values are kept in the + * cache and unused values are evicted from the cache. 
+ **/ +class ConstantValueCache : public ConstantValueFactory +{ +private: + struct Cache { + using SP = std::shared_ptr<Cache>; + using Key = std::pair<vespalib::string, vespalib::string>; + struct Value { + size_t num_refs; + ConstantValue::UP const_value; + Value(ConstantValue::UP const_value_in) + : num_refs(1), const_value(std::move(const_value_in)) {} + }; + using Map = std::map<Key,Value>; + std::mutex lock; + Map cached; + }; + + struct Token : ConstantValue { + Cache::SP cache; + Cache::Map::iterator entry; + Token(Cache::SP cache_in, Cache::Map::iterator entry_in) + : cache(std::move(cache_in)), entry(entry_in) {} + const ValueType &type() const override { return entry->second.const_value->type(); } + const Value &value() const override { return entry->second.const_value->value(); } + ~Token(); + }; + + const ConstantValueFactory &_factory; + Cache::SP _cache; + +public: + ConstantValueCache(const ConstantValueFactory &factory); + ConstantValue::UP create(const vespalib::string &path, const vespalib::string &type) const override; +}; + +} // namespace vespalib::eval +} // namespace vespalib diff --git a/eval/src/vespa/eval/eval/value_type.cpp b/eval/src/vespa/eval/eval/value_type.cpp new file mode 100644 index 00000000000..a038ee46583 --- /dev/null +++ b/eval/src/vespa/eval/eval/value_type.cpp @@ -0,0 +1,275 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+ +#include <vespa/fastos/fastos.h> +#include "value_type.h" +#include "value_type_spec.h" + +namespace vespalib { +namespace eval { + +namespace { + +using Dimension = ValueType::Dimension; +using DimensionList = std::vector<Dimension>; + +size_t my_dimension_index(const std::vector<Dimension> &list, const vespalib::string &name) { + for (size_t idx = 0; idx < list.size(); ++idx) { + if (list[idx].name == name) { + return idx; + } + } + return ValueType::Dimension::npos; +} + +Dimension *find_dimension(std::vector<Dimension> &list, const vespalib::string &name) { + size_t idx = my_dimension_index(list, name); + return (idx != ValueType::Dimension::npos) ? &list[idx] : nullptr; +} + +const Dimension *find_dimension(const std::vector<Dimension> &list, const vespalib::string &name) { + size_t idx = my_dimension_index(list, name); + return (idx != ValueType::Dimension::npos) ? &list[idx] : nullptr; +} + +void sort_dimensions(DimensionList &dimensions) { + std::sort(dimensions.begin(), dimensions.end(), + [](const auto &a, const auto &b){ return (a.name < b.name); }); +} + +bool has_duplicates(const DimensionList &dimensions) { + for (size_t i = 1; i < dimensions.size(); ++i) { + if (dimensions[i - 1].name == dimensions[i].name) { + return true; + } + } + return false; +} + +struct DimensionResult { + bool mismatch; + DimensionList dimensions; + DimensionResult() : mismatch(false), dimensions() {} + void add(const Dimension &a) { + dimensions.push_back(a); + } + void unify(const Dimension &a, const Dimension &b) { + if (a.is_mapped() == b.is_mapped()) { + add(Dimension(a.name, std::min(a.size, b.size))); + } else { + mismatch = true; + } + } +}; + +DimensionResult my_join(const DimensionList &lhs, const DimensionList &rhs) { + DimensionResult result; + auto pos = rhs.begin(); + auto end = rhs.end(); + for (const Dimension &dim: lhs) { + while ((pos != end) && (pos->name < dim.name)) { + result.add(*pos++); + } + if ((pos != end) && (pos->name == dim.name)) { + 
result.unify(dim, *pos++); + } else { + result.add(dim); + } + } + while (pos != end) { + result.add(*pos++); + } + return result; +} + +struct Renamer { + const std::vector<vespalib::string> &from; + const std::vector<vespalib::string> &to; + size_t match_cnt; + Renamer(const std::vector<vespalib::string> &from_in, + const std::vector<vespalib::string> &to_in) + : from(from_in), to(to_in), match_cnt(0) {} + const vespalib::string &rename(const vespalib::string &name) { + for (size_t i = 0; i < from.size(); ++i) { + if (name == from[i]) { + ++match_cnt; + return to[i]; + } + } + return name; + } + bool matched_all() const { return (match_cnt == from.size()); } +}; + +} // namespace vespalib::tensor::<unnamed> + +constexpr size_t ValueType::Dimension::npos; + +ValueType::~ValueType() { } +bool +ValueType::is_sparse() const +{ + if (!is_tensor() || dimensions().empty()) { + return false; + } + for (const auto &dim : dimensions()) { + if (!dim.is_mapped()) { + return false; + } + } + return true; +} + +bool +ValueType::is_dense() const +{ + if (!is_tensor() || dimensions().empty()) { + return false; + } + for (const auto &dim : dimensions()) { + if (!dim.is_indexed()) { + return false; + } + } + return true; +} + +size_t +ValueType::dimension_index(const vespalib::string &name) const { + return my_dimension_index(_dimensions, name); +} + +std::vector<vespalib::string> +ValueType::dimension_names() const +{ + std::vector<vespalib::string> result; + for (const auto &dimension: _dimensions) { + result.push_back(dimension.name); + } + return result; +} + +ValueType +ValueType::reduce(const std::vector<vespalib::string> &dimensions_in) const +{ + if (is_error() || is_any()) { + return *this; + } else if (dimensions_in.empty()) { + return double_type(); + } else if (!is_tensor()) { + return error_type(); + } else if (_dimensions.empty()) { + return any_type(); + } + size_t removed = 0; + std::vector<Dimension> result; + for (const Dimension &d: _dimensions) { + if 
(std::find(dimensions_in.begin(), dimensions_in.end(), d.name) == dimensions_in.end()) { + result.push_back(d); + } else { + ++removed; + } + } + if (removed != dimensions_in.size()) { + return error_type(); + } + if (result.empty()) { + return double_type(); + } + return tensor_type(std::move(result)); +} + +ValueType +ValueType::rename(const std::vector<vespalib::string> &from, + const std::vector<vespalib::string> &to) const +{ + if (!maybe_tensor() || from.empty() || (from.size() != to.size())) { + return error_type(); + } + if (unknown_dimensions()) { + return any_type(); + } + Renamer renamer(from, to); + std::vector<Dimension> dim_list; + for (const auto &dim: _dimensions) { + dim_list.emplace_back(renamer.rename(dim.name), dim.size); + } + if (!renamer.matched_all()) { + return error_type(); + } + return tensor_type(dim_list); +} + +ValueType +ValueType::tensor_type(std::vector<Dimension> dimensions_in) +{ + sort_dimensions(dimensions_in); + if (has_duplicates(dimensions_in)) { + return error_type(); + } + return ValueType(Type::TENSOR, std::move(dimensions_in)); +} + +ValueType +ValueType::from_spec(const vespalib::string &spec) +{ + return value_type::from_spec(spec); +} + +vespalib::string +ValueType::to_spec() const +{ + return value_type::to_spec(*this); +} + +ValueType +ValueType::join(const ValueType &lhs, const ValueType &rhs) +{ + if (lhs.is_error() || rhs.is_error()) { + return error_type(); + } else if (lhs.is_double()) { + return rhs; + } else if (rhs.is_double()) { + return lhs; + } else if (lhs.unknown_dimensions() || rhs.unknown_dimensions()) { + return any_type(); + } + DimensionResult result = my_join(lhs._dimensions, rhs._dimensions); + if (result.mismatch) { + return error_type(); + } + return tensor_type(std::move(result.dimensions)); +} + +ValueType +ValueType::concat(const ValueType &lhs, const ValueType &rhs, const vespalib::string &dimension) +{ + if (lhs.is_error() || rhs.is_error()) { + return error_type(); + } else if 
(lhs.unknown_dimensions() || rhs.unknown_dimensions()) { + return any_type(); + } + DimensionResult result = my_join(lhs._dimensions, rhs._dimensions); + auto lhs_dim = find_dimension(lhs.dimensions(), dimension); + auto rhs_dim = find_dimension(rhs.dimensions(), dimension); + auto res_dim = find_dimension(result.dimensions, dimension); + if (result.mismatch || (res_dim && res_dim->is_mapped())) { + return error_type(); + } + if (res_dim) { + if (res_dim->is_bound()) { + res_dim->size = (lhs_dim ? lhs_dim->size : 1) + + (rhs_dim ? rhs_dim->size : 1); + } + } else { + result.dimensions.emplace_back(dimension, 2); + } + return tensor_type(std::move(result.dimensions)); +} + +std::ostream & +operator<<(std::ostream &os, const ValueType &type) { + return os << type.to_spec(); +} + +} // namespace vespalib::eval +} // namespace vespalib diff --git a/eval/src/vespa/eval/eval/value_type.h b/eval/src/vespa/eval/eval/value_type.h new file mode 100644 index 00000000000..f6d02336daa --- /dev/null +++ b/eval/src/vespa/eval/eval/value_type.h @@ -0,0 +1,91 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#pragma once + +#include <vespa/vespalib/stllike/string.h> +#include <vector> +#include <memory> + +namespace vespalib { +namespace eval { + +/** + * The type of a Value. This is used for type-resolution during + * compilation of interpreted functions using boxed polymorphic + * values. 
+ **/ +class ValueType +{ +public: + enum class Type { ANY, ERROR, DOUBLE, TENSOR }; + struct Dimension { + static constexpr size_t npos = -1; + vespalib::string name; + size_t size; + Dimension(const vespalib::string &name_in) + : name(name_in), size(npos) {} + Dimension(const vespalib::string &name_in, size_t size_in) + : name(name_in), size(size_in) {} + bool operator==(const Dimension &rhs) const { + return ((name == rhs.name) && (size == rhs.size)); + } + bool operator!=(const Dimension &rhs) const { return !(*this == rhs); } + bool is_mapped() const { return (size == npos); } + bool is_indexed() const { return (size != npos); } + bool is_bound() const { return ((size != npos) && (size != 0)); } + }; + +private: + Type _type; + std::vector<Dimension> _dimensions; + + explicit ValueType(Type type_in) + : _type(type_in), _dimensions() {} + ValueType(Type type_in, std::vector<Dimension> &&dimensions_in) + : _type(type_in), _dimensions(std::move(dimensions_in)) {} + +public: + ~ValueType(); + Type type() const { return _type; } + bool is_any() const { return (_type == Type::ANY); } + bool is_error() const { return (_type == Type::ERROR); } + bool is_double() const { return (_type == Type::DOUBLE); } + bool is_tensor() const { return (_type == Type::TENSOR); } + bool is_sparse() const; + bool is_dense() const; + const std::vector<Dimension> &dimensions() const { return _dimensions; } + size_t dimension_index(const vespalib::string &name) const; + std::vector<vespalib::string> dimension_names() const; + bool maybe_tensor() const { return (is_any() || is_tensor()); } + bool unknown_dimensions() const { return (maybe_tensor() && _dimensions.empty()); } + bool is_abstract() const { + for (const auto &dimension: _dimensions) { + if (dimension.is_indexed() && !dimension.is_bound()) { + return true; + } + } + return (is_any() || (is_tensor() && (dimensions().empty()))); + } + bool operator==(const ValueType &rhs) const { + return ((_type == rhs._type) && (_dimensions == 
rhs._dimensions)); + } + bool operator!=(const ValueType &rhs) const { return !(*this == rhs); } + + ValueType reduce(const std::vector<vespalib::string> &dimensions_in) const; + ValueType rename(const std::vector<vespalib::string> &from, + const std::vector<vespalib::string> &to) const; + + static ValueType any_type() { return ValueType(Type::ANY); } + static ValueType error_type() { return ValueType(Type::ERROR); }; + static ValueType double_type() { return ValueType(Type::DOUBLE); } + static ValueType tensor_type(std::vector<Dimension> dimensions_in); + static ValueType from_spec(const vespalib::string &spec); + vespalib::string to_spec() const; + static ValueType join(const ValueType &lhs, const ValueType &rhs); + static ValueType concat(const ValueType &lhs, const ValueType &rhs, const vespalib::string &dimension); +}; + +std::ostream &operator<<(std::ostream &os, const ValueType &type); + +} // namespace vespalib::eval +} // namespace vespalib diff --git a/eval/src/vespa/eval/eval/value_type_spec.cpp b/eval/src/vespa/eval/eval/value_type_spec.cpp new file mode 100644 index 00000000000..6d3aabef142 --- /dev/null +++ b/eval/src/vespa/eval/eval/value_type_spec.cpp @@ -0,0 +1,211 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+ +#include <vespa/fastos/fastos.h> +#include "value_type.h" +#include <vespa/vespalib/stllike/asciistream.h> +#include <algorithm> +#include "value_type_spec.h" +#include <vespa/vespalib/util/stringfmt.h> + +namespace vespalib { +namespace eval { +namespace value_type { + +namespace { + +class ParseContext +{ +private: + const char *_pos; + const char *_end; + const char *&_pos_after; + char _curr; + bool _failed; + +public: + ParseContext(const char *pos, const char *end, const char *&pos_out) + : _pos(pos), _end(end), _pos_after(pos_out), _curr(0), _failed(false) + { + if (_pos < _end) { + _curr = *_pos; + } + } + ~ParseContext() { + if (!_failed) { + _pos_after = _pos; + } else { + _pos_after = nullptr; + } + } + void fail() { + _failed = true; + _curr = 0; + } + bool failed() const { return _failed; } + void next() { _curr = (_curr && (_pos < _end)) ? *(++_pos) : 0; } + char get() const { return _curr; } + bool eos() const { return !_curr; } + void eat(char c) { + if (_curr == c) { + next(); + } else { + fail(); + } + } + void skip_spaces() { + while (!eos() && isspace(_curr)) { + next(); + } + } +}; + +bool is_ident(char c, bool first) { + return ((c >= 'a' && c <= 'z') || + (c >= 'A' && c <= 'Z') || + (c == '_') || + (c >= '0' && c <= '9' && !first)); +} + +vespalib::string parse_ident(ParseContext &ctx) { + ctx.skip_spaces(); + vespalib::string ident; + if (is_ident(ctx.get(), true)) { + ident.push_back(ctx.get()); + for (ctx.next(); is_ident(ctx.get(), false); ctx.next()) { + ident.push_back(ctx.get()); + } + } + ctx.skip_spaces(); + return ident; +} + +size_t parse_int(ParseContext &ctx) { + vespalib::string num; + for (; isdigit(ctx.get()); ctx.next()) { + num.push_back(ctx.get()); + } + if (num.empty()) { + ctx.fail(); + } + return atoi(num.c_str()); +} + +ValueType::Dimension parse_dimension(ParseContext &ctx) { + ValueType::Dimension dimension(parse_ident(ctx)); + ctx.skip_spaces(); + if (ctx.get() == '{') { + ctx.next(); // '{' + ctx.skip_spaces(); + 
ctx.eat('}'); + } else if (ctx.get() == '[') { + ctx.next(); // '[' + ctx.skip_spaces(); + if (ctx.get() == ']') { + dimension.size = 0; + } else { + dimension.size = parse_int(ctx); + ctx.skip_spaces(); + } + ctx.eat(']'); + } else { + ctx.fail(); + } + return dimension; +} + +std::vector<ValueType::Dimension> parse_dimension_list(ParseContext &ctx) { + std::vector<ValueType::Dimension> list; + ctx.skip_spaces(); + if (ctx.get() == '(') { + ctx.eat('('); + ctx.skip_spaces(); + while (!ctx.eos() && (ctx.get() != ')')) { + if (!list.empty()) { + ctx.eat(','); + } + list.push_back(parse_dimension(ctx)); + ctx.skip_spaces(); + } + ctx.eat(')'); + } + ctx.skip_spaces(); + return list; +} + +} // namespace vespalib::eval::value_type::<anonymous> + +ValueType +parse_spec(const char *pos_in, const char *end_in, const char *&pos_out) +{ + ParseContext ctx(pos_in, end_in, pos_out); + vespalib::string type_name = parse_ident(ctx); + if (type_name == "any") { + return ValueType::any_type(); + } else if (type_name == "error") { + return ValueType::error_type(); + } else if (type_name == "double") { + return ValueType::double_type(); + } else if (type_name == "tensor") { + std::vector<ValueType::Dimension> list = parse_dimension_list(ctx); + if (!ctx.failed()) { + return ValueType::tensor_type(std::move(list)); + } + } else { + ctx.fail(); + } + return ValueType::error_type(); +} + +ValueType +from_spec(const vespalib::string &spec) +{ + const char *after = nullptr; + const char *end = spec.data() + spec.size(); + ValueType type = parse_spec(spec.data(), end, after); + if (after != end) { + return ValueType::error_type(); + } + return type; +} + +vespalib::string +to_spec(const ValueType &type) +{ + asciistream os; + size_t cnt = 0; + switch (type.type()) { + case ValueType::Type::ANY: + os << "any"; + break; + case ValueType::Type::ERROR: + os << "error"; + break; + case ValueType::Type::DOUBLE: + os << "double"; + break; + case ValueType::Type::TENSOR: + os << "tensor"; + if 
(!type.dimensions().empty()) { + os << "("; + for (const auto &d: type.dimensions()) { + if (cnt++ > 0) { + os << ","; + } + if (d.size == ValueType::Dimension::npos) { + os << d.name << "{}"; + } else if (d.size == 0) { + os << d.name << "[]"; + } else { + os << d.name << "[" << d.size << "]"; + } + } + os << ")"; + } + break; + } + return os.str(); +} + +} // namespace vespalib::eval::value_type +} // namespace vespalib::eval +} // namespace vespalib diff --git a/eval/src/vespa/eval/eval/value_type_spec.h b/eval/src/vespa/eval/eval/value_type_spec.h new file mode 100644 index 00000000000..dedfeec929e --- /dev/null +++ b/eval/src/vespa/eval/eval/value_type_spec.h @@ -0,0 +1,18 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#pragma once + +#include "value_type.h" + +namespace vespalib { +namespace eval { +namespace value_type { + +ValueType parse_spec(const char *pos_in, const char *end_in, const char *&pos_out); + +ValueType from_spec(const vespalib::string &str); +vespalib::string to_spec(const ValueType &type); + +} // namespace vespalib::eval::value_type +} // namespace vespalib::eval +} // namespace vespalib diff --git a/eval/src/vespa/eval/eval/vm_forest.cpp b/eval/src/vespa/eval/eval/vm_forest.cpp new file mode 100644 index 00000000000..5c7164bab73 --- /dev/null +++ b/eval/src/vespa/eval/eval/vm_forest.cpp @@ -0,0 +1,255 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+ +#include <vespa/fastos/fastos.h> +#include "gbdt.h" +#include "vm_forest.h" +#include <vespa/vespalib/eval/basic_nodes.h> +#include <vespa/vespalib/eval/call_nodes.h> +#include <vespa/vespalib/eval/operator_nodes.h> + +namespace vespalib { +namespace eval { +namespace gbdt { + +namespace { + +//----------------------------------------------------------------------------- + +constexpr uint32_t LEAF = 0; +constexpr uint32_t LESS = 1; +constexpr uint32_t IN = 2; + +// layout: +// +// <feature+types>: [feature ref|my type|left child type|right child type] +// bits: 20 4 4 4 +// +// LEAF: [const] +// bits: 64 +// +// LESS: [<feature+types>][const][skip] +// bits 32 64 32 +// +// IN: [<feature+types>][skip|set size](set size)X[const] +// bits 32 24 8 64 + +const double *as_double_ptr(const uint32_t *pos) { + return reinterpret_cast<const double*>(pos); +} + +bool find_in(double value, const double *set, const double *end) { + for (; set < end; ++set) { + if (value == *set) { + return true; + } + } + return false; +} + +double less_only_find_leaf(const double *input, const uint32_t *pos, uint32_t node_type) { + for (;;) { + if (input[pos[0] >> 12] < *as_double_ptr(pos + 1)) { + node_type = (pos[0] & 0xf0) >> 4; + pos += 4; + } else { + node_type = (pos[0] & 0xf); + pos += 4 + pos[3]; + } + if (node_type == LEAF) { + return *as_double_ptr(pos); + } + } +} + +double general_find_leaf(const double *input, const uint32_t *pos, uint32_t node_type) { + for (;;) { + if (node_type == LESS) { + if (input[pos[0] >> 12] < *as_double_ptr(pos + 1)) { + node_type = (pos[0] & 0xf0) >> 4; + pos += 4; + } else { + node_type = (pos[0] & 0xf); + pos += 4 + pos[3]; + } + if (node_type == LEAF) { + return *as_double_ptr(pos); + } + } else { + if (find_in(input[pos[0] >> 12], as_double_ptr(pos + 2), + as_double_ptr(pos + 2 + (2 * (pos[1] & 0xff))))) + { + node_type = (pos[0] & 0xf0) >> 4; + pos += 2 + (2 * (pos[1] & 0xff)); + } else { + node_type = (pos[0] & 0xf); + pos += (2 + (2 * (pos[1] 
& 0xff))) + (pos[1] >> 8); + } + if (node_type == LEAF) { + return *as_double_ptr(pos); + } + } + } +} + +//----------------------------------------------------------------------------- + +void encode_const(double value, std::vector<uint32_t> &model_out) { + union { + double d[1]; + uint32_t i[2]; + } buf; + assert(sizeof(buf) == sizeof(double)); + buf.d[0] = value; + model_out.push_back(buf.i[0]); + model_out.push_back(buf.i[1]); +} + +uint32_t encode_node(const nodes::Node &node_in, std::vector<uint32_t> &model_out); + +void encode_less(const nodes::Less &less, + const nodes::Node &left_child, const nodes::Node &right_child, + std::vector<uint32_t> &model_out) +{ + size_t meta_idx = model_out.size(); + auto symbol = nodes::as<nodes::Symbol>(less.lhs()); + assert(symbol && (symbol->id() >= 0)); + model_out.push_back(uint32_t(symbol->id()) << 12); + assert(less.rhs().is_const()); + encode_const(less.rhs().get_const_value(), model_out); + size_t skip_idx = model_out.size(); + model_out.push_back(0); // left child size placeholder + uint32_t left_type = encode_node(left_child, model_out); + model_out[skip_idx] = (model_out.size() - (skip_idx + 1)); + uint32_t right_type = encode_node(right_child, model_out); + model_out[meta_idx] |= ((LESS << 8) | (left_type << 4) | right_type); +} + +void encode_in(const nodes::In &in, + const nodes::Node &left_child, const nodes::Node &right_child, + std::vector<uint32_t> &model_out) +{ + size_t meta_idx = model_out.size(); + auto symbol = nodes::as<nodes::Symbol>(in.lhs()); + assert(symbol && (symbol->id() >= 0)); + model_out.push_back(uint32_t(symbol->id()) << 12); + assert(in.rhs().is_const()); + auto array = nodes::as<nodes::Array>(in.rhs()); + size_t set_size_idx = model_out.size(); + if (array) { + model_out.push_back(array->size()); + for (size_t i = 0; i < array->size(); ++i) { + encode_const(array->get(i).get_const_value(), model_out); + } + } else { + model_out.push_back(1); + encode_const(in.rhs().get_const_value(), 
model_out); + } + size_t left_idx = model_out.size(); + uint32_t left_type = encode_node(left_child, model_out); + model_out[set_size_idx] |= (model_out.size() - left_idx) << 8; + uint32_t right_type = encode_node(right_child, model_out); + model_out[meta_idx] |= ((IN << 8) | (left_type << 4) | right_type); +} + +uint32_t encode_node(const nodes::Node &node_in, std::vector<uint32_t> &model_out) { + auto if_node = nodes::as<nodes::If>(node_in); + if (if_node) { + auto less = nodes::as<nodes::Less>(if_node->cond()); + auto in = nodes::as<nodes::In>(if_node->cond()); + if (less) { + encode_less(*less, if_node->true_expr(), if_node->false_expr(), model_out); + return LESS; + } else { + assert(in); + encode_in(*in, if_node->true_expr(), if_node->false_expr(), model_out); + return IN; + } + } else { + assert(node_in.is_const()); + encode_const(node_in.get_const_value(), model_out); + return LEAF; + } +} + +void encode_tree(const nodes::Node &root_in, std::vector<uint32_t> &model_out) { + size_t size_idx = model_out.size(); + model_out.push_back(0); // tree size placeholder + encode_node(root_in, model_out); + model_out[size_idx] = (model_out.size() - (size_idx + 1)); +} + +//----------------------------------------------------------------------------- + +Optimize::Result optimize(const std::vector<const nodes::Node *> &trees, + Forest::eval_function eval) +{ + std::vector<uint32_t> model; + for (const nodes::Node *tree: trees) { + encode_tree(*tree, model); + } + return Optimize::Result(Forest::UP(new VMForest(std::move(model))), eval); +} + +//----------------------------------------------------------------------------- + +} // namespace vespalib::eval::gbdt::<unnamed> + +//----------------------------------------------------------------------------- + +Optimize::Result +VMForest::less_only_optimize(const ForestStats &stats, + const std::vector<const nodes::Node *> &trees) +{ + if (stats.total_in_checks > 0) { + return Optimize::Result(); + } + return optimize(trees, 
less_only_eval); +} + +double +VMForest::less_only_eval(const Forest *forest, const double *input) +{ + const VMForest &self = *((const VMForest *)forest); + const uint32_t *pos = &self._model[0]; + const uint32_t *end = pos + self._model.size(); + double sum = 0.0; + while (pos < end) { + uint32_t tree_size = *pos++; + sum += less_only_find_leaf(input, pos, (*pos & 0xf00) >> 8); + pos += tree_size; + } + return sum; +} + +Optimize::Result +VMForest::general_optimize(const ForestStats &stats, + const std::vector<const nodes::Node *> &trees) +{ + if (stats.max_set_size > 255) { + return Optimize::Result(); + } + return optimize(trees, general_eval); +} + +double +VMForest::general_eval(const Forest *forest, const double *input) +{ + const VMForest &self = *((const VMForest *)forest); + const uint32_t *pos = &self._model[0]; + const uint32_t *end = pos + self._model.size(); + double sum = 0.0; + while (pos < end) { + uint32_t tree_size = *pos++; + sum += general_find_leaf(input, pos, (*pos & 0xf00) >> 8); + pos += tree_size; + } + return sum; +} + +Optimize::Chain VMForest::optimize_chain({less_only_optimize, general_optimize}); + +//----------------------------------------------------------------------------- + +} // namespace vespalib::eval::gbdt +} // namespace vespalib::eval +} // namespace vespalib diff --git a/eval/src/vespa/eval/eval/vm_forest.h b/eval/src/vespa/eval/eval/vm_forest.h new file mode 100644 index 00000000000..48e2bdf9cf6 --- /dev/null +++ b/eval/src/vespa/eval/eval/vm_forest.h @@ -0,0 +1,35 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#pragma once + +#include "gbdt.h" + +namespace vespalib { +namespace eval { +namespace gbdt { + +/** + * GBDT forest optimizer using a compact tree representation combined + * with a leaf-node search and aggregate evaluation strategy. This + * code is very similar to the old VM instruction for MLR expressions. 
+ **/ +class VMForest : public Forest +{ +private: + std::vector<uint32_t> _model; + +public: + VMForest(std::vector<uint32_t> &&model) : _model(std::move(model)) {} + static Optimize::Result less_only_optimize(const ForestStats &stats, + const std::vector<const nodes::Node *> &trees); + static double less_only_eval(const Forest *forest, const double *); + static Optimize::Result general_optimize(const ForestStats &stats, + const std::vector<const nodes::Node *> &trees); + static double general_eval(const Forest *forest, const double *); + static Optimize::Chain optimize_chain; +}; + +} // namespace vespalib::eval::gbdt +} // namespace vespalib::eval +} // namespace vespalib + diff --git a/eval/src/vespa/eval/tensor/CMakeLists.txt b/eval/src/vespa/eval/tensor/CMakeLists.txt new file mode 100644 index 00000000000..7ed5e4d60d5 --- /dev/null +++ b/eval/src/vespa/eval/tensor/CMakeLists.txt @@ -0,0 +1,16 @@ +# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +vespa_add_library(vespalib_vespalib_tensor + SOURCES + default_tensor_engine.cpp + tensor.cpp + tensor_address.cpp + tensor_apply.cpp + tensor_factory.cpp + tensor_mapper.cpp + $<TARGET_OBJECTS:vespalib_vespalib_tensor_sparse> + $<TARGET_OBJECTS:vespalib_vespalib_tensor_dense> + $<TARGET_OBJECTS:vespalib_vespalib_tensor_serialization> + INSTALL lib64 + DEPENDS + vespalib +) diff --git a/eval/src/vespa/eval/tensor/cell_function.h b/eval/src/vespa/eval/tensor/cell_function.h new file mode 100644 index 00000000000..778f9dea3d0 --- /dev/null +++ b/eval/src/vespa/eval/tensor/cell_function.h @@ -0,0 +1,21 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#pragma once + +#include <functional> + +namespace vespalib { +namespace tensor { + +/** + * Interface for a function to be applied on cells in a tensor. 
+ */ +struct CellFunction +{ + typedef std::reference_wrapper<const CellFunction> CREF; + virtual ~CellFunction() {} + virtual double apply(double value) const = 0; +}; + +} // namespace vespalib::tensor +} // namespace vespalib diff --git a/eval/src/vespa/eval/tensor/default_tensor.h b/eval/src/vespa/eval/tensor/default_tensor.h new file mode 100644 index 00000000000..2423e677eff --- /dev/null +++ b/eval/src/vespa/eval/tensor/default_tensor.h @@ -0,0 +1,17 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#pragma once + +#include "sparse/sparse_tensor.h" +#include "sparse/sparse_tensor_builder.h" + +namespace vespalib { +namespace tensor { + +struct DefaultTensor { + using type = SparseTensor; + using builder = SparseTensorBuilder; +}; + +} // namespace vespalib::tensor +} // namespace vespalib diff --git a/eval/src/vespa/eval/tensor/default_tensor_engine.cpp b/eval/src/vespa/eval/tensor/default_tensor_engine.cpp new file mode 100644 index 00000000000..9b81bcf205b --- /dev/null +++ b/eval/src/vespa/eval/tensor/default_tensor_engine.cpp @@ -0,0 +1,243 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+ +#include <vespa/fastos/fastos.h> +#include "default_tensor_engine.h" +#include <vespa/vespalib/eval/value.h> +#include <vespa/vespalib/eval/tensor_spec.h> +#include <vespa/vespalib/eval/operation_visitor.h> +#include "tensor.h" +#include "dense/dense_tensor_builder.h" +#include "dense/dense_tensor_function_compiler.h" +#include "default_tensor.h" + +namespace vespalib { +namespace tensor { + +using Value = eval::Value; +using ErrorValue = eval::ErrorValue; +using DoubleValue = eval::DoubleValue; +using TensorValue = eval::TensorValue; + +const DefaultTensorEngine DefaultTensorEngine::_engine; + +eval::ValueType +DefaultTensorEngine::type_of(const Tensor &tensor) const +{ + assert(&tensor.engine() == this); + const tensor::Tensor &my_tensor = static_cast<const tensor::Tensor &>(tensor); + return my_tensor.getType(); +} + +bool +DefaultTensorEngine::equal(const Tensor &a, const Tensor &b) const +{ + assert(&a.engine() == this); + assert(&b.engine() == this); + const tensor::Tensor &my_a = static_cast<const tensor::Tensor &>(a); + const tensor::Tensor &my_b = static_cast<const tensor::Tensor &>(b); + if (my_a.getType().type() != my_b.getType().type()) { + return false; + } + return my_a.equals(my_b); +} + +vespalib::string +DefaultTensorEngine::to_string(const Tensor &tensor) const +{ + assert(&tensor.engine() == this); + const tensor::Tensor &my_tensor = static_cast<const tensor::Tensor &>(tensor); + return my_tensor.toString(); +} + +eval::TensorSpec +DefaultTensorEngine::to_spec(const Tensor &tensor) const +{ + assert(&tensor.engine() == this); + const tensor::Tensor &my_tensor = static_cast<const tensor::Tensor &>(tensor); + return my_tensor.toSpec(); +} + +eval::TensorFunction::UP +DefaultTensorEngine::compile(eval::tensor_function::Node_UP expr) const +{ + return DenseTensorFunctionCompiler::compile(std::move(expr)); +} + +struct IsAddOperation : public eval::DefaultOperationVisitor { + bool result = false; + void visitDefault(const eval::Operation &) 
override {} + void visit(const eval::operation::Add &) override { result = true; } +}; + +std::unique_ptr<eval::Tensor> +DefaultTensorEngine::create(const TensorSpec &spec) const +{ + ValueType type = ValueType::from_spec(spec.type()); + bool is_dense = false; + bool is_sparse = false; + for (const auto &dimension: type.dimensions()) { + if (dimension.is_mapped()) { + is_sparse = true; + } + if (dimension.is_indexed()) { + is_dense = true; + } + } + if (is_dense && is_sparse) { + return DefaultTensor::builder().build(); + } else if (is_dense) { + DenseTensorBuilder builder; + std::map<vespalib::string,DenseTensorBuilder::Dimension> dimension_map; + for (const auto &dimension: type.dimensions()) { + dimension_map[dimension.name] = builder.defineDimension(dimension.name, dimension.size); + } + for (const auto &cell: spec.cells()) { + const auto &address = cell.first; + for (const auto &binding: address) { + builder.addLabel(dimension_map[binding.first], binding.second.index); + } + builder.addCell(cell.second); + } + return builder.build(); + } else { // sparse + DefaultTensor::builder builder; + std::map<vespalib::string,DefaultTensor::builder::Dimension> dimension_map; + for (const auto &dimension: type.dimensions()) { + dimension_map[dimension.name] = builder.define_dimension(dimension.name); + } + for (const auto &cell: spec.cells()) { + const auto &address = cell.first; + for (const auto &binding: address) { + builder.add_label(dimension_map[binding.first], binding.second.name); + } + builder.add_cell(cell.second); + } + return builder.build(); + } +} + +const eval::Value & +DefaultTensorEngine::reduce(const Tensor &tensor, const BinaryOperation &op, const std::vector<vespalib::string> &dimensions, Stash &stash) const +{ + assert(&tensor.engine() == this); + const tensor::Tensor &my_tensor = static_cast<const tensor::Tensor &>(tensor); + IsAddOperation check; + op.accept(check); + tensor::Tensor::UP result; + if (check.result) { + if (dimensions.empty()) { // 
sum + return stash.create<eval::DoubleValue>(my_tensor.sum()); + } else { // dimension sum + for (const auto &dimension: dimensions) { + if (result) { + result = result->sum(dimension); + } else { + result = my_tensor.sum(dimension); + } + } + } + } else { + result = my_tensor.reduce(op, dimensions); + } + if (result) { + eval::ValueType result_type(result->getType()); + if (result_type.is_tensor()) { + return stash.create<TensorValue>(std::move(result)); + } + if (result_type.is_double()) { + return stash.create<eval::DoubleValue>(result->sum()); + } + } + return stash.create<ErrorValue>(); +} + +struct CellFunctionAdapter : tensor::CellFunction { + const eval::UnaryOperation &op; + CellFunctionAdapter(const eval::UnaryOperation &op_in) : op(op_in) {} + virtual double apply(double value) const override { return op.eval(value); } +}; + +const eval::Value & +DefaultTensorEngine::map(const UnaryOperation &op, const Tensor &a, Stash &stash) const +{ + assert(&a.engine() == this); + const tensor::Tensor &my_a = static_cast<const tensor::Tensor &>(a); + CellFunctionAdapter cell_function(op); + return stash.create<TensorValue>(my_a.apply(cell_function)); +} + +struct TensorOperationOverride : eval::DefaultOperationVisitor { + const tensor::Tensor &lhs; + const tensor::Tensor &rhs; + tensor::Tensor::UP result; + TensorOperationOverride(const tensor::Tensor &lhs_in, + const tensor::Tensor &rhs_in) + : lhs(lhs_in), rhs(rhs_in), result() {} + virtual void visitDefault(const eval::Operation &op) override { + // empty result indicates error + const eval::BinaryOperation *binaryOp = + dynamic_cast<const eval::BinaryOperation *>(&op); + if (binaryOp) { + result = lhs.apply(*binaryOp, rhs); + } + } + virtual void visit(const eval::operation::Add &) override { + result = lhs.add(rhs); + } + virtual void visit(const eval::operation::Sub &) override { + result = lhs.subtract(rhs); + } + virtual void visit(const eval::operation::Mul &) override { + if (lhs.getType() == rhs.getType()) 
{ + result = lhs.match(rhs); + } else { + result = lhs.multiply(rhs); + } + } + virtual void visit(const eval::operation::Min &) override { + result = lhs.min(rhs); + } + virtual void visit(const eval::operation::Max &) override { + result = lhs.max(rhs); + } +}; + +const eval::Value & +DefaultTensorEngine::apply(const BinaryOperation &op, const Tensor &a, const Tensor &b, Stash &stash) const +{ + assert(&a.engine() == this); + assert(&b.engine() == this); + const tensor::Tensor &my_a = static_cast<const tensor::Tensor &>(a); + const tensor::Tensor &my_b = static_cast<const tensor::Tensor &>(b); + if (my_a.getType().type() != my_b.getType().type()) { + return stash.create<ErrorValue>(); + } + TensorOperationOverride tensor_override(my_a, my_b); + op.accept(tensor_override); + if (tensor_override.result) { + return stash.create<TensorValue>(std::move(tensor_override.result)); + } else { + return stash.create<ErrorValue>(); + } +} + +const Value & +DefaultTensorEngine::concat(const Value &a, const Value &b, const vespalib::string &dimension, Stash &stash) const +{ + (void) a; + (void) b; + (void) dimension; + return stash.create<ErrorValue>(); +} + +const Value & +DefaultTensorEngine::rename(const Value &a, const std::vector<vespalib::string> &from, const std::vector<vespalib::string> &to, Stash &stash) const +{ + (void) a; + (void) from; + (void) to; + return stash.create<ErrorValue>(); +} + +} // namespace vespalib::tensor +} // namespace vespalib diff --git a/eval/src/vespa/eval/tensor/default_tensor_engine.h b/eval/src/vespa/eval/tensor/default_tensor_engine.h new file mode 100644 index 00000000000..ac223721843 --- /dev/null +++ b/eval/src/vespa/eval/tensor/default_tensor_engine.h @@ -0,0 +1,39 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+ +#pragma once + +#include <vespa/vespalib/eval/tensor_engine.h> + +namespace vespalib { +namespace tensor { + +/** + * This is a tensor engine implementation wrapping the default tensor + * implementations (dense/sparse). + **/ +class DefaultTensorEngine : public eval::TensorEngine +{ +private: + DefaultTensorEngine() {} + static const DefaultTensorEngine _engine; +public: + static const TensorEngine &ref() { return _engine; }; + + ValueType type_of(const Tensor &tensor) const override; + bool equal(const Tensor &a, const Tensor &b) const override; + vespalib::string to_string(const Tensor &tensor) const override; + TensorSpec to_spec(const Tensor &tensor) const override; + + virtual eval::TensorFunction::UP compile(eval::tensor_function::Node_UP expr) const override; + + std::unique_ptr<Tensor> create(const TensorSpec &spec) const override; + const Value &reduce(const Tensor &tensor, const BinaryOperation &op, const std::vector<vespalib::string> &dimensions, Stash &stash) const override; + const Value &map(const UnaryOperation &op, const Tensor &a, Stash &stash) const override; + const Value &apply(const BinaryOperation &op, const Tensor &a, const Tensor &b, Stash &stash) const override; + + const Value &concat(const Value &a, const Value &b, const vespalib::string &dimension, Stash &stash) const override; + const Value &rename(const Value &a, const std::vector<vespalib::string> &from, const std::vector<vespalib::string> &to, Stash &stash) const override; +}; + +} // namespace vespalib::tensor +} // namespace vespalib diff --git a/eval/src/vespa/eval/tensor/dense/CMakeLists.txt b/eval/src/vespa/eval/tensor/dense/CMakeLists.txt new file mode 100644 index 00000000000..094c57619dc --- /dev/null +++ b/eval/src/vespa/eval/tensor/dense/CMakeLists.txt @@ -0,0 +1,14 @@ +# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+vespa_add_library(vespalib_vespalib_tensor_dense OBJECT + SOURCES + direct_dense_tensor_builder.cpp + dense_dot_product_function.cpp + dense_tensor.cpp + dense_tensor_address_combiner.cpp + dense_tensor_builder.cpp + dense_tensor_cells_iterator.cpp + dense_tensor_function_compiler.cpp + dense_tensor_view.cpp + mutable_dense_tensor_view.cpp + DEPENDS +) diff --git a/eval/src/vespa/eval/tensor/dense/dense_dot_product_function.cpp b/eval/src/vespa/eval/tensor/dense/dense_dot_product_function.cpp new file mode 100644 index 00000000000..fbbcd949c29 --- /dev/null +++ b/eval/src/vespa/eval/tensor/dense/dense_dot_product_function.cpp @@ -0,0 +1,45 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#include <vespa/fastos/fastos.h> +#include "dense_dot_product_function.h" +#include "dense_tensor.h" +#include "dense_tensor_view.h" +#include <vespa/vespalib/eval/value.h> +#include <vespa/vespalib/tensor/tensor.h> + +namespace vespalib { +namespace tensor { + +using CellsRef = DenseTensorView::CellsRef; + +DenseDotProductFunction::DenseDotProductFunction(size_t lhsTensorId_, size_t rhsTensorId_) + : _lhsTensorId(lhsTensorId_), + _rhsTensorId(rhsTensorId_), + _hwAccelerator(hwaccelrated::IAccelrated::getAccelrator()) +{ +} + +namespace { + +CellsRef +getCellsRef(const eval::Value &value) +{ + const Tensor *tensor = static_cast<const Tensor *>(value.as_tensor()); + const DenseTensorView *denseTensor = static_cast<const DenseTensorView *>(tensor); + return denseTensor->cellsRef(); +} + +} + +const eval::Value & +DenseDotProductFunction::eval(const Input &input, Stash &stash) const +{ + DenseTensorView::CellsRef lhsCells = getCellsRef(input.get_tensor(_lhsTensorId)); + DenseTensorView::CellsRef rhsCells = getCellsRef(input.get_tensor(_rhsTensorId)); + size_t numCells = std::min(lhsCells.size(), rhsCells.size()); + double result = _hwAccelerator->dotProduct(lhsCells.cbegin(), rhsCells.cbegin(), numCells); + 
return stash.create<eval::DoubleValue>(result); +} + +} // namespace tensor +} // namespace vespalib diff --git a/eval/src/vespa/eval/tensor/dense/dense_dot_product_function.h b/eval/src/vespa/eval/tensor/dense/dense_dot_product_function.h new file mode 100644 index 00000000000..9676003ef93 --- /dev/null +++ b/eval/src/vespa/eval/tensor/dense/dense_dot_product_function.h @@ -0,0 +1,31 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#pragma once + +#include <vespa/vespalib/eval/tensor_function.h> +#include <vespa/vespalib/hwaccelrated/iaccelrated.h> + +namespace vespalib { +namespace tensor { + +/** + * Tensor function for a dot product between two 1-dimensional dense tensors. + */ +class DenseDotProductFunction : public eval::TensorFunction +{ +private: + using InjectUP = std::unique_ptr<eval::tensor_function::Inject>; + + size_t _lhsTensorId; + size_t _rhsTensorId; + hwaccelrated::IAccelrated::UP _hwAccelerator; + +public: + DenseDotProductFunction(size_t lhsTensorId_, size_t rhsTensorId_); + size_t lhsTensorId() const { return _lhsTensorId; } + size_t rhsTensorId() const { return _rhsTensorId; } + virtual const eval::Value &eval(const Input &input, Stash &stash) const override; +}; + +} // namespace tensor +} // namespace vespalib diff --git a/eval/src/vespa/eval/tensor/dense/dense_tensor.cpp b/eval/src/vespa/eval/tensor/dense/dense_tensor.cpp new file mode 100644 index 00000000000..5967ea71820 --- /dev/null +++ b/eval/src/vespa/eval/tensor/dense/dense_tensor.cpp @@ -0,0 +1,89 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+ +#include <vespa/fastos/fastos.h> +#include "dense_tensor.h" +#include <vespa/vespalib/util/stringfmt.h> +#include <vespa/vespalib/util/exceptions.h> +#include <vespa/vespalib/eval/operation.h> +#include <sstream> + +using vespalib::eval::TensorSpec; + +namespace vespalib { +namespace tensor { + +namespace { + +size_t +calcCellsSize(const eval::ValueType &type) +{ + size_t cellsSize = 1; + for (const auto &dim : type.dimensions()) { + cellsSize *= dim.size; + } + return cellsSize; +} + +void +checkCellsSize(const DenseTensor &arg) +{ + auto cellsSize = calcCellsSize(arg.type()); + if (arg.cells().size() != cellsSize) { + throw IllegalStateException(make_string("Wrong cell size, " + "expected=%zu, " + "actual=%zu", + cellsSize, + arg.cells().size())); + } +} + +} + +DenseTensor::DenseTensor() + : DenseTensorView(_type), + _type(eval::ValueType::double_type()), + _cells(1) +{ + initCellsRef(CellsRef(_cells)); +} + +DenseTensor::DenseTensor(const eval::ValueType &type_in, + const Cells &cells_in) + : DenseTensorView(_type), + _type(type_in), + _cells(cells_in) +{ + initCellsRef(CellsRef(_cells)); + checkCellsSize(*this); +} + + +DenseTensor::DenseTensor(const eval::ValueType &type_in, + Cells &&cells_in) + : DenseTensorView(_type), + _type(type_in), + _cells(std::move(cells_in)) +{ + initCellsRef(CellsRef(_cells)); + checkCellsSize(*this); +} + +DenseTensor::DenseTensor(eval::ValueType &&type_in, + Cells &&cells_in) + : DenseTensorView(_type), + _type(std::move(type_in)), + _cells(std::move(cells_in)) +{ + initCellsRef(CellsRef(_cells)); + checkCellsSize(*this); +} + +bool +DenseTensor::operator==(const DenseTensor &rhs) const +{ + return (_type == rhs._type) && + (_cells == rhs._cells); +} + +} // namespace vespalib::tensor +} // namespace vespalib diff --git a/eval/src/vespa/eval/tensor/dense/dense_tensor.h b/eval/src/vespa/eval/tensor/dense/dense_tensor.h new file mode 100644 index 00000000000..bf59a639bdc --- /dev/null +++ 
b/eval/src/vespa/eval/tensor/dense/dense_tensor.h @@ -0,0 +1,43 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#pragma once + +#include <vespa/vespalib/tensor/tensor.h> +#include <vespa/vespalib/tensor/types.h> +#include <vespa/vespalib/eval/value_type.h> +#include "dense_tensor_cells_iterator.h" +#include "dense_tensor_view.h" + +namespace vespalib { +namespace tensor { + +/** + * A dense tensor where all dimensions are indexed. + * Tensor cells are stored in an underlying array according to the order of the dimensions. + */ +class DenseTensor : public DenseTensorView +{ +public: + typedef std::unique_ptr<DenseTensor> UP; + using Cells = std::vector<double>; + using CellsIterator = DenseTensorCellsIterator; + +private: + eval::ValueType _type; + Cells _cells; + +public: + DenseTensor(); + DenseTensor(const eval::ValueType &type_in, + const Cells &cells_in); + DenseTensor(const eval::ValueType &type_in, + Cells &&cells_in); + DenseTensor(eval::ValueType &&type_in, + Cells &&cells_in); + bool operator==(const DenseTensor &rhs) const; + const Cells &cells() const { return _cells; } + +}; + +} // namespace vespalib::tensor +} // namespace vespalib diff --git a/eval/src/vespa/eval/tensor/dense/dense_tensor_address_combiner.cpp b/eval/src/vespa/eval/tensor/dense/dense_tensor_address_combiner.cpp new file mode 100644 index 00000000000..53af60bd101 --- /dev/null +++ b/eval/src/vespa/eval/tensor/dense/dense_tensor_address_combiner.cpp @@ -0,0 +1,123 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+ +#include <vespa/fastos/fastos.h> +#include "dense_tensor_address_combiner.h" +#include <vespa/vespalib/util/exceptions.h> +#include <vespa/vespalib/util/stringfmt.h> + +namespace vespalib { +namespace tensor { + +using Address = DenseTensorAddressCombiner::Address; + +namespace { + +class AddressReader +{ +private: + const Address &_address; + size_t _idx; + +public: + AddressReader(const Address &address) + : _address(address), + _idx(0) + {} + size_t nextLabel() { + return _address[_idx++]; + } + bool valid() { + return _idx < _address.size(); + } +}; + +} + +DenseTensorAddressCombiner::DenseTensorAddressCombiner(const eval::ValueType &lhs, + const eval::ValueType &rhs) + : _ops(), + _combinedAddress() +{ + auto rhsItr = rhs.dimensions().cbegin(); + auto rhsItrEnd = rhs.dimensions().cend(); + for (const auto &lhsDim : lhs.dimensions()) { + while ((rhsItr != rhsItrEnd) && (rhsItr->name < lhsDim.name)) { + _ops.push_back(AddressOp::RHS); + ++rhsItr; + } + if ((rhsItr != rhsItrEnd) && (rhsItr->name == lhsDim.name)) { + _ops.push_back(AddressOp::BOTH); + ++rhsItr; + } else { + _ops.push_back(AddressOp::LHS); + } + } + while (rhsItr != rhsItrEnd) { + _ops.push_back(AddressOp::RHS); + ++rhsItr; + } +} + +bool +DenseTensorAddressCombiner::combine(const CellsIterator &lhsItr, + const CellsIterator &rhsItr) +{ + _combinedAddress.clear(); + AddressReader lhsReader(lhsItr.address()); + AddressReader rhsReader(rhsItr.address()); + for (const auto &op : _ops) { + switch (op) { + case AddressOp::LHS: + _combinedAddress.emplace_back(lhsReader.nextLabel()); + break; + case AddressOp::RHS: + _combinedAddress.emplace_back(rhsReader.nextLabel()); + break; + case AddressOp::BOTH: + size_t lhsLabel = lhsReader.nextLabel(); + size_t rhsLabel = rhsReader.nextLabel(); + if (lhsLabel != rhsLabel) { + return false; + } + _combinedAddress.emplace_back(lhsLabel); + } + } + assert(!lhsReader.valid()); + assert(!rhsReader.valid()); + return true; +} + +eval::ValueType 
+DenseTensorAddressCombiner::combineDimensions(const eval::ValueType &lhs, + const eval::ValueType &rhs) +{ + // NOTE: both lhs and rhs are sorted according to dimension names. + std::vector<eval::ValueType::Dimension> result; + auto lhsItr = lhs.dimensions().cbegin(); + auto rhsItr = rhs.dimensions().cbegin(); + while (lhsItr != lhs.dimensions().end() && + rhsItr != rhs.dimensions().end()) { + if (lhsItr->name == rhsItr->name) { + result.emplace_back(lhsItr->name, + std::min(lhsItr->size, rhsItr->size)); + ++lhsItr; + ++rhsItr; + } else if (lhsItr->name < rhsItr->name) { + result.emplace_back(*lhsItr++); + } else { + result.emplace_back(*rhsItr++); + } + } + while (lhsItr != lhs.dimensions().end()) { + result.emplace_back(*lhsItr++); + } + while (rhsItr != rhs.dimensions().end()) { + result.emplace_back(*rhsItr++); + } + return (result.empty() ? + eval::ValueType::double_type() : + eval::ValueType::tensor_type(std::move(result))); +} + +} // namespace vespalib::tensor +} // namespace vespalib diff --git a/eval/src/vespa/eval/tensor/dense/dense_tensor_address_combiner.h b/eval/src/vespa/eval/tensor/dense/dense_tensor_address_combiner.h new file mode 100644 index 00000000000..75336da311b --- /dev/null +++ b/eval/src/vespa/eval/tensor/dense/dense_tensor_address_combiner.h @@ -0,0 +1,49 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#pragma once + +#include <vespa/vespalib/tensor/tensor.h> +#include <vespa/vespalib/tensor/types.h> +#include <vespa/vespalib/eval/value_type.h> +#include "dense_tensor_cells_iterator.h" + +namespace vespalib { +namespace tensor { + + +/** + * Combines two dense tensor addresses to a new tensor address. + * The resulting dimensions is the union of the input dimensions and + * common dimensions must have matching labels. 
+ */ +class DenseTensorAddressCombiner +{ +public: + using Address = std::vector<size_t>; + +private: + enum class AddressOp { + LHS, + RHS, + BOTH + }; + + using CellsIterator = DenseTensorCellsIterator; + + std::vector<AddressOp> _ops; + Address _combinedAddress; + +public: + DenseTensorAddressCombiner(const eval::ValueType &lhs, + const eval::ValueType &rhs); + + bool combine(const CellsIterator &lhsItr, + const CellsIterator &rhsItr); + const Address &address() const { return _combinedAddress; } + + static eval::ValueType combineDimensions(const eval::ValueType &lhs, const eval::ValueType &rhs); + +}; + +} // namespace vespalib::tensor +} // namespace vespalib diff --git a/eval/src/vespa/eval/tensor/dense/dense_tensor_apply.h b/eval/src/vespa/eval/tensor/dense/dense_tensor_apply.h new file mode 100644 index 00000000000..d2411cebbd9 --- /dev/null +++ b/eval/src/vespa/eval/tensor/dense/dense_tensor_apply.h @@ -0,0 +1,28 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#pragma once + +namespace vespalib { +namespace tensor { + +class Tensor; +class DenseTensor; + +namespace dense { + +/** + * Creates a new tensor using all combinations of input tensor cells with matching + * labels for common dimensions, using func to calculate new cell value + * based on the cell values in the input tensors. 
+ */ +template <typename Function> +std::unique_ptr<Tensor> +apply(const DenseTensorView &lhs, const Tensor &rhs, Function &&func); +template <typename Function> +std::unique_ptr<Tensor> +apply(const DenseTensorView &lhs, const DenseTensorView &rhs, Function &&func); + +} // namespace vespalib::tensor::dense +} // namespace vespalib::tensor +} // namespace vespalib + diff --git a/eval/src/vespa/eval/tensor/dense/dense_tensor_apply.hpp b/eval/src/vespa/eval/tensor/dense/dense_tensor_apply.hpp new file mode 100644 index 00000000000..73a737e6ff3 --- /dev/null +++ b/eval/src/vespa/eval/tensor/dense/dense_tensor_apply.hpp @@ -0,0 +1,47 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#pragma once + +#include "dense_tensor_apply.h" +#include "dense_tensor_address_combiner.h" +#include "direct_dense_tensor_builder.h" + +namespace vespalib { +namespace tensor { +namespace dense { + +template <typename Function> +std::unique_ptr<Tensor> +apply(const DenseTensorView &lhs, const DenseTensorView &rhs, Function &&func) +{ + DenseTensorAddressCombiner combiner(lhs.type(), rhs.type()); + DirectDenseTensorBuilder builder(DenseTensorAddressCombiner::combineDimensions(lhs.type(), rhs.type())); + for (DenseTensorCellsIterator lhsItr = lhs.cellsIterator(); lhsItr.valid(); lhsItr.next()) { + for (DenseTensorCellsIterator rhsItr = rhs.cellsIterator(); rhsItr.valid(); rhsItr.next()) { + bool combineSuccess = combiner.combine(lhsItr, rhsItr); + if (combineSuccess) { + builder.insertCell(combiner.address(), func(lhsItr.cell(), rhsItr.cell())); + } + } + } + return builder.build(); +} + +template <typename Function> +std::unique_ptr<Tensor> +apply(const DenseTensorView &lhs, const Tensor &rhs, Function &&func) +{ + const DenseTensorView *view = dynamic_cast<const DenseTensorView *>(&rhs); + if (view) { + return apply(lhs, *view, func); + } + const DenseTensor *dense = dynamic_cast<const DenseTensor *>(&rhs); + if 
(dense) { + return apply(lhs, DenseTensorView(*dense), func); + } + return Tensor::UP(); +} + +} // namespace vespalib::tensor::dense +} // namespace vespalib::tensor +} // namespace vespalib diff --git a/eval/src/vespa/eval/tensor/dense/dense_tensor_builder.cpp b/eval/src/vespa/eval/tensor/dense/dense_tensor_builder.cpp new file mode 100644 index 00000000000..872be49f9b4 --- /dev/null +++ b/eval/src/vespa/eval/tensor/dense/dense_tensor_builder.cpp @@ -0,0 +1,169 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#include "dense_tensor_builder.h" +#include <vespa/vespalib/util/exceptions.h> +#include <cassert> + +using vespalib::IllegalArgumentException; +using vespalib::make_string; + +namespace vespalib { +namespace tensor { + +namespace { + +constexpr size_t UNDEFINED_LABEL = std::numeric_limits<size_t>::max(); + +void +validateLabelInRange(size_t label, size_t dimensionSize, const vespalib::string &dimension) +{ + if (label >= dimensionSize) { + throw IllegalArgumentException(make_string( + "Label '%zu' for dimension '%s' is outside range [0, %zu>", + label, dimension.c_str(), dimensionSize)); + } +} + +void +validateLabelNotSpecified(size_t oldLabel, const vespalib::string &dimension) +{ + if (oldLabel != UNDEFINED_LABEL) { + throw IllegalArgumentException(make_string( + "Label for dimension '%s' is already specified with value '%zu'", + dimension.c_str(), oldLabel)); + } +} + +eval::ValueType +makeValueType(std::vector<eval::ValueType::Dimension> &&dimensions) { + return (dimensions.empty() ? 
+ eval::ValueType::double_type() : + eval::ValueType::tensor_type(std::move(dimensions))); +} + +} + +void +DenseTensorBuilder::allocateCellsStorage() +{ + size_t cellsSize = 1; + for (const auto &dimension : _dimensions) { + cellsSize *= dimension.size; + } + _cells.resize(cellsSize, 0); +} + + +void +DenseTensorBuilder::sortDimensions() +{ + std::sort(_dimensions.begin(), _dimensions.end(), + [](const eval::ValueType::Dimension &lhs, + const eval::ValueType::Dimension &rhs) + { return lhs.name < rhs.name; }); + _dimensionsMapping.resize(_dimensions.size()); + Dimension dim = 0; + for (const auto &dimension : _dimensions) { + auto itr = _dimensionsEnum.find(dimension.name); + assert(itr != _dimensionsEnum.end()); + _dimensionsMapping[itr->second] = dim; + ++dim; + } +} + +size_t +DenseTensorBuilder::calculateCellAddress() +{ + size_t result = 0; + size_t multiplier = 1; + for (int64_t i = (_addressBuilder.size() - 1); i >= 0; --i) { + const size_t label = _addressBuilder[i]; + const auto &dim = _dimensions[i]; + if (label == UNDEFINED_LABEL) { + throw IllegalArgumentException(make_string("Label for dimension '%s' is undefined. 
" + "Expected a value in the range [0, %zu>", + dim.name.c_str(), dim.size)); + } + result += (label * multiplier); + multiplier *= dim.size; + _addressBuilder[i] = UNDEFINED_LABEL; + } + return result; +} + +DenseTensorBuilder::DenseTensorBuilder() + : _dimensionsEnum(), + _dimensions(), + _cells(), + _addressBuilder(), + _dimensionsMapping() +{ +} + +DenseTensorBuilder::Dimension +DenseTensorBuilder::defineDimension(const vespalib::string &dimension, + size_t dimensionSize) +{ + auto itr = _dimensionsEnum.find(dimension); + if (itr != _dimensionsEnum.end()) { + return itr->second; + } + assert(_cells.empty()); + Dimension result = _dimensionsEnum.size(); + _dimensionsEnum.insert(std::make_pair(dimension, result)); + _dimensions.emplace_back(dimension, dimensionSize); + _addressBuilder.push_back(UNDEFINED_LABEL); + assert(_dimensions.size() == (result + 1)); + assert(_addressBuilder.size() == (result + 1)); + return result; +} + +DenseTensorBuilder & +DenseTensorBuilder::addLabel(Dimension dimension, size_t label) +{ + if (_cells.empty()) { + sortDimensions(); + allocateCellsStorage(); + } + assert(dimension < _dimensions.size()); + assert(dimension < _addressBuilder.size()); + Dimension mappedDimension = _dimensionsMapping[dimension]; + const auto &dim = _dimensions[mappedDimension]; + validateLabelInRange(label, dim.size, dim.name); + validateLabelNotSpecified(_addressBuilder[mappedDimension], + dim.name); + _addressBuilder[mappedDimension] = label; + return *this; +} + +DenseTensorBuilder & +DenseTensorBuilder::addCell(double value) +{ + if (_cells.empty()) { + sortDimensions(); + allocateCellsStorage(); + } + size_t cellAddress = calculateCellAddress(); + assert(cellAddress < _cells.size()); + _cells[cellAddress] = value; + return *this; +} + +Tensor::UP +DenseTensorBuilder::build() +{ + if (_cells.empty()) { + allocateCellsStorage(); + } + Tensor::UP result = std::make_unique<DenseTensor>(makeValueType(std::move(_dimensions)), + std::move(_cells)); + 
_dimensionsEnum.clear(); + _dimensions.clear(); + DenseTensor::Cells().swap(_cells); + _addressBuilder.clear(); + _dimensionsMapping.clear(); + return result; +} + +} // namespace vespalib::tensor +} // namespace vespalib diff --git a/eval/src/vespa/eval/tensor/dense/dense_tensor_builder.h b/eval/src/vespa/eval/tensor/dense/dense_tensor_builder.h new file mode 100644 index 00000000000..31e3b7cf451 --- /dev/null +++ b/eval/src/vespa/eval/tensor/dense/dense_tensor_builder.h @@ -0,0 +1,41 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#pragma once + +#include "dense_tensor.h" +#include <vespa/vespalib/stllike/hash_map.h> +#include <vespa/vespalib/tensor/tensor_builder.h> + +namespace vespalib { +namespace tensor { + +/** + * A builder of for dense tensors. + */ +class DenseTensorBuilder +{ +public: + using Dimension = TensorBuilder::Dimension; + +private: + vespalib::hash_map<vespalib::string, size_t> _dimensionsEnum; + std::vector<eval::ValueType::Dimension> _dimensions; + DenseTensor::Cells _cells; + std::vector<size_t> _addressBuilder; + std::vector<Dimension> _dimensionsMapping; + + void allocateCellsStorage(); + void sortDimensions(); + size_t calculateCellAddress(); + +public: + DenseTensorBuilder(); + + Dimension defineDimension(const vespalib::string &dimension, size_t dimensionSize); + DenseTensorBuilder &addLabel(Dimension dimension, size_t label); + DenseTensorBuilder &addCell(double value); + Tensor::UP build(); +}; + +} // namespace vespalib::tensor +} // namespace vespalib diff --git a/eval/src/vespa/eval/tensor/dense/dense_tensor_cells_iterator.cpp b/eval/src/vespa/eval/tensor/dense/dense_tensor_cells_iterator.cpp new file mode 100644 index 00000000000..84311e47e5a --- /dev/null +++ b/eval/src/vespa/eval/tensor/dense/dense_tensor_cells_iterator.cpp @@ -0,0 +1,25 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+ +#include <vespa/fastos/fastos.h> +#include "dense_tensor_cells_iterator.h" + +namespace vespalib { +namespace tensor { + +void +DenseTensorCellsIterator::next() +{ + ++_cellIdx; + if (valid()) { + for (int64_t i = (_address.size() - 1); i >= 0; --i) { + _address[i] = (_address[i] + 1) % _type.dimensions()[i].size; + if (_address[i] != 0) { + // Outer dimension labels can only be increased when this label wraps around. + break; + } + } + } +} + +} // namespace vespalib::tensor +} // namespace vespalib diff --git a/eval/src/vespa/eval/tensor/dense/dense_tensor_cells_iterator.h b/eval/src/vespa/eval/tensor/dense/dense_tensor_cells_iterator.h new file mode 100644 index 00000000000..c3d00fdb28d --- /dev/null +++ b/eval/src/vespa/eval/tensor/dense/dense_tensor_cells_iterator.h @@ -0,0 +1,41 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#pragma once + +#include <vespa/vespalib/tensor/tensor.h> +#include <vespa/vespalib/tensor/types.h> +#include <vespa/vespalib/eval/value_type.h> +#include <vespa/vespalib/tensor/tensor.h> +#include <vespa/vespalib/util/arrayref.h> + +namespace vespalib { +namespace tensor { + +/** + * Utility class to iterate over cells in a dense tensor. 
+ */ +class DenseTensorCellsIterator +{ +private: + using CellsRef = vespalib::ConstArrayRef<double>; + const eval::ValueType &_type; + CellsRef _cells; + size_t _cellIdx; + std::vector<size_t> _address; + +public: + DenseTensorCellsIterator(const eval::ValueType &type_in, CellsRef cells) + : _type(type_in), + _cells(cells), + _cellIdx(0), + _address(type_in.dimensions().size(), 0) + {} + bool valid() const { return _cellIdx < _cells.size(); } + void next(); + double cell() const { return _cells[_cellIdx]; } + const std::vector<size_t> &address() const { return _address; } + const eval::ValueType &type() const { return _type; } +}; + +} // namespace vespalib::tensor +} // namespace vespalib diff --git a/eval/src/vespa/eval/tensor/dense/dense_tensor_function_compiler.cpp b/eval/src/vespa/eval/tensor/dense/dense_tensor_function_compiler.cpp new file mode 100644 index 00000000000..8d981ece848 --- /dev/null +++ b/eval/src/vespa/eval/tensor/dense/dense_tensor_function_compiler.cpp @@ -0,0 +1,76 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+ +#include <vespa/fastos/fastos.h> +#include "dense_dot_product_function.h" +#include "dense_tensor_function_compiler.h" +#include <vespa/vespalib/eval/operation_visitor.h> +#include <vespa/vespalib/eval/operation_visitor.h> +#include <vespa/vespalib/test/insertion_operators.h> +#include <iostream> + +using namespace vespalib::eval; +using namespace vespalib::eval::tensor_function; +using namespace vespalib::eval::operation; + +namespace vespalib { +namespace tensor { + +namespace { + +template <typename T> +bool +isType(const BinaryOperation &op) +{ + return (as<T>(op) != nullptr); +} + +bool +willReduceAllDimensions(const std::vector<vespalib::string> &dimensions) +{ + return (dimensions.empty() || (dimensions.size() == 1)); +} + +bool +is1dDenseTensor(const ValueType &type) +{ + return (type.is_dense() && (type.dimensions().size() == 1)); +} + +bool +isCompatibleTensorsForDotProduct(const ValueType &lhsType, const ValueType &rhsType) +{ + return (is1dDenseTensor(lhsType) && + is1dDenseTensor(rhsType) && + (lhsType.dimensions()[0].name == rhsType.dimensions()[0].name)); +} + +struct DotProductFunctionCompiler +{ + static TensorFunction::UP compile(Node_UP expr) { + const Reduce *reduce = as<Reduce>(*expr); + if (reduce && isType<Add>(*reduce->op) && willReduceAllDimensions(reduce->dimensions)) { + const Apply *apply = as<Apply>(*reduce->tensor); + if (apply && isType<Mul>(*apply->op)) { + const Inject *lhsTensor = as<Inject>(*apply->lhs_tensor); + const Inject *rhsTensor = as<Inject>(*apply->rhs_tensor); + if (lhsTensor && rhsTensor && + isCompatibleTensorsForDotProduct(lhsTensor->result_type, rhsTensor->result_type)) + { + return std::make_unique<DenseDotProductFunction>(lhsTensor->tensor_id, rhsTensor->tensor_id); + } + } + } + return std::move(expr); + } +}; + +} + +TensorFunction::UP +DenseTensorFunctionCompiler::compile(Node_UP expr) +{ + return DotProductFunctionCompiler::compile(std::move(expr)); +} + +} // namespace tensor +} // namespace vespalib diff 
--git a/eval/src/vespa/eval/tensor/dense/dense_tensor_function_compiler.h b/eval/src/vespa/eval/tensor/dense/dense_tensor_function_compiler.h new file mode 100644 index 00000000000..9d05d414bf1 --- /dev/null +++ b/eval/src/vespa/eval/tensor/dense/dense_tensor_function_compiler.h @@ -0,0 +1,20 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#pragma once + +#include <vespa/vespalib/eval/tensor_function.h> + +namespace vespalib { +namespace tensor { + +/** + * Class that recognizes calculations over dense tensors (in tensor function intermediate representation) + * and compiles this into an explicit tensor function. + */ +struct DenseTensorFunctionCompiler +{ + static eval::TensorFunction::UP compile(eval::tensor_function::Node_UP expr); +}; + +} // namespace tensor +} // namespace vespalib diff --git a/eval/src/vespa/eval/tensor/dense/dense_tensor_reduce.h b/eval/src/vespa/eval/tensor/dense/dense_tensor_reduce.h new file mode 100644 index 00000000000..58dccf9dd0b --- /dev/null +++ b/eval/src/vespa/eval/tensor/dense/dense_tensor_reduce.h @@ -0,0 +1,21 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#pragma once + +#include "dense_tensor.h" + +namespace vespalib { +namespace tensor { +namespace dense { + +/** + * Returns a tensor with the given dimension(s) removed and the cell values in that dimension(s) + * combined using the given func. 
+ */ +template<typename Function> +std::unique_ptr<Tensor> +reduce(const DenseTensorView &tensor, const std::vector<vespalib::string> &dimensions, Function &&func); + +} // namespace dense +} // namespace tensor +} // namespace vespalib diff --git a/eval/src/vespa/eval/tensor/dense/dense_tensor_reduce.hpp b/eval/src/vespa/eval/tensor/dense/dense_tensor_reduce.hpp new file mode 100644 index 00000000000..ed532ab14d6 --- /dev/null +++ b/eval/src/vespa/eval/tensor/dense/dense_tensor_reduce.hpp @@ -0,0 +1,124 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#include "dense_tensor_reduce.h" + +namespace vespalib { +namespace tensor { +namespace dense { + +using Cells = DenseTensorView::Cells; +using CellsRef = DenseTensorView::CellsRef; + +namespace { + +size_t +calcCellsSize(const eval::ValueType &type) +{ + size_t cellsSize = 1; + for (const auto &dim : type.dimensions()) { + cellsSize *= dim.size; + } + return cellsSize; +} + + +class DimensionReducer +{ +private: + eval::ValueType _type; + Cells _cellsResult; + size_t _innerDimSize; + size_t _sumDimSize; + size_t _outerDimSize; + + void setup(const eval::ValueType &oldType, + const vespalib::string &dimensionToRemove) { + auto itr = std::lower_bound(oldType.dimensions().cbegin(), + oldType.dimensions().cend(), + dimensionToRemove, + [](const auto &dim, const auto &dimension) + { return dim.name < dimension; }); + if ((itr != oldType.dimensions().end()) && (itr->name == dimensionToRemove)) { + for (auto outerItr = oldType.dimensions().cbegin(); outerItr != itr; ++outerItr) { + _outerDimSize *= outerItr->size; + } + _sumDimSize = itr->size; + for (++itr; itr != oldType.dimensions().cend(); ++itr) { + _innerDimSize *= itr->size; + } + } else { + _outerDimSize = calcCellsSize(oldType); + } + } + +public: + DimensionReducer(const eval::ValueType &oldType, + const string &dimensionToRemove) + : _type(oldType.reduce({ dimensionToRemove })), + 
_cellsResult(calcCellsSize(_type)), + _innerDimSize(1), + _sumDimSize(1), + _outerDimSize(1) + { + setup(oldType, dimensionToRemove); + } + + template <typename Function> + DenseTensor::UP + reduceCells(CellsRef cellsIn, Function &&func) { + auto itr_in = cellsIn.cbegin(); + auto itr_out = _cellsResult.begin(); + for (size_t outerDim = 0; outerDim < _outerDimSize; ++outerDim) { + auto saved_itr = itr_out; + for (size_t innerDim = 0; innerDim < _innerDimSize; ++innerDim) { + *itr_out = *itr_in; + ++itr_out; + ++itr_in; + } + for (size_t sumDim = 1; sumDim < _sumDimSize; ++sumDim) { + itr_out = saved_itr; + for (size_t innerDim = 0; innerDim < _innerDimSize; ++innerDim) { + *itr_out = func(*itr_out, *itr_in); + ++itr_out; + ++itr_in; + } + } + } + assert(itr_out == _cellsResult.end()); + assert(itr_in == cellsIn.cend()); + return std::make_unique<DenseTensor>(std::move(_type), std::move(_cellsResult)); + } +}; + +template <typename Function> +DenseTensor::UP +reduce(const DenseTensorView &tensor, const vespalib::string &dimensionToRemove, Function &&func) +{ + DimensionReducer reducer(tensor.type(), dimensionToRemove); + return reducer.reduceCells(tensor.cellsRef(), func); +} + +} + +template <typename Function> +std::unique_ptr<Tensor> +reduce(const DenseTensorView &tensor, const std::vector<vespalib::string> &dimensions, Function &&func) +{ + if (dimensions.size() == 1) { + return reduce(tensor, dimensions[0], func); + } else if (dimensions.size() > 0) { + DenseTensor::UP result = reduce(tensor, dimensions[0], func); + for (size_t i = 1; i < dimensions.size(); ++i) { + DenseTensor::UP tmpResult = reduce(DenseTensorView(*result), + dimensions[i], func); + result = std::move(tmpResult); + } + return result; + } else { + return std::unique_ptr<Tensor>(); + } +} + +} // namespace dense +} // namespace tensor +} // namespace vespalib diff --git a/eval/src/vespa/eval/tensor/dense/dense_tensor_view.cpp b/eval/src/vespa/eval/tensor/dense/dense_tensor_view.cpp new file mode 
100644 index 00000000000..4bb9219059c --- /dev/null +++ b/eval/src/vespa/eval/tensor/dense/dense_tensor_view.cpp @@ -0,0 +1,350 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#include <vespa/fastos/fastos.h> +#include "dense_tensor_view.h" +#include "dense_tensor_apply.hpp" +#include "dense_tensor_reduce.hpp" +#include <vespa/vespalib/util/stringfmt.h> +#include <vespa/vespalib/util/exceptions.h> +#include <vespa/vespalib/stllike/asciistream.h> +#include <vespa/vespalib/tensor/tensor_address_builder.h> +#include <vespa/vespalib/tensor/tensor_visitor.h> +#include <vespa/vespalib/eval/operation.h> +#include <sstream> + +using vespalib::eval::TensorSpec; + +namespace vespalib { +namespace tensor { + +namespace { + +string +dimensionsAsString(const eval::ValueType &type) +{ + std::ostringstream oss; + bool first = true; + oss << "["; + for (const auto &dim : type.dimensions()) { + if (!first) { + oss << ","; + } + first = false; + oss << dim.name << ":" << dim.size; + } + oss << "]"; + return oss.str(); +} + +size_t +calcCellsSize(const eval::ValueType &type) +{ + size_t cellsSize = 1; + for (const auto &dim : type.dimensions()) { + cellsSize *= dim.size; + } + return cellsSize; +} + + +void +checkCellsSize(const DenseTensorView &arg) +{ + auto cellsSize = calcCellsSize(arg.type()); + if (arg.cellsRef().size() != cellsSize) { + throw IllegalStateException(make_string("wrong cell size, " + "expected=%zu, " + "actual=%zu", + cellsSize, + arg.cellsRef().size())); + } +} + +void +checkDimensions(const DenseTensorView &lhs, const DenseTensorView &rhs, + vespalib::stringref operation) +{ + if (lhs.type() != rhs.type()) { + throw IllegalStateException(make_string("mismatching dimensions for " + "dense tensor %s, " + "lhs dimensions = '%s', " + "rhs dimensions = '%s'", + operation.c_str(), + dimensionsAsString(lhs.type()).c_str(), + dimensionsAsString(rhs.type()).c_str())); + } + checkCellsSize(lhs); + 
checkCellsSize(rhs); +} + + +/* + * Join the cells of two tensors. + * + * The given function is used to calculate the resulting cell value + * for overlapping cells. + */ +template <typename Function> +Tensor::UP +joinDenseTensors(const DenseTensorView &lhs, const DenseTensorView &rhs, + Function &&func) +{ + DenseTensor::Cells cells; + cells.reserve(lhs.cellsRef().size()); + auto rhsCellItr = rhs.cellsRef().cbegin(); + for (const auto &lhsCell : lhs.cellsRef()) { + cells.push_back(func(lhsCell, *rhsCellItr)); + ++rhsCellItr; + } + assert(rhsCellItr == rhs.cellsRef().cend()); + return std::make_unique<DenseTensor>(lhs.type(), + std::move(cells)); +} + + +template <typename Function> +Tensor::UP +joinDenseTensors(const DenseTensorView &lhs, const Tensor &rhs, + vespalib::stringref operation, + Function &&func) +{ + const DenseTensorView *view = dynamic_cast<const DenseTensorView *>(&rhs); + if (view) { + checkDimensions(lhs, *view, operation); + return joinDenseTensors(lhs, *view, func); + } + return Tensor::UP(); +} + +bool sameCells(DenseTensorView::CellsRef lhs, DenseTensorView::CellsRef rhs) +{ + if (lhs.size() != rhs.size()) { + return false; + } + for (size_t i = 0; i < lhs.size(); ++i) { + if (lhs[i] != rhs[i]) { + return false; + } + } + return true; +} + +} + + +DenseTensorView::DenseTensorView(const DenseTensor &rhs) + : _typeRef(rhs.type()), + _cellsRef(rhs.cellsRef()) +{ +} + + +bool +DenseTensorView::operator==(const DenseTensorView &rhs) const +{ + return (_typeRef == rhs._typeRef) && sameCells(_cellsRef, rhs._cellsRef); +} + +eval::ValueType +DenseTensorView::getType() const +{ + return _typeRef; +} + +double +DenseTensorView::sum() const +{ + double result = 0.0; + for (const auto &cell : _cellsRef) { + result += cell; + } + return result; +} + +Tensor::UP +DenseTensorView::add(const Tensor &arg) const +{ + return dense::apply(*this, arg, + [](double lhsValue, double rhsValue) + { return lhsValue + rhsValue; }); +} + +Tensor::UP 
+DenseTensorView::subtract(const Tensor &arg) const +{ + return dense::apply(*this, arg, + [](double lhsValue, double rhsValue) + { return lhsValue - rhsValue; }); +} + +Tensor::UP +DenseTensorView::multiply(const Tensor &arg) const +{ + return dense::apply(*this, arg, + [](double lhsValue, double rhsValue) + { return lhsValue * rhsValue; }); +} + +Tensor::UP +DenseTensorView::min(const Tensor &arg) const +{ + return dense::apply(*this, arg, + [](double lhsValue, double rhsValue) + { return std::min(lhsValue, rhsValue); }); +} + +Tensor::UP +DenseTensorView::max(const Tensor &arg) const +{ + return dense::apply(*this, arg, + [](double lhsValue, double rhsValue) + { return std::max(lhsValue, rhsValue); }); +} + +Tensor::UP +DenseTensorView::match(const Tensor &arg) const +{ + return joinDenseTensors(*this, arg, "match", + [](double lhsValue, double rhsValue) + { return (lhsValue * rhsValue); }); +} + +Tensor::UP +DenseTensorView::apply(const CellFunction &func) const +{ + Cells newCells(_cellsRef.size()); + auto itr = newCells.begin(); + for (const auto &cell : _cellsRef) { + *itr = func.apply(cell); + ++itr; + } + assert(itr == newCells.end()); + return std::make_unique<DenseTensor>(_typeRef, std::move(newCells)); +} + +Tensor::UP +DenseTensorView::sum(const vespalib::string &dimension) const +{ + return dense::reduce(*this, { dimension }, + [](double lhsValue, double rhsValue) + { return lhsValue + rhsValue; }); +} + +bool +DenseTensorView::equals(const Tensor &arg) const +{ + const DenseTensorView *view = dynamic_cast<const DenseTensorView *>(&arg); + if (view) { + return *this == *view; + } + return false; +} + +vespalib::string +DenseTensorView::toString() const +{ + std::ostringstream stream; + stream << *this; + return stream.str(); +} + +Tensor::UP +DenseTensorView::clone() const +{ + return std::make_unique<DenseTensor>(_typeRef, + Cells(_cellsRef.cbegin(), _cellsRef.cend())); +} + +namespace { + +void +buildAddress(const DenseTensorCellsIterator &itr, 
TensorSpec::Address &address) +{ + auto addressItr = itr.address().begin(); + for (const auto &dim : itr.type().dimensions()) { + address.emplace(std::make_pair(dim.name, TensorSpec::Label(*addressItr++))); + } + assert(addressItr == itr.address().end()); +} + +} + +TensorSpec +DenseTensorView::toSpec() const +{ + TensorSpec result(getType().to_spec()); + TensorSpec::Address address; + for (CellsIterator itr(_typeRef, _cellsRef); itr.valid(); itr.next()) { + buildAddress(itr, address); + result.add(address, itr.cell()); + address.clear(); + } + return result; +} + +void +DenseTensorView::print(std::ostream &out) const +{ + // TODO (geirst): print on common format. + out << "[ "; + bool first = true; + for (const auto &dim : _typeRef.dimensions()) { + if (!first) { + out << ", "; + } + out << dim.name << ":" << dim.size; + first = false; + } + out << " ] { "; + first = true; + for (const auto &cell : cellsRef()) { + if (!first) { + out << ", "; + } + out << cell; + first = false; + } + out << " }"; +} + +void +DenseTensorView::accept(TensorVisitor &visitor) const +{ + CellsIterator iterator(_typeRef, _cellsRef); + TensorAddressBuilder addressBuilder; + TensorAddress address; + vespalib::string label; + while (iterator.valid()) { + addressBuilder.clear(); + auto rawIndex = iterator.address().begin(); + for (const auto &dimension : _typeRef.dimensions()) { + label = vespalib::make_string("%zu", *rawIndex); + addressBuilder.add(dimension.name, label); + ++rawIndex; + } + address = addressBuilder.build(); + visitor.visit(address, iterator.cell()); + iterator.next(); + } +} + +Tensor::UP +DenseTensorView::apply(const eval::BinaryOperation &op, const Tensor &arg) const +{ + return dense::apply(*this, arg, + [&op](double lhsValue, double rhsValue) + { return op.eval(lhsValue, rhsValue); }); +} + +Tensor::UP +DenseTensorView::reduce(const eval::BinaryOperation &op, + const std::vector<vespalib::string> &dimensions) const +{ + return dense::reduce(*this, + 
(dimensions.empty() ? _typeRef.dimension_names() : dimensions), + [&op](double lhsValue, double rhsValue) + { return op.eval(lhsValue, rhsValue); }); +} + +} // namespace vespalib::tensor +} // namespace vespalib diff --git a/eval/src/vespa/eval/tensor/dense/dense_tensor_view.h b/eval/src/vespa/eval/tensor/dense/dense_tensor_view.h new file mode 100644 index 00000000000..218e1e4c4c6 --- /dev/null +++ b/eval/src/vespa/eval/tensor/dense/dense_tensor_view.h @@ -0,0 +1,74 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#pragma once + +#include <vespa/vespalib/tensor/tensor.h> +#include <vespa/vespalib/tensor/types.h> +#include <vespa/vespalib/eval/value_type.h> +#include "dense_tensor_cells_iterator.h" + +namespace vespalib { +namespace tensor { + +class DenseTensor; + +/** + * A view to a dense tensor where all dimensions are indexed. + * Tensor cells are stored in an underlying array according to the order of the dimensions. 
+ */ +class DenseTensorView : public Tensor +{ +public: + using Cells = std::vector<double>; + using CellsRef = ConstArrayRef<double>; + using CellsIterator = DenseTensorCellsIterator; + +private: + const eval::ValueType &_typeRef; +protected: + CellsRef _cellsRef; + + void initCellsRef(CellsRef cells_in) { + _cellsRef = cells_in; + } + +public: + explicit DenseTensorView(const DenseTensor &rhs); + DenseTensorView(const eval::ValueType &type_in, CellsRef cells_in) + : _typeRef(type_in), + _cellsRef(cells_in) + {} + DenseTensorView(const eval::ValueType &type_in) + : _typeRef(type_in), + _cellsRef() + {} + const eval::ValueType &type() const { return _typeRef; } + const CellsRef &cellsRef() const { return _cellsRef; } + bool operator==(const DenseTensorView &rhs) const; + CellsIterator cellsIterator() const { return CellsIterator(_typeRef, _cellsRef); } + + virtual eval::ValueType getType() const override; + virtual double sum() const override; + virtual Tensor::UP add(const Tensor &arg) const override; + virtual Tensor::UP subtract(const Tensor &arg) const override; + virtual Tensor::UP multiply(const Tensor &arg) const override; + virtual Tensor::UP min(const Tensor &arg) const override; + virtual Tensor::UP max(const Tensor &arg) const override; + virtual Tensor::UP match(const Tensor &arg) const override; + virtual Tensor::UP apply(const CellFunction &func) const override; + virtual Tensor::UP sum(const vespalib::string &dimension) const override; + virtual Tensor::UP apply(const eval::BinaryOperation &op, + const Tensor &arg) const override; + virtual Tensor::UP reduce(const eval::BinaryOperation &op, + const std::vector<vespalib::string> &dimensions) + const override; + virtual bool equals(const Tensor &arg) const override; + virtual void print(std::ostream &out) const override; + virtual vespalib::string toString() const override; + virtual Tensor::UP clone() const override; + virtual eval::TensorSpec toSpec() const override; + virtual void 
accept(TensorVisitor &visitor) const override; +}; + +} // namespace vespalib::tensor +} // namespace vespalib diff --git a/eval/src/vespa/eval/tensor/dense/direct_dense_tensor_builder.cpp b/eval/src/vespa/eval/tensor/dense/direct_dense_tensor_builder.cpp new file mode 100644 index 00000000000..8a7ed1928ef --- /dev/null +++ b/eval/src/vespa/eval/tensor/dense/direct_dense_tensor_builder.cpp @@ -0,0 +1,59 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#include <vespa/fastos/fastos.h> +#include "direct_dense_tensor_builder.h" + +namespace vespalib { +namespace tensor { + +using Address = DirectDenseTensorBuilder::Address; +using eval::ValueType; + +namespace { + +size_t +calculateCellsSize(const ValueType &type) +{ + size_t cellsSize = 1; + for (const auto &dim : type.dimensions()) { + cellsSize *= dim.size; + } + return cellsSize; +} + +size_t +calculateCellAddress(const Address &address, const ValueType &type) +{ + assert(address.size() == type.dimensions().size()); + size_t result = 0; + for (size_t i = 0; i < address.size(); ++i) { + result *= type.dimensions()[i].size; + result += address[i]; + } + return result; +} + +} + +DirectDenseTensorBuilder::DirectDenseTensorBuilder(const ValueType &type_in) + : _type(type_in), + _cells(calculateCellsSize(_type)) +{ +} + +void +DirectDenseTensorBuilder::insertCell(const Address &address, double cellValue) +{ + size_t cellAddress = calculateCellAddress(address, _type); + assert(cellAddress < _cells.size()); + _cells[cellAddress] = cellValue; +} + +Tensor::UP +DirectDenseTensorBuilder::build() +{ + return std::make_unique<DenseTensor>(std::move(_type), std::move(_cells)); +} + +} // namespace tensor +} // namespace vespalib diff --git a/eval/src/vespa/eval/tensor/dense/direct_dense_tensor_builder.h b/eval/src/vespa/eval/tensor/dense/direct_dense_tensor_builder.h new file mode 100644 index 00000000000..b5329860e86 --- /dev/null +++ 
b/eval/src/vespa/eval/tensor/dense/direct_dense_tensor_builder.h @@ -0,0 +1,30 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#pragma once + +#include "dense_tensor.h" + +namespace vespalib { +namespace tensor { + +/** + * Class for building a dense tensor by inserting cell values directly into underlying array of cells. + */ +class DirectDenseTensorBuilder +{ +public: + using Cells = DenseTensor::Cells; + using Address = std::vector<size_t>; + +private: + eval::ValueType _type; + Cells _cells; + +public: + DirectDenseTensorBuilder(const eval::ValueType &type_in); + void insertCell(const Address &address, double cellValue); + Tensor::UP build(); +}; + +} // namespace tensor +} // namespace vespalib diff --git a/eval/src/vespa/eval/tensor/dense/mutable_dense_tensor_view.cpp b/eval/src/vespa/eval/tensor/dense/mutable_dense_tensor_view.cpp new file mode 100644 index 00000000000..582bb25db53 --- /dev/null +++ b/eval/src/vespa/eval/tensor/dense/mutable_dense_tensor_view.cpp @@ -0,0 +1,36 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+ +#include <vespa/fastos/fastos.h> +#include "mutable_dense_tensor_view.h" + +using vespalib::eval::ValueType; + +namespace vespalib { +namespace tensor { + +MutableDenseTensorView::MutableValueType::MutableValueType(ValueType type_in) + : _type(type_in) +{ + std::vector<ValueType::Dimension> &dimensions = + const_cast<std::vector<ValueType::Dimension> &>(_type.dimensions()); + for (auto &dim : dimensions) { + if (!dim.is_bound()) { + _unboundDimSizes.emplace_back(&dim.size); + } + } +} + +MutableDenseTensorView::MutableDenseTensorView(ValueType type_in) + : DenseTensorView(_concreteType.type(), CellsRef()), + _concreteType(type_in) +{ +} + +MutableDenseTensorView::MutableDenseTensorView(ValueType type_in, CellsRef cells_in) + : DenseTensorView(_concreteType.type(), cells_in), + _concreteType(type_in) +{ +} + +} // namespace tensor +} // namespace vespalib diff --git a/eval/src/vespa/eval/tensor/dense/mutable_dense_tensor_view.h b/eval/src/vespa/eval/tensor/dense/mutable_dense_tensor_view.h new file mode 100644 index 00000000000..f5580d45e77 --- /dev/null +++ b/eval/src/vespa/eval/tensor/dense/mutable_dense_tensor_view.h @@ -0,0 +1,57 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#pragma once + +#include "dense_tensor_view.h" + +namespace vespalib { +namespace tensor { + +/** + * A mutable view to a dense tensor where all dimensions are indexed. 
+ */ +class MutableDenseTensorView : public DenseTensorView +{ +private: + struct MutableValueType + { + private: + eval::ValueType _type; + std::vector<size_t *> _unboundDimSizes; + + public: + MutableValueType(eval::ValueType type_in); + const eval::ValueType &type() const { return _type; } + void setUnboundDimensions(const uint32_t *unboundDimSizeBegin, const uint32_t *unboundDimSizeEnd) { + const uint32_t *unboundDimSizePtr = unboundDimSizeBegin; + for (auto unboundDimSize : _unboundDimSizes) { + *unboundDimSize = *unboundDimSizePtr++; + } + assert(unboundDimSizePtr == unboundDimSizeEnd); + (void) unboundDimSizeEnd; + } + void setUnboundDimensionsForEmptyTensor() { + for (auto unboundDimSize : _unboundDimSizes) { + *unboundDimSize = 1; + } + } + }; + + MutableValueType _concreteType; + +public: + MutableDenseTensorView(eval::ValueType type_in); + MutableDenseTensorView(eval::ValueType type_in, CellsRef cells_in); + void setCells(CellsRef cells_in) { + _cellsRef = cells_in; + } + void setUnboundDimensions(const uint32_t *unboundDimSizeBegin, const uint32_t *unboundDimSizeEnd) { + _concreteType.setUnboundDimensions(unboundDimSizeBegin, unboundDimSizeEnd); + } + void setUnboundDimensionsForEmptyTensor() { + _concreteType.setUnboundDimensionsForEmptyTensor(); + } +}; + +} // namespace vespalib::tensor +} // namespace vespalib diff --git a/eval/src/vespa/eval/tensor/direct_tensor_builder.h b/eval/src/vespa/eval/tensor/direct_tensor_builder.h new file mode 100644 index 00000000000..d07bcf68486 --- /dev/null +++ b/eval/src/vespa/eval/tensor/direct_tensor_builder.h @@ -0,0 +1,15 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#pragma once + +namespace vespalib { +namespace tensor { + +/** + * Forward declaration of utility class to build tensor of type TensorT, + * to be used by tensor operations. 
+ */ +template <typename TensorT> class DirectTensorBuilder; + +} // namespace vespalib::tensor +} // namespace vespalib diff --git a/eval/src/vespa/eval/tensor/join_tensors.h b/eval/src/vespa/eval/tensor/join_tensors.h new file mode 100644 index 00000000000..b5feb99b5d5 --- /dev/null +++ b/eval/src/vespa/eval/tensor/join_tensors.h @@ -0,0 +1,48 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#pragma once + +#include "tensor.h" +#include "direct_tensor_builder.h" + +namespace vespalib { +namespace tensor { + +/* + * Join the cells of two tensors. + * The given function is used to calculate the resulting cell value for overlapping cells. + */ +template <typename TensorImplType, typename Function> +Tensor::UP +joinTensors(const TensorImplType &lhs, + const TensorImplType &rhs, + Function &&func) +{ + DirectTensorBuilder<TensorImplType> + builder(lhs.combineDimensionsWith(rhs), lhs.cells()); + for (const auto &rhsCell : rhs.cells()) { + builder.insertCell(rhsCell.first, rhsCell.second, func); + } + return builder.build(); +} + +/* + * Join the cells of two tensors, where the rhs values are treated as negated values. + * The given function is used to calculate the resulting cell value for overlapping cells. 
+ */ +template <typename TensorImplType, typename Function> +Tensor::UP +joinTensorsNegated(const TensorImplType &lhs, + const TensorImplType &rhs, + Function &&func) +{ + DirectTensorBuilder<TensorImplType> + builder(lhs.combineDimensionsWith(rhs), lhs.cells()); + for (const auto &rhsCell : rhs.cells()) { + builder.insertCell(rhsCell.first, -rhsCell.second, func); + } + return builder.build(); +} + +} // namespace vespalib::tensor +} // namespace vespalib diff --git a/eval/src/vespa/eval/tensor/serialization/CMakeLists.txt b/eval/src/vespa/eval/tensor/serialization/CMakeLists.txt new file mode 100644 index 00000000000..1f178dd7118 --- /dev/null +++ b/eval/src/vespa/eval/tensor/serialization/CMakeLists.txt @@ -0,0 +1,9 @@ +# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +vespa_add_library(vespalib_vespalib_tensor_serialization OBJECT + SOURCES + sparse_binary_format.cpp + dense_binary_format.cpp + slime_binary_format.cpp + typed_binary_format.cpp + DEPENDS +) diff --git a/eval/src/vespa/eval/tensor/serialization/dense_binary_format.cpp b/eval/src/vespa/eval/tensor/serialization/dense_binary_format.cpp new file mode 100644 index 00000000000..a3fddafe8f5 --- /dev/null +++ b/eval/src/vespa/eval/tensor/serialization/dense_binary_format.cpp @@ -0,0 +1,70 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#include <vespa/fastos/fastos.h> +#include "dense_binary_format.h" +#include <vespa/vespalib/tensor/dense/dense_tensor.h> +#include <vespa/vespalib/objects/nbostream.h> + + +using vespalib::nbostream; + +namespace vespalib { +namespace tensor { + +namespace { + +eval::ValueType +makeValueType(std::vector<eval::ValueType::Dimension> &&dimensions) { + return (dimensions.empty() ? 
+ eval::ValueType::double_type() : + eval::ValueType::tensor_type(std::move(dimensions))); +} + +} + +void +DenseBinaryFormat::serialize(nbostream &stream, const DenseTensor &tensor) +{ + stream.putInt1_4Bytes(tensor.type().dimensions().size()); + size_t cellsSize = 1; + for (const auto &dimension : tensor.type().dimensions()) { + stream.writeSmallString(dimension.name); + stream.putInt1_4Bytes(dimension.size); + cellsSize *= dimension.size; + } + const DenseTensor::Cells &cells = tensor.cells(); + assert(cells.size() == cellsSize); + for (const auto &value : cells) { + stream << value; + } +} + + +std::unique_ptr<DenseTensor> +DenseBinaryFormat::deserialize(nbostream &stream) +{ + vespalib::string dimensionName; + std::vector<eval::ValueType::Dimension> dimensions; + DenseTensor::Cells cells; + size_t dimensionsSize = stream.getInt1_4Bytes(); + size_t dimensionSize; + size_t cellsSize = 1; + while (dimensions.size() < dimensionsSize) { + stream.readSmallString(dimensionName); + dimensionSize = stream.getInt1_4Bytes(); + dimensions.emplace_back(dimensionName, dimensionSize); + cellsSize *= dimensionSize; + } + cells.reserve(cellsSize); + double cellValue = 0.0; + for (size_t i = 0; i < cellsSize; ++i) { + stream >> cellValue; + cells.emplace_back(cellValue); + } + return std::make_unique<DenseTensor>(makeValueType(std::move(dimensions)), + std::move(cells)); +} + + +} // namespace vespalib::tensor +} // namespace vespalib diff --git a/eval/src/vespa/eval/tensor/serialization/dense_binary_format.h b/eval/src/vespa/eval/tensor/serialization/dense_binary_format.h new file mode 100644 index 00000000000..cb080b6b4ee --- /dev/null +++ b/eval/src/vespa/eval/tensor/serialization/dense_binary_format.h @@ -0,0 +1,24 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
#pragma once

// The header declares std::unique_ptr in its interface, so it must be
// self-contained and pull in <memory> itself (previously it included
// nothing and only compiled by include-order luck).
#include <memory>

namespace vespalib {

class nbostream;

namespace tensor {

class DenseTensor;

/**
 * Binary serialization of dense tensors.
 *
 * serialize() writes the tensor to the stream; deserialize() reads one
 * tensor back from the stream (the stream position is advanced).
 */
class DenseBinaryFormat
{
public:
    static void serialize(nbostream &stream, const DenseTensor &tensor);
    static std::unique_ptr<DenseTensor> deserialize(nbostream &stream);
};

} // namespace vespalib::tensor
} // namespace vespalib
for whole tensor + Cursor &_dimensions; // cursor for dimensions array + Cursor &_cells; // cursor for cells array +public: + SlimeBinaryFormatSerializer(Inserter &inserter); + virtual ~SlimeBinaryFormatSerializer() override; + virtual void visit(const TensorAddress &address, double value) override; + void serialize(const Tensor &tensor); +}; + +SlimeBinaryFormatSerializer::SlimeBinaryFormatSerializer(Inserter &inserter) + : _tensor(inserter.insertObject()), + _dimensions(_tensor.setArray(memory_dimensions)), + _cells(_tensor.setArray(memory_cells)) +{ +} + + +SlimeBinaryFormatSerializer::~SlimeBinaryFormatSerializer() +{ +} + +void +SlimeBinaryFormatSerializer::visit(const TensorAddress &address, + double value) +{ + Cursor &cellCursor = _cells.addObject(); + writeTensorAddress(cellCursor, address); + cellCursor.setDouble(memory_value, value); +} + + +void +SlimeBinaryFormatSerializer::serialize(const Tensor &tensor) +{ + eval::ValueType type(tensor.getType()); + for (const auto & dimension : type.dimensions()) { + _dimensions.addString(Memory(dimension.name)); + } + tensor.accept(*this); +} + + +void +SlimeBinaryFormat::serialize(Inserter &inserter, const Tensor &tensor) +{ + SlimeBinaryFormatSerializer serializer(inserter); + serializer.serialize(tensor); +} + + +std::unique_ptr<Slime> +SlimeBinaryFormat::serialize(const Tensor &tensor) +{ + auto slime = std::make_unique<Slime>(); + SlimeInserter inserter(*slime); + serialize(inserter, tensor); + return std::move(slime); +} + + +} // namespace vespalib::tensor +} // namespace vespalib diff --git a/eval/src/vespa/eval/tensor/serialization/slime_binary_format.h b/eval/src/vespa/eval/tensor/serialization/slime_binary_format.h new file mode 100644 index 00000000000..0af80c5de61 --- /dev/null +++ b/eval/src/vespa/eval/tensor/serialization/slime_binary_format.h @@ -0,0 +1,27 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
#pragma once

// std::unique_ptr appears in the public interface, so the header must
// include <memory> itself to be self-contained (it previously relied on
// whatever the including file happened to pull in first).
#include <memory>

namespace vespalib {

class Slime;

namespace slime { class Inserter; }

namespace tensor {

class Tensor;
class TensorBuilder;

/**
 * Serializes a tensor into a slime object: a "dimensions" array of
 * names plus a "cells" array of address/value objects.
 */
class SlimeBinaryFormat
{
public:
    static void serialize(slime::Inserter &inserter, const Tensor &tensor);
    static std::unique_ptr<Slime> serialize(const Tensor &tensor);
};

} // namespace vespalib::tensor
} // namespace vespalib
SparseBinaryFormatSerializer(); + virtual ~SparseBinaryFormatSerializer() override; + virtual void visit(const TensorAddress &address, double value) override; + void serialize(nbostream &stream, const Tensor &tensor); +}; + +SparseBinaryFormatSerializer::SparseBinaryFormatSerializer() + : _numCells(0u), + _cells(), + _type(eval::ValueType::error_type()) +{ +} + + +SparseBinaryFormatSerializer::~SparseBinaryFormatSerializer() +{ +} + +void +SparseBinaryFormatSerializer::visit(const TensorAddress &address, + double value) +{ + ++_numCells; + writeTensorAddress(_cells, _type, address); + _cells << value; +} + + +void +SparseBinaryFormatSerializer::serialize(nbostream &stream, + const Tensor &tensor) +{ + _type = tensor.getType(); + tensor.accept(*this); + stream.putInt1_4Bytes(_type.dimensions().size()); + for (const auto &dimension : _type.dimensions()) { + stream.writeSmallString(dimension.name); + } + stream.putInt1_4Bytes(_numCells); + stream.write(_cells.peek(), _cells.size()); +} + + +void +SparseBinaryFormat::serialize(nbostream &stream, const Tensor &tensor) +{ + SparseBinaryFormatSerializer serializer; + serializer.serialize(stream, tensor); +} + + +void +SparseBinaryFormat::deserialize(nbostream &stream, TensorBuilder &builder) +{ + vespalib::string str; + size_t dimensionsSize = stream.getInt1_4Bytes(); + std::vector<TensorBuilder::Dimension> dimensions; + while (dimensions.size() < dimensionsSize) { + stream.readSmallString(str); + dimensions.emplace_back(builder.define_dimension(str)); + } + size_t cellsSize = stream.getInt1_4Bytes(); + double cellValue = 0.0; + for (size_t cellIdx = 0; cellIdx < cellsSize; ++cellIdx) { + for (size_t dimension = 0; dimension < dimensionsSize; ++dimension) { + stream.readSmallString(str); + if (!str.empty()) { + builder.add_label(dimensions[dimension], str); + } + } + stream >> cellValue; + builder.add_cell(cellValue); + } +} + + +} // namespace vespalib::tensor +} // namespace vespalib diff --git 
#pragma once

namespace vespalib {

class nbostream;

namespace tensor {

class Tensor;
class TensorBuilder;

/**
 * Binary serialization of sparse tensors.
 *
 * serialize() writes the tensor to the stream; deserialize() replays a
 * serialized tensor into a TensorBuilder, which owns the result.
 */
class SparseBinaryFormat
{
public:
    static void serialize(nbostream &stream, const Tensor &tensor);
    static void deserialize(nbostream &stream, TensorBuilder &builder);
};

} // namespace vespalib::tensor
} // namespace vespalib
+ +#include <vespa/fastos/fastos.h> +#include "typed_binary_format.h" +#include "sparse_binary_format.h" +#include "dense_binary_format.h" +#include <vespa/vespalib/objects/nbostream.h> +#include <vespa/vespalib/tensor/default_tensor.h> +#include <vespa/vespalib/tensor/tensor.h> +#include <vespa/vespalib/tensor/dense/dense_tensor.h> + +using vespalib::nbostream; + +namespace vespalib { +namespace tensor { + + +void +TypedBinaryFormat::serialize(nbostream &stream, const Tensor &tensor) +{ + const DenseTensor *denseTensor = dynamic_cast<const DenseTensor *>(&tensor); + if (denseTensor != nullptr) { + stream.putInt1_4Bytes(DENSE_BINARY_FORMAT_TYPE); + DenseBinaryFormat::serialize(stream, *denseTensor); + } else { + stream.putInt1_4Bytes(SPARSE_BINARY_FORMAT_TYPE); + SparseBinaryFormat::serialize(stream, tensor); + } +} + + +std::unique_ptr<Tensor> +TypedBinaryFormat::deserialize(nbostream &stream) +{ + auto formatId = stream.getInt1_4Bytes(); + if (formatId == SPARSE_BINARY_FORMAT_TYPE) { + DefaultTensor::builder builder; + SparseBinaryFormat::deserialize(stream, builder); + return builder.build(); + } + if (formatId == DENSE_BINARY_FORMAT_TYPE) { + return DenseBinaryFormat::deserialize(stream); + } + abort(); +} + + +} // namespace vespalib::tensor +} // namespace vespalib diff --git a/eval/src/vespa/eval/tensor/serialization/typed_binary_format.h b/eval/src/vespa/eval/tensor/serialization/typed_binary_format.h new file mode 100644 index 00000000000..772f820ffc5 --- /dev/null +++ b/eval/src/vespa/eval/tensor/serialization/typed_binary_format.h @@ -0,0 +1,27 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#pragma once + +namespace vespalib { + +class nbostream; + +namespace tensor { + +class Tensor; +class TensorBuilder; + +/** + * Class for serializing a tensor. 
+ */ +class TypedBinaryFormat +{ + static constexpr uint32_t SPARSE_BINARY_FORMAT_TYPE = 1u; + static constexpr uint32_t DENSE_BINARY_FORMAT_TYPE = 2u; +public: + static void serialize(nbostream &stream, const Tensor &tensor); + static std::unique_ptr<Tensor> deserialize(nbostream &stream); +}; + +} // namespace vespalib::tensor +} // namespace vespalib diff --git a/eval/src/vespa/eval/tensor/sparse/CMakeLists.txt b/eval/src/vespa/eval/tensor/sparse/CMakeLists.txt new file mode 100644 index 00000000000..7d8725ad610 --- /dev/null +++ b/eval/src/vespa/eval/tensor/sparse/CMakeLists.txt @@ -0,0 +1,11 @@ +# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +vespa_add_library(vespalib_vespalib_tensor_sparse OBJECT + SOURCES + sparse_tensor.cpp + sparse_tensor_address_combiner.cpp + sparse_tensor_address_reducer.cpp + sparse_tensor_match.cpp + sparse_tensor_builder.cpp + sparse_tensor_unsorted_address_builder.cpp + DEPENDS +) diff --git a/eval/src/vespa/eval/tensor/sparse/direct_sparse_tensor_builder.h b/eval/src/vespa/eval/tensor/sparse/direct_sparse_tensor_builder.h new file mode 100644 index 00000000000..ad0257d8ec5 --- /dev/null +++ b/eval/src/vespa/eval/tensor/sparse/direct_sparse_tensor_builder.h @@ -0,0 +1,133 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#pragma once + +#include <vespa/vespalib/tensor/direct_tensor_builder.h> +#include "sparse_tensor.h" +#include "sparse_tensor_address_builder.h" +#include "sparse_tensor_address_padder.h" + +namespace vespalib { +namespace tensor { + +/** + * Utility class to build tensors of type SparseTensor, to be used by + * tensor operations. 
+ */ +template <> class DirectTensorBuilder<SparseTensor> +{ +public: + using TensorImplType = SparseTensor; + using Cells = typename TensorImplType::Cells; + using AddressBuilderType = SparseTensorAddressBuilder; + using AddressRefType = SparseTensorAddressRef; + +private: + Stash _stash; + eval::ValueType _type; + Cells _cells; + +public: + void + copyCells(const Cells &cells_in) + { + for (const auto &cell : cells_in) { + SparseTensorAddressRef oldRef = cell.first; + SparseTensorAddressRef newRef(oldRef, _stash); + _cells[newRef] = cell.second; + } + } + + void + copyCells(const Cells &cells_in, const eval::ValueType &cells_in_type) + { + SparseTensorAddressPadder addressPadder(_type, + cells_in_type); + for (const auto &cell : cells_in) { + addressPadder.padAddress(cell.first); + SparseTensorAddressRef oldRef = addressPadder.getAddressRef(); + SparseTensorAddressRef newRef(oldRef, _stash); + _cells[newRef] = cell.second; + } + } + + DirectTensorBuilder() + : _stash(TensorImplType::STASH_CHUNK_SIZE), + _type(eval::ValueType::double_type()), + _cells() + { + } + + DirectTensorBuilder(const eval::ValueType &type_in) + : _stash(TensorImplType::STASH_CHUNK_SIZE), + _type(type_in), + _cells() + { + } + + DirectTensorBuilder(const eval::ValueType &type_in, + const Cells &cells_in) + : _stash(TensorImplType::STASH_CHUNK_SIZE), + _type(type_in), + _cells() + { + copyCells(cells_in); + } + + DirectTensorBuilder(const eval::ValueType &type_in, + const Cells &cells_in, + const eval::ValueType &cells_in_type) + : _stash(TensorImplType::STASH_CHUNK_SIZE), + _type(type_in), + _cells() + { + if (type_in.dimensions().size() == cells_in_type.dimensions().size()) { + copyCells(cells_in); + } else { + copyCells(cells_in, cells_in_type); + } + } + + Tensor::UP build() { + return std::make_unique<SparseTensor>(std::move(_type), + std::move(_cells), + std::move(_stash)); + } + + template <class Function> + void insertCell(SparseTensorAddressRef address, double value, + Function 
&&func) + { + SparseTensorAddressRef oldRef(address); + auto res = _cells.insert(std::make_pair(oldRef, value)); + if (res.second) { + // Replace key with own copy + res.first->first = SparseTensorAddressRef(oldRef, _stash); + } else { + res.first->second = func(res.first->second, value); + } + } + + void insertCell(SparseTensorAddressRef address, double value) { + // This address should not already exist and a new cell should be inserted. + insertCell(address, value, [](double, double) -> double { abort(); }); + } + + template <class Function> + void insertCell(SparseTensorAddressBuilder &address, double value, + Function &&func) + { + insertCell(address.getAddressRef(), value, func); + } + + void insertCell(SparseTensorAddressBuilder &address, double value) { + // This address should not already exist and a new cell should be inserted. + insertCell(address.getAddressRef(), value, [](double, double) -> double { abort(); }); + } + + eval::ValueType &type() { return _type; } + Cells &cells() { return _cells; } +}; + +} // namespace vespalib::tensor +} // namespace vespalib diff --git a/eval/src/vespa/eval/tensor/sparse/sparse_tensor.cpp b/eval/src/vespa/eval/tensor/sparse/sparse_tensor.cpp new file mode 100644 index 00000000000..4387b4b1fad --- /dev/null +++ b/eval/src/vespa/eval/tensor/sparse/sparse_tensor.cpp @@ -0,0 +1,314 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+ +#include "sparse_tensor.h" +#include "sparse_tensor_address_builder.h" +#include "sparse_tensor_match.h" +#include "sparse_tensor_apply.hpp" +#include "sparse_tensor_reduce.hpp" +#include <vespa/vespalib/tensor/tensor_address_builder.h> +#include <vespa/vespalib/tensor/tensor_apply.h> +#include <vespa/vespalib/tensor/tensor_visitor.h> +#include <vespa/vespalib/eval/operation.h> +#include <vespa/vespalib/stllike/hash_map.hpp> +#include <vespa/vespalib/stllike/hash_map_equal.hpp> +#include <vespa/vespalib/util/array_equal.hpp> +#include <sstream> + +using vespalib::eval::TensorSpec; + +namespace vespalib { +namespace tensor { + +namespace { + +using Cells = SparseTensor::Cells; + +void +copyCells(Cells &cells, const Cells &cells_in, Stash &stash) +{ + for (const auto &cell : cells_in) { + SparseTensorAddressRef oldRef = cell.first; + SparseTensorAddressRef newRef(oldRef, stash); + cells[newRef] = cell.second; + } +} + +void +printAddress(std::ostream &out, const SparseTensorAddressRef &ref, + const eval::ValueType &type) +{ + out << "{"; + bool first = true; + SparseTensorAddressDecoder addr(ref); + for (auto &dim : type.dimensions()) { + auto label = addr.decodeLabel(); + if (label.size() != 0u) { + if (!first) { + out << ","; + } + out << dim.name << ":" << label; + first = false; + } + } + assert(!addr.valid()); + out << "}"; +} + +} + +SparseTensor::SparseTensor(const eval::ValueType &type_in, + const Cells &cells_in) + : _type(type_in), + _cells(), + _stash(STASH_CHUNK_SIZE) +{ + copyCells(_cells, cells_in, _stash); +} + + +SparseTensor::SparseTensor(eval::ValueType &&type_in, + Cells &&cells_in, Stash &&stash_in) + : _type(std::move(type_in)), + _cells(std::move(cells_in)), + _stash(std::move(stash_in)) +{ +} + + +bool +SparseTensor::operator==(const SparseTensor &rhs) const +{ + return _type == rhs._type && _cells == rhs._cells; +} + + +eval::ValueType +SparseTensor::combineDimensionsWith(const SparseTensor &rhs) const +{ + 
std::vector<eval::ValueType::Dimension> result; + std::set_union(_type.dimensions().cbegin(), _type.dimensions().cend(), + rhs._type.dimensions().cbegin(), rhs._type.dimensions().cend(), + std::back_inserter(result), + [](const eval::ValueType::Dimension &lhsDim, + const eval::ValueType::Dimension &rhsDim) + { return lhsDim.name < rhsDim.name; }); + return (result.empty() ? + eval::ValueType::double_type() : + eval::ValueType::tensor_type(std::move(result))); +} + +eval::ValueType +SparseTensor::getType() const +{ + return _type; +} + +double +SparseTensor::sum() const +{ + double result = 0.0; + for (const auto &cell : _cells) { + result += cell.second; + } + return result; +} + +Tensor::UP +SparseTensor::add(const Tensor &arg) const +{ + const SparseTensor *rhs = dynamic_cast<const SparseTensor *>(&arg); + if (!rhs) { + return Tensor::UP(); + } + return sparse::apply(*this, *rhs, [](double lhsValue, double rhsValue) + { return lhsValue + rhsValue; }); +} + +Tensor::UP +SparseTensor::subtract(const Tensor &arg) const +{ + const SparseTensor *rhs = dynamic_cast<const SparseTensor *>(&arg); + if (!rhs) { + return Tensor::UP(); + } + return sparse::apply(*this, *rhs, [](double lhsValue, double rhsValue) + { return lhsValue - rhsValue; }); +} + +Tensor::UP +SparseTensor::multiply(const Tensor &arg) const +{ + const SparseTensor *rhs = dynamic_cast<const SparseTensor *>(&arg); + if (!rhs) { + return Tensor::UP(); + } + return sparse::apply(*this, *rhs, [](double lhsValue, double rhsValue) + { return lhsValue * rhsValue; }); +} + +Tensor::UP +SparseTensor::min(const Tensor &arg) const +{ + const SparseTensor *rhs = dynamic_cast<const SparseTensor *>(&arg); + if (!rhs) { + return Tensor::UP(); + } + return sparse::apply(*this, *rhs, [](double lhsValue, double rhsValue) + { return std::min(lhsValue, rhsValue); }); +} + +Tensor::UP +SparseTensor::max(const Tensor &arg) const +{ + const SparseTensor *rhs = dynamic_cast<const SparseTensor *>(&arg); + if (!rhs) { + return 
Tensor::UP(); + } + return sparse::apply(*this, *rhs, [](double lhsValue, double rhsValue) + { return std::max(lhsValue, rhsValue); }); +} + +Tensor::UP +SparseTensor::match(const Tensor &arg) const +{ + const SparseTensor *rhs = dynamic_cast<const SparseTensor *>(&arg); + if (!rhs) { + return Tensor::UP(); + } + return SparseTensorMatch(*this, *rhs).result(); +} + +Tensor::UP +SparseTensor::apply(const CellFunction &func) const +{ + return TensorApply<SparseTensor>(*this, func).result(); +} + +Tensor::UP +SparseTensor::sum(const vespalib::string &dimension) const +{ + return sparse::reduce(*this, { dimension }, + [](double lhsValue, double rhsValue) + { return lhsValue + rhsValue; }); +} + +bool +SparseTensor::equals(const Tensor &arg) const +{ + const SparseTensor *rhs = dynamic_cast<const SparseTensor *>(&arg); + if (!rhs) { + return false; + } + return *this == *rhs; +} + +vespalib::string +SparseTensor::toString() const +{ + std::ostringstream stream; + stream << *this; + return stream.str(); +} + +Tensor::UP +SparseTensor::clone() const +{ + return std::make_unique<SparseTensor>(_type, _cells); +} + +namespace { + +void +buildAddress(const eval::ValueType &type, + SparseTensorAddressDecoder &decoder, + TensorSpec::Address &address) +{ + for (const auto &dimension : type.dimensions()) { + auto label = decoder.decodeLabel(); + address.emplace(std::make_pair(dimension.name, TensorSpec::Label(label))); + } + assert(!decoder.valid()); +} + +} + +TensorSpec +SparseTensor::toSpec() const +{ + TensorSpec result(getType().to_spec()); + TensorSpec::Address address; + for (const auto &cell : _cells) { + SparseTensorAddressDecoder decoder(cell.first); + buildAddress(_type, decoder, address); + result.add(address, cell.second); + address.clear(); + } + if (_type.dimensions().empty() && _cells.empty()) { + result.add(address, 0.0); + } + return result; +} + +void +SparseTensor::print(std::ostream &out) const +{ + out << "{ "; + bool first = true; + for (const auto &cell : 
cells()) { + if (!first) { + out << ", "; + } + printAddress(out, cell.first, _type); + out << ":" << cell.second; + first = false; + } + out << " }"; +} + +void +SparseTensor::accept(TensorVisitor &visitor) const +{ + TensorAddressBuilder addrBuilder; + TensorAddress addr; + for (const auto &cell : _cells) { + SparseTensorAddressDecoder decoder(cell.first); + addrBuilder.clear(); + for (const auto &dimension : _type.dimensions()) { + auto label = decoder.decodeLabel(); + if (label.size() != 0u) { + addrBuilder.add(dimension.name, label); + } + } + assert(!decoder.valid()); + addr = addrBuilder.build(); + visitor.visit(addr, cell.second); + } +} + +Tensor::UP +SparseTensor::apply(const eval::BinaryOperation &op, const Tensor &arg) const +{ + const SparseTensor *rhs = dynamic_cast<const SparseTensor *>(&arg); + if (!rhs) { + return Tensor::UP(); + } + return sparse::apply(*this, *rhs, + [&op](double lhsValue, double rhsValue) + { return op.eval(lhsValue, rhsValue); }); +} + +Tensor::UP +SparseTensor::reduce(const eval::BinaryOperation &op, + const std::vector<vespalib::string> &dimensions) const +{ + return sparse::reduce(*this, + dimensions, + [&op](double lhsValue, double rhsValue) + { return op.eval(lhsValue, rhsValue); }); +} + +} // namespace vespalib::tensor + +} // namespace vespalib + +VESPALIB_HASH_MAP_INSTANTIATE(vespalib::tensor::SparseTensorAddressRef, double); diff --git a/eval/src/vespa/eval/tensor/sparse/sparse_tensor.h b/eval/src/vespa/eval/tensor/sparse/sparse_tensor.h new file mode 100644 index 00000000000..e6682011ba2 --- /dev/null +++ b/eval/src/vespa/eval/tensor/sparse/sparse_tensor.h @@ -0,0 +1,68 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+ +#pragma once + +#include <vespa/vespalib/tensor/cell_function.h> +#include <vespa/vespalib/tensor/tensor.h> +#include <vespa/vespalib/tensor/tensor_address.h> +#include "sparse_tensor_address_ref.h" +#include <vespa/vespalib/tensor/types.h> +#include <vespa/vespalib/stllike/hash_map.h> +#include <vespa/vespalib/stllike/string.h> +#include <vespa/vespalib/util/stash.h> + +namespace vespalib { +namespace tensor { + +/** + * A tensor implementation using serialized tensor addresses to + * improve CPU cache and TLB hit ratio, relative to SimpleTensor + * implementation. + */ +class SparseTensor : public Tensor +{ +public: + using Cells = vespalib::hash_map<SparseTensorAddressRef, double>; + + static constexpr size_t STASH_CHUNK_SIZE = 16384u; + +private: + eval::ValueType _type; + Cells _cells; + Stash _stash; + +public: + explicit SparseTensor(const eval::ValueType &type_in, + const Cells &cells_in); + SparseTensor(eval::ValueType &&type_in, + Cells &&cells_in, Stash &&stash_in); + const Cells &cells() const { return _cells; } + const eval::ValueType &type() const { return _type; } + bool operator==(const SparseTensor &rhs) const; + eval::ValueType combineDimensionsWith(const SparseTensor &rhs) const; + + virtual eval::ValueType getType() const override; + virtual double sum() const override; + virtual Tensor::UP add(const Tensor &arg) const override; + virtual Tensor::UP subtract(const Tensor &arg) const override; + virtual Tensor::UP multiply(const Tensor &arg) const override; + virtual Tensor::UP min(const Tensor &arg) const override; + virtual Tensor::UP max(const Tensor &arg) const override; + virtual Tensor::UP match(const Tensor &arg) const override; + virtual Tensor::UP apply(const CellFunction &func) const override; + virtual Tensor::UP sum(const vespalib::string &dimension) const override; + virtual Tensor::UP apply(const eval::BinaryOperation &op, + const Tensor &arg) const override; + virtual Tensor::UP reduce(const eval::BinaryOperation &op, + const 
std::vector<vespalib::string> &dimensions) + const override; + virtual bool equals(const Tensor &arg) const override; + virtual void print(std::ostream &out) const override; + virtual vespalib::string toString() const override; + virtual Tensor::UP clone() const override; + virtual eval::TensorSpec toSpec() const override; + virtual void accept(TensorVisitor &visitor) const override; +}; + +} // namespace vespalib::tensor +} // namespace vespalib diff --git a/eval/src/vespa/eval/tensor/sparse/sparse_tensor_address_builder.h b/eval/src/vespa/eval/tensor/sparse/sparse_tensor_address_builder.h new file mode 100644 index 00000000000..c1678d89018 --- /dev/null +++ b/eval/src/vespa/eval/tensor/sparse/sparse_tensor_address_builder.h @@ -0,0 +1,47 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#pragma once + +#include <vespa/vespalib/stllike/string.h> +#include <vector> +#include "sparse_tensor_address_ref.h" + +namespace vespalib { +namespace tensor { + + +/** + * A writer to serialize tensor addresses into a compact representation. + * All dimensions in the tensors are present, empty label is the "undefined" + * value. 
+ * + * Format: (labelStr NUL)* + */ +class SparseTensorAddressBuilder +{ +private: + std::vector<char> _address; + + void + append(vespalib::stringref str) + { + const char *cstr = str.c_str(); + _address.insert(_address.end(), cstr, cstr + str.size() + 1); + } +public: + SparseTensorAddressBuilder() + : _address() + { + } + void add(vespalib::stringref label) { append(label); } + void addUndefined() { _address.emplace_back('\0'); } + void clear() { _address.clear(); } + SparseTensorAddressRef getAddressRef() const { + return SparseTensorAddressRef(&_address[0], _address.size()); + } + bool empty() const { return _address.empty(); } +}; + + +} // namespace vespalib::tensor +} // namespace vespalib diff --git a/eval/src/vespa/eval/tensor/sparse/sparse_tensor_address_combiner.cpp b/eval/src/vespa/eval/tensor/sparse/sparse_tensor_address_combiner.cpp new file mode 100644 index 00000000000..1fa765aacfa --- /dev/null +++ b/eval/src/vespa/eval/tensor/sparse/sparse_tensor_address_combiner.cpp @@ -0,0 +1,70 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+ +#include <vespa/fastos/fastos.h> +#include "sparse_tensor_address_combiner.h" +#include "sparse_tensor_address_decoder.h" +#include <vespa/vespalib/eval/value_type.h> + +namespace vespalib { +namespace tensor { +namespace sparse { + +TensorAddressCombiner::TensorAddressCombiner(const eval::ValueType &lhs, + const eval::ValueType &rhs) +{ + auto rhsItr = rhs.dimensions().cbegin(); + auto rhsItrEnd = rhs.dimensions().cend(); + for (auto &lhsDim : lhs.dimensions()) { + while (rhsItr != rhsItrEnd && rhsItr->name < lhsDim.name) { + _ops.push_back(AddressOp::RHS); + ++rhsItr; + } + if (rhsItr != rhsItrEnd && rhsItr->name == lhsDim.name) { + _ops.push_back(AddressOp::BOTH); + ++rhsItr; + } else { + _ops.push_back(AddressOp::LHS); + } + } + while (rhsItr != rhsItrEnd) { + _ops.push_back(AddressOp::RHS); + ++rhsItr; + } +} + +TensorAddressCombiner::~TensorAddressCombiner() +{ +} + +bool +TensorAddressCombiner::combine(SparseTensorAddressRef lhsRef, + SparseTensorAddressRef rhsRef) +{ + clear(); + SparseTensorAddressDecoder lhs(lhsRef); + SparseTensorAddressDecoder rhs(rhsRef); + for (auto op : _ops) { + switch (op) { + case AddressOp::LHS: + add(lhs.decodeLabel()); + break; + case AddressOp::RHS: + add(rhs.decodeLabel()); + break; + case AddressOp::BOTH: + auto lhsLabel(lhs.decodeLabel()); + auto rhsLabel(rhs.decodeLabel()); + if (lhsLabel != rhsLabel) { + return false; + } + add(lhsLabel); + } + } + assert(!lhs.valid()); + assert(!rhs.valid()); + return true; +} + +} // namespace vespalib::tensor::sparse +} // namespace vespalib::tensor +} // namespace vespalib diff --git a/eval/src/vespa/eval/tensor/sparse/sparse_tensor_address_combiner.h b/eval/src/vespa/eval/tensor/sparse/sparse_tensor_address_combiner.h new file mode 100644 index 00000000000..4340db30297 --- /dev/null +++ b/eval/src/vespa/eval/tensor/sparse/sparse_tensor_address_combiner.h @@ -0,0 +1,40 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. 
See LICENSE in the project root. + +#pragma once + +#include "sparse_tensor_address_builder.h" +#include <vespa/vespalib/tensor/types.h> + +namespace vespalib { +namespace eval { class ValueType; } +namespace tensor { +namespace sparse { + +/** + * Combine two tensor addresses to a new tensor address. Common dimensions + * must have matching labels. + */ +class TensorAddressCombiner : public SparseTensorAddressBuilder +{ + enum class AddressOp + { + LHS, + RHS, + BOTH + }; + + std::vector<AddressOp> _ops; + +public: + TensorAddressCombiner(const eval::ValueType &lhs, + const eval::ValueType &rhs); + + ~TensorAddressCombiner(); + + bool combine(SparseTensorAddressRef lhsRef, SparseTensorAddressRef rhsRef); +}; + + +} // namespace vespalib::tensor::sparse +} // namespace vespalib::tensor +} // namespace vespalib diff --git a/eval/src/vespa/eval/tensor/sparse/sparse_tensor_address_decoder.h b/eval/src/vespa/eval/tensor/sparse/sparse_tensor_address_decoder.h new file mode 100644 index 00000000000..94cb9373bc2 --- /dev/null +++ b/eval/src/vespa/eval/tensor/sparse/sparse_tensor_address_decoder.h @@ -0,0 +1,44 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#pragma once + +#include <vespa/vespalib/stllike/string.h> +#include "sparse_tensor_address_ref.h" + +namespace vespalib { + + +namespace tensor { + +/** + * A decoder for a serialized tensor address, with only labels present. 
+ */ +class SparseTensorAddressDecoder +{ + const char *_cur; + const char *_end; +public: + SparseTensorAddressDecoder(SparseTensorAddressRef ref) + : _cur(static_cast<const char *>(ref.start())), + _end(_cur + ref.size()) + { + } + + bool valid() const { return _cur != _end; } + + void skipLabel() { + while (*_cur != '\0') { + ++_cur; + } + ++_cur; + } + vespalib::stringref decodeLabel() { + const char *base = _cur; + skipLabel(); + return vespalib::stringref(base, _cur - base - 1); + } + +}; + +} // namespace vespalib::tensor +} // namespace vespalib diff --git a/eval/src/vespa/eval/tensor/sparse/sparse_tensor_address_padder.h b/eval/src/vespa/eval/tensor/sparse/sparse_tensor_address_padder.h new file mode 100644 index 00000000000..89372004a09 --- /dev/null +++ b/eval/src/vespa/eval/tensor/sparse/sparse_tensor_address_padder.h @@ -0,0 +1,72 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#pragma once + +#include "sparse_tensor_address_builder.h" +#include "sparse_tensor_address_decoder.h" +#include <cassert> + +namespace vespalib { +namespace tensor { + + +/** + * This class transforms serialized sparse tensor addresses by padding + * in "undefined" labels for new dimensions. 
+ */ +class SparseTensorAddressPadder : public SparseTensorAddressBuilder +{ + enum class PadOp + { + PAD, + COPY + }; + + std::vector<PadOp> _padOps; + +public: + SparseTensorAddressPadder(const eval::ValueType &resultType, + const eval::ValueType &inputType) + : SparseTensorAddressBuilder(), + _padOps() + { + auto resultDimsItr = resultType.dimensions().cbegin(); + auto resultDimsItrEnd = resultType.dimensions().cend(); + for (auto &dim : inputType.dimensions()) { + while (resultDimsItr != resultDimsItrEnd && + resultDimsItr->name < dim.name) { + _padOps.push_back(PadOp::PAD); + ++resultDimsItr; + } + assert(resultDimsItr != resultDimsItrEnd && + resultDimsItr->name == dim.name); + _padOps.push_back(PadOp::COPY); + ++resultDimsItr; + } + while (resultDimsItr != resultDimsItrEnd) { + _padOps.push_back(PadOp::PAD); + ++resultDimsItr; + } + } + + void + padAddress(SparseTensorAddressRef ref) + { + clear(); + SparseTensorAddressDecoder addr(ref); + for (auto op : _padOps) { + switch (op) { + case PadOp::PAD: + addUndefined(); + break; + default: + add(addr.decodeLabel()); + } + } + assert(!addr.valid()); + } +}; + + +} // namespace vespalib::tensor +} // namespace vespalib diff --git a/eval/src/vespa/eval/tensor/sparse/sparse_tensor_address_reducer.cpp b/eval/src/vespa/eval/tensor/sparse/sparse_tensor_address_reducer.cpp new file mode 100644 index 00000000000..277bf7963e0 --- /dev/null +++ b/eval/src/vespa/eval/tensor/sparse/sparse_tensor_address_reducer.cpp @@ -0,0 +1,35 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+ +#include "sparse_tensor_address_reducer.h" +#include <vespa/vespalib/eval/value_type.h> +#include <vespa/vespalib/stllike/hash_set.hpp> + +namespace vespalib { +namespace tensor { +namespace sparse { + +TensorAddressReducer::TensorAddressReducer(const eval::ValueType &type, + const std::vector<vespalib::string> & + removeDimensions) + : SparseTensorAddressBuilder(), + _ops() +{ + TensorDimensionsSet removeSet(removeDimensions.cbegin(), + removeDimensions.cend()); + _ops.reserve(type.dimensions().size()); + for (auto &dim : type.dimensions()) { + if (removeSet.find(dim.name) != removeSet.end()) { + _ops.push_back(AddressOp::REMOVE); + } else { + _ops.push_back(AddressOp::COPY); + } + } +} + +TensorAddressReducer::~TensorAddressReducer() +{ +} + +} // namespace vespalib::tensor::sparse +} // namespace vespalib::tensor +} // namespace vespalib diff --git a/eval/src/vespa/eval/tensor/sparse/sparse_tensor_address_reducer.h b/eval/src/vespa/eval/tensor/sparse/sparse_tensor_address_reducer.h new file mode 100644 index 00000000000..d1698681a55 --- /dev/null +++ b/eval/src/vespa/eval/tensor/sparse/sparse_tensor_address_reducer.h @@ -0,0 +1,56 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#pragma once + +#include "sparse_tensor_address_builder.h" +#include <vespa/vespalib/tensor/types.h> +#include "sparse_tensor_address_decoder.h" +#include <cassert> + +namespace vespalib { +namespace eval { class ValueType; } +namespace tensor { +namespace sparse { + +/** + * Reduce sparse tensor address by removing one or more dimensions. 
+ */ +class TensorAddressReducer : public SparseTensorAddressBuilder +{ + enum AddressOp + { + REMOVE, + COPY + }; + + using AddressOps = std::vector<AddressOp>; + + AddressOps _ops; + +public: + TensorAddressReducer(const eval::ValueType &type, + const std::vector<vespalib::string> &removeDimensions); + + ~TensorAddressReducer(); + + void reduce(SparseTensorAddressRef ref) + { + clear(); + SparseTensorAddressDecoder decoder(ref); + for (auto op : _ops) { + switch (op) { + case AddressOp::REMOVE: + decoder.skipLabel(); + break; + case AddressOp::COPY: + add(decoder.decodeLabel()); + } + } + assert(!decoder.valid()); + } +}; + + +} // namespace vespalib::tensor::sparse +} // namespace vespalib::tensor +} // namespace vespalib diff --git a/eval/src/vespa/eval/tensor/sparse/sparse_tensor_address_ref.h b/eval/src/vespa/eval/tensor/sparse/sparse_tensor_address_ref.h new file mode 100644 index 00000000000..4358ce501a2 --- /dev/null +++ b/eval/src/vespa/eval/tensor/sparse/sparse_tensor_address_ref.h @@ -0,0 +1,72 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#pragma once + +#include <vespa/vespalib/stllike/string.h> +#include <vector> +#include <vespa/vespalib/util/stash.h> + +namespace vespalib { + +// From vespalib/util/hashmap.h +size_t hashValue(const void * buf, size_t sz); + +namespace tensor { + +/** + * A reference to a compact sparse immutable address to a tensor cell. 
+ */ +class SparseTensorAddressRef +{ + const void *_start; + size_t _size; + size_t _hash; +public: + SparseTensorAddressRef() + : _start(nullptr), _size(0u), _hash(0u) + { + } + + SparseTensorAddressRef(const void *start_in, size_t size_in) + : _start(start_in), _size(size_in), + _hash(calcHash()) + { + } + + SparseTensorAddressRef(const SparseTensorAddressRef rhs, Stash &stash) + : _start(nullptr), + _size(rhs._size), + _hash(rhs._hash) + { + void *res = stash.alloc(rhs._size); + memcpy(res, rhs._start, rhs._size); + _start = res; + } + + size_t hash() const { return _hash; } + + size_t calcHash() const { return hashValue(_start, _size); } + + bool operator<(const SparseTensorAddressRef &rhs) const { + size_t minSize = std::min(_size, rhs._size); + int res = memcmp(_start, rhs._start, minSize); + if (res != 0) { + return res < 0; + } + return _size < rhs._size; + } + + bool operator==(const SparseTensorAddressRef &rhs) const + { + if (_size != rhs._size || _hash != rhs._hash) { + return false; + } + return memcmp(_start, rhs._start, _size) == 0; + } + + const void *start() const { return _start; } + size_t size() const { return _size; } +}; + +} // namespace vespalib::tensor +} // namespace vespalib diff --git a/eval/src/vespa/eval/tensor/sparse/sparse_tensor_apply.h b/eval/src/vespa/eval/tensor/sparse/sparse_tensor_apply.h new file mode 100644 index 00000000000..e0a8b2cee5b --- /dev/null +++ b/eval/src/vespa/eval/tensor/sparse/sparse_tensor_apply.h @@ -0,0 +1,23 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#pragma once + +namespace vespalib { +namespace tensor { +class Tensor; +class SparseTensor; +namespace sparse { + +/** + * Create new tensor using all combinations of input tensor cells with matching + * labels for common dimensions, using func to calculate new cell value + * based on the cell values in the input tensors. 
+ */ +template <typename Function> +std::unique_ptr<Tensor> +apply(const SparseTensor &lhs, const SparseTensor &rhs, Function &&func); + + +} // namespace vespalib::tensor::sparse +} // namespace vespalib::tensor +} // namespace vespalib diff --git a/eval/src/vespa/eval/tensor/sparse/sparse_tensor_apply.hpp b/eval/src/vespa/eval/tensor/sparse/sparse_tensor_apply.hpp new file mode 100644 index 00000000000..b32b09a01ac --- /dev/null +++ b/eval/src/vespa/eval/tensor/sparse/sparse_tensor_apply.hpp @@ -0,0 +1,35 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#pragma once + +#include "sparse_tensor_apply.h" +#include "sparse_tensor_address_combiner.h" +#include <vespa/vespalib/tensor/direct_tensor_builder.h> +#include "direct_sparse_tensor_builder.h" + +namespace vespalib { +namespace tensor { +namespace sparse { + +template <typename Function> +std::unique_ptr<Tensor> +apply(const SparseTensor &lhs, const SparseTensor &rhs, Function &&func) +{ + DirectTensorBuilder<SparseTensor> builder(lhs.combineDimensionsWith(rhs)); + TensorAddressCombiner addressCombiner(lhs.type(), rhs.type()); + for (const auto &lhsCell : lhs.cells()) { + for (const auto &rhsCell : rhs.cells()) { + bool combineSuccess = addressCombiner.combine(lhsCell.first, + rhsCell.first); + if (combineSuccess) { + builder.insertCell(addressCombiner.getAddressRef(), + func(lhsCell.second, rhsCell.second)); + } + } + } + return builder.build(); +} + +} // namespace vespalib::tensor::sparse +} // namespace vespalib::tensor +} // namespace vespalib diff --git a/eval/src/vespa/eval/tensor/sparse/sparse_tensor_builder.cpp b/eval/src/vespa/eval/tensor/sparse/sparse_tensor_builder.cpp new file mode 100644 index 00000000000..afab04fef6c --- /dev/null +++ b/eval/src/vespa/eval/tensor/sparse/sparse_tensor_builder.cpp @@ -0,0 +1,108 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+ +#include "sparse_tensor_builder.h" +#include <cassert> + +namespace vespalib { +namespace tensor { + +SparseTensorBuilder::SparseTensorBuilder() + : TensorBuilder(), + _addressBuilder(), + _normalizedAddressBuilder(), + _cells(), + _stash(SparseTensor::STASH_CHUNK_SIZE), + _dimensionsEnum(), + _dimensions(), + _type(eval::ValueType::double_type()), + _type_made(false) +{ +} + +SparseTensorBuilder::~SparseTensorBuilder() +{ +} + + +void +SparseTensorBuilder::makeType() +{ + assert(!_type_made); + assert(_cells.empty()); + std::vector<eval::ValueType::Dimension> dimensions; + dimensions.reserve(_dimensions.size()); + for (const auto &dim : _dimensions) { + dimensions.emplace_back(dim); + } + _type = (dimensions.empty() ? + eval::ValueType::double_type() : + eval::ValueType::tensor_type(std::move(dimensions))); + _type_made = true; +} + + +TensorBuilder::Dimension +SparseTensorBuilder::define_dimension(const vespalib::string &dimension) +{ + auto it = _dimensionsEnum.find(dimension); + if (it != _dimensionsEnum.end()) { + return it->second; + } + assert(!_type_made); + Dimension res = _dimensionsEnum.size(); + auto insres = _dimensionsEnum.insert(std::make_pair(dimension, res)); + (void) insres; + assert(insres.second); + assert(insres.first->second == res); + assert(_dimensions.size() == res); + _dimensions.push_back(dimension); + return res; +} + +TensorBuilder & +SparseTensorBuilder::add_label(Dimension dimension, + const vespalib::string &label) +{ + assert(dimension <= _dimensions.size()); + _addressBuilder.add(_dimensions[dimension], label); + return *this; +} + +TensorBuilder & +SparseTensorBuilder::add_cell(double value) +{ + if (!_type_made) { + makeType(); + } + _addressBuilder.buildTo(_normalizedAddressBuilder, _type); + SparseTensorAddressRef taddress(_normalizedAddressBuilder.getAddressRef()); + // Make a persistent copy of sparse tensor address owned by _stash + SparseTensorAddressRef address(taddress, _stash); + _cells[address] = value; + 
_addressBuilder.clear(); + _normalizedAddressBuilder.clear(); + return *this; +} + + +Tensor::UP +SparseTensorBuilder::build() +{ + assert(_addressBuilder.empty()); + if (!_type_made) { + makeType(); + } + Tensor::UP ret = std::make_unique<SparseTensor>(std::move(_type), + std::move(_cells), + std::move(_stash)); + SparseTensor::Cells().swap(_cells); + _dimensionsEnum.clear(); + _dimensions.clear(); + _type = eval::ValueType::double_type(); + _type_made = false; + return ret; +} + + +} // namespace vespalib::tensor +} // namespace vespalib diff --git a/eval/src/vespa/eval/tensor/sparse/sparse_tensor_builder.h b/eval/src/vespa/eval/tensor/sparse/sparse_tensor_builder.h new file mode 100644 index 00000000000..c6808614dd4 --- /dev/null +++ b/eval/src/vespa/eval/tensor/sparse/sparse_tensor_builder.h @@ -0,0 +1,46 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#pragma once + +#include "sparse_tensor.h" +#include "sparse_tensor_address_builder.h" +#include "sparse_tensor_unsorted_address_builder.h" +#include <vespa/vespalib/tensor/tensor_builder.h> +#include <vespa/vespalib/tensor/tensor_address.h> +#include <vespa/vespalib/stllike/hash_map.h> +#include <vespa/vespalib/util/stash.h> + +namespace vespalib { +namespace tensor { + +/** + * A builder of sparse tensors. 
+ */ +class SparseTensorBuilder : public TensorBuilder +{ + SparseTensorUnsortedAddressBuilder _addressBuilder; // unsorted dimensions + SparseTensorAddressBuilder _normalizedAddressBuilder; // sorted dimensions + SparseTensor::Cells _cells; + Stash _stash; + vespalib::hash_map<vespalib::string, uint32_t> _dimensionsEnum; + std::vector<vespalib::string> _dimensions; + eval::ValueType _type; + bool _type_made; + + void makeType(); +public: + SparseTensorBuilder(); + virtual ~SparseTensorBuilder(); + + virtual Dimension + define_dimension(const vespalib::string &dimension) override; + virtual TensorBuilder & + add_label(Dimension dimension, + const vespalib::string &label) override; + virtual TensorBuilder &add_cell(double value) override; + + virtual Tensor::UP build() override; +}; + +} // namespace vespalib::tensor +} // namespace vespalib diff --git a/eval/src/vespa/eval/tensor/sparse/sparse_tensor_match.cpp b/eval/src/vespa/eval/tensor/sparse/sparse_tensor_match.cpp new file mode 100644 index 00000000000..4add729d290 --- /dev/null +++ b/eval/src/vespa/eval/tensor/sparse/sparse_tensor_match.cpp @@ -0,0 +1,123 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+ +#include "sparse_tensor_match.h" + +namespace vespalib { +namespace tensor { + +namespace { + +enum class AddressOp +{ + REMOVE, + PAD, + COPY +}; + + +void +buildTransformOps(std::vector<AddressOp> &ops, + const eval::ValueType &lhs, + const eval::ValueType &rhs) +{ + auto rhsItr = rhs.dimensions().cbegin(); + auto rhsItrEnd = rhs.dimensions().cend(); + for (auto &lhsDim : lhs.dimensions()) { + while (rhsItr != rhsItrEnd && rhsItr->name < lhsDim.name) { + ops.push_back(AddressOp::PAD); + ++rhsItr; + } + if (rhsItr != rhsItrEnd && rhsItr->name == lhsDim.name) { + ops.push_back(AddressOp::COPY); + ++rhsItr; + } else { + ops.push_back(AddressOp::REMOVE); + } + } + while (rhsItr != rhsItrEnd) { + ops.push_back(AddressOp::PAD); + ++rhsItr; + } +} + + +bool +transformAddress(SparseTensorAddressBuilder &builder, + SparseTensorAddressRef ref, + const std::vector<AddressOp> &ops) +{ + builder.clear(); + SparseTensorAddressDecoder addr(ref); + for (auto op : ops) { + switch (op) { + case AddressOp::REMOVE: + { + auto label = addr.decodeLabel(); + if (label.size() != 0u) { + return false; + } + } + break; + case AddressOp::PAD: + builder.addUndefined(); + break; + case AddressOp::COPY: + builder.add(addr.decodeLabel()); + } + } + assert(!addr.valid()); + return true; +} + +} + + +void +SparseTensorMatch::fastMatch(const TensorImplType &lhs, + const TensorImplType &rhs) +{ + for (const auto &lhsCell : lhs.cells()) { + auto rhsItr = rhs.cells().find(lhsCell.first); + if (rhsItr != rhs.cells().end()) { + _builder.insertCell(lhsCell.first, lhsCell.second * rhsItr->second); + } + } +} + +void +SparseTensorMatch::slowMatch(const TensorImplType &lhs, + const TensorImplType &rhs) +{ + std::vector<AddressOp> ops; + SparseTensorAddressBuilder addressBuilder; + SparseTensorAddressPadder addressPadder(_builder.type(), + lhs.type()); + buildTransformOps(ops, lhs.type(), rhs.type()); + for (const auto &lhsCell : lhs.cells()) { + if (!transformAddress(addressBuilder, lhsCell.first, 
ops)) { + continue; + } + SparseTensorAddressRef ref(addressBuilder.getAddressRef()); + auto rhsItr = rhs.cells().find(ref); + if (rhsItr != rhs.cells().end()) { + addressPadder.padAddress(lhsCell.first); + _builder.insertCell(addressPadder, lhsCell.second * rhsItr->second); + } + } +} + +SparseTensorMatch::SparseTensorMatch(const TensorImplType &lhs, + const TensorImplType &rhs) + : Parent(lhs.combineDimensionsWith(rhs)) +{ + if ((lhs.type().dimensions().size() == rhs.type().dimensions().size()) && + (lhs.type().dimensions().size() == _builder.type().dimensions().size())) { + fastMatch(lhs, rhs); + } else { + slowMatch(lhs, rhs); + } +} + + +} // namespace vespalib::tensor +} // namespace vespalib diff --git a/eval/src/vespa/eval/tensor/sparse/sparse_tensor_match.h b/eval/src/vespa/eval/tensor/sparse/sparse_tensor_match.h new file mode 100644 index 00000000000..f12fddc51f4 --- /dev/null +++ b/eval/src/vespa/eval/tensor/sparse/sparse_tensor_match.h @@ -0,0 +1,31 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#pragma once + +#include <vespa/vespalib/tensor/tensor_operation.h> + +namespace vespalib { +namespace tensor { + +/** + * Returns the match product of two tensors. + * This returns a tensor which contains the matching cells in the two tensors, + * with their values multiplied. + * + * If the two tensors have exactly the same dimensions, this is the Hadamard product. 
+ */ +class SparseTensorMatch : public TensorOperation<SparseTensor> +{ +public: + using Parent = TensorOperation<SparseTensor>; + using typename Parent::TensorImplType; + using Parent::_builder; +private: + void fastMatch(const TensorImplType &lhs, const TensorImplType &rhs); + void slowMatch(const TensorImplType &lhs, const TensorImplType &rhs); +public: + SparseTensorMatch(const TensorImplType &lhs, const TensorImplType &rhs); +}; + +} // namespace vespalib::tensor +} // namespace vespalib diff --git a/eval/src/vespa/eval/tensor/sparse/sparse_tensor_reduce.hpp b/eval/src/vespa/eval/tensor/sparse/sparse_tensor_reduce.hpp new file mode 100644 index 00000000000..4c9b7043ac4 --- /dev/null +++ b/eval/src/vespa/eval/tensor/sparse/sparse_tensor_reduce.hpp @@ -0,0 +1,62 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#pragma once + +#include "sparse_tensor_address_reducer.h" +#include <vespa/vespalib/tensor/direct_tensor_builder.h> +#include "direct_sparse_tensor_builder.h" + +namespace vespalib { +namespace tensor { +namespace sparse { + +template <typename Function> +std::unique_ptr<Tensor> +reduceAll(const SparseTensor &tensor, + DirectTensorBuilder<SparseTensor> &builder, Function &&func) +{ + auto itr = tensor.cells().begin(); + auto itrEnd = tensor.cells().end(); + double result = 0.0; + if (itr != itrEnd) { + result = itr->second; + ++itr; + } + for (; itr != itrEnd; ++itr) { + result = func(result, itr->second); + } + builder.insertCell(SparseTensorAddressBuilder().getAddressRef(), result); + return builder.build(); +} + +template <typename Function> +std::unique_ptr<Tensor> +reduceAll(const SparseTensor &tensor, Function &&func) +{ + DirectTensorBuilder<SparseTensor> builder; + return reduceAll(tensor, builder, func); +} + +template <typename Function> +std::unique_ptr<Tensor> +reduce(const SparseTensor &tensor, + const std::vector<vespalib::string> &dimensions, Function &&func) +{ + if 
(dimensions.empty()) { + return reduceAll(tensor, func); + } + DirectTensorBuilder<SparseTensor> builder(tensor.type().reduce(dimensions)); + if (builder.type().dimensions().empty()) { + return reduceAll(tensor, builder, func); + } + TensorAddressReducer addressReducer(tensor.type(), dimensions); + for (const auto &cell : tensor.cells()) { + addressReducer.reduce(cell.first); + builder.insertCell(addressReducer.getAddressRef(), cell.second, func); + } + return builder.build(); +} + +} // namespace vespalib::tensor::sparse +} // namespace vespalib::tensor +} // namespace vespalib diff --git a/eval/src/vespa/eval/tensor/sparse/sparse_tensor_unsorted_address_builder.cpp b/eval/src/vespa/eval/tensor/sparse/sparse_tensor_unsorted_address_builder.cpp new file mode 100644 index 00000000000..9361cbcf7f8 --- /dev/null +++ b/eval/src/vespa/eval/tensor/sparse/sparse_tensor_unsorted_address_builder.cpp @@ -0,0 +1,50 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+ +#include <vespa/fastos/fastos.h> +#include "sparse_tensor_unsorted_address_builder.h" +#include "sparse_tensor_address_builder.h" +#include <vespa/vespalib/eval/value_type.h> +#include <algorithm> + +namespace vespalib { +namespace tensor { + +SparseTensorUnsortedAddressBuilder::SparseTensorUnsortedAddressBuilder() + : _elementStrings(), + _elements() +{ +} + + +void +SparseTensorUnsortedAddressBuilder::buildTo(SparseTensorAddressBuilder & + builder, + const eval::ValueType &type) +{ + const char *base = &_elementStrings[0]; + std::sort(_elements.begin(), _elements.end(), + [=](const ElementRef &lhs, const ElementRef &rhs) + { return lhs.getDimension(base) < rhs.getDimension(base); }); + // build normalized address with sorted dimensions + auto dimsItr = type.dimensions().cbegin(); + auto dimsItrEnd = type.dimensions().cend(); + for (const auto &element : _elements) { + while ((dimsItr != dimsItrEnd) && + (dimsItr->name < element.getDimension(base))) { + builder.addUndefined(); + ++dimsItr; + } + assert((dimsItr != dimsItrEnd) && + (dimsItr->name == element.getDimension(base))); + builder.add(element.getLabel(base)); + ++dimsItr; + } + while (dimsItr != dimsItrEnd) { + builder.addUndefined(); + ++dimsItr; + } +} + + +} // namespace vespalib::tensor +} // namespace vespalib diff --git a/eval/src/vespa/eval/tensor/sparse/sparse_tensor_unsorted_address_builder.h b/eval/src/vespa/eval/tensor/sparse/sparse_tensor_unsorted_address_builder.h new file mode 100644 index 00000000000..5fcf9590a89 --- /dev/null +++ b/eval/src/vespa/eval/tensor/sparse/sparse_tensor_unsorted_address_builder.h @@ -0,0 +1,82 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+ +#pragma once + +#include <vespa/vespalib/stllike/string.h> +#include <vector> +#include <vespa/vespalib/tensor/types.h> + +namespace vespalib { +namespace eval { class ValueType; } +namespace tensor { + +class SparseTensorAddressBuilder; + +/** + * A builder that buffers up a tensor address with unsorted + * dimensions. + */ +class SparseTensorUnsortedAddressBuilder +{ + struct ElementStringRef + { + uint32_t _base; + uint32_t _len; + ElementStringRef(uint32_t base, uint32_t len) + : _base(base), _len(len) + { + } + vespalib::stringref asStringRef(const char *base) const + { + return vespalib::stringref(base + _base, _len); + } + }; + struct ElementRef + { + ElementStringRef _dimension; + ElementStringRef _label; + ElementRef(ElementStringRef dimension, + ElementStringRef label) + : _dimension(dimension), + _label(label) + { + } + vespalib::stringref getDimension(const char *base) const { + return _dimension.asStringRef(base); + } + vespalib::stringref getLabel(const char *base) const { + return _label.asStringRef(base); + } + }; + std::vector<char> _elementStrings; // unsorted dimensions + std::vector<ElementRef> _elements; // unsorted dimensions + + ElementStringRef + append(vespalib::stringref str) + { + const char *cstr = str.c_str(); + uint32_t start = _elementStrings.size(); + _elementStrings.insert(_elementStrings.end(), + cstr, cstr + str.size() + 1); + return ElementStringRef(start, str.size()); + } + +public: + SparseTensorUnsortedAddressBuilder(); + bool empty() const { return _elementStrings.empty(); } + void add(vespalib::stringref dimension, vespalib::stringref label) + { + _elements.emplace_back(append(dimension), append(label)); + } + /* + * Sort the stored tensor address and pass it over to a strict + * tensor address builder in sorted order. 
+ */ + void buildTo(SparseTensorAddressBuilder &builder, + const eval::ValueType &type); + void clear() { _elementStrings.clear(); _elements.clear(); } +}; + + +} // namespace vespalib::tensor +} // namespace vespalib diff --git a/eval/src/vespa/eval/tensor/tensor.cpp b/eval/src/vespa/eval/tensor/tensor.cpp new file mode 100644 index 00000000000..11ef2b0ad00 --- /dev/null +++ b/eval/src/vespa/eval/tensor/tensor.cpp @@ -0,0 +1,24 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#include <vespa/fastos/fastos.h> +#include "tensor.h" +#include <sstream> +#include "default_tensor_engine.h" + +namespace vespalib { +namespace tensor { + +Tensor::Tensor() + : eval::Tensor(DefaultTensorEngine::ref()) +{ +} + +std::ostream & +operator<<(std::ostream &out, const Tensor &value) +{ + value.print(out); + return out; +} + +} // namespace vespalib::tensor +} // namespace vespalib diff --git a/eval/src/vespa/eval/tensor/tensor.h b/eval/src/vespa/eval/tensor/tensor.h new file mode 100644 index 00000000000..9e4f4a9bff0 --- /dev/null +++ b/eval/src/vespa/eval/tensor/tensor.h @@ -0,0 +1,58 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#pragma once + +#include "cell_function.h" +#include "tensor_address.h" +#include <vespa/vespalib/stllike/string.h> +#include <vespa/vespalib/eval/tensor.h> +#include <vespa/vespalib/eval/tensor_spec.h> +#include <vespa/vespalib/eval/value_type.h> + +namespace vespalib { +namespace eval { class BinaryOperation; } +namespace tensor { + +class TensorVisitor; + +/** + * Interface for operations on a tensor (sparse multi-dimensional array). + * + * A sparse tensor is a set of cells containing scalar values. + * Each cell is identified by its address, which consists of a set of dimension -> label pairs, + * where both dimension and label is a string on the form of an identifier or integer. 
+ */ +struct Tensor : public eval::Tensor +{ + typedef std::unique_ptr<Tensor> UP; + typedef std::reference_wrapper<const Tensor> CREF; + + Tensor(); + virtual ~Tensor() {} + virtual eval::ValueType getType() const = 0; + virtual double sum() const = 0; + virtual Tensor::UP add(const Tensor &arg) const = 0; + virtual Tensor::UP subtract(const Tensor &arg) const = 0; + virtual Tensor::UP multiply(const Tensor &arg) const = 0; + virtual Tensor::UP min(const Tensor &arg) const = 0; + virtual Tensor::UP max(const Tensor &arg) const = 0; + virtual Tensor::UP match(const Tensor &arg) const = 0; + virtual Tensor::UP apply(const CellFunction &func) const = 0; + virtual Tensor::UP sum(const vespalib::string &dimension) const = 0; + virtual Tensor::UP apply(const eval::BinaryOperation &op, + const Tensor &arg) const = 0; + virtual Tensor::UP reduce(const eval::BinaryOperation &op, + const std::vector<vespalib::string> &dimensions) + const = 0; + virtual bool equals(const Tensor &arg) const = 0; + virtual void print(std::ostream &out) const = 0; + virtual vespalib::string toString() const = 0; + virtual Tensor::UP clone() const = 0; + virtual eval::TensorSpec toSpec() const = 0; + virtual void accept(TensorVisitor &visitor) const = 0; +}; + +std::ostream &operator<<(std::ostream &out, const Tensor &value); + +} // namespace vespalib::tensor +} // namespace vespalib diff --git a/eval/src/vespa/eval/tensor/tensor_address.cpp b/eval/src/vespa/eval/tensor/tensor_address.cpp new file mode 100644 index 00000000000..10bd1bc082a --- /dev/null +++ b/eval/src/vespa/eval/tensor/tensor_address.cpp @@ -0,0 +1,88 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+ +#include <vespa/fastos/fastos.h> +#include "tensor_address.h" +#include <algorithm> +#include <ostream> + +namespace vespalib { +namespace tensor { + +const vespalib::string TensorAddress::Element::UNDEFINED_LABEL = "(undefined)"; + +TensorAddress::TensorAddress() + : _elements() +{ +} + +TensorAddress::TensorAddress(const Elements &elements_in) + : _elements(elements_in) +{ + std::sort(_elements.begin(), _elements.end()); +} + +bool +TensorAddress::hasDimension(const vespalib::string &dimension) const +{ + for (const auto &elem : _elements) { + if (elem.dimension() == dimension) { + return true; + } + } + return false; +} + +bool +TensorAddress::operator<(const TensorAddress &rhs) const +{ + if (_elements.size() == rhs._elements.size()) { + for (size_t i = 0; i < _elements.size(); ++i) { + if (_elements[i] != rhs._elements[i]) { + return _elements[i] < rhs._elements[i]; + } + } + } + return _elements.size() < rhs._elements.size(); +} + +bool +TensorAddress::operator==(const TensorAddress &rhs) const +{ + return _elements == rhs._elements; +} + +size_t +TensorAddress::hash() const +{ + size_t hashCode = 1; + for (const auto &elem : _elements) { + hashCode = 31 * hashCode + elem.hash(); + } + return hashCode; +} + +std::ostream & +operator<<(std::ostream &out, const TensorAddress::Elements &elements) +{ + out << "{"; + bool first = true; + for (const auto &elem : elements) { + if (!first) { + out << ","; + } + out << elem.dimension() << ":" << elem.label(); + first = false; + } + out << "}"; + return out; +} + +std::ostream & +operator<<(std::ostream &out, const TensorAddress &value) +{ + out << value.elements(); + return out; +} + +} // namespace vespalib::tensor +} // namespace vespalib diff --git a/eval/src/vespa/eval/tensor/tensor_address.h b/eval/src/vespa/eval/tensor/tensor_address.h new file mode 100644 index 00000000000..07f6d5b88ea --- /dev/null +++ b/eval/src/vespa/eval/tensor/tensor_address.h @@ -0,0 +1,81 @@ +// Copyright 2016 Yahoo Inc. 
Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#pragma once + +#include <vespa/vespalib/stllike/hash_fun.h> +#include <vespa/vespalib/stllike/string.h> +#include <iosfwd> +#include <map> +#include <vector> + +namespace vespalib { +namespace tensor { + +/** + * A sparse immutable address to a tensor cell. + * + * Only dimensions which have a different label than "undefined" are explicitly included. + * Tensor addresses are ordered by increasing size primarily, + * and by the natural order of the elements in sorted order secondarily. + */ +class TensorAddress +{ +public: + typedef std::unique_ptr<TensorAddress> UP; + + class Element + { + private: + vespalib::string _dimension; + vespalib::string _label; + + public: + static const vespalib::string UNDEFINED_LABEL; + Element(const vespalib::string &dimension_in, const vespalib::string &label_in) + : _dimension(dimension_in), _label(label_in) + {} + const vespalib::string &dimension() const { return _dimension; } + const vespalib::string &label() const { return _label; } + bool operator<(const Element &rhs) const { + if (_dimension == rhs._dimension) { + // Define sort order when dimension is the same to be able + // to do set operations over element vectors. 
+ return _label < rhs._label; + } + return _dimension < rhs._dimension; + } + bool operator==(const Element &rhs) const { + return (_dimension == rhs._dimension) && (_label == rhs._label); + } + bool operator!=(const Element &rhs) const { + return !(*this == rhs); + } + size_t hash() const { + return hashValue(_dimension.c_str()) + hashValue(_label.c_str()); + } + }; + + typedef std::vector<Element> Elements; + +private: + Elements _elements; + +public: + TensorAddress(); + explicit TensorAddress(const Elements &elements_in); + explicit TensorAddress(Elements &&elements_in) + : _elements(std::move(elements_in)) + { + } + const Elements &elements() const { return _elements; } + bool hasDimension(const vespalib::string &dimension) const; + bool operator<(const TensorAddress &rhs) const; + bool operator==(const TensorAddress &rhs) const; + size_t hash() const; +}; + +std::ostream &operator<<(std::ostream &out, const TensorAddress::Elements &elements); +std::ostream &operator<<(std::ostream &out, const TensorAddress &value); + +} // namespace vespalib::tensor +} // namespace vespalib diff --git a/eval/src/vespa/eval/tensor/tensor_address_builder.h b/eval/src/vespa/eval/tensor/tensor_address_builder.h new file mode 100644 index 00000000000..46ce3088528 --- /dev/null +++ b/eval/src/vespa/eval/tensor/tensor_address_builder.h @@ -0,0 +1,31 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#pragma once + +#include "tensor_address.h" + +namespace vespalib { +namespace tensor { + + +/** + * A builder for tensor addresses. 
+ */ +class TensorAddressBuilder +{ + TensorAddress::Elements _elements; +public: + TensorAddressBuilder() + : _elements() + { + } + void add(vespalib::stringref dimension, vespalib::stringref label) { + _elements.emplace_back(dimension, label); + } + TensorAddress build() { return TensorAddress(_elements); } + void clear(void) { _elements.clear(); } +}; + + +} // namespace vespalib::tensor +} // namespace vespalib diff --git a/eval/src/vespa/eval/tensor/tensor_address_element_iterator.h b/eval/src/vespa/eval/tensor/tensor_address_element_iterator.h new file mode 100644 index 00000000000..a250331de5f --- /dev/null +++ b/eval/src/vespa/eval/tensor/tensor_address_element_iterator.h @@ -0,0 +1,44 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#pragma once + +namespace vespalib { +namespace tensor { + +using DimensionsSet = vespalib::hash_set<vespalib::stringref>; + +/** + * An iterator for tensor address elements used to simplify 3-way merge + * between two tensor addresses and a dimension vector. 
+ */ +template <class Address> +class TensorAddressElementIterator { + using InnerIterator = typename Address::Elements::const_iterator; + InnerIterator _itr; + InnerIterator _itrEnd; +public: + TensorAddressElementIterator(const Address &address) + : _itr(address.elements().cbegin()), + _itrEnd(address.elements().cend()) + { + } + bool valid() const { return (_itr != _itrEnd); } + vespalib::stringref dimension() const { return _itr->dimension(); } + vespalib::stringref label() const { return _itr->label(); } + void next() { ++_itr; } + bool skipToDimension(vespalib::stringref rhsDimension) { + for (;;) { + if (!valid()) { + return false; + } + if (dimension() < rhsDimension) { + next(); + } else { + return (dimension() == rhsDimension); + } + } + } +}; + +} // namespace vespalib::tensor +} // namespace vespalib diff --git a/eval/src/vespa/eval/tensor/tensor_apply.cpp b/eval/src/vespa/eval/tensor/tensor_apply.cpp new file mode 100644 index 00000000000..8384d997122 --- /dev/null +++ b/eval/src/vespa/eval/tensor/tensor_apply.cpp @@ -0,0 +1,21 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#include "tensor_apply.h" + +namespace vespalib { +namespace tensor { + +template <class TensorT> +TensorApply<TensorT>::TensorApply(const TensorImplType &tensor, + const CellFunction &func) + : Parent(tensor.type()) +{ + for (const auto &cell : tensor.cells()) { + _builder.insertCell(cell.first, func.apply(cell.second)); + } +} + +template class TensorApply<SparseTensor>; + +} // namespace vespalib::tensor +} // namespace vespalib diff --git a/eval/src/vespa/eval/tensor/tensor_apply.h b/eval/src/vespa/eval/tensor/tensor_apply.h new file mode 100644 index 00000000000..52be67ed30c --- /dev/null +++ b/eval/src/vespa/eval/tensor/tensor_apply.h @@ -0,0 +1,27 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+ +#pragma once + +#include "cell_function.h" +#include "tensor_operation.h" + +namespace vespalib { +namespace tensor { + +/** + * Returns a tensor with the given function applied to all cells in the input tensor. + */ +template <class TensorT> +class TensorApply : public TensorOperation<TensorT> +{ +public: + using Parent = TensorOperation<TensorT>; + using typename Parent::TensorImplType; + using Parent::_builder; + TensorApply(const TensorImplType &tensor, const CellFunction &func); +}; + +extern template class TensorApply<SparseTensor>; + +} // namespace vespalib::tensor +} // namespace vespalib diff --git a/eval/src/vespa/eval/tensor/tensor_builder.h b/eval/src/vespa/eval/tensor/tensor_builder.h new file mode 100644 index 00000000000..2b97d09a1e6 --- /dev/null +++ b/eval/src/vespa/eval/tensor/tensor_builder.h @@ -0,0 +1,34 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#pragma once + +#include <vespa/vespalib/stllike/string.h> + +namespace vespalib { +namespace tensor { + +class Tensor; + +/** + * An interfrace for builder of tensors (sparse multi-dimensional array). + * + * A sparse tensor is a set of cells containing scalar values. Each + * cell is identified by its address, which consists of a set of + * dimension -> label pairs, where both dimension and label is a + * string on the form of an identifier or integer. 
+ */ +class TensorBuilder +{ +public: + using Dimension = uint32_t; + virtual ~TensorBuilder() { } + + virtual Dimension define_dimension(const vespalib::string &dimension) = 0; + virtual TensorBuilder & + add_label(Dimension dimension, const vespalib::string &label) = 0; + virtual TensorBuilder &add_cell(double value) = 0; + virtual std::unique_ptr<Tensor> build() = 0; +}; + +} // namespace vespalib::tensor +} // namespace vespalib diff --git a/eval/src/vespa/eval/tensor/tensor_factory.cpp b/eval/src/vespa/eval/tensor/tensor_factory.cpp new file mode 100644 index 00000000000..f8496180f7f --- /dev/null +++ b/eval/src/vespa/eval/tensor/tensor_factory.cpp @@ -0,0 +1,76 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#include <vespa/fastos/fastos.h> +#include "tensor.h" +#include "tensor_factory.h" +#include "tensor_builder.h" +#include <vespa/vespalib/tensor/dense/dense_tensor_builder.h> + +namespace vespalib { +namespace tensor { + +std::unique_ptr<Tensor> +TensorFactory::create(const TensorCells &cells, + TensorBuilder &builder) { + for (const auto &cell : cells) { + for (const auto &addressElem : cell.first) { + const auto &dimension = addressElem.first; + builder.define_dimension(dimension); + } + } + for (const auto &cell : cells) { + for (const auto &addressElem : cell.first) { + const auto &dimension = addressElem.first; + const auto &label = addressElem.second; + builder.add_label(builder.define_dimension(dimension), label); + } + builder.add_cell(cell.second); + } + return builder.build(); +} + + +std::unique_ptr<Tensor> +TensorFactory::create(const TensorCells &cells, + const TensorDimensions &dimensions, + TensorBuilder &builder) { + for (const auto &dimension : dimensions) { + builder.define_dimension(dimension); + } + return create(cells, builder); +} + + +std::unique_ptr<Tensor> +TensorFactory::createDense(const DenseTensorCells &cells) +{ + std::map<std::string, size_t> 
dimensionSizes; + DenseTensorBuilder builder; + for (const auto &cell : cells) { + for (const auto &addressElem : cell.first) { + dimensionSizes[addressElem.first] = + std::max(dimensionSizes[addressElem.first], + (addressElem.second + 1)); + } + } + std::map<std::string, + typename DenseTensorBuilder::Dimension> dimensionEnums; + for (const auto &dimensionElem : dimensionSizes) { + dimensionEnums[dimensionElem.first] = + builder.defineDimension(dimensionElem.first, + dimensionElem.second); + } + for (const auto &cell : cells) { + for (const auto &addressElem : cell.first) { + const auto &dimension = addressElem.first; + size_t label = addressElem.second; + builder.addLabel(dimensionEnums[dimension], label); + } + builder.addCell(cell.second); + } + return builder.build(); +} + + +} // namespace vespalib::tensor +} // namespace vespalib diff --git a/eval/src/vespa/eval/tensor/tensor_factory.h b/eval/src/vespa/eval/tensor/tensor_factory.h new file mode 100644 index 00000000000..db66cf7bedf --- /dev/null +++ b/eval/src/vespa/eval/tensor/tensor_factory.h @@ -0,0 +1,30 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#pragma once + +#include "types.h" + +namespace vespalib { +namespace tensor { + + +class Tensor; +class TensorBuilder; + +/** + * A factory for creating tensors based on stl structures (TensorCells and + * TensorDimensions) in unit tests. 
+ */ +class TensorFactory { +public: + static std::unique_ptr<Tensor> + create(const TensorCells &cells, TensorBuilder &builder); + static std::unique_ptr<Tensor> + create(const TensorCells &cells, const TensorDimensions &dimensions, + TensorBuilder &builder); + static std::unique_ptr<Tensor> + createDense(const DenseTensorCells &cells); +}; + +} // namespace vespalib::tensor +} // namespace vespalib diff --git a/eval/src/vespa/eval/tensor/tensor_mapper.cpp b/eval/src/vespa/eval/tensor/tensor_mapper.cpp new file mode 100644 index 00000000000..f8a1f99cb5b --- /dev/null +++ b/eval/src/vespa/eval/tensor/tensor_mapper.cpp @@ -0,0 +1,336 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#include "tensor_mapper.h" +#include "tensor.h" +#include "tensor_visitor.h" +#include <vespa/vespalib/tensor/sparse/direct_sparse_tensor_builder.h> +#include <vespa/vespalib/tensor/dense/dense_tensor.h> +#include "tensor_address_element_iterator.h" +#include "default_tensor.h" + +using vespalib::eval::ValueType; + +namespace vespalib { +namespace tensor { + +namespace { + +template <class TensorT> +class SparseTensorMapper : public TensorVisitor +{ + using Builder = DirectTensorBuilder<TensorT>; + using AddressBuilderType = typename Builder::AddressBuilderType; + + Builder _builder; + AddressBuilderType _addressBuilder; + + void mapAddress(const TensorAddress &address); + virtual void visit(const TensorAddress &address, double value) override; + + SparseTensorMapper(const ValueType &type); + + ~SparseTensorMapper(); + + std::unique_ptr<Tensor> build(); +public: + static std::unique_ptr<Tensor> + map(const Tensor &tensor, const ValueType &type); +}; + +template <class TensorT> +SparseTensorMapper<TensorT>:: +SparseTensorMapper(const ValueType &type) + : TensorVisitor(), + _builder(type), + _addressBuilder() +{ +} + +template <class TensorT> +SparseTensorMapper<TensorT>::~SparseTensorMapper() +{ +} + +template <class 
TensorT> +std::unique_ptr<Tensor> +SparseTensorMapper<TensorT>::build() +{ + return _builder.build(); +} + +template <> +void +SparseTensorMapper<SparseTensor>:: +mapAddress(const TensorAddress &address) +{ + _addressBuilder.clear(); + TensorAddressElementIterator<TensorAddress> addressIterator(address); + for (const auto &dimension : _builder.type().dimensions()) { + if (addressIterator.skipToDimension(dimension.name)) { + _addressBuilder.add(addressIterator.label()); + addressIterator.next(); + } else { + // output dimension not in input + _addressBuilder.addUndefined(); + } + } +} + +template <class TensorT> +void +SparseTensorMapper<TensorT>::visit(const TensorAddress &address, double value) +{ + mapAddress(address); + _builder.insertCell(_addressBuilder, value, + [](double oldValue, double newValue) + { return oldValue + newValue; }); +} + +template <class TensorT> +std::unique_ptr<Tensor> +SparseTensorMapper<TensorT>::map(const Tensor &tensor, + const ValueType &type) +{ + SparseTensorMapper<TensorT> mapper(type); + tensor.accept(mapper); + return mapper.build(); +} + +static constexpr uint32_t BAD_LABEL = std::numeric_limits<uint32_t>::max(); +static constexpr uint32_t BAD_ADDRESS = std::numeric_limits<uint32_t>::max(); + +uint32_t mapLabelToNumber(vespalib::stringref label) { + uint32_t result = 0; + for (char c : label) { + if (c < '0' || c > '9') { + return BAD_LABEL; // bad char + } + result = result * 10 + (c - '0'); + if (result > 100000000) { + return BAD_LABEL; // overflow + } + } + return result; +} + +class DenseTensorTypeMapper : public TensorVisitor +{ + ValueType _type; + std::vector<ValueType::Dimension> _dimensions; + + bool addressOK(const TensorAddress &address); + void expandUnboundDimensions(const TensorAddress &address); + + virtual void visit(const TensorAddress &address, double value) override; + + DenseTensorTypeMapper(const ValueType &type); + ~DenseTensorTypeMapper(); + + ValueType build(); +public: + static ValueType map(const 
Tensor &tensor, const ValueType &type); +}; + +bool +DenseTensorTypeMapper::addressOK(const TensorAddress &address) +{ + TensorAddressElementIterator<TensorAddress> addressIterator(address); + auto dimIterator = _dimensions.begin(); + for (const auto &dimension : _type.dimensions()) { + if (addressIterator.skipToDimension(dimension.name)) { + uint32_t label = mapLabelToNumber(addressIterator.label()); + if (label == BAD_LABEL || + (dimension.is_bound() && label >= dimIterator->size)) { + return false; + } + addressIterator.next(); + } + ++dimIterator; + } + assert(dimIterator == _dimensions.end()); + return true; +} + + +void +DenseTensorTypeMapper::expandUnboundDimensions(const TensorAddress &address) +{ + TensorAddressElementIterator<TensorAddress> addressIterator(address); + auto dimIterator = _dimensions.begin(); + for (const auto &dimension : _type.dimensions()) { + if (addressIterator.skipToDimension(dimension.name)) { + uint32_t label = mapLabelToNumber(addressIterator.label()); + if (label != BAD_LABEL && + !dimension.is_bound() && + label >= dimIterator->size) { + dimIterator->size = label + 1; + } + addressIterator.next(); + } + ++dimIterator; + } + assert(dimIterator == _dimensions.end()); +} + +void +DenseTensorTypeMapper::visit(const TensorAddress &address, double value) +{ + (void) value; + if (addressOK(address)) { + expandUnboundDimensions(address); + } +} + +DenseTensorTypeMapper::DenseTensorTypeMapper(const ValueType &type) + : _type(type), + _dimensions(type.dimensions()) +{ + for (auto &dimension : _dimensions) { + if (!dimension.is_bound()) + dimension.size = 1; + } +} + +DenseTensorTypeMapper::~DenseTensorTypeMapper() +{ +} + +ValueType +DenseTensorTypeMapper::build() +{ + return ValueType::tensor_type(std::move(_dimensions)); +} + +ValueType +DenseTensorTypeMapper::map(const Tensor &tensor, const ValueType &type) +{ + DenseTensorTypeMapper mapper(type); + tensor.accept(mapper); + return mapper.build(); +} + +class DenseTensorMapper : public 
TensorVisitor +{ + eval::ValueType _type; + DenseTensor::Cells _cells; + + uint32_t mapAddressToIndex(const TensorAddress &address); + virtual void visit(const TensorAddress &address, double value) override; + + DenseTensorMapper(const ValueType &type); + ~DenseTensorMapper(); + + std::unique_ptr<Tensor> build(); +public: + static std::unique_ptr<Tensor> + map(const Tensor &tensor, const ValueType &type); +}; + +DenseTensorMapper::DenseTensorMapper(const ValueType &type) + : _type(type), + _cells() +{ + size_t size = 1; + for (const auto &dimension : type.dimensions()) { + size *= dimension.size; + } + _cells.resize(size); +} + +DenseTensorMapper::~DenseTensorMapper() +{ +} + +std::unique_ptr<Tensor> +DenseTensorMapper::build() +{ + return std::make_unique<DenseTensor>(std::move(_type), + std::move(_cells)); +} + +uint32_t +DenseTensorMapper::mapAddressToIndex(const TensorAddress &address) +{ + uint32_t idx = 0; + TensorAddressElementIterator<TensorAddress> addressIterator(address); + for (const auto &dimension : _type.dimensions()) { + if (addressIterator.skipToDimension(dimension.name)) { + uint32_t label = mapLabelToNumber(addressIterator.label()); + if (label == BAD_LABEL || label >= dimension.size) { + return BAD_ADDRESS; + } + idx = idx * dimension.size + label; + addressIterator.next(); + } else { + // output dimension not in input + idx = idx * dimension.size; + } + } + return idx; +} + +void +DenseTensorMapper::visit(const TensorAddress &address, double value) +{ + uint32_t idx = mapAddressToIndex(address); + if (idx != BAD_ADDRESS) { + assert(idx < _cells.size()); + _cells[idx] += value; + } +} + +std::unique_ptr<Tensor> +DenseTensorMapper::map(const Tensor &tensor, const ValueType &type) +{ + DenseTensorMapper mapper(type.is_abstract() ? 
+ DenseTensorTypeMapper::map(tensor, type) : + type); + tensor.accept(mapper); + return mapper.build(); +} + +} // namespace vespalib::tensor::<anonymous> + +TensorMapper::TensorMapper(const ValueType &type) + : _type(type) +{ +} + +TensorMapper::~TensorMapper() +{ +} + +template <typename TensorT> +std::unique_ptr<Tensor> +TensorMapper::mapToSparse(const Tensor &tensor, const ValueType &type) +{ + assert(type.is_sparse()); + return SparseTensorMapper<TensorT>::map(tensor, type); +} + +std::unique_ptr<Tensor> +TensorMapper::mapToDense(const Tensor &tensor, const ValueType &type) +{ + assert(type.is_dense()); + return DenseTensorMapper::map(tensor, type); +} + +std::unique_ptr<Tensor> +TensorMapper::map(const Tensor &tensor) const +{ + if (_type.is_sparse()) { + return mapToSparse<DefaultTensor::type>(tensor, _type); + } else if (_type.is_dense()) { + return mapToDense(tensor, _type); + } else { + return std::unique_ptr<Tensor>(); + } +} + +template +std::unique_ptr<Tensor> +TensorMapper::mapToSparse<SparseTensor>(const Tensor &tensor, + const ValueType &type); + +} // namespace vespalib::tensor +} // namespace vespalib diff --git a/eval/src/vespa/eval/tensor/tensor_mapper.h b/eval/src/vespa/eval/tensor/tensor_mapper.h new file mode 100644 index 00000000000..e0394fbd3f7 --- /dev/null +++ b/eval/src/vespa/eval/tensor/tensor_mapper.h @@ -0,0 +1,43 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#pragma once + +#include <vespa/vespalib/eval/value_type.h> + +namespace vespalib { +namespace tensor { + +class Tensor; + +/** + * Class to map a tensor to a given tensor type. Dimensions in input + * tensor not present in tensor type are ignored. Dimensions in tensor + * type not present in input tensor gets default label (undefined + * (empty string) for sparse tensors, 0 for dense tensors). Values are + * accumulated for identical mapped addresses. 
+ * + * Dense tensor type has further restrictions: label must contain only + * numerical digits (0-9). Empty string equals 0. If the label is + * parsed to a value outside the dimension range or the parsing fails, + * then the cell ((address, value) pair) is ignored. + */ +class TensorMapper +{ + eval::ValueType _type; +public: + TensorMapper(const eval::ValueType &type); + ~TensorMapper(); + + template <typename TensorT> + static std::unique_ptr<Tensor> + mapToSparse(const Tensor &tensor, const eval::ValueType &type); + + static std::unique_ptr<Tensor> + mapToDense(const Tensor &tensor, const eval::ValueType &type); + + std::unique_ptr<Tensor> map(const Tensor &tensor) const; +}; + + +} // namespace vespalib::tensor +} // namespace vespalib diff --git a/eval/src/vespa/eval/tensor/tensor_operation.h b/eval/src/vespa/eval/tensor/tensor_operation.h new file mode 100644 index 00000000000..c4fc88f3b5e --- /dev/null +++ b/eval/src/vespa/eval/tensor/tensor_operation.h @@ -0,0 +1,50 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#pragma once + +#include "direct_tensor_builder.h" +#include <vespa/vespalib/tensor/sparse/direct_sparse_tensor_builder.h> + +namespace vespalib { +namespace tensor { + +/** + * Base class for an operation over tensors. 
+ */ +template <class TensorT> +class TensorOperation +{ +public: + using TensorImplType = TensorT; + using MyTensorBuilder = DirectTensorBuilder<TensorT>; + using Cells = typename TensorImplType::Cells; + using AddressBuilderType = typename MyTensorBuilder::AddressBuilderType; + using AddressRefType = typename MyTensorBuilder::AddressRefType; +protected: + MyTensorBuilder _builder; + eval::ValueType &_type; + Cells &_cells; + +public: + TensorOperation() + : _builder(), + _type(_builder.type()), + _cells(_builder.cells()) + {} + TensorOperation(const eval::ValueType &type) + : _builder(type), + _type(_builder.type()), + _cells(_builder.cells()) + {} + TensorOperation(const eval::ValueType &type, const Cells &cells) + : _builder(type, cells), + _type(_builder.type()), + _cells(_builder.cells()) + {} + Tensor::UP result() { + return _builder.build(); + } +}; + +} // namespace vespalib::tensor +} // namespace vespalib diff --git a/eval/src/vespa/eval/tensor/tensor_visitor.h b/eval/src/vespa/eval/tensor/tensor_visitor.h new file mode 100644 index 00000000000..11aa8a8dad8 --- /dev/null +++ b/eval/src/vespa/eval/tensor/tensor_visitor.h @@ -0,0 +1,24 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#pragma once + +#include "tensor_address.h" +#include <vespa/vespalib/stllike/string.h> +#include "types.h" + +namespace vespalib { +namespace tensor { + +/** + * Class for visiting a tensor. First visit must specify dimensions, + * remaining visits must specify tensor addresses and values. 
+ */ +class TensorVisitor +{ +public: + virtual ~TensorVisitor() {} + virtual void visit(const TensorAddress &address, double value) = 0; +}; + +} // namespace vespalib::tensor +} // namespace vespalib diff --git a/eval/src/vespa/eval/tensor/types.h b/eval/src/vespa/eval/tensor/types.h new file mode 100644 index 00000000000..7bdb37b8ac2 --- /dev/null +++ b/eval/src/vespa/eval/tensor/types.h @@ -0,0 +1,18 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#pragma once + +#include <vespa/vespalib/stllike/string.h> +#include <map> +#include <vespa/vespalib/stllike/hash_set.h> + +namespace vespalib { +namespace tensor { + +using TensorCells = std::map<std::map<vespalib::string, vespalib::string>, double>; +using TensorDimensions = std::vector<vespalib::string>; +using TensorDimensionsSet = vespalib::hash_set<vespalib::string>; +using DenseTensorCells = std::map<std::map<vespalib::string, size_t>, double>; + +} // namespace vespalib::tensor +} // namespace vespalib |