diff options
author | Håvard Pettersen <havardpe@oath.com> | 2020-04-08 09:57:14 +0000 |
---|---|---|
committer | Håvard Pettersen <havardpe@oath.com> | 2020-04-30 10:03:56 +0000 |
commit | c6d0163069974aea86fbec8e43f55a21f12b5844 (patch) | |
tree | f0fb88835fa814a625dfb42a0a18c6b89f45117c | |
parent | a5e6a9170120c5efa9dd620d35eba5377581fbb1 (diff) |
lambda peek optimizer
17 files changed, 881 insertions, 30 deletions
diff --git a/eval/CMakeLists.txt b/eval/CMakeLists.txt index 8eb198a9a0b..bb13638cf1d 100644 --- a/eval/CMakeLists.txt +++ b/eval/CMakeLists.txt @@ -18,6 +18,7 @@ vespa_define_module( src/tests/eval/function_speed src/tests/eval/gbdt src/tests/eval/interpreted_function + src/tests/eval/node_tools src/tests/eval/node_types src/tests/eval/param_usage src/tests/eval/simple_tensor diff --git a/eval/src/tests/eval/node_tools/CMakeLists.txt b/eval/src/tests/eval/node_tools/CMakeLists.txt new file mode 100644 index 00000000000..437aea3ac8f --- /dev/null +++ b/eval/src/tests/eval/node_tools/CMakeLists.txt @@ -0,0 +1,8 @@ +# Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +vespa_add_executable(eval_node_tools_test_app TEST + SOURCES + node_tools_test.cpp + DEPENDS + vespaeval +) +vespa_add_test(NAME eval_node_tools_test_app COMMAND eval_node_tools_test_app) diff --git a/eval/src/tests/eval/node_tools/node_tools_test.cpp b/eval/src/tests/eval/node_tools/node_tools_test.cpp new file mode 100644 index 00000000000..ca89650127e --- /dev/null +++ b/eval/src/tests/eval/node_tools/node_tools_test.cpp @@ -0,0 +1,120 @@ +// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+ +#include <vespa/vespalib/testkit/test_kit.h> +#include <vespa/eval/eval/function.h> +#include <vespa/eval/eval/node_tools.h> + +using namespace vespalib::eval; + +auto make_copy(const Function &fun) { + std::vector<vespalib::string> params; + for (size_t i = 0; i < fun.num_params(); ++i) { + params.push_back(fun.param_name(i)); + } + return Function::create(NodeTools::copy(fun.root()), params); +} + +void verify_copy(const vespalib::string &expr, const vespalib::string &expect) { + auto fun = Function::parse(expr); + auto fun_copy = make_copy(*fun); + EXPECT_EQUAL(fun_copy->dump(), expect); +} +void verify_copy(const vespalib::string &expr) { verify_copy(expr, expr); } + +TEST("require that required parameter count can be detected") { + auto function = Function::parse({"a","b","c"}, "(c+a)+(b+1)"); + const auto &root = function->root(); + ASSERT_EQUAL(root.num_children(), 2u); + const auto &n_c_a = root.get_child(0); + const auto &n_b_1 = root.get_child(1); + ASSERT_EQUAL(n_c_a.num_children(), 2u); + const auto &n_c = n_c_a.get_child(0); + const auto &n_a = n_c_a.get_child(1); + ASSERT_EQUAL(n_b_1.num_children(), 2u); + const auto &n_b = n_b_1.get_child(0); + const auto &n_1 = n_b_1.get_child(1); + EXPECT_EQUAL(NodeTools::min_num_params(root), 3u); + EXPECT_EQUAL(NodeTools::min_num_params(n_c_a), 3u); + EXPECT_EQUAL(NodeTools::min_num_params(n_b_1), 2u); + EXPECT_EQUAL(NodeTools::min_num_params(n_c), 3u); + EXPECT_EQUAL(NodeTools::min_num_params(n_a), 1u); + EXPECT_EQUAL(NodeTools::min_num_params(n_b), 2u); + EXPECT_EQUAL(NodeTools::min_num_params(n_1), 0u); +} + +TEST("require that basic node types can be copied") { + TEST_DO(verify_copy("123")); + TEST_DO(verify_copy("foo")); + TEST_DO(verify_copy("\"string value\"")); + TEST_DO(verify_copy("(a in [1,\"2\",3])")); + TEST_DO(verify_copy("(-a)")); + TEST_DO(verify_copy("(!a)")); + TEST_DO(verify_copy("if(a,b,c)")); + TEST_DO(verify_copy("if(a,b,c,0.7)")); + TEST_DO(verify_copy("#", "[]...[missing 
value]...[#]")); +} + +TEST("require that operator node types can be copied") { + TEST_DO(verify_copy("(a+b)")); + TEST_DO(verify_copy("(a-b)")); + TEST_DO(verify_copy("(a*b)")); + TEST_DO(verify_copy("(a/b)")); + TEST_DO(verify_copy("(a%b)")); + TEST_DO(verify_copy("(a^b)")); + TEST_DO(verify_copy("(a==b)")); + TEST_DO(verify_copy("(a!=b)")); + TEST_DO(verify_copy("(a~=b)")); + TEST_DO(verify_copy("(a<b)")); + TEST_DO(verify_copy("(a<=b)")); + TEST_DO(verify_copy("(a>b)")); + TEST_DO(verify_copy("(a>=b)")); + TEST_DO(verify_copy("(a&&b)")); + TEST_DO(verify_copy("(a||b)")); +} + +TEST("require that call node types can be copied") { + TEST_DO(verify_copy("cos(a)")); + TEST_DO(verify_copy("sin(a)")); + TEST_DO(verify_copy("tan(a)")); + TEST_DO(verify_copy("cosh(a)")); + TEST_DO(verify_copy("sinh(a)")); + TEST_DO(verify_copy("tanh(a)")); + TEST_DO(verify_copy("acos(a)")); + TEST_DO(verify_copy("asin(a)")); + TEST_DO(verify_copy("atan(a)")); + TEST_DO(verify_copy("exp(a)")); + TEST_DO(verify_copy("log10(a)")); + TEST_DO(verify_copy("log(a)")); + TEST_DO(verify_copy("sqrt(a)")); + TEST_DO(verify_copy("ceil(a)")); + TEST_DO(verify_copy("fabs(a)")); + TEST_DO(verify_copy("floor(a)")); + TEST_DO(verify_copy("atan2(a,b)")); + TEST_DO(verify_copy("ldexp(a,b)")); + TEST_DO(verify_copy("pow(a,b)")); + TEST_DO(verify_copy("fmod(a,b)")); + TEST_DO(verify_copy("min(a,b)")); + TEST_DO(verify_copy("max(a,b)")); + TEST_DO(verify_copy("isNan(a)")); + TEST_DO(verify_copy("relu(a)")); + TEST_DO(verify_copy("sigmoid(a)")); + TEST_DO(verify_copy("elu(a)")); +} + +TEST("require that tensor node types can NOT be copied (yet)") { + TEST_DO(verify_copy("map(a,f(x)(x))", "not implemented")); + TEST_DO(verify_copy("join(a,b,f(x,y)(x*y))", "not implemented")); + TEST_DO(verify_copy("merge(a,b,f(x,y)(y))", "not implemented")); + TEST_DO(verify_copy("reduce(a,sum)", "not implemented")); + TEST_DO(verify_copy("rename(a,x,y)", "not implemented")); + TEST_DO(verify_copy("concat(a,b,x)", "not 
implemented")); + TEST_DO(verify_copy("tensor(x[3]):[1,2,3]", "not implemented")); + TEST_DO(verify_copy("tensor(x[3])(x)", "not implemented")); + TEST_DO(verify_copy("a{x:0}", "not implemented")); +} + +TEST("require that nested expressions can be copied") { + TEST_DO(verify_copy("min(a,if(((b+3)==7),(!c),(d+7)))")); +} + +TEST_MAIN() { TEST_RUN_ALL(); } diff --git a/eval/src/tests/eval/tensor_lambda/tensor_lambda_test.cpp b/eval/src/tests/eval/tensor_lambda/tensor_lambda_test.cpp index 5b0f2cf0a7e..13f2dc1041d 100644 --- a/eval/src/tests/eval/tensor_lambda/tensor_lambda_test.cpp +++ b/eval/src/tests/eval/tensor_lambda/tensor_lambda_test.cpp @@ -6,6 +6,8 @@ #include <vespa/eval/eval/simple_tensor_engine.h> #include <vespa/eval/tensor/default_tensor_engine.h> #include <vespa/eval/tensor/dense/dense_replace_type_function.h> +#include <vespa/eval/tensor/dense/dense_cell_range_function.h> +#include <vespa/eval/tensor/dense/dense_lambda_peek_function.h> #include <vespa/eval/tensor/dense/dense_fast_rename_optimizer.h> #include <vespa/eval/tensor/dense/dense_tensor.h> #include <vespa/eval/eval/test/tensor_model.hpp> @@ -27,42 +29,81 @@ EvalFixture::ParamRepo make_params() { return EvalFixture::ParamRepo() .add("a", spec(1)) .add("x3", spec({x(3)}, N())) - .add("x3f", spec(float_cells({x(3)}), N())); + .add("x3f", spec(float_cells({x(3)}), N())) + .add("x3m", spec({x({"0", "1", "2"})}, N())) + .add("x3y5", spec({x(3), y(5)}, N())) + .add("x3y5f", spec(float_cells({x(3), y(5)}), N())) + .add("x15", spec({x(15)}, N())) + .add("x15f", spec(float_cells({x(15)}), N())); } EvalFixture::ParamRepo param_repo = make_params(); -void verify_dynamic(const vespalib::string &expr, const vespalib::string &expect) { +template <typename T, typename F> +void verify_impl(const vespalib::string &expr, const vespalib::string &expect, F &&inspect) { EvalFixture fixture(prod_engine, expr, param_repo, true); + EvalFixture slow_fixture(prod_engine, expr, param_repo, false); + 
EXPECT_EQUAL(fixture.result(), slow_fixture.result()); EXPECT_EQUAL(fixture.result(), EvalFixture::ref(expr, param_repo)); EXPECT_EQUAL(fixture.result(), EvalFixture::ref(expect, param_repo)); - auto info = fixture.find_all<Lambda>(); - EXPECT_EQUAL(info.size(), 1u); + auto info = fixture.find_all<T>(); + if (EXPECT_EQUAL(info.size(), 1u)) { + inspect(info[0]); + } +} +template <typename T> +void verify_impl(const vespalib::string &expr, const vespalib::string &expect) { + verify_impl<T>(expr, expect, [](const T*){}); +} + +void verify_generic(const vespalib::string &expr, const vespalib::string &expect) { + verify_impl<Lambda>(expr, expect); +} + +void verify_reshape(const vespalib::string &expr, const vespalib::string &expect) { + verify_impl<DenseReplaceTypeFunction>(expr, expect); +} + +void verify_range(const vespalib::string &expr, const vespalib::string &expect) { + verify_impl<DenseCellRangeFunction>(expr, expect); +} + +void verify_compiled(const vespalib::string &expr, const vespalib::string &expect, + const vespalib::string &expect_idx_fun) +{ + verify_impl<DenseLambdaPeekFunction>(expr, expect, + [&](const DenseLambdaPeekFunction *info) + { + EXPECT_EQUAL(info->idx_fun_dump(), expect_idx_fun); + }); } void verify_const(const vespalib::string &expr, const vespalib::string &expect) { - EvalFixture fixture(prod_engine, expr, param_repo, true); - EXPECT_EQUAL(fixture.result(), EvalFixture::ref(expr, param_repo)); - EXPECT_EQUAL(fixture.result(), EvalFixture::ref(expect, param_repo)); - auto info = fixture.find_all<ConstValue>(); - EXPECT_EQUAL(info.size(), 1u); + verify_impl<ConstValue>(expr, expect); } +//----------------------------------------------------------------------------- + TEST("require that simple constant tensor lambda works") { TEST_DO(verify_const("tensor(x[3])(x+1)", "tensor(x[3]):[1,2,3]")); } TEST("require that simple dynamic tensor lambda works") { - TEST_DO(verify_dynamic("tensor(x[3])(x+a)", "tensor(x[3]):[1,2,3]")); + 
TEST_DO(verify_generic("tensor(x[3])(x+a)", "tensor(x[3]):[1,2,3]")); } TEST("require that tensor lambda can be used for tensor slicing") { - TEST_DO(verify_dynamic("tensor(x[2])(x3{x:(x+a)})", "tensor(x[2]):[2,3]")); - TEST_DO(verify_dynamic("tensor(x[2])(a+x3{x:(x)})", "tensor(x[2]):[2,3]")); + TEST_DO(verify_generic("tensor(x[2])(x3{x:(x+a)})", "tensor(x[2]):[2,3]")); + TEST_DO(verify_generic("tensor(x[2])(a+x3{x:(x)})", "tensor(x[2]):[2,3]")); } -TEST("require that tensor lambda can be used for tensor casting") { - TEST_DO(verify_dynamic("tensor(x[3])(x3f{x:(x)})", "tensor(x[3]):[1,2,3]")); - TEST_DO(verify_dynamic("tensor<float>(x[3])(x3{x:(x)})", "tensor<float>(x[3]):[1,2,3]")); +TEST("require that tensor lambda can be used for cell type casting") { + TEST_DO(verify_compiled("tensor(x[3])(x3f{x:(x)})", "tensor(x[3]):[1,2,3]", "f(x)(x)")); + TEST_DO(verify_compiled("tensor<float>(x[3])(x3{x:(x)})", "tensor<float>(x[3]):[1,2,3]", "f(x)(x)")); +} + +TEST("require that tensor lambda can be used to convert from sparse to dense tensors") { + TEST_DO(verify_generic("tensor(x[3])(x3m{x:(x)})", "tensor(x[3]):[1,2,3]")); + TEST_DO(verify_generic("tensor(x[2])(x3m{x:(x)})", "tensor(x[2]):[1,2]")); } TEST("require that constant nested tensor lambda using tensor peek works") { @@ -70,7 +111,44 @@ TEST("require that constant nested tensor lambda using tensor peek works") { } TEST("require that dynamic nested tensor lambda using tensor peek works") { - TEST_DO(verify_dynamic("tensor(x[2])(tensor(y[2])((x+y)+a){y:(x)})", "tensor(x[2]):[1,3]")); + TEST_DO(verify_generic("tensor(x[2])(tensor(y[2])((x+y)+a){y:(x)})", "tensor(x[2]):[1,3]")); +} + +TEST("require that tensor reshape is optimized") { + TEST_DO(verify_reshape("tensor(x[15])(x3y5{x:(x/5),y:(x%5)})", "x15")); + TEST_DO(verify_reshape("tensor(x[3],y[5])(x15{x:(x*5+y)})", "x3y5")); + TEST_DO(verify_reshape("tensor<float>(x[15])(x3y5f{x:(x/5),y:(x%5)})", "x15f")); +} + +TEST("require that tensor reshape with non-matching 
cell type requires cell copy") { + TEST_DO(verify_compiled("tensor(x[15])(x3y5f{x:(x/5),y:(x%5)})", "x15", "f(x)((floor((x/5))*5)+(x%5))")); + TEST_DO(verify_compiled("tensor<float>(x[15])(x3y5{x:(x/5),y:(x%5)})", "x15f", "f(x)((floor((x/5))*5)+(x%5))")); + TEST_DO(verify_compiled("tensor(x[3],y[5])(x15f{x:(x*5+y)})", "x3y5", "f(x,y)((x*5)+y)")); + TEST_DO(verify_compiled("tensor<float>(x[3],y[5])(x15{x:(x*5+y)})", "x3y5f", "f(x,y)((x*5)+y)")); +} + +TEST("require that tensor cell subrange view is optimized") { + TEST_DO(verify_range("tensor(y[5])(x3y5{x:1,y:(y)})", "x3y5{x:1}")); + TEST_DO(verify_range("tensor(x[3])(x15{x:(x+5)})", "tensor(x[3]):[6,7,8]")); +} + +TEST("require that tensor cell subrange with non-matching cell type requires cell copy") { + TEST_DO(verify_compiled("tensor(x[3])(x15f{x:(x+5)})", "tensor(x[3]):[6,7,8]", "f(x)(x+5)")); + TEST_DO(verify_compiled("tensor<float>(x[3])(x15{x:(x+5)})", "tensor<float>(x[3]):[6,7,8]", "f(x)(x+5)")); +} + +TEST("require that non-continuous cell extraction is optimized") { + TEST_DO(verify_compiled("tensor(x[3])(x3y5{x:(x),y:2})", "x3y5{y:2}", "f(x)((floor(x)*5)+2)")); + TEST_DO(verify_compiled("tensor(x[3])(x3y5f{x:(x),y:2})", "x3y5{y:2}", "f(x)((floor(x)*5)+2)")); + TEST_DO(verify_compiled("tensor<float>(x[3])(x3y5{x:(x),y:2})", "x3y5f{y:2}", "f(x)((floor(x)*5)+2)")); + TEST_DO(verify_compiled("tensor<float>(x[3])(x3y5f{x:(x),y:2})", "x3y5f{y:2}", "f(x)((floor(x)*5)+2)")); +} + +TEST("require that out-of-bounds cell extraction is not optimized") { + TEST_DO(verify_generic("tensor(x[3])(x3y5{x:1,y:(x+3)})", "tensor(x[3]):[9,10,0]")); + TEST_DO(verify_generic("tensor(x[3])(x3y5{x:1,y:(x-1)})", "tensor(x[3]):[0,6,7]")); + TEST_DO(verify_generic("tensor(x[3])(x3y5{x:(x+1),y:(x)})", "tensor(x[3]):[6,12,0]")); + TEST_DO(verify_generic("tensor(x[3])(x3y5{x:(x-1),y:(x)})", "tensor(x[3]):[0,2,8]")); } TEST("require that non-double result from inner tensor lambda function fails type resolving") { diff --git 
a/eval/src/vespa/eval/eval/CMakeLists.txt b/eval/src/vespa/eval/eval/CMakeLists.txt index 20a7141393f..002a027c3a9 100644 --- a/eval/src/vespa/eval/eval/CMakeLists.txt +++ b/eval/src/vespa/eval/eval/CMakeLists.txt @@ -13,6 +13,7 @@ vespa_add_library(eval_eval OBJECT key_gen.cpp lazy_params.cpp make_tensor_function.cpp + node_tools.cpp node_types.cpp operation.cpp operator_nodes.cpp diff --git a/eval/src/vespa/eval/eval/llvm/compiled_function.cpp b/eval/src/vespa/eval/eval/llvm/compiled_function.cpp index 4101cf10e1f..043ad248251 100644 --- a/eval/src/vespa/eval/eval/llvm/compiled_function.cpp +++ b/eval/src/vespa/eval/eval/llvm/compiled_function.cpp @@ -31,16 +31,16 @@ double my_resolve(void *ctx, size_t idx) { return ((double *)ctx)[idx]; } } // namespace vespalib::eval::<unnamed> -CompiledFunction::CompiledFunction(const Function &function_in, PassParams pass_params_in, +CompiledFunction::CompiledFunction(const nodes::Node &root_in, size_t num_params_in, PassParams pass_params_in, const gbdt::Optimize::Chain &forest_optimizers) : _llvm_wrapper(), _address(nullptr), - _num_params(function_in.num_params()), + _num_params(num_params_in), _pass_params(pass_params_in) { - size_t id = _llvm_wrapper.make_function(function_in.num_params(), + size_t id = _llvm_wrapper.make_function(num_params_in, _pass_params, - function_in.root(), + root_in, forest_optimizers); _llvm_wrapper.compile(); _address = _llvm_wrapper.get_function_address(id); @@ -120,7 +120,7 @@ CompiledFunction::estimate_cost_us(const std::vector<double> &params, double bud } Function::Issues -CompiledFunction::detect_issues(const Function &function) +CompiledFunction::detect_issues(const nodes::Node &node) { struct NotSupported : NodeTraverser { std::vector<vespalib::string> issues; @@ -141,7 +141,7 @@ CompiledFunction::detect_issues(const Function &function) } } } checker; - function.root().traverse(checker); + node.traverse(checker); return Function::Issues(std::move(checker.issues)); } diff --git 
a/eval/src/vespa/eval/eval/llvm/compiled_function.h b/eval/src/vespa/eval/eval/llvm/compiled_function.h index 40737e3ff92..6fb68e2df72 100644 --- a/eval/src/vespa/eval/eval/llvm/compiled_function.h +++ b/eval/src/vespa/eval/eval/llvm/compiled_function.h @@ -38,10 +38,14 @@ private: public: typedef std::unique_ptr<CompiledFunction> UP; - CompiledFunction(const Function &function_in, PassParams pass_params_in, + CompiledFunction(const nodes::Node &root_in, size_t num_params_in, PassParams pass_params_in, const gbdt::Optimize::Chain &forest_optimizers); + CompiledFunction(const Function &function_in, PassParams pass_params_in, const gbdt::Optimize::Chain &forest_optimizers) + : CompiledFunction(function_in.root(), function_in.num_params(), pass_params_in, forest_optimizers) {} + CompiledFunction(const nodes::Node &root_in, size_t num_params_in, PassParams pass_params_in) + : CompiledFunction(root_in, num_params_in, pass_params_in, gbdt::Optimize::best) {} CompiledFunction(const Function &function_in, PassParams pass_params_in) - : CompiledFunction(function_in, pass_params_in, gbdt::Optimize::best) {} + : CompiledFunction(function_in.root(), function_in.num_params(), pass_params_in, gbdt::Optimize::best) {} CompiledFunction(CompiledFunction &&rhs); size_t num_params() const { return _num_params; } PassParams pass_params() const { return _pass_params; } @@ -63,7 +67,10 @@ public: return _llvm_wrapper.get_forests(); } double estimate_cost_us(const std::vector<double> &params, double budget = 5.0) const; - static Function::Issues detect_issues(const Function &function); + static Function::Issues detect_issues(const nodes::Node &node); + static Function::Issues detect_issues(const Function &function) { + return detect_issues(function.root()); + } static bool should_use_lazy_params(const Function &function); }; diff --git a/eval/src/vespa/eval/eval/node_tools.cpp b/eval/src/vespa/eval/eval/node_tools.cpp new file mode 100644 index 00000000000..179594963e9 --- /dev/null +++ 
b/eval/src/vespa/eval/eval/node_tools.cpp @@ -0,0 +1,209 @@ +// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#include "node_tools.h" +#include <vespa/eval/eval/node_traverser.h> +#include <vespa/eval/eval/node_visitor.h> + +using namespace vespalib::eval; +using namespace vespalib::eval::nodes; + +namespace vespalib::eval { + +namespace { + +struct CountParams : NodeTraverser, EmptyNodeVisitor { + size_t result = 0; + void visit(const Symbol &symbol) override { + result = std::max(result, symbol.id() + 1); + } + bool open(const Node &) override { return true; } + void close(const Node &node) override { node.accept(*this); } +}; + +struct CopyNode : NodeTraverser, NodeVisitor { + + std::unique_ptr<Error> error; + std::vector<Node_UP> stack; + + CopyNode() : error(), stack() {} + ~CopyNode(); + + Node_UP get_result() { + if (error) { + return std::move(error); + } + if (stack.size() != 1) { + return std::make_unique<Error>("invalid result stack"); + } + return std::move(stack.back()); + } + + //------------------------------------------------------------------------- + + void fail(const vespalib::string &msg) { + if (!error) { + error = std::make_unique<Error>(msg); + } + } + + void not_implemented(const Node &) { + fail("not implemented"); + } + + std::vector<Node_UP> get_children(size_t n) { + std::vector<Node_UP> result; + if (stack.size() >= n) { + for (size_t i = 0; i < n; ++i) { + result.push_back(std::move(stack[stack.size() - (n - i)])); + } + stack.resize(stack.size() - n); + } else { + fail("stack underflow"); + for (size_t i = 0; i < n; ++i) { + result.push_back(std::make_unique<Error>("placeholder")); + } + } + return result; + } + + //------------------------------------------------------------------------- + + void wire_operator(Operator_UP op) { + auto list = get_children(2); + op->bind(std::move(list[0]), std::move(list[1])); + stack.push_back(std::move(op)); + } + + void 
wire_call(Call_UP call) { + auto list = get_children(call->num_params()); + for (size_t i = 0; i < list.size(); ++i) { + call->bind_next(std::move(list[i])); + } + stack.push_back(std::move(call)); + } + + template <typename T> void copy_operator(const T &) { wire_operator(T::create()); } + template <typename T> void copy_call(const T &) { wire_call(T::create()); } + + //------------------------------------------------------------------------- + + // basic nodes + void visit(const Number &node) override { + stack.push_back(std::make_unique<Number>(node.value())); + } + void visit(const Symbol &node) override { + stack.push_back(std::make_unique<Symbol>(node.id())); + } + void visit(const String &node) override { + stack.push_back(std::make_unique<String>(node.value())); + } + void visit(const In &node) override { + for (size_t i = 0; i < node.num_entries(); ++i) { + // only String/Number allowed here; copy to stack + node.get_entry(i).accept(*this); + } + auto list = get_children(node.num_entries() + 1); + auto my_node = std::make_unique<In>(std::move(list[0])); + for (size_t i = 1; i < list.size(); ++i) { + my_node->add_entry(std::move(list[i])); + } + stack.push_back(std::move(my_node)); + } + void visit(const Neg &) override { + auto list = get_children(1); + stack.push_back(std::make_unique<Neg>(std::move(list[0]))); + } + void visit(const Not &) override { + auto list = get_children(1); + stack.push_back(std::make_unique<Not>(std::move(list[0]))); + } + void visit(const If &node) override { + auto list = get_children(3); + stack.push_back(std::make_unique<If>(std::move(list[0]), std::move(list[1]), std::move(list[2]), node.p_true())); + } + void visit(const Error &node) override { + stack.push_back(std::make_unique<Error>(node.message())); + } + + // tensor nodes + void visit(const TensorMap &node) override { not_implemented(node); } + void visit(const TensorJoin &node) override { not_implemented(node); } + void visit(const TensorMerge &node) override { 
not_implemented(node); } + void visit(const TensorReduce &node) override { not_implemented(node); } + void visit(const TensorRename &node) override { not_implemented(node); } + void visit(const TensorConcat &node) override { not_implemented(node); } + void visit(const TensorCreate &node) override { not_implemented(node); } + void visit(const TensorLambda &node) override { not_implemented(node); } + void visit(const TensorPeek &node) override { not_implemented(node); } + + // operator nodes + void visit(const Add &node) override { copy_operator(node); } + void visit(const Sub &node) override { copy_operator(node); } + void visit(const Mul &node) override { copy_operator(node); } + void visit(const Div &node) override { copy_operator(node); } + void visit(const Mod &node) override { copy_operator(node); } + void visit(const Pow &node) override { copy_operator(node); } + void visit(const Equal &node) override { copy_operator(node); } + void visit(const NotEqual &node) override { copy_operator(node); } + void visit(const Approx &node) override { copy_operator(node); } + void visit(const Less &node) override { copy_operator(node); } + void visit(const LessEqual &node) override { copy_operator(node); } + void visit(const Greater &node) override { copy_operator(node); } + void visit(const GreaterEqual &node) override { copy_operator(node); } + void visit(const And &node) override { copy_operator(node); } + void visit(const Or &node) override { copy_operator(node); } + + // call nodes + void visit(const Cos &node) override { copy_call(node); } + void visit(const Sin &node) override { copy_call(node); } + void visit(const Tan &node) override { copy_call(node); } + void visit(const Cosh &node) override { copy_call(node); } + void visit(const Sinh &node) override { copy_call(node); } + void visit(const Tanh &node) override { copy_call(node); } + void visit(const Acos &node) override { copy_call(node); } + void visit(const Asin &node) override { copy_call(node); } + void 
visit(const Atan &node) override { copy_call(node); } + void visit(const Exp &node) override { copy_call(node); } + void visit(const Log10 &node) override { copy_call(node); } + void visit(const Log &node) override { copy_call(node); } + void visit(const Sqrt &node) override { copy_call(node); } + void visit(const Ceil &node) override { copy_call(node); } + void visit(const Fabs &node) override { copy_call(node); } + void visit(const Floor &node) override { copy_call(node); } + void visit(const Atan2 &node) override { copy_call(node); } + void visit(const Ldexp &node) override { copy_call(node); } + void visit(const Pow2 &node) override { copy_call(node); } + void visit(const Fmod &node) override { copy_call(node); } + void visit(const Min &node) override { copy_call(node); } + void visit(const Max &node) override { copy_call(node); } + void visit(const IsNan &node) override { copy_call(node); } + void visit(const Relu &node) override { copy_call(node); } + void visit(const Sigmoid &node) override { copy_call(node); } + void visit(const Elu &node) override { copy_call(node); } + + // traverse nodes + bool open(const Node &) override { return !error; } + void close(const Node &node) override { node.accept(*this); } +}; + +CopyNode::~CopyNode() = default; + +} // namespace vespalib::eval::<unnamed> + +size_t +NodeTools::min_num_params(const Node &node) +{ + CountParams count_params; + node.traverse(count_params); + return count_params.result; +} + +Node_UP +NodeTools::copy(const Node &node) +{ + CopyNode copy_node; + node.traverse(copy_node); + return copy_node.get_result(); +} + +} // namespace vespalib::eval diff --git a/eval/src/vespa/eval/eval/node_tools.h b/eval/src/vespa/eval/eval/node_tools.h new file mode 100644 index 00000000000..a358056c90f --- /dev/null +++ b/eval/src/vespa/eval/eval/node_tools.h @@ -0,0 +1,16 @@ +// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+ +#pragma once + +#include <memory> + +namespace vespalib::eval { + +namespace nodes { struct Node; } + +struct NodeTools { + static size_t min_num_params(const nodes::Node &node); + static std::unique_ptr<nodes::Node> copy(const nodes::Node &node); +}; + +} // namespace vespalib::eval diff --git a/eval/src/vespa/eval/eval/tensor_function.h b/eval/src/vespa/eval/eval/tensor_function.h index 2cc70f50b15..584cfea2581 100644 --- a/eval/src/vespa/eval/eval/tensor_function.h +++ b/eval/src/vespa/eval/eval/tensor_function.h @@ -343,6 +343,9 @@ private: public: Lambda(const ValueType &result_type_in, const std::vector<size_t> &bindings_in, const Function &lambda_in, NodeTypes lambda_types_in) : Node(result_type_in), _bindings(bindings_in), _lambda(lambda_in.shared_from_this()), _lambda_types(std::move(lambda_types_in)) {} + const std::vector<size_t> bindings() const { return _bindings; } + const Function &lambda() const { return *_lambda; } + const NodeTypes &types() const { return _lambda_types; } static TensorSpec create_spec_impl(const ValueType &type, const LazyParams &params, const std::vector<size_t> &bind, const InterpretedFunction &fun); TensorSpec create_spec(const LazyParams &params, const InterpretedFunction &fun) const { return create_spec_impl(result_type(), params, _bindings, fun); diff --git a/eval/src/vespa/eval/eval/test/eval_fixture.cpp b/eval/src/vespa/eval/eval/test/eval_fixture.cpp index 7c4d4caf854..325fb208319 100644 --- a/eval/src/vespa/eval/eval/test/eval_fixture.cpp +++ b/eval/src/vespa/eval/eval/test/eval_fixture.cpp @@ -26,7 +26,14 @@ NodeTypes get_types(const Function &function, const ParamRepo &param_repo) { param_types.push_back(ValueType::from_spec(pos->second.value.type())); ASSERT_TRUE(!param_types.back().is_error()); } - return NodeTypes(function, param_types); + NodeTypes node_types(function, param_types); + if (!node_types.errors().empty()) { + for (const auto &msg: node_types.errors()) { + fprintf(stderr, "eval_fixture: type error: %s\n", 
msg.c_str()); + } + } + ASSERT_TRUE(node_types.errors().empty()); + return node_types; } std::set<size_t> get_mutable(const Function &function, const ParamRepo &param_repo) { diff --git a/eval/src/vespa/eval/tensor/dense/CMakeLists.txt b/eval/src/vespa/eval/tensor/dense/CMakeLists.txt index 9e4c9857bd1..1b9b51d6ad2 100644 --- a/eval/src/vespa/eval/tensor/dense/CMakeLists.txt +++ b/eval/src/vespa/eval/tensor/dense/CMakeLists.txt @@ -2,11 +2,14 @@ vespa_add_library(eval_tensor_dense OBJECT SOURCES dense_add_dimension_optimizer.cpp + dense_cell_range_function.cpp dense_dimension_combiner.cpp dense_dot_product_function.cpp dense_fast_rename_optimizer.cpp dense_inplace_join_function.cpp dense_inplace_map_function.cpp + dense_lambda_peek_function.cpp + dense_lambda_peek_optimizer.cpp dense_matmul_function.cpp dense_remove_dimension_optimizer.cpp dense_replace_type_function.cpp @@ -14,7 +17,6 @@ vespa_add_library(eval_tensor_dense OBJECT dense_tensor_address_mapper.cpp dense_tensor_cells_iterator.cpp dense_tensor_create_function.cpp - dense_lambda_peek_optimizer.cpp dense_tensor_modify.cpp dense_tensor_peek_function.cpp dense_tensor_reduce.cpp diff --git a/eval/src/vespa/eval/tensor/dense/dense_cell_range_function.cpp b/eval/src/vespa/eval/tensor/dense/dense_cell_range_function.cpp new file mode 100644 index 00000000000..7a616d91064 --- /dev/null +++ b/eval/src/vespa/eval/tensor/dense/dense_cell_range_function.cpp @@ -0,0 +1,55 @@ +// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+ +#include "dense_cell_range_function.h" +#include "dense_tensor_view.h" +#include <vespa/eval/eval/value.h> + +namespace vespalib::tensor { + +using eval::Value; +using eval::ValueType; +using eval::TensorFunction; +using eval::TensorEngine; +using eval::as; +using namespace eval::tensor_function; + +namespace { + +template <typename CT> +void my_cell_range_op(eval::InterpretedFunction::State &state, uint64_t param) { + const auto *self = (const DenseCellRangeFunction *)(param); + auto old_cells = DenseTensorView::typify_cells<CT>(state.peek(0)); + ConstArrayRef<CT> new_cells(&old_cells[self->offset()], self->length()); + state.pop_push(state.stash.create<DenseTensorView>(self->result_type(), TypedCells(new_cells))); +} + +struct MyCellRangeOp { + template <typename CT> + static auto get_fun() { return my_cell_range_op<CT>; } +}; + +} // namespace vespalib::tensor::<unnamed> + +DenseCellRangeFunction::DenseCellRangeFunction(const eval::ValueType &result_type, + const eval::TensorFunction &child, + size_t offset, size_t length) + : eval::tensor_function::Op1(result_type, child), + _offset(offset), + _length(length) +{ +} + +DenseCellRangeFunction::~DenseCellRangeFunction() +{ +} + +eval::InterpretedFunction::Instruction +DenseCellRangeFunction::compile_self(const TensorEngine &, Stash &) const +{ + static_assert(sizeof(uint64_t) == sizeof(this)); + assert(result_type().cell_type() == child().result_type().cell_type()); + auto op = select_1<MyCellRangeOp>(result_type().cell_type()); + return eval::InterpretedFunction::Instruction(op, (uint64_t)this); +} + +} // namespace vespalib::tensor diff --git a/eval/src/vespa/eval/tensor/dense/dense_cell_range_function.h b/eval/src/vespa/eval/tensor/dense/dense_cell_range_function.h new file mode 100644 index 00000000000..148f28495de --- /dev/null +++ b/eval/src/vespa/eval/tensor/dense/dense_cell_range_function.h @@ -0,0 +1,31 @@ +// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. 
See LICENSE in the project root. + +#pragma once + +#include <vespa/eval/eval/tensor_function.h> + +namespace vespalib::tensor { + +/** + * Tensor function creating a view to a continuous range of cells in + * another tensor. The value type will (typically) change, but the + * cell type must remain the same. + **/ +class DenseCellRangeFunction : public eval::tensor_function::Op1 +{ +private: + size_t _offset; + size_t _length; + +public: + DenseCellRangeFunction(const eval::ValueType &result_type, + const eval::TensorFunction &child, + size_t offset, size_t length); + ~DenseCellRangeFunction(); + size_t offset() const { return _offset; } + size_t length() const { return _length; } + eval::InterpretedFunction::Instruction compile_self(const eval::TensorEngine &engine, Stash &stash) const override; + bool result_is_mutable() const override { return child().result_is_mutable(); } +}; + +} // namespace vespalib::tensor diff --git a/eval/src/vespa/eval/tensor/dense/dense_lambda_peek_function.cpp b/eval/src/vespa/eval/tensor/dense/dense_lambda_peek_function.cpp new file mode 100644 index 00000000000..1cc928a25ca --- /dev/null +++ b/eval/src/vespa/eval/tensor/dense/dense_lambda_peek_function.cpp @@ -0,0 +1,91 @@ +// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+ +#include "dense_lambda_peek_function.h" +#include "dense_tensor_view.h" +#include <vespa/eval/eval/value.h> +#include <vespa/eval/eval/llvm/compile_cache.h> + +namespace vespalib::tensor { + +using eval::CompileCache; +using eval::Function; +using eval::InterpretedFunction; +using eval::PassParams; +using eval::TensorEngine; +using eval::TensorFunction; +using eval::Value; +using eval::ValueType; +using eval::as; +using namespace eval::tensor_function; + +namespace { + +struct Self { + const ValueType &result_type; + CompileCache::Token::UP compile_token; + Self(const ValueType &result_type_in, const Function &function) + : result_type(result_type_in), + compile_token(CompileCache::compile(function, PassParams::ARRAY)) {} +}; + +bool step_params(std::vector<double> ¶ms, const ValueType &type) { + const auto &dims = type.dimensions(); + for (size_t idx = params.size(); idx-- > 0; ) { + if (size_t(params[idx] += 1.0) < dims[idx].size) { + return true; + } else { + params[idx] = 0.0; + } + } + return false; +} + +template <typename DST_CT, typename SRC_CT> +void my_lambda_peek_op(InterpretedFunction::State &state, uint64_t param) { + const auto *self = (const Self *)(param); + auto src_cells = DenseTensorView::typify_cells<SRC_CT>(state.peek(0)); + ArrayRef<DST_CT> dst_cells = state.stash.create_array<DST_CT>(self->result_type.dense_subspace_size()); + DST_CT *dst = &dst_cells[0]; + std::vector<double> params(self->result_type.dimensions().size(), 0.0); + auto idx_fun = self->compile_token->get().get_function(); + do { + *dst++ = src_cells[size_t(idx_fun(¶ms[0]))]; + } while(step_params(params, self->result_type)); + state.pop_push(state.stash.create<DenseTensorView>(self->result_type, TypedCells(dst_cells))); +} + +struct MyLambdaPeekOp { + template <typename DST_CT, typename SRC_CT> + static auto get_fun() { return my_lambda_peek_op<DST_CT, SRC_CT>; } +}; + +} // namespace vespalib::tensor::<unnamed> + +DenseLambdaPeekFunction::DenseLambdaPeekFunction(const 
ValueType &result_type, + const TensorFunction &child, + std::shared_ptr<Function const> idx_fun) + : Op1(result_type, child), + _idx_fun(std::move(idx_fun)) +{ +} + +DenseLambdaPeekFunction::~DenseLambdaPeekFunction() +{ +} + +InterpretedFunction::Instruction +DenseLambdaPeekFunction::compile_self(const TensorEngine &, Stash &stash) const +{ + const Self &self = stash.create<Self>(result_type(), *_idx_fun); + auto op = select_2<MyLambdaPeekOp>(result_type().cell_type(), child().result_type().cell_type()); + static_assert(sizeof(uint64_t) == sizeof(&self)); + assert(child().result_type().is_dense()); + return InterpretedFunction::Instruction(op, (uint64_t)&self); +} + +vespalib::string +DenseLambdaPeekFunction::idx_fun_dump() const { + return _idx_fun->dump_as_lambda(); +} + +} // namespace vespalib::tensor diff --git a/eval/src/vespa/eval/tensor/dense/dense_lambda_peek_function.h b/eval/src/vespa/eval/tensor/dense/dense_lambda_peek_function.h new file mode 100644 index 00000000000..6d664aa76d2 --- /dev/null +++ b/eval/src/vespa/eval/tensor/dense/dense_lambda_peek_function.h @@ -0,0 +1,31 @@ +// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#pragma once + +#include <vespa/eval/eval/tensor_function.h> + +namespace vespalib::tensor { + +/** + * Tensor function creating a new dense tensor based on peeking cells + * of a single existing dense tensor. Which cells to peek is described + * by a single (compilable) function mapping the individual dimension + * indexes of the tensor to be created into global cell indexes of the + * tensor to be peeked. 
+ **/ +class DenseLambdaPeekFunction : public eval::tensor_function::Op1 +{ +private: + std::shared_ptr<eval::Function const> _idx_fun; + +public: + DenseLambdaPeekFunction(const eval::ValueType &result_type, + const eval::TensorFunction &child, + std::shared_ptr<eval::Function const> idx_fun); + ~DenseLambdaPeekFunction(); + eval::InterpretedFunction::Instruction compile_self(const eval::TensorEngine &engine, Stash &stash) const override; + vespalib::string idx_fun_dump() const; + bool result_is_mutable() const override { return true; } +}; + +} // namespace vespalib::tensor diff --git a/eval/src/vespa/eval/tensor/dense/dense_lambda_peek_optimizer.cpp b/eval/src/vespa/eval/tensor/dense/dense_lambda_peek_optimizer.cpp index 14954a77834..13544bf6c84 100644 --- a/eval/src/vespa/eval/tensor/dense/dense_lambda_peek_optimizer.cpp +++ b/eval/src/vespa/eval/tensor/dense/dense_lambda_peek_optimizer.cpp @@ -1,13 +1,204 @@ // Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. #include "dense_lambda_peek_optimizer.h" +#include "dense_tensor_view.h" +#include "dense_replace_type_function.h" +#include "dense_cell_range_function.h" +#include "dense_lambda_peek_function.h" +#include <vespa/eval/eval/value.h> +#include <vespa/eval/eval/node_tools.h> +#include <vespa/eval/eval/basic_nodes.h> +#include <vespa/eval/eval/operator_nodes.h> +#include <vespa/eval/eval/call_nodes.h> +#include <vespa/eval/eval/tensor_nodes.h> +#include <vespa/eval/eval/llvm/compile_cache.h> +#include <optional> + +using namespace vespalib::eval; +using namespace vespalib::eval::nodes; namespace vespalib::tensor { -const eval::TensorFunction & -DenseLambdaPeekOptimizer::optimize(const eval::TensorFunction &expr, Stash &) +namespace { + +// 'simple peek': deterministic peek into a single parameter with +// compilable dimension index expressions. 
+const TensorPeek *find_simple_peek(const tensor_function::Lambda &lambda) { + const Function &function = lambda.lambda(); + const size_t num_dims = lambda.result_type().dimensions().size(); + auto peek = as<TensorPeek>(function.root()); + if (peek && (function.num_params() == (num_dims + 1))) { + auto param = as<Symbol>(peek->get_child(0)); + if (param && (param->id() == num_dims)) { + for (size_t i = 1; i < peek->num_children(); ++i) { + const Node &dim_expr = peek->get_child(i); + if (NodeTools::min_num_params(dim_expr) > num_dims) { + return nullptr; + } + if (CompiledFunction::detect_issues(dim_expr)) { + return nullptr; + } + } + return peek; + } + } + return nullptr; +} + +Node_UP make_dim_expr(const TensorPeek::Dim &src_dim) { + if (src_dim.second.is_expr()) { + return NodeTools::copy(*src_dim.second.expr); + } else { + return std::make_unique<Number>(as_number(src_dim.second.label)); + } +} + +template <typename OP> +Node_UP make_op(Node_UP a, Node_UP b) { + auto res = std::make_unique<OP>(); + res->bind(std::move(a), std::move(b)); + return res; +} + +Node_UP make_floor(Node_UP a) { + auto res = std::make_unique<Floor>(); + res->bind_next(std::move(a)); + return res; +} + +struct PeekAnalyzer { + std::vector<size_t> dst_dim_sizes; + std::vector<size_t> src_dim_sizes; + std::vector<CompiledFunction::UP> src_dim_funs; + std::shared_ptr<Function const> src_idx_fun; + + struct CellRange { + size_t offset; + size_t length; + bool is_full(size_t num_cells) const { + return ((offset == 0) && (length == num_cells)); + } + }; + + struct Result { + bool valid; + std::optional<CellRange> cell_range; + static Result simple(CellRange range) { return Result{true, range}; } + static Result complex() { return Result{true, std::nullopt}; } + static Result invalid() { return Result{false, std::nullopt}; } + }; + + PeekAnalyzer(const ValueType &dst_type, const ValueType &src_type, + const TensorPeek::DimList &dim_list) + { + for (const auto dim: dst_type.dimensions()) { + 
dst_dim_sizes.push_back(dim.size); + } + for (const auto dim: src_type.dimensions()) { + src_dim_sizes.push_back(dim.size); + } + Node_UP idx_expr; + size_t num_params = dst_dim_sizes.size(); + for (size_t i = 0; i < dim_list.size(); ++i) { + auto dim_expr = make_dim_expr(dim_list[i]); + src_dim_funs.push_back(std::make_unique<CompiledFunction>(*dim_expr, num_params, PassParams::ARRAY)); + if (i == 0) { + idx_expr = std::move(dim_expr); + } else { + idx_expr = make_floor(std::move(idx_expr)); + idx_expr = make_op<Mul>(std::move(idx_expr), std::make_unique<Number>(src_dim_sizes[i])); + idx_expr = make_op<Add>(std::move(idx_expr), std::move(dim_expr)); + } + } + src_idx_fun = Function::create(std::move(idx_expr), dst_type.dimension_names()); + auto str = src_idx_fun->dump_as_lambda(); + } + + bool step_params(std::vector<double> ¶ms) { + for (size_t idx = params.size(); idx-- > 0; ) { + if (size_t(params[idx] += 1.0) < dst_dim_sizes[idx]) { + return true; + } else { + params[idx] = 0.0; + } + } + return false; + } + + size_t calculate_index(const std::vector<size_t> &src_address) { + size_t result = 0; + for (size_t i = 0; i < src_address.size(); ++i) { + result *= src_dim_sizes[i]; + result += src_address[i]; + } + return result; + } + + Result analyze_indexes() { + CellRange range{0,0}; + bool is_complex = false; + std::vector<double> params(dst_dim_sizes.size(), 0.0); + std::vector<size_t> src_address(src_dim_sizes.size(), 0); + do { + for (size_t i = 0; i < src_dim_funs.size(); ++i) { + auto dim_fun = src_dim_funs[i]->get_function(); + size_t dim_idx = dim_fun(¶ms[0]); + if (dim_idx >= src_dim_sizes[i]) { + return Result::invalid(); + } + src_address[i] = dim_idx; + } + size_t idx = calculate_index(src_address); + if (range.length == 0) { + range.offset = idx; + } + if (idx == (range.offset + range.length)) { + ++range.length; + } else { + is_complex = true; + } + } while(step_params(params)); + if (is_complex) { + return Result::complex(); + } + return 
Result::simple(range); + } +}; + +} // namespace vespalib::tensor::<unnamed> + +const TensorFunction & +DenseLambdaPeekOptimizer::optimize(const TensorFunction &expr, Stash &stash) { + if (auto lambda = as<tensor_function::Lambda>(expr)) { + if (auto peek = find_simple_peek(*lambda)) { + const ValueType &dst_type = lambda->result_type(); + const ValueType &src_type = lambda->types().get_type(peek->param()); + if (src_type.is_dense()) { + assert(lambda->bindings().size() == 1); + assert(src_type.dimensions().size() == peek->dim_list().size()); + size_t param_idx = lambda->bindings()[0]; + PeekAnalyzer analyzer(dst_type, src_type, peek->dim_list()); + auto result = analyzer.analyze_indexes(); + if (result.valid) { + const auto &get_param = tensor_function::inject(src_type, param_idx, stash); + if (result.cell_range && (dst_type.cell_type() == src_type.cell_type())) { + auto cell_range = result.cell_range.value(); + if (cell_range.is_full(src_type.dense_subspace_size())) { + return DenseReplaceTypeFunction::create_compact(dst_type, get_param, stash); + } else { + return stash.create<DenseCellRangeFunction>(dst_type, get_param, + cell_range.offset, cell_range.length); + } + } else { + return stash.create<DenseLambdaPeekFunction>(dst_type, get_param, + std::move(analyzer.src_idx_fun)); + } + } + } + } + } return expr; } -} +} // namespace vespalib::tensor |