summary refs log tree commit diff stats
path: root/eval
diff options
context:
space:
mode:
author Arne H Juul <arnej27959@users.noreply.github.com> 2020-11-03 18:23:35 +0100
committer GitHub <noreply@github.com> 2020-11-03 18:23:35 +0100
commit0997ddfd9b032f44fa9767b1602f5f9f2bd744b5 (patch)
treee8dcfdc0c790f72d5c85685f97b471afcbf4b0f4 /eval
parent0257c2fe0b871d73197be67cc845de4986ed00bd (diff)
parentd6718f2dc8c08b191e2af3003da59bff8a558401 (diff)
Merge pull request #15162 from vespa-engine/havardpe/optimize-tensor-function
untangle factory-based optimization pipeline from DefaultTensorEngine
Diffstat (limited to 'eval')
-rw-r--r--eval/src/tests/eval/tensor_function/tensor_function_test.cpp47
-rw-r--r--eval/src/tests/tensor/instruction_benchmark/instruction_benchmark.cpp45
-rw-r--r--eval/src/vespa/eval/eval/CMakeLists.txt1
-rw-r--r--eval/src/vespa/eval/eval/engine_or_factory.cpp11
-rw-r--r--eval/src/vespa/eval/eval/engine_or_factory.h1
-rw-r--r--eval/src/vespa/eval/eval/interpreted_function.cpp3
-rw-r--r--eval/src/vespa/eval/eval/optimize_tensor_function.cpp95
-rw-r--r--eval/src/vespa/eval/eval/optimize_tensor_function.h15
-rw-r--r--eval/src/vespa/eval/eval/test/eval_fixture.cpp3
-rw-r--r--eval/src/vespa/eval/tensor/default_tensor_engine.cpp2
10 files changed, 153 insertions, 70 deletions
diff --git a/eval/src/tests/eval/tensor_function/tensor_function_test.cpp b/eval/src/tests/eval/tensor_function/tensor_function_test.cpp
index 5c33cdacc44..9441061d6e1 100644
--- a/eval/src/tests/eval/tensor_function/tensor_function_test.cpp
+++ b/eval/src/tests/eval/tensor_function/tensor_function_test.cpp
@@ -38,9 +38,6 @@ struct EvalCtx {
ictx = std::make_unique<InterpretedFunction::Context>(*ifun);
return ifun->eval(*ictx, SimpleObjectParams(params));
}
- const TensorFunction &compile(const TensorFunction &expr) {
- return engine.optimize(expr, stash);
- }
Value::UP make_double(double value) {
return engine.from_spec(TensorSpec("double").add({}, value));
}
@@ -196,8 +193,7 @@ TEST("require that const_value works") {
const auto &fun = const_value(*my_const, ctx.stash);
EXPECT_TRUE(!fun.result_is_mutable());
EXPECT_EQUAL(expect->type(), fun.result_type());
- const auto &prog = ctx.compile(fun);
- TEST_DO(verify_equal(*expect, ctx.eval(prog)));
+ TEST_DO(verify_equal(*expect, ctx.eval(fun)));
}
TEST("require that tensor injection works") {
@@ -207,8 +203,7 @@ TEST("require that tensor injection works") {
const auto &fun = inject(ValueType::from_spec("tensor(x[2],y[2])"), a_id, ctx.stash);
EXPECT_TRUE(!fun.result_is_mutable());
EXPECT_EQUAL(expect->type(), fun.result_type());
- const auto &prog = ctx.compile(fun);
- TEST_DO(verify_equal(*expect, ctx.eval(prog)));
+ TEST_DO(verify_equal(*expect, ctx.eval(fun)));
}
TEST("require that partial tensor reduction works") {
@@ -218,8 +213,7 @@ TEST("require that partial tensor reduction works") {
const auto &fun = reduce(inject(ValueType::from_spec("tensor(x[3],y[2])"), a_id, ctx.stash), Aggr::SUM, {"y"}, ctx.stash);
EXPECT_TRUE(fun.result_is_mutable());
EXPECT_EQUAL(expect->type(), fun.result_type());
- const auto &prog = ctx.compile(fun);
- TEST_DO(verify_equal(*expect, ctx.eval(prog)));
+ TEST_DO(verify_equal(*expect, ctx.eval(fun)));
}
TEST("require that full tensor reduction works") {
@@ -228,8 +222,7 @@ TEST("require that full tensor reduction works") {
const auto &fun = reduce(inject(ValueType::from_spec("tensor(x[3],y[2])"), a_id, ctx.stash), Aggr::SUM, {}, ctx.stash);
EXPECT_TRUE(fun.result_is_mutable());
EXPECT_EQUAL(ValueType::from_spec("double"), fun.result_type());
- const auto &prog = ctx.compile(fun);
- const Value &result = ctx.eval(prog);
+ const Value &result = ctx.eval(fun);
EXPECT_TRUE(result.is_double());
EXPECT_EQUAL(21.0, result.as_double());
}
@@ -241,8 +234,7 @@ TEST("require that tensor map works") {
const auto &fun = map(inject(ValueType::from_spec("tensor(x{},y{})"), a_id, ctx.stash), operation::Neg::f, ctx.stash);
EXPECT_TRUE(fun.result_is_mutable());
EXPECT_EQUAL(expect->type(), fun.result_type());
- const auto &prog = ctx.compile(fun);
- TEST_DO(verify_equal(*expect, ctx.eval(prog)));
+ TEST_DO(verify_equal(*expect, ctx.eval(fun)));
}
TEST("require that tensor join works") {
@@ -255,8 +247,7 @@ TEST("require that tensor join works") {
operation::Mul::f, ctx.stash);
EXPECT_TRUE(fun.result_is_mutable());
EXPECT_EQUAL(expect->type(), fun.result_type());
- const auto &prog = ctx.compile(fun);
- TEST_DO(verify_equal(*expect, ctx.eval(prog)));
+ TEST_DO(verify_equal(*expect, ctx.eval(fun)));
}
TEST("require that tensor merge works") {
@@ -269,8 +260,7 @@ TEST("require that tensor merge works") {
operation::Add::f, ctx.stash);
EXPECT_TRUE(fun.result_is_mutable());
EXPECT_EQUAL(expect->type(), fun.result_type());
- const auto &prog = ctx.compile(fun);
- TEST_DO(verify_equal(*expect, ctx.eval(prog)));
+ TEST_DO(verify_equal(*expect, ctx.eval(fun)));
}
TEST("require that tensor concat works") {
@@ -283,8 +273,7 @@ TEST("require that tensor concat works") {
"y", ctx.stash);
EXPECT_TRUE(fun.result_is_mutable());
EXPECT_EQUAL(expect->type(), fun.result_type());
- const auto &prog = ctx.compile(fun);
- TEST_DO(verify_equal(*expect, ctx.eval(prog)));
+ TEST_DO(verify_equal(*expect, ctx.eval(fun)));
}
TEST("require that tensor create works") {
@@ -305,8 +294,7 @@ TEST("require that tensor create works") {
ctx.stash);
EXPECT_TRUE(fun.result_is_mutable());
EXPECT_EQUAL(expect->type(), fun.result_type());
- const auto &prog = ctx.compile(fun);
- TEST_DO(verify_equal(*expect, ctx.eval(prog)));
+ TEST_DO(verify_equal(*expect, ctx.eval(fun)));
}
TEST("require that single value tensor peek works") {
@@ -328,8 +316,7 @@ TEST("require that single value tensor peek works") {
ctx.stash);
EXPECT_TRUE(fun.result_is_mutable());
EXPECT_EQUAL(expect->type(), fun.result_type());
- const auto &prog = ctx.compile(fun);
- TEST_DO(verify_equal(*expect, ctx.eval(prog)));
+ TEST_DO(verify_equal(*expect, ctx.eval(fun)));
}
TEST("require that tensor subspace tensor peek works") {
@@ -340,8 +327,7 @@ TEST("require that tensor subspace tensor peek works") {
const auto &fun = peek(t, {{"x", "bar"}}, ctx.stash);
EXPECT_TRUE(fun.result_is_mutable());
EXPECT_EQUAL(expect->type(), fun.result_type());
- const auto &prog = ctx.compile(fun);
- TEST_DO(verify_equal(*expect, ctx.eval(prog)));
+ TEST_DO(verify_equal(*expect, ctx.eval(fun)));
}
TEST("require that automatic string conversion tensor peek works") {
@@ -353,8 +339,7 @@ TEST("require that automatic string conversion tensor peek works") {
const auto &fun = peek(t, {{"x", a}}, ctx.stash);
EXPECT_TRUE(fun.result_is_mutable());
EXPECT_TRUE(fun.result_type().is_double());
- const auto &prog = ctx.compile(fun);
- const Value &result = ctx.eval(prog);
+ const Value &result = ctx.eval(fun);
EXPECT_TRUE(result.is_double());
EXPECT_EQUAL(2.0, result.as_double());
}
@@ -367,8 +352,7 @@ TEST("require that tensor rename works") {
{"x"}, {"z"}, ctx.stash);
EXPECT_TRUE(fun.result_is_mutable());
EXPECT_EQUAL(expect->type(), fun.result_type());
- const auto &prog = ctx.compile(fun);
- TEST_DO(verify_equal(*expect, ctx.eval(prog)));
+ TEST_DO(verify_equal(*expect, ctx.eval(fun)));
}
TEST("require that if_node works") {
@@ -383,10 +367,9 @@ TEST("require that if_node works") {
inject(ValueType::from_spec("tensor(x[2])"), c_id, ctx.stash), ctx.stash);
EXPECT_TRUE(!fun.result_is_mutable());
EXPECT_EQUAL(expect_true->type(), fun.result_type());
- const auto &prog = ctx.compile(fun);
- TEST_DO(verify_equal(*expect_true, ctx.eval(prog)));
+ TEST_DO(verify_equal(*expect_true, ctx.eval(fun)));
ctx.replace_tensor(a_id, ctx.make_false());
- TEST_DO(verify_equal(*expect_false, ctx.eval(prog)));
+ TEST_DO(verify_equal(*expect_false, ctx.eval(fun)));
}
TEST("require that if_node result is mutable only when both children produce mutable results") {
diff --git a/eval/src/tests/tensor/instruction_benchmark/instruction_benchmark.cpp b/eval/src/tests/tensor/instruction_benchmark/instruction_benchmark.cpp
index 7182d66f8aa..bcd021b05fb 100644
--- a/eval/src/tests/tensor/instruction_benchmark/instruction_benchmark.cpp
+++ b/eval/src/tests/tensor/instruction_benchmark/instruction_benchmark.cpp
@@ -34,6 +34,7 @@
#include <vespa/eval/eval/value_codec.h>
#include <vespa/eval/eval/operation.h>
#include <vespa/eval/eval/tensor_function.h>
+#include <vespa/eval/eval/optimize_tensor_function.h>
#include <vespa/eval/tensor/default_tensor_engine.h>
#include <vespa/eval/tensor/default_value_builder_factory.h>
#include <vespa/vespalib/util/benchmark_timer.h>
@@ -136,21 +137,21 @@ struct Impl {
const auto &lhs_node = tensor_function::inject(lhs, 0, stash);
const auto &rhs_node = tensor_function::inject(rhs, 1, stash);
const auto &join_node = tensor_function::join(lhs_node, rhs_node, function, stash);
- const auto &node = optimize ? engine.optimize(join_node, stash) : join_node;
+ const auto &node = optimize ? optimize_tensor_function(engine, join_node, stash) : join_node;
return node.compile_self(engine, stash);
}
Instruction create_reduce(const ValueType &lhs, Aggr aggr, const std::vector<vespalib::string> &dims, Stash &stash) const {
// create a complete tensor function, but only compile the relevant instruction
const auto &lhs_node = tensor_function::inject(lhs, 0, stash);
const auto &reduce_node = tensor_function::reduce(lhs_node, aggr, dims, stash);
- const auto &node = optimize ? engine.optimize(reduce_node, stash) : reduce_node;
+ const auto &node = optimize ? optimize_tensor_function(engine, reduce_node, stash) : reduce_node;
return node.compile_self(engine, stash);
}
Instruction create_rename(const ValueType &lhs, const std::vector<vespalib::string> &from, const std::vector<vespalib::string> &to, Stash &stash) const {
// create a complete tensor function, but only compile the relevant instruction
const auto &lhs_node = tensor_function::inject(lhs, 0, stash);
const auto &rename_node = tensor_function::rename(lhs_node, from, to, stash);
- const auto &node = optimize ? engine.optimize(rename_node, stash) : rename_node;
+ const auto &node = optimize ? optimize_tensor_function(engine, rename_node, stash) : rename_node;
return node.compile_self(engine, stash);
}
Instruction create_merge(const ValueType &lhs, const ValueType &rhs, operation::op2_t function, Stash &stash) const {
@@ -158,7 +159,7 @@ struct Impl {
const auto &lhs_node = tensor_function::inject(lhs, 0, stash);
const auto &rhs_node = tensor_function::inject(rhs, 1, stash);
const auto &merge_node = tensor_function::merge(lhs_node, rhs_node, function, stash);
- const auto &node = optimize ? engine.optimize(merge_node, stash) : merge_node;
+ const auto &node = optimize ? optimize_tensor_function(engine, merge_node, stash) : merge_node;
return node.compile_self(engine, stash);
}
Instruction create_concat(const ValueType &lhs, const ValueType &rhs, const std::string &dimension, Stash &stash) const {
@@ -167,14 +168,14 @@ struct Impl {
const auto &rhs_node = tensor_function::inject(rhs, 1, stash);
const auto &concat_node = tensor_function::concat(lhs_node, rhs_node, dimension, stash);
return concat_node.compile_self(engine, stash);
- const auto &node = optimize ? engine.optimize(concat_node, stash) : concat_node;
+ const auto &node = optimize ? optimize_tensor_function(engine, concat_node, stash) : concat_node;
return node.compile_self(engine, stash);
}
Instruction create_map(const ValueType &lhs, operation::op1_t function, Stash &stash) const {
// create a complete tensor function, but only compile the relevant instruction
const auto &lhs_node = tensor_function::inject(lhs, 0, stash);
const auto &map_node = tensor_function::map(lhs_node, function, stash);
- const auto &node = optimize ? engine.optimize(map_node, stash) : map_node;
+ const auto &node = optimize ? optimize_tensor_function(engine, map_node, stash) : map_node;
return node.compile_self(engine, stash);
}
Instruction create_tensor_create(const ValueType &proto_type, const TensorSpec &proto, Stash &stash) const {
@@ -185,7 +186,7 @@ struct Impl {
spec.emplace(cell.first, my_double);
}
const auto &create_tensor_node = tensor_function::create(proto_type, spec, stash);
- const auto &node = optimize ? engine.optimize(create_tensor_node, stash) : create_tensor_node;
+ const auto &node = optimize ? optimize_tensor_function(engine, create_tensor_node, stash) : create_tensor_node;
return node.compile_self(engine, stash);
}
Instruction create_tensor_lambda(const ValueType &type, const Function &function, const ValueType &p0_type, Stash &stash) const {
@@ -194,7 +195,7 @@ struct Impl {
NodeTypes types(function, arg_types);
EXPECT_EQ(types.errors(), std::vector<vespalib::string>());
const auto &tensor_lambda_node = tensor_function::lambda(type, {0}, function, std::move(types), stash);
- const auto &node = optimize ? engine.optimize(tensor_lambda_node, stash) : tensor_lambda_node;
+ const auto &node = optimize ? optimize_tensor_function(engine, tensor_lambda_node, stash) : tensor_lambda_node;
return node.compile_self(engine, stash);
}
Instruction create_tensor_peek(const ValueType &type, const MyPeekSpec &my_spec, Stash &stash) const {
@@ -218,30 +219,30 @@ struct Impl {
}
}
const auto &peek_node = tensor_function::peek(my_param, spec, stash);
- const auto &node = optimize ? engine.optimize(peek_node, stash) : peek_node;
+ const auto &node = optimize ? optimize_tensor_function(engine, peek_node, stash) : peek_node;
return node.compile_self(engine, stash);
}
};
//-----------------------------------------------------------------------------
-Impl default_tensor_engine_impl(1, "DefaultTensorEngine", "OLD PROD", DefaultTensorEngine::ref(), false);
-Impl simple_value_impl(3, " SimpleValue", " SimpleV", SimpleValueBuilderFactory::get(), false);
-Impl fast_value_impl(0, " FastValue", "NEW PROD", FastValueBuilderFactory::get(), false);
-Impl optimized_fast_value_impl(2, "Optimized FastValue", "Optimize", FastValueBuilderFactory::get(), true);
-Impl default_tensor_value_impl(4, " DefaultValue", "DefaultV", DefaultValueBuilderFactory::get(), false);
-vespalib::string short_header("--------");
+Impl optimized_fast_value_impl(0, " Optimized FastValue", "NEW PROD", FastValueBuilderFactory::get(), true);
+Impl optimized_default_tensor_engine_impl(1, "Optimized DefaultTensorEngine", "OLD PROD", DefaultTensorEngine::ref(), true);
+Impl fast_value_impl(2, " FastValue", " FastV", FastValueBuilderFactory::get(), false);
+Impl default_tensor_engine_impl(3, " DefaultTensorEngine", "DefaultT", DefaultTensorEngine::ref(), false);
+Impl simple_value_impl(4, " SimpleValue", " SimpleV", SimpleValueBuilderFactory::get(), false);
+vespalib::string short_header("--------");
constexpr double budget = 5.0;
constexpr double best_limit = 0.95; // everything within 95% of best performance gets a star
-constexpr double bad_limit = 0.90; // BAD: new prod has performance lower than 90% of old prod
+constexpr double bad_limit = 0.90; // BAD: new prod has performance lower than 90% of old prod
constexpr double good_limit = 1.10; // GOOD: new prod has performance higher than 110% of old prod
-std::vector<CREF<Impl>> impl_list = {default_tensor_engine_impl,
- simple_value_impl,
- fast_value_impl,
+std::vector<CREF<Impl>> impl_list = {simple_value_impl,
optimized_fast_value_impl,
- default_tensor_value_impl};
+ optimized_default_tensor_engine_impl,
+ fast_value_impl,
+ default_tensor_engine_impl};
//-----------------------------------------------------------------------------
@@ -982,8 +983,8 @@ int main(int argc, char **argv) {
const std::string run_only_prod_option = "--limit-implementations";
if ((argc > 1) && (argv[1] == run_only_prod_option )) {
impl_list.clear();
- impl_list.push_back(fast_value_impl);
- impl_list.push_back(default_tensor_engine_impl);
+ impl_list.push_back(optimized_fast_value_impl);
+ impl_list.push_back(optimized_default_tensor_engine_impl);
++argv;
--argc;
}
diff --git a/eval/src/vespa/eval/eval/CMakeLists.txt b/eval/src/vespa/eval/eval/CMakeLists.txt
index 6c1f99265a7..d27de8e3d21 100644
--- a/eval/src/vespa/eval/eval/CMakeLists.txt
+++ b/eval/src/vespa/eval/eval/CMakeLists.txt
@@ -22,6 +22,7 @@ vespa_add_library(eval_eval OBJECT
node_types.cpp
operation.cpp
operator_nodes.cpp
+ optimize_tensor_function.cpp
param_usage.cpp
simple_tensor.cpp
simple_tensor_engine.cpp
diff --git a/eval/src/vespa/eval/eval/engine_or_factory.cpp b/eval/src/vespa/eval/eval/engine_or_factory.cpp
index e4f710be625..4a95a57e10e 100644
--- a/eval/src/vespa/eval/eval/engine_or_factory.cpp
+++ b/eval/src/vespa/eval/eval/engine_or_factory.cpp
@@ -36,17 +36,6 @@ EngineOrFactory::get_shared(EngineOrFactory hint)
return shared;
}
-const TensorFunction &
-EngineOrFactory::optimize(const TensorFunction &expr, Stash &stash) const {
- if (is_engine()) {
- return engine().optimize(expr, stash);
- } else if (&factory() == &FastValueBuilderFactory::get()) {
- return tensor::DefaultTensorEngine::ref().optimize(expr, stash);
- } else {
- return expr;
- }
-}
-
TensorSpec
EngineOrFactory::to_spec(const Value &value) const
{
diff --git a/eval/src/vespa/eval/eval/engine_or_factory.h b/eval/src/vespa/eval/eval/engine_or_factory.h
index 4784356ae8d..e1f7c503bcd 100644
--- a/eval/src/vespa/eval/eval/engine_or_factory.h
+++ b/eval/src/vespa/eval/eval/engine_or_factory.h
@@ -42,7 +42,6 @@ public:
const TensorEngine &engine() const { return *std::get<engine_t>(_value); }
const ValueBuilderFactory &factory() const { return *std::get<factory_t>(_value); }
// functions that can be called with either engine or factory
- const TensorFunction &optimize(const TensorFunction &expr, Stash &stash) const;
TensorSpec to_spec(const Value &value) const;
std::unique_ptr<Value> from_spec(const TensorSpec &spec) const;
void encode(const Value &value, nbostream &output) const;
diff --git a/eval/src/vespa/eval/eval/interpreted_function.cpp b/eval/src/vespa/eval/eval/interpreted_function.cpp
index 2b0e915d69a..1016b929574 100644
--- a/eval/src/vespa/eval/eval/interpreted_function.cpp
+++ b/eval/src/vespa/eval/eval/interpreted_function.cpp
@@ -6,6 +6,7 @@
#include "tensor_nodes.h"
#include "tensor_engine.h"
#include "make_tensor_function.h"
+#include "optimize_tensor_function.h"
#include "compile_tensor_function.h"
#include "simple_tensor_engine.h"
#include <vespa/vespalib/util/classname.h>
@@ -73,7 +74,7 @@ InterpretedFunction::InterpretedFunction(EngineOrFactory engine, const nodes::No
_tensor_engine(engine)
{
const TensorFunction &plain_fun = make_tensor_function(engine, root, types, _stash);
- const TensorFunction &optimized = engine.optimize(plain_fun, _stash);
+ const TensorFunction &optimized = optimize_tensor_function(engine, plain_fun, _stash);
_program = compile_tensor_function(engine, optimized, _stash);
}
diff --git a/eval/src/vespa/eval/eval/optimize_tensor_function.cpp b/eval/src/vespa/eval/eval/optimize_tensor_function.cpp
new file mode 100644
index 00000000000..83f806178e8
--- /dev/null
+++ b/eval/src/vespa/eval/eval/optimize_tensor_function.cpp
@@ -0,0 +1,95 @@
+// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include "optimize_tensor_function.h"
+#include "tensor_function.h"
+#include "tensor_engine.h"
+#include "simple_value.h"
+
+#include <vespa/eval/tensor/dense/dense_dot_product_function.h>
+#include <vespa/eval/tensor/dense/dense_xw_product_function.h>
+#include <vespa/eval/tensor/dense/dense_matmul_function.h>
+#include <vespa/eval/tensor/dense/dense_multi_matmul_function.h>
+#include <vespa/eval/tensor/dense/dense_fast_rename_optimizer.h>
+#include <vespa/eval/tensor/dense/dense_add_dimension_optimizer.h>
+#include <vespa/eval/tensor/dense/dense_single_reduce_function.h>
+#include <vespa/eval/tensor/dense/dense_remove_dimension_optimizer.h>
+#include <vespa/eval/tensor/dense/dense_lambda_peek_optimizer.h>
+#include <vespa/eval/tensor/dense/dense_lambda_function.h>
+#include <vespa/eval/tensor/dense/dense_simple_expand_function.h>
+#include <vespa/eval/tensor/dense/dense_simple_join_function.h>
+#include <vespa/eval/tensor/dense/dense_number_join_function.h>
+#include <vespa/eval/tensor/dense/dense_pow_as_map_optimizer.h>
+#include <vespa/eval/tensor/dense/dense_simple_map_function.h>
+#include <vespa/eval/tensor/dense/vector_from_doubles_function.h>
+#include <vespa/eval/tensor/dense/dense_tensor_create_function.h>
+#include <vespa/eval/tensor/dense/dense_tensor_peek_function.h>
+
+#include <vespa/log/log.h>
+LOG_SETUP(".eval.eval.optimize_tensor_function");
+
+namespace vespalib::eval {
+
+namespace {
+
+using namespace vespalib::tensor;
+
+const TensorFunction &optimize_for_factory(const ValueBuilderFactory &factory, const TensorFunction &expr, Stash &stash) {
+ if (&factory == &SimpleValueBuilderFactory::get()) {
+ // never optimize simple value evaluation
+ return expr;
+ }
+ using Child = TensorFunction::Child;
+ Child root(expr);
+ {
+ std::vector<Child::CREF> nodes({root});
+ for (size_t i = 0; i < nodes.size(); ++i) {
+ nodes[i].get().get().push_children(nodes);
+ }
+ while (!nodes.empty()) {
+ const Child &child = nodes.back().get();
+ child.set(DenseDotProductFunction::optimize(child.get(), stash));
+ child.set(DenseXWProductFunction::optimize(child.get(), stash));
+ child.set(DenseMatMulFunction::optimize(child.get(), stash));
+ child.set(DenseMultiMatMulFunction::optimize(child.get(), stash));
+ nodes.pop_back();
+ }
+ }
+ {
+ std::vector<Child::CREF> nodes({root});
+ for (size_t i = 0; i < nodes.size(); ++i) {
+ nodes[i].get().get().push_children(nodes);
+ }
+ while (!nodes.empty()) {
+ const Child &child = nodes.back().get();
+ child.set(DenseSimpleExpandFunction::optimize(child.get(), stash));
+ child.set(DenseAddDimensionOptimizer::optimize(child.get(), stash));
+ child.set(DenseRemoveDimensionOptimizer::optimize(child.get(), stash));
+ child.set(VectorFromDoublesFunction::optimize(child.get(), stash));
+ child.set(DenseTensorCreateFunction::optimize(child.get(), stash));
+ child.set(DenseTensorPeekFunction::optimize(child.get(), stash));
+ child.set(DenseLambdaPeekOptimizer::optimize(child.get(), stash));
+ child.set(DenseLambdaFunction::optimize(child.get(), stash));
+ child.set(DenseFastRenameOptimizer::optimize(child.get(), stash));
+ child.set(DensePowAsMapOptimizer::optimize(child.get(), stash));
+ child.set(DenseSimpleMapFunction::optimize(child.get(), stash));
+ child.set(DenseSimpleJoinFunction::optimize(child.get(), stash));
+ child.set(DenseNumberJoinFunction::optimize(child.get(), stash));
+ child.set(DenseSingleReduceFunction::optimize(child.get(), stash));
+ nodes.pop_back();
+ }
+ }
+ return root.get();
+}
+
+} // namespace vespalib::eval::<unnamed>
+
+const TensorFunction &optimize_tensor_function(EngineOrFactory engine, const TensorFunction &function, Stash &stash) {
+ LOG(debug, "tensor function before optimization:\n%s\n", function.as_string().c_str());
+ const TensorFunction &optimized = (engine.is_engine())
+ ? engine.engine().optimize(function, stash)
+ : optimize_for_factory(engine.factory(), function, stash);
+ LOG(debug, "tensor function after optimization:\n%s\n", optimized.as_string().c_str());
+ return optimized;
+}
+
+} // namespace vespalib::eval
diff --git a/eval/src/vespa/eval/eval/optimize_tensor_function.h b/eval/src/vespa/eval/eval/optimize_tensor_function.h
new file mode 100644
index 00000000000..bc2bc10cca6
--- /dev/null
+++ b/eval/src/vespa/eval/eval/optimize_tensor_function.h
@@ -0,0 +1,15 @@
+// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include "engine_or_factory.h"
+
+namespace vespalib { class Stash; }
+
+namespace vespalib::eval {
+
+struct TensorFunction;
+
+const TensorFunction &optimize_tensor_function(EngineOrFactory engine, const TensorFunction &function, Stash &stash);
+
+} // namespace vespalib::eval
diff --git a/eval/src/vespa/eval/eval/test/eval_fixture.cpp b/eval/src/vespa/eval/eval/test/eval_fixture.cpp
index a353f3a9ae2..b7655a6ee2f 100644
--- a/eval/src/vespa/eval/eval/test/eval_fixture.cpp
+++ b/eval/src/vespa/eval/eval/test/eval_fixture.cpp
@@ -3,6 +3,7 @@
#include <vespa/vespalib/testkit/test_kit.h>
#include "eval_fixture.h"
#include <vespa/eval/eval/make_tensor_function.h>
+#include <vespa/eval/eval/optimize_tensor_function.h>
#include <vespa/vespalib/util/stringfmt.h>
using vespalib::make_string_short::fmt;
@@ -203,7 +204,7 @@ EvalFixture::EvalFixture(EngineOrFactory engine,
_mutable_set(get_mutable(*_function, param_repo)),
_plain_tensor_function(make_tensor_function(_engine, _function->root(), _node_types, _stash)),
_patched_tensor_function(maybe_patch(allow_mutable, _plain_tensor_function, _mutable_set, _stash)),
- _tensor_function(optimized ? _engine.optimize(_patched_tensor_function, _stash) : _patched_tensor_function),
+ _tensor_function(optimized ? optimize_tensor_function(engine, _patched_tensor_function, _stash) : _patched_tensor_function),
_ifun(_engine, _tensor_function),
_ictx(_ifun),
_param_values(make_params(_engine, *_function, param_repo)),
diff --git a/eval/src/vespa/eval/tensor/default_tensor_engine.cpp b/eval/src/vespa/eval/tensor/default_tensor_engine.cpp
index d44e822792b..b50092c88b5 100644
--- a/eval/src/vespa/eval/tensor/default_tensor_engine.cpp
+++ b/eval/src/vespa/eval/tensor/default_tensor_engine.cpp
@@ -277,7 +277,6 @@ DefaultTensorEngine::optimize(const TensorFunction &expr, Stash &stash) const
{
using Child = TensorFunction::Child;
Child root(expr);
- LOG(debug, "tensor function before optimization:\n%s\n", root.get().as_string().c_str());
{
std::vector<Child::CREF> nodes({root});
for (size_t i = 0; i < nodes.size(); ++i) {
@@ -316,7 +315,6 @@ DefaultTensorEngine::optimize(const TensorFunction &expr, Stash &stash) const
nodes.pop_back();
}
}
- LOG(debug, "tensor function after optimization:\n%s\n", root.get().as_string().c_str());
return root.get();
}