summaryrefslogtreecommitdiffstats
path: root/eval
diff options
context:
space:
mode:
authorHåvard Pettersen <havardpe@oath.com>2020-05-14 12:50:57 +0000
committerHåvard Pettersen <havardpe@oath.com>2020-05-22 09:51:33 +0000
commit9e5649a43a601f5b4f017a454b0577434d0fdf7d (patch)
tree53298b04c0876de9215a2dff318f5dbe262ac25c /eval
parent2ad86490f46dd7989125fec51b542344b4e28368 (diff)
simple dense join
Diffstat (limited to 'eval')
-rw-r--r--eval/CMakeLists.txt1
-rw-r--r--eval/src/tests/tensor/dense_inplace_join_function/dense_inplace_join_function_test.cpp62
-rw-r--r--eval/src/tests/tensor/dense_simple_join_function/CMakeLists.txt8
-rw-r--r--eval/src/tests/tensor/dense_simple_join_function/dense_simple_join_function_test.cpp224
-rw-r--r--eval/src/vespa/eval/eval/test/eval_fixture.cpp18
-rw-r--r--eval/src/vespa/eval/eval/test/eval_fixture.h5
-rw-r--r--eval/src/vespa/eval/tensor/default_tensor_engine.cpp4
-rw-r--r--eval/src/vespa/eval/tensor/dense/CMakeLists.txt2
-rw-r--r--eval/src/vespa/eval/tensor/dense/dense_inplace_join_function.cpp114
-rw-r--r--eval/src/vespa/eval/tensor/dense/dense_inplace_join_function.h33
-rw-r--r--eval/src/vespa/eval/tensor/dense/dense_simple_join_function.cpp312
-rw-r--r--eval/src/vespa/eval/tensor/dense/dense_simple_join_function.h38
12 files changed, 626 insertions, 195 deletions
diff --git a/eval/CMakeLists.txt b/eval/CMakeLists.txt
index 29d3c192139..7471fc51fe7 100644
--- a/eval/CMakeLists.txt
+++ b/eval/CMakeLists.txt
@@ -39,6 +39,7 @@ vespa_define_module(
src/tests/tensor/dense_multi_matmul_function
src/tests/tensor/dense_remove_dimension_optimizer
src/tests/tensor/dense_replace_type_function
+ src/tests/tensor/dense_simple_join_function
src/tests/tensor/dense_single_reduce_function
src/tests/tensor/dense_tensor_create_function
src/tests/tensor/dense_tensor_peek_function
diff --git a/eval/src/tests/tensor/dense_inplace_join_function/dense_inplace_join_function_test.cpp b/eval/src/tests/tensor/dense_inplace_join_function/dense_inplace_join_function_test.cpp
index 80321ac3d22..ba2b1ba0023 100644
--- a/eval/src/tests/tensor/dense_inplace_join_function/dense_inplace_join_function_test.cpp
+++ b/eval/src/tests/tensor/dense_inplace_join_function/dense_inplace_join_function_test.cpp
@@ -5,7 +5,6 @@
#include <vespa/eval/eval/simple_tensor.h>
#include <vespa/eval/eval/simple_tensor_engine.h>
#include <vespa/eval/tensor/default_tensor_engine.h>
-#include <vespa/eval/tensor/dense/dense_inplace_join_function.h>
#include <vespa/eval/tensor/dense/dense_tensor.h>
#include <vespa/eval/eval/test/tensor_model.hpp>
#include <vespa/eval/eval/test/eval_fixture.h>
@@ -53,7 +52,7 @@ EvalFixture::ParamRepo make_params() {
}
EvalFixture::ParamRepo param_repo = make_params();
-void verify_optimized(const vespalib::string &expr, size_t cnt, size_t param_idx) {
+void verify_optimized(const vespalib::string &expr, size_t param_idx) {
EvalFixture fixture(prod_engine, expr, param_repo, true, true);
EXPECT_EQUAL(fixture.result(), EvalFixture::ref(expr, param_repo));
for (size_t i = 0; i < fixture.num_params(); ++i) {
@@ -64,23 +63,18 @@ void verify_optimized(const vespalib::string &expr, size_t cnt, size_t param_idx
EXPECT_NOT_EQUAL(fixture.get_param(i), fixture.result());
}
}
- auto info = fixture.find_all<DenseInplaceJoinFunction>();
- ASSERT_EQUAL(info.size(), cnt);
- for (size_t i = 0; i < cnt; ++i) {
- EXPECT_TRUE(info[i]->result_is_mutable());
- }
}
-void verify_p0_optimized(const vespalib::string &expr, size_t cnt) {
- verify_optimized(expr, cnt, 0);
+void verify_p0_optimized(const vespalib::string &expr) {
+ verify_optimized(expr, 0);
}
-void verify_p1_optimized(const vespalib::string &expr, size_t cnt) {
- verify_optimized(expr, cnt, 1);
+void verify_p1_optimized(const vespalib::string &expr) {
+ verify_optimized(expr, 1);
}
-void verify_p2_optimized(const vespalib::string &expr, size_t cnt) {
- verify_optimized(expr, cnt, 2);
+void verify_p2_optimized(const vespalib::string &expr) {
+ verify_optimized(expr, 2);
}
void verify_not_optimized(const vespalib::string &expr) {
@@ -89,21 +83,19 @@ void verify_not_optimized(const vespalib::string &expr) {
for (size_t i = 0; i < fixture.num_params(); ++i) {
EXPECT_NOT_EQUAL(fixture.get_param(i), fixture.result());
}
- auto info = fixture.find_all<DenseInplaceJoinFunction>();
- EXPECT_TRUE(info.empty());
}
TEST("require that mutable dense concrete tensors are optimized") {
- TEST_DO(verify_p0_optimized("mut_x5_A-mut_x5_B", 1));
- TEST_DO(verify_p0_optimized("mut_x5_A-con_x5_B", 1));
- TEST_DO(verify_p1_optimized("con_x5_A-mut_x5_B", 1));
- TEST_DO(verify_p0_optimized("mut_x5y3_A-mut_x5y3_B", 1));
- TEST_DO(verify_p0_optimized("mut_x5y3_A-con_x5y3_B", 1));
- TEST_DO(verify_p1_optimized("con_x5y3_A-mut_x5y3_B", 1));
+ TEST_DO(verify_p1_optimized("mut_x5_A-mut_x5_B"));
+ TEST_DO(verify_p0_optimized("mut_x5_A-con_x5_B"));
+ TEST_DO(verify_p1_optimized("con_x5_A-mut_x5_B"));
+ TEST_DO(verify_p1_optimized("mut_x5y3_A-mut_x5y3_B"));
+ TEST_DO(verify_p0_optimized("mut_x5y3_A-con_x5y3_B"));
+ TEST_DO(verify_p1_optimized("con_x5y3_A-mut_x5y3_B"));
}
TEST("require that self-join operations can be optimized") {
- TEST_DO(verify_p0_optimized("mut_x5_A+mut_x5_A", 1));
+ TEST_DO(verify_p0_optimized("mut_x5_A+mut_x5_A"));
}
TEST("require that join(tensor,scalar) operations are not optimized") {
@@ -111,15 +103,15 @@ TEST("require that join(tensor,scalar) operations are not optimized") {
TEST_DO(verify_not_optimized("mut_dbl_A-mut_x5_B"));
}
-TEST("require that join with different tensor shapes are not optimized") {
- TEST_DO(verify_not_optimized("mut_x5_A*mut_x5y3_B"));
+TEST("require that join with different tensor shapes are optimized") {
+ TEST_DO(verify_p1_optimized("mut_x5_A*mut_x5y3_B"));
}
TEST("require that inplace join operations can be chained") {
- TEST_DO(verify_p0_optimized("mut_x5_A-(mut_x5_B-mut_x5_C)", 2));
- TEST_DO(verify_p0_optimized("(mut_x5_A-con_x5_B)-con_x5_C", 2));
- TEST_DO(verify_p1_optimized("con_x5_A-(mut_x5_B-con_x5_C)", 2));
- TEST_DO(verify_p2_optimized("con_x5_A-(con_x5_B-mut_x5_C)", 2));
+ TEST_DO(verify_p2_optimized("mut_x5_A+(mut_x5_B+mut_x5_C)"));
+ TEST_DO(verify_p0_optimized("(mut_x5_A+con_x5_B)+con_x5_C"));
+ TEST_DO(verify_p1_optimized("con_x5_A+(mut_x5_B+con_x5_C)"));
+ TEST_DO(verify_p2_optimized("con_x5_A+(con_x5_B+mut_x5_C)"));
}
TEST("require that non-mutable tensors are not optimized") {
@@ -136,21 +128,13 @@ TEST("require that mapped tensors are not optimized") {
TEST_DO(verify_not_optimized("mut_x_sparse+mut_x_sparse"));
}
-TEST("require that inplace join can be debug dumped") {
- EvalFixture fixture(prod_engine, "con_x5_A-mut_x5_B", param_repo, true, true);
- auto info = fixture.find_all<DenseInplaceJoinFunction>();
- ASSERT_EQUAL(info.size(), 1u);
- EXPECT_TRUE(info[0]->result_is_mutable());
- fprintf(stderr, "%s\n", info[0]->as_string().c_str());
-}
-
TEST("require that optimization works with float cells") {
- TEST_DO(verify_p0_optimized("mut_x5f_D-mut_x5f_E", 1));
+ TEST_DO(verify_p1_optimized("mut_x5f_D-mut_x5f_E"));
}
TEST("require that overwritten value must have same cell type as result") {
- TEST_DO(verify_p0_optimized("mut_x5_A-mut_x5f_D", 1));
- TEST_DO(verify_p1_optimized("mut_x5f_D-mut_x5_A", 1));
+ TEST_DO(verify_p0_optimized("mut_x5_A-mut_x5f_D"));
+ TEST_DO(verify_p1_optimized("mut_x5f_D-mut_x5_A"));
TEST_DO(verify_not_optimized("con_x5_A-mut_x5f_D"));
TEST_DO(verify_not_optimized("mut_x5f_D-con_x5_A"));
}
diff --git a/eval/src/tests/tensor/dense_simple_join_function/CMakeLists.txt b/eval/src/tests/tensor/dense_simple_join_function/CMakeLists.txt
new file mode 100644
index 00000000000..8a2df392145
--- /dev/null
+++ b/eval/src/tests/tensor/dense_simple_join_function/CMakeLists.txt
@@ -0,0 +1,8 @@
+# Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+vespa_add_executable(eval_dense_simple_join_function_test_app TEST
+ SOURCES
+ dense_simple_join_function_test.cpp
+ DEPENDS
+ vespaeval
+)
+vespa_add_test(NAME eval_dense_simple_join_function_test_app COMMAND eval_dense_simple_join_function_test_app)
diff --git a/eval/src/tests/tensor/dense_simple_join_function/dense_simple_join_function_test.cpp b/eval/src/tests/tensor/dense_simple_join_function/dense_simple_join_function_test.cpp
new file mode 100644
index 00000000000..cac8af2bdce
--- /dev/null
+++ b/eval/src/tests/tensor/dense_simple_join_function/dense_simple_join_function_test.cpp
@@ -0,0 +1,224 @@
+// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include <vespa/vespalib/testkit/test_kit.h>
+#include <vespa/eval/eval/tensor_function.h>
+#include <vespa/eval/eval/simple_tensor.h>
+#include <vespa/eval/eval/simple_tensor_engine.h>
+#include <vespa/eval/tensor/default_tensor_engine.h>
+#include <vespa/eval/tensor/dense/dense_simple_join_function.h>
+#include <vespa/eval/eval/test/eval_fixture.h>
+#include <vespa/eval/eval/test/tensor_model.hpp>
+
+#include <vespa/vespalib/util/stringfmt.h>
+
+using namespace vespalib;
+using namespace vespalib::eval;
+using namespace vespalib::eval::test;
+using namespace vespalib::tensor;
+using namespace vespalib::eval::tensor_function;
+
+using vespalib::make_string_short::fmt;
+
+using Primary = DenseSimpleJoinFunction::Primary;
+using Overlap = DenseSimpleJoinFunction::Overlap;
+
+std::ostream &operator<<(std::ostream &os, Primary primary)
+{
+ switch(primary) {
+ case Primary::LHS: return os << "LHS";
+ case Primary::RHS: return os << "RHS";
+ }
+ abort();
+}
+
+std::ostream &operator<<(std::ostream &os, Overlap overlap)
+{
+ switch(overlap) {
+ case Overlap::FULL: return os << "FULL";
+ case Overlap::INNER: return os << "INNER";
+ case Overlap::OUTER: return os << "OUTER";
+ }
+ abort();
+}
+
+const TensorEngine &prod_engine = DefaultTensorEngine::ref();
+
+EvalFixture::ParamRepo make_params() {
+ return EvalFixture::ParamRepo()
+ .add("a", spec(1.5))
+ .add("b", spec(2.5))
+ .add("sparse", spec({x({"a"})}, N()))
+ .add("mixed", spec({x({"a"}),y(5)}, N()))
+ .add_cube("a", 1, "b", 1, "c", 1)
+ .add_cube("x", 1, "y", 1, "z", 1)
+ .add_cube("x", 3, "y", 5, "z", 3)
+ .add_vector("x", 5)
+ .add_dense({{"c", 5}, {"d", 1}})
+ .add_dense({{"b", 1}, {"c", 5}})
+ .add_matrix("x", 3, "y", 5, [](size_t idx){ return double((idx * 2) + 3); })
+ .add_matrix("x", 3, "y", 5, [](size_t idx){ return double((idx * 3) + 2); })
+ .add_vector("y", 5, [](size_t idx){ return double((idx * 2) + 3); })
+ .add_vector("y", 5, [](size_t idx){ return double((idx * 3) + 2); })
+ .add_matrix("y", 5, "z", 3, [](size_t idx){ return double((idx * 2) + 3); })
+ .add_matrix("y", 5, "z", 3, [](size_t idx){ return double((idx * 3) + 2); });
+}
+EvalFixture::ParamRepo param_repo = make_params();
+
+void verify_optimized(const vespalib::string &expr, Primary primary, Overlap overlap, bool pri_mut, size_t factor, int p_inplace = -1) {
+ EvalFixture slow_fixture(prod_engine, expr, param_repo, false);
+ EvalFixture fixture(prod_engine, expr, param_repo, true, true);
+ EXPECT_EQUAL(fixture.result(), EvalFixture::ref(expr, param_repo));
+ EXPECT_EQUAL(fixture.result(), slow_fixture.result());
+ auto info = fixture.find_all<DenseSimpleJoinFunction>();
+ ASSERT_EQUAL(info.size(), 1u);
+ EXPECT_TRUE(info[0]->result_is_mutable());
+ EXPECT_EQUAL(info[0]->primary(), primary);
+ EXPECT_EQUAL(info[0]->overlap(), overlap);
+ EXPECT_EQUAL(info[0]->primary_is_mutable(), pri_mut);
+ EXPECT_EQUAL(info[0]->factor(), factor);
+ EXPECT_TRUE((p_inplace == -1) || (fixture.num_params() > size_t(p_inplace)));
+ for (size_t i = 0; i < fixture.num_params(); ++i) {
+ if (i == size_t(p_inplace)) {
+ EXPECT_EQUAL(fixture.get_param(i), fixture.result());
+ } else {
+ EXPECT_NOT_EQUAL(fixture.get_param(i), fixture.result());
+ }
+ }
+}
+
+void verify_not_optimized(const vespalib::string &expr) {
+ EvalFixture slow_fixture(prod_engine, expr, param_repo, false);
+ EvalFixture fixture(prod_engine, expr, param_repo, true);
+ EXPECT_EQUAL(fixture.result(), EvalFixture::ref(expr, param_repo));
+ EXPECT_EQUAL(fixture.result(), slow_fixture.result());
+ auto info = fixture.find_all<DenseSimpleJoinFunction>();
+ EXPECT_TRUE(info.empty());
+}
+
+TEST("require that basic join is optimized") {
+ TEST_DO(verify_optimized("y5+y5$2", Primary::RHS, Overlap::FULL, false, 1));
+}
+
+TEST("require that unit join is optimized") {
+ TEST_DO(verify_optimized("a1b1c1+x1y1z1", Primary::RHS, Overlap::FULL, false, 1));
+}
+
+TEST("require that trivial dimensions do not affect overlap calculation") {
+ TEST_DO(verify_optimized("c5d1+b1c5", Primary::RHS, Overlap::FULL, false, 1));
+}
+
+TEST("require that outer nesting is preferred to inner nesting") {
+ TEST_DO(verify_optimized("a1b1c1+y5", Primary::RHS, Overlap::OUTER, false, 5));
+}
+
+TEST("require that non-subset join is not optimized") {
+ TEST_DO(verify_not_optimized("x5+y5"));
+}
+
+TEST("require that subset join with complex overlap is not optimized") {
+ TEST_DO(verify_not_optimized("x3y5z3+y5"));
+}
+
+struct LhsRhs {
+ vespalib::string lhs;
+ vespalib::string rhs;
+ size_t lhs_size;
+ size_t rhs_size;
+ Overlap overlap;
+ size_t factor;
+ LhsRhs(const vespalib::string &lhs_in, const vespalib::string &rhs_in,
+ size_t lhs_size_in, size_t rhs_size_in, Overlap overlap_in) noexcept
+ : lhs(lhs_in), rhs(rhs_in), lhs_size(lhs_size_in), rhs_size(rhs_size_in), overlap(overlap_in), factor(1)
+ {
+ if (lhs_size > rhs_size) {
+ ASSERT_EQUAL(lhs_size % rhs_size, 0u);
+ factor = (lhs_size / rhs_size);
+ } else {
+ ASSERT_EQUAL(rhs_size % lhs_size, 0u);
+ factor = (rhs_size / lhs_size);
+ }
+ }
+};
+
+vespalib::string adjust_param(const vespalib::string &str, bool float_cells, bool mut_cells, bool is_rhs) {
+ vespalib::string result = str;
+ if (mut_cells) {
+ result = "@" + result;
+ }
+ if (float_cells) {
+ result += "f";
+ }
+ if (is_rhs) {
+ result += "$2";
+ }
+ return result;
+}
+
+TEST("require that various parameter combinations work") {
+ for (bool left_float: {false, true}) {
+ for (bool right_float: {false, true}) {
+ bool float_result = (left_float && right_float);
+ for (bool left_mut: {false, true}) {
+ for (bool right_mut: {false, true}) {
+ for (const char *op_pattern: {"%s+%s", "%s-%s", "%s*%s"}) {
+ for (const LhsRhs &params:
+ { LhsRhs("y5", "y5", 5, 5, Overlap::FULL),
+ LhsRhs("y5", "x3y5", 5, 15, Overlap::INNER),
+ LhsRhs("y5", "y5z3", 5, 15, Overlap::OUTER),
+ LhsRhs("x3y5", "y5", 15, 5, Overlap::INNER),
+ LhsRhs("y5z3", "y5", 15, 5, Overlap::OUTER)})
+ {
+ vespalib::string left = adjust_param(params.lhs, left_float, left_mut, false);
+ vespalib::string right = adjust_param(params.rhs, right_float, right_mut, true);
+ vespalib::string expr = fmt(op_pattern, left.c_str(), right.c_str());
+ TEST_STATE(expr.c_str());
+ Primary primary = Primary::RHS;
+ if (params.overlap == Overlap::FULL) {
+ bool w_lhs = ((left_float == float_result) && left_mut);
+ bool w_rhs = ((right_float == float_result) && right_mut);
+ if (w_lhs && !w_rhs) {
+ primary = Primary::LHS;
+ }
+ } else if (params.lhs_size > params.rhs_size) {
+ primary = Primary::LHS;
+ }
+ bool pri_mut = (primary == Primary::LHS) ? left_mut : right_mut;
+ bool pri_float = (primary == Primary::LHS) ? left_float : right_float;
+ int p_inplace = -1;
+ if (pri_mut && (pri_float == float_result)) {
+ p_inplace = (primary == Primary::LHS) ? 0 : 1;
+ }
+ verify_optimized(expr, primary, params.overlap, pri_mut, params.factor, p_inplace);
+ }
+ }
+ }
+ }
+ }
+ }
+}
+
+TEST("require that scalar values are not optimized") {
+ TEST_DO(verify_not_optimized("a+b"));
+ TEST_DO(verify_not_optimized("a+y5"));
+ TEST_DO(verify_not_optimized("y5+b"));
+ TEST_DO(verify_not_optimized("a+sparse"));
+ TEST_DO(verify_not_optimized("sparse+a"));
+ TEST_DO(verify_not_optimized("a+mixed"));
+ TEST_DO(verify_not_optimized("mixed+a"));
+}
+
+TEST("require that mapped tensors are not optimized") {
+ TEST_DO(verify_not_optimized("sparse+sparse"));
+ TEST_DO(verify_not_optimized("sparse+y5"));
+ TEST_DO(verify_not_optimized("y5+sparse"));
+ TEST_DO(verify_not_optimized("sparse+mixed"));
+ TEST_DO(verify_not_optimized("mixed+sparse"));
+}
+
+TEST("require mixed tensors are not optimized") {
+ TEST_DO(verify_not_optimized("mixed+mixed"));
+ TEST_DO(verify_not_optimized("mixed+y5"));
+ TEST_DO(verify_not_optimized("y5+mixed"));
+}
+
+TEST_MAIN() { TEST_RUN_ALL(); }
diff --git a/eval/src/vespa/eval/eval/test/eval_fixture.cpp b/eval/src/vespa/eval/eval/test/eval_fixture.cpp
index dd27ba1f147..0b27ba1f2df 100644
--- a/eval/src/vespa/eval/eval/test/eval_fixture.cpp
+++ b/eval/src/vespa/eval/eval/test/eval_fixture.cpp
@@ -127,6 +127,13 @@ TensorSpec make_dense(const vespalib::string &type,
} // namespace vespalib::eval::test
ParamRepo &
+ParamRepo::add(const vespalib::string &name, TensorSpec value_in, bool is_mutable_in) {
+ ASSERT_TRUE(map.find(name) == map.end());
+ map.insert_or_assign(name, Param(std::move(value_in), is_mutable_in));
+ return *this;
+}
+
+ParamRepo &
EvalFixture::ParamRepo::add_vector(const char *d1, size_t s1, gen_fun_t gen)
{
return add_dense({{d1, s1}}, gen);
@@ -159,8 +166,15 @@ EvalFixture::ParamRepo::add_dense(const std::vector<std::pair<vespalib::string,
type += fmt("%s[%zu]", dim.first.c_str(), dim.second);
prev = dim.first;
}
- add(name, make_dense(fmt("tensor(%s)", type.c_str()), dims, gen));
- add(name + "f", make_dense(fmt("tensor<float>(%s)", type.c_str()), dims, gen));
+ int cpy = 1;
+ vespalib::string suffix = "";
+ while (map.find(name + suffix) != map.end()) {
+ suffix = fmt("$%d", ++cpy);
+ }
+ add(name + suffix, make_dense(fmt("tensor(%s)", type.c_str()), dims, gen));
+ add(name + "f" + suffix, make_dense(fmt("tensor<float>(%s)", type.c_str()), dims, gen));
+ add_mutable("@" + name + suffix, make_dense(fmt("tensor(%s)", type.c_str()), dims, gen));
+ add_mutable("@" + name + "f" + suffix, make_dense(fmt("tensor<float>(%s)", type.c_str()), dims, gen));
return *this;
}
diff --git a/eval/src/vespa/eval/eval/test/eval_fixture.h b/eval/src/vespa/eval/eval/test/eval_fixture.h
index 1010b5e58a8..31fcee49782 100644
--- a/eval/src/vespa/eval/eval/test/eval_fixture.h
+++ b/eval/src/vespa/eval/eval/test/eval_fixture.h
@@ -30,10 +30,7 @@ public:
using gen_fun_t = std::function<double(size_t)>;
static double gen_N(size_t seq) { return (seq + 1); }
ParamRepo() : map() {}
- ParamRepo &add(const vespalib::string &name, TensorSpec value_in, bool is_mutable_in) {
- map.insert_or_assign(name, Param(std::move(value_in), is_mutable_in));
- return *this;
- }
+ ParamRepo &add(const vespalib::string &name, TensorSpec value_in, bool is_mutable_in);
ParamRepo &add(const vespalib::string &name, const TensorSpec &value) {
return add(name, value, false);
}
diff --git a/eval/src/vespa/eval/tensor/default_tensor_engine.cpp b/eval/src/vespa/eval/tensor/default_tensor_engine.cpp
index a817c1454d8..2b13c7652c7 100644
--- a/eval/src/vespa/eval/tensor/default_tensor_engine.cpp
+++ b/eval/src/vespa/eval/tensor/default_tensor_engine.cpp
@@ -17,7 +17,7 @@
#include "dense/dense_single_reduce_function.h"
#include "dense/dense_remove_dimension_optimizer.h"
#include "dense/dense_lambda_peek_optimizer.h"
-#include "dense/dense_inplace_join_function.h"
+#include "dense/dense_simple_join_function.h"
#include "dense/dense_inplace_map_function.h"
#include "dense/vector_from_doubles_function.h"
#include "dense/dense_tensor_create_function.h"
@@ -291,7 +291,7 @@ DefaultTensorEngine::optimize(const TensorFunction &expr, Stash &stash) const
child.set(DenseLambdaPeekOptimizer::optimize(child.get(), stash));
child.set(DenseFastRenameOptimizer::optimize(child.get(), stash));
child.set(DenseInplaceMapFunction::optimize(child.get(), stash));
- child.set(DenseInplaceJoinFunction::optimize(child.get(), stash));
+ child.set(DenseSimpleJoinFunction::optimize(child.get(), stash));
child.set(DenseSingleReduceFunction::optimize(child.get(), stash));
nodes.pop_back();
}
diff --git a/eval/src/vespa/eval/tensor/dense/CMakeLists.txt b/eval/src/vespa/eval/tensor/dense/CMakeLists.txt
index 0131ff28398..141e5901988 100644
--- a/eval/src/vespa/eval/tensor/dense/CMakeLists.txt
+++ b/eval/src/vespa/eval/tensor/dense/CMakeLists.txt
@@ -6,7 +6,6 @@ vespa_add_library(eval_tensor_dense OBJECT
dense_dimension_combiner.cpp
dense_dot_product_function.cpp
dense_fast_rename_optimizer.cpp
- dense_inplace_join_function.cpp
dense_inplace_map_function.cpp
dense_lambda_peek_function.cpp
dense_lambda_peek_optimizer.cpp
@@ -14,6 +13,7 @@ vespa_add_library(eval_tensor_dense OBJECT
dense_multi_matmul_function.cpp
dense_remove_dimension_optimizer.cpp
dense_replace_type_function.cpp
+ dense_simple_join_function.cpp
dense_single_reduce_function.cpp
dense_tensor.cpp
dense_tensor_address_mapper.cpp
diff --git a/eval/src/vespa/eval/tensor/dense/dense_inplace_join_function.cpp b/eval/src/vespa/eval/tensor/dense/dense_inplace_join_function.cpp
deleted file mode 100644
index 2107c7661f2..00000000000
--- a/eval/src/vespa/eval/tensor/dense/dense_inplace_join_function.cpp
+++ /dev/null
@@ -1,114 +0,0 @@
-// Copyright 2018 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
-
-#include "dense_inplace_join_function.h"
-#include "dense_tensor_view.h"
-#include <vespa/vespalib/objects/objectvisitor.h>
-#include <vespa/eval/eval/value.h>
-
-namespace vespalib::tensor {
-
-using eval::Value;
-using eval::ValueType;
-using eval::TensorFunction;
-using eval::TensorEngine;
-using eval::as;
-using namespace eval::tensor_function;
-
-namespace {
-
-template <typename LCT, typename RCT>
-void my_inplace_join_left_op(eval::InterpretedFunction::State &state, uint64_t param) {
- join_fun_t function = (join_fun_t)param;
- auto lhs_cells = unconstify(DenseTensorView::typify_cells<LCT>(state.peek(1)));
- auto rhs_cells = DenseTensorView::typify_cells<RCT>(state.peek(0));
- for (size_t i = 0; i < lhs_cells.size(); ++i) {
- lhs_cells[i] = function(lhs_cells[i], rhs_cells[i]);
- }
- state.stack.pop_back();
-}
-
-template <typename LCT, typename RCT>
-void my_inplace_join_right_op(eval::InterpretedFunction::State &state, uint64_t param) {
- join_fun_t function = (join_fun_t)param;
- auto lhs_cells = DenseTensorView::typify_cells<LCT>(state.peek(1));
- auto rhs_cells = unconstify(DenseTensorView::typify_cells<RCT>(state.peek(0)));
- for (size_t i = 0; i < rhs_cells.size(); ++i) {
- rhs_cells[i] = function(lhs_cells[i], rhs_cells[i]);
- }
- const Value &result = state.stack.back();
- state.pop_pop_push(result);
-}
-
-struct MyInplaceJoinLeftOp {
- template <typename LCT, typename RCT>
- static auto get_fun() { return my_inplace_join_left_op<LCT,RCT>; }
-};
-
-struct MyInplaceJoinRightOp {
- template <typename LCT, typename RCT>
- static auto get_fun() { return my_inplace_join_right_op<LCT,RCT>; }
-};
-
-eval::InterpretedFunction::op_function my_select(CellType lct, CellType rct, bool write_left) {
- if (write_left) {
- return select_2<MyInplaceJoinLeftOp>(lct, rct);
- } else {
- return select_2<MyInplaceJoinRightOp>(lct, rct);
- }
-}
-
-} // namespace vespalib::tensor::<unnamed>
-
-
-DenseInplaceJoinFunction::DenseInplaceJoinFunction(const ValueType &result_type,
- const TensorFunction &lhs,
- const TensorFunction &rhs,
- join_fun_t function_in,
- bool write_left_in)
- : eval::tensor_function::Join(result_type, lhs, rhs, function_in),
- _write_left(write_left_in)
-{
-}
-
-DenseInplaceJoinFunction::~DenseInplaceJoinFunction()
-{
-}
-
-eval::InterpretedFunction::Instruction
-DenseInplaceJoinFunction::compile_self(const TensorEngine &, Stash &) const
-{
- auto op = my_select(lhs().result_type().cell_type(),
- rhs().result_type().cell_type(), _write_left);
- return eval::InterpretedFunction::Instruction(op, (uint64_t)function());
-}
-
-void
-DenseInplaceJoinFunction::visit_self(vespalib::ObjectVisitor &visitor) const
-{
- Super::visit_self(visitor);
- visitor.visitBool("write_left", _write_left);
-}
-
-const TensorFunction &
-DenseInplaceJoinFunction::optimize(const eval::TensorFunction &expr, Stash &stash)
-{
- if (auto join = as<Join>(expr)) {
- const TensorFunction &lhs = join->lhs();
- const TensorFunction &rhs = join->rhs();
- if (lhs.result_type().is_dense() &&
- (lhs.result_type().dimensions() == rhs.result_type().dimensions()))
- {
- if (lhs.result_is_mutable() && (lhs.result_type() == expr.result_type())) {
- return stash.create<DenseInplaceJoinFunction>(join->result_type(), lhs, rhs,
- join->function(), /* write left: */ true);
- }
- if (rhs.result_is_mutable() && (rhs.result_type() == expr.result_type())) {
- return stash.create<DenseInplaceJoinFunction>(join->result_type(), lhs, rhs,
- join->function(), /* write left: */ false);
- }
- }
- }
- return expr;
-}
-
-} // namespace vespalib::tensor
diff --git a/eval/src/vespa/eval/tensor/dense/dense_inplace_join_function.h b/eval/src/vespa/eval/tensor/dense/dense_inplace_join_function.h
deleted file mode 100644
index acd1a2d716b..00000000000
--- a/eval/src/vespa/eval/tensor/dense/dense_inplace_join_function.h
+++ /dev/null
@@ -1,33 +0,0 @@
-// Copyright 2018 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
-
-#pragma once
-
-#include <vespa/eval/eval/tensor_function.h>
-
-namespace vespalib::tensor {
-
-/**
- * Tensor function for inplace join operation on mutable dense tensors.
- **/
-class DenseInplaceJoinFunction : public eval::tensor_function::Join
-{
- using Super = eval::tensor_function::Join;
-public:
- using join_fun_t = ::vespalib::eval::tensor_function::join_fun_t;
-private:
- bool _write_left;
-public:
- DenseInplaceJoinFunction(const eval::ValueType &result_type,
- const TensorFunction &lhs,
- const TensorFunction &rhs,
- join_fun_t function_in,
- bool write_left_in);
- ~DenseInplaceJoinFunction();
- bool write_left() const { return _write_left; }
- bool result_is_mutable() const override { return true; }
- eval::InterpretedFunction::Instruction compile_self(const eval::TensorEngine &engine, Stash &stash) const override;
- void visit_self(vespalib::ObjectVisitor &visitor) const override;
- static const eval::TensorFunction &optimize(const eval::TensorFunction &expr, Stash &stash);
-};
-
-} // namespace vespalib::tensor
diff --git a/eval/src/vespa/eval/tensor/dense/dense_simple_join_function.cpp b/eval/src/vespa/eval/tensor/dense/dense_simple_join_function.cpp
new file mode 100644
index 00000000000..6b0d65c0743
--- /dev/null
+++ b/eval/src/vespa/eval/tensor/dense/dense_simple_join_function.cpp
@@ -0,0 +1,312 @@
+// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include "dense_simple_join_function.h"
+#include "dense_tensor_view.h"
+#include <vespa/vespalib/objects/objectvisitor.h>
+#include <vespa/eval/eval/value.h>
+#include <vespa/eval/eval/operation.h>
+#include <optional>
+#include <algorithm>
+
+namespace vespalib::tensor {
+
+using vespalib::ArrayRef;
+
+using eval::Value;
+using eval::ValueType;
+using eval::TensorFunction;
+using eval::TensorEngine;
+using eval::as;
+
+using namespace eval::operation;
+using namespace eval::tensor_function;
+
+using Primary = DenseSimpleJoinFunction::Primary;
+using Overlap = DenseSimpleJoinFunction::Overlap;
+
+using op_function = eval::InterpretedFunction::op_function;
+using Instruction = eval::InterpretedFunction::Instruction;
+using State = eval::InterpretedFunction::State;
+
+namespace {
+
+struct JoinParams {
+ const ValueType &result_type;
+ size_t factor;
+ join_fun_t function;
+ JoinParams(const ValueType &result_type_in, size_t factor_in, join_fun_t function_in)
+ : result_type(result_type_in), factor(factor_in), function(function_in) {}
+};
+
+struct CallFun {
+ join_fun_t function;
+ CallFun(const JoinParams &params) : function(params.function) {}
+ double eval(double a, double b) const { return function(a, b); }
+};
+
+struct AddFun {
+ AddFun(const JoinParams &) {}
+ template <typename A, typename B>
+ auto eval(A a, B b) const { return (a + b); }
+};
+
+struct MulFun {
+ MulFun(const JoinParams &) {}
+ template <typename A, typename B>
+ auto eval(A a, B b) const { return (a * b); }
+};
+
+// needed for asymmetric operations like Sub and Div
+template <typename Fun>
+struct SwapFun {
+ Fun fun;
+ SwapFun(const JoinParams &params) : fun(params) {}
+ template <typename A, typename B>
+ auto eval(A a, B b) const { return fun.eval(b, a); }
+};
+
+template <typename OCT, typename PCT, typename SCT, typename Fun>
+void apply_fun_1_to_n(OCT *dst, const PCT *pri, SCT sec, size_t n, const Fun &fun) {
+ for (size_t i = 0; i < n; ++i) {
+ dst[i] = fun.eval(pri[i], sec);
+ }
+}
+
+template <typename OCT, typename PCT, typename SCT, typename Fun>
+void apply_fun_n_to_n(OCT *dst, const PCT *pri, const SCT *sec, size_t n, const Fun &fun) {
+ for (size_t i = 0; i < n; ++i) {
+ dst[i] = fun.eval(pri[i], sec[i]);
+ }
+}
+
+template <typename OCT, bool pri_mut, typename PCT>
+ArrayRef<OCT> make_dst_cells(ConstArrayRef<PCT> pri_cells, Stash &stash) {
+ if constexpr (pri_mut && std::is_same<PCT,OCT>::value) {
+ return unconstify(pri_cells);
+ } else {
+ return stash.create_array<OCT>(pri_cells.size());
+ }
+}
+
+template <typename LCT, typename RCT, typename Fun, bool swap, Overlap overlap, bool pri_mut>
+void my_simple_join_op(State &state, uint64_t param) {
+ using PCT = typename std::conditional<swap,RCT,LCT>::type;
+ using SCT = typename std::conditional<swap,LCT,RCT>::type;
+ using OCT = typename eval::UnifyCellTypes<PCT,SCT>::type;
+ using OP = typename std::conditional<swap,SwapFun<Fun>,Fun>::type;
+ const JoinParams &params = *(JoinParams*)param;
+ OP my_op(params);
+ auto pri_cells = DenseTensorView::typify_cells<PCT>(state.peek(swap ? 0 : 1));
+ auto sec_cells = DenseTensorView::typify_cells<SCT>(state.peek(swap ? 1 : 0));
+ auto dst_cells = make_dst_cells<OCT, pri_mut>(pri_cells, state.stash);
+ if (overlap == Overlap::FULL) {
+ apply_fun_n_to_n(dst_cells.begin(), pri_cells.begin(), sec_cells.begin(), dst_cells.size(), my_op);
+ } else if (overlap == Overlap::OUTER) {
+ size_t offset = 0;
+ size_t factor = params.factor;
+ for (SCT cell: sec_cells) {
+ apply_fun_1_to_n(dst_cells.begin() + offset, pri_cells.begin() + offset, cell, factor, my_op);
+ offset += factor;
+ }
+ } else {
+ assert(overlap == Overlap::INNER);
+ size_t offset = 0;
+ size_t factor = params.factor;
+ for (size_t i = 0; i < factor; ++i) {
+ apply_fun_n_to_n(dst_cells.begin() + offset, pri_cells.begin() + offset, sec_cells.begin(), sec_cells.size(), my_op);
+ offset += sec_cells.size();
+ }
+ }
+ state.pop_pop_push(state.stash.create<DenseTensorView>(params.result_type, TypedCells(dst_cells)));
+}
+
+//-----------------------------------------------------------------------------
+
+template <typename Fun, bool swap, Overlap overlap, bool pri_mut>
+struct MySimpleJoinOp {
+ template <typename LCT, typename RCT>
+ static auto get_fun() { return my_simple_join_op<LCT,RCT,Fun,swap,overlap,pri_mut>; }
+};
+
+template <bool swap, Overlap overlap, bool pri_mut>
+op_function my_select_4(ValueType::CellType lct,
+ ValueType::CellType rct,
+ join_fun_t fun_hint)
+{
+ if (fun_hint == Add::f) {
+ return select_2<MySimpleJoinOp<AddFun,swap,overlap,pri_mut>>(lct, rct);
+ } else if (fun_hint == Mul::f) {
+ return select_2<MySimpleJoinOp<MulFun,swap,overlap,pri_mut>>(lct, rct);
+ } else {
+ return select_2<MySimpleJoinOp<CallFun,swap,overlap,pri_mut>>(lct, rct);
+ }
+}
+
+template <bool swap, Overlap overlap>
+op_function my_select_3(ValueType::CellType lct,
+ ValueType::CellType rct,
+ bool pri_mut,
+ join_fun_t fun_hint)
+{
+ if (pri_mut) {
+ return my_select_4<swap, overlap, true>(lct, rct, fun_hint);
+ } else {
+ return my_select_4<swap, overlap, false>(lct, rct, fun_hint);
+ }
+}
+
+// Dispatch level 2: lift the run-time Overlap value into a compile-time
+// template parameter. abort() is unreachable for valid enum values.
+template <bool swap>
+op_function my_select_2(ValueType::CellType lct,
+                        ValueType::CellType rct,
+                        Overlap overlap,
+                        bool pri_mut,
+                        join_fun_t fun_hint)
+{
+    switch (overlap) {
+    case Overlap::INNER: return my_select_3<swap, Overlap::INNER>(lct, rct, pri_mut, fun_hint);
+    case Overlap::OUTER: return my_select_3<swap, Overlap::OUTER>(lct, rct, pri_mut, fun_hint);
+    case Overlap::FULL: return my_select_3<swap, Overlap::FULL>(lct, rct, pri_mut, fun_hint);
+    }
+    abort();
+}
+
+// Top-level dispatcher: resolve which operand is primary into the 'swap'
+// template parameter (swap == true means RHS is primary, so lhs/rhs must
+// be exchanged inside the operation), then descend through the remaining
+// dispatch levels. abort() is unreachable for valid enum values.
+op_function my_select(ValueType::CellType lct,
+                      ValueType::CellType rct,
+                      Primary primary,
+                      Overlap overlap,
+                      bool pri_mut,
+                      join_fun_t fun_hint)
+{
+    switch (primary) {
+    case Primary::LHS: return my_select_2<false>(lct, rct, overlap, pri_mut, fun_hint);
+    case Primary::RHS: return my_select_2<true>(lct, rct, overlap, pri_mut, fun_hint);
+    }
+    abort();
+}
+
+//-----------------------------------------------------------------------------
+
+// A child result can double as the join output only when it is mutable
+// and already stored with the result's cell type.
+bool can_use_as_output(const TensorFunction &fun, ValueType::CellType result_cell_type) {
+    return (fun.result_is_mutable() && (fun.result_type().cell_type() == result_cell_type));
+}
+
+// Choose the primary operand: the one with the larger dense subspace
+// (it dictates the result geometry). On a size tie, prefer whichever
+// side could be reused as the output buffer.
+Primary select_primary(const TensorFunction &lhs, const TensorFunction &rhs, ValueType::CellType result_cell_type) {
+    size_t lhs_size = lhs.result_type().dense_subspace_size();
+    size_t rhs_size = rhs.result_type().dense_subspace_size();
+    if (lhs_size > rhs_size) {
+        return Primary::LHS;
+    } else if (rhs_size > lhs_size) {
+        return Primary::RHS;
+    } else {
+        bool can_write_lhs = can_use_as_output(lhs, result_cell_type);
+        bool can_write_rhs = can_use_as_output(rhs, result_cell_type);
+        if (can_write_lhs && !can_write_rhs) {
+            return Primary::LHS;
+        } else {
+            // prefer using rhs as output due to write recency
+            return Primary::RHS;
+        }
+    }
+}
+
+// Return a copy of dim_list with all trivial (size 1) dimensions removed;
+// trivial dimensions do not affect the cell layout, so overlap detection
+// ignores them.
+std::vector<ValueType::Dimension> strip_trivial(const std::vector<ValueType::Dimension> &dim_list) {
+    std::vector<ValueType::Dimension> result;
+    std::copy_if(dim_list.begin(), dim_list.end(), std::back_inserter(result),
+                 [](const auto &dim){ return (dim.size != 1); });
+    return result;
+}
+
+// Classify how the secondary's nontrivial dimensions overlap the
+// primary's. FULL: identical dimension lists. OUTER: secondary matches
+// the leading (outermost) dimensions of the primary. INNER: secondary
+// matches the trailing (innermost) dimensions. nullopt: secondary has
+// more nontrivial dimensions than the primary, or matches neither edge;
+// the simple join optimization then does not apply.
+std::optional<Overlap> detect_overlap(const TensorFunction &primary, const TensorFunction &secondary) {
+    std::vector<ValueType::Dimension> a = strip_trivial(primary.result_type().dimensions());
+    std::vector<ValueType::Dimension> b = strip_trivial(secondary.result_type().dimensions());
+    if (b.size() > a.size()) {
+        return std::nullopt;
+    } else if (b == a) {
+        return Overlap::FULL;
+    } else if (std::equal(b.begin(), b.end(), a.begin())) {
+        // prefer OUTER to INNER (for empty b) due to loop nesting
+        return Overlap::OUTER;
+    } else if (std::equal(b.rbegin(), b.rend(), a.rbegin())) {
+        return Overlap::INNER;
+    } else {
+        return std::nullopt;
+    }
+}
+
+// Convenience overload: order the operands by the chosen primary before
+// classifying the overlap.
+std::optional<Overlap> detect_overlap(const TensorFunction &lhs, const TensorFunction &rhs, Primary primary) {
+    return (primary == Primary::LHS) ? detect_overlap(lhs, rhs) : detect_overlap(rhs, lhs);
+}
+
+} // namespace vespalib::tensor::<unnamed>
+
+//-----------------------------------------------------------------------------
+
+// The Primary/Overlap classification is decided once by optimize(); the
+// instruction compiled later keys its template instantiation on it.
+DenseSimpleJoinFunction::DenseSimpleJoinFunction(const ValueType &result_type,
+                                                 const TensorFunction &lhs,
+                                                 const TensorFunction &rhs,
+                                                 join_fun_t function_in,
+                                                 Primary primary_in,
+                                                 Overlap overlap_in)
+    : Join(result_type, lhs, rhs, function_in),
+      _primary(primary_in),
+      _overlap(overlap_in)
+{
+}
+
+DenseSimpleJoinFunction::~DenseSimpleJoinFunction() = default;
+
+// Whether the child currently selected as primary produces a mutable
+// result; queried at compile time (not cached at construction).
+bool
+DenseSimpleJoinFunction::primary_is_mutable() const
+{
+    if (_primary == Primary::LHS) {
+        return lhs().result_is_mutable();
+    } else {
+        return rhs().result_is_mutable();
+    }
+}
+
+// Ratio between the primary and secondary dense subspace sizes, i.e. how
+// many times the secondary's cells are repeated across the primary.
+// Asserts even divisibility, which the FULL/OUTER/INNER overlap
+// classification in optimize() is expected to guarantee.
+size_t
+DenseSimpleJoinFunction::factor() const
+{
+    const TensorFunction &p = (_primary == Primary::LHS) ? lhs() : rhs();
+    const TensorFunction &s = (_primary == Primary::LHS) ? rhs() : lhs();
+    size_t a = p.result_type().dense_subspace_size();
+    size_t b = s.result_type().dense_subspace_size();
+    assert((a % b) == 0);
+    return (a / b);
+}
+
+// Build the interpreted-function instruction: the run-time parameters are
+// stash-allocated so they outlive this call, and the templated execution
+// function is selected from cell types, primary side, overlap kind,
+// mutability and the function hint. The static_assert guards packing the
+// parameter pointer into the instruction's 64-bit payload.
+Instruction
+DenseSimpleJoinFunction::compile_self(const TensorEngine &, Stash &stash) const
+{
+    const JoinParams &params = stash.create<JoinParams>(result_type(), factor(), function());
+    auto op = my_select(lhs().result_type().cell_type(),
+                        rhs().result_type().cell_type(),
+                        _primary, _overlap,
+                        primary_is_mutable(),
+                        function());
+    static_assert(sizeof(uint64_t) == sizeof(&params));
+    return Instruction(op, (uint64_t)(&params));
+}
+
+// Optimization hook: rewrite a generic Join of two dense tensors into a
+// DenseSimpleJoinFunction when one operand can act as primary and the
+// other overlaps it as FULL, OUTER or INNER. Returns the expression
+// unchanged when the pattern does not match.
+const TensorFunction &
+DenseSimpleJoinFunction::optimize(const TensorFunction &expr, Stash &stash)
+{
+    if (auto join = as<Join>(expr)) {
+        const TensorFunction &lhs = join->lhs();
+        const TensorFunction &rhs = join->rhs();
+        if (lhs.result_type().is_dense() && rhs.result_type().is_dense()) {
+            Primary primary = select_primary(lhs, rhs, join->result_type().cell_type());
+            std::optional<Overlap> overlap = detect_overlap(lhs, rhs, primary);
+            if (overlap.has_value()) {
+                const TensorFunction &ptf = (primary == Primary::LHS) ? lhs : rhs;
+                // sanity: the primary must already span the full result subspace
+                assert(ptf.result_type().dense_subspace_size() == join->result_type().dense_subspace_size());
+                return stash.create<DenseSimpleJoinFunction>(join->result_type(), lhs, rhs, join->function(),
+                                                             primary, overlap.value());
+            }
+        }
+    }
+    return expr;
+}
+
+} // namespace vespalib::tensor
diff --git a/eval/src/vespa/eval/tensor/dense/dense_simple_join_function.h b/eval/src/vespa/eval/tensor/dense/dense_simple_join_function.h
new file mode 100644
index 00000000000..b4b52fcb8ab
--- /dev/null
+++ b/eval/src/vespa/eval/tensor/dense/dense_simple_join_function.h
@@ -0,0 +1,38 @@
+// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include <vespa/eval/eval/tensor_function.h>
+
+namespace vespalib::tensor {
+
+/**
+ * Tensor function for simple join operations on dense tensors, covering
+ * the cases where one operand (the primary) spans the full result
+ * subspace and the other (the secondary) overlaps its nontrivial
+ * dimensions completely (FULL), as a leading prefix (OUTER) or as a
+ * trailing suffix (INNER). Whether the primary is mutable is tracked so
+ * that evaluation can presumably reuse its cells as the output buffer
+ * (NOTE(review): confirm against my_simple_join_op).
+ **/
+class DenseSimpleJoinFunction : public eval::tensor_function::Join
+{
+    using Super = eval::tensor_function::Join;
+public:
+    enum class Primary : uint8_t { LHS, RHS };
+    enum class Overlap : uint8_t { INNER, OUTER, FULL };
+    using join_fun_t = ::vespalib::eval::tensor_function::join_fun_t;
+private:
+    Primary _primary;   // which child spans the result subspace
+    Overlap _overlap;   // how the secondary's dimensions overlap the primary's
+public:
+    DenseSimpleJoinFunction(const eval::ValueType &result_type,
+                            const TensorFunction &lhs,
+                            const TensorFunction &rhs,
+                            join_fun_t function_in,
+                            Primary primary_in,
+                            Overlap overlap_in);
+    ~DenseSimpleJoinFunction() override;
+    Primary primary() const { return _primary; }
+    Overlap overlap() const { return _overlap; }
+    bool primary_is_mutable() const;
+    size_t factor() const;
+    eval::InterpretedFunction::Instruction compile_self(const eval::TensorEngine &engine, Stash &stash) const override;
+    static const eval::TensorFunction &optimize(const eval::TensorFunction &expr, Stash &stash);
+};
+
+} // namespace vespalib::tensor