author     Håvard Pettersen <havardpe@yahooinc.com>  2022-06-02 08:29:28 +0000
committer  Håvard Pettersen <havardpe@yahooinc.com>  2022-06-07 08:48:55 +0000
commit     fba0288cbe69a0ea3644ce085ecb84eb86cd1e9f (patch)
tree       7c82c65cfd7bfa71c0a24d784bb03518f588e0cf /eval
parent     38e71d4979792c42b0d163268ad1335cf3176b37 (diff)
112 mixed dot product optimization
Diffstat (limited to 'eval')
 -rw-r--r--  eval/CMakeLists.txt                                                                  1
 -rw-r--r--  eval/src/tests/instruction/mixed_112_dot_product/CMakeLists.txt                      9
 -rw-r--r--  eval/src/tests/instruction/mixed_112_dot_product/mixed_112_dot_product_test.cpp     92
 -rw-r--r--  eval/src/tests/instruction/sparse_112_dot_product/sparse_112_dot_product_test.cpp    2
 -rw-r--r--  eval/src/vespa/eval/eval/optimize_tensor_function.cpp                                2
 -rw-r--r--  eval/src/vespa/eval/eval/test/eval_fixture.h                                         4
 -rw-r--r--  eval/src/vespa/eval/instruction/CMakeLists.txt                                       1
 -rw-r--r--  eval/src/vespa/eval/instruction/mixed_112_dot_product.cpp                          261
 -rw-r--r--  eval/src/vespa/eval/instruction/mixed_112_dot_product.h                             31
9 files changed, 399 insertions(+), 4 deletions(-)
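
The "112" in the name refers to the ranks of the three inputs: a 1d sparse tensor, a 1d dense tensor, and a 2d mixed tensor whose mapped and indexed dimensions match the other two. A rough sketch of the kind of expression the new optimizer targets, in Vespa tensor type notation (the dimension names, sizes and cell type here are only illustrative; the new test below exercises the same pattern via GenSpec shorthand, e.g. reduce(x5_2*y8*x7_1y8,sum)):

    a : tensor<float>(x{})         (1d sparse)
    b : tensor<float>(y[8])        (1d dense)
    c : tensor<float>(x{},y[8])    (2d mixed)

    reduce(a * b * c, sum)         evaluates to a double

All three inputs must share the same cell type (float or double), and the full expression must reduce to a single double for the optimization to apply.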
diff --git a/eval/CMakeLists.txt b/eval/CMakeLists.txt
index 56db263fed4..3dca80885f7 100644
--- a/eval/CMakeLists.txt
+++ b/eval/CMakeLists.txt
@@ -72,6 +72,7 @@ vespa_define_module(
src/tests/instruction/inplace_map_function
src/tests/instruction/join_with_number
src/tests/instruction/l2_distance
+ src/tests/instruction/mixed_112_dot_product
src/tests/instruction/mixed_inner_product_function
src/tests/instruction/mixed_simple_join_function
src/tests/instruction/pow_as_map_optimizer
diff --git a/eval/src/tests/instruction/mixed_112_dot_product/CMakeLists.txt b/eval/src/tests/instruction/mixed_112_dot_product/CMakeLists.txt
new file mode 100644
index 00000000000..fae2f185afb
--- /dev/null
+++ b/eval/src/tests/instruction/mixed_112_dot_product/CMakeLists.txt
@@ -0,0 +1,9 @@
+# Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+vespa_add_executable(eval_mixed_112_dot_product_test_app TEST
+ SOURCES
+ mixed_112_dot_product_test.cpp
+ DEPENDS
+ vespaeval
+ GTest::GTest
+)
+vespa_add_test(NAME eval_mixed_112_dot_product_test_app COMMAND eval_mixed_112_dot_product_test_app)
diff --git a/eval/src/tests/instruction/mixed_112_dot_product/mixed_112_dot_product_test.cpp b/eval/src/tests/instruction/mixed_112_dot_product/mixed_112_dot_product_test.cpp
new file mode 100644
index 00000000000..d3c4d89cf47
--- /dev/null
+++ b/eval/src/tests/instruction/mixed_112_dot_product/mixed_112_dot_product_test.cpp
@@ -0,0 +1,92 @@
+// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include <vespa/eval/eval/fast_value.h>
+#include <vespa/eval/eval/simple_value.h>
+#include <vespa/eval/instruction/mixed_112_dot_product.h>
+#include <vespa/eval/eval/test/eval_fixture.h>
+#include <vespa/eval/eval/test/gen_spec.h>
+#include <vespa/vespalib/util/stringfmt.h>
+#include <vespa/vespalib/gtest/gtest.h>
+
+using namespace vespalib::eval;
+using namespace vespalib::eval::test;
+
+using vespalib::make_string_short::fmt;
+
+//-----------------------------------------------------------------------------
+
+struct FunInfo {
+ using LookFor = Mixed112DotProduct;
+ void verify(const LookFor &fun) const {
+ EXPECT_TRUE(fun.result_is_mutable());
+ }
+};
+
+void verify_optimized_cell_types(const vespalib::string &expr)
+{
+ CellTypeSpace stable(CellTypeUtils::list_stable_types(), 3);
+ CellTypeSpace unstable(CellTypeUtils::list_unstable_types(), 3);
+ EvalFixture::verify<FunInfo>(expr, {FunInfo()}, CellTypeSpace(stable).same());
+ EvalFixture::verify<FunInfo>(expr, {}, CellTypeSpace(stable).different());
+ EvalFixture::verify<FunInfo>(expr, {}, unstable);
+}
+
+void verify_optimized(const vespalib::string &expr, size_t num_params = 3)
+{
+ CellTypeSpace just_float({CellType::FLOAT}, num_params);
+ EvalFixture::verify<FunInfo>(expr, {FunInfo()}, just_float);
+}
+
+void verify_not_optimized(const vespalib::string &expr) {
+ CellTypeSpace just_double({CellType::DOUBLE}, 3);
+ EvalFixture::verify<FunInfo>(expr, {}, just_double);
+}
+
+//-----------------------------------------------------------------------------
+
+TEST(Mixed112DotProduct, expression_can_be_optimized)
+{
+ verify_optimized_cell_types("reduce(x5_2*y8*x7_1y8,sum)");
+}
+
+TEST(Mixed112DotProduct, inverse_dimension_matching_is_handled) {
+ verify_optimized("reduce(y5_2*x8*x8y7_1,sum)");
+}
+
+TEST(Mixed112DotProduct, different_input_placement_is_handled)
+{
+ std::array<vespalib::string,3> params = {"x3_1", "y3", "x3_1y3"};
+ for (size_t p1 = 0; p1 < params.size(); ++p1) {
+ for (size_t p2 = 0; p2 < params.size(); ++p2) {
+ for (size_t p3 = 0; p3 < params.size(); ++p3) {
+ if ((p1 != p2) && (p1 != p3) && (p2 != p3)) {
+ verify_optimized(fmt("reduce((%s*%s)*%s,sum)", params[p1].c_str(), params[p2].c_str(), params[p3].c_str()));
+ verify_optimized(fmt("reduce(%s*(%s*%s),sum)", params[p1].c_str(), params[p2].c_str(), params[p3].c_str()));
+ }
+ }
+ }
+ }
+}
+
+TEST(Mixed112DotProduct, expression_can_be_optimized_with_extra_tensors)
+{
+ verify_optimized("reduce((x5_2*y4)*(x5_1y4*x3_1),sum)", 4);
+ verify_optimized("reduce((x5_2*x3_1)*(y4*x5_1y4),sum)", 4);
+}
+
+TEST(Mixed112DotProduct, similar_expressions_are_not_optimized)
+{
+ verify_not_optimized("reduce(x5_2*y4*x5_1y4,prod)");
+ verify_not_optimized("reduce(x5_2+y4*x5_1y4,sum)");
+ verify_not_optimized("reduce(x5_2*y4+x5_1y4,sum)");
+ verify_not_optimized("reduce(x5_2*z4*x5_1y4,sum)");
+ verify_not_optimized("reduce(x5_2*y4*x5_1z4,sum)");
+ verify_not_optimized("reduce(x5_2*x1_1y4*x5_1y4,sum)");
+ verify_not_optimized("reduce(x5_2*y4*x5_1,sum)");
+ verify_not_optimized("reduce(x5*y4*x5y4,sum)");
+ verify_not_optimized("reduce(x5_1*y4_1*x5_1y4_1,sum)");
+}
+
+//-----------------------------------------------------------------------------
+
+GTEST_MAIN_RUN_ALL_TESTS()
diff --git a/eval/src/tests/instruction/sparse_112_dot_product/sparse_112_dot_product_test.cpp b/eval/src/tests/instruction/sparse_112_dot_product/sparse_112_dot_product_test.cpp
index 9325a203ff3..bab45afe114 100644
--- a/eval/src/tests/instruction/sparse_112_dot_product/sparse_112_dot_product_test.cpp
+++ b/eval/src/tests/instruction/sparse_112_dot_product/sparse_112_dot_product_test.cpp
@@ -13,8 +13,6 @@ using namespace vespalib::eval::test;
using vespalib::make_string_short::fmt;
-const ValueBuilderFactory &prod_factory = FastValueBuilderFactory::get();
-
//-----------------------------------------------------------------------------
struct FunInfo {
diff --git a/eval/src/vespa/eval/eval/optimize_tensor_function.cpp b/eval/src/vespa/eval/eval/optimize_tensor_function.cpp
index f9d3b1c6f54..b6258acc9cb 100644
--- a/eval/src/vespa/eval/eval/optimize_tensor_function.cpp
+++ b/eval/src/vespa/eval/eval/optimize_tensor_function.cpp
@@ -7,6 +7,7 @@
#include <vespa/eval/instruction/dense_dot_product_function.h>
#include <vespa/eval/instruction/sparse_dot_product_function.h>
#include <vespa/eval/instruction/sparse_112_dot_product.h>
+#include <vespa/eval/instruction/mixed_112_dot_product.h>
#include <vespa/eval/instruction/sparse_merge_function.h>
#include <vespa/eval/instruction/sparse_no_overlap_join_function.h>
#include <vespa/eval/instruction/sparse_full_overlap_join_function.h>
@@ -67,6 +68,7 @@ const TensorFunction &optimize_for_factory(const ValueBuilderFactory &, const Te
run_optimize_pass(root, [&stash](const Child &child)
{
child.set(Sparse112DotProduct::optimize(child.get(), stash));
+ child.set(Mixed112DotProduct::optimize(child.get(), stash));
child.set(BestSimilarityFunction::optimize(child.get(), stash));
child.set(L2Distance::optimize(child.get(), stash));
});
diff --git a/eval/src/vespa/eval/eval/test/eval_fixture.h b/eval/src/vespa/eval/eval/test/eval_fixture.h
index b078a900778..d2244cd4f5f 100644
--- a/eval/src/vespa/eval/eval/test/eval_fixture.h
+++ b/eval/src/vespa/eval/eval/test/eval_fixture.h
@@ -154,9 +154,9 @@ public:
EvalFixture fixture(prod_factory(), expr, param_repo, true, true);
EvalFixture slow_fixture(prod_factory(), expr, param_repo, false, false);
EvalFixture test_fixture(test_factory(), expr, param_repo, true, true);
- REQUIRE_EQ(fixture.result(), EvalFixture::ref(expr, param_repo));
- REQUIRE_EQ(fixture.result(), slow_fixture.result());
REQUIRE_EQ(fixture.result(), test_fixture.result());
+ REQUIRE_EQ(fixture.result(), slow_fixture.result());
+ REQUIRE_EQ(fixture.result(), EvalFixture::ref(expr, param_repo));
auto info = fixture.find_all<typename FunInfo::LookFor>();
REQUIRE_EQ(info.size(), fun_info.size());
for (size_t i = 0; i < fun_info.size(); ++i) {
diff --git a/eval/src/vespa/eval/instruction/CMakeLists.txt b/eval/src/vespa/eval/instruction/CMakeLists.txt
index b614606199c..f1ec8aa49a9 100644
--- a/eval/src/vespa/eval/instruction/CMakeLists.txt
+++ b/eval/src/vespa/eval/instruction/CMakeLists.txt
@@ -31,6 +31,7 @@ vespa_add_library(eval_instruction OBJECT
inplace_map_function.cpp
join_with_number_function.cpp
l2_distance.cpp
+ mixed_112_dot_product.cpp
mixed_inner_product_function.cpp
mixed_simple_join_function.cpp
pow_as_map_optimizer.cpp
diff --git a/eval/src/vespa/eval/instruction/mixed_112_dot_product.cpp b/eval/src/vespa/eval/instruction/mixed_112_dot_product.cpp
new file mode 100644
index 00000000000..8bfa4b07980
--- /dev/null
+++ b/eval/src/vespa/eval/instruction/mixed_112_dot_product.cpp
@@ -0,0 +1,261 @@
+// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include "mixed_112_dot_product.h"
+#include <vespa/eval/eval/fast_value.hpp>
+#include <vespa/vespalib/util/typify.h>
+#include <vespa/vespalib/util/require.h>
+#include <vespa/eval/eval/visit_stuff.h>
+#include <cblas.h>
+#include <algorithm>
+#include <optional>
+
+namespace vespalib::eval {
+
+using namespace tensor_function;
+using namespace operation;
+using namespace instruction;
+
+namespace {
+
+template <typename CT> double my_dot_product(const CT * lhs, const CT * rhs, size_t count);
+template <> double my_dot_product<double>(const double * lhs, const double * rhs, size_t count) {
+ return cblas_ddot(count, lhs, 1, rhs, 1);
+}
+template <> double my_dot_product<float>(const float * lhs, const float * rhs, size_t count) {
+ return cblas_sdot(count, lhs, 1, rhs, 1);
+}
+
+template <typename T, size_t N>
+ConstArrayRef<const T *> as_ccar(std::array<T *, N> &array) {
+ return {array.data(), array.size()};
+}
+
+template <typename T>
+ConstArrayRef<T> as_car(T &value) {
+ return {&value, 1};
+}
+
+constexpr std::array<size_t, 1> single_dim = { 0 };
+
+template <typename CT>
+double my_mixed_112_dot_product_fallback(const Value::Index &a_idx, const Value::Index &c_idx,
+ const CT *a_cells, const CT *b_cells, const CT *c_cells,
+ size_t dense_size) __attribute__((noinline));
+template <typename CT>
+double my_mixed_112_dot_product_fallback(const Value::Index &a_idx, const Value::Index &c_idx,
+ const CT *a_cells, const CT *b_cells, const CT *c_cells,
+ size_t dense_size)
+{
+ double result = 0.0;
+ size_t a_space = 0;
+ size_t c_space = 0;
+ std::array<string_id, 1> c_addr;
+ std::array<string_id*, 1> c_addr_ref = {&c_addr[0]};
+ auto outer = a_idx.create_view({});
+ auto model = c_idx.create_view({&single_dim[0], 1});
+ outer->lookup({});
+ while (outer->next_result(as_car(c_addr_ref[0]), a_space)) {
+ model->lookup(as_ccar(c_addr_ref));
+ if (model->next_result({}, c_space)) {
+ result += my_dot_product<CT>(b_cells, c_cells + (c_space * dense_size), dense_size) * a_cells[a_space];
+ }
+ }
+ return result;
+}
+
+template <typename CT>
+double my_fast_mixed_112_dot_product(const FastAddrMap *a_map, const FastAddrMap *c_map,
+ const CT *a_cells, const CT *b_cells, const CT *c_cells,
+ size_t dense_size)
+{
+ double result = 0.0;
+ const auto &a_labels = a_map->labels();
+ for (size_t a_space = 0; a_space < a_labels.size(); ++a_space) {
+ if (a_cells[a_space] != 0.0) { // handle pseudo-sparse input
+ auto c_space = c_map->lookup_singledim(a_labels[a_space]);
+ if (c_space != FastAddrMap::npos()) {
+ result += my_dot_product<CT>(b_cells, c_cells + (c_space * dense_size), dense_size) * a_cells[a_space];
+ }
+ }
+ }
+ return result;
+}
+
+template <typename CT>
+void my_mixed_112_dot_product_op(InterpretedFunction::State &state, uint64_t dense_size) {
+ const auto &a_idx = state.peek(2).index();
+ const auto &c_idx = state.peek(0).index();
+ const CT *a_cells = state.peek(2).cells().unsafe_typify<CT>().cbegin();
+ const CT *b_cells = state.peek(1).cells().unsafe_typify<CT>().cbegin();
+ const CT *c_cells = state.peek(0).cells().unsafe_typify<CT>().cbegin();
+ double result = __builtin_expect(are_fast(a_idx, c_idx), true)
+ ? my_fast_mixed_112_dot_product<CT>(&as_fast(a_idx).map, &as_fast(c_idx).map,
+ a_cells, b_cells, c_cells, dense_size)
+ : my_mixed_112_dot_product_fallback<CT>(a_idx, c_idx, a_cells, b_cells, c_cells, dense_size);
+ state.pop_pop_pop_push(state.stash.create<DoubleValue>(result));
+}
+
+InterpretedFunction::op_function my_select(CellType cell_type) {
+ if (cell_type == CellType::DOUBLE) {
+ return my_mixed_112_dot_product_op<double>;
+ }
+ if (cell_type == CellType::FLOAT) {
+ return my_mixed_112_dot_product_op<float>;
+ }
+ abort();
+}
+
+// Try to collect input nodes and organize them into a 3-way dot
+// product between one 1d sparse tensor, one 1d dense tensor and one
+// 2d mixed tensor. Cell types must be all float or all double.
+
+struct InputState {
+ std::optional<CellType> cell_type;
+ const TensorFunction *sparse = nullptr;
+ const TensorFunction *dense = nullptr;
+ const TensorFunction *mixed = nullptr;
+ bool failed = false;
+
+ void collect_cell_type(CellType ct) {
+ if (cell_type.has_value()) {
+ if (ct != cell_type.value()) {
+ failed = true;
+ }
+ } else {
+ cell_type = ct;
+ }
+ }
+
+ void try_save(const TensorFunction *&my_ptr, const TensorFunction &node) {
+ if (my_ptr == nullptr) {
+ my_ptr = &node;
+ } else {
+ failed = true;
+ }
+ }
+
+ void collect(const TensorFunction &node) {
+ const auto &type = node.result_type();
+ collect_cell_type(type.cell_type());
+ if (type.is_sparse()) {
+ try_save(sparse, node);
+ } else if (type.is_dense()) {
+ try_save(dense, node);
+ } else if (type.has_dimensions()) {
+ try_save(mixed, node);
+ } else {
+ failed = true;
+ }
+ }
+
+ bool verify() const {
+ // all parts must have been collected successfully
+ if (failed || !cell_type.has_value() || (sparse == nullptr) || (dense == nullptr) || (mixed == nullptr)) {
+ return false;
+ }
+ // common cell type must be appropriate
+ if ((cell_type.value() != CellType::FLOAT) && (cell_type.value() != CellType::DOUBLE)) {
+ return false;
+ }
+ // number of dimensions must match the expected 112 pattern
+ if ((sparse->result_type().dimensions().size() != 1) ||
+ (dense->result_type().dimensions().size() != 1) ||
+ (mixed->result_type().dimensions().size() != 2))
+ {
+ return false;
+ }
+ // the product of the sparse and dense tensors must fully overlap the mixed tensor
+ const ValueType::Dimension *mapped = &mixed->result_type().dimensions()[0];
+ const ValueType::Dimension *indexed = &mixed->result_type().dimensions()[1];
+ if (!mapped->is_mapped()) {
+ std::swap(mapped, indexed);
+ }
+ assert(mapped->is_mapped());
+ assert(indexed->is_indexed());
+ return ((*mapped == sparse->result_type().dimensions()[0]) &&
+ (*indexed == dense->result_type().dimensions()[0]));
+ }
+};
+
+// Try to find inputs that form a 112 mixed dot product.
+
+struct FindInputs {
+ const TensorFunction *a = nullptr;
+ const TensorFunction *b = nullptr;
+ const TensorFunction *c = nullptr;
+
+ bool try_match(const TensorFunction &one, const TensorFunction &two) {
+ auto join = as<Join>(two);
+ if (join && (join->function() == Mul::f)) {
+ InputState state;
+ state.collect(one);
+ state.collect(join->lhs());
+ state.collect(join->rhs());
+ if (state.verify()) {
+ a = state.sparse;
+ b = state.dense;
+ c = state.mixed;
+ return true;
+ }
+ }
+ return false;
+ }
+};
+
+} // namespace <unnamed>
+
+Mixed112DotProduct::Mixed112DotProduct(const TensorFunction &a_in,
+ const TensorFunction &b_in,
+ const TensorFunction &c_in)
+ : tensor_function::Node(DoubleValue::shared_type()),
+ _a(a_in),
+ _b(b_in),
+ _c(c_in)
+{
+}
+
+InterpretedFunction::Instruction
+Mixed112DotProduct::compile_self(const ValueBuilderFactory &, Stash &) const
+{
+ REQUIRE_EQ(_a.get().result_type().cell_type(), _b.get().result_type().cell_type());
+ REQUIRE_EQ(_a.get().result_type().cell_type(), _c.get().result_type().cell_type());
+ REQUIRE_EQ(_b.get().result_type().dense_subspace_size(), _c.get().result_type().dense_subspace_size());
+ auto op = my_select(_a.get().result_type().cell_type());
+ return InterpretedFunction::Instruction(op, _c.get().result_type().dense_subspace_size());
+}
+
+void
+Mixed112DotProduct::push_children(std::vector<Child::CREF> &children) const
+{
+ children.emplace_back(_a);
+ children.emplace_back(_b);
+ children.emplace_back(_c);
+}
+
+void
+Mixed112DotProduct::visit_children(vespalib::ObjectVisitor &visitor) const
+{
+ ::visit(visitor, "a", _a.get());
+ ::visit(visitor, "b", _b.get());
+ ::visit(visitor, "c", _c.get());
+}
+
+const TensorFunction &
+Mixed112DotProduct::optimize(const TensorFunction &expr, Stash &stash)
+{
+ auto reduce = as<Reduce>(expr);
+ if (reduce && (reduce->aggr() == Aggr::SUM) && expr.result_type().is_double()) {
+ auto join = as<Join>(reduce->child());
+ if (join && (join->function() == Mul::f)) {
+ FindInputs inputs;
+ if (inputs.try_match(join->lhs(), join->rhs()) ||
+ inputs.try_match(join->rhs(), join->lhs()))
+ {
+ return stash.create<Mixed112DotProduct>(*inputs.a, *inputs.b, *inputs.c);
+ }
+ }
+ }
+ return expr;
+}
+
+} // namespace
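
For readers who prefer plain code over the Value::Index plumbing above, here is a minimal, self-contained sketch of what the fast path computes, with the Vespa-specific types replaced by standard containers. The function ref_mixed_112_dot_product and its argument representations are hypothetical stand-ins for illustration only; they are not part of this patch.

    #include <string>
    #include <unordered_map>
    #include <vector>

    // a: 1d sparse tensor as label -> cell value
    // b: 1d dense tensor as a contiguous vector of cells
    // c: 2d mixed tensor as label -> dense subspace (each subspace has b.size() cells)
    double ref_mixed_112_dot_product(const std::unordered_map<std::string, double> &a,
                                     const std::vector<double> &b,
                                     const std::unordered_map<std::string, std::vector<double>> &c)
    {
        double result = 0.0;
        for (const auto &[label, a_cell] : a) {
            auto pos = c.find(label);          // labels missing from c contribute nothing
            if (pos != c.end()) {
                double dot = 0.0;              // dot product of b with the matching subspace of c
                for (size_t i = 0; i < b.size(); ++i) {
                    dot += b[i] * pos->second[i];
                }
                result += dot * a_cell;
            }
        }
        return result;
    }

The actual implementation above additionally skips a-cells that are exactly 0.0 (pseudo-sparse input) and delegates the inner loop to cblas_ddot/cblas_sdot.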
diff --git a/eval/src/vespa/eval/instruction/mixed_112_dot_product.h b/eval/src/vespa/eval/instruction/mixed_112_dot_product.h
new file mode 100644
index 00000000000..c02ccf65032
--- /dev/null
+++ b/eval/src/vespa/eval/instruction/mixed_112_dot_product.h
@@ -0,0 +1,31 @@
+// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include <vespa/eval/eval/tensor_function.h>
+
+namespace vespalib::eval {
+
+/**
+ * Tensor function for the dot product between (the expansion of a 1d
+ * sparse tensor and a 1d dense tensor) and (a 2d mixed tensor).
+ */
+class Mixed112DotProduct : public tensor_function::Node
+{
+private:
+ Child _a; // 1d sparse
+ Child _b; // 1d dense
+ Child _c; // 2d mixed
+
+public:
+ Mixed112DotProduct(const TensorFunction &a_in,
+ const TensorFunction &b_in,
+ const TensorFunction &c_in);
+ InterpretedFunction::Instruction compile_self(const ValueBuilderFactory &factory, Stash &stash) const override;
+ bool result_is_mutable() const override { return true; }
+ void push_children(std::vector<Child::CREF> &children) const final override;
+ void visit_children(vespalib::ObjectVisitor &visitor) const final override;
+ static const TensorFunction &optimize(const TensorFunction &expr, Stash &stash);
+};
+
+} // namespace
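
In formula form, the value documented in the class comment is roughly the following, with a the 1d sparse child, b the 1d dense child, c the 2d mixed child, and k ranging over the sparse labels present in both a and c:

    result = Σ_k  a[k] · Σ_i  b[i] · c[k,i]

Labels that occur in only one of a and c contribute nothing, matching the lookup-and-skip behaviour of the fast path in mixed_112_dot_product.cpp.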