diff options
author | Håvard Pettersen <havardpe@oath.com> | 2021-12-20 15:05:22 +0000 |
---|---|---|
committer | Håvard Pettersen <havardpe@oath.com> | 2021-12-21 14:12:39 +0000 |
commit | bc219a3cb4c01ce449584284aa7ff03afb9e9dca (patch) | |
tree | c2e2b417f2e5f3ee3148637e5b91a12b105fca90 /eval | |
parent | 28ae61202ad963955cf92719bab9b9d97181d5dd (diff) |
sparse 112 dot product
Diffstat (limited to 'eval')
9 files changed, 377 insertions, 0 deletions
diff --git a/eval/CMakeLists.txt b/eval/CMakeLists.txt index 2e0af3acfa7..eed4fa5ce66 100644 --- a/eval/CMakeLists.txt +++ b/eval/CMakeLists.txt @@ -75,6 +75,7 @@ vespa_define_module( src/tests/instruction/mixed_simple_join_function src/tests/instruction/pow_as_map_optimizer src/tests/instruction/remove_trivial_dimension_optimizer + src/tests/instruction/sparse_112_dot_product src/tests/instruction/sparse_dot_product_function src/tests/instruction/sparse_full_overlap_join_function src/tests/instruction/sparse_merge_function diff --git a/eval/src/tests/instruction/sparse_112_dot_product/CMakeLists.txt b/eval/src/tests/instruction/sparse_112_dot_product/CMakeLists.txt new file mode 100644 index 00000000000..af7f59f091b --- /dev/null +++ b/eval/src/tests/instruction/sparse_112_dot_product/CMakeLists.txt @@ -0,0 +1,9 @@ +# Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +vespa_add_executable(eval_sparse_112_dot_product_test_app TEST + SOURCES + sparse_112_dot_product_test.cpp + DEPENDS + vespaeval + GTest::GTest +) +vespa_add_test(NAME eval_sparse_112_dot_product_test_app COMMAND eval_sparse_112_dot_product_test_app) diff --git a/eval/src/tests/instruction/sparse_112_dot_product/sparse_112_dot_product_test.cpp b/eval/src/tests/instruction/sparse_112_dot_product/sparse_112_dot_product_test.cpp new file mode 100644 index 00000000000..7dcddc3bf80 --- /dev/null +++ b/eval/src/tests/instruction/sparse_112_dot_product/sparse_112_dot_product_test.cpp @@ -0,0 +1,88 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#include <vespa/eval/eval/fast_value.h> +#include <vespa/eval/eval/simple_value.h> +#include <vespa/eval/instruction/sparse_112_dot_product.h> +#include <vespa/eval/eval/test/eval_fixture.h> +#include <vespa/eval/eval/test/gen_spec.h> +#include <vespa/vespalib/util/stringfmt.h> +#include <vespa/vespalib/gtest/gtest.h> + +using namespace vespalib::eval; +using namespace vespalib::eval::test; + +using vespalib::make_string_short::fmt; + +const ValueBuilderFactory &prod_factory = FastValueBuilderFactory::get(); + +//----------------------------------------------------------------------------- + +struct FunInfo { + using LookFor = Sparse112DotProduct; + void verify(const LookFor &fun) const { + EXPECT_TRUE(fun.result_is_mutable()); + } +}; + +void verify_optimized_cell_types(const vespalib::string &expr) +{ + CellTypeSpace types(CellTypeUtils::list_types(), 3); + EvalFixture::verify<FunInfo>(expr, {FunInfo()}, CellTypeSpace(types).same()); + EvalFixture::verify<FunInfo>(expr, {}, CellTypeSpace(types).different()); +} + +void verify_optimized(const vespalib::string &expr, size_t num_params = 3) +{ + CellTypeSpace just_float({CellType::FLOAT}, num_params); + EvalFixture::verify<FunInfo>(expr, {FunInfo()}, just_float); +} + +void verify_not_optimized(const vespalib::string &expr) { + CellTypeSpace just_double({CellType::DOUBLE}, 3); + EvalFixture::verify<FunInfo>(expr, {}, just_double); +} + +//----------------------------------------------------------------------------- + +TEST(Sparse112DotProduct, expression_can_be_optimized) +{ + verify_optimized_cell_types("reduce(x5_2*y4_2*x5_1y4_1,sum)"); +} + +TEST(Sparse112DotProduct, different_input_placement_is_handeled) +{ + std::array<vespalib::string,3> params = {"x3_1", "y3_1", "x3_1y3_1"}; + for (size_t p1 = 0; p1 < params.size(); ++p1) { + for (size_t p2 = 0; p2 < params.size(); ++p2) { + for (size_t p3 = 0; p3 < params.size(); ++p3) { + if ((p1 != p2) && (p1 != p3) && (p2 != p3)) { + verify_optimized(fmt("reduce((%s*%s)*%s,sum)", params[p1].c_str(), params[p2].c_str(), params[p3].c_str())); + verify_optimized(fmt("reduce(%s*(%s*%s),sum)", params[p1].c_str(), params[p2].c_str(), params[p3].c_str())); + } + } + } + } +} + +TEST(Sparse112DotProduct, expression_can_be_optimized_with_extra_tensors) +{ + verify_optimized("reduce((x5_2*y4_2)*(x5_1y4_1*x3_1),sum)", 4); + verify_optimized("reduce((x5_2*x3_1)*(y4_2*x5_1y4_1),sum)", 4); +} + +TEST(Sparse112DotProduct, similar_expressions_are_not_optimized) +{ + verify_not_optimized("reduce(x5_2*y4_2*x5_1y4_1,prod)"); + verify_not_optimized("reduce(x5_2+y4_2*x5_1y4_1,sum)"); + verify_not_optimized("reduce(x5_2*y4_2+x5_1y4_1,sum)"); + verify_not_optimized("reduce(x5_2*z4_2*x5_1y4_1,sum)"); + verify_not_optimized("reduce(x5_2*y4_2*x5_1z4_1,sum)"); + verify_not_optimized("reduce(x5_2*x1_1y4_2*x5_1y4_1,sum)"); + verify_not_optimized("reduce(x5_2*y4_2*x5_1,sum)"); + verify_not_optimized("reduce(x5*y4*x5y4,sum)"); + verify_not_optimized("reduce(x5*y4_1*x5y4_1,sum)"); +} + +//----------------------------------------------------------------------------- + +GTEST_MAIN_RUN_ALL_TESTS() diff --git a/eval/src/vespa/eval/eval/fast_value.hpp b/eval/src/vespa/eval/eval/fast_value.hpp index 66b44dbbf49..8a07b049a69 100644 --- a/eval/src/vespa/eval/eval/fast_value.hpp +++ b/eval/src/vespa/eval/eval/fast_value.hpp @@ -153,6 +153,10 @@ inline bool are_fast(const Value::Index &a, const Value::Index &b) { return (is_fast(a) && is_fast(b)); } +inline bool are_fast(const Value::Index &a, const Value::Index &b, const Value::Index &c) { + return (is_fast(a) && is_fast(b) && is_fast(c)); +} + constexpr const FastValueIndex &as_fast(const Value::Index &index) { return static_cast<const FastValueIndex &>(index); } diff --git a/eval/src/vespa/eval/eval/interpreted_function.h b/eval/src/vespa/eval/eval/interpreted_function.h index b5eaf3a8b9c..57d7f79caf4 100644 --- a/eval/src/vespa/eval/eval/interpreted_function.h +++ b/eval/src/vespa/eval/eval/interpreted_function.h @@ -50,6 +50,11 @@ public: stack.pop_back(); stack.back() = value; } + void pop_pop_pop_push(const Value &value) { + stack.pop_back(); + stack.pop_back(); + stack.back() = value; + } void pop_n_push(size_t n, const Value &value) { stack.resize(stack.size() - (n - 1), value); stack.back() = value; diff --git a/eval/src/vespa/eval/eval/optimize_tensor_function.cpp b/eval/src/vespa/eval/eval/optimize_tensor_function.cpp index e1520d4deb2..f9d3b1c6f54 100644 --- a/eval/src/vespa/eval/eval/optimize_tensor_function.cpp +++ b/eval/src/vespa/eval/eval/optimize_tensor_function.cpp @@ -6,6 +6,7 @@ #include <vespa/eval/instruction/dense_dot_product_function.h> #include <vespa/eval/instruction/sparse_dot_product_function.h> +#include <vespa/eval/instruction/sparse_112_dot_product.h> #include <vespa/eval/instruction/sparse_merge_function.h> #include <vespa/eval/instruction/sparse_no_overlap_join_function.h> #include <vespa/eval/instruction/sparse_full_overlap_join_function.h> @@ -65,6 +66,7 @@ const TensorFunction &optimize_for_factory(const ValueBuilderFactory &, const Te }); run_optimize_pass(root, [&stash](const Child &child) { + child.set(Sparse112DotProduct::optimize(child.get(), stash)); child.set(BestSimilarityFunction::optimize(child.get(), stash)); child.set(L2Distance::optimize(child.get(), stash)); }); diff --git a/eval/src/vespa/eval/instruction/CMakeLists.txt b/eval/src/vespa/eval/instruction/CMakeLists.txt index 56184c113d4..b614606199c 100644 --- a/eval/src/vespa/eval/instruction/CMakeLists.txt +++ b/eval/src/vespa/eval/instruction/CMakeLists.txt @@ -36,6 +36,7 @@ vespa_add_library(eval_instruction OBJECT pow_as_map_optimizer.cpp remove_trivial_dimension_optimizer.cpp replace_type_function.cpp + sparse_112_dot_product.cpp sparse_dot_product_function.cpp sparse_full_overlap_join_function.cpp sparse_merge_function.cpp diff --git a/eval/src/vespa/eval/instruction/sparse_112_dot_product.cpp b/eval/src/vespa/eval/instruction/sparse_112_dot_product.cpp new file mode 100644 index 00000000000..080f51e384a --- /dev/null +++ b/eval/src/vespa/eval/instruction/sparse_112_dot_product.cpp @@ -0,0 +1,236 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#include "sparse_112_dot_product.h" +#include <vespa/eval/eval/fast_value.hpp> +#include <vespa/vespalib/util/typify.h> +#include <vespa/vespalib/util/require.h> +#include <vespa/eval/eval/visit_stuff.h> +#include <algorithm> + +namespace vespalib::eval { + +using namespace tensor_function; +using namespace operation; +using namespace instruction; + +namespace { + +template <typename T, size_t N> +ConstArrayRef<T> as_car(std::array<T, N> &array) { + return {array.data(), array.size()}; +} + +template <typename T, size_t N> +ConstArrayRef<const T *> as_ccar(std::array<T *, N> &array) { + return {array.data(), array.size()}; +} + +template <typename T> +ConstArrayRef<T> as_car(T &value) { + return {&value, 1}; +} + +constexpr std::array<size_t, 2> both_dims = { 0, 1 }; + +template <typename CT> +double my_sparse_112_dot_product_fallback(const Value::Index &a_idx, const Value::Index &b_idx, const Value::Index &c_idx, + const CT *a_cells, const CT *b_cells, const CT *c_cells) __attribute__((noinline)); +template <typename CT> +double my_sparse_112_dot_product_fallback(const Value::Index &a_idx, const Value::Index &b_idx, const Value::Index &c_idx, + const CT *a_cells, const CT *b_cells, const CT *c_cells) +{ + double result = 0.0; + size_t a_space = 0; + size_t b_space = 0; + size_t c_space = 0; + std::array<string_id, 2> c_addr; + std::array<string_id*, 2> c_addr_ref = {&c_addr[0], &c_addr[1]}; + auto outer = a_idx.create_view({}); + auto inner = b_idx.create_view({}); + auto model = c_idx.create_view({&both_dims[0], 2}); + outer->lookup({}); + while (outer->next_result(as_car(c_addr_ref[0]), a_space)) { + inner->lookup({}); + while (inner->next_result(as_car(c_addr_ref[1]), b_space)) { + model->lookup(as_ccar(c_addr_ref)); + if (model->next_result({}, c_space)) { + result += (a_cells[a_space] * b_cells[b_space] * c_cells[c_space]); + } + } + } + return result; +} + +template <typename CT> +double my_fast_sparse_112_dot_product(const FastAddrMap *a_map, const FastAddrMap *b_map, const FastAddrMap *c_map, + const CT *a_cells, const CT *b_cells, const CT *c_cells) +{ + double result = 0.0; + std::array<string_id, 2> c_addr; + const auto &a_labels = a_map->labels(); + for (size_t a_space = 0; a_space < a_labels.size(); ++a_space) { + if (a_cells[a_space] != 0.0) { // handle pseudo-sparse input + c_addr[0] = a_labels[a_space]; + const auto &b_labels = b_map->labels(); + for (size_t b_space = 0; b_space < b_labels.size(); ++b_space) { + if (b_cells[b_space] != 0.0) { // handle pseudo-sparse input + c_addr[1] = b_labels[b_space]; + auto c_space = c_map->lookup(as_car(c_addr)); + if (c_space != FastAddrMap::npos()) { + result += (a_cells[a_space] * b_cells[b_space] * c_cells[c_space]); + } + } + } + } + } + return result; +} + +template <typename CT> +void my_sparse_112_dot_product_op(InterpretedFunction::State &state, uint64_t) { + const auto &a_idx = state.peek(2).index(); + const auto &b_idx = state.peek(1).index(); + const auto &c_idx = state.peek(0).index(); + const CT *a_cells = state.peek(2).cells().unsafe_typify<CT>().cbegin(); + const CT *b_cells = state.peek(1).cells().unsafe_typify<CT>().cbegin(); + const CT *c_cells = state.peek(0).cells().unsafe_typify<CT>().cbegin(); + double result = __builtin_expect(are_fast(a_idx, b_idx, c_idx), true) + ? my_fast_sparse_112_dot_product<CT>(&as_fast(a_idx).map, &as_fast(b_idx).map, &as_fast(c_idx).map, + a_cells, b_cells, c_cells) + : my_sparse_112_dot_product_fallback<CT>(a_idx, b_idx, c_idx, a_cells, b_cells, c_cells); + state.pop_pop_pop_push(state.stash.create<DoubleValue>(result)); +} + +struct MyGetFun { + template <typename CT> + static auto invoke() { return my_sparse_112_dot_product_op<CT>; } +}; + +using MyTypify = TypifyValue<TypifyCellType>; + +// Try to collect input nodes and organize them into a dot product +// between (n sparse non-overlapping single-dimension tensors) and (a +// sparse n-dimensional tensor) all having the same cell type. + +struct InputState { + std::vector<const TensorFunction *> single; + const TensorFunction *multi = nullptr; + bool collision = false; + + void collect(const TensorFunction &node) { + const auto &type = node.result_type(); + if (type.is_sparse()) { + if (type.dimensions().size() == 1) { + single.push_back(&node); + } else { + if (multi) { + collision = true; + } else { + multi = &node; + } + } + } + } + void finalize() { + std::sort(single.begin(), single.end(), [](const auto *a, const auto *b) + { return (a->result_type().dimensions()[0].name < b->result_type().dimensions()[0].name); }); + } + bool verify(size_t n) const { + if (collision || (single.size() != n) || (multi == nullptr) || (multi->result_type().dimensions().size() != n)) { + return false; + } + const auto &multi_type = multi->result_type(); + for (size_t i = 0; i < n; ++i) { + const auto &single_type = single[i]->result_type(); + if ((single_type.cell_type() != multi_type.cell_type()) || + (single_type.dimensions()[0].name != multi_type.dimensions()[i].name)) + { + return false; + } + } + return true; + } +}; + +// Try to find inputs that form a 112 dot product. + +struct FindInputs { + const TensorFunction *a = nullptr; + const TensorFunction *b = nullptr; + const TensorFunction *c = nullptr; + + bool try_match(const TensorFunction &one, const TensorFunction &two) { + auto join = as<Join>(two); + if (join && (join->function() == Mul::f)) { + InputState state; + state.collect(one); + state.collect(join->lhs()); + state.collect(join->rhs()); + state.finalize(); + if (state.verify(2)) { + a = state.single[0]; + b = state.single[1]; + c = state.multi; + return true; + } + } + return false; + } +}; + +} // namespace <unnamed> + +Sparse112DotProduct::Sparse112DotProduct(const TensorFunction &a_in, + const TensorFunction &b_in, + const TensorFunction &c_in) + : tensor_function::Node(DoubleValue::shared_type()), + _a(a_in), + _b(b_in), + _c(c_in) +{ +} + +InterpretedFunction::Instruction +Sparse112DotProduct::compile_self(const ValueBuilderFactory &, Stash &) const +{ + REQUIRE_EQ(_a.get().result_type().cell_type(), _b.get().result_type().cell_type()); + REQUIRE_EQ(_a.get().result_type().cell_type(), _c.get().result_type().cell_type()); + auto op = typify_invoke<1,MyTypify,MyGetFun>(_a.get().result_type().cell_type()); + return InterpretedFunction::Instruction(op); +} + +void +Sparse112DotProduct::push_children(std::vector<Child::CREF> &children) const +{ + children.emplace_back(_a); + children.emplace_back(_b); + children.emplace_back(_c); +} + +void +Sparse112DotProduct::visit_children(vespalib::ObjectVisitor &visitor) const +{ + ::visit(visitor, "a", _a.get()); + ::visit(visitor, "b", _b.get()); + ::visit(visitor, "c", _c.get()); +} + +const TensorFunction & +Sparse112DotProduct::optimize(const TensorFunction &expr, Stash &stash) +{ + auto reduce = as<Reduce>(expr); + if (reduce && (reduce->aggr() == Aggr::SUM) && expr.result_type().is_double()) { + auto join = as<Join>(reduce->child()); + if (join && (join->function() == Mul::f)) { + FindInputs inputs; + if (inputs.try_match(join->lhs(), join->rhs()) || + inputs.try_match(join->rhs(), join->lhs())) + { + return stash.create<Sparse112DotProduct>(*inputs.a, *inputs.b, *inputs.c); + } + } + } + return expr; +} + +} // namespace diff --git a/eval/src/vespa/eval/instruction/sparse_112_dot_product.h b/eval/src/vespa/eval/instruction/sparse_112_dot_product.h new file mode 100644 index 00000000000..2344a5eee2d --- /dev/null +++ b/eval/src/vespa/eval/instruction/sparse_112_dot_product.h @@ -0,0 +1,31 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#pragma once + +#include <vespa/eval/eval/tensor_function.h> + +namespace vespalib::eval { + +/** + * Tensor function for the dot product between the expansion of two 1d + * sparse tensors and a 2d sparse tensor. + */ +class Sparse112DotProduct : public tensor_function::Node +{ +private: + Child _a; + Child _b; + Child _c; + +public: + Sparse112DotProduct(const TensorFunction &a_in, + const TensorFunction &b_in, + const TensorFunction &c_in); + InterpretedFunction::Instruction compile_self(const ValueBuilderFactory &factory, Stash &stash) const override; + bool result_is_mutable() const override { return true; } + void push_children(std::vector<Child::CREF> &children) const final override; + void visit_children(vespalib::ObjectVisitor &visitor) const final override; + static const TensorFunction &optimize(const TensorFunction &expr, Stash &stash); +}; + +} // namespace |