diff options
Diffstat (limited to 'eval')
9 files changed, 399 insertions, 4 deletions
diff --git a/eval/CMakeLists.txt b/eval/CMakeLists.txt index 56db263fed4..3dca80885f7 100644 --- a/eval/CMakeLists.txt +++ b/eval/CMakeLists.txt @@ -72,6 +72,7 @@ vespa_define_module( src/tests/instruction/inplace_map_function src/tests/instruction/join_with_number src/tests/instruction/l2_distance + src/tests/instruction/mixed_112_dot_product src/tests/instruction/mixed_inner_product_function src/tests/instruction/mixed_simple_join_function src/tests/instruction/pow_as_map_optimizer diff --git a/eval/src/tests/instruction/mixed_112_dot_product/CMakeLists.txt b/eval/src/tests/instruction/mixed_112_dot_product/CMakeLists.txt new file mode 100644 index 00000000000..fae2f185afb --- /dev/null +++ b/eval/src/tests/instruction/mixed_112_dot_product/CMakeLists.txt @@ -0,0 +1,9 @@ +# Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +vespa_add_executable(eval_mixed_112_dot_product_test_app TEST + SOURCES + mixed_112_dot_product_test.cpp + DEPENDS + vespaeval + GTest::GTest +) +vespa_add_test(NAME eval_mixed_112_dot_product_test_app COMMAND eval_mixed_112_dot_product_test_app) diff --git a/eval/src/tests/instruction/mixed_112_dot_product/mixed_112_dot_product_test.cpp b/eval/src/tests/instruction/mixed_112_dot_product/mixed_112_dot_product_test.cpp new file mode 100644 index 00000000000..d3c4d89cf47 --- /dev/null +++ b/eval/src/tests/instruction/mixed_112_dot_product/mixed_112_dot_product_test.cpp @@ -0,0 +1,92 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#include <vespa/eval/eval/fast_value.h> +#include <vespa/eval/eval/simple_value.h> +#include <vespa/eval/instruction/mixed_112_dot_product.h> +#include <vespa/eval/eval/test/eval_fixture.h> +#include <vespa/eval/eval/test/gen_spec.h> +#include <vespa/vespalib/util/stringfmt.h> +#include <vespa/vespalib/gtest/gtest.h> + +using namespace vespalib::eval; +using namespace vespalib::eval::test; + +using vespalib::make_string_short::fmt; + +//----------------------------------------------------------------------------- + +struct FunInfo { + using LookFor = Mixed112DotProduct; + void verify(const LookFor &fun) const { + EXPECT_TRUE(fun.result_is_mutable()); + } +}; + +void verify_optimized_cell_types(const vespalib::string &expr) +{ + CellTypeSpace stable(CellTypeUtils::list_stable_types(), 3); + CellTypeSpace unstable(CellTypeUtils::list_unstable_types(), 3); + EvalFixture::verify<FunInfo>(expr, {FunInfo()}, CellTypeSpace(stable).same()); + EvalFixture::verify<FunInfo>(expr, {}, CellTypeSpace(stable).different()); + EvalFixture::verify<FunInfo>(expr, {}, unstable); +} + +void verify_optimized(const vespalib::string &expr, size_t num_params = 3) +{ + CellTypeSpace just_float({CellType::FLOAT}, num_params); + EvalFixture::verify<FunInfo>(expr, {FunInfo()}, just_float); +} + +void verify_not_optimized(const vespalib::string &expr) { + CellTypeSpace just_double({CellType::DOUBLE}, 3); + EvalFixture::verify<FunInfo>(expr, {}, just_double); +} + +//----------------------------------------------------------------------------- + +TEST(Mixed112DotProduct, expression_can_be_optimized) +{ + verify_optimized_cell_types("reduce(x5_2*y8*x7_1y8,sum)"); +} + +TEST(Mixed112DotProduct, inverse_dimension_matching_is_handled) { + verify_optimized("reduce(y5_2*x8*x8y7_1,sum)"); +} + +TEST(Mixed112DotProduct, different_input_placement_is_handled) +{ + std::array<vespalib::string,3> params = {"x3_1", "y3", "x3_1y3"}; + for (size_t p1 = 0; p1 < params.size(); ++p1) { + for (size_t p2 = 0; p2 < params.size(); ++p2) { + for (size_t p3 = 0; p3 < params.size(); ++p3) { + if ((p1 != p2) && (p1 != p3) && (p2 != p3)) { + verify_optimized(fmt("reduce((%s*%s)*%s,sum)", params[p1].c_str(), params[p2].c_str(), params[p3].c_str())); + verify_optimized(fmt("reduce(%s*(%s*%s),sum)", params[p1].c_str(), params[p2].c_str(), params[p3].c_str())); + } + } + } + } +} + +TEST(Mixed112DotProduct, expression_can_be_optimized_with_extra_tensors) +{ + verify_optimized("reduce((x5_2*y4)*(x5_1y4*x3_1),sum)", 4); + verify_optimized("reduce((x5_2*x3_1)*(y4*x5_1y4),sum)", 4); +} + +TEST(Mixed112DotProduct, similar_expressions_are_not_optimized) +{ + verify_not_optimized("reduce(x5_2*y4*x5_1y4,prod)"); + verify_not_optimized("reduce(x5_2+y4*x5_1y4,sum)"); + verify_not_optimized("reduce(x5_2*y4+x5_1y4,sum)"); + verify_not_optimized("reduce(x5_2*z4*x5_1y4,sum)"); + verify_not_optimized("reduce(x5_2*y4*x5_1z4,sum)"); + verify_not_optimized("reduce(x5_2*x1_1y4*x5_1y4,sum)"); + verify_not_optimized("reduce(x5_2*y4*x5_1,sum)"); + verify_not_optimized("reduce(x5*y4*x5y4,sum)"); + verify_not_optimized("reduce(x5_1*y4_1*x5_1y4_1,sum)"); +} + +//----------------------------------------------------------------------------- + +GTEST_MAIN_RUN_ALL_TESTS() diff --git a/eval/src/tests/instruction/sparse_112_dot_product/sparse_112_dot_product_test.cpp b/eval/src/tests/instruction/sparse_112_dot_product/sparse_112_dot_product_test.cpp index 9325a203ff3..bab45afe114 100644 --- a/eval/src/tests/instruction/sparse_112_dot_product/sparse_112_dot_product_test.cpp +++ b/eval/src/tests/instruction/sparse_112_dot_product/sparse_112_dot_product_test.cpp @@ -13,8 +13,6 @@ using namespace vespalib::eval::test; using vespalib::make_string_short::fmt; -const ValueBuilderFactory &prod_factory = FastValueBuilderFactory::get(); - //----------------------------------------------------------------------------- struct FunInfo { diff --git a/eval/src/vespa/eval/eval/optimize_tensor_function.cpp b/eval/src/vespa/eval/eval/optimize_tensor_function.cpp index f9d3b1c6f54..b6258acc9cb 100644 --- a/eval/src/vespa/eval/eval/optimize_tensor_function.cpp +++ b/eval/src/vespa/eval/eval/optimize_tensor_function.cpp @@ -7,6 +7,7 @@ #include <vespa/eval/instruction/dense_dot_product_function.h> #include <vespa/eval/instruction/sparse_dot_product_function.h> #include <vespa/eval/instruction/sparse_112_dot_product.h> +#include <vespa/eval/instruction/mixed_112_dot_product.h> #include <vespa/eval/instruction/sparse_merge_function.h> #include <vespa/eval/instruction/sparse_no_overlap_join_function.h> #include <vespa/eval/instruction/sparse_full_overlap_join_function.h> @@ -67,6 +68,7 @@ const TensorFunction &optimize_for_factory(const ValueBuilderFactory &, const Te run_optimize_pass(root, [&stash](const Child &child) { child.set(Sparse112DotProduct::optimize(child.get(), stash)); + child.set(Mixed112DotProduct::optimize(child.get(), stash)); child.set(BestSimilarityFunction::optimize(child.get(), stash)); child.set(L2Distance::optimize(child.get(), stash)); }); diff --git a/eval/src/vespa/eval/eval/test/eval_fixture.h b/eval/src/vespa/eval/eval/test/eval_fixture.h index b078a900778..d2244cd4f5f 100644 --- a/eval/src/vespa/eval/eval/test/eval_fixture.h +++ b/eval/src/vespa/eval/eval/test/eval_fixture.h @@ -154,9 +154,9 @@ public: EvalFixture fixture(prod_factory(), expr, param_repo, true, true); EvalFixture slow_fixture(prod_factory(), expr, param_repo, false, false); EvalFixture test_fixture(test_factory(), expr, param_repo, true, true); - REQUIRE_EQ(fixture.result(), EvalFixture::ref(expr, param_repo)); - REQUIRE_EQ(fixture.result(), slow_fixture.result()); REQUIRE_EQ(fixture.result(), test_fixture.result()); + REQUIRE_EQ(fixture.result(), slow_fixture.result()); + REQUIRE_EQ(fixture.result(), EvalFixture::ref(expr, param_repo)); auto info = fixture.find_all<typename FunInfo::LookFor>(); REQUIRE_EQ(info.size(), fun_info.size()); for (size_t i = 0; i < fun_info.size(); ++i) { diff --git a/eval/src/vespa/eval/instruction/CMakeLists.txt b/eval/src/vespa/eval/instruction/CMakeLists.txt index b614606199c..f1ec8aa49a9 100644 --- a/eval/src/vespa/eval/instruction/CMakeLists.txt +++ b/eval/src/vespa/eval/instruction/CMakeLists.txt @@ -31,6 +31,7 @@ vespa_add_library(eval_instruction OBJECT inplace_map_function.cpp join_with_number_function.cpp l2_distance.cpp + mixed_112_dot_product.cpp mixed_inner_product_function.cpp mixed_simple_join_function.cpp pow_as_map_optimizer.cpp diff --git a/eval/src/vespa/eval/instruction/mixed_112_dot_product.cpp b/eval/src/vespa/eval/instruction/mixed_112_dot_product.cpp new file mode 100644 index 00000000000..8bfa4b07980 --- /dev/null +++ b/eval/src/vespa/eval/instruction/mixed_112_dot_product.cpp @@ -0,0 +1,261 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#include "mixed_112_dot_product.h" +#include <vespa/eval/eval/fast_value.hpp> +#include <vespa/vespalib/util/typify.h> +#include <vespa/vespalib/util/require.h> +#include <vespa/eval/eval/visit_stuff.h> +#include <cblas.h> +#include <algorithm> +#include <optional> + +namespace vespalib::eval { + +using namespace tensor_function; +using namespace operation; +using namespace instruction; + +namespace { + +template <typename CT> double my_dot_product(const CT * lhs, const CT * rhs, size_t count); +template <> double my_dot_product<double>(const double * lhs, const double * rhs, size_t count) { + return cblas_ddot(count, lhs, 1, rhs, 1); +} +template <> double my_dot_product<float>(const float * lhs, const float * rhs, size_t count) { + return cblas_sdot(count, lhs, 1, rhs, 1); +} + +template <typename T, size_t N> +ConstArrayRef<const T *> as_ccar(std::array<T *, N> &array) { + return {array.data(), array.size()}; +} + +template <typename T> +ConstArrayRef<T> as_car(T &value) { + return {&value, 1}; +} + +constexpr std::array<size_t, 1> single_dim = { 0 }; + +template <typename CT> +double my_mixed_112_dot_product_fallback(const Value::Index &a_idx, const Value::Index &c_idx, + const CT *a_cells, const CT *b_cells, const CT *c_cells, + size_t dense_size) __attribute__((noinline)); +template <typename CT> +double my_mixed_112_dot_product_fallback(const Value::Index &a_idx, const Value::Index &c_idx, + const CT *a_cells, const CT *b_cells, const CT *c_cells, + size_t dense_size) +{ + double result = 0.0; + size_t a_space = 0; + size_t c_space = 0; + std::array<string_id, 1> c_addr; + std::array<string_id*, 1> c_addr_ref = {&c_addr[0]}; + auto outer = a_idx.create_view({}); + auto model = c_idx.create_view({&single_dim[0], 1}); + outer->lookup({}); + while (outer->next_result(as_car(c_addr_ref[0]), a_space)) { + model->lookup(as_ccar(c_addr_ref)); + if (model->next_result({}, c_space)) { + result += my_dot_product<CT>(b_cells, c_cells + (c_space * dense_size), dense_size) * a_cells[a_space]; + } + } + return result; +} + +template <typename CT> +double my_fast_mixed_112_dot_product(const FastAddrMap *a_map, const FastAddrMap *c_map, + const CT *a_cells, const CT *b_cells, const CT *c_cells, + size_t dense_size) +{ + double result = 0.0; + const auto &a_labels = a_map->labels(); + for (size_t a_space = 0; a_space < a_labels.size(); ++a_space) { + if (a_cells[a_space] != 0.0) { // handle pseudo-sparse input + auto c_space = c_map->lookup_singledim(a_labels[a_space]); + if (c_space != FastAddrMap::npos()) { + result += my_dot_product<CT>(b_cells, c_cells + (c_space * dense_size), dense_size) * a_cells[a_space]; + } + } + } + return result; +} + +template <typename CT> +void my_mixed_112_dot_product_op(InterpretedFunction::State &state, uint64_t dense_size) { + const auto &a_idx = state.peek(2).index(); + const auto &c_idx = state.peek(0).index(); + const CT *a_cells = state.peek(2).cells().unsafe_typify<CT>().cbegin(); + const CT *b_cells = state.peek(1).cells().unsafe_typify<CT>().cbegin(); + const CT *c_cells = state.peek(0).cells().unsafe_typify<CT>().cbegin(); + double result = __builtin_expect(are_fast(a_idx, c_idx), true) + ? my_fast_mixed_112_dot_product<CT>(&as_fast(a_idx).map, &as_fast(c_idx).map, + a_cells, b_cells, c_cells, dense_size) + : my_mixed_112_dot_product_fallback<CT>(a_idx, c_idx, a_cells, b_cells, c_cells, dense_size); + state.pop_pop_pop_push(state.stash.create<DoubleValue>(result)); +} + +InterpretedFunction::op_function my_select(CellType cell_type) { + if (cell_type == CellType::DOUBLE) { + return my_mixed_112_dot_product_op<double>; + } + if (cell_type == CellType::FLOAT) { + return my_mixed_112_dot_product_op<float>; + } + abort(); +} + +// Try to collect input nodes and organize them into a 3-way dot +// product between one 1d sparse tensor, one 1d dense tensor and one +// 2d mixed tensor. Cell types must be all float or all double. + +struct InputState { + std::optional<CellType> cell_type; + const TensorFunction *sparse = nullptr; + const TensorFunction *dense = nullptr; + const TensorFunction *mixed = nullptr; + bool failed = false; + + void collect_cell_type(CellType ct) { + if (cell_type.has_value()) { + if (ct != cell_type.value()) { + failed = true; + } + } else { + cell_type = ct; + } + } + + void try_save(const TensorFunction *&my_ptr, const TensorFunction &node) { + if (my_ptr == nullptr) { + my_ptr = &node; + } else { + failed = true; + } + } + + void collect(const TensorFunction &node) { + const auto &type = node.result_type(); + collect_cell_type(type.cell_type()); + if (type.is_sparse()) { + try_save(sparse, node); + } else if (type.is_dense()) { + try_save(dense, node); + } else if (type.has_dimensions()) { + try_save(mixed, node); + } else { + failed = true; + } + } + + bool verify() const { + // all parts must have been collected successfully + if (failed || !cell_type.has_value() || (sparse == nullptr) || (dense == nullptr) || (mixed == nullptr)) { + return false; + } + // common cell type must be appropriate + if ((cell_type.value() != CellType::FLOAT) && (cell_type.value() != CellType::DOUBLE)) { + return false; + } + // number of dimensions must match the expected 112 pattern + if ((sparse->result_type().dimensions().size() != 1) || + (dense->result_type().dimensions().size() != 1) || + (mixed->result_type().dimensions().size() != 2)) + { + return false; + } + // the product of the sparse and dense tensors must fully overlap the mixed tensor + const ValueType::Dimension *mapped = &mixed->result_type().dimensions()[0]; + const ValueType::Dimension *indexed = &mixed->result_type().dimensions()[1]; + if (!mapped->is_mapped()) { + std::swap(mapped, indexed); + } + assert(mapped->is_mapped()); + assert(indexed->is_indexed()); + return ((*mapped == sparse->result_type().dimensions()[0]) && + (*indexed == dense->result_type().dimensions()[0])); + } +}; + +// Try to find inputs that form a 112 mixed dot product. + +struct FindInputs { + const TensorFunction *a = nullptr; + const TensorFunction *b = nullptr; + const TensorFunction *c = nullptr; + + bool try_match(const TensorFunction &one, const TensorFunction &two) { + auto join = as<Join>(two); + if (join && (join->function() == Mul::f)) { + InputState state; + state.collect(one); + state.collect(join->lhs()); + state.collect(join->rhs()); + if (state.verify()) { + a = state.sparse; + b = state.dense; + c = state.mixed; + return true; + } + } + return false; + } +}; + +} // namespace <unnamed> + +Mixed112DotProduct::Mixed112DotProduct(const TensorFunction &a_in, + const TensorFunction &b_in, + const TensorFunction &c_in) + : tensor_function::Node(DoubleValue::shared_type()), + _a(a_in), + _b(b_in), + _c(c_in) +{ +} + +InterpretedFunction::Instruction +Mixed112DotProduct::compile_self(const ValueBuilderFactory &, Stash &) const +{ + REQUIRE_EQ(_a.get().result_type().cell_type(), _b.get().result_type().cell_type()); + REQUIRE_EQ(_a.get().result_type().cell_type(), _c.get().result_type().cell_type()); + REQUIRE_EQ(_b.get().result_type().dense_subspace_size(), _c.get().result_type().dense_subspace_size()); + auto op = my_select(_a.get().result_type().cell_type()); + return InterpretedFunction::Instruction(op, _c.get().result_type().dense_subspace_size()); +} + +void +Mixed112DotProduct::push_children(std::vector<Child::CREF> &children) const +{ + children.emplace_back(_a); + children.emplace_back(_b); + children.emplace_back(_c); +} + +void +Mixed112DotProduct::visit_children(vespalib::ObjectVisitor &visitor) const +{ + ::visit(visitor, "a", _a.get()); + ::visit(visitor, "b", _b.get()); + ::visit(visitor, "c", _c.get()); +} + +const TensorFunction & +Mixed112DotProduct::optimize(const TensorFunction &expr, Stash &stash) +{ + auto reduce = as<Reduce>(expr); + if (reduce && (reduce->aggr() == Aggr::SUM) && expr.result_type().is_double()) { + auto join = as<Join>(reduce->child()); + if (join && (join->function() == Mul::f)) { + FindInputs inputs; + if (inputs.try_match(join->lhs(), join->rhs()) || + inputs.try_match(join->rhs(), join->lhs())) + { + return stash.create<Mixed112DotProduct>(*inputs.a, *inputs.b, *inputs.c); + } + } + } + return expr; +} + +} // namespace diff --git a/eval/src/vespa/eval/instruction/mixed_112_dot_product.h b/eval/src/vespa/eval/instruction/mixed_112_dot_product.h new file mode 100644 index 00000000000..c02ccf65032 --- /dev/null +++ b/eval/src/vespa/eval/instruction/mixed_112_dot_product.h @@ -0,0 +1,31 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#pragma once + +#include <vespa/eval/eval/tensor_function.h> + +namespace vespalib::eval { + +/** + * Tensor function for the dot product between (the expansion of a 1d + * sparse tensor and a 1d dense tensor) and (a 2d mixed tensor). + */ +class Mixed112DotProduct : public tensor_function::Node +{ +private: + Child _a; // 1d sparse + Child _b; // 1d dense + Child _c; // 2d mixed + +public: + Mixed112DotProduct(const TensorFunction &a_in, + const TensorFunction &b_in, + const TensorFunction &c_in); + InterpretedFunction::Instruction compile_self(const ValueBuilderFactory &factory, Stash &stash) const override; + bool result_is_mutable() const override { return true; } + void push_children(std::vector<Child::CREF> &children) const final override; + void visit_children(vespalib::ObjectVisitor &visitor) const final override; + static const TensorFunction &optimize(const TensorFunction &expr, Stash &stash); +}; + +} // namespace |