diff options
author | Arne Juul <arnej@verizonmedia.com> | 2021-01-18 14:06:52 +0000 |
---|---|---|
committer | Arne Juul <arnej@verizonmedia.com> | 2021-01-20 09:14:31 +0000 |
commit | 89b818a17a94cfd569819ce299b76af87e7f62b3 (patch) | |
tree | 7e813a6603ec086e65dd543714fbecb293c1c19c /eval | |
parent | 651e5cf6c1ddabcf5f1a750cc95a699c7517a41e (diff) |
add mixed dot product optimizer
* use BLAS
* sort CMakeLists
Diffstat (limited to 'eval')
7 files changed, 372 insertions, 16 deletions
diff --git a/eval/CMakeLists.txt b/eval/CMakeLists.txt index 3c94639d0aa..8d6f48adb21 100644 --- a/eval/CMakeLists.txt +++ b/eval/CMakeLists.txt @@ -32,12 +32,25 @@ vespa_define_module( src/tests/eval/tensor_function src/tests/eval/tensor_lambda src/tests/eval/tensor_spec + src/tests/eval/typed_cells src/tests/eval/value_cache src/tests/eval/value_codec src/tests/eval/value_type - src/tests/eval/typed_cells src/tests/gp/ponder_nov2017 + src/tests/instruction/add_trivial_dimension_optimizer + src/tests/instruction/dense_dot_product_function + src/tests/instruction/dense_inplace_join_function + src/tests/instruction/dense_matmul_function + src/tests/instruction/dense_multi_matmul_function + src/tests/instruction/dense_replace_type_function + src/tests/instruction/dense_simple_expand_function + src/tests/instruction/dense_simple_join_function + src/tests/instruction/dense_simple_map_function + src/tests/instruction/dense_single_reduce_function + src/tests/instruction/dense_tensor_create_function + src/tests/instruction/dense_tensor_peek_function src/tests/instruction/dense_xw_product_function + src/tests/instruction/fast_rename_optimizer src/tests/instruction/generic_concat src/tests/instruction/generic_create src/tests/instruction/generic_join @@ -46,23 +59,11 @@ vespa_define_module( src/tests/instruction/generic_peek src/tests/instruction/generic_reduce src/tests/instruction/generic_rename - src/tests/instruction/dense_dot_product_function - src/tests/instruction/dense_matmul_function - src/tests/instruction/dense_multi_matmul_function - src/tests/instruction/dense_simple_expand_function - src/tests/instruction/dense_tensor_peek_function src/tests/instruction/index_lookup_table src/tests/instruction/join_with_number - src/tests/instruction/add_trivial_dimension_optimizer - src/tests/instruction/fast_rename_optimizer - src/tests/instruction/dense_inplace_join_function + src/tests/instruction/mixed_inner_product_function src/tests/instruction/pow_as_map_optimizer 
src/tests/instruction/remove_trivial_dimension_optimizer - src/tests/instruction/dense_replace_type_function - src/tests/instruction/dense_simple_join_function - src/tests/instruction/dense_simple_map_function - src/tests/instruction/dense_single_reduce_function - src/tests/instruction/dense_tensor_create_function src/tests/instruction/vector_from_doubles_function src/tests/streamed/value src/tests/tensor/instruction_benchmark diff --git a/eval/src/tests/instruction/mixed_inner_product_function/CMakeLists.txt b/eval/src/tests/instruction/mixed_inner_product_function/CMakeLists.txt new file mode 100644 index 00000000000..16f59dc28ed --- /dev/null +++ b/eval/src/tests/instruction/mixed_inner_product_function/CMakeLists.txt @@ -0,0 +1,9 @@ +# Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +vespa_add_executable(eval_mixed_inner_product_function_test_app TEST + SOURCES + mixed_inner_product_function_test.cpp + DEPENDS + vespaeval + GTest::GTest +) +vespa_add_test(NAME eval_mixed_inner_product_function_test_app COMMAND eval_mixed_inner_product_function_test_app) diff --git a/eval/src/tests/instruction/mixed_inner_product_function/mixed_inner_product_function_test.cpp b/eval/src/tests/instruction/mixed_inner_product_function/mixed_inner_product_function_test.cpp new file mode 100644 index 00000000000..278bda888f4 --- /dev/null +++ b/eval/src/tests/instruction/mixed_inner_product_function/mixed_inner_product_function_test.cpp @@ -0,0 +1,146 @@ +// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+ +#include <vespa/eval/eval/fast_value.h> +#include <vespa/eval/eval/tensor_function.h> +#include <vespa/eval/eval/test/eval_fixture.h> +#include <vespa/eval/eval/test/tensor_model.hpp> +#include <vespa/eval/instruction/dense_dot_product_function.h> +#include <vespa/eval/instruction/dense_matmul_function.h> +#include <vespa/eval/instruction/dense_multi_matmul_function.h> +#include <vespa/eval/instruction/dense_xw_product_function.h> +#include <vespa/eval/instruction/mixed_inner_product_function.h> +#include <vespa/vespalib/util/stash.h> +#include <vespa/vespalib/util/stringfmt.h> +#include <vespa/vespalib/gtest/gtest.h> + +#include <vespa/log/log.h> +LOG_SETUP("mixed_inner_product_function_test"); + +using namespace vespalib; +using namespace vespalib::eval; +using namespace vespalib::eval::test; + +const ValueBuilderFactory &prod_factory = FastValueBuilderFactory::get(); + +struct MyVecSeq : Sequence { + double bias; + double operator[](size_t i) const override { return (i + bias); } + MyVecSeq(double cellBias) : bias(cellBias) {} +}; + +std::function<double(size_t)> my_vec_gen(double cellBias) { + return [=] (size_t i) { return i + cellBias; }; +} + +//----------------------------------------------------------------------------- + +EvalFixture::ParamRepo make_params() { + return EvalFixture::ParamRepo() + .add_vector("x", 3, my_vec_gen(2.0)) + .add_vector("x", 3, my_vec_gen(13.25)) + .add_vector("y", 3, my_vec_gen(4.0)) + .add_vector("z", 3, my_vec_gen(0.25)) + .add_matrix("x", 3, "y", 1, my_vec_gen(5.0)) + .add_matrix("x", 1, "y", 3, my_vec_gen(6.0)) + .add_matrix("x", 3, "y", 3, my_vec_gen(1.5)) + .add_matrix("x", 3, "z", 3, my_vec_gen(2.5)) + .add_cube("x", 3, "y", 3, "z", 3, my_vec_gen(-4.0)) + .add("mix_x3zm", spec({x(3),z({"c","d"})}, MyVecSeq(0.5))) + .add("mix_y3zm", spec({y(3),z({"c","d"})}, MyVecSeq(3.5))) + .add("mix_x3zm_f", spec(float_cells({x(3),z({"c","d"})}), MyVecSeq(0.5))) + .add("mix_y3zm_f", spec(float_cells({y(3),z({"c","d"})}), 
MyVecSeq(3.5))) + ; + +} +EvalFixture::ParamRepo param_repo = make_params(); + +void assert_mixed_optimized(const vespalib::string &expr) { + EvalFixture fixture(prod_factory, expr, param_repo, true); + EXPECT_EQ(fixture.result(), EvalFixture::ref(expr, param_repo)); + auto info = fixture.find_all<MixedInnerProductFunction>(); + ASSERT_EQ(info.size(), 1u); + EXPECT_TRUE(info[0]->result_is_mutable()); +} + +void assert_not_mixed_optimized(const vespalib::string &expr) { + EvalFixture fixture(prod_factory, expr, param_repo, true); + EXPECT_EQ(fixture.result(), EvalFixture::ref(expr, param_repo)); + auto info = fixture.find_all<MixedInnerProductFunction>(); + ASSERT_EQ(info.size(), 0u); +} + +void assert_dense_optimized(const vespalib::string &expr) { + EvalFixture fixture(prod_factory, expr, param_repo, true); + EXPECT_EQ(fixture.result(), EvalFixture::ref(expr, param_repo)); + auto info = fixture.find_all<MixedInnerProductFunction>(); + ASSERT_EQ(info.size(), 0u); + auto info2 = fixture.find_all<DenseDotProductFunction>(); + auto info3 = fixture.find_all<DenseMatMulFunction>(); + auto info4 = fixture.find_all<DenseMultiMatMulFunction>(); + auto info5 = fixture.find_all<DenseXWProductFunction>(); + ASSERT_EQ(info2.size() + info3.size() + info4.size() + info5.size(), 1u); +} + +//----------------------------------------------------------------------------- + +TEST(MixedInnerProduct, use_dense_optimizers_when_possible) { + // actually, all these trigger DenseXWProduct + assert_dense_optimized("reduce(x3 * x3y1,sum,x)"); + assert_dense_optimized("reduce(y3 * x1y3,sum,y)"); + assert_dense_optimized("reduce(y3 * x3y3,sum,y)"); + assert_dense_optimized("reduce(x1y3 * y3,sum,y)"); + assert_dense_optimized("reduce(x3y3 * y3,sum,y)"); +} + +TEST(MixedInnerProduct, trigger_optimizer_when_possible) { + assert_mixed_optimized("reduce(x3 * mix_x3zm,sum,x)"); + assert_mixed_optimized("reduce(x3f * mix_x3zm,sum,x)"); + assert_mixed_optimized("reduce(x3 * mix_x3zm_f,sum,x)"); + 
assert_mixed_optimized("reduce(x3f * mix_x3zm_f,sum,x)"); + assert_mixed_optimized("reduce(x3$2 * mix_x3zm,sum,x)"); + assert_mixed_optimized("reduce(x3f$2 * mix_x3zm,sum,x)"); + assert_mixed_optimized("reduce(y3 * mix_y3zm,sum,y)"); + assert_mixed_optimized("reduce(y3f * mix_y3zm,sum,y)"); + assert_mixed_optimized("reduce(y3 * mix_y3zm_f,sum,y)"); + assert_mixed_optimized("reduce(y3f * mix_y3zm_f,sum,y)"); + assert_mixed_optimized("reduce(x3y1 * mix_x3zm,sum,x)"); + assert_mixed_optimized("reduce(x3y1f * mix_x3zm,sum,x)"); + assert_mixed_optimized("reduce(x3y1 * mix_x3zm,sum,x,y)"); + assert_mixed_optimized("reduce(x3y1f * mix_x3zm,sum,x,y)"); + assert_mixed_optimized("reduce(x1y3 * mix_y3zm,sum,y)"); + assert_mixed_optimized("reduce(x1y3f * mix_y3zm,sum,y)"); + assert_mixed_optimized("reduce(x1y3 * x1y3,sum,y)"); + assert_mixed_optimized("reduce(x1y3 * x1y3f,sum,y)"); + assert_mixed_optimized("reduce(x1y3f * x1y3,sum,y)"); + assert_mixed_optimized("reduce(x1y3f * x1y3f,sum,y)"); + assert_mixed_optimized("reduce(x1y3 * mix_y3zm,sum,y)"); + assert_mixed_optimized("reduce(x1y3f * mix_y3zm,sum,y)"); + assert_mixed_optimized("reduce(mix_x3zm * x3,sum,x)"); + assert_mixed_optimized("reduce(mix_x3zm * x3f,sum,x)"); + assert_mixed_optimized("reduce(mix_x3zm * x3y1,sum,x)"); + assert_mixed_optimized("reduce(mix_x3zm * x3y1f,sum,x)"); + assert_mixed_optimized("reduce(mix_y3zm * y3,sum,y)"); + assert_mixed_optimized("reduce(mix_y3zm * y3f,sum,y)"); + assert_mixed_optimized("reduce(mix_y3zm * x1y3,sum,y)"); + assert_mixed_optimized("reduce(mix_y3zm * x1y3f,sum,y)"); +} + +TEST(MixedInnerProduct, should_not_trigger_optimizer_for_other_cases) { + assert_not_mixed_optimized("reduce(x3y3z3 * x3,sum,x)"); + assert_not_mixed_optimized("reduce(x3y3z3 * y3,sum,y)"); + assert_not_mixed_optimized("reduce(x3y3z3 * x3y3,sum,x,y)"); + assert_not_mixed_optimized("reduce(x3y3 * mix_y3zm,sum,y)"); + assert_not_mixed_optimized("reduce(mix_y3zm * x3,sum,x,y)"); +} + +TEST(MixedInnerProduct, 
check_compatibility_with_complex_types) { + ValueType vec_type = ValueType::from_spec("tensor<float>(f[1],g[2],i[1],x[3],y[1])"); + ValueType mix_type = ValueType::from_spec("tensor<double>(cat{},g[2],host{},k[1],x[3],z{})"); + ValueType res_type = ValueType::join(vec_type,mix_type).reduce({"g","k","i","x"}); + EXPECT_EQ(MixedInnerProductFunction::compatible_types(res_type, mix_type, vec_type), true); + EXPECT_EQ(MixedInnerProductFunction::compatible_types(res_type, vec_type, mix_type), false); +} + +//----------------------------------------------------------------------------- + +GTEST_MAIN_RUN_ALL_TESTS() diff --git a/eval/src/vespa/eval/eval/optimize_tensor_function.cpp b/eval/src/vespa/eval/eval/optimize_tensor_function.cpp index 02d6bdbf0f3..0d7a6937c0d 100644 --- a/eval/src/vespa/eval/eval/optimize_tensor_function.cpp +++ b/eval/src/vespa/eval/eval/optimize_tensor_function.cpp @@ -5,6 +5,7 @@ #include "simple_value.h" #include <vespa/eval/instruction/dense_dot_product_function.h> +#include <vespa/eval/instruction/mixed_inner_product_function.h> #include <vespa/eval/instruction/dense_xw_product_function.h> #include <vespa/eval/instruction/dense_matmul_function.h> #include <vespa/eval/instruction/dense_multi_matmul_function.h> @@ -47,6 +48,7 @@ const TensorFunction &optimize_for_factory(const ValueBuilderFactory &factory, c child.set(DenseXWProductFunction::optimize(child.get(), stash)); child.set(DenseMatMulFunction::optimize(child.get(), stash)); child.set(DenseMultiMatMulFunction::optimize(child.get(), stash)); + child.set(MixedInnerProductFunction::optimize(child.get(), stash)); nodes.pop_back(); } } diff --git a/eval/src/vespa/eval/instruction/CMakeLists.txt b/eval/src/vespa/eval/instruction/CMakeLists.txt index 452ed51610a..4b3b357e2b9 100644 --- a/eval/src/vespa/eval/instruction/CMakeLists.txt +++ b/eval/src/vespa/eval/instruction/CMakeLists.txt @@ -9,8 +9,6 @@ vespa_add_library(eval_instruction OBJECT dense_lambda_peek_optimizer.cpp 
dense_matmul_function.cpp dense_multi_matmul_function.cpp - pow_as_map_optimizer.cpp - remove_trivial_dimension_optimizer.cpp dense_simple_expand_function.cpp dense_simple_join_function.cpp dense_simple_map_function.cpp @@ -30,6 +28,9 @@ vespa_add_library(eval_instruction OBJECT generic_rename.cpp index_lookup_table.cpp join_with_number_function.cpp + mixed_inner_product_function.cpp + pow_as_map_optimizer.cpp + remove_trivial_dimension_optimizer.cpp replace_type_function.cpp vector_from_doubles_function.cpp ) diff --git a/eval/src/vespa/eval/instruction/mixed_inner_product_function.cpp b/eval/src/vespa/eval/instruction/mixed_inner_product_function.cpp new file mode 100644 index 00000000000..0c63d61cf16 --- /dev/null +++ b/eval/src/vespa/eval/instruction/mixed_inner_product_function.cpp @@ -0,0 +1,134 @@ +// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#include "mixed_inner_product_function.h" +#include <vespa/eval/eval/operation.h> +#include <vespa/eval/eval/value.h> +#include <cblas.h> + +namespace vespalib::eval { + +using namespace tensor_function; +using namespace operation; + +namespace { + +template <typename LCT, typename RCT> +double my_dot_product(const LCT * &lhs, const RCT * rhs, size_t count) { + double result = 0.0; + for (size_t i = 0; i < count; ++i) { + result += (*lhs++) * (*rhs++); + } + return result; +} + +template <> +double my_dot_product<double,double>(const double * &lhs, const double * rhs, size_t count) { + double result = cblas_ddot(count, lhs, 1, rhs, 1); + lhs += count; + return result; +} + +template <> +double my_dot_product<float,float>(const float * &lhs, const float * rhs, size_t count) { + double result = cblas_sdot(count, lhs, 1, rhs, 1); + lhs += count; + return result; +} + +template <typename MCT, typename VCT, typename OCT> +void my_mixed_inner_product_op(InterpretedFunction::State &state, uint64_t param_in) { + const auto &param = 
unwrap_param<MixedInnerProductParam>(param_in); + const auto &mixed = state.peek(1); + const auto &dense = state.peek(0); + auto m_cells = mixed.cells().typify<MCT>(); + auto v_cells = dense.cells().typify<VCT>(); + const auto &index = mixed.index(); + size_t num_subspaces = index.size(); + size_t num_output_cells = num_subspaces * param.out_subspace_size; + ArrayRef<OCT> out_cells = state.stash.create_uninitialized_array<OCT>(num_output_cells); + const MCT *m_cp = m_cells.begin(); + OCT *out = out_cells.begin(); + for (size_t i = 0; i < num_output_cells; ++i) { + const VCT *v_cp = v_cells.begin(); + *out++ = my_dot_product(m_cp, v_cp, param.vector_size); + } + assert(out == out_cells.end()); + assert(m_cp == m_cells.end()); + state.pop_pop_push(state.stash.create<ValueView>(param.res_type, index, TypedCells(out_cells))); +} + + +struct SelectMixedInnerProduct { + template <typename MCT, typename VCT, typename OCT> + static auto invoke() { return my_mixed_inner_product_op<MCT,VCT,OCT>; } +}; + +} // namespace <unnamed> + +MixedInnerProductFunction::MixedInnerProductFunction(const ValueType &res_type_in, + const TensorFunction &lhs_in, + const TensorFunction &rhs_in) + : tensor_function::Op2(res_type_in, lhs_in, rhs_in) +{ +} + +InterpretedFunction::Instruction +MixedInnerProductFunction::compile_self(const ValueBuilderFactory &, Stash &stash) const +{ + const auto &mix_type = lhs().result_type(); + const auto &vec_type = rhs().result_type(); + auto &param = stash.create<MixedInnerProductParam>(result_type(), mix_type, vec_type); + using MyTypify = TypifyValue<TypifyCellType>; + auto op = typify_invoke<3,MyTypify,SelectMixedInnerProduct>(mix_type.cell_type(), + vec_type.cell_type(), + result_type().cell_type()); + return InterpretedFunction::Instruction(op, wrap_param<MixedInnerProductParam>(param)); +} + +bool +MixedInnerProductFunction::compatible_types(const ValueType &res, const ValueType &mixed, const ValueType &vector) +{ + if (vector.is_dense() && ! 
res.is_scalar()) { + auto dense_dims = vector.nontrivial_indexed_dimensions(); + auto mixed_dims = mixed.nontrivial_indexed_dimensions(); + while (! dense_dims.empty()) { + if (mixed_dims.empty()) { + return false; + } + const auto &name = dense_dims.back().name; + if (res.dimension_index(name) != ValueType::Dimension::npos) { + return false; + } + if (name != mixed_dims.back().name) { + return false; + } + dense_dims.pop_back(); + mixed_dims.pop_back(); + } + return true; + } + return false; +} + +const TensorFunction & +MixedInnerProductFunction::optimize(const TensorFunction &expr, Stash &stash) +{ + const auto & res_type = expr.result_type(); + auto reduce = as<Reduce>(expr); + if ((! res_type.is_scalar()) && reduce && (reduce->aggr() == Aggr::SUM)) { + auto join = as<Join>(reduce->child()); + if (join && (join->function() == Mul::f)) { + const TensorFunction &lhs = join->lhs(); + const TensorFunction &rhs = join->rhs(); + if (compatible_types(res_type, lhs.result_type(), rhs.result_type())) { + return stash.create<MixedInnerProductFunction>(res_type, lhs, rhs); + } + if (compatible_types(res_type, rhs.result_type(), lhs.result_type())) { + return stash.create<MixedInnerProductFunction>(res_type, rhs, lhs); + } + } + } + return expr; +} + +} // namespace diff --git a/eval/src/vespa/eval/instruction/mixed_inner_product_function.h b/eval/src/vespa/eval/instruction/mixed_inner_product_function.h new file mode 100644 index 00000000000..165e1339a5b --- /dev/null +++ b/eval/src/vespa/eval/instruction/mixed_inner_product_function.h @@ -0,0 +1,63 @@ +// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#pragma once + +#include <vespa/eval/eval/tensor_function.h> + +namespace vespalib::eval { + +/** + * Optimized tensor function for dot-product inside a bigger (possibly + * mixed) tensor. 
To trigger this, the function must be in the form + * reduce((mixed tensor)*(vector),sum,dimension names) + * with "vector" being a dense tensor with the same dimensions that + * are reduced, "mixed tensor" must contain all these dimension, and + * they must also be the innermost (alphabetically last) indexed + * dimensions in the mixed tensor. + * Simple example: + * mixed: tensor(category{},x[32]) + * vector: tensor(x[32]) + * expression: reduce(mixed*vector,sum,x) + * result: tensor(category{}) + * More complex example: + * mixed: tensor<double>(a{},b[31],c{},d[42],e{},f[5],g{}) + * vector: tensor<float>(d[42],f[5]) + * expression: reduce(mixed*vector,sum,d,f) + * result: tensor<double>(a{},b[31],c{},e{},g{}) + * Note: + * if the bigger tensor is dense, other optimizers are likely + * to pick up the operation, even if this function could also + * handle them. + **/ +struct MixedInnerProductParam { + ValueType res_type; + size_t vector_size; + size_t out_subspace_size; + + MixedInnerProductParam(const ValueType &res_type_in, + const ValueType &mix_type, + const ValueType &vec_type) + : res_type(res_type_in), + vector_size(vec_type.dense_subspace_size()), + out_subspace_size(res_type.dense_subspace_size()) + { + assert(vector_size * out_subspace_size == mix_type.dense_subspace_size()); + } +}; + +/** + * Tensor function for a dot product inside a mixed tensor. + */ +class MixedInnerProductFunction : public tensor_function::Op2 +{ +public: + MixedInnerProductFunction(const ValueType &res_type_in, + const TensorFunction &mix_in, + const TensorFunction &vec_in); + InterpretedFunction::Instruction compile_self(const ValueBuilderFactory &factory, Stash &stash) const override; + bool result_is_mutable() const override { return true; } + static bool compatible_types(const ValueType &res, const ValueType &mixed, const ValueType &dense); + static const TensorFunction &optimize(const TensorFunction &expr, Stash &stash); +}; + +} // namespace |