diff options
author | Håvard Pettersen <havardpe@oath.com> | 2020-05-06 15:36:58 +0000 |
---|---|---|
committer | Håvard Pettersen <havardpe@oath.com> | 2020-05-07 12:41:36 +0000 |
commit | 0865c453d198c3974880c56699a9fd01ca286d43 (patch) | |
tree | dca743013ca0bb2c5e352855d0ef6d255669200b /eval | |
parent | 8f774bc613caf2006da29989a322730dcb936514 (diff) |
dense single reduce
Diffstat (limited to 'eval')
-rw-r--r-- | eval/CMakeLists.txt | 1 | ||||
-rw-r--r-- | eval/src/tests/eval/aggr/aggr_test.cpp | 11 | ||||
-rw-r--r-- | eval/src/tests/tensor/dense_single_reduce_function/CMakeLists.txt | 8 | ||||
-rw-r--r-- | eval/src/tests/tensor/dense_single_reduce_function/dense_single_reduce_function_test.cpp | 123 | ||||
-rw-r--r-- | eval/src/vespa/eval/eval/aggr.cpp | 72 | ||||
-rw-r--r-- | eval/src/vespa/eval/eval/aggr.h | 66 | ||||
-rw-r--r-- | eval/src/vespa/eval/eval/test/eval_fixture.cpp | 30 | ||||
-rw-r--r-- | eval/src/vespa/eval/eval/test/eval_fixture.h | 11 | ||||
-rw-r--r-- | eval/src/vespa/eval/tensor/default_tensor_engine.cpp | 54 | ||||
-rw-r--r-- | eval/src/vespa/eval/tensor/dense/CMakeLists.txt | 1 | ||||
-rw-r--r-- | eval/src/vespa/eval/tensor/dense/dense_single_reduce_function.cpp | 134 | ||||
-rw-r--r-- | eval/src/vespa/eval/tensor/dense/dense_single_reduce_function.h | 31 |
12 files changed, 450 insertions, 92 deletions
diff --git a/eval/CMakeLists.txt b/eval/CMakeLists.txt index 9cd5f396144..29d3c192139 100644 --- a/eval/CMakeLists.txt +++ b/eval/CMakeLists.txt @@ -39,6 +39,7 @@ vespa_define_module( src/tests/tensor/dense_multi_matmul_function src/tests/tensor/dense_remove_dimension_optimizer src/tests/tensor/dense_replace_type_function + src/tests/tensor/dense_single_reduce_function src/tests/tensor/dense_tensor_create_function src/tests/tensor/dense_tensor_peek_function src/tests/tensor/dense_xw_product_function diff --git a/eval/src/tests/eval/aggr/aggr_test.cpp b/eval/src/tests/eval/aggr/aggr_test.cpp index d81d6bb0ff3..a028d0da6c2 100644 --- a/eval/src/tests/eval/aggr/aggr_test.cpp +++ b/eval/src/tests/eval/aggr/aggr_test.cpp @@ -6,6 +6,17 @@ using vespalib::Stash; using namespace vespalib::eval; +TEST("require that aggregator list returns appropriate entries") { + auto list = Aggregator::list(); + ASSERT_EQUAL(list.size(), 6u); + EXPECT_EQUAL(int(list[0]), int(Aggr::AVG)); + EXPECT_EQUAL(int(list[1]), int(Aggr::COUNT)); + EXPECT_EQUAL(int(list[2]), int(Aggr::PROD)); + EXPECT_EQUAL(int(list[3]), int(Aggr::SUM)); + EXPECT_EQUAL(int(list[4]), int(Aggr::MAX)); + EXPECT_EQUAL(int(list[5]), int(Aggr::MIN)); +} + TEST("require that AVG aggregator works as expected") { Stash stash; Aggregator &aggr = Aggregator::create(Aggr::AVG, stash); diff --git a/eval/src/tests/tensor/dense_single_reduce_function/CMakeLists.txt b/eval/src/tests/tensor/dense_single_reduce_function/CMakeLists.txt new file mode 100644 index 00000000000..42b00699c31 --- /dev/null +++ b/eval/src/tests/tensor/dense_single_reduce_function/CMakeLists.txt @@ -0,0 +1,8 @@ +# Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+vespa_add_executable(eval_dense_single_reduce_function_test_app TEST + SOURCES + dense_single_reduce_function_test.cpp + DEPENDS + vespaeval +) +vespa_add_test(NAME eval_dense_single_reduce_function_test_app COMMAND eval_dense_single_reduce_function_test_app) diff --git a/eval/src/tests/tensor/dense_single_reduce_function/dense_single_reduce_function_test.cpp b/eval/src/tests/tensor/dense_single_reduce_function/dense_single_reduce_function_test.cpp new file mode 100644 index 00000000000..949c5277e18 --- /dev/null +++ b/eval/src/tests/tensor/dense_single_reduce_function/dense_single_reduce_function_test.cpp @@ -0,0 +1,123 @@ +// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#include <vespa/vespalib/testkit/test_kit.h> +#include <vespa/eval/eval/tensor_function.h> +#include <vespa/eval/eval/operation.h> +#include <vespa/eval/eval/simple_tensor.h> +#include <vespa/eval/eval/simple_tensor_engine.h> +#include <vespa/eval/tensor/default_tensor_engine.h> +#include <vespa/eval/tensor/dense/dense_single_reduce_function.h> +#include <vespa/eval/tensor/dense/dense_tensor.h> +#include <vespa/eval/tensor/dense/dense_tensor_view.h> +#include <vespa/eval/eval/test/tensor_model.hpp> +#include <vespa/eval/eval/test/eval_fixture.h> + +#include <vespa/vespalib/util/stringfmt.h> +#include <vespa/vespalib/util/stash.h> + +using namespace vespalib; +using namespace vespalib::eval; +using namespace vespalib::eval::test; +using namespace vespalib::tensor; +using namespace vespalib::eval::tensor_function; + +const TensorEngine &prod_engine = DefaultTensorEngine::ref(); + +EvalFixture::ParamRepo make_params() { + return EvalFixture::ParamRepo() + .add_dense({{"a", 2}, {"b", 3}, {"c", 4}, {"d", 5}}) + .add_cube("a", 2, "b", 1, "c", 1) + .add_cube("a", 1, "b", 2, "c", 1) + .add_cube("a", 1, "b", 1, "c", 2) + .add_cube("a", 1, "b", 1, "c", 1) + .add_vector("a", 10) + .add("xy_mapped", spec({x({"a", "b"}),y({"x", "y"})}, N())) 
+ .add("xyz_mixed", spec({x({"a", "b"}),y({"x", "y"}),z(3)}, N())); +} +EvalFixture::ParamRepo param_repo = make_params(); + +void verify_optimized(const vespalib::string &expr, size_t dim_idx, Aggr aggr) +{ + EvalFixture slow_fixture(prod_engine, expr, param_repo, false); + EvalFixture fixture(prod_engine, expr, param_repo, true); + EXPECT_EQUAL(fixture.result(), EvalFixture::ref(expr, param_repo)); + EXPECT_EQUAL(fixture.result(), slow_fixture.result()); + auto info = fixture.find_all<DenseSingleReduceFunction>(); + ASSERT_EQUAL(info.size(), 1u); + EXPECT_TRUE(info[0]->result_is_mutable()); + EXPECT_EQUAL(info[0]->dim_idx(), dim_idx); + EXPECT_EQUAL(int(info[0]->aggr()), int(aggr)); +} + +void verify_not_optimized(const vespalib::string &expr) { + EvalFixture slow_fixture(prod_engine, expr, param_repo, false); + EvalFixture fixture(prod_engine, expr, param_repo, true); + EXPECT_EQUAL(fixture.result(), EvalFixture::ref(expr, param_repo)); + EXPECT_EQUAL(fixture.result(), slow_fixture.result()); + auto info = fixture.find_all<DenseSingleReduceFunction>(); + EXPECT_TRUE(info.empty()); +} + +TEST("require that multi-dimensional reduce is not optimized") { + TEST_DO(verify_not_optimized("reduce(a2b3c4d5,sum,a,b)")); + TEST_DO(verify_not_optimized("reduce(a2b3c4d5,sum,c,d)")); +} + +TEST("require that reduce to scalar is not optimized") { + TEST_DO(verify_not_optimized("reduce(a10,sum,a)")); + TEST_DO(verify_not_optimized("reduce(a10,sum)")); +} + +TEST("require that sparse reduce is not optimized") { + TEST_DO(verify_not_optimized("reduce(xy_mapped,sum,x)")); + TEST_DO(verify_not_optimized("reduce(xy_mapped,sum,y)")); +} + +TEST("require that mixed reduce is not optimized") { + TEST_DO(verify_not_optimized("reduce(xyz_mixed,sum,x)")); + TEST_DO(verify_not_optimized("reduce(xyz_mixed,sum,y)")); + TEST_DO(verify_not_optimized("reduce(xyz_mixed,sum,z)")); +} + +// NB: these are shadowed by the remove dimension optimization +TEST("require that reducing self-aggregating 
trivial dimensions is not optimized") { + TEST_DO(verify_not_optimized("reduce(a1b1c1,avg,c)")); + TEST_DO(verify_not_optimized("reduce(a1b1c1,prod,c)")); + TEST_DO(verify_not_optimized("reduce(a1b1c1,sum,c)")); + TEST_DO(verify_not_optimized("reduce(a1b1c1,max,c)")); + TEST_DO(verify_not_optimized("reduce(a1b1c1,min,c)")); +} + +TEST("require that reducing trivial dimension with COUNT is 'optimized'") { + TEST_DO(verify_optimized("reduce(a1b1c1,count,a)", 0, Aggr::COUNT)); + TEST_DO(verify_optimized("reduce(a1b1c1,count,b)", 1, Aggr::COUNT)); + TEST_DO(verify_optimized("reduce(a1b1c1,count,c)", 2, Aggr::COUNT)); +} + +vespalib::string make_expr(const vespalib::string &arg, const vespalib::string &dim, bool float_cells, Aggr aggr) { + return make_string("reduce(%s%s,%s,%s)", arg.c_str(), float_cells ? "f" : "", AggrNames::name_of(aggr)->c_str(), dim.c_str()); +} + +void verify_optimized_multi(const vespalib::string &arg, const vespalib::string &dim, size_t dim_idx) { + for (bool float_cells: {false, true}) { + for (Aggr aggr: Aggregator::list()) { + auto expr = make_expr(arg, dim, float_cells, aggr); + TEST_DO(verify_optimized(expr, dim_idx, aggr)); + } + } +} + +TEST("require that normal dense single reduce works") { + TEST_DO(verify_optimized_multi("a2b3c4d5", "a", 0)); + TEST_DO(verify_optimized_multi("a2b3c4d5", "b", 1)); + TEST_DO(verify_optimized_multi("a2b3c4d5", "c", 2)); + TEST_DO(verify_optimized_multi("a2b3c4d5", "d", 3)); +} + +TEST("require that minimal dense single reduce works") { + TEST_DO(verify_optimized_multi("a2b1c1", "a", 0)); + TEST_DO(verify_optimized_multi("a1b2c1", "b", 1)); + TEST_DO(verify_optimized_multi("a1b1c2", "c", 2)); +} + +TEST_MAIN() { TEST_RUN_ALL(); } diff --git a/eval/src/vespa/eval/eval/aggr.cpp b/eval/src/vespa/eval/eval/aggr.cpp index 8aacac64041..d10bbc4abb8 100644 --- a/eval/src/vespa/eval/eval/aggr.cpp +++ b/eval/src/vespa/eval/eval/aggr.cpp @@ -11,53 +11,12 @@ namespace eval { namespace { -struct Avg : Aggregator { - 
double sum = 0.0; - size_t cnt = 1; - void first(double value) override { - sum = value; - cnt = 1; - } - void next(double value) override { - sum += value; - ++cnt; - } - double result() const override { return (sum / cnt); } -}; - -struct Count : Aggregator { - size_t cnt = 0; - void first(double) override { cnt = 1; } - void next(double) override { ++cnt; } - double result() const override { return cnt; } -}; - -struct Prod : Aggregator { - double prod = 0.0; - void first(double value) override { prod = value; } - void next(double value) override { prod *= value; } - double result() const override { return prod; } -}; - -struct Sum : Aggregator { - double sum = 0.0; - void first(double value) override { sum = value; } - void next(double value) override { sum += value; } - double result() const override { return sum; } -}; - -struct Max : Aggregator { - double max = 0.0; - void first(double value) override { max = value; } - void next(double value) override { max = std::max(max, value); } - double result() const override { return max; } -}; - -struct Min : Aggregator { - double min = 0.0; - void first(double value) override { min = value; } - void next(double value) override { min = std::min(min, value); } - double result() const override { return min; } +template <typename T> +struct Wrapper : Aggregator { + T aggr; + virtual void first(double value) final override { aggr.first(value); } + virtual void next(double value) final override { aggr.next(value); } + virtual double result() const final override { return aggr.result(); } }; } // namespace vespalib::eval::<unnamed> @@ -113,15 +72,22 @@ Aggregator & Aggregator::create(Aggr aggr, Stash &stash) { switch (aggr) { - case Aggr::AVG: return stash.create<Avg>(); - case Aggr::COUNT: return stash.create<Count>(); - case Aggr::PROD: return stash.create<Prod>(); - case Aggr::SUM: return stash.create<Sum>(); - case Aggr::MAX: return stash.create<Max>(); - case Aggr::MIN: return stash.create<Min>(); + case Aggr::AVG: 
return stash.create<Wrapper<aggr::Avg<double>>>(); + case Aggr::COUNT: return stash.create<Wrapper<aggr::Count<double>>>(); + case Aggr::PROD: return stash.create<Wrapper<aggr::Prod<double>>>(); + case Aggr::SUM: return stash.create<Wrapper<aggr::Sum<double>>>(); + case Aggr::MAX: return stash.create<Wrapper<aggr::Max<double>>>(); + case Aggr::MIN: return stash.create<Wrapper<aggr::Min<double>>>(); } LOG_ABORT("should not be reached"); } +std::vector<Aggr> +Aggregator::list() +{ + return std::vector<Aggr>({ Aggr::AVG, Aggr::COUNT, Aggr::PROD, + Aggr::SUM, Aggr::MAX, Aggr::MIN }); +} + } // namespace vespalib::eval } // namespace vespalib diff --git a/eval/src/vespa/eval/eval/aggr.h b/eval/src/vespa/eval/eval/aggr.h index 4f2284b4ba7..8dea54d8abc 100644 --- a/eval/src/vespa/eval/eval/aggr.h +++ b/eval/src/vespa/eval/eval/aggr.h @@ -3,6 +3,7 @@ #pragma once #include <vespa/vespalib/stllike/string.h> +#include <vector> #include <map> namespace vespalib { @@ -50,7 +51,72 @@ struct Aggregator { virtual double result() const = 0; virtual ~Aggregator(); static Aggregator &create(Aggr aggr, Stash &stash); + static std::vector<Aggr> list(); }; +namespace aggr { + +template <typename T> class Avg { +private: + T _sum = 0.0; + size_t _cnt = 1; +public: + void first(T value) { + _sum = value; + _cnt = 1; + } + void next(T value) { + _sum += value; + ++_cnt; + } + T result() const { return (_sum / _cnt); } +}; + +template <typename T> class Count { +private: + size_t _cnt = 0; +public: + void first(T) { _cnt = 1; } + void next(T) { ++_cnt; } + T result() const { return _cnt; } +}; + +template <typename T> class Prod { +private: + T _prod = 0.0; +public: + void first(T value) { _prod = value; } + void next(T value) { _prod *= value; } + T result() const { return _prod; } +}; + +template <typename T> class Sum { +private: + T _sum = 0.0; +public: + void first(T value) { _sum = value; } + void next(T value) { _sum += value; } + T result() const { return _sum; } +}; + +template 
<typename T> class Max { +private: + T _max = 0.0; +public: + void first(T value) { _max = value; } + void next(T value) { _max = std::max(_max, value); } + T result() const { return _max; } +}; + +template <typename T> class Min { +private: + T _min = 0.0; +public: + void first(T value) { _min = value; } + void next(T value) { _min = std::min(_min, value); } + T result() const { return _min; } +}; + +} // namespave vespalib::eval::aggr } // namespace vespalib::eval } // namespace vespalib diff --git a/eval/src/vespa/eval/eval/test/eval_fixture.cpp b/eval/src/vespa/eval/eval/test/eval_fixture.cpp index 7ce05ccea8f..dd27ba1f147 100644 --- a/eval/src/vespa/eval/eval/test/eval_fixture.cpp +++ b/eval/src/vespa/eval/eval/test/eval_fixture.cpp @@ -101,51 +101,51 @@ std::vector<Value::CREF> get_refs(const std::vector<Value::UP> &values) { void add_cell_values(TensorSpec &spec, TensorSpec::Address &addr, const std::vector<std::pair<vespalib::string, size_t> > &dims, - size_t idx, size_t &seq) + size_t idx, size_t &seq, std::function<double(size_t)> gen) { if (idx < dims.size()) { for (size_t i = 0; i < dims[idx].second; ++i) { addr.emplace(dims[idx].first, TensorSpec::Label(i)).first->second = TensorSpec::Label(i); - add_cell_values(spec, addr, dims, idx + 1, seq); + add_cell_values(spec, addr, dims, idx + 1, seq, gen); } } else { - spec.add(addr, seq++); + spec.add(addr, gen(seq++)); } } TensorSpec make_dense(const vespalib::string &type, const std::vector<std::pair<vespalib::string, size_t> > &dims, - size_t seed) + std::function<double(size_t)> gen) { TensorSpec spec(type); TensorSpec::Address addr; - size_t seq = seed; - add_cell_values(spec, addr, dims, 0, seq); + size_t seq = 0; + add_cell_values(spec, addr, dims, 0, seq, gen); return spec; } } // namespace vespalib::eval::test ParamRepo & -EvalFixture::ParamRepo::add_vector(const char *d1, size_t s1, size_t seed) +EvalFixture::ParamRepo::add_vector(const char *d1, size_t s1, gen_fun_t gen) { - return add_dense({{d1, 
s1}}, seed); + return add_dense({{d1, s1}}, gen); } ParamRepo & -EvalFixture::ParamRepo::add_matrix(const char *d1, size_t s1, const char *d2, size_t s2, size_t seed) +EvalFixture::ParamRepo::add_matrix(const char *d1, size_t s1, const char *d2, size_t s2, gen_fun_t gen) { - return add_dense({{d1, s1}, {d2, s2}}, seed); + return add_dense({{d1, s1}, {d2, s2}}, gen); } ParamRepo & -EvalFixture::ParamRepo::add_cube(const char *d1, size_t s1, const char *d2, size_t s2, const char *d3, size_t s3, size_t seed) +EvalFixture::ParamRepo::add_cube(const char *d1, size_t s1, const char *d2, size_t s2, const char *d3, size_t s3, gen_fun_t gen) { - return add_dense({{d1, s1}, {d2, s2}, {d3, s3}}, seed); + return add_dense({{d1, s1}, {d2, s2}, {d3, s3}}, gen); } ParamRepo & -EvalFixture::ParamRepo::add_dense(const std::vector<std::pair<vespalib::string, size_t> > &dims, size_t seed) +EvalFixture::ParamRepo::add_dense(const std::vector<std::pair<vespalib::string, size_t> > &dims, gen_fun_t gen) { vespalib::string prev; vespalib::string name; @@ -159,8 +159,8 @@ EvalFixture::ParamRepo::add_dense(const std::vector<std::pair<vespalib::string, type += fmt("%s[%zu]", dim.first.c_str(), dim.second); prev = dim.first; } - add(name, make_dense(fmt("tensor(%s)", type.c_str()), dims, seed)); - add(name + "f", make_dense(fmt("tensor<float>(%s)", type.c_str()), dims, seed)); + add(name, make_dense(fmt("tensor(%s)", type.c_str()), dims, gen)); + add(name + "f", make_dense(fmt("tensor<float>(%s)", type.c_str()), dims, gen)); return *this; } diff --git a/eval/src/vespa/eval/eval/test/eval_fixture.h b/eval/src/vespa/eval/eval/test/eval_fixture.h index f11fb8ebf22..1010b5e58a8 100644 --- a/eval/src/vespa/eval/eval/test/eval_fixture.h +++ b/eval/src/vespa/eval/eval/test/eval_fixture.h @@ -10,6 +10,7 @@ #include <vespa/eval/tensor/default_tensor_engine.h> #include <vespa/vespalib/util/stash.h> #include <set> +#include <functional> namespace vespalib::eval::test { @@ -26,6 +27,8 @@ public: struct 
ParamRepo { std::map<vespalib::string,Param> map; + using gen_fun_t = std::function<double(size_t)>; + static double gen_N(size_t seq) { return (seq + 1); } ParamRepo() : map() {} ParamRepo &add(const vespalib::string &name, TensorSpec value_in, bool is_mutable_in) { map.insert_or_assign(name, Param(std::move(value_in), is_mutable_in)); @@ -37,10 +40,10 @@ public: ParamRepo &add_mutable(const vespalib::string &name, const TensorSpec &value) { return add(name, value, true); } - ParamRepo &add_vector(const char *d1, size_t s1, size_t seed = 1); - ParamRepo &add_matrix(const char *d1, size_t s1, const char *d2, size_t s2, size_t seed = 1); - ParamRepo &add_cube(const char *d1, size_t s1, const char *d2, size_t s2, const char *d3, size_t s3, size_t seed = 1); - ParamRepo &add_dense(const std::vector<std::pair<vespalib::string, size_t> > &dims, size_t seed = 1); + ParamRepo &add_vector(const char *d1, size_t s1, gen_fun_t = gen_N); + ParamRepo &add_matrix(const char *d1, size_t s1, const char *d2, size_t s2, gen_fun_t gen = gen_N); + ParamRepo &add_cube(const char *d1, size_t s1, const char *d2, size_t s2, const char *d3, size_t s3, gen_fun_t gen = gen_N); + ParamRepo &add_dense(const std::vector<std::pair<vespalib::string, size_t> > &dims, gen_fun_t gen = gen_N); ~ParamRepo() {} }; diff --git a/eval/src/vespa/eval/tensor/default_tensor_engine.cpp b/eval/src/vespa/eval/tensor/default_tensor_engine.cpp index 0cadbd64372..a817c1454d8 100644 --- a/eval/src/vespa/eval/tensor/default_tensor_engine.cpp +++ b/eval/src/vespa/eval/tensor/default_tensor_engine.cpp @@ -14,6 +14,7 @@ #include "dense/dense_multi_matmul_function.h" #include "dense/dense_fast_rename_optimizer.h" #include "dense/dense_add_dimension_optimizer.h" +#include "dense/dense_single_reduce_function.h" #include "dense/dense_remove_dimension_optimizer.h" #include "dense/dense_lambda_peek_optimizer.h" #include "dense/dense_inplace_join_function.h" @@ -260,27 +261,40 @@ DefaultTensorEngine::optimize(const 
TensorFunction &expr, Stash &stash) const { using Child = TensorFunction::Child; Child root(expr); - std::vector<Child::CREF> nodes({root}); - for (size_t i = 0; i < nodes.size(); ++i) { - nodes[i].get().get().push_children(nodes); - } LOG(debug, "tensor function before optimization:\n%s\n", root.get().as_string().c_str()); - while (!nodes.empty()) { - const Child &child = nodes.back(); - child.set(VectorFromDoublesFunction::optimize(child.get(), stash)); - child.set(DenseTensorCreateFunction::optimize(child.get(), stash)); - child.set(DenseLambdaPeekOptimizer::optimize(child.get(), stash)); - child.set(DenseTensorPeekFunction::optimize(child.get(), stash)); - child.set(DenseDotProductFunction::optimize(child.get(), stash)); - child.set(DenseXWProductFunction::optimize(child.get(), stash)); - child.set(DenseMatMulFunction::optimize(child.get(), stash)); - child.set(DenseMultiMatMulFunction::optimize(child.get(), stash)); - child.set(DenseFastRenameOptimizer::optimize(child.get(), stash)); - child.set(DenseAddDimensionOptimizer::optimize(child.get(), stash)); - child.set(DenseRemoveDimensionOptimizer::optimize(child.get(), stash)); - child.set(DenseInplaceMapFunction::optimize(child.get(), stash)); - child.set(DenseInplaceJoinFunction::optimize(child.get(), stash)); - nodes.pop_back(); + { + std::vector<Child::CREF> nodes({root}); + for (size_t i = 0; i < nodes.size(); ++i) { + nodes[i].get().get().push_children(nodes); + } + while (!nodes.empty()) { + const Child &child = nodes.back().get(); + child.set(DenseDotProductFunction::optimize(child.get(), stash)); + child.set(DenseXWProductFunction::optimize(child.get(), stash)); + child.set(DenseMatMulFunction::optimize(child.get(), stash)); + child.set(DenseMultiMatMulFunction::optimize(child.get(), stash)); + nodes.pop_back(); + } + } + { + std::vector<Child::CREF> nodes({root}); + for (size_t i = 0; i < nodes.size(); ++i) { + nodes[i].get().get().push_children(nodes); + } + while (!nodes.empty()) { + const Child 
&child = nodes.back().get(); + child.set(DenseAddDimensionOptimizer::optimize(child.get(), stash)); + child.set(DenseRemoveDimensionOptimizer::optimize(child.get(), stash)); + child.set(VectorFromDoublesFunction::optimize(child.get(), stash)); + child.set(DenseTensorCreateFunction::optimize(child.get(), stash)); + child.set(DenseTensorPeekFunction::optimize(child.get(), stash)); + child.set(DenseLambdaPeekOptimizer::optimize(child.get(), stash)); + child.set(DenseFastRenameOptimizer::optimize(child.get(), stash)); + child.set(DenseInplaceMapFunction::optimize(child.get(), stash)); + child.set(DenseInplaceJoinFunction::optimize(child.get(), stash)); + child.set(DenseSingleReduceFunction::optimize(child.get(), stash)); + nodes.pop_back(); + } } LOG(debug, "tensor function after optimization:\n%s\n", root.get().as_string().c_str()); return root.get(); diff --git a/eval/src/vespa/eval/tensor/dense/CMakeLists.txt b/eval/src/vespa/eval/tensor/dense/CMakeLists.txt index 7019749e123..0131ff28398 100644 --- a/eval/src/vespa/eval/tensor/dense/CMakeLists.txt +++ b/eval/src/vespa/eval/tensor/dense/CMakeLists.txt @@ -14,6 +14,7 @@ vespa_add_library(eval_tensor_dense OBJECT dense_multi_matmul_function.cpp dense_remove_dimension_optimizer.cpp dense_replace_type_function.cpp + dense_single_reduce_function.cpp dense_tensor.cpp dense_tensor_address_mapper.cpp dense_tensor_cells_iterator.cpp diff --git a/eval/src/vespa/eval/tensor/dense/dense_single_reduce_function.cpp b/eval/src/vespa/eval/tensor/dense/dense_single_reduce_function.cpp new file mode 100644 index 00000000000..663993b6c26 --- /dev/null +++ b/eval/src/vespa/eval/tensor/dense/dense_single_reduce_function.cpp @@ -0,0 +1,134 @@ +// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+ +#include "dense_single_reduce_function.h" +#include "dense_tensor_view.h" +#include <vespa/eval/eval/value.h> + +namespace vespalib::tensor { + +using eval::Aggr; +using eval::InterpretedFunction; +using eval::TensorEngine; +using eval::TensorFunction; +using eval::Value; +using eval::ValueType; +using eval::as; + +using namespace eval::tensor_function; +using namespace eval::aggr; + +namespace { + +struct Params { + const ValueType &result_type; + size_t outer_size; + size_t dim_size; + size_t inner_size; + Params(const ValueType &result_type_in, const ValueType &child_type, size_t dim_idx) + : result_type(result_type_in), outer_size(1), dim_size(1), inner_size(1) + { + for (size_t i = 0; i < child_type.dimensions().size(); ++i) { + if (i < dim_idx) { + outer_size *= child_type.dimensions()[i].size; + } else if (i == dim_idx) { + dim_size *= child_type.dimensions()[i].size; + } else { + inner_size *= child_type.dimensions()[i].size; + } + } + } +}; + +template <typename CT, typename AGGR> +CT reduce_cells(const CT *src, size_t dim_size, size_t stride, AGGR &aggr) { + aggr.first(*src); + for (size_t i = 1; i < dim_size; ++i) { + src += stride; + aggr.next(*src); + } + return aggr.result(); +} + +template <typename CT, typename AGGR> +void my_single_reduce_op(InterpretedFunction::State &state, uint64_t param) { + const auto &params = *(const Params *)(param); + const CT *src = DenseTensorView::typify_cells<CT>(state.peek(0)).cbegin(); + auto dst_cells = state.stash.create_array<CT>(params.outer_size * params.inner_size); + AGGR aggr; + CT *dst = dst_cells.begin(); + const size_t block_size = (params.dim_size * params.inner_size); + for (size_t outer = 0; outer < params.outer_size; ++outer) { + for (size_t inner = 0; inner < params.inner_size; ++inner) { + *dst++ = reduce_cells<CT, AGGR>(src + inner, params.dim_size, params.inner_size, aggr); + } + src += block_size; + } + state.pop_push(state.stash.create<DenseTensorView>(params.result_type, TypedCells(dst_cells))); 
+} + +template <typename CT> +InterpretedFunction::op_function my_select_2(Aggr aggr) { + switch (aggr) { + case Aggr::AVG: return my_single_reduce_op<CT, Avg<CT>>; + case Aggr::COUNT: return my_single_reduce_op<CT, Count<CT>>; + case Aggr::PROD: return my_single_reduce_op<CT, Prod<CT>>; + case Aggr::SUM: return my_single_reduce_op<CT, Sum<CT>>; + case Aggr::MAX: return my_single_reduce_op<CT, Max<CT>>; + case Aggr::MIN: return my_single_reduce_op<CT, Min<CT>>; + } + abort(); +} + +InterpretedFunction::op_function my_select(CellType cell_type, Aggr aggr) { + if (cell_type == ValueType::CellType::DOUBLE) { + return my_select_2<double>(aggr); + } + if (cell_type == ValueType::CellType::FLOAT) { + return my_select_2<float>(aggr); + } + abort(); +} + +bool check_input_type(const ValueType &type) { + return (type.is_dense() && ((type.cell_type() == CellType::FLOAT) || (type.cell_type() == CellType::DOUBLE))); +} + +} // namespace vespalib::tensor::<unnamed> + +DenseSingleReduceFunction::DenseSingleReduceFunction(const ValueType &result_type, + const TensorFunction &child, + size_t dim_idx, Aggr aggr) + : Op1(result_type, child), + _dim_idx(dim_idx), + _aggr(aggr) +{ +} + +DenseSingleReduceFunction::~DenseSingleReduceFunction() = default; + +InterpretedFunction::Instruction +DenseSingleReduceFunction::compile_self(const TensorEngine &, Stash &stash) const +{ + auto op = my_select(result_type().cell_type(), _aggr); + auto &params = stash.create<Params>(result_type(), child().result_type(), _dim_idx); + static_assert(sizeof(uint64_t) == sizeof(&params)); + return InterpretedFunction::Instruction(op, (uint64_t)&params); +} + +const TensorFunction & +DenseSingleReduceFunction::optimize(const TensorFunction &expr, Stash &stash) +{ + auto reduce = as<Reduce>(expr); + if (reduce && (reduce->dimensions().size() == 1) && + check_input_type(reduce->child().result_type()) && + expr.result_type().is_dense()) + { + size_t dim_idx = 
reduce->child().result_type().dimension_index(reduce->dimensions()[0]); + assert(dim_idx != ValueType::Dimension::npos); + assert(expr.result_type().cell_type() == reduce->child().result_type().cell_type()); + return stash.create<DenseSingleReduceFunction>(expr.result_type(), reduce->child(), dim_idx, reduce->aggr()); + } + return expr; +} + +} // namespace vespalib::tensor diff --git a/eval/src/vespa/eval/tensor/dense/dense_single_reduce_function.h b/eval/src/vespa/eval/tensor/dense/dense_single_reduce_function.h new file mode 100644 index 00000000000..037994727fc --- /dev/null +++ b/eval/src/vespa/eval/tensor/dense/dense_single_reduce_function.h @@ -0,0 +1,31 @@ +// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#pragma once + +#include <vespa/eval/eval/tensor_function.h> + +namespace vespalib::tensor { + +/** + * Tensor function reducing a single dimension of a dense + * tensor where the result is also a dense tensor. + **/ +class DenseSingleReduceFunction : public eval::tensor_function::Op1 +{ +private: + size_t _dim_idx; + eval::Aggr _aggr; + +public: + DenseSingleReduceFunction(const eval::ValueType &result_type, + const eval::TensorFunction &child, + size_t dim_idx, eval::Aggr aggr); + ~DenseSingleReduceFunction() override; + size_t dim_idx() const { return _dim_idx; } + eval::Aggr aggr() const { return _aggr; } + bool result_is_mutable() const override { return true; } + eval::InterpretedFunction::Instruction compile_self(const eval::TensorEngine &engine, Stash &stash) const override; + static const eval::TensorFunction &optimize(const eval::TensorFunction &expr, Stash &stash); +}; + +} // namespace vespalib::tensor |