diff options
author | Håvard Pettersen <havardpe@oath.com> | 2021-01-18 09:54:01 +0000 |
---|---|---|
committer | Håvard Pettersen <havardpe@oath.com> | 2021-01-19 16:58:21 +0000 |
commit | 5ab81b067850113b15016c11354fe7259906b1a2 (patch) | |
tree | d81cb462345ddbc9377bee8f9490e9861256c6aa /eval | |
parent | 4e6d9b184867553f740a65124fdb2d9c380caf22 (diff) |
forward or ignore index in relevant mixed tensor reduce cases
Diffstat (limited to 'eval')
6 files changed, 107 insertions, 28 deletions
diff --git a/eval/src/tests/eval/aggr/aggr_test.cpp b/eval/src/tests/eval/aggr/aggr_test.cpp index 9045df68305..5eddb026406 100644 --- a/eval/src/tests/eval/aggr/aggr_test.cpp +++ b/eval/src/tests/eval/aggr/aggr_test.cpp @@ -85,8 +85,9 @@ TEST("require that PROD aggregator works as expected") { EXPECT_TRUE(aggr.enum_value() == Aggr::PROD); } -TEST("require that Prod combine works as expected") { +TEST("require that Prod static API works as expected") { using Type = Prod<double>; + EXPECT_EQUAL(Type::null_value(), 1.0); EXPECT_EQUAL(Type::combine(3,7), 21.0); EXPECT_EQUAL(Type::combine(5,4), 20.0); } @@ -103,8 +104,9 @@ TEST("require that SUM aggregator works as expected") { EXPECT_TRUE(aggr.enum_value() == Aggr::SUM); } -TEST("require that Sum combine works as expected") { +TEST("require that Sum static API works as expected") { using Type = Sum<double>; + EXPECT_EQUAL(Type::null_value(), 0.0); EXPECT_EQUAL(Type::combine(3,7), 10.0); EXPECT_EQUAL(Type::combine(5,4), 9.0); } @@ -121,8 +123,10 @@ TEST("require that MAX aggregator works as expected") { EXPECT_TRUE(aggr.enum_value() == Aggr::MAX); } -TEST("require that Max combine works as expected") { +TEST("require that Max static API works as expected") { using Type = Max<double>; + EXPECT_EQUAL(Max<double>::null_value(), -std::numeric_limits<double>::infinity()); + EXPECT_EQUAL(Max<float>::null_value(), -std::numeric_limits<float>::infinity()); EXPECT_EQUAL(Type::combine(3,7), 7.0); EXPECT_EQUAL(Type::combine(5,4), 5.0); } @@ -165,8 +169,10 @@ TEST("require that MIN aggregator works as expected") { EXPECT_TRUE(aggr.enum_value() == Aggr::MIN); } -TEST("require that Min combine works as expected") { +TEST("require that Min static API works as expected") { using Type = Min<double>; + EXPECT_EQUAL(Min<double>::null_value(), std::numeric_limits<double>::infinity()); + EXPECT_EQUAL(Min<float>::null_value(), std::numeric_limits<float>::infinity()); EXPECT_EQUAL(Type::combine(3,7), 3.0); EXPECT_EQUAL(Type::combine(5,4), 
4.0); } diff --git a/eval/src/tests/instruction/generic_reduce/generic_reduce_test.cpp b/eval/src/tests/instruction/generic_reduce/generic_reduce_test.cpp index 3ab971dd34d..9e2090fa968 100644 --- a/eval/src/tests/instruction/generic_reduce/generic_reduce_test.cpp +++ b/eval/src/tests/instruction/generic_reduce/generic_reduce_test.cpp @@ -32,7 +32,8 @@ std::vector<Layout> layouts = { float_cells({x({"a","b","c"}),y({"foo","bar"}),z({"i","j","k","l"})}), {x(3),y({"foo", "bar"}),z(7)}, {x({"a","b","c"}),y(5),z({"i","j","k","l"})}, - float_cells({x({"a","b","c"}),y(5),z({"i","j","k","l"})}) + float_cells({x({"a","b","c"}),y(5),z({"i","j","k","l"})}), + {x(3),y({}),z(7)} }; TensorSpec perform_generic_reduce(const TensorSpec &a, Aggr aggr, const std::vector<vespalib::string> &dims, @@ -69,7 +70,9 @@ TEST(GenericReduceTest, sparse_reduce_plan_can_be_created) { void test_generic_reduce_with(const ValueBuilderFactory &factory) { for (const Layout &layout: layouts) { TensorSpec input = spec(layout, Div16(N())); + SCOPED_TRACE(fmt("tensor type: %s, num_cells: %zu", input.type().c_str(), input.cells().size())); for (Aggr aggr: {Aggr::SUM, Aggr::AVG, Aggr::MIN, Aggr::MAX}) { + SCOPED_TRACE(fmt("aggregator: %s", AggrNames::name_of(aggr)->c_str())); for (const Domain &domain: layout) { auto expect = ReferenceOperations::reduce(input, aggr, {domain.dimension}).normalize(); auto actual = perform_generic_reduce(input, aggr, {domain.dimension}, factory); diff --git a/eval/src/tests/tensor/instruction_benchmark/instruction_benchmark.cpp b/eval/src/tests/tensor/instruction_benchmark/instruction_benchmark.cpp index 89bddfda933..3345d7dc8ee 100644 --- a/eval/src/tests/tensor/instruction_benchmark/instruction_benchmark.cpp +++ b/eval/src/tests/tensor/instruction_benchmark/instruction_benchmark.cpp @@ -814,6 +814,14 @@ TEST(DenseJoin, partial_overlap) { benchmark_join("dense partial overlap multiply", lhs, rhs, operation::Mul::f); } +TEST(DenseJoin, subset_overlap) { + auto lhs = 
make_cube(D::idx("a", 16), D::idx("b", 16), D::idx("c", 16), 1.0); + auto rhs_inner = make_matrix(D::idx("b", 16), D::idx("c", 16), 2.0); + auto rhs_outer = make_matrix(D::idx("a", 16), D::idx("b", 16), 3.0); + benchmark_join("dense subset overlap inner multiply", lhs, rhs_inner, operation::Mul::f); + benchmark_join("dense subset overlap outer multiply", lhs, rhs_outer, operation::Mul::f); +} + TEST(DenseJoin, no_overlap) { auto lhs = make_cube(D::idx("a", 4), D::idx("e", 4), D::idx("f", 4), 1.0); auto rhs = make_cube(D::idx("b", 4), D::idx("c", 4), D::idx("d", 4), 2.0); diff --git a/eval/src/vespa/eval/eval/aggr.h b/eval/src/vespa/eval/eval/aggr.h index d2e7a64b51b..516ead0f0bf 100644 --- a/eval/src/vespa/eval/eval/aggr.h +++ b/eval/src/vespa/eval/eval/aggr.h @@ -61,9 +61,10 @@ struct Aggregator { namespace aggr { -// can we start by picking any value from the set to be reduced and -// use the templated aggregator 'combine' function in arbitrary order -// to end up with (approximately) the correct result? +// can we start by picking any value from the set to be reduced (or +// the special aggregator-specific null_value) and use the templated +// aggregator 'combine' function in arbitrary order to end up with +// (approximately) the correct result? 
constexpr bool is_simple(Aggr aggr) { return ((aggr == Aggr::PROD) || (aggr == Aggr::SUM) || @@ -124,12 +125,13 @@ private: T _prod; public: using value_type = T; - constexpr Prod() : _prod{1} {} + constexpr Prod() : _prod{null_value()} {} constexpr Prod(T value) : _prod{value} {} - constexpr void sample(T value) { _prod *= value; } - constexpr void merge(const Prod &rhs) { _prod *= rhs._prod; } + constexpr void sample(T value) { _prod = combine(_prod, value); } + constexpr void merge(const Prod &rhs) { _prod = combine(_prod, rhs._prod); } constexpr T result() const { return _prod; } static constexpr Aggr enum_value() { return Aggr::PROD; } + static constexpr T null_value() { return 1; } static constexpr T combine(T a, T b) { return (a * b); } }; @@ -138,12 +140,13 @@ private: T _sum; public: using value_type = T; - constexpr Sum() : _sum{0} {} + constexpr Sum() : _sum{null_value()} {} constexpr Sum(T value) : _sum{value} {} - constexpr void sample(T value) { _sum += value; } - constexpr void merge(const Sum &rhs) { _sum += rhs._sum; } + constexpr void sample(T value) { _sum = combine(_sum, value); } + constexpr void merge(const Sum &rhs) { _sum = combine(_sum, rhs._sum); } constexpr T result() const { return _sum; } static constexpr Aggr enum_value() { return Aggr::SUM; } + static constexpr T null_value() { return 0; } static constexpr T combine(T a, T b) { return (a + b); } }; @@ -152,12 +155,13 @@ private: T _max; public: using value_type = T; - constexpr Max() : _max{-std::numeric_limits<T>::infinity()} {} + constexpr Max() : _max{null_value()} {} constexpr Max(T value) : _max{value} {} - constexpr void sample(T value) { _max = std::max(_max, value); } - constexpr void merge(const Max &rhs) { _max = std::max(_max, rhs._max); } + constexpr void sample(T value) { _max = combine(_max, value); } + constexpr void merge(const Max &rhs) { _max = combine(_max, rhs._max); } constexpr T result() const { return _max; } static constexpr Aggr enum_value() { return 
Aggr::MAX; } + static constexpr T null_value() { return -std::numeric_limits<T>::infinity(); } static constexpr T combine(T a, T b) { return std::max(a,b); } }; @@ -204,12 +208,13 @@ private: T _min; public: using value_type = T; - constexpr Min() : _min{std::numeric_limits<T>::infinity()} {} + constexpr Min() : _min{null_value()} {} constexpr Min(T value) : _min{value} {} - constexpr void sample(T value) { _min = std::min(_min, value); } - constexpr void merge(const Min &rhs) { _min = std::min(_min, rhs._min); } + constexpr void sample(T value) { _min = combine(_min, value); } + constexpr void merge(const Min &rhs) { _min = combine(_min, rhs._min); } constexpr T result() const { return _min; } static constexpr Aggr enum_value() { return Aggr::MIN; } + static constexpr T null_value() { return std::numeric_limits<T>::infinity(); } static constexpr T combine(T a, T b) { return std::min(a,b); } }; diff --git a/eval/src/vespa/eval/instruction/generic_reduce.cpp b/eval/src/vespa/eval/instruction/generic_reduce.cpp index b6393d0d713..2d4144d64b1 100644 --- a/eval/src/vespa/eval/instruction/generic_reduce.cpp +++ b/eval/src/vespa/eval/instruction/generic_reduce.cpp @@ -8,6 +8,7 @@ #include <vespa/vespalib/util/typify.h> #include <vespa/vespalib/util/overload.h> #include <vespa/vespalib/util/visit_ranges.h> +#include <algorithm> #include <cassert> #include <array> @@ -48,7 +49,7 @@ struct SparseReduceState { std::vector<string_id> full_address; std::vector<string_id*> fetch_address; std::vector<string_id*> keep_address; - size_t subspace; + size_t subspace; SparseReduceState(const SparseReducePlan &plan) : full_address(plan.keep_dims.size() + plan.num_reduce_dims), @@ -72,8 +73,8 @@ Value::UP generic_reduce(const Value &value, const ReduceParam &param) { auto cells = value.cells().typify<ICT>(); ArrayArrayMap<string_id,AGGR> map(param.sparse_plan.keep_dims.size(), - param.dense_plan.out_size, - value.index().size()); + param.dense_plan.out_size, + value.index().size()); 
SparseReduceState sparse(param.sparse_plan); auto full_view = value.index().create_view({}); full_view->lookup({}); @@ -94,9 +95,7 @@ generic_reduce(const Value &value, const ReduceParam &param) { }); if ((map.size() == 0) && param.sparse_plan.keep_dims.empty()) { auto zero = builder->add_subspace(); - for (size_t i = 0; i < zero.size(); ++i) { - zero[i] = OCT{}; - } + std::fill(zero.begin(), zero.end(), OCT{}); } return builder->build(std::move(builder)); } @@ -109,6 +108,50 @@ void my_generic_reduce_op(State &state, uint64_t param_in) { auto &result = state.stash.create<std::unique_ptr<Value>>(std::move(up)); const Value &result_ref = *(result.get()); state.pop_push(result_ref); +} + +template <typename ICT, typename OCT, typename AGGR, bool forward_index> +void my_generic_dense_reduce_op(State &state, uint64_t param_in) { + const auto &param = unwrap_param<ReduceParam>(param_in); + const Value &value = state.peek(0); + auto cells = value.cells().typify<ICT>(); + const auto &index = value.index(); + size_t num_subspaces = index.size(); + size_t out_cells_size = forward_index ? 
(param.dense_plan.out_size * num_subspaces) : param.dense_plan.out_size; + auto out_cells = state.stash.create_uninitialized_array<OCT>(out_cells_size); + if (num_subspaces > 0) { + if constexpr (aggr::is_simple(AGGR::enum_value())) { + OCT *dst = out_cells.begin(); + std::fill(out_cells.begin(), out_cells.end(), AGGR::null_value()); + auto combine = [&](size_t src_idx, size_t dst_idx) { dst[dst_idx] = AGGR::combine(dst[dst_idx], cells[src_idx]); }; + for (size_t i = 0; i < num_subspaces; ++i) { + param.dense_plan.execute(i * param.dense_plan.in_size, combine); + if (forward_index) { + dst += param.dense_plan.out_size; + } + } + } else { + std::vector<AGGR> aggr_state(out_cells_size); + AGGR *dst = &aggr_state[0]; + auto sample = [&](size_t src_idx, size_t dst_idx) { dst[dst_idx].sample(cells[src_idx]); }; + for (size_t i = 0; i < num_subspaces; ++i) { + param.dense_plan.execute(i * param.dense_plan.in_size, sample); + if (forward_index) { + dst += param.dense_plan.out_size; + } + } + for (size_t i = 0; i < aggr_state.size(); ++i) { + out_cells[i] = aggr_state[i].result(); + } + } + } else if (!forward_index) { + std::fill(out_cells.begin(), out_cells.end(), OCT{}); + } + if (forward_index) { + state.pop_push(state.stash.create<ValueView>(param.res_type, index, TypedCells(out_cells))); + } else { + state.pop_push(state.stash.create<DenseValueView>(param.res_type, TypedCells(out_cells))); + } }; template <typename ICT, typename OCT, typename AGGR> @@ -147,10 +190,17 @@ void my_full_reduce_op(State &state, uint64_t) { struct SelectGenericReduceOp { template <typename ICT, typename OCT, typename AGGR> static auto invoke(const ReduceParam &param) { + using AggrType = typename AGGR::template templ<OCT>; if (param.res_type.is_scalar()) { - return my_full_reduce_op<ICT, OCT, typename AGGR::template templ<OCT>>; + return my_full_reduce_op<ICT, OCT, AggrType>; + } + if (param.sparse_plan.should_forward_index()) { + return my_generic_dense_reduce_op<ICT, OCT, AggrType, true>; } 
- return my_generic_reduce_op<ICT, OCT, typename AGGR::template templ<OCT>>; + if (param.res_type.is_dense()) { + return my_generic_dense_reduce_op<ICT, OCT, AggrType, false>; + } + return my_generic_reduce_op<ICT, OCT, AggrType>; } }; @@ -227,6 +277,12 @@ SparseReducePlan::SparseReducePlan(const ValueType &type, const ValueType &res_t } } +bool +SparseReducePlan::should_forward_index() const +{ + return ((num_reduce_dims == 0) && (!keep_dims.empty())); +} + SparseReducePlan::~SparseReducePlan() = default; //----------------------------------------------------------------------------- diff --git a/eval/src/vespa/eval/instruction/generic_reduce.h b/eval/src/vespa/eval/instruction/generic_reduce.h index f753a3e51cd..5faafb0325d 100644 --- a/eval/src/vespa/eval/instruction/generic_reduce.h +++ b/eval/src/vespa/eval/instruction/generic_reduce.h @@ -30,6 +30,7 @@ struct DenseReducePlan { struct SparseReducePlan { size_t num_reduce_dims; std::vector<size_t> keep_dims; + bool should_forward_index() const; SparseReducePlan(const ValueType &type, const ValueType &res_type); ~SparseReducePlan(); }; |