author    Arne H Juul <arnej27959@users.noreply.github.com>  2020-11-20 08:14:16 +0100
committer GitHub <noreply@github.com>                        2020-11-20 08:14:16 +0100
commit    b7f88b27a8e2183981a8bc8966f181e816e0dc56 (patch)
tree      7037e0571054ad364d662c356513d5411734f49b /eval
parent    9f3970a99e29f29885b23a9373ce595d1f9ed687 (diff)
parent    aef0081a38061e9fa69b32a3351ae14c357734eb (diff)
Merge pull request #15396 from vespa-engine/havardpe/split-reduce-operations-and-combine-adjacent-dimensions
combine dimensions and split reduce operations
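
In brief: a dense reduce over several dimensions is rewritten as a chain of single reduce operations, where adjacent reduced dimensions are combined into one step and non-adjacent ones are split into separate steps (splitting only applies to simple aggregators). A rough sketch of the rewrites, using the (outer, reduce, inner) sizes that appear throughout the patch:

// adjacent dims combine into a single reduce step:
//   reduce(a2b3c4d5, sum, c, d)  ->  one step with (outer=6, reduce=20, inner=1)
// non-adjacent dims split into chained steps:
//   reduce(a2b3c4d5, sum, a, c)  ->  reduce 'a' (1, 2, 60), then reduce 'c' (3, 4, 5)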
Diffstat (limited to 'eval')
-rw-r--r--  eval/src/tests/eval/aggr/aggr_test.cpp                                                    |  10
-rw-r--r--  eval/src/tests/tensor/dense_single_reduce_function/dense_single_reduce_function_test.cpp | 106
-rw-r--r--  eval/src/tests/tensor/instruction_benchmark/instruction_benchmark.cpp                    |  31
-rw-r--r--  eval/src/vespa/eval/eval/aggr.h                                                           |  10
-rw-r--r--  eval/src/vespa/eval/tensor/dense/dense_remove_dimension_optimizer.cpp                    |  11
-rw-r--r--  eval/src/vespa/eval/tensor/dense/dense_single_reduce_function.cpp                        | 153
-rw-r--r--  eval/src/vespa/eval/tensor/dense/dense_single_reduce_function.h                          |  38
7 files changed, 275 insertions(+), 84 deletions(-)
diff --git a/eval/src/tests/eval/aggr/aggr_test.cpp b/eval/src/tests/eval/aggr/aggr_test.cpp
index db000840f59..9045df68305 100644
--- a/eval/src/tests/eval/aggr/aggr_test.cpp
+++ b/eval/src/tests/eval/aggr/aggr_test.cpp
@@ -29,6 +29,16 @@ TEST("require that aggr::is_simple works as expected") {
EXPECT_TRUE (aggr::is_simple(Aggr::MIN));
}
+TEST("require that aggr::is_ident works as expected") {
+ EXPECT_TRUE (aggr::is_ident(Aggr::AVG));
+ EXPECT_FALSE(aggr::is_ident(Aggr::COUNT));
+ EXPECT_TRUE (aggr::is_ident(Aggr::PROD));
+ EXPECT_TRUE (aggr::is_ident(Aggr::SUM));
+ EXPECT_TRUE (aggr::is_ident(Aggr::MAX));
+ EXPECT_TRUE (aggr::is_ident(Aggr::MEDIAN));
+ EXPECT_TRUE (aggr::is_ident(Aggr::MIN));
+}
+
TEST("require that aggr::is_complex works as expected") {
EXPECT_FALSE(aggr::is_complex(Aggr::AVG));
EXPECT_FALSE(aggr::is_complex(Aggr::COUNT));
diff --git a/eval/src/tests/tensor/dense_single_reduce_function/dense_single_reduce_function_test.cpp b/eval/src/tests/tensor/dense_single_reduce_function/dense_single_reduce_function_test.cpp
index 949c5277e18..d3495befe7e 100644
--- a/eval/src/tests/tensor/dense_single_reduce_function/dense_single_reduce_function_test.cpp
+++ b/eval/src/tests/tensor/dense_single_reduce_function/dense_single_reduce_function_test.cpp
@@ -26,6 +26,7 @@ const TensorEngine &prod_engine = DefaultTensorEngine::ref();
EvalFixture::ParamRepo make_params() {
return EvalFixture::ParamRepo()
.add_dense({{"a", 2}, {"b", 3}, {"c", 4}, {"d", 5}})
+ .add_dense({{"a", 9}, {"b", 9}, {"c", 9}, {"d", 9}})
.add_cube("a", 2, "b", 1, "c", 1)
.add_cube("a", 1, "b", 2, "c", 1)
.add_cube("a", 1, "b", 1, "c", 2)
@@ -36,17 +37,35 @@ EvalFixture::ParamRepo make_params() {
}
EvalFixture::ParamRepo param_repo = make_params();
-void verify_optimized(const vespalib::string &expr, size_t dim_idx, Aggr aggr)
-{
+struct ReduceSpec {
+ size_t outer_size;
+ size_t reduce_size;
+ size_t inner_size;
+ Aggr aggr;
+};
+
+void verify_optimized_impl(const vespalib::string &expr, const std::vector<ReduceSpec> &spec_list) {
EvalFixture slow_fixture(prod_engine, expr, param_repo, false);
EvalFixture fixture(prod_engine, expr, param_repo, true);
EXPECT_EQUAL(fixture.result(), EvalFixture::ref(expr, param_repo));
EXPECT_EQUAL(fixture.result(), slow_fixture.result());
auto info = fixture.find_all<DenseSingleReduceFunction>();
- ASSERT_EQUAL(info.size(), 1u);
- EXPECT_TRUE(info[0]->result_is_mutable());
- EXPECT_EQUAL(info[0]->dim_idx(), dim_idx);
- EXPECT_EQUAL(int(info[0]->aggr()), int(aggr));
+ ASSERT_EQUAL(info.size(), spec_list.size());
+ for (size_t i = 0; i < spec_list.size(); ++i) {
+ EXPECT_TRUE(info[i]->result_is_mutable());
+ EXPECT_EQUAL(info[i]->outer_size(), spec_list[i].outer_size);
+ EXPECT_EQUAL(info[i]->reduce_size(), spec_list[i].reduce_size);
+ EXPECT_EQUAL(info[i]->inner_size(), spec_list[i].inner_size);
+ EXPECT_EQUAL(int(info[i]->aggr()), int(spec_list[i].aggr));
+ }
+}
+
+void verify_optimized(const vespalib::string &expr, const ReduceSpec &spec) {
+ verify_optimized_impl(expr, {spec});
+}
+
+void verify_optimized(const vespalib::string &expr, const ReduceSpec &spec1, const ReduceSpec &spec2) {
+ verify_optimized_impl(expr, {spec1, spec2});
}
void verify_not_optimized(const vespalib::string &expr) {
@@ -58,11 +77,6 @@ void verify_not_optimized(const vespalib::string &expr) {
EXPECT_TRUE(info.empty());
}
-TEST("require that multi-dimensional reduce is not optimized") {
- TEST_DO(verify_not_optimized("reduce(a2b3c4d5,sum,a,b)"));
- TEST_DO(verify_not_optimized("reduce(a2b3c4d5,sum,c,d)"));
-}
-
TEST("require that reduce to scalar is not optimized") {
TEST_DO(verify_not_optimized("reduce(a10,sum,a)"));
TEST_DO(verify_not_optimized("reduce(a10,sum)"));
@@ -79,45 +93,83 @@ TEST("require that mixed reduce is not optimized") {
TEST_DO(verify_not_optimized("reduce(xyz_mixed,sum,z)"));
}
-// NB: these are shadowed by the remove dimension optimization
-TEST("require that reducing self-aggregating trivial dimensions is not optimized") {
+TEST("require that reducing trivial dimensions is not optimized") {
TEST_DO(verify_not_optimized("reduce(a1b1c1,avg,c)"));
+ TEST_DO(verify_not_optimized("reduce(a1b1c1,count,c)"));
TEST_DO(verify_not_optimized("reduce(a1b1c1,prod,c)"));
TEST_DO(verify_not_optimized("reduce(a1b1c1,sum,c)"));
TEST_DO(verify_not_optimized("reduce(a1b1c1,max,c)"));
+ TEST_DO(verify_not_optimized("reduce(a1b1c1,median,c)"));
TEST_DO(verify_not_optimized("reduce(a1b1c1,min,c)"));
}
-TEST("require that reducing trivial dimension with COUNT is 'optimized'") {
- TEST_DO(verify_optimized("reduce(a1b1c1,count,a)", 0, Aggr::COUNT));
- TEST_DO(verify_optimized("reduce(a1b1c1,count,b)", 1, Aggr::COUNT));
- TEST_DO(verify_optimized("reduce(a1b1c1,count,c)", 2, Aggr::COUNT));
+TEST("require that atleast_8 dense single reduce works") {
+ TEST_DO(verify_optimized("reduce(a9b9c9d9,avg,a)", {1, 9, 729, Aggr::AVG}));
+ TEST_DO(verify_optimized("reduce(a9b9c9d9,avg,b)", {9, 9, 81, Aggr::AVG}));
+ TEST_DO(verify_optimized("reduce(a9b9c9d9,avg,c)", {81, 9, 9, Aggr::AVG}));
+ TEST_DO(verify_optimized("reduce(a9b9c9d9,avg,d)", {729, 9, 1, Aggr::AVG}));
+ TEST_DO(verify_optimized("reduce(a9b9c9d9,sum,c,d)", {81, 81, 1, Aggr::SUM}));
+}
+
+TEST("require that simple aggregators can be decomposed into multiple reduce operations") {
+ TEST_DO(verify_optimized("reduce(a2b3c4d5,sum,a,c)", {3, 4, 5, Aggr::SUM}, {1, 2, 60, Aggr::SUM}));
+ TEST_DO(verify_optimized("reduce(a2b3c4d5,min,a,c)", {3, 4, 5, Aggr::MIN}, {1, 2, 60, Aggr::MIN}));
+ TEST_DO(verify_optimized("reduce(a2b3c4d5,max,a,c)", {3, 4, 5, Aggr::MAX}, {1, 2, 60, Aggr::MAX}));
+}
+
+TEST("require that reduce dimensions can be listed in reverse order") {
+ TEST_DO(verify_optimized("reduce(a2b3c4d5,sum,c,a)", {3, 4, 5, Aggr::SUM}, {1, 2, 60, Aggr::SUM}));
+ TEST_DO(verify_optimized("reduce(a2b3c4d5,min,c,a)", {3, 4, 5, Aggr::MIN}, {1, 2, 60, Aggr::MIN}));
+ TEST_DO(verify_optimized("reduce(a2b3c4d5,max,c,a)", {3, 4, 5, Aggr::MAX}, {1, 2, 60, Aggr::MAX}));
+}
+
+TEST("require that non-simple aggregators cannot be decomposed into multiple reduce operations") {
+ TEST_DO(verify_not_optimized("reduce(a2b3c4d5,avg,a,c)"));
+ TEST_DO(verify_not_optimized("reduce(a2b3c4d5,count,a,c)"));
+ TEST_DO(verify_not_optimized("reduce(a2b3c4d5,median,a,c)"));
}
vespalib::string make_expr(const vespalib::string &arg, const vespalib::string &dim, bool float_cells, Aggr aggr) {
return make_string("reduce(%s%s,%s,%s)", arg.c_str(), float_cells ? "f" : "", AggrNames::name_of(aggr)->c_str(), dim.c_str());
}
-void verify_optimized_multi(const vespalib::string &arg, const vespalib::string &dim, size_t dim_idx) {
+void verify_optimized_multi(const vespalib::string &arg, const vespalib::string &dim, size_t outer_size, size_t reduce_size, size_t inner_size) {
for (bool float_cells: {false, true}) {
for (Aggr aggr: Aggregator::list()) {
- auto expr = make_expr(arg, dim, float_cells, aggr);
- TEST_DO(verify_optimized(expr, dim_idx, aggr));
+ if (aggr != Aggr::PROD) {
+ auto expr = make_expr(arg, dim, float_cells, aggr);
+ TEST_DO(verify_optimized(expr, {outer_size, reduce_size, inner_size, aggr}));
+ }
}
}
}
TEST("require that normal dense single reduce works") {
- TEST_DO(verify_optimized_multi("a2b3c4d5", "a", 0));
- TEST_DO(verify_optimized_multi("a2b3c4d5", "b", 1));
- TEST_DO(verify_optimized_multi("a2b3c4d5", "c", 2));
- TEST_DO(verify_optimized_multi("a2b3c4d5", "d", 3));
+ TEST_DO(verify_optimized_multi("a2b3c4d5", "a", 1, 2, 60));
+ TEST_DO(verify_optimized_multi("a2b3c4d5", "b", 2, 3, 20));
+ TEST_DO(verify_optimized_multi("a2b3c4d5", "c", 6, 4, 5));
+ TEST_DO(verify_optimized_multi("a2b3c4d5", "d", 24, 5, 1));
+}
+
+TEST("require that dimension-combined dense single reduce works") {
+ TEST_DO(verify_optimized_multi("a2b3c4d5", "a,b", 1, 6, 20));
+ TEST_DO(verify_optimized_multi("a2b3c4d5", "b,c", 2, 12, 5));
+ TEST_DO(verify_optimized_multi("a2b3c4d5", "c,d", 6, 20, 1));
}
TEST("require that minimal dense single reduce works") {
- TEST_DO(verify_optimized_multi("a2b1c1", "a", 0));
- TEST_DO(verify_optimized_multi("a1b2c1", "b", 1));
- TEST_DO(verify_optimized_multi("a1b1c2", "c", 2));
+ TEST_DO(verify_optimized_multi("a2b1c1", "a", 1, 2, 1));
+ TEST_DO(verify_optimized_multi("a1b2c1", "b", 1, 2, 1));
+ TEST_DO(verify_optimized_multi("a1b1c2", "c", 1, 2, 1));
+}
+
+TEST("require that trivial dimensions can be trivially reduced") {
+ TEST_DO(verify_optimized_multi("a2b1c1", "a,b", 1, 2, 1));
+ TEST_DO(verify_optimized_multi("a2b1c1", "a,c", 1, 2, 1));
+ TEST_DO(verify_optimized_multi("a1b2c1", "b,a", 1, 2, 1));
+ TEST_DO(verify_optimized_multi("a1b2c1", "b,c", 1, 2, 1));
+ TEST_DO(verify_optimized_multi("a1b1c2", "c,a", 1, 2, 1));
+ TEST_DO(verify_optimized_multi("a1b1c2", "c,b", 1, 2, 1));
}
TEST_MAIN() { TEST_RUN_ALL(); }
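
For reference, a minimal sketch (not part of the patch) of how the (outer_size, reduce_size, inner_size) triples expected above follow from a dense dimension layout, assuming the reduced dimensions form one adjacent run [from, to):

#include <cstddef>
#include <vector>

struct Sizes { size_t outer = 1; size_t reduce = 1; size_t inner = 1; };

Sizes split_sizes(const std::vector<size_t> &dims, size_t from, size_t to) {
    Sizes s;
    for (size_t i = 0; i < from; ++i) s.outer *= dims[i];          // dims before the run
    for (size_t i = from; i < to; ++i) s.reduce *= dims[i];        // the reduced run itself
    for (size_t i = to; i < dims.size(); ++i) s.inner *= dims[i];  // dims after the run
    return s;
}

// split_sizes({2, 3, 4, 5}, 2, 4) yields {6, 20, 1}, matching
// verify_optimized_multi("a2b3c4d5", "c,d", 6, 20, 1) above.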
diff --git a/eval/src/tests/tensor/instruction_benchmark/instruction_benchmark.cpp b/eval/src/tests/tensor/instruction_benchmark/instruction_benchmark.cpp
index 8887f2cb6aa..816923bb87c 100644
--- a/eval/src/tests/tensor/instruction_benchmark/instruction_benchmark.cpp
+++ b/eval/src/tests/tensor/instruction_benchmark/instruction_benchmark.cpp
@@ -122,6 +122,32 @@ MyPeekSpec verbatim_peek() { return MyPeekSpec(false); }
//-----------------------------------------------------------------------------
+struct MultiOpParam {
+ std::vector<Instruction> list;
+};
+
+void my_multi_instruction_op(InterpretedFunction::State &state, uint64_t param_in) {
+ const auto &param = *(MultiOpParam*)(param_in);
+ for (const auto &item: param.list) {
+ item.perform(state);
+ }
+}
+
+void collect_op1_chain(const TensorFunction &node, const EngineOrFactory &engine, Stash &stash, std::vector<Instruction> &list) {
+ if (auto op1 = as<tensor_function::Op1>(node)) {
+ collect_op1_chain(op1->child(), engine, stash, list);
+ list.push_back(node.compile_self(engine, stash));
+ }
+}
+
+Instruction compile_op1_chain(const TensorFunction &node, const EngineOrFactory &engine, Stash &stash) {
+ auto &param = stash.create<MultiOpParam>();
+ collect_op1_chain(node, engine, stash, param.list);
+ return {my_multi_instruction_op,(uint64_t)(&param)};
+}
+
+//-----------------------------------------------------------------------------
+
struct Impl {
size_t order;
vespalib::string name;
@@ -145,7 +171,10 @@ struct Impl {
const auto &lhs_node = tensor_function::inject(lhs, 0, stash);
const auto &reduce_node = tensor_function::reduce(lhs_node, aggr, dims, stash);
const auto &node = optimize ? optimize_tensor_function(engine, reduce_node, stash) : reduce_node;
- return node.compile_self(engine, stash);
+ // since reduce might be optimized into multiple chained
+ // instructions, we need some extra magic to package these
+ // instructions into a single compound instruction.
+ return compile_op1_chain(node, engine, stash);
}
Instruction create_rename(const ValueType &lhs, const std::vector<vespalib::string> &from, const std::vector<vespalib::string> &to, Stash &stash) const {
// create a complete tensor function, but only compile the relevant instruction
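
A hedged usage sketch of the helper above, following the calls visible in this hunk (the surrounding Impl wiring, engine and lhs_type are assumed from context, not shown here):

// compile a possibly multi-instruction optimized reduce into one
// compound instruction that can be benchmarked as a single unit
Stash stash;
const auto &lhs_node = tensor_function::inject(lhs_type, 0, stash);
const auto &reduce_node = tensor_function::reduce(lhs_node, Aggr::SUM, {"a", "c"}, stash);
const auto &node = optimize_tensor_function(engine, reduce_node, stash);
Instruction instr = compile_op1_chain(node, engine, stash);
// instr.perform(state) now runs every instruction in the Op1 chain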
diff --git a/eval/src/vespa/eval/eval/aggr.h b/eval/src/vespa/eval/eval/aggr.h
index 2173205dc82..e69b1071e61 100644
--- a/eval/src/vespa/eval/eval/aggr.h
+++ b/eval/src/vespa/eval/eval/aggr.h
@@ -71,6 +71,16 @@ constexpr bool is_simple(Aggr aggr) {
(aggr == Aggr::MIN));
}
+// will a single value reduce to itself?
+constexpr bool is_ident(Aggr aggr) {
+ return ((aggr == Aggr::AVG) ||
+ (aggr == Aggr::PROD) ||
+ (aggr == Aggr::SUM) ||
+ (aggr == Aggr::MAX) ||
+ (aggr == Aggr::MEDIAN) ||
+ (aggr == Aggr::MIN));
+}
+
// should we avoid doing clever stuff with this aggregator?
constexpr bool is_complex(Aggr aggr) {
return (aggr == Aggr::MEDIAN);
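
Since these predicates are constexpr, the distinction can be checked at compile time. A small sketch (assuming the usual vespalib::eval::aggr namespace for these helpers):

#include <vespa/eval/eval/aggr.h>

using vespalib::eval::Aggr;
namespace aggr = vespalib::eval::aggr;

static_assert(aggr::is_ident(Aggr::SUM));      // sum of a single value v is v
static_assert(!aggr::is_ident(Aggr::COUNT));   // count of {v} is 1, not v
static_assert(aggr::is_ident(Aggr::MEDIAN));   // median of {v} is v ...
static_assert(aggr::is_complex(Aggr::MEDIAN)); // ... yet MEDIAN stays "complex"

This is the property the remove-dimension optimizer below relies on: reducing a trivial dimension with an identity aggregator is a no-op.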
diff --git a/eval/src/vespa/eval/tensor/dense/dense_remove_dimension_optimizer.cpp b/eval/src/vespa/eval/tensor/dense/dense_remove_dimension_optimizer.cpp
index 0cecd588317..a48527e83f5 100644
--- a/eval/src/vespa/eval/tensor/dense/dense_remove_dimension_optimizer.cpp
+++ b/eval/src/vespa/eval/tensor/dense/dense_remove_dimension_optimizer.cpp
@@ -14,15 +14,6 @@ using namespace eval::tensor_function;
namespace {
-bool is_ident_aggr(Aggr aggr) {
- return ((aggr == Aggr::AVG) ||
- (aggr == Aggr::PROD) ||
- (aggr == Aggr::SUM) ||
- (aggr == Aggr::MAX) ||
- (aggr == Aggr::MEDIAN) ||
- (aggr == Aggr::MIN));
-}
-
bool is_trivial_dim_list(const ValueType &type, const std::vector<vespalib::string> &dim_list) {
size_t npos = ValueType::Dimension::npos;
for (const vespalib::string &dim: dim_list) {
@@ -43,7 +34,7 @@ DenseRemoveDimensionOptimizer::optimize(const eval::TensorFunction &expr, Stash
const TensorFunction &child = reduce->child();
if (expr.result_type().is_dense() &&
child.result_type().is_dense() &&
- is_ident_aggr(reduce->aggr()) &&
+ eval::aggr::is_ident(reduce->aggr()) &&
is_trivial_dim_list(child.result_type(), reduce->dimensions()))
{
assert(expr.result_type().cell_type() == child.result_type().cell_type());
diff --git a/eval/src/vespa/eval/tensor/dense/dense_single_reduce_function.cpp b/eval/src/vespa/eval/tensor/dense/dense_single_reduce_function.cpp
index 2d8afa638ce..5f688657645 100644
--- a/eval/src/vespa/eval/tensor/dense/dense_single_reduce_function.cpp
+++ b/eval/src/vespa/eval/tensor/dense/dense_single_reduce_function.cpp
@@ -4,6 +4,7 @@
#include "dense_tensor_view.h"
#include <vespa/vespalib/util/typify.h>
#include <vespa/eval/eval/value.h>
+#include <cassert>
namespace vespalib::tensor {
@@ -25,27 +26,16 @@ namespace {
struct Params {
const ValueType &result_type;
size_t outer_size;
- size_t dim_size;
+ size_t reduce_size;
size_t inner_size;
- Params(const ValueType &result_type_in, const ValueType &child_type, size_t dim_idx)
- : result_type(result_type_in), outer_size(1), dim_size(1), inner_size(1)
- {
- for (size_t i = 0; i < child_type.dimensions().size(); ++i) {
- if (i < dim_idx) {
- outer_size *= child_type.dimensions()[i].size;
- } else if (i == dim_idx) {
- dim_size *= child_type.dimensions()[i].size;
- } else {
- inner_size *= child_type.dimensions()[i].size;
- }
- }
- }
+ Params(const ValueType &result_type_in, size_t outer_size_in, size_t reduce_size_in, size_t inner_size_in)
+ : result_type(result_type_in), outer_size(outer_size_in), reduce_size(reduce_size_in), inner_size(inner_size_in) {}
};
template <typename CT, typename AGGR>
-CT reduce_cells(const CT *src, size_t dim_size, size_t stride) {
+CT reduce_cells(const CT *src, size_t reduce_size, size_t stride) {
AGGR aggr(*src);
- for (size_t i = 1; i < dim_size; ++i) {
+ for (size_t i = 1; i < reduce_size; ++i) {
src += stride;
aggr.sample(*src);
}
@@ -88,17 +78,17 @@ auto reduce_cells_atleast_8(const CT *src, size_t n, size_t stride) {
template <typename CT, typename AGGR, bool atleast_8, bool is_inner>
void trace_reduce_impl(const Params &params, const CT *src, CT *dst) {
constexpr bool aggr_is_complex = is_complex(AGGR::enum_value());
- const size_t block_size = (params.dim_size * params.inner_size);
+ const size_t block_size = (params.reduce_size * params.inner_size);
for (size_t outer = 0; outer < params.outer_size; ++outer) {
for (size_t inner = 0; inner < params.inner_size; ++inner) {
if (atleast_8 && !aggr_is_complex) {
if (is_inner) {
- *dst++ = reduce_cells_atleast_8<CT, AGGR>(src + inner, params.dim_size);
+ *dst++ = reduce_cells_atleast_8<CT, AGGR>(src + inner, params.reduce_size);
} else {
- *dst++ = reduce_cells_atleast_8<CT, AGGR>(src + inner, params.dim_size, params.inner_size);
+ *dst++ = reduce_cells_atleast_8<CT, AGGR>(src + inner, params.reduce_size, params.inner_size);
}
} else {
- *dst++ = reduce_cells<CT, AGGR>(src + inner, params.dim_size, params.inner_size);
+ *dst++ = reduce_cells<CT, AGGR>(src + inner, params.reduce_size, params.inner_size);
}
}
src += block_size;
@@ -112,7 +102,7 @@ void fold_reduce_impl(const Params &params, const CT *src, CT *dst) {
for (size_t inner = 0; inner < params.inner_size; ++inner) {
*dst++ = *src++;
}
- for (size_t dim = 1; dim < params.dim_size; ++dim) {
+ for (size_t dim = 1; dim < params.reduce_size; ++dim) {
dst = saved_dst;
for (size_t inner = 0; inner < params.inner_size; ++inner) {
*dst = AGGR::combine(*dst, *src++);
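
For orientation, the strided (trace) variant above computes each output cell independently; a minimal sketch (not the patch's code) of that loop structure for SUM over an (outer, reduce, inner) cell layout:

#include <cstddef>

// src holds outer*reduce*inner cells, dst receives outer*inner cells
template <typename CT>
void trace_reduce_sum(const CT *src, CT *dst,
                      size_t outer, size_t reduce, size_t inner)
{
    for (size_t o = 0; o < outer; ++o) {
        for (size_t i = 0; i < inner; ++i) {
            CT acc = src[i];                   // first sample
            for (size_t r = 1; r < reduce; ++r) {
                acc += src[r * inner + i];     // stride over the reduced dimension
            }
            *dst++ = acc;
        }
        src += reduce * inner;                 // next outer block
    }
}

When inner_size == 1 the stride collapses to sequential access, which is what the (reduce_size >= 8, inner_size == 1) typify flags below specialize on.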
@@ -147,14 +137,99 @@ struct MyGetFun {
using MyTypify = TypifyValue<TypifyCellType,TypifyAggr,TypifyBool>;
+std::pair<std::vector<vespalib::string>,ValueType> sort_and_drop_trivial(const std::vector<vespalib::string> &list_in, const ValueType &type_in) {
+ std::vector<vespalib::string> dropped;
+ std::vector<vespalib::string> list_out;
+ for (const auto &dim_name: list_in) {
+ auto dim_idx = type_in.dimension_index(dim_name);
+ assert(dim_idx != ValueType::Dimension::npos);
+ const auto &dim = type_in.dimensions()[dim_idx];
+ assert(dim.is_indexed());
+ if (dim.is_trivial()) {
+ dropped.push_back(dim_name);
+ } else {
+ list_out.push_back(dim_name);
+ }
+ }
+ std::sort(list_out.begin(), list_out.end());
+ ValueType type_out = dropped.empty() ? type_in : type_in.reduce(dropped);
+ assert(!type_out.is_error());
+ return {list_out, type_out};
+}
+
+template <typename T> struct VectorLookupLoop {
+ const std::vector<T> &list;
+ size_t index;
+ VectorLookupLoop(const std::vector<T> &list_in) : list(list_in), index(0) {}
+ bool valid() const { return (index < list.size()); }
+ void next() { ++index; }
+ const T &get() const { return list[index]; }
+};
+
+DenseSingleReduceSpec extract_next(const eval::ValueType &type, eval::Aggr aggr,
+ std::vector<vespalib::string> &todo)
+{
+ size_t outer_size = 1;
+ size_t reduce_size = 1;
+ size_t inner_size = 1;
+ auto dims = type.nontrivial_indexed_dimensions();
+ std::vector<vespalib::string> do_now;
+ std::vector<vespalib::string> do_later;
+ auto a = VectorLookupLoop(dims);
+ auto b = VectorLookupLoop(todo);
+ while (a.valid() && b.valid() && (a.get().name < b.get())) {
+ outer_size *= a.get().size;
+ a.next();
+ }
+ while (a.valid() && b.valid() && (a.get().name == b.get())) {
+ reduce_size *= a.get().size;
+ do_now.push_back(b.get());
+ a.next();
+ b.next();
+ }
+ while (a.valid()) {
+ inner_size *= a.get().size;
+ a.next();
+ }
+ while (b.valid()) {
+ do_later.push_back(b.get());
+ b.next();
+ }
+ todo = do_later;
+ assert(!do_now.empty());
+ return {type.reduce(do_now), outer_size, reduce_size, inner_size, aggr};
+}
+
} // namespace vespalib::tensor::<unnamed>
-DenseSingleReduceFunction::DenseSingleReduceFunction(const ValueType &result_type,
- const TensorFunction &child,
- size_t dim_idx, Aggr aggr)
- : Op1(result_type, child),
- _dim_idx(dim_idx),
- _aggr(aggr)
+std::vector<DenseSingleReduceSpec>
+make_dense_single_reduce_list(const eval::ValueType &type, eval::Aggr aggr,
+ const std::vector<vespalib::string> &reduce_dims)
+{
+ auto res_type = type.reduce(reduce_dims);
+ if (reduce_dims.empty() || !type.is_dense() || !res_type.is_dense()) {
+ return {};
+ }
+ std::vector<DenseSingleReduceSpec> list;
+ auto [todo, curr_type] = sort_and_drop_trivial(reduce_dims, type);
+ while (!todo.empty()) {
+ list.push_back(extract_next(curr_type, aggr, todo));
+ curr_type = list.back().result_type;
+ }
+ assert(curr_type == res_type);
+ if ((list.size() > 1) && !eval::aggr::is_simple(aggr)) {
+ return {};
+ }
+ return list;
+}
+
+DenseSingleReduceFunction::DenseSingleReduceFunction(const DenseSingleReduceSpec &spec,
+ const TensorFunction &child)
+ : Op1(spec.result_type, child),
+ _outer_size(spec.outer_size),
+ _reduce_size(spec.reduce_size),
+ _inner_size(spec.inner_size),
+ _aggr(spec.aggr)
{
}
@@ -163,25 +238,25 @@ DenseSingleReduceFunction::~DenseSingleReduceFunction() = default;
InterpretedFunction::Instruction
DenseSingleReduceFunction::compile_self(eval::EngineOrFactory, Stash &stash) const
{
- auto &params = stash.create<Params>(result_type(), child().result_type(), _dim_idx);
auto op = typify_invoke<4,MyTypify,MyGetFun>(result_type().cell_type(), _aggr,
- (params.dim_size >= 8),
- (params.inner_size == 1));
+ (_reduce_size >= 8), (_inner_size == 1));
+ auto &params = stash.create<Params>(result_type(), _outer_size, _reduce_size, _inner_size);
return InterpretedFunction::Instruction(op, wrap_param<Params>(params));
}
const TensorFunction &
DenseSingleReduceFunction::optimize(const TensorFunction &expr, Stash &stash)
{
- auto reduce = as<Reduce>(expr);
- if (reduce && (reduce->dimensions().size() == 1) &&
- reduce->child().result_type().is_dense() &&
- expr.result_type().is_dense())
- {
- size_t dim_idx = reduce->child().result_type().dimension_index(reduce->dimensions()[0]);
- assert(dim_idx != ValueType::Dimension::npos);
- assert(expr.result_type().cell_type() == reduce->child().result_type().cell_type());
- return stash.create<DenseSingleReduceFunction>(expr.result_type(), reduce->child(), dim_idx, reduce->aggr());
+ if (auto reduce = as<Reduce>(expr)) {
+ const auto &child = reduce->child();
+ auto spec_list = make_dense_single_reduce_list(child.result_type(), reduce->aggr(), reduce->dimensions());
+ if (!spec_list.empty()) {
+ const auto *prev = &child;
+ for (const auto &spec: spec_list) {
+ prev = &stash.create<DenseSingleReduceFunction>(spec, *prev);
+ }
+ return *prev;
+ }
}
return expr;
}
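
The decomposition above is easiest to see on a concrete case. A minimal sketch (assuming the usual include paths and that eval::ValueType::from_spec is available) of what make_dense_single_reduce_list produces for the non-adjacent reduce exercised by the tests:

#include <vespa/eval/eval/value_type.h>
#include <vespa/eval/tensor/dense/dense_single_reduce_function.h>

using namespace vespalib;

void decompose_example() {
    auto type = eval::ValueType::from_spec("tensor(a[2],b[3],c[4],d[5])");
    auto list = tensor::make_dense_single_reduce_list(type, eval::Aggr::SUM, {"a", "c"});
    // list[0]: result b3c4d5, outer=1, reduce=2, inner=60  (reduce 'a' first)
    // list[1]: result b3d5,   outer=3, reduce=4, inner=5   (then reduce 'c')
    // a non-simple aggregator (avg, count, median) would yield an empty list,
    // since chaining stages is only valid for simple aggregators
}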
diff --git a/eval/src/vespa/eval/tensor/dense/dense_single_reduce_function.h b/eval/src/vespa/eval/tensor/dense/dense_single_reduce_function.h
index 7f9313df600..f2db3155290 100644
--- a/eval/src/vespa/eval/tensor/dense/dense_single_reduce_function.h
+++ b/eval/src/vespa/eval/tensor/dense/dense_single_reduce_function.h
@@ -6,22 +6,46 @@
namespace vespalib::tensor {
+struct DenseSingleReduceSpec {
+ eval::ValueType result_type;
+ size_t outer_size;
+ size_t reduce_size;
+ size_t inner_size;
+ eval::Aggr aggr;
+};
+
+/**
+ * Decompose the specified reduce operation into a sequence of single
+ * dense reduce operations. Returns an empty list if decomposition
+ * fails.
+ **/
+std::vector<DenseSingleReduceSpec>
+make_dense_single_reduce_list(const eval::ValueType &type, eval::Aggr aggr,
+ const std::vector<vespalib::string> &reduce_dims);
+
/**
- * Tensor function reducing a single dimension of a dense
- * tensor where the result is also a dense tensor.
+ * Tensor function reducing a single dimension of a dense tensor where
+ * the result is also a dense tensor. The optimize function may create
+ * multiple tensor functions to compose a multi-stage reduce
+ * operation. Adjacent reduced dimensions will be handled as if they
+ * were a single dimension. Trivial dimensions will be trivially
+ * reduced along with any other dimension.
**/
class DenseSingleReduceFunction : public eval::tensor_function::Op1
{
private:
- size_t _dim_idx;
+ size_t _outer_size;
+ size_t _reduce_size;
+ size_t _inner_size;
eval::Aggr _aggr;
public:
- DenseSingleReduceFunction(const eval::ValueType &result_type,
- const eval::TensorFunction &child,
- size_t dim_idx, eval::Aggr aggr);
+ DenseSingleReduceFunction(const DenseSingleReduceSpec &spec,
+ const eval::TensorFunction &child);
~DenseSingleReduceFunction() override;
- size_t dim_idx() const { return _dim_idx; }
+ size_t outer_size() const { return _outer_size; }
+ size_t reduce_size() const { return _reduce_size; }
+ size_t inner_size() const { return _inner_size; }
eval::Aggr aggr() const { return _aggr; }
bool result_is_mutable() const override { return true; }
eval::InterpretedFunction::Instruction compile_self(eval::EngineOrFactory engine, Stash &stash) const override;