summary refs log tree commit diff stats
path: root/eval
diff options
context:
space:
mode:
author Håvard Pettersen <havardpe@oath.com> 2021-01-18 09:54:01 +0000
committer Håvard Pettersen <havardpe@oath.com> 2021-01-19 16:58:21 +0000
commit 5ab81b067850113b15016c11354fe7259906b1a2 (patch)
tree d81cb462345ddbc9377bee8f9490e9861256c6aa /eval
parent 4e6d9b184867553f740a65124fdb2d9c380caf22 (diff)
forward or ignore index in relevant mixed tensor reduce cases
Diffstat (limited to 'eval')
-rw-r--r-- eval/src/tests/eval/aggr/aggr_test.cpp | 14
-rw-r--r-- eval/src/tests/instruction/generic_reduce/generic_reduce_test.cpp | 5
-rw-r--r-- eval/src/tests/tensor/instruction_benchmark/instruction_benchmark.cpp | 8
-rw-r--r-- eval/src/vespa/eval/eval/aggr.h | 35
-rw-r--r-- eval/src/vespa/eval/instruction/generic_reduce.cpp | 72
-rw-r--r-- eval/src/vespa/eval/instruction/generic_reduce.h | 1
6 files changed, 107 insertions, 28 deletions
diff --git a/eval/src/tests/eval/aggr/aggr_test.cpp b/eval/src/tests/eval/aggr/aggr_test.cpp
index 9045df68305..5eddb026406 100644
--- a/eval/src/tests/eval/aggr/aggr_test.cpp
+++ b/eval/src/tests/eval/aggr/aggr_test.cpp
@@ -85,8 +85,9 @@ TEST("require that PROD aggregator works as expected") {
EXPECT_TRUE(aggr.enum_value() == Aggr::PROD);
}
-TEST("require that Prod combine works as expected") {
+TEST("require that Prod static API works as expected") {
using Type = Prod<double>;
+ EXPECT_EQUAL(Type::null_value(), 1.0);
EXPECT_EQUAL(Type::combine(3,7), 21.0);
EXPECT_EQUAL(Type::combine(5,4), 20.0);
}
@@ -103,8 +104,9 @@ TEST("require that SUM aggregator works as expected") {
EXPECT_TRUE(aggr.enum_value() == Aggr::SUM);
}
-TEST("require that Sum combine works as expected") {
+TEST("require that Sum static API works as expected") {
using Type = Sum<double>;
+ EXPECT_EQUAL(Type::null_value(), 0.0);
EXPECT_EQUAL(Type::combine(3,7), 10.0);
EXPECT_EQUAL(Type::combine(5,4), 9.0);
}
@@ -121,8 +123,10 @@ TEST("require that MAX aggregator works as expected") {
EXPECT_TRUE(aggr.enum_value() == Aggr::MAX);
}
-TEST("require that Max combine works as expected") {
+TEST("require that Max static API works as expected") {
using Type = Max<double>;
+ EXPECT_EQUAL(Max<double>::null_value(), -std::numeric_limits<double>::infinity());
+ EXPECT_EQUAL(Max<float>::null_value(), -std::numeric_limits<float>::infinity());
EXPECT_EQUAL(Type::combine(3,7), 7.0);
EXPECT_EQUAL(Type::combine(5,4), 5.0);
}
@@ -165,8 +169,10 @@ TEST("require that MIN aggregator works as expected") {
EXPECT_TRUE(aggr.enum_value() == Aggr::MIN);
}
-TEST("require that Min combine works as expected") {
+TEST("require that Min static API works as expected") {
using Type = Min<double>;
+ EXPECT_EQUAL(Min<double>::null_value(), std::numeric_limits<double>::infinity());
+ EXPECT_EQUAL(Min<float>::null_value(), std::numeric_limits<float>::infinity());
EXPECT_EQUAL(Type::combine(3,7), 3.0);
EXPECT_EQUAL(Type::combine(5,4), 4.0);
}
diff --git a/eval/src/tests/instruction/generic_reduce/generic_reduce_test.cpp b/eval/src/tests/instruction/generic_reduce/generic_reduce_test.cpp
index 3ab971dd34d..9e2090fa968 100644
--- a/eval/src/tests/instruction/generic_reduce/generic_reduce_test.cpp
+++ b/eval/src/tests/instruction/generic_reduce/generic_reduce_test.cpp
@@ -32,7 +32,8 @@ std::vector<Layout> layouts = {
float_cells({x({"a","b","c"}),y({"foo","bar"}),z({"i","j","k","l"})}),
{x(3),y({"foo", "bar"}),z(7)},
{x({"a","b","c"}),y(5),z({"i","j","k","l"})},
- float_cells({x({"a","b","c"}),y(5),z({"i","j","k","l"})})
+ float_cells({x({"a","b","c"}),y(5),z({"i","j","k","l"})}),
+ {x(3),y({}),z(7)}
};
TensorSpec perform_generic_reduce(const TensorSpec &a, Aggr aggr, const std::vector<vespalib::string> &dims,
@@ -69,7 +70,9 @@ TEST(GenericReduceTest, sparse_reduce_plan_can_be_created) {
void test_generic_reduce_with(const ValueBuilderFactory &factory) {
for (const Layout &layout: layouts) {
TensorSpec input = spec(layout, Div16(N()));
+ SCOPED_TRACE(fmt("tensor type: %s, num_cells: %zu", input.type().c_str(), input.cells().size()));
for (Aggr aggr: {Aggr::SUM, Aggr::AVG, Aggr::MIN, Aggr::MAX}) {
+ SCOPED_TRACE(fmt("aggregator: %s", AggrNames::name_of(aggr)->c_str()));
for (const Domain &domain: layout) {
auto expect = ReferenceOperations::reduce(input, aggr, {domain.dimension}).normalize();
auto actual = perform_generic_reduce(input, aggr, {domain.dimension}, factory);
diff --git a/eval/src/tests/tensor/instruction_benchmark/instruction_benchmark.cpp b/eval/src/tests/tensor/instruction_benchmark/instruction_benchmark.cpp
index 89bddfda933..3345d7dc8ee 100644
--- a/eval/src/tests/tensor/instruction_benchmark/instruction_benchmark.cpp
+++ b/eval/src/tests/tensor/instruction_benchmark/instruction_benchmark.cpp
@@ -814,6 +814,14 @@ TEST(DenseJoin, partial_overlap) {
benchmark_join("dense partial overlap multiply", lhs, rhs, operation::Mul::f);
}
+TEST(DenseJoin, subset_overlap) {
+ auto lhs = make_cube(D::idx("a", 16), D::idx("b", 16), D::idx("c", 16), 1.0);
+ auto rhs_inner = make_matrix(D::idx("b", 16), D::idx("c", 16), 2.0);
+ auto rhs_outer = make_matrix(D::idx("a", 16), D::idx("b", 16), 3.0);
+ benchmark_join("dense subset overlap inner multiply", lhs, rhs_inner, operation::Mul::f);
+ benchmark_join("dense subset overlap outer multiply", lhs, rhs_outer, operation::Mul::f);
+}
+
TEST(DenseJoin, no_overlap) {
auto lhs = make_cube(D::idx("a", 4), D::idx("e", 4), D::idx("f", 4), 1.0);
auto rhs = make_cube(D::idx("b", 4), D::idx("c", 4), D::idx("d", 4), 2.0);
diff --git a/eval/src/vespa/eval/eval/aggr.h b/eval/src/vespa/eval/eval/aggr.h
index d2e7a64b51b..516ead0f0bf 100644
--- a/eval/src/vespa/eval/eval/aggr.h
+++ b/eval/src/vespa/eval/eval/aggr.h
@@ -61,9 +61,10 @@ struct Aggregator {
namespace aggr {
-// can we start by picking any value from the set to be reduced and
-// use the templated aggregator 'combine' function in arbitrary order
-// to end up with (approximately) the correct result?
+// can we start by picking any value from the set to be reduced (or
+// the special aggregator-specific null_value) and use the templated
+// aggregator 'combine' function in arbitrary order to end up with
+// (approximately) the correct result?
constexpr bool is_simple(Aggr aggr) {
return ((aggr == Aggr::PROD) ||
(aggr == Aggr::SUM) ||
@@ -124,12 +125,13 @@ private:
T _prod;
public:
using value_type = T;
- constexpr Prod() : _prod{1} {}
+ constexpr Prod() : _prod{null_value()} {}
constexpr Prod(T value) : _prod{value} {}
- constexpr void sample(T value) { _prod *= value; }
- constexpr void merge(const Prod &rhs) { _prod *= rhs._prod; }
+ constexpr void sample(T value) { _prod = combine(_prod, value); }
+ constexpr void merge(const Prod &rhs) { _prod = combine(_prod, rhs._prod); }
constexpr T result() const { return _prod; }
static constexpr Aggr enum_value() { return Aggr::PROD; }
+ static constexpr T null_value() { return 1; }
static constexpr T combine(T a, T b) { return (a * b); }
};
@@ -138,12 +140,13 @@ private:
T _sum;
public:
using value_type = T;
- constexpr Sum() : _sum{0} {}
+ constexpr Sum() : _sum{null_value()} {}
constexpr Sum(T value) : _sum{value} {}
- constexpr void sample(T value) { _sum += value; }
- constexpr void merge(const Sum &rhs) { _sum += rhs._sum; }
+ constexpr void sample(T value) { _sum = combine(_sum, value); }
+ constexpr void merge(const Sum &rhs) { _sum = combine(_sum, rhs._sum); }
constexpr T result() const { return _sum; }
static constexpr Aggr enum_value() { return Aggr::SUM; }
+ static constexpr T null_value() { return 0; }
static constexpr T combine(T a, T b) { return (a + b); }
};
@@ -152,12 +155,13 @@ private:
T _max;
public:
using value_type = T;
- constexpr Max() : _max{-std::numeric_limits<T>::infinity()} {}
+ constexpr Max() : _max{null_value()} {}
constexpr Max(T value) : _max{value} {}
- constexpr void sample(T value) { _max = std::max(_max, value); }
- constexpr void merge(const Max &rhs) { _max = std::max(_max, rhs._max); }
+ constexpr void sample(T value) { _max = combine(_max, value); }
+ constexpr void merge(const Max &rhs) { _max = combine(_max, rhs._max); }
constexpr T result() const { return _max; }
static constexpr Aggr enum_value() { return Aggr::MAX; }
+ static constexpr T null_value() { return -std::numeric_limits<T>::infinity(); }
static constexpr T combine(T a, T b) { return std::max(a,b); }
};
@@ -204,12 +208,13 @@ private:
T _min;
public:
using value_type = T;
- constexpr Min() : _min{std::numeric_limits<T>::infinity()} {}
+ constexpr Min() : _min{null_value()} {}
constexpr Min(T value) : _min{value} {}
- constexpr void sample(T value) { _min = std::min(_min, value); }
- constexpr void merge(const Min &rhs) { _min = std::min(_min, rhs._min); }
+ constexpr void sample(T value) { _min = combine(_min, value); }
+ constexpr void merge(const Min &rhs) { _min = combine(_min, rhs._min); }
constexpr T result() const { return _min; }
static constexpr Aggr enum_value() { return Aggr::MIN; }
+ static constexpr T null_value() { return std::numeric_limits<T>::infinity(); }
static constexpr T combine(T a, T b) { return std::min(a,b); }
};
diff --git a/eval/src/vespa/eval/instruction/generic_reduce.cpp b/eval/src/vespa/eval/instruction/generic_reduce.cpp
index b6393d0d713..2d4144d64b1 100644
--- a/eval/src/vespa/eval/instruction/generic_reduce.cpp
+++ b/eval/src/vespa/eval/instruction/generic_reduce.cpp
@@ -8,6 +8,7 @@
#include <vespa/vespalib/util/typify.h>
#include <vespa/vespalib/util/overload.h>
#include <vespa/vespalib/util/visit_ranges.h>
+#include <algorithm>
#include <cassert>
#include <array>
@@ -48,7 +49,7 @@ struct SparseReduceState {
std::vector<string_id> full_address;
std::vector<string_id*> fetch_address;
std::vector<string_id*> keep_address;
- size_t subspace;
+ size_t subspace;
SparseReduceState(const SparseReducePlan &plan)
: full_address(plan.keep_dims.size() + plan.num_reduce_dims),
@@ -72,8 +73,8 @@ Value::UP
generic_reduce(const Value &value, const ReduceParam &param) {
auto cells = value.cells().typify<ICT>();
ArrayArrayMap<string_id,AGGR> map(param.sparse_plan.keep_dims.size(),
- param.dense_plan.out_size,
- value.index().size());
+ param.dense_plan.out_size,
+ value.index().size());
SparseReduceState sparse(param.sparse_plan);
auto full_view = value.index().create_view({});
full_view->lookup({});
@@ -94,9 +95,7 @@ generic_reduce(const Value &value, const ReduceParam &param) {
});
if ((map.size() == 0) && param.sparse_plan.keep_dims.empty()) {
auto zero = builder->add_subspace();
- for (size_t i = 0; i < zero.size(); ++i) {
- zero[i] = OCT{};
- }
+ std::fill(zero.begin(), zero.end(), OCT{});
}
return builder->build(std::move(builder));
}
@@ -109,6 +108,50 @@ void my_generic_reduce_op(State &state, uint64_t param_in) {
auto &result = state.stash.create<std::unique_ptr<Value>>(std::move(up));
const Value &result_ref = *(result.get());
state.pop_push(result_ref);
+}
+
+template <typename ICT, typename OCT, typename AGGR, bool forward_index>
+void my_generic_dense_reduce_op(State &state, uint64_t param_in) {
+ const auto &param = unwrap_param<ReduceParam>(param_in);
+ const Value &value = state.peek(0);
+ auto cells = value.cells().typify<ICT>();
+ const auto &index = value.index();
+ size_t num_subspaces = index.size();
+ size_t out_cells_size = forward_index ? (param.dense_plan.out_size * num_subspaces) : param.dense_plan.out_size;
+ auto out_cells = state.stash.create_uninitialized_array<OCT>(out_cells_size);
+ if (num_subspaces > 0) {
+ if constexpr (aggr::is_simple(AGGR::enum_value())) {
+ OCT *dst = out_cells.begin();
+ std::fill(out_cells.begin(), out_cells.end(), AGGR::null_value());
+ auto combine = [&](size_t src_idx, size_t dst_idx) { dst[dst_idx] = AGGR::combine(dst[dst_idx], cells[src_idx]); };
+ for (size_t i = 0; i < num_subspaces; ++i) {
+ param.dense_plan.execute(i * param.dense_plan.in_size, combine);
+ if (forward_index) {
+ dst += param.dense_plan.out_size;
+ }
+ }
+ } else {
+ std::vector<AGGR> aggr_state(out_cells_size);
+ AGGR *dst = &aggr_state[0];
+ auto sample = [&](size_t src_idx, size_t dst_idx) { dst[dst_idx].sample(cells[src_idx]); };
+ for (size_t i = 0; i < num_subspaces; ++i) {
+ param.dense_plan.execute(i * param.dense_plan.in_size, sample);
+ if (forward_index) {
+ dst += param.dense_plan.out_size;
+ }
+ }
+ for (size_t i = 0; i < aggr_state.size(); ++i) {
+ out_cells[i] = aggr_state[i].result();
+ }
+ }
+ } else if (!forward_index) {
+ std::fill(out_cells.begin(), out_cells.end(), OCT{});
+ }
+ if (forward_index) {
+ state.pop_push(state.stash.create<ValueView>(param.res_type, index, TypedCells(out_cells)));
+ } else {
+ state.pop_push(state.stash.create<DenseValueView>(param.res_type, TypedCells(out_cells)));
+ }
};
template <typename ICT, typename OCT, typename AGGR>
@@ -147,10 +190,17 @@ void my_full_reduce_op(State &state, uint64_t) {
struct SelectGenericReduceOp {
template <typename ICT, typename OCT, typename AGGR> static auto invoke(const ReduceParam &param) {
+ using AggrType = typename AGGR::template templ<OCT>;
if (param.res_type.is_scalar()) {
- return my_full_reduce_op<ICT, OCT, typename AGGR::template templ<OCT>>;
+ return my_full_reduce_op<ICT, OCT, AggrType>;
+ }
+ if (param.sparse_plan.should_forward_index()) {
+ return my_generic_dense_reduce_op<ICT, OCT, AggrType, true>;
}
- return my_generic_reduce_op<ICT, OCT, typename AGGR::template templ<OCT>>;
+ if (param.res_type.is_dense()) {
+ return my_generic_dense_reduce_op<ICT, OCT, AggrType, false>;
+ }
+ return my_generic_reduce_op<ICT, OCT, AggrType>;
}
};
@@ -227,6 +277,12 @@ SparseReducePlan::SparseReducePlan(const ValueType &type, const ValueType &res_t
}
}
+bool
+SparseReducePlan::should_forward_index() const
+{
+ return ((num_reduce_dims == 0) && (!keep_dims.empty()));
+}
+
SparseReducePlan::~SparseReducePlan() = default;
//-----------------------------------------------------------------------------
diff --git a/eval/src/vespa/eval/instruction/generic_reduce.h b/eval/src/vespa/eval/instruction/generic_reduce.h
index f753a3e51cd..5faafb0325d 100644
--- a/eval/src/vespa/eval/instruction/generic_reduce.h
+++ b/eval/src/vespa/eval/instruction/generic_reduce.h
@@ -30,6 +30,7 @@ struct DenseReducePlan {
struct SparseReducePlan {
size_t num_reduce_dims;
std::vector<size_t> keep_dims;
+ bool should_forward_index() const;
SparseReducePlan(const ValueType &type, const ValueType &res_type);
~SparseReducePlan();
};