summary | refs | log | tree | commit | diff | stats
path: root/eval
diff options
context:
space:
mode:
author: Håvard Pettersen <havardpe@oath.com> 2020-05-06 15:36:58 +0000
committer: Håvard Pettersen <havardpe@oath.com> 2020-05-07 12:41:36 +0000
commit: 0865c453d198c3974880c56699a9fd01ca286d43 (patch)
tree: dca743013ca0bb2c5e352855d0ef6d255669200b /eval
parent: 8f774bc613caf2006da29989a322730dcb936514 (diff)
dense single reduce
Diffstat (limited to 'eval')
-rw-r--r-- eval/CMakeLists.txt | 1
-rw-r--r-- eval/src/tests/eval/aggr/aggr_test.cpp | 11
-rw-r--r-- eval/src/tests/tensor/dense_single_reduce_function/CMakeLists.txt | 8
-rw-r--r-- eval/src/tests/tensor/dense_single_reduce_function/dense_single_reduce_function_test.cpp | 123
-rw-r--r-- eval/src/vespa/eval/eval/aggr.cpp | 72
-rw-r--r-- eval/src/vespa/eval/eval/aggr.h | 66
-rw-r--r-- eval/src/vespa/eval/eval/test/eval_fixture.cpp | 30
-rw-r--r-- eval/src/vespa/eval/eval/test/eval_fixture.h | 11
-rw-r--r-- eval/src/vespa/eval/tensor/default_tensor_engine.cpp | 54
-rw-r--r-- eval/src/vespa/eval/tensor/dense/CMakeLists.txt | 1
-rw-r--r-- eval/src/vespa/eval/tensor/dense/dense_single_reduce_function.cpp | 134
-rw-r--r-- eval/src/vespa/eval/tensor/dense/dense_single_reduce_function.h | 31
12 files changed, 450 insertions, 92 deletions
diff --git a/eval/CMakeLists.txt b/eval/CMakeLists.txt
index 9cd5f396144..29d3c192139 100644
--- a/eval/CMakeLists.txt
+++ b/eval/CMakeLists.txt
@@ -39,6 +39,7 @@ vespa_define_module(
src/tests/tensor/dense_multi_matmul_function
src/tests/tensor/dense_remove_dimension_optimizer
src/tests/tensor/dense_replace_type_function
+ src/tests/tensor/dense_single_reduce_function
src/tests/tensor/dense_tensor_create_function
src/tests/tensor/dense_tensor_peek_function
src/tests/tensor/dense_xw_product_function
diff --git a/eval/src/tests/eval/aggr/aggr_test.cpp b/eval/src/tests/eval/aggr/aggr_test.cpp
index d81d6bb0ff3..a028d0da6c2 100644
--- a/eval/src/tests/eval/aggr/aggr_test.cpp
+++ b/eval/src/tests/eval/aggr/aggr_test.cpp
@@ -6,6 +6,17 @@
using vespalib::Stash;
using namespace vespalib::eval;
+TEST("require that aggregator list returns appropriate entries") { // pins both the content and the order of Aggregator::list()
+ auto list = Aggregator::list();
+ ASSERT_EQUAL(list.size(), 6u); // size is checked first because list[0]..list[5] are indexed below
+ EXPECT_EQUAL(int(list[0]), int(Aggr::AVG)); // enum values are compared as int
+ EXPECT_EQUAL(int(list[1]), int(Aggr::COUNT));
+ EXPECT_EQUAL(int(list[2]), int(Aggr::PROD));
+ EXPECT_EQUAL(int(list[3]), int(Aggr::SUM));
+ EXPECT_EQUAL(int(list[4]), int(Aggr::MAX));
+ EXPECT_EQUAL(int(list[5]), int(Aggr::MIN));
+}
+
TEST("require that AVG aggregator works as expected") {
Stash stash;
Aggregator &aggr = Aggregator::create(Aggr::AVG, stash);
diff --git a/eval/src/tests/tensor/dense_single_reduce_function/CMakeLists.txt b/eval/src/tests/tensor/dense_single_reduce_function/CMakeLists.txt
new file mode 100644
index 00000000000..42b00699c31
--- /dev/null
+++ b/eval/src/tests/tensor/dense_single_reduce_function/CMakeLists.txt
@@ -0,0 +1,8 @@
+# Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+vespa_add_executable(eval_dense_single_reduce_function_test_app TEST
+ SOURCES
+ dense_single_reduce_function_test.cpp
+ DEPENDS
+ vespaeval
+)
+vespa_add_test(NAME eval_dense_single_reduce_function_test_app COMMAND eval_dense_single_reduce_function_test_app)
diff --git a/eval/src/tests/tensor/dense_single_reduce_function/dense_single_reduce_function_test.cpp b/eval/src/tests/tensor/dense_single_reduce_function/dense_single_reduce_function_test.cpp
new file mode 100644
index 00000000000..949c5277e18
--- /dev/null
+++ b/eval/src/tests/tensor/dense_single_reduce_function/dense_single_reduce_function_test.cpp
@@ -0,0 +1,123 @@
+// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include <vespa/vespalib/testkit/test_kit.h>
+#include <vespa/eval/eval/tensor_function.h>
+#include <vespa/eval/eval/operation.h>
+#include <vespa/eval/eval/simple_tensor.h>
+#include <vespa/eval/eval/simple_tensor_engine.h>
+#include <vespa/eval/tensor/default_tensor_engine.h>
+#include <vespa/eval/tensor/dense/dense_single_reduce_function.h>
+#include <vespa/eval/tensor/dense/dense_tensor.h>
+#include <vespa/eval/tensor/dense/dense_tensor_view.h>
+#include <vespa/eval/eval/test/tensor_model.hpp>
+#include <vespa/eval/eval/test/eval_fixture.h>
+
+#include <vespa/vespalib/util/stringfmt.h>
+#include <vespa/vespalib/util/stash.h>
+
+using namespace vespalib;
+using namespace vespalib::eval;
+using namespace vespalib::eval::test;
+using namespace vespalib::tensor;
+using namespace vespalib::eval::tensor_function;
+
+const TensorEngine &prod_engine = DefaultTensorEngine::ref();
+
+EvalFixture::ParamRepo make_params() { // builds the named input tensors used by every test expression in this file
+ return EvalFixture::ParamRepo()
+ .add_dense({{"a", 2}, {"b", 3}, {"c", 4}, {"d", 5}}) // 4-d dense tensor, referenced by the tests as a2b3c4d5 (a2b3c4d5f with float cells)
+ .add_cube("a", 2, "b", 1, "c", 1) // only 'a' is non-trivial (tests use a2b1c1)
+ .add_cube("a", 1, "b", 2, "c", 1) // only 'b' is non-trivial (tests use a1b2c1)
+ .add_cube("a", 1, "b", 1, "c", 2) // only 'c' is non-trivial (tests use a1b1c2)
+ .add_cube("a", 1, "b", 1, "c", 1) // all dimensions trivial (tests use a1b1c1)
+ .add_vector("a", 10) // 1-d dense tensor (tests use a10)
+ .add("xy_mapped", spec({x({"a", "b"}),y({"x", "y"})}, N())) // input for the "sparse reduce" tests
+ .add("xyz_mixed", spec({x({"a", "b"}),y({"x", "y"}),z(3)}, N())); // input for the "mixed reduce" tests
+}
+EvalFixture::ParamRepo param_repo = make_params();
+
+void verify_optimized(const vespalib::string &expr, size_t dim_idx, Aggr aggr) // checks that 'expr' evaluates correctly and contains exactly one DenseSingleReduceFunction with the given dim_idx and aggr
+{
+ EvalFixture slow_fixture(prod_engine, expr, param_repo, false); // last argument false: presumably evaluation without optimization - TODO confirm against EvalFixture
+ EvalFixture fixture(prod_engine, expr, param_repo, true); // last argument true: presumably evaluation with optimization - TODO confirm against EvalFixture
+ EXPECT_EQUAL(fixture.result(), EvalFixture::ref(expr, param_repo)); // result must match the reference result
+ EXPECT_EQUAL(fixture.result(), slow_fixture.result()); // both fixtures must produce the same result
+ auto info = fixture.find_all<DenseSingleReduceFunction>();
+ ASSERT_EQUAL(info.size(), 1u); // info[0] is dereferenced below, so exactly one match is required first
+ EXPECT_TRUE(info[0]->result_is_mutable());
+ EXPECT_EQUAL(info[0]->dim_idx(), dim_idx);
+ EXPECT_EQUAL(int(info[0]->aggr()), int(aggr)); // enum values are compared as int
+}
+
+void verify_not_optimized(const vespalib::string &expr) { // checks that 'expr' evaluates correctly and contains no DenseSingleReduceFunction
+ EvalFixture slow_fixture(prod_engine, expr, param_repo, false); // last argument false: presumably evaluation without optimization - TODO confirm against EvalFixture
+ EvalFixture fixture(prod_engine, expr, param_repo, true); // last argument true: presumably evaluation with optimization - TODO confirm against EvalFixture
+ EXPECT_EQUAL(fixture.result(), EvalFixture::ref(expr, param_repo)); // result must match the reference result
+ EXPECT_EQUAL(fixture.result(), slow_fixture.result()); // both fixtures must produce the same result
+ auto info = fixture.find_all<DenseSingleReduceFunction>();
+ EXPECT_TRUE(info.empty()); // no node of the optimized type may be present
+}
+
+TEST("require that multi-dimensional reduce is not optimized") { // the optimizer only accepts a reduce over exactly one dimension
+ TEST_DO(verify_not_optimized("reduce(a2b3c4d5,sum,a,b)"));
+ TEST_DO(verify_not_optimized("reduce(a2b3c4d5,sum,c,d)"));
+}
+
+TEST("require that reduce to scalar is not optimized") { // the optimizer requires a dense result type
+ TEST_DO(verify_not_optimized("reduce(a10,sum,a)")); // reducing the only dimension
+ TEST_DO(verify_not_optimized("reduce(a10,sum)")); // no dimension listed
+}
+
+TEST("require that sparse reduce is not optimized") { // xy_mapped has only mapped dimensions
+ TEST_DO(verify_not_optimized("reduce(xy_mapped,sum,x)"));
+ TEST_DO(verify_not_optimized("reduce(xy_mapped,sum,y)"));
+}
+
+TEST("require that mixed reduce is not optimized") { // xyz_mixed combines mapped dimensions x,y with indexed dimension z
+ TEST_DO(verify_not_optimized("reduce(xyz_mixed,sum,x)"));
+ TEST_DO(verify_not_optimized("reduce(xyz_mixed,sum,y)"));
+ TEST_DO(verify_not_optimized("reduce(xyz_mixed,sum,z)"));
+}
+
+// NB: these are shadowed by the remove dimension optimization
+TEST("require that reducing self-aggregating trivial dimensions is not optimized") { // covers every aggregator except COUNT
+ TEST_DO(verify_not_optimized("reduce(a1b1c1,avg,c)"));
+ TEST_DO(verify_not_optimized("reduce(a1b1c1,prod,c)"));
+ TEST_DO(verify_not_optimized("reduce(a1b1c1,sum,c)"));
+ TEST_DO(verify_not_optimized("reduce(a1b1c1,max,c)"));
+ TEST_DO(verify_not_optimized("reduce(a1b1c1,min,c)"));
+}
+
+TEST("require that reducing trivial dimension with COUNT is 'optimized'") { // NOTE(review): presumably COUNT is not shadowed because counting one cell yields 1 rather than the cell value - confirm
+ TEST_DO(verify_optimized("reduce(a1b1c1,count,a)", 0, Aggr::COUNT)); // second argument is the expected dim_idx
+ TEST_DO(verify_optimized("reduce(a1b1c1,count,b)", 1, Aggr::COUNT));
+ TEST_DO(verify_optimized("reduce(a1b1c1,count,c)", 2, Aggr::COUNT));
+}
+
+vespalib::string make_expr(const vespalib::string &arg, const vespalib::string &dim, bool float_cells, Aggr aggr) { // builds "reduce(<arg>[f],<aggr name>,<dim>)"
+ return make_string("reduce(%s%s,%s,%s)", arg.c_str(), float_cells ? "f" : "", AggrNames::name_of(aggr)->c_str(), dim.c_str()); // the "f" suffix selects the float-cell parameter; name_of() is dereferenced without a null check
+}
+
+void verify_optimized_multi(const vespalib::string &arg, const vespalib::string &dim, size_t dim_idx) { // runs verify_optimized for both cell types and every aggregator
+ for (bool float_cells: {false, true}) { // double-cell parameter first, then the float-cell one
+ for (Aggr aggr: Aggregator::list()) {
+ auto expr = make_expr(arg, dim, float_cells, aggr);
+ TEST_DO(verify_optimized(expr, dim_idx, aggr));
+ }
+ }
+}
+
+TEST("require that normal dense single reduce works") { // reduces each of the four dimensions of a2b3c4d5 in turn
+ TEST_DO(verify_optimized_multi("a2b3c4d5", "a", 0)); // third argument is the expected dim_idx
+ TEST_DO(verify_optimized_multi("a2b3c4d5", "b", 1));
+ TEST_DO(verify_optimized_multi("a2b3c4d5", "c", 2));
+ TEST_DO(verify_optimized_multi("a2b3c4d5", "d", 3));
+}
+
+TEST("require that minimal dense single reduce works") { // reduces the single non-trivial dimension; the others have size 1
+ TEST_DO(verify_optimized_multi("a2b1c1", "a", 0));
+ TEST_DO(verify_optimized_multi("a1b2c1", "b", 1));
+ TEST_DO(verify_optimized_multi("a1b1c2", "c", 2));
+}
+
+TEST_MAIN() { TEST_RUN_ALL(); }
diff --git a/eval/src/vespa/eval/eval/aggr.cpp b/eval/src/vespa/eval/eval/aggr.cpp
index 8aacac64041..d10bbc4abb8 100644
--- a/eval/src/vespa/eval/eval/aggr.cpp
+++ b/eval/src/vespa/eval/eval/aggr.cpp
@@ -11,53 +11,12 @@ namespace eval {
namespace {
-struct Avg : Aggregator {
- double sum = 0.0;
- size_t cnt = 1;
- void first(double value) override {
- sum = value;
- cnt = 1;
- }
- void next(double value) override {
- sum += value;
- ++cnt;
- }
- double result() const override { return (sum / cnt); }
-};
-
-struct Count : Aggregator {
- size_t cnt = 0;
- void first(double) override { cnt = 1; }
- void next(double) override { ++cnt; }
- double result() const override { return cnt; }
-};
-
-struct Prod : Aggregator {
- double prod = 0.0;
- void first(double value) override { prod = value; }
- void next(double value) override { prod *= value; }
- double result() const override { return prod; }
-};
-
-struct Sum : Aggregator {
- double sum = 0.0;
- void first(double value) override { sum = value; }
- void next(double value) override { sum += value; }
- double result() const override { return sum; }
-};
-
-struct Max : Aggregator {
- double max = 0.0;
- void first(double value) override { max = value; }
- void next(double value) override { max = std::max(max, value); }
- double result() const override { return max; }
-};
-
-struct Min : Aggregator {
- double min = 0.0;
- void first(double value) override { min = value; }
- void next(double value) override { min = std::min(min, value); }
- double result() const override { return min; }
+template <typename T>
+struct Wrapper : Aggregator { // adapts a non-virtual aggregator class T to the virtual Aggregator interface
+ T aggr; // the wrapped aggregator; every call below forwards to it
+ virtual void first(double value) final override { aggr.first(value); }
+ virtual void next(double value) final override { aggr.next(value); }
+ virtual double result() const final override { return aggr.result(); }
};
} // namespace vespalib::eval::<unnamed>
@@ -113,15 +72,22 @@ Aggregator &
Aggregator::create(Aggr aggr, Stash &stash)
{
switch (aggr) {
- case Aggr::AVG: return stash.create<Avg>();
- case Aggr::COUNT: return stash.create<Count>();
- case Aggr::PROD: return stash.create<Prod>();
- case Aggr::SUM: return stash.create<Sum>();
- case Aggr::MAX: return stash.create<Max>();
- case Aggr::MIN: return stash.create<Min>();
+ case Aggr::AVG: return stash.create<Wrapper<aggr::Avg<double>>>();
+ case Aggr::COUNT: return stash.create<Wrapper<aggr::Count<double>>>();
+ case Aggr::PROD: return stash.create<Wrapper<aggr::Prod<double>>>();
+ case Aggr::SUM: return stash.create<Wrapper<aggr::Sum<double>>>();
+ case Aggr::MAX: return stash.create<Wrapper<aggr::Max<double>>>();
+ case Aggr::MIN: return stash.create<Wrapper<aggr::Min<double>>>();
}
LOG_ABORT("should not be reached");
}
+std::vector<Aggr>
+Aggregator::list() // returns the six aggregator kinds handled by create(), in a fixed order
+{
+ return std::vector<Aggr>({ Aggr::AVG, Aggr::COUNT, Aggr::PROD,
+ Aggr::SUM, Aggr::MAX, Aggr::MIN }); // same order as the cases in create()
+}
+
} // namespace vespalib::eval
} // namespace vespalib
diff --git a/eval/src/vespa/eval/eval/aggr.h b/eval/src/vespa/eval/eval/aggr.h
index 4f2284b4ba7..8dea54d8abc 100644
--- a/eval/src/vespa/eval/eval/aggr.h
+++ b/eval/src/vespa/eval/eval/aggr.h
@@ -3,6 +3,7 @@
#pragma once
#include <vespa/vespalib/stllike/string.h>
+#include <vector>
#include <map>
namespace vespalib {
@@ -50,7 +51,72 @@ struct Aggregator {
virtual double result() const = 0;
virtual ~Aggregator();
static Aggregator &create(Aggr aggr, Stash &stash);
+ static std::vector<Aggr> list();
};
+namespace aggr {
+
+template <typename T> class Avg { // arithmetic mean of the values fed through first()/next(), accumulated in type T
+private:
+ T _sum = 0.0;
+ size_t _cnt = 1; // starts at 1, so result() does not divide by zero before first() is called
+public:
+ void first(T value) { // restarts the aggregation with 'value' as the only sample
+ _sum = value;
+ _cnt = 1;
+ }
+ void next(T value) { // adds one more sample
+ _sum += value;
+ ++_cnt;
+ }
+ T result() const { return (_sum / _cnt); } // sum of samples divided by the number of samples
+};
+
+template <typename T> class Count { // counts the values fed through first()/next(); the values themselves are ignored
+private:
+ size_t _cnt = 0;
+public:
+ void first(T) { _cnt = 1; } // restarts the count at one
+ void next(T) { ++_cnt; }
+ T result() const { return _cnt; } // the count is implicitly converted from size_t to T
+};
+
+template <typename T> class Prod { // product of the values fed through first()/next()
+private:
+ T _prod = 0.0; // result() returns this 0 if first() was never called
+public:
+ void first(T value) { _prod = value; } // restarts the aggregation with 'value'
+ void next(T value) { _prod *= value; }
+ T result() const { return _prod; }
+};
+
+template <typename T> class Sum { // sum of the values fed through first()/next()
+private:
+ T _sum = 0.0;
+public:
+ void first(T value) { _sum = value; } // restarts the aggregation with 'value'
+ void next(T value) { _sum += value; }
+ T result() const { return _sum; }
+};
+
+template <typename T> class Max { // largest of the values fed through first()/next()
+private:
+ T _max = 0.0; // result() returns this 0 if first() was never called
+public:
+ void first(T value) { _max = value; } // restarts the aggregation with 'value'
+ void next(T value) { _max = std::max(_max, value); }
+ T result() const { return _max; }
+};
+
+template <typename T> class Min { // smallest of the values fed through first()/next()
+private:
+ T _min = 0.0; // result() returns this 0 if first() was never called
+public:
+ void first(T value) { _min = value; } // restarts the aggregation with 'value'
+ void next(T value) { _min = std::min(_min, value); }
+ T result() const { return _min; }
+};
+
+} // namespace vespalib::eval::aggr
} // namespace vespalib::eval
} // namespace vespalib
diff --git a/eval/src/vespa/eval/eval/test/eval_fixture.cpp b/eval/src/vespa/eval/eval/test/eval_fixture.cpp
index 7ce05ccea8f..dd27ba1f147 100644
--- a/eval/src/vespa/eval/eval/test/eval_fixture.cpp
+++ b/eval/src/vespa/eval/eval/test/eval_fixture.cpp
@@ -101,51 +101,51 @@ std::vector<Value::CREF> get_refs(const std::vector<Value::UP> &values) {
void add_cell_values(TensorSpec &spec, TensorSpec::Address &addr,
const std::vector<std::pair<vespalib::string, size_t> > &dims,
- size_t idx, size_t &seq)
+ size_t idx, size_t &seq, std::function<double(size_t)> gen)
{
if (idx < dims.size()) {
for (size_t i = 0; i < dims[idx].second; ++i) {
addr.emplace(dims[idx].first, TensorSpec::Label(i)).first->second = TensorSpec::Label(i);
- add_cell_values(spec, addr, dims, idx + 1, seq);
+ add_cell_values(spec, addr, dims, idx + 1, seq, gen);
}
} else {
- spec.add(addr, seq++);
+ spec.add(addr, gen(seq++));
}
}
TensorSpec make_dense(const vespalib::string &type,
const std::vector<std::pair<vespalib::string, size_t> > &dims,
- size_t seed)
+ std::function<double(size_t)> gen)
{
TensorSpec spec(type);
TensorSpec::Address addr;
- size_t seq = seed;
- add_cell_values(spec, addr, dims, 0, seq);
+ size_t seq = 0;
+ add_cell_values(spec, addr, dims, 0, seq, gen);
return spec;
}
} // namespace vespalib::eval::test
ParamRepo &
-EvalFixture::ParamRepo::add_vector(const char *d1, size_t s1, size_t seed)
+EvalFixture::ParamRepo::add_vector(const char *d1, size_t s1, gen_fun_t gen)
{
- return add_dense({{d1, s1}}, seed);
+ return add_dense({{d1, s1}}, gen);
}
ParamRepo &
-EvalFixture::ParamRepo::add_matrix(const char *d1, size_t s1, const char *d2, size_t s2, size_t seed)
+EvalFixture::ParamRepo::add_matrix(const char *d1, size_t s1, const char *d2, size_t s2, gen_fun_t gen)
{
- return add_dense({{d1, s1}, {d2, s2}}, seed);
+ return add_dense({{d1, s1}, {d2, s2}}, gen);
}
ParamRepo &
-EvalFixture::ParamRepo::add_cube(const char *d1, size_t s1, const char *d2, size_t s2, const char *d3, size_t s3, size_t seed)
+EvalFixture::ParamRepo::add_cube(const char *d1, size_t s1, const char *d2, size_t s2, const char *d3, size_t s3, gen_fun_t gen)
{
- return add_dense({{d1, s1}, {d2, s2}, {d3, s3}}, seed);
+ return add_dense({{d1, s1}, {d2, s2}, {d3, s3}}, gen);
}
ParamRepo &
-EvalFixture::ParamRepo::add_dense(const std::vector<std::pair<vespalib::string, size_t> > &dims, size_t seed)
+EvalFixture::ParamRepo::add_dense(const std::vector<std::pair<vespalib::string, size_t> > &dims, gen_fun_t gen)
{
vespalib::string prev;
vespalib::string name;
@@ -159,8 +159,8 @@ EvalFixture::ParamRepo::add_dense(const std::vector<std::pair<vespalib::string,
type += fmt("%s[%zu]", dim.first.c_str(), dim.second);
prev = dim.first;
}
- add(name, make_dense(fmt("tensor(%s)", type.c_str()), dims, seed));
- add(name + "f", make_dense(fmt("tensor<float>(%s)", type.c_str()), dims, seed));
+ add(name, make_dense(fmt("tensor(%s)", type.c_str()), dims, gen));
+ add(name + "f", make_dense(fmt("tensor<float>(%s)", type.c_str()), dims, gen));
return *this;
}
diff --git a/eval/src/vespa/eval/eval/test/eval_fixture.h b/eval/src/vespa/eval/eval/test/eval_fixture.h
index f11fb8ebf22..1010b5e58a8 100644
--- a/eval/src/vespa/eval/eval/test/eval_fixture.h
+++ b/eval/src/vespa/eval/eval/test/eval_fixture.h
@@ -10,6 +10,7 @@
#include <vespa/eval/tensor/default_tensor_engine.h>
#include <vespa/vespalib/util/stash.h>
#include <set>
+#include <functional>
namespace vespalib::eval::test {
@@ -26,6 +27,8 @@ public:
struct ParamRepo {
std::map<vespalib::string,Param> map;
+ using gen_fun_t = std::function<double(size_t)>; // maps a cell sequence number to the value stored in that cell
+ static double gen_N(size_t seq) { return (seq + 1); } // default generator: sequence number plus one, i.e. 1, 2, 3, ... for seq 0, 1, 2, ...
ParamRepo() : map() {}
ParamRepo &add(const vespalib::string &name, TensorSpec value_in, bool is_mutable_in) {
map.insert_or_assign(name, Param(std::move(value_in), is_mutable_in));
@@ -37,10 +40,10 @@ public:
ParamRepo &add_mutable(const vespalib::string &name, const TensorSpec &value) {
return add(name, value, true);
}
- ParamRepo &add_vector(const char *d1, size_t s1, size_t seed = 1);
- ParamRepo &add_matrix(const char *d1, size_t s1, const char *d2, size_t s2, size_t seed = 1);
- ParamRepo &add_cube(const char *d1, size_t s1, const char *d2, size_t s2, const char *d3, size_t s3, size_t seed = 1);
- ParamRepo &add_dense(const std::vector<std::pair<vespalib::string, size_t> > &dims, size_t seed = 1);
+ ParamRepo &add_vector(const char *d1, size_t s1, gen_fun_t gen = gen_N); // adds a 1-d dense tensor with dimension d1 of size s1; cell values come from 'gen'
+ ParamRepo &add_matrix(const char *d1, size_t s1, const char *d2, size_t s2, gen_fun_t gen = gen_N);
+ ParamRepo &add_cube(const char *d1, size_t s1, const char *d2, size_t s2, const char *d3, size_t s3, gen_fun_t gen = gen_N);
+ ParamRepo &add_dense(const std::vector<std::pair<vespalib::string, size_t> > &dims, gen_fun_t gen = gen_N);
~ParamRepo() {}
};
diff --git a/eval/src/vespa/eval/tensor/default_tensor_engine.cpp b/eval/src/vespa/eval/tensor/default_tensor_engine.cpp
index 0cadbd64372..a817c1454d8 100644
--- a/eval/src/vespa/eval/tensor/default_tensor_engine.cpp
+++ b/eval/src/vespa/eval/tensor/default_tensor_engine.cpp
@@ -14,6 +14,7 @@
#include "dense/dense_multi_matmul_function.h"
#include "dense/dense_fast_rename_optimizer.h"
#include "dense/dense_add_dimension_optimizer.h"
+#include "dense/dense_single_reduce_function.h"
#include "dense/dense_remove_dimension_optimizer.h"
#include "dense/dense_lambda_peek_optimizer.h"
#include "dense/dense_inplace_join_function.h"
@@ -260,27 +261,40 @@ DefaultTensorEngine::optimize(const TensorFunction &expr, Stash &stash) const
{
using Child = TensorFunction::Child;
Child root(expr);
- std::vector<Child::CREF> nodes({root});
- for (size_t i = 0; i < nodes.size(); ++i) {
- nodes[i].get().get().push_children(nodes);
- }
LOG(debug, "tensor function before optimization:\n%s\n", root.get().as_string().c_str());
- while (!nodes.empty()) {
- const Child &child = nodes.back();
- child.set(VectorFromDoublesFunction::optimize(child.get(), stash));
- child.set(DenseTensorCreateFunction::optimize(child.get(), stash));
- child.set(DenseLambdaPeekOptimizer::optimize(child.get(), stash));
- child.set(DenseTensorPeekFunction::optimize(child.get(), stash));
- child.set(DenseDotProductFunction::optimize(child.get(), stash));
- child.set(DenseXWProductFunction::optimize(child.get(), stash));
- child.set(DenseMatMulFunction::optimize(child.get(), stash));
- child.set(DenseMultiMatMulFunction::optimize(child.get(), stash));
- child.set(DenseFastRenameOptimizer::optimize(child.get(), stash));
- child.set(DenseAddDimensionOptimizer::optimize(child.get(), stash));
- child.set(DenseRemoveDimensionOptimizer::optimize(child.get(), stash));
- child.set(DenseInplaceMapFunction::optimize(child.get(), stash));
- child.set(DenseInplaceJoinFunction::optimize(child.get(), stash));
- nodes.pop_back();
+ {
+ std::vector<Child::CREF> nodes({root});
+ for (size_t i = 0; i < nodes.size(); ++i) {
+ nodes[i].get().get().push_children(nodes);
+ }
+ while (!nodes.empty()) {
+ const Child &child = nodes.back().get();
+ child.set(DenseDotProductFunction::optimize(child.get(), stash));
+ child.set(DenseXWProductFunction::optimize(child.get(), stash));
+ child.set(DenseMatMulFunction::optimize(child.get(), stash));
+ child.set(DenseMultiMatMulFunction::optimize(child.get(), stash));
+ nodes.pop_back();
+ }
+ }
+ {
+ std::vector<Child::CREF> nodes({root});
+ for (size_t i = 0; i < nodes.size(); ++i) {
+ nodes[i].get().get().push_children(nodes);
+ }
+ while (!nodes.empty()) {
+ const Child &child = nodes.back().get();
+ child.set(DenseAddDimensionOptimizer::optimize(child.get(), stash));
+ child.set(DenseRemoveDimensionOptimizer::optimize(child.get(), stash));
+ child.set(VectorFromDoublesFunction::optimize(child.get(), stash));
+ child.set(DenseTensorCreateFunction::optimize(child.get(), stash));
+ child.set(DenseTensorPeekFunction::optimize(child.get(), stash));
+ child.set(DenseLambdaPeekOptimizer::optimize(child.get(), stash));
+ child.set(DenseFastRenameOptimizer::optimize(child.get(), stash));
+ child.set(DenseInplaceMapFunction::optimize(child.get(), stash));
+ child.set(DenseInplaceJoinFunction::optimize(child.get(), stash));
+ child.set(DenseSingleReduceFunction::optimize(child.get(), stash));
+ nodes.pop_back();
+ }
}
LOG(debug, "tensor function after optimization:\n%s\n", root.get().as_string().c_str());
return root.get();
diff --git a/eval/src/vespa/eval/tensor/dense/CMakeLists.txt b/eval/src/vespa/eval/tensor/dense/CMakeLists.txt
index 7019749e123..0131ff28398 100644
--- a/eval/src/vespa/eval/tensor/dense/CMakeLists.txt
+++ b/eval/src/vespa/eval/tensor/dense/CMakeLists.txt
@@ -14,6 +14,7 @@ vespa_add_library(eval_tensor_dense OBJECT
dense_multi_matmul_function.cpp
dense_remove_dimension_optimizer.cpp
dense_replace_type_function.cpp
+ dense_single_reduce_function.cpp
dense_tensor.cpp
dense_tensor_address_mapper.cpp
dense_tensor_cells_iterator.cpp
diff --git a/eval/src/vespa/eval/tensor/dense/dense_single_reduce_function.cpp b/eval/src/vespa/eval/tensor/dense/dense_single_reduce_function.cpp
new file mode 100644
index 00000000000..663993b6c26
--- /dev/null
+++ b/eval/src/vespa/eval/tensor/dense/dense_single_reduce_function.cpp
@@ -0,0 +1,134 @@
+// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include "dense_single_reduce_function.h"
+#include "dense_tensor_view.h"
+#include <vespa/eval/eval/value.h>
+
+namespace vespalib::tensor {
+
+using eval::Aggr;
+using eval::InterpretedFunction;
+using eval::TensorEngine;
+using eval::TensorFunction;
+using eval::Value;
+using eval::ValueType;
+using eval::as;
+
+using namespace eval::tensor_function;
+using namespace eval::aggr;
+
+namespace {
+
+struct Params { // precomputed sizes describing how the input cells are split around the reduced dimension
+ const ValueType &result_type; // held by reference, not copied; the referenced type must outlive this object
+ size_t outer_size; // product of the sizes of all dimensions before dim_idx
+ size_t dim_size; // size of the dimension at dim_idx (the one being reduced)
+ size_t inner_size; // product of the sizes of all dimensions after dim_idx
+ Params(const ValueType &result_type_in, const ValueType &child_type, size_t dim_idx)
+ : result_type(result_type_in), outer_size(1), dim_size(1), inner_size(1) // every size starts at 1, the neutral element for the products below
+ {
+ for (size_t i = 0; i < child_type.dimensions().size(); ++i) { // classify each input dimension by its position relative to dim_idx
+ if (i < dim_idx) {
+ outer_size *= child_type.dimensions()[i].size;
+ } else if (i == dim_idx) {
+ dim_size *= child_type.dimensions()[i].size;
+ } else {
+ inner_size *= child_type.dimensions()[i].size;
+ }
+ }
+ }
+};
+
+template <typename CT, typename AGGR>
+CT reduce_cells(const CT *src, size_t dim_size, size_t stride, AGGR &aggr) { // aggregates dim_size cells starting at src, spaced 'stride' elements apart
+ aggr.first(*src); // the first cell is read unconditionally, so dim_size is assumed to be at least 1
+ for (size_t i = 1; i < dim_size; ++i) {
+ src += stride; // step to the next cell to aggregate
+ aggr.next(*src);
+ }
+ return aggr.result();
+}
+
+template <typename CT, typename AGGR>
+void my_single_reduce_op(InterpretedFunction::State &state, uint64_t param) { // interpreter op: reduces one dimension of a dense tensor with cell type CT using aggregator AGGR
+ const auto &params = *(const Params *)(param); // 'param' carries the address of a Params object (compile_self passes it as uint64_t)
+ const CT *src = DenseTensorView::typify_cells<CT>(state.peek(0)).cbegin(); // input cells of the value at state.peek(0) - presumably the top of the value stack, TODO confirm
+ auto dst_cells = state.stash.create_array<CT>(params.outer_size * params.inner_size); // one output cell per (outer, inner) combination
+ AGGR aggr; // reused for every output cell; reduce_cells() restarts it through first()
+ CT *dst = dst_cells.begin();
+ const size_t block_size = (params.dim_size * params.inner_size); // number of input cells belonging to one outer index
+ for (size_t outer = 0; outer < params.outer_size; ++outer) {
+ for (size_t inner = 0; inner < params.inner_size; ++inner) {
+ *dst++ = reduce_cells<CT, AGGR>(src + inner, params.dim_size, params.inner_size, aggr); // the cells to aggregate are inner_size elements apart
+ }
+ src += block_size; // advance to the input block of the next outer index
+ }
+ state.pop_push(state.stash.create<DenseTensorView>(params.result_type, TypedCells(dst_cells))); // presumably replaces the input value with the result view - TODO confirm pop_push semantics
+}
+
+template <typename CT>
+InterpretedFunction::op_function my_select_2(Aggr aggr) { // picks the my_single_reduce_op instantiation for cell type CT and the given aggregator
+ switch (aggr) {
+ case Aggr::AVG: return my_single_reduce_op<CT, Avg<CT>>;
+ case Aggr::COUNT: return my_single_reduce_op<CT, Count<CT>>;
+ case Aggr::PROD: return my_single_reduce_op<CT, Prod<CT>>;
+ case Aggr::SUM: return my_single_reduce_op<CT, Sum<CT>>;
+ case Aggr::MAX: return my_single_reduce_op<CT, Max<CT>>;
+ case Aggr::MIN: return my_single_reduce_op<CT, Min<CT>>;
+ }
+ abort(); // reached only if 'aggr' matches none of the cases above
+}
+
+InterpretedFunction::op_function my_select(CellType cell_type, Aggr aggr) { // picks the op function for a (cell type, aggregator) pair
+ if (cell_type == ValueType::CellType::DOUBLE) {
+ return my_select_2<double>(aggr);
+ }
+ if (cell_type == ValueType::CellType::FLOAT) {
+ return my_select_2<float>(aggr);
+ }
+ abort(); // only DOUBLE and FLOAT cells are handled
+}
+
+bool check_input_type(const ValueType &type) { // true when 'type' is dense and its cell type is one my_select() can handle
+ return (type.is_dense() && ((type.cell_type() == CellType::FLOAT) || (type.cell_type() == CellType::DOUBLE)));
+}
+
+} // namespace vespalib::tensor::<unnamed>
+
+DenseSingleReduceFunction::DenseSingleReduceFunction(const ValueType &result_type, // forwarded to the Op1 base class
+ const TensorFunction &child, // forwarded to the Op1 base class
+ size_t dim_idx, Aggr aggr) // stored as-is in _dim_idx and _aggr
+ : Op1(result_type, child),
+ _dim_idx(dim_idx),
+ _aggr(aggr)
+{
+}
+
+DenseSingleReduceFunction::~DenseSingleReduceFunction() = default; // defined here rather than in the header
+
+InterpretedFunction::Instruction
+DenseSingleReduceFunction::compile_self(const TensorEngine &, Stash &stash) const // builds the instruction that performs this reduce; the engine argument is unused
+{
+ auto op = my_select(result_type().cell_type(), _aggr); // optimize() asserts that result and child cell types are equal
+ auto &params = stash.create<Params>(result_type(), child().result_type(), _dim_idx); // created in 'stash'; its address is handed to the instruction below
+ static_assert(sizeof(uint64_t) == sizeof(&params)); // the pointer must fit exactly in the uint64_t instruction parameter
+ return InterpretedFunction::Instruction(op, (uint64_t)&params); // my_single_reduce_op casts this value back to a Params pointer
+}
+
+const TensorFunction &
+DenseSingleReduceFunction::optimize(const TensorFunction &expr, Stash &stash) // returns a DenseSingleReduceFunction replacing 'expr' when it qualifies, otherwise 'expr' itself
+{
+ auto reduce = as<Reduce>(expr); // tested for null below before any use
+ if (reduce && (reduce->dimensions().size() == 1) && // exactly one dimension is reduced
+ check_input_type(reduce->child().result_type()) && // input is dense with float or double cells
+ expr.result_type().is_dense()) // result must be dense as well
+ {
+ size_t dim_idx = reduce->child().result_type().dimension_index(reduce->dimensions()[0]); // position of the reduced dimension in the input type
+ assert(dim_idx != ValueType::Dimension::npos); // the reduced dimension must exist in the input type
+ assert(expr.result_type().cell_type() == reduce->child().result_type().cell_type()); // compile_self selects the op from the result cell type and applies it to the input cells
+ return stash.create<DenseSingleReduceFunction>(expr.result_type(), reduce->child(), dim_idx, reduce->aggr());
+ }
+ return expr; // not a candidate; left unchanged
+}
+
+} // namespace vespalib::tensor
diff --git a/eval/src/vespa/eval/tensor/dense/dense_single_reduce_function.h b/eval/src/vespa/eval/tensor/dense/dense_single_reduce_function.h
new file mode 100644
index 00000000000..037994727fc
--- /dev/null
+++ b/eval/src/vespa/eval/tensor/dense/dense_single_reduce_function.h
@@ -0,0 +1,31 @@
+// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include <vespa/eval/eval/tensor_function.h>
+
+namespace vespalib::tensor {
+
+/**
+ * Tensor function reducing a single dimension of a dense
+ * tensor where the result is also a dense tensor.
+ **/
+class DenseSingleReduceFunction : public eval::tensor_function::Op1
+{
+private:
+ size_t _dim_idx; // index of the reduced dimension within the child's result type
+ eval::Aggr _aggr; // aggregator applied along the reduced dimension
+
+public:
+ DenseSingleReduceFunction(const eval::ValueType &result_type,
+ const eval::TensorFunction &child,
+ size_t dim_idx, eval::Aggr aggr);
+ ~DenseSingleReduceFunction() override;
+ size_t dim_idx() const { return _dim_idx; }
+ eval::Aggr aggr() const { return _aggr; }
+ bool result_is_mutable() const override { return true; } // always reports a mutable result
+ eval::InterpretedFunction::Instruction compile_self(const eval::TensorEngine &engine, Stash &stash) const override;
+ static const eval::TensorFunction &optimize(const eval::TensorFunction &expr, Stash &stash); // returns a replacement for 'expr', or 'expr' itself when it does not qualify
+};
+
+} // namespace vespalib::tensor