summary refs log tree commit diff stats
path: root/eval
diff options
context:
space:
mode:
Diffstat (limited to 'eval')
-rw-r--r-- eval/src/apps/tensor_conformance/generate.cpp 2
-rw-r--r-- eval/src/tests/eval/aggr/aggr_test.cpp 36
-rw-r--r-- eval/src/vespa/eval/eval/aggr.cpp 16
-rw-r--r-- eval/src/vespa/eval/eval/aggr.h 53
-rw-r--r-- eval/src/vespa/eval/eval/test/eval_spec.cpp 1
-rw-r--r-- eval/src/vespa/eval/eval/test/tensor_conformance.cpp 1
-rw-r--r-- eval/src/vespa/eval/tensor/dense/dense_remove_dimension_optimizer.cpp 9
7 files changed, 98 insertions, 20 deletions
diff --git a/eval/src/apps/tensor_conformance/generate.cpp b/eval/src/apps/tensor_conformance/generate.cpp
index df1c06593cb..ea535758cda 100644
--- a/eval/src/apps/tensor_conformance/generate.cpp
+++ b/eval/src/apps/tensor_conformance/generate.cpp
@@ -47,6 +47,8 @@ void generate_tensor_reduce(TestBuilder &dst) {
generate_reduce(Aggr::PROD, SigmoidF(N()), dst);
generate_reduce(Aggr::SUM, N(), dst);
generate_reduce(Aggr::MAX, N(), dst);
+ // add MEDIAN cases when supported in Java
+ // generate_reduce(Aggr::MEDIAN, N(), dst);
generate_reduce(Aggr::MIN, N(), dst);
}
diff --git a/eval/src/tests/eval/aggr/aggr_test.cpp b/eval/src/tests/eval/aggr/aggr_test.cpp
index 603fdb508e2..b3e9c625fd9 100644
--- a/eval/src/tests/eval/aggr/aggr_test.cpp
+++ b/eval/src/tests/eval/aggr/aggr_test.cpp
@@ -9,13 +9,14 @@ using namespace vespalib::eval::aggr;
TEST("require that aggregator list returns appropriate entries") {
auto list = Aggregator::list();
- ASSERT_EQUAL(list.size(), 6u);
+ ASSERT_EQUAL(list.size(), 7u); // one more entry than before: MEDIAN
EXPECT_EQUAL(int(list[0]), int(Aggr::AVG)); // expected order follows the Aggr enum declaration order
EXPECT_EQUAL(int(list[1]), int(Aggr::COUNT));
EXPECT_EQUAL(int(list[2]), int(Aggr::PROD));
EXPECT_EQUAL(int(list[3]), int(Aggr::SUM));
EXPECT_EQUAL(int(list[4]), int(Aggr::MAX));
- EXPECT_EQUAL(int(list[5]), int(Aggr::MIN));
+ EXPECT_EQUAL(int(list[5]), int(Aggr::MEDIAN)); // MEDIAN is listed between MAX and MIN
+ EXPECT_EQUAL(int(list[6]), int(Aggr::MIN)); // MIN moves from index 5 to index 6
}
TEST("require that AVG aggregator works as expected") {
@@ -73,6 +74,31 @@ TEST("require that MAX aggregator works as expected") {
aggr.next(200.0), EXPECT_EQUAL(aggr.result(), 200.0);
}
+TEST("require that MEDIAN aggregator works as expected") { // walks the aggregator through odd and even sample counts
+ Stash stash;
+ Aggregator &aggr = Aggregator::create(Aggr::MEDIAN, stash);
+ EXPECT_TRUE(std::isnan(aggr.result())); // no samples yet: result is NaN
+ aggr.first(10.0), EXPECT_EQUAL(aggr.result(), 10.0); // samples {10}
+ aggr.next(20.0), EXPECT_EQUAL(aggr.result(), 15.0); // samples {10,20}: even count, mean of the two middle values
+ aggr.next(7.0), EXPECT_EQUAL(aggr.result(), 10.0); // samples {7,10,20}: odd count, middle value
+ aggr.next(40.0), EXPECT_EQUAL(aggr.result(), 15.0); // samples {7,10,20,40}: (10 + 20) / 2
+ aggr.next(16.0), EXPECT_EQUAL(aggr.result(), 16.0); // samples {7,10,16,20,40}: middle value
+ aggr.first(100.0), EXPECT_EQUAL(aggr.result(), 100.0); // expected value shows first() discards the earlier samples
+ aggr.next(200.0), EXPECT_EQUAL(aggr.result(), 150.0); // samples {100,200}
+}
+
+TEST("require that MEDIAN aggregator handles NaN values") { // a single NaN sample makes the result NaN
+ Stash stash;
+ Aggregator &aggr = Aggregator::create(Aggr::MEDIAN, stash);
+ double my_nan = std::numeric_limits<double>::quiet_NaN();
+ aggr.first(10.0);
+ EXPECT_EQUAL(aggr.result(), 10.0); // only non-NaN samples so far
+ aggr.next(my_nan);
+ EXPECT_TRUE(std::isnan(aggr.result())); // NaN sample present: result becomes NaN
+ aggr.next(20.0);
+ EXPECT_TRUE(std::isnan(aggr.result())); // later non-NaN samples do not clear the NaN result
+}
+
TEST("require that MIN aggregator works as expected") {
Stash stash;
Aggregator &aggr = Aggregator::create(Aggr::MIN, stash);
@@ -103,11 +129,17 @@ float aggr_merge(const std::vector<float> &a, const std::vector<float> &b) {
}
TEST("require that aggregator merge works") {
+ float my_nan = std::numeric_limits<float>::quiet_NaN(); // used to check NaN on either side of a Median merge
EXPECT_EQUAL(aggr_merge<Avg>({1,2},{3,4}), 2.5);
EXPECT_EQUAL(aggr_merge<Count>({1,2},{3,4}), 4.0);
EXPECT_EQUAL(aggr_merge<Prod>({1,2},{3,4}), 24.0);
EXPECT_EQUAL(aggr_merge<Sum>({1,2},{3,4}), 10.0);
EXPECT_EQUAL(aggr_merge<Max>({1,2},{3,4}), 4.0);
+ EXPECT_EQUAL(aggr_merge<Median>({1,2},{3,4}), 2.5); // 4 samples in total: mean of the two middle values (2 and 3)
+ EXPECT_EQUAL(aggr_merge<Median>({1,2},{3,4,5}), 3); // 5 samples in total: middle value, right side larger
+ EXPECT_EQUAL(aggr_merge<Median>({0,1,2},{3,4}), 2); // 5 samples in total: middle value, left side larger
+ EXPECT_TRUE(std::isnan(aggr_merge<Median>({1,2,my_nan,3},{4,5}))); // NaN among the left-hand samples
+ EXPECT_TRUE(std::isnan(aggr_merge<Median>({1,2,3},{4,my_nan,5}))); // NaN among the right-hand samples
EXPECT_EQUAL(aggr_merge<Min>({1,2},{3,4}), 1.0);
}
diff --git a/eval/src/vespa/eval/eval/aggr.cpp b/eval/src/vespa/eval/eval/aggr.cpp
index e731c7a1f09..4abd5e41f47 100644
--- a/eval/src/vespa/eval/eval/aggr.cpp
+++ b/eval/src/vespa/eval/eval/aggr.cpp
@@ -34,12 +34,13 @@ AggrNames::AggrNames()
: _name_aggr_map(),
_aggr_name_map()
{
- add(Aggr::AVG, "avg");
- add(Aggr::COUNT, "count");
- add(Aggr::PROD, "prod");
- add(Aggr::SUM, "sum");
- add(Aggr::MAX, "max");
- add(Aggr::MIN, "min");
+ add(Aggr::AVG, "avg");
+ add(Aggr::COUNT, "count");
+ add(Aggr::PROD, "prod");
+ add(Aggr::SUM, "sum");
+ add(Aggr::MAX, "max");
+ add(Aggr::MEDIAN, "median");
+ add(Aggr::MIN, "min");
}
const vespalib::string *
@@ -82,7 +83,8 @@ std::vector<Aggr>
Aggregator::list()
{
return std::vector<Aggr>({ Aggr::AVG, Aggr::COUNT, Aggr::PROD,
- Aggr::SUM, Aggr::MAX, Aggr::MIN });
+ Aggr::SUM, Aggr::MAX, Aggr::MEDIAN,
+ Aggr::MIN });
}
} // namespace vespalib::eval
diff --git a/eval/src/vespa/eval/eval/aggr.h b/eval/src/vespa/eval/eval/aggr.h
index 050287d183c..f52c029eee5 100644
--- a/eval/src/vespa/eval/eval/aggr.h
+++ b/eval/src/vespa/eval/eval/aggr.h
@@ -7,6 +7,8 @@
#include <limits>
#include <vector>
#include <map>
+#include <algorithm>
+#include <cmath>
namespace vespalib {
@@ -20,7 +22,7 @@ struct BinaryOperation;
* Enumeration of all different aggregators that are allowed to be
* used in tensor reduce expressions.
**/
-enum class Aggr { AVG, COUNT, PROD, SUM, MAX, MIN };
+enum class Aggr { AVG, COUNT, PROD, SUM, MAX, MEDIAN, MIN };
/**
* Utility class used to map between aggregator enum value and symbolic
@@ -120,6 +122,42 @@ public:
constexpr T result() const { return _max; }
};
+template <typename T> class Median { // median aggregator: keeps every sample and computes the median on demand in result()
+private:
+ std::vector<T> _seen; // all samples received so far, in arrival order
+public:
+ constexpr Median() : _seen() {} // NOTE(review): std::vector is not usable in constant expressions before C++20 - confirm constexpr is intended
+ constexpr Median(T value) : _seen({value}) {} // start with a single sample
+ constexpr void sample(T value) { _seen.push_back(value); } // record one more sample
+ constexpr void merge(const Median &rhs) { // append all samples held by rhs to this aggregator
+ for (T value: rhs._seen) {
+ _seen.push_back(value);
+ }
+ }; // NOTE(review): the ';' after the function body is an empty declaration
+ constexpr T result() const { // median of the samples; NaN when there are no samples or any sample is NaN
+ if (_seen.empty()) {
+ return std::numeric_limits<T>::quiet_NaN();
+ }
+ std::vector<T> tmp; // working copy: result() is const, and nth_element reorders its input
+ tmp.reserve(_seen.size());
+ for (T value: _seen) {
+ if (!std::isnan(value)) {
+ tmp.push_back(value);
+ } else {
+ return std::numeric_limits<T>::quiet_NaN(); // a single NaN sample makes the whole result NaN
+ }
+ }
+ size_t n = (tmp.size() / 2); // index of the middle element (the upper middle when the count is even)
+ std::nth_element(tmp.begin(), tmp.begin() + n, tmp.end()); // partial sort: tmp[n] is in sorted position, nothing before it is greater
+ T result = tmp[n]; // the nth element
+ if ((tmp.size() % 2) == 0) { // even count: average the two middle values
+ result += *std::max_element(tmp.begin(), tmp.begin() + n); // largest value before index n is the lower middle
+ result /= T{2};
+ }
+ return result;
+ }
+};
+
template <typename T> class Min {
private:
T _min;
@@ -137,12 +175,13 @@ struct TypifyAggr {
template <template<typename> typename TT> using Result = TypifyResultSimpleTemplate<TT>;
template <typename F> static decltype(auto) resolve(Aggr aggr, F &&f) {
switch (aggr) {
- case Aggr::AVG: return f(Result<aggr::Avg>());
- case Aggr::COUNT: return f(Result<aggr::Count>());
- case Aggr::PROD: return f(Result<aggr::Prod>());
- case Aggr::SUM: return f(Result<aggr::Sum>());
- case Aggr::MAX: return f(Result<aggr::Max>());
- case Aggr::MIN: return f(Result<aggr::Min>());
+ case Aggr::AVG: return f(Result<aggr::Avg>());
+ case Aggr::COUNT: return f(Result<aggr::Count>());
+ case Aggr::PROD: return f(Result<aggr::Prod>());
+ case Aggr::SUM: return f(Result<aggr::Sum>());
+ case Aggr::MAX: return f(Result<aggr::Max>());
+ case Aggr::MEDIAN: return f(Result<aggr::Median>());
+ case Aggr::MIN: return f(Result<aggr::Min>());
}
abort();
}
diff --git a/eval/src/vespa/eval/eval/test/eval_spec.cpp b/eval/src/vespa/eval/eval/test/eval_spec.cpp
index b1dfa6d3c9c..6b80b65df6c 100644
--- a/eval/src/vespa/eval/eval/test/eval_spec.cpp
+++ b/eval/src/vespa/eval/eval/test/eval_spec.cpp
@@ -173,6 +173,7 @@ EvalSpec::add_tensor_operation_cases() {
add_rule({"a", -1.0, 1.0}, "reduce(a,prod)", [](double a){ return a; });
add_rule({"a", -1.0, 1.0}, "reduce(a,sum)", [](double a){ return a; });
add_rule({"a", -1.0, 1.0}, "reduce(a,max)", [](double a){ return a; });
+ add_rule({"a", -1.0, 1.0}, "reduce(a,median)", [](double a){ return a; });
add_rule({"a", -1.0, 1.0}, "reduce(a,min)", [](double a){ return a; });
add_expression({"a"}, "rename(a,x,y)");
add_expression({"a"}, "rename(a,(x,y),(y,x))");
diff --git a/eval/src/vespa/eval/eval/test/tensor_conformance.cpp b/eval/src/vespa/eval/eval/test/tensor_conformance.cpp
index 0e703e81073..701595920ac 100644
--- a/eval/src/vespa/eval/eval/test/tensor_conformance.cpp
+++ b/eval/src/vespa/eval/eval/test/tensor_conformance.cpp
@@ -358,6 +358,7 @@ struct TestContext {
TEST_DO(test_reduce_op(Aggr::PROD, SigmoidF(N())));
TEST_DO(test_reduce_op(Aggr::SUM, N()));
TEST_DO(test_reduce_op(Aggr::MAX, N()));
+ TEST_DO(test_reduce_op(Aggr::MEDIAN, N()));
TEST_DO(test_reduce_op(Aggr::MIN, N()));
}
diff --git a/eval/src/vespa/eval/tensor/dense/dense_remove_dimension_optimizer.cpp b/eval/src/vespa/eval/tensor/dense/dense_remove_dimension_optimizer.cpp
index a64d5edbb37..0cecd588317 100644
--- a/eval/src/vespa/eval/tensor/dense/dense_remove_dimension_optimizer.cpp
+++ b/eval/src/vespa/eval/tensor/dense/dense_remove_dimension_optimizer.cpp
@@ -15,10 +15,11 @@ using namespace eval::tensor_function;
namespace {
bool is_ident_aggr(Aggr aggr) { // true for each aggregator listed below; COUNT is not among them
- return ((aggr == Aggr::AVG) ||
- (aggr == Aggr::PROD) ||
- (aggr == Aggr::SUM) ||
- (aggr == Aggr::MAX) ||
+ return ((aggr == Aggr::AVG) ||
+ (aggr == Aggr::PROD) ||
+ (aggr == Aggr::SUM) ||
+ (aggr == Aggr::MAX) ||
+ (aggr == Aggr::MEDIAN) || // new entry; presumably valid because the median of a single value is that value - confirm
(aggr == Aggr::MIN));
}