move reference operations to a common place

* lift reference implementations from unit tests
* write a reference TensorCreate implementation
* use these from unit tests
* some minor cosmetic changes with structured bindings
author: Arne Juul <arnej@verizonmedia.com> 2020-11-25 08:09:53 +0000
committer: Arne Juul <arnej@verizonmedia.com> 2020-11-25 10:05:14 +0000
commit: b8387e5455c63a39ae62795f2484217ad7480b12 (patch)
tree: 22f21f3015c773d8d57c7fa6eb35951ecb47af92 /eval
parent: e5a873f35ba72592c747e4c6eb9e7eafb0a0b462 (diff)
11 files changed, 389 insertions, 207 deletions
diff --git a/eval/src/tests/instruction/generic_concat/generic_concat_test.cpp b/eval/src/tests/instruction/generic_concat/generic_concat_test.cpp
index aaea8fdcb28..c59d9783648 100644
--- a/eval/src/tests/instruction/generic_concat/generic_concat_test.cpp
+++ b/eval/src/tests/instruction/generic_concat/generic_concat_test.cpp
@@ -8,6 +8,7 @@
 #include <vespa/eval/eval/value_codec.h>
 #include <vespa/eval/instruction/generic_concat.h>
 #include <vespa/eval/eval/interpreted_function.h>
+#include <vespa/eval/eval/test/reference_operations.h>
 #include <vespa/eval/eval/test/tensor_model.hpp>
 #include <vespa/vespalib/util/stringfmt.h>
 #include <vespa/vespalib/gtest/gtest.h>
@@ -64,63 +65,6 @@ TensorSpec perform_simpletensor_concat(const TensorSpec &a, const TensorSpec &b,
     return SimpleTensorEngine::ref().to_spec(*out);
 }
 
-bool concat_address(const TensorSpec::Address &me, const TensorSpec::Address &other,
-                    const std::string &concat_dim, size_t my_offset,
-                    TensorSpec::Address &my_out, TensorSpec::Address &other_out)
-{
-    my_out.insert_or_assign(concat_dim, my_offset);
-    for (const auto &my_dim: me) {
-        const auto & name = my_dim.first;
-        const auto & label = my_dim.second;
-        if (name == concat_dim) {
-            my_out.insert_or_assign(name, label.index + my_offset);
-        } else {
-            auto pos = other.find(name);
-            if ((pos == other.end()) || (pos->second == label)) {
-                my_out.insert_or_assign(name, label);
-                other_out.insert_or_assign(name, label);
-            } else {
-                return false;
-            }
-        }
-    }
-    return true;
-}
-
-bool concat_addresses(const TensorSpec::Address &a, const TensorSpec::Address &b,
-                      const std::string &concat_dim, size_t b_offset,
-                      TensorSpec::Address &a_out, TensorSpec::Address &b_out)
-{
-    return concat_address(a, b, concat_dim,        0, a_out, b_out) &&
-           concat_address(b, a, concat_dim, b_offset, b_out, a_out);
-}
-
-TensorSpec reference_concat(const TensorSpec &a, const TensorSpec &b, const std::string &concat_dim) {
-    ValueType a_type = ValueType::from_spec(a.type());
-    ValueType b_type = ValueType::from_spec(b.type());
-    ValueType res_type = ValueType::concat(a_type, b_type, concat_dim);
-    EXPECT_FALSE(res_type.is_error());
-    size_t b_offset = 1;
-    size_t concat_dim_index = a_type.dimension_index(concat_dim);
-    if (concat_dim_index != ValueType::Dimension::npos) {
-        const auto &dim = a_type.dimensions()[concat_dim_index];
-        EXPECT_TRUE(dim.is_indexed());
-        b_offset = dim.size;
-    }
-    TensorSpec result(res_type.to_spec());
-    for (const auto &cell_a: a.cells()) {
-        for (const auto &cell_b: b.cells()) {
-            TensorSpec::Address addr_a;
-            TensorSpec::Address addr_b;
-            if (concat_addresses(cell_a.first, cell_b.first, concat_dim, b_offset, addr_a, addr_b)) {
-                result.add(addr_a, cell_a.second);
-                result.add(addr_b, cell_b.second);
-            }
-        }
-    }
-    return result;
-}
-
 TensorSpec perform_generic_concat(const TensorSpec &a, const TensorSpec &b,
                                   const std::string &concat_dim, const ValueBuilderFactory &factory)
 {
@@ -138,7 +82,7 @@ TEST(GenericConcatTest, generic_reference_concat_works) {
         const TensorSpec lhs = spec(concat_layouts[i], N());
         const TensorSpec rhs = spec(concat_layouts[i + 1], Div16(N()));
         SCOPED_TRACE(fmt("\n===\nin LHS: %s\nin RHS: %s\n===\n", lhs.to_string().c_str(), rhs.to_string().c_str()));
-        auto actual = reference_concat(lhs, rhs, "y");
+        auto actual = ReferenceOperations::concat(lhs, rhs, "y");
         auto expect = perform_simpletensor_concat(lhs, rhs, "y");
         EXPECT_EQ(actual, expect);
     }
@@ -151,7 +95,7 @@ void test_generic_concat_with(const ValueBuilderFactory &factory) {
         const TensorSpec rhs = spec(concat_layouts[i + 1], Div16(N()));
         SCOPED_TRACE(fmt("\n===\nin LHS: %s\nin RHS: %s\n===\n", lhs.to_string().c_str(), rhs.to_string().c_str()));
         auto actual = perform_generic_concat(lhs, rhs, "y", factory);
-        auto expect = reference_concat(lhs, rhs, "y");
+        auto expect = ReferenceOperations::concat(lhs, rhs, "y");
         EXPECT_EQ(actual, expect);
     }
 }
@@ -202,7 +146,7 @@ TEST(GenericConcatTest, immediate_generic_concat_works) {
         const TensorSpec rhs = spec(concat_layouts[i + 1], Div16(N()));
         SCOPED_TRACE(fmt("\n===\nin LHS: %s\nin RHS: %s\n===\n", lhs.to_string().c_str(), rhs.to_string().c_str()));
         auto actual = immediate_generic_concat(lhs, rhs, "y");
-        auto expect = reference_concat(lhs, rhs, "y");
+        auto expect = ReferenceOperations::concat(lhs, rhs, "y");
         EXPECT_EQ(actual, expect);
     }
 }
diff --git a/eval/src/tests/instruction/generic_create/generic_create_test.cpp b/eval/src/tests/instruction/generic_create/generic_create_test.cpp
index e07db870ad2..08534f32d3c 100644
--- a/eval/src/tests/instruction/generic_create/generic_create_test.cpp
+++ b/eval/src/tests/instruction/generic_create/generic_create_test.cpp
@@ -5,6 +5,7 @@
 #include <vespa/eval/eval/value_codec.h>
 #include <vespa/eval/instruction/generic_create.h>
 #include <vespa/eval/eval/interpreted_function.h>
+#include <vespa/eval/eval/test/reference_operations.h>
 #include <vespa/eval/eval/test/tensor_model.hpp>
 #include <vespa/vespalib/util/stringfmt.h>
 #include <vespa/vespalib/gtest/gtest.h>
@@ -53,6 +54,19 @@ bool operator< (const NumberedCellSpec &a, const NumberedCellSpec &b) {
     return a.num < b.num;
 }
 
+TensorSpec reference_create(const TensorSpec &a) {
+    std::vector<TensorSpec> children;
+    ReferenceOperations::CreateSpec spec;
+    for (const auto & [addr, value] : a.cells()) {
+        size_t child_idx = children.size();
+        spec.emplace(addr, child_idx);
+        TensorSpec child("double");
+        child.add({}, value);
+        children.push_back(child);
+    }
+    return ReferenceOperations::create(a.type(), spec, children);
+}
+
 TensorSpec perform_generic_create(const TensorSpec &a, const ValueBuilderFactory &factory)
 {
     ValueType res_type = ValueType::from_spec(a.type());
@@ -80,12 +94,13 @@ void test_generic_create_with(const ValueBuilderFactory &factory) {
     for (const auto & layout : create_layouts) {
         TensorSpec full = spec(layout, N());
         auto actual = perform_generic_create(full, factory);
-        EXPECT_EQ(actual, full);
+        auto expect = reference_create(full);
+        EXPECT_EQ(actual, expect);
         for (size_t n : {2, 3, 4, 5}) {
             TensorSpec partial = remove_each(full, n);
             actual = perform_generic_create(partial, factory);
-            auto filled = spec_from_value(*value_from_spec(partial, SimpleValueBuilderFactory::get()));
-            EXPECT_EQ(actual, filled);
+            expect = reference_create(partial);
+            EXPECT_EQ(actual, expect);
         }
     }
 }
diff --git a/eval/src/tests/instruction/generic_join/generic_join_test.cpp b/eval/src/tests/instruction/generic_join/generic_join_test.cpp
index 558f20d2e10..a81294c8d25 100644
--- a/eval/src/tests/instruction/generic_join/generic_join_test.cpp
+++ b/eval/src/tests/instruction/generic_join/generic_join_test.cpp
@@ -5,6 +5,7 @@
 #include <vespa/eval/eval/value_codec.h>
 #include <vespa/eval/instruction/generic_join.h>
 #include <vespa/eval/eval/interpreted_function.h>
+#include <vespa/eval/eval/test/reference_operations.h>
 #include <vespa/eval/eval/test/tensor_model.hpp>
 #include <vespa/vespalib/util/stringfmt.h>
 #include <vespa/vespalib/gtest/gtest.h>
@@ -53,23 +54,6 @@ bool join_address(const TensorSpec::Address &a, const TensorSpec::Address &b, Te
     return true;
 }
 
-TensorSpec reference_join(const TensorSpec &a, const TensorSpec &b, join_fun_t function) {
-    ValueType res_type = ValueType::join(ValueType::from_spec(a.type()), ValueType::from_spec(b.type()));
-    EXPECT_FALSE(res_type.is_error());
-    TensorSpec result(res_type.to_spec());
-    for (const auto &cell_a: a.cells()) {
-        for (const auto &cell_b: b.cells()) {
-            TensorSpec::Address addr;
-            if (join_address(cell_a.first, cell_b.first, addr) &&
-                join_address(cell_b.first, cell_a.first, addr))
-            {
-                result.add(addr, function(cell_a.second, cell_b.second));
-            }
-        }
-    }
-    return result;
-}
-
 TensorSpec perform_generic_join(const TensorSpec &a, const TensorSpec &b,
                                 join_fun_t function, const ValueBuilderFactory &factory)
 {
@@ -130,7 +114,7 @@ TEST(GenericJoinTest, generic_join_works_for_simple_and_fast_values) {
         TensorSpec rhs = spec(join_layouts[i + 1], Div16(N()));
         for (auto fun: {operation::Add::f, operation::Sub::f, operation::Mul::f, operation::Div::f}) {
             SCOPED_TRACE(fmt("\n===\nLHS: %s\nRHS: %s\n===\n", lhs.to_string().c_str(), rhs.to_string().c_str()));
-            auto expect = reference_join(lhs, rhs, fun);
+            auto expect = ReferenceOperations::join(lhs, rhs, fun);
             auto simple = perform_generic_join(lhs, rhs, fun, SimpleValueBuilderFactory::get());
             auto fast = perform_generic_join(lhs, rhs, fun, FastValueBuilderFactory::get());
             EXPECT_EQ(simple, expect);
@@ -154,7 +138,7 @@ TEST(GenericJoinTest, immediate_generic_join_works) {
         TensorSpec rhs = spec(join_layouts[i + 1], Div16(N()));
         for (auto fun: {operation::Add::f, operation::Sub::f, operation::Mul::f, operation::Div::f}) {
             SCOPED_TRACE(fmt("\n===\nLHS: %s\nRHS: %s\n===\n", lhs.to_string().c_str(), rhs.to_string().c_str()));
-            auto expect = reference_join(lhs, rhs, fun);
+            auto expect = ReferenceOperations::join(lhs, rhs, fun);
             auto actual = immediate_generic_join(lhs, rhs, fun);
             EXPECT_EQ(actual, expect);
         }
diff --git a/eval/src/tests/instruction/generic_map/generic_map_test.cpp b/eval/src/tests/instruction/generic_map/generic_map_test.cpp
index 63a9563a11b..ba6a1630777 100644
--- a/eval/src/tests/instruction/generic_map/generic_map_test.cpp
+++ b/eval/src/tests/instruction/generic_map/generic_map_test.cpp
@@ -5,6 +5,7 @@
 #include <vespa/eval/eval/value_codec.h>
 #include <vespa/eval/instruction/generic_map.h>
 #include <vespa/eval/eval/interpreted_function.h>
+#include <vespa/eval/eval/test/reference_operations.h>
 #include <vespa/eval/eval/test/tensor_model.hpp>
 #include <vespa/vespalib/util/stringfmt.h>
 #include <vespa/vespalib/gtest/gtest.h>
@@ -30,16 +31,6 @@ std::vector<Layout> map_layouts = {
     float_cells({x({"a","b","c"}),y(5),z({"i","j","k","l"})})
 };
 
-TensorSpec reference_map(const TensorSpec &a, map_fun_t func) {
-    ValueType res_type = ValueType::from_spec(a.type());
-    EXPECT_FALSE(res_type.is_error());
-    TensorSpec result(res_type.to_spec());
-    for (const auto &cell: a.cells()) {
-        result.add(cell.first, func(cell.second));
-    }
-    return result;
-}
-
 TensorSpec perform_generic_map(const TensorSpec &a, map_fun_t func, const ValueBuilderFactory &factory)
 {
     auto lhs = value_from_spec(a, factory);
@@ -54,7 +45,7 @@ void test_generic_map_with(const ValueBuilderFactory &factory) {
         ValueType lhs_type = ValueType::from_spec(lhs.type());
         for (auto func : {operation::Floor::f, operation::Fabs::f, operation::Square::f, operation::Inv::f}) {
             SCOPED_TRACE(fmt("\n===\nLHS: %s\n===\n", lhs.to_string().c_str()));
-            auto expect = reference_map(lhs, func);
+            auto expect = ReferenceOperations::map(lhs, func);
             auto actual = perform_generic_map(lhs, func, factory);
             EXPECT_EQ(actual, expect);
         }
@@ -82,7 +73,7 @@ TEST(GenericMapTest, immediate_generic_map_works) {
         ValueType lhs_type = ValueType::from_spec(lhs.type());
         for (auto func : {operation::Floor::f, operation::Fabs::f, operation::Square::f, operation::Inv::f}) {
             SCOPED_TRACE(fmt("\n===\nLHS: %s\n===\n", lhs.to_string().c_str()));
-            auto expect = reference_map(lhs, func);
+            auto expect = ReferenceOperations::map(lhs, func);
             auto actual = immediate_generic_map(lhs, func, SimpleValueBuilderFactory::get());
             EXPECT_EQ(actual, expect);
         }
diff --git a/eval/src/tests/instruction/generic_merge/generic_merge_test.cpp b/eval/src/tests/instruction/generic_merge/generic_merge_test.cpp
index 5166ef6ccc9..a43169a6959 100644
--- a/eval/src/tests/instruction/generic_merge/generic_merge_test.cpp
+++ b/eval/src/tests/instruction/generic_merge/generic_merge_test.cpp
@@ -5,6 +5,7 @@
 #include <vespa/eval/eval/value_codec.h>
 #include <vespa/eval/instruction/generic_merge.h>
 #include <vespa/eval/eval/interpreted_function.h>
+#include <vespa/eval/eval/test/reference_operations.h>
 #include <vespa/eval/eval/test/tensor_model.hpp>
 #include <vespa/vespalib/util/stringfmt.h>
 #include <vespa/vespalib/gtest/gtest.h>
@@ -33,29 +34,6 @@ std::vector<Layout> merge_layouts = {
     {x({"a","b","c"}),y(5)},                            {x({"b","c","d"}),y(5)}
 };
 
-
-TensorSpec reference_merge(const TensorSpec &a, const TensorSpec &b, join_fun_t fun) {
-    ValueType res_type = ValueType::merge(ValueType::from_spec(a.type()),
-                                          ValueType::from_spec(b.type()));
-    EXPECT_FALSE(res_type.is_error());
-    TensorSpec result(res_type.to_spec());
-    for (const auto &cell: a.cells()) {
-        auto other = b.cells().find(cell.first);
-        if (other == b.cells().end()) {
-            result.add(cell.first, cell.second);
-        } else {
-            result.add(cell.first, fun(cell.second, other->second));
-        }
-    }
-    for (const auto &cell: b.cells()) {
-        auto other = a.cells().find(cell.first);
-        if (other == a.cells().end()) {
-            result.add(cell.first, cell.second);
-        }
-    }
-    return result;
-}
-
 TensorSpec perform_generic_merge(const TensorSpec &a, const TensorSpec &b, join_fun_t fun, const ValueBuilderFactory &factory) {
     Stash stash;
     auto lhs = value_from_spec(a, factory);
@@ -72,7 +50,7 @@ void test_generic_merge_with(const ValueBuilderFactory &factory) {
         TensorSpec rhs = spec(merge_layouts[i + 1], Div16(N()));
         SCOPED_TRACE(fmt("\n===\nLHS: %s\nRHS: %s\n===\n", lhs.to_string().c_str(), rhs.to_string().c_str()));
         for (auto fun: {operation::Add::f, operation::Mul::f, operation::Sub::f, operation::Max::f}) {
-            auto expect = reference_merge(lhs, rhs, fun);
+            auto expect = ReferenceOperations::merge(lhs, rhs, fun);
             auto actual = perform_generic_merge(lhs, rhs, fun, factory);
             EXPECT_EQ(actual, expect);
         }
@@ -102,7 +80,7 @@ TEST(GenericMergeTest, immediate_generic_merge_works) {
         TensorSpec rhs = spec(merge_layouts[i + 1], Div16(N()));
         SCOPED_TRACE(fmt("\n===\nLHS: %s\nRHS: %s\n===\n", lhs.to_string().c_str(), rhs.to_string().c_str()));
         for (auto fun: {operation::Add::f, operation::Mul::f, operation::Sub::f, operation::Max::f}) {
-            auto expect = reference_merge(lhs, rhs, fun);
+            auto expect = ReferenceOperations::merge(lhs, rhs, fun);
             auto actual = immediate_generic_merge(lhs, rhs, fun);
             EXPECT_EQ(actual, expect);
         }
diff --git a/eval/src/tests/instruction/generic_peek/generic_peek_test.cpp b/eval/src/tests/instruction/generic_peek/generic_peek_test.cpp
index ef78d0cde68..ca7d4ea1746 100644
--- a/eval/src/tests/instruction/generic_peek/generic_peek_test.cpp
+++ b/eval/src/tests/instruction/generic_peek/generic_peek_test.cpp
@@ -6,6 +6,7 @@
 #include <vespa/eval/eval/value_codec.h>
 #include <vespa/eval/instruction/generic_peek.h>
 #include <vespa/eval/eval/interpreted_function.h>
+#include <vespa/eval/eval/test/reference_operations.h>
 #include <vespa/eval/eval/test/tensor_model.hpp>
 #include <vespa/vespalib/util/stringfmt.h>
 #include <vespa/vespalib/util/overload.h>
@@ -36,55 +37,31 @@ std::vector<Layout> peek_layouts = {
 
 using PeekSpec = GenericPeek::SpecMap;
 
-TensorSpec reference_peek(const TensorSpec &param, const vespalib::string &result_type, const PeekSpec &spec) {
-    TensorSpec result(result_type);
-    ValueType param_type = ValueType::from_spec(param.type());
-    auto is_mapped_dim = [&](const vespalib::string &name) {
-        size_t dim_idx = param_type.dimension_index(name);
-        assert(dim_idx != ValueType::Dimension::npos);
-        const auto &param_dim = param_type.dimensions()[dim_idx];
-        return param_dim.is_mapped();
-    };
-    TensorSpec::Address addr;
+TensorSpec reference_peek(const TensorSpec &param, const PeekSpec &spec) {
+    std::vector<TensorSpec> children;
+    PeekSpec with_indexes;
     for (const auto & [dim_name, label_or_child] : spec) {
+        const vespalib::string &dim = dim_name;
         std::visit(vespalib::overload
                    {
-                       [&,&dim_name = dim_name](const TensorSpec::Label &label) {
-                           addr.emplace(dim_name, label);
+                       [&](const TensorSpec::Label &label) {
+                           with_indexes.emplace(dim, label);
                        },
-                       [&,&dim_name = dim_name](const size_t &child_value) {
+                       [&](const size_t &child_value) {
                            // here, label_or_child is a size_t specifying the value
                            // we pretend a child produced
-                           if (is_mapped_dim(dim_name)) {
-                               // (but cast to signed first, to allow labels like the string "-2")
-                               addr.emplace(dim_name, vespalib::make_string("%zd", ssize_t(child_value)));
-                           } else {
-                               addr.emplace(dim_name, child_value);
-                           }
+                           size_t child_idx = children.size();
+                           TensorSpec child("double");
+                           // (but cast to signed first, to allow labels like the string "-2")
+                           child.add({}, ssize_t(child_value));
+                           children.push_back(child);
+                           with_indexes.emplace(dim, child_idx);
                        }
                    }, label_or_child);
     }
-    for (const auto &cell: param.cells()) {
-        bool keep = true;
-        TensorSpec::Address my_addr;
-        for (const auto &binding: cell.first) {
-            auto pos = addr.find(binding.first);
-            if (pos == addr.end()) {
-                my_addr.emplace(binding.first, binding.second);
-            } else {
-                if (!(pos->second == binding.second)) {
-                    keep = false;
-                }
-            }
-        }
-        if (keep) {
-            result.add(my_addr, cell.second);
-        }
-    }
-    return spec_from_value(*value_from_spec(result, SimpleValueBuilderFactory::get()));
+    return ReferenceOperations::peek(param, with_indexes, children);
 }
 
-
 TensorSpec perform_generic_peek(const TensorSpec &a, const ValueType &result_type,
                                 PeekSpec spec, const ValueBuilderFactory &factory)
 {
@@ -174,7 +151,7 @@ void verify_peek_equal(const TensorSpec &input,
     }
     if (reduce_dims.empty()) return;
     ValueType result_type = param_type.reduce(reduce_dims);
-    auto expect = reference_peek(input, result_type.to_spec(), spec);
+    auto expect = reference_peek(input, spec);
     SCOPED_TRACE(fmt("peek input: %s\n  peek spec: %s\n  peek result %s\n",
                      input.to_string().c_str(),
                      to_str(spec).c_str(),
diff --git a/eval/src/tests/instruction/generic_reduce/generic_reduce_test.cpp b/eval/src/tests/instruction/generic_reduce/generic_reduce_test.cpp
index d894d273f02..beac7a9df3d 100644
--- a/eval/src/tests/instruction/generic_reduce/generic_reduce_test.cpp
+++ b/eval/src/tests/instruction/generic_reduce/generic_reduce_test.cpp
@@ -5,6 +5,7 @@
 #include <vespa/eval/eval/value_codec.h>
 #include <vespa/eval/instruction/generic_reduce.h>
 #include <vespa/eval/eval/interpreted_function.h>
+#include <vespa/eval/eval/test/reference_operations.h>
 #include <vespa/eval/eval/test/tensor_model.hpp>
 #include <vespa/vespalib/util/stringfmt.h>
 #include <vespa/vespalib/gtest/gtest.h>
@@ -34,35 +35,6 @@ std::vector<Layout> layouts = {
     float_cells({x({"a","b","c"}),y(5),z({"i","j","k","l"})})
 };
 
-TensorSpec reference_reduce(const TensorSpec &a, const std::vector<vespalib::string> &dims, Aggr aggr) {
-    Stash stash;
-    ValueType res_type = ValueType::from_spec(a.type()).reduce(dims);
-    EXPECT_FALSE(res_type.is_error());
-    std::map<TensorSpec::Address,std::optional<Aggregator*>> my_map;
-    for (const auto &cell: a.cells()) {
-        TensorSpec::Address addr;
-        for (const auto &dim: cell.first) {
-            if (res_type.dimension_index(dim.first) != ValueType::Dimension::npos) {
-                addr.insert_or_assign(dim.first, dim.second);
-            }
-        }
-        auto [pos, is_empty] = my_map.emplace(addr, std::nullopt);
-        if (is_empty) {
-            pos->second = &Aggregator::create(aggr, stash);
-            pos->second.value()->first(cell.second);
-        } else {
-            pos->second.value()->next(cell.second);
-        }
-    }
-    TensorSpec result(res_type.to_spec());
-    for (const auto &my_entry: my_map) {
-        result.add(my_entry.first, my_entry.second.value()->result());
-    }
-    // use SimpleValue to add implicit cells with default value
-    const auto &factory = SimpleValueBuilderFactory::get();
-    return spec_from_value(*value_from_spec(result, factory));
-}
-
 TensorSpec perform_generic_reduce(const TensorSpec &a, const std::vector<vespalib::string> &dims,
                                   Aggr aggr, const ValueBuilderFactory &factory)
 {
@@ -99,11 +71,11 @@ void test_generic_reduce_with(const ValueBuilderFactory &factory) {
         TensorSpec input = spec(layout, Div16(N()));
         for (Aggr aggr: {Aggr::SUM, Aggr::AVG, Aggr::MIN, Aggr::MAX}) {
             for (const Domain &domain: layout) {
-                auto expect = reference_reduce(input, {domain.dimension}, aggr);
+                auto expect = ReferenceOperations::reduce(input, {domain.dimension}, aggr);
                 auto actual = perform_generic_reduce(input, {domain.dimension}, aggr, factory);
                 EXPECT_EQ(actual, expect);
             }
-            auto expect = reference_reduce(input, {}, aggr);
+            auto expect = ReferenceOperations::reduce(input, {}, aggr);
             auto actual = perform_generic_reduce(input, {}, aggr, factory);
             EXPECT_EQ(actual, expect);
         }
@@ -130,11 +102,11 @@ TEST(GenericReduceTest, immediate_generic_reduce_works) {
         TensorSpec input = spec(layout, Div16(N()));
         for (Aggr aggr: {Aggr::SUM, Aggr::AVG, Aggr::MIN, Aggr::MAX}) {
             for (const Domain &domain: layout) {
-                auto expect = reference_reduce(input, {domain.dimension}, aggr);
+                auto expect = ReferenceOperations::reduce(input, {domain.dimension}, aggr);
                 auto actual = immediate_generic_reduce(input, {domain.dimension}, aggr);
                 EXPECT_EQ(actual, expect);
             }
-            auto expect = reference_reduce(input, {}, aggr);
+            auto expect = ReferenceOperations::reduce(input, {}, aggr);
             auto actual = immediate_generic_reduce(input, {}, aggr);
             EXPECT_EQ(actual, expect);
         }
diff --git a/eval/src/tests/instruction/generic_rename/generic_rename_test.cpp b/eval/src/tests/instruction/generic_rename/generic_rename_test.cpp
index b2e30a8b78c..a7e6b8d807b 100644
--- a/eval/src/tests/instruction/generic_rename/generic_rename_test.cpp
+++ b/eval/src/tests/instruction/generic_rename/generic_rename_test.cpp
@@ -6,6 +6,7 @@
 #include <vespa/eval/instruction/generic_rename.h>
 #include <vespa/eval/eval/interpreted_function.h>
 #include <vespa/eval/eval/test/tensor_model.hpp>
+#include <vespa/eval/eval/test/reference_operations.h>
 #include <vespa/vespalib/util/stringfmt.h>
 #include <vespa/vespalib/gtest/gtest.h>
 
@@ -98,20 +99,6 @@ vespalib::string rename_dimension(const vespalib::string &name, const FromTo &ft
     return name;
 }
 
-TensorSpec reference_rename(const TensorSpec &a, const FromTo &ft) {
-    ValueType res_type = ValueType::from_spec(a.type()).rename(ft.from, ft.to);
-    EXPECT_FALSE(res_type.is_error());
-    TensorSpec result(res_type.to_spec());
-    for (const auto &cell: a.cells()) {
-        TensorSpec::Address addr;
-        for (const auto &dim: cell.first) {
-            addr.insert_or_assign(rename_dimension(dim.first, ft), dim.second);
-        }
-        result.add(addr, cell.second);
-    }
-    return result;
-}
-
 TensorSpec perform_generic_rename(const TensorSpec &a,
                                   const FromTo &ft, const ValueBuilderFactory &factory)
 {
@@ -132,7 +119,7 @@ void test_generic_rename_with(const ValueBuilderFactory &factory) {
             if (renamed_type.is_error()) continue;
             // printf("type %s -> %s\n", lhs_type.to_spec().c_str(), renamed_type.to_spec().c_str());
             SCOPED_TRACE(fmt("\n===\nLHS: %s\n===\n", lhs.to_string().c_str()));
-            auto expect = reference_rename(lhs, from_to);
+            auto expect = ReferenceOperations::rename(lhs, from_to.from, from_to.to);
             auto actual = perform_generic_rename(lhs, from_to, factory);
             EXPECT_EQ(actual, expect);
         }
@@ -165,7 +152,7 @@ TEST(GenericRenameTest, immediate_generic_rename_works) {
             if (renamed_type.is_error()) continue;
             // printf("type %s -> %s\n", lhs_type.to_spec().c_str(), renamed_type.to_spec().c_str());
             SCOPED_TRACE(fmt("\n===\nLHS: %s\n===\n", lhs.to_string().c_str()));
-            auto expect = reference_rename(lhs, from_to);
+            auto expect = ReferenceOperations::rename(lhs, from_to.from, from_to.to);
             auto actual = immediate_generic_rename(lhs, from_to);
             EXPECT_EQ(actual, expect);
         }
diff --git a/eval/src/vespa/eval/eval/test/CMakeLists.txt b/eval/src/vespa/eval/eval/test/CMakeLists.txt
index 6e88beab9b7..f3b0750d503 100644
--- a/eval/src/vespa/eval/eval/test/CMakeLists.txt
+++ b/eval/src/vespa/eval/eval/test/CMakeLists.txt
@@ -3,6 +3,7 @@ vespa_add_library(eval_eval_test OBJECT
     SOURCES
     eval_fixture.cpp
     eval_spec.cpp
+    reference_operations.cpp
     tensor_conformance.cpp
     test_io.cpp
     value_compare.cpp
diff --git a/eval/src/vespa/eval/eval/test/reference_operations.cpp b/eval/src/vespa/eval/eval/test/reference_operations.cpp
new file mode 100644
index 00000000000..fdf7ba98f70
--- /dev/null
+++ b/eval/src/vespa/eval/eval/test/reference_operations.cpp
@@ -0,0 +1,295 @@
+// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include "reference_operations.h"
+#include <vespa/eval/eval/simple_value.h>
+#include <vespa/eval/eval/value_codec.h>
+#include <vespa/vespalib/util/overload.h>
+#include <vespa/vespalib/util/visit_ranges.h>
+#include <vespa/vespalib/util/stash.h>
+#include <vespa/vespalib/util/stringfmt.h>
+#include <cassert>
+
+namespace vespalib::eval {
+
+namespace {
+
+// Maps one side's cell address into the concat result address space.
+//
+// 'me' is the address being mapped and 'other' is the address of the
+// candidate cell from the opposite operand. The concat dimension in
+// 'my_out' is set to 'my_offset' (plus the label index if 'me' already
+// has that dimension). Every other dimension of 'me' is copied into both
+// 'my_out' and 'other_out', unless 'other' binds the same dimension to a
+// different label; in that case the cells do not combine and false is
+// returned (the output addresses may then be partially filled).
+bool concat_address(const TensorSpec::Address &me, const TensorSpec::Address &other,
+                    const std::string &concat_dim, size_t my_offset,
+                    TensorSpec::Address &my_out, TensorSpec::Address &other_out)
+{
+    // default position along the concat dimension; overwritten below
+    // when 'me' carries the concat dimension itself
+    my_out.insert_or_assign(concat_dim, my_offset);
+    for (const auto & [name, label] : me) {
+        if (name == concat_dim) {
+            my_out.insert_or_assign(name, label.index + my_offset);
+            continue;
+        }
+        auto pos = other.find(name);
+        if ((pos != other.end()) && !(pos->second == label)) {
+            return false; // shared dimension with conflicting labels
+        }
+        my_out.insert_or_assign(name, label);
+        other_out.insert_or_assign(name, label);
+    }
+    return true;
+}
+
+// Computes the result addresses for a pair of cells being concatenated.
+// 'a' keeps offset 0 along the concat dimension while 'b' is shifted by
+// 'b_offset'. Returns false if the two addresses disagree on a shared
+// (non-concat) dimension; 'b' is not processed if 'a' already failed.
+bool concat_addresses(const TensorSpec::Address &a, const TensorSpec::Address &b,
+                      const std::string &concat_dim, size_t b_offset,
+                      TensorSpec::Address &a_out, TensorSpec::Address &b_out)
+{
+    if (!concat_address(a, b, concat_dim, 0, a_out, b_out)) {
+        return false;
+    }
+    return concat_address(b, a, concat_dim, b_offset, b_out, a_out);
+}
+
+// Collapses a child spec into a single number by summing all its cells.
+// A spec without cells yields 0.0.
+double value_from_child(const TensorSpec &child) {
+    double total = 0.0;
+    for (const auto &cell : child.cells()) {
+        total += cell.second;
+    }
+    return total;
+}
+
+// Copies all dimension bindings of 'a' into 'addr'. Returns false as soon
+// as 'b' binds one of those dimensions to a different label; bindings
+// copied before the conflict remain in 'addr'.
+bool join_address(const TensorSpec::Address &a, const TensorSpec::Address &b, TensorSpec::Address &addr) {
+    for (const auto & [name, label] : a) {
+        auto match = b.find(name);
+        const bool shared = (match != b.end());
+        if (shared && !(match->second == label)) {
+            return false;
+        }
+        addr.insert_or_assign(name, label);
+    }
+    return true;
+}
+
+// Looks up 'name' in 'from' and returns the entry at the same position in
+// 'to' (first match wins). Names not listed in 'from' are returned
+// unchanged. 'to' is indexed with positions of 'from', so it must be at
+// least as long as 'from'.
+vespalib::string rename_dimension(const vespalib::string &name, const std::vector<vespalib::string> &from, const std::vector<vespalib::string> &to) {
+    size_t idx = 0;
+    while ((idx < from.size()) && !(name == from[idx])) {
+        ++idx;
+    }
+    return (idx < from.size()) ? to[idx] : name;
+}
+
+} // namespace <unnamed>
+
+
+// Reference implementation of tensor concat along 'concat_dim'.
+//
+// The result type is resolved with ValueType::concat; if that is an error
+// type, a cell-less spec of that type is returned. Otherwise every pair of
+// cells (one from 'a', one from 'b') that agree on their shared dimensions
+// contributes both cells to the result, with the cell from 'b' shifted
+// along the concat dimension by the size of that dimension in 'a' (or by 1
+// when 'a' does not have the dimension).
+TensorSpec ReferenceOperations::concat(const TensorSpec &a, const TensorSpec &b, const std::string &concat_dim) {
+    ValueType lhs_type = ValueType::from_spec(a.type());
+    ValueType rhs_type = ValueType::from_spec(b.type());
+    ValueType res_type = ValueType::concat(lhs_type, rhs_type, concat_dim);
+    TensorSpec result(res_type.to_spec());
+    if (res_type.is_error()) {
+        return result;
+    }
+    size_t b_offset = 1;
+    const size_t dim_idx = lhs_type.dimension_index(concat_dim);
+    if (dim_idx != ValueType::Dimension::npos) {
+        const auto &lhs_dim = lhs_type.dimensions()[dim_idx];
+        assert(lhs_dim.is_indexed()); // type resolving (above) should catch this
+        b_offset = lhs_dim.size;
+    }
+    for (const auto & [a_addr, a_value] : a.cells()) {
+        for (const auto & [b_addr, b_value] : b.cells()) {
+            TensorSpec::Address a_out;
+            TensorSpec::Address b_out;
+            if (!concat_addresses(a_addr, b_addr, concat_dim, b_offset, a_out, b_out)) {
+                continue; // cells disagree on a shared dimension
+            }
+            result.add(a_out, a_value);
+            result.add(b_out, b_value);
+        }
+    }
+    return result;
+}
+
+
+// Reference implementation of tensor create.
+//
+// Builds a spec of the given 'type' where each address listed in 'spec'
+// gets its value from the child it points to (the sum of that child's
+// cells). If 'type' does not parse to a valid value type, a cell-less spec
+// with that type string is returned. Each child index in 'spec' must be a
+// valid index into 'children'.
+TensorSpec ReferenceOperations::create(const vespalib::string &type, const CreateSpec &spec, const std::vector<TensorSpec> &children) {
+    TensorSpec result(type);
+    if (ValueType::from_spec(type).is_error()) {
+        return result;
+    }
+    for (const auto &entry : spec) {
+        const size_t child_idx = entry.second;
+        assert(child_idx < children.size());
+        result.add(entry.first, value_from_child(children[child_idx]));
+    }
+    // round-trip via SimpleValue to add implicit cells with default value
+    const auto &factory = SimpleValueBuilderFactory::get();
+    auto value = value_from_spec(result, factory);
+    return spec_from_value(*value);
+}
+
+
+// Reference implementation of tensor join.
+//
+// The result type is resolved with ValueType::join; an error type yields a
+// cell-less spec of that type. Otherwise 'function' is applied to every
+// pair of cells (one from each input) whose addresses agree on all shared
+// dimensions, and the outcome is stored at the union of the two addresses.
+TensorSpec ReferenceOperations::join(const TensorSpec &a, const TensorSpec &b, join_fun_t function) {
+    ValueType res_type = ValueType::join(ValueType::from_spec(a.type()), ValueType::from_spec(b.type()));
+    TensorSpec result(res_type.to_spec());
+    if (res_type.is_error()) {
+        return result;
+    }
+    for (const auto & [a_addr, a_value] : a.cells()) {
+        for (const auto & [b_addr, b_value] : b.cells()) {
+            TensorSpec::Address combined;
+            if (!join_address(a_addr, b_addr, combined)) {
+                continue;
+            }
+            if (!join_address(b_addr, a_addr, combined)) {
+                continue;
+            }
+            result.add(combined, function(a_value, b_value));
+        }
+    }
+    return result;
+}
+
+
+// Reference implementation of tensor map: the result has the same type as
+// 'a' and holds func(value) at every address present in 'a'. An input whose
+// type is an error type yields a cell-less spec of that type.
+TensorSpec ReferenceOperations::map(const TensorSpec &a, map_fun_t func) {
+    ValueType res_type = ValueType::from_spec(a.type());
+    TensorSpec result(res_type.to_spec());
+    if (res_type.is_error()) {
+        return result;
+    }
+    for (const auto &cell : a.cells()) {
+        result.add(cell.first, func(cell.second));
+    }
+    return result;
+}
+
+
+// Reference implementation of tensor merge.
+//
+// The result type is resolved with ValueType::merge; an error type yields a
+// cell-less spec of that type. Cells whose address exists in both inputs
+// are combined with fun(a_value, b_value); cells present in only one of the
+// inputs are carried over unchanged.
+TensorSpec ReferenceOperations::merge(const TensorSpec &a, const TensorSpec &b, join_fun_t fun) {
+    ValueType res_type = ValueType::merge(ValueType::from_spec(a.type()),
+                                          ValueType::from_spec(b.type()));
+    TensorSpec result(res_type.to_spec());
+    if (res_type.is_error()) {
+        return result;
+    }
+    const auto &cells_a = a.cells();
+    const auto &cells_b = b.cells();
+    // everything from 'a', combined with 'b' where the address overlaps
+    for (const auto & [addr, a_value] : cells_a) {
+        auto match = cells_b.find(addr);
+        if (match != cells_b.end()) {
+            result.add(addr, fun(a_value, match->second));
+        } else {
+            result.add(addr, a_value);
+        }
+    }
+    // remaining cells that only exist in 'b'
+    for (const auto & [addr, b_value] : cells_b) {
+        if (cells_a.count(addr) == 0) {
+            result.add(addr, b_value);
+        }
+    }
+    return result;
+}
+
+
+// Reference implementation of tensor peek.
+//
+// 'peek_spec' maps dimension names to either a verbatim label or the index
+// of a child in 'children' whose value (the sum of its cells) selects the
+// label. The peeked dimensions are removed from the type of 'param' and
+// only cells matching all selected labels are kept, addressed by their
+// remaining dimensions.
+//
+// Returns a cell-less error-typed spec if 'peek_spec' is empty or if
+// removing the peeked dimensions from the type of 'param' gives an error
+// type. Child indexes in 'peek_spec' must be valid indexes into 'children'.
+TensorSpec ReferenceOperations::peek(const TensorSpec &param, const PeekSpec &peek_spec, const std::vector<TensorSpec> &children) {
+    if (peek_spec.empty()) {
+        return TensorSpec(ValueType::error_type().to_spec());
+    }
+    std::vector<vespalib::string> peek_dims;
+    for (const auto &entry : peek_spec) {
+        peek_dims.push_back(entry.first);
+    }
+    ValueType param_type = ValueType::from_spec(param.type());
+    ValueType result_type = param_type.reduce(peek_dims);
+    TensorSpec result(result_type.to_spec());
+    if (result_type.is_error()) {
+        return result;
+    }
+    auto is_mapped_dim = [&](const vespalib::string &name) {
+        size_t dim_idx = param_type.dimension_index(name);
+        assert(dim_idx != ValueType::Dimension::npos);
+        const auto &param_dim = param_type.dimensions()[dim_idx];
+        return param_dim.is_mapped();
+    };
+    // resolve the (partial) address selected by the peek spec
+    TensorSpec::Address addr;
+    for (const auto & [dim_name, label_or_child] : peek_spec) {
+        // plain reference alias that the lambdas below can capture
+        // (structured bindings cannot be captured by lambdas before C++20)
+        const vespalib::string &dim = dim_name;
+        std::visit(vespalib::overload
+                   {
+                       [&](const TensorSpec::Label &label) {
+                           addr.emplace(dim, label);
+                       },
+                       [&](const size_t &child_idx) {
+                           assert(child_idx < children.size());
+                           const auto &child = children[child_idx];
+                           double child_value = value_from_child(child);
+                           if (is_mapped_dim(dim)) {
+                               // mapped dimensions use the integer part of the value as a
+                               // string label; %lld with a long long argument keeps the
+                               // conversion specifier and the argument type in agreement
+                               addr.emplace(dim, vespalib::make_string("%lld", static_cast<long long>(child_value)));
+                           } else {
+                               addr.emplace(dim, child_value);
+                           }
+                       }
+                   }, label_or_child);
+    }
+    // keep the cells matching the selected address, projected onto the
+    // dimensions that were not peeked
+    for (const auto &cell: param.cells()) {
+        bool keep = true;
+        TensorSpec::Address my_addr;
+        for (const auto &binding: cell.first) {
+            auto pos = addr.find(binding.first);
+            if (pos == addr.end()) {
+                my_addr.emplace(binding.first, binding.second);
+            } else if (!(pos->second == binding.second)) {
+                keep = false;
+                break; // cell is rejected; no need to inspect more dimensions
+            }
+        }
+        if (keep) {
+            result.add(my_addr, cell.second);
+        }
+    }
+    // use SimpleValue to add implicit cells with default value
+    const auto &factory = SimpleValueBuilderFactory::get();
+    return spec_from_value(*value_from_spec(result, factory));
+}
+
+
+// Reference implementation of tensor reduce.
+//
+// The result type is the type of 'a' with 'dims' reduced away; an error
+// type yields a cell-less spec of that type. Each input cell is projected
+// onto the dimensions that remain in the result type, and all cells
+// sharing a projected address are combined using an aggregator of kind
+// 'aggr'.
+TensorSpec ReferenceOperations::reduce(const TensorSpec &a, const std::vector<vespalib::string> &dims, Aggr aggr) {
+    ValueType res_type = ValueType::from_spec(a.type()).reduce(dims);
+    TensorSpec result(res_type.to_spec());
+    if (res_type.is_error()) {
+        return result;
+    }
+    Stash stash; // passed to Aggregator::create for every aggregator made below
+    // one aggregator per result address; the pointer is only null between
+    // try_emplace and the assignment right after it
+    std::map<TensorSpec::Address, Aggregator*> my_map;
+    for (const auto &cell: a.cells()) {
+        TensorSpec::Address addr;
+        for (const auto &dim: cell.first) {
+            if (res_type.dimension_index(dim.first) != ValueType::Dimension::npos) {
+                addr.insert_or_assign(dim.first, dim.second);
+            }
+        }
+        auto [pos, inserted] = my_map.try_emplace(addr, nullptr);
+        if (inserted) {
+            // first cell seen for this result address
+            pos->second = &Aggregator::create(aggr, stash);
+            pos->second->first(cell.second);
+        } else {
+            pos->second->next(cell.second);
+        }
+    }
+    for (const auto &my_entry: my_map) {
+        result.add(my_entry.first, my_entry.second->result());
+    }
+    // use SimpleValue to add implicit cells with default value
+    const auto &factory = SimpleValueBuilderFactory::get();
+    return spec_from_value(*value_from_spec(result, factory));
+}
+
+
+// Reference implementation of tensor rename: dimension from[i] becomes
+// to[i] in both the type and every cell address; dimensions not listed in
+// 'from' keep their names. 'from' and 'to' must have the same size. An
+// error result type yields a cell-less spec of that type.
+TensorSpec ReferenceOperations::rename(const TensorSpec &a, const std::vector<vespalib::string> &from, const std::vector<vespalib::string> &to) {
+    assert(from.size() == to.size());
+    ValueType res_type = ValueType::from_spec(a.type()).rename(from, to);
+    TensorSpec result(res_type.to_spec());
+    if (res_type.is_error()) {
+        return result;
+    }
+    for (const auto & [old_addr, value] : a.cells()) {
+        TensorSpec::Address new_addr;
+        for (const auto & [dim_name, label] : old_addr) {
+            new_addr.insert_or_assign(rename_dimension(dim_name, from, to), label);
+        }
+        result.add(new_addr, value);
+    }
+    return result;
+}
+
+
+} // namespace
diff --git a/eval/src/vespa/eval/eval/test/reference_operations.h b/eval/src/vespa/eval/eval/test/reference_operations.h
new file mode 100644
index 00000000000..dd33c4cd3e5
--- /dev/null
+++ b/eval/src/vespa/eval/eval/test/reference_operations.h
@@ -0,0 +1,38 @@
+// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include <vespa/eval/eval/aggr.h>
+#include <vespa/eval/eval/operation.h>
+#include <vespa/eval/eval/tensor_spec.h>
+#include <vespa/eval/eval/value_type.h>
+
+#include <vector>
+#include <map>
+#include <variant>
+
+namespace vespalib::eval {
+
+// Collection of reference implementations of tensor operations that work
+// directly on TensorSpec. All functions are static; the struct only acts
+// as a scope for the operations and their helper types.
+struct ReferenceOperations {
+    // unary cell function, used by map
+    using map_fun_t = vespalib::eval::operation::op1_t;
+    // binary cell function, used by join and merge
+    using join_fun_t = vespalib::eval::operation::op2_t;
+
+    // for create: mapping from cell address to index of child that computes the cell value
+    using CreateSpec = std::map<TensorSpec::Address, size_t>;
+
+    // for Peek: a verbatim label or the index of a child that computes the label value
+    using LabelOrChildIndex = std::variant<TensorSpec::Label, size_t>;
+    // for Peek: mapping from dimension name to verbatim label or child
+    using PeekSpec = std::map<vespalib::string, LabelOrChildIndex>;
+
+    // concatenate 'a' and 'b' along 'concat_dim'; cells from 'b' are placed after those from 'a'
+    static TensorSpec concat(const TensorSpec &a, const TensorSpec &b, const std::string &concat_dim);
+    // build a tensor of the given type; each cell in 'spec' gets the sum of the cells of the child it refers to
+    static TensorSpec create(const vespalib::string &type, const CreateSpec &spec, const std::vector<TensorSpec> &children);
+    // apply 'function' to all pairs of cells from 'a' and 'b' that agree on their shared dimensions
+    static TensorSpec join(const TensorSpec &a, const TensorSpec &b, join_fun_t function);
+    // apply 'func' to the value of each cell in 'a'
+    static TensorSpec map(const TensorSpec &a, map_fun_t func);
+    // combine cells present in both 'a' and 'b' with 'fun'; cells present in only one input are kept as-is
+    static TensorSpec merge(const TensorSpec &a, const TensorSpec &b, join_fun_t fun);
+    // select the cells of 'param' matching the labels in 'spec' and drop the peeked dimensions
+    static TensorSpec peek(const TensorSpec &param, const PeekSpec &spec, const std::vector<TensorSpec> &children);
+    // aggregate the cells of 'a' over the dimensions in 'dims' using 'aggr'
+    static TensorSpec reduce(const TensorSpec &a, const std::vector<vespalib::string> &dims, Aggr aggr);
+    // rename dimensions of 'a': from[i] becomes to[i] ('from' and 'to' must have equal size)
+    static TensorSpec rename(const TensorSpec &a, const std::vector<vespalib::string> &from, const std::vector<vespalib::string> &to);
+};
+
+} // namespace
author	Arne Juul <arnej@verizonmedia.com>	2020-11-25 08:09:53 +0000
committer	Arne Juul <arnej@verizonmedia.com>	2020-11-25 10:05:14 +0000
commit	b8387e5455c63a39ae62795f2484217ad7480b12 (patch)
tree	22f21f3015c773d8d57c7fa6eb35951ecb47af92 /eval
parent	e5a873f35ba72592c747e4c6eb9e7eafb0a0b462 (diff)