diff options
author | Arne Juul <arnej@verizonmedia.com> | 2020-11-25 08:09:53 +0000 |
---|---|---|
committer | Arne Juul <arnej@verizonmedia.com> | 2020-11-25 10:05:14 +0000 |
commit | b8387e5455c63a39ae62795f2484217ad7480b12 (patch) | |
tree | 22f21f3015c773d8d57c7fa6eb35951ecb47af92 /eval | |
parent | e5a873f35ba72592c747e4c6eb9e7eafb0a0b462 (diff) |
move reference operations to a common place
* lift reference implementations from unit tests
* write a reference TensorCreate implementation
* use these from unit tests
* some minor cosmetic changes with structured bindings
Diffstat (limited to 'eval')
11 files changed, 389 insertions, 207 deletions
diff --git a/eval/src/tests/instruction/generic_concat/generic_concat_test.cpp b/eval/src/tests/instruction/generic_concat/generic_concat_test.cpp index aaea8fdcb28..c59d9783648 100644 --- a/eval/src/tests/instruction/generic_concat/generic_concat_test.cpp +++ b/eval/src/tests/instruction/generic_concat/generic_concat_test.cpp @@ -8,6 +8,7 @@ #include <vespa/eval/eval/value_codec.h> #include <vespa/eval/instruction/generic_concat.h> #include <vespa/eval/eval/interpreted_function.h> +#include <vespa/eval/eval/test/reference_operations.h> #include <vespa/eval/eval/test/tensor_model.hpp> #include <vespa/vespalib/util/stringfmt.h> #include <vespa/vespalib/gtest/gtest.h> @@ -64,63 +65,6 @@ TensorSpec perform_simpletensor_concat(const TensorSpec &a, const TensorSpec &b, return SimpleTensorEngine::ref().to_spec(*out); } -bool concat_address(const TensorSpec::Address &me, const TensorSpec::Address &other, - const std::string &concat_dim, size_t my_offset, - TensorSpec::Address &my_out, TensorSpec::Address &other_out) -{ - my_out.insert_or_assign(concat_dim, my_offset); - for (const auto &my_dim: me) { - const auto & name = my_dim.first; - const auto & label = my_dim.second; - if (name == concat_dim) { - my_out.insert_or_assign(name, label.index + my_offset); - } else { - auto pos = other.find(name); - if ((pos == other.end()) || (pos->second == label)) { - my_out.insert_or_assign(name, label); - other_out.insert_or_assign(name, label); - } else { - return false; - } - } - } - return true; -} - -bool concat_addresses(const TensorSpec::Address &a, const TensorSpec::Address &b, - const std::string &concat_dim, size_t b_offset, - TensorSpec::Address &a_out, TensorSpec::Address &b_out) -{ - return concat_address(a, b, concat_dim, 0, a_out, b_out) && - concat_address(b, a, concat_dim, b_offset, b_out, a_out); -} - -TensorSpec reference_concat(const TensorSpec &a, const TensorSpec &b, const std::string &concat_dim) { - ValueType a_type = ValueType::from_spec(a.type()); - 
ValueType b_type = ValueType::from_spec(b.type()); - ValueType res_type = ValueType::concat(a_type, b_type, concat_dim); - EXPECT_FALSE(res_type.is_error()); - size_t b_offset = 1; - size_t concat_dim_index = a_type.dimension_index(concat_dim); - if (concat_dim_index != ValueType::Dimension::npos) { - const auto &dim = a_type.dimensions()[concat_dim_index]; - EXPECT_TRUE(dim.is_indexed()); - b_offset = dim.size; - } - TensorSpec result(res_type.to_spec()); - for (const auto &cell_a: a.cells()) { - for (const auto &cell_b: b.cells()) { - TensorSpec::Address addr_a; - TensorSpec::Address addr_b; - if (concat_addresses(cell_a.first, cell_b.first, concat_dim, b_offset, addr_a, addr_b)) { - result.add(addr_a, cell_a.second); - result.add(addr_b, cell_b.second); - } - } - } - return result; -} - TensorSpec perform_generic_concat(const TensorSpec &a, const TensorSpec &b, const std::string &concat_dim, const ValueBuilderFactory &factory) { @@ -138,7 +82,7 @@ TEST(GenericConcatTest, generic_reference_concat_works) { const TensorSpec lhs = spec(concat_layouts[i], N()); const TensorSpec rhs = spec(concat_layouts[i + 1], Div16(N())); SCOPED_TRACE(fmt("\n===\nin LHS: %s\nin RHS: %s\n===\n", lhs.to_string().c_str(), rhs.to_string().c_str())); - auto actual = reference_concat(lhs, rhs, "y"); + auto actual = ReferenceOperations::concat(lhs, rhs, "y"); auto expect = perform_simpletensor_concat(lhs, rhs, "y"); EXPECT_EQ(actual, expect); } @@ -151,7 +95,7 @@ void test_generic_concat_with(const ValueBuilderFactory &factory) { const TensorSpec rhs = spec(concat_layouts[i + 1], Div16(N())); SCOPED_TRACE(fmt("\n===\nin LHS: %s\nin RHS: %s\n===\n", lhs.to_string().c_str(), rhs.to_string().c_str())); auto actual = perform_generic_concat(lhs, rhs, "y", factory); - auto expect = reference_concat(lhs, rhs, "y"); + auto expect = ReferenceOperations::concat(lhs, rhs, "y"); EXPECT_EQ(actual, expect); } } @@ -202,7 +146,7 @@ TEST(GenericConcatTest, immediate_generic_concat_works) { const 
TensorSpec rhs = spec(concat_layouts[i + 1], Div16(N())); SCOPED_TRACE(fmt("\n===\nin LHS: %s\nin RHS: %s\n===\n", lhs.to_string().c_str(), rhs.to_string().c_str())); auto actual = immediate_generic_concat(lhs, rhs, "y"); - auto expect = reference_concat(lhs, rhs, "y"); + auto expect = ReferenceOperations::concat(lhs, rhs, "y"); EXPECT_EQ(actual, expect); } } diff --git a/eval/src/tests/instruction/generic_create/generic_create_test.cpp b/eval/src/tests/instruction/generic_create/generic_create_test.cpp index e07db870ad2..08534f32d3c 100644 --- a/eval/src/tests/instruction/generic_create/generic_create_test.cpp +++ b/eval/src/tests/instruction/generic_create/generic_create_test.cpp @@ -5,6 +5,7 @@ #include <vespa/eval/eval/value_codec.h> #include <vespa/eval/instruction/generic_create.h> #include <vespa/eval/eval/interpreted_function.h> +#include <vespa/eval/eval/test/reference_operations.h> #include <vespa/eval/eval/test/tensor_model.hpp> #include <vespa/vespalib/util/stringfmt.h> #include <vespa/vespalib/gtest/gtest.h> @@ -53,6 +54,19 @@ bool operator< (const NumberedCellSpec &a, const NumberedCellSpec &b) { return a.num < b.num; } +TensorSpec reference_create(const TensorSpec &a) { + std::vector<TensorSpec> children; + ReferenceOperations::CreateSpec spec; + for (const auto & [addr, value] : a.cells()) { + size_t child_idx = children.size(); + spec.emplace(addr, child_idx); + TensorSpec child("double"); + child.add({}, value); + children.push_back(child); + } + return ReferenceOperations::create(a.type(), spec, children); +} + TensorSpec perform_generic_create(const TensorSpec &a, const ValueBuilderFactory &factory) { ValueType res_type = ValueType::from_spec(a.type()); @@ -80,12 +94,13 @@ void test_generic_create_with(const ValueBuilderFactory &factory) { for (const auto & layout : create_layouts) { TensorSpec full = spec(layout, N()); auto actual = perform_generic_create(full, factory); - EXPECT_EQ(actual, full); + auto expect = reference_create(full); + 
EXPECT_EQ(actual, expect); for (size_t n : {2, 3, 4, 5}) { TensorSpec partial = remove_each(full, n); actual = perform_generic_create(partial, factory); - auto filled = spec_from_value(*value_from_spec(partial, SimpleValueBuilderFactory::get())); - EXPECT_EQ(actual, filled); + expect = reference_create(partial); + EXPECT_EQ(actual, expect); } } } diff --git a/eval/src/tests/instruction/generic_join/generic_join_test.cpp b/eval/src/tests/instruction/generic_join/generic_join_test.cpp index 558f20d2e10..a81294c8d25 100644 --- a/eval/src/tests/instruction/generic_join/generic_join_test.cpp +++ b/eval/src/tests/instruction/generic_join/generic_join_test.cpp @@ -5,6 +5,7 @@ #include <vespa/eval/eval/value_codec.h> #include <vespa/eval/instruction/generic_join.h> #include <vespa/eval/eval/interpreted_function.h> +#include <vespa/eval/eval/test/reference_operations.h> #include <vespa/eval/eval/test/tensor_model.hpp> #include <vespa/vespalib/util/stringfmt.h> #include <vespa/vespalib/gtest/gtest.h> @@ -53,23 +54,6 @@ bool join_address(const TensorSpec::Address &a, const TensorSpec::Address &b, Te return true; } -TensorSpec reference_join(const TensorSpec &a, const TensorSpec &b, join_fun_t function) { - ValueType res_type = ValueType::join(ValueType::from_spec(a.type()), ValueType::from_spec(b.type())); - EXPECT_FALSE(res_type.is_error()); - TensorSpec result(res_type.to_spec()); - for (const auto &cell_a: a.cells()) { - for (const auto &cell_b: b.cells()) { - TensorSpec::Address addr; - if (join_address(cell_a.first, cell_b.first, addr) && - join_address(cell_b.first, cell_a.first, addr)) - { - result.add(addr, function(cell_a.second, cell_b.second)); - } - } - } - return result; -} - TensorSpec perform_generic_join(const TensorSpec &a, const TensorSpec &b, join_fun_t function, const ValueBuilderFactory &factory) { @@ -130,7 +114,7 @@ TEST(GenericJoinTest, generic_join_works_for_simple_and_fast_values) { TensorSpec rhs = spec(join_layouts[i + 1], Div16(N())); for (auto 
fun: {operation::Add::f, operation::Sub::f, operation::Mul::f, operation::Div::f}) { SCOPED_TRACE(fmt("\n===\nLHS: %s\nRHS: %s\n===\n", lhs.to_string().c_str(), rhs.to_string().c_str())); - auto expect = reference_join(lhs, rhs, fun); + auto expect = ReferenceOperations::join(lhs, rhs, fun); auto simple = perform_generic_join(lhs, rhs, fun, SimpleValueBuilderFactory::get()); auto fast = perform_generic_join(lhs, rhs, fun, FastValueBuilderFactory::get()); EXPECT_EQ(simple, expect); @@ -154,7 +138,7 @@ TEST(GenericJoinTest, immediate_generic_join_works) { TensorSpec rhs = spec(join_layouts[i + 1], Div16(N())); for (auto fun: {operation::Add::f, operation::Sub::f, operation::Mul::f, operation::Div::f}) { SCOPED_TRACE(fmt("\n===\nLHS: %s\nRHS: %s\n===\n", lhs.to_string().c_str(), rhs.to_string().c_str())); - auto expect = reference_join(lhs, rhs, fun); + auto expect = ReferenceOperations::join(lhs, rhs, fun); auto actual = immediate_generic_join(lhs, rhs, fun); EXPECT_EQ(actual, expect); } diff --git a/eval/src/tests/instruction/generic_map/generic_map_test.cpp b/eval/src/tests/instruction/generic_map/generic_map_test.cpp index 63a9563a11b..ba6a1630777 100644 --- a/eval/src/tests/instruction/generic_map/generic_map_test.cpp +++ b/eval/src/tests/instruction/generic_map/generic_map_test.cpp @@ -5,6 +5,7 @@ #include <vespa/eval/eval/value_codec.h> #include <vespa/eval/instruction/generic_map.h> #include <vespa/eval/eval/interpreted_function.h> +#include <vespa/eval/eval/test/reference_operations.h> #include <vespa/eval/eval/test/tensor_model.hpp> #include <vespa/vespalib/util/stringfmt.h> #include <vespa/vespalib/gtest/gtest.h> @@ -30,16 +31,6 @@ std::vector<Layout> map_layouts = { float_cells({x({"a","b","c"}),y(5),z({"i","j","k","l"})}) }; -TensorSpec reference_map(const TensorSpec &a, map_fun_t func) { - ValueType res_type = ValueType::from_spec(a.type()); - EXPECT_FALSE(res_type.is_error()); - TensorSpec result(res_type.to_spec()); - for (const auto &cell: a.cells()) 
{ - result.add(cell.first, func(cell.second)); - } - return result; -} - TensorSpec perform_generic_map(const TensorSpec &a, map_fun_t func, const ValueBuilderFactory &factory) { auto lhs = value_from_spec(a, factory); @@ -54,7 +45,7 @@ void test_generic_map_with(const ValueBuilderFactory &factory) { ValueType lhs_type = ValueType::from_spec(lhs.type()); for (auto func : {operation::Floor::f, operation::Fabs::f, operation::Square::f, operation::Inv::f}) { SCOPED_TRACE(fmt("\n===\nLHS: %s\n===\n", lhs.to_string().c_str())); - auto expect = reference_map(lhs, func); + auto expect = ReferenceOperations::map(lhs, func); auto actual = perform_generic_map(lhs, func, factory); EXPECT_EQ(actual, expect); } @@ -82,7 +73,7 @@ TEST(GenericMapTest, immediate_generic_map_works) { ValueType lhs_type = ValueType::from_spec(lhs.type()); for (auto func : {operation::Floor::f, operation::Fabs::f, operation::Square::f, operation::Inv::f}) { SCOPED_TRACE(fmt("\n===\nLHS: %s\n===\n", lhs.to_string().c_str())); - auto expect = reference_map(lhs, func); + auto expect = ReferenceOperations::map(lhs, func); auto actual = immediate_generic_map(lhs, func, SimpleValueBuilderFactory::get()); EXPECT_EQ(actual, expect); } diff --git a/eval/src/tests/instruction/generic_merge/generic_merge_test.cpp b/eval/src/tests/instruction/generic_merge/generic_merge_test.cpp index 5166ef6ccc9..a43169a6959 100644 --- a/eval/src/tests/instruction/generic_merge/generic_merge_test.cpp +++ b/eval/src/tests/instruction/generic_merge/generic_merge_test.cpp @@ -5,6 +5,7 @@ #include <vespa/eval/eval/value_codec.h> #include <vespa/eval/instruction/generic_merge.h> #include <vespa/eval/eval/interpreted_function.h> +#include <vespa/eval/eval/test/reference_operations.h> #include <vespa/eval/eval/test/tensor_model.hpp> #include <vespa/vespalib/util/stringfmt.h> #include <vespa/vespalib/gtest/gtest.h> @@ -33,29 +34,6 @@ std::vector<Layout> merge_layouts = { {x({"a","b","c"}),y(5)}, {x({"b","c","d"}),y(5)} }; - -TensorSpec 
reference_merge(const TensorSpec &a, const TensorSpec &b, join_fun_t fun) { - ValueType res_type = ValueType::merge(ValueType::from_spec(a.type()), - ValueType::from_spec(b.type())); - EXPECT_FALSE(res_type.is_error()); - TensorSpec result(res_type.to_spec()); - for (const auto &cell: a.cells()) { - auto other = b.cells().find(cell.first); - if (other == b.cells().end()) { - result.add(cell.first, cell.second); - } else { - result.add(cell.first, fun(cell.second, other->second)); - } - } - for (const auto &cell: b.cells()) { - auto other = a.cells().find(cell.first); - if (other == a.cells().end()) { - result.add(cell.first, cell.second); - } - } - return result; -} - TensorSpec perform_generic_merge(const TensorSpec &a, const TensorSpec &b, join_fun_t fun, const ValueBuilderFactory &factory) { Stash stash; auto lhs = value_from_spec(a, factory); @@ -72,7 +50,7 @@ void test_generic_merge_with(const ValueBuilderFactory &factory) { TensorSpec rhs = spec(merge_layouts[i + 1], Div16(N())); SCOPED_TRACE(fmt("\n===\nLHS: %s\nRHS: %s\n===\n", lhs.to_string().c_str(), rhs.to_string().c_str())); for (auto fun: {operation::Add::f, operation::Mul::f, operation::Sub::f, operation::Max::f}) { - auto expect = reference_merge(lhs, rhs, fun); + auto expect = ReferenceOperations::merge(lhs, rhs, fun); auto actual = perform_generic_merge(lhs, rhs, fun, factory); EXPECT_EQ(actual, expect); } @@ -102,7 +80,7 @@ TEST(GenericMergeTest, immediate_generic_merge_works) { TensorSpec rhs = spec(merge_layouts[i + 1], Div16(N())); SCOPED_TRACE(fmt("\n===\nLHS: %s\nRHS: %s\n===\n", lhs.to_string().c_str(), rhs.to_string().c_str())); for (auto fun: {operation::Add::f, operation::Mul::f, operation::Sub::f, operation::Max::f}) { - auto expect = reference_merge(lhs, rhs, fun); + auto expect = ReferenceOperations::merge(lhs, rhs, fun); auto actual = immediate_generic_merge(lhs, rhs, fun); EXPECT_EQ(actual, expect); } diff --git a/eval/src/tests/instruction/generic_peek/generic_peek_test.cpp 
b/eval/src/tests/instruction/generic_peek/generic_peek_test.cpp index ef78d0cde68..ca7d4ea1746 100644 --- a/eval/src/tests/instruction/generic_peek/generic_peek_test.cpp +++ b/eval/src/tests/instruction/generic_peek/generic_peek_test.cpp @@ -6,6 +6,7 @@ #include <vespa/eval/eval/value_codec.h> #include <vespa/eval/instruction/generic_peek.h> #include <vespa/eval/eval/interpreted_function.h> +#include <vespa/eval/eval/test/reference_operations.h> #include <vespa/eval/eval/test/tensor_model.hpp> #include <vespa/vespalib/util/stringfmt.h> #include <vespa/vespalib/util/overload.h> @@ -36,55 +37,31 @@ std::vector<Layout> peek_layouts = { using PeekSpec = GenericPeek::SpecMap; -TensorSpec reference_peek(const TensorSpec &param, const vespalib::string &result_type, const PeekSpec &spec) { - TensorSpec result(result_type); - ValueType param_type = ValueType::from_spec(param.type()); - auto is_mapped_dim = [&](const vespalib::string &name) { - size_t dim_idx = param_type.dimension_index(name); - assert(dim_idx != ValueType::Dimension::npos); - const auto &param_dim = param_type.dimensions()[dim_idx]; - return param_dim.is_mapped(); - }; - TensorSpec::Address addr; +TensorSpec reference_peek(const TensorSpec &param, const PeekSpec &spec) { + std::vector<TensorSpec> children; + PeekSpec with_indexes; for (const auto & [dim_name, label_or_child] : spec) { + const vespalib::string &dim = dim_name; std::visit(vespalib::overload { - [&,&dim_name = dim_name](const TensorSpec::Label &label) { - addr.emplace(dim_name, label); + [&](const TensorSpec::Label &label) { + with_indexes.emplace(dim, label); }, - [&,&dim_name = dim_name](const size_t &child_value) { + [&](const size_t &child_value) { // here, label_or_child is a size_t specifying the value // we pretend a child produced - if (is_mapped_dim(dim_name)) { - // (but cast to signed first, to allow labels like the string "-2") - addr.emplace(dim_name, vespalib::make_string("%zd", ssize_t(child_value))); - } else { - addr.emplace(dim_name, 
child_value); - } + size_t child_idx = children.size(); + TensorSpec child("double"); + // (but cast to signed first, to allow labels like the string "-2") + child.add({}, ssize_t(child_value)); + children.push_back(child); + with_indexes.emplace(dim, child_idx); } }, label_or_child); } - for (const auto &cell: param.cells()) { - bool keep = true; - TensorSpec::Address my_addr; - for (const auto &binding: cell.first) { - auto pos = addr.find(binding.first); - if (pos == addr.end()) { - my_addr.emplace(binding.first, binding.second); - } else { - if (!(pos->second == binding.second)) { - keep = false; - } - } - } - if (keep) { - result.add(my_addr, cell.second); - } - } - return spec_from_value(*value_from_spec(result, SimpleValueBuilderFactory::get())); + return ReferenceOperations::peek(param, with_indexes, children); } - TensorSpec perform_generic_peek(const TensorSpec &a, const ValueType &result_type, PeekSpec spec, const ValueBuilderFactory &factory) { @@ -174,7 +151,7 @@ void verify_peek_equal(const TensorSpec &input, } if (reduce_dims.empty()) return; ValueType result_type = param_type.reduce(reduce_dims); - auto expect = reference_peek(input, result_type.to_spec(), spec); + auto expect = reference_peek(input, spec); SCOPED_TRACE(fmt("peek input: %s\n peek spec: %s\n peek result %s\n", input.to_string().c_str(), to_str(spec).c_str(), diff --git a/eval/src/tests/instruction/generic_reduce/generic_reduce_test.cpp b/eval/src/tests/instruction/generic_reduce/generic_reduce_test.cpp index d894d273f02..beac7a9df3d 100644 --- a/eval/src/tests/instruction/generic_reduce/generic_reduce_test.cpp +++ b/eval/src/tests/instruction/generic_reduce/generic_reduce_test.cpp @@ -5,6 +5,7 @@ #include <vespa/eval/eval/value_codec.h> #include <vespa/eval/instruction/generic_reduce.h> #include <vespa/eval/eval/interpreted_function.h> +#include <vespa/eval/eval/test/reference_operations.h> #include <vespa/eval/eval/test/tensor_model.hpp> #include <vespa/vespalib/util/stringfmt.h> 
#include <vespa/vespalib/gtest/gtest.h> @@ -34,35 +35,6 @@ std::vector<Layout> layouts = { float_cells({x({"a","b","c"}),y(5),z({"i","j","k","l"})}) }; -TensorSpec reference_reduce(const TensorSpec &a, const std::vector<vespalib::string> &dims, Aggr aggr) { - Stash stash; - ValueType res_type = ValueType::from_spec(a.type()).reduce(dims); - EXPECT_FALSE(res_type.is_error()); - std::map<TensorSpec::Address,std::optional<Aggregator*>> my_map; - for (const auto &cell: a.cells()) { - TensorSpec::Address addr; - for (const auto &dim: cell.first) { - if (res_type.dimension_index(dim.first) != ValueType::Dimension::npos) { - addr.insert_or_assign(dim.first, dim.second); - } - } - auto [pos, is_empty] = my_map.emplace(addr, std::nullopt); - if (is_empty) { - pos->second = &Aggregator::create(aggr, stash); - pos->second.value()->first(cell.second); - } else { - pos->second.value()->next(cell.second); - } - } - TensorSpec result(res_type.to_spec()); - for (const auto &my_entry: my_map) { - result.add(my_entry.first, my_entry.second.value()->result()); - } - // use SimpleValue to add implicit cells with default value - const auto &factory = SimpleValueBuilderFactory::get(); - return spec_from_value(*value_from_spec(result, factory)); -} - TensorSpec perform_generic_reduce(const TensorSpec &a, const std::vector<vespalib::string> &dims, Aggr aggr, const ValueBuilderFactory &factory) { @@ -99,11 +71,11 @@ void test_generic_reduce_with(const ValueBuilderFactory &factory) { TensorSpec input = spec(layout, Div16(N())); for (Aggr aggr: {Aggr::SUM, Aggr::AVG, Aggr::MIN, Aggr::MAX}) { for (const Domain &domain: layout) { - auto expect = reference_reduce(input, {domain.dimension}, aggr); + auto expect = ReferenceOperations::reduce(input, {domain.dimension}, aggr); auto actual = perform_generic_reduce(input, {domain.dimension}, aggr, factory); EXPECT_EQ(actual, expect); } - auto expect = reference_reduce(input, {}, aggr); + auto expect = ReferenceOperations::reduce(input, {}, aggr); 
auto actual = perform_generic_reduce(input, {}, aggr, factory); EXPECT_EQ(actual, expect); } @@ -130,11 +102,11 @@ TEST(GenericReduceTest, immediate_generic_reduce_works) { TensorSpec input = spec(layout, Div16(N())); for (Aggr aggr: {Aggr::SUM, Aggr::AVG, Aggr::MIN, Aggr::MAX}) { for (const Domain &domain: layout) { - auto expect = reference_reduce(input, {domain.dimension}, aggr); + auto expect = ReferenceOperations::reduce(input, {domain.dimension}, aggr); auto actual = immediate_generic_reduce(input, {domain.dimension}, aggr); EXPECT_EQ(actual, expect); } - auto expect = reference_reduce(input, {}, aggr); + auto expect = ReferenceOperations::reduce(input, {}, aggr); auto actual = immediate_generic_reduce(input, {}, aggr); EXPECT_EQ(actual, expect); } diff --git a/eval/src/tests/instruction/generic_rename/generic_rename_test.cpp b/eval/src/tests/instruction/generic_rename/generic_rename_test.cpp index b2e30a8b78c..a7e6b8d807b 100644 --- a/eval/src/tests/instruction/generic_rename/generic_rename_test.cpp +++ b/eval/src/tests/instruction/generic_rename/generic_rename_test.cpp @@ -6,6 +6,7 @@ #include <vespa/eval/instruction/generic_rename.h> #include <vespa/eval/eval/interpreted_function.h> #include <vespa/eval/eval/test/tensor_model.hpp> +#include <vespa/eval/eval/test/reference_operations.h> #include <vespa/vespalib/util/stringfmt.h> #include <vespa/vespalib/gtest/gtest.h> @@ -98,20 +99,6 @@ vespalib::string rename_dimension(const vespalib::string &name, const FromTo &ft return name; } -TensorSpec reference_rename(const TensorSpec &a, const FromTo &ft) { - ValueType res_type = ValueType::from_spec(a.type()).rename(ft.from, ft.to); - EXPECT_FALSE(res_type.is_error()); - TensorSpec result(res_type.to_spec()); - for (const auto &cell: a.cells()) { - TensorSpec::Address addr; - for (const auto &dim: cell.first) { - addr.insert_or_assign(rename_dimension(dim.first, ft), dim.second); - } - result.add(addr, cell.second); - } - return result; -} - TensorSpec 
perform_generic_rename(const TensorSpec &a, const FromTo &ft, const ValueBuilderFactory &factory) { @@ -132,7 +119,7 @@ void test_generic_rename_with(const ValueBuilderFactory &factory) { if (renamed_type.is_error()) continue; // printf("type %s -> %s\n", lhs_type.to_spec().c_str(), renamed_type.to_spec().c_str()); SCOPED_TRACE(fmt("\n===\nLHS: %s\n===\n", lhs.to_string().c_str())); - auto expect = reference_rename(lhs, from_to); + auto expect = ReferenceOperations::rename(lhs, from_to.from, from_to.to); auto actual = perform_generic_rename(lhs, from_to, factory); EXPECT_EQ(actual, expect); } @@ -165,7 +152,7 @@ TEST(GenericRenameTest, immediate_generic_rename_works) { if (renamed_type.is_error()) continue; // printf("type %s -> %s\n", lhs_type.to_spec().c_str(), renamed_type.to_spec().c_str()); SCOPED_TRACE(fmt("\n===\nLHS: %s\n===\n", lhs.to_string().c_str())); - auto expect = reference_rename(lhs, from_to); + auto expect = ReferenceOperations::rename(lhs, from_to.from, from_to.to); auto actual = immediate_generic_rename(lhs, from_to); EXPECT_EQ(actual, expect); } diff --git a/eval/src/vespa/eval/eval/test/CMakeLists.txt b/eval/src/vespa/eval/eval/test/CMakeLists.txt index 6e88beab9b7..f3b0750d503 100644 --- a/eval/src/vespa/eval/eval/test/CMakeLists.txt +++ b/eval/src/vespa/eval/eval/test/CMakeLists.txt @@ -3,6 +3,7 @@ vespa_add_library(eval_eval_test OBJECT SOURCES eval_fixture.cpp eval_spec.cpp + reference_operations.cpp tensor_conformance.cpp test_io.cpp value_compare.cpp diff --git a/eval/src/vespa/eval/eval/test/reference_operations.cpp b/eval/src/vespa/eval/eval/test/reference_operations.cpp new file mode 100644 index 00000000000..fdf7ba98f70 --- /dev/null +++ b/eval/src/vespa/eval/eval/test/reference_operations.cpp @@ -0,0 +1,295 @@ +// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+ +#include "reference_operations.h" +#include <vespa/eval/eval/simple_value.h> +#include <vespa/eval/eval/value_codec.h> +#include <vespa/vespalib/util/overload.h> +#include <vespa/vespalib/util/visit_ranges.h> +#include <vespa/vespalib/util/stash.h> +#include <vespa/vespalib/util/stringfmt.h> +#include <cassert> + +namespace vespalib::eval { + +namespace { + +bool concat_address(const TensorSpec::Address &me, const TensorSpec::Address &other, + const std::string &concat_dim, size_t my_offset, + TensorSpec::Address &my_out, TensorSpec::Address &other_out) +{ + my_out.insert_or_assign(concat_dim, my_offset); + for (const auto &my_dim: me) { + const auto & name = my_dim.first; + const auto & label = my_dim.second; + if (name == concat_dim) { + my_out.insert_or_assign(name, label.index + my_offset); + } else { + auto pos = other.find(name); + if ((pos == other.end()) || (pos->second == label)) { + my_out.insert_or_assign(name, label); + other_out.insert_or_assign(name, label); + } else { + return false; + } + } + } + return true; +} + +bool concat_addresses(const TensorSpec::Address &a, const TensorSpec::Address &b, + const std::string &concat_dim, size_t b_offset, + TensorSpec::Address &a_out, TensorSpec::Address &b_out) +{ + return concat_address(a, b, concat_dim, 0, a_out, b_out) && + concat_address(b, a, concat_dim, b_offset, b_out, a_out); +} + +double value_from_child(const TensorSpec &child) { + double sum = 0.0; + for (const auto & [addr, value] : child.cells()) { + sum += value; + } + return sum; +} + +bool join_address(const TensorSpec::Address &a, const TensorSpec::Address &b, TensorSpec::Address &addr) { + for (const auto &dim_a: a) { + auto pos_b = b.find(dim_a.first); + if ((pos_b != b.end()) && !(pos_b->second == dim_a.second)) { + return false; + } + addr.insert_or_assign(dim_a.first, dim_a.second); + } + return true; +} + +vespalib::string rename_dimension(const vespalib::string &name, const std::vector<vespalib::string> &from, const 
std::vector<vespalib::string> &to) { + for (size_t i = 0; i < from.size(); ++i) { + if (name == from[i]) { + return to[i]; + } + } + return name; +} + +} // namespace <unnamed> + + +TensorSpec ReferenceOperations::concat(const TensorSpec &a, const TensorSpec &b, const std::string &concat_dim) { + ValueType a_type = ValueType::from_spec(a.type()); + ValueType b_type = ValueType::from_spec(b.type()); + ValueType res_type = ValueType::concat(a_type, b_type, concat_dim); + TensorSpec result(res_type.to_spec()); + if (res_type.is_error()) { + return result; + } + size_t b_offset = 1; + size_t concat_dim_index = a_type.dimension_index(concat_dim); + if (concat_dim_index != ValueType::Dimension::npos) { + const auto &dim = a_type.dimensions()[concat_dim_index]; + assert(dim.is_indexed()); // type resolving (above) should catch this + b_offset = dim.size; + } + for (const auto &cell_a: a.cells()) { + for (const auto &cell_b: b.cells()) { + TensorSpec::Address addr_a; + TensorSpec::Address addr_b; + if (concat_addresses(cell_a.first, cell_b.first, concat_dim, b_offset, addr_a, addr_b)) { + result.add(addr_a, cell_a.second); + result.add(addr_b, cell_b.second); + } + } + } + return result; +} + + +TensorSpec ReferenceOperations::create(const vespalib::string &type, const CreateSpec &spec, const std::vector<TensorSpec> &children) { + TensorSpec result(type); + if (ValueType::from_spec(type).is_error()) { + return result; + } + for (const auto & [addr, child_idx] : spec) { + assert(child_idx < children.size()); + const auto &child = children[child_idx]; + double val = value_from_child(child); + result.add(addr, val); + } + // use SimpleValue to add implicit cells with default value + const auto &factory = SimpleValueBuilderFactory::get(); + return spec_from_value(*value_from_spec(result, factory)); +} + + +TensorSpec ReferenceOperations::join(const TensorSpec &a, const TensorSpec &b, join_fun_t function) { + ValueType res_type = ValueType::join(ValueType::from_spec(a.type()), 
ValueType::from_spec(b.type())); + TensorSpec result(res_type.to_spec()); + if (res_type.is_error()) { + return result; + } + for (const auto &cell_a: a.cells()) { + for (const auto &cell_b: b.cells()) { + TensorSpec::Address addr; + if (join_address(cell_a.first, cell_b.first, addr) && + join_address(cell_b.first, cell_a.first, addr)) + { + result.add(addr, function(cell_a.second, cell_b.second)); + } + } + } + return result; +} + + +TensorSpec ReferenceOperations::map(const TensorSpec &a, map_fun_t func) { + ValueType res_type = ValueType::from_spec(a.type()); + TensorSpec result(res_type.to_spec()); + if (res_type.is_error()) { + return result; + } + for (const auto & [ addr, value ]: a.cells()) { + result.add(addr, func(value)); + } + return result; +} + + +TensorSpec ReferenceOperations::merge(const TensorSpec &a, const TensorSpec &b, join_fun_t fun) { + ValueType res_type = ValueType::merge(ValueType::from_spec(a.type()), + ValueType::from_spec(b.type())); + TensorSpec result(res_type.to_spec()); + if (res_type.is_error()) { + return result; + } + for (const auto & [ addr, value ]: a.cells()) { + auto other = b.cells().find(addr); + if (other == b.cells().end()) { + result.add(addr, value); + } else { + result.add(addr, fun(value, other->second)); + } + } + for (const auto & [ addr, value ]: b.cells()) { + auto other = a.cells().find(addr); + if (other == a.cells().end()) { + result.add(addr, value); + } + } + return result; +} + + +TensorSpec ReferenceOperations::peek(const TensorSpec &param, const PeekSpec &peek_spec, const std::vector<TensorSpec> &children) { + if (peek_spec.empty()) { + return TensorSpec(ValueType::error_type().to_spec()); + } + std::vector<vespalib::string> peek_dims; + for (const auto & [dim_name, label_or_child] : peek_spec) { + peek_dims.push_back(dim_name); + } + ValueType param_type = ValueType::from_spec(param.type()); + ValueType result_type = param_type.reduce(peek_dims); + TensorSpec result(result_type.to_spec()); + if 
(result_type.is_error()) { + return result; + } + auto is_mapped_dim = [&](const vespalib::string &name) { + size_t dim_idx = param_type.dimension_index(name); + assert(dim_idx != ValueType::Dimension::npos); + const auto &param_dim = param_type.dimensions()[dim_idx]; + return param_dim.is_mapped(); + }; + TensorSpec::Address addr; + for (const auto & [dim_name, label_or_child] : peek_spec) { + const vespalib::string &dim = dim_name; + std::visit(vespalib::overload + { + [&](const TensorSpec::Label &label) { + addr.emplace(dim, label); + }, + [&](const size_t &child_idx) { + assert(child_idx < children.size()); + const auto &child = children[child_idx]; + double child_value = value_from_child(child); + if (is_mapped_dim(dim)) { + addr.emplace(dim, vespalib::make_string("%zd", int64_t(child_value))); + } else { + addr.emplace(dim, child_value); + } + } + }, label_or_child); + } + for (const auto &cell: param.cells()) { + bool keep = true; + TensorSpec::Address my_addr; + for (const auto &binding: cell.first) { + auto pos = addr.find(binding.first); + if (pos == addr.end()) { + my_addr.emplace(binding.first, binding.second); + } else { + if (!(pos->second == binding.second)) { + keep = false; + } + } + } + if (keep) { + result.add(my_addr, cell.second); + } + } + // use SimpleValue to add implicit cells with default value + const auto &factory = SimpleValueBuilderFactory::get(); + return spec_from_value(*value_from_spec(result, factory)); +} + + +TensorSpec ReferenceOperations::reduce(const TensorSpec &a, const std::vector<vespalib::string> &dims, Aggr aggr) { + ValueType res_type = ValueType::from_spec(a.type()).reduce(dims); + TensorSpec result(res_type.to_spec()); + if (res_type.is_error()) { + return result; + } + Stash stash; + std::map<TensorSpec::Address,std::optional<Aggregator*>> my_map; + for (const auto &cell: a.cells()) { + TensorSpec::Address addr; + for (const auto &dim: cell.first) { + if (res_type.dimension_index(dim.first) != ValueType::Dimension::npos) 
{ + addr.insert_or_assign(dim.first, dim.second); + } + } + auto [pos, is_empty] = my_map.emplace(addr, std::nullopt); + if (is_empty) { + pos->second = &Aggregator::create(aggr, stash); + pos->second.value()->first(cell.second); + } else { + pos->second.value()->next(cell.second); + } + } + for (const auto &my_entry: my_map) { + result.add(my_entry.first, my_entry.second.value()->result()); + } + // use SimpleValue to add implicit cells with default value + const auto &factory = SimpleValueBuilderFactory::get(); + return spec_from_value(*value_from_spec(result, factory)); +} + + +TensorSpec ReferenceOperations::rename(const TensorSpec &a, const std::vector<vespalib::string> &from, const std::vector<vespalib::string> &to) { + assert(from.size() == to.size()); + ValueType res_type = ValueType::from_spec(a.type()).rename(from, to); + TensorSpec result(res_type.to_spec()); + if (res_type.is_error()) { + return result; + } + for (const auto &cell: a.cells()) { + TensorSpec::Address addr; + for (const auto &dim: cell.first) { + addr.insert_or_assign(rename_dimension(dim.first, from, to), dim.second); + } + result.add(addr, cell.second); + } + return result; +} + + +} // namespace diff --git a/eval/src/vespa/eval/eval/test/reference_operations.h b/eval/src/vespa/eval/eval/test/reference_operations.h new file mode 100644 index 00000000000..dd33c4cd3e5 --- /dev/null +++ b/eval/src/vespa/eval/eval/test/reference_operations.h @@ -0,0 +1,38 @@ +// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+ +#pragma once + +#include <vespa/eval/eval/aggr.h> +#include <vespa/eval/eval/operation.h> +#include <vespa/eval/eval/tensor_spec.h> +#include <vespa/eval/eval/value_type.h> + +#include <vector> +#include <map> +#include <variant> + +namespace vespalib::eval { + +struct ReferenceOperations { + using map_fun_t = vespalib::eval::operation::op1_t; + using join_fun_t = vespalib::eval::operation::op2_t; + + // for create: mapping from cell address to index of child that computes the cell value + using CreateSpec = std::map<TensorSpec::Address, size_t>; + + // for Peek: a verbatim label or the index of a child that computes the label value + using LabelOrChildIndex = std::variant<TensorSpec::Label, size_t>; + // for Peek: mapping from dimension name to verbatim label or child + using PeekSpec = std::map<vespalib::string, LabelOrChildIndex>; + + static TensorSpec concat(const TensorSpec &a, const TensorSpec &b, const std::string &concat_dim); + static TensorSpec create(const vespalib::string &type, const CreateSpec &spec, const std::vector<TensorSpec> &children); + static TensorSpec join(const TensorSpec &a, const TensorSpec &b, join_fun_t function); + static TensorSpec map(const TensorSpec &a, map_fun_t func); + static TensorSpec merge(const TensorSpec &a, const TensorSpec &b, join_fun_t fun); + static TensorSpec peek(const TensorSpec &param, const PeekSpec &spec, const std::vector<TensorSpec> &children); + static TensorSpec reduce(const TensorSpec &a, const std::vector<vespalib::string> &dims, Aggr aggr); + static TensorSpec rename(const TensorSpec &a, const std::vector<vespalib::string> &from, const std::vector<vespalib::string> &to); +}; + +} // namespace |