diff options
38 files changed, 1148 insertions, 251 deletions
diff --git a/eval/CMakeLists.txt b/eval/CMakeLists.txt index 021f3e39e40..ee8509fcf19 100644 --- a/eval/CMakeLists.txt +++ b/eval/CMakeLists.txt @@ -53,6 +53,7 @@ vespa_define_module( src/tests/instruction/dense_tensor_peek_function src/tests/instruction/index_lookup_table src/tests/instruction/join_with_number + src/tests/streamed/value src/tests/tensor/dense_add_dimension_optimizer src/tests/tensor/dense_dimension_combiner src/tests/tensor/dense_fast_rename_optimizer @@ -90,6 +91,7 @@ vespa_define_module( src/vespa/eval/eval/value_cache src/vespa/eval/gp src/vespa/eval/instruction + src/vespa/eval/streamed src/vespa/eval/tensor src/vespa/eval/tensor/dense src/vespa/eval/tensor/serialization diff --git a/eval/src/tests/instruction/generic_concat/generic_concat_test.cpp b/eval/src/tests/instruction/generic_concat/generic_concat_test.cpp index aaea8fdcb28..c59d9783648 100644 --- a/eval/src/tests/instruction/generic_concat/generic_concat_test.cpp +++ b/eval/src/tests/instruction/generic_concat/generic_concat_test.cpp @@ -8,6 +8,7 @@ #include <vespa/eval/eval/value_codec.h> #include <vespa/eval/instruction/generic_concat.h> #include <vespa/eval/eval/interpreted_function.h> +#include <vespa/eval/eval/test/reference_operations.h> #include <vespa/eval/eval/test/tensor_model.hpp> #include <vespa/vespalib/util/stringfmt.h> #include <vespa/vespalib/gtest/gtest.h> @@ -64,63 +65,6 @@ TensorSpec perform_simpletensor_concat(const TensorSpec &a, const TensorSpec &b, return SimpleTensorEngine::ref().to_spec(*out); } -bool concat_address(const TensorSpec::Address &me, const TensorSpec::Address &other, - const std::string &concat_dim, size_t my_offset, - TensorSpec::Address &my_out, TensorSpec::Address &other_out) -{ - my_out.insert_or_assign(concat_dim, my_offset); - for (const auto &my_dim: me) { - const auto & name = my_dim.first; - const auto & label = my_dim.second; - if (name == concat_dim) { - my_out.insert_or_assign(name, label.index + my_offset); - } else { - auto pos = 
other.find(name); - if ((pos == other.end()) || (pos->second == label)) { - my_out.insert_or_assign(name, label); - other_out.insert_or_assign(name, label); - } else { - return false; - } - } - } - return true; -} - -bool concat_addresses(const TensorSpec::Address &a, const TensorSpec::Address &b, - const std::string &concat_dim, size_t b_offset, - TensorSpec::Address &a_out, TensorSpec::Address &b_out) -{ - return concat_address(a, b, concat_dim, 0, a_out, b_out) && - concat_address(b, a, concat_dim, b_offset, b_out, a_out); -} - -TensorSpec reference_concat(const TensorSpec &a, const TensorSpec &b, const std::string &concat_dim) { - ValueType a_type = ValueType::from_spec(a.type()); - ValueType b_type = ValueType::from_spec(b.type()); - ValueType res_type = ValueType::concat(a_type, b_type, concat_dim); - EXPECT_FALSE(res_type.is_error()); - size_t b_offset = 1; - size_t concat_dim_index = a_type.dimension_index(concat_dim); - if (concat_dim_index != ValueType::Dimension::npos) { - const auto &dim = a_type.dimensions()[concat_dim_index]; - EXPECT_TRUE(dim.is_indexed()); - b_offset = dim.size; - } - TensorSpec result(res_type.to_spec()); - for (const auto &cell_a: a.cells()) { - for (const auto &cell_b: b.cells()) { - TensorSpec::Address addr_a; - TensorSpec::Address addr_b; - if (concat_addresses(cell_a.first, cell_b.first, concat_dim, b_offset, addr_a, addr_b)) { - result.add(addr_a, cell_a.second); - result.add(addr_b, cell_b.second); - } - } - } - return result; -} - TensorSpec perform_generic_concat(const TensorSpec &a, const TensorSpec &b, const std::string &concat_dim, const ValueBuilderFactory &factory) { @@ -138,7 +82,7 @@ TEST(GenericConcatTest, generic_reference_concat_works) { const TensorSpec lhs = spec(concat_layouts[i], N()); const TensorSpec rhs = spec(concat_layouts[i + 1], Div16(N())); SCOPED_TRACE(fmt("\n===\nin LHS: %s\nin RHS: %s\n===\n", lhs.to_string().c_str(), rhs.to_string().c_str())); - auto actual = reference_concat(lhs, rhs, "y"); + 
auto actual = ReferenceOperations::concat(lhs, rhs, "y"); auto expect = perform_simpletensor_concat(lhs, rhs, "y"); EXPECT_EQ(actual, expect); } @@ -151,7 +95,7 @@ void test_generic_concat_with(const ValueBuilderFactory &factory) { const TensorSpec rhs = spec(concat_layouts[i + 1], Div16(N())); SCOPED_TRACE(fmt("\n===\nin LHS: %s\nin RHS: %s\n===\n", lhs.to_string().c_str(), rhs.to_string().c_str())); auto actual = perform_generic_concat(lhs, rhs, "y", factory); - auto expect = reference_concat(lhs, rhs, "y"); + auto expect = ReferenceOperations::concat(lhs, rhs, "y"); EXPECT_EQ(actual, expect); } } @@ -202,7 +146,7 @@ TEST(GenericConcatTest, immediate_generic_concat_works) { const TensorSpec rhs = spec(concat_layouts[i + 1], Div16(N())); SCOPED_TRACE(fmt("\n===\nin LHS: %s\nin RHS: %s\n===\n", lhs.to_string().c_str(), rhs.to_string().c_str())); auto actual = immediate_generic_concat(lhs, rhs, "y"); - auto expect = reference_concat(lhs, rhs, "y"); + auto expect = ReferenceOperations::concat(lhs, rhs, "y"); EXPECT_EQ(actual, expect); } } diff --git a/eval/src/tests/instruction/generic_create/generic_create_test.cpp b/eval/src/tests/instruction/generic_create/generic_create_test.cpp index e07db870ad2..42af4ba6621 100644 --- a/eval/src/tests/instruction/generic_create/generic_create_test.cpp +++ b/eval/src/tests/instruction/generic_create/generic_create_test.cpp @@ -5,6 +5,7 @@ #include <vespa/eval/eval/value_codec.h> #include <vespa/eval/instruction/generic_create.h> #include <vespa/eval/eval/interpreted_function.h> +#include <vespa/eval/eval/test/reference_operations.h> #include <vespa/eval/eval/test/tensor_model.hpp> #include <vespa/vespalib/util/stringfmt.h> #include <vespa/vespalib/gtest/gtest.h> @@ -53,6 +54,19 @@ bool operator< (const NumberedCellSpec &a, const NumberedCellSpec &b) { return a.num < b.num; } +TensorSpec reference_create(const TensorSpec &a) { + std::vector<TensorSpec> children; + ReferenceOperations::CreateSpec spec; + for (const auto & [addr, 
value] : a.cells()) { + size_t child_idx = children.size(); + spec.emplace(addr, child_idx); + TensorSpec child("double"); + child.add({}, value); + children.push_back(child); + } + return ReferenceOperations::create(a.type(), spec, children); +} + TensorSpec perform_generic_create(const TensorSpec &a, const ValueBuilderFactory &factory) { ValueType res_type = ValueType::from_spec(a.type()); @@ -80,12 +94,16 @@ void test_generic_create_with(const ValueBuilderFactory &factory) { for (const auto & layout : create_layouts) { TensorSpec full = spec(layout, N()); auto actual = perform_generic_create(full, factory); - EXPECT_EQ(actual, full); + auto ref_spec = reference_create(full); + // use SimpleValue to add implicit cells with default value + auto expect = spec_from_value(*value_from_spec(ref_spec, SimpleValueBuilderFactory::get())); + EXPECT_EQ(actual, expect); for (size_t n : {2, 3, 4, 5}) { TensorSpec partial = remove_each(full, n); actual = perform_generic_create(partial, factory); - auto filled = spec_from_value(*value_from_spec(partial, SimpleValueBuilderFactory::get())); - EXPECT_EQ(actual, filled); + ref_spec = reference_create(partial); + expect = spec_from_value(*value_from_spec(ref_spec, SimpleValueBuilderFactory::get())); + EXPECT_EQ(actual, expect); } } } diff --git a/eval/src/tests/instruction/generic_join/generic_join_test.cpp b/eval/src/tests/instruction/generic_join/generic_join_test.cpp index 558f20d2e10..a81294c8d25 100644 --- a/eval/src/tests/instruction/generic_join/generic_join_test.cpp +++ b/eval/src/tests/instruction/generic_join/generic_join_test.cpp @@ -5,6 +5,7 @@ #include <vespa/eval/eval/value_codec.h> #include <vespa/eval/instruction/generic_join.h> #include <vespa/eval/eval/interpreted_function.h> +#include <vespa/eval/eval/test/reference_operations.h> #include <vespa/eval/eval/test/tensor_model.hpp> #include <vespa/vespalib/util/stringfmt.h> #include <vespa/vespalib/gtest/gtest.h> @@ -53,23 +54,6 @@ bool join_address(const 
TensorSpec::Address &a, const TensorSpec::Address &b, Te return true; } -TensorSpec reference_join(const TensorSpec &a, const TensorSpec &b, join_fun_t function) { - ValueType res_type = ValueType::join(ValueType::from_spec(a.type()), ValueType::from_spec(b.type())); - EXPECT_FALSE(res_type.is_error()); - TensorSpec result(res_type.to_spec()); - for (const auto &cell_a: a.cells()) { - for (const auto &cell_b: b.cells()) { - TensorSpec::Address addr; - if (join_address(cell_a.first, cell_b.first, addr) && - join_address(cell_b.first, cell_a.first, addr)) - { - result.add(addr, function(cell_a.second, cell_b.second)); - } - } - } - return result; -} - TensorSpec perform_generic_join(const TensorSpec &a, const TensorSpec &b, join_fun_t function, const ValueBuilderFactory &factory) { @@ -130,7 +114,7 @@ TEST(GenericJoinTest, generic_join_works_for_simple_and_fast_values) { TensorSpec rhs = spec(join_layouts[i + 1], Div16(N())); for (auto fun: {operation::Add::f, operation::Sub::f, operation::Mul::f, operation::Div::f}) { SCOPED_TRACE(fmt("\n===\nLHS: %s\nRHS: %s\n===\n", lhs.to_string().c_str(), rhs.to_string().c_str())); - auto expect = reference_join(lhs, rhs, fun); + auto expect = ReferenceOperations::join(lhs, rhs, fun); auto simple = perform_generic_join(lhs, rhs, fun, SimpleValueBuilderFactory::get()); auto fast = perform_generic_join(lhs, rhs, fun, FastValueBuilderFactory::get()); EXPECT_EQ(simple, expect); @@ -154,7 +138,7 @@ TEST(GenericJoinTest, immediate_generic_join_works) { TensorSpec rhs = spec(join_layouts[i + 1], Div16(N())); for (auto fun: {operation::Add::f, operation::Sub::f, operation::Mul::f, operation::Div::f}) { SCOPED_TRACE(fmt("\n===\nLHS: %s\nRHS: %s\n===\n", lhs.to_string().c_str(), rhs.to_string().c_str())); - auto expect = reference_join(lhs, rhs, fun); + auto expect = ReferenceOperations::join(lhs, rhs, fun); auto actual = immediate_generic_join(lhs, rhs, fun); EXPECT_EQ(actual, expect); } diff --git 
a/eval/src/tests/instruction/generic_map/generic_map_test.cpp b/eval/src/tests/instruction/generic_map/generic_map_test.cpp index 63a9563a11b..ba6a1630777 100644 --- a/eval/src/tests/instruction/generic_map/generic_map_test.cpp +++ b/eval/src/tests/instruction/generic_map/generic_map_test.cpp @@ -5,6 +5,7 @@ #include <vespa/eval/eval/value_codec.h> #include <vespa/eval/instruction/generic_map.h> #include <vespa/eval/eval/interpreted_function.h> +#include <vespa/eval/eval/test/reference_operations.h> #include <vespa/eval/eval/test/tensor_model.hpp> #include <vespa/vespalib/util/stringfmt.h> #include <vespa/vespalib/gtest/gtest.h> @@ -30,16 +31,6 @@ std::vector<Layout> map_layouts = { float_cells({x({"a","b","c"}),y(5),z({"i","j","k","l"})}) }; -TensorSpec reference_map(const TensorSpec &a, map_fun_t func) { - ValueType res_type = ValueType::from_spec(a.type()); - EXPECT_FALSE(res_type.is_error()); - TensorSpec result(res_type.to_spec()); - for (const auto &cell: a.cells()) { - result.add(cell.first, func(cell.second)); - } - return result; -} - TensorSpec perform_generic_map(const TensorSpec &a, map_fun_t func, const ValueBuilderFactory &factory) { auto lhs = value_from_spec(a, factory); @@ -54,7 +45,7 @@ void test_generic_map_with(const ValueBuilderFactory &factory) { ValueType lhs_type = ValueType::from_spec(lhs.type()); for (auto func : {operation::Floor::f, operation::Fabs::f, operation::Square::f, operation::Inv::f}) { SCOPED_TRACE(fmt("\n===\nLHS: %s\n===\n", lhs.to_string().c_str())); - auto expect = reference_map(lhs, func); + auto expect = ReferenceOperations::map(lhs, func); auto actual = perform_generic_map(lhs, func, factory); EXPECT_EQ(actual, expect); } @@ -82,7 +73,7 @@ TEST(GenericMapTest, immediate_generic_map_works) { ValueType lhs_type = ValueType::from_spec(lhs.type()); for (auto func : {operation::Floor::f, operation::Fabs::f, operation::Square::f, operation::Inv::f}) { SCOPED_TRACE(fmt("\n===\nLHS: %s\n===\n", lhs.to_string().c_str())); - auto 
expect = reference_map(lhs, func); + auto expect = ReferenceOperations::map(lhs, func); auto actual = immediate_generic_map(lhs, func, SimpleValueBuilderFactory::get()); EXPECT_EQ(actual, expect); } diff --git a/eval/src/tests/instruction/generic_merge/generic_merge_test.cpp b/eval/src/tests/instruction/generic_merge/generic_merge_test.cpp index 5166ef6ccc9..a43169a6959 100644 --- a/eval/src/tests/instruction/generic_merge/generic_merge_test.cpp +++ b/eval/src/tests/instruction/generic_merge/generic_merge_test.cpp @@ -5,6 +5,7 @@ #include <vespa/eval/eval/value_codec.h> #include <vespa/eval/instruction/generic_merge.h> #include <vespa/eval/eval/interpreted_function.h> +#include <vespa/eval/eval/test/reference_operations.h> #include <vespa/eval/eval/test/tensor_model.hpp> #include <vespa/vespalib/util/stringfmt.h> #include <vespa/vespalib/gtest/gtest.h> @@ -33,29 +34,6 @@ std::vector<Layout> merge_layouts = { {x({"a","b","c"}),y(5)}, {x({"b","c","d"}),y(5)} }; - -TensorSpec reference_merge(const TensorSpec &a, const TensorSpec &b, join_fun_t fun) { - ValueType res_type = ValueType::merge(ValueType::from_spec(a.type()), - ValueType::from_spec(b.type())); - EXPECT_FALSE(res_type.is_error()); - TensorSpec result(res_type.to_spec()); - for (const auto &cell: a.cells()) { - auto other = b.cells().find(cell.first); - if (other == b.cells().end()) { - result.add(cell.first, cell.second); - } else { - result.add(cell.first, fun(cell.second, other->second)); - } - } - for (const auto &cell: b.cells()) { - auto other = a.cells().find(cell.first); - if (other == a.cells().end()) { - result.add(cell.first, cell.second); - } - } - return result; -} - TensorSpec perform_generic_merge(const TensorSpec &a, const TensorSpec &b, join_fun_t fun, const ValueBuilderFactory &factory) { Stash stash; auto lhs = value_from_spec(a, factory); @@ -72,7 +50,7 @@ void test_generic_merge_with(const ValueBuilderFactory &factory) { TensorSpec rhs = spec(merge_layouts[i + 1], Div16(N())); 
SCOPED_TRACE(fmt("\n===\nLHS: %s\nRHS: %s\n===\n", lhs.to_string().c_str(), rhs.to_string().c_str())); for (auto fun: {operation::Add::f, operation::Mul::f, operation::Sub::f, operation::Max::f}) { - auto expect = reference_merge(lhs, rhs, fun); + auto expect = ReferenceOperations::merge(lhs, rhs, fun); auto actual = perform_generic_merge(lhs, rhs, fun, factory); EXPECT_EQ(actual, expect); } @@ -102,7 +80,7 @@ TEST(GenericMergeTest, immediate_generic_merge_works) { TensorSpec rhs = spec(merge_layouts[i + 1], Div16(N())); SCOPED_TRACE(fmt("\n===\nLHS: %s\nRHS: %s\n===\n", lhs.to_string().c_str(), rhs.to_string().c_str())); for (auto fun: {operation::Add::f, operation::Mul::f, operation::Sub::f, operation::Max::f}) { - auto expect = reference_merge(lhs, rhs, fun); + auto expect = ReferenceOperations::merge(lhs, rhs, fun); auto actual = immediate_generic_merge(lhs, rhs, fun); EXPECT_EQ(actual, expect); } diff --git a/eval/src/tests/instruction/generic_peek/generic_peek_test.cpp b/eval/src/tests/instruction/generic_peek/generic_peek_test.cpp index ef78d0cde68..18b1d6903dd 100644 --- a/eval/src/tests/instruction/generic_peek/generic_peek_test.cpp +++ b/eval/src/tests/instruction/generic_peek/generic_peek_test.cpp @@ -6,6 +6,7 @@ #include <vespa/eval/eval/value_codec.h> #include <vespa/eval/instruction/generic_peek.h> #include <vespa/eval/eval/interpreted_function.h> +#include <vespa/eval/eval/test/reference_operations.h> #include <vespa/eval/eval/test/tensor_model.hpp> #include <vespa/vespalib/util/stringfmt.h> #include <vespa/vespalib/util/overload.h> @@ -36,55 +37,31 @@ std::vector<Layout> peek_layouts = { using PeekSpec = GenericPeek::SpecMap; -TensorSpec reference_peek(const TensorSpec ¶m, const vespalib::string &result_type, const PeekSpec &spec) { - TensorSpec result(result_type); - ValueType param_type = ValueType::from_spec(param.type()); - auto is_mapped_dim = [&](const vespalib::string &name) { - size_t dim_idx = param_type.dimension_index(name); - 
assert(dim_idx != ValueType::Dimension::npos); - const auto ¶m_dim = param_type.dimensions()[dim_idx]; - return param_dim.is_mapped(); - }; - TensorSpec::Address addr; +TensorSpec reference_peek(const TensorSpec ¶m, const PeekSpec &spec) { + std::vector<TensorSpec> children; + PeekSpec with_indexes; for (const auto & [dim_name, label_or_child] : spec) { + const vespalib::string &dim = dim_name; std::visit(vespalib::overload { - [&,&dim_name = dim_name](const TensorSpec::Label &label) { - addr.emplace(dim_name, label); + [&](const TensorSpec::Label &label) { + with_indexes.emplace(dim, label); }, - [&,&dim_name = dim_name](const size_t &child_value) { + [&](const size_t &child_value) { // here, label_or_child is a size_t specifying the value // we pretend a child produced - if (is_mapped_dim(dim_name)) { - // (but cast to signed first, to allow labels like the string "-2") - addr.emplace(dim_name, vespalib::make_string("%zd", ssize_t(child_value))); - } else { - addr.emplace(dim_name, child_value); - } + size_t child_idx = children.size(); + TensorSpec child("double"); + // (but cast to signed first, to allow labels like the string "-2") + child.add({}, ssize_t(child_value)); + children.push_back(child); + with_indexes.emplace(dim, child_idx); } }, label_or_child); } - for (const auto &cell: param.cells()) { - bool keep = true; - TensorSpec::Address my_addr; - for (const auto &binding: cell.first) { - auto pos = addr.find(binding.first); - if (pos == addr.end()) { - my_addr.emplace(binding.first, binding.second); - } else { - if (!(pos->second == binding.second)) { - keep = false; - } - } - } - if (keep) { - result.add(my_addr, cell.second); - } - } - return spec_from_value(*value_from_spec(result, SimpleValueBuilderFactory::get())); + return ReferenceOperations::peek(param, with_indexes, children); } - TensorSpec perform_generic_peek(const TensorSpec &a, const ValueType &result_type, PeekSpec spec, const ValueBuilderFactory &factory) { @@ -174,7 +151,9 @@ void 
verify_peek_equal(const TensorSpec &input, } if (reduce_dims.empty()) return; ValueType result_type = param_type.reduce(reduce_dims); - auto expect = reference_peek(input, result_type.to_spec(), spec); + auto ref_spec = reference_peek(input, spec); + // use SimpleValue to add implicit cells with default value + auto expect = spec_from_value(*value_from_spec(ref_spec, SimpleValueBuilderFactory::get())); SCOPED_TRACE(fmt("peek input: %s\n peek spec: %s\n peek result %s\n", input.to_string().c_str(), to_str(spec).c_str(), diff --git a/eval/src/tests/instruction/generic_reduce/generic_reduce_test.cpp b/eval/src/tests/instruction/generic_reduce/generic_reduce_test.cpp index d894d273f02..fa55406be3a 100644 --- a/eval/src/tests/instruction/generic_reduce/generic_reduce_test.cpp +++ b/eval/src/tests/instruction/generic_reduce/generic_reduce_test.cpp @@ -5,6 +5,7 @@ #include <vespa/eval/eval/value_codec.h> #include <vespa/eval/instruction/generic_reduce.h> #include <vespa/eval/eval/interpreted_function.h> +#include <vespa/eval/eval/test/reference_operations.h> #include <vespa/eval/eval/test/tensor_model.hpp> #include <vespa/vespalib/util/stringfmt.h> #include <vespa/vespalib/gtest/gtest.h> @@ -34,35 +35,6 @@ std::vector<Layout> layouts = { float_cells({x({"a","b","c"}),y(5),z({"i","j","k","l"})}) }; -TensorSpec reference_reduce(const TensorSpec &a, const std::vector<vespalib::string> &dims, Aggr aggr) { - Stash stash; - ValueType res_type = ValueType::from_spec(a.type()).reduce(dims); - EXPECT_FALSE(res_type.is_error()); - std::map<TensorSpec::Address,std::optional<Aggregator*>> my_map; - for (const auto &cell: a.cells()) { - TensorSpec::Address addr; - for (const auto &dim: cell.first) { - if (res_type.dimension_index(dim.first) != ValueType::Dimension::npos) { - addr.insert_or_assign(dim.first, dim.second); - } - } - auto [pos, is_empty] = my_map.emplace(addr, std::nullopt); - if (is_empty) { - pos->second = &Aggregator::create(aggr, stash); - 
pos->second.value()->first(cell.second); - } else { - pos->second.value()->next(cell.second); - } - } - TensorSpec result(res_type.to_spec()); - for (const auto &my_entry: my_map) { - result.add(my_entry.first, my_entry.second.value()->result()); - } - // use SimpleValue to add implicit cells with default value - const auto &factory = SimpleValueBuilderFactory::get(); - return spec_from_value(*value_from_spec(result, factory)); -} - TensorSpec perform_generic_reduce(const TensorSpec &a, const std::vector<vespalib::string> &dims, Aggr aggr, const ValueBuilderFactory &factory) { @@ -99,11 +71,14 @@ void test_generic_reduce_with(const ValueBuilderFactory &factory) { TensorSpec input = spec(layout, Div16(N())); for (Aggr aggr: {Aggr::SUM, Aggr::AVG, Aggr::MIN, Aggr::MAX}) { for (const Domain &domain: layout) { - auto expect = reference_reduce(input, {domain.dimension}, aggr); + auto ref_spec = ReferenceOperations::reduce(input, {domain.dimension}, aggr); + // use SimpleValue to add implicit cells with default value + auto expect = spec_from_value(*value_from_spec(ref_spec, SimpleValueBuilderFactory::get())); auto actual = perform_generic_reduce(input, {domain.dimension}, aggr, factory); EXPECT_EQ(actual, expect); } - auto expect = reference_reduce(input, {}, aggr); + auto ref_spec = ReferenceOperations::reduce(input, {}, aggr); + auto expect = spec_from_value(*value_from_spec(ref_spec, SimpleValueBuilderFactory::get())); auto actual = perform_generic_reduce(input, {}, aggr, factory); EXPECT_EQ(actual, expect); } @@ -130,11 +105,13 @@ TEST(GenericReduceTest, immediate_generic_reduce_works) { TensorSpec input = spec(layout, Div16(N())); for (Aggr aggr: {Aggr::SUM, Aggr::AVG, Aggr::MIN, Aggr::MAX}) { for (const Domain &domain: layout) { - auto expect = reference_reduce(input, {domain.dimension}, aggr); + auto ref_spec = ReferenceOperations::reduce(input, {domain.dimension}, aggr); + auto expect = spec_from_value(*value_from_spec(ref_spec, 
SimpleValueBuilderFactory::get())); auto actual = immediate_generic_reduce(input, {domain.dimension}, aggr); EXPECT_EQ(actual, expect); } - auto expect = reference_reduce(input, {}, aggr); + auto ref_spec = ReferenceOperations::reduce(input, {}, aggr); + auto expect = spec_from_value(*value_from_spec(ref_spec, SimpleValueBuilderFactory::get())); auto actual = immediate_generic_reduce(input, {}, aggr); EXPECT_EQ(actual, expect); } diff --git a/eval/src/tests/instruction/generic_rename/generic_rename_test.cpp b/eval/src/tests/instruction/generic_rename/generic_rename_test.cpp index b2e30a8b78c..a7e6b8d807b 100644 --- a/eval/src/tests/instruction/generic_rename/generic_rename_test.cpp +++ b/eval/src/tests/instruction/generic_rename/generic_rename_test.cpp @@ -6,6 +6,7 @@ #include <vespa/eval/instruction/generic_rename.h> #include <vespa/eval/eval/interpreted_function.h> #include <vespa/eval/eval/test/tensor_model.hpp> +#include <vespa/eval/eval/test/reference_operations.h> #include <vespa/vespalib/util/stringfmt.h> #include <vespa/vespalib/gtest/gtest.h> @@ -98,20 +99,6 @@ vespalib::string rename_dimension(const vespalib::string &name, const FromTo &ft return name; } -TensorSpec reference_rename(const TensorSpec &a, const FromTo &ft) { - ValueType res_type = ValueType::from_spec(a.type()).rename(ft.from, ft.to); - EXPECT_FALSE(res_type.is_error()); - TensorSpec result(res_type.to_spec()); - for (const auto &cell: a.cells()) { - TensorSpec::Address addr; - for (const auto &dim: cell.first) { - addr.insert_or_assign(rename_dimension(dim.first, ft), dim.second); - } - result.add(addr, cell.second); - } - return result; -} - TensorSpec perform_generic_rename(const TensorSpec &a, const FromTo &ft, const ValueBuilderFactory &factory) { @@ -132,7 +119,7 @@ void test_generic_rename_with(const ValueBuilderFactory &factory) { if (renamed_type.is_error()) continue; // printf("type %s -> %s\n", lhs_type.to_spec().c_str(), renamed_type.to_spec().c_str()); 
SCOPED_TRACE(fmt("\n===\nLHS: %s\n===\n", lhs.to_string().c_str())); - auto expect = reference_rename(lhs, from_to); + auto expect = ReferenceOperations::rename(lhs, from_to.from, from_to.to); auto actual = perform_generic_rename(lhs, from_to, factory); EXPECT_EQ(actual, expect); } @@ -165,7 +152,7 @@ TEST(GenericRenameTest, immediate_generic_rename_works) { if (renamed_type.is_error()) continue; // printf("type %s -> %s\n", lhs_type.to_spec().c_str(), renamed_type.to_spec().c_str()); SCOPED_TRACE(fmt("\n===\nLHS: %s\n===\n", lhs.to_string().c_str())); - auto expect = reference_rename(lhs, from_to); + auto expect = ReferenceOperations::rename(lhs, from_to.from, from_to.to); auto actual = immediate_generic_rename(lhs, from_to); EXPECT_EQ(actual, expect); } diff --git a/eval/src/tests/streamed/value/CMakeLists.txt b/eval/src/tests/streamed/value/CMakeLists.txt new file mode 100644 index 00000000000..d2ccced8c14 --- /dev/null +++ b/eval/src/tests/streamed/value/CMakeLists.txt @@ -0,0 +1,9 @@ +# Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +vespa_add_executable(eval_streamed_value_test_app TEST + SOURCES + streamed_value_test.cpp + DEPENDS + vespaeval + GTest::GTest +) +vespa_add_test(NAME eval_streamed_value_test_app COMMAND eval_streamed_value_test_app) diff --git a/eval/src/tests/streamed/value/streamed_value_test.cpp b/eval/src/tests/streamed/value/streamed_value_test.cpp new file mode 100644 index 00000000000..3de6ba0fb63 --- /dev/null +++ b/eval/src/tests/streamed/value/streamed_value_test.cpp @@ -0,0 +1,136 @@ +// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+ +#include <vespa/eval/streamed/streamed_value_builder_factory.h> +#include <vespa/eval/eval/value_codec.h> +#include <vespa/eval/instruction/generic_join.h> +#include <vespa/eval/eval/interpreted_function.h> +#include <vespa/eval/eval/test/tensor_model.hpp> +#include <vespa/vespalib/util/stringfmt.h> +#include <vespa/vespalib/gtest/gtest.h> + +using namespace vespalib; +using namespace vespalib::eval; +using namespace vespalib::eval::instruction; +using namespace vespalib::eval::test; + +using vespalib::make_string_short::fmt; + +using PA = std::vector<vespalib::stringref *>; +using CPA = std::vector<const vespalib::stringref *>; + +std::vector<Layout> layouts = { + {}, + {x(3)}, + {x(3),y(5)}, + {x(3),y(5),z(7)}, + float_cells({x(3),y(5),z(7)}), + {x({"a","b","c"})}, + {x({"a","b","c"}),y({"foo","bar"})}, + {x({"a","b","c"}),y({"foo","bar"}),z({"i","j","k","l"})}, + float_cells({x({"a","b","c"}),y({"foo","bar"}),z({"i","j","k","l"})}), + {x(3),y({"foo", "bar"}),z(7)}, + {x({"a","b","c"}),y(5),z({"i","j","k","l"})}, + float_cells({x({"a","b","c"}),y(5),z({"i","j","k","l"})}) +}; + +std::vector<Layout> join_layouts = { + {}, {}, + {x(5)}, {x(5)}, + {x(5)}, {y(5)}, + {x(5)}, {x(5),y(5)}, + {y(3)}, {x(2),z(3)}, + {x(3),y(5)}, {y(5),z(7)}, + float_cells({x(3),y(5)}), {y(5),z(7)}, + {x(3),y(5)}, float_cells({y(5),z(7)}), + float_cells({x(3),y(5)}), float_cells({y(5),z(7)}), + {x({"a","b","c"})}, {x({"a","b","c"})}, + {x({"a","b","c"})}, {x({"a","b"})}, + {x({"a","b","c"})}, {y({"foo","bar","baz"})}, + {x({"a","b","c"})}, {x({"a","b","c"}),y({"foo","bar","baz"})}, + {x({"a","b"}),y({"foo","bar","baz"})}, {x({"a","b","c"}),y({"foo","bar"})}, + {x({"a","b"}),y({"foo","bar","baz"})}, {y({"foo","bar"}),z({"i","j","k","l"})}, + float_cells({x({"a","b"}),y({"foo","bar","baz"})}), {y({"foo","bar"}),z({"i","j","k","l"})}, + {x({"a","b"}),y({"foo","bar","baz"})}, float_cells({y({"foo","bar"}),z({"i","j","k","l"})}), + float_cells({x({"a","b"}),y({"foo","bar","baz"})}), 
float_cells({y({"foo","bar"}),z({"i","j","k","l"})}), + {x(3),y({"foo", "bar"})}, {y({"foo", "bar"}),z(7)}, + {x({"a","b","c"}),y(5)}, {y(5),z({"i","j","k","l"})}, + float_cells({x({"a","b","c"}),y(5)}), {y(5),z({"i","j","k","l"})}, + {x({"a","b","c"}),y(5)}, float_cells({y(5),z({"i","j","k","l"})}), + float_cells({x({"a","b","c"}),y(5)}), float_cells({y(5),z({"i","j","k","l"})}) +}; + +TensorSpec simple_tensor_join(const TensorSpec &a, const TensorSpec &b, join_fun_t function) { + Stash stash; + const auto &engine = SimpleTensorEngine::ref(); + auto lhs = engine.from_spec(a); + auto rhs = engine.from_spec(b); + const auto &result = engine.join(*lhs, *rhs, function, stash); + return engine.to_spec(result); +} + +TensorSpec streamed_value_new_join(const TensorSpec &a, const TensorSpec &b, join_fun_t function) { + Stash stash; + const auto &factory = StreamedValueBuilderFactory::get(); + auto lhs = value_from_spec(a, factory); + auto rhs = value_from_spec(b, factory); + auto my_op = GenericJoin::make_instruction(lhs->type(), rhs->type(), function, factory, stash); + InterpretedFunction::EvalSingle single(factory, my_op); + return spec_from_value(single.eval(std::vector<Value::CREF>({*lhs,*rhs}))); +} + +TEST(StreamedValueTest, streamed_values_can_be_converted_from_and_to_tensor_spec) { + for (const auto &layout: layouts) { + TensorSpec expect = spec(layout, N()); + std::unique_ptr<Value> value = value_from_spec(expect, StreamedValueBuilderFactory::get()); + TensorSpec actual = spec_from_value(*value); + EXPECT_EQ(actual, expect); + } +} + +TEST(StreamedValueTest, streamed_value_can_be_built_and_inspected) { + ValueType type = ValueType::from_spec("tensor<float>(x{},y[2],z{})"); + const auto &factory = StreamedValueBuilderFactory::get(); + std::unique_ptr<ValueBuilder<float>> builder = factory.create_value_builder<float>(type); + float seq = 0.0; + for (vespalib::string x: {"a", "b", "c"}) { + for (vespalib::string y: {"aa", "bb"}) { + std::vector<vespalib::stringref> 
addr = {x, y}; + auto subspace = builder->add_subspace(addr); + EXPECT_EQ(subspace.size(), 2); + subspace[0] = seq + 1.0; + subspace[1] = seq + 5.0; + seq += 10.0; + } + seq += 100.0; + } + std::unique_ptr<Value> value = builder->build(std::move(builder)); + EXPECT_EQ(value->index().size(), 6); + auto view = value->index().create_view({0}); + vespalib::stringref query = "b"; + vespalib::stringref label; + size_t subspace; + view->lookup(CPA{&query}); + EXPECT_TRUE(view->next_result(PA{&label}, subspace)); + EXPECT_EQ(label, "aa"); + EXPECT_EQ(subspace, 2); + EXPECT_TRUE(view->next_result(PA{&label}, subspace)); + EXPECT_EQ(label, "bb"); + EXPECT_EQ(subspace, 3); + EXPECT_FALSE(view->next_result(PA{&label}, subspace)); +} + +TEST(StreamedValueTest, new_generic_join_works_for_streamed_values) { + ASSERT_TRUE((join_layouts.size() % 2) == 0); + for (size_t i = 0; i < join_layouts.size(); i += 2) { + TensorSpec lhs = spec(join_layouts[i], Div16(N())); + TensorSpec rhs = spec(join_layouts[i + 1], Div16(N())); + for (auto fun: {operation::Add::f, operation::Sub::f, operation::Mul::f, operation::Max::f}) { + SCOPED_TRACE(fmt("\n===\nLHS: %s\nRHS: %s\n===\n", lhs.to_string().c_str(), rhs.to_string().c_str())); + auto expect = simple_tensor_join(lhs, rhs, fun); + auto actual = streamed_value_new_join(lhs, rhs, fun); + EXPECT_EQ(actual, expect); + } + } +} + +GTEST_MAIN_RUN_ALL_TESTS() diff --git a/eval/src/tests/tensor/tensor_conformance/tensor_conformance_test.cpp b/eval/src/tests/tensor/tensor_conformance/tensor_conformance_test.cpp index 233aff0e425..6468f50a00e 100644 --- a/eval/src/tests/tensor/tensor_conformance/tensor_conformance_test.cpp +++ b/eval/src/tests/tensor/tensor_conformance/tensor_conformance_test.cpp @@ -3,11 +3,13 @@ #include <vespa/eval/eval/test/tensor_conformance.h> #include <vespa/eval/eval/simple_tensor_engine.h> #include <vespa/eval/eval/simple_value.h> +#include <vespa/eval/streamed/streamed_value_builder_factory.h> #include 
<vespa/eval/eval/fast_value.h> #include <vespa/eval/tensor/default_tensor_engine.h> #include <vespa/vespalib/util/stringfmt.h> using vespalib::eval::SimpleValueBuilderFactory; +using vespalib::eval::StreamedValueBuilderFactory; using vespalib::eval::FastValueBuilderFactory; using vespalib::eval::SimpleTensorEngine; using vespalib::eval::test::TensorConformance; @@ -29,6 +31,10 @@ TEST("require that SimpleValue implementation passes all conformance tests") { TEST_DO(TensorConformance::run_tests(module_src_path, SimpleValueBuilderFactory::get())); } +TEST("require that StreamedValue implementation passes all conformance tests") { + TEST_DO(TensorConformance::run_tests(module_src_path, StreamedValueBuilderFactory::get())); +} + TEST("require that FastValue implementation passes all conformance tests") { TEST_DO(TensorConformance::run_tests(module_src_path, FastValueBuilderFactory::get())); } diff --git a/eval/src/vespa/eval/CMakeLists.txt b/eval/src/vespa/eval/CMakeLists.txt index 9173278473d..952640195b1 100644 --- a/eval/src/vespa/eval/CMakeLists.txt +++ b/eval/src/vespa/eval/CMakeLists.txt @@ -7,6 +7,7 @@ vespa_add_library(vespaeval $<TARGET_OBJECTS:eval_eval_test> $<TARGET_OBJECTS:eval_eval_value_cache> $<TARGET_OBJECTS:eval_gp> + $<TARGET_OBJECTS:eval_streamed> $<TARGET_OBJECTS:eval_tensor> $<TARGET_OBJECTS:eval_tensor_dense> $<TARGET_OBJECTS:eval_tensor_serialization> diff --git a/eval/src/vespa/eval/eval/cell_type.cpp b/eval/src/vespa/eval/eval/cell_type.cpp index e5729c547b0..365a3f59a56 100644 --- a/eval/src/vespa/eval/eval/cell_type.cpp +++ b/eval/src/vespa/eval/eval/cell_type.cpp @@ -1,3 +1,19 @@ // Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
#include "cell_type.h" +#include <stdio.h> +#include <cstdlib> +#include <vespa/vespalib/util/exceptions.h> +#include <vespa/vespalib/util/stringfmt.h> + +using vespalib::make_string_short::fmt; + +namespace vespalib::eval { + +void +CellTypeUtils::bad_argument(uint32_t id) +{ + throw IllegalArgumentException(fmt("Unknown CellType id=%u", id)); +} + +} diff --git a/eval/src/vespa/eval/eval/cell_type.h b/eval/src/vespa/eval/eval/cell_type.h index 0e878f26f47..49114d04bfe 100644 --- a/eval/src/vespa/eval/eval/cell_type.h +++ b/eval/src/vespa/eval/eval/cell_type.h @@ -3,7 +3,7 @@ #pragma once #include <vespa/vespalib/util/typify.h> -#include <cstdlib> +#include <cstdint> namespace vespalib::eval { @@ -25,6 +25,26 @@ template <typename CT> inline CellType get_cell_type(); template <> inline CellType get_cell_type<double>() { return CellType::DOUBLE; } template <> inline CellType get_cell_type<float>() { return CellType::FLOAT; } +struct CellTypeUtils { + static void bad_argument [[ noreturn ]] (uint32_t id); + + static constexpr uint32_t alignment(CellType cell_type) { + switch (cell_type) { + case CellType::DOUBLE: return sizeof(double); + case CellType::FLOAT: return sizeof(float); + } + bad_argument((uint32_t)cell_type); + } + + static constexpr size_t mem_size(CellType cell_type, size_t sz) { + switch (cell_type) { + case CellType::DOUBLE: return sz * sizeof(double); + case CellType::FLOAT: return sz * sizeof(float); + } + bad_argument((uint32_t)cell_type); + } +}; + struct TypifyCellType { template <typename T> using Result = TypifyResultType<T>; template <typename F> static decltype(auto) resolve(CellType value, F &&f) { @@ -32,7 +52,7 @@ struct TypifyCellType { case CellType::DOUBLE: return f(Result<double>()); case CellType::FLOAT: return f(Result<float>()); } - abort(); + CellTypeUtils::bad_argument((uint32_t)value); } }; diff --git a/eval/src/vespa/eval/eval/tensor_function.cpp b/eval/src/vespa/eval/eval/tensor_function.cpp index 77ca6c1b8f0..614ef8389d8 
100644 --- a/eval/src/vespa/eval/eval/tensor_function.cpp +++ b/eval/src/vespa/eval/eval/tensor_function.cpp @@ -125,7 +125,7 @@ void op_tensor_create(State &state, uint64_t param) { const Create &self = unwrap_param<Create>(param); TensorSpec spec(self.result_type().to_spec()); size_t i = 0; - for (auto pos = self.spec().rbegin(); pos != self.spec().rend(); ++pos) { + for (auto pos = self.map().rbegin(); pos != self.map().rend(); ++pos) { spec.add(pos->first, state.peek(i++).as_double()); } const Value &result = *state.stash.create<Value::UP>(state.engine.from_spec(spec)); @@ -180,7 +180,7 @@ void op_tensor_peek(State &state, uint64_t param) { const Peek &self = unwrap_param<Peek>(param); TensorSpec::Address addr; size_t child_cnt = 0; - for (auto pos = self.spec().rbegin(); pos != self.spec().rend(); ++pos) { + for (auto pos = self.map().rbegin(); pos != self.map().rend(); ++pos) { std::visit(vespalib::overload { [&](const TensorSpec::Label &label) { @@ -388,21 +388,27 @@ Concat::visit_self(vespalib::ObjectVisitor &visitor) const void Create::push_children(std::vector<Child::CREF> &children) const { - for (const auto &cell: _spec) { + for (const auto &cell: _map) { children.emplace_back(cell.second); } } +Create::Spec +Create::make_spec() const +{ + Spec generic_spec; + size_t child_idx = 0; + for (const auto & kv : map()) { + generic_spec[kv.first] = child_idx++; + } + return generic_spec; +} + Instruction Create::compile_self(EngineOrFactory engine, Stash &stash) const { if (engine.is_factory()) { - std::map<TensorSpec::Address, size_t> generic_spec; - size_t child_idx = 0; - for (const auto & kv : spec()) { - generic_spec[kv.first] = child_idx++; - } - return instruction::GenericCreate::make_instruction(result_type(), generic_spec, engine.factory(), stash); + return instruction::GenericCreate::make_instruction(result_type(), make_spec(), engine.factory(), stash); } return Instruction(op_tensor_create, wrap_param<Create>(*this)); } @@ -410,7 +416,7 @@ 
Create::compile_self(EngineOrFactory engine, Stash &stash) const void Create::visit_children(vespalib::ObjectVisitor &visitor) const { - for (const auto &cell: _spec) { + for (const auto &cell: _map) { ::visit(visitor, ::vespalib::eval::as_string(cell.first), cell.second.get()); } } @@ -487,7 +493,7 @@ void Peek::push_children(std::vector<Child::CREF> &children) const { children.emplace_back(_param); - for (const auto &dim: _spec) { + for (const auto &dim: _map) { std::visit(vespalib::overload { [&](const Child &child) { @@ -498,23 +504,29 @@ Peek::push_children(std::vector<Child::CREF> &children) const } } +Peek::Spec +Peek::make_spec() const +{ + Spec generic_spec; + size_t child_idx = 0; + for (const auto & [dim_name, label_or_child] : map()) { + std::visit(vespalib::overload { + [&,&dim_name = dim_name](const TensorSpec::Label &label) { + generic_spec.emplace(dim_name, label); + }, + [&,&dim_name = dim_name](const TensorFunction::Child &) { + generic_spec.emplace(dim_name, child_idx++); + } + }, label_or_child); + } + return generic_spec; +} + Instruction Peek::compile_self(EngineOrFactory engine, Stash &stash) const { if (engine.is_factory()) { - instruction::GenericPeek::SpecMap generic_spec; - size_t child_idx = 0; - for (const auto & [dim_name, label_or_child] : spec()) { - std::visit(vespalib::overload { - [&,&dim_name = dim_name](const TensorSpec::Label &label) { - generic_spec.emplace(dim_name, label); - }, - [&,&dim_name = dim_name](const TensorFunction::Child &) { - generic_spec.emplace(dim_name, child_idx++); - } - }, label_or_child); - } - return instruction::GenericPeek::make_instruction(param_type(), result_type(), generic_spec, engine.factory(), stash); + return instruction::GenericPeek::make_instruction(param_type(), result_type(), make_spec(), engine.factory(), stash); } return Instruction(op_tensor_peek, wrap_param<Peek>(*this)); } @@ -523,7 +535,7 @@ void Peek::visit_children(vespalib::ObjectVisitor &visitor) const { ::visit(visitor, "param", 
_param.get()); - for (const auto &dim: _spec) { + for (const auto &dim: _map) { std::visit(vespalib::overload { [&](const TensorSpec::Label &label) { diff --git a/eval/src/vespa/eval/eval/tensor_function.h b/eval/src/vespa/eval/eval/tensor_function.h index d6158f8eb4a..3c4eb6c53a4 100644 --- a/eval/src/vespa/eval/eval/tensor_function.h +++ b/eval/src/vespa/eval/eval/tensor_function.h @@ -310,16 +310,19 @@ class Create : public Node { using Super = Node; private: - std::map<TensorSpec::Address, Child> _spec; + std::map<TensorSpec::Address, Child> _map; public: Create(const ValueType &result_type_in, const std::map<TensorSpec::Address, TensorFunction::CREF> &spec_in) - : Super(result_type_in), _spec() + : Super(result_type_in), _map() { for (const auto &cell: spec_in) { - _spec.emplace(cell.first, Child(cell.second)); + _map.emplace(cell.first, Child(cell.second)); } } - const std::map<TensorSpec::Address, Child> &spec() const { return _spec; } + const std::map<TensorSpec::Address, Child> &map() const { return _map; } + // mapping from cell address to index of child that computes the cell value + using Spec = std::map<TensorSpec::Address, size_t>; + Spec make_spec() const; bool result_is_mutable() const override { return true; } InterpretedFunction::Instruction compile_self(EngineOrFactory engine, Stash &stash) const final override; void push_children(std::vector<Child::CREF> &children) const final override; @@ -359,25 +362,30 @@ public: using MyLabel = std::variant<TensorSpec::Label, Child>; private: Child _param; - std::map<vespalib::string, MyLabel> _spec; + std::map<vespalib::string, MyLabel> _map; public: Peek(const ValueType &result_type_in, const TensorFunction ¶m, const std::map<vespalib::string, std::variant<TensorSpec::Label, TensorFunction::CREF>> &spec) - : Super(result_type_in), _param(param), _spec() + : Super(result_type_in), _param(param), _map() { for (const auto &dim: spec) { std::visit(vespalib::overload { [&](const TensorSpec::Label &label) { - 
_spec.emplace(dim.first, label); + _map.emplace(dim.first, label); }, [&](const TensorFunction::CREF &ref) { - _spec.emplace(dim.first, ref.get()); + _map.emplace(dim.first, ref.get()); } }, dim.second); } } - const std::map<vespalib::string, MyLabel> &spec() const { return _spec; } + const std::map<vespalib::string, MyLabel> &map() const { return _map; } + // a verbatim label or the index of a child that computes the label value: + using LabelOrChildIndex = std::variant<TensorSpec::Label, size_t>; + // mapping from dimension name to verbatim label or child index: + using Spec = std::map<vespalib::string, LabelOrChildIndex>; + Spec make_spec() const; const ValueType ¶m_type() const { return _param.get().result_type(); } bool result_is_mutable() const override { return true; } InterpretedFunction::Instruction compile_self(EngineOrFactory engine, Stash &stash) const final override; diff --git a/eval/src/vespa/eval/eval/test/CMakeLists.txt b/eval/src/vespa/eval/eval/test/CMakeLists.txt index 6e88beab9b7..f3b0750d503 100644 --- a/eval/src/vespa/eval/eval/test/CMakeLists.txt +++ b/eval/src/vespa/eval/eval/test/CMakeLists.txt @@ -3,6 +3,7 @@ vespa_add_library(eval_eval_test OBJECT SOURCES eval_fixture.cpp eval_spec.cpp + reference_operations.cpp tensor_conformance.cpp test_io.cpp value_compare.cpp diff --git a/eval/src/vespa/eval/eval/test/reference_operations.cpp b/eval/src/vespa/eval/eval/test/reference_operations.cpp new file mode 100644 index 00000000000..89b99526d55 --- /dev/null +++ b/eval/src/vespa/eval/eval/test/reference_operations.cpp @@ -0,0 +1,287 @@ +// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+ +#include "reference_operations.h" +#include <vespa/vespalib/util/overload.h> +#include <vespa/vespalib/util/visit_ranges.h> +#include <vespa/vespalib/util/stash.h> +#include <vespa/vespalib/util/stringfmt.h> +#include <cassert> + +namespace vespalib::eval { + +namespace { + +bool concat_address(const TensorSpec::Address &me, const TensorSpec::Address &other, + const std::string &concat_dim, size_t my_offset, + TensorSpec::Address &my_out, TensorSpec::Address &other_out) +{ + my_out.insert_or_assign(concat_dim, my_offset); + for (const auto &my_dim: me) { + const auto & name = my_dim.first; + const auto & label = my_dim.second; + if (name == concat_dim) { + my_out.insert_or_assign(name, label.index + my_offset); + } else { + auto pos = other.find(name); + if ((pos == other.end()) || (pos->second == label)) { + my_out.insert_or_assign(name, label); + other_out.insert_or_assign(name, label); + } else { + return false; + } + } + } + return true; +} + +bool concat_addresses(const TensorSpec::Address &a, const TensorSpec::Address &b, + const std::string &concat_dim, size_t b_offset, + TensorSpec::Address &a_out, TensorSpec::Address &b_out) +{ + return concat_address(a, b, concat_dim, 0, a_out, b_out) && + concat_address(b, a, concat_dim, b_offset, b_out, a_out); +} + +double value_from_child(const TensorSpec &child) { + double sum = 0.0; + for (const auto & [addr, value] : child.cells()) { + sum += value; + } + return sum; +} + +bool join_address(const TensorSpec::Address &a, const TensorSpec::Address &b, TensorSpec::Address &addr) { + for (const auto &dim_a: a) { + auto pos_b = b.find(dim_a.first); + if ((pos_b != b.end()) && !(pos_b->second == dim_a.second)) { + return false; + } + addr.insert_or_assign(dim_a.first, dim_a.second); + } + return true; +} + +vespalib::string rename_dimension(const vespalib::string &name, const std::vector<vespalib::string> &from, const std::vector<vespalib::string> &to) { + for (size_t i = 0; i < from.size(); ++i) { + if (name == 
from[i]) { + return to[i]; + } + } + return name; +} + +} // namespace <unnamed> + + +TensorSpec ReferenceOperations::concat(const TensorSpec &a, const TensorSpec &b, const std::string &concat_dim) { + ValueType a_type = ValueType::from_spec(a.type()); + ValueType b_type = ValueType::from_spec(b.type()); + ValueType res_type = ValueType::concat(a_type, b_type, concat_dim); + TensorSpec result(res_type.to_spec()); + if (res_type.is_error()) { + return result; + } + size_t b_offset = 1; + size_t concat_dim_index = a_type.dimension_index(concat_dim); + if (concat_dim_index != ValueType::Dimension::npos) { + const auto &dim = a_type.dimensions()[concat_dim_index]; + assert(dim.is_indexed()); // type resolving (above) should catch this + b_offset = dim.size; + } + for (const auto &cell_a: a.cells()) { + for (const auto &cell_b: b.cells()) { + TensorSpec::Address addr_a; + TensorSpec::Address addr_b; + if (concat_addresses(cell_a.first, cell_b.first, concat_dim, b_offset, addr_a, addr_b)) { + result.add(addr_a, cell_a.second); + result.add(addr_b, cell_b.second); + } + } + } + return result; +} + + +TensorSpec ReferenceOperations::create(const vespalib::string &type, const CreateSpec &spec, const std::vector<TensorSpec> &children) { + TensorSpec result(type); + if (ValueType::from_spec(type).is_error()) { + return result; + } + for (const auto & [addr, child_idx] : spec) { + assert(child_idx < children.size()); + const auto &child = children[child_idx]; + double val = value_from_child(child); + result.add(addr, val); + } + return result; +} + + +TensorSpec ReferenceOperations::join(const TensorSpec &a, const TensorSpec &b, join_fun_t function) { + ValueType res_type = ValueType::join(ValueType::from_spec(a.type()), ValueType::from_spec(b.type())); + TensorSpec result(res_type.to_spec()); + if (res_type.is_error()) { + return result; + } + for (const auto &cell_a: a.cells()) { + for (const auto &cell_b: b.cells()) { + TensorSpec::Address addr; + if 
(join_address(cell_a.first, cell_b.first, addr) && + join_address(cell_b.first, cell_a.first, addr)) + { + result.add(addr, function(cell_a.second, cell_b.second)); + } + } + } + return result; +} + + +TensorSpec ReferenceOperations::map(const TensorSpec &a, map_fun_t func) { + ValueType res_type = ValueType::from_spec(a.type()); + TensorSpec result(res_type.to_spec()); + if (res_type.is_error()) { + return result; + } + for (const auto & [ addr, value ]: a.cells()) { + result.add(addr, func(value)); + } + return result; +} + + +TensorSpec ReferenceOperations::merge(const TensorSpec &a, const TensorSpec &b, join_fun_t fun) { + ValueType res_type = ValueType::merge(ValueType::from_spec(a.type()), + ValueType::from_spec(b.type())); + TensorSpec result(res_type.to_spec()); + if (res_type.is_error()) { + return result; + } + for (const auto & [ addr, value ]: a.cells()) { + auto other = b.cells().find(addr); + if (other == b.cells().end()) { + result.add(addr, value); + } else { + result.add(addr, fun(value, other->second)); + } + } + for (const auto & [ addr, value ]: b.cells()) { + auto other = a.cells().find(addr); + if (other == a.cells().end()) { + result.add(addr, value); + } + } + return result; +} + + +TensorSpec ReferenceOperations::peek(const TensorSpec ¶m, const PeekSpec &peek_spec, const std::vector<TensorSpec> &children) { + if (peek_spec.empty()) { + return TensorSpec(ValueType::error_type().to_spec()); + } + std::vector<vespalib::string> peek_dims; + for (const auto & [dim_name, label_or_child] : peek_spec) { + peek_dims.push_back(dim_name); + } + ValueType param_type = ValueType::from_spec(param.type()); + ValueType result_type = param_type.reduce(peek_dims); + TensorSpec result(result_type.to_spec()); + if (result_type.is_error()) { + return result; + } + auto is_mapped_dim = [&](const vespalib::string &name) { + size_t dim_idx = param_type.dimension_index(name); + assert(dim_idx != ValueType::Dimension::npos); + const auto ¶m_dim = 
param_type.dimensions()[dim_idx]; + return param_dim.is_mapped(); + }; + TensorSpec::Address addr; + for (const auto & [dim_name, label_or_child] : peek_spec) { + const vespalib::string &dim = dim_name; + std::visit(vespalib::overload + { + [&](const TensorSpec::Label &label) { + addr.emplace(dim, label); + }, + [&](const size_t &child_idx) { + assert(child_idx < children.size()); + const auto &child = children[child_idx]; + double child_value = value_from_child(child); + if (is_mapped_dim(dim)) { + addr.emplace(dim, vespalib::make_string("%zd", int64_t(child_value))); + } else { + addr.emplace(dim, child_value); + } + } + }, label_or_child); + } + for (const auto &cell: param.cells()) { + bool keep = true; + TensorSpec::Address my_addr; + for (const auto &binding: cell.first) { + auto pos = addr.find(binding.first); + if (pos == addr.end()) { + my_addr.emplace(binding.first, binding.second); + } else { + if (!(pos->second == binding.second)) { + keep = false; + } + } + } + if (keep) { + result.add(my_addr, cell.second); + } + } + return result; +} + + +TensorSpec ReferenceOperations::reduce(const TensorSpec &a, const std::vector<vespalib::string> &dims, Aggr aggr) { + ValueType res_type = ValueType::from_spec(a.type()).reduce(dims); + TensorSpec result(res_type.to_spec()); + if (res_type.is_error()) { + return result; + } + Stash stash; + std::map<TensorSpec::Address,std::optional<Aggregator*>> my_map; + for (const auto &cell: a.cells()) { + TensorSpec::Address addr; + for (const auto &dim: cell.first) { + if (res_type.dimension_index(dim.first) != ValueType::Dimension::npos) { + addr.insert_or_assign(dim.first, dim.second); + } + } + auto [pos, is_empty] = my_map.emplace(addr, std::nullopt); + if (is_empty) { + pos->second = &Aggregator::create(aggr, stash); + pos->second.value()->first(cell.second); + } else { + pos->second.value()->next(cell.second); + } + } + for (const auto &my_entry: my_map) { + result.add(my_entry.first, my_entry.second.value()->result()); 
+ } + return result; +} + + +TensorSpec ReferenceOperations::rename(const TensorSpec &a, const std::vector<vespalib::string> &from, const std::vector<vespalib::string> &to) { + assert(from.size() == to.size()); + ValueType res_type = ValueType::from_spec(a.type()).rename(from, to); + TensorSpec result(res_type.to_spec()); + if (res_type.is_error()) { + return result; + } + for (const auto &cell: a.cells()) { + TensorSpec::Address addr; + for (const auto &dim: cell.first) { + addr.insert_or_assign(rename_dimension(dim.first, from, to), dim.second); + } + result.add(addr, cell.second); + } + return result; +} + + +} // namespace diff --git a/eval/src/vespa/eval/eval/test/reference_operations.h b/eval/src/vespa/eval/eval/test/reference_operations.h new file mode 100644 index 00000000000..735454b486a --- /dev/null +++ b/eval/src/vespa/eval/eval/test/reference_operations.h @@ -0,0 +1,37 @@ +// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+ +#pragma once + +#include <vespa/eval/eval/aggr.h> +#include <vespa/eval/eval/operation.h> +#include <vespa/eval/eval/tensor_spec.h> +#include <vespa/eval/eval/value_type.h> +#include <vespa/eval/eval/tensor_function.h> + +#include <vector> +#include <map> +#include <variant> + +namespace vespalib::eval { + +struct ReferenceOperations { + using map_fun_t = vespalib::eval::operation::op1_t; + using join_fun_t = vespalib::eval::operation::op2_t; + + // mapping from cell address to index of child that computes the cell value + using CreateSpec = tensor_function::Create::Spec; + + // mapping from dimension name to verbatim label or child + using PeekSpec = tensor_function::Peek::Spec; + + static TensorSpec concat(const TensorSpec &a, const TensorSpec &b, const std::string &concat_dim); + static TensorSpec create(const vespalib::string &type, const CreateSpec &spec, const std::vector<TensorSpec> &children); + static TensorSpec join(const TensorSpec &a, const TensorSpec &b, join_fun_t function); + static TensorSpec map(const TensorSpec &a, map_fun_t func); + static TensorSpec merge(const TensorSpec &a, const TensorSpec &b, join_fun_t fun); + static TensorSpec peek(const TensorSpec ¶m, const PeekSpec &spec, const std::vector<TensorSpec> &children); + static TensorSpec reduce(const TensorSpec &a, const std::vector<vespalib::string> &dims, Aggr aggr); + static TensorSpec rename(const TensorSpec &a, const std::vector<vespalib::string> &from, const std::vector<vespalib::string> &to); +}; + +} // namespace diff --git a/eval/src/vespa/eval/instruction/dense_tensor_peek_function.cpp b/eval/src/vespa/eval/instruction/dense_tensor_peek_function.cpp index daad4da947b..fd93cd62fa9 100644 --- a/eval/src/vespa/eval/instruction/dense_tensor_peek_function.cpp +++ b/eval/src/vespa/eval/instruction/dense_tensor_peek_function.cpp @@ -75,10 +75,10 @@ DenseTensorPeekFunction::optimize(const TensorFunction &expr, Stash &stash) const ValueType &peek_type = peek->param_type(); if 
(expr.result_type().is_double() && peek_type.is_dense()) { std::vector<std::pair<int64_t,size_t>> spec; - assert(peek_type.dimensions().size() == peek->spec().size()); + assert(peek_type.dimensions().size() == peek->map().size()); for (auto dim = peek_type.dimensions().rbegin(); dim != peek_type.dimensions().rend(); ++dim) { - auto dim_spec = peek->spec().find(dim->name); - assert(dim_spec != peek->spec().end()); + auto dim_spec = peek->map().find(dim->name); + assert(dim_spec != peek->map().end()); std::visit(vespalib::overload { diff --git a/eval/src/vespa/eval/instruction/generic_create.h b/eval/src/vespa/eval/instruction/generic_create.h index dc3cebc1086..dfd858613fe 100644 --- a/eval/src/vespa/eval/instruction/generic_create.h +++ b/eval/src/vespa/eval/instruction/generic_create.h @@ -5,6 +5,7 @@ #include <vespa/eval/eval/value_type.h> #include <vespa/eval/eval/tensor_spec.h> #include <vespa/eval/eval/interpreted_function.h> +#include <vespa/eval/eval/tensor_function.h> #include <map> namespace vespalib { class Stash; } @@ -15,7 +16,8 @@ namespace vespalib::eval::instruction { //----------------------------------------------------------------------------- struct GenericCreate { - using SpecMap = std::map<TensorSpec::Address, size_t>; + // mapping from cell address to index of child that computes the cell value + using SpecMap = tensor_function::Create::Spec; static InterpretedFunction::Instruction make_instruction(const ValueType &res_type, diff --git a/eval/src/vespa/eval/instruction/generic_peek.cpp b/eval/src/vespa/eval/instruction/generic_peek.cpp index 5802a60d43a..d8ae9241f44 100644 --- a/eval/src/vespa/eval/instruction/generic_peek.cpp +++ b/eval/src/vespa/eval/instruction/generic_peek.cpp @@ -35,7 +35,7 @@ size_t count_children(const Spec &spec) struct DimSpec { vespalib::stringref name; - GenericPeek::MyLabel child_or_label; + GenericPeek::SpecMap::mapped_type child_or_label; bool has_child() const { return 
std::holds_alternative<size_t>(child_or_label); } diff --git a/eval/src/vespa/eval/instruction/generic_peek.h b/eval/src/vespa/eval/instruction/generic_peek.h index d31b47238cb..3fe7aa9d270 100644 --- a/eval/src/vespa/eval/instruction/generic_peek.h +++ b/eval/src/vespa/eval/instruction/generic_peek.h @@ -5,6 +5,7 @@ #include <vespa/eval/eval/value_type.h> #include <vespa/eval/eval/tensor_spec.h> #include <vespa/eval/eval/interpreted_function.h> +#include <vespa/eval/eval/tensor_function.h> #include <map> namespace vespalib { class Stash; } @@ -15,8 +16,8 @@ namespace vespalib::eval::instruction { //----------------------------------------------------------------------------- struct GenericPeek { - using MyLabel = std::variant<TensorSpec::Label, size_t>; - using SpecMap = std::map<vespalib::string, MyLabel>; + // mapping from dimension name to verbatim label or child + using SpecMap = tensor_function::Peek::Spec; static InterpretedFunction::Instruction make_instruction(const ValueType &input_type, diff --git a/eval/src/vespa/eval/streamed/CMakeLists.txt b/eval/src/vespa/eval/streamed/CMakeLists.txt new file mode 100644 index 00000000000..ee928d7b2c9 --- /dev/null +++ b/eval/src/vespa/eval/streamed/CMakeLists.txt @@ -0,0 +1,11 @@ +# Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +vespa_add_library(eval_streamed OBJECT + SOURCES + streamed_value.cpp + streamed_value_index.cpp + streamed_value_utils.cpp + streamed_value_builder.cpp + streamed_value_builder_factory.cpp + streamed_value_view.cpp +) diff --git a/eval/src/vespa/eval/streamed/streamed_value.cpp b/eval/src/vespa/eval/streamed/streamed_value.cpp new file mode 100644 index 00000000000..bdfe5fd4e27 --- /dev/null +++ b/eval/src/vespa/eval/streamed/streamed_value.cpp @@ -0,0 +1,28 @@ +// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+ +#include "streamed_value.h" +#include <vespa/log/log.h> + +LOG_SETUP(".vespalib.eval.streamed.streamed_value"); + +namespace vespalib::eval { + +template <typename T> +StreamedValue<T>::~StreamedValue() = default; + +template <typename T> +MemoryUsage +StreamedValue<T>::get_memory_usage() const +{ + MemoryUsage usage = self_memory_usage<StreamedValue<T>>(); + usage.merge(vector_extra_memory_usage(_my_cells)); + usage.incUsedBytes(_label_buf.byteSize()); + usage.incAllocatedBytes(_label_buf.byteCapacity()); + return usage; +} + +template class StreamedValue<double>; +template class StreamedValue<float>; + +} // namespace + diff --git a/eval/src/vespa/eval/streamed/streamed_value.h b/eval/src/vespa/eval/streamed/streamed_value.h new file mode 100644 index 00000000000..258802a53e8 --- /dev/null +++ b/eval/src/vespa/eval/streamed/streamed_value.h @@ -0,0 +1,48 @@ +// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#pragma once + +#include <vespa/eval/eval/value_type.h> +#include <vespa/eval/eval/value.h> +#include "streamed_value_index.h" +#include <cassert> + +namespace vespalib::eval { + +/** + * A very simple Value implementation. + * Cheap to construct from serialized data, + * and cheap to serialize or iterate through. + * Slow for full or partial lookups. 
+ **/ +template <typename T> +class StreamedValue : public Value +{ +private: + ValueType _type; + std::vector<T> _my_cells; + Array<char> _label_buf; + StreamedValueIndex _my_index; + +public: + StreamedValue(ValueType type, size_t num_mapped_dimensions, + std::vector<T> cells, size_t num_subspaces, Array<char> && label_buf) + : _type(std::move(type)), + _my_cells(std::move(cells)), + _label_buf(std::move(label_buf)), + _my_index(num_mapped_dimensions, + num_subspaces, + ConstArrayRef<char>(_label_buf.begin(), _label_buf.size())) + { + assert(num_subspaces * _type.dense_subspace_size() == _my_cells.size()); + } + + ~StreamedValue(); + const ValueType &type() const final override { return _type; } + TypedCells cells() const final override { return TypedCells(_my_cells); } + const Value::Index &index() const final override { return _my_index; } + MemoryUsage get_memory_usage() const final override; + auto get_data_reference() const { return _my_index.get_data_reference(); } +}; + +} // namespace diff --git a/eval/src/vespa/eval/streamed/streamed_value_builder.cpp b/eval/src/vespa/eval/streamed/streamed_value_builder.cpp new file mode 100644 index 00000000000..957121c42b7 --- /dev/null +++ b/eval/src/vespa/eval/streamed/streamed_value_builder.cpp @@ -0,0 +1,13 @@ +// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#include "streamed_value_builder.h" + +namespace vespalib::eval { + +template<typename T> +StreamedValueBuilder<T>::~StreamedValueBuilder() = default; + +template class StreamedValueBuilder<double>; +template class StreamedValueBuilder<float>; + +} // namespace diff --git a/eval/src/vespa/eval/streamed/streamed_value_builder.h b/eval/src/vespa/eval/streamed/streamed_value_builder.h new file mode 100644 index 00000000000..5698c805756 --- /dev/null +++ b/eval/src/vespa/eval/streamed/streamed_value_builder.h @@ -0,0 +1,66 @@ +// Copyright Verizon Media. 
Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#pragma once + +#include "streamed_value.h" +#include <vespa/vespalib/objects/nbostream.h> + +namespace vespalib::eval { + + /** + * Builder for StreamedValue objects. + **/ +template <typename T> +class StreamedValueBuilder : public ValueBuilder<T> +{ +private: + ValueType _type; + size_t _num_mapped_dimensions; + size_t _dense_subspace_size; + std::vector<T> _cells; + size_t _num_subspaces; + nbostream _labels; +public: + StreamedValueBuilder(const ValueType &type, + size_t num_mapped_in, + size_t subspace_size_in, + size_t expected_subspaces) + : _type(type), + _num_mapped_dimensions(num_mapped_in), + _dense_subspace_size(subspace_size_in), + _cells(), + _num_subspaces(0), + _labels() + { + _cells.reserve(subspace_size_in * expected_subspaces); + // assume small sized label strings: + _labels.reserve(num_mapped_in * expected_subspaces * 3); + }; + + ~StreamedValueBuilder(); + + ArrayRef<T> add_subspace(ConstArrayRef<vespalib::stringref> addr) override { + for (auto label : addr) { + _labels.writeSmallString(label); + } + size_t old_sz = _cells.size(); + _cells.resize(old_sz + _dense_subspace_size); + _num_subspaces++; + return ArrayRef<T>(&_cells[old_sz], _dense_subspace_size); + } + + std::unique_ptr<Value> build(std::unique_ptr<ValueBuilder<T>>) override { + if (_num_mapped_dimensions == 0) { + assert(_num_subspaces == 1); + } + assert(_num_subspaces * _dense_subspace_size == _cells.size()); + return std::make_unique<StreamedValue<T>>(std::move(_type), + _num_mapped_dimensions, + std::move(_cells), + _num_subspaces, + _labels.extract_buffer()); + } + +}; + +} // namespace diff --git a/eval/src/vespa/eval/streamed/streamed_value_builder_factory.cpp b/eval/src/vespa/eval/streamed/streamed_value_builder_factory.cpp new file mode 100644 index 00000000000..aa6347a2c51 --- /dev/null +++ b/eval/src/vespa/eval/streamed/streamed_value_builder_factory.cpp @@ -0,0 +1,36 @@ +// 
Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#include "streamed_value_builder_factory.h" +#include "streamed_value_builder.h" + +namespace vespalib::eval { + +struct SelectStreamedValueBuilder { + template <typename T> + static std::unique_ptr<ValueBuilderBase> invoke( + const ValueType &type, size_t num_mapped, + size_t subspace_size, size_t expected_subspaces) + { + assert(check_cell_type<T>(type.cell_type())); + return std::make_unique<StreamedValueBuilder<T>>( + type, num_mapped, subspace_size, expected_subspaces); + } +}; + +std::unique_ptr<ValueBuilderBase> +StreamedValueBuilderFactory::create_value_builder_base(const ValueType &type, + size_t num_mapped, + size_t subspace_size, + size_t expected_subspaces) const +{ + return typify_invoke<1,TypifyCellType,SelectStreamedValueBuilder>( + type.cell_type(), + type, num_mapped, subspace_size, expected_subspaces); +} + +StreamedValueBuilderFactory::~StreamedValueBuilderFactory() = default; +StreamedValueBuilderFactory StreamedValueBuilderFactory::_factory; + +} // namespace + + diff --git a/eval/src/vespa/eval/streamed/streamed_value_builder_factory.h b/eval/src/vespa/eval/streamed/streamed_value_builder_factory.h new file mode 100644 index 00000000000..3f81981f429 --- /dev/null +++ b/eval/src/vespa/eval/streamed/streamed_value_builder_factory.h @@ -0,0 +1,24 @@ +// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+
+#pragma once
+
+#include "streamed_value.h"
+
+namespace vespalib::eval {
+
+/**
+ * A factory that can generate appropriate ValueBuilder instances.
+ * The constructor is private; the single static instance is
+ * obtained through get().
+ */
+struct StreamedValueBuilderFactory : ValueBuilderFactory {
+private:
+    StreamedValueBuilderFactory() {}
+    static StreamedValueBuilderFactory _factory;
+    std::unique_ptr<ValueBuilderBase> create_value_builder_base(
+        const ValueType &type, size_t num_mapped_in,
+        size_t subspace_size_in, size_t expected_subspaces) const override;
+public:
+    static const StreamedValueBuilderFactory &get() { return _factory; }
+    ~StreamedValueBuilderFactory();
+};
+
+}
diff --git a/eval/src/vespa/eval/streamed/streamed_value_index.cpp b/eval/src/vespa/eval/streamed/streamed_value_index.cpp
new file mode 100644
index 00000000000..38b57e9c660
--- /dev/null
+++ b/eval/src/vespa/eval/streamed/streamed_value_index.cpp
@@ -0,0 +1,100 @@
+// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include "streamed_value_index.h"
+#include "streamed_value_utils.h"
+
+#include <vespa/vespalib/objects/nbostream.h>
+#include <vespa/vespalib/util/stringfmt.h>
+#include <vespa/vespalib/util/visit_ranges.h>
+#include <vespa/log/log.h>
+
+// NOTE(review): the component name says "searchlib.tensor" although this
+// file lives under eval/src/vespa/eval/streamed - looks copied from
+// elsewhere; confirm the intended log component.
+LOG_SETUP(".searchlib.tensor.streamed_value_index");
+
+namespace vespalib::eval {
+
+namespace {
+
+/**
+ * View created by StreamedValueIndex::create_view() for a non-empty
+ * set of view dimensions. lookup() stores one label per view
+ * dimension; next_result() then scans the label blocks in stream
+ * order and reports the blocks whose labels for the view dimensions
+ * equal the stored ones.
+ **/
+struct StreamedFilterView : Value::Index::View
+{
+    LabelBlockStream label_blocks;                // source of (subspace index, address) blocks
+    std::vector<size_t> view_dims;                // indexes of the dimensions being matched
+    std::vector<vespalib::stringref> to_match;    // labels from the latest lookup(), one per view dim
+
+    StreamedFilterView(LabelBlockStream labels, std::vector<size_t> view_dims_in)
+        : label_blocks(std::move(labels)),
+          view_dims(std::move(view_dims_in)),
+          to_match()
+    {
+        to_match.reserve(view_dims.size());
+    }
+
+    // Rewinds the stream and records the labels to match;
+    // 'addr' must contain exactly one label per view dimension.
+    void lookup(ConstArrayRef<const vespalib::stringref*> addr) override {
+        label_blocks.reset();
+        to_match.clear();
+        for (auto ptr : addr) {
+            to_match.push_back(*ptr);
+        }
+        assert(view_dims.size() == to_match.size());
+    }
+
+    // Advances to the next matching block and returns true, or returns
+    // false when the stream is exhausted. For every block scanned
+    // (matching or not) idx_out is set to the subspace index of the block
+    // and the labels of the dimensions that are not view dimensions are
+    // written through addr_out.
+    bool next_result(ConstArrayRef<vespalib::stringref*> addr_out, size_t &idx_out) override {
+        while (const auto block = label_blocks.next_block()) {
+            idx_out = block.ss_idx;
+            bool matches = true;
+            size_t out_idx = 0;
+            size_t vdm_idx = 0;
+            for (size_t dim = 0; dim < block.address.size(); ++dim) {
+                // view_dims is consumed front to back while 'dim' increases;
+                // presumably it must be sorted ascending - TODO confirm
+                if (vdm_idx < view_dims.size() && (view_dims[vdm_idx] == dim)) {
+                    matches &= (block.address[dim] == to_match[vdm_idx++]);
+                } else {
+                    *addr_out[out_idx++] = block.address[dim];
+                }
+            }
+            // every dimension was either compared or copied out
+            assert(out_idx == addr_out.size());
+            assert(vdm_idx == view_dims.size());
+            if (matches) return true;
+        }
+        return false;
+    }
+};
+
+/**
+ * View created by StreamedValueIndex::create_view() when no view
+ * dimensions are given: lookup() takes an empty address and
+ * next_result() returns every block in stream order.
+ **/
+struct StreamedIterationView : Value::Index::View
+{
+    LabelBlockStream label_blocks;
+
+    StreamedIterationView(LabelBlockStream labels)
+        : label_blocks(std::move(labels))
+    {}
+
+    // Rewinds the stream; there is nothing to match, so 'addr' must be empty.
+    void lookup(ConstArrayRef<const vespalib::stringref*> addr) override {
+        label_blocks.reset();
+        assert(addr.size() == 0);
+    }
+
+    // Copies the complete address of the next block through addr_out and
+    // its subspace index to idx_out; returns false when no blocks remain.
+    bool next_result(ConstArrayRef<vespalib::stringref*> addr_out, size_t &idx_out) override {
+        if (auto block = label_blocks.next_block()) {
+            idx_out = block.ss_idx;
+            size_t i = 0;
+            assert(addr_out.size() == block.address.size());
+            for (auto ptr : addr_out) {
+                *ptr = block.address[i++];
+            }
+            return true;
+        }
+        return false;
+    }
+};
+
+} // namespace <unnamed>
+
+// Builds a label stream over the serialized labels of this index and
+// wraps it in an iteration view (no dims) or a filter view (some dims).
+std::unique_ptr<Value::Index::View>
+StreamedValueIndex::create_view(const std::vector<size_t> &dims) const
+{
+    LabelBlockStream label_stream(_data.num_subspaces, _data.labels_buffer, _data.num_mapped_dims);
+    if (dims.empty()) {
+        return std::make_unique<StreamedIterationView>(std::move(label_stream));
+    }
+    return std::make_unique<StreamedFilterView>(std::move(label_stream), dims);
+}
+
+} // namespace vespalib::eval
diff --git a/eval/src/vespa/eval/streamed/streamed_value_index.h b/eval/src/vespa/eval/streamed/streamed_value_index.h
new file mode 100644
index 00000000000..8fd561200c3
--- /dev/null
+++ b/eval/src/vespa/eval/streamed/streamed_value_index.h
@@ -0,0 +1,36 @@
+// Copyright Verizon Media.
Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include <vespa/eval/eval/value.h>
+
+namespace vespalib::eval {
+
+  /**
+   * Implements Value::Index by reading a stream of serialized
+   * labels. The index only stores a reference to the label
+   * buffer handed to the constructor.
+   **/
+class StreamedValueIndex : public Value::Index
+{
+public:
+    // Everything needed to walk the serialized labels: how many labels
+    // make up one address, how many addresses there are, and the buffer
+    // holding the serialized labels.
+    struct SerializedDataRef {
+        uint32_t num_mapped_dims;
+        uint32_t num_subspaces;
+        ConstArrayRef<char> labels_buffer;
+    };
+    StreamedValueIndex(uint32_t num_mapped_dims, uint32_t num_subspaces, ConstArrayRef<char> labels_buf)
+      : _data{num_mapped_dims, num_subspaces, labels_buf}
+    {}
+
+    // index API:
+    // size() is the number of subspaces given at construction
+    size_t size() const override { return _data.num_subspaces; }
+    std::unique_ptr<View> create_view(const std::vector<size_t> &dims) const override;
+
+    // returns a copy of the stored data reference
+    SerializedDataRef get_data_reference() const { return _data; }
+
+private:
+    SerializedDataRef _data;
+};
+
+} // namespace
+
+
diff --git a/eval/src/vespa/eval/streamed/streamed_value_utils.cpp b/eval/src/vespa/eval/streamed/streamed_value_utils.cpp
new file mode 100644
index 00000000000..1b4a91a9080
--- /dev/null
+++ b/eval/src/vespa/eval/streamed/streamed_value_utils.cpp
@@ -0,0 +1,9 @@
+// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include "streamed_value_utils.h"
+
+namespace vespalib::eval {
+
+LabelBlockStream::~LabelBlockStream() = default;
+
+} // namespace
diff --git a/eval/src/vespa/eval/streamed/streamed_value_utils.h b/eval/src/vespa/eval/streamed/streamed_value_utils.h
new file mode 100644
index 00000000000..3e3da82dd22
--- /dev/null
+++ b/eval/src/vespa/eval/streamed/streamed_value_utils.h
@@ -0,0 +1,76 @@
+// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include <vespa/eval/eval/value.h>
+#include <vespa/vespalib/objects/nbostream.h>
+
+namespace vespalib::eval {
+
+/**
+ * Reads a stream of serialized labels.
+ * Reading more labels than available will
+ * throw an exception.
+ **/
+struct LabelStream {
+    nbostream source;
+    LabelStream(ConstArrayRef<char> data) : source(data.begin(), data.size()) {}
+    // Reads the length prefix of the next label, then returns a stringref
+    // built from the current read position and that length, and advances
+    // the read position past the label.
+    vespalib::stringref next_label() {
+        size_t str_size = source.getInt1_4Bytes();
+        vespalib::stringref label(source.peek(), str_size);
+        source.adjustReadPos(str_size);
+        return label;
+    }
+    // Moves the read position back to the start of the stream.
+    void reset() { source.rp(0); }
+};
+
+/**
+ * Represents an address (set of labels) mapping to a subspace index.
+ * A block with ss_idx == npos marks the end of a LabelBlockStream and
+ * converts to false.
+ **/
+struct LabelBlock {
+    static constexpr size_t npos = -1;
+    size_t ss_idx;
+    ConstArrayRef<vespalib::stringref> address;
+    operator bool() const { return ss_idx != npos; }
+};
+
+/**
+ * Utility for reading a buffer with serialized labels
+ * as a stream of LabelBlock objects.
+ *
+ * A newly constructed stream is positioned at its end; call
+ * reset() before the first next_block().
+ **/
+class LabelBlockStream {
+private:
+    size_t _num_subspaces;
+    LabelStream _labels;
+    size_t _subspace_index;
+    std::vector<vespalib::stringref> _current_address;
+public:
+    // Reads the labels of the next address and returns them together with
+    // the subspace index, or returns an end marker (ss_idx == npos) when
+    // all subspaces have been read. The returned address refers to storage
+    // inside this object that is overwritten by the next call.
+    LabelBlock next_block() {
+        if (_subspace_index < _num_subspaces) {
+            for (auto & label : _current_address) {
+                label = _labels.next_label();
+            }
+            return LabelBlock{_subspace_index++, _current_address};
+        } else {
+            return LabelBlock{LabelBlock::npos, {}};
+        }
+    }
+
+    // Restarts reading from the first subspace.
+    void reset() {
+        _subspace_index = 0;
+        _labels.reset();
+    }
+
+    // Note that _subspace_index starts out equal to num_subspaces, so
+    // next_block() yields nothing until reset() has been called.
+    LabelBlockStream(uint32_t num_subspaces,
+                     ConstArrayRef<char> label_buf,
+                     uint32_t num_mapped_dims)
+        : _num_subspaces(num_subspaces),
+          _labels(label_buf),
+          _subspace_index(num_subspaces),
+          _current_address(num_mapped_dims)
+    {}
+
+    // The user-declared destructor below suppresses the implicitly
+    // generated move operations, so std::move() on a LabelBlockStream
+    // (as done when a stream is handed to an index view) would silently
+    // fall back to copying the label stream. Default the move operations,
+    // and the copy operations that a declared move would otherwise delete.
+    LabelBlockStream(const LabelBlockStream &) = default;
+    LabelBlockStream(LabelBlockStream &&) = default;
+    LabelBlockStream &operator=(const LabelBlockStream &) = default;
+    LabelBlockStream &operator=(LabelBlockStream &&) = default;
+
+    ~LabelBlockStream();
+};
+
+} // namespace
diff --git a/eval/src/vespa/eval/streamed/streamed_value_view.cpp b/eval/src/vespa/eval/streamed/streamed_value_view.cpp
new file mode 100644
index 00000000000..87e1e676692
--- /dev/null
+++ b/eval/src/vespa/eval/streamed/streamed_value_view.cpp
@@ -0,0 +1,9 @@
+// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include "streamed_value_view.h"
+
+namespace vespalib::eval {
+
+StreamedValueView::~StreamedValueView() = default;
+
+} // namespace
diff --git a/eval/src/vespa/eval/streamed/streamed_value_view.h b/eval/src/vespa/eval/streamed/streamed_value_view.h
new file mode 100644
index 00000000000..e37f442dd9a
--- /dev/null
+++ b/eval/src/vespa/eval/streamed/streamed_value_view.h
@@ -0,0 +1,45 @@
+// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include <vespa/eval/eval/value_type.h>
+#include <vespa/eval/eval/value.h>
+#include "streamed_value_index.h"
+#include <cassert>
+
+namespace vespalib::eval {
+
+  /**
+   * Same characteristics as StreamedValue, but does not
+   * own its data - refers to type, cells and serialized
+   * labels that must be kept outside the Value.
+   **/
+class StreamedValueView : public Value
+{
+private:
+    const ValueType &_type;       // reference only; the ValueType must outlive this view
+    TypedCells _cells_ref;        // cells as passed to the constructor
+    StreamedValueIndex _my_index; // index over the externally kept label buffer
+
+public:
+    // Wraps the given type, cells and serialized labels. In debug builds
+    // it is verified that the number of cells equals
+    // num_subspaces * type.dense_subspace_size().
+    StreamedValueView(const ValueType &type, size_t num_mapped_dimensions,
+                      TypedCells cells, size_t num_subspaces,
+                      ConstArrayRef<char> labels_buf)
+      : _type(type),
+        _cells_ref(cells),
+        _my_index(num_mapped_dimensions, num_subspaces, labels_buf)
+    {
+        assert(num_subspaces * _type.dense_subspace_size() == _cells_ref.size);
+    }
+
+    ~StreamedValueView();
+    const ValueType &type() const final override { return _type; }
+    TypedCells cells() const final override { return _cells_ref; }
+    const Value::Index &index() const final override { return _my_index; }
+    // presumably accounts for this object only, not the referenced
+    // cells/labels - TODO confirm against self_memory_usage
+    MemoryUsage get_memory_usage() const final override {
+        return self_memory_usage<StreamedValueView>();
+    }
+    // forwards to the index: dimension/subspace counts and the label buffer
+    auto get_data_reference() const { return _my_index.get_data_reference(); }
+};
+
+} // namespace
diff --git a/eval/src/vespa/eval/tensor/dense/dense_tensor_create_function.cpp b/eval/src/vespa/eval/tensor/dense/dense_tensor_create_function.cpp
index f5ab49df816..4b7f4936815 100644
---
a/eval/src/vespa/eval/tensor/dense/dense_tensor_create_function.cpp +++ b/eval/src/vespa/eval/tensor/dense/dense_tensor_create_function.cpp @@ -85,7 +85,7 @@ DenseTensorCreateFunction::optimize(const eval::TensorFunction &expr, Stash &sta const auto &zero_value = stash.create<DoubleValue>(0.0); const auto &zero_node = const_value(zero_value, stash); std::vector<Child> children(num_cells, zero_node); - for (const auto &cell: create->spec()) { + for (const auto &cell: create->map()) { size_t cell_idx = get_index(cell.first, expr.result_type()); children[cell_idx] = cell.second; } |