From eb5cf28c586205a1c60df31583252cf4d3a8a030 Mon Sep 17 00:00:00 2001 From: Geir Storli Date: Tue, 22 Aug 2023 13:41:47 +0000 Subject: Add modify operation that creates non-existing sub-spaces with default cell values. --- .../partial_modify/partial_modify_test.cpp | 57 ++++++++++ .../document/update/tensor_partial_update.cpp | 115 ++++++++++++++++++--- .../vespa/document/update/tensor_partial_update.h | 9 ++ 3 files changed, 168 insertions(+), 13 deletions(-) (limited to 'document') diff --git a/document/src/tests/tensor_fieldvalue/partial_modify/partial_modify_test.cpp b/document/src/tests/tensor_fieldvalue/partial_modify/partial_modify_test.cpp index 47ef9f21a27..bf0f893b901 100644 --- a/document/src/tests/tensor_fieldvalue/partial_modify/partial_modify_test.cpp +++ b/document/src/tests/tensor_fieldvalue/partial_modify/partial_modify_test.cpp @@ -60,12 +60,35 @@ Value::UP try_partial_modify(const TensorSpec &a, const TensorSpec &b, join_fun_ return TensorPartialUpdate::modify(*lhs, fun, *rhs, factory); } +Value::UP try_partial_modify_with_defaults(const TensorSpec &a, const TensorSpec &b, join_fun_t fun, double default_cell_value) { + const auto &factory = SimpleValueBuilderFactory::get(); + auto lhs = value_from_spec(a, factory); + auto rhs = value_from_spec(b, factory); + return TensorPartialUpdate::modify_with_defaults(*lhs, fun, *rhs, default_cell_value, factory); +} + TensorSpec perform_partial_modify(const TensorSpec &a, const TensorSpec &b, join_fun_t fun) { auto up = try_partial_modify(a, b, fun); EXPECT_TRUE(up); return spec_from_value(*up); } +TensorSpec perform_partial_modify_with_defaults(const TensorSpec &a, const TensorSpec &b, join_fun_t fun, double default_cell_value) { + auto up = try_partial_modify_with_defaults(a, b, fun, default_cell_value); + EXPECT_TRUE(up); + return spec_from_value(*up); +} + +void expect_modify_with_defaults(const vespalib::string& lhs_expr, const vespalib::string& rhs_expr, + join_fun_t fun, double default_cell_value, 
const vespalib::string& exp_expr) { + auto lhs = TensorSpec::from_expr(lhs_expr); + auto rhs = TensorSpec::from_expr(rhs_expr); + auto exp = TensorSpec::from_expr(exp_expr); + auto act = perform_partial_modify_with_defaults(lhs, rhs, fun, default_cell_value); + SCOPED_TRACE(fmt("\n===\nLHS: %s\nRHS: %s\n===\n", lhs.to_string().c_str(), rhs.to_string().c_str())); + EXPECT_EQ(exp, act); +} + TEST(PartialModifyTest, partial_modify_works_for_simple_values) { for (const auto &layouts: modify_layouts) { for (auto lhs_ct: CellTypeUtils::list_types()) { @@ -87,6 +110,27 @@ TEST(PartialModifyTest, partial_modify_works_for_simple_values) { } } +TEST(PartialModifyTest, partial_modify_with_defauls) { + expect_modify_with_defaults("tensor(x{}):{{x:\"a\"}:1,{x:\"b\"}:2}", + "tensor(x{}):{{x:\"b\"}:3}", + operation::Add::f, 0.0, + "tensor(x{}):{{x:\"a\"}:1,{x:\"b\"}:5}"); + + expect_modify_with_defaults("tensor(x{}):{{x:\"a\"}:1,{x:\"b\"}:2}", + "tensor(x{}):{{x:\"b\"}:3,{x:\"c\"}:4}", + operation::Add::f, 0.0, + "tensor(x{}):{{x:\"a\"}:1,{x:\"b\"}:5,{x:\"c\"}:4}"); + + expect_modify_with_defaults("tensor(x{},y[3]):{{x:\"a\",y:0}:3,{x:\"a\",y:1}:4,{x:\"a\",y:2}:5}", + "tensor(x{},y{}):{{x:\"a\",y:\"0\"}:6," + "{x:\"b\",y:\"1\"}:7,{x:\"b\",y:\"2\"}:8," + "{x:\"c\",y:\"0\"}:9}", + operation::Add::f, 1.0, + "tensor(x{},y[3]):{{x:\"a\",y:0}:9,{x:\"a\",y:1}:4,{x:\"a\",y:2}:5," + "{x:\"b\",y:0}:1,{x:\"b\",y:1}:8,{x:\"b\",y:2}:9," + "{x:\"c\",y:0}:10,{x:\"c\",y:1}:1,{x:\"c\",y:2}:1}"); +} + std::vector> bad_layouts = { { "x3", "x3" }, { "x3y4_1", "x3y4_1" }, @@ -108,4 +152,17 @@ TEST(PartialModifyTest, partial_modify_returns_nullptr_on_invalid_inputs) { } } +TEST(PartialModifyTest, partial_modify_with_defaults_returns_nullptr_on_invalid_inputs) { + for (const auto &layouts: bad_layouts) { + TensorSpec lhs = GenSpec::from_desc(layouts.first).seq(N()); + TensorSpec rhs = GenSpec::from_desc(layouts.second).seq(Div16(N())); + SCOPED_TRACE(fmt("\n===\nLHS: %s\nRHS: %s\n===\n", 
lhs.to_string().c_str(), rhs.to_string().c_str())); + for (auto fun: {operation::Add::f}) { + auto actual = try_partial_modify_with_defaults(lhs, rhs, fun, 0.0); + auto expect = Value::UP(); + EXPECT_EQ(actual, expect); + } + } +} + GTEST_MAIN_RUN_ALL_TESTS() diff --git a/document/src/vespa/document/update/tensor_partial_update.cpp b/document/src/vespa/document/update/tensor_partial_update.cpp index 9c3db0edb5f..72bcc044977 100644 --- a/document/src/vespa/document/update/tensor_partial_update.cpp +++ b/document/src/vespa/document/update/tensor_partial_update.cpp @@ -1,11 +1,12 @@ // Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. #include "tensor_partial_update.h" +#include #include #include +#include #include #include -#include #include #include @@ -203,27 +204,27 @@ struct PerformModify { static Value::UP invoke(const Value &input, join_fun_t function, const Value &modifier, - const ValueBuilderFactory &factory); + const ValueBuilderFactory &factory, + AddressHandler& handler, + Value::UP output); }; template Value::UP -PerformModify::invoke(const Value &input, join_fun_t function, const Value &modifier, const ValueBuilderFactory &factory) +PerformModify::invoke(const Value &input, join_fun_t function, const Value &modifier, const ValueBuilderFactory &factory, + AddressHandler& handler, Value::UP output) { const ValueType &input_type = input.type(); const size_t dsss = input_type.dense_subspace_size(); - const ValueType &modifier_type = modifier.type(); - AddressHandler handler(input_type, modifier_type); - if (! 
handler.valid) { - return {}; + if (!output) { + // copy input to output + output = copy_tensor(input, input_type, handler.for_output, factory); } - // copy input to output - auto out = copy_tensor(input, input_type, handler.for_output, factory); // need to overwrite some cells - auto output_cells = unconstify(out->cells().template typify()); + auto output_cells = unconstify(output->cells().template typify()); const auto modifier_cells = modifier.cells().typify(); auto modifier_view = modifier.index().create_view({}); - auto lookup_view = out->index().create_view(handler.for_output.lookup_view_dims); + auto lookup_view = output->index().create_view(handler.for_output.lookup_view_dims); modifier_view->lookup({}); size_t modifier_subspace_index; while (modifier_view->next_result(handler.from_modifier.next_result_refs, modifier_subspace_index)) { @@ -242,7 +243,70 @@ PerformModify::invoke(const Value &input, join_fun_t function, const Value &modi dst[dense_idx] = function(lhs, rhs); } } - return out; + return output; +} + +void +find_sub_spaces_not_in_input(const Value& input, const Value& modifier, double default_cell_value, + AddressHandler& handler, ArrayArrayMap& sub_spaces_result) +{ + auto lookup_view = input.index().create_view(handler.for_output.lookup_view_dims); + auto modifier_view = modifier.index().create_view({}); + modifier_view->lookup({}); + size_t modifier_subspace_index; + while (modifier_view->next_result(handler.from_modifier.next_result_refs, modifier_subspace_index)) { + handler.handle_address(); + size_t dense_idx = handler.dense_converter.get_dense_index(); + if (dense_idx == npos()) { + continue; + } + lookup_view->lookup(handler.for_output.lookup_refs); + size_t output_subspace_index; + if (!lookup_view->next_result({}, output_subspace_index)) { + ConstArrayRef addr(handler.for_output.addr); + auto [tag, inserted] = sub_spaces_result.lookup_or_add_entry(addr); + if (inserted) { + auto values = sub_spaces_result.get_values(tag); + for (size_t 
i = 0; i < values.size(); ++i) { + values[i] = default_cell_value; + } + } + } + } +} + +struct PerformInsertSubspaces { + template + static Value::UP invoke(const Value& input, + SparseCoords& output_addrs, + const ArrayArrayMap& sub_spaces, + const ValueBuilderFactory& factory); +}; + +template +Value::UP +PerformInsertSubspaces::invoke(const Value& input, + SparseCoords& output_addrs, + const ArrayArrayMap& sub_spaces, + const ValueBuilderFactory& factory) +{ + const auto& input_type = input.type(); + const size_t num_mapped_in_input = input_type.count_mapped_dimensions(); + const size_t dsss = input_type.dense_subspace_size(); + const size_t expected_subspaces = input.index().size() + sub_spaces.size(); + auto builder = factory.create_value_builder(input_type, num_mapped_in_input, dsss, expected_subspaces); + auto no_filter = [] (const auto&, size_t) { + return true; + }; + copy_tensor_with_filter(input, dsss, output_addrs, *builder, no_filter); + sub_spaces.each_entry([&](vespalib::ConstArrayRef keys, vespalib::ConstArrayRef values) { + auto dst = builder->add_subspace(keys).begin(); + assert(dsss == values.size()); + for (size_t i = 0; i < dsss; ++i) { + dst[i] = values[i]; + } + }); + return builder->build(std::move(builder)); } //----------------------------------------------------------------------------- @@ -398,9 +462,34 @@ Value::UP TensorPartialUpdate::modify(const Value &input, join_fun_t function, const Value &modifier, const ValueBuilderFactory &factory) { + AddressHandler handler(input.type(), modifier.type()); + if (!handler.valid) { + return {}; + } + return typify_invoke<2, TypifyCellType, PerformModify>( + input.cells().type, modifier.cells().type, + input, function, modifier, factory, handler, Value::UP()); +} + +Value::UP +TensorPartialUpdate::modify_with_defaults(const Value& input, join_fun_t function, + const Value& modifier, double default_cell_value, const ValueBuilderFactory& factory) +{ + AddressHandler handler(input.type(), 
modifier.type()); + if (!handler.valid) { + return {}; + } + const size_t dsss = input.type().dense_subspace_size(); + ArrayArrayMap sub_spaces(handler.for_output.addr.size(), dsss, modifier.index().size()); + find_sub_spaces_not_in_input(input, modifier, default_cell_value, handler, sub_spaces); + Value::UP output; + if (sub_spaces.size() > 0) { + output = typify_invoke<1, TypifyCellType, PerformInsertSubspaces>( + input.cells().type, input, handler.for_output, sub_spaces, factory); + } return typify_invoke<2, TypifyCellType, PerformModify>( input.cells().type, modifier.cells().type, - input, function, modifier, factory); + input, function, modifier, factory, handler, std::move(output)); } Value::UP diff --git a/document/src/vespa/document/update/tensor_partial_update.h b/document/src/vespa/document/update/tensor_partial_update.h index 196b14f6f5c..f3069d59a9b 100644 --- a/document/src/vespa/document/update/tensor_partial_update.h +++ b/document/src/vespa/document/update/tensor_partial_update.h @@ -23,6 +23,15 @@ struct TensorPartialUpdate { static Value::UP modify(const Value &input, join_fun_t function, const Value &modifier, const ValueBuilderFactory &factory); + /** + * Make a copy of the input and add all dense sub-spaces (with default cell value) existing only in the modifier. + * Then apply function(oldvalue, modifier.cellvalue) to the cells that exist in the modifier. + * The modifier type must be sparse with exactly the same dimension names as the input type. + * Returns null pointer if this constraint is violated. + **/ + static Value::UP modify_with_defaults(const Value& input, join_fun_t function, + const Value& modifier, double default_cell_value, const ValueBuilderFactory& factory); + /** * Make a copy of the input, but add or overwrite cells from add_cells. * Requires same type for input and add_cells. -- cgit v1.2.3