summaryrefslogtreecommitdiffstats
path: root/document
diff options
context:
space:
mode:
authorGeir Storli <geirst@yahooinc.com>2023-08-22 13:41:47 +0000
committerGeir Storli <geirst@yahooinc.com>2023-08-22 13:41:47 +0000
commiteb5cf28c586205a1c60df31583252cf4d3a8a030 (patch)
tree3b43f5eb5946f3bf93879cd47ffc064ca26b6911 /document
parent3ed739122e2bd5f029a7a68cfa99dafaa6ddefd5 (diff)
Add modify operation that creates non-existing sub-spaces with default cell values.
Diffstat (limited to 'document')
-rw-r--r--document/src/tests/tensor_fieldvalue/partial_modify/partial_modify_test.cpp57
-rw-r--r--document/src/vespa/document/update/tensor_partial_update.cpp115
-rw-r--r--document/src/vespa/document/update/tensor_partial_update.h9
3 files changed, 168 insertions, 13 deletions
diff --git a/document/src/tests/tensor_fieldvalue/partial_modify/partial_modify_test.cpp b/document/src/tests/tensor_fieldvalue/partial_modify/partial_modify_test.cpp
index 47ef9f21a27..bf0f893b901 100644
--- a/document/src/tests/tensor_fieldvalue/partial_modify/partial_modify_test.cpp
+++ b/document/src/tests/tensor_fieldvalue/partial_modify/partial_modify_test.cpp
@@ -60,12 +60,35 @@ Value::UP try_partial_modify(const TensorSpec &a, const TensorSpec &b, join_fun_
return TensorPartialUpdate::modify(*lhs, fun, *rhs, factory);
}
+Value::UP try_partial_modify_with_defaults(const TensorSpec &a, const TensorSpec &b, join_fun_t fun, double default_cell_value) { // Test helper: builds values from both specs and runs modify_with_defaults; the result may be null.
+ const auto &factory = SimpleValueBuilderFactory::get();
+ auto lhs = value_from_spec(a, factory); // the tensor being modified
+ auto rhs = value_from_spec(b, factory); // the modifier tensor
+ return TensorPartialUpdate::modify_with_defaults(*lhs, fun, *rhs, default_cell_value, factory); // handed back unchecked so callers can test for a null result
+}
+
TensorSpec perform_partial_modify(const TensorSpec &a, const TensorSpec &b, join_fun_t fun) {
auto up = try_partial_modify(a, b, fun);
EXPECT_TRUE(up);
return spec_from_value(*up);
}
+TensorSpec perform_partial_modify_with_defaults(const TensorSpec &a, const TensorSpec &b, join_fun_t fun, double default_cell_value) { // Test helper: same as try_partial_modify_with_defaults, but returns the result as a TensorSpec.
+ auto up = try_partial_modify_with_defaults(a, b, fun, default_cell_value);
+ EXPECT_TRUE(up); // records a failure on a null result; NOTE(review): the pointer is still dereferenced on the next line
+ return spec_from_value(*up);
+}
+
+void expect_modify_with_defaults(const vespalib::string& lhs_expr, const vespalib::string& rhs_expr,
+ join_fun_t fun, double default_cell_value, const vespalib::string& exp_expr) { // Parses the three tensor expressions, applies modify_with_defaults to lhs/rhs and expects the result to equal exp.
+ auto lhs = TensorSpec::from_expr(lhs_expr);
+ auto rhs = TensorSpec::from_expr(rhs_expr);
+ auto exp = TensorSpec::from_expr(exp_expr);
+ SCOPED_TRACE(fmt("\n===\nLHS: %s\nRHS: %s\n===\n", lhs.to_string().c_str(), rhs.to_string().c_str())); // installed before the call so the EXPECT_TRUE inside the helper is reported with the LHS/RHS trace as well
+ auto act = perform_partial_modify_with_defaults(lhs, rhs, fun, default_cell_value);
+ EXPECT_EQ(exp, act);
+}
+
TEST(PartialModifyTest, partial_modify_works_for_simple_values) {
for (const auto &layouts: modify_layouts) {
for (auto lhs_ct: CellTypeUtils::list_types()) {
@@ -87,6 +110,27 @@ TEST(PartialModifyTest, partial_modify_works_for_simple_values) {
}
}
+TEST(PartialModifyTest, partial_modify_with_defaults) {
+ expect_modify_with_defaults("tensor(x{}):{{x:\"a\"}:1,{x:\"b\"}:2}", // every modifier address already exists in the input: only {x:"b"} changes
+ "tensor(x{}):{{x:\"b\"}:3}",
+ operation::Add::f, 0.0,
+ "tensor(x{}):{{x:\"a\"}:1,{x:\"b\"}:5}");
+
+ expect_modify_with_defaults("tensor(x{}):{{x:\"a\"}:1,{x:\"b\"}:2}", // {x:"c"} is missing from the input: it starts at the default 0.0 and the modifier cell is added
+ "tensor(x{}):{{x:\"b\"}:3,{x:\"c\"}:4}",
+ operation::Add::f, 0.0,
+ "tensor(x{}):{{x:\"a\"}:1,{x:\"b\"}:5,{x:\"c\"}:4}");
+
+ expect_modify_with_defaults("tensor(x{},y[3]):{{x:\"a\",y:0}:3,{x:\"a\",y:1}:4,{x:\"a\",y:2}:5}", // mixed input: dense sub-spaces "b" and "c" are created with default 1.0 before the modifier cells are added
+ "tensor(x{},y{}):{{x:\"a\",y:\"0\"}:6,"
+ "{x:\"b\",y:\"1\"}:7,{x:\"b\",y:\"2\"}:8,"
+ "{x:\"c\",y:\"0\"}:9}",
+ operation::Add::f, 1.0,
+ "tensor(x{},y[3]):{{x:\"a\",y:0}:9,{x:\"a\",y:1}:4,{x:\"a\",y:2}:5,"
+ "{x:\"b\",y:0}:1,{x:\"b\",y:1}:8,{x:\"b\",y:2}:9,"
+ "{x:\"c\",y:0}:10,{x:\"c\",y:1}:1,{x:\"c\",y:2}:1}");
+}
+
std::vector<std::pair<vespalib::string,vespalib::string>> bad_layouts = {
{ "x3", "x3" },
{ "x3y4_1", "x3y4_1" },
@@ -108,4 +152,17 @@ TEST(PartialModifyTest, partial_modify_returns_nullptr_on_invalid_inputs) {
}
}
+TEST(PartialModifyTest, partial_modify_with_defaults_returns_nullptr_on_invalid_inputs) { // Every pair in bad_layouts must make modify_with_defaults return a null pointer.
+ for (const auto &layouts: bad_layouts) {
+ TensorSpec lhs = GenSpec::from_desc(layouts.first).seq(N());
+ TensorSpec rhs = GenSpec::from_desc(layouts.second).seq(Div16(N()));
+ SCOPED_TRACE(fmt("\n===\nLHS: %s\nRHS: %s\n===\n", lhs.to_string().c_str(), rhs.to_string().c_str())); // tags any failure below with the offending layout pair
+ for (auto fun: {operation::Add::f}) {
+ auto actual = try_partial_modify_with_defaults(lhs, rhs, fun, 0.0);
+ auto expect = Value::UP(); // default-constructed, i.e. empty, result pointer
+ EXPECT_EQ(actual, expect);
+ }
+ }
+}
+
GTEST_MAIN_RUN_ALL_TESTS()
diff --git a/document/src/vespa/document/update/tensor_partial_update.cpp b/document/src/vespa/document/update/tensor_partial_update.cpp
index 9c3db0edb5f..72bcc044977 100644
--- a/document/src/vespa/document/update/tensor_partial_update.cpp
+++ b/document/src/vespa/document/update/tensor_partial_update.cpp
@@ -1,11 +1,12 @@
// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
#include "tensor_partial_update.h"
+#include <vespa/eval/eval/array_array_map.h>
#include <vespa/eval/eval/operation.h>
#include <vespa/vespalib/util/overload.h>
+#include <vespa/vespalib/util/shared_string_repo.h>
#include <vespa/vespalib/util/typify.h>
#include <vespa/vespalib/util/visit_ranges.h>
-#include <vespa/vespalib/util/shared_string_repo.h>
#include <cassert>
#include <set>
@@ -203,27 +204,27 @@ struct PerformModify {
static Value::UP invoke(const Value &input,
join_fun_t function,
const Value &modifier,
- const ValueBuilderFactory &factory);
+ const ValueBuilderFactory &factory,
+ AddressHandler& handler,
+ Value::UP output);
};
template <typename ICT, typename MCT>
Value::UP
-PerformModify::invoke(const Value &input, join_fun_t function, const Value &modifier, const ValueBuilderFactory &factory)
+PerformModify::invoke(const Value &input, join_fun_t function, const Value &modifier, const ValueBuilderFactory &factory,
+ AddressHandler& handler, Value::UP output)
{
const ValueType &input_type = input.type();
const size_t dsss = input_type.dense_subspace_size();
- const ValueType &modifier_type = modifier.type();
- AddressHandler handler(input_type, modifier_type);
- if (! handler.valid) {
- return {};
+ if (!output) {
+ // copy input to output
+ output = copy_tensor<ICT>(input, input_type, handler.for_output, factory);
}
- // copy input to output
- auto out = copy_tensor<ICT>(input, input_type, handler.for_output, factory);
// need to overwrite some cells
- auto output_cells = unconstify(out->cells().template typify<ICT>());
+ auto output_cells = unconstify(output->cells().template typify<ICT>());
const auto modifier_cells = modifier.cells().typify<MCT>();
auto modifier_view = modifier.index().create_view({});
- auto lookup_view = out->index().create_view(handler.for_output.lookup_view_dims);
+ auto lookup_view = output->index().create_view(handler.for_output.lookup_view_dims);
modifier_view->lookup({});
size_t modifier_subspace_index;
while (modifier_view->next_result(handler.from_modifier.next_result_refs, modifier_subspace_index)) {
@@ -242,7 +243,70 @@ PerformModify::invoke(const Value &input, join_fun_t function, const Value &modi
dst[dense_idx] = function(lhs, rhs);
}
}
- return out;
+ return output;
+}
+
+void
+find_sub_spaces_not_in_input(const Value& input, const Value& modifier, double default_cell_value,
+ AddressHandler& handler, ArrayArrayMap<string_id, double>& sub_spaces_result)
+{ // Records in sub_spaces_result each sparse address that the modifier refers to but the input lacks, with its values set to default_cell_value.
+ auto lookup_view = input.index().create_view(handler.for_output.lookup_view_dims); // used to probe the input index for one address at a time
+ auto modifier_view = modifier.index().create_view({});
+ modifier_view->lookup({}); // presumably starts an enumeration of all modifier sub-spaces (no dimensions are bound)
+ size_t modifier_subspace_index;
+ while (modifier_view->next_result(handler.from_modifier.next_result_refs, modifier_subspace_index)) {
+ handler.handle_address(); // NOTE(review): looks like this translates the modifier address into the output address plus a dense index - confirm in AddressHandler
+ size_t dense_idx = handler.dense_converter.get_dense_index();
+ if (dense_idx == npos()) { // no usable dense index for this modifier address: ignore it
+ continue;
+ }
+ lookup_view->lookup(handler.for_output.lookup_refs);
+ size_t output_subspace_index;
+ if (!lookup_view->next_result({}, output_subspace_index)) { // the input has no sub-space at this address
+ ConstArrayRef<string_id> addr(handler.for_output.addr);
+ auto [tag, inserted] = sub_spaces_result.lookup_or_add_entry(addr);
+ if (inserted) { // fill the values only when the entry was just added; several modifier cells can hit the same address
+ auto values = sub_spaces_result.get_values(tag);
+ for (size_t i = 0; i < values.size(); ++i) {
+ values[i] = default_cell_value;
+ }
+ }
+ }
+ }
+}
+
+struct PerformInsertSubspaces { // Functor for typify_invoke; ICT is the cell type selected from the input (see modify_with_defaults).
+ template<typename ICT>
+ static Value::UP invoke(const Value& input, // tensor whose sub-spaces are copied into the result
+ SparseCoords& output_addrs, // passed on to copy_tensor_with_filter
+ const ArrayArrayMap<string_id, double>& sub_spaces, // extra sub-spaces to append: sparse address -> dense cell values
+ const ValueBuilderFactory& factory); // creates the builder for the result
+};
+
+template <typename ICT>
+Value::UP
+PerformInsertSubspaces::invoke(const Value& input,
+ SparseCoords& output_addrs,
+ const ArrayArrayMap<string_id, double>& sub_spaces,
+ const ValueBuilderFactory& factory)
+{ // Builds a new value of the input's type from the input's sub-spaces plus every entry in sub_spaces.
+ const auto& input_type = input.type();
+ const size_t num_mapped_in_input = input_type.count_mapped_dimensions();
+ const size_t dsss = input_type.dense_subspace_size();
+ const size_t expected_subspaces = input.index().size() + sub_spaces.size(); // existing sub-spaces plus the ones being added
+ auto builder = factory.create_value_builder<ICT>(input_type, num_mapped_in_input, dsss, expected_subspaces);
+ auto no_filter = [] (const auto&, size_t) { // predicate that always returns true
+ return true;
+ };
+ copy_tensor_with_filter<ICT>(input, dsss, output_addrs, *builder, no_filter); // presumably copies every input sub-space into the builder, since the filter never rejects
+ sub_spaces.each_entry([&](vespalib::ConstArrayRef<string_id> keys, vespalib::ConstArrayRef<double> values) {
+ auto dst = builder->add_subspace(keys).begin();
+ assert(dsss == values.size()); // each entry must hold exactly one dense sub-space worth of cells
+ for (size_t i = 0; i < dsss; ++i) {
+ dst[i] = values[i]; // values are stored as double; presumably converted to ICT by this assignment
+ }
+ });
+ return builder->build(std::move(builder));
+}
//-----------------------------------------------------------------------------
@@ -398,9 +462,34 @@ Value::UP
TensorPartialUpdate::modify(const Value &input, join_fun_t function,
const Value &modifier, const ValueBuilderFactory &factory)
{
+ AddressHandler handler(input.type(), modifier.type());
+ if (!handler.valid) {
+ return {};
+ }
+ return typify_invoke<2, TypifyCellType, PerformModify>(
+ input.cells().type, modifier.cells().type,
+ input, function, modifier, factory, handler, Value::UP());
+}
+
+Value::UP
+TensorPartialUpdate::modify_with_defaults(const Value& input, join_fun_t function,
+ const Value& modifier, double default_cell_value, const ValueBuilderFactory& factory)
+{ // Like modify(), but sub-spaces addressed only by the modifier are first created with default_cell_value.
+ AddressHandler handler(input.type(), modifier.type());
+ if (!handler.valid) { // unsupported input/modifier type combination: return a null pointer
+ return {};
+ }
+ const size_t dsss = input.type().dense_subspace_size();
+ ArrayArrayMap<string_id, double> sub_spaces(handler.for_output.addr.size(), dsss, modifier.index().size()); // NOTE(review): arguments look like (keys per entry, values per entry, expected entries) - confirm against ArrayArrayMap
+ find_sub_spaces_not_in_input(input, modifier, default_cell_value, handler, sub_spaces);
+ Value::UP output; // stays empty when nothing is missing; PerformModify then copies the input itself
+ if (sub_spaces.size() > 0) {
+ output = typify_invoke<1, TypifyCellType, PerformInsertSubspaces>(
+ input.cells().type, input, handler.for_output, sub_spaces, factory);
+ }
return typify_invoke<2, TypifyCellType, PerformModify>(
input.cells().type, modifier.cells().type,
- input, function, modifier, factory);
+ input, function, modifier, factory, handler, std::move(output)); // the modifier cells are applied on top of the pre-extended output (or a fresh copy of the input)
}
Value::UP
diff --git a/document/src/vespa/document/update/tensor_partial_update.h b/document/src/vespa/document/update/tensor_partial_update.h
index 196b14f6f5c..f3069d59a9b 100644
--- a/document/src/vespa/document/update/tensor_partial_update.h
+++ b/document/src/vespa/document/update/tensor_partial_update.h
@@ -24,6 +24,15 @@ struct TensorPartialUpdate {
const Value &modifier, const ValueBuilderFactory &factory);
/**
+ * Make a copy of the input and add all dense sub-spaces (with default cell value) existing only in the modifier.
+ * Then apply function(oldvalue, modifier.cellvalue) to the cells that exist in the modifier.
+ * The modifier type must be sparse with exactly the same dimension names as the input type.
+ * Returns null pointer if this constraint is violated.
+ **/
+ static Value::UP modify_with_defaults(const Value& input, join_fun_t function,
+ const Value& modifier, double default_cell_value, const ValueBuilderFactory& factory);
+
+ /**
* Make a copy of the input, but add or overwrite cells from add_cells.
* Requires same type for input and add_cells.
* Returns null pointer if this constraint is violated.