summaryrefslogtreecommitdiffstats
path: root/document
diff options
context:
space:
mode:
authorArne Juul <arnej@verizonmedia.com>2020-12-08 13:55:43 +0000
committerArne Juul <arnej@verizonmedia.com>2020-12-08 13:57:58 +0000
commit55b6fa9bbedbc372222c4a76924703f4525bc987 (patch)
tree7f58d6653fe7982d23bc6dae69dc56b163de209c /document
parent6ca5863b37cb94b1ebb223cbe3a44a4554f845eb (diff)
move partial_update files from eval to document
Diffstat (limited to 'document')
-rw-r--r--document/CMakeLists.txt3
-rw-r--r--document/src/tests/tensor_fieldvalue/partial_add/CMakeLists.txt9
-rw-r--r--document/src/tests/tensor_fieldvalue/partial_add/partial_add_test.cpp89
-rw-r--r--document/src/tests/tensor_fieldvalue/partial_modify/CMakeLists.txt9
-rw-r--r--document/src/tests/tensor_fieldvalue/partial_modify/partial_modify_test.cpp110
-rw-r--r--document/src/tests/tensor_fieldvalue/partial_remove/CMakeLists.txt9
-rw-r--r--document/src/tests/tensor_fieldvalue/partial_remove/partial_remove_test.cpp130
-rw-r--r--document/src/vespa/document/update/CMakeLists.txt1
-rw-r--r--document/src/vespa/document/update/tensor_add_update.cpp2
-rw-r--r--document/src/vespa/document/update/tensor_modify_update.cpp2
-rw-r--r--document/src/vespa/document/update/tensor_partial_update.cpp419
-rw-r--r--document/src/vespa/document/update/tensor_partial_update.h44
-rw-r--r--document/src/vespa/document/update/tensor_remove_update.cpp2
13 files changed, 826 insertions, 3 deletions
diff --git a/document/CMakeLists.txt b/document/CMakeLists.txt
index 8c49f7bd7bf..46da458fe6d 100644
--- a/document/CMakeLists.txt
+++ b/document/CMakeLists.txt
@@ -38,6 +38,9 @@ vespa_define_module(
src/tests/serialization
src/tests/struct_anno
src/tests/tensor_fieldvalue
+ src/tests/tensor_fieldvalue/partial_add
+ src/tests/tensor_fieldvalue/partial_modify
+ src/tests/tensor_fieldvalue/partial_remove
)
install_java_artifact(document)
diff --git a/document/src/tests/tensor_fieldvalue/partial_add/CMakeLists.txt b/document/src/tests/tensor_fieldvalue/partial_add/CMakeLists.txt
new file mode 100644
index 00000000000..8d5ee0df6e6
--- /dev/null
+++ b/document/src/tests/tensor_fieldvalue/partial_add/CMakeLists.txt
@@ -0,0 +1,9 @@
+# Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+vespa_add_executable(eval_partial_add_test_app TEST
+ SOURCES
+ partial_add_test.cpp
+ DEPENDS
+ document
+ GTest::GTest
+)
+vespa_add_test(NAME eval_partial_add_test_app COMMAND eval_partial_add_test_app)
diff --git a/document/src/tests/tensor_fieldvalue/partial_add/partial_add_test.cpp b/document/src/tests/tensor_fieldvalue/partial_add/partial_add_test.cpp
new file mode 100644
index 00000000000..db391a5b889
--- /dev/null
+++ b/document/src/tests/tensor_fieldvalue/partial_add/partial_add_test.cpp
@@ -0,0 +1,89 @@
+// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include <vespa/eval/eval/simple_value.h>
+#include <vespa/eval/eval/test/tensor_model.hpp>
+#include <vespa/eval/eval/value_codec.h>
+#include <vespa/document/update/tensor_partial_update.h>
+#include <vespa/vespalib/util/stringfmt.h>
+#include <vespa/vespalib/gtest/gtest.h>
+#include <optional>
+
+using namespace vespalib;
+using namespace vespalib::eval;
+using namespace vespalib::eval::test;
+
+using vespalib::make_string_short::fmt;
+
+// Layout pairs for the positive add test; the test walks this table two
+// entries at a time: even index = input tensor, odd index = cells to add.
+std::vector<Layout> add_layouts = {
+ {x({"a"})}, {x({"b"})},
+ {x({"a","b"})}, {x({"a","c"})},
+ float_cells({x({"a","b"})}), {x({"a","c"})},
+ {x({"a","b"})}, float_cells({x({"a","c"})}),
+ float_cells({x({"a","b"})}), float_cells({x({"a","c"})}),
+ {x({"a","b","c"}),y({"d","e"})}, {x({"b","f"}),y({"d","g"})},
+ {x(3),y({"a","b"})}, {x(3),y({"b","c"})}
+};
+
+// Reference implementation of "add": the result holds every cell of b,
+// plus those cells of a whose address does not occur in b.
+TensorSpec reference_add(const TensorSpec &a, const TensorSpec &b) {
+    TensorSpec result(a.type());
+    const auto &added_cells = b.cells();
+    for (const auto &entry: added_cells) {
+        result.add(entry.first, entry.second);
+    }
+    for (const auto &entry: a.cells()) {
+        if (added_cells.find(entry.first) != added_cells.end()) {
+            continue; // overwritten by a cell from b
+        }
+        result.add(entry.first, entry.second);
+    }
+    return result;
+}
+
+// Builds both specs as simple values and runs TensorPartialUpdate::add on them.
+// The result is passed through unchanged, so it may be a null pointer.
+Value::UP try_partial_add(const TensorSpec &a, const TensorSpec &b) {
+    const auto &factory = SimpleValueBuilderFactory::get();
+    auto input = value_from_spec(a, factory);
+    auto add_cells = value_from_spec(b, factory);
+    return tensor::TensorPartialUpdate::add(*input, *add_cells, factory);
+}
+
+// Runs the partial add and converts the result back to a TensorSpec.
+// A null result is reported as a (non-fatal) test failure and mapped to an
+// "error" spec, so that a failing case does not crash the whole test binary
+// by dereferencing a null pointer.
+TensorSpec perform_partial_add(const TensorSpec &a, const TensorSpec &b) {
+    auto up = try_partial_add(a, b);
+    EXPECT_TRUE(up);
+    if (!up) {
+        // EXPECT_TRUE does not abort; bail out before touching *up
+        return TensorSpec("error");
+    }
+    return spec_from_value(*up);
+}
+
+// For each (input, add_cells) layout pair, the partial add must agree with reference_add.
+TEST(PartialAddTest, partial_add_works_for_simple_values) {
+    ASSERT_TRUE((add_layouts.size() % 2) == 0);
+    const size_t num_pairs = add_layouts.size() / 2;
+    for (size_t pair = 0; pair < num_pairs; ++pair) {
+        const Layout &lhs_layout = add_layouts[2 * pair];
+        const Layout &rhs_layout = add_layouts[(2 * pair) + 1];
+        TensorSpec lhs = spec(lhs_layout, N());
+        TensorSpec rhs = spec(rhs_layout, Div16(N()));
+        SCOPED_TRACE(fmt("\n===\nLHS: %s\nRHS: %s\n===\n", lhs.to_string().c_str(), rhs.to_string().c_str()));
+        auto expect = reference_add(lhs, rhs);
+        auto actual = perform_partial_add(lhs, rhs);
+        EXPECT_EQ(actual, expect);
+    }
+}
+
+// Layout pairs (input, add_cells) for the negative add test, which expects
+// a null pointer result for every pair.
+std::vector<Layout> bad_layouts = {
+ {x(3)}, {x(3),y(1)},
+ {x(3),y(1)}, {x(3)},
+ {x(3),y(3)}, {x(3),y({"a"})},
+ {x(3),y({"a"})}, {x(3),y(3)},
+ {x({"a"})}, {x({"a"}),y({"b"})},
+ {x({"a"}),y({"b"})}, {x({"a"})},
+ {x({"a"})}, {x({"a"}),y(1)}
+};
+
+// Every pair in bad_layouts must make the partial add return a null pointer.
+TEST(PartialAddTest, partial_add_returns_nullptr_on_invalid_inputs) {
+    ASSERT_TRUE((bad_layouts.size() % 2) == 0);
+    const size_t num_pairs = bad_layouts.size() / 2;
+    for (size_t pair = 0; pair < num_pairs; ++pair) {
+        TensorSpec lhs = spec(bad_layouts[2 * pair], N());
+        TensorSpec rhs = spec(bad_layouts[(2 * pair) + 1], Div16(N()));
+        SCOPED_TRACE(fmt("\n===\nLHS: %s\nRHS: %s\n===\n", lhs.to_string().c_str(), rhs.to_string().c_str()));
+        Value::UP actual = try_partial_add(lhs, rhs);
+        Value::UP expect;
+        EXPECT_EQ(actual, expect);
+    }
+}
+
+GTEST_MAIN_RUN_ALL_TESTS()
diff --git a/document/src/tests/tensor_fieldvalue/partial_modify/CMakeLists.txt b/document/src/tests/tensor_fieldvalue/partial_modify/CMakeLists.txt
new file mode 100644
index 00000000000..8fde8339f66
--- /dev/null
+++ b/document/src/tests/tensor_fieldvalue/partial_modify/CMakeLists.txt
@@ -0,0 +1,9 @@
+# Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+vespa_add_executable(eval_partial_modify_test_app TEST
+ SOURCES
+ partial_modify_test.cpp
+ DEPENDS
+ document
+ GTest::GTest
+)
+vespa_add_test(NAME eval_partial_modify_test_app COMMAND eval_partial_modify_test_app)
diff --git a/document/src/tests/tensor_fieldvalue/partial_modify/partial_modify_test.cpp b/document/src/tests/tensor_fieldvalue/partial_modify/partial_modify_test.cpp
new file mode 100644
index 00000000000..a4562c09e50
--- /dev/null
+++ b/document/src/tests/tensor_fieldvalue/partial_modify/partial_modify_test.cpp
@@ -0,0 +1,110 @@
+// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include <vespa/eval/eval/simple_value.h>
+#include <vespa/eval/eval/test/tensor_model.hpp>
+#include <vespa/eval/eval/value_codec.h>
+#include <vespa/document/update/tensor_partial_update.h>
+#include <vespa/vespalib/util/stringfmt.h>
+#include <vespa/vespalib/gtest/gtest.h>
+#include <optional>
+
+using namespace vespalib;
+using namespace vespalib::eval;
+using namespace vespalib::eval::test;
+
+using vespalib::make_string_short::fmt;
+
+// Layout pairs for the positive modify test; the test walks this table two
+// entries at a time: even index = input tensor, odd index = modifier tensor.
+std::vector<Layout> modify_layouts = {
+ {x({"a"})}, {x({"a"})},
+ {x({"a",""})}, {x({"b","c","d","e"})},
+ {x(5)}, {x({"1","2","foo","17"})},
+ {x({"a","b","c"}),y({"d","e"})}, {x({"b"}),y({"d"})},
+ {x({"a","b","c"})}, {x({"b","c","d"})},
+ {x(4),y({"a","b","c","d"}),z(5)}, {x({"1","2"}),y({"b","d"}),z({"1","3"})},
+ {x(3),y(2)}, {x({"0","1"}),y({"0","1"})},
+ {x({"a","","b"})}, {x({""})}
+};
+
+// Returns a copy of the address where every indexed label has been replaced
+// by its index printed as a decimal string; other labels are copied as-is.
+TensorSpec::Address sparsify(const TensorSpec::Address &input) {
+    TensorSpec::Address output;
+    for (const auto &entry : input) {
+        const auto &label = entry.second;
+        if (!label.is_indexed()) {
+            output.emplace(entry.first, label);
+            continue;
+        }
+        auto index_as_text = fmt("%zu", label.index);
+        output.emplace(entry.first, index_as_text);
+    }
+    return output;
+}
+
+// Reference implementation of "modify": each cell of a is kept, and when b
+// has a cell at the sparsified address the value becomes fun(old, b's value).
+TensorSpec reference_modify(const TensorSpec &a, const TensorSpec &b, join_fun_t fun) {
+    TensorSpec result(a.type());
+    const auto &modifier_cells = b.cells();
+    for (const auto &entry: a.cells()) {
+        double value = entry.second;
+        auto hit = modifier_cells.find(sparsify(entry.first));
+        if (hit != modifier_cells.end()) {
+            value = fun(value, hit->second);
+        }
+        result.add(entry.first, value);
+    }
+    return result;
+}
+
+// Builds both specs as simple values and runs TensorPartialUpdate::modify on them.
+// The result is passed through unchanged, so it may be a null pointer.
+Value::UP try_partial_modify(const TensorSpec &a, const TensorSpec &b, join_fun_t fun) {
+    const auto &factory = SimpleValueBuilderFactory::get();
+    auto input = value_from_spec(a, factory);
+    auto modifier = value_from_spec(b, factory);
+    return tensor::TensorPartialUpdate::modify(*input, fun, *modifier, factory);
+}
+
+// Runs the partial modify and converts the result back to a TensorSpec.
+// A null result is reported as a (non-fatal) test failure and mapped to an
+// "error" spec, so that a failing case does not crash the whole test binary
+// by dereferencing a null pointer.
+TensorSpec perform_partial_modify(const TensorSpec &a, const TensorSpec &b, join_fun_t fun) {
+    auto up = try_partial_modify(a, b, fun);
+    EXPECT_TRUE(up);
+    if (!up) {
+        // EXPECT_TRUE does not abort; bail out before touching *up
+        return TensorSpec("error");
+    }
+    return spec_from_value(*up);
+}
+
+// For each (input, modifier) layout pair, the partial modify must agree with
+// reference_modify for add, multiply, subtract and plain replacement.
+TEST(PartialModifyTest, partial_modify_works_for_simple_values) {
+    ASSERT_TRUE((modify_layouts.size() % 2) == 0);
+    const size_t num_pairs = modify_layouts.size() / 2;
+    for (size_t pair = 0; pair < num_pairs; ++pair) {
+        TensorSpec lhs = spec(modify_layouts[2 * pair], N());
+        TensorSpec rhs = spec(modify_layouts[(2 * pair) + 1], Div16(N()));
+        SCOPED_TRACE(fmt("\n===\nLHS: %s\nRHS: %s\n===\n", lhs.to_string().c_str(), rhs.to_string().c_str()));
+        for (auto fun: {operation::Add::f, operation::Mul::f, operation::Sub::f}) {
+            auto expect = reference_modify(lhs, rhs, fun);
+            auto actual = perform_partial_modify(lhs, rhs, fun);
+            EXPECT_EQ(actual, expect);
+        }
+        {
+            // "replace": ignore the old value and keep the modifier value
+            auto fun = [](double, double keep) { return keep; };
+            auto expect = reference_modify(lhs, rhs, fun);
+            auto actual = perform_partial_modify(lhs, rhs, fun);
+            EXPECT_EQ(actual, expect);
+        }
+    }
+}
+
+// Layout pairs (input, modifier) for the negative modify test, which expects
+// a null pointer result for every pair.
+std::vector<Layout> bad_layouts = {
+ {x(3)}, {x(3)},
+ {x(3),y({"a"})}, {x(3),y({"a"})},
+ {x({"a"})}, {x({"a"}),y({"b"})},
+ {x({"a"}),y({"b"})}, {x({"a"})},
+ {x({"a"})}, {x({"a"}),y(1)}
+};
+
+// Every pair in bad_layouts must make the partial modify return a null pointer.
+TEST(PartialModifyTest, partial_modify_returns_nullptr_on_invalid_inputs) {
+    ASSERT_TRUE((bad_layouts.size() % 2) == 0);
+    // the join function is irrelevant for rejected inputs; one is enough
+    auto fun = operation::Add::f;
+    const size_t num_pairs = bad_layouts.size() / 2;
+    for (size_t pair = 0; pair < num_pairs; ++pair) {
+        TensorSpec lhs = spec(bad_layouts[2 * pair], N());
+        TensorSpec rhs = spec(bad_layouts[(2 * pair) + 1], Div16(N()));
+        SCOPED_TRACE(fmt("\n===\nLHS: %s\nRHS: %s\n===\n", lhs.to_string().c_str(), rhs.to_string().c_str()));
+        Value::UP actual = try_partial_modify(lhs, rhs, fun);
+        Value::UP expect;
+        EXPECT_EQ(actual, expect);
+    }
+}
+
+GTEST_MAIN_RUN_ALL_TESTS()
diff --git a/document/src/tests/tensor_fieldvalue/partial_remove/CMakeLists.txt b/document/src/tests/tensor_fieldvalue/partial_remove/CMakeLists.txt
new file mode 100644
index 00000000000..7382ced9490
--- /dev/null
+++ b/document/src/tests/tensor_fieldvalue/partial_remove/CMakeLists.txt
@@ -0,0 +1,9 @@
+# Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+vespa_add_executable(eval_partial_remove_test_app TEST
+ SOURCES
+ partial_remove_test.cpp
+ DEPENDS
+ document
+ GTest::GTest
+)
+vespa_add_test(NAME eval_partial_remove_test_app COMMAND eval_partial_remove_test_app)
diff --git a/document/src/tests/tensor_fieldvalue/partial_remove/partial_remove_test.cpp b/document/src/tests/tensor_fieldvalue/partial_remove/partial_remove_test.cpp
new file mode 100644
index 00000000000..7b5b17b9cf8
--- /dev/null
+++ b/document/src/tests/tensor_fieldvalue/partial_remove/partial_remove_test.cpp
@@ -0,0 +1,130 @@
+// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include <vespa/eval/eval/simple_value.h>
+#include <vespa/eval/eval/test/tensor_model.hpp>
+#include <vespa/eval/eval/value_codec.h>
+#include <vespa/document/update/tensor_partial_update.h>
+#include <vespa/vespalib/util/stringfmt.h>
+#include <vespa/vespalib/gtest/gtest.h>
+#include <optional>
+
+using namespace vespalib;
+using namespace vespalib::eval;
+using namespace vespalib::eval::test;
+
+using vespalib::make_string_short::fmt;
+
+// Layout pairs for the positive remove test; the test walks this table two
+// entries at a time: even index = input tensor, odd index = remove spec.
+std::vector<Layout> remove_layouts = {
+ {x({"a"})}, {x({"b"})},
+ {x({"a","b"})}, {x({"a","c"})},
+ {x({"a","b"})}, {x({"a","b"})},
+ float_cells({x({"a","b"})}), {x({"a","c"})},
+ {x({"a","b"})}, float_cells({x({"a","c"})}),
+ float_cells({x({"a","b"})}), float_cells({x({"a","c"})}),
+ {x({"a","b","c"}),y({"d","e"})}, {x({"b","f"}),y({"d","g"})},
+ {x(3),y({"a","b"})}, {y({"b","c"})}
+};
+
+// Returns the part of the address that uses mapped labels; all other
+// entries are dropped.
+TensorSpec::Address only_sparse(const TensorSpec::Address &input) {
+    TensorSpec::Address output;
+    for (const auto &entry : input) {
+        if (!entry.second.is_mapped()) {
+            continue;
+        }
+        output.emplace(entry.first, entry.second);
+    }
+    return output;
+}
+
+// Reference implementation of "remove": keeps the cells of a whose mapped
+// address part is not present as a cell address in b.
+TensorSpec reference_remove(const TensorSpec &a, const TensorSpec &b) {
+    TensorSpec result(a.type());
+    const auto &removed = b.cells();
+    for (const auto &entry: a.cells()) {
+        const bool is_removed = (removed.find(only_sparse(entry.first)) != removed.end());
+        if (!is_removed) {
+            result.add(entry.first, entry.second);
+        }
+    }
+    return result;
+}
+
+// Builds both specs as simple values and runs TensorPartialUpdate::remove on them.
+// The result is passed through unchanged, so it may be a null pointer.
+Value::UP try_partial_remove(const TensorSpec &a, const TensorSpec &b) {
+    const auto &factory = SimpleValueBuilderFactory::get();
+    auto input = value_from_spec(a, factory);
+    auto remove_spec = value_from_spec(b, factory);
+    return tensor::TensorPartialUpdate::remove(*input, *remove_spec, factory);
+}
+
+// Runs the partial remove and converts the result back to a TensorSpec.
+// A null result is reported as a (non-fatal) test failure and mapped to an
+// "error" spec, so that a failing case does not crash the whole test binary
+// by dereferencing a null pointer.
+TensorSpec perform_partial_remove(const TensorSpec &a, const TensorSpec &b) {
+    auto up = try_partial_remove(a, b);
+    EXPECT_TRUE(up);
+    if (!up) {
+        // EXPECT_TRUE does not abort; bail out before touching *up
+        return TensorSpec("error");
+    }
+    return spec_from_value(*up);
+}
+
+// For each (input, remove spec) layout pair, the partial remove must agree with reference_remove.
+TEST(PartialRemoveTest, partial_remove_works_for_simple_values) {
+    ASSERT_TRUE((remove_layouts.size() % 2) == 0);
+    const size_t num_pairs = remove_layouts.size() / 2;
+    for (size_t pair = 0; pair < num_pairs; ++pair) {
+        TensorSpec lhs = spec(remove_layouts[2 * pair], N());
+        TensorSpec rhs = spec(remove_layouts[(2 * pair) + 1], Div16(N()));
+        SCOPED_TRACE(fmt("\n===\nLHS: %s\nRHS: %s\n===\n", lhs.to_string().c_str(), rhs.to_string().c_str()));
+        auto expect = reference_remove(lhs, rhs);
+        auto actual = perform_partial_remove(lhs, rhs);
+        EXPECT_EQ(actual, expect);
+    }
+}
+
+// Layout pairs (input, remove spec) for the negative remove test, which
+// expects a null pointer result for every pair.
+std::vector<Layout> bad_layouts = {
+ {x(3)}, {x(3)},
+ {x(3),y({"a"})}, {x(3)},
+ {x(3),y({"a"})}, {x(3),y({"a"})},
+ {x({"a"})}, {y({"a"})},
+ {x({"a"})}, {x({"a"}),y({"b"})}
+};
+
+// Every pair in bad_layouts must make the partial remove return a null pointer.
+TEST(PartialRemoveTest, partial_remove_returns_nullptr_on_invalid_inputs) {
+    ASSERT_TRUE((bad_layouts.size() % 2) == 0);
+    const size_t num_pairs = bad_layouts.size() / 2;
+    for (size_t pair = 0; pair < num_pairs; ++pair) {
+        TensorSpec lhs = spec(bad_layouts[2 * pair], N());
+        TensorSpec rhs = spec(bad_layouts[(2 * pair) + 1], Div16(N()));
+        SCOPED_TRACE(fmt("\n===\nLHS: %s\nRHS: %s\n===\n", lhs.to_string().c_str(), rhs.to_string().c_str()));
+        Value::UP actual = try_partial_remove(lhs, rhs);
+        Value::UP expect;
+        EXPECT_EQ(actual, expect);
+    }
+}
+
+// Checks that removing the cells addressed by 'remove' from 'input' gives 'exp'.
+void
+expect_partial_remove(const TensorSpec& input, const TensorSpec& remove, const TensorSpec& exp)
+{
+    const TensorSpec act = perform_partial_remove(input, remove);
+    EXPECT_EQ(exp, act);
+}
+
+// A remove spec that names only some of the mapped dimensions removes every
+// cell matching the labels it does name.
+TEST(PartialRemoveTest, remove_where_address_is_not_fully_specified) {
+    // input with only mapped dimensions
+    TensorSpec sparse_in("tensor(x{},y{})");
+    sparse_in.add({{"x", "a"},{"y", "c"}}, 3.0);
+    sparse_in.add({{"x", "a"},{"y", "d"}}, 5.0);
+    sparse_in.add({{"x", "b"},{"y", "c"}}, 7.0);
+
+    expect_partial_remove(sparse_in,
+                          TensorSpec("tensor(x{})").add({{"x", "a"}}, 1.0),
+                          TensorSpec("tensor(x{},y{})").add({{"x", "b"},{"y", "c"}}, 7.0));
+
+    expect_partial_remove(sparse_in,
+                          TensorSpec("tensor(y{})").add({{"y", "c"}}, 1.0),
+                          TensorSpec("tensor(x{},y{})").add({{"x", "a"},{"y", "d"}}, 5.0));
+
+    expect_partial_remove(sparse_in,
+                          TensorSpec("tensor(y{})").add({{"y", "d"}}, 1.0),
+                          TensorSpec("tensor(x{},y{})").add({{"x", "a"},{"y", "c"}}, 3.0)
+                                                       .add({{"x", "b"},{"y", "c"}}, 7.0));
+
+    // input with an additional indexed dimension
+    TensorSpec mixed_in("tensor(x{},y{},z[1])");
+    mixed_in.add({{"x", "a"},{"y", "c"},{"z", 0}}, 3.0);
+    mixed_in.add({{"x", "a"},{"y", "d"},{"z", 0}}, 5.0);
+    mixed_in.add({{"x", "b"},{"y", "c"},{"z", 0}}, 7.0);
+
+    expect_partial_remove(mixed_in,
+                          TensorSpec("tensor(x{})").add({{"x", "a"}}, 1.0),
+                          TensorSpec("tensor(x{},y{},z[1])").add({{"x", "b"},{"y", "c"},{"z", 0}}, 7.0));
+
+    expect_partial_remove(mixed_in,
+                          TensorSpec("tensor(y{})").add({{"y", "c"}}, 1.0),
+                          TensorSpec("tensor(x{},y{},z[1])").add({{"x", "a"},{"y", "d"},{"z", 0}}, 5.0));
+}
+
+GTEST_MAIN_RUN_ALL_TESTS()
diff --git a/document/src/vespa/document/update/CMakeLists.txt b/document/src/vespa/document/update/CMakeLists.txt
index a587d8e3e2d..b0ffa056e1a 100644
--- a/document/src/vespa/document/update/CMakeLists.txt
+++ b/document/src/vespa/document/update/CMakeLists.txt
@@ -15,6 +15,7 @@ vespa_add_library(document_updates OBJECT
removevalueupdate.cpp
tensor_add_update.cpp
tensor_modify_update.cpp
+ tensor_partial_update.cpp
tensor_remove_update.cpp
valueupdate.cpp
DEPENDS
diff --git a/document/src/vespa/document/update/tensor_add_update.cpp b/document/src/vespa/document/update/tensor_add_update.cpp
index c9ffad2a789..8846ec2fc0a 100644
--- a/document/src/vespa/document/update/tensor_add_update.cpp
+++ b/document/src/vespa/document/update/tensor_add_update.cpp
@@ -1,6 +1,7 @@
// Copyright 2019 Oath Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
#include "tensor_add_update.h"
+#include "tensor_partial_update.h"
#include <vespa/document/base/exceptions.h>
#include <vespa/document/base/field.h>
#include <vespa/document/datatype/tensor_data_type.h>
@@ -10,7 +11,6 @@
#include <vespa/document/util/serializableexceptions.h>
#include <vespa/eval/eval/value.h>
#include <vespa/eval/eval/fast_value.h>
-#include <vespa/eval/tensor/partial_update.h>
#include <vespa/vespalib/objects/nbostream.h>
#include <vespa/vespalib/stllike/asciistream.h>
#include <vespa/vespalib/util/stringfmt.h>
diff --git a/document/src/vespa/document/update/tensor_modify_update.cpp b/document/src/vespa/document/update/tensor_modify_update.cpp
index 4da93d0ae46..bc4085ec4fa 100644
--- a/document/src/vespa/document/update/tensor_modify_update.cpp
+++ b/document/src/vespa/document/update/tensor_modify_update.cpp
@@ -1,6 +1,7 @@
// Copyright 2019 Oath Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
#include "tensor_modify_update.h"
+#include "tensor_partial_update.h"
#include <vespa/document/base/exceptions.h>
#include <vespa/document/base/field.h>
#include <vespa/document/datatype/tensor_data_type.h>
@@ -11,7 +12,6 @@
#include <vespa/eval/eval/operation.h>
#include <vespa/eval/eval/value.h>
#include <vespa/eval/eval/fast_value.h>
-#include <vespa/eval/tensor/partial_update.h>
#include <vespa/vespalib/objects/nbostream.h>
#include <vespa/vespalib/stllike/asciistream.h>
#include <vespa/vespalib/util/stringfmt.h>
diff --git a/document/src/vespa/document/update/tensor_partial_update.cpp b/document/src/vespa/document/update/tensor_partial_update.cpp
new file mode 100644
index 00000000000..9bf243602dd
--- /dev/null
+++ b/document/src/vespa/document/update/tensor_partial_update.cpp
@@ -0,0 +1,419 @@
+// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include "tensor_partial_update.h"
+#include <vespa/eval/eval/operation.h>
+#include <vespa/vespalib/util/overload.h>
+#include <vespa/vespalib/util/typify.h>
+#include <vespa/vespalib/util/visit_ranges.h>
+#include <cassert>
+#include <set>
+
+#include <vespa/log/log.h>
+LOG_SETUP(".eval.tensor.partial_update");
+
+using namespace vespalib::eval;
+
+namespace vespalib::tensor {
+
+namespace {
+
+using join_fun_t = vespalib::eval::operation::op2_t;
+
+// Sentinel meaning "no valid dense index": the largest size_t value.
+static constexpr size_t npos() { return static_cast<size_t>(-1); }
+
+// How the modifier label of one dimension is used when building an output address.
+enum class DimCase {
+    MAPPED_MATCH,    // output dimension is mapped: the label is used as-is
+    CONV_TO_INDEXED  // output dimension is indexed: the label is parsed as an index
+};
+
+/**
+ * Accumulates the offset into a dense subspace while labels for the
+ * indexed dimensions of the output type are converted one at a time
+ * (in dimension order) by convert_label().
+ * The offset becomes npos() as soon as any label is not a valid index
+ * for its dimension, and stays npos() until clear() is called.
+ **/
+struct DenseCoords {
+    std::vector<size_t> dim_sizes;
+    size_t total_size;
+    size_t offset;
+    size_t current;
+    DenseCoords(const ValueType &output_type)
+        : total_size(1), offset(0), current(0)
+    {
+        for (const auto & dim : output_type.dimensions()) {
+            if (dim.is_indexed()) {
+                dim_sizes.push_back(dim.size);
+                total_size *= dim.size;
+            }
+        }
+    }
+    ~DenseCoords();
+    // Start over on a new address.
+    void clear() { offset = 0; current = 0; }
+    /**
+     * Parse 'label' as a decimal index into the current indexed dimension
+     * and fold it into the offset, then advance to the next dimension.
+     * A label containing a non-digit, or a value outside the dimension,
+     * invalidates the offset.
+     **/
+    void convert_label(vespalib::stringref label) {
+        const size_t cur_dim_size = dim_sizes[current];
+        size_t coord = 0;
+        bool valid_coord = true;
+        for (char c : label) {
+            if (c < '0' || c > '9') { // bad char
+                valid_coord = false;
+                break;
+            }
+            coord = coord * 10 + (c - '0');
+            // Stop as soon as the value is out of range. This keeps coord
+            // below the dimension size before each multiplication, so a very
+            // long digit string can no longer wrap around and be mistaken
+            // for a small valid index.
+            if (coord >= cur_dim_size) {
+                valid_coord = false;
+                break;
+            }
+        }
+        if (valid_coord && (offset != npos())) {
+            offset = (offset * cur_dim_size) + coord;
+        } else {
+            offset = npos();
+        }
+        ++current;
+    }
+    // Offset within the dense subspace, or npos() if any label was invalid.
+    // Only meaningful after all indexed dimensions have been converted.
+    size_t get_dense_index() const {
+        assert(current == dim_sizes.size());
+        return offset;
+    }
+};
+DenseCoords::~DenseCoords() = default;
+
+/**
+ * Storage for a sparse address with 'sz' labels, together with the
+ * pointer vectors and view dimension list that refer to that storage.
+ * The pointer vectors point into 'addr'.
+ **/
+struct SparseCoords {
+    std::vector<vespalib::stringref> addr;
+    std::vector<vespalib::stringref *> next_result_refs;
+    std::vector<const vespalib::stringref *> lookup_refs;
+    std::vector<size_t> lookup_view_dims;
+    SparseCoords(size_t sz)
+        : addr(sz), next_result_refs(), lookup_refs(), lookup_view_dims()
+    {
+        next_result_refs.reserve(sz);
+        lookup_refs.reserve(sz);
+        lookup_view_dims.reserve(sz);
+        for (size_t dim = 0; dim < sz; ++dim) {
+            vespalib::stringref *slot = &addr[dim];
+            next_result_refs.push_back(slot);
+            lookup_refs.push_back(slot);
+            lookup_view_dims.push_back(dim);
+        }
+    }
+    ~SparseCoords();
+};
+SparseCoords::~SparseCoords() = default;
+
+/**
+ * Helper class that converts a fully-sparse address from the modifier
+ * tensor into a subset sparse address for the output and an offset
+ * in the dense subspace.
+ *
+ * After construction, 'valid' is false if the modifier type is not
+ * sparse or if the two types do not have the same dimension names;
+ * the handler must not be used in that case.
+ **/
+struct AddressHandler {
+ // one entry per dimension: how the modifier label for it is used
+ std::vector<DimCase> dimension_plan;
+ DenseCoords dense_converter;
+ SparseCoords for_output;
+ SparseCoords from_modifier;
+ bool valid;
+
+ AddressHandler(const ValueType &output_type,
+ const ValueType &modifier_type)
+ : dimension_plan(), dense_converter(output_type),
+ for_output(output_type.count_mapped_dimensions()),
+ from_modifier(modifier_type.count_mapped_dimensions()),
+ valid(true)
+ {
+ if (! modifier_type.is_sparse()) {
+ LOG(error, "Unexpected non-sparse modifier tensor, type is %s",
+ modifier_type.to_spec().c_str());
+ valid = false;
+ return;
+ }
+ // analyse dimensions
+ // a dimension name found in only one of the types makes the handler invalid;
+ // a name found in both adds one entry to the plan
+ auto visitor = overload {
+ [&](visit_ranges_either, const auto &) { valid = false; },
+ [&](visit_ranges_both, const auto &a, const auto &) {
+ dimension_plan.push_back(a.is_mapped() ? DimCase::MAPPED_MATCH : DimCase::CONV_TO_INDEXED);
+ }
+ };
+ const auto & input_dims = output_type.dimensions();
+ const auto & modifier_dims = modifier_type.dimensions();
+ visit_ranges(visitor,
+ input_dims.begin(), input_dims.end(),
+ modifier_dims.begin(), modifier_dims.end(),
+ [](const auto &a, const auto &b){ return (a.name < b.name); });
+ if (! valid) {
+ LOG(error, "Value type %s does not match modifier type %s (should have same dimensions)",
+ output_type.to_spec().c_str(), modifier_type.to_spec().c_str());
+ return;
+ }
+ // implicitly checked above, must hold:
+ assert(input_dims.size() == modifier_dims.size());
+ // the plan should now be fully built:
+ assert(input_dims.size() == dimension_plan.size());
+ }
+
+ // Converts the address currently held in from_modifier.addr:
+ // labels for mapped output dimensions are copied to for_output.addr,
+ // the remaining labels are fed to dense_converter.
+ void handle_address()
+ {
+ dense_converter.clear();
+ auto out = for_output.addr.begin();
+ for (size_t i = 0; i < dimension_plan.size(); ++i) {
+ if (dimension_plan[i] == DimCase::CONV_TO_INDEXED) {
+ dense_converter.convert_label(from_modifier.addr[i]);
+ } else {
+ *out++ = from_modifier.addr[i];
+ }
+ }
+ assert(out == for_output.addr.end());
+ assert(dense_converter.current == dense_converter.dim_sizes.size());
+ }
+
+ ~AddressHandler();
+};
+AddressHandler::~AddressHandler() = default;
+
+/**
+ * Visits every subspace of 'input' and copies the dense subspace (dsss cells,
+ * converted from ICT to CT) into 'builder' when keep_subspace returns true.
+ * The sparse address of the visited subspace is available in 'addrs'
+ * while keep_subspace is called.
+ **/
+template <typename CT, typename ICT = CT, typename KeepFun>
+void copy_tensor_with_filter(const Value &input,
+                             size_t dsss,
+                             SparseCoords &addrs,
+                             ValueBuilder<CT> &builder,
+                             KeepFun && keep_subspace)
+{
+    const auto input_cells = input.cells().typify<ICT>();
+    auto all_subspaces = input.index().create_view({});
+    all_subspaces->lookup({});
+    size_t subspace_idx;
+    while (all_subspaces->next_result(addrs.next_result_refs, subspace_idx)) {
+        if (! keep_subspace(addrs.lookup_refs, subspace_idx)) {
+            continue;
+        }
+        auto src = input_cells.begin() + (dsss * subspace_idx);
+        auto dst = builder.add_subspace(addrs.addr).begin();
+        for (size_t cell = 0; cell < dsss; ++cell) {
+            dst[cell] = src[cell];
+        }
+    }
+}
+
+// Builds a new value of type 'input_type' holding a copy of every subspace of 'input'.
+template <typename CT>
+Value::UP
+copy_tensor(const Value &input, const ValueType &input_type, SparseCoords &helper, const ValueBuilderFactory &factory)
+{
+    const size_t dsss = input_type.dense_subspace_size();
+    auto builder = factory.create_value_builder<CT>(input_type,
+                                                    input_type.count_mapped_dimensions(),
+                                                    dsss,
+                                                    input.index().size());
+    auto keep_all = [] (const auto &, size_t) { return true; };
+    copy_tensor_with_filter<CT>(input, dsss, helper, *builder, keep_all);
+    return builder->build(std::move(builder));
+}
+
+//-----------------------------------------------------------------------------
+
+// Typified implementation of the modify operation:
+// ICT = cell type of the input, MCT = cell type of the modifier.
+struct PerformModify {
+    template<typename ICT, typename MCT>
+    static Value::UP invoke(const Value &input,
+                            join_fun_t function,
+                            const Value &modifier,
+                            const ValueBuilderFactory &factory);
+};
+
+/**
+ * Copies the input, then for each modifier cell whose address maps to an
+ * existing output cell, replaces that cell with function(old, modifier cell).
+ * Returns a null pointer when the types are incompatible.
+ **/
+template <typename ICT, typename MCT>
+Value::UP
+PerformModify::invoke(const Value &input, join_fun_t function, const Value &modifier, const ValueBuilderFactory &factory)
+{
+    const ValueType &input_type = input.type();
+    const ValueType &modifier_type = modifier.type();
+    AddressHandler handler(input_type, modifier_type);
+    if (! handler.valid) {
+        return {};
+    }
+    const size_t dsss = input_type.dense_subspace_size();
+    // start with a plain copy of the input ...
+    auto result = copy_tensor<ICT>(input, input_type, handler.for_output, factory);
+    // ... then overwrite the cells addressed by the modifier
+    auto result_cells = unconstify(result->cells().template typify<ICT>());
+    const auto modifier_cells = modifier.cells().typify<MCT>();
+    auto modifier_view = modifier.index().create_view({});
+    auto output_lookup = result->index().create_view(handler.for_output.lookup_view_dims);
+    modifier_view->lookup({});
+    size_t modifier_subspace;
+    while (modifier_view->next_result(handler.from_modifier.next_result_refs, modifier_subspace)) {
+        handler.handle_address();
+        const size_t dense_idx = handler.dense_converter.get_dense_index();
+        if (dense_idx != npos()) {
+            output_lookup->lookup(handler.for_output.lookup_refs);
+            size_t output_subspace;
+            if (output_lookup->next_result({}, output_subspace)) {
+                auto cell = result_cells.begin() + (dsss * output_subspace) + dense_idx;
+                ICT old_value = *cell;
+                MCT mod_value = modifier_cells[modifier_subspace];
+                *cell = function(old_value, mod_value);
+            }
+        }
+    }
+    return result;
+}
+
+//-----------------------------------------------------------------------------
+
+// Typified implementation of the add operation:
+// ICT = cell type of the input, MCT = cell type of the cells to add.
+struct PerformAdd {
+    template<typename ICT, typename MCT>
+    static Value::UP invoke(const Value &input,
+                            const Value &modifier,
+                            const ValueBuilderFactory &factory);
+};
+
+/**
+ * Builds a value containing all subspaces of the modifier, followed by
+ * those subspaces of the input that the modifier did not overwrite.
+ * Returns a null pointer when the dimensions of the two types differ.
+ **/
+template <typename ICT, typename MCT>
+Value::UP
+PerformAdd::invoke(const Value &input, const Value &modifier, const ValueBuilderFactory &factory)
+{
+    const ValueType &input_type = input.type();
+    const ValueType &modifier_type = modifier.type();
+    if (input_type.dimensions() != modifier_type.dimensions()) {
+        LOG(error, "when adding cells to a tensor, dimensions must be equal. "
+            "Got input type %s != modifier type %s",
+            input_type.to_spec().c_str(), modifier_type.to_spec().c_str());
+        return {};
+    }
+    const size_t num_mapped = input_type.count_mapped_dimensions();
+    const size_t dsss = input_type.dense_subspace_size();
+    const size_t num_input_subspaces = input.index().size();
+    const size_t max_subspaces = num_input_subspaces + modifier.index().size();
+    auto builder = factory.create_value_builder<ICT>(input_type, num_mapped, dsss, max_subspaces);
+    SparseCoords addrs(num_mapped);
+    auto input_lookup = input.index().create_view(addrs.lookup_view_dims);
+    // which input subspaces are replaced by a modifier subspace
+    std::vector<bool> overwritten(num_input_subspaces, false);
+    auto mark_overwritten = [&] (const auto & lookup_refs, size_t) {
+        input_lookup->lookup(lookup_refs);
+        size_t hit;
+        if (input_lookup->next_result({}, hit)) {
+            overwritten[hit] = true;
+        }
+        return true; // every modifier subspace is copied
+    };
+    copy_tensor_with_filter<ICT, MCT>(modifier, dsss, addrs, *builder, mark_overwritten);
+    auto not_overwritten = [&] (const auto &, size_t input_subspace) {
+        return ! overwritten[input_subspace];
+    };
+    copy_tensor_with_filter<ICT>(input, dsss, addrs, *builder, not_overwritten);
+    return builder->build(std::move(builder));
+}
+
+//-----------------------------------------------------------------------------
+
+// Typified implementation of the remove operation; ICT = cell type of the input.
+// The modifier's cell type is not a template parameter here.
+struct PerformRemove {
+ template<typename ICT>
+ static Value::UP invoke(const Value &input,
+ const Value &modifier,
+ const ValueBuilderFactory &factory);
+};
+
+/**
+ * For each mapped dimension of the modifier, finds its position among the
+ * mapped dimensions of the input.
+ *
+ * The mapped modifier dimensions are expected to be all of, or a subset of,
+ * the mapped input dimensions, in the same order.
+ * An empty vector is returned on type mismatch (and also when the modifier
+ * has no mapped dimensions at all).
+ */
+std::vector<size_t>
+calc_mapped_dimension_indexes(const ValueType& input_type,
+                              const ValueType& modifier_type)
+{
+    auto input_dims = input_type.mapped_dimensions();
+    auto mod_dims = modifier_type.mapped_dimensions();
+    if (mod_dims.size() > input_dims.size()) {
+        return {};
+    }
+    std::vector<size_t> result;
+    result.reserve(mod_dims.size());
+    // single forward scan: 'pos' never moves backwards
+    size_t pos = 0;
+    for (const auto &wanted : mod_dims) {
+        while ((pos < input_dims.size()) && (input_dims[pos] != wanted)) {
+            ++pos;
+        }
+        if (pos >= input_dims.size()) {
+            return {};
+        }
+        result.push_back(pos);
+    }
+    return result;
+}
+
+/**
+ * Lookup parameters for the modifier's index, where each modifier dimension
+ * refers directly to the label storage of the matching input dimension in
+ * 'input_coords'.
+ **/
+struct ModifierCoords {
+
+    std::vector<const vespalib::stringref *> lookup_refs;
+    std::vector<size_t> lookup_view_dims;
+
+    ModifierCoords(const SparseCoords& input_coords,
+                   const std::vector<size_t>& input_dim_indexes,
+                   const ValueType& modifier_type)
+        : lookup_refs(modifier_type.dimensions().size()),
+          lookup_view_dims(modifier_type.dimensions().size())
+    {
+        const size_t num_dims = input_dim_indexes.size();
+        assert(modifier_type.dimensions().size() == num_dims);
+        for (size_t dim = 0; dim < num_dims; ++dim) {
+            // modifier dimension 'dim' reads its label from the matching input dimension
+            const size_t input_dim = input_dim_indexes[dim];
+            lookup_refs[dim] = &input_coords.addr[input_dim];
+            lookup_view_dims[dim] = dim;
+        }
+    }
+    ~ModifierCoords() = default;
+};
+
+/**
+ * Copies every subspace of the input whose (partial) sparse address is not
+ * found in the modifier. Returns a null pointer when the input has no mapped
+ * dimensions, the modifier has indexed dimensions, or the mapped dimensions
+ * of the modifier cannot be matched against those of the input.
+ **/
+template <typename ICT>
+Value::UP
+PerformRemove::invoke(const Value &input, const Value &modifier, const ValueBuilderFactory &factory)
+{
+    const ValueType &input_type = input.type();
+    const ValueType &modifier_type = modifier.type();
+    const size_t num_mapped = input_type.count_mapped_dimensions();
+    if (num_mapped == 0) {
+        LOG(error, "Cannot remove cells from a dense input tensor of type %s",
+            input_type.to_spec().c_str());
+        return {};
+    }
+    if (modifier_type.count_indexed_dimensions() != 0) {
+        LOG(error, "Cannot remove cells using a modifier tensor of type %s",
+            modifier_type.to_spec().c_str());
+        return {};
+    }
+    const auto dim_indexes = calc_mapped_dimension_indexes(input_type, modifier_type);
+    if (dim_indexes.empty()) {
+        LOG(error, "Tensor type mismatch when removing cells from a tensor. "
+            "Got input type %s versus modifier type %s",
+            input_type.to_spec().c_str(), modifier_type.to_spec().c_str());
+        return {};
+    }
+    SparseCoords addrs(num_mapped);
+    ModifierCoords mod_coords(addrs, dim_indexes, modifier_type);
+    auto modifier_lookup = modifier.index().create_view(mod_coords.lookup_view_dims);
+    const size_t max_subspaces = input.index().size();
+    const size_t dsss = input_type.dense_subspace_size();
+    auto builder = factory.create_value_builder<ICT>(input_type, num_mapped, dsss, max_subspaces);
+    // mod_coords.lookup_refs point into the address storage of 'addrs', which
+    // holds the address of the subspace being visited; the lookup_refs
+    // argument is therefore not needed.
+    auto not_in_modifier = [&] (const auto &, size_t) {
+        modifier_lookup->lookup(mod_coords.lookup_refs);
+        size_t ignored_subspace;
+        const bool found = modifier_lookup->next_result({}, ignored_subspace);
+        return !found;
+    };
+    copy_tensor_with_filter<ICT>(input, dsss, addrs, *builder, not_in_modifier);
+    return builder->build(std::move(builder));
+}
+
+} // namespace <unnamed>
+
+//-----------------------------------------------------------------------------
+
+// Dispatches to PerformModify::invoke, instantiated for the cell types of
+// the input and the modifier.
+Value::UP
+TensorPartialUpdate::modify(const Value &input, join_fun_t function,
+                            const Value &modifier, const ValueBuilderFactory &factory)
+{
+    const auto input_cell_type = input.cells().type;
+    const auto modifier_cell_type = modifier.cells().type;
+    return typify_invoke<2, TypifyCellType, PerformModify>(
+            input_cell_type, modifier_cell_type,
+            input, function, modifier, factory);
+}
+
+// Dispatches to PerformAdd::invoke, instantiated for the cell types of
+// the input and the cells to add.
+Value::UP
+TensorPartialUpdate::add(const Value &input, const Value &add_cells, const ValueBuilderFactory &factory)
+{
+    const auto input_cell_type = input.cells().type;
+    const auto add_cell_type = add_cells.cells().type;
+    return typify_invoke<2, TypifyCellType, PerformAdd>(
+            input_cell_type, add_cell_type,
+            input, add_cells, factory);
+}
+
+// Dispatches to PerformRemove::invoke, instantiated for the cell type of
+// the input (cell values of remove_spec are not read).
+Value::UP
+TensorPartialUpdate::remove(const Value &input, const Value &remove_spec, const ValueBuilderFactory &factory)
+{
+    const auto input_cell_type = input.cells().type;
+    return typify_invoke<1, TypifyCellType, PerformRemove>(
+            input_cell_type,
+            input, remove_spec, factory);
+}
+
+} // namespace
diff --git a/document/src/vespa/document/update/tensor_partial_update.h b/document/src/vespa/document/update/tensor_partial_update.h
new file mode 100644
index 00000000000..b3e9d32fca8
--- /dev/null
+++ b/document/src/vespa/document/update/tensor_partial_update.h
@@ -0,0 +1,44 @@
+// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include <vespa/eval/eval/fast_value.h>
+#include <vespa/eval/eval/value.h>
+#include <vespa/eval/eval/operation.h>
+
+namespace vespalib::tensor {
+
+struct TensorPartialUpdate { // static helpers that each return an updated copy of a tensor value
+ using join_fun_t = vespalib::eval::operation::op2_t; // two-argument function applied by modify()
+ using Value = vespalib::eval::Value;
+ using ValueBuilderFactory = vespalib::eval::ValueBuilderFactory;
+
+ /**
+ * Returns a copy of 'input' in which every cell that also exists in 'modifier'
+ * is replaced by function(old_value, modifier_cell_value).
+ * The modifier type must be sparse with exactly the same dimension names
+ * as the input type.
+ * Returns a null pointer if this constraint is violated.
+ **/
+ static Value::UP modify(const Value &input, join_fun_t function,
+ const Value &modifier, const ValueBuilderFactory &factory);
+
+ /**
+ * Returns a copy of 'input' with the cells from 'add_cells' added; cells present in both are overwritten by 'add_cells'.
+ * Requires 'input' and 'add_cells' to have the same type.
+ * Returns a null pointer if this constraint is violated.
+ **/
+ static Value::UP add(const Value &input, const Value &add_cells, const ValueBuilderFactory &factory);
+
+ /**
+ * Returns a copy of 'input' without the cells that are present in 'remove_spec'.
+ * The remove_spec must be a sparse tensor with exactly the mapped dimensions
+ * that the input value has.
+ * Cell values in remove_spec are ignored.
+ * Not valid for dense tensors, since removing cells from those is impossible.
+ * Returns a null pointer if these constraints are violated.
+ **/
+ static Value::UP remove(const Value &input, const Value &remove_spec, const ValueBuilderFactory &factory);
+};
+
+} // namespace
diff --git a/document/src/vespa/document/update/tensor_remove_update.cpp b/document/src/vespa/document/update/tensor_remove_update.cpp
index 7b81581aeed..c9ff1a462c5 100644
--- a/document/src/vespa/document/update/tensor_remove_update.cpp
+++ b/document/src/vespa/document/update/tensor_remove_update.cpp
@@ -1,13 +1,13 @@
// Copyright 2019 Oath Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
#include "tensor_remove_update.h"
+#include "tensor_partial_update.h"
#include <vespa/document/base/exceptions.h>
#include <vespa/document/datatype/tensor_data_type.h>
#include <vespa/document/fieldvalue/document.h>
#include <vespa/document/fieldvalue/tensorfieldvalue.h>
#include <vespa/document/serialization/vespadocumentdeserializer.h>
#include <vespa/eval/eval/fast_value.h>
-#include <vespa/eval/tensor/partial_update.h>
#include <vespa/eval/eval/value.h>
#include <vespa/vespalib/objects/nbostream.h>
#include <vespa/vespalib/util/xmlstream.h>