move partial_update files from eval to document

author: Arne Juul <arnej@verizonmedia.com> 2020-12-08 13:55:43 +0000
committer: Arne Juul <arnej@verizonmedia.com> 2020-12-08 13:57:58 +0000
commit: 55b6fa9bbedbc372222c4a76924703f4525bc987 (patch)
tree: 7f58d6653fe7982d23bc6dae69dc56b163de209c /document/src
parent: 6ca5863b37cb94b1ebb223cbe3a44a4554f845eb (diff)
12 files changed, 823 insertions, 3 deletions
diff --git a/document/src/tests/tensor_fieldvalue/partial_add/CMakeLists.txt b/document/src/tests/tensor_fieldvalue/partial_add/CMakeLists.txt
new file mode 100644
index 00000000000..8d5ee0df6e6
--- /dev/null
+++ b/document/src/tests/tensor_fieldvalue/partial_add/CMakeLists.txt
@@ -0,0 +1,9 @@
+# Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+vespa_add_executable(eval_partial_add_test_app TEST
+    SOURCES
+    partial_add_test.cpp
+    DEPENDS
+    document
+    GTest::GTest
+)
+vespa_add_test(NAME eval_partial_add_test_app COMMAND eval_partial_add_test_app)
diff --git a/document/src/tests/tensor_fieldvalue/partial_add/partial_add_test.cpp b/document/src/tests/tensor_fieldvalue/partial_add/partial_add_test.cpp
new file mode 100644
index 00000000000..db391a5b889
--- /dev/null
+++ b/document/src/tests/tensor_fieldvalue/partial_add/partial_add_test.cpp
@@ -0,0 +1,89 @@
+// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include <vespa/eval/eval/simple_value.h>
+#include <vespa/eval/eval/test/tensor_model.hpp>
+#include <vespa/eval/eval/value_codec.h>
+#include <vespa/document/update/tensor_partial_update.h>
+#include <vespa/vespalib/util/stringfmt.h>
+#include <vespa/vespalib/gtest/gtest.h>
+#include <optional>
+
+using namespace vespalib;
+using namespace vespalib::eval;
+using namespace vespalib::eval::test;
+
+using vespalib::make_string_short::fmt;
+
+std::vector<Layout> add_layouts = {
+    {x({"a"})},                           {x({"b"})},
+    {x({"a","b"})},                       {x({"a","c"})},
+    float_cells({x({"a","b"})}),          {x({"a","c"})},
+    {x({"a","b"})},                       float_cells({x({"a","c"})}),
+    float_cells({x({"a","b"})}),          float_cells({x({"a","c"})}),
+    {x({"a","b","c"}),y({"d","e"})},      {x({"b","f"}),y({"d","g"})},             
+    {x(3),y({"a","b"})},                  {x(3),y({"b","c"})}
+};
+
+// Reference semantics for add: every cell of 'b' wins, cells of 'a' survive only where 'b' has none.
+TensorSpec reference_add(const TensorSpec &a, const TensorSpec &b) {
+    const auto &new_cells = b.cells();
+    TensorSpec merged(a.type());
+    for (const auto &[addr, value]: new_cells) {
+        merged.add(addr, value);
+    }
+    for (const auto &[addr, value]: a.cells()) {
+        if (new_cells.find(addr) == new_cells.end()) {
+            merged.add(addr, value);
+        }
+    }
+    return merged;
+}
+
+Value::UP try_partial_add(const TensorSpec &a, const TensorSpec &b) { // result of TensorPartialUpdate::add, passed through unchecked
+    const auto &factory = SimpleValueBuilderFactory::get();
+    auto lhs = value_from_spec(a, factory); // input tensor built from spec 'a'
+    auto rhs = value_from_spec(b, factory); // modifier tensor built from spec 'b'
+    return tensor::TensorPartialUpdate::add(*lhs, *rhs, factory);
+}
+
+TensorSpec perform_partial_add(const TensorSpec &a, const TensorSpec &b) {
+    auto up = try_partial_add(a, b);
+    EXPECT_TRUE(up); // non-fatal, so an empty result must still be handled below
+    return up ? spec_from_value(*up) : TensorSpec("error"); // avoid dereferencing nullptr
+}
+
+TEST(PartialAddTest, partial_add_works_for_simple_values) {
+    ASSERT_TRUE((add_layouts.size() % 2) == 0); // layouts are consumed as (lhs, rhs) pairs
+    for (size_t i = 0; i < add_layouts.size(); i += 2) {
+        TensorSpec lhs = spec(add_layouts[i], N());
+        TensorSpec rhs = spec(add_layouts[i + 1], Div16(N()));
+        SCOPED_TRACE(fmt("\n===\nLHS: %s\nRHS: %s\n===\n", lhs.to_string().c_str(), rhs.to_string().c_str()));
+        auto expect = reference_add(lhs, rhs); // spec-level reference result
+        auto actual = perform_partial_add(lhs, rhs); // result of the implementation under test
+        EXPECT_EQ(actual, expect);
+    }
+}
+
+std::vector<Layout> bad_layouts = {
+    {x(3)},                               {x(3),y(1)},
+    {x(3),y(1)},                          {x(3)},
+    {x(3),y(3)},                          {x(3),y({"a"})},
+    {x(3),y({"a"})},                      {x(3),y(3)},
+    {x({"a"})},                           {x({"a"}),y({"b"})},
+    {x({"a"}),y({"b"})},                  {x({"a"})},
+    {x({"a"})},                           {x({"a"}),y(1)}
+};
+
+TEST(PartialAddTest, partial_add_returns_nullptr_on_invalid_inputs) {
+    ASSERT_TRUE((bad_layouts.size() % 2) == 0); // layouts are consumed as (lhs, rhs) pairs
+    for (size_t i = 0; i < bad_layouts.size(); i += 2) {
+        TensorSpec lhs = spec(bad_layouts[i], N());
+        TensorSpec rhs = spec(bad_layouts[i + 1], Div16(N()));
+        SCOPED_TRACE(fmt("\n===\nLHS: %s\nRHS: %s\n===\n", lhs.to_string().c_str(), rhs.to_string().c_str()));
+        auto actual = try_partial_add(lhs, rhs);
+        auto expect = Value::UP(); // empty pointer: every pair here must be rejected
+        EXPECT_EQ(actual, expect);
+    }
+}
+
+GTEST_MAIN_RUN_ALL_TESTS()
diff --git a/document/src/tests/tensor_fieldvalue/partial_modify/CMakeLists.txt b/document/src/tests/tensor_fieldvalue/partial_modify/CMakeLists.txt
new file mode 100644
index 00000000000..8fde8339f66
--- /dev/null
+++ b/document/src/tests/tensor_fieldvalue/partial_modify/CMakeLists.txt
@@ -0,0 +1,9 @@
+# Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+vespa_add_executable(eval_partial_modify_test_app TEST
+    SOURCES
+    partial_modify_test.cpp
+    DEPENDS
+    document
+    GTest::GTest
+)
+vespa_add_test(NAME eval_partial_modify_test_app COMMAND eval_partial_modify_test_app)
diff --git a/document/src/tests/tensor_fieldvalue/partial_modify/partial_modify_test.cpp b/document/src/tests/tensor_fieldvalue/partial_modify/partial_modify_test.cpp
new file mode 100644
index 00000000000..a4562c09e50
--- /dev/null
+++ b/document/src/tests/tensor_fieldvalue/partial_modify/partial_modify_test.cpp
@@ -0,0 +1,110 @@
+// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include <vespa/eval/eval/simple_value.h>
+#include <vespa/eval/eval/test/tensor_model.hpp>
+#include <vespa/eval/eval/value_codec.h>
+#include <vespa/document/update/tensor_partial_update.h>
+#include <vespa/vespalib/util/stringfmt.h>
+#include <vespa/vespalib/gtest/gtest.h>
+#include <optional>
+
+using namespace vespalib;
+using namespace vespalib::eval;
+using namespace vespalib::eval::test;
+
+using vespalib::make_string_short::fmt;
+
+std::vector<Layout> modify_layouts = {
+    {x({"a"})},                           {x({"a"})},
+    {x({"a",""})},                        {x({"b","c","d","e"})},
+    {x(5)},                               {x({"1","2","foo","17"})},
+    {x({"a","b","c"}),y({"d","e"})},      {x({"b"}),y({"d"})},             
+    {x({"a","b","c"})},                   {x({"b","c","d"})},
+    {x(4),y({"a","b","c","d"}),z(5)},     {x({"1","2"}),y({"b","d"}),z({"1","3"})},
+    {x(3),y(2)},                          {x({"0","1"}),y({"0","1"})},
+    {x({"a","","b"})},                    {x({""})}
+};
+
+// Returns a copy of 'input' where every indexed label is replaced by its decimal string form.
+TensorSpec::Address sparsify(const TensorSpec::Address &input) {
+    TensorSpec::Address all_mapped;
+    for (const auto &[dim_name, label] : input) {
+        if (!label.is_indexed()) {
+            all_mapped.emplace(dim_name, label);
+            continue;
+        }
+        all_mapped.emplace(dim_name, fmt("%zu", label.index));
+    }
+    return all_mapped;
+}
+
+// Reference semantics for modify: each cell of 'a' is kept, and joined (via 'fun') with
+// the cell of 'b' found at the all-mapped form of its address, when there is one.
+TensorSpec reference_modify(const TensorSpec &a, const TensorSpec &b, join_fun_t fun) {
+    const auto &modifier_cells = b.cells();
+    TensorSpec modified(a.type());
+    for (const auto &[addr, cell_value]: a.cells()) {
+        double value = cell_value;
+        auto hit = modifier_cells.find(sparsify(addr));
+        if (hit != modifier_cells.end()) {
+            value = fun(value, hit->second);
+        }
+        modified.add(addr, value);
+    }
+    return modified;
+}
+
+Value::UP try_partial_modify(const TensorSpec &a, const TensorSpec &b, join_fun_t fun) { // result of TensorPartialUpdate::modify, passed through unchecked
+    const auto &factory = SimpleValueBuilderFactory::get();
+    auto lhs = value_from_spec(a, factory); // input tensor built from spec 'a'
+    auto rhs = value_from_spec(b, factory); // modifier tensor built from spec 'b'
+    return tensor::TensorPartialUpdate::modify(*lhs, fun, *rhs, factory);
+}
+
+TensorSpec perform_partial_modify(const TensorSpec &a, const TensorSpec &b, join_fun_t fun) {
+    auto up = try_partial_modify(a, b, fun);
+    EXPECT_TRUE(up); // non-fatal, so an empty result must still be handled below
+    return up ? spec_from_value(*up) : TensorSpec("error"); // avoid dereferencing nullptr
+}
+
+TEST(PartialModifyTest, partial_modify_works_for_simple_values) {
+    ASSERT_TRUE((modify_layouts.size() % 2) == 0);
+    for (size_t i = 0; i < modify_layouts.size(); i += 2) {
+        TensorSpec lhs = spec(modify_layouts[i], N());
+        TensorSpec rhs = spec(modify_layouts[i + 1], Div16(N()));
+        SCOPED_TRACE(fmt("\n===\nLHS: %s\nRHS: %s\n===\n", lhs.to_string().c_str(), rhs.to_string().c_str()));
+        for (auto fun: {operation::Add::f, operation::Mul::f, operation::Sub::f}) {
+            auto expect = reference_modify(lhs, rhs, fun);
+            auto actual = perform_partial_modify(lhs, rhs, fun);
+            EXPECT_EQ(actual, expect);
+        }
+        auto fun = [](double, double keep) { return keep; };
+        auto expect = reference_modify(lhs, rhs, fun);
+        auto actual = perform_partial_modify(lhs, rhs, fun);
+        EXPECT_EQ(actual, expect);
+    }
+}
+
+std::vector<Layout> bad_layouts = {
+    {x(3)},                               {x(3)},
+    {x(3),y({"a"})},                      {x(3),y({"a"})},
+    {x({"a"})},                           {x({"a"}),y({"b"})},
+    {x({"a"}),y({"b"})},                  {x({"a"})},
+    {x({"a"})},                           {x({"a"}),y(1)}
+};
+
+TEST(PartialModifyTest, partial_modify_returns_nullptr_on_invalid_inputs) {
+    ASSERT_TRUE((bad_layouts.size() % 2) == 0);
+    for (size_t i = 0; i < bad_layouts.size(); i += 2) {
+        TensorSpec lhs = spec(bad_layouts[i], N());
+        TensorSpec rhs = spec(bad_layouts[i + 1], Div16(N()));
+        SCOPED_TRACE(fmt("\n===\nLHS: %s\nRHS: %s\n===\n", lhs.to_string().c_str(), rhs.to_string().c_str()));
+        for (auto fun: {operation::Add::f}) {
+            auto actual = try_partial_modify(lhs, rhs, fun);
+            auto expect = Value::UP();
+            EXPECT_EQ(actual, expect);
+        }
+    }
+}
+
+GTEST_MAIN_RUN_ALL_TESTS()
diff --git a/document/src/tests/tensor_fieldvalue/partial_remove/CMakeLists.txt b/document/src/tests/tensor_fieldvalue/partial_remove/CMakeLists.txt
new file mode 100644
index 00000000000..7382ced9490
--- /dev/null
+++ b/document/src/tests/tensor_fieldvalue/partial_remove/CMakeLists.txt
@@ -0,0 +1,9 @@
+# Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+vespa_add_executable(eval_partial_remove_test_app TEST
+    SOURCES
+    partial_remove_test.cpp
+    DEPENDS
+    document
+    GTest::GTest
+)
+vespa_add_test(NAME eval_partial_remove_test_app COMMAND eval_partial_remove_test_app)
diff --git a/document/src/tests/tensor_fieldvalue/partial_remove/partial_remove_test.cpp b/document/src/tests/tensor_fieldvalue/partial_remove/partial_remove_test.cpp
new file mode 100644
index 00000000000..7b5b17b9cf8
--- /dev/null
+++ b/document/src/tests/tensor_fieldvalue/partial_remove/partial_remove_test.cpp
@@ -0,0 +1,130 @@
+// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include <vespa/eval/eval/simple_value.h>
+#include <vespa/eval/eval/test/tensor_model.hpp>
+#include <vespa/eval/eval/value_codec.h>
+#include <vespa/document/update/tensor_partial_update.h>
+#include <vespa/vespalib/util/stringfmt.h>
+#include <vespa/vespalib/gtest/gtest.h>
+#include <optional>
+
+using namespace vespalib;
+using namespace vespalib::eval;
+using namespace vespalib::eval::test;
+
+using vespalib::make_string_short::fmt;
+
+std::vector<Layout> remove_layouts = {
+    {x({"a"})},                           {x({"b"})},
+    {x({"a","b"})},                       {x({"a","c"})},
+    {x({"a","b"})},                       {x({"a","b"})},
+    float_cells({x({"a","b"})}),          {x({"a","c"})},
+    {x({"a","b"})},                       float_cells({x({"a","c"})}),
+    float_cells({x({"a","b"})}),          float_cells({x({"a","c"})}),
+    {x({"a","b","c"}),y({"d","e"})},      {x({"b","f"}),y({"d","g"})},             
+    {x(3),y({"a","b"})},                  {y({"b","c"})}
+};
+
+// Returns the subset of 'input' made up of its mapped labels;
+// indexed labels are dropped.
+TensorSpec::Address only_sparse(const TensorSpec::Address &input) {
+    TensorSpec::Address mapped_part;
+    for (const auto &[dim_name, label] : input) {
+        if (label.is_mapped()) { mapped_part.emplace(dim_name, label); }
+    }
+    return mapped_part;
+}
+
+// Reference semantics for remove: keep a cell of 'a' unless the mapped part of its address is a cell of 'b'.
+TensorSpec reference_remove(const TensorSpec &a, const TensorSpec &b) {
+    TensorSpec remaining(a.type());
+    for (const auto &[addr, value]: a.cells()) {
+        const bool selected = (b.cells().count(only_sparse(addr)) != 0);
+        if (!selected) {
+            remaining.add(addr, value);
+        }
+    }
+    return remaining;
+}
+
+Value::UP try_partial_remove(const TensorSpec &a, const TensorSpec &b) { // result of TensorPartialUpdate::remove, passed through unchecked
+    const auto &factory = SimpleValueBuilderFactory::get();
+    auto lhs = value_from_spec(a, factory); // input tensor built from spec 'a'
+    auto rhs = value_from_spec(b, factory); // modifier tensor built from spec 'b'
+    return tensor::TensorPartialUpdate::remove(*lhs, *rhs, factory);
+}
+
+TensorSpec perform_partial_remove(const TensorSpec &a, const TensorSpec &b) {
+    auto up = try_partial_remove(a, b);
+    EXPECT_TRUE(up); // non-fatal, so an empty result must still be handled below
+    return up ? spec_from_value(*up) : TensorSpec("error"); // avoid dereferencing nullptr
+}
+
+TEST(PartialRemoveTest, partial_remove_works_for_simple_values) {
+    ASSERT_TRUE((remove_layouts.size() % 2) == 0);
+    for (size_t i = 0; i < remove_layouts.size(); i += 2) {
+        TensorSpec lhs = spec(remove_layouts[i], N());
+        TensorSpec rhs = spec(remove_layouts[i + 1], Div16(N()));
+        SCOPED_TRACE(fmt("\n===\nLHS: %s\nRHS: %s\n===\n", lhs.to_string().c_str(), rhs.to_string().c_str()));
+        auto expect = reference_remove(lhs, rhs);
+        auto actual = perform_partial_remove(lhs, rhs);
+        EXPECT_EQ(actual, expect);
+    }
+}
+
+std::vector<Layout> bad_layouts = {
+    {x(3)},                               {x(3)},
+    {x(3),y({"a"})},                      {x(3)},
+    {x(3),y({"a"})},                      {x(3),y({"a"})},
+    {x({"a"})},                           {y({"a"})},
+    {x({"a"})},                           {x({"a"}),y({"b"})}
+};
+
+TEST(PartialRemoveTest, partial_remove_returns_nullptr_on_invalid_inputs) {
+    ASSERT_TRUE((bad_layouts.size() % 2) == 0);
+    for (size_t i = 0; i < bad_layouts.size(); i += 2) {
+        TensorSpec lhs = spec(bad_layouts[i], N());
+        TensorSpec rhs = spec(bad_layouts[i + 1], Div16(N()));
+        SCOPED_TRACE(fmt("\n===\nLHS: %s\nRHS: %s\n===\n", lhs.to_string().c_str(), rhs.to_string().c_str()));
+        auto actual = try_partial_remove(lhs, rhs);
+        auto expect = Value::UP();
+        EXPECT_EQ(actual, expect);
+    }
+}
+
+void
+expect_partial_remove(const TensorSpec& input, const TensorSpec& remove, const TensorSpec& exp)
+{ // checks that removing 'remove' from 'input' yields exactly 'exp'
+    auto act = perform_partial_remove(input, remove);
+    EXPECT_EQ(exp, act);
+}
+
+TEST(PartialRemoveTest, remove_where_address_is_not_fully_specified) {
+    auto input_sparse = TensorSpec("tensor(x{},y{})").
+            add({{"x", "a"},{"y", "c"}}, 3.0).
+            add({{"x", "a"},{"y", "d"}}, 5.0).
+            add({{"x", "b"},{"y", "c"}}, 7.0);
+
+    expect_partial_remove(input_sparse, TensorSpec("tensor(x{})").add({{"x", "a"}}, 1.0),
+                          TensorSpec("tensor(x{},y{})").add({{"x", "b"},{"y", "c"}}, 7.0));
+
+    expect_partial_remove(input_sparse, TensorSpec("tensor(y{})").add({{"y", "c"}}, 1.0),
+                          TensorSpec("tensor(x{},y{})").add({{"x", "a"},{"y", "d"}}, 5.0));
+
+    expect_partial_remove(input_sparse, TensorSpec("tensor(y{})").add({{"y", "d"}}, 1.0),
+                          TensorSpec("tensor(x{},y{})").add({{"x", "a"},{"y", "c"}}, 3.0)
+                                  .add({{"x", "b"},{"y", "c"}}, 7.0));
+
+    auto input_mixed = TensorSpec("tensor(x{},y{},z[1])").
+            add({{"x", "a"},{"y", "c"},{"z", 0}}, 3.0).
+            add({{"x", "a"},{"y", "d"},{"z", 0}}, 5.0).
+            add({{"x", "b"},{"y", "c"},{"z", 0}}, 7.0);
+
+    expect_partial_remove(input_mixed,TensorSpec("tensor(x{})").add({{"x", "a"}}, 1.0),
+                          TensorSpec("tensor(x{},y{},z[1])").add({{"x", "b"},{"y", "c"},{"z", 0}}, 7.0));
+
+    expect_partial_remove(input_mixed, TensorSpec("tensor(y{})").add({{"y", "c"}}, 1.0),
+                          TensorSpec("tensor(x{},y{},z[1])").add({{"x", "a"},{"y", "d"},{"z", 0}}, 5.0));
+}
+
+GTEST_MAIN_RUN_ALL_TESTS()
diff --git a/document/src/vespa/document/update/CMakeLists.txt b/document/src/vespa/document/update/CMakeLists.txt
index a587d8e3e2d..b0ffa056e1a 100644
--- a/document/src/vespa/document/update/CMakeLists.txt
+++ b/document/src/vespa/document/update/CMakeLists.txt
@@ -15,6 +15,7 @@ vespa_add_library(document_updates OBJECT
     removevalueupdate.cpp
     tensor_add_update.cpp
     tensor_modify_update.cpp
+    tensor_partial_update.cpp
     tensor_remove_update.cpp
     valueupdate.cpp
     DEPENDS
diff --git a/document/src/vespa/document/update/tensor_add_update.cpp b/document/src/vespa/document/update/tensor_add_update.cpp
index c9ffad2a789..8846ec2fc0a 100644
--- a/document/src/vespa/document/update/tensor_add_update.cpp
+++ b/document/src/vespa/document/update/tensor_add_update.cpp
@@ -1,6 +1,7 @@
 // Copyright 2019 Oath Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
 
 #include "tensor_add_update.h"
+#include "tensor_partial_update.h"
 #include <vespa/document/base/exceptions.h>
 #include <vespa/document/base/field.h>
 #include <vespa/document/datatype/tensor_data_type.h>
@@ -10,7 +11,6 @@
 #include <vespa/document/util/serializableexceptions.h>
 #include <vespa/eval/eval/value.h>
 #include <vespa/eval/eval/fast_value.h>
-#include <vespa/eval/tensor/partial_update.h>
 #include <vespa/vespalib/objects/nbostream.h>
 #include <vespa/vespalib/stllike/asciistream.h>
 #include <vespa/vespalib/util/stringfmt.h>
diff --git a/document/src/vespa/document/update/tensor_modify_update.cpp b/document/src/vespa/document/update/tensor_modify_update.cpp
index 4da93d0ae46..bc4085ec4fa 100644
--- a/document/src/vespa/document/update/tensor_modify_update.cpp
+++ b/document/src/vespa/document/update/tensor_modify_update.cpp
@@ -1,6 +1,7 @@
 // Copyright 2019 Oath Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
 
 #include "tensor_modify_update.h"
+#include "tensor_partial_update.h"
 #include <vespa/document/base/exceptions.h>
 #include <vespa/document/base/field.h>
 #include <vespa/document/datatype/tensor_data_type.h>
@@ -11,7 +12,6 @@
 #include <vespa/eval/eval/operation.h>
 #include <vespa/eval/eval/value.h>
 #include <vespa/eval/eval/fast_value.h>
-#include <vespa/eval/tensor/partial_update.h>
 #include <vespa/vespalib/objects/nbostream.h>
 #include <vespa/vespalib/stllike/asciistream.h>
 #include <vespa/vespalib/util/stringfmt.h>
diff --git a/document/src/vespa/document/update/tensor_partial_update.cpp b/document/src/vespa/document/update/tensor_partial_update.cpp
new file mode 100644
index 00000000000..9bf243602dd
--- /dev/null
+++ b/document/src/vespa/document/update/tensor_partial_update.cpp
@@ -0,0 +1,419 @@
+// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include "tensor_partial_update.h"
+#include <vespa/eval/eval/operation.h>
+#include <vespa/vespalib/util/overload.h>
+#include <vespa/vespalib/util/typify.h>
+#include <vespa/vespalib/util/visit_ranges.h>
+#include <cassert>
+#include <set>
+
+#include <vespa/log/log.h>
+LOG_SETUP(".eval.tensor.partial_update");
+
+using namespace vespalib::eval;
+
+namespace vespalib::tensor {
+
+namespace {
+
+using join_fun_t = vespalib::eval::operation::op2_t;
+
+static constexpr size_t npos() { return -1; } // largest size_t value; marks "no usable dense index" in this file
+
+enum class DimCase { // per-dimension plan entry chosen by AddressHandler from the output type's dimension
+    MAPPED_MATCH, CONV_TO_INDEXED // mapped in the output type / not mapped: label goes through DenseCoords::convert_label
+};
+
+// Accumulates the offset into a dense subspace from one label per indexed dimension.
+struct DenseCoords {
+    std::vector<size_t> dim_sizes; // sizes of the indexed dimensions of the output type
+    size_t total_size;             // product of dim_sizes
+    size_t offset;                 // offset built so far; npos() once any label was rejected
+    size_t current;                // number of labels consumed since the last clear()
+    DenseCoords(const ValueType &output_type)
+        : total_size(1), offset(0), current(0)
+    {
+        for (const auto & dim : output_type.dimensions()) {
+            if (dim.is_indexed()) {
+                dim_sizes.push_back(dim.size);
+                total_size *= dim.size;
+            }
+        }
+    }
+    ~DenseCoords();
+    void clear() { offset = 0; current = 0; }
+    // Folds 'label' (decimal index into the next indexed dimension) into 'offset'; bad labels set it to npos().
+    void convert_label(vespalib::stringref label) {
+        const size_t cur_dim_size = dim_sizes[current];
+        size_t coord = 0;
+        for (char c : label) {
+            if (c < '0' || c > '9') { // bad char
+                coord = cur_dim_size;
+                break;
+            }
+            coord = coord * 10 + (c - '0');
+            if (coord >= cur_dim_size) { break; } // stop early: long digit strings must not wrap into a valid index
+        }
+        if ((coord < cur_dim_size) && (offset != npos())) {
+            offset = offset * cur_dim_size + coord;
+        } else {
+            offset = npos();
+        }
+        ++current;
+    }
+    size_t get_dense_index() const {
+        assert(current == dim_sizes.size());
+        return offset;
+    }
+};
+DenseCoords::~DenseCoords() = default;
+
+struct SparseCoords { // label storage plus the pointer/index vectors handed to index views
+    std::vector<vespalib::stringref> addr; // 'sz' label slots
+    std::vector<vespalib::stringref *> next_result_refs; // writable pointers to the slots in 'addr'
+    std::vector<const vespalib::stringref *> lookup_refs; // read-only pointers to the same slots
+    std::vector<size_t> lookup_view_dims; // the identity list 0 .. sz-1
+    SparseCoords(size_t sz)
+        : addr(sz), next_result_refs(sz), lookup_refs(sz), lookup_view_dims(sz)
+    {
+        for (size_t i = 0; i < sz; ++i) {
+            next_result_refs[i] = &addr[i]; // NOTE(review): these point into this object's own 'addr'; a copy would keep pointing at the source
+            lookup_refs[i] = &addr[i];
+            lookup_view_dims[i] = i;
+        }
+    }
+    ~SparseCoords();
+};
+SparseCoords::~SparseCoords() = default;
+
+/**
+ * Helper class that converts a fully-sparse address from the modifier
+ * tensor into a subset sparse address for the output and an offset
+ * in the dense subspace. Check 'valid' after construction before use.
+ **/
+struct AddressHandler {
+    std::vector<DimCase> dimension_plan; // one entry per dimension matched between output and modifier
+    DenseCoords dense_converter; // turns labels for indexed output dimensions into a dense offset
+    SparseCoords for_output; // labels for the mapped dimensions of the output
+    SparseCoords from_modifier; // labels as read from the modifier
+    bool valid; // false when the modifier type cannot be applied to the output type
+
+    AddressHandler(const ValueType &output_type,
+                   const ValueType &modifier_type)
+        : dimension_plan(), dense_converter(output_type),
+          for_output(output_type.count_mapped_dimensions()),
+          from_modifier(modifier_type.count_mapped_dimensions()),
+          valid(true)
+    {
+        if (! modifier_type.is_sparse()) {
+            LOG(error, "Unexpected non-sparse modifier tensor, type is %s",
+                modifier_type.to_spec().c_str());
+            valid = false;
+            return;
+        }
+        // analyse dimensions
+        auto visitor = overload {
+            [&](visit_ranges_either, const auto &) { valid = false; }, // dimension name present on one side only
+            [&](visit_ranges_both, const auto &a, const auto &) {
+                dimension_plan.push_back(a.is_mapped() ? DimCase::MAPPED_MATCH : DimCase::CONV_TO_INDEXED);
+            }
+        };
+        const auto & input_dims = output_type.dimensions();
+        const auto & modifier_dims = modifier_type.dimensions();
+        visit_ranges(visitor,
+                     input_dims.begin(), input_dims.end(),
+                     modifier_dims.begin(), modifier_dims.end(),
+                     [](const auto &a, const auto &b){ return (a.name < b.name); }); // dimensions are paired up by name
+        if (! valid) {
+            LOG(error, "Value type %s does not match modifier type %s (should have same dimensions)",
+                output_type.to_spec().c_str(), modifier_type.to_spec().c_str());
+            return;
+        }
+        // implicitly checked above, must hold:
+        assert(input_dims.size() == modifier_dims.size());
+        // the plan should now be fully built:
+        assert(input_dims.size() == dimension_plan.size());
+    }
+
+    void handle_address() // splits from_modifier.addr into for_output.addr and dense_converter according to the plan
+    {
+        dense_converter.clear();
+        auto out = for_output.addr.begin();
+        for (size_t i = 0; i < dimension_plan.size(); ++i) {
+            if (dimension_plan[i] == DimCase::CONV_TO_INDEXED) {
+                dense_converter.convert_label(from_modifier.addr[i]);
+            } else {
+                *out++ = from_modifier.addr[i];
+            }
+        }
+        assert(out == for_output.addr.end());
+        assert(dense_converter.current == dense_converter.dim_sizes.size());
+    }
+
+    ~AddressHandler();
+};
+AddressHandler::~AddressHandler() = default;
+
+template <typename CT, typename ICT = CT, typename KeepFun> // CT: builder cell type, ICT: cell type read from 'input'
+void copy_tensor_with_filter(const Value &input,
+                             size_t dsss, // number of cells copied per subspace
+                             SparseCoords &addrs, // scratch: receives the address of each visited subspace
+                             ValueBuilder<CT> &builder,
+                             KeepFun && keep_subspace) // called as keep_subspace(lookup_refs, subspace_index); false skips the subspace
+{
+    const auto input_cells = input.cells().typify<ICT>();
+    auto input_view = input.index().create_view({});
+    input_view->lookup({}); // a view over no dimensions, used here to walk every subspace of 'input'
+    size_t input_subspace_index;
+    while (input_view->next_result(addrs.next_result_refs, input_subspace_index)) {
+        if (keep_subspace(addrs.lookup_refs, input_subspace_index)) {
+            size_t input_offset = dsss * input_subspace_index;
+            auto src = input_cells.begin() + input_offset;
+            auto dst = builder.add_subspace(addrs.addr).begin();
+            for (size_t i = 0; i < dsss; ++i) {
+                dst[i] = src[i]; // element-wise copy; converts ICT to CT when they differ
+            }
+        }
+    }
+}
+
+template <typename CT> // CT: cell type used both for reading 'input' and for the built copy
+Value::UP
+copy_tensor(const Value &input, const ValueType &input_type, SparseCoords &helper, const ValueBuilderFactory &factory)
+{
+    const size_t num_mapped_in_input = input_type.count_mapped_dimensions();
+    const size_t dsss = input_type.dense_subspace_size();
+    const size_t expected_subspaces = input.index().size();
+    auto builder = factory.create_value_builder<CT>(input_type, num_mapped_in_input, dsss, expected_subspaces);
+    auto no_filter = [] (const auto &, size_t) { // keep every subspace
+        return true;
+    };
+    copy_tensor_with_filter<CT>(input, dsss, helper, *builder, no_filter);
+    return builder->build(std::move(builder));
+}
+
+//-----------------------------------------------------------------------------
+
+struct PerformModify { // ICT: input cell type, MCT: modifier cell type
+    template<typename ICT, typename MCT>
+    static Value::UP invoke(const Value &input,
+                            join_fun_t function,
+                            const Value &modifier,
+                            const ValueBuilderFactory &factory);
+};
+
+template <typename ICT, typename MCT>
+Value::UP
+PerformModify::invoke(const Value &input, join_fun_t function, const Value &modifier, const ValueBuilderFactory &factory)
+{
+    const ValueType &input_type = input.type();
+    const size_t dsss = input_type.dense_subspace_size();
+    const ValueType &modifier_type = modifier.type();
+    AddressHandler handler(input_type, modifier_type);
+    if (! handler.valid) {
+        return {}; // incompatible types: empty result (the handler has already logged why)
+    }
+    // copy input to output
+    auto out = copy_tensor<ICT>(input, input_type, handler.for_output, factory);
+    // need to overwrite some cells
+    auto output_cells = unconstify(out->cells().template typify<ICT>());
+    const auto modifier_cells = modifier.cells().typify<MCT>();
+    auto modifier_view = modifier.index().create_view({});
+    auto lookup_view = out->index().create_view(handler.for_output.lookup_view_dims);
+    modifier_view->lookup({});
+    size_t modifier_subspace_index;
+    while (modifier_view->next_result(handler.from_modifier.next_result_refs, modifier_subspace_index)) {
+        handler.handle_address();
+        size_t dense_idx = handler.dense_converter.get_dense_index();
+        if (dense_idx == npos()) {
+            continue; // some label could not be used as an index: this modifier cell is ignored
+        }
+        lookup_view->lookup(handler.for_output.lookup_refs);
+        size_t output_subspace_index;
+        if (lookup_view->next_result({}, output_subspace_index)) { // no match in the output: the modifier cell is ignored
+            size_t subspace_offset = dsss * output_subspace_index;
+            auto dst = output_cells.begin() + subspace_offset;
+            ICT lhs = dst[dense_idx];
+            MCT rhs = modifier_cells[modifier_subspace_index];
+            dst[dense_idx] = function(lhs, rhs); // old cell value on the left, modifier value on the right
+        }
+    }
+    return out;
+}
+
+//-----------------------------------------------------------------------------
+
+struct PerformAdd { // ICT: input cell type (also used for the result), MCT: modifier cell type
+    template<typename ICT, typename MCT>
+    static Value::UP invoke(const Value &input,
+                            const Value &modifier,
+                            const ValueBuilderFactory &factory);
+};
+
+template <typename ICT, typename MCT>
+Value::UP
+PerformAdd::invoke(const Value &input, const Value &modifier, const ValueBuilderFactory &factory)
+{
+    const ValueType &input_type = input.type();
+    const ValueType &modifier_type = modifier.type();
+    if (input_type.dimensions() != modifier_type.dimensions()) { // only dimensions are compared, not cell types
+        LOG(error, "when adding cells to a tensor, dimensions must be equal. "
+            "Got input type %s != modifier type %s",
+            input_type.to_spec().c_str(), modifier_type.to_spec().c_str());
+        return {};
+    }
+    const size_t num_mapped_in_input = input_type.count_mapped_dimensions();
+    const size_t dsss = input_type.dense_subspace_size();
+    const size_t expected_subspaces = input.index().size() + modifier.index().size(); // sum of both subspace counts
+    auto builder = factory.create_value_builder<ICT>(input_type, num_mapped_in_input, dsss, expected_subspaces);
+    SparseCoords addrs(num_mapped_in_input);
+    auto lookup_view = input.index().create_view(addrs.lookup_view_dims);
+    std::vector<bool> overwritten(input.index().size(), false); // indexed by input subspace
+    auto remember_subspaces = [&] (const auto & lookup_refs, size_t) { // keeps every modifier subspace, noting which input subspace it replaces
+        lookup_view->lookup(lookup_refs);
+        size_t input_subspace_index;
+        if (lookup_view->next_result({}, input_subspace_index)) {
+            overwritten[input_subspace_index] = true;
+        }
+        return true;
+    };
+    copy_tensor_with_filter<ICT, MCT>(modifier, dsss, addrs, *builder, remember_subspaces); // first pass: all modifier subspaces
+    auto filter = [&] (const auto &, size_t input_subspace) {
+        return ! overwritten[input_subspace];
+    };
+    copy_tensor_with_filter<ICT>(input, dsss, addrs, *builder, filter); // second pass: input subspaces not replaced above
+    return builder->build(std::move(builder));
+}
+
+//-----------------------------------------------------------------------------
+
+struct PerformRemove {
+    template<typename ICT>
+    static Value::UP invoke(const Value &input,
+                            const Value &modifier,
+                            const ValueBuilderFactory &factory);
+};
+
+/**
+ * For each mapped dimension of the modifier, finds its position among the mapped dimensions of the input.
+ *
+ * The modifier's mapped dimensions must appear, in the same order, among those of the input.
+ * An empty vector is returned on mismatch (and also when the modifier has no mapped dimensions).
+ */
+std::vector<size_t>
+calc_mapped_dimension_indexes(const ValueType& input_type, const ValueType& modifier_type)
+{
+    const auto mapped_in_input = input_type.mapped_dimensions();
+    const auto mapped_in_modifier = modifier_type.mapped_dimensions();
+    if (mapped_in_modifier.size() > mapped_in_input.size()) {
+        return {};
+    }
+    std::vector<size_t> indexes;
+    indexes.reserve(mapped_in_modifier.size());
+    size_t pos = 0; // scan position in the input; never moves backwards
+    for (const auto &wanted : mapped_in_modifier) {
+        while ((pos < mapped_in_input.size()) && (mapped_in_input[pos] != wanted)) {
+            ++pos;
+        }
+        if (pos >= mapped_in_input.size()) {
+            return {};
+        }
+        indexes.push_back(pos);
+    }
+    return indexes;
+}
+
+struct ModifierCoords { // lookup arguments for the modifier's index that alias label slots owned by a SparseCoords
+
+    std::vector<const vespalib::stringref *> lookup_refs; // one pointer per modifier dimension, into input_coords.addr
+    std::vector<size_t> lookup_view_dims; // the identity list 0 .. n-1
+
+    ModifierCoords(const SparseCoords& input_coords,
+                   const std::vector<size_t>& input_dim_indexes,
+                   const ValueType& modifier_type)
+        : lookup_refs(modifier_type.dimensions().size()),
+          lookup_view_dims(modifier_type.dimensions().size())
+    {
+        assert(modifier_type.dimensions().size() == input_dim_indexes.size());
+        for (size_t i = 0; i < input_dim_indexes.size(); ++i) {
+            // Setup the modifier dimensions to point to the matching input dimensions.
+            lookup_refs[i] = &input_coords.addr[input_dim_indexes[i]]; // input_coords must stay alive while this object is used
+            lookup_view_dims[i] = i;
+        }
+    }
+    ~ModifierCoords() {}
+};
+
+// Builds a copy of input through a filter that rejects every address found in modifier.
+// Logs an error and returns a null Value::UP when the tensor types cannot be combined.
+template <typename ICT>
+Value::UP
+PerformRemove::invoke(const Value &input, const Value &modifier, const ValueBuilderFactory &factory)
+{
+    const ValueType &in_type = input.type();
+    const ValueType &mod_type = modifier.type();
+    const size_t mapped_count = in_type.count_mapped_dimensions();
+    if (mapped_count == 0) {
+        LOG(error, "Cannot remove cells from a dense input tensor of type %s",
+            in_type.to_spec().c_str());
+        return {};
+    }
+    if (mod_type.count_indexed_dimensions() != 0) {
+        LOG(error, "Cannot remove cells using a modifier tensor of type %s",
+            mod_type.to_spec().c_str());
+        return {};
+    }
+    const auto dim_indexes = calc_mapped_dimension_indexes(in_type, mod_type);
+    if (dim_indexes.empty()) {
+        LOG(error, "Tensor type mismatch when removing cells from a tensor. "
+            "Got input type %s versus modifier type %s",
+            in_type.to_spec().c_str(), mod_type.to_spec().c_str());
+        return {};
+    }
+    SparseCoords addrs(mapped_count);
+    ModifierCoords mod_coords(addrs, dim_indexes, mod_type);
+    auto mod_view = modifier.index().create_view(mod_coords.lookup_view_dims);
+    const size_t dsss = in_type.dense_subspace_size();
+    auto builder = factory.create_value_builder<ICT>(in_type, mapped_count, dsss, input.index().size());
+    // The address argument is unused: mod_coords.lookup_refs already points at the
+    // address storage in addrs, so the lookup sees the address being filtered.
+    auto keep_if_absent_from_modifier = [&] (const auto &, size_t) {
+        size_t matched_subspace = 0; // required out-parameter; its value is not used
+        mod_view->lookup(mod_coords.lookup_refs);
+        return !(mod_view->next_result({}, matched_subspace));
+    };
+    copy_tensor_with_filter<ICT>(input, dsss, addrs, *builder, keep_if_absent_from_modifier);
+    return builder->build(std::move(builder));
+}
+
+} // namespace <unnamed>
+
+//-----------------------------------------------------------------------------
+
+Value::UP
+TensorPartialUpdate::modify(const Value &input, join_fun_t function,
+                            const Value &modifier, const ValueBuilderFactory &factory)
+{
+    return typify_invoke<2, TypifyCellType, PerformModify>(
+            input.cells().type, modifier.cells().type, // both cell types select the PerformModify instantiation
+            input, function, modifier, factory);       // arguments handed on to PerformModify
+}
+
+Value::UP
+TensorPartialUpdate::add(const Value &input, const Value &add_cells, const ValueBuilderFactory &factory)
+{
+    return typify_invoke<2, TypifyCellType, PerformAdd>(
+            input.cells().type, add_cells.cells().type, // both cell types select the PerformAdd instantiation
+            input, add_cells, factory);                 // arguments handed on to PerformAdd
+}
+
+Value::UP
+TensorPartialUpdate::remove(const Value &input, const Value &remove_spec, const ValueBuilderFactory &factory)
+{
+    return typify_invoke<1, TypifyCellType, PerformRemove>(
+            input.cells().type,           // only the input cell type is typified; remove_spec cell values are not read
+            input, remove_spec, factory); // arguments of PerformRemove::invoke<ICT>
+}
+
+} // namespace
diff --git a/document/src/vespa/document/update/tensor_partial_update.h b/document/src/vespa/document/update/tensor_partial_update.h
new file mode 100644
index 00000000000..b3e9d32fca8
--- /dev/null
+++ b/document/src/vespa/document/update/tensor_partial_update.h
@@ -0,0 +1,44 @@
+// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include <vespa/eval/eval/fast_value.h>
+#include <vespa/eval/eval/value.h>
+#include <vespa/eval/eval/operation.h>
+
+namespace vespalib::tensor {
+
+struct TensorPartialUpdate {
+    using join_fun_t = vespalib::eval::operation::op2_t;
+    using Value = vespalib::eval::Value;
+    using ValueBuilderFactory = vespalib::eval::ValueBuilderFactory;
+
+    /**
+     *  Make a copy of the input, but apply function(oldvalue, modifier.cellvalue)
+     *  to cells which also exist in the "modifier".
+     *  The modifier type must be sparse with exactly the same dimension names
+     *  as the input type.
+     *  Returns null pointer if this constraint is violated.
+     **/
+    static Value::UP modify(const Value &input, join_fun_t function,
+                            const Value &modifier, const ValueBuilderFactory &factory);
+
+    /**
+     *  Make a copy of the input, but add or overwrite cells from add_cells.
+     *  Requires same type for input and add_cells.
+     *  Returns null pointer if this constraint is violated.
+     **/
+    static Value::UP add(const Value &input, const Value &add_cells, const ValueBuilderFactory &factory);
+
+    /**
+     *  Make a copy of the input, but remove cells present in remove_spec.
+     *  The remove_spec must be a sparse tensor whose dimensions are all, or a subset,
+     *  of the mapped dimensions of the input; with a subset, every input cell matching
+     *  a remove_spec address in those dimensions is removed. Cell values are ignored.
+     *  Not valid for dense tensors, since removing cells from those is impossible.
+     *  Returns null pointer if these constraints are violated.
+     **/
+    static Value::UP remove(const Value &input, const Value &remove_spec, const ValueBuilderFactory &factory);
+};
+
+} // namespace
diff --git a/document/src/vespa/document/update/tensor_remove_update.cpp b/document/src/vespa/document/update/tensor_remove_update.cpp
index 7b81581aeed..c9ff1a462c5 100644
--- a/document/src/vespa/document/update/tensor_remove_update.cpp
+++ b/document/src/vespa/document/update/tensor_remove_update.cpp
@@ -1,13 +1,13 @@
 // Copyright 2019 Oath Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
 
 #include "tensor_remove_update.h"
+#include "tensor_partial_update.h"
 #include <vespa/document/base/exceptions.h>
 #include <vespa/document/datatype/tensor_data_type.h>
 #include <vespa/document/fieldvalue/document.h>
 #include <vespa/document/fieldvalue/tensorfieldvalue.h>
 #include <vespa/document/serialization/vespadocumentdeserializer.h>
 #include <vespa/eval/eval/fast_value.h>
-#include <vespa/eval/tensor/partial_update.h>
 #include <vespa/eval/eval/value.h>
 #include <vespa/vespalib/objects/nbostream.h>
 #include <vespa/vespalib/util/xmlstream.h>
author	Arne Juul <arnej@verizonmedia.com>	2020-12-08 13:55:43 +0000
committer	Arne Juul <arnej@verizonmedia.com>	2020-12-08 13:57:58 +0000
commit	55b6fa9bbedbc372222c4a76924703f4525bc987 (patch)
tree	7f58d6653fe7982d23bc6dae69dc56b163de209c /document/src
parent	6ca5863b37cb94b1ebb223cbe3a44a4554f845eb (diff)