diff options
author | Arne Juul <arnej@verizonmedia.com> | 2020-12-08 13:55:43 +0000 |
---|---|---|
committer | Arne Juul <arnej@verizonmedia.com> | 2020-12-08 13:57:58 +0000 |
commit | 55b6fa9bbedbc372222c4a76924703f4525bc987 (patch) | |
tree | 7f58d6653fe7982d23bc6dae69dc56b163de209c /document/src | |
parent | 6ca5863b37cb94b1ebb223cbe3a44a4554f845eb (diff) |
move partial_update files from eval to document
Diffstat (limited to 'document/src')
12 files changed, 823 insertions, 3 deletions
diff --git a/document/src/tests/tensor_fieldvalue/partial_add/CMakeLists.txt b/document/src/tests/tensor_fieldvalue/partial_add/CMakeLists.txt new file mode 100644 index 00000000000..8d5ee0df6e6 --- /dev/null +++ b/document/src/tests/tensor_fieldvalue/partial_add/CMakeLists.txt @@ -0,0 +1,9 @@ +# Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +vespa_add_executable(eval_partial_add_test_app TEST + SOURCES + partial_add_test.cpp + DEPENDS + document + GTest::GTest +) +vespa_add_test(NAME eval_partial_add_test_app COMMAND eval_partial_add_test_app) diff --git a/document/src/tests/tensor_fieldvalue/partial_add/partial_add_test.cpp b/document/src/tests/tensor_fieldvalue/partial_add/partial_add_test.cpp new file mode 100644 index 00000000000..db391a5b889 --- /dev/null +++ b/document/src/tests/tensor_fieldvalue/partial_add/partial_add_test.cpp @@ -0,0 +1,89 @@ +// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+ +#include <vespa/eval/eval/simple_value.h> +#include <vespa/eval/eval/test/tensor_model.hpp> +#include <vespa/eval/eval/value_codec.h> +#include <vespa/document/update/tensor_partial_update.h> +#include <vespa/vespalib/util/stringfmt.h> +#include <vespa/vespalib/gtest/gtest.h> +#include <optional> + +using namespace vespalib; +using namespace vespalib::eval; +using namespace vespalib::eval::test; + +using vespalib::make_string_short::fmt; + +std::vector<Layout> add_layouts = { + {x({"a"})}, {x({"b"})}, + {x({"a","b"})}, {x({"a","c"})}, + float_cells({x({"a","b"})}), {x({"a","c"})}, + {x({"a","b"})}, float_cells({x({"a","c"})}), + float_cells({x({"a","b"})}), float_cells({x({"a","c"})}), + {x({"a","b","c"}),y({"d","e"})}, {x({"b","f"}),y({"d","g"})}, + {x(3),y({"a","b"})}, {x(3),y({"b","c"})} +}; + +TensorSpec reference_add(const TensorSpec &a, const TensorSpec &b) { + TensorSpec result(a.type()); + for (const auto &cell: b.cells()) { + result.add(cell.first, cell.second); + } + auto end_iter = b.cells().end(); + for (const auto &cell: a.cells()) { + auto iter = b.cells().find(cell.first); + if (iter == end_iter) { + result.add(cell.first, cell.second); + } + } + return result; +} + +Value::UP try_partial_add(const TensorSpec &a, const TensorSpec &b) { + const auto &factory = SimpleValueBuilderFactory::get(); + auto lhs = value_from_spec(a, factory); + auto rhs = value_from_spec(b, factory); + return tensor::TensorPartialUpdate::add(*lhs, *rhs, factory); +} + +TensorSpec perform_partial_add(const TensorSpec &a, const TensorSpec &b) { + auto up = try_partial_add(a, b); + EXPECT_TRUE(up); + return spec_from_value(*up); +} + +TEST(PartialAddTest, partial_add_works_for_simple_values) { + ASSERT_TRUE((add_layouts.size() % 2) == 0); + for (size_t i = 0; i < add_layouts.size(); i += 2) { + TensorSpec lhs = spec(add_layouts[i], N()); + TensorSpec rhs = spec(add_layouts[i + 1], Div16(N())); + SCOPED_TRACE(fmt("\n===\nLHS: %s\nRHS: %s\n===\n", lhs.to_string().c_str(), 
rhs.to_string().c_str())); + auto expect = reference_add(lhs, rhs); + auto actual = perform_partial_add(lhs, rhs); + EXPECT_EQ(actual, expect); + } +} + +std::vector<Layout> bad_layouts = { + {x(3)}, {x(3),y(1)}, + {x(3),y(1)}, {x(3)}, + {x(3),y(3)}, {x(3),y({"a"})}, + {x(3),y({"a"})}, {x(3),y(3)}, + {x({"a"})}, {x({"a"}),y({"b"})}, + {x({"a"}),y({"b"})}, {x({"a"})}, + {x({"a"})}, {x({"a"}),y(1)} +}; + +TEST(PartialAddTest, partial_add_returns_nullptr_on_invalid_inputs) { + ASSERT_TRUE((bad_layouts.size() % 2) == 0); + for (size_t i = 0; i < bad_layouts.size(); i += 2) { + TensorSpec lhs = spec(bad_layouts[i], N()); + TensorSpec rhs = spec(bad_layouts[i + 1], Div16(N())); + SCOPED_TRACE(fmt("\n===\nLHS: %s\nRHS: %s\n===\n", lhs.to_string().c_str(), rhs.to_string().c_str())); + auto actual = try_partial_add(lhs, rhs); + auto expect = Value::UP(); + EXPECT_EQ(actual, expect); + } +} + +GTEST_MAIN_RUN_ALL_TESTS() diff --git a/document/src/tests/tensor_fieldvalue/partial_modify/CMakeLists.txt b/document/src/tests/tensor_fieldvalue/partial_modify/CMakeLists.txt new file mode 100644 index 00000000000..8fde8339f66 --- /dev/null +++ b/document/src/tests/tensor_fieldvalue/partial_modify/CMakeLists.txt @@ -0,0 +1,9 @@ +# Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +vespa_add_executable(eval_partial_modify_test_app TEST + SOURCES + partial_modify_test.cpp + DEPENDS + document + GTest::GTest +) +vespa_add_test(NAME eval_partial_modify_test_app COMMAND eval_partial_modify_test_app) diff --git a/document/src/tests/tensor_fieldvalue/partial_modify/partial_modify_test.cpp b/document/src/tests/tensor_fieldvalue/partial_modify/partial_modify_test.cpp new file mode 100644 index 00000000000..a4562c09e50 --- /dev/null +++ b/document/src/tests/tensor_fieldvalue/partial_modify/partial_modify_test.cpp @@ -0,0 +1,110 @@ +// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. 
See LICENSE in the project root. + +#include <vespa/eval/eval/simple_value.h> +#include <vespa/eval/eval/test/tensor_model.hpp> +#include <vespa/eval/eval/value_codec.h> +#include <vespa/document/update/tensor_partial_update.h> +#include <vespa/vespalib/util/stringfmt.h> +#include <vespa/vespalib/gtest/gtest.h> +#include <optional> + +using namespace vespalib; +using namespace vespalib::eval; +using namespace vespalib::eval::test; + +using vespalib::make_string_short::fmt; + +std::vector<Layout> modify_layouts = { + {x({"a"})}, {x({"a"})}, + {x({"a",""})}, {x({"b","c","d","e"})}, + {x(5)}, {x({"1","2","foo","17"})}, + {x({"a","b","c"}),y({"d","e"})}, {x({"b"}),y({"d"})}, + {x({"a","b","c"})}, {x({"b","c","d"})}, + {x(4),y({"a","b","c","d"}),z(5)}, {x({"1","2"}),y({"b","d"}),z({"1","3"})}, + {x(3),y(2)}, {x({"0","1"}),y({"0","1"})}, + {x({"a","","b"})}, {x({""})} +}; + +TensorSpec::Address sparsify(const TensorSpec::Address &input) { + TensorSpec::Address output; + for (const auto & kv : input) { + if (kv.second.is_indexed()) { + auto val = fmt("%zu", kv.second.index); + output.emplace(kv.first, val); + } else { + output.emplace(kv.first, kv.second); + } + } + return output; +} + +TensorSpec reference_modify(const TensorSpec &a, const TensorSpec &b, join_fun_t fun) { + TensorSpec result(a.type()); + auto end_iter = b.cells().end(); + for (const auto &cell: a.cells()) { + double v = cell.second; + auto sparse_addr = sparsify(cell.first); + auto iter = b.cells().find(sparse_addr); + if (iter == end_iter) { + result.add(cell.first, v); + } else { + result.add(cell.first, fun(v, iter->second)); + } + } + return result; +} + +Value::UP try_partial_modify(const TensorSpec &a, const TensorSpec &b, join_fun_t fun) { + const auto &factory = SimpleValueBuilderFactory::get(); + auto lhs = value_from_spec(a, factory); + auto rhs = value_from_spec(b, factory); + return tensor::TensorPartialUpdate::modify(*lhs, fun, *rhs, factory); +} + +TensorSpec perform_partial_modify(const 
TensorSpec &a, const TensorSpec &b, join_fun_t fun) { + auto up = try_partial_modify(a, b, fun); + EXPECT_TRUE(up); + return spec_from_value(*up); +} + +TEST(PartialModifyTest, partial_modify_works_for_simple_values) { + ASSERT_TRUE((modify_layouts.size() % 2) == 0); + for (size_t i = 0; i < modify_layouts.size(); i += 2) { + TensorSpec lhs = spec(modify_layouts[i], N()); + TensorSpec rhs = spec(modify_layouts[i + 1], Div16(N())); + SCOPED_TRACE(fmt("\n===\nLHS: %s\nRHS: %s\n===\n", lhs.to_string().c_str(), rhs.to_string().c_str())); + for (auto fun: {operation::Add::f, operation::Mul::f, operation::Sub::f}) { + auto expect = reference_modify(lhs, rhs, fun); + auto actual = perform_partial_modify(lhs, rhs, fun); + EXPECT_EQ(actual, expect); + } + auto fun = [](double, double keep) { return keep; }; + auto expect = reference_modify(lhs, rhs, fun); + auto actual = perform_partial_modify(lhs, rhs, fun); + EXPECT_EQ(actual, expect); + } +} + +std::vector<Layout> bad_layouts = { + {x(3)}, {x(3)}, + {x(3),y({"a"})}, {x(3),y({"a"})}, + {x({"a"})}, {x({"a"}),y({"b"})}, + {x({"a"}),y({"b"})}, {x({"a"})}, + {x({"a"})}, {x({"a"}),y(1)} +}; + +TEST(PartialModifyTest, partial_modify_returns_nullptr_on_invalid_inputs) { + ASSERT_TRUE((bad_layouts.size() % 2) == 0); + for (size_t i = 0; i < bad_layouts.size(); i += 2) { + TensorSpec lhs = spec(bad_layouts[i], N()); + TensorSpec rhs = spec(bad_layouts[i + 1], Div16(N())); + SCOPED_TRACE(fmt("\n===\nLHS: %s\nRHS: %s\n===\n", lhs.to_string().c_str(), rhs.to_string().c_str())); + for (auto fun: {operation::Add::f}) { + auto actual = try_partial_modify(lhs, rhs, fun); + auto expect = Value::UP(); + EXPECT_EQ(actual, expect); + } + } +} + +GTEST_MAIN_RUN_ALL_TESTS() diff --git a/document/src/tests/tensor_fieldvalue/partial_remove/CMakeLists.txt b/document/src/tests/tensor_fieldvalue/partial_remove/CMakeLists.txt new file mode 100644 index 00000000000..7382ced9490 --- /dev/null +++ 
b/document/src/tests/tensor_fieldvalue/partial_remove/CMakeLists.txt @@ -0,0 +1,9 @@ +# Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +vespa_add_executable(eval_partial_remove_test_app TEST + SOURCES + partial_remove_test.cpp + DEPENDS + document + GTest::GTest +) +vespa_add_test(NAME eval_partial_remove_test_app COMMAND eval_partial_remove_test_app) diff --git a/document/src/tests/tensor_fieldvalue/partial_remove/partial_remove_test.cpp b/document/src/tests/tensor_fieldvalue/partial_remove/partial_remove_test.cpp new file mode 100644 index 00000000000..7b5b17b9cf8 --- /dev/null +++ b/document/src/tests/tensor_fieldvalue/partial_remove/partial_remove_test.cpp @@ -0,0 +1,130 @@ +// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#include <vespa/eval/eval/simple_value.h> +#include <vespa/eval/eval/test/tensor_model.hpp> +#include <vespa/eval/eval/value_codec.h> +#include <vespa/document/update/tensor_partial_update.h> +#include <vespa/vespalib/util/stringfmt.h> +#include <vespa/vespalib/gtest/gtest.h> +#include <optional> + +using namespace vespalib; +using namespace vespalib::eval; +using namespace vespalib::eval::test; + +using vespalib::make_string_short::fmt; + +std::vector<Layout> remove_layouts = { + {x({"a"})}, {x({"b"})}, + {x({"a","b"})}, {x({"a","c"})}, + {x({"a","b"})}, {x({"a","b"})}, + float_cells({x({"a","b"})}), {x({"a","c"})}, + {x({"a","b"})}, float_cells({x({"a","c"})}), + float_cells({x({"a","b"})}), float_cells({x({"a","c"})}), + {x({"a","b","c"}),y({"d","e"})}, {x({"b","f"}),y({"d","g"})}, + {x(3),y({"a","b"})}, {y({"b","c"})} +}; + +TensorSpec::Address only_sparse(const TensorSpec::Address &input) { + TensorSpec::Address output; + for (const auto & kv : input) { + if (kv.second.is_mapped()) { + output.emplace(kv.first, kv.second); + } + } + return output; +} + +TensorSpec reference_remove(const TensorSpec &a, const 
TensorSpec &b) { + TensorSpec result(a.type()); + auto end_iter = b.cells().end(); + for (const auto &cell: a.cells()) { + auto iter = b.cells().find(only_sparse(cell.first)); + if (iter == end_iter) { + result.add(cell.first, cell.second); + } + } + return result; +} + +Value::UP try_partial_remove(const TensorSpec &a, const TensorSpec &b) { + const auto &factory = SimpleValueBuilderFactory::get(); + auto lhs = value_from_spec(a, factory); + auto rhs = value_from_spec(b, factory); + return tensor::TensorPartialUpdate::remove(*lhs, *rhs, factory); +} + +TensorSpec perform_partial_remove(const TensorSpec &a, const TensorSpec &b) { + auto up = try_partial_remove(a, b); + EXPECT_TRUE(up); + return spec_from_value(*up); +} + +TEST(PartialRemoveTest, partial_remove_works_for_simple_values) { + ASSERT_TRUE((remove_layouts.size() % 2) == 0); + for (size_t i = 0; i < remove_layouts.size(); i += 2) { + TensorSpec lhs = spec(remove_layouts[i], N()); + TensorSpec rhs = spec(remove_layouts[i + 1], Div16(N())); + SCOPED_TRACE(fmt("\n===\nLHS: %s\nRHS: %s\n===\n", lhs.to_string().c_str(), rhs.to_string().c_str())); + auto expect = reference_remove(lhs, rhs); + auto actual = perform_partial_remove(lhs, rhs); + EXPECT_EQ(actual, expect); + } +} + +std::vector<Layout> bad_layouts = { + {x(3)}, {x(3)}, + {x(3),y({"a"})}, {x(3)}, + {x(3),y({"a"})}, {x(3),y({"a"})}, + {x({"a"})}, {y({"a"})}, + {x({"a"})}, {x({"a"}),y({"b"})} +}; + +TEST(PartialRemoveTest, partial_remove_returns_nullptr_on_invalid_inputs) { + ASSERT_TRUE((bad_layouts.size() % 2) == 0); + for (size_t i = 0; i < bad_layouts.size(); i += 2) { + TensorSpec lhs = spec(bad_layouts[i], N()); + TensorSpec rhs = spec(bad_layouts[i + 1], Div16(N())); + SCOPED_TRACE(fmt("\n===\nLHS: %s\nRHS: %s\n===\n", lhs.to_string().c_str(), rhs.to_string().c_str())); + auto actual = try_partial_remove(lhs, rhs); + auto expect = Value::UP(); + EXPECT_EQ(actual, expect); + } +} + +void +expect_partial_remove(const TensorSpec& input, const 
TensorSpec& remove, const TensorSpec& exp) +{ + auto act = perform_partial_remove(input, remove); + EXPECT_EQ(exp, act); +} + +TEST(PartialRemoveTest, remove_where_address_is_not_fully_specified) { + auto input_sparse = TensorSpec("tensor(x{},y{})"). + add({{"x", "a"},{"y", "c"}}, 3.0). + add({{"x", "a"},{"y", "d"}}, 5.0). + add({{"x", "b"},{"y", "c"}}, 7.0); + + expect_partial_remove(input_sparse, TensorSpec("tensor(x{})").add({{"x", "a"}}, 1.0), + TensorSpec("tensor(x{},y{})").add({{"x", "b"},{"y", "c"}}, 7.0)); + + expect_partial_remove(input_sparse, TensorSpec("tensor(y{})").add({{"y", "c"}}, 1.0), + TensorSpec("tensor(x{},y{})").add({{"x", "a"},{"y", "d"}}, 5.0)); + + expect_partial_remove(input_sparse, TensorSpec("tensor(y{})").add({{"y", "d"}}, 1.0), + TensorSpec("tensor(x{},y{})").add({{"x", "a"},{"y", "c"}}, 3.0) + .add({{"x", "b"},{"y", "c"}}, 7.0)); + + auto input_mixed = TensorSpec("tensor(x{},y{},z[1])"). + add({{"x", "a"},{"y", "c"},{"z", 0}}, 3.0). + add({{"x", "a"},{"y", "d"},{"z", 0}}, 5.0). 
+ add({{"x", "b"},{"y", "c"},{"z", 0}}, 7.0); + + expect_partial_remove(input_mixed,TensorSpec("tensor(x{})").add({{"x", "a"}}, 1.0), + TensorSpec("tensor(x{},y{},z[1])").add({{"x", "b"},{"y", "c"},{"z", 0}}, 7.0)); + + expect_partial_remove(input_mixed, TensorSpec("tensor(y{})").add({{"y", "c"}}, 1.0), + TensorSpec("tensor(x{},y{},z[1])").add({{"x", "a"},{"y", "d"},{"z", 0}}, 5.0)); +} + +GTEST_MAIN_RUN_ALL_TESTS() diff --git a/document/src/vespa/document/update/CMakeLists.txt b/document/src/vespa/document/update/CMakeLists.txt index a587d8e3e2d..b0ffa056e1a 100644 --- a/document/src/vespa/document/update/CMakeLists.txt +++ b/document/src/vespa/document/update/CMakeLists.txt @@ -15,6 +15,7 @@ vespa_add_library(document_updates OBJECT removevalueupdate.cpp tensor_add_update.cpp tensor_modify_update.cpp + tensor_partial_update.cpp tensor_remove_update.cpp valueupdate.cpp DEPENDS diff --git a/document/src/vespa/document/update/tensor_add_update.cpp b/document/src/vespa/document/update/tensor_add_update.cpp index c9ffad2a789..8846ec2fc0a 100644 --- a/document/src/vespa/document/update/tensor_add_update.cpp +++ b/document/src/vespa/document/update/tensor_add_update.cpp @@ -1,6 +1,7 @@ // Copyright 2019 Oath Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
#include "tensor_add_update.h" +#include "tensor_partial_update.h" #include <vespa/document/base/exceptions.h> #include <vespa/document/base/field.h> #include <vespa/document/datatype/tensor_data_type.h> @@ -10,7 +11,6 @@ #include <vespa/document/util/serializableexceptions.h> #include <vespa/eval/eval/value.h> #include <vespa/eval/eval/fast_value.h> -#include <vespa/eval/tensor/partial_update.h> #include <vespa/vespalib/objects/nbostream.h> #include <vespa/vespalib/stllike/asciistream.h> #include <vespa/vespalib/util/stringfmt.h> diff --git a/document/src/vespa/document/update/tensor_modify_update.cpp b/document/src/vespa/document/update/tensor_modify_update.cpp index 4da93d0ae46..bc4085ec4fa 100644 --- a/document/src/vespa/document/update/tensor_modify_update.cpp +++ b/document/src/vespa/document/update/tensor_modify_update.cpp @@ -1,6 +1,7 @@ // Copyright 2019 Oath Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. #include "tensor_modify_update.h" +#include "tensor_partial_update.h" #include <vespa/document/base/exceptions.h> #include <vespa/document/base/field.h> #include <vespa/document/datatype/tensor_data_type.h> @@ -11,7 +12,6 @@ #include <vespa/eval/eval/operation.h> #include <vespa/eval/eval/value.h> #include <vespa/eval/eval/fast_value.h> -#include <vespa/eval/tensor/partial_update.h> #include <vespa/vespalib/objects/nbostream.h> #include <vespa/vespalib/stllike/asciistream.h> #include <vespa/vespalib/util/stringfmt.h> diff --git a/document/src/vespa/document/update/tensor_partial_update.cpp b/document/src/vespa/document/update/tensor_partial_update.cpp new file mode 100644 index 00000000000..9bf243602dd --- /dev/null +++ b/document/src/vespa/document/update/tensor_partial_update.cpp @@ -0,0 +1,419 @@ +// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+ +#include "tensor_partial_update.h" +#include <vespa/eval/eval/operation.h> +#include <vespa/vespalib/util/overload.h> +#include <vespa/vespalib/util/typify.h> +#include <vespa/vespalib/util/visit_ranges.h> +#include <cassert> +#include <set> + +#include <vespa/log/log.h> +LOG_SETUP(".eval.tensor.partial_update"); + +using namespace vespalib::eval; + +namespace vespalib::tensor { + +namespace { + +using join_fun_t = vespalib::eval::operation::op2_t; + +static constexpr size_t npos() { return -1; } + +enum class DimCase { + MAPPED_MATCH, CONV_TO_INDEXED +}; + +struct DenseCoords { + std::vector<size_t> dim_sizes; + size_t total_size; + size_t offset; + size_t current; + DenseCoords(const ValueType &output_type) + : total_size(1), offset(0), current(0) + { + for (const auto & dim : output_type.dimensions()) { + if (dim.is_indexed()) { + dim_sizes.push_back(dim.size); + total_size *= dim.size; + } + } + } + ~DenseCoords(); + void clear() { offset = 0; current = 0; } + void convert_label(vespalib::stringref label) { + uint32_t coord = 0; + for (char c : label) { + if (c < '0' || c > '9') { // bad char + offset = npos(); + break; + } + coord = coord * 10 + (c - '0'); + } + size_t cur_dim_size = dim_sizes[current]; + if (coord < cur_dim_size) { + if (offset != npos()) { + offset *= cur_dim_size; + offset += coord; + } + } else { + offset = npos(); + } + ++current; + } + size_t get_dense_index() const { + assert(current == dim_sizes.size()); + return offset; + } +}; +DenseCoords::~DenseCoords() = default; + +struct SparseCoords { + std::vector<vespalib::stringref> addr; + std::vector<vespalib::stringref *> next_result_refs; + std::vector<const vespalib::stringref *> lookup_refs; + std::vector<size_t> lookup_view_dims; + SparseCoords(size_t sz) + : addr(sz), next_result_refs(sz), lookup_refs(sz), lookup_view_dims(sz) + { + for (size_t i = 0; i < sz; ++i) { + next_result_refs[i] = &addr[i]; + lookup_refs[i] = &addr[i]; + lookup_view_dims[i] = i; + } + } + ~SparseCoords(); 
+}; +SparseCoords::~SparseCoords() = default; + +/** + * Helper class that converts a fully-sparse address from the modifier + * tensor into a subset sparse address for the output and an offset + * in the dense subspace. + **/ +struct AddressHandler { + std::vector<DimCase> dimension_plan; + DenseCoords dense_converter; + SparseCoords for_output; + SparseCoords from_modifier; + bool valid; + + AddressHandler(const ValueType &output_type, + const ValueType &modifier_type) + : dimension_plan(), dense_converter(output_type), + for_output(output_type.count_mapped_dimensions()), + from_modifier(modifier_type.count_mapped_dimensions()), + valid(true) + { + if (! modifier_type.is_sparse()) { + LOG(error, "Unexpected non-sparse modifier tensor, type is %s", + modifier_type.to_spec().c_str()); + valid = false; + return; + } + // analyse dimensions + auto visitor = overload { + [&](visit_ranges_either, const auto &) { valid = false; }, + [&](visit_ranges_both, const auto &a, const auto &) { + dimension_plan.push_back(a.is_mapped() ? DimCase::MAPPED_MATCH : DimCase::CONV_TO_INDEXED); + } + }; + const auto & input_dims = output_type.dimensions(); + const auto & modifier_dims = modifier_type.dimensions(); + visit_ranges(visitor, + input_dims.begin(), input_dims.end(), + modifier_dims.begin(), modifier_dims.end(), + [](const auto &a, const auto &b){ return (a.name < b.name); }); + if (! 
valid) { + LOG(error, "Value type %s does not match modifier type %s (should have same dimensions)", + output_type.to_spec().c_str(), modifier_type.to_spec().c_str()); + return; + } + // implicitly checked above, must hold: + assert(input_dims.size() == modifier_dims.size()); + // the plan should now be fully built: + assert(input_dims.size() == dimension_plan.size()); + } + + void handle_address() + { + dense_converter.clear(); + auto out = for_output.addr.begin(); + for (size_t i = 0; i < dimension_plan.size(); ++i) { + if (dimension_plan[i] == DimCase::CONV_TO_INDEXED) { + dense_converter.convert_label(from_modifier.addr[i]); + } else { + *out++ = from_modifier.addr[i]; + } + } + assert(out == for_output.addr.end()); + assert(dense_converter.current == dense_converter.dim_sizes.size()); + } + + ~AddressHandler(); +}; +AddressHandler::~AddressHandler() = default; + +template <typename CT, typename ICT = CT, typename KeepFun> +void copy_tensor_with_filter(const Value &input, + size_t dsss, + SparseCoords &addrs, + ValueBuilder<CT> &builder, + KeepFun && keep_subspace) +{ + const auto input_cells = input.cells().typify<ICT>(); + auto input_view = input.index().create_view({}); + input_view->lookup({}); + size_t input_subspace_index; + while (input_view->next_result(addrs.next_result_refs, input_subspace_index)) { + if (keep_subspace(addrs.lookup_refs, input_subspace_index)) { + size_t input_offset = dsss * input_subspace_index; + auto src = input_cells.begin() + input_offset; + auto dst = builder.add_subspace(addrs.addr).begin(); + for (size_t i = 0; i < dsss; ++i) { + dst[i] = src[i]; + } + } + } +} + +template <typename CT> +Value::UP +copy_tensor(const Value &input, const ValueType &input_type, SparseCoords &helper, const ValueBuilderFactory &factory) +{ + const size_t num_mapped_in_input = input_type.count_mapped_dimensions(); + const size_t dsss = input_type.dense_subspace_size(); + const size_t expected_subspaces = input.index().size(); + auto builder = 
factory.create_value_builder<CT>(input_type, num_mapped_in_input, dsss, expected_subspaces); + auto no_filter = [] (const auto &, size_t) { + return true; + }; + copy_tensor_with_filter<CT>(input, dsss, helper, *builder, no_filter); + return builder->build(std::move(builder)); +} + +//----------------------------------------------------------------------------- + +struct PerformModify { + template<typename ICT, typename MCT> + static Value::UP invoke(const Value &input, + join_fun_t function, + const Value &modifier, + const ValueBuilderFactory &factory); +}; + +template <typename ICT, typename MCT> +Value::UP +PerformModify::invoke(const Value &input, join_fun_t function, const Value &modifier, const ValueBuilderFactory &factory) +{ + const ValueType &input_type = input.type(); + const size_t dsss = input_type.dense_subspace_size(); + const ValueType &modifier_type = modifier.type(); + AddressHandler handler(input_type, modifier_type); + if (! handler.valid) { + return {}; + } + // copy input to output + auto out = copy_tensor<ICT>(input, input_type, handler.for_output, factory); + // need to overwrite some cells + auto output_cells = unconstify(out->cells().template typify<ICT>()); + const auto modifier_cells = modifier.cells().typify<MCT>(); + auto modifier_view = modifier.index().create_view({}); + auto lookup_view = out->index().create_view(handler.for_output.lookup_view_dims); + modifier_view->lookup({}); + size_t modifier_subspace_index; + while (modifier_view->next_result(handler.from_modifier.next_result_refs, modifier_subspace_index)) { + handler.handle_address(); + size_t dense_idx = handler.dense_converter.get_dense_index(); + if (dense_idx == npos()) { + continue; + } + lookup_view->lookup(handler.for_output.lookup_refs); + size_t output_subspace_index; + if (lookup_view->next_result({}, output_subspace_index)) { + size_t subspace_offset = dsss * output_subspace_index; + auto dst = output_cells.begin() + subspace_offset; + ICT lhs = dst[dense_idx]; + 
MCT rhs = modifier_cells[modifier_subspace_index]; + dst[dense_idx] = function(lhs, rhs); + } + } + return out; +} + +//----------------------------------------------------------------------------- + +struct PerformAdd { + template<typename ICT, typename MCT> + static Value::UP invoke(const Value &input, + const Value &modifier, + const ValueBuilderFactory &factory); +}; + +template <typename ICT, typename MCT> +Value::UP +PerformAdd::invoke(const Value &input, const Value &modifier, const ValueBuilderFactory &factory) +{ + const ValueType &input_type = input.type(); + const ValueType &modifier_type = modifier.type(); + if (input_type.dimensions() != modifier_type.dimensions()) { + LOG(error, "when adding cells to a tensor, dimensions must be equal. " + "Got input type %s != modifier type %s", + input_type.to_spec().c_str(), modifier_type.to_spec().c_str()); + return {}; + } + const size_t num_mapped_in_input = input_type.count_mapped_dimensions(); + const size_t dsss = input_type.dense_subspace_size(); + const size_t expected_subspaces = input.index().size() + modifier.index().size(); + auto builder = factory.create_value_builder<ICT>(input_type, num_mapped_in_input, dsss, expected_subspaces); + SparseCoords addrs(num_mapped_in_input); + auto lookup_view = input.index().create_view(addrs.lookup_view_dims); + std::vector<bool> overwritten(input.index().size(), false); + auto remember_subspaces = [&] (const auto & lookup_refs, size_t) { + lookup_view->lookup(lookup_refs); + size_t input_subspace_index; + if (lookup_view->next_result({}, input_subspace_index)) { + overwritten[input_subspace_index] = true; + } + return true; + }; + copy_tensor_with_filter<ICT, MCT>(modifier, dsss, addrs, *builder, remember_subspaces); + auto filter = [&] (const auto &, size_t input_subspace) { + return ! 
overwritten[input_subspace]; + }; + copy_tensor_with_filter<ICT>(input, dsss, addrs, *builder, filter); + return builder->build(std::move(builder)); +} + +//----------------------------------------------------------------------------- + +struct PerformRemove { + template<typename ICT> + static Value::UP invoke(const Value &input, + const Value &modifier, + const ValueBuilderFactory &factory); +}; + +/** + * Calculates the indexes of where the mapped modifier dimensions are found in the mapped input dimensions. + * + * The modifier dimensions should be a subset or all of the input dimensions. + * An empty vector is returned on type mismatch. + */ +std::vector<size_t> +calc_mapped_dimension_indexes(const ValueType& input_type, + const ValueType& modifier_type) +{ + auto input_dims = input_type.mapped_dimensions(); + auto mod_dims = modifier_type.mapped_dimensions(); + if (mod_dims.size() > input_dims.size()) { + return {}; + } + std::vector<size_t> result(mod_dims.size()); + size_t j = 0; + for (size_t i = 0; i < mod_dims.size(); ++i) { + while ((j < input_dims.size()) && (input_dims[j] != mod_dims[i])) { + ++j; + } + if (j >= input_dims.size()) { + return {}; + } + result[i] = j; + } + return result; +} + +struct ModifierCoords { + + std::vector<const vespalib::stringref *> lookup_refs; + std::vector<size_t> lookup_view_dims; + + ModifierCoords(const SparseCoords& input_coords, + const std::vector<size_t>& input_dim_indexes, + const ValueType& modifier_type) + : lookup_refs(modifier_type.dimensions().size()), + lookup_view_dims(modifier_type.dimensions().size()) + { + assert(modifier_type.dimensions().size() == input_dim_indexes.size()); + for (size_t i = 0; i < input_dim_indexes.size(); ++i) { + // Setup the modifier dimensions to point to the matching input dimensions. 
+ lookup_refs[i] = &input_coords.addr[input_dim_indexes[i]]; + lookup_view_dims[i] = i; + } + } + ~ModifierCoords() {} +}; + +template <typename ICT> +Value::UP +PerformRemove::invoke(const Value &input, const Value &modifier, const ValueBuilderFactory &factory) +{ + const ValueType &input_type = input.type(); + const ValueType &modifier_type = modifier.type(); + const size_t num_mapped_in_input = input_type.count_mapped_dimensions(); + if (num_mapped_in_input == 0) { + LOG(error, "Cannot remove cells from a dense input tensor of type %s", + input_type.to_spec().c_str()); + return {}; + } + if (modifier_type.count_indexed_dimensions() != 0) { + LOG(error, "Cannot remove cells using a modifier tensor of type %s", + modifier_type.to_spec().c_str()); + return {}; + } + auto input_dim_indexes = calc_mapped_dimension_indexes(input_type, modifier_type); + if (input_dim_indexes.empty()) { + LOG(error, "Tensor type mismatch when removing cells from a tensor. " + "Got input type %s versus modifier type %s", + input_type.to_spec().c_str(), modifier_type.to_spec().c_str()); + return {}; + } + SparseCoords addrs(num_mapped_in_input); + ModifierCoords mod_coords(addrs, input_dim_indexes, modifier_type); + auto modifier_view = modifier.index().create_view(mod_coords.lookup_view_dims); + const size_t expected_subspaces = input.index().size(); + const size_t dsss = input_type.dense_subspace_size(); + auto builder = factory.create_value_builder<ICT>(input_type, num_mapped_in_input, dsss, expected_subspaces); + auto filter_by_modifier = [&] (const auto & lookup_refs, size_t) { + // The modifier dimensions are setup to point to the input dimensions address storage in ModifierCoords, + // so we don't need to use the lookup_refs argument. 
+ (void) lookup_refs; + modifier_view->lookup(mod_coords.lookup_refs); + size_t modifier_subspace_index; + return !(modifier_view->next_result({}, modifier_subspace_index)); + }; + copy_tensor_with_filter<ICT>(input, dsss, addrs, *builder, filter_by_modifier); + return builder->build(std::move(builder)); +} + +} // namespace <unnamed> + +//----------------------------------------------------------------------------- + +Value::UP +TensorPartialUpdate::modify(const Value &input, join_fun_t function, + const Value &modifier, const ValueBuilderFactory &factory) +{ + return typify_invoke<2, TypifyCellType, PerformModify>( + input.cells().type, modifier.cells().type, + input, function, modifier, factory); +} + +Value::UP +TensorPartialUpdate::add(const Value &input, const Value &add_cells, const ValueBuilderFactory &factory) +{ + return typify_invoke<2, TypifyCellType, PerformAdd>( + input.cells().type, add_cells.cells().type, + input, add_cells, factory); +} + +Value::UP +TensorPartialUpdate::remove(const Value &input, const Value &remove_spec, const ValueBuilderFactory &factory) +{ + return typify_invoke<1, TypifyCellType, PerformRemove>( + input.cells().type, + input, remove_spec, factory); +} + +} // namespace diff --git a/document/src/vespa/document/update/tensor_partial_update.h b/document/src/vespa/document/update/tensor_partial_update.h new file mode 100644 index 00000000000..b3e9d32fca8 --- /dev/null +++ b/document/src/vespa/document/update/tensor_partial_update.h @@ -0,0 +1,44 @@ +// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+ +#pragma once + +#include <vespa/eval/eval/fast_value.h> +#include <vespa/eval/eval/value.h> +#include <vespa/eval/eval/operation.h> + +namespace vespalib::tensor { + +struct TensorPartialUpdate { + using join_fun_t = vespalib::eval::operation::op2_t; + using Value = vespalib::eval::Value; + using ValueBuilderFactory = vespalib::eval::ValueBuilderFactory; + + /** + * Make a copy of the input, but apply function(oldvalue, modifier.cellvalue) + * to cells which also exist in the "modifier". + * The modifier type must be sparse with exactly the same dimension names + * as the input type; labels for indexed input dimensions are parsed as indexes. + * Returns null pointer if this constraint is violated. + **/ + static Value::UP modify(const Value &input, join_fun_t function, + const Value &modifier, const ValueBuilderFactory &factory); + + /** + * Make a copy of the input, but add or overwrite cells from add_cells. + * Requires equal dimensions for input and add_cells (cell types may differ). + * Returns null pointer if this constraint is violated. + **/ + static Value::UP add(const Value &input, const Value &add_cells, const ValueBuilderFactory &factory); + + /** + * Make a copy of the input, but remove cells whose address matches remove_spec. + * The remove_spec must be a sparse tensor whose mapped dimensions are all of, or a + * non-empty subset of, the input's mapped dimensions; a subset removes all matches. + * Cell values in remove_spec are ignored. + * Not valid for dense tensors, since removing cells from those is impossible. + * Returns null pointer if these constraints are violated. + **/ + static Value::UP remove(const Value &input, const Value &remove_spec, const ValueBuilderFactory &factory); +}; + +} // namespace diff --git a/document/src/vespa/document/update/tensor_remove_update.cpp b/document/src/vespa/document/update/tensor_remove_update.cpp index 7b81581aeed..c9ff1a462c5 100644 --- a/document/src/vespa/document/update/tensor_remove_update.cpp +++ b/document/src/vespa/document/update/tensor_remove_update.cpp @@ -1,13 +1,13 @@ // Copyright 2019 Oath Inc. Licensed under the terms of the Apache 2.0 license. 
See LICENSE in the project root. #include "tensor_remove_update.h" +#include "tensor_partial_update.h" #include <vespa/document/base/exceptions.h> #include <vespa/document/datatype/tensor_data_type.h> #include <vespa/document/fieldvalue/document.h> #include <vespa/document/fieldvalue/tensorfieldvalue.h> #include <vespa/document/serialization/vespadocumentdeserializer.h> #include <vespa/eval/eval/fast_value.h> -#include <vespa/eval/tensor/partial_update.h> #include <vespa/eval/eval/value.h> #include <vespa/vespalib/objects/nbostream.h> #include <vespa/vespalib/util/xmlstream.h> |