diff options
author | Arne Juul <arnej@verizonmedia.com> | 2020-10-06 12:45:12 +0000 |
---|---|---|
committer | Arne Juul <arnej@verizonmedia.com> | 2020-10-07 12:02:01 +0000 |
commit | 7d8516aecc9caf10435cd63cd59973d29e5c07cc (patch) | |
tree | 6f244515484d037541789aabab7b700d25c25a3a /eval | |
parent | 3f75460d657edeb6ee6b66035b57b418569a34c3 (diff) |
add TensorPartialUpdate with add/modify/remove
Diffstat (limited to 'eval')
-rw-r--r-- | eval/CMakeLists.txt | 3 | ||||
-rw-r--r-- | eval/src/tests/tensor/partial_add/CMakeLists.txt | 9 | ||||
-rw-r--r-- | eval/src/tests/tensor/partial_add/partial_add_test.cpp | 98 | ||||
-rw-r--r-- | eval/src/tests/tensor/partial_modify/CMakeLists.txt | 9 | ||||
-rw-r--r-- | eval/src/tests/tensor/partial_modify/partial_modify_test.cpp | 122 | ||||
-rw-r--r-- | eval/src/tests/tensor/partial_remove/CMakeLists.txt | 9 | ||||
-rw-r--r-- | eval/src/tests/tensor/partial_remove/partial_remove_test.cpp | 106 | ||||
-rw-r--r-- | eval/src/vespa/eval/tensor/CMakeLists.txt | 1 | ||||
-rw-r--r-- | eval/src/vespa/eval/tensor/partial_update.cpp | 388 | ||||
-rw-r--r-- | eval/src/vespa/eval/tensor/partial_update.h | 35 | ||||
-rw-r--r-- | eval/src/vespa/eval/tensor/wrapped_simple_tensor.cpp | 1 |
11 files changed, 780 insertions(+), 1 deletion(-)
diff --git a/eval/CMakeLists.txt b/eval/CMakeLists.txt index 18c3676c366..ab793534c14 100644 --- a/eval/CMakeLists.txt +++ b/eval/CMakeLists.txt @@ -65,6 +65,9 @@ vespa_define_module( src/tests/tensor/instruction_benchmark src/tests/tensor/onnx_wrapper src/tests/tensor/packed_mappings + src/tests/tensor/partial_add + src/tests/tensor/partial_modify + src/tests/tensor/partial_remove src/tests/tensor/tensor_add_operation src/tests/tensor/tensor_address src/tests/tensor/tensor_conformance diff --git a/eval/src/tests/tensor/partial_add/CMakeLists.txt b/eval/src/tests/tensor/partial_add/CMakeLists.txt new file mode 100644 index 00000000000..f0d07a8e9cf --- /dev/null +++ b/eval/src/tests/tensor/partial_add/CMakeLists.txt @@ -0,0 +1,9 @@ +# Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +vespa_add_executable(eval_partial_add_test_app TEST + SOURCES + partial_add_test.cpp + DEPENDS + vespaeval + GTest::GTest +) +vespa_add_test(NAME eval_partial_add_test_app COMMAND eval_partial_add_test_app) diff --git a/eval/src/tests/tensor/partial_add/partial_add_test.cpp b/eval/src/tests/tensor/partial_add/partial_add_test.cpp new file mode 100644 index 00000000000..f31df131345 --- /dev/null +++ b/eval/src/tests/tensor/partial_add/partial_add_test.cpp @@ -0,0 +1,98 @@ +// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+ +#include <vespa/eval/eval/simple_value.h> +#include <vespa/eval/eval/test/tensor_model.hpp> +#include <vespa/eval/eval/value_codec.h> +#include <vespa/eval/tensor/cell_values.h> +#include <vespa/eval/tensor/default_tensor_engine.h> +#include <vespa/eval/tensor/partial_update.h> +#include <vespa/eval/tensor/sparse/sparse_tensor.h> +#include <vespa/eval/tensor/tensor.h> +#include <vespa/vespalib/util/stringfmt.h> +#include <vespa/vespalib/gtest/gtest.h> +#include <optional> + +using namespace vespalib; +using namespace vespalib::eval; +using namespace vespalib::eval::test; + +using vespalib::make_string_short::fmt; + +std::vector<Layout> add_layouts = { + {x({"a"})}, {x({"b"})}, + {x({"a","b"})}, {x({"a","c"})}, + float_cells({x({"a","b"})}), {x({"a","c"})}, + {x({"a","b"})}, float_cells({x({"a","c"})}), + float_cells({x({"a","b"})}), float_cells({x({"a","c"})}), + {x({"a","b","c"}),y({"d","e"})}, {x({"b","f"}),y({"d","g"})}, + {x(3),y({"a","b"})}, {x(3),y({"b","c"})} +}; + +TensorSpec reference_add(const TensorSpec &a, const TensorSpec &b) { + TensorSpec result(a.type()); + for (const auto &cell: b.cells()) { + result.add(cell.first, cell.second); + } + auto end_iter = b.cells().end(); + for (const auto &cell: a.cells()) { + auto iter = b.cells().find(cell.first); + if (iter == end_iter) { + result.add(cell.first, cell.second); + } + } + return result; +} + +TensorSpec perform_partial_add(const TensorSpec &a, const TensorSpec &b) { + const auto &factory = SimpleValueBuilderFactory::get(); + auto lhs = value_from_spec(a, factory); + auto rhs = value_from_spec(b, factory); + auto up = tensor::TensorPartialUpdate::add(*lhs, *rhs, factory); + if (up) { + return spec_from_value(*up); + } else { + return TensorSpec(a.type()); + } +} + +TensorSpec perform_old_add(const TensorSpec &a, const TensorSpec &b) { + const auto &engine = tensor::DefaultTensorEngine::ref(); + auto lhs = engine.from_spec(a); + auto rhs = engine.from_spec(b); + auto lhs_tensor = 
dynamic_cast<tensor::Tensor *>(lhs.get()); + EXPECT_TRUE(lhs_tensor); + auto rhs_tensor = dynamic_cast<tensor::Tensor *>(rhs.get()); + EXPECT_TRUE(rhs_tensor); + auto up = lhs_tensor->add(*rhs_tensor); + EXPECT_TRUE(up); + return engine.to_spec(*up); +} + + +TEST(PartialAddTest, partial_add_works_for_simple_values) { + ASSERT_TRUE((add_layouts.size() % 2) == 0); + for (size_t i = 0; i < add_layouts.size(); i += 2) { + TensorSpec lhs = spec(add_layouts[i], N()); + TensorSpec rhs = spec(add_layouts[i + 1], Div16(N())); + SCOPED_TRACE(fmt("\n===\nLHS: %s\nRHS: %s\n===\n", lhs.to_string().c_str(), rhs.to_string().c_str())); + auto expect = reference_add(lhs, rhs); + auto actual = perform_partial_add(lhs, rhs); + EXPECT_EQ(actual, expect); + } +} + +TEST(PartialAddTest, partial_add_works_like_old_add) { + ASSERT_TRUE((add_layouts.size() % 2) == 0); + for (size_t i = 0; i < add_layouts.size(); i += 2) { + TensorSpec lhs = spec(add_layouts[i], N()); + TensorSpec rhs = spec(add_layouts[i + 1], Div16(N())); + SCOPED_TRACE(fmt("\n===\nLHS: %s\nRHS: %s\n===\n", lhs.to_string().c_str(), rhs.to_string().c_str())); + auto expect = perform_old_add(lhs, rhs); + auto actual = perform_partial_add(lhs, rhs); + EXPECT_EQ(actual, expect); + printf("%s add %s -> %s\n", lhs.to_string().c_str(), rhs.to_string().c_str(), actual.to_string().c_str()); + + } +} + +GTEST_MAIN_RUN_ALL_TESTS() diff --git a/eval/src/tests/tensor/partial_modify/CMakeLists.txt b/eval/src/tests/tensor/partial_modify/CMakeLists.txt new file mode 100644 index 00000000000..42a08acaae6 --- /dev/null +++ b/eval/src/tests/tensor/partial_modify/CMakeLists.txt @@ -0,0 +1,9 @@ +# Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+vespa_add_executable(eval_partial_modify_test_app TEST + SOURCES + partial_modify_test.cpp + DEPENDS + vespaeval + GTest::GTest +) +vespa_add_test(NAME eval_partial_modify_test_app COMMAND eval_partial_modify_test_app) diff --git a/eval/src/tests/tensor/partial_modify/partial_modify_test.cpp b/eval/src/tests/tensor/partial_modify/partial_modify_test.cpp new file mode 100644 index 00000000000..a35c7597194 --- /dev/null +++ b/eval/src/tests/tensor/partial_modify/partial_modify_test.cpp @@ -0,0 +1,122 @@ +// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#include <vespa/eval/eval/simple_value.h> +#include <vespa/eval/eval/test/tensor_model.hpp> +#include <vespa/eval/eval/value_codec.h> +#include <vespa/eval/tensor/cell_values.h> +#include <vespa/eval/tensor/default_tensor_engine.h> +#include <vespa/eval/tensor/partial_update.h> +#include <vespa/eval/tensor/sparse/sparse_tensor.h> +#include <vespa/eval/tensor/tensor.h> +#include <vespa/vespalib/util/stringfmt.h> +#include <vespa/vespalib/gtest/gtest.h> +#include <optional> + +using namespace vespalib; +using namespace vespalib::eval; +using namespace vespalib::eval::test; + +using vespalib::make_string_short::fmt; + +std::vector<Layout> modify_layouts = { + {x({"a"})}, {x({"a"})}, + {x({"a",""})}, {x({"b","c","d","e"})}, + {x(5)}, {x({"1","2","foo","17"})}, + {x({"a","b","c"}),y({"d","e"})}, {x({"b"}),y({"d"})}, + {x({"a","b","c"})}, {x({"b","c","d"})}, + {x(3),y(2)}, {x({"0","1"}),y({"0","1"})}, + {x({"a","","b"})}, {x({""})} +}; + +TensorSpec::Address sparsify(const TensorSpec::Address &input) { + TensorSpec::Address output; + for (const auto & kv : input) { + if (kv.second.is_indexed()) { + auto val = fmt("%zu", kv.second.index); + output.emplace(kv.first, val); + } else { + output.emplace(kv.first, kv.second); + } + } + return output; +} + +TensorSpec reference_modify(const TensorSpec &a, const TensorSpec &b, join_fun_t fun) { + TensorSpec 
result(a.type()); + auto end_iter = b.cells().end(); + for (const auto &cell: a.cells()) { + double v = cell.second; + auto sparse_addr = sparsify(cell.first); + auto iter = b.cells().find(sparse_addr); + if (iter == end_iter) { + result.add(cell.first, v); + } else { + result.add(cell.first, fun(v, iter->second)); + } + } + return result; +} + +TensorSpec perform_partial_modify(const TensorSpec &a, const TensorSpec &b, join_fun_t fun) { + const auto &factory = SimpleValueBuilderFactory::get(); + auto lhs = value_from_spec(a, factory); + auto rhs = value_from_spec(b, factory); + auto up = tensor::TensorPartialUpdate::modify(*lhs, fun, *rhs, factory); + if (up) { + return spec_from_value(*up); + } else { + return TensorSpec(a.type()); + } +} + +TensorSpec perform_old_modify(const TensorSpec &a, const TensorSpec &b, join_fun_t fun) { + const auto &engine = tensor::DefaultTensorEngine::ref(); + auto lhs = engine.from_spec(a); + auto rhs = engine.from_spec(b); + auto lhs_tensor = dynamic_cast<tensor::Tensor *>(lhs.get()); + EXPECT_TRUE(lhs_tensor); + auto rhs_sparse = dynamic_cast<tensor::SparseTensor *>(rhs.get()); + EXPECT_TRUE(rhs_sparse); + tensor::CellValues cell_values(*rhs_sparse); + auto up = lhs_tensor->modify(fun, cell_values); + EXPECT_TRUE(up); + return engine.to_spec(*up); +} + + +TEST(PartialModifyTest, partial_modify_works_for_simple_values) { + ASSERT_TRUE((modify_layouts.size() % 2) == 0); + for (size_t i = 0; i < modify_layouts.size(); i += 2) { + TensorSpec lhs = spec(modify_layouts[i], N()); + TensorSpec rhs = spec(modify_layouts[i + 1], Div16(N())); + SCOPED_TRACE(fmt("\n===\nLHS: %s\nRHS: %s\n===\n", lhs.to_string().c_str(), rhs.to_string().c_str())); + for (auto fun: {operation::Add::f, operation::Mul::f, operation::Sub::f}) { + auto expect = reference_modify(lhs, rhs, fun); + auto actual = perform_partial_modify(lhs, rhs, fun); + EXPECT_EQ(actual, expect); + } + auto fun = [](double, double keep) { return keep; }; + auto expect = 
reference_modify(lhs, rhs, fun); + auto actual = perform_partial_modify(lhs, rhs, fun); + EXPECT_EQ(actual, expect); + } +} + +TEST(PartialModifyTest, partial_modify_works_like_old_modify) { + ASSERT_TRUE((modify_layouts.size() % 2) == 0); + for (size_t i = 0; i < modify_layouts.size(); i += 2) { + TensorSpec lhs = spec(modify_layouts[i], N()); + TensorSpec rhs = spec(modify_layouts[i + 1], Div16(N())); + SCOPED_TRACE(fmt("\n===\nLHS: %s\nRHS: %s\n===\n", lhs.to_string().c_str(), rhs.to_string().c_str())); + for (auto fun: {operation::Add::f, operation::Mul::f, operation::Sub::f}) { + auto expect = perform_old_modify(lhs, rhs, fun); + auto actual = perform_partial_modify(lhs, rhs, fun); + EXPECT_EQ(actual, expect); + if (fun == operation::Max::f) { + printf("%s modify(sub) %s -> %s\n", lhs.to_string().c_str(), rhs.to_string().c_str(), actual.to_string().c_str()); + } + } + } +} + +GTEST_MAIN_RUN_ALL_TESTS() diff --git a/eval/src/tests/tensor/partial_remove/CMakeLists.txt b/eval/src/tests/tensor/partial_remove/CMakeLists.txt new file mode 100644 index 00000000000..1680324f574 --- /dev/null +++ b/eval/src/tests/tensor/partial_remove/CMakeLists.txt @@ -0,0 +1,9 @@ +# Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +vespa_add_executable(eval_partial_remove_test_app TEST + SOURCES + partial_remove_test.cpp + DEPENDS + vespaeval + GTest::GTest +) +vespa_add_test(NAME eval_partial_remove_test_app COMMAND eval_partial_remove_test_app) diff --git a/eval/src/tests/tensor/partial_remove/partial_remove_test.cpp b/eval/src/tests/tensor/partial_remove/partial_remove_test.cpp new file mode 100644 index 00000000000..7fb46790415 --- /dev/null +++ b/eval/src/tests/tensor/partial_remove/partial_remove_test.cpp @@ -0,0 +1,106 @@ +// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+ +#include <vespa/eval/eval/simple_value.h> +#include <vespa/eval/eval/test/tensor_model.hpp> +#include <vespa/eval/eval/value_codec.h> +#include <vespa/eval/tensor/cell_values.h> +#include <vespa/eval/tensor/default_tensor_engine.h> +#include <vespa/eval/tensor/partial_update.h> +#include <vespa/eval/tensor/sparse/sparse_tensor.h> +#include <vespa/eval/tensor/tensor.h> +#include <vespa/vespalib/util/stringfmt.h> +#include <vespa/vespalib/gtest/gtest.h> +#include <optional> + +using namespace vespalib; +using namespace vespalib::eval; +using namespace vespalib::eval::test; + +using vespalib::make_string_short::fmt; + +std::vector<Layout> remove_layouts = { + {x({"a"})}, {x({"b"})}, + {x({"a","b"})}, {x({"a","c"})}, + float_cells({x({"a","b"})}), {x({"a","c"})}, + {x({"a","b"})}, float_cells({x({"a","c"})}), + float_cells({x({"a","b"})}), float_cells({x({"a","c"})}), + {x({"a","b","c"}),y({"d","e"})}, {x({"b","f"}),y({"d","g"})}, + {x(3),y({"a","b"})}, {y({"b","c"})} +}; + +TensorSpec::Address only_sparse(const TensorSpec::Address &input) { + TensorSpec::Address output; + for (const auto & kv : input) { + if (kv.second.is_mapped()) { + output.emplace(kv.first, kv.second); + } + } + return output; +} + +TensorSpec reference_remove(const TensorSpec &a, const TensorSpec &b) { + TensorSpec result(a.type()); + auto end_iter = b.cells().end(); + for (const auto &cell: a.cells()) { + auto iter = b.cells().find(only_sparse(cell.first)); + if (iter == end_iter) { + result.add(cell.first, cell.second); + } + } + return result; +} + +TensorSpec perform_partial_remove(const TensorSpec &a, const TensorSpec &b) { + const auto &factory = SimpleValueBuilderFactory::get(); + auto lhs = value_from_spec(a, factory); + auto rhs = value_from_spec(b, factory); + auto up = tensor::TensorPartialUpdate::remove(*lhs, *rhs, factory); + if (up) { + return spec_from_value(*up); + } else { + return TensorSpec(a.type()); + } +} + +TensorSpec perform_old_remove(const TensorSpec &a, const 
TensorSpec &b) { + const auto &engine = tensor::DefaultTensorEngine::ref(); + auto lhs = engine.from_spec(a); + auto rhs = engine.from_spec(b); + auto lhs_tensor = dynamic_cast<tensor::Tensor *>(lhs.get()); + EXPECT_TRUE(lhs_tensor); + auto rhs_sparse = dynamic_cast<tensor::SparseTensor *>(rhs.get()); + EXPECT_TRUE(rhs_sparse); + tensor::CellValues cell_values(*rhs_sparse); + auto up = lhs_tensor->remove(cell_values); + EXPECT_TRUE(up); + return engine.to_spec(*up); +} + + +TEST(PartialAddTest, partial_remove_works_for_simple_values) { + ASSERT_TRUE((remove_layouts.size() % 2) == 0); + for (size_t i = 0; i < remove_layouts.size(); i += 2) { + TensorSpec lhs = spec(remove_layouts[i], N()); + TensorSpec rhs = spec(remove_layouts[i + 1], Div16(N())); + SCOPED_TRACE(fmt("\n===\nLHS: %s\nRHS: %s\n===\n", lhs.to_string().c_str(), rhs.to_string().c_str())); + auto expect = reference_remove(lhs, rhs); + auto actual = perform_partial_remove(lhs, rhs); + EXPECT_EQ(actual, expect); + } +} + +TEST(PartialAddTest, partial_remove_works_like_old_remove) { + ASSERT_TRUE((remove_layouts.size() % 2) == 0); + for (size_t i = 0; i < remove_layouts.size(); i += 2) { + TensorSpec lhs = spec(remove_layouts[i], N()); + TensorSpec rhs = spec(remove_layouts[i + 1], Div16(N())); + SCOPED_TRACE(fmt("\n===\nLHS: %s\nRHS: %s\n===\n", lhs.to_string().c_str(), rhs.to_string().c_str())); + auto expect = perform_old_remove(lhs, rhs); + auto actual = perform_partial_remove(lhs, rhs); + EXPECT_EQ(actual, expect); + // printf("%s remove %s -> %s\n", lhs.to_string().c_str(), rhs.to_string().c_str(), actual.to_string().c_str()); + + } +} + +GTEST_MAIN_RUN_ALL_TESTS() diff --git a/eval/src/vespa/eval/tensor/CMakeLists.txt b/eval/src/vespa/eval/tensor/CMakeLists.txt index 79f6f7e2a4f..b75b34098f5 100644 --- a/eval/src/vespa/eval/tensor/CMakeLists.txt +++ b/eval/src/vespa/eval/tensor/CMakeLists.txt @@ -3,6 +3,7 @@ vespa_add_library(eval_tensor OBJECT SOURCES default_tensor_engine.cpp 
default_value_builder_factory.cpp + partial_update.cpp tensor.cpp tensor_address.cpp wrapped_simple_tensor.cpp diff --git a/eval/src/vespa/eval/tensor/partial_update.cpp b/eval/src/vespa/eval/tensor/partial_update.cpp new file mode 100644 index 00000000000..0ba8b37e77f --- /dev/null +++ b/eval/src/vespa/eval/tensor/partial_update.cpp @@ -0,0 +1,388 @@ +// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#include "partial_update.h" +#include <vespa/vespalib/util/overload.h> +#include <vespa/vespalib/util/typify.h> +#include <vespa/vespalib/util/visit_ranges.h> +#include <cassert> +#include <set> + +#include <vespa/log/log.h> +LOG_SETUP(".eval.tensor.partial_update"); + +using namespace vespalib::eval; + +namespace vespalib::tensor { + +namespace { + +using join_fun_t = double (*)(double, double); + +static constexpr size_t npos() { return -1; } + +enum class DimCase { + SKIP_MAPPED, SKIP_INDEXED, + MISSING_MAPPED, MISSING_INDEXED, + MAPPED_MATCH, INDEXED_MATCH, + CONV_TO_INDEXED, CONV_TO_MAPPED +}; + +struct DenseCoords { + std::vector<size_t> dim_sizes; + std::vector<const char *> dim_names; + size_t total_size = 1; + size_t offset; + size_t dim; + void clear() { offset = 0; dim = 0; } + void with(size_t coord) { + size_t cur = dim_sizes[dim]; + if (coord < cur) { + if (offset != npos()) { + offset *= cur; + offset += coord; + } + } else { + // "bad label{%s} in modifier tensor, was %zu, must be < %zu", dim_names[dim], coord, cur + offset = npos(); + } + ++dim; + } + void with(vespalib::stringref label) { + uint32_t result = 0; + for (char c : label) { + if (c < '0' || c > '9') { // bad char + // "bad label{%s} in modifier tensor, was '%s'", dim_names[dim], label.data() + offset = npos(); + ++dim; + return; + } + result = result * 10 + (c - '0'); + } + with(result); + } + void add_dim(const char *name, size_t sz) { + dim_sizes.push_back(sz); + dim_names.push_back(name); + total_size *= sz; + } + size_t 
get() const { + assert(dim == dim_sizes.size()); + return offset; + } + ~DenseCoords(); +}; +DenseCoords::~DenseCoords() = default; + +struct Addresses { + std::vector<vespalib::stringref> addr; + std::vector<vespalib::stringref *> next_result_refs; + std::vector<const vespalib::stringref *> lookup_refs; + std::vector<size_t> lookup_view_dims; + Addresses(size_t sz) + : addr(sz), next_result_refs(sz), lookup_refs(sz), lookup_view_dims(sz) + { + for (size_t i = 0; i < sz; ++i) { + next_result_refs[i] = &addr[i]; + lookup_refs[i] = &addr[i]; + lookup_view_dims[i] = i; + } + } + ~Addresses(); +}; +Addresses::~Addresses() = default; + +struct AddressHandler { + std::vector<DimCase> how; + DenseCoords target_coords; + Addresses for_output; + Addresses from_modifier; + bool valid; + + AddressHandler(const ValueType &input_type, + const ValueType &modifier_type) + : how(), target_coords(), + for_output(input_type.count_mapped_dimensions()), + from_modifier(modifier_type.count_mapped_dimensions()), + valid(true) + { + if (! modifier_type.is_sparse()) { + LOG(error, "Unexpected non-sparse modifier tensor, type is %s", + modifier_type.to_spec().c_str()); + valid = false; + return; + } + // analyse dimensions + auto visitor = overload { + [&](visit_ranges_either, const auto &) { valid = false; }, + [&](visit_ranges_both, const auto &a, const auto &) { + how.push_back(a.is_mapped() ? DimCase::MAPPED_MATCH : DimCase::CONV_TO_INDEXED); + } + }; + const auto & input_dims = input_type.dimensions(); + const auto & modifier_dims = modifier_type.dimensions(); + visit_ranges(visitor, + input_dims.begin(), input_dims.end(), + modifier_dims.begin(), modifier_dims.end(), + [](const auto &a, const auto &b){ return (a.name < b.name); }); + if ((! 
valid) || + (input_dims.size() != modifier_dims.size()) || + (input_dims.size() != how.size())) + { + LOG(error, "Value type %s does not match modifier type %s (should have same dimensions)", + input_type.to_spec().c_str(), + modifier_type.to_spec().c_str()); + valid = false; + return; + } + for (const auto & dim : input_type.dimensions()) { + if (dim.is_indexed()) { + target_coords.add_dim(dim.name.c_str(), dim.size); + } + } + } + + void handle_address() + { + target_coords.clear(); + auto out = for_output.addr.begin(); + for (size_t i = 0; i < how.size(); ++i) { + if (how[i] == DimCase::CONV_TO_INDEXED) { + target_coords.with(from_modifier.addr[i]); + } else { + *out++ = from_modifier.addr[i]; + } + } + assert(out == for_output.addr.end()); + assert(target_coords.dim == target_coords.dim_sizes.size()); + } + + ~AddressHandler(); +}; +AddressHandler::~AddressHandler() = default; + +template <typename CT> +Value::UP +copy_tensor(const Value &input, const ValueType &input_type, Addresses &helper, const ValueBuilderFactory &factory) +{ + const size_t num_mapped_in_input = input_type.count_mapped_dimensions(); + const size_t dsss = input_type.dense_subspace_size(); + const size_t expected_subspaces = input.index().size(); + auto builder = factory.create_value_builder<CT>(input_type, num_mapped_in_input, dsss, expected_subspaces); + auto view = input.index().create_view({}); + view->lookup({}); + auto input_cells = input.cells().typify<CT>(); + size_t input_subspace; + while (view->next_result(helper.next_result_refs, input_subspace)) { + size_t input_offset = input_subspace * dsss; + auto src = input_cells.begin() + input_offset; + auto dst = builder->add_subspace(helper.addr).begin(); + for (size_t i = 0; i < dsss; ++i) { + dst[i] = src[i]; + } + } + return builder->build(std::move(builder)); +} + +template <typename ICT, typename MCT> +Value::UP +my_modify_value(const Value &input, join_fun_t function, const Value &modifier, const ValueBuilderFactory &factory) +{ + 
const ValueType &input_type = input.type(); + const size_t dsss = input_type.dense_subspace_size(); + const ValueType &modifier_type = modifier.type(); + AddressHandler handler(input_type, modifier_type); + if (! handler.valid) { + return Value::UP(); + } + // copy input to output + auto out = copy_tensor<ICT>(input, input_type, handler.for_output, factory); + // need to overwrite some cells + auto output_cells = unconstify(out->cells().template typify<ICT>()); + const auto modifier_cells = modifier.cells().typify<MCT>(); + auto modifier_view = modifier.index().create_view({}); + auto lookup_view = out->index().create_view(handler.for_output.lookup_view_dims); + modifier_view->lookup({}); + size_t modifier_subspace_index; + while (modifier_view->next_result(handler.from_modifier.next_result_refs, modifier_subspace_index)) { + handler.handle_address(); + size_t dense_idx = handler.target_coords.get(); + if (dense_idx == npos()) { + continue; + } + lookup_view->lookup(handler.for_output.lookup_refs); + size_t output_subspace_index; + if (lookup_view->next_result({}, output_subspace_index)) { + size_t subspace_offset = dsss * output_subspace_index; + auto dst = output_cells.begin() + subspace_offset; + ICT lhs = dst[dense_idx]; + MCT rhs = modifier_cells[modifier_subspace_index]; + dst[dense_idx] = function(lhs, rhs); + } + } + return out; +} +struct PerformModify { + template<typename ICT, typename MCT> + static Value::UP invoke(const Value &input, + join_fun_t function, + const Value &modifier, + const ValueBuilderFactory &factory) + { + return my_modify_value<ICT,MCT>(input, function, modifier, factory); + } +}; + +//----------------------------------------------------------------------------- + + +template <typename ICT, typename MCT> +Value::UP +my_add_cells(const Value &input, const Value &modifier, const ValueBuilderFactory &factory) +{ + const ValueType &input_type = input.type(); + const ValueType &modifier_type = modifier.type(); + if 
(input_type.dimensions() != modifier_type.dimensions()) { + LOG(error, "when adding cells to a tensor, dimensions must be equal"); + return Value::UP(); + } + const auto input_cells = input.cells().typify<ICT>(); + const auto modifier_cells = modifier.cells().typify<MCT>(); + const size_t num_mapped_in_input = input_type.count_mapped_dimensions(); + const size_t dsss = input_type.dense_subspace_size(); + const size_t expected_subspaces = input.index().size() + modifier.index().size(); + auto builder = factory.create_value_builder<ICT>(input_type, num_mapped_in_input, dsss, expected_subspaces); + Addresses addrs(num_mapped_in_input); + std::set<size_t> overwritten_subspaces; + auto modifier_view = modifier.index().create_view({}); + auto lookup_view = input.index().create_view(addrs.lookup_view_dims); + modifier_view->lookup({}); + size_t modifier_subspace_index; + while (modifier_view->next_result(addrs.next_result_refs, modifier_subspace_index)) { + size_t modifier_offset = dsss * modifier_subspace_index; + auto src = modifier_cells.begin() + modifier_offset; + auto dst = builder->add_subspace(addrs.addr).begin(); + for (size_t i = 0; i < dsss; ++i) { + dst[i] = src[i]; + } + lookup_view->lookup(addrs.lookup_refs); + size_t input_subspace_index; + if (lookup_view->next_result({}, input_subspace_index)) { + overwritten_subspaces.insert(input_subspace_index); + } + } + auto input_view = input.index().create_view({}); + input_view->lookup({}); + size_t input_subspace_index; + while (input_view->next_result(addrs.next_result_refs, input_subspace_index)) { + if (overwritten_subspaces.count(input_subspace_index) == 0) { + size_t input_offset = dsss * input_subspace_index; + auto src = input_cells.begin() + input_offset; + auto dst = builder->add_subspace(addrs.addr).begin(); + for (size_t i = 0; i < dsss; ++i) { + dst[i] = src[i]; + } + } + } + return builder->build(std::move(builder)); +} + +struct PerformAdd { + template<typename ICT, typename MCT> + static Value::UP 
invoke(const Value &input, + const Value &modifier, + const ValueBuilderFactory &factory) + { + return my_add_cells<ICT,MCT>(input, modifier, factory); + } +}; + +//----------------------------------------------------------------------------- + +template <typename ICT> +Value::UP +my_remove_cells(const Value &input, const Value &modifier, const ValueBuilderFactory &factory) +{ + const ValueType &input_type = input.type(); + const ValueType &modifier_type = modifier.type(); + if (input_type.mapped_dimensions() != modifier_type.mapped_dimensions()) { + LOG(error, "when removing cells from a tensor, mapped dimensions must be equal"); + return Value::UP(); + } + if (input_type.mapped_dimensions().size() == 0) { + LOG(error, "cannot remove cells from a dense tensor"); + return Value::UP(); + } + const auto input_cells = input.cells().typify<ICT>(); + const size_t num_mapped_in_input = input_type.count_mapped_dimensions(); + const size_t dsss = input_type.dense_subspace_size(); + Addresses addrs(num_mapped_in_input); + std::set<size_t> removed_subspaces; + auto modifier_view = modifier.index().create_view({}); + auto lookup_view = input.index().create_view(addrs.lookup_view_dims); + modifier_view->lookup({}); + size_t modifier_subspace_index; + while (modifier_view->next_result(addrs.next_result_refs, modifier_subspace_index)) { + lookup_view->lookup(addrs.lookup_refs); + size_t input_subspace_index; + if (lookup_view->next_result({}, input_subspace_index)) { + removed_subspaces.insert(input_subspace_index); + } + } + const size_t expected_subspaces = input.index().size() - removed_subspaces.size(); + auto builder = factory.create_value_builder<ICT>(input_type, num_mapped_in_input, dsss, expected_subspaces); + auto input_view = input.index().create_view({}); + input_view->lookup({}); + size_t input_subspace_index; + while (input_view->next_result(addrs.next_result_refs, input_subspace_index)) { + if (removed_subspaces.count(input_subspace_index) == 0) { + size_t 
input_offset = dsss * input_subspace_index; + auto src = input_cells.begin() + input_offset; + auto dst = builder->add_subspace(addrs.addr).begin(); + for (size_t i = 0; i < dsss; ++i) { + dst[i] = src[i]; + } + } + } + return builder->build(std::move(builder)); +} + +struct PerformRemove { + template<typename ICT> + static Value::UP invoke(const Value &input, + const Value &modifier, + const ValueBuilderFactory &factory) + { + return my_remove_cells<ICT>(input, modifier, factory); + } +}; + +} // namespace <unnamed> + +//----------------------------------------------------------------------------- + +Value::UP +TensorPartialUpdate::modify(const Value &input, join_fun_t function, + const Value &modifier, const ValueBuilderFactory &factory) +{ + return typify_invoke<2, TypifyCellType, PerformModify>( + input.cells().type, modifier.cells().type, + input, function, modifier, factory); +} + +Value::UP +TensorPartialUpdate::add(const Value &input, const Value &add_cells, const ValueBuilderFactory &factory) +{ + return typify_invoke<2, TypifyCellType, PerformAdd>( + input.cells().type, add_cells.cells().type, + input, add_cells, factory); +} + +Value::UP +TensorPartialUpdate::remove(const Value &input, const Value &remove_spec, const ValueBuilderFactory &factory) +{ + return typify_invoke<1, TypifyCellType, PerformRemove>( + input.cells().type, + input, remove_spec, factory); +} + +} // namespace diff --git a/eval/src/vespa/eval/tensor/partial_update.h b/eval/src/vespa/eval/tensor/partial_update.h new file mode 100644 index 00000000000..17448b8223e --- /dev/null +++ b/eval/src/vespa/eval/tensor/partial_update.h @@ -0,0 +1,35 @@ +// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+ +#pragma once + +#include <vespa/eval/eval/value.h> + +namespace vespalib::tensor { + +struct TensorPartialUpdate { + using join_fun_t = double (*)(double, double); + using Value = vespalib::eval::Value; + using ValueBuilderFactory = vespalib::eval::ValueBuilderFactory; + + // make a copy of the input, but apply function(oldvalue, modifier.cellvalue) + // to cells which also exist in the "modifier". + // modifier.type() must be sparse with exactly the same dimension names + // as the input type. + // returns null pointer if this constraint is violated. + static Value::UP modify(const Value &input, join_fun_t function, + const Value &modifier, const ValueBuilderFactory &factory); + + // make a copy of the input, but add or overwrite cells from add_cells. + // requires same type for input and add_cells. + // returns null pointer if this constraint is violated. + static Value::UP add(const Value &input, const Value &add_cells, const ValueBuilderFactory &factory); + + // make a copy of the input, but remove cells present in remove_spec. + // cell values in remove_spec are ignored. + // requires same set of mapped dimensions for input and remove_spec. + // not valid for dense tensors, since removing cells for those is impossible. + // returns null pointer if these constraints are violated. + static Value::UP remove(const Value &input, const Value &remove_spec, const ValueBuilderFactory &factory); +}; + +} // namespace diff --git a/eval/src/vespa/eval/tensor/wrapped_simple_tensor.cpp b/eval/src/vespa/eval/tensor/wrapped_simple_tensor.cpp index 241b8026b59..178935fce00 100644 --- a/eval/src/vespa/eval/tensor/wrapped_simple_tensor.cpp +++ b/eval/src/vespa/eval/tensor/wrapped_simple_tensor.cpp @@ -145,7 +145,6 @@ WrappedSimpleTensor::add(const Tensor &arg) const if (!rhs || type() != rhs->type()) { return Tensor::UP(); } - TensorSpec oldTensor = toSpec(); TensorSpec argTensor = rhs->toSpec(); TensorSpec result(type().to_spec()); |