From 55b6fa9bbedbc372222c4a76924703f4525bc987 Mon Sep 17 00:00:00 2001 From: Arne Juul Date: Tue, 8 Dec 2020 13:55:43 +0000 Subject: move partial_update files from eval to document --- document/CMakeLists.txt | 3 + .../tensor_fieldvalue/partial_add/CMakeLists.txt | 9 + .../partial_add/partial_add_test.cpp | 89 +++++ .../partial_modify/CMakeLists.txt | 9 + .../partial_modify/partial_modify_test.cpp | 110 ++++++ .../partial_remove/CMakeLists.txt | 9 + .../partial_remove/partial_remove_test.cpp | 130 +++++++ document/src/vespa/document/update/CMakeLists.txt | 1 + .../vespa/document/update/tensor_add_update.cpp | 2 +- .../vespa/document/update/tensor_modify_update.cpp | 2 +- .../document/update/tensor_partial_update.cpp | 419 +++++++++++++++++++++ .../vespa/document/update/tensor_partial_update.h | 44 +++ .../vespa/document/update/tensor_remove_update.cpp | 2 +- eval/CMakeLists.txt | 4 - eval/src/tests/tensor/partial_add/CMakeLists.txt | 9 - .../tests/tensor/partial_add/partial_add_test.cpp | 89 ----- .../src/tests/tensor/partial_modify/CMakeLists.txt | 9 - .../tensor/partial_modify/partial_modify_test.cpp | 110 ------ .../src/tests/tensor/partial_remove/CMakeLists.txt | 9 - .../tensor/partial_remove/partial_remove_test.cpp | 130 ------- eval/src/vespa/eval/CMakeLists.txt | 1 - eval/src/vespa/eval/tensor/CMakeLists.txt | 5 - eval/src/vespa/eval/tensor/partial_update.cpp | 419 --------------------- eval/src/vespa/eval/tensor/partial_update.h | 44 --- 24 files changed, 826 insertions(+), 832 deletions(-) create mode 100644 document/src/tests/tensor_fieldvalue/partial_add/CMakeLists.txt create mode 100644 document/src/tests/tensor_fieldvalue/partial_add/partial_add_test.cpp create mode 100644 document/src/tests/tensor_fieldvalue/partial_modify/CMakeLists.txt create mode 100644 document/src/tests/tensor_fieldvalue/partial_modify/partial_modify_test.cpp create mode 100644 document/src/tests/tensor_fieldvalue/partial_remove/CMakeLists.txt create mode 100644 document/src/tests/tensor_fieldvalue/partial_remove/partial_remove_test.cpp create mode 100644 document/src/vespa/document/update/tensor_partial_update.cpp create mode 100644 document/src/vespa/document/update/tensor_partial_update.h delete mode 100644 eval/src/tests/tensor/partial_add/CMakeLists.txt delete mode 100644 eval/src/tests/tensor/partial_add/partial_add_test.cpp delete mode 100644 eval/src/tests/tensor/partial_modify/CMakeLists.txt delete mode 100644 eval/src/tests/tensor/partial_modify/partial_modify_test.cpp delete mode 100644 eval/src/tests/tensor/partial_remove/CMakeLists.txt delete mode 100644 eval/src/tests/tensor/partial_remove/partial_remove_test.cpp delete mode 100644 eval/src/vespa/eval/tensor/CMakeLists.txt delete mode 100644 eval/src/vespa/eval/tensor/partial_update.cpp delete mode 100644 eval/src/vespa/eval/tensor/partial_update.h diff --git a/document/CMakeLists.txt b/document/CMakeLists.txt index 8c49f7bd7bf..46da458fe6d 100644 --- a/document/CMakeLists.txt +++ b/document/CMakeLists.txt @@ -38,6 +38,9 @@ vespa_define_module( src/tests/serialization src/tests/struct_anno src/tests/tensor_fieldvalue + src/tests/tensor_fieldvalue/partial_add + src/tests/tensor_fieldvalue/partial_modify + src/tests/tensor_fieldvalue/partial_remove ) install_java_artifact(document) diff --git a/document/src/tests/tensor_fieldvalue/partial_add/CMakeLists.txt b/document/src/tests/tensor_fieldvalue/partial_add/CMakeLists.txt new file mode 100644 index 00000000000..8d5ee0df6e6 --- /dev/null +++ b/document/src/tests/tensor_fieldvalue/partial_add/CMakeLists.txt @@ -0,0 +1,9 @@ +# Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +vespa_add_executable(eval_partial_add_test_app TEST + SOURCES + partial_add_test.cpp + DEPENDS + document + GTest::GTest +) +vespa_add_test(NAME eval_partial_add_test_app COMMAND eval_partial_add_test_app) diff --git a/document/src/tests/tensor_fieldvalue/partial_add/partial_add_test.cpp b/document/src/tests/tensor_fieldvalue/partial_add/partial_add_test.cpp new file mode 100644 index 00000000000..db391a5b889 --- /dev/null +++ b/document/src/tests/tensor_fieldvalue/partial_add/partial_add_test.cpp @@ -0,0 +1,89 @@ +// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#include +#include +#include +#include +#include +#include +#include + +using namespace vespalib; +using namespace vespalib::eval; +using namespace vespalib::eval::test; + +using vespalib::make_string_short::fmt; + +std::vector add_layouts = { + {x({"a"})}, {x({"b"})}, + {x({"a","b"})}, {x({"a","c"})}, + float_cells({x({"a","b"})}), {x({"a","c"})}, + {x({"a","b"})}, float_cells({x({"a","c"})}), + float_cells({x({"a","b"})}), float_cells({x({"a","c"})}), + {x({"a","b","c"}),y({"d","e"})}, {x({"b","f"}),y({"d","g"})}, + {x(3),y({"a","b"})}, {x(3),y({"b","c"})} +}; + +TensorSpec reference_add(const TensorSpec &a, const TensorSpec &b) { + TensorSpec result(a.type()); + for (const auto &cell: b.cells()) { + result.add(cell.first, cell.second); + } + auto end_iter = b.cells().end(); + for (const auto &cell: a.cells()) { + auto iter = b.cells().find(cell.first); + if (iter == end_iter) { + result.add(cell.first, cell.second); + } + } + return result; +} + +Value::UP try_partial_add(const TensorSpec &a, const TensorSpec &b) { + const auto &factory = SimpleValueBuilderFactory::get(); + auto lhs = value_from_spec(a, factory); + auto rhs = value_from_spec(b, factory); + return tensor::TensorPartialUpdate::add(*lhs, *rhs, factory); +} + +TensorSpec perform_partial_add(const TensorSpec &a, const TensorSpec &b) { + auto up = try_partial_add(a, b); + EXPECT_TRUE(up); + return spec_from_value(*up); +} + +TEST(PartialAddTest, partial_add_works_for_simple_values) { + ASSERT_TRUE((add_layouts.size() % 2) == 0); + for (size_t i = 0; i < add_layouts.size(); i += 2) { + TensorSpec lhs = spec(add_layouts[i], N()); + TensorSpec rhs = spec(add_layouts[i + 1], Div16(N())); + SCOPED_TRACE(fmt("\n===\nLHS: %s\nRHS: %s\n===\n", lhs.to_string().c_str(), rhs.to_string().c_str())); + auto expect = reference_add(lhs, rhs); + auto actual = perform_partial_add(lhs, rhs); + EXPECT_EQ(actual, expect); + } +} + +std::vector bad_layouts = { + {x(3)}, {x(3),y(1)}, + {x(3),y(1)}, {x(3)}, + {x(3),y(3)}, {x(3),y({"a"})}, + {x(3),y({"a"})}, {x(3),y(3)}, + {x({"a"})}, {x({"a"}),y({"b"})}, + {x({"a"}),y({"b"})}, {x({"a"})}, + {x({"a"})}, {x({"a"}),y(1)} +}; + +TEST(PartialAddTest, partial_add_returns_nullptr_on_invalid_inputs) { + ASSERT_TRUE((bad_layouts.size() % 2) == 0); + for (size_t i = 0; i < bad_layouts.size(); i += 2) { + TensorSpec lhs = spec(bad_layouts[i], N()); + TensorSpec rhs = spec(bad_layouts[i + 1], Div16(N())); + SCOPED_TRACE(fmt("\n===\nLHS: %s\nRHS: %s\n===\n", lhs.to_string().c_str(), rhs.to_string().c_str())); + auto actual = try_partial_add(lhs, rhs); + auto expect = Value::UP(); + EXPECT_EQ(actual, expect); + } +} + +GTEST_MAIN_RUN_ALL_TESTS() diff --git a/document/src/tests/tensor_fieldvalue/partial_modify/CMakeLists.txt b/document/src/tests/tensor_fieldvalue/partial_modify/CMakeLists.txt new file mode 100644 index 00000000000..8fde8339f66 --- /dev/null +++ b/document/src/tests/tensor_fieldvalue/partial_modify/CMakeLists.txt @@ -0,0 +1,9 @@ +# Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +vespa_add_executable(eval_partial_modify_test_app TEST + SOURCES + partial_modify_test.cpp + DEPENDS + document + GTest::GTest +) +vespa_add_test(NAME eval_partial_modify_test_app COMMAND eval_partial_modify_test_app) diff --git a/document/src/tests/tensor_fieldvalue/partial_modify/partial_modify_test.cpp b/document/src/tests/tensor_fieldvalue/partial_modify/partial_modify_test.cpp new file mode 100644 index 00000000000..a4562c09e50 --- /dev/null +++ b/document/src/tests/tensor_fieldvalue/partial_modify/partial_modify_test.cpp @@ -0,0 +1,110 @@ +// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#include +#include +#include +#include +#include +#include +#include + +using namespace vespalib; +using namespace vespalib::eval; +using namespace vespalib::eval::test; + +using vespalib::make_string_short::fmt; + +std::vector modify_layouts = { + {x({"a"})}, {x({"a"})}, + {x({"a",""})}, {x({"b","c","d","e"})}, + {x(5)}, {x({"1","2","foo","17"})}, + {x({"a","b","c"}),y({"d","e"})}, {x({"b"}),y({"d"})}, + {x({"a","b","c"})}, {x({"b","c","d"})}, + {x(4),y({"a","b","c","d"}),z(5)}, {x({"1","2"}),y({"b","d"}),z({"1","3"})}, + {x(3),y(2)}, {x({"0","1"}),y({"0","1"})}, + {x({"a","","b"})}, {x({""})} +}; + +TensorSpec::Address sparsify(const TensorSpec::Address &input) { + TensorSpec::Address output; + for (const auto & kv : input) { + if (kv.second.is_indexed()) { + auto val = fmt("%zu", kv.second.index); + output.emplace(kv.first, val); + } else { + output.emplace(kv.first, kv.second); + } + } + return output; +} + +TensorSpec reference_modify(const TensorSpec &a, const TensorSpec &b, join_fun_t fun) { + TensorSpec result(a.type()); + auto end_iter = b.cells().end(); + for (const auto &cell: a.cells()) { + double v = cell.second; + auto sparse_addr = sparsify(cell.first); + auto iter = b.cells().find(sparse_addr); + if (iter == end_iter) { + result.add(cell.first, v); + } else { + result.add(cell.first, fun(v, iter->second)); + } + } + return result; +} + +Value::UP try_partial_modify(const TensorSpec &a, const TensorSpec &b, join_fun_t fun) { + const auto &factory = SimpleValueBuilderFactory::get(); + auto lhs = value_from_spec(a, factory); + auto rhs = value_from_spec(b, factory); + return tensor::TensorPartialUpdate::modify(*lhs, fun, *rhs, factory); +} + +TensorSpec perform_partial_modify(const TensorSpec &a, const TensorSpec &b, join_fun_t fun) { + auto up = try_partial_modify(a, b, fun); + EXPECT_TRUE(up); + return spec_from_value(*up); +} + +TEST(PartialModifyTest, partial_modify_works_for_simple_values) { + ASSERT_TRUE((modify_layouts.size() % 2) == 0); + for (size_t i = 0; i < modify_layouts.size(); i += 2) { + TensorSpec lhs = spec(modify_layouts[i], N()); + TensorSpec rhs = spec(modify_layouts[i + 1], Div16(N())); + SCOPED_TRACE(fmt("\n===\nLHS: %s\nRHS: %s\n===\n", lhs.to_string().c_str(), rhs.to_string().c_str())); + for (auto fun: {operation::Add::f, operation::Mul::f, operation::Sub::f}) { + auto expect = reference_modify(lhs, rhs, fun); + auto actual = perform_partial_modify(lhs, rhs, fun); + EXPECT_EQ(actual, expect); + } + auto fun = [](double, double keep) { return keep; }; + auto expect = reference_modify(lhs, rhs, fun); + auto actual = perform_partial_modify(lhs, rhs, fun); + EXPECT_EQ(actual, expect); + } +} + +std::vector bad_layouts = { + {x(3)}, {x(3)}, + {x(3),y({"a"})}, {x(3),y({"a"})}, + {x({"a"})}, {x({"a"}),y({"b"})}, + {x({"a"}),y({"b"})}, {x({"a"})}, + {x({"a"})}, {x({"a"}),y(1)} +}; + +TEST(PartialModifyTest, partial_modify_returns_nullptr_on_invalid_inputs) { + ASSERT_TRUE((bad_layouts.size() % 2) == 0); + for (size_t i = 0; i < bad_layouts.size(); i += 2) { + TensorSpec lhs = spec(bad_layouts[i], N()); + TensorSpec rhs = spec(bad_layouts[i + 1], Div16(N())); + SCOPED_TRACE(fmt("\n===\nLHS: %s\nRHS: %s\n===\n", lhs.to_string().c_str(), rhs.to_string().c_str())); + for (auto fun: {operation::Add::f}) { + auto actual = try_partial_modify(lhs, rhs, fun); + auto expect = Value::UP(); + EXPECT_EQ(actual, expect); + } + } +} + +GTEST_MAIN_RUN_ALL_TESTS() diff --git a/document/src/tests/tensor_fieldvalue/partial_remove/CMakeLists.txt b/document/src/tests/tensor_fieldvalue/partial_remove/CMakeLists.txt new file mode 100644 index 00000000000..7382ced9490 --- /dev/null +++ b/document/src/tests/tensor_fieldvalue/partial_remove/CMakeLists.txt @@ -0,0 +1,9 @@ +# Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +vespa_add_executable(eval_partial_remove_test_app TEST + SOURCES + partial_remove_test.cpp + DEPENDS + document + GTest::GTest +) +vespa_add_test(NAME eval_partial_remove_test_app COMMAND eval_partial_remove_test_app) diff --git a/document/src/tests/tensor_fieldvalue/partial_remove/partial_remove_test.cpp b/document/src/tests/tensor_fieldvalue/partial_remove/partial_remove_test.cpp new file mode 100644 index 00000000000..7b5b17b9cf8 --- /dev/null +++ b/document/src/tests/tensor_fieldvalue/partial_remove/partial_remove_test.cpp @@ -0,0 +1,130 @@ +// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#include +#include +#include +#include +#include +#include +#include + +using namespace vespalib; +using namespace vespalib::eval; +using namespace vespalib::eval::test; + +using vespalib::make_string_short::fmt; + +std::vector remove_layouts = { + {x({"a"})}, {x({"b"})}, + {x({"a","b"})}, {x({"a","c"})}, + {x({"a","b"})}, {x({"a","b"})}, + float_cells({x({"a","b"})}), {x({"a","c"})}, + {x({"a","b"})}, float_cells({x({"a","c"})}), + float_cells({x({"a","b"})}), float_cells({x({"a","c"})}), + {x({"a","b","c"}),y({"d","e"})}, {x({"b","f"}),y({"d","g"})}, + {x(3),y({"a","b"})}, {y({"b","c"})} +}; + +TensorSpec::Address only_sparse(const TensorSpec::Address &input) { + TensorSpec::Address output; + for (const auto & kv : input) { + if (kv.second.is_mapped()) { + output.emplace(kv.first, kv.second); + } + } + return output; +} + +TensorSpec reference_remove(const TensorSpec &a, const TensorSpec &b) { + TensorSpec result(a.type()); + auto end_iter = b.cells().end(); + for (const auto &cell: a.cells()) { + auto iter = b.cells().find(only_sparse(cell.first)); + if (iter == end_iter) { + result.add(cell.first, cell.second); + } + } + return result; +} + +Value::UP try_partial_remove(const TensorSpec &a, const TensorSpec &b) { + const auto &factory = SimpleValueBuilderFactory::get(); + auto lhs = value_from_spec(a, factory); + auto rhs = value_from_spec(b, factory); + return tensor::TensorPartialUpdate::remove(*lhs, *rhs, factory); +} + +TensorSpec perform_partial_remove(const TensorSpec &a, const TensorSpec &b) { + auto up = try_partial_remove(a, b); + EXPECT_TRUE(up); + return spec_from_value(*up); +} + +TEST(PartialRemoveTest, partial_remove_works_for_simple_values) { + ASSERT_TRUE((remove_layouts.size() % 2) == 0); + for (size_t i = 0; i < remove_layouts.size(); i += 2) { + TensorSpec lhs = spec(remove_layouts[i], N()); + TensorSpec rhs = spec(remove_layouts[i + 1], Div16(N())); + SCOPED_TRACE(fmt("\n===\nLHS: %s\nRHS: %s\n===\n", lhs.to_string().c_str(), rhs.to_string().c_str())); + auto expect = reference_remove(lhs, rhs); + auto actual = perform_partial_remove(lhs, rhs); + EXPECT_EQ(actual, expect); + } +} + +std::vector bad_layouts = { + {x(3)}, {x(3)}, + {x(3),y({"a"})}, {x(3)}, + {x(3),y({"a"})}, {x(3),y({"a"})}, + {x({"a"})}, {y({"a"})}, + {x({"a"})}, {x({"a"}),y({"b"})} +}; + +TEST(PartialRemoveTest, partial_remove_returns_nullptr_on_invalid_inputs) { + ASSERT_TRUE((bad_layouts.size() % 2) == 0); + for (size_t i = 0; i < bad_layouts.size(); i += 2) { + TensorSpec lhs = spec(bad_layouts[i], N()); + TensorSpec rhs = spec(bad_layouts[i + 1], Div16(N())); + SCOPED_TRACE(fmt("\n===\nLHS: %s\nRHS: %s\n===\n", lhs.to_string().c_str(), rhs.to_string().c_str())); + auto actual = try_partial_remove(lhs, rhs); + auto expect = Value::UP(); + EXPECT_EQ(actual, expect); + } +} + +void +expect_partial_remove(const TensorSpec& input, const TensorSpec& remove, const TensorSpec& exp) +{ + auto act = perform_partial_remove(input, remove); + EXPECT_EQ(exp, act); +} + +TEST(PartialRemoveTest, remove_where_address_is_not_fully_specified) { + auto input_sparse = TensorSpec("tensor(x{},y{})"). + add({{"x", "a"},{"y", "c"}}, 3.0). + add({{"x", "a"},{"y", "d"}}, 5.0). + add({{"x", "b"},{"y", "c"}}, 7.0); + + expect_partial_remove(input_sparse, TensorSpec("tensor(x{})").add({{"x", "a"}}, 1.0), + TensorSpec("tensor(x{},y{})").add({{"x", "b"},{"y", "c"}}, 7.0)); + + expect_partial_remove(input_sparse, TensorSpec("tensor(y{})").add({{"y", "c"}}, 1.0), + TensorSpec("tensor(x{},y{})").add({{"x", "a"},{"y", "d"}}, 5.0)); + + expect_partial_remove(input_sparse, TensorSpec("tensor(y{})").add({{"y", "d"}}, 1.0), + TensorSpec("tensor(x{},y{})").add({{"x", "a"},{"y", "c"}}, 3.0) + .add({{"x", "b"},{"y", "c"}}, 7.0)); + + auto input_mixed = TensorSpec("tensor(x{},y{},z[1])"). + add({{"x", "a"},{"y", "c"},{"z", 0}}, 3.0). + add({{"x", "a"},{"y", "d"},{"z", 0}}, 5.0). + add({{"x", "b"},{"y", "c"},{"z", 0}}, 7.0); + + expect_partial_remove(input_mixed,TensorSpec("tensor(x{})").add({{"x", "a"}}, 1.0), + TensorSpec("tensor(x{},y{},z[1])").add({{"x", "b"},{"y", "c"},{"z", 0}}, 7.0)); + + expect_partial_remove(input_mixed, TensorSpec("tensor(y{})").add({{"y", "c"}}, 1.0), + TensorSpec("tensor(x{},y{},z[1])").add({{"x", "a"},{"y", "d"},{"z", 0}}, 5.0)); +} + +GTEST_MAIN_RUN_ALL_TESTS() diff --git a/document/src/vespa/document/update/CMakeLists.txt b/document/src/vespa/document/update/CMakeLists.txt index a587d8e3e2d..b0ffa056e1a 100644 --- a/document/src/vespa/document/update/CMakeLists.txt +++ b/document/src/vespa/document/update/CMakeLists.txt @@ -15,6 +15,7 @@ vespa_add_library(document_updates OBJECT removevalueupdate.cpp tensor_add_update.cpp tensor_modify_update.cpp + tensor_partial_update.cpp tensor_remove_update.cpp valueupdate.cpp DEPENDS diff --git a/document/src/vespa/document/update/tensor_add_update.cpp b/document/src/vespa/document/update/tensor_add_update.cpp index c9ffad2a789..8846ec2fc0a 100644 --- a/document/src/vespa/document/update/tensor_add_update.cpp +++ b/document/src/vespa/document/update/tensor_add_update.cpp @@ -1,6 +1,7 @@ // Copyright 2019 Oath Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. #include "tensor_add_update.h" +#include "tensor_partial_update.h" #include #include #include @@ -10,7 +11,6 @@ #include #include #include -#include #include #include #include diff --git a/document/src/vespa/document/update/tensor_modify_update.cpp b/document/src/vespa/document/update/tensor_modify_update.cpp index 4da93d0ae46..bc4085ec4fa 100644 --- a/document/src/vespa/document/update/tensor_modify_update.cpp +++ b/document/src/vespa/document/update/tensor_modify_update.cpp @@ -1,6 +1,7 @@ // Copyright 2019 Oath Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. #include "tensor_modify_update.h" +#include "tensor_partial_update.h" #include #include #include @@ -11,7 +12,6 @@ #include #include #include -#include #include #include #include diff --git a/document/src/vespa/document/update/tensor_partial_update.cpp b/document/src/vespa/document/update/tensor_partial_update.cpp new file mode 100644 index 00000000000..9bf243602dd --- /dev/null +++ b/document/src/vespa/document/update/tensor_partial_update.cpp @@ -0,0 +1,419 @@ +// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#include "tensor_partial_update.h" +#include +#include +#include +#include +#include +#include + +#include +LOG_SETUP(".eval.tensor.partial_update"); + +using namespace vespalib::eval; + +namespace vespalib::tensor { + +namespace { + +using join_fun_t = vespalib::eval::operation::op2_t; + +static constexpr size_t npos() { return -1; } + +enum class DimCase { + MAPPED_MATCH, CONV_TO_INDEXED +}; + +struct DenseCoords { + std::vector dim_sizes; + size_t total_size; + size_t offset; + size_t current; + DenseCoords(const ValueType &output_type) + : total_size(1), offset(0), current(0) + { + for (const auto & dim : output_type.dimensions()) { + if (dim.is_indexed()) { + dim_sizes.push_back(dim.size); + total_size *= dim.size; + } + } + } + ~DenseCoords(); + void clear() { offset = 0; current = 0; } + void convert_label(vespalib::stringref label) { + uint32_t coord = 0; + for (char c : label) { + if (c < '0' || c > '9') { // bad char + offset = npos(); + break; + } + coord = coord * 10 + (c - '0'); + } + size_t cur_dim_size = dim_sizes[current]; + if (coord < cur_dim_size) { + if (offset != npos()) { + offset *= cur_dim_size; + offset += coord; + } + } else { + offset = npos(); + } + ++current; + } + size_t get_dense_index() const { + assert(current == dim_sizes.size()); + return offset; + } +}; +DenseCoords::~DenseCoords() = default; + +struct SparseCoords { + std::vector addr; + std::vector next_result_refs; + std::vector lookup_refs; + std::vector lookup_view_dims; + SparseCoords(size_t sz) + : addr(sz), next_result_refs(sz), lookup_refs(sz), lookup_view_dims(sz) + { + for (size_t i = 0; i < sz; ++i) { + next_result_refs[i] = &addr[i]; + lookup_refs[i] = &addr[i]; + lookup_view_dims[i] = i; + } + } + ~SparseCoords(); +}; +SparseCoords::~SparseCoords() = default; + +/** + * Helper class that converts a fully-sparse address from the modifier + * tensor into a subset sparse address for the output and an offset + * in the dense subspace. + **/ +struct AddressHandler { + std::vector dimension_plan; + DenseCoords dense_converter; + SparseCoords for_output; + SparseCoords from_modifier; + bool valid; + + AddressHandler(const ValueType &output_type, + const ValueType &modifier_type) + : dimension_plan(), dense_converter(output_type), + for_output(output_type.count_mapped_dimensions()), + from_modifier(modifier_type.count_mapped_dimensions()), + valid(true) + { + if (! modifier_type.is_sparse()) { + LOG(error, "Unexpected non-sparse modifier tensor, type is %s", + modifier_type.to_spec().c_str()); + valid = false; + return; + } + // analyse dimensions + auto visitor = overload { + [&](visit_ranges_either, const auto &) { valid = false; }, + [&](visit_ranges_both, const auto &a, const auto &) { + dimension_plan.push_back(a.is_mapped() ? DimCase::MAPPED_MATCH : DimCase::CONV_TO_INDEXED); + } + }; + const auto & input_dims = output_type.dimensions(); + const auto & modifier_dims = modifier_type.dimensions(); + visit_ranges(visitor, + input_dims.begin(), input_dims.end(), + modifier_dims.begin(), modifier_dims.end(), + [](const auto &a, const auto &b){ return (a.name < b.name); }); + if (! valid) { + LOG(error, "Value type %s does not match modifier type %s (should have same dimensions)", + output_type.to_spec().c_str(), modifier_type.to_spec().c_str()); + return; + } + // implicitly checked above, must hold: + assert(input_dims.size() == modifier_dims.size()); + // the plan should now be fully built: + assert(input_dims.size() == dimension_plan.size()); + } + + void handle_address() + { + dense_converter.clear(); + auto out = for_output.addr.begin(); + for (size_t i = 0; i < dimension_plan.size(); ++i) { + if (dimension_plan[i] == DimCase::CONV_TO_INDEXED) { + dense_converter.convert_label(from_modifier.addr[i]); + } else { + *out++ = from_modifier.addr[i]; + } + } + assert(out == for_output.addr.end()); + assert(dense_converter.current == dense_converter.dim_sizes.size()); + } + + ~AddressHandler(); +}; +AddressHandler::~AddressHandler() = default; + +template +void copy_tensor_with_filter(const Value &input, + size_t dsss, + SparseCoords &addrs, + ValueBuilder &builder, + KeepFun && keep_subspace) +{ + const auto input_cells = input.cells().typify(); + auto input_view = input.index().create_view({}); + input_view->lookup({}); + size_t input_subspace_index; + while (input_view->next_result(addrs.next_result_refs, input_subspace_index)) { + if (keep_subspace(addrs.lookup_refs, input_subspace_index)) { + size_t input_offset = dsss * input_subspace_index; + auto src = input_cells.begin() + input_offset; + auto dst = builder.add_subspace(addrs.addr).begin(); + for (size_t i = 0; i < dsss; ++i) { + dst[i] = src[i]; + } + } + } +} + +template +Value::UP +copy_tensor(const Value &input, const ValueType &input_type, SparseCoords &helper, const ValueBuilderFactory &factory) +{ + const size_t num_mapped_in_input = input_type.count_mapped_dimensions(); + const size_t dsss = input_type.dense_subspace_size(); + const size_t expected_subspaces = input.index().size(); + auto builder = factory.create_value_builder(input_type, num_mapped_in_input, dsss, expected_subspaces); + auto no_filter = [] (const auto &, size_t) { + return true; + }; + copy_tensor_with_filter(input, dsss, helper, *builder, no_filter); + return builder->build(std::move(builder)); +} + +//----------------------------------------------------------------------------- + +struct PerformModify { + template + static Value::UP invoke(const Value &input, + join_fun_t function, + const Value &modifier, + const ValueBuilderFactory &factory); +}; + +template +Value::UP +PerformModify::invoke(const Value &input, join_fun_t function, const Value &modifier, const ValueBuilderFactory &factory) +{ + const ValueType &input_type = input.type(); + const size_t dsss = input_type.dense_subspace_size(); + const ValueType &modifier_type = modifier.type(); + AddressHandler handler(input_type, modifier_type); + if (! handler.valid) { + return {}; + } + // copy input to output + auto out = copy_tensor(input, input_type, handler.for_output, factory); + // need to overwrite some cells + auto output_cells = unconstify(out->cells().template typify()); + const auto modifier_cells = modifier.cells().typify(); + auto modifier_view = modifier.index().create_view({}); + auto lookup_view = out->index().create_view(handler.for_output.lookup_view_dims); + modifier_view->lookup({}); + size_t modifier_subspace_index; + while (modifier_view->next_result(handler.from_modifier.next_result_refs, modifier_subspace_index)) { + handler.handle_address(); + size_t dense_idx = handler.dense_converter.get_dense_index(); + if (dense_idx == npos()) { + continue; + } + lookup_view->lookup(handler.for_output.lookup_refs); + size_t output_subspace_index; + if (lookup_view->next_result({}, output_subspace_index)) { + size_t subspace_offset = dsss * output_subspace_index; + auto dst = output_cells.begin() + subspace_offset; + ICT lhs = dst[dense_idx]; + MCT rhs = modifier_cells[modifier_subspace_index]; + dst[dense_idx] = function(lhs, rhs); + } + } + return out; +} + +//----------------------------------------------------------------------------- + +struct PerformAdd { + template + static Value::UP invoke(const Value &input, + const Value &modifier, + const ValueBuilderFactory &factory); +}; + +template +Value::UP +PerformAdd::invoke(const Value &input, const Value &modifier, const ValueBuilderFactory &factory) +{ + const ValueType &input_type = input.type(); + const ValueType &modifier_type = modifier.type(); + if (input_type.dimensions() != modifier_type.dimensions()) { + LOG(error, "when adding cells to a tensor, dimensions must be equal. " + "Got input type %s != modifier type %s", + input_type.to_spec().c_str(), modifier_type.to_spec().c_str()); + return {}; + } + const size_t num_mapped_in_input = input_type.count_mapped_dimensions(); + const size_t dsss = input_type.dense_subspace_size(); + const size_t expected_subspaces = input.index().size() + modifier.index().size(); + auto builder = factory.create_value_builder(input_type, num_mapped_in_input, dsss, expected_subspaces); + SparseCoords addrs(num_mapped_in_input); + auto lookup_view = input.index().create_view(addrs.lookup_view_dims); + std::vector overwritten(input.index().size(), false); + auto remember_subspaces = [&] (const auto & lookup_refs, size_t) { + lookup_view->lookup(lookup_refs); + size_t input_subspace_index; + if (lookup_view->next_result({}, input_subspace_index)) { + overwritten[input_subspace_index] = true; + } + return true; + }; + copy_tensor_with_filter(modifier, dsss, addrs, *builder, remember_subspaces); + auto filter = [&] (const auto &, size_t input_subspace) { + return ! overwritten[input_subspace]; + }; + copy_tensor_with_filter(input, dsss, addrs, *builder, filter); + return builder->build(std::move(builder)); +} + +//----------------------------------------------------------------------------- + +struct PerformRemove { + template + static Value::UP invoke(const Value &input, + const Value &modifier, + const ValueBuilderFactory &factory); +}; + +/** + * Calculates the indexes of where the mapped modifier dimensions are found in the mapped input dimensions. + * + * The modifier dimensions should be a subset or all of the input dimensions. + * An empty vector is returned on type mismatch. + */ +std::vector +calc_mapped_dimension_indexes(const ValueType& input_type, + const ValueType& modifier_type) +{ + auto input_dims = input_type.mapped_dimensions(); + auto mod_dims = modifier_type.mapped_dimensions(); + if (mod_dims.size() > input_dims.size()) { + return {}; + } + std::vector result(mod_dims.size()); + size_t j = 0; + for (size_t i = 0; i < mod_dims.size(); ++i) { + while ((j < input_dims.size()) && (input_dims[j] != mod_dims[i])) { + ++j; + } + if (j >= input_dims.size()) { + return {}; + } + result[i] = j; + } + return result; +} + +struct ModifierCoords { + + std::vector lookup_refs; + std::vector lookup_view_dims; + + ModifierCoords(const SparseCoords& input_coords, + const std::vector& input_dim_indexes, + const ValueType& modifier_type) + : lookup_refs(modifier_type.dimensions().size()), + lookup_view_dims(modifier_type.dimensions().size()) + { + assert(modifier_type.dimensions().size() == input_dim_indexes.size()); + for (size_t i = 0; i < input_dim_indexes.size(); ++i) { + // Setup the modifier dimensions to point to the matching input dimensions. + lookup_refs[i] = &input_coords.addr[input_dim_indexes[i]]; + lookup_view_dims[i] = i; + } + } + ~ModifierCoords() {} +}; + +template +Value::UP +PerformRemove::invoke(const Value &input, const Value &modifier, const ValueBuilderFactory &factory) +{ + const ValueType &input_type = input.type(); + const ValueType &modifier_type = modifier.type(); + const size_t num_mapped_in_input = input_type.count_mapped_dimensions(); + if (num_mapped_in_input == 0) { + LOG(error, "Cannot remove cells from a dense input tensor of type %s", + input_type.to_spec().c_str()); + return {}; + } + if (modifier_type.count_indexed_dimensions() != 0) { + LOG(error, "Cannot remove cells using a modifier tensor of type %s", + modifier_type.to_spec().c_str()); + return {}; + } + auto input_dim_indexes = calc_mapped_dimension_indexes(input_type, modifier_type); + if (input_dim_indexes.empty()) { + LOG(error, "Tensor type mismatch when removing cells from a tensor. " + "Got input type %s versus modifier type %s", + input_type.to_spec().c_str(), modifier_type.to_spec().c_str()); + return {}; + } + SparseCoords addrs(num_mapped_in_input); + ModifierCoords mod_coords(addrs, input_dim_indexes, modifier_type); + auto modifier_view = modifier.index().create_view(mod_coords.lookup_view_dims); + const size_t expected_subspaces = input.index().size(); + const size_t dsss = input_type.dense_subspace_size(); + auto builder = factory.create_value_builder(input_type, num_mapped_in_input, dsss, expected_subspaces); + auto filter_by_modifier = [&] (const auto & lookup_refs, size_t) { + // The modifier dimensions are setup to point to the input dimensions address storage in ModifierCoords, + // so we don't need to use the lookup_refs argument. + (void) lookup_refs; + modifier_view->lookup(mod_coords.lookup_refs); + size_t modifier_subspace_index; + return !(modifier_view->next_result({}, modifier_subspace_index)); + }; + copy_tensor_with_filter(input, dsss, addrs, *builder, filter_by_modifier); + return builder->build(std::move(builder)); +} + +} // namespace + +//----------------------------------------------------------------------------- + +Value::UP +TensorPartialUpdate::modify(const Value &input, join_fun_t function, + const Value &modifier, const ValueBuilderFactory &factory) +{ + return typify_invoke<2, TypifyCellType, PerformModify>( + input.cells().type, modifier.cells().type, + input, function, modifier, factory); +} + +Value::UP +TensorPartialUpdate::add(const Value &input, const Value &add_cells, const ValueBuilderFactory &factory) +{ + return typify_invoke<2, TypifyCellType, PerformAdd>( + input.cells().type, add_cells.cells().type, + input, add_cells, factory); +} + +Value::UP +TensorPartialUpdate::remove(const Value &input, const Value &remove_spec, const ValueBuilderFactory &factory) +{ + return typify_invoke<1, TypifyCellType, PerformRemove>( + input.cells().type, + input, remove_spec, factory); +} + +} // namespace diff --git a/document/src/vespa/document/update/tensor_partial_update.h b/document/src/vespa/document/update/tensor_partial_update.h new file mode 100644 index 00000000000..b3e9d32fca8 --- /dev/null +++ b/document/src/vespa/document/update/tensor_partial_update.h @@ -0,0 +1,44 @@ +// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#pragma once + +#include +#include +#include + +namespace vespalib::tensor { + +struct TensorPartialUpdate { + using join_fun_t = vespalib::eval::operation::op2_t; + using Value = vespalib::eval::Value; + using ValueBuilderFactory = vespalib::eval::ValueBuilderFactory; + + /** + * Make a copy of the input, but apply function(oldvalue, modifier.cellvalue) + * to cells which also exist in the "modifier". + * The modifier type must be sparse with exactly the same dimension names + * as the input type. + * Returns null pointer if this constraint is violated. + **/ + static Value::UP modify(const Value &input, join_fun_t function, + const Value &modifier, const ValueBuilderFactory &factory); + + /** + * Make a copy of the input, but add or overwrite cells from add_cells. + * Requires same type for input and add_cells. + * Returns null pointer if this constraint is violated. + **/ + static Value::UP add(const Value &input, const Value &add_cells, const ValueBuilderFactory &factory); + + /** + * Make a copy of the input, but remove cells present in remove_spec. + * The remove_spec must be a sparse tensor, with exactly the mapped dimensions + * that the input value has. + * Cell values in remove_spec are ignored. + * Not valid for dense tensors, since removing cells for those are impossible. + * Returns null pointer if these constraints are violated. + **/ + static Value::UP remove(const Value &input, const Value &remove_spec, const ValueBuilderFactory &factory); +}; + +} // namespace diff --git a/document/src/vespa/document/update/tensor_remove_update.cpp b/document/src/vespa/document/update/tensor_remove_update.cpp index 7b81581aeed..c9ff1a462c5 100644 --- a/document/src/vespa/document/update/tensor_remove_update.cpp +++ b/document/src/vespa/document/update/tensor_remove_update.cpp @@ -1,13 +1,13 @@ // Copyright 2019 Oath Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. #include "tensor_remove_update.h" +#include "tensor_partial_update.h" #include #include #include #include #include #include -#include #include #include #include diff --git a/eval/CMakeLists.txt b/eval/CMakeLists.txt index 7022569e6a3..8a88b1e5be2 100644 --- a/eval/CMakeLists.txt +++ b/eval/CMakeLists.txt @@ -66,9 +66,6 @@ vespa_define_module( src/tests/tensor/dense_tensor_create_function src/tests/tensor/instruction_benchmark src/tests/tensor/onnx_wrapper - src/tests/tensor/partial_add - src/tests/tensor/partial_modify - src/tests/tensor/partial_remove src/tests/tensor/tensor_conformance src/tests/tensor/typed_cells src/tests/tensor/vector_from_doubles_function @@ -82,6 +79,5 @@ vespa_define_module( src/vespa/eval/gp src/vespa/eval/instruction src/vespa/eval/streamed - src/vespa/eval/tensor src/vespa/eval/tensor/dense ) diff --git a/eval/src/tests/tensor/partial_add/CMakeLists.txt b/eval/src/tests/tensor/partial_add/CMakeLists.txt deleted file mode 100644 index f0d07a8e9cf..00000000000 --- a/eval/src/tests/tensor/partial_add/CMakeLists.txt +++ /dev/null @@ -1,9 +0,0 @@ -# Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. -vespa_add_executable(eval_partial_add_test_app TEST - SOURCES - partial_add_test.cpp - DEPENDS - vespaeval - GTest::GTest -) -vespa_add_test(NAME eval_partial_add_test_app COMMAND eval_partial_add_test_app) diff --git a/eval/src/tests/tensor/partial_add/partial_add_test.cpp b/eval/src/tests/tensor/partial_add/partial_add_test.cpp deleted file mode 100644 index 893acf07adb..00000000000 --- a/eval/src/tests/tensor/partial_add/partial_add_test.cpp +++ /dev/null @@ -1,89 +0,0 @@ -// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. - -#include -#include -#include -#include -#include -#include -#include - -using namespace vespalib; -using namespace vespalib::eval; -using namespace vespalib::eval::test; - -using vespalib::make_string_short::fmt; - -std::vector add_layouts = { - {x({"a"})}, {x({"b"})}, - {x({"a","b"})}, {x({"a","c"})}, - float_cells({x({"a","b"})}), {x({"a","c"})}, - {x({"a","b"})}, float_cells({x({"a","c"})}), - float_cells({x({"a","b"})}), float_cells({x({"a","c"})}), - {x({"a","b","c"}),y({"d","e"})}, {x({"b","f"}),y({"d","g"})}, - {x(3),y({"a","b"})}, {x(3),y({"b","c"})} -}; - -TensorSpec reference_add(const TensorSpec &a, const TensorSpec &b) { - TensorSpec result(a.type()); - for (const auto &cell: b.cells()) { - result.add(cell.first, cell.second); - } - auto end_iter = b.cells().end(); - for (const auto &cell: a.cells()) { - auto iter = b.cells().find(cell.first); - if (iter == end_iter) { - result.add(cell.first, cell.second); - } - } - return result; -} - -Value::UP try_partial_add(const TensorSpec &a, const TensorSpec &b) { - const auto &factory = SimpleValueBuilderFactory::get(); - auto lhs = value_from_spec(a, factory); - auto rhs = value_from_spec(b, factory); - return tensor::TensorPartialUpdate::add(*lhs, *rhs, factory); -} - -TensorSpec perform_partial_add(const TensorSpec &a, const TensorSpec &b) { - auto up = try_partial_add(a, b); - EXPECT_TRUE(up); - return spec_from_value(*up); -} - -TEST(PartialAddTest, partial_add_works_for_simple_values) { - ASSERT_TRUE((add_layouts.size() % 2) == 0); - for (size_t i = 0; i < add_layouts.size(); i += 2) { - TensorSpec lhs = spec(add_layouts[i], N()); - TensorSpec rhs = spec(add_layouts[i + 1], Div16(N())); - SCOPED_TRACE(fmt("\n===\nLHS: %s\nRHS: %s\n===\n", lhs.to_string().c_str(), rhs.to_string().c_str())); - auto expect = reference_add(lhs, rhs); - auto actual = perform_partial_add(lhs, rhs); - EXPECT_EQ(actual, expect); - } -} - -std::vector bad_layouts = { - {x(3)}, {x(3),y(1)}, - {x(3),y(1)}, {x(3)}, - {x(3),y(3)}, {x(3),y({"a"})}, - {x(3),y({"a"})}, {x(3),y(3)}, - {x({"a"})}, {x({"a"}),y({"b"})}, - {x({"a"}),y({"b"})}, {x({"a"})}, - {x({"a"})}, {x({"a"}),y(1)} -}; - -TEST(PartialAddTest, partial_add_returns_nullptr_on_invalid_inputs) { - ASSERT_TRUE((bad_layouts.size() % 2) == 0); - for (size_t i = 0; i < bad_layouts.size(); i += 2) { - TensorSpec lhs = spec(bad_layouts[i], N()); - TensorSpec rhs = spec(bad_layouts[i + 1], Div16(N())); - SCOPED_TRACE(fmt("\n===\nLHS: %s\nRHS: %s\n===\n", lhs.to_string().c_str(), rhs.to_string().c_str())); - auto actual = try_partial_add(lhs, rhs); - auto expect = Value::UP(); - EXPECT_EQ(actual, expect); - } -} - -GTEST_MAIN_RUN_ALL_TESTS() diff --git a/eval/src/tests/tensor/partial_modify/CMakeLists.txt b/eval/src/tests/tensor/partial_modify/CMakeLists.txt deleted file mode 100644 index 42a08acaae6..00000000000 --- a/eval/src/tests/tensor/partial_modify/CMakeLists.txt +++ /dev/null @@ -1,9 +0,0 @@ -# Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. -vespa_add_executable(eval_partial_modify_test_app TEST - SOURCES - partial_modify_test.cpp - DEPENDS - vespaeval - GTest::GTest -) -vespa_add_test(NAME eval_partial_modify_test_app COMMAND eval_partial_modify_test_app) diff --git a/eval/src/tests/tensor/partial_modify/partial_modify_test.cpp b/eval/src/tests/tensor/partial_modify/partial_modify_test.cpp deleted file mode 100644 index a1802540fa5..00000000000 --- a/eval/src/tests/tensor/partial_modify/partial_modify_test.cpp +++ /dev/null @@ -1,110 +0,0 @@ -// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. - -#include -#include -#include -#include -#include -#include -#include - -using namespace vespalib; -using namespace vespalib::eval; -using namespace vespalib::eval::test; - -using vespalib::make_string_short::fmt; - -std::vector modify_layouts = { - {x({"a"})}, {x({"a"})}, - {x({"a",""})}, {x({"b","c","d","e"})}, - {x(5)}, {x({"1","2","foo","17"})}, - {x({"a","b","c"}),y({"d","e"})}, {x({"b"}),y({"d"})}, - {x({"a","b","c"})}, {x({"b","c","d"})}, - {x(4),y({"a","b","c","d"}),z(5)}, {x({"1","2"}),y({"b","d"}),z({"1","3"})}, - {x(3),y(2)}, {x({"0","1"}),y({"0","1"})}, - {x({"a","","b"})}, {x({""})} -}; - -TensorSpec::Address sparsify(const TensorSpec::Address &input) { - TensorSpec::Address output; - for (const auto & kv : input) { - if (kv.second.is_indexed()) { - auto val = fmt("%zu", kv.second.index); - output.emplace(kv.first, val); - } else { - output.emplace(kv.first, kv.second); - } - } - return output; -} - -TensorSpec reference_modify(const TensorSpec &a, const TensorSpec &b, join_fun_t fun) { - TensorSpec result(a.type()); - auto end_iter = b.cells().end(); - for (const auto &cell: a.cells()) { - double v = cell.second; - auto sparse_addr = sparsify(cell.first); - auto iter = b.cells().find(sparse_addr); - if (iter == end_iter) { - result.add(cell.first, v); - } else { - result.add(cell.first, fun(v, iter->second)); - } - } - return result; -} - -Value::UP try_partial_modify(const TensorSpec &a, const TensorSpec &b, join_fun_t fun) { - const auto &factory = SimpleValueBuilderFactory::get(); - auto lhs = value_from_spec(a, factory); - auto rhs = value_from_spec(b, factory); - return tensor::TensorPartialUpdate::modify(*lhs, fun, *rhs, factory); -} - -TensorSpec perform_partial_modify(const TensorSpec &a, const TensorSpec &b, join_fun_t fun) { - auto up = try_partial_modify(a, b, fun); - EXPECT_TRUE(up); - return spec_from_value(*up); -} - -TEST(PartialModifyTest, partial_modify_works_for_simple_values) { - ASSERT_TRUE((modify_layouts.size() % 2) == 0); - for (size_t i = 0; i < modify_layouts.size(); i += 2) { - TensorSpec lhs = spec(modify_layouts[i], N()); - TensorSpec rhs = spec(modify_layouts[i + 1], Div16(N())); - SCOPED_TRACE(fmt("\n===\nLHS: %s\nRHS: %s\n===\n", lhs.to_string().c_str(), rhs.to_string().c_str())); - for (auto fun: {operation::Add::f, operation::Mul::f, operation::Sub::f}) { - auto expect = reference_modify(lhs, rhs, fun); - auto actual = perform_partial_modify(lhs, rhs, fun); - EXPECT_EQ(actual, expect); - } - auto fun = [](double, double keep) { return keep; }; - auto expect = reference_modify(lhs, rhs, fun); - auto actual = perform_partial_modify(lhs, rhs, fun); - EXPECT_EQ(actual, expect); - } -} - -std::vector bad_layouts = { - {x(3)}, {x(3)}, - {x(3),y({"a"})}, {x(3),y({"a"})}, - {x({"a"})}, {x({"a"}),y({"b"})}, - {x({"a"}),y({"b"})}, {x({"a"})}, - {x({"a"})}, {x({"a"}),y(1)} -}; - -TEST(PartialModifyTest, partial_modify_returns_nullptr_on_invalid_inputs) { - ASSERT_TRUE((bad_layouts.size() % 2) == 0); - for (size_t i = 0; i < bad_layouts.size(); i += 2) { - TensorSpec lhs = spec(bad_layouts[i], N()); - TensorSpec rhs = spec(bad_layouts[i + 1], Div16(N())); - SCOPED_TRACE(fmt("\n===\nLHS: %s\nRHS: %s\n===\n", lhs.to_string().c_str(), rhs.to_string().c_str())); - for (auto fun: {operation::Add::f}) { - auto actual = try_partial_modify(lhs, rhs, fun); - auto expect = Value::UP(); - EXPECT_EQ(actual, expect); - } - } -} - -GTEST_MAIN_RUN_ALL_TESTS() diff --git a/eval/src/tests/tensor/partial_remove/CMakeLists.txt b/eval/src/tests/tensor/partial_remove/CMakeLists.txt deleted file mode 100644 index 1680324f574..00000000000 --- a/eval/src/tests/tensor/partial_remove/CMakeLists.txt +++ /dev/null @@ -1,9 +0,0 @@ -# Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. -vespa_add_executable(eval_partial_remove_test_app TEST - SOURCES - partial_remove_test.cpp - DEPENDS - vespaeval - GTest::GTest -) -vespa_add_test(NAME eval_partial_remove_test_app COMMAND eval_partial_remove_test_app) diff --git a/eval/src/tests/tensor/partial_remove/partial_remove_test.cpp b/eval/src/tests/tensor/partial_remove/partial_remove_test.cpp deleted file mode 100644 index fef6d99519f..00000000000 --- a/eval/src/tests/tensor/partial_remove/partial_remove_test.cpp +++ /dev/null @@ -1,130 +0,0 @@ -// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. - -#include -#include -#include -#include -#include -#include -#include - -using namespace vespalib; -using namespace vespalib::eval; -using namespace vespalib::eval::test; - -using vespalib::make_string_short::fmt; - -std::vector remove_layouts = { - {x({"a"})}, {x({"b"})}, - {x({"a","b"})}, {x({"a","c"})}, - {x({"a","b"})}, {x({"a","b"})}, - float_cells({x({"a","b"})}), {x({"a","c"})}, - {x({"a","b"})}, float_cells({x({"a","c"})}), - float_cells({x({"a","b"})}), float_cells({x({"a","c"})}), - {x({"a","b","c"}),y({"d","e"})}, {x({"b","f"}),y({"d","g"})}, - {x(3),y({"a","b"})}, {y({"b","c"})} -}; - -TensorSpec::Address only_sparse(const TensorSpec::Address &input) { - TensorSpec::Address output; - for (const auto & kv : input) { - if (kv.second.is_mapped()) { - output.emplace(kv.first, kv.second); - } - } - return output; -} - -TensorSpec reference_remove(const TensorSpec &a, const TensorSpec &b) { - TensorSpec result(a.type()); - auto end_iter = b.cells().end(); - for (const auto &cell: a.cells()) { - auto iter = b.cells().find(only_sparse(cell.first)); - if (iter == end_iter) { - result.add(cell.first, cell.second); - } - } - return result; -} - -Value::UP try_partial_remove(const TensorSpec &a, const TensorSpec &b) { - const auto &factory = SimpleValueBuilderFactory::get(); - auto lhs = value_from_spec(a, factory); - auto rhs = value_from_spec(b, factory); - return tensor::TensorPartialUpdate::remove(*lhs, *rhs, factory); -} - -TensorSpec perform_partial_remove(const TensorSpec &a, const TensorSpec &b) { - auto up = try_partial_remove(a, b); - EXPECT_TRUE(up); - return spec_from_value(*up); -} - -TEST(PartialRemoveTest, partial_remove_works_for_simple_values) { - ASSERT_TRUE((remove_layouts.size() % 2) == 0); - for (size_t i = 0; i < remove_layouts.size(); i += 2) { - TensorSpec lhs = spec(remove_layouts[i], N()); - TensorSpec rhs = spec(remove_layouts[i + 1], Div16(N())); - SCOPED_TRACE(fmt("\n===\nLHS: %s\nRHS: %s\n===\n", lhs.to_string().c_str(), rhs.to_string().c_str())); - auto expect = reference_remove(lhs, rhs); - auto actual = perform_partial_remove(lhs, rhs); - EXPECT_EQ(actual, expect); - } -} - -std::vector bad_layouts = { - {x(3)}, {x(3)}, - {x(3),y({"a"})}, {x(3)}, - {x(3),y({"a"})}, {x(3),y({"a"})}, - {x({"a"})}, {y({"a"})}, - {x({"a"})}, {x({"a"}),y({"b"})} -}; - -TEST(PartialRemoveTest, partial_remove_returns_nullptr_on_invalid_inputs) { - ASSERT_TRUE((bad_layouts.size() % 2) == 0); - for (size_t i = 0; i < bad_layouts.size(); i += 2) { - TensorSpec lhs = spec(bad_layouts[i], N()); - TensorSpec rhs = spec(bad_layouts[i + 1], Div16(N())); - SCOPED_TRACE(fmt("\n===\nLHS: %s\nRHS: %s\n===\n", lhs.to_string().c_str(), rhs.to_string().c_str())); - auto actual = try_partial_remove(lhs, rhs); - auto expect = Value::UP(); - EXPECT_EQ(actual, expect); - } -} - -void -expect_partial_remove(const TensorSpec& input, const TensorSpec& remove, const TensorSpec& exp) -{ - auto act = perform_partial_remove(input, remove); - EXPECT_EQ(exp, act); -} - -TEST(PartialRemoveTest, remove_where_address_is_not_fully_specified) { - auto input_sparse = TensorSpec("tensor(x{},y{})"). - add({{"x", "a"},{"y", "c"}}, 3.0). - add({{"x", "a"},{"y", "d"}}, 5.0). - add({{"x", "b"},{"y", "c"}}, 7.0); - - expect_partial_remove(input_sparse, TensorSpec("tensor(x{})").add({{"x", "a"}}, 1.0), - TensorSpec("tensor(x{},y{})").add({{"x", "b"},{"y", "c"}}, 7.0)); - - expect_partial_remove(input_sparse, TensorSpec("tensor(y{})").add({{"y", "c"}}, 1.0), - TensorSpec("tensor(x{},y{})").add({{"x", "a"},{"y", "d"}}, 5.0)); - - expect_partial_remove(input_sparse, TensorSpec("tensor(y{})").add({{"y", "d"}}, 1.0), - TensorSpec("tensor(x{},y{})").add({{"x", "a"},{"y", "c"}}, 3.0) - .add({{"x", "b"},{"y", "c"}}, 7.0)); - - auto input_mixed = TensorSpec("tensor(x{},y{},z[1])"). - add({{"x", "a"},{"y", "c"},{"z", 0}}, 3.0). - add({{"x", "a"},{"y", "d"},{"z", 0}}, 5.0). - add({{"x", "b"},{"y", "c"},{"z", 0}}, 7.0); - - expect_partial_remove(input_mixed,TensorSpec("tensor(x{})").add({{"x", "a"}}, 1.0), - TensorSpec("tensor(x{},y{},z[1])").add({{"x", "b"},{"y", "c"},{"z", 0}}, 7.0)); - - expect_partial_remove(input_mixed, TensorSpec("tensor(y{})").add({{"y", "c"}}, 1.0), - TensorSpec("tensor(x{},y{},z[1])").add({{"x", "a"},{"y", "d"},{"z", 0}}, 5.0)); -} - -GTEST_MAIN_RUN_ALL_TESTS() diff --git a/eval/src/vespa/eval/CMakeLists.txt b/eval/src/vespa/eval/CMakeLists.txt index 9a3c2f817d8..2d6e7b76c20 100644 --- a/eval/src/vespa/eval/CMakeLists.txt +++ b/eval/src/vespa/eval/CMakeLists.txt @@ -8,7 +8,6 @@ vespa_add_library(vespaeval $ $ $ - $ $ INSTALL lib64 DEPENDS diff --git a/eval/src/vespa/eval/tensor/CMakeLists.txt b/eval/src/vespa/eval/tensor/CMakeLists.txt deleted file mode 100644 index 8b0178bd656..00000000000 --- a/eval/src/vespa/eval/tensor/CMakeLists.txt +++ /dev/null @@ -1,5 +0,0 @@ -# Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. -vespa_add_library(eval_tensor OBJECT - SOURCES - partial_update.cpp -) diff --git a/eval/src/vespa/eval/tensor/partial_update.cpp b/eval/src/vespa/eval/tensor/partial_update.cpp deleted file mode 100644 index fba42988f92..00000000000 --- a/eval/src/vespa/eval/tensor/partial_update.cpp +++ /dev/null @@ -1,419 +0,0 @@ -// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. - -#include "partial_update.h" -#include -#include -#include -#include -#include -#include - -#include -LOG_SETUP(".eval.tensor.partial_update"); - -using namespace vespalib::eval; - -namespace vespalib::tensor { - -namespace { - -using join_fun_t = vespalib::eval::operation::op2_t; - -static constexpr size_t npos() { return -1; } - -enum class DimCase { - MAPPED_MATCH, CONV_TO_INDEXED -}; - -struct DenseCoords { - std::vector dim_sizes; - size_t total_size; - size_t offset; - size_t current; - DenseCoords(const ValueType &output_type) - : total_size(1), offset(0), current(0) - { - for (const auto & dim : output_type.dimensions()) { - if (dim.is_indexed()) { - dim_sizes.push_back(dim.size); - total_size *= dim.size; - } - } - } - ~DenseCoords(); - void clear() { offset = 0; current = 0; } - void convert_label(vespalib::stringref label) { - uint32_t coord = 0; - for (char c : label) { - if (c < '0' || c > '9') { // bad char - offset = npos(); - break; - } - coord = coord * 10 + (c - '0'); - } - size_t cur_dim_size = dim_sizes[current]; - if (coord < cur_dim_size) { - if (offset != npos()) { - offset *= cur_dim_size; - offset += coord; - } - } else { - offset = npos(); - } - ++current; - } - size_t get_dense_index() const { - assert(current == dim_sizes.size()); - return offset; - } -}; -DenseCoords::~DenseCoords() = default; - -struct SparseCoords { - std::vector addr; - std::vector next_result_refs; - std::vector lookup_refs; - std::vector lookup_view_dims; - SparseCoords(size_t sz) - : addr(sz), next_result_refs(sz), lookup_refs(sz), lookup_view_dims(sz) - { - for (size_t i = 0; i < sz; ++i) { - next_result_refs[i] = &addr[i]; - lookup_refs[i] = &addr[i]; - lookup_view_dims[i] = i; - } - } - ~SparseCoords(); -}; -SparseCoords::~SparseCoords() = default; - -/** - * Helper class that converts a fully-sparse address from the modifier - * tensor into a subset sparse address for the output and an offset - * in the dense subspace. - **/ -struct AddressHandler { - std::vector dimension_plan; - DenseCoords dense_converter; - SparseCoords for_output; - SparseCoords from_modifier; - bool valid; - - AddressHandler(const ValueType &output_type, - const ValueType &modifier_type) - : dimension_plan(), dense_converter(output_type), - for_output(output_type.count_mapped_dimensions()), - from_modifier(modifier_type.count_mapped_dimensions()), - valid(true) - { - if (! modifier_type.is_sparse()) { - LOG(error, "Unexpected non-sparse modifier tensor, type is %s", - modifier_type.to_spec().c_str()); - valid = false; - return; - } - // analyse dimensions - auto visitor = overload { - [&](visit_ranges_either, const auto &) { valid = false; }, - [&](visit_ranges_both, const auto &a, const auto &) { - dimension_plan.push_back(a.is_mapped() ? DimCase::MAPPED_MATCH : DimCase::CONV_TO_INDEXED); - } - }; - const auto & input_dims = output_type.dimensions(); - const auto & modifier_dims = modifier_type.dimensions(); - visit_ranges(visitor, - input_dims.begin(), input_dims.end(), - modifier_dims.begin(), modifier_dims.end(), - [](const auto &a, const auto &b){ return (a.name < b.name); }); - if (! valid) { - LOG(error, "Value type %s does not match modifier type %s (should have same dimensions)", - output_type.to_spec().c_str(), modifier_type.to_spec().c_str()); - return; - } - // implicitly checked above, must hold: - assert(input_dims.size() == modifier_dims.size()); - // the plan should now be fully built: - assert(input_dims.size() == dimension_plan.size()); - } - - void handle_address() - { - dense_converter.clear(); - auto out = for_output.addr.begin(); - for (size_t i = 0; i < dimension_plan.size(); ++i) { - if (dimension_plan[i] == DimCase::CONV_TO_INDEXED) { - dense_converter.convert_label(from_modifier.addr[i]); - } else { - *out++ = from_modifier.addr[i]; - } - } - assert(out == for_output.addr.end()); - assert(dense_converter.current == dense_converter.dim_sizes.size()); - } - - ~AddressHandler(); -}; -AddressHandler::~AddressHandler() = default; - -template -void copy_tensor_with_filter(const Value &input, - size_t dsss, - SparseCoords &addrs, - ValueBuilder &builder, - KeepFun && keep_subspace) -{ - const auto input_cells = input.cells().typify(); - auto input_view = input.index().create_view({}); - input_view->lookup({}); - size_t input_subspace_index; - while (input_view->next_result(addrs.next_result_refs, input_subspace_index)) { - if (keep_subspace(addrs.lookup_refs, input_subspace_index)) { - size_t input_offset = dsss * input_subspace_index; - auto src = input_cells.begin() + input_offset; - auto dst = builder.add_subspace(addrs.addr).begin(); - for (size_t i = 0; i < dsss; ++i) { - dst[i] = src[i]; - } - } - } -} - -template -Value::UP -copy_tensor(const Value &input, const ValueType &input_type, SparseCoords &helper, const ValueBuilderFactory &factory) -{ - const size_t num_mapped_in_input = input_type.count_mapped_dimensions(); - const size_t dsss = input_type.dense_subspace_size(); - const size_t expected_subspaces = input.index().size(); - auto builder = factory.create_value_builder(input_type, num_mapped_in_input, dsss, expected_subspaces); - auto no_filter = [] (const auto &, size_t) { - return true; - }; - copy_tensor_with_filter(input, dsss, helper, *builder, no_filter); - return builder->build(std::move(builder)); -} - -//----------------------------------------------------------------------------- - -struct PerformModify { - template - static Value::UP invoke(const Value &input, - join_fun_t function, - const Value &modifier, - const ValueBuilderFactory &factory); -}; - -template -Value::UP -PerformModify::invoke(const Value &input, join_fun_t function, const Value &modifier, const ValueBuilderFactory &factory) -{ - const ValueType &input_type = input.type(); - const size_t dsss = input_type.dense_subspace_size(); - const ValueType &modifier_type = modifier.type(); - AddressHandler handler(input_type, modifier_type); - if (! handler.valid) { - return {}; - } - // copy input to output - auto out = copy_tensor(input, input_type, handler.for_output, factory); - // need to overwrite some cells - auto output_cells = unconstify(out->cells().template typify()); - const auto modifier_cells = modifier.cells().typify(); - auto modifier_view = modifier.index().create_view({}); - auto lookup_view = out->index().create_view(handler.for_output.lookup_view_dims); - modifier_view->lookup({}); - size_t modifier_subspace_index; - while (modifier_view->next_result(handler.from_modifier.next_result_refs, modifier_subspace_index)) { - handler.handle_address(); - size_t dense_idx = handler.dense_converter.get_dense_index(); - if (dense_idx == npos()) { - continue; - } - lookup_view->lookup(handler.for_output.lookup_refs); - size_t output_subspace_index; - if (lookup_view->next_result({}, output_subspace_index)) { - size_t subspace_offset = dsss * output_subspace_index; - auto dst = output_cells.begin() + subspace_offset; - ICT lhs = dst[dense_idx]; - MCT rhs = modifier_cells[modifier_subspace_index]; - dst[dense_idx] = function(lhs, rhs); - } - } - return out; -} - -//----------------------------------------------------------------------------- - -struct PerformAdd { - template - static Value::UP invoke(const Value &input, - const Value &modifier, - const ValueBuilderFactory &factory); -}; - -template -Value::UP -PerformAdd::invoke(const Value &input, const Value &modifier, const ValueBuilderFactory &factory) -{ - const ValueType &input_type = input.type(); - const ValueType &modifier_type = modifier.type(); - if (input_type.dimensions() != modifier_type.dimensions()) { - LOG(error, "when adding cells to a tensor, dimensions must be equal. " - "Got input type %s != modifier type %s", - input_type.to_spec().c_str(), modifier_type.to_spec().c_str()); - return {}; - } - const size_t num_mapped_in_input = input_type.count_mapped_dimensions(); - const size_t dsss = input_type.dense_subspace_size(); - const size_t expected_subspaces = input.index().size() + modifier.index().size(); - auto builder = factory.create_value_builder(input_type, num_mapped_in_input, dsss, expected_subspaces); - SparseCoords addrs(num_mapped_in_input); - auto lookup_view = input.index().create_view(addrs.lookup_view_dims); - std::vector overwritten(input.index().size(), false); - auto remember_subspaces = [&] (const auto & lookup_refs, size_t) { - lookup_view->lookup(lookup_refs); - size_t input_subspace_index; - if (lookup_view->next_result({}, input_subspace_index)) { - overwritten[input_subspace_index] = true; - } - return true; - }; - copy_tensor_with_filter(modifier, dsss, addrs, *builder, remember_subspaces); - auto filter = [&] (const auto &, size_t input_subspace) { - return ! overwritten[input_subspace]; - }; - copy_tensor_with_filter(input, dsss, addrs, *builder, filter); - return builder->build(std::move(builder)); -} - -//----------------------------------------------------------------------------- - -struct PerformRemove { - template - static Value::UP invoke(const Value &input, - const Value &modifier, - const ValueBuilderFactory &factory); -}; - -/** - * Calculates the indexes of where the mapped modifier dimensions are found in the mapped input dimensions. - * - * The modifier dimensions should be a subset or all of the input dimensions. - * An empty vector is returned on type mismatch. - */ -std::vector -calc_mapped_dimension_indexes(const ValueType& input_type, - const ValueType& modifier_type) -{ - auto input_dims = input_type.mapped_dimensions(); - auto mod_dims = modifier_type.mapped_dimensions(); - if (mod_dims.size() > input_dims.size()) { - return {}; - } - std::vector result(mod_dims.size()); - size_t j = 0; - for (size_t i = 0; i < mod_dims.size(); ++i) { - while ((j < input_dims.size()) && (input_dims[j] != mod_dims[i])) { - ++j; - } - if (j >= input_dims.size()) { - return {}; - } - result[i] = j; - } - return result; -} - -struct ModifierCoords { - - std::vector lookup_refs; - std::vector lookup_view_dims; - - ModifierCoords(const SparseCoords& input_coords, - const std::vector& input_dim_indexes, - const ValueType& modifier_type) - : lookup_refs(modifier_type.dimensions().size()), - lookup_view_dims(modifier_type.dimensions().size()) - { - assert(modifier_type.dimensions().size() == input_dim_indexes.size()); - for (size_t i = 0; i < input_dim_indexes.size(); ++i) { - // Setup the modifier dimensions to point to the matching input dimensions. - lookup_refs[i] = &input_coords.addr[input_dim_indexes[i]]; - lookup_view_dims[i] = i; - } - } - ~ModifierCoords() {} -}; - -template -Value::UP -PerformRemove::invoke(const Value &input, const Value &modifier, const ValueBuilderFactory &factory) -{ - const ValueType &input_type = input.type(); - const ValueType &modifier_type = modifier.type(); - const size_t num_mapped_in_input = input_type.count_mapped_dimensions(); - if (num_mapped_in_input == 0) { - LOG(error, "Cannot remove cells from a dense input tensor of type %s", - input_type.to_spec().c_str()); - return {}; - } - if (modifier_type.count_indexed_dimensions() != 0) { - LOG(error, "Cannot remove cells using a modifier tensor of type %s", - modifier_type.to_spec().c_str()); - return {}; - } - auto input_dim_indexes = calc_mapped_dimension_indexes(input_type, modifier_type); - if (input_dim_indexes.empty()) { - LOG(error, "Tensor type mismatch when removing cells from a tensor. " - "Got input type %s versus modifier type %s", - input_type.to_spec().c_str(), modifier_type.to_spec().c_str()); - return {}; - } - SparseCoords addrs(num_mapped_in_input); - ModifierCoords mod_coords(addrs, input_dim_indexes, modifier_type); - auto modifier_view = modifier.index().create_view(mod_coords.lookup_view_dims); - const size_t expected_subspaces = input.index().size(); - const size_t dsss = input_type.dense_subspace_size(); - auto builder = factory.create_value_builder(input_type, num_mapped_in_input, dsss, expected_subspaces); - auto filter_by_modifier = [&] (const auto & lookup_refs, size_t) { - // The modifier dimensions are setup to point to the input dimensions address storage in ModifierCoords, - // so we don't need to use the lookup_refs argument. - (void) lookup_refs; - modifier_view->lookup(mod_coords.lookup_refs); - size_t modifier_subspace_index; - return !(modifier_view->next_result({}, modifier_subspace_index)); - }; - copy_tensor_with_filter(input, dsss, addrs, *builder, filter_by_modifier); - return builder->build(std::move(builder)); -} - -} // namespace - -//----------------------------------------------------------------------------- - -Value::UP -TensorPartialUpdate::modify(const Value &input, join_fun_t function, - const Value &modifier, const ValueBuilderFactory &factory) -{ - return typify_invoke<2, TypifyCellType, PerformModify>( - input.cells().type, modifier.cells().type, - input, function, modifier, factory); -} - -Value::UP -TensorPartialUpdate::add(const Value &input, const Value &add_cells, const ValueBuilderFactory &factory) -{ - return typify_invoke<2, TypifyCellType, PerformAdd>( - input.cells().type, add_cells.cells().type, - input, add_cells, factory); -} - -Value::UP -TensorPartialUpdate::remove(const Value &input, const Value &remove_spec, const ValueBuilderFactory &factory) -{ - return typify_invoke<1, TypifyCellType, PerformRemove>( - input.cells().type, - input, remove_spec, factory); -} - -} // namespace diff --git a/eval/src/vespa/eval/tensor/partial_update.h b/eval/src/vespa/eval/tensor/partial_update.h deleted file mode 100644 index b3e9d32fca8..00000000000 --- a/eval/src/vespa/eval/tensor/partial_update.h +++ /dev/null @@ -1,44 +0,0 @@ -// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. - -#pragma once - -#include -#include -#include - -namespace vespalib::tensor { - -struct TensorPartialUpdate { - using join_fun_t = vespalib::eval::operation::op2_t; - using Value = vespalib::eval::Value; - using ValueBuilderFactory = vespalib::eval::ValueBuilderFactory; - - /** - * Make a copy of the input, but apply function(oldvalue, modifier.cellvalue) - * to cells which also exist in the "modifier". - * The modifier type must be sparse with exactly the same dimension names - * as the input type. - * Returns null pointer if this constraint is violated. - **/ - static Value::UP modify(const Value &input, join_fun_t function, - const Value &modifier, const ValueBuilderFactory &factory); - - /** - * Make a copy of the input, but add or overwrite cells from add_cells. - * Requires same type for input and add_cells. - * Returns null pointer if this constraint is violated. - **/ - static Value::UP add(const Value &input, const Value &add_cells, const ValueBuilderFactory &factory); - - /** - * Make a copy of the input, but remove cells present in remove_spec. - * The remove_spec must be a sparse tensor, with exactly the mapped dimensions - * that the input value has. - * Cell values in remove_spec are ignored. - * Not valid for dense tensors, since removing cells for those are impossible. - * Returns null pointer if these constraints are violated. - **/ - static Value::UP remove(const Value &input, const Value &remove_spec, const ValueBuilderFactory &factory); -}; - -} // namespace -- cgit v1.2.3