diff options
author | Arne Juul <arnej@verizonmedia.com> | 2020-09-29 08:12:07 +0000 |
---|---|---|
committer | Arne Juul <arnej@verizonmedia.com> | 2020-10-01 09:32:18 +0000 |
commit | 920eea97620f6db8b58cbb53def2eea783b2dcbd (patch) | |
tree | 4c7335c8ff4813f5696d9ab3c67facbd13188910 /eval/src | |
parent | cc954b2df294897606a6b1b40da9d5810394a2d8 (diff) |
Implement new Value API in SparseTensor
* new Address -> index mapping in SparseTensorIndex
* extra indirection in SparseTensor
* rename old "apply" utilities -> join
* make a celltype-templated SparseTensorT and its Builder
* add large vector sparse multiply benchmark
* get rid of temporary SparseTensorValue
* handle templated DirectSparseTensorBuilder in searchlib
Diffstat (limited to 'eval/src')
33 files changed, 747 insertions, 686 deletions
diff --git a/eval/src/tests/tensor/default_value_builder_factory/default_value_builder_factory_test.cpp b/eval/src/tests/tensor/default_value_builder_factory/default_value_builder_factory_test.cpp index d180b3f6517..28f8fcc7eb8 100644 --- a/eval/src/tests/tensor/default_value_builder_factory/default_value_builder_factory_test.cpp +++ b/eval/src/tests/tensor/default_value_builder_factory/default_value_builder_factory_test.cpp @@ -5,7 +5,7 @@ #include <vespa/eval/eval/tensor_spec.h> #include <vespa/eval/tensor/default_value_builder_factory.h> #include <vespa/eval/tensor/mixed/packed_mixed_tensor.h> -#include <vespa/eval/tensor/sparse/sparse_tensor_value.h> +#include <vespa/eval/tensor/sparse/sparse_tensor.h> #include <vespa/eval/tensor/dense/dense_tensor.h> #include <vespa/vespalib/gtest/gtest.h> @@ -28,7 +28,7 @@ TEST(DefaultValueBuilderFactoryTest, all_built_value_types_are_correct) { EXPECT_TRUE(dynamic_cast<DoubleValue *>(dbl.get())); EXPECT_TRUE(dynamic_cast<DenseTensorView *>(trivial.get())); EXPECT_TRUE(dynamic_cast<DenseTensorView *>(dense.get())); - EXPECT_TRUE(dynamic_cast<SparseTensorValue<double> *>(sparse.get())); + EXPECT_TRUE(dynamic_cast<SparseTensor *>(sparse.get())); EXPECT_TRUE(dynamic_cast<PackedMixedTensor *>(mixed.get())); EXPECT_EQ(dbl->as_double(), 3.0); diff --git a/eval/src/tests/tensor/direct_sparse_tensor_builder/direct_sparse_tensor_builder_test.cpp b/eval/src/tests/tensor/direct_sparse_tensor_builder/direct_sparse_tensor_builder_test.cpp index e4640cf2c6a..a0883ccfa4b 100644 --- a/eval/src/tests/tensor/direct_sparse_tensor_builder/direct_sparse_tensor_builder_test.cpp +++ b/eval/src/tests/tensor/direct_sparse_tensor_builder/direct_sparse_tensor_builder_test.cpp @@ -13,7 +13,7 @@ using vespalib::eval::ValueType; void assertCellValue(double expValue, const TensorAddress &address, const ValueType &type, - const SparseTensor::Cells &cells) + const SparseTensor &tensor) { SparseTensorAddressBuilder addressBuilder; auto dimsItr = 
type.dimensions().cbegin(); @@ -32,15 +32,20 @@ assertCellValue(double expValue, const TensorAddress &address, ++dimsItr; } SparseTensorAddressRef addressRef(addressBuilder.getAddressRef()); - auto itr = cells.find(addressRef); - EXPECT_FALSE(itr == cells.end()); - EXPECT_EQUAL(expValue, itr->second); + size_t idx; + bool found = tensor.index().lookup_address(addressRef, idx); + EXPECT_TRUE(found); + auto cells = tensor.cells(); + if (EXPECT_TRUE(cells.type == CellType::DOUBLE)) { + auto arr = cells.typify<double>(); + EXPECT_EQUAL(expValue, arr[idx]); + } } Tensor::UP buildTensor() { - DirectSparseTensorBuilder builder(ValueType::from_spec("tensor(a{},b{},c{},d{})")); + DirectSparseTensorBuilder<double> builder(ValueType::from_spec("tensor(a{},b{},c{},d{})")); SparseTensorAddressBuilder address; address.set({"1", "2", "", ""}); builder.insertCell(address, 10); @@ -54,10 +59,10 @@ TEST("require that tensor can be constructed") Tensor::UP tensor = buildTensor(); const SparseTensor &sparseTensor = dynamic_cast<const SparseTensor &>(*tensor); const ValueType &type = sparseTensor.type(); - const SparseTensor::Cells &cells = sparseTensor.my_cells(); - EXPECT_EQUAL(2u, cells.size()); - assertCellValue(10, TensorAddress({{"a","1"},{"b","2"}}), type, cells); - assertCellValue(20, TensorAddress({{"c","3"},{"d","4"}}), type, cells); + const auto & index = sparseTensor.index(); + EXPECT_EQUAL(2u, index.size()); + assertCellValue(10, TensorAddress({{"a","1"},{"b","2"}}), type, sparseTensor); + assertCellValue(20, TensorAddress({{"c","3"},{"d","4"}}), type, sparseTensor); } TEST("require that tensor can be converted to tensor spec") diff --git a/eval/src/tests/tensor/instruction_benchmark/instruction_benchmark.cpp b/eval/src/tests/tensor/instruction_benchmark/instruction_benchmark.cpp index 31777e233f6..4771034902b 100644 --- a/eval/src/tests/tensor/instruction_benchmark/instruction_benchmark.cpp +++ b/eval/src/tests/tensor/instruction_benchmark/instruction_benchmark.cpp @@ 
-340,6 +340,12 @@ TEST(SparseJoin, small_vectors) { benchmark_join("small sparse vector multiply", lhs, rhs, operation::Mul::f); } +TEST(SparseJoin, large_vectors) { + auto lhs = make_vector(D::map("x", 1800, 1), 1.0); + auto rhs = make_vector(D::map("x", 1000, 2), 2.0); + benchmark_join("large sparse vector multiply", lhs, rhs, operation::Mul::f); +} + TEST(SparseJoin, full_overlap) { auto lhs = make_cube(D::map("a", 16, 1), D::map("b", 16, 1), D::map("c", 16, 1), 1.0); auto rhs = make_cube(D::map("a", 16, 2), D::map("b", 16, 2), D::map("c", 16, 2), 2.0); diff --git a/eval/src/vespa/eval/tensor/CMakeLists.txt b/eval/src/vespa/eval/tensor/CMakeLists.txt index 810dfd6d0b3..79f6f7e2a4f 100644 --- a/eval/src/vespa/eval/tensor/CMakeLists.txt +++ b/eval/src/vespa/eval/tensor/CMakeLists.txt @@ -5,6 +5,5 @@ vespa_add_library(eval_tensor OBJECT default_value_builder_factory.cpp tensor.cpp tensor_address.cpp - tensor_apply.cpp wrapped_simple_tensor.cpp ) diff --git a/eval/src/vespa/eval/tensor/default_tensor_engine.cpp b/eval/src/vespa/eval/tensor/default_tensor_engine.cpp index ca14e40e4d0..7d4bff21380 100644 --- a/eval/src/vespa/eval/tensor/default_tensor_engine.cpp +++ b/eval/src/vespa/eval/tensor/default_tensor_engine.cpp @@ -193,6 +193,26 @@ struct CallDenseTensorBuilder { } }; +struct CallSparseTensorBuilder { + template <typename CT> + static Value::UP + invoke(const ValueType &type, const TensorSpec &spec) + { + DirectSparseTensorBuilder<CT> builder(type); + builder.reserve(spec.cells().size()); + SparseTensorAddressBuilder address_builder; + for (const auto &cell: spec.cells()) { + const auto &address = cell.first; + if (build_cell_address(type, address, address_builder)) { + builder.insertCell(address_builder, cell.second); + } else { + bad_spec(spec); + } + } + return builder.build(); + } +}; + using MyTypify = eval::TypifyCellType; Value::UP @@ -207,17 +227,7 @@ DefaultTensorEngine::from_spec(const TensorSpec &spec) const } else if (type.is_dense()) { return 
typify_invoke<1,MyTypify,CallDenseTensorBuilder>(type.cell_type(), type, spec); } else if (type.is_sparse()) { - DirectSparseTensorBuilder builder(type); - SparseTensorAddressBuilder address_builder; - for (const auto &cell: spec.cells()) { - const auto &address = cell.first; - if (build_cell_address(type, address, address_builder)) { - builder.insertCell(address_builder, cell.second); - } else { - bad_spec(spec); - } - } - return builder.build(); + return typify_invoke<1,MyTypify,CallSparseTensorBuilder>(type.cell_type(), type, spec); } return std::make_unique<WrappedSimpleTensor>(eval::SimpleTensor::create(spec)); } diff --git a/eval/src/vespa/eval/tensor/serialization/sparse_binary_format.cpp b/eval/src/vespa/eval/tensor/serialization/sparse_binary_format.cpp index 06e3f63c8da..3b542621295 100644 --- a/eval/src/vespa/eval/tensor/serialization/sparse_binary_format.cpp +++ b/eval/src/vespa/eval/tensor/serialization/sparse_binary_format.cpp @@ -99,7 +99,7 @@ size_t encodeCells(nbostream &stream, const Tensor &tensor, CellType cell_type) } template<typename T> -void decodeCells(nbostream &stream, size_t dimensionsSize, size_t cellsSize, DirectSparseTensorBuilder &builder) { +void decodeCells(nbostream &stream, size_t dimensionsSize, size_t cellsSize, DirectSparseTensorBuilder<T> &builder) { T cellValue = 0.0; vespalib::string str; SparseTensorAddressBuilder address; @@ -118,17 +118,6 @@ void decodeCells(nbostream &stream, size_t dimensionsSize, size_t cellsSize, Dir } } -void decodeCells(CellType cell_type, nbostream &stream, size_t dimensionsSize, size_t cellsSize, DirectSparseTensorBuilder &builder) { - switch (cell_type) { - case CellType::DOUBLE: - decodeCells<double>(stream, dimensionsSize, cellsSize, builder); - break; - case CellType::FLOAT: - decodeCells<float>(stream, dimensionsSize, cellsSize, builder); - break; - } -} - } void @@ -152,11 +141,19 @@ SparseBinaryFormat::deserialize(nbostream &stream, CellType cell_type) stream.readSmallString(str); 
dimensions.emplace_back(str); } - ValueType type = ValueType::tensor_type(std::move(dimensions), cell_type); - DirectSparseTensorBuilder builder(type); size_t cellsSize = stream.getInt1_4Bytes(); - decodeCells(cell_type, stream, dimensionsSize, cellsSize, builder); - return builder.build(); + ValueType type = ValueType::tensor_type(std::move(dimensions), cell_type); + switch (cell_type) { + case CellType::DOUBLE: { + DirectSparseTensorBuilder<double> builder(type); + decodeCells<double>(stream, dimensionsSize, cellsSize, builder); + return builder.build(); } + case CellType::FLOAT: { + DirectSparseTensorBuilder<float> builder(type); + decodeCells<float>(stream, dimensionsSize, cellsSize, builder); + return builder.build(); } + } + abort(); } -} +} // namespace diff --git a/eval/src/vespa/eval/tensor/sparse/CMakeLists.txt b/eval/src/vespa/eval/tensor/sparse/CMakeLists.txt index 91c609a59b7..45baefe24c3 100644 --- a/eval/src/vespa/eval/tensor/sparse/CMakeLists.txt +++ b/eval/src/vespa/eval/tensor/sparse/CMakeLists.txt @@ -3,14 +3,15 @@ vespa_add_library(eval_tensor_sparse OBJECT SOURCES direct_sparse_tensor_builder.cpp sparse_tensor.cpp + sparse_tensor_t.cpp sparse_tensor_add.cpp sparse_tensor_address_builder.cpp sparse_tensor_address_combiner.cpp sparse_tensor_address_reducer.cpp sparse_tensor_address_ref.cpp + sparse_tensor_index.cpp sparse_tensor_match.cpp sparse_tensor_modify.cpp sparse_tensor_remove.cpp - sparse_tensor_value.cpp sparse_tensor_value_builder.cpp ) diff --git a/eval/src/vespa/eval/tensor/sparse/direct_sparse_tensor_builder.cpp b/eval/src/vespa/eval/tensor/sparse/direct_sparse_tensor_builder.cpp index c47521e702d..4e8d2fda7cb 100644 --- a/eval/src/vespa/eval/tensor/sparse/direct_sparse_tensor_builder.cpp +++ b/eval/src/vespa/eval/tensor/sparse/direct_sparse_tensor_builder.cpp @@ -1,50 +1,46 @@ // Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
#include "direct_sparse_tensor_builder.h" +#include "sparse_tensor_t.h" +#include <type_traits> namespace vespalib::tensor { -void -DirectSparseTensorBuilder::copyCells(const Cells &cells_in) -{ - for (const auto &cell : cells_in) { - SparseTensorAddressRef oldRef = cell.first; - SparseTensorAddressRef newRef(oldRef, _stash); - _cells[newRef] = cell.second; - } -} - -DirectSparseTensorBuilder::DirectSparseTensorBuilder() - : _stash(SparseTensor::STASH_CHUNK_SIZE), - _type(eval::ValueType::double_type()), - _cells() -{ -} - -DirectSparseTensorBuilder::DirectSparseTensorBuilder(const eval::ValueType &type_in) - : _stash(SparseTensor::STASH_CHUNK_SIZE), - _type(type_in), - _cells() +template<typename T> +DirectSparseTensorBuilder<T>::DirectSparseTensorBuilder() + : _type(eval::ValueType::double_type()), + _index(0), + _values() { + assert((std::is_same_v<T,double>)); } -DirectSparseTensorBuilder::DirectSparseTensorBuilder(const eval::ValueType &type_in, const Cells &cells_in) - : _stash(SparseTensor::STASH_CHUNK_SIZE), - _type(type_in), - _cells() +template<typename T> +DirectSparseTensorBuilder<T>::DirectSparseTensorBuilder(const eval::ValueType &type_in) + : _type(type_in), + _index(_type.count_mapped_dimensions()), + _values() { - copyCells(cells_in); } -DirectSparseTensorBuilder::~DirectSparseTensorBuilder() = default; +template<typename T> +DirectSparseTensorBuilder<T>::~DirectSparseTensorBuilder() = default; +template<typename T> Tensor::UP -DirectSparseTensorBuilder::build() { - return std::make_unique<SparseTensor>(std::move(_type), std::move(_cells), std::move(_stash)); +DirectSparseTensorBuilder<T>::build() { + using tt = SparseTensorT<T>; + return std::make_unique<tt>(std::move(_type), std::move(_index), std::move(_values)); } -void DirectSparseTensorBuilder::reserve(uint32_t estimatedCells) { - _cells.resize(estimatedCells*2); +template<typename T> +void +DirectSparseTensorBuilder<T>::reserve(uint32_t estimatedCells) { + _index.reserve(estimatedCells); + 
_values.reserve(estimatedCells); } -}
\ No newline at end of file +template class DirectSparseTensorBuilder<float>; +template class DirectSparseTensorBuilder<double>; + +} diff --git a/eval/src/vespa/eval/tensor/sparse/direct_sparse_tensor_builder.h b/eval/src/vespa/eval/tensor/sparse/direct_sparse_tensor_builder.h index bcb22c0761d..c46ae5b9819 100644 --- a/eval/src/vespa/eval/tensor/sparse/direct_sparse_tensor_builder.h +++ b/eval/src/vespa/eval/tensor/sparse/direct_sparse_tensor_builder.h @@ -12,56 +12,56 @@ namespace vespalib::tensor { * Utility class to build tensors of type SparseTensor, to be used by * tensor operations. */ +template<typename T> class DirectSparseTensorBuilder { public: - using Cells = SparseTensor::Cells; using AddressBuilderType = SparseTensorAddressBuilder; using AddressRefType = SparseTensorAddressRef; private: - Stash _stash; eval::ValueType _type; - Cells _cells; + SparseTensorIndex _index; + std::vector<T> _values; public: - void copyCells(const Cells &cells_in); DirectSparseTensorBuilder(); DirectSparseTensorBuilder(const eval::ValueType &type_in); - DirectSparseTensorBuilder(const eval::ValueType &type_in, const Cells &cells_in); ~DirectSparseTensorBuilder(); Tensor::UP build(); template <class Function> - void insertCell(SparseTensorAddressRef address, double value, Function &&func) + void insertCell(SparseTensorAddressRef address, T value, Function &&func) { - auto res = _cells.insert(std::make_pair(address, value)); - if (res.second) { - // Replace key with own copy - res.first->first = SparseTensorAddressRef(address, _stash); + size_t idx; + if (_index.lookup_address(address, idx)) { + _values[idx] = func(_values[idx], value); } else { - res.first->second = func(res.first->second, value); + idx = _index.lookup_or_add(address); + assert(idx == _values.size()); + _values.push_back(value); } } - void insertCell(SparseTensorAddressRef address, double value) { + void insertCell(SparseTensorAddressRef address, T value) { // This address should not already exist and a new 
cell should be inserted. - insertCell(address, value, [](double, double) -> double { HDR_ABORT("should not be reached"); }); + _index.add_address(address); + _values.push_back(value); } template <class Function> - void insertCell(SparseTensorAddressBuilder &address, double value, Function &&func) { + void insertCell(SparseTensorAddressBuilder &address, T value, Function &&func) { insertCell(address.getAddressRef(), value, func); } - void insertCell(SparseTensorAddressBuilder &address, double value) { + void insertCell(SparseTensorAddressBuilder &address, T value) { // This address should not already exist and a new cell should be inserted. - insertCell(address.getAddressRef(), value, [](double, double) -> double { HDR_ABORT("should not be reached"); }); + insertCell(address.getAddressRef(), value); } eval::ValueType &fast_type() { return _type; } - Cells &cells() { return _cells; } + void reserve(uint32_t estimatedCells); }; diff --git a/eval/src/vespa/eval/tensor/sparse/sparse_tensor.cpp b/eval/src/vespa/eval/tensor/sparse/sparse_tensor.cpp index 98a20cd9630..bd6c2b28157 100644 --- a/eval/src/vespa/eval/tensor/sparse/sparse_tensor.cpp +++ b/eval/src/vespa/eval/tensor/sparse/sparse_tensor.cpp @@ -3,16 +3,14 @@ #include "sparse_tensor.h" #include "sparse_tensor_add.h" #include "sparse_tensor_address_builder.h" -#include "sparse_tensor_apply.hpp" +#include "sparse_tensor_join.hpp" #include "sparse_tensor_match.h" -#include "sparse_tensor_modify.h" -#include "sparse_tensor_reduce.hpp" -#include "sparse_tensor_remove.h" #include "direct_sparse_tensor_builder.h" +#include <vespa/eval/eval/value.h> +#include <vespa/eval/eval/value_codec.h> #include <vespa/eval/eval/operation.h> #include <vespa/eval/tensor/cell_values.h> #include <vespa/eval/tensor/tensor_address_builder.h> -#include <vespa/eval/tensor/tensor_apply.h> #include <vespa/eval/tensor/tensor_visitor.h> #include <vespa/vespalib/stllike/hash_map.hpp> #include <vespa/vespalib/stllike/hash_map_equal.hpp> @@ -25,51 
+23,46 @@ using vespalib::eval::TensorSpec; namespace vespalib::tensor { -namespace { - -using Cells = SparseTensor::Cells; - -void -copyCells(Cells &cells, const Cells &cells_in, Stash &stash) -{ - // copy the exact hashtable structure: - cells = cells_in; - // copy the actual contents of the addresses, - // and update the pointers inside the hashtable - // keys so they point to our copy: - for (auto &cell : cells) { - SparseTensorAddressRef oldRef = cell.first; - SparseTensorAddressRef newRef(oldRef, stash); - cell.first = newRef; - } -} - -} - -SparseTensor::SparseTensor(const eval::ValueType &type_in, const Cells &cells_in) - : _type(type_in), - _cells(), - _stash(STASH_CHUNK_SIZE) -{ - copyCells(_cells, cells_in, _stash); -} - - -SparseTensor::SparseTensor(eval::ValueType &&type_in, Cells &&cells_in, Stash &&stash_in) +SparseTensor::SparseTensor(eval::ValueType type_in, SparseTensorIndex index_in) : _type(std::move(type_in)), - _cells(std::move(cells_in)), - _stash(std::move(stash_in)) -{ } + _index(std::move(index_in)) +{} SparseTensor::~SparseTensor() = default; +struct CompareValues { + template <typename T> + static bool invoke(const SparseTensor &lhs_in, + const SparseTensor &rhs_in) + { + auto & lhs = static_cast<const SparseTensorT<T> &>(lhs_in); + auto & rhs = static_cast<const SparseTensorT<T> &>(rhs_in); + auto lhs_cells = lhs.cells().template typify<T>(); + auto rhs_cells = rhs.cells().template typify<T>(); + size_t rhs_idx; + for (const auto & kv : lhs.index().get_map()) { + if (rhs.index().lookup_address(kv.first, rhs_idx)) { + size_t lhs_idx = kv.second; + if (lhs_cells[lhs_idx] != rhs_cells[rhs_idx]) { + return false; + } + } else { + return false; + } + } + return true; + } +}; + bool SparseTensor::operator==(const SparseTensor &rhs) const { - return _type == rhs._type && _cells == rhs._cells; + if (fast_type() == rhs.fast_type() && my_size() == rhs.my_size()) { + return typify_invoke<1,eval::TypifyCellType,CompareValues>(_type.cell_type(), 
*this, rhs); + } + return false; } - eval::ValueType SparseTensor::combineDimensionsWith(const SparseTensor &rhs) const { @@ -82,20 +75,6 @@ SparseTensor::type() const return _type; } -double -SparseTensor::as_double() const -{ - double result = 0.0; - _cells.for_each([&result](const auto & v) { result += v.second; }); - return result; -} - -Tensor::UP -SparseTensor::apply(const CellFunction &func) const -{ - return TensorApply<SparseTensor>(*this, func).result(); -} - bool SparseTensor::equals(const Tensor &arg) const { @@ -106,175 +85,13 @@ SparseTensor::equals(const Tensor &arg) const return *this == *rhs; } -Tensor::UP -SparseTensor::clone() const -{ - size_t mem_use = _stash.get_memory_usage().usedBytes(); - if (mem_use < (STASH_CHUNK_SIZE / 4)) { - size_t aligned_size = (mem_use + 63) & ~(sizeof(char *) - 1); - Stash stash_copy(aligned_size); - Cells cells_copy; - copyCells(cells_copy, _cells, stash_copy); - if (stash_copy.get_memory_usage().allocatedBytes() * 2 > STASH_CHUNK_SIZE) { - LOG(warning, "shrink failed, %zu bytes -> chunksize %zu -> allocated %zu", - mem_use, aligned_size, stash_copy.get_memory_usage().allocatedBytes()); - } - eval::ValueType type_copy = _type; - return std::make_unique<SparseTensor>(std::move(type_copy), - std::move(cells_copy), - std::move(stash_copy)); - } - return std::make_unique<SparseTensor>(_type, _cells); -} - -namespace { - -void -buildAddress(const eval::ValueType &type, - SparseTensorAddressDecoder &decoder, - TensorSpec::Address &address) -{ - for (const auto &dimension : type.dimensions()) { - auto label = decoder.decodeLabel(); - address.emplace(std::make_pair(dimension.name, TensorSpec::Label(label))); - } - assert(!decoder.valid()); -} - -} - TensorSpec SparseTensor::toSpec() const { - TensorSpec result(type().to_spec()); - TensorSpec::Address address; - for (const auto &cell : _cells) { - SparseTensorAddressDecoder decoder(cell.first); - buildAddress(_type, decoder, address); - result.add(address, cell.second); - 
address.clear(); - } - if (_type.dimensions().empty() && _cells.empty()) { - result.add(address, 0.0); - } - return result; -} - -void -SparseTensor::accept(TensorVisitor &visitor) const -{ - TensorAddressBuilder addrBuilder; - TensorAddress addr; - for (const auto &cell : _cells) { - SparseTensorAddressDecoder decoder(cell.first); - addrBuilder.clear(); - for (const auto &dimension : _type.dimensions()) { - auto label = decoder.decodeLabel(); - if (label.size() != 0u) { - addrBuilder.add(dimension.name, label); - } - } - assert(!decoder.valid()); - addr = addrBuilder.build(); - visitor.visit(addr, cell.second); - } -} - -Tensor::UP -SparseTensor::join(join_fun_t function, const Tensor &arg) const -{ - const SparseTensor *rhs = dynamic_cast<const SparseTensor *>(&arg); - if (!rhs) { - return Tensor::UP(); - } - if (function == eval::operation::Mul::f) { - if (fast_type() == rhs->fast_type()) { - return SparseTensorMatch(*this, *rhs).result(); - } else { - return sparse::apply(*this, *rhs, [](double lhsValue, double rhsValue) - { return lhsValue * rhsValue; }); - } - } - return sparse::apply(*this, *rhs, function); -} - -Tensor::UP -SparseTensor::merge(join_fun_t function, const Tensor &arg) const -{ - const SparseTensor *rhs = dynamic_cast<const SparseTensor *>(&arg); - assert(rhs && (fast_type().dimensions() == rhs->fast_type().dimensions())); - DirectSparseTensorBuilder builder(eval::ValueType::merge(fast_type(), rhs->fast_type())); - builder.reserve(my_cells().size() + rhs->my_cells().size()); - for (const auto &cell: my_cells()) { - auto pos = rhs->my_cells().find(cell.first); - if (pos == rhs->my_cells().end()) { - builder.insertCell(cell.first, cell.second); - } else { - builder.insertCell(cell.first, function(cell.second, pos->second)); - } - } - for (const auto &cell: rhs->my_cells()) { - auto pos = my_cells().find(cell.first); - if (pos == my_cells().end()) { - builder.insertCell(cell.first, cell.second); - } - } - return builder.build(); -} - -Tensor::UP 
-SparseTensor::reduce(join_fun_t op, - const std::vector<vespalib::string> &dimensions) const -{ - return sparse::reduce(*this, dimensions, op); + return vespalib::eval::spec_from_value(*this); } -std::unique_ptr<Tensor> -SparseTensor::modify(join_fun_t op, const CellValues &cellValues) const -{ - Stash stash; - Cells cells; - copyCells(cells, _cells, stash); - SparseTensorModify modifier(op, _type, std::move(stash), std::move(cells)); - cellValues.accept(modifier); - return modifier.build(); -} -std::unique_ptr<Tensor> -SparseTensor::add(const Tensor &arg) const -{ - const SparseTensor *rhs = dynamic_cast<const SparseTensor *>(&arg); - if (!rhs) { - return Tensor::UP(); - } - Cells cells; - Stash stash; - copyCells(cells, _cells, stash); - SparseTensorAdd adder(_type, std::move(cells), std::move(stash)); - rhs->accept(adder); - return adder.build(); -} - -std::unique_ptr<Tensor> -SparseTensor::remove(const CellValues &cellAddresses) const -{ - Cells cells; - Stash stash; - copyCells(cells, _cells, stash); - SparseTensorRemove remover(_type, std::move(cells), std::move(stash)); - cellAddresses.accept(remover); - return remover.build(); -} - -MemoryUsage -SparseTensor::get_memory_usage() const -{ - MemoryUsage result = _stash.get_memory_usage(); - size_t plus = sizeof(SparseTensor) + _cells.getMemoryConsumption(); - result.incUsedBytes(plus); - result.incAllocatedBytes(plus); // should probably be even more - return result; -} } diff --git a/eval/src/vespa/eval/tensor/sparse/sparse_tensor.h b/eval/src/vespa/eval/tensor/sparse/sparse_tensor.h index 002e0dac0ef..4093700b334 100644 --- a/eval/src/vespa/eval/tensor/sparse/sparse_tensor.h +++ b/eval/src/vespa/eval/tensor/sparse/sparse_tensor.h @@ -2,14 +2,12 @@ #pragma once -#include "sparse_tensor_address_ref.h" +#include "sparse_tensor_index.h" #include <vespa/eval/tensor/cell_function.h> #include <vespa/eval/tensor/tensor.h> #include <vespa/eval/tensor/tensor_address.h> #include <vespa/eval/tensor/types.h> -#include 
<vespa/vespalib/stllike/hash_map.h> #include <vespa/vespalib/stllike/string.h> -#include <vespa/vespalib/util/stash.h> namespace vespalib::tensor { @@ -20,42 +18,22 @@ namespace vespalib::tensor { */ class SparseTensor : public Tensor { -public: - using Cells = hash_map<SparseTensorAddressRef, double, hash<SparseTensorAddressRef>, - std::equal_to<>, hashtable_base::and_modulator>; - - static constexpr size_t STASH_CHUNK_SIZE = 16384u; - private: eval::ValueType _type; - Cells _cells; - Stash _stash; + SparseTensorIndex _index; public: - explicit SparseTensor(const eval::ValueType &type_in, const Cells &cells_in); - SparseTensor(eval::ValueType &&type_in, Cells &&cells_in, Stash &&stash_in); - TypedCells cells() const override { abort(); } - const Index &index() const override { abort(); } + SparseTensor(eval::ValueType type_in, SparseTensorIndex index_in); ~SparseTensor() override; - const Cells &my_cells() const { return _cells; } + size_t my_size() const { return _index.get_map().size(); } + const SparseTensorIndex &index() const override { return _index; } const eval::ValueType &fast_type() const { return _type; } bool operator==(const SparseTensor &rhs) const; eval::ValueType combineDimensionsWith(const SparseTensor &rhs) const; const eval::ValueType &type() const override; - double as_double() const override; - Tensor::UP apply(const CellFunction &func) const override; - Tensor::UP join(join_fun_t function, const Tensor &arg) const override; - Tensor::UP merge(join_fun_t function, const Tensor &arg) const override; - Tensor::UP reduce(join_fun_t op, const std::vector<vespalib::string> &dimensions) const override; - std::unique_ptr<Tensor> modify(join_fun_t op, const CellValues &cellValues) const override; - std::unique_ptr<Tensor> add(const Tensor &arg) const override; - std::unique_ptr<Tensor> remove(const CellValues &cellAddresses) const override; bool equals(const Tensor &arg) const override; - Tensor::UP clone() const override; eval::TensorSpec toSpec() 
const override; - void accept(TensorVisitor &visitor) const override; - MemoryUsage get_memory_usage() const override; }; } diff --git a/eval/src/vespa/eval/tensor/sparse/sparse_tensor_add.cpp b/eval/src/vespa/eval/tensor/sparse/sparse_tensor_add.cpp index 4503787e00e..6c2e7241856 100644 --- a/eval/src/vespa/eval/tensor/sparse/sparse_tensor_add.cpp +++ b/eval/src/vespa/eval/tensor/sparse/sparse_tensor_add.cpp @@ -1,33 +1,46 @@ // Copyright 2019 Oath Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. #include "sparse_tensor_add.h" +#include "sparse_tensor_t.h" namespace vespalib::tensor { -SparseTensorAdd::SparseTensorAdd(const eval::ValueType &type, Cells &&cells, Stash &&stash) - : _type(type), - _cells(std::move(cells)), - _stash(std::move(stash)), +template<typename T> +SparseTensorAdd<T>::SparseTensorAdd(eval::ValueType type, SparseTensorIndex index, std::vector<T> values) + : _type(std::move(type)), + _index(std::move(index)), + _values(std::move(values)), _addressBuilder() { } -SparseTensorAdd::~SparseTensorAdd() = default; +template<typename T> +SparseTensorAdd<T>::~SparseTensorAdd() = default; +template<typename T> void -SparseTensorAdd::visit(const TensorAddress &address, double value) +SparseTensorAdd<T>::visit(const TensorAddress &address, double value) { _addressBuilder.populate(_type, address); auto addressRef = _addressBuilder.getAddressRef(); - // Make a persistent copy of the tensor address (owned by _stash) as the cell to insert might not already exist. 
- auto persistentAddress = SparseTensorAddressRef(addressRef, _stash); - _cells[persistentAddress] = value; + size_t idx = _index.lookup_or_add(addressRef); + if (idx < _values.size()) { + _values[idx] = value; + } else { + assert(idx == _values.size()); + _values.push_back(value); + } } +template<typename T> std::unique_ptr<Tensor> -SparseTensorAdd::build() +SparseTensorAdd<T>::build() { - return std::make_unique<SparseTensor>(std::move(_type), std::move(_cells), std::move(_stash)); + using tt = SparseTensorT<T>; + return std::make_unique<tt>(std::move(_type), _index, std::move(_values)); } +template class SparseTensorAdd<float>; +template class SparseTensorAdd<double>; + } diff --git a/eval/src/vespa/eval/tensor/sparse/sparse_tensor_add.h b/eval/src/vespa/eval/tensor/sparse/sparse_tensor_add.h index 8adc95adf35..7baea13440a 100644 --- a/eval/src/vespa/eval/tensor/sparse/sparse_tensor_add.h +++ b/eval/src/vespa/eval/tensor/sparse/sparse_tensor_add.h @@ -14,16 +14,15 @@ namespace vespalib::tensor { * Creates a new tensor by adding the cells of the argument tensor to this tensor. * Existing cell values are overwritten. 
*/ +template<typename T> class SparseTensorAdd : public TensorVisitor { - using Cells = SparseTensor::Cells; - eval::ValueType _type; - Cells _cells; - Stash _stash; + eval::ValueType _type; + SparseTensorIndex _index; + std::vector<T> _values; SparseTensorAddressBuilder _addressBuilder; - public: - SparseTensorAdd(const eval::ValueType &type, Cells &&cells, Stash &&stash); + SparseTensorAdd(eval::ValueType type, SparseTensorIndex index, std::vector<T> values); ~SparseTensorAdd(); void visit(const TensorAddress &address, double value) override; std::unique_ptr<Tensor> build(); diff --git a/eval/src/vespa/eval/tensor/sparse/sparse_tensor_apply.hpp b/eval/src/vespa/eval/tensor/sparse/sparse_tensor_apply.hpp deleted file mode 100644 index 8d46e88ca72..00000000000 --- a/eval/src/vespa/eval/tensor/sparse/sparse_tensor_apply.hpp +++ /dev/null @@ -1,34 +0,0 @@ -// Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. - -#pragma once - -#include "sparse_tensor_apply.h" -#include "sparse_tensor_address_combiner.h" -#include "direct_sparse_tensor_builder.h" - -namespace vespalib::tensor::sparse { - -template <typename Function> -std::unique_ptr<Tensor> -apply(const SparseTensor &lhs, const SparseTensor &rhs, Function &&func) -{ - DirectSparseTensorBuilder builder(lhs.combineDimensionsWith(rhs)); - TensorAddressCombiner addressCombiner(lhs.fast_type(), rhs.fast_type()); - size_t estimatedCells = (lhs.my_cells().size() * rhs.my_cells().size()); - if (addressCombiner.numOverlappingDimensions() != 0) { - estimatedCells = std::min(lhs.my_cells().size(), rhs.my_cells().size()); - } - builder.reserve(estimatedCells*2); - for (const auto &lhsCell : lhs.my_cells()) { - for (const auto &rhsCell : rhs.my_cells()) { - bool combineSuccess = addressCombiner.combine(lhsCell.first, rhsCell.first); - if (combineSuccess) { - builder.insertCell(addressCombiner.getAddressRef(), - func(lhsCell.second, rhsCell.second)); - } - } - } - 
return builder.build(); -} - -} diff --git a/eval/src/vespa/eval/tensor/sparse/sparse_tensor_value.cpp b/eval/src/vespa/eval/tensor/sparse/sparse_tensor_index.cpp index 62e3c786262..275acb51af3 100644 --- a/eval/src/vespa/eval/tensor/sparse/sparse_tensor_value.cpp +++ b/eval/src/vespa/eval/tensor/sparse/sparse_tensor_index.cpp @@ -1,23 +1,19 @@ // Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. -#include "sparse_tensor_value.h" +#include "sparse_tensor_index.h" #include "sparse_tensor_address_builder.h" #include "sparse_tensor_address_decoder.h" - #include <vespa/vespalib/stllike/hash_map.hpp> #include <vespa/vespalib/stllike/hash_map_equal.hpp> -#include <vespa/log/log.h> -LOG_SETUP(".eval.tensor.sparse.sparse_tensor_value"); - namespace vespalib::tensor { -using SubspaceMap = SparseTensorValueIndex::SubspaceMap; +using IndexMap = SparseTensorIndex::IndexMap; using View = vespalib::eval::Value::Index::View; namespace { -void copyMap(SubspaceMap &map, const SubspaceMap &map_in, Stash &to_stash) { +void copyMap(IndexMap &map, const IndexMap &map_in, Stash &to_stash) { // copy the exact hashtable structure: map = map_in; // copy the actual contents of the addresses, @@ -30,26 +26,17 @@ void copyMap(SubspaceMap &map, const SubspaceMap &map_in, Stash &to_stash) { } } -template<typename T> -size_t needed_memory_for(const SubspaceMap &map, ConstArrayRef<T> cells) { - size_t needs = cells.size() * sizeof(T); - for (const auto & kv : map) { - needs += kv.first.size(); - } - return needs; -} - //----------------------------------------------------------------------------- class SparseTensorValueView : public View { private: - const SubspaceMap &map; - SubspaceMap::const_iterator iter; + const IndexMap &map; + IndexMap::const_iterator iter; const std::vector<size_t> lookup_dims; std::vector<vespalib::stringref> lookup_refs; public: - SparseTensorValueView(const SubspaceMap & map_in, + SparseTensorValueView(const IndexMap 
& map_in, const std::vector<size_t> &dims) : map(map_in), iter(map.end()), lookup_dims(dims), lookup_refs() {} ~SparseTensorValueView(); @@ -116,10 +103,10 @@ SparseTensorValueView::next_result(const std::vector<vespalib::stringref*> &addr class SparseTensorValueLookup : public View { private: - const SubspaceMap &map; - SubspaceMap::const_iterator iter; + const IndexMap &map; + IndexMap::const_iterator iter; public: - SparseTensorValueLookup(const SubspaceMap & map_in) : map(map_in), iter(map.end()) {} + SparseTensorValueLookup(const IndexMap & map_in) : map(map_in), iter(map.end()) {} ~SparseTensorValueLookup(); void lookup(const std::vector<const vespalib::stringref*> &addr) override; bool next_result(const std::vector<vespalib::stringref*> &addr_out, size_t &idx_out) override; @@ -154,10 +141,10 @@ SparseTensorValueLookup::next_result(const std::vector<vespalib::stringref*> &, class SparseTensorValueAllMappings : public View { private: - const SubspaceMap &map; - SubspaceMap::const_iterator iter; + const IndexMap &map; + IndexMap::const_iterator iter; public: - SparseTensorValueAllMappings(const SubspaceMap & map_in) : map(map_in), iter(map.end()) {} + SparseTensorValueAllMappings(const IndexMap & map_in) : map(map_in), iter(map.end()) {} ~SparseTensorValueAllMappings(); void lookup(const std::vector<const vespalib::stringref*> &addr) override; bool next_result(const std::vector<vespalib::stringref*> &addr_out, size_t &idx_out) override; @@ -192,23 +179,45 @@ SparseTensorValueAllMappings::next_result(const std::vector<vespalib::stringref* //----------------------------------------------------------------------------- -SparseTensorValueIndex::SparseTensorValueIndex(size_t num_mapped_in) - : _stash(), _map(), _num_mapped_dims(num_mapped_in) {} +size_t +SparseTensorIndex::needed_memory_for(const SparseTensorIndex &other) { + auto mem = other._stash.get_memory_usage(); + size_t mem_use = mem.usedBytes(); + if (mem_use == 0) { + return STASH_CHUNK_SIZE; + } + if (mem_use < 
(STASH_CHUNK_SIZE / 4)) { + size_t avg_per_addr = mem_use / other.size(); + mem_use = std::max(mem_use, (7 * avg_per_addr)); + size_t aligned_size = (mem_use + 63) & ~(sizeof(char *) - 1); + return aligned_size; + } + return STASH_CHUNK_SIZE; +} + +SparseTensorIndex::SparseTensorIndex(size_t num_mapped_in) + : _stash(STASH_CHUNK_SIZE), _map(), _num_mapped_dims(num_mapped_in) +{} -SparseTensorValueIndex::SparseTensorValueIndex(const SparseTensorValueIndex & index_in) - : _stash(), _map(), _num_mapped_dims(index_in._num_mapped_dims) +SparseTensorIndex::SparseTensorIndex(const SparseTensorIndex & index_in) + : _stash(needed_memory_for(index_in)), _map(), _num_mapped_dims(index_in._num_mapped_dims) { copyMap(_map, index_in._map, _stash); } -SparseTensorValueIndex::~SparseTensorValueIndex() = default; +void +SparseTensorIndex::reserve(size_t estimate) { + _map.resize(2*estimate); +} + +SparseTensorIndex::~SparseTensorIndex() = default; -size_t SparseTensorValueIndex::size() const { +size_t SparseTensorIndex::size() const { return _map.size(); } std::unique_ptr<View> -SparseTensorValueIndex::create_view(const std::vector<size_t> &dims) const +SparseTensorIndex::create_view(const std::vector<size_t> &dims) const { if (dims.size() == _num_mapped_dims) { return std::make_unique<SparseTensorValueLookup>(_map); @@ -220,38 +229,49 @@ SparseTensorValueIndex::create_view(const std::vector<size_t> &dims) const } void -SparseTensorValueIndex::add_subspace(SparseTensorAddressRef tmp_ref, size_t idx) +SparseTensorIndex::add_address(SparseTensorAddressRef tmp_ref) { SparseTensorAddressRef ref(tmp_ref, _stash); - assert(_map.find(ref) == _map.end()); - assert(_map.size() == idx); - _map[ref] = idx; + size_t idx = _map.size(); + auto insert_result = _map.insert({ref, idx}); + assert(insert_result.second); } - -//----------------------------------------------------------------------------- - -template<typename T> -SparseTensorValue<T>::SparseTensorValue(const eval::ValueType &type_in, - 
const SparseTensorValueIndex &index_in, - const std::vector<T> &cells_in) - : _type(type_in), - _index(index_in), - _cells(cells_in) + +size_t +SparseTensorIndex::lookup_or_add(SparseTensorAddressRef tmp_ref) { + auto [map_iter, was_inserted] = _map.insert({tmp_ref, _map.size()}); + if (was_inserted) { + // we must copy the memory tmp_ref refers to into our own stash: + SparseTensorAddressRef ref(tmp_ref, _stash); + // and update the key in the map, just like copyMap() does. + map_iter->first = ref; + } + return map_iter->second; } -template<typename T> -SparseTensorValue<T>::SparseTensorValue(eval::ValueType &&type_in, SparseTensorValueIndex &&index_in, std::vector<T> &&cells_in) - : _type(std::move(type_in)), - _index(std::move(index_in)), - _cells(std::move(cells_in)) +bool +SparseTensorIndex::lookup_address(SparseTensorAddressRef ref, size_t &idx) const { + auto iter = _map.find(ref); + if (iter != _map.end()) { + idx = iter->second; + return true; + } + idx = size_t(-1); + return false; } -template<typename T> SparseTensorValue<T>::~SparseTensorValue() = default; +MemoryUsage +SparseTensorIndex::get_memory_usage() const +{ + MemoryUsage mem = _stash.get_memory_usage(); + size_t plus = _map.getMemoryConsumption(); + mem.incUsedBytes(plus); + mem.incAllocatedBytes(plus); + return mem; +} -template class SparseTensorValue<float>; -template class SparseTensorValue<double>; //----------------------------------------------------------------------------- diff --git a/eval/src/vespa/eval/tensor/sparse/sparse_tensor_index.h b/eval/src/vespa/eval/tensor/sparse/sparse_tensor_index.h new file mode 100644 index 00000000000..c30bcf4732b --- /dev/null +++ b/eval/src/vespa/eval/tensor/sparse/sparse_tensor_index.h @@ -0,0 +1,45 @@ +// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+ +#pragma once + +#include "sparse_tensor_address_ref.h" +#include <vespa/eval/eval/value.h> +#include <vespa/vespalib/stllike/hash_map.h> +#include <vespa/vespalib/util/stash.h> + +namespace vespalib::tensor { + +class SparseTensorIndex : public vespalib::eval::Value::Index +{ +public: + static constexpr size_t STASH_CHUNK_SIZE = 16384u; + // + using View = vespalib::eval::Value::Index::View; + using IndexMap = hash_map<SparseTensorAddressRef, uint32_t, hash<SparseTensorAddressRef>, + std::equal_to<>, hashtable_base::and_modulator>; + // construct + explicit SparseTensorIndex(size_t num_mapped_dims_in); + SparseTensorIndex(const SparseTensorIndex & index_in); + SparseTensorIndex(SparseTensorIndex && index_in) = default; + ~SparseTensorIndex(); + // Index API + size_t size() const override; + std::unique_ptr<View> create_view(const std::vector<size_t> &dims) const override; + // build API + void reserve(size_t estimate); + void add_address(SparseTensorAddressRef tmp_ref); + size_t lookup_or_add(SparseTensorAddressRef tmp_ref); + // lookup API + bool lookup_address(SparseTensorAddressRef ref, size_t &idx) const; + // traversal API + const IndexMap &get_map() const { return _map; } + // stats + MemoryUsage get_memory_usage() const; +private: + Stash _stash; + IndexMap _map; + size_t _num_mapped_dims; + static size_t needed_memory_for(const SparseTensorIndex &other); +}; + +} // namespace diff --git a/eval/src/vespa/eval/tensor/sparse/sparse_tensor_apply.h b/eval/src/vespa/eval/tensor/sparse/sparse_tensor_join.h index ec6edf2d847..07695b66ccb 100644 --- a/eval/src/vespa/eval/tensor/sparse/sparse_tensor_apply.h +++ b/eval/src/vespa/eval/tensor/sparse/sparse_tensor_join.h @@ -14,9 +14,9 @@ namespace vespalib::tensor::sparse { * labels for common dimensions, using func to calculate new cell value * based on the cell values in the input tensors. 
*/ -template <typename Function> +template <typename LCT, typename RCT, typename OCT, typename Function> std::unique_ptr<Tensor> -apply(const SparseTensor &lhs, const SparseTensor &rhs, Function &&func); +join(const SparseTensor &lhs, const SparseTensor &rhs, eval::ValueType res_type, Function &&func); } diff --git a/eval/src/vespa/eval/tensor/sparse/sparse_tensor_join.hpp b/eval/src/vespa/eval/tensor/sparse/sparse_tensor_join.hpp new file mode 100644 index 00000000000..ae54e42f5c2 --- /dev/null +++ b/eval/src/vespa/eval/tensor/sparse/sparse_tensor_join.hpp @@ -0,0 +1,40 @@ +// Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#pragma once + +#include "sparse_tensor_join.h" +#include "sparse_tensor_t.h" +#include "sparse_tensor_address_combiner.h" +#include "direct_sparse_tensor_builder.h" + +namespace vespalib::tensor::sparse { + +template <typename LCT, typename RCT, typename OCT, typename Function> +std::unique_ptr<Tensor> +join(const SparseTensor &lhs_in, const SparseTensor &rhs_in, eval::ValueType res_type, Function &&func) +{ + auto & lhs = static_cast<const SparseTensorT<LCT> &>(lhs_in); + auto & rhs = static_cast<const SparseTensorT<RCT> &>(rhs_in); + DirectSparseTensorBuilder<OCT> builder(std::move(res_type)); + TensorAddressCombiner addressCombiner(lhs.fast_type(), rhs.fast_type()); + if (addressCombiner.numOverlappingDimensions() != 0) { + size_t estimatedCells = std::min(lhs.my_size(), rhs.my_size()); + builder.reserve(estimatedCells*2); + } else { + size_t estimatedCells = (lhs.my_size() * rhs.my_size()); + builder.reserve(estimatedCells); + } + for (const auto & lhs_kv : lhs.index().get_map()) { + for (const auto & rhs_kv : rhs.index().get_map()) { + bool combineSuccess = addressCombiner.combine(lhs_kv.first, rhs_kv.first); + if (combineSuccess) { + auto a = lhs.get_value(lhs_kv.second); + auto b = rhs.get_value(rhs_kv.second); + builder.insertCell(addressCombiner.getAddressRef(), 
func(a, b)); + } + } + } + return builder.build(); +} + +} diff --git a/eval/src/vespa/eval/tensor/sparse/sparse_tensor_match.cpp b/eval/src/vespa/eval/tensor/sparse/sparse_tensor_match.cpp index 9dc47b0176c..74aa557d92b 100644 --- a/eval/src/vespa/eval/tensor/sparse/sparse_tensor_match.cpp +++ b/eval/src/vespa/eval/tensor/sparse/sparse_tensor_match.cpp @@ -9,30 +9,36 @@ namespace vespalib::tensor { +template<typename LCT, typename RCT> void -SparseTensorMatch::fastMatch(const TensorImplType &lhs, const TensorImplType &rhs) +SparseTensorMatch<LCT,RCT>::fastMatch(const SparseTensorT<LCT> &lhs, const SparseTensorT<RCT> &rhs) { - _builder.reserve(lhs.my_cells().size()); - for (const auto &lhsCell : lhs.my_cells()) { - auto rhsItr = rhs.my_cells().find(lhsCell.first); - if (rhsItr != rhs.my_cells().end()) { - _builder.insertCell(lhsCell.first, lhsCell.second * rhsItr->second); + const auto & lhs_map = lhs.index().get_map(); + const auto & rhs_map = rhs.index().get_map(); + _builder.reserve(lhs_map.size()); + const auto rhs_map_end = rhs_map.end(); + for (const auto & kv : lhs_map) { + auto rhsItr = rhs_map.find(kv.first); + if (rhsItr != rhs_map_end) { + LCT a = lhs.get_value(kv.second); + RCT b = rhs.get_value(rhsItr->second); + _builder.insertCell(kv.first, a * b); } } } -SparseTensorMatch::SparseTensorMatch(const TensorImplType &lhs, const TensorImplType &rhs) - : Parent(lhs.combineDimensionsWith(rhs)) +template<typename LCT, typename RCT> +SparseTensorMatch<LCT,RCT>::SparseTensorMatch(const SparseTensorT<LCT> &lhs, + const SparseTensorT<RCT> &rhs, + eval::ValueType res_type) + : _builder(std::move(res_type)) { - assert (lhs.fast_type().dimensions().size() == rhs.fast_type().dimensions().size()); - assert (lhs.fast_type().dimensions().size() == _builder.fast_type().dimensions().size()); - - // Ensure that first tensor to fastMatch has fewest cells. 
- if (lhs.my_cells().size() <= rhs.my_cells().size()) { - fastMatch(lhs, rhs); - } else { - fastMatch(rhs, lhs); - } + fastMatch(lhs, rhs); } +template class SparseTensorMatch<float,float>; +template class SparseTensorMatch<float,double>; +template class SparseTensorMatch<double,float>; +template class SparseTensorMatch<double,double>; + } diff --git a/eval/src/vespa/eval/tensor/sparse/sparse_tensor_match.h b/eval/src/vespa/eval/tensor/sparse/sparse_tensor_match.h index f5f52eda756..21223112329 100644 --- a/eval/src/vespa/eval/tensor/sparse/sparse_tensor_match.h +++ b/eval/src/vespa/eval/tensor/sparse/sparse_tensor_match.h @@ -2,7 +2,9 @@ #pragma once -#include <vespa/eval/tensor/tensor_operation.h> +#include "sparse_tensor.h" +#include "sparse_tensor_t.h" +#include "direct_sparse_tensor_builder.h" namespace vespalib::tensor { @@ -14,16 +16,19 @@ namespace vespalib::tensor { * Only used when two tensors have exactly the same dimensions, * this is the Hadamard product. */ -class SparseTensorMatch : public TensorOperation<SparseTensor> +template<typename LCT, typename RCT> +class SparseTensorMatch { public: - using Parent = TensorOperation<SparseTensor>; - using typename Parent::TensorImplType; - using Parent::_builder; + using OCT = typename eval::UnifyCellTypes<LCT,RCT>::type; + DirectSparseTensorBuilder<OCT> _builder; private: - void fastMatch(const TensorImplType &lhs, const TensorImplType &rhs); + void fastMatch(const SparseTensorT<LCT> &lhs, const SparseTensorT<RCT> &rhs); public: - SparseTensorMatch(const TensorImplType &lhs, const TensorImplType &rhs); + SparseTensorMatch(const SparseTensorT<LCT> &lhs, const SparseTensorT<RCT> &rhs, eval::ValueType res_type); + Tensor::UP result() { + return _builder.build(); + } }; } diff --git a/eval/src/vespa/eval/tensor/sparse/sparse_tensor_modify.cpp b/eval/src/vespa/eval/tensor/sparse/sparse_tensor_modify.cpp index 0ab8352bfbb..23a2d00c8b3 100644 --- a/eval/src/vespa/eval/tensor/sparse/sparse_tensor_modify.cpp +++ 
b/eval/src/vespa/eval/tensor/sparse/sparse_tensor_modify.cpp @@ -1,36 +1,45 @@ // Copyright 2019 Oath Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. #include "sparse_tensor_modify.h" +#include "sparse_tensor_t.h" #include <vespa/eval/tensor/tensor_address_element_iterator.h> namespace vespalib::tensor { -SparseTensorModify::SparseTensorModify(join_fun_t op, const eval::ValueType &type, Stash &&stash, Cells &&cells) +template<typename T> +SparseTensorModify<T>::SparseTensorModify(join_fun_t op, const SparseTensorT<T> &input) : _op(op), - _type(type), - _stash(std::move(stash)), - _cells(std::move(cells)), + _type(input.fast_type()), + _index(input.index()), + _values(input.my_values()), _addressBuilder() { } -SparseTensorModify::~SparseTensorModify() = default; +template<typename T> +SparseTensorModify<T>::~SparseTensorModify() = default; +template<typename T> void -SparseTensorModify::visit(const TensorAddress &address, double value) +SparseTensorModify<T>::visit(const TensorAddress &address, double value) { _addressBuilder.populate(_type, address); auto addressRef = _addressBuilder.getAddressRef(); - auto cellItr = _cells.find(addressRef); - if (cellItr != _cells.end()) { - cellItr->second = _op(cellItr->second, value); + size_t idx; + if (_index.lookup_address(addressRef, idx)) { + _values[idx] = _op(_values[idx], value); } } +template<typename T> std::unique_ptr<Tensor> -SparseTensorModify::build() +SparseTensorModify<T>::build() { - return std::make_unique<SparseTensor>(std::move(_type), std::move(_cells), std::move(_stash)); + using tt = SparseTensorT<T>; + return std::make_unique<tt>(std::move(_type), std::move(_index), std::move(_values)); } +template class SparseTensorModify<float>; +template class SparseTensorModify<double>; + } diff --git a/eval/src/vespa/eval/tensor/sparse/sparse_tensor_modify.h b/eval/src/vespa/eval/tensor/sparse/sparse_tensor_modify.h index 17a2ad3a2c1..f66a3c8946e 100644 --- 
a/eval/src/vespa/eval/tensor/sparse/sparse_tensor_modify.h +++ b/eval/src/vespa/eval/tensor/sparse/sparse_tensor_modify.h @@ -4,6 +4,7 @@ #include <vespa/eval/tensor/tensor_visitor.h> #include "sparse_tensor.h" +#include "sparse_tensor_t.h" #include "sparse_tensor_address_builder.h" namespace vespalib::tensor { @@ -13,18 +14,18 @@ namespace vespalib::tensor { * For all cells visited, a join function is applied to determine * the new cell value. */ +template<typename T> class SparseTensorModify : public TensorVisitor { using join_fun_t = Tensor::join_fun_t; - using Cells = SparseTensor::Cells; join_fun_t _op; eval::ValueType _type; - Stash _stash; - Cells _cells; + SparseTensorIndex _index; + std::vector<T> _values; SparseTensorAddressBuilder _addressBuilder; public: - SparseTensorModify(join_fun_t op, const eval::ValueType &type, Stash &&stash, Cells &&cells); + SparseTensorModify(join_fun_t op, const SparseTensorT<T> & input); ~SparseTensorModify(); void visit(const TensorAddress &address, double value) override; std::unique_ptr<Tensor> build(); diff --git a/eval/src/vespa/eval/tensor/sparse/sparse_tensor_reduce.hpp b/eval/src/vespa/eval/tensor/sparse/sparse_tensor_reduce.hpp index f55fec85155..1ee13a2d8e1 100644 --- a/eval/src/vespa/eval/tensor/sparse/sparse_tensor_reduce.hpp +++ b/eval/src/vespa/eval/tensor/sparse/sparse_tensor_reduce.hpp @@ -7,50 +7,39 @@ namespace vespalib::tensor::sparse { -template <typename Function> +template <typename T, typename Function> std::unique_ptr<Tensor> -reduceAll(const SparseTensor &tensor, - DirectSparseTensorBuilder &builder, Function &&func) +reduceAll(const SparseTensorT<T> &tensor, Function &&func) { - auto itr = tensor.my_cells().begin(); - auto itrEnd = tensor.my_cells().end(); + DirectSparseTensorBuilder<double> builder; + size_t sz = tensor.my_size(); double result = 0.0; - if (itr != itrEnd) { - result = itr->second; - ++itr; + if (sz != 0) { + result = tensor.get_value(0); } - for (; itr != itrEnd; ++itr) { - result 
= func(result, itr->second); + for (size_t i = 1; i < sz; ++i) { + result = func(result, tensor.get_value(i)); } - builder.insertCell(SparseTensorAddressBuilder().getAddressRef(), result); + builder.insertCell(SparseTensorAddressRef(), result); return builder.build(); } -template <typename Function> +template <typename T, typename Function> std::unique_ptr<Tensor> -reduceAll(const SparseTensor &tensor, Function &&func) -{ - DirectSparseTensorBuilder builder; - return reduceAll(tensor, builder, func); -} - -template <typename Function> -std::unique_ptr<Tensor> -reduce(const SparseTensor &tensor, +reduce(const SparseTensorT<T> &tensor, const std::vector<vespalib::string> &dimensions, Function &&func) { - if (dimensions.empty()) { + auto tt = tensor.fast_type().reduce(dimensions); + if (tt.is_double()) { return reduceAll(tensor, func); } - DirectSparseTensorBuilder builder(tensor.fast_type().reduce(dimensions)); - if (builder.fast_type().dimensions().empty()) { - return reduceAll(tensor, builder, func); - } + DirectSparseTensorBuilder<T> builder(std::move(tt)); + builder.reserve(tensor.my_size()); TensorAddressReducer addressReducer(tensor.fast_type(), dimensions); - builder.reserve(tensor.my_cells().size()*2); - for (const auto &cell : tensor.my_cells()) { - addressReducer.reduce(cell.first); - builder.insertCell(addressReducer.getAddressRef(), cell.second, func); + for (const auto & kv : tensor.index().get_map()) { + addressReducer.reduce(kv.first); + auto v = tensor.get_value(kv.second); + builder.insertCell(addressReducer.getAddressRef(), v, func); } return builder.build(); } diff --git a/eval/src/vespa/eval/tensor/sparse/sparse_tensor_remove.cpp b/eval/src/vespa/eval/tensor/sparse/sparse_tensor_remove.cpp index 76af1e3b5fb..eae09c0cb83 100644 --- a/eval/src/vespa/eval/tensor/sparse/sparse_tensor_remove.cpp +++ b/eval/src/vespa/eval/tensor/sparse/sparse_tensor_remove.cpp @@ -1,33 +1,50 @@ // Copyright 2019 Oath Inc. 
Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. #include "sparse_tensor_remove.h" +#include "sparse_tensor_t.h" #include <vespa/eval/tensor/tensor_address_element_iterator.h> namespace vespalib::tensor { -SparseTensorRemove::SparseTensorRemove(const eval::ValueType &type, Cells &&cells, Stash &&stash) - : _type(type), - _cells(std::move(cells)), - _stash(std::move(stash)), +template<typename T> +SparseTensorRemove<T>::SparseTensorRemove(const SparseTensorT<T> &input) + : _input(input), + _map(input.index().get_map()), _addressBuilder() { } -SparseTensorRemove::~SparseTensorRemove() = default; +template<typename T> +SparseTensorRemove<T>::~SparseTensorRemove() = default; +template<typename T> void -SparseTensorRemove::visit(const TensorAddress &address, double value) +SparseTensorRemove<T>::visit(const TensorAddress &address, double) { - (void) value; - _addressBuilder.populate(_type, address); + _addressBuilder.populate(_input.fast_type(), address); auto addressRef = _addressBuilder.getAddressRef(); - _cells.erase(addressRef); + _map.erase(addressRef); } +template<typename T> std::unique_ptr<Tensor> -SparseTensorRemove::build() +SparseTensorRemove<T>::build() { - return std::make_unique<SparseTensor>(std::move(_type), std::move(_cells), std::move(_stash)); + SparseTensorIndex new_index(_input.fast_type().count_mapped_dimensions()); + std::vector<T> new_values; + new_index.reserve(_map.size()); + new_values.reserve(_map.size()); + for (const auto & kv : _map) { + size_t idx = new_index.lookup_or_add(kv.first); + assert(idx == new_values.size()); + double v = _input.get_value(kv.second); + new_values.push_back(v); + } + using tt = SparseTensorT<T>; + return std::make_unique<tt>(_input.fast_type(), std::move(new_index), std::move(new_values)); } +template class SparseTensorRemove<float>; +template class SparseTensorRemove<double>; + } diff --git a/eval/src/vespa/eval/tensor/sparse/sparse_tensor_remove.h 
b/eval/src/vespa/eval/tensor/sparse/sparse_tensor_remove.h index 3d5905d8f41..c52c38a9b0e 100644 --- a/eval/src/vespa/eval/tensor/sparse/sparse_tensor_remove.h +++ b/eval/src/vespa/eval/tensor/sparse/sparse_tensor_remove.h @@ -3,6 +3,7 @@ #pragma once #include "sparse_tensor.h" +#include "sparse_tensor_t.h" #include "sparse_tensor_address_builder.h" #include <vespa/eval/tensor/tensor_visitor.h> @@ -14,16 +15,14 @@ namespace vespalib::tensor { * Creates a new tensor by removing the cells matching the cell addresses visited. * The value associated with the address is ignored. */ +template<typename T> class SparseTensorRemove : public TensorVisitor { private: - using Cells = SparseTensor::Cells; - eval::ValueType _type; - Cells _cells; - Stash _stash; + const SparseTensorT<T> & _input; + SparseTensorIndex::IndexMap _map; SparseTensorAddressBuilder _addressBuilder; - public: - SparseTensorRemove(const eval::ValueType &type, Cells &&cells, Stash &&stash); + explicit SparseTensorRemove(const SparseTensorT<T> &input); ~SparseTensorRemove(); void visit(const TensorAddress &address, double value) override; std::unique_ptr<Tensor> build(); diff --git a/eval/src/vespa/eval/tensor/sparse/sparse_tensor_t.cpp b/eval/src/vespa/eval/tensor/sparse/sparse_tensor_t.cpp new file mode 100644 index 00000000000..5882b9c28d0 --- /dev/null +++ b/eval/src/vespa/eval/tensor/sparse/sparse_tensor_t.cpp @@ -0,0 +1,251 @@ +// Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+ +#include "sparse_tensor.h" +#include "sparse_tensor_add.h" +#include "sparse_tensor_address_builder.h" +#include "sparse_tensor_join.h" +#include "sparse_tensor_join.hpp" +#include "sparse_tensor_match.h" +#include "sparse_tensor_modify.h" +#include "sparse_tensor_reduce.hpp" +#include "sparse_tensor_remove.h" +#include "direct_sparse_tensor_builder.h" +#include <vespa/eval/eval/operation.h> +#include <vespa/eval/tensor/cell_values.h> +#include <vespa/eval/tensor/tensor_address_builder.h> +#include <vespa/eval/tensor/tensor_visitor.h> +#include <vespa/vespalib/stllike/hash_map.hpp> +#include <vespa/vespalib/stllike/hash_map_equal.hpp> +#include <vespa/vespalib/util/array_equal.hpp> + +#include <vespa/log/log.h> +LOG_SETUP(".eval.tensor.sparse.sparse_tensor"); + +namespace vespalib::tensor { + +namespace { + +template<typename LCT> +struct GenericSparseJoin { + template<typename RCT, typename OCT> + static Tensor::UP invoke(const SparseTensor & lhs_in, + const SparseTensor & rhs_in, + eval::ValueType res_type, + SparseTensor::join_fun_t func) + { + auto & lhs = static_cast<const SparseTensorT<LCT> &>(lhs_in); + auto & rhs = static_cast<const SparseTensorT<RCT> &>(rhs_in); + return sparse::join<LCT, RCT, OCT>(lhs, rhs, std::move(res_type), func); + } +}; + +template<typename LCT> +struct FastSparseJoin { + template<typename RCT> + static Tensor::UP invoke(const SparseTensor & lhs_in, + const SparseTensor & rhs_in, + eval::ValueType res_type) + { + auto & lhs = static_cast<const SparseTensorT<LCT> &>(lhs_in); + auto & rhs = static_cast<const SparseTensorT<RCT> &>(rhs_in); + // Ensure that first tensor to fastMatch has fewest cells. 
+ if (rhs.my_size() < lhs.my_size()) { + return SparseTensorMatch(rhs, lhs, std::move(res_type)).result(); + } else { + return SparseTensorMatch(lhs, rhs, std::move(res_type)).result(); + } + } +}; + +struct GenericSparseMerge { + template<typename LCT, typename RCT> + static Tensor::UP invoke(const SparseTensor &lhs_in, + const SparseTensor &rhs_in, + SparseTensor::join_fun_t function) + { + using OCT = typename eval::UnifyCellTypes<LCT,RCT>::type; + auto & lhs= static_cast<const SparseTensorT<LCT> &>(lhs_in); + auto & rhs= static_cast<const SparseTensorT<RCT> &>(rhs_in); + DirectSparseTensorBuilder<OCT> builder(eval::ValueType::merge(lhs.fast_type(), rhs.fast_type())); + builder.reserve(lhs.my_size() + rhs.my_size()); + const auto &lhs_map = lhs.index().get_map(); + const auto &rhs_map = rhs.index().get_map(); + for (const auto & kv : lhs_map) { + auto pos = rhs_map.find(kv.first); + if (pos == rhs_map.end()) { + builder.insertCell(kv.first, lhs.get_value(kv.second)); + } else { + double a = lhs.get_value(kv.second); + double b = rhs.get_value(pos->second); + builder.insertCell(kv.first, function(a, b)); + } + } + for (const auto & kv : rhs_map) { + auto pos = lhs_map.find(kv.first); + if (pos == lhs_map.end()) { + double b = rhs.get_value(kv.second); + builder.insertCell(kv.first, b); + } + } + return builder.build(); + } +}; + +} // namespace <unnamed> + +template<typename T> +SparseTensorT<T>::SparseTensorT(eval::ValueType type_in, SparseTensorIndex index_in, std::vector<T> values_in) + : SparseTensor(std::move(type_in), std::move(index_in)), + _values(std::move(values_in)) +{ +} + +template<typename T> +SparseTensorT<T>::~SparseTensorT() = default; + +template<typename T> +TypedCells +SparseTensorT<T>::cells() const +{ + return TypedCells(_values); +} + +template<typename T> +double +SparseTensorT<T>::as_double() const +{ + double result = 0.0; + for (double v : _values) { + result += v; + } + return result; +} + +template<typename T> +void 
+SparseTensorT<T>::accept(TensorVisitor &visitor) const +{ + TensorAddressBuilder addrBuilder; + TensorAddress addr; + for (const auto & kv : index().get_map()) { + SparseTensorAddressDecoder decoder(kv.first); + addrBuilder.clear(); + for (const auto &dimension : fast_type().dimensions()) { + auto label = decoder.decodeLabel(); + if (label.size() != 0u) { + addrBuilder.add(dimension.name, label); + } + } + assert(!decoder.valid()); + addr = addrBuilder.build(); + visitor.visit(addr, get_value(kv.second)); + } +} + +template<typename T> +std::unique_ptr<Tensor> +SparseTensorT<T>::add(const Tensor &arg) const +{ + const SparseTensor *rhs = dynamic_cast<const SparseTensor *>(&arg); + if (!rhs) { + return Tensor::UP(); + } + SparseTensorAdd<T> adder(fast_type(), index(), _values); + rhs->accept(adder); + return adder.build(); +} + +template<typename T> +Tensor::UP +SparseTensorT<T>::apply(const CellFunction &func) const +{ + std::vector<T> new_values; + new_values.reserve(_values.size()); + for (T v : _values) { + new_values.push_back(func.apply(v)); + } + return std::make_unique<SparseTensorT<T>>(fast_type(), index(), std::move(new_values)); +} + +template<typename T> +Tensor::UP +SparseTensorT<T>::clone() const +{ + return std::make_unique<SparseTensorT<T>>(fast_type(), index(), _values); +} + +template<typename T> +Tensor::UP +SparseTensorT<T>::join(join_fun_t function, const Tensor &arg) const +{ + const SparseTensor *rhs = dynamic_cast<const SparseTensor *>(&arg); + if (!rhs) { + return Tensor::UP(); + } + const auto & lhs_type = fast_type(); + const auto & rhs_type = rhs->fast_type(); + auto res_type = eval::ValueType::join(lhs_type, rhs_type); + if (function == eval::operation::Mul::f) { + if (lhs_type.dimensions() == rhs_type.dimensions()) { + return typify_invoke<1,eval::TypifyCellType,FastSparseJoin<T>>( + rhs_type.cell_type(), + *this, *rhs, std::move(res_type)); + } + } + return typify_invoke<2,eval::TypifyCellType,GenericSparseJoin<T>>( + 
rhs_type.cell_type(), res_type.cell_type(), + *this, *rhs, std::move(res_type), function); +} + +template<typename T> +Tensor::UP +SparseTensorT<T>::merge(join_fun_t function, const Tensor &arg) const +{ + const SparseTensor *rhs = dynamic_cast<const SparseTensor *>(&arg); + assert(rhs && (fast_type().dimensions() == rhs->fast_type().dimensions())); + return typify_invoke<2,eval::TypifyCellType,GenericSparseMerge>( + fast_type().cell_type(), rhs->fast_type().cell_type(), + *this, *rhs, function); +} + +template<typename T> +std::unique_ptr<Tensor> +SparseTensorT<T>::modify(join_fun_t op, const CellValues &cellValues) const +{ + SparseTensorModify modifier(op, *this);; + cellValues.accept(modifier); + return modifier.build(); +} + +template<typename T> +Tensor::UP +SparseTensorT<T>::reduce(join_fun_t op, const std::vector<vespalib::string> &dimensions) const +{ + return sparse::reduce(*this, dimensions, op); +} + +template<typename T> +std::unique_ptr<Tensor> +SparseTensorT<T>::remove(const CellValues &cellAddresses) const +{ + SparseTensorRemove<T> remover(*this); + cellAddresses.accept(remover); + return remover.build(); +} + +template<typename T> +MemoryUsage +SparseTensorT<T>::get_memory_usage() const +{ + MemoryUsage result = index().get_memory_usage(); + result.incUsedBytes(sizeof(SparseTensor)); + result.incUsedBytes(_values.size() * sizeof(T)); + result.incAllocatedBytes(sizeof(SparseTensor)); + result.incAllocatedBytes(_values.capacity() * sizeof(T)); + return result; +} + +template class SparseTensorT<float>; +template class SparseTensorT<double>; + +} diff --git a/eval/src/vespa/eval/tensor/sparse/sparse_tensor_t.h b/eval/src/vespa/eval/tensor/sparse/sparse_tensor_t.h new file mode 100644 index 00000000000..1bd0f7caafd --- /dev/null +++ b/eval/src/vespa/eval/tensor/sparse/sparse_tensor_t.h @@ -0,0 +1,41 @@ +// Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+ +#pragma once + +#include "sparse_tensor_index.h" +#include <vespa/eval/tensor/cell_function.h> +#include <vespa/eval/tensor/tensor.h> +#include <vespa/eval/tensor/tensor_address.h> +#include <vespa/eval/tensor/types.h> +#include <vespa/vespalib/stllike/hash_map.h> +#include <vespa/vespalib/stllike/string.h> +#include <vespa/vespalib/util/stash.h> + +namespace vespalib::tensor { + +template<typename T> +class SparseTensorT : public SparseTensor +{ +private: + std::vector<T> _values; +public: + SparseTensorT(eval::ValueType type_in, SparseTensorIndex index_in, std::vector<T> cells_in); + ~SparseTensorT() override; + TypedCells cells() const override; + T get_value(size_t idx) const { return _values[idx]; } + size_t my_size() const { return _values.size(); } + const std::vector<T> &my_values() const { return _values; } + double as_double() const override; + void accept(TensorVisitor &visitor) const override; + Tensor::UP add(const Tensor &arg) const override; + Tensor::UP apply(const CellFunction &func) const override; + Tensor::UP clone() const override; + Tensor::UP join(join_fun_t function, const Tensor &arg) const override; + Tensor::UP merge(join_fun_t function, const Tensor &arg) const override; + Tensor::UP modify(join_fun_t op, const CellValues &cellValues) const override; + Tensor::UP reduce(join_fun_t op, const std::vector<vespalib::string> &dimensions) const override; + Tensor::UP remove(const CellValues &cellAddresses) const override; + MemoryUsage get_memory_usage() const override; +}; + +} diff --git a/eval/src/vespa/eval/tensor/sparse/sparse_tensor_value.h b/eval/src/vespa/eval/tensor/sparse/sparse_tensor_value.h deleted file mode 100644 index 61e412b0191..00000000000 --- a/eval/src/vespa/eval/tensor/sparse/sparse_tensor_value.h +++ /dev/null @@ -1,59 +0,0 @@ -// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
- -#pragma once - -#include "sparse_tensor_address_ref.h" -#include <vespa/eval/eval/value.h> -#include <vespa/eval/tensor/types.h> -#include <vespa/vespalib/stllike/hash_map.h> -#include <vespa/vespalib/stllike/string.h> -#include <vespa/vespalib/util/stash.h> - -namespace vespalib::tensor { - -struct SparseTensorValueIndex : public vespalib::eval::Value::Index -{ - using View = vespalib::eval::Value::Index::View; - using SubspaceMap = hash_map<SparseTensorAddressRef, uint32_t, hash<SparseTensorAddressRef>, - std::equal_to<>, hashtable_base::and_modulator>; - - Stash _stash; - SubspaceMap _map; - size_t _num_mapped_dims; - - explicit SparseTensorValueIndex(size_t num_mapped_dims_in); - SparseTensorValueIndex(const SparseTensorValueIndex & index_in); - SparseTensorValueIndex(SparseTensorValueIndex && index_in) = default; - ~SparseTensorValueIndex(); - size_t size() const override; - std::unique_ptr<View> create_view(const std::vector<size_t> &dims) const override; - void add_subspace(SparseTensorAddressRef tmp_ref, size_t idx); -}; - -/** - * A tensor implementation using serialized tensor addresses to - * improve CPU cache and TLB hit ratio, relative to SimpleTensor - * implementation. 
- */ -template<typename T> -class SparseTensorValue : public vespalib::eval::Value -{ -private: - eval::ValueType _type; - SparseTensorValueIndex _index; - std::vector<T> _cells; -public: - SparseTensorValue(const eval::ValueType &type_in, const SparseTensorValueIndex &index_in, const std::vector<T> &cells_in); - - SparseTensorValue(eval::ValueType &&type_in, SparseTensorValueIndex &&index_in, std::vector<T> &&cells_in); - - ~SparseTensorValue() override; - - TypedCells cells() const override { return TypedCells(_cells); } - - const Index &index() const override { return _index; } - - const eval::ValueType &type() const override { return _type; } -}; - -} // namespace diff --git a/eval/src/vespa/eval/tensor/sparse/sparse_tensor_value_builder.cpp b/eval/src/vespa/eval/tensor/sparse/sparse_tensor_value_builder.cpp index 07ba2b217ac..7c584246d83 100644 --- a/eval/src/vespa/eval/tensor/sparse/sparse_tensor_value_builder.cpp +++ b/eval/src/vespa/eval/tensor/sparse/sparse_tensor_value_builder.cpp @@ -1,6 +1,7 @@ // Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
#include "sparse_tensor_value_builder.h" +#include "sparse_tensor_t.h" namespace vespalib::tensor { @@ -9,13 +10,13 @@ ArrayRef<T> SparseTensorValueBuilder<T>::add_subspace(const std::vector<vespalib::stringref> &addr) { uint32_t idx = _cells.size(); - _cells.resize(idx + 1); _addr_builder.clear(); for (const auto & label : addr) { _addr_builder.add(label); } auto tmp_ref = _addr_builder.getAddressRef(); - _index.add_subspace(tmp_ref, idx); + _index.add_address(tmp_ref); + _cells.push_back(0.0); return ArrayRef<T>(&_cells[idx], 1); } @@ -23,9 +24,9 @@ template <typename T> std::unique_ptr<eval::Value> SparseTensorValueBuilder<T>::build(std::unique_ptr<eval::ValueBuilder<T>>) { - return std::make_unique<SparseTensorValue<T>>(std::move(_type), - std::move(_index), - std::move(_cells)); + return std::make_unique<SparseTensorT<T>>(std::move(_type), + std::move(_index), + std::move(_cells)); } diff --git a/eval/src/vespa/eval/tensor/sparse/sparse_tensor_value_builder.h b/eval/src/vespa/eval/tensor/sparse/sparse_tensor_value_builder.h index 46d79482f3d..db3ff314ed2 100644 --- a/eval/src/vespa/eval/tensor/sparse/sparse_tensor_value_builder.h +++ b/eval/src/vespa/eval/tensor/sparse/sparse_tensor_value_builder.h @@ -2,7 +2,7 @@ #pragma once -#include "sparse_tensor_value.h" +#include "sparse_tensor.h" #include "sparse_tensor_address_builder.h" namespace vespalib::tensor { @@ -16,7 +16,7 @@ class SparseTensorValueBuilder : public eval::ValueBuilder<T> { private: eval::ValueType _type; - SparseTensorValueIndex _index; + SparseTensorIndex _index; std::vector<T> _cells; SparseTensorAddressBuilder _addr_builder; public: @@ -28,6 +28,7 @@ public: _cells() { assert(num_mapped_in > 0); + _index.reserve(expected_subspaces); _cells.reserve(expected_subspaces); } diff --git a/eval/src/vespa/eval/tensor/tensor_apply.cpp b/eval/src/vespa/eval/tensor/tensor_apply.cpp deleted file mode 100644 index 98450797f0c..00000000000 --- a/eval/src/vespa/eval/tensor/tensor_apply.cpp +++ /dev/null @@ 
-1,20 +0,0 @@ -// Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. - -#include "tensor_apply.h" -#include <vespa/vespalib/stllike/hash_map.hpp> - -namespace vespalib::tensor { - -template <class TensorT> -TensorApply<TensorT>::TensorApply(const TensorImplType &tensor, - const CellFunction &func) - : Parent(tensor.fast_type()) -{ - for (const auto &cell : tensor.my_cells()) { - _builder.insertCell(cell.first, func.apply(cell.second)); - } -} - -template class TensorApply<SparseTensor>; - -} diff --git a/eval/src/vespa/eval/tensor/tensor_apply.h b/eval/src/vespa/eval/tensor/tensor_apply.h deleted file mode 100644 index bb5ffdd1885..00000000000 --- a/eval/src/vespa/eval/tensor/tensor_apply.h +++ /dev/null @@ -1,25 +0,0 @@ -// Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. - -#pragma once - -#include "cell_function.h" -#include "tensor_operation.h" - -namespace vespalib::tensor { - -/** - * Returns a tensor with the given function applied to all cells in the input tensor. - */ -template <class TensorT> -class TensorApply : public TensorOperation<TensorT> -{ -public: - using Parent = TensorOperation<TensorT>; - using typename Parent::TensorImplType; - using Parent::_builder; - TensorApply(const TensorImplType &tensor, const CellFunction &func); -}; - -extern template class TensorApply<SparseTensor>; - -} diff --git a/eval/src/vespa/eval/tensor/tensor_operation.h b/eval/src/vespa/eval/tensor/tensor_operation.h deleted file mode 100644 index 0532fe3efa0..00000000000 --- a/eval/src/vespa/eval/tensor/tensor_operation.h +++ /dev/null @@ -1,47 +0,0 @@ -// Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. - -#pragma once - -#include <vespa/eval/tensor/sparse/direct_sparse_tensor_builder.h> - -namespace vespalib::tensor { - -/** - * Base class for an operation over tensors. 
- */ -template <class TensorT> -class TensorOperation -{ -public: - using TensorImplType = TensorT; - using MyTensorBuilder = DirectSparseTensorBuilder; - using Cells = typename TensorImplType::Cells; - using AddressBuilderType = typename MyTensorBuilder::AddressBuilderType; - using AddressRefType = typename MyTensorBuilder::AddressRefType; -protected: - MyTensorBuilder _builder; - eval::ValueType &_type; - Cells &_cells; - -public: - TensorOperation() - : _builder(), - _type(_builder.fast_type()), - _cells(_builder.cells()) - {} - TensorOperation(const eval::ValueType &type) - : _builder(type), - _type(_builder.fast_type()), - _cells(_builder.cells()) - {} - TensorOperation(const eval::ValueType &type, const Cells &cells) - : _builder(type, cells), - _type(_builder.fast_type()), - _cells(_builder.cells()) - {} - Tensor::UP result() { - return _builder.build(); - } -}; - -} |