diff options
author | Arne Juul <arnej@verizonmedia.com> | 2020-10-15 13:45:13 +0000 |
---|---|---|
committer | Arne Juul <arnej@verizonmedia.com> | 2020-10-15 20:39:03 +0000 |
commit | c5f3c169bd2a03aed5542caf31fece5a25cc9118 (patch) | |
tree | e9d1e6f8b61879155709bac680999d8389733c92 | |
parent | af9b71c51a175a26f003cdcce19583db024a824b (diff) |
more explicit copy with/without shrinking
* shrink if needed when deserializing binary data
11 files changed, 60 insertions, 21 deletions
diff --git a/eval/src/vespa/eval/tensor/serialization/sparse_binary_format.cpp b/eval/src/vespa/eval/tensor/serialization/sparse_binary_format.cpp index f6ceb148a44..eda8f7eecc7 100644 --- a/eval/src/vespa/eval/tensor/serialization/sparse_binary_format.cpp +++ b/eval/src/vespa/eval/tensor/serialization/sparse_binary_format.cpp @@ -133,14 +133,19 @@ SparseBinaryFormat::serialize(nbostream &stream, const Tensor &tensor) struct BuildSparseCells { template<typename CT> - static auto invoke(ValueType type, nbostream &stream, - size_t dimensionsSize, - size_t cellsSize) + static Tensor::UP invoke(ValueType type, nbostream &stream, + size_t dimensionsSize, + size_t cellsSize) { DirectSparseTensorBuilder<CT> builder(std::move(type)); builder.reserve(cellsSize); decodeCells<CT>(stream, dimensionsSize, cellsSize, builder); - return builder.build(); + auto retval = builder.build(); + if (retval->should_shrink()) { + return retval->shrink(); + } else { + return retval; + } } }; diff --git a/eval/src/vespa/eval/tensor/sparse/direct_sparse_tensor_builder.cpp b/eval/src/vespa/eval/tensor/sparse/direct_sparse_tensor_builder.cpp index 4e8d2fda7cb..d6471f07191 100644 --- a/eval/src/vespa/eval/tensor/sparse/direct_sparse_tensor_builder.cpp +++ b/eval/src/vespa/eval/tensor/sparse/direct_sparse_tensor_builder.cpp @@ -27,7 +27,7 @@ template<typename T> DirectSparseTensorBuilder<T>::~DirectSparseTensorBuilder() = default; template<typename T> -Tensor::UP +std::unique_ptr<SparseTensorT<T>> DirectSparseTensorBuilder<T>::build() { using tt = SparseTensorT<T>; return std::make_unique<tt>(std::move(_type), std::move(_index), std::move(_values)); diff --git a/eval/src/vespa/eval/tensor/sparse/direct_sparse_tensor_builder.h b/eval/src/vespa/eval/tensor/sparse/direct_sparse_tensor_builder.h index c46ae5b9819..9570f744ae0 100644 --- a/eval/src/vespa/eval/tensor/sparse/direct_sparse_tensor_builder.h +++ b/eval/src/vespa/eval/tensor/sparse/direct_sparse_tensor_builder.h @@ -4,6 +4,7 @@ #include <vespa/vespalib/util/hdr_abort.h> #include "sparse_tensor.h" +#include "sparse_tensor_t.h" #include "sparse_tensor_address_builder.h" namespace vespalib::tensor { @@ -29,7 +30,7 @@ public: DirectSparseTensorBuilder(const eval::ValueType &type_in); ~DirectSparseTensorBuilder(); - Tensor::UP build(); + std::unique_ptr<SparseTensorT<T>> build(); template <class Function> void insertCell(SparseTensorAddressRef address, T value, Function &&func) diff --git a/eval/src/vespa/eval/tensor/sparse/sparse_tensor.h b/eval/src/vespa/eval/tensor/sparse/sparse_tensor.h index 4093700b334..441dd2670f9 100644 --- a/eval/src/vespa/eval/tensor/sparse/sparse_tensor.h +++ b/eval/src/vespa/eval/tensor/sparse/sparse_tensor.h @@ -26,7 +26,7 @@ public: SparseTensor(eval::ValueType type_in, SparseTensorIndex index_in); ~SparseTensor() override; size_t my_size() const { return _index.get_map().size(); } - const SparseTensorIndex &index() const override { return _index; } + const SparseTensorIndex &index() const final override { return _index; } const eval::ValueType &fast_type() const { return _type; } bool operator==(const SparseTensor &rhs) const; eval::ValueType combineDimensionsWith(const SparseTensor &rhs) const; diff --git a/eval/src/vespa/eval/tensor/sparse/sparse_tensor_add.cpp b/eval/src/vespa/eval/tensor/sparse/sparse_tensor_add.cpp index 6c2e7241856..bb44826db9c 100644 --- a/eval/src/vespa/eval/tensor/sparse/sparse_tensor_add.cpp +++ b/eval/src/vespa/eval/tensor/sparse/sparse_tensor_add.cpp @@ -37,7 +37,7 @@ std::unique_ptr<Tensor> SparseTensorAdd<T>::build() { using tt = SparseTensorT<T>; - return std::make_unique<tt>(std::move(_type), _index, std::move(_values)); + return std::make_unique<tt>(std::move(_type), std::move(_index), std::move(_values)); } template class SparseTensorAdd<float>; diff --git a/eval/src/vespa/eval/tensor/sparse/sparse_tensor_index.cpp b/eval/src/vespa/eval/tensor/sparse/sparse_tensor_index.cpp index b30d734f9ab..b3b48d4cb13 100644 --- a/eval/src/vespa/eval/tensor/sparse/sparse_tensor_index.cpp +++ b/eval/src/vespa/eval/tensor/sparse/sparse_tensor_index.cpp @@ -184,7 +184,7 @@ SparseTensorIndex::needed_memory_for(const SparseTensorIndex &other) { auto mem = other._stash.get_memory_usage(); size_t mem_use = mem.usedBytes(); if (mem_use == 0) { - return STASH_CHUNK_SIZE; + return STASH_CHUNK_SIZE; } if (mem_use < (STASH_CHUNK_SIZE / 4)) { size_t avg_per_addr = mem_use / other.size(); @@ -199,10 +199,24 @@ SparseTensorIndex::SparseTensorIndex(size_t num_mapped_in) : _stash(STASH_CHUNK_SIZE), _map(), _num_mapped_dims(num_mapped_in) {} -SparseTensorIndex::SparseTensorIndex(const SparseTensorIndex & index_in) - : _stash(needed_memory_for(index_in)), _map(), _num_mapped_dims(index_in._num_mapped_dims) +SparseTensorIndex::SparseTensorIndex(size_t stash_size, const SparseTensorIndex &index_in) + : _stash(stash_size), _map(), _num_mapped_dims(index_in._num_mapped_dims) { - copyMap(_map, index_in._map, _stash); + copyMap(_map, index_in._map, _stash); +} + +SparseTensorIndex +SparseTensorIndex::shrunk_copy() const +{ + size_t want_mem = needed_memory_for(*this); + return SparseTensorIndex(want_mem, *this); +} + +SparseTensorIndex +SparseTensorIndex::copy() const +{ + size_t want_mem = _stash.get_chunk_size(); + return SparseTensorIndex(want_mem, *this); } void diff --git a/eval/src/vespa/eval/tensor/sparse/sparse_tensor_index.h b/eval/src/vespa/eval/tensor/sparse/sparse_tensor_index.h index c30bcf4732b..c762c84e3d3 100644 --- a/eval/src/vespa/eval/tensor/sparse/sparse_tensor_index.h +++ b/eval/src/vespa/eval/tensor/sparse/sparse_tensor_index.h @@ -19,7 +19,8 @@ public: std::equal_to<>, hashtable_base::and_modulator>; // construct explicit SparseTensorIndex(size_t num_mapped_dims_in); - SparseTensorIndex(const SparseTensorIndex & index_in); + SparseTensorIndex copy() const; + SparseTensorIndex shrunk_copy() const; SparseTensorIndex(SparseTensorIndex && index_in) = default; ~SparseTensorIndex(); // Index API @@ -40,6 +41,7 @@ private: IndexMap _map; size_t _num_mapped_dims; static size_t needed_memory_for(const SparseTensorIndex &other); + SparseTensorIndex(size_t stash_size, const SparseTensorIndex &other); }; } // namespace diff --git a/eval/src/vespa/eval/tensor/sparse/sparse_tensor_modify.cpp b/eval/src/vespa/eval/tensor/sparse/sparse_tensor_modify.cpp index 23a2d00c8b3..f36e1c71c16 100644 --- a/eval/src/vespa/eval/tensor/sparse/sparse_tensor_modify.cpp +++ b/eval/src/vespa/eval/tensor/sparse/sparse_tensor_modify.cpp @@ -36,7 +36,7 @@ std::unique_ptr<Tensor> SparseTensorModify<T>::build() { using tt = SparseTensorT<T>; - return std::make_unique<tt>(std::move(_type), std::move(_index), std::move(_values)); + return std::make_unique<tt>(std::move(_type), _index.copy(), std::move(_values)); } template class SparseTensorModify<float>; diff --git a/eval/src/vespa/eval/tensor/sparse/sparse_tensor_modify.h b/eval/src/vespa/eval/tensor/sparse/sparse_tensor_modify.h index 0f8b5c02267..86f0c0a5c48 100644 --- a/eval/src/vespa/eval/tensor/sparse/sparse_tensor_modify.h +++ b/eval/src/vespa/eval/tensor/sparse/sparse_tensor_modify.h @@ -19,10 +19,10 @@ template<typename T> class SparseTensorModify : public TensorVisitor { using join_fun_t = vespalib::eval::operation::op2_t; - join_fun_t _op; - eval::ValueType _type; - SparseTensorIndex _index; - std::vector<T> _values; + join_fun_t _op; + eval::ValueType _type; + const SparseTensorIndex & _index; + std::vector<T> _values; SparseTensorAddressBuilder _addressBuilder; public: diff --git a/eval/src/vespa/eval/tensor/sparse/sparse_tensor_t.cpp b/eval/src/vespa/eval/tensor/sparse/sparse_tensor_t.cpp index cb1370475ba..d47ba27d796 100644 --- a/eval/src/vespa/eval/tensor/sparse/sparse_tensor_t.cpp +++ b/eval/src/vespa/eval/tensor/sparse/sparse_tensor_t.cpp @@ -150,7 +150,7 @@ SparseTensorT<T>::add(const Tensor &arg) const if (!rhs) { return Tensor::UP(); } - SparseTensorAdd<T> adder(fast_type(), index(), _values); + SparseTensorAdd<T> adder(fast_type(), index().copy(), _values); rhs->accept(adder); return adder.build(); } @@ -164,14 +164,14 @@ SparseTensorT<T>::apply(const CellFunction &func) const for (T v : _values) { new_values.push_back(func.apply(v)); } - return std::make_unique<SparseTensorT<T>>(fast_type(), index(), std::move(new_values)); + return std::make_unique<SparseTensorT<T>>(fast_type(), index().copy(), std::move(new_values)); } template<typename T> Tensor::UP SparseTensorT<T>::clone() const { - return std::make_unique<SparseTensorT<T>>(fast_type(), index(), _values); + return std::make_unique<SparseTensorT<T>>(fast_type(), index().shrunk_copy(), _values); } template<typename T> @@ -245,6 +245,21 @@ SparseTensorT<T>::get_memory_usage() const return result; } +template<typename T> +bool +SparseTensorT<T>::should_shrink() const +{ + auto mem_use = get_memory_usage(); + return (mem_use.usedBytes() * 3 < mem_use.allocatedBytes()); +} + +template<typename T> +Tensor::UP +SparseTensorT<T>::shrink() const +{ + return std::make_unique<SparseTensorT<T>>(fast_type(), index().shrunk_copy(), _values); +} + template class SparseTensorT<float>; template class SparseTensorT<double>; diff --git a/eval/src/vespa/eval/tensor/sparse/sparse_tensor_t.h b/eval/src/vespa/eval/tensor/sparse/sparse_tensor_t.h index abe2c429d53..5a9aaa224c6 100644 --- a/eval/src/vespa/eval/tensor/sparse/sparse_tensor_t.h +++ b/eval/src/vespa/eval/tensor/sparse/sparse_tensor_t.h @@ -36,6 +36,8 @@ public: Tensor::UP reduce(join_fun_t op, const std::vector<vespalib::string> &dimensions) const override; Tensor::UP remove(const CellValues &cellAddresses) const override; MemoryUsage get_memory_usage() const override; + bool should_shrink() const; + Tensor::UP shrink() const; }; } |