about summary refs log tree commit diff stats
diff options
context:
space:
mode:
author Arne Juul <arnej@verizonmedia.com> 2020-10-15 13:45:13 +0000
committer Arne Juul <arnej@verizonmedia.com> 2020-10-15 20:39:03 +0000
commit c5f3c169bd2a03aed5542caf31fece5a25cc9118 (patch)
tree e9d1e6f8b61879155709bac680999d8389733c92
parent af9b71c51a175a26f003cdcce19583db024a824b (diff)
more explicit copy with/without shrinking
* shrink if needed when deserializing binary data
-rw-r--r-- eval/src/vespa/eval/tensor/serialization/sparse_binary_format.cpp 13
-rw-r--r-- eval/src/vespa/eval/tensor/sparse/direct_sparse_tensor_builder.cpp 2
-rw-r--r-- eval/src/vespa/eval/tensor/sparse/direct_sparse_tensor_builder.h 3
-rw-r--r-- eval/src/vespa/eval/tensor/sparse/sparse_tensor.h 2
-rw-r--r-- eval/src/vespa/eval/tensor/sparse/sparse_tensor_add.cpp 2
-rw-r--r-- eval/src/vespa/eval/tensor/sparse/sparse_tensor_index.cpp 22
-rw-r--r-- eval/src/vespa/eval/tensor/sparse/sparse_tensor_index.h 4
-rw-r--r-- eval/src/vespa/eval/tensor/sparse/sparse_tensor_modify.cpp 2
-rw-r--r-- eval/src/vespa/eval/tensor/sparse/sparse_tensor_modify.h 8
-rw-r--r-- eval/src/vespa/eval/tensor/sparse/sparse_tensor_t.cpp 21
-rw-r--r-- eval/src/vespa/eval/tensor/sparse/sparse_tensor_t.h 2
11 files changed, 60 insertions, 21 deletions
diff --git a/eval/src/vespa/eval/tensor/serialization/sparse_binary_format.cpp b/eval/src/vespa/eval/tensor/serialization/sparse_binary_format.cpp
index f6ceb148a44..eda8f7eecc7 100644
--- a/eval/src/vespa/eval/tensor/serialization/sparse_binary_format.cpp
+++ b/eval/src/vespa/eval/tensor/serialization/sparse_binary_format.cpp
@@ -133,14 +133,19 @@ SparseBinaryFormat::serialize(nbostream &stream, const Tensor &tensor)
struct BuildSparseCells {
template<typename CT>
- static auto invoke(ValueType type, nbostream &stream,
- size_t dimensionsSize,
- size_t cellsSize)
+ static Tensor::UP invoke(ValueType type, nbostream &stream,
+ size_t dimensionsSize,
+ size_t cellsSize)
{
DirectSparseTensorBuilder<CT> builder(std::move(type));
builder.reserve(cellsSize);
decodeCells<CT>(stream, dimensionsSize, cellsSize, builder);
- return builder.build();
+ auto retval = builder.build();
+ if (retval->should_shrink()) {
+ return retval->shrink();
+ } else {
+ return retval;
+ }
}
};
diff --git a/eval/src/vespa/eval/tensor/sparse/direct_sparse_tensor_builder.cpp b/eval/src/vespa/eval/tensor/sparse/direct_sparse_tensor_builder.cpp
index 4e8d2fda7cb..d6471f07191 100644
--- a/eval/src/vespa/eval/tensor/sparse/direct_sparse_tensor_builder.cpp
+++ b/eval/src/vespa/eval/tensor/sparse/direct_sparse_tensor_builder.cpp
@@ -27,7 +27,7 @@ template<typename T>
DirectSparseTensorBuilder<T>::~DirectSparseTensorBuilder() = default;
template<typename T>
-Tensor::UP
+std::unique_ptr<SparseTensorT<T>>
DirectSparseTensorBuilder<T>::build() {
using tt = SparseTensorT<T>;
return std::make_unique<tt>(std::move(_type), std::move(_index), std::move(_values));
diff --git a/eval/src/vespa/eval/tensor/sparse/direct_sparse_tensor_builder.h b/eval/src/vespa/eval/tensor/sparse/direct_sparse_tensor_builder.h
index c46ae5b9819..9570f744ae0 100644
--- a/eval/src/vespa/eval/tensor/sparse/direct_sparse_tensor_builder.h
+++ b/eval/src/vespa/eval/tensor/sparse/direct_sparse_tensor_builder.h
@@ -4,6 +4,7 @@
#include <vespa/vespalib/util/hdr_abort.h>
#include "sparse_tensor.h"
+#include "sparse_tensor_t.h"
#include "sparse_tensor_address_builder.h"
namespace vespalib::tensor {
@@ -29,7 +30,7 @@ public:
DirectSparseTensorBuilder(const eval::ValueType &type_in);
~DirectSparseTensorBuilder();
- Tensor::UP build();
+ std::unique_ptr<SparseTensorT<T>> build();
template <class Function>
void insertCell(SparseTensorAddressRef address, T value, Function &&func)
diff --git a/eval/src/vespa/eval/tensor/sparse/sparse_tensor.h b/eval/src/vespa/eval/tensor/sparse/sparse_tensor.h
index 4093700b334..441dd2670f9 100644
--- a/eval/src/vespa/eval/tensor/sparse/sparse_tensor.h
+++ b/eval/src/vespa/eval/tensor/sparse/sparse_tensor.h
@@ -26,7 +26,7 @@ public:
SparseTensor(eval::ValueType type_in, SparseTensorIndex index_in);
~SparseTensor() override;
size_t my_size() const { return _index.get_map().size(); }
- const SparseTensorIndex &index() const override { return _index; }
+ const SparseTensorIndex &index() const final override { return _index; }
const eval::ValueType &fast_type() const { return _type; }
bool operator==(const SparseTensor &rhs) const;
eval::ValueType combineDimensionsWith(const SparseTensor &rhs) const;
diff --git a/eval/src/vespa/eval/tensor/sparse/sparse_tensor_add.cpp b/eval/src/vespa/eval/tensor/sparse/sparse_tensor_add.cpp
index 6c2e7241856..bb44826db9c 100644
--- a/eval/src/vespa/eval/tensor/sparse/sparse_tensor_add.cpp
+++ b/eval/src/vespa/eval/tensor/sparse/sparse_tensor_add.cpp
@@ -37,7 +37,7 @@ std::unique_ptr<Tensor>
SparseTensorAdd<T>::build()
{
using tt = SparseTensorT<T>;
- return std::make_unique<tt>(std::move(_type), _index, std::move(_values));
+ return std::make_unique<tt>(std::move(_type), std::move(_index), std::move(_values));
}
template class SparseTensorAdd<float>;
diff --git a/eval/src/vespa/eval/tensor/sparse/sparse_tensor_index.cpp b/eval/src/vespa/eval/tensor/sparse/sparse_tensor_index.cpp
index b30d734f9ab..b3b48d4cb13 100644
--- a/eval/src/vespa/eval/tensor/sparse/sparse_tensor_index.cpp
+++ b/eval/src/vespa/eval/tensor/sparse/sparse_tensor_index.cpp
@@ -184,7 +184,7 @@ SparseTensorIndex::needed_memory_for(const SparseTensorIndex &other) {
auto mem = other._stash.get_memory_usage();
size_t mem_use = mem.usedBytes();
if (mem_use == 0) {
- return STASH_CHUNK_SIZE;
+ return STASH_CHUNK_SIZE;
}
if (mem_use < (STASH_CHUNK_SIZE / 4)) {
size_t avg_per_addr = mem_use / other.size();
@@ -199,10 +199,24 @@ SparseTensorIndex::SparseTensorIndex(size_t num_mapped_in)
: _stash(STASH_CHUNK_SIZE), _map(), _num_mapped_dims(num_mapped_in)
{}
-SparseTensorIndex::SparseTensorIndex(const SparseTensorIndex & index_in)
- : _stash(needed_memory_for(index_in)), _map(), _num_mapped_dims(index_in._num_mapped_dims)
+SparseTensorIndex::SparseTensorIndex(size_t stash_size, const SparseTensorIndex &index_in)
+ : _stash(stash_size), _map(), _num_mapped_dims(index_in._num_mapped_dims)
{
- copyMap(_map, index_in._map, _stash);
+ copyMap(_map, index_in._map, _stash);
+}
+
+SparseTensorIndex
+SparseTensorIndex::shrunk_copy() const
+{
+ size_t want_mem = needed_memory_for(*this);
+ return SparseTensorIndex(want_mem, *this);
+}
+
+SparseTensorIndex
+SparseTensorIndex::copy() const
+{
+ size_t want_mem = _stash.get_chunk_size();
+ return SparseTensorIndex(want_mem, *this);
}
void
diff --git a/eval/src/vespa/eval/tensor/sparse/sparse_tensor_index.h b/eval/src/vespa/eval/tensor/sparse/sparse_tensor_index.h
index c30bcf4732b..c762c84e3d3 100644
--- a/eval/src/vespa/eval/tensor/sparse/sparse_tensor_index.h
+++ b/eval/src/vespa/eval/tensor/sparse/sparse_tensor_index.h
@@ -19,7 +19,8 @@ public:
std::equal_to<>, hashtable_base::and_modulator>;
// construct
explicit SparseTensorIndex(size_t num_mapped_dims_in);
- SparseTensorIndex(const SparseTensorIndex & index_in);
+ SparseTensorIndex copy() const;
+ SparseTensorIndex shrunk_copy() const;
SparseTensorIndex(SparseTensorIndex && index_in) = default;
~SparseTensorIndex();
// Index API
@@ -40,6 +41,7 @@ private:
IndexMap _map;
size_t _num_mapped_dims;
static size_t needed_memory_for(const SparseTensorIndex &other);
+ SparseTensorIndex(size_t stash_size, const SparseTensorIndex &other);
};
} // namespace
diff --git a/eval/src/vespa/eval/tensor/sparse/sparse_tensor_modify.cpp b/eval/src/vespa/eval/tensor/sparse/sparse_tensor_modify.cpp
index 23a2d00c8b3..f36e1c71c16 100644
--- a/eval/src/vespa/eval/tensor/sparse/sparse_tensor_modify.cpp
+++ b/eval/src/vespa/eval/tensor/sparse/sparse_tensor_modify.cpp
@@ -36,7 +36,7 @@ std::unique_ptr<Tensor>
SparseTensorModify<T>::build()
{
using tt = SparseTensorT<T>;
- return std::make_unique<tt>(std::move(_type), std::move(_index), std::move(_values));
+ return std::make_unique<tt>(std::move(_type), _index.copy(), std::move(_values));
}
template class SparseTensorModify<float>;
diff --git a/eval/src/vespa/eval/tensor/sparse/sparse_tensor_modify.h b/eval/src/vespa/eval/tensor/sparse/sparse_tensor_modify.h
index 0f8b5c02267..86f0c0a5c48 100644
--- a/eval/src/vespa/eval/tensor/sparse/sparse_tensor_modify.h
+++ b/eval/src/vespa/eval/tensor/sparse/sparse_tensor_modify.h
@@ -19,10 +19,10 @@ template<typename T>
class SparseTensorModify : public TensorVisitor
{
using join_fun_t = vespalib::eval::operation::op2_t;
- join_fun_t _op;
- eval::ValueType _type;
- SparseTensorIndex _index;
- std::vector<T> _values;
+ join_fun_t _op;
+ eval::ValueType _type;
+ const SparseTensorIndex & _index;
+ std::vector<T> _values;
SparseTensorAddressBuilder _addressBuilder;
public:
diff --git a/eval/src/vespa/eval/tensor/sparse/sparse_tensor_t.cpp b/eval/src/vespa/eval/tensor/sparse/sparse_tensor_t.cpp
index cb1370475ba..d47ba27d796 100644
--- a/eval/src/vespa/eval/tensor/sparse/sparse_tensor_t.cpp
+++ b/eval/src/vespa/eval/tensor/sparse/sparse_tensor_t.cpp
@@ -150,7 +150,7 @@ SparseTensorT<T>::add(const Tensor &arg) const
if (!rhs) {
return Tensor::UP();
}
- SparseTensorAdd<T> adder(fast_type(), index(), _values);
+ SparseTensorAdd<T> adder(fast_type(), index().copy(), _values);
rhs->accept(adder);
return adder.build();
}
@@ -164,14 +164,14 @@ SparseTensorT<T>::apply(const CellFunction &func) const
for (T v : _values) {
new_values.push_back(func.apply(v));
}
- return std::make_unique<SparseTensorT<T>>(fast_type(), index(), std::move(new_values));
+ return std::make_unique<SparseTensorT<T>>(fast_type(), index().copy(), std::move(new_values));
}
template<typename T>
Tensor::UP
SparseTensorT<T>::clone() const
{
- return std::make_unique<SparseTensorT<T>>(fast_type(), index(), _values);
+ return std::make_unique<SparseTensorT<T>>(fast_type(), index().shrunk_copy(), _values);
}
template<typename T>
@@ -245,6 +245,21 @@ SparseTensorT<T>::get_memory_usage() const
return result;
}
+template<typename T>
+bool
+SparseTensorT<T>::should_shrink() const
+{
+ auto mem_use = get_memory_usage();
+ return (mem_use.usedBytes() * 3 < mem_use.allocatedBytes());
+}
+
+template<typename T>
+Tensor::UP
+SparseTensorT<T>::shrink() const
+{
+ return std::make_unique<SparseTensorT<T>>(fast_type(), index().shrunk_copy(), _values);
+}
+
template class SparseTensorT<float>;
template class SparseTensorT<double>;
diff --git a/eval/src/vespa/eval/tensor/sparse/sparse_tensor_t.h b/eval/src/vespa/eval/tensor/sparse/sparse_tensor_t.h
index abe2c429d53..5a9aaa224c6 100644
--- a/eval/src/vespa/eval/tensor/sparse/sparse_tensor_t.h
+++ b/eval/src/vespa/eval/tensor/sparse/sparse_tensor_t.h
@@ -36,6 +36,8 @@ public:
Tensor::UP reduce(join_fun_t op, const std::vector<vespalib::string> &dimensions) const override;
Tensor::UP remove(const CellValues &cellAddresses) const override;
MemoryUsage get_memory_usage() const override;
+ bool should_shrink() const;
+ Tensor::UP shrink() const;
};
}