From bcd1eb0bc951312509cccddd5efbc19bb939e703 Mon Sep 17 00:00:00 2001 From: Henning Baldersheim Date: Tue, 8 Feb 2022 22:02:29 +0000 Subject: vector of string_id tends to become very large. Use mmap allocation automatically. --- eval/src/vespa/eval/eval/fast_addr_map.h | 9 +++--- eval/src/vespa/eval/eval/fast_value.hpp | 33 +++++++++++++--------- eval/src/vespa/eval/eval/memory_usage_stuff.h | 5 ++-- eval/src/vespa/eval/streamed/streamed_value.h | 2 +- .../src/vespa/eval/streamed/streamed_value_index.h | 4 +-- .../src/vespa/eval/streamed/streamed_value_utils.h | 8 +++--- eval/src/vespa/eval/streamed/streamed_value_view.h | 2 +- 7 files changed, 34 insertions(+), 29 deletions(-) (limited to 'eval') diff --git a/eval/src/vespa/eval/eval/fast_addr_map.h b/eval/src/vespa/eval/eval/fast_addr_map.h index a3b73afba6d..b9bc39ad619 100644 --- a/eval/src/vespa/eval/eval/fast_addr_map.h +++ b/eval/src/vespa/eval/eval/fast_addr_map.h @@ -7,7 +7,6 @@ #include #include #include -#include namespace vespalib::eval { @@ -62,8 +61,8 @@ public: // view able to convert tags into sparse addresses struct LabelView { size_t addr_size; - const std::vector &labels; - LabelView(size_t num_mapped_dims, const std::vector &labels_in) + const StringIdVector &labels; + LabelView(size_t num_mapped_dims, const StringIdVector &labels_in) : addr_size(num_mapped_dims), labels(labels_in) {} ConstArrayRef get_addr(size_t idx) const { return {&labels[idx * addr_size], addr_size}; @@ -105,7 +104,7 @@ private: HashType _map; public: - FastAddrMap(size_t num_mapped_dims, const std::vector &labels_in, size_t expected_subspaces) + FastAddrMap(size_t num_mapped_dims, const StringIdVector &labels_in, size_t expected_subspaces) : _labels(num_mapped_dims, labels_in), _map(expected_subspaces * 2, Hash(), Equal(_labels)) {} ~FastAddrMap(); @@ -117,7 +116,7 @@ public: ConstArrayRef get_addr(size_t idx) const { return _labels.get_addr(idx); } size_t size() const { return _map.size(); } constexpr size_t addr_size() const { return _labels.addr_size; } - const std::vector &labels() const { return _labels.labels; } + const StringIdVector &labels() const { return _labels.labels; } template size_t lookup(ConstArrayRef addr, uint32_t hash) const { // assert(addr_size() == addr.size()); diff --git a/eval/src/vespa/eval/eval/fast_value.hpp b/eval/src/vespa/eval/eval/fast_value.hpp index 185529b2f51..591350347e1 100644 --- a/eval/src/vespa/eval/eval/fast_value.hpp +++ b/eval/src/vespa/eval/eval/fast_value.hpp @@ -139,7 +139,7 @@ struct FastIterateView : public Value::Index::View { // operations by calling inline functions directly. struct FastValueIndex final : Value::Index { FastAddrMap map; - FastValueIndex(size_t num_mapped_dims_in, const std::vector &labels, size_t expected_subspaces_in) + FastValueIndex(size_t num_mapped_dims_in, const StringIdVector &labels, size_t expected_subspaces_in) : map(num_mapped_dims_in, labels, expected_subspaces_in) {} size_t size() const override { return map.size(); } std::unique_ptr create_view(ConstArrayRef dims) const override; @@ -213,13 +213,12 @@ struct FastCells { template struct FastValue final : Value, ValueBuilder { - using Handles = typename std::conditional, + StringIdVector, SharedStringRepo::Handles>::type; - static const std::vector &get_view(const std::vector &handles) { return handles; } - static const std::vector &get_view(const SharedStringRepo::Handles &handles) { return handles.view(); } + static const StringIdVector &get_view(const StringIdVector &handles) { return handles; } + static const StringIdVector &get_view(const SharedStringRepo::Handles &handles) { return handles.view(); } ValueType my_type; size_t my_subspace_size; @@ -227,14 +226,7 @@ struct FastValue final : Value, ValueBuilder { FastValueIndex my_index; FastCells my_cells; - FastValue(const ValueType &type_in, size_t num_mapped_dims_in, size_t subspace_size_in, size_t expected_subspaces_in) - : my_type(type_in), my_subspace_size(subspace_size_in), - my_handles(), - my_index(num_mapped_dims_in, get_view(my_handles), expected_subspaces_in), - my_cells(subspace_size_in * expected_subspaces_in) - { - my_handles.reserve(expected_subspaces_in * num_mapped_dims_in); - } + FastValue(const ValueType &type_in, size_t num_mapped_dims_in, size_t subspace_size_in, size_t expected_subspaces_in); ~FastValue() override; const ValueType &type() const override { return my_type; } const Value::Index &index() const override { return my_index; } @@ -310,7 +302,20 @@ struct FastValue final : Value, ValueBuilder { return usage; } }; -template FastValue::~FastValue() = default; + +template +FastValue::FastValue(const ValueType &type_in, size_t num_mapped_dims_in, + size_t subspace_size_in, size_t expected_subspaces_in) + : my_type(type_in), my_subspace_size(subspace_size_in), + my_handles(), + my_index(num_mapped_dims_in, get_view(my_handles), expected_subspaces_in), + my_cells(subspace_size_in * expected_subspaces_in) +{ + my_handles.reserve(expected_subspaces_in * num_mapped_dims_in); +} + +template +FastValue::~FastValue() = default; //----------------------------------------------------------------------------- diff --git a/eval/src/vespa/eval/eval/memory_usage_stuff.h b/eval/src/vespa/eval/eval/memory_usage_stuff.h index 79a4cfb0eda..10f5459b0e7 100644 --- a/eval/src/vespa/eval/eval/memory_usage_stuff.h +++ b/eval/src/vespa/eval/eval/memory_usage_stuff.h @@ -10,8 +10,9 @@ namespace vespalib::eval { template MemoryUsage self_memory_usage() { return MemoryUsage(sizeof(T), sizeof(T), 0, 0); } -template -MemoryUsage vector_extra_memory_usage(const std::vector &vec) { +template +MemoryUsage vector_extra_memory_usage(const V &vec) { + using T = typename V::value_type; MemoryUsage usage; usage.incAllocatedBytes(sizeof(T) * vec.capacity()); usage.incUsedBytes(sizeof(T) * vec.size()); diff --git a/eval/src/vespa/eval/streamed/streamed_value.h b/eval/src/vespa/eval/streamed/streamed_value.h index ef44eeafce3..a1927e793b2 100644 --- a/eval/src/vespa/eval/streamed/streamed_value.h +++ b/eval/src/vespa/eval/streamed/streamed_value.h @@ -2,10 +2,10 @@ #pragma once +#include "streamed_value_index.h" #include #include #include -#include "streamed_value_index.h" #include namespace vespalib::eval { diff --git a/eval/src/vespa/eval/streamed/streamed_value_index.h b/eval/src/vespa/eval/streamed/streamed_value_index.h index 724874af577..d3f0f2781c4 100644 --- a/eval/src/vespa/eval/streamed/streamed_value_index.h +++ b/eval/src/vespa/eval/streamed/streamed_value_index.h @@ -16,10 +16,10 @@ class StreamedValueIndex : public Value::Index private: uint32_t _num_mapped_dims; uint32_t _num_subspaces; - const std::vector &_labels_ref; + const StringIdVector &_labels_ref; public: - StreamedValueIndex(uint32_t num_mapped_dims, uint32_t num_subspaces, const std::vector &labels_ref) + StreamedValueIndex(uint32_t num_mapped_dims, uint32_t num_subspaces, const StringIdVector &labels_ref) : _num_mapped_dims(num_mapped_dims), _num_subspaces(num_subspaces), _labels_ref(labels_ref) diff --git a/eval/src/vespa/eval/streamed/streamed_value_utils.h b/eval/src/vespa/eval/streamed/streamed_value_utils.h index efcaf4c6e7a..b808ad3c573 100644 --- a/eval/src/vespa/eval/streamed/streamed_value_utils.h +++ b/eval/src/vespa/eval/streamed/streamed_value_utils.h @@ -13,9 +13,9 @@ namespace vespalib::eval { * Reading more labels than available will trigger an assert. **/ struct LabelStream { - const std::vector &source; + const StringIdVector &source; size_t pos; - LabelStream(const std::vector &data) : source(data), pos(0) {} + LabelStream(const StringIdVector &data) : source(data), pos(0) {} string_id next_label() { assert(pos < source.size()); return source[pos++]; @@ -42,7 +42,7 @@ private: size_t _num_subspaces; LabelStream _labels; size_t _subspace_index; - std::vector _current_address; + StringIdVector _current_address; public: LabelBlock next_block() { if (_subspace_index < _num_subspaces) { @@ -61,7 +61,7 @@ public: } LabelBlockStream(uint32_t num_subspaces, - const std::vector &labels, + const StringIdVector &labels, uint32_t num_mapped_dims) : _num_subspaces(num_subspaces), _labels(labels), diff --git a/eval/src/vespa/eval/streamed/streamed_value_view.h b/eval/src/vespa/eval/streamed/streamed_value_view.h index 908176d5cac..53b9e733de5 100644 --- a/eval/src/vespa/eval/streamed/streamed_value_view.h +++ b/eval/src/vespa/eval/streamed/streamed_value_view.h @@ -24,7 +24,7 @@ private: public: StreamedValueView(const ValueType &type, size_t num_mapped_dimensions, TypedCells cells, size_t num_subspaces, - const std::vector &labels) + const StringIdVector &labels) : _type(type), _cells_ref(cells), _my_index(num_mapped_dimensions, num_subspaces, labels) -- cgit v1.2.3