From bcd1eb0bc951312509cccddd5efbc19bb939e703 Mon Sep 17 00:00:00 2001 From: Henning Baldersheim Date: Tue, 8 Feb 2022 22:02:29 +0000 Subject: vector of string_id tends to become very large. Use mmap allocation automatically. --- eval/src/vespa/eval/eval/fast_addr_map.h | 9 +++--- eval/src/vespa/eval/eval/fast_value.hpp | 33 +++++++++++++--------- eval/src/vespa/eval/eval/memory_usage_stuff.h | 5 ++-- eval/src/vespa/eval/streamed/streamed_value.h | 2 +- .../src/vespa/eval/streamed/streamed_value_index.h | 4 +-- .../src/vespa/eval/streamed/streamed_value_utils.h | 8 +++--- eval/src/vespa/eval/streamed/streamed_value_view.h | 2 +- .../searchlib/tensor/streamed_value_store.cpp | 6 ++-- .../shared_string_repo/shared_string_repo_test.cpp | 6 ++-- .../src/vespa/vespalib/util/shared_string_repo.h | 4 +-- vespalib/src/vespa/vespalib/util/string_id.h | 5 +++- 11 files changed, 46 insertions(+), 38 deletions(-) diff --git a/eval/src/vespa/eval/eval/fast_addr_map.h b/eval/src/vespa/eval/eval/fast_addr_map.h index a3b73afba6d..b9bc39ad619 100644 --- a/eval/src/vespa/eval/eval/fast_addr_map.h +++ b/eval/src/vespa/eval/eval/fast_addr_map.h @@ -7,7 +7,6 @@ #include #include #include -#include namespace vespalib::eval { @@ -62,8 +61,8 @@ public: // view able to convert tags into sparse addresses struct LabelView { size_t addr_size; - const std::vector &labels; - LabelView(size_t num_mapped_dims, const std::vector &labels_in) + const StringIdVector &labels; + LabelView(size_t num_mapped_dims, const StringIdVector &labels_in) : addr_size(num_mapped_dims), labels(labels_in) {} ConstArrayRef get_addr(size_t idx) const { return {&labels[idx * addr_size], addr_size}; @@ -105,7 +104,7 @@ private: HashType _map; public: - FastAddrMap(size_t num_mapped_dims, const std::vector &labels_in, size_t expected_subspaces) + FastAddrMap(size_t num_mapped_dims, const StringIdVector &labels_in, size_t expected_subspaces) : _labels(num_mapped_dims, labels_in), _map(expected_subspaces * 2, Hash(), Equal(_labels)) {} ~FastAddrMap(); @@ -117,7 +116,7 @@ public: ConstArrayRef get_addr(size_t idx) const { return _labels.get_addr(idx); } size_t size() const { return _map.size(); } constexpr size_t addr_size() const { return _labels.addr_size; } - const std::vector &labels() const { return _labels.labels; } + const StringIdVector &labels() const { return _labels.labels; } template size_t lookup(ConstArrayRef addr, uint32_t hash) const { // assert(addr_size() == addr.size()); diff --git a/eval/src/vespa/eval/eval/fast_value.hpp b/eval/src/vespa/eval/eval/fast_value.hpp index 185529b2f51..591350347e1 100644 --- a/eval/src/vespa/eval/eval/fast_value.hpp +++ b/eval/src/vespa/eval/eval/fast_value.hpp @@ -139,7 +139,7 @@ struct FastIterateView : public Value::Index::View { // operations by calling inline functions directly. struct FastValueIndex final : Value::Index { FastAddrMap map; - FastValueIndex(size_t num_mapped_dims_in, const std::vector &labels, size_t expected_subspaces_in) + FastValueIndex(size_t num_mapped_dims_in, const StringIdVector &labels, size_t expected_subspaces_in) : map(num_mapped_dims_in, labels, expected_subspaces_in) {} size_t size() const override { return map.size(); } std::unique_ptr create_view(ConstArrayRef dims) const override; @@ -213,13 +213,12 @@ struct FastCells { template struct FastValue final : Value, ValueBuilder { - using Handles = typename std::conditional, + StringIdVector, SharedStringRepo::Handles>::type; - static const std::vector &get_view(const std::vector &handles) { return handles; } - static const std::vector &get_view(const SharedStringRepo::Handles &handles) { return handles.view(); } + static const StringIdVector &get_view(const StringIdVector &handles) { return handles; } + static const StringIdVector &get_view(const SharedStringRepo::Handles &handles) { return handles.view(); } ValueType my_type; size_t my_subspace_size; @@ -227,14 +226,7 @@ struct FastValue final : Value, ValueBuilder { FastValueIndex my_index; FastCells my_cells; - FastValue(const ValueType &type_in, size_t num_mapped_dims_in, size_t subspace_size_in, size_t expected_subspaces_in) - : my_type(type_in), my_subspace_size(subspace_size_in), - my_handles(), - my_index(num_mapped_dims_in, get_view(my_handles), expected_subspaces_in), - my_cells(subspace_size_in * expected_subspaces_in) - { - my_handles.reserve(expected_subspaces_in * num_mapped_dims_in); - } + FastValue(const ValueType &type_in, size_t num_mapped_dims_in, size_t subspace_size_in, size_t expected_subspaces_in); ~FastValue() override; const ValueType &type() const override { return my_type; } const Value::Index &index() const override { return my_index; } @@ -310,7 +302,20 @@ struct FastValue final : Value, ValueBuilder { return usage; } }; -template FastValue::~FastValue() = default; + +template +FastValue::FastValue(const ValueType &type_in, size_t num_mapped_dims_in, + size_t subspace_size_in, size_t expected_subspaces_in) + : my_type(type_in), my_subspace_size(subspace_size_in), + my_handles(), + my_index(num_mapped_dims_in, get_view(my_handles), expected_subspaces_in), + my_cells(subspace_size_in * expected_subspaces_in) +{ + my_handles.reserve(expected_subspaces_in * num_mapped_dims_in); +} + +template +FastValue::~FastValue() = default; //----------------------------------------------------------------------------- diff --git a/eval/src/vespa/eval/eval/memory_usage_stuff.h b/eval/src/vespa/eval/eval/memory_usage_stuff.h index 79a4cfb0eda..10f5459b0e7 100644 --- a/eval/src/vespa/eval/eval/memory_usage_stuff.h +++ b/eval/src/vespa/eval/eval/memory_usage_stuff.h @@ -10,8 +10,9 @@ namespace vespalib::eval { template MemoryUsage self_memory_usage() { return MemoryUsage(sizeof(T), sizeof(T), 0, 0); } -template -MemoryUsage vector_extra_memory_usage(const std::vector &vec) { +template +MemoryUsage vector_extra_memory_usage(const V &vec) { + using T = typename V::value_type; MemoryUsage usage; usage.incAllocatedBytes(sizeof(T) * vec.capacity()); usage.incUsedBytes(sizeof(T) * vec.size()); diff --git a/eval/src/vespa/eval/streamed/streamed_value.h b/eval/src/vespa/eval/streamed/streamed_value.h index ef44eeafce3..a1927e793b2 100644 --- a/eval/src/vespa/eval/streamed/streamed_value.h +++ b/eval/src/vespa/eval/streamed/streamed_value.h @@ -2,10 +2,10 @@ #pragma once +#include "streamed_value_index.h" #include #include #include -#include "streamed_value_index.h" #include namespace vespalib::eval { diff --git a/eval/src/vespa/eval/streamed/streamed_value_index.h b/eval/src/vespa/eval/streamed/streamed_value_index.h index 724874af577..d3f0f2781c4 100644 --- a/eval/src/vespa/eval/streamed/streamed_value_index.h +++ b/eval/src/vespa/eval/streamed/streamed_value_index.h @@ -16,10 +16,10 @@ class StreamedValueIndex : public Value::Index private: uint32_t _num_mapped_dims; uint32_t _num_subspaces; - const std::vector &_labels_ref; + const StringIdVector &_labels_ref; public: - StreamedValueIndex(uint32_t num_mapped_dims, uint32_t num_subspaces, const std::vector &labels_ref) + StreamedValueIndex(uint32_t num_mapped_dims, uint32_t num_subspaces, const StringIdVector &labels_ref) : _num_mapped_dims(num_mapped_dims), _num_subspaces(num_subspaces), _labels_ref(labels_ref) diff --git a/eval/src/vespa/eval/streamed/streamed_value_utils.h b/eval/src/vespa/eval/streamed/streamed_value_utils.h index efcaf4c6e7a..b808ad3c573 100644 --- a/eval/src/vespa/eval/streamed/streamed_value_utils.h +++ b/eval/src/vespa/eval/streamed/streamed_value_utils.h @@ -13,9 +13,9 @@ namespace vespalib::eval { * Reading more labels than available will trigger an assert. **/ struct LabelStream { - const std::vector &source; + const StringIdVector &source; size_t pos; - LabelStream(const std::vector &data) : source(data), pos(0) {} + LabelStream(const StringIdVector &data) : source(data), pos(0) {} string_id next_label() { assert(pos < source.size()); return source[pos++]; @@ -42,7 +42,7 @@ private: size_t _num_subspaces; LabelStream _labels; size_t _subspace_index; - std::vector _current_address; + StringIdVector _current_address; public: LabelBlock next_block() { if (_subspace_index < _num_subspaces) { @@ -61,7 +61,7 @@ public: } LabelBlockStream(uint32_t num_subspaces, - const std::vector &labels, + const StringIdVector &labels, uint32_t num_mapped_dims) : _num_subspaces(num_subspaces), _labels(labels), diff --git a/eval/src/vespa/eval/streamed/streamed_value_view.h b/eval/src/vespa/eval/streamed/streamed_value_view.h index 908176d5cac..53b9e733de5 100644 --- a/eval/src/vespa/eval/streamed/streamed_value_view.h +++ b/eval/src/vespa/eval/streamed/streamed_value_view.h @@ -24,7 +24,7 @@ private: public: StreamedValueView(const ValueType &type, size_t num_mapped_dimensions, TypedCells cells, size_t num_subspaces, - const std::vector &labels) + const StringIdVector &labels) : _type(type), _cells_ref(cells), _my_index(num_mapped_dimensions, num_subspaces, labels) diff --git a/searchlib/src/vespa/searchlib/tensor/streamed_value_store.cpp b/searchlib/src/vespa/searchlib/tensor/streamed_value_store.cpp index 0901e643afa..2e6d771a870 100644 --- a/searchlib/src/vespa/searchlib/tensor/streamed_value_store.cpp +++ b/searchlib/src/vespa/searchlib/tensor/streamed_value_store.cpp @@ -11,7 +11,6 @@ #include #include #include -#include #include LOG_SETUP(".searchlib.tensor.streamed_value_store"); @@ -22,6 +21,7 @@ using namespace vespalib::eval; using vespalib::ConstArrayRef; using vespalib::MemoryUsage; using vespalib::string_id; +using vespalib::StringIdVector; namespace search::tensor { @@ -61,12 +61,12 @@ struct MyFastValueView final : Value { const ValueType &my_type; FastValueIndex my_index; TypedCells my_cells; - MyFastValueView(const ValueType &type_ref, const std::vector &handle_view, TypedCells cells, size_t num_mapped, size_t num_spaces) + MyFastValueView(const ValueType &type_ref, const StringIdVector &handle_view, TypedCells cells, size_t num_mapped, size_t num_spaces) : my_type(type_ref), my_index(num_mapped, handle_view, num_spaces), my_cells(cells) { - const std::vector &labels = handle_view; + const StringIdVector &labels = handle_view; for (size_t i = 0; i < num_spaces; ++i) { ConstArrayRef addr(&labels[i * num_mapped], num_mapped); my_index.map.add_mapping(FastAddrMap::hash_labels(addr)); diff --git a/vespalib/src/tests/shared_string_repo/shared_string_repo_test.cpp b/vespalib/src/tests/shared_string_repo/shared_string_repo_test.cpp index 81c8271f755..5b267c3b9e9 100644 --- a/vespalib/src/tests/shared_string_repo/shared_string_repo_test.cpp +++ b/vespalib/src/tests/shared_string_repo/shared_string_repo_test.cpp @@ -101,8 +101,8 @@ std::unique_ptr copy_strong_handles(const Handles &handles) { return result; } -std::unique_ptr> make_weak_handles(const Handles &handles) { - return std::make_unique>(handles.view()); +std::unique_ptr make_weak_handles(const Handles &handles) { + return std::make_unique(handles.view()); } //----------------------------------------------------------------------------- @@ -202,7 +202,7 @@ struct Fixture { std::vector get_direct_result; std::unique_ptr strong; std::unique_ptr strong_copy; - std::unique_ptr> weak; + std::unique_ptr weak; auto copy_strings_task = [&](){ copy_strings_result = copy_strings(work); }; auto copy_and_hash_task = [&](){ copy_and_hash_result = copy_and_hash(work); }; auto local_enum_task = [&](){ local_enum_result = local_enum(work); }; diff --git a/vespalib/src/vespa/vespalib/util/shared_string_repo.h b/vespalib/src/vespa/vespalib/util/shared_string_repo.h index ec65b942d88..d5faa0dfb0d 100644 --- a/vespalib/src/vespa/vespalib/util/shared_string_repo.h +++ b/vespalib/src/vespa/vespalib/util/shared_string_repo.h @@ -291,7 +291,7 @@ public: // A collection of string handles with ownership class Handles { private: - std::vector _handles; + StringIdVector _handles; public: Handles(); Handles(Handles &&rhs); @@ -309,7 +309,7 @@ public: string_id id = _repo.copy(handle); _handles.push_back(id); } - const std::vector &view() const { return _handles; } + const StringIdVector &view() const { return _handles; } }; }; diff --git a/vespalib/src/vespa/vespalib/util/string_id.h b/vespalib/src/vespa/vespalib/util/string_id.h index 7a72feee64a..7fec1da0bb8 100644 --- a/vespalib/src/vespa/vespalib/util/string_id.h +++ b/vespalib/src/vespa/vespalib/util/string_id.h @@ -2,7 +2,8 @@ #pragma once -#include +#include +#include namespace vespalib { @@ -38,4 +39,6 @@ public: constexpr bool operator!=(const string_id &rhs) const noexcept { return (_id != rhs._id); } }; +using StringIdVector = std::vector>; + } -- cgit v1.2.3