diff options
author | Håvard Pettersen <3535158+havardpe@users.noreply.github.com> | 2022-02-09 10:27:48 +0100 |
---|---|---|
committer | GitHub <noreply@github.com> | 2022-02-09 10:27:48 +0100 |
commit | 0eeda7e810af1aeb56773b5dec49843705b61f78 (patch) | |
tree | 581fe95da2833130e3e81b448d92438e39dbfaf4 | |
parent | 6d15a9668c566483e2e169701db1231897f9e407 (diff) | |
parent | bcd1eb0bc951312509cccddd5efbc19bb939e703 (diff) |
Merge pull request #21112 from vespa-engine/balder/use-mmap-for-large-vectors-3
vector of string_id tends to become very large. Use mmap allocation a…
11 files changed, 46 insertions, 38 deletions
diff --git a/eval/src/vespa/eval/eval/fast_addr_map.h b/eval/src/vespa/eval/eval/fast_addr_map.h index a3b73afba6d..b9bc39ad619 100644 --- a/eval/src/vespa/eval/eval/fast_addr_map.h +++ b/eval/src/vespa/eval/eval/fast_addr_map.h @@ -7,7 +7,6 @@ #include <vespa/vespalib/util/string_id.h> #include <vespa/vespalib/stllike/identity.h> #include <vespa/vespalib/stllike/hashtable.h> -#include <vector> namespace vespalib::eval { @@ -62,8 +61,8 @@ public: // view able to convert tags into sparse addresses struct LabelView { size_t addr_size; - const std::vector<string_id> &labels; - LabelView(size_t num_mapped_dims, const std::vector<string_id> &labels_in) + const StringIdVector &labels; + LabelView(size_t num_mapped_dims, const StringIdVector &labels_in) : addr_size(num_mapped_dims), labels(labels_in) {} ConstArrayRef<string_id> get_addr(size_t idx) const { return {&labels[idx * addr_size], addr_size}; @@ -105,7 +104,7 @@ private: HashType _map; public: - FastAddrMap(size_t num_mapped_dims, const std::vector<string_id> &labels_in, size_t expected_subspaces) + FastAddrMap(size_t num_mapped_dims, const StringIdVector &labels_in, size_t expected_subspaces) : _labels(num_mapped_dims, labels_in), _map(expected_subspaces * 2, Hash(), Equal(_labels)) {} ~FastAddrMap(); @@ -117,7 +116,7 @@ public: ConstArrayRef<string_id> get_addr(size_t idx) const { return _labels.get_addr(idx); } size_t size() const { return _map.size(); } constexpr size_t addr_size() const { return _labels.addr_size; } - const std::vector<string_id> &labels() const { return _labels.labels; } + const StringIdVector &labels() const { return _labels.labels; } template <typename T> size_t lookup(ConstArrayRef<T> addr, uint32_t hash) const { // assert(addr_size() == addr.size()); diff --git a/eval/src/vespa/eval/eval/fast_value.hpp b/eval/src/vespa/eval/eval/fast_value.hpp index 185529b2f51..591350347e1 100644 --- a/eval/src/vespa/eval/eval/fast_value.hpp +++ b/eval/src/vespa/eval/eval/fast_value.hpp @@ -139,7 +139,7 @@ struct FastIterateView : public Value::Index::View { // operations by calling inline functions directly. struct FastValueIndex final : Value::Index { FastAddrMap map; - FastValueIndex(size_t num_mapped_dims_in, const std::vector<string_id> &labels, size_t expected_subspaces_in) + FastValueIndex(size_t num_mapped_dims_in, const StringIdVector &labels, size_t expected_subspaces_in) : map(num_mapped_dims_in, labels, expected_subspaces_in) {} size_t size() const override { return map.size(); } std::unique_ptr<View> create_view(ConstArrayRef<size_t> dims) const override; @@ -213,13 +213,12 @@ struct FastCells { template <typename T, bool transient> struct FastValue final : Value, ValueBuilder<T> { - using Handles = typename std::conditional<transient, - std::vector<string_id>, + StringIdVector, SharedStringRepo::Handles>::type; - static const std::vector<string_id> &get_view(const std::vector<string_id> &handles) { return handles; } - static const std::vector<string_id> &get_view(const SharedStringRepo::Handles &handles) { return handles.view(); } + static const StringIdVector &get_view(const StringIdVector &handles) { return handles; } + static const StringIdVector &get_view(const SharedStringRepo::Handles &handles) { return handles.view(); } ValueType my_type; size_t my_subspace_size; @@ -227,14 +226,7 @@ struct FastValue final : Value, ValueBuilder<T> { FastValueIndex my_index; FastCells<T> my_cells; - FastValue(const ValueType &type_in, size_t num_mapped_dims_in, size_t subspace_size_in, size_t expected_subspaces_in) - : my_type(type_in), my_subspace_size(subspace_size_in), - my_handles(), - my_index(num_mapped_dims_in, get_view(my_handles), expected_subspaces_in), - my_cells(subspace_size_in * expected_subspaces_in) - { - my_handles.reserve(expected_subspaces_in * num_mapped_dims_in); - } + FastValue(const ValueType &type_in, size_t num_mapped_dims_in, size_t subspace_size_in, size_t expected_subspaces_in); ~FastValue() override; const ValueType &type() const override { return my_type; } const Value::Index &index() const override { return my_index; } @@ -310,7 +302,20 @@ struct FastValue final : Value, ValueBuilder<T> { return usage; } }; -template <typename T,bool transient> FastValue<T,transient>::~FastValue() = default; + +template <typename T,bool transient> +FastValue<T,transient>::FastValue(const ValueType &type_in, size_t num_mapped_dims_in, + size_t subspace_size_in, size_t expected_subspaces_in) + : my_type(type_in), my_subspace_size(subspace_size_in), + my_handles(), + my_index(num_mapped_dims_in, get_view(my_handles), expected_subspaces_in), + my_cells(subspace_size_in * expected_subspaces_in) +{ + my_handles.reserve(expected_subspaces_in * num_mapped_dims_in); +} + +template <typename T,bool transient> +FastValue<T,transient>::~FastValue() = default; //----------------------------------------------------------------------------- diff --git a/eval/src/vespa/eval/eval/memory_usage_stuff.h b/eval/src/vespa/eval/eval/memory_usage_stuff.h index 79a4cfb0eda..10f5459b0e7 100644 --- a/eval/src/vespa/eval/eval/memory_usage_stuff.h +++ b/eval/src/vespa/eval/eval/memory_usage_stuff.h @@ -10,8 +10,9 @@ namespace vespalib::eval { template <typename T> MemoryUsage self_memory_usage() { return MemoryUsage(sizeof(T), sizeof(T), 0, 0); } -template <typename T> -MemoryUsage vector_extra_memory_usage(const std::vector<T> &vec) { +template <typename V> +MemoryUsage vector_extra_memory_usage(const V &vec) { + using T = typename V::value_type; MemoryUsage usage; usage.incAllocatedBytes(sizeof(T) * vec.capacity()); usage.incUsedBytes(sizeof(T) * vec.size()); diff --git a/eval/src/vespa/eval/streamed/streamed_value.h b/eval/src/vespa/eval/streamed/streamed_value.h index ef44eeafce3..a1927e793b2 100644 --- a/eval/src/vespa/eval/streamed/streamed_value.h +++ b/eval/src/vespa/eval/streamed/streamed_value.h @@ -2,10 +2,10 @@ #pragma once +#include "streamed_value_index.h" #include <vespa/eval/eval/value_type.h> #include <vespa/eval/eval/value.h> #include <vespa/vespalib/util/shared_string_repo.h> -#include "streamed_value_index.h" #include <cassert> namespace vespalib::eval { diff --git a/eval/src/vespa/eval/streamed/streamed_value_index.h b/eval/src/vespa/eval/streamed/streamed_value_index.h index 724874af577..d3f0f2781c4 100644 --- a/eval/src/vespa/eval/streamed/streamed_value_index.h +++ b/eval/src/vespa/eval/streamed/streamed_value_index.h @@ -16,10 +16,10 @@ class StreamedValueIndex : public Value::Index private: uint32_t _num_mapped_dims; uint32_t _num_subspaces; - const std::vector<string_id> &_labels_ref; + const StringIdVector &_labels_ref; public: - StreamedValueIndex(uint32_t num_mapped_dims, uint32_t num_subspaces, const std::vector<string_id> &labels_ref) + StreamedValueIndex(uint32_t num_mapped_dims, uint32_t num_subspaces, const StringIdVector &labels_ref) : _num_mapped_dims(num_mapped_dims), _num_subspaces(num_subspaces), _labels_ref(labels_ref) diff --git a/eval/src/vespa/eval/streamed/streamed_value_utils.h b/eval/src/vespa/eval/streamed/streamed_value_utils.h index efcaf4c6e7a..b808ad3c573 100644 --- a/eval/src/vespa/eval/streamed/streamed_value_utils.h +++ b/eval/src/vespa/eval/streamed/streamed_value_utils.h @@ -13,9 +13,9 @@ namespace vespalib::eval { * Reading more labels than available will trigger an assert. **/ struct LabelStream { - const std::vector<string_id> &source; + const StringIdVector &source; size_t pos; - LabelStream(const std::vector<string_id> &data) : source(data), pos(0) {} + LabelStream(const StringIdVector &data) : source(data), pos(0) {} string_id next_label() { assert(pos < source.size()); return source[pos++]; @@ -42,7 +42,7 @@ private: size_t _num_subspaces; LabelStream _labels; size_t _subspace_index; - std::vector<string_id> _current_address; + StringIdVector _current_address; public: LabelBlock next_block() { if (_subspace_index < _num_subspaces) { @@ -61,7 +61,7 @@ public: } LabelBlockStream(uint32_t num_subspaces, - const std::vector<string_id> &labels, + const StringIdVector &labels, uint32_t num_mapped_dims) : _num_subspaces(num_subspaces), _labels(labels), diff --git a/eval/src/vespa/eval/streamed/streamed_value_view.h b/eval/src/vespa/eval/streamed/streamed_value_view.h index 908176d5cac..53b9e733de5 100644 --- a/eval/src/vespa/eval/streamed/streamed_value_view.h +++ b/eval/src/vespa/eval/streamed/streamed_value_view.h @@ -24,7 +24,7 @@ private: public: StreamedValueView(const ValueType &type, size_t num_mapped_dimensions, TypedCells cells, size_t num_subspaces, - const std::vector<string_id> &labels) + const StringIdVector &labels) : _type(type), _cells_ref(cells), _my_index(num_mapped_dimensions, num_subspaces, labels) diff --git a/searchlib/src/vespa/searchlib/tensor/streamed_value_store.cpp b/searchlib/src/vespa/searchlib/tensor/streamed_value_store.cpp index 0901e643afa..2e6d771a870 100644 --- a/searchlib/src/vespa/searchlib/tensor/streamed_value_store.cpp +++ b/searchlib/src/vespa/searchlib/tensor/streamed_value_store.cpp @@ -11,7 +11,6 @@ #include <vespa/vespalib/objects/nbostream.h> #include <vespa/vespalib/util/size_literals.h> #include <vespa/vespalib/util/typify.h> -#include <vespa/vespalib/util/stringfmt.h> #include <vespa/log/log.h> LOG_SETUP(".searchlib.tensor.streamed_value_store"); @@ -22,6 +21,7 @@ using namespace vespalib::eval; using vespalib::ConstArrayRef; using vespalib::MemoryUsage; using vespalib::string_id; +using vespalib::StringIdVector; namespace search::tensor { @@ -61,12 +61,12 @@ struct MyFastValueView final : Value { const ValueType &my_type; FastValueIndex my_index; TypedCells my_cells; - MyFastValueView(const ValueType &type_ref, const std::vector<string_id> &handle_view, TypedCells cells, size_t num_mapped, size_t num_spaces) + MyFastValueView(const ValueType &type_ref, const StringIdVector &handle_view, TypedCells cells, size_t num_mapped, size_t num_spaces) : my_type(type_ref), my_index(num_mapped, handle_view, num_spaces), my_cells(cells) { - const std::vector<string_id> &labels = handle_view; + const StringIdVector &labels = handle_view; for (size_t i = 0; i < num_spaces; ++i) { ConstArrayRef<string_id> addr(&labels[i * num_mapped], num_mapped); my_index.map.add_mapping(FastAddrMap::hash_labels(addr)); diff --git a/vespalib/src/tests/shared_string_repo/shared_string_repo_test.cpp b/vespalib/src/tests/shared_string_repo/shared_string_repo_test.cpp index 81c8271f755..5b267c3b9e9 100644 --- a/vespalib/src/tests/shared_string_repo/shared_string_repo_test.cpp +++ b/vespalib/src/tests/shared_string_repo/shared_string_repo_test.cpp @@ -101,8 +101,8 @@ std::unique_ptr<Handles> copy_strong_handles(const Handles &handles) { return result; } -std::unique_ptr<std::vector<string_id>> make_weak_handles(const Handles &handles) { - return std::make_unique<std::vector<string_id>>(handles.view()); +std::unique_ptr<StringIdVector> make_weak_handles(const Handles &handles) { + return std::make_unique<StringIdVector>(handles.view()); } //----------------------------------------------------------------------------- @@ -202,7 +202,7 @@ struct Fixture { std::vector<vespalib::string> get_direct_result; std::unique_ptr<Handles> strong; std::unique_ptr<Handles> strong_copy; - std::unique_ptr<std::vector<string_id>> weak; + std::unique_ptr<StringIdVector> weak; auto copy_strings_task = [&](){ copy_strings_result = copy_strings(work); }; auto copy_and_hash_task = [&](){ copy_and_hash_result = copy_and_hash(work); }; auto local_enum_task = [&](){ local_enum_result = local_enum(work); }; diff --git a/vespalib/src/vespa/vespalib/util/shared_string_repo.h b/vespalib/src/vespa/vespalib/util/shared_string_repo.h index ec65b942d88..d5faa0dfb0d 100644 --- a/vespalib/src/vespa/vespalib/util/shared_string_repo.h +++ b/vespalib/src/vespa/vespalib/util/shared_string_repo.h @@ -291,7 +291,7 @@ public: // A collection of string handles with ownership class Handles { private: - std::vector<string_id> _handles; + StringIdVector _handles; public: Handles(); Handles(Handles &&rhs); @@ -309,7 +309,7 @@ public: string_id id = _repo.copy(handle); _handles.push_back(id); } - const std::vector<string_id> &view() const { return _handles; } + const StringIdVector &view() const { return _handles; } }; }; diff --git a/vespalib/src/vespa/vespalib/util/string_id.h b/vespalib/src/vespa/vespalib/util/string_id.h index 7a72feee64a..7fec1da0bb8 100644 --- a/vespalib/src/vespa/vespalib/util/string_id.h +++ b/vespalib/src/vespa/vespalib/util/string_id.h @@ -2,7 +2,8 @@ #pragma once -#include <cstdint> +#include <vespa/vespalib/stllike/allocator.h> +#include <vector> namespace vespalib { @@ -38,4 +39,6 @@ public: constexpr bool operator!=(const string_id &rhs) const noexcept { return (_id != rhs._id); } }; +using StringIdVector = std::vector<string_id, vespalib::allocator_large<string_id>>; + } |