about | summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: Håvard Pettersen <3535158+havardpe@users.noreply.github.com>, 2022-02-09 10:27:48 +0100
committer: GitHub <noreply@github.com>, 2022-02-09 10:27:48 +0100
commit: 0eeda7e810af1aeb56773b5dec49843705b61f78 (patch)
tree: 581fe95da2833130e3e81b448d92438e39dbfaf4
parent: 6d15a9668c566483e2e169701db1231897f9e407 (diff)
parent: bcd1eb0bc951312509cccddd5efbc19bb939e703 (diff)
Merge pull request #21112 from vespa-engine/balder/use-mmap-for-large-vectors-3
vector of string_id tends to become very large. Use mmap allocation a…
-rw-r--r-- eval/src/vespa/eval/eval/fast_addr_map.h | 9
-rw-r--r-- eval/src/vespa/eval/eval/fast_value.hpp | 33
-rw-r--r-- eval/src/vespa/eval/eval/memory_usage_stuff.h | 5
-rw-r--r-- eval/src/vespa/eval/streamed/streamed_value.h | 2
-rw-r--r-- eval/src/vespa/eval/streamed/streamed_value_index.h | 4
-rw-r--r-- eval/src/vespa/eval/streamed/streamed_value_utils.h | 8
-rw-r--r-- eval/src/vespa/eval/streamed/streamed_value_view.h | 2
-rw-r--r-- searchlib/src/vespa/searchlib/tensor/streamed_value_store.cpp | 6
-rw-r--r-- vespalib/src/tests/shared_string_repo/shared_string_repo_test.cpp | 6
-rw-r--r-- vespalib/src/vespa/vespalib/util/shared_string_repo.h | 4
-rw-r--r-- vespalib/src/vespa/vespalib/util/string_id.h | 5
11 files changed, 46 insertions, 38 deletions
diff --git a/eval/src/vespa/eval/eval/fast_addr_map.h b/eval/src/vespa/eval/eval/fast_addr_map.h
index a3b73afba6d..b9bc39ad619 100644
--- a/eval/src/vespa/eval/eval/fast_addr_map.h
+++ b/eval/src/vespa/eval/eval/fast_addr_map.h
@@ -7,7 +7,6 @@
#include <vespa/vespalib/util/string_id.h>
#include <vespa/vespalib/stllike/identity.h>
#include <vespa/vespalib/stllike/hashtable.h>
-#include <vector>
namespace vespalib::eval {
@@ -62,8 +61,8 @@ public:
// view able to convert tags into sparse addresses
struct LabelView {
size_t addr_size;
- const std::vector<string_id> &labels;
- LabelView(size_t num_mapped_dims, const std::vector<string_id> &labels_in)
+ const StringIdVector &labels;
+ LabelView(size_t num_mapped_dims, const StringIdVector &labels_in)
: addr_size(num_mapped_dims), labels(labels_in) {}
ConstArrayRef<string_id> get_addr(size_t idx) const {
return {&labels[idx * addr_size], addr_size};
@@ -105,7 +104,7 @@ private:
HashType _map;
public:
- FastAddrMap(size_t num_mapped_dims, const std::vector<string_id> &labels_in, size_t expected_subspaces)
+ FastAddrMap(size_t num_mapped_dims, const StringIdVector &labels_in, size_t expected_subspaces)
: _labels(num_mapped_dims, labels_in),
_map(expected_subspaces * 2, Hash(), Equal(_labels)) {}
~FastAddrMap();
@@ -117,7 +116,7 @@ public:
ConstArrayRef<string_id> get_addr(size_t idx) const { return _labels.get_addr(idx); }
size_t size() const { return _map.size(); }
constexpr size_t addr_size() const { return _labels.addr_size; }
- const std::vector<string_id> &labels() const { return _labels.labels; }
+ const StringIdVector &labels() const { return _labels.labels; }
template <typename T>
size_t lookup(ConstArrayRef<T> addr, uint32_t hash) const {
// assert(addr_size() == addr.size());
diff --git a/eval/src/vespa/eval/eval/fast_value.hpp b/eval/src/vespa/eval/eval/fast_value.hpp
index 185529b2f51..591350347e1 100644
--- a/eval/src/vespa/eval/eval/fast_value.hpp
+++ b/eval/src/vespa/eval/eval/fast_value.hpp
@@ -139,7 +139,7 @@ struct FastIterateView : public Value::Index::View {
// operations by calling inline functions directly.
struct FastValueIndex final : Value::Index {
FastAddrMap map;
- FastValueIndex(size_t num_mapped_dims_in, const std::vector<string_id> &labels, size_t expected_subspaces_in)
+ FastValueIndex(size_t num_mapped_dims_in, const StringIdVector &labels, size_t expected_subspaces_in)
: map(num_mapped_dims_in, labels, expected_subspaces_in) {}
size_t size() const override { return map.size(); }
std::unique_ptr<View> create_view(ConstArrayRef<size_t> dims) const override;
@@ -213,13 +213,12 @@ struct FastCells {
template <typename T, bool transient>
struct FastValue final : Value, ValueBuilder<T> {
-
using Handles = typename std::conditional<transient,
- std::vector<string_id>,
+ StringIdVector,
SharedStringRepo::Handles>::type;
- static const std::vector<string_id> &get_view(const std::vector<string_id> &handles) { return handles; }
- static const std::vector<string_id> &get_view(const SharedStringRepo::Handles &handles) { return handles.view(); }
+ static const StringIdVector &get_view(const StringIdVector &handles) { return handles; }
+ static const StringIdVector &get_view(const SharedStringRepo::Handles &handles) { return handles.view(); }
ValueType my_type;
size_t my_subspace_size;
@@ -227,14 +226,7 @@ struct FastValue final : Value, ValueBuilder<T> {
FastValueIndex my_index;
FastCells<T> my_cells;
- FastValue(const ValueType &type_in, size_t num_mapped_dims_in, size_t subspace_size_in, size_t expected_subspaces_in)
- : my_type(type_in), my_subspace_size(subspace_size_in),
- my_handles(),
- my_index(num_mapped_dims_in, get_view(my_handles), expected_subspaces_in),
- my_cells(subspace_size_in * expected_subspaces_in)
- {
- my_handles.reserve(expected_subspaces_in * num_mapped_dims_in);
- }
+ FastValue(const ValueType &type_in, size_t num_mapped_dims_in, size_t subspace_size_in, size_t expected_subspaces_in);
~FastValue() override;
const ValueType &type() const override { return my_type; }
const Value::Index &index() const override { return my_index; }
@@ -310,7 +302,20 @@ struct FastValue final : Value, ValueBuilder<T> {
return usage;
}
};
-template <typename T,bool transient> FastValue<T,transient>::~FastValue() = default;
+
+template <typename T,bool transient>
+FastValue<T,transient>::FastValue(const ValueType &type_in, size_t num_mapped_dims_in,
+ size_t subspace_size_in, size_t expected_subspaces_in)
+ : my_type(type_in), my_subspace_size(subspace_size_in),
+ my_handles(),
+ my_index(num_mapped_dims_in, get_view(my_handles), expected_subspaces_in),
+ my_cells(subspace_size_in * expected_subspaces_in)
+{
+ my_handles.reserve(expected_subspaces_in * num_mapped_dims_in);
+}
+
+template <typename T,bool transient>
+FastValue<T,transient>::~FastValue() = default;
//-----------------------------------------------------------------------------
diff --git a/eval/src/vespa/eval/eval/memory_usage_stuff.h b/eval/src/vespa/eval/eval/memory_usage_stuff.h
index 79a4cfb0eda..10f5459b0e7 100644
--- a/eval/src/vespa/eval/eval/memory_usage_stuff.h
+++ b/eval/src/vespa/eval/eval/memory_usage_stuff.h
@@ -10,8 +10,9 @@ namespace vespalib::eval {
template <typename T>
MemoryUsage self_memory_usage() { return MemoryUsage(sizeof(T), sizeof(T), 0, 0); }
-template <typename T>
-MemoryUsage vector_extra_memory_usage(const std::vector<T> &vec) {
+template <typename V>
+MemoryUsage vector_extra_memory_usage(const V &vec) {
+ using T = typename V::value_type;
MemoryUsage usage;
usage.incAllocatedBytes(sizeof(T) * vec.capacity());
usage.incUsedBytes(sizeof(T) * vec.size());
diff --git a/eval/src/vespa/eval/streamed/streamed_value.h b/eval/src/vespa/eval/streamed/streamed_value.h
index ef44eeafce3..a1927e793b2 100644
--- a/eval/src/vespa/eval/streamed/streamed_value.h
+++ b/eval/src/vespa/eval/streamed/streamed_value.h
@@ -2,10 +2,10 @@
#pragma once
+#include "streamed_value_index.h"
#include <vespa/eval/eval/value_type.h>
#include <vespa/eval/eval/value.h>
#include <vespa/vespalib/util/shared_string_repo.h>
-#include "streamed_value_index.h"
#include <cassert>
namespace vespalib::eval {
diff --git a/eval/src/vespa/eval/streamed/streamed_value_index.h b/eval/src/vespa/eval/streamed/streamed_value_index.h
index 724874af577..d3f0f2781c4 100644
--- a/eval/src/vespa/eval/streamed/streamed_value_index.h
+++ b/eval/src/vespa/eval/streamed/streamed_value_index.h
@@ -16,10 +16,10 @@ class StreamedValueIndex : public Value::Index
private:
uint32_t _num_mapped_dims;
uint32_t _num_subspaces;
- const std::vector<string_id> &_labels_ref;
+ const StringIdVector &_labels_ref;
public:
- StreamedValueIndex(uint32_t num_mapped_dims, uint32_t num_subspaces, const std::vector<string_id> &labels_ref)
+ StreamedValueIndex(uint32_t num_mapped_dims, uint32_t num_subspaces, const StringIdVector &labels_ref)
: _num_mapped_dims(num_mapped_dims),
_num_subspaces(num_subspaces),
_labels_ref(labels_ref)
diff --git a/eval/src/vespa/eval/streamed/streamed_value_utils.h b/eval/src/vespa/eval/streamed/streamed_value_utils.h
index efcaf4c6e7a..b808ad3c573 100644
--- a/eval/src/vespa/eval/streamed/streamed_value_utils.h
+++ b/eval/src/vespa/eval/streamed/streamed_value_utils.h
@@ -13,9 +13,9 @@ namespace vespalib::eval {
* Reading more labels than available will trigger an assert.
**/
struct LabelStream {
- const std::vector<string_id> &source;
+ const StringIdVector &source;
size_t pos;
- LabelStream(const std::vector<string_id> &data) : source(data), pos(0) {}
+ LabelStream(const StringIdVector &data) : source(data), pos(0) {}
string_id next_label() {
assert(pos < source.size());
return source[pos++];
@@ -42,7 +42,7 @@ private:
size_t _num_subspaces;
LabelStream _labels;
size_t _subspace_index;
- std::vector<string_id> _current_address;
+ StringIdVector _current_address;
public:
LabelBlock next_block() {
if (_subspace_index < _num_subspaces) {
@@ -61,7 +61,7 @@ public:
}
LabelBlockStream(uint32_t num_subspaces,
- const std::vector<string_id> &labels,
+ const StringIdVector &labels,
uint32_t num_mapped_dims)
: _num_subspaces(num_subspaces),
_labels(labels),
diff --git a/eval/src/vespa/eval/streamed/streamed_value_view.h b/eval/src/vespa/eval/streamed/streamed_value_view.h
index 908176d5cac..53b9e733de5 100644
--- a/eval/src/vespa/eval/streamed/streamed_value_view.h
+++ b/eval/src/vespa/eval/streamed/streamed_value_view.h
@@ -24,7 +24,7 @@ private:
public:
StreamedValueView(const ValueType &type, size_t num_mapped_dimensions,
TypedCells cells, size_t num_subspaces,
- const std::vector<string_id> &labels)
+ const StringIdVector &labels)
: _type(type),
_cells_ref(cells),
_my_index(num_mapped_dimensions, num_subspaces, labels)
diff --git a/searchlib/src/vespa/searchlib/tensor/streamed_value_store.cpp b/searchlib/src/vespa/searchlib/tensor/streamed_value_store.cpp
index 0901e643afa..2e6d771a870 100644
--- a/searchlib/src/vespa/searchlib/tensor/streamed_value_store.cpp
+++ b/searchlib/src/vespa/searchlib/tensor/streamed_value_store.cpp
@@ -11,7 +11,6 @@
#include <vespa/vespalib/objects/nbostream.h>
#include <vespa/vespalib/util/size_literals.h>
#include <vespa/vespalib/util/typify.h>
-#include <vespa/vespalib/util/stringfmt.h>
#include <vespa/log/log.h>
LOG_SETUP(".searchlib.tensor.streamed_value_store");
@@ -22,6 +21,7 @@ using namespace vespalib::eval;
using vespalib::ConstArrayRef;
using vespalib::MemoryUsage;
using vespalib::string_id;
+using vespalib::StringIdVector;
namespace search::tensor {
@@ -61,12 +61,12 @@ struct MyFastValueView final : Value {
const ValueType &my_type;
FastValueIndex my_index;
TypedCells my_cells;
- MyFastValueView(const ValueType &type_ref, const std::vector<string_id> &handle_view, TypedCells cells, size_t num_mapped, size_t num_spaces)
+ MyFastValueView(const ValueType &type_ref, const StringIdVector &handle_view, TypedCells cells, size_t num_mapped, size_t num_spaces)
: my_type(type_ref),
my_index(num_mapped, handle_view, num_spaces),
my_cells(cells)
{
- const std::vector<string_id> &labels = handle_view;
+ const StringIdVector &labels = handle_view;
for (size_t i = 0; i < num_spaces; ++i) {
ConstArrayRef<string_id> addr(&labels[i * num_mapped], num_mapped);
my_index.map.add_mapping(FastAddrMap::hash_labels(addr));
diff --git a/vespalib/src/tests/shared_string_repo/shared_string_repo_test.cpp b/vespalib/src/tests/shared_string_repo/shared_string_repo_test.cpp
index 81c8271f755..5b267c3b9e9 100644
--- a/vespalib/src/tests/shared_string_repo/shared_string_repo_test.cpp
+++ b/vespalib/src/tests/shared_string_repo/shared_string_repo_test.cpp
@@ -101,8 +101,8 @@ std::unique_ptr<Handles> copy_strong_handles(const Handles &handles) {
return result;
}
-std::unique_ptr<std::vector<string_id>> make_weak_handles(const Handles &handles) {
- return std::make_unique<std::vector<string_id>>(handles.view());
+std::unique_ptr<StringIdVector> make_weak_handles(const Handles &handles) {
+ return std::make_unique<StringIdVector>(handles.view());
}
//-----------------------------------------------------------------------------
@@ -202,7 +202,7 @@ struct Fixture {
std::vector<vespalib::string> get_direct_result;
std::unique_ptr<Handles> strong;
std::unique_ptr<Handles> strong_copy;
- std::unique_ptr<std::vector<string_id>> weak;
+ std::unique_ptr<StringIdVector> weak;
auto copy_strings_task = [&](){ copy_strings_result = copy_strings(work); };
auto copy_and_hash_task = [&](){ copy_and_hash_result = copy_and_hash(work); };
auto local_enum_task = [&](){ local_enum_result = local_enum(work); };
diff --git a/vespalib/src/vespa/vespalib/util/shared_string_repo.h b/vespalib/src/vespa/vespalib/util/shared_string_repo.h
index ec65b942d88..d5faa0dfb0d 100644
--- a/vespalib/src/vespa/vespalib/util/shared_string_repo.h
+++ b/vespalib/src/vespa/vespalib/util/shared_string_repo.h
@@ -291,7 +291,7 @@ public:
// A collection of string handles with ownership
class Handles {
private:
- std::vector<string_id> _handles;
+ StringIdVector _handles;
public:
Handles();
Handles(Handles &&rhs);
@@ -309,7 +309,7 @@ public:
string_id id = _repo.copy(handle);
_handles.push_back(id);
}
- const std::vector<string_id> &view() const { return _handles; }
+ const StringIdVector &view() const { return _handles; }
};
};
diff --git a/vespalib/src/vespa/vespalib/util/string_id.h b/vespalib/src/vespa/vespalib/util/string_id.h
index 7a72feee64a..7fec1da0bb8 100644
--- a/vespalib/src/vespa/vespalib/util/string_id.h
+++ b/vespalib/src/vespa/vespalib/util/string_id.h
@@ -2,7 +2,8 @@
#pragma once
-#include <cstdint>
+#include <vespa/vespalib/stllike/allocator.h>
+#include <vector>
namespace vespalib {
@@ -38,4 +39,6 @@ public:
constexpr bool operator!=(const string_id &rhs) const noexcept { return (_id != rhs._id); }
};
+using StringIdVector = std::vector<string_id, vespalib::allocator_large<string_id>>;
+
}