summaryrefslogtreecommitdiffstats
path: root/vespalib
diff options
context:
space:
mode:
authorHenning Baldersheim <balder@yahoo-inc.com>2023-01-31 06:26:50 +0000
committerHenning Baldersheim <balder@yahoo-inc.com>2023-01-31 06:26:50 +0000
commitfa400d2e0bdfabebab312edb08757db0c72005be (patch)
treeea465847eb075bac03a6f31316f01dd1c12b9a68 /vespalib
parente7dcb897a30dafea405efabc13d95277f5a4c7ff (diff)
Move implementation not benefiting from inlining to .cpp file
Diffstat (limited to 'vespalib')
-rw-r--r--vespalib/src/vespa/vespalib/util/shared_string_repo.cpp163
-rw-r--r--vespalib/src/vespa/vespalib/util/shared_string_repo.h201
2 files changed, 202 insertions, 162 deletions
diff --git a/vespalib/src/vespa/vespalib/util/shared_string_repo.cpp b/vespalib/src/vespa/vespalib/util/shared_string_repo.cpp
index e213c1745c6..15ad146c552 100644
--- a/vespalib/src/vespa/vespalib/util/shared_string_repo.cpp
+++ b/vespalib/src/vespa/vespalib/util/shared_string_repo.cpp
@@ -1,6 +1,9 @@
// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
#include "shared_string_repo.h"
+#include <xxhash.h>
+#include <charconv>
+#include <cassert>
#include <vespa/log/log.h>
LOG_SETUP(".vespalib.shared_string_repo");
@@ -49,6 +52,71 @@ SharedStringRepo::Stats::id_space_usage() const
SharedStringRepo::Partition::~Partition() = default;
+SharedStringRepo::Partition::Partition()
+ : _lock(), _entries(), _free(Entry::npos), _hash(32, Hash(), Equal(_entries))
+{
+ make_entries(16);
+}
+
+SharedStringRepo::Partition::Entry::Entry(Entry &&) noexcept = default;
+SharedStringRepo::Partition::Entry::~Entry() = default;
+
+vespalib::string
+SharedStringRepo::Partition::Entry::as_string() const {
+ assert(!is_free());
+ return _str;
+}
+void
+SharedStringRepo::Partition::Entry::add_ref() {
+ assert(!is_free());
+ ++_ref_cnt;
+}
+bool
+SharedStringRepo::Partition::Entry::sub_ref() {
+ assert(!is_free());
+ return (--_ref_cnt == 0);
+}
+
+uint32_t
+SharedStringRepo::Partition::resolve(const AltKey &alt_key) {
+ bool count_refs = should_reclaim;
+ std::lock_guard guard(_lock);
+ auto pos = _hash.find(alt_key);
+ if (pos != _hash.end()) {
+ if (count_refs) {
+ _entries[pos->idx].add_ref();
+ }
+ return pos->idx;
+ } else {
+ uint32_t idx = make_entry(alt_key);
+ _hash.force_insert(Key{idx, alt_key.hash});
+ return idx;
+ }
+}
+
+vespalib::string
+SharedStringRepo::Partition::as_string(uint32_t idx) const {
+ std::lock_guard guard(_lock);
+ return _entries[idx].as_string();
+}
+
+void
+SharedStringRepo::Partition::copy(uint32_t idx) {
+ std::lock_guard guard(_lock);
+ _entries[idx].add_ref();
+}
+
+void
+SharedStringRepo::Partition::reclaim(uint32_t idx) {
+ std::lock_guard guard(_lock);
+ Entry &entry = _entries[idx];
+ if (entry.sub_ref()) {
+ _hash.erase(Key{idx, entry.hash()});
+ entry.fini(_free);
+ _free = idx;
+ }
+}
+
void
SharedStringRepo::Partition::find_leaked_entries(size_t my_idx) const
{
@@ -92,6 +160,16 @@ SharedStringRepo::Partition::make_entries(size_t hint)
}
}
+uint32_t
+SharedStringRepo::Partition::make_entry(const AltKey &alt_key) {
+ if (__builtin_expect(_free == Entry::npos, false)) {
+ make_entries(_entries.size() * 2);
+ }
+ uint32_t idx = _free;
+ _free = _entries[idx].init(alt_key);
+ return idx;
+}
+
SharedStringRepo SharedStringRepo::_repo;
SharedStringRepo::SharedStringRepo() = default;
@@ -116,6 +194,89 @@ SharedStringRepo::stats()
return stats;
}
+namespace {
+
+uint32_t
+try_make_direct_id(vespalib::stringref str) noexcept {
+ if ((str.size() > SharedStringRepo::FAST_DIGITS) || ((str.size() > 1) && (str[0] == '0'))) {
+ return SharedStringRepo::ID_BIAS;
+ } else if (str.empty()) {
+ return 0;
+ } else {
+ uint32_t value = 0;
+ for (char c: str) {
+ if (!isdigit(c)) {
+ return SharedStringRepo::ID_BIAS;
+ } else {
+ value = ((value * 10) + (c - '0'));
+ }
+ }
+ return (value + 1);
+ }
+}
+
+vespalib::string
+string_from_direct_id(uint32_t id) {
+ if (id == 0) {
+ return {};
+ } else {
+ char tmp[16];
+ auto res = std::to_chars(tmp, tmp + sizeof(tmp), (id - 1), 10);
+ return {tmp, size_t(res.ptr - tmp)};
+ }
+}
+
+}
+
+string_id
+SharedStringRepo::resolve(vespalib::stringref str) {
+ uint32_t direct_id = try_make_direct_id(str);
+ if (direct_id >= ID_BIAS) {
+#pragma GCC diagnostic push
+#if !defined(__clang__) && defined(__GNUC__) && __GNUC__ >= 12
+#pragma GCC diagnostic ignored "-Warray-bounds"
+#endif
+ uint64_t full_hash = XXH3_64bits(str.data(), str.size());
+#pragma GCC diagnostic pop
+ uint32_t part = full_hash & PART_MASK;
+ uint32_t local_hash = full_hash >> PART_BITS;
+ uint32_t local_idx = _partitions[part].resolve(AltKey{str, local_hash});
+ return string_id(((local_idx << PART_BITS) | part) + ID_BIAS);
+ } else {
+ return string_id(direct_id);
+ }
+}
+
+vespalib::string
+SharedStringRepo::as_string(string_id id) {
+ if (id._id >= ID_BIAS) {
+ uint32_t part = (id._id - ID_BIAS) & PART_MASK;
+ uint32_t local_idx = (id._id - ID_BIAS) >> PART_BITS;
+ return _partitions[part].as_string(local_idx);
+ } else {
+ return string_from_direct_id(id._id);
+ }
+}
+
+string_id
+SharedStringRepo::copy(string_id id) {
+ if ((id._id >= ID_BIAS) && should_reclaim) {
+ uint32_t part = (id._id - ID_BIAS) & PART_MASK;
+ uint32_t local_idx = (id._id - ID_BIAS) >> PART_BITS;
+ _partitions[part].copy(local_idx);
+ }
+ return id;
+}
+
+void
+SharedStringRepo::reclaim(string_id id) {
+ if ((id._id >= ID_BIAS) && should_reclaim) {
+ uint32_t part = (id._id - ID_BIAS) & PART_MASK;
+ uint32_t local_idx = (id._id - ID_BIAS) >> PART_BITS;
+ _partitions[part].reclaim(local_idx);
+ }
+}
+
SharedStringRepo::Handle
SharedStringRepo::Handle::handle_from_number_slow(int64_t value) {
char buf[24];
@@ -128,7 +289,7 @@ SharedStringRepo::Handles::Handles()
{
}
-SharedStringRepo::Handles::Handles(Handles &&rhs)
+SharedStringRepo::Handles::Handles(Handles &&rhs) noexcept
: _handles(std::move(rhs._handles))
{
assert(rhs._handles.empty());
diff --git a/vespalib/src/vespa/vespalib/util/shared_string_repo.h b/vespalib/src/vespa/vespalib/util/shared_string_repo.h
index 0b090d7fff1..93788019875 100644
--- a/vespalib/src/vespa/vespalib/util/shared_string_repo.h
+++ b/vespalib/src/vespa/vespalib/util/shared_string_repo.h
@@ -9,14 +9,11 @@
#include <vespa/vespalib/stllike/identity.h>
#include <vespa/vespalib/stllike/allocator.h>
#include <vespa/vespalib/stllike/hashtable.hpp>
-#include <xxhash.h>
#include <mutex>
#include <vector>
#include <array>
-#include <cassert>
-#include <ctype.h>
+#include <cctype>
#include <limits>
-#include <charconv>
namespace vespalib {
@@ -38,16 +35,17 @@ public:
Stats();
void merge(const Stats &s);
static size_t part_limit();
- double id_space_usage() const;
+ [[nodiscard]] double id_space_usage() const;
};
-
+private:
+ static constexpr uint32_t FAST_ID_MAX = 9999999;
+public:
+ static constexpr uint32_t FAST_DIGITS = 7;
+ static constexpr uint32_t ID_BIAS = (FAST_ID_MAX + 2);
private:
static constexpr uint32_t PART_BITS = 8;
static constexpr uint32_t NUM_PARTS = 1 << PART_BITS;
static constexpr uint32_t PART_MASK = NUM_PARTS - 1;
- static constexpr uint32_t FAST_DIGITS = 7;
- static constexpr uint32_t FAST_ID_MAX = 9999999;
- static constexpr uint32_t ID_BIAS = (FAST_ID_MAX + 2);
static constexpr size_t PART_LIMIT = (std::numeric_limits<uint32_t>::max() - ID_BIAS) / NUM_PARTS;
static const bool should_reclaim;
@@ -68,9 +66,14 @@ private:
public:
explicit Entry(uint32_t next) noexcept
: _hash(next), _ref_cnt(npos), _str() {}
- constexpr uint32_t hash() const noexcept { return _hash; }
- constexpr const vespalib::string &str() const noexcept { return _str; }
- constexpr bool is_free() const noexcept { return (_ref_cnt == npos); }
+ Entry(const Entry &) = delete;
+ Entry & operator =(const Entry &) = delete;
+ Entry(Entry &&) noexcept;
+ Entry & operator =(Entry &&) noexcept = delete;
+ ~Entry();
+ [[nodiscard]] constexpr uint32_t hash() const noexcept { return _hash; }
+ [[nodiscard]] constexpr const vespalib::string &str() const noexcept { return _str; }
+ [[nodiscard]] constexpr bool is_free() const noexcept { return (_ref_cnt == npos); }
uint32_t init(const AltKey &key) {
uint32_t next = _hash;
_hash = key.hash;
@@ -83,18 +86,9 @@ private:
_ref_cnt = npos;
_str.reset();
}
- vespalib::string as_string() const {
- assert(!is_free());
- return _str;
- }
- void add_ref() {
- assert(!is_free());
- ++_ref_cnt;
- }
- bool sub_ref() {
- assert(!is_free());
- return (--_ref_cnt == 0);
- }
+ [[nodiscard]] VESPA_DLL_LOCAL vespalib::string as_string() const;
+ VESPA_DLL_LOCAL void add_ref();
+ VESPA_DLL_LOCAL bool sub_ref();
};
using EntryVector = std::vector<Entry, allocator_large<Entry>>;
struct Key {
@@ -107,7 +101,7 @@ private:
};
struct Equal {
const EntryVector &entries;
- Equal(const EntryVector &entries_in) : entries(entries_in) {}
+ explicit Equal(const EntryVector &entries_in) : entries(entries_in) {}
Equal(const Equal &rhs) = default;
bool operator()(const Key &a, const Key &b) const { return (a.idx == b.idx); }
bool operator()(const Key &a, const AltKey &b) const { return ((a.hash == b.hash) && (entries[a.idx].str() == b.str)); }
@@ -120,62 +114,17 @@ private:
uint32_t _free;
HashType _hash;
- void make_entries(size_t hint);
-
- uint32_t make_entry(const AltKey &alt_key) {
- if (__builtin_expect(_free == Entry::npos, false)) {
- make_entries(_entries.size() * 2);
- }
- uint32_t idx = _free;
- _free = _entries[idx].init(alt_key);
- return idx;
- }
-
+ VESPA_DLL_LOCAL void make_entries(size_t hint);
+ VESPA_DLL_LOCAL uint32_t make_entry(const AltKey &alt_key);
public:
- Partition()
- : _lock(), _entries(), _free(Entry::npos), _hash(32, Hash(), Equal(_entries))
- {
- make_entries(16);
- }
+ Partition();
~Partition();
- void find_leaked_entries(size_t my_idx) const;
- Stats stats() const;
-
- uint32_t resolve(const AltKey &alt_key) {
- bool count_refs = should_reclaim;
- std::lock_guard guard(_lock);
- auto pos = _hash.find(alt_key);
- if (pos != _hash.end()) {
- if (count_refs) {
- _entries[pos->idx].add_ref();
- }
- return pos->idx;
- } else {
- uint32_t idx = make_entry(alt_key);
- _hash.force_insert(Key{idx, alt_key.hash});
- return idx;
- }
- }
-
- vespalib::string as_string(uint32_t idx) const {
- std::lock_guard guard(_lock);
- return _entries[idx].as_string();
- }
-
- void copy(uint32_t idx) {
- std::lock_guard guard(_lock);
- _entries[idx].add_ref();
- }
-
- void reclaim(uint32_t idx) {
- std::lock_guard guard(_lock);
- Entry &entry = _entries[idx];
- if (entry.sub_ref()) {
- _hash.erase(Key{idx, entry.hash()});
- entry.fini(_free);
- _free = idx;
- }
- }
+ VESPA_DLL_LOCAL void find_leaked_entries(size_t my_idx) const;
+ VESPA_DLL_LOCAL Stats stats() const;
+ VESPA_DLL_LOCAL uint32_t resolve(const AltKey &alt_key);
+ VESPA_DLL_LOCAL vespalib::string as_string(uint32_t idx) const;
+ VESPA_DLL_LOCAL void copy(uint32_t idx);
+ VESPA_DLL_LOCAL void reclaim(uint32_t idx);
};
std::array<Partition,NUM_PARTS> _partitions;
@@ -183,82 +132,12 @@ private:
SharedStringRepo();
~SharedStringRepo();
- static uint32_t try_make_direct_id(vespalib::stringref str) noexcept {
- if ((str.size() > FAST_DIGITS) || ((str.size() > 1) && (str[0] == '0'))) {
- return ID_BIAS;
- } else if (str.size() == 0) {
- return 0;
- } else {
- uint32_t value = 0;
- for (size_t i = 0; i < str.size(); ++i) {
- char c = str[i];
- if (!isdigit(c)) {
- return ID_BIAS;
- } else {
- value = ((value * 10) + (c - '0'));
- }
- }
- return (value + 1);
- }
- }
-
- static vespalib::string string_from_direct_id(uint32_t id) {
- if (id == 0) {
- return {};
- } else {
- char tmp[16];
- auto res = std::to_chars(tmp, tmp + sizeof(tmp), (id - 1), 10);
- return vespalib::string(tmp, res.ptr - tmp);
- }
- }
-
- string_id resolve(vespalib::stringref str) {
- uint32_t direct_id = try_make_direct_id(str);
- if (direct_id >= ID_BIAS) {
-#pragma GCC diagnostic push
-#if !defined(__clang__) && defined(__GNUC__) && __GNUC__ >= 12
-#pragma GCC diagnostic ignored "-Warray-bounds"
-#endif
- uint64_t full_hash = XXH3_64bits(str.data(), str.size());
-#pragma GCC diagnostic pop
- uint32_t part = full_hash & PART_MASK;
- uint32_t local_hash = full_hash >> PART_BITS;
- uint32_t local_idx = _partitions[part].resolve(AltKey{str, local_hash});
- return string_id(((local_idx << PART_BITS) | part) + ID_BIAS);
- } else {
- return string_id(direct_id);
- }
- }
-
- vespalib::string as_string(string_id id) {
- if (id._id >= ID_BIAS) {
- uint32_t part = (id._id - ID_BIAS) & PART_MASK;
- uint32_t local_idx = (id._id - ID_BIAS) >> PART_BITS;
- return _partitions[part].as_string(local_idx);
- } else {
- return string_from_direct_id(id._id);
- }
- }
-
- string_id copy(string_id id) {
- if ((id._id >= ID_BIAS) && should_reclaim) {
- uint32_t part = (id._id - ID_BIAS) & PART_MASK;
- uint32_t local_idx = (id._id - ID_BIAS) >> PART_BITS;
- _partitions[part].copy(local_idx);
- }
- return id;
- }
-
- void reclaim(string_id id) {
- if ((id._id >= ID_BIAS) && should_reclaim) {
- uint32_t part = (id._id - ID_BIAS) & PART_MASK;
- uint32_t local_idx = (id._id - ID_BIAS) >> PART_BITS;
- _partitions[part].reclaim(local_idx);
- }
- }
+ string_id resolve(vespalib::stringref str);
+ vespalib::string as_string(string_id id);
+ string_id copy(string_id id);
+ void reclaim(string_id id);
static SharedStringRepo _repo;
-
public:
static bool will_reclaim() { return should_reclaim; }
static Stats stats();
@@ -267,11 +146,11 @@ public:
class Handle {
private:
string_id _id;
- Handle(string_id weak_id) : _id(_repo.copy(weak_id)) {}
+ explicit Handle(string_id weak_id) : _id(_repo.copy(weak_id)) {}
static Handle handle_from_number_slow(int64_t value);
public:
Handle() noexcept : _id() {}
- Handle(vespalib::stringref str) : _id(_repo.resolve(str)) {}
+ explicit Handle(vespalib::stringref str) : _id(_repo.resolve(str)) {}
Handle(const Handle &rhs) : _id(_repo.copy(rhs._id)) {}
Handle &operator=(const Handle &rhs) {
string_id copy = _repo.copy(rhs._id);
@@ -282,7 +161,7 @@ public:
Handle(Handle &&rhs) noexcept : _id(rhs._id) {
rhs._id = string_id();
}
- Handle &operator=(Handle &&rhs) {
+ Handle &operator=(Handle &&rhs) noexcept {
_repo.reclaim(_id);
_id = rhs._id;
rhs._id = string_id();
@@ -292,9 +171,9 @@ public:
bool operator<(const Handle &rhs) const noexcept { return (_id < rhs._id); }
bool operator==(const Handle &rhs) const noexcept { return (_id == rhs._id); }
bool operator!=(const Handle &rhs) const noexcept { return (_id != rhs._id); }
- string_id id() const noexcept { return _id; }
- uint32_t hash() const noexcept { return _id.hash(); }
- vespalib::string as_string() const { return _repo.as_string(_id); }
+ [[nodiscard]] string_id id() const noexcept { return _id; }
+ [[nodiscard]] uint32_t hash() const noexcept { return _id.hash(); }
+ [[nodiscard]] vespalib::string as_string() const { return _repo.as_string(_id); }
static Handle handle_from_id(string_id weak_id) { return Handle(weak_id); }
static Handle handle_from_number(int64_t value) {
if ((value < 0) || (value > FAST_ID_MAX)) {
@@ -312,7 +191,7 @@ public:
StringIdVector _handles;
public:
Handles();
- Handles(Handles &&rhs);
+ Handles(Handles &&rhs) noexcept;
Handles(const Handles &) = delete;
Handles &operator=(const Handles &) = delete;
Handles &operator=(Handles &&) = delete;
@@ -327,7 +206,7 @@ public:
string_id id = _repo.copy(handle);
_handles.push_back(id);
}
- const StringIdVector &view() const { return _handles; }
+ [[nodiscard]] const StringIdVector &view() const { return _handles; }
};
// Used by search::tensor::TensorBufferOperations