diff options
author | Henning Baldersheim <balder@yahoo-inc.com> | 2023-01-31 06:26:50 +0000 |
---|---|---|
committer | Henning Baldersheim <balder@yahoo-inc.com> | 2023-01-31 06:26:50 +0000 |
commit | fa400d2e0bdfabebab312edb08757db0c72005be (patch) | |
tree | ea465847eb075bac03a6f31316f01dd1c12b9a68 /vespalib | |
parent | e7dcb897a30dafea405efabc13d95277f5a4c7ff (diff) |
Move implementation not benefiting from inlining to .cpp file
Diffstat (limited to 'vespalib')
-rw-r--r-- | vespalib/src/vespa/vespalib/util/shared_string_repo.cpp | 163 | ||||
-rw-r--r-- | vespalib/src/vespa/vespalib/util/shared_string_repo.h | 201 |
2 files changed, 202 insertions, 162 deletions
diff --git a/vespalib/src/vespa/vespalib/util/shared_string_repo.cpp b/vespalib/src/vespa/vespalib/util/shared_string_repo.cpp index e213c1745c6..15ad146c552 100644 --- a/vespalib/src/vespa/vespalib/util/shared_string_repo.cpp +++ b/vespalib/src/vespa/vespalib/util/shared_string_repo.cpp @@ -1,6 +1,9 @@ // Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. #include "shared_string_repo.h" +#include <xxhash.h> +#include <charconv> +#include <cassert> #include <vespa/log/log.h> LOG_SETUP(".vespalib.shared_string_repo"); @@ -49,6 +52,71 @@ SharedStringRepo::Stats::id_space_usage() const SharedStringRepo::Partition::~Partition() = default; +SharedStringRepo::Partition::Partition() + : _lock(), _entries(), _free(Entry::npos), _hash(32, Hash(), Equal(_entries)) +{ + make_entries(16); +} + +SharedStringRepo::Partition::Entry::Entry(Entry &&) noexcept = default; +SharedStringRepo::Partition::Entry::~Entry() = default; + +vespalib::string +SharedStringRepo::Partition::Entry::as_string() const { + assert(!is_free()); + return _str; +} +void +SharedStringRepo::Partition::Entry::add_ref() { + assert(!is_free()); + ++_ref_cnt; +} +bool +SharedStringRepo::Partition::Entry::sub_ref() { + assert(!is_free()); + return (--_ref_cnt == 0); +} + +uint32_t +SharedStringRepo::Partition::resolve(const AltKey &alt_key) { + bool count_refs = should_reclaim; + std::lock_guard guard(_lock); + auto pos = _hash.find(alt_key); + if (pos != _hash.end()) { + if (count_refs) { + _entries[pos->idx].add_ref(); + } + return pos->idx; + } else { + uint32_t idx = make_entry(alt_key); + _hash.force_insert(Key{idx, alt_key.hash}); + return idx; + } +} + +vespalib::string +SharedStringRepo::Partition::as_string(uint32_t idx) const { + std::lock_guard guard(_lock); + return _entries[idx].as_string(); +} + +void +SharedStringRepo::Partition::copy(uint32_t idx) { + std::lock_guard guard(_lock); + _entries[idx].add_ref(); +} + +void +SharedStringRepo::Partition::reclaim(uint32_t idx) { + std::lock_guard guard(_lock); + Entry &entry = _entries[idx]; + if (entry.sub_ref()) { + _hash.erase(Key{idx, entry.hash()}); + entry.fini(_free); + _free = idx; + } +} + void SharedStringRepo::Partition::find_leaked_entries(size_t my_idx) const { @@ -92,6 +160,16 @@ SharedStringRepo::Partition::make_entries(size_t hint) } } +uint32_t +SharedStringRepo::Partition::make_entry(const AltKey &alt_key) { + if (__builtin_expect(_free == Entry::npos, false)) { + make_entries(_entries.size() * 2); + } + uint32_t idx = _free; + _free = _entries[idx].init(alt_key); + return idx; +} + SharedStringRepo SharedStringRepo::_repo; SharedStringRepo::SharedStringRepo() = default; @@ -116,6 +194,89 @@ SharedStringRepo::stats() return stats; } +namespace { + +uint32_t +try_make_direct_id(vespalib::stringref str) noexcept { + if ((str.size() > SharedStringRepo::FAST_DIGITS) || ((str.size() > 1) && (str[0] == '0'))) { + return SharedStringRepo::ID_BIAS; + } else if (str.empty()) { + return 0; + } else { + uint32_t value = 0; + for (char c: str) { + if (!isdigit(c)) { + return SharedStringRepo::ID_BIAS; + } else { + value = ((value * 10) + (c - '0')); + } + } + return (value + 1); + } +} + +vespalib::string +string_from_direct_id(uint32_t id) { + if (id == 0) { + return {}; + } else { + char tmp[16]; + auto res = std::to_chars(tmp, tmp + sizeof(tmp), (id - 1), 10); + return {tmp, size_t(res.ptr - tmp)}; + } +} + +} + +string_id +SharedStringRepo::resolve(vespalib::stringref str) { + uint32_t direct_id = try_make_direct_id(str); + if (direct_id >= ID_BIAS) { +#pragma GCC diagnostic push +#if !defined(__clang__) && defined(__GNUC__) && __GNUC__ >= 12 +#pragma GCC diagnostic ignored "-Warray-bounds" +#endif + uint64_t full_hash = XXH3_64bits(str.data(), str.size()); +#pragma GCC diagnostic pop + uint32_t part = full_hash & PART_MASK; + uint32_t local_hash = full_hash >> PART_BITS; + uint32_t local_idx = _partitions[part].resolve(AltKey{str, local_hash}); + return string_id(((local_idx << PART_BITS) | part) + ID_BIAS); + } else { + return string_id(direct_id); + } +} + +vespalib::string +SharedStringRepo::as_string(string_id id) { + if (id._id >= ID_BIAS) { + uint32_t part = (id._id - ID_BIAS) & PART_MASK; + uint32_t local_idx = (id._id - ID_BIAS) >> PART_BITS; + return _partitions[part].as_string(local_idx); + } else { + return string_from_direct_id(id._id); + } +} + +string_id +SharedStringRepo::copy(string_id id) { + if ((id._id >= ID_BIAS) && should_reclaim) { + uint32_t part = (id._id - ID_BIAS) & PART_MASK; + uint32_t local_idx = (id._id - ID_BIAS) >> PART_BITS; + _partitions[part].copy(local_idx); + } + return id; +} + +void +SharedStringRepo::reclaim(string_id id) { + if ((id._id >= ID_BIAS) && should_reclaim) { + uint32_t part = (id._id - ID_BIAS) & PART_MASK; + uint32_t local_idx = (id._id - ID_BIAS) >> PART_BITS; + _partitions[part].reclaim(local_idx); + } +} + SharedStringRepo::Handle SharedStringRepo::Handle::handle_from_number_slow(int64_t value) { char buf[24]; @@ -128,7 +289,7 @@ SharedStringRepo::Handles::Handles() { } -SharedStringRepo::Handles::Handles(Handles &&rhs) +SharedStringRepo::Handles::Handles(Handles &&rhs) noexcept : _handles(std::move(rhs._handles)) { assert(rhs._handles.empty()); diff --git a/vespalib/src/vespa/vespalib/util/shared_string_repo.h b/vespalib/src/vespa/vespalib/util/shared_string_repo.h index 0b090d7fff1..93788019875 100644 --- a/vespalib/src/vespa/vespalib/util/shared_string_repo.h +++ b/vespalib/src/vespa/vespalib/util/shared_string_repo.h @@ -9,14 +9,11 @@ #include <vespa/vespalib/stllike/identity.h> #include <vespa/vespalib/stllike/allocator.h> #include <vespa/vespalib/stllike/hashtable.hpp> -#include <xxhash.h> #include <mutex> #include <vector> #include <array> -#include <cassert> -#include <ctype.h> +#include <cctype> #include <limits> -#include <charconv> namespace vespalib { @@ -38,16 +35,17 @@ public: Stats(); void merge(const Stats &s); static size_t part_limit(); - double id_space_usage() const; + [[nodiscard]] double id_space_usage() const; }; - +private: + static constexpr uint32_t FAST_ID_MAX = 9999999; +public: + static constexpr uint32_t FAST_DIGITS = 7; + static constexpr uint32_t ID_BIAS = (FAST_ID_MAX + 2); private: static constexpr uint32_t PART_BITS = 8; static constexpr uint32_t NUM_PARTS = 1 << PART_BITS; static constexpr uint32_t PART_MASK = NUM_PARTS - 1; - static constexpr uint32_t FAST_DIGITS = 7; - static constexpr uint32_t FAST_ID_MAX = 9999999; - static constexpr uint32_t ID_BIAS = (FAST_ID_MAX + 2); static constexpr size_t PART_LIMIT = (std::numeric_limits<uint32_t>::max() - ID_BIAS) / NUM_PARTS; static const bool should_reclaim; @@ -68,9 +66,14 @@ private: public: explicit Entry(uint32_t next) noexcept : _hash(next), _ref_cnt(npos), _str() {} - constexpr uint32_t hash() const noexcept { return _hash; } - constexpr const vespalib::string &str() const noexcept { return _str; } - constexpr bool is_free() const noexcept { return (_ref_cnt == npos); } + Entry(const Entry &) = delete; + Entry & operator =(const Entry &) = delete; + Entry(Entry &&) noexcept; + Entry & operator =(Entry &&) noexcept = delete; + ~Entry(); + [[nodiscard]] constexpr uint32_t hash() const noexcept { return _hash; } + [[nodiscard]] constexpr const vespalib::string &str() const noexcept { return _str; } + [[nodiscard]] constexpr bool is_free() const noexcept { return (_ref_cnt == npos); } uint32_t init(const AltKey &key) { uint32_t next = _hash; _hash = key.hash; @@ -83,18 +86,9 @@ private: _ref_cnt = npos; _str.reset(); } - vespalib::string as_string() const { - assert(!is_free()); - return _str; - } - void add_ref() { - assert(!is_free()); - ++_ref_cnt; - } - bool sub_ref() { - assert(!is_free()); - return (--_ref_cnt == 0); - } + [[nodiscard]] VESPA_DLL_LOCAL vespalib::string as_string() const; + VESPA_DLL_LOCAL void add_ref(); + VESPA_DLL_LOCAL bool sub_ref(); }; using EntryVector = std::vector<Entry, allocator_large<Entry>>; struct Key { @@ -107,7 +101,7 @@ private: }; struct Equal { const EntryVector &entries; - Equal(const EntryVector &entries_in) : entries(entries_in) {} + explicit Equal(const EntryVector &entries_in) : entries(entries_in) {} Equal(const Equal &rhs) = default; bool operator()(const Key &a, const Key &b) const { return (a.idx == b.idx); } bool operator()(const Key &a, const AltKey &b) const { return ((a.hash == b.hash) && (entries[a.idx].str() == b.str)); } @@ -120,62 +114,17 @@ private: uint32_t _free; HashType _hash; - void make_entries(size_t hint); - - uint32_t make_entry(const AltKey &alt_key) { - if (__builtin_expect(_free == Entry::npos, false)) { - make_entries(_entries.size() * 2); - } - uint32_t idx = _free; - _free = _entries[idx].init(alt_key); - return idx; - } - + VESPA_DLL_LOCAL void make_entries(size_t hint); + VESPA_DLL_LOCAL uint32_t make_entry(const AltKey &alt_key); public: - Partition() - : _lock(), _entries(), _free(Entry::npos), _hash(32, Hash(), Equal(_entries)) - { - make_entries(16); - } + Partition(); ~Partition(); - void find_leaked_entries(size_t my_idx) const; - Stats stats() const; - - uint32_t resolve(const AltKey &alt_key) { - bool count_refs = should_reclaim; - std::lock_guard guard(_lock); - auto pos = _hash.find(alt_key); - if (pos != _hash.end()) { - if (count_refs) { - _entries[pos->idx].add_ref(); - } - return pos->idx; - } else { - uint32_t idx = make_entry(alt_key); - _hash.force_insert(Key{idx, alt_key.hash}); - return idx; - } - } - - vespalib::string as_string(uint32_t idx) const { - std::lock_guard guard(_lock); - return _entries[idx].as_string(); - } - - void copy(uint32_t idx) { - std::lock_guard guard(_lock); - _entries[idx].add_ref(); - } - - void reclaim(uint32_t idx) { - std::lock_guard guard(_lock); - Entry &entry = _entries[idx]; - if (entry.sub_ref()) { - _hash.erase(Key{idx, entry.hash()}); - entry.fini(_free); - _free = idx; - } - } + VESPA_DLL_LOCAL void find_leaked_entries(size_t my_idx) const; + VESPA_DLL_LOCAL Stats stats() const; + VESPA_DLL_LOCAL uint32_t resolve(const AltKey &alt_key); + VESPA_DLL_LOCAL vespalib::string as_string(uint32_t idx) const; + VESPA_DLL_LOCAL void copy(uint32_t idx); + VESPA_DLL_LOCAL void reclaim(uint32_t idx); }; std::array<Partition,NUM_PARTS> _partitions; @@ -183,82 +132,12 @@ private: SharedStringRepo(); ~SharedStringRepo(); - static uint32_t try_make_direct_id(vespalib::stringref str) noexcept { - if ((str.size() > FAST_DIGITS) || ((str.size() > 1) && (str[0] == '0'))) { - return ID_BIAS; - } else if (str.size() == 0) { - return 0; - } else { - uint32_t value = 0; - for (size_t i = 0; i < str.size(); ++i) { - char c = str[i]; - if (!isdigit(c)) { - return ID_BIAS; - } else { - value = ((value * 10) + (c - '0')); - } - } - return (value + 1); - } - } - - static vespalib::string string_from_direct_id(uint32_t id) { - if (id == 0) { - return {}; - } else { - char tmp[16]; - auto res = std::to_chars(tmp, tmp + sizeof(tmp), (id - 1), 10); - return vespalib::string(tmp, res.ptr - tmp); - } - } - - string_id resolve(vespalib::stringref str) { - uint32_t direct_id = try_make_direct_id(str); - if (direct_id >= ID_BIAS) { -#pragma GCC diagnostic push -#if !defined(__clang__) && defined(__GNUC__) && __GNUC__ >= 12 -#pragma GCC diagnostic ignored "-Warray-bounds" -#endif - uint64_t full_hash = XXH3_64bits(str.data(), str.size()); -#pragma GCC diagnostic pop - uint32_t part = full_hash & PART_MASK; - uint32_t local_hash = full_hash >> PART_BITS; - uint32_t local_idx = _partitions[part].resolve(AltKey{str, local_hash}); - return string_id(((local_idx << PART_BITS) | part) + ID_BIAS); - } else { - return string_id(direct_id); - } - } - - vespalib::string as_string(string_id id) { - if (id._id >= ID_BIAS) { - uint32_t part = (id._id - ID_BIAS) & PART_MASK; - uint32_t local_idx = (id._id - ID_BIAS) >> PART_BITS; - return _partitions[part].as_string(local_idx); - } else { - return string_from_direct_id(id._id); - } - } - - string_id copy(string_id id) { - if ((id._id >= ID_BIAS) && should_reclaim) { - uint32_t part = (id._id - ID_BIAS) & PART_MASK; - uint32_t local_idx = (id._id - ID_BIAS) >> PART_BITS; - _partitions[part].copy(local_idx); - } - return id; - } - - void reclaim(string_id id) { - if ((id._id >= ID_BIAS) && should_reclaim) { - uint32_t part = (id._id - ID_BIAS) & PART_MASK; - uint32_t local_idx = (id._id - ID_BIAS) >> PART_BITS; - _partitions[part].reclaim(local_idx); - } - } + string_id resolve(vespalib::stringref str); + vespalib::string as_string(string_id id); + string_id copy(string_id id); + void reclaim(string_id id); static SharedStringRepo _repo; - public: static bool will_reclaim() { return should_reclaim; } static Stats stats(); @@ -267,11 +146,11 @@ public: class Handle { private: string_id _id; - Handle(string_id weak_id) : _id(_repo.copy(weak_id)) {} + explicit Handle(string_id weak_id) : _id(_repo.copy(weak_id)) {} static Handle handle_from_number_slow(int64_t value); public: Handle() noexcept : _id() {} - Handle(vespalib::stringref str) : _id(_repo.resolve(str)) {} + explicit Handle(vespalib::stringref str) : _id(_repo.resolve(str)) {} Handle(const Handle &rhs) : _id(_repo.copy(rhs._id)) {} Handle &operator=(const Handle &rhs) { string_id copy = _repo.copy(rhs._id); @@ -282,7 +161,7 @@ public: Handle(Handle &&rhs) noexcept : _id(rhs._id) { rhs._id = string_id(); } - Handle &operator=(Handle &&rhs) { + Handle &operator=(Handle &&rhs) noexcept { _repo.reclaim(_id); _id = rhs._id; rhs._id = string_id(); @@ -292,9 +171,9 @@ public: bool operator<(const Handle &rhs) const noexcept { return (_id < rhs._id); } bool operator==(const Handle &rhs) const noexcept { return (_id == rhs._id); } bool operator!=(const Handle &rhs) const noexcept { return (_id != rhs._id); } - string_id id() const noexcept { return _id; } - uint32_t hash() const noexcept { return _id.hash(); } - vespalib::string as_string() const { return _repo.as_string(_id); } + [[nodiscard]] string_id id() const noexcept { return _id; } + [[nodiscard]] uint32_t hash() const noexcept { return _id.hash(); } + [[nodiscard]] vespalib::string as_string() const { return _repo.as_string(_id); } static Handle handle_from_id(string_id weak_id) { return Handle(weak_id); } static Handle handle_from_number(int64_t value) { if ((value < 0) || (value > FAST_ID_MAX)) { @@ -312,7 +191,7 @@ public: StringIdVector _handles; public: Handles(); - Handles(Handles &&rhs); + Handles(Handles &&rhs) noexcept; Handles(const Handles &) = delete; Handles &operator=(const Handles &) = delete; Handles &operator=(Handles &&) = delete; @@ -327,7 +206,7 @@ public: string_id id = _repo.copy(handle); _handles.push_back(id); } - const StringIdVector &view() const { return _handles; } + [[nodiscard]] const StringIdVector &view() const { return _handles; } }; // Used by search::tensor::TensorBufferOperations |