diff options
author | Henning Baldersheim <balder@yahoo-inc.com> | 2020-12-07 22:15:46 +0100 |
---|---|---|
committer | GitHub <noreply@github.com> | 2020-12-07 22:15:46 +0100 |
commit | 82d1a71afff67b0004291be4091d8a00d0dfa79e (patch) | |
tree | 3ba357bce820c97e3129279f7666ecd384ab67b6 | |
parent | 663900a1ea319d3e9f72a0cd546ff3e9706eda93 (diff) | |
parent | 85bb91a52d706ea78aca40b8f354a42d1b8656c3 (diff) |
Merge pull request #15728 from vespa-engine/havardpe/more-string-enum-experiments
some more shared string repo experiments
3 files changed, 165 insertions, 30 deletions
diff --git a/vespalib/src/tests/shared_string_repo/shared_string_repo_test.cpp b/vespalib/src/tests/shared_string_repo/shared_string_repo_test.cpp index dcf29cc1445..e8f39c88afe 100644 --- a/vespalib/src/tests/shared_string_repo/shared_string_repo_test.cpp +++ b/vespalib/src/tests/shared_string_repo/shared_string_repo_test.cpp @@ -4,9 +4,11 @@ #include <vespa/vespalib/util/rendezvous.h> #include <vespa/vespalib/util/time.h> #include <vespa/vespalib/util/stringfmt.h> +#include <vespa/vespalib/stllike/hash_map.hpp> #include <vespa/vespalib/testkit/test_kit.h> #include <vector> #include <map> +#include <xxhash.h> using namespace vespalib; using make_string_short::fmt; @@ -27,6 +29,34 @@ std::vector<vespalib::string> make_strings(size_t cnt) { return strings; } +std::vector<vespalib::string> copy_strings(const std::vector<vespalib::string> &strings) { + std::vector<vespalib::string> result; + result.reserve(strings.size()); + for (const auto &str: strings) { + result.push_back(str); + } + return result; +} + +std::vector<std::pair<vespalib::string, uint64_t>> copy_and_hash(const std::vector<vespalib::string> &strings) { + std::vector<std::pair<vespalib::string, uint64_t>> result; + result.reserve(strings.size()); + for (const auto &str: strings) { + result.emplace_back(str, XXH3_64bits(str.data(), str.size())); + } + return result; +} + +std::vector<uint32_t> local_enum(const std::vector<vespalib::string> &strings) { + hash_map<vespalib::string, uint32_t> map(strings.size() * 2); + std::vector<uint32_t> result; + result.reserve(strings.size()); + for (const auto &str: strings) { + result.push_back(map.insert(std::make_pair(str, map.size())).first->second); + } + return result; +} + std::vector<Handle> resolve_strings(const std::vector<vespalib::string> &strings) { std::vector<Handle> handles; handles.reserve(strings.size()); @@ -45,6 +75,22 @@ std::vector<vespalib::string> get_strings(const std::vector<Handle> &handles) { return strings; } +std::unique_ptr<SharedStringRepo::StrongHandles> make_strong_handles(const std::vector<vespalib::string> &strings) { + auto result = std::make_unique<SharedStringRepo::StrongHandles>(strings.size()); + for (const auto &str: strings) { + result->add(str); + } + return result; +} + +std::unique_ptr<SharedStringRepo::WeakHandles> make_weak_handles(const SharedStringRepo::HandleView &view) { + auto result = std::make_unique<SharedStringRepo::WeakHandles>(view.handles().size()); + for (uint32_t handle: view.handles()) { + result->add(handle); + } + return result; +} + //----------------------------------------------------------------------------- struct Avg : Rendezvous<double, double> { @@ -131,27 +177,43 @@ struct Fixture { void benchmark(bool is_master) { for (bool once_more = true; vote(once_more); once_more = has_budget()) { std::vector<vespalib::string> copy_strings_result; + std::vector<std::pair<vespalib::string,uint64_t>> copy_and_hash_result; + std::vector<uint32_t> local_enum_result; std::vector<Handle> resolve_result; std::vector<Handle> copy_handles_result; std::vector<Handle> resolve_again_result; std::vector<vespalib::string> get_result; - auto copy_strings_task = [&](){ copy_strings_result = work; }; + std::unique_ptr<SharedStringRepo::StrongHandles> strong; + std::unique_ptr<SharedStringRepo::WeakHandles> weak; + auto copy_strings_task = [&](){ copy_strings_result = copy_strings(work); }; + auto copy_and_hash_task = [&](){ copy_and_hash_result = copy_and_hash(work); }; + auto local_enum_task = [&](){ local_enum_result = local_enum(work); }; auto resolve_task = [&](){ resolve_result = resolve_strings(work); }; auto copy_handles_task = [&](){ copy_handles_result = resolve_result; }; auto resolve_again_task = [&](){ resolve_again_result = resolve_strings(work); }; auto get_task = [&](){ get_result = get_strings(resolve_result); }; auto reclaim_task = [&]() { resolve_again_result.clear(); }; auto reclaim_last_task = [&]() { resolve_result.clear(); }; - measure_task("[0] copy strings", is_master, copy_strings_task); - measure_task("[1] resolve", is_master, resolve_task); - measure_task("[2] copy handles", is_master, copy_handles_task); - measure_task("[3] resolve again", is_master, resolve_again_task); + auto make_strong_task = [&]() { strong = make_strong_handles(work); }; + auto make_weak_task = [&]() { weak = make_weak_handles(strong->view()); }; + auto free_weak_task = [&]() { weak.reset(); }; + auto free_strong_task = [&]() { strong.reset(); }; + measure_task("[01] copy strings", is_master, copy_strings_task); + measure_task("[02] copy and hash", is_master, copy_and_hash_task); + measure_task("[03] local enum", is_master, local_enum_task); + measure_task("[04] resolve", is_master, resolve_task); + measure_task("[05] copy handles", is_master, copy_handles_task); + measure_task("[06] resolve again", is_master, resolve_again_task); verify_equal(resolve_result, resolve_again_result); - measure_task("[4] as_string", is_master, get_task); + measure_task("[07] as_string", is_master, get_task); verify_equal(get_result, work); - measure_task("[5] reclaim", is_master, reclaim_task); + measure_task("[08] reclaim", is_master, reclaim_task); copy_handles_result.clear(); - measure_task("[6] reclaim last", is_master, reclaim_last_task); + measure_task("[09] reclaim last", is_master, reclaim_last_task); + measure_task("[10] make strong handles", is_master, make_strong_task); + measure_task("[11] make weak handles", is_master, make_weak_task); + measure_task("[12] free weak handles", is_master, free_weak_task); + measure_task("[13] free strong handles", is_master, free_strong_task); } } }; @@ -215,7 +277,7 @@ int main(int argc, char **argv) { TEST_MASTER.init(__FILE__); if ((argc == 2) && (argv[1] == std::string("verbose"))) { verbose = true; - budget = 10.0; + budget = 30.0; work_size = 128000; } TEST_RUN_ALL(); diff --git a/vespalib/src/vespa/vespalib/util/shared_string_repo.cpp b/vespalib/src/vespa/vespalib/util/shared_string_repo.cpp index 38fe5a6b7a3..a5ec9540a1b 100644 --- a/vespalib/src/vespa/vespalib/util/shared_string_repo.cpp +++ b/vespalib/src/vespa/vespalib/util/shared_string_repo.cpp @@ -6,6 +6,19 @@ namespace vespalib { SharedStringRepo::Partition::~Partition() = default; +void +SharedStringRepo::Partition::make_entries(size_t hint) +{ + hint = std::max(hint, _entries.size() + 1); + size_t want_mem = roundUp2inN(hint * sizeof(Entry)); + size_t want_entries = want_mem / sizeof(Entry); + _entries.reserve(want_entries); + while (_entries.size() < _entries.capacity()) { + _entries.emplace_back(_free); + _free = (_entries.size() - 1); + } +} + SharedStringRepo::SharedStringRepo() = default; SharedStringRepo::~SharedStringRepo() = default; @@ -16,4 +29,26 @@ SharedStringRepo::get() return repo; } +SharedStringRepo::WeakHandles::WeakHandles(size_t expect_size) + : _handles() +{ + _handles.reserve(expect_size); +} + +SharedStringRepo::WeakHandles::~WeakHandles() = default; + +SharedStringRepo::StrongHandles::StrongHandles(size_t expect_size) + : _repo(SharedStringRepo::get()), + _handles() +{ + _handles.reserve(expect_size); +} + +SharedStringRepo::StrongHandles::~StrongHandles() +{ + for (uint32_t handle: _handles) { + _repo.reclaim(handle); + } +} + } diff --git a/vespalib/src/vespa/vespalib/util/shared_string_repo.h b/vespalib/src/vespa/vespalib/util/shared_string_repo.h index 1bdce9b47b4..afdd3a289f9 100644 --- a/vespalib/src/vespa/vespalib/util/shared_string_repo.h +++ b/vespalib/src/vespa/vespalib/util/shared_string_repo.h @@ -4,7 +4,8 @@ #include "spin_lock.h" #include <vespa/vespalib/stllike/string.h> -#include <vespa/vespalib/stllike/hash_set.hpp> +#include <vespa/vespalib/stllike/identity.h> +#include <vespa/vespalib/stllike/hashtable.hpp> #include <xxhash.h> #include <mutex> #include <vector> @@ -34,17 +35,20 @@ private: class alignas(64) Partition { public: struct Entry { + static constexpr uint32_t npos = -1; uint32_t hash; uint32_t ref_cnt; vespalib::string str; - Entry(const AltKey &key) noexcept : hash(key.hash), ref_cnt(1), str(key.str) {} - void reset() { - str.reset(); - } - void reuse(const AltKey &key) { + explicit Entry(uint32_t next) noexcept : hash(), ref_cnt(next), str() {} + uint32_t init(const AltKey &key) { + uint32_t next = ref_cnt; hash = key.hash; ref_cnt = 1; str = key.str; + return next; + } + void fini(uint32_t next) { + ref_cnt = next; } }; struct Key { @@ -62,30 +66,31 @@ private: bool operator()(const Key &a, const Key &b) const { return (a.idx == b.idx); } bool operator()(const Key &a, const AltKey &b) const { return ((a.hash == b.hash) && (entries[a.idx].str == b.str)); } }; - using HashType = vespalib::hash_set<Key,Hash,Equal>; + using HashType = hashtable<Key,Key,Hash,Equal,Identity,hashtable_base::and_modulator>; private: SpinLock _lock; std::vector<Entry> _entries; - std::vector<uint32_t> _free; + uint32_t _free; HashType _hash; + void make_entries(size_t hint); + uint32_t make_entry(const AltKey &alt_key) { - if (_free.empty()) { - uint32_t idx = _entries.size(); - _entries.emplace_back(alt_key); - return idx; - } else { - uint32_t idx = _free.back(); - _free.pop_back(); - _entries[idx].reuse(alt_key); - return idx; + if (__builtin_expect(_free == Entry::npos, false)) { + make_entries(_entries.size() * 2); } + uint32_t idx = _free; + _free = _entries[idx].init(alt_key); + return idx; } public: Partition() - : _lock(), _entries(), _free(), _hash(0, Hash(), Equal(_entries)) {} + : _lock(), _entries(), _free(Entry::npos), _hash(128, Hash(), Equal(_entries)) + { + make_entries(64); + } ~Partition(); uint32_t resolve(const AltKey &alt_key) { @@ -96,7 +101,7 @@ private: return pos->idx; } else { uint32_t idx = make_entry(alt_key); - _hash.insert(Key{idx, alt_key.hash}); + _hash.force_insert(Key{idx, alt_key.hash}); return idx; } } @@ -116,8 +121,8 @@ private: Entry &entry = _entries[idx]; if (--entry.ref_cnt == 0) { _hash.erase(Key{idx, entry.hash}); - entry.reset(); - _free.push_back(idx); + entry.fini(_free); + _free = idx; } } }; @@ -169,6 +174,7 @@ private: public: static SharedStringRepo &get(); + // A single stand-alone string handle with ownership class Handle { private: uint32_t _id; @@ -195,6 +201,38 @@ public: vespalib::string as_string() const { return get().as_string(_id); } ~Handle() { get().reclaim(_id); } }; + + // Read-only access to a collection of string handles + class HandleView { + private: + const std::vector<uint32_t> &_handles; + public: + HandleView(const std::vector<uint32_t> &handles_in) : _handles(handles_in) {} + const std::vector<uint32_t> &handles() const { return _handles; } + }; + + // A collection of string handles without ownership + class WeakHandles { + private: + std::vector<uint32_t> _handles; + public: + WeakHandles(size_t expect_size); + ~WeakHandles(); + void add(uint32_t handle) { _handles.push_back(handle); } + HandleView view() const { return HandleView(_handles); } + }; + + // A collection of string handles with ownership + class StrongHandles { + private: + SharedStringRepo &_repo; + std::vector<uint32_t> _handles; + public: + StrongHandles(size_t expect_size); + ~StrongHandles(); + void add(vespalib::stringref str) { _handles.push_back(_repo.resolve(str)); } + HandleView view() const { return HandleView(_handles); } + }; }; } |