summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorHenning Baldersheim <balder@yahoo-inc.com>2020-12-07 22:15:46 +0100
committerGitHub <noreply@github.com>2020-12-07 22:15:46 +0100
commit82d1a71afff67b0004291be4091d8a00d0dfa79e (patch)
tree3ba357bce820c97e3129279f7666ecd384ab67b6
parent663900a1ea319d3e9f72a0cd546ff3e9706eda93 (diff)
parent85bb91a52d706ea78aca40b8f354a42d1b8656c3 (diff)
Merge pull request #15728 from vespa-engine/havardpe/more-string-enum-experiments
some more shared string repo experiments
-rw-r--r--vespalib/src/tests/shared_string_repo/shared_string_repo_test.cpp80
-rw-r--r--vespalib/src/vespa/vespalib/util/shared_string_repo.cpp35
-rw-r--r--vespalib/src/vespa/vespalib/util/shared_string_repo.h80
3 files changed, 165 insertions, 30 deletions
diff --git a/vespalib/src/tests/shared_string_repo/shared_string_repo_test.cpp b/vespalib/src/tests/shared_string_repo/shared_string_repo_test.cpp
index dcf29cc1445..e8f39c88afe 100644
--- a/vespalib/src/tests/shared_string_repo/shared_string_repo_test.cpp
+++ b/vespalib/src/tests/shared_string_repo/shared_string_repo_test.cpp
@@ -4,9 +4,11 @@
#include <vespa/vespalib/util/rendezvous.h>
#include <vespa/vespalib/util/time.h>
#include <vespa/vespalib/util/stringfmt.h>
+#include <vespa/vespalib/stllike/hash_map.hpp>
#include <vespa/vespalib/testkit/test_kit.h>
#include <vector>
#include <map>
+#include <xxhash.h>
using namespace vespalib;
using make_string_short::fmt;
@@ -27,6 +29,34 @@ std::vector<vespalib::string> make_strings(size_t cnt) {
return strings;
}
+std::vector<vespalib::string> copy_strings(const std::vector<vespalib::string> &strings) {
+ std::vector<vespalib::string> result;
+ result.reserve(strings.size());
+ for (const auto &str: strings) {
+ result.push_back(str);
+ }
+ return result;
+}
+
+std::vector<std::pair<vespalib::string, uint64_t>> copy_and_hash(const std::vector<vespalib::string> &strings) {
+ std::vector<std::pair<vespalib::string, uint64_t>> result;
+ result.reserve(strings.size());
+ for (const auto &str: strings) {
+ result.emplace_back(str, XXH3_64bits(str.data(), str.size()));
+ }
+ return result;
+}
+
+std::vector<uint32_t> local_enum(const std::vector<vespalib::string> &strings) {
+ hash_map<vespalib::string, uint32_t> map(strings.size() * 2);
+ std::vector<uint32_t> result;
+ result.reserve(strings.size());
+ for (const auto &str: strings) {
+ result.push_back(map.insert(std::make_pair(str, map.size())).first->second);
+ }
+ return result;
+}
+
std::vector<Handle> resolve_strings(const std::vector<vespalib::string> &strings) {
std::vector<Handle> handles;
handles.reserve(strings.size());
@@ -45,6 +75,22 @@ std::vector<vespalib::string> get_strings(const std::vector<Handle> &handles) {
return strings;
}
+std::unique_ptr<SharedStringRepo::StrongHandles> make_strong_handles(const std::vector<vespalib::string> &strings) {
+ auto result = std::make_unique<SharedStringRepo::StrongHandles>(strings.size());
+ for (const auto &str: strings) {
+ result->add(str);
+ }
+ return result;
+}
+
+std::unique_ptr<SharedStringRepo::WeakHandles> make_weak_handles(const SharedStringRepo::HandleView &view) {
+ auto result = std::make_unique<SharedStringRepo::WeakHandles>(view.handles().size());
+ for (uint32_t handle: view.handles()) {
+ result->add(handle);
+ }
+ return result;
+}
+
//-----------------------------------------------------------------------------
struct Avg : Rendezvous<double, double> {
@@ -131,27 +177,43 @@ struct Fixture {
void benchmark(bool is_master) {
for (bool once_more = true; vote(once_more); once_more = has_budget()) {
std::vector<vespalib::string> copy_strings_result;
+ std::vector<std::pair<vespalib::string,uint64_t>> copy_and_hash_result;
+ std::vector<uint32_t> local_enum_result;
std::vector<Handle> resolve_result;
std::vector<Handle> copy_handles_result;
std::vector<Handle> resolve_again_result;
std::vector<vespalib::string> get_result;
- auto copy_strings_task = [&](){ copy_strings_result = work; };
+ std::unique_ptr<SharedStringRepo::StrongHandles> strong;
+ std::unique_ptr<SharedStringRepo::WeakHandles> weak;
+ auto copy_strings_task = [&](){ copy_strings_result = copy_strings(work); };
+ auto copy_and_hash_task = [&](){ copy_and_hash_result = copy_and_hash(work); };
+ auto local_enum_task = [&](){ local_enum_result = local_enum(work); };
auto resolve_task = [&](){ resolve_result = resolve_strings(work); };
auto copy_handles_task = [&](){ copy_handles_result = resolve_result; };
auto resolve_again_task = [&](){ resolve_again_result = resolve_strings(work); };
auto get_task = [&](){ get_result = get_strings(resolve_result); };
auto reclaim_task = [&]() { resolve_again_result.clear(); };
auto reclaim_last_task = [&]() { resolve_result.clear(); };
- measure_task("[0] copy strings", is_master, copy_strings_task);
- measure_task("[1] resolve", is_master, resolve_task);
- measure_task("[2] copy handles", is_master, copy_handles_task);
- measure_task("[3] resolve again", is_master, resolve_again_task);
+ auto make_strong_task = [&]() { strong = make_strong_handles(work); };
+ auto make_weak_task = [&]() { weak = make_weak_handles(strong->view()); };
+ auto free_weak_task = [&]() { weak.reset(); };
+ auto free_strong_task = [&]() { strong.reset(); };
+ measure_task("[01] copy strings", is_master, copy_strings_task);
+ measure_task("[02] copy and hash", is_master, copy_and_hash_task);
+ measure_task("[03] local enum", is_master, local_enum_task);
+ measure_task("[04] resolve", is_master, resolve_task);
+ measure_task("[05] copy handles", is_master, copy_handles_task);
+ measure_task("[06] resolve again", is_master, resolve_again_task);
verify_equal(resolve_result, resolve_again_result);
- measure_task("[4] as_string", is_master, get_task);
+ measure_task("[07] as_string", is_master, get_task);
verify_equal(get_result, work);
- measure_task("[5] reclaim", is_master, reclaim_task);
+ measure_task("[08] reclaim", is_master, reclaim_task);
copy_handles_result.clear();
- measure_task("[6] reclaim last", is_master, reclaim_last_task);
+ measure_task("[09] reclaim last", is_master, reclaim_last_task);
+ measure_task("[10] make strong handles", is_master, make_strong_task);
+ measure_task("[11] make weak handles", is_master, make_weak_task);
+ measure_task("[12] free weak handles", is_master, free_weak_task);
+ measure_task("[13] free strong handles", is_master, free_strong_task);
}
}
};
@@ -215,7 +277,7 @@ int main(int argc, char **argv) {
TEST_MASTER.init(__FILE__);
if ((argc == 2) && (argv[1] == std::string("verbose"))) {
verbose = true;
- budget = 10.0;
+ budget = 30.0;
work_size = 128000;
}
TEST_RUN_ALL();
diff --git a/vespalib/src/vespa/vespalib/util/shared_string_repo.cpp b/vespalib/src/vespa/vespalib/util/shared_string_repo.cpp
index 38fe5a6b7a3..a5ec9540a1b 100644
--- a/vespalib/src/vespa/vespalib/util/shared_string_repo.cpp
+++ b/vespalib/src/vespa/vespalib/util/shared_string_repo.cpp
@@ -6,6 +6,19 @@ namespace vespalib {
SharedStringRepo::Partition::~Partition() = default;
+void
+SharedStringRepo::Partition::make_entries(size_t hint)
+{
+ hint = std::max(hint, _entries.size() + 1);
+ size_t want_mem = roundUp2inN(hint * sizeof(Entry));
+ size_t want_entries = want_mem / sizeof(Entry);
+ _entries.reserve(want_entries);
+ while (_entries.size() < _entries.capacity()) {
+ _entries.emplace_back(_free);
+ _free = (_entries.size() - 1);
+ }
+}
+
SharedStringRepo::SharedStringRepo() = default;
SharedStringRepo::~SharedStringRepo() = default;
@@ -16,4 +29,26 @@ SharedStringRepo::get()
return repo;
}
+SharedStringRepo::WeakHandles::WeakHandles(size_t expect_size)
+ : _handles()
+{
+ _handles.reserve(expect_size);
+}
+
+SharedStringRepo::WeakHandles::~WeakHandles() = default;
+
+SharedStringRepo::StrongHandles::StrongHandles(size_t expect_size)
+ : _repo(SharedStringRepo::get()),
+ _handles()
+{
+ _handles.reserve(expect_size);
+}
+
+SharedStringRepo::StrongHandles::~StrongHandles()
+{
+ for (uint32_t handle: _handles) {
+ _repo.reclaim(handle);
+ }
+}
+
}
diff --git a/vespalib/src/vespa/vespalib/util/shared_string_repo.h b/vespalib/src/vespa/vespalib/util/shared_string_repo.h
index 1bdce9b47b4..afdd3a289f9 100644
--- a/vespalib/src/vespa/vespalib/util/shared_string_repo.h
+++ b/vespalib/src/vespa/vespalib/util/shared_string_repo.h
@@ -4,7 +4,8 @@
#include "spin_lock.h"
#include <vespa/vespalib/stllike/string.h>
-#include <vespa/vespalib/stllike/hash_set.hpp>
+#include <vespa/vespalib/stllike/identity.h>
+#include <vespa/vespalib/stllike/hashtable.hpp>
#include <xxhash.h>
#include <mutex>
#include <vector>
@@ -34,17 +35,20 @@ private:
class alignas(64) Partition {
public:
struct Entry {
+ static constexpr uint32_t npos = -1;
uint32_t hash;
uint32_t ref_cnt;
vespalib::string str;
- Entry(const AltKey &key) noexcept : hash(key.hash), ref_cnt(1), str(key.str) {}
- void reset() {
- str.reset();
- }
- void reuse(const AltKey &key) {
+ explicit Entry(uint32_t next) noexcept : hash(), ref_cnt(next), str() {}
+ uint32_t init(const AltKey &key) {
+ uint32_t next = ref_cnt;
hash = key.hash;
ref_cnt = 1;
str = key.str;
+ return next;
+ }
+ void fini(uint32_t next) {
+ ref_cnt = next;
}
};
struct Key {
@@ -62,30 +66,31 @@ private:
bool operator()(const Key &a, const Key &b) const { return (a.idx == b.idx); }
bool operator()(const Key &a, const AltKey &b) const { return ((a.hash == b.hash) && (entries[a.idx].str == b.str)); }
};
- using HashType = vespalib::hash_set<Key,Hash,Equal>;
+ using HashType = hashtable<Key,Key,Hash,Equal,Identity,hashtable_base::and_modulator>;
private:
SpinLock _lock;
std::vector<Entry> _entries;
- std::vector<uint32_t> _free;
+ uint32_t _free;
HashType _hash;
+ void make_entries(size_t hint);
+
uint32_t make_entry(const AltKey &alt_key) {
- if (_free.empty()) {
- uint32_t idx = _entries.size();
- _entries.emplace_back(alt_key);
- return idx;
- } else {
- uint32_t idx = _free.back();
- _free.pop_back();
- _entries[idx].reuse(alt_key);
- return idx;
+ if (__builtin_expect(_free == Entry::npos, false)) {
+ make_entries(_entries.size() * 2);
}
+ uint32_t idx = _free;
+ _free = _entries[idx].init(alt_key);
+ return idx;
}
public:
Partition()
- : _lock(), _entries(), _free(), _hash(0, Hash(), Equal(_entries)) {}
+ : _lock(), _entries(), _free(Entry::npos), _hash(128, Hash(), Equal(_entries))
+ {
+ make_entries(64);
+ }
~Partition();
uint32_t resolve(const AltKey &alt_key) {
@@ -96,7 +101,7 @@ private:
return pos->idx;
} else {
uint32_t idx = make_entry(alt_key);
- _hash.insert(Key{idx, alt_key.hash});
+ _hash.force_insert(Key{idx, alt_key.hash});
return idx;
}
}
@@ -116,8 +121,8 @@ private:
Entry &entry = _entries[idx];
if (--entry.ref_cnt == 0) {
_hash.erase(Key{idx, entry.hash});
- entry.reset();
- _free.push_back(idx);
+ entry.fini(_free);
+ _free = idx;
}
}
};
@@ -169,6 +174,7 @@ private:
public:
static SharedStringRepo &get();
+ // A single stand-alone string handle with ownership
class Handle {
private:
uint32_t _id;
@@ -195,6 +201,38 @@ public:
vespalib::string as_string() const { return get().as_string(_id); }
~Handle() { get().reclaim(_id); }
};
+
+ // Read-only access to a collection of string handles
+ class HandleView {
+ private:
+ const std::vector<uint32_t> &_handles;
+ public:
+ HandleView(const std::vector<uint32_t> &handles_in) : _handles(handles_in) {}
+ const std::vector<uint32_t> &handles() const { return _handles; }
+ };
+
+ // A collection of string handles without ownership
+ class WeakHandles {
+ private:
+ std::vector<uint32_t> _handles;
+ public:
+ WeakHandles(size_t expect_size);
+ ~WeakHandles();
+ void add(uint32_t handle) { _handles.push_back(handle); }
+ HandleView view() const { return HandleView(_handles); }
+ };
+
+ // A collection of string handles with ownership
+ class StrongHandles {
+ private:
+ SharedStringRepo &_repo;
+ std::vector<uint32_t> _handles;
+ public:
+ StrongHandles(size_t expect_size);
+ ~StrongHandles();
+ void add(vespalib::stringref str) { _handles.push_back(_repo.resolve(str)); }
+ HandleView view() const { return HandleView(_handles); }
+ };
};
}