diff options
Diffstat (limited to 'vespalib')
4 files changed, 275 insertions, 121 deletions
diff --git a/vespalib/src/tests/shared_string_repo/shared_string_repo_test.cpp b/vespalib/src/tests/shared_string_repo/shared_string_repo_test.cpp index e8f39c88afe..d560513e24d 100644 --- a/vespalib/src/tests/shared_string_repo/shared_string_repo_test.cpp +++ b/vespalib/src/tests/shared_string_repo/shared_string_repo_test.cpp @@ -13,6 +13,7 @@ using namespace vespalib; using make_string_short::fmt; using Handle = SharedStringRepo::Handle; +using Handles = SharedStringRepo::Handles; bool verbose = false; double budget = 0.10; @@ -30,12 +31,7 @@ std::vector<vespalib::string> make_strings(size_t cnt) { } std::vector<vespalib::string> copy_strings(const std::vector<vespalib::string> &strings) { - std::vector<vespalib::string> result; - result.reserve(strings.size()); - for (const auto &str: strings) { - result.push_back(str); - } - return result; + return strings; } std::vector<std::pair<vespalib::string, uint64_t>> copy_and_hash(const std::vector<vespalib::string> &strings) { @@ -75,20 +71,27 @@ std::vector<vespalib::string> get_strings(const std::vector<Handle> &handles) { return strings; } -std::unique_ptr<SharedStringRepo::StrongHandles> make_strong_handles(const std::vector<vespalib::string> &strings) { - auto result = std::make_unique<SharedStringRepo::StrongHandles>(strings.size()); +std::unique_ptr<SharedStringRepo::Handles> make_strong_handles(const std::vector<vespalib::string> &strings) { + auto result = std::make_unique<SharedStringRepo::Handles>(); + result->reserve(strings.size()); for (const auto &str: strings) { result->add(str); } return result; } -std::unique_ptr<SharedStringRepo::WeakHandles> make_weak_handles(const SharedStringRepo::HandleView &view) { - auto result = std::make_unique<SharedStringRepo::WeakHandles>(view.handles().size()); - for (uint32_t handle: view.handles()) { - result->add(handle); +std::unique_ptr<SharedStringRepo::Handles> copy_strong_handles(const SharedStringRepo::Handles &handles) { + const auto &view = handles.view(); + auto result = std::make_unique<SharedStringRepo::Handles>(); + result->reserve(view.size()); + for (const auto &handle: view) { + result->push_back(handle); } - return result; + return result; +} + +std::unique_ptr<std::vector<string_id>> make_weak_handles(const SharedStringRepo::Handles &handles) { + return std::make_unique<std::vector<string_id>>(handles.view()); } //----------------------------------------------------------------------------- @@ -183,8 +186,9 @@ struct Fixture { std::vector<Handle> copy_handles_result; std::vector<Handle> resolve_again_result; std::vector<vespalib::string> get_result; - std::unique_ptr<SharedStringRepo::StrongHandles> strong; - std::unique_ptr<SharedStringRepo::WeakHandles> weak; + std::unique_ptr<SharedStringRepo::Handles> strong; + std::unique_ptr<SharedStringRepo::Handles> strong_copy; + std::unique_ptr<std::vector<string_id>> weak; auto copy_strings_task = [&](){ copy_strings_result = copy_strings(work); }; auto copy_and_hash_task = [&](){ copy_and_hash_result = copy_and_hash(work); }; auto local_enum_task = [&](){ local_enum_result = local_enum(work); }; @@ -195,8 +199,10 @@ struct Fixture { auto reclaim_task = [&]() { resolve_again_result.clear(); }; auto reclaim_last_task = [&]() { resolve_result.clear(); }; auto make_strong_task = [&]() { strong = make_strong_handles(work); }; - auto make_weak_task = [&]() { weak = make_weak_handles(strong->view()); }; + auto copy_strong_task = [&]() { strong_copy = copy_strong_handles(*strong); }; + auto make_weak_task = [&]() { weak = make_weak_handles(*strong); }; auto free_weak_task = [&]() { weak.reset(); }; + auto free_strong_copy_task = [&]() { strong_copy.reset(); }; auto free_strong_task = [&]() { strong.reset(); }; measure_task("[01] copy strings", is_master, copy_strings_task); measure_task("[02] copy and hash", is_master, copy_and_hash_task); @@ -211,36 +217,117 @@ struct Fixture { copy_handles_result.clear(); measure_task("[09] reclaim last", is_master, reclaim_last_task); measure_task("[10] make strong handles", is_master, make_strong_task); - measure_task("[11] make weak handles", is_master, make_weak_task); - measure_task("[12] free weak handles", is_master, free_weak_task); - measure_task("[13] free strong handles", is_master, free_strong_task); + measure_task("[11] copy strong handles", is_master, copy_strong_task); + measure_task("[12] make weak handles", is_master, make_weak_task); + measure_task("[13] free weak handles", is_master, free_weak_task); + measure_task("[14] free strong handles copy", is_master, free_strong_copy_task); + measure_task("[15] free strong handles", is_master, free_strong_task); } } }; //----------------------------------------------------------------------------- -TEST("require that basic usage works") { +void verify_eq(const Handle &a, const Handle &b) { + EXPECT_TRUE(a == b); + EXPECT_TRUE(a.id() == b.id()); + EXPECT_FALSE(a != b); + EXPECT_FALSE(a.id() != b.id()); + EXPECT_FALSE(a < b); + EXPECT_FALSE(a.id() < b.id()); + EXPECT_FALSE(b < a); + EXPECT_FALSE(b.id() < a.id()); +} + +void verify_not_eq(const Handle &a, const Handle &b) { + EXPECT_FALSE(a == b); + EXPECT_FALSE(a.id() == b.id()); + EXPECT_TRUE(a != b); + EXPECT_TRUE(a.id() != b.id()); + EXPECT_NOT_EQUAL((a < b), (b < a)); + EXPECT_NOT_EQUAL((a.id() < b.id()), (b.id() < a.id())); +} + +//----------------------------------------------------------------------------- + +TEST("require that basic handle usage works") { Handle empty; Handle foo("foo"); Handle bar("bar"); Handle empty2; Handle foo2("foo"); - Handle bar2(bar); - EXPECT_EQUAL(empty.id(), 0u); - EXPECT_TRUE(empty.id() != foo.id()); - EXPECT_TRUE(empty.id() != bar.id()); - EXPECT_TRUE(foo.id() != bar.id()); - EXPECT_EQUAL(empty.id(), empty2.id()); - EXPECT_EQUAL(foo.id(), foo2.id()); - EXPECT_EQUAL(bar.id(), bar2.id()); + Handle bar2("bar"); + + EXPECT_EQUAL(SharedStringRepo::stats().active_entries, 2u); + + TEST_DO(verify_eq(empty, empty2)); + TEST_DO(verify_eq(foo, foo2)); + TEST_DO(verify_eq(bar, bar2)); + + TEST_DO(verify_not_eq(empty, foo)); + TEST_DO(verify_not_eq(empty, bar)); + TEST_DO(verify_not_eq(foo, bar)); + + EXPECT_TRUE(empty.id() == string_id()); + EXPECT_TRUE(empty2.id() == string_id()); EXPECT_EQUAL(empty.as_string(), vespalib::string("")); + EXPECT_EQUAL(empty2.as_string(), vespalib::string("")); EXPECT_EQUAL(foo.as_string(), vespalib::string("foo")); EXPECT_EQUAL(bar.as_string(), vespalib::string("bar")); EXPECT_EQUAL(foo2.as_string(), vespalib::string("foo")); EXPECT_EQUAL(bar2.as_string(), vespalib::string("bar")); } +TEST("require that handles can be copied") { + Handle a("foo"); + Handle b(a); + Handle c; + c = b; + EXPECT_EQUAL(SharedStringRepo::stats().active_entries, 1u); + EXPECT_TRUE(a.id() == b.id()); + EXPECT_TRUE(b.id() == c.id()); + EXPECT_EQUAL(c.as_string(), vespalib::string("foo")); +} + +TEST("require that handles can be moved") { + Handle a("foo"); + Handle b(std::move(a)); + Handle c; + c = std::move(b); + EXPECT_EQUAL(SharedStringRepo::stats().active_entries, 1u); + EXPECT_TRUE(a.id() == string_id()); + EXPECT_TRUE(b.id() == string_id()); + EXPECT_EQUAL(c.as_string(), vespalib::string("foo")); +} + +TEST("require that handle/string can be obtained from string_id") { + Handle a("str"); + Handle b = Handle::handle_from_id(a.id()); + EXPECT_EQUAL(SharedStringRepo::stats().active_entries, 1u); + EXPECT_EQUAL(Handle::string_from_id(b.id()), vespalib::string("str")); +} + +//----------------------------------------------------------------------------- + +TEST("require that basic multi-handle usage works") { + Handles a; + a.reserve(4); + Handle foo("foo"); + Handle bar("bar"); + EXPECT_TRUE(a.add("foo") == foo.id()); + EXPECT_TRUE(a.add("bar") == bar.id()); + a.push_back(foo.id()); + a.push_back(bar.id()); + Handles b(std::move(a)); + EXPECT_EQUAL(SharedStringRepo::stats().active_entries, 2u); + EXPECT_EQUAL(a.view().size(), 0u); + EXPECT_EQUAL(b.view().size(), 4u); + EXPECT_TRUE(b.view()[0] == foo.id()); + EXPECT_TRUE(b.view()[1] == bar.id()); + EXPECT_TRUE(b.view()[2] == foo.id()); + EXPECT_TRUE(b.view()[3] == bar.id()); +} + //----------------------------------------------------------------------------- TEST_MT_F("test shared string repo operations with 1 threads", 1, Fixture(num_threads)) { @@ -273,6 +360,22 @@ TEST_MT_F("test shared string repo operations with 64 threads", 64, Fixture(num_ //----------------------------------------------------------------------------- +#if 0 +// verify leak-detection and reporting + +TEST("leak some handles on purpose") { + new Handle("leaked string"); + new Handle("also leaked"); + new Handle("even more leak"); +} +#endif + +TEST("require that no handles have leaked during testing") { + EXPECT_EQUAL(SharedStringRepo::stats().active_entries, 0u); +} + +//----------------------------------------------------------------------------- + int main(int argc, char **argv) { TEST_MASTER.init(__FILE__); if ((argc == 2) && (argv[1] == std::string("verbose"))) { diff --git a/vespalib/src/vespa/vespalib/util/shared_string_repo.cpp b/vespalib/src/vespa/vespalib/util/shared_string_repo.cpp index e529b1190d9..187f586d344 100644 --- a/vespalib/src/vespa/vespalib/util/shared_string_repo.cpp +++ b/vespalib/src/vespa/vespalib/util/shared_string_repo.cpp @@ -2,8 +2,24 @@ #include "shared_string_repo.h" +#include <vespa/log/log.h> +LOG_SETUP(".vespalib.shared_string_repo"); + namespace vespalib { +SharedStringRepo::Stats::Stats() + : active_entries(0), + total_entries(0) +{ +} + +void +SharedStringRepo::Stats::merge(const Stats &s) +{ + active_entries += s.active_entries; + total_entries += s.total_entries; +} + SharedStringRepo::Partition::~Partition() = default; void @@ -12,12 +28,22 @@ SharedStringRepo::Partition::find_leaked_entries(size_t my_idx) const for (size_t i = 0; i < _entries.size(); ++i) { if (!_entries[i].is_free()) { size_t id = (((i << PART_BITS) | my_idx) + 1); - fprintf(stderr, "WARNING: shared_string_repo: leaked string id: %zu ('%s')\n", - id, _entries[i].str().c_str()); + LOG(warning, "leaked string id: %zu (part: %zu/%d, string: '%s')\n", + id, my_idx, NUM_PARTS, _entries[i].str().c_str()); } } } +SharedStringRepo::Stats +SharedStringRepo::Partition::stats() const +{ + Stats stats; + std::lock_guard guard(_lock); + stats.active_entries = _hash.size(); + stats.total_entries = _entries.size(); + return stats; +} + void SharedStringRepo::Partition::make_entries(size_t hint) { @@ -31,6 +57,8 @@ SharedStringRepo::Partition::make_entries(size_t hint) } } +SharedStringRepo SharedStringRepo::_repo; + SharedStringRepo::SharedStringRepo() = default; SharedStringRepo::~SharedStringRepo() { @@ -39,38 +67,30 @@ SharedStringRepo::~SharedStringRepo() } } -SharedStringRepo & -SharedStringRepo::get() +SharedStringRepo::Stats +SharedStringRepo::stats() { - static SharedStringRepo repo; - return repo; + Stats stats; + for (const auto &part: _repo._partitions) { + stats.merge(part.stats()); + } + return stats; } -SharedStringRepo::WeakHandles::WeakHandles(size_t expect_size) +SharedStringRepo::Handles::Handles() : _handles() { - _handles.reserve(expect_size); -} - -SharedStringRepo::WeakHandles::~WeakHandles() = default; - -SharedStringRepo::StrongHandles::StrongHandles(size_t expect_size) - : _repo(SharedStringRepo::get()), - _handles() -{ - _handles.reserve(expect_size); } -SharedStringRepo::StrongHandles::StrongHandles(StrongHandles &&rhs) - : _repo(rhs._repo), - _handles(std::move(rhs._handles)) +SharedStringRepo::Handles::Handles(Handles &&rhs) + : _handles(std::move(rhs._handles)) { assert(rhs._handles.empty()); } -SharedStringRepo::StrongHandles::~StrongHandles() +SharedStringRepo::Handles::~Handles() { - for (uint32_t handle: _handles) { + for (string_id handle: _handles) { _repo.reclaim(handle); } } diff --git a/vespalib/src/vespa/vespalib/util/shared_string_repo.h b/vespalib/src/vespa/vespalib/util/shared_string_repo.h index f7137984caa..b2a222c82af 100644 --- a/vespalib/src/vespa/vespalib/util/shared_string_repo.h +++ b/vespalib/src/vespa/vespalib/util/shared_string_repo.h @@ -2,6 +2,7 @@ #pragma once +#include "string_id.h" #include "spin_lock.h" #include <vespa/vespalib/stllike/string.h> #include <vespa/vespalib/stllike/identity.h> @@ -23,6 +24,14 @@ namespace vespalib { * repo. Handle objects are used to track which strings are in use. **/ class SharedStringRepo { +public: + struct Stats { + size_t active_entries; + size_t total_entries; + Stats(); + void merge(const Stats &s); + }; + private: static constexpr int NUM_PARTS = 64; static constexpr int PART_BITS = 6; @@ -58,8 +67,7 @@ private: void fini(uint32_t next) { _hash = next; _ref_cnt = npos; - // to reset or not to reset... - // _str.reset(); + _str.reset(); } vespalib::string as_string() const { assert(!is_free()); @@ -92,7 +100,7 @@ private: using HashType = hashtable<Key,Key,Hash,Equal,Identity,hashtable_base::and_modulator>; private: - SpinLock _lock; + mutable SpinLock _lock; std::vector<Entry> _entries; uint32_t _free; HashType _hash; @@ -116,6 +124,7 @@ private: } ~Partition(); void find_leaked_entries(size_t my_idx) const; + Stats stats() const; uint32_t resolve(const AltKey &alt_key) { std::lock_guard guard(_lock); @@ -130,7 +139,7 @@ private: } } - vespalib::string as_string(uint32_t idx) { + vespalib::string as_string(uint32_t idx) const { std::lock_guard guard(_lock); return _entries[idx].as_string(); } @@ -156,125 +165,107 @@ private: SharedStringRepo(); ~SharedStringRepo(); - uint32_t resolve(vespalib::stringref str) { + string_id resolve(vespalib::stringref str) { if (!str.empty()) { uint64_t full_hash = XXH3_64bits(str.data(), str.size()); uint32_t part = full_hash & PART_MASK; uint32_t local_hash = full_hash >> PART_BITS; uint32_t local_idx = _partitions[part].resolve(AltKey{str, local_hash}); - return (((local_idx << PART_BITS) | part) + 1); + return string_id(((local_idx << PART_BITS) | part) + 1); } else { - return 0; + return {}; } } - vespalib::string as_string(uint32_t id) { - if (id != 0) { - uint32_t part = (id - 1) & PART_MASK; - uint32_t local_idx = (id - 1) >> PART_BITS; + vespalib::string as_string(string_id id) { + if (id._id != 0) { + uint32_t part = (id._id - 1) & PART_MASK; + uint32_t local_idx = (id._id - 1) >> PART_BITS; return _partitions[part].as_string(local_idx); } else { return {}; } } - uint32_t copy(uint32_t id) { - if (id != 0) { - uint32_t part = (id - 1) & PART_MASK; - uint32_t local_idx = (id - 1) >> PART_BITS; + string_id copy(string_id id) { + if (id._id != 0) { + uint32_t part = (id._id - 1) & PART_MASK; + uint32_t local_idx = (id._id - 1) >> PART_BITS; _partitions[part].copy(local_idx); } return id; } - void reclaim(uint32_t id) { - if (id != 0) { - uint32_t part = (id - 1) & PART_MASK; - uint32_t local_idx = (id - 1) >> PART_BITS; + void reclaim(string_id id) { + if (id._id != 0) { + uint32_t part = (id._id - 1) & PART_MASK; + uint32_t local_idx = (id._id - 1) >> PART_BITS; _partitions[part].reclaim(local_idx); } } + static SharedStringRepo _repo; + public: - static SharedStringRepo &get(); + static Stats stats(); // A single stand-alone string handle with ownership class Handle { private: - uint32_t _id; - Handle(uint32_t weak_id) : _id(get().copy(weak_id)) {} + string_id _id; + Handle(string_id weak_id) : _id(_repo.copy(weak_id)) {} public: - Handle() noexcept : _id(0) {} - Handle(vespalib::stringref str) : _id(get().resolve(str)) {} - Handle(const Handle &rhs) : _id(get().copy(rhs._id)) {} + Handle() noexcept : _id() {} + Handle(vespalib::stringref str) : _id(_repo.resolve(str)) {} + Handle(const Handle &rhs) : _id(_repo.copy(rhs._id)) {} Handle &operator=(const Handle &rhs) { - get().reclaim(_id); - _id = get().copy(rhs._id); - return *this; + _repo.reclaim(_id); + _id = _repo.copy(rhs._id); + return *this; } Handle(Handle &&rhs) noexcept : _id(rhs._id) { - rhs._id = 0; + rhs._id = string_id(); } Handle &operator=(Handle &&rhs) { - get().reclaim(_id); + _repo.reclaim(_id); _id = rhs._id; - rhs._id = 0; + rhs._id = string_id(); return *this; } // NB: not lexical sorting order, but can be used in maps bool operator<(const Handle &rhs) const noexcept { return (_id < rhs._id); } bool operator==(const Handle &rhs) const noexcept { return (_id == rhs._id); } bool operator!=(const Handle &rhs) const noexcept { return (_id != rhs._id); } - uint32_t id() const noexcept { return _id; } - uint32_t hash() const noexcept { return _id; } - vespalib::string as_string() const { return get().as_string(_id); } - static Handle handle_from_id(uint32_t weak_id) { return Handle(weak_id); } - static vespalib::string string_from_id(uint32_t weak_id) { return get().as_string(weak_id); } - ~Handle() { get().reclaim(_id); } - }; - - // Read-only access to a collection of string handles - class HandleView { - private: - const std::vector<uint32_t> &_handles; - public: - HandleView(const std::vector<uint32_t> &handles_in) : _handles(handles_in) {} - const std::vector<uint32_t> &handles() const { return _handles; } - }; - - // A collection of string handles without ownership - class WeakHandles { - private: - std::vector<uint32_t> _handles; - public: - WeakHandles(size_t expect_size); - ~WeakHandles(); - void add(uint32_t handle) { _handles.push_back(handle); } - HandleView view() const { return HandleView(_handles); } + string_id id() const noexcept { return _id; } + uint32_t hash() const noexcept { return _id.hash(); } + vespalib::string as_string() const { return _repo.as_string(_id); } + static Handle handle_from_id(string_id weak_id) { return Handle(weak_id); } + static vespalib::string string_from_id(string_id weak_id) { return _repo.as_string(weak_id); } + ~Handle() { _repo.reclaim(_id); } }; // A collection of string handles with ownership - class StrongHandles { + class Handles { private: - SharedStringRepo &_repo; - std::vector<uint32_t> _handles; + std::vector<string_id> _handles; public: - StrongHandles(size_t expect_size); - StrongHandles(StrongHandles &&rhs); - StrongHandles(const StrongHandles &) = delete; - StrongHandles &operator=(const StrongHandles &) = delete; - StrongHandles &operator=(StrongHandles &&) = delete; - ~StrongHandles(); - uint32_t add(vespalib::stringref str) { - uint32_t id = _repo.resolve(str); + Handles(); + Handles(Handles &&rhs); + Handles(const Handles &) = delete; + Handles &operator=(const Handles &) = delete; + Handles &operator=(Handles &&) = delete; + ~Handles(); + string_id add(vespalib::stringref str) { + string_id id = _repo.resolve(str); _handles.push_back(id); return id; } - void add(uint32_t handle) { - uint32_t id = _repo.copy(handle); + void reserve(size_t value) { _handles.reserve(value); } + void push_back(string_id handle) { + string_id id = _repo.copy(handle); _handles.push_back(id); } - HandleView view() const { return HandleView(_handles); } + const std::vector<string_id> &view() const { return _handles; } }; }; diff --git a/vespalib/src/vespa/vespalib/util/string_id.h b/vespalib/src/vespa/vespalib/util/string_id.h new file mode 100644 index 00000000000..92711acdc67 --- /dev/null +++ b/vespalib/src/vespa/vespalib/util/string_id.h @@ -0,0 +1,40 @@ +// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#pragma once + +#include <cstdint> + +namespace vespalib { + +class SharedStringRepo; + +/** + * A typed integer value representing the identity of a string stored + * in the global SharedStringRepo. This class is a simple wrapper with + * no lifetime management of the mapping between string value and + * string id. For a string_id to be valid, it needs to be owned by at + * least one SharedStringRepo::Handle or SharedStringRepo::Handles + * object. This is similar to how std::enable_shared_from_this works; + * the string_id acts like a reference to a mapping from a string to a + * numberical value (without ownership) and Handle/Handles act like + * shared pointers to the same mapping (with shared ownership). + **/ +class string_id { + friend class ::vespalib::SharedStringRepo; +private: + uint32_t _id; + explicit constexpr string_id(uint32_t value_in) noexcept : _id(value_in) {} +public: + constexpr string_id() noexcept : _id(0) {} + constexpr string_id(const string_id &) noexcept = default; + constexpr string_id(string_id &&) noexcept = default; + constexpr string_id &operator=(const string_id &) noexcept = default; + constexpr string_id &operator=(string_id &&) noexcept = default; + constexpr uint32_t hash() const noexcept { return _id; } + // NB: not lexical sorting order, but can be used in maps + constexpr bool operator<(const string_id &rhs) const noexcept { return (_id < rhs._id); } + constexpr bool operator==(const string_id &rhs) const noexcept { return (_id == rhs._id); } + constexpr bool operator!=(const string_id &rhs) const noexcept { return (_id != rhs._id); } +}; + +} |