From a855ee4d533ae1e474a64f761887d882e97c14c1 Mon Sep 17 00:00:00 2001 From: HÃ¥vard Pettersen Date: Thu, 14 Jan 2021 13:56:57 +0000 Subject: improve stats for shared string repo --- .../shared_string_repo/shared_string_repo_test.cpp | 92 +++++++++++++++------- .../src/vespa/vespalib/util/shared_string_repo.cpp | 23 +++++- .../src/vespa/vespalib/util/shared_string_repo.h | 5 +- 3 files changed, 87 insertions(+), 33 deletions(-) (limited to 'vespalib') diff --git a/vespalib/src/tests/shared_string_repo/shared_string_repo_test.cpp b/vespalib/src/tests/shared_string_repo/shared_string_repo_test.cpp index 20a41146a2c..4c3c32c2326 100644 --- a/vespalib/src/tests/shared_string_repo/shared_string_repo_test.cpp +++ b/vespalib/src/tests/shared_string_repo/shared_string_repo_test.cpp @@ -14,6 +14,7 @@ using namespace vespalib; using make_string_short::fmt; using Handle = SharedStringRepo::Handle; using Handles = SharedStringRepo::Handles; +using Stats = SharedStringRepo::Stats; bool verbose = false; double budget = 0.10; @@ -80,8 +81,8 @@ std::vector get_strings(const std::vector &handles) { return strings; } -std::unique_ptr make_strong_handles(const std::vector &strings) { - auto result = std::make_unique(); +std::unique_ptr make_strong_handles(const std::vector &strings) { + auto result = std::make_unique(); result->reserve(strings.size()); for (const auto &str: strings) { result->add(str); @@ -89,9 +90,9 @@ std::unique_ptr make_strong_handles(const std::vector return result; } -std::unique_ptr copy_strong_handles(const SharedStringRepo::Handles &handles) { +std::unique_ptr copy_strong_handles(const Handles &handles) { const auto &view = handles.view(); - auto result = std::make_unique(); + auto result = std::make_unique(); result->reserve(view.size()); for (const auto &handle: view) { result->push_back(handle); @@ -99,7 +100,7 @@ std::unique_ptr copy_strong_handles(const SharedStrin return result; } -std::unique_ptr> make_weak_handles(const SharedStringRepo::Handles &handles) { +std::unique_ptr> make_weak_handles(const Handles &handles) { return std::make_unique>(handles.view()); } @@ -198,8 +199,8 @@ struct Fixture { std::vector resolve_again_result; std::vector get_result; std::vector get_direct_result; - std::unique_ptr strong; - std::unique_ptr strong_copy; + std::unique_ptr strong; + std::unique_ptr strong_copy; std::unique_ptr> weak; auto copy_strings_task = [&](){ copy_strings_result = copy_strings(work); }; auto copy_and_hash_task = [&](){ copy_and_hash_result = copy_and_hash(work); }; @@ -267,45 +268,79 @@ void verify_not_eq(const Handle &a, const Handle &b) { //----------------------------------------------------------------------------- -TEST("require that empty stats object have expected values") { - size_t part_size = (uint32_t(-1) - 100001) / 64; - SharedStringRepo::Stats empty; +TEST("require that empty stats object has expected values") { + Stats empty; EXPECT_EQUAL(empty.active_entries, 0u); EXPECT_EQUAL(empty.total_entries, 0u); - EXPECT_EQUAL(empty.min_free, part_size); - if (verbose) { - fprintf(stderr, "max entries per part: %zu\n", empty.min_free); - } + EXPECT_EQUAL(empty.max_part_usage, 0u); + EXPECT_EQUAL(empty.memory_usage.allocatedBytes(), 0u); + EXPECT_EQUAL(empty.memory_usage.usedBytes(), 0u); + EXPECT_EQUAL(empty.memory_usage.deadBytes(), 0u); + EXPECT_EQUAL(empty.memory_usage.allocatedBytesOnHold(), 0u); } TEST("require that stats can be merged") { - SharedStringRepo::Stats a; - SharedStringRepo::Stats b; + Stats a; + Stats b; a.active_entries = 1; a.total_entries = 10; - a.min_free = 100; + a.max_part_usage = 100; + a.memory_usage.incAllocatedBytes(10); + a.memory_usage.incUsedBytes(5); b.active_entries = 3; b.total_entries = 20; - b.min_free = 50; + b.max_part_usage = 50; + b.memory_usage.incAllocatedBytes(20); + b.memory_usage.incUsedBytes(10); a.merge(b); EXPECT_EQUAL(a.active_entries, 4u); EXPECT_EQUAL(a.total_entries, 30u); - EXPECT_EQUAL(a.min_free, 50u); + EXPECT_EQUAL(a.max_part_usage, 100u); + EXPECT_EQUAL(a.memory_usage.allocatedBytes(), 30u); + EXPECT_EQUAL(a.memory_usage.usedBytes(), 15u); + EXPECT_EQUAL(a.memory_usage.deadBytes(), 0u); + EXPECT_EQUAL(a.memory_usage.allocatedBytesOnHold(), 0u); } TEST("require that id_space_usage is sane") { - SharedStringRepo::Stats empty; - SharedStringRepo::Stats stats; - stats.min_free = empty.min_free; + Stats stats; + stats.max_part_usage = 0; EXPECT_EQUAL(stats.id_space_usage(), 0.0); - stats.min_free = empty.min_free / 2; + stats.max_part_usage = Stats::part_limit() / 4; + EXPECT_EQUAL(stats.id_space_usage(), 0.25); + stats.max_part_usage = Stats::part_limit() / 2; EXPECT_EQUAL(stats.id_space_usage(), 0.5); - stats.min_free = empty.min_free / 4; - EXPECT_EQUAL(stats.id_space_usage(), 0.75); - stats.min_free = 0; + stats.max_part_usage = Stats::part_limit(); EXPECT_EQUAL(stats.id_space_usage(), 1.0); } +TEST("require that initial stats are as expected") { + size_t num_parts = 64; + size_t part_size = 128; + size_t hash_node_size = 12; + size_t entry_size = 72; + size_t initial_entries = 113; + size_t initial_hash_used = 64; + size_t initial_hash_allocated = 128; + size_t part_limit = (uint32_t(-1) - 100001) / num_parts; + auto stats = SharedStringRepo::stats(); + EXPECT_EQUAL(stats.active_entries, 0u); + EXPECT_EQUAL(stats.total_entries, num_parts * initial_entries); + EXPECT_EQUAL(stats.max_part_usage, 0u); + EXPECT_EQUAL(stats.id_space_usage(), 0.0); + EXPECT_EQUAL(stats.memory_usage.allocatedBytes(), + num_parts * (part_size + hash_node_size * initial_hash_allocated + entry_size * initial_entries)); + EXPECT_EQUAL(stats.memory_usage.usedBytes(), + num_parts * (part_size + hash_node_size * initial_hash_used + entry_size * initial_entries)); + EXPECT_EQUAL(stats.memory_usage.deadBytes(), 0u); + EXPECT_EQUAL(stats.memory_usage.allocatedBytesOnHold(), 0u); + EXPECT_EQUAL(Stats::part_limit(), part_limit); + if (verbose) { + fprintf(stderr, "max entries per part: %zu\n", Stats::part_limit()); + fprintf(stderr, "initial memory usage: %zu\n", stats.memory_usage.allocatedBytes()); + } +} + //----------------------------------------------------------------------------- TEST("require that basic handle usage works") { @@ -441,10 +476,11 @@ TEST("allocate handles until we run out") { std::vector handles; for (;;) { auto stats = SharedStringRepo::stats(); + size_t min_free = Stats::part_limit() - stats.max_part_usage; fprintf(stderr, "cnt: %zu, used: %zu/%zu, min free: %zu, usage: %g\n", - cnt, stats.active_entries, stats.total_entries, stats.min_free, + cnt, stats.active_entries, stats.total_entries, min_free, stats.id_space_usage()); - size_t n = std::max(size_t(1), stats.min_free); + size_t n = std::max(size_t(1), min_free); for (size_t i = 0; i < n; ++i) { handles.emplace_back(fmt("my_id_%zu", cnt++)); } diff --git a/vespalib/src/vespa/vespalib/util/shared_string_repo.cpp b/vespalib/src/vespa/vespalib/util/shared_string_repo.cpp index c39b79f7899..c4a3ed4170a 100644 --- a/vespalib/src/vespa/vespalib/util/shared_string_repo.cpp +++ b/vespalib/src/vespa/vespalib/util/shared_string_repo.cpp @@ -10,7 +10,8 @@ namespace vespalib { SharedStringRepo::Stats::Stats() : active_entries(0), total_entries(0), - min_free(PART_LIMIT) + max_part_usage(0), + memory_usage() { } @@ -19,13 +20,20 @@ SharedStringRepo::Stats::merge(const Stats &s) { active_entries += s.active_entries; total_entries += s.total_entries; - min_free = std::min(min_free, s.min_free); + max_part_usage = std::max(max_part_usage, s.max_part_usage); + memory_usage.merge(s.memory_usage); +} + +size_t +SharedStringRepo::Stats::part_limit() +{ + return PART_LIMIT; } double SharedStringRepo::Stats::id_space_usage() const { - return (1.0 - (double(min_free) / double(PART_LIMIT))); + return (double(max_part_usage) / double(PART_LIMIT)); } SharedStringRepo::Partition::~Partition() = default; @@ -49,7 +57,12 @@ SharedStringRepo::Partition::stats() const std::lock_guard guard(_lock); stats.active_entries = _hash.size(); stats.total_entries = _entries.size(); - stats.min_free = (PART_LIMIT - _hash.size()); + stats.max_part_usage = _hash.size(); + // memory footprint of self is counted by SharedStringRepo::stats() + stats.memory_usage.incAllocatedBytes(sizeof(Entry) * _entries.capacity()); + stats.memory_usage.incUsedBytes(sizeof(Entry) * _entries.size()); + stats.memory_usage.incAllocatedBytes(_hash.getMemoryConsumption() - sizeof(HashType)); + stats.memory_usage.incUsedBytes(_hash.getMemoryUsed() - sizeof(HashType)); return stats; } @@ -82,6 +95,8 @@ SharedStringRepo::Stats SharedStringRepo::stats() { Stats stats; + stats.memory_usage.incAllocatedBytes(sizeof(SharedStringRepo)); + stats.memory_usage.incUsedBytes(sizeof(SharedStringRepo)); for (const auto &part: _repo._partitions) { stats.merge(part.stats()); } diff --git a/vespalib/src/vespa/vespalib/util/shared_string_repo.h b/vespalib/src/vespa/vespalib/util/shared_string_repo.h index 518d3874b7d..29067060af0 100644 --- a/vespalib/src/vespa/vespalib/util/shared_string_repo.h +++ b/vespalib/src/vespa/vespalib/util/shared_string_repo.h @@ -2,6 +2,7 @@ #pragma once +#include "memoryusage.h" #include "string_id.h" #include "spin_lock.h" #include @@ -32,9 +33,11 @@ public: struct Stats { size_t active_entries; size_t total_entries; - size_t min_free; + size_t max_part_usage; + MemoryUsage memory_usage; Stats(); void merge(const Stats &s); + static size_t part_limit(); double id_space_usage() const; }; -- cgit v1.2.3