From 57361b00342b266b9da09beb06930423dd56a46e Mon Sep 17 00:00:00 2001 From: Tor Egge Date: Mon, 13 Dec 2021 14:48:23 +0100 Subject: Add CompactBufferCandidates, used to select buffers to compact. --- vespalib/CMakeLists.txt | 1 + .../compact_buffer_candidates/CMakeLists.txt | 9 +++ .../compact_buffer_candidates_test.cpp | 91 ++++++++++++++++++++++ .../src/vespa/vespalib/datastore/CMakeLists.txt | 1 + .../vespalib/datastore/compact_buffer_candidate.h | 36 +++++++++ .../datastore/compact_buffer_candidates.cpp | 52 +++++++++++++ .../vespalib/datastore/compact_buffer_candidates.h | 27 +++++++ .../src/vespa/vespalib/datastore/datastorebase.cpp | 41 +++++----- 8 files changed, 236 insertions(+), 22 deletions(-) create mode 100644 vespalib/src/tests/datastore/compact_buffer_candidates/CMakeLists.txt create mode 100644 vespalib/src/tests/datastore/compact_buffer_candidates/compact_buffer_candidates_test.cpp create mode 100644 vespalib/src/vespa/vespalib/datastore/compact_buffer_candidate.h create mode 100644 vespalib/src/vespa/vespalib/datastore/compact_buffer_candidates.cpp create mode 100644 vespalib/src/vespa/vespalib/datastore/compact_buffer_candidates.h (limited to 'vespalib') diff --git a/vespalib/CMakeLists.txt b/vespalib/CMakeLists.txt index d4c7bc881a5..400c1ec5d1a 100644 --- a/vespalib/CMakeLists.txt +++ b/vespalib/CMakeLists.txt @@ -42,6 +42,7 @@ vespa_define_module( src/tests/datastore/array_store src/tests/datastore/array_store_config src/tests/datastore/buffer_type + src/tests/datastore/compact_buffer_candidates src/tests/datastore/datastore src/tests/datastore/fixed_size_hash_map src/tests/datastore/sharded_hash_map diff --git a/vespalib/src/tests/datastore/compact_buffer_candidates/CMakeLists.txt b/vespalib/src/tests/datastore/compact_buffer_candidates/CMakeLists.txt new file mode 100644 index 00000000000..d6731071927 --- /dev/null +++ b/vespalib/src/tests/datastore/compact_buffer_candidates/CMakeLists.txt @@ -0,0 +1,9 @@ +# Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +vespa_add_executable(vespalib_compact_buffer_candidates_test_app TEST + SOURCES + compact_buffer_candidates_test.cpp + DEPENDS + vespalib + GTest::GTest +) +vespa_add_test(NAME vespalib_compact_buffer_candidates_test_app COMMAND vespalib_compact_buffer_candidates_test_app) diff --git a/vespalib/src/tests/datastore/compact_buffer_candidates/compact_buffer_candidates_test.cpp b/vespalib/src/tests/datastore/compact_buffer_candidates/compact_buffer_candidates_test.cpp new file mode 100644 index 00000000000..80c0d571894 --- /dev/null +++ b/vespalib/src/tests/datastore/compact_buffer_candidates/compact_buffer_candidates_test.cpp @@ -0,0 +1,91 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#include +#include + +using vespalib::datastore::CompactBufferCandidates; + +namespace { + +constexpr uint32_t num_buffers = 1024; +constexpr double default_ratio = 0.2 / 2; +constexpr size_t default_slack = 1000; + +}; + + +class CompactBufferCandidatesTest : public ::testing::Test +{ +public: + CompactBufferCandidates candidates; + CompactBufferCandidatesTest(); + ~CompactBufferCandidatesTest() override; + void reset_candidates(uint32_t max_buffers); + CompactBufferCandidatesTest& add(uint32_t buffer_id, size_t used, size_t dead); + void assert_select(const std::vector& exp); +}; + +CompactBufferCandidatesTest::CompactBufferCandidatesTest() + : ::testing::Test(), + candidates(num_buffers, 1, default_ratio, default_slack) +{ +} + +CompactBufferCandidatesTest::~CompactBufferCandidatesTest() = default; + +void +CompactBufferCandidatesTest::reset_candidates(uint32_t max_buffers) +{ + candidates = CompactBufferCandidates(num_buffers, max_buffers, default_ratio, default_slack); +} + +CompactBufferCandidatesTest& +CompactBufferCandidatesTest::add(uint32_t buffer_id, size_t used, size_t dead) +{ + candidates.add(buffer_id, used, dead); + return *this; +} + +void +CompactBufferCandidatesTest::assert_select(const std::vector& exp) +{ + std::vector act; + candidates.select(act); + EXPECT_EQ(exp, act); +} + +TEST_F(CompactBufferCandidatesTest, select_single) +{ + add(0, 10000, 2000).add(1, 10000, 3000); + assert_select({1}); +} + +TEST_F(CompactBufferCandidatesTest, select_two) +{ + reset_candidates(2); + add(0, 10000, 2000).add(3, 10000, 3000).add(7, 10000, 4000); + assert_select({7, 3}); +} + +TEST_F(CompactBufferCandidatesTest, select_all) +{ + reset_candidates(4); + add(1, 10000, 2000).add(3, 10000, 4000).add(8, 10000, 3000); + assert_select({3, 8, 1}); +} + +TEST_F(CompactBufferCandidatesTest, select_cutoff_by_ratio) +{ + reset_candidates(4); + add(1, 100000, 9999).add(3, 100000, 40000).add(8, 100000, 30000); + assert_select({3, 8}); +} + +TEST_F(CompactBufferCandidatesTest, select_cutoff_by_slack) +{ + reset_candidates(4); + add(1, 2000, 999).add(3, 2000, 1200).add(9, 2000, 1300); + assert_select({9, 3}); +} + +GTEST_MAIN_RUN_ALL_TESTS() diff --git a/vespalib/src/vespa/vespalib/datastore/CMakeLists.txt b/vespalib/src/vespa/vespalib/datastore/CMakeLists.txt index c36077e4dd0..d628843279d 100644 --- a/vespalib/src/vespa/vespalib/datastore/CMakeLists.txt +++ b/vespalib/src/vespa/vespalib/datastore/CMakeLists.txt @@ -6,6 +6,7 @@ vespa_add_library(vespalib_vespalib_datastore OBJECT buffer_type.cpp bufferstate.cpp compaction_strategy.cpp + compact_buffer_candidates.cpp datastore.cpp datastorebase.cpp entryref.cpp diff --git a/vespalib/src/vespa/vespalib/datastore/compact_buffer_candidate.h b/vespalib/src/vespa/vespalib/datastore/compact_buffer_candidate.h new file mode 100644 index 00000000000..85ea1e42eac --- /dev/null +++ b/vespalib/src/vespa/vespalib/datastore/compact_buffer_candidate.h @@ -0,0 +1,36 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#pragma once + +#include +#include + +namespace vespalib::datastore { + +/* + * Class representing candidate buffer for compaction. + */ +class CompactBufferCandidate { + uint32_t _buffer_id; + size_t _used; + size_t _dead; +public: + CompactBufferCandidate(uint32_t buffer_id, size_t used, size_t dead) noexcept + : _buffer_id(buffer_id), + _used(used), + _dead(dead) + { + } + + CompactBufferCandidate() noexcept + : CompactBufferCandidate(0, 0, 0) + { + } + + bool operator<(const CompactBufferCandidate& rhs) const noexcept { return _dead > rhs._dead; } + uint32_t get_buffer_id() const noexcept { return _buffer_id; } + size_t get_used() const noexcept { return _used; } + size_t get_dead() const noexcept { return _dead; } +}; + +} diff --git a/vespalib/src/vespa/vespalib/datastore/compact_buffer_candidates.cpp b/vespalib/src/vespa/vespalib/datastore/compact_buffer_candidates.cpp new file mode 100644 index 00000000000..3003ef315e8 --- /dev/null +++ b/vespalib/src/vespa/vespalib/datastore/compact_buffer_candidates.cpp @@ -0,0 +1,52 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#include "compact_buffer_candidates.h" +#include + +namespace vespalib::datastore { + +CompactBufferCandidates::CompactBufferCandidates(uint32_t num_buffers, uint32_t max_buffers, double ratio, size_t slack) + : _candidates(), + _used(0), + _dead(0), + _max_buffers(std::max(max_buffers, 1u)), + _ratio(ratio), + _slack(slack) +{ + _candidates.reserve(num_buffers); +} + +CompactBufferCandidates::~CompactBufferCandidates() = default; + +void +CompactBufferCandidates::add(uint32_t buffer_id, size_t used, size_t dead) +{ + _candidates.emplace_back(buffer_id, used, dead); + _used += used; + _dead += dead; +} + +void +CompactBufferCandidates::select(std::vector& buffers) +{ + if (_candidates.empty()) { + return; + } + if (_candidates.size() > _max_buffers) { + std::nth_element(_candidates.begin(), _candidates.begin() + (_max_buffers - 1), _candidates.end()); + _candidates.resize(_max_buffers); + } + std::sort(_candidates.begin(), _candidates.end()); + size_t remaining_used = _used; + size_t remaining_dead = _dead; + for (auto& candidate : _candidates) { + buffers.emplace_back(candidate.get_buffer_id()); + remaining_used -= candidate.get_used(); + remaining_dead -= candidate.get_dead(); + if ((remaining_dead < _slack) || (remaining_dead <= remaining_used * _ratio)) { + break; + } + } +} + +} diff --git a/vespalib/src/vespa/vespalib/datastore/compact_buffer_candidates.h b/vespalib/src/vespa/vespalib/datastore/compact_buffer_candidates.h new file mode 100644 index 00000000000..59d35422328 --- /dev/null +++ b/vespalib/src/vespa/vespalib/datastore/compact_buffer_candidates.h @@ -0,0 +1,27 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#pragma once + +#include "compact_buffer_candidate.h" +#include + +namespace vespalib::datastore { + +/* + * Class representing candidate buffers for compaction. + */ +class CompactBufferCandidates { + std::vector _candidates; + size_t _used; + size_t _dead; + uint32_t _max_buffers; + double _ratio; + size_t _slack; +public: + CompactBufferCandidates(uint32_t num_buffers, uint32_t max_buffers, double ratio, size_t slack); + ~CompactBufferCandidates(); + void add(uint32_t buffer_id, size_t used, size_t dead); + void select(std::vector& buffers); +}; + +} diff --git a/vespalib/src/vespa/vespalib/datastore/datastorebase.cpp b/vespalib/src/vespa/vespalib/datastore/datastorebase.cpp index 059171e1f02..f137d5379fb 100644 --- a/vespalib/src/vespa/vespalib/datastore/datastorebase.cpp +++ b/vespalib/src/vespa/vespalib/datastore/datastorebase.cpp @@ -1,9 +1,12 @@ // Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. #include "datastore.h" +#include "compact_buffer_candidates.h" #include "compaction_spec.h" +#include "compaction_strategy.h" #include #include +#include #include #include @@ -529,41 +532,35 @@ DataStoreBase::markCompacting(uint32_t bufferId) std::vector DataStoreBase::startCompactWorstBuffers(CompactionSpec compaction_spec, const CompactionStrategy& compaction_strategy) { - (void) compaction_strategy; - constexpr uint32_t noBufferId = std::numeric_limits::max(); - uint32_t worstMemoryBufferId = noBufferId; - uint32_t worstAddressSpaceBufferId = noBufferId; - size_t worstDeadElems = 0; - size_t worstDeadArrays = 0; + // compact memory usage + CompactBufferCandidates elem_buffers(_numBuffers, compaction_strategy.get_max_buffers(), compaction_strategy.getMaxDeadBytesRatio() / 2, CompactionStrategy::DEAD_BYTES_SLACK); + // compact address space + CompactBufferCandidates array_buffers(_numBuffers, compaction_strategy.get_max_buffers(), compaction_strategy.getMaxDeadAddressSpaceRatio() / 2, CompactionStrategy::DEAD_ADDRESS_SPACE_SLACK); for (uint32_t bufferId = 0; bufferId < _numBuffers; ++bufferId) { const auto &state = getBufferState(bufferId); if (state.isActive()) { auto typeHandler = state.getTypeHandler(); uint32_t arraySize = typeHandler->getArraySize(); uint32_t reservedElements = typeHandler->getReservedElements(bufferId); + size_t used_elems = state.size(); size_t deadElems = state.getDeadElems() - reservedElements; - if (compaction_spec.compact_memory() && deadElems > worstDeadElems) { - worstMemoryBufferId = bufferId; - worstDeadElems = deadElems; + if (compaction_spec.compact_memory()) { + elem_buffers.add(bufferId, used_elems, deadElems); } if (compaction_spec.compact_address_space()) { - size_t deadArrays = deadElems / arraySize; - if (deadArrays > worstDeadArrays) { - worstAddressSpaceBufferId = bufferId; - worstDeadArrays = deadArrays; - } + array_buffers.add(bufferId, used_elems / arraySize, deadElems / arraySize); } } } std::vector result; - if (worstMemoryBufferId != noBufferId) { - markCompacting(worstMemoryBufferId); - result.emplace_back(worstMemoryBufferId); - } - if (worstAddressSpaceBufferId != noBufferId && - worstAddressSpaceBufferId != worstMemoryBufferId) { - markCompacting(worstAddressSpaceBufferId); - result.emplace_back(worstAddressSpaceBufferId); + result.reserve(std::min(_numBuffers, 2 * compaction_strategy.get_max_buffers())); + elem_buffers.select(result); + array_buffers.select(result); + std::sort(result.begin(), result.end()); + auto last = std::unique(result.begin(), result.end()); + result.erase(last, result.end()); + for (auto buffer_id : result) { + markCompacting(buffer_id); } return result; } -- cgit v1.2.3