diff options
author | Henning Baldersheim <balder@yahoo-inc.com> | 2021-12-14 19:11:43 +0100 |
---|---|---|
committer | GitHub <noreply@github.com> | 2021-12-14 19:11:43 +0100 |
commit | b608bab290c70eb230a1c0bdf39bc346795d2d0d (patch) | |
tree | 51b776ee7c52da3ec284d435a6968b1159d30525 | |
parent | a9c87aedadef67d0e00dd659a4164eaaf67d329c (diff) | |
parent | 445b8f8ae9e1d7bc4d2482b31c4b54b78ae29753 (diff) |
Merge pull request #20506 from vespa-engine/toregge/limit-buffers-to-compact
Limit buffers to compact based on number of active and free buffers.
5 files changed, 81 insertions, 22 deletions
diff --git a/vespalib/src/tests/datastore/compact_buffer_candidates/compact_buffer_candidates_test.cpp b/vespalib/src/tests/datastore/compact_buffer_candidates/compact_buffer_candidates_test.cpp index 80c0d571894..1ba9a993ee6 100644 --- a/vespalib/src/tests/datastore/compact_buffer_candidates/compact_buffer_candidates_test.cpp +++ b/vespalib/src/tests/datastore/compact_buffer_candidates/compact_buffer_candidates_test.cpp @@ -10,6 +10,7 @@ namespace { constexpr uint32_t num_buffers = 1024; constexpr double default_ratio = 0.2 / 2; constexpr size_t default_slack = 1000; +constexpr double default_active_buffers_ratio = 1.0; }; @@ -20,23 +21,24 @@ public: CompactBufferCandidates candidates; CompactBufferCandidatesTest(); ~CompactBufferCandidatesTest() override; - void reset_candidates(uint32_t max_buffers); + void reset_candidates(uint32_t max_buffers, double active_buffers_ratio = default_active_buffers_ratio); CompactBufferCandidatesTest& add(uint32_t buffer_id, size_t used, size_t dead); void assert_select(const std::vector<uint32_t>& exp); + void set_free_buffers(uint32_t free_buffers = 100); }; CompactBufferCandidatesTest::CompactBufferCandidatesTest() : ::testing::Test(), - candidates(num_buffers, 1, default_ratio, default_slack) + candidates(num_buffers, 1, default_active_buffers_ratio, default_ratio, default_slack) { } CompactBufferCandidatesTest::~CompactBufferCandidatesTest() = default; void -CompactBufferCandidatesTest::reset_candidates(uint32_t max_buffers) +CompactBufferCandidatesTest::reset_candidates(uint32_t max_buffers, double active_buffers_ratio) { - candidates = CompactBufferCandidates(num_buffers, max_buffers, default_ratio, default_slack); + candidates = CompactBufferCandidates(num_buffers, max_buffers, active_buffers_ratio, default_ratio, default_slack); } CompactBufferCandidatesTest& @@ -47,6 +49,12 @@ CompactBufferCandidatesTest::add(uint32_t buffer_id, size_t used, size_t dead) } void +CompactBufferCandidatesTest::set_free_buffers(uint32_t free_buffers) +{ + candidates.set_free_buffers(free_buffers); +} + +void CompactBufferCandidatesTest::assert_select(const std::vector<uint32_t>& exp) { std::vector<uint32_t> act; @@ -56,36 +64,50 @@ CompactBufferCandidatesTest::assert_select(const std::vector<uint32_t>& exp) TEST_F(CompactBufferCandidatesTest, select_single) { - add(0, 10000, 2000).add(1, 10000, 3000); + add(0, 10000, 2000).add(1, 10000, 3000).set_free_buffers(); assert_select({1}); } TEST_F(CompactBufferCandidatesTest, select_two) { reset_candidates(2); - add(0, 10000, 2000).add(3, 10000, 3000).add(7, 10000, 4000); + add(0, 10000, 2000).add(3, 10000, 3000).add(7, 10000, 4000).set_free_buffers(); assert_select({7, 3}); } TEST_F(CompactBufferCandidatesTest, select_all) { reset_candidates(4); - add(1, 10000, 2000).add(3, 10000, 4000).add(8, 10000, 3000); + add(1, 10000, 2000).add(3, 10000, 4000).add(8, 10000, 3000).set_free_buffers(); assert_select({3, 8, 1}); } TEST_F(CompactBufferCandidatesTest, select_cutoff_by_ratio) { reset_candidates(4); - add(1, 100000, 9999).add(3, 100000, 40000).add(8, 100000, 30000); + add(1, 100000, 9999).add(3, 100000, 40000).add(8, 100000, 30000).set_free_buffers(); assert_select({3, 8}); } TEST_F(CompactBufferCandidatesTest, select_cutoff_by_slack) { reset_candidates(4); - add(1, 2000, 999).add(3, 2000, 1200).add(9, 2000, 1300); + add(1, 2000, 999).add(3, 2000, 1200).add(9, 2000, 1300).set_free_buffers(); assert_select({9, 3}); } +TEST_F(CompactBufferCandidatesTest, select_cutoff_by_active_buffers_ratio) +{ + reset_candidates(4, 0.5); + add(1, 10000, 2000).add(3, 10000, 4000).add(8, 10000, 3000).set_free_buffers(); + assert_select({3, 8}); +} + +TEST_F(CompactBufferCandidatesTest, select_cutoff_by_lack_of_free_buffers) +{ + reset_candidates(4); + add(1, 10000, 2000).add(3, 10000, 4000).add(8, 10000, 3000).set_free_buffers(9); + assert_select({3, 8}); +} + GTEST_MAIN_RUN_ALL_TESTS() diff --git a/vespalib/src/vespa/vespalib/datastore/compact_buffer_candidates.cpp b/vespalib/src/vespa/vespalib/datastore/compact_buffer_candidates.cpp index 3003ef315e8..41c216a9684 100644 --- a/vespalib/src/vespa/vespalib/datastore/compact_buffer_candidates.cpp +++ b/vespalib/src/vespa/vespalib/datastore/compact_buffer_candidates.cpp @@ -2,16 +2,19 @@ #include "compact_buffer_candidates.h" #include <algorithm> +#include <cmath> namespace vespalib::datastore { -CompactBufferCandidates::CompactBufferCandidates(uint32_t num_buffers, uint32_t max_buffers, double ratio, size_t slack) +CompactBufferCandidates::CompactBufferCandidates(uint32_t num_buffers, uint32_t max_buffers, double active_buffers_ratio, double ratio, size_t slack) : _candidates(), _used(0), _dead(0), _max_buffers(std::max(max_buffers, 1u)), + _active_buffers_ratio(std::min(1.0, std::max(0.0001, active_buffers_ratio))), _ratio(ratio), - _slack(slack) + _slack(slack), + _free_buffers(0) { _candidates.reserve(num_buffers); } @@ -27,14 +30,34 @@ CompactBufferCandidates::add(uint32_t buffer_id, size_t used, size_t dead) } void +CompactBufferCandidates::set_free_buffers(uint32_t free_buffers) +{ + _free_buffers = free_buffers; +} + +void CompactBufferCandidates::select(std::vector<uint32_t>& buffers) { if (_candidates.empty()) { return; } - if (_candidates.size() > _max_buffers) { - std::nth_element(_candidates.begin(), _candidates.begin() + (_max_buffers - 1), _candidates.end()); - _candidates.resize(_max_buffers); + /* + * Calculate a limit of how many buffers to compact at once. Throughput, + * latency, transient resource usage (memory and address space used for + * held buffers) and stability must all be considered. + * + * We want to compact up to a portion of the active buffers (hence + * _active_buffers_ratio) but do not want to use up all remaining free + * buffers during compaction (hence free_buffers_ratio). Cap the limit by + * [1, _max_buffers] to ensure some but not too much progress. + */ + constexpr double free_buffers_ratio = 0.2; + uint32_t active_buffers = _candidates.size(); + uint32_t max_buffers = ceil(std::min(active_buffers * _active_buffers_ratio, _free_buffers * free_buffers_ratio)); + max_buffers = std::max(1u, std::min(max_buffers, _max_buffers)); + if (_candidates.size() > max_buffers) { + std::nth_element(_candidates.begin(), _candidates.begin() + (max_buffers - 1), _candidates.end()); + _candidates.resize(max_buffers); } std::sort(_candidates.begin(), _candidates.end()); size_t remaining_used = _used; diff --git a/vespalib/src/vespa/vespalib/datastore/compact_buffer_candidates.h b/vespalib/src/vespa/vespalib/datastore/compact_buffer_candidates.h index 59d35422328..db82ebd4eae 100644 --- a/vespalib/src/vespa/vespalib/datastore/compact_buffer_candidates.h +++ b/vespalib/src/vespa/vespalib/datastore/compact_buffer_candidates.h @@ -15,12 +15,15 @@ class CompactBufferCandidates { size_t _used; size_t _dead; uint32_t _max_buffers; + double _active_buffers_ratio; double _ratio; size_t _slack; + uint32_t _free_buffers; public: - CompactBufferCandidates(uint32_t num_buffers, uint32_t max_buffers, double ratio, size_t slack); + CompactBufferCandidates(uint32_t num_buffers, uint32_t max_buffers, double active_buffers_ratio, double ratio, size_t slack); ~CompactBufferCandidates(); void add(uint32_t buffer_id, size_t used, size_t dead); + void set_free_buffers(uint32_t free_buffers); void select(std::vector<uint32_t>& buffers); }; diff --git a/vespalib/src/vespa/vespalib/datastore/compaction_strategy.h b/vespalib/src/vespa/vespalib/datastore/compaction_strategy.h index 9ca4a64a55b..2bcf30fc6fc 100644 --- a/vespalib/src/vespa/vespalib/datastore/compaction_strategy.h +++ b/vespalib/src/vespa/vespalib/datastore/compaction_strategy.h @@ -28,6 +28,7 @@ private: double _maxDeadBytesRatio; // Max ratio of dead bytes before compaction double _maxDeadAddressSpaceRatio; // Max ratio of dead address space before compaction uint32_t _max_buffers; // Max number of buffers to compact for each reason (memory usage, address space usage) + double _active_buffers_ratio; // Ratio of active buffers to compact for each reason (memory usage, address space usage) bool should_compact_memory(size_t used_bytes, size_t dead_bytes) const { return ((dead_bytes >= DEAD_BYTES_SLACK) && (dead_bytes > used_bytes * getMaxDeadBytesRatio())); @@ -40,28 +41,33 @@ public: CompactionStrategy() noexcept : _maxDeadBytesRatio(0.05), _maxDeadAddressSpaceRatio(0.2), - _max_buffers(1) + _max_buffers(1), + _active_buffers_ratio(0.1) { } CompactionStrategy(double maxDeadBytesRatio, double maxDeadAddressSpaceRatio) noexcept : _maxDeadBytesRatio(maxDeadBytesRatio), _maxDeadAddressSpaceRatio(maxDeadAddressSpaceRatio), - _max_buffers(1) + _max_buffers(1), + _active_buffers_ratio(0.1) { } - CompactionStrategy(double maxDeadBytesRatio, double maxDeadAddressSpaceRatio, uint32_t max_buffers) noexcept + CompactionStrategy(double maxDeadBytesRatio, double maxDeadAddressSpaceRatio, uint32_t max_buffers, double active_buffers_ratio) noexcept : _maxDeadBytesRatio(maxDeadBytesRatio), _maxDeadAddressSpaceRatio(maxDeadAddressSpaceRatio), - _max_buffers(max_buffers) + _max_buffers(max_buffers), + _active_buffers_ratio(active_buffers_ratio) { } double getMaxDeadBytesRatio() const { return _maxDeadBytesRatio; } double getMaxDeadAddressSpaceRatio() const { return _maxDeadAddressSpaceRatio; } uint32_t get_max_buffers() const noexcept { return _max_buffers; } + double get_active_buffers_ratio() const noexcept { return _active_buffers_ratio; } bool operator==(const CompactionStrategy & rhs) const { return (_maxDeadBytesRatio == rhs._maxDeadBytesRatio) && (_maxDeadAddressSpaceRatio == rhs._maxDeadAddressSpaceRatio) && - (_max_buffers == rhs._max_buffers); + (_max_buffers == rhs._max_buffers) && + (_active_buffers_ratio == rhs._active_buffers_ratio); } bool operator!=(const CompactionStrategy & rhs) const { return !(operator==(rhs)); } diff --git a/vespalib/src/vespa/vespalib/datastore/datastorebase.cpp b/vespalib/src/vespa/vespalib/datastore/datastorebase.cpp index f137d5379fb..d2b23a1a5cf 100644 --- a/vespalib/src/vespa/vespalib/datastore/datastorebase.cpp +++ b/vespalib/src/vespa/vespalib/datastore/datastorebase.cpp @@ -533,9 +533,10 @@ std::vector<uint32_t> DataStoreBase::startCompactWorstBuffers(CompactionSpec compaction_spec, const CompactionStrategy& compaction_strategy) { // compact memory usage - CompactBufferCandidates elem_buffers(_numBuffers, compaction_strategy.get_max_buffers(), compaction_strategy.getMaxDeadBytesRatio() / 2, CompactionStrategy::DEAD_BYTES_SLACK); + CompactBufferCandidates elem_buffers(_numBuffers, compaction_strategy.get_max_buffers(), compaction_strategy.get_active_buffers_ratio(), compaction_strategy.getMaxDeadBytesRatio() / 2, CompactionStrategy::DEAD_BYTES_SLACK); // compact address space - CompactBufferCandidates array_buffers(_numBuffers, compaction_strategy.get_max_buffers(), compaction_strategy.getMaxDeadAddressSpaceRatio() / 2, CompactionStrategy::DEAD_ADDRESS_SPACE_SLACK); + CompactBufferCandidates array_buffers(_numBuffers, compaction_strategy.get_max_buffers(), compaction_strategy.get_active_buffers_ratio(), compaction_strategy.getMaxDeadAddressSpaceRatio() / 2, CompactionStrategy::DEAD_ADDRESS_SPACE_SLACK); + uint32_t free_buffers = 0; for (uint32_t bufferId = 0; bufferId < _numBuffers; ++bufferId) { const auto &state = getBufferState(bufferId); if (state.isActive()) { @@ -550,8 +551,12 @@ DataStoreBase::startCompactWorstBuffers(CompactionSpec compaction_spec, const Co if (compaction_spec.compact_address_space()) { array_buffers.add(bufferId, used_elems / arraySize, deadElems / arraySize); } + } else if (state.isFree()) { + ++free_buffers; } } + elem_buffers.set_free_buffers(free_buffers); + array_buffers.set_free_buffers(free_buffers); std::vector<uint32_t> result; result.reserve(std::min(_numBuffers, 2 * compaction_strategy.get_max_buffers())); elem_buffers.select(result); |