summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorGeir Storli <geirst@yahooinc.com>2021-12-13 16:34:27 +0100
committerGitHub <noreply@github.com>2021-12-13 16:34:27 +0100
commitb93093af3bec8f7fd4a209db34737d291d34f017 (patch)
treede86b6e09d94aeb9f855dbd3a42869dde7035406
parent348b05de5ecbd4c36b7b5f694a7c26e69f0ea706 (diff)
parent57361b00342b266b9da09beb06930423dd56a46e (diff)
Merge pull request #20493 from vespa-engine/toregge/add-compact-buffer-candidates
Add CompactBufferCandidates, used to select buffers to compact.
-rw-r--r--vespalib/CMakeLists.txt1
-rw-r--r--vespalib/src/tests/datastore/compact_buffer_candidates/CMakeLists.txt9
-rw-r--r--vespalib/src/tests/datastore/compact_buffer_candidates/compact_buffer_candidates_test.cpp91
-rw-r--r--vespalib/src/vespa/vespalib/datastore/CMakeLists.txt1
-rw-r--r--vespalib/src/vespa/vespalib/datastore/compact_buffer_candidate.h36
-rw-r--r--vespalib/src/vespa/vespalib/datastore/compact_buffer_candidates.cpp52
-rw-r--r--vespalib/src/vespa/vespalib/datastore/compact_buffer_candidates.h27
-rw-r--r--vespalib/src/vespa/vespalib/datastore/datastorebase.cpp41
8 files changed, 236 insertions, 22 deletions
diff --git a/vespalib/CMakeLists.txt b/vespalib/CMakeLists.txt
index d4c7bc881a5..400c1ec5d1a 100644
--- a/vespalib/CMakeLists.txt
+++ b/vespalib/CMakeLists.txt
@@ -42,6 +42,7 @@ vespa_define_module(
src/tests/datastore/array_store
src/tests/datastore/array_store_config
src/tests/datastore/buffer_type
+ src/tests/datastore/compact_buffer_candidates
src/tests/datastore/datastore
src/tests/datastore/fixed_size_hash_map
src/tests/datastore/sharded_hash_map
diff --git a/vespalib/src/tests/datastore/compact_buffer_candidates/CMakeLists.txt b/vespalib/src/tests/datastore/compact_buffer_candidates/CMakeLists.txt
new file mode 100644
index 00000000000..d6731071927
--- /dev/null
+++ b/vespalib/src/tests/datastore/compact_buffer_candidates/CMakeLists.txt
@@ -0,0 +1,9 @@
+# Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+vespa_add_executable(vespalib_compact_buffer_candidates_test_app TEST
+ SOURCES
+ compact_buffer_candidates_test.cpp
+ DEPENDS
+ vespalib
+ GTest::GTest
+)
+vespa_add_test(NAME vespalib_compact_buffer_candidates_test_app COMMAND vespalib_compact_buffer_candidates_test_app)
diff --git a/vespalib/src/tests/datastore/compact_buffer_candidates/compact_buffer_candidates_test.cpp b/vespalib/src/tests/datastore/compact_buffer_candidates/compact_buffer_candidates_test.cpp
new file mode 100644
index 00000000000..80c0d571894
--- /dev/null
+++ b/vespalib/src/tests/datastore/compact_buffer_candidates/compact_buffer_candidates_test.cpp
@@ -0,0 +1,91 @@
+// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include <vespa/vespalib/datastore/compact_buffer_candidates.h>
+#include <vespa/vespalib/gtest/gtest.h>
+
+using vespalib::datastore::CompactBufferCandidates;
+
+namespace {
+
+constexpr uint32_t num_buffers = 1024;
+constexpr double default_ratio = 0.2 / 2;
+constexpr size_t default_slack = 1000;
+
+};
+
+
+class CompactBufferCandidatesTest : public ::testing::Test
+{
+public:
+ CompactBufferCandidates candidates;
+ CompactBufferCandidatesTest();
+ ~CompactBufferCandidatesTest() override;
+ void reset_candidates(uint32_t max_buffers);
+ CompactBufferCandidatesTest& add(uint32_t buffer_id, size_t used, size_t dead);
+ void assert_select(const std::vector<uint32_t>& exp);
+};
+
+CompactBufferCandidatesTest::CompactBufferCandidatesTest()
+ : ::testing::Test(),
+ candidates(num_buffers, 1, default_ratio, default_slack)
+{
+}
+
+CompactBufferCandidatesTest::~CompactBufferCandidatesTest() = default;
+
+void
+CompactBufferCandidatesTest::reset_candidates(uint32_t max_buffers)
+{
+ candidates = CompactBufferCandidates(num_buffers, max_buffers, default_ratio, default_slack);
+}
+
+CompactBufferCandidatesTest&
+CompactBufferCandidatesTest::add(uint32_t buffer_id, size_t used, size_t dead)
+{
+ candidates.add(buffer_id, used, dead);
+ return *this;
+}
+
+void
+CompactBufferCandidatesTest::assert_select(const std::vector<uint32_t>& exp)
+{
+ std::vector<uint32_t> act;
+ candidates.select(act);
+ EXPECT_EQ(exp, act);
+}
+
+TEST_F(CompactBufferCandidatesTest, select_single)
+{
+ add(0, 10000, 2000).add(1, 10000, 3000);
+ assert_select({1});
+}
+
+TEST_F(CompactBufferCandidatesTest, select_two)
+{
+ reset_candidates(2);
+ add(0, 10000, 2000).add(3, 10000, 3000).add(7, 10000, 4000);
+ assert_select({7, 3});
+}
+
+TEST_F(CompactBufferCandidatesTest, select_all)
+{
+ reset_candidates(4);
+ add(1, 10000, 2000).add(3, 10000, 4000).add(8, 10000, 3000);
+ assert_select({3, 8, 1});
+}
+
+TEST_F(CompactBufferCandidatesTest, select_cutoff_by_ratio)
+{
+ reset_candidates(4);
+ add(1, 100000, 9999).add(3, 100000, 40000).add(8, 100000, 30000);
+ assert_select({3, 8});
+}
+
+TEST_F(CompactBufferCandidatesTest, select_cutoff_by_slack)
+{
+ reset_candidates(4);
+ add(1, 2000, 999).add(3, 2000, 1200).add(9, 2000, 1300);
+ assert_select({9, 3});
+}
+
+GTEST_MAIN_RUN_ALL_TESTS()
diff --git a/vespalib/src/vespa/vespalib/datastore/CMakeLists.txt b/vespalib/src/vespa/vespalib/datastore/CMakeLists.txt
index c36077e4dd0..d628843279d 100644
--- a/vespalib/src/vespa/vespalib/datastore/CMakeLists.txt
+++ b/vespalib/src/vespa/vespalib/datastore/CMakeLists.txt
@@ -6,6 +6,7 @@ vespa_add_library(vespalib_vespalib_datastore OBJECT
buffer_type.cpp
bufferstate.cpp
compaction_strategy.cpp
+ compact_buffer_candidates.cpp
datastore.cpp
datastorebase.cpp
entryref.cpp
diff --git a/vespalib/src/vespa/vespalib/datastore/compact_buffer_candidate.h b/vespalib/src/vespa/vespalib/datastore/compact_buffer_candidate.h
new file mode 100644
index 00000000000..85ea1e42eac
--- /dev/null
+++ b/vespalib/src/vespa/vespalib/datastore/compact_buffer_candidate.h
@@ -0,0 +1,36 @@
+// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include <cstddef>
+#include <cstdint>
+
+namespace vespalib::datastore {
+
+/*
+ * Class representing candidate buffer for compaction.
+ */
+class CompactBufferCandidate {
+ uint32_t _buffer_id;
+ size_t _used;
+ size_t _dead;
+public:
+ CompactBufferCandidate(uint32_t buffer_id, size_t used, size_t dead) noexcept
+ : _buffer_id(buffer_id),
+ _used(used),
+ _dead(dead)
+ {
+ }
+
+ CompactBufferCandidate() noexcept
+ : CompactBufferCandidate(0, 0, 0)
+ {
+ }
+
+ bool operator<(const CompactBufferCandidate& rhs) const noexcept { return _dead > rhs._dead; }
+ uint32_t get_buffer_id() const noexcept { return _buffer_id; }
+ size_t get_used() const noexcept { return _used; }
+ size_t get_dead() const noexcept { return _dead; }
+};
+
+}
diff --git a/vespalib/src/vespa/vespalib/datastore/compact_buffer_candidates.cpp b/vespalib/src/vespa/vespalib/datastore/compact_buffer_candidates.cpp
new file mode 100644
index 00000000000..3003ef315e8
--- /dev/null
+++ b/vespalib/src/vespa/vespalib/datastore/compact_buffer_candidates.cpp
@@ -0,0 +1,52 @@
+// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include "compact_buffer_candidates.h"
+#include <algorithm>
+
+namespace vespalib::datastore {
+
+CompactBufferCandidates::CompactBufferCandidates(uint32_t num_buffers, uint32_t max_buffers, double ratio, size_t slack)
+ : _candidates(),
+ _used(0),
+ _dead(0),
+ _max_buffers(std::max(max_buffers, 1u)),
+ _ratio(ratio),
+ _slack(slack)
+{
+ _candidates.reserve(num_buffers);
+}
+
+CompactBufferCandidates::~CompactBufferCandidates() = default;
+
+void
+CompactBufferCandidates::add(uint32_t buffer_id, size_t used, size_t dead)
+{
+ _candidates.emplace_back(buffer_id, used, dead);
+ _used += used;
+ _dead += dead;
+}
+
+void
+CompactBufferCandidates::select(std::vector<uint32_t>& buffers)
+{
+ if (_candidates.empty()) {
+ return;
+ }
+ if (_candidates.size() > _max_buffers) {
+ std::nth_element(_candidates.begin(), _candidates.begin() + (_max_buffers - 1), _candidates.end());
+ _candidates.resize(_max_buffers);
+ }
+ std::sort(_candidates.begin(), _candidates.end());
+ size_t remaining_used = _used;
+ size_t remaining_dead = _dead;
+ for (auto& candidate : _candidates) {
+ buffers.emplace_back(candidate.get_buffer_id());
+ remaining_used -= candidate.get_used();
+ remaining_dead -= candidate.get_dead();
+ if ((remaining_dead < _slack) || (remaining_dead <= remaining_used * _ratio)) {
+ break;
+ }
+ }
+}
+
+}
diff --git a/vespalib/src/vespa/vespalib/datastore/compact_buffer_candidates.h b/vespalib/src/vespa/vespalib/datastore/compact_buffer_candidates.h
new file mode 100644
index 00000000000..59d35422328
--- /dev/null
+++ b/vespalib/src/vespa/vespalib/datastore/compact_buffer_candidates.h
@@ -0,0 +1,27 @@
+// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include "compact_buffer_candidate.h"
+#include <vector>
+
+namespace vespalib::datastore {
+
+/*
+ * Class representing candidate buffers for compaction.
+ */
+class CompactBufferCandidates {
+ std::vector<CompactBufferCandidate> _candidates;
+ size_t _used;
+ size_t _dead;
+ uint32_t _max_buffers;
+ double _ratio;
+ size_t _slack;
+public:
+ CompactBufferCandidates(uint32_t num_buffers, uint32_t max_buffers, double ratio, size_t slack);
+ ~CompactBufferCandidates();
+ void add(uint32_t buffer_id, size_t used, size_t dead);
+ void select(std::vector<uint32_t>& buffers);
+};
+
+}
diff --git a/vespalib/src/vespa/vespalib/datastore/datastorebase.cpp b/vespalib/src/vespa/vespalib/datastore/datastorebase.cpp
index 059171e1f02..f137d5379fb 100644
--- a/vespalib/src/vespa/vespalib/datastore/datastorebase.cpp
+++ b/vespalib/src/vespa/vespalib/datastore/datastorebase.cpp
@@ -1,9 +1,12 @@
// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
#include "datastore.h"
+#include "compact_buffer_candidates.h"
#include "compaction_spec.h"
+#include "compaction_strategy.h"
#include <vespa/vespalib/util/array.hpp>
#include <vespa/vespalib/util/stringfmt.h>
+#include <algorithm>
#include <limits>
#include <cassert>
@@ -529,41 +532,35 @@ DataStoreBase::markCompacting(uint32_t bufferId)
std::vector<uint32_t>
DataStoreBase::startCompactWorstBuffers(CompactionSpec compaction_spec, const CompactionStrategy& compaction_strategy)
{
- (void) compaction_strategy;
- constexpr uint32_t noBufferId = std::numeric_limits<uint32_t>::max();
- uint32_t worstMemoryBufferId = noBufferId;
- uint32_t worstAddressSpaceBufferId = noBufferId;
- size_t worstDeadElems = 0;
- size_t worstDeadArrays = 0;
+ // compact memory usage
+ CompactBufferCandidates elem_buffers(_numBuffers, compaction_strategy.get_max_buffers(), compaction_strategy.getMaxDeadBytesRatio() / 2, CompactionStrategy::DEAD_BYTES_SLACK);
+ // compact address space
+ CompactBufferCandidates array_buffers(_numBuffers, compaction_strategy.get_max_buffers(), compaction_strategy.getMaxDeadAddressSpaceRatio() / 2, CompactionStrategy::DEAD_ADDRESS_SPACE_SLACK);
for (uint32_t bufferId = 0; bufferId < _numBuffers; ++bufferId) {
const auto &state = getBufferState(bufferId);
if (state.isActive()) {
auto typeHandler = state.getTypeHandler();
uint32_t arraySize = typeHandler->getArraySize();
uint32_t reservedElements = typeHandler->getReservedElements(bufferId);
+ size_t used_elems = state.size();
size_t deadElems = state.getDeadElems() - reservedElements;
- if (compaction_spec.compact_memory() && deadElems > worstDeadElems) {
- worstMemoryBufferId = bufferId;
- worstDeadElems = deadElems;
+ if (compaction_spec.compact_memory()) {
+ elem_buffers.add(bufferId, used_elems, deadElems);
}
if (compaction_spec.compact_address_space()) {
- size_t deadArrays = deadElems / arraySize;
- if (deadArrays > worstDeadArrays) {
- worstAddressSpaceBufferId = bufferId;
- worstDeadArrays = deadArrays;
- }
+ array_buffers.add(bufferId, used_elems / arraySize, deadElems / arraySize);
}
}
}
std::vector<uint32_t> result;
- if (worstMemoryBufferId != noBufferId) {
- markCompacting(worstMemoryBufferId);
- result.emplace_back(worstMemoryBufferId);
- }
- if (worstAddressSpaceBufferId != noBufferId &&
- worstAddressSpaceBufferId != worstMemoryBufferId) {
- markCompacting(worstAddressSpaceBufferId);
- result.emplace_back(worstAddressSpaceBufferId);
+ result.reserve(std::min(_numBuffers, 2 * compaction_strategy.get_max_buffers()));
+ elem_buffers.select(result);
+ array_buffers.select(result);
+ std::sort(result.begin(), result.end());
+ auto last = std::unique(result.begin(), result.end());
+ result.erase(last, result.end());
+ for (auto buffer_id : result) {
+ markCompacting(buffer_id);
}
return result;
}