summaryrefslogtreecommitdiffstats
path: root/searchcore/src
diff options
context:
space:
mode:
authorGeir Storli <geirst@verizonmedia.com>2020-04-29 07:48:45 +0000
committerGeir Storli <geirst@verizonmedia.com>2020-04-29 07:51:06 +0000
commit6d108bacdf3c92ebbf8a84b70ff619201ff1ba53 (patch)
treee42e7a02073facf13936ecf8e860ded3623101e3 /searchcore/src
parente1d922c6979fa06695bfdcec6cafb8988d67ff77 (diff)
Improve tracking of remove batch rate used to consider to block lid space compaction.
This is also a preparation for tracking the rate of regular remove operations, and use this to consider to block lid space compaction.
Diffstat (limited to 'searchcore/src')
-rw-r--r--searchcore/src/tests/proton/common/operation_rate_tracker/CMakeLists.txt9
-rw-r--r--searchcore/src/tests/proton/common/operation_rate_tracker/operation_rate_tracker_test.cpp90
-rw-r--r--searchcore/src/tests/proton/documentdb/lid_space_compaction/lid_space_compaction_test.cpp57
-rw-r--r--searchcore/src/tests/proton/documentdb/maintenancecontroller/maintenancecontroller_test.cpp1
-rw-r--r--searchcore/src/tests/proton/documentmetastore/documentmetastore_test.cpp27
-rw-r--r--searchcore/src/vespa/searchcore/config/proton.def12
-rw-r--r--searchcore/src/vespa/searchcore/proton/common/CMakeLists.txt3
-rw-r--r--searchcore/src/vespa/searchcore/proton/common/operation_rate_tracker.cpp33
-rw-r--r--searchcore/src/vespa/searchcore/proton/common/operation_rate_tracker.h38
-rw-r--r--searchcore/src/vespa/searchcore/proton/documentmetastore/documentmetastore.cpp16
-rw-r--r--searchcore/src/vespa/searchcore/proton/documentmetastore/documentmetastore.h12
-rw-r--r--searchcore/src/vespa/searchcore/proton/documentmetastore/i_document_meta_store.h3
-rw-r--r--searchcore/src/vespa/searchcore/proton/documentmetastore/operation_listener.h19
-rw-r--r--searchcore/src/vespa/searchcore/proton/server/CMakeLists.txt7
-rw-r--r--searchcore/src/vespa/searchcore/proton/server/document_db_maintenance_config.cpp6
-rw-r--r--searchcore/src/vespa/searchcore/proton/server/document_db_maintenance_config.h6
-rw-r--r--searchcore/src/vespa/searchcore/proton/server/documentdbconfigmanager.cpp2
-rw-r--r--searchcore/src/vespa/searchcore/proton/server/i_lid_space_compaction_handler.h8
-rw-r--r--searchcore/src/vespa/searchcore/proton/server/lid_space_compaction_handler.cpp6
-rw-r--r--searchcore/src/vespa/searchcore/proton/server/lid_space_compaction_handler.h1
-rw-r--r--searchcore/src/vespa/searchcore/proton/server/lid_space_compaction_job.cpp11
-rw-r--r--searchcore/src/vespa/searchcore/proton/server/lid_space_compaction_job.h4
-rw-r--r--searchcore/src/vespa/searchcore/proton/server/remove_operations_rate_tracker.cpp31
-rw-r--r--searchcore/src/vespa/searchcore/proton/server/remove_operations_rate_tracker.h30
-rw-r--r--searchcore/src/vespa/searchcore/proton/test/document_meta_store_observer.h3
25 files changed, 376 insertions, 59 deletions
diff --git a/searchcore/src/tests/proton/common/operation_rate_tracker/CMakeLists.txt b/searchcore/src/tests/proton/common/operation_rate_tracker/CMakeLists.txt
new file mode 100644
index 00000000000..f5e6a791124
--- /dev/null
+++ b/searchcore/src/tests/proton/common/operation_rate_tracker/CMakeLists.txt
@@ -0,0 +1,9 @@
+# Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+vespa_add_executable(searchcore_operation_rate_tracker_test_app TEST
+ SOURCES
+ operation_rate_tracker_test.cpp
+ DEPENDS
+ searchcore_pcommon
+ gtest
+)
+vespa_add_test(NAME searchcore_operation_rate_tracker_test_app COMMAND searchcore_operation_rate_tracker_test_app)
diff --git a/searchcore/src/tests/proton/common/operation_rate_tracker/operation_rate_tracker_test.cpp b/searchcore/src/tests/proton/common/operation_rate_tracker/operation_rate_tracker_test.cpp
new file mode 100644
index 00000000000..ce19867e286
--- /dev/null
+++ b/searchcore/src/tests/proton/common/operation_rate_tracker/operation_rate_tracker_test.cpp
@@ -0,0 +1,90 @@
+// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include <vespa/searchcore/proton/common/operation_rate_tracker.h>
+#include <vespa/vespalib/gtest/gtest.h>
+#include <vespa/vespalib/util/time.h>
+
+#include <vespa/log/log.h>
+LOG_SETUP("operation_rate_tracker_test");
+
+using namespace proton;
+
+TEST(OperationRateTrackerTest, time_budget_per_op_is_inverse_of_rate_threshold)
+{
+ EXPECT_EQ(vespalib::from_s(0.25), OperationRateTracker(4).get_time_budget_per_op());
+ EXPECT_EQ(vespalib::from_s(2.0), OperationRateTracker(0.5).get_time_budget_per_op());
+}
+
+TEST(OperationRateTrackerTest, time_budget_window_is_minimum_1_sec)
+{
+ EXPECT_EQ(vespalib::from_s(1.0), OperationRateTracker(4).get_time_budget_window());
+ EXPECT_EQ(vespalib::from_s(2.0), OperationRateTracker(0.5).get_time_budget_window());
+}
+
+class Simulator {
+public:
+ vespalib::steady_time now;
+ OperationRateTracker ort;
+ Simulator(double rate_threshold)
+ : now(vespalib::steady_clock::now()),
+ ort(rate_threshold)
+ {
+ }
+ void tick(double real_rate) {
+ now = now + vespalib::from_s(1.0 / real_rate);
+ ort.observe(now);
+ }
+ bool above_threshold(double now_delta = 0) {
+ return ort.above_threshold(now + vespalib::from_s(now_delta));
+ }
+};
+
+TEST(OperationRateTrackerTest, tracks_whether_operation_rate_is_below_or_above_threshold)
+{
+ Simulator sim(2);
+
+ // Simulate an actual rate of 4 ops / sec
+ sim.tick(4); // Threshold time is 1.0s in the past (at time budget window start)
+ EXPECT_FALSE(sim.above_threshold(-1.0));
+ EXPECT_TRUE(sim.above_threshold(-1.01));
+
+ // Catch up with now
+ sim.tick(4);
+ sim.tick(4);
+ sim.tick(4);
+ sim.tick(4); // Threshold time is now.
+ EXPECT_FALSE(sim.above_threshold());
+ EXPECT_TRUE(sim.above_threshold(-0.01));
+
+ // Move into the future
+ sim.tick(4); // Threshold time is 0.25s into the future.
+ EXPECT_TRUE(sim.above_threshold(0.24));
+ EXPECT_FALSE(sim.above_threshold(0.25));
+
+ // Move to time budget window end
+ sim.tick(4);
+ sim.tick(4);
+ sim.tick(4); // Threshold time is 1.0s into the future (at time budget window end)
+ EXPECT_TRUE(sim.above_threshold(0.99));
+ EXPECT_FALSE(sim.above_threshold(1.0));
+
+ sim.tick(4); // Threshold time is still 1.0s into the future (at time budget window end)
+ EXPECT_TRUE(sim.above_threshold(0.99));
+ EXPECT_FALSE(sim.above_threshold(1.0));
+
+ // Reduce actual rate to 1 ops / sec
+ sim.tick(1); // Threshold time is 0.5s into the future.
+ EXPECT_TRUE(sim.above_threshold(0.49));
+ EXPECT_FALSE(sim.above_threshold(0.5));
+
+ sim.tick(1); // Threshold time is now.
+ EXPECT_FALSE(sim.above_threshold());
+ EXPECT_TRUE(sim.above_threshold(-0.01));
+
+ sim.tick(1);
+ sim.tick(1); // Threshold time is back at time budget window start
+ EXPECT_FALSE(sim.above_threshold(-1.0));
+ EXPECT_TRUE(sim.above_threshold(-1.01));
+}
+
+GTEST_MAIN_RUN_ALL_TESTS()
diff --git a/searchcore/src/tests/proton/documentdb/lid_space_compaction/lid_space_compaction_test.cpp b/searchcore/src/tests/proton/documentdb/lid_space_compaction/lid_space_compaction_test.cpp
index 64299c70588..a97a2380f25 100644
--- a/searchcore/src/tests/proton/documentdb/lid_space_compaction/lid_space_compaction_test.cpp
+++ b/searchcore/src/tests/proton/documentdb/lid_space_compaction/lid_space_compaction_test.cpp
@@ -30,7 +30,7 @@ constexpr uint32_t SUBDB_ID = 2;
constexpr vespalib::duration JOB_DELAY = 1s;
constexpr uint32_t ALLOWED_LID_BLOAT = 1;
constexpr double ALLOWED_LID_BLOAT_FACTOR = 0.3;
-constexpr vespalib::duration REMOVE_BATCH_BLOCK_DELAY = 20s;
+constexpr double REMOVE_BATCH_BLOCK_RATE = 1.0 / 20.0;
constexpr uint32_t MAX_DOCS_TO_SCAN = 100;
constexpr double RESOURCE_LIMIT_FACTOR = 1.0;
constexpr uint32_t MAX_OUTSTANDING_MOVE_OPS = 10;
@@ -82,19 +82,30 @@ struct MyHandler : public ILidSpaceCompactionHandler {
mutable uint32_t _iteratorCnt;
bool _storeMoveDoneContexts;
std::vector<IDestructorCallback::SP> _moveDoneContexts;
+ documentmetastore::OperationListener::SP _op_listener;
+ RemoveOperationsRateTracker* _rm_listener;
MyHandler(bool storeMoveDoneContexts = false);
~MyHandler();
void clearMoveDoneContexts() { _moveDoneContexts.clear(); }
- void set_last_remove_batch(TimePoint last_remove_batch) {
- for (auto& s : _stats) {
- s = LidUsageStats(s.getLidLimit(), s.getUsedLids(),
- s.getLowestFreeLid(), s.getHighestUsedLid(), last_remove_batch);
- }
+ void run_remove_batch_ops() {
+ // This ensures to max out the threshold time in the operation rate tracker.
+ _op_listener->notify_remove_batch();
+ _op_listener->notify_remove_batch();
+ _op_listener->notify_remove_batch();
+ }
+ void stop_remove_batch_ops() {
+ _rm_listener->reset_remove_batch_tracker();
}
virtual vespalib::string getName() const override {
return "myhandler";
}
+ virtual void set_operation_listener(documentmetastore::OperationListener::SP op_listener) override {
+ auto* rm_listener = dynamic_cast<RemoveOperationsRateTracker*>(op_listener.get());
+ assert(rm_listener != nullptr);
+ _op_listener = std::move(op_listener);
+ _rm_listener = rm_listener;
+ }
virtual uint32_t getSubDbId() const override { return 2; }
virtual LidUsageStats getLidStatus() const override {
assert(_handleMoveCnt < _stats.size());
@@ -132,7 +143,9 @@ MyHandler::MyHandler(bool storeMoveDoneContexts)
_wantedLidLimit(0),
_iteratorCnt(0),
_storeMoveDoneContexts(storeMoveDoneContexts),
- _moveDoneContexts()
+ _moveDoneContexts(),
+ _op_listener(),
+ _rm_listener()
{}
MyHandler::~MyHandler() {}
@@ -265,7 +278,7 @@ struct JobTestBase : public ::testing::Test {
_handler = std::make_unique<MyHandler>(maxOutstandingMoveOps != MAX_OUTSTANDING_MOVE_OPS);
_job = std::make_unique<LidSpaceCompactionJob>(DocumentDBLidSpaceCompactionConfig(interval, allowedLidBloat,
allowedLidBloatFactor,
- REMOVE_BATCH_BLOCK_DELAY,
+ REMOVE_BATCH_BLOCK_RATE,
false, maxDocsToScan),
*_handler, _storer, _frozenHandler, _diskMemUsageNotifier,
BlockableMaintenanceJobConfig(resourceLimitFactor, maxOutstandingMoveOps),
@@ -274,20 +287,18 @@ struct JobTestBase : public ::testing::Test {
~JobTestBase();
JobTestBase &addStats(uint32_t docIdLimit,
const LidVector &usedLids,
- const LidPairVector &usedFreePairs,
- TimePoint last_remove_batch = TimePoint()) {
- return addMultiStats(docIdLimit, {usedLids}, usedFreePairs, last_remove_batch);
+ const LidPairVector &usedFreePairs) {
+ return addMultiStats(docIdLimit, {usedLids}, usedFreePairs);
}
JobTestBase &addMultiStats(uint32_t docIdLimit,
const std::vector<LidVector> &usedLidsVector,
- const LidPairVector &usedFreePairs,
- TimePoint last_remove_batch = TimePoint()) {
+ const LidPairVector &usedFreePairs) {
uint32_t usedLids = usedLidsVector[0].size();
for (auto pair : usedFreePairs) {
uint32_t highestUsedLid = pair.first;
uint32_t lowestFreeLid = pair.second;
_handler->_stats.push_back(LidUsageStats
- (docIdLimit, usedLids, lowestFreeLid, highestUsedLid, last_remove_batch));
+ (docIdLimit, usedLids, lowestFreeLid, highestUsedLid));
}
_handler->_lids = usedLidsVector;
return *this;
@@ -297,7 +308,7 @@ struct JobTestBase : public ::testing::Test {
uint32_t lowestFreeLid,
uint32_t highestUsedLid) {
_handler->_stats.push_back(LidUsageStats
- (docIdLimit, numDocs, lowestFreeLid, highestUsedLid, TimePoint()));
+ (docIdLimit, numDocs, lowestFreeLid, highestUsedLid));
return *this;
}
bool run() {
@@ -332,11 +343,10 @@ struct JobTestBase : public ::testing::Test {
void assertNoWorkDone() {
assertJobContext(0, 0, 0, 0, 0);
}
- JobTestBase &setupOneDocumentToCompact(TimePoint last_remove_batch = TimePoint()) {
+ JobTestBase &setupOneDocumentToCompact() {
addStats(10, {1,3,4,5,6,9},
{{9,2}, // 30% bloat: move 9 -> 2
- {6,7}}, // no documents to move
- last_remove_batch);
+ {6,7}}); // no documents to move
return *this;
}
void assertOneDocumentCompacted() {
@@ -622,8 +632,8 @@ TEST_F(JobTest, job_is_re_enabled_when_node_is_no_longer_retired)
TEST_F(JobTest, job_is_disabled_while_remove_batch_is_ongoing)
{
- TimePoint last_remove_batch = std::chrono::steady_clock::now();
- setupOneDocumentToCompact(last_remove_batch);
+ setupOneDocumentToCompact();
+ _handler->run_remove_batch_ops();
EXPECT_TRUE(run()); // job is disabled
assertNoWorkDone();
}
@@ -634,7 +644,7 @@ TEST_F(JobTest, job_becomes_disabled_if_remove_batch_starts)
EXPECT_FALSE(run()); // job executed as normal (with more work to do)
assertJobContext(2, 9, 1, 0, 0);
- _handler->set_last_remove_batch(std::chrono::steady_clock::now());
+ _handler->run_remove_batch_ops();
EXPECT_TRUE(run()); // job is disabled
assertJobContext(2, 9, 1, 0, 0);
}
@@ -645,12 +655,11 @@ TEST_F(JobTest, job_is_re_enabled_when_remove_batch_is_no_longer_ongoing)
EXPECT_FALSE(run()); // job executed as normal (with more work to do)
assertJobContext(2, 9, 1, 0, 0);
- TimePoint last_remove_batch = std::chrono::steady_clock::now();
- _handler->set_last_remove_batch(last_remove_batch);
+ _handler->run_remove_batch_ops();
EXPECT_TRUE(run()); // job is disabled
assertJobContext(2, 9, 1, 0, 0);
- _handler->set_last_remove_batch(last_remove_batch - REMOVE_BATCH_BLOCK_DELAY);
+ _handler->stop_remove_batch_ops();
EXPECT_FALSE(run()); // job executed as normal (with more work to do)
assertJobContext(3, 8, 2, 0, 0);
}
diff --git a/searchcore/src/tests/proton/documentdb/maintenancecontroller/maintenancecontroller_test.cpp b/searchcore/src/tests/proton/documentdb/maintenancecontroller/maintenancecontroller_test.cpp
index 7e2e258476f..2bd67f96b57 100644
--- a/searchcore/src/tests/proton/documentdb/maintenancecontroller/maintenancecontroller_test.cpp
+++ b/searchcore/src/tests/proton/documentdb/maintenancecontroller/maintenancecontroller_test.cpp
@@ -339,6 +339,7 @@ struct MockLidSpaceCompactionHandler : public ILidSpaceCompactionHandler
MockLidSpaceCompactionHandler(const vespalib::string &name_) : name(name_) {}
virtual vespalib::string getName() const override { return name; }
+ virtual void set_operation_listener(documentmetastore::OperationListener::SP) override {}
virtual uint32_t getSubDbId() const override { return 0; }
virtual search::LidUsageStats getLidStatus() const override { return search::LidUsageStats(); }
virtual IDocumentScanIterator::UP getIterator() const override { return IDocumentScanIterator::UP(); }
diff --git a/searchcore/src/tests/proton/documentmetastore/documentmetastore_test.cpp b/searchcore/src/tests/proton/documentmetastore/documentmetastore_test.cpp
index f8069dcf494..8935143a7be 100644
--- a/searchcore/src/tests/proton/documentmetastore/documentmetastore_test.cpp
+++ b/searchcore/src/tests/proton/documentmetastore/documentmetastore_test.cpp
@@ -6,6 +6,7 @@
#include <vespa/searchcore/proton/bucketdb/i_bucket_create_listener.h>
#include <vespa/searchcore/proton/common/hw_info.h>
#include <vespa/searchcore/proton/documentmetastore/documentmetastore.h>
+#include <vespa/searchcore/proton/documentmetastore/operation_listener.h>
#include <vespa/searchcore/proton/flushengine/shrink_lid_space_flush_target.h>
#include <vespa/searchcore/proton/server/itlssyncer.h>
#include <vespa/searchlib/attribute/attributefilesavetarget.h>
@@ -1782,7 +1783,7 @@ TEST(DocumentMetaStoreTest, get_lid_usage_stats_works)
void
assertLidBloat(uint32_t expBloat, uint32_t lidLimit, uint32_t usedLids)
{
- LidUsageStats stats(lidLimit, usedLids, 0, 0, LidUsageStats::TimePoint());
+ LidUsageStats stats(lidLimit, usedLids, 0, 0);
EXPECT_EQ(expBloat, stats.getLidBloat());
}
@@ -2084,21 +2085,27 @@ TEST(DocumentMetaStoreTest, multiple_lids_can_be_removed_with_removeBatch)
assertLidGidFound(4, dms);
}
-TEST(DocumentMetaStoreTest, tracks_time_of_last_call_to_remove_batch)
+class MockOperationListener : public documentmetastore::OperationListener {
+public:
+ size_t remove_batch_cnt;
+
+ MockOperationListener()
+ : remove_batch_cnt(0)
+ {
+ }
+ void notify_remove_batch() override { ++remove_batch_cnt; }
+};
+
+TEST(DocumentMetaStoreTest, call_to_remove_batch_is_notified)
{
DocumentMetaStore dms(createBucketDB());
+ auto listener = std::make_shared<MockOperationListener>();
+ dms.set_operation_listener(listener);
dms.constructFreeList();
addLid(dms, 1);
- LidUsageStats::TimePoint before = std::chrono::steady_clock::now();
- std::this_thread::sleep_for(std::chrono::milliseconds(1));
dms.removeBatch({1}, 5);
- std::this_thread::sleep_for(std::chrono::milliseconds(1));
- LidUsageStats::TimePoint after = std::chrono::steady_clock::now();
-
- auto stats = dms.getLidUsageStats();
- EXPECT_LT(before, stats.get_last_remove_batch());
- EXPECT_GT(after, stats.get_last_remove_batch());
+ EXPECT_EQ(1, listener->remove_batch_cnt);
}
}
diff --git a/searchcore/src/vespa/searchcore/config/proton.def b/searchcore/src/vespa/searchcore/config/proton.def
index 0501ab6ed7c..644564c0e2e 100644
--- a/searchcore/src/vespa/searchcore/config/proton.def
+++ b/searchcore/src/vespa/searchcore/config/proton.def
@@ -369,6 +369,7 @@ lidspacecompaction.allowedlidbloat int default=1000
## The lid bloat factor must be >= allowedlidbloatfactor before considering compaction.
lidspacecompaction.allowedlidbloatfactor double default=0.01
+## DEPRECATED (no longer used): Remove on Vespa 8
## The delay (in seconds) for when the last remove batch operation would be considered to block lid space compaction.
##
## When considering compaction, if the document meta store has received a remove batch operation in the last delay seconds,
@@ -379,6 +380,17 @@ lidspacecompaction.allowedlidbloatfactor double default=0.01
## lid space compaction do not interfere, but instead is applied after deleting of buckets is complete.
lidspacecompaction.removebatchblockdelay double default=2.0
+## The rate (ops / second) of remove batch operations for when to block lid space compaction.
+##
+## When considering compaction, if the current observed rate of remove batch operations
+## is higher than the given block rate, the lid space compaction job is blocked.
+## It is considered again at the next regular interval (see above).
+##
+## Remove batch operations are used when deleting buckets on a content node.
+## This functionality ensures that during massive deleting of buckets (e.g. as part of redistribution of data to a new node),
+## lid space compaction do not interfere, but instead is applied after deleting of buckets is complete.
+lidspacecompaction.removebatchblockrate double default=0.5
+
## This is the maximum value visibilitydelay you can have.
## A to higher value here will cost more memory while not improving too much.
maxvisibilitydelay double default=1.0
diff --git a/searchcore/src/vespa/searchcore/proton/common/CMakeLists.txt b/searchcore/src/vespa/searchcore/proton/common/CMakeLists.txt
index 4db36039b3c..f3303f36199 100644
--- a/searchcore/src/vespa/searchcore/proton/common/CMakeLists.txt
+++ b/searchcore/src/vespa/searchcore/proton/common/CMakeLists.txt
@@ -14,9 +14,10 @@ vespa_add_library(searchcore_pcommon STATIC
hw_info_sampler.cpp
indexschema_inspector.cpp
monitored_refcount.cpp
+ operation_rate_tracker.cpp
select_utils.cpp
- selectpruner.cpp
selectcontext.cpp
+ selectpruner.cpp
state_reporter_utils.cpp
statusreport.cpp
DEPENDS
diff --git a/searchcore/src/vespa/searchcore/proton/common/operation_rate_tracker.cpp b/searchcore/src/vespa/searchcore/proton/common/operation_rate_tracker.cpp
new file mode 100644
index 00000000000..487102cebf6
--- /dev/null
+++ b/searchcore/src/vespa/searchcore/proton/common/operation_rate_tracker.cpp
@@ -0,0 +1,33 @@
+// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include "operation_rate_tracker.h"
+
+namespace proton {
+
+OperationRateTracker::OperationRateTracker(double rate_threshold)
+ : _time_budget_per_op(vespalib::from_s(1.0 / rate_threshold)),
+ _time_budget_window(std::max(vespalib::from_s(1.0), _time_budget_per_op)),
+ _threshold_time()
+{
+}
+
+void
+OperationRateTracker::observe(vespalib::steady_time now)
+{
+ vespalib::steady_time cand_time = std::max(now - _time_budget_window, _threshold_time + _time_budget_per_op);
+ _threshold_time = std::min(cand_time, now + _time_budget_window);
+}
+
+bool
+OperationRateTracker::above_threshold(vespalib::steady_time now) const
+{
+ return _threshold_time > now;
+}
+
+void
+OperationRateTracker::reset(vespalib::steady_time now)
+{
+ _threshold_time = now - _time_budget_window;
+}
+
+}
diff --git a/searchcore/src/vespa/searchcore/proton/common/operation_rate_tracker.h b/searchcore/src/vespa/searchcore/proton/common/operation_rate_tracker.h
new file mode 100644
index 00000000000..7f7526faa5d
--- /dev/null
+++ b/searchcore/src/vespa/searchcore/proton/common/operation_rate_tracker.h
@@ -0,0 +1,38 @@
+// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include <vespa/vespalib/util/time.h>
+
+namespace proton {
+
+/**
+ * Tracks whether the rate (ops/sec) of an operation is above or below a given threshold.
+ *
+ * An operation is given a time budget which is the inverse of the rate threshold.
+ * When we observe an operation that much time is "spent", and we adjust a threshold time accordingly.
+ * If this time is into the future, the current observed rate is above the rate threshold.
+ *
+ * To avoid the threshold time racing into the future or lagging behind,
+ * it is capped in both directions by a time budget window.
+ */
+class OperationRateTracker {
+private:
+ vespalib::duration _time_budget_per_op;
+ vespalib::duration _time_budget_window;
+ vespalib::steady_time _threshold_time;
+
+public:
+ OperationRateTracker(double rate_threshold);
+
+ vespalib::duration get_time_budget_per_op() const { return _time_budget_per_op; }
+ vespalib::duration get_time_budget_window() const { return _time_budget_window; }
+
+ void observe(vespalib::steady_time now);
+ bool above_threshold(vespalib::steady_time now) const;
+
+ // Should only be used for testing
+ void reset(vespalib::steady_time now);
+};
+
+}
diff --git a/searchcore/src/vespa/searchcore/proton/documentmetastore/documentmetastore.cpp b/searchcore/src/vespa/searchcore/proton/documentmetastore/documentmetastore.cpp
index c36ee8e474a..3f8ede91faa 100644
--- a/searchcore/src/vespa/searchcore/proton/documentmetastore/documentmetastore.cpp
+++ b/searchcore/src/vespa/searchcore/proton/documentmetastore/documentmetastore.cpp
@@ -2,6 +2,7 @@
#include "documentmetastore.h"
#include "documentmetastoresaver.h"
+#include "operation_listener.h"
#include "search_context.h"
#include <vespa/fastos/file.h>
#include <vespa/searchcore/proton/bucketdb/bucketsessionbase.h>
@@ -453,7 +454,7 @@ DocumentMetaStore::DocumentMetaStore(BucketDBOwner::SP bucketDB,
_shrinkLidSpaceBlockers(0),
_subDbType(subDbType),
_trackDocumentSizes(true),
- _last_remove_batch()
+ _op_listener()
{
ensureSpace(0); // lid 0 is reserved
setCommittedDocIdLimit(1u); // lid 0 is reserved
@@ -668,7 +669,9 @@ DocumentMetaStore::removeBatch(const std::vector<DocId> &lidsToRemove, const uin
(void) removed;
}
incGeneration();
- _last_remove_batch = std::chrono::steady_clock::now();
+ if (_op_listener) {
+ _op_listener->notify_remove_batch();
+ }
}
void
@@ -776,8 +779,7 @@ DocumentMetaStore::getLidUsageStats() const
return LidUsageStats(docIdLimit,
numDocs,
lowestFreeLid,
- highestUsedLid,
- _last_remove_batch);
+ highestUsedLid);
}
Blueprint::UP
@@ -1021,6 +1023,12 @@ DocumentMetaStore::canShrinkLidSpace() const
}
void
+DocumentMetaStore::set_operation_listener(documentmetastore::OperationListener::SP op_listener)
+{
+ _op_listener = std::move(op_listener);
+}
+
+void
DocumentMetaStore::onShrinkLidSpace()
{
uint32_t committedDocIdLimit = this->getCommittedDocIdLimit();
diff --git a/searchcore/src/vespa/searchcore/proton/documentmetastore/documentmetastore.h b/searchcore/src/vespa/searchcore/proton/documentmetastore/documentmetastore.h
index 3bd9795cfd5..99cefbe1802 100644
--- a/searchcore/src/vespa/searchcore/proton/documentmetastore/documentmetastore.h
+++ b/searchcore/src/vespa/searchcore/proton/documentmetastore/documentmetastore.h
@@ -17,11 +17,14 @@
#include <vespa/vespalib/util/rcuvector.h>
namespace proton::bucketdb {
- class SplitBucketSession;
- class JoinBucketsSession;
+class SplitBucketSession;
+class JoinBucketsSession;
}
-namespace proton::documentmetastore { class Reader; }
+namespace proton::documentmetastore {
+class OperationListener;
+class Reader;
+}
namespace proton {
@@ -71,7 +74,7 @@ private:
uint32_t _shrinkLidSpaceBlockers;
const SubDbType _subDbType;
bool _trackDocumentSizes;
- search::LidUsageStats::TimePoint _last_remove_batch;
+ documentmetastore::OperationListener::SP _op_listener;
DocId getFreeLid();
DocId peekFreeLid();
@@ -225,6 +228,7 @@ public:
void compactLidSpace(DocId wantedLidLimit) override;
void holdUnblockShrinkLidSpace() override;
bool canShrinkLidSpace() const override;
+ void set_operation_listener(documentmetastore::OperationListener::SP op_listener) override;
SerialNum getLastSerialNum() const override {
return getStatus().getLastSyncToken();
diff --git a/searchcore/src/vespa/searchcore/proton/documentmetastore/i_document_meta_store.h b/searchcore/src/vespa/searchcore/proton/documentmetastore/i_document_meta_store.h
index 37666879205..d7721a28b46 100644
--- a/searchcore/src/vespa/searchcore/proton/documentmetastore/i_document_meta_store.h
+++ b/searchcore/src/vespa/searchcore/proton/documentmetastore/i_document_meta_store.h
@@ -4,6 +4,7 @@
#include "lid_gid_key_comparator.h"
#include "i_simple_document_meta_store.h"
+#include "operation_listener.h"
#include <vespa/searchlib/attribute/attributeguard.h>
#include <vespa/searchlib/common/idocumentmetastore.h>
#include <vespa/searchlib/common/serialnum.h>
@@ -82,6 +83,8 @@ struct IDocumentMetaStore : public search::IDocumentMetaStore,
*/
virtual void compactLidSpace(DocId wantedLidLimit) = 0;
+ virtual void set_operation_listener(documentmetastore::OperationListener::SP op_listener) = 0;
+
};
} // namespace proton
diff --git a/searchcore/src/vespa/searchcore/proton/documentmetastore/operation_listener.h b/searchcore/src/vespa/searchcore/proton/documentmetastore/operation_listener.h
new file mode 100644
index 00000000000..3b974554551
--- /dev/null
+++ b/searchcore/src/vespa/searchcore/proton/documentmetastore/operation_listener.h
@@ -0,0 +1,19 @@
+// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include <memory>
+
+namespace proton::documentmetastore {
+
+/**
+ * Interface used to listen to operations handled by the document meta store.
+ */
+class OperationListener {
+public:
+ using SP = std::shared_ptr<OperationListener>;
+ virtual ~OperationListener() {}
+ virtual void notify_remove_batch() = 0;
+};
+
+}
diff --git a/searchcore/src/vespa/searchcore/proton/server/CMakeLists.txt b/searchcore/src/vespa/searchcore/proton/server/CMakeLists.txt
index 92cf186f697..5f9f5528776 100644
--- a/searchcore/src/vespa/searchcore/proton/server/CMakeLists.txt
+++ b/searchcore/src/vespa/searchcore/proton/server/CMakeLists.txt
@@ -10,8 +10,8 @@ vespa_add_library(searchcore_server STATIC
combiningfeedview.cpp
ddbstate.cpp
disk_mem_usage_filter.cpp
- disk_mem_usage_sampler.cpp
disk_mem_usage_forwarder.cpp
+ disk_mem_usage_sampler.cpp
docstorevalidator.cpp
document_db_config_owner.cpp
document_db_directory_holder.cpp
@@ -30,8 +30,8 @@ vespa_add_library(searchcore_server STATIC
documentdb_commit_job.cpp
documentdb_metrics_updater.cpp
documentdbconfig.cpp
- documentdbconfigscout.cpp
documentdbconfigmanager.cpp
+ documentdbconfigscout.cpp
documentretriever.cpp
documentretrieverbase.cpp
documentsubdbcollection.cpp
@@ -63,8 +63,8 @@ vespa_add_library(searchcore_server STATIC
maintenancejobrunner.cpp
matchers.cpp
matchview.cpp
- memoryconfigstore.cpp
memory_flush_config_updater.cpp
+ memoryconfigstore.cpp
memoryflush.cpp
minimal_document_retriever.cpp
move_operation_limiter.cpp
@@ -82,6 +82,7 @@ vespa_add_library(searchcore_server STATIC
putdonecontext.cpp
reconfig_params.cpp
remove_batch_done_context.cpp
+ remove_operations_rate_tracker.cpp
removedonecontext.cpp
removedonetask.cpp
replaypacketdispatcher.cpp
diff --git a/searchcore/src/vespa/searchcore/proton/server/document_db_maintenance_config.cpp b/searchcore/src/vespa/searchcore/proton/server/document_db_maintenance_config.cpp
index 57ef2d1a45c..6bd1888ad06 100644
--- a/searchcore/src/vespa/searchcore/proton/server/document_db_maintenance_config.cpp
+++ b/searchcore/src/vespa/searchcore/proton/server/document_db_maintenance_config.cpp
@@ -53,7 +53,7 @@ DocumentDBLidSpaceCompactionConfig::DocumentDBLidSpaceCompactionConfig()
_interval(3600s),
_allowedLidBloat(1000000000),
_allowedLidBloatFactor(1.0),
- _remove_batch_block_delay(5s),
+ _remove_batch_block_rate(0.5),
_disabled(false),
_maxDocsToScan(10000)
{
@@ -62,14 +62,14 @@ DocumentDBLidSpaceCompactionConfig::DocumentDBLidSpaceCompactionConfig()
DocumentDBLidSpaceCompactionConfig::DocumentDBLidSpaceCompactionConfig(vespalib::duration interval,
uint32_t allowedLidBloat,
double allowedLidBloatFactor,
- vespalib::duration remove_batch_block_delay,
+ double remove_batch_block_rate,
bool disabled,
uint32_t maxDocsToScan)
: _delay(std::min(MAX_DELAY_SEC, interval)),
_interval(interval),
_allowedLidBloat(allowedLidBloat),
_allowedLidBloatFactor(allowedLidBloatFactor),
- _remove_batch_block_delay(remove_batch_block_delay),
+ _remove_batch_block_rate(remove_batch_block_rate),
_disabled(disabled),
_maxDocsToScan(maxDocsToScan)
{
diff --git a/searchcore/src/vespa/searchcore/proton/server/document_db_maintenance_config.h b/searchcore/src/vespa/searchcore/proton/server/document_db_maintenance_config.h
index 604977aa04f..1370d855b31 100644
--- a/searchcore/src/vespa/searchcore/proton/server/document_db_maintenance_config.h
+++ b/searchcore/src/vespa/searchcore/proton/server/document_db_maintenance_config.h
@@ -47,7 +47,7 @@ private:
vespalib::duration _interval;
uint32_t _allowedLidBloat;
double _allowedLidBloatFactor;
- vespalib::duration _remove_batch_block_delay;
+ double _remove_batch_block_rate;
bool _disabled;
uint32_t _maxDocsToScan;
@@ -56,7 +56,7 @@ public:
DocumentDBLidSpaceCompactionConfig(vespalib::duration interval,
uint32_t allowedLidBloat,
double allowwedLidBloatFactor,
- vespalib::duration remove_batch_block_delay,
+ double remove_batch_block_rate,
bool disabled,
uint32_t maxDocsToScan = 10000);
@@ -66,7 +66,7 @@ public:
vespalib::duration getInterval() const { return _interval; }
uint32_t getAllowedLidBloat() const { return _allowedLidBloat; }
double getAllowedLidBloatFactor() const { return _allowedLidBloatFactor; }
- vespalib::duration get_remove_batch_block_delay() const { return _remove_batch_block_delay; }
+ double get_remove_batch_block_rate() const { return _remove_batch_block_rate; }
bool isDisabled() const { return _disabled; }
uint32_t getMaxDocsToScan() const { return _maxDocsToScan; }
};
diff --git a/searchcore/src/vespa/searchcore/proton/server/documentdbconfigmanager.cpp b/searchcore/src/vespa/searchcore/proton/server/documentdbconfigmanager.cpp
index 7a1989c8d7b..9bf8ce71244 100644
--- a/searchcore/src/vespa/searchcore/proton/server/documentdbconfigmanager.cpp
+++ b/searchcore/src/vespa/searchcore/proton/server/documentdbconfigmanager.cpp
@@ -134,7 +134,7 @@ buildMaintenanceConfig(const BootstrapConfig::SP &bootstrapConfig,
vespalib::from_s(proton.lidspacecompaction.interval),
proton.lidspacecompaction.allowedlidbloat,
proton.lidspacecompaction.allowedlidbloatfactor,
- vespalib::from_s(proton.lidspacecompaction.removebatchblockdelay),
+ proton.lidspacecompaction.removebatchblockrate,
isDocumentTypeGlobal),
AttributeUsageFilterConfig(
proton.writefilter.attribute.enumstorelimit,
diff --git a/searchcore/src/vespa/searchcore/proton/server/i_lid_space_compaction_handler.h b/searchcore/src/vespa/searchcore/proton/server/i_lid_space_compaction_handler.h
index 4e996f3596d..b31f05c86a3 100644
--- a/searchcore/src/vespa/searchcore/proton/server/i_lid_space_compaction_handler.h
+++ b/searchcore/src/vespa/searchcore/proton/server/i_lid_space_compaction_handler.h
@@ -4,6 +4,7 @@
#include "i_document_scan_iterator.h"
#include "ifrozenbuckethandler.h"
+#include <vespa/searchcore/proton/documentmetastore/operation_listener.h>
#include <vespa/searchcore/proton/feedoperation/compact_lid_space_operation.h>
#include <vespa/searchcore/proton/feedoperation/moveoperation.h>
#include <vespa/searchlib/common/lid_usage_stats.h>
@@ -30,6 +31,13 @@ struct ILidSpaceCompactionHandler
virtual vespalib::string getName() const = 0;
/**
+ * Sets the listener used to get notifications on the operations handled by the document meta store.
+ *
+ * A call to this function should replace the previous listener if set.
+ */
+ virtual void set_operation_listener(documentmetastore::OperationListener::SP op_listener) = 0;
+
+ /**
* Returns the id of the sub database this handler is operating over.
*/
virtual uint32_t getSubDbId() const = 0;
diff --git a/searchcore/src/vespa/searchcore/proton/server/lid_space_compaction_handler.cpp b/searchcore/src/vespa/searchcore/proton/server/lid_space_compaction_handler.cpp
index c4dc26a0875..e51352bfbb9 100644
--- a/searchcore/src/vespa/searchcore/proton/server/lid_space_compaction_handler.cpp
+++ b/searchcore/src/vespa/searchcore/proton/server/lid_space_compaction_handler.cpp
@@ -23,6 +23,12 @@ LidSpaceCompactionHandler::LidSpaceCompactionHandler(const MaintenanceDocumentSu
{
}
+void
+LidSpaceCompactionHandler::set_operation_listener(documentmetastore::OperationListener::SP op_listener)
+{
+ return _subDb.meta_store()->set_operation_listener(std::move(op_listener));
+}
+
LidUsageStats
LidSpaceCompactionHandler::getLidStatus() const
{
diff --git a/searchcore/src/vespa/searchcore/proton/server/lid_space_compaction_handler.h b/searchcore/src/vespa/searchcore/proton/server/lid_space_compaction_handler.h
index 21d20001923..a6469acb5e5 100644
--- a/searchcore/src/vespa/searchcore/proton/server/lid_space_compaction_handler.h
+++ b/searchcore/src/vespa/searchcore/proton/server/lid_space_compaction_handler.h
@@ -23,6 +23,7 @@ public:
virtual vespalib::string getName() const override {
return _docTypeName + "." + _subDb.name();
}
+ virtual void set_operation_listener(documentmetastore::OperationListener::SP op_listener) override;
virtual uint32_t getSubDbId() const override { return _subDb.sub_db_id(); }
virtual search::LidUsageStats getLidStatus() const override;
virtual IDocumentScanIterator::UP getIterator() const override;
diff --git a/searchcore/src/vespa/searchcore/proton/server/lid_space_compaction_job.cpp b/searchcore/src/vespa/searchcore/proton/server/lid_space_compaction_job.cpp
index c2d655538f5..62893055a0a 100644
--- a/searchcore/src/vespa/searchcore/proton/server/lid_space_compaction_job.cpp
+++ b/searchcore/src/vespa/searchcore/proton/server/lid_space_compaction_job.cpp
@@ -90,10 +90,9 @@ LidSpaceCompactionJob::compactLidSpace(const LidUsageStats &stats)
}
bool
-LidSpaceCompactionJob::remove_batch_is_ongoing(const LidUsageStats& stats) const
+LidSpaceCompactionJob::remove_batch_is_ongoing() const
{
- LidUsageStats::TimePoint now = std::chrono::steady_clock::now();
- return (now - stats.get_last_remove_batch()) < std::chrono::duration<double>(_cfg.get_remove_batch_block_delay());
+ return _ops_rate_tracker->remove_batch_above_threshold();
}
LidSpaceCompactionJob::LidSpaceCompactionJob(const DocumentDBLidSpaceCompactionConfig &config,
@@ -114,13 +113,15 @@ LidSpaceCompactionJob::LidSpaceCompactionJob(const DocumentDBLidSpaceCompactionC
_retryFrozenDocument(false),
_shouldCompactLidSpace(false),
_diskMemUsageNotifier(diskMemUsageNotifier),
- _clusterStateChangedNotifier(clusterStateChangedNotifier)
+ _clusterStateChangedNotifier(clusterStateChangedNotifier),
+ _ops_rate_tracker(std::make_shared<RemoveOperationsRateTracker>(config.get_remove_batch_block_rate()))
{
_diskMemUsageNotifier.addDiskMemUsageListener(this);
_clusterStateChangedNotifier.addClusterStateChangedHandler(this);
if (nodeRetired) {
setBlocked(BlockedReason::CLUSTER_STATE);
}
+ handler.set_operation_listener(_ops_rate_tracker);
}
LidSpaceCompactionJob::~LidSpaceCompactionJob()
@@ -136,7 +137,7 @@ LidSpaceCompactionJob::run()
return true; // indicate work is done since no work can be done
}
LidUsageStats stats = _handler.getLidStatus();
- if (remove_batch_is_ongoing(stats)) {
+ if (remove_batch_is_ongoing()) {
// Note that we don't set the job as blocked as the decision to un-block it is not driven externally.
LOG(info, "run(): Lid space compaction is disabled while remove batch (delete buckets) is ongoing");
return true;
diff --git a/searchcore/src/vespa/searchcore/proton/server/lid_space_compaction_job.h b/searchcore/src/vespa/searchcore/proton/server/lid_space_compaction_job.h
index 2f242e5a33a..44c30987a2f 100644
--- a/searchcore/src/vespa/searchcore/proton/server/lid_space_compaction_job.h
+++ b/searchcore/src/vespa/searchcore/proton/server/lid_space_compaction_job.h
@@ -9,6 +9,7 @@
#include "ibucketstatecalculator.h"
#include "iclusterstatechangedhandler.h"
#include "iclusterstatechangednotifier.h"
+#include "remove_operations_rate_tracker.h"
namespace proton {
@@ -37,6 +38,7 @@ private:
bool _shouldCompactLidSpace;
IDiskMemUsageNotifier &_diskMemUsageNotifier;
IClusterStateChangedNotifier &_clusterStateChangedNotifier;
+ std::shared_ptr<RemoveOperationsRateTracker> _ops_rate_tracker;
bool hasTooMuchLidBloat(const search::LidUsageStats &stats) const;
bool shouldRestartScanDocuments(const search::LidUsageStats &stats) const;
@@ -45,7 +47,7 @@ private:
void compactLidSpace(const search::LidUsageStats &stats);
void refreshRunnable();
void refreshAndConsiderRunnable();
- bool remove_batch_is_ongoing(const search::LidUsageStats& stats) const;
+ bool remove_batch_is_ongoing() const;
public:
LidSpaceCompactionJob(const DocumentDBLidSpaceCompactionConfig &config,
diff --git a/searchcore/src/vespa/searchcore/proton/server/remove_operations_rate_tracker.cpp b/searchcore/src/vespa/searchcore/proton/server/remove_operations_rate_tracker.cpp
new file mode 100644
index 00000000000..eae59fbe175
--- /dev/null
+++ b/searchcore/src/vespa/searchcore/proton/server/remove_operations_rate_tracker.cpp
@@ -0,0 +1,31 @@
+// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include "remove_operations_rate_tracker.h"
+#include <vespa/vespalib/util/time.h>
+
+namespace proton {
+
+RemoveOperationsRateTracker::RemoveOperationsRateTracker(double remove_batch_rate_threshold)
+ : _remove_batch_tracker(remove_batch_rate_threshold)
+{
+}
+
+void
+RemoveOperationsRateTracker::notify_remove_batch()
+{
+ _remove_batch_tracker.observe(vespalib::steady_clock::now());
+}
+
+bool
+RemoveOperationsRateTracker::remove_batch_above_threshold() const
+{
+ return _remove_batch_tracker.above_threshold(vespalib::steady_clock::now());
+}
+
+void
+RemoveOperationsRateTracker::reset_remove_batch_tracker()
+{
+ _remove_batch_tracker.reset(vespalib::steady_clock::now());
+}
+
+}
diff --git a/searchcore/src/vespa/searchcore/proton/server/remove_operations_rate_tracker.h b/searchcore/src/vespa/searchcore/proton/server/remove_operations_rate_tracker.h
new file mode 100644
index 00000000000..a66ed5b9b54
--- /dev/null
+++ b/searchcore/src/vespa/searchcore/proton/server/remove_operations_rate_tracker.h
@@ -0,0 +1,30 @@
+// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include <vespa/searchcore/proton/common/operation_rate_tracker.h>
+#include <vespa/searchcore/proton/documentmetastore/operation_listener.h>
+
+namespace proton {
+
+/**
+ * Class that tracks the rate of remove operations handled by the document meta store.
+ *
+ * For each operation we can tell if it is above or below a given rate threshold.
+ */
+class RemoveOperationsRateTracker : public documentmetastore::OperationListener {
+private:
+ OperationRateTracker _remove_batch_tracker;
+
+public:
+ RemoveOperationsRateTracker(double remove_batch_rate_threshold);
+
+ void notify_remove_batch() override;
+
+ bool remove_batch_above_threshold() const;
+
+ // Should only be used for testing
+ void reset_remove_batch_tracker();
+};
+
+}
diff --git a/searchcore/src/vespa/searchcore/proton/test/document_meta_store_observer.h b/searchcore/src/vespa/searchcore/proton/test/document_meta_store_observer.h
index 5cc547b4dcb..be85df0cc87 100644
--- a/searchcore/src/vespa/searchcore/proton/test/document_meta_store_observer.h
+++ b/searchcore/src/vespa/searchcore/proton/test/document_meta_store_observer.h
@@ -185,6 +185,9 @@ struct DocumentMetaStoreObserver : public IDocumentMetaStore
virtual void foreach(const search::IGidToLidMapperVisitor &visitor) const override {
_store.foreach(visitor);
}
+ virtual void set_operation_listener(documentmetastore::OperationListener::SP op_listener) override {
+ _store.set_operation_listener(std::move(op_listener));
+ }
};
}