summaryrefslogtreecommitdiffstats
path: root/searchcore
diff options
context:
space:
mode:
authorGeir Storli <geirst@verizonmedia.com>2020-04-29 09:46:04 +0000
committerGeir Storli <geirst@verizonmedia.com>2020-04-29 09:46:04 +0000
commit7cc0f99900706932ac98c29af66c609199387785 (patch)
treeec102d1b5084ba716a0d3efc2447bfcd14e8212a /searchcore
parent6d108bacdf3c92ebbf8a84b70ff619201ff1ba53 (diff)
Add tracking of remove operations rate and use this to consider blocking lid space compaction.
During a period with a high rate of remove operations, there is no use running lid space compaction as this will interfere with the remove operations, increasing latency of those. Moving a document as part of lid space compaction is a costly operation (similar to putting the document in the first place) and it typically uses both the index and attribute writer thread pools.
Diffstat (limited to 'searchcore')
-rw-r--r--searchcore/src/tests/proton/documentdb/lid_space_compaction/lid_space_compaction_test.cpp108
-rw-r--r--searchcore/src/tests/proton/documentmetastore/documentmetastore_test.cpp17
-rw-r--r--searchcore/src/vespa/searchcore/config/proton.def7
-rw-r--r--searchcore/src/vespa/searchcore/proton/documentmetastore/documentmetastore.cpp3
-rw-r--r--searchcore/src/vespa/searchcore/proton/documentmetastore/operation_listener.h1
-rw-r--r--searchcore/src/vespa/searchcore/proton/server/document_db_maintenance_config.cpp3
-rw-r--r--searchcore/src/vespa/searchcore/proton/server/document_db_maintenance_config.h3
-rw-r--r--searchcore/src/vespa/searchcore/proton/server/documentdbconfigmanager.cpp1
-rw-r--r--searchcore/src/vespa/searchcore/proton/server/lid_space_compaction_job.cpp14
-rw-r--r--searchcore/src/vespa/searchcore/proton/server/lid_space_compaction_job.h1
-rw-r--r--searchcore/src/vespa/searchcore/proton/server/remove_operations_rate_tracker.cpp18
-rw-r--r--searchcore/src/vespa/searchcore/proton/server/remove_operations_rate_tracker.h9
12 files changed, 147 insertions, 38 deletions
diff --git a/searchcore/src/tests/proton/documentdb/lid_space_compaction/lid_space_compaction_test.cpp b/searchcore/src/tests/proton/documentdb/lid_space_compaction/lid_space_compaction_test.cpp
index a97a2380f25..e679c028d1b 100644
--- a/searchcore/src/tests/proton/documentdb/lid_space_compaction/lid_space_compaction_test.cpp
+++ b/searchcore/src/tests/proton/documentdb/lid_space_compaction/lid_space_compaction_test.cpp
@@ -30,7 +30,8 @@ constexpr uint32_t SUBDB_ID = 2;
constexpr vespalib::duration JOB_DELAY = 1s;
constexpr uint32_t ALLOWED_LID_BLOAT = 1;
constexpr double ALLOWED_LID_BLOAT_FACTOR = 0.3;
-constexpr double REMOVE_BATCH_BLOCK_RATE = 1.0 / 20.0;
+constexpr double REMOVE_BATCH_BLOCK_RATE = 1.0 / 21.0;
+constexpr double REMOVE_BLOCK_RATE = 1.0 / 20.0;
constexpr uint32_t MAX_DOCS_TO_SCAN = 100;
constexpr double RESOURCE_LIMIT_FACTOR = 1.0;
constexpr uint32_t MAX_OUTSTANDING_MOVE_OPS = 10;
@@ -88,14 +89,24 @@ struct MyHandler : public ILidSpaceCompactionHandler {
MyHandler(bool storeMoveDoneContexts = false);
~MyHandler();
void clearMoveDoneContexts() { _moveDoneContexts.clear(); }
- void run_remove_batch_ops() {
+ void run_remove_ops(bool remove_batch) {
// This ensures to max out the threshold time in the operation rate tracker.
- _op_listener->notify_remove_batch();
- _op_listener->notify_remove_batch();
- _op_listener->notify_remove_batch();
+ if (remove_batch) {
+ _op_listener->notify_remove_batch();
+ _op_listener->notify_remove_batch();
+ _op_listener->notify_remove_batch();
+ } else {
+ _op_listener->notify_remove();
+ _op_listener->notify_remove();
+ _op_listener->notify_remove();
+ }
}
- void stop_remove_batch_ops() {
- _rm_listener->reset_remove_batch_tracker();
+ void stop_remove_ops(bool remove_batch) {
+ if (remove_batch) {
+ _rm_listener->get_remove_batch_tracker().reset(vespalib::steady_clock::now());
+ } else {
+ _rm_listener->get_remove_tracker().reset(vespalib::steady_clock::now());
+ }
}
virtual vespalib::string getName() const override {
return "myhandler";
@@ -279,6 +290,7 @@ struct JobTestBase : public ::testing::Test {
_job = std::make_unique<LidSpaceCompactionJob>(DocumentDBLidSpaceCompactionConfig(interval, allowedLidBloat,
allowedLidBloatFactor,
REMOVE_BATCH_BLOCK_RATE,
+ REMOVE_BLOCK_RATE,
false, maxDocsToScan),
*_handler, _storer, _frozenHandler, _diskMemUsageNotifier,
BlockableMaintenanceJobConfig(resourceLimitFactor, maxOutstandingMoveOps),
@@ -630,40 +642,78 @@ TEST_F(JobTest, job_is_re_enabled_when_node_is_no_longer_retired)
assertOneDocumentCompacted();
}
-TEST_F(JobTest, job_is_disabled_while_remove_batch_is_ongoing)
+class JobDisabledByRemoveOpsTest : public JobTest {
+public:
+ JobDisabledByRemoveOpsTest() : JobTest() {}
+
+ void job_is_disabled_while_remove_ops_are_ongoing(bool remove_batch) {
+ setupOneDocumentToCompact();
+ _handler->run_remove_ops(remove_batch);
+ EXPECT_TRUE(run()); // job is disabled
+ assertNoWorkDone();
+ }
+
+ void job_becomes_disabled_if_remove_ops_starts(bool remove_batch) {
+ setupThreeDocumentsToCompact();
+ EXPECT_FALSE(run()); // job executed as normal (with more work to do)
+ assertJobContext(2, 9, 1, 0, 0);
+
+ _handler->run_remove_ops(remove_batch);
+ EXPECT_TRUE(run()); // job is disabled
+ assertJobContext(2, 9, 1, 0, 0);
+ }
+
+ void job_is_re_enabled_when_remove_ops_are_no_longer_ongoing(bool remove_batch) {
+ job_becomes_disabled_if_remove_ops_starts(remove_batch);
+
+ _handler->stop_remove_ops(remove_batch);
+ EXPECT_FALSE(run()); // job executed as normal (with more work to do)
+ assertJobContext(3, 8, 2, 0, 0);
+ }
+};
+
+TEST_F(JobDisabledByRemoveOpsTest, config_is_propagated_to_remove_operations_rate_tracker)
{
- setupOneDocumentToCompact();
- _handler->run_remove_batch_ops();
- EXPECT_TRUE(run()); // job is disabled
- assertNoWorkDone();
+ auto& remove_batch_tracker = _handler->_rm_listener->get_remove_batch_tracker();
+ EXPECT_EQ(vespalib::from_s(21.0), remove_batch_tracker.get_time_budget_per_op());
+ EXPECT_EQ(vespalib::from_s(21.0), remove_batch_tracker.get_time_budget_window());
+
+ auto& remove_tracker = _handler->_rm_listener->get_remove_tracker();
+ EXPECT_EQ(vespalib::from_s(20.0), remove_tracker.get_time_budget_per_op());
+ EXPECT_EQ(vespalib::from_s(20.0), remove_tracker.get_time_budget_window());
}
-TEST_F(JobTest, job_becomes_disabled_if_remove_batch_starts)
+TEST_F(JobDisabledByRemoveOpsTest, job_is_disabled_while_remove_batch_is_ongoing)
{
- setupThreeDocumentsToCompact();
- EXPECT_FALSE(run()); // job executed as normal (with more work to do)
- assertJobContext(2, 9, 1, 0, 0);
+ job_is_disabled_while_remove_ops_are_ongoing(true);
+}
- _handler->run_remove_batch_ops();
- EXPECT_TRUE(run()); // job is disabled
- assertJobContext(2, 9, 1, 0, 0);
+TEST_F(JobDisabledByRemoveOpsTest, job_becomes_disabled_if_remove_batch_starts)
+{
+ job_becomes_disabled_if_remove_ops_starts(true);
}
-TEST_F(JobTest, job_is_re_enabled_when_remove_batch_is_no_longer_ongoing)
+TEST_F(JobDisabledByRemoveOpsTest, job_is_re_enabled_when_remove_batch_is_no_longer_ongoing)
{
- setupThreeDocumentsToCompact();
- EXPECT_FALSE(run()); // job executed as normal (with more work to do)
- assertJobContext(2, 9, 1, 0, 0);
+ job_is_re_enabled_when_remove_ops_are_no_longer_ongoing(true);
+}
- _handler->run_remove_batch_ops();
- EXPECT_TRUE(run()); // job is disabled
- assertJobContext(2, 9, 1, 0, 0);
+TEST_F(JobDisabledByRemoveOpsTest, job_is_disabled_while_removes_are_ongoing)
+{
+ job_is_disabled_while_remove_ops_are_ongoing(false);
+}
- _handler->stop_remove_batch_ops();
- EXPECT_FALSE(run()); // job executed as normal (with more work to do)
- assertJobContext(3, 8, 2, 0, 0);
+TEST_F(JobDisabledByRemoveOpsTest, job_becomes_disabled_if_removes_start)
+{
+ job_becomes_disabled_if_remove_ops_starts(false);
}
+TEST_F(JobDisabledByRemoveOpsTest, job_is_re_enabled_when_removes_are_no_longer_ongoing)
+{
+ job_is_re_enabled_when_remove_ops_are_no_longer_ongoing(false);
+}
+
+
struct MaxOutstandingJobTest : public JobTest {
std::unique_ptr<MyCountJobRunner> runner;
MaxOutstandingJobTest()
diff --git a/searchcore/src/tests/proton/documentmetastore/documentmetastore_test.cpp b/searchcore/src/tests/proton/documentmetastore/documentmetastore_test.cpp
index 8935143a7be..49b84c09040 100644
--- a/searchcore/src/tests/proton/documentmetastore/documentmetastore_test.cpp
+++ b/searchcore/src/tests/proton/documentmetastore/documentmetastore_test.cpp
@@ -2088,12 +2088,15 @@ TEST(DocumentMetaStoreTest, multiple_lids_can_be_removed_with_removeBatch)
class MockOperationListener : public documentmetastore::OperationListener {
public:
size_t remove_batch_cnt;
+ size_t remove_cnt;
MockOperationListener()
- : remove_batch_cnt(0)
+ : remove_batch_cnt(0),
+ remove_cnt(0)
{
}
void notify_remove_batch() override { ++remove_batch_cnt; }
+ void notify_remove() override { ++remove_cnt; }
};
TEST(DocumentMetaStoreTest, call_to_remove_batch_is_notified)
@@ -2108,6 +2111,18 @@ TEST(DocumentMetaStoreTest, call_to_remove_batch_is_notified)
EXPECT_EQ(1, listener->remove_batch_cnt);
}
+TEST(DocumentMetaStoreTest, call_to_remove_is_notified)
+{
+ DocumentMetaStore dms(createBucketDB());
+ auto listener = std::make_shared<MockOperationListener>();
+ dms.set_operation_listener(listener);
+ dms.constructFreeList();
+ addLid(dms, 1);
+
+ dms.remove(1);
+ EXPECT_EQ(1, listener->remove_cnt);
+}
+
}
GTEST_MAIN_RUN_ALL_TESTS()
diff --git a/searchcore/src/vespa/searchcore/config/proton.def b/searchcore/src/vespa/searchcore/config/proton.def
index 644564c0e2e..5d84516d100 100644
--- a/searchcore/src/vespa/searchcore/config/proton.def
+++ b/searchcore/src/vespa/searchcore/config/proton.def
@@ -391,6 +391,13 @@ lidspacecompaction.removebatchblockdelay double default=2.0
## lid space compaction do not interfere, but instead is applied after deleting of buckets is complete.
lidspacecompaction.removebatchblockrate double default=0.5
+## The rate (ops / second) of remove operations for when to block lid space compaction.
+##
+## When considering compaction, if the current observed rate of remove operations
+## is higher than the given block rate, the lid space compaction job is blocked.
+## It is considered again at the next regular interval (see above).
+lidspacecompaction.removeblockrate double default=100000.0
+
## This is the maximum value visibilitydelay you can have.
## A to higher value here will cost more memory while not improving too much.
maxvisibilitydelay double default=1.0
diff --git a/searchcore/src/vespa/searchcore/proton/documentmetastore/documentmetastore.cpp b/searchcore/src/vespa/searchcore/proton/documentmetastore/documentmetastore.cpp
index 3f8ede91faa..b43efa41d0b 100644
--- a/searchcore/src/vespa/searchcore/proton/documentmetastore/documentmetastore.cpp
+++ b/searchcore/src/vespa/searchcore/proton/documentmetastore/documentmetastore.cpp
@@ -622,6 +622,9 @@ DocumentMetaStore::remove(DocId lid)
BucketDBOwner::Guard bucketGuard = _bucketDB->takeGuard();
bool result = remove(lid, bucketGuard);
incGeneration();
+ if (result && _op_listener) {
+ _op_listener->notify_remove();
+ }
return result;
}
diff --git a/searchcore/src/vespa/searchcore/proton/documentmetastore/operation_listener.h b/searchcore/src/vespa/searchcore/proton/documentmetastore/operation_listener.h
index 3b974554551..259f1a40c9d 100644
--- a/searchcore/src/vespa/searchcore/proton/documentmetastore/operation_listener.h
+++ b/searchcore/src/vespa/searchcore/proton/documentmetastore/operation_listener.h
@@ -14,6 +14,7 @@ public:
using SP = std::shared_ptr<OperationListener>;
virtual ~OperationListener() {}
virtual void notify_remove_batch() = 0;
+ virtual void notify_remove() = 0;
};
}
diff --git a/searchcore/src/vespa/searchcore/proton/server/document_db_maintenance_config.cpp b/searchcore/src/vespa/searchcore/proton/server/document_db_maintenance_config.cpp
index 6bd1888ad06..a4e8b6752b0 100644
--- a/searchcore/src/vespa/searchcore/proton/server/document_db_maintenance_config.cpp
+++ b/searchcore/src/vespa/searchcore/proton/server/document_db_maintenance_config.cpp
@@ -54,6 +54,7 @@ DocumentDBLidSpaceCompactionConfig::DocumentDBLidSpaceCompactionConfig()
_allowedLidBloat(1000000000),
_allowedLidBloatFactor(1.0),
_remove_batch_block_rate(0.5),
+ _remove_block_rate(100000),
_disabled(false),
_maxDocsToScan(10000)
{
@@ -63,6 +64,7 @@ DocumentDBLidSpaceCompactionConfig::DocumentDBLidSpaceCompactionConfig(vespalib:
uint32_t allowedLidBloat,
double allowedLidBloatFactor,
double remove_batch_block_rate,
+ double remove_block_rate,
bool disabled,
uint32_t maxDocsToScan)
: _delay(std::min(MAX_DELAY_SEC, interval)),
@@ -70,6 +72,7 @@ DocumentDBLidSpaceCompactionConfig::DocumentDBLidSpaceCompactionConfig(vespalib:
_allowedLidBloat(allowedLidBloat),
_allowedLidBloatFactor(allowedLidBloatFactor),
_remove_batch_block_rate(remove_batch_block_rate),
+ _remove_block_rate(remove_block_rate),
_disabled(disabled),
_maxDocsToScan(maxDocsToScan)
{
diff --git a/searchcore/src/vespa/searchcore/proton/server/document_db_maintenance_config.h b/searchcore/src/vespa/searchcore/proton/server/document_db_maintenance_config.h
index 1370d855b31..4c0485baec6 100644
--- a/searchcore/src/vespa/searchcore/proton/server/document_db_maintenance_config.h
+++ b/searchcore/src/vespa/searchcore/proton/server/document_db_maintenance_config.h
@@ -48,6 +48,7 @@ private:
uint32_t _allowedLidBloat;
double _allowedLidBloatFactor;
double _remove_batch_block_rate;
+ double _remove_block_rate;
bool _disabled;
uint32_t _maxDocsToScan;
@@ -57,6 +58,7 @@ public:
uint32_t allowedLidBloat,
double allowwedLidBloatFactor,
double remove_batch_block_rate,
+ double remove_block_rate,
bool disabled,
uint32_t maxDocsToScan = 10000);
@@ -67,6 +69,7 @@ public:
uint32_t getAllowedLidBloat() const { return _allowedLidBloat; }
double getAllowedLidBloatFactor() const { return _allowedLidBloatFactor; }
double get_remove_batch_block_rate() const { return _remove_batch_block_rate; }
+ double get_remove_block_rate() const { return _remove_block_rate; }
bool isDisabled() const { return _disabled; }
uint32_t getMaxDocsToScan() const { return _maxDocsToScan; }
};
diff --git a/searchcore/src/vespa/searchcore/proton/server/documentdbconfigmanager.cpp b/searchcore/src/vespa/searchcore/proton/server/documentdbconfigmanager.cpp
index 9bf8ce71244..d1d88434bd6 100644
--- a/searchcore/src/vespa/searchcore/proton/server/documentdbconfigmanager.cpp
+++ b/searchcore/src/vespa/searchcore/proton/server/documentdbconfigmanager.cpp
@@ -135,6 +135,7 @@ buildMaintenanceConfig(const BootstrapConfig::SP &bootstrapConfig,
proton.lidspacecompaction.allowedlidbloat,
proton.lidspacecompaction.allowedlidbloatfactor,
proton.lidspacecompaction.removebatchblockrate,
+ proton.lidspacecompaction.removeblockrate,
isDocumentTypeGlobal),
AttributeUsageFilterConfig(
proton.writefilter.attribute.enumstorelimit,
diff --git a/searchcore/src/vespa/searchcore/proton/server/lid_space_compaction_job.cpp b/searchcore/src/vespa/searchcore/proton/server/lid_space_compaction_job.cpp
index 62893055a0a..bd53810e14b 100644
--- a/searchcore/src/vespa/searchcore/proton/server/lid_space_compaction_job.cpp
+++ b/searchcore/src/vespa/searchcore/proton/server/lid_space_compaction_job.cpp
@@ -95,6 +95,12 @@ LidSpaceCompactionJob::remove_batch_is_ongoing() const
return _ops_rate_tracker->remove_batch_above_threshold();
}
+bool
+LidSpaceCompactionJob::remove_is_ongoing() const
+{
+ return _ops_rate_tracker->remove_above_threshold();
+}
+
LidSpaceCompactionJob::LidSpaceCompactionJob(const DocumentDBLidSpaceCompactionConfig &config,
ILidSpaceCompactionHandler &handler,
IOperationStorer &opStorer,
@@ -114,7 +120,8 @@ LidSpaceCompactionJob::LidSpaceCompactionJob(const DocumentDBLidSpaceCompactionC
_shouldCompactLidSpace(false),
_diskMemUsageNotifier(diskMemUsageNotifier),
_clusterStateChangedNotifier(clusterStateChangedNotifier),
- _ops_rate_tracker(std::make_shared<RemoveOperationsRateTracker>(config.get_remove_batch_block_rate()))
+ _ops_rate_tracker(std::make_shared<RemoveOperationsRateTracker>(config.get_remove_batch_block_rate(),
+ config.get_remove_block_rate()))
{
_diskMemUsageNotifier.addDiskMemUsageListener(this);
_clusterStateChangedNotifier.addClusterStateChangedHandler(this);
@@ -142,6 +149,11 @@ LidSpaceCompactionJob::run()
LOG(info, "run(): Lid space compaction is disabled while remove batch (delete buckets) is ongoing");
return true;
}
+ if (remove_is_ongoing()) {
+ // Note that we don't set the job as blocked as the decision to un-block it is not driven externally.
+ LOG(info, "run(): Lid space compaction is disabled while remove operations are ongoing");
+ return true;
+ }
if (_scanItr) {
return scanDocuments(stats);
} else if (_shouldCompactLidSpace) {
diff --git a/searchcore/src/vespa/searchcore/proton/server/lid_space_compaction_job.h b/searchcore/src/vespa/searchcore/proton/server/lid_space_compaction_job.h
index 44c30987a2f..9171fe7472e 100644
--- a/searchcore/src/vespa/searchcore/proton/server/lid_space_compaction_job.h
+++ b/searchcore/src/vespa/searchcore/proton/server/lid_space_compaction_job.h
@@ -48,6 +48,7 @@ private:
void refreshRunnable();
void refreshAndConsiderRunnable();
bool remove_batch_is_ongoing() const;
+ bool remove_is_ongoing() const;
public:
LidSpaceCompactionJob(const DocumentDBLidSpaceCompactionConfig &config,
diff --git a/searchcore/src/vespa/searchcore/proton/server/remove_operations_rate_tracker.cpp b/searchcore/src/vespa/searchcore/proton/server/remove_operations_rate_tracker.cpp
index eae59fbe175..22294d33aba 100644
--- a/searchcore/src/vespa/searchcore/proton/server/remove_operations_rate_tracker.cpp
+++ b/searchcore/src/vespa/searchcore/proton/server/remove_operations_rate_tracker.cpp
@@ -5,8 +5,10 @@
namespace proton {
-RemoveOperationsRateTracker::RemoveOperationsRateTracker(double remove_batch_rate_threshold)
- : _remove_batch_tracker(remove_batch_rate_threshold)
+RemoveOperationsRateTracker::RemoveOperationsRateTracker(double remove_batch_rate_threshold,
+ double remove_rate_threshold)
+ : _remove_batch_tracker(remove_batch_rate_threshold),
+ _remove_tracker(remove_rate_threshold)
{
}
@@ -16,16 +18,22 @@ RemoveOperationsRateTracker::notify_remove_batch()
_remove_batch_tracker.observe(vespalib::steady_clock::now());
}
+void
+RemoveOperationsRateTracker::notify_remove()
+{
+ _remove_tracker.observe(vespalib::steady_clock::now());
+}
+
bool
RemoveOperationsRateTracker::remove_batch_above_threshold() const
{
return _remove_batch_tracker.above_threshold(vespalib::steady_clock::now());
}
-void
-RemoveOperationsRateTracker::reset_remove_batch_tracker()
+bool
+RemoveOperationsRateTracker::remove_above_threshold() const
{
- _remove_batch_tracker.reset(vespalib::steady_clock::now());
+ return _remove_tracker.above_threshold(vespalib::steady_clock::now());
}
}
diff --git a/searchcore/src/vespa/searchcore/proton/server/remove_operations_rate_tracker.h b/searchcore/src/vespa/searchcore/proton/server/remove_operations_rate_tracker.h
index a66ed5b9b54..b8c0315b9e9 100644
--- a/searchcore/src/vespa/searchcore/proton/server/remove_operations_rate_tracker.h
+++ b/searchcore/src/vespa/searchcore/proton/server/remove_operations_rate_tracker.h
@@ -15,16 +15,21 @@ namespace proton {
class RemoveOperationsRateTracker : public documentmetastore::OperationListener {
private:
OperationRateTracker _remove_batch_tracker;
+ OperationRateTracker _remove_tracker;
public:
- RemoveOperationsRateTracker(double remove_batch_rate_threshold);
+ RemoveOperationsRateTracker(double remove_batch_rate_threshold,
+ double remove_rate_threshold);
void notify_remove_batch() override;
+ void notify_remove() override;
bool remove_batch_above_threshold() const;
+ bool remove_above_threshold() const;
// Should only be used for testing
- void reset_remove_batch_tracker();
+ OperationRateTracker& get_remove_batch_tracker() { return _remove_batch_tracker; }
+ OperationRateTracker& get_remove_tracker() { return _remove_tracker; }
};
}