diff options
author | Geir Storli <geirst@verizonmedia.com> | 2019-08-13 11:33:48 +0000 |
---|---|---|
committer | Geir Storli <geirst@verizonmedia.com> | 2019-08-13 11:35:00 +0000 |
commit | 5a829878b375af5f83c7fe53283acbee65587587 (patch) | |
tree | d319f454f4c324e45d1caedf4a69a99498f8e563 /searchcore | |
parent | 894e9ddd1e63ef2a28c342999c34051ce871a2b5 (diff) |
Block lid space compaction job while remove batch (delete buckets) is ongoing.
Diffstat (limited to 'searchcore')
10 files changed, 115 insertions, 16 deletions
diff --git a/searchcore/src/tests/proton/documentdb/lid_space_compaction/lid_space_compaction_test.cpp b/searchcore/src/tests/proton/documentdb/lid_space_compaction/lid_space_compaction_test.cpp index 4843778d0c8..93f3299e121 100644 --- a/searchcore/src/tests/proton/documentdb/lid_space_compaction/lid_space_compaction_test.cpp +++ b/searchcore/src/tests/proton/documentdb/lid_space_compaction/lid_space_compaction_test.cpp @@ -23,11 +23,13 @@ using namespace vespalib; using search::IDestructorCallback; using storage::spi::Timestamp; using BlockedReason = IBlockableMaintenanceJob::BlockedReason; +using TimePoint = LidUsageStats::TimePoint; constexpr uint32_t SUBDB_ID = 2; constexpr double JOB_DELAY = 1.0; constexpr uint32_t ALLOWED_LID_BLOAT = 1; constexpr double ALLOWED_LID_BLOAT_FACTOR = 0.3; +constexpr double REMOVE_BATCH_BLOCK_DELAY = 20.0; constexpr uint32_t MAX_DOCS_TO_SCAN = 100; constexpr double RESOURCE_LIMIT_FACTOR = 1.0; constexpr uint32_t MAX_OUTSTANDING_MOVE_OPS = 10; @@ -83,6 +85,12 @@ struct MyHandler : public ILidSpaceCompactionHandler { MyHandler(bool storeMoveDoneContexts = false); ~MyHandler(); void clearMoveDoneContexts() { _moveDoneContexts.clear(); } + void set_last_remove_batch(TimePoint last_remove_batch) { + for (auto& s : _stats) { + s = LidUsageStats(s.getLidLimit(), s.getUsedLids(), + s.getLowestFreeLid(), s.getHighestUsedLid(), last_remove_batch); + } + } virtual vespalib::string getName() const override { return "myhandler"; } @@ -255,36 +263,40 @@ struct JobTestBase : public ::testing::Test { { _handler = std::make_unique<MyHandler>(maxOutstandingMoveOps != MAX_OUTSTANDING_MOVE_OPS); _job = std::make_unique<LidSpaceCompactionJob>(DocumentDBLidSpaceCompactionConfig(interval, allowedLidBloat, - allowedLidBloatFactor, false, maxDocsToScan), + allowedLidBloatFactor, + REMOVE_BATCH_BLOCK_DELAY, + false, maxDocsToScan), *_handler, _storer, _frozenHandler, _diskMemUsageNotifier, BlockableMaintenanceJobConfig(resourceLimitFactor, 
maxOutstandingMoveOps), _clusterStateHandler, nodeRetired); } ~JobTestBase(); JobTestBase &addStats(uint32_t docIdLimit, - const LidVector &usedLids, - const LidPairVector &usedFreePairs) { - return addMultiStats(docIdLimit, {usedLids}, usedFreePairs); + const LidVector &usedLids, + const LidPairVector &usedFreePairs, + TimePoint last_remove_batch = TimePoint()) { + return addMultiStats(docIdLimit, {usedLids}, usedFreePairs, last_remove_batch); } JobTestBase &addMultiStats(uint32_t docIdLimit, const std::vector<LidVector> &usedLidsVector, - const LidPairVector &usedFreePairs) { + const LidPairVector &usedFreePairs, + TimePoint last_remove_batch = TimePoint()) { uint32_t usedLids = usedLidsVector[0].size(); for (auto pair : usedFreePairs) { uint32_t highestUsedLid = pair.first; uint32_t lowestFreeLid = pair.second; _handler->_stats.push_back(LidUsageStats - (docIdLimit, usedLids, lowestFreeLid, highestUsedLid)); + (docIdLimit, usedLids, lowestFreeLid, highestUsedLid, last_remove_batch)); } _handler->_lids = usedLidsVector; return *this; } JobTestBase &addStats(uint32_t docIdLimit, - uint32_t numDocs, - uint32_t lowestFreeLid, - uint32_t highestUsedLid) { + uint32_t numDocs, + uint32_t lowestFreeLid, + uint32_t highestUsedLid) { _handler->_stats.push_back(LidUsageStats - (docIdLimit, numDocs, lowestFreeLid, highestUsedLid)); + (docIdLimit, numDocs, lowestFreeLid, highestUsedLid, TimePoint())); return *this; } bool run() { @@ -319,10 +331,11 @@ struct JobTestBase : public ::testing::Test { void assertNoWorkDone() { assertJobContext(0, 0, 0, 0, 0); } - JobTestBase &setupOneDocumentToCompact() { + JobTestBase &setupOneDocumentToCompact(TimePoint last_remove_batch = TimePoint()) { addStats(10, {1,3,4,5,6,9}, {{9,2}, // 30% bloat: move 9 -> 2 - {6,7}}); // no documents to move + {6,7}}, // no documents to move + last_remove_batch); return *this; } void assertOneDocumentCompacted() { @@ -606,6 +619,41 @@ TEST_F(JobTest, job_is_re_enabled_when_node_is_no_longer_retired) 
assertOneDocumentCompacted(); } +TEST_F(JobTest, job_is_disabled_while_remove_batch_is_ongoing) +{ + TimePoint last_remove_batch = std::chrono::steady_clock::now(); + setupOneDocumentToCompact(last_remove_batch); + EXPECT_TRUE(run()); // job is disabled + assertNoWorkDone(); +} + +TEST_F(JobTest, job_becomes_disabled_if_remove_batch_starts) +{ + setupThreeDocumentsToCompact(); + EXPECT_FALSE(run()); // job executed as normal (with more work to do) + assertJobContext(2, 9, 1, 0, 0); + + _handler->set_last_remove_batch(std::chrono::steady_clock::now()); + EXPECT_TRUE(run()); // job is disabled + assertJobContext(2, 9, 1, 0, 0); +} + +TEST_F(JobTest, job_is_re_enabled_when_remove_batch_is_no_longer_ongoing) +{ + setupThreeDocumentsToCompact(); + EXPECT_FALSE(run()); // job executed as normal (with more work to do) + assertJobContext(2, 9, 1, 0, 0); + + TimePoint last_remove_batch = std::chrono::steady_clock::now(); + _handler->set_last_remove_batch(last_remove_batch); + EXPECT_TRUE(run()); // job is disabled + assertJobContext(2, 9, 1, 0, 0); + + _handler->set_last_remove_batch(last_remove_batch - std::chrono::seconds(static_cast<long>(REMOVE_BATCH_BLOCK_DELAY))); + EXPECT_FALSE(run()); // job executed as normal (with more work to do) + assertJobContext(3, 8, 2, 0, 0); +} + struct MaxOutstandingJobTest : public JobTest { std::unique_ptr<MyCountJobRunner> runner; MaxOutstandingJobTest() diff --git a/searchcore/src/tests/proton/documentmetastore/documentmetastore_test.cpp b/searchcore/src/tests/proton/documentmetastore/documentmetastore_test.cpp index 99b425b9fd7..f6f0c2b0806 100644 --- a/searchcore/src/tests/proton/documentmetastore/documentmetastore_test.cpp +++ b/searchcore/src/tests/proton/documentmetastore/documentmetastore_test.cpp @@ -1782,7 +1782,7 @@ TEST(DocumentMetaStoreTest, get_lid_usage_stats_works) void assertLidBloat(uint32_t expBloat, uint32_t lidLimit, uint32_t usedLids) { - LidUsageStats stats(lidLimit, usedLids, 0, 0); + LidUsageStats stats(lidLimit, 
usedLids, 0, 0, LidUsageStats::TimePoint()); EXPECT_EQ(expBloat, stats.getLidBloat()); } @@ -2084,6 +2084,23 @@ TEST(DocumentMetaStoreTest, multiple_lids_can_be_removed_with_removeBatch) assertLidGidFound(4, dms); } +TEST(DocumentMetaStoreTest, tracks_time_of_last_call_to_remove_batch) +{ + DocumentMetaStore dms(createBucketDB()); + dms.constructFreeList(); + addLid(dms, 1); + + LidUsageStats::TimePoint before = std::chrono::steady_clock::now(); + std::this_thread::sleep_for(std::chrono::milliseconds(1)); + dms.removeBatch({1}, 5); + std::this_thread::sleep_for(std::chrono::milliseconds(1)); + LidUsageStats::TimePoint after = std::chrono::steady_clock::now(); + + auto stats = dms.getLidUsageStats(); + EXPECT_LT(before, stats.get_last_remove_batch()); + EXPECT_GT(after, stats.get_last_remove_batch()); +} + } GTEST_MAIN_RUN_ALL_TESTS() diff --git a/searchcore/src/vespa/searchcore/config/proton.def b/searchcore/src/vespa/searchcore/config/proton.def index a88239a41f6..fd50dd2094a 100644 --- a/searchcore/src/vespa/searchcore/config/proton.def +++ b/searchcore/src/vespa/searchcore/config/proton.def @@ -356,6 +356,16 @@ lidspacecompaction.allowedlidbloat int default=1000 ## The lid bloat factor must be >= allowedlidbloatfactor before considering compaction. lidspacecompaction.allowedlidbloatfactor double default=0.01 +## The delay (in seconds) for when the last remove batch operation would be considered to block lid space compaction. +## +## When considering compaction, if the document meta store has received a remove batch operation in the last delay seconds, +## the lid space compaction job is blocked. It is considered again at the next regular interval (see above). +## +## Remove batch operations are used when deleting buckets on a content node. +## This functionality ensures that during massive deleting of buckets (e.g. 
as part of redistribution of data to a new node), +## lid space compaction does not interfere, but instead is applied after deleting of buckets is complete. +lidspacecompaction.removebatchblockdelay double default=5.0 + ## This is the maximum value visibilitydelay you can have. ## A to higher value here will cost more memory while not improving too much. maxvisibilitydelay double default=1.0 diff --git a/searchcore/src/vespa/searchcore/proton/documentmetastore/documentmetastore.cpp b/searchcore/src/vespa/searchcore/proton/documentmetastore/documentmetastore.cpp index 9b535be19b7..e6f16004bad 100644 --- a/searchcore/src/vespa/searchcore/proton/documentmetastore/documentmetastore.cpp +++ b/searchcore/src/vespa/searchcore/proton/documentmetastore/documentmetastore.cpp @@ -450,7 +450,8 @@ DocumentMetaStore::DocumentMetaStore(BucketDBOwner::SP bucketDB, _bucketDB(bucketDB), _shrinkLidSpaceBlockers(0), _subDbType(subDbType), - _trackDocumentSizes(true) + _trackDocumentSizes(true), + _last_remove_batch() { ensureSpace(0); // lid 0 is reserved setCommittedDocIdLimit(1u); // lid 0 is reserved @@ -665,6 +666,7 @@ DocumentMetaStore::removeBatch(const std::vector<DocId> &lidsToRemove, const uin (void) removed; } incGeneration(); + _last_remove_batch = std::chrono::steady_clock::now(); } void @@ -772,7 +774,8 @@ DocumentMetaStore::getLidUsageStats() const return LidUsageStats(docIdLimit, numDocs, lowestFreeLid, - highestUsedLid); + highestUsedLid, + _last_remove_batch); } Blueprint::UP diff --git a/searchcore/src/vespa/searchcore/proton/documentmetastore/documentmetastore.h b/searchcore/src/vespa/searchcore/proton/documentmetastore/documentmetastore.h index 27c1c97556c..3bd9795cfd5 100644 --- a/searchcore/src/vespa/searchcore/proton/documentmetastore/documentmetastore.h +++ b/searchcore/src/vespa/searchcore/proton/documentmetastore/documentmetastore.h @@ -71,6 +71,7 @@ private: uint32_t _shrinkLidSpaceBlockers; const SubDbType _subDbType; bool _trackDocumentSizes; + 
search::LidUsageStats::TimePoint _last_remove_batch; DocId getFreeLid(); DocId peekFreeLid(); diff --git a/searchcore/src/vespa/searchcore/proton/server/document_db_maintenance_config.cpp b/searchcore/src/vespa/searchcore/proton/server/document_db_maintenance_config.cpp index 848b1f27574..b470a390b50 100644 --- a/searchcore/src/vespa/searchcore/proton/server/document_db_maintenance_config.cpp +++ b/searchcore/src/vespa/searchcore/proton/server/document_db_maintenance_config.cpp @@ -54,6 +54,7 @@ DocumentDBLidSpaceCompactionConfig::DocumentDBLidSpaceCompactionConfig() _interval(3600), _allowedLidBloat(1000000000), _allowedLidBloatFactor(1.0), + _remove_batch_block_delay(5.0), _disabled(false), _maxDocsToScan(10000) { @@ -62,12 +63,14 @@ DocumentDBLidSpaceCompactionConfig::DocumentDBLidSpaceCompactionConfig() DocumentDBLidSpaceCompactionConfig::DocumentDBLidSpaceCompactionConfig(double interval, uint32_t allowedLidBloat, double allowedLidBloatFactor, + double remove_batch_block_delay, bool disabled, uint32_t maxDocsToScan) : _delay(std::min(MAX_DELAY_SEC, interval)), _interval(interval), _allowedLidBloat(allowedLidBloat), _allowedLidBloatFactor(allowedLidBloatFactor), + _remove_batch_block_delay(remove_batch_block_delay), _disabled(disabled), _maxDocsToScan(maxDocsToScan) { diff --git a/searchcore/src/vespa/searchcore/proton/server/document_db_maintenance_config.h b/searchcore/src/vespa/searchcore/proton/server/document_db_maintenance_config.h index acbbc442c7a..4b458765f3c 100644 --- a/searchcore/src/vespa/searchcore/proton/server/document_db_maintenance_config.h +++ b/searchcore/src/vespa/searchcore/proton/server/document_db_maintenance_config.h @@ -47,6 +47,7 @@ private: double _interval; uint32_t _allowedLidBloat; double _allowedLidBloatFactor; + double _remove_batch_block_delay; bool _disabled; uint32_t _maxDocsToScan; @@ -55,7 +56,8 @@ public: DocumentDBLidSpaceCompactionConfig(double interval, uint32_t allowedLidBloat, double allowwedLidBloatFactor, - bool 
disabled = false, + double remove_batch_block_delay, + bool disabled, uint32_t maxDocsToScan = 10000); static DocumentDBLidSpaceCompactionConfig createDisabled(); @@ -64,6 +66,7 @@ public: double getInterval() const { return _interval; } uint32_t getAllowedLidBloat() const { return _allowedLidBloat; } double getAllowedLidBloatFactor() const { return _allowedLidBloatFactor; } + double get_remove_batch_block_delay() const { return _remove_batch_block_delay; } bool isDisabled() const { return _disabled; } uint32_t getMaxDocsToScan() const { return _maxDocsToScan; } }; diff --git a/searchcore/src/vespa/searchcore/proton/server/documentdbconfigmanager.cpp b/searchcore/src/vespa/searchcore/proton/server/documentdbconfigmanager.cpp index a562408b64d..ef31da34683 100644 --- a/searchcore/src/vespa/searchcore/proton/server/documentdbconfigmanager.cpp +++ b/searchcore/src/vespa/searchcore/proton/server/documentdbconfigmanager.cpp @@ -135,6 +135,7 @@ buildMaintenanceConfig(const BootstrapConfig::SP &bootstrapConfig, proton.lidspacecompaction.interval, proton.lidspacecompaction.allowedlidbloat, proton.lidspacecompaction.allowedlidbloatfactor, + proton.lidspacecompaction.removebatchblockdelay, isDocumentTypeGlobal), AttributeUsageFilterConfig( proton.writefilter.attribute.enumstorelimit, diff --git a/searchcore/src/vespa/searchcore/proton/server/lid_space_compaction_job.cpp b/searchcore/src/vespa/searchcore/proton/server/lid_space_compaction_job.cpp index fad00fa00e6..c2d655538f5 100644 --- a/searchcore/src/vespa/searchcore/proton/server/lid_space_compaction_job.cpp +++ b/searchcore/src/vespa/searchcore/proton/server/lid_space_compaction_job.cpp @@ -89,6 +89,13 @@ LidSpaceCompactionJob::compactLidSpace(const LidUsageStats &stats) _shouldCompactLidSpace = false; } +bool +LidSpaceCompactionJob::remove_batch_is_ongoing(const LidUsageStats& stats) const +{ + LidUsageStats::TimePoint now = std::chrono::steady_clock::now(); + return (now - stats.get_last_remove_batch()) < 
std::chrono::duration<double>(_cfg.get_remove_batch_block_delay()); +} + LidSpaceCompactionJob::LidSpaceCompactionJob(const DocumentDBLidSpaceCompactionConfig &config, ILidSpaceCompactionHandler &handler, IOperationStorer &opStorer, @@ -129,6 +136,11 @@ LidSpaceCompactionJob::run() return true; // indicate work is done since no work can be done } LidUsageStats stats = _handler.getLidStatus(); + if (remove_batch_is_ongoing(stats)) { + // Note that we don't set the job as blocked as the decision to un-block it is not driven externally. + LOG(info, "run(): Lid space compaction is disabled while remove batch (delete buckets) is ongoing"); + return true; + } if (_scanItr) { return scanDocuments(stats); } else if (_shouldCompactLidSpace) { diff --git a/searchcore/src/vespa/searchcore/proton/server/lid_space_compaction_job.h b/searchcore/src/vespa/searchcore/proton/server/lid_space_compaction_job.h index 0732576cc70..2f242e5a33a 100644 --- a/searchcore/src/vespa/searchcore/proton/server/lid_space_compaction_job.h +++ b/searchcore/src/vespa/searchcore/proton/server/lid_space_compaction_job.h @@ -45,6 +45,7 @@ private: void compactLidSpace(const search::LidUsageStats &stats); void refreshRunnable(); void refreshAndConsiderRunnable(); + bool remove_batch_is_ongoing(const search::LidUsageStats& stats) const; public: LidSpaceCompactionJob(const DocumentDBLidSpaceCompactionConfig &config, |