summaryrefslogtreecommitdiffstats
path: root/searchcore
diff options
context:
space:
mode:
authorGeir Storli <geirst@verizonmedia.com>2019-08-13 11:33:48 +0000
committerGeir Storli <geirst@verizonmedia.com>2019-08-13 11:35:00 +0000
commit5a829878b375af5f83c7fe53283acbee65587587 (patch)
treed319f454f4c324e45d1caedf4a69a99498f8e563 /searchcore
parent894e9ddd1e63ef2a28c342999c34051ce871a2b5 (diff)
Block lid space compaction job while remove batch (delete buckets) is ongoing.
Diffstat (limited to 'searchcore')
-rw-r--r--searchcore/src/tests/proton/documentdb/lid_space_compaction/lid_space_compaction_test.cpp72
-rw-r--r--searchcore/src/tests/proton/documentmetastore/documentmetastore_test.cpp19
-rw-r--r--searchcore/src/vespa/searchcore/config/proton.def10
-rw-r--r--searchcore/src/vespa/searchcore/proton/documentmetastore/documentmetastore.cpp7
-rw-r--r--searchcore/src/vespa/searchcore/proton/documentmetastore/documentmetastore.h1
-rw-r--r--searchcore/src/vespa/searchcore/proton/server/document_db_maintenance_config.cpp3
-rw-r--r--searchcore/src/vespa/searchcore/proton/server/document_db_maintenance_config.h5
-rw-r--r--searchcore/src/vespa/searchcore/proton/server/documentdbconfigmanager.cpp1
-rw-r--r--searchcore/src/vespa/searchcore/proton/server/lid_space_compaction_job.cpp12
-rw-r--r--searchcore/src/vespa/searchcore/proton/server/lid_space_compaction_job.h1
10 files changed, 115 insertions, 16 deletions
diff --git a/searchcore/src/tests/proton/documentdb/lid_space_compaction/lid_space_compaction_test.cpp b/searchcore/src/tests/proton/documentdb/lid_space_compaction/lid_space_compaction_test.cpp
index 4843778d0c8..93f3299e121 100644
--- a/searchcore/src/tests/proton/documentdb/lid_space_compaction/lid_space_compaction_test.cpp
+++ b/searchcore/src/tests/proton/documentdb/lid_space_compaction/lid_space_compaction_test.cpp
@@ -23,11 +23,13 @@ using namespace vespalib;
using search::IDestructorCallback;
using storage::spi::Timestamp;
using BlockedReason = IBlockableMaintenanceJob::BlockedReason;
+using TimePoint = LidUsageStats::TimePoint;
constexpr uint32_t SUBDB_ID = 2;
constexpr double JOB_DELAY = 1.0;
constexpr uint32_t ALLOWED_LID_BLOAT = 1;
constexpr double ALLOWED_LID_BLOAT_FACTOR = 0.3;
+constexpr double REMOVE_BATCH_BLOCK_DELAY = 20.0;
constexpr uint32_t MAX_DOCS_TO_SCAN = 100;
constexpr double RESOURCE_LIMIT_FACTOR = 1.0;
constexpr uint32_t MAX_OUTSTANDING_MOVE_OPS = 10;
@@ -83,6 +85,12 @@ struct MyHandler : public ILidSpaceCompactionHandler {
MyHandler(bool storeMoveDoneContexts = false);
~MyHandler();
void clearMoveDoneContexts() { _moveDoneContexts.clear(); }
+ void set_last_remove_batch(TimePoint last_remove_batch) {
+ for (auto& s : _stats) {
+ s = LidUsageStats(s.getLidLimit(), s.getUsedLids(),
+ s.getLowestFreeLid(), s.getHighestUsedLid(), last_remove_batch);
+ }
+ }
virtual vespalib::string getName() const override {
return "myhandler";
}
@@ -255,36 +263,40 @@ struct JobTestBase : public ::testing::Test {
{
_handler = std::make_unique<MyHandler>(maxOutstandingMoveOps != MAX_OUTSTANDING_MOVE_OPS);
_job = std::make_unique<LidSpaceCompactionJob>(DocumentDBLidSpaceCompactionConfig(interval, allowedLidBloat,
- allowedLidBloatFactor, false, maxDocsToScan),
+ allowedLidBloatFactor,
+ REMOVE_BATCH_BLOCK_DELAY,
+ false, maxDocsToScan),
*_handler, _storer, _frozenHandler, _diskMemUsageNotifier,
BlockableMaintenanceJobConfig(resourceLimitFactor, maxOutstandingMoveOps),
_clusterStateHandler, nodeRetired);
}
~JobTestBase();
JobTestBase &addStats(uint32_t docIdLimit,
- const LidVector &usedLids,
- const LidPairVector &usedFreePairs) {
- return addMultiStats(docIdLimit, {usedLids}, usedFreePairs);
+ const LidVector &usedLids,
+ const LidPairVector &usedFreePairs,
+ TimePoint last_remove_batch = TimePoint()) {
+ return addMultiStats(docIdLimit, {usedLids}, usedFreePairs, last_remove_batch);
}
JobTestBase &addMultiStats(uint32_t docIdLimit,
const std::vector<LidVector> &usedLidsVector,
- const LidPairVector &usedFreePairs) {
+ const LidPairVector &usedFreePairs,
+ TimePoint last_remove_batch = TimePoint()) {
uint32_t usedLids = usedLidsVector[0].size();
for (auto pair : usedFreePairs) {
uint32_t highestUsedLid = pair.first;
uint32_t lowestFreeLid = pair.second;
_handler->_stats.push_back(LidUsageStats
- (docIdLimit, usedLids, lowestFreeLid, highestUsedLid));
+ (docIdLimit, usedLids, lowestFreeLid, highestUsedLid, last_remove_batch));
}
_handler->_lids = usedLidsVector;
return *this;
}
JobTestBase &addStats(uint32_t docIdLimit,
- uint32_t numDocs,
- uint32_t lowestFreeLid,
- uint32_t highestUsedLid) {
+ uint32_t numDocs,
+ uint32_t lowestFreeLid,
+ uint32_t highestUsedLid) {
_handler->_stats.push_back(LidUsageStats
- (docIdLimit, numDocs, lowestFreeLid, highestUsedLid));
+ (docIdLimit, numDocs, lowestFreeLid, highestUsedLid, TimePoint()));
return *this;
}
bool run() {
@@ -319,10 +331,11 @@ struct JobTestBase : public ::testing::Test {
void assertNoWorkDone() {
assertJobContext(0, 0, 0, 0, 0);
}
- JobTestBase &setupOneDocumentToCompact() {
+ JobTestBase &setupOneDocumentToCompact(TimePoint last_remove_batch = TimePoint()) {
addStats(10, {1,3,4,5,6,9},
{{9,2}, // 30% bloat: move 9 -> 2
- {6,7}}); // no documents to move
+ {6,7}}, // no documents to move
+ last_remove_batch);
return *this;
}
void assertOneDocumentCompacted() {
@@ -606,6 +619,41 @@ TEST_F(JobTest, job_is_re_enabled_when_node_is_no_longer_retired)
assertOneDocumentCompacted();
}
+TEST_F(JobTest, job_is_disabled_while_remove_batch_is_ongoing)
+{
+ TimePoint last_remove_batch = std::chrono::steady_clock::now();
+ setupOneDocumentToCompact(last_remove_batch);
+ EXPECT_TRUE(run()); // job is disabled
+ assertNoWorkDone();
+}
+
+TEST_F(JobTest, job_becomes_disabled_if_remove_batch_starts)
+{
+ setupThreeDocumentsToCompact();
+ EXPECT_FALSE(run()); // job executed as normal (with more work to do)
+ assertJobContext(2, 9, 1, 0, 0);
+
+ _handler->set_last_remove_batch(std::chrono::steady_clock::now());
+ EXPECT_TRUE(run()); // job is disabled
+ assertJobContext(2, 9, 1, 0, 0);
+}
+
+TEST_F(JobTest, job_is_re_enabled_when_remove_batch_is_no_longer_ongoing)
+{
+ setupThreeDocumentsToCompact();
+ EXPECT_FALSE(run()); // job executed as normal (with more work to do)
+ assertJobContext(2, 9, 1, 0, 0);
+
+ TimePoint last_remove_batch = std::chrono::steady_clock::now();
+ _handler->set_last_remove_batch(last_remove_batch);
+ EXPECT_TRUE(run()); // job is disabled
+ assertJobContext(2, 9, 1, 0, 0);
+
+ _handler->set_last_remove_batch(last_remove_batch - std::chrono::seconds(static_cast<long>(REMOVE_BATCH_BLOCK_DELAY)));
+ EXPECT_FALSE(run()); // job executed as normal (with more work to do)
+ assertJobContext(3, 8, 2, 0, 0);
+}
+
struct MaxOutstandingJobTest : public JobTest {
std::unique_ptr<MyCountJobRunner> runner;
MaxOutstandingJobTest()
diff --git a/searchcore/src/tests/proton/documentmetastore/documentmetastore_test.cpp b/searchcore/src/tests/proton/documentmetastore/documentmetastore_test.cpp
index 99b425b9fd7..f6f0c2b0806 100644
--- a/searchcore/src/tests/proton/documentmetastore/documentmetastore_test.cpp
+++ b/searchcore/src/tests/proton/documentmetastore/documentmetastore_test.cpp
@@ -1782,7 +1782,7 @@ TEST(DocumentMetaStoreTest, get_lid_usage_stats_works)
void
assertLidBloat(uint32_t expBloat, uint32_t lidLimit, uint32_t usedLids)
{
- LidUsageStats stats(lidLimit, usedLids, 0, 0);
+ LidUsageStats stats(lidLimit, usedLids, 0, 0, LidUsageStats::TimePoint());
EXPECT_EQ(expBloat, stats.getLidBloat());
}
@@ -2084,6 +2084,23 @@ TEST(DocumentMetaStoreTest, multiple_lids_can_be_removed_with_removeBatch)
assertLidGidFound(4, dms);
}
+TEST(DocumentMetaStoreTest, tracks_time_of_last_call_to_remove_batch)
+{
+ DocumentMetaStore dms(createBucketDB());
+ dms.constructFreeList();
+ addLid(dms, 1);
+
+ LidUsageStats::TimePoint before = std::chrono::steady_clock::now();
+ std::this_thread::sleep_for(std::chrono::milliseconds(1));
+ dms.removeBatch({1}, 5);
+ std::this_thread::sleep_for(std::chrono::milliseconds(1));
+ LidUsageStats::TimePoint after = std::chrono::steady_clock::now();
+
+ auto stats = dms.getLidUsageStats();
+ EXPECT_LT(before, stats.get_last_remove_batch());
+ EXPECT_GT(after, stats.get_last_remove_batch());
+}
+
}
GTEST_MAIN_RUN_ALL_TESTS()
diff --git a/searchcore/src/vespa/searchcore/config/proton.def b/searchcore/src/vespa/searchcore/config/proton.def
index a88239a41f6..fd50dd2094a 100644
--- a/searchcore/src/vespa/searchcore/config/proton.def
+++ b/searchcore/src/vespa/searchcore/config/proton.def
@@ -356,6 +356,16 @@ lidspacecompaction.allowedlidbloat int default=1000
## The lid bloat factor must be >= allowedlidbloatfactor before considering compaction.
lidspacecompaction.allowedlidbloatfactor double default=0.01
+## The delay (in seconds) after the last remove batch operation during which lid space compaction stays blocked.
+##
+## When considering compaction, if the document meta store has received a remove batch operation within the last removebatchblockdelay seconds,
+## the lid space compaction job is blocked. It is considered again at the next regular interval (see above).
+##
+## Remove batch operations are used when deleting buckets on a content node.
+## This functionality ensures that during massive deletion of buckets (e.g. as part of redistribution of data to a new node),
+## lid space compaction does not interfere, but is instead applied after the deletion of buckets is complete.
+lidspacecompaction.removebatchblockdelay double default=5.0
+
## This is the maximum value visibilitydelay you can have.
## A too high value here will cost more memory while not improving too much.
maxvisibilitydelay double default=1.0
diff --git a/searchcore/src/vespa/searchcore/proton/documentmetastore/documentmetastore.cpp b/searchcore/src/vespa/searchcore/proton/documentmetastore/documentmetastore.cpp
index 9b535be19b7..e6f16004bad 100644
--- a/searchcore/src/vespa/searchcore/proton/documentmetastore/documentmetastore.cpp
+++ b/searchcore/src/vespa/searchcore/proton/documentmetastore/documentmetastore.cpp
@@ -450,7 +450,8 @@ DocumentMetaStore::DocumentMetaStore(BucketDBOwner::SP bucketDB,
_bucketDB(bucketDB),
_shrinkLidSpaceBlockers(0),
_subDbType(subDbType),
- _trackDocumentSizes(true)
+ _trackDocumentSizes(true),
+ _last_remove_batch()
{
ensureSpace(0); // lid 0 is reserved
setCommittedDocIdLimit(1u); // lid 0 is reserved
@@ -665,6 +666,7 @@ DocumentMetaStore::removeBatch(const std::vector<DocId> &lidsToRemove, const uin
(void) removed;
}
incGeneration();
+ _last_remove_batch = std::chrono::steady_clock::now();
}
void
@@ -772,7 +774,8 @@ DocumentMetaStore::getLidUsageStats() const
return LidUsageStats(docIdLimit,
numDocs,
lowestFreeLid,
- highestUsedLid);
+ highestUsedLid,
+ _last_remove_batch);
}
Blueprint::UP
diff --git a/searchcore/src/vespa/searchcore/proton/documentmetastore/documentmetastore.h b/searchcore/src/vespa/searchcore/proton/documentmetastore/documentmetastore.h
index 27c1c97556c..3bd9795cfd5 100644
--- a/searchcore/src/vespa/searchcore/proton/documentmetastore/documentmetastore.h
+++ b/searchcore/src/vespa/searchcore/proton/documentmetastore/documentmetastore.h
@@ -71,6 +71,7 @@ private:
uint32_t _shrinkLidSpaceBlockers;
const SubDbType _subDbType;
bool _trackDocumentSizes;
+ search::LidUsageStats::TimePoint _last_remove_batch;
DocId getFreeLid();
DocId peekFreeLid();
diff --git a/searchcore/src/vespa/searchcore/proton/server/document_db_maintenance_config.cpp b/searchcore/src/vespa/searchcore/proton/server/document_db_maintenance_config.cpp
index 848b1f27574..b470a390b50 100644
--- a/searchcore/src/vespa/searchcore/proton/server/document_db_maintenance_config.cpp
+++ b/searchcore/src/vespa/searchcore/proton/server/document_db_maintenance_config.cpp
@@ -54,6 +54,7 @@ DocumentDBLidSpaceCompactionConfig::DocumentDBLidSpaceCompactionConfig()
_interval(3600),
_allowedLidBloat(1000000000),
_allowedLidBloatFactor(1.0),
+ _remove_batch_block_delay(5.0),
_disabled(false),
_maxDocsToScan(10000)
{
@@ -62,12 +63,14 @@ DocumentDBLidSpaceCompactionConfig::DocumentDBLidSpaceCompactionConfig()
DocumentDBLidSpaceCompactionConfig::DocumentDBLidSpaceCompactionConfig(double interval,
uint32_t allowedLidBloat,
double allowedLidBloatFactor,
+ double remove_batch_block_delay,
bool disabled,
uint32_t maxDocsToScan)
: _delay(std::min(MAX_DELAY_SEC, interval)),
_interval(interval),
_allowedLidBloat(allowedLidBloat),
_allowedLidBloatFactor(allowedLidBloatFactor),
+ _remove_batch_block_delay(remove_batch_block_delay),
_disabled(disabled),
_maxDocsToScan(maxDocsToScan)
{
diff --git a/searchcore/src/vespa/searchcore/proton/server/document_db_maintenance_config.h b/searchcore/src/vespa/searchcore/proton/server/document_db_maintenance_config.h
index acbbc442c7a..4b458765f3c 100644
--- a/searchcore/src/vespa/searchcore/proton/server/document_db_maintenance_config.h
+++ b/searchcore/src/vespa/searchcore/proton/server/document_db_maintenance_config.h
@@ -47,6 +47,7 @@ private:
double _interval;
uint32_t _allowedLidBloat;
double _allowedLidBloatFactor;
+ double _remove_batch_block_delay;
bool _disabled;
uint32_t _maxDocsToScan;
@@ -55,7 +56,8 @@ public:
DocumentDBLidSpaceCompactionConfig(double interval,
uint32_t allowedLidBloat,
double allowwedLidBloatFactor,
- bool disabled = false,
+ double remove_batch_block_delay,
+ bool disabled,
uint32_t maxDocsToScan = 10000);
static DocumentDBLidSpaceCompactionConfig createDisabled();
@@ -64,6 +66,7 @@ public:
double getInterval() const { return _interval; }
uint32_t getAllowedLidBloat() const { return _allowedLidBloat; }
double getAllowedLidBloatFactor() const { return _allowedLidBloatFactor; }
+ double get_remove_batch_block_delay() const { return _remove_batch_block_delay; }
bool isDisabled() const { return _disabled; }
uint32_t getMaxDocsToScan() const { return _maxDocsToScan; }
};
diff --git a/searchcore/src/vespa/searchcore/proton/server/documentdbconfigmanager.cpp b/searchcore/src/vespa/searchcore/proton/server/documentdbconfigmanager.cpp
index a562408b64d..ef31da34683 100644
--- a/searchcore/src/vespa/searchcore/proton/server/documentdbconfigmanager.cpp
+++ b/searchcore/src/vespa/searchcore/proton/server/documentdbconfigmanager.cpp
@@ -135,6 +135,7 @@ buildMaintenanceConfig(const BootstrapConfig::SP &bootstrapConfig,
proton.lidspacecompaction.interval,
proton.lidspacecompaction.allowedlidbloat,
proton.lidspacecompaction.allowedlidbloatfactor,
+ proton.lidspacecompaction.removebatchblockdelay,
isDocumentTypeGlobal),
AttributeUsageFilterConfig(
proton.writefilter.attribute.enumstorelimit,
diff --git a/searchcore/src/vespa/searchcore/proton/server/lid_space_compaction_job.cpp b/searchcore/src/vespa/searchcore/proton/server/lid_space_compaction_job.cpp
index fad00fa00e6..c2d655538f5 100644
--- a/searchcore/src/vespa/searchcore/proton/server/lid_space_compaction_job.cpp
+++ b/searchcore/src/vespa/searchcore/proton/server/lid_space_compaction_job.cpp
@@ -89,6 +89,13 @@ LidSpaceCompactionJob::compactLidSpace(const LidUsageStats &stats)
_shouldCompactLidSpace = false;
}
+bool
+LidSpaceCompactionJob::remove_batch_is_ongoing(const LidUsageStats& stats) const
+{
+ LidUsageStats::TimePoint now = std::chrono::steady_clock::now();
+ return (now - stats.get_last_remove_batch()) < std::chrono::duration<double>(_cfg.get_remove_batch_block_delay());
+}
+
LidSpaceCompactionJob::LidSpaceCompactionJob(const DocumentDBLidSpaceCompactionConfig &config,
ILidSpaceCompactionHandler &handler,
IOperationStorer &opStorer,
@@ -129,6 +136,11 @@ LidSpaceCompactionJob::run()
return true; // indicate work is done since no work can be done
}
LidUsageStats stats = _handler.getLidStatus();
+ if (remove_batch_is_ongoing(stats)) {
+ // Note that we don't set the job as blocked as the decision to un-block it is not driven externally.
+ LOG(info, "run(): Lid space compaction is disabled while remove batch (delete buckets) is ongoing");
+ return true;
+ }
if (_scanItr) {
return scanDocuments(stats);
} else if (_shouldCompactLidSpace) {
diff --git a/searchcore/src/vespa/searchcore/proton/server/lid_space_compaction_job.h b/searchcore/src/vespa/searchcore/proton/server/lid_space_compaction_job.h
index 0732576cc70..2f242e5a33a 100644
--- a/searchcore/src/vespa/searchcore/proton/server/lid_space_compaction_job.h
+++ b/searchcore/src/vespa/searchcore/proton/server/lid_space_compaction_job.h
@@ -45,6 +45,7 @@ private:
void compactLidSpace(const search::LidUsageStats &stats);
void refreshRunnable();
void refreshAndConsiderRunnable();
+ bool remove_batch_is_ongoing(const search::LidUsageStats& stats) const;
public:
LidSpaceCompactionJob(const DocumentDBLidSpaceCompactionConfig &config,