diff options
author | Geir Storli <geirst@verizonmedia.com> | 2020-04-29 07:48:45 +0000 |
---|---|---|
committer | Geir Storli <geirst@verizonmedia.com> | 2020-04-29 07:51:06 +0000 |
commit | 6d108bacdf3c92ebbf8a84b70ff619201ff1ba53 (patch) | |
tree | e42e7a02073facf13936ecf8e860ded3623101e3 /searchcore/src | |
parent | e1d922c6979fa06695bfdcec6cafb8988d67ff77 (diff) |
Improve tracking of remove batch rate used to consider to block lid space compaction.
This is also a preparation for tracking the rate of regular remove operations,
and use this to consider to block lid space compaction.
Diffstat (limited to 'searchcore/src')
25 files changed, 376 insertions, 59 deletions
diff --git a/searchcore/src/tests/proton/common/operation_rate_tracker/CMakeLists.txt b/searchcore/src/tests/proton/common/operation_rate_tracker/CMakeLists.txt new file mode 100644 index 00000000000..f5e6a791124 --- /dev/null +++ b/searchcore/src/tests/proton/common/operation_rate_tracker/CMakeLists.txt @@ -0,0 +1,9 @@ +# Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +vespa_add_executable(searchcore_operation_rate_tracker_test_app TEST + SOURCES + operation_rate_tracker_test.cpp + DEPENDS + searchcore_pcommon + gtest +) +vespa_add_test(NAME searchcore_operation_rate_tracker_test_app COMMAND searchcore_operation_rate_tracker_test_app) diff --git a/searchcore/src/tests/proton/common/operation_rate_tracker/operation_rate_tracker_test.cpp b/searchcore/src/tests/proton/common/operation_rate_tracker/operation_rate_tracker_test.cpp new file mode 100644 index 00000000000..ce19867e286 --- /dev/null +++ b/searchcore/src/tests/proton/common/operation_rate_tracker/operation_rate_tracker_test.cpp @@ -0,0 +1,90 @@ +// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#include <vespa/searchcore/proton/common/operation_rate_tracker.h> +#include <vespa/vespalib/gtest/gtest.h> +#include <vespa/vespalib/util/time.h> + +#include <vespa/log/log.h> +LOG_SETUP("operation_rate_tracker_test"); + +using namespace proton; + +TEST(OperationRateTrackerTest, time_budget_per_op_is_inverse_of_rate_threshold) +{ + EXPECT_EQ(vespalib::from_s(0.25), OperationRateTracker(4).get_time_budget_per_op()); + EXPECT_EQ(vespalib::from_s(2.0), OperationRateTracker(0.5).get_time_budget_per_op()); +} + +TEST(OperationRateTrackerTest, time_budget_window_is_minimum_1_sec) +{ + EXPECT_EQ(vespalib::from_s(1.0), OperationRateTracker(4).get_time_budget_window()); + EXPECT_EQ(vespalib::from_s(2.0), OperationRateTracker(0.5).get_time_budget_window()); +} + +class Simulator { +public: + vespalib::steady_time now; + OperationRateTracker ort; + Simulator(double rate_threshold) + : now(vespalib::steady_clock::now()), + ort(rate_threshold) + { + } + void tick(double real_rate) { + now = now + vespalib::from_s(1.0 / real_rate); + ort.observe(now); + } + bool above_threshold(double now_delta = 0) { + return ort.above_threshold(now + vespalib::from_s(now_delta)); + } +}; + +TEST(OperationRateTrackerTest, tracks_whether_operation_rate_is_below_or_above_threshold) +{ + Simulator sim(2); + + // Simulate an actual rate of 4 ops / sec + sim.tick(4); // Threshold time is 1.0s in the past (at time budget window start) + EXPECT_FALSE(sim.above_threshold(-1.0)); + EXPECT_TRUE(sim.above_threshold(-1.01)); + + // Catch up with now + sim.tick(4); + sim.tick(4); + sim.tick(4); + sim.tick(4); // Threshold time is now. + EXPECT_FALSE(sim.above_threshold()); + EXPECT_TRUE(sim.above_threshold(-0.01)); + + // Move into the future + sim.tick(4); // Threshold time is 0.25s into the future. + EXPECT_TRUE(sim.above_threshold(0.24)); + EXPECT_FALSE(sim.above_threshold(0.25)); + + // Move to time budget window end + sim.tick(4); + sim.tick(4); + sim.tick(4); // Threshold time is 1.0s into the future (at time budget window end) + EXPECT_TRUE(sim.above_threshold(0.99)); + EXPECT_FALSE(sim.above_threshold(1.0)); + + sim.tick(4); // Threshold time is still 1.0s into the future (at time budget window end) + EXPECT_TRUE(sim.above_threshold(0.99)); + EXPECT_FALSE(sim.above_threshold(1.0)); + + // Reduce actual rate to 1 ops / sec + sim.tick(1); // Threshold time is 0.5s into the future. + EXPECT_TRUE(sim.above_threshold(0.49)); + EXPECT_FALSE(sim.above_threshold(0.5)); + + sim.tick(1); // Threshold time is now. + EXPECT_FALSE(sim.above_threshold()); + EXPECT_TRUE(sim.above_threshold(-0.01)); + + sim.tick(1); + sim.tick(1); // Threshold time is back at time budget window start + EXPECT_FALSE(sim.above_threshold(-1.0)); + EXPECT_TRUE(sim.above_threshold(-1.01)); +} + +GTEST_MAIN_RUN_ALL_TESTS() diff --git a/searchcore/src/tests/proton/documentdb/lid_space_compaction/lid_space_compaction_test.cpp b/searchcore/src/tests/proton/documentdb/lid_space_compaction/lid_space_compaction_test.cpp index 64299c70588..a97a2380f25 100644 --- a/searchcore/src/tests/proton/documentdb/lid_space_compaction/lid_space_compaction_test.cpp +++ b/searchcore/src/tests/proton/documentdb/lid_space_compaction/lid_space_compaction_test.cpp @@ -30,7 +30,7 @@ constexpr uint32_t SUBDB_ID = 2; constexpr vespalib::duration JOB_DELAY = 1s; constexpr uint32_t ALLOWED_LID_BLOAT = 1; constexpr double ALLOWED_LID_BLOAT_FACTOR = 0.3; -constexpr vespalib::duration REMOVE_BATCH_BLOCK_DELAY = 20s; +constexpr double REMOVE_BATCH_BLOCK_RATE = 1.0 / 20.0; constexpr uint32_t MAX_DOCS_TO_SCAN = 100; constexpr double RESOURCE_LIMIT_FACTOR = 1.0; constexpr uint32_t MAX_OUTSTANDING_MOVE_OPS = 10; @@ -82,19 +82,30 @@ struct MyHandler : public ILidSpaceCompactionHandler { mutable uint32_t _iteratorCnt; bool _storeMoveDoneContexts; std::vector<IDestructorCallback::SP> _moveDoneContexts; + documentmetastore::OperationListener::SP _op_listener; + RemoveOperationsRateTracker* _rm_listener; MyHandler(bool storeMoveDoneContexts = false); ~MyHandler(); void clearMoveDoneContexts() { _moveDoneContexts.clear(); } - void set_last_remove_batch(TimePoint last_remove_batch) { - for (auto& s : _stats) { - s = LidUsageStats(s.getLidLimit(), s.getUsedLids(), - s.getLowestFreeLid(), s.getHighestUsedLid(), last_remove_batch); - } + void run_remove_batch_ops() { + // This ensures to max out the threshold time in the operation rate tracker. + _op_listener->notify_remove_batch(); + _op_listener->notify_remove_batch(); + _op_listener->notify_remove_batch(); + } + void stop_remove_batch_ops() { + _rm_listener->reset_remove_batch_tracker(); } virtual vespalib::string getName() const override { return "myhandler"; } + virtual void set_operation_listener(documentmetastore::OperationListener::SP op_listener) override { + auto* rm_listener = dynamic_cast<RemoveOperationsRateTracker*>(op_listener.get()); + assert(rm_listener != nullptr); + _op_listener = std::move(op_listener); + _rm_listener = rm_listener; + } virtual uint32_t getSubDbId() const override { return 2; } virtual LidUsageStats getLidStatus() const override { assert(_handleMoveCnt < _stats.size()); @@ -132,7 +143,9 @@ MyHandler::MyHandler(bool storeMoveDoneContexts) _wantedLidLimit(0), _iteratorCnt(0), _storeMoveDoneContexts(storeMoveDoneContexts), - _moveDoneContexts() + _moveDoneContexts(), + _op_listener(), + _rm_listener() {} MyHandler::~MyHandler() {} @@ -265,7 +278,7 @@ struct JobTestBase : public ::testing::Test { _handler = std::make_unique<MyHandler>(maxOutstandingMoveOps != MAX_OUTSTANDING_MOVE_OPS); _job = std::make_unique<LidSpaceCompactionJob>(DocumentDBLidSpaceCompactionConfig(interval, allowedLidBloat, allowedLidBloatFactor, - REMOVE_BATCH_BLOCK_DELAY, + REMOVE_BATCH_BLOCK_RATE, false, maxDocsToScan), *_handler, _storer, _frozenHandler, _diskMemUsageNotifier, BlockableMaintenanceJobConfig(resourceLimitFactor, maxOutstandingMoveOps), @@ -274,20 +287,18 @@ struct JobTestBase : public ::testing::Test { ~JobTestBase(); JobTestBase &addStats(uint32_t docIdLimit, const LidVector &usedLids, - const LidPairVector &usedFreePairs, - TimePoint last_remove_batch = TimePoint()) { - return addMultiStats(docIdLimit, {usedLids}, usedFreePairs, last_remove_batch); + const LidPairVector &usedFreePairs) { + return addMultiStats(docIdLimit, {usedLids}, usedFreePairs); } JobTestBase &addMultiStats(uint32_t docIdLimit, const std::vector<LidVector> &usedLidsVector, - const LidPairVector &usedFreePairs, - TimePoint last_remove_batch = TimePoint()) { + const LidPairVector &usedFreePairs) { uint32_t usedLids = usedLidsVector[0].size(); for (auto pair : usedFreePairs) { uint32_t highestUsedLid = pair.first; uint32_t lowestFreeLid = pair.second; _handler->_stats.push_back(LidUsageStats - (docIdLimit, usedLids, lowestFreeLid, highestUsedLid, last_remove_batch)); + (docIdLimit, usedLids, lowestFreeLid, highestUsedLid)); } _handler->_lids = usedLidsVector; return *this; @@ -297,7 +308,7 @@ struct JobTestBase : public ::testing::Test { uint32_t lowestFreeLid, uint32_t highestUsedLid) { _handler->_stats.push_back(LidUsageStats - (docIdLimit, numDocs, lowestFreeLid, highestUsedLid, TimePoint())); + (docIdLimit, numDocs, lowestFreeLid, highestUsedLid)); return *this; } bool run() { @@ -332,11 +343,10 @@ struct JobTestBase : public ::testing::Test { void assertNoWorkDone() { assertJobContext(0, 0, 0, 0, 0); } - JobTestBase &setupOneDocumentToCompact(TimePoint last_remove_batch = TimePoint()) { + JobTestBase &setupOneDocumentToCompact() { addStats(10, {1,3,4,5,6,9}, {{9,2}, // 30% bloat: move 9 -> 2 - {6,7}}, // no documents to move - last_remove_batch); + {6,7}}); // no documents to move return *this; } void assertOneDocumentCompacted() { @@ -622,8 +632,8 @@ TEST_F(JobTest, job_is_re_enabled_when_node_is_no_longer_retired) TEST_F(JobTest, job_is_disabled_while_remove_batch_is_ongoing) { - TimePoint last_remove_batch = std::chrono::steady_clock::now(); - setupOneDocumentToCompact(last_remove_batch); + setupOneDocumentToCompact(); + _handler->run_remove_batch_ops(); EXPECT_TRUE(run()); // job is disabled assertNoWorkDone(); } @@ -634,7 +644,7 @@ TEST_F(JobTest, job_becomes_disabled_if_remove_batch_starts) EXPECT_FALSE(run()); // job executed as normal (with more work to do) assertJobContext(2, 9, 1, 0, 0); - _handler->set_last_remove_batch(std::chrono::steady_clock::now()); + _handler->run_remove_batch_ops(); EXPECT_TRUE(run()); // job is disabled assertJobContext(2, 9, 1, 0, 0); } @@ -645,12 +655,11 @@ TEST_F(JobTest, job_is_re_enabled_when_remove_batch_is_no_longer_ongoing) EXPECT_FALSE(run()); // job executed as normal (with more work to do) assertJobContext(2, 9, 1, 0, 0); - TimePoint last_remove_batch = std::chrono::steady_clock::now(); - _handler->set_last_remove_batch(last_remove_batch); + _handler->run_remove_batch_ops(); EXPECT_TRUE(run()); // job is disabled assertJobContext(2, 9, 1, 0, 0); - _handler->set_last_remove_batch(last_remove_batch - REMOVE_BATCH_BLOCK_DELAY); + _handler->stop_remove_batch_ops(); EXPECT_FALSE(run()); // job executed as normal (with more work to do) assertJobContext(3, 8, 2, 0, 0); } diff --git a/searchcore/src/tests/proton/documentdb/maintenancecontroller/maintenancecontroller_test.cpp b/searchcore/src/tests/proton/documentdb/maintenancecontroller/maintenancecontroller_test.cpp index 7e2e258476f..2bd67f96b57 100644 --- a/searchcore/src/tests/proton/documentdb/maintenancecontroller/maintenancecontroller_test.cpp +++ b/searchcore/src/tests/proton/documentdb/maintenancecontroller/maintenancecontroller_test.cpp @@ -339,6 +339,7 @@ struct MockLidSpaceCompactionHandler : public ILidSpaceCompactionHandler MockLidSpaceCompactionHandler(const vespalib::string &name_) : name(name_) {} virtual vespalib::string getName() const override { return name; } + virtual void set_operation_listener(documentmetastore::OperationListener::SP) override {} virtual uint32_t getSubDbId() const override { return 0; } virtual search::LidUsageStats getLidStatus() const override { return search::LidUsageStats(); } virtual IDocumentScanIterator::UP getIterator() const override { return IDocumentScanIterator::UP(); } diff --git a/searchcore/src/tests/proton/documentmetastore/documentmetastore_test.cpp b/searchcore/src/tests/proton/documentmetastore/documentmetastore_test.cpp index f8069dcf494..8935143a7be 100644 --- a/searchcore/src/tests/proton/documentmetastore/documentmetastore_test.cpp +++ b/searchcore/src/tests/proton/documentmetastore/documentmetastore_test.cpp @@ -6,6 +6,7 @@ #include <vespa/searchcore/proton/bucketdb/i_bucket_create_listener.h> #include <vespa/searchcore/proton/common/hw_info.h> #include <vespa/searchcore/proton/documentmetastore/documentmetastore.h> +#include <vespa/searchcore/proton/documentmetastore/operation_listener.h> #include <vespa/searchcore/proton/flushengine/shrink_lid_space_flush_target.h> #include <vespa/searchcore/proton/server/itlssyncer.h> #include <vespa/searchlib/attribute/attributefilesavetarget.h> @@ -1782,7 +1783,7 @@ TEST(DocumentMetaStoreTest, get_lid_usage_stats_works) void assertLidBloat(uint32_t expBloat, uint32_t lidLimit, uint32_t usedLids) { - LidUsageStats stats(lidLimit, usedLids, 0, 0, LidUsageStats::TimePoint()); + LidUsageStats stats(lidLimit, usedLids, 0, 0); EXPECT_EQ(expBloat, stats.getLidBloat()); } @@ -2084,21 +2085,27 @@ TEST(DocumentMetaStoreTest, multiple_lids_can_be_removed_with_removeBatch) assertLidGidFound(4, dms); } -TEST(DocumentMetaStoreTest, tracks_time_of_last_call_to_remove_batch) +class MockOperationListener : public documentmetastore::OperationListener { +public: + size_t remove_batch_cnt; + + MockOperationListener() + : remove_batch_cnt(0) + { + } + void notify_remove_batch() override { ++remove_batch_cnt; } +}; + +TEST(DocumentMetaStoreTest, call_to_remove_batch_is_notified) { DocumentMetaStore dms(createBucketDB()); + auto listener = std::make_shared<MockOperationListener>(); + dms.set_operation_listener(listener); dms.constructFreeList(); addLid(dms, 1); - LidUsageStats::TimePoint before = std::chrono::steady_clock::now(); - std::this_thread::sleep_for(std::chrono::milliseconds(1)); dms.removeBatch({1}, 5); - std::this_thread::sleep_for(std::chrono::milliseconds(1)); - LidUsageStats::TimePoint after = std::chrono::steady_clock::now(); - - auto stats = dms.getLidUsageStats(); - EXPECT_LT(before, stats.get_last_remove_batch()); - EXPECT_GT(after, stats.get_last_remove_batch()); + EXPECT_EQ(1, listener->remove_batch_cnt); } } diff --git a/searchcore/src/vespa/searchcore/config/proton.def b/searchcore/src/vespa/searchcore/config/proton.def index 0501ab6ed7c..644564c0e2e 100644 --- a/searchcore/src/vespa/searchcore/config/proton.def +++ b/searchcore/src/vespa/searchcore/config/proton.def @@ -369,6 +369,7 @@ lidspacecompaction.allowedlidbloat int default=1000 ## The lid bloat factor must be >= allowedlidbloatfactor before considering compaction. lidspacecompaction.allowedlidbloatfactor double default=0.01 +## DEPRECATED (no longer used): Remove on Vespa 8 ## The delay (in seconds) for when the last remove batch operation would be considered to block lid space compaction. ## ## When considering compaction, if the document meta store has received a remove batch operation in the last delay seconds, @@ -379,6 +380,17 @@ lidspacecompaction.allowedlidbloatfactor double default=0.01 ## lid space compaction do not interfere, but instead is applied after deleting of buckets is complete. lidspacecompaction.removebatchblockdelay double default=2.0 +## The rate (ops / second) of remove batch operations for when to block lid space compaction. +## +## When considering compaction, if the current observed rate of remove batch operations +## is higher than the given block rate, the lid space compaction job is blocked. +## It is considered again at the next regular interval (see above). +## +## Remove batch operations are used when deleting buckets on a content node. +## This functionality ensures that during massive deleting of buckets (e.g. as part of redistribution of data to a new node), +## lid space compaction do not interfere, but instead is applied after deleting of buckets is complete. +lidspacecompaction.removebatchblockrate double default=0.5 + ## This is the maximum value visibilitydelay you can have. ## A to higher value here will cost more memory while not improving too much. maxvisibilitydelay double default=1.0 diff --git a/searchcore/src/vespa/searchcore/proton/common/CMakeLists.txt b/searchcore/src/vespa/searchcore/proton/common/CMakeLists.txt index 4db36039b3c..f3303f36199 100644 --- a/searchcore/src/vespa/searchcore/proton/common/CMakeLists.txt +++ b/searchcore/src/vespa/searchcore/proton/common/CMakeLists.txt @@ -14,9 +14,10 @@ vespa_add_library(searchcore_pcommon STATIC hw_info_sampler.cpp indexschema_inspector.cpp monitored_refcount.cpp + operation_rate_tracker.cpp select_utils.cpp - selectpruner.cpp selectcontext.cpp + selectpruner.cpp state_reporter_utils.cpp statusreport.cpp DEPENDS diff --git a/searchcore/src/vespa/searchcore/proton/common/operation_rate_tracker.cpp b/searchcore/src/vespa/searchcore/proton/common/operation_rate_tracker.cpp new file mode 100644 index 00000000000..487102cebf6 --- /dev/null +++ b/searchcore/src/vespa/searchcore/proton/common/operation_rate_tracker.cpp @@ -0,0 +1,33 @@ +// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#include "operation_rate_tracker.h" + +namespace proton { + +OperationRateTracker::OperationRateTracker(double rate_threshold) + : _time_budget_per_op(vespalib::from_s(1.0 / rate_threshold)), + _time_budget_window(std::max(vespalib::from_s(1.0), _time_budget_per_op)), + _threshold_time() +{ +} + +void +OperationRateTracker::observe(vespalib::steady_time now) +{ + vespalib::steady_time cand_time = std::max(now - _time_budget_window, _threshold_time + _time_budget_per_op); + _threshold_time = std::min(cand_time, now + _time_budget_window); +} + +bool +OperationRateTracker::above_threshold(vespalib::steady_time now) const +{ + return _threshold_time > now; +} + +void +OperationRateTracker::reset(vespalib::steady_time now) +{ + _threshold_time = now - _time_budget_window; +} + +} diff --git a/searchcore/src/vespa/searchcore/proton/common/operation_rate_tracker.h b/searchcore/src/vespa/searchcore/proton/common/operation_rate_tracker.h new file mode 100644 index 00000000000..7f7526faa5d --- /dev/null +++ b/searchcore/src/vespa/searchcore/proton/common/operation_rate_tracker.h @@ -0,0 +1,38 @@ +// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#pragma once + +#include <vespa/vespalib/util/time.h> + +namespace proton { + +/** + * Tracks whether the rate (ops/sec) of an operation is above or below a given threshold. + * + * An operation is given a time budget which is the inverse of the rate threshold. + * When we observe an operation that much time is "spent", and we adjust a threshold time accordingly. + * If this time is into the future, the current observed rate is above the rate threshold. + * + * To avoid the threshold time racing into the future or lagging behind, + * it is capped in both directions by a time budget window. + */ +class OperationRateTracker { +private: + vespalib::duration _time_budget_per_op; + vespalib::duration _time_budget_window; + vespalib::steady_time _threshold_time; + +public: + OperationRateTracker(double rate_threshold); + + vespalib::duration get_time_budget_per_op() const { return _time_budget_per_op; } + vespalib::duration get_time_budget_window() const { return _time_budget_window; } + + void observe(vespalib::steady_time now); + bool above_threshold(vespalib::steady_time now) const; + + // Should only be used for testing + void reset(vespalib::steady_time now); +}; + +} diff --git a/searchcore/src/vespa/searchcore/proton/documentmetastore/documentmetastore.cpp b/searchcore/src/vespa/searchcore/proton/documentmetastore/documentmetastore.cpp index c36ee8e474a..3f8ede91faa 100644 --- a/searchcore/src/vespa/searchcore/proton/documentmetastore/documentmetastore.cpp +++ b/searchcore/src/vespa/searchcore/proton/documentmetastore/documentmetastore.cpp @@ -2,6 +2,7 @@ #include "documentmetastore.h" #include "documentmetastoresaver.h" +#include "operation_listener.h" #include "search_context.h" #include <vespa/fastos/file.h> #include <vespa/searchcore/proton/bucketdb/bucketsessionbase.h> @@ -453,7 +454,7 @@ DocumentMetaStore::DocumentMetaStore(BucketDBOwner::SP bucketDB, _shrinkLidSpaceBlockers(0), _subDbType(subDbType), _trackDocumentSizes(true), - _last_remove_batch() + _op_listener() { ensureSpace(0); // lid 0 is reserved setCommittedDocIdLimit(1u); // lid 0 is reserved @@ -668,7 +669,9 @@ DocumentMetaStore::removeBatch(const std::vector<DocId> &lidsToRemove, const uin (void) removed; } incGeneration(); - _last_remove_batch = std::chrono::steady_clock::now(); + if (_op_listener) { + _op_listener->notify_remove_batch(); + } } void @@ -776,8 +779,7 @@ DocumentMetaStore::getLidUsageStats() const return LidUsageStats(docIdLimit, numDocs, lowestFreeLid, - highestUsedLid, - _last_remove_batch); + highestUsedLid); } Blueprint::UP @@ -1021,6 +1023,12 @@ DocumentMetaStore::canShrinkLidSpace() const } void +DocumentMetaStore::set_operation_listener(documentmetastore::OperationListener::SP op_listener) +{ + _op_listener = std::move(op_listener); +} + +void DocumentMetaStore::onShrinkLidSpace() { uint32_t committedDocIdLimit = this->getCommittedDocIdLimit(); diff --git a/searchcore/src/vespa/searchcore/proton/documentmetastore/documentmetastore.h b/searchcore/src/vespa/searchcore/proton/documentmetastore/documentmetastore.h index 3bd9795cfd5..99cefbe1802 100644 --- a/searchcore/src/vespa/searchcore/proton/documentmetastore/documentmetastore.h +++ b/searchcore/src/vespa/searchcore/proton/documentmetastore/documentmetastore.h @@ -17,11 +17,14 @@ #include <vespa/vespalib/util/rcuvector.h> namespace proton::bucketdb { - class SplitBucketSession; - class JoinBucketsSession; +class SplitBucketSession; +class JoinBucketsSession; } -namespace proton::documentmetastore { class Reader; } +namespace proton::documentmetastore { +class OperationListener; +class Reader; +} namespace proton { @@ -71,7 +74,7 @@ private: uint32_t _shrinkLidSpaceBlockers; const SubDbType _subDbType; bool _trackDocumentSizes; - search::LidUsageStats::TimePoint _last_remove_batch; + documentmetastore::OperationListener::SP _op_listener; DocId getFreeLid(); DocId peekFreeLid(); @@ -225,6 +228,7 @@ public: void compactLidSpace(DocId wantedLidLimit) override; void holdUnblockShrinkLidSpace() override; bool canShrinkLidSpace() const override; + void set_operation_listener(documentmetastore::OperationListener::SP op_listener) override; SerialNum getLastSerialNum() const override { return getStatus().getLastSyncToken(); diff --git a/searchcore/src/vespa/searchcore/proton/documentmetastore/i_document_meta_store.h b/searchcore/src/vespa/searchcore/proton/documentmetastore/i_document_meta_store.h index 37666879205..d7721a28b46 100644 --- a/searchcore/src/vespa/searchcore/proton/documentmetastore/i_document_meta_store.h +++ b/searchcore/src/vespa/searchcore/proton/documentmetastore/i_document_meta_store.h @@ -4,6 +4,7 @@ #include "lid_gid_key_comparator.h" #include "i_simple_document_meta_store.h" +#include "operation_listener.h" #include <vespa/searchlib/attribute/attributeguard.h> #include <vespa/searchlib/common/idocumentmetastore.h> #include <vespa/searchlib/common/serialnum.h> @@ -82,6 +83,8 @@ struct IDocumentMetaStore : public search::IDocumentMetaStore, */ virtual void compactLidSpace(DocId wantedLidLimit) = 0; + virtual void set_operation_listener(documentmetastore::OperationListener::SP op_listener) = 0; + }; } // namespace proton diff --git a/searchcore/src/vespa/searchcore/proton/documentmetastore/operation_listener.h b/searchcore/src/vespa/searchcore/proton/documentmetastore/operation_listener.h new file mode 100644 index 00000000000..3b974554551 --- /dev/null +++ b/searchcore/src/vespa/searchcore/proton/documentmetastore/operation_listener.h @@ -0,0 +1,19 @@ +// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#pragma once + +#include <memory> + +namespace proton::documentmetastore { + +/** + * Interface used to listen to operations handled by the document meta store. + */ +class OperationListener { +public: + using SP = std::shared_ptr<OperationListener>; + virtual ~OperationListener() {} + virtual void notify_remove_batch() = 0; +}; + +} diff --git a/searchcore/src/vespa/searchcore/proton/server/CMakeLists.txt b/searchcore/src/vespa/searchcore/proton/server/CMakeLists.txt index 92cf186f697..5f9f5528776 100644 --- a/searchcore/src/vespa/searchcore/proton/server/CMakeLists.txt +++ b/searchcore/src/vespa/searchcore/proton/server/CMakeLists.txt @@ -10,8 +10,8 @@ vespa_add_library(searchcore_server STATIC combiningfeedview.cpp ddbstate.cpp disk_mem_usage_filter.cpp - disk_mem_usage_sampler.cpp disk_mem_usage_forwarder.cpp + disk_mem_usage_sampler.cpp docstorevalidator.cpp document_db_config_owner.cpp document_db_directory_holder.cpp @@ -30,8 +30,8 @@ vespa_add_library(searchcore_server STATIC documentdb_commit_job.cpp documentdb_metrics_updater.cpp documentdbconfig.cpp - documentdbconfigscout.cpp documentdbconfigmanager.cpp + documentdbconfigscout.cpp documentretriever.cpp documentretrieverbase.cpp documentsubdbcollection.cpp @@ -63,8 +63,8 @@ vespa_add_library(searchcore_server STATIC maintenancejobrunner.cpp matchers.cpp matchview.cpp - memoryconfigstore.cpp memory_flush_config_updater.cpp + memoryconfigstore.cpp memoryflush.cpp minimal_document_retriever.cpp move_operation_limiter.cpp @@ -82,6 +82,7 @@ vespa_add_library(searchcore_server STATIC putdonecontext.cpp reconfig_params.cpp remove_batch_done_context.cpp + remove_operations_rate_tracker.cpp removedonecontext.cpp removedonetask.cpp replaypacketdispatcher.cpp diff --git a/searchcore/src/vespa/searchcore/proton/server/document_db_maintenance_config.cpp b/searchcore/src/vespa/searchcore/proton/server/document_db_maintenance_config.cpp index 57ef2d1a45c..6bd1888ad06 100644 --- a/searchcore/src/vespa/searchcore/proton/server/document_db_maintenance_config.cpp +++ b/searchcore/src/vespa/searchcore/proton/server/document_db_maintenance_config.cpp @@ -53,7 +53,7 @@ DocumentDBLidSpaceCompactionConfig::DocumentDBLidSpaceCompactionConfig() _interval(3600s), _allowedLidBloat(1000000000), _allowedLidBloatFactor(1.0), - _remove_batch_block_delay(5s), + _remove_batch_block_rate(0.5), _disabled(false), _maxDocsToScan(10000) { @@ -62,14 +62,14 @@ DocumentDBLidSpaceCompactionConfig::DocumentDBLidSpaceCompactionConfig() DocumentDBLidSpaceCompactionConfig::DocumentDBLidSpaceCompactionConfig(vespalib::duration interval, uint32_t allowedLidBloat, double allowedLidBloatFactor, - vespalib::duration remove_batch_block_delay, + double remove_batch_block_rate, bool disabled, uint32_t maxDocsToScan) : _delay(std::min(MAX_DELAY_SEC, interval)), _interval(interval), _allowedLidBloat(allowedLidBloat), _allowedLidBloatFactor(allowedLidBloatFactor), - _remove_batch_block_delay(remove_batch_block_delay), + _remove_batch_block_rate(remove_batch_block_rate), _disabled(disabled), _maxDocsToScan(maxDocsToScan) { diff --git a/searchcore/src/vespa/searchcore/proton/server/document_db_maintenance_config.h b/searchcore/src/vespa/searchcore/proton/server/document_db_maintenance_config.h index 604977aa04f..1370d855b31 100644 --- a/searchcore/src/vespa/searchcore/proton/server/document_db_maintenance_config.h +++ b/searchcore/src/vespa/searchcore/proton/server/document_db_maintenance_config.h @@ -47,7 +47,7 @@ private: vespalib::duration _interval; uint32_t _allowedLidBloat; double _allowedLidBloatFactor; - vespalib::duration _remove_batch_block_delay; + double _remove_batch_block_rate; bool _disabled; uint32_t _maxDocsToScan; @@ -56,7 +56,7 @@ public: DocumentDBLidSpaceCompactionConfig(vespalib::duration interval, uint32_t allowedLidBloat, double allowwedLidBloatFactor, - vespalib::duration remove_batch_block_delay, + double remove_batch_block_rate, bool disabled, uint32_t maxDocsToScan = 10000); @@ -66,7 +66,7 @@ public: vespalib::duration getInterval() const { return _interval; } uint32_t getAllowedLidBloat() const { return _allowedLidBloat; } double getAllowedLidBloatFactor() const { return _allowedLidBloatFactor; } - vespalib::duration get_remove_batch_block_delay() const { return _remove_batch_block_delay; } + double get_remove_batch_block_rate() const { return _remove_batch_block_rate; } bool isDisabled() const { return _disabled; } uint32_t getMaxDocsToScan() const { return _maxDocsToScan; } }; diff --git a/searchcore/src/vespa/searchcore/proton/server/documentdbconfigmanager.cpp b/searchcore/src/vespa/searchcore/proton/server/documentdbconfigmanager.cpp index 7a1989c8d7b..9bf8ce71244 100644 --- a/searchcore/src/vespa/searchcore/proton/server/documentdbconfigmanager.cpp +++ b/searchcore/src/vespa/searchcore/proton/server/documentdbconfigmanager.cpp @@ -134,7 +134,7 @@ buildMaintenanceConfig(const BootstrapConfig::SP &bootstrapConfig, vespalib::from_s(proton.lidspacecompaction.interval), proton.lidspacecompaction.allowedlidbloat, proton.lidspacecompaction.allowedlidbloatfactor, - vespalib::from_s(proton.lidspacecompaction.removebatchblockdelay), + proton.lidspacecompaction.removebatchblockrate, isDocumentTypeGlobal), AttributeUsageFilterConfig( proton.writefilter.attribute.enumstorelimit, diff --git a/searchcore/src/vespa/searchcore/proton/server/i_lid_space_compaction_handler.h b/searchcore/src/vespa/searchcore/proton/server/i_lid_space_compaction_handler.h index 4e996f3596d..b31f05c86a3 100644 --- a/searchcore/src/vespa/searchcore/proton/server/i_lid_space_compaction_handler.h +++ b/searchcore/src/vespa/searchcore/proton/server/i_lid_space_compaction_handler.h @@ -4,6 +4,7 @@ #include "i_document_scan_iterator.h" #include "ifrozenbuckethandler.h" +#include <vespa/searchcore/proton/documentmetastore/operation_listener.h> #include <vespa/searchcore/proton/feedoperation/compact_lid_space_operation.h> #include <vespa/searchcore/proton/feedoperation/moveoperation.h> #include <vespa/searchlib/common/lid_usage_stats.h> @@ -30,6 +31,13 @@ struct ILidSpaceCompactionHandler virtual vespalib::string getName() const = 0; /** + * Sets the listener used to get notifications on the operations handled by the document meta store. + * + * A call to this function should replace the previous listener if set. + */ + virtual void set_operation_listener(documentmetastore::OperationListener::SP op_listener) = 0; + + /** * Returns the id of the sub database this handler is operating over. */ virtual uint32_t getSubDbId() const = 0; diff --git a/searchcore/src/vespa/searchcore/proton/server/lid_space_compaction_handler.cpp b/searchcore/src/vespa/searchcore/proton/server/lid_space_compaction_handler.cpp index c4dc26a0875..e51352bfbb9 100644 --- a/searchcore/src/vespa/searchcore/proton/server/lid_space_compaction_handler.cpp +++ b/searchcore/src/vespa/searchcore/proton/server/lid_space_compaction_handler.cpp @@ -23,6 +23,12 @@ LidSpaceCompactionHandler::LidSpaceCompactionHandler(const MaintenanceDocumentSu { } +void +LidSpaceCompactionHandler::set_operation_listener(documentmetastore::OperationListener::SP op_listener) +{ + return _subDb.meta_store()->set_operation_listener(std::move(op_listener)); +} + LidUsageStats LidSpaceCompactionHandler::getLidStatus() const { diff --git a/searchcore/src/vespa/searchcore/proton/server/lid_space_compaction_handler.h b/searchcore/src/vespa/searchcore/proton/server/lid_space_compaction_handler.h index 21d20001923..a6469acb5e5 100644 --- a/searchcore/src/vespa/searchcore/proton/server/lid_space_compaction_handler.h +++ b/searchcore/src/vespa/searchcore/proton/server/lid_space_compaction_handler.h @@ -23,6 +23,7 @@ public: virtual vespalib::string getName() const override { return _docTypeName + "." + _subDb.name(); } + virtual void set_operation_listener(documentmetastore::OperationListener::SP op_listener) override; virtual uint32_t getSubDbId() const override { return _subDb.sub_db_id(); } virtual search::LidUsageStats getLidStatus() const override; virtual IDocumentScanIterator::UP getIterator() const override; diff --git a/searchcore/src/vespa/searchcore/proton/server/lid_space_compaction_job.cpp b/searchcore/src/vespa/searchcore/proton/server/lid_space_compaction_job.cpp index c2d655538f5..62893055a0a 100644 --- a/searchcore/src/vespa/searchcore/proton/server/lid_space_compaction_job.cpp +++ b/searchcore/src/vespa/searchcore/proton/server/lid_space_compaction_job.cpp @@ -90,10 +90,9 @@ LidSpaceCompactionJob::compactLidSpace(const LidUsageStats &stats) } bool -LidSpaceCompactionJob::remove_batch_is_ongoing(const LidUsageStats& stats) const +LidSpaceCompactionJob::remove_batch_is_ongoing() const { - LidUsageStats::TimePoint now = std::chrono::steady_clock::now(); - return (now - stats.get_last_remove_batch()) < std::chrono::duration<double>(_cfg.get_remove_batch_block_delay()); + return _ops_rate_tracker->remove_batch_above_threshold(); } LidSpaceCompactionJob::LidSpaceCompactionJob(const DocumentDBLidSpaceCompactionConfig &config, @@ -114,13 +113,15 @@ LidSpaceCompactionJob::LidSpaceCompactionJob(const DocumentDBLidSpaceCompactionC _retryFrozenDocument(false), _shouldCompactLidSpace(false), _diskMemUsageNotifier(diskMemUsageNotifier), - _clusterStateChangedNotifier(clusterStateChangedNotifier) + _clusterStateChangedNotifier(clusterStateChangedNotifier), + _ops_rate_tracker(std::make_shared<RemoveOperationsRateTracker>(config.get_remove_batch_block_rate())) { _diskMemUsageNotifier.addDiskMemUsageListener(this); _clusterStateChangedNotifier.addClusterStateChangedHandler(this); if (nodeRetired) { setBlocked(BlockedReason::CLUSTER_STATE); } + handler.set_operation_listener(_ops_rate_tracker); } LidSpaceCompactionJob::~LidSpaceCompactionJob() @@ -136,7 +137,7 @@ LidSpaceCompactionJob::run() return true; // indicate work is done since no work can be done } LidUsageStats stats = _handler.getLidStatus(); - if (remove_batch_is_ongoing(stats)) { + if (remove_batch_is_ongoing()) { // Note that we don't set the job as blocked as the decision to un-block it is not driven externally. LOG(info, "run(): Lid space compaction is disabled while remove batch (delete buckets) is ongoing"); return true; diff --git a/searchcore/src/vespa/searchcore/proton/server/lid_space_compaction_job.h b/searchcore/src/vespa/searchcore/proton/server/lid_space_compaction_job.h index 2f242e5a33a..44c30987a2f 100644 --- a/searchcore/src/vespa/searchcore/proton/server/lid_space_compaction_job.h +++ b/searchcore/src/vespa/searchcore/proton/server/lid_space_compaction_job.h @@ -9,6 +9,7 @@ #include "ibucketstatecalculator.h" #include "iclusterstatechangedhandler.h" #include "iclusterstatechangednotifier.h" +#include "remove_operations_rate_tracker.h" namespace proton { @@ -37,6 +38,7 @@ private: bool _shouldCompactLidSpace; IDiskMemUsageNotifier &_diskMemUsageNotifier; IClusterStateChangedNotifier &_clusterStateChangedNotifier; + std::shared_ptr<RemoveOperationsRateTracker> _ops_rate_tracker; bool hasTooMuchLidBloat(const search::LidUsageStats &stats) const; bool shouldRestartScanDocuments(const search::LidUsageStats &stats) const; @@ -45,7 +47,7 @@ private: void compactLidSpace(const search::LidUsageStats &stats); void refreshRunnable(); void refreshAndConsiderRunnable(); - bool remove_batch_is_ongoing(const search::LidUsageStats& stats) const; + bool remove_batch_is_ongoing() const; public: LidSpaceCompactionJob(const DocumentDBLidSpaceCompactionConfig &config, diff --git a/searchcore/src/vespa/searchcore/proton/server/remove_operations_rate_tracker.cpp b/searchcore/src/vespa/searchcore/proton/server/remove_operations_rate_tracker.cpp new file mode 100644 index 00000000000..eae59fbe175 --- /dev/null +++ b/searchcore/src/vespa/searchcore/proton/server/remove_operations_rate_tracker.cpp @@ -0,0 +1,31 @@ +// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#include "remove_operations_rate_tracker.h" +#include <vespa/vespalib/util/time.h> + +namespace proton { + +RemoveOperationsRateTracker::RemoveOperationsRateTracker(double remove_batch_rate_threshold) + : _remove_batch_tracker(remove_batch_rate_threshold) +{ +} + +void +RemoveOperationsRateTracker::notify_remove_batch() +{ + _remove_batch_tracker.observe(vespalib::steady_clock::now()); +} + +bool +RemoveOperationsRateTracker::remove_batch_above_threshold() const +{ + return _remove_batch_tracker.above_threshold(vespalib::steady_clock::now()); +} + +void +RemoveOperationsRateTracker::reset_remove_batch_tracker() +{ + _remove_batch_tracker.reset(vespalib::steady_clock::now()); +} + +} diff --git a/searchcore/src/vespa/searchcore/proton/server/remove_operations_rate_tracker.h b/searchcore/src/vespa/searchcore/proton/server/remove_operations_rate_tracker.h new file mode 100644 index 00000000000..a66ed5b9b54 --- /dev/null +++ b/searchcore/src/vespa/searchcore/proton/server/remove_operations_rate_tracker.h @@ -0,0 +1,30 @@ +// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#pragma once + +#include <vespa/searchcore/proton/common/operation_rate_tracker.h> +#include <vespa/searchcore/proton/documentmetastore/operation_listener.h> + +namespace proton { + +/** + * Class that tracks the rate of remove operations handled by the document meta store. + * + * For each operation we can tell if it is above or below a given rate threshold. + */ +class RemoveOperationsRateTracker : public documentmetastore::OperationListener { +private: + OperationRateTracker _remove_batch_tracker; + +public: + RemoveOperationsRateTracker(double remove_batch_rate_threshold); + + void notify_remove_batch() override; + + bool remove_batch_above_threshold() const; + + // Should only be used for testing + void reset_remove_batch_tracker(); +}; + +} diff --git a/searchcore/src/vespa/searchcore/proton/test/document_meta_store_observer.h b/searchcore/src/vespa/searchcore/proton/test/document_meta_store_observer.h index 5cc547b4dcb..be85df0cc87 100644 --- a/searchcore/src/vespa/searchcore/proton/test/document_meta_store_observer.h +++ b/searchcore/src/vespa/searchcore/proton/test/document_meta_store_observer.h @@ -185,6 +185,9 @@ struct DocumentMetaStoreObserver : public IDocumentMetaStore virtual void foreach(const search::IGidToLidMapperVisitor &visitor) const override { _store.foreach(visitor); } + virtual void set_operation_listener(documentmetastore::OperationListener::SP op_listener) override { + _store.set_operation_listener(std::move(op_listener)); + } }; } |