summary | refs | log | tree | commit | diff | stats
path: root/storage
diff options
context:
space:
mode:
author: Tor Brede Vekterli <vekterli@verizonmedia.com> 2020-04-17 14:58:36 +0000
committer: Tor Brede Vekterli <vekterli@verizonmedia.com> 2020-04-17 15:09:11 +0000
commit: 5267eb646f5482464250d0d1d29855af65b0667e (patch)
tree: 90a9735a5c2c00a00c89f8ba07f2a261256770a7 /storage
parent: 0372397c1202363074a0f82d805adf5be1fa5b0f (diff)
Allow temporarily inhibiting maintenance ops when under load
If requests or responses from external sources are constantly being processed as part of the distributor tick, allow maintenance scanning to be skipped for up to N consecutive ticks, where N is a configurable number. This reduces the amount of CPU time spent on maintenance operations when the node has a lot of incoming data to deal with.
Diffstat (limited to 'storage')
-rw-r--r-- storage/src/tests/distributor/distributortest.cpp 8
-rw-r--r-- storage/src/vespa/storage/config/distributorconfiguration.cpp 5
-rw-r--r-- storage/src/vespa/storage/config/distributorconfiguration.h 5
-rw-r--r-- storage/src/vespa/storage/config/stor-distributormanager.def 9
-rw-r--r-- storage/src/vespa/storage/distributor/distributor.cpp 29
-rw-r--r-- storage/src/vespa/storage/distributor/distributor.h 6
6 files changed, 56 insertions, 6 deletions
diff --git a/storage/src/tests/distributor/distributortest.cpp b/storage/src/tests/distributor/distributortest.cpp
index 421fd2cb4b6..b110e99f8a4 100644
--- a/storage/src/tests/distributor/distributortest.cpp
+++ b/storage/src/tests/distributor/distributortest.cpp
@@ -700,6 +700,14 @@ TEST_F(DistributorTest, replica_counting_mode_config_is_propagated_to_metric_upd
EXPECT_EQ(ConfigBuilder::MinimumReplicaCountingMode::ANY, currentReplicaCountingMode());
}
+TEST_F(DistributorTest, max_consecutively_inhibited_maintenance_ticks_config_is_propagated_to_internal_config) {
+ setupDistributor(Redundancy(2), NodeCount(2), "storage:2 distributor:1");
+ ConfigBuilder builder;
+ builder.maxConsecutivelyInhibitedMaintenanceTicks = 123;
+ getConfig().configure(builder);
+ EXPECT_EQ(getConfig().max_consecutively_inhibited_maintenance_ticks(), 123);
+}
+
TEST_F(DistributorTest, bucket_activation_is_enabled_by_default) {
setupDistributor(Redundancy(2), NodeCount(2), "storage:2 distributor:1");
EXPECT_FALSE(getConfig().isBucketActivationDisabled());
diff --git a/storage/src/vespa/storage/config/distributorconfiguration.cpp b/storage/src/vespa/storage/config/distributorconfiguration.cpp
index 9f51d70ce60..0c9988421a3 100644
--- a/storage/src/vespa/storage/config/distributorconfiguration.cpp
+++ b/storage/src/vespa/storage/config/distributorconfiguration.cpp
@@ -22,6 +22,7 @@ DistributorConfiguration::DistributorConfiguration(StorageComponent& component)
_maxIdealStateOperations(100),
_idealStateChunkSize(1000),
_maxNodesPerMerge(16),
+ _max_consecutively_inhibited_maintenance_ticks(20),
_lastGarbageCollectionChange(vespalib::duration::zero()),
_garbageCollectionInterval(0),
_minPendingMaintenanceOps(100),
@@ -44,7 +45,8 @@ DistributorConfiguration::DistributorConfiguration(StorageComponent& component)
_use_weak_internal_read_consistency_for_client_gets(false),
_enable_metadata_only_fetch_phase_for_inconsistent_updates(false),
_minimumReplicaCountingMode(ReplicaCountingMode::TRUSTED)
-{ }
+{
+}
DistributorConfiguration::~DistributorConfiguration() = default;
@@ -123,6 +125,7 @@ DistributorConfiguration::configure(const vespa::config::content::core::StorDist
_docCountJoinLimit = config.joincount;
_minimalBucketSplit = config.minsplitcount;
_maxNodesPerMerge = config.maximumNodesPerMerge;
+ _max_consecutively_inhibited_maintenance_ticks = config.maxConsecutivelyInhibitedMaintenanceTicks;
_garbageCollectionInterval = std::chrono::seconds(config.garbagecollection.interval);
diff --git a/storage/src/vespa/storage/config/distributorconfiguration.h b/storage/src/vespa/storage/config/distributorconfiguration.h
index b8e99165d69..0c4b1f5756c 100644
--- a/storage/src/vespa/storage/config/distributorconfiguration.h
+++ b/storage/src/vespa/storage/config/distributorconfiguration.h
@@ -242,6 +242,10 @@ public:
return _enable_metadata_only_fetch_phase_for_inconsistent_updates;
}
+ uint32_t max_consecutively_inhibited_maintenance_ticks() const noexcept {
+ return _max_consecutively_inhibited_maintenance_ticks;
+ }
+
bool containsTimeStatement(const std::string& documentSelection) const;
private:
@@ -258,6 +262,7 @@ private:
uint32_t _maxIdealStateOperations;
uint32_t _idealStateChunkSize;
uint32_t _maxNodesPerMerge;
+ uint32_t _max_consecutively_inhibited_maintenance_ticks;
std::string _garbageCollectionSelection;
diff --git a/storage/src/vespa/storage/config/stor-distributormanager.def b/storage/src/vespa/storage/config/stor-distributormanager.def
index 915b9b6b304..71059d717a6 100644
--- a/storage/src/vespa/storage/config/stor-distributormanager.def
+++ b/storage/src/vespa/storage/config/stor-distributormanager.def
@@ -236,4 +236,11 @@ use_weak_internal_read_consistency_for_client_gets bool default=false
## Setting this option to true always implicitly enables the fast update restart
## feature, so it's not required to set that config to true, nor will setting it
## to false actually disable the feature.
-enable_metadata_only_fetch_phase_for_inconsistent_updates bool default=false
\ No newline at end of file
+enable_metadata_only_fetch_phase_for_inconsistent_updates bool default=false
+
+## If a distributor main thread tick is constantly processing requests or responses
+## originating from other nodes, setting this value above zero will prevent implicit
+## maintenance scans from being done as part of the tick for up to N rounds of ticking.
+## This is to reduce the amount of CPU spent on ideal state calculations and bucket DB
+## accesses when the distributor is heavily loaded with feed operations.
+max_consecutively_inhibited_maintenance_ticks int default=20
\ No newline at end of file
diff --git a/storage/src/vespa/storage/distributor/distributor.cpp b/storage/src/vespa/storage/distributor/distributor.cpp
index cce3d2d1acb..5dade47c2a6 100644
--- a/storage/src/vespa/storage/distributor/distributor.cpp
+++ b/storage/src/vespa/storage/distributor/distributor.cpp
@@ -105,6 +105,7 @@ Distributor::Distributor(DistributorComponentRegister& compReg,
_ownershipSafeTimeCalc(std::make_unique<OwnershipTransferSafeTimePointCalculator>(0s)), // Set by config later
_db_memory_sample_interval(30s),
_last_db_memory_sample_time_point(),
+ _inhibited_maintenance_tick_count(0),
_must_send_updated_host_info(false),
_use_btree_database(use_btree_database)
{
@@ -622,7 +623,7 @@ Distributor::signalWorkWasDone()
}
bool
-Distributor::workWasDone()
+Distributor::workWasDone() const noexcept
{
return !_tickResult.waitWanted();
}
@@ -848,17 +849,39 @@ Distributor::doNonCriticalTick(framework::ThreadIndex)
_tickResult = framework::ThreadWaitInfo::NO_MORE_CRITICAL_WORK_KNOWN;
handleStatusRequests();
startExternalOperations();
- if (!initializing()) {
+ if (initializing()) {
+ _bucketDBUpdater.resendDelayedMessages();
+ return _tickResult;
+ }
+ // Ordering note: since maintenance inhibiting checks whether startExternalOperations()
+ // did any useful work with incoming data, this check must be performed _after_ the call.
+ if (!should_inhibit_current_maintenance_scan_tick()) {
scanNextBucket();
startNextMaintenanceOperation();
if (isInRecoveryMode()) {
signalWorkWasDone();
}
+ mark_maintenance_tick_as_no_longer_inhibited();
+ _bucketDBUpdater.resendDelayedMessages();
+ } else {
+ mark_current_maintenance_tick_as_inhibited();
}
- _bucketDBUpdater.resendDelayedMessages();
return _tickResult;
}
+bool Distributor::should_inhibit_current_maintenance_scan_tick() const noexcept {
+ return (workWasDone() && (_inhibited_maintenance_tick_count
+ < getConfig().max_consecutively_inhibited_maintenance_ticks()));
+}
+
+void Distributor::mark_current_maintenance_tick_as_inhibited() noexcept {
+ ++_inhibited_maintenance_tick_count;
+}
+
+void Distributor::mark_maintenance_tick_as_no_longer_inhibited() noexcept {
+ _inhibited_maintenance_tick_count = 0;
+}
+
void
Distributor::enableNextConfig()
{
diff --git a/storage/src/vespa/storage/distributor/distributor.h b/storage/src/vespa/storage/distributor/distributor.h
index bf780434edf..36d399b0a9f 100644
--- a/storage/src/vespa/storage/distributor/distributor.h
+++ b/storage/src/vespa/storage/distributor/distributor.h
@@ -233,12 +233,15 @@ private:
void maybe_update_bucket_db_memory_usage_stats();
void scanAllBuckets();
MaintenanceScanner::ScanResult scanNextBucket();
+ bool should_inhibit_current_maintenance_scan_tick() const noexcept;
+ void mark_current_maintenance_tick_as_inhibited() noexcept;
+ void mark_maintenance_tick_as_no_longer_inhibited() noexcept;
void enableNextConfig();
void fetchStatusRequests();
void fetchExternalMessages();
void startNextMaintenanceOperation();
void signalWorkWasDone();
- bool workWasDone();
+ bool workWasDone() const noexcept;
void enterRecoveryMode();
void leaveRecoveryMode();
@@ -336,6 +339,7 @@ private:
std::unique_ptr<OwnershipTransferSafeTimePointCalculator> _ownershipSafeTimeCalc;
std::chrono::steady_clock::duration _db_memory_sample_interval;
std::chrono::steady_clock::time_point _last_db_memory_sample_time_point;
+ size_t _inhibited_maintenance_tick_count;
bool _must_send_updated_host_info;
const bool _use_btree_database;
};