diff options
author | Tor Brede Vekterli <vekterli@verizonmedia.com> | 2020-04-17 14:58:36 +0000 |
---|---|---|
committer | Tor Brede Vekterli <vekterli@verizonmedia.com> | 2020-04-17 15:09:11 +0000 |
commit | 5267eb646f5482464250d0d1d29855af65b0667e (patch) | |
tree | 90a9735a5c2c00a00c89f8ba07f2a261256770a7 /storage | |
parent | 0372397c1202363074a0f82d805adf5be1fa5b0f (diff) |
Allow temporarily inhibiting maintenance ops when under load
If requests or responses from external sources are being constantly
processed as part of the distributor tick, allow maintenance scanning
to be skipped for up to N consecutive ticks, where N is configurable.
This reduces the amount of CPU time spent on maintenance operations
when the node has a lot of incoming data to deal with.
Diffstat (limited to 'storage')
6 files changed, 56 insertions, 6 deletions
diff --git a/storage/src/tests/distributor/distributortest.cpp b/storage/src/tests/distributor/distributortest.cpp index 421fd2cb4b6..b110e99f8a4 100644 --- a/storage/src/tests/distributor/distributortest.cpp +++ b/storage/src/tests/distributor/distributortest.cpp @@ -700,6 +700,14 @@ TEST_F(DistributorTest, replica_counting_mode_config_is_propagated_to_metric_upd EXPECT_EQ(ConfigBuilder::MinimumReplicaCountingMode::ANY, currentReplicaCountingMode()); } +TEST_F(DistributorTest, max_consecutively_inhibited_maintenance_ticks_config_is_propagated_to_internal_config) { + setupDistributor(Redundancy(2), NodeCount(2), "storage:2 distributor:1"); + ConfigBuilder builder; + builder.maxConsecutivelyInhibitedMaintenanceTicks = 123; + getConfig().configure(builder); + EXPECT_EQ(getConfig().max_consecutively_inhibited_maintenance_ticks(), 123); +} + TEST_F(DistributorTest, bucket_activation_is_enabled_by_default) { setupDistributor(Redundancy(2), NodeCount(2), "storage:2 distributor:1"); EXPECT_FALSE(getConfig().isBucketActivationDisabled()); diff --git a/storage/src/vespa/storage/config/distributorconfiguration.cpp b/storage/src/vespa/storage/config/distributorconfiguration.cpp index 9f51d70ce60..0c9988421a3 100644 --- a/storage/src/vespa/storage/config/distributorconfiguration.cpp +++ b/storage/src/vespa/storage/config/distributorconfiguration.cpp @@ -22,6 +22,7 @@ DistributorConfiguration::DistributorConfiguration(StorageComponent& component) _maxIdealStateOperations(100), _idealStateChunkSize(1000), _maxNodesPerMerge(16), + _max_consecutively_inhibited_maintenance_ticks(20), _lastGarbageCollectionChange(vespalib::duration::zero()), _garbageCollectionInterval(0), _minPendingMaintenanceOps(100), @@ -44,7 +45,8 @@ DistributorConfiguration::DistributorConfiguration(StorageComponent& component) _use_weak_internal_read_consistency_for_client_gets(false), _enable_metadata_only_fetch_phase_for_inconsistent_updates(false), _minimumReplicaCountingMode(ReplicaCountingMode::TRUSTED) 
-{ } +{ +} DistributorConfiguration::~DistributorConfiguration() = default; @@ -123,6 +125,7 @@ DistributorConfiguration::configure(const vespa::config::content::core::StorDist _docCountJoinLimit = config.joincount; _minimalBucketSplit = config.minsplitcount; _maxNodesPerMerge = config.maximumNodesPerMerge; + _max_consecutively_inhibited_maintenance_ticks = config.maxConsecutivelyInhibitedMaintenanceTicks; _garbageCollectionInterval = std::chrono::seconds(config.garbagecollection.interval); diff --git a/storage/src/vespa/storage/config/distributorconfiguration.h b/storage/src/vespa/storage/config/distributorconfiguration.h index b8e99165d69..0c4b1f5756c 100644 --- a/storage/src/vespa/storage/config/distributorconfiguration.h +++ b/storage/src/vespa/storage/config/distributorconfiguration.h @@ -242,6 +242,10 @@ public: return _enable_metadata_only_fetch_phase_for_inconsistent_updates; } + uint32_t max_consecutively_inhibited_maintenance_ticks() const noexcept { + return _max_consecutively_inhibited_maintenance_ticks; + } + bool containsTimeStatement(const std::string& documentSelection) const; private: @@ -258,6 +262,7 @@ private: uint32_t _maxIdealStateOperations; uint32_t _idealStateChunkSize; uint32_t _maxNodesPerMerge; + uint32_t _max_consecutively_inhibited_maintenance_ticks; std::string _garbageCollectionSelection; diff --git a/storage/src/vespa/storage/config/stor-distributormanager.def b/storage/src/vespa/storage/config/stor-distributormanager.def index 915b9b6b304..71059d717a6 100644 --- a/storage/src/vespa/storage/config/stor-distributormanager.def +++ b/storage/src/vespa/storage/config/stor-distributormanager.def @@ -236,4 +236,11 @@ use_weak_internal_read_consistency_for_client_gets bool default=false ## Setting this option to true always implicitly enables the fast update restart ## feature, so it's not required to set that config to true, nor will setting it ## to false actually disable the feature. 
-enable_metadata_only_fetch_phase_for_inconsistent_updates bool default=false
\ No newline at end of file +enable_metadata_only_fetch_phase_for_inconsistent_updates bool default=false + +## If a distributor main thread tick is constantly processing requests or responses +## originating from other nodes, setting this value above zero will prevent implicit +## maintenance scans from being done as part of the tick for up to N rounds of ticking. +## This is to reduce the amount of CPU spent on ideal state calculations and bucket DB +## accesses when the distributor is heavily loaded with feed operations. +max_consecutively_inhibited_maintenance_ticks int default=20
\ No newline at end of file diff --git a/storage/src/vespa/storage/distributor/distributor.cpp b/storage/src/vespa/storage/distributor/distributor.cpp index cce3d2d1acb..5dade47c2a6 100644 --- a/storage/src/vespa/storage/distributor/distributor.cpp +++ b/storage/src/vespa/storage/distributor/distributor.cpp @@ -105,6 +105,7 @@ Distributor::Distributor(DistributorComponentRegister& compReg, _ownershipSafeTimeCalc(std::make_unique<OwnershipTransferSafeTimePointCalculator>(0s)), // Set by config later _db_memory_sample_interval(30s), _last_db_memory_sample_time_point(), + _inhibited_maintenance_tick_count(0), _must_send_updated_host_info(false), _use_btree_database(use_btree_database) { @@ -622,7 +623,7 @@ Distributor::signalWorkWasDone() } bool -Distributor::workWasDone() +Distributor::workWasDone() const noexcept { return !_tickResult.waitWanted(); } @@ -848,17 +849,39 @@ Distributor::doNonCriticalTick(framework::ThreadIndex) _tickResult = framework::ThreadWaitInfo::NO_MORE_CRITICAL_WORK_KNOWN; handleStatusRequests(); startExternalOperations(); - if (!initializing()) { + if (initializing()) { + _bucketDBUpdater.resendDelayedMessages(); + return _tickResult; + } + // Ordering note: since maintenance inhibiting checks whether startExternalOperations() + // did any useful work with incoming data, this check must be performed _after_ the call. 
+ if (!should_inhibit_current_maintenance_scan_tick()) { scanNextBucket(); startNextMaintenanceOperation(); if (isInRecoveryMode()) { signalWorkWasDone(); } + mark_maintenance_tick_as_no_longer_inhibited(); + _bucketDBUpdater.resendDelayedMessages(); + } else { + mark_current_maintenance_tick_as_inhibited(); } - _bucketDBUpdater.resendDelayedMessages(); return _tickResult; } +bool Distributor::should_inhibit_current_maintenance_scan_tick() const noexcept { + return (workWasDone() && (_inhibited_maintenance_tick_count + < getConfig().max_consecutively_inhibited_maintenance_ticks())); +} + +void Distributor::mark_current_maintenance_tick_as_inhibited() noexcept { + ++_inhibited_maintenance_tick_count; +} + +void Distributor::mark_maintenance_tick_as_no_longer_inhibited() noexcept { + _inhibited_maintenance_tick_count = 0; +} + void Distributor::enableNextConfig() { diff --git a/storage/src/vespa/storage/distributor/distributor.h b/storage/src/vespa/storage/distributor/distributor.h index bf780434edf..36d399b0a9f 100644 --- a/storage/src/vespa/storage/distributor/distributor.h +++ b/storage/src/vespa/storage/distributor/distributor.h @@ -233,12 +233,15 @@ private: void maybe_update_bucket_db_memory_usage_stats(); void scanAllBuckets(); MaintenanceScanner::ScanResult scanNextBucket(); + bool should_inhibit_current_maintenance_scan_tick() const noexcept; + void mark_current_maintenance_tick_as_inhibited() noexcept; + void mark_maintenance_tick_as_no_longer_inhibited() noexcept; void enableNextConfig(); void fetchStatusRequests(); void fetchExternalMessages(); void startNextMaintenanceOperation(); void signalWorkWasDone(); - bool workWasDone(); + bool workWasDone() const noexcept; void enterRecoveryMode(); void leaveRecoveryMode(); @@ -336,6 +339,7 @@ private: std::unique_ptr<OwnershipTransferSafeTimePointCalculator> _ownershipSafeTimeCalc; std::chrono::steady_clock::duration _db_memory_sample_interval; std::chrono::steady_clock::time_point 
_last_db_memory_sample_time_point; + size_t _inhibited_maintenance_tick_count; bool _must_send_updated_host_info; const bool _use_btree_database; }; |