From ce09948788f44ab58610a1c1185544bc045d18d7 Mon Sep 17 00:00:00 2001 From: Tor Brede Vekterli Date: Tue, 12 Oct 2021 13:10:51 +0000 Subject: Make implicit bucket priority DB clearing on scheduling configurable --- .../tests/distributor/distributor_stripe_test.cpp | 21 ++++++++++++ .../tests/distributor/maintenanceschedulertest.cpp | 40 +++++++++++++++++----- .../storage/config/distributorconfiguration.cpp | 2 ++ .../storage/config/distributorconfiguration.h | 4 +++ .../storage/config/stor-distributormanager.def | 6 ++++ .../storage/distributor/distributor_stripe.cpp | 1 + .../maintenance/maintenancescheduler.cpp | 18 +++++++--- .../distributor/maintenance/maintenancescheduler.h | 14 ++++++-- 8 files changed, 90 insertions(+), 16 deletions(-) (limited to 'storage') diff --git a/storage/src/tests/distributor/distributor_stripe_test.cpp b/storage/src/tests/distributor/distributor_stripe_test.cpp index 556f5e20fb2..902dd6454f1 100644 --- a/storage/src/tests/distributor/distributor_stripe_test.cpp +++ b/storage/src/tests/distributor/distributor_stripe_test.cpp @@ -179,6 +179,16 @@ struct DistributorStripeTest : Test, DistributorStripeTestUtil { configure_stripe(builder); } + void configure_implicitly_clear_priority_on_schedule(bool implicitly_clear) { + ConfigBuilder builder; + builder.implicitlyClearBucketPriorityOnSchedule = implicitly_clear; + configure_stripe(builder); + } + + bool scheduler_has_implicitly_clear_priority_on_schedule_set() const noexcept { + return _stripe->_scheduler->implicitly_clear_priority_on_schedule(); + } + void configureMaxClusterClockSkew(int seconds); void configure_mutation_sequencing(bool enabled); void configure_merge_busy_inhibit_duration(int seconds); @@ -888,6 +898,17 @@ TEST_F(DistributorStripeTest, max_activation_inhibited_out_of_sync_groups_config EXPECT_EQ(getConfig().max_activation_inhibited_out_of_sync_groups(), 0); } +TEST_F(DistributorStripeTest, implicitly_clear_priority_on_schedule_config_is_propagated_to_scheduler) +{ + setup_stripe(Redundancy(1), NodeCount(1), "distributor:1 storage:1"); + + configure_implicitly_clear_priority_on_schedule(true); + EXPECT_TRUE(scheduler_has_implicitly_clear_priority_on_schedule_set()); + + configure_implicitly_clear_priority_on_schedule(false); + EXPECT_FALSE(scheduler_has_implicitly_clear_priority_on_schedule_set()); +} + TEST_F(DistributorStripeTest, wanted_split_bit_count_is_lower_bounded) { setup_stripe(Redundancy(1), NodeCount(1), "distributor:1 storage:1"); diff --git a/storage/src/tests/distributor/maintenanceschedulertest.cpp b/storage/src/tests/distributor/maintenanceschedulertest.cpp index 8f13e10fedc..1d85de28aab 100644 --- a/storage/src/tests/distributor/maintenanceschedulertest.cpp +++ b/storage/src/tests/distributor/maintenanceschedulertest.cpp @@ -17,7 +17,7 @@ using document::BucketId; using Priority = MaintenancePriority; using WaitTimeMs = MaintenanceScheduler::WaitTimeMs; -struct MaintenanceSchedulerTest : Test { +struct MaintenanceSchedulerTest : TestWithParam { SimpleBucketPriorityDatabase _priority_db; MockMaintenanceOperationGenerator _operation_generator; MockOperationStarter _operation_starter; @@ -31,22 +31,29 @@ struct MaintenanceSchedulerTest : Test { _pending_window_checker(), _scheduler(_operation_generator, _priority_db, _pending_window_checker, _operation_starter) {} + + void SetUp() override { + _scheduler.set_implicitly_clear_priority_on_schedule(GetParam()); + } }; -TEST_F(MaintenanceSchedulerTest, priority_cleared_after_scheduled) { +TEST_P(MaintenanceSchedulerTest, priority_cleared_after_scheduled) { _priority_db.setPriority(PrioritizedBucket(makeDocumentBucket(BucketId(16, 1)), Priority::HIGHEST)); _scheduler.tick(MaintenanceScheduler::NORMAL_SCHEDULING_MODE); EXPECT_EQ("", _priority_db.toString()); } -TEST_F(MaintenanceSchedulerTest, operation_is_scheduled) { +TEST_P(MaintenanceSchedulerTest, operation_is_scheduled) { _priority_db.setPriority(PrioritizedBucket(makeDocumentBucket(BucketId(16, 1)), Priority::MEDIUM)); _scheduler.tick(MaintenanceScheduler::NORMAL_SCHEDULING_MODE); EXPECT_EQ("Bucket(BucketSpace(0x0000000000000001), BucketId(0x4000000000000001)), pri 100\n", _operation_starter.toString()); } -TEST_F(MaintenanceSchedulerTest, operation_is_not_scheduled_if_pending_ops_not_accepted) { +TEST_P(MaintenanceSchedulerTest, operation_is_not_scheduled_if_pending_ops_not_accepted) { + if (!GetParam()) { + return; // Only works when implicit clearing is enabled + } _priority_db.setPriority(PrioritizedBucket(makeDocumentBucket(BucketId(16, 1)), Priority::MEDIUM)); _pending_window_checker.allow_operations(false); _scheduler.tick(MaintenanceScheduler::NORMAL_SCHEDULING_MODE); @@ -56,13 +63,13 @@ TEST_F(MaintenanceSchedulerTest, operation_is_not_scheduled_if_pending_ops_not_a _priority_db.toString()); } -TEST_F(MaintenanceSchedulerTest, no_operations_to_schedule) { +TEST_P(MaintenanceSchedulerTest, no_operations_to_schedule) { WaitTimeMs waitMs(_scheduler.tick(MaintenanceScheduler::NORMAL_SCHEDULING_MODE)); EXPECT_EQ(WaitTimeMs(1), waitMs); EXPECT_EQ("", _operation_starter.toString()); } -TEST_F(MaintenanceSchedulerTest, suppress_low_priorities_in_emergency_mode) { +TEST_P(MaintenanceSchedulerTest, suppress_low_priorities_in_emergency_mode) { _priority_db.setPriority(PrioritizedBucket(makeDocumentBucket(BucketId(16, 1)), Priority::VERY_HIGH)); _priority_db.setPriority(PrioritizedBucket(makeDocumentBucket(BucketId(16, 2)), Priority::HIGHEST)); EXPECT_EQ(WaitTimeMs(0), _scheduler.tick(MaintenanceScheduler::RECOVERY_SCHEDULING_MODE)); @@ -73,7 +80,22 @@ TEST_F(MaintenanceSchedulerTest, suppress_low_priorities_in_emergency_mode) { _priority_db.toString()); } -TEST_F(MaintenanceSchedulerTest, priority_cleared_if_operation_not_started_inside_pending_window) { +TEST_P(MaintenanceSchedulerTest, priority_not_cleared_if_operation_not_started) { + if (GetParam()) { + return; // Only works when implicit clearing is NOT enabled + } + _priority_db.setPriority(PrioritizedBucket(makeDocumentBucket(BucketId(16, 1)), Priority::HIGH)); + _operation_starter.setShouldStartOperations(false); + WaitTimeMs waitMs(_scheduler.tick(MaintenanceScheduler::NORMAL_SCHEDULING_MODE)); + EXPECT_EQ(WaitTimeMs(1), waitMs); + EXPECT_EQ("PrioritizedBucket(Bucket(BucketSpace(0x0000000000000001), BucketId(0x4000000000000001)), pri HIGH)\n", + _priority_db.toString()); +} + +TEST_P(MaintenanceSchedulerTest, priority_cleared_if_operation_not_started_inside_pending_window) { + if (!GetParam()) { + return; // Only works when implicit clearing is enabled + } _priority_db.setPriority(PrioritizedBucket(makeDocumentBucket(BucketId(16, 1)), Priority::HIGH)); _operation_starter.setShouldStartOperations(false); WaitTimeMs waitMs(_scheduler.tick(MaintenanceScheduler::NORMAL_SCHEDULING_MODE)); @@ -81,7 +103,7 @@ TEST_F(MaintenanceSchedulerTest, priority_cleared_if_operation_not_started_insid EXPECT_EQ("", _priority_db.toString()); } -TEST_F(MaintenanceSchedulerTest, priority_not_cleared_if_operation_not_started_outside_pending_window) { +TEST_P(MaintenanceSchedulerTest, priority_not_cleared_if_operation_not_started_outside_pending_window) { _priority_db.setPriority(PrioritizedBucket(makeDocumentBucket(BucketId(16, 1)), Priority::HIGH)); _operation_starter.setShouldStartOperations(false); _pending_window_checker.allow_operations(false); @@ -91,4 +113,6 @@ TEST_F(MaintenanceSchedulerTest, priority_not_cleared_if_operation_not_started_o _priority_db.toString()); } +VESPA_GTEST_INSTANTIATE_TEST_SUITE_P(ImplicitClearOfDbPri, MaintenanceSchedulerTest, ::testing::Values(false, true)); + } diff --git a/storage/src/vespa/storage/config/distributorconfiguration.cpp b/storage/src/vespa/storage/config/distributorconfiguration.cpp index 6de3f5a4698..a23d00ee6a3 100644 --- a/storage/src/vespa/storage/config/distributorconfiguration.cpp +++ b/storage/src/vespa/storage/config/distributorconfiguration.cpp @@ -49,6 +49,7 @@ DistributorConfiguration::DistributorConfiguration(StorageComponent& component) _enable_metadata_only_fetch_phase_for_inconsistent_updates(false), _prioritize_global_bucket_merges(true), _enable_revert(true), + _implicitly_clear_priority_on_schedule(false), _minimumReplicaCountingMode(ReplicaCountingMode::TRUSTED) { } @@ -169,6 +170,7 @@ DistributorConfiguration::configure(const vespa::config::content::core::StorDist _prioritize_global_bucket_merges = config.prioritizeGlobalBucketMerges; _max_activation_inhibited_out_of_sync_groups = config.maxActivationInhibitedOutOfSyncGroups; _enable_revert = config.enableRevert; + _implicitly_clear_priority_on_schedule = config.implicitlyClearBucketPriorityOnSchedule; _minimumReplicaCountingMode = config.minimumReplicaCountingMode; diff --git a/storage/src/vespa/storage/config/distributorconfiguration.h b/storage/src/vespa/storage/config/distributorconfiguration.h index e50b2cf7771..7b4e082d1ed 100644 --- a/storage/src/vespa/storage/config/distributorconfiguration.h +++ b/storage/src/vespa/storage/config/distributorconfiguration.h @@ -264,6 +264,9 @@ public: bool enable_revert() const noexcept { return _enable_revert; } + [[nodiscard]] bool implicitly_clear_priority_on_schedule() const noexcept { + return _implicitly_clear_priority_on_schedule; + } uint32_t num_distributor_stripes() const noexcept { return _num_distributor_stripes; } @@ -320,6 +323,7 @@ private: bool _enable_metadata_only_fetch_phase_for_inconsistent_updates; bool _prioritize_global_bucket_merges; bool _enable_revert; + bool _implicitly_clear_priority_on_schedule; DistrConfig::MinimumReplicaCountingMode _minimumReplicaCountingMode; diff --git a/storage/src/vespa/storage/config/stor-distributormanager.def b/storage/src/vespa/storage/config/stor-distributormanager.def index 3b324d6ddd2..8a9fdf74802 100644 --- a/storage/src/vespa/storage/config/stor-distributormanager.def +++ b/storage/src/vespa/storage/config/stor-distributormanager.def @@ -280,3 +280,9 @@ max_activation_inhibited_out_of_sync_groups int default=0 ## CPU cores available. If > 0, the number of stripes is explicitly overridden. ## Stripe counts must be a power of two. num_distributor_stripes int default=0 restart + +## If set, the maintenance scheduler will implicitly clear entries from its internal +## bucket maintenance priority database even when no operation can be started for the +## bucket due to being blocked by concurrent operations. This avoids potential head-of-line +## blocking of later buckets in the priority database. +implicitly_clear_bucket_priority_on_schedule bool default=false diff --git a/storage/src/vespa/storage/distributor/distributor_stripe.cpp b/storage/src/vespa/storage/distributor/distributor_stripe.cpp index e45ef13f4ef..18a19288ad3 100644 --- a/storage/src/vespa/storage/distributor/distributor_stripe.cpp +++ b/storage/src/vespa/storage/distributor/distributor_stripe.cpp @@ -760,6 +760,7 @@ DistributorStripe::propagate_config_snapshot_to_internal_components() getConfig().allowStaleReadsDuringClusterStateTransitions()); _externalOperationHandler.set_use_weak_internal_read_consistency_for_gets( getConfig().use_weak_internal_read_consistency_for_client_gets()); + _scheduler->set_implicitly_clear_priority_on_schedule(getConfig().implicitly_clear_priority_on_schedule()); } void diff --git a/storage/src/vespa/storage/distributor/maintenance/maintenancescheduler.cpp b/storage/src/vespa/storage/distributor/maintenance/maintenancescheduler.cpp index d910c80d360..bbaaf6d5ae7 100644 --- a/storage/src/vespa/storage/distributor/maintenance/maintenancescheduler.cpp +++ b/storage/src/vespa/storage/distributor/maintenance/maintenancescheduler.cpp @@ -19,7 +19,8 @@ MaintenanceScheduler::MaintenanceScheduler( : _operationGenerator(operationGenerator), _priorityDb(priorityDb), _pending_window_checker(pending_window_checker), - _operationStarter(operationStarter) + _operationStarter(operationStarter), + _implicitly_clear_priority_on_schedule(false) { } @@ -41,12 +42,17 @@ MaintenanceScheduler::tick(SchedulingMode currentMode) if (!possibleToSchedule(mostImportant, currentMode)) { return WaitTimeMs(1); } - // If we can't start the operation, move on to the next bucket. Bucket will be - // re-prioritized when the distributor stripe next scans it. - clearPriority(mostImportant); + if (_implicitly_clear_priority_on_schedule) { + // If we can't start the operation, move on to the next bucket. Bucket will be + // re-prioritized when the distributor stripe next scans it. + clearPriority(mostImportant); + } if (!startOperation(mostImportant)) { return WaitTimeMs(1); } + if (!_implicitly_clear_priority_on_schedule) { + clearPriority(mostImportant); + } return WaitTimeMs(0); } @@ -58,7 +64,9 @@ MaintenanceScheduler::possibleToSchedule(const PrioritizedBucket& bucket, return false; } // If pending window is full nothing of equal or lower priority can be scheduled, so no point in trying. - if (!_pending_window_checker.may_allow_operation_with_priority(convertToOperationPriority(bucket.getPriority()))) { + if (_implicitly_clear_priority_on_schedule && + !_pending_window_checker.may_allow_operation_with_priority(convertToOperationPriority(bucket.getPriority()))) + { return false; } if (currentMode == RECOVERY_SCHEDULING_MODE) { diff --git a/storage/src/vespa/storage/distributor/maintenance/maintenancescheduler.h b/storage/src/vespa/storage/distributor/maintenance/maintenancescheduler.h index 982a90d47d8..7741f8b6d17 100644 --- a/storage/src/vespa/storage/distributor/maintenance/maintenancescheduler.h +++ b/storage/src/vespa/storage/distributor/maintenance/maintenancescheduler.h @@ -28,6 +28,13 @@ public: WaitTimeMs tick(SchedulingMode currentMode); + void set_implicitly_clear_priority_on_schedule(bool implicitly_clear) noexcept { + _implicitly_clear_priority_on_schedule = implicitly_clear; + } + [[nodiscard]] bool implicitly_clear_priority_on_schedule() const noexcept { + return _implicitly_clear_priority_on_schedule; + } + private: MaintenanceScheduler(const MaintenanceScheduler&); MaintenanceScheduler& operator=(const MaintenanceScheduler&); @@ -41,9 +48,10 @@ private: MaintenancePriority::Priority priority) const; MaintenanceOperationGenerator& _operationGenerator; - BucketPriorityDatabase& _priorityDb; - const PendingWindowChecker& _pending_window_checker; - OperationStarter& _operationStarter; + BucketPriorityDatabase& _priorityDb; + const PendingWindowChecker& _pending_window_checker; + OperationStarter& _operationStarter; + bool _implicitly_clear_priority_on_schedule; }; } -- cgit v1.2.3