summaryrefslogtreecommitdiffstats
path: root/storage
diff options
context:
space:
mode:
authorTor Brede Vekterli <vekterli@yahooinc.com>2021-10-12 13:10:51 +0000
committerTor Brede Vekterli <vekterli@yahooinc.com>2021-10-14 10:27:34 +0000
commitce09948788f44ab58610a1c1185544bc045d18d7 (patch)
treeeb1a80d87b454d5b311b5f469c5b32593ceec6e6 /storage
parent0e59cdfc303a4a2273f0e606e04060fabfe7dd5e (diff)
Make implicit bucket priority DB clearing on scheduling configurable
Diffstat (limited to 'storage')
-rw-r--r--storage/src/tests/distributor/distributor_stripe_test.cpp21
-rw-r--r--storage/src/tests/distributor/maintenanceschedulertest.cpp40
-rw-r--r--storage/src/vespa/storage/config/distributorconfiguration.cpp2
-rw-r--r--storage/src/vespa/storage/config/distributorconfiguration.h4
-rw-r--r--storage/src/vespa/storage/config/stor-distributormanager.def6
-rw-r--r--storage/src/vespa/storage/distributor/distributor_stripe.cpp1
-rw-r--r--storage/src/vespa/storage/distributor/maintenance/maintenancescheduler.cpp18
-rw-r--r--storage/src/vespa/storage/distributor/maintenance/maintenancescheduler.h14
8 files changed, 90 insertions, 16 deletions
diff --git a/storage/src/tests/distributor/distributor_stripe_test.cpp b/storage/src/tests/distributor/distributor_stripe_test.cpp
index 556f5e20fb2..902dd6454f1 100644
--- a/storage/src/tests/distributor/distributor_stripe_test.cpp
+++ b/storage/src/tests/distributor/distributor_stripe_test.cpp
@@ -179,6 +179,16 @@ struct DistributorStripeTest : Test, DistributorStripeTestUtil {
configure_stripe(builder);
}
+ void configure_implicitly_clear_priority_on_schedule(bool implicitly_clear) {
+ ConfigBuilder builder;
+ builder.implicitlyClearBucketPriorityOnSchedule = implicitly_clear;
+ configure_stripe(builder);
+ }
+
+ bool scheduler_has_implicitly_clear_priority_on_schedule_set() const noexcept {
+ return _stripe->_scheduler->implicitly_clear_priority_on_schedule();
+ }
+
void configureMaxClusterClockSkew(int seconds);
void configure_mutation_sequencing(bool enabled);
void configure_merge_busy_inhibit_duration(int seconds);
@@ -888,6 +898,17 @@ TEST_F(DistributorStripeTest, max_activation_inhibited_out_of_sync_groups_config
EXPECT_EQ(getConfig().max_activation_inhibited_out_of_sync_groups(), 0);
}
+TEST_F(DistributorStripeTest, implicitly_clear_priority_on_schedule_config_is_propagated_to_scheduler)
+{
+ setup_stripe(Redundancy(1), NodeCount(1), "distributor:1 storage:1");
+
+ configure_implicitly_clear_priority_on_schedule(true);
+ EXPECT_TRUE(scheduler_has_implicitly_clear_priority_on_schedule_set());
+
+ configure_implicitly_clear_priority_on_schedule(false);
+ EXPECT_FALSE(scheduler_has_implicitly_clear_priority_on_schedule_set());
+}
+
TEST_F(DistributorStripeTest, wanted_split_bit_count_is_lower_bounded)
{
setup_stripe(Redundancy(1), NodeCount(1), "distributor:1 storage:1");
diff --git a/storage/src/tests/distributor/maintenanceschedulertest.cpp b/storage/src/tests/distributor/maintenanceschedulertest.cpp
index 8f13e10fedc..1d85de28aab 100644
--- a/storage/src/tests/distributor/maintenanceschedulertest.cpp
+++ b/storage/src/tests/distributor/maintenanceschedulertest.cpp
@@ -17,7 +17,7 @@ using document::BucketId;
using Priority = MaintenancePriority;
using WaitTimeMs = MaintenanceScheduler::WaitTimeMs;
-struct MaintenanceSchedulerTest : Test {
+struct MaintenanceSchedulerTest : TestWithParam<bool> {
SimpleBucketPriorityDatabase _priority_db;
MockMaintenanceOperationGenerator _operation_generator;
MockOperationStarter _operation_starter;
@@ -31,22 +31,29 @@ struct MaintenanceSchedulerTest : Test {
_pending_window_checker(),
_scheduler(_operation_generator, _priority_db, _pending_window_checker, _operation_starter)
{}
+
+ void SetUp() override {
+ _scheduler.set_implicitly_clear_priority_on_schedule(GetParam());
+ }
};
-TEST_F(MaintenanceSchedulerTest, priority_cleared_after_scheduled) {
+TEST_P(MaintenanceSchedulerTest, priority_cleared_after_scheduled) {
_priority_db.setPriority(PrioritizedBucket(makeDocumentBucket(BucketId(16, 1)), Priority::HIGHEST));
_scheduler.tick(MaintenanceScheduler::NORMAL_SCHEDULING_MODE);
EXPECT_EQ("", _priority_db.toString());
}
-TEST_F(MaintenanceSchedulerTest, operation_is_scheduled) {
+TEST_P(MaintenanceSchedulerTest, operation_is_scheduled) {
_priority_db.setPriority(PrioritizedBucket(makeDocumentBucket(BucketId(16, 1)), Priority::MEDIUM));
_scheduler.tick(MaintenanceScheduler::NORMAL_SCHEDULING_MODE);
EXPECT_EQ("Bucket(BucketSpace(0x0000000000000001), BucketId(0x4000000000000001)), pri 100\n",
_operation_starter.toString());
}
-TEST_F(MaintenanceSchedulerTest, operation_is_not_scheduled_if_pending_ops_not_accepted) {
+TEST_P(MaintenanceSchedulerTest, operation_is_not_scheduled_if_pending_ops_not_accepted) {
+ if (!GetParam()) {
+ return; // Only works when implicit clearing is enabled
+ }
_priority_db.setPriority(PrioritizedBucket(makeDocumentBucket(BucketId(16, 1)), Priority::MEDIUM));
_pending_window_checker.allow_operations(false);
_scheduler.tick(MaintenanceScheduler::NORMAL_SCHEDULING_MODE);
@@ -56,13 +63,13 @@ TEST_F(MaintenanceSchedulerTest, operation_is_not_scheduled_if_pending_ops_not_a
_priority_db.toString());
}
-TEST_F(MaintenanceSchedulerTest, no_operations_to_schedule) {
+TEST_P(MaintenanceSchedulerTest, no_operations_to_schedule) {
WaitTimeMs waitMs(_scheduler.tick(MaintenanceScheduler::NORMAL_SCHEDULING_MODE));
EXPECT_EQ(WaitTimeMs(1), waitMs);
EXPECT_EQ("", _operation_starter.toString());
}
-TEST_F(MaintenanceSchedulerTest, suppress_low_priorities_in_emergency_mode) {
+TEST_P(MaintenanceSchedulerTest, suppress_low_priorities_in_emergency_mode) {
_priority_db.setPriority(PrioritizedBucket(makeDocumentBucket(BucketId(16, 1)), Priority::VERY_HIGH));
_priority_db.setPriority(PrioritizedBucket(makeDocumentBucket(BucketId(16, 2)), Priority::HIGHEST));
EXPECT_EQ(WaitTimeMs(0), _scheduler.tick(MaintenanceScheduler::RECOVERY_SCHEDULING_MODE));
@@ -73,7 +80,22 @@ TEST_F(MaintenanceSchedulerTest, suppress_low_priorities_in_emergency_mode) {
_priority_db.toString());
}
-TEST_F(MaintenanceSchedulerTest, priority_cleared_if_operation_not_started_inside_pending_window) {
+TEST_P(MaintenanceSchedulerTest, priority_not_cleared_if_operation_not_started) {
+ if (GetParam()) {
+ return; // Only works when implicit clearing is NOT enabled
+ }
+ _priority_db.setPriority(PrioritizedBucket(makeDocumentBucket(BucketId(16, 1)), Priority::HIGH));
+ _operation_starter.setShouldStartOperations(false);
+ WaitTimeMs waitMs(_scheduler.tick(MaintenanceScheduler::NORMAL_SCHEDULING_MODE));
+ EXPECT_EQ(WaitTimeMs(1), waitMs);
+ EXPECT_EQ("PrioritizedBucket(Bucket(BucketSpace(0x0000000000000001), BucketId(0x4000000000000001)), pri HIGH)\n",
+ _priority_db.toString());
+}
+
+TEST_P(MaintenanceSchedulerTest, priority_cleared_if_operation_not_started_inside_pending_window) {
+ if (!GetParam()) {
+ return; // Only works when implicit clearing is enabled
+ }
_priority_db.setPriority(PrioritizedBucket(makeDocumentBucket(BucketId(16, 1)), Priority::HIGH));
_operation_starter.setShouldStartOperations(false);
WaitTimeMs waitMs(_scheduler.tick(MaintenanceScheduler::NORMAL_SCHEDULING_MODE));
@@ -81,7 +103,7 @@ TEST_F(MaintenanceSchedulerTest, priority_cleared_if_operation_not_started_insid
EXPECT_EQ("", _priority_db.toString());
}
-TEST_F(MaintenanceSchedulerTest, priority_not_cleared_if_operation_not_started_outside_pending_window) {
+TEST_P(MaintenanceSchedulerTest, priority_not_cleared_if_operation_not_started_outside_pending_window) {
_priority_db.setPriority(PrioritizedBucket(makeDocumentBucket(BucketId(16, 1)), Priority::HIGH));
_operation_starter.setShouldStartOperations(false);
_pending_window_checker.allow_operations(false);
@@ -91,4 +113,6 @@ TEST_F(MaintenanceSchedulerTest, priority_not_cleared_if_operation_not_started_o
_priority_db.toString());
}
+VESPA_GTEST_INSTANTIATE_TEST_SUITE_P(ImplicitClearOfDbPri, MaintenanceSchedulerTest, ::testing::Values(false, true));
+
}
diff --git a/storage/src/vespa/storage/config/distributorconfiguration.cpp b/storage/src/vespa/storage/config/distributorconfiguration.cpp
index 6de3f5a4698..a23d00ee6a3 100644
--- a/storage/src/vespa/storage/config/distributorconfiguration.cpp
+++ b/storage/src/vespa/storage/config/distributorconfiguration.cpp
@@ -49,6 +49,7 @@ DistributorConfiguration::DistributorConfiguration(StorageComponent& component)
_enable_metadata_only_fetch_phase_for_inconsistent_updates(false),
_prioritize_global_bucket_merges(true),
_enable_revert(true),
+ _implicitly_clear_priority_on_schedule(false),
_minimumReplicaCountingMode(ReplicaCountingMode::TRUSTED)
{
}
@@ -169,6 +170,7 @@ DistributorConfiguration::configure(const vespa::config::content::core::StorDist
_prioritize_global_bucket_merges = config.prioritizeGlobalBucketMerges;
_max_activation_inhibited_out_of_sync_groups = config.maxActivationInhibitedOutOfSyncGroups;
_enable_revert = config.enableRevert;
+ _implicitly_clear_priority_on_schedule = config.implicitlyClearBucketPriorityOnSchedule;
_minimumReplicaCountingMode = config.minimumReplicaCountingMode;
diff --git a/storage/src/vespa/storage/config/distributorconfiguration.h b/storage/src/vespa/storage/config/distributorconfiguration.h
index e50b2cf7771..7b4e082d1ed 100644
--- a/storage/src/vespa/storage/config/distributorconfiguration.h
+++ b/storage/src/vespa/storage/config/distributorconfiguration.h
@@ -264,6 +264,9 @@ public:
bool enable_revert() const noexcept {
return _enable_revert;
}
+ [[nodiscard]] bool implicitly_clear_priority_on_schedule() const noexcept {
+ return _implicitly_clear_priority_on_schedule;
+ }
uint32_t num_distributor_stripes() const noexcept { return _num_distributor_stripes; }
@@ -320,6 +323,7 @@ private:
bool _enable_metadata_only_fetch_phase_for_inconsistent_updates;
bool _prioritize_global_bucket_merges;
bool _enable_revert;
+ bool _implicitly_clear_priority_on_schedule;
DistrConfig::MinimumReplicaCountingMode _minimumReplicaCountingMode;
diff --git a/storage/src/vespa/storage/config/stor-distributormanager.def b/storage/src/vespa/storage/config/stor-distributormanager.def
index 3b324d6ddd2..8a9fdf74802 100644
--- a/storage/src/vespa/storage/config/stor-distributormanager.def
+++ b/storage/src/vespa/storage/config/stor-distributormanager.def
@@ -280,3 +280,9 @@ max_activation_inhibited_out_of_sync_groups int default=0
## CPU cores available. If > 0, the number of stripes is explicitly overridden.
## Stripe counts must be a power of two.
num_distributor_stripes int default=0 restart
+
+## If set, the maintenance scheduler will implicitly clear entries from its internal
+## bucket maintenance priority database even when no operation can be started for the
+## bucket due to being blocked by concurrent operations. This avoids potential head-of-line
+## blocking of later buckets in the priority database.
+implicitly_clear_bucket_priority_on_schedule bool default=false
diff --git a/storage/src/vespa/storage/distributor/distributor_stripe.cpp b/storage/src/vespa/storage/distributor/distributor_stripe.cpp
index e45ef13f4ef..18a19288ad3 100644
--- a/storage/src/vespa/storage/distributor/distributor_stripe.cpp
+++ b/storage/src/vespa/storage/distributor/distributor_stripe.cpp
@@ -760,6 +760,7 @@ DistributorStripe::propagate_config_snapshot_to_internal_components()
getConfig().allowStaleReadsDuringClusterStateTransitions());
_externalOperationHandler.set_use_weak_internal_read_consistency_for_gets(
getConfig().use_weak_internal_read_consistency_for_client_gets());
+ _scheduler->set_implicitly_clear_priority_on_schedule(getConfig().implicitly_clear_priority_on_schedule());
}
void
diff --git a/storage/src/vespa/storage/distributor/maintenance/maintenancescheduler.cpp b/storage/src/vespa/storage/distributor/maintenance/maintenancescheduler.cpp
index d910c80d360..bbaaf6d5ae7 100644
--- a/storage/src/vespa/storage/distributor/maintenance/maintenancescheduler.cpp
+++ b/storage/src/vespa/storage/distributor/maintenance/maintenancescheduler.cpp
@@ -19,7 +19,8 @@ MaintenanceScheduler::MaintenanceScheduler(
: _operationGenerator(operationGenerator),
_priorityDb(priorityDb),
_pending_window_checker(pending_window_checker),
- _operationStarter(operationStarter)
+ _operationStarter(operationStarter),
+ _implicitly_clear_priority_on_schedule(false)
{
}
@@ -41,12 +42,17 @@ MaintenanceScheduler::tick(SchedulingMode currentMode)
if (!possibleToSchedule(mostImportant, currentMode)) {
return WaitTimeMs(1);
}
- // If we can't start the operation, move on to the next bucket. Bucket will be
- // re-prioritized when the distributor stripe next scans it.
- clearPriority(mostImportant);
+ if (_implicitly_clear_priority_on_schedule) {
+ // If we can't start the operation, move on to the next bucket. Bucket will be
+ // re-prioritized when the distributor stripe next scans it.
+ clearPriority(mostImportant);
+ }
if (!startOperation(mostImportant)) {
return WaitTimeMs(1);
}
+ if (!_implicitly_clear_priority_on_schedule) {
+ clearPriority(mostImportant);
+ }
return WaitTimeMs(0);
}
@@ -58,7 +64,9 @@ MaintenanceScheduler::possibleToSchedule(const PrioritizedBucket& bucket,
return false;
}
// If pending window is full nothing of equal or lower priority can be scheduled, so no point in trying.
- if (!_pending_window_checker.may_allow_operation_with_priority(convertToOperationPriority(bucket.getPriority()))) {
+ if (_implicitly_clear_priority_on_schedule &&
+ !_pending_window_checker.may_allow_operation_with_priority(convertToOperationPriority(bucket.getPriority())))
+ {
return false;
}
if (currentMode == RECOVERY_SCHEDULING_MODE) {
diff --git a/storage/src/vespa/storage/distributor/maintenance/maintenancescheduler.h b/storage/src/vespa/storage/distributor/maintenance/maintenancescheduler.h
index 982a90d47d8..7741f8b6d17 100644
--- a/storage/src/vespa/storage/distributor/maintenance/maintenancescheduler.h
+++ b/storage/src/vespa/storage/distributor/maintenance/maintenancescheduler.h
@@ -28,6 +28,13 @@ public:
WaitTimeMs tick(SchedulingMode currentMode);
+ void set_implicitly_clear_priority_on_schedule(bool implicitly_clear) noexcept {
+ _implicitly_clear_priority_on_schedule = implicitly_clear;
+ }
+ [[nodiscard]] bool implicitly_clear_priority_on_schedule() const noexcept {
+ return _implicitly_clear_priority_on_schedule;
+ }
+
private:
MaintenanceScheduler(const MaintenanceScheduler&);
MaintenanceScheduler& operator=(const MaintenanceScheduler&);
@@ -41,9 +48,10 @@ private:
MaintenancePriority::Priority priority) const;
MaintenanceOperationGenerator& _operationGenerator;
- BucketPriorityDatabase& _priorityDb;
- const PendingWindowChecker& _pending_window_checker;
- OperationStarter& _operationStarter;
+ BucketPriorityDatabase& _priorityDb;
+ const PendingWindowChecker& _pending_window_checker;
+ OperationStarter& _operationStarter;
+ bool _implicitly_clear_priority_on_schedule;
};
}