diff options
author | Tor Brede Vekterli <vekterli@yahooinc.com> | 2022-09-15 14:30:28 +0000 |
---|---|---|
committer | Tor Brede Vekterli <vekterli@yahooinc.com> | 2022-09-15 14:32:01 +0000 |
commit | 6913409d5fe94c86bfe238daebf9fd1b16072e49 (patch) | |
tree | d39072c993b3f3019de70773d7f6e71bd5ba40a8 /storage | |
parent | 947b8c17af4ed513ce22615ddd595527985df46a (diff) |
Support activation of replicas even if ideal node in maintenance
Adds an internal feature support flag which communicates that the
content node will not implicitly index a non-ideal replica marked
explicitly as active. Activation during maintenance is only performed
iff a content node has this flag set.
Diffstat (limited to 'storage')
-rw-r--r-- | storage/src/tests/distributor/statecheckerstest.cpp | 36 | ||||
-rw-r--r-- | storage/src/vespa/storage/distributor/statecheckers.cpp | 14 |
2 files changed, 43 insertions, 7 deletions
diff --git a/storage/src/tests/distributor/statecheckerstest.cpp b/storage/src/tests/distributor/statecheckerstest.cpp index 94f913a3325..f94b0ae1bcd 100644 --- a/storage/src/tests/distributor/statecheckerstest.cpp +++ b/storage/src/tests/distributor/statecheckerstest.cpp @@ -284,6 +284,14 @@ struct StateCheckersTest : Test, DistributorStripeTestUtil { bool includePriority = false); std::string testBucketStatePerGroup(const std::string& bucketInfo, bool includePriority = false); + + void do_test_bucket_activation(); + + void set_node_supports_no_implicit_index_on_activation(uint16_t node, bool supported) { + NodeSupportedFeatures nsf; + nsf.no_implicit_indexing_of_active_buckets = supported; + set_node_supported_features(node, nsf); + } }; StateCheckersTest::CheckerParams::CheckerParams() = default; @@ -997,7 +1005,7 @@ std::string StateCheckersTest::testBucketState( includePriority); } -TEST_F(StateCheckersTest, bucket_state) { +void StateCheckersTest::do_test_bucket_activation() { setup_stripe(2, 100, "distributor:1 storage:4"); { @@ -1035,11 +1043,13 @@ TEST_F(StateCheckersTest, bucket_state) { EXPECT_EQ("[Setting node 0 as active: copy has 3 docs]", testBucketState("0=2/3/4")); + // TODO remove this // A replica with more documents should be preferred over one with fewer. EXPECT_EQ("[Setting node 3 as active: copy has 6 docs and ideal state priority 1]" "[Setting node 1 as inactive]", testBucketState("1=2/3/4/u/a,3=5/6/7/t")); + // TODO remove this // Replica 2 has most documents and should be activated EXPECT_EQ("[Setting node 2 as active: copy has 9 docs]", testBucketState("1=2/3/4,3=5/6/7/,2=8/9/10/t")); @@ -1087,14 +1097,25 @@ TEST_F(StateCheckersTest, bucket_state) { testBucketState("2=8/9/10/u/i/r,1=2/3/4/u/a/r,3=5/6/7/u/i/r")); } +TEST_F(StateCheckersTest, bucket_activation_behaves_as_expected_with_implicit_index_on_active) { + set_node_supports_no_implicit_index_on_activation(2, false); + do_test_bucket_activation(); +} + +TEST_F(StateCheckersTest, bucket_activation_behaves_as_expected_without_implicit_index_on_active) { + set_node_supports_no_implicit_index_on_activation(2, true); + do_test_bucket_activation(); +} + /** * Users assume that setting nodes into maintenance will not cause extra load * on the cluster, but activating non-ready copies because the active copy went * into maintenance violates that assumption. See bug 6833209 for context and * details. */ -TEST_F(StateCheckersTest, do_not_activate_non_ready_copies_when_ideal_node_in_maintenance) { +TEST_F(StateCheckersTest, do_not_activate_non_ready_copies_when_ideal_node_in_maintenance_if_active_implicitly_indexes) { setup_stripe(2, 100, "distributor:1 storage:4 .1.s:m"); + set_node_supports_no_implicit_index_on_activation(2, false); // Ideal node 1 is in maintenance and no ready copy available. EXPECT_EQ("NO OPERATIONS GENERATED", testBucketState("2=8/9/10/t/i/u,3=5/6/7")); @@ -1103,6 +1124,17 @@ TEST_F(StateCheckersTest, do_not_activate_non_ready_copies_when_ideal_node_in_ma testBucketState("2=8/9/10/u/i/r,3=5/6/7/u/i/u")); } +TEST_F(StateCheckersTest, activate_non_ready_copies_when_ideal_node_in_maintenance_if_active_does_not_implicitly_index) { + setup_stripe(2, 100, "distributor:1 storage:4 .1.s:m"); + set_node_supports_no_implicit_index_on_activation(2, true); + // Ideal node 1 is in maintenance and no ready copy available. + EXPECT_EQ("[Setting node 2 as active: copy has 9 docs]", // TODO ideal state pri instead + testBucketState("2=8/9/10/t/i/u,3=5/6/7")); + // But we should activate another copy iff there's another ready copy. + EXPECT_EQ("[Setting node 2 as active: copy is ready with 9 docs]", + testBucketState("2=8/9/10/u/i/r,3=5/6/7/u/i/u")); +} + /** * We really do not want to activate buckets when they are inconsistent. * See bug 6395693 for a set of reasons why. diff --git a/storage/src/vespa/storage/distributor/statecheckers.cpp b/storage/src/vespa/storage/distributor/statecheckers.cpp index 71b2da1359a..aaf65f1a6fe 100644 --- a/storage/src/vespa/storage/distributor/statecheckers.cpp +++ b/storage/src/vespa/storage/distributor/statecheckers.cpp @@ -2,6 +2,7 @@ #include "statecheckers.h" #include "activecopy.h" +#include "node_supported_features_repo.h" #include <vespa/document/bucket/fixed_bucket_spaces.h> #include <vespa/storage/distributor/operations/idealstate/splitoperation.h> #include <vespa/storage/distributor/operations/idealstate/joinoperation.h> @@ -1038,15 +1039,18 @@ BucketStateStateChecker::shouldSkipActivationDueToMaintenance( const StateChecker::Context& c) const { for (uint32_t i = 0; i < activeNodes.size(); ++i) { - const BucketCopy* cp(c.entry->getNode(activeNodes[i]._nodeIndex)); + const auto node_index = activeNodes[i]._nodeIndex; + const BucketCopy* cp(c.entry->getNode(node_index)); if (!cp || cp->active()) { continue; } if (!cp->ready()) { - // If copy is not ready, we don't want to activate it if a node - // is set in maintenance. Doing so would imply that we want proton - // to start background indexing. - return containsMaintenanceNode(c.idealState, c); + if (!c.op_ctx.node_supported_features_repo().node_supported_features(node_index).no_implicit_indexing_of_active_buckets) { + // If copy is not ready, we don't want to activate it if a node + // is set in maintenance. Doing so would imply that we want proton + // to start background indexing. + return containsMaintenanceNode(c.idealState, c); + } // else: activation does not imply indexing, so we can safely do it at any time. } } return false; |