From 6913409d5fe94c86bfe238daebf9fd1b16072e49 Mon Sep 17 00:00:00 2001 From: Tor Brede Vekterli Date: Thu, 15 Sep 2022 14:30:28 +0000 Subject: Support activation of replicas even if ideal node in maintenance Adds an internal feature support flag which communicates that the content node will not implicitly index a non-ideal replica marked explicitly as active. Activation during maintenance is only performed iff a content node has this flag set. --- .../src/tests/distributor/statecheckerstest.cpp | 36 ++++++++++++++++++++-- .../vespa/storage/distributor/statecheckers.cpp | 14 ++++++--- 2 files changed, 43 insertions(+), 7 deletions(-) (limited to 'storage') diff --git a/storage/src/tests/distributor/statecheckerstest.cpp b/storage/src/tests/distributor/statecheckerstest.cpp index 94f913a3325..f94b0ae1bcd 100644 --- a/storage/src/tests/distributor/statecheckerstest.cpp +++ b/storage/src/tests/distributor/statecheckerstest.cpp @@ -284,6 +284,14 @@ struct StateCheckersTest : Test, DistributorStripeTestUtil { bool includePriority = false); std::string testBucketStatePerGroup(const std::string& bucketInfo, bool includePriority = false); + + void do_test_bucket_activation(); + + void set_node_supports_no_implicit_index_on_activation(uint16_t node, bool supported) { + NodeSupportedFeatures nsf; + nsf.no_implicit_indexing_of_active_buckets = supported; + set_node_supported_features(node, nsf); + } }; StateCheckersTest::CheckerParams::CheckerParams() = default; @@ -997,7 +1005,7 @@ std::string StateCheckersTest::testBucketState( includePriority); } -TEST_F(StateCheckersTest, bucket_state) { +void StateCheckersTest::do_test_bucket_activation() { setup_stripe(2, 100, "distributor:1 storage:4"); { @@ -1035,11 +1043,13 @@ TEST_F(StateCheckersTest, bucket_state) { EXPECT_EQ("[Setting node 0 as active: copy has 3 docs]", testBucketState("0=2/3/4")); + // TODO remove this // A replica with more documents should be preferred over one with fewer. EXPECT_EQ("[Setting node 3 as active: copy has 6 docs and ideal state priority 1]" "[Setting node 1 as inactive]", testBucketState("1=2/3/4/u/a,3=5/6/7/t")); + // TODO remove this // Replica 2 has most documents and should be activated EXPECT_EQ("[Setting node 2 as active: copy has 9 docs]", testBucketState("1=2/3/4,3=5/6/7/,2=8/9/10/t")); @@ -1087,14 +1097,25 @@ TEST_F(StateCheckersTest, bucket_state) { testBucketState("2=8/9/10/u/i/r,1=2/3/4/u/a/r,3=5/6/7/u/i/r")); } +TEST_F(StateCheckersTest, bucket_activation_behaves_as_expected_with_implicit_index_on_active) { + set_node_supports_no_implicit_index_on_activation(2, false); + do_test_bucket_activation(); +} + +TEST_F(StateCheckersTest, bucket_activation_behaves_as_expected_without_implicit_index_on_active) { + set_node_supports_no_implicit_index_on_activation(2, true); + do_test_bucket_activation(); +} + /** * Users assume that setting nodes into maintenance will not cause extra load * on the cluster, but activating non-ready copies because the active copy went * into maintenance violates that assumption. See bug 6833209 for context and * details. */ -TEST_F(StateCheckersTest, do_not_activate_non_ready_copies_when_ideal_node_in_maintenance) { +TEST_F(StateCheckersTest, do_not_activate_non_ready_copies_when_ideal_node_in_maintenance_if_active_implicitly_indexes) { setup_stripe(2, 100, "distributor:1 storage:4 .1.s:m"); + set_node_supports_no_implicit_index_on_activation(2, false); // Ideal node 1 is in maintenance and no ready copy available. EXPECT_EQ("NO OPERATIONS GENERATED", testBucketState("2=8/9/10/t/i/u,3=5/6/7")); @@ -1103,6 +1124,17 @@ TEST_F(StateCheckersTest, do_not_activate_non_ready_copies_when_ideal_node_in_ma testBucketState("2=8/9/10/u/i/r,3=5/6/7/u/i/u")); } +TEST_F(StateCheckersTest, activate_non_ready_copies_when_ideal_node_in_maintenance_if_active_does_not_implicitly_index) { + setup_stripe(2, 100, "distributor:1 storage:4 .1.s:m"); + set_node_supports_no_implicit_index_on_activation(2, true); + // Ideal node 1 is in maintenance and no ready copy available. + EXPECT_EQ("[Setting node 2 as active: copy has 9 docs]", // TODO ideal state pri instead + testBucketState("2=8/9/10/t/i/u,3=5/6/7")); + // But we should activate another copy iff there's another ready copy. + EXPECT_EQ("[Setting node 2 as active: copy is ready with 9 docs]", + testBucketState("2=8/9/10/u/i/r,3=5/6/7/u/i/u")); +} + /** * We really do not want to activate buckets when they are inconsistent. * See bug 6395693 for a set of reasons why. diff --git a/storage/src/vespa/storage/distributor/statecheckers.cpp b/storage/src/vespa/storage/distributor/statecheckers.cpp index 71b2da1359a..aaf65f1a6fe 100644 --- a/storage/src/vespa/storage/distributor/statecheckers.cpp +++ b/storage/src/vespa/storage/distributor/statecheckers.cpp @@ -2,6 +2,7 @@ #include "statecheckers.h" #include "activecopy.h" +#include "node_supported_features_repo.h" #include #include #include @@ -1038,15 +1039,18 @@ BucketStateStateChecker::shouldSkipActivationDueToMaintenance( const StateChecker::Context& c) const { for (uint32_t i = 0; i < activeNodes.size(); ++i) { - const BucketCopy* cp(c.entry->getNode(activeNodes[i]._nodeIndex)); + const auto node_index = activeNodes[i]._nodeIndex; + const BucketCopy* cp(c.entry->getNode(node_index)); if (!cp || cp->active()) { continue; } if (!cp->ready()) { - // If copy is not ready, we don't want to activate it if a node - // is set in maintenance. Doing so would imply that we want proton - // to start background indexing. - return containsMaintenanceNode(c.idealState, c); + if (!c.op_ctx.node_supported_features_repo().node_supported_features(node_index).no_implicit_indexing_of_active_buckets) { + // If copy is not ready, we don't want to activate it if a node + // is set in maintenance. Doing so would imply that we want proton + // to start background indexing. + return containsMaintenanceNode(c.idealState, c); + } // else: activation does not imply indexing, so we can safely do it at any time. } } return false; -- cgit v1.2.3