diff options
author | Henning Baldersheim <balder@yahoo-inc.com> | 2022-09-20 07:31:16 +0200 |
---|---|---|
committer | GitHub <noreply@github.com> | 2022-09-20 07:31:16 +0200 |
commit | 724af959b75915756818bcb03e85e25a8d21f616 (patch) | |
tree | 032f17d6f6564e1b12ae2e40e42cf47f67b8daf7 /storage | |
parent | c0c310c145ce7d57b14bf0778ee9ff6bf2d53540 (diff) | |
parent | 136c6f5f0d52b2a2506b0e3be425be4bdb5b0437 (diff) |
Merge pull request #24090 from vespa-engine/vekterli/do-not-inhibit-activation-under-maintenance-mode
Do not inhibit bucket replica activation under maintenance mode [run-systemtest]
Diffstat (limited to 'storage')
11 files changed, 76 insertions, 27 deletions
diff --git a/storage/src/tests/distributor/statecheckerstest.cpp b/storage/src/tests/distributor/statecheckerstest.cpp index 94f913a3325..0fc148eb549 100644 --- a/storage/src/tests/distributor/statecheckerstest.cpp +++ b/storage/src/tests/distributor/statecheckerstest.cpp @@ -88,11 +88,11 @@ struct StateCheckersTest : Test, DistributorStripeTestUtil { c.siblingEntry = getBucketDatabase(c.getBucketSpace()).get(c.siblingBucket); c.entries = entries; - for (uint32_t j = 0; j < entries.size(); ++j) { + for (const auto & entry : entries) { // Run checking only on this bucketid, but include all buckets // owned by it or owners of it, so we can detect inconsistent split. - if (entries[j].getBucketId() == c.getBucketId()) { - c.entry = entries[j]; + if (entry.getBucketId() == c.getBucketId()) { + c.entry = entry; StateChecker::Result result(checker.check(c)); IdealStateOperation::UP op(result.createOperation()); @@ -284,6 +284,14 @@ struct StateCheckersTest : Test, DistributorStripeTestUtil { bool includePriority = false); std::string testBucketStatePerGroup(const std::string& bucketInfo, bool includePriority = false); + + void do_test_bucket_activation(); + + void set_node_supports_no_implicit_indexing_on_activation(uint16_t node, bool supported) { + NodeSupportedFeatures nsf; + nsf.no_implicit_indexing_of_active_buckets = supported; + set_node_supported_features(node, nsf); + } }; StateCheckersTest::CheckerParams::CheckerParams() = default; @@ -997,7 +1005,7 @@ std::string StateCheckersTest::testBucketState( includePriority); } -TEST_F(StateCheckersTest, bucket_state) { +void StateCheckersTest::do_test_bucket_activation() { setup_stripe(2, 100, "distributor:1 storage:4"); { @@ -1035,11 +1043,13 @@ TEST_F(StateCheckersTest, bucket_state) { EXPECT_EQ("[Setting node 0 as active: copy has 3 docs]", testBucketState("0=2/3/4")); + // TODO remove this // A replica with more documents should be preferred over one with fewer. EXPECT_EQ("[Setting node 3 as active: copy has 6 docs and ideal state priority 1]" "[Setting node 1 as inactive]", testBucketState("1=2/3/4/u/a,3=5/6/7/t")); + // TODO remove this // Replica 2 has most documents and should be activated EXPECT_EQ("[Setting node 2 as active: copy has 9 docs]", testBucketState("1=2/3/4,3=5/6/7/,2=8/9/10/t")); @@ -1087,14 +1097,25 @@ TEST_F(StateCheckersTest, bucket_state) { testBucketState("2=8/9/10/u/i/r,1=2/3/4/u/a/r,3=5/6/7/u/i/r")); } +TEST_F(StateCheckersTest, bucket_activation_behaves_as_expected_with_implicit_indexing_on_active) { + set_node_supports_no_implicit_indexing_on_activation(2, false); + do_test_bucket_activation(); +} + +TEST_F(StateCheckersTest, bucket_activation_behaves_as_expected_without_implicit_indexing_on_active) { + set_node_supports_no_implicit_indexing_on_activation(2, true); + do_test_bucket_activation(); +} + /** * Users assume that setting nodes into maintenance will not cause extra load * on the cluster, but activating non-ready copies because the active copy went * into maintenance violates that assumption. See bug 6833209 for context and * details. */ -TEST_F(StateCheckersTest, do_not_activate_non_ready_copies_when_ideal_node_in_maintenance) { +TEST_F(StateCheckersTest, do_not_activate_non_ready_copies_when_ideal_node_in_maintenance_if_active_implicitly_indexes) { setup_stripe(2, 100, "distributor:1 storage:4 .1.s:m"); + set_node_supports_no_implicit_indexing_on_activation(2, false); // Ideal node 1 is in maintenance and no ready copy available. EXPECT_EQ("NO OPERATIONS GENERATED", testBucketState("2=8/9/10/t/i/u,3=5/6/7")); @@ -1103,6 +1124,17 @@ TEST_F(StateCheckersTest, do_not_activate_non_ready_copies_when_ideal_node_in_ma testBucketState("2=8/9/10/u/i/r,3=5/6/7/u/i/u")); } +TEST_F(StateCheckersTest, activate_non_ready_copies_when_ideal_node_in_maintenance_if_active_does_not_implicitly_index) { + setup_stripe(2, 100, "distributor:1 storage:4 .1.s:m"); + set_node_supports_no_implicit_indexing_on_activation(2, true); + // Ideal node 1 is in maintenance and no ready copy available. + EXPECT_EQ("[Setting node 2 as active: copy has 9 docs]", // TODO ideal state pri instead + testBucketState("2=8/9/10/t/i/u,3=5/6/7")); + // But we should activate another copy iff there's another ready copy. + EXPECT_EQ("[Setting node 2 as active: copy is ready with 9 docs]", + testBucketState("2=8/9/10/u/i/r,3=5/6/7/u/i/u")); +} + /** * We really do not want to activate buckets when they are inconsistent. * See bug 6395693 for a set of reasons why. diff --git a/storage/src/tests/distributor/top_level_bucket_db_updater_test.cpp b/storage/src/tests/distributor/top_level_bucket_db_updater_test.cpp index 19ec51f4ed4..7b4f688b253 100644 --- a/storage/src/tests/distributor/top_level_bucket_db_updater_test.cpp +++ b/storage/src/tests/distributor/top_level_bucket_db_updater_test.cpp @@ -2512,6 +2512,7 @@ TEST_F(TopLevelBucketDBUpdaterTest, node_feature_sets_are_aggregated_from_nodes_ for (uint16_t i : {0, 1, 2}) { EXPECT_FALSE(s->node_supported_features_repo().node_supported_features(i).unordered_merge_chaining); EXPECT_FALSE(s->node_supported_features_repo().node_supported_features(i).two_phase_remove_location); + EXPECT_FALSE(s->node_supported_features_repo().node_supported_features(i).no_implicit_indexing_of_active_buckets); } } @@ -2524,6 +2525,7 @@ TEST_F(TopLevelBucketDBUpdaterTest, node_feature_sets_are_aggregated_from_nodes_ if (i > 0) { reply.supported_node_features().unordered_merge_chaining = true; reply.supported_node_features().two_phase_remove_location = true; + reply.supported_node_features().no_implicit_indexing_of_active_buckets = true; } })); } @@ -2532,10 +2534,15 @@ TEST_F(TopLevelBucketDBUpdaterTest, node_feature_sets_are_aggregated_from_nodes_ for (auto* s : stripes) { EXPECT_FALSE(s->node_supported_features_repo().node_supported_features(0).unordered_merge_chaining); EXPECT_FALSE(s->node_supported_features_repo().node_supported_features(0).two_phase_remove_location); + EXPECT_FALSE(s->node_supported_features_repo().node_supported_features(0).no_implicit_indexing_of_active_buckets); + EXPECT_TRUE(s->node_supported_features_repo().node_supported_features(1).unordered_merge_chaining); EXPECT_TRUE(s->node_supported_features_repo().node_supported_features(1).two_phase_remove_location); + EXPECT_TRUE(s->node_supported_features_repo().node_supported_features(1).no_implicit_indexing_of_active_buckets); + EXPECT_TRUE(s->node_supported_features_repo().node_supported_features(2).unordered_merge_chaining); EXPECT_TRUE(s->node_supported_features_repo().node_supported_features(2).two_phase_remove_location); + EXPECT_TRUE(s->node_supported_features_repo().node_supported_features(2).no_implicit_indexing_of_active_buckets); } } diff --git a/storage/src/tests/storageapi/mbusprot/storageprotocoltest.cpp b/storage/src/tests/storageapi/mbusprot/storageprotocoltest.cpp index 5ed2e0d96b4..344c909c13e 100644 --- a/storage/src/tests/storageapi/mbusprot/storageprotocoltest.cpp +++ b/storage/src/tests/storageapi/mbusprot/storageprotocoltest.cpp @@ -389,6 +389,7 @@ TEST_P(StorageProtocolTest, request_bucket_info) { EXPECT_TRUE(reply2->supported_node_features().unordered_merge_chaining); EXPECT_TRUE(reply2->supported_node_features().two_phase_remove_location); + EXPECT_TRUE(reply2->supported_node_features().no_implicit_indexing_of_active_buckets); } } diff --git a/storage/src/vespa/storage/distributor/distributor_stripe.cpp b/storage/src/vespa/storage/distributor/distributor_stripe.cpp index 1173a19d729..a811e043678 100644 --- a/storage/src/vespa/storage/distributor/distributor_stripe.cpp +++ b/storage/src/vespa/storage/distributor/distributor_stripe.cpp @@ -14,7 +14,6 @@ #include "throttlingoperationstarter.h" #include <vespa/document/bucket/fixed_bucket_spaces.h> #include <vespa/storage/common/global_bucket_space_distribution_converter.h> -#include <vespa/storage/common/hostreporter/hostinfo.h> #include <vespa/storage/common/node_identity.h> #include <vespa/storage/common/nodestateupdater.h> #include <vespa/storage/distributor/maintenance/simplebucketprioritydatabase.h> @@ -84,7 +83,7 @@ DistributorStripe::DistributorStripe(DistributorComponentRegister& compReg, { propagateDefaultDistribution(_component.getDistribution()); propagateClusterStates(); -}; +} DistributorStripe::~DistributorStripe() = default; @@ -109,7 +108,7 @@ void DistributorStripe::sendCommand(const std::shared_ptr<api::StorageCommand>& cmd) { if (cmd->getType() == api::MessageType::MERGEBUCKET) { - api::MergeBucketCommand& merge(static_cast<api::MergeBucketCommand&>(*cmd)); + auto & merge(static_cast<api::MergeBucketCommand&>(*cmd)); _idealStateManager.getMetrics().nodesPerMerge.addValue(merge.getNodes().size()); } send_up_with_tracking(cmd); @@ -182,7 +181,7 @@ DistributorStripe::handleCompletedMerge( } bool -DistributorStripe::isMaintenanceReply(const api::StorageReply& reply) const +DistributorStripe::isMaintenanceReply(const api::StorageReply& reply) { switch (reply.getType().getId()) { case api::MessageType::CREATEBUCKET_REPLY_ID: @@ -290,8 +289,8 @@ DistributorStripe::enableClusterStateBundle(const lib::ClusterStateBundle& state std::vector<uint64_t> msgIds = _pendingMessageTracker.clearMessagesForNode(i); LOG(debug, "Node %u is down, clearing %zu pending maintenance operations", i, msgIds.size()); - for (uint32_t j = 0; j < msgIds.size(); ++j) { - _maintenanceOperationOwner.erase(msgIds[j]); + for (const auto & msgId : msgIds) { + _maintenanceOperationOwner.erase(msgId); } } } @@ -407,7 +406,7 @@ public: bool found; uint8_t maxPri; - SplitChecker(uint8_t maxP) : found(false), maxPri(maxP) {}; + explicit SplitChecker(uint8_t maxP) : found(false), maxPri(maxP) {}; bool check(uint32_t msgType, uint16_t node, uint8_t pri) override { (void) node; @@ -603,7 +602,7 @@ namespace { BucketSpaceStats toBucketSpaceStats(const NodeMaintenanceStats &stats) { - return BucketSpaceStats(stats.total, stats.syncing + stats.copyingIn); + return {stats.total, stats.syncing + stats.copyingIn}; } using PerNodeBucketSpacesStats = BucketSpacesStatsProvider::PerNodeBucketSpacesStats; diff --git a/storage/src/vespa/storage/distributor/distributor_stripe.h b/storage/src/vespa/storage/distributor/distributor_stripe.h index d1366bc0285..ccd7ab9c21d 100644 --- a/storage/src/vespa/storage/distributor/distributor_stripe.h +++ b/storage/src/vespa/storage/distributor/distributor_stripe.h @@ -208,7 +208,7 @@ private: friend class TopLevelDistributorTestUtil; bool handleMessage(const std::shared_ptr<api::StorageMessage>& msg); - bool isMaintenanceReply(const api::StorageReply& reply) const; + static bool isMaintenanceReply(const api::StorageReply& reply); void send_shutdown_abort_reply(const std::shared_ptr<api::StorageMessage>&); void handle_or_propagate_message(const std::shared_ptr<api::StorageMessage>& msg); diff --git a/storage/src/vespa/storage/distributor/node_supported_features.h b/storage/src/vespa/storage/distributor/node_supported_features.h index 87b2a9aef8e..bbd17403a6d 100644 --- a/storage/src/vespa/storage/distributor/node_supported_features.h +++ b/storage/src/vespa/storage/distributor/node_supported_features.h @@ -13,6 +13,7 @@ namespace storage::distributor { struct NodeSupportedFeatures { bool unordered_merge_chaining = false; bool two_phase_remove_location = false; + bool no_implicit_indexing_of_active_buckets = false; bool operator==(const NodeSupportedFeatures& rhs) const noexcept = default; }; diff --git a/storage/src/vespa/storage/distributor/pendingclusterstate.cpp b/storage/src/vespa/storage/distributor/pendingclusterstate.cpp index dadbda60021..cf32d21eb82 100644 --- a/storage/src/vespa/storage/distributor/pendingclusterstate.cpp +++ b/storage/src/vespa/storage/distributor/pendingclusterstate.cpp @@ -340,8 +340,9 @@ PendingClusterState::update_node_supported_features_from_reply(uint16_t node, co { const auto& src_feat = reply.supported_node_features(); NodeSupportedFeatures dest_feat; - dest_feat.unordered_merge_chaining = src_feat.unordered_merge_chaining; - dest_feat.two_phase_remove_location = src_feat.two_phase_remove_location; + dest_feat.unordered_merge_chaining = src_feat.unordered_merge_chaining; + dest_feat.two_phase_remove_location = src_feat.two_phase_remove_location; + dest_feat.no_implicit_indexing_of_active_buckets = src_feat.no_implicit_indexing_of_active_buckets; // This will overwrite per bucket-space reply, but does not matter since it's independent of bucket space. _node_features.insert(std::make_pair(node, dest_feat)); } diff --git a/storage/src/vespa/storage/distributor/statecheckers.cpp b/storage/src/vespa/storage/distributor/statecheckers.cpp index 71b2da1359a..aaf65f1a6fe 100644 --- a/storage/src/vespa/storage/distributor/statecheckers.cpp +++ b/storage/src/vespa/storage/distributor/statecheckers.cpp @@ -2,6 +2,7 @@ #include "statecheckers.h" #include "activecopy.h" +#include "node_supported_features_repo.h" #include <vespa/document/bucket/fixed_bucket_spaces.h> #include <vespa/storage/distributor/operations/idealstate/splitoperation.h> #include <vespa/storage/distributor/operations/idealstate/joinoperation.h> @@ -1038,15 +1039,18 @@ BucketStateStateChecker::shouldSkipActivationDueToMaintenance( const StateChecker::Context& c) const { for (uint32_t i = 0; i < activeNodes.size(); ++i) { - const BucketCopy* cp(c.entry->getNode(activeNodes[i]._nodeIndex)); + const auto node_index = activeNodes[i]._nodeIndex; + const BucketCopy* cp(c.entry->getNode(node_index)); if (!cp || cp->active()) { continue; } if (!cp->ready()) { - // If copy is not ready, we don't want to activate it if a node - // is set in maintenance. Doing so would imply that we want proton - // to start background indexing. - return containsMaintenanceNode(c.idealState, c); + if (!c.op_ctx.node_supported_features_repo().node_supported_features(node_index).no_implicit_indexing_of_active_buckets) { + // If copy is not ready, we don't want to activate it if a node + // is set in maintenance. Doing so would imply that we want proton + // to start background indexing. + return containsMaintenanceNode(c.idealState, c); + } // else: activation does not imply indexing, so we can safely do it at any time. } } return false; diff --git a/storage/src/vespa/storageapi/mbusprot/protobuf/maintenance.proto b/storage/src/vespa/storageapi/mbusprot/protobuf/maintenance.proto index a79ac9fd99a..0c1df005a5a 100644 --- a/storage/src/vespa/storageapi/mbusprot/protobuf/maintenance.proto +++ b/storage/src/vespa/storageapi/mbusprot/protobuf/maintenance.proto @@ -110,8 +110,9 @@ message BucketAndBucketInfo { } message SupportedNodeFeatures { - bool unordered_merge_chaining = 1; - bool two_phase_remove_location = 2; + bool unordered_merge_chaining = 1; + bool two_phase_remove_location = 2; + bool no_implicit_indexing_of_active_buckets = 3; } message RequestBucketInfoResponse { diff --git a/storage/src/vespa/storageapi/mbusprot/protocolserialization7.cpp b/storage/src/vespa/storageapi/mbusprot/protocolserialization7.cpp index 662408c4e95..151facf36e6 100644 --- a/storage/src/vespa/storageapi/mbusprot/protocolserialization7.cpp +++ b/storage/src/vespa/storageapi/mbusprot/protocolserialization7.cpp @@ -1058,6 +1058,7 @@ void ProtocolSerialization7::onEncode(GBBuf& buf, const api::RequestBucketInfoRe if (msg.full_bucket_fetch()) { res.mutable_supported_node_features()->set_unordered_merge_chaining(true); res.mutable_supported_node_features()->set_two_phase_remove_location(true); + res.mutable_supported_node_features()->set_no_implicit_indexing_of_active_buckets(true); } }); } @@ -1098,8 +1099,9 @@ api::StorageReply::UP ProtocolSerialization7::onDecodeRequestBucketInfoReply(con if (res.has_supported_node_features()) { const auto& src_features = res.supported_node_features(); auto& dest_features = reply->supported_node_features(); - dest_features.unordered_merge_chaining = src_features.unordered_merge_chaining(); - dest_features.two_phase_remove_location = src_features.two_phase_remove_location(); + dest_features.unordered_merge_chaining = src_features.unordered_merge_chaining(); + dest_features.two_phase_remove_location = src_features.two_phase_remove_location(); + dest_features.no_implicit_indexing_of_active_buckets = src_features.no_implicit_indexing_of_active_buckets(); } return reply; }); diff --git a/storage/src/vespa/storageapi/message/bucket.h b/storage/src/vespa/storageapi/message/bucket.h index d9843236d2e..ab61a9202c8 100644 --- a/storage/src/vespa/storageapi/message/bucket.h +++ b/storage/src/vespa/storageapi/message/bucket.h @@ -393,8 +393,9 @@ public: friend std::ostream& operator<<(std::ostream& os, const Entry&); }; struct SupportedNodeFeatures { - bool unordered_merge_chaining = false; - bool two_phase_remove_location = false; + bool unordered_merge_chaining = false; + bool two_phase_remove_location = false; + bool no_implicit_indexing_of_active_buckets = false; }; using EntryVector = vespalib::Array<Entry>; private: |