diff options
4 files changed, 64 insertions, 34 deletions
diff --git a/storage/src/tests/distributor/statecheckerstest.cpp b/storage/src/tests/distributor/statecheckerstest.cpp index 7282d2e7d2a..0c1ec1e77de 100644 --- a/storage/src/tests/distributor/statecheckerstest.cpp +++ b/storage/src/tests/distributor/statecheckerstest.cpp @@ -158,6 +158,7 @@ struct StateCheckersTest : Test, DistributorTestUtil { struct CheckerParams { std::string _bucketInfo; std::string _clusterState {"distributor:1 storage:2"}; + std::string _pending_cluster_state; std::string _expect; static const PendingMessage NO_OP_BLOCKER; const PendingMessage* _blockerMessage {&NO_OP_BLOCKER}; @@ -182,6 +183,10 @@ struct StateCheckersTest : Test, DistributorTestUtil { _clusterState = state; return *this; } + CheckerParams& pending_cluster_state(const std::string& state) { + _pending_cluster_state = state; + return *this; + } CheckerParams& blockerMessage(const PendingMessage& blocker) { _blockerMessage = &blocker; return *this; @@ -208,6 +213,11 @@ struct StateCheckersTest : Test, DistributorTestUtil { addNodesToBucketDB(bid, params._bucketInfo); setRedundancy(params._redundancy); enableDistributorClusterState(params._clusterState); + if (!params._pending_cluster_state.empty()) { + auto cmd = std::make_shared<api::SetSystemStateCommand>(lib::ClusterState(params._pending_cluster_state)); + _distributor->onDown(cmd); + tick(); // Trigger command processing and pending state setup. + } NodeMaintenanceStatsTracker statsTracker; StateChecker::Context c( getExternalOperationHandler(), getDistributorBucketSpace(), statsTracker, makeDocumentBucket(bid)); @@ -640,10 +650,8 @@ TEST_F(StateCheckersTest, synchronize_and_move) { runAndVerify<SynchronizeAndMoveStateChecker>( CheckerParams().expect( "[Synchronizing buckets with different checksums " - "node(idx=0,crc=0x1,docs=1/1,bytes=1/1,trusted=false," - "active=false,ready=false), " - "node(idx=1,crc=0x2,docs=2/2,bytes=2/2,trusted=false," - "active=false,ready=false)] " + "node(idx=0,crc=0x1,docs=1/1,bytes=1/1,trusted=false,active=false,ready=false), " + "node(idx=1,crc=0x2,docs=2/2,bytes=2/2,trusted=false,active=false,ready=false)] " "(scheduling pri MEDIUM)") .bucketInfo("0=1,1=2") .includeSchedulingPriority(true)); @@ -698,12 +706,9 @@ TEST_F(StateCheckersTest, synchronize_and_move) { runAndVerify<SynchronizeAndMoveStateChecker>( CheckerParams() .expect("[Synchronizing buckets with different checksums " - "node(idx=0,crc=0x3,docs=3/3,bytes=3/3,trusted=false," - "active=false,ready=false), " - "node(idx=1,crc=0x3,docs=3/3,bytes=3/3,trusted=false," - "active=false,ready=false), " - "node(idx=2,crc=0x0,docs=0/0,bytes=0/0,trusted=false," - "active=false,ready=false)]") + "node(idx=0,crc=0x3,docs=3/3,bytes=3/3,trusted=false,active=false,ready=false), " + "node(idx=1,crc=0x3,docs=3/3,bytes=3/3,trusted=false,active=false,ready=false), " + "node(idx=2,crc=0x0,docs=0/0,bytes=0/0,trusted=false,active=false,ready=false)]") .bucketInfo("0=3,1=3,2=0") .clusterState("distributor:1 storage:3")); @@ -712,14 +717,10 @@ TEST_F(StateCheckersTest, synchronize_and_move) { runAndVerify<SynchronizeAndMoveStateChecker>( CheckerParams() .expect("[Synchronizing buckets with different checksums " - "node(idx=0,crc=0x2,docs=3/3,bytes=4/4,trusted=false," - "active=false,ready=false), " - "node(idx=1,crc=0x1,docs=2/2,bytes=3/3,trusted=true," - "active=false,ready=false), " - "node(idx=2,crc=0x1,docs=2/2,bytes=3/3,trusted=true," - "active=false,ready=false), " - "node(idx=3,crc=0x1,docs=2/2,bytes=3/3,trusted=true," - "active=false,ready=false)] " + "node(idx=0,crc=0x2,docs=3/3,bytes=4/4,trusted=false,active=false,ready=false), " + "node(idx=1,crc=0x1,docs=2/2,bytes=3/3,trusted=true,active=false,ready=false), " + "node(idx=2,crc=0x1,docs=2/2,bytes=3/3,trusted=true,active=false,ready=false), " + "node(idx=3,crc=0x1,docs=2/2,bytes=3/3,trusted=true,active=false,ready=false)] " "(pri 120) (scheduling pri MEDIUM)") .bucketInfo("0=2/3/4,1=1/2/3/t,2=1/2/3/t,3=1/2/3/t") .clusterState("distributor:1 storage:5") @@ -750,6 +751,25 @@ TEST_F(StateCheckersTest, synchronize_and_move) { .clusterState("distributor:1 storage:4")); } +// Upon entering a cluster state transition edge the distributor will +// prune all replicas from its DB that are on nodes that are unavailable +// in the _pending_ state. As long as this state is pending, the _current_ +// state will include these nodes as available. But since replicas for +// the unavailable node(s) have been pruned away, started merges that +// involve these nodes as part of their chain are doomed to fail. +TEST_F(StateCheckersTest, do_not_schedule_merges_when_included_node_is_unavailable_in_pending_state) { + runAndVerify<SynchronizeAndMoveStateChecker>( + CheckerParams() + .expect("NO OPERATIONS GENERATED") + .redundancy(3) + .bucketInfo("1=1,2=1") // Node 0 pruned from DB since it's s:m in state 2 + .clusterState("version:1 distributor:2 storage:3") + // We change the distributor set as well as the content node set. Just setting a node + // into maintenance does not trigger a pending state since it does not require any + // bucket info fetches from any of the nodes. + .pending_cluster_state("version:2 distributor:1 storage:3 .0.s:m")); +} + TEST_F(StateCheckersTest, do_not_merge_inconsistently_split_buckets) { // No merge generated if buckets are inconsistently split. // This matches the case where a bucket has been split into 2 on one diff --git a/storage/src/vespa/storage/distributor/statechecker.cpp b/storage/src/vespa/storage/distributor/statechecker.cpp index 12dbd7f3c52..f7ef1122692 100644 --- a/storage/src/vespa/storage/distributor/statechecker.cpp +++ b/storage/src/vespa/storage/distributor/statechecker.cpp @@ -66,6 +66,7 @@ StateChecker::Context::Context(const DistributorComponent& c, : bucket(bucket_), siblingBucket(c.getSibling(bucket.getBucketId())), systemState(distributorBucketSpace.getClusterState()), + pending_cluster_state(c.getDistributor().pendingClusterStateOrNull(bucket_.getBucketSpace())), distributorConfig(c.getDistributor().getConfig()), distribution(distributorBucketSpace.getDistribution()), gcTimeCalculator(c.getDistributor().getBucketIdHasher(), @@ -75,12 +76,11 @@ StateChecker::Context::Context(const DistributorComponent& c, db(distributorBucketSpace.getBucketDatabase()), stats(statsTracker) { - idealState = - distribution.getIdealStorageNodes(systemState, bucket.getBucketId()); + idealState = distribution.getIdealStorageNodes(systemState, bucket.getBucketId()); unorderedIdealState.insert(idealState.begin(), idealState.end()); } -StateChecker::Context::~Context() {} +StateChecker::Context::~Context() = default; std::string StateChecker::Context::toString() const @@ -99,12 +99,7 @@ StateChecker::Context::toString() const return ss.str(); } -StateChecker::StateChecker() -{ -} - -StateChecker::~StateChecker() -{ -} +StateChecker::StateChecker() = default; +StateChecker::~StateChecker() = default; } diff --git a/storage/src/vespa/storage/distributor/statechecker.h b/storage/src/vespa/storage/distributor/statechecker.h index 9c0fe2ef53c..5b9ce065f99 100644 --- a/storage/src/vespa/storage/distributor/statechecker.h +++ b/storage/src/vespa/storage/distributor/statechecker.h @@ -64,6 +64,7 @@ public: // Common const lib::ClusterState& systemState; + const lib::ClusterState* pending_cluster_state; // nullptr if no state is pending. const DistributorConfiguration& distributorConfig; const lib::Distribution& distribution; diff --git a/storage/src/vespa/storage/distributor/statecheckers.cpp b/storage/src/vespa/storage/distributor/statecheckers.cpp index 0a0abb8d417..dab2025bfbb 100644 --- a/storage/src/vespa/storage/distributor/statecheckers.cpp +++ b/storage/src/vespa/storage/distributor/statecheckers.cpp @@ -584,18 +584,29 @@ SplitInconsistentStateChecker::check(StateChecker::Context& c) } namespace { + bool containsMaintenanceNode(const std::vector<uint16_t>& ideal, const StateChecker::Context& c) { - for (uint32_t i = 0; i < ideal.size(); i++) { - if (c.systemState.getNodeState(lib::Node(lib::NodeType::STORAGE, - ideal[i])).getState() - == lib::State::MAINTENANCE) - { + for (uint16_t n : ideal) { + lib::Node node(lib::NodeType::STORAGE, n); + if (c.systemState.getNodeState(node).getState() == lib::State::MAINTENANCE) { return true; } } + return false; +} +bool ideal_node_is_unavailable_in_pending_state(const StateChecker::Context& c) { + if (!c.pending_cluster_state) { + return false; + } + for (uint16_t n : c.idealState) { + lib::Node node(lib::NodeType::STORAGE, n); + if (!c.pending_cluster_state->getNodeState(node).getState().oneOf("uir")){ + return true; + } + } return false; } @@ -715,7 +726,7 @@ private: uint8_t _priority; }; -MergeNodes::~MergeNodes() {} +MergeNodes::~MergeNodes() = default; bool presentInIdealState(const StateChecker::Context& c, uint16_t node) @@ -830,6 +841,9 @@ SynchronizeAndMoveStateChecker::check(StateChecker::Context& c) if (containsMaintenanceNode(c.idealState, c)) { return Result::noMaintenanceNeeded(); } + if (ideal_node_is_unavailable_in_pending_state(c)) { + return Result::noMaintenanceNeeded(); + } if (allCopiesAreInvalid(c)) { return Result::noMaintenanceNeeded(); } |