summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--storage/src/tests/distributor/statecheckerstest.cpp56
-rw-r--r--storage/src/vespa/storage/distributor/statechecker.cpp15
-rw-r--r--storage/src/vespa/storage/distributor/statechecker.h1
-rw-r--r--storage/src/vespa/storage/distributor/statecheckers.cpp26
4 files changed, 64 insertions, 34 deletions
diff --git a/storage/src/tests/distributor/statecheckerstest.cpp b/storage/src/tests/distributor/statecheckerstest.cpp
index 7282d2e7d2a..0c1ec1e77de 100644
--- a/storage/src/tests/distributor/statecheckerstest.cpp
+++ b/storage/src/tests/distributor/statecheckerstest.cpp
@@ -158,6 +158,7 @@ struct StateCheckersTest : Test, DistributorTestUtil {
struct CheckerParams {
std::string _bucketInfo;
std::string _clusterState {"distributor:1 storage:2"};
+ std::string _pending_cluster_state;
std::string _expect;
static const PendingMessage NO_OP_BLOCKER;
const PendingMessage* _blockerMessage {&NO_OP_BLOCKER};
@@ -182,6 +183,10 @@ struct StateCheckersTest : Test, DistributorTestUtil {
_clusterState = state;
return *this;
}
+ CheckerParams& pending_cluster_state(const std::string& state) {
+ _pending_cluster_state = state;
+ return *this;
+ }
CheckerParams& blockerMessage(const PendingMessage& blocker) {
_blockerMessage = &blocker;
return *this;
@@ -208,6 +213,11 @@ struct StateCheckersTest : Test, DistributorTestUtil {
addNodesToBucketDB(bid, params._bucketInfo);
setRedundancy(params._redundancy);
enableDistributorClusterState(params._clusterState);
+ if (!params._pending_cluster_state.empty()) {
+ auto cmd = std::make_shared<api::SetSystemStateCommand>(lib::ClusterState(params._pending_cluster_state));
+ _distributor->onDown(cmd);
+ tick(); // Trigger command processing and pending state setup.
+ }
NodeMaintenanceStatsTracker statsTracker;
StateChecker::Context c(
getExternalOperationHandler(), getDistributorBucketSpace(), statsTracker, makeDocumentBucket(bid));
@@ -640,10 +650,8 @@ TEST_F(StateCheckersTest, synchronize_and_move) {
runAndVerify<SynchronizeAndMoveStateChecker>(
CheckerParams().expect(
"[Synchronizing buckets with different checksums "
- "node(idx=0,crc=0x1,docs=1/1,bytes=1/1,trusted=false,"
- "active=false,ready=false), "
- "node(idx=1,crc=0x2,docs=2/2,bytes=2/2,trusted=false,"
- "active=false,ready=false)] "
+ "node(idx=0,crc=0x1,docs=1/1,bytes=1/1,trusted=false,active=false,ready=false), "
+ "node(idx=1,crc=0x2,docs=2/2,bytes=2/2,trusted=false,active=false,ready=false)] "
"(scheduling pri MEDIUM)")
.bucketInfo("0=1,1=2")
.includeSchedulingPriority(true));
@@ -698,12 +706,9 @@ TEST_F(StateCheckersTest, synchronize_and_move) {
runAndVerify<SynchronizeAndMoveStateChecker>(
CheckerParams()
.expect("[Synchronizing buckets with different checksums "
- "node(idx=0,crc=0x3,docs=3/3,bytes=3/3,trusted=false,"
- "active=false,ready=false), "
- "node(idx=1,crc=0x3,docs=3/3,bytes=3/3,trusted=false,"
- "active=false,ready=false), "
- "node(idx=2,crc=0x0,docs=0/0,bytes=0/0,trusted=false,"
- "active=false,ready=false)]")
+ "node(idx=0,crc=0x3,docs=3/3,bytes=3/3,trusted=false,active=false,ready=false), "
+ "node(idx=1,crc=0x3,docs=3/3,bytes=3/3,trusted=false,active=false,ready=false), "
+ "node(idx=2,crc=0x0,docs=0/0,bytes=0/0,trusted=false,active=false,ready=false)]")
.bucketInfo("0=3,1=3,2=0")
.clusterState("distributor:1 storage:3"));
@@ -712,14 +717,10 @@ TEST_F(StateCheckersTest, synchronize_and_move) {
runAndVerify<SynchronizeAndMoveStateChecker>(
CheckerParams()
.expect("[Synchronizing buckets with different checksums "
- "node(idx=0,crc=0x2,docs=3/3,bytes=4/4,trusted=false,"
- "active=false,ready=false), "
- "node(idx=1,crc=0x1,docs=2/2,bytes=3/3,trusted=true,"
- "active=false,ready=false), "
- "node(idx=2,crc=0x1,docs=2/2,bytes=3/3,trusted=true,"
- "active=false,ready=false), "
- "node(idx=3,crc=0x1,docs=2/2,bytes=3/3,trusted=true,"
- "active=false,ready=false)] "
+ "node(idx=0,crc=0x2,docs=3/3,bytes=4/4,trusted=false,active=false,ready=false), "
+ "node(idx=1,crc=0x1,docs=2/2,bytes=3/3,trusted=true,active=false,ready=false), "
+ "node(idx=2,crc=0x1,docs=2/2,bytes=3/3,trusted=true,active=false,ready=false), "
+ "node(idx=3,crc=0x1,docs=2/2,bytes=3/3,trusted=true,active=false,ready=false)] "
"(pri 120) (scheduling pri MEDIUM)")
.bucketInfo("0=2/3/4,1=1/2/3/t,2=1/2/3/t,3=1/2/3/t")
.clusterState("distributor:1 storage:5")
@@ -750,6 +751,25 @@ TEST_F(StateCheckersTest, synchronize_and_move) {
.clusterState("distributor:1 storage:4"));
}
+// Upon entering a cluster state transition edge the distributor will
+// prune all replicas from its DB that are on nodes that are unavailable
+// in the _pending_ state. As long as this state is pending, the _current_
+// state will include these nodes as available. But since replicas for
+// the unavailable node(s) have been pruned away, started merges that
+// involve these nodes as part of their chain are doomed to fail.
+TEST_F(StateCheckersTest, do_not_schedule_merges_when_included_node_is_unavailable_in_pending_state) {
+ runAndVerify<SynchronizeAndMoveStateChecker>(
+ CheckerParams()
+ .expect("NO OPERATIONS GENERATED")
+ .redundancy(3)
+ .bucketInfo("1=1,2=1") // Node 0 pruned from DB since it's s:m in state 2
+ .clusterState("version:1 distributor:2 storage:3")
+ // We change the distributor set as well as the content node set. Just setting a node
+ // into maintenance does not trigger a pending state since it does not require any
+ // bucket info fetches from any of the nodes.
+ .pending_cluster_state("version:2 distributor:1 storage:3 .0.s:m"));
+}
+
TEST_F(StateCheckersTest, do_not_merge_inconsistently_split_buckets) {
// No merge generated if buckets are inconsistently split.
// This matches the case where a bucket has been split into 2 on one
diff --git a/storage/src/vespa/storage/distributor/statechecker.cpp b/storage/src/vespa/storage/distributor/statechecker.cpp
index 12dbd7f3c52..f7ef1122692 100644
--- a/storage/src/vespa/storage/distributor/statechecker.cpp
+++ b/storage/src/vespa/storage/distributor/statechecker.cpp
@@ -66,6 +66,7 @@ StateChecker::Context::Context(const DistributorComponent& c,
: bucket(bucket_),
siblingBucket(c.getSibling(bucket.getBucketId())),
systemState(distributorBucketSpace.getClusterState()),
+ pending_cluster_state(c.getDistributor().pendingClusterStateOrNull(bucket_.getBucketSpace())),
distributorConfig(c.getDistributor().getConfig()),
distribution(distributorBucketSpace.getDistribution()),
gcTimeCalculator(c.getDistributor().getBucketIdHasher(),
@@ -75,12 +76,11 @@ StateChecker::Context::Context(const DistributorComponent& c,
db(distributorBucketSpace.getBucketDatabase()),
stats(statsTracker)
{
- idealState =
- distribution.getIdealStorageNodes(systemState, bucket.getBucketId());
+ idealState = distribution.getIdealStorageNodes(systemState, bucket.getBucketId());
unorderedIdealState.insert(idealState.begin(), idealState.end());
}
-StateChecker::Context::~Context() {}
+StateChecker::Context::~Context() = default;
std::string
StateChecker::Context::toString() const
@@ -99,12 +99,7 @@ StateChecker::Context::toString() const
return ss.str();
}
-StateChecker::StateChecker()
-{
-}
-
-StateChecker::~StateChecker()
-{
-}
+StateChecker::StateChecker() = default;
+StateChecker::~StateChecker() = default;
}
diff --git a/storage/src/vespa/storage/distributor/statechecker.h b/storage/src/vespa/storage/distributor/statechecker.h
index 9c0fe2ef53c..5b9ce065f99 100644
--- a/storage/src/vespa/storage/distributor/statechecker.h
+++ b/storage/src/vespa/storage/distributor/statechecker.h
@@ -64,6 +64,7 @@ public:
// Common
const lib::ClusterState& systemState;
+ const lib::ClusterState* pending_cluster_state; // nullptr if no state is pending.
const DistributorConfiguration& distributorConfig;
const lib::Distribution& distribution;
diff --git a/storage/src/vespa/storage/distributor/statecheckers.cpp b/storage/src/vespa/storage/distributor/statecheckers.cpp
index 0a0abb8d417..dab2025bfbb 100644
--- a/storage/src/vespa/storage/distributor/statecheckers.cpp
+++ b/storage/src/vespa/storage/distributor/statecheckers.cpp
@@ -584,18 +584,29 @@ SplitInconsistentStateChecker::check(StateChecker::Context& c)
}
namespace {
+
bool containsMaintenanceNode(const std::vector<uint16_t>& ideal,
const StateChecker::Context& c)
{
- for (uint32_t i = 0; i < ideal.size(); i++) {
- if (c.systemState.getNodeState(lib::Node(lib::NodeType::STORAGE,
- ideal[i])).getState()
- == lib::State::MAINTENANCE)
- {
+ for (uint16_t n : ideal) {
+ lib::Node node(lib::NodeType::STORAGE, n);
+ if (c.systemState.getNodeState(node).getState() == lib::State::MAINTENANCE) {
return true;
}
}
+ return false;
+}
+bool ideal_node_is_unavailable_in_pending_state(const StateChecker::Context& c) {
+ if (!c.pending_cluster_state) {
+ return false;
+ }
+ for (uint16_t n : c.idealState) {
+ lib::Node node(lib::NodeType::STORAGE, n);
+ if (!c.pending_cluster_state->getNodeState(node).getState().oneOf("uir")){
+ return true;
+ }
+ }
return false;
}
@@ -715,7 +726,7 @@ private:
uint8_t _priority;
};
-MergeNodes::~MergeNodes() {}
+MergeNodes::~MergeNodes() = default;
bool
presentInIdealState(const StateChecker::Context& c, uint16_t node)
@@ -830,6 +841,9 @@ SynchronizeAndMoveStateChecker::check(StateChecker::Context& c)
if (containsMaintenanceNode(c.idealState, c)) {
return Result::noMaintenanceNeeded();
}
+ if (ideal_node_is_unavailable_in_pending_state(c)) {
+ return Result::noMaintenanceNeeded();
+ }
if (allCopiesAreInvalid(c)) {
return Result::noMaintenanceNeeded();
}