summaryrefslogtreecommitdiffstats
path: root/storage/src
diff options
context:
space:
mode:
authorTor Brede Vekterli <vekterli@verizonmedia.com>2019-09-19 13:37:40 +0000
committerTor Brede Vekterli <vekterli@verizonmedia.com>2019-09-19 13:44:02 +0000
commitcf2056375dc2b12e2833efd002019f36fcfb035d (patch)
tree298f1960c252ff2e4bc5b5e1d1e6f26552d726b6 /storage/src
parent4746c714d1a59e69a75bc328cb52b540445fa04f (diff)
Inhibit merges when ideal node is unavailable in pending state
Upon entering a cluster state transition edge the distributor will prune all replicas from its DB that are on nodes that are unavailable in the _pending_ state. As long as this state is pending, the _current_ state will include these nodes as available. But since replicas for the unavailable node(s) have been pruned away, started merges that involve these nodes as part of their chain are doomed to fail. We therefore inhibit such merges from being started in the first place.
Diffstat (limited to 'storage/src')
-rw-r--r--storage/src/tests/distributor/statecheckerstest.cpp56
-rw-r--r--storage/src/vespa/storage/distributor/statechecker.cpp15
-rw-r--r--storage/src/vespa/storage/distributor/statechecker.h1
-rw-r--r--storage/src/vespa/storage/distributor/statecheckers.cpp26
4 files changed, 64 insertions, 34 deletions
diff --git a/storage/src/tests/distributor/statecheckerstest.cpp b/storage/src/tests/distributor/statecheckerstest.cpp
index 7282d2e7d2a..0c1ec1e77de 100644
--- a/storage/src/tests/distributor/statecheckerstest.cpp
+++ b/storage/src/tests/distributor/statecheckerstest.cpp
@@ -158,6 +158,7 @@ struct StateCheckersTest : Test, DistributorTestUtil {
struct CheckerParams {
std::string _bucketInfo;
std::string _clusterState {"distributor:1 storage:2"};
+ std::string _pending_cluster_state;
std::string _expect;
static const PendingMessage NO_OP_BLOCKER;
const PendingMessage* _blockerMessage {&NO_OP_BLOCKER};
@@ -182,6 +183,10 @@ struct StateCheckersTest : Test, DistributorTestUtil {
_clusterState = state;
return *this;
}
+ CheckerParams& pending_cluster_state(const std::string& state) {
+ _pending_cluster_state = state;
+ return *this;
+ }
CheckerParams& blockerMessage(const PendingMessage& blocker) {
_blockerMessage = &blocker;
return *this;
@@ -208,6 +213,11 @@ struct StateCheckersTest : Test, DistributorTestUtil {
addNodesToBucketDB(bid, params._bucketInfo);
setRedundancy(params._redundancy);
enableDistributorClusterState(params._clusterState);
+ if (!params._pending_cluster_state.empty()) {
+ auto cmd = std::make_shared<api::SetSystemStateCommand>(lib::ClusterState(params._pending_cluster_state));
+ _distributor->onDown(cmd);
+ tick(); // Trigger command processing and pending state setup.
+ }
NodeMaintenanceStatsTracker statsTracker;
StateChecker::Context c(
getExternalOperationHandler(), getDistributorBucketSpace(), statsTracker, makeDocumentBucket(bid));
@@ -640,10 +650,8 @@ TEST_F(StateCheckersTest, synchronize_and_move) {
runAndVerify<SynchronizeAndMoveStateChecker>(
CheckerParams().expect(
"[Synchronizing buckets with different checksums "
- "node(idx=0,crc=0x1,docs=1/1,bytes=1/1,trusted=false,"
- "active=false,ready=false), "
- "node(idx=1,crc=0x2,docs=2/2,bytes=2/2,trusted=false,"
- "active=false,ready=false)] "
+ "node(idx=0,crc=0x1,docs=1/1,bytes=1/1,trusted=false,active=false,ready=false), "
+ "node(idx=1,crc=0x2,docs=2/2,bytes=2/2,trusted=false,active=false,ready=false)] "
"(scheduling pri MEDIUM)")
.bucketInfo("0=1,1=2")
.includeSchedulingPriority(true));
@@ -698,12 +706,9 @@ TEST_F(StateCheckersTest, synchronize_and_move) {
runAndVerify<SynchronizeAndMoveStateChecker>(
CheckerParams()
.expect("[Synchronizing buckets with different checksums "
- "node(idx=0,crc=0x3,docs=3/3,bytes=3/3,trusted=false,"
- "active=false,ready=false), "
- "node(idx=1,crc=0x3,docs=3/3,bytes=3/3,trusted=false,"
- "active=false,ready=false), "
- "node(idx=2,crc=0x0,docs=0/0,bytes=0/0,trusted=false,"
- "active=false,ready=false)]")
+ "node(idx=0,crc=0x3,docs=3/3,bytes=3/3,trusted=false,active=false,ready=false), "
+ "node(idx=1,crc=0x3,docs=3/3,bytes=3/3,trusted=false,active=false,ready=false), "
+ "node(idx=2,crc=0x0,docs=0/0,bytes=0/0,trusted=false,active=false,ready=false)]")
.bucketInfo("0=3,1=3,2=0")
.clusterState("distributor:1 storage:3"));
@@ -712,14 +717,10 @@ TEST_F(StateCheckersTest, synchronize_and_move) {
runAndVerify<SynchronizeAndMoveStateChecker>(
CheckerParams()
.expect("[Synchronizing buckets with different checksums "
- "node(idx=0,crc=0x2,docs=3/3,bytes=4/4,trusted=false,"
- "active=false,ready=false), "
- "node(idx=1,crc=0x1,docs=2/2,bytes=3/3,trusted=true,"
- "active=false,ready=false), "
- "node(idx=2,crc=0x1,docs=2/2,bytes=3/3,trusted=true,"
- "active=false,ready=false), "
- "node(idx=3,crc=0x1,docs=2/2,bytes=3/3,trusted=true,"
- "active=false,ready=false)] "
+ "node(idx=0,crc=0x2,docs=3/3,bytes=4/4,trusted=false,active=false,ready=false), "
+ "node(idx=1,crc=0x1,docs=2/2,bytes=3/3,trusted=true,active=false,ready=false), "
+ "node(idx=2,crc=0x1,docs=2/2,bytes=3/3,trusted=true,active=false,ready=false), "
+ "node(idx=3,crc=0x1,docs=2/2,bytes=3/3,trusted=true,active=false,ready=false)] "
"(pri 120) (scheduling pri MEDIUM)")
.bucketInfo("0=2/3/4,1=1/2/3/t,2=1/2/3/t,3=1/2/3/t")
.clusterState("distributor:1 storage:5")
@@ -750,6 +751,25 @@ TEST_F(StateCheckersTest, synchronize_and_move) {
.clusterState("distributor:1 storage:4"));
}
+// Upon entering a cluster state transition edge the distributor will
+// prune all replicas from its DB that are on nodes that are unavailable
+// in the _pending_ state. As long as this state is pending, the _current_
+// state will include these nodes as available. But since replicas for
+// the unavailable node(s) have been pruned away, started merges that
+// involve these nodes as part of their chain are doomed to fail.
+TEST_F(StateCheckersTest, do_not_schedule_merges_when_included_node_is_unavailable_in_pending_state) {
+ runAndVerify<SynchronizeAndMoveStateChecker>(
+ CheckerParams()
+ .expect("NO OPERATIONS GENERATED")
+ .redundancy(3)
+ .bucketInfo("1=1,2=1") // Node 0 pruned from DB since it's s:m in state 2
+ .clusterState("version:1 distributor:2 storage:3")
+ // We change the distributor set as well as the content node set. Just setting a node
+ // into maintenance does not trigger a pending state since it does not require any
+ // bucket info fetches from any of the nodes.
+ .pending_cluster_state("version:2 distributor:1 storage:3 .0.s:m"));
+}
+
TEST_F(StateCheckersTest, do_not_merge_inconsistently_split_buckets) {
// No merge generated if buckets are inconsistently split.
// This matches the case where a bucket has been split into 2 on one
diff --git a/storage/src/vespa/storage/distributor/statechecker.cpp b/storage/src/vespa/storage/distributor/statechecker.cpp
index 12dbd7f3c52..f7ef1122692 100644
--- a/storage/src/vespa/storage/distributor/statechecker.cpp
+++ b/storage/src/vespa/storage/distributor/statechecker.cpp
@@ -66,6 +66,7 @@ StateChecker::Context::Context(const DistributorComponent& c,
: bucket(bucket_),
siblingBucket(c.getSibling(bucket.getBucketId())),
systemState(distributorBucketSpace.getClusterState()),
+ pending_cluster_state(c.getDistributor().pendingClusterStateOrNull(bucket_.getBucketSpace())),
distributorConfig(c.getDistributor().getConfig()),
distribution(distributorBucketSpace.getDistribution()),
gcTimeCalculator(c.getDistributor().getBucketIdHasher(),
@@ -75,12 +76,11 @@ StateChecker::Context::Context(const DistributorComponent& c,
db(distributorBucketSpace.getBucketDatabase()),
stats(statsTracker)
{
- idealState =
- distribution.getIdealStorageNodes(systemState, bucket.getBucketId());
+ idealState = distribution.getIdealStorageNodes(systemState, bucket.getBucketId());
unorderedIdealState.insert(idealState.begin(), idealState.end());
}
-StateChecker::Context::~Context() {}
+StateChecker::Context::~Context() = default;
std::string
StateChecker::Context::toString() const
@@ -99,12 +99,7 @@ StateChecker::Context::toString() const
return ss.str();
}
-StateChecker::StateChecker()
-{
-}
-
-StateChecker::~StateChecker()
-{
-}
+StateChecker::StateChecker() = default;
+StateChecker::~StateChecker() = default;
}
diff --git a/storage/src/vespa/storage/distributor/statechecker.h b/storage/src/vespa/storage/distributor/statechecker.h
index 9c0fe2ef53c..5b9ce065f99 100644
--- a/storage/src/vespa/storage/distributor/statechecker.h
+++ b/storage/src/vespa/storage/distributor/statechecker.h
@@ -64,6 +64,7 @@ public:
// Common
const lib::ClusterState& systemState;
+ const lib::ClusterState* pending_cluster_state; // nullptr if no state is pending.
const DistributorConfiguration& distributorConfig;
const lib::Distribution& distribution;
diff --git a/storage/src/vespa/storage/distributor/statecheckers.cpp b/storage/src/vespa/storage/distributor/statecheckers.cpp
index 0a0abb8d417..dab2025bfbb 100644
--- a/storage/src/vespa/storage/distributor/statecheckers.cpp
+++ b/storage/src/vespa/storage/distributor/statecheckers.cpp
@@ -584,18 +584,29 @@ SplitInconsistentStateChecker::check(StateChecker::Context& c)
}
namespace {
+
bool containsMaintenanceNode(const std::vector<uint16_t>& ideal,
const StateChecker::Context& c)
{
- for (uint32_t i = 0; i < ideal.size(); i++) {
- if (c.systemState.getNodeState(lib::Node(lib::NodeType::STORAGE,
- ideal[i])).getState()
- == lib::State::MAINTENANCE)
- {
+ for (uint16_t n : ideal) {
+ lib::Node node(lib::NodeType::STORAGE, n);
+ if (c.systemState.getNodeState(node).getState() == lib::State::MAINTENANCE) {
return true;
}
}
+ return false;
+}
+bool ideal_node_is_unavailable_in_pending_state(const StateChecker::Context& c) {
+ if (!c.pending_cluster_state) {
+ return false;
+ }
+ for (uint16_t n : c.idealState) {
+ lib::Node node(lib::NodeType::STORAGE, n);
+ if (!c.pending_cluster_state->getNodeState(node).getState().oneOf("uir")){
+ return true;
+ }
+ }
return false;
}
@@ -715,7 +726,7 @@ private:
uint8_t _priority;
};
-MergeNodes::~MergeNodes() {}
+MergeNodes::~MergeNodes() = default;
bool
presentInIdealState(const StateChecker::Context& c, uint16_t node)
@@ -830,6 +841,9 @@ SynchronizeAndMoveStateChecker::check(StateChecker::Context& c)
if (containsMaintenanceNode(c.idealState, c)) {
return Result::noMaintenanceNeeded();
}
+ if (ideal_node_is_unavailable_in_pending_state(c)) {
+ return Result::noMaintenanceNeeded();
+ }
if (allCopiesAreInvalid(c)) {
return Result::noMaintenanceNeeded();
}