summaryrefslogtreecommitdiffstats
path: root/clustercontroller-core
diff options
context:
space:
mode:
authorTor Brede Vekterli <vekterli@verizonmedia.com>2019-10-30 15:05:05 +0100
committerTor Brede Vekterli <vekterli@verizonmedia.com>2019-10-30 15:05:05 +0100
commit056af10e314ef6fc924402238c9719cb473b7f18 (patch)
treeaa7bc119d1b8e07d23a78bfe21167d4c535a42f3 /clustercontroller-core
parent91f027082d74a11016375782a9e4c2effd02f8a5 (diff)
Move grace period event edge from timer to event diff calculator
Ensures that event is only emitted when we're actually publishing a state containing the state transition. Emitting events in the timer code is fragile when it does not modify any state, risking emitting the same event an unbounded amount of times if the condition keeps holding for each timer cycle.
Diffstat (limited to 'clustercontroller-core')
-rw-r--r--clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/ClusterStateGenerator.java21
-rw-r--r--clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/EventDiffCalculator.java34
-rw-r--r--clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/FleetController.java3
-rw-r--r--clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/FleetControllerOptions.java4
-rw-r--r--clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/NodeStateReason.java2
-rw-r--r--clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/StateChangeHandler.java4
-rw-r--r--clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/ClusterStateGeneratorTest.java6
-rw-r--r--clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/EventDiffCalculatorTest.java39
-rw-r--r--clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/StateChangeTest.java15
9 files changed, 103 insertions, 25 deletions
diff --git a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/ClusterStateGenerator.java b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/ClusterStateGenerator.java
index 26fdddb01e5..74f3cc276a5 100644
--- a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/ClusterStateGenerator.java
+++ b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/ClusterStateGenerator.java
@@ -132,7 +132,7 @@ public class ClusterStateGenerator {
final Map<Node, NodeStateReason> nodeStateReasons = new HashMap<>();
for (final NodeInfo nodeInfo : cluster.getNodeInfo()) {
- final NodeState nodeState = computeEffectiveNodeState(nodeInfo, params);
+ final NodeState nodeState = computeEffectiveNodeState(nodeInfo, params, nodeStateReasons);
workingState.setNodeState(nodeInfo.getNode(), nodeState);
}
@@ -159,7 +159,10 @@ public class ClusterStateGenerator {
baseline.setDescription(wanted.getDescription());
}
- private static NodeState computeEffectiveNodeState(final NodeInfo nodeInfo, final Params params) {
+ private static NodeState computeEffectiveNodeState(final NodeInfo nodeInfo,
+ final Params params,
+ Map<Node, NodeStateReason> nodeStateReasons)
+ {
final NodeState reported = nodeInfo.getReportedState();
final NodeState wanted = nodeInfo.getWantedState();
final NodeState baseline = reported.clone();
@@ -171,7 +174,7 @@ public class ClusterStateGenerator {
baseline.setStartTimestamp(0);
}
if (nodeInfo.isStorage()) {
- applyStorageSpecificStateTransforms(nodeInfo, params, reported, wanted, baseline);
+ applyStorageSpecificStateTransforms(nodeInfo, params, reported, wanted, baseline, nodeStateReasons);
}
if (baseline.above(wanted)) {
applyWantedStateToBaselineState(baseline, wanted);
@@ -181,7 +184,8 @@ public class ClusterStateGenerator {
}
private static void applyStorageSpecificStateTransforms(NodeInfo nodeInfo, Params params, NodeState reported,
- NodeState wanted, NodeState baseline)
+ NodeState wanted, NodeState baseline,
+ Map<Node, NodeStateReason> nodeStateReasons)
{
if (reported.getState() == State.INITIALIZING) {
if (timedOutWithoutNewInitProgress(reported, nodeInfo, params)
@@ -195,7 +199,7 @@ public class ClusterStateGenerator {
}
}
// TODO ensure that maintenance cannot override Down for any other cases
- if (withinTemporalMaintenancePeriod(nodeInfo, baseline, params) && wanted.getState() != State.DOWN) {
+ if (withinTemporalMaintenancePeriod(nodeInfo, baseline, nodeStateReasons, params) && wanted.getState() != State.DOWN) {
baseline.setState(State.MAINTENANCE);
}
}
@@ -244,13 +248,18 @@ public class ClusterStateGenerator {
*/
private static boolean withinTemporalMaintenancePeriod(final NodeInfo nodeInfo,
final NodeState baseline,
+ Map<Node, NodeStateReason> nodeStateReasons,
final Params params)
{
final Integer transitionTime = params.transitionTimes.get(nodeInfo.getNode().getType());
if (transitionTime == 0 || !baseline.getState().oneOf("sd")) {
return false;
}
- return nodeInfo.getTransitionTime() + transitionTime > params.currentTimeInMillis;
+ if (nodeInfo.getTransitionTime() + transitionTime > params.currentTimeInMillis) {
+ return true;
+ }
+ nodeStateReasons.put(nodeInfo.getNode(), NodeStateReason.NODE_NOT_BACK_UP_WITHIN_GRACE_PERIOD);
+ return false;
}
private static void takeDownGroupsWithTooLowAvailability(final ClusterState workingState,
diff --git a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/EventDiffCalculator.java b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/EventDiffCalculator.java
index cadc065dd51..2f065a9ba75 100644
--- a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/EventDiffCalculator.java
+++ b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/EventDiffCalculator.java
@@ -31,6 +31,7 @@ public class EventDiffCalculator {
ClusterStateBundle fromState;
ClusterStateBundle toState;
long currentTime;
+ long maxMaintenanceGracePeriodTimeMs;
public Params cluster(ContentCluster cluster) {
this.cluster = cluster;
@@ -48,6 +49,10 @@ public class EventDiffCalculator {
this.currentTime = time;
return this;
}
+ public Params maxMaintenanceGracePeriodTimeMs(long timeMs) {
+ this.maxMaintenanceGracePeriodTimeMs = timeMs;
+ return this;
+ }
}
public static Params params() { return new Params(); }
@@ -58,17 +63,20 @@ public class EventDiffCalculator {
final AnnotatedClusterState fromState;
final AnnotatedClusterState toState;
final long currentTime;
+ final long maxMaintenanceGracePeriodTimeMs;
PerStateParams(ContentCluster cluster,
Optional<String> bucketSpace,
AnnotatedClusterState fromState,
AnnotatedClusterState toState,
- long currentTime) {
+ long currentTime,
+ long maxMaintenanceGracePeriodTimeMs) {
this.cluster = cluster;
this.bucketSpace = bucketSpace;
this.fromState = fromState;
this.toState = toState;
this.currentTime = currentTime;
+ this.maxMaintenanceGracePeriodTimeMs = maxMaintenanceGracePeriodTimeMs;
}
}
@@ -86,7 +94,8 @@ public class EventDiffCalculator {
Optional.empty(),
params.fromState.getBaselineAnnotatedState(),
params.toState.getBaselineAnnotatedState(),
- params.currentTime);
+ params.currentTime,
+ params.maxMaintenanceGracePeriodTimeMs);
}
private static void emitWholeClusterDiffEvent(final PerStateParams params, final List<Event> events) {
@@ -137,11 +146,10 @@ public class EventDiffCalculator {
final NodeState nodeTo = toState.getNodeState(n);
if (!nodeTo.equals(nodeFrom)) {
final NodeInfo info = cluster.getNodeInfo(n);
- events.add(createNodeEvent(info, String.format("Altered node state in cluster state from '%s' to '%s'",
- nodeFrom.toString(true), nodeTo.toString(true)), params));
-
NodeStateReason prevReason = params.fromState.getNodeStateReasons().get(n);
NodeStateReason currReason = params.toState.getNodeStateReasons().get(n);
+ // Add specific reason events for node edge _before_ the actual transition event itself.
+ // This makes the timeline of events more obvious.
if (isGroupDownEdge(prevReason, currReason)) {
events.add(createNodeEvent(info, "Group node availability is below configured threshold", params));
} else if (isGroupUpEdge(prevReason, currReason)) {
@@ -150,7 +158,12 @@ public class EventDiffCalculator {
events.add(createNodeEvent(info, "Node may have merges pending", params));
} else if (isMayHaveMergesPendingDownEdge(prevReason, currReason)) {
events.add(createNodeEvent(info, "Node no longer has merges pending", params));
+ } else if (isMaintenanceGracePeriodExceededDownEdge(prevReason, currReason, nodeFrom, nodeTo)) {
+ events.add(createNodeEvent(info, String.format("Exceeded implicit maintenance mode grace period of " +
+ "%d milliseconds. Marking node down.", params.maxMaintenanceGracePeriodTimeMs), params));
}
+ events.add(createNodeEvent(info, String.format("Altered node state in cluster state from '%s' to '%s'",
+ nodeFrom.toString(true), nodeTo.toString(true)), params));
}
}
@@ -178,6 +191,14 @@ public class EventDiffCalculator {
return prevReason == NodeStateReason.MAY_HAVE_MERGES_PENDING && currReason != NodeStateReason.MAY_HAVE_MERGES_PENDING;
}
+ private static boolean isMaintenanceGracePeriodExceededDownEdge(NodeStateReason prevReason, NodeStateReason currReason,
+ NodeState prevState, NodeState currState) {
+ return (prevReason != NodeStateReason.NODE_NOT_BACK_UP_WITHIN_GRACE_PERIOD &&
+ currReason == NodeStateReason.NODE_NOT_BACK_UP_WITHIN_GRACE_PERIOD &&
+ prevState.getState() == State.MAINTENANCE &&
+ currState.getState() == State.DOWN);
+ }
+
private static boolean clusterHasTransitionedToUpState(ClusterState prevState, ClusterState currentState) {
return prevState.getClusterState() != State.UP && currentState.getClusterState() == State.UP;
}
@@ -207,7 +228,8 @@ public class EventDiffCalculator {
Optional.of(bucketSpace),
fromDerivedState,
toDerivedState,
- params.currentTime);
+ params.currentTime,
+ params.maxMaintenanceGracePeriodTimeMs);
}
}
diff --git a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/FleetController.java b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/FleetController.java
index 364184331a8..b4965fbff7b 100644
--- a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/FleetController.java
+++ b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/FleetController.java
@@ -928,7 +928,8 @@ public class FleetController implements NodeStateOrHostInfoChangeHandler, NodeAd
.cluster(cluster)
.fromState(fromState)
.toState(toState)
- .currentTimeMs(timeNowMs));
+ .currentTimeMs(timeNowMs)
+ .maxMaintenanceGracePeriodTimeMs(options.storageNodeMaxTransitionTimeMs()));
for (Event event : deltaEvents) {
eventLog.add(event, isMaster);
}
diff --git a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/FleetControllerOptions.java b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/FleetControllerOptions.java
index f49b626d347..5e9e91e1cb6 100644
--- a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/FleetControllerOptions.java
+++ b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/FleetControllerOptions.java
@@ -157,6 +157,10 @@ public class FleetControllerOptions implements Cloneable {
this.maxDeferredTaskVersionWaitTime = maxDeferredTaskVersionWaitTime;
}
+ public long storageNodeMaxTransitionTimeMs() {
+ return maxTransitionTime.getOrDefault(NodeType.STORAGE, 10_000);
+ }
+
public FleetControllerOptions clone() {
try {
// TODO: This should deep clone
diff --git a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/NodeStateReason.java b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/NodeStateReason.java
index 7a6be664ec8..3f550724cef 100644
--- a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/NodeStateReason.java
+++ b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/NodeStateReason.java
@@ -6,8 +6,8 @@ public enum NodeStateReason {
// FIXME some of these reasons may be unnecessary as they are reported implicitly by reported/wanted state changes
NODE_TOO_UNSTABLE,
WITHIN_MAINTENANCE_GRACE_PERIOD,
+ NODE_NOT_BACK_UP_WITHIN_GRACE_PERIOD,
FORCED_INTO_MAINTENANCE,
GROUP_IS_DOWN,
MAY_HAVE_MERGES_PENDING
-
}
diff --git a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/StateChangeHandler.java b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/StateChangeHandler.java
index 3f38ea6c018..3c19f70d1e2 100644
--- a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/StateChangeHandler.java
+++ b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/StateChangeHandler.java
@@ -305,10 +305,6 @@ public class StateChangeHandler {
if (nodeStillUnavailableAfterTransitionTimeExceeded(
currentTime, node, currentStateInSystem, lastReportedState))
{
- eventLog.add(NodeEvent.forBaseline(node, String.format(
- "%d milliseconds without contact. Marking node down.",
- currentTime - node.getTransitionTime()),
- NodeEvent.Type.CURRENT, currentTime), isMaster);
triggeredAnyTimers = true;
}
diff --git a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/ClusterStateGeneratorTest.java b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/ClusterStateGeneratorTest.java
index 2f91e569b89..08329c874b5 100644
--- a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/ClusterStateGeneratorTest.java
+++ b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/ClusterStateGeneratorTest.java
@@ -12,7 +12,9 @@ import java.util.Optional;
import static com.yahoo.vespa.clustercontroller.core.matchers.HasStateReasonForNode.hasStateReasonForNode;
import static com.yahoo.vespa.clustercontroller.core.ClusterFixture.storageNode;
+import static com.yahoo.vespa.clustercontroller.core.ClusterFixture.distributorNode;
+import static org.hamcrest.CoreMatchers.not;
import static org.hamcrest.core.IsEqual.equalTo;
import static org.hamcrest.core.Is.is;
import static org.junit.Assert.assertThat;
@@ -340,6 +342,8 @@ public class ClusterStateGeneratorTest {
final AnnotatedClusterState state = ClusterStateGenerator.generatedStateFrom(params);
assertThat(state.toString(), equalTo("distributor:5 storage:5 .1.s:d"));
+ assertThat(state.getNodeStateReasons(),
+ hasStateReasonForNode(storageNode(1), NodeStateReason.NODE_NOT_BACK_UP_WITHIN_GRACE_PERIOD));
}
@Test
@@ -355,6 +359,8 @@ public class ClusterStateGeneratorTest {
final AnnotatedClusterState state = ClusterStateGenerator.generatedStateFrom(params);
assertThat(state.toString(), equalTo("distributor:5 .2.s:d storage:5"));
+ assertThat(state.getNodeStateReasons(),
+ not(hasStateReasonForNode(distributorNode(1), NodeStateReason.NODE_NOT_BACK_UP_WITHIN_GRACE_PERIOD)));
}
@Test
diff --git a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/EventDiffCalculatorTest.java b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/EventDiffCalculatorTest.java
index e7c4bbfcaa8..ab8d73be99d 100644
--- a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/EventDiffCalculatorTest.java
+++ b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/EventDiffCalculatorTest.java
@@ -32,6 +32,7 @@ public class EventDiffCalculatorTest {
Map<String, AnnotatedClusterState.Builder> derivedBefore = new HashMap<>();
Map<String, AnnotatedClusterState.Builder> derivedAfter = new HashMap<>();
long currentTimeMs = 0;
+ long maxMaintenanceGracePeriodTimeMs = 10_000;
EventFixture(int nodeCount) {
this.clusterFixture = ClusterFixture.forFlatCluster(nodeCount);
@@ -65,6 +66,10 @@ public class EventDiffCalculatorTest {
this.currentTimeMs = timeMs;
return this;
}
+ EventFixture maxMaintenanceGracePeriodTimeMs(long timeMs) {
+ this.maxMaintenanceGracePeriodTimeMs = timeMs;
+ return this;
+ }
EventFixture derivedClusterStateBefore(String bucketSpace, String stateStr) {
getBuilder(derivedBefore, bucketSpace).clusterState(stateStr);
return this;
@@ -91,7 +96,8 @@ public class EventDiffCalculatorTest {
.cluster(clusterFixture.cluster())
.fromState(ClusterStateBundle.of(baselineBefore.build(), toDerivedStates(derivedBefore)))
.toState(ClusterStateBundle.of(baselineAfter.build(), toDerivedStates(derivedAfter)))
- .currentTimeMs(currentTimeMs));
+ .currentTimeMs(currentTimeMs)
+ .maxMaintenanceGracePeriodTimeMs(maxMaintenanceGracePeriodTimeMs));
}
private static Map<String, AnnotatedClusterState> toDerivedStates(Map<String, AnnotatedClusterState.Builder> derivedBuilders) {
@@ -407,4 +413,35 @@ public class EventDiffCalculatorTest {
nodeEventWithDescription("Altered node state in cluster state from 'U' to 'M'"))));
}
+ @Test
+ public void storage_node_passed_maintenance_grace_period_emits_event() {
+ final EventFixture fixture = EventFixture.createForNodes(3)
+ .clusterStateBefore("distributor:3 storage:3 .0.s:m")
+ .clusterStateAfter("distributor:3 storage:3 .0.s:d")
+ .maxMaintenanceGracePeriodTimeMs(123_456)
+ .storageNodeReasonAfter(0, NodeStateReason.NODE_NOT_BACK_UP_WITHIN_GRACE_PERIOD);
+
+ final List<Event> events = fixture.computeEventDiff();
+ // Down edge event + event explaining why the node went down
+ assertThat(events.size(), equalTo(2));
+ assertThat(events, hasItem(allOf(
+ eventForNode(storageNode(0)),
+ nodeEventWithDescription("Exceeded implicit maintenance mode grace period of 123456 milliseconds. Marking node down."),
+ nodeEventForBaseline())));
+ }
+
+ @Test
+ public void storage_node_maintenance_grace_period_event_only_emitted_on_maintenance_to_down_edge() {
+ final EventFixture fixture = EventFixture.createForNodes(3)
+ .clusterStateBefore("distributor:3 storage:3 .0.s:u")
+ .clusterStateAfter("distributor:3 storage:3 .0.s:d")
+ .maxMaintenanceGracePeriodTimeMs(123_456)
+ .storageNodeReasonAfter(0, NodeStateReason.NODE_NOT_BACK_UP_WITHIN_GRACE_PERIOD);
+ final List<Event> events = fixture.computeEventDiff();
+ assertThat(events.size(), equalTo(1));
+ assertThat(events, hasItem(allOf(
+ eventForNode(storageNode(0)),
+ nodeEventForBaseline())));
+ }
+
}
diff --git a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/StateChangeTest.java b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/StateChangeTest.java
index 016cce9580e..2c8220c0dba 100644
--- a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/StateChangeTest.java
+++ b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/StateChangeTest.java
@@ -247,7 +247,7 @@ public class StateChangeTest extends FleetControllerTest {
"Event: storage.0: Failed to get node state: D: Closed at other end\n" +
"Event: storage.0: Stopped or possibly crashed after 1000 ms, which is before stable state time period. Premature crash count is now 1.\n" +
"Event: storage.0: Altered node state in cluster state from 'U' to 'M: Closed at other end'\n" +
- "Event: storage.0: 5001 milliseconds without contact. Marking node down.\n" +
+ "Event: storage.0: Exceeded implicit maintenance mode grace period of 5000 milliseconds. Marking node down.\n" +
"Event: storage.0: Altered node state in cluster state from 'M: Closed at other end' to 'D: Closed at other end'\n" +
"Event: storage.0: Now reporting state U, t 12345679\n" +
"Event: storage.0: Altered node state in cluster state from 'D: Closed at other end' to 'U, t 12345679'\n");
@@ -326,7 +326,7 @@ public class StateChangeTest extends FleetControllerTest {
"Event: storage.0: Altered node state in cluster state from 'D: Node not seen in slobrok.' to 'U'\n" +
"Event: storage.0: Failed to get node state: D: controlled shutdown\n" +
"Event: storage.0: Altered node state in cluster state from 'U' to 'M: controlled shutdown'\n" +
- "Event: storage.0: 5001 milliseconds without contact. Marking node down.\n" +
+ "Event: storage.0: Exceeded implicit maintenance mode grace period of 5000 milliseconds. Marking node down.\n" +
"Event: storage.0: Altered node state in cluster state from 'M: controlled shutdown' to 'D: controlled shutdown'\n" +
"Event: storage.0: Now reporting state U\n" +
"Event: storage.0: Altered node state in cluster state from 'D: controlled shutdown' to 'U'\n");
@@ -569,7 +569,7 @@ public class StateChangeTest extends FleetControllerTest {
"Event: storage.6: Altered node state in cluster state from 'D: Node not seen in slobrok.' to 'U'\n" +
"Event: storage.6: Failed to get node state: D: Connection error: Closed at other end\n" +
"Event: storage.6: Altered node state in cluster state from 'U' to 'M: Connection error: Closed at other end'\n" +
- "Event: storage.6: 100000 milliseconds without contact. Marking node down.\n" +
+ "Event: storage.6: Exceeded implicit maintenance mode grace period of 5000 milliseconds. Marking node down.\n" +
"Event: storage.6: Altered node state in cluster state from 'M: Connection error: Closed at other end' to 'D: Connection error: Closed at other end'\n" +
"Event: storage.6: Now reporting state I, i 0.00100 (ls)\n" +
"Event: storage.6: Now reporting state I, i 0.100 (read)\n" +
@@ -650,7 +650,7 @@ public class StateChangeTest extends FleetControllerTest {
"Event: storage.6: Altered node state in cluster state from 'D: Node not seen in slobrok.' to 'U'\n" +
"Event: storage.6: Failed to get node state: D: Connection error: Closed at other end\n" +
"Event: storage.6: Altered node state in cluster state from 'U' to 'M: Connection error: Closed at other end'\n" +
- "Event: storage.6: 1000000 milliseconds without contact. Marking node down.\n" +
+ "Event: storage.6: Exceeded implicit maintenance mode grace period of 5000 milliseconds. Marking node down.\n" +
"Event: storage.6: Altered node state in cluster state from 'M: Connection error: Closed at other end' to 'D: Connection error: Closed at other end'\n" +
"Event: storage.6: Now reporting state I, i 0.100 (read)\n" +
"Event: storage.6: Altered node state in cluster state from 'D: Connection error: Closed at other end' to 'I, i 0.100 (read)'\n" +
@@ -1213,8 +1213,11 @@ public class StateChangeTest extends FleetControllerTest {
"Event: storage.2: Altered node state in cluster state from 'D: Node not seen in slobrok.' to 'U'\n" +
"Event: storage.2: Failed to get node state: D: foo\n" +
"Event: storage.2: Stopped or possibly crashed after 500 ms, which is before stable state time period. Premature crash count is now 1.\n" +
- "Event: storage.2: Altered node state in cluster state from 'U' to 'M: foo'\n" +
- "Event: storage.2: 5000 milliseconds without contact. Marking node down.\n");
+ "Event: storage.2: Altered node state in cluster state from 'U' to 'M: foo'\n");
+ // Note: even though max transition time has passed, events are now emitted only on cluster state
+ // publish edges. These are currently suppressed when the cluster state is down, as all cluster down
+ // states are considered similar to other cluster down states. This is not necessarily optimal, but
+ // if the cluster is down there are bigger problems than not having some debug events logged.
}
@Test