aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorTor Brede Vekterli <vekterli@yahoo-inc.com>2016-12-09 17:01:26 +0100
committerTor Brede Vekterli <vekterli@yahoo-inc.com>2016-12-09 17:37:57 +0100
commit8d91f9f5aa262d230eab778c1228f2d199eaea0e (patch)
tree8fc7fa5dccdca43b013081ab792e91b193f1736d
parentf0b529841383c7fc24fbb6ed84ab0c1b57a88a9d (diff)
Use latest candidate cluster state when comparing against reported node states
Using just the versioned cluster state instead can cause the code to erroneously believe that it is seeing repeated reported state changes for the first time. This happens when the diffs in the reported node states are not in and of themselves enough to trigger a new cluster state version containing the changes. This can in turn spam the logs and event buffers until a new cluster state has been versioned.
-rw-r--r--clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/FleetController.java6
-rw-r--r--clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/StateChangeTest.java41
-rw-r--r--clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/StateVersionTrackerTest.java6
3 files changed, 52 insertions, 1 deletions
diff --git a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/FleetController.java b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/FleetController.java
index b21cae4ed71..2a9c1e0e9b3 100644
--- a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/FleetController.java
+++ b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/FleetController.java
@@ -299,10 +299,14 @@ public class FleetController implements NodeStateOrHostInfoChangeHandler, NodeAd
}
}
+ private ClusterState latestCandidateClusterState() {
+ return stateVersionTracker.getLatestCandidateState().getClusterState();
+ }
+
@Override
public void handleNewNodeState(NodeInfo node, NodeState newState) {
verifyInControllerThread();
- stateChangeHandler.handleNewReportedNodeState(stateVersionTracker.getVersionedClusterState(), node, newState, this);
+ stateChangeHandler.handleNewReportedNodeState(latestCandidateClusterState(), node, newState, this);
}
@Override
diff --git a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/StateChangeTest.java b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/StateChangeTest.java
index c31f80d9b53..20f37768970 100644
--- a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/StateChangeTest.java
+++ b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/StateChangeTest.java
@@ -1198,4 +1198,45 @@ public class StateChangeTest extends FleetControllerTest {
"Event: storage.2: 5000 milliseconds without contact. Marking node down.\n");
}
+ @Test
+ public void do_not_emit_multiple_events_when_node_state_does_not_match_versioned_state() throws Exception {
+ FleetControllerOptions options = new FleetControllerOptions("mycluster", createNodes(10));
+ initialize(options);
+
+ ctrl.tick();
+ communicator.setNodeState(
+ new Node(NodeType.STORAGE, 2),
+ new NodeState(NodeType.STORAGE, State.INITIALIZING)
+ .setInitProgress(0.1).setMinUsedBits(16), "");
+
+ ctrl.tick();
+
+ // Node 2 is in Init mode with 16 min used bits. Emulate continuous init progress reports
+ // from the content node which also contain an updated min used bits value. Since init progress
+ // and min used bits changes do not by themselves trigger a new cluster state version,
+ // deciding whether to emit new events by comparing the reported node state against the
+ // versioned cluster state will cause the code to believe there's been a change every
+ // time a new message is received. This will cause a lot of "Altered min distribution
+ // bit count" events to be emitted, one per init progress update from the content node.
+ // There may be thousands of such updates from each node during its init sequence, so
+ // this gets old really quickly.
+ for (int i = 1; i < 10; ++i) {
+ communicator.setNodeState(
+ new Node(NodeType.STORAGE, 2),
+ new NodeState(NodeType.STORAGE, State.INITIALIZING)
+ .setInitProgress((i * 0.1) + 0.1).setMinUsedBits(17), "");
+ timer.advanceTime(1000);
+ ctrl.tick();
+ }
+
+ // We should only get the "Altered min distribution bit count" event once, not 9 times.
+ verifyNodeEvents(new Node(NodeType.STORAGE, 2),
+ "Event: storage.2: Now reporting state U\n" +
+ "Event: storage.2: Altered node state in cluster state from 'D: Node not seen in slobrok.' to 'U'\n" +
+ "Event: storage.2: Now reporting state I, i 0.100 (read)\n" +
+ "Event: storage.2: Altered node state in cluster state from 'U' to 'I, i 0.100 (read)'\n" +
+ "Event: storage.2: Altered min distribution bit count from 16 to 17\n");
+
+ }
+
}
diff --git a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/StateVersionTrackerTest.java b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/StateVersionTrackerTest.java
index 72f8c9fb8b7..db66c4c9046 100644
--- a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/StateVersionTrackerTest.java
+++ b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/StateVersionTrackerTest.java
@@ -133,6 +133,12 @@ public class StateVersionTrackerTest {
}
@Test
+ public void init_progress_change_not_counted_as_changed_state() {
+ assertFalse(stateChangedBetween("distributor:2 storage:2 .0.s:i .0.i:0.5",
+ "distributor:2 storage:2 .0.s:i .0.i:0.6"));
+ }
+
+ @Test
public void lowest_observed_distribution_bit_is_initially_16() {
final StateVersionTracker versionTracker = createWithMockedMetrics();
assertThat(versionTracker.getLowestObservedDistributionBits(), equalTo(16));