author    Tor Brede Vekterli <vekterli@yahoo-inc.com>  2016-06-17 16:49:39 +0200
committer Tor Brede Vekterli <vekterli@yahoo-inc.com>  2016-06-17 16:49:39 +0200
commit    90c3b21d21d83f2cc4b148afe4d4e6278d1f8394 (patch)
tree      da4dde815dbcf47eb4744091dab1b66aebc65ccc /clustercontroller-core
parent    916e5a5b8a4574bb2d878c5b07c97b1678df81b3 (diff)
Don't reintroduce already observed timestamps in cluster state
Also address code review comments.
Diffstat (limited to 'clustercontroller-core')
-rw-r--r-- clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/SystemStateGenerator.java | 34
-rw-r--r-- clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/GroupAutoTakedownTest.java | 35
2 files changed, 60 insertions, 9 deletions
diff --git a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/SystemStateGenerator.java b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/SystemStateGenerator.java
index 3dd1216d5d0..2af9fe1f091 100644
--- a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/SystemStateGenerator.java
+++ b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/SystemStateGenerator.java
@@ -336,6 +336,16 @@ public class SystemStateGenerator {
baseline.setState(wanted.getState());
}
}
+ // Don't reintroduce the start timestamp to the node's state if it has already been
+ // observed by all distributors. This matches how handleNewReportedNodeState() sets timestamps.
+ // TODO: make timestamp semantics clearer; it is non-obvious what the two different timestamp stores imply.
+ // For posterity: reported.getStartTimestamp() is the start timestamp the node itself has stated.
+ // info.getStartTimestamp() is the timestamp written as having been observed by all distributors
+ // (which is done in handleAllDistributorsInSync()).
+ if (reported.getStartTimestamp() <= info.getStartTimestamp()) {
+ baseline.setStartTimestamp(0);
+ }
+
return baseline;
}
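To make the new timestamp rule concrete: a node's self-reported start timestamp is published in the generated state only until handleAllDistributorsInSync() records it as observed by every distributor; after that, re-publishing it would reintroduce a timestamp the distributors have already acted on. A minimal sketch of that decision follows, using illustrative names rather than the actual SystemStateGenerator API:

// Sketch only; reportedTs mirrors reported.getStartTimestamp() and
// observedTs mirrors info.getStartTimestamp() from the diff above.
final class StartTimestampRuleSketch {
    static long publishedStartTimestamp(long reportedTs, long observedTs) {
        // Once all distributors have observed the reported timestamp,
        // clear it (0) instead of reintroducing it into the cluster state.
        return (reportedTs <= observedTs) ? 0 : reportedTs;
    }

    public static void main(String[] args) {
        System.out.println(publishedStartTimestamp(123456, 0));      // 123456: not yet observed
        System.out.println(publishedStartTimestamp(123456, 123456)); // 0: already observed by all
    }
}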
@@ -349,24 +359,32 @@ public class SystemStateGenerator {
final Node node = storageNode(i);
final NodeInfo info = cluster.getNodeInfo(node);
final NodeState currentState = candidateState.getNodeState(node);
- if (currentState.getState() == State.DOWN) {
- if (mayClearNodeDownState(info)) {
- candidateState.setNodeState(node, baselineNodeState(info));
- clearedNodes.add(i);
- }
+ if (mayClearCurrentNodeState(currentState, info)) {
+ candidateState.setNodeState(node, baselineNodeState(info));
+ clearedNodes.add(i);
}
}
return clearedNodes;
}
- private boolean mayClearNodeDownState(NodeInfo info) {
+ private boolean mayClearCurrentNodeState(NodeState currentState, NodeInfo info) {
+ if (currentState.getState() != State.DOWN) {
+ return false;
+ }
if (info == null) {
// Nothing known about node in cluster info; we definitely don't want it
// to be taken up at this point.
return false;
}
+ // Rationale: we can only reach this point if the _current_ (generated) state
+ // of the node is Down. Aside from the group take-down logic, there should not exist
+ // any other edge in the cluster controller state transition logic where a node
+ // may be set Down while its reported state, RPC connectivity and wanted state
+ // all imply that a better state should already have been chosen. Consequently we
+ // allow the node to have its Down-state cleared.
return (info.getReportedState().getState() != State.DOWN
- && info.getWantedState().getState().oneOf("ur"));
+ && !info.isRpcAddressOutdated()
+ && !info.getWantedState().getState().oneOf("d"));
}
private ClusterStateView createNextVersionOfClusterStateView(ContentCluster cluster) {
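Note the semantic shift in the predicate above: the old code cleared a Down state only when the wanted state was Up or Retired (oneOf("ur")), whereas the new code clears it for any wanted state other than Down and additionally requires the node's RPC address to be current. A distilled sketch of the new rule, with boolean parameters standing in for the NodeState/NodeInfo queries the real implementation makes:

// Sketch only; each boolean mirrors one query made by mayClearCurrentNodeState().
final class DownClearingSketch {
    static boolean mayClearDownState(boolean generatedStateDown, boolean nodeInfoKnown,
                                     boolean reportedDown, boolean rpcAddressOutdated,
                                     boolean wantedDown) {
        if (!generatedStateDown) return false; // only Down states are candidates for clearing
        if (!nodeInfoKnown) return false;      // unknown node must not be taken up
        return !reportedDown && !rpcAddressOutdated && !wantedDown;
    }

    public static void main(String[] args) {
        // Healthy node whose group was auto-taken down: may be cleared back up.
        System.out.println(mayClearDownState(true, true, false, false, false)); // true
        // Node with an outdated RPC address stays Down.
        System.out.println(mayClearDownState(true, true, false, true, false));  // false
    }
}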
@@ -736,7 +754,7 @@ public class SystemStateGenerator {
triggeredAnyTimers = true;
}
- // If node haven't increased its initializing progress within initprogresstime, mark it down.
+ // If node hasn't increased its initializing progress within initprogresstime, mark it down.
if (!currentStateInSystem.getState().equals(State.DOWN)
&& node.getWantedState().above(new NodeState(node.getNode().getType(), State.DOWN))
&& lastReportedState.getState().equals(State.INITIALIZING)
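The comment fixed in this hunk documents an otherwise unchanged watchdog: a node that reports Initializing but makes no progress within the configured init progress time is marked down. A simplified sketch of such a check, with hypothetical names and time bookkeeping:

// Sketch only; parameter names are illustrative, not the real fields.
final class InitProgressWatchdogSketch {
    static boolean shouldMarkDown(boolean currentlyDown, boolean wantedAboveDown,
                                  boolean initializing, long nowMillis,
                                  long lastProgressMillis, long initProgressTimeMillis) {
        return !currentlyDown
                && wantedAboveDown
                && initializing
                && (nowMillis - lastProgressMillis) > initProgressTimeMillis;
    }
}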
diff --git a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/GroupAutoTakedownTest.java b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/GroupAutoTakedownTest.java
index 93e34b1f772..be60fba234a 100644
--- a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/GroupAutoTakedownTest.java
+++ b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/GroupAutoTakedownTest.java
@@ -7,6 +7,7 @@ import com.yahoo.vdslib.state.Node;
import com.yahoo.vdslib.state.NodeState;
import com.yahoo.vdslib.state.NodeType;
import com.yahoo.vdslib.state.State;
+import com.yahoo.vespa.clustercontroller.core.database.DatabaseHandler;
import com.yahoo.vespa.clustercontroller.core.listeners.NodeStateOrHostInfoChangeHandler;
import com.yahoo.vespa.clustercontroller.core.listeners.SystemStateListener;
import org.junit.Test;
@@ -23,6 +24,7 @@ import static org.mockito.Matchers.any;
import static org.mockito.Matchers.argThat;
import static org.mockito.Mockito.mock;
import static org.mockito.Mockito.verify;
+import static org.mockito.Mockito.when;
public class GroupAutoTakedownTest {
@@ -308,7 +310,6 @@ public class GroupAutoTakedownTest {
ClusterFixture fixture = createFixtureForAllUpHierarchicCluster(
DistributionBuilder.withGroups(3).eachWithNodeCount(2), 0.51);
- final Node node = new Node(NodeType.STORAGE, 4);
final NodeState newState = new NodeState(NodeType.STORAGE, State.INITIALIZING);
newState.setInitProgress(0.5);
@@ -367,4 +368,36 @@ public class GroupAutoTakedownTest {
verboseStateAfterStorageTransition(fixture, 4, State.UP));
}
+ @Test
+ public void previously_cleared_start_timestamps_are_not_reintroduced_on_up_edge() throws Exception {
+ ClusterFixture fixture = createFixtureForAllUpHierarchicCluster(
+ DistributionBuilder.withGroups(3).eachWithNodeCount(2), 0.51);
+
+ final NodeState newState = new NodeState(NodeType.STORAGE, State.UP);
+ newState.setStartTimestamp(123456);
+
+ fixture.reportStorageNodeState(4, newState);
+
+ SystemStateListener listener = mock(SystemStateListener.class);
+ assertTrue(fixture.generator.notifyIfNewSystemState(fixture.cluster, listener));
+
+ assertEquals("version:1 distributor:6 storage:6 .4.t:123456", fixture.generatedClusterState());
+
+ DatabaseHandler handler = mock(DatabaseHandler.class);
+ DatabaseHandler.Context context = mock(DatabaseHandler.Context.class);
+ when(context.getCluster()).thenReturn(fixture.cluster);
+
+ fixture.generator.handleAllDistributorsInSync(handler, context);
+ assertTrue(fixture.generator.notifyIfNewSystemState(fixture.cluster, listener));
+
+ // Timestamp should now be cleared from state
+ assertEquals("version:2 distributor:6 storage:6", fixture.generatedClusterState());
+
+ // Trigger a group down+up edge. Timestamp should _not_ be reintroduced since it was previously cleared.
+ assertEquals("version:3 distributor:6 storage:4",
+ stateAfterStorageTransition(fixture, 5, State.DOWN));
+ assertEquals("version:4 distributor:6 storage:6",
+ stateAfterStorageTransition(fixture, 5, State.UP));
+ }
+
}