summaryrefslogtreecommitdiffstats
path: root/clustercontroller-core
diff options
context:
space:
mode:
authorHåkon Hallingstad <hakon@yahoo-inc.com>2017-05-21 01:28:02 +0200
committerHåkon Hallingstad <hakon@yahoo-inc.com>2017-05-21 01:28:02 +0200
commit5a3e16eb58305a61d6accf0fd104159728017729 (patch)
tree63b81a3fc0072e78982d78ec7bcc44edd11386fd /clustercontroller-core
parent414a52d969971841abdb4b93e1e955e3266414ec (diff)
Verify version and reported state
Diffstat (limited to 'clustercontroller-core')
-rw-r--r--clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/NodeStateChangeChecker.java21
-rw-r--r--clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/NodeStateChangeCheckerTest.java66
2 files changed, 76 insertions, 11 deletions
diff --git a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/NodeStateChangeChecker.java b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/NodeStateChangeChecker.java
index 1f2897a1453..7f2d9ba9611 100644
--- a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/NodeStateChangeChecker.java
+++ b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/NodeStateChangeChecker.java
@@ -6,6 +6,7 @@ import com.yahoo.vdslib.state.Node;
import com.yahoo.vdslib.state.NodeState;
import com.yahoo.vdslib.state.NodeType;
import com.yahoo.vdslib.state.State;
+import com.yahoo.vespa.clustercontroller.core.hostinfo.HostInfo;
import com.yahoo.vespa.clustercontroller.core.hostinfo.Metrics;
import com.yahoo.vespa.clustercontroller.core.hostinfo.StorageNode;
import com.yahoo.vespa.clustercontroller.utils.staterestapi.requests.SetUnitStateRequest;
@@ -120,11 +121,17 @@ public class NodeStateChangeChecker {
}
private Result canSetStateDownPermanently(Node node, ClusterState clusterState) {
- NodeInfo nodeInfo = clusterInfo.getNodeInfo(node);
+ StorageNodeInfo nodeInfo = clusterInfo.getStorageNodeInfo(node.getIndex());
if (nodeInfo == null) {
return Result.createDisallowed("Unknown node " + node);
}
+ State reportedState = nodeInfo.getReportedState().getState();
+ if (reportedState != State.UP) {
+ return Result.createDisallowed("Reported state (" + reportedState
+ + ") is not UP, so no bucket data is available");
+ }
+
NodeState currentState = clusterState.getNodeState(node);
if (currentState.getState() != State.RETIRED) {
return Result.createDisallowed("Only retired nodes are allowed to be set to DOWN in safe mode - is "
@@ -136,8 +143,16 @@ public class NodeStateChangeChecker {
return thresholdCheckResult;
}
- Optional<Metrics.Value> bucketsMetric = clusterInfo.getStorageNodeInfo(node.getIndex())
- .getHostInfo().getMetrics().getValue(BUCKETS_METRIC_NAME);
+ HostInfo hostInfo = nodeInfo.getHostInfo();
+ Integer hostInfoNodeVersion = hostInfo.getClusterStateVersionOrNull();
+ int clusterControllerVersion = clusterState.getVersion();
+ if (hostInfoNodeVersion == null || hostInfoNodeVersion != clusterControllerVersion) {
+ return Result.createDisallowed("Cluster controller at version " + clusterControllerVersion
+ + " got info for storage node " + node.getIndex() + " at a different version "
+ + hostInfoNodeVersion);
+ }
+
+ Optional<Metrics.Value> bucketsMetric = hostInfo.getMetrics().getValue(BUCKETS_METRIC_NAME);
if (!bucketsMetric.isPresent() || bucketsMetric.get().getLast() == null) {
return Result.createDisallowed("Missing last value of the " + BUCKETS_METRIC_NAME +
" metric for storage node " + node.getIndex());
diff --git a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/NodeStateChangeCheckerTest.java b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/NodeStateChangeCheckerTest.java
index 37c241e7bc6..9dd1d305405 100644
--- a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/NodeStateChangeCheckerTest.java
+++ b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/NodeStateChangeCheckerTest.java
@@ -20,9 +20,13 @@ import java.util.HashSet;
import java.util.List;
import java.util.Set;
-import static org.hamcrest.core.StringContains.containsString;
import static org.hamcrest.core.Is.is;
-import static org.junit.Assert.*;
+import static org.hamcrest.core.StringContains.containsString;
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertFalse;
+import static org.junit.Assert.assertNotEquals;
+import static org.junit.Assert.assertThat;
+import static org.junit.Assert.assertTrue;
import static org.mockito.Mockito.mock;
import static org.mockito.Mockito.when;
@@ -413,8 +417,9 @@ public class NodeStateChangeCheckerTest {
ContentCluster cluster = createCluster(createNodes(4));
NodeStateChangeChecker nodeStateChangeChecker = createChangeChecker(cluster);
- cluster.clusterInfo().getStorageNodeInfo(nodeStorage.getIndex())
- .setHostInfo(createHostInfoWithMetrics(currentClusterStateVersion, 1));
+ StorageNodeInfo nodeInfo = cluster.clusterInfo().getStorageNodeInfo(nodeStorage.getIndex());
+ nodeInfo.setReportedState(new NodeState(NodeType.STORAGE, State.UP), 0);
+ nodeInfo.setHostInfo(createHostInfoWithMetrics(currentClusterStateVersion, 1));
NodeStateChangeChecker.Result result = nodeStateChangeChecker.evaluateTransition(
nodeStorage, defaultAllUpClusterState(), SetUnitStateRequest.Condition.SAFE,
@@ -433,8 +438,9 @@ public class NodeStateChangeCheckerTest {
"version:%d distributor:4 storage:4 .%d.s:r",
currentClusterStateVersion, nodeStorage.getIndex()));
- cluster.clusterInfo().getStorageNodeInfo(nodeStorage.getIndex())
- .setHostInfo(createHostInfoWithMetrics(currentClusterStateVersion, 1));
+ StorageNodeInfo nodeInfo = cluster.clusterInfo().getStorageNodeInfo(nodeStorage.getIndex());
+ nodeInfo.setReportedState(new NodeState(NodeType.STORAGE, State.UP), 0);
+ nodeInfo.setHostInfo(createHostInfoWithMetrics(currentClusterStateVersion, 1));
NodeStateChangeChecker.Result result = nodeStateChangeChecker.evaluateTransition(
nodeStorage, stateWithRetiredNode, SetUnitStateRequest.Condition.SAFE,
@@ -445,6 +451,49 @@ public class NodeStateChangeCheckerTest {
}
@Test
+ public void testDownDisallowedByReportedState() {
+ ContentCluster cluster = createCluster(createNodes(4));
+ NodeStateChangeChecker nodeStateChangeChecker = createChangeChecker(cluster);
+
+ ClusterState stateWithRetiredNode = clusterState(String.format(
+ "version:%d distributor:4 storage:4 .%d.s:r",
+ currentClusterStateVersion, nodeStorage.getIndex()));
+
+ StorageNodeInfo nodeInfo = cluster.clusterInfo().getStorageNodeInfo(nodeStorage.getIndex());
+ nodeInfo.setReportedState(new NodeState(NodeType.STORAGE, State.INITIALIZING), 0);
+ nodeInfo.setHostInfo(createHostInfoWithMetrics(currentClusterStateVersion, 0));
+
+ NodeStateChangeChecker.Result result = nodeStateChangeChecker.evaluateTransition(
+ nodeStorage, stateWithRetiredNode, SetUnitStateRequest.Condition.SAFE,
+ UP_NODE_STATE, DOWN_NODE_STATE);
+ assertFalse(result.settingWantedStateIsAllowed());
+ assertFalse(result.wantedStateAlreadySet());
+ assertEquals("Reported state (Initializing) is not UP, so no bucket data is available", result.getReason());
+ }
+
+ @Test
+ public void testDownDisallowedByVersionMismatch() {
+ ContentCluster cluster = createCluster(createNodes(4));
+ NodeStateChangeChecker nodeStateChangeChecker = createChangeChecker(cluster);
+
+ ClusterState stateWithRetiredNode = clusterState(String.format(
+ "version:%d distributor:4 storage:4 .%d.s:r",
+ currentClusterStateVersion, nodeStorage.getIndex()));
+
+ StorageNodeInfo nodeInfo = cluster.clusterInfo().getStorageNodeInfo(nodeStorage.getIndex());
+ nodeInfo.setReportedState(new NodeState(NodeType.STORAGE, State.UP), 0);
+ nodeInfo.setHostInfo(createHostInfoWithMetrics(currentClusterStateVersion - 1, 0));
+
+ NodeStateChangeChecker.Result result = nodeStateChangeChecker.evaluateTransition(
+ nodeStorage, stateWithRetiredNode, SetUnitStateRequest.Condition.SAFE,
+ UP_NODE_STATE, DOWN_NODE_STATE);
+ assertFalse(result.settingWantedStateIsAllowed());
+ assertFalse(result.wantedStateAlreadySet());
+ assertEquals("Cluster controller at version 2 got info for storage node 1 at a different version 1",
+ result.getReason());
+ }
+
+ @Test
public void testAllowedToSetDown() {
ContentCluster cluster = createCluster(createNodes(4));
NodeStateChangeChecker nodeStateChangeChecker = createChangeChecker(cluster);
@@ -453,8 +502,9 @@ public class NodeStateChangeCheckerTest {
"version:%d distributor:4 storage:4 .%d.s:r",
currentClusterStateVersion, nodeStorage.getIndex()));
- cluster.clusterInfo().getStorageNodeInfo(nodeStorage.getIndex())
- .setHostInfo(createHostInfoWithMetrics(currentClusterStateVersion, 0));
+ StorageNodeInfo nodeInfo = cluster.clusterInfo().getStorageNodeInfo(nodeStorage.getIndex());
+ nodeInfo.setReportedState(new NodeState(NodeType.STORAGE, State.UP), 0);
+ nodeInfo.setHostInfo(createHostInfoWithMetrics(currentClusterStateVersion, 0));
NodeStateChangeChecker.Result result = nodeStateChangeChecker.evaluateTransition(
nodeStorage, stateWithRetiredNode, SetUnitStateRequest.Condition.SAFE,