aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorHarald Musum <musum@yahooinc.com>2023-04-17 15:06:48 +0200
committerHarald Musum <musum@yahooinc.com>2023-04-17 15:06:48 +0200
commite87be82cddf3145659a9ed75a0f5730fcb345fe3 (patch)
tree8568cb36c19ba37feafec27824ae8da6572c85e5
parent8162871b7bdc83d4ab03c14979b3b1f8020a81b5 (diff)
Check state down later and simplify
-rw-r--r--clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/NodeStateChangeChecker.java59
-rw-r--r--clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/NodeStateChangeCheckerTest.java33
2 files changed, 46 insertions, 46 deletions
diff --git a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/NodeStateChangeChecker.java b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/NodeStateChangeChecker.java
index 50ea6d4acde..84deeeac4e3 100644
--- a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/NodeStateChangeChecker.java
+++ b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/NodeStateChangeChecker.java
@@ -222,11 +222,6 @@ public class NodeStateChangeChecker {
oldWantedState.getState() + ": " + oldWantedState.getDescription());
}
- if (clusterState.getNodeState(nodeInfo.getNode()).getState() == DOWN) {
- log.log(FINE, "node is DOWN, allow");
- return allowSettingOfWantedState();
- }
-
if (maxNumberOfGroupsAllowedToBeDown == -1) {
var otherGroupCheck = anotherNodeInAnotherGroupHasWantedState(nodeInfo);
if (!otherGroupCheck.settingWantedStateIsAllowed()) {
@@ -241,6 +236,11 @@ public class NodeStateChangeChecker {
return result.get();
}
+ if (clusterState.getNodeState(nodeInfo.getNode()).getState() == DOWN) {
+ log.log(FINE, "node is DOWN, allow");
+ return allowSettingOfWantedState();
+ }
+
Result allNodesAreUpCheck = checkAllNodesAreUp(clusterState);
if (!allNodesAreUpCheck.settingWantedStateIsAllowed()) {
log.log(FINE, "allNodesAreUpCheck: " + allNodesAreUpCheck);
@@ -296,39 +296,16 @@ public class NodeStateChangeChecker {
Node node = nodeInfo.getNode();
if (groupVisiting.isHierarchical()) {
- if (maxNumberOfGroupsAllowedToBeDown <= 1) {
- SettableOptional<Result> anotherNodeHasWantedState = new SettableOptional<>();
- groupVisiting.visit(group -> {
- if (!groupContainsNode(group, node)) {
- Result result = otherNodeInGroupHasWantedState(group);
- if (!result.settingWantedStateIsAllowed()) {
- anotherNodeHasWantedState.set(result);
- // Have found a node that is suspended, halt the visiting
- return false;
- }
- }
- return true;
- });
- if (anotherNodeHasWantedState.isPresent()) {
- log.log(FINE, "anotherNodeHasWantedState: " + anotherNodeHasWantedState.get());
- return Optional.of(anotherNodeHasWantedState.get());
- }
- if (anotherNodeInGroupAlreadyAllowed(nodeInfo, newDescription)) {
- log.log(FINE, "anotherNodeInGroupAlreadyAllowed, allow");
- return Optional.of(allowSettingOfWantedState());
- }
- } else {
- Set<Integer> groupsWithStorageNodesWantedStateNotUp = groupsWithStorageNodesWantedStateNotUp();
- String disallowMessage = "At most nodes in " + maxNumberOfGroupsAllowedToBeDown + " groups can have wanted state";
- if (groupsWithStorageNodesWantedStateNotUp.size() < maxNumberOfGroupsAllowedToBeDown)
- return Optional.of(allowSettingOfWantedState());
- if (groupsWithStorageNodesWantedStateNotUp.size() > maxNumberOfGroupsAllowedToBeDown)
- return Optional.of(createDisallowed(disallowMessage));
- if (aGroupContainsNode(groupsWithStorageNodesWantedStateNotUp, node))
- return Optional.of(allowSettingOfWantedState());
-
- return Optional.of(createDisallowed(disallowMessage));
- }
+ Set<Integer> groupsWithStorageNodesWantedStateNotUp = groupsWithUserWantedStateNotUp();
+ String disallowMessage = "At most nodes in " + maxNumberOfGroupsAllowedToBeDown + " groups can have wanted state";
+ if (groupsWithStorageNodesWantedStateNotUp.size() == 0)
+ return Optional.empty();
+ if (groupsWithStorageNodesWantedStateNotUp.size() < maxNumberOfGroupsAllowedToBeDown)
+ return Optional.of(allowSettingOfWantedState());
+ if (aGroupContainsNode(groupsWithStorageNodesWantedStateNotUp, node))
+ return Optional.of(allowSettingOfWantedState());
+
+ return Optional.of(createDisallowed(disallowMessage));
} else {
// Return a disallow-result if there is another node with a wanted state
var otherNodeHasWantedState = otherNodeHasWantedState(nodeInfo);
@@ -527,9 +504,9 @@ public class NodeStateChangeChecker {
return allowSettingOfWantedState();
}
- private Set<Integer> groupsWithStorageNodesWantedStateNotUp() {
- return clusterInfo.getStorageNodeInfos().stream()
- .filter(sni -> !UP.equals(sni.getWantedState().getState()))
+ private Set<Integer> groupsWithUserWantedStateNotUp() {
+ return clusterInfo.getAllNodeInfos().stream()
+ .filter(sni -> !UP.equals(sni.getUserWantedState().getState()))
.map(NodeInfo::getGroup)
.filter(Objects::nonNull)
.filter(Group::isLeafGroup)
diff --git a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/NodeStateChangeCheckerTest.java b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/NodeStateChangeCheckerTest.java
index 37d95f8c4b1..551b60be741 100644
--- a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/NodeStateChangeCheckerTest.java
+++ b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/NodeStateChangeCheckerTest.java
@@ -192,6 +192,18 @@ public class NodeStateChangeCheckerTest {
setStorageNodeWantedStateToMaintenance(cluster, nodeIndex);
}
+ // Nodes in group 0 and 1 in maintenance, try to set storage node in group 2 to maintenance while storage node 2 is down, should fail
+ {
+ ClusterState clusterState = clusterState(String.format("version:%d distributor:4 storage:4 .0.s:m .1.s:m .2.s:d", currentClusterStateVersion));
+ int nodeIndex = 2;
+ cluster.clusterInfo().getStorageNodeInfo(nodeIndex).setReportedState(new NodeState(STORAGE, DOWN), 0);
+ Node node = new Node(STORAGE, nodeIndex);
+ Result result = nodeStateChangeChecker.evaluateTransition(node, clusterState, SAFE, UP_NODE_STATE, MAINTENANCE_NODE_STATE);
+ assertFalse(result.settingWantedStateIsAllowed(), result.toString());
+ assertFalse(result.wantedStateAlreadySet());
+ assertEquals("At most nodes in 2 groups can have wanted state", result.getReason());
+ }
+
// Nodes in group 0 and 1 in maintenance, try to set storage node in group 2 to maintenance, should fail
{
ClusterState clusterState = clusterState(String.format("version:%d distributor:4 storage:4 .0.s:m .1.s:m", currentClusterStateVersion));
@@ -300,7 +312,10 @@ public class NodeStateChangeCheckerTest {
SAFE, UP_NODE_STATE, MAINTENANCE_NODE_STATE);
assertFalse(result.settingWantedStateIsAllowed());
assertFalse(result.wantedStateAlreadySet());
- assertEquals("At most one group can have wanted state: Other distributor 0 in group 0 has wanted state Down", result.getReason());
+ if (maxNumberOfGroupsAllowedToBeDown >= 1)
+ assertEquals("At most nodes in 1 groups can have wanted state", result.getReason());
+ else
+ assertEquals("At most one group can have wanted state: Other distributor 0 in group 0 has wanted state Down", result.getReason());
}
{
@@ -309,8 +324,13 @@ public class NodeStateChangeCheckerTest {
Result result = nodeStateChangeChecker.evaluateTransition(
new Node(STORAGE, 1), clusterStateWith0InMaintenance,
SAFE, UP_NODE_STATE, MAINTENANCE_NODE_STATE);
- assertFalse(result.settingWantedStateIsAllowed(), result.getReason());
- assertEquals("Another distributor wants state DOWN: 0", result.getReason());
+ if (maxNumberOfGroupsAllowedToBeDown >= 1) {
+ assertTrue(result.settingWantedStateIsAllowed(), result.getReason());
+ assertFalse(result.wantedStateAlreadySet());
+ } else {
+ assertFalse(result.settingWantedStateIsAllowed(), result.getReason());
+ assertEquals("Another distributor wants state DOWN: 0", result.getReason());
+ }
}
}
@@ -333,8 +353,11 @@ public class NodeStateChangeCheckerTest {
SAFE, UP_NODE_STATE, MAINTENANCE_NODE_STATE);
assertFalse(result.settingWantedStateIsAllowed());
assertFalse(result.wantedStateAlreadySet());
- assertEquals("At most one group can have wanted state: Other storage node 0 in group 0 has wanted state Maintenance",
- result.getReason());
+ if (maxNumberOfGroupsAllowedToBeDown >= 1)
+ assertEquals("At most nodes in 1 groups can have wanted state", result.getReason());
+ else
+ assertEquals("At most one group can have wanted state: Other storage node 0 in group 0 has wanted state Maintenance",
+ result.getReason());
}
{