From e87be82cddf3145659a9ed75a0f5730fcb345fe3 Mon Sep 17 00:00:00 2001 From: Harald Musum Date: Mon, 17 Apr 2023 15:06:48 +0200 Subject: Check state down later and simplify --- .../core/NodeStateChangeChecker.java | 59 +++++++--------------- .../core/NodeStateChangeCheckerTest.java | 33 ++++++++++-- 2 files changed, 46 insertions(+), 46 deletions(-) (limited to 'clustercontroller-core') diff --git a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/NodeStateChangeChecker.java b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/NodeStateChangeChecker.java index 50ea6d4acde..84deeeac4e3 100644 --- a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/NodeStateChangeChecker.java +++ b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/NodeStateChangeChecker.java @@ -222,11 +222,6 @@ public class NodeStateChangeChecker { oldWantedState.getState() + ": " + oldWantedState.getDescription()); } - if (clusterState.getNodeState(nodeInfo.getNode()).getState() == DOWN) { - log.log(FINE, "node is DOWN, allow"); - return allowSettingOfWantedState(); - } - if (maxNumberOfGroupsAllowedToBeDown == -1) { var otherGroupCheck = anotherNodeInAnotherGroupHasWantedState(nodeInfo); if (!otherGroupCheck.settingWantedStateIsAllowed()) { @@ -241,6 +236,11 @@ public class NodeStateChangeChecker { return result.get(); } + if (clusterState.getNodeState(nodeInfo.getNode()).getState() == DOWN) { + log.log(FINE, "node is DOWN, allow"); + return allowSettingOfWantedState(); + } + Result allNodesAreUpCheck = checkAllNodesAreUp(clusterState); if (!allNodesAreUpCheck.settingWantedStateIsAllowed()) { log.log(FINE, "allNodesAreUpCheck: " + allNodesAreUpCheck); @@ -296,39 +296,16 @@ public class NodeStateChangeChecker { Node node = nodeInfo.getNode(); if (groupVisiting.isHierarchical()) { - if (maxNumberOfGroupsAllowedToBeDown <= 1) { - SettableOptional anotherNodeHasWantedState = new SettableOptional<>(); - groupVisiting.visit(group -> { - if (!groupContainsNode(group, node)) { - Result result = otherNodeInGroupHasWantedState(group); - if (!result.settingWantedStateIsAllowed()) { - anotherNodeHasWantedState.set(result); - // Have found a node that is suspended, halt the visiting - return false; - } - } - return true; - }); - if (anotherNodeHasWantedState.isPresent()) { - log.log(FINE, "anotherNodeHasWantedState: " + anotherNodeHasWantedState.get()); - return Optional.of(anotherNodeHasWantedState.get()); - } - if (anotherNodeInGroupAlreadyAllowed(nodeInfo, newDescription)) { - log.log(FINE, "anotherNodeInGroupAlreadyAllowed, allow"); - return Optional.of(allowSettingOfWantedState()); - } - } else { - Set groupsWithStorageNodesWantedStateNotUp = groupsWithStorageNodesWantedStateNotUp(); - String disallowMessage = "At most nodes in " + maxNumberOfGroupsAllowedToBeDown + " groups can have wanted state"; - if (groupsWithStorageNodesWantedStateNotUp.size() < maxNumberOfGroupsAllowedToBeDown) - return Optional.of(allowSettingOfWantedState()); - if (groupsWithStorageNodesWantedStateNotUp.size() > maxNumberOfGroupsAllowedToBeDown) - return Optional.of(createDisallowed(disallowMessage)); - if (aGroupContainsNode(groupsWithStorageNodesWantedStateNotUp, node)) - return Optional.of(allowSettingOfWantedState()); - - return Optional.of(createDisallowed(disallowMessage)); - } + Set groupsWithStorageNodesWantedStateNotUp = groupsWithUserWantedStateNotUp(); + String disallowMessage = "At most nodes in " + maxNumberOfGroupsAllowedToBeDown + " groups can have wanted state"; + if (groupsWithStorageNodesWantedStateNotUp.size() == 0) + return Optional.empty(); + if (groupsWithStorageNodesWantedStateNotUp.size() < maxNumberOfGroupsAllowedToBeDown) + return Optional.of(allowSettingOfWantedState()); + if (aGroupContainsNode(groupsWithStorageNodesWantedStateNotUp, node)) + return Optional.of(allowSettingOfWantedState()); + + return Optional.of(createDisallowed(disallowMessage)); } else { // Return a disallow-result if there is another node with a wanted state var otherNodeHasWantedState = otherNodeHasWantedState(nodeInfo); @@ -527,9 +504,9 @@ public class NodeStateChangeChecker { return allowSettingOfWantedState(); } - private Set groupsWithStorageNodesWantedStateNotUp() { - return clusterInfo.getStorageNodeInfos().stream() - .filter(sni -> !UP.equals(sni.getWantedState().getState())) + private Set groupsWithUserWantedStateNotUp() { + return clusterInfo.getAllNodeInfos().stream() + .filter(sni -> !UP.equals(sni.getUserWantedState().getState())) .map(NodeInfo::getGroup) .filter(Objects::nonNull) .filter(Group::isLeafGroup) diff --git a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/NodeStateChangeCheckerTest.java b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/NodeStateChangeCheckerTest.java index 37d95f8c4b1..551b60be741 100644 --- a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/NodeStateChangeCheckerTest.java +++ b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/NodeStateChangeCheckerTest.java @@ -192,6 +192,18 @@ public class NodeStateChangeCheckerTest { setStorageNodeWantedStateToMaintenance(cluster, nodeIndex); } + // Nodes in group 0 and 1 in maintenance, try to set storage node in group 2 to maintenance while storage node 2 is down, should fail + { + ClusterState clusterState = clusterState(String.format("version:%d distributor:4 storage:4 .0.s:m .1.s:m .2.s:d", currentClusterStateVersion)); + int nodeIndex = 2; + cluster.clusterInfo().getStorageNodeInfo(nodeIndex).setReportedState(new NodeState(STORAGE, DOWN), 0); + Node node = new Node(STORAGE, nodeIndex); + Result result = nodeStateChangeChecker.evaluateTransition(node, clusterState, SAFE, UP_NODE_STATE, MAINTENANCE_NODE_STATE); + assertFalse(result.settingWantedStateIsAllowed(), result.toString()); + assertFalse(result.wantedStateAlreadySet()); + assertEquals("At most nodes in 2 groups can have wanted state", result.getReason()); + } + // Nodes in group 0 and 1 in maintenance, try to set storage node in group 2 to maintenance, should fail { ClusterState clusterState = clusterState(String.format("version:%d distributor:4 storage:4 .0.s:m .1.s:m", currentClusterStateVersion)); @@ -300,7 +312,10 @@ public class NodeStateChangeCheckerTest { SAFE, UP_NODE_STATE, MAINTENANCE_NODE_STATE); assertFalse(result.settingWantedStateIsAllowed()); assertFalse(result.wantedStateAlreadySet()); - assertEquals("At most one group can have wanted state: Other distributor 0 in group 0 has wanted state Down", result.getReason()); + if (maxNumberOfGroupsAllowedToBeDown >= 1) + assertEquals("At most nodes in 1 groups can have wanted state", result.getReason()); + else + assertEquals("At most one group can have wanted state: Other distributor 0 in group 0 has wanted state Down", result.getReason()); } { @@ -309,8 +324,13 @@ public class NodeStateChangeCheckerTest { Result result = nodeStateChangeChecker.evaluateTransition( new Node(STORAGE, 1), clusterStateWith0InMaintenance, SAFE, UP_NODE_STATE, MAINTENANCE_NODE_STATE); - assertFalse(result.settingWantedStateIsAllowed(), result.getReason()); - assertEquals("Another distributor wants state DOWN: 0", result.getReason()); + if (maxNumberOfGroupsAllowedToBeDown >= 1) { + assertTrue(result.settingWantedStateIsAllowed(), result.getReason()); + assertFalse(result.wantedStateAlreadySet()); + } else { + assertFalse(result.settingWantedStateIsAllowed(), result.getReason()); + assertEquals("Another distributor wants state DOWN: 0", result.getReason()); + } } } @@ -333,8 +353,11 @@ public class NodeStateChangeCheckerTest { SAFE, UP_NODE_STATE, MAINTENANCE_NODE_STATE); assertFalse(result.settingWantedStateIsAllowed()); assertFalse(result.wantedStateAlreadySet()); - assertEquals("At most one group can have wanted state: Other storage node 0 in group 0 has wanted state Maintenance", - result.getReason()); + if (maxNumberOfGroupsAllowedToBeDown >= 1) + assertEquals("At most nodes in 1 groups can have wanted state", result.getReason()); + else + assertEquals("At most one group can have wanted state: Other storage node 0 in group 0 has wanted state Maintenance", + result.getReason()); } { -- cgit v1.2.3