diff options
author | Harald Musum <musum@verizonmedia.com> | 2023-04-19 12:55:25 +0200 |
---|---|---|
committer | GitHub <noreply@github.com> | 2023-04-19 12:55:25 +0200 |
commit | 1b0363508a5a14e8d9d39bb7421ed1040b1a2a6a (patch) | |
tree | bd8c0fef6267611e314cb35251bc497c72998055 | |
parent | f8f367921956b5e0a7e9927fdecaf1713a80fbf8 (diff) | |
parent | c38a75f69c5206e30aa8023be08707fdb7adb132 (diff) |
Merge pull request #26652 from vespa-engine/hmusum/allow-groups-to-be-down
Hmusum/allow groups to be down
6 files changed, 450 insertions, 133 deletions
diff --git a/clustercontroller-core/pom.xml b/clustercontroller-core/pom.xml index b4ac5ca869c..647d8ca4e64 100644 --- a/clustercontroller-core/pom.xml +++ b/clustercontroller-core/pom.xml @@ -64,6 +64,11 @@ <scope>test</scope> </dependency> <dependency> + <groupId>org.junit.jupiter</groupId> + <artifactId>junit-jupiter-params</artifactId> + <scope>test</scope> + </dependency> + <dependency> <groupId>org.slf4j</groupId> <artifactId>slf4j-api</artifactId> <scope>provided</scope> diff --git a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/ContentCluster.java b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/ContentCluster.java index b83d70b8656..2535589395d 100644 --- a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/ContentCluster.java +++ b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/ContentCluster.java @@ -32,7 +32,7 @@ public class ContentCluster { private final int maxNumberOfGroupsAllowedToBeDown; public ContentCluster(String clusterName, Collection<ConfiguredNode> configuredNodes, Distribution distribution) { - this(clusterName, configuredNodes, distribution, 1); + this(clusterName, configuredNodes, distribution, -1); } public ContentCluster(FleetControllerOptions options) { @@ -40,9 +40,9 @@ public class ContentCluster { } ContentCluster(String clusterName, - Collection<ConfiguredNode> configuredNodes, - Distribution distribution, - int maxNumberOfGroupsAllowedToBeDown) { + Collection<ConfiguredNode> configuredNodes, + Distribution distribution, + int maxNumberOfGroupsAllowedToBeDown) { if (configuredNodes == null) throw new IllegalArgumentException("Nodes must be set"); this.clusterName = clusterName; this.distribution = distribution; diff --git a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/NodeStateChangeChecker.java 
b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/NodeStateChangeChecker.java index e242833fd0c..c823c94afd1 100644 --- a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/NodeStateChangeChecker.java +++ b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/NodeStateChangeChecker.java @@ -13,13 +13,20 @@ import com.yahoo.vespa.clustercontroller.core.hostinfo.HostInfo; import com.yahoo.vespa.clustercontroller.core.hostinfo.Metrics; import com.yahoo.vespa.clustercontroller.core.hostinfo.StorageNode; import com.yahoo.vespa.clustercontroller.utils.staterestapi.requests.SetUnitStateRequest; +import java.util.ArrayList; +import java.util.Collection; +import java.util.Collections; import java.util.List; import java.util.Map; import java.util.Objects; import java.util.Optional; +import java.util.Set; +import java.util.logging.Logger; +import java.util.stream.Collectors; import static com.yahoo.vdslib.state.NodeType.STORAGE; import static com.yahoo.vdslib.state.State.DOWN; +import static com.yahoo.vdslib.state.State.MAINTENANCE; import static com.yahoo.vdslib.state.State.RETIRED; import static com.yahoo.vdslib.state.State.UP; import static com.yahoo.vespa.clustercontroller.core.NodeStateChangeChecker.Result.allowSettingOfWantedState; @@ -27,6 +34,7 @@ import static com.yahoo.vespa.clustercontroller.core.NodeStateChangeChecker.Resu import static com.yahoo.vespa.clustercontroller.core.NodeStateChangeChecker.Result.createDisallowed; import static com.yahoo.vespa.clustercontroller.utils.staterestapi.requests.SetUnitStateRequest.Condition.FORCE; import static com.yahoo.vespa.clustercontroller.utils.staterestapi.requests.SetUnitStateRequest.Condition.SAFE; +import static java.util.logging.Level.FINE; /** * Checks if a node can be upgraded. 
@@ -35,8 +43,9 @@ import static com.yahoo.vespa.clustercontroller.utils.staterestapi.requests.SetU */ public class NodeStateChangeChecker { - public static final String BUCKETS_METRIC_NAME = "vds.datastored.bucket_space.buckets_total"; - public static final Map<String, String> BUCKETS_METRIC_DIMENSIONS = Map.of("bucketSpace", "default"); + private static final Logger log = Logger.getLogger(NodeStateChangeChecker.class.getName()); + private static final String BUCKETS_METRIC_NAME = "vds.datastored.bucket_space.buckets_total"; + private static final Map<String, String> BUCKETS_METRIC_DIMENSIONS = Map.of("bucketSpace", "default"); private final int requiredRedundancy; private final HierarchicalGroupVisiting groupVisiting; @@ -50,6 +59,8 @@ public class NodeStateChangeChecker { this.clusterInfo = cluster.clusterInfo(); this.inMoratorium = inMoratorium; this.maxNumberOfGroupsAllowedToBeDown = cluster.maxNumberOfGroupsAllowedToBeDown(); + if ( ! groupVisiting.isHierarchical() && maxNumberOfGroupsAllowedToBeDown > 1) + throw new IllegalArgumentException("Cannot have both 1 group and maxNumberOfGroupsAllowedToBeDown > 1"); } public static class Result { @@ -214,26 +225,34 @@ public class NodeStateChangeChecker { oldWantedState.getState() + ": " + oldWantedState.getDescription()); } - Result otherGroupCheck = anotherNodeInAnotherGroupHasWantedState(nodeInfo); - if (!otherGroupCheck.settingWantedStateIsAllowed()) { - return otherGroupCheck; + if (maxNumberOfGroupsAllowedToBeDown == -1) { + var otherGroupCheck = anotherNodeInAnotherGroupHasWantedState(nodeInfo); + if (!otherGroupCheck.settingWantedStateIsAllowed()) { + return otherGroupCheck; + } + if (anotherNodeInGroupAlreadyAllowed(nodeInfo, newDescription)) { + return allowSettingOfWantedState(); + } + } else { + var result = otherNodesHaveWantedState(nodeInfo, newDescription, clusterState); + if (result.isPresent()) + return result.get(); } if (clusterState.getNodeState(nodeInfo.getNode()).getState() == DOWN) { - return 
allowSettingOfWantedState(); - } - - if (anotherNodeInGroupAlreadyAllowed(nodeInfo, newDescription)) { + log.log(FINE, "node is DOWN, allow"); return allowSettingOfWantedState(); } Result allNodesAreUpCheck = checkAllNodesAreUp(clusterState); if (!allNodesAreUpCheck.settingWantedStateIsAllowed()) { + log.log(FINE, "allNodesAreUpCheck: " + allNodesAreUpCheck); return allNodesAreUpCheck; } Result checkDistributorsResult = checkDistributors(nodeInfo.getNode(), clusterState.getVersion()); if (!checkDistributorsResult.settingWantedStateIsAllowed()) { + log.log(FINE, "checkDistributors: "+ checkDistributorsResult); return checkDistributorsResult; } @@ -268,6 +287,65 @@ public class NodeStateChangeChecker { } } + /** + * Returns an optional Result, where return value is: + * For flat setup: Return Optional.of(disallowed) if wanted state is set on some node, else Optional.empty + * For hierarchical setup: No wanted state for other nodes, return Optional.empty + * Wanted state for nodes/groups are not UP: + * if less than maxNumberOfGroupsAllowedToBeDown: return Optional.of(allowed) + * else: if node is in group with nodes already down: return Optional.of(allowed), else Optional.of(disallowed) + */ + private Optional<Result> otherNodesHaveWantedState(StorageNodeInfo nodeInfo, String newDescription, ClusterState clusterState) { + Node node = nodeInfo.getNode(); + + if (groupVisiting.isHierarchical()) { + Set<Integer> groupsWithNodesWantedStateNotUp = groupsWithUserWantedStateNotUp(); + if (groupsWithNodesWantedStateNotUp.size() == 0) { + log.log(FINE, "groupsWithNodesWantedStateNotUp=0"); + return Optional.empty(); + } + + Set<Integer> groupsWithSameStateAndDescription = groupsWithSameStateAndDescription(MAINTENANCE, newDescription); + if (aGroupContainsNode(groupsWithSameStateAndDescription, node)) { + log.log(FINE, "Node is in group with same state and description, allow"); + return Optional.of(allowSettingOfWantedState()); + } + // There are groups with nodes not up, but 
with another description, probably operator set + if (groupsWithSameStateAndDescription.size() == 0) { + return Optional.of(createDisallowed("Wanted state already set for another node in groups: " + + sortSetIntoList(groupsWithNodesWantedStateNotUp))); + } + + Set<Integer> retiredAndNotUpGroups = groupsWithNotRetiredAndNotUp(clusterState); + int numberOfGroupsToConsider = retiredAndNotUpGroups.size(); + // Subtract one group if node is in a group with nodes already retired or not up, since number of such groups will + // not increase if we allow node to go down + if (aGroupContainsNode(retiredAndNotUpGroups, node)) { + numberOfGroupsToConsider = retiredAndNotUpGroups.size() - 1; + } + if (numberOfGroupsToConsider < maxNumberOfGroupsAllowedToBeDown) { + log.log(FINE, "Allow, retiredAndNotUpGroups=" + retiredAndNotUpGroups); + return Optional.of(allowSettingOfWantedState()); + } + + return Optional.of(createDisallowed(String.format("At most %d groups can have wanted state: %s", + maxNumberOfGroupsAllowedToBeDown, + sortSetIntoList(retiredAndNotUpGroups)))); + } else { + // Return a disallow-result if there is another node with a wanted state + var otherNodeHasWantedState = otherNodeHasWantedState(nodeInfo); + if ( ! otherNodeHasWantedState.settingWantedStateIsAllowed()) + return Optional.of(otherNodeHasWantedState); + } + return Optional.empty(); + } + + private ArrayList<Integer> sortSetIntoList(Set<Integer> set) { + var sortedList = new ArrayList<>(set); + Collections.sort(sortedList); + return sortedList; + } + /** Returns a disallow-result, if there is a node in the group with wanted state != UP. 
*/ private Result otherNodeInGroupHasWantedState(Group group) { for (var configuredNode : group.getNodes()) { @@ -354,6 +432,22 @@ public class NodeStateChangeChecker { return false; } + private boolean aGroupContainsNode(Collection<Integer> groupIndexes, Node node) { + for (Group group : getGroupsWithIndexes(groupIndexes)) { + if (groupContainsNode(group, node)) + return true; + } + + return false; + } + + private List<Group> getGroupsWithIndexes(Collection<Integer> groupIndexes) { + return clusterInfo.getStorageNodeInfos().stream() + .map(NodeInfo::getGroup) + .filter(group -> groupIndexes.contains(group.getIndex())) + .collect(Collectors.toList()); + } + private Result checkAllNodesAreUp(ClusterState clusterState) { // This method verifies both storage nodes and distributors are up (or retired). // The complicated part is making a summary error message. @@ -441,4 +535,43 @@ public class NodeStateChangeChecker { return allowSettingOfWantedState(); } + private Set<Integer> groupsWithUserWantedStateNotUp() { + return clusterInfo.getAllNodeInfos().stream() + .filter(sni -> !UP.equals(sni.getUserWantedState().getState())) + .map(NodeInfo::getGroup) + .filter(Objects::nonNull) + .filter(Group::isLeafGroup) + .map(Group::getIndex) + .collect(Collectors.toSet()); + } + + // groups with at least one node with the same state & description + private Set<Integer> groupsWithSameStateAndDescription(State state, String newDescription) { + return clusterInfo.getAllNodeInfos().stream() + .filter(nodeInfo -> { + var userWantedState = nodeInfo.getUserWantedState(); + return userWantedState.getState() == state && + Objects.equals(userWantedState.getDescription(), newDescription); + }) + .map(NodeInfo::getGroup) + .filter(Objects::nonNull) + .filter(Group::isLeafGroup) + .map(Group::getIndex) + .collect(Collectors.toSet()); + } + + // groups with at least one node in state (not retired AND not up) + private Set<Integer> groupsWithNotRetiredAndNotUp(ClusterState clusterState) { + 
return clusterInfo.getAllNodeInfos().stream() + .filter(nodeInfo -> (nodeInfo.getUserWantedState().getState() != RETIRED + && nodeInfo.getUserWantedState().getState() != UP) + || (clusterState.getNodeState(nodeInfo.getNode()).getState() != RETIRED + && clusterState.getNodeState(nodeInfo.getNode()).getState() != UP)) + .map(NodeInfo::getGroup) + .filter(Objects::nonNull) + .filter(Group::isLeafGroup) + .map(Group::getIndex) + .collect(Collectors.toSet()); + } + } diff --git a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/NodeStateChangeCheckerTest.java b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/NodeStateChangeCheckerTest.java index 45ca07c88e4..c4fd7cb69b9 100644 --- a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/NodeStateChangeCheckerTest.java +++ b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/NodeStateChangeCheckerTest.java @@ -10,7 +10,8 @@ import com.yahoo.vdslib.state.State; import com.yahoo.vespa.clustercontroller.core.hostinfo.HostInfo; import com.yahoo.vespa.config.content.StorDistributionConfig; import org.junit.jupiter.api.Test; -import java.text.ParseException; +import org.junit.jupiter.params.ParameterizedTest; +import org.junit.jupiter.params.provider.ValueSource; import java.util.ArrayList; import java.util.Collection; import java.util.List; @@ -45,13 +46,7 @@ public class NodeStateChangeCheckerTest { return new NodeState(STORAGE, state).setDescription(description); } - private static ClusterState clusterState(String state) { - try { - return new ClusterState(state); - } catch (ParseException e) { - throw new RuntimeException(e); - } - } + private static ClusterState clusterState(String state) { return ClusterState.stateFromString(state); } private static ClusterState defaultAllUpClusterState() { return defaultAllUpClusterState(4); @@ -68,14 +63,14 @@ public class NodeStateChangeCheckerTest { return new 
NodeStateChangeChecker(cluster, false); } - private ContentCluster createCluster(int nodeCount) { - return createCluster(nodeCount, 1); + private ContentCluster createCluster(int nodeCount, int maxNumberOfGroupsAllowedToBeDown) { + return createCluster(nodeCount, 1, maxNumberOfGroupsAllowedToBeDown); } - private ContentCluster createCluster(int nodeCount, int groupCount) { - Collection<ConfiguredNode> nodes = createNodes(nodeCount); + private ContentCluster createCluster(int nodeCount, int groupCount, int maxNumberOfGroupsAllowedToBeDown) { + List<ConfiguredNode> nodes = createNodes(nodeCount); Distribution distribution = new Distribution(createDistributionConfig(nodeCount, groupCount)); - return new ContentCluster("Clustername", nodes, distribution); + return new ContentCluster("Clustername", nodes, distribution, maxNumberOfGroupsAllowedToBeDown); } private String createDistributorHostInfo(int replicationfactor1, int replicationfactor2, int replicationfactor3) { @@ -113,9 +108,10 @@ public class NodeStateChangeCheckerTest { } } - @Test - void testCanUpgradeForce() { - var nodeStateChangeChecker = createChangeChecker(createCluster(1)); + @ParameterizedTest + @ValueSource(ints = {-1, 1}) + void testCanUpgradeWithForce(int maxNumberOfGroupsAllowedToBeDown) { + var nodeStateChangeChecker = createChangeChecker(createCluster(1, maxNumberOfGroupsAllowedToBeDown)); NodeState newState = new NodeState(STORAGE, INITIALIZING); Result result = nodeStateChangeChecker.evaluateTransition( nodeDistributor, defaultAllUpClusterState(), FORCE, @@ -124,9 +120,10 @@ public class NodeStateChangeCheckerTest { assertFalse(result.wantedStateAlreadySet()); } - @Test - void testDeniedInMoratorium() { - ContentCluster cluster = createCluster(4); + @ParameterizedTest + @ValueSource(ints = {-1, 1}) + void testDeniedInMoratorium(int maxNumberOfGroupsAllowedToBeDown) { + ContentCluster cluster = createCluster(4, maxNumberOfGroupsAllowedToBeDown); var nodeStateChangeChecker = new 
NodeStateChangeChecker(cluster, true); Result result = nodeStateChangeChecker.evaluateTransition( new Node(STORAGE, 10), defaultAllUpClusterState(), SAFE, @@ -136,9 +133,10 @@ public class NodeStateChangeCheckerTest { assertEquals("Master cluster controller is bootstrapping and in moratorium", result.getReason()); } - @Test - void testUnknownStorageNode() { - ContentCluster cluster = createCluster(4); + @ParameterizedTest + @ValueSource(ints = {-1, 1}) + void testUnknownStorageNode(int maxNumberOfGroupsAllowedToBeDown) { + ContentCluster cluster = createCluster(4, maxNumberOfGroupsAllowedToBeDown); var nodeStateChangeChecker = createChangeChecker(cluster); Result result = nodeStateChangeChecker.evaluateTransition( new Node(STORAGE, 10), defaultAllUpClusterState(), SAFE, @@ -148,11 +146,12 @@ public class NodeStateChangeCheckerTest { assertEquals("Unknown node storage.10", result.getReason()); } - @Test - void testSafeMaintenanceDisallowedWhenOtherStorageNodeInFlatClusterIsSuspended() { + @ParameterizedTest + @ValueSource(ints = {-1, 1}) + void testSafeMaintenanceDisallowedWhenOtherStorageNodeInFlatClusterIsSuspended(int maxNumberOfGroupsAllowedToBeDown) { // Nodes 0-3, storage node 0 being in maintenance with "Orchestrator" description. 
- ContentCluster cluster = createCluster(4); - cluster.clusterInfo().getStorageNodeInfo(0).setWantedState(new NodeState(STORAGE, MAINTENANCE).setDescription("Orchestrator")); + ContentCluster cluster = createCluster(4, maxNumberOfGroupsAllowedToBeDown); + setStorageNodeWantedStateToMaintenance(cluster, 0); var nodeStateChangeChecker = createChangeChecker(cluster); ClusterState clusterStateWith0InMaintenance = clusterState(String.format( "version:%d distributor:4 storage:4 .0.s:m", @@ -168,9 +167,130 @@ public class NodeStateChangeCheckerTest { } @Test - void testSafeMaintenanceDisallowedWhenOtherDistributorInFlatClusterIsSuspended() { + void testMaintenanceAllowedFor2Of4Groups() { + // 4 groups with 1 node in each group + Collection<ConfiguredNode> nodes = createNodes(4); + StorDistributionConfig config = createDistributionConfig(4, 4); + + int maxNumberOfGroupsAllowedToBeDown = 2; + var cluster = new ContentCluster("Clustername", nodes, new Distribution(config), maxNumberOfGroupsAllowedToBeDown); + setAllNodesUp(cluster, HostInfo.createHostInfo(createDistributorHostInfo(4, 5, 6))); + var nodeStateChangeChecker = createChangeChecker(cluster); + + // All nodes up, set a storage node in group 0 to maintenance + { + int nodeIndex = 0; + checkSettingToMaintenanceIsAllowed(nodeIndex, nodeStateChangeChecker, defaultAllUpClusterState()); + setStorageNodeWantedStateToMaintenance(cluster, nodeIndex); + } + + // Node in group 0 in maintenance, set storage node in group 1 to maintenance + { + ClusterState clusterState = clusterState(String.format("version:%d distributor:4 .0.s:d storage:4 .0.s:m", currentClusterStateVersion)); + int nodeIndex = 1; + checkSettingToMaintenanceIsAllowed(nodeIndex, nodeStateChangeChecker, clusterState); + setStorageNodeWantedStateToMaintenance(cluster, nodeIndex); + } + + // Nodes in group 0 and 1 in maintenance, try to set storage node in group 2 to maintenance while storage node 2 is down, should fail + { + ClusterState clusterState = 
clusterState(String.format("version:%d distributor:4 storage:4 .0.s:m .1.s:m .2.s:d", currentClusterStateVersion)); + int nodeIndex = 2; + cluster.clusterInfo().getStorageNodeInfo(nodeIndex).setReportedState(new NodeState(STORAGE, DOWN), 0); + Node node = new Node(STORAGE, nodeIndex); + Result result = nodeStateChangeChecker.evaluateTransition(node, clusterState, SAFE, UP_NODE_STATE, MAINTENANCE_NODE_STATE); + assertFalse(result.settingWantedStateIsAllowed(), result.toString()); + assertFalse(result.wantedStateAlreadySet()); + assertEquals("At most 2 groups can have wanted state: [0, 1, 2]", result.getReason()); + } + + // Nodes in group 0 and 1 in maintenance, try to set storage node in group 2 to maintenance, should fail + { + ClusterState clusterState = clusterState(String.format("version:%d distributor:4 storage:4 .0.s:m .1.s:m", currentClusterStateVersion)); + int nodeIndex = 2; + Node node = new Node(STORAGE, nodeIndex); + Result result = nodeStateChangeChecker.evaluateTransition(node, clusterState, SAFE, UP_NODE_STATE, MAINTENANCE_NODE_STATE); + assertFalse(result.settingWantedStateIsAllowed(), result.toString()); + assertFalse(result.wantedStateAlreadySet()); + assertEquals("At most 2 groups can have wanted state: [0, 1]", result.getReason()); + } + + } + + @Test + void testMaintenanceAllowedFor2Of4Groups8Nodes() { + // 4 groups with 2 nodes in each group + Collection<ConfiguredNode> nodes = createNodes(8); + StorDistributionConfig config = createDistributionConfig(8, 4); + + int maxNumberOfGroupsAllowedToBeDown = 2; + var cluster = new ContentCluster("Clustername", nodes, new Distribution(config), maxNumberOfGroupsAllowedToBeDown); + setAllNodesUp(cluster, HostInfo.createHostInfo(createDistributorHostInfo(4, 5, 6))); + var nodeStateChangeChecker = createChangeChecker(cluster); + + // All nodes up, set a storage node in group 0 to maintenance + { + ClusterState clusterState = defaultAllUpClusterState(8); + int nodeIndex = 0; + 
checkSettingToMaintenanceIsAllowed(nodeIndex, nodeStateChangeChecker, clusterState); + setStorageNodeWantedStateToMaintenance(cluster, nodeIndex); + } + + // 1 Node in group 0 in maintenance, try to set node 1 in group 0 to maintenance + { + ClusterState clusterState = clusterState(String.format("version:%d distributor:8 .0.s:d storage:8 .0.s:m", currentClusterStateVersion)); + int nodeIndex = 1; + checkSettingToMaintenanceIsAllowed(nodeIndex, nodeStateChangeChecker, clusterState); + setStorageNodeWantedStateToMaintenance(cluster, nodeIndex); + } + + // 2 nodes in group 0 in maintenance, try to set storage node 2 in group 1 to maintenance + { + ClusterState clusterState = clusterState(String.format("version:%d distributor:8 storage:8 .0.s:m .1.s:m", currentClusterStateVersion)); + int nodeIndex = 2; + checkSettingToMaintenanceIsAllowed(nodeIndex, nodeStateChangeChecker, clusterState); + setStorageNodeWantedStateToMaintenance(cluster, nodeIndex); + } + + // 2 nodes in group 0 and 1 in group 1 in maintenance, try to set storage node 4 in group 2 to maintenance, should fail (different group) + { + ClusterState clusterState = clusterState(String.format("version:%d distributor:8 storage:8 .0.s:m .1.s:m .2.s:m", currentClusterStateVersion)); + int nodeIndex = 4; + Node node = new Node(STORAGE, nodeIndex); + Result result = nodeStateChangeChecker.evaluateTransition(node, clusterState, SAFE, UP_NODE_STATE, MAINTENANCE_NODE_STATE); + assertFalse(result.settingWantedStateIsAllowed(), result.toString()); + assertFalse(result.wantedStateAlreadySet()); + assertEquals("At most 2 groups can have wanted state: [0, 1]", result.getReason()); + } + + // 2 nodes in group 0 and 1 in group 1 in maintenance, try to set storage node 3 in group 1 to maintenance + { + ClusterState clusterState = clusterState(String.format("version:%d distributor:8 storage:8 .0.s:m .1.s:m .2.s:m", currentClusterStateVersion)); + int nodeIndex = 3; + checkSettingToMaintenanceIsAllowed(nodeIndex, 
nodeStateChangeChecker, clusterState); + setStorageNodeWantedStateToMaintenance(cluster, nodeIndex); + } + + // 2 nodes in group 0 in maintenance, storage node 3 in group 1 is in maintenance with another description + // (set in maintenance by operator), try to set storage node 2 in group 1 to maintenance, should be allowed + { + ClusterState clusterState = clusterState(String.format("version:%d distributor:8 storage:8 .0.s:m .1.s:m .3.s:m", currentClusterStateVersion)); + setStorageNodeWantedState(cluster, 3, MAINTENANCE, "Maintenance, set by operator"); // Set to another description + setStorageNodeWantedState(cluster, 2, UP, ""); // Set back to UP, want to set this to maintenance again + int nodeIndex = 2; + Node node = new Node(STORAGE, nodeIndex); + Result result = nodeStateChangeChecker.evaluateTransition(node, clusterState, SAFE, UP_NODE_STATE, MAINTENANCE_NODE_STATE); + assertTrue(result.settingWantedStateIsAllowed(), result.toString()); + assertFalse(result.wantedStateAlreadySet()); + } + + } + + @ParameterizedTest + @ValueSource(ints = {-1, 1}) + void testSafeMaintenanceDisallowedWhenOtherDistributorInFlatClusterIsSuspended(int maxNumberOfGroupsAllowedToBeDown) { // Nodes 0-3, distributor 0 being down with "Orchestrator" description. - ContentCluster cluster = createCluster(4); + ContentCluster cluster = createCluster(4, maxNumberOfGroupsAllowedToBeDown); setDistributorNodeWantedState(cluster, 0, DOWN, "Orchestrator"); var nodeStateChangeChecker = createChangeChecker(cluster); ClusterState clusterStateWith0InMaintenance = clusterState(String.format( @@ -186,11 +306,12 @@ public class NodeStateChangeCheckerTest { result.getReason()); } - @Test - void testSafeMaintenanceDisallowedWhenDistributorInGroupIsDown() { + @ParameterizedTest + @ValueSource(ints = {-1, 1}) + void testSafeMaintenanceDisallowedWhenDistributorInGroupIsDown(int maxNumberOfGroupsAllowedToBeDown) { // Nodes 0-3, distributor 0 being in maintenance with "Orchestrator" description. 
// 2 groups: nodes 0-1 is group 0, 2-3 is group 1. - ContentCluster cluster = createCluster(4, 2); + ContentCluster cluster = createCluster(4, 2, maxNumberOfGroupsAllowedToBeDown); setDistributorNodeWantedState(cluster, 0, DOWN, "Orchestrator"); var nodeStateChangeChecker = new NodeStateChangeChecker(cluster, false); ClusterState clusterStateWith0InMaintenance = clusterState(String.format( @@ -204,7 +325,10 @@ public class NodeStateChangeCheckerTest { SAFE, UP_NODE_STATE, MAINTENANCE_NODE_STATE); assertFalse(result.settingWantedStateIsAllowed()); assertFalse(result.wantedStateAlreadySet()); - assertEquals("At most one group can have wanted state: Other distributor 0 in group 0 has wanted state Down", result.getReason()); + if (maxNumberOfGroupsAllowedToBeDown >= 1) + assertEquals("Wanted state already set for another node in groups: [0]", result.getReason()); + else + assertEquals("At most one group can have wanted state: Other distributor 0 in group 0 has wanted state Down", result.getReason()); } { @@ -213,16 +337,22 @@ public class NodeStateChangeCheckerTest { Result result = nodeStateChangeChecker.evaluateTransition( new Node(STORAGE, 1), clusterStateWith0InMaintenance, SAFE, UP_NODE_STATE, MAINTENANCE_NODE_STATE); - assertFalse(result.settingWantedStateIsAllowed(), result.getReason()); - assertEquals("Another distributor wants state DOWN: 0", result.getReason()); + if (maxNumberOfGroupsAllowedToBeDown >= 1) { + assertFalse(result.settingWantedStateIsAllowed(), result.getReason()); + assertEquals("Wanted state already set for another node in groups: [0]", result.getReason()); + } else { + assertFalse(result.settingWantedStateIsAllowed(), result.getReason()); + assertEquals("Another distributor wants state DOWN: 0", result.getReason()); + } } } - @Test - void testSafeMaintenanceWhenOtherStorageNodeInGroupIsSuspended() { + @ParameterizedTest + @ValueSource(ints = {-1, 1}) + void testSafeMaintenanceWhenOtherStorageNodeInGroupIsSuspended(int 
maxNumberOfGroupsAllowedToBeDown) { // Nodes 0-3, storage node 0 being in maintenance with "Orchestrator" description. // 2 groups: nodes 0-1 is group 0, 2-3 is group 1. - ContentCluster cluster = createCluster(4, 2); + ContentCluster cluster = createCluster(4, 2, maxNumberOfGroupsAllowedToBeDown); setStorageNodeWantedState(cluster, 0, MAINTENANCE, "Orchestrator"); var nodeStateChangeChecker = new NodeStateChangeChecker(cluster, false); ClusterState clusterStateWith0InMaintenance = clusterState(String.format( @@ -236,8 +366,11 @@ public class NodeStateChangeCheckerTest { SAFE, UP_NODE_STATE, MAINTENANCE_NODE_STATE); assertFalse(result.settingWantedStateIsAllowed()); assertFalse(result.wantedStateAlreadySet()); - assertEquals("At most one group can have wanted state: Other storage node 0 in group 0 has wanted state Maintenance", - result.getReason()); + if (maxNumberOfGroupsAllowedToBeDown >= 1) + assertEquals("At most 1 groups can have wanted state: [0]", result.getReason()); + else + assertEquals("At most one group can have wanted state: Other storage node 0 in group 0 has wanted state Maintenance", + result.getReason()); } { @@ -251,9 +384,10 @@ public class NodeStateChangeCheckerTest { } } - @Test - void testSafeSetStateDistributors() { - NodeStateChangeChecker nodeStateChangeChecker = createChangeChecker(createCluster(1)); + @ParameterizedTest + @ValueSource(ints = {-1, 1}) + void testSafeSetStateDistributors(int maxNumberOfGroupsAllowedToBeDown) { + NodeStateChangeChecker nodeStateChangeChecker = createChangeChecker(createCluster(1, 1, maxNumberOfGroupsAllowedToBeDown)); Result result = nodeStateChangeChecker.evaluateTransition( nodeDistributor, defaultAllUpClusterState(), SAFE, UP_NODE_STATE, MAINTENANCE_NODE_STATE); @@ -262,10 +396,11 @@ public class NodeStateChangeCheckerTest { assertTrue(result.getReason().contains("Safe-set of node state is only supported for storage nodes")); } - @Test - void testCanUpgradeSafeMissingStorage() { + @ParameterizedTest + 
@ValueSource(ints = {-1, 1}) + void testCanUpgradeSafeMissingStorage(int maxNumberOfGroupsAllowedToBeDown) { // Create a content cluster with 4 nodes, and storage node with index 3 down. - ContentCluster cluster = createCluster(4); + ContentCluster cluster = createCluster(4, maxNumberOfGroupsAllowedToBeDown); setAllNodesUp(cluster, HostInfo.createHostInfo(createDistributorHostInfo(4, 5, 6))); cluster.clusterInfo().getStorageNodeInfo(3).setReportedState(new NodeState(STORAGE, DOWN), 0); ClusterState clusterStateWith3Down = clusterState(String.format( @@ -282,16 +417,18 @@ public class NodeStateChangeCheckerTest { assertEquals("Another storage node has state DOWN: 3", result.getReason()); } - @Test - void testCanUpgradeStorageSafeYes() { - Result result = transitionToMaintenanceWithNoStorageNodesDown(createCluster(4), defaultAllUpClusterState()); + @ParameterizedTest + @ValueSource(ints = {-1, 1}) + void testCanUpgradeStorageSafeYes(int maxNumberOfGroupsAllowedToBeDown) { + Result result = transitionToMaintenanceWithNoStorageNodesDown(createCluster(4, 1, maxNumberOfGroupsAllowedToBeDown), defaultAllUpClusterState()); assertTrue(result.settingWantedStateIsAllowed()); assertFalse(result.wantedStateAlreadySet()); } - @Test - void testSetUpFailsIfReportedIsDown() { - ContentCluster cluster = createCluster(4); + @ParameterizedTest + @ValueSource(ints = {-1, 1}) + void testSetUpFailsIfReportedIsDown(int maxNumberOfGroupsAllowedToBeDown) { + ContentCluster cluster = createCluster(4, maxNumberOfGroupsAllowedToBeDown); NodeStateChangeChecker nodeStateChangeChecker = createChangeChecker(cluster); // Not setting nodes up -> all are down @@ -304,9 +441,10 @@ public class NodeStateChangeCheckerTest { // A node may be reported as Up but have a generated state of Down if it's part of // nodes taken down implicitly due to a group having too low node availability. 
- @Test - void testSetUpSucceedsIfReportedIsUpButGeneratedIsDown() { - ContentCluster cluster = createCluster(4); + @ParameterizedTest + @ValueSource(ints = {-1, 1}) + void testSetUpSucceedsIfReportedIsUpButGeneratedIsDown(int maxNumberOfGroupsAllowedToBeDown) { + ContentCluster cluster = createCluster(4, maxNumberOfGroupsAllowedToBeDown); NodeStateChangeChecker nodeStateChangeChecker = createChangeChecker(cluster); markAllNodesAsReportingStateUp(cluster); @@ -322,9 +460,10 @@ public class NodeStateChangeCheckerTest { assertFalse(result.wantedStateAlreadySet()); } - @Test - void testCanSetUpEvenIfOldWantedStateIsDown() { - ContentCluster cluster = createCluster(4); + @ParameterizedTest + @ValueSource(ints = {-1, 1}) + void testCanSetUpEvenIfOldWantedStateIsDown(int maxNumberOfGroupsAllowedToBeDown) { + ContentCluster cluster = createCluster(4, maxNumberOfGroupsAllowedToBeDown); NodeStateChangeChecker nodeStateChangeChecker = createChangeChecker(cluster); setAllNodesUp(cluster, HostInfo.createHostInfo(createDistributorHostInfo(4, 3, 6))); @@ -335,9 +474,10 @@ public class NodeStateChangeCheckerTest { assertFalse(result.wantedStateAlreadySet()); } - @Test - void testCanUpgradeStorageSafeNo() { - ContentCluster cluster = createCluster(4); + @ParameterizedTest + @ValueSource(ints = {-1, 1}) + void testCanUpgradeStorageSafeNo(int maxNumberOfGroupsAllowedToBeDown) { + ContentCluster cluster = createCluster(4, maxNumberOfGroupsAllowedToBeDown); NodeStateChangeChecker nodeStateChangeChecker = createChangeChecker(cluster); setAllNodesUp(cluster, HostInfo.createHostInfo(createDistributorHostInfo(4, 3, 6))); @@ -350,9 +490,10 @@ public class NodeStateChangeCheckerTest { result.getReason()); } - @Test - void testCanUpgradeIfMissingMinReplicationFactor() { - ContentCluster cluster = createCluster(4); + @ParameterizedTest + @ValueSource(ints = {-1, 1}) + void testCanUpgradeIfMissingMinReplicationFactor(int maxNumberOfGroupsAllowedToBeDown) { + ContentCluster cluster = 
createCluster(4, maxNumberOfGroupsAllowedToBeDown); NodeStateChangeChecker nodeStateChangeChecker = createChangeChecker(cluster); setAllNodesUp(cluster, HostInfo.createHostInfo(createDistributorHostInfo(4, 3, 6))); @@ -363,9 +504,10 @@ public class NodeStateChangeCheckerTest { assertFalse(result.wantedStateAlreadySet()); } - @Test - void testCanUpgradeIfStorageNodeMissingFromNodeInfo() { - ContentCluster cluster = createCluster(4); + @ParameterizedTest + @ValueSource(ints = {-1, 1}) + void testCanUpgradeIfStorageNodeMissingFromNodeInfo(int maxNumberOfGroupsAllowedToBeDown) { + ContentCluster cluster = createCluster(4, maxNumberOfGroupsAllowedToBeDown); NodeStateChangeChecker nodeStateChangeChecker = createChangeChecker(cluster); String hostInfo = "{\n" + " \"cluster-state-version\": 2,\n" + @@ -387,9 +529,10 @@ public class NodeStateChangeCheckerTest { assertFalse(result.wantedStateAlreadySet()); } - @Test - void testMissingDistributorState() { - ContentCluster cluster = createCluster(4); + @ParameterizedTest + @ValueSource(ints = {-1, 1}) + void testMissingDistributorState(int maxNumberOfGroupsAllowedToBeDown) { + ContentCluster cluster = createCluster(4, maxNumberOfGroupsAllowedToBeDown); NodeStateChangeChecker nodeStateChangeChecker = createChangeChecker(cluster); cluster.clusterInfo().getStorageNodeInfo(1).setReportedState(new NodeState(STORAGE, UP), 0); @@ -400,8 +543,8 @@ public class NodeStateChangeCheckerTest { assertEquals("Distributor node 0 has not reported any cluster state version yet.", result.getReason()); } - private Result transitionToSameState(State state, String oldDescription, String newDescription) { - ContentCluster cluster = createCluster(4); + private Result transitionToSameState(State state, String oldDescription, String newDescription, int maxNumberOfGroupsAllowedToBeDown) { + ContentCluster cluster = createCluster(4, maxNumberOfGroupsAllowedToBeDown); NodeStateChangeChecker nodeStateChangeChecker = createChangeChecker(cluster); NodeState 
currentNodeState = createNodeState(state, oldDescription); @@ -411,26 +554,29 @@ public class NodeStateChangeCheckerTest { currentNodeState, newNodeState); } - private Result transitionToSameState(String oldDescription, String newDescription) { - return transitionToSameState(MAINTENANCE, oldDescription, newDescription); + private Result transitionToSameState(String oldDescription, String newDescription, int maxNumberOfGroupsAllowedToBeDown) { + return transitionToSameState(MAINTENANCE, oldDescription, newDescription, maxNumberOfGroupsAllowedToBeDown); } - @Test - void testSettingUpWhenUpCausesAlreadySet() { - Result result = transitionToSameState(UP, "foo", "bar"); + @ParameterizedTest + @ValueSource(ints = {-1, 1}) + void testSettingUpWhenUpCausesAlreadySet(int maxNumberOfGroupsAllowedToBeDown) { + Result result = transitionToSameState(UP, "foo", "bar", maxNumberOfGroupsAllowedToBeDown); assertTrue(result.wantedStateAlreadySet()); } - @Test - void testSettingAlreadySetState() { - Result result = transitionToSameState("foo", "foo"); + @ParameterizedTest + @ValueSource(ints = {-1, 1}) + void testSettingAlreadySetState(int maxNumberOfGroupsAllowedToBeDown) { + Result result = transitionToSameState("foo", "foo", maxNumberOfGroupsAllowedToBeDown); assertFalse(result.settingWantedStateIsAllowed()); assertTrue(result.wantedStateAlreadySet()); } - @Test - void testDifferentDescriptionImpliesDenied() { - Result result = transitionToSameState("foo", "bar"); + @ParameterizedTest + @ValueSource(ints = {-1, 1}) + void testDifferentDescriptionImpliesDenied(int maxNumberOfGroupsAllowedToBeDown) { + Result result = transitionToSameState("foo", "bar", maxNumberOfGroupsAllowedToBeDown); assertFalse(result.settingWantedStateIsAllowed()); assertFalse(result.wantedStateAlreadySet()); } @@ -439,10 +585,9 @@ public class NodeStateChangeCheckerTest { NodeStateChangeChecker nodeStateChangeChecker = createChangeChecker(cluster); for (int x = 0; x < 
cluster.clusterInfo().getConfiguredNodes().size(); x++) { - State state = UP; - cluster.clusterInfo().getDistributorNodeInfo(x).setReportedState(new NodeState(DISTRIBUTOR, state), 0); + cluster.clusterInfo().getDistributorNodeInfo(x).setReportedState(new NodeState(DISTRIBUTOR, UP), 0); cluster.clusterInfo().getDistributorNodeInfo(x).setHostInfo(HostInfo.createHostInfo(createDistributorHostInfo(4, 5, 6))); - cluster.clusterInfo().getStorageNodeInfo(x).setReportedState(new NodeState(STORAGE, state), 0); + cluster.clusterInfo().getStorageNodeInfo(x).setReportedState(new NodeState(STORAGE, UP), 0); } return nodeStateChangeChecker.evaluateTransition( @@ -462,26 +607,29 @@ public class NodeStateChangeCheckerTest { return transitionToMaintenanceWithOneStorageNodeDown(cluster, clusterState); } - @Test - void testCanUpgradeWhenAllUp() { - Result result = transitionToMaintenanceWithNoStorageNodesDown(createCluster(4), defaultAllUpClusterState()); + @ParameterizedTest + @ValueSource(ints = {-1, 1}) + void testCanUpgradeWhenAllUp(int maxNumberOfGroupsAllowedToBeDown) { + Result result = transitionToMaintenanceWithNoStorageNodesDown(createCluster(4, maxNumberOfGroupsAllowedToBeDown), defaultAllUpClusterState()); assertTrue(result.settingWantedStateIsAllowed()); assertFalse(result.wantedStateAlreadySet()); } - @Test - void testCanUpgradeWhenAllUpOrRetired() { - Result result = transitionToMaintenanceWithNoStorageNodesDown(createCluster(4), defaultAllUpClusterState()); + @ParameterizedTest + @ValueSource(ints = {-1, 1}) + void testCanUpgradeWhenAllUpOrRetired(int maxNumberOfGroupsAllowedToBeDown) { + Result result = transitionToMaintenanceWithNoStorageNodesDown(createCluster(4, maxNumberOfGroupsAllowedToBeDown), defaultAllUpClusterState()); assertTrue(result.settingWantedStateIsAllowed()); assertFalse(result.wantedStateAlreadySet()); } - @Test - void testCanUpgradeWhenStorageIsDown() { + @ParameterizedTest + @ValueSource(ints = {-1, 1}) + void testCanUpgradeWhenStorageIsDown(int 
maxNumberOfGroupsAllowedToBeDown) { ClusterState clusterState = defaultAllUpClusterState(); var storageNodeIndex = nodeStorage.getIndex(); - ContentCluster cluster = createCluster(4); + ContentCluster cluster = createCluster(4, maxNumberOfGroupsAllowedToBeDown); NodeState downNodeState = new NodeState(STORAGE, DOWN); cluster.clusterInfo().getStorageNodeInfo(storageNodeIndex).setReportedState(downNodeState, 4 /* time */); clusterState.setNodeState(new Node(STORAGE, storageNodeIndex), downNodeState); @@ -491,13 +639,14 @@ public class NodeStateChangeCheckerTest { assertFalse(result.wantedStateAlreadySet()); } - @Test - void testCannotUpgradeWhenOtherStorageIsDown() { + @ParameterizedTest + @ValueSource(ints = {-1, 1}) + void testCannotUpgradeWhenOtherStorageIsDown(int maxNumberOfGroupsAllowedToBeDown) { int otherIndex = 2; // If this fails, just set otherIndex to some other valid index. assertNotEquals(nodeStorage.getIndex(), otherIndex); - ContentCluster cluster = createCluster(4); + ContentCluster cluster = createCluster(4, maxNumberOfGroupsAllowedToBeDown); ClusterState clusterState = defaultAllUpClusterState(); NodeState downNodeState = new NodeState(STORAGE, DOWN); cluster.clusterInfo().getStorageNodeInfo(otherIndex).setReportedState(downNodeState, 4 /* time */); @@ -509,9 +658,10 @@ public class NodeStateChangeCheckerTest { assertTrue(result.getReason().contains("Another storage node has state DOWN: 2")); } - @Test - void testNodeRatioRequirementConsidersGeneratedNodeStates() { - ContentCluster cluster = createCluster(4); + @ParameterizedTest + @ValueSource(ints = {-1, 1}) + void testNodeRatioRequirementConsidersGeneratedNodeStates(int maxNumberOfGroupsAllowedToBeDown) { + ContentCluster cluster = createCluster(4, maxNumberOfGroupsAllowedToBeDown); NodeStateChangeChecker nodeStateChangeChecker = createChangeChecker(cluster); markAllNodesAsReportingStateUp(cluster); @@ -531,62 +681,72 @@ public class NodeStateChangeCheckerTest { 
assertFalse(result.wantedStateAlreadySet()); } - @Test - void testDownDisallowedByNonRetiredState() { + @ParameterizedTest + @ValueSource(ints = {-1, 1}) + void testDownDisallowedByNonRetiredState(int maxNumberOfGroupsAllowedToBeDown) { Result result = evaluateDownTransition( defaultAllUpClusterState(), UP, currentClusterStateVersion, - 0); + 0, + maxNumberOfGroupsAllowedToBeDown); assertFalse(result.settingWantedStateIsAllowed()); assertFalse(result.wantedStateAlreadySet()); assertEquals("Only retired nodes are allowed to be set to DOWN in safe mode - is Up", result.getReason()); } - @Test - void testDownDisallowedByBuckets() { + @ParameterizedTest + @ValueSource(ints = {-1, 1}) + void testDownDisallowedByBuckets(int maxNumberOfGroupsAllowedToBeDown) { Result result = evaluateDownTransition( retiredClusterStateSuffix(), UP, currentClusterStateVersion, - 1); + 1, + maxNumberOfGroupsAllowedToBeDown); assertFalse(result.settingWantedStateIsAllowed()); assertFalse(result.wantedStateAlreadySet()); assertEquals("The storage node manages 1 buckets", result.getReason()); } - @Test - void testDownDisallowedByReportedState() { + @ParameterizedTest + @ValueSource(ints = {-1, 1}) + void testDownDisallowedByReportedState(int maxNumberOfGroupsAllowedToBeDown) { Result result = evaluateDownTransition( retiredClusterStateSuffix(), INITIALIZING, currentClusterStateVersion, - 0); + 0, + maxNumberOfGroupsAllowedToBeDown); assertFalse(result.settingWantedStateIsAllowed()); assertFalse(result.wantedStateAlreadySet()); assertEquals("Reported state (Initializing) is not UP, so no bucket data is available", result.getReason()); } - @Test - void testDownDisallowedByVersionMismatch() { + @ParameterizedTest + @ValueSource(ints = {-1, 1}) + void testDownDisallowedByVersionMismatch(int maxNumberOfGroupsAllowedToBeDown) { Result result = evaluateDownTransition( retiredClusterStateSuffix(), UP, currentClusterStateVersion - 1, - 0); + 0, + maxNumberOfGroupsAllowedToBeDown); 
assertFalse(result.settingWantedStateIsAllowed()); assertFalse(result.wantedStateAlreadySet()); assertEquals("Cluster controller at version 2 got info for storage node 1 at a different version 1", result.getReason()); } - @Test - void testAllowedToSetDown() { + @ParameterizedTest + @ValueSource(ints = {-1, 1}) + void testAllowedToSetDown(int maxNumberOfGroupsAllowedToBeDown) { Result result = evaluateDownTransition( retiredClusterStateSuffix(), UP, currentClusterStateVersion, - 0); + 0, + maxNumberOfGroupsAllowedToBeDown); assertTrue(result.settingWantedStateIsAllowed()); assertFalse(result.wantedStateAlreadySet()); } @@ -594,8 +754,9 @@ public class NodeStateChangeCheckerTest { private Result evaluateDownTransition(ClusterState clusterState, State reportedState, int hostInfoClusterStateVersion, - int lastAlldisksBuckets) { - ContentCluster cluster = createCluster(4); + int lastAlldisksBuckets, + int maxNumberOfGroupsAllowedToBeDown) { + ContentCluster cluster = createCluster(4, maxNumberOfGroupsAllowedToBeDown); NodeStateChangeChecker nodeStateChangeChecker = createChangeChecker(cluster); StorageNodeInfo nodeInfo = cluster.clusterInfo().getStorageNodeInfo(nodeStorage.getIndex()); @@ -763,6 +924,18 @@ public class NodeStateChangeCheckerTest { return configBuilder.build(); } + private void checkSettingToMaintenanceIsAllowed(int nodeIndex, NodeStateChangeChecker nodeStateChangeChecker, ClusterState clusterState) { + Node node = new Node(STORAGE, nodeIndex); + Result result = nodeStateChangeChecker.evaluateTransition(node, clusterState, SAFE, UP_NODE_STATE, MAINTENANCE_NODE_STATE); + assertTrue(result.settingWantedStateIsAllowed(), result.toString()); + assertFalse(result.wantedStateAlreadySet()); + assertEquals("Preconditions fulfilled and new state different", result.getReason()); + } + + private void setStorageNodeWantedStateToMaintenance(ContentCluster cluster, int nodeIndex) { + setStorageNodeWantedState(cluster, nodeIndex, MAINTENANCE, "Orchestrator"); + } + 
private void setStorageNodeWantedState(ContentCluster cluster, int nodeIndex, State state, String description) { NodeState nodeState = new NodeState(STORAGE, state); cluster.clusterInfo().getStorageNodeInfo(nodeIndex).setWantedState(nodeState.setDescription(description)); diff --git a/configdefinitions/src/vespa/fleetcontroller.def b/configdefinitions/src/vespa/fleetcontroller.def index 98b4c3b0216..93a20e4ee0d 100644 --- a/configdefinitions/src/vespa/fleetcontroller.def +++ b/configdefinitions/src/vespa/fleetcontroller.def @@ -199,6 +199,7 @@ cluster_feed_block_limit{} double # This is in absolute numbers, so 0.01 implies that a block limit of 0.8 effectively # becomes 0.79 for an already blocked node. cluster_feed_block_noise_level double default=0.0 -# For apps that have several groups this controls how many are allowed to be down -# simultaneously. -max_number_of_groups_allowed_to_be_down int default=1 + +# For apps that have several groups this controls how many groups are allowed to +# be down simultaneously in this cluster. +max_number_of_groups_allowed_to_be_down int default=-1 diff --git a/parent/pom.xml b/parent/pom.xml index 8d2f802e34b..ffd8c596277 100644 --- a/parent/pom.xml +++ b/parent/pom.xml @@ -1009,6 +1009,11 @@ <version>${junit.version}</version> </dependency> <dependency> + <groupId>org.junit.jupiter</groupId> + <artifactId>junit-jupiter-params</artifactId> + <version>${junit.version}</version> + </dependency> + <dependency> <groupId>org.junit.vintage</groupId> <artifactId>junit-vintage-engine</artifactId> <version>${junit.version}</version> |