diff options
3 files changed, 49 insertions, 10 deletions
diff --git a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/ContentCluster.java b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/ContentCluster.java index b83d70b8656..2535589395d 100644 --- a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/ContentCluster.java +++ b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/ContentCluster.java @@ -32,7 +32,7 @@ public class ContentCluster { private final int maxNumberOfGroupsAllowedToBeDown; public ContentCluster(String clusterName, Collection<ConfiguredNode> configuredNodes, Distribution distribution) { - this(clusterName, configuredNodes, distribution, 1); + this(clusterName, configuredNodes, distribution, -1); } public ContentCluster(FleetControllerOptions options) { @@ -40,9 +40,9 @@ public class ContentCluster { } ContentCluster(String clusterName, - Collection<ConfiguredNode> configuredNodes, - Distribution distribution, - int maxNumberOfGroupsAllowedToBeDown) { + Collection<ConfiguredNode> configuredNodes, + Distribution distribution, + int maxNumberOfGroupsAllowedToBeDown) { if (configuredNodes == null) throw new IllegalArgumentException("Nodes must be set"); this.clusterName = clusterName; this.distribution = distribution; diff --git a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/NodeStateChangeChecker.java b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/NodeStateChangeChecker.java index c323149e99b..50ea6d4acde 100644 --- a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/NodeStateChangeChecker.java +++ b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/NodeStateChangeChecker.java @@ -227,9 +227,19 @@ public class NodeStateChangeChecker { return allowSettingOfWantedState(); } - var result = otherNodesHaveWantedState(nodeInfo, newDescription); - if (result.isPresent()) - return result.get(); + if (maxNumberOfGroupsAllowedToBeDown == -1) { + var otherGroupCheck = anotherNodeInAnotherGroupHasWantedState(nodeInfo); + if (!otherGroupCheck.settingWantedStateIsAllowed()) { + return otherGroupCheck; + } + if (anotherNodeInGroupAlreadyAllowed(nodeInfo, newDescription)) { + return allowSettingOfWantedState(); + } + } else { + var result = otherNodesHaveWantedState(nodeInfo, newDescription); + if (result.isPresent()) + return result.get(); + } Result allNodesAreUpCheck = checkAllNodesAreUp(clusterState); if (!allNodesAreUpCheck.settingWantedStateIsAllowed()) { @@ -247,6 +257,34 @@ public class NodeStateChangeChecker { } /** + * Returns a disallow-result if there is another node (in another group, if hierarchical) + * that has a wanted state != UP. We disallow more than 1 suspended node/group at a time. + */ + private Result anotherNodeInAnotherGroupHasWantedState(StorageNodeInfo nodeInfo) { + if (groupVisiting.isHierarchical()) { + SettableOptional<Result> anotherNodeHasWantedState = new SettableOptional<>(); + + groupVisiting.visit(group -> { + if (!groupContainsNode(group, nodeInfo.getNode())) { + Result result = otherNodeInGroupHasWantedState(group); + if (!result.settingWantedStateIsAllowed()) { + anotherNodeHasWantedState.set(result); + // Have found a node that is suspended, halt the visiting + return false; + } + } + + return true; + }); + + return anotherNodeHasWantedState.asOptional().orElseGet(Result::allowSettingOfWantedState); + } else { + // Return a disallow-result if there is another node with a wanted state + return otherNodeHasWantedState(nodeInfo); + } + } + + /** * Returns an optional Result, where return value is: * For flat setup: Return Optional.of(disallowed) if wanted state is set on some node, else Optional.empty * For hierarchical setup: No wanted state for other nodes, return Optional.empty diff --git a/configdefinitions/src/vespa/fleetcontroller.def b/configdefinitions/src/vespa/fleetcontroller.def index 10eb408ed69..3fe29bfd071 100644 --- a/configdefinitions/src/vespa/fleetcontroller.def +++ b/configdefinitions/src/vespa/fleetcontroller.def @@ -207,6 +207,7 @@ cluster_feed_block_limit{} double # This is in absolute numbers, so 0.01 implies that a block limit of 0.8 effectively # becomes 0.79 for an already blocked node. cluster_feed_block_noise_level double default=0.0 -# For apps that have several groups this controls how many are allowed to be down -# simultaneously. -max_number_of_groups_allowed_to_be_down int default=1 + +# For apps that have several groups this controls how many groups are allowed to +# be down simultaneously in this cluster. +max_number_of_groups_allowed_to_be_down int default=-1 |