about summary refs log tree commit diff stats
diff options
context:
space:
mode:
author Harald Musum <musum@yahooinc.com> 2023-04-14 11:25:32 +0200
committer Harald Musum <musum@yahooinc.com> 2023-04-14 11:25:32 +0200
commit 6e2fc3a4bb3fd81f26357f875f7349e11ca9d3ab (patch)
tree 26a910247e705ef7e652d9387f748f080637ad85
parent 16176985401e8fef7bd3d95e081c333cd652fe30 (diff)
Add back original code and use it for default config
Set default value for max_number_of_groups_allowed_to_be_down to -1 and use that to switch between old and new code
-rw-r--r-- clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/ContentCluster.java 8
-rw-r--r-- clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/NodeStateChangeChecker.java 44
-rw-r--r-- configdefinitions/src/vespa/fleetcontroller.def 7
3 files changed, 49 insertions, 10 deletions
diff --git a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/ContentCluster.java b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/ContentCluster.java
index b83d70b8656..2535589395d 100644
--- a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/ContentCluster.java
+++ b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/ContentCluster.java
@@ -32,7 +32,7 @@ public class ContentCluster {
private final int maxNumberOfGroupsAllowedToBeDown;
public ContentCluster(String clusterName, Collection<ConfiguredNode> configuredNodes, Distribution distribution) {
- this(clusterName, configuredNodes, distribution, 1);
+ this(clusterName, configuredNodes, distribution, -1);
}
public ContentCluster(FleetControllerOptions options) {
@@ -40,9 +40,9 @@ public class ContentCluster {
}
ContentCluster(String clusterName,
- Collection<ConfiguredNode> configuredNodes,
- Distribution distribution,
- int maxNumberOfGroupsAllowedToBeDown) {
+ Collection<ConfiguredNode> configuredNodes,
+ Distribution distribution,
+ int maxNumberOfGroupsAllowedToBeDown) {
if (configuredNodes == null) throw new IllegalArgumentException("Nodes must be set");
this.clusterName = clusterName;
this.distribution = distribution;
diff --git a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/NodeStateChangeChecker.java b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/NodeStateChangeChecker.java
index c323149e99b..50ea6d4acde 100644
--- a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/NodeStateChangeChecker.java
+++ b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/NodeStateChangeChecker.java
@@ -227,9 +227,19 @@ public class NodeStateChangeChecker {
return allowSettingOfWantedState();
}
- var result = otherNodesHaveWantedState(nodeInfo, newDescription);
- if (result.isPresent())
- return result.get();
+ if (maxNumberOfGroupsAllowedToBeDown == -1) {
+ var otherGroupCheck = anotherNodeInAnotherGroupHasWantedState(nodeInfo);
+ if (!otherGroupCheck.settingWantedStateIsAllowed()) {
+ return otherGroupCheck;
+ }
+ if (anotherNodeInGroupAlreadyAllowed(nodeInfo, newDescription)) {
+ return allowSettingOfWantedState();
+ }
+ } else {
+ var result = otherNodesHaveWantedState(nodeInfo, newDescription);
+ if (result.isPresent())
+ return result.get();
+ }
Result allNodesAreUpCheck = checkAllNodesAreUp(clusterState);
if (!allNodesAreUpCheck.settingWantedStateIsAllowed()) {
@@ -247,6 +257,34 @@ public class NodeStateChangeChecker {
}
/**
+ * Returns a disallow-result if there is another node (in another group, if hierarchical)
+ * that has a wanted state != UP. We disallow more than 1 suspended node/group at a time.
+ */
+ private Result anotherNodeInAnotherGroupHasWantedState(StorageNodeInfo nodeInfo) {
+ if (groupVisiting.isHierarchical()) {
+ SettableOptional<Result> anotherNodeHasWantedState = new SettableOptional<>();
+
+ groupVisiting.visit(group -> {
+ if (!groupContainsNode(group, nodeInfo.getNode())) {
+ Result result = otherNodeInGroupHasWantedState(group);
+ if (!result.settingWantedStateIsAllowed()) {
+ anotherNodeHasWantedState.set(result);
+ // Have found a node that is suspended, halt the visiting
+ return false;
+ }
+ }
+
+ return true;
+ });
+
+ return anotherNodeHasWantedState.asOptional().orElseGet(Result::allowSettingOfWantedState);
+ } else {
+ // Return a disallow-result if there is another node with a wanted state
+ return otherNodeHasWantedState(nodeInfo);
+ }
+ }
+
+ /**
* Returns an optional Result, where return value is:
* For flat setup: Return Optional.of(disallowed) if wanted state is set on some node, else Optional.empty
* For hierarchical setup: No wanted state for other nodes, return Optional.empty
diff --git a/configdefinitions/src/vespa/fleetcontroller.def b/configdefinitions/src/vespa/fleetcontroller.def
index 10eb408ed69..3fe29bfd071 100644
--- a/configdefinitions/src/vespa/fleetcontroller.def
+++ b/configdefinitions/src/vespa/fleetcontroller.def
@@ -207,6 +207,7 @@ cluster_feed_block_limit{} double
# This is in absolute numbers, so 0.01 implies that a block limit of 0.8 effectively
# becomes 0.79 for an already blocked node.
cluster_feed_block_noise_level double default=0.0
-# For apps that have several groups this controls how many are allowed to be down
-# simultaneously.
-max_number_of_groups_allowed_to_be_down int default=1
+
+# For apps that have several groups this controls how many groups are allowed to
+# be down simultaneously in this cluster. -1 (the default) selects the original check.
+max_number_of_groups_allowed_to_be_down int default=-1