author     Harald Musum <musum@yahooinc.com>   2023-03-30 15:28:39 +0200
committer  Harald Musum <musum@yahooinc.com>   2023-03-30 15:28:39 +0200
commit     05b4c5f5b2192657f6c1ecd29e04593abfa4ea6f (patch)
tree       eaea260dc925521c56d9585190a9505803500271
parent     5b4d42061b67488998e48e571e0f41292e299987 (diff)
Allow more than 1 group in a content cluster to be down at the same time
Based on config; all functional changes are guarded by the config field max_number_of_groups_allowed_to_be_down in the fleetcontroller config.
-rw-r--r--  clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/NodeStateChangeChecker.java      90
-rw-r--r--  clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/NodeStateChangeCheckerTest.java  143
2 files changed, 217 insertions(+), 16 deletions(-)
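In short: in SAFE mode the cluster controller previously allowed nodes in only a single group to be down; with this change it accepts maintenance requests for nodes in up to max_number_of_groups_allowed_to_be_down distinct groups. A minimal sketch of how the feature is exercised, using the constructor and evaluateTransition call that appear in the new tests below (nodes, config and clusterState stand for values built by the test helpers):

    // Cluster with 4 one-node groups; nodes in up to 2 groups may be down at once
    var cluster = new ContentCluster("Clustername", nodes, new Distribution(config), 2);
    var checker = new NodeStateChangeChecker(cluster, false);

    // SAFE request to put storage node 0 into maintenance; allowed as long as
    // fewer than 2 groups already have storage nodes in maintenance
    Result result = checker.evaluateTransition(
            new Node(STORAGE, 0), clusterState, SAFE, UP_NODE_STATE, MAINTENANCE_NODE_STATE);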
diff --git a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/NodeStateChangeChecker.java b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/NodeStateChangeChecker.java
index e242833fd0c..adbc8da8d08 100644
--- a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/NodeStateChangeChecker.java
+++ b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/NodeStateChangeChecker.java
@@ -13,13 +13,18 @@ import com.yahoo.vespa.clustercontroller.core.hostinfo.HostInfo;
import com.yahoo.vespa.clustercontroller.core.hostinfo.Metrics;
import com.yahoo.vespa.clustercontroller.core.hostinfo.StorageNode;
import com.yahoo.vespa.clustercontroller.utils.staterestapi.requests.SetUnitStateRequest;
+import java.util.Collection;
+import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Objects;
import java.util.Optional;
+import java.util.logging.Logger;
+import java.util.stream.Collectors;
import static com.yahoo.vdslib.state.NodeType.STORAGE;
import static com.yahoo.vdslib.state.State.DOWN;
+import static com.yahoo.vdslib.state.State.MAINTENANCE;
import static com.yahoo.vdslib.state.State.RETIRED;
import static com.yahoo.vdslib.state.State.UP;
import static com.yahoo.vespa.clustercontroller.core.NodeStateChangeChecker.Result.allowSettingOfWantedState;
@@ -27,6 +32,8 @@ import static com.yahoo.vespa.clustercontroller.core.NodeStateChangeChecker.Resu
import static com.yahoo.vespa.clustercontroller.core.NodeStateChangeChecker.Result.createDisallowed;
import static com.yahoo.vespa.clustercontroller.utils.staterestapi.requests.SetUnitStateRequest.Condition.FORCE;
import static com.yahoo.vespa.clustercontroller.utils.staterestapi.requests.SetUnitStateRequest.Condition.SAFE;
+import static java.util.logging.Level.FINE;
+import static java.util.logging.Level.INFO;
/**
* Checks if a node can be upgraded.
@@ -35,8 +42,9 @@ import static com.yahoo.vespa.clustercontroller.utils.staterestapi.requests.SetU
*/
public class NodeStateChangeChecker {
- public static final String BUCKETS_METRIC_NAME = "vds.datastored.bucket_space.buckets_total";
- public static final Map<String, String> BUCKETS_METRIC_DIMENSIONS = Map.of("bucketSpace", "default");
+ private static final Logger log = Logger.getLogger(NodeStateChangeChecker.class.getName());
+ private static final String BUCKETS_METRIC_NAME = "vds.datastored.bucket_space.buckets_total";
+ private static final Map<String, String> BUCKETS_METRIC_DIMENSIONS = Map.of("bucketSpace", "default");
private final int requiredRedundancy;
private final HierarchicalGroupVisiting groupVisiting;
@@ -50,6 +58,8 @@ public class NodeStateChangeChecker {
this.clusterInfo = cluster.clusterInfo();
this.inMoratorium = inMoratorium;
this.maxNumberOfGroupsAllowedToBeDown = cluster.maxNumberOfGroupsAllowedToBeDown();
+ if ( ! groupVisiting.isHierarchical() && maxNumberOfGroupsAllowedToBeDown > 1)
+ throw new IllegalArgumentException("Cannot have both 1 group and maxNumberOfGroupsAllowedToBeDown > 1");
}
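Note the new constructor guard above: the config value is validated eagerly, so a flat (non-hierarchical) topology combined with maxNumberOfGroupsAllowedToBeDown > 1 fails at construction time instead of producing surprising gating behavior later. A sketch of the failure mode, assuming a hypothetical single-group cluster whose config nonetheless sets the maximum to 2:

    // flatCluster: hypothetical single-group ContentCluster with
    // max_number_of_groups_allowed_to_be_down set to 2 in its config
    new NodeStateChangeChecker(flatCluster, false);  // throws IllegalArgumentException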
public static class Result {
@@ -214,26 +224,37 @@ public class NodeStateChangeChecker {
oldWantedState.getState() + ": " + oldWantedState.getDescription());
}
- Result otherGroupCheck = anotherNodeInAnotherGroupHasWantedState(nodeInfo);
- if (!otherGroupCheck.settingWantedStateIsAllowed()) {
- return otherGroupCheck;
+ if (moreThanOneGroupAllowedToBeDown()) {
+ Result otherGroupCheck = nodesInOtherGroups(nodeInfo.getNode());
+ if (!otherGroupCheck.settingWantedStateIsAllowed()) {
+ return otherGroupCheck;
+ }
+ } else {
+ Result otherGroupCheck = anotherNodeInAnotherGroupHasWantedState(nodeInfo);
+ if (!otherGroupCheck.settingWantedStateIsAllowed()) {
+ return otherGroupCheck;
+ }
}
if (clusterState.getNodeState(nodeInfo.getNode()).getState() == DOWN) {
+ log.log(FINE, "node is DOWN, allow");
return allowSettingOfWantedState();
}
if (anotherNodeInGroupAlreadyAllowed(nodeInfo, newDescription)) {
+ log.log(FINE, "anotherNodeInGroupAlreadyAllowed, allow");
return allowSettingOfWantedState();
}
Result allNodesAreUpCheck = checkAllNodesAreUp(clusterState);
if (!allNodesAreUpCheck.settingWantedStateIsAllowed()) {
+ log.log(FINE, "allNodesAreUpCheck: " + allNodesAreUpCheck);
return allNodesAreUpCheck;
}
Result checkDistributorsResult = checkDistributors(nodeInfo.getNode(), clusterState.getVersion());
if (!checkDistributorsResult.settingWantedStateIsAllowed()) {
+ log.log(FINE, "checkDistributors: "+ checkDistributorsResult);
return checkDistributorsResult;
}
@@ -268,6 +289,29 @@ public class NodeStateChangeChecker {
}
}
+ /**
+ * Returns a disallow-result if nodes in other groups are already allowed to be down,
+ * such that taking this node down would make the number of groups with nodes down exceed maxNumberOfGroupsAllowedToBeDown
+ */
+ private Result nodesInOtherGroups(Node node) {
+ log.log(INFO, maxNumberOfGroupsAllowedToBeDown + " groups allowed to be down");
+ var groupsWithNodesInMaintenance = groupsWithStorageNodesInMaintenance();
+ log.log(INFO, "groupsWithStorageNodesInMaintenance " + groupsWithNodesInMaintenance);
+ if (! groupsWithNodesInMaintenance.isEmpty() && oneOfGroupsContainsNode(groupsWithNodesInMaintenance, node)) {
+ log.log(INFO, "a group with storage nodes in maintenance contains node " + node);
+ return allowSettingOfWantedState();
+ }
+ if (groupsWithNodesInMaintenance.size() >= maxNumberOfGroupsAllowedToBeDown)
+ return createDisallowed("Nodes in " + groupsWithNodesInMaintenance.size() + " groups are already down, cannot take down another node");
+ else {
+ return allowSettingOfWantedState();
+ }
+ }
+
+ private boolean moreThanOneGroupAllowedToBeDown() {
+ return maxNumberOfGroupsAllowedToBeDown > 1;
+ }
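Taken together, nodesInOtherGroups() and moreThanOneGroupAllowedToBeDown() implement a simple rule. A standalone sketch (hypothetical class and method names, not from the commit) that mirrors it, producing the same outcomes the new tests assert:

    import java.util.Set;

    final class GroupDownGate {
        // A node in a group that already has nodes in maintenance may always be
        // taken down; otherwise the count of down groups must stay below the maximum.
        static boolean allowDown(Set<Integer> groupsAlreadyDown, int requestedGroup, int max) {
            if (groupsAlreadyDown.contains(requestedGroup)) return true; // no new group goes down
            return groupsAlreadyDown.size() < max;                       // would add one more down group
        }

        public static void main(String[] args) {
            int max = 2;
            System.out.println(allowDown(Set.of(0), 1, max));    // true: only 1 group down so far
            System.out.println(allowDown(Set.of(0, 1), 2, max)); // false: already 2 groups down
            System.out.println(allowDown(Set.of(0, 1), 0, max)); // true: group 0 is already down
        }
    }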
+
/** Returns a disallow-result, if there is a node in the group with wanted state != UP. */
private Result otherNodeInGroupHasWantedState(Group group) {
for (var configuredNode : group.getNodes()) {
@@ -354,7 +398,22 @@ public class NodeStateChangeChecker {
return false;
}
+ private static boolean oneOfGroupsContainsNode(Collection<Group> groups, Node node) {
+ for (Group group : groups) {
+ for (ConfiguredNode configuredNode : group.getNodes()) {
+ if (configuredNode.index() == node.getIndex()) {
+ return true;
+ }
+ }
+ }
+
+ return false;
+ }
+
private Result checkAllNodesAreUp(ClusterState clusterState) {
+ // TODO Revisit this, check that all nodes in other groups are up?
+ if (moreThanOneGroupAllowedToBeDown()) return allowSettingOfWantedState();
+
// This method verifies both storage nodes and distributors are up (or retired).
// The complicated part is making a summary error message.
@@ -441,4 +500,25 @@ public class NodeStateChangeChecker {
return allowSettingOfWantedState();
}
+ private Collection<Group> groupsWithStorageNodesInMaintenance() {
+ var groupIndexes = clusterInfo.getStorageNodeInfos().stream()
+ .filter(sni -> MAINTENANCE.equals(sni.getWantedState().getState()))
+ .map(NodeInfo::getGroup)
+ .filter(Objects::nonNull)
+ .filter(Group::isLeafGroup)
+ .map(Group::getIndex)
+ .collect(Collectors.toSet());
+
+ // Groups are not equal even if they have the same index
+ var groups = new HashMap<Integer, Group>();
+ for (NodeInfo nodeInfo : clusterInfo.getStorageNodeInfos()) {
+ Group group = nodeInfo.getGroup();
+ int index = group.getIndex();
+ if (groupIndexes.contains(index) && !groups.containsKey(index))
+ groups.put(index, group);
+ }
+
+ return groups.values();
+ }
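The HashMap loop above deduplicates leaf groups by index because Group instances obtained from different NodeInfo objects do not compare equal even when they represent the same group. An equivalent stream formulation (a sketch, not part of the commit) keeps the first Group seen for each index:

    Map<Integer, Group> byIndex = clusterInfo.getStorageNodeInfos().stream()
            .map(NodeInfo::getGroup)
            .filter(g -> g != null && groupIndexes.contains(g.getIndex()))
            .collect(Collectors.toMap(Group::getIndex, g -> g, (first, second) -> first));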
+
}
diff --git a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/NodeStateChangeCheckerTest.java b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/NodeStateChangeCheckerTest.java
index a22785e6d7a..58f9b563288 100644
--- a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/NodeStateChangeCheckerTest.java
+++ b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/NodeStateChangeCheckerTest.java
@@ -19,6 +19,7 @@ import static com.yahoo.vdslib.state.NodeType.DISTRIBUTOR;
import static com.yahoo.vdslib.state.NodeType.STORAGE;
import static com.yahoo.vdslib.state.State.DOWN;
import static com.yahoo.vdslib.state.State.INITIALIZING;
+import static com.yahoo.vdslib.state.State.MAINTENANCE;
import static com.yahoo.vdslib.state.State.UP;
import static com.yahoo.vespa.clustercontroller.core.NodeStateChangeChecker.Result;
import static com.yahoo.vespa.clustercontroller.utils.staterestapi.requests.SetUnitStateRequest.Condition.FORCE;
@@ -37,7 +38,7 @@ public class NodeStateChangeCheckerTest {
private static final Node nodeStorage = new Node(STORAGE, 1);
private static final NodeState UP_NODE_STATE = new NodeState(STORAGE, UP);
- private static final NodeState MAINTENANCE_NODE_STATE = createNodeState(State.MAINTENANCE, "Orchestrator");
+ private static final NodeState MAINTENANCE_NODE_STATE = createNodeState(MAINTENANCE, "Orchestrator");
private static final NodeState DOWN_NODE_STATE = createNodeState(DOWN, "RetireEarlyExpirer");
private static NodeState createNodeState(State state, String description) {
@@ -53,7 +54,11 @@ public class NodeStateChangeCheckerTest {
}
private static ClusterState defaultAllUpClusterState() {
- return clusterState(String.format("version:%d distributor:4 storage:4", currentClusterStateVersion));
+ return defaultAllUpClusterState(4);
+ }
+
+ private static ClusterState defaultAllUpClusterState(int nodeCount) {
+ return clusterState(String.format("version:%d distributor:" + nodeCount + " storage:" + nodeCount, currentClusterStateVersion));
}
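For context, the compact cluster-state strings built here follow Vespa's cluster state syntax: distributor:N and storage:N give the node counts (all nodes implicitly up), and a suffix such as .0.s:m or .0.s:d overrides the state of a single node (node 0 of the preceding node type to maintenance or down). So, for example, "version:7 distributor:4 storage:4 .0.s:m" means four distributors up, storage node 0 in maintenance, and storage nodes 1-3 up.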
private NodeStateChangeChecker createChangeChecker(ContentCluster cluster) {
@@ -144,7 +149,7 @@ public class NodeStateChangeCheckerTest {
void testSafeMaintenanceDisallowedWhenOtherStorageNodeInFlatClusterIsSuspended() {
// Nodes 0-3, storage node 0 being in maintenance with "Orchestrator" description.
ContentCluster cluster = createCluster(4);
- cluster.clusterInfo().getStorageNodeInfo(0).setWantedState(new NodeState(STORAGE, State.MAINTENANCE).setDescription("Orchestrator"));
+ setStorageNodeWantedStateToMaintenance(cluster, 0);
var nodeStateChangeChecker = createChangeChecker(cluster);
ClusterState clusterStateWith0InMaintenance = clusterState(String.format(
"version:%d distributor:4 storage:4 .0.s:m",
@@ -160,11 +165,105 @@ public class NodeStateChangeCheckerTest {
}
@Test
+ void testMaintenanceAllowedFor2Of4Groups() {
+ // 4 groups with 1 node in each group
+ Collection<ConfiguredNode> nodes = createNodes(4);
+ StorDistributionConfig config = createDistributionConfig(4, 4);
+
+ int maxNumberOfGroupsAllowedToBeDown = 2;
+ var cluster = new ContentCluster("Clustername", nodes, new Distribution(config), maxNumberOfGroupsAllowedToBeDown);
+ setAllNodesUp(cluster, HostInfo.createHostInfo(createDistributorHostInfo(4, 5, 6)));
+ var nodeStateChangeChecker = createChangeChecker(cluster);
+
+ // All nodes up, set a storage node in group 0 to maintenance
+ {
+ int nodeIndex = 0;
+ checkSettingToMaintenanceIsAllowed(nodeIndex, nodeStateChangeChecker, defaultAllUpClusterState());
+ setStorageNodeWantedStateToMaintenance(cluster, nodeIndex);
+ }
+
+ // Node in group 0 in maintenance, set storage node in group 1 to maintenance
+ {
+ ClusterState clusterState = clusterState(String.format("version:%d distributor:4 .0.s:d storage:4 .0.s:m", currentClusterStateVersion));
+ int nodeIndex = 1;
+ checkSettingToMaintenanceIsAllowed(nodeIndex, nodeStateChangeChecker, clusterState);
+ setStorageNodeWantedStateToMaintenance(cluster, nodeIndex);
+ }
+
+ // Nodes in group 0 and 1 in maintenance, try to set storage node in group 2 to maintenance, should fail
+ {
+ ClusterState clusterState = clusterState(String.format("version:%d distributor:4 storage:4 .0.s:m .1.s:m", currentClusterStateVersion));
+ int nodeIndex = 2;
+ Node node = new Node(STORAGE, nodeIndex);
+ Result result = nodeStateChangeChecker.evaluateTransition(node, clusterState, SAFE, UP_NODE_STATE, MAINTENANCE_NODE_STATE);
+ assertFalse(result.settingWantedStateIsAllowed(), result.toString());
+ assertFalse(result.wantedStateAlreadySet());
+ assertEquals("Nodes in 2 groups are already down, cannot take down another node", result.getReason());
+ }
+
+ }
+
+ @Test
+ void testMaintenanceAllowedFor2Of4Groups8Nodes() {
+ // 4 groups with 2 nodes in each group
+ Collection<ConfiguredNode> nodes = createNodes(8);
+ StorDistributionConfig config = createDistributionConfig(8, 4);
+
+ int maxNumberOfGroupsAllowedToBeDown = 2;
+ var cluster = new ContentCluster("Clustername", nodes, new Distribution(config), maxNumberOfGroupsAllowedToBeDown);
+ setAllNodesUp(cluster, HostInfo.createHostInfo(createDistributorHostInfo(4, 5, 6)));
+ var nodeStateChangeChecker = createChangeChecker(cluster);
+
+ // All nodes up, set a storage node in group 0 to maintenance
+ {
+ ClusterState clusterState = defaultAllUpClusterState(8);
+ int nodeIndex = 0;
+ checkSettingToMaintenanceIsAllowed(nodeIndex, nodeStateChangeChecker, clusterState);
+ setStorageNodeWantedStateToMaintenance(cluster, nodeIndex);
+ }
+
+ // 1 Node in group 0 in maintenance, try to set node 1 in group 0 to maintenance
+ {
+ ClusterState clusterState = clusterState(String.format("version:%d distributor:8 .0.s:d storage:8 .0.s:m", currentClusterStateVersion));
+ int nodeIndex = 1;
+ checkSettingToMaintenanceIsAllowed(nodeIndex, nodeStateChangeChecker, clusterState);
+ setStorageNodeWantedStateToMaintenance(cluster, nodeIndex);
+ }
+
+ // Nodes in group 0 in maintenance, try to set storage node 2 in group 1 to maintenance
+ {
+ ClusterState clusterState = clusterState(String.format("version:%d distributor:8 storage:8 .0.s:m .1.s:m", currentClusterStateVersion));
+ int nodeIndex = 2;
+ checkSettingToMaintenanceIsAllowed(nodeIndex, nodeStateChangeChecker, clusterState);
+ setStorageNodeWantedStateToMaintenance(cluster, nodeIndex);
+ }
+
+ // 2 nodes in group 0 and 1 in group 1 in maintenance, try to set storage node 4 in group 2 to maintenance, should fail (different group)
+ {
+ ClusterState clusterState = clusterState(String.format("version:%d distributor:8 storage:8 .0.s:m .1.s:m .2.s:m", currentClusterStateVersion));
+ int nodeIndex = 4;
+ Node node = new Node(STORAGE, nodeIndex);
+ Result result = nodeStateChangeChecker.evaluateTransition(node, clusterState, SAFE, UP_NODE_STATE, MAINTENANCE_NODE_STATE);
+ assertFalse(result.settingWantedStateIsAllowed(), result.toString());
+ assertFalse(result.wantedStateAlreadySet());
+ assertEquals("Nodes in 2 groups are already down, cannot take down another node", result.getReason());
+ }
+
+ // 2 nodes in group 0 and 1 in group 1 in maintenance, try to set storage node 3 in group 1 to maintenance
+ {
+ ClusterState clusterState = clusterState(String.format("version:%d distributor:8 storage:8 .0.s:m .1.s:m .2.s:m", currentClusterStateVersion));
+ int nodeIndex = 3;
+ checkSettingToMaintenanceIsAllowed(nodeIndex, nodeStateChangeChecker, clusterState);
+ setStorageNodeWantedStateToMaintenance(cluster, nodeIndex);
+ }
+
+ }
+
+ @Test
void testSafeMaintenanceDisallowedWhenOtherDistributorInFlatClusterIsSuspended() {
// Nodes 0-3, distributor 0 being down with "Orchestrator" description.
ContentCluster cluster = createCluster(4);
- cluster.clusterInfo().getDistributorNodeInfo(0)
- .setWantedState(new NodeState(DISTRIBUTOR, DOWN).setDescription("Orchestrator"));
+ setDistributorNodeWantedState(cluster, 0, new NodeState(DISTRIBUTOR, DOWN), "Orchestrator");
var nodeStateChangeChecker = createChangeChecker(cluster);
ClusterState clusterStateWith0InMaintenance = clusterState(String.format(
"version:%d distributor:4 .0.s:d storage:4",
@@ -184,8 +283,7 @@ public class NodeStateChangeCheckerTest {
// Nodes 0-3, distributor 0 being in maintenance with "Orchestrator" description.
// 2 groups: nodes 0-1 is group 0, 2-3 is group 1.
ContentCluster cluster = createCluster(4, 2);
- cluster.clusterInfo().getDistributorNodeInfo(0)
- .setWantedState(new NodeState(STORAGE, DOWN).setDescription("Orchestrator"));
+ setDistributorNodeWantedState(cluster, 0, new NodeState(DISTRIBUTOR, DOWN), "Orchestrator");
var nodeStateChangeChecker = new NodeStateChangeChecker(cluster, false);
ClusterState clusterStateWith0InMaintenance = clusterState(String.format(
"version:%d distributor:4 .0.s:d storage:4",
@@ -217,7 +315,7 @@ public class NodeStateChangeCheckerTest {
// Nodes 0-3, storage node 0 being in maintenance with "Orchestrator" description.
// 2 groups: nodes 0-1 is group 0, 2-3 is group 1.
ContentCluster cluster = createCluster(4, 2);
- cluster.clusterInfo().getStorageNodeInfo(0).setWantedState(new NodeState(STORAGE, State.MAINTENANCE).setDescription("Orchestrator"));
+ setStorageNodeWantedState(cluster, 0, new NodeState(STORAGE, MAINTENANCE), "Orchestrator");
var nodeStateChangeChecker = new NodeStateChangeChecker(cluster, false);
ClusterState clusterStateWith0InMaintenance = clusterState(String.format(
"version:%d distributor:4 storage:4 .0.s:m",
@@ -433,10 +531,9 @@ public class NodeStateChangeCheckerTest {
NodeStateChangeChecker nodeStateChangeChecker = createChangeChecker(cluster);
for (int x = 0; x < cluster.clusterInfo().getConfiguredNodes().size(); x++) {
- State state = UP;
- cluster.clusterInfo().getDistributorNodeInfo(x).setReportedState(new NodeState(DISTRIBUTOR, state), 0);
+ cluster.clusterInfo().getDistributorNodeInfo(x).setReportedState(new NodeState(DISTRIBUTOR, UP), 0);
cluster.clusterInfo().getDistributorNodeInfo(x).setHostInfo(HostInfo.createHostInfo(createDistributorHostInfo(4, 5, 6)));
- cluster.clusterInfo().getStorageNodeInfo(x).setReportedState(new NodeState(STORAGE, state), 0);
+ cluster.clusterInfo().getStorageNodeInfo(x).setReportedState(new NodeState(STORAGE, UP), 0);
}
return nodeStateChangeChecker.evaluateTransition(
@@ -757,4 +854,28 @@ public class NodeStateChangeCheckerTest {
return configBuilder.build();
}
+ private void checkSettingToMaintenanceIsAllowed(int nodeIndex, NodeStateChangeChecker nodeStateChangeChecker, ClusterState clusterState) {
+ Node node = new Node(STORAGE, nodeIndex);
+ Result result = nodeStateChangeChecker.evaluateTransition(node, clusterState, SAFE, UP_NODE_STATE, MAINTENANCE_NODE_STATE);
+ assertTrue(result.settingWantedStateIsAllowed());
+ assertFalse(result.wantedStateAlreadySet());
+ assertEquals("Preconditions fulfilled and new state different", result.getReason());
+ }
+
+ private void setStorageNodeWantedStateToMaintenance(ContentCluster cluster, int nodeIndex) {
+ setStorageNodeWantedStateToMaintenance(cluster, nodeIndex, "Orchestrator");
+ }
+
+ private void setStorageNodeWantedStateToMaintenance(ContentCluster cluster, int nodeIndex, String description) {
+ cluster.clusterInfo().getStorageNodeInfo(nodeIndex).setWantedState(MAINTENANCE_NODE_STATE.setDescription(description));
+ }
+
+ private void setStorageNodeWantedState(ContentCluster cluster, int nodeIndex, NodeState state, String description) {
+ cluster.clusterInfo().getStorageNodeInfo(nodeIndex).setWantedState(state.setDescription(description));
+ }
+
+ private void setDistributorNodeWantedState(ContentCluster cluster, int nodeIndex, NodeState state, String description) {
+ cluster.clusterInfo().getDistributorNodeInfo(nodeIndex).setWantedState(state.setDescription(description));
+ }
+
}