author    Harald Musum <musum@yahooinc.com>  2023-07-16 22:46:09 +0200
committer Harald Musum <musum@yahooinc.com>  2023-07-16 22:46:09 +0200
commit    1375d80d4949799cb37d68c8f2b90a7a4f331b57 (patch)
tree      17ce8affd385db9cc61daf298bc1e81575059083 /clustercontroller-core
parent    9f56b9f995e2297f60b02fc06af5a8f0897612bf (diff)
Check redundancy also for groups that are up
When we allow several groups to go down for maintenance, we should check whether the nodes in the groups that are still up have the required redundancy. They might be up but not yet have synced all buckets after coming back up. We want to wait before allowing more nodes to be taken down until that has happened.
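
As a minimal standalone sketch of the added gate (illustrative names only; RedundancyGate and reasonToDeny are stand-ins, not classes in this patch, and the real check walks each group that is up and asks each node's distributor, as the diff below shows), the core decision per distributor looks like this:

import java.util.Map;
import java.util.Optional;

// Given the min replication factor each storage node reports through its
// distributor, deny taking more nodes down if any node in a group that is
// up is below the required redundancy.
class RedundancyGate {

    private final int requiredRedundancy;

    RedundancyGate(int requiredRedundancy) {
        this.requiredRedundancy = requiredRedundancy;
    }

    // A null value means the node reported no min-replication data, which is
    // OK (it has empty or few buckets), so such nodes are skipped.
    Optional<String> reasonToDeny(Map<Integer, Integer> minReplicationPerNode) {
        for (var entry : minReplicationPerNode.entrySet()) {
            Integer min = entry.getValue();
            if (min != null && min < requiredRedundancy)
                return Optional.of("storage node " + entry.getKey()
                        + " has buckets with redundancy as low as " + min
                        + ", but we require at least " + requiredRedundancy);
        }
        return Optional.empty();
    }
}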
Diffstat (limited to 'clustercontroller-core')
-rw-r--r--  clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/NodeStateChangeChecker.java      | 79
-rw-r--r--  clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/NodeStateChangeCheckerTest.java  | 45
2 files changed, 106 insertions, 18 deletions
diff --git a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/NodeStateChangeChecker.java b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/NodeStateChangeChecker.java
index a36bd187a47..31bd9c59992 100644
--- a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/NodeStateChangeChecker.java
+++ b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/NodeStateChangeChecker.java
@@ -13,10 +13,12 @@ import com.yahoo.vdslib.state.State;
import com.yahoo.vespa.clustercontroller.core.hostinfo.HostInfo;
import com.yahoo.vespa.clustercontroller.core.hostinfo.StorageNode;
import com.yahoo.vespa.clustercontroller.utils.staterestapi.requests.SetUnitStateRequest;
+
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.HashMap;
+import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.Objects;
@@ -262,6 +264,11 @@ public class NodeStateChangeChecker {
if (aGroupContainsNode(retiredAndNotUpGroups, node)) {
numberOfGroupsToConsider = retiredAndNotUpGroups.size() - 1;
}
+
+ var result = checkRedundancyForGroupsThatAreUp(retiredAndNotUpGroups);
+ if (result.isPresent() && result.get().notAllowed())
+ return result;
+
if (numberOfGroupsToConsider < maxNumberOfGroupsAllowedToBeDown) {
log.log(FINE, "Allow, retiredAndNotUpGroups=" + retiredAndNotUpGroups);
return Optional.of(allow());
@@ -272,6 +279,28 @@ public class NodeStateChangeChecker {
sortSetIntoList(retiredAndNotUpGroups))));
}
+ private Optional<Result> checkRedundancyForGroupsThatAreUp(Set<Integer> retiredAndNotUpGroups) {
+ Set<Integer> groupsThatAreUp = groupsThatAreUp(retiredAndNotUpGroups);
+ log.log(FINE, "Check min replication for groups " + groupsThatAreUp);
+ for (int group : groupsThatAreUp) {
+ List<ConfiguredNode> nodesInGroup = getNodesInGroup(group);
+ Set<Integer> indexesToCheck = nodesInGroup.stream().map(ConfiguredNode::index).collect(Collectors.toSet());
+ for (var n : nodesInGroup) {
+ log.log(FINE, "Check min replication for index " + n.index() + " in group " + group);
+ var r = checkRedundancySeenFromDistributor(clusterInfo.getDistributorNodeInfo(n.index()), indexesToCheck);
+ if (r.notAllowed())
+ return Optional.of(r);
+ }
+ }
+ return Optional.empty();
+ }
+
+ private Set<Integer> groupsThatAreUp(Set<Integer> retiredAndNotUpGroups) {
+ var allGroups = allGroupIndexes();
+ allGroups.removeAll(retiredAndNotUpGroups);
+ return allGroups;
+ }
+
private static boolean nodeIsDown(ClusterState clusterState, NodeInfo nodeInfo) {
return clusterState.getNodeState(nodeInfo.getNode()).getState() == DOWN;
}
@@ -404,6 +433,33 @@ public class NodeStateChangeChecker {
return allow();
}
+ private Result checkRedundancySeenFromDistributor(DistributorNodeInfo distributorNodeInfo, Set<Integer> indexesToCheck) {
+ Map<Integer, Integer> replication = new LinkedHashMap<>(minReplication(distributorNodeInfo));
+
+ Integer minReplication = null;
+ Integer minReplicationIndex = null;
+ for (var entry : replication.entrySet()) {
+ Integer value = entry.getValue();
+ Integer nodeIndex = entry.getKey();
+ if ( ! indexesToCheck.contains(nodeIndex)) continue;
+ if (value != null && (minReplication == null || value < minReplication)) {
+ minReplication = value;
+ minReplicationIndex = nodeIndex;
+ if (minReplication < requiredRedundancy) break;
+ }
+ }
+
+ // Why test on != null? Missing min-replication is OK (it indicates empty or few buckets in the system).
+ if (minReplication != null && minReplication < requiredRedundancy) {
+ return disallow("Distributor " + distributorNodeInfo.getNodeIndex()
+ + " says storage node " + minReplicationIndex
+ + " has buckets with redundancy as low as "
+ + minReplication + ", but we require at least " + requiredRedundancy);
+ }
+
+ return allow();
+ }
+
private Map<Integer, Integer> minReplication(DistributorNodeInfo distributorNodeInfo) {
Map<Integer, Integer> replicationPerNodeIndex = new HashMap<>();
for (StorageNode storageNode : distributorNodeInfo.getHostInfo().getDistributor().getStorageNodes()) {
@@ -456,6 +512,25 @@ public class NodeStateChangeChecker {
.collect(Collectors.toSet());
}
+ private Set<Integer> allGroupIndexes() {
+ return clusterInfo.getAllNodeInfos().stream()
+ .map(NodeInfo::getGroup)
+ .filter(Objects::nonNull)
+ .filter(Group::isLeafGroup)
+ .map(Group::getIndex)
+ .collect(Collectors.toSet());
+ }
+
+ private Group groupForThisIndex(int groupIndex) {
+ return clusterInfo.getAllNodeInfos().stream()
+ .map(NodeInfo::getGroup)
+ .filter(Objects::nonNull)
+ .filter(Group::isLeafGroup)
+ .filter(group -> group.getIndex() == groupIndex)
+ .findFirst()
+ .orElseThrow();
+ }
+
// groups with at least one node with the same state & description
private Set<Integer> groupsWithSameStateAndDescription(State state, String newDescription) {
return clusterInfo.getAllNodeInfos().stream()
@@ -485,6 +560,10 @@ public class NodeStateChangeChecker {
.collect(Collectors.toSet());
}
+ private List<ConfiguredNode> getNodesInGroup(int groupIndex) {
+ return groupForThisIndex(groupIndex).getNodes();
+ }
+
public static class Result {
public enum Action {
diff --git a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/NodeStateChangeCheckerTest.java b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/NodeStateChangeCheckerTest.java
index 7b20fcf694a..2174a206d4a 100644
--- a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/NodeStateChangeCheckerTest.java
+++ b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/NodeStateChangeCheckerTest.java
@@ -1,6 +1,7 @@
// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
package com.yahoo.vespa.clustercontroller.core;
+import com.yahoo.log.LogSetup;
import com.yahoo.vdslib.distribution.ConfiguredNode;
import com.yahoo.vdslib.distribution.Distribution;
import com.yahoo.vdslib.state.ClusterState;
@@ -215,6 +216,8 @@ public class NodeStateChangeCheckerTest {
@Test
void testMaintenanceAllowedFor2Of4Groups8Nodes() {
+ // LogSetup.initVespaLogging("foo");
+
int maxNumberOfGroupsAllowedToBeDown = 2;
// 4 groups with 2 nodes in each group
var cluster = createCluster(8, 4, maxNumberOfGroupsAllowedToBeDown);
@@ -275,25 +278,8 @@ public class NodeStateChangeCheckerTest {
assertEquals("At most 2 groups can have wanted state: [0, 1]", result.reason());
}
- // 2 nodes in group 0 up again but buckets not in sync and 2 nodes in group 1 in maintenance,
- // try to set storage node 4 in group 2 to maintenance
- /* WIP
- {
- ClusterState clusterState = clusterState(String.format("version:%d distributor:8 storage:8 .2.s:m .3.s:m", currentClusterStateVersion));
- setStorageNodeWantedState(cluster, 0, UP, "");
- setStorageNodeWantedState(cluster, 1, UP, "");
- int nodeIndex = 4;
- Node node = new Node(STORAGE, nodeIndex);
- Result result = nodeStateChangeChecker.evaluateTransition(node, clusterState, SAFE, UP_NODE_STATE, MAINTENANCE_NODE_STATE);
- assertFalse(result.settingWantedStateIsAllowed(), result.toString());
- assertFalse(result.wantedStateAlreadySet());
- assertEquals("At most 2 groups can have wanted state: [0, 1]", result.getReason());
- }
-
- */
-
// 2 nodes in group 0 in maintenance, storage node 3 in group 1 is in maintenance with another description
- // (set in maintenance by operator), try to set storage node 3 in group 1 to maintenance, should bew allowed
+ // (set in maintenance by operator), try to set storage node 2 in group 1 to maintenance, should be allowed
{
ClusterState clusterState = clusterState(String.format("version:%d distributor:8 storage:8 .0.s:m .1.s:m .3.s:m", currentClusterStateVersion));
setStorageNodeWantedState(cluster, 3, MAINTENANCE, "Maintenance, set by operator"); // Set to another description
@@ -305,6 +291,29 @@ public class NodeStateChangeCheckerTest {
assertFalse(result.isAlreadySet());
}
+ // 2 nodes in group 1 up again but buckets not in sync and 2 nodes in group 0 in maintenance,
+ // try to set storage node 2 in group 1 to maintenance
+ {
+ setStorageNodeWantedState(cluster, 0, MAINTENANCE, "Orchestrator");
+ setStorageNodeWantedState(cluster, 1, MAINTENANCE, "Orchestrator");
+ setStorageNodeWantedState(cluster, 2, UP, ""); // Set up again
+ setStorageNodeWantedState(cluster, 3, UP, ""); // Set up again
+ ClusterState clusterState = clusterState(String.format("version:%d distributor:8 storage:8 .0.s:m .1.s:m", currentClusterStateVersion));
+
+ // Set min replication factor to 1 for storage node 2 in group 1 (buckets not yet in sync)
+ var distributorHostInfo = createDistributorHostInfo(1, 2, 1);
+ cluster.clusterInfo().getDistributorNodeInfo(0).setHostInfo(HostInfo.createHostInfo(distributorHostInfo));
+ cluster.clusterInfo().getDistributorNodeInfo(1).setHostInfo(HostInfo.createHostInfo(distributorHostInfo));
+ cluster.clusterInfo().getDistributorNodeInfo(2).setHostInfo(HostInfo.createHostInfo(distributorHostInfo));
+
+ int nodeIndex = 2;
+ Node node = new Node(STORAGE, nodeIndex);
+ Result result = nodeStateChangeChecker.evaluateTransition(node, clusterState, SAFE, UP_NODE_STATE, MAINTENANCE_NODE_STATE);
+ assertFalse(result.allowed(), result.toString());
+ assertFalse(result.isAlreadySet());
+ assertEquals("Distributor 2 says storage node 2 has buckets with redundancy as low as 1, but we require at least 4", result.reason());
+ }
+
}
@ParameterizedTest
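
A companion case for the allowed path could be sketched as follows (hypothetical, not part of this commit: it assumes, as the block above suggests, that createDistributorHostInfo's arguments are the min replication factors reported for storage nodes 0, 1 and 2, and that no other check denies the transition):

// Hypothetical follow-up: once nodes in group 1 report min replication at or
// above the required redundancy (4), setting storage node 2 to maintenance
// should no longer be denied by the new redundancy check.
{
    ClusterState clusterState = clusterState(String.format("version:%d distributor:8 storage:8 .0.s:m .1.s:m", currentClusterStateVersion));
    var distributorHostInfo = createDistributorHostInfo(4, 5, 6);
    cluster.clusterInfo().getDistributorNodeInfo(0).setHostInfo(HostInfo.createHostInfo(distributorHostInfo));
    cluster.clusterInfo().getDistributorNodeInfo(1).setHostInfo(HostInfo.createHostInfo(distributorHostInfo));
    cluster.clusterInfo().getDistributorNodeInfo(2).setHostInfo(HostInfo.createHostInfo(distributorHostInfo));

    Node node = new Node(STORAGE, 2);
    Result result = nodeStateChangeChecker.evaluateTransition(node, clusterState, SAFE, UP_NODE_STATE, MAINTENANCE_NODE_STATE);
    assertTrue(result.allowed(), result.toString());
}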