diff options
author | Harald Musum <musum@yahooinc.com> | 2023-07-16 22:46:09 +0200 |
---|---|---|
committer | Harald Musum <musum@yahooinc.com> | 2023-07-16 22:46:09 +0200 |
commit | 1375d80d4949799cb37d68c8f2b90a7a4f331b57 (patch) | |
tree | 17ce8affd385db9cc61daf298bc1e81575059083 /clustercontroller-core/src/main | |
parent | 9f56b9f995e2297f60b02fc06af5a8f0897612bf (diff) |
Check redundancy also for groups that are up
When we allow several groups to go down for maintenance we should check
nodes in the groups that are up if they have the required redundancy. They
might be up but have not yet synced all buckets after coming up. We want
to wait with allowing more nodes to be taken down until that is done.
Diffstat (limited to 'clustercontroller-core/src/main')
-rw-r--r-- | clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/NodeStateChangeChecker.java | 79 |
1 files changed, 79 insertions, 0 deletions
diff --git a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/NodeStateChangeChecker.java b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/NodeStateChangeChecker.java index a36bd187a47..31bd9c59992 100644 --- a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/NodeStateChangeChecker.java +++ b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/NodeStateChangeChecker.java @@ -13,10 +13,12 @@ import com.yahoo.vdslib.state.State; import com.yahoo.vespa.clustercontroller.core.hostinfo.HostInfo; import com.yahoo.vespa.clustercontroller.core.hostinfo.StorageNode; import com.yahoo.vespa.clustercontroller.utils.staterestapi.requests.SetUnitStateRequest; + import java.util.ArrayList; import java.util.Collection; import java.util.Collections; import java.util.HashMap; +import java.util.LinkedHashMap; import java.util.List; import java.util.Map; import java.util.Objects; @@ -262,6 +264,11 @@ public class NodeStateChangeChecker { if (aGroupContainsNode(retiredAndNotUpGroups, node)) { numberOfGroupsToConsider = retiredAndNotUpGroups.size() - 1; } + + var result = checkRedundancyForGroupsThatAreUp(retiredAndNotUpGroups); + if (result.isPresent() && result.get().notAllowed()) + return result; + if (numberOfGroupsToConsider < maxNumberOfGroupsAllowedToBeDown) { log.log(FINE, "Allow, retiredAndNotUpGroups=" + retiredAndNotUpGroups); return Optional.of(allow()); @@ -272,6 +279,28 @@ public class NodeStateChangeChecker { sortSetIntoList(retiredAndNotUpGroups)))); } + private Optional<Result> checkRedundancyForGroupsThatAreUp(Set<Integer> retiredAndNotUpGroups) { + Set<Integer> groupsThatAreUp = groupsThatAreUp(retiredAndNotUpGroups); + log.log(FINE, "Check min replication for groups " + groupsThatAreUp); + for (int group : groupsThatAreUp) { + List<ConfiguredNode> nodesInGroup = getNodesInGroup(group); + for (var n : nodesInGroup) { + log.log(FINE, "Check min replication for index " + n.index() + " in group " + group); + Set<Integer> indexesToCheck = nodesInGroup.stream().map(ConfiguredNode::index).collect(Collectors.toSet()); + var r = checkRedundancySeenFromDistributor(clusterInfo.getDistributorNodeInfo(n.index()), indexesToCheck); + if (r.notAllowed()) + return Optional.of(r); + } + } + return Optional.empty(); + } + + private Set<Integer> groupsThatAreUp(Set<Integer> retiredAndNotUpGroups) { + var allGroups = allGroupIndexes(); + allGroups.removeAll(retiredAndNotUpGroups); + return allGroups; + } + private static boolean nodeIsDown(ClusterState clusterState, NodeInfo nodeInfo) { return clusterState.getNodeState(nodeInfo.getNode()).getState() == DOWN; } @@ -404,6 +433,33 @@ public class NodeStateChangeChecker { return allow(); } + private Result checkRedundancySeenFromDistributor(DistributorNodeInfo distributorNodeInfo, Set<Integer> indexesToCheck) { + Map<Integer, Integer> replication = new LinkedHashMap<>(minReplication(distributorNodeInfo)); + + Integer minReplication = null; + Integer minReplicationIndex = null; + for (var entry : replication.entrySet()) { + Integer value = entry.getValue(); + Integer nodeIndex = entry.getKey(); + if ( ! indexesToCheck.contains(nodeIndex)) continue; + if (minReplication == null || (value != null && value < minReplication)) { + minReplication = value; + minReplicationIndex = nodeIndex; + if (minReplication < requiredRedundancy) break; + } + } + + // Why test on != null? Missing min-replication is OK (indicate empty/few buckets on system). + if (minReplication != null && minReplication < requiredRedundancy) { + return disallow("Distributor " + distributorNodeInfo.getNodeIndex() + + " says storage node " + minReplicationIndex + + " has buckets with redundancy as low as " + + minReplication + ", but we require at least " + requiredRedundancy); + } + + return allow(); + } + private Map<Integer, Integer> minReplication(DistributorNodeInfo distributorNodeInfo) { Map<Integer, Integer> replicationPerNodeIndex = new HashMap<>(); for (StorageNode storageNode : distributorNodeInfo.getHostInfo().getDistributor().getStorageNodes()) { @@ -456,6 +512,25 @@ public class NodeStateChangeChecker { .collect(Collectors.toSet()); } + private Set<Integer> allGroupIndexes() { + return clusterInfo.getAllNodeInfos().stream() + .map(NodeInfo::getGroup) + .filter(Objects::nonNull) + .filter(Group::isLeafGroup) + .map(Group::getIndex) + .collect(Collectors.toSet()); + } + + private Group groupForThisIndex(int groupIndex) { + return clusterInfo.getAllNodeInfos().stream() + .map(NodeInfo::getGroup) + .filter(Objects::nonNull) + .filter(Group::isLeafGroup) + .filter(group -> group.getIndex() == groupIndex) + .findFirst() + .orElseThrow(); + } + // groups with at least one node with the same state & description private Set<Integer> groupsWithSameStateAndDescription(State state, String newDescription) { return clusterInfo.getAllNodeInfos().stream() @@ -485,6 +560,10 @@ public class NodeStateChangeChecker { .collect(Collectors.toSet()); } + private List<ConfiguredNode> getNodesInGroup(int groupIndex) { + return groupForThisIndex(groupIndex).getNodes(); + } + public static class Result { public enum Action { |