From 1375d80d4949799cb37d68c8f2b90a7a4f331b57 Mon Sep 17 00:00:00 2001 From: Harald Musum Date: Sun, 16 Jul 2023 22:46:09 +0200 Subject: Check redundancy also for groups that are up When we allow several groups to go down for maintenance we should check nodes in the groups that are up if they have the required redundancy. They might be up but have not yet synced all buckets after coming up. We want to wait with allowing more nodes to be taken down until that is done. --- .../core/NodeStateChangeChecker.java | 79 ++++++++++++++++++++++ 1 file changed, 79 insertions(+) (limited to 'clustercontroller-core/src/main') diff --git a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/NodeStateChangeChecker.java b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/NodeStateChangeChecker.java index a36bd187a47..31bd9c59992 100644 --- a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/NodeStateChangeChecker.java +++ b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/NodeStateChangeChecker.java @@ -13,10 +13,12 @@ import com.yahoo.vdslib.state.State; import com.yahoo.vespa.clustercontroller.core.hostinfo.HostInfo; import com.yahoo.vespa.clustercontroller.core.hostinfo.StorageNode; import com.yahoo.vespa.clustercontroller.utils.staterestapi.requests.SetUnitStateRequest; + import java.util.ArrayList; import java.util.Collection; import java.util.Collections; import java.util.HashMap; +import java.util.LinkedHashMap; import java.util.List; import java.util.Map; import java.util.Objects; @@ -262,6 +264,11 @@ public class NodeStateChangeChecker { if (aGroupContainsNode(retiredAndNotUpGroups, node)) { numberOfGroupsToConsider = retiredAndNotUpGroups.size() - 1; } + + var result = checkRedundancyForGroupsThatAreUp(retiredAndNotUpGroups); + if (result.isPresent() && result.get().notAllowed()) + return result; + if (numberOfGroupsToConsider < maxNumberOfGroupsAllowedToBeDown) { log.log(FINE, "Allow, retiredAndNotUpGroups=" + retiredAndNotUpGroups); return Optional.of(allow()); @@ -272,6 +279,28 @@ public class NodeStateChangeChecker { sortSetIntoList(retiredAndNotUpGroups)))); } + private Optional checkRedundancyForGroupsThatAreUp(Set retiredAndNotUpGroups) { + Set groupsThatAreUp = groupsThatAreUp(retiredAndNotUpGroups); + log.log(FINE, "Check min replication for groups " + groupsThatAreUp); + for (int group : groupsThatAreUp) { + List nodesInGroup = getNodesInGroup(group); + for (var n : nodesInGroup) { + log.log(FINE, "Check min replication for index " + n.index() + " in group " + group); + Set indexesToCheck = nodesInGroup.stream().map(ConfiguredNode::index).collect(Collectors.toSet()); + var r = checkRedundancySeenFromDistributor(clusterInfo.getDistributorNodeInfo(n.index()), indexesToCheck); + if (r.notAllowed()) + return Optional.of(r); + } + } + return Optional.empty(); + } + + private Set groupsThatAreUp(Set retiredAndNotUpGroups) { + var allGroups = allGroupIndexes(); + allGroups.removeAll(retiredAndNotUpGroups); + return allGroups; + } + private static boolean nodeIsDown(ClusterState clusterState, NodeInfo nodeInfo) { return clusterState.getNodeState(nodeInfo.getNode()).getState() == DOWN; } @@ -404,6 +433,33 @@ public class NodeStateChangeChecker { return allow(); } + private Result checkRedundancySeenFromDistributor(DistributorNodeInfo distributorNodeInfo, Set indexesToCheck) { + Map replication = new LinkedHashMap<>(minReplication(distributorNodeInfo)); + + Integer minReplication = null; + Integer minReplicationIndex = null; + for (var entry : replication.entrySet()) { + Integer value = entry.getValue(); + Integer nodeIndex = entry.getKey(); + if ( ! indexesToCheck.contains(nodeIndex)) continue; + if (minReplication == null || (value != null && value < minReplication)) { + minReplication = value; + minReplicationIndex = nodeIndex; + if (minReplication < requiredRedundancy) break; + } + } + + // Why test on != null? Missing min-replication is OK (indicate empty/few buckets on system). + if (minReplication != null && minReplication < requiredRedundancy) { + return disallow("Distributor " + distributorNodeInfo.getNodeIndex() + + " says storage node " + minReplicationIndex + + " has buckets with redundancy as low as " + + minReplication + ", but we require at least " + requiredRedundancy); + } + + return allow(); + } + private Map minReplication(DistributorNodeInfo distributorNodeInfo) { Map replicationPerNodeIndex = new HashMap<>(); for (StorageNode storageNode : distributorNodeInfo.getHostInfo().getDistributor().getStorageNodes()) { @@ -456,6 +512,25 @@ public class NodeStateChangeChecker { .collect(Collectors.toSet()); } + private Set allGroupIndexes() { + return clusterInfo.getAllNodeInfos().stream() + .map(NodeInfo::getGroup) + .filter(Objects::nonNull) + .filter(Group::isLeafGroup) + .map(Group::getIndex) + .collect(Collectors.toSet()); + } + + private Group groupForThisIndex(int groupIndex) { + return clusterInfo.getAllNodeInfos().stream() + .map(NodeInfo::getGroup) + .filter(Objects::nonNull) + .filter(Group::isLeafGroup) + .filter(group -> group.getIndex() == groupIndex) + .findFirst() + .orElseThrow(); + } + // groups with at least one node with the same state & description private Set groupsWithSameStateAndDescription(State state, String newDescription) { return clusterInfo.getAllNodeInfos().stream() @@ -485,6 +560,10 @@ public class NodeStateChangeChecker { .collect(Collectors.toSet()); } + private List getNodesInGroup(int groupIndex) { + return groupForThisIndex(groupIndex).getNodes(); + } + public static class Result { public enum Action { -- cgit v1.2.3