diff options
author | Harald Musum <musum@yahooinc.com> | 2023-07-17 21:24:11 +0200 |
---|---|---|
committer | Harald Musum <musum@yahooinc.com> | 2023-07-17 21:24:11 +0200 |
commit | 715a4b3e10738eec80446dd06cb8c8e9911cec50 (patch) | |
tree | 8a561d13bfa7e0f759b570a571484bd9c9b05ca5 /clustercontroller-core | |
parent | 4de39b50afecc59d1e7cc05b658200979016c5ef (diff) |
Check min replication seen from all distributors that are UP
This means we will also check distributors that are on same node as
a retired storage node
Diffstat (limited to 'clustercontroller-core')
2 files changed, 17 insertions, 29 deletions
diff --git a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/NodeStateChangeChecker.java b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/NodeStateChangeChecker.java index 01d9e73c7bc..c6fc9c8c184 100644 --- a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/NodeStateChangeChecker.java +++ b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/NodeStateChangeChecker.java @@ -18,6 +18,7 @@ import java.util.ArrayList; import java.util.Collection; import java.util.Collections; import java.util.HashMap; +import java.util.HashSet; import java.util.LinkedHashMap; import java.util.List; import java.util.Map; @@ -265,7 +266,7 @@ public class NodeStateChangeChecker { numberOfGroupsToConsider = retiredAndNotUpGroups.size() - 1; } - var result = checkRedundancyForGroupsThatAreUp(retiredAndNotUpGroups); + var result = checkRedundancy(retiredAndNotUpGroups); if (result.isPresent() && result.get().notAllowed()) return result; @@ -279,28 +280,23 @@ public class NodeStateChangeChecker { sortSetIntoList(retiredAndNotUpGroups)))); } - private Optional<Result> checkRedundancyForGroupsThatAreUp(Set<Integer> retiredAndNotUpGroups) { - Set<Integer> groupsThatAreUp = groupsThatAreUp(retiredAndNotUpGroups); - log.log(FINE, "Check min replication for groups " + groupsThatAreUp); - for (int group : groupsThatAreUp) { - List<ConfiguredNode> nodesInGroup = getNodesInGroup(group); - for (var n : nodesInGroup) { - log.log(FINE, "Check min replication for index " + n.index() + " in group " + group); - Set<Integer> indexesToCheck = nodesInGroup.stream().map(ConfiguredNode::index).collect(Collectors.toSet()); - var r = checkRedundancySeenFromDistributor(clusterInfo.getDistributorNodeInfo(n.index()), indexesToCheck); - if (r.notAllowed()) - return Optional.of(r); - } + // Check redundancy for nodes seen from all distributors that are UP for + // storage nodes that are in groups that should be UP + private Optional<Result> checkRedundancy(Set<Integer> retiredAndNotUpGroups) { + Set<Integer> indexesToCheck = new HashSet<>(); + retiredAndNotUpGroups.forEach(index -> getNodesInGroup(index).forEach(node -> indexesToCheck.add(node.index()))); + + for (var distributorNodeInfo : clusterInfo.getDistributorNodeInfos()) { + // Skip distributors that are DOWN (otherwise they will be UP and should be checked) + if (distributorNodeInfo.getUserWantedState().getState() != UP) continue; + + var r = checkRedundancySeenFromDistributor(distributorNodeInfo, indexesToCheck); + if (r.notAllowed()) + return Optional.of(r); } return Optional.empty(); } - private Set<Integer> groupsThatAreUp(Set<Integer> retiredAndNotUpGroups) { - var allGroups = allGroupIndexes(); - allGroups.removeAll(retiredAndNotUpGroups); - return allGroups; - } - private static boolean nodeIsDown(ClusterState clusterState, NodeInfo nodeInfo) { return clusterState.getNodeState(nodeInfo.getNode()).getState() == DOWN; } @@ -458,6 +454,7 @@ public class NodeStateChangeChecker { return allow(); } + // Replication per storage node index private Map<Integer, Integer> minReplication(DistributorNodeInfo distributorNodeInfo) { Map<Integer, Integer> replicationPerNodeIndex = new HashMap<>(); for (StorageNode storageNode : distributorNodeInfo.getHostInfo().getDistributor().getStorageNodes()) { @@ -510,15 +507,6 @@ public class NodeStateChangeChecker { .collect(Collectors.toSet()); } - private Set<Integer> allGroupIndexes() { - return clusterInfo.getAllNodeInfos().stream() - .map(NodeInfo::getGroup) - .filter(Objects::nonNull) - .filter(Group::isLeafGroup) - .map(Group::getIndex) - .collect(Collectors.toSet()); - } - private Group groupForThisIndex(int groupIndex) { return clusterInfo.getAllNodeInfos().stream() .map(NodeInfo::getGroup) diff --git a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/NodeStateChangeCheckerTest.java b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/NodeStateChangeCheckerTest.java index 9cb589636da..b73ee86251f 100644 --- a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/NodeStateChangeCheckerTest.java +++ b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/NodeStateChangeCheckerTest.java @@ -309,7 +309,7 @@ public class NodeStateChangeCheckerTest { Result result = nodeStateChangeChecker.evaluateTransition(node, clusterState, SAFE, UP_NODE_STATE, MAINTENANCE_NODE_STATE); assertFalse(result.allowed(), result.toString()); assertFalse(result.isAlreadySet()); - assertEquals("Distributor 2 says storage node 2 has buckets with redundancy as low as 1, but we require at least 4", result.reason()); + assertEquals("Distributor 0 says storage node 0 has buckets with redundancy as low as 1, but we require at least 4", result.reason()); } } |