diff options
author | Harald Musum <musum@yahooinc.com> | 2023-07-16 22:46:09 +0200 |
---|---|---|
committer | Harald Musum <musum@yahooinc.com> | 2023-07-16 22:46:09 +0200 |
commit | 1375d80d4949799cb37d68c8f2b90a7a4f331b57 (patch) | |
tree | 17ce8affd385db9cc61daf298bc1e81575059083 /clustercontroller-core | |
parent | 9f56b9f995e2297f60b02fc06af5a8f0897612bf (diff) |
Check redundancy also for groups that are up
When we allow several groups to go down for maintenance we should check
nodes in the groups that are up if they have the required redundancy. They
might be up but have not yet synced all buckets after coming up. We want
to wait with allowing more nodes to be taken down until that is done.
Diffstat (limited to 'clustercontroller-core')
2 files changed, 106 insertions, 18 deletions
diff --git a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/NodeStateChangeChecker.java b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/NodeStateChangeChecker.java index a36bd187a47..31bd9c59992 100644 --- a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/NodeStateChangeChecker.java +++ b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/NodeStateChangeChecker.java @@ -13,10 +13,12 @@ import com.yahoo.vdslib.state.State; import com.yahoo.vespa.clustercontroller.core.hostinfo.HostInfo; import com.yahoo.vespa.clustercontroller.core.hostinfo.StorageNode; import com.yahoo.vespa.clustercontroller.utils.staterestapi.requests.SetUnitStateRequest; + import java.util.ArrayList; import java.util.Collection; import java.util.Collections; import java.util.HashMap; +import java.util.LinkedHashMap; import java.util.List; import java.util.Map; import java.util.Objects; @@ -262,6 +264,11 @@ public class NodeStateChangeChecker { if (aGroupContainsNode(retiredAndNotUpGroups, node)) { numberOfGroupsToConsider = retiredAndNotUpGroups.size() - 1; } + + var result = checkRedundancyForGroupsThatAreUp(retiredAndNotUpGroups); + if (result.isPresent() && result.get().notAllowed()) + return result; + if (numberOfGroupsToConsider < maxNumberOfGroupsAllowedToBeDown) { log.log(FINE, "Allow, retiredAndNotUpGroups=" + retiredAndNotUpGroups); return Optional.of(allow()); @@ -272,6 +279,28 @@ public class NodeStateChangeChecker { sortSetIntoList(retiredAndNotUpGroups)))); } + private Optional<Result> checkRedundancyForGroupsThatAreUp(Set<Integer> retiredAndNotUpGroups) { + Set<Integer> groupsThatAreUp = groupsThatAreUp(retiredAndNotUpGroups); + log.log(FINE, "Check min replication for groups " + groupsThatAreUp); + for (int group : groupsThatAreUp) { + List<ConfiguredNode> nodesInGroup = getNodesInGroup(group); + for (var n : nodesInGroup) { + log.log(FINE, "Check min replication for index " + n.index() + " in group " + group); + Set<Integer> indexesToCheck = nodesInGroup.stream().map(ConfiguredNode::index).collect(Collectors.toSet()); + var r = checkRedundancySeenFromDistributor(clusterInfo.getDistributorNodeInfo(n.index()), indexesToCheck); + if (r.notAllowed()) + return Optional.of(r); + } + } + return Optional.empty(); + } + + private Set<Integer> groupsThatAreUp(Set<Integer> retiredAndNotUpGroups) { + var allGroups = allGroupIndexes(); + allGroups.removeAll(retiredAndNotUpGroups); + return allGroups; + } + private static boolean nodeIsDown(ClusterState clusterState, NodeInfo nodeInfo) { return clusterState.getNodeState(nodeInfo.getNode()).getState() == DOWN; } @@ -404,6 +433,33 @@ public class NodeStateChangeChecker { return allow(); } + private Result checkRedundancySeenFromDistributor(DistributorNodeInfo distributorNodeInfo, Set<Integer> indexesToCheck) { + Map<Integer, Integer> replication = new LinkedHashMap<>(minReplication(distributorNodeInfo)); + + Integer minReplication = null; + Integer minReplicationIndex = null; + for (var entry : replication.entrySet()) { + Integer value = entry.getValue(); + Integer nodeIndex = entry.getKey(); + if ( ! indexesToCheck.contains(nodeIndex)) continue; + if (minReplication == null || (value != null && value < minReplication)) { + minReplication = value; + minReplicationIndex = nodeIndex; + if (minReplication < requiredRedundancy) break; + } + } + + // Why test on != null? Missing min-replication is OK (indicate empty/few buckets on system). + if (minReplication != null && minReplication < requiredRedundancy) { + return disallow("Distributor " + distributorNodeInfo.getNodeIndex() + + " says storage node " + minReplicationIndex + + " has buckets with redundancy as low as " + + minReplication + ", but we require at least " + requiredRedundancy); + } + + return allow(); + } + private Map<Integer, Integer> minReplication(DistributorNodeInfo distributorNodeInfo) { Map<Integer, Integer> replicationPerNodeIndex = new HashMap<>(); for (StorageNode storageNode : distributorNodeInfo.getHostInfo().getDistributor().getStorageNodes()) { @@ -456,6 +512,25 @@ public class NodeStateChangeChecker { .collect(Collectors.toSet()); } + private Set<Integer> allGroupIndexes() { + return clusterInfo.getAllNodeInfos().stream() + .map(NodeInfo::getGroup) + .filter(Objects::nonNull) + .filter(Group::isLeafGroup) + .map(Group::getIndex) + .collect(Collectors.toSet()); + } + + private Group groupForThisIndex(int groupIndex) { + return clusterInfo.getAllNodeInfos().stream() + .map(NodeInfo::getGroup) + .filter(Objects::nonNull) + .filter(Group::isLeafGroup) + .filter(group -> group.getIndex() == groupIndex) + .findFirst() + .orElseThrow(); + } + // groups with at least one node with the same state & description private Set<Integer> groupsWithSameStateAndDescription(State state, String newDescription) { return clusterInfo.getAllNodeInfos().stream() @@ -485,6 +560,10 @@ public class NodeStateChangeChecker { .collect(Collectors.toSet()); } + private List<ConfiguredNode> getNodesInGroup(int groupIndex) { + return groupForThisIndex(groupIndex).getNodes(); + } + public static class Result { public enum Action { diff --git a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/NodeStateChangeCheckerTest.java b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/NodeStateChangeCheckerTest.java index 7b20fcf694a..2174a206d4a 100644 --- a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/NodeStateChangeCheckerTest.java +++ b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/NodeStateChangeCheckerTest.java @@ -1,6 +1,7 @@ // Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. package com.yahoo.vespa.clustercontroller.core; +import com.yahoo.log.LogSetup; import com.yahoo.vdslib.distribution.ConfiguredNode; import com.yahoo.vdslib.distribution.Distribution; import com.yahoo.vdslib.state.ClusterState; @@ -215,6 +216,8 @@ public class NodeStateChangeCheckerTest { @Test void testMaintenanceAllowedFor2Of4Groups8Nodes() { + // LogSetup.initVespaLogging("foo"); + int maxNumberOfGroupsAllowedToBeDown = 2; // 4 groups with 2 nodes in each group var cluster = createCluster(8, 4, maxNumberOfGroupsAllowedToBeDown); @@ -275,25 +278,8 @@ public class NodeStateChangeCheckerTest { assertEquals("At most 2 groups can have wanted state: [0, 1]", result.reason()); } - // 2 nodes in group 0 up again but buckets not in sync and 2 nodes in group 1 in maintenance, - // try to set storage node 4 in group 2 to maintenance - /* WIP - { - ClusterState clusterState = clusterState(String.format("version:%d distributor:8 storage:8 .2.s:m .3.s:m", currentClusterStateVersion)); - setStorageNodeWantedState(cluster, 0, UP, ""); - setStorageNodeWantedState(cluster, 1, UP, ""); - int nodeIndex = 4; - Node node = new Node(STORAGE, nodeIndex); - Result result = nodeStateChangeChecker.evaluateTransition(node, clusterState, SAFE, UP_NODE_STATE, MAINTENANCE_NODE_STATE); - assertFalse(result.settingWantedStateIsAllowed(), result.toString()); - assertFalse(result.wantedStateAlreadySet()); - assertEquals("At most 2 groups can have wanted state: [0, 1]", result.getReason()); - } - - */ - // 2 nodes in group 0 in maintenance, storage node 3 in group 1 is in maintenance with another description - // (set in maintenance by operator), try to set storage node 3 in group 1 to maintenance, should bew allowed + // (set in maintenance by operator), try to set storage node 2 in group 1 to maintenance, should be allowed { ClusterState clusterState = clusterState(String.format("version:%d distributor:8 storage:8 .0.s:m .1.s:m .3.s:m", currentClusterStateVersion)); setStorageNodeWantedState(cluster, 3, MAINTENANCE, "Maintenance, set by operator"); // Set to another description @@ -305,6 +291,29 @@ public class NodeStateChangeCheckerTest { assertFalse(result.isAlreadySet()); } + // 2 nodes in group 0 up again but buckets not in sync and 2 nodes in group 1 in maintenance, + // try to set storage node 4 in group 2 to maintenance + { + setStorageNodeWantedState(cluster, 0, MAINTENANCE, "Orchestrator"); + setStorageNodeWantedState(cluster, 1, MAINTENANCE, "Orchestrator"); + setStorageNodeWantedState(cluster, 2, UP, ""); // Set up again + setStorageNodeWantedState(cluster, 3, UP, ""); // Set up again + ClusterState clusterState = clusterState(String.format("version:%d distributor:8 storage:8 .0.s:m .1.s:m", currentClusterStateVersion)); + + // Set bucket in sync to 1 for node 2 in group 1 + var distributorHostInfo = createDistributorHostInfo(1, 2, 1); + cluster.clusterInfo().getDistributorNodeInfo(0).setHostInfo(HostInfo.createHostInfo(distributorHostInfo)); + cluster.clusterInfo().getDistributorNodeInfo(1).setHostInfo(HostInfo.createHostInfo(distributorHostInfo)); + cluster.clusterInfo().getDistributorNodeInfo(2).setHostInfo(HostInfo.createHostInfo(distributorHostInfo)); + + int nodeIndex = 2; + Node node = new Node(STORAGE, nodeIndex); + Result result = nodeStateChangeChecker.evaluateTransition(node, clusterState, SAFE, UP_NODE_STATE, MAINTENANCE_NODE_STATE); + assertFalse(result.allowed(), result.toString()); + assertFalse(result.isAlreadySet()); + assertEquals("Distributor 2 says storage node 2 has buckets with redundancy as low as 1, but we require at least 4", result.reason()); + } + } @ParameterizedTest |