diff options
2 files changed, 106 insertions, 18 deletions
diff --git a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/NodeStateChangeChecker.java b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/NodeStateChangeChecker.java index a36bd187a47..31bd9c59992 100644 --- a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/NodeStateChangeChecker.java +++ b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/NodeStateChangeChecker.java @@ -13,10 +13,12 @@ import com.yahoo.vdslib.state.State; import com.yahoo.vespa.clustercontroller.core.hostinfo.HostInfo; import com.yahoo.vespa.clustercontroller.core.hostinfo.StorageNode; import com.yahoo.vespa.clustercontroller.utils.staterestapi.requests.SetUnitStateRequest; + import java.util.ArrayList; import java.util.Collection; import java.util.Collections; import java.util.HashMap; +import java.util.LinkedHashMap; import java.util.List; import java.util.Map; import java.util.Objects; @@ -262,6 +264,11 @@ public class NodeStateChangeChecker { if (aGroupContainsNode(retiredAndNotUpGroups, node)) { numberOfGroupsToConsider = retiredAndNotUpGroups.size() - 1; } + + var result = checkRedundancyForGroupsThatAreUp(retiredAndNotUpGroups); + if (result.isPresent() && result.get().notAllowed()) + return result; + if (numberOfGroupsToConsider < maxNumberOfGroupsAllowedToBeDown) { log.log(FINE, "Allow, retiredAndNotUpGroups=" + retiredAndNotUpGroups); return Optional.of(allow()); @@ -272,6 +279,28 @@ public class NodeStateChangeChecker { sortSetIntoList(retiredAndNotUpGroups)))); } + private Optional<Result> checkRedundancyForGroupsThatAreUp(Set<Integer> retiredAndNotUpGroups) { + Set<Integer> groupsThatAreUp = groupsThatAreUp(retiredAndNotUpGroups); + log.log(FINE, "Check min replication for groups " + groupsThatAreUp); + for (int group : groupsThatAreUp) { + List<ConfiguredNode> nodesInGroup = getNodesInGroup(group); + for (var n : nodesInGroup) { + log.log(FINE, "Check min replication for index " + n.index() + " in group " + group); + Set<Integer> indexesToCheck = nodesInGroup.stream().map(ConfiguredNode::index).collect(Collectors.toSet()); + var r = checkRedundancySeenFromDistributor(clusterInfo.getDistributorNodeInfo(n.index()), indexesToCheck); + if (r.notAllowed()) + return Optional.of(r); + } + } + return Optional.empty(); + } + + private Set<Integer> groupsThatAreUp(Set<Integer> retiredAndNotUpGroups) { + var allGroups = allGroupIndexes(); + allGroups.removeAll(retiredAndNotUpGroups); + return allGroups; + } + private static boolean nodeIsDown(ClusterState clusterState, NodeInfo nodeInfo) { return clusterState.getNodeState(nodeInfo.getNode()).getState() == DOWN; } @@ -404,6 +433,33 @@ public class NodeStateChangeChecker { return allow(); } + private Result checkRedundancySeenFromDistributor(DistributorNodeInfo distributorNodeInfo, Set<Integer> indexesToCheck) { + Map<Integer, Integer> replication = new LinkedHashMap<>(minReplication(distributorNodeInfo)); + + Integer minReplication = null; + Integer minReplicationIndex = null; + for (var entry : replication.entrySet()) { + Integer value = entry.getValue(); + Integer nodeIndex = entry.getKey(); + if ( ! indexesToCheck.contains(nodeIndex)) continue; + if (minReplication == null || (value != null && value < minReplication)) { + minReplication = value; + minReplicationIndex = nodeIndex; + if (minReplication < requiredRedundancy) break; + } + } + + // Why test on != null? Missing min-replication is OK (indicate empty/few buckets on system). + if (minReplication != null && minReplication < requiredRedundancy) { + return disallow("Distributor " + distributorNodeInfo.getNodeIndex() + + " says storage node " + minReplicationIndex + + " has buckets with redundancy as low as " + + minReplication + ", but we require at least " + requiredRedundancy); + } + + return allow(); + } + private Map<Integer, Integer> minReplication(DistributorNodeInfo distributorNodeInfo) { Map<Integer, Integer> replicationPerNodeIndex = new HashMap<>(); for (StorageNode storageNode : distributorNodeInfo.getHostInfo().getDistributor().getStorageNodes()) { @@ -456,6 +512,25 @@ public class NodeStateChangeChecker { .collect(Collectors.toSet()); } + private Set<Integer> allGroupIndexes() { + return clusterInfo.getAllNodeInfos().stream() + .map(NodeInfo::getGroup) + .filter(Objects::nonNull) + .filter(Group::isLeafGroup) + .map(Group::getIndex) + .collect(Collectors.toSet()); + } + + private Group groupForThisIndex(int groupIndex) { + return clusterInfo.getAllNodeInfos().stream() + .map(NodeInfo::getGroup) + .filter(Objects::nonNull) + .filter(Group::isLeafGroup) + .filter(group -> group.getIndex() == groupIndex) + .findFirst() + .orElseThrow(); + } + // groups with at least one node with the same state & description private Set<Integer> groupsWithSameStateAndDescription(State state, String newDescription) { return clusterInfo.getAllNodeInfos().stream() @@ -485,6 +560,10 @@ public class NodeStateChangeChecker { .collect(Collectors.toSet()); } + private List<ConfiguredNode> getNodesInGroup(int groupIndex) { + return groupForThisIndex(groupIndex).getNodes(); + } + public static class Result { public enum Action { diff --git a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/NodeStateChangeCheckerTest.java b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/NodeStateChangeCheckerTest.java index 7b20fcf694a..2174a206d4a 100644 --- a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/NodeStateChangeCheckerTest.java +++ b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/NodeStateChangeCheckerTest.java @@ -1,6 +1,7 @@ // Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. package com.yahoo.vespa.clustercontroller.core; +import com.yahoo.log.LogSetup; import com.yahoo.vdslib.distribution.ConfiguredNode; import com.yahoo.vdslib.distribution.Distribution; import com.yahoo.vdslib.state.ClusterState; @@ -215,6 +216,8 @@ public class NodeStateChangeCheckerTest { @Test void testMaintenanceAllowedFor2Of4Groups8Nodes() { + // LogSetup.initVespaLogging("foo"); + int maxNumberOfGroupsAllowedToBeDown = 2; // 4 groups with 2 nodes in each group var cluster = createCluster(8, 4, maxNumberOfGroupsAllowedToBeDown); @@ -275,25 +278,8 @@ public class NodeStateChangeCheckerTest { assertEquals("At most 2 groups can have wanted state: [0, 1]", result.reason()); } - // 2 nodes in group 0 up again but buckets not in sync and 2 nodes in group 1 in maintenance, - // try to set storage node 4 in group 2 to maintenance - /* WIP - { - ClusterState clusterState = clusterState(String.format("version:%d distributor:8 storage:8 .2.s:m .3.s:m", currentClusterStateVersion)); - setStorageNodeWantedState(cluster, 0, UP, ""); - setStorageNodeWantedState(cluster, 1, UP, ""); - int nodeIndex = 4; - Node node = new Node(STORAGE, nodeIndex); - Result result = nodeStateChangeChecker.evaluateTransition(node, clusterState, SAFE, UP_NODE_STATE, MAINTENANCE_NODE_STATE); - assertFalse(result.settingWantedStateIsAllowed(), result.toString()); - assertFalse(result.wantedStateAlreadySet()); - assertEquals("At most 2 groups can have wanted state: [0, 1]", result.getReason()); - } - - */ - // 2 nodes in group 0 in maintenance, storage node 3 in group 1 is in maintenance with another description - // (set in maintenance by operator), try to set storage node 3 in group 1 to maintenance, should bew allowed + // (set in maintenance by operator), try to set storage node 2 in group 1 to maintenance, should be allowed { ClusterState clusterState = clusterState(String.format("version:%d distributor:8 storage:8 .0.s:m .1.s:m .3.s:m", currentClusterStateVersion)); setStorageNodeWantedState(cluster, 3, MAINTENANCE, "Maintenance, set by operator"); // Set to another description @@ -305,6 +291,29 @@ public class NodeStateChangeCheckerTest { assertFalse(result.isAlreadySet()); } + // 2 nodes in group 0 up again but buckets not in sync and 2 nodes in group 1 in maintenance, + // try to set storage node 4 in group 2 to maintenance + { + setStorageNodeWantedState(cluster, 0, MAINTENANCE, "Orchestrator"); + setStorageNodeWantedState(cluster, 1, MAINTENANCE, "Orchestrator"); + setStorageNodeWantedState(cluster, 2, UP, ""); // Set up again + setStorageNodeWantedState(cluster, 3, UP, ""); // Set up again + ClusterState clusterState = clusterState(String.format("version:%d distributor:8 storage:8 .0.s:m .1.s:m", currentClusterStateVersion)); + + // Set bucket in sync to 1 for node 2 in group 1 + var distributorHostInfo = createDistributorHostInfo(1, 2, 1); + cluster.clusterInfo().getDistributorNodeInfo(0).setHostInfo(HostInfo.createHostInfo(distributorHostInfo)); + cluster.clusterInfo().getDistributorNodeInfo(1).setHostInfo(HostInfo.createHostInfo(distributorHostInfo)); + cluster.clusterInfo().getDistributorNodeInfo(2).setHostInfo(HostInfo.createHostInfo(distributorHostInfo)); + + int nodeIndex = 2; + Node node = new Node(STORAGE, nodeIndex); + Result result = nodeStateChangeChecker.evaluateTransition(node, clusterState, SAFE, UP_NODE_STATE, MAINTENANCE_NODE_STATE); + assertFalse(result.allowed(), result.toString()); + assertFalse(result.isAlreadySet()); + assertEquals("Distributor 2 says storage node 2 has buckets with redundancy as low as 1, but we require at least 4", result.reason()); + } + } @ParameterizedTest |