summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/NodeStateChangeChecker.java79
-rw-r--r--clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/NodeStateChangeCheckerTest.java45
2 files changed, 106 insertions, 18 deletions
diff --git a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/NodeStateChangeChecker.java b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/NodeStateChangeChecker.java
index a36bd187a47..31bd9c59992 100644
--- a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/NodeStateChangeChecker.java
+++ b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/NodeStateChangeChecker.java
@@ -13,10 +13,12 @@ import com.yahoo.vdslib.state.State;
import com.yahoo.vespa.clustercontroller.core.hostinfo.HostInfo;
import com.yahoo.vespa.clustercontroller.core.hostinfo.StorageNode;
import com.yahoo.vespa.clustercontroller.utils.staterestapi.requests.SetUnitStateRequest;
+
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.HashMap;
+import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.Objects;
@@ -262,6 +264,11 @@ public class NodeStateChangeChecker {
if (aGroupContainsNode(retiredAndNotUpGroups, node)) {
numberOfGroupsToConsider = retiredAndNotUpGroups.size() - 1;
}
+
+ var result = checkRedundancyForGroupsThatAreUp(retiredAndNotUpGroups);
+ if (result.isPresent() && result.get().notAllowed())
+ return result;
+
if (numberOfGroupsToConsider < maxNumberOfGroupsAllowedToBeDown) {
log.log(FINE, "Allow, retiredAndNotUpGroups=" + retiredAndNotUpGroups);
return Optional.of(allow());
@@ -272,6 +279,28 @@ public class NodeStateChangeChecker {
sortSetIntoList(retiredAndNotUpGroups))));
}
+ private Optional<Result> checkRedundancyForGroupsThatAreUp(Set<Integer> retiredAndNotUpGroups) {
+ Set<Integer> groupsThatAreUp = groupsThatAreUp(retiredAndNotUpGroups);
+ log.log(FINE, "Check min replication for groups " + groupsThatAreUp);
+ for (int group : groupsThatAreUp) {
+ List<ConfiguredNode> nodesInGroup = getNodesInGroup(group);
+ for (var n : nodesInGroup) {
+ log.log(FINE, "Check min replication for index " + n.index() + " in group " + group);
+ Set<Integer> indexesToCheck = nodesInGroup.stream().map(ConfiguredNode::index).collect(Collectors.toSet());
+ var r = checkRedundancySeenFromDistributor(clusterInfo.getDistributorNodeInfo(n.index()), indexesToCheck);
+ if (r.notAllowed())
+ return Optional.of(r);
+ }
+ }
+ return Optional.empty();
+ }
+
+ private Set<Integer> groupsThatAreUp(Set<Integer> retiredAndNotUpGroups) {
+ var allGroups = allGroupIndexes();
+ allGroups.removeAll(retiredAndNotUpGroups);
+ return allGroups;
+ }
+
private static boolean nodeIsDown(ClusterState clusterState, NodeInfo nodeInfo) {
return clusterState.getNodeState(nodeInfo.getNode()).getState() == DOWN;
}
@@ -404,6 +433,33 @@ public class NodeStateChangeChecker {
return allow();
}
+ private Result checkRedundancySeenFromDistributor(DistributorNodeInfo distributorNodeInfo, Set<Integer> indexesToCheck) {
+ Map<Integer, Integer> replication = new LinkedHashMap<>(minReplication(distributorNodeInfo));
+
+ Integer minReplication = null;
+ Integer minReplicationIndex = null;
+ for (var entry : replication.entrySet()) {
+ Integer value = entry.getValue();
+ Integer nodeIndex = entry.getKey();
+ if ( ! indexesToCheck.contains(nodeIndex)) continue;
+ if (minReplication == null || (value != null && value < minReplication)) {
+ minReplication = value;
+ minReplicationIndex = nodeIndex;
+ if (minReplication < requiredRedundancy) break;
+ }
+ }
+
+ // Why test on != null? Missing min-replication is OK (indicate empty/few buckets on system).
+ if (minReplication != null && minReplication < requiredRedundancy) {
+ return disallow("Distributor " + distributorNodeInfo.getNodeIndex()
+ + " says storage node " + minReplicationIndex
+ + " has buckets with redundancy as low as "
+ + minReplication + ", but we require at least " + requiredRedundancy);
+ }
+
+ return allow();
+ }
+
private Map<Integer, Integer> minReplication(DistributorNodeInfo distributorNodeInfo) {
Map<Integer, Integer> replicationPerNodeIndex = new HashMap<>();
for (StorageNode storageNode : distributorNodeInfo.getHostInfo().getDistributor().getStorageNodes()) {
@@ -456,6 +512,25 @@ public class NodeStateChangeChecker {
.collect(Collectors.toSet());
}
+ private Set<Integer> allGroupIndexes() {
+ return clusterInfo.getAllNodeInfos().stream()
+ .map(NodeInfo::getGroup)
+ .filter(Objects::nonNull)
+ .filter(Group::isLeafGroup)
+ .map(Group::getIndex)
+ .collect(Collectors.toSet());
+ }
+
+ private Group groupForThisIndex(int groupIndex) {
+ return clusterInfo.getAllNodeInfos().stream()
+ .map(NodeInfo::getGroup)
+ .filter(Objects::nonNull)
+ .filter(Group::isLeafGroup)
+ .filter(group -> group.getIndex() == groupIndex)
+ .findFirst()
+ .orElseThrow();
+ }
+
// groups with at least one node with the same state & description
private Set<Integer> groupsWithSameStateAndDescription(State state, String newDescription) {
return clusterInfo.getAllNodeInfos().stream()
@@ -485,6 +560,10 @@ public class NodeStateChangeChecker {
.collect(Collectors.toSet());
}
+ private List<ConfiguredNode> getNodesInGroup(int groupIndex) {
+ return groupForThisIndex(groupIndex).getNodes();
+ }
+
public static class Result {
public enum Action {
diff --git a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/NodeStateChangeCheckerTest.java b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/NodeStateChangeCheckerTest.java
index 7b20fcf694a..2174a206d4a 100644
--- a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/NodeStateChangeCheckerTest.java
+++ b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/NodeStateChangeCheckerTest.java
@@ -1,6 +1,7 @@
// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
package com.yahoo.vespa.clustercontroller.core;
+import com.yahoo.log.LogSetup;
import com.yahoo.vdslib.distribution.ConfiguredNode;
import com.yahoo.vdslib.distribution.Distribution;
import com.yahoo.vdslib.state.ClusterState;
@@ -215,6 +216,8 @@ public class NodeStateChangeCheckerTest {
@Test
void testMaintenanceAllowedFor2Of4Groups8Nodes() {
+ // LogSetup.initVespaLogging("foo");
+
int maxNumberOfGroupsAllowedToBeDown = 2;
// 4 groups with 2 nodes in each group
var cluster = createCluster(8, 4, maxNumberOfGroupsAllowedToBeDown);
@@ -275,25 +278,8 @@ public class NodeStateChangeCheckerTest {
assertEquals("At most 2 groups can have wanted state: [0, 1]", result.reason());
}
- // 2 nodes in group 0 up again but buckets not in sync and 2 nodes in group 1 in maintenance,
- // try to set storage node 4 in group 2 to maintenance
- /* WIP
- {
- ClusterState clusterState = clusterState(String.format("version:%d distributor:8 storage:8 .2.s:m .3.s:m", currentClusterStateVersion));
- setStorageNodeWantedState(cluster, 0, UP, "");
- setStorageNodeWantedState(cluster, 1, UP, "");
- int nodeIndex = 4;
- Node node = new Node(STORAGE, nodeIndex);
- Result result = nodeStateChangeChecker.evaluateTransition(node, clusterState, SAFE, UP_NODE_STATE, MAINTENANCE_NODE_STATE);
- assertFalse(result.settingWantedStateIsAllowed(), result.toString());
- assertFalse(result.wantedStateAlreadySet());
- assertEquals("At most 2 groups can have wanted state: [0, 1]", result.getReason());
- }
-
- */
-
// 2 nodes in group 0 in maintenance, storage node 3 in group 1 is in maintenance with another description
- // (set in maintenance by operator), try to set storage node 3 in group 1 to maintenance, should bew allowed
+ // (set in maintenance by operator), try to set storage node 2 in group 1 to maintenance, should be allowed
{
ClusterState clusterState = clusterState(String.format("version:%d distributor:8 storage:8 .0.s:m .1.s:m .3.s:m", currentClusterStateVersion));
setStorageNodeWantedState(cluster, 3, MAINTENANCE, "Maintenance, set by operator"); // Set to another description
@@ -305,6 +291,29 @@ public class NodeStateChangeCheckerTest {
assertFalse(result.isAlreadySet());
}
+ // 2 nodes in group 0 up again but buckets not in sync and 2 nodes in group 1 in maintenance,
+ // try to set storage node 4 in group 2 to maintenance
+ {
+ setStorageNodeWantedState(cluster, 0, MAINTENANCE, "Orchestrator");
+ setStorageNodeWantedState(cluster, 1, MAINTENANCE, "Orchestrator");
+ setStorageNodeWantedState(cluster, 2, UP, ""); // Set up again
+ setStorageNodeWantedState(cluster, 3, UP, ""); // Set up again
+ ClusterState clusterState = clusterState(String.format("version:%d distributor:8 storage:8 .0.s:m .1.s:m", currentClusterStateVersion));
+
+ // Set bucket in sync to 1 for node 2 in group 1
+ var distributorHostInfo = createDistributorHostInfo(1, 2, 1);
+ cluster.clusterInfo().getDistributorNodeInfo(0).setHostInfo(HostInfo.createHostInfo(distributorHostInfo));
+ cluster.clusterInfo().getDistributorNodeInfo(1).setHostInfo(HostInfo.createHostInfo(distributorHostInfo));
+ cluster.clusterInfo().getDistributorNodeInfo(2).setHostInfo(HostInfo.createHostInfo(distributorHostInfo));
+
+ int nodeIndex = 2;
+ Node node = new Node(STORAGE, nodeIndex);
+ Result result = nodeStateChangeChecker.evaluateTransition(node, clusterState, SAFE, UP_NODE_STATE, MAINTENANCE_NODE_STATE);
+ assertFalse(result.allowed(), result.toString());
+ assertFalse(result.isAlreadySet());
+ assertEquals("Distributor 2 says storage node 2 has buckets with redundancy as low as 1, but we require at least 4", result.reason());
+ }
+
}
@ParameterizedTest