diff options
Diffstat (limited to 'clustercontroller-core/src')
13 files changed, 649 insertions, 349 deletions
diff --git a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/ClusterInfo.java b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/ClusterInfo.java index 2cfaf64fe83..0f119d8de50 100644 --- a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/ClusterInfo.java +++ b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/ClusterInfo.java @@ -15,7 +15,7 @@ import java.util.Set; import java.util.TreeMap; /** - * Detail information about the current state of all the distributor and storage nodes of the cluster. + * Detailed information about the current state of all the distributor and storage nodes of the cluster. * * @author hakonhall * @author bratseth @@ -127,11 +127,10 @@ public class ClusterInfo { /** Returns the node info object for a given node identifier */ private NodeInfo getInfo(Node node) { - switch (node.getType()) { - case DISTRIBUTOR : return getDistributorNodeInfo(node.getIndex()); - case STORAGE : return getStorageNodeInfo(node.getIndex()); - default : throw new IllegalArgumentException("No node type " + node.getType().toString()); - } + return switch (node.getType()) { + case DISTRIBUTOR -> getDistributorNodeInfo(node.getIndex()); + case STORAGE -> getStorageNodeInfo(node.getIndex()); + }; } } diff --git a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/ClusterStateVersionSpecificRequest.java b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/ClusterStateVersionSpecificRequest.java index 70fbbb60e26..6855859c96f 100644 --- a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/ClusterStateVersionSpecificRequest.java +++ b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/ClusterStateVersionSpecificRequest.java @@ -2,7 +2,7 @@ package com.yahoo.vespa.clustercontroller.core; /** - * Base class for distributor/content node node RPC requests that are bound + * Base class for distributor/content node RPC requests that are bound * to a particular cluster state version. */ public abstract class ClusterStateVersionSpecificRequest { diff --git a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/ContentCluster.java b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/ContentCluster.java index 9538167c6de..2535589395d 100644 --- a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/ContentCluster.java +++ b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/ContentCluster.java @@ -20,32 +20,29 @@ import static com.yahoo.vdslib.state.NodeState.ORCHESTRATOR_RESERVED_DESCRIPTION public class ContentCluster { - private final String clusterName; + private static final int pollingFrequency = 5000; + private final String clusterName; private final ClusterInfo clusterInfo = new ClusterInfo(); - private final Map<Node, Long> nodeStartTimestamps = new TreeMap<>(); private int slobrokGenerationCount = 0; - - private int pollingFrequency = 5000; - private Distribution distribution; private final int maxNumberOfGroupsAllowedToBeDown; public ContentCluster(String clusterName, Collection<ConfiguredNode> configuredNodes, Distribution distribution) { - this(clusterName, configuredNodes, distribution, 1); + this(clusterName, configuredNodes, distribution, -1); } public ContentCluster(FleetControllerOptions options) { this(options.clusterName(), options.nodes(), options.storageDistribution(), options.maxNumberOfGroupsAllowedToBeDown()); } - private ContentCluster(String clusterName, - Collection<ConfiguredNode> configuredNodes, - Distribution distribution, - int maxNumberOfGroupsAllowedToBeDown) { + ContentCluster(String clusterName, + Collection<ConfiguredNode> configuredNodes, + Distribution distribution, + int maxNumberOfGroupsAllowedToBeDown) { if (configuredNodes == null) throw new IllegalArgumentException("Nodes must be set"); this.clusterName = clusterName; this.distribution = distribution; @@ -91,7 +88,6 @@ public class ContentCluster { } public int getPollingFrequency() { return pollingFrequency; } - public void setPollingFrequency(int millisecs) { pollingFrequency = millisecs; } /** Returns the configured nodes of this as a read-only map indexed on node index (distribution key) */ public Map<Integer, ConfiguredNode> getConfiguredNodes() { @@ -131,7 +127,7 @@ public class ContentCluster { * @param clusterState the current cluster state version * @param condition the upgrade condition * @param oldState the old/current wanted state - * @param newState state wanted to be set @return NodeUpgradePrechecker.Response + * @param newState state wanted to be set * @param inMoratorium whether the CC is in moratorium */ public NodeStateChangeChecker.Result calculateEffectOfNewState( @@ -144,22 +140,22 @@ public class ContentCluster { /** Returns the indices of the nodes that have been safely set to the given state by the Orchestrator (best guess). */ public List<Integer> nodesSafelySetTo(State state) { - switch (state) { - case MAINTENANCE: // Orchestrator's ALLOWED_TO_BE_DOWN - case DOWN: // Orchestrator's PERMANENTLY_DOWN - return clusterInfo.getStorageNodeInfos().stream() - .filter(storageNodeInfo -> { - NodeState userWantedState = storageNodeInfo.getUserWantedState(); - return userWantedState.getState() == state && - Objects.equals(userWantedState.getDescription(), ORCHESTRATOR_RESERVED_DESCRIPTION); - }) - .map(NodeInfo::getNodeIndex) - .toList(); - default: - // Note: There is no trace left if the Orchestrator set the state to UP, so that's handled - // like any other state: - return List.of(); - } + return switch (state) { + // Orchestrator's ALLOWED_TO_BE_DOWN or PERMANENTLY_DOWN, respectively + case MAINTENANCE, DOWN -> + clusterInfo.getStorageNodeInfos().stream() + .filter(storageNodeInfo -> { + NodeState userWantedState = storageNodeInfo.getUserWantedState(); + return userWantedState.getState() == state && + Objects.equals(userWantedState.getDescription(), ORCHESTRATOR_RESERVED_DESCRIPTION); + }) + .map(NodeInfo::getNodeIndex) + .toList(); + default -> + // Note: There is no trace left if the Orchestrator sets the state to UP, so that's handled + // like any other state: + List.of(); + }; } public boolean hasConfiguredNode(int index) { diff --git a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/HierarchicalGroupVisiting.java b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/HierarchicalGroupVisiting.java index 19ff51f4cc4..09f1824824c 100644 --- a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/HierarchicalGroupVisiting.java +++ b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/HierarchicalGroupVisiting.java @@ -1,16 +1,38 @@ // Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. -// package com.yahoo.vespa.clustercontroller.core; +import com.yahoo.vdslib.distribution.Distribution; import com.yahoo.vdslib.distribution.GroupVisitor; -public interface HierarchicalGroupVisiting { - /** Returns true if the group contains more than one (leaf) group. */ - boolean isHierarchical(); +class HierarchicalGroupVisiting { + + private final Distribution distribution; + + public HierarchicalGroupVisiting(Distribution distribution) { + this.distribution = distribution; + } + + /** + * Returns true if the group contains more than one (leaf) group. + */ + public boolean isHierarchical() { + return !distribution.getRootGroup().isLeafGroup(); + } /** * Invoke the visitor for each leaf group of an implied group. If the group is non-hierarchical * (flat), the visitor will not be invoked. */ - void visit(GroupVisitor visitor); + public void visit(GroupVisitor visitor) { + if (isHierarchical()) { + distribution.visitGroups(group -> { + if (group.isLeafGroup()) { + return visitor.visitGroup(group); + } + + return true; + }); + } + } + } diff --git a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/HierarchicalGroupVisitingAdapter.java b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/HierarchicalGroupVisitingAdapter.java deleted file mode 100644 index 4bc487bfa7f..00000000000 --- a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/HierarchicalGroupVisitingAdapter.java +++ /dev/null @@ -1,37 +0,0 @@ -// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. -// -package com.yahoo.vespa.clustercontroller.core; - -import com.yahoo.vdslib.distribution.Distribution; -import com.yahoo.vdslib.distribution.GroupVisitor; - -/** - * Exposes {@link Distribution} as a {@link HierarchicalGroupVisiting}. - * - * @author hakon - */ -public class HierarchicalGroupVisitingAdapter implements HierarchicalGroupVisiting { - private final Distribution distribution; - - public HierarchicalGroupVisitingAdapter(Distribution distribution) { - this.distribution = distribution; - } - - @Override - public boolean isHierarchical() { - return !distribution.getRootGroup().isLeafGroup(); - } - - @Override - public void visit(GroupVisitor visitor) { - if (isHierarchical()) { - distribution.visitGroups(group -> { - if (group.isLeafGroup()) { - return visitor.visitGroup(group); - } - - return true; - }); - } - } -} diff --git a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/NodeInfo.java b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/NodeInfo.java index 3f1a7ab5d7b..d7aac1c26fa 100644 --- a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/NodeInfo.java +++ b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/NodeInfo.java @@ -55,7 +55,10 @@ abstract public class NodeInfo implements Comparable<NodeInfo> { private long nextAttemptTime; /** Cached connection to this node. */ private Target connection; - /** We cache last connection we did request info on, as we want to report appropriate error for node regardless of whether other commands have created new connection. */ + /** + * We cache last connection we did request info on, as we want to report appropriate error for + * node regardless of whether other commands have created new connection. + */ public Target lastRequestInfoConnection; /** * Counts the number of attempts we have tried since last time we had @@ -163,7 +166,7 @@ abstract public class NodeInfo implements Comparable<NodeInfo> { } if (prematureCrashCount != count) { prematureCrashCount = count; - log.log(Level.FINE, () -> "Premature crash count on " + toString() + " set to " + count); + log.log(Level.FINE, () -> "Premature crash count on " + this + " set to " + count); } } public int getPrematureCrashCount() { return prematureCrashCount; } @@ -311,13 +314,13 @@ abstract public class NodeInfo implements Comparable<NodeInfo> { } if (state.getState().equals(State.DOWN) && !reportedState.getState().oneOf("d")) { downStableStateTime = time; - log.log(Level.FINE, () -> "Down stable state on " + toString() + " altered to " + time); + log.log(Level.FINE, () -> "Down stable state on " + this + " altered to " + time); if (reportedState.getState() == State.INITIALIZING) { recentlyObservedUnstableDuringInit = true; } } else if (state.getState().equals(State.UP) && !reportedState.getState().oneOf("u")) { upStableStateTime = time; - log.log(Level.FINE, () -> "Up stable state on " + toString() + " altered to " + time); + log.log(Level.FINE, () -> "Up stable state on " + this + " altered to " + time); } if (!state.getState().validReportedNodeState(node.getType())) { throw new IllegalStateException("Trying to set illegal reported node state: " + state); @@ -340,14 +343,14 @@ abstract public class NodeInfo implements Comparable<NodeInfo> { } else { nextAttemptTime = time + 5000; } - log.log(Level.FINEST, () -> "Failed to get state from node " + toString() + ", scheduling next attempt in " + (nextAttemptTime - time) + " ms."); + log.log(Level.FINEST, () -> "Failed to get state from node " + this + ", scheduling next attempt in " + (nextAttemptTime - time) + " ms."); } else { connectionAttemptCount = 0; timeOfFirstFailingConnectionAttempt = 0; reportedState = state; if (version == 0 || state.getState().equals(State.STOPPING)) { nextAttemptTime = time + cluster.getPollingFrequency(); - log.log(Level.FINEST, () -> "Scheduling next attempt to get state from " + toString() + " in " + (nextAttemptTime - time) + " ms (polling freq)."); + log.log(Level.FINEST, () -> "Scheduling next attempt to get state from " + this + " in " + (nextAttemptTime - time) + " ms (polling freq)."); } else { nextAttemptTime = time; } @@ -368,7 +371,7 @@ abstract public class NodeInfo implements Comparable<NodeInfo> { } catch (Exception e) { StringWriter sw = new StringWriter(); e.printStackTrace(new PrintWriter(sw)); - log.warning("Attempted to set wanted state with more than just a main state. Extra data stripped. Original data '" + state.serialize(true) + ":\n" + sw.toString()); + log.warning("Attempted to set wanted state with more than just a main state. Extra data stripped. Original data '" + state.serialize(true) + ":\n" + sw); } } wantedState = newWanted; diff --git a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/NodeStateChangeChecker.java b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/NodeStateChangeChecker.java index 2025dfef562..c823c94afd1 100644 --- a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/NodeStateChangeChecker.java +++ b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/NodeStateChangeChecker.java @@ -13,13 +13,20 @@ import com.yahoo.vespa.clustercontroller.core.hostinfo.HostInfo; import com.yahoo.vespa.clustercontroller.core.hostinfo.Metrics; import com.yahoo.vespa.clustercontroller.core.hostinfo.StorageNode; import com.yahoo.vespa.clustercontroller.utils.staterestapi.requests.SetUnitStateRequest; +import java.util.ArrayList; +import java.util.Collection; +import java.util.Collections; import java.util.List; import java.util.Map; import java.util.Objects; import java.util.Optional; +import java.util.Set; +import java.util.logging.Logger; +import java.util.stream.Collectors; import static com.yahoo.vdslib.state.NodeType.STORAGE; import static com.yahoo.vdslib.state.State.DOWN; +import static com.yahoo.vdslib.state.State.MAINTENANCE; import static com.yahoo.vdslib.state.State.RETIRED; import static com.yahoo.vdslib.state.State.UP; import static com.yahoo.vespa.clustercontroller.core.NodeStateChangeChecker.Result.allowSettingOfWantedState; @@ -27,6 +34,7 @@ import static com.yahoo.vespa.clustercontroller.core.NodeStateChangeChecker.Resu import static com.yahoo.vespa.clustercontroller.core.NodeStateChangeChecker.Result.createDisallowed; import static com.yahoo.vespa.clustercontroller.utils.staterestapi.requests.SetUnitStateRequest.Condition.FORCE; import static com.yahoo.vespa.clustercontroller.utils.staterestapi.requests.SetUnitStateRequest.Condition.SAFE; +import static java.util.logging.Level.FINE; /** * Checks if a node can be upgraded. @@ -35,8 +43,9 @@ import static com.yahoo.vespa.clustercontroller.utils.staterestapi.requests.SetU */ public class NodeStateChangeChecker { - public static final String BUCKETS_METRIC_NAME = "vds.datastored.bucket_space.buckets_total"; - public static final Map<String, String> BUCKETS_METRIC_DIMENSIONS = Map.of("bucketSpace", "default"); + private static final Logger log = Logger.getLogger(NodeStateChangeChecker.class.getName()); + private static final String BUCKETS_METRIC_NAME = "vds.datastored.bucket_space.buckets_total"; + private static final Map<String, String> BUCKETS_METRIC_DIMENSIONS = Map.of("bucketSpace", "default"); private final int requiredRedundancy; private final HierarchicalGroupVisiting groupVisiting; @@ -46,10 +55,12 @@ public class NodeStateChangeChecker { public NodeStateChangeChecker(ContentCluster cluster, boolean inMoratorium) { this.requiredRedundancy = cluster.getDistribution().getRedundancy(); - this.groupVisiting = new HierarchicalGroupVisitingAdapter(cluster.getDistribution()); + this.groupVisiting = new HierarchicalGroupVisiting(cluster.getDistribution()); this.clusterInfo = cluster.clusterInfo(); this.inMoratorium = inMoratorium; this.maxNumberOfGroupsAllowedToBeDown = cluster.maxNumberOfGroupsAllowedToBeDown(); + if ( ! groupVisiting.isHierarchical() && maxNumberOfGroupsAllowedToBeDown > 1) + throw new IllegalArgumentException("Cannot have both 1 group and maxNumberOfGroupsAllowedToBeDown > 1"); } public static class Result { @@ -214,26 +225,34 @@ public class NodeStateChangeChecker { oldWantedState.getState() + ": " + oldWantedState.getDescription()); } - Result otherGroupCheck = anotherNodeInAnotherGroupHasWantedState(nodeInfo); - if (!otherGroupCheck.settingWantedStateIsAllowed()) { - return otherGroupCheck; + if (maxNumberOfGroupsAllowedToBeDown == -1) { + var otherGroupCheck = anotherNodeInAnotherGroupHasWantedState(nodeInfo); + if (!otherGroupCheck.settingWantedStateIsAllowed()) { + return otherGroupCheck; + } + if (anotherNodeInGroupAlreadyAllowed(nodeInfo, newDescription)) { + return allowSettingOfWantedState(); + } + } else { + var result = otherNodesHaveWantedState(nodeInfo, newDescription, clusterState); + if (result.isPresent()) + return result.get(); } if (clusterState.getNodeState(nodeInfo.getNode()).getState() == DOWN) { - return allowSettingOfWantedState(); - } - - if (anotherNodeInGroupAlreadyAllowed(nodeInfo, newDescription)) { + log.log(FINE, "node is DOWN, allow"); return allowSettingOfWantedState(); } Result allNodesAreUpCheck = checkAllNodesAreUp(clusterState); if (!allNodesAreUpCheck.settingWantedStateIsAllowed()) { + log.log(FINE, "allNodesAreUpCheck: " + allNodesAreUpCheck); return allNodesAreUpCheck; } Result checkDistributorsResult = checkDistributors(nodeInfo.getNode(), clusterState.getVersion()); if (!checkDistributorsResult.settingWantedStateIsAllowed()) { + log.log(FINE, "checkDistributors: "+ checkDistributorsResult); return checkDistributorsResult; } @@ -268,6 +287,65 @@ public class NodeStateChangeChecker { } } + /** + * Returns an optional Result, where return value is: + * For flat setup: Return Optional.of(disallowed) if wanted state is set on some node, else Optional.empty + * For hierarchical setup: No wanted state for other nodes, return Optional.empty + * Wanted state for nodes/groups are not UP: + * if less than maxNumberOfGroupsAllowedToBeDown: return Optional.of(allowed) + * else: if node is in group with nodes already down: return Optional.of(allowed), else Optional.of(disallowed) + */ + private Optional<Result> otherNodesHaveWantedState(StorageNodeInfo nodeInfo, String newDescription, ClusterState clusterState) { + Node node = nodeInfo.getNode(); + + if (groupVisiting.isHierarchical()) { + Set<Integer> groupsWithNodesWantedStateNotUp = groupsWithUserWantedStateNotUp(); + if (groupsWithNodesWantedStateNotUp.size() == 0) { + log.log(FINE, "groupsWithNodesWantedStateNotUp=0"); + return Optional.empty(); + } + + Set<Integer> groupsWithSameStateAndDescription = groupsWithSameStateAndDescription(MAINTENANCE, newDescription); + if (aGroupContainsNode(groupsWithSameStateAndDescription, node)) { + log.log(FINE, "Node is in group with same state and description, allow"); + return Optional.of(allowSettingOfWantedState()); + } + // There are groups with nodes not up, but with another description, probably operator set + if (groupsWithSameStateAndDescription.size() == 0) { + return Optional.of(createDisallowed("Wanted state already set for another node in groups: " + + sortSetIntoList(groupsWithNodesWantedStateNotUp))); + } + + Set<Integer> retiredAndNotUpGroups = groupsWithNotRetiredAndNotUp(clusterState); + int numberOfGroupsToConsider = retiredAndNotUpGroups.size(); + // Subtract one group if node is in a group with nodes already retired or not up, since number of such groups will + // not increase if we allow node to go down + if (aGroupContainsNode(retiredAndNotUpGroups, node)) { + numberOfGroupsToConsider = retiredAndNotUpGroups.size() - 1; + } + if (numberOfGroupsToConsider < maxNumberOfGroupsAllowedToBeDown) { + log.log(FINE, "Allow, retiredAndNotUpGroups=" + retiredAndNotUpGroups); + return Optional.of(allowSettingOfWantedState()); + } + + return Optional.of(createDisallowed(String.format("At most %d groups can have wanted state: %s", + maxNumberOfGroupsAllowedToBeDown, + sortSetIntoList(retiredAndNotUpGroups)))); + } else { + // Return a disallow-result if there is another node with a wanted state + var otherNodeHasWantedState = otherNodeHasWantedState(nodeInfo); + if ( ! otherNodeHasWantedState.settingWantedStateIsAllowed()) + return Optional.of(otherNodeHasWantedState); + } + return Optional.empty(); + } + + private ArrayList<Integer> sortSetIntoList(Set<Integer> set) { + var sortedList = new ArrayList<>(set); + Collections.sort(sortedList); + return sortedList; + } + /** Returns a disallow-result, if there is a node in the group with wanted state != UP. */ private Result otherNodeInGroupHasWantedState(Group group) { for (var configuredNode : group.getNodes()) { @@ -354,6 +432,22 @@ public class NodeStateChangeChecker { return false; } + private boolean aGroupContainsNode(Collection<Integer> groupIndexes, Node node) { + for (Group group : getGroupsWithIndexes(groupIndexes)) { + if (groupContainsNode(group, node)) + return true; + } + + return false; + } + + private List<Group> getGroupsWithIndexes(Collection<Integer> groupIndexes) { + return clusterInfo.getStorageNodeInfos().stream() + .map(NodeInfo::getGroup) + .filter(group -> groupIndexes.contains(group.getIndex())) + .collect(Collectors.toList()); + } + private Result checkAllNodesAreUp(ClusterState clusterState) { // This method verifies both storage nodes and distributors are up (or retired). // The complicated part is making a summary error message. @@ -441,4 +535,43 @@ public class NodeStateChangeChecker { return allowSettingOfWantedState(); } + private Set<Integer> groupsWithUserWantedStateNotUp() { + return clusterInfo.getAllNodeInfos().stream() + .filter(sni -> !UP.equals(sni.getUserWantedState().getState())) + .map(NodeInfo::getGroup) + .filter(Objects::nonNull) + .filter(Group::isLeafGroup) + .map(Group::getIndex) + .collect(Collectors.toSet()); + } + + // groups with at least one node with the same state & description + private Set<Integer> groupsWithSameStateAndDescription(State state, String newDescription) { + return clusterInfo.getAllNodeInfos().stream() + .filter(nodeInfo -> { + var userWantedState = nodeInfo.getUserWantedState(); + return userWantedState.getState() == state && + Objects.equals(userWantedState.getDescription(), newDescription); + }) + .map(NodeInfo::getGroup) + .filter(Objects::nonNull) + .filter(Group::isLeafGroup) + .map(Group::getIndex) + .collect(Collectors.toSet()); + } + + // groups with at least one node in state (not retired AND not up) + private Set<Integer> groupsWithNotRetiredAndNotUp(ClusterState clusterState) { + return clusterInfo.getAllNodeInfos().stream() + .filter(nodeInfo -> (nodeInfo.getUserWantedState().getState() != RETIRED + && nodeInfo.getUserWantedState().getState() != UP) + || (clusterState.getNodeState(nodeInfo.getNode()).getState() != RETIRED + && clusterState.getNodeState(nodeInfo.getNode()).getState() != UP)) + .map(NodeInfo::getGroup) + .filter(Objects::nonNull) + .filter(Group::isLeafGroup) + .map(Group::getIndex) + .collect(Collectors.toSet()); + } + } diff --git a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/NodeStateGatherer.java b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/NodeStateGatherer.java index 68e46414c22..6f4d0749f3f 100644 --- a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/NodeStateGatherer.java +++ b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/NodeStateGatherer.java @@ -4,7 +4,6 @@ package com.yahoo.vespa.clustercontroller.core; import com.yahoo.jrt.ErrorCode; import com.yahoo.jrt.Target; import com.yahoo.vdslib.state.NodeState; -import com.yahoo.vdslib.state.State; import com.yahoo.vespa.clustercontroller.core.hostinfo.HostInfo; import com.yahoo.vespa.clustercontroller.core.listeners.NodeListener; import java.util.LinkedList; @@ -12,6 +11,9 @@ import java.util.List; import java.util.logging.Level; import java.util.logging.Logger; +import static com.yahoo.vdslib.state.State.DOWN; +import static com.yahoo.vdslib.state.State.STOPPING; + /** * Collects the state of all nodes by making remote requests and handling the replies. */ @@ -65,20 +67,20 @@ public class NodeStateGatherer { if (info.getRpcAddress() == null || info.isNotInSlobrok()) { // Cannot query state of node without RPC address or not in slobrok log.log(Level.FINE, () -> "Not sending getNodeState request to node " + info.getNode() + ": Not in slobrok"); NodeState reportedState = info.getReportedState().clone(); - if (( ! reportedState.getState().equals(State.DOWN) && currentTime - info.lastSeenInSlobrok() > maxSlobrokDisconnectGracePeriod) - || reportedState.getState().equals(State.STOPPING)) // Don't wait for grace period if we expect node to be stopping + if (( ! reportedState.getState().equals(DOWN) && currentTime - info.lastSeenInSlobrok() > maxSlobrokDisconnectGracePeriod) + || reportedState.getState().equals(STOPPING)) // Don't wait for grace period if we expect node to be stopping { log.log(Level.FINE, () -> "Setting reported state to DOWN " - + (reportedState.getState().equals(State.STOPPING) + + (reportedState.getState().equals(STOPPING) ? "as node completed stopping." - : "as node has been out of slobrok longer than " + maxSlobrokDisconnectGracePeriod + ".")); + : "as node has been out of slobrok longer than " + maxSlobrokDisconnectGracePeriod + " ms.")); if (reportedState.getState().oneOf("iur") || ! reportedState.hasDescription()) { - StringBuilder sb = new StringBuilder().append("Set node down as it has been out of slobrok for ") - .append(currentTime - info.lastSeenInSlobrok()).append(" ms which is more than the max limit of ") - .append(maxSlobrokDisconnectGracePeriod).append(" ms."); - reportedState.setDescription(sb.toString()); + reportedState.setDescription("Set node down as it has been out of slobrok for " + + (currentTime - info.lastSeenInSlobrok()) + + " ms which is more than the max limit of " + + maxSlobrokDisconnectGracePeriod + " ms."); } - reportedState.setState(State.DOWN); + reportedState.setState(DOWN); listener.handleNewNodeState(info, reportedState.clone()); } info.setReportedState(reportedState, currentTime); // Must reset it to null to get connection attempts counted @@ -135,7 +137,7 @@ public class NodeStateGatherer { info.setReportedState(state, currentTime); } catch (Exception e) { log.log(Level.WARNING, "Failed to process get node state response", e); - info.setReportedState(new NodeState(info.getNode().getType(), State.DOWN), currentTime); + info.setReportedState(new NodeState(info.getNode().getType(), DOWN), currentTime); } // Important: The old host info should be accessible in info.getHostInfo(), see interface. @@ -152,7 +154,7 @@ public class NodeStateGatherer { private NodeState handleError(GetNodeStateRequest req, NodeInfo info, long currentTime) { String prefix = "Failed get node state request: "; - NodeState newState = new NodeState(info.getNode().getType(), State.DOWN); + NodeState newState = new NodeState(info.getNode().getType(), DOWN); if (req.getReply().getReturnCode() == ErrorCode.TIMEOUT) { String msg = "RPC timeout"; if (info.getReportedState().getState().oneOf("ui")) { @@ -177,7 +179,7 @@ public class NodeStateGatherer { log.log(Level.FINE, "Failed to talk to node " + info + ": " + req.getReply().getReturnCode() + " " + req.getReply().getReturnMessage() + ": " + msg); } - newState.setState(State.DOWN); + newState.setState(DOWN); } else if (msg.equals("jrt: Connection closed by peer") || msg.equals("Connection reset by peer")) { msg = "Connection error: Closed at other end. (Node or switch likely shut down)"; if (info.isNotInSlobrok()) { @@ -189,7 +191,7 @@ public class NodeStateGatherer { if (log.isLoggable(Level.FINE)) log.log(Level.FINE, "Failed to talk to node " + info + ": " + req.getReply().getReturnCode() + " " + req.getReply().getReturnMessage() + ": " + msg); } - newState.setState(State.DOWN).setDescription(msg); + newState.setState(DOWN).setDescription(msg); } else if (msg.equals("Connection timed out")) { if (info.getReportedState().getState().oneOf("ui")) { msg = "Connection error: Timeout"; @@ -228,11 +230,11 @@ public class NodeStateGatherer { } else if (!info.getReportedState().hasDescription() || !info.getReportedState().getDescription().equals(msg)) { log.log(Level.FINE, () -> "Failed to talk to node " + info + ": " + req.getReply().getReturnCode() + " " + req.getReply().getReturnMessage() + ": " + msg); } - newState.setState(State.DOWN).setDescription(msg + ": get node state"); + newState.setState(DOWN).setDescription(msg + ": get node state"); } else if (req.getReply().getReturnCode() == 75004) { String msg = "Node refused to answer RPC request and is likely stopping: " + req.getReply().getReturnMessage(); // The node is shutting down and is not accepting requests from anyone - if (info.getReportedState().getState().equals(State.STOPPING)) { + if (info.getReportedState().getState().equals(STOPPING)) { log.log(Level.FINE, () -> "Failed to get node state from " + info + " because it is still shutting down."); } else { if (info.getReportedState().getState().oneOf("ui")) { @@ -241,7 +243,7 @@ public class NodeStateGatherer { log.log(Level.FINE, () -> "Failed to talk to node " + info + ": " + req.getReply().getReturnCode() + " " + req.getReply().getReturnMessage() + ": " + msg); } } - newState.setState(State.STOPPING).setDescription(msg); + newState.setState(STOPPING).setDescription(msg); } else { String msg = "Got unexpected error, assumed to be node issue " + req.getReply().getReturnCode() + ": " + req.getReply().getReturnMessage(); if (info.getReportedState().getState().oneOf("ui")) { @@ -249,7 +251,7 @@ public class NodeStateGatherer { } else if (!info.getReportedState().hasDescription() || !info.getReportedState().getDescription().equals(msg)) { log.log(Level.FINE, () -> "Failed to talk to node " + info + ": " + req.getReply().getReturnCode() + " " + req.getReply().getReturnMessage() + ": " + msg); } - newState.setState(State.DOWN).setDescription(msg); + newState.setState(DOWN).setDescription(msg); } return newState; } diff --git a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/StateChangeHandler.java b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/StateChangeHandler.java index 4ab80ec6d7a..28149477e36 100644 --- a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/StateChangeHandler.java +++ b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/StateChangeHandler.java @@ -9,13 +9,19 @@ import com.yahoo.vdslib.state.NodeType; import com.yahoo.vdslib.state.State; import com.yahoo.vespa.clustercontroller.core.database.DatabaseHandler; import com.yahoo.vespa.clustercontroller.core.listeners.NodeListener; - +import java.util.List; import java.util.Map; import java.util.Set; import java.util.TreeMap; import java.util.logging.Level; import java.util.logging.Logger; +import static com.yahoo.vdslib.state.State.DOWN; +import static com.yahoo.vdslib.state.State.INITIALIZING; +import static com.yahoo.vdslib.state.State.STOPPING; +import static java.util.logging.Level.FINE; +import static java.util.logging.Level.FINEST; + /** * This class gets node state updates and timer events and uses these to decide * whether a new cluster state should be generated. @@ -52,9 +58,9 @@ public class StateChangeHandler { public void handleAllDistributorsInSync(final ClusterState currentState, final Set<ConfiguredNode> nodes, final DatabaseHandler database, - final DatabaseHandler.DatabaseContext dbContext) throws InterruptedException { + final DatabaseHandler.DatabaseContext dbContext) { int startTimestampsReset = 0; - context.log(log, Level.FINE, "handleAllDistributorsInSync invoked for state version %d", currentState.getVersion()); + context.log(log, FINE, "handleAllDistributorsInSync invoked for state version %d", currentState.getVersion()); for (NodeType nodeType : NodeType.getTypes()) { for (ConfiguredNode configuredNode : nodes) { final Node node = new Node(nodeType, configuredNode.index()); @@ -62,15 +68,15 @@ public class StateChangeHandler { final NodeState nodeState = currentState.getNodeState(node); if (nodeInfo != null && nodeState != null) { if (nodeState.getStartTimestamp() > nodeInfo.getStartTimestamp()) { - log.log(Level.FINE, () -> String.format("Storing away new start timestamp for node %s (%d)", node, nodeState.getStartTimestamp())); + log.log(FINE, () -> String.format("Storing away new start timestamp for node %s (%d)", node, nodeState.getStartTimestamp())); nodeInfo.setStartTimestamp(nodeState.getStartTimestamp()); } if (nodeState.getStartTimestamp() > 0) { - log.log(Level.FINE, "Resetting timestamp in cluster state for node %s", node); + log.log(FINE, "Resetting timestamp in cluster state for node %s", node); ++startTimestampsReset; } - } else if (log.isLoggable(Level.FINE)) { - log.log(Level.FINE, node + ": " + + } else if (log.isLoggable(FINE)) { + log.log(FINE, node + ": " + (nodeInfo == null ? "null" : nodeInfo.getStartTimestamp()) + ", " + (nodeState == null ? "null" : nodeState.getStartTimestamp())); } @@ -83,7 +89,7 @@ public class StateChangeHandler { stateMayHaveChanged = true; database.saveStartTimestamps(dbContext); } else { - log.log(Level.FINE, "Found no start timestamps to reset in cluster state."); + log.log(FINE, "Found no start timestamps to reset in cluster state."); } } @@ -110,48 +116,45 @@ public class StateChangeHandler { // TODO nodeListener is only used via updateNodeInfoFromReportedState -> handlePrematureCrash // TODO this will recursively invoke proposeNewNodeState, which will presumably (i.e. hopefully) be a no-op... - public void handleNewReportedNodeState(final ClusterState currentClusterState, - final NodeInfo node, - final NodeState reportedState, - final NodeListener nodeListener) - { - final NodeState currentState = currentClusterState.getNodeState(node.getNode()); - final Level level = (currentState.equals(reportedState) && node.getVersion() == 0) ? Level.FINEST : Level.FINE; - if (log.isLoggable(level)) { - log.log(level, String.format("Got nodestate reply from %s: %s (Current state is %s)", - node, node.getReportedState().getTextualDifference(reportedState), currentState.toString(true))); - } - final long currentTime = timer.getCurrentTimeInMillis(); - - if (reportedState.getState().equals(State.DOWN)) { + public void handleNewReportedNodeState(ClusterState currentClusterState, + NodeInfo node, + NodeState reportedState, + NodeListener nodeListener) { + NodeState currentState = currentClusterState.getNodeState(node.getNode()); + Level level = (currentState.equals(reportedState) && node.getVersion() == 0) ? FINEST : FINE; + log.log(level, () -> String.format("Got nodestate reply from %s: %s (Current state is %s)", + node, node.getReportedState().getTextualDifference(reportedState), currentState.toString(true))); + long currentTime = timer.getCurrentTimeInMillis(); + + if (reportedState.getState().equals(DOWN)) { node.setTimeOfFirstFailingConnectionAttempt(currentTime); } // *** LOGGING ONLY if ( ! reportedState.similarTo(node.getReportedState())) { - if (reportedState.getState().equals(State.DOWN)) { + if (reportedState.getState().equals(DOWN)) { eventLog.addNodeOnlyEvent(NodeEvent.forBaseline(node, "Failed to get node state: " + reportedState.toString(true), NodeEvent.Type.REPORTED, currentTime), Level.INFO); } else { - eventLog.addNodeOnlyEvent(NodeEvent.forBaseline(node, "Now reporting state " + reportedState.toString(true), NodeEvent.Type.REPORTED, currentTime), Level.FINE); + eventLog.addNodeOnlyEvent(NodeEvent.forBaseline(node, "Now reporting state " + reportedState.toString(true), NodeEvent.Type.REPORTED, currentTime), FINE); } } - if (reportedState.equals(node.getReportedState()) && ! reportedState.getState().equals(State.INITIALIZING)) { + if (reportedState.equals(node.getReportedState()) && ! reportedState.getState().equals(INITIALIZING)) { return; } updateNodeInfoFromReportedState(node, currentState, reportedState, nodeListener); if (reportedState.getMinUsedBits() != currentState.getMinUsedBits()) { - final int oldCount = currentState.getMinUsedBits(); - final int newCount = reportedState.getMinUsedBits(); - log.log(Level.FINE, + int oldCount = currentState.getMinUsedBits(); + int newCount = reportedState.getMinUsedBits(); + log.log(FINE, () -> String.format("Altering node state to reflect that min distribution bit count has changed from %d to %d", oldCount, newCount)); eventLog.add(NodeEvent.forBaseline(node, String.format("Altered min distribution bit count from %d to %d", oldCount, newCount), NodeEvent.Type.CURRENT, currentTime), isMaster); } else { - log.log(Level.FINE, () -> String.format("Not altering state of %s in cluster state because new state is too similar: %s", - node, currentState.getTextualDifference(reportedState))); + log.log(FINE, () -> String.format("Not altering state of %s in cluster state because new state is too similar: %s", + node, currentState.getTextualDifference(reportedState))); } stateMayHaveChanged = true; @@ -162,10 +165,8 @@ public class StateChangeHandler { eventLog.add(NodeEvent.forBaseline(node, message, NodeEvent.Type.REPORTED, timer.getCurrentTimeInMillis()), isMaster); } - public void handleMissingNode(final ClusterState currentClusterState, - final NodeInfo node, - final NodeListener nodeListener) { - final long timeNow = timer.getCurrentTimeInMillis(); + public void handleMissingNode(ClusterState currentClusterState, NodeInfo node, NodeListener nodeListener) { + long timeNow = timer.getCurrentTimeInMillis(); if (node.getLatestNodeStateRequestTime() != null) { eventLog.add(NodeEvent.forBaseline(node, "Node is no longer in slobrok, but we still have a pending state request.", NodeEvent.Type.REPORTED, timeNow), isMaster); @@ -173,13 +174,13 @@ public class StateChangeHandler { eventLog.add(NodeEvent.forBaseline(node, "Node is no longer in slobrok. No pending state request to node.", NodeEvent.Type.REPORTED, timeNow), isMaster); } - if (node.getReportedState().getState().equals(State.STOPPING)) { - log.log(Level.FINE, () -> "Node " + node.getNode() + " is no longer in slobrok. Was in stopping state, so assuming it has shut down normally. Setting node down"); + if (node.getReportedState().getState().equals(STOPPING)) { + log.log(FINE, () -> "Node " + node.getNode() + " is no longer in slobrok. Was in stopping state, so assuming it has shut down normally. Setting node down"); NodeState ns = node.getReportedState().clone(); - ns.setState(State.DOWN); + ns.setState(DOWN); handleNewReportedNodeState(currentClusterState, node, ns.clone(), nodeListener); } else { - log.log(Level.FINE, () -> "Node " + node.getNode() + " no longer in slobrok was in state " + node.getReportedState() + ". Waiting to see if it reappears in slobrok"); + log.log(FINE, () -> "Node " + node.getNode() + " no longer in slobrok was in state " + node.getReportedState() + ". Waiting to see if it reappears in slobrok"); } stateMayHaveChanged = true; @@ -192,19 +193,19 @@ public class StateChangeHandler { * If the newly proposed state differs from the state the node currently has in the system, * a cluster state regeneration will be triggered. */ - public void proposeNewNodeState(final ClusterState currentClusterState, final NodeInfo node, final NodeState proposedState) { - final NodeState currentState = currentClusterState.getNodeState(node.getNode()); - final NodeState currentReported = node.getReportedState(); + public void proposeNewNodeState(ClusterState currentClusterState, NodeInfo node, NodeState proposedState) { + NodeState currentState = currentClusterState.getNodeState(node.getNode()); - if (currentState.getState().equals(proposedState.getState())) { + if (currentState.getState().equals(proposedState.getState())) return; - } + stateMayHaveChanged = true; - log.log(Level.FINE, () -> String.format("Got new wanted nodestate for %s: %s", node, currentState.getTextualDifference(proposedState))); + log.log(FINE, () -> String.format("Got new wanted nodestate for %s: %s", node, currentState.getTextualDifference(proposedState))); // Should be checked earlier before state was set in cluster assert(proposedState.getState().validWantedNodeState(node.getNode().getType())); long timeNow = timer.getCurrentTimeInMillis(); + NodeState currentReported = node.getReportedState(); if (proposedState.above(currentReported)) { eventLog.add(NodeEvent.forBaseline(node, String.format("Wanted state %s, but we cannot force node into that " + "state yet as it is currently in %s", proposedState, currentReported), @@ -239,12 +240,9 @@ public class StateChangeHandler { // generated cluster state. Still a bit of a mine field... // TODO remove all node state mutation from this function entirely in favor of ClusterStateGenerator! // `--> this will require adding more event edges and premature crash handling to it. Which is fine. - public boolean watchTimers(final ContentCluster cluster, - final ClusterState currentClusterState, - final NodeListener nodeListener) - { + public boolean watchTimers(ContentCluster cluster, ClusterState currentClusterState, NodeListener nodeListener) { boolean triggeredAnyTimers = false; - final long currentTime = timer.getCurrentTimeInMillis(); + long currentTime = timer.getCurrentTimeInMillis(); for(NodeInfo node : cluster.getNodeInfos()) { triggeredAnyTimers |= handleTimeDependentOpsForNode(currentClusterState, nodeListener, currentTime, node); @@ -256,23 +254,17 @@ public class StateChangeHandler { return triggeredAnyTimers; } - private boolean handleTimeDependentOpsForNode(final ClusterState currentClusterState, - final NodeListener nodeListener, - final long currentTime, - final NodeInfo node) - { - final NodeState currentStateInSystem = currentClusterState.getNodeState(node.getNode()); - final NodeState lastReportedState = node.getReportedState(); - boolean triggeredAnyTimers = false; - - triggeredAnyTimers = reportDownIfOutdatedSlobrokNode( - currentClusterState, nodeListener, currentTime, node, lastReportedState); + private boolean handleTimeDependentOpsForNode(ClusterState currentClusterState, + NodeListener nodeListener, + long currentTime, + NodeInfo node) { + NodeState currentStateInSystem = currentClusterState.getNodeState(node.getNode()); + NodeState lastReportedState = node.getReportedState(); + boolean triggeredAnyTimers = + reportDownIfOutdatedSlobrokNode(currentClusterState, nodeListener, currentTime, node, lastReportedState); - if (nodeStillUnavailableAfterTransitionTimeExceeded( - currentTime, node, currentStateInSystem, lastReportedState)) - { + if (nodeStillUnavailableAfterTransitionTimeExceeded(currentTime, node, currentStateInSystem, lastReportedState)) triggeredAnyTimers = true; - } if (nodeInitProgressHasTimedOut(currentTime, node, currentStateInSystem, lastReportedState)) { eventLog.add(NodeEvent.forBaseline(node, String.format( @@ -287,11 +279,11 @@ public class StateChangeHandler { if (mayResetCrashCounterOnStableUpNode(currentTime, node, lastReportedState)) { node.setPrematureCrashCount(0); - log.log(Level.FINE, () -> "Resetting premature crash count on node " + node + " as it has been up for a long time."); + log.log(FINE, () -> "Resetting premature crash count on node " + node + " as it has been up for a long time."); triggeredAnyTimers = true; } else if (mayResetCrashCounterOnStableDownNode(currentTime, node, lastReportedState)) { node.setPrematureCrashCount(0); - log.log(Level.FINE, () -> "Resetting premature crash count on node " + node + " as it has been down for a long time."); + log.log(FINE, () -> "Resetting premature crash count on node " + node + " as it has been down for a long time."); triggeredAnyTimers = true; } @@ -299,17 +291,18 @@ public class StateChangeHandler { } private boolean nodeInitProgressHasTimedOut(long currentTime, NodeInfo node, NodeState currentStateInSystem, NodeState lastReportedState) { - return !currentStateInSystem.getState().equals(State.DOWN) - && node.getWantedState().above(new NodeState(node.getNode().getType(), State.DOWN)) - && lastReportedState.getState().equals(State.INITIALIZING) + return !currentStateInSystem.getState().equals(DOWN) + && node.getWantedState().above(new NodeState(node.getNode().getType(), DOWN)) + && lastReportedState.getState().equals(INITIALIZING) && maxInitProgressTime != 0 && node.getInitProgressTime() + maxInitProgressTime <= currentTime && node.getNode().getType().equals(NodeType.STORAGE); } + // TODO: Merge this and the below method private boolean mayResetCrashCounterOnStableDownNode(long currentTime, NodeInfo node, NodeState lastReportedState) { return node.getDownStableStateTime() + stableStateTimePeriod <= currentTime - && lastReportedState.getState().equals(State.DOWN) + && lastReportedState.getState().equals(DOWN) && node.getPrematureCrashCount() <= maxPrematureCrashes && node.getPrematureCrashCount() != 0; } @@ -328,8 +321,8 @@ public class StateChangeHandler { NodeState lastReportedState) { return currentStateInSystem.getState().equals(State.MAINTENANCE) - && node.getWantedState().above(new NodeState(node.getNode().getType(), State.DOWN)) - && (lastReportedState.getState().equals(State.DOWN) || node.isNotInSlobrok()) + && node.getWantedState().above(new NodeState(node.getNode().getType(), DOWN)) + && (lastReportedState.getState().equals(DOWN) || node.isNotInSlobrok()) && node.getTransitionTime() + maxTransitionTime.get(node.getNode().getType()) < currentTime; } @@ -340,7 +333,7 @@ public class StateChangeHandler { NodeState lastReportedState) { if (node.isNotInSlobrok() - && !lastReportedState.getState().equals(State.DOWN) + && !lastReportedState.getState().equals(DOWN) && node.lastSeenInSlobrok() + maxSlobrokDisconnectGracePeriod <= currentTime) { final String desc = String.format( @@ -350,7 +343,7 @@ public class StateChangeHandler { maxSlobrokDisconnectGracePeriod); node.abortCurrentNodeStateRequests(); NodeState state = lastReportedState.clone(); - state.setState(State.DOWN); + state.setState(DOWN); if (!state.hasDescription()) { state.setDescription(desc); } @@ -362,10 +355,12 @@ public class StateChangeHandler { return false; } + private boolean isNotControlledShutdown(NodeState state) { return ! isControlledShutdown(state); } + private boolean isControlledShutdown(NodeState state) { - return (state.getState() == State.STOPPING - && (state.getDescription().contains("Received signal 15 (SIGTERM - Termination signal)") - || state.getDescription().contains("controlled shutdown"))); + return state.getState() == State.STOPPING + && List.of("Received signal 15 (SIGTERM - Termination signal)", "controlled shutdown") + .contains(state.getDescription()); } /** @@ -381,14 +376,14 @@ public class StateChangeHandler { final NodeState reportedState, final NodeListener nodeListener) { final long timeNow = timer.getCurrentTimeInMillis(); - log.log(Level.FINE, () -> String.format("Finding new cluster state entry for %s switching state %s", node, currentState.getTextualDifference(reportedState))); + log.log(FINE, () -> String.format("Finding new cluster state entry for %s switching state %s", node, currentState.getTextualDifference(reportedState))); if (handleReportedNodeCrashEdge(node, currentState, reportedState, nodeListener, timeNow)) { return; } if (initializationProgressHasIncreased(currentState, reportedState)) { node.setInitProgressTime(timeNow); - log.log(Level.FINEST, () -> "Reset initialize timer on " + node + " to " + node.getInitProgressTime()); + log.log(FINEST, () -> "Reset initialize timer on " + node + " to " + node.getInitProgressTime()); } if (handleImplicitCrashEdgeFromReverseInitProgress(node, currentState, reportedState, nodeListener, timeNow)) { return; @@ -402,9 +397,9 @@ public class StateChangeHandler { final NodeState reportedState, final NodeListener nodeListener, final long timeNow) { - if (currentState.getState().equals(State.INITIALIZING) + if (currentState.getState().equals(INITIALIZING) && reportedState.getState().oneOf("ds") - && !isControlledShutdown(reportedState)) + && isNotControlledShutdown(reportedState)) { eventLog.add(NodeEvent.forBaseline(node, String.format("Stop or crash during initialization. " + "Premature crash count is now %d.", node.getPrematureCrashCount() + 1), @@ -421,8 +416,8 @@ public class StateChangeHandler { final NodeState reportedState, final NodeListener nodeListener, final long timeNow) { - if (currentState.getState().equals(State.INITIALIZING) && - (reportedState.getState().equals(State.INITIALIZING) && reportedState.getInitProgress() < currentState.getInitProgress())) + if (currentState.getState().equals(INITIALIZING) && + (reportedState.getState().equals(INITIALIZING) && reportedState.getInitProgress() < currentState.getInitProgress())) { eventLog.add(NodeEvent.forBaseline(node, String.format( "Stop or crash during initialization detected from reverse initializing progress." + @@ -442,8 +437,8 @@ public class StateChangeHandler { long timeNow) { if (nodeUpToDownEdge(node, currentState, reportedState)) { node.setTransitionTime(timeNow); - if (node.getUpStableStateTime() + stableStateTimePeriod > timeNow && !isControlledShutdown(reportedState)) { - log.log(Level.FINE, () -> "Stable state: " + node.getUpStableStateTime() + " + " + stableStateTimePeriod + " > " + timeNow); + if (node.getUpStableStateTime() + stableStateTimePeriod > timeNow && isNotControlledShutdown(reportedState)) { + log.log(FINE, () -> "Stable state: " + node.getUpStableStateTime() + " + " + stableStateTimePeriod + " > " + timeNow); eventLog.add(NodeEvent.forBaseline(node, String.format("Stopped or possibly crashed after %d ms, which is before " + "stable state time period. Premature crash count is now %d.", @@ -457,20 +452,20 @@ public class StateChangeHandler { } private boolean initializationProgressHasIncreased(NodeState currentState, NodeState reportedState) { - return reportedState.getState().equals(State.INITIALIZING) && - (!currentState.getState().equals(State.INITIALIZING) || + return reportedState.getState().equals(INITIALIZING) && + (!currentState.getState().equals(INITIALIZING) || reportedState.getInitProgress() > currentState.getInitProgress()); } private boolean nodeUpToDownEdge(NodeInfo node, NodeState currentState, NodeState reportedState) { return currentState.getState().oneOf("ur") && reportedState.getState().oneOf("dis") - && (node.getWantedState().getState().equals(State.RETIRED) || !reportedState.getState().equals(State.INITIALIZING)); + && (node.getWantedState().getState().equals(State.RETIRED) || !reportedState.getState().equals(INITIALIZING)); } private boolean handlePrematureCrash(NodeInfo node, NodeListener changeListener) { node.setPrematureCrashCount(node.getPrematureCrashCount() + 1); if (disableUnstableNodes && node.getPrematureCrashCount() > maxPrematureCrashes) { - NodeState wantedState = new NodeState(node.getNode().getType(), State.DOWN) + NodeState wantedState = new NodeState(node.getNode().getType(), DOWN) .setDescription("Disabled by fleet controller as it prematurely shut down " + node.getPrematureCrashCount() + " times in a row"); NodeState oldState = node.getWantedState(); node.setWantedState(wantedState); diff --git a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/restapiv2/Response.java b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/restapiv2/Response.java index cfe4c925551..4122abe1521 100644 --- a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/restapiv2/Response.java +++ b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/restapiv2/Response.java @@ -48,6 +48,8 @@ public class Response { public String getId() { return id; } @Override public String getReason() { return reason; } + @Override + public String toString() { return getId() +": " + getReason(); } } public static class Link implements SubUnitList { private final Map<String, String> links = new LinkedHashMap<>(); diff --git a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/restapiv2/requests/SetNodeStateRequest.java b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/restapiv2/requests/SetNodeStateRequest.java index 1c72594377a..01a75034ddf 100644 --- a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/restapiv2/requests/SetNodeStateRequest.java +++ b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/restapiv2/requests/SetNodeStateRequest.java @@ -72,14 +72,13 @@ public class SetNodeStateRequest extends Request<SetResponse> { static NodeState getRequestedNodeState(Map<String, UnitState> newStates, Node n) throws StateRestApiException { UnitState newState = newStates.get("user"); if (newState == null) throw new InvalidContentException("No new user state given in request"); - State state; - switch (newState.getId().toLowerCase()) { - case "up": state = State.UP; break; - case "retired": state = State.RETIRED; break; - case "maintenance": state = State.MAINTENANCE; break; - case "down": state = State.DOWN; break; - default: throw new InvalidContentException("Invalid user state '" + newState.getId() + "' given."); - } + State state = switch (newState.getId().toLowerCase()) { + case "up" -> State.UP; + case "retired" -> State.RETIRED; + case "maintenance" -> State.MAINTENANCE; + case "down" -> State.DOWN; + default -> throw new InvalidContentException("Invalid user state '" + newState.getId() + "' given."); + }; return new NodeState(n.getType(), state).setDescription(newState.getReason()); } @@ -191,25 +190,18 @@ public class SetNodeStateRequest extends Request<SetResponse> { boolean probe) { Node distributorNode = new Node(NodeType.DISTRIBUTOR, index); NodeInfo nodeInfo = cluster.getNodeInfo(distributorNode); - if (nodeInfo == null) { - throw new IllegalStateException("Missing distributor at index " + - distributorNode.getIndex()); - } + if (nodeInfo == null) + throw new IllegalStateException("Missing distributor at index " + distributorNode.getIndex()); State newState; switch (newStorageWantedState.getState()) { - case MAINTENANCE: - newState = State.DOWN; - break; - case RETIRED: - newState = State.UP; - break; - default: + case MAINTENANCE -> newState = State.DOWN; + case RETIRED -> newState = State.UP; + default -> { newState = newStorageWantedState.getState(); - if (!newState.validWantedNodeState(distributorNode.getType())) { - throw new IllegalStateException("Distributor cannot be set to wanted state " + - newState); - } + if (!newState.validWantedNodeState(distributorNode.getType())) + throw new IllegalStateException("Distributor cannot be set to wanted state " + newState); + } } NodeState newWantedState = new NodeState(distributorNode.getType(), newState); diff --git a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/NodeStateChangeCheckerTest.java b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/NodeStateChangeCheckerTest.java index e789a3cc6a6..c4fd7cb69b9 100644 --- a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/NodeStateChangeCheckerTest.java +++ b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/NodeStateChangeCheckerTest.java @@ -10,7 +10,8 @@ import com.yahoo.vdslib.state.State; import com.yahoo.vespa.clustercontroller.core.hostinfo.HostInfo; import com.yahoo.vespa.config.content.StorDistributionConfig; import org.junit.jupiter.api.Test; -import java.text.ParseException; +import org.junit.jupiter.params.ParameterizedTest; +import org.junit.jupiter.params.provider.ValueSource; import java.util.ArrayList; import java.util.Collection; import java.util.List; @@ -19,6 +20,7 @@ import static com.yahoo.vdslib.state.NodeType.DISTRIBUTOR; import static com.yahoo.vdslib.state.NodeType.STORAGE; import static com.yahoo.vdslib.state.State.DOWN; import static com.yahoo.vdslib.state.State.INITIALIZING; +import static com.yahoo.vdslib.state.State.MAINTENANCE; import static com.yahoo.vdslib.state.State.UP; import static com.yahoo.vespa.clustercontroller.core.NodeStateChangeChecker.Result; import static com.yahoo.vespa.clustercontroller.utils.staterestapi.requests.SetUnitStateRequest.Condition.FORCE; @@ -37,37 +39,38 @@ public class NodeStateChangeCheckerTest { private static final Node nodeStorage = new Node(STORAGE, 1); private static final NodeState UP_NODE_STATE = new NodeState(STORAGE, UP); - private static final NodeState MAINTENANCE_NODE_STATE = createNodeState(State.MAINTENANCE, "Orchestrator"); + private static final NodeState MAINTENANCE_NODE_STATE = createNodeState(MAINTENANCE, "Orchestrator"); private static final NodeState DOWN_NODE_STATE = createNodeState(DOWN, "RetireEarlyExpirer"); private static NodeState createNodeState(State state, String description) { return new NodeState(STORAGE, state).setDescription(description); } - private static ClusterState clusterState(String state) { - try { - return new ClusterState(state); - } catch (ParseException e) { - throw new RuntimeException(e); - } - } + private static ClusterState clusterState(String state) { return ClusterState.stateFromString(state); } private static ClusterState defaultAllUpClusterState() { - return clusterState(String.format("version:%d distributor:4 storage:4", currentClusterStateVersion)); + return defaultAllUpClusterState(4); + } + + private static ClusterState defaultAllUpClusterState(int nodeCount) { + return clusterState(String.format("version:%d distributor:%d storage:%d", + currentClusterStateVersion, + nodeCount , + nodeCount)); } private NodeStateChangeChecker createChangeChecker(ContentCluster cluster) { return new NodeStateChangeChecker(cluster, false); } - private ContentCluster createCluster(int nodeCount) { - return createCluster(nodeCount, 1); + private ContentCluster createCluster(int nodeCount, int maxNumberOfGroupsAllowedToBeDown) { + return createCluster(nodeCount, 1, maxNumberOfGroupsAllowedToBeDown); } - private ContentCluster createCluster(int nodeCount, int groupCount) { - Collection<ConfiguredNode> nodes = createNodes(nodeCount); + private ContentCluster createCluster(int nodeCount, int groupCount, int maxNumberOfGroupsAllowedToBeDown) { + List<ConfiguredNode> nodes = createNodes(nodeCount); Distribution distribution = new Distribution(createDistributionConfig(nodeCount, groupCount)); - return new ContentCluster("Clustername", nodes, distribution); + return new ContentCluster("Clustername", nodes, distribution, maxNumberOfGroupsAllowedToBeDown); } private String createDistributorHostInfo(int replicationfactor1, int replicationfactor2, int replicationfactor3) { @@ -105,9 +108,10 @@ public class NodeStateChangeCheckerTest { } } - @Test - void testCanUpgradeForce() { - var nodeStateChangeChecker = createChangeChecker(createCluster(1)); + @ParameterizedTest + @ValueSource(ints = {-1, 1}) + void testCanUpgradeWithForce(int maxNumberOfGroupsAllowedToBeDown) { + var nodeStateChangeChecker = createChangeChecker(createCluster(1, maxNumberOfGroupsAllowedToBeDown)); NodeState newState = new NodeState(STORAGE, INITIALIZING); Result result = nodeStateChangeChecker.evaluateTransition( nodeDistributor, defaultAllUpClusterState(), FORCE, @@ -116,9 +120,10 @@ public class NodeStateChangeCheckerTest { assertFalse(result.wantedStateAlreadySet()); } - @Test - void testDeniedInMoratorium() { - ContentCluster cluster = createCluster(4); + @ParameterizedTest + @ValueSource(ints = {-1, 1}) + void testDeniedInMoratorium(int maxNumberOfGroupsAllowedToBeDown) { + ContentCluster cluster = createCluster(4, maxNumberOfGroupsAllowedToBeDown); var nodeStateChangeChecker = new NodeStateChangeChecker(cluster, true); Result result = nodeStateChangeChecker.evaluateTransition( new Node(STORAGE, 10), defaultAllUpClusterState(), SAFE, @@ -128,9 +133,10 @@ public class NodeStateChangeCheckerTest { assertEquals("Master cluster controller is bootstrapping and in moratorium", result.getReason()); } - @Test - void testUnknownStorageNode() { - ContentCluster cluster = createCluster(4); + @ParameterizedTest + @ValueSource(ints = {-1, 1}) + void testUnknownStorageNode(int maxNumberOfGroupsAllowedToBeDown) { + ContentCluster cluster = createCluster(4, maxNumberOfGroupsAllowedToBeDown); var nodeStateChangeChecker = createChangeChecker(cluster); Result result = nodeStateChangeChecker.evaluateTransition( new Node(STORAGE, 10), defaultAllUpClusterState(), SAFE, @@ -140,11 +146,12 @@ public class NodeStateChangeCheckerTest { assertEquals("Unknown node storage.10", result.getReason()); } - @Test - void testSafeMaintenanceDisallowedWhenOtherStorageNodeInFlatClusterIsSuspended() { + @ParameterizedTest + @ValueSource(ints = {-1, 1}) + void testSafeMaintenanceDisallowedWhenOtherStorageNodeInFlatClusterIsSuspended(int maxNumberOfGroupsAllowedToBeDown) { // Nodes 0-3, storage node 0 being in maintenance with "Orchestrator" description. - ContentCluster cluster = createCluster(4); - cluster.clusterInfo().getStorageNodeInfo(0).setWantedState(new NodeState(STORAGE, State.MAINTENANCE).setDescription("Orchestrator")); + ContentCluster cluster = createCluster(4, maxNumberOfGroupsAllowedToBeDown); + setStorageNodeWantedStateToMaintenance(cluster, 0); var nodeStateChangeChecker = createChangeChecker(cluster); ClusterState clusterStateWith0InMaintenance = clusterState(String.format( "version:%d distributor:4 storage:4 .0.s:m", @@ -160,11 +167,131 @@ public class NodeStateChangeCheckerTest { } @Test - void testSafeMaintenanceDisallowedWhenOtherDistributorInFlatClusterIsSuspended() { + void testMaintenanceAllowedFor2Of4Groups() { + // 4 groups with 1 node in each group + Collection<ConfiguredNode> nodes = createNodes(4); + StorDistributionConfig config = createDistributionConfig(4, 4); + + int maxNumberOfGroupsAllowedToBeDown = 2; + var cluster = new ContentCluster("Clustername", nodes, new Distribution(config), maxNumberOfGroupsAllowedToBeDown); + setAllNodesUp(cluster, HostInfo.createHostInfo(createDistributorHostInfo(4, 5, 6))); + var nodeStateChangeChecker = createChangeChecker(cluster); + + // All nodes up, set a storage node in group 0 to maintenance + { + int nodeIndex = 0; + checkSettingToMaintenanceIsAllowed(nodeIndex, nodeStateChangeChecker, defaultAllUpClusterState()); + setStorageNodeWantedStateToMaintenance(cluster, nodeIndex); + } + + // Node in group 0 in maintenance, set storage node in group 1 to maintenance + { + ClusterState clusterState = clusterState(String.format("version:%d distributor:4 .0.s:d storage:4 .0.s:m", currentClusterStateVersion)); + int nodeIndex = 1; + checkSettingToMaintenanceIsAllowed(nodeIndex, nodeStateChangeChecker, clusterState); + setStorageNodeWantedStateToMaintenance(cluster, nodeIndex); + } + + // Nodes in group 0 and 1 in maintenance, try to set storage node in group 2 to maintenance while storage node 2 is down, should fail + { + ClusterState clusterState = clusterState(String.format("version:%d distributor:4 storage:4 .0.s:m .1.s:m .2.s:d", currentClusterStateVersion)); + int nodeIndex = 2; + cluster.clusterInfo().getStorageNodeInfo(nodeIndex).setReportedState(new NodeState(STORAGE, DOWN), 0); + Node node = new Node(STORAGE, nodeIndex); + Result result = nodeStateChangeChecker.evaluateTransition(node, clusterState, SAFE, UP_NODE_STATE, MAINTENANCE_NODE_STATE); + assertFalse(result.settingWantedStateIsAllowed(), result.toString()); + assertFalse(result.wantedStateAlreadySet()); + assertEquals("At most 2 groups can have wanted state: [0, 1, 2]", result.getReason()); + } + + // Nodes in group 0 and 1 in maintenance, try to set storage node in group 2 to maintenance, should fail + { + ClusterState clusterState = clusterState(String.format("version:%d distributor:4 storage:4 .0.s:m .1.s:m", currentClusterStateVersion)); + int nodeIndex = 2; + Node node = new Node(STORAGE, nodeIndex); + Result result = nodeStateChangeChecker.evaluateTransition(node, clusterState, SAFE, UP_NODE_STATE, MAINTENANCE_NODE_STATE); + assertFalse(result.settingWantedStateIsAllowed(), result.toString()); + assertFalse(result.wantedStateAlreadySet()); + assertEquals("At most 2 groups can have wanted state: [0, 1]", result.getReason()); + } + + } + + @Test + void testMaintenanceAllowedFor2Of4Groups8Nodes() { + // 4 groups with 2 nodes in each group + Collection<ConfiguredNode> nodes = createNodes(8); + StorDistributionConfig config = createDistributionConfig(8, 4); + + int maxNumberOfGroupsAllowedToBeDown = 2; + var cluster = new ContentCluster("Clustername", nodes, new Distribution(config), maxNumberOfGroupsAllowedToBeDown); + setAllNodesUp(cluster, HostInfo.createHostInfo(createDistributorHostInfo(4, 5, 6))); + var nodeStateChangeChecker = createChangeChecker(cluster); + + // All nodes up, set a storage node in group 0 to maintenance + { + ClusterState clusterState = defaultAllUpClusterState(8); + int nodeIndex = 0; + checkSettingToMaintenanceIsAllowed(nodeIndex, nodeStateChangeChecker, clusterState); + setStorageNodeWantedStateToMaintenance(cluster, nodeIndex); + } + + // 1 Node in group 0 in maintenance, try to set node 1 in group 0 to maintenance + { + ClusterState clusterState = clusterState(String.format("version:%d distributor:8 .0.s:d storage:8 .0.s:m", currentClusterStateVersion)); + int nodeIndex = 1; + checkSettingToMaintenanceIsAllowed(nodeIndex, nodeStateChangeChecker, clusterState); + setStorageNodeWantedStateToMaintenance(cluster, nodeIndex); + } + + // 2 nodes in group 0 in maintenance, try to set storage node 2 in group 1 to maintenance + { + ClusterState clusterState = clusterState(String.format("version:%d distributor:8 storage:8 .0.s:m .1.s:m", currentClusterStateVersion)); + int nodeIndex = 2; + checkSettingToMaintenanceIsAllowed(nodeIndex, nodeStateChangeChecker, clusterState); + setStorageNodeWantedStateToMaintenance(cluster, nodeIndex); + } + + // 2 nodes in group 0 and 1 in group 1 in maintenance, try to set storage node 4 in group 2 to maintenance, should fail (different group) + { + ClusterState clusterState = clusterState(String.format("version:%d distributor:8 storage:8 .0.s:m .1.s:m .2.s:m", currentClusterStateVersion)); + int nodeIndex = 4; + Node node = new Node(STORAGE, nodeIndex); + Result result = nodeStateChangeChecker.evaluateTransition(node, clusterState, SAFE, UP_NODE_STATE, MAINTENANCE_NODE_STATE); + assertFalse(result.settingWantedStateIsAllowed(), result.toString()); + assertFalse(result.wantedStateAlreadySet()); + assertEquals("At most 2 groups can have wanted state: [0, 1]", result.getReason()); + } + + // 2 nodes in group 0 and 1 in group 1 in maintenance, try to set storage node 3 in group 1 to maintenance + { + ClusterState clusterState = clusterState(String.format("version:%d distributor:8 storage:8 .0.s:m .1.s:m .2.s:m", currentClusterStateVersion)); + int nodeIndex = 3; + checkSettingToMaintenanceIsAllowed(nodeIndex, nodeStateChangeChecker, clusterState); + setStorageNodeWantedStateToMaintenance(cluster, nodeIndex); + } + + // 2 nodes in group 0 in maintenance, storage node 3 in group 1 is in maintenance with another description + // (set in maintenance by operator), try to set storage node 3 in group 1 to maintenance, should bew allowed + { + ClusterState clusterState = clusterState(String.format("version:%d distributor:8 storage:8 .0.s:m .1.s:m .3.s:m", currentClusterStateVersion)); + setStorageNodeWantedState(cluster, 3, MAINTENANCE, "Maintenance, set by operator"); // Set to another description + setStorageNodeWantedState(cluster, 2, UP, ""); // Set back to UP, want to set this to maintenance again + int nodeIndex = 2; + Node node = new Node(STORAGE, nodeIndex); + Result result = nodeStateChangeChecker.evaluateTransition(node, clusterState, SAFE, UP_NODE_STATE, MAINTENANCE_NODE_STATE); + assertTrue(result.settingWantedStateIsAllowed(), result.toString()); + assertFalse(result.wantedStateAlreadySet()); + } + + } + + @ParameterizedTest + @ValueSource(ints = {-1, 1}) + void testSafeMaintenanceDisallowedWhenOtherDistributorInFlatClusterIsSuspended(int maxNumberOfGroupsAllowedToBeDown) { // Nodes 0-3, distributor 0 being down with "Orchestrator" description. - ContentCluster cluster = createCluster(4); - cluster.clusterInfo().getDistributorNodeInfo(0) - .setWantedState(new NodeState(DISTRIBUTOR, DOWN).setDescription("Orchestrator")); + ContentCluster cluster = createCluster(4, maxNumberOfGroupsAllowedToBeDown); + setDistributorNodeWantedState(cluster, 0, DOWN, "Orchestrator"); var nodeStateChangeChecker = createChangeChecker(cluster); ClusterState clusterStateWith0InMaintenance = clusterState(String.format( "version:%d distributor:4 .0.s:d storage:4", @@ -179,13 +306,13 @@ public class NodeStateChangeCheckerTest { result.getReason()); } - @Test - void testSafeMaintenanceDisallowedWhenDistributorInGroupIsDown() { + @ParameterizedTest + @ValueSource(ints = {-1, 1}) + void testSafeMaintenanceDisallowedWhenDistributorInGroupIsDown(int maxNumberOfGroupsAllowedToBeDown) { // Nodes 0-3, distributor 0 being in maintenance with "Orchestrator" description. // 2 groups: nodes 0-1 is group 0, 2-3 is group 1. - ContentCluster cluster = createCluster(4, 2); - cluster.clusterInfo().getDistributorNodeInfo(0) - .setWantedState(new NodeState(STORAGE, DOWN).setDescription("Orchestrator")); + ContentCluster cluster = createCluster(4, 2, maxNumberOfGroupsAllowedToBeDown); + setDistributorNodeWantedState(cluster, 0, DOWN, "Orchestrator"); var nodeStateChangeChecker = new NodeStateChangeChecker(cluster, false); ClusterState clusterStateWith0InMaintenance = clusterState(String.format( "version:%d distributor:4 .0.s:d storage:4", @@ -198,7 +325,10 @@ public class NodeStateChangeCheckerTest { SAFE, UP_NODE_STATE, MAINTENANCE_NODE_STATE); assertFalse(result.settingWantedStateIsAllowed()); assertFalse(result.wantedStateAlreadySet()); - assertEquals("At most one group can have wanted state: Other distributor 0 in group 0 has wanted state Down", result.getReason()); + if (maxNumberOfGroupsAllowedToBeDown >= 1) + assertEquals("Wanted state already set for another node in groups: [0]", result.getReason()); + else + assertEquals("At most one group can have wanted state: Other distributor 0 in group 0 has wanted state Down", result.getReason()); } { @@ -207,17 +337,23 @@ public class NodeStateChangeCheckerTest { Result result = nodeStateChangeChecker.evaluateTransition( new Node(STORAGE, 1), clusterStateWith0InMaintenance, SAFE, UP_NODE_STATE, MAINTENANCE_NODE_STATE); - assertFalse(result.settingWantedStateIsAllowed(), result.getReason()); - assertEquals("Another distributor wants state DOWN: 0", result.getReason()); + if (maxNumberOfGroupsAllowedToBeDown >= 1) { + assertFalse(result.settingWantedStateIsAllowed(), result.getReason()); + assertEquals("Wanted state already set for another node in groups: [0]", result.getReason()); + } else { + assertFalse(result.settingWantedStateIsAllowed(), result.getReason()); + assertEquals("Another distributor wants state DOWN: 0", result.getReason()); + } } } - @Test - void testSafeMaintenanceWhenOtherStorageNodeInGroupIsSuspended() { + @ParameterizedTest + @ValueSource(ints = {-1, 1}) + void testSafeMaintenanceWhenOtherStorageNodeInGroupIsSuspended(int maxNumberOfGroupsAllowedToBeDown) { // Nodes 0-3, storage node 0 being in maintenance with "Orchestrator" description. // 2 groups: nodes 0-1 is group 0, 2-3 is group 1. - ContentCluster cluster = createCluster(4, 2); - cluster.clusterInfo().getStorageNodeInfo(0).setWantedState(new NodeState(STORAGE, State.MAINTENANCE).setDescription("Orchestrator")); + ContentCluster cluster = createCluster(4, 2, maxNumberOfGroupsAllowedToBeDown); + setStorageNodeWantedState(cluster, 0, MAINTENANCE, "Orchestrator"); var nodeStateChangeChecker = new NodeStateChangeChecker(cluster, false); ClusterState clusterStateWith0InMaintenance = clusterState(String.format( "version:%d distributor:4 storage:4 .0.s:m", @@ -230,8 +366,11 @@ public class NodeStateChangeCheckerTest { SAFE, UP_NODE_STATE, MAINTENANCE_NODE_STATE); assertFalse(result.settingWantedStateIsAllowed()); assertFalse(result.wantedStateAlreadySet()); - assertEquals("At most one group can have wanted state: Other storage node 0 in group 0 has wanted state Maintenance", - result.getReason()); + if (maxNumberOfGroupsAllowedToBeDown >= 1) + assertEquals("At most 1 groups can have wanted state: [0]", result.getReason()); + else + assertEquals("At most one group can have wanted state: Other storage node 0 in group 0 has wanted state Maintenance", + result.getReason()); } { @@ -245,9 +384,10 @@ public class NodeStateChangeCheckerTest { } } - @Test - void testSafeSetStateDistributors() { - NodeStateChangeChecker nodeStateChangeChecker = createChangeChecker(createCluster(1)); + @ParameterizedTest + @ValueSource(ints = {-1, 1}) + void testSafeSetStateDistributors(int maxNumberOfGroupsAllowedToBeDown) { + NodeStateChangeChecker nodeStateChangeChecker = createChangeChecker(createCluster(1, 1, maxNumberOfGroupsAllowedToBeDown)); Result result = nodeStateChangeChecker.evaluateTransition( nodeDistributor, defaultAllUpClusterState(), SAFE, UP_NODE_STATE, MAINTENANCE_NODE_STATE); @@ -256,10 +396,11 @@ public class NodeStateChangeCheckerTest { assertTrue(result.getReason().contains("Safe-set of node state is only supported for storage nodes")); } - @Test - void testCanUpgradeSafeMissingStorage() { + @ParameterizedTest + @ValueSource(ints = {-1, 1}) + void testCanUpgradeSafeMissingStorage(int maxNumberOfGroupsAllowedToBeDown) { // Create a content cluster with 4 nodes, and storage node with index 3 down. - ContentCluster cluster = createCluster(4); + ContentCluster cluster = createCluster(4, maxNumberOfGroupsAllowedToBeDown); setAllNodesUp(cluster, HostInfo.createHostInfo(createDistributorHostInfo(4, 5, 6))); cluster.clusterInfo().getStorageNodeInfo(3).setReportedState(new NodeState(STORAGE, DOWN), 0); ClusterState clusterStateWith3Down = clusterState(String.format( @@ -276,16 +417,18 @@ public class NodeStateChangeCheckerTest { assertEquals("Another storage node has state DOWN: 3", result.getReason()); } - @Test - void testCanUpgradeStorageSafeYes() { - Result result = transitionToMaintenanceWithNoStorageNodesDown(createCluster(4), defaultAllUpClusterState()); + @ParameterizedTest + @ValueSource(ints = {-1, 1}) + void testCanUpgradeStorageSafeYes(int maxNumberOfGroupsAllowedToBeDown) { + Result result = transitionToMaintenanceWithNoStorageNodesDown(createCluster(4, 1, maxNumberOfGroupsAllowedToBeDown), defaultAllUpClusterState()); assertTrue(result.settingWantedStateIsAllowed()); assertFalse(result.wantedStateAlreadySet()); } - @Test - void testSetUpFailsIfReportedIsDown() { - ContentCluster cluster = createCluster(4); + @ParameterizedTest + @ValueSource(ints = {-1, 1}) + void testSetUpFailsIfReportedIsDown(int maxNumberOfGroupsAllowedToBeDown) { + ContentCluster cluster = createCluster(4, maxNumberOfGroupsAllowedToBeDown); NodeStateChangeChecker nodeStateChangeChecker = createChangeChecker(cluster); // Not setting nodes up -> all are down @@ -298,9 +441,10 @@ public class NodeStateChangeCheckerTest { // A node may be reported as Up but have a generated state of Down if it's part of // nodes taken down implicitly due to a group having too low node availability. - @Test - void testSetUpSucceedsIfReportedIsUpButGeneratedIsDown() { - ContentCluster cluster = createCluster(4); + @ParameterizedTest + @ValueSource(ints = {-1, 1}) + void testSetUpSucceedsIfReportedIsUpButGeneratedIsDown(int maxNumberOfGroupsAllowedToBeDown) { + ContentCluster cluster = createCluster(4, maxNumberOfGroupsAllowedToBeDown); NodeStateChangeChecker nodeStateChangeChecker = createChangeChecker(cluster); markAllNodesAsReportingStateUp(cluster); @@ -316,9 +460,10 @@ public class NodeStateChangeCheckerTest { assertFalse(result.wantedStateAlreadySet()); } - @Test - void testCanSetUpEvenIfOldWantedStateIsDown() { - ContentCluster cluster = createCluster(4); + @ParameterizedTest + @ValueSource(ints = {-1, 1}) + void testCanSetUpEvenIfOldWantedStateIsDown(int maxNumberOfGroupsAllowedToBeDown) { + ContentCluster cluster = createCluster(4, maxNumberOfGroupsAllowedToBeDown); NodeStateChangeChecker nodeStateChangeChecker = createChangeChecker(cluster); setAllNodesUp(cluster, HostInfo.createHostInfo(createDistributorHostInfo(4, 3, 6))); @@ -329,9 +474,10 @@ public class NodeStateChangeCheckerTest { assertFalse(result.wantedStateAlreadySet()); } - @Test - void testCanUpgradeStorageSafeNo() { - ContentCluster cluster = createCluster(4); + @ParameterizedTest + @ValueSource(ints = {-1, 1}) + void testCanUpgradeStorageSafeNo(int maxNumberOfGroupsAllowedToBeDown) { + ContentCluster cluster = createCluster(4, maxNumberOfGroupsAllowedToBeDown); NodeStateChangeChecker nodeStateChangeChecker = createChangeChecker(cluster); setAllNodesUp(cluster, HostInfo.createHostInfo(createDistributorHostInfo(4, 3, 6))); @@ -344,9 +490,10 @@ public class NodeStateChangeCheckerTest { result.getReason()); } - @Test - void testCanUpgradeIfMissingMinReplicationFactor() { - ContentCluster cluster = createCluster(4); + @ParameterizedTest + @ValueSource(ints = {-1, 1}) + void testCanUpgradeIfMissingMinReplicationFactor(int maxNumberOfGroupsAllowedToBeDown) { + ContentCluster cluster = createCluster(4, maxNumberOfGroupsAllowedToBeDown); NodeStateChangeChecker nodeStateChangeChecker = createChangeChecker(cluster); setAllNodesUp(cluster, HostInfo.createHostInfo(createDistributorHostInfo(4, 3, 6))); @@ -357,9 +504,10 @@ public class NodeStateChangeCheckerTest { assertFalse(result.wantedStateAlreadySet()); } - @Test - void testCanUpgradeIfStorageNodeMissingFromNodeInfo() { - ContentCluster cluster = createCluster(4); + @ParameterizedTest + @ValueSource(ints = {-1, 1}) + void testCanUpgradeIfStorageNodeMissingFromNodeInfo(int maxNumberOfGroupsAllowedToBeDown) { + ContentCluster cluster = createCluster(4, maxNumberOfGroupsAllowedToBeDown); NodeStateChangeChecker nodeStateChangeChecker = createChangeChecker(cluster); String hostInfo = "{\n" + " \"cluster-state-version\": 2,\n" + @@ -381,9 +529,10 @@ public class NodeStateChangeCheckerTest { assertFalse(result.wantedStateAlreadySet()); } - @Test - void testMissingDistributorState() { - ContentCluster cluster = createCluster(4); + @ParameterizedTest + @ValueSource(ints = {-1, 1}) + void testMissingDistributorState(int maxNumberOfGroupsAllowedToBeDown) { + ContentCluster cluster = createCluster(4, maxNumberOfGroupsAllowedToBeDown); NodeStateChangeChecker nodeStateChangeChecker = createChangeChecker(cluster); cluster.clusterInfo().getStorageNodeInfo(1).setReportedState(new NodeState(STORAGE, UP), 0); @@ -394,8 +543,8 @@ public class NodeStateChangeCheckerTest { assertEquals("Distributor node 0 has not reported any cluster state version yet.", result.getReason()); } - private Result transitionToSameState(State state, String oldDescription, String newDescription) { - ContentCluster cluster = createCluster(4); + private Result transitionToSameState(State state, String oldDescription, String newDescription, int maxNumberOfGroupsAllowedToBeDown) { + ContentCluster cluster = createCluster(4, maxNumberOfGroupsAllowedToBeDown); NodeStateChangeChecker nodeStateChangeChecker = createChangeChecker(cluster); NodeState currentNodeState = createNodeState(state, oldDescription); @@ -405,26 +554,29 @@ public class NodeStateChangeCheckerTest { currentNodeState, newNodeState); } - private Result transitionToSameState(String oldDescription, String newDescription) { - return transitionToSameState(State.MAINTENANCE, oldDescription, newDescription); + private Result transitionToSameState(String oldDescription, String newDescription, int maxNumberOfGroupsAllowedToBeDown) { + return transitionToSameState(MAINTENANCE, oldDescription, newDescription, maxNumberOfGroupsAllowedToBeDown); } - @Test - void testSettingUpWhenUpCausesAlreadySet() { - Result result = transitionToSameState(UP, "foo", "bar"); + @ParameterizedTest + @ValueSource(ints = {-1, 1}) + void testSettingUpWhenUpCausesAlreadySet(int maxNumberOfGroupsAllowedToBeDown) { + Result result = transitionToSameState(UP, "foo", "bar", maxNumberOfGroupsAllowedToBeDown); assertTrue(result.wantedStateAlreadySet()); } - @Test - void testSettingAlreadySetState() { - Result result = transitionToSameState("foo", "foo"); + @ParameterizedTest + @ValueSource(ints = {-1, 1}) + void testSettingAlreadySetState(int maxNumberOfGroupsAllowedToBeDown) { + Result result = transitionToSameState("foo", "foo", maxNumberOfGroupsAllowedToBeDown); assertFalse(result.settingWantedStateIsAllowed()); assertTrue(result.wantedStateAlreadySet()); } - @Test - void testDifferentDescriptionImpliesDenied() { - Result result = transitionToSameState("foo", "bar"); + @ParameterizedTest + @ValueSource(ints = {-1, 1}) + void testDifferentDescriptionImpliesDenied(int maxNumberOfGroupsAllowedToBeDown) { + Result result = transitionToSameState("foo", "bar", maxNumberOfGroupsAllowedToBeDown); assertFalse(result.settingWantedStateIsAllowed()); assertFalse(result.wantedStateAlreadySet()); } @@ -433,10 +585,9 @@ public class NodeStateChangeCheckerTest { NodeStateChangeChecker nodeStateChangeChecker = createChangeChecker(cluster); for (int x = 0; x < cluster.clusterInfo().getConfiguredNodes().size(); x++) { - State state = UP; - cluster.clusterInfo().getDistributorNodeInfo(x).setReportedState(new NodeState(DISTRIBUTOR, state), 0); + cluster.clusterInfo().getDistributorNodeInfo(x).setReportedState(new NodeState(DISTRIBUTOR, UP), 0); cluster.clusterInfo().getDistributorNodeInfo(x).setHostInfo(HostInfo.createHostInfo(createDistributorHostInfo(4, 5, 6))); - cluster.clusterInfo().getStorageNodeInfo(x).setReportedState(new NodeState(STORAGE, state), 0); + cluster.clusterInfo().getStorageNodeInfo(x).setReportedState(new NodeState(STORAGE, UP), 0); } return nodeStateChangeChecker.evaluateTransition( @@ -456,26 +607,29 @@ public class NodeStateChangeCheckerTest { return transitionToMaintenanceWithOneStorageNodeDown(cluster, clusterState); } - @Test - void testCanUpgradeWhenAllUp() { - Result result = transitionToMaintenanceWithNoStorageNodesDown(createCluster(4), defaultAllUpClusterState()); + @ParameterizedTest + @ValueSource(ints = {-1, 1}) + void testCanUpgradeWhenAllUp(int maxNumberOfGroupsAllowedToBeDown) { + Result result = transitionToMaintenanceWithNoStorageNodesDown(createCluster(4, maxNumberOfGroupsAllowedToBeDown), defaultAllUpClusterState()); assertTrue(result.settingWantedStateIsAllowed()); assertFalse(result.wantedStateAlreadySet()); } - @Test - void testCanUpgradeWhenAllUpOrRetired() { - Result result = transitionToMaintenanceWithNoStorageNodesDown(createCluster(4), defaultAllUpClusterState()); + @ParameterizedTest + @ValueSource(ints = {-1, 1}) + void testCanUpgradeWhenAllUpOrRetired(int maxNumberOfGroupsAllowedToBeDown) { + Result result = transitionToMaintenanceWithNoStorageNodesDown(createCluster(4, maxNumberOfGroupsAllowedToBeDown), defaultAllUpClusterState()); assertTrue(result.settingWantedStateIsAllowed()); assertFalse(result.wantedStateAlreadySet()); } - @Test - void testCanUpgradeWhenStorageIsDown() { + @ParameterizedTest + @ValueSource(ints = {-1, 1}) + void testCanUpgradeWhenStorageIsDown(int maxNumberOfGroupsAllowedToBeDown) { ClusterState clusterState = defaultAllUpClusterState(); var storageNodeIndex = nodeStorage.getIndex(); - ContentCluster cluster = createCluster(4); + ContentCluster cluster = createCluster(4, maxNumberOfGroupsAllowedToBeDown); NodeState downNodeState = new NodeState(STORAGE, DOWN); cluster.clusterInfo().getStorageNodeInfo(storageNodeIndex).setReportedState(downNodeState, 4 /* time */); clusterState.setNodeState(new Node(STORAGE, storageNodeIndex), downNodeState); @@ -485,13 +639,14 @@ public class NodeStateChangeCheckerTest { assertFalse(result.wantedStateAlreadySet()); } - @Test - void testCannotUpgradeWhenOtherStorageIsDown() { + @ParameterizedTest + @ValueSource(ints = {-1, 1}) + void testCannotUpgradeWhenOtherStorageIsDown(int maxNumberOfGroupsAllowedToBeDown) { int otherIndex = 2; // If this fails, just set otherIndex to some other valid index. assertNotEquals(nodeStorage.getIndex(), otherIndex); - ContentCluster cluster = createCluster(4); + ContentCluster cluster = createCluster(4, maxNumberOfGroupsAllowedToBeDown); ClusterState clusterState = defaultAllUpClusterState(); NodeState downNodeState = new NodeState(STORAGE, DOWN); cluster.clusterInfo().getStorageNodeInfo(otherIndex).setReportedState(downNodeState, 4 /* time */); @@ -503,9 +658,10 @@ public class NodeStateChangeCheckerTest { assertTrue(result.getReason().contains("Another storage node has state DOWN: 2")); } - @Test - void testNodeRatioRequirementConsidersGeneratedNodeStates() { - ContentCluster cluster = createCluster(4); + @ParameterizedTest + @ValueSource(ints = {-1, 1}) + void testNodeRatioRequirementConsidersGeneratedNodeStates(int maxNumberOfGroupsAllowedToBeDown) { + ContentCluster cluster = createCluster(4, maxNumberOfGroupsAllowedToBeDown); NodeStateChangeChecker nodeStateChangeChecker = createChangeChecker(cluster); markAllNodesAsReportingStateUp(cluster); @@ -525,62 +681,72 @@ public class NodeStateChangeCheckerTest { assertFalse(result.wantedStateAlreadySet()); } - @Test - void testDownDisallowedByNonRetiredState() { + @ParameterizedTest + @ValueSource(ints = {-1, 1}) + void testDownDisallowedByNonRetiredState(int maxNumberOfGroupsAllowedToBeDown) { Result result = evaluateDownTransition( defaultAllUpClusterState(), UP, currentClusterStateVersion, - 0); + 0, + maxNumberOfGroupsAllowedToBeDown); assertFalse(result.settingWantedStateIsAllowed()); assertFalse(result.wantedStateAlreadySet()); assertEquals("Only retired nodes are allowed to be set to DOWN in safe mode - is Up", result.getReason()); } - @Test - void testDownDisallowedByBuckets() { + @ParameterizedTest + @ValueSource(ints = {-1, 1}) + void testDownDisallowedByBuckets(int maxNumberOfGroupsAllowedToBeDown) { Result result = evaluateDownTransition( retiredClusterStateSuffix(), UP, currentClusterStateVersion, - 1); + 1, + maxNumberOfGroupsAllowedToBeDown); assertFalse(result.settingWantedStateIsAllowed()); assertFalse(result.wantedStateAlreadySet()); assertEquals("The storage node manages 1 buckets", result.getReason()); } - @Test - void testDownDisallowedByReportedState() { + @ParameterizedTest + @ValueSource(ints = {-1, 1}) + void testDownDisallowedByReportedState(int maxNumberOfGroupsAllowedToBeDown) { Result result = evaluateDownTransition( retiredClusterStateSuffix(), INITIALIZING, currentClusterStateVersion, - 0); + 0, + maxNumberOfGroupsAllowedToBeDown); assertFalse(result.settingWantedStateIsAllowed()); assertFalse(result.wantedStateAlreadySet()); assertEquals("Reported state (Initializing) is not UP, so no bucket data is available", result.getReason()); } - @Test - void testDownDisallowedByVersionMismatch() { + @ParameterizedTest + @ValueSource(ints = {-1, 1}) + void testDownDisallowedByVersionMismatch(int maxNumberOfGroupsAllowedToBeDown) { Result result = evaluateDownTransition( retiredClusterStateSuffix(), UP, currentClusterStateVersion - 1, - 0); + 0, + maxNumberOfGroupsAllowedToBeDown); assertFalse(result.settingWantedStateIsAllowed()); assertFalse(result.wantedStateAlreadySet()); assertEquals("Cluster controller at version 2 got info for storage node 1 at a different version 1", result.getReason()); } - @Test - void testAllowedToSetDown() { + @ParameterizedTest + @ValueSource(ints = {-1, 1}) + void testAllowedToSetDown(int maxNumberOfGroupsAllowedToBeDown) { Result result = evaluateDownTransition( retiredClusterStateSuffix(), UP, currentClusterStateVersion, - 0); + 0, + maxNumberOfGroupsAllowedToBeDown); assertTrue(result.settingWantedStateIsAllowed()); assertFalse(result.wantedStateAlreadySet()); } @@ -588,8 +754,9 @@ public class NodeStateChangeCheckerTest { private Result evaluateDownTransition(ClusterState clusterState, State reportedState, int hostInfoClusterStateVersion, - int lastAlldisksBuckets) { - ContentCluster cluster = createCluster(4); + int lastAlldisksBuckets, + int maxNumberOfGroupsAllowedToBeDown) { + ContentCluster cluster = createCluster(4, maxNumberOfGroupsAllowedToBeDown); NodeStateChangeChecker nodeStateChangeChecker = createChangeChecker(cluster); StorageNodeInfo nodeInfo = cluster.clusterInfo().getStorageNodeInfo(nodeStorage.getIndex()); @@ -741,13 +908,14 @@ public class NodeStateChangeCheckerTest { .capacity(nodes) .partitions("1|*")); + int nodeIndex = 0; for (int i = 0; i < groups; ++i) { var groupBuilder = new StorDistributionConfig.Group.Builder() .index(String.valueOf(i)) .name(String.valueOf(i)) .capacity(nodesPerGroup) .partitions(""); - for (int nodeIndex = 0; nodeIndex < nodesPerGroup; ++nodeIndex) { + for (int j = 0; j < nodesPerGroup; ++j, ++nodeIndex) { groupBuilder.nodes(new StorDistributionConfig.Group.Nodes.Builder() .index(nodeIndex)); } @@ -756,4 +924,26 @@ public class NodeStateChangeCheckerTest { return configBuilder.build(); } + private void checkSettingToMaintenanceIsAllowed(int nodeIndex, NodeStateChangeChecker nodeStateChangeChecker, ClusterState clusterState) { + Node node = new Node(STORAGE, nodeIndex); + Result result = nodeStateChangeChecker.evaluateTransition(node, clusterState, SAFE, UP_NODE_STATE, MAINTENANCE_NODE_STATE); + assertTrue(result.settingWantedStateIsAllowed(), result.toString()); + assertFalse(result.wantedStateAlreadySet()); + assertEquals("Preconditions fulfilled and new state different", result.getReason()); + } + + private void setStorageNodeWantedStateToMaintenance(ContentCluster cluster, int nodeIndex) { + setStorageNodeWantedState(cluster, nodeIndex, MAINTENANCE, "Orchestrator"); + } + + private void setStorageNodeWantedState(ContentCluster cluster, int nodeIndex, State state, String description) { + NodeState nodeState = new NodeState(STORAGE, state); + cluster.clusterInfo().getStorageNodeInfo(nodeIndex).setWantedState(nodeState.setDescription(description)); + } + + private void setDistributorNodeWantedState(ContentCluster cluster, int nodeIndex, State state, String description) { + NodeState nodeState = new NodeState(DISTRIBUTOR, state); + cluster.clusterInfo().getDistributorNodeInfo(nodeIndex).setWantedState(nodeState.setDescription(description)); + } + } diff --git a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/restapiv2/SetNodeStateTest.java b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/restapiv2/SetNodeStateTest.java index 131b011df48..50fe6e9a154 100644 --- a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/restapiv2/SetNodeStateTest.java +++ b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/restapiv2/SetNodeStateTest.java @@ -72,6 +72,9 @@ public class SetNodeStateTest extends StateRestApiTest { public String getReason() { return reason; } + + @Override + public String toString() { return getId() +": " + getReason(); } }); return this; } |