author Tor Brede Vekterli <vekterli@yahoo-inc.com> 2016-06-17 10:41:33 +0200
committer Tor Brede Vekterli <vekterli@yahoo-inc.com> 2016-06-17 10:41:33 +0200
commit 916e5a5b8a4574bb2d878c5b07c97b1678df81b3 (patch)
tree f88615323a5cf12cb08fb6a10977c9d74855dfda /clustercontroller-core
parent 4edde0fd2999b4343cff30deda8c28e64cef6be7 (diff)
Add configurable automatic group up/down feature based on node availability
Available under the content cluster tuning tag; the feature is currently disabled by default (we need production experience with it first). Also improves handling of nodes removed from config by ensuring these are taken out of the core working cluster state instead of just being patched away before each state publish.
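
The sketch below is not part of the commit; it only illustrates how the new pieces fit together: FleetControllerOptions.minNodeRatioPerGroup is passed to SystemStateGenerator, which builds a GroupAvailabilityCalculator and asks it which storage nodes should be implicitly taken down. The builder methods are package-private, so the sketch assumes it lives in com.yahoo.vespa.clustercontroller.core; the 0.67 ratio and the sketch's own class/method names are illustrative only.

package com.yahoo.vespa.clustercontroller.core;

import com.yahoo.vdslib.distribution.Distribution;
import com.yahoo.vdslib.state.ClusterState;

import java.util.Set;

class GroupTakedownUsageSketch {
    // Mirrors the calculator usage in SystemStateGenerator.performImplicitStorageNodeStateTransitions()
    // introduced by this commit.
    static Set<Integer> nodesToImplicitlyTakeDown(Distribution distribution, ClusterState candidateState) {
        GroupAvailabilityCalculator calculator = GroupAvailabilityCalculator.builder()
                .withDistribution(distribution)        // hierarchic storage distribution from config
                .withMinNodeRatioPerGroup(0.67)        // e.g. require roughly 2 of 3 nodes available per group
                .build();
        // Returns an empty set for flat (non-grouped) clusters; implicit group takedown
        // only applies to hierarchic setups.
        return calculator.nodesThatShouldBeDown(candidateState);
    }
}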
Diffstat (limited to 'clustercontroller-core')
-rw-r--r--  clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/ContentCluster.java  2
-rw-r--r--  clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/FleetController.java  11
-rw-r--r--  clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/FleetControllerOptions.java  10
-rw-r--r--  clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/GroupAvailabilityCalculator.java  117
-rw-r--r--  clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/NodeStateChangeChecker.java  32
-rw-r--r--  clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/SystemStateGenerator.java  202
-rw-r--r--  clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/restapiv2/requests/SetNodeStateRequest.java  5
-rw-r--r--  clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/ClusterFixture.java  136
-rw-r--r--  clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/DistributionBuilder.java  107
-rw-r--r--  clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/FleetControllerTest.java  106
-rw-r--r--  clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/GroupAutoTakedownLiveConfigTest.java  100
-rw-r--r--  clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/GroupAutoTakedownTest.java  370
-rw-r--r--  clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/GroupAvailabilityCalculatorTest.java  188
-rw-r--r--  clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/NodeSlobrokConfigurationMembershipTest.java  39
-rw-r--r--  clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/NodeStateChangeCheckerTest.java  94
-rw-r--r--  clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/SystemStateGeneratorTest.java  4
16 files changed, 1348 insertions, 175 deletions
diff --git a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/ContentCluster.java b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/ContentCluster.java
index 67681f87d92..98648451e1d 100644
--- a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/ContentCluster.java
+++ b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/ContentCluster.java
@@ -191,7 +191,7 @@ public class ContentCluster {
* @param newState state wanted to be set @return NodeUpgradePrechecker.Response
*/
public NodeStateChangeChecker.Result calculateEffectOfNewState(
- Node node, int clusterState, SetUnitStateRequest.Condition condition, NodeState oldState, NodeState newState) {
+ Node node, ClusterState clusterState, SetUnitStateRequest.Condition condition, NodeState oldState, NodeState newState) {
NodeStateChangeChecker nodeStateChangeChecker = new NodeStateChangeChecker(
minStorageNodesUp,
diff --git a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/FleetController.java b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/FleetController.java
index 572e24bcb35..ceeeddf49fa 100644
--- a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/FleetController.java
+++ b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/FleetController.java
@@ -416,8 +416,10 @@ public class FleetController implements NodeStateOrHostInfoChangeHandler, NodeAd
systemStateGenerator.setStableStateTimePeriod(options.stableStateTimePeriod);
systemStateGenerator.setMinNodesUp(options.minDistributorNodesUp, options.minStorageNodesUp,
options.minRatioOfDistributorNodesUp, options.minRatioOfStorageNodesUp);
+ systemStateGenerator.setMinNodeRatioPerGroup(options.minNodeRatioPerGroup);
systemStateGenerator.setMaxSlobrokDisconnectGracePeriod(options.maxSlobrokDisconnectGracePeriod);
systemStateGenerator.setDistributionBits(options.distributionBits);
+ systemStateGenerator.setDistribution(options.storageDistribution);
masterElectionHandler.setFleetControllerCount(options.fleetControllerCount);
masterElectionHandler.setMasterZooKeeperCooldownPeriod(options.masterZooKeeperCooldownPeriod);
@@ -426,9 +428,9 @@ public class FleetController implements NodeStateOrHostInfoChangeHandler, NodeAd
try{
rpcServer.setSlobrokConnectionSpecs(options.slobrokConnectionSpecs, options.rpcPort);
} catch (ListenFailedException e) {
- log.log(LogLevel.WARNING, "Failed to bind RPC server to port " + options.rpcPort +". This may be natural if cluster have altered the services running on this node: " + e.getMessage());
+ log.log(LogLevel.WARNING, "Failed to bind RPC server to port " + options.rpcPort +". This may be natural if cluster has altered the services running on this node: " + e.getMessage());
} catch (Exception e) {
- log.log(LogLevel.WARNING, "Failed to initailize RPC server socket: " + e.getMessage());
+ log.log(LogLevel.WARNING, "Failed to initialize RPC server socket: " + e.getMessage());
}
}
@@ -436,7 +438,7 @@ public class FleetController implements NodeStateOrHostInfoChangeHandler, NodeAd
try{
statusPageServer.setPort(options.httpPort);
} catch (Exception e) {
- log.log(LogLevel.WARNING, "Failed to initialize status server socket. This may be natural if cluster have altered the services running on this node: " + e.getMessage());
+ log.log(LogLevel.WARNING, "Failed to initialize status server socket. This may be natural if cluster has altered the services running on this node: " + e.getMessage());
}
}
@@ -503,7 +505,6 @@ public class FleetController implements NodeStateOrHostInfoChangeHandler, NodeAd
didWork |= systemStateBroadcaster.processResponses();
if (masterElectionHandler.isMaster()) {
didWork |= broadcastClusterStateToEligibleNodes();
-
}
didWork |= processAnyPendingStatusPageRequest();
@@ -637,7 +638,7 @@ public class FleetController implements NodeStateOrHostInfoChangeHandler, NodeAd
// Send getNodeState requests to zero or more nodes.
didWork |= stateGatherer.sendMessages(cluster, communicator, this);
didWork |= systemStateGenerator.watchTimers(cluster, this);
- didWork |= systemStateGenerator.notifyIfNewSystemState(this);
+ didWork |= systemStateGenerator.notifyIfNewSystemState(cluster, this);
if ( ! isStateGatherer) {
if ( ! isMaster) {
diff --git a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/FleetControllerOptions.java b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/FleetControllerOptions.java
index f18fa6d3a9b..a2449449a05 100644
--- a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/FleetControllerOptions.java
+++ b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/FleetControllerOptions.java
@@ -69,6 +69,16 @@ public class FleetControllerOptions implements Cloneable {
public double minRatioOfStorageNodesUp = 0.50;
/**
+ * Minimum ratio of nodes in an "available" state (up, initializing or maintenance)
+ * that shall be present in a group for the group itself to be considered available.
+ * If the ratio of available nodes drops below this limit, the group's nodes will be
+ * implicitly taken down.
+ *
+ * A value of 0.0 implies that the group auto-takedown feature is effectively disabled.
+ */
+ public double minNodeRatioPerGroup = 0.0;
+
+ /**
* Milliseconds to sleep after doing a work cycle where we did no work. Some events do not interrupt the sleeping,
* such as slobrok changes, so shouldn't set this too high.
*/
diff --git a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/GroupAvailabilityCalculator.java b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/GroupAvailabilityCalculator.java
new file mode 100644
index 00000000000..74b15b61ac3
--- /dev/null
+++ b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/GroupAvailabilityCalculator.java
@@ -0,0 +1,117 @@
+package com.yahoo.vespa.clustercontroller.core;
+
+import com.yahoo.vdslib.distribution.ConfiguredNode;
+import com.yahoo.vdslib.distribution.Distribution;
+import com.yahoo.vdslib.distribution.Group;
+import com.yahoo.vdslib.distribution.GroupVisitor;
+import com.yahoo.vdslib.state.ClusterState;
+import com.yahoo.vdslib.state.Node;
+import com.yahoo.vdslib.state.NodeState;
+import com.yahoo.vdslib.state.NodeType;
+import com.yahoo.vdslib.state.State;
+
+import java.util.HashSet;
+import java.util.Set;
+import java.util.stream.Stream;
+
+class GroupAvailabilityCalculator {
+ private final Distribution distribution;
+ private final double minNodeRatioPerGroup;
+
+ private GroupAvailabilityCalculator(Distribution distribution,
+ double minNodeRatioPerGroup)
+ {
+ this.distribution = distribution;
+ this.minNodeRatioPerGroup = minNodeRatioPerGroup;
+ }
+
+ public static class Builder {
+ private Distribution distribution;
+ private double minNodeRatioPerGroup = 1.0;
+
+ Builder withDistribution(Distribution distribution) {
+ this.distribution = distribution;
+ return this;
+ }
+ Builder withMinNodeRatioPerGroup(double minRatio) {
+ this.minNodeRatioPerGroup = minRatio;
+ return this;
+ }
+ GroupAvailabilityCalculator build() {
+ return new GroupAvailabilityCalculator(distribution, minNodeRatioPerGroup);
+ }
+ }
+
+ public static Builder builder() {
+ return new Builder();
+ }
+
+ private class InsufficientAvailabilityGroupVisitor implements GroupVisitor {
+ private final Set<Integer> implicitlyDown = new HashSet<>();
+ private final ClusterState clusterState;
+
+ public InsufficientAvailabilityGroupVisitor(ClusterState clusterState) {
+ this.clusterState = clusterState;
+ }
+
+ private boolean nodeIsAvailableInState(final int index, final String states) {
+ return clusterState.getNodeState(new Node(NodeType.STORAGE, index)).getState().oneOf(states);
+ }
+
+ private Stream<ConfiguredNode> availableNodesIn(Group g) {
+ // We consider nodes in states (u)p, (i)nitializing, (m)aintenance as being
+ // available from the perspective of taking entire groups down (even though
+ // maintenance mode is a half-truth in this regard).
+ return g.getNodes().stream().filter(n -> nodeIsAvailableInState(n.index(), "uim"));
+ }
+
+ private Stream<ConfiguredNode> candidateNodesForSettingDown(Group g) {
+ // We don't implicitly set (m)aintenance nodes down, as these are usually set
+ // in maintenance for a good reason (e.g. orchestration or manual reboot).
+ // Similarly, we don't take down (r)etired nodes as these may contain data
+ // that the rest of the cluster needs.
+ return g.getNodes().stream().filter(n -> nodeIsAvailableInState(n.index(), "ui"));
+ }
+
+ private double computeGroupAvailability(Group g) {
+ // TODO also look at distributors
+ final long availableNodes = availableNodesIn(g).count();
+ // Model should make it impossible to deploy with zero nodes in a group,
+ // so no div by zero risk.
+ return availableNodes / (double)g.getNodes().size();
+ }
+
+ private void markAllAvailableGroupNodeIndicesAsDown(Group group) {
+ candidateNodesForSettingDown(group).forEach(n -> implicitlyDown.add(n.index()));
+ }
+
+ @Override
+ public boolean visitGroup(Group group) {
+ if (group.isLeafGroup()) {
+ if (computeGroupAvailability(group) < minNodeRatioPerGroup) {
+ markAllAvailableGroupNodeIndicesAsDown(group);
+ }
+ }
+ return true;
+ }
+
+ Set<Integer> implicitlyDownNodeIndices() {
+ return implicitlyDown;
+ }
+ }
+
+ private static boolean isFlatCluster(Group root) {
+ return root.isLeafGroup();
+ }
+
+ public Set<Integer> nodesThatShouldBeDown(ClusterState state) {
+ if (isFlatCluster(distribution.getRootGroup())) {
+ // Implicit group takedown only applies to hierarchic cluster setups.
+ return new HashSet<>();
+ }
+ InsufficientAvailabilityGroupVisitor visitor = new InsufficientAvailabilityGroupVisitor(state);
+ distribution.visitGroups(visitor);
+ return visitor.implicitlyDownNodeIndices();
+ }
+
+}
diff --git a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/NodeStateChangeChecker.java b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/NodeStateChangeChecker.java
index f312194c15d..12018ac4b6f 100644
--- a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/NodeStateChangeChecker.java
+++ b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/NodeStateChangeChecker.java
@@ -1,6 +1,7 @@
// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
package com.yahoo.vespa.clustercontroller.core;
+import com.yahoo.vdslib.state.ClusterState;
import com.yahoo.vdslib.state.Node;
import com.yahoo.vdslib.state.NodeState;
import com.yahoo.vdslib.state.NodeType;
@@ -79,7 +80,7 @@ public class NodeStateChangeChecker {
}
public Result evaluateTransition(
- Node node, int clusterStateVersion, SetUnitStateRequest.Condition condition,
+ Node node, ClusterState clusterState, SetUnitStateRequest.Condition condition,
NodeState oldState, NodeState newState) {
if (condition == SetUnitStateRequest.Condition.FORCE) {
return Result.allowSettingOfWantedState();
@@ -107,7 +108,7 @@ public class NodeStateChangeChecker {
case UP:
return canSetStateUp(node, oldState.getState());
case MAINTENANCE:
- return canSetStateMaintenance(node, clusterStateVersion);
+ return canSetStateMaintenance(node, clusterState);
default:
return Result.createDisallowed("Safe only supports state UP and MAINTENANCE, you tried: " + newState);
}
@@ -127,17 +128,17 @@ public class NodeStateChangeChecker {
return Result.allowSettingOfWantedState();
}
- private Result canSetStateMaintenance(Node node, int clusterStateVersion) {
+ private Result canSetStateMaintenance(Node node, ClusterState clusterState) {
NodeInfo nodeInfo = clusterInfo.getNodeInfo(node);
if (nodeInfo == null) {
return Result.createDisallowed("Unknown node " + node);
}
- NodeState reportedState = nodeInfo.getReportedState();
- if (reportedState.getState() == State.DOWN) {
+ NodeState currentState = clusterState.getNodeState(node);
+ if (currentState.getState() == State.DOWN) {
return Result.allowSettingOfWantedState();
}
- Result checkDistributorsResult = checkDistributors(node, clusterStateVersion);
+ Result checkDistributorsResult = checkDistributors(node, clusterState.getVersion());
if (!checkDistributorsResult.settingWantedStateIsAllowed()) {
return checkDistributorsResult;
}
@@ -151,7 +152,7 @@ public class NodeStateChangeChecker {
return Result.createDisallowed("There are only " + clusterInfo.getStorageNodeInfo().size() +
" storage nodes up, while config requires at least " + minStorageNodesUp);
}
- Result fractionCheck = isFractionHighEnough();
+ Result fractionCheck = isFractionHighEnough(clusterState);
if (!fractionCheck.settingWantedStateIsAllowed()) {
return fractionCheck;
}
@@ -168,16 +169,23 @@ public class NodeStateChangeChecker {
return Result.allowSettingOfWantedState();
}
- private Result isFractionHighEnough() {
+ private int contentNodesWithAvailableNodeState(ClusterState clusterState) {
+ final int nodeCount = clusterState.getNodeCount(NodeType.STORAGE);
int upNodesCount = 0;
- int nodesCount = 0;
- for (StorageNodeInfo storageNodeInfo : clusterInfo.getStorageNodeInfo()) {
- nodesCount++;
- State state = storageNodeInfo.getReportedState().getState();
+ for (int i = 0; i < nodeCount; ++i) {
+ final Node node = new Node(NodeType.STORAGE, i);
+ final State state = clusterState.getNodeState(node).getState();
if (state == State.UP || state == State.RETIRED || state == State.INITIALIZING) {
upNodesCount++;
}
}
+ return upNodesCount;
+ }
+
+ private Result isFractionHighEnough(ClusterState clusterState) {
+ final int nodesCount = clusterInfo.getStorageNodeInfo().size();
+ final int upNodesCount = contentNodesWithAvailableNodeState(clusterState);
+
if (nodesCount == 0) {
return Result.createDisallowed("No storage nodes in cluster state, not safe to restart.");
}
diff --git a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/SystemStateGenerator.java b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/SystemStateGenerator.java
index 5cf88b68f29..3dd1216d5d0 100644
--- a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/SystemStateGenerator.java
+++ b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/SystemStateGenerator.java
@@ -4,6 +4,7 @@ package com.yahoo.vespa.clustercontroller.core;
import com.yahoo.jrt.Spec;
import com.yahoo.log.LogLevel;
import com.yahoo.vdslib.distribution.ConfiguredNode;
+import com.yahoo.vdslib.distribution.Distribution;
import com.yahoo.vdslib.state.*;
import com.yahoo.vespa.clustercontroller.core.database.DatabaseHandler;
import com.yahoo.vespa.clustercontroller.core.hostinfo.HostInfo;
@@ -18,7 +19,8 @@ import java.util.stream.Collectors;
/**
* This class get node state updates and uses them to decide the cluster state.
*/
- // TODO: Remove all current state from this and make it rely on state from ClusterInfo instead
+// TODO: Remove all current state from this and make it rely on state from ClusterInfo instead
+// TODO: Do this ASAP! SystemStateGenerator should ideally behave as a pure function!
public class SystemStateGenerator {
private static Logger log = Logger.getLogger(SystemStateGenerator.class.getName());
@@ -27,6 +29,7 @@ public class SystemStateGenerator {
private final EventLogInterface eventLog;
private ClusterStateView currentClusterStateView;
private ClusterStateView nextClusterStateView;
+ private Distribution distribution;
private boolean nextStateViewChanged = false;
private boolean isMaster = false;
@@ -41,6 +44,7 @@ public class SystemStateGenerator {
private int minStorageNodesUp = 1;
private double minRatioOfDistributorNodesUp = 0.50;
private double minRatioOfStorageNodesUp = 0.50;
+ private double minNodeRatioPerGroup = 0.0;
private int maxSlobrokDisconnectGracePeriod = 1000;
private int idealDistributionBits = 16;
private static final boolean disableUnstableNodes = true;
@@ -116,17 +120,18 @@ public class SystemStateGenerator {
minStorageNodesUp = minStorNodes;
minRatioOfDistributorNodesUp = minDistRatio;
minRatioOfStorageNodesUp = minStorRatio;
- nextStateViewChanged = true; // ... maybe
+ nextStateViewChanged = true;
+ }
+
+ public void setMinNodeRatioPerGroup(double upRatio) {
+ this.minNodeRatioPerGroup = upRatio;
+ nextStateViewChanged = true;
}
/** Sets the nodes of this and attempts to keep the node state in sync */
public void setNodes(ClusterInfo newClusterInfo) {
this.nodes = new HashSet<>(newClusterInfo.getConfiguredNodes().values());
- // Nodes that are removed from config will be automatically marked as DOWN
- // in the cluster state by createNextVersionOfClusterStateView, ensuring
- // that these are not carried over into new cluster states.
-
for (ConfiguredNode node : this.nodes) {
NodeInfo newNodeInfo = newClusterInfo.getStorageNodeInfo(node.index());
NodeState currentState = currentClusterStateView.getClusterState().getNodeState(new Node(NodeType.STORAGE, node.index()));
@@ -134,6 +139,23 @@ public class SystemStateGenerator {
proposeNewNodeState(newNodeInfo, new NodeState(NodeType.STORAGE, node.retired() ? State.RETIRED : State.UP));
}
}
+
+ // Ensure that any nodes that have been removed from the config are also
+ // promptly removed from the next (and subsequent) generated cluster states.
+ pruneAllNodesNotContainedInConfig();
+
+ nextStateViewChanged = true;
+ }
+
+ private void pruneAllNodesNotContainedInConfig() {
+ Set<Integer> configuredIndices = this.nodes.stream().map(ConfiguredNode::index).collect(Collectors.toSet());
+ final ClusterState candidateNextState = nextClusterStateView.getClusterState();
+ pruneNodesNotContainedInConfig(candidateNextState, configuredIndices, NodeType.DISTRIBUTOR);
+ pruneNodesNotContainedInConfig(candidateNextState, configuredIndices, NodeType.STORAGE);
+ }
+
+ public void setDistribution(Distribution distribution) {
+ this.distribution = distribution;
nextStateViewChanged = true;
}
@@ -196,14 +218,13 @@ public class SystemStateGenerator {
return state;
}
- private Event getDownDueToTooFewNodesEvent() {
- Event clusterEvent = null;
+ private Optional<Event> getDownDueToTooFewNodesEvent(ClusterState nextClusterState) {
int upStorageCount = 0, upDistributorCount = 0;
int dcount = nodes.size();
int scount = nodes.size();
for (NodeType type : NodeType.getTypes()) {
for (ConfiguredNode node : nodes) {
- NodeState ns = nextClusterStateView.getClusterState().getNodeState(new Node(type, node.index()));
+ NodeState ns = nextClusterState.getNodeState(new Node(type, node.index()));
if (ns.getState() == State.UP || ns.getState() == State.RETIRED || ns.getState() == State.INITIALIZING) {
if (type.equals(NodeType.STORAGE))
++upStorageCount;
@@ -215,46 +236,149 @@ public class SystemStateGenerator {
long timeNow = timer.getCurrentTimeInMillis();
if (upStorageCount < minStorageNodesUp) {
- clusterEvent = new ClusterEvent(ClusterEvent.Type.SYSTEMSTATE,
+ return Optional.of(new ClusterEvent(ClusterEvent.Type.SYSTEMSTATE,
"Less than " + minStorageNodesUp + " storage nodes available (" + upStorageCount + "). Setting cluster state down.",
- timeNow);
+ timeNow));
}
if (upDistributorCount < minDistributorNodesUp) {
- clusterEvent = new ClusterEvent(ClusterEvent.Type.SYSTEMSTATE,
+ return Optional.of(new ClusterEvent(ClusterEvent.Type.SYSTEMSTATE,
"Less than " + minDistributorNodesUp + " distributor nodes available (" + upDistributorCount + "). Setting cluster state down.",
- timeNow);
+ timeNow));
}
if (minRatioOfStorageNodesUp * scount > upStorageCount) {
- clusterEvent = new ClusterEvent(ClusterEvent.Type.SYSTEMSTATE,
+ return Optional.of(new ClusterEvent(ClusterEvent.Type.SYSTEMSTATE,
"Less than " + (100 * minRatioOfStorageNodesUp) + " % of storage nodes are available ("
+ upStorageCount + "/" + scount + "). Setting cluster state down.",
- timeNow);
+ timeNow));
}
if (minRatioOfDistributorNodesUp * dcount > upDistributorCount) {
- clusterEvent = new ClusterEvent(ClusterEvent.Type.SYSTEMSTATE,
+ return Optional.of(new ClusterEvent(ClusterEvent.Type.SYSTEMSTATE,
"Less than " + (100 * minRatioOfDistributorNodesUp) + " % of distributor nodes are available ("
+ upDistributorCount + "/" + dcount + "). Setting cluster state down.",
- timeNow);
+ timeNow));
+ }
+ return Optional.empty();
+ }
+
+ private static Node storageNode(int index) {
+ return new Node(NodeType.STORAGE, index);
+ }
+
+ private void performImplicitStorageNodeStateTransitions(ClusterState candidateState, ContentCluster cluster) {
+ if (distribution == null) {
+ return; // FIXME due to tests that don't bother setting distr config! Never happens in prod.
+ }
+ // First clear the states of any nodes that according to reported/wanted state alone
+ // should have their states cleared. We might still take these down again based on the
+ // decisions of the group availability calculator, but this way we ensure that groups
+ // that no longer should be down will have their nodes implicitly made available again.
+ // TODO this will be void once SystemStateGenerator has been rewritten to be stateless.
+ final Set<Integer> clearedNodes = clearDownStateForStorageNodesThatCanBeUp(candidateState, cluster);
+
+ final GroupAvailabilityCalculator calc = new GroupAvailabilityCalculator.Builder()
+ .withMinNodeRatioPerGroup(minNodeRatioPerGroup)
+ .withDistribution(distribution)
+ .build();
+ final Set<Integer> nodesToTakeDown = calc.nodesThatShouldBeDown(candidateState);
+ markNodesAsDownDueToGroupUnavailability(cluster, candidateState, nodesToTakeDown, clearedNodes);
+
+ clearedNodes.removeAll(nodesToTakeDown);
+ logEventsForNodesThatWereTakenUp(clearedNodes, cluster);
+ }
+
+ private void logEventsForNodesThatWereTakenUp(Set<Integer> newlyUpNodes, ContentCluster cluster) {
+ newlyUpNodes.forEach(i -> {
+ final NodeInfo info = cluster.getNodeInfo(storageNode(i)); // Should always be non-null here.
+ // TODO the fact that this only happens for group up events is implementation specific
+ // should generalize this if we get other such events.
+ eventLog.addNodeOnlyEvent(new NodeEvent(info,
+ "Group availability restored; taking node back up",
+ NodeEvent.Type.CURRENT, timer.getCurrentTimeInMillis()), LogLevel.INFO);
+ });
+ }
+
+ private void markNodesAsDownDueToGroupUnavailability(ContentCluster cluster,
+ ClusterState candidateState,
+ Set<Integer> nodesToTakeDown,
+ Set<Integer> clearedNodes)
+ {
+ for (Integer idx : nodesToTakeDown) {
+ final Node node = storageNode(idx);
+ NodeState newState = new NodeState(NodeType.STORAGE, State.DOWN);
+ newState.setDescription("group node availability below configured threshold");
+ candidateState.setNodeState(node, newState);
+
+ logNodeGroupDownEdgeEventOnce(clearedNodes, node, cluster);
+ }
+ }
+
+ private void logNodeGroupDownEdgeEventOnce(Set<Integer> clearedNodes, Node node, ContentCluster cluster) {
+ final NodeInfo nodeInfo = cluster.getNodeInfo(node);
+ // If clearedNodes contains the index it means we're just re-downing a node
+ // that was previously down. If this is the case, we'd cause a duplicate
+ // event if we logged it now as well.
+ if (nodeInfo != null && !clearedNodes.contains(node.getIndex())) {
+ eventLog.addNodeOnlyEvent(new NodeEvent(nodeInfo,
+ "Setting node down as the total availability of its group is " +
+ "below the configured threshold",
+ NodeEvent.Type.CURRENT, timer.getCurrentTimeInMillis()), LogLevel.INFO);
}
- return clusterEvent;
}
- private ClusterStateView createNextVersionOfClusterStateView(Event clusterEvent) {
+ private NodeState baselineNodeState(NodeInfo info) {
+ NodeState reported = info.getReportedState();
+ NodeState wanted = info.getWantedState();
+
+ final NodeState baseline = reported.clone();
+ if (wanted.getState() != State.UP) {
+ baseline.setDescription(wanted.getDescription());
+ if (reported.above(wanted)) {
+ baseline.setState(wanted.getState());
+ }
+ }
+ return baseline;
+ }
+
+ // Returns set of nodes whose state was cleared
+ private Set<Integer> clearDownStateForStorageNodesThatCanBeUp(
+ ClusterState candidateState, ContentCluster cluster)
+ {
+ final int nodeCount = candidateState.getNodeCount(NodeType.STORAGE);
+ final Set<Integer> clearedNodes = new HashSet<>();
+ for (int i = 0; i < nodeCount; ++i) {
+ final Node node = storageNode(i);
+ final NodeInfo info = cluster.getNodeInfo(node);
+ final NodeState currentState = candidateState.getNodeState(node);
+ if (currentState.getState() == State.DOWN) {
+ if (mayClearNodeDownState(info)) {
+ candidateState.setNodeState(node, baselineNodeState(info));
+ clearedNodes.add(i);
+ }
+ }
+ }
+ return clearedNodes;
+ }
+
+ private boolean mayClearNodeDownState(NodeInfo info) {
+ if (info == null) {
+ // Nothing known about node in cluster info; we definitely don't want it
+ // to be taken up at this point.
+ return false;
+ }
+ return (info.getReportedState().getState() != State.DOWN
+ && info.getWantedState().getState().oneOf("ur"));
+ }
+
+ private ClusterStateView createNextVersionOfClusterStateView(ContentCluster cluster) {
// If you change this method, see *) in notifyIfNewSystemState
ClusterStateView candidateClusterStateView = nextClusterStateView.cloneForNewState();
ClusterState candidateClusterState = candidateClusterStateView.getClusterState();
- candidateClusterState.setClusterState(clusterEvent == null ? State.UP : State.DOWN);
-
int currentDistributionBits = calculateMinDistributionBitCount();
if (currentDistributionBits != nextClusterStateView.getClusterState().getDistributionBitCount()) {
candidateClusterState.setDistributionBits(currentDistributionBits);
}
-
- Set<Integer> configuredIndices = this.nodes.stream().map(ConfiguredNode::index).collect(Collectors.toSet());
-
- pruneNodesNotContainedInConfig(candidateClusterState, configuredIndices, NodeType.DISTRIBUTOR);
- pruneNodesNotContainedInConfig(candidateClusterState, configuredIndices, NodeType.STORAGE);
+ performImplicitStorageNodeStateTransitions(candidateClusterState, cluster);
return candidateClusterStateView;
}
@@ -310,12 +434,29 @@ public class SystemStateGenerator {
}
}
- public boolean notifyIfNewSystemState(SystemStateListener stateListener) {
+ private void mergeIntoNextClusterState(ClusterState sourceState) {
+ final ClusterState nextState = nextClusterStateView.getClusterState();
+ final int nodeCount = sourceState.getNodeCount(NodeType.STORAGE);
+ for (int i = 0; i < nodeCount; ++i) {
+ final Node node = storageNode(i);
+ final NodeState stateInSource = sourceState.getNodeState(node);
+ final NodeState stateInTarget = nextState.getNodeState(node);
+ if (stateInSource.getState() != stateInTarget.getState()) {
+ nextState.setNodeState(node, stateInSource);
+ }
+ }
+ }
+
+ public boolean notifyIfNewSystemState(ContentCluster cluster, SystemStateListener stateListener) {
if ( ! nextStateViewChanged) return false;
- Event clusterEvent = getDownDueToTooFewNodesEvent();
- ClusterStateView newClusterStateView = createNextVersionOfClusterStateView(clusterEvent);
+ ClusterStateView newClusterStateView = createNextVersionOfClusterStateView(cluster);
+
ClusterState newClusterState = newClusterStateView.getClusterState();
+ // Creating the next version of the state may implicitly take down nodes, so our checks
+ // for taking the entire cluster down must happen _after_ this
+ Optional<Event> clusterDown = getDownDueToTooFewNodesEvent(newClusterState);
+ newClusterState.setClusterState(clusterDown.isPresent() ? State.DOWN : State.UP);
if (newClusterState.similarTo(currentClusterStateView.getClusterState())) {
log.log(LogLevel.DEBUG,
@@ -329,7 +470,7 @@ public class SystemStateGenerator {
newClusterState.setVersion(currentClusterStateView.getClusterState().getVersion() + 1);
recordNewClusterStateHasBeenChosen(currentClusterStateView.getClusterState(),
- newClusterStateView.getClusterState(), clusterEvent);
+ newClusterStateView.getClusterState(), clusterDown.orElse(null));
// *) Ensure next state is still up to date.
// This should make nextClusterStateView a deep-copy of currentClusterStateView.
@@ -338,6 +479,7 @@ public class SystemStateGenerator {
// This seems like a hack...
nextClusterStateView.getClusterState().setDistributionBits(newClusterState.getDistributionBitCount());
nextClusterStateView.getClusterState().setClusterState(newClusterState.getClusterState());
+ mergeIntoNextClusterState(newClusterState);
currentClusterStateView = newClusterStateView;
nextStateViewChanged = false;
diff --git a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/restapiv2/requests/SetNodeStateRequest.java b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/restapiv2/requests/SetNodeStateRequest.java
index d6dd6faa60d..1b97123d636 100644
--- a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/restapiv2/requests/SetNodeStateRequest.java
+++ b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/restapiv2/requests/SetNodeStateRequest.java
@@ -80,12 +80,11 @@ public class SetNodeStateRequest extends Request<SetResponse> {
NodeState wantedState = nodeInfo.getUserWantedState();
NodeState newWantedState = getRequestedNodeState(newStates, node);
- int version = currentClusterState.getVersion();
NodeStateChangeChecker.Result result = cluster.calculateEffectOfNewState(
- node, version, condition, wantedState, newWantedState);
+ node, currentClusterState, condition, wantedState, newWantedState);
log.log(LogLevel.DEBUG, "node=" + node +
- " version=" + version +
+ " current-cluster-state=" + currentClusterState + // Includes version in output format
" condition=" + condition +
" wanted-state=" + wantedState +
" new-wanted-state=" + newWantedState +
diff --git a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/ClusterFixture.java b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/ClusterFixture.java
new file mode 100644
index 00000000000..aca26000931
--- /dev/null
+++ b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/ClusterFixture.java
@@ -0,0 +1,136 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.vespa.clustercontroller.core;
+
+import com.yahoo.vdslib.distribution.ConfiguredNode;
+import com.yahoo.vdslib.distribution.Distribution;
+import com.yahoo.vdslib.state.Node;
+import com.yahoo.vdslib.state.NodeState;
+import com.yahoo.vdslib.state.NodeType;
+import com.yahoo.vdslib.state.State;
+import com.yahoo.vespa.clustercontroller.core.listeners.NodeStateOrHostInfoChangeHandler;
+import com.yahoo.vespa.clustercontroller.core.mocks.TestEventLog;
+import com.yahoo.vespa.clustercontroller.utils.util.NoMetricReporter;
+import com.yahoo.vespa.config.content.StorDistributionConfig;
+
+import java.util.Collection;
+import java.util.List;
+import java.util.Map;
+import java.util.TreeMap;
+import java.util.stream.Collectors;
+import java.util.stream.IntStream;
+
+import static org.mockito.Mockito.mock;
+
+class ClusterFixture {
+ public final ContentCluster cluster;
+ public final Distribution distribution;
+ public final FakeTimer timer;
+ public final EventLogInterface eventLog;
+ public final SystemStateGenerator generator;
+
+ public ClusterFixture(ContentCluster cluster, Distribution distribution) {
+ this.cluster = cluster;
+ this.distribution = distribution;
+ this.timer = new FakeTimer();
+ this.eventLog = mock(EventLogInterface.class);
+ this.generator = createGeneratorForFixtureCluster();
+ }
+
+ public SystemStateGenerator createGeneratorForFixtureCluster() {
+ final int controllerIndex = 0;
+ MetricUpdater metricUpdater = new MetricUpdater(new NoMetricReporter(), controllerIndex);
+ SystemStateGenerator generator = new SystemStateGenerator(timer, eventLog, metricUpdater);
+ generator.setNodes(cluster.clusterInfo());
+ generator.setDistribution(distribution);
+ return generator;
+ }
+
+ public void bringEntireClusterUp() {
+ cluster.clusterInfo().getConfiguredNodes().forEach((idx, node) -> {
+ reportStorageNodeState(idx, State.UP);
+ reportDistributorNodeState(idx, State.UP);
+ });
+ }
+
+ public void reportStorageNodeState(final int index, State state) {
+ final Node node = new Node(NodeType.STORAGE, index);
+ final NodeState nodeState = new NodeState(NodeType.STORAGE, state);
+ nodeState.setDescription("mockdesc");
+ NodeStateOrHostInfoChangeHandler handler = mock(NodeStateOrHostInfoChangeHandler.class);
+ NodeInfo nodeInfo = cluster.getNodeInfo(node);
+
+ generator.handleNewReportedNodeState(nodeInfo, nodeState, handler);
+ nodeInfo.setReportedState(nodeState, timer.getCurrentTimeInMillis());
+ }
+
+ public void reportStorageNodeState(final int index, NodeState nodeState) {
+ final Node node = new Node(NodeType.STORAGE, index);
+ final NodeInfo nodeInfo = cluster.getNodeInfo(node);
+ final long mockTime = 1234;
+ NodeStateOrHostInfoChangeHandler changeListener = mock(NodeStateOrHostInfoChangeHandler.class);
+ generator.handleNewReportedNodeState(nodeInfo, nodeState, changeListener);
+ nodeInfo.setReportedState(nodeState, mockTime);
+ }
+
+ public void reportDistributorNodeState(final int index, State state) {
+ final Node node = new Node(NodeType.DISTRIBUTOR, index);
+ final NodeState nodeState = new NodeState(NodeType.DISTRIBUTOR, state);
+ NodeStateOrHostInfoChangeHandler handler = mock(NodeStateOrHostInfoChangeHandler.class);
+ NodeInfo nodeInfo = cluster.getNodeInfo(node);
+
+ generator.handleNewReportedNodeState(nodeInfo, nodeState, handler);
+ nodeInfo.setReportedState(nodeState, timer.getCurrentTimeInMillis());
+ }
+
+ public void proposeStorageNodeWantedState(final int index, State state) {
+ final Node node = new Node(NodeType.STORAGE, index);
+ final NodeState nodeState = new NodeState(NodeType.STORAGE, state);
+ nodeState.setDescription("mockdesc");
+ NodeInfo nodeInfo = cluster.getNodeInfo(node);
+ nodeInfo.setWantedState(nodeState);
+
+ generator.proposeNewNodeState(nodeInfo, nodeState);
+
+ }
+
+ public void disableAutoClusterTakedown() {
+ generator.setMinNodesUp(0, 0, 0.0, 0.0);
+ }
+
+ public void disableTransientMaintenanceModeOnDown() {
+ Map<NodeType, Integer> maxTransitionTime = new TreeMap<>();
+ maxTransitionTime.put(NodeType.DISTRIBUTOR, 0);
+ maxTransitionTime.put(NodeType.STORAGE, 0);
+ generator.setMaxTransitionTime(maxTransitionTime);
+ }
+
+ public void enableTransientMaintenanceModeOnDown(final int transitionTime) {
+ Map<NodeType, Integer> maxTransitionTime = new TreeMap<>();
+ maxTransitionTime.put(NodeType.DISTRIBUTOR, transitionTime);
+ maxTransitionTime.put(NodeType.STORAGE, transitionTime);
+ generator.setMaxTransitionTime(maxTransitionTime);
+ }
+
+ public String generatedClusterState() {
+ return generator.getClusterState().toString();
+ }
+
+ public String verboseGeneratedClusterState() { return generator.getClusterState().toString(true); }
+
+ public static ClusterFixture forFlatCluster(int nodeCount) {
+ Collection<ConfiguredNode> nodes = DistributionBuilder.buildConfiguredNodes(nodeCount);
+
+ Distribution distribution = DistributionBuilder.forFlatCluster(nodeCount);
+ ContentCluster cluster = new ContentCluster("foo", nodes, distribution, 0, 0.0);
+
+ return new ClusterFixture(cluster, distribution);
+ }
+
+ public static ClusterFixture forHierarchicCluster(DistributionBuilder.GroupBuilder root) {
+ List<ConfiguredNode> nodes = DistributionBuilder.buildConfiguredNodes(root.totalNodeCount());
+ Distribution distribution = DistributionBuilder.forHierarchicCluster(root);
+ ContentCluster cluster = new ContentCluster("foo", nodes, distribution, 0, 0.0);
+
+ return new ClusterFixture(cluster, distribution);
+ }
+}
diff --git a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/DistributionBuilder.java b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/DistributionBuilder.java
new file mode 100644
index 00000000000..ebd87d08403
--- /dev/null
+++ b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/DistributionBuilder.java
@@ -0,0 +1,107 @@
+package com.yahoo.vespa.clustercontroller.core;
+
+import com.yahoo.vdslib.distribution.ConfiguredNode;
+import com.yahoo.vdslib.distribution.Distribution;
+import com.yahoo.vespa.config.content.StorDistributionConfig;
+
+import java.util.Collection;
+import java.util.List;
+import java.util.stream.Collectors;
+import java.util.stream.IntStream;
+
+public class DistributionBuilder {
+ // TODO support nested groups
+ public static class GroupBuilder {
+ final int groupCount;
+ public List<Integer> groupsWithNodeCount;
+
+ public GroupBuilder(int groupCount) {
+ this.groupCount = groupCount;
+ }
+
+ public GroupBuilder(int... nodeCounts) {
+ this.groupCount = nodeCounts.length;
+ this.groupsWithNodeCount = IntStream.of(nodeCounts).boxed()
+ .collect(Collectors.toList());
+ }
+
+ public GroupBuilder eachWithNodeCount(int nodeCount) {
+ groupsWithNodeCount = IntStream.range(0, groupCount)
+ .map(i -> nodeCount).boxed()
+ .collect(Collectors.toList());
+ return this;
+ }
+
+ public int totalNodeCount() {
+ return groupsWithNodeCount.stream().reduce(0, Integer::sum);
+ }
+
+ public String groupDistributionSpec() {
+ return IntStream.range(0, groupCount).mapToObj(i -> "1")
+ .collect(Collectors.joining("|")) + "|*";
+ }
+ }
+
+ public static GroupBuilder withGroups(int groups) {
+ return new GroupBuilder(groups);
+ }
+
+ public static GroupBuilder withGroupNodes(int... nodeCounts) {
+ return new GroupBuilder(nodeCounts);
+ }
+
+ public static List<ConfiguredNode> buildConfiguredNodes(int nodeCount) {
+ return IntStream.range(0, nodeCount)
+ .mapToObj(i -> new ConfiguredNode(i, false))
+ .collect(Collectors.toList());
+ }
+
+ private static StorDistributionConfig.Group.Nodes.Builder configuredNode(ConfiguredNode node) {
+ StorDistributionConfig.Group.Nodes.Builder builder = new StorDistributionConfig.Group.Nodes.Builder();
+ builder.index(node.index());
+ return builder;
+ }
+
+ private static StorDistributionConfig.Group.Builder configuredGroup(
+ String name, int index, Collection<ConfiguredNode> nodes) {
+ StorDistributionConfig.Group.Builder builder = new StorDistributionConfig.Group.Builder();
+ builder.name(name);
+ builder.index(Integer.toString(index));
+ nodes.forEach(n -> builder.nodes(configuredNode(n)));
+ return builder;
+ }
+
+ public static Distribution forFlatCluster(int nodeCount) {
+ Collection<ConfiguredNode> nodes = buildConfiguredNodes(nodeCount);
+
+ StorDistributionConfig.Builder configBuilder = new StorDistributionConfig.Builder();
+ configBuilder.redundancy(2);
+ configBuilder.group(configuredGroup("bar", 0, nodes));
+
+ return new Distribution(new StorDistributionConfig(configBuilder));
+ }
+
+ public static Distribution forHierarchicCluster(GroupBuilder root) {
+ List<ConfiguredNode> nodes = buildConfiguredNodes(root.totalNodeCount());
+
+ StorDistributionConfig.Builder configBuilder = new StorDistributionConfig.Builder();
+ configBuilder.redundancy(2);
+
+ StorDistributionConfig.Group.Builder rootBuilder = new StorDistributionConfig.Group.Builder();
+ rootBuilder.name("invalid");
+ rootBuilder.index("invalid");
+ rootBuilder.partitions(root.groupDistributionSpec());
+ configBuilder.group(rootBuilder);
+
+ int offset = 0;
+ for (int group = 0; group < root.groupsWithNodeCount.size(); ++group) {
+ int nodeCount = root.groupsWithNodeCount.get(group);
+ StorDistributionConfig.Group.Builder groupBuilder
+ = configuredGroup("group_" + (group + 1), group + 1, nodes.subList(offset, offset + nodeCount));
+ configBuilder.group(groupBuilder);
+ offset += nodeCount;
+ }
+
+ return new Distribution(new StorDistributionConfig(configBuilder));
+ }
+}
diff --git a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/FleetControllerTest.java b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/FleetControllerTest.java
index 86248d2e1e3..f4b3e648f63 100644
--- a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/FleetControllerTest.java
+++ b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/FleetControllerTest.java
@@ -10,6 +10,7 @@ import com.yahoo.vdslib.distribution.Distribution;
import com.yahoo.vdslib.state.ClusterState;
import com.yahoo.vdslib.state.Node;
import com.yahoo.vdslib.state.NodeState;
+import com.yahoo.vdslib.state.NodeType;
import com.yahoo.vespa.clustercontroller.core.database.DatabaseHandler;
import com.yahoo.vespa.clustercontroller.core.hostinfo.HostInfo;
import com.yahoo.vespa.clustercontroller.core.rpc.RPCCommunicator;
@@ -232,90 +233,43 @@ public abstract class FleetControllerTest implements Waiter {
return nodes;
}
- public interface NodeModifier {
- void modify(NodeInfo node);
+ protected static Set<Integer> asIntSet(Integer... idx) {
+ return Arrays.asList(idx).stream().collect(Collectors.toSet());
}
- NodeModifier makeDefaultTestNodeModifier() {
- return new NodeModifier() {
- @Override
- public void modify(NodeInfo node) {
- if (node.isDistributor()) {
- if (node.getNodeIndex() == 13) {
- node.setPrematureCrashCount(fleetController.getOptions().maxPrematureCrashes + 2);
- }
- return;
- }
- double latency = 75;
- long count = 1000;
- if (node.getNodeIndex() == 4) {
- latency = 300;
- count = 500;
- } else if (node.getNodeIndex() == 7) {
- latency = 120;
- count = 800;
- } else if (node.getNodeIndex() == 21) {
- latency = 2000;
- count = 600;
- } else if (node.getNodeIndex() == 25) {
- node.setPrematureCrashCount(fleetController.getOptions().maxPrematureCrashes + 1);
- } else if (node.getNodeIndex() == 26) {
- node.setPrematureCrashCount(fleetController.getOptions().maxPrematureCrashes);
- }
- String hostInfoString = generateHostInfo(latency, count);
- node.setHostInfo(HostInfo.createHostInfo(hostInfoString));
- }
- };
+ protected static Set<ConfiguredNode> asConfiguredNodes(Set<Integer> indices) {
+ return indices.stream().map(idx -> new ConfiguredNode(idx, false)).collect(Collectors.toSet());
}
- NodeModifier makeStdDevTestNodeModifier() {
- return new NodeModifier() {
- double[] latencies = new double[] { 30, 300, 60, 270 };
- int counter = 0;
-
+ protected void waitForStateExcludingNodeSubset(String expectedState, Set<Integer> excludedNodes) throws Exception {
+ // Due to the implementation details of the test base, this.waitForState() will always
+ // wait until all nodes added in the test have received the latest cluster state. Since we
+ // want to entirely ignore a subset of nodes, these won't get a cluster state at all and the
+ // test will fail unless otherwise handled. We thus use a custom waiter which filters out nodes
+ // with the excluded indices (storage and distributors with the same index are treated as
+ // different nodes in this context).
+ Waiter subsetWaiter = new Waiter.Impl(new DataRetriever() {
@Override
- public void modify(NodeInfo node) {
- if (node.isDistributor()) {
- return;
- }
- String hostInfo = generateHostInfo(latencies[counter++ % latencies.length], 1500);
- node.setHostInfo(HostInfo.createHostInfo(hostInfo));
+ public Object getMonitor() { return timer; }
+ @Override
+ public FleetController getFleetController() { return fleetController; }
+ @Override
+ public List<DummyVdsNode> getDummyNodes() {
+ return nodes.stream()
+ .filter(n -> !excludedNodes.contains(n.getNode().getIndex()))
+ .collect(Collectors.toList());
}
- };
- }
-
- protected void setUpSlowDiskCluster(NodeModifier callback) throws Exception {
- int nodeCount = 31;
- FleetControllerOptions options = new FleetControllerOptions("mycluster");
- // TODO: multiple groups!
- options.setStorageDistribution(new Distribution(Distribution.getDefaultDistributionConfig(3, nodeCount)));
- setUpFleetController(true, options);
- setUpVdsNodes(true, new DummyVdsNodeOptions(), false, nodeCount);
- waitForStableSystem(nodeCount);
- // Set one node as not being up. It should not contribute to the overall
- // latency or operation metrics, nor should its disks be included.
- nodes.get(2*13).disconnectAsShutdown();
- nodes.get(2*21+1).disconnectAsShutdown();
- waiter.waitForState("version:\\d+ distributor:31 .13.s:d storage:31 .21.s:m");
-
- for (NodeInfo node : fleetController.getCluster().getNodeInfo()) {
- callback.modify(node);
- }
+ @Override
+ public int getTimeoutMS() { return timeoutMS; }
+ });
+ subsetWaiter.waitForState(expectedState);
}
- protected void setUpSimpleCluster(int nodeCount) throws Exception {
- FleetControllerOptions options = new FleetControllerOptions("mycluster");
- // TODO: multiple groups!
- options.setStorageDistribution(new Distribution(Distribution.getDefaultDistributionConfig(3, nodeCount)));
- setUpFleetController(true, options);
- setUpVdsNodes(true, new DummyVdsNodeOptions(), false, nodeCount);
- waitForStableSystem(nodeCount);
- waiter.waitForState("version:\\d+ distributor:" + nodeCount + " storage:" + nodeCount);
-
- NodeModifier callback = makeDefaultTestNodeModifier();
- for (NodeInfo node : fleetController.getCluster().getNodeInfo()) {
- callback.modify(node);
- }
+ protected static Map<NodeType, Integer> transitionTimes(int milliseconds) {
+ Map<NodeType, Integer> maxTransitionTime = new TreeMap<>();
+ maxTransitionTime.put(NodeType.DISTRIBUTOR, milliseconds);
+ maxTransitionTime.put(NodeType.STORAGE, milliseconds);
+ return maxTransitionTime;
}
protected void tearDownSystem() throws Exception {
diff --git a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/GroupAutoTakedownLiveConfigTest.java b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/GroupAutoTakedownLiveConfigTest.java
new file mode 100644
index 00000000000..64bea57c6ad
--- /dev/null
+++ b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/GroupAutoTakedownLiveConfigTest.java
@@ -0,0 +1,100 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.vespa.clustercontroller.core;
+
+import com.yahoo.vdslib.state.NodeType;
+import org.junit.Test;
+
+import java.util.HashMap;
+import java.util.Map;
+import java.util.stream.Collectors;
+
+import static org.junit.Assert.assertFalse;
+
+public class GroupAutoTakedownLiveConfigTest extends FleetControllerTest {
+
+ private long mockConfigGeneration = 1;
+
+
+ private static FleetControllerOptions createOptions(
+ DistributionBuilder.GroupBuilder groupBuilder, double minNodeRatio)
+ {
+ FleetControllerOptions options = new FleetControllerOptions("mycluster");
+ options.setStorageDistribution(DistributionBuilder.forHierarchicCluster(groupBuilder));
+ options.nodes = DistributionBuilder.buildConfiguredNodes(groupBuilder.totalNodeCount())
+ .stream().collect(Collectors.toSet());
+ options.minNodeRatioPerGroup = minNodeRatio;
+ options.maxTransitionTime = transitionTimes(0);
+ return options;
+ }
+
+ private void updateConfigLive(FleetControllerOptions newOptions) {
+ ++mockConfigGeneration;
+ this.fleetController.updateOptions(newOptions, mockConfigGeneration);
+ }
+
+ private void reconfigureWithMinNodeRatio(double minNodeRatio) {
+ FleetControllerOptions newOptions = this.options.clone();
+ newOptions.minNodeRatioPerGroup = minNodeRatio;
+ updateConfigLive(newOptions);
+ }
+
+ private void reconfigureWithDistribution(DistributionBuilder.GroupBuilder groupBuilder) {
+ FleetControllerOptions newOptions = this.options.clone();
+ newOptions.nodes = DistributionBuilder.buildConfiguredNodes(groupBuilder.totalNodeCount())
+ .stream().collect(Collectors.toSet());
+ newOptions.storageDistribution = DistributionBuilder.forHierarchicCluster(groupBuilder);
+ updateConfigLive(newOptions);
+ }
+
+ private void setUp3x3ClusterWithMinNodeRatio(double minNodeRatio) throws Exception {
+ FleetControllerOptions options = createOptions(
+ DistributionBuilder.withGroups(3).eachWithNodeCount(3),
+ minNodeRatio);
+ setUpFleetController(true, options);
+ setUpVdsNodes(true, new DummyVdsNodeOptions(), false, 9);
+ waitForState("version:\\d+ distributor:9 storage:9");
+ }
+
+ private void takeDownContentNode(int index) {
+ // nodes list contains both distributors and storage nodes, with distributors
+ // in even positions and storage nodes in odd positions.
+ final int arrayIndex = index*2 + 1;
+ assertFalse(nodes.get(arrayIndex).isDistributor());
+ nodes.get(arrayIndex).disconnect();
+ }
+
+ @Test
+ public void bootstrap_min_ratio_option_is_propagated_to_group_availability_logic() throws Exception {
+ setUp3x3ClusterWithMinNodeRatio(0.67);
+ takeDownContentNode(0);
+ waitForStateExcludingNodeSubset("version:\\d+ distributor:9 storage:9 .0.s:d .1.s:d .2.s:d", asIntSet(0));
+ }
+
+ @Test
+ public void min_ratio_live_reconfig_immediately_takes_effect() throws Exception {
+ // Initially, arbitrarily many nodes may be down in a group.
+ setUp3x3ClusterWithMinNodeRatio(0.0);
+ takeDownContentNode(3);
+ waitForStateExcludingNodeSubset("version:\\d+ distributor:9 storage:9 .3.s:d", asIntSet(3));
+
+ reconfigureWithMinNodeRatio(0.67);
+ waitForStateExcludingNodeSubset("version:\\d+ distributor:9 storage:9 .3.s:d .4.s:d .5.s:d", asIntSet(3));
+
+ reconfigureWithMinNodeRatio(0.0);
+ // Aaaand back up again!
+ waitForStateExcludingNodeSubset("version:\\d+ distributor:9 storage:9 .3.s:d", asIntSet(3));
+ }
+
+ @Test
+ public void live_distribution_config_changes_trigger_cluster_state_change() throws Exception {
+ setUp3x3ClusterWithMinNodeRatio(0.65);
+ takeDownContentNode(6);
+
+ // Not enough nodes down to trigger group take-down yet
+ waitForStateExcludingNodeSubset("version:\\d+ distributor:9 storage:9 .6.s:d", asIntSet(6));
+ // Removing a node from the same group as node 6 will dip it under the configured threshold,
+ // taking down the entire group. In this case we configure out node 8.
+ reconfigureWithDistribution(DistributionBuilder.withGroupNodes(3, 3, 2));
+ waitForStateExcludingNodeSubset("version:\\d+ distributor:8 storage:6", asIntSet(6, 8));
+ }
+}
diff --git a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/GroupAutoTakedownTest.java b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/GroupAutoTakedownTest.java
new file mode 100644
index 00000000000..93e34b1f772
--- /dev/null
+++ b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/GroupAutoTakedownTest.java
@@ -0,0 +1,370 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.vespa.clustercontroller.core;
+
+import com.yahoo.vdslib.distribution.ConfiguredNode;
+import com.yahoo.vdslib.state.DiskState;
+import com.yahoo.vdslib.state.Node;
+import com.yahoo.vdslib.state.NodeState;
+import com.yahoo.vdslib.state.NodeType;
+import com.yahoo.vdslib.state.State;
+import com.yahoo.vespa.clustercontroller.core.listeners.NodeStateOrHostInfoChangeHandler;
+import com.yahoo.vespa.clustercontroller.core.listeners.SystemStateListener;
+import org.junit.Test;
+import org.mockito.ArgumentMatcher;
+
+import java.util.HashSet;
+import java.util.Set;
+
+import static org.hamcrest.core.AllOf.allOf;
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertFalse;
+import static org.junit.Assert.assertTrue;
+import static org.mockito.Matchers.any;
+import static org.mockito.Matchers.argThat;
+import static org.mockito.Mockito.mock;
+import static org.mockito.Mockito.verify;
+
+public class GroupAutoTakedownTest {
+
+ private static ClusterFixture createFixtureForAllUpFlatCluster(int nodeCount, double minNodeRatioPerGroup) {
+ ClusterFixture fixture = ClusterFixture.forFlatCluster(nodeCount);
+ setSharedFixtureOptions(fixture, minNodeRatioPerGroup);
+ return fixture;
+ }
+
+ private static ClusterFixture createFixtureForAllUpHierarchicCluster(
+ DistributionBuilder.GroupBuilder root, double minNodeRatioPerGroup)
+ {
+ ClusterFixture fixture = ClusterFixture.forHierarchicCluster(root);
+ setSharedFixtureOptions(fixture, minNodeRatioPerGroup);
+ return fixture;
+ }
+
+ private static void setSharedFixtureOptions(ClusterFixture fixture, double minNodeRatioPerGroup) {
+ fixture.generator.setMinNodeRatioPerGroup(minNodeRatioPerGroup);
+ fixture.disableTransientMaintenanceModeOnDown();
+ fixture.disableAutoClusterTakedown();
+ fixture.bringEntireClusterUp();
+ }
+
+ private String stateAfterStorageTransition(ClusterFixture fixture, final int index, final State state) {
+ transitionStoreNodeToState(fixture, index, state);
+ return fixture.generatedClusterState();
+ }
+
+ private String verboseStateAfterStorageTransition(ClusterFixture fixture, final int index, final State state) {
+ transitionStoreNodeToState(fixture, index, state);
+ return fixture.verboseGeneratedClusterState();
+ }
+
+ private void transitionStoreNodeToState(ClusterFixture fixture, int index, State state) {
+ fixture.reportStorageNodeState(index, state);
+ SystemStateListener listener = mock(SystemStateListener.class);
+ assertTrue(fixture.generator.notifyIfNewSystemState(fixture.cluster, listener));
+ }
+
+ /**
+ * Use a per-group availability requirement ratio of 99%. Ensure that taking down a single
+ * node out of 5 in a flat hierarchy does not take down the cluster, i.e. the config should
+ * not apply to a flat structure.
+ */
+ @Test
+ public void config_does_not_apply_to_flat_hierarchy_clusters() {
+ ClusterFixture fixture = createFixtureForAllUpFlatCluster(5, 0.99);
+
+ SystemStateListener listener = mock(SystemStateListener.class);
+ // First invocation; generates initial state and clears "new state" flag
+ assertTrue(fixture.generator.notifyIfNewSystemState(fixture.cluster, listener));
+ assertEquals("version:1 distributor:5 storage:5", fixture.generatedClusterState());
+
+ assertEquals("version:2 distributor:5 storage:5 .1.s:d",
+ stateAfterStorageTransition(fixture, 1, State.DOWN));
+ }
+
+ @Test
+ public void group_node_down_edge_implicitly_marks_down_rest_of_nodes_in_group() {
+ ClusterFixture fixture = createFixtureForAllUpHierarchicCluster(
+ DistributionBuilder.withGroups(3).eachWithNodeCount(2), 0.51);
+
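+        // Group layout: {0,1}, {2,3}, {4,5}. With a 0.51 min ratio, taking one of the two nodes
+        // in a group down implies the other node in that group must be taken down as well.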
+ SystemStateListener listener = mock(SystemStateListener.class);
+ assertTrue(fixture.generator.notifyIfNewSystemState(fixture.cluster, listener));
+ assertEquals("version:1 distributor:6 storage:6", fixture.generatedClusterState());
+
+ // Same group as node 4
+ assertEquals("version:2 distributor:6 storage:4",
+ stateAfterStorageTransition(fixture, 5, State.DOWN));
+ // Same group as node 1
+ assertEquals("version:3 distributor:6 storage:4 .0.s:d .1.s:d",
+ stateAfterStorageTransition(fixture, 0, State.DOWN));
+ }
+
+ @Test
+ public void restored_group_node_availability_takes_group_back_up_automatically() {
+ ClusterFixture fixture = createFixtureForAllUpHierarchicCluster(
+ DistributionBuilder.withGroups(3).eachWithNodeCount(2), 0.51);
+
+ // Group #2 -> down
+ assertEquals("version:1 distributor:6 storage:4",
+ stateAfterStorageTransition(fixture, 5, State.DOWN));
+
+ // Group #2 -> back up again
+ assertEquals("version:2 distributor:6 storage:6",
+ stateAfterStorageTransition(fixture, 5, State.UP));
+ }
+
+ @Test
+ public void no_op_for_downed_nodes_in_already_downed_group() {
+ ClusterFixture fixture = createFixtureForAllUpHierarchicCluster(
+ DistributionBuilder.withGroups(3).eachWithNodeCount(2), 0.51);
+
+ assertEquals("version:1 distributor:6 storage:4",
+ stateAfterStorageTransition(fixture, 5, State.DOWN));
+
+ // 4, 5 in same group; this should not cause a new state since it's already implicitly down
+ fixture.reportStorageNodeState(4, State.DOWN);
+
+ SystemStateListener listener = mock(SystemStateListener.class);
+ assertFalse(fixture.generator.notifyIfNewSystemState(fixture.cluster, listener));
+
+ assertEquals("version:1 distributor:6 storage:4", fixture.generatedClusterState());
+ }
+
+ @Test
+ public void verbose_node_state_description_updated_for_implicitly_downed_nodes() {
+ ClusterFixture fixture = createFixtureForAllUpHierarchicCluster(
+ DistributionBuilder.withGroups(3).eachWithNodeCount(3), 0.75);
+
+ // Nodes 6 and 7 are taken down implicitly and should have a message reflecting this.
+ // Node 8 is taken down by the fixture and gets a fixture-assigned message that
+ // we should _not_ lose/overwrite.
+ assertEquals("version:1 distributor:9 storage:9 .6.s:d " +
+ ".6.m:group\\x20node\\x20availability\\x20below\\x20configured\\x20threshold " +
+ ".7.s:d " +
+ ".7.m:group\\x20node\\x20availability\\x20below\\x20configured\\x20threshold " +
+ ".8.s:d .8.m:mockdesc",
+ verboseStateAfterStorageTransition(fixture, 8, State.DOWN));
+ }
+
+ @Test
+    public void legacy_cluster_wide_availability_ratio_is_computed_after_group_takedowns() {
+ ClusterFixture fixture = createFixtureForAllUpHierarchicCluster(
+ DistributionBuilder.withGroups(3).eachWithNodeCount(2), 0.51);
+ fixture.generator.setMinNodesUp(5, 5, 0.51, 0.51);
+
+ // Taking down a node in a group forces the entire group down, which leaves us with
+ // only 4 content nodes (vs. minimum of 5 as specified above). The entire cluster
+ // should be marked as down in this case.
+ assertEquals("version:1 cluster:d distributor:6 storage:4",
+ stateAfterStorageTransition(fixture, 5, State.DOWN));
+ }
+
+ @Test
+ public void maintenance_wanted_state_not_overwritten() {
+ ClusterFixture fixture = createFixtureForAllUpHierarchicCluster(
+ DistributionBuilder.withGroups(3).eachWithNodeCount(3), 0.99);
+
+ NodeInfo nodeInfo = fixture.cluster.getNodeInfo(new Node(NodeType.STORAGE, 5));
+ fixture.generator.proposeNewNodeState(nodeInfo, new NodeState(NodeType.STORAGE, State.MAINTENANCE));
+ SystemStateListener listener = mock(SystemStateListener.class);
+ assertTrue(fixture.generator.notifyIfNewSystemState(fixture.cluster, listener));
+
+ // Maintenance not counted as down, so group still up
+ assertEquals("version:1 distributor:9 storage:9 .5.s:m", fixture.generatedClusterState());
+
+ // Group goes down, but maintenance node should still be in maintenance
+ assertEquals("version:2 distributor:9 storage:9 .3.s:d .4.s:d .5.s:m",
+ stateAfterStorageTransition(fixture, 4, State.DOWN));
+ }
+
+ @Test
+ public void transient_maintenance_mode_on_down_edge_does_not_take_down_group() {
+ ClusterFixture fixture = createFixtureForAllUpHierarchicCluster(
+ DistributionBuilder.withGroups(3).eachWithNodeCount(3), 0.99);
+ fixture.enableTransientMaintenanceModeOnDown(1000);
+
+        // Our timers are mocked, so taking down node 4 will deterministically transition it to
+        // transient maintenance mode. The group should not be taken down here.
+ assertEquals("version:1 distributor:9 storage:9 .4.s:m",
+ stateAfterStorageTransition(fixture, 4, State.DOWN));
+
+ // However, once grace period expires the group should be taken down.
+ fixture.timer.advanceTime(1001);
+ NodeStateOrHostInfoChangeHandler changeListener = mock(NodeStateOrHostInfoChangeHandler.class);
+ fixture.generator.watchTimers(fixture.cluster, changeListener);
+ SystemStateListener stateListener = mock(SystemStateListener.class);
+ assertTrue(fixture.generator.notifyIfNewSystemState(fixture.cluster, stateListener));
+
+ assertEquals("version:2 distributor:9 storage:9 .3.s:d .4.s:d .5.s:d", fixture.generatedClusterState());
+ }
+
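+    // Mockito matcher that accepts NodeEvents whose description equals the expected string.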
+ private static class NodeEventWithDescription extends ArgumentMatcher<NodeEvent> {
+ private final String expected;
+
+ NodeEventWithDescription(String expected) {
+ this.expected = expected;
+ }
+
+ @Override
+ public boolean matches(Object o) {
+ return expected.equals(((NodeEvent)o).getDescription());
+ }
+ }
+
+ private static NodeEventWithDescription nodeEventWithDescription(String description) {
+ return new NodeEventWithDescription(description);
+ }
+
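+    // Mockito matcher that accepts NodeEvents emitted for the expected node.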
+ private static class EventForNode extends ArgumentMatcher<NodeEvent> {
+ private final Node expected;
+
+ EventForNode(Node expected) {
+ this.expected = expected;
+ }
+
+ @Override
+ public boolean matches(Object o) {
+ return ((NodeEvent)o).getNode().getNode().equals(expected);
+ }
+ }
+
+ private static EventForNode eventForNode(Node expected) {
+ return new EventForNode(expected);
+ }
+
+ private static Node contentNode(int index) {
+ return new Node(NodeType.STORAGE, index);
+ }
+
+ @Test
+ public void taking_down_node_adds_node_specific_event() {
+ ClusterFixture fixture = createFixtureForAllUpHierarchicCluster(
+ DistributionBuilder.withGroups(3).eachWithNodeCount(2), 0.51);
+
+ assertEquals("version:1 distributor:6 storage:4",
+ stateAfterStorageTransition(fixture, 5, State.DOWN));
+
+ verify(fixture.eventLog).addNodeOnlyEvent(argThat(allOf(
+ nodeEventWithDescription("Setting node down as the total availability of its group is " +
+ "below the configured threshold"),
+ eventForNode(contentNode(4)))), any());
+ }
+
+ @Test
+ public void bringing_node_back_up_adds_node_specific_event() {
+ ClusterFixture fixture = createFixtureForAllUpHierarchicCluster(
+ DistributionBuilder.withGroups(3).eachWithNodeCount(2), 0.51);
+
+ assertEquals("version:1 distributor:6 storage:4",
+ stateAfterStorageTransition(fixture, 5, State.DOWN));
+ assertEquals("version:2 distributor:6 storage:6",
+ stateAfterStorageTransition(fixture, 5, State.UP));
+
+ verify(fixture.eventLog).addNodeOnlyEvent(argThat(allOf(
+ nodeEventWithDescription("Group availability restored; taking node back up"),
+ eventForNode(contentNode(4)))), any());
+ }
+
+ @Test
+    public void setting_wanted_state_retired_on_implicitly_down_node_transitions_it_to_retired_immediately() {
+ ClusterFixture fixture = createFixtureForAllUpHierarchicCluster(
+ DistributionBuilder.withGroups(3).eachWithNodeCount(3), 0.99);
+
+ assertEquals("version:1 distributor:9 storage:6",
+ stateAfterStorageTransition(fixture, 6, State.DOWN));
+ // Node 7 is implicitly down. Mark wanted state as retired. It should now be Retired
+ // but not Down.
+ fixture.proposeStorageNodeWantedState(7, State.RETIRED);
+
+ SystemStateListener stateListener = mock(SystemStateListener.class);
+ assertTrue(fixture.generator.notifyIfNewSystemState(fixture.cluster, stateListener));
+ assertEquals("version:2 distributor:9 storage:8 .6.s:d .7.s:r", fixture.generatedClusterState());
+ }
+
+ @Test
+ public void downed_config_retired_node_transitions_back_to_retired_on_up_edge() {
+ ClusterFixture fixture = createFixtureForAllUpHierarchicCluster(
+ DistributionBuilder.withGroups(3).eachWithNodeCount(2), 0.49);
+
+ assertEquals("version:1 distributor:6 storage:6 .4.s:d",
+ stateAfterStorageTransition(fixture, 4, State.DOWN));
+ assertEquals("version:2 distributor:6 storage:4",
+ stateAfterStorageTransition(fixture, 5, State.DOWN));
+
+ // Node 5 gets config-retired under our feet.
+ Set<ConfiguredNode> nodes = new HashSet<>(fixture.cluster.clusterInfo().getConfiguredNodes().values());
+ nodes.remove(new ConfiguredNode(5, false));
+ nodes.add(new ConfiguredNode(5, true));
+ // TODO this should ideally also set the retired flag in the distribution
+ // config, but only the ConfiguredNodes are actually looked at currently.
+ fixture.cluster.setNodes(nodes);
+ fixture.generator.setNodes(fixture.cluster.clusterInfo());
+
+ assertEquals("version:3 distributor:6 storage:6 .4.s:d .5.s:r",
+ stateAfterStorageTransition(fixture, 5, State.UP));
+ }
+
+ @Test
+ public void init_progress_is_preserved_across_group_down_up_edge() {
+ ClusterFixture fixture = createFixtureForAllUpHierarchicCluster(
+ DistributionBuilder.withGroups(3).eachWithNodeCount(2), 0.51);
+
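+        // Node 4 reports Initializing at 50% progress; this must be preserved across its group
+        // being implicitly taken down and subsequently brought back up.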
+ final NodeState newState = new NodeState(NodeType.STORAGE, State.INITIALIZING);
+ newState.setInitProgress(0.5);
+
+ fixture.reportStorageNodeState(4, newState);
+ SystemStateListener stateListener = mock(SystemStateListener.class);
+ assertTrue(fixture.generator.notifyIfNewSystemState(fixture.cluster, stateListener));
+
+ assertEquals("version:1 distributor:6 storage:6 .4.s:i .4.i:0.5", fixture.generatedClusterState());
+
+ assertEquals("version:2 distributor:6 storage:4",
+ stateAfterStorageTransition(fixture, 5, State.DOWN));
+ assertEquals("version:3 distributor:6 storage:6 .4.s:i .4.i:0.5",
+ stateAfterStorageTransition(fixture, 5, State.UP));
+ }
+
+ @Test
+ public void disk_states_are_preserved_across_group_down_up_edge() {
+ ClusterFixture fixture = createFixtureForAllUpHierarchicCluster(
+ DistributionBuilder.withGroups(3).eachWithNodeCount(2), 0.51);
+
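+        // Node 4 reports 7 disks with disk 5 down; the disk states must be preserved across the
+        // group down/up edge.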
+ final NodeState newState = new NodeState(NodeType.STORAGE, State.UP);
+ newState.setDiskCount(7);
+ newState.setDiskState(5, new DiskState(State.DOWN));
+
+ fixture.reportStorageNodeState(4, newState);
+ SystemStateListener stateListener = mock(SystemStateListener.class);
+ assertTrue(fixture.generator.notifyIfNewSystemState(fixture.cluster, stateListener));
+
+ assertEquals("version:1 distributor:6 storage:6 .4.d:7 .4.d.5.s:d", fixture.generatedClusterState());
+
+ assertEquals("version:2 distributor:6 storage:4",
+ stateAfterStorageTransition(fixture, 5, State.DOWN));
+ assertEquals("version:3 distributor:6 storage:6 .4.d:7 .4.d.5.s:d",
+ stateAfterStorageTransition(fixture, 5, State.UP));
+ }
+
+ @Test
+ public void down_wanted_state_is_preserved_across_group_down_up_edge() {
+ ClusterFixture fixture = createFixtureForAllUpHierarchicCluster(
+ DistributionBuilder.withGroups(3).eachWithNodeCount(3), 0.60);
+
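+        // Node 5 gets an explicit Down wanted state with a custom description; this must not be
+        // overwritten when its group is implicitly taken down or when the group comes back up.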
+ NodeInfo nodeInfo = fixture.cluster.getNodeInfo(new Node(NodeType.STORAGE, 5));
+ nodeInfo.setWantedState(new NodeState(NodeType.STORAGE, State.DOWN).setDescription("borkbork"));
+ fixture.generator.proposeNewNodeState(nodeInfo, nodeInfo.getWantedState());
+ SystemStateListener listener = mock(SystemStateListener.class);
+ assertTrue(fixture.generator.notifyIfNewSystemState(fixture.cluster, listener));
+
+ assertEquals("version:1 distributor:9 storage:9 .5.s:d .5.m:borkbork", fixture.verboseGeneratedClusterState());
+
+ assertEquals("version:2 distributor:9 storage:9 " +
+ ".3.s:d .3.m:group\\x20node\\x20availability\\x20below\\x20configured\\x20threshold " +
+ ".4.s:d .4.m:mockdesc .5.s:d .5.m:borkbork",
+ verboseStateAfterStorageTransition(fixture, 4, State.DOWN));
+ assertEquals("version:3 distributor:9 storage:9 .5.s:d .5.m:borkbork",
+ verboseStateAfterStorageTransition(fixture, 4, State.UP));
+ }
+
+}
diff --git a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/GroupAvailabilityCalculatorTest.java b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/GroupAvailabilityCalculatorTest.java
new file mode 100644
index 00000000000..7d44afda68f
--- /dev/null
+++ b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/GroupAvailabilityCalculatorTest.java
@@ -0,0 +1,188 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.vespa.clustercontroller.core;
+
+import com.yahoo.vdslib.state.ClusterState;
+import org.junit.Test;
+
+import java.text.ParseException;
+import java.util.Collections;
+import java.util.HashSet;
+import java.util.Set;
+
+import static org.hamcrest.CoreMatchers.equalTo;
+import static org.junit.Assert.assertThat;
+
+public class GroupAvailabilityCalculatorTest {
+
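+    // Parses a cluster state string, rethrowing ParseException unchecked to keep the tests terse.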
+ private static ClusterState clusterState(String state) {
+ try {
+ return new ClusterState(state);
+ } catch (ParseException e) {
+ throw new RuntimeException(e);
+ }
+ }
+
+ private static GroupAvailabilityCalculator calcForFlatCluster(
+ final int nodeCount,
+ final double minNodeRatioPerGroup)
+ {
+ return GroupAvailabilityCalculator.builder()
+ .withDistribution(DistributionBuilder.forFlatCluster(nodeCount))
+ .withMinNodeRatioPerGroup(minNodeRatioPerGroup)
+ .build();
+ }
+
+ private static GroupAvailabilityCalculator calcForHierarchicCluster(
+ DistributionBuilder.GroupBuilder rootGroup,
+ final double minNodeRatioPerGroup)
+ {
+ return GroupAvailabilityCalculator.builder()
+ .withDistribution(DistributionBuilder.forHierarchicCluster(rootGroup))
+ .withMinNodeRatioPerGroup(minNodeRatioPerGroup)
+ .build();
+ }
+
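+    // Builds the expected set of node indices that should be implicitly taken down.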
+ private static Set<Integer> indices(Integer... idx) {
+ Set<Integer> indices = new HashSet<>();
+ Collections.addAll(indices, idx);
+ return indices;
+ }
+
+ private static Set<Integer> emptySet() { return indices(); }
+
+ @Test
+ public void flat_cluster_does_not_implicitly_take_down_nodes() {
+ GroupAvailabilityCalculator calc = calcForFlatCluster(5, 0.99);
+
+ assertThat(calc.nodesThatShouldBeDown(clusterState(
+ "distributor:5 storage:5 .1.s:d .2.s:d")), equalTo(emptySet()));
+ }
+
+ @Test
+ public void group_node_down_edge_implicitly_marks_down_rest_of_nodes_in_group() {
+ // 3 groups of 2 nodes, take down node #4 (1st node in last group). Since we require
+ // at least 51% of group capacity to be available, implicitly take down the last group
+ // entirely.
+ GroupAvailabilityCalculator calc = calcForHierarchicCluster(
+ DistributionBuilder.withGroups(3).eachWithNodeCount(2), 0.51);
+
+ assertThat(calc.nodesThatShouldBeDown(clusterState(
+ "distributor:6 storage:6 .4.s:d")), equalTo(indices(5)));
+ }
+
+    // Setting 50% as the min ratio in a group with 2 nodes should leave the group up if
+    // one node goes down, i.e. the ratio is an inclusive (closed) lower bound.
+ @Test
+ public void min_ratio_per_group_is_closed_interval() {
+ GroupAvailabilityCalculator calc = calcForHierarchicCluster(
+ DistributionBuilder.withGroups(3).eachWithNodeCount(2), 0.50);
+ assertThat(calc.nodesThatShouldBeDown(clusterState(
+ "distributor:6 storage:6 .4.s:d")), equalTo(emptySet()));
+ }
+
+ @Test
+ public void retired_node_is_counted_as_down() {
+ GroupAvailabilityCalculator calc = calcForHierarchicCluster(
+ DistributionBuilder.withGroups(3).eachWithNodeCount(2), 0.99);
+ assertThat(calc.nodesThatShouldBeDown(clusterState(
+ "distributor:6 storage:6 .1.s:r")), equalTo(indices(0)));
+ }
+
+ @Test
+ public void initializing_node_not_counted_as_down() {
+ GroupAvailabilityCalculator calc = calcForHierarchicCluster(
+ DistributionBuilder.withGroups(3).eachWithNodeCount(2), 0.99);
+ assertThat(calc.nodesThatShouldBeDown(clusterState(
+ "distributor:6 storage:6 .4.s:i")), equalTo(emptySet()));
+ }
+
+ @Test
+ public void maintenance_node_not_counted_as_down() {
+ GroupAvailabilityCalculator calc = calcForHierarchicCluster(
+ DistributionBuilder.withGroups(3).eachWithNodeCount(2), 0.99);
+ assertThat(calc.nodesThatShouldBeDown(clusterState(
+ "distributor:6 storage:6 .4.s:m")), equalTo(emptySet()));
+ }
+
+ @Test
+ public void existing_maintenance_node_not_implicitly_downed_when_group_taken_down() {
+ GroupAvailabilityCalculator calc = calcForHierarchicCluster(
+ DistributionBuilder.withGroups(3).eachWithNodeCount(3), 0.99);
+ assertThat(calc.nodesThatShouldBeDown(clusterState(
+ "distributor:9 storage:9 .4.s:m .5.s:d")), equalTo(indices(3))); // _not_ {3, 4}
+ }
+
+ @Test
+ public void existing_retired_node_not_implicitly_downed_when_group_taken_down() {
+ GroupAvailabilityCalculator calc = calcForHierarchicCluster(
+ DistributionBuilder.withGroups(3).eachWithNodeCount(3), 0.99);
+ assertThat(calc.nodesThatShouldBeDown(clusterState(
+ "distributor:9 storage:9 .4.s:r .5.s:d")), equalTo(indices(3))); // _not_ {3, 4}
+ }
+
+ @Test
+ public void down_to_down_edge_keeps_group_down() {
+ GroupAvailabilityCalculator calc = calcForHierarchicCluster(
+ DistributionBuilder.withGroups(2).eachWithNodeCount(4), 0.76);
+
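+        // Taking down node 1 drops group {0,1,2,3} below the 76% requirement, implicitly downing
+        // the rest of the group. Explicitly downing node 2 as well must keep the others down.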
+ assertThat(calc.nodesThatShouldBeDown(clusterState(
+ "distributor:8 storage:8 .1.s:d")), equalTo(indices(0, 2, 3)));
+
+ assertThat(calc.nodesThatShouldBeDown(clusterState(
+ "distributor:8 storage:8 .1.s:d .2.s:d")), equalTo(indices(0, 3)));
+ }
+
+ // Cluster state representations "prune" downed nodes at the end of the state,
+ // causing "storage:6 .5.s:d" to be reduced to "storage:5". This still implies a
+ // node is down according to the distribution config and must be handled as such.
+ @Test
+ public void implicitly_downed_node_at_state_end_is_counted_as_explicitly_down() {
+ GroupAvailabilityCalculator calc = calcForHierarchicCluster(
+ DistributionBuilder.withGroups(3).eachWithNodeCount(2), 0.99);
+ assertThat(calc.nodesThatShouldBeDown(clusterState(
+ "distributor:6 storage:5")), equalTo(indices(4)));
+ }
+
+ @Test
+ public void non_uniform_group_sizes_are_supported() {
+ GroupAvailabilityCalculator calc = calcForHierarchicCluster(
+ DistributionBuilder.withGroupNodes(1, 2, 3, 4), 0.67);
+
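+        // Groups are assigned consecutive node indices: group 0 = {0}, group 1 = {1,2},
+        // group 2 = {3,4,5}, group 3 = {6,7,8,9}.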
+ assertThat(calc.nodesThatShouldBeDown(clusterState(
+ "distributor:10 storage:10")), equalTo(emptySet()));
+ // Group 0 has only 1 node and should not cause any other nodes to be taken down
+ assertThat(calc.nodesThatShouldBeDown(clusterState(
+ "distributor:10 storage:10 .0.s:d")), equalTo(emptySet()));
+ // Too little availability in group 1
+ assertThat(calc.nodesThatShouldBeDown(clusterState(
+ "distributor:10 storage:10 .1.s:d")), equalTo(indices(2)));
+ // Too little availability in group 2
+ assertThat(calc.nodesThatShouldBeDown(clusterState(
+ "distributor:10 storage:10 .3.s:d")), equalTo(indices(4, 5)));
+        // Group 3 has 75% availability (>= 67%), so no auto take-down there
+ assertThat(calc.nodesThatShouldBeDown(clusterState(
+ "distributor:10 storage:10 .7.s:d")), equalTo(emptySet()));
+        // Drop group 3 availability to 50%; it should now be taken down entirely
+ assertThat(calc.nodesThatShouldBeDown(clusterState(
+ "distributor:10 storage:9 .7.s:d")), equalTo(indices(6, 8)));
+ }
+
+ @Test
+ public void min_ratio_of_zero_never_takes_down_groups_implicitly() {
+ GroupAvailabilityCalculator calc = calcForHierarchicCluster(
+ DistributionBuilder.withGroups(2).eachWithNodeCount(4), 0.0);
+ assertThat(calc.nodesThatShouldBeDown(clusterState(
+ "distributor:8 storage:8")), equalTo(emptySet()));
+ // 1 down in each group
+ assertThat(calc.nodesThatShouldBeDown(clusterState(
+ "distributor:8 storage:8 .0.s:d .4.s:d")), equalTo(emptySet()));
+ // 2 down in each group
+ assertThat(calc.nodesThatShouldBeDown(clusterState(
+ "distributor:8 storage:8 .0.s:d .1.s:d .4.s:d .5.s:d")), equalTo(emptySet()));
+ // 3 down in each group
+ assertThat(calc.nodesThatShouldBeDown(clusterState(
+ "distributor:8 storage:8 .0.s:d .1.s:d .2.s:d .4.s:d .5.s:d .6.s:d")), equalTo(emptySet()));
+ }
+
+}
diff --git a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/NodeSlobrokConfigurationMembershipTest.java b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/NodeSlobrokConfigurationMembershipTest.java
index 10305de116a..ef4c80a2256 100644
--- a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/NodeSlobrokConfigurationMembershipTest.java
+++ b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/NodeSlobrokConfigurationMembershipTest.java
@@ -18,38 +18,6 @@ public class NodeSlobrokConfigurationMembershipTest extends FleetControllerTest
private final Set<Integer> nodeIndices = asIntSet(0, 1, 2, 3);
private final int foreignNode = 6;
- private void waitForStateExcludingNodeSubset(String expectedState, Set<Integer> excludedNodes) throws Exception {
- // Due to the implementation details of the test base, this.waitForState() will always
- // wait until all nodes added in the test have received the latest cluster state. Since we
- // want to entirely ignore node #6, it won't get a cluster state at all and the test will
- // fail unless otherwise handled. We thus use a custom waiter which filters out nodes with
- // the sneaky index (storage and distributors with same index are treated as different nodes
- // in this context).
- Waiter subsetWaiter = new Waiter.Impl(new DataRetriever() {
- @Override
- public Object getMonitor() { return timer; }
- @Override
- public FleetController getFleetController() { return fleetController; }
- @Override
- public List<DummyVdsNode> getDummyNodes() {
- return nodes.stream()
- .filter(n -> !excludedNodes.contains(n.getNode().getIndex()))
- .collect(Collectors.toList());
- }
- @Override
- public int getTimeoutMS() { return timeoutMS; }
- });
- subsetWaiter.waitForState(expectedState);
- }
-
- private static Set<Integer> asIntSet(Integer... idx) {
- return Arrays.asList(idx).stream().collect(Collectors.toSet());
- }
-
- private static Set<ConfiguredNode> asConfiguredNodes(Set<Integer> indices) {
- return indices.stream().map(idx -> new ConfiguredNode(idx, false)).collect(Collectors.toSet());
- }
-
private void setUpClusterWithForeignNode(Set<Integer> validIndices, final int foreignNodeIndex) throws Exception {
final Set<ConfiguredNode> configuredNodes = asConfiguredNodes(validIndices);
FleetControllerOptions options = optionsForConfiguredNodes(configuredNodes);
@@ -63,6 +31,7 @@ public class NodeSlobrokConfigurationMembershipTest extends FleetControllerTest
FleetControllerOptions options = new FleetControllerOptions("mycluster", configuredNodes);
options.maxSlobrokDisconnectGracePeriod = 60 * 1000;
options.nodeStateRequestTimeoutMS = 10000 * 60 * 1000;
+ options.maxTransitionTime = transitionTimes(0);
return options;
}
@@ -108,10 +77,14 @@ public class NodeSlobrokConfigurationMembershipTest extends FleetControllerTest
assertTrue(configuredNodes.remove(new ConfiguredNode(0, true)));
fleetController.updateOptions(options, 0);
- // The previously retired node should now be marked as done, as it no longer
+ // The previously retired node should now be marked as down, as it no longer
// exists from the point of view of the content cluster. We have to use a subset
// state waiter, as the controller will not send the new state to node 0.
waitForStateExcludingNodeSubset("version:\\d+ distributor:4 .0.s:d storage:4 .0.s:d", asIntSet(0));
+
+ // Ensure it remains down for subsequent cluster state versions as well.
+ nodes.get(3).disconnect();
+ waitForStateExcludingNodeSubset("version:\\d+ distributor:4 .0.s:d storage:4 .0.s:d .1.s:d", asIntSet(0, 1));
}
}
diff --git a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/NodeStateChangeCheckerTest.java b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/NodeStateChangeCheckerTest.java
index cb5cee70486..5b53e524102 100644
--- a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/NodeStateChangeCheckerTest.java
+++ b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/NodeStateChangeCheckerTest.java
@@ -4,15 +4,16 @@ package com.yahoo.vespa.clustercontroller.core;
import com.yahoo.vdslib.distribution.ConfiguredNode;
import com.yahoo.vdslib.distribution.Distribution;
import com.yahoo.vdslib.distribution.Group;
+import com.yahoo.vdslib.state.ClusterState;
import com.yahoo.vdslib.state.Node;
import com.yahoo.vdslib.state.NodeState;
import com.yahoo.vdslib.state.NodeType;
import com.yahoo.vdslib.state.State;
import com.yahoo.vespa.clustercontroller.core.hostinfo.HostInfo;
import com.yahoo.vespa.clustercontroller.utils.staterestapi.requests.SetUnitStateRequest;
-import org.junit.Before;
import org.junit.Test;
+import java.text.ParseException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.HashSet;
@@ -29,7 +30,7 @@ public class NodeStateChangeCheckerTest {
private static final int minStorageNodesUp = 3;
private static final int requiredRedundancy = 4;
- private static final int currentClusterState = 2;
+ private static final int currentClusterStateVersion = 2;
private static final double minRatioOfStorageNodesUp = 0.9;
private static final Node nodeDistributor = new Node(NodeType.DISTRIBUTOR, 1);
@@ -42,6 +43,18 @@ public class NodeStateChangeCheckerTest {
return new NodeState(NodeType.STORAGE, state).setDescription(description);
}
+ private static ClusterState clusterState(String state) {
+ try {
+ return new ClusterState(state);
+ } catch (ParseException e) {
+ throw new RuntimeException(e);
+ }
+ }
+
+ private static ClusterState defaultAllUpClusterState() {
+ return clusterState(String.format("version:%d distributor:4 storage:4", currentClusterStateVersion));
+ }
+
private NodeStateChangeChecker createChangeChecker(ContentCluster cluster) {
return new NodeStateChangeChecker(minStorageNodesUp, minRatioOfStorageNodesUp, requiredRedundancy, cluster.clusterInfo());
}
@@ -93,12 +106,22 @@ public class NodeStateChangeCheckerTest {
"}\n";
}
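+    // Marks every configured distributor and storage node as reporting state Up (with distributor
+    // host info present), so tests can exercise cases where the reported state is Up but the
+    // generated cluster state differs.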
+ private void markAllNodesAsReportingStateUp(ContentCluster cluster) {
+ final ClusterInfo clusterInfo = cluster.clusterInfo();
+ final int configuredNodeCount = cluster.clusterInfo().getConfiguredNodes().size();
+ for (int i = 0; i < configuredNodeCount; i++) {
+ clusterInfo.getDistributorNodeInfo(i).setReportedState(new NodeState(NodeType.DISTRIBUTOR, State.UP), 0);
+ clusterInfo.getDistributorNodeInfo(i).setHostInfo(HostInfo.createHostInfo(createDistributorHostInfo(4, 5, 6)));
+ clusterInfo.getStorageNodeInfo(i).setReportedState(new NodeState(NodeType.STORAGE, State.UP), 0);
+ }
+ }
+
@Test
public void testCanUpgradeForce() {
NodeStateChangeChecker nodeStateChangeChecker = createChangeChecker(createCluster(createNodes(1)));
NodeState newState = new NodeState(NodeType.STORAGE, State.INITIALIZING);
NodeStateChangeChecker.Result result = nodeStateChangeChecker.evaluateTransition(
- nodeDistributor, currentClusterState, SetUnitStateRequest.Condition.FORCE,
+ nodeDistributor, defaultAllUpClusterState(), SetUnitStateRequest.Condition.FORCE,
upNodeState, newState);
assertTrue(result.settingWantedStateIsAllowed());
assertTrue(!result.wantedStateAlreadySet());
@@ -108,7 +131,7 @@ public class NodeStateChangeCheckerTest {
public void testSafeSetStateDistributors() {
NodeStateChangeChecker nodeStateChangeChecker = createChangeChecker(createCluster(createNodes(1)));
NodeStateChangeChecker.Result result = nodeStateChangeChecker.evaluateTransition(
- nodeDistributor, currentClusterState, SetUnitStateRequest.Condition.SAFE,
+ nodeDistributor, defaultAllUpClusterState(), SetUnitStateRequest.Condition.SAFE,
upNodeState, maintenanceNodeState);
assertFalse(result.settingWantedStateIsAllowed());
assertFalse(result.wantedStateAlreadySet());
@@ -122,7 +145,7 @@ public class NodeStateChangeCheckerTest {
NodeStateChangeChecker nodeStateChangeChecker = new NodeStateChangeChecker(
5 /* min storage nodes */, minRatioOfStorageNodesUp, requiredRedundancy, cluster.clusterInfo());
NodeStateChangeChecker.Result result = nodeStateChangeChecker.evaluateTransition(
- nodeStorage, currentClusterState, SetUnitStateRequest.Condition.SAFE,
+ nodeStorage, defaultAllUpClusterState(), SetUnitStateRequest.Condition.SAFE,
upNodeState, maintenanceNodeState);
assertFalse(result.settingWantedStateIsAllowed());
assertFalse(result.wantedStateAlreadySet());
@@ -143,12 +166,32 @@ public class NodeStateChangeCheckerTest {
// Not setting nodes up -> all are down
NodeStateChangeChecker.Result result = nodeStateChangeChecker.evaluateTransition(
- nodeStorage, currentClusterState, SetUnitStateRequest.Condition.SAFE,
+ nodeStorage, defaultAllUpClusterState(), SetUnitStateRequest.Condition.SAFE,
maintenanceNodeState, upNodeState);
assertFalse(result.settingWantedStateIsAllowed());
assertFalse(result.wantedStateAlreadySet());
}
+    // A node may be reported as Up but have a generated state of Down if it is among the nodes
+    // taken down implicitly because its group has too low node availability.
+ @Test
+ public void testSetUpSucceedsIfReportedIsUpButGeneratedIsDown() {
+ ContentCluster cluster = createCluster(createNodes(4));
+ NodeStateChangeChecker nodeStateChangeChecker = createChangeChecker(cluster);
+
+ markAllNodesAsReportingStateUp(cluster);
+
+ ClusterState stateWithNodeDown = clusterState(String.format(
+ "version:%d distributor:4 storage:4 .%d.s:d",
+ currentClusterStateVersion, nodeStorage.getIndex()));
+
+ NodeStateChangeChecker.Result result = nodeStateChangeChecker.evaluateTransition(
+ nodeStorage, stateWithNodeDown, SetUnitStateRequest.Condition.SAFE,
+ maintenanceNodeState, upNodeState);
+ assertTrue(result.settingWantedStateIsAllowed());
+ assertFalse(result.wantedStateAlreadySet());
+ }
+
@Test
public void testCannotSetUpIfUnknownOldStateAndReportedIsDown() {
ContentCluster cluster = createCluster(createNodes(4));
@@ -156,7 +199,7 @@ public class NodeStateChangeCheckerTest {
// Not setting nodes up -> all are down
NodeStateChangeChecker.Result result = nodeStateChangeChecker.evaluateTransition(
- nodeStorage, currentClusterState, SetUnitStateRequest.Condition.SAFE,
+ nodeStorage, defaultAllUpClusterState(), SetUnitStateRequest.Condition.SAFE,
new NodeState(NodeType.STORAGE, State.DOWN), upNodeState);
assertFalse(result.settingWantedStateIsAllowed());
assertFalse(result.wantedStateAlreadySet());
@@ -170,7 +213,7 @@ public class NodeStateChangeCheckerTest {
setAllNodesUp(cluster, HostInfo.createHostInfo(createDistributorHostInfo(4, 3, 6)));
NodeStateChangeChecker.Result result = nodeStateChangeChecker.evaluateTransition(
- nodeStorage, currentClusterState, SetUnitStateRequest.Condition.SAFE,
+ nodeStorage, defaultAllUpClusterState(), SetUnitStateRequest.Condition.SAFE,
upNodeState, maintenanceNodeState);
assertFalse(result.settingWantedStateIsAllowed());
assertFalse(result.wantedStateAlreadySet());
@@ -185,7 +228,7 @@ public class NodeStateChangeCheckerTest {
setAllNodesUp(cluster, HostInfo.createHostInfo(createDistributorHostInfo(4, 3, 6)));
NodeStateChangeChecker.Result result = nodeStateChangeChecker.evaluateTransition(
- new Node(NodeType.STORAGE, 3), currentClusterState, SetUnitStateRequest.Condition.SAFE,
+ new Node(NodeType.STORAGE, 3), defaultAllUpClusterState(), SetUnitStateRequest.Condition.SAFE,
upNodeState, maintenanceNodeState);
assertTrue(result.settingWantedStateIsAllowed());
assertFalse(result.wantedStateAlreadySet());
@@ -209,7 +252,7 @@ public class NodeStateChangeCheckerTest {
setAllNodesUp(cluster, HostInfo.createHostInfo(hostInfo));
NodeStateChangeChecker.Result result = nodeStateChangeChecker.evaluateTransition(
- new Node(NodeType.STORAGE, 1), currentClusterState, SetUnitStateRequest.Condition.SAFE,
+ new Node(NodeType.STORAGE, 1), defaultAllUpClusterState(), SetUnitStateRequest.Condition.SAFE,
upNodeState, maintenanceNodeState);
assertTrue(result.settingWantedStateIsAllowed());
assertFalse(result.wantedStateAlreadySet());
@@ -222,7 +265,7 @@ public class NodeStateChangeCheckerTest {
cluster.clusterInfo().getStorageNodeInfo(1).setReportedState(new NodeState(NodeType.STORAGE, State.UP), 0);
NodeStateChangeChecker.Result result = nodeStateChangeChecker.evaluateTransition(
- nodeStorage, currentClusterState, SetUnitStateRequest.Condition.SAFE, upNodeState, maintenanceNodeState);
+ nodeStorage, defaultAllUpClusterState(), SetUnitStateRequest.Condition.SAFE, upNodeState, maintenanceNodeState);
assertFalse(result.settingWantedStateIsAllowed());
assertFalse(result.wantedStateAlreadySet());
assertThat(result.getReason(), is("Distributor node (0) has not reported any cluster state version yet."));
@@ -235,7 +278,7 @@ public class NodeStateChangeCheckerTest {
NodeState currentNodeState = createNodeState(state, oldDescription);
NodeState newNodeState = createNodeState(state, newDescription);
return nodeStateChangeChecker.evaluateTransition(
- nodeStorage, currentClusterState, SetUnitStateRequest.Condition.SAFE,
+ nodeStorage, defaultAllUpClusterState(), SetUnitStateRequest.Condition.SAFE,
currentNodeState, newNodeState);
}
@@ -280,13 +323,16 @@ public class NodeStateChangeCheckerTest {
cluster.clusterInfo().getStorageNodeInfo(x).setReportedState(new NodeState(NodeType.STORAGE, state), 0);
}
+ ClusterState clusterState = defaultAllUpClusterState();
+
if (storageNodeIndex >= 0) { // TODO: Move this into the calling test
NodeState downNodeState = new NodeState(NodeType.STORAGE, State.DOWN);
cluster.clusterInfo().getStorageNodeInfo(storageNodeIndex).setReportedState(downNodeState, 4 /* time */);
+ clusterState.setNodeState(new Node(NodeType.STORAGE, storageNodeIndex), downNodeState);
}
return nodeStateChangeChecker.evaluateTransition(
- nodeStorage, currentClusterState, SetUnitStateRequest.Condition.SAFE, upNodeState, maintenanceNodeState);
+ nodeStorage, clusterState, SetUnitStateRequest.Condition.SAFE, upNodeState, maintenanceNodeState);
}
private void setAllNodesUp(ContentCluster cluster, HostInfo distributorHostInfo) {
@@ -339,6 +385,28 @@ public class NodeStateChangeCheckerTest {
assertThat(result.getReason(), containsString("Not enough storage nodes running"));
}
+ @Test
+ public void testNodeRatioRequirementConsidersGeneratedNodeStates() {
+ ContentCluster cluster = createCluster(createNodes(4));
+ NodeStateChangeChecker nodeStateChangeChecker = createChangeChecker(cluster);
+
+ markAllNodesAsReportingStateUp(cluster);
+
+ // Both minRatioOfStorageNodesUp and minStorageNodesUp imply that a single node being
+ // in state Down should halt the upgrade. This must also take into account the generated
+ // state, not just the reported state. In this case, all nodes are reported as being Up
+ // but one node has a generated state of Down.
+ ClusterState stateWithNodeDown = clusterState(String.format(
+ "version:%d distributor:4 storage:4 .3.s:d",
+ currentClusterStateVersion));
+
+ NodeStateChangeChecker.Result result = nodeStateChangeChecker.evaluateTransition(
+ nodeStorage, stateWithNodeDown, SetUnitStateRequest.Condition.SAFE,
+ upNodeState, maintenanceNodeState);
+ assertFalse(result.settingWantedStateIsAllowed());
+ assertFalse(result.wantedStateAlreadySet());
+ }
+
private List<ConfiguredNode> createNodes(int count) {
List<ConfiguredNode> nodes = new ArrayList<>();
for (int i = 0; i < count; i++)
diff --git a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/SystemStateGeneratorTest.java b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/SystemStateGeneratorTest.java
index ab6185d2b56..35118933b42 100644
--- a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/SystemStateGeneratorTest.java
+++ b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/SystemStateGeneratorTest.java
@@ -99,7 +99,7 @@ public class SystemStateGeneratorTest extends TestCase {
}
private void assertNewClusterStateReceived() {
- assertTrue(generator.notifyIfNewSystemState(systemStateListener));
+ assertTrue(generator.notifyIfNewSystemState(cluster, systemStateListener));
assertTrue(systemStateListener.toString(), systemStateListener.states.size() == 1);
systemStateListener.states.clear();
}
@@ -147,7 +147,7 @@ public class SystemStateGeneratorTest extends TestCase {
private void verifyClusterStateChanged(Node node, State state) {
log.info("Verifying cluster state has been updated for " + node + " to " + state);
- assertTrue(generator.notifyIfNewSystemState(systemStateListener));
+ assertTrue(generator.notifyIfNewSystemState(cluster, systemStateListener));
assertEquals(1, systemStateListener.states.size());
assertEquals(state, systemStateListener.states.get(0).getNodeState(node).getState());
systemStateListener.states.clear();