author    Tor Brede Vekterli <vekterli@yahoo-inc.com>  2016-06-17 10:41:33 +0200
committer Tor Brede Vekterli <vekterli@yahoo-inc.com>  2016-06-17 10:41:33 +0200
commit    916e5a5b8a4574bb2d878c5b07c97b1678df81b3 (patch)
tree      f88615323a5cf12cb08fb6a10977c9d74855dfda /clustercontroller-core/src/test/java/com/yahoo
parent    4edde0fd2999b4343cff30deda8c28e64cef6be7 (diff)
Add configurable automatic group up/down feature based on node availability
Available under the content cluster tuning tag; the feature is currently disabled by default (we need production experience with it first). Also improves handling of nodes removed from config by ensuring these are taken out of the core working cluster state instead of merely being patched away before each state publish.
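
For context, the decision of which nodes to take down implicitly is made by the new GroupAvailabilityCalculator, exercised by GroupAvailabilityCalculatorTest below; a per-group ratio of 0 (the default) disables the behaviour. The following is a minimal sketch of that interaction, reusing the DistributionBuilder test helper added in this commit. The standalone class name, main method and package placement are illustrative assumptions only and are not part of the commit itself.

package com.yahoo.vespa.clustercontroller.core;

import com.yahoo.vdslib.state.ClusterState;

import java.util.Set;

public class GroupTakedownSketch {
    public static void main(String[] args) throws Exception {
        // Three groups of two content nodes each; at least 51% of the nodes in a
        // group must be available for the group to stay up.
        GroupAvailabilityCalculator calc = GroupAvailabilityCalculator.builder()
                .withDistribution(DistributionBuilder.forHierarchicCluster(
                        DistributionBuilder.withGroups(3).eachWithNodeCount(2)))
                .withMinNodeRatioPerGroup(0.51)
                .build();

        // Storage node 4 is reported down, leaving its group (nodes 4 and 5) at 50%
        // availability. The calculator should therefore ask for node 5 to be taken
        // down implicitly as well, mirroring the
        // group_node_down_edge_implicitly_marks_down_rest_of_nodes_in_group test below.
        Set<Integer> implicitlyDown =
                calc.nodesThatShouldBeDown(new ClusterState("distributor:6 storage:6 .4.s:d"));
        System.out.println(implicitlyDown); // expected: [5]
    }
}

GroupAutoTakedownTest then exercises the same logic end-to-end through SystemStateGenerator, where implicitly downed nodes also receive the "group node availability below configured threshold" description in the generated cluster state.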
Diffstat (limited to 'clustercontroller-core/src/test/java/com/yahoo')
-rw-r--r-- clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/ClusterFixture.java | 136
-rw-r--r-- clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/DistributionBuilder.java | 107
-rw-r--r-- clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/FleetControllerTest.java | 106
-rw-r--r-- clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/GroupAutoTakedownLiveConfigTest.java | 100
-rw-r--r-- clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/GroupAutoTakedownTest.java | 370
-rw-r--r-- clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/GroupAvailabilityCalculatorTest.java | 188
-rw-r--r-- clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/NodeSlobrokConfigurationMembershipTest.java | 39
-rw-r--r-- clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/NodeStateChangeCheckerTest.java | 94
-rw-r--r-- clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/SystemStateGeneratorTest.java | 4
9 files changed, 1020 insertions(+), 124 deletions(-)
diff --git a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/ClusterFixture.java b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/ClusterFixture.java
new file mode 100644
index 00000000000..aca26000931
--- /dev/null
+++ b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/ClusterFixture.java
@@ -0,0 +1,136 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.vespa.clustercontroller.core;
+
+import com.yahoo.vdslib.distribution.ConfiguredNode;
+import com.yahoo.vdslib.distribution.Distribution;
+import com.yahoo.vdslib.state.Node;
+import com.yahoo.vdslib.state.NodeState;
+import com.yahoo.vdslib.state.NodeType;
+import com.yahoo.vdslib.state.State;
+import com.yahoo.vespa.clustercontroller.core.listeners.NodeStateOrHostInfoChangeHandler;
+import com.yahoo.vespa.clustercontroller.core.mocks.TestEventLog;
+import com.yahoo.vespa.clustercontroller.utils.util.NoMetricReporter;
+import com.yahoo.vespa.config.content.StorDistributionConfig;
+
+import java.util.Collection;
+import java.util.List;
+import java.util.Map;
+import java.util.TreeMap;
+import java.util.stream.Collectors;
+import java.util.stream.IntStream;
+
+import static org.mockito.Mockito.mock;
+
+class ClusterFixture {
+ public final ContentCluster cluster;
+ public final Distribution distribution;
+ public final FakeTimer timer;
+ public final EventLogInterface eventLog;
+ public final SystemStateGenerator generator;
+
+ public ClusterFixture(ContentCluster cluster, Distribution distribution) {
+ this.cluster = cluster;
+ this.distribution = distribution;
+ this.timer = new FakeTimer();
+ this.eventLog = mock(EventLogInterface.class);
+ this.generator = createGeneratorForFixtureCluster();
+ }
+
+ public SystemStateGenerator createGeneratorForFixtureCluster() {
+ final int controllerIndex = 0;
+ MetricUpdater metricUpdater = new MetricUpdater(new NoMetricReporter(), controllerIndex);
+ SystemStateGenerator generator = new SystemStateGenerator(timer, eventLog, metricUpdater);
+ generator.setNodes(cluster.clusterInfo());
+ generator.setDistribution(distribution);
+ return generator;
+ }
+
+ public void bringEntireClusterUp() {
+ cluster.clusterInfo().getConfiguredNodes().forEach((idx, node) -> {
+ reportStorageNodeState(idx, State.UP);
+ reportDistributorNodeState(idx, State.UP);
+ });
+ }
+
+ public void reportStorageNodeState(final int index, State state) {
+ final Node node = new Node(NodeType.STORAGE, index);
+ final NodeState nodeState = new NodeState(NodeType.STORAGE, state);
+ nodeState.setDescription("mockdesc");
+ NodeStateOrHostInfoChangeHandler handler = mock(NodeStateOrHostInfoChangeHandler.class);
+ NodeInfo nodeInfo = cluster.getNodeInfo(node);
+
+ generator.handleNewReportedNodeState(nodeInfo, nodeState, handler);
+ nodeInfo.setReportedState(nodeState, timer.getCurrentTimeInMillis());
+ }
+
+ public void reportStorageNodeState(final int index, NodeState nodeState) {
+ final Node node = new Node(NodeType.STORAGE, index);
+ final NodeInfo nodeInfo = cluster.getNodeInfo(node);
+ final long mockTime = 1234;
+ NodeStateOrHostInfoChangeHandler changeListener = mock(NodeStateOrHostInfoChangeHandler.class);
+ generator.handleNewReportedNodeState(nodeInfo, nodeState, changeListener);
+ nodeInfo.setReportedState(nodeState, mockTime);
+ }
+
+ public void reportDistributorNodeState(final int index, State state) {
+ final Node node = new Node(NodeType.DISTRIBUTOR, index);
+ final NodeState nodeState = new NodeState(NodeType.DISTRIBUTOR, state);
+ NodeStateOrHostInfoChangeHandler handler = mock(NodeStateOrHostInfoChangeHandler.class);
+ NodeInfo nodeInfo = cluster.getNodeInfo(node);
+
+ generator.handleNewReportedNodeState(nodeInfo, nodeState, handler);
+ nodeInfo.setReportedState(nodeState, timer.getCurrentTimeInMillis());
+ }
+
+ public void proposeStorageNodeWantedState(final int index, State state) {
+ final Node node = new Node(NodeType.STORAGE, index);
+ final NodeState nodeState = new NodeState(NodeType.STORAGE, state);
+ nodeState.setDescription("mockdesc");
+ NodeInfo nodeInfo = cluster.getNodeInfo(node);
+ nodeInfo.setWantedState(nodeState);
+
+ generator.proposeNewNodeState(nodeInfo, nodeState);
+
+ }
+
+ public void disableAutoClusterTakedown() {
+ generator.setMinNodesUp(0, 0, 0.0, 0.0);
+ }
+
+ public void disableTransientMaintenanceModeOnDown() {
+ Map<NodeType, Integer> maxTransitionTime = new TreeMap<>();
+ maxTransitionTime.put(NodeType.DISTRIBUTOR, 0);
+ maxTransitionTime.put(NodeType.STORAGE, 0);
+ generator.setMaxTransitionTime(maxTransitionTime);
+ }
+
+ public void enableTransientMaintenanceModeOnDown(final int transitionTime) {
+ Map<NodeType, Integer> maxTransitionTime = new TreeMap<>();
+ maxTransitionTime.put(NodeType.DISTRIBUTOR, transitionTime);
+ maxTransitionTime.put(NodeType.STORAGE, transitionTime);
+ generator.setMaxTransitionTime(maxTransitionTime);
+ }
+
+ public String generatedClusterState() {
+ return generator.getClusterState().toString();
+ }
+
+ public String verboseGeneratedClusterState() { return generator.getClusterState().toString(true); }
+
+ public static ClusterFixture forFlatCluster(int nodeCount) {
+ Collection<ConfiguredNode> nodes = DistributionBuilder.buildConfiguredNodes(nodeCount);
+
+ Distribution distribution = DistributionBuilder.forFlatCluster(nodeCount);
+ ContentCluster cluster = new ContentCluster("foo", nodes, distribution, 0, 0.0);
+
+ return new ClusterFixture(cluster, distribution);
+ }
+
+ public static ClusterFixture forHierarchicCluster(DistributionBuilder.GroupBuilder root) {
+ List<ConfiguredNode> nodes = DistributionBuilder.buildConfiguredNodes(root.totalNodeCount());
+ Distribution distribution = DistributionBuilder.forHierarchicCluster(root);
+ ContentCluster cluster = new ContentCluster("foo", nodes, distribution, 0, 0.0);
+
+ return new ClusterFixture(cluster, distribution);
+ }
+}
diff --git a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/DistributionBuilder.java b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/DistributionBuilder.java
new file mode 100644
index 00000000000..ebd87d08403
--- /dev/null
+++ b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/DistributionBuilder.java
@@ -0,0 +1,107 @@
+package com.yahoo.vespa.clustercontroller.core;
+
+import com.yahoo.vdslib.distribution.ConfiguredNode;
+import com.yahoo.vdslib.distribution.Distribution;
+import com.yahoo.vespa.config.content.StorDistributionConfig;
+
+import java.util.Collection;
+import java.util.List;
+import java.util.stream.Collectors;
+import java.util.stream.IntStream;
+
+public class DistributionBuilder {
+ // TODO support nested groups
+ public static class GroupBuilder {
+ final int groupCount;
+ public List<Integer> groupsWithNodeCount;
+
+ public GroupBuilder(int groupCount) {
+ this.groupCount = groupCount;
+ }
+
+ public GroupBuilder(int... nodeCounts) {
+ this.groupCount = nodeCounts.length;
+ this.groupsWithNodeCount = IntStream.of(nodeCounts).boxed()
+ .collect(Collectors.toList());
+ }
+
+ public GroupBuilder eachWithNodeCount(int nodeCount) {
+ groupsWithNodeCount = IntStream.range(0, groupCount)
+ .map(i -> nodeCount).boxed()
+ .collect(Collectors.toList());
+ return this;
+ }
+
+ public int totalNodeCount() {
+ return groupsWithNodeCount.stream().reduce(0, Integer::sum);
+ }
+
+ public String groupDistributionSpec() {
+ return IntStream.range(0, groupCount).mapToObj(i -> "1")
+ .collect(Collectors.joining("|")) + "|*";
+ }
+ }
+
+ public static GroupBuilder withGroups(int groups) {
+ return new GroupBuilder(groups);
+ }
+
+ public static GroupBuilder withGroupNodes(int... nodeCounts) {
+ return new GroupBuilder(nodeCounts);
+ }
+
+ public static List<ConfiguredNode> buildConfiguredNodes(int nodeCount) {
+ return IntStream.range(0, nodeCount)
+ .mapToObj(i -> new ConfiguredNode(i, false))
+ .collect(Collectors.toList());
+ }
+
+ private static StorDistributionConfig.Group.Nodes.Builder configuredNode(ConfiguredNode node) {
+ StorDistributionConfig.Group.Nodes.Builder builder = new StorDistributionConfig.Group.Nodes.Builder();
+ builder.index(node.index());
+ return builder;
+ }
+
+ private static StorDistributionConfig.Group.Builder configuredGroup(
+ String name, int index, Collection<ConfiguredNode> nodes) {
+ StorDistributionConfig.Group.Builder builder = new StorDistributionConfig.Group.Builder();
+ builder.name(name);
+ builder.index(Integer.toString(index));
+ nodes.forEach(n -> builder.nodes(configuredNode(n)));
+ return builder;
+ }
+
+ public static Distribution forFlatCluster(int nodeCount) {
+ Collection<ConfiguredNode> nodes = buildConfiguredNodes(nodeCount);
+
+ StorDistributionConfig.Builder configBuilder = new StorDistributionConfig.Builder();
+ configBuilder.redundancy(2);
+ configBuilder.group(configuredGroup("bar", 0, nodes));
+
+ return new Distribution(new StorDistributionConfig(configBuilder));
+ }
+
+ public static Distribution forHierarchicCluster(GroupBuilder root) {
+ List<ConfiguredNode> nodes = buildConfiguredNodes(root.totalNodeCount());
+
+ StorDistributionConfig.Builder configBuilder = new StorDistributionConfig.Builder();
+ configBuilder.redundancy(2);
+
+ StorDistributionConfig.Group.Builder rootBuilder = new StorDistributionConfig.Group.Builder();
+ rootBuilder.name("invalid");
+ rootBuilder.index("invalid");
+ rootBuilder.partitions(root.groupDistributionSpec());
+ configBuilder.group(rootBuilder);
+
+ int offset = 0;
+ for (int group = 0; group < root.groupsWithNodeCount.size(); ++group) {
+ int nodeCount = root.groupsWithNodeCount.get(group);
+ StorDistributionConfig.Group.Builder groupBuilder
+ = configuredGroup("group_" + (group + 1), group + 1, nodes.subList(offset, offset + nodeCount));
+ configBuilder.group(groupBuilder);
+ offset += nodeCount;
+ }
+
+ return new Distribution(new StorDistributionConfig(configBuilder));
+ }
+}
diff --git a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/FleetControllerTest.java b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/FleetControllerTest.java
index 86248d2e1e3..f4b3e648f63 100644
--- a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/FleetControllerTest.java
+++ b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/FleetControllerTest.java
@@ -10,6 +10,7 @@ import com.yahoo.vdslib.distribution.Distribution;
import com.yahoo.vdslib.state.ClusterState;
import com.yahoo.vdslib.state.Node;
import com.yahoo.vdslib.state.NodeState;
+import com.yahoo.vdslib.state.NodeType;
import com.yahoo.vespa.clustercontroller.core.database.DatabaseHandler;
import com.yahoo.vespa.clustercontroller.core.hostinfo.HostInfo;
import com.yahoo.vespa.clustercontroller.core.rpc.RPCCommunicator;
@@ -232,90 +233,43 @@ public abstract class FleetControllerTest implements Waiter {
return nodes;
}
- public interface NodeModifier {
- void modify(NodeInfo node);
+ protected static Set<Integer> asIntSet(Integer... idx) {
+ return Arrays.asList(idx).stream().collect(Collectors.toSet());
}
- NodeModifier makeDefaultTestNodeModifier() {
- return new NodeModifier() {
- @Override
- public void modify(NodeInfo node) {
- if (node.isDistributor()) {
- if (node.getNodeIndex() == 13) {
- node.setPrematureCrashCount(fleetController.getOptions().maxPrematureCrashes + 2);
- }
- return;
- }
- double latency = 75;
- long count = 1000;
- if (node.getNodeIndex() == 4) {
- latency = 300;
- count = 500;
- } else if (node.getNodeIndex() == 7) {
- latency = 120;
- count = 800;
- } else if (node.getNodeIndex() == 21) {
- latency = 2000;
- count = 600;
- } else if (node.getNodeIndex() == 25) {
- node.setPrematureCrashCount(fleetController.getOptions().maxPrematureCrashes + 1);
- } else if (node.getNodeIndex() == 26) {
- node.setPrematureCrashCount(fleetController.getOptions().maxPrematureCrashes);
- }
- String hostInfoString = generateHostInfo(latency, count);
- node.setHostInfo(HostInfo.createHostInfo(hostInfoString));
- }
- };
+ protected static Set<ConfiguredNode> asConfiguredNodes(Set<Integer> indices) {
+ return indices.stream().map(idx -> new ConfiguredNode(idx, false)).collect(Collectors.toSet());
}
- NodeModifier makeStdDevTestNodeModifier() {
- return new NodeModifier() {
- double[] latencies = new double[] { 30, 300, 60, 270 };
- int counter = 0;
-
+ protected void waitForStateExcludingNodeSubset(String expectedState, Set<Integer> excludedNodes) throws Exception {
+ // Due to the implementation details of the test base, this.waitForState() will always
+ // wait until all nodes added in the test have received the latest cluster state. Since we
+ // want to entirely ignore node #6, it won't get a cluster state at all and the test will
+ // fail unless otherwise handled. We thus use a custom waiter which filters out nodes with
+ // the sneaky index (storage and distributors with same index are treated as different nodes
+ // in this context).
+ Waiter subsetWaiter = new Waiter.Impl(new DataRetriever() {
@Override
- public void modify(NodeInfo node) {
- if (node.isDistributor()) {
- return;
- }
- String hostInfo = generateHostInfo(latencies[counter++ % latencies.length], 1500);
- node.setHostInfo(HostInfo.createHostInfo(hostInfo));
+ public Object getMonitor() { return timer; }
+ @Override
+ public FleetController getFleetController() { return fleetController; }
+ @Override
+ public List<DummyVdsNode> getDummyNodes() {
+ return nodes.stream()
+ .filter(n -> !excludedNodes.contains(n.getNode().getIndex()))
+ .collect(Collectors.toList());
}
- };
- }
-
- protected void setUpSlowDiskCluster(NodeModifier callback) throws Exception {
- int nodeCount = 31;
- FleetControllerOptions options = new FleetControllerOptions("mycluster");
- // TODO: multiple groups!
- options.setStorageDistribution(new Distribution(Distribution.getDefaultDistributionConfig(3, nodeCount)));
- setUpFleetController(true, options);
- setUpVdsNodes(true, new DummyVdsNodeOptions(), false, nodeCount);
- waitForStableSystem(nodeCount);
- // Set one node as not being up. It should not contribute to the overall
- // latency or operation metrics, nor should its disks be included.
- nodes.get(2*13).disconnectAsShutdown();
- nodes.get(2*21+1).disconnectAsShutdown();
- waiter.waitForState("version:\\d+ distributor:31 .13.s:d storage:31 .21.s:m");
-
- for (NodeInfo node : fleetController.getCluster().getNodeInfo()) {
- callback.modify(node);
- }
+ @Override
+ public int getTimeoutMS() { return timeoutMS; }
+ });
+ subsetWaiter.waitForState(expectedState);
}
- protected void setUpSimpleCluster(int nodeCount) throws Exception {
- FleetControllerOptions options = new FleetControllerOptions("mycluster");
- // TODO: multiple groups!
- options.setStorageDistribution(new Distribution(Distribution.getDefaultDistributionConfig(3, nodeCount)));
- setUpFleetController(true, options);
- setUpVdsNodes(true, new DummyVdsNodeOptions(), false, nodeCount);
- waitForStableSystem(nodeCount);
- waiter.waitForState("version:\\d+ distributor:" + nodeCount + " storage:" + nodeCount);
-
- NodeModifier callback = makeDefaultTestNodeModifier();
- for (NodeInfo node : fleetController.getCluster().getNodeInfo()) {
- callback.modify(node);
- }
+ protected static Map<NodeType, Integer> transitionTimes(int milliseconds) {
+ Map<NodeType, Integer> maxTransitionTime = new TreeMap<>();
+ maxTransitionTime.put(NodeType.DISTRIBUTOR, milliseconds);
+ maxTransitionTime.put(NodeType.STORAGE, milliseconds);
+ return maxTransitionTime;
}
protected void tearDownSystem() throws Exception {
diff --git a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/GroupAutoTakedownLiveConfigTest.java b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/GroupAutoTakedownLiveConfigTest.java
new file mode 100644
index 00000000000..64bea57c6ad
--- /dev/null
+++ b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/GroupAutoTakedownLiveConfigTest.java
@@ -0,0 +1,100 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.vespa.clustercontroller.core;
+
+import com.yahoo.vdslib.state.NodeType;
+import org.junit.Test;
+
+import java.util.HashMap;
+import java.util.Map;
+import java.util.stream.Collectors;
+
+import static org.junit.Assert.assertFalse;
+
+public class GroupAutoTakedownLiveConfigTest extends FleetControllerTest {
+
+ private long mockConfigGeneration = 1;
+
+
+ private static FleetControllerOptions createOptions(
+ DistributionBuilder.GroupBuilder groupBuilder, double minNodeRatio)
+ {
+ FleetControllerOptions options = new FleetControllerOptions("mycluster");
+ options.setStorageDistribution(DistributionBuilder.forHierarchicCluster(groupBuilder));
+ options.nodes = DistributionBuilder.buildConfiguredNodes(groupBuilder.totalNodeCount())
+ .stream().collect(Collectors.toSet());
+ options.minNodeRatioPerGroup = minNodeRatio;
+ options.maxTransitionTime = transitionTimes(0);
+ return options;
+ }
+
+ private void updateConfigLive(FleetControllerOptions newOptions) {
+ ++mockConfigGeneration;
+ this.fleetController.updateOptions(newOptions, mockConfigGeneration);
+ }
+
+ private void reconfigureWithMinNodeRatio(double minNodeRatio) {
+ FleetControllerOptions newOptions = this.options.clone();
+ newOptions.minNodeRatioPerGroup = minNodeRatio;
+ updateConfigLive(newOptions);
+ }
+
+ private void reconfigureWithDistribution(DistributionBuilder.GroupBuilder groupBuilder) {
+ FleetControllerOptions newOptions = this.options.clone();
+ newOptions.nodes = DistributionBuilder.buildConfiguredNodes(groupBuilder.totalNodeCount())
+ .stream().collect(Collectors.toSet());
+ newOptions.storageDistribution = DistributionBuilder.forHierarchicCluster(groupBuilder);
+ updateConfigLive(newOptions);
+ }
+
+ private void setUp3x3ClusterWithMinNodeRatio(double minNodeRatio) throws Exception {
+ FleetControllerOptions options = createOptions(
+ DistributionBuilder.withGroups(3).eachWithNodeCount(3),
+ minNodeRatio);
+ setUpFleetController(true, options);
+ setUpVdsNodes(true, new DummyVdsNodeOptions(), false, 9);
+ waitForState("version:\\d+ distributor:9 storage:9");
+ }
+
+ private void takeDownContentNode(int index) {
+ // nodes list contains both distributors and storage nodes, with distributors
+ // in even positions and storage nodes in odd positions.
+ final int arrayIndex = index*2 + 1;
+ assertFalse(nodes.get(arrayIndex).isDistributor());
+ nodes.get(arrayIndex).disconnect();
+ }
+
+ @Test
+ public void bootstrap_min_ratio_option_is_propagated_to_group_availability_logic() throws Exception {
+ setUp3x3ClusterWithMinNodeRatio(0.67);
+ takeDownContentNode(0);
+ waitForStateExcludingNodeSubset("version:\\d+ distributor:9 storage:9 .0.s:d .1.s:d .2.s:d", asIntSet(0));
+ }
+
+ @Test
+ public void min_ratio_live_reconfig_immediately_takes_effect() throws Exception {
+ // Initially, arbitrarily many nodes may be down in a group.
+ setUp3x3ClusterWithMinNodeRatio(0.0);
+ takeDownContentNode(3);
+ waitForStateExcludingNodeSubset("version:\\d+ distributor:9 storage:9 .3.s:d", asIntSet(3));
+
+ reconfigureWithMinNodeRatio(0.67);
+ waitForStateExcludingNodeSubset("version:\\d+ distributor:9 storage:9 .3.s:d .4.s:d .5.s:d", asIntSet(3));
+
+ reconfigureWithMinNodeRatio(0.0);
+ // Aaaand back up again!
+ waitForStateExcludingNodeSubset("version:\\d+ distributor:9 storage:9 .3.s:d", asIntSet(3));
+ }
+
+ @Test
+ public void live_distribution_config_changes_trigger_cluster_state_change() throws Exception {
+ setUp3x3ClusterWithMinNodeRatio(0.65);
+ takeDownContentNode(6);
+
+ // Not enough nodes down to trigger group take-down yet
+ waitForStateExcludingNodeSubset("version:\\d+ distributor:9 storage:9 .6.s:d", asIntSet(6));
+ // Removing a node from the same group as node 6 will dip it under the configured threshold,
+ // taking down the entire group. In this case we configure out node 8.
+ reconfigureWithDistribution(DistributionBuilder.withGroupNodes(3, 3, 2));
+ waitForStateExcludingNodeSubset("version:\\d+ distributor:8 storage:6", asIntSet(6, 8));
+ }
+}
diff --git a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/GroupAutoTakedownTest.java b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/GroupAutoTakedownTest.java
new file mode 100644
index 00000000000..93e34b1f772
--- /dev/null
+++ b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/GroupAutoTakedownTest.java
@@ -0,0 +1,370 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.vespa.clustercontroller.core;
+
+import com.yahoo.vdslib.distribution.ConfiguredNode;
+import com.yahoo.vdslib.state.DiskState;
+import com.yahoo.vdslib.state.Node;
+import com.yahoo.vdslib.state.NodeState;
+import com.yahoo.vdslib.state.NodeType;
+import com.yahoo.vdslib.state.State;
+import com.yahoo.vespa.clustercontroller.core.listeners.NodeStateOrHostInfoChangeHandler;
+import com.yahoo.vespa.clustercontroller.core.listeners.SystemStateListener;
+import org.junit.Test;
+import org.mockito.ArgumentMatcher;
+
+import java.util.HashSet;
+import java.util.Set;
+
+import static org.hamcrest.core.AllOf.allOf;
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertFalse;
+import static org.junit.Assert.assertTrue;
+import static org.mockito.Matchers.any;
+import static org.mockito.Matchers.argThat;
+import static org.mockito.Mockito.mock;
+import static org.mockito.Mockito.verify;
+
+public class GroupAutoTakedownTest {
+
+ private static ClusterFixture createFixtureForAllUpFlatCluster(int nodeCount, double minNodeRatioPerGroup) {
+ ClusterFixture fixture = ClusterFixture.forFlatCluster(nodeCount);
+ setSharedFixtureOptions(fixture, minNodeRatioPerGroup);
+ return fixture;
+ }
+
+ private static ClusterFixture createFixtureForAllUpHierarchicCluster(
+ DistributionBuilder.GroupBuilder root, double minNodeRatioPerGroup)
+ {
+ ClusterFixture fixture = ClusterFixture.forHierarchicCluster(root);
+ setSharedFixtureOptions(fixture, minNodeRatioPerGroup);
+ return fixture;
+ }
+
+ private static void setSharedFixtureOptions(ClusterFixture fixture, double minNodeRatioPerGroup) {
+ fixture.generator.setMinNodeRatioPerGroup(minNodeRatioPerGroup);
+ fixture.disableTransientMaintenanceModeOnDown();
+ fixture.disableAutoClusterTakedown();
+ fixture.bringEntireClusterUp();
+ }
+
+ private String stateAfterStorageTransition(ClusterFixture fixture, final int index, final State state) {
+ transitionStoreNodeToState(fixture, index, state);
+ return fixture.generatedClusterState();
+ }
+
+ private String verboseStateAfterStorageTransition(ClusterFixture fixture, final int index, final State state) {
+ transitionStoreNodeToState(fixture, index, state);
+ return fixture.verboseGeneratedClusterState();
+ }
+
+ private void transitionStoreNodeToState(ClusterFixture fixture, int index, State state) {
+ fixture.reportStorageNodeState(index, state);
+ SystemStateListener listener = mock(SystemStateListener.class);
+ assertTrue(fixture.generator.notifyIfNewSystemState(fixture.cluster, listener));
+ }
+
+ /**
+ * Use a per-group availability requirement ratio of 99%. Ensure that taking down a single
+ * node out of 5 in a flat hierarchy does not take down the cluster, i.e. the config should
+ * not apply to a flat structure.
+ */
+ @Test
+ public void config_does_not_apply_to_flat_hierarchy_clusters() {
+ ClusterFixture fixture = createFixtureForAllUpFlatCluster(5, 0.99);
+
+ SystemStateListener listener = mock(SystemStateListener.class);
+ // First invocation; generates initial state and clears "new state" flag
+ assertTrue(fixture.generator.notifyIfNewSystemState(fixture.cluster, listener));
+ assertEquals("version:1 distributor:5 storage:5", fixture.generatedClusterState());
+
+ assertEquals("version:2 distributor:5 storage:5 .1.s:d",
+ stateAfterStorageTransition(fixture, 1, State.DOWN));
+ }
+
+ @Test
+ public void group_node_down_edge_implicitly_marks_down_rest_of_nodes_in_group() {
+ ClusterFixture fixture = createFixtureForAllUpHierarchicCluster(
+ DistributionBuilder.withGroups(3).eachWithNodeCount(2), 0.51);
+
+ SystemStateListener listener = mock(SystemStateListener.class);
+ assertTrue(fixture.generator.notifyIfNewSystemState(fixture.cluster, listener));
+ assertEquals("version:1 distributor:6 storage:6", fixture.generatedClusterState());
+
+ // Same group as node 4
+ assertEquals("version:2 distributor:6 storage:4",
+ stateAfterStorageTransition(fixture, 5, State.DOWN));
+ // Same group as node 1
+ assertEquals("version:3 distributor:6 storage:4 .0.s:d .1.s:d",
+ stateAfterStorageTransition(fixture, 0, State.DOWN));
+ }
+
+ @Test
+ public void restored_group_node_availability_takes_group_back_up_automatically() {
+ ClusterFixture fixture = createFixtureForAllUpHierarchicCluster(
+ DistributionBuilder.withGroups(3).eachWithNodeCount(2), 0.51);
+
+ // Group #2 -> down
+ assertEquals("version:1 distributor:6 storage:4",
+ stateAfterStorageTransition(fixture, 5, State.DOWN));
+
+ // Group #2 -> back up again
+ assertEquals("version:2 distributor:6 storage:6",
+ stateAfterStorageTransition(fixture, 5, State.UP));
+ }
+
+ @Test
+ public void no_op_for_downed_nodes_in_already_downed_group() {
+ ClusterFixture fixture = createFixtureForAllUpHierarchicCluster(
+ DistributionBuilder.withGroups(3).eachWithNodeCount(2), 0.51);
+
+ assertEquals("version:1 distributor:6 storage:4",
+ stateAfterStorageTransition(fixture, 5, State.DOWN));
+
+ // 4, 5 in same group; this should not cause a new state since it's already implicitly down
+ fixture.reportStorageNodeState(4, State.DOWN);
+
+ SystemStateListener listener = mock(SystemStateListener.class);
+ assertFalse(fixture.generator.notifyIfNewSystemState(fixture.cluster, listener));
+
+ assertEquals("version:1 distributor:6 storage:4", fixture.generatedClusterState());
+ }
+
+ @Test
+ public void verbose_node_state_description_updated_for_implicitly_downed_nodes() {
+ ClusterFixture fixture = createFixtureForAllUpHierarchicCluster(
+ DistributionBuilder.withGroups(3).eachWithNodeCount(3), 0.75);
+
+ // Nodes 6 and 7 are taken down implicitly and should have a message reflecting this.
+ // Node 8 is taken down by the fixture and gets a fixture-assigned message that
+ // we should _not_ lose/overwrite.
+ assertEquals("version:1 distributor:9 storage:9 .6.s:d " +
+ ".6.m:group\\x20node\\x20availability\\x20below\\x20configured\\x20threshold " +
+ ".7.s:d " +
+ ".7.m:group\\x20node\\x20availability\\x20below\\x20configured\\x20threshold " +
+ ".8.s:d .8.m:mockdesc",
+ verboseStateAfterStorageTransition(fixture, 8, State.DOWN));
+ }
+
+ @Test
+ public void legacy_cluster_wide_availabilty_ratio_is_computed_after_group_takedowns() {
+ ClusterFixture fixture = createFixtureForAllUpHierarchicCluster(
+ DistributionBuilder.withGroups(3).eachWithNodeCount(2), 0.51);
+ fixture.generator.setMinNodesUp(5, 5, 0.51, 0.51);
+
+ // Taking down a node in a group forces the entire group down, which leaves us with
+ // only 4 content nodes (vs. minimum of 5 as specified above). The entire cluster
+ // should be marked as down in this case.
+ assertEquals("version:1 cluster:d distributor:6 storage:4",
+ stateAfterStorageTransition(fixture, 5, State.DOWN));
+ }
+
+ @Test
+ public void maintenance_wanted_state_not_overwritten() {
+ ClusterFixture fixture = createFixtureForAllUpHierarchicCluster(
+ DistributionBuilder.withGroups(3).eachWithNodeCount(3), 0.99);
+
+ NodeInfo nodeInfo = fixture.cluster.getNodeInfo(new Node(NodeType.STORAGE, 5));
+ fixture.generator.proposeNewNodeState(nodeInfo, new NodeState(NodeType.STORAGE, State.MAINTENANCE));
+ SystemStateListener listener = mock(SystemStateListener.class);
+ assertTrue(fixture.generator.notifyIfNewSystemState(fixture.cluster, listener));
+
+ // Maintenance not counted as down, so group still up
+ assertEquals("version:1 distributor:9 storage:9 .5.s:m", fixture.generatedClusterState());
+
+ // Group goes down, but maintenance node should still be in maintenance
+ assertEquals("version:2 distributor:9 storage:9 .3.s:d .4.s:d .5.s:m",
+ stateAfterStorageTransition(fixture, 4, State.DOWN));
+ }
+
+ @Test
+ public void transient_maintenance_mode_on_down_edge_does_not_take_down_group() {
+ ClusterFixture fixture = createFixtureForAllUpHierarchicCluster(
+ DistributionBuilder.withGroups(3).eachWithNodeCount(3), 0.99);
+ fixture.enableTransientMaintenanceModeOnDown(1000);
+
+ // Our timers are mocked, so taking down node 4 will deterministically transition to
+ // a transient maintenance mode. Group should not be taken down here.
+ assertEquals("version:1 distributor:9 storage:9 .4.s:m",
+ stateAfterStorageTransition(fixture, 4, State.DOWN));
+
+ // However, once grace period expires the group should be taken down.
+ fixture.timer.advanceTime(1001);
+ NodeStateOrHostInfoChangeHandler changeListener = mock(NodeStateOrHostInfoChangeHandler.class);
+ fixture.generator.watchTimers(fixture.cluster, changeListener);
+ SystemStateListener stateListener = mock(SystemStateListener.class);
+ assertTrue(fixture.generator.notifyIfNewSystemState(fixture.cluster, stateListener));
+
+ assertEquals("version:2 distributor:9 storage:9 .3.s:d .4.s:d .5.s:d", fixture.generatedClusterState());
+ }
+
+ private static class NodeEventWithDescription extends ArgumentMatcher<NodeEvent> {
+ private final String expected;
+
+ NodeEventWithDescription(String expected) {
+ this.expected = expected;
+ }
+
+ @Override
+ public boolean matches(Object o) {
+ return expected.equals(((NodeEvent)o).getDescription());
+ }
+ }
+
+ private static NodeEventWithDescription nodeEventWithDescription(String description) {
+ return new NodeEventWithDescription(description);
+ }
+
+ private static class EventForNode extends ArgumentMatcher<NodeEvent> {
+ private final Node expected;
+
+ EventForNode(Node expected) {
+ this.expected = expected;
+ }
+
+ @Override
+ public boolean matches(Object o) {
+ return ((NodeEvent)o).getNode().getNode().equals(expected);
+ }
+ }
+
+ private static EventForNode eventForNode(Node expected) {
+ return new EventForNode(expected);
+ }
+
+ private static Node contentNode(int index) {
+ return new Node(NodeType.STORAGE, index);
+ }
+
+ @Test
+ public void taking_down_node_adds_node_specific_event() {
+ ClusterFixture fixture = createFixtureForAllUpHierarchicCluster(
+ DistributionBuilder.withGroups(3).eachWithNodeCount(2), 0.51);
+
+ assertEquals("version:1 distributor:6 storage:4",
+ stateAfterStorageTransition(fixture, 5, State.DOWN));
+
+ verify(fixture.eventLog).addNodeOnlyEvent(argThat(allOf(
+ nodeEventWithDescription("Setting node down as the total availability of its group is " +
+ "below the configured threshold"),
+ eventForNode(contentNode(4)))), any());
+ }
+
+ @Test
+ public void bringing_node_back_up_adds_node_specific_event() {
+ ClusterFixture fixture = createFixtureForAllUpHierarchicCluster(
+ DistributionBuilder.withGroups(3).eachWithNodeCount(2), 0.51);
+
+ assertEquals("version:1 distributor:6 storage:4",
+ stateAfterStorageTransition(fixture, 5, State.DOWN));
+ assertEquals("version:2 distributor:6 storage:6",
+ stateAfterStorageTransition(fixture, 5, State.UP));
+
+ verify(fixture.eventLog).addNodeOnlyEvent(argThat(allOf(
+ nodeEventWithDescription("Group availability restored; taking node back up"),
+ eventForNode(contentNode(4)))), any());
+ }
+
+ @Test
+ public void wanted_state_retired_implicitly_down_node_transitioned_it_to_retired_mode_immediately() {
+ ClusterFixture fixture = createFixtureForAllUpHierarchicCluster(
+ DistributionBuilder.withGroups(3).eachWithNodeCount(3), 0.99);
+
+ assertEquals("version:1 distributor:9 storage:6",
+ stateAfterStorageTransition(fixture, 6, State.DOWN));
+ // Node 7 is implicitly down. Mark wanted state as retired. It should now be Retired
+ // but not Down.
+ fixture.proposeStorageNodeWantedState(7, State.RETIRED);
+
+ SystemStateListener stateListener = mock(SystemStateListener.class);
+ assertTrue(fixture.generator.notifyIfNewSystemState(fixture.cluster, stateListener));
+ assertEquals("version:2 distributor:9 storage:8 .6.s:d .7.s:r", fixture.generatedClusterState());
+ }
+
+ @Test
+ public void downed_config_retired_node_transitions_back_to_retired_on_up_edge() {
+ ClusterFixture fixture = createFixtureForAllUpHierarchicCluster(
+ DistributionBuilder.withGroups(3).eachWithNodeCount(2), 0.49);
+
+ assertEquals("version:1 distributor:6 storage:6 .4.s:d",
+ stateAfterStorageTransition(fixture, 4, State.DOWN));
+ assertEquals("version:2 distributor:6 storage:4",
+ stateAfterStorageTransition(fixture, 5, State.DOWN));
+
+ // Node 5 gets config-retired under our feet.
+ Set<ConfiguredNode> nodes = new HashSet<>(fixture.cluster.clusterInfo().getConfiguredNodes().values());
+ nodes.remove(new ConfiguredNode(5, false));
+ nodes.add(new ConfiguredNode(5, true));
+ // TODO this should ideally also set the retired flag in the distribution
+ // config, but only the ConfiguredNodes are actually looked at currently.
+ fixture.cluster.setNodes(nodes);
+ fixture.generator.setNodes(fixture.cluster.clusterInfo());
+
+ assertEquals("version:3 distributor:6 storage:6 .4.s:d .5.s:r",
+ stateAfterStorageTransition(fixture, 5, State.UP));
+ }
+
+ @Test
+ public void init_progress_is_preserved_across_group_down_up_edge() {
+ ClusterFixture fixture = createFixtureForAllUpHierarchicCluster(
+ DistributionBuilder.withGroups(3).eachWithNodeCount(2), 0.51);
+
+ final Node node = new Node(NodeType.STORAGE, 4);
+ final NodeState newState = new NodeState(NodeType.STORAGE, State.INITIALIZING);
+ newState.setInitProgress(0.5);
+
+ fixture.reportStorageNodeState(4, newState);
+ SystemStateListener stateListener = mock(SystemStateListener.class);
+ assertTrue(fixture.generator.notifyIfNewSystemState(fixture.cluster, stateListener));
+
+ assertEquals("version:1 distributor:6 storage:6 .4.s:i .4.i:0.5", fixture.generatedClusterState());
+
+ assertEquals("version:2 distributor:6 storage:4",
+ stateAfterStorageTransition(fixture, 5, State.DOWN));
+ assertEquals("version:3 distributor:6 storage:6 .4.s:i .4.i:0.5",
+ stateAfterStorageTransition(fixture, 5, State.UP));
+ }
+
+ @Test
+ public void disk_states_are_preserved_across_group_down_up_edge() {
+ ClusterFixture fixture = createFixtureForAllUpHierarchicCluster(
+ DistributionBuilder.withGroups(3).eachWithNodeCount(2), 0.51);
+
+ final Node node = new Node(NodeType.STORAGE, 4);
+ final NodeState newState = new NodeState(NodeType.STORAGE, State.UP);
+ newState.setDiskCount(7);
+ newState.setDiskState(5, new DiskState(State.DOWN));
+
+ fixture.reportStorageNodeState(4, newState);
+ SystemStateListener stateListener = mock(SystemStateListener.class);
+ assertTrue(fixture.generator.notifyIfNewSystemState(fixture.cluster, stateListener));
+
+ assertEquals("version:1 distributor:6 storage:6 .4.d:7 .4.d.5.s:d", fixture.generatedClusterState());
+
+ assertEquals("version:2 distributor:6 storage:4",
+ stateAfterStorageTransition(fixture, 5, State.DOWN));
+ assertEquals("version:3 distributor:6 storage:6 .4.d:7 .4.d.5.s:d",
+ stateAfterStorageTransition(fixture, 5, State.UP));
+ }
+
+ @Test
+ public void down_wanted_state_is_preserved_across_group_down_up_edge() {
+ ClusterFixture fixture = createFixtureForAllUpHierarchicCluster(
+ DistributionBuilder.withGroups(3).eachWithNodeCount(3), 0.60);
+
+ NodeInfo nodeInfo = fixture.cluster.getNodeInfo(new Node(NodeType.STORAGE, 5));
+ nodeInfo.setWantedState(new NodeState(NodeType.STORAGE, State.DOWN).setDescription("borkbork"));
+ fixture.generator.proposeNewNodeState(nodeInfo, nodeInfo.getWantedState());
+ SystemStateListener listener = mock(SystemStateListener.class);
+ assertTrue(fixture.generator.notifyIfNewSystemState(fixture.cluster, listener));
+
+ assertEquals("version:1 distributor:9 storage:9 .5.s:d .5.m:borkbork", fixture.verboseGeneratedClusterState());
+
+ assertEquals("version:2 distributor:9 storage:9 " +
+ ".3.s:d .3.m:group\\x20node\\x20availability\\x20below\\x20configured\\x20threshold " +
+ ".4.s:d .4.m:mockdesc .5.s:d .5.m:borkbork",
+ verboseStateAfterStorageTransition(fixture, 4, State.DOWN));
+ assertEquals("version:3 distributor:9 storage:9 .5.s:d .5.m:borkbork",
+ verboseStateAfterStorageTransition(fixture, 4, State.UP));
+ }
+
+}
diff --git a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/GroupAvailabilityCalculatorTest.java b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/GroupAvailabilityCalculatorTest.java
new file mode 100644
index 00000000000..7d44afda68f
--- /dev/null
+++ b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/GroupAvailabilityCalculatorTest.java
@@ -0,0 +1,188 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.vespa.clustercontroller.core;
+
+import com.yahoo.vdslib.state.ClusterState;
+import org.junit.Test;
+
+import java.text.ParseException;
+import java.util.Collections;
+import java.util.HashSet;
+import java.util.Set;
+
+import static org.hamcrest.CoreMatchers.equalTo;
+import static org.junit.Assert.assertThat;
+
+public class GroupAvailabilityCalculatorTest {
+
+ private static ClusterState clusterState(String state) {
+ try {
+ return new ClusterState(state);
+ } catch (ParseException e) {
+ throw new RuntimeException(e);
+ }
+ }
+
+ private static GroupAvailabilityCalculator calcForFlatCluster(
+ final int nodeCount,
+ final double minNodeRatioPerGroup)
+ {
+ return GroupAvailabilityCalculator.builder()
+ .withDistribution(DistributionBuilder.forFlatCluster(nodeCount))
+ .withMinNodeRatioPerGroup(minNodeRatioPerGroup)
+ .build();
+ }
+
+ private static GroupAvailabilityCalculator calcForHierarchicCluster(
+ DistributionBuilder.GroupBuilder rootGroup,
+ final double minNodeRatioPerGroup)
+ {
+ return GroupAvailabilityCalculator.builder()
+ .withDistribution(DistributionBuilder.forHierarchicCluster(rootGroup))
+ .withMinNodeRatioPerGroup(minNodeRatioPerGroup)
+ .build();
+ }
+
+ private static Set<Integer> indices(Integer... idx) {
+ Set<Integer> indices = new HashSet<>();
+ Collections.addAll(indices, idx);
+ return indices;
+ }
+
+ private static Set<Integer> emptySet() { return indices(); }
+
+ @Test
+ public void flat_cluster_does_not_implicitly_take_down_nodes() {
+ GroupAvailabilityCalculator calc = calcForFlatCluster(5, 0.99);
+
+ assertThat(calc.nodesThatShouldBeDown(clusterState(
+ "distributor:5 storage:5 .1.s:d .2.s:d")), equalTo(emptySet()));
+
+ }
+
+ @Test
+ public void group_node_down_edge_implicitly_marks_down_rest_of_nodes_in_group() {
+ // 3 groups of 2 nodes, take down node #4 (1st node in last group). Since we require
+ // at least 51% of group capacity to be available, implicitly take down the last group
+ // entirely.
+ GroupAvailabilityCalculator calc = calcForHierarchicCluster(
+ DistributionBuilder.withGroups(3).eachWithNodeCount(2), 0.51);
+
+ assertThat(calc.nodesThatShouldBeDown(clusterState(
+ "distributor:6 storage:6 .4.s:d")), equalTo(indices(5)));
+ }
+
+ // Setting 50% as min ratio in a group with 2 nodes should let group be up if
+ // one node goes down.
+ @Test
+ public void min_ratio_per_group_is_closed_interval() {
+ GroupAvailabilityCalculator calc = calcForHierarchicCluster(
+ DistributionBuilder.withGroups(3).eachWithNodeCount(2), 0.50);
+ assertThat(calc.nodesThatShouldBeDown(clusterState(
+ "distributor:6 storage:6 .4.s:d")), equalTo(emptySet()));
+ }
+
+ @Test
+ public void retired_node_is_counted_as_down() {
+ GroupAvailabilityCalculator calc = calcForHierarchicCluster(
+ DistributionBuilder.withGroups(3).eachWithNodeCount(2), 0.99);
+ assertThat(calc.nodesThatShouldBeDown(clusterState(
+ "distributor:6 storage:6 .1.s:r")), equalTo(indices(0)));
+ }
+
+ @Test
+ public void initializing_node_not_counted_as_down() {
+ GroupAvailabilityCalculator calc = calcForHierarchicCluster(
+ DistributionBuilder.withGroups(3).eachWithNodeCount(2), 0.99);
+ assertThat(calc.nodesThatShouldBeDown(clusterState(
+ "distributor:6 storage:6 .4.s:i")), equalTo(emptySet()));
+ }
+
+ @Test
+ public void maintenance_node_not_counted_as_down() {
+ GroupAvailabilityCalculator calc = calcForHierarchicCluster(
+ DistributionBuilder.withGroups(3).eachWithNodeCount(2), 0.99);
+ assertThat(calc.nodesThatShouldBeDown(clusterState(
+ "distributor:6 storage:6 .4.s:m")), equalTo(emptySet()));
+ }
+
+ @Test
+ public void existing_maintenance_node_not_implicitly_downed_when_group_taken_down() {
+ GroupAvailabilityCalculator calc = calcForHierarchicCluster(
+ DistributionBuilder.withGroups(3).eachWithNodeCount(3), 0.99);
+ assertThat(calc.nodesThatShouldBeDown(clusterState(
+ "distributor:9 storage:9 .4.s:m .5.s:d")), equalTo(indices(3))); // _not_ {3, 4}
+ }
+
+ @Test
+ public void existing_retired_node_not_implicitly_downed_when_group_taken_down() {
+ GroupAvailabilityCalculator calc = calcForHierarchicCluster(
+ DistributionBuilder.withGroups(3).eachWithNodeCount(3), 0.99);
+ assertThat(calc.nodesThatShouldBeDown(clusterState(
+ "distributor:9 storage:9 .4.s:r .5.s:d")), equalTo(indices(3))); // _not_ {3, 4}
+ }
+
+ @Test
+ public void down_to_down_edge_keeps_group_down() {
+ GroupAvailabilityCalculator calc = calcForHierarchicCluster(
+ DistributionBuilder.withGroups(2).eachWithNodeCount(4), 0.76);
+
+ assertThat(calc.nodesThatShouldBeDown(clusterState(
+ "distributor:8 storage:8 .1.s:d")), equalTo(indices(0, 2, 3)));
+
+ assertThat(calc.nodesThatShouldBeDown(clusterState(
+ "distributor:8 storage:8 .1.s:d .2.s:d")), equalTo(indices(0, 3)));
+ }
+
+ // Cluster state representations "prune" downed nodes at the end of the state,
+ // causing "storage:6 .5.s:d" to be reduced to "storage:5". This still implies a
+ // node is down according to the distribution config and must be handled as such.
+ @Test
+ public void implicitly_downed_node_at_state_end_is_counted_as_explicitly_down() {
+ GroupAvailabilityCalculator calc = calcForHierarchicCluster(
+ DistributionBuilder.withGroups(3).eachWithNodeCount(2), 0.99);
+ assertThat(calc.nodesThatShouldBeDown(clusterState(
+ "distributor:6 storage:5")), equalTo(indices(4)));
+ }
+
+ @Test
+ public void non_uniform_group_sizes_are_supported() {
+ GroupAvailabilityCalculator calc = calcForHierarchicCluster(
+ DistributionBuilder.withGroupNodes(1, 2, 3, 4), 0.67);
+
+ assertThat(calc.nodesThatShouldBeDown(clusterState(
+ "distributor:10 storage:10")), equalTo(emptySet()));
+ // Group 0 has only 1 node and should not cause any other nodes to be taken down
+ assertThat(calc.nodesThatShouldBeDown(clusterState(
+ "distributor:10 storage:10 .0.s:d")), equalTo(emptySet()));
+ // Too little availability in group 1
+ assertThat(calc.nodesThatShouldBeDown(clusterState(
+ "distributor:10 storage:10 .1.s:d")), equalTo(indices(2)));
+ // Too little availability in group 2
+ assertThat(calc.nodesThatShouldBeDown(clusterState(
+ "distributor:10 storage:10 .3.s:d")), equalTo(indices(4, 5)));
+ // Group 4 has 75% availability (>= 67%), so no auto take-down there
+ assertThat(calc.nodesThatShouldBeDown(clusterState(
+ "distributor:10 storage:10 .7.s:d")), equalTo(emptySet()));
+ // Drop group 4 availability to 50%; it should now be taken down entirely
+ assertThat(calc.nodesThatShouldBeDown(clusterState(
+ "distributor:10 storage:9 .7.s:d")), equalTo(indices(6, 8)));
+ }
+
+ @Test
+ public void min_ratio_of_zero_never_takes_down_groups_implicitly() {
+ GroupAvailabilityCalculator calc = calcForHierarchicCluster(
+ DistributionBuilder.withGroups(2).eachWithNodeCount(4), 0.0);
+ assertThat(calc.nodesThatShouldBeDown(clusterState(
+ "distributor:8 storage:8")), equalTo(emptySet()));
+ // 1 down in each group
+ assertThat(calc.nodesThatShouldBeDown(clusterState(
+ "distributor:8 storage:8 .0.s:d .4.s:d")), equalTo(emptySet()));
+ // 2 down in each group
+ assertThat(calc.nodesThatShouldBeDown(clusterState(
+ "distributor:8 storage:8 .0.s:d .1.s:d .4.s:d .5.s:d")), equalTo(emptySet()));
+ // 3 down in each group
+ assertThat(calc.nodesThatShouldBeDown(clusterState(
+ "distributor:8 storage:8 .0.s:d .1.s:d .2.s:d .4.s:d .5.s:d .6.s:d")), equalTo(emptySet()));
+ }
+
+}
diff --git a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/NodeSlobrokConfigurationMembershipTest.java b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/NodeSlobrokConfigurationMembershipTest.java
index 10305de116a..ef4c80a2256 100644
--- a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/NodeSlobrokConfigurationMembershipTest.java
+++ b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/NodeSlobrokConfigurationMembershipTest.java
@@ -18,38 +18,6 @@ public class NodeSlobrokConfigurationMembershipTest extends FleetControllerTest
private final Set<Integer> nodeIndices = asIntSet(0, 1, 2, 3);
private final int foreignNode = 6;
- private void waitForStateExcludingNodeSubset(String expectedState, Set<Integer> excludedNodes) throws Exception {
- // Due to the implementation details of the test base, this.waitForState() will always
- // wait until all nodes added in the test have received the latest cluster state. Since we
- // want to entirely ignore node #6, it won't get a cluster state at all and the test will
- // fail unless otherwise handled. We thus use a custom waiter which filters out nodes with
- // the sneaky index (storage and distributors with same index are treated as different nodes
- // in this context).
- Waiter subsetWaiter = new Waiter.Impl(new DataRetriever() {
- @Override
- public Object getMonitor() { return timer; }
- @Override
- public FleetController getFleetController() { return fleetController; }
- @Override
- public List<DummyVdsNode> getDummyNodes() {
- return nodes.stream()
- .filter(n -> !excludedNodes.contains(n.getNode().getIndex()))
- .collect(Collectors.toList());
- }
- @Override
- public int getTimeoutMS() { return timeoutMS; }
- });
- subsetWaiter.waitForState(expectedState);
- }
-
- private static Set<Integer> asIntSet(Integer... idx) {
- return Arrays.asList(idx).stream().collect(Collectors.toSet());
- }
-
- private static Set<ConfiguredNode> asConfiguredNodes(Set<Integer> indices) {
- return indices.stream().map(idx -> new ConfiguredNode(idx, false)).collect(Collectors.toSet());
- }
-
private void setUpClusterWithForeignNode(Set<Integer> validIndices, final int foreignNodeIndex) throws Exception {
final Set<ConfiguredNode> configuredNodes = asConfiguredNodes(validIndices);
FleetControllerOptions options = optionsForConfiguredNodes(configuredNodes);
@@ -63,6 +31,7 @@ public class NodeSlobrokConfigurationMembershipTest extends FleetControllerTest
FleetControllerOptions options = new FleetControllerOptions("mycluster", configuredNodes);
options.maxSlobrokDisconnectGracePeriod = 60 * 1000;
options.nodeStateRequestTimeoutMS = 10000 * 60 * 1000;
+ options.maxTransitionTime = transitionTimes(0);
return options;
}
@@ -108,10 +77,14 @@ public class NodeSlobrokConfigurationMembershipTest extends FleetControllerTest
assertTrue(configuredNodes.remove(new ConfiguredNode(0, true)));
fleetController.updateOptions(options, 0);
- // The previously retired node should now be marked as done, as it no longer
+ // The previously retired node should now be marked as down, as it no longer
// exists from the point of view of the content cluster. We have to use a subset
// state waiter, as the controller will not send the new state to node 0.
waitForStateExcludingNodeSubset("version:\\d+ distributor:4 .0.s:d storage:4 .0.s:d", asIntSet(0));
+
+ // Ensure it remains down for subsequent cluster state versions as well.
+ nodes.get(3).disconnect();
+ waitForStateExcludingNodeSubset("version:\\d+ distributor:4 .0.s:d storage:4 .0.s:d .1.s:d", asIntSet(0, 1));
}
}
diff --git a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/NodeStateChangeCheckerTest.java b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/NodeStateChangeCheckerTest.java
index cb5cee70486..5b53e524102 100644
--- a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/NodeStateChangeCheckerTest.java
+++ b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/NodeStateChangeCheckerTest.java
@@ -4,15 +4,16 @@ package com.yahoo.vespa.clustercontroller.core;
import com.yahoo.vdslib.distribution.ConfiguredNode;
import com.yahoo.vdslib.distribution.Distribution;
import com.yahoo.vdslib.distribution.Group;
+import com.yahoo.vdslib.state.ClusterState;
import com.yahoo.vdslib.state.Node;
import com.yahoo.vdslib.state.NodeState;
import com.yahoo.vdslib.state.NodeType;
import com.yahoo.vdslib.state.State;
import com.yahoo.vespa.clustercontroller.core.hostinfo.HostInfo;
import com.yahoo.vespa.clustercontroller.utils.staterestapi.requests.SetUnitStateRequest;
-import org.junit.Before;
import org.junit.Test;
+import java.text.ParseException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.HashSet;
@@ -29,7 +30,7 @@ public class NodeStateChangeCheckerTest {
private static final int minStorageNodesUp = 3;
private static final int requiredRedundancy = 4;
- private static final int currentClusterState = 2;
+ private static final int currentClusterStateVersion = 2;
private static final double minRatioOfStorageNodesUp = 0.9;
private static final Node nodeDistributor = new Node(NodeType.DISTRIBUTOR, 1);
@@ -42,6 +43,18 @@ public class NodeStateChangeCheckerTest {
return new NodeState(NodeType.STORAGE, state).setDescription(description);
}
+ private static ClusterState clusterState(String state) {
+ try {
+ return new ClusterState(state);
+ } catch (ParseException e) {
+ throw new RuntimeException(e);
+ }
+ }
+
+ private static ClusterState defaultAllUpClusterState() {
+ return clusterState(String.format("version:%d distributor:4 storage:4", currentClusterStateVersion));
+ }
+
private NodeStateChangeChecker createChangeChecker(ContentCluster cluster) {
return new NodeStateChangeChecker(minStorageNodesUp, minRatioOfStorageNodesUp, requiredRedundancy, cluster.clusterInfo());
}
@@ -93,12 +106,22 @@ public class NodeStateChangeCheckerTest {
"}\n";
}
+ private void markAllNodesAsReportingStateUp(ContentCluster cluster) {
+ final ClusterInfo clusterInfo = cluster.clusterInfo();
+ final int configuredNodeCount = cluster.clusterInfo().getConfiguredNodes().size();
+ for (int i = 0; i < configuredNodeCount; i++) {
+ clusterInfo.getDistributorNodeInfo(i).setReportedState(new NodeState(NodeType.DISTRIBUTOR, State.UP), 0);
+ clusterInfo.getDistributorNodeInfo(i).setHostInfo(HostInfo.createHostInfo(createDistributorHostInfo(4, 5, 6)));
+ clusterInfo.getStorageNodeInfo(i).setReportedState(new NodeState(NodeType.STORAGE, State.UP), 0);
+ }
+ }
+
@Test
public void testCanUpgradeForce() {
NodeStateChangeChecker nodeStateChangeChecker = createChangeChecker(createCluster(createNodes(1)));
NodeState newState = new NodeState(NodeType.STORAGE, State.INITIALIZING);
NodeStateChangeChecker.Result result = nodeStateChangeChecker.evaluateTransition(
- nodeDistributor, currentClusterState, SetUnitStateRequest.Condition.FORCE,
+ nodeDistributor, defaultAllUpClusterState(), SetUnitStateRequest.Condition.FORCE,
upNodeState, newState);
assertTrue(result.settingWantedStateIsAllowed());
assertTrue(!result.wantedStateAlreadySet());
@@ -108,7 +131,7 @@ public class NodeStateChangeCheckerTest {
public void testSafeSetStateDistributors() {
NodeStateChangeChecker nodeStateChangeChecker = createChangeChecker(createCluster(createNodes(1)));
NodeStateChangeChecker.Result result = nodeStateChangeChecker.evaluateTransition(
- nodeDistributor, currentClusterState, SetUnitStateRequest.Condition.SAFE,
+ nodeDistributor, defaultAllUpClusterState(), SetUnitStateRequest.Condition.SAFE,
upNodeState, maintenanceNodeState);
assertFalse(result.settingWantedStateIsAllowed());
assertFalse(result.wantedStateAlreadySet());
@@ -122,7 +145,7 @@ public class NodeStateChangeCheckerTest {
NodeStateChangeChecker nodeStateChangeChecker = new NodeStateChangeChecker(
5 /* min storage nodes */, minRatioOfStorageNodesUp, requiredRedundancy, cluster.clusterInfo());
NodeStateChangeChecker.Result result = nodeStateChangeChecker.evaluateTransition(
- nodeStorage, currentClusterState, SetUnitStateRequest.Condition.SAFE,
+ nodeStorage, defaultAllUpClusterState(), SetUnitStateRequest.Condition.SAFE,
upNodeState, maintenanceNodeState);
assertFalse(result.settingWantedStateIsAllowed());
assertFalse(result.wantedStateAlreadySet());
@@ -143,12 +166,32 @@ public class NodeStateChangeCheckerTest {
// Not setting nodes up -> all are down
NodeStateChangeChecker.Result result = nodeStateChangeChecker.evaluateTransition(
- nodeStorage, currentClusterState, SetUnitStateRequest.Condition.SAFE,
+ nodeStorage, defaultAllUpClusterState(), SetUnitStateRequest.Condition.SAFE,
maintenanceNodeState, upNodeState);
assertFalse(result.settingWantedStateIsAllowed());
assertFalse(result.wantedStateAlreadySet());
}
+ // A node may be reported as Up but have a generated state of Down if it's part of
+ // nodes taken down implicitly due to a group having too low node availability.
+ @Test
+ public void testSetUpSucceedsIfReportedIsUpButGeneratedIsDown() {
+ ContentCluster cluster = createCluster(createNodes(4));
+ NodeStateChangeChecker nodeStateChangeChecker = createChangeChecker(cluster);
+
+ markAllNodesAsReportingStateUp(cluster);
+
+ ClusterState stateWithNodeDown = clusterState(String.format(
+ "version:%d distributor:4 storage:4 .%d.s:d",
+ currentClusterStateVersion, nodeStorage.getIndex()));
+
+ NodeStateChangeChecker.Result result = nodeStateChangeChecker.evaluateTransition(
+ nodeStorage, stateWithNodeDown, SetUnitStateRequest.Condition.SAFE,
+ maintenanceNodeState, upNodeState);
+ assertTrue(result.settingWantedStateIsAllowed());
+ assertFalse(result.wantedStateAlreadySet());
+ }
+
@Test
public void testCannotSetUpIfUnknownOldStateAndReportedIsDown() {
ContentCluster cluster = createCluster(createNodes(4));
@@ -156,7 +199,7 @@ public class NodeStateChangeCheckerTest {
// Not setting nodes up -> all are down
NodeStateChangeChecker.Result result = nodeStateChangeChecker.evaluateTransition(
- nodeStorage, currentClusterState, SetUnitStateRequest.Condition.SAFE,
+ nodeStorage, defaultAllUpClusterState(), SetUnitStateRequest.Condition.SAFE,
new NodeState(NodeType.STORAGE, State.DOWN), upNodeState);
assertFalse(result.settingWantedStateIsAllowed());
assertFalse(result.wantedStateAlreadySet());
@@ -170,7 +213,7 @@ public class NodeStateChangeCheckerTest {
setAllNodesUp(cluster, HostInfo.createHostInfo(createDistributorHostInfo(4, 3, 6)));
NodeStateChangeChecker.Result result = nodeStateChangeChecker.evaluateTransition(
- nodeStorage, currentClusterState, SetUnitStateRequest.Condition.SAFE,
+ nodeStorage, defaultAllUpClusterState(), SetUnitStateRequest.Condition.SAFE,
upNodeState, maintenanceNodeState);
assertFalse(result.settingWantedStateIsAllowed());
assertFalse(result.wantedStateAlreadySet());
@@ -185,7 +228,7 @@ public class NodeStateChangeCheckerTest {
setAllNodesUp(cluster, HostInfo.createHostInfo(createDistributorHostInfo(4, 3, 6)));
NodeStateChangeChecker.Result result = nodeStateChangeChecker.evaluateTransition(
- new Node(NodeType.STORAGE, 3), currentClusterState, SetUnitStateRequest.Condition.SAFE,
+ new Node(NodeType.STORAGE, 3), defaultAllUpClusterState(), SetUnitStateRequest.Condition.SAFE,
upNodeState, maintenanceNodeState);
assertTrue(result.settingWantedStateIsAllowed());
assertFalse(result.wantedStateAlreadySet());
@@ -209,7 +252,7 @@ public class NodeStateChangeCheckerTest {
setAllNodesUp(cluster, HostInfo.createHostInfo(hostInfo));
NodeStateChangeChecker.Result result = nodeStateChangeChecker.evaluateTransition(
- new Node(NodeType.STORAGE, 1), currentClusterState, SetUnitStateRequest.Condition.SAFE,
+ new Node(NodeType.STORAGE, 1), defaultAllUpClusterState(), SetUnitStateRequest.Condition.SAFE,
upNodeState, maintenanceNodeState);
assertTrue(result.settingWantedStateIsAllowed());
assertFalse(result.wantedStateAlreadySet());
@@ -222,7 +265,7 @@ public class NodeStateChangeCheckerTest {
cluster.clusterInfo().getStorageNodeInfo(1).setReportedState(new NodeState(NodeType.STORAGE, State.UP), 0);
NodeStateChangeChecker.Result result = nodeStateChangeChecker.evaluateTransition(
- nodeStorage, currentClusterState, SetUnitStateRequest.Condition.SAFE, upNodeState, maintenanceNodeState);
+ nodeStorage, defaultAllUpClusterState(), SetUnitStateRequest.Condition.SAFE, upNodeState, maintenanceNodeState);
assertFalse(result.settingWantedStateIsAllowed());
assertFalse(result.wantedStateAlreadySet());
assertThat(result.getReason(), is("Distributor node (0) has not reported any cluster state version yet."));
@@ -235,7 +278,7 @@ public class NodeStateChangeCheckerTest {
NodeState currentNodeState = createNodeState(state, oldDescription);
NodeState newNodeState = createNodeState(state, newDescription);
return nodeStateChangeChecker.evaluateTransition(
- nodeStorage, currentClusterState, SetUnitStateRequest.Condition.SAFE,
+ nodeStorage, defaultAllUpClusterState(), SetUnitStateRequest.Condition.SAFE,
currentNodeState, newNodeState);
}
@@ -280,13 +323,16 @@ public class NodeStateChangeCheckerTest {
cluster.clusterInfo().getStorageNodeInfo(x).setReportedState(new NodeState(NodeType.STORAGE, state), 0);
}
+ ClusterState clusterState = defaultAllUpClusterState();
+
if (storageNodeIndex >= 0) { // TODO: Move this into the calling test
NodeState downNodeState = new NodeState(NodeType.STORAGE, State.DOWN);
cluster.clusterInfo().getStorageNodeInfo(storageNodeIndex).setReportedState(downNodeState, 4 /* time */);
+ clusterState.setNodeState(new Node(NodeType.STORAGE, storageNodeIndex), downNodeState);
}
return nodeStateChangeChecker.evaluateTransition(
- nodeStorage, currentClusterState, SetUnitStateRequest.Condition.SAFE, upNodeState, maintenanceNodeState);
+ nodeStorage, clusterState, SetUnitStateRequest.Condition.SAFE, upNodeState, maintenanceNodeState);
}
private void setAllNodesUp(ContentCluster cluster, HostInfo distributorHostInfo) {
@@ -339,6 +385,28 @@ public class NodeStateChangeCheckerTest {
assertThat(result.getReason(), containsString("Not enough storage nodes running"));
}
+ @Test
+ public void testNodeRatioRequirementConsidersGeneratedNodeStates() {
+ ContentCluster cluster = createCluster(createNodes(4));
+ NodeStateChangeChecker nodeStateChangeChecker = createChangeChecker(cluster);
+
+ markAllNodesAsReportingStateUp(cluster);
+
+ // Both minRatioOfStorageNodesUp and minStorageNodesUp imply that a single node being
+ // in state Down should halt the upgrade. This must also take into account the generated
+ // state, not just the reported state. In this case, all nodes are reported as being Up
+ // but one node has a generated state of Down.
+ ClusterState stateWithNodeDown = clusterState(String.format(
+ "version:%d distributor:4 storage:4 .3.s:d",
+ currentClusterStateVersion));
+
+ NodeStateChangeChecker.Result result = nodeStateChangeChecker.evaluateTransition(
+ nodeStorage, stateWithNodeDown, SetUnitStateRequest.Condition.SAFE,
+ upNodeState, maintenanceNodeState);
+ assertFalse(result.settingWantedStateIsAllowed());
+ assertFalse(result.wantedStateAlreadySet());
+ }
+
private List<ConfiguredNode> createNodes(int count) {
List<ConfiguredNode> nodes = new ArrayList<>();
for (int i = 0; i < count; i++)
diff --git a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/SystemStateGeneratorTest.java b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/SystemStateGeneratorTest.java
index ab6185d2b56..35118933b42 100644
--- a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/SystemStateGeneratorTest.java
+++ b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/SystemStateGeneratorTest.java
@@ -99,7 +99,7 @@ public class SystemStateGeneratorTest extends TestCase {
}
private void assertNewClusterStateReceived() {
- assertTrue(generator.notifyIfNewSystemState(systemStateListener));
+ assertTrue(generator.notifyIfNewSystemState(cluster, systemStateListener));
assertTrue(systemStateListener.toString(), systemStateListener.states.size() == 1);
systemStateListener.states.clear();
}
@@ -147,7 +147,7 @@ public class SystemStateGeneratorTest extends TestCase {
private void verifyClusterStateChanged(Node node, State state) {
log.info("Verifying cluster state has been updated for " + node + " to " + state);
- assertTrue(generator.notifyIfNewSystemState(systemStateListener));
+ assertTrue(generator.notifyIfNewSystemState(cluster, systemStateListener));
assertEquals(1, systemStateListener.states.size());
assertEquals(state, systemStateListener.states.get(0).getNodeState(node).getState());
systemStateListener.states.clear();