aboutsummaryrefslogtreecommitdiffstats
path: root/clustercontroller-core/src/test/java/com/yahoo/vespa
diff options
context:
space:
mode:
authorJon Bratseth <bratseth@yahoo-inc.com>2016-06-15 23:09:44 +0200
committerJon Bratseth <bratseth@yahoo-inc.com>2016-06-15 23:09:44 +0200
commit72231250ed81e10d66bfe70701e64fa5fe50f712 (patch)
tree2728bba1131a6f6e5bdf95afec7d7ff9358dac50 /clustercontroller-core/src/test/java/com/yahoo/vespa
Publish
Diffstat (limited to 'clustercontroller-core/src/test/java/com/yahoo/vespa')
-rw-r--r--clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/ClusterStateViewTest.java111
-rw-r--r--clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/ClusterStatsAggregatorTest.java217
-rw-r--r--clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/ContentClusterHtmlRendrerTest.java75
-rw-r--r--clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/DatabaseTest.java166
-rw-r--r--clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/DistributionBitCountTest.java110
-rw-r--r--clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/DummyCommunicator.java114
-rw-r--r--clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/DummyVdsNode.java504
-rw-r--r--clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/DummyVdsNodeOptions.java9
-rw-r--r--clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/EventLogTest.java63
-rw-r--r--clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/FakeTimer.java34
-rw-r--r--clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/FleetControllerTest.java555
-rw-r--r--clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/LeafGroupsTest.java67
-rw-r--r--clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/MasterElectionTest.java440
-rw-r--r--clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/NoZooKeeperTest.java25
-rw-r--r--clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/NodeSlobrokConfigurationMembershipTest.java117
-rw-r--r--clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/NodeStateChangeCheckerTest.java349
-rw-r--r--clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/RpcServerTest.java627
-rw-r--r--clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/SlobrokTest.java116
-rw-r--r--clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/StateChangeTest.java1135
-rw-r--r--clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/StateGatherTest.java81
-rw-r--r--clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/StatsForStorageNodeTest.java44
-rw-r--r--clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/StatusPagesTest.java385
-rw-r--r--clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/StorageNodeStatsContainerTest.java43
-rw-r--r--clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/StorageNodeStatsTest.java25
-rw-r--r--clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/SystemStateGeneratorTest.java198
-rw-r--r--clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/WantedStateTest.java82
-rw-r--r--clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/ZooKeeperStressTest.java157
-rw-r--r--clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/ZooKeeperTestServer.java86
-rw-r--r--clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/hostinfo/HostInfoTest.java107
-rw-r--r--clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/hostinfo/StorageNodeStatsBridgeTest.java67
-rw-r--r--clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/mocks/TestEventLog.java56
-rw-r--r--clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/restapiv2/ClusterControllerMock.java85
-rw-r--r--clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/restapiv2/ClusterListTest.java51
-rw-r--r--clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/restapiv2/ClusterTest.java58
-rw-r--r--clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/restapiv2/NodeTest.java200
-rw-r--r--clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/restapiv2/NotMasterTest.java137
-rw-r--r--clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/restapiv2/PartitionTest.java64
-rw-r--r--clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/restapiv2/RequestTest.java33
-rw-r--r--clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/restapiv2/ServiceTest.java121
-rw-r--r--clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/restapiv2/SetNodeStateTest.java348
-rw-r--r--clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/restapiv2/StateRestApiTest.java170
-rw-r--r--clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/rpc/RPCCommunicatorTest.java94
-rw-r--r--clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/testutils/LogFormatter.java34
-rw-r--r--clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/testutils/StateWaiter.java114
-rw-r--r--clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/testutils/WaitCondition.java174
-rw-r--r--clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/testutils/WaitTask.java41
-rw-r--r--clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/testutils/Waiter.java105
47 files changed, 7994 insertions, 0 deletions
diff --git a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/ClusterStateViewTest.java b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/ClusterStateViewTest.java
new file mode 100644
index 00000000000..a62a8676096
--- /dev/null
+++ b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/ClusterStateViewTest.java
@@ -0,0 +1,111 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.vespa.clustercontroller.core;
+
+import com.yahoo.vdslib.state.*;
+import com.yahoo.vespa.clustercontroller.core.hostinfo.HostInfo;
+import com.yahoo.vespa.clustercontroller.core.hostinfo.StorageNodeStatsBridge;
+import org.junit.Test;
+
+import java.util.HashMap;
+import java.util.Map;
+import java.util.Set;
+
+import static org.junit.Assert.assertEquals;
+import static org.mockito.Mockito.*;
+
+/**
+ * Unit tests for {@link ClusterStateView}: which host info updates are forwarded to the
+ * {@link ClusterStatsAggregator}, and which node indices are reported as up.
+ *
+ * @author hakon
+ * @since 5.34
+ */
+public class ClusterStateViewTest {
+    final Map<Integer, String> hostnames = new HashMap<>();
+    final NodeInfo nodeInfo = mock(NodeInfo.class);
+    final Node node = mock(Node.class);
+    final ClusterStatsAggregator statsAggregator = mock(ClusterStatsAggregator.class);
+    final StorageMergeStats storageStats = mock(StorageMergeStats.class);
+    final ClusterState clusterState = mock(ClusterState.class);
+    final MetricUpdater metricUpdater = mock(MetricUpdater.class);
+    final ClusterStateView clusterStateView = new ClusterStateView(clusterState, statsAggregator, metricUpdater);
+
+    /** Builds a host info from JSON that only carries the given cluster state version. */
+    HostInfo createHostInfo(String version) {
+        return HostInfo.createHostInfo("{ \"cluster-state-version\": " + version + " }");
+    }
+
+    /**
+     * Fails with a descriptive message unless {@code indices} contains {@code index}.
+     * A JUnit assertion is used instead of the {@code assert} keyword, because the keyword
+     * is silently skipped when the JVM runs without {@code -ea}.
+     */
+    private static void assertContainsIndex(Set<Integer> indices, int index) {
+        org.junit.Assert.assertTrue("Expected index " + index + " in " + indices, indices.contains(index));
+    }
+
+    /** Host info from a node that is not a distributor must not reach the aggregator. */
+    @Test
+    public void testWrongNodeType() {
+        when(nodeInfo.isDistributor()).thenReturn(false);
+
+        clusterStateView.handleUpdatedHostInfo(hostnames, nodeInfo, createHostInfo("101"));
+
+        verify(statsAggregator, never()).updateForDistributor(any(), anyInt(), any());
+    }
+
+    /** Host info reporting version 22 while the cluster state is at 101 must be ignored. */
+    @Test
+    public void testStateVersionMismatch() {
+        when(nodeInfo.isDistributor()).thenReturn(true);
+        when(clusterState.getVersion()).thenReturn(101);
+
+        clusterStateView.handleUpdatedHostInfo(hostnames, nodeInfo, createHostInfo("22"));
+
+        verify(statsAggregator, never()).updateForDistributor(any(), anyInt(), any());
+    }
+
+    // NOTE(review): this test is identical to testStateVersionMismatch (versions 22 vs 101),
+    // so it does not exercise a separate "failed to get stats" path. Presumably it was meant
+    // to use a matching version with unusable stats - confirm and adjust.
+    @Test
+    public void testFailToGetStats() {
+        when(nodeInfo.isDistributor()).thenReturn(true);
+        when(clusterState.getVersion()).thenReturn(101);
+
+        clusterStateView.handleUpdatedHostInfo(hostnames, nodeInfo, createHostInfo("22"));
+
+        verify(statsAggregator, never()).updateForDistributor(any(), anyInt(), any());
+    }
+
+    /** A distributor reporting the current version has its stats forwarded to the aggregator. */
+    @Test
+    public void testSuccessCase() {
+        when(nodeInfo.isDistributor()).thenReturn(true);
+        HostInfo hostInfo = HostInfo.createHostInfo(
+                "{" +
+                " \"cluster-state-version\": 101," +
+                " \"distributor\": {\n" +
+                " \"storage-nodes\": [\n" +
+                " {\n" +
+                " \"node-index\": 3\n" +
+                " }\n" +
+                " ]}}");
+
+        when(nodeInfo.getNodeIndex()).thenReturn(3);
+        when(clusterState.getVersion()).thenReturn(101);
+
+        clusterStateView.handleUpdatedHostInfo(hostnames, nodeInfo, hostInfo);
+
+        verify(statsAggregator).updateForDistributor(
+                hostnames, 3, StorageNodeStatsBridge.generate(hostInfo.getDistributor()));
+    }
+
+    /** Of seven distributors, exactly the indices 0, 1, 2 and 6 are expected to be reported. */
+    @Test
+    public void testIndicesOfUpNodes() {
+        when(clusterState.getNodeCount(NodeType.DISTRIBUTOR)).thenReturn(7);
+
+        // The same NodeState mock is returned for every node; consecutive getState() calls
+        // yield the states below in order. The trailing numbers are the call positions,
+        // presumably matching node indices queried in ascending order - TODO confirm.
+        NodeState nodeState = mock(NodeState.class);
+        when(nodeState.getState()).
+                thenReturn(State.MAINTENANCE).  // 0
+                thenReturn(State.RETIRED).      // 1
+                thenReturn(State.INITIALIZING). // 2
+                thenReturn(State.DOWN).         // 3
+                thenReturn(State.STOPPING).     // 4
+                thenReturn(State.UNKNOWN).      // 5
+                thenReturn(State.UP);           // 6
+
+        when(clusterState.getNodeState(any())).thenReturn(nodeState);
+
+        Set<Integer> indices = ClusterStateView.getIndicesOfUpNodes(clusterState, NodeType.DISTRIBUTOR);
+        assertEquals(4, indices.size());
+        assertContainsIndex(indices, 0);
+        assertContainsIndex(indices, 1);
+        assertContainsIndex(indices, 2);
+        assertContainsIndex(indices, 6);
+    }
+}
diff --git a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/ClusterStatsAggregatorTest.java b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/ClusterStatsAggregatorTest.java
new file mode 100644
index 00000000000..e87cad135c8
--- /dev/null
+++ b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/ClusterStatsAggregatorTest.java
@@ -0,0 +1,217 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.vespa.clustercontroller.core;
+
+import org.junit.Test;
+import org.junit.runner.RunWith;
+import org.mockito.runners.MockitoJUnitRunner;
+
+import java.util.*;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertNotNull;
+import static org.junit.Assert.assertTrue;
+import static org.mockito.Matchers.any;
+import static org.mockito.Mockito.*;
+
+/**
+ * @author hakon
+ * @since 5.34
+ */
+@RunWith(MockitoJUnitRunner.class)
+public class ClusterStatsAggregatorTest {
+
+ final Set<Integer> distributors = new HashSet<>();
+ final Set<Integer> storageNodes = new HashSet<>();
+ final Map<Integer, String> hostnames = new HashMap<>();
+ final MetricUpdater updater = mock(MetricUpdater.class);
+ StorageMergeStats storageStats;
+
+ private void addDistributors(Integer... indices) {
+ for (Integer i : indices) {
+ distributors.add(i);
+ }
+ }
+
+ private static class StorageNodeSpec {
+ public StorageNodeSpec(Integer index, String hostname) {
+ this.index = index;
+ this.hostname = hostname;
+ }
+ public Integer index;
+ public String hostname;
+ }
+
+ private void addStorageNodes(StorageNodeSpec... specs) {
+ for (StorageNodeSpec spec : specs) {
+ storageNodes.add(spec.index);
+ hostnames.put(spec.index, spec.hostname);
+ }
+ storageStats = new StorageMergeStats(storageNodes);
+ }
+
+ private void putStorageStats(int index, int syncing, int copyingIn, int movingOut, int copyingOut) {
+ storageStats.getStorageNode(index).set(createStats(index, syncing, copyingIn, movingOut, copyingOut));
+ }
+
+ private static NodeMergeStats createStats(int index, int syncing, int copyingIn, int movingOut, int copyingOut) {
+ return new NodeMergeStats(
+ index,
+ new NodeMergeStats.Amount(syncing),
+ new NodeMergeStats.Amount(copyingIn),
+ new NodeMergeStats.Amount(movingOut),
+ new NodeMergeStats.Amount(copyingOut));
+ }
+
+ @Test
+ public void testSimple() {
+ final int distributorIndex = 1;
+ addDistributors(distributorIndex);
+
+ final int storageNodeIndex = 11;
+ addStorageNodes(new StorageNodeSpec(storageNodeIndex, "storage-node"));
+
+ putStorageStats(storageNodeIndex, 5, 6, 7, 8);
+
+ ClusterStatsAggregator aggregator = new ClusterStatsAggregator(distributors, storageNodes, updater);
+ aggregator.updateForDistributor(hostnames, distributorIndex, storageStats);
+
+ Map<String, NodeMergeStats> expectedStorageNodeStats = new HashMap<>();
+ expectedStorageNodeStats.put("storage-node", createStats(storageNodeIndex, 5, 6, 7, 8));
+
+ verify(updater).updateMergeOpMetrics(expectedStorageNodeStats);
+ }
+
+ @Test
+ public void testComplex() {
+ final int distributor1 = 1;
+ final int distributor2 = 2;
+ addDistributors(distributor1, distributor2);
+
+ final int storageNode1 = 11;
+ final int storageNode2 = 12;
+ addStorageNodes(
+ new StorageNodeSpec(storageNode1, "storage-node-1"),
+ new StorageNodeSpec(storageNode2, "storage-node-2"));
+
+ ClusterStatsAggregator aggregator = new ClusterStatsAggregator(distributors, storageNodes, updater);
+
+ // Distributor 1.
+ putStorageStats(storageNode1, 0, 1, 2, 3);
+ putStorageStats(storageNode2, 20, 21, 22, 23);
+ aggregator.updateForDistributor(hostnames, distributor1, storageStats);
+
+ // Distributor 2.
+ putStorageStats(storageNode1, 10, 11, 12, 13);
+ putStorageStats(storageNode2, 30, 31, 32, 33);
+ aggregator.updateForDistributor(hostnames, distributor2, storageStats);
+
+ Map<String, NodeMergeStats> expectedStorageNodeStats = new HashMap<>();
+ expectedStorageNodeStats.put("storage-node-1", createStats(storageNode1, 0 + 10, 1 + 11, 2 + 12, 3 + 13));
+ expectedStorageNodeStats.put("storage-node-2", createStats(storageNode2, 20 + 30, 21 + 31, 22 + 32, 23 + 33));
+
+ verify(updater, times(1)).updateMergeOpMetrics(expectedStorageNodeStats);
+ }
+
+ @Test
+ public void testHashCodeCache() {
+ final int distributor1 = 1;
+ final int distributor2 = 2;
+ addDistributors(distributor1, distributor2);
+
+ final int storageNode1 = 11;
+ final int storageNode2 = 12;
+ addStorageNodes(
+ new StorageNodeSpec(storageNode1, "storage-node-1"),
+ new StorageNodeSpec(storageNode2, "storage-node-2"));
+
+ ClusterStatsAggregator aggregator = new ClusterStatsAggregator(distributors, storageNodes, updater);
+
+ // Distributor 1.
+ putStorageStats(storageNode1, 0, 1, 2, 3);
+ putStorageStats(storageNode2, 20, 21, 22, 23);
+ aggregator.updateForDistributor(hostnames, distributor1, storageStats);
+
+ // Distributor 2.
+ putStorageStats(storageNode1, 10, 11, 12, 13);
+ putStorageStats(storageNode2, 30, 31, 32, 33);
+ aggregator.updateForDistributor(hostnames, distributor2, storageStats);
+
+ // If we add call another updateForDistributor with the same arguments, updateMergeOpMetrics() should not be called.
+ // See times(1) below.
+ aggregator.updateForDistributor(hostnames, distributor2, storageStats);
+
+ Map<String, NodeMergeStats> expectedStorageNodeStats = new HashMap<>();
+ expectedStorageNodeStats.put("storage-node-1", createStats(storageNode1, 0 + 10, 1 + 11, 2 + 12, 3 + 13));
+ expectedStorageNodeStats.put("storage-node-2", createStats(storageNode2, 20 + 30, 21 + 31, 22 + 32, 23 + 33));
+
+
+ verify(updater, times(1)).updateMergeOpMetrics(expectedStorageNodeStats);
+ }
+
+ @Test
+ public void testUnknownDistributor() {
+ final int upDistributor = 1;
+ final int DownDistributorIndex = 2;
+ addDistributors(upDistributor);
+
+ final int storageNodeIndex = 11;
+ addStorageNodes(new StorageNodeSpec(storageNodeIndex, "storage-node"));
+
+ putStorageStats(storageNodeIndex, 5, 6, 7, 8);
+
+ ClusterStatsAggregator aggregator = new ClusterStatsAggregator(distributors, storageNodes, updater);
+ aggregator.updateForDistributor(hostnames, DownDistributorIndex, storageStats);
+
+ verify(updater, never()).updateMergeOpMetrics(any());
+ }
+
+ @Test
+ public void testMoreStorageNodesThanDistributors() {
+ final int distributor1 = 1;
+ addDistributors(distributor1);
+
+ final int storageNode1 = 11;
+ final int storageNode2 = 12;
+ addStorageNodes(
+ new StorageNodeSpec(storageNode1, "storage-node-1"),
+ new StorageNodeSpec(storageNode2, "storage-node-2"));
+
+ ClusterStatsAggregator aggregator = new ClusterStatsAggregator(distributors, storageNodes, updater);
+
+ // Distributor 1.
+ putStorageStats(storageNode1, 0, 1, 2, 3);
+ putStorageStats(storageNode2, 20, 21, 22, 23);
+ aggregator.updateForDistributor(hostnames, distributor1, storageStats);
+
+ Map<String, NodeMergeStats> expectedStorageNodeStats = new HashMap<>();
+ expectedStorageNodeStats.put("storage-node-1", createStats(storageNode1, 0, 1, 2, 3));
+ expectedStorageNodeStats.put("storage-node-2", createStats(storageNode2, 20, 21, 22, 23));
+
+ verify(updater, times(1)).updateMergeOpMetrics(expectedStorageNodeStats);
+ }
+
+ @Test
+ public void testMoreDistributorsThanStorageNodes() {
+ final int distributor1 = 1;
+ final int distributor2 = 2;
+ addDistributors(distributor1, distributor2);
+
+ final int storageNode1 = 11;
+ addStorageNodes(new StorageNodeSpec(storageNode1, "storage-node-1"));
+
+ ClusterStatsAggregator aggregator = new ClusterStatsAggregator(distributors, storageNodes, updater);
+
+ // Distributor 1.
+ putStorageStats(storageNode1, 0, 1, 2, 3);
+ aggregator.updateForDistributor(hostnames, distributor1, storageStats);
+
+ // Distributor 2.
+ putStorageStats(storageNode1, 10, 11, 12, 13);
+ aggregator.updateForDistributor(hostnames, distributor2, storageStats);
+
+ Map<String, NodeMergeStats> expectedStorageNodeStats = new HashMap<>();
+ expectedStorageNodeStats.put("storage-node-1", createStats(storageNode1, 0 + 10, 1 + 11, 2 + 12, 3 + 13));
+
+ verify(updater, times(1)).updateMergeOpMetrics(expectedStorageNodeStats);
+ }
+}
diff --git a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/ContentClusterHtmlRendrerTest.java b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/ContentClusterHtmlRendrerTest.java
new file mode 100644
index 00000000000..5cf5060e240
--- /dev/null
+++ b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/ContentClusterHtmlRendrerTest.java
@@ -0,0 +1,75 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.vespa.clustercontroller.core;
+
+import com.yahoo.vdslib.state.ClusterState;
+import com.yahoo.vespa.clustercontroller.core.hostinfo.HostInfo;
+import com.yahoo.vespa.clustercontroller.core.status.statuspage.VdsClusterHtmlRendrer;
+import org.codehaus.jettison.json.JSONException;
+import org.codehaus.jettison.json.JSONWriter;
+import org.junit.Before;
+import org.junit.Test;
+
+import java.io.StringWriter;
+import java.io.Writer;
+import java.text.ParseException;
+import java.util.TreeMap;
+
+import static org.hamcrest.core.Is.is;
+import static org.junit.Assert.*;
+import static org.mockito.Mockito.mock;
+
+public class ContentClusterHtmlRendrerTest {
+ VdsClusterHtmlRendrer rendrer = new VdsClusterHtmlRendrer();
+ final static int slobrokGeneration = 34;
+ final static String clusterName = "clustername";
+ final TreeMap<Integer, NodeInfo> storageNodeInfoByIndex = new TreeMap<>();
+ final TreeMap<Integer, NodeInfo> distributorNodeInfoByIndex = new TreeMap<>();
+ String result;
+
+ @Before
+ public void before() throws JSONException, ParseException {
+ final ClusterState state = new ClusterState("version:34633 bits:24 distributor:211 storage:211");
+ final EventLog eventLog = new EventLog(new FakeTimer(), null);
+
+ final VdsClusterHtmlRendrer.Table table = rendrer.createNewClusterHtmlTable(clusterName, slobrokGeneration);
+
+ final ContentCluster contentCluster = mock(ContentCluster.class);
+
+ for (int x = 0; x < 10; x++) {
+ NodeInfo nodeInfo = new DistributorNodeInfo(contentCluster, x, "dist " + x, null);
+ final Writer writer = new StringWriter();
+ new JSONWriter(writer)
+ .object().key("vtag")
+ // Let one node have a different release tag.
+ .object().key("version").value("release1" + (x == 2 ? "bad" : ""))
+ .endObject()
+ .endObject();
+ nodeInfo.setHostInfo(HostInfo.createHostInfo(writer.toString()));
+ distributorNodeInfoByIndex.put(x, nodeInfo);
+ }
+ storageNodeInfoByIndex.put(2, new StorageNodeInfo(contentCluster, 2, false, "storage" + 2, null));
+
+ table.renderNodes(
+ storageNodeInfoByIndex,
+ distributorNodeInfoByIndex,
+ new FakeTimer(),
+ state,
+ 10,
+ eventLog,
+ "pathPrefix",
+ "name");
+ final StringBuilder stringBuilder = new StringBuilder();
+ table.addTable(stringBuilder, 34);
+ result = stringBuilder.toString();
+ }
+
+ @Test
+ public void testVtagRendering() throws Exception {
+ // 9 distribution nodes should have green tag on release1.
+ assertThat(result.split("<td bgcolor=\"#c0ffc0\" align=\"right\"><nobr>release1</nobr></td>").length, is(10));
+ // 1 distribution node should have warning on release1bad.
+ assertThat(result.split("<td bgcolor=\"#ffffc0\" align=\"right\"><nobr>release1bad</nobr></td>").length, is(2));
+ // 1 storage node should should have warning on release "not set".
+ assertThat(result.split("<td bgcolor=\"#ffffc0\" align=\"right\"><nobr>not set</nobr></td>").length, is(2));
+ }
+}
diff --git a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/DatabaseTest.java b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/DatabaseTest.java
new file mode 100644
index 00000000000..65661d99d4a
--- /dev/null
+++ b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/DatabaseTest.java
@@ -0,0 +1,166 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.vespa.clustercontroller.core;
+
+import com.yahoo.jrt.*;
+import com.yahoo.jrt.StringValue;
+import com.yahoo.vdslib.state.Node;
+import com.yahoo.vdslib.state.NodeState;
+import com.yahoo.vdslib.state.NodeType;
+import com.yahoo.vdslib.state.State;
+import org.junit.After;
+import org.junit.Ignore;
+import org.junit.Test;
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertTrue;
+
+import java.util.Map;
+import java.util.TreeMap;
+import java.util.logging.Logger;
+
+/**
+ * Tests that wanted node states set over RPC are reflected by the fleet controller, including
+ * across a fleet controller restart. Both tests are currently ignored.
+ */
+public class DatabaseTest extends FleetControllerTest {
+
+    private static final Logger log = Logger.getLogger(DatabaseTest.class.getName());
+
+    protected Supervisor supervisor;
+
+    @After
+    public void tearDown() throws Exception {
+        try {
+            if (supervisor != null) {
+                supervisor.transport().shutdown().join();
+            }
+        } finally {
+            // Always run the base class cleanup, even if shutting down the transport fails.
+            super.tearDown();
+        }
+    }
+
+    /**
+     * Sets the wanted state of a node through the fleet controller's "setNodeState" RPC method
+     * and records it in {@code wantedStates} once the request has succeeded.
+     *
+     * @param n the node to set the wanted state of
+     * @param ns the wanted state
+     * @param wantedStates map of expected wanted states, updated with the new state
+     */
+    private void setWantedState(Node n, NodeState ns, Map<Node, NodeState> wantedStates) {
+        int rpcPort = fleetController.getRpcPort();
+        if (supervisor == null) {
+            supervisor = new Supervisor(new Transport());
+        }
+        Target connection = supervisor.connect(new Spec(rpcPort));
+        try {
+            assertTrue(connection.isValid());
+
+            Request req = new Request("setNodeState");
+            req.parameters().add(new StringValue("storage/cluster.mycluster/" + n.getType().toString() + "/" + n.getIndex()));
+            req.parameters().add(new StringValue(ns.serialize(true)));
+            connection.invokeSync(req, timeoutS);
+            assertEquals(req.toString(), ErrorCode.NONE, req.errorCode());
+            assertTrue(req.toString(), req.checkReturnTypes("s"));
+            wantedStates.put(n, ns);
+        } finally {
+            // One connection is opened per call; close it so connections do not pile up.
+            connection.close();
+        }
+    }
+
+    /** Returns a map holding wanted state UP for every dummy node. */
+    private Map<Node, NodeState> wantedStatesAllUp() {
+        Map<Node, NodeState> wantedStates = new TreeMap<>();
+        for (DummyVdsNode node : nodes) {
+            wantedStates.put(node.getNode(), new NodeState(node.getType(), State.UP));
+        }
+        return wantedStates;
+    }
+
+    /** Asserts that the fleet controller's wanted state of every dummy node equals the expected one. */
+    private void assertWantedStates(Map<Node, NodeState> wantedStates) {
+        for (DummyVdsNode node : nodes) {
+            assertEquals(node.getNode().toString(),
+                         wantedStates.get(node.getNode()),
+                         fleetController.getWantedNodeState(node.getNode()));
+        }
+    }
+
+    // These tests work in isolation but cause other tests to hang
+    @Ignore
+    @Test
+    public void testWantedStatesInZooKeeper() throws Exception {
+        startingTest("DatabaseTest::testWantedStatesInZooKeeper");
+        FleetControllerOptions options = new FleetControllerOptions("mycluster");
+        options.zooKeeperServerAddress = "127.0.0.1";
+        setUpFleetController(true, options);
+        setUpVdsNodes(true, new DummyVdsNodeOptions());
+        log.info("WAITING FOR STABLE SYSTEM");
+        waitForStableSystem();
+
+        log.info("VALIDATE STARTING WANTED STATES");
+        Map<Node, NodeState> wantedStates = wantedStatesAllUp();
+        assertWantedStates(wantedStates);
+
+        log.info("SET A WANTED STATE AND SEE THAT IT GETS PROPAGATED");
+        setWantedState(new Node(NodeType.STORAGE, 3), new NodeState(NodeType.STORAGE, State.MAINTENANCE).setDescription("Yoo"), wantedStates);
+        waitForState("version:\\d+ distributor:10 storage:10 .3.s:m");
+        assertWantedStates(wantedStates);
+
+        log.info("SET ANOTHER WANTED STATE AND SEE THAT IT GETS PROPAGATED");
+        setWantedState(new Node(NodeType.DISTRIBUTOR, 2), new NodeState(NodeType.DISTRIBUTOR, State.DOWN), wantedStates);
+        waitForState("version:\\d+ distributor:10 .2.s:d storage:10 .3.s:m");
+        assertWantedStates(wantedStates);
+
+        log.info("SET YET ANOTHER WANTED STATE AND SEE THAT IT GETS PROPAGATED");
+        setWantedState(new Node(NodeType.STORAGE, 7), new NodeState(NodeType.STORAGE, State.RETIRED).setDescription("We wanna replace this node"), wantedStates);
+        waitForState("version:\\d+ distributor:10 .2.s:d storage:10 .3.s:m .7.s:r");
+        assertWantedStates(wantedStates);
+
+        log.info("CHECK THAT WANTED STATES PERSIST FLEETCONTROLLER RESTART");
+        stopFleetController();
+        startFleetController();
+
+        waitForState("version:\\d+ distributor:10 .2.s:d storage:10 .3.s:m .7.s:r");
+        assertWantedStates(wantedStates);
+
+        log.info("CLEAR WANTED STATE");
+        setWantedState(new Node(NodeType.STORAGE, 7), new NodeState(NodeType.STORAGE, State.UP), wantedStates);
+        assertWantedStates(wantedStates);
+
+        setWantedState(new Node(NodeType.DISTRIBUTOR, 5), new NodeState(NodeType.DISTRIBUTOR, State.DOWN), wantedStates);
+        assertWantedStates(wantedStates);
+
+        setWantedState(new Node(NodeType.DISTRIBUTOR, 2), new NodeState(NodeType.DISTRIBUTOR, State.UP), wantedStates);
+        assertWantedStates(wantedStates);
+
+        setWantedState(new Node(NodeType.STORAGE, 9), new NodeState(NodeType.STORAGE, State.DOWN), wantedStates);
+        assertWantedStates(wantedStates);
+    }
+
+    // These tests work in isolation but cause other tests to hang
+    @Ignore
+    @Test
+    public void testWantedStateOfUnknownNode() throws Exception {
+        startingTest("DatabaseTest::testWantedStateOfUnknownNode");
+        FleetControllerOptions options = new FleetControllerOptions("mycluster");
+        options.minRatioOfDistributorNodesUp = 0;
+        options.minRatioOfStorageNodesUp = 0;
+        options.zooKeeperServerAddress = "localhost";
+        setUpFleetController(true, options);
+        setUpVdsNodes(true, new DummyVdsNodeOptions());
+        waitForStableSystem();
+
+        // Populate map of wanted states we should have
+        Map<Node, NodeState> wantedStates = wantedStatesAllUp();
+        assertWantedStates(wantedStates);
+
+        setWantedState(new Node(NodeType.STORAGE, 1), new NodeState(NodeType.STORAGE, State.MAINTENANCE).setDescription("Yoo"), wantedStates);
+        waitForState("version:\\d+ distributor:10 storage:10 .1.s:m");
+        assertWantedStates(wantedStates);
+
+        // NOTE(review): several "should not show up" comments below are each followed by a wait
+        // for a cluster state in which the change is visible. Confirm which is intended.
+
+        // This should not show up, as it is down
+        setWantedState(new Node(NodeType.DISTRIBUTOR, 8), new NodeState(NodeType.DISTRIBUTOR, State.DOWN), wantedStates);
+        waitForState("version:\\d+ distributor:10 .8.s:d storage:10 .1.s:m");
+        assertWantedStates(wantedStates);
+
+        // This should show up, as down nodes can be turned to maintenance
+        setWantedState(new Node(NodeType.STORAGE, 6), new NodeState(NodeType.STORAGE, State.MAINTENANCE).setDescription("foobar"), wantedStates);
+        waitForState("version:\\d+ distributor:10 .8.s:d storage:10 .1.s:m .6.s:m");
+        assertWantedStates(wantedStates);
+
+        // This should not show up, as we cannot turn a down node retired
+        setWantedState(new Node(NodeType.STORAGE, 7), new NodeState(NodeType.STORAGE, State.RETIRED).setDescription("foobar"), wantedStates);
+        waitForState("version:\\d+ distributor:10 .8.s:d storage:10 .1.s:m .6.s:m .7.s:r");
+        assertWantedStates(wantedStates);
+
+        // This should not show up, as it is down
+        setWantedState(new Node(NodeType.STORAGE, 8), new NodeState(NodeType.STORAGE, State.DOWN).setDescription("foobar"), wantedStates);
+        waitForState("version:\\d+ distributor:10 .8.s:d storage:10 .1.s:m .6.s:m .7.s:r .8.s:d");
+        assertWantedStates(wantedStates);
+
+        // Restart the fleet controller while the dummy nodes from list position 6 onwards are disconnected.
+        stopFleetController();
+        for (int i = 6; i < nodes.size(); ++i) {
+            nodes.get(i).disconnect();
+        }
+        startFleetController();
+
+        waitForState("version:\\d+ distributor:3 storage:7 .1.s:m .3.s:d .4.s:d .5.s:d .6.s:m");
+
+        setWantedState(new Node(NodeType.STORAGE, 6), new NodeState(NodeType.STORAGE, State.UP), wantedStates);
+        waitForState("version:\\d+ distributor:3 storage:3 .1.s:m");
+
+        for (int i = 6; i < nodes.size(); ++i) {
+            nodes.get(i).connect();
+        }
+        waitForState("version:\\d+ distributor:10 .8.s:d storage:10 .1.s:m .7.s:r .8.s:d");
+        assertWantedStates(wantedStates);
+    }
+
+}
diff --git a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/DistributionBitCountTest.java b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/DistributionBitCountTest.java
new file mode 100644
index 00000000000..1adb0dcad7d
--- /dev/null
+++ b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/DistributionBitCountTest.java
@@ -0,0 +1,110 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.vespa.clustercontroller.core;
+
+import com.yahoo.vdslib.distribution.ConfiguredNode;
+import com.yahoo.vdslib.state.ClusterState;
+import com.yahoo.vdslib.state.NodeState;
+import com.yahoo.vdslib.state.NodeType;
+import com.yahoo.vdslib.state.State;
+import org.junit.Test;
+import static org.junit.Assert.assertEquals;
+
+import java.util.ArrayList;
+import java.util.List;
+
+public class DistributionBitCountTest extends FleetControllerTest {
+
+ private void setUpSystem(String testName) throws Exception {
+ List<ConfiguredNode> configuredNodes = new ArrayList<>();
+ for (int i = 0 ; i < 10; i++)
+ configuredNodes.add(new ConfiguredNode(i, false));
+ FleetControllerOptions options = new FleetControllerOptions("mycluster", configuredNodes);
+ options.distributionBits = 17;
+ setUpFleetController(false, options);
+ startingTest(testName);
+ List<DummyVdsNode> nodes = setUpVdsNodes(false, new DummyVdsNodeOptions(), true, configuredNodes);
+ for (DummyVdsNode node : nodes) {
+ node.setNodeState(new NodeState(node.getType(), State.UP).setMinUsedBits(20));
+ node.connect();
+ }
+ waitForState("version:\\d+ bits:17 distributor:10 storage:10");
+ }
+
+ /**
+ * Test that then altering config to increased bit count, that a new system state is sent out if the least split storagenode use more bits.
+ * Test that then altering config to increased bit count, that a new system state is not sent out (and not altered) if a storagenode needs it to be no further split.
+ */
+ @Test
+ public void testDistributionBitCountConfigIncrease() throws Exception {
+ setUpSystem("DistributionBitCountTest::testDistributionBitCountConfigIncrease");
+ options.distributionBits = 20;
+ fleetController.updateOptions(options, 0);
+ ClusterState currentState = waitForState("version:\\d+ bits:20 distributor:10 storage:10");
+
+ int version = currentState.getVersion();
+ options.distributionBits = 23;
+ fleetController.updateOptions(options, 0);
+ assertEquals(version, currentState.getVersion());
+ }
+
+ /**
+ * Test that then altering config to decrease bit count, that a new system state is sent out with that bit count.
+ */
+ @Test
+ public void testDistributionBitCountConfigDecrease() throws Exception {
+ setUpSystem("DistributionBitCountTest::testDistributionBitCountConfigDecrease");
+ options.distributionBits = 12;
+ fleetController.updateOptions(options, 0);
+ waitForState("version:\\d+ bits:12 distributor:10 storage:10");
+ }
+
+
+ /**
+ * Test that when storage node reports higher bit count, but another storage
+ * node has equally low bitcount, the fleetcontroller does nothing.
+ *
+ * Test that when storage node reports higher bit count, but another storage
+ * node now being lowest, the fleetcontroller adjusts to use that bit in system state.
+ */
+ @Test
+ public void testStorageNodeReportingHigherBitCount() throws Exception {
+ setUpSystem("DistributionBitCountTest::testStorageNodeReportingHigherBitCount");
+
+ nodes.get(1).setNodeState(new NodeState(NodeType.STORAGE, State.UP).setMinUsedBits(11));
+ nodes.get(3).setNodeState(new NodeState(NodeType.STORAGE, State.UP).setMinUsedBits(11));
+
+ ClusterState startState = waitForState("version:\\d+ bits:11 distributor:10 storage:10");
+ ClusterState state = waitForClusterStateIncludingNodesWithMinUsedBits(11, 2);
+
+ nodes.get(1).setNodeState(new NodeState(NodeType.STORAGE, State.UP).setMinUsedBits(12));
+ assertEquals(state + "->" + fleetController.getSystemState(), startState.getVersion(), fleetController.getSystemState().getVersion());
+
+ for (int i = 0; i < 10; ++i) {
+ nodes.get(i).setNodeState(new NodeState(NodeType.STORAGE, State.UP).setMinUsedBits(17));
+ }
+ assertEquals(startState.getVersion() + 1, waitForState("version:\\d+ bits:17 distributor:10 storage:10").getVersion());
+ }
+
+ /**
+ * Test that then storage node report lower bit count, but another storage node with equally low bitcount, the fleetcontroller does nothing.
+ * Test that then storage node report lower bit count, and then becomes the smallest, the fleetcontroller adjusts to use that bit in system state.
+ */
+ @Test
+ public void testStorageNodeReportingLowerBitCount() throws Exception {
+ setUpSystem("DistributionBitCountTest::testStorageNodeReportingLowerBitCount");
+
+ nodes.get(1).setNodeState(new NodeState(NodeType.STORAGE, State.UP).setMinUsedBits(13));
+ ClusterState currentState = waitForState("version:\\d+ bits:13 distributor:10 storage:10");
+ int version = currentState.getVersion();
+
+ nodes.get(3).setNodeState(new NodeState(NodeType.STORAGE, State.UP).setMinUsedBits(15));
+ assertEquals(version, currentState.getVersion());
+
+ nodes.get(3).setNodeState(new NodeState(NodeType.STORAGE, State.UP).setMinUsedBits(13));
+ assertEquals(version, currentState.getVersion());
+
+ nodes.get(3).setNodeState(new NodeState(NodeType.STORAGE, State.UP).setMinUsedBits(12));
+ waitForState("version:\\d+ bits:12 distributor:10 storage:10");
+ }
+
+}
diff --git a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/DummyCommunicator.java b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/DummyCommunicator.java
new file mode 100644
index 00000000000..11b685d3f84
--- /dev/null
+++ b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/DummyCommunicator.java
@@ -0,0 +1,114 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.vespa.clustercontroller.core;
+
+import com.yahoo.vdslib.state.ClusterState;
+import com.yahoo.vdslib.state.Node;
+import com.yahoo.vdslib.state.NodeState;
+import com.yahoo.vdslib.state.State;
+import com.yahoo.vespa.clustercontroller.core.listeners.NodeAddedOrRemovedListener;
+
+import java.util.List;
+import java.util.Map;
+import java.util.TreeMap;
+
+public class DummyCommunicator implements Communicator, NodeLookup {
+
+ List<Node> newNodes;
+ Timer timer;
+
+ public class DummyGetNodeStateRequest extends GetNodeStateRequest {
+ Waiter<GetNodeStateRequest> waiter;
+
+ public DummyGetNodeStateRequest(NodeInfo nodeInfo, Waiter<GetNodeStateRequest> waiter) {
+ super(nodeInfo);
+
+ this.waiter = waiter;
+ }
+
+ @Override
+ public void abort() {
+
+ }
+ }
+
+ public class DummySetClusterStateRequest extends SetClusterStateRequest {
+
+ public DummySetClusterStateRequest(NodeInfo nodeInfo, ClusterState state) {
+ super(nodeInfo, state.getVersion());
+ }
+
+ }
+
+ private Map<Node, DummyGetNodeStateRequest> getNodeStateRequests = new TreeMap<>();
+
+ public DummyCommunicator(List<Node> nodeList, Timer timer) {
+ this.newNodes = nodeList;
+ this.timer = timer;
+ }
+
+ @Override
+ public synchronized void getNodeState(NodeInfo node, Waiter<GetNodeStateRequest> waiter) {
+ DummyGetNodeStateRequest req = new DummyGetNodeStateRequest(node, waiter);
+ getNodeStateRequests.put(node.getNode(), req);
+ node.setCurrentNodeStateRequest(req, timer.getCurrentTimeInMillis());
+ notifyAll();
+ }
+
+ public void propagateOptions(final FleetControllerOptions options) {
+
+ }
+
+ public boolean setNodeState(Node node, State state, String description) throws Exception {
+ return setNodeState(node, new NodeState(node.getType(), state).setDescription(description), "");
+ }
+
+ public boolean setNodeState(Node node, NodeState state, String hostInfo) throws Exception {
+ DummyGetNodeStateRequest req = getNodeStateRequests.remove(node);
+
+ if (req == null) {
+ throw new IllegalStateException("Premature set node state - wait for fleet controller to request first: " + node);
+ }
+
+ GetNodeStateRequest.Reply reply = new GetNodeStateRequest.Reply(state.serialize(), hostInfo);
+ req.setReply(reply);
+
+ req.waiter.done(req);
+
+ return true;
+ }
+
+ @Override
+ public void setSystemState(ClusterState state, NodeInfo node, Waiter<SetClusterStateRequest> waiter) {
+ DummySetClusterStateRequest req = new DummySetClusterStateRequest(node, state);
+ node.setSystemStateVersionSent(state);
+ req.setReply(new SetClusterStateRequest.Reply());
+ waiter.done(req);
+ }
+
+ @Override
+ public void shutdown() {
+ }
+
+ @Override
+ public boolean updateCluster(ContentCluster cluster, NodeAddedOrRemovedListener listener) {
+ if (newNodes != null) {
+ List<Node> tmp = newNodes;
+
+ for (Node node : tmp)
+ cluster.clusterInfo().setRpcAddress(node, "foo");
+
+ for (NodeInfo info : cluster.getNodeInfo()) {
+ if (!tmp.contains(info.getNode())) {
+ info.markRpcAddressOutdated(timer);
+ listener.handleMissingNode(info);
+ }
+ }
+
+ newNodes = null;
+ return true;
+ }
+
+ return false;
+ }
+
+}
diff --git a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/DummyVdsNode.java b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/DummyVdsNode.java
new file mode 100644
index 00000000000..ff470bc6b75
--- /dev/null
+++ b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/DummyVdsNode.java
@@ -0,0 +1,504 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.vespa.clustercontroller.core;
+
+import com.yahoo.jrt.*;
+import com.yahoo.jrt.StringValue;
+import com.yahoo.jrt.slobrok.api.BackOffPolicy;
+import com.yahoo.jrt.slobrok.api.Register;
+import com.yahoo.jrt.slobrok.api.SlobrokList;
+import com.yahoo.log.LogLevel;
+import com.yahoo.vdslib.state.*;
+
+import java.net.InetAddress;
+import java.net.UnknownHostException;
+import java.util.*;
+import java.util.logging.Logger;
+
+/**
+ * Used to fake a node in VDS, such that we can test the fleetcontroller without a dummy interface
+ * for talking to VDS nodes. The fake node registers itself in slobrok and serves the node state and
+ * system state RPC methods.
+ *
+ * State shared between RPC handlers, the responder thread and the test thread is guarded by the
+ * monitor of the timer instance given at construction.
+ */
+public class DummyVdsNode {
+
+    public static Logger log = Logger.getLogger(DummyVdsNode.class.getName());
+
+    private String slobrokConnectionSpecs[];
+    private String clusterName;
+    private NodeType type;
+    private int index;
+    private NodeState nodeState;
+    private Supervisor supervisor;
+    private Acceptor acceptor;
+    private Register register;
+    private int stateCommunicationVersion;
+    private boolean negotiatedHandle = false;
+    private final Timer timer;
+    private boolean failSetSystemStateRequests = false;
+    private boolean resetTimestampOnReconnect = false;
+    private long startTimestamp;
+    private Map<Node, Long> highestStartTimestamps = new TreeMap<>();
+    public int timedOutStateReplies = 0;
+    public int outdatedStateReplies = 0;
+    public int immediateStateReplies = 0;
+    public int setNodeStateReplies = 0;
+    private boolean registeredInSlobrok = false;
+
+    /** A detached get node state request together with the time at which it must be answered. */
+    class Req {
+        Request request;
+        long timeout;
+
+        Req(Request r, long timeout) {
+            request = r;
+            this.timeout = timeout;
+        }
+    }
+    /** Back off policy that always asks for a 0.01 back off and never warns. */
+    class BackOff implements BackOffPolicy {
+        public void reset() {}
+        public double get() { return 0.01; }
+        public boolean shouldWarn(double v) { return false; }
+    }
+    private final List<Req> waitingRequests = new LinkedList<>();
+
+    /**
+     * History of received system states.
+     * Any access to this list or to its members must be synchronized on the timer variable.
+     */
+    private List<ClusterState> systemState = new LinkedList<>();
+
+    /** Answers queued node state requests once the timer has passed their answer time. */
+    private Thread messageResponder = new Thread() {
+        public void run() {
+            log.log(LogLevel.DEBUG, "Dummy node " + DummyVdsNode.this.toString() + ": starting message reponder thread");
+            while (true) {
+                synchronized (timer) {
+                    if (isInterrupted()) break;
+                    long currentTime = timer.getCurrentTimeInMillis();
+                    for (Iterator<Req> it = waitingRequests.iterator(); it.hasNext(); ) {
+                        Req r = it.next();
+                        if (r.timeout <= currentTime) {
+                            log.log(LogLevel.DEBUG, "Dummy node " + DummyVdsNode.this.toString() + ": Responding to node state request at time " + currentTime);
+                            r.request.returnValues().add(new StringValue(nodeState.serialize()));
+                            if (r.request.methodName().equals("getnodestate3")) {
+                                r.request.returnValues().add(new StringValue("No host info in dummy implementation"));
+                            }
+                            r.request.returnRequest();
+                            it.remove();
+                            ++timedOutStateReplies;
+                        }
+                    }
+                    try{
+                        // Woken early by notifyAll() on the timer, otherwise polls every 100 ms.
+                        timer.wait(100);
+                    } catch (InterruptedException e) {
+                        break;
+                    }
+                }
+            }
+            log.log(LogLevel.DEBUG, "Dummy node " + DummyVdsNode.this.toString() + ": shut down message reponder thread");
+        }
+    };
+
+    /**
+     * Creates a node in state UP and starts its responder thread. The node is not reachable until
+     * {@link #connect()} is called.
+     *
+     * @param timer                  clock for timestamps and answer times; its monitor guards shared state
+     * @param options                supplies the state communication version to expose
+     * @param slobrokConnectionSpecs slobrok connection specs to register in
+     * @param clusterName            cluster name used in the slobrok name
+     * @param distributor            true for a distributor node, false for a storage node
+     * @param index                  node index
+     */
+    public DummyVdsNode(Timer timer, DummyVdsNodeOptions options, String slobrokConnectionSpecs[], String clusterName, boolean distributor, int index) throws Exception {
+        this.timer = timer;
+        this.slobrokConnectionSpecs = slobrokConnectionSpecs;
+        this.clusterName = clusterName;
+        type = distributor ? NodeType.DISTRIBUTOR : NodeType.STORAGE;
+        this.index = index;
+        this.nodeState = new NodeState(type, State.UP);
+        this.stateCommunicationVersion = options.stateCommunicationVersion;
+        messageResponder.start();
+        nodeState.setStartTimestamp(timer.getCurrentTimeInMillis() / 1000);
+    }
+
+    /** Makes the next {@link #connect()} stamp the node state with a fresh start timestamp. */
+    public void resetStartTimestamp() {
+        resetTimestampOnReconnect = true;
+    }
+
+    /** Returns the number of node state requests currently queued for a later answer. */
+    public int getPendingNodeStateCount() {
+        // The queue is mutated by RPC and responder threads under the timer monitor.
+        synchronized (timer) {
+            return waitingRequests.size();
+        }
+    }
+
+    /** Stops the responder thread, waits for it to finish and disconnects the node. */
+    public void shutdown() {
+        messageResponder.interrupt();
+        try{ messageResponder.join(); } catch (InterruptedException e) {}
+        disconnect();
+    }
+
+    /**
+     * Starts listening on a free port, registers the RPC methods and registers the node in slobrok.
+     *
+     * @return the port the node listens on
+     */
+    public int connect() throws ListenFailedException, UnknownHostException {
+        if (resetTimestampOnReconnect) {
+            startTimestamp = timer.getCurrentTimeInMillis() / 1000;
+            nodeState.setStartTimestamp(startTimestamp);
+            resetTimestampOnReconnect = false;
+        }
+        supervisor = new Supervisor(new Transport());
+        addMethods();
+        acceptor = supervisor.listen(new Spec(0));
+        SlobrokList slist = new SlobrokList();
+        slist.setup(slobrokConnectionSpecs);
+        register = new Register(supervisor, slist, new Spec("localhost", acceptor.port()), new BackOff());
+        registerSlobrok();
+        negotiatedHandle = false;
+        return acceptor.port();
+    }
+
+    public boolean isConnected() {
+        return (registeredInSlobrok && supervisor != null);
+    }
+
+    public void registerSlobrok() {
+        register.registerName(getSlobrokName());
+        register.registerName(getSlobrokName() + "/default");
+        registeredInSlobrok = true;
+    }
+
+    public void disconnectSlobrok() {
+        register.unregisterName(getSlobrokName());
+        register.unregisterName(getSlobrokName() + "/default");
+        registeredInSlobrok = false;
+    }
+
+    public void disconnect() { disconnectImmediately(); }
+    public void disconnectImmediately() { disconnect(false, 0, false); }
+    public void disconnectBreakConnection() { disconnect(true, FleetControllerTest.timeoutMS, false); }
+    public void disconnectAsShutdown() { disconnect(true, FleetControllerTest.timeoutMS, true); }
+
+    /**
+     * Tears down the RPC server of this node. Does nothing beyond the optional steps if the node is not connected.
+     *
+     * @param waitForPendingNodeStateRequest whether to first wait until a node state request is queued
+     * @param timeoutms                      how long to wait for that request
+     * @param setStoppingStateFirst          whether to report state STOPPING before shutting down
+     */
+    public void disconnect(boolean waitForPendingNodeStateRequest, long timeoutms, boolean setStoppingStateFirst) {
+        log.log(LogLevel.DEBUG, "Dummy node " + DummyVdsNode.this.toString() + ": Breaking connection." + (waitForPendingNodeStateRequest ? " Waiting for pending state first." : ""));
+        if (waitForPendingNodeStateRequest) {
+            this.waitForPendingGetNodeStateRequest(timeoutms);
+        }
+        if (setStoppingStateFirst) {
+            NodeState newState = nodeState.clone();
+            newState.setState(State.STOPPING);
+            // newState.setDescription("Received signal 15 (SIGTERM - Termination signal)");
+            // Altered in storageserver implementation. Updating now to fit
+            newState.setDescription("controlled shutdown");
+            setNodeState(newState);
+            // Sleep a bit in hopes of answer being written before shutting down socket
+            try{ Thread.sleep(100); } catch (InterruptedException e) {}
+        }
+        if (supervisor == null) return;
+        register.shutdown();
+        acceptor.shutdown().join();
+        supervisor.transport().shutdown().join();
+        supervisor = null;
+        log.log(LogLevel.DEBUG, "Dummy node " + DummyVdsNode.this.toString() + ": Done breaking connection.");
+    }
+
+    public String toString() {
+        return type + "." + index;
+    }
+
+    public boolean isDistributor() { return type.equals(NodeType.DISTRIBUTOR); }
+    public NodeType getType() { return type; }
+
+    public Node getNode() {
+        return new Node(type, index);
+    }
+
+    public int getStateCommunicationVersion() { return stateCommunicationVersion; }
+
+    /**
+     * Polls until this node has received a system state with at least the given version.
+     * Returns early, with the interrupt status of the calling thread restored, if interrupted.
+     *
+     * @param version minimum version to wait for
+     * @param timeout maximum time to wait, in milliseconds of wall clock time
+     * @throws RuntimeException if the version has not been received within the timeout
+     */
+    public void waitForSystemStateVersion(int version, long timeout) {
+        try {
+            long startTime = System.currentTimeMillis();
+            while (getLatestSystemStateVersion().orElse(-1) < version) {
+                if ( (System.currentTimeMillis() - startTime) > timeout)
+                    throw new RuntimeException("Timed out waiting for state version " + version + " in " + this);
+                Thread.sleep(10);
+            }
+        }
+        catch (InterruptedException e) {
+            // Keep the interrupt visible to the caller instead of silently dropping it.
+            Thread.currentThread().interrupt();
+        }
+    }
+
+    /** Returns the latest system state version received, or empty if none are received yet. */
+    private Optional<Integer> getLatestSystemStateVersion() {
+        synchronized(timer) {
+            if (systemState.isEmpty()) return Optional.empty();
+            return Optional.of(systemState.get(0).getVersion());
+        }
+    }
+
+    public boolean hasPendingGetNodeStateRequest() {
+        synchronized (timer) {
+            return !waitingRequests.isEmpty();
+        }
+    }
+
+    /**
+     * Blocks until at least one node state request is queued on this node.
+     *
+     * @param timeout maximum time to wait, in milliseconds of wall clock time
+     * @throws IllegalStateException if no request is queued when the timeout has passed
+     */
+    public void waitForPendingGetNodeStateRequest(long timeout) {
+        long endTime = System.currentTimeMillis() + timeout;
+        log.log(LogLevel.DEBUG, "Dummy node " + this + " waiting for pending node state request.");
+        synchronized(timer) {
+            // The queue is always inspected under the lock, and always re-inspected after a wait,
+            // so a request arriving just as the deadline passes still counts.
+            while (waitingRequests.isEmpty()) {
+                long remaining = endTime - System.currentTimeMillis();
+                // Never call wait(0): that would block until notified, regardless of the deadline.
+                if (remaining <= 0) {
+                    log.log(LogLevel.DEBUG, "Dummy node " + this + " timeout passed. Don't have pending request.");
+                    throw new IllegalStateException("Timeout. No pending get node state request pending after waiting " + timeout + " milliseconds.");
+                }
+                try{
+                    log.log(LogLevel.DEBUG, "Dummy node " + this + " waiting " + remaining + " ms for pending request.");
+                    timer.wait(remaining);
+                } catch (InterruptedException e) {
+                    // Treated as a wake up; the loop rechecks the queue and the deadline.
+                }
+                log.log(LogLevel.DEBUG, "Dummy node " + this + " woke up to recheck.");
+            }
+            log.log(LogLevel.DEBUG, "Dummy node " + this + " has pending request, returning.");
+        }
+    }
+
+    /**
+     * Answers every queued node state request with the current node state and empties the queue.
+     * The only caller in this class, {@link #setNodeState(NodeState)}, holds the timer monitor while calling.
+     */
+    public void replyToPendingNodeStateRequests() {
+        for(Req req : waitingRequests) {
+            log.log(LogLevel.DEBUG, "Dummy node " + this + " answering pending node state request.");
+            req.request.returnValues().add(new StringValue(nodeState.serialize()));
+            if (req.request.methodName().equals("getnodestate3")) {
+                req.request.returnValues().add(new StringValue("Dummy node host info"));
+            }
+            req.request.returnRequest();
+            ++setNodeStateReplies;
+        }
+        waitingRequests.clear();
+    }
+
+    /** Sets the state this node reports and answers all queued node state requests with it. */
+    public void setNodeState(NodeState state) {
+        log.log(LogLevel.DEBUG, "Dummy node " + this + " got new state: " + state);
+        synchronized(timer) {
+            this.nodeState = state;
+            replyToPendingNodeStateRequests();
+        }
+    }
+
+    public void setNodeState(State state) {
+        setNodeState(new NodeState(type, state));
+    }
+
+    public NodeState getNodeState() {
+        synchronized(timer) {
+            return nodeState;
+        }
+    }
+
+    /** Returns a copy of the received system states, newest first. */
+    public List<ClusterState> getSystemStatesReceived() {
+        List<ClusterState> states = new ArrayList<>();
+        synchronized(timer) {
+            states.addAll(systemState);
+        }
+        return states;
+    }
+
+    /** Returns the most recently received system state, or null if none has been received. */
+    public ClusterState getClusterState() {
+        synchronized(timer) {
+            return (systemState.isEmpty() ? null : systemState.get(0));
+        }
+    }
+
+    public String getSlobrokName() {
+        return "storage/cluster." + clusterName + "/" + type + "/" + index;
+    }
+
+    /**
+     * Registers the RPC methods on the supervisor. The "2" variants are only added when the state
+     * communication version is above 0, and getnodestate3 only when it is above 1.
+     */
+    private void addMethods() {
+        Method m;
+
+        m = new Method("vespa.storage.connect", "s", "i", this, "rpc_storageConnect");
+        m.methodDesc("Binds connection to a storage API handle");
+        m.paramDesc(0, "somearg", "Argument looking like slobrok address of the ones we're asking for some reason");
+        m.returnDesc(0, "returnCode", "Returncode of request. Should be 0 = OK");
+        supervisor.addMethod(m);
+
+        m = new Method("getnodestate", "", "issi", this, "rpc_getNodeState");
+        m.methodDesc("Get nodeState of a node");
+        m.returnDesc(0, "returnCode", "Returncode of request. Should be 1 = OK");
+        m.returnDesc(1, "returnMessage", "Textual error message if returncode is not ok.");
+        m.returnDesc(2, "nodeState", "The node state of the given node");
+        m.returnDesc(3, "progress", "Progress in percent of node initialization");
+        supervisor.addMethod(m);
+
+        m = new Method("setsystemstate", "s", "is", this, "rpc_setSystemState");
+        m.methodDesc("Set system state of entire system");
+        m.paramDesc(0, "systemState", "new systemstate");
+        m.returnDesc(0, "returnCode", "Returncode of request. Should be 1 = OK");
+        m.returnDesc(1, "returnMessage", "Textual error message if returncode is not ok.");
+        supervisor.addMethod(m);
+
+        if (stateCommunicationVersion > 0) {
+            m = new Method("getnodestate2", "si", "s", this, "rpc_getNodeState2");
+            m.methodDesc("Get nodeState of a node, answer when state changes from given state.");
+            m.paramDesc(0, "nodeStateIn", "The node state of the given node");
+            m.paramDesc(1, "timeout", "Time timeout in milliseconds set by the state requester.");
+            m.returnDesc(0, "nodeStateOut", "The node state of the given node");
+            supervisor.addMethod(m);
+
+            m = new Method("setsystemstate2", "s", "", this, "rpc_setSystemState2");
+            m.methodDesc("Set system state of entire system");
+            m.paramDesc(0, "systemState", "new systemstate");
+            supervisor.addMethod(m);
+
+            if (stateCommunicationVersion > 1) {
+                m = new Method("getnodestate3", "sii", "ss", this, "rpc_getNodeState2");
+                m.methodDesc("Get nodeState of a node, answer when state changes from given state.");
+                m.paramDesc(0, "nodeStateIn", "The node state of the given node");
+                m.paramDesc(1, "timeout", "Time timeout in milliseconds set by the state requester.");
+                m.returnDesc(0, "nodeStateOut", "The node state of the given node");
+                m.returnDesc(1, "hostinfo", "Information on the host node is running on");
+                supervisor.addMethod(m);
+            }
+        }
+    }
+
+    /** Handler for vespa.storage.connect: returns 0 and marks the handle as negotiated. */
+    public void rpc_storageConnect(Request req) {
+        synchronized(timer) {
+            log.log(LogLevel.SPAM, "Dummy node " + this + " got old type handle connect message.");
+            req.returnValues().add(new Int32Value(0));
+            negotiatedHandle = true;
+        }
+    }
+
+    /** Handler for getnodestate: fails with error 75000 unless a handle has been negotiated first. */
+    public void rpc_getNodeState(Request req) {
+        synchronized(timer) {
+            if (!negotiatedHandle) {
+                req.setError(75000, "Connection not bound to a handle");
+                return;
+            }
+            String stateString = nodeState.serialize(-1, true);
+            log.log(LogLevel.DEBUG, "Dummy node " + this + " got old type get node state request, answering: " + stateString);
+            req.returnValues().add(new Int32Value(1));
+            req.returnValues().add(new StringValue(""));
+            req.returnValues().add(new StringValue(stateString));
+            req.returnValues().add(new Int32Value(0));
+        }
+    }
+
+    /**
+     * Answers the first queued request whose third parameter equals the given controller index.
+     * The only caller in this class, {@link #rpc_getNodeState2}, holds the timer monitor while calling.
+     *
+     * @return true if a queued request was answered and removed
+     */
+    public boolean sendGetNodeStateReply(int index) {
+        for (Iterator<Req> it = waitingRequests.iterator(); it.hasNext(); ) {
+            Req r = it.next();
+            if (r.request.parameters().size() > 2 && r.request.parameters().get(2).asInt32() == index) {
+                log.log(LogLevel.DEBUG, "Dummy node " + DummyVdsNode.this.toString() + ": Responding to node state reply from controller " + index + " as we received new one");
+                r.request.returnValues().add(new StringValue(nodeState.serialize()));
+                r.request.returnValues().add(new StringValue("No host info from dummy implementation"));
+                r.request.returnRequest();
+                it.remove();
+                ++outdatedStateReplies;
+                return true;
+            }
+        }
+        return false;
+    }
+
+    /**
+     * Handler for getnodestate2 and getnodestate3. When the caller already knows the current state,
+     * or when an older request from the same controller was just answered, the request is detached
+     * and queued to be answered at 80% of the given timeout. Otherwise it is answered immediately.
+     */
+    public void rpc_getNodeState2(Request req) {
+        log.log(LogLevel.DEBUG, "Dummy node " + this + ": Got " + req.methodName() + " request");
+        try{
+            String oldState = req.parameters().get(0).asString();
+            int timeout = req.parameters().get(1).asInt32();
+            int index = -1;
+            if (req.parameters().size() > 2) {
+                index = req.parameters().get(2).asInt32();
+            }
+            synchronized(timer) {
+                boolean sentReply = sendGetNodeStateReply(index);
+                NodeState givenState = (oldState.equals("unknown") ? null : NodeState.deserialize(type, oldState));
+                if (givenState != null && (givenState.equals(nodeState) || sentReply)) {
+                    // Computed once so that the logged answer time is the one actually queued.
+                    long answerTime = timer.getCurrentTimeInMillis() + timeout * 800L / 1000;
+                    log.log(LogLevel.DEBUG, "Dummy node " + this + ": Has same state as reported " + givenState + ". Queing request. Timeout is " + timeout + " ms. "
+                            + "Will be answered at time " + answerTime);
+                    req.detach();
+                    waitingRequests.add(new Req(req, answerTime));
+                    log.log(LogLevel.DEBUG, "Dummy node " + this + " has now " + waitingRequests.size() + " entries and is " + (waitingRequests.isEmpty() ? "empty" : "not empty"));
+                    timer.notifyAll();
+                } else {
+                    log.log(LogLevel.DEBUG, "Dummy node " + this + ": Request had " + (givenState == null ? "no state" : "different state(" + givenState +")") + ". Answering with " + nodeState);
+                    req.returnValues().add(new StringValue(nodeState.serialize()));
+                    if (req.methodName().equals("getnodestate3")) {
+                        req.returnValues().add(new StringValue("Dummy node host info"));
+                    }
+                    ++immediateStateReplies;
+                }
+            }
+        } catch (Exception e) {
+            log.log(LogLevel.ERROR, "Dummy node " + this + ": An error occured when answering " + req.methodName() + " request: " + e.getMessage());
+            e.printStackTrace(System.err);
+            req.setError(ErrorCode.METHOD_FAILED, e.getMessage());
+        }
+    }
+
+    /** Returns the highest start timestamp seen for the node in received system states, or 0 if none. */
+    public long getStartTimestamp(Node n) {
+        // The map is updated under the timer monitor when system states arrive.
+        synchronized (timer) {
+            Long ts = highestStartTimestamps.get(n);
+            return (ts == null ? 0 : ts);
+        }
+    }
+
+    /**
+     * Records the non-zero start timestamps of all distributor and storage nodes in the given state.
+     * Callers in this class hold the timer monitor.
+     *
+     * @throws Error if a start timestamp is lower than one previously recorded for the same node
+     */
+    private void updateStartTimestamps(ClusterState state) {
+        for(int i=0; i<2; ++i) {
+            NodeType nodeType = (i == 0 ? NodeType.DISTRIBUTOR : NodeType.STORAGE);
+            for (int j=0, n=state.getNodeCount(nodeType); j<n; ++j) {
+                Node node = new Node(nodeType, j);
+                NodeState ns = state.getNodeState(node);
+                if (ns.getStartTimestamp() != 0) {
+                    Long oldValue = highestStartTimestamps.get(node);
+                    if (oldValue != null && oldValue > ns.getStartTimestamp()) {
+                        throw new Error("Somehow start timestamp of node " + node + " has gone down");
+                    }
+                    highestStartTimestamps.put(node, ns.getStartTimestamp());
+                }
+            }
+        }
+    }
+
+    /** Makes subsequent set system state RPC calls fail (true) or succeed (false). */
+    public void failSetSystemState(boolean failSystemStateRequests) {
+        synchronized (timer) {
+            this.failSetSystemStateRequests = failSystemStateRequests;
+        }
+    }
+
+    private boolean shouldFailSetSystemStateRequests() {
+        synchronized (timer) {
+            return failSetSystemStateRequests;
+        }
+    }
+
+    /**
+     * Handler for setsystemstate. Requires a negotiated handle; parse and bookkeeping failures are
+     * reported through the return values rather than as an RPC error.
+     */
+    public void rpc_setSystemState(Request req) {
+        try{
+            if (shouldFailSetSystemStateRequests()) {
+                req.setError(ErrorCode.GENERAL_ERROR, "Dummy node configured to fail setSystemState() calls");
+                return;
+            }
+            if (!negotiatedHandle) {
+                req.setError(75000, "Connection not bound to a handle");
+                return;
+            }
+            ClusterState newState = new ClusterState(req.parameters().get(0).asString());
+            synchronized(timer) {
+                updateStartTimestamps(newState);
+                systemState.add(0, newState);
+                timer.notifyAll();
+            }
+            req.returnValues().add(new Int32Value(1));
+            req.returnValues().add(new StringValue("OK"));
+            log.log(LogLevel.DEBUG, "Dummy node " + this + ": Got new system state (through old setsystemstate call) " + newState);
+        } catch (Exception e) {
+            log.log(LogLevel.ERROR, "Dummy node " + this + ": An error occured when answering setsystemstate request: " + e.getMessage());
+            e.printStackTrace(System.err);
+            req.returnValues().add(new Int32Value(ErrorCode.METHOD_FAILED));
+            req.returnValues().add(new StringValue(e.getMessage()));
+        }
+    }
+
+    /** Handler for setsystemstate2: stores the received state as the newest one, or sets an RPC error. */
+    public void rpc_setSystemState2(Request req) {
+        try{
+            if (shouldFailSetSystemStateRequests()) {
+                req.setError(ErrorCode.GENERAL_ERROR, "Dummy node configured to fail setSystemState2() calls");
+                return;
+            }
+            ClusterState newState = new ClusterState(req.parameters().get(0).asString());
+            synchronized(timer) {
+                updateStartTimestamps(newState);
+                systemState.add(0, newState);
+                timer.notifyAll();
+            }
+            log.log(LogLevel.DEBUG, "Dummy node " + this + ": Got new system state " + newState);
+        } catch (Exception e) {
+            log.log(LogLevel.ERROR, "Dummy node " + this + ": An error occured when answering setsystemstate request: " + e.getMessage());
+            e.printStackTrace(System.err);
+            req.setError(ErrorCode.METHOD_FAILED, e.getMessage());
+        }
+    }
+}
diff --git a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/DummyVdsNodeOptions.java b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/DummyVdsNodeOptions.java
new file mode 100644
index 00000000000..5436e1f62c7
--- /dev/null
+++ b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/DummyVdsNodeOptions.java
@@ -0,0 +1,9 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.vespa.clustercontroller.core;
+
+/**
+ * Options given to the {@code DummyVdsNode} constructor.
+ */
+public class DummyVdsNodeOptions {
+ // State communication version the dummy node exposes. DummyVdsNode only registers the
+ // getnodestate2/setsystemstate2 RPC methods when this is above 0, and getnodestate3 when above 1.
+ // The trailing comment maps each value to the product versions it corresponds to.
+ public int stateCommunicationVersion = 2; // 0 - 4.1, 1 - 4.2-5.0.10, 2 - 5.0.11+
+}
diff --git a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/EventLogTest.java b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/EventLogTest.java
new file mode 100644
index 00000000000..65950136675
--- /dev/null
+++ b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/EventLogTest.java
@@ -0,0 +1,63 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.vespa.clustercontroller.core;
+
+import com.yahoo.vdslib.state.Node;
+import com.yahoo.vdslib.state.NodeType;
+import com.yahoo.vespa.clustercontroller.utils.util.MetricReporter;
+import org.junit.Test;
+import org.junit.runner.RunWith;
+import org.mockito.runners.MockitoJUnitRunner;
+
+import java.util.logging.Level;
+
+import static org.junit.Assert.assertNotEquals;
+import static org.mockito.Mockito.mock;
+import static org.mockito.Mockito.verify;
+import static org.mockito.Mockito.verifyNoMoreInteractions;
+import static org.mockito.Mockito.when;
+
+/**
+ * Tests of {@link EventLog}: metric reporting when node events are added, and HTML rendering for a
+ * node without events.
+ */
+@RunWith(MockitoJUnitRunner.class)
+public class EventLogTest {
+    private MetricUpdater metricUpdater = mock(MetricUpdater.class);
+    private NodeEvent nodeEvent = mock(NodeEvent.class);
+
+    private EventLog eventLog;
+
+    /** Creates the event log under test and stubs the mocked event so that it resolves to a node. */
+    private void initialize(MetricUpdater updater) {
+        eventLog = new EventLog(new FakeTimer(), updater);
+
+        // Avoid NullPointerException...
+        Node stubbedNode = mock(Node.class);
+        NodeInfo stubbedNodeInfo = mock(NodeInfo.class);
+        when(stubbedNodeInfo.getNode()).thenReturn(stubbedNode);
+        when(nodeEvent.getNode()).thenReturn(stubbedNodeInfo);
+    }
+
+    /** Adding a node event records exactly one new node event on the metric updater. */
+    @Test
+    public void testMetric() {
+        initialize(metricUpdater);
+
+        eventLog.addNodeOnlyEvent(nodeEvent, Level.INFO);
+
+        verify(metricUpdater).recordNewNodeEvent();
+        verifyNoMoreInteractions(metricUpdater);
+    }
+
+    /** Adding a node event works when the event log was created without a metric updater. */
+    @Test
+    public void testNullMetricReporter() {
+        initialize(null);
+
+        eventLog.addNodeOnlyEvent(nodeEvent, Level.INFO);
+
+        verifyNoMoreInteractions(metricUpdater);
+    }
+
+    /** Rendering HTML for a node without events produces output instead of throwing. */
+    @Test
+    public void testNoEventsDoNotThrowException() {
+        initialize(metricUpdater);
+        Node nodeWithoutEvents = new Node(NodeType.DISTRIBUTOR, 0);
+        StringBuilder html = new StringBuilder();
+        eventLog.writeHtmlState(html, nodeWithoutEvents);
+        assertNotEquals("", html.toString());
+    }
+}
diff --git a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/FakeTimer.java b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/FakeTimer.java
new file mode 100644
index 00000000000..26c14d3f82a
--- /dev/null
+++ b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/FakeTimer.java
@@ -0,0 +1,34 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.vespa.clustercontroller.core;
+
+import com.yahoo.log.LogLevel;
+import com.yahoo.vespa.clustercontroller.core.testutils.LogFormatter;
+
+import java.util.logging.Logger;
+
+/**
+ * Manually driven {@link Timer} for unit tests.
+ *
+ * The reported time only changes when {@link #advanceTime(long)} is called.
+ */
+public class FakeTimer implements Timer {
+    private static Logger log = Logger.getLogger(FakeTimer.class.getName());
+    // Deliberately non-zero (30 * 365 days in milliseconds). Clock users may initialize a
+    // 'last run' entry with 0, and the first check should always look like a timeout.
+    long currentTime = (long)30 * 365 * 24 * 60 * 60 * 1000;
+
+    /** Returns the current fake time in milliseconds. */
+    public synchronized long getCurrentTimeInMillis() {
+        return currentTime;
+    }
+
+    /**
+     * Moves the fake clock forward by the given number of milliseconds and wakes up every
+     * thread waiting on this timer's monitor.
+     */
+    public synchronized void advanceTime(long time) {
+        final long before = getCurrentTimeInMillis();
+        final long after = before + time;
+        this.currentTime += time;
+        log.log(LogLevel.DEBUG, "Time advanced by " + time + " ms. Time increased from " + before + " to " + after);
+        notifyAll();
+    }
+
+    static {
+        LogFormatter.initializeLogging();
+    }
+
+}
diff --git a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/FleetControllerTest.java b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/FleetControllerTest.java
new file mode 100644
index 00000000000..86248d2e1e3
--- /dev/null
+++ b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/FleetControllerTest.java
@@ -0,0 +1,555 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.vespa.clustercontroller.core;
+
+import com.yahoo.jrt.slobrok.api.BackOffPolicy;
+import com.yahoo.jrt.slobrok.server.Slobrok;
+import com.yahoo.log.LogLevel;
+import com.yahoo.log.LogSetup;
+import com.yahoo.vdslib.distribution.ConfiguredNode;
+import com.yahoo.vdslib.distribution.Distribution;
+import com.yahoo.vdslib.state.ClusterState;
+import com.yahoo.vdslib.state.Node;
+import com.yahoo.vdslib.state.NodeState;
+import com.yahoo.vespa.clustercontroller.core.database.DatabaseHandler;
+import com.yahoo.vespa.clustercontroller.core.hostinfo.HostInfo;
+import com.yahoo.vespa.clustercontroller.core.rpc.RPCCommunicator;
+import com.yahoo.vespa.clustercontroller.core.rpc.RpcServer;
+import com.yahoo.vespa.clustercontroller.core.rpc.SlobrokClient;
+import com.yahoo.vespa.clustercontroller.core.status.statuspage.StatusPageServer;
+import com.yahoo.vespa.clustercontroller.core.status.statuspage.StatusPageServerInterface;
+import com.yahoo.vespa.clustercontroller.core.testutils.WaitCondition;
+import com.yahoo.vespa.clustercontroller.core.testutils.WaitTask;
+import com.yahoo.vespa.clustercontroller.core.testutils.Waiter;
+import com.yahoo.vespa.clustercontroller.utils.util.NoMetricReporter;
+import org.junit.After;
+import org.junit.Rule;
+import org.junit.rules.TestRule;
+import org.junit.rules.TestWatcher;
+import org.junit.runner.Description;
+
+import static org.junit.Assert.fail;
+
+import java.io.ByteArrayOutputStream;
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.IOException;
+import java.util.*;
+import java.util.logging.Logger;
+import java.util.regex.Pattern;
+import java.util.stream.Collectors;
+
+/**
+ * @author humbe
+ */
+public abstract class FleetControllerTest implements Waiter {
+
+ private static Logger log = Logger.getLogger(FleetControllerTest.class.getName());
+ protected static final int DEFAULT_NODE_COUNT = 10;
+
+ protected FakeTimer timer = new FakeTimer();
+ protected boolean usingFakeTimer = false;
+ protected Slobrok slobrok;
+ protected FleetControllerOptions options;
+ protected ZooKeeperTestServer zooKeeperServer;
+ protected FleetController fleetController;
+ protected List<DummyVdsNode> nodes = new ArrayList<>();
+ protected String testName;
+
+ public final static int timeoutS;
+ public final static int timeoutMS;
+ private final Waiter waiter = new Waiter.Impl(new DataRetriever() {
+ @Override
+ public Object getMonitor() { return timer; }
+ @Override
+ public FleetController getFleetController() { return fleetController; }
+ @Override
+ public List<DummyVdsNode> getDummyNodes() { return nodes; }
+ @Override
+ public int getTimeoutMS() { return timeoutMS; }
+ });
+
+ static {
+ LogSetup.initVespaLogging("fleetcontroller");
+ timeoutS = 120;
+ timeoutMS = timeoutS * 1000;
+ }
+
+ class BackOff implements BackOffPolicy {
+ private int counter = 0;
+ public void reset() { counter = 0; }
+ public double get() { ++counter; return 0.01; }
+ public boolean shouldWarn(double v) { return ((counter % 1000) == 10); }
+ }
+
+    /**
+     * Test watcher that shuts down the ZooKeeper test server after each test, asking it to
+     * clean up its directory only when the test succeeded.
+     */
+    protected class CleanupZookeeperLogsOnSuccess extends TestWatcher {
+        @Override
+        protected void failed(Throwable e, Description description) {
+            System.err.println("TEST FAILED - NOT cleaning up zookeeper directory");
+            shutdownZooKeeper(false);
+        }
+
+        @Override
+        protected void succeeded(Description description) {
+            System.err.println("TEST SUCCEEDED - cleaning up zookeeper directory");
+            shutdownZooKeeper(true);
+        }
+
+        /** Shuts down the ZooKeeper test server, if one is running, and forgets it. */
+        private void shutdownZooKeeper(boolean cleanupZooKeeperDir) {
+            if (zooKeeperServer == null) {
+                return;
+            }
+            zooKeeperServer.shutdown(cleanupZooKeeperDir);
+            zooKeeperServer = null;
+        }
+    }
+
+ @Rule
+ public TestRule cleanupZookeeperLogsOnSuccess = new CleanupZookeeperLogsOnSuccess();
+
+    /** Remembers the name of the running test and announces it on stderr. */
+    protected void startingTest(String name) {
+        testName = name;
+        System.err.println("STARTING TEST: " + name);
+    }
+
+    /**
+     * Starts a slobrok and, when the given options carry a non-null ZooKeeper address, a
+     * ZooKeeper test server. The options are stored on this test and rewritten to point at
+     * the servers that were started.
+     */
+    protected void setUpSystem(boolean useFakeTimer, FleetControllerOptions options) throws Exception {
+        log.log(LogLevel.DEBUG, "Setting up system");
+        slobrok = new Slobrok();
+        this.options = options;
+        boolean wantsZooKeeper = options.zooKeeperServerAddress != null;
+        if (wantsZooKeeper) {
+            zooKeeperServer = new ZooKeeperTestServer();
+            this.options.zooKeeperServerAddress = zooKeeperServer.getAddress();
+            log.log(LogLevel.DEBUG, "Set up new zookeeper server at " + this.options.zooKeeperServerAddress);
+        }
+        this.options.slobrokConnectionSpecs = new String[] { "tcp/localhost:" + slobrok.port() };
+        this.usingFakeTimer = useFakeTimer;
+    }
+
+ protected FleetController createFleetController(boolean useFakeTimer, FleetControllerOptions options, boolean startThread, StatusPageServerInterface status) throws Exception {
+ Timer timer = useFakeTimer ? this.timer : new RealTimer();
+ MetricUpdater metricUpdater = new MetricUpdater(new NoMetricReporter(), options.fleetControllerIndex);
+ EventLog log = new EventLog(timer, metricUpdater);
+ ContentCluster cluster = new ContentCluster(
+ options.clusterName,
+ options.nodes,
+ options.storageDistribution,
+ options.minStorageNodesUp,
+ options.minRatioOfStorageNodesUp);
+ NodeStateGatherer stateGatherer = new NodeStateGatherer(timer, timer, log);
+ Communicator communicator = new RPCCommunicator(
+ timer,
+ options.fleetControllerIndex,
+ options.nodeStateRequestTimeoutMS,
+ options.nodeStateRequestTimeoutEarliestPercentage,
+ options.nodeStateRequestTimeoutLatestPercentage,
+ options.nodeStateRequestRoundTripTimeMaxSeconds);
+ SlobrokClient lookUp = new SlobrokClient(timer);
+ lookUp.setSlobrokConnectionSpecs(new String[0]);
+ if (status == null) {
+ status = new StatusPageServer(timer, timer, options.httpPort);
+ }
+ RpcServer rpcServer = new RpcServer(timer, timer, options.clusterName, options.fleetControllerIndex, options.slobrokBackOffPolicy);
+ DatabaseHandler database = new DatabaseHandler(timer, options.zooKeeperServerAddress, options.fleetControllerIndex, timer);
+ SystemStateGenerator stateGenerator = new SystemStateGenerator(timer, log, metricUpdater);
+ SystemStateBroadcaster stateBroadcaster = new SystemStateBroadcaster(timer, timer);
+ MasterElectionHandler masterElectionHandler = new MasterElectionHandler(options.fleetControllerIndex, options.fleetControllerCount, timer, timer);
+ FleetController controller = new FleetController(timer, log, cluster, stateGatherer, communicator, status, rpcServer, lookUp, database, stateGenerator, stateBroadcaster, masterElectionHandler, metricUpdater, options);
+ if (startThread) {
+ controller.start();
+ }
+ return controller;
+ }
+
+    /** Sets up the fleet controller with its thread started and a default status page server. */
+    protected void setUpFleetController(boolean useFakeTimer, FleetControllerOptions options) throws Exception {
+        setUpFleetController(useFakeTimer, options, true);
+    }
+
+    /** Sets up the fleet controller with a default status page server. */
+    protected void setUpFleetController(boolean useFakeTimer, FleetControllerOptions options, boolean startThread) throws Exception {
+        setUpFleetController(useFakeTimer, options, startThread, null);
+    }
+
+    /**
+     * Sets up the surrounding system (if no slobrok is running yet) and creates the single
+     * fleet controller of this test.
+     *
+     * @throws Exception if a fleet controller has already been set up
+     */
+    protected void setUpFleetController(boolean useFakeTimer, FleetControllerOptions options, boolean startThread, StatusPageServerInterface status) throws Exception {
+        if (slobrok == null) {
+            setUpSystem(useFakeTimer, options);
+        }
+        if (fleetController != null) {
+            throw new Exception("called setUpFleetcontroller but it was already setup");
+        }
+        fleetController = createFleetController(useFakeTimer, options, startThread, status);
+    }
+
+    /** Shuts down and forgets the fleet controller; does nothing when none is set up. */
+    protected void stopFleetController() throws Exception {
+        if (fleetController == null) {
+            return;
+        }
+        fleetController.shutdown();
+        fleetController = null;
+    }
+    /**
+     * Creates and starts a fleet controller from the stored options. When one already exists
+     * a warning is logged and nothing else happens.
+     */
+    protected void startFleetController() throws Exception {
+        if (fleetController != null) {
+            log.log(LogLevel.WARNING, "already started fleetcontroller, not starting another");
+            return;
+        }
+        fleetController = createFleetController(usingFakeTimer, options, true, null);
+    }
+
+    /** Adds {@link #DEFAULT_NODE_COUNT} connected distributor/storage node pairs. */
+    protected void setUpVdsNodes(boolean useFakeTimer, DummyVdsNodeOptions options) throws Exception {
+        setUpVdsNodes(useFakeTimer, options, false);
+    }
+
+    /** Adds {@link #DEFAULT_NODE_COUNT} distributor/storage node pairs. */
+    protected void setUpVdsNodes(boolean useFakeTimer, DummyVdsNodeOptions options, boolean startDisconnected) throws Exception {
+        setUpVdsNodes(useFakeTimer, options, startDisconnected, DEFAULT_NODE_COUNT);
+    }
+
+    /** Adds {@code nodeCount} node pairs, numbered consecutively after the existing pairs. */
+    protected void setUpVdsNodes(boolean useFakeTimer, DummyVdsNodeOptions options, boolean startDisconnected, int nodeCount) throws Exception {
+        // Each index owns two dummy nodes (distributor and storage), hence the division by 2.
+        int firstIndex = this.nodes.size() / 2;
+        TreeSet<Integer> indexes = new TreeSet<>();
+        for (int offset = 0; offset < nodeCount; ++offset) {
+            indexes.add(firstIndex + offset);
+        }
+        setUpVdsNodes(useFakeTimer, options, startDisconnected, indexes);
+    }
+
+    /**
+     * Appends one distributor and one storage dummy node (in that order) for each given
+     * index, connecting each node unless {@code startDisconnected} is set.
+     */
+    protected void setUpVdsNodes(boolean useFakeTimer, DummyVdsNodeOptions options, boolean startDisconnected, Set<Integer> nodeIndexes) throws Exception {
+        String[] connectionSpecs = { "tcp/localhost:" + slobrok.port() };
+        for (int nodeIndex : nodeIndexes) {
+            for (boolean distributor : new boolean[] { true, false }) {
+                DummyVdsNode created = new DummyVdsNode(useFakeTimer ? timer : new RealTimer(), options, connectionSpecs, this.options.clusterName, distributor, nodeIndex);
+                nodes.add(created);
+                if ( ! startDisconnected) {
+                    created.connect();
+                }
+            }
+        }
+    }
+ // TODO: Replace all usages of the above setUp methods with this one, and remove the nodes field
+
+    /**
+     * Creates dummy vds nodes for the given configured nodes and returns them.
+     * Two dummy nodes are created per configured node, a distributor followed by a storage
+     * node, so the returned list is twice as large as configuredNodes.
+     * Note that the nodes field is replaced by a fresh list, which is also the list returned.
+     */
+    protected List<DummyVdsNode> setUpVdsNodes(boolean useFakeTimer, DummyVdsNodeOptions options, boolean startDisconnected, List<ConfiguredNode> configuredNodes) throws Exception {
+        String[] connectionSpecs = { "tcp/localhost:" + slobrok.port() };
+        nodes = new ArrayList<>();
+        for (ConfiguredNode configured : configuredNodes) {
+            for (boolean distributor : new boolean[] { true, false }) {
+                DummyVdsNode created = new DummyVdsNode(useFakeTimer ? timer : new RealTimer(), options, connectionSpecs, this.options.clusterName, distributor, configured.index());
+                nodes.add(created);
+                if ( ! startDisconnected) {
+                    created.connect();
+                }
+            }
+        }
+        return nodes;
+    }
+
+ /** Callback applied to each node's {@link NodeInfo} when a test cluster is being prepared. */
+ public interface NodeModifier {
+ void modify(NodeInfo node);
+ }
+
+    /**
+     * Returns the default node modifier for tests.
+     * Distributors get no host info; distributor 13 additionally gets a premature crash count
+     * two above the configured maximum. Storage nodes get generated host info with latency 75
+     * and count 1000, except for the indexes singled out below.
+     */
+    NodeModifier makeDefaultTestNodeModifier() {
+        return node -> {
+            if (node.isDistributor()) {
+                if (node.getNodeIndex() == 13) {
+                    node.setPrematureCrashCount(fleetController.getOptions().maxPrematureCrashes + 2);
+                }
+                return;
+            }
+            double latency = 75;
+            long count = 1000;
+            switch (node.getNodeIndex()) {
+                case 4:
+                    latency = 300;
+                    count = 500;
+                    break;
+                case 7:
+                    latency = 120;
+                    count = 800;
+                    break;
+                case 21:
+                    latency = 2000;
+                    count = 600;
+                    break;
+                case 25:
+                    node.setPrematureCrashCount(fleetController.getOptions().maxPrematureCrashes + 1);
+                    break;
+                case 26:
+                    node.setPrematureCrashCount(fleetController.getOptions().maxPrematureCrashes);
+                    break;
+                default:
+                    break;
+            }
+            node.setHostInfo(HostInfo.createHostInfo(generateHostInfo(latency, count)));
+        };
+    }
+
+ NodeModifier makeStdDevTestNodeModifier() {
+ return new NodeModifier() {
+ double[] latencies = new double[] { 30, 300, 60, 270 };
+ int counter = 0;
+
+ @Override
+ public void modify(NodeInfo node) {
+ if (node.isDistributor()) {
+ return;
+ }
+ String hostInfo = generateHostInfo(latencies[counter++ % latencies.length], 1500);
+ node.setHostInfo(HostInfo.createHostInfo(hostInfo));
+ }
+ };
+ }
+
+ protected void setUpSlowDiskCluster(NodeModifier callback) throws Exception {
+ int nodeCount = 31;
+ FleetControllerOptions options = new FleetControllerOptions("mycluster");
+ // TODO: multiple groups!
+ options.setStorageDistribution(new Distribution(Distribution.getDefaultDistributionConfig(3, nodeCount)));
+ setUpFleetController(true, options);
+ setUpVdsNodes(true, new DummyVdsNodeOptions(), false, nodeCount);
+ waitForStableSystem(nodeCount);
+ // Set one node as not being up. It should not contribute to the overall
+ // latency or operation metrics, nor should its disks be included.
+ nodes.get(2*13).disconnectAsShutdown();
+ nodes.get(2*21+1).disconnectAsShutdown();
+ waiter.waitForState("version:\\d+ distributor:31 .13.s:d storage:31 .21.s:m");
+
+ for (NodeInfo node : fleetController.getCluster().getNodeInfo()) {
+ callback.modify(node);
+ }
+ }
+
+    /**
+     * Sets up a cluster with the given number of nodes on fake time, waits until all nodes
+     * are reported in the cluster state, and then applies the default test node modifier to
+     * every node info in the cluster.
+     */
+    protected void setUpSimpleCluster(int nodeCount) throws Exception {
+        FleetControllerOptions clusterOptions = new FleetControllerOptions("mycluster");
+        // TODO: multiple groups!
+        clusterOptions.setStorageDistribution(new Distribution(Distribution.getDefaultDistributionConfig(3, nodeCount)));
+        setUpFleetController(true, clusterOptions);
+        setUpVdsNodes(true, new DummyVdsNodeOptions(), false, nodeCount);
+        waitForStableSystem(nodeCount);
+        waiter.waitForState("version:\\d+ distributor:" + nodeCount + " storage:" + nodeCount);
+
+        NodeModifier modifier = makeDefaultTestNodeModifier();
+        for (NodeInfo info : fleetController.getCluster().getNodeInfo()) {
+            modifier.modify(info);
+        }
+    }
+
+    /**
+     * Shuts down everything this test started: the fleet controller, all dummy nodes and the
+     * slobrok, in that order. Each field is reset afterwards, so calling this method again is
+     * harmless and the test can set up a new system.
+     */
+    protected void tearDownSystem() throws Exception {
+        if (testName != null) {
+            System.err.println("STOPPING TEST " + testName);
+            testName = null;
+        }
+        if (fleetController != null) {
+            fleetController.shutdown();
+            fleetController = null;
+        }
+        if (nodes != null) {
+            for (DummyVdsNode node : nodes) {
+                node.shutdown();
+            }
+            // Reset once, after every node has been shut down. Previously the field was
+            // nulled inside the loop body, so it was reassigned while being iterated and an
+            // empty list was never reset. An empty list (the field's initial state) keeps
+            // later setUpVdsNodes calls working.
+            nodes = new ArrayList<>();
+        }
+        if (slobrok != null) {
+            slobrok.stop();
+            slobrok = null;
+        }
+    }
+
+ /** JUnit hook run after each test; delegates to {@link #tearDownSystem()}. */
+ @After
+ public void tearDown() throws Exception {
+ tearDownSystem();
+ }
+
+ public ClusterState waitForStableSystem() throws Exception { return waiter.waitForStableSystem(); }
+ public ClusterState waitForStableSystem(int nodeCount) throws Exception { return waiter.waitForStableSystem(nodeCount); }
+ public ClusterState waitForState(String state) throws Exception { return waiter.waitForState(state); }
+ public ClusterState waitForState(String state, int timeoutMS) throws Exception { return waiter.waitForState(state, timeoutMS); }
+ public ClusterState waitForInitProgressPassed(Node n, double progress) { return waiter.waitForInitProgressPassed(n, progress); }
+ public ClusterState waitForClusterStateIncludingNodesWithMinUsedBits(int bitcount, int nodecount) { return waiter.waitForClusterStateIncludingNodesWithMinUsedBits(bitcount, nodecount); }
+
+ protected void waitForNodeStateReported(int nodeIndex, NodeState state, int ms) {
+ long timeoutAtTime = System.currentTimeMillis() + ms;
+ while (true) {
+ Node node = nodes.get(nodeIndex).getNode();
+ NodeState ns = fleetController.getReportedNodeState(node);
+ if ((ns == null && state == null) || (ns != null && state != null && ns.equals(state))) break;
+ if (System.currentTimeMillis() > timeoutAtTime) {
+ throw new IllegalStateException("Failed to find " + node + " in nodestate " + state + " before timeout of " + ms + " milliseconds.");
+ }
+ }
+ }
+
+ /** Forwards to the Waiter instance held by this test. */
+ public void wait(WaitCondition c, WaitTask wt, int timeoutMS) {
+ waiter.wait(c, wt, timeoutMS);
+ }
+
+ /** Asks the fleet controller to wait for a complete cycle, passing the static timeoutMS. */
+ public void waitForCompleteCycle() {
+ fleetController.waitForCompleteCycle(timeoutMS);
+ }
+
+ /** Same as {@link #verifyNodeEvents(Node, String, String)} with no ignore pattern. */
+ protected void verifyNodeEvents(Node n, String exp) {
+ verifyNodeEvents(n, exp, null);
+ }
+
+ private class ExpectLine {
+ Pattern regex;
+ int matchedCount = 0;
+ int minCount = 1;
+ int maxCount = 1;
+ boolean repeatable() { return (maxCount == 0 || maxCount > matchedCount); }
+ boolean optional() { return (matchedCount >= minCount); }
+
+ boolean matches(String event) {
+ if (event == null) return false;
+ boolean m = regex.matcher(event).matches();
+ if (m) ++matchedCount;
+ return m;
+ }
+
+ ExpectLine(String pattern) {
+ if (pattern.charAt(0) == '?') {
+ pattern = pattern.substring(1);
+ minCount = 0;
+ } else if (pattern.charAt(0) == '*') {
+ pattern = pattern.substring(1);
+ minCount = 0;
+ maxCount = 0;
+ } else if (pattern.charAt(0) == '+') {
+ pattern = pattern.substring(1);
+ maxCount = 0;
+ }
+ regex = Pattern.compile(pattern);
+ }
+
+ public String toString() {
+ return "{"+minCount+","+maxCount+"}("+matchedCount+") " + regex;
+ }
+ }
+
+    /**
+     * Verifies that the node event list of the given node matches some expected value,
+     * failing the test with a description of the mismatch otherwise.
+     * The format of the expected values is as follows:
+     * <ul>
+     * <li>Each line in the exp string specifies a pattern to match one or more events.
+     * <li>A line starting with ? * or + means that the line can match 0 or 1, 0 or more or 1 or more respectively.
+     * <li>The rest of the line is a regular expression.
+     * </ul>
+     *
+     * @param n           the node whose events are fetched from the fleet controller
+     * @param exp         the expected event patterns, one per line
+     * @param ignoreRegex events fully matching this regex are skipped; null to skip nothing
+     */
+    protected void verifyNodeEvents(Node n, String exp, String ignoreRegex) {
+        Pattern ignorePattern = (ignoreRegex == null ? null : Pattern.compile(ignoreRegex));
+        List<NodeEvent> events = fleetController.getNodeEvents(n);
+        List<ExpectLine> expected = new ArrayList<>();
+        for (String line : exp.split("\n")) {
+            expected.add(new ExpectLine(line));
+        }
+
+        boolean mismatch = false;
+        StringBuilder errors = new StringBuilder();
+
+        int gotno = 0;
+        int expno = 0;
+
+        while (gotno < events.size() || expno < expected.size()) {
+            // eventLine stays null once all events are consumed but expected lines remain.
+            String eventLine = null;
+            if (gotno < events.size()) {
+                eventLine = events.get(gotno).toString();
+            }
+
+            // Guard against null: Pattern.matcher(null) throws NullPointerException, which
+            // used to happen here whenever an ignore pattern was given and the events ran out
+            // before the expected lines did.
+            if (eventLine != null && ignorePattern != null && ignorePattern.matcher(eventLine).matches()) {
+                ++gotno;
+                continue;
+            }
+
+            ExpectLine pattern = null;
+            if (expno < expected.size()) {
+                pattern = expected.get(expno);
+            }
+
+            if (pattern == null) {
+                errors.append("Exhausted expected list before matching event " + gotno
+                        + ": '" + eventLine + "'.");
+                mismatch = true;
+                break;
+            }
+
+            if (pattern.matches(eventLine)) {
+                // Stay on a repeatable line so that it can consume further events.
+                if (! pattern.repeatable()) {
+                    ++expno;
+                }
+                ++gotno;
+            } else {
+                if (pattern.optional()) {
+                    // The line has matched often enough; retry this event on the next line.
+                    ++expno;
+                } else {
+                    errors.append("Event " + gotno + ": '" + eventLine
+                            + "' did not match regex " + expno + ": " + pattern);
+                    mismatch = true;
+                    break;
+                }
+            }
+        }
+        if (!mismatch && expno < expected.size()) {
+            errors.append("Too few entries in event log (only matched "
+                    + expno + " of " + expected.size() + ")");
+            mismatch = true;
+        }
+        if (mismatch) {
+            StringBuilder eventsGotten = new StringBuilder();
+            for (Event e : events) {
+                String eventLine = e.toString();
+                if (ignorePattern != null && ignorePattern.matcher(eventLine).matches()) {
+                    continue;
+                }
+                eventsGotten.append(eventLine).append("\n");
+            }
+            errors.append("\nExpected events matching:\n" + exp + "\n");
+            errors.append("but got the following events:\n" + eventsGotten.toString());
+            fail(errors.toString());
+        }
+    }
+
+ /**
+ * Builds a host info JSON document with a fixed metrics snapshot interval and two metric
+ * values for disk 0: the put latency (with the given average) and the operation count
+ * (with the given count).
+ *
+ * @param averagePutLatency the "average" written for the put latency metric
+ * @param operationCount the "count" written for the operations metric
+ * @return the JSON text
+ */
+ protected String generateHostInfo(double averagePutLatency, long operationCount) {
+ return ("{\n" +
+ " \"metrics\":\n" +
+ " {\n" +
+ " \"snapshot\":\n" +
+ " {\n" +
+ " \"from\":1335527020,\n" +
+ " \"to\":1335527320\n" +
+ " },\n" +
+ " \"values\":\n" +
+ " [\n" +
+ " {\n" +
+ " \"name\":\"vds.filestor.disk_0.allthreads.put.sum.latency\",\n" +
+ " \"values\":\n" +
+ " {\n" +
+ " \"average\":" + averagePutLatency + ",\n" +
+ " \"rate\":123.00000\n" +
+ " }\n" +
+ " },\n" +
+ " {\n" +
+ " \"name\":\"vds.filestor.disk_0.allthreads.operations\",\n" +
+ " \"values\":\n" +
+ " {\n" +
+ " \"count\":" + operationCount + ",\n" +
+ " \"rate\":3.266666\n" +
+ " }\n" +
+ " }\n" +
+ " ]\n" +
+ " }\n" +
+ "}\n");
+ }
+
+    /**
+     * Reads the whole file into a string, decoding the bytes with the platform default charset.
+     *
+     * @param filename path of the file to read
+     * @return the file content
+     * @throws IOException if the file cannot be opened or read
+     */
+    protected String readFile(String filename) throws IOException {
+        // try-with-resources closes both streams on every path; a failure while closing is
+        // recorded as suppressed instead of replacing an exception thrown while reading.
+        try (FileInputStream stream = new FileInputStream(new File(filename));
+             ByteArrayOutputStream output = new ByteArrayOutputStream()) {
+            byte[] buf = new byte[4096];
+            int read;
+            while ((read = stream.read(buf)) > 0) {
+                output.write(buf, 0, read);
+            }
+            return output.toString();
+        }
+    }
+
+    /**
+     * Creates one {@link ConfiguredNode} per given index, each constructed with
+     * {@code false} as its second argument, and returns them as a set.
+     */
+    public static Set<ConfiguredNode> toNodes(Integer ... indexes) {
+        return Arrays.stream(indexes)
+                .map(index -> new ConfiguredNode(index, false))
+                .collect(Collectors.toSet());
+    }
+
+}
diff --git a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/LeafGroupsTest.java b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/LeafGroupsTest.java
new file mode 100644
index 00000000000..ae1f10eb61f
--- /dev/null
+++ b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/LeafGroupsTest.java
@@ -0,0 +1,67 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.vespa.clustercontroller.core;
+
+import com.yahoo.vdslib.distribution.Group;
+import org.junit.Test;
+
+import java.util.List;
+
+import static org.hamcrest.core.Is.is;
+import static org.junit.Assert.*;
+
+/** Tests that {@link LeafGroups#enumerateFrom} returns exactly the leaf groups of a group tree. */
+public class LeafGroupsTest {
+
+    /** Distribution given to every group that is going to have sub groups in these tests. */
+    private Group.Distribution dummyDistribution() throws Exception {
+        return new Group.Distribution("*", 1);
+    }
+
+    @Test
+    public void rootGroupCountedAsLeafWhenNoChildren() {
+        Group root = new Group(0, "donkeykong");
+
+        List<Group> leafGroups = LeafGroups.enumerateFrom(root);
+        assertThat(leafGroups.size(), is(1));
+        assertThat(leafGroups.get(0).getName(), is("donkeykong"));
+    }
+
+    @Test
+    public void singleLeafIsEnumerated() throws Exception {
+        Group root = new Group(0, "donkeykong", dummyDistribution());
+        root.addSubGroup(new Group(1, "mario"));
+
+        List<Group> leafGroups = LeafGroups.enumerateFrom(root);
+        assertThat(leafGroups.size(), is(1));
+        assertThat(leafGroups.get(0).getName(), is("mario"));
+    }
+
+    @Test
+    public void singleLeafIsEnumeratedInNestedCase() throws Exception {
+        Group root = new Group(0, "donkeykong", dummyDistribution());
+        Group middle = new Group(1, "mario", dummyDistribution());
+        middle.addSubGroup(new Group(2, "toad"));
+        root.addSubGroup(middle);
+
+        List<Group> leafGroups = LeafGroups.enumerateFrom(root);
+        assertThat(leafGroups.size(), is(1));
+        assertThat(leafGroups.get(0).getName(), is("toad"));
+    }
+
+    @Test
+    public void multipleLeafGroupsAreEnumerated() throws Exception {
+        Group root = new Group(0, "donkeykong", dummyDistribution());
+        Group middle = new Group(1, "mario", dummyDistribution());
+        middle.addSubGroup(new Group(2, "toad"));
+        middle.addSubGroup(new Group(3, "yoshi"));
+        root.addSubGroup(middle);
+        root.addSubGroup(new Group(4, "luigi"));
+
+        List<Group> leafGroups = LeafGroups.enumerateFrom(root);
+        // Sort by group index so the assertions below do not depend on enumeration order.
+        leafGroups.sort((left, right) -> Integer.compare(left.getIndex(), right.getIndex()));
+        assertThat(leafGroups.size(), is(3));
+        assertThat(leafGroups.get(0).getName(), is("toad"));
+        assertThat(leafGroups.get(1).getName(), is("yoshi"));
+        assertThat(leafGroups.get(2).getName(), is("luigi"));
+    }
+}
diff --git a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/MasterElectionTest.java b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/MasterElectionTest.java
new file mode 100644
index 00000000000..ba2cd287a9a
--- /dev/null
+++ b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/MasterElectionTest.java
@@ -0,0 +1,440 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.vespa.clustercontroller.core;
+
+import com.yahoo.jrt.*;
+import com.yahoo.jrt.slobrok.server.Slobrok;
+import com.yahoo.log.LogLevel;
+
+import java.net.InetAddress;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.List;
+import java.util.concurrent.TimeoutException;
+import java.util.logging.Logger;
+
+import com.yahoo.vdslib.state.ClusterState;
+import org.junit.Ignore;
+import org.junit.Rule;
+import org.junit.Test;
+import org.junit.rules.TestRule;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertTrue;
+import static org.junit.Assert.assertFalse;
+
+public class MasterElectionTest extends FleetControllerTest {
+
+ private static Logger log = Logger.getLogger(MasterElectionTest.class.getName());
+
+ protected Supervisor supervisor;
+ protected List<FleetController> fleetControllers = new ArrayList<>();
+
+ @Rule
+ public TestRule cleanupZookeeperLogsOnSuccess = new CleanupZookeeperLogsOnSuccess();
+
+    /**
+     * Starts a slobrok (and a ZooKeeper test server if none is running yet), points the given
+     * options at them, and creates and starts {@code count} fleet controllers, each with a
+     * clone of the options carrying its own index.
+     */
+    protected void setUpFleetController(int count, boolean useFakeTimer, FleetControllerOptions options) throws Exception {
+        if (zooKeeperServer == null) {
+            zooKeeperServer = new ZooKeeperTestServer();
+        }
+        slobrok = new Slobrok();
+        usingFakeTimer = useFakeTimer;
+        this.options = options;
+        this.options.zooKeeperSessionTimeout = 10 * timeoutMS;
+        this.options.zooKeeperServerAddress = zooKeeperServer.getAddress();
+        this.options.slobrokConnectionSpecs = new String[] {
+                "tcp/"+ InetAddress.getLocalHost().getHostName()+":" + slobrok.port()
+        };
+        this.options.fleetControllerCount = count;
+        for (int index = 0; index < count; ++index) {
+            FleetControllerOptions controllerOptions = options.clone();
+            controllerOptions.fleetControllerIndex = index;
+            fleetControllers.add(createFleetController(usingFakeTimer, controllerOptions, true, null));
+        }
+    }
+
+    /**
+     * Returns a clone of the given options that points at this test's ZooKeeper server and
+     * slobrok and carries the given fleet controller index and count. The argument itself is
+     * not modified here.
+     */
+    public FleetControllerOptions adjustConfig(FleetControllerOptions o,
+                                               int fleetControllerIndex, int fleetControllerCount) throws Exception
+    {
+        FleetControllerOptions adjusted = o.clone();
+        adjusted.zooKeeperSessionTimeout = 10 * timeoutMS;
+        adjusted.zooKeeperServerAddress = zooKeeperServer.getAddress();
+        adjusted.slobrokConnectionSpecs = new String[] {
+                "tcp/"+ InetAddress.getLocalHost().getHostName()+":" + slobrok.port()
+        };
+        adjusted.fleetControllerIndex = fleetControllerIndex;
+        adjusted.fleetControllerCount = fleetControllerCount;
+        return adjusted;
+    }
+
+ protected void waitForZookeeperDisconnected() throws TimeoutException {
+ long maxTime = System.currentTimeMillis() + timeoutMS;
+ for(FleetController f : fleetControllers) {
+ while (true) {
+ if (!f.hasZookeeperConnection()) break;
+ timer.advanceTime(1000);
+ try{ Thread.sleep(1); } catch (InterruptedException e) {}
+ if (System.currentTimeMillis() > maxTime) throw new TimeoutException("Failed to notice zookeeper down within timeout of " + timeoutMS + " ms");
+ }
+ }
+ waitForCompleteCycles();
+ }
+
+ /** Asks the fleet controller at the given list index to wait for a complete cycle, passing timeoutMS. */
+ protected void waitForCompleteCycle(int findex) {
+ fleetControllers.get(findex).waitForCompleteCycle(timeoutMS);
+ }
+
+ /** Waits for a complete cycle on every fleet controller, in list order. */
+ protected void waitForCompleteCycles() {
+ for (int i = 0; i < fleetControllers.size(); ++i) {
+ waitForCompleteCycle(i);
+ }
+ }
+
+    /**
+     * Shuts down all fleet controllers of this test and stops the slobrok before running the
+     * base class tear down. The slobrok field is not cleared here, so the base class stops
+     * it once more.
+     */
+    protected void tearDownSystem() throws Exception {
+        for (FleetController controller : fleetControllers) {
+            if (controller == null) {
+                continue;
+            }
+            controller.shutdown();
+        }
+        if (slobrok != null) {
+            slobrok.stop();
+        }
+        super.tearDownSystem();
+    }
+
+ /**
+ * Shuts down the supervisor's transport and joins it, if a supervisor was created, and
+ * then runs the base class tear down.
+ */
+ public void tearDown() throws Exception {
+ if (supervisor != null) {
+ supervisor.transport().shutdown().join();
+ }
+ super.tearDown();
+ }
+
+ /** Ignored for unknown reasons */
+ @Test
+ @Ignore
+ public void testMasterElection() throws Exception {
+ startingTest("MasterElectionTest::testMasterElection");
+ log.log(LogLevel.INFO, "STARTING TEST: MasterElectionTest::testMasterElection()");
+ FleetControllerOptions options = new FleetControllerOptions("mycluster");
+ options.masterZooKeeperCooldownPeriod = 1;
+ // Phase 1: five controllers; shut down 0, 1 and 2 in turn and expect 0, 1, 2 as masters.
+ setUpFleetController(5, true, options);
+ waitForMaster(0);
+ log.log(LogLevel.INFO, "SHUTTING DOWN FLEET CONTROLLER 0");
+ fleetControllers.get(0).shutdown();
+ waitForMaster(1);
+ log.log(LogLevel.INFO, "SHUTTING DOWN FLEET CONTROLLER 1");
+ fleetControllers.get(1).shutdown();
+ waitForMaster(2);
+ log.log(LogLevel.INFO, "SHUTTING DOWN FLEET CONTROLLER 2");
+ fleetControllers.get(2).shutdown();
+
+ // Too few for there to be a master at this point
+ for (int i=0; i<fleetControllers.size(); ++i) {
+ if (fleetControllers.get(i).isRunning()) waitForCompleteCycle(i);
+ assertEquals("Fleet controller " + i, false, fleetControllers.get(i).isMaster());
+ }
+
+ // Phase 2: restart 2, 0 and 1 (reusing their options); expect master 2, then 0, then still 0.
+ log.log(LogLevel.INFO, "STARTING FLEET CONTROLLER 2");
+ fleetControllers.set(2, createFleetController(usingFakeTimer, fleetControllers.get(2).getOptions(), true, null));
+ waitForMaster(2);
+ log.log(LogLevel.INFO, "STARTING FLEET CONTROLLER 0");
+ fleetControllers.set(0, createFleetController(usingFakeTimer, fleetControllers.get(0).getOptions(), true, null));
+ waitForMaster(0);
+ log.log(LogLevel.INFO, "STARTING FLEET CONTROLLER 1");
+ fleetControllers.set(1, createFleetController(usingFakeTimer, fleetControllers.get(1).getOptions(), true, null));
+ waitForMaster(0);
+
+ // Phase 3: shut down 4, 3 and 2; controller 0 is expected to stay master after the first two.
+ log.log(LogLevel.INFO, "SHUTTING DOWN FLEET CONTROLLER 4");
+ fleetControllers.get(4).shutdown();
+ waitForMaster(0);
+ log.log(LogLevel.INFO, "SHUTTING DOWN FLEET CONTROLLER 3");
+ fleetControllers.get(3).shutdown();
+ waitForMaster(0);
+ log.log(LogLevel.INFO, "SHUTTING DOWN FLEET CONTROLLER 2");
+ fleetControllers.get(2).shutdown();
+
+ // Too few for there to be a master at this point
+ for (int i=0; i<fleetControllers.size(); ++i) {
+ if (fleetControllers.get(i).isRunning()) waitForCompleteCycle(i);
+ assertEquals(false, fleetControllers.get(i).isMaster());
+ }
+ }
+
+    /**
+     * Waits until the fleet controller at the given index is master and no other controller
+     * claims to be. While the controller is not master, the fake timer is advanced by 100 ms
+     * per attempt; every unsuccessful attempt also sleeps 100 ms of real time.
+     *
+     * @throws IllegalStateException if this does not happen within timeoutMS / 100 attempts
+     */
+    protected void waitForMaster(int master) {
+        log.log(LogLevel.INFO, "Entering waitForMaster");
+        boolean soleMaster = false;
+        int elapsed = 0;
+        while (elapsed < FleetControllerTest.timeoutMS) {
+            if (fleetControllers.get(master).isMaster()) {
+                log.log(LogLevel.INFO, "Node " + master + " is master. Checking that noone else is master");
+                soleMaster = true;
+                for (int other = 0; other < fleetControllers.size(); ++other) {
+                    if (other == master || !fleetControllers.get(other).isMaster()) {
+                        continue;
+                    }
+                    soleMaster = false;
+                    log.log(LogLevel.INFO, "Node " + other + " also says it is master.");
+                }
+
+                if (soleMaster) {
+                    break;
+                }
+            } else {
+                log.log(LogLevel.INFO, "Node " + master + " is not master yet, sleeping more");
+                timer.advanceTime(100);
+                waitForCompleteCycle(master);
+            }
+            // Have to wait to get zookeeper communication chance to happen.
+            try {
+                Thread.sleep(100);
+            } catch (InterruptedException e) {
+            }
+            elapsed += 100;
+        }
+
+        if (!soleMaster) {
+            log.log(LogLevel.INFO, "Node " + master + " is not the only master");
+            throw new IllegalStateException("Node " + master + " never got to be the only master.");
+        }
+
+        log.log(LogLevel.INFO, "Leaving waitForMaster");
+    }
+
+ /**
+  * Remembers the most recently observed cluster state and verifies that each newly observed
+  * state has a strictly greater version than the one observed before it.
+  */
+ private static class VersionMonotonicityChecker {
+     private ClusterState lastState;
+
+     private VersionMonotonicityChecker(ClusterState initialState) {
+         this.lastState = initialState;
+     }
+
+     /** Creates a checker that uses the given state as its first baseline. */
+     public static VersionMonotonicityChecker bootstrappedWith(ClusterState initialState) {
+         return new VersionMonotonicityChecker(initialState);
+     }
+
+     /**
+      * Makes the given state the new baseline and throws IllegalStateException unless its
+      * version is strictly greater than the version of the previous baseline.
+      */
+     public void updateAndVerify(ClusterState currentState) {
+         final ClusterState previous = lastState;
+         // The baseline is replaced before the check, so it moves on even when the check fails.
+         lastState = currentState;
+         if (currentState.getVersion() > previous.getVersion()) {
+             return;
+         }
+         final String message = String.format("Cluster state version monotonicity invariant broken! " +
+                 "Old state was '%s', new state is '%s'", previous, currentState);
+         throw new IllegalStateException(message);
+     }
+ }
+
+ /**
+  * Shuts down the current master twice in a five node setup and verifies, after each
+  * re-election, that the new master's cluster state version is greater than the previous one.
+  */
+ @Test
+ public void testClusterStateVersionIncreasesAcrossMasterElections() throws Exception {
+     startingTest("MasterElectionTest::testClusterStateVersionIncreasesAcrossMasterElections");
+     FleetControllerOptions options = new FleetControllerOptions("mycluster");
+     options.masterZooKeeperCooldownPeriod = 1;
+     setUpFleetController(5, true, options);
+     // Currently need to have content nodes present for the cluster controller to even bother
+     // attempting to persist its cluster state version to ZK.
+     setUpVdsNodes(true, new DummyVdsNodeOptions());
+     fleetController = fleetControllers.get(0); // Required to prevent waitForStableSystem from NPE'ing
+     waitForStableSystem();
+
+     // Controller 0 is the initial master and provides the baseline version.
+     waitForMaster(0);
+     for (int index = 0; index <= 4; ++index) {
+         waitForCompleteCycle(index);
+     }
+     VersionMonotonicityChecker checker =
+             VersionMonotonicityChecker.bootstrappedWith(fleetControllers.get(0).getClusterState());
+
+     // Take down controller 0; controller 1 takes over.
+     fleetControllers.get(0).shutdown();
+     waitForMaster(1);
+     for (int index = 1; index <= 4; ++index) {
+         waitForCompleteCycle(index);
+     }
+     checker.updateAndVerify(fleetControllers.get(1).getClusterState());
+
+     // Take down controller 1; controller 2 takes over.
+     fleetControllers.get(1).shutdown();
+     waitForMaster(2); // Still a quorum available
+     for (int index = 2; index <= 4; ++index) {
+         waitForCompleteCycle(index);
+     }
+     checker.updateAndVerify(fleetControllers.get(2).getClusterState());
+ }
+
+ /**
+  * Stops the ZooKeeper test server, waits for the controllers to notice the disconnect,
+  * restarts the server on the same fixed port and expects controller 0 to become master again.
+  */
+ @Test
+ public void testVotingCorrectnessInFaceOfZKDisconnect() throws Exception {
+     startingTest("MasterElectionTest::testVotingCorrectnessInFaceOfZKDisconnect");
+     // "Magic" port value is in range allocated to module for testing.
+     final int fixedZooKeeperPort = 18342;
+     FleetControllerOptions options = new FleetControllerOptions("mycluster");
+     zooKeeperServer = ZooKeeperTestServer.createWithFixedPort(fixedZooKeeperPort);
+     options.zooKeeperSessionTimeout = 100;
+     options.masterZooKeeperCooldownPeriod = 100;
+     setUpFleetController(2, true, options);
+     waitForMaster(0);
+
+     // Take ZooKeeper away and let the session time out.
+     zooKeeperServer.shutdown(true);
+     waitForCompleteCycles();
+     timer.advanceTime(options.zooKeeperSessionTimeout);
+     waitForZookeeperDisconnected();
+
+     // Bring ZooKeeper back on the very same port.
+     zooKeeperServer = ZooKeeperTestServer.createWithFixedPort(fixedZooKeeperPort);
+     timer.advanceTime(10 * 1000); // Wait long enough for fleetcontroller wanting to retry zookeeper connection
+
+     log.log(LogLevel.INFO, "WAITING FOR 0 TO BE MASTER");
+     waitForMaster(0);
+     log.log(LogLevel.INFO, "SHUTTING DOWN");
+ }
+
+ /**
+  * Verifies that no controller is master while ZooKeeper is down, and that controller 0
+  * becomes master again once all controllers are pointed at a freshly started server.
+  */
+ @Test
+ public void testZooKeeperUnavailable() throws Exception {
+     startingTest("MasterElectionTest::testZooKeeperUnavailable");
+     FleetControllerOptions options = new FleetControllerOptions("mycluster");
+     options.zooKeeperSessionTimeout = 100;
+     options.masterZooKeeperCooldownPeriod = 100;
+     options.zooKeeperServerAddress = "localhost";
+     setUpFleetController(5, true, options);
+     waitForMaster(0);
+
+     log.log(LogLevel.INFO, "STOPPING ZOOKEEPER SERVER AT " + zooKeeperServer.getAddress());
+     zooKeeperServer.shutdown(true);
+     waitForCompleteCycles();
+     timer.advanceTime(options.zooKeeperSessionTimeout);
+     waitForZookeeperDisconnected();
+     // Noone can be master if server is unavailable
+     log.log(LogLevel.INFO, "Checking master status");
+     for (int index = 0; index < fleetControllers.size(); ++index) {
+         assertEquals("Index " + index, false, fleetControllers.get(index).isMaster());
+     }
+
+     zooKeeperServer = new ZooKeeperTestServer();
+     log.log(LogLevel.INFO, "STARTED ZOOKEEPER SERVER AT " + zooKeeperServer.getAddress());
+     // Hand the address of the new server to every controller.
+     for (FleetController controller : fleetControllers) {
+         FleetControllerOptions updated = controller.getOptions();
+         updated.zooKeeperServerAddress = zooKeeperServer.getAddress();
+         controller.updateOptions(updated, 0);
+         log.log(LogLevel.INFO, "Should now have sent out new zookeeper server address "
+                 + updated.zooKeeperServerAddress + " to fleetcontroller " + updated.fleetControllerIndex);
+     }
+     timer.advanceTime(10 * 1000); // Wait long enough for fleetcontroller wanting to retry zookeeper connection
+     waitForMaster(0);
+     log.log(LogLevel.INFO, "SHUTTING DOWN");
+ }
+
+ /**
+  * Ignored for unknown reasons.
+  *
+  * Checks that, after the master is shut down, the next candidate does not take over before
+  * the configured master cooldown period (one hour here) has passed on the test timer.
+  */
+ @Test
+ @Ignore
+ public void testMasterZooKeeperCooldown() throws Exception {
+     startingTest("MasterElectionTest::testMasterZooKeeperCooldown");
+     final int oneHourMs = 3600 * 1000;
+     FleetControllerOptions options = new FleetControllerOptions("mycluster");
+     options.masterZooKeeperCooldownPeriod = oneHourMs; // An hour
+     setUpFleetController(3, true, options);
+     waitForMaster(0);
+     timer.advanceTime(24 * oneHourMs); // A day
+     waitForCompleteCycle(1);
+
+     log.log(LogLevel.INFO, "SHUTTING DOWN FLEET CONTROLLER 0");
+     fleetControllers.get(0).shutdown();
+     waitForCompleteCycle(1);
+
+     // 5 minutes is not long enough period to wait before letting this node be master.
+     timer.advanceTime(300 * 1000); // 5 minutes
+     waitForCompleteCycle(1);
+     assertFalse(fleetControllers.get(1).isMaster());
+
+     // But after an hour it should become one.
+     timer.advanceTime(4000 * 1000); // more than 60 minutes
+     waitForMaster(1);
+ }
+
+ /**
+  * Polls the "getMaster" RPC method on the given nodes until every one of them answers
+  * without error and with the expected master index and reason text.
+  *
+  * @param reason expected second return value of getMaster, or null to accept any reason
+  * @param master expected first return value of getMaster, or null to accept any index
+  * @param connections RPC targets, indexed by the values found in nodes
+  * @param nodes indices into connections of the nodes to query
+  * @throws IllegalStateException if the expectation is not met within timeoutMS milliseconds
+  *         of wall-clock time, or if the waiting thread is interrupted
+  */
+ private void waitForMasterReason(String reason, Integer master, List<Target> connections, int nodes[]) {
+     long endTime = System.currentTimeMillis() + timeoutMS;
+     while (System.currentTimeMillis() < endTime) {
+         boolean allOk = true;
+         for (int node : nodes) {
+             Request req = new Request("getMaster");
+             connections.get(node).invokeSync(req, FleetControllerTest.timeoutS);
+             if (req.isError()) { allOk = false; break; }
+             // Compare by value; never rely on how the boxed Integer happens to be compared.
+             if (master != null && master.intValue() != req.returnValues().get(0).asInt32()) { allOk = false; break; }
+             if (reason != null && !reason.equals(req.returnValues().get(1).asString())) { allOk = false; break; }
+         }
+         if (allOk) return;
+         try {
+             Thread.sleep(100);
+         } catch (InterruptedException e) {
+             // Do not swallow the interrupt: restore the flag and stop polling right away.
+             Thread.currentThread().interrupt();
+             throw new IllegalStateException("Interrupted while waiting for master reason '" + reason + "'", e);
+         }
+     }
+     throw new IllegalStateException("Did not get master reason '" + reason
+             + "' within timeout of " + timeoutMS + " ms");
+ }
+
+ /**
+  * Ignored for unknown reasons.
+  *
+  * Exercises the "getMaster" RPC method of three fleet controllers: first while node 0 is the
+  * agreed master, then during the cooldown period after node 0 has been shut down, and
+  * finally after node 1 has taken over.
+  */
+ @Test
+ @Ignore
+ public void testGetMaster() throws Exception {
+ startingTest("MasterElectionTest::testGetMaster");
+ FleetControllerOptions options = new FleetControllerOptions("mycluster");
+ options.masterZooKeeperCooldownPeriod = 3600 * 1000; // An hour
+ setUpFleetController(3, true, options);
+ waitForMaster(0);
+
+ // Open one RPC connection per fleet controller, in fleetControllers order.
+ supervisor = new Supervisor(new Transport());
+ List<Target> connections = new ArrayList<Target>();
+ for (FleetController fleetController : fleetControllers) {
+ int rpcPort = fleetController.getRpcPort();
+ Target connection = supervisor.connect(new Spec(rpcPort));
+ assertTrue(connection.isValid());
+ connections.add(connection);
+ }
+
+ timer.advanceTime(24 * 3600 * 1000); // A day
+ waitForCompleteCycles();
+
+ // This initial request is replaced by a fresh one on every retry iteration below.
+ Request req = new Request("getMaster");
+
+ // Every node must eventually report node 0 as master with full agreement. The retry
+ // loop re-issues the request without sleeping in between.
+ for (int nodeIndex = 0; nodeIndex<3; ++nodeIndex) {
+ for (int retry = 0; retry < FleetControllerTest.timeoutS * 10; ++retry) {
+ req = new Request("getMaster");
+ connections.get(nodeIndex).invokeSync(req, FleetControllerTest.timeoutS);
+ assertEquals(req.errorMessage(), false, req.isError());
+ if (req.returnValues().get(0).asInt32() == 0 &&
+ req.returnValues().get(1).asString().equals("All 3 nodes agree that 0 is current master.")) {
+ break;
+ }
+ }
+ // Assert on the last answer received, whether or not the retry loop broke out early.
+ assertEquals(req.toString(), 0, req.returnValues().get(0).asInt32());
+ assertEquals(req.toString(), "All 3 nodes agree that 0 is current master.", req.returnValues().get(1).asString());
+ }
+
+ log.log(LogLevel.INFO, "SHUTTING DOWN FLEET CONTROLLER 0");
+ fleetControllers.get(0).shutdown();
+ // Wait until fc 1 & 2 votes for node 1
+ waitForCompleteCycle(1);
+ waitForCompleteCycle(2);
+ // 5 minutes is not long enough period to wait before letting this node be master.
+ timer.advanceTime(300 * 1000); // 5 minutes
+
+ // While the cooldown is running, the remaining nodes are expected to report master -1.
+ int remainingNodes[] = { 1, 2 };
+ waitForMasterReason(
+ "2 of 3 nodes agree 1 should be master, but old master cooldown period of 3600000 ms has not passed yet. To ensure it has got time to realize it is no longer master before we elect a new one, currently there is no master.",
+ -1, connections, remainingNodes);
+ // Verify that fc 1 is not master, and the correct reasons for why not
+ assertFalse(fleetControllers.get(1).isMaster());
+
+ // But after an hour it should become one.
+ timer.advanceTime(3600 * 1000); // 60 minutes
+ waitForMaster(1);
+
+ // The connection to the shut down node 0 is expected to fail with error code 104.
+ req = new Request("getMaster");
+ connections.get(0).invokeSync(req, FleetControllerTest.timeoutS);
+ assertEquals(req.toString(), 104, req.errorCode());
+ assertEquals(req.toString(), "Connection error", req.errorMessage());
+
+ // Node 1: retry until it reports some master (anything but -1), then check the answer.
+ for (int i=0; i<FleetControllerTest.timeoutS * 10; ++i) {
+ req = new Request("getMaster");
+ connections.get(1).invokeSync(req, FleetControllerTest.timeoutS);
+ assertEquals(req.errorMessage(), false, req.isError());
+ if (req.returnValues().get(0).asInt32() != -1) break;
+ // We may have bad timing causing node not to have realized it is master yet
+ }
+ assertEquals(req.toString(), 1, req.returnValues().get(0).asInt32());
+ assertEquals(req.toString(), "2 of 3 nodes agree 1 is master.", req.returnValues().get(1).asString());
+
+ // Node 2: same procedure as for node 1.
+ for (int i=0; i<FleetControllerTest.timeoutS * 10; ++i) {
+ req = new Request("getMaster");
+ connections.get(2).invokeSync(req, FleetControllerTest.timeoutS);
+ assertEquals(req.errorMessage(), false, req.isError());
+ if (req.returnValues().get(0).asInt32() != -1) break;
+ }
+ assertEquals(req.toString(), 1, req.returnValues().get(0).asInt32());
+ assertEquals(req.toString(), "2 of 3 nodes agree 1 is master.", req.returnValues().get(1).asString());
+ }
+
+ /**
+  * Pushes updated options to every controller and verifies that controller 0 stays master,
+  * and that controller 1 takes over once controller 0 is shut down.
+  */
+ @Test
+ public void testReconfigure() throws Exception {
+     startingTest("MasterElectionTest::testReconfigure");
+     FleetControllerOptions options = new FleetControllerOptions("mycluster");
+     options.masterZooKeeperCooldownPeriod = 1;
+     setUpFleetController(3, true, options);
+     waitForMaster(0);
+
+     // Each controller gets its own adjusted variant of the cloned options.
+     FleetControllerOptions template = options.clone();
+     for (int index = 0; index < fleetControllers.size(); ++index) {
+         FleetControllerOptions perNode = adjustConfig(template, index, fleetControllers.size());
+         fleetControllers.get(index).updateOptions(perNode, 2);
+     }
+     waitForMaster(0);
+
+     log.log(LogLevel.INFO, "SHUTTING DOWN FLEET CONTROLLER 0");
+     fleetControllers.get(0).shutdown();
+     waitForMaster(1);
+ }
+
+}
diff --git a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/NoZooKeeperTest.java b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/NoZooKeeperTest.java
new file mode 100644
index 00000000000..2191819858c
--- /dev/null
+++ b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/NoZooKeeperTest.java
@@ -0,0 +1,25 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.vespa.clustercontroller.core;
+
+import org.junit.Test;
+import static org.junit.Assert.assertEquals;
+
+ /**
+  * Runs a fleet controller that is configured with no ZooKeeper server address and checks
+  * that it still tracks a node disconnecting and reconnecting.
+  */
+ public class NoZooKeeperTest extends FleetControllerTest {
+
+     @Test
+     public void testWantedStatesInZooKeeper() throws Exception {
+         startingTest("NoZooKeeperTest::testWantedStatesInZooKeeper");
+         FleetControllerOptions options = new FleetControllerOptions("mycluster");
+         options.zooKeeperServerAddress = null;
+         setUpFleetController(true, options);
+         setUpVdsNodes(true, new DummyVdsNodeOptions());
+         waitForStableSystem();
+
+         // The first dummy node is expected to be a distributor (index 0 in the state below).
+         DummyVdsNode firstNode = nodes.get(0);
+         assertEquals(true, firstNode.isDistributor());
+
+         firstNode.disconnect();
+         waitForState("version:\\d+ distributor:10 .0.s:d storage:10");
+
+         firstNode.connect();
+         waitForState("version:\\d+ distributor:10 storage:10");
+     }
+ }
diff --git a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/NodeSlobrokConfigurationMembershipTest.java b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/NodeSlobrokConfigurationMembershipTest.java
new file mode 100644
index 00000000000..10305de116a
--- /dev/null
+++ b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/NodeSlobrokConfigurationMembershipTest.java
@@ -0,0 +1,117 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.vespa.clustercontroller.core;
+
+import com.yahoo.vdslib.distribution.ConfiguredNode;
+import com.yahoo.vespa.clustercontroller.core.testutils.Waiter;
+import org.junit.Test;
+
+import java.util.Arrays;
+import java.util.List;
+import java.util.Set;
+import java.util.TreeSet;
+import java.util.stream.Collectors;
+
+import static org.junit.Assert.assertTrue;
+
+public class NodeSlobrokConfigurationMembershipTest extends FleetControllerTest {
+
+ // Indices of the nodes that make up the configured cluster in these tests.
+ private final Set<Integer> nodeIndices = asIntSet(0, 1, 2, 3);
+ // Index passed as the "foreign" node: started alongside the others but not configured.
+ private final int foreignNode = 6;
+
+ /**
+  * Waits for the given cluster state pattern while ignoring the dummy nodes whose index is
+  * in excludedNodes.
+  *
+  * Due to the implementation details of the test base, this.waitForState() always waits
+  * until all nodes added in the test have received the latest cluster state. A node that is
+  * to be ignored entirely never gets a cluster state, so the test would fail unless it is
+  * filtered out. Storage and distributor nodes sharing an index are both filtered.
+  */
+ private void waitForStateExcludingNodeSubset(String expectedState, Set<Integer> excludedNodes) throws Exception {
+     DataRetriever filteredView = new DataRetriever() {
+         @Override
+         public FleetController getFleetController() { return fleetController; }
+         @Override
+         public Object getMonitor() { return timer; }
+         @Override
+         public int getTimeoutMS() { return timeoutMS; }
+         @Override
+         public List<DummyVdsNode> getDummyNodes() {
+             // Expose only the nodes whose index is not excluded.
+             return nodes.stream()
+                     .filter(node -> !excludedNodes.contains(node.getNode().getIndex()))
+                     .collect(Collectors.toList());
+         }
+     };
+     Waiter subsetWaiter = new Waiter.Impl(filteredView);
+     subsetWaiter.waitForState(expectedState);
+ }
+
+ /** Collects the given indices into a set. */
+ private static Set<Integer> asIntSet(Integer... idx) {
+     return Arrays.stream(idx).collect(Collectors.toSet());
+ }
+
+ /** Maps every index to a ConfiguredNode created with false as its second argument. */
+ private static Set<ConfiguredNode> asConfiguredNodes(Set<Integer> indices) {
+     return indices.stream()
+             .map(index -> new ConfiguredNode(index, false))
+             .collect(Collectors.toSet());
+ }
+
+ /**
+  * Sets up a fleet controller configured with validIndices only, then starts dummy nodes
+  * for those indices plus foreignNodeIndex.
+  */
+ private void setUpClusterWithForeignNode(Set<Integer> validIndices, final int foreignNodeIndex) throws Exception {
+     FleetControllerOptions options = optionsForConfiguredNodes(asConfiguredNodes(validIndices));
+     setUpFleetController(true, options);
+
+     // The started node set is the configured one plus the stranger.
+     Set<Integer> startedIndices = new TreeSet<>(validIndices);
+     startedIndices.add(foreignNodeIndex);
+     setUpVdsNodes(true, new DummyVdsNodeOptions(), false, startedIndices);
+ }
+
+ /**
+  * Creates options for cluster "mycluster" with the given configured nodes, a slobrok
+  * disconnect grace period of 60 * 1000 and a node state request timeout of 10000 * 60 * 1000.
+  */
+ private FleetControllerOptions optionsForConfiguredNodes(Set<ConfiguredNode> configuredNodes) {
+     FleetControllerOptions clusterOptions = new FleetControllerOptions("mycluster", configuredNodes);
+     clusterOptions.nodeStateRequestTimeoutMS = 10000 * 60 * 1000;
+     clusterOptions.maxSlobrokDisconnectGracePeriod = 60 * 1000;
+     return clusterOptions;
+ }
+
+ // Starts nodes 0-3 plus the unconfigured node 6 and expects a state that only covers the
+ // four configured nodes. Node 6 is excluded from the wait since it receives no state.
+ @Test
+ public void testSlobrokNodeOutsideConfiguredIndexSetIsNotIncludedInCluster() throws Exception {
+ setUpClusterWithForeignNode(nodeIndices, foreignNode);
+ waitForStateExcludingNodeSubset("version:\\d+ distributor:4 storage:4", asIntSet(foreignNode));
+ }
+
+ /**
+  * Verifies that a node which was initially outside the configured index set is accepted
+  * into the cluster once a configuration containing it is applied.
+  */
+ @Test
+ public void testNodeSetReconfigurationForcesFreshSlobrokFetch() throws Exception {
+     setUpClusterWithForeignNode(nodeIndices, foreignNode);
+     waitForStateExcludingNodeSubset("version:\\d+ distributor:4 storage:4", asIntSet(foreignNode));
+
+     // If we get a configuration with the node present, we have to accept it into
+     // cluster. If we do not re-fetch state from slobrok we risk racing.
+     // Work on a copy: the shared nodeIndices field comes from Collectors.toSet(), which
+     // gives no guarantee that the returned set can be modified.
+     Set<Integer> reconfiguredIndices = new TreeSet<>(nodeIndices);
+     reconfiguredIndices.add(foreignNode);
+     options.nodes = asConfiguredNodes(reconfiguredIndices);
+     fleetController.updateOptions(options, 0);
+     // The expected state spans indices 0-6 (a count of 7) due to ideal state algo semantics,
+     // with the never-configured indices 4 and 5 reported as down.
+     // Note that we do not use subsetWaiter here since we want node 6 included.
+     waitForState("version:\\d+ distributor:7 .4.s:d .5.s:d storage:7 .4.s:d .5.s:d");
+ }
+
+ // Retires node 0 through configuration, then removes it from the configuration entirely,
+ // and checks the cluster state after each step.
+ @Test
+ public void test_removed_retired_node_is_not_included_in_state() throws Exception {
+ final Set<ConfiguredNode> configuredNodes = asConfiguredNodes(nodeIndices);
+ FleetControllerOptions options = optionsForConfiguredNodes(configuredNodes);
+ setUpFleetController(true, options);
+ setUpVdsNodes(true, new DummyVdsNodeOptions(), false, nodeIndices);
+
+ waitForState("version:\\d+ distributor:4 storage:4");
+
+ // Update options with 1 node config-retired
+ // NOTE(review): mutating this set relies on Collectors.toSet() returning a modifiable set.
+ assertTrue(configuredNodes.remove(new ConfiguredNode(0, false)));
+ configuredNodes.add(new ConfiguredNode(0, true));
+ options.nodes = configuredNodes;
+ fleetController.updateOptions(options, 0);
+
+ waitForState("version:\\d+ distributor:4 storage:4 .0.s:r");
+
+ // Now remove the retired node entirely from config
+ // options.nodes still refers to configuredNodes, so the removal below is what the next
+ // updateOptions call passes on; no reassignment is needed.
+ assertTrue(configuredNodes.remove(new ConfiguredNode(0, true)));
+ fleetController.updateOptions(options, 0);
+
+ // The previously retired node should now be marked as done, as it no longer
+ // exists from the point of view of the content cluster. We have to use a subset
+ // state waiter, as the controller will not send the new state to node 0.
+ waitForStateExcludingNodeSubset("version:\\d+ distributor:4 .0.s:d storage:4 .0.s:d", asIntSet(0));
+ }
+
+}
diff --git a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/NodeStateChangeCheckerTest.java b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/NodeStateChangeCheckerTest.java
new file mode 100644
index 00000000000..cb5cee70486
--- /dev/null
+++ b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/NodeStateChangeCheckerTest.java
@@ -0,0 +1,349 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.vespa.clustercontroller.core;
+
+import com.yahoo.vdslib.distribution.ConfiguredNode;
+import com.yahoo.vdslib.distribution.Distribution;
+import com.yahoo.vdslib.distribution.Group;
+import com.yahoo.vdslib.state.Node;
+import com.yahoo.vdslib.state.NodeState;
+import com.yahoo.vdslib.state.NodeType;
+import com.yahoo.vdslib.state.State;
+import com.yahoo.vespa.clustercontroller.core.hostinfo.HostInfo;
+import com.yahoo.vespa.clustercontroller.utils.staterestapi.requests.SetUnitStateRequest;
+import org.junit.Before;
+import org.junit.Test;
+
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Set;
+
+import static org.hamcrest.core.StringContains.containsString;
+import static org.hamcrest.core.Is.is;
+import static org.junit.Assert.*;
+import static org.mockito.Mockito.mock;
+import static org.mockito.Mockito.when;
+
+public class NodeStateChangeCheckerTest {
+
+ // Default arguments handed to the NodeStateChangeChecker constructor by createChangeChecker.
+ private static final int minStorageNodesUp = 3;
+ private static final int requiredRedundancy = 4;
+ // Same value as the "cluster-state-version" written into the distributor host info JSON below.
+ private static final int currentClusterState = 2;
+ private static final double minRatioOfStorageNodesUp = 0.9;
+
+ // Both nodes under test use index 1.
+ private static final Node nodeDistributor = new Node(NodeType.DISTRIBUTOR, 1);
+ private static final Node nodeStorage = new Node(NodeType.STORAGE, 1);
+
+ // Storage node states used as old/new states in the transitions under test.
+ private static final NodeState upNodeState = new NodeState(NodeType.STORAGE, State.UP);
+ public static final NodeState maintenanceNodeState = createNodeState(State.MAINTENANCE, "Orchestrator");
+
+ /** Creates a storage node state with the given state and description. */
+ private static NodeState createNodeState(State state, String description) {
+     NodeState storageState = new NodeState(NodeType.STORAGE, state);
+     return storageState.setDescription(description);
+ }
+
+ // Builds a checker for the given cluster using this test class' default thresholds.
+ private NodeStateChangeChecker createChangeChecker(ContentCluster cluster) {
+ return new NodeStateChangeChecker(minStorageNodesUp, minRatioOfStorageNodesUp, requiredRedundancy, cluster.clusterInfo());
+ }
+
+ /**
+  * Creates a content cluster named "Clustername" with the given nodes, backed by a mocked
+  * Distribution whose root group is a Group(2, "to").
+  */
+ private ContentCluster createCluster(Collection<ConfiguredNode> nodes) {
+     Group rootGroup = new Group(2, "to");
+     Distribution mockedDistribution = mock(Distribution.class);
+     when(mockedDistribution.getRootGroup()).thenReturn(rootGroup);
+     return new ContentCluster("Clustername", nodes, mockedDistribution, minStorageNodesUp, 0.0);
+ }
+
+ /**
+  * Creates a StorageNodeInfo with the given index inside a cluster without configured nodes,
+  * and sets its reported state to the given state at time 3.
+  *
+  * NOTE(review): no method in this class calls this helper.
+  */
+ private StorageNodeInfo createStorageNodeInfo(int index, State state) {
+     Group rootGroup = new Group(2, "to");
+     Distribution mockedDistribution = mock(Distribution.class);
+     when(mockedDistribution.getRootGroup()).thenReturn(rootGroup);
+
+     Set<ConfiguredNode> noConfiguredNodes = new HashSet<>();
+     ContentCluster emptyCluster =
+             new ContentCluster("Clustername", noConfiguredNodes, mockedDistribution, minStorageNodesUp, 0.0);
+
+     StorageNodeInfo info = new StorageNodeInfo(emptyCluster, index, false, "", mockedDistribution);
+     info.setReportedState(new NodeState(NodeType.STORAGE, state), 3 /* time */);
+     return info;
+ }
+
+ /**
+  * Builds distributor host info JSON with cluster state version 2 and four storage node
+  * entries. Nodes 0, 1 and 2 carry the given minimum replication factors; node 3 has none.
+  */
+ private String createDistributorHostInfo(int replicationfactor1, int replicationfactor2, int replicationfactor3) {
+     // Joining with "\n" and appending a final "\n" ends every line with a newline.
+     return String.join("\n",
+             "{",
+             " \"cluster-state-version\": 2,",
+             " \"distributor\": {",
+             " \"storage-nodes\": [",
+             " {",
+             " \"node-index\": 0,",
+             " \"min-current-replication-factor\": " + replicationfactor1,
+             " },",
+             " {",
+             " \"node-index\": 1,",
+             " \"min-current-replication-factor\": " + replicationfactor2,
+             " },",
+             " {",
+             " \"node-index\": 2,",
+             " \"min-current-replication-factor\": " + replicationfactor3,
+             " },",
+             " {",
+             " \"node-index\": 3",
+             " }",
+             " ]",
+             " }",
+             "}") + "\n";
+ }
+
+ /** A FORCE condition allows the transition, and the wanted state is not reported as already set. */
+ @Test
+ public void testCanUpgradeForce() {
+     NodeStateChangeChecker checker = createChangeChecker(createCluster(createNodes(1)));
+     NodeState initializing = new NodeState(NodeType.STORAGE, State.INITIALIZING);
+     NodeStateChangeChecker.Result verdict = checker.evaluateTransition(
+             nodeDistributor, currentClusterState, SetUnitStateRequest.Condition.FORCE,
+             upNodeState, initializing);
+     assertTrue(verdict.settingWantedStateIsAllowed());
+     assertFalse(verdict.wantedStateAlreadySet());
+ }
+
+ /** A SAFE condition on a distributor node is refused with an explanatory reason. */
+ @Test
+ public void testSafeSetStateDistributors() {
+     NodeStateChangeChecker checker = createChangeChecker(createCluster(createNodes(1)));
+     NodeStateChangeChecker.Result verdict = checker.evaluateTransition(
+             nodeDistributor, currentClusterState, SetUnitStateRequest.Condition.SAFE,
+             upNodeState, maintenanceNodeState);
+     assertFalse(verdict.settingWantedStateIsAllowed());
+     assertFalse(verdict.wantedStateAlreadySet());
+     assertThat(verdict.getReason(), containsString("Safe-set of node state is only supported for storage nodes"));
+ }
+
+ /** With four nodes up but five required, a safe transition to maintenance is refused. */
+ @Test
+ public void testCanUpgradeSafeMissingStorage() {
+     ContentCluster fourNodeCluster = createCluster(createNodes(4));
+     setAllNodesUp(fourNodeCluster, HostInfo.createHostInfo(createDistributorHostInfo(4, 5, 6)));
+     NodeStateChangeChecker checker = new NodeStateChangeChecker(
+             5 /* min storage nodes */, minRatioOfStorageNodesUp, requiredRedundancy, fourNodeCluster.clusterInfo());
+     NodeStateChangeChecker.Result verdict = checker.evaluateTransition(
+             nodeStorage, currentClusterState, SetUnitStateRequest.Condition.SAFE,
+             upNodeState, maintenanceNodeState);
+     assertFalse(verdict.settingWantedStateIsAllowed());
+     assertFalse(verdict.wantedStateAlreadySet());
+     assertThat(verdict.getReason(), is("There are only 4 storage nodes up, while config requires at least 5"));
+ }
+
+ /** With no storage node down, the safe transition to maintenance is allowed. */
+ @Test
+ public void testCanUpgradeStorageSafeYes() {
+     NodeStateChangeChecker.Result verdict = transitionToMaintenanceWithNoStorageNodesDown();
+     assertTrue(verdict.settingWantedStateIsAllowed());
+     assertFalse(verdict.wantedStateAlreadySet());
+ }
+
+ /** Going from maintenance to up is refused when no reported states have been set. */
+ @Test
+ public void testSetUpFailsIfReportedIsDown() {
+     ContentCluster fourNodeCluster = createCluster(createNodes(4));
+     NodeStateChangeChecker checker = createChangeChecker(fourNodeCluster);
+     // Not setting nodes up -> all are down
+
+     NodeStateChangeChecker.Result verdict = checker.evaluateTransition(
+             nodeStorage, currentClusterState, SetUnitStateRequest.Condition.SAFE,
+             maintenanceNodeState, upNodeState);
+     assertFalse(verdict.settingWantedStateIsAllowed());
+     assertFalse(verdict.wantedStateAlreadySet());
+ }
+
+ /** Going from down to up is refused, with a reason naming the current state. */
+ @Test
+ public void testCannotSetUpIfUnknownOldStateAndReportedIsDown() {
+     ContentCluster fourNodeCluster = createCluster(createNodes(4));
+     NodeStateChangeChecker checker = createChangeChecker(fourNodeCluster);
+     // Not setting nodes up -> all are down
+
+     NodeState downState = new NodeState(NodeType.STORAGE, State.DOWN);
+     NodeStateChangeChecker.Result verdict = checker.evaluateTransition(
+             nodeStorage, currentClusterState, SetUnitStateRequest.Condition.SAFE,
+             downState, upNodeState);
+     assertFalse(verdict.settingWantedStateIsAllowed());
+     assertFalse(verdict.wantedStateAlreadySet());
+     assertThat(verdict.getReason(), is("Refusing to set wanted state to up when it is currently in Down"));
+ }
+
+ /** A replication factor of 3 reported for storage node 1 is below the required 4, so the transition is refused. */
+ @Test
+ public void testCanUpgradeStorageSafeNo() {
+     ContentCluster fourNodeCluster = createCluster(createNodes(4));
+     NodeStateChangeChecker checker = createChangeChecker(fourNodeCluster);
+     setAllNodesUp(fourNodeCluster, HostInfo.createHostInfo(createDistributorHostInfo(4, 3, 6)));
+
+     NodeStateChangeChecker.Result verdict = checker.evaluateTransition(
+             nodeStorage, currentClusterState, SetUnitStateRequest.Condition.SAFE,
+             upNodeState, maintenanceNodeState);
+     assertFalse(verdict.settingWantedStateIsAllowed());
+     assertFalse(verdict.wantedStateAlreadySet());
+     assertThat(verdict.getReason(), is("Distributor 0 says storage node 1 " +
+             "has buckets with redundancy as low as 3, but we require at least 4"));
+ }
+
+ /** Storage node 3 has no replication factor in the host info; the transition is allowed. */
+ @Test
+ public void testCanUpgradeIfMissingMinReplicationFactor() {
+     ContentCluster fourNodeCluster = createCluster(createNodes(4));
+     NodeStateChangeChecker checker = createChangeChecker(fourNodeCluster);
+     setAllNodesUp(fourNodeCluster, HostInfo.createHostInfo(createDistributorHostInfo(4, 3, 6)));
+
+     Node storageNode3 = new Node(NodeType.STORAGE, 3);
+     NodeStateChangeChecker.Result verdict = checker.evaluateTransition(
+             storageNode3, currentClusterState, SetUnitStateRequest.Condition.SAFE,
+             upNodeState, maintenanceNodeState);
+     assertTrue(verdict.settingWantedStateIsAllowed());
+     assertFalse(verdict.wantedStateAlreadySet());
+ }
+
+ /** The host info only lists storage node 0; the transition for storage node 1 is allowed. */
+ @Test
+ public void testCanUpgradeIfStorageNodeMissingFromNodeInfo() {
+     ContentCluster fourNodeCluster = createCluster(createNodes(4));
+     NodeStateChangeChecker checker = createChangeChecker(fourNodeCluster);
+     String hostInfoJson = "{\n" +
+             " \"cluster-state-version\": 2,\n" +
+             " \"distributor\": {\n" +
+             " \"storage-nodes\": [\n" +
+             " {\n" +
+             " \"node-index\": 0,\n" +
+             " \"min-current-replication-factor\": " + requiredRedundancy + "\n" +
+             " }\n" +
+             " ]\n" +
+             " }\n" +
+             "}\n";
+     setAllNodesUp(fourNodeCluster, HostInfo.createHostInfo(hostInfoJson));
+
+     Node storageNode1 = new Node(NodeType.STORAGE, 1);
+     NodeStateChangeChecker.Result verdict = checker.evaluateTransition(
+             storageNode1, currentClusterState, SetUnitStateRequest.Condition.SAFE,
+             upNodeState, maintenanceNodeState);
+     assertTrue(verdict.settingWantedStateIsAllowed());
+     assertFalse(verdict.wantedStateAlreadySet());
+ }
+
+ /** Only storage node 1 has a reported state and no distributor host info is set; the transition is refused. */
+ @Test
+ public void testMissingDistributorState() {
+     ContentCluster fourNodeCluster = createCluster(createNodes(4));
+     NodeStateChangeChecker checker = createChangeChecker(fourNodeCluster);
+     NodeState reportedUp = new NodeState(NodeType.STORAGE, State.UP);
+     fourNodeCluster.clusterInfo().getStorageNodeInfo(1).setReportedState(reportedUp, 0);
+
+     NodeStateChangeChecker.Result verdict = checker.evaluateTransition(
+             nodeStorage, currentClusterState, SetUnitStateRequest.Condition.SAFE,
+             upNodeState, maintenanceNodeState);
+     assertFalse(verdict.settingWantedStateIsAllowed());
+     assertFalse(verdict.wantedStateAlreadySet());
+     assertThat(verdict.getReason(), is("Distributor node (0) has not reported any cluster state version yet."));
+ }
+
+ /**
+  * Evaluates a SAFE transition of the storage node under test between two node states that
+  * share the given state and differ at most in their descriptions.
+  */
+ private NodeStateChangeChecker.Result transitionToSameState(State state, String oldDescription, String newDescription) {
+     NodeStateChangeChecker checker = createChangeChecker(createCluster(createNodes(4)));
+
+     NodeState before = createNodeState(state, oldDescription);
+     NodeState after = createNodeState(state, newDescription);
+     return checker.evaluateTransition(
+             nodeStorage, currentClusterState, SetUnitStateRequest.Condition.SAFE,
+             before, after);
+ }
+
+ // Convenience overload that uses State.MAINTENANCE as the state on both sides.
+ private NodeStateChangeChecker.Result transitionToSameState(String oldDescription, String newDescription) {
+ return transitionToSameState(State.MAINTENANCE, oldDescription, newDescription);
+ }
+
+ /** Up to up, with differing descriptions, is reported as already set. */
+ @Test
+ public void testSettingUpWhenUpCausesAlreadySet() {
+     NodeStateChangeChecker.Result verdict = transitionToSameState(State.UP, "foo", "bar");
+     assertTrue(verdict.wantedStateAlreadySet());
+ }
+
+ /** Maintenance to maintenance with the same description is reported as already set and not allowed. */
+ @Test
+ public void testSettingAlreadySetState() {
+     NodeStateChangeChecker.Result verdict = transitionToSameState("foo", "foo");
+     assertFalse(verdict.settingWantedStateIsAllowed());
+     assertTrue(verdict.wantedStateAlreadySet());
+ }
+
+ /** Maintenance to maintenance with a different description is still reported as already set. */
+ @Test
+ public void testDifferentDescriptionImpliesAlreadySet() {
+     NodeStateChangeChecker.Result verdict = transitionToSameState("foo", "bar");
+     assertFalse(verdict.settingWantedStateIsAllowed());
+     assertTrue(verdict.wantedStateAlreadySet());
+ }
+
+ /**
+  * Sets reported states for all nodes of a four node cluster, optionally marks one storage
+  * node as down, and evaluates a SAFE up-to-maintenance transition for the storage node
+  * under test.
+  *
+  * @param storageNodeIndex index of the storage node to report as down; negative for none
+  * @param alternatingUpRetiredAndInitializing if true, nodes cycle through up, retired and
+  *        initializing by index modulo 3 instead of all being up
+  */
+ private NodeStateChangeChecker.Result transitionToMaintenanceWithOneStorageNodeDown(
+         int storageNodeIndex, boolean alternatingUpRetiredAndInitializing) {
+     ContentCluster fourNodeCluster = createCluster(createNodes(4));
+     NodeStateChangeChecker checker = createChangeChecker(fourNodeCluster);
+
+     for (int index = 0; index < fourNodeCluster.clusterInfo().getConfiguredNodes().size(); index++) {
+         // Pick some retired and initializing nodes too
+         final State reported;
+         if (!alternatingUpRetiredAndInitializing) { // TODO: Move this into the calling test
+             reported = State.UP;
+         } else {
+             switch (index % 3) {
+                 case 1: reported = State.RETIRED; break;
+                 case 2: reported = State.INITIALIZING; break;
+                 default: reported = State.UP; break;
+             }
+         }
+         fourNodeCluster.clusterInfo().getDistributorNodeInfo(index)
+                 .setReportedState(new NodeState(NodeType.DISTRIBUTOR, reported), 0);
+         fourNodeCluster.clusterInfo().getDistributorNodeInfo(index)
+                 .setHostInfo(HostInfo.createHostInfo(createDistributorHostInfo(4, 5, 6)));
+         fourNodeCluster.clusterInfo().getStorageNodeInfo(index)
+                 .setReportedState(new NodeState(NodeType.STORAGE, reported), 0);
+     }
+
+     if (storageNodeIndex >= 0) { // TODO: Move this into the calling test
+         NodeState reportedDown = new NodeState(NodeType.STORAGE, State.DOWN);
+         fourNodeCluster.clusterInfo().getStorageNodeInfo(storageNodeIndex).setReportedState(reportedDown, 4 /* time */);
+     }
+
+     return checker.evaluateTransition(
+             nodeStorage, currentClusterState, SetUnitStateRequest.Condition.SAFE,
+             upNodeState, maintenanceNodeState);
+ }
+
+ /**
+  * Reports every distributor and storage node of the cluster as up at time 0 and gives each
+  * distributor the supplied host info.
+  */
+ private void setAllNodesUp(ContentCluster cluster, HostInfo distributorHostInfo) {
+     final int nodeCount = cluster.clusterInfo().getConfiguredNodes().size();
+     for (int index = 0; index < nodeCount; index++) {
+         cluster.clusterInfo().getDistributorNodeInfo(index)
+                 .setReportedState(new NodeState(NodeType.DISTRIBUTOR, State.UP), 0);
+         cluster.clusterInfo().getDistributorNodeInfo(index).setHostInfo(distributorHostInfo);
+         cluster.clusterInfo().getStorageNodeInfo(index)
+                 .setReportedState(new NodeState(NodeType.STORAGE, State.UP), 0);
+     }
+ }
+
+ // Convenience overload: one storage node down, all other nodes reported as up.
+ private NodeStateChangeChecker.Result transitionToMaintenanceWithOneStorageNodeDown(int storageNodeIndex) {
+ return transitionToMaintenanceWithOneStorageNodeDown(storageNodeIndex, false);
+ }
+
+ // The index -1 means no storage node is reported as down; all nodes are reported as up.
+ private NodeStateChangeChecker.Result transitionToMaintenanceWithNoStorageNodesDown() {
+ return transitionToMaintenanceWithOneStorageNodeDown(-1, false);
+ }
+
+ /** With every node reported as up, the safe transition to maintenance is allowed. */
+ @Test
+ public void testCanUpgradeWhenAllUp() {
+     NodeStateChangeChecker.Result verdict = transitionToMaintenanceWithNoStorageNodesDown();
+     assertTrue(verdict.settingWantedStateIsAllowed());
+     assertFalse(verdict.wantedStateAlreadySet());
+ }
+
+ // NOTE(review): this body is identical to testCanUpgradeWhenAllUp. The helper called here
+ // passes false for alternatingUpRetiredAndInitializing, so no node is set to retired in this
+ // test despite its name. Confirm whether (-1, true) was intended.
+ @Test
+ public void testCanUpgradeWhenAllUpOrRetired() {
+ NodeStateChangeChecker.Result result = transitionToMaintenanceWithNoStorageNodesDown();
+ assertTrue(result.settingWantedStateIsAllowed());
+ assertFalse(result.wantedStateAlreadySet());
+ }
+
+ /** The transition is allowed when the only storage node reported down is the one being changed. */
+ @Test
+ public void testCanUpgradeWhenStorageIsDown() {
+     final int targetIndex = nodeStorage.getIndex();
+     NodeStateChangeChecker.Result verdict = transitionToMaintenanceWithOneStorageNodeDown(targetIndex);
+     assertTrue(verdict.settingWantedStateIsAllowed());
+     assertFalse(verdict.wantedStateAlreadySet());
+ }
+
+ /** The transition is refused when a storage node other than the one being changed is reported down. */
+ @Test
+ public void testCannotUpgradeWhenOtherStorageIsDown() {
+     final int downedIndex = 2;
+     // If this fails, just set downedIndex to some other valid index.
+     assertNotEquals(nodeStorage.getIndex(), downedIndex);
+
+     NodeStateChangeChecker.Result verdict = transitionToMaintenanceWithOneStorageNodeDown(downedIndex);
+     assertFalse(verdict.settingWantedStateIsAllowed());
+     assertFalse(verdict.wantedStateAlreadySet());
+     assertThat(verdict.getReason(), containsString("Not enough storage nodes running"));
+ }
+
+ /** Creates configured nodes with indices 0 to count - 1, each with false as its second argument. */
+ private List<ConfiguredNode> createNodes(int count) {
+     List<ConfiguredNode> configured = new ArrayList<>();
+     int index = 0;
+     while (index < count) {
+         configured.add(new ConfiguredNode(index, false));
+         index++;
+     }
+     return configured;
+ }
+
+} \ No newline at end of file
diff --git a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/RpcServerTest.java b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/RpcServerTest.java
new file mode 100644
index 00000000000..2816b75622e
--- /dev/null
+++ b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/RpcServerTest.java
@@ -0,0 +1,627 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.vespa.clustercontroller.core;
+
+import com.yahoo.vdslib.distribution.ConfiguredNode;
+import com.yahoo.vespa.clustercontroller.core.rpc.RpcServer;
+import com.yahoo.vespa.clustercontroller.core.testutils.LogFormatter;
+import com.yahoo.vespa.clustercontroller.core.testutils.WaitCondition;
+import com.yahoo.vespa.config.content.StorDistributionConfig;
+import com.yahoo.jrt.*;
+import com.yahoo.jrt.StringValue;
+import com.yahoo.jrt.slobrok.server.Slobrok;
+import com.yahoo.log.LogLevel;
+import com.yahoo.vdslib.distribution.Distribution;
+import com.yahoo.vdslib.state.*;
+import org.junit.Test;
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertTrue;
+import static org.junit.Assert.assertFalse;
+
+import java.net.InetAddress;
+import java.util.*;
+import java.util.logging.Logger;
+
+/**
+ * @author humbe
+ */
+public class RpcServerTest extends FleetControllerTest {
+
+ public static Logger log = Logger.getLogger(RpcServerTest.class.getName());
+
+ protected Supervisor supervisor;
+
+ /**
+  * Shuts down the RPC supervisor created by a test (if any) and then runs the base class tear down.
+  *
+  * The base class tear down runs in a finally block so that a failing transport shutdown
+  * cannot leave the fleet controller, slobrok and dummy nodes of the base fixture running.
+  *
+  * @throws Exception if shutting down the transport or the base class tear down fails
+  */
+ @Override
+ public void tearDown() throws Exception {
+     try {
+         if (supervisor != null) {
+             supervisor.transport().shutdown().join();
+         }
+     } finally {
+         super.tearDown();
+     }
+ }
+
+ /**
+  * Verifies that an RpcServer tolerates being rebound to the port it already uses and
+  * repeated, unbalanced connect/disconnect calls.
+  *
+  * The server and the slobrok instance are released in finally blocks so that a failure
+  * midway does not leave them running for the tests that follow.
+  */
+ @Test
+ public void testRebinding() throws Exception {
+     startingTest("RpcServerTest::testRebinding");
+     Slobrok slobrok = new Slobrok();
+     try {
+         String[] slobrokConnectionSpecs = {
+                 "tcp/" + InetAddress.getLocalHost().getHostName() + ":" + slobrok.port()
+         };
+         RpcServer server = new RpcServer(timer, new Object(), "mycluster", 0, new BackOff());
+         try {
+             // Port 0 first; the second call then hands back the port the server reports using.
+             server.setSlobrokConnectionSpecs(slobrokConnectionSpecs, 0);
+             int portUsed = server.getPort();
+             server.setSlobrokConnectionSpecs(slobrokConnectionSpecs, portUsed);
+             // Deliberately unbalanced: double disconnect and double connect must both be harmless.
+             server.disconnect();
+             server.disconnect();
+             server.connect();
+             server.connect();
+             server.disconnect();
+             server.connect();
+         } finally {
+             server.shutdown();
+         }
+     } finally {
+         slobrok.stop();
+     }
+ }
+
+ /**
+  * For some reason, the first test that tries to set up a stable system here occasionally times out.
+  * The theory is that some test run before it does something that is not cleaned up in time.
+  * This test tries to provoke that failure without failing because of it, to see whether the
+  * assumption can be verified.
+  *
+  * (testRebinding() does not seem to be the culprit. The tests in StateChangeTest that run before this
+  * test exercise very similar things, so it would be strange if they were the cause too, though it may
+  * be the last test there.)
+  *
+  * Any failure is deliberately not propagated, but it is logged so that the occurrence can be
+  * correlated with the behavior of the tests that follow.
+  */
+ @Test
+ public void testFailOccasionallyAndIgnoreToSeeIfOtherTestsThenWork() {
+     try {
+         startingTest("RpcServerTest::testFailOccasionallyAndIgnoreToSeeIfOtherTestsThenWork");
+         setUpFleetController(true, new FleetControllerOptions("mycluster"));
+         setUpVdsNodes(true, new DummyVdsNodeOptions());
+         waitForStableSystem();
+     } catch (Throwable t) {
+         // Throwable on purpose: assertion errors and timeouts must not fail this test either.
+         log.log(LogLevel.INFO, "Ignoring failure while setting up a stable system", t);
+     }
+ }
+
+ /**
+  * Disconnects the first and the last dummy node, waits until the controller's cluster state reflects
+  * it, and then checks that the getSystemState RPC returns a state saying the same.
+  */
+ @Test
+ public void testGetSystemState() throws Exception {
+     LogFormatter.initializeLogging();
+     startingTest("RpcServerTest::testGetSystemState");
+     FleetControllerOptions options = new FleetControllerOptions("mycluster");
+     setUpFleetController(true, options);
+     setUpVdsNodes(true, new DummyVdsNodeOptions());
+     waitForStableSystem();
+
+     // The nodes whose state is checked below, both while waiting and in the RPC reply.
+     final Node distributor0 = new Node(NodeType.DISTRIBUTOR, 0);
+     final Node storage9 = new Node(NodeType.STORAGE, 9);
+
+     assertTrue(nodes.get(0).isDistributor());
+     log.log(LogLevel.INFO, "Disconnecting distributor 0. Waiting for state to reflect change.");
+     nodes.get(0).disconnect();
+     nodes.get(19).disconnect();
+     fleetController.waitForNodesInSlobrok(9, 9, timeoutMS);
+     timer.advanceTime(options.nodeStateRequestTimeoutMS + options.maxSlobrokDisconnectGracePeriod);
+
+     wait(new WaitCondition.StateWait(fleetController, fleetController.getMonitor()) {
+         @Override
+         public String isConditionMet() {
+             if (currentState == null) {
+                 return "No cluster state defined yet";
+             }
+             if (currentState.getNodeState(distributor0).getState() != State.DOWN) {
+                 return "Distributor not detected down yet: " + currentState.toString();
+             }
+             if (!currentState.getNodeState(storage9).getState().oneOf("md")) {
+                 return "Storage node not detected down yet: " + currentState.toString();
+             }
+             return null;
+         }
+     }, null, timeoutMS);
+
+     supervisor = new Supervisor(new Transport());
+     Target connection = supervisor.connect(new Spec(fleetController.getRpcPort()));
+     assertTrue(connection.isValid());
+
+     Request request = new Request("getSystemState");
+     connection.invokeSync(request, timeoutS);
+     assertEquals(request.toString(), ErrorCode.NONE, request.errorCode());
+     assertTrue(request.toString(), request.checkReturnTypes("ss"));
+
+     // The second return value carries the serialized cluster state.
+     String systemState = request.returnValues().get(1).asString();
+     ClusterState retrieved = new ClusterState(systemState);
+     assertEquals(systemState, State.DOWN, retrieved.getNodeState(distributor0).getState());
+     assertTrue(systemState, retrieved.getNodeState(storage9).getState().oneOf("md"));
+ }
+
+ /**
+  * Sets the wanted state of one node through the fleet controller's setNodeState RPC and asserts
+  * that the call succeeded. The shared supervisor is created on first use.
+  *
+  * @param newState  the wanted state to set
+  * @param nodeType  type of the node to modify
+  * @param nodeIndex index of the node to modify
+  */
+ private void setWantedNodeState(State newState, NodeType nodeType, int nodeIndex) {
+     int rpcPort = fleetController.getRpcPort();
+     if (supervisor == null) {
+         supervisor = new Supervisor(new Transport());
+     }
+     Target connection = supervisor.connect(new Spec(rpcPort));
+     assertTrue(connection.isValid());
+
+     Node target = new Node(nodeType, nodeIndex);
+     String slobrokName = "storage/cluster.mycluster/" + target.getType().toString() + "/" + target.getIndex();
+     String serializedState = new NodeState(nodeType, newState).serialize(true);
+
+     Request request = new Request("setNodeState");
+     request.parameters().add(new StringValue(slobrokName));
+     request.parameters().add(new StringValue(serializedState));
+     connection.invokeSync(request, timeoutS);
+
+     assertEquals(request.toString(), ErrorCode.NONE, request.errorCode());
+     assertTrue(request.toString(), request.checkReturnTypes("s"));
+ }
+
+ @Test
+ public void testGetNodeState() throws Exception {
+ startingTest("RpcServerTest::testGetNodeState");
+ Set<ConfiguredNode> configuredNodes = new TreeSet<>();
+ for (int i = 0; i < 10; i++)
+ configuredNodes.add(new ConfiguredNode(i, false));
+ FleetControllerOptions options = new FleetControllerOptions("mycluster", configuredNodes);
+ options.minRatioOfStorageNodesUp = 0;
+ options.maxInitProgressTime = 30000;
+ options.stableStateTimePeriod = 60000;
+ setUpFleetController(true, options);
+ setUpVdsNodes(true, new DummyVdsNodeOptions());
+ waitForStableSystem();
+
+ setWantedNodeState(State.DOWN, NodeType.DISTRIBUTOR, 2);
+ setWantedNodeState(State.RETIRED, NodeType.STORAGE, 2);
+ setWantedNodeState(State.MAINTENANCE, NodeType.STORAGE, 7);
+ waitForCompleteCycle();
+ timer.advanceTime(1000000);
+ waitForCompleteCycle(); // Make fleet controller notice that time has changed before any disconnects
+ nodes.get(0).disconnect();
+ nodes.get(3).disconnect();
+ nodes.get(5).disconnect();
+ waitForState("version:\\d+ distributor:10 .0.s:d .2.s:d storage:10 .1.s:m .2.s:m .7.s:m");
+ timer.advanceTime(1000000);
+ waitForState("version:\\d+ distributor:10 .0.s:d .2.s:d storage:10 .1.s:d .2.s:d .7.s:m");
+ timer.advanceTime(1000000);
+ waitForCompleteCycle(); // Make fleet controller notice that time has changed before any disconnects
+ nodes.get(3).setNodeState(new NodeState(nodes.get(3).getType(), State.INITIALIZING).setInitProgress(0.2));
+ nodes.get(3).connect();
+ waitForState("version:\\d+ distributor:10 .0.s:d .2.s:d storage:10 .1.s:i .1.i:0.2 .2.s:d .7.s:m");
+
+ int rpcPort = fleetController.getRpcPort();
+ supervisor = new Supervisor(new Transport());
+ Target connection = supervisor.connect(new Spec(rpcPort));
+ assertTrue(connection.isValid());
+
+ Request req = new Request("getNodeState");
+ req.parameters().add(new StringValue("distributor"));
+ req.parameters().add(new Int32Value(0));
+ connection.invokeSync(req, timeoutS);
+ assertEquals(req.toString(), ErrorCode.NONE, req.errorCode());
+ assertTrue(req.toString(), req.checkReturnTypes("ssss"));
+ assertEquals(State.DOWN, NodeState.deserialize(NodeType.DISTRIBUTOR, req.returnValues().get(0).asString()).getState());
+ NodeState reported = NodeState.deserialize(NodeType.DISTRIBUTOR, req.returnValues().get(1).asString());
+ assertTrue(req.returnValues().get(1).asString(), reported.getState().oneOf("d-"));
+ assertEquals("", req.returnValues().get(2).asString());
+
+ req = new Request("getNodeState");
+ req.parameters().add(new StringValue("distributor"));
+ req.parameters().add(new Int32Value(2));
+ connection.invokeSync(req, timeoutS);
+ assertEquals(req.toString(), ErrorCode.NONE, req.errorCode());
+ assertTrue(req.toString(), req.checkReturnTypes("ssss"));
+ assertEquals(State.DOWN, NodeState.deserialize(NodeType.DISTRIBUTOR, req.returnValues().get(0).asString()).getState());
+ assertEquals("t:946080000", req.returnValues().get(1).asString());
+ assertEquals(State.DOWN, NodeState.deserialize(NodeType.DISTRIBUTOR, req.returnValues().get(2).asString()).getState());
+
+ req = new Request("getNodeState");
+ req.parameters().add(new StringValue("distributor"));
+ req.parameters().add(new Int32Value(4));
+ connection.invokeSync(req, timeoutS);
+ assertEquals(req.toString(), ErrorCode.NONE, req.errorCode());
+ assertTrue(req.toString(), req.checkReturnTypes("ssss"));
+ assertEquals("", req.returnValues().get(0).asString());
+ assertEquals("t:946080000", req.returnValues().get(1).asString());
+ assertEquals("", req.returnValues().get(2).asString());
+
+ req = new Request("getNodeState");
+ req.parameters().add(new StringValue("distributor"));
+ req.parameters().add(new Int32Value(15));
+ connection.invokeSync(req, timeoutS);
+ assertEquals(req.toString(), ErrorCode.METHOD_FAILED, req.errorCode());
+ assertEquals("No node distributor.15 exists in cluster mycluster", req.errorMessage());
+ assertFalse(req.toString(), req.checkReturnTypes("ssss"));
+
+ req = new Request("getNodeState");
+ req.parameters().add(new StringValue("storage"));
+ req.parameters().add(new Int32Value(1));
+ connection.invokeSync(req, timeoutS);
+ assertEquals(req.toString(), ErrorCode.NONE, req.errorCode());
+ assertTrue(req.toString(), req.checkReturnTypes("ssss"));
+ assertEquals("s:i i:0.2", req.returnValues().get(0).asString());
+ assertEquals("s:i i:0.2", req.returnValues().get(1).asString());
+ assertEquals("", req.returnValues().get(2).asString());
+
+ req = new Request("getNodeState");
+ req.parameters().add(new StringValue("storage"));
+ req.parameters().add(new Int32Value(2));
+ connection.invokeSync(req, timeoutS);
+ assertEquals(req.toString(), ErrorCode.NONE, req.errorCode());
+ assertTrue(req.toString(), req.checkReturnTypes("ssss"));
+ assertEquals(State.DOWN, NodeState.deserialize(NodeType.STORAGE, req.returnValues().get(0).asString()).getState());
+ reported = NodeState.deserialize(NodeType.STORAGE, req.returnValues().get(1).asString());
+ assertTrue(req.returnValues().get(1).asString(), reported.getState().oneOf("d-"));
+ assertEquals(State.RETIRED, NodeState.deserialize(NodeType.STORAGE, req.returnValues().get(2).asString()).getState());
+
+ req = new Request("getNodeState");
+ req.parameters().add(new StringValue("storage"));
+ req.parameters().add(new Int32Value(5));
+ connection.invokeSync(req, timeoutS);
+ assertEquals(req.toString(), ErrorCode.NONE, req.errorCode());
+ assertTrue(req.toString(), req.checkReturnTypes("ssss"));
+ assertEquals("", req.returnValues().get(0).asString());
+ assertEquals("t:946080000", req.returnValues().get(1).asString());
+ assertEquals("", req.returnValues().get(2).asString());
+
+ req = new Request("getNodeState");
+ req.parameters().add(new StringValue("storage"));
+ req.parameters().add(new Int32Value(7));
+ connection.invokeSync(req, timeoutS);
+ assertEquals(req.toString(), ErrorCode.NONE, req.errorCode());
+ assertTrue(req.toString(), req.checkReturnTypes("ssss"));
+ assertEquals(State.MAINTENANCE, NodeState.deserialize(NodeType.STORAGE, req.returnValues().get(0).asString()).getState());
+ assertEquals("t:946080000", req.returnValues().get(1).asString());
+ assertEquals(State.MAINTENANCE, NodeState.deserialize(NodeType.STORAGE, req.returnValues().get(2).asString()).getState());
+ }
+
+ /**
+  * Runs the same state transitions as testGetNodeState on a cluster whose last node (index 9) is
+  * configured retired, and checks that storage node 9 stays retired in every published state.
+  */
+ @Test
+ public void testGetNodeStateWithConfiguredRetired() throws Exception {
+     startingTest("RpcServerTest::testGetNodeStateWithConfiguredRetired");
+
+     List<ConfiguredNode> configuredNodes = new ArrayList<>();
+     for (int index = 0; index < 9; index++) {
+         configuredNodes.add(new ConfiguredNode(index, false));
+     }
+     // Last node is configured retired
+     configuredNodes.add(new ConfiguredNode(9, true));
+
+     FleetControllerOptions options = new FleetControllerOptions("mycluster", configuredNodes);
+     options.minRatioOfStorageNodesUp = 0;
+     options.maxInitProgressTime = 30000;
+     options.stableStateTimePeriod = 60000;
+     setUpFleetController(true, options);
+     setUpVdsNodes(true, new DummyVdsNodeOptions(), false, configuredNodes);
+     waitForState("version:\\d+ distributor:10 storage:10 .9.s:r");
+
+     setWantedNodeState(State.DOWN, NodeType.DISTRIBUTOR, 2);
+     setWantedNodeState(State.RETIRED, NodeType.STORAGE, 2);
+     setWantedNodeState(State.MAINTENANCE, NodeType.STORAGE, 7);
+     waitForCompleteCycle();
+     timer.advanceTime(1000000);
+     // Make fleet controller notice that time has changed before any disconnects
+     waitForCompleteCycle();
+
+     nodes.get(0).disconnect();
+     nodes.get(3).disconnect();
+     nodes.get(5).disconnect();
+     waitForState("version:\\d+ distributor:10 .0.s:d .2.s:d storage:10 .1.s:m .2.s:m .7.s:m .9.s:r");
+     timer.advanceTime(1000000);
+     waitForState("version:\\d+ distributor:10 .0.s:d .2.s:d storage:10 .1.s:d .2.s:d .7.s:m .9.s:r");
+     timer.advanceTime(1000000);
+     // Make fleet controller notice that time has changed before any disconnects
+     waitForCompleteCycle();
+
+     // Bring list entry 3 back, reporting initializing with 20% progress.
+     NodeState initializing = new NodeState(nodes.get(3).getType(), State.INITIALIZING).setInitProgress(0.2);
+     nodes.get(3).setNodeState(initializing);
+     nodes.get(3).connect();
+     waitForState("version:\\d+ distributor:10 .0.s:d .2.s:d storage:10 .1.s:i .1.i:0.2 .2.s:d .7.s:m .9.s:r");
+ }
+
+ @Test // Retires and later unretires nodes through reconfiguration while two storage nodes are disconnected, checking the published state after each step.
+ public void testGetNodeStateWithConfigurationChangeToRetiredWhileNodeDown() throws Exception {
+ startingTest("RpcServerTest::testGetNodeStateWithConfigurationChangeToRetiredWhileNodeDown");
+
+ { // Configuration: 5 nodes, none of them retired
+ List<ConfiguredNode> configuredNodes = new ArrayList<>();
+ for (int i = 0; i < 5; i++)
+ configuredNodes.add(new ConfiguredNode(i, false));
+ FleetControllerOptions options = new FleetControllerOptions("mycluster", configuredNodes);
+ options.maxInitProgressTime = 30000;
+ options.stableStateTimePeriod = 60000;
+ setUpFleetController(true, options);
+ setUpVdsNodes(true, new DummyVdsNodeOptions(), false, configuredNodes);
+ waitForState("version:\\d+ distributor:5 storage:5");
+ }
+
+ { // The first 2 storage nodes go down: list entries 1 and 3 are disconnected (entries 0 and 2 are the corresponding distributors)
+ waitForCompleteCycle();
+ timer.advanceTime(1000000);
+ waitForCompleteCycle(); // Make fleet controller notice that time has changed before any disconnects
+ nodes.get(1).disconnectImmediately();
+ nodes.get(3).disconnectImmediately();
+ waitForState("version:\\d+ distributor:5 storage:5 .0.s:m .1.s:m"); // storage 0 and 1 are expected in maintenance
+ }
+
+ { // Configuration change: Add 2 new nodes and retire the 5 existing ones
+ setUpVdsNodes(true, new DummyVdsNodeOptions(), false, 2);
+ Set<ConfiguredNode> configuredNodes = new TreeSet<>();
+ for (int i = 0; i < 5; i++)
+ configuredNodes.add(new ConfiguredNode(i, true));
+ configuredNodes.add(new ConfiguredNode(5, false));
+ configuredNodes.add(new ConfiguredNode(6, false));
+ FleetControllerOptions options = new FleetControllerOptions("mycluster", configuredNodes);
+ options.slobrokConnectionSpecs = this.options.slobrokConnectionSpecs;
+ this.options.maxInitProgressTime = 30000; // NOTE(review): assigned to the field this.options, not to the local options passed to updateOptions below - confirm this is intended
+ this.options.stableStateTimePeriod = 60000; // NOTE(review): same as above, the new options object does not receive this value
+ fleetController.updateOptions(options, 0);
+ waitForState("version:\\d+ distributor:7 storage:7 .0.s:m .1.s:m .2.s:r .3.s:r .4.s:r"); // the disconnected nodes stay in maintenance, the connected ones become retired
+ }
+
+ { // The 2 storage nodes that were down come up again and should go to state retired
+ waitForCompleteCycle();
+ timer.advanceTime(1000000);
+ waitForCompleteCycle(); // Make fleet controller notice that time has changed before any disconnects
+ nodes.get(1).connect();
+ nodes.get(3).connect();
+ waitForState("version:\\d+ distributor:7 storage:7 .0.s:r .1.s:r .2.s:r .3.s:r .4.s:r");
+ }
+
+ { // The first 2 storage nodes go down again
+ waitForCompleteCycle();
+ timer.advanceTime(1000000);
+ waitForCompleteCycle(); // Make fleet controller notice that time has changed before any disconnects
+ nodes.get(1).disconnectImmediately();
+ nodes.get(3).disconnectImmediately();
+ waitForState("version:\\d+ distributor:7 storage:7 .0.s:m .1.s:m .2.s:r .3.s:r .4.s:r");
+ }
+
+ { // Configuration change: Unretire the nodes
+ Set<ConfiguredNode> configuredNodes = new TreeSet<>();
+ for (int i = 0; i < 7; i++)
+ configuredNodes.add(new ConfiguredNode(i, false));
+ FleetControllerOptions options = new FleetControllerOptions("mycluster", configuredNodes);
+ options.slobrokConnectionSpecs = this.options.slobrokConnectionSpecs;
+ this.options.maxInitProgressTime = 30000; // NOTE(review): assigned to the field this.options, not to the local options passed to updateOptions below - confirm this is intended
+ this.options.stableStateTimePeriod = 60000; // NOTE(review): same as above
+ fleetController.updateOptions(options, 0);
+ waitForState("version:\\d+ distributor:7 storage:7 .0.s:m .1.s:m");
+ }
+
+ { // The 2 storage nodes that were down come up again and should go to state up
+ waitForCompleteCycle();
+ timer.advanceTime(1000000);
+ waitForCompleteCycle(); // Make fleet controller notice that time has changed before any disconnects
+ nodes.get(1).connect();
+ nodes.get(3).connect();
+ waitForState("version:\\d+ distributor:7 storage:7"); // no per-node entries expected once all nodes are back
+ }
+
+ }
+
+ @Test // Reconfigures a running cluster (same config, retire 5 nodes and add 2, same config again) and checks the published state after each step.
+ public void testGetNodeStateWithConfigurationChangeToRetired() throws Exception {
+ startingTest("RpcServerTest::testGetNodeStateWithConfigurationChangeToRetired");
+
+ { // Configuration: 5 nodes, none of them retired
+ List<ConfiguredNode> configuredNodes = new ArrayList<>();
+ for (int i = 0; i < 5; i++)
+ configuredNodes.add(new ConfiguredNode(i, false));
+ FleetControllerOptions options = new FleetControllerOptions("mycluster", configuredNodes);
+ options.maxInitProgressTime = 30000;
+ options.stableStateTimePeriod = 60000;
+ setUpFleetController(true, options);
+ setUpVdsNodes(true, new DummyVdsNodeOptions(), false, configuredNodes);
+ waitForState("version:\\d+ distributor:5 storage:5");
+ }
+
+ { // Reconfigure with the same set of nodes; the expected state is unchanged
+ Set<ConfiguredNode> configuredNodes = new TreeSet<>();
+ for (int i = 0; i < 5; i++)
+ configuredNodes.add(new ConfiguredNode(i, false));
+ FleetControllerOptions options = new FleetControllerOptions("mycluster", configuredNodes);
+ options.slobrokConnectionSpecs = this.options.slobrokConnectionSpecs;
+ this.options.maxInitProgressTime = 30000; // NOTE(review): assigned to the field this.options, not to the local options passed to updateOptions below - confirm this is intended
+ this.options.stableStateTimePeriod = 60000; // NOTE(review): same as above
+ fleetController.updateOptions(options, 0);
+ waitForState("version:\\d+ distributor:5 storage:5");
+ }
+
+ { // Configuration change: Add 2 new nodes and retire the 5 existing ones
+ setUpVdsNodes(true, new DummyVdsNodeOptions(), false, 2);
+ Set<ConfiguredNode> configuredNodes = new TreeSet<>();
+ for (int i = 0; i < 5; i++)
+ configuredNodes.add(new ConfiguredNode(i, true));
+ configuredNodes.add(new ConfiguredNode(5, false));
+ configuredNodes.add(new ConfiguredNode(6, false));
+ FleetControllerOptions options = new FleetControllerOptions("mycluster", configuredNodes);
+ options.slobrokConnectionSpecs = this.options.slobrokConnectionSpecs;
+ this.options.maxInitProgressTime = 30000; // NOTE(review): assigned to the field this.options, not to the local options passed to updateOptions below - confirm this is intended
+ this.options.stableStateTimePeriod = 60000; // NOTE(review): same as above
+ fleetController.updateOptions(options, 0);
+ waitForState("version:\\d+ distributor:7 storage:7 .0.s:r .1.s:r .2.s:r .3.s:r .4.s:r");
+ }
+
+ { // Reconfigure with the same set of nodes; the expected state is unchanged
+ Set<ConfiguredNode> configuredNodes = new TreeSet<>();
+ for (int i = 0; i < 5; i++)
+ configuredNodes.add(new ConfiguredNode(i, true));
+ configuredNodes.add(new ConfiguredNode(5, false));
+ configuredNodes.add(new ConfiguredNode(6, false));
+ FleetControllerOptions options = new FleetControllerOptions("mycluster", configuredNodes);
+ options.slobrokConnectionSpecs = this.options.slobrokConnectionSpecs;
+ this.options.maxInitProgressTime = 30000; // NOTE(review): assigned to the field this.options, not to the local options passed to updateOptions below - confirm this is intended
+ this.options.stableStateTimePeriod = 60000; // NOTE(review): same as above
+ fleetController.updateOptions(options, 0);
+ waitForState("version:\\d+ distributor:7 storage:7 .0.s:r .1.s:r .2.s:r .3.s:r .4.s:r");
+ }
+
+ { // Configuration change: Remove the previously retired nodes (the whole step is currently disabled, see the TODOs)
+ /*
+ TODO: Verify current result: version:23 distributor:7 .0.s:d .1.s:d .2.s:d .3.s:d .4.s:d storage:7 .0.s:m .1.s:m .2.s:m .3.s:m .4.s:m
+ TODO: Make this work without stopping/disconnecting (see SystemStateGenerator.setNodes
+ Set<ConfiguredNode> configuredNodes = new TreeSet<>();
+ configuredNodes.add(new ConfiguredNode(5, false));
+ configuredNodes.add(new ConfiguredNode(6, false));
+ FleetControllerOptions options = new FleetControllerOptions("mycluster", configuredNodes);
+ options.slobrokConnectionSpecs = this.options.slobrokConnectionSpecs;
+ this.options.maxInitProgressTime = 30000;
+ this.options.stableStateTimePeriod = 60000;
+ fleetController.updateOptions(options, 0);
+ for (int i = 0; i < 5*2; i++) {
+ nodes.get(i).disconnectSlobrok();
+ nodes.get(i).disconnect();
+ }
+ waitForState("version:\\d+ distributor:7 storage:7 .0.s:d .1.s:d .2.s:d .3.s:d .4.s:d");
+ */
+ }
+ }
+
+ /**
+  * Builds a distribution config with a single group (index "0", name "foo") holding one node
+  * entry per given index.
+  *
+  * @param nodes the node indexes to put in the group, added in the set's iteration order
+  * @return the distribution config built from those nodes
+  */
+ public StorDistributionConfig getDistConfig(Set<Integer> nodes) {
+     List<StorDistributionConfig.Group.Nodes.Builder> nodeBuilders = new LinkedList<>();
+     for (int nodeIndex : nodes) {
+         StorDistributionConfig.Group.Nodes.Builder nodeBuilder = new StorDistributionConfig.Group.Nodes.Builder();
+         nodeBuilder.index(nodeIndex);
+         nodeBuilders.add(nodeBuilder);
+     }
+
+     StorDistributionConfig.Group.Builder group = new StorDistributionConfig.Group.Builder();
+     group.nodes(nodeBuilders);
+     group.index("0");
+     group.name("foo");
+
+     StorDistributionConfig.Builder distribution = new StorDistributionConfig.Builder();
+     distribution.group(group);
+     return new StorDistributionConfig(distribution);
+ }
+
+ /**
+  * Sets wanted states through the setNodeState RPC on a cluster with a sparse set of node indexes
+  * and verifies how those wanted states combine with nodes disconnecting and reconnecting.
+  */
+ @Test
+ public void testSetNodeState() throws Exception {
+     startingTest("RpcServerTest::testSetNodeState");
+     FleetControllerOptions options = new FleetControllerOptions("mycluster");
+     Set<Integer> nodeIndexes = new TreeSet<>(Arrays.asList(4, 6, 9, 10, 14, 16, 21, 22, 23, 25));
+     options.setStorageDistribution(new Distribution(getDistConfig(nodeIndexes)));
+     setUpFleetController(true, options);
+     setUpVdsNodes(true, new DummyVdsNodeOptions(), false, nodeIndexes);
+     waitForState("version:\\d+ distributor:26 .0.s:d .1.s:d .2.s:d .3.s:d .5.s:d .7.s:d .8.s:d .11.s:d .12.s:d .13.s:d .15.s:d .17.s:d .18.s:d .19.s:d .20.s:d .24.s:d storage:26 .0.s:d .1.s:d .2.s:d .3.s:d .5.s:d .7.s:d .8.s:d .11.s:d .12.s:d .13.s:d .15.s:d .17.s:d .18.s:d .19.s:d .20.s:d .24.s:d");
+
+     supervisor = new Supervisor(new Transport());
+     Target connection = supervisor.connect(new Spec(fleetController.getRpcPort()));
+     assertTrue(connection.isValid());
+
+     // Wanted state retired on storage node 14.
+     Request retireRequest = new Request("setNodeState");
+     retireRequest.parameters().add(new StringValue("storage/cluster.mycluster/storage/14"));
+     retireRequest.parameters().add(new StringValue("s:r"));
+     connection.invokeSync(retireRequest, timeoutS);
+     assertEquals(retireRequest.toString(), ErrorCode.NONE, retireRequest.errorCode());
+     assertTrue(retireRequest.toString(), retireRequest.checkReturnTypes("s"));
+
+     waitForState("version:\\d+ distributor:26 .* storage:26 .* .14.s:r .*");
+
+     // Wanted state maintenance on storage node 16.
+     Request maintenanceRequest = new Request("setNodeState");
+     maintenanceRequest.parameters().add(new StringValue("storage/cluster.mycluster/storage/16"));
+     maintenanceRequest.parameters().add(new StringValue("s:m"));
+     connection.invokeSync(maintenanceRequest, timeoutS);
+     assertEquals(maintenanceRequest.toString(), ErrorCode.NONE, maintenanceRequest.errorCode());
+     assertTrue(maintenanceRequest.toString(), maintenanceRequest.checkReturnTypes("s"));
+
+     waitForState("version:\\d+ distributor:26 .* storage:26 .* .14.s:r.* .16.s:m .*");
+
+     // Positions in the dummy node list; judging by the states awaited below these are presumably
+     // storage node 16 (sixth configured index) and storage node 14 (fifth configured index).
+     final int sixthStorageEntry = 5 * 2 + 1;
+     final int fifthStorageEntry = 4 * 2 + 1;
+
+     nodes.get(sixthStorageEntry).disconnect();
+     waitForCompleteCycle();
+     timer.advanceTime(100000000);
+     waitForCompleteCycle();
+     assertEquals(State.MAINTENANCE, fleetController.getSystemState().getNodeState(new Node(NodeType.STORAGE, 16)).getState());
+
+     nodes.get(fifthStorageEntry).disconnect();
+     waitForState("version:\\d+ distributor:26 .* storage:26 .* .14.s:m.* .16.s:m .*");
+     nodes.get(fifthStorageEntry).connect();
+     timer.advanceTime(100000000);
+     // Might need to pass more actual time while waiting below?
+     waitForState("version:\\d+ distributor:26 .* storage:26 .* .14.s:r.* .16.s:m .*");
+ }
+
+ /**
+  * Verifies that setNodeState is rejected for index 10 of either node type in this cluster,
+  * and accepted for storage node 9.
+  */
+ @Test
+ public void testSetNodeStateOutOfRange() throws Exception {
+     startingTest("RpcServerTest::testSetNodeStateOutOfRange");
+     FleetControllerOptions options = new FleetControllerOptions("mycluster");
+     options.setStorageDistribution(new Distribution(Distribution.getDefaultDistributionConfig(2, 10)));
+     setUpFleetController(true, options);
+     setUpVdsNodes(true, new DummyVdsNodeOptions());
+     waitForStableSystem();
+
+     supervisor = new Supervisor(new Transport());
+     Target connection = supervisor.connect(new Spec(fleetController.getRpcPort()));
+     assertTrue(connection.isValid());
+
+     // Storage index 10 is not configured.
+     Request storageOutOfRange = new Request("setNodeState");
+     storageOutOfRange.parameters().add(new StringValue("storage/cluster.mycluster/storage/10"));
+     storageOutOfRange.parameters().add(new StringValue("s:m"));
+     connection.invokeSync(storageOutOfRange, timeoutS);
+     assertEquals(storageOutOfRange.toString(), ErrorCode.METHOD_FAILED, storageOutOfRange.errorCode());
+     assertEquals(storageOutOfRange.toString(), "Cannot set wanted state of node storage.10. Index does not correspond to a configured node.", storageOutOfRange.errorMessage());
+
+     // Distributor index 10 is not configured either.
+     Request distributorOutOfRange = new Request("setNodeState");
+     distributorOutOfRange.parameters().add(new StringValue("storage/cluster.mycluster/distributor/10"));
+     distributorOutOfRange.parameters().add(new StringValue("s:m"));
+     connection.invokeSync(distributorOutOfRange, timeoutS);
+     assertEquals(distributorOutOfRange.toString(), ErrorCode.METHOD_FAILED, distributorOutOfRange.errorCode());
+     assertEquals(distributorOutOfRange.toString(), "Cannot set wanted state of node distributor.10. Index does not correspond to a configured node.", distributorOutOfRange.errorMessage());
+
+     // Storage index 9 is accepted.
+     Request storageInRange = new Request("setNodeState");
+     storageInRange.parameters().add(new StringValue("storage/cluster.mycluster/storage/9"));
+     storageInRange.parameters().add(new StringValue("s:m"));
+     connection.invokeSync(storageInRange, timeoutS);
+     assertEquals(storageInRange.toString(), ErrorCode.NONE, storageInRange.errorCode());
+
+     waitForState("version:\\d+ distributor:10 storage:10 .9.s:m");
+ }
+
+ /**
+  * Verifies that the getMaster RPC of a single fleet controller reports index 0 as master
+  * together with the matching explanation.
+  */
+ @Test
+ public void testGetMaster() throws Exception {
+     startingTest("RpcServerTest::testGetMaster");
+     FleetControllerOptions options = new FleetControllerOptions("mycluster");
+     options.setStorageDistribution(new Distribution(Distribution.getDefaultDistributionConfig(2, 10)));
+     setUpFleetController(true, options);
+     setUpVdsNodes(true, new DummyVdsNodeOptions());
+     waitForStableSystem();
+
+     supervisor = new Supervisor(new Transport());
+     Target connection = supervisor.connect(new Spec(fleetController.getRpcPort()));
+     assertTrue(connection.isValid());
+
+     Request masterRequest = new Request("getMaster");
+     connection.invokeSync(masterRequest, timeoutS);
+     String requestDescription = masterRequest.toString();
+     assertEquals(requestDescription, 0, masterRequest.returnValues().get(0).asInt32());
+     assertEquals(requestDescription, "All 1 nodes agree that 0 is current master.", masterRequest.returnValues().get(1).asString());
+
+     // Note that this feature is tested better in MasterElectionTest.testGetMaster as it has multiple fleetcontrollers
+ }
+
+ /**
+  * Verifies the getNodeList RPC: it must return 20 slobrok names with matching RPC addresses,
+  * and every node with a non-empty address must answer a getnodestate2 request.
+  *
+  * The per-node check asserts on the getnodestate2 request itself (req2), not on the
+  * getNodeList request that has already been verified.
+  */
+ @Test
+ public void testGetNodeList() throws Exception {
+     startingTest("RpcServerTest::testGetNodeList");
+     setUpFleetController(true, new FleetControllerOptions("mycluster"));
+     setUpVdsNodes(true, new DummyVdsNodeOptions());
+     waitForStableSystem();
+
+     assertTrue(nodes.get(0).isDistributor());
+     nodes.get(0).disconnect();
+     waitForState("version:\\d+ distributor:10 .0.s:d storage:10");
+
+     int rpcPort = fleetController.getRpcPort();
+     supervisor = new Supervisor(new Transport());
+     Target connection = supervisor.connect(new Spec(rpcPort));
+     assertTrue(connection.isValid());
+
+     // Possibly do request multiple times if we haven't lost slobrok contact first times yet.
+     // NOTE(review): the loop always ends at the unconditional break after the first pass, and the
+     // "continue" statements below only skip to the next node. As written the request is never
+     // repeated and an address still present for distributor 0 is tolerated - confirm the intent.
+     for (int j = 0; j <= 10; ++j) {
+         Request req = new Request("getNodeList");
+         connection.invokeSync(req, timeoutS);
+         assertEquals(req.errorMessage(), ErrorCode.NONE, req.errorCode());
+         assertTrue(req.toString(), req.checkReturnTypes("SS"));
+         String[] slobrok = req.returnValues().get(0).asStringArray().clone();
+         String[] rpc = req.returnValues().get(1).asStringArray().clone();
+
+         assertEquals(20, slobrok.length);
+         assertEquals(20, rpc.length);
+
+         // Verify that we can connect to all addresses returned.
+         for (int i = 0; i < 20; ++i) {
+             if (slobrok[i].equals("storage/cluster.mycluster/distributor/0")) {
+                 if (j < 10 && !"".equals(rpc[i])) {
+                     continue;
+                 }
+                 assertEquals(slobrok[i], "", rpc[i]);
+                 continue;
+             }
+             assertFalse(slobrok[i], rpc[i].equals(""));
+             Request req2 = new Request("getnodestate2");
+             req2.parameters().add(new StringValue("unknown"));
+             Target connection2 = supervisor.connect(new Spec(rpc[i]));
+             connection2.invokeSync(req2, timeoutS);
+             assertEquals(req2.toString(), ErrorCode.NONE, req2.errorCode());
+         }
+         break;
+     }
+ }
+
+}
diff --git a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/SlobrokTest.java b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/SlobrokTest.java
new file mode 100644
index 00000000000..f043a325fdd
--- /dev/null
+++ b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/SlobrokTest.java
@@ -0,0 +1,116 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.vespa.clustercontroller.core;
+
+import com.yahoo.jrt.slobrok.server.Slobrok;
+import com.yahoo.log.LogLevel;
+import org.junit.Test;
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertTrue;
+
+import java.util.logging.Logger;
+
+public class SlobrokTest extends FleetControllerTest {
+
+ private static Logger log = Logger.getLogger(SlobrokTest.class.getName());
+
+ /**
+  * Tells whether every node in the cluster has a zero connection attempt count and a
+  * latest node state request time that is set.
+  *
+  * @return false as soon as one node fails either check, true otherwise
+  */
+ private boolean clusterAvailable() {
+     for (NodeInfo node : fleetController.getCluster().getNodeInfo()) {
+         if (node.getConnectionAttemptCount() > 0) {
+             return false;
+         }
+         if (node.getLatestNodeStateRequestTime() == null) {
+             return false;
+         }
+     }
+     return true;
+ }
+ /**
+  * Asserts, with a message naming the offending node, the condition that clusterAvailable() polls for:
+  * zero connection attempts and a latest node state request time that is set.
+  *
+  * The request time is checked for null first, as clusterAvailable() does, so a missing value gives
+  * the assertion message instead of a NullPointerException from unboxing. The original non-zero
+  * check is kept.
+  */
+ private void assertClusterAvailable() {
+     ContentCluster cluster = fleetController.getCluster();
+     for (NodeInfo info : cluster.getNodeInfo()) {
+         assertEquals("Node " + info + " connection attempts.", 0, info.getConnectionAttemptCount());
+         assertTrue("Node " + info + " has no last request time.",
+                 info.getLatestNodeStateRequestTime() != null && info.getLatestNodeStateRequestTime() != 0);
+     }
+ }
+
+ /**
+  * Stops and restarts the slobrok server four times and verifies that the cluster becomes available
+  * again each time without the cluster state version changing.
+  *
+  * An interrupt received while polling is no longer dropped: the interrupt status is restored and
+  * polling stops, leaving the verdict to assertClusterAvailable().
+  */
+ @Test
+ public void testSingleSlobrokRestart() throws Exception {
+     startingTest("SlobrokTest::testSingleSlobrokRestart");
+     FleetControllerOptions options = new FleetControllerOptions("mycluster");
+     options.nodeStateRequestTimeoutMS = 60 * 60 * 1000;
+     options.maxSlobrokDisconnectGracePeriod = 60 * 60 * 1000;
+     setUpFleetController(true, options);
+     setUpVdsNodes(true, new DummyVdsNodeOptions());
+     waitForStableSystem();
+
+     int version = fleetController.getSystemState().getVersion();
+     int slobrokPort = slobrok.port();
+
+     // Test that we survive some slobrok instability without changing system state.
+     for (int j = 0; j < 4; ++j) {
+         log.log(LogLevel.INFO, "Mirror updateForDistributor count is " + fleetController.getSlobrokMirrorUpdates());
+         log.log(LogLevel.INFO, "STOPPING SLOBROK SERVER (" + (j+1) + "/4)");
+         slobrok.stop();
+         for (int i = 0; i < 10; ++i) {
+             // Force one node to at least notice that the slobrok server is gone
+             if (i == 5) {
+                 log.log(LogLevel.INFO, "Forcing one node to initate a resend: " + nodes.get(3));
+                 nodes.get(3).replyToPendingNodeStateRequests();
+             }
+             waitForCompleteCycle();
+             timer.advanceTime(100);
+         }
+         log.log(LogLevel.INFO, "STARTING SLOBROK SERVER AGAIN (" + (j+1) + "/4)");
+         // Restart on the same port so the existing connection specs stay valid.
+         slobrok = new Slobrok(slobrokPort);
+         // May take up to 30 seconds for slobrok clients to re-register. Trigger retry.
+         for (DummyVdsNode node : nodes) {
+             node.disconnectSlobrok();
+             node.registerSlobrok();
+         }
+         //fleetController.setFreshSlobrokMirror();
+         waitForCompleteCycle();
+         fleetController.waitForNodesInSlobrok(10, 10, timeoutMS);
+
+         log.log(LogLevel.INFO, "Waiting for cluster to be up and available again");
+         for (int i = 0; i < timeoutMS; i += 10) {
+             if (clusterAvailable()) break;
+             timer.advanceTime(1000);
+             waitForCompleteCycle();
+             try {
+                 Thread.sleep(10);
+             } catch (InterruptedException e) {
+                 // Keep the interrupt visible to the caller and stop polling.
+                 Thread.currentThread().interrupt();
+                 break;
+             }
+         }
+         assertClusterAvailable();
+     }
+
+     assertEquals("Cluster state was affected, although it should not have been.",
+                  version, fleetController.getSystemState().getVersion());
+ }
+
+ /**
+  * Verifies that a node missing from slobrok keeps its state while the disconnect grace period
+  * lasts, and is marked down once time has advanced past that period.
+  *
+  * An interrupt received during the short sleep is no longer dropped; the interrupt status is restored.
+  */
+ @Test
+ public void testNodeTooLongOutOfSlobrok() throws Exception {
+     startingTest("SlobrokTest::testNodeTooLongOutOfSlobrok");
+     FleetControllerOptions options = new FleetControllerOptions("mycluster");
+     options.maxSlobrokDisconnectGracePeriod = 60 * 1000;
+     options.nodeStateRequestTimeoutMS = 10000 * 60 * 1000;
+     setUpFleetController(true, options);
+     setUpVdsNodes(true, new DummyVdsNodeOptions());
+     waitForStableSystem();
+
+     int version = fleetController.getSystemState().getVersion();
+     nodes.get(0).disconnectSlobrok();
+     log.log(LogLevel.INFO, "DISCONNECTED NODE FROM SLOBROK. SHOULD BE IN COOLDOWN PERIOD");
+     fleetController.waitForNodesInSlobrok(9, 10, timeoutMS);
+     synchronized (timer) {
+         nodes.get(0).sendGetNodeStateReply(0);
+     }
+
+     // Give the system a little time in which it could wrongly remove the node that is not in slobrok
+     timer.advanceTime(1000);
+     try {
+         Thread.sleep(10);
+     } catch (InterruptedException e) {
+         // Keep the interrupt visible to the caller instead of dropping it.
+         Thread.currentThread().interrupt();
+     }
+     assertEquals(version, fleetController.getSystemState().getVersion());
+     log.log(LogLevel.INFO, "JUMPING TIME. NODE SHOULD BE MARKED DOWN");
+     // At this point the fleetcontroller might not have noticed that the node is out of slobrok yet.
+     // Thus we keep advancing time another minute such that it should get down.
+     timer.advanceTime(options.nodeStateRequestTimeoutMS + options.maxSlobrokDisconnectGracePeriod);
+     waitForState("version:\\d+ distributor:10 .0.s:d storage:10");
+ }
+
+}
diff --git a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/StateChangeTest.java b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/StateChangeTest.java
new file mode 100644
index 00000000000..b94691bb880
--- /dev/null
+++ b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/StateChangeTest.java
@@ -0,0 +1,1135 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.vespa.clustercontroller.core;
+
+import com.yahoo.jrt.*;
+import com.yahoo.vdslib.distribution.ConfiguredNode;
+import com.yahoo.vdslib.state.*;
+import com.yahoo.vespa.clustercontroller.core.database.DatabaseHandler;
+import com.yahoo.vespa.clustercontroller.core.testutils.StateWaiter;
+import com.yahoo.vespa.clustercontroller.utils.util.NoMetricReporter;
+import org.junit.Before;
+import org.junit.Ignore;
+import org.junit.Test;
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertTrue;
+
+import java.util.*;
+import java.util.logging.Logger;
+
+public class StateChangeTest extends FleetControllerTest {
+
+ public static Logger log = Logger.getLogger(StateChangeTest.class.getName());
+ private Supervisor supervisor;
+ private FleetController ctrl;
+ private DummyCommunicator communicator;
+ private EventLog eventLog;
+
+ @Before
+ public void setUp() {
+ supervisor = new Supervisor(new Transport());
+ }
+
+    /**
+     * Wires up a FleetController around a DummyCommunicator, using the shared timer for every
+     * collaborator, and stores the controller, communicator and event log in the test's fields.
+     * After construction the controller is ticked once, every configured storage and distributor
+     * node is reported as UP through the communicator, and the controller is ticked again.
+     *
+     * @param options the fleet controller options to build the controller and cluster from
+     */
+    private void initialize(FleetControllerOptions options) throws Exception {
+        // One storage node and one distributor per configured node index, storage first.
+        List<Node> allNodes = new ArrayList<>();
+        for (int index = 0; index < options.nodes.size(); index++) {
+            allNodes.add(new Node(NodeType.STORAGE, index));
+            allNodes.add(new Node(NodeType.DISTRIBUTOR, index));
+        }
+
+        communicator = new DummyCommunicator(allNodes, timer);
+        MetricUpdater metrics = new MetricUpdater(new NoMetricReporter(), options.fleetControllerIndex);
+        eventLog = new EventLog(timer, metrics);
+        ContentCluster contentCluster = new ContentCluster(
+                options.clusterName,
+                options.nodes,
+                options.storageDistribution,
+                options.minStorageNodesUp,
+                options.minRatioOfStorageNodesUp);
+        NodeStateGatherer gatherer = new NodeStateGatherer(timer, timer, eventLog);
+        DatabaseHandler databaseHandler = new DatabaseHandler(
+                timer, options.zooKeeperServerAddress, options.fleetControllerIndex, timer);
+        SystemStateGenerator generator = new SystemStateGenerator(timer, eventLog, metrics);
+        SystemStateBroadcaster broadcaster = new SystemStateBroadcaster(timer, timer);
+        MasterElectionHandler electionHandler = new MasterElectionHandler(
+                options.fleetControllerIndex, options.fleetControllerCount, timer, timer);
+        ctrl = new FleetController(
+                timer, eventLog, contentCluster, gatherer, communicator, null, null, communicator,
+                databaseHandler, generator, broadcaster, electionHandler, metrics, options);
+
+        ctrl.tick();
+
+        // Report every node as up, then let the controller process the reports.
+        for (int index = 0; index < options.nodes.size(); index++) {
+            Node storageNode = new Node(NodeType.STORAGE, index);
+            communicator.setNodeState(storageNode, State.UP, "");
+            Node distributorNode = new Node(NodeType.DISTRIBUTOR, index);
+            communicator.setNodeState(distributorNode, State.UP, "");
+        }
+
+        ctrl.tick();
+    }
+
+    /**
+     * Shuts down the supervisor's transport, if a supervisor was created, and then runs the
+     * superclass teardown. The superclass teardown runs even if shutting down the transport fails,
+     * so a failing shutdown cannot skip the rest of the cleanup.
+     */
+    @Override
+    public void tearDown() throws Exception {
+        try {
+            if (supervisor != null) {
+                supervisor.transport().shutdown().join();
+                supervisor = null;
+            }
+        } finally {
+            super.tearDown();
+        }
+    }
+
+    /**
+     * Asserts that the events logged for the given node, rendered one per line in logged order,
+     * equal the expected text.
+     *
+     * @param n the node whose events are fetched from the event log
+     * @param correct the expected rendering: each event's toString() followed by a newline
+     */
+    public void verifyNodeEvents(Node n, String correct) {
+        // Build the rendering in a StringBuilder rather than by repeated String concatenation.
+        StringBuilder actual = new StringBuilder();
+        for (NodeEvent e : eventLog.getNodeEvents(n)) {
+            actual.append(e.toString()).append("\n");
+        }
+
+        assertEquals(correct, actual.toString());
+    }
+
+    /**
+     * Creates the configured nodes with indexes 0 (inclusive) to count (exclusive), each constructed
+     * with false as its second argument.
+     *
+     * @param count the number of configured nodes to create
+     * @return a new mutable list holding the created nodes in index order
+     */
+    private List<ConfiguredNode> createNodes(int count) {
+        List<ConfiguredNode> configured = new ArrayList<>();
+        int index = 0;
+        while (index < count) {
+            configured.add(new ConfiguredNode(index, false));
+            index++;
+        }
+        return configured;
+    }
+
+ /**
+ * Drives a ten node cluster through a startup sequence: all distributors report initializing with
+ * progress 0.0, all storage nodes report initializing with progress rising from 0.0 to 0.9, and
+ * then both node types report up. The cluster state string is checked after the initializing
+ * phase and after all nodes are up, and finally the logged events of distributor 0 and storage
+ * node 0 are compared against the expected event text.
+ */
+ @Test
+ public void testNormalStartup() throws Exception {
+ FleetControllerOptions options = new FleetControllerOptions("mycluster", createNodes(10));
+ options.maxInitProgressTime = 50000;
+
+ initialize(options);
+
+ // Should now pick up previous node states
+ ctrl.tick();
+
+
+ // All distributors report that they are initializing, with no progress yet.
+ for (int j = 0; j < 10; ++j) {
+ communicator.setNodeState(new Node(NodeType.DISTRIBUTOR, j), new NodeState(NodeType.DISTRIBUTOR, State.INITIALIZING).setInitProgress(0.0), "");
+ }
+
+ // Ten rounds: advance time by a twentieth of maxInitProgressTime, tick, and then let every storage
+ // node report init progress i / 100 (0.0, 0.1, ..., 0.9).
+ for (int i=0; i<100; i += 10) {
+ timer.advanceTime(options.maxInitProgressTime / 20);
+ ctrl.tick();
+ for (int j = 0; j < 10; ++j) {
+ communicator.setNodeState(new Node(NodeType.STORAGE, j), new NodeState(NodeType.STORAGE, State.INITIALIZING).setInitProgress(i / 100.0), "");
+ }
+ }
+
+ // Now, fleet controller should have generated a new cluster state.
+ ctrl.tick();
+
+ assertEquals("version:6 distributor:10 .0.s:i .0.i:0.0 .1.s:i .1.i:0.0 .2.s:i .2.i:0.0 .3.s:i .3.i:0.0 .4.s:i .4.i:0.0 .5.s:i .5.i:0.0 .6.s:i .6.i:0.0 .7.s:i .7.i:0.0 .8.s:i .8.i:0.0 .9.s:i .9.i:0.0 storage:10 .0.s:i .0.i:0.9 .1.s:i .1.i:0.9 .2.s:i .2.i:0.9 .3.s:i .3.i:0.9 .4.s:i .4.i:0.9 .5.s:i .5.i:0.9 .6.s:i .6.i:0.9 .7.s:i .7.i:0.9 .8.s:i .8.i:0.9 .9.s:i .9.i:0.9",
+ ctrl.getSystemState().toString());
+
+ timer.advanceTime(options.maxInitProgressTime / 20);
+ ctrl.tick();
+
+ // All storage nodes report up.
+ for (int i = 0; i < 10; ++i) {
+ communicator.setNodeState(new Node(NodeType.STORAGE, i), new NodeState(NodeType.STORAGE, State.UP), "");
+ }
+
+ timer.advanceTime(options.maxInitProgressTime / 20);
+ ctrl.tick();
+
+ // All distributors report up.
+ // NOTE(review): the reported NodeState is built with NodeType.STORAGE although the node is a
+ // distributor. This looks like a copy-paste slip; confirm it is harmless before changing it.
+ for (int i = 0; i < 10; ++i) {
+ communicator.setNodeState(new Node(NodeType.DISTRIBUTOR, i), new NodeState(NodeType.STORAGE, State.UP), "");
+ }
+
+ timer.advanceTime(options.maxInitProgressTime / 20);
+ ctrl.tick();
+
+ assertEquals("version:8 distributor:10 storage:10", ctrl.getSystemState().toString());
+
+
+ verifyNodeEvents(new Node(NodeType.DISTRIBUTOR, 0),
+ "Event: distributor.0: Now reporting state U\n" +
+ "Event: distributor.0: Altered node state in cluster state from 'D' to 'U'.\n" +
+ "Event: distributor.0: Now reporting state I, i 0.00\n" +
+ "Event: distributor.0: Altered node state in cluster state from 'U' to 'I, i 0.00'.\n" +
+ "Event: distributor.0: Now reporting state U\n" +
+ "Event: distributor.0: Altered node state in cluster state from 'I, i 0.00' to 'U'.\n");
+
+ verifyNodeEvents(new Node(NodeType.STORAGE, 0),
+ "Event: storage.0: Now reporting state U\n" +
+ "Event: storage.0: Altered node state in cluster state from 'D' to 'U'.\n" +
+ "Event: storage.0: Now reporting state I, i 0.00 (ls)\n" +
+ "Event: storage.0: Altered node state in cluster state from 'U' to 'D: Listing buckets. Progress 0.0 %.'.\n" +
+ "Event: storage.0: Now reporting state I, i 0.100 (read)\n" +
+ "Event: storage.0: Altered node state in cluster state from 'D: Listing buckets. Progress 0.0 %.' to 'I, i 0.100 (read)'.\n" +
+ "Event: storage.0: Now reporting state U\n" +
+ "Event: storage.0: Altered node state in cluster state from 'I, i 0.900 (read)' to 'U'.\n");
+ }
+
+    /**
+     * Takes distributor 0 and then storage node 0 down with a "Closed at other end" description and
+     * brings each of them up again with a new start timestamp. After each step the cluster state
+     * string and the reported node descriptions are checked; at the end the logged events and the
+     * premature crash count (expected to be 1 for both nodes) are verified.
+     */
+    @Test
+    public void testNodeGoingDownAndUp() throws Exception {
+        FleetControllerOptions options = new FleetControllerOptions("mycluster", createNodes(10));
+        options.nodeStateRequestTimeoutMS = 60 * 60 * 1000;
+        options.minTimeBetweenNewSystemStates = 0;
+        options.maxInitProgressTime = 50000;
+
+        initialize(options);
+
+        ctrl.tick();
+
+        communicator.setNodeState(new Node(NodeType.DISTRIBUTOR, 0), State.DOWN, "Closed at other end");
+
+        ctrl.tick();
+
+        String desc = ctrl.getReportedNodeState(new Node(NodeType.DISTRIBUTOR, 0)).getDescription();
+        assertTrue(desc, desc.contains("Closed at other end"));
+
+        assertEquals("version:4 distributor:10 .0.s:d storage:10", ctrl.getSystemState().toString());
+
+        timer.advanceTime(1000);
+
+        ctrl.tick();
+
+        communicator.setNodeState(new Node(NodeType.DISTRIBUTOR, 0), new NodeState(NodeType.DISTRIBUTOR, State.UP).setStartTimestamp(12345678), "");
+        communicator.setNodeState(new Node(NodeType.STORAGE, 0), State.DOWN, "Closed at other end");
+
+        ctrl.tick();
+
+        assertEquals("version:5 distributor:10 .0.t:12345678 storage:10 .0.s:m", ctrl.getSystemState().toString());
+
+        // Use a JUnit assertion rather than the assert keyword, which is skipped unless the JVM runs with -ea.
+        assertTrue(!ctrl.getReportedNodeState(new Node(NodeType.DISTRIBUTOR, 0)).hasDescription());
+        desc = ctrl.getReportedNodeState(new Node(NodeType.STORAGE, 0)).getDescription();
+        assertTrue(desc, desc.contains("Closed at other end"));
+
+        // Pass the maximum transition time for storage nodes; the expected state below moves from 'm' to 'd'.
+        timer.advanceTime(options.maxTransitionTime.get(NodeType.STORAGE) + 1);
+
+        ctrl.tick();
+
+        assertEquals("version:6 distributor:10 .0.t:12345678 storage:10 .0.s:d", ctrl.getSystemState().toString());
+
+        desc = ctrl.getReportedNodeState(new Node(NodeType.STORAGE, 0)).getDescription();
+        assertTrue(desc, desc.contains("Closed at other end"));
+
+        timer.advanceTime(1000);
+
+        ctrl.tick();
+
+        communicator.setNodeState(new Node(NodeType.STORAGE, 0), new NodeState(NodeType.STORAGE, State.UP).setStartTimestamp(12345679), "");
+
+        ctrl.tick();
+
+        assertEquals("version:7 distributor:10 storage:10 .0.t:12345679", ctrl.getSystemState().toString());
+
+        assertTrue(!ctrl.getReportedNodeState(new Node(NodeType.STORAGE, 0)).hasDescription());
+
+        verifyNodeEvents(new Node(NodeType.DISTRIBUTOR, 0),
+                "Event: distributor.0: Now reporting state U\n" +
+                "Event: distributor.0: Altered node state in cluster state from 'D' to 'U'.\n" +
+                "Event: distributor.0: Failed to get node state: D: Closed at other end\n" +
+                "Event: distributor.0: Stopped or possibly crashed after 0 ms, which is before stable state time period. Premature crash count is now 1.\n" +
+                "Event: distributor.0: Altered node state in cluster state from 'U' to 'D: Closed at other end'.\n" +
+                "Event: distributor.0: Now reporting state U, t 12345678\n" +
+                "Event: distributor.0: Altered node state in cluster state from 'D: Closed at other end' to 'U, t 12345678'.\n");
+
+        verifyNodeEvents(new Node(NodeType.STORAGE, 0),
+                "Event: storage.0: Now reporting state U\n" +
+                "Event: storage.0: Altered node state in cluster state from 'D' to 'U'.\n" +
+                "Event: storage.0: Failed to get node state: D: Closed at other end\n" +
+                "Event: storage.0: Stopped or possibly crashed after 1000 ms, which is before stable state time period. Premature crash count is now 1.\n" +
+                "Event: storage.0: Altered node state in cluster state from 'U' to 'M: Closed at other end'.\n" +
+                "Event: storage.0: 5001 milliseconds without contact. Marking node down.\n" +
+                "Event: storage.0: Altered node state in cluster state from 'M: Closed at other end' to 'D: Closed at other end'.\n" +
+                "Event: storage.0: Now reporting state U, t 12345679\n" +
+                "Event: storage.0: Altered node state in cluster state from 'D: Closed at other end' to 'U, t 12345679'.\n");
+
+        assertEquals(1, ctrl.getCluster().getNodeInfo(new Node(NodeType.DISTRIBUTOR, 0)).getPrematureCrashCount());
+        assertEquals(1, ctrl.getCluster().getNodeInfo(new Node(NodeType.STORAGE, 0)).getPrematureCrashCount());
+    }
+
+    /**
+     * Advances the test timer and then runs one fleet controller tick.
+     *
+     * @param timeMs the amount passed to the timer's advanceTime before ticking
+     */
+    public void tick(final int timeMs) throws Exception {
+        timer.advanceTime(timeMs);
+        this.ctrl.tick();
+    }
+
+    /**
+     * Same scenario as testNodeGoingDownAndUp, but the nodes go down with a "controlled shutdown"
+     * description after the stable state time period has passed. The test expects a premature
+     * crash count of 0 for both nodes and no "crashed" events in the event log.
+     */
+    @Test
+    public void testNodeGoingDownAndUpNotifying() throws Exception {
+        // Same test as above, but the node manages to notify why it is going down first.
+        FleetControllerOptions options = new FleetControllerOptions("mycluster", createNodes(10));
+        options.nodeStateRequestTimeoutMS = 60 * 60 * 1000;
+        options.maxSlobrokDisconnectGracePeriod = 100000;
+
+        initialize(options);
+
+        ctrl.tick();
+
+        tick((int)options.stableStateTimePeriod + 1);
+
+        communicator.setNodeState(new Node(NodeType.DISTRIBUTOR, 0), State.DOWN, "controlled shutdown");
+
+        ctrl.tick();
+
+        String desc = ctrl.getReportedNodeState(new Node(NodeType.DISTRIBUTOR, 0)).getDescription();
+        assertTrue(desc, desc.contains("Received signal 15 (SIGTERM - Termination signal)")
+                || desc.contains("controlled shutdown"));
+
+        tick(1000);
+
+        communicator.setNodeState(new Node(NodeType.DISTRIBUTOR, 0), State.UP, "");
+        communicator.setNodeState(new Node(NodeType.STORAGE, 0), State.DOWN, "controlled shutdown");
+
+        tick(1000);
+
+        assertEquals("version:5 distributor:10 storage:10 .0.s:m", ctrl.getSystemState().toString());
+
+        // Use a JUnit assertion rather than the assert keyword, which is skipped unless the JVM runs with -ea.
+        assertTrue(!ctrl.getReportedNodeState(new Node(NodeType.DISTRIBUTOR, 0)).hasDescription());
+        desc = ctrl.getReportedNodeState(new Node(NodeType.STORAGE, 0)).getDescription();
+        assertTrue(desc, desc.contains("Received signal 15 (SIGTERM - Termination signal)")
+                || desc.contains("controlled shutdown"));
+
+        // Pass the maximum transition time for storage nodes; the expected state below moves from 'm' to 'd'.
+        tick(options.maxTransitionTime.get(NodeType.STORAGE) + 1);
+
+        assertEquals("version:6 distributor:10 storage:10 .0.s:d", ctrl.getSystemState().toString());
+        desc = ctrl.getReportedNodeState(new Node(NodeType.STORAGE, 0)).getDescription();
+        assertTrue(desc, desc.contains("Received signal 15 (SIGTERM - Termination signal)")
+                || desc.contains("controlled shutdown"));
+
+        communicator.setNodeState(new Node(NodeType.STORAGE, 0), State.UP, "");
+
+        tick(1000);
+
+        assertEquals("version:7 distributor:10 storage:10", ctrl.getSystemState().toString());
+        assertTrue(!ctrl.getReportedNodeState(new Node(NodeType.STORAGE, 0)).hasDescription());
+
+        assertEquals(0, ctrl.getCluster().getNodeInfo(new Node(NodeType.DISTRIBUTOR, 0)).getPrematureCrashCount());
+        assertEquals(0, ctrl.getCluster().getNodeInfo(new Node(NodeType.STORAGE, 0)).getPrematureCrashCount());
+
+        verifyNodeEvents(new Node(NodeType.DISTRIBUTOR, 0),
+                "Event: distributor.0: Now reporting state U\n" +
+                "Event: distributor.0: Altered node state in cluster state from 'D' to 'U'.\n" +
+                "Event: distributor.0: Failed to get node state: D: controlled shutdown\n" +
+                "Event: distributor.0: Altered node state in cluster state from 'U' to 'D: controlled shutdown'.\n" +
+                "Event: distributor.0: Now reporting state U\n" +
+                "Event: distributor.0: Altered node state in cluster state from 'D: controlled shutdown' to 'U'.\n");
+
+        verifyNodeEvents(new Node(NodeType.STORAGE, 0),
+                "Event: storage.0: Now reporting state U\n" +
+                "Event: storage.0: Altered node state in cluster state from 'D' to 'U'.\n" +
+                "Event: storage.0: Failed to get node state: D: controlled shutdown\n" +
+                "Event: storage.0: Altered node state in cluster state from 'U' to 'M: controlled shutdown'.\n" +
+                "Event: storage.0: 5001 milliseconds without contact. Marking node down.\n" +
+                "Event: storage.0: Altered node state in cluster state from 'M: controlled shutdown' to 'D: controlled shutdown'.\n" +
+                "Event: storage.0: Now reporting state U\n" +
+                "Event: storage.0: Altered node state in cluster state from 'D: controlled shutdown' to 'U'.\n");
+
+    }
+
+    /**
+     * Hands the communicator a node list without node 0 and, shortly after, a list with node 0 back
+     * in it, and expects the cluster state to stay at version 3 throughout. The only extra event
+     * expected for storage node 0 is the one about a pending state request while out of slobrok.
+     */
+    @Test
+    public void testNodeGoingDownAndUpFast() throws Exception {
+        FleetControllerOptions options = new FleetControllerOptions("mycluster", createNodes(10));
+        options.maxSlobrokDisconnectGracePeriod = 60 * 1000;
+
+        initialize(options);
+
+        ctrl.tick();
+
+        // Node dropped out of slobrok: the new node list covers indexes 1 to 9 only.
+        List<Node> withoutNodeZero = new ArrayList<>();
+        for (int index = 1; index < 10; index++) {
+            withoutNodeZero.add(new Node(NodeType.STORAGE, index));
+            withoutNodeZero.add(new Node(NodeType.DISTRIBUTOR, index));
+        }
+        communicator.newNodes = withoutNodeZero;
+
+        ctrl.tick();
+        ctrl.tick();
+
+        assertEquals("version:3 distributor:10 storage:10", ctrl.getSystemState().toString());
+
+        // The node list is complete again: indexes 0 to 9.
+        List<Node> everyNode = new ArrayList<>();
+        for (int index = 0; index < 10; index++) {
+            everyNode.add(new Node(NodeType.STORAGE, index));
+            everyNode.add(new Node(NodeType.DISTRIBUTOR, index));
+        }
+        communicator.newNodes = everyNode;
+
+        ctrl.tick();
+
+        assertEquals("version:3 distributor:10 storage:10", ctrl.getSystemState().toString());
+
+        Node storageZero = new Node(NodeType.STORAGE, 0);
+        verifyNodeEvents(storageZero,
+                "Event: storage.0: Now reporting state U\n" +
+                "Event: storage.0: Altered node state in cluster state from 'D' to 'U'.\n" +
+                "Event: storage.0: Node is no longer in slobrok, but we still have a pending state request.\n");
+    }
+
+ @Test
+ public void testMaintenanceWhileNormalStorageNodeRestart() throws Exception {
+ FleetControllerOptions options = new FleetControllerOptions("mycluster", createNodes(10));
+ options.maxSlobrokDisconnectGracePeriod = 60 * 1000;
+
+ initialize(options);
+
+ communicator.setNodeState(new Node(NodeType.STORAGE, 6), State.DOWN, "Connection error: Closed at other end");
+
+ ctrl.tick();
+
+ assertEquals("version:4 distributor:10 storage:10 .6.s:m", ctrl.getSystemState().toString());
+
+ NodeState ns = ctrl.getReportedNodeState(new Node(NodeType.STORAGE, 6));
+ assertTrue(ns.toString(), ns.getDescription().indexOf("Connection error: Closed at other end") != -1);
+
+ tick(1000);
+
+ communicator.setNodeState(new Node(NodeType.STORAGE, 6), new NodeState(NodeType.STORAGE, State.INITIALIZING).setInitProgress(0.0), "");
+
+ ctrl.tick();
+
+ // Still maintenance since .i progress 0.0 is really down.
+ assertEquals("version:4 distributor:10 storage:10 .6.s:m", ctrl.getSystemState().toString());
+
+ communicator.setNodeState(new Node(NodeType.STORAGE, 6), new NodeState(NodeType.STORAGE, State.INITIALIZING).setInitProgress(0.6), "");
+
+ ctrl.tick();
+
+ // Now it's OK
+ assertEquals("version:5 distributor:10 storage:10 .6.s:i .6.i:0.6", ctrl.getSystemState().toString());
+
+ tick(1000);
+
+ communicator.setNodeState(new Node(NodeType.STORAGE, 6), new NodeState(NodeType.STORAGE, State.UP), "");
+
+ ctrl.tick();
+
+ assertEquals("version:6 distributor:10 storage:10", ctrl.getSystemState().toString());
+ assert(!ctrl.getReportedNodeState(new Node(NodeType.STORAGE, 6)).hasDescription());
+
+ verifyNodeEvents(new Node(NodeType.STORAGE, 6),
+ "Event: storage.6: Now reporting state U\n" +
+ "Event: storage.6: Altered node state in cluster state from 'D' to 'U'.\n" +
+ "Event: storage.6: Failed to get node state: D: Connection error: Closed at other end\n" +
+ "Event: storage.6: Stopped or possibly crashed after 0 ms, which is before stable state time period. Premature crash count is now 1.\n" +
+ "Event: storage.6: Altered node state in cluster state from 'U' to 'M: Connection error: Closed at other end'.\n" +
+ "Event: storage.6: Now reporting state I, i 0.00 (ls)\n" +
+ "Event: storage.6: Now reporting state I, i 0.600 (read)\n" +
+ "Event: storage.6: Altered node state in cluster state from 'M: Connection error: Closed at other end' to 'I, i 0.600 (read)'.\n" +
+ "Event: storage.6: Now reporting state U\n" +
+ "Event: storage.6: Altered node state in cluster state from 'I, i 0.600 (read)' to 'U'.\n");
+ }
+
+ @Test
+ public void testMaintenanceWithoutInitIfRetired() throws Exception {
+ List<ConfiguredNode> nodes = new ArrayList<>();
+ for (int i = 0; i < 10; i++) {
+ boolean retired = (i == 6);
+ nodes.add(new ConfiguredNode(i, retired));
+ }
+
+ FleetControllerOptions options = new FleetControllerOptions("mycluster", nodes);
+ options.maxSlobrokDisconnectGracePeriod = 60 * 1000;
+
+ initialize(options);
+
+ communicator.setNodeState(new Node(NodeType.STORAGE, 6), State.DOWN, "Connection error: Closed at other end");
+
+ ctrl.tick();
+
+ assertEquals("version:4 distributor:10 storage:10 .6.s:m", ctrl.getSystemState().toString());
+
+ NodeState ns = ctrl.getReportedNodeState(new Node(NodeType.STORAGE, 6));
+ assertTrue(ns.toString(), ns.getDescription().indexOf("Connection error: Closed at other end") != -1);
+
+ tick(1000);
+
+ communicator.setNodeState(new Node(NodeType.STORAGE, 6), new NodeState(NodeType.STORAGE, State.INITIALIZING).setInitProgress(0.0), "");
+
+ ctrl.tick();
+
+ // Still maintenance since .i progress 0.0 is really down.
+ assertEquals("version:4 distributor:10 storage:10 .6.s:m", ctrl.getSystemState().toString());
+
+ communicator.setNodeState(new Node(NodeType.STORAGE, 6), new NodeState(NodeType.STORAGE, State.INITIALIZING).setInitProgress(0.6), "");
+
+ ctrl.tick();
+
+ // Still maintenance since configured.
+ assertEquals("version:4 distributor:10 storage:10 .6.s:m", ctrl.getSystemState().toString());
+
+ tick(1000);
+
+ communicator.setNodeState(new Node(NodeType.STORAGE, 6), new NodeState(NodeType.STORAGE, State.UP), "");
+
+ ctrl.tick();
+
+ assertEquals("version:5 distributor:10 storage:10 .6.s:r", ctrl.getSystemState().toString());
+ assert(!ctrl.getReportedNodeState(new Node(NodeType.STORAGE, 6)).hasDescription());
+
+ verifyNodeEvents(new Node(NodeType.STORAGE, 6),
+ "Event: storage.6: Now reporting state U\n" +
+ "Event: storage.6: Altered node state in cluster state from 'D' to 'R'.\n" +
+ "Event: storage.6: Failed to get node state: D: Connection error: Closed at other end\n" +
+ "Event: storage.6: Stopped or possibly crashed after 0 ms, which is before stable state time period. Premature crash count is now 1.\n" +
+ "Event: storage.6: Altered node state in cluster state from 'R' to 'M: Connection error: Closed at other end'.\n" +
+ "Event: storage.6: Now reporting state I, i 0.00 (ls)\n" +
+ "Event: storage.6: Now reporting state I, i 0.600 (read)\n" +
+ "Event: storage.6: Now reporting state U\n" +
+ "Event: storage.6: Altered node state in cluster state from 'M: Connection error: Closed at other end' to 'R: Connection error: Closed at other end'.\n");
+ }
+
+ @Test
+ public void testMaintenanceToDownIfPastTransitionTimeAndRetired() throws Exception {
+ List<ConfiguredNode> nodes = new ArrayList<>();
+ for (int i = 0; i < 10; i++) {
+ boolean retired = (i == 6);
+ nodes.add(new ConfiguredNode(i, retired));
+ }
+
+ FleetControllerOptions options = new FleetControllerOptions("mycluster", nodes);
+ options.maxSlobrokDisconnectGracePeriod = 60 * 1000;
+
+ initialize(options);
+
+ communicator.setNodeState(new Node(NodeType.STORAGE, 6), State.DOWN, "Connection error: Closed at other end");
+
+ ctrl.tick();
+
+ assertEquals("version:4 distributor:10 storage:10 .6.s:m", ctrl.getSystemState().toString());
+
+ timer.advanceTime(100000);
+
+ ctrl.tick();
+
+ assertEquals("version:5 distributor:10 storage:10 .6.s:d", ctrl.getSystemState().toString());
+ }
+
+ // Test that a node that has been down for a long time (above steady state period), actually alters cluster state to
+ // tell that it is initializing, rather than being ignored as a just restarted/unstable node should be.
+ //
+ // Sequence: storage node 6 is up for 100000 ms, goes down, stays down for another 100000 ms, and then reports
+ // initializing with progress 0.001 and 0.1 before reporting up. The cluster state string, the node description
+ // and the logged events are checked along the way.
+ @Test
+ public void testDownNodeInitializing() throws Exception {
+ // Actually report initializing state if node has been down steadily for a while
+ FleetControllerOptions options = new FleetControllerOptions("mycluster", createNodes(10));
+ options.maxTransitionTime.put(NodeType.STORAGE, 5000);
+ options.maxInitProgressTime = 5000;
+ options.stableStateTimePeriod = 20000;
+ options.nodeStateRequestTimeoutMS = 1000000;
+ options.maxSlobrokDisconnectGracePeriod = 1000000;
+
+ initialize(options);
+
+ timer.advanceTime(100000); // Node has been in steady state up
+ ctrl.tick();
+
+ communicator.setNodeState(new Node(NodeType.STORAGE, 6), State.DOWN, "Connection error: Closed at other end");
+
+ ctrl.tick();
+
+ assertEquals("version:4 distributor:10 storage:10 .6.s:m", ctrl.getSystemState().toString());
+
+ timer.advanceTime(100000); // Node has been in steady state down
+
+ ctrl.tick();
+
+ assertEquals("version:5 distributor:10 storage:10 .6.s:d", ctrl.getSystemState().toString());
+
+ // A tiny init progress is expected to show up only as a description on the node's cluster state.
+ communicator.setNodeState(new Node(NodeType.STORAGE, 6), new NodeState(NodeType.STORAGE, State.INITIALIZING).setInitProgress(0.001), "");
+
+ ctrl.tick();
+
+ assertEquals("Listing buckets. Progress 0.1 %.", ctrl.getSystemState().getNodeState(new Node(NodeType.STORAGE, 6)).getDescription());
+
+ // With progress 0.1 the node is expected to be published as initializing in a new state version.
+ communicator.setNodeState(new Node(NodeType.STORAGE, 6), new NodeState(NodeType.STORAGE, State.INITIALIZING).setInitProgress(0.1), "");
+
+ ctrl.tick();
+
+ assertEquals("version:6 distributor:10 storage:10 .6.s:i .6.i:0.1", ctrl.getSystemState().toString());
+
+ // A further tick without new node reports must leave the cluster state unchanged.
+ ctrl.tick();
+
+ assertEquals("version:6 distributor:10 storage:10 .6.s:i .6.i:0.1", ctrl.getSystemState().toString());
+
+ communicator.setNodeState(new Node(NodeType.STORAGE, 6), new NodeState(NodeType.STORAGE, State.UP), "");
+
+ ctrl.tick();
+
+ assertEquals("version:7 distributor:10 storage:10", ctrl.getSystemState().toString());
+
+ verifyNodeEvents(new Node(NodeType.STORAGE, 6),
+ "Event: storage.6: Now reporting state U\n" +
+ "Event: storage.6: Altered node state in cluster state from 'D' to 'U'.\n" +
+ "Event: storage.6: Failed to get node state: D: Connection error: Closed at other end\n" +
+ "Event: storage.6: Altered node state in cluster state from 'U' to 'M: Connection error: Closed at other end'.\n" +
+ "Event: storage.6: 100000 milliseconds without contact. Marking node down.\n" +
+ "Event: storage.6: Altered node state in cluster state from 'M: Connection error: Closed at other end' to 'D: Connection error: Closed at other end'.\n" +
+ "Event: storage.6: Now reporting state I, i 0.00100 (ls)\n" +
+ "Event: storage.6: Now reporting state I, i 0.100 (read)\n" +
+ "Event: storage.6: Altered node state in cluster state from 'D: Listing buckets. Progress 0.1 %.' to 'I, i 0.100 (read)'.\n" +
+ "Event: storage.6: Now reporting state U\n" +
+ "Event: storage.6: Altered node state in cluster state from 'I, i 0.100 (read)' to 'U'.\n");
+ }
+
+ /**
+ * Lets storage node 6 report initializing with progress 0.1 and then advances the clock past
+ * maxInitProgressTime without further progress. The node is expected to be set down with a
+ * "without initialize progress" description, to stay down through a following down/initializing
+ * cycle, and to come back only when it reports up.
+ */
+ @Test
+ public void testNodeInitializationStalled() throws Exception {
+ // Node should eventually be marked down, and not become initializing next time, but stay down until up
+ FleetControllerOptions options = new FleetControllerOptions("mycluster", createNodes(10));
+ options.maxTransitionTime.put(NodeType.STORAGE, 5000);
+ options.maxInitProgressTime = 5000;
+ options.stableStateTimePeriod = 1000000;
+ options.maxSlobrokDisconnectGracePeriod = 10000000;
+
+ initialize(options);
+
+ timer.advanceTime(1000000); // Node has been in steady state up
+
+ ctrl.tick();
+
+ communicator.setNodeState(new Node(NodeType.STORAGE, 6), State.DOWN, "Connection error: Closed at other end");
+
+ ctrl.tick();
+
+ assertEquals("version:4 distributor:10 storage:10 .6.s:m", ctrl.getSystemState().toString());
+
+ timer.advanceTime(1000000); // Node has been in steady state down
+
+ ctrl.tick();
+
+ assertEquals("version:5 distributor:10 storage:10 .6.s:d", ctrl.getSystemState().toString());
+
+ communicator.setNodeState(new Node(NodeType.STORAGE, 6), new NodeState(NodeType.STORAGE, State.INITIALIZING).setInitProgress(0.1), "");
+
+ ctrl.tick();
+
+ assertEquals("version:6 distributor:10 storage:10 .6.s:i .6.i:0.1", ctrl.getSystemState().toString());
+
+ // Let the init progress stall for just longer than the allowed maxInitProgressTime.
+ timer.advanceTime(options.maxInitProgressTime + 1);
+
+ ctrl.tick();
+
+ // We should now get the node marked down.
+ assertEquals("version:7 distributor:10 storage:10 .6.s:d", ctrl.getSystemState().toString());
+
+ tick(1000);
+
+ // Second round: the node goes down again and restarts its initialization from progress 0.0.
+ communicator.setNodeState(new Node(NodeType.STORAGE, 6), State.DOWN, "Connection error: Closed at other end");
+
+ ctrl.tick();
+
+ tick(options.nodeStateRequestTimeoutMS + 1);
+
+ communicator.setNodeState(new Node(NodeType.STORAGE, 6), new NodeState(NodeType.STORAGE, State.INITIALIZING).setInitProgress(0.0), "");
+
+ tick(1000);
+
+ communicator.setNodeState(new Node(NodeType.STORAGE, 6), new NodeState(NodeType.STORAGE, State.INITIALIZING).setInitProgress(0.1), "");
+
+ tick(1000);
+
+ // Still down since it seemingly crashed during last init.
+ assertEquals("version:7 distributor:10 storage:10 .6.s:d", ctrl.getSystemState().toString());
+
+ assertEquals("Down: 5001 ms without initialize progress. Assuming node has deadlocked.",
+ ctrl.getSystemState().getNodeState(new Node(NodeType.STORAGE, 6)).toString());
+
+ ctrl.tick();
+
+ // Only an UP report is expected to bring the node back into the cluster state.
+ communicator.setNodeState(new Node(NodeType.STORAGE, 6), State.UP, "");
+
+ ctrl.tick();
+
+ assertEquals("version:8 distributor:10 storage:10", ctrl.getSystemState().toString());
+
+ verifyNodeEvents(new Node(NodeType.STORAGE, 6),
+ "Event: storage.6: Now reporting state U\n" +
+ "Event: storage.6: Altered node state in cluster state from 'D' to 'U'.\n" +
+ "Event: storage.6: Failed to get node state: D: Connection error: Closed at other end\n" +
+ "Event: storage.6: Altered node state in cluster state from 'U' to 'M: Connection error: Closed at other end'.\n" +
+ "Event: storage.6: 1000000 milliseconds without contact. Marking node down.\n" +
+ "Event: storage.6: Altered node state in cluster state from 'M: Connection error: Closed at other end' to 'D: Connection error: Closed at other end'.\n" +
+ "Event: storage.6: Now reporting state I, i 0.100 (read)\n" +
+ "Event: storage.6: Altered node state in cluster state from 'D: Connection error: Closed at other end' to 'I, i 0.100 (read)'.\n" +
+ "Event: storage.6: 5001 milliseconds without initialize progress. Marking node down. Premature crash count is now 1.\n" +
+ "Event: storage.6: Altered node state in cluster state from 'I, i 0.100 (read)' to 'D: 5001 ms without initialize progress. Assuming node has deadlocked.'.\n" +
+ "Event: storage.6: Failed to get node state: D: Connection error: Closed at other end\n" +
+ "Event: storage.6: Now reporting state I, i 0.00 (ls)\n" +
+ "Event: storage.6: Now reporting state I, i 0.100 (read)\n" +
+ "Event: storage.6: Now reporting state U\n" +
+ "Event: storage.6: Altered node state in cluster state from 'D: 5001 ms without initialize progress. Assuming node has deadlocked.' to 'U'.\n");
+
+ }
+
+ /**
+ * Lets storage node 6 report initializing with progress 0.3 followed by progress 0.2, and
+ * expects the node to be set down in the cluster state with a description about reverse
+ * initialize progress.
+ */
+ @Test
+ public void testBackwardsInitializationProgress() throws Exception {
+ // Same as stalled. Mark down, keep down until up
+ FleetControllerOptions options = new FleetControllerOptions("mycluster", createNodes(10));
+ options.maxTransitionTime.put(NodeType.STORAGE, 5000);
+ options.maxInitProgressTime = 5000;
+ options.stableStateTimePeriod = 1000000;
+ // Set long so we don't time out RPC requests and mark nodes down due to advancing time to get in steady state
+ options.nodeStateRequestTimeoutMS = (int) options.stableStateTimePeriod * 2;
+
+ initialize(options);
+
+ timer.advanceTime(1000000); // Node has been in steady state up
+
+ ctrl.tick();
+
+ communicator.setNodeState(new Node(NodeType.STORAGE, 6), State.DOWN, "Connection error: Closed at other end");
+
+ ctrl.tick();
+
+ assertEquals("version:4 distributor:10 storage:10 .6.s:m", ctrl.getSystemState().toString());
+
+ timer.advanceTime(1000000); // Node has been in steady state down
+
+ ctrl.tick();
+
+ assertEquals("version:5 distributor:10 storage:10 .6.s:d", ctrl.getSystemState().toString());
+
+ communicator.setNodeState(new Node(NodeType.STORAGE, 6), new NodeState(NodeType.STORAGE, State.INITIALIZING).setInitProgress(0.3), "");
+
+ ctrl.tick();
+
+ assertEquals("version:6 distributor:10 storage:10 .6.s:i .6.i:0.3", ctrl.getSystemState().toString());
+
+ ctrl.tick();
+
+ // Report a lower progress (0.2) than the 0.3 reported before.
+ communicator.setNodeState(new Node(NodeType.STORAGE, 6), new NodeState(NodeType.STORAGE, State.INITIALIZING).setInitProgress(0.2), "");
+
+ ctrl.tick();
+
+ assertEquals("version:7 distributor:10 storage:10 .6.s:d", ctrl.getSystemState().toString());
+
+ // The expected text must match the description string character for character, spelling included.
+ String desc = ctrl.getSystemState().getNodeState(new Node(NodeType.STORAGE, 6)).getDescription();
+ assertEquals("Got reverse intialize progress. Assuming node have prematurely crashed", desc);
+ }
+
+ /**
+ * Lets storage node 6 go down while it is reporting initializing with progress 0.3. The node is
+ * expected to be set down, to stay down when it reports initializing again, and to come back
+ * only when it reports up.
+ */
+ @Test
+ public void testNodeGoingDownWhileInitializing() throws Exception {
+ // Same as stalled. Mark down, keep down until up
+ FleetControllerOptions options = new FleetControllerOptions("mycluster", createNodes(10));
+ options.maxTransitionTime.put(NodeType.STORAGE, 5000);
+ options.maxInitProgressTime = 5000;
+ options.stableStateTimePeriod = 1000000;
+ options.nodeStateRequestTimeoutMS = 365 * 24 * 60 * 1000; // Set very high so that advancing time doesn't start sending state replies right before we disconnect.
+
+ initialize(options);
+
+ timer.advanceTime(1000000); // Node has been in steady state up
+
+ ctrl.tick();
+
+ communicator.setNodeState(new Node(NodeType.STORAGE, 6), State.DOWN, "Connection error: Closed at other end");
+
+ ctrl.tick();
+
+ assertEquals("version:4 distributor:10 storage:10 .6.s:m", ctrl.getSystemState().toString());
+
+ timer.advanceTime(1000000); // Node has been in steady state down
+
+ ctrl.tick();
+
+ assertEquals("version:5 distributor:10 storage:10 .6.s:d", ctrl.getSystemState().toString());
+
+ ctrl.tick();
+
+ communicator.setNodeState(new Node(NodeType.STORAGE, 6), new NodeState(NodeType.STORAGE, State.INITIALIZING).setInitProgress(0.3), "");
+
+ ctrl.tick();
+
+ assertEquals("version:6 distributor:10 storage:10 .6.s:i .6.i:0.3", ctrl.getSystemState().toString());
+
+ ctrl.tick();
+
+ // The node goes down in the middle of its initialization.
+ communicator.setNodeState(new Node(NodeType.STORAGE, 6), State.DOWN, "Connection error: Closed at other end");
+
+ ctrl.tick();
+
+ assertEquals("version:7 distributor:10 storage:10 .6.s:d", ctrl.getSystemState().toString());
+
+ tick(1000);
+
+ // Reporting initializing again is expected to leave the cluster state untouched (still version 7, down).
+ communicator.setNodeState(new Node(NodeType.STORAGE, 6), new NodeState(NodeType.STORAGE, State.INITIALIZING).setInitProgress(0.3), "");
+
+ ctrl.tick();
+
+ assertEquals("version:7 distributor:10 storage:10 .6.s:d", ctrl.getSystemState().toString());
+
+ communicator.setNodeState(new Node(NodeType.STORAGE, 6), State.UP, "");
+
+ ctrl.tick();
+
+ assertEquals("version:8 distributor:10 storage:10", ctrl.getSystemState().toString());
+ }
+
+ @Test
+ public void testContinuousCrashRightAfterInit() throws Exception {
+ startingTest("StateChangeTest::testContinuousCrashRightAfterInit");
+ // If node does this too many times, take it out of service
+ FleetControllerOptions options = new FleetControllerOptions("mycluster", createNodes(10));
+ options.maxTransitionTime.put(NodeType.STORAGE, 5000);
+ options.maxInitProgressTime = 5000;
+ options.maxPrematureCrashes = 2;
+ options.stableStateTimePeriod = 1000000;
+ options.maxSlobrokDisconnectGracePeriod = 10000000;
+
+ initialize(options);
+
+ timer.advanceTime(1000000); // Node has been in steady state up
+
+ ctrl.tick();
+
+ communicator.setNodeState(new Node(NodeType.STORAGE, 6), State.DOWN, "Connection error: Closed at other end");
+
+ ctrl.tick();
+
+ assertEquals("version:4 distributor:10 storage:10 .6.s:m", ctrl.getSystemState().toString());
+
+ timer.advanceTime(1000000); // Node has been in steady state down
+
+ ctrl.tick();
+
+ assertEquals("version:5 distributor:10 storage:10 .6.s:d", ctrl.getSystemState().toString());
+
+ for (int j = 0; j <= options.maxPrematureCrashes; ++j) {
+ ctrl.tick();
+
+ tick(options.nodeStateRequestTimeoutMS + 1);
+
+ communicator.setNodeState(new Node(NodeType.STORAGE, 6), State.DOWN, "Connection error: Closed at other end");
+
+ ctrl.tick();
+
+ tick(options.nodeStateRequestTimeoutMS + 1);
+
+ communicator.setNodeState(new Node(NodeType.STORAGE, 6), new NodeState(NodeType.STORAGE, State.INITIALIZING).setInitProgress(0.0), "");
+
+ ctrl.tick();
+
+ tick(options.nodeStateRequestTimeoutMS + 1);
+
+ communicator.setNodeState(new Node(NodeType.STORAGE, 6), new NodeState(NodeType.STORAGE, State.INITIALIZING).setInitProgress(0.1), "");
+
+ tick(1000);
+ }
+
+ assertEquals("version:7 distributor:10 storage:10 .6.s:d", ctrl.getSystemState().toString());
+ }
+
+ @Test
+ public void testClusterStateMinNodes() throws Exception {
+ startingTest("StateChangeTest::testClusterStateMinNodes");
+ // If node does this too many times, take it out of service
+ FleetControllerOptions options = new FleetControllerOptions("mycluster", createNodes(10));
+ options.maxTransitionTime.put(NodeType.STORAGE, 0);
+ options.maxInitProgressTime = 0;
+ options.minDistributorNodesUp = 6;
+ options.minStorageNodesUp = 8;
+ options.minRatioOfDistributorNodesUp = 0.0;
+ options.minRatioOfStorageNodesUp = 0.0;
+
+ initialize(options);
+
+ timer.advanceTime(1000000); // Node has been in steady state up
+
+ ctrl.tick();
+
+ assertEquals("version:3 distributor:10 storage:10", ctrl.getSystemState().toString());
+
+ communicator.setNodeState(new Node(NodeType.DISTRIBUTOR, 0), State.DOWN, "Connection error: Closed at other end");
+ communicator.setNodeState(new Node(NodeType.DISTRIBUTOR, 1), State.DOWN, "Connection error: Closed at other end");
+ communicator.setNodeState(new Node(NodeType.DISTRIBUTOR, 2), State.DOWN, "Connection error: Closed at other end");
+ communicator.setNodeState(new Node(NodeType.DISTRIBUTOR, 3), State.DOWN, "Connection error: Closed at other end");
+
+ communicator.setNodeState(new Node(NodeType.STORAGE, 0), State.DOWN, "Connection error: Closed at other end");
+ communicator.setNodeState(new Node(NodeType.STORAGE, 1), State.DOWN, "Connection error: Closed at other end");
+
+ ctrl.tick();
+
+ assertEquals("version:4 distributor:10 .0.s:d .1.s:d .2.s:d .3.s:d storage:10 .0.s:d .1.s:d", ctrl.getSystemState().toString());
+
+ communicator.setNodeState(new Node(NodeType.DISTRIBUTOR, 4), State.DOWN, "Connection error: Closed at other end");
+
+ ctrl.tick();
+
+ assertEquals("version:5 cluster:d distributor:10 .0.s:d .1.s:d .2.s:d .3.s:d .4.s:d storage:10 .0.s:d .1.s:d", ctrl.getSystemState().toString());
+
+ tick(1000);
+
+ communicator.setNodeState(new Node(NodeType.DISTRIBUTOR, 4), State.UP, "");
+
+ ctrl.tick();
+
+ assertEquals("version:6 distributor:10 .0.s:d .1.s:d .2.s:d .3.s:d storage:10 .0.s:d .1.s:d", ctrl.getSystemState().toString());
+
+ tick(1000);
+
+ communicator.setNodeState(new Node(NodeType.STORAGE, 2), State.DOWN, "");
+
+ ctrl.tick();
+
+ assertEquals("version:7 cluster:d distributor:10 .0.s:d .1.s:d .2.s:d .3.s:d storage:10 .0.s:d .1.s:d .2.s:d", ctrl.getSystemState().toString());
+ }
+
+ @Test
+ public void testClusterStateMinFactor() throws Exception {
+ startingTest("StateChangeTest::testClusterStateMinFactor");
+ // If node does this too many times, take it out of service
+ FleetControllerOptions options = new FleetControllerOptions("mycluster", createNodes(10));
+ options.maxTransitionTime.put(NodeType.STORAGE, 0);
+ options.maxInitProgressTime = 0;
+ options.minDistributorNodesUp = 0;
+ options.minStorageNodesUp = 0;
+ options.minRatioOfDistributorNodesUp = 0.6;
+ options.minRatioOfStorageNodesUp = 0.8;
+
+ initialize(options);
+
+ timer.advanceTime(1000000); // Node has been in steady state up
+
+ ctrl.tick();
+
+ assertEquals("version:3 distributor:10 storage:10", ctrl.getSystemState().toString());
+
+ communicator.setNodeState(new Node(NodeType.DISTRIBUTOR, 0), State.DOWN, "Connection error: Closed at other end");
+ communicator.setNodeState(new Node(NodeType.DISTRIBUTOR, 1), State.DOWN, "Connection error: Closed at other end");
+ communicator.setNodeState(new Node(NodeType.DISTRIBUTOR, 2), State.DOWN, "Connection error: Closed at other end");
+ communicator.setNodeState(new Node(NodeType.DISTRIBUTOR, 3), State.DOWN, "Connection error: Closed at other end");
+
+ communicator.setNodeState(new Node(NodeType.STORAGE, 0), State.DOWN, "Connection error: Closed at other end");
+ communicator.setNodeState(new Node(NodeType.STORAGE, 1), State.DOWN, "Connection error: Closed at other end");
+
+ ctrl.tick();
+
+ assertEquals("version:4 distributor:10 .0.s:d .1.s:d .2.s:d .3.s:d storage:10 .0.s:d .1.s:d", ctrl.getSystemState().toString());
+
+ communicator.setNodeState(new Node(NodeType.DISTRIBUTOR, 4), State.DOWN, "Connection error: Closed at other end");
+
+ ctrl.tick();
+
+ assertEquals("version:5 cluster:d distributor:10 .0.s:d .1.s:d .2.s:d .3.s:d .4.s:d storage:10 .0.s:d .1.s:d", ctrl.getSystemState().toString());
+
+ tick(1000);
+
+ communicator.setNodeState(new Node(NodeType.DISTRIBUTOR, 4), State.UP, "");
+
+ ctrl.tick();
+
+ assertEquals("version:6 distributor:10 .0.s:d .1.s:d .2.s:d .3.s:d storage:10 .0.s:d .1.s:d", ctrl.getSystemState().toString());
+
+ tick(1000);
+
+ communicator.setNodeState(new Node(NodeType.STORAGE, 2), State.DOWN, "");
+
+ ctrl.tick();
+
+ assertEquals("version:7 cluster:d distributor:10 .0.s:d .1.s:d .2.s:d .3.s:d storage:10 .0.s:d .1.s:d .2.s:d", ctrl.getSystemState().toString());
+ }
+
+ /**
+  * Checks how many cluster states each of the given nodes has received. The check runs in
+  * the constructor, which fails with a listing of the node's received states when the
+  * count differs from what the subclass expects.
+  */
+ abstract class StateMessageChecker {
+     StateMessageChecker(final List<DummyVdsNode> nodes) {
+         for (final DummyVdsNode candidate : nodes) {
+             final List<ClusterState> received = candidate.getSystemStatesReceived();
+             // Failure message: the node followed by every state it has received, one per line.
+             final StringBuilder listing = new StringBuilder("Node ").append(candidate).append("\n");
+             for (ClusterState clusterState : received) {
+                 listing.append(clusterState.toString());
+                 listing.append("\n");
+             }
+             final String failureMessage = listing.toString();
+             assertEquals(failureMessage, expectedMessageCount(candidate), received.size());
+         }
+     }
+
+     /** Returns the number of cluster states the given node is expected to have received. */
+     abstract int expectedMessageCount(final DummyVdsNode node);
+ }
+
+ /**
+  * While one node is still missing, no cluster state is sent to the nodes before
+  * minTimeBeforeFirstSystemStateBroadcast has passed; afterwards the connected nodes get it.
+  */
+ @Test
+ public void testNoSystemStateBeforeInitialTimePeriod() throws Exception {
+     startingTest("StateChangeTest::testNoSystemStateBeforeInitialTimePeriod()");
+     final int broadcastDelayMs = 3 * 60 * 1000;
+     FleetControllerOptions opts = new FleetControllerOptions("mycluster", createNodes(10));
+     opts.minTimeBeforeFirstSystemStateBroadcast = broadcastDelayMs;
+     setUpSystem(true, opts);
+     setUpVdsNodes(true, new DummyVdsNodeOptions(), true);
+     // Leave one node down to avoid sending cluster state due to having seen all node states.
+     for (int i = 0; i < nodes.size(); ++i) {
+         if (i == 3) {
+             continue;
+         }
+         nodes.get(i).connect();
+     }
+     setUpFleetController(true, opts);
+
+     StateWaiter stateWaiter = new StateWaiter(timer);
+     fleetController.addSystemStateListener(stateWaiter);
+
+     // Ensure all nodes have been seen by fleetcontroller and that it has had enough time to possibly have sent a cluster state
+     stateWaiter.waitForState("version:\\d+ distributor:10 (\\.\\d+\\.t:\\d+ )*storage:10 (\\.\\d+\\.t:\\d+ )*.1.s:d( \\.\\d+\\.t:\\d+)*", timeoutMS);
+     waitForCompleteCycle();
+     new StateMessageChecker(nodes) {
+         @Override
+         int expectedMessageCount(final DummyVdsNode node) {
+             return 0;
+         }
+     };
+
+     // Pass time and see that the nodes get state
+     timer.advanceTime(broadcastDelayMs);
+     stateWaiter.waitForState("version:\\d+ distributor:10 storage:10 .1.s:d", timeoutMS);
+
+     final int broadcastVersion = stateWaiter.getCurrentSystemState().getVersion();
+     fleetController.waitForNodesHavingSystemStateVersionEqualToOrAbove(broadcastVersion, 19, timeoutMS);
+
+     // The node left down receives nothing; every other node receives two states.
+     new StateMessageChecker(nodes) {
+         @Override
+         int expectedMessageCount(final DummyVdsNode node) {
+             if (node.getNode().equals(new Node(NodeType.STORAGE, 1))) {
+                 return 0;
+             }
+             return 2;
+         }
+     };
+     assertEquals(broadcastVersion, stateWaiter.getCurrentSystemState().getVersion());
+ }
+
+ /**
+  * When every node has connected, cluster states are sent to the nodes even though
+  * minTimeBeforeFirstSystemStateBroadcast is far from having passed.
+  */
+ @Test
+ public void testSystemStateSentWhenNodesReplied() throws Exception {
+     startingTest("StateChangeTest::testSystemStateSentWhenNodesReplied()");
+     final FleetControllerOptions opts = new FleetControllerOptions("mycluster", createNodes(10));
+     opts.minTimeBeforeFirstSystemStateBroadcast = 300 * 60 * 1000;
+     setUpSystem(true, opts);
+     setUpVdsNodes(true, new DummyVdsNodeOptions(), true);
+
+     for (DummyVdsNode vdsNode : nodes) {
+         vdsNode.connect();
+     }
+     // Marking one node as 'initializing' improves testing of state later on.
+     nodes.get(3).setNodeState(State.INITIALIZING);
+
+     setUpFleetController(true, opts);
+
+     final StateWaiter stateWaiter = new StateWaiter(timer);
+     fleetController.addSystemStateListener(stateWaiter);
+     stateWaiter.waitForState("version:\\d+ distributor:10 storage:10 .1.s:i .1.i:1.0", timeoutMS);
+     waitForCompleteCycle();
+
+     final int currentVersion = stateWaiter.getCurrentSystemState().getVersion();
+     fleetController.waitForNodesHavingSystemStateVersionEqualToOrAbove(currentVersion, 20, timeoutMS);
+
+     // The last two versions of the cluster state should be seen (all nodes up,
+     // zero out timestate)
+     new StateMessageChecker(nodes) {
+         @Override
+         int expectedMessageCount(final DummyVdsNode node) {
+             return 2;
+         }
+     };
+ }
+
+ /**
+  * A node that fails its set-system-state requests must not be registered as having
+  * acknowledged the new cluster state version.
+  */
+ @Test
+ public void testDontTagFailingSetSystemStateOk() throws Exception {
+     startingTest("StateChangeTest::testDontTagFailingSetSystemStateOk()");
+     FleetControllerOptions opts = new FleetControllerOptions("mycluster", createNodes(10));
+     setUpFleetController(true, opts);
+     setUpVdsNodes(true, new DummyVdsNodeOptions());
+     waitForStableSystem();
+
+     StateWaiter stateWaiter = new StateWaiter(timer);
+     fleetController.addSystemStateListener(stateWaiter);
+
+     DummyVdsNode failingNode = nodes.get(1);
+     failingNode.failSetSystemState(true);
+     int versionBeforeChange = failingNode.getSystemStatesReceived().get(0).getVersion();
+     // Disconnecting another node causes a new cluster state.
+     nodes.get(2).disconnect();
+     stateWaiter.waitForState("version:\\d+ distributor:10 .1.s:d storage:10", timeoutMS);
+     int versionAfterChange = stateWaiter.getCurrentSystemState().getVersion();
+     assertTrue(versionAfterChange > versionBeforeChange);
+     fleetController.waitForNodesHavingSystemStateVersionEqualToOrAbove(versionAfterChange, 18, timeoutMS);
+
+     // Assert that the failed node has not acknowledged the latest version.
+     // (The version may still be larger than versionBeforeChange if the fleet controller sends a
+     // "stable system" update without timestamps in the meantime.)
+     int acknowledgedVersion = fleetController.getCluster().getNodeInfo(failingNode.getNode()).getSystemStateVersionAcknowledged();
+     assertTrue(acknowledgedVersion < versionAfterChange);
+
+     // Ensure non-concurrent access to getNewestSystemStateVersionSent
+     synchronized (timer) {
+         int sentVersion = fleetController.getCluster().getNodeInfo(failingNode.getNode()).getNewestSystemStateVersionSent();
+         assertTrue(sentVersion == -1 || sentVersion == versionAfterChange);
+     }
+ }
+
+ /**
+  * The distribution bit count in the cluster state follows the lowest min used bits
+  * reported by the storage nodes, capped at the configured distributionBits, and is only
+  * raised again once the configured level can be reached.
+  */
+ @Test
+ public void testAlteringDistributionSplitCount() throws Exception {
+     startingTest("StateChangeTest::testAlteringDistributionSplitCount");
+     FleetControllerOptions opts = new FleetControllerOptions("mycluster", createNodes(10));
+     opts.distributionBits = 17;
+     initialize(opts);
+
+     // The nodes have been in steady state up.
+     timer.advanceTime(1000000);
+     ctrl.tick();
+
+     setMinUsedBitsForAllNodes(15);
+     ctrl.tick();
+     assertEquals("version:4 bits:15 distributor:10 storage:10", ctrl.getSystemState().toString());
+
+     // A single node reporting fewer bits lowers the count for the whole cluster.
+     tick(1000);
+     NodeState fewerBits = new NodeState(NodeType.STORAGE, State.UP).setMinUsedBits(13);
+     communicator.setNodeState(new Node(NodeType.STORAGE, 0), fewerBits, "");
+     ctrl.tick();
+     assertEquals("version:5 bits:13 distributor:10 storage:10", ctrl.getSystemState().toString());
+
+     tick(1000);
+     setMinUsedBitsForAllNodes(16);
+     ctrl.tick();
+     // Don't increase dist bits until we've reached at least the wanted
+     // level, in order to avoid multiple full redistributions of data.
+     assertEquals("version:5 bits:13 distributor:10 storage:10", ctrl.getSystemState().toString());
+
+     tick(1000);
+     setMinUsedBitsForAllNodes(19);
+     ctrl.tick();
+     assertEquals("version:6 bits:17 distributor:10 storage:10", ctrl.getSystemState().toString());
+ }
+
+ /** Reports storage nodes 0 through 9 as up with the given number of min used bits. */
+ private void setMinUsedBitsForAllNodes(int bits) throws Exception {
+     int index = 0;
+     while (index < 10) {
+         Node storageNode = new Node(NodeType.STORAGE, index);
+         NodeState reported = new NodeState(NodeType.STORAGE, State.UP).setMinUsedBits(bits);
+         communicator.setNodeState(storageNode, reported, "");
+         ++index;
+     }
+ }
+
+ /**
+  * After a distributor has been disconnected and reconnected, the cluster state it holds
+  * must carry start timestamps for all storage nodes, while the states held by all other
+  * nodes must carry none.
+  */
+ @Test
+ public void testSetAllTimestampsAfterDowntime() throws Exception {
+     startingTest("StateChangeTest::testSetAllTimestampsAfterDowntime");
+     FleetControllerOptions opts = new FleetControllerOptions("mycluster", createNodes(10));
+     setUpFleetController(true, opts);
+     setUpVdsNodes(true, new DummyVdsNodeOptions());
+     waitForStableSystem();
+
+     StateWaiter stateWaiter = new StateWaiter(timer);
+     fleetController.addSystemStateListener(stateWaiter);
+
+     // Simulate netsplit. Take node down without node booting
+     assertEquals(true, nodes.get(0).isDistributor());
+     nodes.get(0).disconnectImmediately();
+     stateWaiter.waitForState("version:\\d+ distributor:10 .0.s:d storage:10", timeoutMS);
+
+     // Add node back.
+     nodes.get(0).connect();
+     waitForStableSystem();
+
+     // At this time, the node taken down should have cluster states with all starting timestamps set. Other nodes should not.
+     for (DummyVdsNode vdsNode : nodes) {
+         vdsNode.waitForSystemStateVersion(stateWaiter.getCurrentSystemState().getVersion(), timeoutMS);
+         List<ClusterState> received = vdsNode.getSystemStatesReceived();
+         // NOTE(review): index 0 is treated as the most recent state - presumably the list is newest first; confirm.
+         ClusterState newestState = received.get(0);
+         StringBuilder historyBuilder = new StringBuilder();
+         for (ClusterState clusterState : received) {
+             historyBuilder.append(clusterState.toString());
+             historyBuilder.append("\n");
+         }
+         String stateHistory = historyBuilder.toString();
+
+         boolean wasTakenDown = vdsNode.getNode().equals(new Node(NodeType.DISTRIBUTOR, 0));
+         for (ConfiguredNode configured : opts.nodes) {
+             Node storageId = new Node(NodeType.STORAGE, configured.index());
+             if (wasTakenDown) {
+                 long ts = newestState.getNodeState(storageId).getStartTimestamp();
+                 assertTrue(storageId + "\n" + stateHistory + "\nWas " + ts + " should be " + fleetController.getCluster().getNodeInfo(storageId).getStartTimestamp(), ts > 0);
+             } else {
+                 assertTrue(storageId.toString(), newestState.getNodeState(storageId).getStartTimestamp() == 0);
+             }
+         }
+
+         // Distributor start timestamps are never expected to be set.
+         for (ConfiguredNode configured : opts.nodes) {
+             Node distributorId = new Node(NodeType.DISTRIBUTOR, configured.index());
+             assertTrue(distributorId.toString(), newestState.getNodeState(distributorId).getStartTimestamp() == 0);
+         }
+     }
+ }
+
+}
diff --git a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/StateGatherTest.java b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/StateGatherTest.java
new file mode 100644
index 00000000000..cf3a47b1add
--- /dev/null
+++ b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/StateGatherTest.java
@@ -0,0 +1,81 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.vespa.clustercontroller.core;
+
+import com.yahoo.log.LogLevel;
+import org.junit.Test;
+import static org.junit.Assert.assertEquals;
+
+import java.net.InetAddress;
+import java.util.concurrent.TimeoutException;
+import java.util.logging.Logger;
+
+ public class StateGatherTest extends FleetControllerTest {
+
+     public static Logger log = Logger.getLogger(StateGatherTest.class.getName());
+
+     /**
+      * Returns a one-line summary of the get-node-state reply counters of the given node
+      * (timed out, outdated, immediate and set-state replies) and its pending request count.
+      */
+     public String getGetNodeStateReplyCounts(DummyVdsNode node) {
+         StringBuilder sb = new StringBuilder();
+         sb.append("timedout ").append(node.timedOutStateReplies)
+           .append(", outdated ").append(node.outdatedStateReplies)
+           .append(", immediate ").append(node.immediateStateReplies)
+           .append(", setstate ").append(node.setNodeStateReplies)
+           .append(", pending ").append(node.getPendingNodeStateCount());
+         return sb.toString();
+     }
+
+     /**
+      * Each node should have exactly one pending get-node-state request, both right after
+      * connecting and again after the previous request has timed out on the node.
+      */
+     @Test
+     public void testAlwaysHavePendingGetNodeStateRequestTowardsNodes() throws Exception {
+         Logger.getLogger(NodeStateGatherer.class.getName()).setLevel(LogLevel.SPAM);
+         startingTest("StateGatherTest::testAlwaysHavePendingGetNodeStateRequestTowardsNodes");
+         FleetControllerOptions options = new FleetControllerOptions("mycluster");
+         options.nodeStateRequestTimeoutMS = 10 * 60 * 1000;
+         // Force actual message timeout to be lower than request timeout.
+         options.nodeStateRequestTimeoutEarliestPercentage = 80;
+         options.nodeStateRequestTimeoutLatestPercentage = 80;
+         setUpFleetController(true, options);
+         String[] connectionSpecs = { "tcp/" + InetAddress.getLocalHost().getHostName() + ":" + slobrok.port() };
+         DummyVdsNodeOptions dummyOptions = new DummyVdsNodeOptions();
+         DummyVdsNode dnode = new DummyVdsNode(timer, dummyOptions, connectionSpecs, this.options.clusterName, true, 0);
+         DummyVdsNode snode = new DummyVdsNode(timer, dummyOptions, connectionSpecs, this.options.clusterName, false, 0);
+         dnode.connect();
+         snode.connect();
+
+         waitUntilPendingGetNodeState(dnode, snode);
+
+         assertEquals("timedout 0, outdated 0, immediate 1, setstate 0, pending 1", getGetNodeStateReplyCounts(dnode));
+         assertEquals("timedout 0, outdated 0, immediate 1, setstate 0, pending 1", getGetNodeStateReplyCounts(snode));
+
+         waitForCompleteCycle();
+         timer.advanceTime(9 * 60 * 1000); // Requests should have timed out on nodes (8 min timeout).
+
+         waitUntilTimedOutGetNodeState(dnode, snode);
+         waitForCompleteCycle(); // Send new node state requests.
+         waitUntilPendingGetNodeState(dnode, snode);
+
+         assertEquals("timedout 1, outdated 0, immediate 1, setstate 0, pending 1", getGetNodeStateReplyCounts(dnode));
+         assertEquals("timedout 1, outdated 0, immediate 1, setstate 0, pending 1", getGetNodeStateReplyCounts(snode));
+     }
+
+     /**
+      * Blocks until both nodes have registered exactly one timed-out state reply.
+      *
+      * @throws TimeoutException if that has not happened within timeoutMS milliseconds
+      * @throws InterruptedException if the thread is interrupted while waiting; propagated so
+      *         that an interrupted test stops instead of silently continuing to poll
+      */
+     private void waitUntilTimedOutGetNodeState(DummyVdsNode dnode, DummyVdsNode snode) throws TimeoutException, InterruptedException {
+         long deadline = System.currentTimeMillis() + timeoutMS;
+         synchronized (timer) {
+             while (dnode.timedOutStateReplies != 1 || snode.timedOutStateReplies != 1) {
+                 if (System.currentTimeMillis() > deadline) {
+                     throw new TimeoutException("Did not get to have one timed out within timeout of " + timeoutMS + " ms"
+                             + ", " + getGetNodeStateReplyCounts(dnode) + ", " + getGetNodeStateReplyCounts(snode));
+                 }
+                 timer.wait(1);
+             }
+         }
+     }
+
+     /**
+      * Blocks until both nodes have exactly one pending get-node-state request.
+      *
+      * @throws TimeoutException if that has not happened within timeoutMS milliseconds
+      * @throws InterruptedException if the thread is interrupted while waiting; propagated so
+      *         that an interrupted test stops instead of silently continuing to poll
+      */
+     private void waitUntilPendingGetNodeState(DummyVdsNode dnode, DummyVdsNode snode) throws TimeoutException, InterruptedException {
+         long deadline = System.currentTimeMillis() + timeoutMS;
+         while (dnode.getPendingNodeStateCount() != 1 || snode.getPendingNodeStateCount() != 1) {
+             if (System.currentTimeMillis() > deadline) {
+                 throw new TimeoutException("Did not get to have one pending within timeout of " + timeoutMS + " ms");
+             }
+             Thread.sleep(1);
+         }
+     }
+
+ }
diff --git a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/StatsForStorageNodeTest.java b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/StatsForStorageNodeTest.java
new file mode 100644
index 00000000000..ddf0286b0fe
--- /dev/null
+++ b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/StatsForStorageNodeTest.java
@@ -0,0 +1,44 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.vespa.clustercontroller.core;
+
+import org.junit.Test;
+
+import java.util.HashMap;
+import java.util.Map;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertNotNull;
+import static org.junit.Assert.assertNull;
+
+/**
+ * @author hakon
+ */
+public class StatsForStorageNodeTest {
+ @Test
+ public void testStatsForStorage() {
+ Map<Integer, StorageNodeStats> statsMap = new HashMap<>();
+
+ LatencyStats putLatencyForA = new LatencyStats(1, 2);
+ StorageNodeStats nodeStatsForA = new StorageNodeStats(putLatencyForA);
+ statsMap.put(5, nodeStatsForA);
+
+ LatencyStats putLatencyForB = new LatencyStats(3, 4);
+ StorageNodeStats nodeStatsForB = new StorageNodeStats(putLatencyForB);
+ statsMap.put(6, nodeStatsForB);
+
+ StatsForStorageNodes stats = new StatsForStorageNodes(statsMap);
+
+ StorageNodeStats nodeStats = stats.getStatsForStorageNode(5);
+ assertNotNull(nodeStats);
+ assertEquals(1, nodeStatsForA.getDistributorPutLatency().getLatencyMsSum());
+ assertEquals(2, nodeStatsForA.getDistributorPutLatency().getCount());
+
+ nodeStats = stats.getStatsForStorageNode(6);
+ assertNotNull(nodeStats);
+ assertEquals(3, nodeStatsForB.getDistributorPutLatency().getLatencyMsSum());
+ assertEquals(4, nodeStatsForB.getDistributorPutLatency().getCount());
+
+ nodeStats = stats.getStatsForStorageNode(7);
+ assertNull(nodeStats);
+ }
+}
diff --git a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/StatusPagesTest.java b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/StatusPagesTest.java
new file mode 100644
index 00000000000..b1ae39729ab
--- /dev/null
+++ b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/StatusPagesTest.java
@@ -0,0 +1,385 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.vespa.clustercontroller.core;
+
+import com.yahoo.vdslib.distribution.Distribution;
+import com.yahoo.vdslib.state.Node;
+import com.yahoo.vdslib.state.NodeState;
+import com.yahoo.vdslib.state.NodeType;
+import com.yahoo.vdslib.state.State;
+import com.yahoo.vespa.clustercontroller.core.status.StatusHandler;
+import com.yahoo.vespa.clustercontroller.core.status.statuspage.StatusPageResponse;
+import com.yahoo.vespa.clustercontroller.core.status.statuspage.StatusPageServer;
+import com.yahoo.vespa.clustercontroller.utils.communication.http.HttpRequest;
+import com.yahoo.vespa.clustercontroller.utils.communication.http.HttpResult;
+import org.codehaus.jettison.json.JSONObject;
+
+import java.io.*;
+import java.net.Socket;
+import java.text.DateFormat;
+import java.text.SimpleDateFormat;
+import java.util.Date;
+import java.util.HashMap;
+import java.util.Map;
+import java.util.TimeZone;
+import java.util.logging.Logger;
+import org.junit.Test;
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertTrue;
+import static org.junit.Assert.assertFalse;
+import static org.junit.Assert.assertNull;
+
+public class StatusPagesTest extends FleetControllerTest {
+
+ public static Logger log = Logger.getLogger(StatusPagesTest.class.getName());
+
+ /**
+  * Sends a plain HTTP GET to the fleet controller's HTTP port on localhost and returns
+  * everything read from the connection until the stream ends.
+  *
+  * @param request the request target written on the GET line
+  * @param ifModifiedSince if non-null, sent as an If-Modified-Since header formatted in GMT
+  * @return the raw bytes read from the socket, decoded with the platform default charset
+  * @throws IOException if connecting, writing or reading fails
+  */
+ private String doHttpGetRequest(String request, Date ifModifiedSince) throws IOException {
+     int statusPort = fleetController.getHttpPort();
+     // Closing the socket also closes both of its streams, so nothing leaks when writing
+     // the request or reading the response fails.
+     try (Socket socket = new Socket("localhost", statusPort)) {
+         BufferedWriter bw = new BufferedWriter(new OutputStreamWriter(socket.getOutputStream()));
+         bw.write("GET " + request + " HTTP/1.1\r\n");
+         if (ifModifiedSince != null) {
+             // HTTP dates use English day and month names, independent of the JVM default locale.
+             DateFormat df = new SimpleDateFormat("EEE, d MMM yyyy HH:mm:ss z", java.util.Locale.US);
+             df.setTimeZone(TimeZone.getTimeZone("GMT"));
+             bw.write("If-Modified-Since: " + df.format(ifModifiedSince) + "\r\n");
+         }
+         bw.write("\r\n");
+         bw.flush();
+
+         InputStream stream = socket.getInputStream();
+         ByteArrayOutputStream output = new ByteArrayOutputStream();
+         byte[] buf = new byte[4096];
+         while (true) {
+             int read = stream.read(buf);
+             if (read <= 0) {
+                 break;
+             }
+             output.write(buf, 0, read);
+         }
+         return output.toString();
+     }
+ }
+
+ private String doHttpGetRequest(String request) throws IOException {
+ return doHttpGetRequest(request, null);
+ }
+
+ @Test
+ public void testStatusThroughContainer() throws Exception {
+ startingTest("StatusPagesTest::testStatusThroughContainer()");
+ FleetControllerOptions options = new FleetControllerOptions("mycluster");
+ options.setStorageDistribution(new Distribution(Distribution.getDefaultDistributionConfig(3, 10)));
+ final StatusHandler.ContainerStatusPageServer statusServer = new StatusHandler.ContainerStatusPageServer();
+ setUpFleetController(true, options, true, statusServer);
+ setUpVdsNodes(true, new DummyVdsNodeOptions());
+ waitForStableSystem();
+
+ //ThreadPoolExecutor executor = new ThreadPoolExecutor(10, 100, 100, TimeUnit.SECONDS, new ArrayBlockingQueue<Runnable>(1000));
+ //FleetControllerComponent fcComp = new FleetControllerComponent();
+ //fcComp.addFleetController("mycluster", fleetController, statusServer);
+ StatusHandler comp = new StatusHandler(new StatusHandler.ClusterStatusPageServerSet() {
+ @Override
+ public StatusHandler.ContainerStatusPageServer get(String cluster) {
+ return ("mycluster".equals(cluster) ? statusServer : null);
+ }
+
+ @Override
+ public Map<String, StatusHandler.ContainerStatusPageServer> getAll() {
+ Map<String, StatusHandler.ContainerStatusPageServer> map = new HashMap<>();
+ map.put("mycluster", statusServer);
+ return map;
+ }
+ });
+
+ {
+ HttpRequest request = new HttpRequest().setPath("/clustercontroller-status/v1");
+ HttpResult result = comp.handleRequest(request);
+ assertEquals(result.toString(true), 200, result.getHttpReturnCode());
+ assertEquals("<title>clusters</title>\n<a href=\"./mycluster\">mycluster</a><br>\n", result.getContent().toString());
+ }
+ {
+ HttpRequest request = new HttpRequest().setPath("/clustercontroller-status/v1/");
+ HttpResult result = comp.handleRequest(request);
+ assertEquals(result.toString(true), 200, result.getHttpReturnCode());
+ assertEquals("<title>clusters</title>\n<a href=\"./mycluster\">mycluster</a><br>\n", result.getContent().toString());
+ }
+ {
+ HttpRequest request = new HttpRequest().setPath("/clustercontroller-status/v1/mycluster");
+ HttpResult result = comp.handleRequest(request);
+ assertEquals(result.toString(true), 200, result.getHttpReturnCode());
+ assertTrue(result.toString(true), result.getContent().toString().contains(
+ "mycluster Cluster Controller 0 Status Page"));
+ }
+ {
+ HttpRequest request = new HttpRequest().setPath("/clustercontroller-status/v1/mycluster/");
+ HttpResult result = comp.handleRequest(request);
+ assertEquals(result.toString(true), 200, result.getHttpReturnCode());
+ assertTrue(result.toString(true), result.getContent().toString().contains(
+ "mycluster Cluster Controller 0 Status Page"));
+ assertTrue(result.toString(true), result.getContent().toString().contains(
+ "href=\"/clustercontroller-status/v1/mycluster/node=distributor.0\""));
+ assertTrue(result.toString(true), result.getContent().toString().contains(
+ "href=\"/clustercontroller-status/v1/mycluster/node=storage.0\""));
+ }
+ {
+ HttpRequest request = new HttpRequest().setPath("/clustercontroller-status/v1/mycluster/node=storage.0");
+ HttpResult result = comp.handleRequest(request);
+ assertEquals(result.toString(true), 200, result.getHttpReturnCode());
+ assertTrue(result.toString(true), result.getContent().toString().contains(
+ "Node status for storage.0"));
+ assertTrue(result.toString(true), result.getContent().toString().contains(
+ "href=\"/clustercontroller-status/v1/mycluster\""));
+ }
+ {
+ HttpRequest request = new HttpRequest().setPath("/clustercontroller-status/v1/foo");
+ HttpResult result = comp.handleRequest(request);
+ assertEquals(result.toString(true), 404, result.getHttpReturnCode());
+ }
+ {
+ HttpRequest request = new HttpRequest().setPath("/foobar/v1/mycluster/");
+ HttpResult result = comp.handleRequest(request);
+ assertEquals(result.toString(true), 404, result.getHttpReturnCode());
+ }
+ {
+ HttpRequest request = new HttpRequest().setPath("/clustercontroller-status/v2/");
+ HttpResult result = comp.handleRequest(request);
+ assertEquals(result.toString(true), 404, result.getHttpReturnCode());
+ }
+ //executor.shutdown();
+ }
+
+ /**
+  * splitZooKeeperAddress is expected to turn a comma-separated server list into the same
+  * list with a space after each comma.
+  */
+ @Test
+ public void testZooKeeperAddressSplitting() {
+     final String compactAddress = "conc1.foo.yahoo.com:2181,conc2.foo.yahoo.com:2181,"
+             + "dp1.foo.yahoo.com:2181,dp2.foo.yahoo.com:2181,"
+             + "dp3.foo.yahoo.com:2181";
+     final String expected = "conc1.foo.yahoo.com:2181, conc2.foo.yahoo.com:2181, "
+             + "dp1.foo.yahoo.com:2181, dp2.foo.yahoo.com:2181, "
+             + "dp3.foo.yahoo.com:2181";
+     assertEquals(expected, FleetControllerOptions.splitZooKeeperAddress(compactAddress));
+ }
+
+ /**
+  * Fetches the main status page over HTTP while several nodes are disconnected in
+  * different ways, and checks that the page contains its main sections.
+  */
+ @Test
+ public void testSimpleConnectionWithSomeContent() throws Exception {
+     // Set this to true temporary if you want to check status page from browser. Should be false in checked in code always.
+     boolean haltTestToViewStatusPage = false;
+     startingTest("StatusPagesTest::testSimpleConnectionWithSomeContent()");
+     FleetControllerOptions opts = new FleetControllerOptions("mycluster");
+     opts.setStorageDistribution(new Distribution(Distribution.getDefaultDistributionConfig(3, 10)));
+     if (haltTestToViewStatusPage) {
+         // Use a fixed port so the page can be opened from a browser.
+         opts.httpPort = 19234;
+     }
+     setUpFleetController(true, opts);
+     setUpVdsNodes(true, new DummyVdsNodeOptions());
+     waitForStableSystem();
+
+     nodes.get(2).disconnectBreakConnection();
+     nodes.get(5).disconnectAsShutdown();
+     nodes.get(7).disconnectSlobrok();
+
+     // The description contains characters that are special in HTML.
+     NodeState wantedState = new NodeState(NodeType.STORAGE, State.MAINTENANCE).setDescription("Test&<>special");
+     fleetController.getCluster().getNodeInfo(new Node(NodeType.STORAGE, 3)).setWantedState(wantedState);
+
+     String page = doHttpGetRequest("/");
+
+     String[] expectedFragments = { "<html>", "</html>", "Current cluster state", "Cluster states", "Event log" };
+     for (String fragment : expectedFragments) {
+         assertTrue(page, page.contains(fragment));
+     }
+
+     if (haltTestToViewStatusPage) {
+         System.err.println(page);
+         try {
+             Thread.sleep(1000000);
+         } catch (InterruptedException e) {}
+     }
+ }
+
+ @Test
+ public void testNodePage() throws Exception {
+ startingTest("StatusPagesTest::testNodePage()");
+ FleetControllerOptions options = new FleetControllerOptions("mycluster");
+ options.setStorageDistribution(new Distribution(Distribution.getDefaultDistributionConfig(3, 10)));
+ setUpFleetController(true, options);
+ setUpVdsNodes(true, new DummyVdsNodeOptions());
+ waitForStableSystem();
+
+ String content = doHttpGetRequest("/node=storage.0");
+
+ assertTrue(content, content.contains("<html>"));
+ assertTrue(content, content.contains("</html>"));
+ assertTrue(content, content.contains("Node status for storage.0"));
+ assertTrue(content, content.contains("REPORTED"));
+ assertTrue(content, content.contains("Altered node state in cluster state from"));
+ //System.err.println(sb.toString());
+
+ }
+
+ /** Requesting an unknown path from the status page server must give a 404 response. */
+ @Test
+ public void testErrorResponseCode() throws Exception {
+     // Label this test with its own name (it previously carried the label of testNodePage).
+     startingTest("StatusPagesTest::testErrorResponseCode()");
+     FleetControllerOptions options = new FleetControllerOptions("mycluster");
+     options.setStorageDistribution(new Distribution(Distribution.getDefaultDistributionConfig(3, 10)));
+     setUpFleetController(true, options);
+     setUpVdsNodes(true, new DummyVdsNodeOptions());
+     waitForStableSystem();
+
+     String content = doHttpGetRequest("/fraggle/rock");
+
+     // Include the response in the failure message, as the other status page tests do.
+     assertTrue(content, content.contains("404 Not Found"));
+ }
+
+    /** Builds a {@code StatusPageServer.HttpRequest} from the given raw request string. */
+    private StatusPageServer.HttpRequest makeHttpRequest(String request) {
+        StatusPageServer.HttpRequest parsed = new StatusPageServer.HttpRequest(request);
+        return parsed;
+    }
+
+    /**
+     * Checks how request strings are split into a path and query parameters: no query,
+     * a single parameter, several parameters, and a parameter without a value.
+     */
+    @Test
+    public void testHttpRequestParsing() {
+        // Root path, no query string.
+        StatusPageServer.HttpRequest rootOnly = makeHttpRequest("/");
+        assertEquals("/", rootOnly.getPath());
+        assertFalse(rootOnly.hasQueryParameters());
+
+        // Nested path, no query string.
+        StatusPageServer.HttpRequest nestedPath = makeHttpRequest("/foo/bar");
+        assertEquals("/foo/bar", nestedPath.getPath());
+        assertFalse(nestedPath.hasQueryParameters());
+
+        // Nested path with one key=value parameter.
+        StatusPageServer.HttpRequest singleParam = makeHttpRequest("/foo/bar?baz=baff");
+        assertEquals("/foo/bar", singleParam.getPath());
+        assertTrue(singleParam.hasQueryParameters());
+        assertEquals("baff", singleParam.getQueryParameter("baz"));
+
+        // Root path with two parameters separated by '&'.
+        StatusPageServer.HttpRequest twoParams = makeHttpRequest("/?baz=baff&blarg=blee");
+        assertEquals("/", twoParams.getPath());
+        assertTrue(twoParams.hasQueryParameters());
+        assertEquals("baff", twoParams.getQueryParameter("baz"));
+        assertEquals("blee", twoParams.getQueryParameter("blarg"));
+
+        // A '=' before the '?' belongs to the path; a parameter without a value is present but null.
+        StatusPageServer.HttpRequest valuelessParam = makeHttpRequest("/node=storage.101?showlocal");
+        assertEquals("/node=storage.101", valuelessParam.getPath());
+        assertTrue(valuelessParam.hasQueryParameters());
+        assertTrue(valuelessParam.hasQueryParameter("showlocal"));
+        assertNull(valuelessParam.getQueryParameter("showlocal"));
+    }
+
+    /**
+     * Request handler stub that ignores the request and always answers with the same fixed
+     * content, so routing tests can tell which handler was picked.
+     */
+    private static class DummyRequestHandler implements StatusPageServer.RequestHandler {
+        // Never reassigned after construction, hence final.
+        private final String returnData;
+
+        /**
+         * @param returnData the content written to every response produced by this handler
+         */
+        public DummyRequestHandler(String returnData) {
+            this.returnData = returnData;
+        }
+
+        /**
+         * @param request the incoming request; not inspected
+         * @return a new response whose content is the configured {@code returnData}
+         */
+        @Override
+        public StatusPageResponse handle(StatusPageServer.HttpRequest request) {
+            StatusPageResponse response = new StatusPageResponse();
+            response.writeContent(returnData);
+            return response;
+        }
+    }
+
+    /**
+     * Resolves a handler for the request through the router and returns what it wrote.
+     *
+     * @param router  the router used to look up a handler
+     * @param request the raw request string
+     * @return the handler's output decoded as UTF-8, {@code null} when the router resolves no
+     *         handler, or {@code "<ERROR>"} when decoding is not supported
+     */
+    private String invokeHandler(StatusPageServer.RequestRouter router, String request) {
+        StatusPageServer.HttpRequest parsedRequest = makeHttpRequest(request);
+        StatusPageServer.RequestHandler matched = router.resolveHandler(parsedRequest);
+        if (matched != null) {
+            try {
+                StatusPageResponse response = matched.handle(parsedRequest);
+                return response.getOutputStream().toString("UTF-8");
+            } catch (UnsupportedEncodingException e) {
+                return "<ERROR>";
+            }
+        }
+        return null;
+    }
+
+    /**
+     * Registers three pattern handlers and checks which one answers each request, including
+     * a request that matches no pattern.
+     */
+    @Test
+    public void testRequestRouting() {
+        StatusPageServer.PatternRequestRouter patternRouter = new StatusPageServer.PatternRequestRouter();
+        // NOTE(review): "/alerts/red" matches both alert patterns yet resolves to the first one
+        // registered; presumably registration order decides - confirm against PatternRequestRouter.
+        patternRouter.addHandler("^/alerts/red.*", new DummyRequestHandler("red alert!"));
+        patternRouter.addHandler("^/alerts.*", new DummyRequestHandler("beige alert"));
+        patternRouter.addHandler("^/$", new DummyRequestHandler("root"));
+
+        assertEquals("root", invokeHandler(patternRouter, "/"));
+        assertEquals("beige alert", invokeHandler(patternRouter, "/alerts"));
+        assertEquals("beige alert", invokeHandler(patternRouter, "/alerts?foo"));
+        assertEquals("red alert!", invokeHandler(patternRouter, "/alerts/red"));
+        assertEquals("red alert!", invokeHandler(patternRouter, "/alerts/red/blue"));
+        // No pattern matches, so invokeHandler reports null.
+        assertNull(invokeHandler(patternRouter, "/blarg"));
+    }
+
+ public String[] getResponseParts(String response) {
+ int offset = response.indexOf("\r\n\r\n");
+ if (offset == -1) {
+ throw new IllegalStateException("No HTTP header delimiter found");
+ }
+ return new String[] {
+ response.substring(0, offset + 2), // all header lines must have linebreaks
+ response.substring(offset + 4)
+ };
+ }
+
+    /**
+     * Extracts the value of a named header from a CRLF-terminated header section.
+     *
+     * @param header the header section to search
+     * @param name   the header name, without the trailing colon
+     * @return the text between {@code "name: "} and the next CRLF
+     * @throws IllegalStateException if the header name or its terminating CRLF is not found
+     */
+    private String getHeaderValue(String header, String name) {
+        String prefix = name + ": ";
+        int start = header.indexOf(prefix);
+        if (start == -1) {
+            throw new IllegalStateException("No HTTP header found for " + name);
+        }
+        int lineEnd = header.indexOf("\r\n", start);
+        if (lineEnd == -1) {
+            throw new IllegalStateException("No EOL found for " + name);
+        }
+        return header.substring(start + prefix.length(), lineEnd);
+    }
+
+    /**
+     * Checks that {@code /state/v1/health} serves the expected JSON document, including the
+     * generation number passed to {@code updateOptions}.
+     */
+    @Test
+    public void testStateServing() throws Exception {
+        startingTest("StatusPagesTest::testStateServing()");
+        FleetControllerOptions options = new FleetControllerOptions("mycluster");
+        setUpFleetController(true, options);
+        // Generation 5 is expected to show up under config.component.generation below.
+        fleetController.updateOptions(options, 5);
+        waitForCompleteCycle();
+
+        String content = doHttpGetRequest("/state/v1/health");
+        String body = getResponseParts(content)[1];
+        String expected =
+                "{\n" +
+                "  \"status\" : {\n" +
+                "    \"code\" : \"up\"\n" +
+                "  },\n" +
+                "  \"config\" : {\n" +
+                "    \"component\" : {\n" +
+                "      \"generation\" : 5\n" +
+                "    }\n" +
+                "  }\n" +
+                "}";
+        assertEquals(expected, body);
+        // Check that what was actually served parses as JSON (previously this parsed the
+        // test's own expected constant and kept the result in an unused local).
+        new JSONObject(body);
+    }
+
+    /**
+     * Checks that {@code /clusterstate} serves the serialized cluster state as the response body.
+     */
+    @Test
+    public void testClusterStateServing() throws Exception {
+        startingTest("StatusPagesTest::testClusterStateServing()");
+        FleetControllerOptions controllerOptions = new FleetControllerOptions("mycluster");
+        setUpFleetController(true, controllerOptions);
+        fleetController.updateOptions(controllerOptions, 5);
+        waitForCompleteCycle();
+
+        String rawResponse = doHttpGetRequest("/clusterstate");
+        String[] headerAndBody = getResponseParts(rawResponse);
+        assertEquals("version:2 cluster:d", headerAndBody[1]);
+    }
+}
diff --git a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/StorageNodeStatsContainerTest.java b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/StorageNodeStatsContainerTest.java
new file mode 100644
index 00000000000..e2832c5b6b9
--- /dev/null
+++ b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/StorageNodeStatsContainerTest.java
@@ -0,0 +1,43 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.vespa.clustercontroller.core;
+
+import org.junit.Test;
+
+import java.util.HashMap;
+import java.util.Map;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertNotNull;
+import static org.junit.Assert.assertNull;
+
+/**
+ * @author hakon
+ */
+public class StorageNodeStatsContainerTest {
+    /**
+     * Stores stats under two node indices and checks that each lookup returns the stats that
+     * were stored under that index, and that an unknown index yields {@code null}.
+     */
+    @Test
+    public void testStatsForStorage() {
+        StorageNodeStatsContainer statsContainer = new StorageNodeStatsContainer();
+
+        LatencyStats putLatencyForA = new LatencyStats(1, 2);
+        StorageNodeStats nodeStatsForA = new StorageNodeStats(putLatencyForA);
+        statsContainer.put(5, nodeStatsForA);
+
+        LatencyStats putLatencyForB = new LatencyStats(3, 4);
+        StorageNodeStats nodeStatsForB = new StorageNodeStats(putLatencyForB);
+        statsContainer.put(6, nodeStatsForB);
+
+        // Assert on the value fetched from the container; asserting on the objects that were
+        // put in (as before) passes regardless of what the container returns.
+        StorageNodeStats nodeStats = statsContainer.get(5);
+        assertNotNull(nodeStats);
+        assertEquals(1, nodeStats.getDistributorPutLatency().getLatencyMsSum());
+        assertEquals(2, nodeStats.getDistributorPutLatency().getCount());
+
+        nodeStats = statsContainer.get(6);
+        assertNotNull(nodeStats);
+        assertEquals(3, nodeStats.getDistributorPutLatency().getLatencyMsSum());
+        assertEquals(4, nodeStats.getDistributorPutLatency().getCount());
+
+        // Nothing was stored under index 7.
+        nodeStats = statsContainer.get(7);
+        assertNull(nodeStats);
+    }
+}
diff --git a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/StorageNodeStatsTest.java b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/StorageNodeStatsTest.java
new file mode 100644
index 00000000000..b905cd32979
--- /dev/null
+++ b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/StorageNodeStatsTest.java
@@ -0,0 +1,25 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.vespa.clustercontroller.core;
+
+import org.junit.Test;
+
+import static org.junit.Assert.assertEquals;
+
+/**
+ * @author hakon
+ */
+public class StorageNodeStatsTest {
+    /**
+     * Checks that a StorageNodeStats exposes the latency it was built with, and that
+     * {@code add} sums both the latency and the count of another instance into it.
+     */
+    @Test
+    public void testStorageNodeStats() {
+        StorageNodeStats accumulated = new StorageNodeStats(new LatencyStats(1, 2));
+        assertEquals(1, accumulated.getDistributorPutLatency().getLatencyMsSum());
+        assertEquals(2, accumulated.getDistributorPutLatency().getCount());
+
+        StorageNodeStats addend = new StorageNodeStats(new LatencyStats(3, 4));
+        accumulated.add(addend);
+
+        assertEquals(1 + 3, accumulated.getDistributorPutLatency().getLatencyMsSum());
+        assertEquals(2 + 4, accumulated.getDistributorPutLatency().getCount());
+    }
+}
diff --git a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/SystemStateGeneratorTest.java b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/SystemStateGeneratorTest.java
new file mode 100644
index 00000000000..ab6185d2b56
--- /dev/null
+++ b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/SystemStateGeneratorTest.java
@@ -0,0 +1,198 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.vespa.clustercontroller.core;
+
+import com.yahoo.vdslib.distribution.ConfiguredNode;
+import com.yahoo.vdslib.distribution.Distribution;
+import com.yahoo.vdslib.state.*;
+import com.yahoo.vespa.clustercontroller.core.hostinfo.HostInfo;
+import com.yahoo.vespa.clustercontroller.core.listeners.NodeStateOrHostInfoChangeHandler;
+import com.yahoo.vespa.clustercontroller.core.listeners.SystemStateListener;
+import com.yahoo.vespa.clustercontroller.core.mocks.TestEventLog;
+import com.yahoo.vespa.clustercontroller.core.testutils.LogFormatter;
+import junit.framework.TestCase;
+
+import java.util.LinkedList;
+import java.util.Set;
+import java.util.TreeSet;
+import java.util.logging.Logger;
+
+/**
+ * Drives a SystemStateGenerator against a FakeTimer and checks how the generated cluster
+ * state reacts when a storage node repeatedly drops out of slobrok and returns.
+ */
+public class SystemStateGeneratorTest extends TestCase {
+ private static final Logger log = Logger.getLogger(SystemStateGeneratorTest.class.getName());
+ /** Settings that initialize() applies to the cluster and the generator. */
+ class Config {
+ int nodeCount = 3;
+ int stableStateTime = 1000 * 60000;
+ int maxSlobrokDisconnectPeriod = 60000;
+ int maxPrematureCrashes = 3;
+ }
+ /** Listener that records every cluster state it is handed. */
+ class TestSystemStateListener implements SystemStateListener {
+ LinkedList<ClusterState> states = new LinkedList<>();
+
+ @Override
+ public void handleNewSystemState(ClusterState state) {
+ states.add(state);
+ }
+
+ /** Lists the recorded states, one per line; used as an assertion message. */
+ public String toString() {
+ StringBuilder sb = new StringBuilder();
+ sb.append("States(");
+ for (ClusterState state : states) sb.append('\n').append(state.toString());
+ sb.append(")");
+ return sb.toString();
+ }
+
+ }
+
+ /** Handler that records every callback as a "node - value" string. */
+ class TestNodeStateOrHostInfoChangeHandler implements NodeStateOrHostInfoChangeHandler {
+
+ LinkedList<String> events = new LinkedList<>();
+
+ @Override
+ public void handleNewNodeState(NodeInfo node, NodeState newState) {
+ events.add(node + " - " + newState);
+ }
+
+ @Override
+ public void handleNewWantedNodeState(NodeInfo node, NodeState newState) {
+ events.add(node + " - " + newState);
+ }
+
+ @Override
+ public void handleUpdatedHostInfo(NodeInfo node, HostInfo newHostInfo) {
+ events.add(node + " - " + newHostInfo);
+ }
+
+ /** Lists the recorded events, one per line; used as an assertion message. */
+ public String toString() {
+ StringBuilder sb = new StringBuilder();
+ sb.append("NodeChanges(");
+ for (String change : events) sb.append('\n').append(change);
+ sb.append(")");
+ return sb.toString();
+ }
+ }
+
+ private FakeTimer clock = new FakeTimer();
+ private TestEventLog eventLog = new TestEventLog();
+ private Set<ConfiguredNode> configuredNodes = new TreeSet<>();
+ private Config config;
+ private ContentCluster cluster;
+ private SystemStateGenerator generator;
+ private TestSystemStateListener systemStateListener;
+ private TestNodeStateOrHostInfoChangeHandler nodeStateUpdateListener;
+
+ public void setUp() {
+ LogFormatter.initializeLogging();
+ }
+
+ /**
+ * Creates the cluster with config.nodeCount configured nodes, the generator under test
+ * configured from the given settings, and fresh listeners.
+ */
+ private void initialize(Config config) {
+ Distribution distribution = new Distribution(Distribution.getDefaultDistributionConfig(2, 100));
+ this.config = config;
+ for (int i=0; i<config.nodeCount; ++i) configuredNodes.add(new ConfiguredNode(i, false));
+ cluster = new ContentCluster("testcluster", configuredNodes, distribution, 0, 0.0);
+ generator = new SystemStateGenerator(clock, eventLog, null);
+ generator.setNodes(cluster.clusterInfo());
+ generator.setStableStateTimePeriod(config.stableStateTime);
+ generator.setMaxPrematureCrashes(config.maxPrematureCrashes);
+ generator.setMaxSlobrokDisconnectGracePeriod(config.maxSlobrokDisconnectPeriod);
+ generator.setMinNodesUp(1, 1, 0, 0);
+ systemStateListener = new TestSystemStateListener();
+ nodeStateUpdateListener = new TestNodeStateOrHostInfoChangeHandler();
+ }
+
+ /** Asserts that the generator publishes exactly one new state, then clears the listener. */
+ private void assertNewClusterStateReceived() {
+ assertTrue(generator.notifyIfNewSystemState(systemStateListener));
+ assertTrue(systemStateListener.toString(), systemStateListener.states.size() == 1);
+ systemStateListener.states.clear();
+ }
+
+ /**
+ * Reports every configured node of every node type as UP, checks that the generated
+ * cluster state agrees, and then advances the clock by the stable state time.
+ */
+ private void startWithStableStateClusterWithNodesUp() {
+ for (NodeType type : NodeType.getTypes()) {
+ for (ConfiguredNode i : configuredNodes) {
+ NodeInfo nodeInfo = cluster.clusterInfo().setRpcAddress(new Node(type, i.index()), null);
+ nodeInfo.markRpcAddressLive();
+ generator.handleNewReportedNodeState(nodeInfo, new NodeState(type, State.UP), null);
+ nodeInfo.setReportedState(new NodeState(type, State.UP), clock.getCurrentTimeInMillis());
+ }
+ }
+ assertNewClusterStateReceived();
+ for (NodeType type : NodeType.getTypes()) {
+ for (ConfiguredNode i : configuredNodes) {
+ Node n = new Node(type, i.index());
+ assertEquals(State.UP, generator.getClusterState().getNodeState(n).getState());
+ }
+ }
+ clock.advanceTime(config.stableStateTime);
+ }
+
+ /**
+ * Marks the node's RPC address outdated and tells the generator the node is missing.
+ * Expects no node state events, and an event log entry about the node leaving slobrok.
+ */
+ private void markNodeOutOfSlobrok(Node node) {
+ log.info("Marking " + node + " out of slobrok");
+ cluster.getNodeInfo(node).markRpcAddressOutdated(clock);
+ generator.handleMissingNode(cluster.getNodeInfo(node), nodeStateUpdateListener);
+ assertTrue(nodeStateUpdateListener.toString(), nodeStateUpdateListener.events.isEmpty());
+ nodeStateUpdateListener.events.clear();
+ assertTrue(eventLog.toString(), eventLog.toString().contains("Node is no longer in slobrok"));
+ eventLog.clear();
+ }
+
+ /**
+ * Marks the node's RPC address live again and reports the given state for it.
+ * Expects that neither step produces node state events or published cluster states.
+ */
+ private void markNodeBackIntoSlobrok(Node node, State state) {
+ log.info("Marking " + node + " back in slobrok");
+ cluster.getNodeInfo(node).markRpcAddressLive();
+ generator.handleReturnedRpcAddress(cluster.getNodeInfo(node));
+ assertEquals(0, nodeStateUpdateListener.events.size());
+ assertEquals(0, systemStateListener.states.size());
+ generator.handleNewReportedNodeState(cluster.getNodeInfo(node), new NodeState(node.getType(), state), nodeStateUpdateListener);
+ cluster.getNodeInfo(node).setReportedState(new NodeState(node.getType(), state), clock.getCurrentTimeInMillis());
+ assertEquals(0, nodeStateUpdateListener.events.size());
+ assertEquals(0, systemStateListener.states.size());
+ }
+
+ /**
+ * Asserts that the generator publishes one new cluster state in which the node has the
+ * given state, and that the generator's current cluster state agrees.
+ */
+ private void verifyClusterStateChanged(Node node, State state) {
+ log.info("Verifying cluster state has been updated for " + node + " to " + state);
+ assertTrue(generator.notifyIfNewSystemState(systemStateListener));
+ assertEquals(1, systemStateListener.states.size());
+ assertEquals(state, systemStateListener.states.get(0).getNodeState(node).getState());
+ systemStateListener.states.clear();
+ assertEquals(state, generator.getClusterState().getNodeState(node).getState());
+ }
+
+ /** Runs the generator's timer watch and expects the node to end up in the given state. */
+ private void verifyNodeStateAfterTimerWatch(Node node, State state) {
+ log.info("Verifying state of node after timer watch.");
+ generator.watchTimers(cluster, nodeStateUpdateListener);
+ assertEquals(0, nodeStateUpdateListener.events.size());
+ verifyClusterStateChanged(node, state);
+ }
+
+ /** Expects the timer watch to report a change and the node's premature crash count to be 0. */
+ private void verifyPrematureCrashCountCleared(Node node) {
+ assertTrue(generator.watchTimers(cluster, nodeStateUpdateListener));
+ assertEquals(0, nodeStateUpdateListener.events.size());
+ assertEquals(0, cluster.getNodeInfo(node).getPrematureCrashCount());
+ }
+
+ /**
+ * Three rounds in which storage node 0 leaves slobrok maxPrematureCrashes times. On each
+ * drop, once the max disconnect period has passed, the node is expected in MAINTENANCE;
+ * after it returns it is expected UP. Each round ends by passing the stable state time,
+ * after which the premature crash count is expected to be cleared.
+ */
+ public void testUnstableNodeInSlobrok() throws Exception {
+ initialize(new Config());
+ startWithStableStateClusterWithNodesUp();
+ Node node = new Node(NodeType.STORAGE, 0);
+ for (int j=0; j<3; ++j) {
+ log.info("Iteration " + j);
+ assertEquals(0, cluster.getNodeInfo(node).getPrematureCrashCount());
+ assertEquals(State.UP, cluster.getNodeInfo(node).getWantedState().getState());
+ assertEquals(State.UP, generator.getClusterState().getNodeState(node).getState());
+ for (int k=0; k<config.maxPrematureCrashes; ++k) {
+ log.info("Premature iteration " + k);
+ markNodeOutOfSlobrok(node);
+
+ log.info("Passing max disconnect time period. Watching timers");
+ clock.advanceTime(config.maxSlobrokDisconnectPeriod);
+
+ verifyNodeStateAfterTimerWatch(node, State.MAINTENANCE);
+ cluster.getNodeInfo(node).setReportedState(new NodeState(node.getType(), State.DOWN), clock.getCurrentTimeInMillis());
+
+ // The count is read before the node returns, so it equals the number of earlier drops in this round.
+ assertEquals(k, cluster.getNodeInfo(node).getPrematureCrashCount());
+ markNodeBackIntoSlobrok(node, State.UP);
+ verifyClusterStateChanged(node, State.UP);
+ }
+ log.info("Passing steady state to get premature crash count flag cleared");
+ clock.advanceTime(config.stableStateTime);
+ verifyPrematureCrashCountCleared(node);
+ }
+ }
+}
diff --git a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/WantedStateTest.java b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/WantedStateTest.java
new file mode 100644
index 00000000000..8065d701f6b
--- /dev/null
+++ b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/WantedStateTest.java
@@ -0,0 +1,82 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.vespa.clustercontroller.core;
+
+import com.yahoo.jrt.*;
+import com.yahoo.jrt.StringValue;
+import com.yahoo.vdslib.state.NodeState;
+import com.yahoo.vdslib.state.State;
+import org.junit.After;
+import org.junit.Before;
+import org.junit.Test;
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertTrue;
+
+/**
+ * Tests setting a node's wanted state through the fleet controller's "setNodeState" RPC
+ * method and observing the resulting cluster state.
+ */
+public class WantedStateTest extends FleetControllerTest {
+
+    private Supervisor supervisor;
+
+    @Before
+    public void setUp() {
+        supervisor = new Supervisor(new Transport());
+    }
+
+    @After
+    public void tearDown() throws Exception {
+        if (supervisor != null) {
+            supervisor.transport().shutdown().join();
+            supervisor = null;
+        }
+        super.tearDown();
+    }
+
+    /**
+     * Invokes "setNodeState" on the fleet controller for the given node and fails the test
+     * if the call returns an error or an unexpected return type.
+     *
+     * @param node   the node whose wanted state is set; addressed by its slobrok name
+     * @param state  the wanted state
+     * @param reason description attached to the wanted state, or {@code null} for none
+     */
+    public void setWantedState(DummyVdsNode node, State state, String reason) {
+        NodeState ns = new NodeState(node.getType(), state);
+        if (reason != null) ns.setDescription(reason);
+        Target connection = supervisor.connect(new Spec(fleetController.getRpcPort()));
+        Request req = new Request("setNodeState");
+        req.parameters().add(new StringValue(node.getSlobrokName()));
+        req.parameters().add(new StringValue(ns.serialize()));
+        try {
+            connection.invokeSync(req, timeoutS);
+        } finally {
+            // A new connection is opened per call; close it so repeated calls do not leak
+            // connections to the fleet controller.
+            connection.close();
+        }
+        if (req.isError()) {
+            assertTrue("Failed to invoke setNodeState(): " + req.errorCode() + ": " + req.errorMessage(), false);
+        }
+        if (!req.checkReturnTypes("s")) {
+            assertTrue("Failed to invoke setNodeState(): Invalid return types.", false);
+        }
+    }
+
+    /** Sets a node to maintenance and back to up, waiting for each cluster state. */
+    @Test
+    public void testSettingStorageNodeMaintenanceAndBack() throws Exception {
+        startingTest("WantedStateTest::testSettingStorageNodeMaintenanceAndBack()");
+        setUpFleetController(true, new FleetControllerOptions("mycluster"));
+        setUpVdsNodes(true, new DummyVdsNodeOptions());
+        waitForStableSystem();
+
+        // NOTE(review): nodes.get(1) is expected to surface as storage node 0 (".0.s:m") -
+        // confirm the node ordering produced by setUpVdsNodes.
+        setWantedState(nodes.get(1), State.MAINTENANCE, null);
+        waitForState("version:\\d+ distributor:10 storage:10 .0.s:m");
+
+        setWantedState(nodes.get(1), State.UP, null);
+        waitForState("version:\\d+ distributor:10 storage:10");
+    }
+
+    /**
+     * Sets maintenance with one description, then again with another, and checks that the
+     * later description replaces the earlier one before the node is set back to up.
+     */
+    @Test
+    public void testOverridingWantedStateOtherReason() throws Exception {
+        startingTest("WantedStateTest::testOverridingWantedStateOtherReason()");
+        setUpFleetController(true, new FleetControllerOptions("mycluster"));
+        setUpVdsNodes(true, new DummyVdsNodeOptions());
+        waitForStableSystem();
+
+        setWantedState(nodes.get(1), State.MAINTENANCE, "Foo");
+        waitForState("version:\\d+ distributor:10 storage:10 .0.s:m");
+        assertEquals("Foo", fleetController.getWantedNodeState(nodes.get(1).getNode()).getDescription());
+
+        // Same state, new description: wait for a full cycle instead of a state pattern.
+        setWantedState(nodes.get(1), State.MAINTENANCE, "Bar");
+        waitForCompleteCycle();
+        assertEquals("Bar", fleetController.getWantedNodeState(nodes.get(1).getNode()).getDescription());
+
+        setWantedState(nodes.get(1), State.UP, null);
+        waitForState("version:\\d+ distributor:10 storage:10");
+    }
+
+
+}
diff --git a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/ZooKeeperStressTest.java b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/ZooKeeperStressTest.java
new file mode 100644
index 00000000000..bc317b78ff1
--- /dev/null
+++ b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/ZooKeeperStressTest.java
@@ -0,0 +1,157 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.vespa.clustercontroller.core;
+
+import com.yahoo.vdslib.state.NodeState;
+import com.yahoo.vdslib.state.Node;
+import com.yahoo.vespa.clustercontroller.core.database.ZooKeeperDatabase;
+
+import java.util.Map;
+
+public class ZooKeeperStressTest extends junit.framework.TestCase {
+ private Object lock = new Object();
+ private int waitTime = 0;
+
+    /**
+     * Worker thread that repeatedly reads from and writes to a shared ZooKeeperDatabase until
+     * it is stopped, interrupted, or the database reports itself closed. It counts the
+     * operations attempted and those that reported failure.
+     */
+    class LoadGiver extends Thread {
+        ZooKeeperDatabase db;
+        public int count = 0;
+        public int errors = 0;
+        public int index;
+        // Set by another thread through doStop() and polled by run(); volatile so that the
+        // stop request is guaranteed to become visible to the running thread.
+        public volatile boolean stopNow = false;
+
+        /**
+         * @param db    the database to put load on
+         * @param index identifier used in {@link #toString()}
+         */
+        LoadGiver(ZooKeeperDatabase db, int index) {
+            this.db = db;
+            this.index = index;
+        }
+
+        /** Asks the thread to stop after its current iteration. */
+        public void doStop() {
+            stopNow = true;
+        }
+
+        public void run() {
+            try {
+                while (!this.isInterrupted() && !stopNow) {
+                    // Needs to take lock for each operation. Store new mastervote can not run at the same time as
+                    // another store new master vote as they kill the ephemeral node
+                    synchronized (lock) {
+                        if (db.isClosed()) { System.err.println(this + " Session broke"); break; }
+                        ++count;
+                        if (db.retrieveLatestSystemStateVersion() == null) {
+                            System.err.println("retrieveLatestSystemStateVersion() failed");
+                            ++errors;
+                        }
+                    }
+                    Map<Node, NodeState> wantedStates;
+                    synchronized (lock) {
+                        if (db.isClosed()) { System.err.println(this + " Session broke"); break; }
+                        ++count;
+                        wantedStates = db.retrieveWantedStates();
+                        if (wantedStates == null) {
+                            System.err.println("retrieveWantedStates() failed");
+                            ++errors;
+                        }
+                    }
+                    synchronized (lock) {
+                        if (db.isClosed()) { System.err.println(this + " Session broke"); break; }
+                        ++count;
+                        if (!db.storeLatestSystemStateVersion(5)) {
+                            System.err.println("storeLastestSystemStateVersion() failed");
+                            ++errors;
+                        }
+                    }
+                    synchronized (lock) {
+                        if (db.isClosed()) { System.err.println(this + " Session broke"); break; }
+                        ++count;
+                        if (!db.storeMasterVote(0)) {
+                            System.err.println("storeMasterVote() failed");
+                            ++errors;
+                        }
+                    }
+                    synchronized (lock) {
+                        if (db.isClosed()) { System.err.println(this + " Session broke"); break; }
+                        // Only written back when the earlier retrieval succeeded.
+                        if (wantedStates != null) {
+                            ++count;
+                            if (!db.storeWantedStates(wantedStates)) {
+                                System.err.println("storeWantedState() failed");
+                                ++errors;
+                            }
+                        }
+                    }
+                    // An interrupt during the pause now ends the loop; it used to be swallowed,
+                    // which also cleared the flag that the loop condition checks.
+                    Thread.sleep(waitTime);
+                }
+            } catch (InterruptedException e) {
+                // Stop working, but keep the interrupt status set for whoever inspects the thread.
+                Thread.currentThread().interrupt();
+            }
+        }
+
+        public String toString() {
+            return "LoadGiver(" + index + ": count " + count + ", errors " + errors + ")";
+        }
+    }
+
+ /** Intentionally empty placeholder test. */
+ public void testNothing() throws Exception {
+ // JUnit fails a test class that contains no tests, so keep one that always passes.
+ }
+
+ /**
+ * Stress test that is currently disabled: the whole body is commented out, so this method
+ * does nothing. The commented-out code starts a ZooKeeper test server and ten LoadGiver
+ * threads sharing one database, lets them run, and reports throughput and error counts.
+ */
+ public void testZooKeeperStressed() throws Exception {
+ // Disabled for now: unstable.
+ /*
+ ZooKeeperTestServer zooKeeperServer = new ZooKeeperTestServer();
+ Database.DatabaseListener zksl = new Database.DatabaseListener() {
+ public void handleZooKeeperSessionDown() {
+ assertFalse("We lost session to ZooKeeper. Shouldn't happen", true);
+ }
+
+ public void handleMasterData(Map<Integer, Integer> data) {
+ }
+ };
+ VdsCluster cluster = new VdsCluster("mycluster", 10, 10, true);
+ int timeout = 30000;
+ ZooKeeperDatabase db = new ZooKeeperDatabase(cluster, 0, zooKeeperServer.getAddress(), timeout, zksl);
+
+ Collection<LoadGiver> loadGivers = new ArrayList();
+ long time = System.currentTimeMillis();
+ for (int i = 0; i<10; ++i) {
+ loadGivers.add(new LoadGiver(db, i));
+ }
+ for (LoadGiver lg : loadGivers) {
+ lg.start();
+ }
+ for (int i = 0; i<30000; i += 100) {
+ Thread.sleep(100);
+ boolean failed = false;
+ for (LoadGiver lg : loadGivers) {
+ if (lg.errors > 0) {
+ failed = true;
+ }
+ }
+ if (failed) i += 5000;
+ }
+ int throughput = 0;
+ int errors = 0;
+ for (LoadGiver lg : loadGivers) {
+ assertTrue("Error check prior to attempting to stop: " + lg.toString(), lg.errors == 0);
+ }
+ for (LoadGiver lg : loadGivers) {
+ lg.doStop();
+ throughput += lg.count;
+ errors += lg.errors;
+ }
+ time = System.currentTimeMillis() - time;
+ Double timesecs = new Double(time / 1000.0);
+ if (timesecs > 0.001) {
+ System.err.println("Throughput is " + (throughput / timesecs) + "msgs/sec, " + errors + " errors, total messages sent: " + throughput + ", waittime = " + waitTime);
+ } else {
+ System.err.println("too small time period " + time + " to calculate throughput");
+ }
+ //try{ Thread.sleep(5000); } catch (Exception e) {}
+ for (LoadGiver lg : loadGivers) {
+ lg.join();
+ }
+ for (LoadGiver lg : loadGivers) {
+ System.err.println(lg);
+ }
+ // Disabling test. This fails occasionally for some reason.
+ for (LoadGiver lg : loadGivers) {
+ // assertTrue("Error check after having stopped: " + lg.toString(), lg.errors == 0);
+ }
+ */
+ }
+}
diff --git a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/ZooKeeperTestServer.java b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/ZooKeeperTestServer.java
new file mode 100644
index 00000000000..a5191df5f73
--- /dev/null
+++ b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/ZooKeeperTestServer.java
@@ -0,0 +1,86 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.vespa.clustercontroller.core;
+
+import org.apache.zookeeper.server.ZooKeeperServer;
+import org.apache.zookeeper.server.NIOServerCnxnFactory;
+
+import java.io.File;
+import java.io.IOException;
+import java.net.InetSocketAddress;
+
+/**
+ * This class sets up a zookeeper server, such that we can test fleetcontroller zookeeper parts without stubbing in the client.
+ */
+public class ZooKeeperTestServer {
+ private File zooKeeperDir;
+ private ZooKeeperServer server;
+ private static final int tickTime = 100;
+ private NIOServerCnxnFactory factory;
+ private static final String DIR_PREFIX = "test_fltctrl_zk";
+ private static final String DIR_POSTFIX = "sdir";
+
+ public ZooKeeperTestServer() throws IOException {
+ this(0);
+ }
+
+ private ZooKeeperTestServer(int port) throws IOException {
+ zooKeeperDir = getTempDir();
+ delete(zooKeeperDir);
+ if (!zooKeeperDir.mkdir()) {
+ throw new IllegalStateException("Failed to create directory " + zooKeeperDir);
+ }
+ zooKeeperDir.deleteOnExit();
+ server = new ZooKeeperServer(zooKeeperDir, zooKeeperDir, tickTime);
+ final int maxcc = 10000; // max number of connections from the same client
+ factory = new NIOServerCnxnFactory();
+ factory.configure(new InetSocketAddress(port), maxcc); // Use any port
+ try{
+ factory.startup(server);
+ } catch (InterruptedException e) {
+ throw (RuntimeException) new IllegalStateException("Interrupted during test startup: ").initCause(e);
+ }
+ }
+
+ public static ZooKeeperTestServer createWithFixedPort(int port) throws IOException {
+ return new ZooKeeperTestServer(port);
+ }
+
+ public int getPort() {
+ return factory.getLocalPort();
+ }
+
+ public String getAddress() {
+ return factory.getLocalAddress().getHostName() + ":" + getPort();
+ }
+
+ public void shutdown(boolean cleanupZooKeeperDir) {
+ server.shutdown();
+
+ if (cleanupZooKeeperDir) {
+ delete(zooKeeperDir);
+ }
+
+ factory.shutdown();
+ }
+
+ public void delete(File f) {
+ if (f.isDirectory()) {
+ for (File file : f.listFiles()) {
+ delete(file);
+ }
+ }
+ f.delete();
+ }
+
+ private static File getTempDir() throws IOException {
+ // The pom file sets java.io.tmpdir to ${project.build.directory}. This doesn't happen within (e.g.) IntelliJ, but happens
+ // on Screwdriver (tm). So if we're running tests on Screwdriver (tm), put the log in 'surefire-reports' instead so the
+ // user can find them along with the other test reports.
+ final File surefireReportsDir = new File(System.getProperty("java.io.tmpdir") + File.separator + "surefire-reports");
+ if (surefireReportsDir.isDirectory()) {
+ return File.createTempFile(DIR_PREFIX, DIR_POSTFIX, surefireReportsDir);
+ }
+
+ return File.createTempFile(DIR_PREFIX, DIR_POSTFIX);
+ }
+}
diff --git a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/hostinfo/HostInfoTest.java b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/hostinfo/HostInfoTest.java
new file mode 100644
index 00000000000..d24b45817e0
--- /dev/null
+++ b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/hostinfo/HostInfoTest.java
@@ -0,0 +1,107 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.vespa.clustercontroller.core.hostinfo;
+
+import org.junit.Test;
+
+import java.io.IOException;
+import java.nio.charset.StandardCharsets;
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.nio.file.Paths;
+import java.util.List;
+import java.util.Map;
+import java.util.TreeMap;
+
+import static org.hamcrest.CoreMatchers.equalTo;
+import static org.hamcrest.CoreMatchers.nullValue;
+import static org.hamcrest.core.Is.is;
+import static org.junit.Assert.*;
+
+public class HostInfoTest {
+
+    /**
+     * Reads a test data file from {@code ../protocols/getnodestate/} (relative to the working
+     * directory) and returns its content decoded as UTF-8.
+     *
+     * @param filename name of the file inside that directory
+     * @throws IOException if the file cannot be read
+     */
+    private static String readDataFile(String filename) throws IOException {
+        Path dataFile = Paths.get("../protocols/getnodestate/" + filename);
+        return new String(Files.readAllBytes(dataFile), StandardCharsets.UTF_8);
+    }
+
+ @Test
+ public void testEmptyJson() throws IOException {
+ HostInfo hostInfo = HostInfo.createHostInfo("{}");
+ assertThat(hostInfo.getVtag().getVersionOrNull(), is(nullValue()));
+ assertThat(hostInfo.getDistributor().getStorageNodes().size(), is(0));
+ assertThat(hostInfo.getMetrics().getValues().size(), is(0));
+ assertThat(hostInfo.getClusterStateVersionOrNull(), is(nullValue()));
+ }
+
+    /** The version tag is read from host_info.json. */
+    @Test
+    public void testExtendedJson() throws IOException {
+        String json = readDataFile("host_info.json");
+        HostInfo parsed = HostInfo.createHostInfo(json);
+        assertThat(parsed.getVtag().getVersionOrNull(), is("5.32.76"));
+    }
+
+    /**
+     * Checks storage node latencies, metrics and the cluster state version parsed from
+     * host_info.json.
+     */
+    @Test
+    public void testFullSet() throws IOException {
+        HostInfo parsed = HostInfo.createHostInfo(readDataFile("host_info.json"));
+
+        List<StorageNode> nodes = parsed.getDistributor().getStorageNodes();
+        assertThat(nodes.size(), is(2));
+        StorageNode first = nodes.get(0);
+        StorageNode second = nodes.get(1);
+        assertThat(first.getIndex(), is(0));
+        assertThat(first.getOpsLatenciesOrNull().getPut().getCount(), is(16L));
+        assertThat(second.getOpsLatenciesOrNull().getPut().getCount(), is(18L));
+        assertThat(first.getOpsLatenciesOrNull().getPut().getLatencyMsSum(), is(15L));
+
+        List<Metrics.Metric> metricList = parsed.getMetrics().getValues();
+        assertThat(metricList.size(), is(2));
+        Metrics.Metric firstMetric = metricList.get(0);
+        assertThat(firstMetric.getValue().getLast(), is(5095L));
+        assertThat(firstMetric.getName(), equalTo("vds.datastored.alldisks.buckets"));
+
+        assertThat(parsed.getClusterStateVersionOrNull(), is(123));
+    }
+
+    /**
+     * Parses slow_host_info.json 100 times and prints the average time of the last 10
+     * parses. Purely informational: nothing is asserted about the timing.
+     */
+    @Test
+    public void testSpeed() throws Exception {
+        final String json = readDataFile("slow_host_info.json");
+        final int iterations = 100;
+        final int timedIterations = 10;
+
+        long start = 0;
+        for (int i = 0; i < iterations; i++) {
+            // The first 90 rounds are warm-up; timing starts with the last ten.
+            if (i == iterations - timedIterations) {
+                start = System.currentTimeMillis();
+            }
+            HostInfo parsed = HostInfo.createHostInfo(json);
+            // Check a value so not all code is removed by optimizer.
+            if (parsed.getMetrics().getValues().size() == -1) return;
+        }
+        long end = System.currentTimeMillis();
+        double averageMs = (end - start) / (double) timedIterations;
+        System.out.println("Should take about 1.5 ms on fast machine, actually " + averageMs + " ms.");
+    }
+
+    /**
+     * Parses distributor.json and checks the two storage nodes it describes (indices 0 and 5):
+     * unique indices, minimum replication factor and put latencies.
+     */
+    @Test
+    public void testSharedFile() throws Exception {
+        HostInfo parsed = HostInfo.createHostInfo(readDataFile("distributor.json"));
+
+        List<StorageNode> nodes = parsed.getDistributor().getStorageNodes();
+        assertThat(nodes.size(), is(2));
+
+        // Index the nodes, making sure no index occurs twice.
+        Map<Integer, StorageNode> byIndex = new TreeMap<>();
+        for (StorageNode storageNode : nodes) {
+            Integer nodeIndex = storageNode.getIndex();
+            assertFalse(byIndex.containsKey(nodeIndex));
+            byIndex.put(nodeIndex, storageNode);
+        }
+
+        assertTrue(byIndex.containsKey(0));
+        StorageNode node0 = byIndex.get(0);
+        assertThat(node0.getIndex(), is(0));
+        assertThat(node0.getMinCurrentReplicationFactorOrNull(), is(2));
+        assertNotNull(node0.getOpsLatenciesOrNull());
+        assertThat(node0.getOpsLatenciesOrNull().getPut().getLatencyMsSum(), is(10000L));
+        assertThat(node0.getOpsLatenciesOrNull().getPut().getCount(), is(3L));
+
+        assertTrue(byIndex.containsKey(5));
+        StorageNode node5 = byIndex.get(5);
+        assertThat(node5.getIndex(), is(5));
+        assertThat(node5.getMinCurrentReplicationFactorOrNull(), is(9));
+        assertNotNull(node5.getOpsLatenciesOrNull());
+        assertThat(node5.getOpsLatenciesOrNull().getPut().getLatencyMsSum(), is(25000L));
+        assertThat(node5.getOpsLatenciesOrNull().getPut().getCount(), is(7L));
+    }
+} \ No newline at end of file
diff --git a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/hostinfo/StorageNodeStatsBridgeTest.java b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/hostinfo/StorageNodeStatsBridgeTest.java
new file mode 100644
index 00000000000..9d23031cd55
--- /dev/null
+++ b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/hostinfo/StorageNodeStatsBridgeTest.java
@@ -0,0 +1,67 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.vespa.clustercontroller.core.hostinfo;
+
+import com.yahoo.vespa.clustercontroller.core.NodeMergeStats;
+import com.yahoo.vespa.clustercontroller.core.StorageMergeStats;
+import com.yahoo.vespa.clustercontroller.core.StorageNodeStats;
+import com.yahoo.vespa.clustercontroller.core.StorageNodeStatsContainer;
+import org.junit.Test;
+
+import java.io.IOException;
+import java.nio.charset.StandardCharsets;
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.nio.file.Paths;
+
+import static org.hamcrest.MatcherAssert.assertThat;
+import static org.hamcrest.core.Is.is;
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertNotNull;
+
+/**
+ * @author hakon
+ */
+public class StorageNodeStatsBridgeTest {
+
+    /** Loads the shared host info example document and decodes it as UTF-8 text. */
+    private static String getJsonString() throws IOException {
+        byte[] raw = Files.readAllBytes(Paths.get("../protocols/getnodestate/host_info.json"));
+        return new String(raw, StandardCharsets.UTF_8);
+    }
+
+    /** The example document must yield distributor put-latency stats for entries 0 and 1. */
+    @Test
+    public void testStorageNodeStatsContainer() throws IOException {
+        HostInfo hostInfo = HostInfo.createHostInfo(getJsonString());
+        StorageNodeStatsContainer stats = StorageNodeStatsBridge.traverseHostInfo(hostInfo);
+        assertEquals(2, stats.size());
+
+        StorageNodeStats first = stats.get(0);
+        assertNotNull(first);
+        assertEquals(15, first.getDistributorPutLatency().getLatencyMsSum());
+        assertEquals(16, first.getDistributorPutLatency().getCount());
+
+        StorageNodeStats second = stats.get(1);
+        assertNotNull(second);
+        assertEquals(17, second.getDistributorPutLatency().getLatencyMsSum());
+        assertEquals(18, second.getDistributorPutLatency().getCount());
+    }
+
+    /** Every per-node merge stats entry generated from the example must carry the same bucket counts. */
+    @Test
+    public void testStorageMergeStats() throws IOException {
+        HostInfo hostInfo = HostInfo.createHostInfo(getJsonString());
+        StorageMergeStats allMergeStats = StorageNodeStatsBridge.generate(hostInfo.getDistributor());
+
+        int entriesSeen = 0;
+        for (NodeMergeStats perNode : allMergeStats) {
+            assertThat(perNode.getCopyingIn().getBuckets(), is(2L));
+            assertThat(perNode.getCopyingOut().getBuckets(), is(4L));
+            assertThat(perNode.getSyncing().getBuckets(), is(1L));
+            assertThat(perNode.getMovingOut().getBuckets(), is(3L));
+            entriesSeen += 1;
+        }
+        // Two entries are expected in total.
+        assertThat(entriesSeen, is(2));
+    }
+}
diff --git a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/mocks/TestEventLog.java b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/mocks/TestEventLog.java
new file mode 100644
index 00000000000..646421e93ae
--- /dev/null
+++ b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/mocks/TestEventLog.java
@@ -0,0 +1,56 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.vespa.clustercontroller.core.mocks;
+
+import com.yahoo.vdslib.state.Node;
+import com.yahoo.vespa.clustercontroller.core.Event;
+import com.yahoo.vespa.clustercontroller.core.EventLogInterface;
+import com.yahoo.vespa.clustercontroller.core.NodeEvent;
+
+import java.util.logging.Level;
+
+/**
+ * Event log test double that records each added event as one line of text and counts them.
+ * The query and configuration methods of the interface are not supported and throw if invoked.
+ */
+public class TestEventLog implements EventLogInterface {
+    private StringBuilder events = new StringBuilder();
+    private int eventCount = 0;
+
+    /** Forgets all recorded events and resets the counter. */
+    public void clear() {
+        events = new StringBuilder();
+        eventCount = 0;
+    }
+
+    /** Returns every recorded line, in the order the events were added. */
+    @Override
+    public String toString() {
+        return events.toString();
+    }
+
+    public int getEventCount() {
+        return eventCount;
+    }
+
+    /** Appends one fully formatted line to the log and bumps the event counter. */
+    private void record(String line) {
+        events.append(line);
+        eventCount++;
+    }
+
+    @Override
+    public void add(Event e) {
+        // Only the description is recorded for plain adds.
+        record("add(" + e.getDescription() + ")\n");
+    }
+
+    @Override
+    public void add(Event e, boolean logInfo) {
+        record("add(" + e + ", log ? " + logInfo + ")\n");
+    }
+
+    @Override
+    public void addNodeOnlyEvent(NodeEvent e, Level level) {
+        record("add(" + e + ", " + level + ")\n");
+    }
+
+    @Override
+    public int getNodeEventsSince(Node n, long time) {
+        throw new IllegalStateException("Should never be called.");
+    }
+
+    @Override
+    public long getRecentTimePeriod() {
+        throw new IllegalStateException("Should never be called.");
+    }
+
+    @Override
+    public void writeHtmlState(StringBuilder sb, Node node) {
+        throw new IllegalStateException("Should never be called.");
+    }
+
+    @Override
+    public void setMaxSize(int size, int nodesize) {
+        throw new IllegalStateException("Should never be called.");
+    }
+}
diff --git a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/restapiv2/ClusterControllerMock.java b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/restapiv2/ClusterControllerMock.java
new file mode 100644
index 00000000000..a62b0cd9a7b
--- /dev/null
+++ b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/restapiv2/ClusterControllerMock.java
@@ -0,0 +1,85 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.vespa.clustercontroller.core.restapiv2;
+
+import com.yahoo.vdslib.state.ClusterState;
+import com.yahoo.vdslib.state.NodeState;
+import com.yahoo.vespa.clustercontroller.core.*;
+import com.yahoo.vespa.clustercontroller.core.hostinfo.HostInfo;
+import com.yahoo.vespa.clustercontroller.core.listeners.NodeAddedOrRemovedListener;
+import com.yahoo.vespa.clustercontroller.core.listeners.NodeStateOrHostInfoChangeHandler;
+
+/**
+ * Task scheduler test double: runs every scheduled task immediately against a fixed context and
+ * records listener callbacks as text in {@link #events}.
+ *
+ * NOTE(review): the entries written for newNodeState, newWantedNodeState, newMissingNode and
+ * newRpcAddress have no closing parenthesis. They are reproduced unchanged here because tests
+ * may compare against the exact text - confirm before "fixing" them.
+ */
+public class ClusterControllerMock implements RemoteClusterControllerTaskScheduler {
+    public RemoteClusterControllerTask.Context context = new RemoteClusterControllerTask.Context();
+
+    public int fleetControllerIndex;
+    public Integer fleetControllerMaster;
+    public StringBuilder events = new StringBuilder();
+
+    public ClusterControllerMock(ContentCluster cluster, ClusterState state,
+                                 int fcIndex, Integer fcMaster) {
+        fleetControllerIndex = fcIndex;
+        fleetControllerMaster = fcMaster;
+        context.cluster = cluster;
+        context.currentState = state;
+        context.masterInfo = newMasterInfo();
+        context.nodeStateOrHostInfoChangeHandler = newStateChangeHandler();
+        context.nodeAddedOrRemovedListener = newNodeListener();
+    }
+
+    /** Master view backed by the mutable public fields, so tests can change mastership on the fly. */
+    private MasterInterface newMasterInfo() {
+        return new MasterInterface() {
+            @Override
+            public boolean isMaster() {
+                if (fleetControllerMaster == null) {
+                    return false;
+                }
+                return fleetControllerMaster == fleetControllerIndex;
+            }
+
+            @Override
+            public Integer getMaster() {
+                return fleetControllerMaster;
+            }
+        };
+    }
+
+    /** Handler that logs node state and host info changes to {@link #events}. */
+    private NodeStateOrHostInfoChangeHandler newStateChangeHandler() {
+        return new NodeStateOrHostInfoChangeHandler() {
+            @Override
+            public void handleNewNodeState(NodeInfo currentInfo, NodeState newState) {
+                events.append("newNodeState(")
+                      .append(currentInfo.getNode())
+                      .append(": ")
+                      .append(newState)
+                      .append("\n");
+            }
+
+            @Override
+            public void handleNewWantedNodeState(NodeInfo node, NodeState newState) {
+                events.append("newWantedNodeState(")
+                      .append(node.getNode())
+                      .append(": ")
+                      .append(newState)
+                      .append("\n");
+            }
+
+            @Override
+            public void handleUpdatedHostInfo(NodeInfo node, HostInfo newHostInfo) {
+                events.append("updatedHostInfo(")
+                      .append(node.getNode())
+                      .append(": ")
+                      .append(newHostInfo)
+                      .append(")\n");
+            }
+        };
+    }
+
+    /** Listener that logs node arrival, disappearance and RPC address changes to {@link #events}. */
+    private NodeAddedOrRemovedListener newNodeListener() {
+        return new NodeAddedOrRemovedListener() {
+            @Override
+            public void handleNewNode(NodeInfo node) {
+                events.append("newNode(")
+                      .append(node.getNode())
+                      .append(")\n");
+            }
+
+            @Override
+            public void handleMissingNode(NodeInfo node) {
+                events.append("newMissingNode(")
+                      .append(node.getNode())
+                      .append("\n");
+            }
+
+            @Override
+            public void handleNewRpcAddress(NodeInfo node) {
+                events.append("newRpcAddress(")
+                      .append(node.getNode())
+                      .append("\n");
+            }
+
+            @Override
+            public void handleReturnedRpcAddress(NodeInfo node) {
+                events.append("returnedRpcAddress(")
+                      .append(node.getNode())
+                      .append(")\n");
+            }
+        };
+    }
+
+    /** Executes the task synchronously with the mock context, then marks it completed. */
+    @Override
+    public void schedule(RemoteClusterControllerTask task) {
+        task.doRemoteFleetControllerTask(context);
+        task.notifyCompleted();
+    }
+}
diff --git a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/restapiv2/ClusterListTest.java b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/restapiv2/ClusterListTest.java
new file mode 100644
index 00000000000..4e26585177f
--- /dev/null
+++ b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/restapiv2/ClusterListTest.java
@@ -0,0 +1,51 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.vespa.clustercontroller.core.restapiv2;
+
+import com.yahoo.vespa.clustercontroller.utils.staterestapi.response.UnitResponse;
+import org.junit.Test;
+import static org.junit.Assert.assertEquals;
+
+public class ClusterListTest extends StateRestApiTest { // Checks the JSON rendered for state requests on the empty (cluster list) path.
+
+ @Test
+ public void testClusterList() throws Exception { // Second StateRequest argument 0: each cluster is expected as a link only.
+ setUp(true); // Fixture setup inherited from StateRestApiTest (not shown in this file).
+ UnitResponse response = restAPI.getState(new StateRequest("", 0));
+ String expected =
+ "{\"cluster\": {\n" +
+ " \"books\": {\"link\": \"\\/cluster\\/v2\\/books\"},\n" +
+ " \"music\": {\"link\": \"\\/cluster\\/v2\\/music\"}\n" +
+ "}}";
+ assertEquals(expected, jsonWriter.createJson(response).toString(2)); // Compared as text, so key order and formatting matter.
+ }
+
+ @Test
+ public void testRecursiveClusterList() throws Exception { // Second StateRequest argument 1: each cluster is expanded one level.
+ setUp(true);
+ UnitResponse response = restAPI.getState(new StateRequest("", 1));
+ String expected =
+ "{\"cluster\": {\n" +
+ " \"books\": {\n" + // Expanded cluster: generated state plus links to its two services.
+ " \"state\": {\"generated\": {\n" +
+ " \"state\": \"up\",\n" +
+ " \"reason\": \"\"\n" +
+ " }},\n" +
+ " \"service\": {\n" +
+ " \"storage\": {\"link\": \"\\/cluster\\/v2\\/books\\/storage\"},\n" +
+ " \"distributor\": {\"link\": \"\\/cluster\\/v2\\/books\\/distributor\"}\n" +
+ " }\n" +
+ " },\n" +
+ " \"music\": {\n" + // Same shape as the books entry.
+ " \"state\": {\"generated\": {\n" +
+ " \"state\": \"up\",\n" +
+ " \"reason\": \"\"\n" +
+ " }},\n" +
+ " \"service\": {\n" +
+ " \"storage\": {\"link\": \"\\/cluster\\/v2\\/music\\/storage\"},\n" +
+ " \"distributor\": {\"link\": \"\\/cluster\\/v2\\/music\\/distributor\"}\n" +
+ " }\n" +
+ " }\n" +
+ "}}";
+ assertEquals(expected, jsonWriter.createJson(response).toString(2));
+ }
+}
diff --git a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/restapiv2/ClusterTest.java b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/restapiv2/ClusterTest.java
new file mode 100644
index 00000000000..8fbee2c5952
--- /dev/null
+++ b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/restapiv2/ClusterTest.java
@@ -0,0 +1,58 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.vespa.clustercontroller.core.restapiv2;
+
+import com.yahoo.vespa.clustercontroller.utils.staterestapi.response.UnitResponse;
+import org.junit.Test;
+
+import static org.junit.Assert.assertEquals;
+
+public class ClusterTest extends StateRestApiTest { // Checks the JSON rendered for state requests on the "music" cluster path.
+
+ @Test
+ public void testCluster() throws Exception { // Second StateRequest argument 0: services are expected as links only.
+ setUp(true); // Fixture setup inherited from StateRestApiTest (not shown in this file).
+ UnitResponse response = restAPI.getState(new StateRequest("music", 0));
+ String expected =
+ "{\n" +
+ " \"state\": {\"generated\": {\n" +
+ " \"state\": \"up\",\n" +
+ " \"reason\": \"\"\n" +
+ " }},\n" +
+ " \"service\": {\n" +
+ " \"storage\": {\"link\": \"\\/cluster\\/v2\\/music\\/storage\"},\n" +
+ " \"distributor\": {\"link\": \"\\/cluster\\/v2\\/music\\/distributor\"}\n" +
+ " }\n" +
+ "}";
+ assertEquals(expected, jsonWriter.createJson(response).toString(2)); // Compared as text, so key order and formatting matter.
+ }
+
+ @Test
+ public void testRecursiveCluster() throws Exception { // Second StateRequest argument 1: each service is expanded into links to its nodes.
+ setUp(true);
+ UnitResponse response = restAPI.getState(new StateRequest("music", 1));
+ String expected =
+ "{\n" +
+ " \"state\": {\"generated\": {\n" +
+ " \"state\": \"up\",\n" +
+ " \"reason\": \"\"\n" +
+ " }},\n" +
+ " \"service\": {\n" +
+ " \"storage\": {\"node\": {\n" + // Node indexes 1, 2, 3, 5 and 7 are expected for both services.
+ " \"1\": {\"link\": \"\\/cluster\\/v2\\/music\\/storage\\/1\"},\n" +
+ " \"2\": {\"link\": \"\\/cluster\\/v2\\/music\\/storage\\/2\"},\n" +
+ " \"3\": {\"link\": \"\\/cluster\\/v2\\/music\\/storage\\/3\"},\n" +
+ " \"5\": {\"link\": \"\\/cluster\\/v2\\/music\\/storage\\/5\"},\n" +
+ " \"7\": {\"link\": \"\\/cluster\\/v2\\/music\\/storage\\/7\"}\n" +
+ " }},\n" +
+ " \"distributor\": {\"node\": {\n" +
+ " \"1\": {\"link\": \"\\/cluster\\/v2\\/music\\/distributor\\/1\"},\n" +
+ " \"2\": {\"link\": \"\\/cluster\\/v2\\/music\\/distributor\\/2\"},\n" +
+ " \"3\": {\"link\": \"\\/cluster\\/v2\\/music\\/distributor\\/3\"},\n" +
+ " \"5\": {\"link\": \"\\/cluster\\/v2\\/music\\/distributor\\/5\"},\n" +
+ " \"7\": {\"link\": \"\\/cluster\\/v2\\/music\\/distributor\\/7\"}\n" +
+ " }}\n" +
+ " }\n" +
+ "}";
+ assertEquals(expected, jsonWriter.createJson(response).toString(2));
+ }
+}
diff --git a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/restapiv2/NodeTest.java b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/restapiv2/NodeTest.java
new file mode 100644
index 00000000000..10d757901e2
--- /dev/null
+++ b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/restapiv2/NodeTest.java
@@ -0,0 +1,200 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.vespa.clustercontroller.core.restapiv2;
+
+import com.yahoo.vdslib.state.Node;
+import com.yahoo.vdslib.state.NodeState;
+import com.yahoo.vdslib.state.NodeType;
+import com.yahoo.vdslib.state.State;
+import com.yahoo.vespa.clustercontroller.core.ContentCluster;
+import com.yahoo.vespa.clustercontroller.utils.staterestapi.response.UnitResponse;
+import org.codehaus.jettison.json.JSONObject;
+import org.junit.Test;
+
+import static org.junit.Assert.assertEquals;
+
+public class NodeTest extends StateRestApiTest { // Checks the JSON rendered for state requests on individual distributor and storage nodes.
+
+ @Test
+ public void testDistributor() throws Exception { // Distributor node 1: attributes and the three state kinds, no metrics or partitions expected.
+ setUp(true); // Fixture setup inherited from StateRestApiTest (not shown in this file).
+ UnitResponse response = restAPI.getState(new StateRequest("music/distributor/1", 0));
+ String expected =
+ "{\n" +
+ " \"attributes\": {\"hierarchical-group\": \"east.g2\"},\n" +
+ " \"state\": {\n" +
+ " \"generated\": {\n" +
+ " \"state\": \"up\",\n" +
+ " \"reason\": \"\"\n" +
+ " },\n" +
+ " \"unit\": {\n" +
+ " \"state\": \"up\",\n" +
+ " \"reason\": \"\"\n" +
+ " },\n" +
+ " \"user\": {\n" +
+ " \"state\": \"up\",\n" +
+ " \"reason\": \"\"\n" +
+ " }\n" +
+ " }\n" +
+ "}";
+ assertEquals(expected, jsonWriter.createJson(response).toString(2)); // Compared as text, so key order and formatting matter.
+ }
+
+ @Test
+ public void testStorage() throws Exception { // Storage node 1 with second argument 0: metrics included, partitions as links only.
+ setUp(true);
+ UnitResponse response = restAPI.getState(new StateRequest("music/storage/1", 0));
+ String expected =
+ "{\n" +
+ " \"attributes\": {\"hierarchical-group\": \"east.g2\"},\n" +
+ " \"state\": {\n" +
+ " \"generated\": {\n" +
+ " \"state\": \"up\",\n" +
+ " \"reason\": \"\"\n" +
+ " },\n" +
+ " \"unit\": {\n" +
+ " \"state\": \"up\",\n" +
+ " \"reason\": \"\"\n" +
+ " },\n" +
+ " \"user\": {\n" +
+ " \"state\": \"up\",\n" +
+ " \"reason\": \"\"\n" +
+ " }\n" +
+ " },\n" +
+ " \"metrics\": {\n" +
+ // Why 24 and 28? There are 4 distributor nodes seen in slobrok (see StateRestApiTest).
+ // Each gets a host info with distributor-put-latency-ms-sum 6 and
+ // distributor-put-latency-count 7 (see StateRestApiTest.getHostInfo()).
+ // Therefore, in aggregate, 4*6 is 24, and 4*7 is 28.
+ " \"distributor-put-latency-ms-sum\": 24,\n" +
+ " \"distributor-put-latency-count\": 28\n" +
+ " },\n" +
+ " \"partition\": {\n" +
+ " \"0\": {\"link\": \"\\/cluster\\/v2\\/music\\/storage\\/1\\/0\"},\n" +
+ " \"1\": {\"link\": \"\\/cluster\\/v2\\/music\\/storage\\/1\\/1\"}\n" +
+ " }\n" +
+ "}";
+ assertEquals(expected, jsonWriter.createJson(response).toString(2));
+ }
+
+ @Test
+ public void testRecursiveNode() throws Exception { // Storage node 1 with second argument 1: partitions are expanded with their own state and metrics.
+ setUp(true);
+ UnitResponse response = restAPI.getState(new StateRequest("music/storage/1", 1));
+ String expected =
+ "{\n" +
+ " \"attributes\": {\"hierarchical-group\": \"east.g2\"},\n" +
+ " \"state\": {\n" +
+ " \"generated\": {\n" +
+ " \"state\": \"up\",\n" +
+ " \"reason\": \"\"\n" +
+ " },\n" +
+ " \"unit\": {\n" +
+ " \"state\": \"up\",\n" +
+ " \"reason\": \"\"\n" +
+ " },\n" +
+ " \"user\": {\n" +
+ " \"state\": \"up\",\n" +
+ " \"reason\": \"\"\n" +
+ " }\n" +
+ " },\n" +
+ " \"metrics\": {\n" +
+ // Why 24 and 28? There are 4 distributor nodes seen in slobrok (see StateRestApiTest).
+ // Each gets a host info with distributor-put-latency-ms-sum 6 and
+ // distributor-put-latency-count 7 (see StateRestApiTest.getHostInfo()).
+ // Therefore, in aggregate, 4*6 is 24, and 4*7 is 28.
+ " \"distributor-put-latency-ms-sum\": 24,\n" +
+ " \"distributor-put-latency-count\": 28\n" +
+ " },\n" +
+ " \"partition\": {\n" +
+ " \"0\": {\n" +
+ " \"state\": {\"generated\": {\n" +
+ " \"state\": \"up\",\n" +
+ " \"reason\": \"\"\n" +
+ " }},\n" +
+ " \"metrics\": {\n" +
+ " \"bucket-count\": 1,\n" +
+ " \"unique-document-count\": 2,\n" +
+ " \"unique-document-total-size\": 3\n" +
+ " }\n" +
+ " },\n" +
+ " \"1\": {\n" +
+ " \"state\": {\"generated\": {\n" +
+ " \"state\": \"up\",\n" +
+ " \"reason\": \"\"\n" +
+ " }},\n" +
+ " \"metrics\": {\n" +
+ " \"bucket-count\": 1,\n" +
+ " \"unique-document-count\": 2,\n" +
+ " \"unique-document-total-size\": 3\n" +
+ " }\n" +
+ " }\n" +
+ " }\n" +
+ "}";
+ assertEquals(expected, jsonWriter.createJson(response).toString(2));
+ }
+
+ @Test
+ public void testNodeNotSeenInSlobrok() throws Exception { // Expects unit state "down" with reason "Node not seen in slobrok." and zeroed metrics.
+ setUp(true);
+ ContentCluster old = music.context.cluster;
+ music.context.cluster = new ContentCluster(old.getName(), old.getConfiguredNodes().values(), old.getDistribution(), 0, 0.0); // Swap in a fresh cluster built from the old one's name, configured nodes and distribution.
+ NodeState currentState = new NodeState(NodeType.STORAGE, State.DOWN);
+ currentState.setDescription("Not seen"); // Shows up as the "generated" reason in the expected JSON below.
+ music.context.currentState.setNodeState(new Node(NodeType.STORAGE, 1), currentState);
+ UnitResponse response = restAPI.getState(new StateRequest("music/storage/1", 0));
+ String expected =
+ "{\n" +
+ " \"attributes\": {\"hierarchical-group\": \"east.g2\"},\n" +
+ " \"state\": {\n" +
+ " \"generated\": {\n" +
+ " \"state\": \"down\",\n" +
+ " \"reason\": \"Not seen\"\n" +
+ " },\n" +
+ " \"unit\": {\n" +
+ " \"state\": \"down\",\n" +
+ " \"reason\": \"Node not seen in slobrok.\"\n" +
+ " },\n" +
+ " \"user\": {\n" +
+ " \"state\": \"up\",\n" +
+ " \"reason\": \"\"\n" +
+ " }\n" +
+ " },\n" +
+ " \"metrics\": {\n" +
+ " \"distributor-put-latency-ms-sum\": 0,\n" +
+ " \"distributor-put-latency-count\": 0\n" +
+ " }\n" + // No "partition" entry is expected in this case.
+ "}";
+ assertEquals(expected, jsonWriter.createJson(response).toString(2));
+ }
+
+ @Test
+ public void testRecursiveStorageClusterDoesNotIncludePerNodeStatsOrMetrics() throws Exception { // Requests the storage service with second argument 1 and inspects node "1" only.
+ setUp(true);
+ UnitResponse response = restAPI.getState(new StateRequest("music/storage", 1));
+ String expected =
+ "{\n" +
+ " \"attributes\": {\"hierarchical-group\": \"east.g2\"},\n" +
+ " \"state\": {\n" +
+ " \"generated\": {\n" +
+ " \"state\": \"up\",\n" +
+ " \"reason\": \"\"\n" +
+ " },\n" +
+ " \"unit\": {\n" +
+ " \"state\": \"up\",\n" +
+ " \"reason\": \"\"\n" +
+ " },\n" +
+ " \"user\": {\n" +
+ " \"state\": \"up\",\n" +
+ " \"reason\": \"\"\n" +
+ " }\n" +
+ " },\n" + // Unlike testStorage, no "metrics" object is expected here.
+ " \"partition\": {\n" +
+ " \"0\": {\"link\": \"\\/cluster\\/v2\\/music\\/storage\\/1\\/0\"},\n" +
+ " \"1\": {\"link\": \"\\/cluster\\/v2\\/music\\/storage\\/1\\/1\"}\n" +
+ " }\n" +
+ "}";
+ JSONObject json = jsonWriter.createJson(response);
+ assertEquals(expected, json.getJSONObject("node").getJSONObject("1").toString(2)); // Only the sub-object for node "1" is compared.
+ }
+
+}
diff --git a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/restapiv2/NotMasterTest.java b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/restapiv2/NotMasterTest.java
new file mode 100644
index 00000000000..4b59b3426c2
--- /dev/null
+++ b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/restapiv2/NotMasterTest.java
@@ -0,0 +1,137 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.vespa.clustercontroller.core.restapiv2;
+
+import com.yahoo.vespa.clustercontroller.utils.staterestapi.errors.OtherMasterException;
+import com.yahoo.vespa.clustercontroller.utils.staterestapi.errors.UnknownMasterException;
+import com.yahoo.vespa.clustercontroller.utils.staterestapi.response.UnitResponse;
+import static com.yahoo.vespa.defaults.Defaults.getDefaults;
+import org.junit.Test;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertTrue;
+
+/**
+ * Verifies how the state REST API answers when this cluster controller is not the master:
+ * either no master is known, or another controller is known to be master.
+ */
+public class NotMasterTest extends StateRestApiTest {
+
+    /** Fragment expected in the message of every {@link UnknownMasterException}. */
+    private static final String UNKNOWN_MASTER_MESSAGE = "No known master cluster controller";
+
+    @Test
+    public void testUnknownMaster() throws Exception {
+        setUp(true);
+        music.fleetControllerMaster = null;
+        // The non-recursive cluster list works, as it does not touch the fleet controllers.
+        assertNonRecursiveClusterListWorks();
+        // The recursive cluster list does not work.
+        assertGetStateFailsWithUnknownMaster("", 1);
+        // Other requests do not work either.
+        assertGetStateFailsWithUnknownMaster("music", 0);
+        assertGetStateFailsWithUnknownMaster("music/storage", 0);
+        assertGetStateFailsWithUnknownMaster("music/storage/1", 0);
+        assertGetStateFailsWithUnknownMaster("music/storage/1/0", 0);
+        assertSetUnitStateFailsWithUnknownMaster("music/storage/1");
+    }
+
+    @Test
+    public void testKnownOtherMaster() throws Exception {
+        setUp(true);
+        ccSockets.put(1, new ClusterControllerStateRestAPI.Socket("otherhost", getDefaults().vespaWebServicePort()));
+        music.fleetControllerMaster = 1;
+        // The non-recursive cluster list works, as it does not touch the fleet controllers.
+        assertNonRecursiveClusterListWorks();
+        // The recursive cluster list does not work.
+        assertGetStateFailsWithOtherMaster("", 1);
+        // Other requests do not work either.
+        assertGetStateFailsWithOtherMaster("music", 0);
+        assertGetStateFailsWithOtherMaster("music/storage", 0);
+        assertGetStateFailsWithOtherMaster("music/storage/1", 0);
+        assertGetStateFailsWithOtherMaster("music/storage/1/0", 0);
+        assertSetUnitStateFailsWithOtherMaster("music/storage/1");
+    }
+
+    /** Requests the cluster list with second argument 0 and expects links to books and music. */
+    private void assertNonRecursiveClusterListWorks() throws Exception {
+        UnitResponse response = restAPI.getState(new StateRequest("", 0));
+        String expected =
+                "{\"cluster\": {\n" +
+                " \"books\": {\"link\": \"\\/cluster\\/v2\\/books\"},\n" +
+                " \"music\": {\"link\": \"\\/cluster\\/v2\\/music\"}\n" +
+                "}}";
+        assertEquals(expected, jsonWriter.createJson(response).toString(2));
+    }
+
+    /** Expects a state request for {@code path} to be rejected with {@link UnknownMasterException}. */
+    private void assertGetStateFailsWithUnknownMaster(String path, int recursive) throws Exception {
+        try {
+            restAPI.getState(new StateRequest(path, recursive));
+        } catch (UnknownMasterException e) {
+            assertDescribesUnknownMaster(e);
+            return;
+        }
+        throw new AssertionError("Expected UnknownMasterException when getting state of '" + path + "'");
+    }
+
+    /** Expects setting the user state of {@code path} to be rejected with {@link UnknownMasterException}. */
+    private void assertSetUnitStateFailsWithUnknownMaster(String path) throws Exception {
+        try {
+            restAPI.setUnitState(new SetNodeStateTest.SetUnitStateRequestImpl(path)
+                    .setNewState("user", "down", "test"));
+        } catch (UnknownMasterException e) {
+            assertDescribesUnknownMaster(e);
+            return;
+        }
+        throw new AssertionError("Expected UnknownMasterException when setting state of '" + path + "'");
+    }
+
+    /** Expects a state request for {@code path} to be rejected with {@link OtherMasterException}. */
+    private void assertGetStateFailsWithOtherMaster(String path, int recursive) throws Exception {
+        try {
+            restAPI.getState(new StateRequest(path, recursive));
+        } catch (OtherMasterException e) {
+            assertPointsToOtherMaster(e);
+            return;
+        }
+        throw new AssertionError("Expected OtherMasterException when getting state of '" + path + "'");
+    }
+
+    /** Expects setting the user state of {@code path} to be rejected with {@link OtherMasterException}. */
+    private void assertSetUnitStateFailsWithOtherMaster(String path) throws Exception {
+        try {
+            restAPI.setUnitState(new SetNodeStateTest.SetUnitStateRequestImpl(path)
+                    .setNewState("user", "down", "test"));
+        } catch (OtherMasterException e) {
+            assertPointsToOtherMaster(e);
+            return;
+        }
+        throw new AssertionError("Expected OtherMasterException when setting state of '" + path + "'");
+    }
+
+    /** Checks that the exception message mentions that no master is known. */
+    private static void assertDescribesUnknownMaster(UnknownMasterException e) {
+        assertTrue(e.getMessage(), e.getMessage().contains(UNKNOWN_MASTER_MESSAGE));
+    }
+
+    /** Checks that the exception redirects to "otherhost" on the default web service port. */
+    private static void assertPointsToOtherMaster(OtherMasterException e) {
+        String expectedMessage = "Cluster controller not master. Use master at otherhost:"
+                + getDefaults().vespaWebServicePort() + ".";
+        assertTrue(e.getMessage(), e.getMessage().contains(expectedMessage));
+        // assertEquals reports the actual host on failure and tolerates null.
+        assertEquals("otherhost", e.getHost());
+        assertTrue("Unexpected master port: " + e.getPort(),
+                   e.getPort() == getDefaults().vespaWebServicePort());
+    }
+}
diff --git a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/restapiv2/PartitionTest.java b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/restapiv2/PartitionTest.java
new file mode 100644
index 00000000000..f0f7f422824
--- /dev/null
+++ b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/restapiv2/PartitionTest.java
@@ -0,0 +1,64 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.vespa.clustercontroller.core.restapiv2;
+
+import com.yahoo.vespa.clustercontroller.utils.staterestapi.response.UnitResponse;
+import org.codehaus.jettison.json.JSONObject;
+import org.junit.Test;
+
+import static org.junit.Assert.assertEquals;
+
+public class PartitionTest extends StateRestApiTest { // Checks the JSON rendered for partition 0 of storage node 1 in the "music" cluster.
+
+ @Test
+ public void testPartition() throws Exception { // Partition path with second StateRequest argument 0: generated state plus metrics expected.
+ setUp(true); // Fixture setup inherited from StateRestApiTest (not shown in this file).
+ UnitResponse response = restAPI.getState(new StateRequest("music/storage/1/0", 0));
+ String expected =
+ "{\n" +
+ " \"state\": {\"generated\": {\n" +
+ " \"state\": \"up\",\n" +
+ " \"reason\": \"\"\n" +
+ " }},\n" +
+ " \"metrics\": {\n" +
+ " \"bucket-count\": 1,\n" +
+ " \"unique-document-count\": 2,\n" +
+ " \"unique-document-total-size\": 3\n" +
+ " }\n" +
+ "}";
+ assertEquals(expected, jsonWriter.createJson(response).toString(2)); // Compared as text, so key order and formatting matter.
+ }
+
+ @Test
+ public void testRecursiveCluster() throws Exception { // Despite the name, this requests the same partition path with second argument 1; the expected JSON equals testPartition's.
+ setUp(true);
+ UnitResponse response = restAPI.getState(new StateRequest("music/storage/1/0", 1));
+ String expected =
+ "{\n" +
+ " \"state\": {\"generated\": {\n" +
+ " \"state\": \"up\",\n" +
+ " \"reason\": \"\"\n" +
+ " }},\n" +
+ " \"metrics\": {\n" +
+ " \"bucket-count\": 1,\n" +
+ " \"unique-document-count\": 2,\n" +
+ " \"unique-document-total-size\": 3\n" +
+ " }\n" +
+ "}";
+ assertEquals(expected, jsonWriter.createJson(response).toString(2));
+ }
+
+ @Test
+ public void testRecursiveStorageClusterDoesNotIncludePartitionMetrics() throws Exception { // Requests the storage service with second argument 2 and inspects one nested partition.
+ setUp(true);
+ UnitResponse response = restAPI.getState(new StateRequest("music/storage", 2));
+ String expected =
+ "{\"state\": {\"generated\": {\n" + // State only: no "metrics" object is expected for the nested partition.
+ " \"state\": \"up\",\n" +
+ " \"reason\": \"\"\n" +
+ "}}}";
+ JSONObject json = jsonWriter.createJson(response);
+ assertEquals(expected, json.getJSONObject("node").getJSONObject("1"). // Drill down to node "1", partition "0".
+ getJSONObject("partition").getJSONObject("0").
+ toString(2));
+ }
+}
diff --git a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/restapiv2/RequestTest.java b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/restapiv2/RequestTest.java
new file mode 100644
index 00000000000..5d3813f44da
--- /dev/null
+++ b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/restapiv2/RequestTest.java
@@ -0,0 +1,33 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.vespa.clustercontroller.core.restapiv2;
+
+import com.yahoo.vespa.clustercontroller.utils.staterestapi.errors.InternalFailure;
+import com.yahoo.vespa.clustercontroller.utils.staterestapi.errors.StateRestApiException;
+import junit.framework.TestCase;
+
+/**
+ * Verifies that {@link Request#getResult()} reports an {@link InternalFailure} when it is
+ * asked for a result that was never calculated.
+ */
+public class RequestTest extends TestCase {
+
+    public void testGetResultBeforeCompletion() {
+        Request<String> r = new Request<String>(Request.MasterState.MUST_BE_MASTER) {
+            @Override
+            public String calculateResult(Context context) throws StateRestApiException {
+                return "foo";
+            }
+        };
+        assertGetResultThrowsInternalFailure(r, "before notifyCompleted() was called");
+        // The request is marked completed directly; this test never invokes calculateResult() itself.
+        r.notifyCompleted();
+        assertGetResultThrowsInternalFailure(r, "after notifyCompleted() was called directly");
+    }
+
+    /**
+     * Asserts that {@code request.getResult()} throws {@link InternalFailure}.
+     *
+     * @param request the request to query
+     * @param phase   description of the request's state, used in failure messages
+     */
+    private static void assertGetResultThrowsInternalFailure(Request<String> request, String phase) {
+        try {
+            request.getResult();
+        } catch (InternalFailure expected) {
+            // This is the outcome under test.
+            return;
+        } catch (Exception e) {
+            fail("Expected InternalFailure " + phase + ", but got " + e);
+        }
+        fail("Expected InternalFailure " + phase + ", but getResult() returned normally");
+    }
+}
diff --git a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/restapiv2/ServiceTest.java b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/restapiv2/ServiceTest.java
new file mode 100644
index 00000000000..01bd7b6f033
--- /dev/null
+++ b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/restapiv2/ServiceTest.java
@@ -0,0 +1,121 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.vespa.clustercontroller.core.restapiv2;
+
+import com.yahoo.vespa.clustercontroller.utils.staterestapi.response.UnitResponse;
+import org.junit.Test;
+
+import static org.junit.Assert.assertEquals;
+
+// TODO: Author?
+public class ServiceTest extends StateRestApiTest { // Checks the JSON rendered for state requests on the distributor service of the "music" cluster.
+
+ @Test
+ public void testService() throws Exception { // Second StateRequest argument 0: each node is expected as a link only.
+ setUp(true); // Fixture setup inherited from StateRestApiTest (not shown in this file).
+ UnitResponse response = restAPI.getState(new StateRequest("music/distributor", 0));
+ String expected =
+ "{\"node\": {\n" +
+ " \"1\": {\"link\": \"\\/cluster\\/v2\\/music\\/distributor\\/1\"},\n" +
+ " \"2\": {\"link\": \"\\/cluster\\/v2\\/music\\/distributor\\/2\"},\n" +
+ " \"3\": {\"link\": \"\\/cluster\\/v2\\/music\\/distributor\\/3\"},\n" +
+ " \"5\": {\"link\": \"\\/cluster\\/v2\\/music\\/distributor\\/5\"},\n" +
+ " \"7\": {\"link\": \"\\/cluster\\/v2\\/music\\/distributor\\/7\"}\n" +
+ "}}";
+ assertEquals(expected, jsonWriter.createJson(response).toString(2)); // Compared as text, so key order and formatting matter.
+ }
+
+ @Test
+ public void testRecursiveCluster() throws Exception { // Second StateRequest argument 1: each distributor node is expanded with attributes and states.
+ setUp(true);
+ UnitResponse response = restAPI.getState(new StateRequest("music/distributor", 1));
+ String expected =
+ "{\"node\": {\n" +
+ " \"1\": {\n" +
+ " \"attributes\": {\"hierarchical-group\": \"east.g2\"},\n" +
+ " \"state\": {\n" +
+ " \"generated\": {\n" +
+ " \"state\": \"up\",\n" +
+ " \"reason\": \"\"\n" +
+ " },\n" +
+ " \"unit\": {\n" +
+ " \"state\": \"up\",\n" +
+ " \"reason\": \"\"\n" +
+ " },\n" +
+ " \"user\": {\n" +
+ " \"state\": \"up\",\n" +
+ " \"reason\": \"\"\n" +
+ " }\n" +
+ " }\n" +
+ " },\n" +
+ " \"2\": {\n" + // Node 2 is the only one expected down ("Node not seen in slobrok.") and the only one in group east.g1.
+ " \"attributes\": {\"hierarchical-group\": \"east.g1\"},\n" +
+ " \"state\": {\n" +
+ " \"generated\": {\n" +
+ " \"state\": \"down\",\n" +
+ " \"reason\": \"\"\n" +
+ " },\n" +
+ " \"unit\": {\n" +
+ " \"state\": \"down\",\n" +
+ " \"reason\": \"Node not seen in slobrok.\"\n" +
+ " },\n" +
+ " \"user\": {\n" +
+ " \"state\": \"up\",\n" +
+ " \"reason\": \"\"\n" +
+ " }\n" +
+ " }\n" +
+ " },\n" +
+ " \"3\": {\n" +
+ " \"attributes\": {\"hierarchical-group\": \"east.g2\"},\n" +
+ " \"state\": {\n" +
+ " \"generated\": {\n" +
+ " \"state\": \"up\",\n" +
+ " \"reason\": \"\"\n" +
+ " },\n" +
+ " \"unit\": {\n" +
+ " \"state\": \"up\",\n" +
+ " \"reason\": \"\"\n" +
+ " },\n" +
+ " \"user\": {\n" +
+ " \"state\": \"up\",\n" +
+ " \"reason\": \"\"\n" +
+ " }\n" +
+ " }\n" +
+ " },\n" +
+ " \"5\": {\n" +
+ " \"attributes\": {\"hierarchical-group\": \"east.g2\"},\n" +
+ " \"state\": {\n" +
+ " \"generated\": {\n" +
+ " \"state\": \"up\",\n" +
+ " \"reason\": \"\"\n" +
+ " },\n" +
+ " \"unit\": {\n" +
+ " \"state\": \"up\",\n" +
+ " \"reason\": \"\"\n" +
+ " },\n" +
+ " \"user\": {\n" +
+ " \"state\": \"up\",\n" +
+ " \"reason\": \"\"\n" +
+ " }\n" +
+ " }\n" +
+ " },\n" +
+ " \"7\": {\n" +
+ " \"attributes\": {\"hierarchical-group\": \"east.g2\"},\n" +
+ " \"state\": {\n" +
+ " \"generated\": {\n" +
+ " \"state\": \"up\",\n" +
+ " \"reason\": \"\"\n" +
+ " },\n" +
+ " \"unit\": {\n" +
+ " \"state\": \"up\",\n" +
+ " \"reason\": \"\"\n" +
+ " },\n" +
+ " \"user\": {\n" +
+ " \"state\": \"up\",\n" +
+ " \"reason\": \"\"\n" +
+ " }\n" +
+ " }\n" +
+ " }\n" +
+ "}}";
+ assertEquals(expected, jsonWriter.createJson(response).toString(2));
+ }
+}
diff --git a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/restapiv2/SetNodeStateTest.java b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/restapiv2/SetNodeStateTest.java
new file mode 100644
index 00000000000..d574e949348
--- /dev/null
+++ b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/restapiv2/SetNodeStateTest.java
@@ -0,0 +1,348 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.vespa.clustercontroller.core.restapiv2;
+
+import com.yahoo.vespa.clustercontroller.utils.staterestapi.errors.InvalidContentException;
+import com.yahoo.vespa.clustercontroller.utils.staterestapi.errors.MissingUnitException;
+import com.yahoo.vespa.clustercontroller.utils.staterestapi.errors.OperationNotSupportedForUnitException;
+import com.yahoo.vespa.clustercontroller.utils.staterestapi.requests.SetUnitStateRequest;
+import com.yahoo.vespa.clustercontroller.utils.staterestapi.response.SetResponse;
+import com.yahoo.vespa.clustercontroller.utils.staterestapi.response.UnitResponse;
+import com.yahoo.vespa.clustercontroller.utils.staterestapi.response.UnitState;
+import org.junit.Test;
+
+import java.util.LinkedHashMap;
+import java.util.Map;
+
+import static org.hamcrest.MatcherAssert.assertThat;
+import static org.hamcrest.core.Is.is;
+import static org.hamcrest.core.StringContains.containsString;
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertTrue;
+
+public class SetNodeStateTest extends StateRestApiTest {
+
+    /**
+     * Test implementation of {@link SetUnitStateRequest}: a unit path plus the new states to set,
+     * keyed by state type. The condition is {@code FORCE} until {@link #setCondition} is called.
+     */
+    public static class SetUnitStateRequestImpl extends StateRequest implements SetUnitStateRequest {
+        private Map<String, UnitState> newStates = new LinkedHashMap<>();
+        private Condition condition = Condition.FORCE;
+
+        public SetUnitStateRequestImpl(String req) {
+            super(req, 0);
+        }
+
+        /** Replaces the condition of this request; returns this for chaining. */
+        public SetUnitStateRequestImpl setCondition(Condition condition) {
+            this.condition = condition;
+            return this;
+        }
+
+        /** Registers the state and reason to set for the given state type; returns this for chaining. */
+        public SetUnitStateRequestImpl setNewState(final String type, final String state, final String reason) {
+            newStates.put(type, new FixedUnitState(state, reason));
+            return this;
+        }
+
+        @Override
+        public Map<String, UnitState> getNewState() {
+            return newStates;
+        }
+
+        @Override
+        public Condition getCondition() {
+            return condition;
+        }
+
+        /** A unit state which returns exactly the id and reason it was created with. */
+        private static final class FixedUnitState implements UnitState {
+            private final String id;
+            private final String reason;
+
+            FixedUnitState(String id, String reason) {
+                this.id = id;
+                this.reason = reason;
+            }
+
+            @Override
+            public String getId() {
+                return id;
+            }
+
+            @Override
+            public String getReason() {
+                return reason;
+            }
+        }
+    }
+
+ private void verifyStateSet(String state, String reason) throws Exception {
+ restAPI.setUnitState(new SetUnitStateRequestImpl(
+ "music/distributor/1").setNewState("user", state, reason));
+ UnitResponse response = restAPI.getState(new StateRequest("music/distributor/1", 0));
+ String expected = musicClusterExpectedUserStateString("east.g2", "up", "up", state.toLowerCase(), reason);
+ assertEquals(expected, jsonWriter.createJson(response).toString(2));
+ }
+
+ private void verifyClusterSet(String state, String reason) throws Exception {
+ restAPI.setUnitState(new SetUnitStateRequestImpl("music").setNewState("user", state, reason));
+ for (int index : new int[]{1, 2, 3, 5, 7}) {
+ UnitResponse response = restAPI.getState(new StateRequest("music/storage/" + index, 0));
+ String actualState = response.getCurrentState().getStatePerType().get("user").getId();
+ assertThat(actualState, is(state.toLowerCase()));
+ String actualReason = response.getCurrentState().getStatePerType().get("user").getReason();
+ assertThat(actualReason, is(reason));
+ }
+ }
+
+    /**
+     * Expected JSON for a node which has not been seen in slobrok: same as
+     * {@link #musicClusterExpectedUserStateString} except that the unit reason says so.
+     */
+    private String musicClusterExpectedUserStateStringWithUninitializedNode(String groupName,
+                                                                            String generatedState, String unitState,
+                                                                            String userState, String userReason) {
+        return expectedNodeStateJson(groupName, generatedState, unitState, "Node not seen in slobrok.",
+                                     userState, userReason);
+    }
+
+    /** Expected JSON for a node with an empty unit reason. */
+    private String musicClusterExpectedUserStateString(String groupName,
+                                                       String generatedState, String unitState,
+                                                       String userState, String userReason) {
+        return expectedNodeStateJson(groupName, generatedState, unitState, "", userState, userReason);
+    }
+
+    /**
+     * Builds the expected JSON rendering of a single node: its hierarchical group attribute and
+     * the generated, unit and user states. The generated reason is always empty.
+     */
+    private String expectedNodeStateJson(String groupName,
+                                         String generatedState,
+                                         String unitState, String unitReason,
+                                         String userState, String userReason) {
+        return "{\n" +
+                " \"attributes\": {\"hierarchical-group\": \"" + groupName + "\"},\n" +
+                " \"state\": {\n" +
+                " \"generated\": {\n" +
+                " \"state\": \"" + generatedState + "\",\n" +
+                " \"reason\": \"\"\n" +
+                " },\n" +
+                " \"unit\": {\n" +
+                " \"state\": \"" + unitState + "\",\n" +
+                " \"reason\": \"" + unitReason + "\"\n" +
+                " },\n" +
+                " \"user\": {\n" +
+                " \"state\": \"" + userState + "\",\n" +
+                " \"reason\": \"" + userReason + "\"\n" +
+                " }\n" +
+                " }\n" +
+                "}";
+    }
+
+ @Test
+ public void testSimple() throws Exception {
+ setUp(true);
+ verifyStateSet("down", "testing");
+ verifyStateSet("up", "foo");
+ verifyStateSet("maintenance", "");
+ verifyStateSet("retired", "argh");
+ verifyStateSet("UP", "even uppercase");
+ }
+
+ @Test
+ public void testSetNodesForCluster() throws Exception {
+ setUp(true);
+ verifyClusterSet("maintenance", "prepare for maintenance");
+ verifyClusterSet("up", "and we're back online");
+ }
+
+    @Test
+    public void testShouldNotModifyDistributorSafe() throws Exception {
+        setUp(false);
+
+        SetUnitStateRequestImpl safeRequest = new SetUnitStateRequestImpl("music/distributor/1");
+        safeRequest.setNewState("user", "up", "whatever reason.");
+        safeRequest.setCondition(SetUnitStateRequest.Condition.SAFE);
+
+        // The request is answered, but reports that nothing was modified and why.
+        SetResponse result = restAPI.setUnitState(safeRequest);
+        assertThat(result.getWasModified(), is(false));
+        assertThat(result.getReason(),
+                   containsString("Safe-set of node state is only supported for storage nodes"));
+    }
+
+    @Test
+    public void testShouldModifyStorageSafeOk() throws Exception {
+        setUp(false);
+
+        SetUnitStateRequestImpl safeRequest = new SetUnitStateRequestImpl("music/storage/1");
+        safeRequest.setNewState("user", "maintenance", "whatever reason.");
+        safeRequest.setCondition(SetUnitStateRequest.Condition.SAFE);
+
+        SetResponse result = restAPI.setUnitState(safeRequest);
+        assertThat(result.getWasModified(), is(true));
+        assertThat(result.getReason(), is("ok"));
+    }
+
+    @Test
+    public void testShouldModifyStorageSafeBlocked() throws Exception {
+        setUp(false);
+
+        // First safe-set: storage node 1 goes into maintenance.
+        SetUnitStateRequestImpl firstRequest = new SetUnitStateRequestImpl("music/storage/1");
+        firstRequest.setNewState("user", "maintenance", "whatever reason.");
+        firstRequest.setCondition(SetUnitStateRequest.Condition.SAFE);
+        SetResponse firstResponse = restAPI.setUnitState(firstRequest);
+        assertThat(firstResponse.getReason(), is("ok"));
+        assertThat(firstResponse.getWasModified(), is(true));
+
+        // Second safe-set on another storage node is refused while node 1 is in maintenance.
+        SetUnitStateRequestImpl secondRequest = new SetUnitStateRequestImpl("music/storage/3");
+        secondRequest.setNewState("user", "maintenance", "whatever reason.");
+        secondRequest.setCondition(SetUnitStateRequest.Condition.SAFE);
+        SetResponse secondResponse = restAPI.setUnitState(secondRequest);
+        assertThat(secondResponse.getReason(), is("There is a node already in maintenance:1"));
+        assertThat(secondResponse.getWasModified(), is(false));
+    }
+
+    @Test
+    public void testSetWantedStateOnNodeNotInSlobrok() throws Exception {
+        // Passing true to setUp leaves node 2 in cluster music without a valid NodeInfo
+        setUp(true);
+        final String unitPath = "music/distributor/2";
+
+        SetUnitStateRequestImpl setRequest = new SetUnitStateRequestImpl(unitPath);
+        setRequest.setNewState("user", "down", "borked node");
+        restAPI.setUnitState(setRequest);
+
+        UnitResponse nodeResponse = restAPI.getState(new StateRequest(unitPath, 0));
+        String wanted = musicClusterExpectedUserStateStringWithUninitializedNode(
+                "east.g1", "down", "down", "down", "borked node");
+        assertEquals(wanted, jsonWriter.createJson(nodeResponse).toString(2));
+    }
+
+    @Test
+    public void testWrongUnit() throws Exception {
+        setUp(true);
+
+        String wrongUnitMessage = "State can only be set at cluster or node level";
+        // Paths ordered by depth. Setting at cluster level and at node level is allowed,
+        // so those levels are not part of this list.
+        final String[] unsupportedPaths = {"", "music/distributor", "music/storage/1/0"};
+        for (String unitPath : unsupportedPaths) {
+            try {
+                restAPI.setUnitState(new SetUnitStateRequestImpl(unitPath).setNewState("user", "down", "testing"));
+                assertTrue(false);
+            } catch (OperationNotSupportedForUnitException e) {
+                assertTrue(e.getMessage(), e.getMessage().contains(wrongUnitMessage));
+            }
+        }
+    }
+
+    @Test
+    public void testInvalidUnit() throws Exception {
+        setUp(true);
+        // Every one of these paths must be rejected with a MissingUnitException.
+        final String[] missingUnitPaths = {
+                "foo",
+                "music/content",
+                "music/storage/bah",
+                "music/storage/10",
+                "music/storage/1/0/1",
+                "music/storage/1/bar",
+        };
+        for (String unitPath : missingUnitPaths) {
+            try {
+                restAPI.setUnitState(new SetUnitStateRequestImpl(unitPath).setNewState("user", "down", "testing"));
+                assertTrue(false);
+            } catch (MissingUnitException e) {
+                // This is the outcome the test is looking for.
+            }
+        }
+    }
+
+    /** Setting a state of an unknown type must be rejected, as no "user" state is given. */
+    @Test
+    public void testSettingInvalidStateType() throws Exception {
+        setUp(true);
+        try {
+            restAPI.setUnitState(new SetUnitStateRequestImpl(
+                    "music/distributor/1").setNewState("foo", "down", "testing"));
+            // Without this the test would pass silently if the request were accepted.
+            assertTrue("Expected InvalidContentException for state type 'foo'", false);
+        } catch (InvalidContentException e) {
+            assertTrue(e.getMessage(), e.getMessage().contains("No new user state given"));
+        }
+    }
+
+    /** A cluster-wide set with condition SAFE must be rejected; only FORCE is supported there. */
+    @Test
+    public void testSafeIsInvalidForSetNodesStatesForCluster() throws Exception {
+        setUp(true);
+        try {
+            restAPI.setUnitState(new SetUnitStateRequestImpl("music")
+                    .setNewState("user", "maintenance", "example reason")
+                    .setCondition(SetUnitStateRequest.Condition.SAFE));
+            // Without this the test would pass silently if the request were accepted.
+            assertTrue("Expected InvalidContentException for SAFE set at cluster level", false);
+        } catch (InvalidContentException e) {
+            assertTrue(e.getMessage(), e.getMessage().contains(
+                    "Setting all nodes in a cluster to a state is only supported with FORCE"));
+        }
+    }
+
+    /** Only the "user" state may be set; "generated" and "unit" must be rejected. */
+    @Test
+    public void testSettingWrongStateType() throws Exception {
+        setUp(true);
+        for (String stateType : new String[]{"generated", "unit"}) {
+            try {
+                restAPI.setUnitState(new SetUnitStateRequestImpl(
+                        "music/distributor/1").setNewState(stateType, "down", "testing"));
+                // Without this the test would pass silently if the request were accepted.
+                assertTrue("Expected InvalidContentException for state type '" + stateType + "'", false);
+            } catch (InvalidContentException e) {
+                assertTrue(e.getMessage(), e.getMessage().contains("No new user state given"));
+            }
+        }
+    }
+
+    /** States which cannot be used as user state, or do not exist at all, must be rejected. */
+    @Test
+    public void testInvalidState() throws Exception {
+        setUp(true);
+        for (String invalidState : new String[]{"initializing", "stopping", "foo"}) {
+            try {
+                restAPI.setUnitState(new SetUnitStateRequestImpl(
+                        "music/distributor/1").setNewState("user", invalidState, "testing"));
+                // Without this the test would pass silently if the request were accepted.
+                assertTrue("Expected InvalidContentException for user state '" + invalidState + "'", false);
+            } catch (InvalidContentException e) {
+                assertTrue(e.getMessage(), e.getMessage().contains("Invalid user state"));
+            }
+        }
+    }
+
+    @Test
+    public void testOverwriteReason() throws Exception {
+        setUp(true);
+        final String unitPath = "music/distributor/1";
+
+        // Same state set twice; only the reason differs, and the last reason must be reported.
+        for (String reason : new String[]{"testing", "testing more"}) {
+            restAPI.setUnitState(new SetUnitStateRequestImpl(unitPath).setNewState("user", "down", reason));
+        }
+
+        UnitResponse nodeResponse = restAPI.getState(new StateRequest(unitPath, 0));
+        String wanted = musicClusterExpectedUserStateString("east.g2", "up", "up", "down", "testing more");
+        assertEquals(wanted, jsonWriter.createJson(nodeResponse).toString(2));
+    }
+
+}
diff --git a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/restapiv2/StateRestApiTest.java b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/restapiv2/StateRestApiTest.java
new file mode 100644
index 00000000000..c25fe092c2a
--- /dev/null
+++ b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/restapiv2/StateRestApiTest.java
@@ -0,0 +1,170 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.vespa.clustercontroller.core.restapiv2;
+
+import com.yahoo.vdslib.distribution.ConfiguredNode;
+import com.yahoo.vdslib.distribution.Distribution;
+import com.yahoo.vdslib.state.*;
+import com.yahoo.vespa.clustercontroller.core.FleetControllerTest;
+import com.yahoo.vespa.clustercontroller.core.NodeInfo;
+import com.yahoo.vespa.clustercontroller.core.RemoteClusterControllerTaskScheduler;
+import com.yahoo.vespa.clustercontroller.core.ContentCluster;
+import com.yahoo.vespa.clustercontroller.core.hostinfo.HostInfo;
+import com.yahoo.vespa.clustercontroller.utils.staterestapi.StateRestAPI;
+import com.yahoo.vespa.clustercontroller.utils.staterestapi.requests.UnitStateRequest;
+import com.yahoo.vespa.clustercontroller.utils.staterestapi.server.JsonWriter;
+
+import java.util.*;
+
+// TODO: Author
+public abstract class StateRestApiTest {
+
+ protected ClusterControllerMock books;
+ protected ClusterControllerMock music;
+ protected StateRestAPI restAPI;
+ protected JsonWriter jsonWriter = new JsonWriter();
+ protected Map<Integer, ClusterControllerStateRestAPI.Socket> ccSockets;
+
+    /**
+     * A {@link UnitStateRequest} built from a slash-separated unit path.
+     * The empty string gives an empty path.
+     */
+    public static class StateRequest implements UnitStateRequest {
+        private String[] path;
+        private int recursive;
+
+        public StateRequest(String req, int recursive) {
+            if (req.isEmpty()) {
+                path = new String[0];
+            } else {
+                path = req.split("/");
+            }
+            this.recursive = recursive;
+        }
+
+        @Override
+        public int getRecursiveLevels() {
+            return recursive;
+        }
+
+        @Override
+        public String[] getUnitPath() {
+            return path;
+        }
+    }
+
+    /**
+     * Creates the "books" and "music" cluster mocks and a state REST API serving both.
+     *
+     * @param dontInitializeNode2 if true, node 2 of the music cluster is left uninitialized
+     */
+    protected void setUp(boolean dontInitializeNode2) throws Exception {
+        Distribution distribution = new Distribution(Distribution.getSimpleGroupConfig(2, 10));
+        jsonWriter.setDefaultPathPrefix("/cluster/v2");
+
+        Set<ConfiguredNode> bookNodes = FleetControllerTest.toNodes(0, 1, 2, 3);
+        ContentCluster bookCluster = new ContentCluster(
+                "books", bookNodes, distribution, 6 /* minStorageNodesUp*/, 0.9 /* minRatioOfStorageNodesUp */);
+        initializeCluster(bookCluster, bookNodes);
+        books = new ClusterControllerMock(bookCluster, new ClusterState("distributor:4 storage:4"), 0, 0);
+
+        Set<ConfiguredNode> musicNodes = FleetControllerTest.toNodes(1, 2, 3, 5, 7);
+        Set<ConfiguredNode> nodesInSlobrok = FleetControllerTest.toNodes(1, 3, 5, 7);
+        ContentCluster musicCluster = new ContentCluster(
+                "music", musicNodes, distribution, 4 /* minStorageNodesUp*/, 0.0 /* minRatioOfStorageNodesUp */);
+        // TODO: skipping initialization of node 2 fakes that it is not answering,
+        //       which really leaves us in an illegal state
+        initializeCluster(musicCluster, dontInitializeNode2 ? nodesInSlobrok : musicNodes);
+        ClusterState musicState = new ClusterState("distributor:8 .0.s:d .2.s:d .4.s:d .6.s:d "
+                                                   + "storage:8 .0.s:d .2.s:d .4.s:d .6.s:d");
+        music = new ClusterControllerMock(musicCluster, musicState, 0, 0);
+
+        ccSockets = new TreeMap<>();
+        ccSockets.put(0, new ClusterControllerStateRestAPI.Socket("localhost", 80));
+
+        ClusterControllerStateRestAPI.FleetControllerResolver resolver =
+                new ClusterControllerStateRestAPI.FleetControllerResolver() {
+                    @Override
+                    public Map<String, RemoteClusterControllerTaskScheduler> getFleetControllers() {
+                        Map<String, RemoteClusterControllerTaskScheduler> byClusterName = new LinkedHashMap<>();
+                        byClusterName.put(books.context.cluster.getName(), books);
+                        byClusterName.put(music.context.cluster.getName(), music);
+                        return byClusterName;
+                    }
+                };
+        restAPI = new ClusterControllerStateRestAPI(resolver, ccSockets);
+    }
+
+    /**
+     * For each given node and each node type: registers an rpc address, reports the node as up
+     * (storage nodes with 2 disks) and attaches the fixed host info from {@link #getHostInfo()}.
+     */
+    protected void initializeCluster(ContentCluster cluster, Collection<ConfiguredNode> nodes) {
+        for (ConfiguredNode configuredNode : nodes) {
+            for (NodeType type : NodeType.getTypes()) {
+                NodeState reportedState = new NodeState(type, State.UP);
+                boolean isStorage = type.equals(NodeType.STORAGE);
+                if (isStorage) {
+                    reportedState.setDiskCount(2);
+                }
+
+                Node node = new Node(type, configuredNode.index());
+                String rpcAddress = "rpc:" + type + "/" + configuredNode;
+                NodeInfo info = cluster.clusterInfo().setRpcAddress(node, rpcAddress);
+                info.setReportedState(reportedState, 10);
+                info.setHostInfo(HostInfo.createHostInfo(getHostInfo()));
+            }
+        }
+    }
+
+ /**
+  * Returns a fixed host info JSON document: cluster state version 0, three "vds.datastored.alldisks"
+  * metrics (buckets, docs, bytes), and distributor statistics for storage nodes 1, 3, 5 and 7.
+  */
+ private String getHostInfo() {
+ return "{\n" +
+ " \"cluster-state-version\": 0,\n" +
+ " \"metrics\": {\n" +
+ " \"values\": [\n" +
+ " {\n" +
+ " \"name\": \"vds.datastored.alldisks.buckets\",\n" +
+ " \"values\": {\n" +
+ " \"last\": 1\n" +
+ " }\n" +
+ " },\n" +
+ " {\n" +
+ " \"name\": \"vds.datastored.alldisks.docs\",\n" +
+ " \"values\": {\n" +
+ " \"last\": 2\n" +
+ " }\n" +
+ " },\n" +
+ " {\n" +
+ " \"name\": \"vds.datastored.alldisks.bytes\",\n" +
+ " \"values\": {\n" +
+ " \"last\": 3\n" +
+ " }\n" +
+ " }\n" +
+ " ]\n" +
+ " },\n" +
+ " \"distributor\": {\n" +
+ " \"storage-nodes\": [\n" +
+ " {\n" +
+ " \"node-index\": 1,\n" +
+ " \"min-current-replication-factor\": 2,\n" +
+ " \"ops-latency\": {\n" +
+ " \"put\": {\n" +
+ " \"latency-ms-sum\": 6,\n" +
+ " \"count\": 7\n" +
+ " }\n" +
+ " }\n" +
+ " },\n" +
+ " {\n" +
+ " \"node-index\": 3,\n" +
+ " \"min-current-replication-factor\": 2,\n" +
+ " \"ops-latency\": {\n" +
+ " \"put\": {\n" +
+ " \"latency-ms-sum\": 5,\n" +
+ " \"count\": 4\n" +
+ " }\n" +
+ " }\n" +
+ " },\n" +
+ " {\n" +
+ " \"node-index\": 5,\n" +
+ " \"min-current-replication-factor\": 2,\n" +
+ " \"ops-latency\": {\n" +
+ " \"put\": {\n" +
+ " \"latency-ms-sum\": 4,\n" +
+ " \"count\": 5\n" +
+ " }\n" +
+ " }\n" +
+ " },\n" +
+ " {\n" +
+ " \"node-index\": 7,\n" +
+ " \"min-current-replication-factor\": 2,\n" +
+ " \"ops-latency\": {\n" +
+ " \"put\": {\n" +
+ " \"latency-ms-sum\": 6,\n" +
+ " \"count\": 7\n" +
+ " }\n" +
+ " }\n" +
+ " }\n" +
+ " ]\n" +
+ " }\n" +
+ "}";
+ }
+}
diff --git a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/rpc/RPCCommunicatorTest.java b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/rpc/RPCCommunicatorTest.java
new file mode 100644
index 00000000000..6c41cfd9a75
--- /dev/null
+++ b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/rpc/RPCCommunicatorTest.java
@@ -0,0 +1,94 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.vespa.clustercontroller.core.rpc;
+
+import com.yahoo.jrt.Request;
+import com.yahoo.jrt.RequestWaiter;
+import com.yahoo.jrt.Target;
+import com.yahoo.vdslib.state.Node;
+import com.yahoo.vdslib.state.NodeType;
+import com.yahoo.vespa.clustercontroller.core.*;
+import org.junit.Test;
+import org.mockito.Mockito;
+
+import java.util.HashSet;
+import java.util.Set;
+
+import static org.hamcrest.MatcherAssert.assertThat;
+import static org.hamcrest.core.Is.is;
+import static org.hamcrest.core.IsNot.not;
+import static org.junit.Assert.assertTrue;
+import static org.mockito.Matchers.any;
+import static org.mockito.Matchers.eq;
+import static org.mockito.Mockito.mock;
+import static org.mockito.Mockito.when;
+
+public class RPCCommunicatorTest {
+
+    public static final int NODE_STATE_REQUEST_TIMEOUT_INTERVAL_MAX_MS = 10000;
+    public static final int NODE_STATE_REQUEST_TIMEOUT_INTERVAL_START_PERCENTAGE = 80;
+    public static final int NODE_STATE_REQUEST_TIMEOUT_INTERVAL_STOP_PERCENTAGE = 95;
+    public static final int INDEX = 0;
+    public static final int TEST_ITERATIONS = 500;
+    public static final int ROUNDTRIP_LATENCY_SECONDS = 2000;
+
+    /** Generated timeouts must stay inside the configured percentage window and vary between calls. */
+    @Test
+    public void testGenerateNodeStateRequestTimeoutMs() throws Exception {
+        final RPCCommunicator communicator = new RPCCommunicator(
+                null /* Timer */,
+                INDEX,
+                NODE_STATE_REQUEST_TIMEOUT_INTERVAL_MAX_MS,
+                NODE_STATE_REQUEST_TIMEOUT_INTERVAL_START_PERCENTAGE,
+                NODE_STATE_REQUEST_TIMEOUT_INTERVAL_STOP_PERCENTAGE,
+                0);
+
+        final Set<Integer> distinctTimeouts = new HashSet<>();
+        int smallest = 100000;
+        int largest = -1;
+        int remaining = TEST_ITERATIONS;
+        while (remaining-- > 0) {
+            int generated = communicator.generateNodeStateRequestTimeoutMs();
+            if (generated < smallest) smallest = generated;
+            if (generated > largest) largest = generated;
+            distinctTimeouts.add(generated);
+        }
+
+        final double upperBoundMs = NODE_STATE_REQUEST_TIMEOUT_INTERVAL_MAX_MS *
+                NODE_STATE_REQUEST_TIMEOUT_INTERVAL_STOP_PERCENTAGE / 100.;
+        final int lowerBoundMs = NODE_STATE_REQUEST_TIMEOUT_INTERVAL_START_PERCENTAGE *
+                NODE_STATE_REQUEST_TIMEOUT_INTERVAL_MAX_MS / 100;
+        assertTrue(largest <= upperBoundMs);
+        assertThat(smallest, is(not(largest)));
+        assertTrue(smallest >= lowerBoundMs);
+        assertTrue(distinctTimeouts.size() > TEST_ITERATIONS / 2);
+    }
+
+    /** Options propagated after construction must replace the values given to the constructor. */
+    @Test
+    public void testGenerateNodeStateRequestTimeoutMsWithUpdates() throws Exception {
+        final RPCCommunicator communicator = new RPCCommunicator(null /* Timer */, INDEX, 1, 1, 100, 0);
+
+        FleetControllerOptions options = new FleetControllerOptions(null /*clustername*/);
+        options.nodeStateRequestTimeoutEarliestPercentage = 100;
+        options.nodeStateRequestTimeoutLatestPercentage = 100;
+        options.nodeStateRequestTimeoutMS = NODE_STATE_REQUEST_TIMEOUT_INTERVAL_MAX_MS;
+        communicator.propagateOptions(options);
+
+        assertThat(communicator.generateNodeStateRequestTimeoutMs(),
+                   is(NODE_STATE_REQUEST_TIMEOUT_INTERVAL_MAX_MS));
+    }
+
+    /** The rpc timeout must be the max request timeout in seconds plus the roundtrip latency. */
+    @Test
+    public void testRoundtripLatency() throws Exception {
+        final Timer timer = new FakeTimer();
+        final RPCCommunicator communicator = new RPCCommunicator(
+                timer,
+                INDEX,
+                NODE_STATE_REQUEST_TIMEOUT_INTERVAL_MAX_MS,
+                NODE_STATE_REQUEST_TIMEOUT_INTERVAL_STOP_PERCENTAGE,
+                100,
+                ROUNDTRIP_LATENCY_SECONDS);
+
+        final NodeInfo nodeInfo = mock(NodeInfo.class);
+        final Target connection = mock(Target.class);
+        when(connection.isValid()).thenReturn(true);
+        when(nodeInfo.getConnection()).thenReturn(connection);
+
+        communicator.getNodeState(nodeInfo, null);
+
+        final double expectedTimeoutSeconds =
+                ROUNDTRIP_LATENCY_SECONDS + NODE_STATE_REQUEST_TIMEOUT_INTERVAL_MAX_MS/1000.0;
+        Mockito.verify(connection).invokeAsync(
+                (Request)any(),
+                eq(expectedTimeoutSeconds),
+                (RequestWaiter)any());
+    }
+}
diff --git a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/testutils/LogFormatter.java b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/testutils/LogFormatter.java
new file mode 100644
index 00000000000..d437053fcd9
--- /dev/null
+++ b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/testutils/LogFormatter.java
@@ -0,0 +1,34 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.vespa.clustercontroller.core.testutils;
+
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.InputStream;
+import java.util.logging.Formatter;
+import java.util.logging.LogManager;
+import java.util.logging.LogRecord;
+
+/**
+ * Log formatter for tests, writing one line per record:
+ * millis, level, logger name without its package prefix, and the message.
+ */
+public class LogFormatter extends Formatter {
+
+    private static boolean initialized = false;
+
+    /**
+     * Formats the given record on a single line terminated by a newline.
+     * A record without a logger name gets an empty name field.
+     */
+    @Override
+    public String format(LogRecord record) {
+        // getLoggerName() may return null, which used to cause a NullPointerException here.
+        String loggerName = record.getLoggerName();
+        String shortName = (loggerName == null)
+                ? ""
+                : loggerName.substring(loggerName.lastIndexOf('.') + 1);
+        return record.getMillis() + " " + record.getLevel() + " "
+                + shortName + " " + record.getMessage() + "\n";
+    }
+
+    /**
+     * Reads the logging configuration from src/test/resources/test.logging.properties.
+     * Only the first call does anything. Failures are reported on stderr and never thrown.
+     */
+    public synchronized static void initializeLogging() {
+        if (initialized) return;
+        initialized = true;
+        File f = new File("src/test/resources/test.logging.properties");
+        if (!f.exists()) {
+            System.err.println("Test logging property file does not exist");
+        }
+        // try-with-resources, so the stream is closed also when reading the configuration fails
+        try (InputStream inputStream = new FileInputStream(f)) {
+            LogManager.getLogManager().readConfiguration(inputStream);
+        } catch (Throwable t) {
+            System.err.println("Failed to initialize logging");
+            t.printStackTrace();
+        }
+    }
+}
diff --git a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/testutils/StateWaiter.java b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/testutils/StateWaiter.java
new file mode 100644
index 00000000000..db4879b89d4
--- /dev/null
+++ b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/testutils/StateWaiter.java
@@ -0,0 +1,114 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.vespa.clustercontroller.core.testutils;
+
+import com.yahoo.vdslib.state.ClusterState;
+import com.yahoo.vdslib.state.Node;
+import com.yahoo.vespa.clustercontroller.core.FakeTimer;
+import com.yahoo.vespa.clustercontroller.core.listeners.SystemStateListener;
+
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
+/**
+ * Old class used for waiting for something..
+ * Deprecated.. Use the Waiter class instead
+ */
+public class StateWaiter implements SystemStateListener {
+ private final FakeTimer timer;
+ protected ClusterState current;
+ private int stateUpdates = -1;
+
+ public StateWaiter(FakeTimer timer) {
+ this.timer = timer;
+ }
+
+ public void handleNewSystemState(ClusterState state) {
+ synchronized(timer) {
+ current = state;
+
+ ++stateUpdates;
+ timer.notifyAll();
+ }
+ }
+
+ public int getStateUpdates() { return Math.max(0, stateUpdates); }
+
+ public ClusterState getCurrentSystemState() {
+ synchronized(timer) {
+ return current;
+ }
+ }
+
+ public void waitForState(String stateRegex, long timeout) {
+ waitForState(stateRegex, timeout, 0);
+ }
+
+ /**
+ * WARNING: If timeIntervalToProvokeRetry is set != 0 that means time will can be set far into future
+ * and thus hit various unintended timeout periods. Only auto-step time if this is a non-issue.
+ */
+ public void waitForState(String stateRegex, long timeout, long timeIntervalToProvokeRetry) {
+ Pattern p = Pattern.compile(stateRegex);
+ long startTime = System.currentTimeMillis();
+ final long endTime = startTime + timeout;
+ int iteration = 0;
+ while (true) {
+ ClusterState currentClusterState;
+ synchronized(timer) {
+ currentClusterState = current;
+
+ if (currentClusterState != null) {
+ Matcher m = p.matcher(currentClusterState.toString());
+
+ if (m.matches()) {
+ return;
+ }
+ }
+ try{
+ if (timeIntervalToProvokeRetry == 0) {
+ timer.wait(endTime - startTime);
+ } else {
+ if (++iteration % 10 == 0) {
+ timer.advanceTime(timeIntervalToProvokeRetry);
+ }
+ timer.wait(10);
+ }
+ } catch (InterruptedException e) {
+ }
+ }
+ startTime = System.currentTimeMillis();
+ if (startTime >= endTime) {
+ throw new IllegalStateException("Timeout. Did not find a state matching " + stateRegex + " within timeout of " + timeout + " milliseconds. Current state is " + currentClusterState);
+ }
+ }
+ }
+ public void clear() {
+ synchronized(timer) {
+ current = null;
+ }
+ }
+
+ public void waitForInitProgressPassed(Node node, double minProgress, int timeoutMS) {
+ long startTime = System.currentTimeMillis();
+ long endTime = startTime + timeoutMS;
+ while (true) {
+ ClusterState currentClusterState;
+ synchronized(timer) {
+ currentClusterState = current;
+ if (currentClusterState != null) {
+ if (currentClusterState.getNodeState(node).getInitProgress() >= minProgress) {
+ return;
+ }
+ }
+ try{
+ timer.wait(endTime - startTime);
+ } catch (InterruptedException e) {
+ }
+ }
+ startTime = System.currentTimeMillis();
+ if (startTime >= endTime) {
+ throw new IllegalStateException("Timeout. Did not get to " + minProgress + " init progress on node " + node + " within timeout of " + timeoutMS + " ms. Current init progress is " + currentClusterState.getNodeState(node).getInitProgress());
+ }
+ }
+ }
+}
diff --git a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/testutils/WaitCondition.java b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/testutils/WaitCondition.java
new file mode 100644
index 00000000000..eae5f92278e
--- /dev/null
+++ b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/testutils/WaitCondition.java
@@ -0,0 +1,174 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.vespa.clustercontroller.core.testutils;
+
+import com.yahoo.vdslib.state.ClusterState;
+import com.yahoo.vdslib.state.Node;
+import com.yahoo.vdslib.state.NodeType;
+import com.yahoo.vespa.clustercontroller.core.DummyVdsNode;
+import com.yahoo.vespa.clustercontroller.core.FleetController;
+import com.yahoo.vespa.clustercontroller.core.listeners.SystemStateListener;
+
+import java.util.Collection;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
+/**
+* @author <a href="mailto:humbe@yahoo-inc.com">Haakon Humberset</a>
+*/
+public interface WaitCondition {
+
+ /** Return null if met, why not if it is not met. */
+ public String isConditionMet();
+
+    /**
+     * Base class for conditions on the cluster state. Registers itself as a system state listener
+     * on the fleet controller, and on each new state stores it and notifies all waiters on the monitor.
+     */
+    public abstract class StateWait implements WaitCondition {
+        private final Object monitor;
+        protected ClusterState currentState;
+
+        public StateWait(FleetController fc, Object monitor) {
+            this.monitor = monitor;
+            fc.addSystemStateListener(new SystemStateListener() {
+                @Override
+                public void handleNewSystemState(ClusterState state) {
+                    synchronized (StateWait.this.monitor) {
+                        currentState = state;
+                        StateWait.this.monitor.notifyAll();
+                    }
+                }
+            });
+        }
+
+        /** Returns the last state received, read while holding the monitor. */
+        public ClusterState getCurrentState() {
+            synchronized (monitor) {
+                return currentState;
+            }
+        }
+    }
+
+    /**
+     * Condition which is met once the string form of the cluster state matches a regex and,
+     * if nodes are given with {@link #includeNotifyingNodes}, each of those nodes has received
+     * a matching state too.
+     */
+    public class RegexStateMatcher extends StateWait {
+
+        private final Pattern pattern;
+        private Collection<DummyVdsNode> nodesToCheck;
+        private ClusterState lastCheckedState;
+
+        public RegexStateMatcher(String regex, FleetController fc, Object monitor) {
+            super(fc, monitor);
+            pattern = Pattern.compile(regex);
+        }
+
+        /** Makes this also require that the given nodes have the wanted state; returns this for chaining. */
+        public RegexStateMatcher includeNotifyingNodes(Collection<DummyVdsNode> nodes) {
+            nodesToCheck = nodes;
+            return this;
+        }
+
+        /** Returns null if the condition is met, otherwise a description of what is missing. */
+        @Override
+        public String isConditionMet() {
+            if (currentState == null) {
+                return "No cluster state defined yet";
+            }
+            lastCheckedState = currentState;
+            if ( ! pattern.matcher(lastCheckedState.toString()).matches()) {
+                return "Cluster state mismatch";
+            }
+            if (nodesToCheck != null) {
+                for (DummyVdsNode node : nodesToCheck) {
+                    if (node.getClusterState() == null) {
+                        return "Node " + node + " has not received a cluster state yet";
+                    }
+                    // The state held by a node is compared with its timestamp parts removed
+                    if (! pattern.matcher(withoutTimestamps(node.getClusterState().toString())).matches()) {
+                        return "Node " + node + " state mismatch.\n wanted: " + pattern + "\n is: " + node.getClusterState().toString();
+                    }
+                    if (node.getStateCommunicationVersion() > 0) {
+                        if (!node.hasPendingGetNodeStateRequest()) {
+                            return "Node " + node + " has not received another get node state request yet";
+                        }
+                    }
+                }
+            }
+            return null;
+        }
+
+        /** Returns the given state string without the space-separated parts which contain ".t" */
+        private String withoutTimestamps(String state) {
+            String[] parts = state.split(" ");
+            StringBuilder b = new StringBuilder();
+            for (String part : parts) {
+                if ( ! part.contains(".t"))
+                    b.append(part).append(" ");
+            }
+            if (b.length() > 0)
+                b.setLength(b.length() - 1);
+            return b.toString();
+        }
+
+        @Override
+        public String toString() {
+            // Every value gets exactly one opening and one closing quote. There used to be
+            // two closing quotes after "last checked" and none after "current".
+            StringBuilder sb = new StringBuilder();
+            sb.append("RegexStateMatcher(\n wanted: '").append(pattern.pattern())
+              .append("'\n last checked: '").append(lastCheckedState)
+              .append("'\n current: '").append(currentState).append("')");
+            return sb.toString();
+        }
+    }
+
+    /** Condition which is met once the init progress of a node has reached a given minimum. */
+    public class InitProgressPassedMatcher extends StateWait {
+        private final Node node;
+        private final double minProgress;
+
+        public InitProgressPassedMatcher(Node n, double minProgress, FleetController fc, Object monitor) {
+            super(fc, monitor);
+            this.node = n;
+            this.minProgress = minProgress;
+        }
+
+        /** Returns null if the condition is met, otherwise a description of what is missing. */
+        @Override
+        public String isConditionMet() {
+            if (currentState == null) {
+                return "No cluster state defined yet";
+            }
+            double currentProgress = currentState.getNodeState(node).getInitProgress();
+            return (currentProgress < minProgress)
+                    ? "Current progress of node " + node + " at " + currentProgress + " is less than wanted progress of " + minProgress
+                    : null;
+        }
+
+        @Override
+        public String toString() {
+            return "InitProgressPassedMatcher(" + node + ", " + minProgress + ")";
+        }
+    }
+
+    /**
+     * Condition which is met once exactly nodeCount nodes, counted over all node types,
+     * have min used bits equal to bitCount in the current cluster state.
+     */
+    public static class MinUsedBitsMatcher extends StateWait {
+        private final int bitCount;
+        private final int nodeCount;
+
+        public MinUsedBitsMatcher(int bitCount, int nodeCount, FleetController fc, Object monitor) {
+            super(fc, monitor);
+            this.bitCount = bitCount;
+            this.nodeCount = nodeCount;
+        }
+
+        /** Returns null if the condition is met, otherwise a description of what is missing. */
+        @Override
+        public String isConditionMet() {
+            if (currentState == null) {
+                return "No cluster state defined yet";
+            }
+            int matchingNodes = 0;
+            for (NodeType type : NodeType.getTypes()) {
+                // Named differently from the nodeCount field, which is what is compared against below
+                final int nodesOfType = currentState.getNodeCount(type);
+                for (int index = 0; index < nodesOfType; ++index) {
+                    if (currentState.getNodeState(new Node(type, index)).getMinUsedBits() == bitCount) {
+                        ++matchingNodes;
+                    }
+                }
+            }
+            if (matchingNodes != nodeCount) {
+                return "Currently, " + matchingNodes + " and not " + nodeCount + " nodes have " + bitCount + " min bits used set";
+            }
+            return null;
+        }
+
+        @Override
+        public String toString() {
+            return "MinUsedBitsMatcher(" + bitCount + ", " + nodeCount + ")";
+        }
+    }
+}
diff --git a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/testutils/WaitTask.java b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/testutils/WaitTask.java
new file mode 100644
index 00000000000..24333e28cd6
--- /dev/null
+++ b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/testutils/WaitTask.java
@@ -0,0 +1,41 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.vespa.clustercontroller.core.testutils;
+
+import com.yahoo.vespa.clustercontroller.core.FleetController;
+import com.yahoo.vespa.clustercontroller.core.NodeInfo;
+
+/** A wait task is something that is performed once in a while while waiting for something. */
+public abstract class WaitTask {
+ public static final int defaultTaskFrequencyMillis = 1; // interval in milliseconds returned by getWaitTaskFrequencyInMillis() unless a subclass overrides it
+
+ public abstract boolean performWaitTask(); // Waiter.Impl.wait treats a true return as "work was done": it notifies the monitor and skips sleeping for that iteration
+
+ public int getWaitTaskFrequencyInMillis() { // Waiter.Impl.wait uses this as an upper bound on how long it waits on the monitor between checks
+ return defaultTaskFrequencyMillis;
+ }
+
+ public static class StateResender extends WaitTask { // wait task that resets every node's next get-state attempt time to 0
+ public final FleetController fleetController;
+
+ public StateResender(FleetController fc) {
+ fleetController = fc;
+ }
+
+ @Override
+ public boolean performWaitTask() {
+ boolean didWork = false;
+ synchronized (fleetController.getMonitor()) { // node info is read and modified while holding the fleet controller's monitor
+ for (NodeInfo info : fleetController.getCluster().getNodeInfo()) {
+ if (info.getTimeForNextStateRequestAttempt() != 0) didWork = true; // report work if any node had a nonzero next-attempt time before the reset
+ info.setNextGetStateAttemptTime(0); // presumably makes the controller request node state again without delay - confirm against NodeInfo
+ }
+ }
+ return didWork;
+ }
+
+ @Override
+ public String toString() {
+ return "GetNodeStateResender"; // note: this label differs from the class name StateResender
+ }
+ }
+}
diff --git a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/testutils/Waiter.java b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/testutils/Waiter.java
new file mode 100644
index 00000000000..a6789ae22e5
--- /dev/null
+++ b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/testutils/Waiter.java
@@ -0,0 +1,105 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.vespa.clustercontroller.core.testutils;
+
+import com.yahoo.log.LogLevel;
+import com.yahoo.vdslib.state.ClusterState;
+import com.yahoo.vdslib.state.Node;
+import com.yahoo.vespa.clustercontroller.core.DummyVdsNode;
+import com.yahoo.vespa.clustercontroller.core.FleetController;
+
+import java.util.LinkedList;
+import java.util.List;
+import java.util.logging.Logger;
+
+public interface Waiter { // test utility API: block until the cluster state satisfies a condition, then return that state
+
+ public interface DataRetriever { // supplies the objects and the default timeout that Impl works with
+ public Object getMonitor(); // object that Impl.wait synchronizes, waits and notifies on
+ public FleetController getFleetController();
+ public List<DummyVdsNode> getDummyNodes();
+ public int getTimeoutMS(); // timeout in milliseconds used by the overloads that take no explicit timeout
+ }
+
+ public ClusterState waitForState(String state) throws Exception; // state is a pattern handed to WaitCondition.RegexStateMatcher by Impl
+ public ClusterState waitForState(String state, int timeoutMS) throws Exception;
+ public ClusterState waitForStableSystem() throws Exception;
+ public ClusterState waitForStableSystem(int nodeCount) throws Exception; // Impl waits for a state matching "version:\d+ distributor:<nodeCount> storage:<nodeCount>"
+ public ClusterState waitForInitProgressPassed(Node n, double progress);
+ public ClusterState waitForClusterStateIncludingNodesWithMinUsedBits(int bitcount, int nodecount);
+ public void wait(WaitCondition c, WaitTask wt, int timeoutMS); // Impl accepts a null wt and throws IllegalStateException on timeout
+
+ public static class Impl implements Waiter { // implementation that polls a WaitCondition under the monitor from DataRetriever
+
+ private static final Logger log = Logger.getLogger(Impl.class.getName());
+ private final DataRetriever data;
+
+ public Impl(DataRetriever data) {
+ this.data = data;
+ }
+
+ public ClusterState waitForState(String state) throws Exception { return waitForState(state, data.getTimeoutMS()); }
+ public ClusterState waitForState(String state, int timeoutMS) throws Exception {
+ LinkedList<DummyVdsNode> nodesToCheck = new LinkedList<>();
+ for(DummyVdsNode node : data.getDummyNodes()) {
+ if (node.isConnected()) nodesToCheck.add(node); // only nodes connected at call time are passed to includeNotifyingNodes
+ }
+ WaitCondition.StateWait swc = new WaitCondition.RegexStateMatcher(state, data.getFleetController(), data.getMonitor()).includeNotifyingNodes(nodesToCheck);
+ wait(swc, new WaitTask.StateResender(data.getFleetController()), timeoutMS);
+ return swc.getCurrentState();
+ }
+ public ClusterState waitForStableSystem() throws Exception {
+ return waitForStableSystem(data.getDummyNodes().size() / 2); // half the dummy node count - presumably one distributor and one storage dummy node per index; confirm
+ }
+ public ClusterState waitForStableSystem(int nodeCount) throws Exception {
+ WaitCondition.StateWait swc = new WaitCondition.RegexStateMatcher("version:\\d+ distributor:"+nodeCount+" storage:"+nodeCount, data.getFleetController(), data.getMonitor()).includeNotifyingNodes(data.getDummyNodes());
+ wait(swc, new WaitTask.StateResender(data.getFleetController()), data.getTimeoutMS());
+ return swc.getCurrentState();
+ }
+ public ClusterState waitForInitProgressPassed(Node n, double progress) {
+ WaitCondition.StateWait swc = new WaitCondition.InitProgressPassedMatcher(n, progress, data.getFleetController(), data.getMonitor());
+ wait(swc, new WaitTask.StateResender(data.getFleetController()), data.getTimeoutMS());
+ return swc.getCurrentState();
+ }
+ public ClusterState waitForClusterStateIncludingNodesWithMinUsedBits(int bitcount, int nodecount) {
+ WaitCondition.StateWait swc = new WaitCondition.MinUsedBitsMatcher(bitcount, nodecount, data.getFleetController(), data.getMonitor());
+ wait(swc, new WaitTask.StateResender(data.getFleetController()), data.getTimeoutMS());
+ return swc.getCurrentState();
+ }
+
+ public final void wait(WaitCondition c, WaitTask wt, int timeoutMS) { // re-checks c under the monitor until it reports null (met) or timeoutMS has elapsed
+ log.log(LogLevel.INFO, "Waiting for " + c + (wt == null ? "" : " with wait task " + wt));
+ final long startTime = System.currentTimeMillis();
+ final long endTime = startTime + timeoutMS;
+ String lastReason = null;
+ while (true) {
+ synchronized (data.getMonitor()) {
+ String reason = c.isConditionMet(); // null means the condition is met; otherwise the text explains what is missing
+ if (reason == null) {
+ log.log(LogLevel.INFO, "Condition met. Returning");
+ return;
+ }
+ if (lastReason == null || !lastReason.equals(reason)) { // log the reason only when it changes, so identical lines are not repeated every iteration
+ log.log(LogLevel.INFO, "Wait condition not met: " + reason);
+ lastReason = reason;
+ }
+ try {
+ boolean allowWait = true;
+ if (wt != null) {
+ if (wt.performWaitTask()) { // the task did work: wake threads waiting on the monitor and re-check without sleeping
+ data.getMonitor().notifyAll();
+ allowWait = false;
+ }
+ }
+ final long timeLeft = endTime - System.currentTimeMillis();
+ if (timeLeft <= 0) { // the deadline is evaluated after the condition check and the wait task of this iteration
+ throw new IllegalStateException("Timed out waiting max " + timeoutMS + " ms for " + c + (wt == null ? "" : "\n with wait task " + wt) + ",\n reason: " + reason);
+ }
+ if (allowWait) data.getMonitor().wait(wt == null ? WaitTask.defaultTaskFrequencyMillis : Math.min(wt.getWaitTaskFrequencyInMillis(), timeLeft)); // Object.wait releases the monitor while sleeping; capped by the remaining time only when a task is present
+ } catch (InterruptedException e) { // NOTE(review): the interruption is swallowed and the loop continues; the interrupt status is not restored
+ }
+ }
+ }
+ }
+ }
+
+} \ No newline at end of file