aboutsummaryrefslogtreecommitdiffstats
path: root/clustercontroller-core/src
diff options
context:
space:
mode:
authorJon Bratseth <bratseth@yahoo-inc.com>2016-06-15 23:09:44 +0200
committerJon Bratseth <bratseth@yahoo-inc.com>2016-06-15 23:09:44 +0200
commit72231250ed81e10d66bfe70701e64fa5fe50f712 (patch)
tree2728bba1131a6f6e5bdf95afec7d7ff9358dac50 /clustercontroller-core/src
Publish
Diffstat (limited to 'clustercontroller-core/src')
-rw-r--r--clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/ClusterEvent.java44
-rw-r--r--clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/ClusterInfo.java124
-rw-r--r--clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/ClusterStateView.java123
-rw-r--r--clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/ClusterStatsAggregator.java124
-rw-r--r--clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/Communicator.java24
-rw-r--r--clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/ContentCluster.java216
-rw-r--r--clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/DistributorNodeInfo.java42
-rw-r--r--clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/Event.java10
-rw-r--r--clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/EventLog.java142
-rw-r--r--clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/EventLogInterface.java18
-rw-r--r--clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/FleetController.java794
-rw-r--r--clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/FleetControllerOptions.java210
-rw-r--r--clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/FleetControllerTask.java9
-rw-r--r--clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/GetNodeStateRequest.java68
-rw-r--r--clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/LatencyStats.java31
-rw-r--r--clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/LeafGroups.java31
-rw-r--r--clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/MasterElectionHandler.java287
-rw-r--r--clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/MasterInterface.java9
-rw-r--r--clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/MetricUpdater.java91
-rw-r--r--clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/NodeEvent.java48
-rw-r--r--clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/NodeInfo.java442
-rw-r--r--clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/NodeLookup.java14
-rw-r--r--clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/NodeMergeStats.java152
-rw-r--r--clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/NodeStateChangeChecker.java244
-rw-r--r--clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/NodeStateGatherer.java254
-rw-r--r--clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/RealTimer.java61
-rw-r--r--clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/RemoteClusterControllerTask.java49
-rw-r--r--clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/RemoteClusterControllerTaskScheduler.java6
-rw-r--r--clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/SetClusterStateRequest.java48
-rw-r--r--clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/StatsForStorageNodes.java25
-rw-r--r--clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/StorageMergeStats.java64
-rw-r--r--clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/StorageNodeInfo.java20
-rw-r--r--clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/StorageNodeStats.java22
-rw-r--r--clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/StorageNodeStatsContainer.java27
-rw-r--r--clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/SystemStateBroadcaster.java171
-rw-r--r--clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/SystemStateGenerator.java771
-rw-r--r--clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/Timer.java10
-rw-r--r--clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/config/.gitignore0
-rw-r--r--clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/database/Database.java85
-rw-r--r--clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/database/DatabaseHandler.java417
-rw-r--r--clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/database/MasterDataGatherer.java203
-rw-r--r--clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/database/ZooKeeperDatabase.java345
-rw-r--r--clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/hostinfo/Distributor.java19
-rw-r--r--clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/hostinfo/HostInfo.java70
-rw-r--r--clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/hostinfo/Metrics.java59
-rw-r--r--clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/hostinfo/StorageNode.java100
-rw-r--r--clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/hostinfo/StorageNodeStatsBridge.java54
-rw-r--r--clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/hostinfo/Vtag.java23
-rw-r--r--clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/listeners/NodeAddedOrRemovedListener.java14
-rw-r--r--clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/listeners/NodeStateOrHostInfoChangeHandler.java21
-rw-r--r--clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/listeners/SystemStateListener.java8
-rw-r--r--clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/package-info.java5
-rw-r--r--clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/restapiv2/ClusterControllerStateRestAPI.java150
-rw-r--r--clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/restapiv2/Id.java65
-rw-r--r--clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/restapiv2/MissingIdException.java19
-rw-r--r--clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/restapiv2/OtherMasterIndexException.java12
-rw-r--r--clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/restapiv2/Request.java63
-rw-r--r--clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/restapiv2/Response.java129
-rw-r--r--clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/restapiv2/UnitPathResolver.java95
-rw-r--r--clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/restapiv2/package-info.java5
-rw-r--r--clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/restapiv2/requests/ClusterListRequest.java49
-rw-r--r--clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/restapiv2/requests/ClusterStateRequest.java36
-rw-r--r--clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/restapiv2/requests/NodeStateRequest.java62
-rw-r--r--clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/restapiv2/requests/PartitionStateRequest.java69
-rw-r--r--clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/restapiv2/requests/ServiceStateRequest.java37
-rw-r--r--clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/restapiv2/requests/SetNodeStateRequest.java104
-rw-r--r--clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/restapiv2/requests/SetNodeStatesForClusterRequest.java87
-rw-r--r--clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/restapiv2/requests/VerboseReport.java6
-rw-r--r--clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/rpc/RPCCommunicator.java200
-rw-r--r--clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/rpc/RPCGetNodeStateRequest.java21
-rw-r--r--clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/rpc/RPCGetNodeStateWaiter.java62
-rw-r--r--clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/rpc/RPCSetClusterStateRequest.java17
-rw-r--r--clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/rpc/RPCSetClusterStateWaiter.java57
-rw-r--r--clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/rpc/RpcServer.java307
-rw-r--r--clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/rpc/SlobrokClient.java220
-rw-r--r--clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/status/ClusterStateRequestHandler.java24
-rw-r--r--clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/status/LegacyIndexPageRequestHandler.java133
-rw-r--r--clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/status/LegacyNodePageRequestHandler.java65
-rw-r--r--clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/status/NodeHealthRequestHandler.java37
-rw-r--r--clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/status/RunDataExtractor.java18
-rw-r--r--clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/status/StaticResourceRequestHandler.java66
-rw-r--r--clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/status/StatusHandler.java129
-rw-r--r--clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/status/package-info.java5
-rw-r--r--clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/status/statuspage/HtmlTable.java166
-rw-r--r--clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/status/statuspage/StatusPageResponse.java84
-rw-r--r--clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/status/statuspage/StatusPageServer.java403
-rw-r--r--clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/status/statuspage/StatusPageServerInterface.java10
-rw-r--r--clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/status/statuspage/VdsClusterHtmlRendrer.java304
-rw-r--r--clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/status/statuspage/package-info.java6
-rw-r--r--clustercontroller-core/src/main/resources/configdefinitions/.gitignore0
-rw-r--r--clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/ClusterStateViewTest.java111
-rw-r--r--clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/ClusterStatsAggregatorTest.java217
-rw-r--r--clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/ContentClusterHtmlRendrerTest.java75
-rw-r--r--clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/DatabaseTest.java166
-rw-r--r--clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/DistributionBitCountTest.java110
-rw-r--r--clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/DummyCommunicator.java114
-rw-r--r--clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/DummyVdsNode.java504
-rw-r--r--clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/DummyVdsNodeOptions.java9
-rw-r--r--clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/EventLogTest.java63
-rw-r--r--clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/FakeTimer.java34
-rw-r--r--clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/FleetControllerTest.java555
-rw-r--r--clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/LeafGroupsTest.java67
-rw-r--r--clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/MasterElectionTest.java440
-rw-r--r--clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/NoZooKeeperTest.java25
-rw-r--r--clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/NodeSlobrokConfigurationMembershipTest.java117
-rw-r--r--clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/NodeStateChangeCheckerTest.java349
-rw-r--r--clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/RpcServerTest.java627
-rw-r--r--clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/SlobrokTest.java116
-rw-r--r--clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/StateChangeTest.java1135
-rw-r--r--clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/StateGatherTest.java81
-rw-r--r--clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/StatsForStorageNodeTest.java44
-rw-r--r--clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/StatusPagesTest.java385
-rw-r--r--clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/StorageNodeStatsContainerTest.java43
-rw-r--r--clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/StorageNodeStatsTest.java25
-rw-r--r--clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/SystemStateGeneratorTest.java198
-rw-r--r--clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/WantedStateTest.java82
-rw-r--r--clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/ZooKeeperStressTest.java157
-rw-r--r--clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/ZooKeeperTestServer.java86
-rw-r--r--clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/hostinfo/HostInfoTest.java107
-rw-r--r--clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/hostinfo/StorageNodeStatsBridgeTest.java67
-rw-r--r--clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/mocks/TestEventLog.java56
-rw-r--r--clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/restapiv2/ClusterControllerMock.java85
-rw-r--r--clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/restapiv2/ClusterListTest.java51
-rw-r--r--clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/restapiv2/ClusterTest.java58
-rw-r--r--clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/restapiv2/NodeTest.java200
-rw-r--r--clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/restapiv2/NotMasterTest.java137
-rw-r--r--clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/restapiv2/PartitionTest.java64
-rw-r--r--clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/restapiv2/RequestTest.java33
-rw-r--r--clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/restapiv2/ServiceTest.java121
-rw-r--r--clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/restapiv2/SetNodeStateTest.java348
-rw-r--r--clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/restapiv2/StateRestApiTest.java170
-rw-r--r--clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/rpc/RPCCommunicatorTest.java94
-rw-r--r--clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/testutils/LogFormatter.java34
-rw-r--r--clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/testutils/StateWaiter.java114
-rw-r--r--clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/testutils/WaitCondition.java174
-rw-r--r--clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/testutils/WaitTask.java41
-rw-r--r--clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/testutils/Waiter.java105
-rw-r--r--clustercontroller-core/src/test/resources/test.logging.properties5
138 files changed, 17569 insertions, 0 deletions
diff --git a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/ClusterEvent.java b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/ClusterEvent.java
new file mode 100644
index 00000000000..23cb2ec7355
--- /dev/null
+++ b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/ClusterEvent.java
@@ -0,0 +1,44 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.vespa.clustercontroller.core;
+
+public class ClusterEvent implements Event{
+
+ enum Type {
+ SYSTEMSTATE,
+ MASTER_ELECTION
+ }
+
+ private String description;
+ private long timeMs = 0;
+ private Type type;
+
+ public ClusterEvent(Type type, String description, long timeMs) {
+ this.type = type;
+ this.description = description;
+ this.timeMs = timeMs;
+ }
+
+ @Override
+ public long getTimeMs() {
+ return timeMs;
+ }
+
+ @Override
+ public String getDescription() {
+ return description;
+ }
+
+ public Type getType() {
+ return type;
+ }
+
+ @Override
+ public String toString() {
+ return "Cluster event type " + type + " @" + timeMs + ": " + description;
+ }
+
+ @Override
+ public String getCategory() {
+ return type.toString();
+ }
+}
diff --git a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/ClusterInfo.java b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/ClusterInfo.java
new file mode 100644
index 00000000000..19f8f81c628
--- /dev/null
+++ b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/ClusterInfo.java
@@ -0,0 +1,124 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.vespa.clustercontroller.core;
+
+import com.yahoo.vdslib.distribution.ConfiguredNode;
+import com.yahoo.vdslib.distribution.Distribution;
+import com.yahoo.vdslib.state.Node;
+import com.yahoo.vdslib.state.NodeType;
+
+import java.util.Collection;
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.Map;
+import java.util.Set;
+import java.util.TreeMap;
+
+/**
+ * Detail information about the current state of all the distributor and storage nodes of the cluster.
+ *
+ * @author hakon
+ * @author bratseth
+ */
+public class ClusterInfo {
+
+ /** The configured nodes of this cluster, indexed by node index */
+ private final Map<Integer, ConfiguredNode> nodes = new HashMap<>();
+
+ /** Information about the current state of distributors */
+ private final Map<Integer, DistributorNodeInfo> distributorNodeInfo = new TreeMap<>();
+ /** Information about the current state of storage nodes */
+ private final Map<Integer, StorageNodeInfo> storageNodeInfo = new TreeMap<>();
+ /** Information about the current state of all nodes - always consists of both sets of nodes in the two maps above */
+ private final Map<Node, NodeInfo> allNodeInfo = new TreeMap<>(); // TODO: Remove
+
+ DistributorNodeInfo getDistributorNodeInfo(int index) { return distributorNodeInfo.get(index); }
+
+ StorageNodeInfo getStorageNodeInfo(int index) { return storageNodeInfo.get(index); }
+
+ NodeInfo getNodeInfo(NodeType type, int index) { return getNodeInfo(new Node(type, index)); }
+
+ /** Returns information about the given node id, or null if this node does not exist */
+ public NodeInfo getNodeInfo(Node node) { return allNodeInfo.get(node); }
+
+ Collection<DistributorNodeInfo> getDistributorNodeInfo() { return Collections.unmodifiableCollection(distributorNodeInfo.values()); }
+
+ Collection<StorageNodeInfo> getStorageNodeInfo() { return Collections.unmodifiableCollection(storageNodeInfo.values()); }
+
+ Collection<NodeInfo> getAllNodeInfo() { return Collections.unmodifiableCollection(allNodeInfo.values()); }
+
+ /** Returns the configured nodes of this as a read-only map indexed on node index (distribution key) */
+ Map<Integer, ConfiguredNode> getConfiguredNodes() { return Collections.unmodifiableMap(nodes); }
+
+ /** Sets the nodes which belongs to this cluster */
+ void setNodes(Collection<ConfiguredNode> newNodes, ContentCluster owner, Distribution distribution) {
+ // Remove info for removed nodes
+ Set<ConfiguredNode> newNodesSet = new HashSet<>(newNodes);
+ for (ConfiguredNode existingNode : this.nodes.values()) {
+ if ( ! newNodesSet.contains(existingNode)) {
+ Node existingStorageNode = storageNodeInfo.remove(existingNode.index()).getNode();
+ Node existingDistributorNode = distributorNodeInfo.remove(existingNode.index()).getNode();
+ allNodeInfo.remove(existingDistributorNode);
+ allNodeInfo.remove(existingStorageNode);
+ }
+ }
+
+ // Add and update new nodes info
+ for (ConfiguredNode node : newNodes) {
+ if ( ! nodes.containsKey(node.index())) { // add new node info
+ addNodeInfo(new DistributorNodeInfo(owner, node.index(), null, distribution));
+ addNodeInfo(new StorageNodeInfo(owner, node.index(), node.retired(), null, distribution));
+ }
+ else {
+ getStorageNodeInfo(node.index()).setConfiguredRetired(node.retired());
+ }
+ }
+
+ // Update node set
+ nodes.clear();
+ for (ConfiguredNode node : newNodes)
+ this.nodes.put(node.index(), node);
+ }
+
+ private void addNodeInfo(NodeInfo nodeInfo) {
+ if (nodeInfo instanceof DistributorNodeInfo)
+ distributorNodeInfo.put(nodeInfo.getNodeIndex(), (DistributorNodeInfo)nodeInfo);
+ else
+ storageNodeInfo.put(nodeInfo.getNodeIndex(), (StorageNodeInfo)nodeInfo);
+ allNodeInfo.put(nodeInfo.getNode(), nodeInfo);
+ nodeInfo.setReportedState(nodeInfo.getReportedState().setDescription("Node not seen in slobrok."), 0);
+ }
+
+ /** Returns true if no nodes are down or unknown */
+ boolean allStatesReported() {
+ if (nodes.isEmpty()) return false;
+ for (ConfiguredNode node : nodes.values()) {
+ if (getDistributorNodeInfo(node.index()).getReportedState().getState().oneOf("d-")) return false;
+ if (getStorageNodeInfo(node.index()).getReportedState().getState().oneOf("d-")) return false;
+ }
+ return true;
+ }
+
+ /**
+ * Sets the rpc address of a node. If the node does not exist this does nothing.
+ *
+ * @return the info to which an rpc address is set, or null if none
+ */
+ public NodeInfo setRpcAddress(Node node, String rpcAddress) {
+ NodeInfo nodeInfo = getInfo(node);
+ if (nodeInfo != null)
+ nodeInfo.setRpcAddress(rpcAddress);
+ return nodeInfo;
+ }
+ // TODO: Do all mutation of node info through setters in this
+
+ /** Returns the node info object for a given node identifier */
+ private NodeInfo getInfo(Node node) {
+ switch (node.getType()) {
+ case DISTRIBUTOR : return getDistributorNodeInfo(node.getIndex());
+ case STORAGE : return getStorageNodeInfo(node.getIndex());
+ default : throw new IllegalArgumentException("No node type " + node.getType().toString());
+ }
+ }
+
+}
diff --git a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/ClusterStateView.java b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/ClusterStateView.java
new file mode 100644
index 00000000000..3444f4c2540
--- /dev/null
+++ b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/ClusterStateView.java
@@ -0,0 +1,123 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.vespa.clustercontroller.core;
+
+import com.yahoo.log.LogLevel;
+import com.yahoo.vdslib.state.ClusterState;
+import com.yahoo.vdslib.state.Node;
+import com.yahoo.vdslib.state.NodeState;
+import com.yahoo.vdslib.state.NodeType;
+import com.yahoo.vdslib.state.State;
+import com.yahoo.vespa.clustercontroller.core.hostinfo.HostInfo;
+import com.yahoo.vespa.clustercontroller.core.hostinfo.StorageNodeStatsBridge;
+
+import java.text.ParseException;
+import java.util.HashSet;
+import java.util.Map;
+import java.util.Set;
+import java.util.logging.Logger;
+
+/**
+ * The Cluster Controller's view of the cluster given a particular state version. Some parts of the view
+ * are static and only depend on the state version, e.g. which nodes are UP or DOWN. These static parts
+ * are mostly represented by the ClusterState. The dynamic parts include stats for tracking outstanding
+ * merges before steady-state is reached.
+ *
+ * @author hakon
+ * @since 5.33
+ */
+public class ClusterStateView {
+
+ private static Logger log = Logger.getLogger(ClusterStateView.class.getName());
+ private final ClusterState clusterState;
+ private final ClusterStatsAggregator statsAggregator;
+ private final MetricUpdater metricUpdater;
+
+ /**
+ * @param metricUpdater may be null, in which case no stats will be reported.
+ */
+ public static ClusterStateView create(String serializedClusterState, MetricUpdater metricUpdater)
+ throws ParseException {
+ ClusterState clusterState = new ClusterState(serializedClusterState);
+ return new ClusterStateView(clusterState, createNewAggregator(clusterState, metricUpdater), metricUpdater);
+ }
+
+ private static ClusterStatsAggregator createNewAggregator(ClusterState clusterState, MetricUpdater metricUpdater) {
+ Set<Integer> upDistributors = getIndicesOfUpNodes(clusterState, NodeType.DISTRIBUTOR);
+ Set<Integer> upStorageNodes = getIndicesOfUpNodes(clusterState, NodeType.STORAGE);
+ return new ClusterStatsAggregator(upDistributors, upStorageNodes, metricUpdater);
+ }
+
+ ClusterStateView(ClusterState clusterState, ClusterStatsAggregator statsAggregator, MetricUpdater metricUpdater) {
+ this.clusterState = clusterState;
+ this.statsAggregator = statsAggregator;
+ this.metricUpdater = metricUpdater;
+ }
+
+ /**
+ * Returns the set of nodes that are up for a given node type. Non-private for testing.
+ */
+ static Set<Integer> getIndicesOfUpNodes(ClusterState clusterState, NodeType type) {
+ int nodeCount = clusterState.getNodeCount(type);
+
+ Set<Integer> nodesBeingUp = new HashSet<>();
+ for (int i = 0; i < nodeCount; ++i) {
+ Node node = new Node(type, i);
+ NodeState nodeState = clusterState.getNodeState(node);
+ State state = nodeState.getState();
+ if (state == State.UP || state == State.INITIALIZING ||
+ state == State.RETIRED || state == State.MAINTENANCE) {
+ nodesBeingUp.add(i);
+ }
+ }
+
+ return nodesBeingUp;
+ }
+
+ /**
+ * Creates a new ClusterStateView which is set up with the same static view of the cluster state
+ * (i.e. the ClusterState is a clone of this instance's ClusterState), while transient and dynamic
+ * parts are cleared.
+ */
+ public ClusterStateView cloneForNewState() {
+ ClusterState clonedClusterState = clusterState.clone();
+ return new ClusterStateView(
+ clonedClusterState,
+ createNewAggregator(clonedClusterState, metricUpdater),
+ metricUpdater);
+ }
+
+ public ClusterState getClusterState() { return clusterState; }
+
+ public void handleUpdatedHostInfo(Map<Integer, String> hostnames, NodeInfo node, HostInfo hostInfo) {
+ if ( ! node.isDistributor()) return;
+
+ final int hostVersion;
+ if (hostInfo.getClusterStateVersionOrNull() == null) {
+ // TODO: Consider logging a warning in the future (>5.36).
+ // For now, a missing cluster state version probably means the content
+ // node has not been updated yet.
+ return;
+ } else {
+ hostVersion = hostInfo.getClusterStateVersionOrNull();
+ }
+ int currentStateVersion = clusterState.getVersion();
+
+ if (hostVersion != currentStateVersion) {
+ // The distributor may be old (null), or the distributor may not have updated
+ // to the latest state version just yet. We log here with fine, because it may
+ // also be a symptom of something wrong.
+ log.log(LogLevel.DEBUG, "Current state version is " + currentStateVersion +
+ ", while host info received from distributor " + node.getNodeIndex() +
+ " is " + hostVersion);
+ return;
+ }
+
+ statsAggregator.updateForDistributor(
+ hostnames, node.getNodeIndex(), StorageNodeStatsBridge.generate(hostInfo.getDistributor()));
+ }
+
+ public String toString() {
+ return clusterState.toString();
+ }
+
+}
diff --git a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/ClusterStatsAggregator.java b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/ClusterStatsAggregator.java
new file mode 100644
index 00000000000..a52034d10a2
--- /dev/null
+++ b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/ClusterStatsAggregator.java
@@ -0,0 +1,124 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.vespa.clustercontroller.core;
+
+import java.util.HashMap;
+import java.util.Map;
+import java.util.Set;
+import java.util.logging.Logger;
+
+import com.yahoo.log.LogLevel;
+
+/**
+ * A class that stores stats about outstanding merge operations for
+ * the current cluster state version, and exports metrics about these.
+ *
+ * Each distributor reports outstanding merge operations for the different
+ * storage nodes. These reports arrive with getnodestate RPC calls,
+ * and eventually ends up as calls to updateForDistributor().
+ * No assumptions are made on the sequence of getnodestate calls.
+ * For instance, it's perfectly fine for the calls to arrive in the
+ * following order:
+ * distributor 0
+ * distributor 1
+ * distributor 1
+ * distributor 0
+ * distributor 2
+ * ... etc
+ *
+ * Whereas the metrics we want, is how many merge operations are outstanding
+ * for a given storage nodes. So we need to keep track of the latest info
+ * from each distributor.
+ *
+ * @author hakon
+ * @since 5.34
+ */
+public class ClusterStatsAggregator {
+
+ private static Logger log = Logger.getLogger(ClusterStatsAggregator.class.getName());
+
+ private final Set<Integer> distributors;
+ private final MetricUpdater updater;
+
+ // Maps the distributor node index to a map of storage node index to the
+ // storage node's merge stats.
+ private final Map<Integer, StorageMergeStats> distributorToStats = new HashMap<>();
+
+ // This is only needed as an optimization. should just be the sum of distributorToStats' StorageMergeStats.
+ // Maps the storage node index to the aggregate merge stats for that storage node.
+ // This MUST be kept up-to-date with distributorToStats;
+ private final StorageMergeStats aggregatedStats;
+
+ private int hostToStatsMapHashCode = 0;
+
+ ClusterStatsAggregator(Set<Integer> distributors, Set<Integer> storageNodes, MetricUpdater updater) {
+ this.distributors = distributors;
+ aggregatedStats = new StorageMergeStats(storageNodes);
+ this.updater = updater;
+ }
+
+ /**
+ * Update the aggregator with the newest available stats from a distributor.
+ * Will update metrics if necessary.
+ */
+ void updateForDistributor(Map<Integer, String> hostnames, int distributorIndex, StorageMergeStats storageStats) {
+ if (!distributors.contains(distributorIndex)) {
+ return;
+ }
+
+ addStatsFromDistributor(distributorIndex, storageStats);
+
+ if (distributorToStats.size() < distributors.size()) {
+ // Not all distributors have reported their merge stats through getnodestate yet.
+ return;
+ }
+
+ Map<String, NodeMergeStats> hostToStatsMap = getHostToStatsMap(hostnames);
+ if (hostToStatsMap == null) {
+ return;
+ }
+
+ if (hostToStatsMapHashCode == 0 || hostToStatsMapHashCode != hostToStatsMap.hashCode()) {
+ updater.updateMergeOpMetrics(hostToStatsMap);
+ hostToStatsMapHashCode = hostToStatsMap.hashCode();
+ }
+ }
+
+ private Map<String, NodeMergeStats> getHostToStatsMap(Map<Integer, String> hostnames) {
+ Map<String, NodeMergeStats> hostToStatsMap = new HashMap<>(aggregatedStats.size());
+ for (NodeMergeStats nodeStats : aggregatedStats) {
+ // The hosts names are kept up-to-date from Slobrok, and MAY therefore be arbitrarily
+ // different from the node set used by aggregatedStats (and typically tied to a cluster state).
+ // If so, we will not pretend the returned map is complete, and will return null.
+ String host = hostnames.get(nodeStats.getNodeIndex());
+ if (host == null) {
+ log.log(LogLevel.DEBUG, "Failed to find the host name of storage node " + nodeStats.getNodeIndex() +
+ ". Skipping the report from " + ClusterStatsAggregator.class.getName());
+ return null;
+ }
+
+ hostToStatsMap.put(host, nodeStats);
+ }
+
+ return hostToStatsMap;
+ }
+
+ private void addStatsFromDistributor(int distributorIndex, StorageMergeStats storageStatsFromDistributor) {
+ StorageMergeStats previousStorageStats = distributorToStats.put(distributorIndex, storageStatsFromDistributor);
+
+ for (NodeMergeStats storageNode : aggregatedStats) {
+ Integer storageNodeIndex = storageNode.getNodeIndex();
+
+ NodeMergeStats statsToAdd = storageStatsFromDistributor.getStorageNode(storageNodeIndex);
+ if (statsToAdd != null) {
+ storageNode.add(statsToAdd);
+ }
+
+ if (previousStorageStats != null) {
+ NodeMergeStats statsToSubtract = storageStatsFromDistributor.getStorageNode(storageNodeIndex);
+ if (statsToSubtract != null) {
+ storageNode.subtract(statsToSubtract);
+ }
+ }
+ }
+ }
+}
diff --git a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/Communicator.java b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/Communicator.java
new file mode 100644
index 00000000000..977e4d87c48
--- /dev/null
+++ b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/Communicator.java
@@ -0,0 +1,24 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.vespa.clustercontroller.core;
+
+import com.yahoo.vdslib.state.ClusterState;
+
+/**
+ * Remote interface between the fleet controller and controlled nodes.
+ */
+public interface Communicator {
+
+ int TRANSIENT_ERROR = 9999;
+
+ interface Waiter<V> {
+ void done(V reply);
+ }
+
+ void propagateOptions(final FleetControllerOptions options);
+
+ void getNodeState(NodeInfo node, Waiter<GetNodeStateRequest> waiter);
+
+ void setSystemState(ClusterState state, NodeInfo node, Waiter<SetClusterStateRequest> waiter);
+
+ void shutdown();
+}
diff --git a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/ContentCluster.java b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/ContentCluster.java
new file mode 100644
index 00000000000..67681f87d92
--- /dev/null
+++ b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/ContentCluster.java
@@ -0,0 +1,216 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+/**
+ * @class VdsCluster
+ *
+ * Represents a VDS cluster.
+ */
+package com.yahoo.vespa.clustercontroller.core;
+
+import com.yahoo.vdslib.distribution.ConfiguredNode;
+import com.yahoo.vdslib.state.*;
+import com.yahoo.vdslib.distribution.Distribution;
+import com.yahoo.vdslib.distribution.Group;
+import com.yahoo.vespa.clustercontroller.core.status.statuspage.VdsClusterHtmlRendrer;
+import com.yahoo.vespa.clustercontroller.utils.staterestapi.requests.SetUnitStateRequest;
+
+import java.util.*;
+
+public class ContentCluster {
+
+ private final String clusterName;
+
+ private final ClusterInfo clusterInfo = new ClusterInfo();
+
+ private final Map<Node, Long> nodeStartTimestamps = new TreeMap<>();
+
+ private int slobrokGenerationCount = 0;
+
+ private int pollingFrequency = 5000;
+
+ private Distribution distribution;
+ private int minStorageNodesUp;
+ private double minRatioOfStorageNodesUp;
+
+ public ContentCluster(String clusterName, Collection<ConfiguredNode> configuredNodes, Distribution distribution,
+ int minStorageNodesUp, double minRatioOfStorageNodesUp) {
+ if (configuredNodes == null) throw new IllegalArgumentException("Nodes must be set");
+ this.clusterName = clusterName;
+ this.distribution = distribution;
+ this.minStorageNodesUp = minStorageNodesUp;
+ this.minRatioOfStorageNodesUp = minRatioOfStorageNodesUp;
+ setNodes(configuredNodes);
+ }
+
+ public void writeHtmlState(
+ final VdsClusterHtmlRendrer vdsClusterHtmlRendrer,
+ final StringBuilder sb,
+ final Timer timer,
+ final ClusterState state,
+ final Distribution distribution,
+ final FleetControllerOptions options,
+ final EventLog eventLog,
+ String pathPrefix) {
+ if (!pathPrefix.isEmpty()) {
+ pathPrefix += "/" + clusterName;
+ }
+
+ final VdsClusterHtmlRendrer.Table table =
+ vdsClusterHtmlRendrer.createNewClusterHtmlTable(clusterName, slobrokGenerationCount);
+
+ final List<Group> groups = LeafGroups.enumerateFrom(distribution.getRootGroup());
+
+ for (int j=0; j<groups.size(); ++j) {
+ final Group group = groups.get(j);
+ assert(group != null);
+ final String localName = group.getUnixStylePath();
+ assert(localName != null);
+ final TreeMap<Integer, NodeInfo> storageNodeInfoByIndex = new TreeMap<>();
+ final TreeMap<Integer, NodeInfo> distributorNodeInfoByIndex = new TreeMap<>();
+ for (ConfiguredNode configuredNode : group.getNodes()) {
+ storeNodeInfo(configuredNode.index(), NodeType.STORAGE, storageNodeInfoByIndex);
+ storeNodeInfo(configuredNode.index(), NodeType.DISTRIBUTOR, distributorNodeInfoByIndex);
+ }
+ table.renderNodes(
+ storageNodeInfoByIndex,
+ distributorNodeInfoByIndex,
+ timer,
+ state,
+ options.maxPrematureCrashes,
+ eventLog,
+ pathPrefix,
+ localName);
+ }
+ table.addTable(sb, options.stableStateTimePeriod);
+ }
+
+ private void storeNodeInfo(int nodeIndex, NodeType nodeType, Map<Integer, NodeInfo> nodeInfoByIndex) {
+ NodeInfo nodeInfo = getNodeInfo(new Node(nodeType, nodeIndex));
+ if (nodeInfo == null) return;
+ nodeInfoByIndex.put(nodeIndex, nodeInfo);
+ }
+
+ public Distribution getDistribution() { return distribution; }
+
+ public void setDistribution(Distribution distribution) {
+ this.distribution = distribution;
+ for (NodeInfo info : clusterInfo.getAllNodeInfo()) {
+ info.setGroup(distribution);
+ }
+ }
+
+ /** Sets the configured nodes of this cluster */
+ public final void setNodes(Collection<ConfiguredNode> configuredNodes) {
+ clusterInfo.setNodes(configuredNodes, this, distribution);
+ }
+
+ public void setStartTimestamp(Node n, long startTimestamp) {
+ nodeStartTimestamps.put(n, startTimestamp);
+ }
+
+ public long getStartTimestamp(Node n) {
+ Long value = nodeStartTimestamps.get(n);
+ return (value == null ? 0 : value);
+ }
+
+ public Map<Node, Long> getStartTimestamps() {
+ return nodeStartTimestamps;
+ }
+
+ public void clearStates() {
+ for (NodeInfo info : clusterInfo.getAllNodeInfo()) {
+ info.setReportedState(null, 0);
+ }
+ }
+
+ public boolean allStatesReported() {
+ return clusterInfo.allStatesReported();
+ }
+
+ public int getPollingFrequency() { return pollingFrequency; }
+ public void setPollingFrequency(int millisecs) { pollingFrequency = millisecs; }
+
+ /** Returns the configured nodes of this as a read-only map indexed on node index (distribution key) */
+ public Map<Integer, ConfiguredNode> getConfiguredNodes() {
+ return clusterInfo.getConfiguredNodes();
+ }
+
+ public Collection<NodeInfo> getNodeInfo() {
+ return Collections.unmodifiableCollection(clusterInfo.getAllNodeInfo());
+ }
+
+ public ClusterInfo clusterInfo() { return clusterInfo; }
+
+ public String getName() { return clusterName; }
+
+ public NodeInfo getNodeInfo(Node node) { return clusterInfo.getNodeInfo(node); }
+
+ public String toString() {
+ StringBuilder sb = new StringBuilder();
+ sb.append("ContentCluster(").append(clusterName).append(") {");
+ for (NodeInfo node : clusterInfo.getAllNodeInfo()) {
+ sb.append("\n ").append(node);
+ }
+ sb.append("\n}");
+ return sb.toString();
+ }
+
+ public int getSlobrokGenerationCount() { return slobrokGenerationCount; }
+
+ public void setSlobrokGenerationCount(int count) { slobrokGenerationCount = count; }
+
+ private void getLeaves(Group node, List<Group> leaves, List<String> names, String name) {
+ if (node.isLeafGroup()) {
+ leaves.add(node);
+ names.add(name + "/" + node.getName());
+ return;
+ }
+ for (Group g : node.getSubgroups().values()) {
+ getLeaves(g, leaves, names, name + (node.getName() != null ? "/" + node.getName() : ""));
+ }
+ }
+
+ public StorageNodeStats getStorageNodeStats(int storageNodeIndex) {
+ LatencyStats aggregatePutLatencyStats = new LatencyStats();
+ StorageNodeStats aggregateStats = new StorageNodeStats(aggregatePutLatencyStats);
+ for (DistributorNodeInfo distributor : clusterInfo.getDistributorNodeInfo()) {
+ StorageNodeStats statsFromDistributor = distributor.getStorageNodeStatsOrNull(storageNodeIndex);
+ if (statsFromDistributor != null) {
+ aggregateStats.add(statsFromDistributor);
+ }
+ }
+
+ return aggregateStats;
+ }
+
+ /**
+ * Checks if a node can be upgraded
+ *
+ * @param node the node to be checked for upgrad
+ * @param clusterState the current cluster state version
+ * @param condition the upgrade condition
+ * @param newState state wanted to be set @return NodeUpgradePrechecker.Response
+ */
+ public NodeStateChangeChecker.Result calculateEffectOfNewState(
+ Node node, int clusterState, SetUnitStateRequest.Condition condition, NodeState oldState, NodeState newState) {
+
+ NodeStateChangeChecker nodeStateChangeChecker = new NodeStateChangeChecker(
+ minStorageNodesUp,
+ minRatioOfStorageNodesUp,
+ distribution.getRedundancy(),
+ clusterInfo);
+ return nodeStateChangeChecker.evaluateTransition(node, clusterState, condition, oldState, newState);
+ }
+
+ public void setMinStorageNodesUp(int minStorageNodesUp) {
+ this.minStorageNodesUp = minStorageNodesUp;
+ }
+
+ public void setMinRatioOfStorageNodesUp(double minRatioOfStorageNodesUp) {
+ this.minRatioOfStorageNodesUp = minRatioOfStorageNodesUp;
+ }
+
+ public boolean hasConfiguredNode(int index) {
+ return clusterInfo.getConfiguredNodes().containsKey(index);
+ }
+
+}
diff --git a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/DistributorNodeInfo.java b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/DistributorNodeInfo.java
new file mode 100644
index 00000000000..32c68aff083
--- /dev/null
+++ b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/DistributorNodeInfo.java
@@ -0,0 +1,42 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.vespa.clustercontroller.core;
+
+import com.yahoo.vdslib.distribution.Distribution;
+import com.yahoo.vdslib.state.Node;
+import com.yahoo.vdslib.state.NodeType;
+import com.yahoo.vespa.clustercontroller.core.hostinfo.HostInfo;
+import com.yahoo.vespa.clustercontroller.core.hostinfo.StorageNodeStatsBridge;
+
+/**
+ * Class encapsulating what the Cluster Controller knows about a distributor node. Most of the information is
+ * common between Storage- and Distributor- nodes, and stored in the base class NodeInfo.
+ *
+ * @author hakon
+ */
+public class DistributorNodeInfo extends NodeInfo {
+
+ private StorageNodeStatsContainer storageNodeStatsContainer = null;
+
+ public DistributorNodeInfo(ContentCluster cluster, int index, String rpcAddress, Distribution distribution) {
+ super(cluster, new Node(NodeType.DISTRIBUTOR, index), false, rpcAddress, distribution);
+ }
+
+ @Override
+ public void setHostInfo(HostInfo hostInfo) {
+ // This affects getHostInfo(), and makes the host info available through NodeInfo.
+ super.setHostInfo(hostInfo);
+ storageNodeStatsContainer = StorageNodeStatsBridge.traverseHostInfo(hostInfo);
+ }
+
+ /**
+ * @return Stats this distributor has about a storage node, or null if unknown.
+ */
+ public StorageNodeStats getStorageNodeStatsOrNull(int storageNodeIndex) {
+ if (storageNodeStatsContainer == null) {
+ return null;
+ }
+
+ return storageNodeStatsContainer.get(storageNodeIndex);
+ }
+
+}
diff --git a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/Event.java b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/Event.java
new file mode 100644
index 00000000000..7c19c0e2f79
--- /dev/null
+++ b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/Event.java
@@ -0,0 +1,10 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.vespa.clustercontroller.core;
+
+public interface Event {
+
+ long getTimeMs();
+ String getDescription();
+ String getCategory();
+
+}
diff --git a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/EventLog.java b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/EventLog.java
new file mode 100644
index 00000000000..cffe56380f3
--- /dev/null
+++ b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/EventLog.java
@@ -0,0 +1,142 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.vespa.clustercontroller.core;
+
+import com.yahoo.log.LogLevel;
+import com.yahoo.vdslib.state.Node;
+import com.yahoo.vespa.clustercontroller.utils.util.MetricReporter;
+
+import java.util.*;
+import java.util.logging.Logger;
+
+public class EventLog implements EventLogInterface {
+
+ public static Logger log = Logger.getLogger(EventLog.class.getName());
+
+ private final Timer timer;
+ private final LinkedList<Event> eventLog = new LinkedList<>();
+ private final Map<Node, LinkedList<NodeEvent>> nodeLog = new TreeMap<>();
+ private MetricUpdater metricUpdater; // may be null
+ private long eventsSeen = 0;
+ private long startTime;
+ private int maxSize = 1024;
+ private int maxNodeSize = 1024;
+ private long recentTimePeriod = 7 * 24 * 60 * 60 * 1000; // millisecs - 1 week
+
+ /** Note: metricReporter may be null. */
+ public EventLog(Timer timer, MetricUpdater metricUpdater) {
+ this.timer = timer;
+ this.startTime = timer.getCurrentTimeInMillis();
+ this.metricUpdater = metricUpdater;
+ }
+
+ public void setMaxSize(int size, int nodesize) {
+ if (size < 1 || nodesize < 1) {
+ throw new IllegalArgumentException("Max size must be at least 1");
+ }
+ maxSize = size;
+ while (eventLog.size() > maxSize) {
+ eventLog.remove(0);
+ }
+ maxNodeSize = nodesize;
+ for (List<NodeEvent> list : nodeLog.values()) {
+ while (list.size() > maxNodeSize) {
+ list.remove(0);
+ }
+ }
+ }
+
+ public long getRecentTimePeriod() { return recentTimePeriod; }
+
+ public void add(Event e) { add(e, true); }
+
+ public void add(Event e, boolean logInfo) {
+ ++eventsSeen;
+ eventLog.add(e);
+ if (eventLog.size() > maxSize) {
+ eventLog.remove(0);
+ }
+
+ if (e instanceof NodeEvent) {
+ addNodeOnlyEvent((NodeEvent)e, logInfo ? LogLevel.INFO: LogLevel.DEBUG);
+ } else {
+ log.log(logInfo ? LogLevel.INFO : LogLevel.DEBUG, e.toString());
+ }
+ }
+
+ public void addNodeOnlyEvent(NodeEvent e, java.util.logging.Level level) {
+ log.log(level, "Added node only event: " + e.toString());
+ if (metricUpdater != null) {
+ metricUpdater.recordNewNodeEvent();
+ }
+ LinkedList<NodeEvent> nodeList = nodeLog.get(e.getNode().getNode());
+ if (nodeList == null) {
+ nodeList = new LinkedList<>();
+ nodeLog.put(e.getNode().getNode(), nodeList);
+ }
+ nodeList.add(e);
+ if (nodeList.size() > maxNodeSize) {
+ nodeList.remove(0);
+ }
+ }
+
+ public int getNodeEventsSince(Node n, long time) {
+ LinkedList<NodeEvent> events = nodeLog.get(n);
+ int count = 0;
+ if (events != null) {
+ Iterator<NodeEvent> it = events.descendingIterator();
+ while (it.hasNext()) {
+ NodeEvent e = it.next();
+ if (e.getTimeMs() < time) break;
+ ++count;
+ }
+ }
+ return count;
+ }
+
+ /** Used in unit testing to verify events generated. */
+ public List<NodeEvent> getNodeEvents(Node n) {
+ return new ArrayList<>(nodeLog.get(n));
+ }
+
+ public void writeHtmlState(StringBuilder sb, Node node) {
+ TimeZone tz = TimeZone.getTimeZone("UTC");
+ LinkedList<Event> events = new LinkedList<>();
+ long currentTime = timer.getCurrentTimeInMillis();
+ long recentNodeEvents = 0;
+ if (node == null) {
+ events = eventLog;
+ sb.append("<h2 id=\"eventlog\">Event log</h2>\n")
+ .append("<p>A total number of " + eventsSeen + " has been seen since ").append(RealTimer.printDate(startTime, tz)).append(".</p>\n");
+ } else {
+ if (nodeLog.containsKey(node)) {
+ events.addAll(nodeLog.get(node));
+ }
+ recentNodeEvents = getNodeEventsSince(node, currentTime - recentTimePeriod);
+ sb.append("<h2>Node event log for " + node + "</h2>\n")
+ .append("<p>A total number of " + events.size() + " events has been seen since ")
+ .append(RealTimer.printDate(startTime, tz)).append(".</p>\n")
+ .append("<p>Recently, " + recentNodeEvents + " events has been seen since ")
+ .append(RealTimer.printDate(currentTime - recentTimePeriod, tz)).append(".</p>\n");
+ }
+ sb.append("<table border=\"1\" cellspacing=\"0\">\n")
+ .append("<tr><td>Date (").append(tz.getDisplayName(false, TimeZone.SHORT)).append(")</td><td>Type</td><td>Node</td><td>Event</td></tr>\n");
+ int nr = 0;
+ Iterator<Event> eventIterator = (events == null ? null : events.descendingIterator());
+ if (eventIterator != null) while (eventIterator.hasNext()) {
+ Event e = eventIterator.next();
+ String colStart = "<font color=\"" + (++nr > recentNodeEvents ? "grey" : "black") + "\">";
+ String colEnd = "</font>";
+ sb.append("<tr>\n");
+ sb.append(" <td><nobr>").append(colStart).append(RealTimer.printDate(e.getTimeMs(), tz)).append(colEnd).append("</nobr></td>\n");
+ sb.append(" <td><nobr>").append(colStart).append(e.getCategory()).append(colEnd).append("</nobr></td>\n");
+ if (e instanceof NodeEvent) {
+ sb.append(" <td><nobr>").append(colStart).append(((NodeEvent) e).getNode().toString()).append(colEnd).append("</nobr></td>\n");
+ } else {
+ sb.append(" <td><nobr>").append(colStart).append(" - ").append(colEnd).append("</nobr></td>\n");
+ }
+ sb.append(" <td>").append(colStart).append(e.getDescription()).append(colEnd).append("</td>\n");
+ sb.append("</tr>\n");
+ }
+ sb.append("</table>\n");
+ }
+}
diff --git a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/EventLogInterface.java b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/EventLogInterface.java
new file mode 100644
index 00000000000..37d155b569f
--- /dev/null
+++ b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/EventLogInterface.java
@@ -0,0 +1,18 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.vespa.clustercontroller.core;
+
+import com.yahoo.vdslib.state.Node;
+
+import java.util.List;
+
+public interface EventLogInterface {
+
+ public void add(Event e);
+ public void add(Event e, boolean logInfo);
+ public void addNodeOnlyEvent(NodeEvent e, java.util.logging.Level level);
+ public int getNodeEventsSince(Node n, long time);
+ public long getRecentTimePeriod();
+ public void writeHtmlState(StringBuilder sb, Node node);
+ public void setMaxSize(int size, int nodesize);
+
+}
diff --git a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/FleetController.java b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/FleetController.java
new file mode 100644
index 00000000000..572e24bcb35
--- /dev/null
+++ b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/FleetController.java
@@ -0,0 +1,794 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.vespa.clustercontroller.core;
+
+import com.yahoo.jrt.ListenFailedException;
+import com.yahoo.log.LogLevel;
+import com.yahoo.vdslib.distribution.ConfiguredNode;
+import com.yahoo.vdslib.state.ClusterState;
+import com.yahoo.vdslib.state.Node;
+import com.yahoo.vdslib.state.NodeState;
+import com.yahoo.vespa.clustercontroller.core.database.DatabaseHandler;
+import com.yahoo.vespa.clustercontroller.core.hostinfo.HostInfo;
+import com.yahoo.vespa.clustercontroller.core.listeners.*;
+import com.yahoo.vespa.clustercontroller.core.rpc.RPCCommunicator;
+import com.yahoo.vespa.clustercontroller.core.rpc.RpcServer;
+import com.yahoo.vespa.clustercontroller.core.rpc.SlobrokClient;
+import com.yahoo.vespa.clustercontroller.core.status.*;
+import com.yahoo.vespa.clustercontroller.core.status.statuspage.StatusPageResponse;
+import com.yahoo.vespa.clustercontroller.core.status.statuspage.StatusPageServer;
+import com.yahoo.vespa.clustercontroller.core.status.statuspage.StatusPageServerInterface;
+import com.yahoo.vespa.clustercontroller.utils.util.MetricReporter;
+import com.yahoo.vespa.clustercontroller.utils.util.NoMetricReporter;
+import org.apache.commons.lang.exception.ExceptionUtils;
+
+import java.io.FileNotFoundException;
+import java.util.*;
+import java.util.logging.Logger;
+
+public class FleetController implements NodeStateOrHostInfoChangeHandler, NodeAddedOrRemovedListener, SystemStateListener,
+ Runnable, RemoteClusterControllerTaskScheduler {
+
+ private static Logger log = Logger.getLogger(FleetController.class.getName());
+
+ private final Timer timer;
+ private final Object monitor;
+ private final EventLog eventLog;
+ private final NodeLookup nodeLookup;
+ private final ContentCluster cluster;
+ private final Communicator communicator;
+ private final NodeStateGatherer stateGatherer;
+ private final SystemStateGenerator systemStateGenerator;
+ private final SystemStateBroadcaster systemStateBroadcaster;
+ private final StatusPageServerInterface statusPageServer;
+ private final RpcServer rpcServer;
+ private final DatabaseHandler database;
+ private final MasterElectionHandler masterElectionHandler;
+ private Thread runner = null;
+ private boolean running = true;
+ private FleetControllerOptions options;
+ private FleetControllerOptions nextOptions;
+ private final List<SystemStateListener> systemStateListeners = new LinkedList<>();
+ private boolean processingCycle = false;
+ private boolean wantedStateChanged = false;
+ private long cycleCount = 0;
+ private long nextStateSendTime = 0;
+ private Long controllerThreadId = null;
+
+ private boolean waitingForCycle = false;
+ private StatusPageServer.PatternRequestRouter statusRequestRouter = new StatusPageServer.PatternRequestRouter();
+ private final List<com.yahoo.vdslib.state.ClusterState> newStates = new ArrayList<>();
+ private long configGeneration = -1;
+ private long nextConfigGeneration = -1;
+ private List<RemoteClusterControllerTask> remoteTasks = new ArrayList<>();
+ private final MetricUpdater metricUpdater;
+
+ private boolean isMaster = false;
+ private boolean isStateGatherer = false;
+ private long firstAllowedStateBroadcast = Long.MAX_VALUE;
+ private long tickStartTime = Long.MAX_VALUE;
+
+ private final RunDataExtractor dataExtractor = new RunDataExtractor() {
+ @Override
+ public com.yahoo.vdslib.state.ClusterState getLatestClusterState() { return systemStateGenerator.getClusterState(); }
+ @Override
+ public FleetControllerOptions getOptions() { return options; }
+ @Override
+ public long getConfigGeneration() { return configGeneration; }
+ @Override
+ public ContentCluster getCluster() { return cluster; }
+ };
+
+ public FleetController(Timer timer,
+ EventLog eventLog,
+ ContentCluster cluster,
+ NodeStateGatherer nodeStateGatherer,
+ Communicator communicator,
+ StatusPageServerInterface statusPage,
+ RpcServer server,
+ NodeLookup nodeLookup,
+ DatabaseHandler database,
+ SystemStateGenerator systemStateGenerator,
+ SystemStateBroadcaster systemStateBroadcaster,
+ MasterElectionHandler masterElectionHandler,
+ MetricUpdater metricUpdater,
+ FleetControllerOptions options) throws Exception
+ {
+ log.info("Starting up cluster controller " + options.fleetControllerIndex + " for cluster " + cluster.getName());
+ this.timer = timer;
+ this.monitor = timer;
+ this.eventLog = eventLog;
+ this.options = options;
+ this.nodeLookup = nodeLookup;
+ this.cluster = cluster;
+ this.communicator = communicator;
+ this.database = database;
+ this.stateGatherer = nodeStateGatherer;
+ this.systemStateGenerator = systemStateGenerator;
+ this.systemStateBroadcaster = systemStateBroadcaster;
+ this.metricUpdater = metricUpdater;
+
+ this.statusPageServer = statusPage;
+ this.rpcServer = server;
+
+ this.masterElectionHandler = masterElectionHandler;
+
+ this.statusRequestRouter.addHandler(
+ "^/node=([a-z]+)\\.(\\d+)$",
+ new LegacyNodePageRequestHandler(timer, eventLog, cluster));
+ this.statusRequestRouter.addHandler(
+ "^/state.*",
+ new NodeHealthRequestHandler(dataExtractor));
+ this.statusRequestRouter.addHandler(
+ "^/clusterstate",
+ new ClusterStateRequestHandler(systemStateGenerator));
+ this.statusRequestRouter.addHandler(
+ "^/$",
+ new LegacyIndexPageRequestHandler(
+ timer, options.showLocalSystemStatesInEventLog, cluster,
+ masterElectionHandler, systemStateGenerator,
+ eventLog, timer.getCurrentTimeInMillis(), dataExtractor));
+
+ propagateOptions();
+ }
+
+ public static FleetController createForContainer(FleetControllerOptions options,
+ StatusPageServerInterface statusPageServer,
+ MetricReporter metricReporter) throws Exception {
+ Timer timer = new RealTimer();
+ return create(options, timer, statusPageServer, null, metricReporter);
+ }
+
+ public static FleetController createForStandAlone(FleetControllerOptions options) throws Exception {
+ Timer timer = new RealTimer();
+ RpcServer rpcServer = new RpcServer(timer, timer, options.clusterName, options.fleetControllerIndex, options.slobrokBackOffPolicy);
+ StatusPageServer statusPageServer = new StatusPageServer(timer, timer, options.httpPort);
+ return create(options, timer, statusPageServer, rpcServer, new NoMetricReporter());
+ }
+
+ private static FleetController create(FleetControllerOptions options,
+ Timer timer,
+ StatusPageServerInterface statusPageServer,
+ RpcServer rpcServer,
+ MetricReporter metricReporter) throws Exception
+ {
+ MetricUpdater metricUpdater = new MetricUpdater(metricReporter, options.fleetControllerIndex);
+ EventLog log = new EventLog(timer, metricUpdater);
+ ContentCluster cluster = new ContentCluster(
+ options.clusterName,
+ options.nodes,
+ options.storageDistribution,
+ options.minStorageNodesUp,
+ options.minRatioOfStorageNodesUp);
+ NodeStateGatherer stateGatherer = new NodeStateGatherer(timer, timer, log);
+ Communicator communicator = new RPCCommunicator(
+ timer,
+ options.fleetControllerIndex,
+ options.nodeStateRequestTimeoutMS,
+ options.nodeStateRequestTimeoutEarliestPercentage,
+ options.nodeStateRequestTimeoutLatestPercentage,
+ options.nodeStateRequestRoundTripTimeMaxSeconds);
+ DatabaseHandler database = new DatabaseHandler(timer, options.zooKeeperServerAddress, options.fleetControllerIndex, timer);
+ NodeLookup lookUp = new SlobrokClient(timer);
+ SystemStateGenerator stateGenerator = new SystemStateGenerator(timer, log, metricUpdater);
+ SystemStateBroadcaster stateBroadcaster = new SystemStateBroadcaster(timer, timer);
+ MasterElectionHandler masterElectionHandler = new MasterElectionHandler(options.fleetControllerIndex, options.fleetControllerCount, timer, timer);
+ FleetController controller = new FleetController(
+ timer, log, cluster, stateGatherer, communicator, statusPageServer, rpcServer, lookUp, database, stateGenerator, stateBroadcaster, masterElectionHandler, metricUpdater, options);
+ controller.start();
+ return controller;
+ }
+
+ public void start() {
+ runner = new Thread(this);
+ runner.start();
+ }
+
+ public Object getMonitor() { return monitor; }
+
+ public boolean isRunning() {
+ synchronized(monitor) {
+ return running;
+ }
+ }
+
+ public boolean isMaster() {
+ synchronized (monitor) {
+ return masterElectionHandler.isMaster();
+ }
+ }
+
+ public ClusterState getClusterState() {
+ synchronized (monitor) {
+ return systemStateBroadcaster.getClusterState();
+ }
+ }
+
+ public void schedule(RemoteClusterControllerTask task) {
+ synchronized (monitor) {
+ log.fine("Scheduled remote task " + task.getClass().getName() + " for execution");
+ remoteTasks.add(task);
+ }
+ }
+
+ /** Used for unit testing. */
+ public void addSystemStateListener(SystemStateListener listener) {
+ synchronized (systemStateListeners) {
+ systemStateListeners.add(listener);
+ // Always give cluster state listeners the current state, in case acceptable state has come before listener is registered.
+ com.yahoo.vdslib.state.ClusterState state = getSystemState();
+ if (state == null) throw new NullPointerException("Cluster state should never be null at this point");
+ listener.handleNewSystemState(state);
+ }
+ }
+
+ public FleetControllerOptions getOptions() {
+ synchronized(monitor) {
+ return options.clone();
+ }
+ }
+
+ public NodeState getReportedNodeState(Node n) {
+ synchronized(monitor) {
+ NodeInfo node = cluster.getNodeInfo(n);
+ if (node == null) {
+ throw new IllegalStateException("Did not find node " + n + " in cluster " + cluster);
+ }
+ return node.getReportedState();
+ }
+ }
+
+ // Only used in tests
+ public NodeState getWantedNodeState(Node n) {
+ synchronized(monitor) {
+ return cluster.getNodeInfo(n).getWantedState();
+ }
+ }
+
+ public com.yahoo.vdslib.state.ClusterState getSystemState() {
+ synchronized(monitor) {
+ return systemStateGenerator.getClusterState();
+ }
+ }
+
+ public int getHttpPort() { return statusPageServer.getPort(); }
+ public int getRpcPort() { return rpcServer.getPort(); }
+
+ public void shutdown() throws InterruptedException, java.io.IOException {
+ boolean isStillRunning = false;
+ synchronized(monitor) {
+ if (running) {
+ isStillRunning = true;
+ }
+ }
+ if (runner != null && isStillRunning) {
+ log.log(LogLevel.INFO, "Joining event thread.");
+ running = false;
+ runner.interrupt();
+ runner.join();
+ }
+ log.log(LogLevel.INFO, "Fleetcontroller done shutting down event thread.");
+ controllerThreadId = Thread.currentThread().getId();
+ database.shutdown(this);
+
+ if (statusPageServer != null) {
+ statusPageServer.shutdown();
+ }
+ if (rpcServer != null) {
+ rpcServer.shutdown();
+ }
+ communicator.shutdown();
+ nodeLookup.shutdown();
+ }
+
+ public void updateOptions(FleetControllerOptions options, long configGeneration) {
+ synchronized(monitor) {
+ assert(this.options.fleetControllerIndex == options.fleetControllerIndex);
+ log.log(LogLevel.INFO, "Fleetcontroller " + options.fleetControllerIndex + " has new options");
+ nextOptions = options.clone();
+ nextConfigGeneration = configGeneration;
+ monitor.notifyAll();
+ }
+ }
+
+ private void verifyInControllerThread() {
+ if (controllerThreadId != null && controllerThreadId != Thread.currentThread().getId()) {
+ throw new IllegalStateException("Function called from non-controller thread. Shouldn't happen.");
+ }
+ }
+
+ @Override
+ public void handleNewNodeState(NodeInfo node, NodeState newState) {
+ verifyInControllerThread();
+ systemStateGenerator.handleNewReportedNodeState(node, newState, this);
+ }
+
+ @Override
+ public void handleNewWantedNodeState(NodeInfo node, NodeState newState) {
+ verifyInControllerThread();
+ wantedStateChanged = true;
+ systemStateGenerator.proposeNewNodeState(node, newState);
+ }
+
+ @Override
+ public void handleUpdatedHostInfo(NodeInfo nodeInfo, HostInfo newHostInfo) {
+ verifyInControllerThread();
+ systemStateGenerator.handleUpdatedHostInfo(nodeInfo, newHostInfo);
+ }
+
+ @Override
+ public void handleNewNode(NodeInfo node) {
+ verifyInControllerThread();
+ systemStateGenerator.handleNewNode(node);
+ }
+ @Override
+ public void handleMissingNode(NodeInfo node) {
+ verifyInControllerThread();
+ systemStateGenerator.handleMissingNode(node, this);
+ }
+ @Override
+ public void handleNewRpcAddress(NodeInfo node) {
+ verifyInControllerThread();
+ systemStateGenerator.handleNewRpcAddress(node);
+ }
+ @Override
+ public void handleReturnedRpcAddress(NodeInfo node) {
+ verifyInControllerThread();
+ systemStateGenerator.handleReturnedRpcAddress(node);
+ }
+
+ public void handleNewSystemState(com.yahoo.vdslib.state.ClusterState state) {
+ verifyInControllerThread();
+ newStates.add(state);
+ metricUpdater.updateClusterStateMetrics(cluster, state);
+ systemStateBroadcaster.handleNewSystemState(state);
+ }
+
+ /**
+ * This function gives data of the current state in master election.
+ * The keys in the given map are indexes of fleet controllers.
+ * The values are what fleetcontroller that fleetcontroller wants to
+ * become master.
+ *
+ * If more than half the fleetcontrollers want a node to be master and
+ * that node also wants itself as master, that node is the single master.
+ * If this condition is not met, there is currently no master.
+ */
+ public void handleFleetData(Map<Integer, Integer> data) {
+ verifyInControllerThread();
+ log.log(LogLevel.SPAM, "Sending fleet data event on to master election handler");
+ metricUpdater.updateMasterElectionMetrics(data);
+ masterElectionHandler.handleFleetData(data);
+ }
+
+ /**
+ * Called when we can no longer contact database.
+ */
+ public void lostDatabaseConnection() {
+ verifyInControllerThread();
+ masterElectionHandler.lostDatabaseConnection();
+ }
+
+ /** Called when all distributors have acked newest cluster state version. */
+ public void handleAllDistributorsInSync(DatabaseHandler database, DatabaseHandler.Context context) throws InterruptedException {
+ systemStateGenerator.handleAllDistributorsInSync(database, context);
+ }
+
+ private boolean changesConfiguredNodeSet(Collection<ConfiguredNode> newNodes) {
+ if (newNodes.size() != cluster.getConfiguredNodes().size()) return true;
+ if (! cluster.getConfiguredNodes().values().containsAll(newNodes)) return true;
+
+ // Check retirement changes
+ for (ConfiguredNode node : newNodes) {
+ if (node.retired() != cluster.getConfiguredNodes().get(node.index()).retired())
+ return true;
+ }
+
+ return false;
+ }
+
+ /** This is called when the options field has been set to a new set of options */
+ private void propagateOptions() throws java.io.IOException, ListenFailedException {
+ verifyInControllerThread();
+
+ if (changesConfiguredNodeSet(options.nodes)) {
+ // Force slobrok node re-fetch in case of changes to the set of configured nodes
+ cluster.setSlobrokGenerationCount(0);
+ }
+
+ communicator.propagateOptions(options);
+
+ if (nodeLookup instanceof SlobrokClient)
+ ((SlobrokClient)nodeLookup).setSlobrokConnectionSpecs(options.slobrokConnectionSpecs);
+ eventLog.setMaxSize(options.eventLogMaxSize, options.eventNodeLogMaxSize);
+ cluster.setPollingFrequency(options.statePollingFrequency);
+ cluster.setDistribution(options.storageDistribution);
+ cluster.setNodes(options.nodes);
+ cluster.setMinRatioOfStorageNodesUp(options.minRatioOfStorageNodesUp);
+ cluster.setMinStorageNodesUp(options.minStorageNodesUp);
+ database.setZooKeeperAddress(options.zooKeeperServerAddress);
+ database.setZooKeeperSessionTimeout(options.zooKeeperSessionTimeout);
+ stateGatherer.setMaxSlobrokDisconnectGracePeriod(options.maxSlobrokDisconnectGracePeriod);
+ stateGatherer.setNodeStateRequestTimeout(options.nodeStateRequestTimeoutMS);
+ systemStateGenerator.setNodes(cluster.clusterInfo());
+ systemStateGenerator.setMaxTransitionTime(options.maxTransitionTime);
+ systemStateGenerator.setMaxInitProgressTime(options.maxInitProgressTime);
+ systemStateGenerator.setMaxPrematureCrashes(options.maxPrematureCrashes);
+ systemStateGenerator.setStableStateTimePeriod(options.stableStateTimePeriod);
+ systemStateGenerator.setMinNodesUp(options.minDistributorNodesUp, options.minStorageNodesUp,
+ options.minRatioOfDistributorNodesUp, options.minRatioOfStorageNodesUp);
+ systemStateGenerator.setMaxSlobrokDisconnectGracePeriod(options.maxSlobrokDisconnectGracePeriod);
+ systemStateGenerator.setDistributionBits(options.distributionBits);
+ masterElectionHandler.setFleetControllerCount(options.fleetControllerCount);
+ masterElectionHandler.setMasterZooKeeperCooldownPeriod(options.masterZooKeeperCooldownPeriod);
+
+ if (rpcServer != null) {
+ rpcServer.setMasterElectionHandler(masterElectionHandler);
+ try{
+ rpcServer.setSlobrokConnectionSpecs(options.slobrokConnectionSpecs, options.rpcPort);
+ } catch (ListenFailedException e) {
+ log.log(LogLevel.WARNING, "Failed to bind RPC server to port " + options.rpcPort +". This may be natural if cluster have altered the services running on this node: " + e.getMessage());
+ } catch (Exception e) {
+ log.log(LogLevel.WARNING, "Failed to initailize RPC server socket: " + e.getMessage());
+ }
+ }
+
+ if (statusPageServer != null) {
+ try{
+ statusPageServer.setPort(options.httpPort);
+ } catch (Exception e) {
+ log.log(LogLevel.WARNING, "Failed to initialize status server socket. This may be natural if cluster have altered the services running on this node: " + e.getMessage());
+ }
+ }
+
+ long currentTime = timer.getCurrentTimeInMillis();
+ nextStateSendTime = Math.min(currentTime + options.minTimeBetweenNewSystemStates, nextStateSendTime);
+ configGeneration = nextConfigGeneration;
+ nextConfigGeneration = -1;
+ }
+
+ public StatusPageResponse fetchStatusPage(StatusPageServer.HttpRequest httpRequest) {
+ verifyInControllerThread();
+ StatusPageResponse.ResponseCode responseCode;
+ String message;
+ String hiddenMessage = "";
+ try {
+ StatusPageServer.RequestHandler handler = statusRequestRouter.resolveHandler(httpRequest);
+ if (handler == null) {
+ throw new FileNotFoundException("No handler found for request: " + httpRequest.getPath());
+ }
+ return handler.handle(httpRequest);
+ } catch (FileNotFoundException e) {
+ responseCode = StatusPageResponse.ResponseCode.NOT_FOUND;
+ message = e.getMessage();
+ } catch (Exception e) {
+ responseCode = StatusPageResponse.ResponseCode.INTERNAL_SERVER_ERROR;
+ message = "Internal Server Error";
+ hiddenMessage = ExceptionUtils.getStackTrace(e);
+ log.log(LogLevel.DEBUG, "Unknown exception thrown for request " + httpRequest.getRequest() +
+ ": " + hiddenMessage);
+ }
+
+ TimeZone tz = TimeZone.getTimeZone("UTC");
+ long currentTime = timer.getCurrentTimeInMillis();
+ StatusPageResponse response = new StatusPageResponse();
+ StringBuilder content = new StringBuilder();
+ response.setContentType("text/html");
+ response.setResponseCode(responseCode);
+ content.append("<!-- Answer to request " + httpRequest.getRequest() + " -->\n");
+ content.append("<p>UTC time when creating this page: ").append(RealTimer.printDateNoMilliSeconds(currentTime, tz)).append("</p>");
+ response.writeHtmlHeader(content, message);
+ response.writeHtmlFooter(content, hiddenMessage);
+ response.writeContent(content.toString());
+
+ return response;
+ }
+
+ public void tick() throws Exception {
+ synchronized (monitor) {
+ boolean didWork;
+ didWork = database.doNextZooKeeperTask(databaseContext);
+ didWork |= updateMasterElectionState();
+ didWork |= handleLeadershipEdgeTransitions();
+ systemStateGenerator.setMaster(isMaster);
+
+ // Process zero or more getNodeState responses that we have received.
+ didWork |= stateGatherer.processResponses(this);
+
+ if (masterElectionHandler.isAmongNthFirst(options.stateGatherCount)) {
+ didWork |= resyncLocallyCachedState();
+ } else {
+ stepDownAsStateGatherer();
+ }
+
+ didWork |= systemStateBroadcaster.processResponses();
+ if (masterElectionHandler.isMaster()) {
+ didWork |= broadcastClusterStateToEligibleNodes();
+
+ }
+
+ didWork |= processAnyPendingStatusPageRequest();
+
+ if (rpcServer != null) {
+ didWork |= rpcServer.handleRpcRequests(cluster, systemStateGenerator.getClusterState(), this, this);
+ }
+
+ processAllQueuedRemoteTasks();
+
+ processingCycle = false;
+ ++cycleCount;
+ long tickStopTime = timer.getCurrentTimeInMillis();
+ if (tickStopTime >= tickStartTime)
+ metricUpdater.addTickTime(tickStopTime - tickStartTime, didWork);
+ if ( ! didWork && ! waitingForCycle)
+ monitor.wait(options.cycleWaitTime);
+ tickStartTime = timer.getCurrentTimeInMillis();
+ processingCycle = true;
+ if (nextOptions != null) { // if reconfiguration has given us new options, propagate them
+ switchToNewConfig();
+ }
+ }
+
+ propagateNewStatesToListeners();
+ }
+
+ private boolean updateMasterElectionState() throws InterruptedException {
+ try {
+ return masterElectionHandler.watchMasterElection(database, databaseContext);
+ } catch (InterruptedException e) {
+ throw (InterruptedException) new InterruptedException("Interrupted").initCause(e);
+ } catch (Exception e) {
+ log.log(LogLevel.WARNING, "Failed to watch master election: " + e.toString());
+ }
+ return false;
+ }
+
+ private void stepDownAsStateGatherer() {
+ if (isStateGatherer) {
+ cluster.clearStates(); // Remove old states that we are no longer certain of as we stop gathering information
+ eventLog.add(new ClusterEvent(ClusterEvent.Type.MASTER_ELECTION, "This node is no longer a node state gatherer.", timer.getCurrentTimeInMillis()));
+ }
+ isStateGatherer = false;
+ }
+
+ private void switchToNewConfig() {
+ options = nextOptions;
+ nextOptions = null;
+ try {
+ propagateOptions();
+ } catch (Exception e) {
+ log.log(LogLevel.ERROR, "Failed to handle new fleet controller config", e);
+ }
+ }
+
+ private boolean processAnyPendingStatusPageRequest() {
+ if (statusPageServer != null) {
+ StatusPageServer.HttpRequest statusRequest = statusPageServer.getCurrentHttpRequest();
+ if (statusRequest != null) {
+ statusPageServer.answerCurrentStatusRequest(fetchStatusPage(statusRequest));
+ return true;
+ }
+ }
+ return false;
+ }
+
+ private boolean broadcastClusterStateToEligibleNodes() throws InterruptedException {
+ boolean sentAny = false;
+ // Give nodes a fair chance to respond first time to state gathering requests, so we don't
+ // disturb system when we take over. Allow anyways if we have states from all nodes.
+ long currentTime = timer.getCurrentTimeInMillis();
+ if ((currentTime >= firstAllowedStateBroadcast || cluster.allStatesReported())
+ && currentTime >= nextStateSendTime)
+ {
+ if (currentTime < firstAllowedStateBroadcast) {
+ log.log(LogLevel.DEBUG, "Not set to broadcast states just yet, but as we have gotten info from all nodes we can do so safely.");
+ // Reset timer to only see warning once.
+ firstAllowedStateBroadcast = currentTime;
+ }
+ sentAny = systemStateBroadcaster.broadcastNewState(database, databaseContext, communicator, this);
+ if (sentAny) {
+ nextStateSendTime = currentTime + options.minTimeBetweenNewSystemStates;
+ }
+ }
+ return sentAny;
+ }
+
+ private void propagateNewStatesToListeners() {
+ if ( ! newStates.isEmpty()) {
+ synchronized (systemStateListeners) {
+ for (ClusterState state : newStates) {
+ for(SystemStateListener listener : systemStateListeners) {
+ listener.handleNewSystemState(state);
+ }
+ }
+ newStates.clear();
+ }
+ }
+ }
+
+ private void processAllQueuedRemoteTasks() {
+ if ( ! remoteTasks.isEmpty()) {
+ RemoteClusterControllerTask.Context context = new RemoteClusterControllerTask.Context();
+ context.cluster = cluster;
+ context.currentState = systemStateGenerator.getConsolidatedClusterState();
+ context.masterInfo = masterElectionHandler;
+ context.nodeStateOrHostInfoChangeHandler = this;
+ context.nodeAddedOrRemovedListener = this;
+ for (RemoteClusterControllerTask task : remoteTasks) {
+ log.finest("Processing remote task " + task.getClass().getName());
+ task.doRemoteFleetControllerTask(context);
+ task.notifyCompleted();
+ log.finest("Done processing remote task " + task.getClass().getName());
+ }
+ log.fine("Completed processing remote tasks");
+ remoteTasks.clear();
+ }
+ }
+
+ private boolean resyncLocallyCachedState() throws InterruptedException {
+ boolean didWork = false;
+ // Let non-master state gatherers update wanted states once in a while, so states generated and shown are close to valid.
+ if ( ! isMaster && cycleCount % 100 == 0) {
+ didWork = database.loadWantedStates(databaseContext);
+ didWork |= database.loadStartTimestamps(cluster);
+ }
+ // If we have new slobrok information, update our cluster.
+ didWork |= nodeLookup.updateCluster(cluster, this);
+
+ // Send getNodeState requests to zero or more nodes.
+ didWork |= stateGatherer.sendMessages(cluster, communicator, this);
+ didWork |= systemStateGenerator.watchTimers(cluster, this);
+ didWork |= systemStateGenerator.notifyIfNewSystemState(this);
+
+ if ( ! isStateGatherer) {
+ if ( ! isMaster) {
+ eventLog.add(new ClusterEvent(ClusterEvent.Type.MASTER_ELECTION, "This node just became node state gatherer as we are fleetcontroller master candidate.", timer.getCurrentTimeInMillis()));
+ // Update versions to use so what is shown is closer to what is reality on the master
+ systemStateGenerator.setLatestSystemStateVersion(database.getLatestSystemStateVersion());
+ }
+ }
+ isStateGatherer = true;
+ return didWork;
+ }
+
+ private boolean handleLeadershipEdgeTransitions() throws InterruptedException {
+ boolean didWork = false;
+ if (masterElectionHandler.isMaster()) {
+ if ( ! isMaster) {
+ metricUpdater.becameMaster();
+ // If we just became master, restore wanted states from database
+ systemStateGenerator.setLatestSystemStateVersion(database.getLatestSystemStateVersion());
+ didWork = database.loadStartTimestamps(cluster);
+ didWork |= database.loadWantedStates(databaseContext);
+ eventLog.add(new ClusterEvent(ClusterEvent.Type.MASTER_ELECTION, "This node just became fleetcontroller master. Bumped version to "
+ + systemStateGenerator.getClusterState().getVersion() + " to be in line.", timer.getCurrentTimeInMillis()));
+ long currentTime = timer.getCurrentTimeInMillis();
+ firstAllowedStateBroadcast = currentTime + options.minTimeBeforeFirstSystemStateBroadcast;
+ log.log(LogLevel.DEBUG, "At time " + currentTime + " we set first system state broadcast time to be "
+ + options.minTimeBeforeFirstSystemStateBroadcast + " ms after at time " + firstAllowedStateBroadcast + ".");
+ }
+ isMaster = true;
+ if (wantedStateChanged) {
+ database.saveWantedStates(databaseContext);
+ wantedStateChanged = false;
+ }
+ } else {
+ if (isMaster) {
+ eventLog.add(new ClusterEvent(ClusterEvent.Type.MASTER_ELECTION, "This node is no longer fleetcontroller master.", timer.getCurrentTimeInMillis()));
+ firstAllowedStateBroadcast = Long.MAX_VALUE;
+ metricUpdater.noLongerMaster();
+ }
+ wantedStateChanged = false;
+ isMaster = false;
+ }
+ return didWork;
+ }
+
+ public void run() {
+ controllerThreadId = Thread.currentThread().getId();
+ try {
+ processingCycle = true;
+ while(running)
+ tick();
+ } catch (InterruptedException e) {
+ log.log(LogLevel.DEBUG, "Event thread stopped by interrupt exception: " + e);
+ } catch (Throwable t) {
+ log.log(LogLevel.ERROR, "Fatal error killed fleet controller", t);
+ synchronized (monitor) { running = false; }
+ System.exit(1);
+ }
+ }
+
+ public DatabaseHandler.Context databaseContext = new DatabaseHandler.Context() {
+ @Override
+ public ContentCluster getCluster() { return cluster; }
+ @Override
+ public FleetController getFleetController() { return FleetController.this; }
+ @Override
+ public NodeAddedOrRemovedListener getNodeAddedOrRemovedListener() { return FleetController.this; }
+ @Override
+ public NodeStateOrHostInfoChangeHandler getNodeStateUpdateListener() { return FleetController.this; }
+ };
+
+ public void waitForCompleteCycle(long timeoutMS) {
+ long endTime = System.currentTimeMillis() + timeoutMS;
+ synchronized (monitor) {
+ // To wait at least one complete cycle, if a cycle is already running we need to wait for the next one beyond.
+ long wantedCycle = cycleCount + (processingCycle ? 2 : 1);
+ waitingForCycle = true;
+ try{
+ while (cycleCount < wantedCycle) {
+ if (System.currentTimeMillis() > endTime) throw new IllegalStateException("Timed out waiting for cycle to complete. Not completed after " + timeoutMS + " ms.");
+ if (!running) throw new IllegalStateException("Fleetcontroller not running. Will never complete cycles");
+ try{ monitor.wait(100); } catch (InterruptedException e) {}
+ }
+ } finally {
+ waitingForCycle = false;
+ }
+ }
+ }
+
+ /**
+ * This function might not be 100% threadsafe, as in theory cluster can be changing while accessed.
+ * But it is only used in unit tests that should not trigger any thread issues. Don't want to add locks that reduce
+ * live performance to remove a non-problem.
+ */
+ public void waitForNodesHavingSystemStateVersionEqualToOrAbove(int version, int nodeCount, int timeout) throws InterruptedException {
+ long maxTime = System.currentTimeMillis() + timeout;
+ synchronized (monitor) {
+ while (true) {
+ int ackedNodes = 0;
+ for (NodeInfo node : cluster.getNodeInfo()) {
+ if (node.getSystemStateVersionAcknowledged() >= version) {
+ ++ackedNodes;
+ }
+ }
+ if (ackedNodes >= nodeCount) {
+ log.log(LogLevel.INFO, ackedNodes + " nodes now have acked system state " + version + " or higher.");
+ return;
+ }
+ long remainingTime = maxTime - System.currentTimeMillis();
+ if (remainingTime <= 0) {
+ throw new IllegalStateException("Did not get " + nodeCount + " nodes to system state " + version + " within timeout of " + timeout + " milliseconds.");
+ }
+ monitor.wait(10);
+ }
+ }
+ }
+
+ public void waitForNodesInSlobrok(int distNodeCount, int storNodeCount, int timeoutMillis) throws InterruptedException {
+ long maxTime = System.currentTimeMillis() + timeoutMillis;
+ synchronized (monitor) {
+ while (true) {
+ int distCount = 0, storCount = 0;
+ for (NodeInfo info : cluster.getNodeInfo()) {
+ if (!info.isRpcAddressOutdated()) {
+ if (info.isDistributor()) ++distCount;
+ else ++storCount;
+ }
+ }
+ if (distCount == distNodeCount && storCount == storNodeCount) return;
+
+ long remainingTime = maxTime - System.currentTimeMillis();
+ if (remainingTime <= 0) {
+ throw new IllegalStateException("Did not get all " + distNodeCount + " distributors and " + storNodeCount
+ + " storage nodes registered in slobrok within timeout of " + timeoutMillis + " ms. (Got "
+ + distCount + " distributors and " + storCount + " storage nodes)");
+ }
+ monitor.wait(10);
+ }
+ }
+ }
+
+ public boolean hasZookeeperConnection() { return !database.isClosed(); }
+
+ // Used by unit tests.
+ public int getSlobrokMirrorUpdates() { return ((SlobrokClient)nodeLookup).getMirror().updates(); }
+
+ public ContentCluster getCluster() { return cluster; }
+
+ public List<NodeEvent> getNodeEvents(Node n) { return eventLog.getNodeEvents(n); }
+
+ public EventLog getEventLog() {
+ return eventLog;
+ }
+}
diff --git a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/FleetControllerOptions.java b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/FleetControllerOptions.java
new file mode 100644
index 00000000000..f18fa6d3a9b
--- /dev/null
+++ b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/FleetControllerOptions.java
@@ -0,0 +1,210 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.vespa.clustercontroller.core;
+
+import com.yahoo.jrt.slobrok.api.BackOffPolicy;
+import com.yahoo.vdslib.distribution.ConfiguredNode;
+import com.yahoo.vdslib.distribution.Distribution;
+import com.yahoo.vdslib.state.NodeType;
+import com.yahoo.vespa.clustercontroller.core.status.statuspage.StatusPageServer;
+
+import java.util.*;
+import java.text.DecimalFormat;
+import java.text.DecimalFormatSymbols;
+
+/**
+ * This class represents all the options that can be set in the fleetcontroller.
+ * Tests typically just generate an instance of this object to use in fleet controller for testing.
+ * A real application generate this object from config, and on config updates, post new options to the fleet controller.
+ */
+public class FleetControllerOptions implements Cloneable {
+
+ // TODO: Make fields private
+
+ public String fleetControllerConfigId;
+ public String slobrokConfigId;
+
+ public String clusterName;
+ public int fleetControllerIndex = 0;
+ public int fleetControllerCount = 1;
+ public int stateGatherCount = 2;
+
+ // TODO: This cannot be null but nonnull is not verified
+ public String slobrokConnectionSpecs[];
+ public int rpcPort = 0;
+ public int httpPort = 0;
+ public int distributionBits = 16;
+
+ /** Timeout before breaking zookeeper session (in milliseconds) */
+ public int zooKeeperSessionTimeout = 5 * 60 * 1000;
+ /**
+ * Timeout between master disappearing before new master will take over.
+ * (Grace period to allow old master to detect that it is disconnected from zookeeper)
+ */
+ public int masterZooKeeperCooldownPeriod = 15 * 1000;
+
+ public String zooKeeperServerAddress = null;
+
+ public int statePollingFrequency = 5000;
+ /**
+ * Max amount of time to keep a node, that has previously been available
+ * in steady state, in maintenance mode, while node is unreachable, before setting it down.
+ */
+ public Map<NodeType, Integer> maxTransitionTime = new TreeMap<>();
+
+ /**
+ * Max amount of time to keep a storage node, that is initializing, in maintenance mode, without any further
+ * initializing progress being received, before setting it down.
+ */
+ public int maxInitProgressTime = 5000;
+
+ public int maxPrematureCrashes = 4;
+ public long stableStateTimePeriod = 2 * 60 * 60 * 1000;
+
+ public int eventLogMaxSize = 1024;
+ public int eventNodeLogMaxSize = 1024;
+
+ public int minDistributorNodesUp = 1;
+ public int minStorageNodesUp = 1;
+ public double minRatioOfDistributorNodesUp = 0.50;
+ public double minRatioOfStorageNodesUp = 0.50;
+
+ /**
+ * Milliseconds to sleep after doing a work cycle where we did no work. Some events do not interrupt the sleeping,
+ * such as slobrok changes, so shouldn't set this too high.
+ */
+ public int cycleWaitTime = 100;
+ /**
+ * Minimum time to pass (in milliseconds) before broadcasting our first systemstate. Set small in unit tests,
+ * but should be a few seconds in a real system to prevent new nodes taking over from disturbing the system by
+ * putting out a different systemstate just because all nodes don't answer witihin a single cycle.
+ * If all nodes have reported before this time, the min time is ignored and system state is broadcasted.
+ */
+ public long minTimeBeforeFirstSystemStateBroadcast = 0;
+
+ /**
+ * StateRequestTimeout for the request are randomized a bit to avoid congestion on replies. The effective
+ * interval is
+ * [nodeStateRequestTimeoutEarliestPercentage * nodeStateRequestTimeoutMS / 100,
+ * nodeStateRequestTimeoutLatestPercentage * nodeStateRequestTimeoutMS / 100].
+ */
+ public int nodeStateRequestTimeoutMS = 5 * 60 * 1000;
+ public int nodeStateRequestTimeoutEarliestPercentage = 80;
+ public int nodeStateRequestTimeoutLatestPercentage = 95;
+ public int nodeStateRequestRoundTripTimeMaxSeconds = 5;
+
+ public int minTimeBetweenNewSystemStates = 0;
+ public boolean showLocalSystemStatesInEventLog = true;
+
+ /** Maximum time a node can be missing from slobrok before it is tagged down. */
+ public int maxSlobrokDisconnectGracePeriod = 1000;
+
+ /** Set by tests to retry often. */
+ public BackOffPolicy slobrokBackOffPolicy = null;
+
+ public Distribution storageDistribution;
+
+ // TODO: Get rid of this by always getting nodes by distribution.getNodes()
+ public Set<ConfiguredNode> nodes;
+
+ // TODO: Replace usage of this by usage where the nodes are explicitly passed (below)
+ public FleetControllerOptions(String clusterName) {
+ this.clusterName = clusterName;
+ maxTransitionTime.put(NodeType.DISTRIBUTOR, 0);
+ maxTransitionTime.put(NodeType.STORAGE, 5000);
+ nodes = new TreeSet<>();
+ for (int i = 0; i < 10; i++)
+ nodes.add(new ConfiguredNode(i, false));
+ }
+
+ public FleetControllerOptions(String clusterName, Collection<ConfiguredNode> nodes) {
+ this.clusterName = clusterName;
+ maxTransitionTime.put(NodeType.DISTRIBUTOR, 0);
+ maxTransitionTime.put(NodeType.STORAGE, 5000);
+ this.nodes = new TreeSet<>(nodes);
+ }
+
+ /** Called on reconfiguration of this cluster */
+ public void setStorageDistribution(Distribution distribution) {
+ this.storageDistribution = distribution;
+ this.nodes = distribution.getNodes();
+ }
+
+ public FleetControllerOptions clone() {
+ try {
+ // TODO: This should deep clone
+ return (FleetControllerOptions) super.clone();
+ } catch (CloneNotSupportedException e) {
+ throw new RuntimeException("Will not happen");
+ }
+ }
+
+ public static String splitZooKeeperAddress(String s) {
+ StringBuilder sb = new StringBuilder();
+ while (true) {
+ int index = s.indexOf(',');
+ if (index > 0) {
+ sb.append(s.substring(0, index + 1)).append(' ');
+ s = s.substring(index+1);
+ } else {
+ break;
+ }
+ }
+ sb.append(s);
+ return sb.toString();
+ }
+
+ static DecimalFormat DecimalDot2 = new DecimalFormat("0.00", new DecimalFormatSymbols(Locale.ENGLISH));
+
+ public void writeHtmlState(StringBuilder sb, StatusPageServer.HttpRequest request) {
+ String slobrokspecs = "";
+ for (int i=0; i<slobrokConnectionSpecs.length; ++i) {
+ if (i != 0) slobrokspecs += "<br>";
+ slobrokspecs += slobrokConnectionSpecs[i];
+ }
+ sb.append("<h1>Current config</h1>\n")
+ .append("<p>Fleet controller config id: ").append(fleetControllerConfigId == null ? null : fleetControllerConfigId.replaceAll("\n", "<br>\n")).append("</p>\n")
+ .append("<p>Slobrok config id: ").append(slobrokConfigId == null ? null : slobrokConfigId.replaceAll("\n", "<br>\n")).append("</p>\n")
+ .append("<table border=\"1\" cellspacing=\"0\"><tr><th>Property</th><th>Value</th></tr>\n");
+
+ sb.append("<tr><td><nobr>Cluster name</nobr></td><td align=\"right\">").append(clusterName).append("</td></tr>");
+ sb.append("<tr><td><nobr>Fleet controller index</nobr></td><td align=\"right\">").append(fleetControllerIndex).append("/").append(fleetControllerCount).append("</td></tr>");
+ sb.append("<tr><td><nobr>Number of fleetcontrollers gathering states from nodes</nobr></td><td align=\"right\">").append(stateGatherCount).append("</td></tr>");
+
+ sb.append("<tr><td><nobr>Slobrok connection spec</nobr></td><td align=\"right\">").append(slobrokspecs).append("</td></tr>");
+ sb.append("<tr><td><nobr>RPC port</nobr></td><td align=\"right\">").append(rpcPort == 0 ? "Pick random available" : rpcPort).append("</td></tr>");
+ sb.append("<tr><td><nobr>HTTP port</nobr></td><td align=\"right\">").append(httpPort == 0 ? "Pick random available" : httpPort).append("</td></tr>");
+ sb.append("<tr><td><nobr>Master cooldown period</nobr></td><td align=\"right\">").append(RealTimer.printDuration(masterZooKeeperCooldownPeriod)).append("</td></tr>");
+ String zooKeeperAddress = (zooKeeperServerAddress == null ? "Not using Zookeeper" : splitZooKeeperAddress(zooKeeperServerAddress));
+ sb.append("<tr><td><nobr>Zookeeper server address</nobr></td><td align=\"right\">").append(zooKeeperAddress).append("</td></tr>");
+ sb.append("<tr><td><nobr>Zookeeper session timeout</nobr></td><td align=\"right\">").append(RealTimer.printDuration(zooKeeperSessionTimeout)).append("</td></tr>");
+
+ sb.append("<tr><td><nobr>Cycle wait time</nobr></td><td align=\"right\">").append(cycleWaitTime).append(" ms</td></tr>");
+ sb.append("<tr><td><nobr>Minimum time before first clusterstate broadcast as master</nobr></td><td align=\"right\">").append(RealTimer.printDuration(minTimeBeforeFirstSystemStateBroadcast)).append("</td></tr>");
+ sb.append("<tr><td><nobr>Minimum time between official cluster states</nobr></td><td align=\"right\">").append(RealTimer.printDuration(minTimeBetweenNewSystemStates)).append("</td></tr>");
+ sb.append("<tr><td><nobr>Slobrok mirror backoff policy</nobr></td><td align=\"right\">").append(slobrokBackOffPolicy == null ? "default" : "overridden").append("</td></tr>");
+
+ sb.append("<tr><td><nobr>Node state request timeout</nobr></td><td align=\"right\">").append(RealTimer.printDuration(nodeStateRequestTimeoutMS)).append("</td></tr>");
+ sb.append("<tr><td><nobr>VDS 4.1 node state polling frequency</nobr></td><td align=\"right\">").append(RealTimer.printDuration(statePollingFrequency)).append("</td></tr>");
+ sb.append("<tr><td><nobr>Maximum distributor transition time</nobr></td><td align=\"right\">").append(RealTimer.printDuration(maxTransitionTime.get(NodeType.DISTRIBUTOR))).append("</td></tr>");
+ sb.append("<tr><td><nobr>Maximum storage transition time</nobr></td><td align=\"right\">").append(RealTimer.printDuration(maxTransitionTime.get(NodeType.STORAGE))).append("</td></tr>");
+ sb.append("<tr><td><nobr>Maximum initialize without progress time</nobr></td><td align=\"right\">").append(RealTimer.printDuration(maxInitProgressTime)).append("</td></tr>");
+ sb.append("<tr><td><nobr>Maximum premature crashes</nobr></td><td align=\"right\">").append(maxPrematureCrashes).append("</td></tr>");
+ sb.append("<tr><td><nobr>Stable state time period</nobr></td><td align=\"right\">").append(RealTimer.printDuration(stableStateTimePeriod)).append("</td></tr>");
+ sb.append("<tr><td><nobr>Slobrok disconnect grace period</nobr></td><td align=\"right\">").append(RealTimer.printDuration(maxSlobrokDisconnectGracePeriod)).append("</td></tr>");
+
+ sb.append("<tr><td><nobr>Number of distributor nodes</nobr></td><td align=\"right\">").append(nodes == null ? "Autodetect" : nodes.size()).append("</td></tr>");
+ sb.append("<tr><td><nobr>Number of storage nodes</nobr></td><td align=\"right\">").append(nodes == null ? "Autodetect" : nodes.size()).append("</td></tr>");
+ sb.append("<tr><td><nobr>Minimum distributor nodes being up for cluster to be up</nobr></td><td align=\"right\">").append(minDistributorNodesUp).append("</td></tr>");
+ sb.append("<tr><td><nobr>Minimum storage nodes being up for cluster to be up</nobr></td><td align=\"right\">").append(minStorageNodesUp).append("</td></tr>");
+ sb.append("<tr><td><nobr>Minimum percentage of distributor nodes being up for cluster to be up</nobr></td><td align=\"right\">").append(DecimalDot2.format(100 * minRatioOfDistributorNodesUp)).append(" %</td></tr>");
+ sb.append("<tr><td><nobr>Minimum percentage of storage nodes being up for cluster to be up</nobr></td><td align=\"right\">").append(DecimalDot2.format(100 * minRatioOfStorageNodesUp)).append(" %</td></tr>");
+
+ sb.append("<tr><td><nobr>Show local cluster state changes</nobr></td><td align=\"right\">").append(showLocalSystemStatesInEventLog).append("</td></tr>");
+ sb.append("<tr><td><nobr>Maximum event log size</nobr></td><td align=\"right\">").append(eventLogMaxSize).append("</td></tr>");
+ sb.append("<tr><td><nobr>Maximum node event log size</nobr></td><td align=\"right\">").append(eventNodeLogMaxSize).append("</td></tr>");
+ sb.append("<tr><td><nobr>Wanted distribution bits</nobr></td><td align=\"right\">").append(distributionBits).append("</td></tr>");
+
+ sb.append("</table>");
+ }
+
+}
diff --git a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/FleetControllerTask.java b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/FleetControllerTask.java
new file mode 100644
index 00000000000..1c717f14930
--- /dev/null
+++ b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/FleetControllerTask.java
@@ -0,0 +1,9 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.vespa.clustercontroller.core;
+
+/**
+ * Represents a task that the fleet controller should perform.
+ */
+public interface FleetControllerTask {
+ public void execute(FleetController fleetController);
+}
diff --git a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/GetNodeStateRequest.java b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/GetNodeStateRequest.java
new file mode 100644
index 00000000000..5ff858079b1
--- /dev/null
+++ b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/GetNodeStateRequest.java
@@ -0,0 +1,68 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.vespa.clustercontroller.core;
+
+/**
+ * Represents an abstract request sent from the fleet controller to the controlled nodes to get state.
+ */
+public abstract class GetNodeStateRequest {
+
+ private final NodeInfo nodeInfo;
+ private Reply reply;
+
+ public GetNodeStateRequest(NodeInfo nodeInfo) {
+ this.nodeInfo = nodeInfo;
+ }
+
+ public Reply getReply() {
+ return reply;
+ }
+
+ /** Called when the reply to this request becomes available. */
+ // TODO: The request shouldn't have this, of course
+ public void setReply(Reply reply) { this.reply = reply; }
+
+ public NodeInfo getNodeInfo() { return nodeInfo; }
+
+ public abstract void abort();
+
+ public static class Reply {
+
+ private final int returnCode;
+ private final String returnMessage;
+ private final String stateString;
+ private final String hostInfo;
+
+ /** Create a failure reply */
+ public Reply(int returnCode, String errorMessage) {
+ this.returnCode = returnCode;
+ this.returnMessage = errorMessage;
+ this.stateString = null;
+ this.hostInfo = null;
+ }
+
+ /** Create a successful reply */
+ public Reply(String stateString, String hostInfo) {
+ this.returnCode = 0;
+ this.returnMessage = null;
+ this.stateString = stateString;
+ this.hostInfo = hostInfo;
+ }
+
+ /** Returns the return code, which is 0 on success */
+ public int getReturnCode() { return returnCode; }
+
+ /** Returns the returned error message, or null on success */
+ public String getReturnMessage() { return returnMessage; }
+
+ /** Returns the state string, or null if this request failed */
+ public String getStateString() { return stateString; }
+
+ /** Returns the host info, or null if this request failed */
+ public String getHostInfo() { return hostInfo; }
+
+ /** Returns whether this request failed */
+ public boolean isError() { return returnCode != 0; }
+
+ }
+
+}
diff --git a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/LatencyStats.java b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/LatencyStats.java
new file mode 100644
index 00000000000..482bfbf6004
--- /dev/null
+++ b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/LatencyStats.java
@@ -0,0 +1,31 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.vespa.clustercontroller.core;
+
+/**
+ * LatencyStats handles adding latencies and counts.
+ * @author hakon
+ */
+public class LatencyStats {
+
+ private long latencyMsSum;
+ private long count;
+
+ public LatencyStats() { this(0, 0); }
+
+ /**
+ * @param latencyMsSum The sum of the latencies of all RPCs (or whatever) in milliseconds.
+ * @param count The number of RPC calls (or whatever).
+ */
+ public LatencyStats(long latencyMsSum, long count) {
+ this.latencyMsSum = latencyMsSum;
+ this.count = count;
+ }
+
+ void add(LatencyStats latencyToAdd) {
+ latencyMsSum += latencyToAdd.latencyMsSum;
+ count += latencyToAdd.count;
+ }
+
+ public long getLatencyMsSum() { return latencyMsSum; }
+ public long getCount() { return count; }
+}
diff --git a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/LeafGroups.java b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/LeafGroups.java
new file mode 100644
index 00000000000..5de5fe65795
--- /dev/null
+++ b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/LeafGroups.java
@@ -0,0 +1,31 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.vespa.clustercontroller.core;
+
+import com.yahoo.vdslib.distribution.Group;
+
+import java.util.ArrayList;
+import java.util.List;
+
+public class LeafGroups {
+
+ /**
+ * Return a list of all groups that do not themselves have any child groups,
+ * i.e. only the groups that contain nodes.
+ *
+ * The output order is not defined.
+ */
+ public static List<Group> enumerateFrom(Group root) {
+ List<Group> leaves = new ArrayList<>();
+ visitNode(root, leaves);
+ return leaves;
+ }
+
+ private static void visitNode(Group node, List<Group> leaves) {
+ if (node.isLeafGroup()) {
+ leaves.add(node);
+ } else {
+ node.getSubgroups().forEach((idx, g) -> visitNode(g, leaves));
+ }
+ }
+
+}
diff --git a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/MasterElectionHandler.java b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/MasterElectionHandler.java
new file mode 100644
index 00000000000..6c48bdf12d0
--- /dev/null
+++ b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/MasterElectionHandler.java
@@ -0,0 +1,287 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.vespa.clustercontroller.core;
+
+import com.yahoo.log.LogLevel;
+import com.yahoo.vespa.clustercontroller.core.database.DatabaseHandler;
+
+import java.util.Map;
+import java.util.logging.Logger;
+
+/**
+ * This class handles master election.
+ */
+public class MasterElectionHandler implements MasterInterface {
+
+ private static Logger log = Logger.getLogger(MasterElectionHandler.class.getName());
+
+ private final Object monitor;
+ private final Timer timer;
+ private int index;
+ private int totalCount;
+ private Integer masterCandidate; // The lowest indexed node in zookeeper
+ private int nextInLineCount; // Our position in line of the nodes in zookeeper
+ private int followers; // How many nodes are currently voting for the master candidate
+ private Map<Integer, Integer> masterData;
+ private Map<Integer, Integer> nextMasterData;
+ private long masterGoneFromZooKeeperTime; // Set to time master fleet controller disappears from zookeeper
+ private long masterZooKeeperCooldownPeriod; // The period in ms that we won't take over unless master come back.
+
+ public MasterElectionHandler(int index, int totalCount, Object monitor, Timer timer) {
+ this.monitor = monitor;
+ this.timer = timer;
+ this.index = index;
+ this.totalCount = totalCount;
+ this.nextInLineCount = Integer.MAX_VALUE;
+ // Only a given set of nodes can ever become master
+ if (index > (totalCount - 1) / 2) {
+ log.log(LogLevel.DEBUG, "Cluster controller " + index + ": We can never become master and will always stay a follower.");
+ }
+ // Tag current time as when we have not seen any other master. Make sure we're not taking over at once for master that is on the way down
+ masterGoneFromZooKeeperTime = timer.getCurrentTimeInMillis();
+ }
+
+ public void setFleetControllerCount(int count) {
+ totalCount = count;
+ if (count == 1) {
+ masterCandidate = 0;
+ followers = 1;
+ nextInLineCount = 0;
+ }
+ }
+
+ public void setMasterZooKeeperCooldownPeriod(int period) {
+ masterZooKeeperCooldownPeriod = period;
+ }
+
+ @Override
+ public boolean isMaster() {
+ Integer master = getMaster();
+ return (master == null ? false : master == index);
+ }
+
+ @Override
+ public Integer getMaster() {
+ // If too few followers there can be no master
+ if (2 * followers <= totalCount) {
+ return null;
+ }
+ // If all are following master candidate, it is master if it exists.
+ if (followers == totalCount) {
+ return masterCandidate;
+ }
+ // If not all are following we only accept master candidate if old master
+ // disappeared sufficient time ago
+ if (masterGoneFromZooKeeperTime + masterZooKeeperCooldownPeriod > timer.getCurrentTimeInMillis()) {
+ return null;
+ }
+ return masterCandidate;
+ }
+
+ public String getMasterReason() {
+ if (masterCandidate == null) {
+ return "There is currently no master candidate.";
+ }
+ // If too few followers there can be no master
+ if (2 * followers <= totalCount) {
+ return "More than half of the nodes must agree for there to be a master. Only " + followers + " of "
+ + totalCount + " nodes agree on current master candidate (" + masterCandidate + ").";
+ }
+ // If all are following master candidate, it is master if it exists.
+ if (followers == totalCount) {
+ return "All " + totalCount + " nodes agree that " + masterCandidate + " is current master.";
+ }
+
+ // If not all are following we only accept master candidate if old master
+ // disappeared sufficient time ago
+ if (masterGoneFromZooKeeperTime + masterZooKeeperCooldownPeriod > timer.getCurrentTimeInMillis()) {
+ return followers + " of " + totalCount + " nodes agree " + masterCandidate + " should be master, "
+ + "but old master cooldown period of " + masterZooKeeperCooldownPeriod + " ms has not passed yet. "
+ + "To ensure it has got time to realize it is no longer master before we elect a new one, "
+ + "currently there is no master.";
+ }
+ return followers + " of " + totalCount + " nodes agree " + masterCandidate + " is master.";
+ }
+
+ public boolean isAmongNthFirst(int first) { return (nextInLineCount < first); }
+
+ public boolean watchMasterElection(DatabaseHandler database,
+ DatabaseHandler.Context dbContext) throws InterruptedException {
+ if (totalCount == 1) return false; // No point in doing master election with only one node configured to be cluster controller
+ if (nextMasterData == null) {
+ if (masterCandidate == null) {
+ log.log(LogLevel.SPAM, "Cluster controller " + index + ": No current master candidate. Waiting for data to do master election.");
+ }
+ return false; // Nothing have happened since last time.
+ }
+ // Move next data to temporary, such that we don't need to keep lock, and such that we don't retry
+ // if we happen to fail processing the data.
+ Map<Integer, Integer> state;
+ log.log(LogLevel.INFO, "Cluster controller " + index + ": Handling new master election, as we have received " + nextMasterData.size() + " entries");
+ synchronized (monitor) {
+ state = nextMasterData;
+ nextMasterData = null;
+ }
+ log.log(LogLevel.INFO, "Cluster controller " + index + ": Got master election state " + toString(state) + ".");
+ if (state.isEmpty()) throw new IllegalStateException("Database has no master data. We should at least have data for ourselves.");
+ Map.Entry<Integer, Integer> first = state.entrySet().iterator().next();
+ Integer currentMaster = getMaster();
+ if (currentMaster != null && first.getKey().intValue() != currentMaster.intValue()) {
+ log.log(LogLevel.INFO, "Cluster controller " + index + ": Master gone from ZooKeeper. Tagging timestamp. Will wait " + this.masterZooKeeperCooldownPeriod + " ms.");
+ masterGoneFromZooKeeperTime = timer.getCurrentTimeInMillis();
+ masterCandidate = null;
+ }
+ if (first.getValue().intValue() != first.getKey().intValue()) {
+ log.log(LogLevel.INFO, "Fleet controller " + index + ": First index is not currently trying to become master. Waiting for it to change state");
+ masterCandidate = null;
+ if (first.getKey() == index) {
+ log.log(LogLevel.INFO, "Cluster controller " + index + ": We are next in line to become master. Altering our state to look for followers");
+ database.setMasterVote(dbContext, index);
+ }
+ } else {
+ masterCandidate = first.getValue();
+ followers = 0;
+ for (Map.Entry<Integer, Integer> current : state.entrySet()) {
+ if (current.getValue().intValue() == first.getKey().intValue()) {
+ ++followers;
+ }
+ }
+ if (2 * followers > totalCount) {
+ Integer newMaster = getMaster();
+ if (newMaster != null && currentMaster != null && newMaster.intValue() == currentMaster.intValue()) {
+ log.log(LogLevel.INFO, "MASTER_ELECTION: Cluster controller " + index + ": " + currentMaster + " is still the master");
+ } else if (newMaster != null && currentMaster != null) {
+ log.log(LogLevel.INFO, "MASTER_ELECTION: Cluster controller " + index + ": " + newMaster + " took over for fleet controller " + currentMaster + " as master");
+ } else if (newMaster == null) {
+ log.log(LogLevel.INFO, "MASTER_ELECTION: Cluster controller " + index + ": " + masterCandidate + " is new master candidate, but needs to wait before it can take over");
+ } else {
+ log.log(LogLevel.INFO, "MASTER_ELECTION: Cluster controller " + index + ": " + newMaster + " is newly elected master");
+ }
+ } else {
+ log.log(LogLevel.INFO, "MASTER_ELECTION: Cluster controller " + index + ": Currently too few followers for cluster controller candidate " + masterCandidate + ". No current master. (" + followers + "/" + totalCount + " followers)");
+ }
+ Integer ourState = state.get(index);
+ if (ourState == null) throw new IllegalStateException("Database lacks data from ourselves. This should always be present.");
+ if (ourState.intValue() != first.getKey().intValue()) {
+ log.log(LogLevel.INFO, "Cluster controller " + index + ": Altering our state to follow new fleet controller master candidate " + first.getKey());
+ database.setMasterVote(dbContext, first.getKey());
+ }
+ }
+ // Only a given set of nodes can ever become master
+ if (index <= (totalCount - 1) / 2) {
+ int ourPosition = 0;
+ for (Map.Entry<Integer, Integer> entry : state.entrySet()) {
+ if (entry.getKey() != index) {
+ ++ourPosition;
+ } else {
+ break;
+ }
+ }
+ if (nextInLineCount != ourPosition) {
+ nextInLineCount = ourPosition;
+ if (ourPosition > 0) {
+ log.log(LogLevel.DEBUG, "Cluster controller " + index + ": We are now " + getPosition(nextInLineCount) + " in queue to take over being master.");
+ }
+ }
+ }
+ masterData = state;
+ return true;
+ }
+
+ private static String toString(Map<Integer, Integer> data) {
+ StringBuilder sb = new StringBuilder();
+ for (Map.Entry<Integer, Integer> entry : data.entrySet()) {
+ sb.append(", ").append(entry.getKey()).append(" -> ").append(entry.getValue() == null ? "null" : entry.getValue());
+ }
+ if (sb.length() > 2) {
+ sb.delete(0, 2);
+ }
+ sb.insert(0, "data(");
+ sb.append(")");
+ return sb.toString();
+ }
+
+ private String getPosition(int val) {
+ if (val < 1) return "invalid(" + val + ")";
+ if (val == 1) { return "first"; }
+ if (val == 2) { return "second"; }
+ if (val == 3) { return "third"; }
+ return val + "th";
+ }
+
+ public void handleFleetData(Map<Integer, Integer> data) {
+ log.log(LogLevel.INFO, "Cluster controller " + index + ": Got new fleet data with " + data.size() + " entries: " + data);
+ synchronized (monitor) {
+ nextMasterData = data;
+ monitor.notifyAll();
+ }
+ }
+
+ public void lostDatabaseConnection() {
+ if (totalCount > 1) {
+ log.log(LogLevel.INFO, "Cluster controller " + index + ": Clearing master data as we lost connection on node " + index);
+ masterData = null;
+ masterCandidate = null;
+ followers = 0;
+ nextMasterData = null;
+ }
+ }
+
+ public void writeHtmlState(StringBuilder sb, int stateGatherCount) {
+ sb.append("<h2>Master state</h2>\n");
+ Integer master = getMaster();
+ if (master != null) {
+ sb.append("<p>Current cluster controller master is node " + master + ".");
+ if (master.intValue() == index) sb.append(" (This node)");
+ sb.append("</p>");
+ } else {
+ if (2 * followers <= totalCount) {
+ sb.append("<p>There is currently no master. Less than half the fleet controllers (")
+ .append(followers).append(") are following master candidate ").append(masterCandidate)
+ .append(".</p>");
+ } else if (masterGoneFromZooKeeperTime + masterZooKeeperCooldownPeriod > timer.getCurrentTimeInMillis()) {
+ long time = timer.getCurrentTimeInMillis() - masterGoneFromZooKeeperTime;
+ sb.append("<p>There is currently no master. Only " + (time / 1000) + " seconds have past since")
+ .append(" old master disappeared. At least " + (masterZooKeeperCooldownPeriod / 1000) + " must pass")
+ .append(" before electing new master unless all possible master candidates are online.</p>");
+ }
+ }
+ if ((master == null || master.intValue() != index) && nextInLineCount < stateGatherCount) {
+ sb.append("<p>As we are number ").append(nextInLineCount)
+ .append(" in line for taking over as master, we're gathering state from nodes.</p>");
+ sb.append("<p><font color=\"red\">As we are not the master, we don't know about nodes current system state"
+ + " or wanted states, so some statistics below are a bit incorrect. Look at status page on master "
+ + "for updated data.</font></p>");
+ }
+ if (index * 2 > totalCount) {
+ sb.append("<p>As lowest index fleet controller is prioritized to become master, and more than half "
+ + "of the fleet controllers need to be available to select a master, we can never become master.</p>");
+ }
+
+ // Debug data
+ sb.append("<p><font size=\"-1\" color=\"grey\">Master election handler internal state:")
+ .append("<br>Index: " + index)
+ .append("<br>Fleet controller count: " + totalCount)
+ .append("<br>Master candidate: " + masterCandidate)
+ .append("<br>Next in line count: " + nextInLineCount)
+ .append("<br>Followers: " + followers)
+ .append("<br>Master data:");
+ if (masterData == null) {
+ sb.append("null");
+ } else {
+ for (Map.Entry<Integer, Integer> e : masterData.entrySet()) {
+ sb.append(" ").append(e.getKey()).append("->").append(e.getValue());
+ }
+ }
+ sb.append("<br>Next master data:");
+ if (nextMasterData == null) {
+ sb.append("null");
+ } else {
+ for (Map.Entry<Integer, Integer> e : nextMasterData.entrySet()) {
+ sb.append(" ").append(e.getKey()).append("->").append(e.getValue());
+ }
+ }
+ sb.append("<br>Master gone from zookeeper time: " + masterGoneFromZooKeeperTime)
+ .append("<br>Master cooldown period: " + masterZooKeeperCooldownPeriod)
+ .append("</font></p>");
+ }
+}
diff --git a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/MasterInterface.java b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/MasterInterface.java
new file mode 100644
index 00000000000..8f98ad63821
--- /dev/null
+++ b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/MasterInterface.java
@@ -0,0 +1,9 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.vespa.clustercontroller.core;
+
+public interface MasterInterface {
+
+ boolean isMaster();
+ Integer getMaster();
+
+}
diff --git a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/MetricUpdater.java b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/MetricUpdater.java
new file mode 100644
index 00000000000..ce6686c3ad9
--- /dev/null
+++ b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/MetricUpdater.java
@@ -0,0 +1,91 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.vespa.clustercontroller.core;
+
+import com.yahoo.vdslib.state.*;
+import com.yahoo.vespa.clustercontroller.utils.util.ComponentMetricReporter;
+import com.yahoo.vespa.clustercontroller.utils.util.MetricReporter;
+
+import java.util.HashMap;
+import java.util.Map;
+import java.util.SortedSet;
+import java.util.TreeSet;
+
+public class MetricUpdater {
+
+ private final ComponentMetricReporter metricReporter;
+
+ public MetricUpdater(MetricReporter metricReporter, int controllerIndex) {
+ this.metricReporter = new ComponentMetricReporter(metricReporter, "cluster-controller.");
+ this.metricReporter.addDimension("controller-index", String.valueOf(controllerIndex));
+ }
+
+ public MetricReporter.Context createContext(Map<String, String> dimensions) {
+ return metricReporter.createContext(dimensions);
+ }
+
+ public void updateClusterStateMetrics(ContentCluster cluster, ClusterState state) {
+ Map<String, String> dimensions = new HashMap<>();
+ dimensions.put("cluster", cluster.getName());
+ for (NodeType type : NodeType.getTypes()) {
+ dimensions.put("node-type", type.toString().toLowerCase());
+ MetricReporter.Context context = createContext(dimensions);
+ Map<State, Integer> nodeCounts = new HashMap<>();
+ for (State s : State.values()) {
+ nodeCounts.put(s, 0);
+ }
+ for (Integer i : cluster.getConfiguredNodes().keySet()) {
+ NodeState s = state.getNodeState(new Node(type, i));
+ Integer count = nodeCounts.get(s.getState());
+ nodeCounts.put(s.getState(), count + 1);
+ }
+ for (State s : State.values()) {
+ String name = s.toString().toLowerCase() + ".count";
+ metricReporter.set(name, nodeCounts.get(s), context);
+ }
+ }
+ dimensions.remove("node-type");
+ MetricReporter.Context context = createContext(dimensions);
+ metricReporter.add("cluster-state-change", 1, context);
+ }
+
+ public void updateMasterElectionMetrics(Map<Integer, Integer> data) {
+ Map<Integer, Integer> voteCounts = new HashMap<>();
+ for(Integer i : data.values()) {
+ int count = (voteCounts.get(i) == null ? 0 : voteCounts.get(i));
+ voteCounts.put(i, count + 1);
+ }
+ SortedSet<Integer> counts = new TreeSet<>(voteCounts.values());
+ if (counts.size() > 1 && counts.first() > counts.last()) {
+ throw new IllegalStateException("Assumed smallest count is sorted first");
+ }
+ int maxCount = counts.isEmpty() ? 0 : counts.last();
+ metricReporter.set("agreed-master-votes", maxCount);
+ }
+
+ public void becameMaster() {
+ metricReporter.set("is-master", 1);
+ metricReporter.add("master-change", 1);
+ }
+
+ public void noLongerMaster() {
+ metricReporter.set("is-master", 0);
+ metricReporter.add("master-change", 1);
+ }
+
+ public void addTickTime(long millis, boolean didWork) {
+ if (didWork) {
+ metricReporter.set("busy-tick-time-ms", millis);
+ } else {
+ metricReporter.set("idle-tick-time-ms", millis);
+ }
+ }
+
+ public void recordNewNodeEvent() {
+ // TODO(hakon): Replace add() with a persistent aggregate metric.
+ metricReporter.add("node-event", 1);
+ }
+
+ public void updateMergeOpMetrics(Map<String, NodeMergeStats> storageNodeStats) {
+ // TODO(hakon): Remove this method once we figure out how to propagate metrics to state HTTP API.
+ }
+}
diff --git a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/NodeEvent.java b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/NodeEvent.java
new file mode 100644
index 00000000000..d9d83c705b1
--- /dev/null
+++ b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/NodeEvent.java
@@ -0,0 +1,48 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.vespa.clustercontroller.core;
+
+public class NodeEvent implements Event {
+
+ private final NodeInfo node;
+ private final String description;
+ private final long eventTime;
+
+ public enum Type {
+ REPORTED,
+ CURRENT,
+ WANTED
+ }
+
+ private final Type type;
+
+ public NodeEvent(NodeInfo node, String description, Type type, long currentTime) {
+ this.node = node;
+ this.description = description;
+ this.eventTime = currentTime;
+ this.type = type;
+ }
+
+ public NodeInfo getNode() {
+ return node;
+ }
+
+ @Override
+ public long getTimeMs() {
+ return eventTime;
+ }
+
+ @Override
+ public String getDescription() {
+ return description;
+ }
+
+ @Override
+ public String toString() {
+ return "Event: " + node.getNode() + ": " + description;
+ }
+
+ @Override
+ public String getCategory() {
+ return type.toString();
+ }
+}
diff --git a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/NodeInfo.java b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/NodeInfo.java
new file mode 100644
index 00000000000..c261a4bb194
--- /dev/null
+++ b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/NodeInfo.java
@@ -0,0 +1,442 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.vespa.clustercontroller.core;
+
+import com.yahoo.collections.Pair;
+import com.yahoo.jrt.Target;
+import com.yahoo.log.LogLevel;
+import com.yahoo.vdslib.distribution.Distribution;
+import com.yahoo.vdslib.distribution.Group;
+import com.yahoo.vdslib.state.*;
+import com.yahoo.vespa.clustercontroller.core.hostinfo.HostInfo;
+
+import java.io.PrintWriter;
+import java.io.StringWriter;
+import java.util.LinkedList;
+import java.util.List;
+import java.util.Map;
+import java.util.TreeMap;
+import java.util.logging.Logger;
+
+/**
+ * Represents a node in a content cluster.
+ */
+abstract public class NodeInfo implements Comparable<NodeInfo> {
+
+ public static Logger log = Logger.getLogger(NodeInfo.class.getName());
+
+ private final ContentCluster cluster;
+ private Node node;
+ private String rpcAddress;
+ /** If set to a timestamp, we haven't seen this node in slobrok since then. If not set, it is currently in slobrok. */
+ private Long lastSeenInSlobrok;
+ private List<Pair<GetNodeStateRequest, Long>> pendingNodeStateRequests = new LinkedList<>();
+ private NodeState reportedState;
+ private NodeState wantedState;
+
+ /** Whether this node has been configured to be retired and should therefore always return retired as its wanted state */
+ private boolean configuredRetired;
+
+ /** The time we set the current state last. */
+ private long nextAttemptTime;
+ /** Cached connection to this node. */
+ private Target connection;
+ /** We cache last connection we did request info on, as we want to report appropriate error for node regardless of whether other commands have created new connection. */
+ public Target lastRequestInfoConnection;
+ /** Sets the version we assumed we were when opening this connection. (Needed in case we need to do some sort of handshaking to decrease version. */
+ private int connectionVersion;
+ /**
+ * Counts the number of attempts we have tried since last time we had
+ * contact with the node. (Used to retry fast early)
+ */
+ private int connectionAttemptCount;
+ /**
+ * Set to 0 each time we get a successful node state reply from a node.
+ * Set to the current time each time we do a node state request, if the value was 0 to begin with.
+ * Thus, if value is not 0, this is the start of the period where we could not talk to the node.
+ */
+ private long timeOfFirstFailingConnectionAttempt;
+ /**
+ * Sets the version of the state transaction that this node accepts.
+ * Version 0 is the original one, with getnodestate command.
+ * Version 1 is the new one, with getnodestate2 command too.
+ */
+ private int version;
+
+ private Map<Integer, ClusterState> systemStateVersionSent = new TreeMap<>();
+ private ClusterState systemStateVersionAcknowledged;
+ /**
+ * When a node goes from an up state to a down state, update this flag with the start timestamp the node had before going down.
+ * The cluster state broadcaster will use this to identify whether distributors have restarted.
+ */
+ private long wentDownWithStartTime = 0;
+ private ClusterState wentDownAtClusterState;
+
+ private long transitionTime = -1;
+ private long initProgressTime = -1;
+ private long upStableStateTime = -1;
+ private long downStableStateTime = -1;
+
+ private int prematureCrashCount = 0;
+
+ /** Remember last time we adjusted version, such that if we had multiple requests pending when we did, we can avoid printing error right after. */
+ private long adjustedVersionTime = 0;
+
+ private HostInfo hostInfo = HostInfo.createHostInfo("{}");
+
+ private Group group;
+
+ // NOTE: See update(node) below
+ NodeInfo(ContentCluster cluster, Node n, boolean configuredRetired, String rpcAddress, Distribution distribution) {
+ if (cluster == null) throw new IllegalArgumentException("Cluster not set");
+ reportedState = new NodeState(n.getType(), State.DOWN);
+ wantedState = new NodeState(n.getType(), State.UP);
+ this.cluster = cluster;
+ this.node = n;
+ this.connectionAttemptCount = 0;
+ this.timeOfFirstFailingConnectionAttempt = 0;
+ this.version = getLatestVersion();
+ this.connectionVersion = getLatestVersion();
+ this.configuredRetired = configuredRetired;
+ this.rpcAddress = rpcAddress;
+ this.lastSeenInSlobrok = null;
+ this.nextAttemptTime = 0;
+ setGroup(distribution);
+ }
+
+ public void setRpcAddress(String rpcAddress) {
+ this.rpcAddress = rpcAddress;
+ resetConnectionInformation();
+ }
+
+ private void resetConnectionInformation() {
+ this.lastSeenInSlobrok = null;
+ this.nextAttemptTime = 0;
+ this.version = getLatestVersion();
+ this.connectionVersion = getLatestVersion();
+ }
+
+ public long getWentDownWithStartTime() { return wentDownWithStartTime; }
+
+ public long getStartTimestamp() { return cluster.getStartTimestamp(node); }
+ public void setStartTimestamp(long ts) { cluster.setStartTimestamp(node, ts); }
+
+ public void setTransitionTime(long time) { transitionTime = time; }
+ public long getTransitionTime() { return transitionTime; }
+
+ public void setInitProgressTime(long time) { initProgressTime = time; }
+ public long getInitProgressTime() { return initProgressTime; }
+
+ public long getUpStableStateTime() { return upStableStateTime; }
+
+ public long getDownStableStateTime() { return downStableStateTime; }
+
+ public int getConnectionAttemptCount() { return connectionAttemptCount; }
+
+ public void setPrematureCrashCount(int count) {
+ if (prematureCrashCount != count) {
+ prematureCrashCount = count;
+ log.log(LogLevel.DEBUG, "Premature crash count on " + toString() + " set to " + count);
+ }
+ }
+ public int getPrematureCrashCount() { return prematureCrashCount; }
+
+ public boolean isPendingGetNodeStateRequest(GetNodeStateRequest r) {
+ for(Pair<GetNodeStateRequest, Long> it : pendingNodeStateRequests) {
+ if (it.getFirst() == r) return true;
+ }
+ return false;
+ }
+
+ public void setConfiguredRetired(boolean retired) {
+ this.configuredRetired = retired;
+ }
+
+ public void setNextGetStateAttemptTime(long timeInMillis) {
+ nextAttemptTime = timeInMillis;
+ }
+
+ // TODO: This implements hashCode and compareTo, but not equals ... that's odd
+
+ @Override
+ public int compareTo(NodeInfo info) {
+ return node.compareTo(info.node);
+ }
+
+ @Override
+ public int hashCode() {
+ return node.hashCode();
+ }
+
+ @Override
+ public String toString() { return node.toString(); }
+
+ public void setGroup(Distribution distribution) {
+ this.group = null;
+ if (distribution != null) {
+ this.group = distribution.getRootGroup().getGroupForNode(node.getIndex());
+ }
+ }
+
+ public Group getGroup() {
+ return group;
+ }
+
+ public int getLatestVersion() {
+ return 2;
+ }
+
+ public String getSlobrokAddress() {
+ return "storage/cluster." + cluster.getName() + "/" + node.getType() + "/" + node.getIndex();
+ }
+
+ public void markRpcAddressOutdated(Timer timer) {
+ lastSeenInSlobrok = timer.getCurrentTimeInMillis();
+ }
+ public void markRpcAddressLive() {
+ lastSeenInSlobrok = null;
+ }
+
+ public Node getNode() { return node; }
+
+ public boolean isDistributor() {
+ return node.getType().equals(NodeType.DISTRIBUTOR);
+ }
+
+ public boolean isStorage() {
+ return node.getType().equals(NodeType.STORAGE);
+ }
+
+ public int getNodeIndex() {
+ return node.getIndex();
+ }
+
+ public ContentCluster getCluster() { return cluster; }
+
+ /** Returns true if the node is currentl registered in slobrok */
+ public boolean isRpcAddressOutdated() { return lastSeenInSlobrok != null; }
+
+ public Long getRpcAddressOutdatedTimestamp() { return lastSeenInSlobrok; }
+
+ public void abortCurrentNodeStateRequests() {
+ for(Pair<GetNodeStateRequest, Long> it : pendingNodeStateRequests) {
+ it.getFirst().abort();
+ }
+ pendingNodeStateRequests.clear();
+ }
+
+ public void setCurrentNodeStateRequest(GetNodeStateRequest r, long timeInMS) {
+ pendingNodeStateRequests.add(new Pair<>(r, timeInMS));
+ }
+
+ public String getRpcAddress() { return rpcAddress; }
+
+ public NodeState getReportedState() { return reportedState; }
+
+ /** Returns the wanted state of this node - which can either be set by a user or configured */
+ public NodeState getWantedState() {
+ if (configuredRetired) return new NodeState(node.getType(), State.RETIRED);
+ return wantedState;
+ }
+
+ /** Returns the wanted state set directly by a user (i.e not configured) */
+ public NodeState getUserWantedState() { return wantedState; }
+
+ public long getTimeOfFirstFailingConnectionAttempt() {
+ return timeOfFirstFailingConnectionAttempt;
+ }
+
+ public Long getLatestNodeStateRequestTime() {
+ if (pendingNodeStateRequests.isEmpty()) return null;
+ return pendingNodeStateRequests.get(pendingNodeStateRequests.size() - 1).getSecond();
+ }
+
+ public void setTimeOfFirstFailingConnectionAttempt(long timeInMS) {
+ if (timeOfFirstFailingConnectionAttempt == 0) {
+ timeOfFirstFailingConnectionAttempt = timeInMS;
+ }
+ }
+
+ public void removePendingGetNodeStateRequest(GetNodeStateRequest request) {
+ for (int i=0, n=pendingNodeStateRequests.size(); i<n; ++i) {
+ if (pendingNodeStateRequests.get(i).getFirst() == request) {
+ pendingNodeStateRequests.remove(i);
+ break;
+ }
+ }
+ }
+
+ public void setReportedState(NodeState state, long time) {
+ if (state == null) {
+ state = new NodeState(node.getType(), State.DOWN);
+ }
+ if (state.getState().oneOf("dsm") && !reportedState.getState().oneOf("dsm")) {
+ wentDownWithStartTime = reportedState.getStartTimestamp();
+ wentDownAtClusterState = getNewestSystemStateSent();
+ log.log(LogLevel.DEBUG, "Setting going down timestamp of node " + node + " to " + wentDownWithStartTime);
+ }
+ if (state.getState().equals(State.DOWN) && !reportedState.getState().oneOf("d")) {
+ downStableStateTime = time;
+ log.log(LogLevel.DEBUG, "Down stable state on " + toString() + " altered to " + time);
+ }
+ else if (state.getState().equals(State.UP) && !reportedState.getState().oneOf("u")) {
+ upStableStateTime = time;
+ log.log(LogLevel.DEBUG, "Up stable state on " + toString() + " altered to " + time);
+ }
+ if (!state.getState().validReportedNodeState(node.getType())) {
+ throw new IllegalStateException("Trying to set illegal reported node state: " + state);
+ }
+ if (state.getState().oneOf("sd")) {
+ // If we have multiple descriptions, assume that the first one happening after a node goes down is the most interesting one
+ if (!reportedState.getState().oneOf("ui") && reportedState.hasDescription()) {
+ state.setDescription(reportedState.getDescription());
+ }
+ reportedState = state;
+ if (connectionAttemptCount < Integer.MAX_VALUE) {
+ ++connectionAttemptCount;
+ }
+ if (connectionAttemptCount < 5) {
+ nextAttemptTime = time + 100;
+ } else if (connectionAttemptCount < 20) {
+ nextAttemptTime = time + 250;
+ } else if (connectionAttemptCount < 100) {
+ nextAttemptTime = time + 1000;
+ } else {
+ nextAttemptTime = time + 5000;
+ }
+ log.log(LogLevel.SPAM, "Failed to get state from node " + toString() + ", scheduling next attempt in " + (nextAttemptTime - time) + " ms.");
+ } else {
+ connectionAttemptCount = 0;
+ timeOfFirstFailingConnectionAttempt = 0;
+ reportedState = state;
+ if (version == 0 || state.getState().equals(State.STOPPING)) {
+ nextAttemptTime = time + cluster.getPollingFrequency();
+ log.log(LogLevel.SPAM, "Scheduling next attempt to get state from " + toString() + " in " + (nextAttemptTime - time) + " ms (polling freq).");
+ } else {
+ nextAttemptTime = time;
+ }
+ }
+ log.log(LogLevel.SPAM, "Set reported state of node " + this + " to " + reportedState + ". Next connection attempt is at " + nextAttemptTime);
+ }
+
+ /** Sets the wanted state. The wanted state is taken as UP if a null argument is given */
+ public void setWantedState(NodeState state) {
+ if (state == null)
+ state = new NodeState(node.getType(), State.UP);
+ NodeState newWanted = new NodeState(node.getType(), state.getState());
+ newWanted.setDescription(state.getDescription());
+ if (!newWanted.equals(state)) {
+ try{
+ throw new Exception();
+ } catch (Exception e) {
+ StringWriter sw = new StringWriter();
+ e.printStackTrace(new PrintWriter(sw));
+ log.warning("Attempted to set wanted state with more than just a main state. Extra data stripped. Original data '" + state.serialize(true) + ":\n" + sw.toString());
+ }
+ }
+ wantedState = newWanted;
+ log.log(LogLevel.SPAM, "Set wanted state of node " + this + " to " + wantedState + ".");
+ }
+
+ public long getTimeForNextStateRequestAttempt() {
+ return nextAttemptTime;
+ }
+
+ /** @return True if we demoted communication version so this can be valid error. */
+ public boolean notifyNoSuchMethodError(String methodName, Timer timer) {
+ if (methodName.equals("getnodestate3")) {
+ if (version > 1) {
+ log.log(LogLevel.DEBUG, "Node " + toString() + " does not support " + methodName + " call. Setting version to 1.");
+ version = 1;
+ nextAttemptTime = 0;
+ adjustedVersionTime = timer.getCurrentTimeInMillis();
+ return true;
+ } else if (timer.getCurrentTimeInMillis() - 2000 < adjustedVersionTime) {
+ log.log(LogLevel.DEBUG, "Node " + toString() + " does not support " + methodName + " call. Version already at 1 and was recently adjusted, so ignoring it.");
+ return true;
+ }
+ } else if (methodName.equals("getnodestate2") || methodName.equals("setsystemstate2")) {
+ if (version > 0) {
+ log.log(LogLevel.DEBUG, "Node " + toString() + " does not support " + methodName + " call. Setting version to 0.");
+ version = 0;
+ nextAttemptTime = 0;
+ adjustedVersionTime = timer.getCurrentTimeInMillis();
+ return true;
+ } else if (timer.getCurrentTimeInMillis() - 2000 < adjustedVersionTime) {
+ log.log(LogLevel.DEBUG, "Node " + toString() + " does not support " + methodName + " call. Version already at 0 and was recently adjusted, so ignoring it.");
+ return true;
+ }
+ }
+ log.log(LogLevel.WARNING, "Node " + toString() + " does not support " + methodName + " which it should.");
+ return false;
+ }
+
+ public Target getConnection() {
+ return connection;
+ }
+
+ public Target setConnection(Target t) {
+ this.connection = t;
+ this.connectionVersion = getLatestVersion();
+ return t;
+ }
+
+ public int getVersion() { return version; }
+ public int getConnectionVersion() { return connectionVersion; }
+ public void setConnectionVersion(int version) { connectionVersion = version; }
+
+ public ClusterState getNewestSystemStateSent() {
+ ClusterState last = null;
+ for (ClusterState s : systemStateVersionSent.values()) {
+ if (last == null || last.getVersion() < s.getVersion()) {
+ last = s;
+ }
+ }
+ return last;
+ }
+ public int getNewestSystemStateVersionSent() {
+ ClusterState last = getNewestSystemStateSent();
+ return last == null ? -1 : last.getVersion();
+ }
+ public int getSystemStateVersionAcknowledged() {
+ return (systemStateVersionAcknowledged == null ? -1 : systemStateVersionAcknowledged.getVersion());
+ }
+ public void setSystemStateVersionSent(ClusterState state) {
+ if (state == null) throw new Error("Should not clear info for last version sent");
+ if (systemStateVersionSent.containsKey(state.getVersion())) {
+ throw new IllegalStateException("We have already sent cluster state version " + version + " to " + node);
+ }
+ systemStateVersionSent.put(state.getVersion(), state);
+ }
+ public void setSystemStateVersionAcknowledged(Integer version, boolean success) {
+ if (version == null) throw new Error("Should not clear info for last version acked");
+ if (!systemStateVersionSent.containsKey(version)) {
+ throw new IllegalStateException("Got response for cluster state " + version + " which is not tracked as pending for node " + node);
+ }
+ ClusterState state = systemStateVersionSent.remove(version);
+ if (success && (systemStateVersionAcknowledged == null || systemStateVersionAcknowledged.getVersion() < state.getVersion())) {
+ systemStateVersionAcknowledged = state;
+ if (wentDownWithStartTime != 0
+ && (wentDownAtClusterState == null || wentDownAtClusterState.getVersion() < state.getVersion())
+ && !state.getNodeState(node).getState().oneOf("dsm"))
+ {
+ log.log(LogLevel.DEBUG, "Clearing going down timestamp of node " + node + " after receiving ack of cluster state " + state);
+ wentDownWithStartTime = 0;
+ }
+ }
+ }
+
+ public void setHostInfo(HostInfo hostInfo) {
+ // Note: This will blank out any hostInfo we already had, if the parsing fails.
+ // This is intentional, to make sure we're never left with stale data.
+ this.hostInfo = hostInfo;
+ }
+
+ public HostInfo getHostInfo() { return hostInfo; }
+
+ /**
+ * @return vtag if set or null otherwise and on errors.
+ */
+ public String getVtag() {
+ return hostInfo.getVtag().getVersionOrNull();
+ }
+}
diff --git a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/NodeLookup.java b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/NodeLookup.java
new file mode 100644
index 00000000000..66261f21582
--- /dev/null
+++ b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/NodeLookup.java
@@ -0,0 +1,14 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.vespa.clustercontroller.core;
+
+import com.yahoo.vespa.clustercontroller.core.listeners.NodeAddedOrRemovedListener;
+
+/**
+ * Interface for a node lookup service, such as slobrok, config, or tier controller.
+ */
+public interface NodeLookup {
+
+ void shutdown();
+
+ boolean updateCluster(ContentCluster cluster, NodeAddedOrRemovedListener listener);
+}
diff --git a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/NodeMergeStats.java b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/NodeMergeStats.java
new file mode 100644
index 00000000000..67f11574c5b
--- /dev/null
+++ b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/NodeMergeStats.java
@@ -0,0 +1,152 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.vespa.clustercontroller.core;
+
+import com.yahoo.vespa.clustercontroller.core.hostinfo.StorageNode;
+
+/**
+ * @author hakon
+ * @since 5.33
+ */
+public class NodeMergeStats {
+
+ /**
+ * Constructor that sets values to zero if not present.
+ */
+ public NodeMergeStats(StorageNode storageNodePojo) {
+ this.nodeIndex = storageNodePojo.getIndex();
+
+ StorageNode.OutstandingMergeOps mergeOps = storageNodePojo.getOutstandingMergeOpsOrNull();
+ if (mergeOps == null) {
+ mergeOps = new StorageNode.OutstandingMergeOps();
+ }
+ syncing = createAmount(mergeOps.getSyncingOrNull());
+ copyingIn = createAmount(mergeOps.getCopyingInOrNull());
+ movingOut = createAmount(mergeOps.getMovingOutOrNull());
+ copyingOut = createAmount(mergeOps.getCopyingOutOrNull());
+ }
+
+ private static Amount createAmount(StorageNode.Buckets bucketOrNull) {
+ if (bucketOrNull == null) {
+ return new Amount();
+ }
+ return new Amount(bucketOrNull.getBuckets());
+ }
+
+ static public class Amount {
+ private long buckets;
+
+ Amount() { this(0); }
+ Amount(long buckets) { this.buckets = buckets; }
+
+ public void set(Amount other) {
+ buckets = other.buckets;
+ }
+
+ public long getBuckets() {
+ return buckets;
+ }
+
+ /**
+ * Logically, add (factor * amount) to this object.
+ */
+ void scaledAdd(int factor, Amount amount) {
+ buckets += factor * amount.buckets;
+ }
+
+ public boolean equals(Object other) {
+ if (!(other instanceof Amount)) {
+ return false;
+ }
+ Amount otherAmount = (Amount) other;
+ return buckets == otherAmount.buckets;
+ }
+
+ public int hashCode() {
+ return (int)buckets;
+ }
+
+ public String toString() {
+ return String.format("{buckets = %d}", buckets);
+ }
+ }
+
+ private final Amount syncing;
+ private final Amount copyingIn;
+ private final Amount movingOut;
+ private final Amount copyingOut;
+ private int nodeIndex;
+
+ /**
+ * An instance with all 0 amounts.
+ */
+ public NodeMergeStats(int index) {
+ this(index, new Amount(), new Amount(), new Amount(), new Amount());
+ }
+
+ NodeMergeStats(int index, Amount syncing, Amount copyingIn, Amount movingOut, Amount copyingOut) {
+ this.nodeIndex = index;
+ this.syncing = syncing;
+ this.copyingIn = copyingIn;
+ this.movingOut = movingOut;
+ this.copyingOut = copyingOut;
+ }
+
+ public void set(NodeMergeStats stats) {
+ nodeIndex = stats.nodeIndex;
+ syncing.set(stats.syncing);
+ copyingIn.set(stats.copyingIn);
+ movingOut.set(stats.movingOut);
+ copyingOut.set(stats.copyingOut);
+ }
+
+ int getNodeIndex() { return nodeIndex; }
+ public Amount getSyncing() { return syncing; }
+ public Amount getCopyingIn() { return copyingIn; }
+ public Amount getMovingOut() { return movingOut; }
+ public Amount getCopyingOut() { return copyingOut; }
+
+ void add(NodeMergeStats stats) {
+ scaledAdd(1, stats);
+ }
+
+ void subtract(NodeMergeStats stats) {
+ scaledAdd(-1, stats);
+ }
+
+ /**
+ * Logically, adds (factor * stats) to this object. factor of 1 is normal add, -1 is subtraction.
+ */
+ private void scaledAdd(int factor, NodeMergeStats stats) {
+ syncing.scaledAdd(factor, stats.syncing);
+ copyingIn.scaledAdd(factor, stats.copyingIn);
+ movingOut.scaledAdd(factor, stats.movingOut);
+ copyingOut.scaledAdd(factor, stats.copyingOut);
+ }
+
+ @Override
+ public int hashCode() {
+ return (int) (syncing.buckets +
+ copyingIn.buckets * 31 +
+ movingOut.buckets * 17 +
+ copyingOut.buckets * 7);
+ }
+
+ @Override
+ public boolean equals(Object other) {
+ if (!(other instanceof NodeMergeStats)) {
+ return false;
+ }
+
+ NodeMergeStats otherStats = (NodeMergeStats) other;
+ return nodeIndex == otherStats.nodeIndex &&
+ syncing.equals(otherStats.syncing) &&
+ copyingIn.equals(otherStats.copyingIn) &&
+ movingOut.equals(otherStats.movingOut) &&
+ copyingOut.equals(otherStats.copyingOut);
+ }
+
+ public String toString() {
+ return String.format("{index = %d, syncing = %s, copyingIn = %s, movingOut = %s, copyingOut = %s}",
+ nodeIndex, syncing, copyingIn, movingOut, copyingOut);
+ }
+}
diff --git a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/NodeStateChangeChecker.java b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/NodeStateChangeChecker.java
new file mode 100644
index 00000000000..f312194c15d
--- /dev/null
+++ b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/NodeStateChangeChecker.java
@@ -0,0 +1,244 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.vespa.clustercontroller.core;
+
+import com.yahoo.vdslib.state.Node;
+import com.yahoo.vdslib.state.NodeState;
+import com.yahoo.vdslib.state.NodeType;
+import com.yahoo.vdslib.state.State;
+import com.yahoo.vespa.clustercontroller.core.hostinfo.StorageNode;
+import com.yahoo.vespa.clustercontroller.utils.staterestapi.requests.SetUnitStateRequest;
+
+import java.util.List;
+
+/**
+ * Checks if a node can be upgraded.
+ *
+ * @author dybdahl
+ */
+public class NodeStateChangeChecker {
+
+ private final int minStorageNodesUp;
+ private double minRatioOfStorageNodesUp;
+ private final int requiredRedundancy;
+ private final ClusterInfo clusterInfo;
+
+ public NodeStateChangeChecker(
+ int minStorageNodesUp,
+ double minRatioOfStorageNodesUp,
+ int requiredRedundancy,
+ ClusterInfo clusterInfo) {
+ this.minStorageNodesUp = minStorageNodesUp;
+ this.minRatioOfStorageNodesUp = minRatioOfStorageNodesUp;
+ this.requiredRedundancy = requiredRedundancy;
+ this.clusterInfo = clusterInfo;
+ }
+
+ public static class Result {
+
+ public enum Action {
+ MUST_SET_WANTED_STATE,
+ ALREADY_SET,
+ DISALLOWED
+ }
+
+ private final Action action;
+ private final String reason;
+
+ private Result(Action action, String reason) {
+ this.action = action;
+ this.reason = reason;
+ }
+
+ public static Result createDisallowed(String reason) {
+ return new Result(Action.DISALLOWED, reason);
+ }
+
+ public static Result allowSettingOfWantedState() {
+ return new Result(Action.MUST_SET_WANTED_STATE, "Preconditions fulfilled and new state different");
+ }
+
+ public static Result createAlreadySet() {
+ return new Result(Action.ALREADY_SET, "Basic preconditions fulfilled and new state is already effective");
+ }
+
+ public boolean settingWantedStateIsAllowed() {
+ return action == Action.MUST_SET_WANTED_STATE;
+ }
+
+ public boolean wantedStateAlreadySet() {
+ return action == Action.ALREADY_SET;
+ }
+
+ public String getReason() {
+ return reason;
+ }
+
+ public String toString() {
+ return "action " + action + ": " + reason;
+ }
+ }
+
+ public Result evaluateTransition(
+ Node node, int clusterStateVersion, SetUnitStateRequest.Condition condition,
+ NodeState oldState, NodeState newState) {
+ if (condition == SetUnitStateRequest.Condition.FORCE) {
+ return Result.allowSettingOfWantedState();
+ }
+
+ if (condition != SetUnitStateRequest.Condition.SAFE) {
+ return Result.createDisallowed("Condition not implemented: " + condition.name());
+ }
+
+ if (node.getType() != NodeType.STORAGE) {
+ return Result.createDisallowed("Safe-set of node state is only supported for storage nodes! " +
+ "Requested node type: " + node.getType().toString());
+ }
+
+ // If the new state and description equals the existing, we're done. This is done for 2 cases:
+ // - We can short-circuit setting of a new wanted state, which e.g. hits ZooKeeper.
+ // - We ensure that clients that have previously set the wanted state, continue
+ // to see the same conclusion, even though they possibly would have been denied
+ // MUST_SET_WANTED_STATE if re-evaluated. This is important for implementing idempotent clients.
+ if (newState.getState().equals(oldState.getState())) {
+ return Result.createAlreadySet();
+ }
+
+ switch (newState.getState()) {
+ case UP:
+ return canSetStateUp(node, oldState.getState());
+ case MAINTENANCE:
+ return canSetStateMaintenance(node, clusterStateVersion);
+ default:
+ return Result.createDisallowed("Safe only supports state UP and MAINTENANCE, you tried: " + newState);
+ }
+ }
+
+ private Result canSetStateUp(Node node, State oldState) {
+ if (oldState != State.MAINTENANCE) {
+ return Result.createDisallowed("Refusing to set wanted state to up when it is currently in " + oldState);
+ }
+
+ if (clusterInfo.getNodeInfo(node).getReportedState().getState() != State.UP) {
+ return Result.createDisallowed("Refuse to set wanted state to UP, " +
+ "since the reported state is not UP (" +
+ clusterInfo.getNodeInfo(node).getReportedState().getState() + ")");
+ }
+
+ return Result.allowSettingOfWantedState();
+ }
+
+ private Result canSetStateMaintenance(Node node, int clusterStateVersion) {
+ NodeInfo nodeInfo = clusterInfo.getNodeInfo(node);
+ if (nodeInfo == null) {
+ return Result.createDisallowed("Unknown node " + node);
+ }
+ NodeState reportedState = nodeInfo.getReportedState();
+ if (reportedState.getState() == State.DOWN) {
+ return Result.allowSettingOfWantedState();
+ }
+
+ Result checkDistributorsResult = checkDistributors(node, clusterStateVersion);
+ if (!checkDistributorsResult.settingWantedStateIsAllowed()) {
+ return checkDistributorsResult;
+ }
+
+ Result ongoingChanges = anyNodeSetToMaintenance();
+ if (!ongoingChanges.settingWantedStateIsAllowed()) {
+ return ongoingChanges;
+ }
+
+ if (clusterInfo.getStorageNodeInfo().size() < minStorageNodesUp) {
+ return Result.createDisallowed("There are only " + clusterInfo.getStorageNodeInfo().size() +
+ " storage nodes up, while config requires at least " + minStorageNodesUp);
+ }
+ Result fractionCheck = isFractionHighEnough();
+ if (!fractionCheck.settingWantedStateIsAllowed()) {
+ return fractionCheck;
+ }
+
+ return Result.allowSettingOfWantedState();
+ }
+
+ private Result anyNodeSetToMaintenance() {
+ for (NodeInfo nodeInfo : clusterInfo.getAllNodeInfo()) {
+ if (nodeInfo.getWantedState().getState() == State.MAINTENANCE) {
+ return Result.createDisallowed("There is a node already in maintenance:" + nodeInfo.getNodeIndex());
+ }
+ }
+ return Result.allowSettingOfWantedState();
+ }
+
+ private Result isFractionHighEnough() {
+ int upNodesCount = 0;
+ int nodesCount = 0;
+ for (StorageNodeInfo storageNodeInfo : clusterInfo.getStorageNodeInfo()) {
+ nodesCount++;
+ State state = storageNodeInfo.getReportedState().getState();
+ if (state == State.UP || state == State.RETIRED || state == State.INITIALIZING) {
+ upNodesCount++;
+ }
+ }
+ if (nodesCount == 0) {
+ return Result.createDisallowed("No storage nodes in cluster state, not safe to restart.");
+ }
+ if (((double)upNodesCount) / nodesCount < minRatioOfStorageNodesUp) {
+ return Result.createDisallowed("Not enough storage nodes running, running: " + upNodesCount
+ + " total storage nodes " + nodesCount +
+ " required fraction " + minRatioOfStorageNodesUp);
+ }
+ return Result.allowSettingOfWantedState();
+ }
+
+ private Result checkStorageNodesForDistributor(
+ DistributorNodeInfo distributorNodeInfo, List<StorageNode> storageNodes, Node node) {
+ for (StorageNode storageNode : storageNodes) {
+ if (storageNode.getIndex() == node.getIndex()) {
+ Integer minReplication = storageNode.getMinCurrentReplicationFactorOrNull();
+ // Why test on != null? Missing min-replication is OK (indicate empty/few buckets on system).
+ if (minReplication != null && minReplication < requiredRedundancy) {
+ return Result.createDisallowed("Distributor "
+ + distributorNodeInfo.getNodeIndex()
+ + " says storage node " + node.getIndex()
+ + " has buckets with redundancy as low as "
+ + storageNode.getMinCurrentReplicationFactorOrNull()
+ + ", but we require at least " + requiredRedundancy);
+ } else {
+ return Result.allowSettingOfWantedState();
+ }
+ }
+ }
+
+ return Result.allowSettingOfWantedState();
+ }
+
+ /**
+ * We want to check with the distributors to verify that it is safe to take down the storage node.
+ * @param node the node to be checked
+ * @param clusterStateVersion the cluster state we expect distributors to have
+ */
+ private Result checkDistributors(Node node, int clusterStateVersion) {
+ if (clusterInfo.getDistributorNodeInfo().isEmpty()) {
+ return Result.createDisallowed("Not aware of any distributors, probably not safe to upgrade?");
+ }
+ for (DistributorNodeInfo distributorNodeInfo : clusterInfo.getDistributorNodeInfo()) {
+ Integer distributorClusterStateVersion = distributorNodeInfo.getHostInfo().getClusterStateVersionOrNull();
+ if (distributorClusterStateVersion == null) {
+ return Result.createDisallowed("Distributor node (" + distributorNodeInfo.getNodeIndex()
+ + ") has not reported any cluster state version yet.");
+ } else if (distributorClusterStateVersion != clusterStateVersion) {
+ return Result.createDisallowed("Distributor node (" + distributorNodeInfo.getNodeIndex()
+ + ") does not report same version ("
+ + distributorNodeInfo.getHostInfo().getClusterStateVersionOrNull()
+ + ") as fleetcontroller has (" + clusterStateVersion + ")");
+ }
+
+ List<StorageNode> storageNodes = distributorNodeInfo.getHostInfo().getDistributor().getStorageNodes();
+ Result storageNodesResult = checkStorageNodesForDistributor(distributorNodeInfo, storageNodes, node);
+ if (!storageNodesResult.settingWantedStateIsAllowed()) {
+ return storageNodesResult;
+ }
+ }
+
+ return Result.allowSettingOfWantedState();
+ }
+}
diff --git a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/NodeStateGatherer.java b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/NodeStateGatherer.java
new file mode 100644
index 00000000000..b2d67d8d2fa
--- /dev/null
+++ b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/NodeStateGatherer.java
@@ -0,0 +1,254 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.vespa.clustercontroller.core;
+
+import com.yahoo.jrt.ErrorCode;
+import com.yahoo.jrt.Target;
+import com.yahoo.log.LogLevel;
+import com.yahoo.vdslib.state.NodeState;
+import com.yahoo.vdslib.state.State;
+import com.yahoo.vespa.clustercontroller.core.hostinfo.HostInfo;
+import com.yahoo.vespa.clustercontroller.core.listeners.NodeStateOrHostInfoChangeHandler;
+
+import java.util.LinkedList;
+import java.util.List;
+import java.util.logging.Logger;
+
+/**
+ * Collects the state of all nodes by making remote requests and handling the replies.
+ */
+public class NodeStateGatherer {
+
+ public static final Logger log = Logger.getLogger(NodeStateGatherer.class.getName());
+
+ private final Object monitor;
+ private final Timer timer;
+ private final List<GetNodeStateRequest> replies = new LinkedList<>();
+
+ private class NodeStateWaiter implements Communicator.Waiter<GetNodeStateRequest> {
+ @Override
+ public void done(GetNodeStateRequest reply) {
+ synchronized (monitor) {
+ replies.add(reply);
+ monitor.notifyAll();
+ }
+ }
+ }
+
+ private final NodeStateWaiter waiter = new NodeStateWaiter();
+
+ private final EventLog eventLog;
+ private int maxSlobrokDisconnectGracePeriod = 1000;
+ private long nodeStateRequestTimeoutMS = 10 * 1000;
+
+ public NodeStateGatherer(Object monitor, Timer timer, EventLog log) {
+ this.monitor = monitor;
+ this.timer = timer;
+ this.eventLog = log;
+ }
+
+ public void setMaxSlobrokDisconnectGracePeriod(int millisecs) { maxSlobrokDisconnectGracePeriod = millisecs; }
+
+ public void setNodeStateRequestTimeout(long millisecs) { nodeStateRequestTimeoutMS = millisecs; }
+
+ /**
+ * Sends state requests to nodes that does not have one pending and is due
+ * for another attempt.
+ */
+ public boolean sendMessages(ContentCluster cluster, Communicator communicator, NodeStateOrHostInfoChangeHandler listener) {
+ boolean sentAnyMessages = false;
+ long currentTime = timer.getCurrentTimeInMillis();
+ for (NodeInfo info : cluster.getNodeInfo()) {
+ Long requestTime = info.getLatestNodeStateRequestTime();
+
+ if (requestTime != null && (currentTime - requestTime < nodeStateRequestTimeoutMS)) continue; // pending request
+ if (info.getTimeForNextStateRequestAttempt() > currentTime) continue; // too early
+
+ if (info.getRpcAddress() == null || info.isRpcAddressOutdated()) { // Cannot query state of node without RPC address
+ log.log(LogLevel.DEBUG, "Not sending getNodeState request to node " + info.getNode() + ": Not in slobrok");
+ NodeState reportedState = info.getReportedState().clone();
+ if (( ! reportedState.getState().equals(State.DOWN) && currentTime - info.getRpcAddressOutdatedTimestamp() > maxSlobrokDisconnectGracePeriod)
+ || reportedState.getState().equals(State.STOPPING)) // Don't wait for grace period if we expect node to be stopping
+ {
+ log.log(LogLevel.DEBUG, "Setting reported state to DOWN "
+ + (reportedState.getState().equals(State.STOPPING)
+ ? "as node completed stopping."
+ : "as node has been out of slobrok longer than " + maxSlobrokDisconnectGracePeriod + "."));
+ if (reportedState.getState().oneOf("iur") || ! reportedState.hasDescription()) {
+ StringBuilder sb = new StringBuilder().append("Set node down as it has been out of slobrok for ")
+ .append(currentTime - info.getRpcAddressOutdatedTimestamp()).append(" ms which is more than the max limit of ")
+ .append(maxSlobrokDisconnectGracePeriod).append(" ms.");
+ reportedState.setDescription(sb.toString());
+ }
+ reportedState.setState(State.DOWN);
+ listener.handleNewNodeState(info, reportedState.clone());
+ }
+ info.setReportedState(reportedState, currentTime); // Must reset it to null to get connection attempts counted
+ continue;
+ }
+
+ communicator.getNodeState(info, waiter);
+ sentAnyMessages = true;
+ }
+ return sentAnyMessages;
+ }
+
+ /** Reads replies to get node state requests and create events. */
+ public boolean processResponses(NodeStateOrHostInfoChangeHandler listener) {
+ boolean processedAnyResponses = false;
+ long currentTime = timer.getCurrentTimeInMillis();
+ synchronized(monitor) {
+ for(GetNodeStateRequest req : replies) {
+ processedAnyResponses = true;
+ NodeInfo info = req.getNodeInfo();
+
+ if (!info.isPendingGetNodeStateRequest(req)) {
+ log.log(LogLevel.DEBUG, "Ignoring getnodestate response from " + info.getNode()
+ + " as request replied to is not the most recent pending request.");
+ continue;
+ }
+
+ info.removePendingGetNodeStateRequest(req);
+
+ GetNodeStateRequest.Reply reply = req.getReply();
+
+ if (reply.isError()) {
+ if (reply.getReturnCode() != ErrorCode.ABORT) {
+ NodeState newState = handleError(req, info, currentTime);
+ if (newState != null) {
+ listener.handleNewNodeState(info, newState.clone());
+ info.setReportedState(newState, currentTime);
+ } else {
+ log.log(LogLevel.DEBUG, "Ignoring get node state error. Need to resend");
+ }
+ } else {
+ log.log(LogLevel.DEBUG, "Ignoring getnodestate response from " + info.getNode() + " as it was aborted by client");
+ }
+
+ continue;
+ }
+
+ try {
+ NodeState state = NodeState.deserialize(info.getNode().getType(), reply.getStateString());
+
+ // For version 0 responses, we poll, so we likely have not altered the state
+ if ( ! state.equals(info.getReportedState()))
+ listener.handleNewNodeState(info, state.clone());
+ info.setReportedState(state, currentTime);
+ } catch (Exception e) {
+ log.log(LogLevel.WARNING, "Failed to process get node state response", e);
+ info.setReportedState(new NodeState(info.getNode().getType(), State.DOWN), currentTime);
+ }
+
+ // Important: The old host info should be accessible in info.getHostInfo(), see interface.
+ // Therefore, setHostInfo() must be called AFTER handleUpdatedHostInfo().
+ HostInfo hostInfo = HostInfo.createHostInfo(reply.getHostInfo());
+ listener.handleUpdatedHostInfo(info, hostInfo);
+ info.setHostInfo(hostInfo);
+
+ }
+ replies.clear();
+ }
+ return processedAnyResponses;
+ }
+
+ private NodeState handleError(GetNodeStateRequest req, NodeInfo info, long currentTime) {
+ String prefix = "Failed get node state request: ";
+ NodeState newState = new NodeState(info.getNode().getType(), State.DOWN);
+ if (req.getReply().getReturnCode() == ErrorCode.TIMEOUT) {
+ String msg = "RPC timeout";
+ if (info.getReportedState().getState().oneOf("ui")) {
+ eventLog.addNodeOnlyEvent(new NodeEvent(info, prefix + "RPC timeout talking to node.", NodeEvent.Type.REPORTED, currentTime), LogLevel.INFO);
+ } else if (!info.getReportedState().hasDescription() || !info.getReportedState().getDescription().equals(msg)) {
+ log.log(LogLevel.DEBUG, "Failed to talk to node " + info + ": " + req.getReply().getReturnCode() + " " + req.getReply().getReturnMessage() + ": " + msg);
+ }
+ newState.setDescription(msg);
+ } else if (req.getReply().getReturnCode() == ErrorCode.CONNECTION) {
+ Target target = info.lastRequestInfoConnection;
+ Exception reason = (target == null ? null : target.getConnectionLostReason());
+ if (reason != null) {
+ String msg = reason.getMessage();
+ if (msg == null) msg = "(null)";
+ newState.setDescription(msg);
+ if (msg.equals("Connection refused")) {
+ msg = "Connection error: Connection refused";
+ if (info.getReportedState().getState().oneOf("ui")) {
+ eventLog.addNodeOnlyEvent(new NodeEvent(info, prefix + msg, NodeEvent.Type.REPORTED, currentTime), LogLevel.INFO);
+ } else if (!info.getReportedState().hasDescription() || !info.getReportedState().getDescription().equals(msg)) {
+ log.log(LogLevel.DEBUG, "Failed to talk to node " + info + ": " + req.getReply().getReturnCode()
+ + " " + req.getReply().getReturnMessage() + ": " + msg);
+ }
+ newState.setState(State.DOWN);
+ } else if (msg.equals("jrt: Connection closed by peer") || msg.equals("Connection reset by peer")) {
+ msg = "Connection error: Closed at other end. (Node or switch likely shut down)";
+ if (info.isRpcAddressOutdated()) {
+ msg += " Node is no longer in slobrok.";
+ }
+ if (info.getReportedState().getState().oneOf("ui")) {
+ eventLog.addNodeOnlyEvent(new NodeEvent(info, prefix + msg, NodeEvent.Type.REPORTED, currentTime), LogLevel.INFO);
+ } else if (!info.getReportedState().hasDescription() || !info.getReportedState().getDescription().equals(msg)) {
+ log.log(LogLevel.DEBUG, "Failed to talk to node " + info + ": " + req.getReply().getReturnCode() + " " + req.getReply().getReturnMessage() + ": " + msg);
+ }
+ newState.setState(State.DOWN).setDescription(msg);
+ } else if (msg.equals("Connection timed out")) {
+ if (info.getReportedState().getState().oneOf("ui")) {
+ msg = "Connection error: Timeout";
+ eventLog.addNodeOnlyEvent(new NodeEvent(info, prefix + msg, NodeEvent.Type.REPORTED, currentTime), LogLevel.INFO);
+ } else {
+ log.log(LogLevel.DEBUG, "Failed to talk to node " + info + ": " + req.getReply().getReturnCode() + " " + req.getReply().getReturnMessage() + ": " + msg);
+ }
+ } else {
+ msg = "Connection error: " + reason;
+ if (info.getReportedState().getState().oneOf("ui")) {
+ eventLog.addNodeOnlyEvent(new NodeEvent(info, prefix + msg, NodeEvent.Type.REPORTED, currentTime), LogLevel.WARNING);
+ } else if (!info.getReportedState().hasDescription() || !info.getReportedState().getDescription().equals(msg)) {
+ log.log(LogLevel.DEBUG, "Failed to talk to node " + info + ": " + req.getReply().getReturnCode() + " " + req.getReply().getReturnMessage() + ": " + msg);
+ }
+ newState.setDescription(msg);
+ }
+ } else {
+ String msg = "Connection error: Unexpected error with no reason set. Assuming it is a network issue: " +
+ req.getReply().getReturnCode() + ": " + req.getReply().getReturnMessage();
+
+ if (info.getReportedState().getState().oneOf("ui")) {
+ eventLog.addNodeOnlyEvent(new NodeEvent(info, prefix + msg, NodeEvent.Type.REPORTED, currentTime), LogLevel.WARNING);
+ } else if (!info.getReportedState().hasDescription() || !info.getReportedState().getDescription().equals(msg)) {
+ log.log(LogLevel.DEBUG, "Failed to talk to node " + info + ": " + req.getReply().getReturnCode() + " " + req.getReply().getReturnMessage() + ": " + msg);
+ }
+ newState.setDescription(msg);
+ }
+ } else if (req.getReply().getReturnCode() == Communicator.TRANSIENT_ERROR) {
+ return null;
+ } else if (req.getReply().getReturnCode() == ErrorCode.NO_SUCH_METHOD) {
+ String msg = "no such RPC method error";
+ if (info.getReportedState().getState().oneOf("ui")) {
+ eventLog.addNodeOnlyEvent(new NodeEvent(info, prefix + msg, NodeEvent.Type.REPORTED, currentTime), LogLevel.WARNING);
+ } else if (!info.getReportedState().hasDescription() || !info.getReportedState().getDescription().equals(msg)) {
+ log.log(LogLevel.DEBUG, "Failed to talk to node " + info + ": " + req.getReply().getReturnCode() + " " + req.getReply().getReturnMessage() + ": " + msg);
+ }
+ newState.setState(State.DOWN).setDescription(msg + ": get node state");
+ } else if (req.getReply().getReturnCode() == 75004) {
+ String msg = "Node refused to answer RPC request and is likely stopping: " + req.getReply().getReturnMessage();
+ // The node is shutting down and is not accepting requests from anyone
+ if (info.getReportedState().getState().equals(State.STOPPING)) {
+ log.log(LogLevel.DEBUG, "Failed to get node state from " + info + " because it is still shutting down.");
+ } else {
+ if (info.getReportedState().getState().oneOf("ui")) {
+ eventLog.addNodeOnlyEvent(new NodeEvent(info, prefix + msg, NodeEvent.Type.REPORTED, currentTime), LogLevel.INFO);
+ } else if (!info.getReportedState().hasDescription() || !info.getReportedState().getDescription().equals(msg)) {
+ log.log(LogLevel.DEBUG, "Failed to talk to node " + info + ": " + req.getReply().getReturnCode() + " " + req.getReply().getReturnMessage() + ": " + msg);
+ }
+ }
+ newState.setState(State.STOPPING).setDescription(msg);
+ } else {
+ String msg = "Got unexpected error, assumed to be node issue " + req.getReply().getReturnCode() + ": " + req.getReply().getReturnMessage();
+ if (info.getReportedState().getState().oneOf("ui")) {
+ eventLog.addNodeOnlyEvent(new NodeEvent(info, prefix + msg, NodeEvent.Type.REPORTED, currentTime), LogLevel.WARNING);
+ } else if (!info.getReportedState().hasDescription() || !info.getReportedState().getDescription().equals(msg)) {
+ log.log(LogLevel.DEBUG, "Failed to talk to node " + info + ": " + req.getReply().getReturnCode() + " " + req.getReply().getReturnMessage() + ": " + msg);
+ }
+ newState.setState(State.DOWN).setDescription(msg);
+ }
+ return newState;
+ }
+
+}
diff --git a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/RealTimer.java b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/RealTimer.java
new file mode 100644
index 00000000000..1a4aa23aa97
--- /dev/null
+++ b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/RealTimer.java
@@ -0,0 +1,61 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.vespa.clustercontroller.core;
+
+import java.util.Calendar;
+import java.util.Locale;
+import java.util.TimeZone;
+
+/**
+ * Implementation of timer used when running for real.
+ */
+public class RealTimer implements Timer {
+
+ public long getCurrentTimeInMillis() {
+ return System.currentTimeMillis();
+ }
+
+ public static String printDuration(long time) {
+ StringBuilder sb = new StringBuilder();
+ if (time > 1000 * 60 * 60 * 24 * 2) {
+ double days = time / (1000.0 * 60 * 60 * 24);
+ sb.append(String.format(Locale.ENGLISH, "%.2f", days)).append(" day").append(Math.abs(days - 1.0) < 0.0001 ? "" : "s");
+ } else if (time > 1000 * 60 * 60 * 2) {
+ double hours = time / (1000.0 * 60 * 60);
+ sb.append(String.format(Locale.ENGLISH, "%.2f", hours)).append(" hour").append(Math.abs(hours - 1.0) < 0.0001 ? "" : "s");
+ } else if (time > 1000 * 60 * 2) {
+ double minutes = time / (1000.0 * 60);
+ sb.append(String.format(Locale.ENGLISH, "%.2f", minutes)).append(" minute").append(Math.abs(minutes - 1.0) < 0.0001 ? "" : "s");
+ } else if (time > 1000 * 2) {
+ double seconds = time / (1000.0);
+ sb.append(String.format(Locale.ENGLISH, "%.2f", seconds)).append(" s");
+ } else {
+ sb.append(time).append(" ms");
+ }
+ return sb.toString();
+ }
+
+ public static String printDateNoMilliSeconds(long time, TimeZone tz) {
+ Calendar cal = Calendar.getInstance(tz);
+ cal.setTimeInMillis(time);
+ return String.format(Locale.ENGLISH, "%04d-%02d-%02d %02d:%02d:%02d",
+ cal.get(Calendar.YEAR),
+ cal.get(Calendar.MONTH) + 1,
+ cal.get(Calendar.DAY_OF_MONTH),
+ cal.get(Calendar.HOUR_OF_DAY),
+ cal.get(Calendar.MINUTE),
+ cal.get(Calendar.SECOND));
+ }
+
+ public static String printDate(long time, TimeZone tz) {
+ Calendar cal = Calendar.getInstance(tz);
+ cal.setTimeInMillis(time);
+ return String.format(Locale.ENGLISH, "%04d-%02d-%02d %02d:%02d:%02d.%03d",
+ cal.get(Calendar.YEAR),
+ cal.get(Calendar.MONTH) + 1,
+ cal.get(Calendar.DAY_OF_MONTH),
+ cal.get(Calendar.HOUR_OF_DAY),
+ cal.get(Calendar.MINUTE),
+ cal.get(Calendar.SECOND),
+ cal.get(Calendar.MILLISECOND));
+ }
+}
diff --git a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/RemoteClusterControllerTask.java b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/RemoteClusterControllerTask.java
new file mode 100644
index 00000000000..ca7a46b2350
--- /dev/null
+++ b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/RemoteClusterControllerTask.java
@@ -0,0 +1,49 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.vespa.clustercontroller.core;
+
+import com.yahoo.vdslib.state.ClusterState;
+import com.yahoo.vespa.clustercontroller.core.listeners.NodeAddedOrRemovedListener;
+import com.yahoo.vespa.clustercontroller.core.listeners.NodeStateOrHostInfoChangeHandler;
+
+public abstract class RemoteClusterControllerTask {
+
+ public static class Context {
+ public ContentCluster cluster;
+ public ClusterState currentState;
+ public MasterInterface masterInfo;
+ public NodeStateOrHostInfoChangeHandler nodeStateOrHostInfoChangeHandler;
+ public NodeAddedOrRemovedListener nodeAddedOrRemovedListener;
+ }
+
+ private final Object monitor = new Object();
+ private boolean completed = false;
+
+ public abstract void doRemoteFleetControllerTask(Context context);
+
+ public boolean isCompleted() {
+ synchronized (monitor) {
+ return completed;
+ }
+ }
+
+ /** This is called by the fleet controller. */
+ public void notifyCompleted() {
+ synchronized (monitor) {
+ completed = true;
+ monitor.notifyAll();
+ }
+ }
+
+ public void waitForCompletion() {
+ synchronized (monitor) {
+ while (!completed) {
+ try{
+ monitor.wait();
+ } catch (InterruptedException e) {
+ throw new RuntimeException(e);
+ }
+ }
+ }
+ }
+
+}
diff --git a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/RemoteClusterControllerTaskScheduler.java b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/RemoteClusterControllerTaskScheduler.java
new file mode 100644
index 00000000000..7fddd690a9c
--- /dev/null
+++ b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/RemoteClusterControllerTaskScheduler.java
@@ -0,0 +1,6 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.vespa.clustercontroller.core;
+
+public interface RemoteClusterControllerTaskScheduler {
+ public void schedule(RemoteClusterControllerTask task);
+}
diff --git a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/SetClusterStateRequest.java b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/SetClusterStateRequest.java
new file mode 100644
index 00000000000..ebbf35aee08
--- /dev/null
+++ b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/SetClusterStateRequest.java
@@ -0,0 +1,48 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.vespa.clustercontroller.core;
+
+public abstract class SetClusterStateRequest {
+
+ private final NodeInfo nodeInfo;
+ private final int systemStateVersion;
+ private Reply reply;
+
+ public SetClusterStateRequest(NodeInfo nodeInfo, int systemStateVersion) {
+ this.nodeInfo = nodeInfo;
+ this.systemStateVersion = systemStateVersion;
+ }
+
+ public NodeInfo getNodeInfo() { return nodeInfo; }
+
+ public int getSystemStateVersion() { return systemStateVersion; }
+
+ public void setReply(Reply reply) { this.reply = reply; }
+
+ public Reply getReply() { return reply; }
+
+ public static class Reply {
+
+ final int returnCode;
+ final String returnMessage;
+
+ public Reply() {
+ this(0, null);
+ }
+
+ public Reply(int returnCode, String returnMessage) {
+ this.returnCode = returnCode;
+ this.returnMessage = returnMessage;
+ }
+
+ /** Returns whether this is an error response */
+ public boolean isError() { return returnCode != 0; }
+
+ /** Returns the return code, which is 0 if this request was successful */
+ public int getReturnCode() { return returnCode; }
+
+ /** Returns the message returned, or null if none */
+ public String getReturnMessage() { return returnMessage; }
+
+ }
+
+}
diff --git a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/StatsForStorageNodes.java b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/StatsForStorageNodes.java
new file mode 100644
index 00000000000..a7e34c7321f
--- /dev/null
+++ b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/StatsForStorageNodes.java
@@ -0,0 +1,25 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.vespa.clustercontroller.core;
+
+import java.util.Map;
+
+/**
+ * Contains stats for a set of storage nodes. This is used to store the stats returned
+ * by Distributors from their getnodestate RPCs. The stats for a single storage node
+ * is represented by the StorageNodeStats class.
+ *
+ * @author hakon
+ */
+public class StatsForStorageNodes {
+
+ final private Map<Integer, StorageNodeStats> storageNodesByIndex;
+
+ StatsForStorageNodes(Map<Integer, StorageNodeStats> storageNodesByIndex) {
+ this.storageNodesByIndex = storageNodesByIndex;
+ }
+
+ StorageNodeStats getStatsForStorageNode(int nodeIndex) {
+ return storageNodesByIndex.get(nodeIndex);
+ }
+
+}
diff --git a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/StorageMergeStats.java b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/StorageMergeStats.java
new file mode 100644
index 00000000000..2c719f9fb3c
--- /dev/null
+++ b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/StorageMergeStats.java
@@ -0,0 +1,64 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.vespa.clustercontroller.core;
+
+import java.util.HashMap;
+import java.util.Iterator;
+import java.util.Map;
+import java.util.Set;
+
+/**
+ * Class for storing the pending merge operation stats for all the storage nodes.
+ *
+ * @author hakon
+ * @since 5.34
+ */
+public class StorageMergeStats implements Iterable<NodeMergeStats> {
+
+ // Maps a storage node index to the storage node's pending merges stats.
+ private final Map<Integer, NodeMergeStats> mapToNodeStats;
+
+ public StorageMergeStats(Set<Integer> storageNodes) {
+ mapToNodeStats = new HashMap<>(storageNodes.size());
+ for (Integer index : storageNodes) {
+ mapToNodeStats.put(index, new NodeMergeStats(index));
+ }
+ }
+
+ public StorageMergeStats(Map<Integer, NodeMergeStats> mapToNodeStats) {
+ this.mapToNodeStats = mapToNodeStats;
+ }
+
+ @Override
+ public Iterator<NodeMergeStats> iterator() {
+ return mapToNodeStats.values().iterator();
+ }
+
+ NodeMergeStats getStorageNode(Integer index) {
+ return mapToNodeStats.get(index);
+ }
+
+ int size() {
+ return mapToNodeStats.size();
+ }
+
+ @Override
+ public boolean equals(Object o) {
+ if (this == o) return true;
+ if (!(o instanceof StorageMergeStats)) {
+ return false;
+ }
+
+ StorageMergeStats that = (StorageMergeStats) o;
+
+ if (mapToNodeStats != null ? !mapToNodeStats.equals(that.mapToNodeStats) : that.mapToNodeStats != null) {
+ return false;
+ }
+ return true;
+ }
+
+ @Override
+ public int hashCode() {
+ return mapToNodeStats != null ? mapToNodeStats.hashCode() : 0;
+ }
+
+}
diff --git a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/StorageNodeInfo.java b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/StorageNodeInfo.java
new file mode 100644
index 00000000000..86f8be36a9d
--- /dev/null
+++ b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/StorageNodeInfo.java
@@ -0,0 +1,20 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.vespa.clustercontroller.core;
+
+import com.yahoo.vdslib.distribution.Distribution;
+import com.yahoo.vdslib.state.Node;
+import com.yahoo.vdslib.state.NodeType;
+
+/**
+ * Class encapsulating what the Cluster Controller knows about a storage node. Most of the information is
+ * common between Storage- and Distributor- nodes, and stored in the base class NodeInfo.
+ *
+ * @author hakon
+ */
+public class StorageNodeInfo extends NodeInfo {
+
+ public StorageNodeInfo(ContentCluster cluster, int index, boolean configuredRetired, String rpcAddress, Distribution distribution) {
+ super(cluster, new Node(NodeType.STORAGE, index), configuredRetired, rpcAddress, distribution);
+ }
+
+}
diff --git a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/StorageNodeStats.java b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/StorageNodeStats.java
new file mode 100644
index 00000000000..c46e489453b
--- /dev/null
+++ b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/StorageNodeStats.java
@@ -0,0 +1,22 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.vespa.clustercontroller.core;
+
+/**
+ * Contains stats related to a single storage node.
+ *
+ * @author hakon
+ */
+public class StorageNodeStats {
+
+ final private LatencyStats distributorPutLatency;
+
+ /**
+ * @param distributorPutLatency The "put" latency from the point of view of the distributor.
+ */
+ public StorageNodeStats(LatencyStats distributorPutLatency) { this.distributorPutLatency = distributorPutLatency; }
+ public LatencyStats getDistributorPutLatency() { return distributorPutLatency; }
+ public void add(StorageNodeStats statsToAdd) {
+ distributorPutLatency.add(statsToAdd.distributorPutLatency);
+ }
+
+}
diff --git a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/StorageNodeStatsContainer.java b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/StorageNodeStatsContainer.java
new file mode 100644
index 00000000000..bb01bf80d77
--- /dev/null
+++ b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/StorageNodeStatsContainer.java
@@ -0,0 +1,27 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.vespa.clustercontroller.core;
+
+import java.util.HashMap;
+import java.util.Map;
+
+/**
+ * Contains stats for a set of storage nodes. This is used to store the stats returned
+ * by Distributors from their getnodestate RPCs. The stats for a single storage node
+ * is represented by the StorageNodeStats class.
+ *
+ * @author hakon
+ */
+public class StorageNodeStatsContainer {
+
+ final private Map<Integer, StorageNodeStats> storageNodesByIndex = new HashMap<>();
+
+ public void put(int nodeIndex, StorageNodeStats nodeStats) {
+ storageNodesByIndex.put(nodeIndex, nodeStats);
+ }
+
+ public StorageNodeStats get(int nodeIndex) {
+ return storageNodesByIndex.get(nodeIndex);
+ }
+
+ public int size() { return storageNodesByIndex.size(); }
+}
diff --git a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/SystemStateBroadcaster.java b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/SystemStateBroadcaster.java
new file mode 100644
index 00000000000..0f3bc6ada88
--- /dev/null
+++ b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/SystemStateBroadcaster.java
@@ -0,0 +1,171 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.vespa.clustercontroller.core;
+
+import com.yahoo.log.LogLevel;
+import com.yahoo.vdslib.state.*;
+import com.yahoo.vespa.clustercontroller.core.database.DatabaseHandler;
+
+import java.util.LinkedList;
+import java.util.List;
+import java.util.Map;
+import java.util.TreeMap;
+import java.util.logging.Logger;
+import java.util.stream.Collectors;
+
+public class SystemStateBroadcaster {
+
+ public static Logger log = Logger.getLogger(SystemStateBroadcaster.class.getName());
+
+ private final Timer timer;
+ private final Object monitor;
+ private ClusterState systemState;
+ private final List<SetClusterStateRequest> replies = new LinkedList<>();
+
+ private final static long minTimeBetweenNodeErrorLogging = 10 * 60 * 1000;
+ private final Map<Node, Long> lastErrorReported = new TreeMap<>();
+ private int lastClusterStateInSync = 0;
+
+ private final ClusterStateWaiter waiter = new ClusterStateWaiter();
+
+ public SystemStateBroadcaster(Timer timer, Object monitor) {
+ this.timer = timer;
+ this.monitor = monitor;
+ }
+
+ public void handleNewSystemState(ClusterState state) {
+ systemState = state;
+ }
+
+ public ClusterState getClusterState() {
+ return systemState;
+ }
+
+ private void reportNodeError(boolean nodeOk, NodeInfo info, String message) {
+ long time = timer.getCurrentTimeInMillis();
+ Long lastReported = lastErrorReported.get(info.getNode());
+ boolean alreadySeen = (lastReported != null && time - lastReported < minTimeBetweenNodeErrorLogging);
+ log.log(nodeOk && !alreadySeen ? LogLevel.WARNING : LogLevel.DEBUG, message);
+ if (!alreadySeen) lastErrorReported.put(info.getNode(), time);
+ }
+
+ public boolean processResponses() {
+ boolean anyResponsesFound = false;
+ synchronized(monitor) {
+ for(SetClusterStateRequest req : replies) {
+ anyResponsesFound = true;
+
+ NodeInfo info = req.getNodeInfo();
+ boolean nodeOk = info.getReportedState().getState().oneOf("uir");
+ int version = req.getSystemStateVersion();
+
+ if (req.getReply().isError()) {
+ if (req.getReply().getReturnCode() != Communicator.TRANSIENT_ERROR) {
+ info.setSystemStateVersionAcknowledged(version, false);
+ if (info.getNewestSystemStateVersionSent() == version) {
+ reportNodeError(nodeOk, info,
+ "Got error response " + req.getReply().getReturnCode() + ": " + req.getReply().getReturnMessage()
+ + " from " + info + " setsystemstate request.");
+ }
+ }
+ } else {
+ info.setSystemStateVersionAcknowledged(version, true);
+ log.log(LogLevel.DEBUG, "Node " + info + " acked system state version " + version + ".");
+ lastErrorReported.remove(info.getNode());
+ }
+ }
+ replies.clear();
+ }
+ return anyResponsesFound;
+ }
+
+ private boolean nodeNeedsClusterState(NodeInfo node) {
+ if (node.getSystemStateVersionAcknowledged() == systemState.getVersion()) {
+ return false; // No point in sending if node already has updated system state
+ }
+ if (node.getRpcAddress() == null || node.isRpcAddressOutdated()) {
+ return false; // Can't set state on nodes we don't know where are
+ }
+ if (node.getReportedState().getState() == State.MAINTENANCE ||
+ node.getReportedState().getState() == State.DOWN ||
+ node.getReportedState().getState() == State.STOPPING)
+ {
+ return false; // No point in sending system state to nodes that can't receive messages or don't want them
+ }
+ if (node.getNewestSystemStateVersionSent() == systemState.getVersion()) {
+ return false; // No point in sending if we already have done so
+ }
+ return true;
+ }
+
+ private List<NodeInfo> resolveStateVersionSendSet(DatabaseHandler.Context dbContext) {
+ return dbContext.getCluster().getNodeInfo().stream()
+ .filter(this::nodeNeedsClusterState)
+ .collect(Collectors.toList());
+ }
+
+ public boolean broadcastNewState(DatabaseHandler database,
+ DatabaseHandler.Context dbContext,
+ Communicator communicator,
+ FleetController fleetController) throws InterruptedException {
+ if (systemState == null) return false;
+
+ List<NodeInfo> recipients = resolveStateVersionSendSet(dbContext);
+ // Store new version in ZooKeeper _before_ publishing to any nodes so that a
+ // cluster controller crash after publishing but before a successful ZK store
+ // will not risk reusing the same version number.
+ if (!recipients.isEmpty() && !systemState.isOfficial()) {
+ database.saveLatestSystemStateVersion(dbContext, systemState.getVersion());
+ systemState.setOfficial(true);
+ }
+
+ boolean anyOutdatedDistributorNodes = false;
+ for (NodeInfo node : recipients) {
+ if (node.isDistributor()) {
+ anyOutdatedDistributorNodes = true;
+ }
+ if (nodeNeedsToObserveStartupTimestamps(node)) {
+ ClusterState newState = buildModifiedClusterState(dbContext);
+ log.log(LogLevel.DEBUG, "Sending modified system state version " + systemState.getVersion()
+ + " to node " + node + ": " + newState);
+ communicator.setSystemState(newState, node, waiter);
+ } else {
+ log.log(LogLevel.DEBUG, "Sending system state version " + systemState.getVersion() + " to node " + node
+ + ". (went down time " + node.getWentDownWithStartTime() + ", node start time " + node.getStartTimestamp() + ")");
+ communicator.setSystemState(systemState, node, waiter);
+ }
+ }
+
+ if (!anyOutdatedDistributorNodes && systemState.getVersion() > lastClusterStateInSync) {
+ log.log(LogLevel.DEBUG, "All distributors have newest clusterstate, updating start timestamps in zookeeper and clearing them from cluster state");
+ lastClusterStateInSync = systemState.getVersion();
+ fleetController.handleAllDistributorsInSync(database, dbContext);
+ }
+ return !recipients.isEmpty();
+ }
+
+ private boolean nodeNeedsToObserveStartupTimestamps(NodeInfo node) {
+ return node.getStartTimestamp() != 0 && node.getWentDownWithStartTime() == node.getStartTimestamp();
+ }
+
+ private ClusterState buildModifiedClusterState(DatabaseHandler.Context dbContext) {
+ ClusterState newState = systemState.clone();
+ for (NodeInfo n : dbContext.getCluster().getNodeInfo()) {
+ NodeState ns = newState.getNodeState(n.getNode());
+ if (!n.isDistributor() && ns.getStartTimestamp() == 0) {
+ ns.setStartTimestamp(n.getStartTimestamp());
+ newState.setNodeState(n.getNode(), ns);
+ }
+ }
+ return newState;
+ }
+
+ private class ClusterStateWaiter implements Communicator.Waiter<SetClusterStateRequest> {
+ @Override
+ public void done(SetClusterStateRequest reply) {
+ synchronized (monitor) {
+ replies.add(reply);
+ }
+ }
+ }
+
+}
diff --git a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/SystemStateGenerator.java b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/SystemStateGenerator.java
new file mode 100644
index 00000000000..5cf88b68f29
--- /dev/null
+++ b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/SystemStateGenerator.java
@@ -0,0 +1,771 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.vespa.clustercontroller.core;
+
+import com.yahoo.jrt.Spec;
+import com.yahoo.log.LogLevel;
+import com.yahoo.vdslib.distribution.ConfiguredNode;
+import com.yahoo.vdslib.state.*;
+import com.yahoo.vespa.clustercontroller.core.database.DatabaseHandler;
+import com.yahoo.vespa.clustercontroller.core.hostinfo.HostInfo;
+import com.yahoo.vespa.clustercontroller.core.listeners.NodeStateOrHostInfoChangeHandler;
+import com.yahoo.vespa.clustercontroller.core.listeners.SystemStateListener;
+
+import java.util.*;
+import java.util.logging.Logger;
+import java.text.ParseException;
+import java.util.stream.Collectors;
+
+/**
+ * This class get node state updates and uses them to decide the cluster state.
+ */
+ // TODO: Remove all current state from this and make it rely on state from ClusterInfo instead
+public class SystemStateGenerator {
+
+ private static Logger log = Logger.getLogger(SystemStateGenerator.class.getName());
+
+ private final Timer timer;
+ private final EventLogInterface eventLog;
+ private ClusterStateView currentClusterStateView;
+ private ClusterStateView nextClusterStateView;
+ private boolean nextStateViewChanged = false;
+ private boolean isMaster = false;
+
+ private Map<NodeType, Integer> maxTransitionTime = new TreeMap<>();
+ private int maxInitProgressTime = 5000;
+ private int maxPrematureCrashes = 4;
+ private long stableStateTimePeriod = 60 * 60 * 1000;
+ private static final int maxHistorySize = 50;
+ private Set<ConfiguredNode> nodes;
+ private Map<Integer, String> hostnames = new HashMap<>();
+ private int minDistributorNodesUp = 1;
+ private int minStorageNodesUp = 1;
+ private double minRatioOfDistributorNodesUp = 0.50;
+ private double minRatioOfStorageNodesUp = 0.50;
+ private int maxSlobrokDisconnectGracePeriod = 1000;
+ private int idealDistributionBits = 16;
+ private static final boolean disableUnstableNodes = true;
+
+ private final LinkedList<SystemStateHistoryEntry> systemStateHistory = new LinkedList<>();
+
+ /**
+ * @param metricUpdater may be null, in which case no metrics will be recorded.
+ */
+ public SystemStateGenerator(Timer timer, EventLogInterface eventLog, MetricUpdater metricUpdater) {
+ try {
+ currentClusterStateView = ClusterStateView.create("", metricUpdater);
+ nextClusterStateView = ClusterStateView.create("", metricUpdater);
+ } catch (ParseException e) {
+ throw new RuntimeException("Parsing empty string should always work");
+ }
+ this.timer = timer;
+ this.eventLog = eventLog;
+ maxTransitionTime.put(NodeType.DISTRIBUTOR, 5000);
+ maxTransitionTime.put(NodeType.STORAGE, 5000);
+ }
+
+ public void handleAllDistributorsInSync(DatabaseHandler database,
+ DatabaseHandler.Context dbContext) throws InterruptedException {
+ int startTimestampsReset = 0;
+ for (NodeType nodeType : NodeType.getTypes()) {
+ for (ConfiguredNode configuredNode : nodes) {
+ Node node = new Node(nodeType, configuredNode.index());
+ NodeInfo nodeInfo = dbContext.getCluster().getNodeInfo(node);
+ NodeState nodeState = nextClusterStateView.getClusterState().getNodeState(node);
+ if (nodeInfo != null && nodeState != null) {
+ if (nodeState.getStartTimestamp() > nodeInfo.getStartTimestamp()) {
+ log.log(LogLevel.DEBUG, "Storing away new start timestamp for node " + node);
+ nodeInfo.setStartTimestamp(nodeState.getStartTimestamp());
+ }
+ if (nodeState.getStartTimestamp() > 0) {
+ log.log(LogLevel.DEBUG, "Resetting timestamp in cluster state for node " + node);
+ nodeState.setStartTimestamp(0);
+ nextClusterStateView.getClusterState().setNodeState(node, nodeState);
+ ++startTimestampsReset;
+ }
+ } else {
+ log.log(LogLevel.DEBUG, node + ": " +
+ (nodeInfo == null ? "null" : nodeInfo.getStartTimestamp()) + ", " +
+ (nodeState == null ? "null" : nodeState.getStartTimestamp()));
+ }
+ }
+ }
+ if (startTimestampsReset > 0) {
+ eventLog.add(new ClusterEvent(ClusterEvent.Type.SYSTEMSTATE, "Reset " + startTimestampsReset +
+ " start timestamps as all available distributors have seen newest cluster state.", timer.getCurrentTimeInMillis()));
+ nextStateViewChanged = true;
+ database.saveStartTimestamps(dbContext);
+ } else {
+ log.log(LogLevel.DEBUG, "Found no start timestamps to reset in cluster state.");
+ }
+ }
+
+ public void setMaxTransitionTime(Map<NodeType, Integer> map) { maxTransitionTime = map; }
+ public void setMaxInitProgressTime(int millisecs) { maxInitProgressTime = millisecs; }
+ public void setMaxPrematureCrashes(int count) { maxPrematureCrashes = count; }
+ public void setStableStateTimePeriod(long millisecs) { stableStateTimePeriod = millisecs; }
+
+ public ClusterStateView currentClusterStateView() { return currentClusterStateView; }
+
+ /** Returns an immutable list of the historical states this has generated */
+ public List<SystemStateHistoryEntry> systemStateHistory() {
+ return Collections.unmodifiableList(systemStateHistory);
+ }
+
+ public void setMinNodesUp(int minDistNodes, int minStorNodes, double minDistRatio, double minStorRatio) {
+ minDistributorNodesUp = minDistNodes;
+ minStorageNodesUp = minStorNodes;
+ minRatioOfDistributorNodesUp = minDistRatio;
+ minRatioOfStorageNodesUp = minStorRatio;
+ nextStateViewChanged = true; // ... maybe
+ }
+
+ /** Sets the nodes of this and attempts to keep the node state in sync */
+ public void setNodes(ClusterInfo newClusterInfo) {
+ this.nodes = new HashSet<>(newClusterInfo.getConfiguredNodes().values());
+
+ // Nodes that are removed from config will be automatically marked as DOWN
+ // in the cluster state by createNextVersionOfClusterStateView, ensuring
+ // that these are not carried over into new cluster states.
+
+ for (ConfiguredNode node : this.nodes) {
+ NodeInfo newNodeInfo = newClusterInfo.getStorageNodeInfo(node.index());
+ NodeState currentState = currentClusterStateView.getClusterState().getNodeState(new Node(NodeType.STORAGE, node.index()));
+ if (currentState.getState() == State.RETIRED || currentState.getState() == State.UP) { // then correct to configured state
+ proposeNewNodeState(newNodeInfo, new NodeState(NodeType.STORAGE, node.retired() ? State.RETIRED : State.UP));
+ }
+ }
+ nextStateViewChanged = true;
+ }
+
+ public void setMaster(boolean isMaster) {
+ this.isMaster = isMaster;
+ }
+ public void setMaxSlobrokDisconnectGracePeriod(int millisecs) { maxSlobrokDisconnectGracePeriod = millisecs; }
+
+ public void setDistributionBits(int bits) {
+ if (bits == idealDistributionBits) return;
+ idealDistributionBits = bits;
+ int currentDistributionBits = calculateMinDistributionBitCount();
+ if (currentDistributionBits != nextClusterStateView.getClusterState().getDistributionBitCount()) {
+ nextClusterStateView.getClusterState().setDistributionBits(currentDistributionBits);
+ nextStateViewChanged = true;
+ }
+ }
+
+ public int getDistributionBits() { return idealDistributionBits; }
+
+ public int calculateMinDistributionBitCount() {
+ int currentDistributionBits = idealDistributionBits;
+ int minNode = -1;
+ for (ConfiguredNode node : nodes) {
+ NodeState ns = nextClusterStateView.getClusterState().getNodeState(new Node(NodeType.STORAGE, node.index()));
+ if (ns.getState().oneOf("iur")) {
+ if (ns.getMinUsedBits() < currentDistributionBits) {
+ currentDistributionBits = ns.getMinUsedBits();
+ minNode = node.index();
+ }
+ }
+ }
+ if (minNode == -1) {
+ log.log(LogLevel.DEBUG, "Distribution bit count should still be default as all available nodes have at least split to " + idealDistributionBits + " bits");
+ } else {
+ log.log(LogLevel.DEBUG, "Distribution bit count is limited to " + currentDistributionBits + " due to storage node " + minNode);
+ }
+ return currentDistributionBits;
+ }
+
+ public ClusterState getClusterState() { return currentClusterStateView.getClusterState(); }
+
+ /**
+ * Return the current cluster state, but if the cluster is down, modify the node states with the
+ * actual node states from the temporary next state.
+ */
+ public ClusterState getConsolidatedClusterState() {
+ ClusterState currentState = currentClusterStateView.getClusterState();
+ if (currentState.getClusterState().equals(State.UP)) {
+ return currentState;
+ }
+
+ ClusterState nextState = nextClusterStateView.getClusterState();
+ if (!currentState.getClusterState().equals(nextState.getClusterState())) {
+ log.warning("Expected current cluster state object to have same global state as the under creation instance.");
+ }
+ ClusterState state = nextState.clone();
+ state.setVersion(currentState.getVersion());
+ state.setOfficial(false);
+ return state;
+ }
+
+ private Event getDownDueToTooFewNodesEvent() {
+ Event clusterEvent = null;
+ int upStorageCount = 0, upDistributorCount = 0;
+ int dcount = nodes.size();
+ int scount = nodes.size();
+ for (NodeType type : NodeType.getTypes()) {
+ for (ConfiguredNode node : nodes) {
+ NodeState ns = nextClusterStateView.getClusterState().getNodeState(new Node(type, node.index()));
+ if (ns.getState() == State.UP || ns.getState() == State.RETIRED || ns.getState() == State.INITIALIZING) {
+ if (type.equals(NodeType.STORAGE))
+ ++upStorageCount;
+ else
+ ++upDistributorCount;
+ }
+ }
+ }
+
+ long timeNow = timer.getCurrentTimeInMillis();
+ if (upStorageCount < minStorageNodesUp) {
+ clusterEvent = new ClusterEvent(ClusterEvent.Type.SYSTEMSTATE,
+ "Less than " + minStorageNodesUp + " storage nodes available (" + upStorageCount + "). Setting cluster state down.",
+ timeNow);
+ }
+ if (upDistributorCount < minDistributorNodesUp) {
+ clusterEvent = new ClusterEvent(ClusterEvent.Type.SYSTEMSTATE,
+ "Less than " + minDistributorNodesUp + " distributor nodes available (" + upDistributorCount + "). Setting cluster state down.",
+ timeNow);
+ }
+ if (minRatioOfStorageNodesUp * scount > upStorageCount) {
+ clusterEvent = new ClusterEvent(ClusterEvent.Type.SYSTEMSTATE,
+ "Less than " + (100 * minRatioOfStorageNodesUp) + " % of storage nodes are available ("
+ + upStorageCount + "/" + scount + "). Setting cluster state down.",
+ timeNow);
+ }
+ if (minRatioOfDistributorNodesUp * dcount > upDistributorCount) {
+ clusterEvent = new ClusterEvent(ClusterEvent.Type.SYSTEMSTATE,
+ "Less than " + (100 * minRatioOfDistributorNodesUp) + " % of distributor nodes are available ("
+ + upDistributorCount + "/" + dcount + "). Setting cluster state down.",
+ timeNow);
+ }
+ return clusterEvent;
+ }
+
+ private ClusterStateView createNextVersionOfClusterStateView(Event clusterEvent) {
+ // If you change this method, see *) in notifyIfNewSystemState
+ ClusterStateView candidateClusterStateView = nextClusterStateView.cloneForNewState();
+ ClusterState candidateClusterState = candidateClusterStateView.getClusterState();
+
+ candidateClusterState.setClusterState(clusterEvent == null ? State.UP : State.DOWN);
+
+ int currentDistributionBits = calculateMinDistributionBitCount();
+ if (currentDistributionBits != nextClusterStateView.getClusterState().getDistributionBitCount()) {
+ candidateClusterState.setDistributionBits(currentDistributionBits);
+ }
+
+ Set<Integer> configuredIndices = this.nodes.stream().map(ConfiguredNode::index).collect(Collectors.toSet());
+
+ pruneNodesNotContainedInConfig(candidateClusterState, configuredIndices, NodeType.DISTRIBUTOR);
+ pruneNodesNotContainedInConfig(candidateClusterState, configuredIndices, NodeType.STORAGE);
+
+ return candidateClusterStateView;
+ }
+
+ private void pruneNodesNotContainedInConfig(ClusterState candidateClusterState,
+ Set<Integer> configuredIndices,
+ NodeType nodeType)
+ {
+ final int nodeCount = candidateClusterState.getNodeCount(nodeType);
+ for (int i = 0; i < nodeCount; ++i) {
+ final Node node = new Node(nodeType, i);
+ final NodeState currentState = candidateClusterState.getNodeState(node);
+ if (!configuredIndices.contains(i) && !currentState.getState().equals(State.DOWN)) {
+ log.log(LogLevel.INFO, "Removing node " + node + " from state as it is no longer present in config");
+ candidateClusterState.setNodeState(node, new NodeState(nodeType, State.DOWN));
+ }
+ }
+ }
+
+ private void recordNewClusterStateHasBeenChosen(
+ ClusterState currentClusterState, ClusterState newClusterState, Event clusterEvent) {
+ long timeNow = timer.getCurrentTimeInMillis();
+
+ if (!currentClusterState.getClusterState().equals(State.UP) &&
+ newClusterState.getClusterState().equals(State.UP)) {
+ eventLog.add(new ClusterEvent(ClusterEvent.Type.SYSTEMSTATE,
+ "Enough nodes available for system to become up.", timeNow), isMaster);
+ } else if (currentClusterState.getClusterState().equals(State.UP) &&
+ ! newClusterState.getClusterState().equals(State.UP)) {
+ assert(clusterEvent != null);
+ eventLog.add(clusterEvent, isMaster);
+ }
+
+ if (newClusterState.getDistributionBitCount() != currentClusterState.getDistributionBitCount()) {
+ eventLog.add(new ClusterEvent(
+ ClusterEvent.Type.SYSTEMSTATE,
+ "Altering distribution bits in system from "
+ + currentClusterState.getDistributionBitCount() + " to " +
+ currentClusterState.getDistributionBitCount(),
+ timeNow), isMaster);
+ }
+
+ eventLog.add(new ClusterEvent(
+ ClusterEvent.Type.SYSTEMSTATE,
+ "New cluster state version " + newClusterState.getVersion() + ". Change from last: " +
+ currentClusterState.getTextualDifference(newClusterState),
+ timeNow), isMaster);
+
+ log.log(LogLevel.DEBUG, "Created new cluster state version: " + newClusterState.toString(true));
+ systemStateHistory.addFirst(new SystemStateHistoryEntry(newClusterState, timeNow));
+ if (systemStateHistory.size() > maxHistorySize) {
+ systemStateHistory.removeLast();
+ }
+ }
+
+ public boolean notifyIfNewSystemState(SystemStateListener stateListener) {
+ if ( ! nextStateViewChanged) return false;
+
+ Event clusterEvent = getDownDueToTooFewNodesEvent();
+ ClusterStateView newClusterStateView = createNextVersionOfClusterStateView(clusterEvent);
+ ClusterState newClusterState = newClusterStateView.getClusterState();
+
+ if (newClusterState.similarTo(currentClusterStateView.getClusterState())) {
+ log.log(LogLevel.DEBUG,
+ "State hasn't changed enough to warrant new cluster state. Not creating new state: " +
+ currentClusterStateView.getClusterState().getTextualDifference(newClusterState));
+ return false;
+ }
+
+ // Update the version of newClusterState now. This cannot be done prior to similarTo(),
+ // since it makes the cluster states different. From now on, the new cluster state is immutable.
+ newClusterState.setVersion(currentClusterStateView.getClusterState().getVersion() + 1);
+
+ recordNewClusterStateHasBeenChosen(currentClusterStateView.getClusterState(),
+ newClusterStateView.getClusterState(), clusterEvent);
+
+ // *) Ensure next state is still up to date.
+ // This should make nextClusterStateView a deep-copy of currentClusterStateView.
+ // If more than the distribution bits and state are deep-copied in
+ // createNextVersionOfClusterStateView(), we need to add corresponding statements here.
+ // This seems like a hack...
+ nextClusterStateView.getClusterState().setDistributionBits(newClusterState.getDistributionBitCount());
+ nextClusterStateView.getClusterState().setClusterState(newClusterState.getClusterState());
+
+ currentClusterStateView = newClusterStateView;
+ nextStateViewChanged = false;
+
+ stateListener.handleNewSystemState(currentClusterStateView.getClusterState());
+
+ return true;
+ }
+
+ public void setLatestSystemStateVersion(int version) {
+ currentClusterStateView.getClusterState().setVersion(Math.max(1, version));
+ nextStateViewChanged = true;
+ }
+
+ private void setNodeState(NodeInfo node, NodeState newState) {
+ NodeState oldState = nextClusterStateView.getClusterState().getNodeState(node.getNode());
+
+ // Correct UP to RETIRED if the node wants to be retired
+ if (newState.above(node.getWantedState()))
+ newState.setState(node.getWantedState().getState());
+
+ // Keep old description if a new one is not set and we're not going up or in initializing mode
+ if ( ! newState.getState().oneOf("ui") && oldState.hasDescription()) {
+ newState.setDescription(oldState.getDescription());
+ }
+
+ // Keep disk information if not set in new state
+ if (newState.getDiskCount() == 0 && oldState.getDiskCount() != 0) {
+ newState.setDiskCount(oldState.getDiskCount());
+ for (int i=0; i<oldState.getDiskCount(); ++i) {
+ newState.setDiskState(i, oldState.getDiskState(i));
+ }
+ }
+ if (newState.equals(oldState)) {
+ return;
+ }
+
+ eventLog.add(new NodeEvent(node, "Altered node state in cluster state from '" + oldState.toString(true)
+ + "' to '" + newState.toString(true) + "'.",
+ NodeEvent.Type.CURRENT, timer.getCurrentTimeInMillis()), isMaster);
+ nextClusterStateView.getClusterState().setNodeState(node.getNode(), newState);
+ nextStateViewChanged = true;
+ }
+
+ public void handleNewReportedNodeState(NodeInfo node, NodeState reportedState, NodeStateOrHostInfoChangeHandler nodeListener) {
+ ClusterState nextState = nextClusterStateView.getClusterState();
+ NodeState currentState = nextState.getNodeState(node.getNode());
+ log.log(currentState.equals(reportedState) && node.getVersion() == 0 ? LogLevel.SPAM : LogLevel.DEBUG,
+ "Got nodestate reply from " + node + ": "
+ + node.getReportedState().getTextualDifference(reportedState) + " (Current state is " + currentState.toString(true) + ")");
+ long currentTime = timer.getCurrentTimeInMillis();
+ if (reportedState.getState().equals(State.DOWN)) {
+ node.setTimeOfFirstFailingConnectionAttempt(currentTime);
+ }
+ if ( ! reportedState.similarTo(node.getReportedState())) {
+ if (reportedState.getState().equals(State.DOWN)) {
+ eventLog.addNodeOnlyEvent(new NodeEvent(node, "Failed to get node state: " + reportedState.toString(true), NodeEvent.Type.REPORTED, currentTime), LogLevel.INFO);
+ } else {
+ eventLog.addNodeOnlyEvent(new NodeEvent(node, "Now reporting state " + reportedState.toString(true), NodeEvent.Type.REPORTED, currentTime), LogLevel.DEBUG);
+ }
+ }
+ if (reportedState.equals(node.getReportedState()) && ! reportedState.getState().equals(State.INITIALIZING))
+ return;
+
+ NodeState alteredState = decideNodeStateGivenReportedState(node, currentState, reportedState, nodeListener);
+ if (alteredState != null) {
+ ClusterState clusterState = currentClusterStateView.getClusterState();
+
+ if (alteredState.above(node.getWantedState())) {
+ log.log(LogLevel.DEBUG, "Cannot set node in state " + alteredState.getState() + " when wanted state is " + node.getWantedState());
+ alteredState.setState(node.getWantedState().getState());
+ }
+ if (reportedState.getStartTimestamp() > node.getStartTimestamp()) {
+ alteredState.setStartTimestamp(reportedState.getStartTimestamp());
+ } else {
+ alteredState.setStartTimestamp(0);
+ }
+ if (!alteredState.similarTo(currentState)) {
+ setNodeState(node, alteredState);
+ } else if (!alteredState.equals(currentState)) {
+ if (currentState.getState().equals(State.INITIALIZING) && alteredState.getState().equals(State.INITIALIZING) &&
+ Math.abs(currentState.getInitProgress() - alteredState.getInitProgress()) > 0.000000001)
+ {
+ log.log(LogLevel.DEBUG, "Only silently updating init progress for " + node + " in cluster state because new "
+ + "state is too similar to tag new version: " + currentState.getTextualDifference(alteredState));
+ currentState.setInitProgress(alteredState.getInitProgress());
+ nextState.setNodeState(node.getNode(), currentState);
+
+ NodeState currentNodeState = clusterState.getNodeState(node.getNode());
+ if (currentNodeState.getState().equals(State.INITIALIZING)) {
+ currentNodeState.setInitProgress(alteredState.getInitProgress());
+ clusterState.setNodeState(node.getNode(), currentNodeState);
+ }
+ } else if (alteredState.getMinUsedBits() != currentState.getMinUsedBits()) {
+ log.log(LogLevel.DEBUG, "Altering node state to reflect that min distribution bit count have changed from "
+ + currentState.getMinUsedBits() + " to " + alteredState.getMinUsedBits());
+ int oldCount = currentState.getMinUsedBits();
+ currentState.setMinUsedBits(alteredState.getMinUsedBits());
+ nextState.setNodeState(node.getNode(), currentState);
+ int minDistBits = calculateMinDistributionBitCount();
+ if (minDistBits < nextState.getDistributionBitCount()
+ || (nextState.getDistributionBitCount() < this.idealDistributionBits && minDistBits >= this.idealDistributionBits))
+ {
+ // If this will actually affect global cluster state.
+ eventLog.add(new NodeEvent(node, "Altered min distribution bit count from " + oldCount
+ + " to " + currentState.getMinUsedBits() + ". Updated cluster state.", NodeEvent.Type.CURRENT, currentTime), isMaster);
+ nextStateViewChanged = true;
+ } else {
+ log.log(LogLevel.DEBUG, "Altered min distribution bit count from " + oldCount
+ + " to " + currentState.getMinUsedBits() + ". No effect for cluster state with ideal " + this.idealDistributionBits
+ + ", new " + minDistBits + ", old " + nextState.getDistributionBitCount() + " though.");
+ clusterState.setNodeState(node.getNode(), currentState);
+ }
+ } else {
+ log.log(LogLevel.DEBUG, "Not altering state of " + node + " in cluster state because new state is too similar: "
+ + currentState.getTextualDifference(alteredState));
+ }
+ } else if (alteredState.getDescription().contains("Listing buckets")) {
+ currentState.setDescription(alteredState.getDescription());
+ nextState.setNodeState(node.getNode(), currentState);
+ NodeState currentNodeState = clusterState.getNodeState(node.getNode());
+ currentNodeState.setDescription(alteredState.getDescription());
+ clusterState.setNodeState(node.getNode(), currentNodeState);
+ }
+ }
+ }
+
+ public void handleNewNode(NodeInfo node) {
+ setHostName(node);
+ String message = "Found new node " + node + " in slobrok at " + node.getRpcAddress();
+ eventLog.add(new NodeEvent(node, message, NodeEvent.Type.REPORTED, timer.getCurrentTimeInMillis()), isMaster);
+ }
+
+ public void handleMissingNode(NodeInfo node, NodeStateOrHostInfoChangeHandler nodeListener) {
+ removeHostName(node);
+
+ long timeNow = timer.getCurrentTimeInMillis();
+
+ if (node.getLatestNodeStateRequestTime() != null) {
+ eventLog.add(new NodeEvent(node, "Node is no longer in slobrok, but we still have a pending state request.", NodeEvent.Type.REPORTED, timeNow), isMaster);
+ } else {
+ eventLog.add(new NodeEvent(node, "Node is no longer in slobrok. No pending state request to node.", NodeEvent.Type.REPORTED, timeNow), isMaster);
+ }
+ if (node.getReportedState().getState().equals(State.STOPPING)) {
+ log.log(LogLevel.DEBUG, "Node " + node.getNode() + " is no longer in slobrok. Was in stopping state, so assuming it has shut down normally. Setting node down");
+ NodeState ns = node.getReportedState().clone();
+ ns.setState(State.DOWN);
+ handleNewReportedNodeState(node, ns.clone(), nodeListener);
+ node.setReportedState(ns, timer.getCurrentTimeInMillis()); // Must reset it to null to get connection attempts counted
+ } else {
+ log.log(LogLevel.DEBUG, "Node " + node.getNode() + " no longer in slobrok was in state " + node.getReportedState() + ". Waiting to see if it reappears in slobrok");
+ }
+ }
+
+ /**
+ * Propose a new state for a node. This may happen due to an administrator action, orchestration, or
+ * a configuration change.
+ */
+ public void proposeNewNodeState(NodeInfo node, NodeState proposedState) {
+ NodeState currentState = nextClusterStateView.getClusterState().getNodeState(node.getNode());
+ NodeState currentReported = node.getReportedState(); // TODO: Is there a reason to have both of this and the above?
+
+ NodeState newCurrentState = currentReported.clone();
+
+ newCurrentState.setState(proposedState.getState()).setDescription(proposedState.getDescription());
+
+ if (currentState.getState().equals(newCurrentState.getState())) return;
+
+ log.log(LogLevel.DEBUG, "Got new wanted nodestate for " + node + ": " + currentState.getTextualDifference(proposedState));
+ // Should be checked earlier before state was set in cluster
+ assert(newCurrentState.getState().validWantedNodeState(node.getNode().getType()));
+ long timeNow = timer.getCurrentTimeInMillis();
+ if (newCurrentState.above(currentReported)) {
+ eventLog.add(new NodeEvent(node, "Wanted state " + newCurrentState + ", but we cannot force node into that state yet as it is currently in " + currentReported, NodeEvent.Type.REPORTED, timeNow), isMaster);
+ return;
+ }
+ if ( ! newCurrentState.similarTo(currentState)) {
+ eventLog.add(new NodeEvent(node, "Node state set to " + newCurrentState + ".", NodeEvent.Type.WANTED, timeNow), isMaster);
+ }
+ setNodeState(node, newCurrentState);
+ }
+
+ public void handleNewRpcAddress(NodeInfo node) {
+ setHostName(node);
+ String message = "Node " + node + " has a new address in slobrok: " + node.getRpcAddress();
+ eventLog.add(new NodeEvent(node, message, NodeEvent.Type.REPORTED, timer.getCurrentTimeInMillis()), isMaster);
+ }
+
+ public void handleReturnedRpcAddress(NodeInfo node) {
+ setHostName(node);
+ String message = "Node got back into slobrok with same address as before: " + node.getRpcAddress();
+ eventLog.add(new NodeEvent(node, message, NodeEvent.Type.REPORTED, timer.getCurrentTimeInMillis()), isMaster);
+ }
+
+ private void setHostName(NodeInfo node) {
+ String rpcAddress = node.getRpcAddress();
+ if (rpcAddress == null) {
+ // This may happen if we haven't seen the node in Slobrok yet.
+ return;
+ }
+
+ Spec address = new Spec(rpcAddress);
+ if (address.malformed()) {
+ return;
+ }
+
+ hostnames.put(node.getNodeIndex(), address.host());
+ }
+
+ private void removeHostName(NodeInfo node) {
+ hostnames.remove(node.getNodeIndex());
+ }
+
+ public boolean watchTimers(ContentCluster cluster, NodeStateOrHostInfoChangeHandler nodeListener) {
+ boolean triggeredAnyTimers = false;
+ long currentTime = timer.getCurrentTimeInMillis();
+ for(NodeInfo node : cluster.getNodeInfo()) {
+ NodeState currentStateInSystem = nextClusterStateView.getClusterState().getNodeState(node.getNode());
+ NodeState lastReportedState = node.getReportedState();
+
+ // If we haven't had slobrok contact in a given amount of time and node is still not considered down,
+ // mark it down.
+ if (node.isRpcAddressOutdated()
+ && !lastReportedState.getState().equals(State.DOWN)
+ && node.getRpcAddressOutdatedTimestamp() + maxSlobrokDisconnectGracePeriod <= currentTime)
+ {
+ StringBuilder sb = new StringBuilder().append("Set node down as it has been out of slobrok for ")
+ .append(currentTime - node.getRpcAddressOutdatedTimestamp()).append(" ms which is more than the max limit of ")
+ .append(maxSlobrokDisconnectGracePeriod).append(" ms.");
+ node.abortCurrentNodeStateRequests();
+ NodeState state = lastReportedState.clone();
+ state.setState(State.DOWN);
+ if (!state.hasDescription()) state.setDescription(sb.toString());
+ eventLog.add(new NodeEvent(node, sb.toString(), NodeEvent.Type.CURRENT, currentTime), isMaster);
+ handleNewReportedNodeState(node, state.clone(), nodeListener);
+ node.setReportedState(state, currentTime);
+ triggeredAnyTimers = true;
+ }
+
+ // If node is still unavailable after transition time, mark it down
+ if (currentStateInSystem.getState().equals(State.MAINTENANCE)
+ && ( ! nextStateViewChanged || ! this.nextClusterStateView.getClusterState().getNodeState(node.getNode()).getState().equals(State.DOWN))
+ && node.getWantedState().above(new NodeState(node.getNode().getType(), State.DOWN))
+ && (lastReportedState.getState().equals(State.DOWN) || node.isRpcAddressOutdated())
+ && node.getTransitionTime() + maxTransitionTime.get(node.getNode().getType()) < currentTime)
+ {
+ eventLog.add(new NodeEvent(node, (currentTime - node.getTransitionTime())
+ + " milliseconds without contact. Marking node down.", NodeEvent.Type.CURRENT, currentTime), isMaster);
+ NodeState newState = new NodeState(node.getNode().getType(), State.DOWN).setDescription(
+ (currentTime - node.getTransitionTime()) + " ms without contact. Too long to keep in maintenance. Marking node down");
+ // Keep old description if there is one as it is likely closer to the cause of the problem
+ if (currentStateInSystem.hasDescription()) newState.setDescription(currentStateInSystem.getDescription());
+ setNodeState(node, newState);
+ triggeredAnyTimers = true;
+ }
+
+ // If node haven't increased its initializing progress within initprogresstime, mark it down.
+ if (!currentStateInSystem.getState().equals(State.DOWN)
+ && node.getWantedState().above(new NodeState(node.getNode().getType(), State.DOWN))
+ && lastReportedState.getState().equals(State.INITIALIZING)
+ && maxInitProgressTime != 0
+ && node.getInitProgressTime() + maxInitProgressTime <= currentTime
+ && node.getNode().getType().equals(NodeType.STORAGE))
+ {
+ eventLog.add(new NodeEvent(node, (currentTime - node.getInitProgressTime()) + " milliseconds "
+ + "without initialize progress. Marking node down."
+ + " Premature crash count is now " + (node.getPrematureCrashCount() + 1) + ".", NodeEvent.Type.CURRENT, currentTime), isMaster);
+ NodeState newState = new NodeState(node.getNode().getType(), State.DOWN).setDescription(
+ (currentTime - node.getInitProgressTime()) + " ms without initialize progress. Assuming node has deadlocked.");
+ setNodeState(node, newState);
+ handlePrematureCrash(node, nodeListener);
+ triggeredAnyTimers = true;
+ }
+ if (node.getUpStableStateTime() + stableStateTimePeriod <= currentTime
+ && lastReportedState.getState().equals(State.UP)
+ && node.getPrematureCrashCount() <= maxPrematureCrashes
+ && node.getPrematureCrashCount() != 0)
+ {
+ node.setPrematureCrashCount(0);
+ log.log(LogLevel.DEBUG, "Resetting premature crash count on node " + node + " as it has been up for a long time.");
+ triggeredAnyTimers = true;
+ } else if (node.getDownStableStateTime() + stableStateTimePeriod <= currentTime
+ && lastReportedState.getState().equals(State.DOWN)
+ && node.getPrematureCrashCount() <= maxPrematureCrashes
+ && node.getPrematureCrashCount() != 0)
+ {
+ node.setPrematureCrashCount(0);
+ log.log(LogLevel.DEBUG, "Resetting premature crash count on node " + node + " as it has been down for a long time.");
+ triggeredAnyTimers = true;
+ }
+ }
+ return triggeredAnyTimers;
+ }
+
+ private boolean isControlledShutdown(NodeState state) {
+ return (state.getState() == State.STOPPING && (state.getDescription().contains("Received signal 15 (SIGTERM - Termination signal)")
+ || state.getDescription().contains("controlled shutdown")));
+ }
+
+ /**
+ * Decide the state assigned to a new node given the state it reported
+ *
+ * @param node the node we are computing the state of
+ * @param currentState the current state of the node
+ * @param reportedState the new state reported by (or, in the case of down - inferred from) the node
+ * @param nodeListener this listener is notified for some of the system state changes that this will return
+ * @return the node node state, or null to keep the nodes current state
+ */
+ private NodeState decideNodeStateGivenReportedState(NodeInfo node, NodeState currentState, NodeState reportedState,
+ NodeStateOrHostInfoChangeHandler nodeListener) {
+ long timeNow = timer.getCurrentTimeInMillis();
+
+ log.log(LogLevel.DEBUG, "Finding new cluster state entry for " + node + " switching state " + currentState.getTextualDifference(reportedState));
+
+ // Set nodes in maintenance if 1) down, or 2) initializing but set retired, to avoid migrating data
+ // to the retired node while it is initializing
+ if (currentState.getState().oneOf("ur") && reportedState.getState().oneOf("dis")
+ && (node.getWantedState().getState().equals(State.RETIRED) || !reportedState.getState().equals(State.INITIALIZING)))
+ {
+ long currentTime = timer.getCurrentTimeInMillis();
+ node.setTransitionTime(currentTime);
+ if (node.getUpStableStateTime() + stableStateTimePeriod > currentTime && !isControlledShutdown(reportedState)) {
+ log.log(LogLevel.DEBUG, "Stable state: " + node.getUpStableStateTime() + " + " + stableStateTimePeriod + " > " + currentTime);
+ eventLog.add(new NodeEvent(node,
+ "Stopped or possibly crashed after " + (currentTime - node.getUpStableStateTime())
+ + " ms, which is before stable state time period."
+ + " Premature crash count is now " + (node.getPrematureCrashCount() + 1) + ".",
+ NodeEvent.Type.CURRENT,
+ timeNow), isMaster);
+ if (handlePrematureCrash(node, nodeListener)) return null;
+ }
+ if (maxTransitionTime.get(node.getNode().getType()) != 0) {
+ return new NodeState(node.getNode().getType(), State.MAINTENANCE).setDescription(reportedState.getDescription());
+ }
+ }
+
+ // If we got increasing initialization progress, reset initialize timer
+ if (reportedState.getState().equals(State.INITIALIZING) &&
+ (!currentState.getState().equals(State.INITIALIZING) ||
+ reportedState.getInitProgress() > currentState.getInitProgress()))
+ {
+ node.setInitProgressTime(timer.getCurrentTimeInMillis());
+ log.log(LogLevel.DEBUG, "Reset initialize timer on " + node + " to " + node.getInitProgressTime());
+ }
+
+ // If we get reverse initialize progress, mark node unstable, such that we don't mark it initializing again before it is up.
+ if (currentState.getState().equals(State.INITIALIZING) &&
+ (reportedState.getState().equals(State.INITIALIZING) && reportedState.getInitProgress() < currentState.getInitProgress()))
+ {
+ eventLog.add(new NodeEvent(node, "Stop or crash during initialization detected from reverse initializing progress."
+ + " Progress was " + currentState.getInitProgress() + " but is now " + reportedState.getInitProgress() + "."
+ + " Premature crash count is now " + (node.getPrematureCrashCount() + 1) + ".",
+ NodeEvent.Type.CURRENT, timeNow), isMaster);
+ return (handlePrematureCrash(node, nodeListener) ? null : new NodeState(node.getNode().getType(), State.DOWN).setDescription(
+ "Got reverse intialize progress. Assuming node have prematurely crashed"));
+ }
+
+ // If we go down while initializing, mark node unstable, such that we don't mark it initializing again before it is up.
+ if (currentState.getState().equals(State.INITIALIZING) && reportedState.getState().oneOf("ds") && !isControlledShutdown(reportedState))
+ {
+ eventLog.add(new NodeEvent(node, "Stop or crash during initialization."
+ + " Premature crash count is now " + (node.getPrematureCrashCount() + 1) + ".",
+ NodeEvent.Type.CURRENT, timeNow), isMaster);
+ return (handlePrematureCrash(node, nodeListener) ? null : new NodeState(node.getNode().getType(), State.DOWN).setDescription(reportedState.getDescription()));
+ }
+
+ // Ignore further unavailable states when node is set in maintenance
+ if (currentState.getState().equals(State.MAINTENANCE) && reportedState.getState().oneOf("dis"))
+ {
+ if (node.getWantedState().getState().equals(State.RETIRED) || !reportedState.getState().equals(State.INITIALIZING)
+ || reportedState.getInitProgress() <= NodeState.getListingBucketsInitProgressLimit() + 0.00001) {
+ log.log(LogLevel.DEBUG, "Ignoring down and initializing reports while in maintenance mode on " + node + ".");
+ return null;
+ }
+ }
+
+ // Hide initializing state if node has been unstable. (Not for distributors as these own buckets while initializing)
+ if ((currentState.getState().equals(State.DOWN) || currentState.getState().equals(State.UP)) &&
+ reportedState.getState().equals(State.INITIALIZING) && node.getPrematureCrashCount() > 0 &&
+ !node.isDistributor())
+ {
+ log.log(LogLevel.DEBUG, "Not setting " + node + " initializing again as it crashed prematurely earlier.");
+ return new NodeState(node.getNode().getType(), State.DOWN).setDescription("Not setting node back up as it failed prematurely at last attempt");
+ }
+ // Hide initializing state in cluster state if initialize progress is so low that we haven't listed buckets yet
+ if (!node.isDistributor() && reportedState.getState().equals(State.INITIALIZING) &&
+ reportedState.getInitProgress() <= NodeState.getListingBucketsInitProgressLimit() + 0.00001)
+ {
+ log.log(LogLevel.DEBUG, "Not setting " + node + " initializing in cluster state quite yet, as initializing progress still indicate it is listing buckets.");
+ return new NodeState(node.getNode().getType(), State.DOWN).setDescription("Listing buckets. Progress " + (100 * reportedState.getInitProgress()) + " %.");
+ }
+ return reportedState.clone();
+ }
+
+ public boolean handlePrematureCrash(NodeInfo node, NodeStateOrHostInfoChangeHandler changeListener) {
+ node.setPrematureCrashCount(node.getPrematureCrashCount() + 1);
+ if (disableUnstableNodes && node.getPrematureCrashCount() > maxPrematureCrashes) {
+ NodeState wantedState = new NodeState(node.getNode().getType(), State.DOWN)
+ .setDescription("Disabled by fleet controller as it prematurely shut down " + node.getPrematureCrashCount() + " times in a row");
+ NodeState oldState = node.getWantedState();
+ node.setWantedState(wantedState);
+ if ( ! oldState.equals(wantedState)) {
+ changeListener.handleNewWantedNodeState(node, wantedState);
+ }
+ return true;
+ }
+ return false;
+ }
+
+ public void handleUpdatedHostInfo(NodeInfo nodeInfo, HostInfo hostInfo) {
+ // Only pass the host info to the latest cluster state view.
+ currentClusterStateView.handleUpdatedHostInfo(hostnames, nodeInfo, hostInfo);
+ }
+
+ public class SystemStateHistoryEntry {
+
+ private final ClusterState state;
+ private final long time;
+
+ SystemStateHistoryEntry(ClusterState state, long time) {
+ this.state = state;
+ this.time = time;
+ }
+
+ public ClusterState state() { return state; }
+
+ public long time() { return time; }
+
+ }
+
+}
diff --git a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/Timer.java b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/Timer.java
new file mode 100644
index 00000000000..9d210b4f137
--- /dev/null
+++ b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/Timer.java
@@ -0,0 +1,10 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.vespa.clustercontroller.core;
+
+/**
+ * Interface used to get time. This is separated into its own class, such that unit tests can fake timing to do timing related
+ * tests without relying on the speed of the unit test processing.
+ */
+public interface Timer {
+ public long getCurrentTimeInMillis();
+}
diff --git a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/config/.gitignore b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/config/.gitignore
new file mode 100644
index 00000000000..e69de29bb2d
--- /dev/null
+++ b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/config/.gitignore
diff --git a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/database/Database.java b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/database/Database.java
new file mode 100644
index 00000000000..eb705e2c93c
--- /dev/null
+++ b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/database/Database.java
@@ -0,0 +1,85 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.vespa.clustercontroller.core.database;
+
+import com.yahoo.vdslib.state.Node;
+import com.yahoo.vdslib.state.NodeState;
+
+import java.util.Map;
+
+/**
+ * This is an abstract class defining the functions needed by a database back end for the fleetcontroller.
+ */
+public abstract class Database {
+
+ /** Interface used for database to send events of stuff happening during requests. */
+ public interface DatabaseListener {
+ public void handleZooKeeperSessionDown();
+ public void handleMasterData(Map<Integer, Integer> data);
+ }
+
+ /**
+ * Used when initiating shutdown to avoid zookeeper layer reporting errors afterwards.
+ */
+ public abstract void stopErrorReporting();
+
+ /**
+ * Close this session, and release all resources it has used.
+ */
+ public abstract void close();
+
+ /**
+ * @return True if the database is closed, and cannot be used anymore.
+ */
+ public abstract boolean isClosed();
+
+ /**
+ * Set our vote for master election. Should always be set as this is the ephemeral node used for other
+ * fleetcontrollers to see that we are alive.
+ *
+ * @return True if request succeeded. False if not.
+ */
+ public abstract boolean storeMasterVote(int wantedMasterIndex) throws InterruptedException;
+
+ /**
+ * Store the latest system state version used. When the fleetcontroller makes a given version official it should
+ * store the version in the database, such that if another fleetcontroller takes over as master it will use a
+ * higher version system state.
+ *
+ * @return True if request succeeded. False if not.
+ */
+ public abstract boolean storeLatestSystemStateVersion(int version) throws InterruptedException;
+
+ /**
+ * Get the latest system state version used. To keep the version rising, a newly elected master will call this
+ * function to see at what index it should start.
+ *
+ * @return The last system state version used, or null if request failed.
+ */
+ public abstract Integer retrieveLatestSystemStateVersion() throws InterruptedException;
+
+ /**
+ * Save our current wanted states in the database. Typically called after processing an RPC request for altering
+ * a wanted state, or if the fleetcontroller decides to alter the wanted state itself.
+ *
+ * @return True if the request succeeded. False if not.
+ */
+ public abstract boolean storeWantedStates(Map<Node, NodeState> states) throws InterruptedException;
+
+ /**
+ * Read wanted states from the database and set wanted states for all nodes in the cluster accordingly.
+ * This function is typically called when one take over as master fleetcontroller.
+ *
+ * @return True if wanted states was altered, false if not. Null if request failed.
+ */
+ public abstract Map<Node, NodeState> retrieveWantedStates() throws InterruptedException;
+
+ /**
+ * Store start times of distributor and service layer nodes in zookeeper.
+ */
+ public abstract boolean storeStartTimestamps(Map<Node, Long> timestamps) throws InterruptedException;
+
+ /**
+ * Fetch the start times of distributor and service layer nodes.
+ */
+ public abstract Map<Node, Long> retrieveStartTimestamps() throws InterruptedException;
+}
diff --git a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/database/DatabaseHandler.java b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/database/DatabaseHandler.java
new file mode 100644
index 00000000000..a21ed994d5d
--- /dev/null
+++ b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/database/DatabaseHandler.java
@@ -0,0 +1,417 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.vespa.clustercontroller.core.database;
+
+import com.yahoo.log.LogLevel;
+import com.yahoo.vdslib.state.Node;
+import com.yahoo.vdslib.state.NodeState;
+import com.yahoo.vdslib.state.State;
+import com.yahoo.vespa.clustercontroller.core.FleetController;
+import com.yahoo.vespa.clustercontroller.core.NodeInfo;
+import com.yahoo.vespa.clustercontroller.core.Timer;
+import com.yahoo.vespa.clustercontroller.core.ContentCluster;
+import com.yahoo.vespa.clustercontroller.core.listeners.NodeAddedOrRemovedListener;
+import com.yahoo.vespa.clustercontroller.core.listeners.NodeStateOrHostInfoChangeHandler;
+import org.apache.zookeeper.KeeperException;
+
+import java.io.PrintWriter;
+import java.io.StringWriter;
+import java.util.Map;
+import java.util.TreeMap;
+import java.util.logging.Logger;
+
+/**
+ * Data store for the cluster controller.
+ * The data is stored and distributed by ZooKeeper.
+ */
+public class DatabaseHandler {
+
+ private static Logger log = Logger.getLogger(DatabaseHandler.class.getName());
+
+ public interface Context {
+ public ContentCluster getCluster();
+ public FleetController getFleetController();
+ public NodeAddedOrRemovedListener getNodeAddedOrRemovedListener();
+ public NodeStateOrHostInfoChangeHandler getNodeStateUpdateListener();
+ }
+
+ private class Data {
+ Integer masterVote;
+ Integer lastSystemStateVersion;
+ Map<Node, NodeState> wantedStates;
+ Map<Node, Long> startTimestamps;
+
+ void clear() {
+ masterVote = null;
+ lastSystemStateVersion = null;
+ wantedStates = null;
+ startTimestamps = null;
+ }
+ }
+ private class DatabaseListener implements Database.DatabaseListener {
+ public void handleZooKeeperSessionDown() {
+ log.log(LogLevel.DEBUG, "Fleetcontroller " + nodeIndex + ": Lost contact with zookeeper server");
+ synchronized(monitor) {
+ lostZooKeeperConnectionEvent = true;
+ monitor.notifyAll();
+ }
+ }
+
+ public void handleMasterData(Map<Integer, Integer> data) {
+ synchronized (monitor) {
+ if (masterDataEvent != null && masterDataEvent.equals(data)) {
+ log.log(LogLevel.DEBUG, "Fleetcontroller " + nodeIndex + ": New master data was the same as the last one. Not responding to it");
+ } else {
+ masterDataEvent = data;
+ }
+ monitor.notifyAll();
+ }
+ }
+ }
+
+ private final Timer timer;
+ private final int nodeIndex;
+ private final Object monitor;
+ private String zooKeeperAddress;
+ private int zooKeeperSessionTimeout = 5000;
+ private final Object databaseMonitor = new Object();
+
+ /** This is always ZooKeeperDatabase */
+ // TODO: Get rid of the interface as it is both unnecessary and gives a false impression of independence
+ private Database database;
+
+ private DatabaseListener dbListener = new DatabaseListener();
+ private final Data currentlyStored = new Data();
+ private final Data pendingStore = new Data();
+ private long lastZooKeeperConnectionAttempt = 0;
+ private static final int minimumWaitBetweenFailedConnectionAttempts = 10000;
+ private boolean lostZooKeeperConnectionEvent = false;
+ private Map<Integer, Integer> masterDataEvent = null;
+
+ public DatabaseHandler(Timer timer, String zooKeeperAddress, int ourIndex, Object monitor) throws InterruptedException
+ {
+ this.timer = timer;
+ this.nodeIndex = ourIndex;
+ pendingStore.masterVote = ourIndex; // To begin with we'll vote for ourselves.
+ this.monitor = monitor;
+ this.zooKeeperAddress = zooKeeperAddress;
+ }
+
+ private boolean isDatabaseClosedSafe() {
+ synchronized (databaseMonitor) {
+ return database == null || database.isClosed();
+ }
+ }
+
+ public void shutdown(FleetController fleetController) {
+ reset();
+ fleetController.lostDatabaseConnection();
+ }
+
+ public boolean isClosed() { return database == null || database.isClosed(); }
+
+ public void reset() {
+ final boolean wasRunning;
+ synchronized (databaseMonitor) {
+ wasRunning = database != null;
+ if (wasRunning) {
+ log.log(LogLevel.INFO, "Fleetcontroller " + nodeIndex + ": Resetting database state");
+ database.close();
+ database = null;
+ }
+ }
+ clearSessionMetaData();
+
+ if (wasRunning) {
+ log.log(LogLevel.INFO, "Fleetcontroller " + nodeIndex + ": Done resetting database state");
+ }
+ }
+
+ private void clearSessionMetaData() {
+ // Preserve who we want to vote for
+ Integer currentVote = (pendingStore.masterVote != null ? pendingStore.masterVote : currentlyStored.masterVote);
+ currentlyStored.clear();
+ pendingStore.clear();
+ pendingStore.masterVote = currentVote;
+ log.log(LogLevel.DEBUG, "Cleared session metadata. Pending master vote is now "
+ + pendingStore.masterVote);
+ }
+
+ public void setZooKeeperAddress(String address) {
+ if (address == null && zooKeeperAddress == null) return;
+ if (address != null && zooKeeperAddress != null && address.equals(zooKeeperAddress)) return;
+ if (zooKeeperAddress != null) {
+ log.log(LogLevel.INFO, "Fleetcontroller " + nodeIndex + ": " + (address == null ? "Stopped using ZooKeeper." : "Got new ZooKeeper address to use: " + address));
+ }
+ zooKeeperAddress = address;
+ reset();
+ }
+
+ public void setZooKeeperSessionTimeout(int timeout) {
+ if (timeout == zooKeeperSessionTimeout) return;
+ log.log(LogLevel.DEBUG, "Fleetcontroller " + nodeIndex + ": Got new ZooKeeper session timeout of " + timeout + " milliseconds.");
+ zooKeeperSessionTimeout = timeout;
+ reset();
+ }
+
+ private boolean usingZooKeeper() { return (zooKeeperAddress != null); }
+
+ private void connect(ContentCluster cluster, long currentTime) throws InterruptedException {
+ try {
+ lastZooKeeperConnectionAttempt = currentTime;
+ synchronized (databaseMonitor) {
+ if (database != null) {
+ database.close();
+ }
+ // We still hold the database lock while calling this, we want to block callers.
+ clearSessionMetaData();
+ log.log(LogLevel.INFO,
+ "Fleetcontroller " + nodeIndex + ": Setting up new ZooKeeper session at " + zooKeeperAddress);
+ database = new ZooKeeperDatabase(cluster,
+ nodeIndex, zooKeeperAddress, zooKeeperSessionTimeout, dbListener);
+ }
+ } catch (KeeperException.NodeExistsException e) {
+ log.log(LogLevel.DEBUG, "Fleetcontroller " + nodeIndex + ": Cannot create ephemeral fleetcontroller node. ZooKeeper server "
+ + "not seen old fleetcontroller instance disappear? It already exists. Will retry later: " + e.getMessage());
+ } catch (InterruptedException e) {
+ throw (InterruptedException) new InterruptedException("Interrupted").initCause(e);
+ } catch (KeeperException.ConnectionLossException e) {
+ log.log(LogLevel.WARNING, "Fleetcontroller " + nodeIndex + ": Failed to connect to ZooKeeper at " + zooKeeperAddress
+ + " with session timeout " + zooKeeperSessionTimeout + ": " + e.getMessage());
+ } catch (Exception e) {
+ StringWriter sw = new StringWriter();
+ e.printStackTrace(new PrintWriter(sw));
+ log.log(LogLevel.WARNING, "Fleetcontroller " + nodeIndex + ": Failed to connect to ZooKeeper at " + zooKeeperAddress
+ + " with session timeout " + zooKeeperSessionTimeout + ": " + sw);
+ }
+ log.log(LogLevel.INFO, "Fleetcontroller " + nodeIndex + ": Done setting up new ZooKeeper session at " + zooKeeperAddress);
+ }
+
+ /**
+ * This is called to attempt the next task against ZooKeeper we want to try.
+ *
+ * @return true if we did or attempted any work.
+ */
+ public boolean doNextZooKeeperTask(Context context) throws InterruptedException {
+ boolean didWork = false;
+ synchronized (monitor) {
+ if (zooKeeperAddress == null) return false; // If not using zookeeper no work to be done
+ if (lostZooKeeperConnectionEvent) {
+ log.log(LogLevel.DEBUG, "Fleetcontroller " + nodeIndex + ": doNextZooKeeperTask(): lost connection");
+ context.getFleetController().lostDatabaseConnection();
+ lostZooKeeperConnectionEvent = false;
+ didWork = true;
+ if (masterDataEvent != null) {
+ log.log(LogLevel.DEBUG, "Fleetcontroller " + nodeIndex + ": Had new master data queued on disconnect. Removing master data event");
+ masterDataEvent = null;
+ }
+ }
+ if (masterDataEvent != null) {
+ log.log(LogLevel.DEBUG, "Fleetcontroller " + nodeIndex + ": doNextZooKeeperTask(): new master data");
+ if (!masterDataEvent.containsKey(nodeIndex)) {
+ Integer currentVote = (pendingStore.masterVote != null ? pendingStore.masterVote : currentlyStored.masterVote);
+ assert(currentVote != null);
+ masterDataEvent.put(nodeIndex, currentVote);
+ }
+ context.getFleetController().handleFleetData(masterDataEvent);
+ masterDataEvent = null;
+ didWork = true;
+ }
+ }
+ if (isDatabaseClosedSafe()) {
+ long currentTime = timer.getCurrentTimeInMillis();
+ if (currentTime - lastZooKeeperConnectionAttempt < minimumWaitBetweenFailedConnectionAttempts) {
+ return false; // Not time to attempt connection yet.
+ }
+ didWork = true;
+ connect(context.getCluster(), currentTime);
+ }
+ synchronized (databaseMonitor) {
+ if (database == null || database.isClosed()) {
+ return didWork;
+ }
+ if (pendingStore.masterVote != null) {
+ didWork = true;
+ log.log(LogLevel.DEBUG, "Fleetcontroller " + nodeIndex + ": Attempting to store master vote "
+ + pendingStore.masterVote + " into zookeeper.");
+ if (database.storeMasterVote(pendingStore.masterVote)) {
+ log.log(LogLevel.DEBUG, "Fleetcontroller " + nodeIndex + ": Managed to store master vote "
+ + pendingStore.masterVote + " into zookeeper.");
+ currentlyStored.masterVote = pendingStore.masterVote;
+ pendingStore.masterVote = null;
+ } else {
+ log.log(LogLevel.WARNING, "Fleetcontroller " + nodeIndex + ": Failed to store master vote");
+ return didWork;
+ }
+ }
+ if (pendingStore.lastSystemStateVersion != null) {
+ didWork = true;
+ log.log(LogLevel.DEBUG, "Fleetcontroller " + nodeIndex
+ + ": Attempting to store last system state version " + pendingStore.lastSystemStateVersion
+ + " into zookeeper.");
+ if (database.storeLatestSystemStateVersion(pendingStore.lastSystemStateVersion)) {
+ currentlyStored.lastSystemStateVersion = pendingStore.lastSystemStateVersion;
+ pendingStore.lastSystemStateVersion = null;
+ } else {
+ return didWork;
+ }
+ }
+ if (pendingStore.startTimestamps != null) {
+ didWork = true;
+ log.log(LogLevel.DEBUG, "Fleetcontroller " + nodeIndex + ": Attempting to store "
+ + pendingStore.startTimestamps.size() + " start timestamps into zookeeper.");
+ if (database.storeStartTimestamps(pendingStore.startTimestamps)) {
+ currentlyStored.startTimestamps = pendingStore.startTimestamps;
+ pendingStore.startTimestamps = null;
+ } else {
+ return didWork;
+ }
+ }
+ if (pendingStore.wantedStates != null) {
+ didWork = true;
+ log.log(LogLevel.DEBUG, "Fleetcontroller " + nodeIndex + ": Attempting to store "
+ + pendingStore.wantedStates.size() + " wanted states into zookeeper.");
+ if (database.storeWantedStates(pendingStore.wantedStates)) {
+ currentlyStored.wantedStates = pendingStore.wantedStates;
+ pendingStore.wantedStates = null;
+ } else {
+ return didWork;
+ }
+ }
+ }
+ return didWork;
+ }
+
+ public void setMasterVote(Context context, int wantedMasterCandidate) throws InterruptedException {
+ log.log(LogLevel.DEBUG, "Fleetcontroller " + nodeIndex + ": Checking if master vote has been updated and need to be stored.");
+ // Schedule a write if one of the following is true:
+ // - There is already a pending vote to be written, that may have been written already without our knowledge
+ // - We don't know what is actually stored now
+ // - The value is different from the value we know is stored.
+ if (pendingStore.masterVote != null || currentlyStored.masterVote == null
+ || currentlyStored.masterVote != wantedMasterCandidate)
+ {
+ log.log(LogLevel.DEBUG, "Fleetcontroller " + nodeIndex + ": Scheduling master vote " + wantedMasterCandidate + " to be stored in zookeeper.");
+ pendingStore.masterVote = wantedMasterCandidate;
+ doNextZooKeeperTask(context);
+ }
+ }
+
+ public void saveLatestSystemStateVersion(Context context, int version) throws InterruptedException {
+ log.log(LogLevel.DEBUG, "Fleetcontroller " + nodeIndex + ": Checking if latest system state version has been updated and need to be stored.");
+ // Schedule a write if one of the following is true:
+ // - There is already a pending vote to be written, that may have been written already without our knowledge
+ // - We don't know what is actually stored now
+ // - The value is different from the value we know is stored.
+ if (pendingStore.lastSystemStateVersion != null || currentlyStored.lastSystemStateVersion == null
+ || currentlyStored.lastSystemStateVersion != version)
+ {
+ log.log(LogLevel.DEBUG, "Fleetcontroller " + nodeIndex + ": Scheduling new last system state version " + version + " to be stored in zookeeper.");
+ pendingStore.lastSystemStateVersion = version;
+ doNextZooKeeperTask(context);
+ }
+ }
+
+ public int getLatestSystemStateVersion() throws InterruptedException {
+ log.log(LogLevel.DEBUG, "Fleetcontroller " + nodeIndex + ": Retrieving latest system state version.");
+ synchronized (databaseMonitor) {
+ if (database != null && !database.isClosed()) {
+ currentlyStored.lastSystemStateVersion = database.retrieveLatestSystemStateVersion();
+ }
+ }
+ Integer version = currentlyStored.lastSystemStateVersion;
+ if (version == null) {
+ if (usingZooKeeper()) {
+ log.log(LogLevel.WARNING, "Fleetcontroller " + nodeIndex + ": Failed to retrieve latest system state version from ZooKeeper. Returning version 0.");
+ }
+ return 0;
+ }
+ return version;
+ }
+
+ public void saveWantedStates(Context context) throws InterruptedException {
+ log.log(LogLevel.DEBUG, "Fleetcontroller " + nodeIndex + ": Checking whether wanted states have changed compared to zookeeper version.");
+ Map<Node, NodeState> wantedStates = new TreeMap<>();
+ for (NodeInfo info : context.getCluster().getNodeInfo()) {
+ if (!info.getUserWantedState().equals(new NodeState(info.getNode().getType(), State.UP))) {
+ wantedStates.put(info.getNode(), info.getUserWantedState());
+ }
+ }
+ // Schedule a write if one of the following is true:
+ // - There are already a pending vote to be written, that may have been written already without our knowledge
+ // - We don't know what is actually stored now
+ // - The value is different from the value we know is stored.
+ if (pendingStore.wantedStates != null || currentlyStored.wantedStates == null
+ || !currentlyStored.wantedStates.equals(wantedStates))
+ {
+ log.log(LogLevel.DEBUG, "Fleetcontroller " + nodeIndex + ": Scheduling new wanted states to be stored into zookeeper.");
+ pendingStore.wantedStates = wantedStates;
+ doNextZooKeeperTask(context);
+ }
+ }
+
+ public boolean loadWantedStates(Context context) throws InterruptedException {
+ log.log(LogLevel.DEBUG, "Fleetcontroller " + nodeIndex + ": Retrieving node wanted states.");
+ synchronized (databaseMonitor) {
+ if (database != null && !database.isClosed()) {
+ currentlyStored.wantedStates = database.retrieveWantedStates();
+ }
+ }
+ Map<Node, NodeState> wantedStates = currentlyStored.wantedStates;
+ if (wantedStates == null) {
+ if (usingZooKeeper()) {
+ log.log(LogLevel.WARNING, "Fleetcontroller " + nodeIndex + ": Failed to retrieve wanted states from ZooKeeper. Assuming UP for all nodes.");
+ }
+ wantedStates = new TreeMap<>();
+ }
+ boolean altered = false;
+ for (Node node : wantedStates.keySet()) {
+ NodeInfo nodeInfo = context.getCluster().getNodeInfo(node);
+ if (nodeInfo == null) continue; // ignore wanted state of nodes which doesn't exist
+ NodeState wantedState = wantedStates.get(node);
+ if ( ! nodeInfo.getUserWantedState().equals(wantedState)) {
+ nodeInfo.setWantedState(wantedState);
+ context.getNodeStateUpdateListener().handleNewWantedNodeState(nodeInfo, wantedState);
+ altered = true;
+ }
+ log.log(LogLevel.DEBUG, "Fleetcontroller " + nodeIndex + ": Node " + node + " has wanted state " + wantedState);
+ }
+
+ // Remove wanted state from any node having a wanted state set that is no longer valid
+ for (NodeInfo info : context.getCluster().getNodeInfo()) {
+ NodeState wantedState = wantedStates.get(info.getNode());
+ if (wantedState == null && !info.getUserWantedState().equals(new NodeState(info.getNode().getType(), State.UP))) {
+ info.setWantedState(null);
+ context.getNodeStateUpdateListener().handleNewWantedNodeState(info, info.getWantedState().clone());
+ altered = true;
+ }
+ }
+ return altered;
+ }
+
+ public void saveStartTimestamps(Context context) throws InterruptedException {
+ log.log(LogLevel.DEBUG, "Fleetcontroller " + nodeIndex + ": Scheduling start timestamps to be stored into zookeeper.");
+ pendingStore.startTimestamps = context.getCluster().getStartTimestamps();
+ doNextZooKeeperTask(context);
+ }
+
+ public boolean loadStartTimestamps(ContentCluster cluster) throws InterruptedException {
+ log.log(LogLevel.DEBUG, "Fleetcontroller " + nodeIndex + ": Retrieving start timestamps");
+ synchronized (databaseMonitor) {
+ if (database == null || database.isClosed()) return false;
+ currentlyStored.startTimestamps = database.retrieveStartTimestamps();
+ }
+ Map<Node, Long> startTimestamps = currentlyStored.startTimestamps;
+ if (startTimestamps == null) {
+ if (usingZooKeeper()) {
+ log.log(LogLevel.WARNING, "Fleetcontroller " + nodeIndex + ": Failed to retrieve start timestamps from ZooKeeper. Cluster state will be bloated with timestamps until we get them set.");
+ }
+ startTimestamps = new TreeMap<>();
+ }
+ for (Map.Entry<Node, Long> e : startTimestamps.entrySet()) {
+ cluster.setStartTimestamp(e.getKey(), e.getValue());
+ log.log(LogLevel.DEBUG, "Fleetcontroller " + nodeIndex + ": Node " + e.getKey() + " has start timestamp " + e.getValue());
+ }
+ return true;
+ }
+
+}
diff --git a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/database/MasterDataGatherer.java b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/database/MasterDataGatherer.java
new file mode 100644
index 00000000000..69c5e10246c
--- /dev/null
+++ b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/database/MasterDataGatherer.java
@@ -0,0 +1,203 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.vespa.clustercontroller.core.database;
+
+import org.apache.zookeeper.data.Stat;
+import org.apache.zookeeper.*;
+
+import java.util.logging.Logger;
+import java.util.*;
+import java.nio.charset.Charset;
+
+import com.yahoo.log.LogLevel;
+
+public class MasterDataGatherer {
+
+ private static Logger log = Logger.getLogger(MasterDataGatherer.class.getName());
+ private static Charset utf8 = Charset.forName("UTF8");
+
+ /** Utility function for getting node index from path name of the ephemeral nodes. */
+ private static int getIndex(String nodeName) {
+ assert(nodeName != null);
+ int lastSlash = nodeName.lastIndexOf('/');
+ if (lastSlash <= 1) {
+ System.err.println("Unexpected path to nodename: '" + nodeName + "'.");
+ assert(lastSlash > 1);
+ }
+ return Integer.parseInt(nodeName.substring(lastSlash + 1));
+ }
+
+ private final String zooKeeperRoot; // The root path in zookeeper, typically /vespa/fleetcontroller/<clustername>/
+ private Map<Integer, Integer> masterData = new TreeMap<Integer, Integer>(); // The master state last reported to the fleetcontroller
+ private final Map<Integer, Integer> nextMasterData = new TreeMap<Integer, Integer>(); // Temporary master state while gathering new info from zookeeper
+ private final AsyncCallback.ChildrenCallback childListener = new DirCallback(); // Dir change listener
+ private final NodeDataCallback nodeListener = new NodeDataCallback(); // Ephemeral node data change listener
+
+ private final Database.DatabaseListener listener;
+ private final ZooKeeper session;
+ private final int nodeIndex;
+
+ /*
+ private boolean seenDirChangeDuringRun = false; // Set to true if we got a dir event while a refetch is happening
+ private final Set<Integer> seenDataChangeDuringRun = new TreeSet<Integer>(); // Sets the indexes that got a data change event while fetching is already running
+ */
+ private Watcher changeWatcher = new ChangeWatcher();
+
+ /**
+ * This class is used to handle node children changed and node data changed events from the zookeeper server.
+ * A run to fetch new master data starts with either of these changes, except for the first time on startup,
+ * where the constructor triggers a run by requesting dir info, as it starts of knowing nothing.
+ */
+ private class ChangeWatcher implements Watcher {
+ public void process(WatchedEvent watchedEvent) {
+ switch (watchedEvent.getType()) {
+ case NodeChildrenChanged: // Fleetcontrollers have either connected or disconnected to ZooKeeper
+ log.log(LogLevel.INFO, "Fleetcontroller " + nodeIndex + ": A change occured in the list of registered fleetcontrollers. Requesting new information");
+ session.getChildren(zooKeeperRoot + "indexes", this, childListener, null);
+ break;
+ case NodeDataChanged: // A fleetcontroller have changed what node it is voting for
+ log.log(LogLevel.INFO, "Fleetcontroller " + nodeIndex + ": Altered data in node " + watchedEvent.getPath() + ". Requesting new vote");
+ int index = getIndex(watchedEvent.getPath());
+ synchronized (nextMasterData) {
+ nextMasterData.put(index, null);
+ }
+ session.getData(zooKeeperRoot + "indexes/" + index, this, nodeListener, null);
+ break;
+ case NodeCreated: // How can this happen? Can one leave watches on non-existing nodes?
+ log.log(LogLevel.WARNING, "Fleetcontroller " + nodeIndex + ": Got unexpected ZooKeeper event NodeCreated");
+ break;
+ case NodeDeleted:
+ // We get this event when fleetcontrollers shut down and node in dir disappears. But it should also trigger a NodeChildrenChanged event, so
+ // ignoring this one.
+ log.log(LogLevel.DEBUG, "Fleetcontroller " + nodeIndex + ": Node deleted event gotten. Ignoring it, expecting a NodeChildrenChanged event too.");
+ break;
+ case None:
+ log.log(LogLevel.DEBUG, "Fleetcontroller " + nodeIndex + ": Got ZooKeeper event None.");
+ }
+ }
+ }
+
+ /**
+ * The dir callback class is responsible for handling dir change events. (Nodes coming up or going down)
+ * It gets a list of all the nodes, and need to find which ones are removed and which ones are added,
+ * and update the next state to remove those no longer existing and request data for those that are new.
+ */
+ private class DirCallback implements AsyncCallback.ChildrenCallback {
+ public void processResult(int version, String path, Object context, List<String> nodes) {
+ if (nodes == null) nodes = new LinkedList<String>();
+ log.log(LogLevel.DEBUG, "Fleetcontroller " + nodeIndex + ": Got node list response from " + path + " version " + version + " with " + nodes.size() + " nodes");
+ // Detect what nodes are added and what nodes have been removed. Others can be ignored.
+ List<Integer> addedNodes = new LinkedList<Integer>();
+ synchronized (nextMasterData) {
+ Set<Integer> removedNodes = new TreeSet<Integer>(nextMasterData.keySet());
+ for (String node : nodes) {
+ int index = Integer.parseInt(node);
+ if (removedNodes.contains(index)) {
+ log.log(LogLevel.DEBUG, "Fleetcontroller " + nodeIndex + ": Node " + index + " no longer exists");
+ removedNodes.remove(index);
+ } else {
+ log.log(LogLevel.DEBUG, "Fleetcontroller " + nodeIndex + ": Node " + index + " is new");
+ addedNodes.add(index);
+ }
+ }
+ for (Integer index : removedNodes) {
+ nextMasterData.remove(index);
+ }
+ for (Integer index : addedNodes) {
+ log.log(LogLevel.DEBUG, "Fleetcontroller " + nodeIndex + ": Attempting to fetch data in node '" + zooKeeperRoot + index + "' to see vote");
+ nextMasterData.put(index, null);
+ session.getData(zooKeeperRoot + "indexes/" + index, changeWatcher, nodeListener, null);
+ }
+ }
+ // If we didn't add any information, we should have all the information we need and we can report back to the fleetcontroller
+ if (addedNodes.isEmpty()) {
+ cycleCompleted();
+ }
+ }
+ }
+
+ /** The node data callback class is responsible for fetching new votes from fleetcontrollers that have altered their vote. */
+ private class NodeDataCallback implements AsyncCallback.DataCallback {
+
+ public void processResult(int code, String path, Object context, byte[] rawdata, Stat stat) {
+ String data = rawdata == null ? null : new String(rawdata, utf8);
+ log.log(LogLevel.INFO, "Fleetcontroller " + nodeIndex + ": Got change in vote data from path " + path +
+ " with code " + code + " and data " + data);
+
+ int index = getIndex(path);
+ synchronized (nextMasterData) {
+ if (code != KeeperException.Code.OK.intValue()) {
+ if (code == KeeperException.Code.NONODE.intValue()) {
+ log.log(LogLevel.INFO, "Fleetcontroller " + nodeIndex + ": Node at " + path +
+ " removed, got no other option than counting it as down.");
+ } else {
+ log.log(LogLevel.ERROR, "Fleetcontroller " + nodeIndex + ": Failure code " + code +
+ " when listening to node at " + path +
+ ", will assume it's down.");
+ }
+ if (nextMasterData.containsKey(index)) {
+ nextMasterData.remove(index);
+ } else {
+ log.log(LogLevel.ERROR, "Fleetcontroller " + nodeIndex + ": Strangely, we already had data from node " + index + " when trying to remove it");
+ }
+ } else {
+ Integer value = Integer.valueOf(data);
+ if (nextMasterData.containsKey(index)) {
+ if (value.equals(nextMasterData.get(index))) {
+ log.log(LogLevel.DEBUG, "Fleetcontroller " + nodeIndex + ": Got vote from fleetcontroller " + index + ", which already was " + value + ".");
+ } else {
+ log.log(LogLevel.INFO, "Fleetcontroller " + nodeIndex + ": Got vote from fleetcontroller " + index + ". Altering vote from " + nextMasterData.get(index) + " to " + value + ".");
+ nextMasterData.put(index, value);
+ }
+ } else {
+ log.log(LogLevel.WARNING, "Fleetcontroller " + nodeIndex + ": Got vote from fleetcontroller " + index + " which is not alive according to current state. Ignoring it");
+ }
+ }
+ for(Integer vote : nextMasterData.values()) {
+ if (vote == null) {
+ log.log(LogLevel.SPAM, "Fleetcontroller " + nodeIndex + ": Still not received votes from all fleet controllers. Awaiting more responses.");
+ return;
+ }
+ }
+ }
+ log.log(LogLevel.DEBUG, "Fleetcontroller " + nodeIndex + ": Got votes for all fleetcontrollers. Sending event with new fleet data for update");
+ cycleCompleted();
+ }
+ }
+
+ /** Constructor setting up the various needed members, and initializing the first data fetch to start things up */
+ public MasterDataGatherer(ZooKeeper session, String zooKeeperRoot, Database.DatabaseListener listener, int nodeIndex) {
+ this.zooKeeperRoot = zooKeeperRoot;
+ this.session = session;
+ this.listener = listener;
+ this.nodeIndex = nodeIndex;
+ if (session.getState().equals(ZooKeeper.States.CONNECTED)) {
+ restart();
+ }
+ }
+
+ /** Calling restart, ignores what we currently know and starts another circly. Typically called after reconnecting to ZooKeeperServer. */
+ public void restart() {
+ synchronized (nextMasterData) {
+ masterData = new TreeMap<Integer, Integer>();
+ nextMasterData.clear();
+ session.getChildren(zooKeeperRoot + "indexes", changeWatcher, childListener, null);
+ }
+ }
+
+ /** Function to be called when we have new consistent master election. */
+ public void cycleCompleted() {
+ Map<Integer, Integer> copy;
+ synchronized (nextMasterData) {
+ if (nextMasterData.equals(masterData)) {
+ log.log(LogLevel.DEBUG, "Fleetcontroller " + nodeIndex + ": No change in master data detected, not sending it on");
+ // for(Integer i : nextMasterData.keySet()) { System.err.println(i + " -> " + nextMasterData.get(i)); }
+ return;
+ }
+ masterData = new TreeMap<Integer, Integer>(nextMasterData);
+ copy = masterData;
+ }
+ log.log(LogLevel.DEBUG, "Fleetcontroller " + nodeIndex + ": Got new master data, sending it on");
+ listener.handleMasterData(copy);
+ }
+
+}
diff --git a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/database/ZooKeeperDatabase.java b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/database/ZooKeeperDatabase.java
new file mode 100644
index 00000000000..05cc3c20e76
--- /dev/null
+++ b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/database/ZooKeeperDatabase.java
@@ -0,0 +1,345 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.vespa.clustercontroller.core.database;
+
+import com.yahoo.vespa.clustercontroller.core.ContentCluster;
+import org.apache.zookeeper.*;
+import org.apache.zookeeper.data.Stat;
+import org.apache.zookeeper.data.ACL;
+import com.yahoo.log.LogLevel;
+import com.yahoo.vdslib.state.NodeState;
+import com.yahoo.vdslib.state.State;
+import com.yahoo.vdslib.state.Node;
+
+import java.util.logging.Logger;
+import java.util.*;
+import java.io.IOException;
+import java.io.StringWriter;
+import java.io.PrintWriter;
+import java.nio.charset.Charset;
+
+public class ZooKeeperDatabase extends Database {
+
+ private static Logger log = Logger.getLogger(ZooKeeperDatabase.class.getName());
+ private static Charset utf8 = Charset.forName("UTF8");
+ private static final List<ACL> acl = ZooDefs.Ids.OPEN_ACL_UNSAFE;
+
+ private final String zooKeeperRoot;
+ private final Database.DatabaseListener listener;
+ private final ZooKeeperWatcher watcher = new ZooKeeperWatcher();
+ private final ZooKeeper session;
+ private boolean sessionOpen = true;
+ private final int nodeIndex;
+ private final MasterDataGatherer masterDataGatherer;
+ private boolean reportErrors = true;
+
+ public void stopErrorReporting() {
+ reportErrors = false;
+ }
+
+ private class ZooKeeperWatcher implements Watcher {
+ private Event.KeeperState state = null;
+
+ public Event.KeeperState getState() { return (state == null ? Event.KeeperState.SyncConnected : state); }
+
+ public void process(WatchedEvent watchedEvent) {
+ // Shouldn't get events after we expire, but just be sure we stop them here.
+ if (state != null && state.equals(Event.KeeperState.Expired)) {
+ log.log(LogLevel.WARNING, "Fleetcontroller " + nodeIndex + ": Got event from ZooKeeper session after it expired");
+ return;
+ }
+ Event.KeeperState newState = watchedEvent.getState();
+ if (state == null || !state.equals(newState)) switch (newState) {
+ case Expired:
+ log.log(LogLevel.INFO, "Fleetcontroller " + nodeIndex + ": Zookeeper session expired");
+ sessionOpen = false;
+ listener.handleZooKeeperSessionDown();
+ break;
+ case Disconnected:
+ log.log(LogLevel.INFO, "Fleetcontroller " + nodeIndex + ": Lost connection to zookeeper server");
+ sessionOpen = false;
+ listener.handleZooKeeperSessionDown();
+ break;
+ case SyncConnected:
+ log.log(LogLevel.INFO, "Fleetcontroller " + nodeIndex + ": Connection to zookeeper server established. Refetching master data");
+ if (masterDataGatherer != null) {
+ masterDataGatherer.restart();
+ }
+ }
+ switch (watchedEvent.getType()) {
+ case NodeChildrenChanged: // Fleetcontrollers have either connected or disconnected to ZooKeeper
+ log.log(LogLevel.WARNING, "Fleetcontroller " + nodeIndex + ": Got unexpected ZooKeeper event NodeChildrenChanged");
+ break;
+ case NodeDataChanged: // A fleetcontroller have changed what node it is voting for
+ log.log(LogLevel.WARNING, "Fleetcontroller " + nodeIndex + ": Got unexpected ZooKeeper event NodeDataChanged");
+ break;
+ case NodeCreated: // How can this happen? Can one leave watches on non-existing nodes?
+ log.log(LogLevel.WARNING, "Fleetcontroller " + nodeIndex + ": Got unexpected ZooKeeper event NodeCreated");
+ break;
+ case NodeDeleted: // We're not watching any nodes for whether they are deleted or not.
+ log.log(LogLevel.WARNING, "Fleetcontroller " + nodeIndex + ": Got unexpected ZooKeeper event NodeDeleted");
+ break;
+ case None:
+ if (state != null && state.equals(watchedEvent.getState())) {
+ log.log(LogLevel.WARNING, "Fleetcontroller " + nodeIndex + ": Got None type event that didn't even alter session state. What does that indicate?");
+ }
+ }
+ state = watchedEvent.getState();
+ }
+ };
+
+ public ZooKeeperDatabase(ContentCluster cluster, int nodeIndex, String address, int timeout, Database.DatabaseListener zksl) throws IOException, KeeperException, InterruptedException {
+ this.nodeIndex = nodeIndex;
+ zooKeeperRoot = "/vespa/fleetcontroller/" + cluster.getName() + "/";
+ session = new ZooKeeper(address, timeout, watcher);
+ boolean completedOk = false;
+ try{
+ this.listener = zksl;
+ setupRoot();
+ log.log(LogLevel.SPAM, "Fleetcontroller " + nodeIndex + ": Asking for initial data on master election");
+ masterDataGatherer = new MasterDataGatherer(session, zooKeeperRoot, listener, nodeIndex);
+ completedOk = true;
+ } finally {
+ if (!completedOk) session.close();
+ }
+ }
+
+ private void createNode(String prefix, String nodename, byte value[]) throws KeeperException, InterruptedException {
+ try{
+ if (session.exists(prefix + nodename, false) != null) {
+ log.log(LogLevel.DEBUG, "Fleetcontroller " + nodeIndex + ": Zookeeper node '" + prefix + nodename + "' already exists. Not creating it");
+ return;
+ }
+ session.create(prefix + nodename, value, acl, CreateMode.PERSISTENT);
+ log.log(LogLevel.DEBUG, "Fleetcontroller " + nodeIndex + ": Created zookeeper node '" + prefix + nodename + "'");
+ } catch (KeeperException.NodeExistsException e) {
+ log.log(LogLevel.DEBUG, "Fleetcontroller " + nodeIndex + ": Node to create existed, "
+ + "but this is normal as other nodes may create them at the same time.");
+ }
+ }
+
+ private void setupRoot() throws KeeperException, InterruptedException {
+ String pathElements[] = zooKeeperRoot.substring(1).split("/");
+ String path = "";
+ for (String elem : pathElements) {
+ path += "/" + elem;
+ createNode("", path, new byte[0]);
+ }
+ createNode(zooKeeperRoot, "indexes", new byte[0]);
+ createNode(zooKeeperRoot, "wantedstates", new byte[0]);
+ createNode(zooKeeperRoot, "starttimestamps", new byte[0]);
+ createNode(zooKeeperRoot, "latestversion", new Integer(0).toString().getBytes(utf8));
+ byte val[] = String.valueOf(nodeIndex).getBytes(utf8);
+ deleteNodeIfExists(getMyIndexPath());
+ log.log(LogLevel.INFO, "Fleetcontroller " + nodeIndex +
+ ": Creating ephemeral master vote node with vote to self.");
+ session.create(getMyIndexPath(), val, acl, CreateMode.EPHEMERAL);
+ }
+
+ private void deleteNodeIfExists(String path) throws KeeperException, InterruptedException {
+ if (session.exists(path, false) != null) {
+ log.log(LogLevel.INFO, "Fleetcontroller " + nodeIndex + ": Removing master vote node.");
+ session.delete(path, -1);
+ }
+ }
+
+ private String getMyIndexPath() {
+ return zooKeeperRoot + "indexes/" + nodeIndex;
+ }
+
+ /**
+ * If this is called, we assume we're in shutdown situation, or we are doing it because we need a new session.
+ * Thus we only need to free up resources, no need to notify anyone.
+ */
+ public void close() {
+ sessionOpen = false;
+ try{
+ log.log(LogLevel.DEBUG, "Fleetcontroller " + nodeIndex + ": Trying to close ZooKeeper session 0x"
+ + Long.toHexString(session.getSessionId()));
+ session.close();
+ } catch (InterruptedException e) {
+ log.log(LogLevel.WARNING, "Fleetcontroller " + nodeIndex + ": Got interrupt exception while closing session: " + e);
+ }
+ }
+
+ public boolean isClosed() {
+ return (!sessionOpen || watcher.getState().equals(Watcher.Event.KeeperState.Expired));
+ }
+
+ public boolean storeMasterVote(int wantedMasterIndex) throws InterruptedException {
+ byte val[] = String.valueOf(wantedMasterIndex).getBytes(utf8);
+ try{
+ session.setData(getMyIndexPath(), val, -1);
+ log.log(LogLevel.INFO, "Fleetcontroller " + nodeIndex + ": Stored new vote in ephemeral node. " + nodeIndex + " -> " + wantedMasterIndex);
+ return true;
+ } catch (InterruptedException e) {
+ throw (InterruptedException) new InterruptedException("Interrupted").initCause(e);
+ } catch (Exception e) {
+ if (sessionOpen && reportErrors) {
+ StringWriter sw = new StringWriter();
+ e.printStackTrace(new PrintWriter(sw));
+ log.log(LogLevel.WARNING, "Fleetcontroller " + nodeIndex + ": Failed to create our ephemeral node and store master vote:\n" + sw);
+ }
+ }
+ return false;
+ }
+ public boolean storeLatestSystemStateVersion(int version) throws InterruptedException {
+ byte data[] = Integer.toString(version).getBytes(utf8);
+ try{
+ log.log(LogLevel.DEBUG, "Fleetcontroller " + nodeIndex + ": Setting new latest cluster state version " + version);
+ session.setData(zooKeeperRoot + "latestversion", data, -1);
+ return true;
+ } catch (InterruptedException e) {
+ throw (InterruptedException) new InterruptedException("Interrupted").initCause(e);
+ } catch (Exception e) {
+ if (sessionOpen && reportErrors) {
+ StringWriter sw = new StringWriter();
+ e.printStackTrace(new PrintWriter(sw));
+ log.log(LogLevel.WARNING, "Fleetcontroller " + nodeIndex + ": Failed to store latest system state version used " + version + "\n" + sw);
+ }
+ return false;
+ }
+ }
+
+ public Integer retrieveLatestSystemStateVersion() throws InterruptedException {
+ Stat stat = new Stat();
+ try{
+ log.log(LogLevel.DEBUG, "Fleetcontroller " + nodeIndex + ": Fetching latest cluster state at '" + zooKeeperRoot + "latestversion'");
+ byte[] data = session.getData(zooKeeperRoot + "latestversion", false, stat);
+ return Integer.valueOf(new String(data, utf8));
+ } catch (InterruptedException e) {
+ throw (InterruptedException) new InterruptedException("Interrupted").initCause(e);
+ } catch (Exception e) {
+ if (sessionOpen && reportErrors) {
+ StringWriter sw = new StringWriter();
+ e.printStackTrace(new PrintWriter(sw));
+ log.log(LogLevel.WARNING, "Fleetcontroller " + nodeIndex + ": Failed to retrieve latest system state version used. Returning null.\n" + sw);
+ }
+ return null;
+ }
+ }
+
+ public boolean storeWantedStates(Map<Node, NodeState> states) throws InterruptedException {
+ if (states == null) states = new TreeMap<>();
+ StringBuilder sb = new StringBuilder();
+ for (Node node : states.keySet()) {
+ NodeState nodeState = states.get(node);
+ if (!nodeState.equals(new NodeState(node.getType(), State.UP))) {
+ NodeState toStore = new NodeState(node.getType(), nodeState.getState());
+ toStore.setDescription(nodeState.getDescription());
+ if (!toStore.equals(nodeState)) {
+ log.warning("Attempted to store wanted state with more than just a main state. Extra data stripped. Original data '" + nodeState.serialize(true));
+ }
+ sb.append(node.toString()).append(':').append(toStore.serialize(true)).append('\n');
+ }
+ }
+ byte val[] = sb.toString().getBytes(utf8);
+ try{
+ log.log(LogLevel.DEBUG, "Fleetcontroller " + nodeIndex + ": Storing wanted states at '" + zooKeeperRoot + "wantedstates'");
+ session.setData(zooKeeperRoot + "wantedstates", val, -1);
+ return true;
+ } catch (InterruptedException e) {
+ throw (InterruptedException) new InterruptedException("Interrupted").initCause(e);
+ } catch (Exception e) {
+ if (sessionOpen && reportErrors) {
+ StringWriter sw = new StringWriter();
+ e.printStackTrace(new PrintWriter(sw));
+ log.log(LogLevel.WARNING, "Fleetcontroller " + nodeIndex + ": Failed to store wanted states in zookeeper: " + e.getMessage() + "\n" + sw);
+ }
+ return false;
+ }
+ }
+
+ public Map<Node, NodeState> retrieveWantedStates() throws InterruptedException {
+ try{
+ log.log(LogLevel.DEBUG, "Fleetcontroller " + nodeIndex + ": Fetching wanted states at '" + zooKeeperRoot + "wantedstates'");
+ Stat stat = new Stat();
+ byte[] data = session.getData(zooKeeperRoot + "wantedstates", false, stat);
+ Map<Node, NodeState> wanted = new TreeMap<>();
+ if (data != null && data.length > 0) {
+ StringTokenizer st = new StringTokenizer(new String(data, utf8), "\n", false);
+ while (st.hasMoreTokens()) {
+ String token = st.nextToken();
+ int colon = token.indexOf(':');
+ try{
+ if (colon < 0) throw new Exception();
+ Node node = new Node(token.substring(0, colon));
+ NodeState nodeState = NodeState.deserialize(node.getType(), token.substring(colon + 1));
+ wanted.put(node, nodeState);
+ } catch (Exception e) {
+ log.log(LogLevel.WARNING, "Fleetcontroller " + nodeIndex + ": Ignoring invalid wantedstate line in zookeeper '" + token + "'.");
+ }
+ }
+ }
+ return wanted;
+ } catch (InterruptedException e) {
+ throw (InterruptedException) new InterruptedException("Interrupted").initCause(e);
+ } catch (Exception e) {
+ if (sessionOpen && reportErrors) {
+ StringWriter sw = new StringWriter();
+ e.printStackTrace(new PrintWriter(sw));
+ log.log(LogLevel.WARNING, "Fleetcontroller " + nodeIndex + ": Failed to retrieve wanted states from zookeeper: " + e.getMessage() + "\n" + sw);
+ }
+ return null;
+ }
+ }
+
+ @Override
+ public boolean storeStartTimestamps(Map<Node, Long> timestamps) throws InterruptedException {
+ if (timestamps == null) timestamps = new TreeMap<>();
+ StringBuilder sb = new StringBuilder();
+ for (Node n : timestamps.keySet()) {
+ Long timestamp = timestamps.get(n);
+ sb.append(n.toString()).append(':').append(timestamp).append('\n');
+ }
+ byte val[] = sb.toString().getBytes(utf8);
+ try{
+ log.log(LogLevel.DEBUG, "Fleetcontroller " + nodeIndex + ": Storing start timestamps at '" + zooKeeperRoot + "starttimestamps");
+ session.setData(zooKeeperRoot + "starttimestamps", val, -1);
+ return true;
+ } catch (InterruptedException e) {
+ throw (InterruptedException) new InterruptedException("Interrupted").initCause(e);
+ } catch (Exception e) {
+ if (sessionOpen && reportErrors) {
+ StringWriter sw = new StringWriter();
+ e.printStackTrace(new PrintWriter(sw));
+ log.log(LogLevel.WARNING, "Fleetcontroller " + nodeIndex + ": Failed to store start timestamps in zookeeper: " + e.getMessage() + "\n" + sw);
+ }
+ return false;
+ }
+ }
+
+ @Override
+ public Map<Node, Long> retrieveStartTimestamps() throws InterruptedException {
+ try{
+ log.log(LogLevel.DEBUG, "Fleetcontroller " + nodeIndex + ": Fetching start timestamps at '" + zooKeeperRoot + "starttimestamps'");
+ Stat stat = new Stat();
+ byte[] data = session.getData(zooKeeperRoot + "starttimestamps", false, stat);
+ Map<Node, Long> wanted = new TreeMap<Node, Long>();
+ if (data != null && data.length > 0) {
+ StringTokenizer st = new StringTokenizer(new String(data, utf8), "\n", false);
+ while (st.hasMoreTokens()) {
+ String token = st.nextToken();
+ int colon = token.indexOf(':');
+ try{
+ if (colon < 0) throw new Exception();
+ Node n = new Node(token.substring(0, colon));
+ Long timestamp = Long.valueOf(token.substring(colon + 1));
+ wanted.put(n, timestamp);
+ } catch (Exception e) {
+ log.log(LogLevel.WARNING, "Fleetcontroller " + nodeIndex + ": Ignoring invalid starttimestamp line in zookeeper '" + token + "'.");
+ }
+ }
+ }
+ return wanted;
+ } catch (InterruptedException e) {
+ throw (InterruptedException) new InterruptedException("Interrupted").initCause(e);
+ } catch (Exception e) {
+ if (sessionOpen && reportErrors) {
+ StringWriter sw = new StringWriter();
+ e.printStackTrace(new PrintWriter(sw));
+ log.log(LogLevel.WARNING, "Fleetcontroller " + nodeIndex + ": Failed to retrieve start timestamps from zookeeper: " + e.getMessage() + "\n" + sw);
+ }
+ return null;
+ }
+ }
+}
diff --git a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/hostinfo/Distributor.java b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/hostinfo/Distributor.java
new file mode 100644
index 00000000000..c48c203a55c
--- /dev/null
+++ b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/hostinfo/Distributor.java
@@ -0,0 +1,19 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.vespa.clustercontroller.core.hostinfo;
+
+import com.fasterxml.jackson.annotation.JsonProperty;
+
+import java.util.ArrayList;
+import java.util.List;
+
+/**
+ * Class for handling Distributor part of HostInfo.
+ * @author dybdahl
+ */
+public class Distributor {
+
+ @JsonProperty("storage-nodes")
+ private List<StorageNode> storageNodes = new ArrayList<>();
+
+ public List<StorageNode> getStorageNodes() { return storageNodes; }
+}
diff --git a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/hostinfo/HostInfo.java b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/hostinfo/HostInfo.java
new file mode 100644
index 00000000000..fbc53128415
--- /dev/null
+++ b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/hostinfo/HostInfo.java
@@ -0,0 +1,70 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.vespa.clustercontroller.core.hostinfo;
+
+import com.fasterxml.jackson.annotation.JsonProperty;
+import com.fasterxml.jackson.databind.DeserializationFeature;
+import com.fasterxml.jackson.databind.ObjectMapper;
+
+import java.io.IOException;
+import java.util.logging.Level;
+import java.util.logging.Logger;
+
+/**
+ * Parsing and keeping of host info from nodes.
+ * @author dybdahl
+ */
+public class HostInfo {
+
+ private static Logger log = Logger.getLogger(HostInfo.class.getName());
+ private static final ObjectMapper mapper = new ObjectMapper();
+ private String rawCreationString = "NOT SET";
+ static {
+ mapper.configure(DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES, false);
+ }
+
+ // TODO: Don't use JSON classes as model classes
+ @JsonProperty("cluster-state-version") private Integer clusterStateVersion = null;
+ @JsonProperty("vtag") private Vtag vtag = new Vtag(null);
+ @JsonProperty("distributor") private Distributor distributor = new Distributor();
+ @JsonProperty("metrics") private Metrics metrics = new Metrics();
+
+ public Vtag getVtag() {
+ return vtag;
+ }
+
+ public Distributor getDistributor() {
+ return distributor;
+ }
+
+ public Metrics getMetrics() {
+ return metrics;
+ }
+
+ public Integer getClusterStateVersionOrNull() { return clusterStateVersion; }
+
+ public static HostInfo createHostInfo(String json) {
+ HostInfo hostInfo;
+ try {
+ hostInfo = mapper.readValue(json, HostInfo.class);
+ } catch (IOException e) {
+ log.log(Level.WARNING, "Problem parsing " + json, e);
+ hostInfo = new HostInfo();
+ }
+ hostInfo.setRawCreationString(json);
+ return hostInfo;
+ }
+
+ /**
+ * Only for debugging.
+ * @return string that was used to create this instance.
+ */
+ public String getRawCreationString() {
+ return rawCreationString;
+ }
+
+ public void setRawCreationString(String rawCreationString) {
+ this.rawCreationString = rawCreationString;
+ }
+
+ public HostInfo() {}
+}
diff --git a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/hostinfo/Metrics.java b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/hostinfo/Metrics.java
new file mode 100644
index 00000000000..23c502063a6
--- /dev/null
+++ b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/hostinfo/Metrics.java
@@ -0,0 +1,59 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.vespa.clustercontroller.core.hostinfo;
+
+import com.fasterxml.jackson.annotation.JsonProperty;
+
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.List;
+
+/**
+ * Keeper for Metrics for HostInfo.
+ * @author dybdahl
+ */
+public class Metrics {
+
+ public List<Metric> getValues() { return Collections.unmodifiableList(metricsList); }
+
+ public static class Metric {
+ private final String name;
+ private final Value value;
+
+ public Metric(
+ @JsonProperty("name") String name,
+ @JsonProperty("values") Value value) {
+ this.name = name;
+ this.value = value;
+ }
+
+ public String getName() { return name; }
+ public Value getValue() { return value; }
+ }
+
+ public static class Value {
+
+ private final Long last;
+ private final Double average;
+ private final Long count;
+
+ public Value(
+ @JsonProperty("average") Double average,
+ @JsonProperty("count") Long count,
+ @JsonProperty("rate") Double rate,
+ @JsonProperty("min") Long min,
+ @JsonProperty("max") Long max,
+ @JsonProperty("last") Long last) {
+ this.last = last;
+ this.average = average;
+ this.count = count;
+ }
+
+ public Long getLast() { return last; }
+ public Double getAverage() { return average; }
+ public Long getCount() { return count; }
+ }
+
+ // We initialize it in case the metrics is missing in the JSON.
+ @JsonProperty("values")
+ private ArrayList<Metric> metricsList = new ArrayList<>();
+}
diff --git a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/hostinfo/StorageNode.java b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/hostinfo/StorageNode.java
new file mode 100644
index 00000000000..4a8cff2d5bb
--- /dev/null
+++ b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/hostinfo/StorageNode.java
@@ -0,0 +1,100 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.vespa.clustercontroller.core.hostinfo;
+
+import com.fasterxml.jackson.annotation.JsonCreator;
+import com.fasterxml.jackson.annotation.JsonProperty;
+
+/**
+ * Keeping information about a storage node seen from the distributor.
+ * @author dybdahl
+ */
+public class StorageNode {
+
+ static public class Put {
+ private final Long latencyMsSum;
+ private final Long count;
+
+ @JsonCreator
+ public Put(@JsonProperty("latency-ms-sum") Long latencyMsSum, @JsonProperty("count") Long count) {
+ this.latencyMsSum = latencyMsSum;
+ this.count = count;
+ }
+
+ public Long getLatencyMsSum() { return latencyMsSum; }
+ public Long getCount() { return count; }
+ }
+
+ static public class OpsLatency {
+ private final Put put;
+
+ @JsonCreator
+ public OpsLatency(@JsonProperty("put") Put put) {
+ this.put = put;
+ }
+
+ public Put getPut() { return put; }
+ }
+
+ static public class Buckets {
+ private final long buckets;
+
+ @JsonCreator
+ public Buckets(@JsonProperty("buckets") Long buckets) {
+ this.buckets = buckets;
+ }
+
+ public long getBuckets() { return buckets; }
+ }
+
+ static public class OutstandingMergeOps {
+ @JsonProperty("syncing")
+ private Buckets syncing;
+ @JsonProperty("copying-in")
+ private Buckets copyingIn;
+ @JsonProperty("moving-out")
+ private Buckets movingOut;
+ @JsonProperty("copying-out")
+ private Buckets copyingOut;
+
+ public Buckets getSyncingOrNull() { return syncing; }
+ public Buckets getCopyingInOrNull() { return copyingIn; }
+ public Buckets getMovingOutOrNull() { return movingOut; }
+ public Buckets getCopyingOutOrNull() { return copyingOut; }
+ }
+
+ private final Integer index;
+
+ @JsonProperty("ops-latency")
+ private OpsLatency opsLatencies;
+
+ // If a Distributor does not manage any bucket copies for a particular storage node,
+ // then the distributor will not return any min-current-replication-factor for that
+ // storage node.
+ @JsonProperty("min-current-replication-factor")
+ private Integer minCurrentReplicationFactor;
+
+ @JsonProperty("outstanding-merge-ops")
+ private OutstandingMergeOps outstandingMergeOps;
+
+ @JsonCreator
+ public StorageNode(@JsonProperty("node-index") Integer index) {
+ this.index = index;
+ }
+
+ public Integer getIndex() {
+ return index;
+ }
+
+ public OpsLatency getOpsLatenciesOrNull() {
+ return opsLatencies;
+ }
+
+ // See documentation on minCurrentReplicationFactor.
+ public Integer getMinCurrentReplicationFactorOrNull() {
+ return minCurrentReplicationFactor;
+ }
+
+ public OutstandingMergeOps getOutstandingMergeOpsOrNull() {
+ return outstandingMergeOps;
+ }
+}
diff --git a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/hostinfo/StorageNodeStatsBridge.java b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/hostinfo/StorageNodeStatsBridge.java
new file mode 100644
index 00000000000..ed4664c5b44
--- /dev/null
+++ b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/hostinfo/StorageNodeStatsBridge.java
@@ -0,0 +1,54 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.vespa.clustercontroller.core.hostinfo;
+
+import com.yahoo.vespa.clustercontroller.core.LatencyStats;
+import com.yahoo.vespa.clustercontroller.core.NodeMergeStats;
+import com.yahoo.vespa.clustercontroller.core.StorageMergeStats;
+import com.yahoo.vespa.clustercontroller.core.StorageNodeStats;
+import com.yahoo.vespa.clustercontroller.core.StorageNodeStatsContainer;
+
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+
+/**
+ * Class used to create a StorageNodeStatsContainer from HostInfo.
+ * @author hakon
+ */
+public class StorageNodeStatsBridge {
+
+ private StorageNodeStatsBridge() { }
+
+ public static StorageNodeStatsContainer traverseHostInfo(HostInfo hostInfo) {
+ StorageNodeStatsContainer container = new StorageNodeStatsContainer();
+ List<StorageNode> storageNodes = hostInfo.getDistributor().getStorageNodes();
+ for (StorageNode storageNode : storageNodes) {
+ Integer storageNodeIndex = storageNode.getIndex();
+ if (storageNodeIndex == null) {
+ continue;
+ }
+ StorageNode.OpsLatency opsLatency = storageNode.getOpsLatenciesOrNull();
+ if (opsLatency == null) {
+ continue;
+ }
+ StorageNode.Put putLatency = opsLatency.getPut();
+ Long putLatencyMsSum = putLatency.getLatencyMsSum();
+ Long putLatencyCount = putLatency.getCount();
+ if (putLatencyMsSum == null || putLatencyCount == null) {
+ continue;
+ }
+ LatencyStats putLatencyStats = new LatencyStats(putLatencyMsSum, putLatencyCount);
+ StorageNodeStats nodeStats = new StorageNodeStats(putLatencyStats);
+ container.put(storageNodeIndex, nodeStats);
+ }
+ return container;
+ }
+
+ public static StorageMergeStats generate(Distributor distributor) {
+ Map<Integer, NodeMergeStats> mapToNodeStats = new HashMap<>();
+ for (StorageNode storageNode : distributor.getStorageNodes()) {
+ mapToNodeStats.put(storageNode.getIndex(), new NodeMergeStats(storageNode));
+ }
+ return new StorageMergeStats(mapToNodeStats);
+ }
+}
diff --git a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/hostinfo/Vtag.java b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/hostinfo/Vtag.java
new file mode 100644
index 00000000000..b4d445c1844
--- /dev/null
+++ b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/hostinfo/Vtag.java
@@ -0,0 +1,23 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.vespa.clustercontroller.core.hostinfo;
+
+import com.fasterxml.jackson.annotation.JsonCreator;
+import com.fasterxml.jackson.annotation.JsonProperty;
+
+/**
+ * Class for handling version.
+ * @author dybdahl
+ */
+public class Vtag {
+
+ private final String version;
+
+ @JsonCreator
+ public Vtag(@JsonProperty("version") String version) {
+ this.version = version;
+ }
+
+ public String getVersionOrNull() {
+ return version;
+ }
+}
diff --git a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/listeners/NodeAddedOrRemovedListener.java b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/listeners/NodeAddedOrRemovedListener.java
new file mode 100644
index 00000000000..bf5e41495e1
--- /dev/null
+++ b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/listeners/NodeAddedOrRemovedListener.java
@@ -0,0 +1,14 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.vespa.clustercontroller.core.listeners;
+
+import com.yahoo.vespa.clustercontroller.core.NodeInfo;
+
+/**
+ * Listeners for new nodes detected.
+ */
+public interface NodeAddedOrRemovedListener {
+ void handleNewNode(NodeInfo node);
+ void handleMissingNode(NodeInfo node);
+ void handleNewRpcAddress(NodeInfo node);
+ void handleReturnedRpcAddress(NodeInfo node);
+}
diff --git a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/listeners/NodeStateOrHostInfoChangeHandler.java b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/listeners/NodeStateOrHostInfoChangeHandler.java
new file mode 100644
index 00000000000..9440c4ca7e6
--- /dev/null
+++ b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/listeners/NodeStateOrHostInfoChangeHandler.java
@@ -0,0 +1,21 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.vespa.clustercontroller.core.listeners;
+
+import com.yahoo.vdslib.state.NodeState;
+import com.yahoo.vespa.clustercontroller.core.NodeInfo;
+import com.yahoo.vespa.clustercontroller.core.hostinfo.HostInfo;
+
+/**
+ * Implemented by classes wanting events when node states changes.
+ */
+public interface NodeStateOrHostInfoChangeHandler {
+
+ void handleNewNodeState(NodeInfo currentInfo, NodeState newState);
+ void handleNewWantedNodeState(NodeInfo node, NodeState newState);
+
+ /**
+ * For every getnodestate RPC call, handleUpdatedHostInfo() will be called with the host info JSON string.
+ */
+ void handleUpdatedHostInfo(NodeInfo node, HostInfo newHostInfo);
+
+}
diff --git a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/listeners/SystemStateListener.java b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/listeners/SystemStateListener.java
new file mode 100644
index 00000000000..a1094e50fff
--- /dev/null
+++ b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/listeners/SystemStateListener.java
@@ -0,0 +1,8 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.vespa.clustercontroller.core.listeners;
+
+import com.yahoo.vdslib.state.ClusterState;
+
+public interface SystemStateListener {
+ public void handleNewSystemState(ClusterState state);
+}
diff --git a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/package-info.java b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/package-info.java
new file mode 100644
index 00000000000..b338984e909
--- /dev/null
+++ b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/package-info.java
@@ -0,0 +1,5 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+@ExportPackage
+package com.yahoo.vespa.clustercontroller.core;
+
+import com.yahoo.osgi.annotation.ExportPackage;
diff --git a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/restapiv2/ClusterControllerStateRestAPI.java b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/restapiv2/ClusterControllerStateRestAPI.java
new file mode 100644
index 00000000000..abbcdab1112
--- /dev/null
+++ b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/restapiv2/ClusterControllerStateRestAPI.java
@@ -0,0 +1,150 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.vespa.clustercontroller.core.restapiv2;
+
+import com.yahoo.vespa.clustercontroller.core.RemoteClusterControllerTaskScheduler;
+import com.yahoo.vespa.clustercontroller.core.restapiv2.requests.*;
+import com.yahoo.vespa.clustercontroller.utils.staterestapi.StateRestAPI;
+import com.yahoo.vespa.clustercontroller.utils.staterestapi.errors.InternalFailure;
+import com.yahoo.vespa.clustercontroller.utils.staterestapi.errors.OtherMasterException;
+import com.yahoo.vespa.clustercontroller.utils.staterestapi.errors.StateRestApiException;
+import com.yahoo.vespa.clustercontroller.utils.staterestapi.requests.SetUnitStateRequest;
+import com.yahoo.vespa.clustercontroller.utils.staterestapi.requests.UnitStateRequest;
+import com.yahoo.vespa.clustercontroller.utils.staterestapi.response.SetResponse;
+import com.yahoo.vespa.clustercontroller.utils.staterestapi.response.UnitResponse;
+
+import java.util.EnumSet;
+import java.util.Map;
+import java.util.Objects;
+import java.util.logging.Logger;
+
+public class ClusterControllerStateRestAPI implements StateRestAPI {
+
+ private static final Logger log = Logger.getLogger(ClusterControllerStateRestAPI.class.getName());
+
+ public interface FleetControllerResolver {
+ Map<String, RemoteClusterControllerTaskScheduler> getFleetControllers();
+ }
+
+ public static class Socket {
+ public final String hostname;
+ public final int port;
+
+ public Socket(String hostname, int port) {
+ this.hostname = hostname;
+ this.port = port;
+ }
+
+ @Override
+ public String toString() {
+ return hostname + ":" + port;
+ }
+
+ @Override
+ public int hashCode() {
+ return Objects.hash(hostname, port);
+ }
+
+ @Override
+ public boolean equals(Object object) {
+ if (this == object) return true;
+ if (!(object instanceof Socket)) return false;
+ Socket socket = (Socket) object;
+ return Objects.equals(hostname, socket.hostname) && port == socket.port;
+ }
+ }
+
+ private final FleetControllerResolver fleetControllerResolver;
+ private final Map<Integer, Socket> clusterControllerSockets;
+
+ public ClusterControllerStateRestAPI(FleetControllerResolver resolver,
+ Map<Integer, Socket> clusterControllerSockets)
+ {
+ fleetControllerResolver = resolver;
+ this.clusterControllerSockets = clusterControllerSockets;
+ }
+
+ @Override
+ public UnitResponse getState(final UnitStateRequest request) throws StateRestApiException {
+ log.finest("Got getState() request");
+ UnitPathResolver<UnitResponse> resolver = new UnitPathResolver<>(fleetControllerResolver.getFleetControllers());
+ Request<? extends UnitResponse> req = resolver.visit(
+ request.getUnitPath(), new UnitPathResolver.Visitor<UnitResponse>()
+ {
+ @Override
+ public Request<? extends UnitResponse> visitGlobal() throws StateRestApiException {
+ return new ClusterListRequest(request.getRecursiveLevels(), fleetControllerResolver);
+ }
+ @Override
+ public Request<? extends UnitResponse> visitCluster(Id.Cluster id) throws StateRestApiException {
+ return new ClusterStateRequest(id, request.getRecursiveLevels());
+ }
+ @Override
+ public Request<? extends UnitResponse> visitService(Id.Service id) throws StateRestApiException {
+ return new ServiceStateRequest(id, request.getRecursiveLevels());
+ }
+ @Override
+ public Request<? extends UnitResponse> visitNode(Id.Node id) throws StateRestApiException {
+ return new NodeStateRequest(id, request.getRecursiveLevels(),
+ EnumSet.of(VerboseReport.STATISTICS));
+ }
+ @Override
+ public Request<? extends UnitResponse> visitPartition(Id.Partition id) throws StateRestApiException {
+ return new PartitionStateRequest(id, EnumSet.of(VerboseReport.STATISTICS));
+ }
+ });
+ if (req instanceof ClusterListRequest) {
+ log.fine("Got cluster list request");
+ req.doRemoteFleetControllerTask(null);
+ req.notifyCompleted();
+ log.finest("Completed processing cluster list request");
+ } else {
+ log.fine("Scheduling state request: " + req.getClass().toString());
+ resolver.resolveFleetController(request.getUnitPath()).schedule(req);
+ log.finest("Scheduled state request: " + req.getClass().toString());
+ req.waitForCompletion();
+ log.finest("Completed processing state request: " + req.getClass().toString());
+ }
+ try {
+ return req.getResult();
+ } catch (OtherMasterIndexException e) {
+ createAndThrowOtherMasterException(e.getMasterIndex());
+ throw new RuntimeException("Should not get here");
+ }
+ }
+
+ @Override
+ public SetResponse setUnitState(final SetUnitStateRequest request) throws StateRestApiException {
+ UnitPathResolver<SetResponse> resolver = new UnitPathResolver<>(fleetControllerResolver.getFleetControllers());
+ Request<? extends SetResponse> req = resolver.visit(request.getUnitPath(),
+ new UnitPathResolver.AbstractVisitor<SetResponse>(request.getUnitPath(),
+ "State can only be set at cluster or node level")
+ {
+ @Override
+ public Request<? extends SetResponse> visitCluster(Id.Cluster id) throws StateRestApiException {
+ return new SetNodeStatesForClusterRequest(id, request);
+ }
+ @Override
+ public Request<? extends SetResponse> visitNode(Id.Node id) throws StateRestApiException {
+ return new SetNodeStateRequest(id, request);
+ }
+ });
+ resolver.resolveFleetController(request.getUnitPath()).schedule(req);
+ req.waitForCompletion();
+ try{
+ return req.getResult();
+ } catch (OtherMasterIndexException e) {
+ createAndThrowOtherMasterException(e.getMasterIndex());
+ throw new RuntimeException("Should not get here");
+ }
+ }
+
+ // Will always throw an exception.
+ private void createAndThrowOtherMasterException(int master) throws StateRestApiException {
+ Socket s = clusterControllerSockets.get(master);
+ // TODO: Consider changing return status code of this call to 503.
+ if (s == null) throw new InternalFailure(
+ "Cannot create redirect response to master at index " + master
+ + ", as we failed to get correct config to detect running cluster controllers.");
+ throw new OtherMasterException(s.hostname, s.port);
+ }
+}
diff --git a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/restapiv2/Id.java b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/restapiv2/Id.java
new file mode 100644
index 00000000000..d3d1f162fe0
--- /dev/null
+++ b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/restapiv2/Id.java
@@ -0,0 +1,65 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+/**
+ * Class representations of resources in State Rest API.
+ *
+ * Note that the toString() implementation will put out a slash separated list of the tokens,
+ * and thus be compatible with the link format.
+ */
+
+package com.yahoo.vespa.clustercontroller.core.restapiv2;
+
+import com.yahoo.vdslib.state.NodeType;
+
+public class Id {
+ public static class Cluster extends Id {
+ private final String id;
+
+ public Cluster(String id) { this.id = id; }
+
+ public final String getClusterId() { return id; }
+ public String toString() { return id; }
+ }
+
+ public static class Service extends Cluster {
+ private final NodeType id;
+
+ public Service(Cluster c, NodeType service) {
+ super(c.getClusterId());
+ id = service;
+ }
+
+ public final NodeType getService() { return id; }
+ public String toString() { return super.toString() + "/" + id; }
+ }
+
+ public static class Node extends Service {
+ private final int id;
+
+ public Node(Service service, int nodeIndex) {
+ super(service, service.id);
+ this.id = nodeIndex;
+ }
+
+ /**
+ * Looks bad with name overlap here, but everywhere else Node will have to be
+ * referred to as Id.Node, so users won't get conflicts.
+ */
+ public final com.yahoo.vdslib.state.Node getNode() {
+ return new com.yahoo.vdslib.state.Node(getService(), id);
+ }
+
+ public String toString() { return super.toString() + "/" + id; }
+ }
+
+ public static class Partition extends Node {
+ private final int id;
+
+ public Partition(Node n, int partition) {
+ super(n, n.id);
+ this.id = partition;
+ }
+
+ public final int getPartitionIndex() { return id; }
+ public String toString() { return super.toString() + "/" + id; }
+ }
+}
diff --git a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/restapiv2/MissingIdException.java b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/restapiv2/MissingIdException.java
new file mode 100644
index 00000000000..cd102ef4e16
--- /dev/null
+++ b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/restapiv2/MissingIdException.java
@@ -0,0 +1,19 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.vespa.clustercontroller.core.restapiv2;
+
+import com.yahoo.vdslib.state.Node;
+import com.yahoo.vespa.clustercontroller.utils.staterestapi.errors.MissingUnitException;
+
+public class MissingIdException extends MissingUnitException {
+ private static String[] createPath(String cluster, Node n) {
+ String[] path = new String[3];
+ path[0] = cluster;
+ path[1] = n.getType().toString();
+ path[2] = String.valueOf(n.getIndex());
+ return path;
+ }
+
+ public MissingIdException(String cluster, Node n) {
+ super(createPath(cluster, n), 1);
+ }
+}
diff --git a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/restapiv2/OtherMasterIndexException.java b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/restapiv2/OtherMasterIndexException.java
new file mode 100644
index 00000000000..246e0acc61c
--- /dev/null
+++ b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/restapiv2/OtherMasterIndexException.java
@@ -0,0 +1,12 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.vespa.clustercontroller.core.restapiv2;
+
+public class OtherMasterIndexException extends Exception {
+ private final int index;
+
+ public OtherMasterIndexException(int index) {
+ this.index = index;
+ }
+
+ public int getMasterIndex() { return index; }
+}
diff --git a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/restapiv2/Request.java b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/restapiv2/Request.java
new file mode 100644
index 00000000000..38aab385a79
--- /dev/null
+++ b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/restapiv2/Request.java
@@ -0,0 +1,63 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.vespa.clustercontroller.core.restapiv2;
+
+import com.yahoo.vespa.clustercontroller.core.RemoteClusterControllerTask;
+import com.yahoo.vespa.clustercontroller.utils.staterestapi.errors.InternalFailure;
+import com.yahoo.vespa.clustercontroller.utils.staterestapi.errors.StateRestApiException;
+import com.yahoo.vespa.clustercontroller.utils.staterestapi.errors.UnknownMasterException;
+
+public abstract class Request<Result> extends RemoteClusterControllerTask {
+ public enum MasterState {
+ MUST_BE_MASTER,
+ NEED_NOT_BE_MASTER
+ }
+
+ private Exception failure = null;
+ private boolean resultSet = false;
+ private Result result = null;
+ private final MasterState masterState;
+
+
+ public Request(MasterState state) {
+ this.masterState = state;
+ }
+
+ public Result getResult() throws StateRestApiException, OtherMasterIndexException {
+ if (failure != null) {
+ if (failure instanceof OtherMasterIndexException) {
+ throw (OtherMasterIndexException) failure;
+ } else {
+ throw (StateRestApiException) failure;
+ }
+ }
+ if (!isCompleted()) {
+ throw new InternalFailure("Attempt to fetch result before it has been calculated");
+ }
+ if (!resultSet) {
+ throw new InternalFailure("Expected result to be set at this point.");
+ }
+ return result;
+ }
+
+ @Override
+ public final void doRemoteFleetControllerTask(Context context) {
+ try{
+ if (masterState == MasterState.MUST_BE_MASTER && !context.masterInfo.isMaster()) {
+ Integer masterIndex = context.masterInfo.getMaster();
+ if (masterIndex == null) throw new UnknownMasterException();
+ throw new OtherMasterIndexException(masterIndex);
+ }
+ result = calculateResult(context);
+ resultSet = true;
+ } catch (OtherMasterIndexException e) {
+ failure = e;
+ } catch (StateRestApiException e) {
+ failure = e;
+ } catch (Exception e) {
+ failure = new InternalFailure("Caught unexpected exception");
+ failure.initCause(e);
+ }
+ }
+
+ public abstract Result calculateResult(Context context) throws StateRestApiException, OtherMasterIndexException;
+}
diff --git a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/restapiv2/Response.java b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/restapiv2/Response.java
new file mode 100644
index 00000000000..66bcf58289a
--- /dev/null
+++ b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/restapiv2/Response.java
@@ -0,0 +1,129 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.vespa.clustercontroller.core.restapiv2;
+
+import com.yahoo.vdslib.state.DiskState;
+import com.yahoo.vdslib.state.NodeState;
+import com.yahoo.vdslib.state.State;
+import com.yahoo.vespa.clustercontroller.utils.staterestapi.errors.InternalFailure;
+import com.yahoo.vespa.clustercontroller.utils.staterestapi.errors.StateRestApiException;
+import com.yahoo.vespa.clustercontroller.utils.staterestapi.response.*;
+
+import java.util.LinkedHashMap;
+import java.util.Map;
+
+public class Response {
+ public static class UnitStateImpl implements UnitState {
+ private final String id;
+ private final String reason;
+
+ public UnitStateImpl(State s) throws StateRestApiException {
+ this.id = parseId(s);
+ this.reason = "";
+ }
+ public UnitStateImpl(NodeState ns) throws StateRestApiException {
+ this.id = parseId(ns.getState());
+ this.reason = ns.getDescription();
+ }
+ public UnitStateImpl(DiskState ds) throws StateRestApiException {
+ this.id = parseId(ds.getState());
+ this.reason = ds.getDescription();
+ }
+
+ public String parseId(State id) throws StateRestApiException {
+ switch (id) {
+ case UP: return "up";
+ case DOWN: return "down";
+ case INITIALIZING: return "initializing";
+ case MAINTENANCE: return "maintenance";
+ case RETIRED: return "retired";
+ case STOPPING: return "stopping";
+ }
+ throw new InternalFailure("Unknown state '" + id + "' found.");
+ }
+
+ @Override
+ public String getId() { return id; }
+ @Override
+ public String getReason() { return reason; }
+ }
+ public static class Link implements SubUnitList {
+ private final Map<String, String> links = new LinkedHashMap<>();
+ private final Map<String, UnitResponse> units = new LinkedHashMap<>();
+
+ public Link addLink(String unit, String link) {
+ links.put(unit, link);
+ return this;
+ }
+
+ public Link addUnit(String unit, UnitResponse r) {
+ units.put(unit, r);
+ return this;
+ }
+
+ @Override
+ public Map<String, String> getSubUnitLinks() { return links; }
+ @Override
+ public Map<String, UnitResponse> getSubUnits() { return units; }
+ }
+
+ public static abstract class EmptyResponse<T extends UnitResponse>
+ implements UnitResponse, UnitMetrics, UnitAttributes, CurrentUnitState
+ {
+ protected final Map<String, String> attributes = new LinkedHashMap<>();
+ protected final Map<String, SubUnitList> subUnits = new LinkedHashMap<>();
+ protected final Map<String, Number> metrics = new LinkedHashMap<>();
+ protected final Map<String, UnitState> stateMap = new LinkedHashMap<>();
+
+ @Override
+ public UnitAttributes getAttributes() { return attributes.isEmpty() ? null : this; }
+ @Override
+ public CurrentUnitState getCurrentState() { return stateMap.isEmpty() ? null : this; }
+ @Override
+ public Map<String, SubUnitList> getSubUnits() { return subUnits.isEmpty() ? null : subUnits; }
+ @Override
+ public UnitMetrics getMetrics() { return metrics.isEmpty() ? null : this; }
+ @Override
+ public Map<String, Number> getMetricMap() { return metrics; }
+ @Override
+ public Map<String, UnitState> getStatePerType() { return stateMap; }
+ @Override
+ public Map<String, String> getAttributeValues() { return attributes; }
+
+ public EmptyResponse<T> addLink(String type, String unit, String link) {
+ Link list = (Link) subUnits.get(type);
+ if (list == null) {
+ list = new Link();
+ subUnits.put(type, list);
+ }
+ list.addLink(unit, link);
+ return this;
+ }
+ public EmptyResponse<T> addEntry(String type, String unit, T response) {
+ Link list = (Link) subUnits.get(type);
+ if (list == null) {
+ list = new Link();
+ subUnits.put(type, list);
+ }
+ list.addUnit(unit, response);
+ return this;
+ }
+ public EmptyResponse<T> addMetric(String name, Number value) {
+ metrics.put(name, value);
+ return this;
+ }
+ public EmptyResponse<T> addState(String type, UnitStateImpl state) {
+ stateMap.put(type, state);
+ return this;
+ }
+ public EmptyResponse<T> addAttribute(String name, String value) {
+ attributes.put(name, value);
+ return this;
+ }
+ }
+
+ public static class ClusterListResponse extends EmptyResponse<ClusterResponse> {}
+ public static class ClusterResponse extends EmptyResponse<ServiceResponse> {}
+ public static class ServiceResponse extends EmptyResponse<NodeResponse> {}
+ public static class NodeResponse extends EmptyResponse<PartitionResponse> {}
+ public static class PartitionResponse extends EmptyResponse<UnitResponse> {}
+}
diff --git a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/restapiv2/UnitPathResolver.java b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/restapiv2/UnitPathResolver.java
new file mode 100644
index 00000000000..c13ea9698b8
--- /dev/null
+++ b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/restapiv2/UnitPathResolver.java
@@ -0,0 +1,95 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.vespa.clustercontroller.core.restapiv2;
+
+import com.yahoo.vdslib.state.NodeType;
+import com.yahoo.vespa.clustercontroller.core.RemoteClusterControllerTaskScheduler;
+import com.yahoo.vespa.clustercontroller.utils.staterestapi.errors.MissingUnitException;
+import com.yahoo.vespa.clustercontroller.utils.staterestapi.errors.OperationNotSupportedForUnitException;
+import com.yahoo.vespa.clustercontroller.utils.staterestapi.errors.StateRestApiException;
+import com.yahoo.vespa.clustercontroller.utils.staterestapi.response.UnitResponse;
+
+import java.util.HashMap;
+import java.util.Map;
+
+public class UnitPathResolver<T> {
+ public interface Visitor<T> {
+ public Request<? extends T> visitGlobal() throws StateRestApiException;
+ public Request<? extends T> visitCluster(Id.Cluster id) throws StateRestApiException;
+ public Request<? extends T> visitService(Id.Service id) throws StateRestApiException;
+ public Request<? extends T> visitNode(Id.Node id) throws StateRestApiException;
+ public Request<? extends T> visitPartition(Id.Partition id) throws StateRestApiException;
+ }
+ public static abstract class AbstractVisitor<T> implements Visitor<T> {
+ private final String path[];
+ private final String failureMessage;
+
+ public AbstractVisitor(String path[], String failureMessage) {
+ this.path = path;
+ this.failureMessage = failureMessage;
+ }
+ private Request<T> fail() throws StateRestApiException {
+ throw new OperationNotSupportedForUnitException(path, failureMessage);
+ }
+
+ public Request<? extends T> visitGlobal() throws StateRestApiException { return fail(); }
+ public Request<? extends T> visitCluster(Id.Cluster id) throws StateRestApiException { return fail(); }
+ public Request<? extends T> visitService(Id.Service id) throws StateRestApiException { return fail(); }
+ public Request<? extends T> visitNode(Id.Node id) throws StateRestApiException { return fail(); }
+ public Request<? extends T> visitPartition(Id.Partition id) throws StateRestApiException { return fail(); }
+ }
+
+ private final Map<String, RemoteClusterControllerTaskScheduler> fleetControllers;
+
+ public UnitPathResolver(Map<String, RemoteClusterControllerTaskScheduler> fleetControllers) {
+ this.fleetControllers = new HashMap<>(fleetControllers);
+ }
+
+ public RemoteClusterControllerTaskScheduler resolveFleetController(String path[]) throws StateRestApiException {
+ if (path.length == 0) return null;
+ RemoteClusterControllerTaskScheduler fc = fleetControllers.get(path[0]);
+ if (fc == null) {
+ throw new MissingUnitException(path, 0);
+ }
+ return fc;
+ }
+
+ public Request<? extends T> visit(String path[], Visitor<T> visitor) throws StateRestApiException {
+ if (path.length == 0) {
+ return visitor.visitGlobal();
+ }
+ RemoteClusterControllerTaskScheduler fc = fleetControllers.get(path[0]);
+ if (fc == null) throw new MissingUnitException(path, 0);
+ Id.Cluster cluster = new Id.Cluster(path[0]);
+ if (path.length == 1) {
+ return visitor.visitCluster(cluster);
+ }
+ Id.Service service;
+ try{
+ service = new Id.Service(cluster, NodeType.get(path[1]));
+ } catch (IllegalArgumentException e) {
+ throw new MissingUnitException(path, 1);
+ }
+ if (path.length == 2) {
+ return visitor.visitService(service);
+ }
+ Id.Node node;
+ try{
+ node = new Id.Node(service, Integer.valueOf(path[2]));
+ } catch (NumberFormatException e) {
+ throw new MissingUnitException(path, 2);
+ }
+ if (path.length == 3) {
+ return visitor.visitNode(node);
+ }
+ Id.Partition partition;
+ try{
+ partition = new Id.Partition(node, Integer.valueOf(path[3]));
+ } catch (NumberFormatException e) {
+ throw new MissingUnitException(path, 3);
+ }
+ if (path.length == 4) {
+ return visitor.visitPartition(partition);
+ }
+ throw new MissingUnitException(path, 4);
+ }
+}
diff --git a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/restapiv2/package-info.java b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/restapiv2/package-info.java
new file mode 100644
index 00000000000..994f1aae895
--- /dev/null
+++ b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/restapiv2/package-info.java
@@ -0,0 +1,5 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+@ExportPackage
+package com.yahoo.vespa.clustercontroller.core.restapiv2;
+
+import com.yahoo.osgi.annotation.ExportPackage;
diff --git a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/restapiv2/requests/ClusterListRequest.java b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/restapiv2/requests/ClusterListRequest.java
new file mode 100644
index 00000000000..082fb1555f4
--- /dev/null
+++ b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/restapiv2/requests/ClusterListRequest.java
@@ -0,0 +1,49 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.vespa.clustercontroller.core.restapiv2.requests;
+
+import com.yahoo.vespa.clustercontroller.core.RemoteClusterControllerTask;
+import com.yahoo.vespa.clustercontroller.core.RemoteClusterControllerTaskScheduler;
+import com.yahoo.vespa.clustercontroller.core.restapiv2.*;
+import com.yahoo.vespa.clustercontroller.utils.staterestapi.errors.StateRestApiException;
+import com.yahoo.vespa.clustercontroller.utils.staterestapi.response.UnitResponse;
+
+import java.util.Map;
+
+public class ClusterListRequest extends Request<UnitResponse> {
+ private final int recursive;
+ private final ClusterControllerStateRestAPI.FleetControllerResolver resolver;
+
+ public ClusterListRequest(int recursive,
+ ClusterControllerStateRestAPI.FleetControllerResolver resolver)
+ {
+ super(MasterState.NEED_NOT_BE_MASTER);
+ this.recursive = recursive;
+ this.resolver = resolver;
+ }
+
+ @Override
+ public UnitResponse calculateResult(RemoteClusterControllerTask.Context context) throws StateRestApiException, OtherMasterIndexException {
+ return calculateResult();
+ }
+
+ /**
+ * The cluster list request is outside of the fleet controllers, and can thus not use a
+ * context (thus it is null all the time). Thus it must recurse into fleetcontrollers if
+ * needed. Adding function without context to make this obvious and hinder bad usage.
+ */
+ private UnitResponse calculateResult() throws StateRestApiException, OtherMasterIndexException {
+ Response.ClusterListResponse response = new Response.ClusterListResponse();
+ for (Map.Entry<String, RemoteClusterControllerTaskScheduler> e : resolver.getFleetControllers().entrySet()) {
+ Id.Cluster clusterId = new Id.Cluster(e.getKey());
+ if (recursive > 0) {
+ ClusterStateRequest csr = new ClusterStateRequest(clusterId, recursive - 1);
+ e.getValue().schedule(csr);
+ csr.waitForCompletion();
+ response.addEntry("cluster", clusterId.getClusterId(), csr.getResult());
+ } else {
+ response.addLink("cluster", clusterId.getClusterId(), clusterId.toString());
+ }
+ }
+ return response;
+ }
+}
diff --git a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/restapiv2/requests/ClusterStateRequest.java b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/restapiv2/requests/ClusterStateRequest.java
new file mode 100644
index 00000000000..66ca9e2132e
--- /dev/null
+++ b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/restapiv2/requests/ClusterStateRequest.java
@@ -0,0 +1,36 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.vespa.clustercontroller.core.restapiv2.requests;
+
+import com.yahoo.vdslib.state.NodeType;
+import com.yahoo.vespa.clustercontroller.core.RemoteClusterControllerTask;
+import com.yahoo.vespa.clustercontroller.core.restapiv2.Id;
+import com.yahoo.vespa.clustercontroller.core.restapiv2.Request;
+import com.yahoo.vespa.clustercontroller.core.restapiv2.Response;
+import com.yahoo.vespa.clustercontroller.utils.staterestapi.errors.StateRestApiException;
+
+public class ClusterStateRequest extends Request<Response.ClusterResponse> {
+ private final Id.Cluster id;
+ private final int recursive;
+
+ public ClusterStateRequest(Id.Cluster id, int recursive) {
+ super(MasterState.MUST_BE_MASTER);
+ this.id = id;
+ this.recursive = recursive;
+ }
+
+ @Override
+ public Response.ClusterResponse calculateResult(RemoteClusterControllerTask.Context context) throws StateRestApiException {
+ Response.ClusterResponse result = new Response.ClusterResponse();
+ result.addState("generated", new Response.UnitStateImpl(context.currentState.getClusterState()));
+ for (NodeType type : NodeType.getTypes()) {
+ Id.Service serviceId = new Id.Service(id, type);
+ if (recursive > 0) {
+ ServiceStateRequest ssr = new ServiceStateRequest(serviceId, recursive - 1);
+ result.addEntry("service", type.toString(), ssr.calculateResult(context));
+ } else {
+ result.addLink("service", type.toString(), serviceId.toString());
+ }
+ }
+ return result;
+ }
+}
diff --git a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/restapiv2/requests/NodeStateRequest.java b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/restapiv2/requests/NodeStateRequest.java
new file mode 100644
index 00000000000..fa1ee6ef656
--- /dev/null
+++ b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/restapiv2/requests/NodeStateRequest.java
@@ -0,0 +1,62 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.vespa.clustercontroller.core.restapiv2.requests;
+
+import com.yahoo.vespa.clustercontroller.core.LatencyStats;
+import com.yahoo.vespa.clustercontroller.core.NodeInfo;
+import com.yahoo.vespa.clustercontroller.core.RemoteClusterControllerTask;
+import com.yahoo.vespa.clustercontroller.core.StorageNodeStats;
+import com.yahoo.vespa.clustercontroller.core.restapiv2.Id;
+import com.yahoo.vespa.clustercontroller.core.restapiv2.Request;
+import com.yahoo.vespa.clustercontroller.core.restapiv2.Response;
+import com.yahoo.vespa.clustercontroller.utils.staterestapi.errors.MissingResourceException;
+import com.yahoo.vespa.clustercontroller.utils.staterestapi.errors.StateRestApiException;
+
+import java.util.Set;
+
+public class NodeStateRequest extends Request<Response.NodeResponse> {
+ private final Id.Node id;
+ private final int recursive;
+ private final Set<VerboseReport> verboseReports;
+
+ public NodeStateRequest(Id.Node id, int recursive, Set<VerboseReport> verboseReports) {
+ super(MasterState.MUST_BE_MASTER);
+ this.id = id;
+ this.recursive = recursive;
+ this.verboseReports = verboseReports;
+ }
+
+ @Override
+ public Response.NodeResponse calculateResult(RemoteClusterControllerTask.Context context) throws StateRestApiException {
+ Response.NodeResponse result = new Response.NodeResponse();
+ NodeInfo info = context.cluster.getNodeInfo(id.getNode());
+ if (info == null) {
+ throw new MissingResourceException("node " + id.getNode());
+ }
+
+ if (info.getGroup() != null) {
+ result.addAttribute("hierarchical-group", info.getGroup().getPath());
+ }
+
+ result.addState("generated", new Response.UnitStateImpl(context.currentState.getNodeState(id.getNode())));
+ result.addState("unit", new Response.UnitStateImpl(info.getReportedState()));
+ result.addState("user", new Response.UnitStateImpl(info.getWantedState()));
+
+ if (info.isStorage() && verboseReports.contains(VerboseReport.STATISTICS)) {
+ StorageNodeStats storageStats = context.cluster.getStorageNodeStats(info.getNodeIndex());
+ LatencyStats latencyStats = storageStats.getDistributorPutLatency();
+ result.addMetric("distributor-put-latency-ms-sum", latencyStats.getLatencyMsSum());
+ result.addMetric("distributor-put-latency-count", latencyStats.getCount());
+ }
+
+ for (int i=0; i<info.getReportedState().getDiskCount(); ++i) {
+ Id.Partition partitionId = new Id.Partition(id, i);
+ if (recursive > 0) {
+ PartitionStateRequest psr = new PartitionStateRequest(partitionId, verboseReports);
+ result.addEntry("partition", String.valueOf(i), psr.calculateResult(context));
+ } else {
+ result.addLink("partition", String.valueOf(i), partitionId.toString());
+ }
+ }
+ return result;
+ }
+}
diff --git a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/restapiv2/requests/PartitionStateRequest.java b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/restapiv2/requests/PartitionStateRequest.java
new file mode 100644
index 00000000000..8958bce8ccd
--- /dev/null
+++ b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/restapiv2/requests/PartitionStateRequest.java
@@ -0,0 +1,69 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.vespa.clustercontroller.core.restapiv2.requests;
+
+import com.yahoo.vdslib.state.DiskState;
+import com.yahoo.vdslib.state.NodeState;
+import com.yahoo.vespa.clustercontroller.core.RemoteClusterControllerTask;
+import com.yahoo.vespa.clustercontroller.core.hostinfo.Metrics;
+import com.yahoo.vespa.clustercontroller.core.restapiv2.Id;
+import com.yahoo.vespa.clustercontroller.core.restapiv2.Request;
+import com.yahoo.vespa.clustercontroller.core.restapiv2.Response;
+import com.yahoo.vespa.clustercontroller.utils.staterestapi.errors.StateRestApiException;
+
+import java.util.Set;
+import java.util.logging.Logger;
+
+public class PartitionStateRequest extends Request<Response.PartitionResponse> {
+ private static final Logger log = Logger.getLogger(PartitionStateRequest.class.getName());
+ private final Id.Partition id;
+ private final Set<VerboseReport> verboseReports;
+
+ public PartitionStateRequest(Id.Partition id, Set<VerboseReport> verboseReports) {
+ super(MasterState.MUST_BE_MASTER);
+ this.id = id;
+ this.verboseReports = verboseReports;
+ }
+
+ @Override
+ public Response.PartitionResponse calculateResult(RemoteClusterControllerTask.Context context)
+ throws StateRestApiException {
+ Response.PartitionResponse result = new Response.PartitionResponse();
+ if (verboseReports.contains(VerboseReport.STATISTICS)) {
+ fillInMetrics(context.cluster.getNodeInfo(id.getNode()).getHostInfo().getMetrics(), result);
+ }
+ NodeState nodeState = context.currentState.getNodeState(id.getNode());
+ DiskState diskState = nodeState.getDiskState(id.getPartitionIndex());
+ result.addState("generated", new Response.UnitStateImpl(diskState));
+
+ return result;
+ }
+
+ private static void fillInMetrics(Metrics metrics, Response.PartitionResponse result) {
+ for (Metrics.Metric metric: metrics.getValues()) {
+ fillInMetricValue(metric.getName(), metric.getValue(), result);
+ }
+ }
+
+ private static void fillInMetricValue(
+ String name, Metrics.Value value, Response.PartitionResponse result) {
+ if (name.equals("vds.datastored.alldisks.docs")) {
+ if (value.getLast() == null) {
+ log.warning("Proper doc count value did not exist in value set.");
+ return;
+ }
+ result.addMetric("unique-document-count", value.getLast());
+ } else if (name.equals("vds.datastored.alldisks.bytes")) {
+ if (value.getLast() == null) {
+ log.warning("Proper doc size value did not exist in value set.");
+ return;
+ }
+ result.addMetric("unique-document-total-size", value.getLast());
+ } else if (name.equals("vds.datastored.alldisks.buckets")) {
+ if (value.getLast() == null) {
+ log.warning("Proper bucket count value did not exist in value set.");
+ return;
+ }
+ result.addMetric("bucket-count", value.getLast());
+ }
+ }
+}
diff --git a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/restapiv2/requests/ServiceStateRequest.java b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/restapiv2/requests/ServiceStateRequest.java
new file mode 100644
index 00000000000..087ed55a5b6
--- /dev/null
+++ b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/restapiv2/requests/ServiceStateRequest.java
@@ -0,0 +1,37 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.vespa.clustercontroller.core.restapiv2.requests;
+
+import com.yahoo.vespa.clustercontroller.core.RemoteClusterControllerTask;
+import com.yahoo.vespa.clustercontroller.core.restapiv2.Id;
+import com.yahoo.vespa.clustercontroller.core.restapiv2.Request;
+import com.yahoo.vespa.clustercontroller.core.restapiv2.Response;
+import com.yahoo.vespa.clustercontroller.utils.staterestapi.errors.StateRestApiException;
+
+import java.util.EnumSet;
+
+public class ServiceStateRequest extends Request<Response.ServiceResponse> {
+ private final Id.Service id;
+ private final int recursive;
+
+ public ServiceStateRequest(Id.Service id, int recursive) {
+ super(MasterState.MUST_BE_MASTER);
+ this.id = id;
+ this.recursive = recursive;
+ }
+
+ @Override
+ public Response.ServiceResponse calculateResult(RemoteClusterControllerTask.Context context) throws StateRestApiException {
+ Response.ServiceResponse result = new Response.ServiceResponse();
+ for (Integer i : context.cluster.getConfiguredNodes().keySet()) {
+ Id.Node nodeId = new Id.Node(id, i);
+ if (recursive > 0) {
+ // Don't include per-node statistics when aggregating over all nodes
+ NodeStateRequest nsr = new NodeStateRequest(nodeId, recursive - 1, EnumSet.noneOf(VerboseReport.class));
+ result.addEntry("node", String.valueOf(i), nsr.calculateResult(context));
+ } else {
+ result.addLink("node", String.valueOf(i), nodeId.toString());
+ }
+ }
+ return result;
+ }
+}
diff --git a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/restapiv2/requests/SetNodeStateRequest.java b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/restapiv2/requests/SetNodeStateRequest.java
new file mode 100644
index 00000000000..d6dd6faa60d
--- /dev/null
+++ b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/restapiv2/requests/SetNodeStateRequest.java
@@ -0,0 +1,104 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.vespa.clustercontroller.core.restapiv2.requests;
+
+import com.yahoo.log.LogLevel;
+import com.yahoo.vdslib.state.ClusterState;
+import com.yahoo.vdslib.state.Node;
+import com.yahoo.vdslib.state.NodeState;
+import com.yahoo.vdslib.state.State;
+import com.yahoo.vespa.clustercontroller.core.NodeInfo;
+import com.yahoo.vespa.clustercontroller.core.NodeStateChangeChecker;
+import com.yahoo.vespa.clustercontroller.core.RemoteClusterControllerTask;
+import com.yahoo.vespa.clustercontroller.core.ContentCluster;
+import com.yahoo.vespa.clustercontroller.core.listeners.NodeAddedOrRemovedListener;
+import com.yahoo.vespa.clustercontroller.core.listeners.NodeStateOrHostInfoChangeHandler;
+import com.yahoo.vespa.clustercontroller.core.restapiv2.Id;
+import com.yahoo.vespa.clustercontroller.core.restapiv2.MissingIdException;
+import com.yahoo.vespa.clustercontroller.core.restapiv2.Request;
+import com.yahoo.vespa.clustercontroller.utils.staterestapi.errors.*;
+import com.yahoo.vespa.clustercontroller.utils.staterestapi.requests.SetUnitStateRequest;
+import com.yahoo.vespa.clustercontroller.utils.staterestapi.response.SetResponse;
+import com.yahoo.vespa.clustercontroller.utils.staterestapi.response.UnitState;
+
+import java.util.Map;
+import java.util.logging.Logger;
+
+public class SetNodeStateRequest extends Request<SetResponse> {
+ private static final Logger log = Logger.getLogger(SetNodeStateRequest.class.getName());
+
+ private final Id.Node id;
+ private final Map<String, UnitState> newStates;
+ private final SetUnitStateRequest.Condition condition;
+
+
+ public SetNodeStateRequest(Id.Node id, SetUnitStateRequest setUnitStateRequest) {
+ super(MasterState.MUST_BE_MASTER);
+ this.id = id;
+ this.newStates = setUnitStateRequest.getNewState();
+ this.condition = setUnitStateRequest.getCondition();
+ }
+
+ @Override
+ public SetResponse calculateResult(RemoteClusterControllerTask.Context context) throws StateRestApiException {
+ SetResponse setResponse = setWantedState(
+ context.cluster,
+ condition,
+ newStates,
+ id.getNode(),
+ context.nodeStateOrHostInfoChangeHandler,
+ context.currentState);
+ return setResponse;
+ }
+
+ static NodeState getRequestedNodeState(Map<String, UnitState> newStates, Node n) throws StateRestApiException {
+ UnitState newState = newStates.get("user");
+ if (newState == null) throw new InvalidContentException("No new user state given in request");
+ State state;
+ switch (newState.getId().toLowerCase()) {
+ case "up": state = State.UP; break;
+ case "retired": state = State.RETIRED; break;
+ case "maintenance": state = State.MAINTENANCE; break;
+ case "down": state = State.DOWN; break;
+ default: throw new InvalidContentException("Invalid user state '" + newState.getId() + "' given.");
+ }
+ return new NodeState(n.getType(), state).setDescription(newState.getReason());
+ }
+
+ static SetResponse setWantedState(
+ ContentCluster cluster,
+ SetUnitStateRequest.Condition condition,
+ Map<String, UnitState> newStates,
+ Node node,
+ NodeStateOrHostInfoChangeHandler stateListener,
+ ClusterState currentClusterState) throws StateRestApiException {
+ if ( ! cluster.getConfiguredNodes().containsKey(node.getIndex())) {
+ throw new MissingIdException(cluster.getName(), node);
+ }
+ NodeInfo nodeInfo = cluster.getNodeInfo(node);
+ if (nodeInfo == null)
+ throw new IllegalArgumentException("Cannot set the wanted state of unknown node " + node);
+
+ NodeState wantedState = nodeInfo.getUserWantedState();
+ NodeState newWantedState = getRequestedNodeState(newStates, node);
+ int version = currentClusterState.getVersion();
+ NodeStateChangeChecker.Result result = cluster.calculateEffectOfNewState(
+ node, version, condition, wantedState, newWantedState);
+
+ log.log(LogLevel.DEBUG, "node=" + node +
+ " version=" + version +
+ " condition=" + condition +
+ " wanted-state=" + wantedState +
+ " new-wanted-state=" + newWantedState +
+ " change-check=" + result);
+ if (result.settingWantedStateIsAllowed()) {
+ nodeInfo.setWantedState(newWantedState);
+ stateListener.handleNewWantedNodeState(nodeInfo, newWantedState);
+ }
+
+ // wasModified is true if the new/current State equals the wanted state in the request.
+ boolean wasModified = result.settingWantedStateIsAllowed() || result.wantedStateAlreadySet();
+ // If the state was successfully set, just return an "ok" message back.
+ String reason = wasModified ? "ok" : result.getReason();
+ return new SetResponse(reason, wasModified);
+ }
+}
diff --git a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/restapiv2/requests/SetNodeStatesForClusterRequest.java b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/restapiv2/requests/SetNodeStatesForClusterRequest.java
new file mode 100644
index 00000000000..a6e73508858
--- /dev/null
+++ b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/restapiv2/requests/SetNodeStatesForClusterRequest.java
@@ -0,0 +1,87 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.vespa.clustercontroller.core.restapiv2.requests;
+
+import com.yahoo.log.LogLevel;
+import com.yahoo.vdslib.distribution.ConfiguredNode;
+import com.yahoo.vdslib.state.*;
+import com.yahoo.vespa.clustercontroller.core.ContentCluster;
+import com.yahoo.vespa.clustercontroller.core.NodeInfo;
+import com.yahoo.vespa.clustercontroller.core.NodeStateChangeChecker;
+import com.yahoo.vespa.clustercontroller.core.RemoteClusterControllerTask;
+import com.yahoo.vespa.clustercontroller.core.listeners.NodeAddedOrRemovedListener;
+import com.yahoo.vespa.clustercontroller.core.listeners.NodeStateOrHostInfoChangeHandler;
+import com.yahoo.vespa.clustercontroller.core.restapiv2.Id;
+import com.yahoo.vespa.clustercontroller.core.restapiv2.MissingIdException;
+import com.yahoo.vespa.clustercontroller.core.restapiv2.Request;
+import com.yahoo.vespa.clustercontroller.utils.staterestapi.errors.InternalFailure;
+import com.yahoo.vespa.clustercontroller.utils.staterestapi.errors.InvalidContentException;
+import com.yahoo.vespa.clustercontroller.utils.staterestapi.errors.StateRestApiException;
+import com.yahoo.vespa.clustercontroller.utils.staterestapi.requests.SetUnitStateRequest;
+import com.yahoo.vespa.clustercontroller.utils.staterestapi.response.SetResponse;
+import com.yahoo.vespa.clustercontroller.utils.staterestapi.response.UnitState;
+
+import java.util.Map;
+import java.util.logging.Logger;
+
+public class SetNodeStatesForClusterRequest extends Request<SetResponse> {
+ private static final Logger log = Logger.getLogger(SetNodeStateRequest.class.getName());
+
+ private final Id.Cluster cluster;
+ private final Map<String, UnitState> newStates;
+ private final SetUnitStateRequest.Condition condition;
+
+
+ public SetNodeStatesForClusterRequest(Id.Cluster cluster, SetUnitStateRequest request) {
+ super(MasterState.MUST_BE_MASTER);
+ this.cluster = cluster;
+ this.newStates = request.getNewState();
+ this.condition = request.getCondition();
+ }
+
+ @Override
+ public SetResponse calculateResult(RemoteClusterControllerTask.Context context) throws StateRestApiException {
+ if (condition != SetUnitStateRequest.Condition.FORCE) {
+ // Setting all nodes to e.g. maintainence is by design unsafe in the sense
+ // that it allows effective redundancy to drop to 0, many/all nodes may
+ // go down, etc. This is prohibited in Condition.SAFE.
+ throw new InvalidContentException(
+ "Setting all nodes in a cluster to a state is only supported with FORCE");
+ }
+
+ // Q: What about topology changes after the cluster has been set to maintenance?
+ // A: It's not safe to remove nodes when the cluster (the nodes in a cluster)
+ // is set to maintenance since all redistribution is shut down. Data may be lost.
+ // New nodes will currently come up with no wanted state, and so will eventually
+ // come up. This is a bug with the current implementation - they should automatically
+ // be in maintenance.
+ // When suspending an application through the Orchestrator in hosted Vespa, a cluster
+ // is set to maintenance to allow nodes to be taken down and up at will. The concern
+ // is that doing so will cause lots of redistribution work, and so all nodes in
+ // the content clusters are set to maintenance using this call. When new nodes
+ // are added while the cluster is set in maintenance, as long as the new nodes
+ // do not start with data and there's no feeding there's no redistribution either.
+ // So adding new nodes is actually OK in this case.
+
+ for (ConfiguredNode configuredNode : context.cluster.getConfiguredNodes().values()) {
+ Node node = new Node(NodeType.STORAGE, configuredNode.index());
+ SetResponse setResponse = SetNodeStateRequest.setWantedState(
+ context.cluster,
+ condition,
+ newStates,
+ node,
+ context.nodeStateOrHostInfoChangeHandler,
+ context.currentState);
+
+ if (!setResponse.getWasModified()) {
+ throw new InternalFailure("We have not yet implemented the meaning of " +
+ "failing to set the wanted state for a subset of nodes: " +
+ "condition = " + condition +
+ ", newStates = " + newStates +
+ ", currentState = " + context.currentState);
+ }
+ }
+
+ // 'true' here means the current state now equals the request's wanted state.
+ return new SetResponse("ok", true);
+ }
+}
diff --git a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/restapiv2/requests/VerboseReport.java b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/restapiv2/requests/VerboseReport.java
new file mode 100644
index 00000000000..1b1130dba03
--- /dev/null
+++ b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/restapiv2/requests/VerboseReport.java
@@ -0,0 +1,6 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.vespa.clustercontroller.core.restapiv2.requests;
+
+public enum VerboseReport {
+ STATISTICS
+}
diff --git a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/rpc/RPCCommunicator.java b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/rpc/RPCCommunicator.java
new file mode 100644
index 00000000000..bab718e6d9d
--- /dev/null
+++ b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/rpc/RPCCommunicator.java
@@ -0,0 +1,200 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+/**
+ * @class Communicator
+ *
+ * Responsible for doing RPC requests to VDS nodes.
+ */
+
+package com.yahoo.vespa.clustercontroller.core.rpc;
+
+import com.yahoo.jrt.*;
+import com.yahoo.vdslib.state.NodeState;
+import com.yahoo.vdslib.state.ClusterState;
+import com.yahoo.vdslib.state.State;
+import com.yahoo.log.LogLevel;
+import com.yahoo.vespa.clustercontroller.core.*;
+
+import java.util.logging.Logger;
+
+import static com.google.common.base.Preconditions.checkArgument;
+
+/**
+ * This class is not thread-safe.
+ */
+public class RPCCommunicator implements Communicator {
+
+ public static final Logger log = Logger.getLogger(RPCCommunicator.class.getName());
+
+ private final Timer timer;
+ private final Supervisor supervisor;
+ private double nodeStateRequestTimeoutIntervalMaxSeconds;
+ private int nodeStateRequestTimeoutIntervalStartPercentage;
+ private int nodeStateRequestTimeoutIntervalStopPercentage;
+ private int nodeStateRequestRoundTripTimeMaxSeconds;
+ private final int fleetControllerIndex;
+
+ public RPCCommunicator(
+ final Timer t,
+ final int index,
+ final int nodeStateRequestTimeoutIntervalMaxMs,
+ final int nodeStateRequestTimeoutIntervalStartPercentage,
+ final int nodeStateRequestTimeoutIntervalStopPercentage,
+ final int nodeStateRequestRoundTripTimeMaxSeconds) {
+ this.timer = t;
+ this.fleetControllerIndex = index;
+ checkArgument(nodeStateRequestTimeoutIntervalMaxMs > 0);
+ checkArgument(nodeStateRequestTimeoutIntervalStartPercentage >= 0);
+ checkArgument(nodeStateRequestTimeoutIntervalStartPercentage <= 100);
+ checkArgument(nodeStateRequestTimeoutIntervalStopPercentage >= nodeStateRequestTimeoutIntervalStartPercentage);
+ checkArgument(nodeStateRequestTimeoutIntervalStartPercentage <= 100);
+ checkArgument(nodeStateRequestRoundTripTimeMaxSeconds >= 0);
+ this.nodeStateRequestTimeoutIntervalMaxSeconds = nodeStateRequestTimeoutIntervalMaxMs / 1000D;
+ this.nodeStateRequestTimeoutIntervalStartPercentage = nodeStateRequestTimeoutIntervalStartPercentage;
+ this.nodeStateRequestTimeoutIntervalStopPercentage = nodeStateRequestTimeoutIntervalStopPercentage;
+ this.nodeStateRequestRoundTripTimeMaxSeconds = nodeStateRequestRoundTripTimeMaxSeconds;
+ this.supervisor = new Supervisor(new Transport());
+ }
+
+ public void shutdown() {
+ supervisor.transport().shutdown().join();
+ }
+
+ public Target getConnection(final NodeInfo node) {
+ Target t = node.getConnection();
+ if (t == null || !t.isValid()) {
+ t = node.setConnection(
+ supervisor.connect(new Spec(node.getRpcAddress())));
+ }
+ return t;
+ }
+
+ public void doVersion0HandShake(Target connection, final NodeInfo node) {
+ log.log(LogLevel.DEBUG, "Sending version 0 handshake request as version has been set down to 0 for " + node);
+ Request req = new Request("vespa.storage.connect");
+ req.parameters().add(new StringValue("storage/cluster." + node.getCluster().getName() + (node.isDistributor() ? "/distributor/" : "/storage/") + node.getNodeIndex()));
+ connection.invokeAsync(req, 10.0, new RequestWaiter(){
+ public void handleRequestDone(Request req) {
+ if (req.isError()) {
+ log.log(LogLevel.WARNING, "Failed to do version 0 handshake towards " + node + ", " + req.errorCode() + ": " + req.errorMessage());
+ } else if (!req.checkReturnTypes("i")) {
+ log.log(LogLevel.WARNING, "Wrong arguments returned from version 0 handshake attempt towards " + node);
+ } else if (req.returnValues().get(0).asInt32() == 1) {
+ log.log(LogLevel.DEBUG, "Session already opened when handshaking towards " + node + ".");
+ } else if (req.returnValues().get(0).asInt32() > 1) {
+ log.log(LogLevel.WARNING, "Handshaking attempt towards " + node + " failed with code " + req.returnValues().get(0).asInt32());
+ }
+ }
+ });
+ node.setConnectionVersion(0);
+ }
+
+ public void clearOldStoredNodeState(Target connection, final NodeInfo node) {
+ log.log(LogLevel.DEBUG, "In case old node had stored a wanted state it is reporting, send a command to clear any unwanted stored state.");
+ Request req = new Request("setnodestate");
+ req.parameters().add(new StringValue(""));
+ connection.invokeAsync(req, 10.0, new RequestWaiter() {
+ public void handleRequestDone(Request req) {
+ if (req.isError()) {
+ if (node.getReportedState().getState() != State.DOWN) {
+ log.log(LogLevel.WARNING, "Failed to clear nodestate on old node " + node + ", " + req.errorCode() + ": " + req.errorMessage());
+ }
+ } else if (!req.checkReturnTypes("is")) {
+ log.log(LogLevel.WARNING, "Wrong arguments returned from version 0 setnodestate attempt to clear any unwanted state on " + node);
+ }
+ }
+ });
+ }
+
+ @Override
+ public void propagateOptions(final FleetControllerOptions options) {
+ checkArgument(options.nodeStateRequestTimeoutMS > 0);
+ checkArgument(options.nodeStateRequestTimeoutEarliestPercentage >= 0);
+ checkArgument(options.nodeStateRequestTimeoutEarliestPercentage <= 100);
+ checkArgument(options.nodeStateRequestTimeoutLatestPercentage
+ >= options.nodeStateRequestTimeoutEarliestPercentage);
+ checkArgument(options.nodeStateRequestTimeoutLatestPercentage <= 100);
+ checkArgument(options.nodeStateRequestRoundTripTimeMaxSeconds >= 0);
+ this.nodeStateRequestTimeoutIntervalMaxSeconds = options.nodeStateRequestTimeoutMS / 1000.0;
+ this.nodeStateRequestTimeoutIntervalStartPercentage = options.nodeStateRequestTimeoutEarliestPercentage;
+ this.nodeStateRequestTimeoutIntervalStopPercentage = options.nodeStateRequestTimeoutLatestPercentage;
+ this.nodeStateRequestRoundTripTimeMaxSeconds = options.nodeStateRequestRoundTripTimeMaxSeconds;
+ }
+
+ @Override
+ public void getNodeState(NodeInfo node, Waiter<GetNodeStateRequest> externalWaiter) {
+ Target connection = getConnection(node);
+ if ( ! connection.isValid()) {
+ log.log(LogLevel.DEBUG, "Connection to " + node.getRpcAddress() + " could not be created.");
+ }
+ if (node.getVersion() == 0 && node.getConnectionVersion() > 0) {
+ doVersion0HandShake(connection, node);
+ clearOldStoredNodeState(connection, node);
+ }
+ NodeState currentState = node.getReportedState();
+ Request req;
+ if (node.getVersion() == 0) {
+ req = new Request("getnodestate");
+ } else {
+ req = new Request(node.getVersion() == 1 ? "getnodestate2" : "getnodestate3");
+ req.parameters().add(new StringValue(
+ currentState.getState().equals(State.DOWN) || node.getConnectionAttemptCount() > 0
+ ? "unknown" : currentState.serialize()));
+ req.parameters().add(new Int32Value(generateNodeStateRequestTimeoutMs()));
+ if (node.getVersion() > 1) {
+ req.parameters().add(new Int32Value(fleetControllerIndex));
+ }
+ }
+
+ RPCGetNodeStateRequest stateRequest = new RPCGetNodeStateRequest(node, req);
+ RPCGetNodeStateWaiter waiter = new RPCGetNodeStateWaiter(stateRequest, externalWaiter, timer);
+
+ double requestTimeoutSeconds =
+ nodeStateRequestTimeoutIntervalMaxSeconds + nodeStateRequestRoundTripTimeMaxSeconds;
+
+ connection.invokeAsync(req, requestTimeoutSeconds, waiter);
+ node.setCurrentNodeStateRequest(stateRequest, timer.getCurrentTimeInMillis());
+ node.lastRequestInfoConnection = connection;
+ }
+
+ @Override
+ public void setSystemState(ClusterState state, NodeInfo node, Waiter<SetClusterStateRequest> externalWaiter) {
+ RPCSetClusterStateWaiter waiter = new RPCSetClusterStateWaiter(externalWaiter, timer);
+
+ Target connection = getConnection(node);
+ if (!connection.isValid()) {
+ log.log(LogLevel.DEBUG, "Connection to " + node.getRpcAddress() + " could not be created.");
+ return;
+ }
+ if (node.getVersion() == 0 && node.getConnectionVersion() > 0) {
+ doVersion0HandShake(connection, node);
+ clearOldStoredNodeState(connection, node);
+ }
+ Request req;
+ if (node.getVersion() == 0) {
+ req = new Request("setsystemstate");
+ req.parameters().add(new StringValue(state.toString(true)));
+ } else {
+ req = new Request("setsystemstate2");
+ req.parameters().add(new StringValue(state.toString(false)));
+ }
+
+ RPCSetClusterStateRequest stateRequest = new RPCSetClusterStateRequest(node, req, state.getVersion());
+ waiter.setRequest(stateRequest);
+
+ connection.invokeAsync(req, 60, waiter);
+ node.setSystemStateVersionSent(state);
+ }
+
+ // protected for testing.
+ protected int generateNodeStateRequestTimeoutMs() {
+ final double intervalFraction = Math.random();
+ final double earliestTimeoutSeconds =
+ nodeStateRequestTimeoutIntervalMaxSeconds * nodeStateRequestTimeoutIntervalStartPercentage / 100.0;
+ final double latestTimeoutSeconds =
+ nodeStateRequestTimeoutIntervalMaxSeconds * nodeStateRequestTimeoutIntervalStopPercentage / 100.0;
+ final double interval = latestTimeoutSeconds - earliestTimeoutSeconds;
+ final double timeoutSeconds = earliestTimeoutSeconds + intervalFraction * interval;
+ return (int) (timeoutSeconds * 1000);
+ }
+
+}
diff --git a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/rpc/RPCGetNodeStateRequest.java b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/rpc/RPCGetNodeStateRequest.java
new file mode 100644
index 00000000000..4ae906936d2
--- /dev/null
+++ b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/rpc/RPCGetNodeStateRequest.java
@@ -0,0 +1,21 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.vespa.clustercontroller.core.rpc;
+
+import com.yahoo.jrt.Request;
+import com.yahoo.vespa.clustercontroller.core.GetNodeStateRequest;
+import com.yahoo.vespa.clustercontroller.core.NodeInfo;
+
+public class RPCGetNodeStateRequest extends GetNodeStateRequest {
+
+ Request request;
+
+ public RPCGetNodeStateRequest(NodeInfo nodeInfo, Request request) {
+ super(nodeInfo);
+ this.request = request;
+ }
+
+ @Override
+ public void abort() {
+ request.abort();
+ }
+}
diff --git a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/rpc/RPCGetNodeStateWaiter.java b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/rpc/RPCGetNodeStateWaiter.java
new file mode 100644
index 00000000000..0154089b0cd
--- /dev/null
+++ b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/rpc/RPCGetNodeStateWaiter.java
@@ -0,0 +1,62 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.vespa.clustercontroller.core.rpc;
+
+import com.yahoo.jrt.ErrorCode;
+import com.yahoo.jrt.Request;
+import com.yahoo.jrt.RequestWaiter;
+import com.yahoo.vespa.clustercontroller.core.Communicator;
+import com.yahoo.vespa.clustercontroller.core.GetNodeStateRequest;
+import com.yahoo.vespa.clustercontroller.core.Timer;
+
+/**
+ * Handles the reply to a get node state request to a node.
+ */
+public class RPCGetNodeStateWaiter implements RequestWaiter {
+
+ private final RPCGetNodeStateRequest request;
+ private final Communicator.Waiter<GetNodeStateRequest> waiter;
+ private final Timer timer;
+
+ public RPCGetNodeStateWaiter(RPCGetNodeStateRequest request,
+ Communicator.Waiter<GetNodeStateRequest> waiter, Timer timer) {
+ this.request = request;
+ this.waiter = waiter;
+ this.timer = timer;
+ }
+
+ private GetNodeStateRequest.Reply convertToReply(Request req) {
+ if (req.errorCode() == ErrorCode.NO_SUCH_METHOD) {
+ // If we get no such method error, and we downgrade version, we must retry. May be ok that it doesn't exist
+ if (request.getNodeInfo().notifyNoSuchMethodError(req.methodName(), timer)) {
+ return new GetNodeStateRequest.Reply(Communicator.TRANSIENT_ERROR, "Downgrading version");
+ }
+ }
+
+ if (req.isError()) {
+ return new GetNodeStateRequest.Reply(req.errorCode(), req.errorMessage());
+ }
+
+ if (req.methodName().equals("getnodestate3")) {
+ String stateStr = "";
+ String hostInfo = "";
+
+ if (req.returnValues().satisfies("s*")) {
+ stateStr = req.returnValues().get(0).asString();
+ }
+
+ if (req.returnValues().satisfies("ss*")) {
+ hostInfo = req.returnValues().get(1).asString();
+ }
+
+ return new GetNodeStateRequest.Reply(stateStr, hostInfo);
+ } else {
+ return new GetNodeStateRequest.Reply(ErrorCode.BAD_REPLY, "Unknown method name " + req.methodName());
+ }
+ }
+
+ @Override
+ public void handleRequestDone(Request req) {
+ request.setReply(convertToReply(req));
+ waiter.done(request);
+ }
+}
diff --git a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/rpc/RPCSetClusterStateRequest.java b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/rpc/RPCSetClusterStateRequest.java
new file mode 100644
index 00000000000..269a56ff08d
--- /dev/null
+++ b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/rpc/RPCSetClusterStateRequest.java
@@ -0,0 +1,17 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.vespa.clustercontroller.core.rpc;
+
+import com.yahoo.jrt.Request;
+import com.yahoo.vespa.clustercontroller.core.NodeInfo;
+import com.yahoo.vespa.clustercontroller.core.SetClusterStateRequest;
+
+public class RPCSetClusterStateRequest extends SetClusterStateRequest {
+
+ Request request;
+
+ public RPCSetClusterStateRequest(NodeInfo nodeInfo, Request request, int clusterStateVersion) {
+ super(nodeInfo, clusterStateVersion);
+ this.request = request;
+ }
+
+}
diff --git a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/rpc/RPCSetClusterStateWaiter.java b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/rpc/RPCSetClusterStateWaiter.java
new file mode 100644
index 00000000000..5562aef5bb4
--- /dev/null
+++ b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/rpc/RPCSetClusterStateWaiter.java
@@ -0,0 +1,57 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.vespa.clustercontroller.core.rpc;
+
+import com.yahoo.jrt.ErrorCode;
+import com.yahoo.jrt.Request;
+import com.yahoo.jrt.RequestWaiter;
+import com.yahoo.vespa.clustercontroller.core.Communicator;
+import com.yahoo.vespa.clustercontroller.core.NodeInfo;
+import com.yahoo.vespa.clustercontroller.core.SetClusterStateRequest;
+import com.yahoo.vespa.clustercontroller.core.Timer;
+
+/**
+ * Waiter class for set cluster state RPC commands.
+ */
+public class RPCSetClusterStateWaiter implements RequestWaiter {
+
+ SetClusterStateRequest request;
+ Timer timer;
+ Communicator.Waiter<SetClusterStateRequest> waiter;
+
+ public RPCSetClusterStateWaiter(Communicator.Waiter<SetClusterStateRequest> waiter, Timer timer) {
+ this.timer = timer;
+ this.waiter = waiter;
+ }
+
+ public void setRequest(RPCSetClusterStateRequest request) {
+ this.request = request;
+ }
+
+ public SetClusterStateRequest.Reply getReply(Request req) {
+ NodeInfo info = request.getNodeInfo();
+
+ if (req.methodName().equals("setsystemstate2")) {
+ if (req.isError() && req.errorCode() == ErrorCode.NO_SUCH_METHOD) {
+ if (info.notifyNoSuchMethodError(req.methodName(), timer)) {
+ return new SetClusterStateRequest.Reply(Communicator.TRANSIENT_ERROR, "Trying lower version");
+ }
+ }
+ if (req.isError()) {
+ return new SetClusterStateRequest.Reply(req.errorCode(), req.errorMessage());
+ } else if (!req.checkReturnTypes("")) {
+ return new SetClusterStateRequest.Reply(ErrorCode.BAD_REPLY, "Got setsystemstate2 response with invalid return types from " + info);
+ }
+ } else {
+ return new SetClusterStateRequest.Reply(ErrorCode.BAD_REPLY, "Unknown method " + req.methodName());
+ }
+
+ return new SetClusterStateRequest.Reply();
+ }
+
+ @Override
+ public void handleRequestDone(Request request) {
+ SetClusterStateRequest.Reply reply = getReply(request);
+ this.request.setReply(reply);
+ waiter.done(this.request);
+ }
+}
diff --git a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/rpc/RpcServer.java b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/rpc/RpcServer.java
new file mode 100644
index 00000000000..46fb18180e5
--- /dev/null
+++ b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/rpc/RpcServer.java
@@ -0,0 +1,307 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.vespa.clustercontroller.core.rpc;
+
+import com.yahoo.jrt.*;
+import com.yahoo.jrt.slobrok.api.Register;
+import com.yahoo.jrt.slobrok.api.SlobrokList;
+import com.yahoo.jrt.slobrok.api.BackOffPolicy;
+import com.yahoo.log.LogLevel;
+import com.yahoo.vdslib.state.*;
+import com.yahoo.vespa.clustercontroller.core.MasterElectionHandler;
+import com.yahoo.vespa.clustercontroller.core.NodeInfo;
+import com.yahoo.vespa.clustercontroller.core.ContentCluster;
+import com.yahoo.vespa.clustercontroller.core.listeners.NodeAddedOrRemovedListener;
+import com.yahoo.vespa.clustercontroller.core.listeners.NodeStateOrHostInfoChangeHandler;
+import com.yahoo.vespa.clustercontroller.core.Timer;
+
+import java.util.logging.Logger;
+import java.util.*;
+import java.net.UnknownHostException;
+import java.net.InetAddress;
+import java.io.StringWriter;
+import java.io.PrintWriter;
+
+public class RpcServer {
+
+ private static Logger log = Logger.getLogger(RpcServer.class.getName());
+
+ private final Timer timer;
+ private final Object monitor;
+ private final String clusterName;
+ private final int fleetControllerIndex;
+ private String slobrokConnectionSpecs[];
+ private int port = 0;
+ private Supervisor supervisor;
+ private Acceptor acceptor;
+ private Register register;
+ private final List<Request> rpcRequests = new LinkedList<>();
+ private MasterElectionHandler masterHandler;
+ private BackOffPolicy slobrokBackOffPolicy;
+ private long lastConnectErrorTime = 0;
+ private String lastConnectError = "";
+
+ public RpcServer(Timer timer, Object monitor, String clusterName, int fleetControllerIndex, BackOffPolicy bop) {
+ this.timer = timer;
+ this.monitor = monitor;
+ this.clusterName = clusterName;
+ this.fleetControllerIndex = fleetControllerIndex;
+ this.slobrokBackOffPolicy = bop;
+ }
+
+ public void setMasterElectionHandler(MasterElectionHandler handler) { this.masterHandler = handler; }
+
+ public int getPort() {
+ if (acceptor == null) return -1;
+ return acceptor.port();
+ }
+
+ public void shutdown() {
+ disconnect();
+ }
+
+ public String getSlobrokName() {
+ return "storage/cluster." + clusterName + "/fleetcontroller/" + fleetControllerIndex;
+ }
+
+ public void setSlobrokConnectionSpecs(String slobrokConnectionSpecs[], int port) throws ListenFailedException, UnknownHostException {
+ if (this.slobrokConnectionSpecs == null || !this.slobrokConnectionSpecs.equals(slobrokConnectionSpecs) // TODO: <-- probably a bug
+ || this.port != port)
+ {
+ this.slobrokConnectionSpecs = slobrokConnectionSpecs;
+ this.port = port;
+ disconnect();
+ connect();
+ }
+ }
+
+ public boolean isConnected() {
+ return (register != null);
+ }
+
+ public void connect() throws ListenFailedException, UnknownHostException {
+ disconnect();
+ log.log(LogLevel.DEBUG, "Fleetcontroller " + fleetControllerIndex + ": Connecting RPC server.");
+ if (supervisor != null) disconnect();
+ supervisor = new Supervisor(new Transport());
+ addMethods();
+ log.log(LogLevel.DEBUG, "Fleetcontroller " + fleetControllerIndex + ": Attempting to bind to port " + port);
+ acceptor = supervisor.listen(new Spec(port));
+ log.log(LogLevel.DEBUG, "Fleetcontroller " + fleetControllerIndex + ": RPC server listening to port " + acceptor.port());
+ StringBuffer slobroks = new StringBuffer("(");
+ for (String s : slobrokConnectionSpecs) {
+ slobroks.append(" ").append(s);
+ }
+ slobroks.append(" )");
+ SlobrokList slist = new SlobrokList();
+ slist.setup(slobrokConnectionSpecs);
+ log.log(LogLevel.DEBUG, "Trying to connect to slobrok using local address " + InetAddress.getLocalHost().getHostName()
+ + ", port " + acceptor.port() + " using slobrok connection spec " + slobroks);
+ if (slobrokBackOffPolicy != null) {
+ register = new Register(supervisor, slist,
+ new Spec(InetAddress.getLocalHost().getHostName(), acceptor.port()), slobrokBackOffPolicy);
+ } else {
+ register = new Register(supervisor, slist,
+ InetAddress.getLocalHost().getHostName(), acceptor.port());
+ }
+ register.registerName(getSlobrokName());
+ }
+
+ public void disconnect() {
+ if (register != null) {
+ log.log(LogLevel.DEBUG, "Fleetcontroller " + fleetControllerIndex + ": Disconnecting RPC server.");
+ register.shutdown();
+ register = null;
+ }
+ if (acceptor != null) {
+ acceptor.shutdown().join();
+ acceptor = null;
+ }
+ if (supervisor != null) {
+ supervisor.transport().shutdown().join();
+ supervisor = null;
+ }
+ }
+
+
+ public void addMethods() {
+ Method m = new Method("getMaster", "", "is", this, "queueRpcRequest");
+ m.methodDesc("Get index of current fleetcontroller master");
+ m.returnDesc(0, "masterindex", "The index of the current master according to this node, or -1 if there is none.");
+ m.returnDesc(1, "description", "A textual field, used for additional information, such as why there is no master.");
+ supervisor.addMethod(m);
+
+ m = new Method("getNodeList", "", "SS", this, "queueRpcRequest");
+ m.methodDesc("Get list of connection-specs to all nodes in the system");
+ m.returnDesc(0, "distributors", "connection-spec of all distributor-nodes (empty string for unknown nodes)");
+ m.returnDesc(1, "storagenodes", "connection-spec of all storage-nodes, (empty string for unknown nodes)");
+ supervisor.addMethod(m);
+
+ m = new Method("getSystemState", "", "ss", this, "queueRpcRequest");
+ m.methodDesc("Get nodeState of all nodes and the system itself");
+ m.returnDesc(0, "systemstate", "nodeState string of system");
+ m.returnDesc(1, "nodestate", "nodeState-string for distributor and storage-nodes");
+ supervisor.addMethod(m);
+
+ m = new Method("getNodeState", "si", "ssss", this, "queueRpcRequest");
+ m.methodDesc("Get nodeState of a node");
+ m.paramDesc(0, "nodeType", "Type of node. Should be 'storage' or 'distributor'");
+ m.paramDesc(1, "nodeIndex", "The node index");
+ m.returnDesc(0, "systemState", "This nodes state in the current system state");
+ m.returnDesc(1, "nodeState", "This nodes state as it reports itself. (Or down if we can't reach it)");
+ m.returnDesc(2, "wantedState", "This nodes wanted state");
+ m.returnDesc(3, "rpcAddress", "This nodes RPC server address");
+ supervisor.addMethod(m);
+
+ m = new Method("setNodeState", "ss", "s", this, "queueRpcRequest");
+ m.methodDesc("Set nodeState of a node");
+ m.paramDesc(0, "slobrokAddress", "Slobrok address of node");
+ m.paramDesc(1, "nodeState", "Desired nodeState of the node (complete nodeState string - [key:value ]*)");
+ m.returnDesc(0, "status", "success/failure");
+ supervisor.addMethod(m);
+ }
+
+ // Called by rpc
+ public void queueRpcRequest(Request req) {
+ synchronized(monitor) {
+ req.detach();
+ rpcRequests.add(req);
+ monitor.notifyAll();
+ }
+ }
+
+ public boolean handleRpcRequests(ContentCluster cluster, ClusterState systemState,
+ NodeStateOrHostInfoChangeHandler changeListener,
+ NodeAddedOrRemovedListener addedListener)
+ {
+ boolean handledAnyRequests = false;
+ if (!isConnected()) {
+ long time = timer.getCurrentTimeInMillis();
+ try{
+ connect();
+ } catch (ListenFailedException e) {
+ if (!e.getMessage().equals(lastConnectError) || time - lastConnectErrorTime > 60 * 1000) {
+ lastConnectError = e.getMessage();
+ lastConnectErrorTime = time;
+ log.log(LogLevel.WARNING, "Failed to bind RPC server to port " + port +": " + e.getMessage());
+ }
+ } catch (Exception e) {
+ if (!e.getMessage().equals(lastConnectError) || time - lastConnectErrorTime > 60 * 1000) {
+ lastConnectError = e.getMessage();
+ lastConnectErrorTime = time;
+ log.log(LogLevel.WARNING, "Failed to initailize RPC server socket: " + e.getMessage());
+ }
+ }
+ }
+ for (int j=0; j<10; ++j) { // Max perform 10 RPC requests per cycle.
+ Request req;
+ synchronized(monitor) {
+ if (rpcRequests.isEmpty()) break;
+ Iterator<Request> it = rpcRequests.iterator();
+ req = it.next();
+ it.remove();
+ handledAnyRequests = true;
+ }
+ try{
+ if (req.methodName().equals("getMaster")) {
+ log.log(LogLevel.DEBUG, "Resolving RPC getMaster request");
+ Integer master = masterHandler.getMaster();
+ String masterReason = masterHandler.getMasterReason();
+ req.returnValues().add(new Int32Value(master == null ? -1 : master));
+ req.returnValues().add(new StringValue(masterReason == null ? "No reason given" : masterReason));
+ req.returnRequest();
+ continue;
+ }
+ if (!masterHandler.isMaster()) {
+ throw new IllegalStateException("Refusing to answer RPC calls as we are not the master fleetcontroller.");
+ }
+ if (req.methodName().equals("getNodeList")) {
+ log.log(LogLevel.DEBUG, "Resolving RPC getNodeList request");
+ List<String> slobrok = new ArrayList<String>();
+ List<String> rpc = new ArrayList<String>();
+ for(NodeInfo node : cluster.getNodeInfo()) {
+ String s1 = node.getSlobrokAddress();
+ String s2 = node.getRpcAddress();
+ assert(s1 != null);
+ slobrok.add(s1);
+ rpc.add(s2 == null ? "" : s2);
+ }
+ req.returnValues().add(new StringArray(slobrok.toArray(new String[slobrok.size()])));
+ req.returnValues().add(new StringArray(rpc.toArray(new String[rpc.size()])));
+ req.returnRequest();
+ } else if (req.methodName().equals("getSystemState")) {
+ log.log(LogLevel.DEBUG, "Resolving RPC getSystemState request");
+ req.returnValues().add(new StringValue(""));
+ req.returnValues().add(new StringValue(systemState.toString(true)));
+ req.returnRequest();
+ } else if (req.methodName().equals("getNodeState")) {
+ log.log(LogLevel.DEBUG, "Resolving RPC getNodeState request");
+
+ NodeType nodeType = NodeType.get(req.parameters().get(0).asString());
+ int nodeIndex = req.parameters().get(1).asInt32();
+ Node node = new Node(nodeType, nodeIndex);
+ // First parameter is current state in system state
+ NodeState ns = systemState.getNodeState(node);
+ req.returnValues().add(new StringValue(systemState.getNodeState(node).serialize()));
+ // Second parameter is state node is reporting
+ NodeInfo nodeInfo = cluster.getNodeInfo(node);
+ if (nodeInfo == null) throw new RuntimeException("No node " + node + " exists in cluster " + cluster.getName());
+ NodeState fromNode = nodeInfo.getReportedState();
+ req.returnValues().add(new StringValue(fromNode == null ? "unknown" : fromNode.serialize()));
+ // Third parameter is state node has been requested to be in
+ req.returnValues().add(new StringValue(nodeInfo.getWantedState().serialize()));
+ // Fourth parameter is RPC address of node
+ req.returnValues().add(new StringValue(nodeInfo.getRpcAddress() == null ? "" : nodeInfo.getRpcAddress()));
+ req.returnRequest();
+ } else if (req.methodName().equals("setNodeState")) {
+ String slobrokAddress = req.parameters().get(0).asString();
+ int lastSlash = slobrokAddress.lastIndexOf('/');
+ int nextButLastSlash = slobrokAddress.lastIndexOf('/', lastSlash - 1);
+ if (lastSlash == -1 || nextButLastSlash == -1) {
+ throw new IllegalStateException("Invalid slobrok address '" + slobrokAddress + "'.");
+ }
+ NodeType nodeType = NodeType.get(slobrokAddress.substring(nextButLastSlash + 1, lastSlash));
+ Integer nodeIndex = Integer.valueOf(slobrokAddress.substring(lastSlash + 1));
+ NodeInfo node = cluster.getNodeInfo(new Node(nodeType, nodeIndex));
+ if (node == null)
+ throw new IllegalStateException("Cannot set wanted state of node " + new Node(nodeType, nodeIndex) + ". Index does not correspond to a configured node.");
+ NodeState nodeState = NodeState.deserialize(nodeType, req.parameters().get(1).asString());
+ if (nodeState.getDescription().equals("") && !nodeState.getState().equals(State.UP) && !nodeState.getState().equals(State.RETIRED)) {
+ nodeState.setDescription("Set by remote RPC client");
+ }
+ NodeState oldState = node.getUserWantedState();
+ String message = (nodeState.getState().equals(State.UP)
+ ? "Clearing wanted nodeState for node " + node
+ : "New wantedstate '" + nodeState.toString() + "' stored for node " + node);
+ if (!oldState.equals(nodeState) || !oldState.getDescription().equals(nodeState.getDescription())) {
+ if (!nodeState.getState().validWantedNodeState(nodeType)) {
+ throw new IllegalStateException("State " + nodeState.getState()
+ + " can not be used as wanted state for node of type " + nodeType);
+ }
+ node.setWantedState(nodeState);
+ changeListener.handleNewWantedNodeState(node, nodeState);
+ } else {
+ message = "Node " + node + " already had wanted state " + nodeState.toString();
+ log.log(LogLevel.DEBUG, message);
+ }
+ req.returnValues().add(new StringValue(message));
+ req.returnRequest();
+ if (nodeState.getState() == State.UP && node.getPrematureCrashCount() > 0) {
+ log.log(LogLevel.INFO, "Clearing premature crash count of " + node.getPrematureCrashCount() + " as wanted state was set to up");
+ node.setPrematureCrashCount(0);
+ }
+ }
+ } catch (Exception e) {
+ if (log.isLoggable(LogLevel.DEBUG)) {
+ StringWriter sw = new StringWriter();
+ e.printStackTrace(new PrintWriter(sw));
+ log.log(LogLevel.DEBUG, "Failed RPC Request: " + sw);
+ }
+ String errorMsg = e.getMessage();
+ if (errorMsg == null) { errorMsg = e.toString(); }
+ req.setError(ErrorCode.METHOD_FAILED, errorMsg);
+ req.returnRequest();
+ }
+ }
+ return handledAnyRequests;
+ }
+}
diff --git a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/rpc/SlobrokClient.java b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/rpc/SlobrokClient.java
new file mode 100644
index 00000000000..9bfebc08845
--- /dev/null
+++ b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/rpc/SlobrokClient.java
@@ -0,0 +1,220 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+package com.yahoo.vespa.clustercontroller.core.rpc;
+
+import com.yahoo.jrt.slobrok.api.SlobrokList;
+import com.yahoo.jrt.slobrok.api.Mirror;
+import com.yahoo.jrt.Supervisor;
+import com.yahoo.jrt.Transport;
+import com.yahoo.vdslib.state.NodeType;
+import com.yahoo.vdslib.state.Node;
+import com.yahoo.log.LogLevel;
+import com.yahoo.vespa.clustercontroller.core.NodeInfo;
+import com.yahoo.vespa.clustercontroller.core.NodeLookup;
+import com.yahoo.vespa.clustercontroller.core.Timer;
+import com.yahoo.vespa.clustercontroller.core.ContentCluster;
+import com.yahoo.vespa.clustercontroller.core.listeners.NodeAddedOrRemovedListener;
+
+import java.util.*;
+import java.util.logging.Logger;
+
+public class SlobrokClient implements NodeLookup {
+
+ public static Logger log = Logger.getLogger(SlobrokClient.class.getName());
+
+ private final Timer timer;
+ private String[] connectionSpecs;
+ private Mirror mirror;
+ private Supervisor supervisor;
+ private boolean freshMirror = false;
+
+ public SlobrokClient(Timer timer) {
+ this.timer = timer;
+ }
+
+ public boolean equalsExistingSpec(String spec[]) {
+ if (spec == null && connectionSpecs == null) return true;
+ if (spec == null && connectionSpecs != null) return false;
+ if (spec != null && connectionSpecs == null) return false;
+ if (spec.length != connectionSpecs.length) return false;
+ for (int i=0, n=spec.length; i<n; ++i) {
+ if (!spec[i].equals(connectionSpecs[i])) return false;
+ }
+ return true;
+ }
+
+ public void setSlobrokConnectionSpecs(String slobrokConnectionSpecs[]) {
+ if (equalsExistingSpec(slobrokConnectionSpecs)) return;
+ this.connectionSpecs = slobrokConnectionSpecs;
+ shutdown();
+ supervisor = new Supervisor(new Transport());
+ SlobrokList slist = new SlobrokList();
+ slist.setup(slobrokConnectionSpecs);
+ mirror = new Mirror(supervisor, slist);
+ freshMirror = true;
+ }
+
+ public void shutdown() {
+ if (supervisor != null) {
+ supervisor.transport().shutdown().join();
+ }
+ }
+
+ public Mirror getMirror() { return mirror; }
+
+ public boolean updateCluster(ContentCluster cluster, NodeAddedOrRemovedListener listener) {
+ if (mirror == null) return false;
+ int mirrorVersion = mirror.updates();
+ if (freshMirror) {
+ freshMirror = false;
+ } else if (cluster.getSlobrokGenerationCount() == mirrorVersion) {
+ if (log.isLoggable(LogLevel.SPAM)) {
+ log.log(LogLevel.SPAM, "Slobrok still at generation count " + cluster.getSlobrokGenerationCount() + ". Not updating.");
+ }
+ return false;
+ }
+
+ cluster.setSlobrokGenerationCount(0); // Set to unused value until we are done processing info.
+ Map<Node, SlobrokData> distributorRpc = getSlobrokData(
+ "storage/cluster." + cluster.getName() + "/distributor/*");
+ Map<Node, SlobrokData> distributorMbus = getSlobrokData(
+ "storage/cluster." + cluster.getName() + "/distributor/*/default");
+ Map<Node, SlobrokData> storageRpc = getSlobrokData("storage/cluster." + cluster.getName() + "/storage/*");
+ Map<Node, SlobrokData> storageMbus = getSlobrokData(
+ "storage/cluster." + cluster.getName() + "/storage/*/default");
+
+ Map<Node, SlobrokData> slobrokNodes = new TreeMap<>();
+ for (SlobrokData data : distributorRpc.values()) {
+ if (distributorMbus.containsKey(data.node)) {
+ slobrokNodes.put(data.node, data);
+ }
+ }
+ for (SlobrokData data : storageRpc.values()) {
+ if (storageMbus.containsKey(data.node)) {
+ slobrokNodes.put(data.node, data);
+ }
+ }
+
+ List<SlobrokData> newNodes = new LinkedList<>();
+ List<NodeInfo> missingNodeInfos = new LinkedList<>();
+ List<SlobrokData> alteredRpcAddressNodes = new LinkedList<>();
+ List<NodeInfo> returningNodeInfos = new LinkedList<>();
+ detectNewAndMissingNodes(
+ cluster,
+ slobrokNodes,
+ newNodes,
+ missingNodeInfos,
+ alteredRpcAddressNodes,
+ returningNodeInfos);
+ for (SlobrokData data : newNodes) {
+ // XXX we really would like to cross-check the actual RPC address against what's configured,
+ // but this information does not seem to be available to the cluster controller currently.
+ NodeInfo nodeInfo = cluster.clusterInfo().getNodeInfo(data.node);
+ if (nodeInfo == null) continue; // slobrok may contain nonconfigured nodes during state transitions
+ cluster.clusterInfo().setRpcAddress(data.node, data.rpcAddress);
+ if (listener != null)
+ listener.handleNewNode(nodeInfo); // TODO: We'll never add new nodes here, move this to where clusterInfo.setNodes is called?
+ }
+ for (NodeInfo nodeInfo : missingNodeInfos) {
+ nodeInfo.markRpcAddressOutdated(timer);
+ if (listener != null)
+ listener.handleMissingNode(nodeInfo);
+ }
+ for (SlobrokData data : alteredRpcAddressNodes) {
+ // TODO: Abort the current node state requests? See NodeInfo.abortCurrentNodeStateRequests()
+ NodeInfo nodeInfo = cluster.clusterInfo().setRpcAddress(data.node, data.rpcAddress);
+ if (listener != null) {
+ listener.handleNewRpcAddress(nodeInfo); // TODO: We'll never add new nodes here, move this to where clusterInfo.setNodes is called?
+ }
+ }
+ for (NodeInfo nodeInfo : returningNodeInfos) {
+ nodeInfo.markRpcAddressLive();
+ nodeInfo.abortCurrentNodeStateRequests();
+ if (listener != null) {
+ listener.handleReturnedRpcAddress(nodeInfo);
+ }
+ }
+ cluster.setSlobrokGenerationCount(mirrorVersion);
+ for (NodeInfo nodeInfo : cluster.getNodeInfo()) {
+ if (slobrokNodes.containsKey(nodeInfo.getNode()) && nodeInfo.isRpcAddressOutdated()) {
+ log.log(LogLevel.WARNING, "Node " + nodeInfo
+ + " was tagged NOT in slobrok even though it is. It was in the following lists:"
+ + (newNodes.contains(nodeInfo.getNode()) ? " newNodes" : "")
+ + (missingNodeInfos.contains(nodeInfo) ? " missingNodes" : "")
+ + (alteredRpcAddressNodes.contains(nodeInfo.getNode()) ? " alteredNodes" : "")
+ + (returningNodeInfos.contains(nodeInfo) ? " returningNodes" : ""));
+ nodeInfo.markRpcAddressLive();
+ }
+ }
+ log.log(LogLevel.SPAM, "Slobrok information updated to generation " + cluster.getSlobrokGenerationCount());
+ return true;
+ }
+
+ private void detectNewAndMissingNodes(
+ ContentCluster oldCluster,
+ Map<Node, SlobrokData> slobrokNodes,
+ List<SlobrokData> newNodes,
+ List<NodeInfo> missingNodeInfos,
+ List<SlobrokData> alteredRpcAddress,
+ List<NodeInfo> returningRpcAddressNodeInfos)
+ {
+ Iterator<NodeInfo> oldIt = oldCluster.getNodeInfo().iterator();
+ Iterator<SlobrokData> newIt = slobrokNodes.values().iterator();
+ NodeInfo oldNext = null;
+ SlobrokData newNext = null;
+ while (true) {
+ if (oldNext == null && oldIt.hasNext()) { oldNext = oldIt.next(); }
+ if (newNext == null && newIt.hasNext()) { newNext = newIt.next(); }
+ if (oldNext == null && newNext == null) { break; }
+ if (oldNext == null || (newNext != null && oldNext.getNode().compareTo(newNext.node) > 0)) {
+ newNodes.add(newNext);
+ newNext = null;
+ } else if (newNext == null || newNext.node.compareTo(oldNext.getNode()) > 0) {
+ assert(slobrokNodes.get(oldNext.getNode()) == null);
+ if (!oldNext.isRpcAddressOutdated() && oldNext.getRpcAddress() != null) {
+ missingNodeInfos.add(oldNext);
+ }
+ oldNext = null;
+ } else {
+ assert(newNext.rpcAddress != null);
+ if (oldNext.getRpcAddress() == null || !oldNext.getRpcAddress().equals(newNext.rpcAddress)) {
+ alteredRpcAddress.add(newNext);
+ } else if (oldNext.isRpcAddressOutdated()) {
+ returningRpcAddressNodeInfos.add(oldNext);
+ }
+ oldNext = null;
+ newNext = null;
+ }
+ }
+ }
+
+ private Map<Node, SlobrokData> getSlobrokData(String pattern) {
+ Map<Node, SlobrokData> result = new TreeMap<>();
+ Mirror.Entry[] entries = mirror.lookup(pattern);
+ log.log(LogLevel.SPAM, "Looking for slobrok entries with pattern '" + pattern + "'. Found " + entries.length + " entries.");
+ for (Mirror.Entry entry : entries) {
+ StringTokenizer st = new StringTokenizer(entry.getName(), "/");
+ String addressType = st.nextToken();
+ String cluster = st.nextToken(); // skip
+ NodeType nodeType = NodeType.get(st.nextToken());
+ Integer nodeIndex = Integer.valueOf(st.nextToken());
+ String service = (st.hasMoreTokens() ? st.nextToken() : ""); // skip
+ assert(addressType.equals("storage"));
+ Node n = new Node(nodeType, nodeIndex);
+ result.put(n, new SlobrokData(n, entry.getSpec()));
+ }
+ return result;
+ }
+
+ private static class SlobrokData {
+
+ public Node node;
+ public String rpcAddress;
+
+ SlobrokData(Node node, String rpcAddress) {
+ this.node = node;
+ this.rpcAddress = rpcAddress;
+ }
+ }
+
+}
diff --git a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/status/ClusterStateRequestHandler.java b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/status/ClusterStateRequestHandler.java
new file mode 100644
index 00000000000..6de9205bbe3
--- /dev/null
+++ b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/status/ClusterStateRequestHandler.java
@@ -0,0 +1,24 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.vespa.clustercontroller.core.status;
+
+import com.yahoo.vdslib.state.ClusterState;
+import com.yahoo.vespa.clustercontroller.core.status.statuspage.StatusPageResponse;
+import com.yahoo.vespa.clustercontroller.core.status.statuspage.StatusPageServer;
+import com.yahoo.vespa.clustercontroller.core.SystemStateGenerator;
+
+public class ClusterStateRequestHandler implements StatusPageServer.RequestHandler {
+ private final SystemStateGenerator systemStateGenerator;
+
+ public ClusterStateRequestHandler(SystemStateGenerator systemStateGenerator) {
+ this.systemStateGenerator = systemStateGenerator;
+ }
+ @Override
+ public StatusPageResponse handle(StatusPageServer.HttpRequest request) {
+ ClusterState cs = systemStateGenerator.getClusterState();
+
+ StatusPageResponse response = new StatusPageResponse();
+ response.setContentType("text/plain");
+ response.writeContent(cs.toString());
+ return response;
+ }
+}
diff --git a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/status/LegacyIndexPageRequestHandler.java b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/status/LegacyIndexPageRequestHandler.java
new file mode 100644
index 00000000000..85db0ac0ef9
--- /dev/null
+++ b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/status/LegacyIndexPageRequestHandler.java
@@ -0,0 +1,133 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.vespa.clustercontroller.core.status;
+
+import com.yahoo.vespa.clustercontroller.core.*;
+import com.yahoo.vespa.clustercontroller.core.status.statuspage.StatusPageResponse;
+import com.yahoo.vespa.clustercontroller.core.status.statuspage.StatusPageServer;
+import com.yahoo.vespa.clustercontroller.core.status.statuspage.VdsClusterHtmlRendrer;
+
+import java.util.Iterator;
+import java.util.TimeZone;
+
+/**
+* @author <a href="mailto:humbe@yahoo-inc.com">Haakon Humberset</a>
+*/
+public class LegacyIndexPageRequestHandler implements StatusPageServer.RequestHandler {
+
+ private final Timer timer;
+ private final ContentCluster cluster;
+ private final MasterElectionHandler masterElectionHandler;
+ private final SystemStateGenerator systemStateGenerator;
+ private final EventLog eventLog;
+ private final long startedTime;
+ private final RunDataExtractor data;
+ private boolean showLocalSystemStatesInLog = true;
+
+ public LegacyIndexPageRequestHandler(Timer timer, boolean showLocalSystemStatesInLog, ContentCluster cluster,
+ MasterElectionHandler masterElectionHandler, SystemStateGenerator systemStateGenerator,
+ EventLog eventLog, long startedTime, RunDataExtractor data)
+ {
+ this.timer = timer;
+ this.showLocalSystemStatesInLog = showLocalSystemStatesInLog;
+ this.cluster = cluster;
+ this.masterElectionHandler = masterElectionHandler;
+ this.systemStateGenerator = systemStateGenerator;
+ this.eventLog = eventLog;
+ this.startedTime = startedTime;
+ this.data = data;
+ }
+
+ @Override
+ public StatusPageResponse handle(StatusPageServer.HttpRequest request) {
+ TimeZone tz = TimeZone.getTimeZone("UTC");
+ long currentTime = timer.getCurrentTimeInMillis();
+
+ StatusPageResponse response = new StatusPageResponse();
+ response.setContentType("text/html");
+ StringBuilder content = new StringBuilder();
+ content.append("<!-- Answer to request " + request + " -->\n");
+ response.writeHtmlHeader(content, cluster.getName() + " Cluster Controller " + data.getOptions().fleetControllerIndex + " Status Page");
+ content.append("<p><font size=\"-1\">")
+ .append(" [ <a href=\"#config\">Current config</a>")
+ .append(" | <a href=\"#clusterstates\">Cluster states</a>")
+ .append(" | <a href=\"#eventlog\">Event log</a>")
+ .append(" ]</font></p>\n");
+ content.append("<table><tr><td>UTC time when creating this page:</td><td align=\"right\">").append(RealTimer.printDateNoMilliSeconds(currentTime, tz)).append("</td></tr>");
+ //content.append("<tr><td>Fleetcontroller version:</td><td align=\"right\">" + Vtag.V_TAG_PKG + "</td></tr/>");
+ content.append("<tr><td>Cluster controller uptime:</td><td align=\"right\">" + RealTimer.printDuration(currentTime - startedTime) + "</td></tr></table>");
+ // State of master election
+ masterElectionHandler.writeHtmlState(content, data.getOptions().stateGatherCount);
+ if (masterElectionHandler.isAmongNthFirst(data.getOptions().stateGatherCount)) {
+ // Table overview of all the nodes
+ cluster.writeHtmlState(
+ new VdsClusterHtmlRendrer(),
+ content,
+ timer,
+ systemStateGenerator.getClusterState(),
+ data.getOptions().storageDistribution,
+ data.getOptions(),
+ eventLog,
+ request.getPathPrefix());
+ // Overview of current config
+ data.getOptions().writeHtmlState(content, request);
+ // Current cluster state and cluster state history
+ writeHtmlState(systemStateGenerator, content, request);
+ } else {
+ // Overview of current config
+ data.getOptions().writeHtmlState(content, request);
+ }
+ // Event log
+ eventLog.writeHtmlState(content, null);
+ response.writeHtmlFooter(content, "");
+ response.writeContent(content.toString());
+
+ return response;
+ }
+
+ public void writeHtmlState(SystemStateGenerator systemStateGenerator, StringBuilder sb, StatusPageServer.HttpRequest request) {
+ boolean showLocal = showLocalSystemStatesInLog;
+ if (request.hasQueryParameter("showlocal")) {
+ showLocal = true;
+ } else if (request.hasQueryParameter("hidelocal")) {
+ showLocal = false;
+ }
+
+ sb.append("<h2 id=\"clusterstates\">Cluster states</h2>\n")
+ .append("<p>Current cluster state:<br><code>").append(systemStateGenerator.currentClusterStateView().toString()).append("</code></p>\n");
+
+ if ( ! systemStateGenerator.systemStateHistory().isEmpty()) {
+ TimeZone tz = TimeZone.getTimeZone("UTC");
+ sb.append("<h3 id=\"clusterstatehistory\">Cluster state history</h3>\n");
+ if (showLocal) {
+ sb.append("<p>Cluster states shown in gray are just transition states on the fleet controller and has never been sent to any nodes.</p>");
+ }
+ sb.append("<table border=\"1\" cellspacing=\"0\"><tr>\n")
+ .append(" <th>Creation date (").append(tz.getDisplayName(false, TimeZone.SHORT)).append(")</th>\n")
+ .append(" <th>Cluster state</th>\n")
+ .append("</tr>\n");
+ // Write cluster state history in reverse order (newest on top)
+ Iterator<SystemStateGenerator.SystemStateHistoryEntry> stateIterator = systemStateGenerator.systemStateHistory().iterator();
+ SystemStateGenerator.SystemStateHistoryEntry current = null;
+ while (stateIterator.hasNext()) {
+ SystemStateGenerator.SystemStateHistoryEntry nextEntry = stateIterator.next();
+ if (nextEntry.state().isOfficial() || showLocal) {
+ if (current != null) writeClusterStateEntry(current, nextEntry, sb, tz);
+ current = nextEntry;
+ }
+ }
+ if (current != null) writeClusterStateEntry(current, null, sb, tz);
+ sb.append("</table>\n");
+ }
+ }
+
+ private void writeClusterStateEntry(SystemStateGenerator.SystemStateHistoryEntry entry, SystemStateGenerator.SystemStateHistoryEntry last, StringBuilder sb, TimeZone tz) {
+ sb.append("<tr><td>").append(RealTimer.printDate(entry.time(), tz))
+ .append("</td><td>").append(entry.state().isOfficial() ? "" : "<font color=\"grey\">");
+ sb.append(entry.state());
+ if (last != null) {
+ sb.append("<br><b>Diff</b>: ").append(last.state().getHtmlDifference(entry.state()));
+ }
+ sb.append(entry.state().isOfficial() ? "" : "</font>").append("</td></tr>\n");
+ }
+
+}
diff --git a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/status/LegacyNodePageRequestHandler.java b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/status/LegacyNodePageRequestHandler.java
new file mode 100644
index 00000000000..3a6ed11be10
--- /dev/null
+++ b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/status/LegacyNodePageRequestHandler.java
@@ -0,0 +1,65 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.vespa.clustercontroller.core.status;
+
+import com.yahoo.vdslib.state.Node;
+import com.yahoo.vdslib.state.NodeType;
+import com.yahoo.vespa.clustercontroller.core.*;
+import com.yahoo.vespa.clustercontroller.core.status.statuspage.StatusPageResponse;
+import com.yahoo.vespa.clustercontroller.core.status.statuspage.StatusPageServer;
+
+import java.util.TimeZone;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
+/**
+* @author <a href="mailto:humbe@yahoo-inc.com">Haakon Humberset</a>
+*/
+public class LegacyNodePageRequestHandler implements StatusPageServer.RequestHandler {
+
+ private static final Pattern nodePattern = Pattern.compile("/node=([a-z]+)\\.(\\d+)");
+ private final Timer timer;
+ private final EventLog eventLog;
+ private final ContentCluster cluster;
+
+ public LegacyNodePageRequestHandler(Timer timer, EventLog eventLog, ContentCluster cluster) {
+ this.timer = timer;
+ this.eventLog = eventLog;
+ this.cluster = cluster;
+ }
+
+ @Override
+ public StatusPageResponse handle(StatusPageServer.HttpRequest request) {
+ Matcher m = nodePattern.matcher(request.getPath());
+ if (!m.matches()) {
+ throw new IllegalStateException("Node request handler invoked but failed to match path");
+ }
+ TimeZone tz = TimeZone.getTimeZone("UTC");
+ long currentTime = timer.getCurrentTimeInMillis();
+ NodeType nodeType = NodeType.get(m.group(1));
+ int index = Integer.valueOf(m.group(2));
+ Node node = new Node(nodeType, index);
+
+ StatusPageResponse response = new StatusPageResponse();
+ response.setContentType("text/html");
+ StringBuilder content = new StringBuilder();
+ content.append("<!-- Answer to request " + request + " -->\n");
+ response.writeHtmlHeader(content, "Cluster Controller Status Page - Node status for " + node);
+ content.append("<p>UTC time when creating this page: ").append(RealTimer.printDateNoMilliSeconds(currentTime, tz)).append("</p>");
+ String prefix = request.getPathPrefix();
+ if (!prefix.isEmpty()) {
+ prefix += "/" + cluster.getName();
+ }
+ content.append("[ <a href=\"" + prefix + "\">Back to cluster overview</a> ] <br><br>");
+ eventLog.writeHtmlState(content, node);
+ NodeInfo nodeInfo = cluster.getNodeInfo(node);
+ content.append("<h2>Host info</h2>\n");
+ if (nodeInfo.getHostInfo() != null) {
+ content.append("<pre>\n").append(nodeInfo.getHostInfo().getRawCreationString()).append("\n</pre>\n");
+ } else {
+ content.append("Not retrieved\n");
+ }
+ response.writeHtmlFooter(content, "");
+ response.writeContent(content.toString());
+ return response;
+ }
+}
diff --git a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/status/NodeHealthRequestHandler.java b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/status/NodeHealthRequestHandler.java
new file mode 100644
index 00000000000..42b38eaebcc
--- /dev/null
+++ b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/status/NodeHealthRequestHandler.java
@@ -0,0 +1,37 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.vespa.clustercontroller.core.status;
+
+import com.yahoo.vespa.clustercontroller.core.status.statuspage.StatusPageResponse;
+import com.yahoo.vespa.clustercontroller.core.status.statuspage.StatusPageServer;
+
+/**
+* @author <a href="mailto:humbe@yahoo-inc.com">Haakon Humberset</a>
+*/
+public class NodeHealthRequestHandler implements StatusPageServer.RequestHandler {
+ private final RunDataExtractor data;
+
+ public NodeHealthRequestHandler(RunDataExtractor data) {
+ this.data = data;
+ }
+
+ @Override
+ public StatusPageResponse handle(StatusPageServer.HttpRequest request) {
+ StatusPageResponse response = new StatusPageResponse();
+ StringBuilder content = new StringBuilder();
+ response.setContentType("application/json");
+ content.append(
+ "{\n" +
+ " \"status\" : {\n" +
+ " \"code\" : \"up\"\n" +
+ " },\n" +
+ " \"config\" : {\n" +
+ " \"component\" : {\n" +
+ " \"generation\" : " + data.getConfigGeneration() + "\n" +
+ " }\n" +
+ " }\n" +
+ "}"
+ );
+ response.writeContent(content.toString());
+ return response;
+ }
+}
diff --git a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/status/RunDataExtractor.java b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/status/RunDataExtractor.java
new file mode 100644
index 00000000000..f217ccdff57
--- /dev/null
+++ b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/status/RunDataExtractor.java
@@ -0,0 +1,18 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.vespa.clustercontroller.core.status;
+
+import com.yahoo.vdslib.state.ClusterState;
+import com.yahoo.vespa.clustercontroller.core.FleetControllerOptions;
+import com.yahoo.vespa.clustercontroller.core.ContentCluster;
+
+/**
+ * @author <a href="mailto:humbe@yahoo-inc.com">Haakon Humberset</a>
+ */
+public interface RunDataExtractor {
+
+ public ClusterState getLatestClusterState();
+ public FleetControllerOptions getOptions();
+ public long getConfigGeneration();
+ public ContentCluster getCluster();
+
+}
diff --git a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/status/StaticResourceRequestHandler.java b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/status/StaticResourceRequestHandler.java
new file mode 100644
index 00000000000..fa8128753f6
--- /dev/null
+++ b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/status/StaticResourceRequestHandler.java
@@ -0,0 +1,66 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.vespa.clustercontroller.core.status;
+
+import com.yahoo.vespa.clustercontroller.core.status.statuspage.StatusPageResponse;
+import com.yahoo.vespa.clustercontroller.core.status.statuspage.StatusPageServer;
+
+import java.io.ByteArrayOutputStream;
+import java.io.InputStream;
+import java.io.IOException;
+
+/**
+ * HTTP request handler for serving a single JAR resource as if it were
+ * a regular file hosted on the server. Always serves the content verbatim
+ * (i.e. as a byte stream), specifying a Content-Type provided when creating
+ * the handler.
+ *
+ * @author <a href="mailto:vekterli@yahoo-inc.com">Tor Brede Vekterli</a>
+ * @since 5.28
+ */
+public class StaticResourceRequestHandler implements StatusPageServer.RequestHandler {
+ private final byte[] resourceData;
+ private final String contentType;
+
+ public StaticResourceRequestHandler(String resourcePath,
+ String contentType)
+ throws IOException
+ {
+ this.resourceData = loadResource(resourcePath);
+ this.contentType = contentType;
+ }
+
+ private byte[] loadResource(String resourcePath) throws IOException {
+ InputStream resourceStream = getClass().getClassLoader().getResourceAsStream(resourcePath);
+ if (resourceStream == null) {
+ throw new IOException("No resource with path '" + resourcePath + "' could be found");
+ }
+ return readStreamData(resourceStream);
+ }
+
+ @Override
+ public StatusPageResponse handle(StatusPageServer.HttpRequest request) {
+ final StatusPageResponse response = new StatusPageResponse();
+ response.setClientCachingEnabled(true);
+ response.setContentType(contentType);
+ try {
+ response.getOutputStream().write(resourceData);
+ } catch (IOException e) {
+ response.setResponseCode(StatusPageResponse.ResponseCode.INTERNAL_SERVER_ERROR);
+ }
+ return response;
+ }
+
+ private byte[] readStreamData(InputStream resourceStream) throws IOException {
+ final byte[] buf = new byte[4096];
+ final ByteArrayOutputStream outputStream = new ByteArrayOutputStream();
+ while (true) {
+ int read = resourceStream.read(buf);
+ if (read < 0) {
+ break;
+ }
+ outputStream.write(buf, 0, read);
+ }
+ outputStream.close();
+ return outputStream.toByteArray();
+ }
+}
diff --git a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/status/StatusHandler.java b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/status/StatusHandler.java
new file mode 100644
index 00000000000..efa09919474
--- /dev/null
+++ b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/status/StatusHandler.java
@@ -0,0 +1,129 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.vespa.clustercontroller.core.status;
+
+import com.yahoo.vespa.clustercontroller.core.status.statuspage.StatusPageResponse;
+import com.yahoo.vespa.clustercontroller.core.status.statuspage.StatusPageServer;
+import com.yahoo.vespa.clustercontroller.core.status.statuspage.StatusPageServerInterface;
+import com.yahoo.vespa.clustercontroller.utils.communication.http.HttpRequest;
+import com.yahoo.vespa.clustercontroller.utils.communication.http.HttpRequestHandler;
+import com.yahoo.vespa.clustercontroller.utils.communication.http.HttpResult;
+
+import java.io.IOException;
+import java.io.StringWriter;
+import java.util.Map;
+import java.util.logging.Logger;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
+public class StatusHandler implements HttpRequestHandler {
+ private final static Logger log = Logger.getLogger(StatusHandler.class.getName());
+ public static interface ClusterStatusPageServerSet {
+ public ContainerStatusPageServer get(String cluster);
+ public Map<String, ContainerStatusPageServer> getAll();
+ }
+ public static class ContainerStatusPageServer implements StatusPageServerInterface {
+ StatusPageServer.HttpRequest request;
+ StatusPageResponse response;
+ // Ensure only only one use the server at a time
+ private final Object queueMonitor = new Object();
+ // Lock safety with fleetcontroller. Wait until completion
+ private final Object answerMonitor = new Object();
+
+ @Override
+ public int getPort() { return 0; }
+ @Override
+ public void shutdown() throws InterruptedException, IOException {}
+ @Override
+ public void setPort(int port) throws IOException, InterruptedException {}
+ @Override
+ public StatusPageServer.HttpRequest getCurrentHttpRequest() {
+ synchronized (answerMonitor) {
+ StatusPageServer.HttpRequest r = request;
+ request = null;
+ return r;
+ }
+ }
+ @Override
+ public void answerCurrentStatusRequest(StatusPageResponse r) {
+ synchronized (answerMonitor) {
+ response = r;
+ answerMonitor.notify();
+ }
+ }
+
+ StatusPageResponse getStatus(StatusPageServer.HttpRequest req) throws InterruptedException {
+ synchronized (queueMonitor) {
+ synchronized (answerMonitor) {
+ request = req;
+ while (response == null) {
+ answerMonitor.wait();
+ }
+ StatusPageResponse res = response;
+ response = null;
+ return res;
+ }
+ }
+ }
+ }
+ private static Pattern clusterListRequest = Pattern.compile("^/clustercontroller-status/v1/?$");
+ private static Pattern statusRequest = Pattern.compile("^/clustercontroller-status/v1/([^/]+)(/.*)?$");
+ private final ClusterStatusPageServerSet statusClusters;
+
+ public StatusHandler(ClusterStatusPageServerSet set) {
+ statusClusters = set;
+ }
+
+ @Override
+ public HttpResult handleRequest(HttpRequest httpRequest) throws Exception {
+ log.fine("Handling status request " + httpRequest);
+ Matcher matcher = statusRequest.matcher(httpRequest.getPath());
+ if (matcher.matches()) {
+ return handleClusterRequest(matcher.group(1), matcher.group(2));
+ }
+ matcher = clusterListRequest.matcher(httpRequest.getPath());
+ if (matcher.matches()) {
+ return handleClusterListRequest();
+ }
+ return new HttpResult().setHttpCode(
+ 404, "No page for request '" + httpRequest.getPath() + "'.");
+ }
+
+ private HttpResult handleClusterRequest(String clusterName, String fleetControllerPath) throws InterruptedException {
+ ContainerStatusPageServer statusServer = statusClusters.get(clusterName);
+ if (statusServer == null) {
+ return new HttpResult().setHttpCode(
+ 404, "No controller exists for cluster '" + clusterName + "'.");
+ }
+ if (fleetControllerPath == null || fleetControllerPath.isEmpty()) {
+ fleetControllerPath = "/";
+ }
+ StatusPageServer.HttpRequest req = new StatusPageServer.HttpRequest(fleetControllerPath);
+ req.setPathPrefix("/clustercontroller-status/v1");
+ StatusPageResponse response = statusServer.getStatus(req);
+ HttpResult result = new HttpResult();
+ if (response.getResponseCode() != null) {
+ result.setHttpCode(
+ response.getResponseCode().getCode(),
+ response.getResponseCode().getMessage());
+ }
+ if (response.getContentType() != null) {
+ result.addHeader("Content-Type", response.getContentType());
+ }
+ result.setContent(new String(response.getOutputStream().toByteArray()));
+ return result;
+ }
+
+ public HttpResult handleClusterListRequest() {
+ HttpResult result = new HttpResult();
+ result.addHeader("Content-Type", "text/html");
+ StringWriter sw = new StringWriter();
+ sw.append("<title>clusters</title>\n");
+ for (String s : statusClusters.getAll().keySet()) {
+ sw.append("<a href=\"./").append(s).append("\">").append(s)
+ .append("</a><br>").append("\n");
+ }
+ result.setContent(sw.toString());
+ return result;
+ }
+
+}
diff --git a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/status/package-info.java b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/status/package-info.java
new file mode 100644
index 00000000000..df74a60bc2b
--- /dev/null
+++ b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/status/package-info.java
@@ -0,0 +1,5 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+@ExportPackage
+package com.yahoo.vespa.clustercontroller.core.status;
+
+import com.yahoo.osgi.annotation.ExportPackage;
diff --git a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/status/statuspage/HtmlTable.java b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/status/statuspage/HtmlTable.java
new file mode 100644
index 00000000000..0c9620f046e
--- /dev/null
+++ b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/status/statuspage/HtmlTable.java
@@ -0,0 +1,166 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.vespa.clustercontroller.core.status.statuspage;
+
+import java.util.ArrayList;
+
+/**
+ * Helper class in order to write HTML tables
+ */
+public class HtmlTable {
+
+ int border = 1;
+ int cellSpacing = 0;
+ enum Orientation { LEFT, CENTER, RIGHT };
+ public static class CellProperties {
+ Integer backgroundColor;
+ Integer colSpan; // Colspan 0 indicate rest of table
+ Integer rowSpan;
+ Orientation contentAlignment;
+ Boolean allowLineBreaks;
+
+ CellProperties setColSpan(Integer span) { this.colSpan = span; return this; }
+ CellProperties setRowSpan(Integer span) { this.rowSpan = span; return this; }
+ CellProperties setBackgroundColor(Integer bgcol) { this.backgroundColor = bgcol; return this; }
+ CellProperties align(Orientation alignment) { this.contentAlignment = alignment; return this; }
+ CellProperties allowLineBreaks(Boolean allow) { this.allowLineBreaks = allow; return this; }
+
+ void add(CellProperties cp) {
+ if (cp.backgroundColor != null) backgroundColor = cp.backgroundColor;
+ if (cp.colSpan != null) colSpan = cp.colSpan;
+ if (cp.rowSpan != null) rowSpan = cp.rowSpan;
+ if (cp.contentAlignment != null) contentAlignment = cp.contentAlignment;
+ if (cp.allowLineBreaks != null) allowLineBreaks = cp.allowLineBreaks;
+ }
+ }
+ ArrayList<CellProperties> colProperties = new ArrayList<CellProperties>();
+ CellProperties tableProperties = new CellProperties();
+ public static class Cell {
+ CellProperties properties = new CellProperties();
+ String content;
+
+ Cell(String content) { this.content = content; }
+
+ Cell addProperties(CellProperties c) { properties.add(c); return this; }
+ }
+ public static class Row {
+ boolean isHeaderRow;
+ ArrayList<Cell> cells = new ArrayList<Cell>();
+ CellProperties rowProperties = new CellProperties();
+
+ public Row addCell(Cell c) {
+ cells.add(c);
+ return this;
+ }
+ public Cell getLastCell() {
+ return cells.get(cells.size() - 1);
+ }
+
+ Row setHeaderRow() { isHeaderRow = true; return this; }
+ Row addProperties(CellProperties p) { rowProperties.add(p); return this; }
+ }
+
+ private ArrayList<Row> cells = new ArrayList<Row>();
+
+ public HtmlTable() {
+ }
+
+ public HtmlTable addRow(Row r) {
+ cells.add(r);
+ return this;
+ }
+
+ public CellProperties getTableProperties() { return tableProperties; }
+
+ public CellProperties getColProperties(int col) {
+ while (colProperties.size() <= col) {
+ colProperties.add(new CellProperties());
+ }
+ return colProperties.get(col);
+ }
+
+ private String getColor(int color) {
+ String col = Integer.toHexString(color);
+ while (col.length() < 6) col = "0" + col;
+ return col;
+ }
+
+ public static String escape(String s) {
+ s = s.replaceAll("&", "&amp;");
+ s = s.replaceAll("<", "&lt;");
+ s = s.replaceAll(">", "&gt;");
+ return s;
+ }
+
+ public int getColumnCount() {
+ int cols = 0;
+ ArrayList<Integer> next = new ArrayList<Integer>();
+ for (Row row : cells) {
+ int rowCount = 0;
+ if (!next.isEmpty()) {
+ rowCount += next.get(0);
+ next.remove(0);
+ }
+ for (Cell c : row.cells) {
+ int width = 1;
+ if (c.properties.colSpan != null && c.properties.colSpan > 1) {
+ width = c.properties.colSpan;
+ }
+ rowCount += width;
+ if (c.properties.rowSpan != null && c.properties.rowSpan > 1) {
+ while (next.size() < c.properties.rowSpan - 1) {
+ next.add(0);
+ }
+ for (int i=1; i<c.properties.rowSpan; ++i) {
+ next.set(i - 1, next.get(i - 1) + width);
+ }
+ }
+ }
+ cols = Math.max(cols, rowCount);
+ }
+ return cols;
+ }
+
+ public String toString() {
+ StringBuilder sb = new StringBuilder();
+ sb.append("<table border=\"").append(border).append("\" cellSpacing=\"").append(cellSpacing).append("\">\n");
+ int columnCount = getColumnCount();
+ for (Row row : cells) {
+ sb.append("<tr>\n");
+ for (int i=0; i<row.cells.size(); ++i) {
+ Cell cell = row.cells.get(i);
+ CellProperties properties = new CellProperties();
+ properties.add(tableProperties);
+ if (colProperties.size() > i) {
+ properties.add(colProperties.get(i));
+ }
+ properties.add(row.rowProperties);
+ properties.add(cell.properties);
+
+ sb.append(row.isHeaderRow ? "<th" : "<td");
+ if (properties.backgroundColor != null) {
+ sb.append(" bgcolor=\"#").append(getColor(properties.backgroundColor)).append('"');
+ }
+ if (properties.contentAlignment != null) {
+ sb.append(" align=\"").append(properties.contentAlignment.name().toLowerCase()).append('"');
+ }
+ if (properties.colSpan != null) {
+ int colSpan = properties.colSpan;
+ if (colSpan == 0) colSpan = (columnCount - i);
+ sb.append(" colspan=\"").append(colSpan).append('"');
+ }
+ if (properties.rowSpan != null) {
+ sb.append(" rowspan=\"").append(properties.rowSpan).append('"');
+ }
+ sb.append(">");
+ if (properties.allowLineBreaks != null && !properties.allowLineBreaks) sb.append("<nobr>");
+ sb.append(cell.content);
+ if (properties.allowLineBreaks != null && !properties.allowLineBreaks) sb.append("</nobr>");
+ sb.append(row.isHeaderRow ? "</th>" : "</td>").append("\n");
+ }
+ sb.append("</tr>\n");
+ }
+ sb.append("</table>\n");
+ return sb.toString();
+ }
+
+}
diff --git a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/status/statuspage/StatusPageResponse.java b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/status/statuspage/StatusPageResponse.java
new file mode 100644
index 00000000000..a789f5c0b9b
--- /dev/null
+++ b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/status/statuspage/StatusPageResponse.java
@@ -0,0 +1,84 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.vespa.clustercontroller.core.status.statuspage;
+
+import org.apache.commons.lang.StringEscapeUtils;
+
+import java.io.*;
+
+public class StatusPageResponse {
+
+ private ByteArrayOutputStream output = new ByteArrayOutputStream();
+ private String contentType;
+ private ResponseCode responseCode = ResponseCode.OK;
+ private boolean clientCachingEnabled = false;
+
+ public enum ResponseCode {
+ OK(200, "OK"),
+ NOT_MODIFIED(304, "Not Modified"),
+ BAD_REQUEST(400, "Bad Request"),
+ NOT_FOUND(404, "Not Found"),
+ INTERNAL_SERVER_ERROR(500, "Internal Server Error");
+
+ private final int code;
+ private final String message;
+ ResponseCode(int code, String message) {
+ this.code = code;
+ this.message = message;
+ }
+
+ public int getCode() {
+ return code;
+ }
+
+ public String getMessage() {
+ return message;
+ }
+ }
+
+ public String getContentType() { return contentType; }
+ public ResponseCode getResponseCode() { return responseCode; }
+ public ByteArrayOutputStream getOutputStream() { return output; }
+
+ public BufferedWriter createBufferedWriter() {
+ return new BufferedWriter(new OutputStreamWriter(output));
+ }
+
+ public void writeContent(String content) {
+ try {
+ BufferedWriter writer = createBufferedWriter();
+ writer.write(content);
+ writer.close();
+ } catch (IOException e) {
+ e.printStackTrace();
+ }
+ }
+
+ public void setContentType(String type) { contentType = type; }
+ public void setResponseCode(ResponseCode responseCode) {
+ this.responseCode = responseCode;
+ }
+
+ public boolean isClientCachingEnabled() {
+ return clientCachingEnabled;
+ }
+
+ public void setClientCachingEnabled(boolean clientCachingEnabled) {
+ this.clientCachingEnabled = clientCachingEnabled;
+ }
+
+ public void writeHtmlHeader(StringBuilder content, String title) {
+ String escaped_title = StringEscapeUtils.escapeHtml(title);
+ content.append("<html>\n")
+ .append("<head><title>").append(escaped_title).append("</title></head>")
+ .append("<body>\n")
+ .append("<h1>").append(escaped_title).append("</h1>\n");
+ }
+
+ public void writeHtmlFooter(StringBuilder content, String hiddenMessage) {
+ if (hiddenMessage != null && !hiddenMessage.isEmpty()) {
+ content.append("\n<!-- " + StringEscapeUtils.escapeHtml(hiddenMessage) + " -->\n");
+ }
+ content.append("</body>\n")
+ .append("</html>\n");
+ }
+}
diff --git a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/status/statuspage/StatusPageServer.java b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/status/statuspage/StatusPageServer.java
new file mode 100644
index 00000000000..1d0d967a2b3
--- /dev/null
+++ b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/status/statuspage/StatusPageServer.java
@@ -0,0 +1,403 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.vespa.clustercontroller.core.status.statuspage;
+
+import com.yahoo.log.LogLevel;
+import org.apache.commons.lang.exception.ExceptionUtils;
+
+import java.io.*;
+import java.net.ServerSocket;
+import java.net.InetSocketAddress;
+import java.net.Socket;
+import java.net.SocketTimeoutException;
+import java.text.DateFormat;
+import java.text.SimpleDateFormat;
+import java.util.*;
+import java.util.logging.Logger;
+import java.util.regex.Pattern;
+import java.util.regex.Matcher;
+
+/**
+ * Shows status pages with debug information through a very simple HTTP interface.
+ */
+public class StatusPageServer implements Runnable, StatusPageServerInterface {
+
+ public static Logger log = Logger.getLogger(StatusPageServer.class.getName());
+
+ private final com.yahoo.vespa.clustercontroller.core.Timer timer;
+ private final Object monitor;
+ private ServerSocket ssocket;
+ private final Thread runner;
+ private int port = 0;
+ private boolean running = true;
+ private boolean shouldBeConnected = false;
+ private HttpRequest currentHttpRequest = null;
+ private StatusPageResponse currentResponse = null;
+ private long lastConnectErrorTime = 0;
+ private String lastConnectError = "";
+ private PatternRequestRouter staticContentRouter = new PatternRequestRouter();
+ private Date startTime = new Date();
+
+ public StatusPageServer(com.yahoo.vespa.clustercontroller.core.Timer timer, Object monitor, int port) throws java.io.IOException, InterruptedException {
+ this.timer = timer;
+ this.monitor = monitor;
+ this.port = port;
+ connect();
+ runner = new Thread(this);
+ runner.start();
+ }
+
+ public boolean isConnected() {
+ if (ssocket != null && ssocket.isBound() && (ssocket.getLocalPort() == port || port == 0)) {
+ return true;
+ } else {
+ log.log(LogLevel.SPAM, "Status page server socket is no longer connected: "+ (ssocket != null) + " " + ssocket.isBound() + " " + ssocket.getLocalPort() + " " + port);
+ return false;
+ }
+ }
+
+ public void connect() throws java.io.IOException, InterruptedException {
+ synchronized(monitor) {
+ if (ssocket != null) {
+ if (ssocket.isBound() && ssocket.getLocalPort() == port) {
+ return;
+ }
+ disconnect();
+ }
+ ssocket = new ServerSocket();
+ if (port != 0) {
+ ssocket.setReuseAddress(true);
+ }
+ ssocket.setSoTimeout(100);
+ ssocket.bind(new InetSocketAddress(port));
+ shouldBeConnected = true;
+ for (int i=0; i<200; ++i) {
+ if (isConnected()) break;
+ Thread.sleep(10);
+ }
+ if (!isConnected()) {
+ log.log(LogLevel.INFO, "Fleetcontroller: Server Socket not ready after connect()");
+ }
+ log.log(LogLevel.DEBUG, "Fleet controller status page viewer listening to " + ssocket.getLocalSocketAddress());
+ monitor.notifyAll();
+ }
+ }
+
+ public void disconnect() throws java.io.IOException {
+ synchronized(monitor) {
+ shouldBeConnected = false;
+ if (ssocket != null) ssocket.close();
+ ssocket = null;
+ monitor.notifyAll();
+ }
+ }
+
+ public void setPort(int port) throws java.io.IOException, InterruptedException {
+ // Only bother to reconnect if we were connected to begin with, we care about what port it runs on, and it's not already running there
+ if (port != 0 && isConnected() && port != ((InetSocketAddress) ssocket.getLocalSocketAddress()).getPort()) {
+ log.log(LogLevel.INFO, "Exchanging port used by status server. Moving from port "
+ + ((InetSocketAddress) ssocket.getLocalSocketAddress()).getPort() + " to port " + port);
+ disconnect();
+ this.port = port;
+ if (ssocket == null || !ssocket.isBound() || ssocket.getLocalPort() != port) {
+ connect();
+ }
+ } else {
+ this.port = port;
+ }
+ }
+
+ public int getPort() {
+ // Cannot use this.port, because of tests using port 0 to get any address
+ if (ssocket == null || !ssocket.isBound()) {
+ throw new IllegalStateException("Cannot ask for port before server socket is bound");
+ }
+ return ((InetSocketAddress) ssocket.getLocalSocketAddress()).getPort();
+ }
+
+ public void shutdown() throws InterruptedException, java.io.IOException {
+ running = false;
+ runner.interrupt();
+ runner.join();
+ disconnect();
+ }
+
+ public void run() {
+ try{
+ while (running) {
+ Socket connection = null;
+ ServerSocket serverSocket = null;
+ synchronized(monitor) {
+ if (ssocket == null || !ssocket.isBound()) {
+ monitor.wait(1000);
+ continue;
+ }
+ serverSocket = ssocket;
+ }
+ try{
+ connection = serverSocket.accept();
+ } catch (SocketTimeoutException e) {
+ // Ignore, since timeout is set to 100 ms
+ } catch (java.io.IOException e) {
+ log.log(shouldBeConnected ? LogLevel.WARNING : LogLevel.DEBUG, "Caught IO exception in ServerSocket.accept(): " + e.getMessage());
+ }
+ if (connection == null) continue;
+ log.log(LogLevel.DEBUG, "Got a status page request.");
+ String requestString = "";
+ BufferedReader br = null;
+ OutputStream output = null;
+ try{
+ StringBuilder sb = new StringBuilder();
+ br = new BufferedReader(new InputStreamReader(connection.getInputStream()));
+ while (true) {
+ String s = br.readLine();
+ if (s == null) throw new java.io.IOException("No data in HTTP request on socket " + connection.toString());
+ if (s.length() > 4 && s.substring(0,4).equals("GET ")) {
+ int nextSpace = s.indexOf(' ', 4);
+ if (nextSpace == -1) {
+ requestString = s.substring(4);
+ } else {
+ requestString = s.substring(4, nextSpace);
+ }
+ }
+ if (s == null || s.equals("")) break;
+ sb.append(s).append("\n");
+ }
+ log.log(LogLevel.DEBUG, "Got HTTP request: " + sb.toString());
+
+ HttpRequest httpRequest = null;
+ StatusPageResponse response = null;
+ try {
+ httpRequest = new HttpRequest(requestString);
+ // Static files are served directly by the HTTP server thread, since
+ // it makes no sense to go via the fleetcontroller logic for these.
+ RequestHandler contentHandler = staticContentRouter.resolveHandler(httpRequest);
+ if (contentHandler != null) {
+ response = contentHandler.handle(httpRequest);
+ }
+ } catch (Exception e) {
+ response = new StatusPageResponse();
+ response.setResponseCode(StatusPageResponse.ResponseCode.INTERNAL_SERVER_ERROR);
+ StringBuilder content = new StringBuilder();
+ response.writeHtmlHeader(content, "Internal Server Error");
+ response.writeHtmlFooter(content, ExceptionUtils.getStackTrace(e));
+ response.writeContent(content.toString());
+ }
+ if (response == null) {
+ synchronized(monitor) {
+ currentHttpRequest = httpRequest;
+ currentResponse = null;
+ while (running) {
+ if (currentResponse != null) {
+ response = currentResponse;
+ break;
+ }
+ monitor.wait(100);
+ }
+ }
+ }
+ if (response == null) {
+ response = new StatusPageResponse();
+ StringBuilder content = new StringBuilder();
+ response.setContentType("text/html");
+ response.writeHtmlHeader(content, "Failed to get response. Fleet controller probably in the process of shutting down.");
+ response.writeHtmlFooter(content, "");
+ response.writeContent(content.toString());
+ }
+
+ output = connection.getOutputStream();
+ StringBuilder header = new StringBuilder();
+ // TODO: per-response cache control
+ header.append("HTTP/1.1 ")
+ .append(response.getResponseCode().getCode())
+ .append(" ")
+ .append(response.getResponseCode().getMessage())
+ .append("\r\n")
+ .append("Date: ").append(new Date().toString()).append("\r\n")
+ .append("Connection: Close\r\n")
+ .append("Content-type: ").append(response.getContentType()).append("\r\n");
+ if (response.isClientCachingEnabled()) {
+ // TODO(vekterli): would be better to let HTTP handlers set header values in response
+ DateFormat df = new SimpleDateFormat("EEE, d MMM yyyy HH:mm:ss z");
+ df.setTimeZone(TimeZone.getTimeZone("GMT"));
+ header.append("Last-Modified: ").append(df.format(startTime)).append("\r\n");
+ } else {
+ header.append("Expires: Fri, 01 Jan 1990 00:00:00 GMT\r\n")
+ .append("Pragma: no-cache\r\n")
+ .append("Cache-control: no-cache, must-revalidate\r\n");
+ }
+ header.append("\r\n");
+ output.write(header.toString().getBytes());
+ output.write(response.getOutputStream().toByteArray());
+ } catch (java.io.IOException e) {
+ log.log(e.getMessage().indexOf("Broken pipe") >= 0 ? LogLevel.DEBUG : LogLevel.INFO,
+ "Failed to process HTTP request : " + e.getMessage());
+ } catch (Exception e) {
+ log.log(LogLevel.WARNING, "Caught exception in HTTP server thread: "
+ + e.getClass().getName() + ": " + e.getMessage());
+ } finally {
+ if (output != null) try{
+ output.close();
+ } catch (IOException e) {
+ log.log(e.getMessage().indexOf("Broken pipe") >= 0 ? LogLevel.DEBUG : LogLevel.INFO,
+ "Failed to close output stream on socket " + connection + ": " + e.getMessage());
+ }
+ if (br != null) try {
+ br.close();
+ } catch (IOException e) {
+ log.log(LogLevel.INFO, "Failed to close input stream on socket " + connection + ": " + e.getMessage());
+ }
+ if (connection != null) try{
+ connection.close();
+ } catch (IOException e) {
+ log.log(LogLevel.INFO, "Failed to close socket " + connection + ": " + e.getMessage());
+ }
+ }
+ }
+ } catch (InterruptedException e) {
+ log.log(LogLevel.DEBUG, "Status processing thread shut down by interrupt exception: " + e);
+ }
+ }
+
+ /**
+ * Very simple HTTP request class. This should be replaced the second
+ * the fleetcontroller e.g. moves into the container.
+ */
+ public static class HttpRequest {
+ private final String request;
+ private String pathPrefix = "";
+ private final Map<String, String> params = new HashMap<String, String>();
+ private String path;
+
+ static Pattern pathPattern;
+ static {
+ // NOTE: allow [=.] in path to be backwards-compatible with legacy node
+ // status pages.
+ // If you stare at it for long enough, this sorta looks like one of those
+ // magic eye pictures.
+ pathPattern = Pattern.compile("^(/([\\w=\\./]+)?)(?:\\?((?:&?\\w+(?:=[\\w\\.]*)?)*))?$");
+ }
+
+ public HttpRequest(String request) {
+ this.request = request;
+ Matcher m = pathPattern.matcher(request);
+ if (!m.matches()) {
+ throw new IllegalArgumentException("Illegal HTTP request path: " + request);
+ }
+ path = m.group(1);
+ if (m.group(3) != null) {
+ String[] rawParams = m.group(3).split("&");
+ for (String param : rawParams) {
+ // Parameter values are optional.
+ String[] queryParts = param.split("=");
+ params.put(queryParts[0], queryParts.length > 1 ? queryParts[1] : null);
+ }
+ }
+ }
+
+ public String getPathPrefix() { return pathPrefix; }
+
+ public String toString() {
+ return "HttpRequest(" + request + ")";
+ }
+
+ public String getRequest() {
+ return request;
+ }
+
+ public String getPath() {
+ return path;
+ }
+
+ public boolean hasQueryParameters() {
+ return !params.isEmpty();
+ }
+
+ public String getQueryParameter(String name) {
+ return params.get(name);
+ }
+
+ public boolean hasQueryParameter(String name) {
+ return params.containsKey(name);
+ }
+
+ public void setPathPrefix(String pathPrefix) {
+ this.pathPrefix = pathPrefix;
+ }
+ }
+
+ public interface RequestHandler {
+ StatusPageResponse handle(HttpRequest request);
+ }
+
+ public interface RequestRouter {
+ /**
+ * Resolve a request's handler based on its path.
+ * @param request HTTP request to resolve for.
+ * @return the request handler, or null if none matched.
+ */
+ RequestHandler resolveHandler(HttpRequest request);
+ }
+
+ /**
+ * Request router inspired by the Django framework's regular expression
+ * based approach. Patterns are matched in the same order as they were
+ * added to the router and the first matching one is used as the handler.
+ */
+ public static class PatternRequestRouter implements RequestRouter {
+ private static class PatternRouting {
+ public Pattern pattern;
+ public RequestHandler handler;
+
+ private PatternRouting(Pattern pattern, RequestHandler handler) {
+ this.pattern = pattern;
+ this.handler = handler;
+ }
+ }
+
+ private List<PatternRouting> patterns = new ArrayList<>();
+
+ public void addHandler(Pattern pattern, RequestHandler handler) {
+ patterns.add(new PatternRouting(pattern, handler));
+ }
+
+ public void addHandler(String pattern, RequestHandler handler) {
+ addHandler(Pattern.compile(pattern), handler);
+ }
+
+ @Override
+ public RequestHandler resolveHandler(HttpRequest request) {
+ for (PatternRouting routing : patterns) {
+ Matcher m = routing.pattern.matcher(request.getPath());
+ if (m.matches()) {
+ return routing.handler;
+ }
+ }
+ return null;
+ }
+ }
+
+ public HttpRequest getCurrentHttpRequest() {
+ synchronized (monitor) {
+ return currentHttpRequest;
+ }
+ }
+
+ public void answerCurrentStatusRequest(StatusPageResponse r) {
+ if (!isConnected()) {
+ long time = timer.getCurrentTimeInMillis();
+ try{
+ connect();
+ } catch (Exception e) {
+ if (!e.getMessage().equals(lastConnectError) || time - lastConnectErrorTime > 60 * 1000) {
+ lastConnectError = e.getMessage();
+ lastConnectErrorTime = time;
+ log.log(LogLevel.WARNING, "Failed to initialize HTTP status server server socket: " + e.getMessage());
+ }
+ }
+ }
+ synchronized (monitor) {
+ currentResponse = r;
+ currentHttpRequest = null; // Avoid fleetcontroller processing request more than once
+ }
+ }
+}
diff --git a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/status/statuspage/StatusPageServerInterface.java b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/status/statuspage/StatusPageServerInterface.java
new file mode 100644
index 00000000000..ab04d590e42
--- /dev/null
+++ b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/status/statuspage/StatusPageServerInterface.java
@@ -0,0 +1,10 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.vespa.clustercontroller.core.status.statuspage;
+
+public interface StatusPageServerInterface {
+ public int getPort();
+ public void shutdown() throws InterruptedException, java.io.IOException;
+ public void setPort(int port) throws java.io.IOException, InterruptedException;
+ public StatusPageServer.HttpRequest getCurrentHttpRequest();
+ public void answerCurrentStatusRequest(StatusPageResponse r);
+}
diff --git a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/status/statuspage/VdsClusterHtmlRendrer.java b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/status/statuspage/VdsClusterHtmlRendrer.java
new file mode 100644
index 00000000000..7304d1e673a
--- /dev/null
+++ b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/status/statuspage/VdsClusterHtmlRendrer.java
@@ -0,0 +1,304 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.vespa.clustercontroller.core.status.statuspage;
+
+import com.yahoo.vdslib.state.ClusterState;
+import com.yahoo.vdslib.state.NodeState;
+import com.yahoo.vdslib.state.NodeType;
+import com.yahoo.vdslib.state.State;
+import com.yahoo.vespa.clustercontroller.core.EventLog;
+import com.yahoo.vespa.clustercontroller.core.NodeInfo;
+import com.yahoo.vespa.clustercontroller.core.RealTimer;
+import com.yahoo.vespa.clustercontroller.core.Timer;
+
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.TimeZone;
+import java.util.TreeMap;
+
+
+/**
+ * Renders webpage with status regarding cluster.
+ */
+public class VdsClusterHtmlRendrer {
+ private static final TimeZone utcTimeZone = TimeZone.getTimeZone("UTC");
+
+ public static class Table {
+ private final HtmlTable table = new HtmlTable();
+ private final HtmlTable.CellProperties headerProperties;
+ private final StringBuilder contentBuilder = new StringBuilder();
+ private final static String TAG_NOT_SET = "not set";
+
+ Table(final String clusterName, final int slobrokGenerationCount) {
+ table.getTableProperties().align(HtmlTable.Orientation.RIGHT).setBackgroundColor(0xc0ffc0);
+ table.getColProperties(0).align(HtmlTable.Orientation.CENTER).setBackgroundColor(0xffffff);
+ table.getColProperties(1).align(HtmlTable.Orientation.LEFT);
+ table.getColProperties(2).align(HtmlTable.Orientation.LEFT);
+ table.getColProperties(3).align(HtmlTable.Orientation.LEFT);
+ table.getColProperties(7).align(HtmlTable.Orientation.LEFT);
+ table.getColProperties(12).align(HtmlTable.Orientation.LEFT);
+ for (int i = 4; i < 13; ++i) table.getColProperties(i).allowLineBreaks(false);
+ headerProperties = new HtmlTable.CellProperties()
+ .setBackgroundColor(0xffffff)
+ .align(HtmlTable.Orientation.CENTER);
+ contentBuilder.append("<h2>State of content cluster '")
+ .append(clusterName)
+ .append("'.</h2>\n")
+ .append("<p>Based on information retrieved from slobrok at generation ")
+ .append(slobrokGenerationCount).append(".</p>\n");
+ }
+
+ public void addTable(final StringBuilder destination, final long stableStateTimePeriode) {
+ destination.append(contentBuilder);
+
+ destination.append(table.toString())
+ .append("<p>")
+ .append("<p>");
+ addFooter(destination, stableStateTimePeriode);
+ }
+
+ public void renderNodes(
+ final TreeMap<Integer, NodeInfo> storageNodeInfos,
+ final TreeMap<Integer, NodeInfo> distributorNodeInfos,
+ final Timer timer,
+ final ClusterState state,
+ final int maxPrematureCrashes,
+ final EventLog eventLog,
+ final String pathPrefix,
+ final String name) {
+ final String dominantVtag = findDominantVtag(
+ storageNodeInfos, distributorNodeInfos);
+
+ renderNodesOneType(storageNodeInfos,
+ NodeType.STORAGE,
+ timer,
+ state,
+ maxPrematureCrashes,
+ eventLog,
+ pathPrefix,
+ dominantVtag,
+ name);
+ renderNodesOneType(distributorNodeInfos,
+ NodeType.DISTRIBUTOR,
+ timer,
+ state,
+ maxPrematureCrashes,
+ eventLog,
+ pathPrefix,
+ dominantVtag,
+ name);
+ }
+
+ private String findDominantVtag(
+ final Map<Integer, NodeInfo> storageNodeInfos,
+ final Map<Integer, NodeInfo> distributorNodeInfos) {
+ final List<NodeInfo> nodeInfos = new ArrayList<>();
+ nodeInfos.addAll(storageNodeInfos.values());
+ nodeInfos.addAll(distributorNodeInfos.values());
+
+ final Map<String, Integer> versionTagToCount = new HashMap<>();
+ int maxCount = -1;
+ String dominantVtag = null;
+ for (NodeInfo nodeInfo : nodeInfos) {
+ final String buildTag = nodeInfo.getVtag();
+ Integer count = versionTagToCount.get(buildTag);
+ count = count == null ? 1 : count + 1;
+ versionTagToCount.put(buildTag, count);
+ if (count > maxCount) {
+ maxCount = count;
+ dominantVtag = buildTag;
+ }
+ }
+ return dominantVtag == null ? TAG_NOT_SET : dominantVtag;
+ }
+ private void addTableHeader(final String name, final NodeType nodeType) {
+ table.addRow(new HtmlTable.Row().addCell(
+ new HtmlTable.Cell("Group " + name)
+ .addProperties(new HtmlTable.CellProperties()
+ .setColSpan(0)
+ .setBackgroundColor(0xccccff)
+ .align(HtmlTable.Orientation.LEFT))));
+ table.addRow(new HtmlTable.Row()
+ .setHeaderRow()
+ .addProperties(headerProperties)
+ .addProperties(new HtmlTable.CellProperties().setRowSpan(2))
+ .addCell(new HtmlTable.Cell(nodeType == NodeType.DISTRIBUTOR ? "Distributor" : "Storage"))
+ .addCell(new HtmlTable.Cell("Node states")
+ .addProperties(new HtmlTable.CellProperties().setColSpan(3).setRowSpan(1)))
+ .addCell(new HtmlTable.Cell("Build"))
+ .addCell(new HtmlTable.Cell("FC<sup>1)</sup>"))
+ .addCell(new HtmlTable.Cell("OCT<sup>2)</sup>"))
+ .addCell(new HtmlTable.Cell("SPT<sup>3)</sup>"))
+ .addCell(new HtmlTable.Cell("SSV<sup>4)</sup>"))
+ .addCell(new HtmlTable.Cell("PC<sup>5)</sup>"))
+ .addCell(new HtmlTable.Cell("ELW<sup>6)</sup>"))
+ .addCell(new HtmlTable.Cell("Start Time"))
+ .addCell(new HtmlTable.Cell("RPC Address")));
+ table.addRow(new HtmlTable.Row().setHeaderRow().addProperties(headerProperties)
+ .addCell(new HtmlTable.Cell("Reported"))
+ .addCell(new HtmlTable.Cell("Wanted"))
+ .addCell(new HtmlTable.Cell("System")));
+ }
+
+ private void renderNodesOneType(
+ final TreeMap<Integer, NodeInfo> nodeInfos,
+ final NodeType nodeType,
+ final Timer timer,
+ final ClusterState state,
+ final int maxPrematureCrashes,
+ final EventLog eventLog,
+ final String pathPrefix,
+ final String dominantVtag,
+ final String name) {
+ final long currentTime = timer.getCurrentTimeInMillis();
+ addTableHeader(name, nodeType);
+ for (final NodeInfo nodeInfo : nodeInfos.values()) {
+ HtmlTable.Row row = new HtmlTable.Row();
+ HtmlTable.CellProperties warning = new HtmlTable.CellProperties().setBackgroundColor(0xffffc0);
+ HtmlTable.CellProperties error = new HtmlTable.CellProperties().setBackgroundColor(0xffc0c0);
+ HtmlTable.CellProperties centered = new HtmlTable.CellProperties().align(HtmlTable.Orientation.CENTER);
+
+ // Add node index
+ row.addCell(new HtmlTable.Cell("<a href=\"" + pathPrefix + "/node=" + nodeInfo.getNode()
+ + "\">" + nodeInfo.getNodeIndex() + "</a>"));
+
+ // Add reported state
+ NodeState reportedState = nodeInfo.getReportedState().clone().setStartTimestamp(0);
+ row.addCell(new HtmlTable.Cell(HtmlTable.escape(reportedState.toString(true))));
+ if (!nodeInfo.getReportedState().getState().equals(State.UP)) {
+ row.getLastCell().addProperties(warning);
+ }
+
+ // Add wanted state
+ if (nodeInfo.getWantedState() == null || nodeInfo.getWantedState().getState().equals(State.UP)) {
+ row.addCell(new HtmlTable.Cell("-").addProperties(centered));
+ } else {
+ row.addCell(new HtmlTable.Cell(HtmlTable.escape(nodeInfo.getWantedState().toString(true))));
+ if (nodeInfo.getWantedState().toString(true).indexOf("Disabled by fleet controller") != -1) {
+ row.getLastCell().addProperties(error);
+ } else {
+ row.getLastCell().addProperties(warning);
+ }
+ }
+
+ // Add current state
+ NodeState ns = state.getNodeState(nodeInfo.getNode()).clone().setDescription("").setMinUsedBits(16);
+ if (state.getClusterState().oneOf("uir")) {
+ row.addCell(new HtmlTable.Cell(HtmlTable.escape(ns.toString(true))));
+ if (ns.getState().equals(State.DOWN)) {
+ row.getLastCell().addProperties(error);
+ } else if (ns.getState().oneOf("mi")) {
+ row.getLastCell().addProperties(warning);
+ }
+ } else {
+ row.addCell(new HtmlTable.Cell("Cluster " +
+ state.getClusterState().name().toLowerCase()).addProperties(error));
+ }
+
+ // Add build tag version.
+ final String buildTagText =
+ nodeInfo.getVtag() != null
+ ? nodeInfo.getVtag()
+ : TAG_NOT_SET;
+ row.addCell(new HtmlTable.Cell(buildTagText));
+ if (! dominantVtag.equals(nodeInfo.getVtag())) {
+ row.getLastCell().addProperties(warning);
+ }
+
+ // Add failed connection attempt count
+ row.addCell(new HtmlTable.Cell("" + nodeInfo.getConnectionAttemptCount()));
+ long timeSinceContact = nodeInfo.getTimeOfFirstFailingConnectionAttempt() == 0
+ ? 0 : currentTime - nodeInfo.getTimeOfFirstFailingConnectionAttempt();
+ if (timeSinceContact > 60 * 1000) {
+ row.getLastCell().addProperties(error);
+ } else if (nodeInfo.getConnectionAttemptCount() > 0) {
+ row.getLastCell().addProperties(warning);
+ }
+
+ // Add time since first failing
+ row.addCell(new HtmlTable.Cell((timeSinceContact / 1000) + " s"));
+ if (timeSinceContact > 60 * 1000) {
+ row.getLastCell().addProperties(error);
+ } else if (nodeInfo.getConnectionAttemptCount() > 0) {
+ row.getLastCell().addProperties(warning);
+ }
+
+ // State pending time
+ if (nodeInfo.getLatestNodeStateRequestTime() == null) {
+ row.addCell(new HtmlTable.Cell("-").addProperties(centered));
+ } else {
+ row.addCell(new HtmlTable.Cell(HtmlTable.escape(RealTimer.printDuration(
+ currentTime - nodeInfo.getLatestNodeStateRequestTime()))));
+ }
+
+ // System state version
+ row.addCell(new HtmlTable.Cell("" + nodeInfo.getSystemStateVersionAcknowledged()));
+ if (nodeInfo.getSystemStateVersionAcknowledged() < state.getVersion() - 2) {
+ row.getLastCell().addProperties(error);
+ } else if (nodeInfo.getSystemStateVersionAcknowledged() < state.getVersion()) {
+ row.getLastCell().addProperties(warning);
+ }
+
+ // Premature crashes
+ row.addCell(new HtmlTable.Cell("" + nodeInfo.getPrematureCrashCount()));
+ if (nodeInfo.getPrematureCrashCount() >= maxPrematureCrashes) {
+ row.getLastCell().addProperties(error);
+ } else if (nodeInfo.getPrematureCrashCount() > 0) {
+ row.getLastCell().addProperties(warning);
+ }
+
+ // Events last week
+ int nodeEvents = eventLog.getNodeEventsSince(nodeInfo.getNode(),
+ currentTime - eventLog.getRecentTimePeriod());
+ row.addCell(new HtmlTable.Cell("" + nodeEvents));
+ if (nodeEvents > 20) {
+ row.getLastCell().addProperties(error);
+ } else if (nodeEvents > 3) {
+ row.getLastCell().addProperties(warning);
+ }
+
+ // Start time
+ if (nodeInfo.getStartTimestamp() == 0) {
+ row.addCell(new HtmlTable.Cell("-").addProperties(error).addProperties(centered));
+ } else {
+ String startTime = RealTimer.printDateNoMilliSeconds(
+ 1000 * nodeInfo.getStartTimestamp(), utcTimeZone);
+ row.addCell(new HtmlTable.Cell(HtmlTable.escape(startTime)));
+ }
+
+ // RPC address
+ if (nodeInfo.getRpcAddress() == null) {
+ row.addCell(new HtmlTable.Cell("-").addProperties(error));
+ } else {
+ row.addCell(new HtmlTable.Cell(HtmlTable.escape(nodeInfo.getRpcAddress())));
+ if (nodeInfo.isRpcAddressOutdated()) {
+ row.getLastCell().addProperties(warning);
+ }
+ }
+ table.addRow(row);
+ }
+ }
+ private void addFooter(final StringBuilder contentBuilder, final long stableStateTimePeriode) {
+ contentBuilder.append("<font size=\"-1\">\n")
+ .append("1) FC - Failed connections - We have tried to connect to the nodes this many times " +
+ "without being able to contact it.<br>\n")
+ .append("2) OCT - Out of contact time - Time in seconds we have failed to contact the node.<br>\n")
+ .append("3) SPT - State pending time - Time the current getNodeState request has been " +
+ "pending.<br>\n")
+ .append("4) SSV - System state version - The latest system state version the node has " +
+ "acknowledged.<br>\n")
+ .append("5) PC - Premature crashes - Number of times node has crashed since last time it had " +
+ "been stable in up or down state for more than "
+ + RealTimer.printDuration(stableStateTimePeriode) + ".<br>\n")
+ .append("6) ELW - Events last week - The number of events that has occured on this node the " +
+ "last week. (Or shorter period if a week haven't passed since restart or more than " +
+ "max events to keep in node event log have happened during last week.)<br>\n")
+ .append("</font>\n");
+ }
+ }
+
+ public Table createNewClusterHtmlTable(final String clusterName, final int slobrokGenerationCount) {
+ return new Table(clusterName, slobrokGenerationCount);
+ }
+} \ No newline at end of file
diff --git a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/status/statuspage/package-info.java b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/status/statuspage/package-info.java
new file mode 100644
index 00000000000..11e5cd44b44
--- /dev/null
+++ b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/status/statuspage/package-info.java
@@ -0,0 +1,6 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+@ExportPackage
+package com.yahoo.vespa.clustercontroller.core.status.statuspage;
+
+import com.yahoo.osgi.annotation.ExportPackage;
+
diff --git a/clustercontroller-core/src/main/resources/configdefinitions/.gitignore b/clustercontroller-core/src/main/resources/configdefinitions/.gitignore
new file mode 100644
index 00000000000..e69de29bb2d
--- /dev/null
+++ b/clustercontroller-core/src/main/resources/configdefinitions/.gitignore
diff --git a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/ClusterStateViewTest.java b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/ClusterStateViewTest.java
new file mode 100644
index 00000000000..a62a8676096
--- /dev/null
+++ b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/ClusterStateViewTest.java
@@ -0,0 +1,111 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.vespa.clustercontroller.core;
+
+import com.yahoo.vdslib.state.*;
+import com.yahoo.vespa.clustercontroller.core.hostinfo.HostInfo;
+import com.yahoo.vespa.clustercontroller.core.hostinfo.StorageNodeStatsBridge;
+import org.junit.Test;
+
+import java.util.HashMap;
+import java.util.Map;
+import java.util.Set;
+
+import static org.junit.Assert.assertEquals;
+import static org.mockito.Mockito.*;
+
+/**
+ * @author hakon
+ * @since 5.34
+ */
+public class ClusterStateViewTest {
+ final Map<Integer, String> hostnames = new HashMap<>();
+ final NodeInfo nodeInfo = mock(NodeInfo.class);
+ final Node node = mock(Node.class);
+ final ClusterStatsAggregator statsAggregator = mock(ClusterStatsAggregator.class);
+ final StorageMergeStats storageStats = mock(StorageMergeStats.class);
+ final ClusterState clusterState = mock(ClusterState.class);
+ final MetricUpdater metricUpdater = mock(MetricUpdater.class);
+ final ClusterStateView clusterStateView = new ClusterStateView(clusterState, statsAggregator, metricUpdater);
+
+ HostInfo createHostInfo(String version) {
+ return HostInfo.createHostInfo("{ \"cluster-state-version\": " + version + " }");
+ }
+
+ @Test
+ public void testWrongNodeType() {
+ when(nodeInfo.isDistributor()).thenReturn(false);
+
+ clusterStateView.handleUpdatedHostInfo(hostnames, nodeInfo, createHostInfo("101"));
+
+ verify(statsAggregator, never()).updateForDistributor(any(), anyInt(), any());
+ }
+
+
+
+ @Test
+ public void testStateVersionMismatch() {
+ when(nodeInfo.isDistributor()).thenReturn(true);
+ when(clusterState.getVersion()).thenReturn(101);
+
+ clusterStateView.handleUpdatedHostInfo(hostnames, nodeInfo, createHostInfo("22"));
+
+ verify(statsAggregator, never()).updateForDistributor(any(), anyInt(), any());
+ }
+
+ @Test
+ public void testFailToGetStats() {
+ when(nodeInfo.isDistributor()).thenReturn(true);
+ when(clusterState.getVersion()).thenReturn(101);
+
+ clusterStateView.handleUpdatedHostInfo(hostnames, nodeInfo, createHostInfo("22"));
+
+ verify(statsAggregator, never()).updateForDistributor(any(), anyInt(), any());
+ }
+
+ @Test
+ public void testSuccessCase() {
+ when(nodeInfo.isDistributor()).thenReturn(true);
+ HostInfo hostInfo = HostInfo.createHostInfo(
+ "{" +
+ " \"cluster-state-version\": 101," +
+ " \"distributor\": {\n" +
+ " \"storage-nodes\": [\n" +
+ " {\n" +
+ " \"node-index\": 3\n" +
+ " }\n" +
+ " ]}}");
+
+
+ when(nodeInfo.getNodeIndex()).thenReturn(3);
+ when(clusterState.getVersion()).thenReturn(101);
+
+ clusterStateView.handleUpdatedHostInfo(hostnames, nodeInfo, hostInfo);
+
+ verify(statsAggregator).updateForDistributor(
+ hostnames, 3, StorageNodeStatsBridge.generate(hostInfo.getDistributor()));
+ }
+
+ @Test
+ public void testIndicesOfUpNodes() {
+ when(clusterState.getNodeCount(NodeType.DISTRIBUTOR)).thenReturn(7);
+
+ NodeState nodeState = mock(NodeState.class);
+ when(nodeState.getState()).
+ thenReturn(State.MAINTENANCE). // 0
+ thenReturn(State.RETIRED). // 1
+ thenReturn(State.INITIALIZING). // 2
+ thenReturn(State.DOWN).
+ thenReturn(State.STOPPING).
+ thenReturn(State.UNKNOWN).
+ thenReturn(State.UP); // 6
+
+ when(clusterState.getNodeState(any())).thenReturn(nodeState);
+
+ Set<Integer> indices = ClusterStateView.getIndicesOfUpNodes(clusterState, NodeType.DISTRIBUTOR);
+ assertEquals(4, indices.size());
+ assert(indices.contains(0));
+ assert(indices.contains(1));
+ assert(indices.contains(2));
+ assert(indices.contains(6));
+ }
+}
diff --git a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/ClusterStatsAggregatorTest.java b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/ClusterStatsAggregatorTest.java
new file mode 100644
index 00000000000..e87cad135c8
--- /dev/null
+++ b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/ClusterStatsAggregatorTest.java
@@ -0,0 +1,217 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.vespa.clustercontroller.core;
+
+import org.junit.Test;
+import org.junit.runner.RunWith;
+import org.mockito.runners.MockitoJUnitRunner;
+
+import java.util.*;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertNotNull;
+import static org.junit.Assert.assertTrue;
+import static org.mockito.Matchers.any;
+import static org.mockito.Mockito.*;
+
+/**
+ * @author hakon
+ * @since 5.34
+ */
+@RunWith(MockitoJUnitRunner.class)
+public class ClusterStatsAggregatorTest {
+
+ final Set<Integer> distributors = new HashSet<>();
+ final Set<Integer> storageNodes = new HashSet<>();
+ final Map<Integer, String> hostnames = new HashMap<>();
+ final MetricUpdater updater = mock(MetricUpdater.class);
+ StorageMergeStats storageStats;
+
+ private void addDistributors(Integer... indices) {
+ for (Integer i : indices) {
+ distributors.add(i);
+ }
+ }
+
+ private static class StorageNodeSpec {
+ public StorageNodeSpec(Integer index, String hostname) {
+ this.index = index;
+ this.hostname = hostname;
+ }
+ public Integer index;
+ public String hostname;
+ }
+
+ private void addStorageNodes(StorageNodeSpec... specs) {
+ for (StorageNodeSpec spec : specs) {
+ storageNodes.add(spec.index);
+ hostnames.put(spec.index, spec.hostname);
+ }
+ storageStats = new StorageMergeStats(storageNodes);
+ }
+
+ private void putStorageStats(int index, int syncing, int copyingIn, int movingOut, int copyingOut) {
+ storageStats.getStorageNode(index).set(createStats(index, syncing, copyingIn, movingOut, copyingOut));
+ }
+
+ private static NodeMergeStats createStats(int index, int syncing, int copyingIn, int movingOut, int copyingOut) {
+ return new NodeMergeStats(
+ index,
+ new NodeMergeStats.Amount(syncing),
+ new NodeMergeStats.Amount(copyingIn),
+ new NodeMergeStats.Amount(movingOut),
+ new NodeMergeStats.Amount(copyingOut));
+ }
+
+ @Test
+ public void testSimple() {
+ final int distributorIndex = 1;
+ addDistributors(distributorIndex);
+
+ final int storageNodeIndex = 11;
+ addStorageNodes(new StorageNodeSpec(storageNodeIndex, "storage-node"));
+
+ putStorageStats(storageNodeIndex, 5, 6, 7, 8);
+
+ ClusterStatsAggregator aggregator = new ClusterStatsAggregator(distributors, storageNodes, updater);
+ aggregator.updateForDistributor(hostnames, distributorIndex, storageStats);
+
+ Map<String, NodeMergeStats> expectedStorageNodeStats = new HashMap<>();
+ expectedStorageNodeStats.put("storage-node", createStats(storageNodeIndex, 5, 6, 7, 8));
+
+ verify(updater).updateMergeOpMetrics(expectedStorageNodeStats);
+ }
+
+ @Test
+ public void testComplex() {
+ final int distributor1 = 1;
+ final int distributor2 = 2;
+ addDistributors(distributor1, distributor2);
+
+ final int storageNode1 = 11;
+ final int storageNode2 = 12;
+ addStorageNodes(
+ new StorageNodeSpec(storageNode1, "storage-node-1"),
+ new StorageNodeSpec(storageNode2, "storage-node-2"));
+
+ ClusterStatsAggregator aggregator = new ClusterStatsAggregator(distributors, storageNodes, updater);
+
+ // Distributor 1.
+ putStorageStats(storageNode1, 0, 1, 2, 3);
+ putStorageStats(storageNode2, 20, 21, 22, 23);
+ aggregator.updateForDistributor(hostnames, distributor1, storageStats);
+
+ // Distributor 2.
+ putStorageStats(storageNode1, 10, 11, 12, 13);
+ putStorageStats(storageNode2, 30, 31, 32, 33);
+ aggregator.updateForDistributor(hostnames, distributor2, storageStats);
+
+ Map<String, NodeMergeStats> expectedStorageNodeStats = new HashMap<>();
+ expectedStorageNodeStats.put("storage-node-1", createStats(storageNode1, 0 + 10, 1 + 11, 2 + 12, 3 + 13));
+ expectedStorageNodeStats.put("storage-node-2", createStats(storageNode2, 20 + 30, 21 + 31, 22 + 32, 23 + 33));
+
+ verify(updater, times(1)).updateMergeOpMetrics(expectedStorageNodeStats);
+ }
+
+ @Test
+ public void testHashCodeCache() {
+ final int distributor1 = 1;
+ final int distributor2 = 2;
+ addDistributors(distributor1, distributor2);
+
+ final int storageNode1 = 11;
+ final int storageNode2 = 12;
+ addStorageNodes(
+ new StorageNodeSpec(storageNode1, "storage-node-1"),
+ new StorageNodeSpec(storageNode2, "storage-node-2"));
+
+ ClusterStatsAggregator aggregator = new ClusterStatsAggregator(distributors, storageNodes, updater);
+
+ // Distributor 1.
+ putStorageStats(storageNode1, 0, 1, 2, 3);
+ putStorageStats(storageNode2, 20, 21, 22, 23);
+ aggregator.updateForDistributor(hostnames, distributor1, storageStats);
+
+ // Distributor 2.
+ putStorageStats(storageNode1, 10, 11, 12, 13);
+ putStorageStats(storageNode2, 30, 31, 32, 33);
+ aggregator.updateForDistributor(hostnames, distributor2, storageStats);
+
+ // If we add call another updateForDistributor with the same arguments, updateMergeOpMetrics() should not be called.
+ // See times(1) below.
+ aggregator.updateForDistributor(hostnames, distributor2, storageStats);
+
+ Map<String, NodeMergeStats> expectedStorageNodeStats = new HashMap<>();
+ expectedStorageNodeStats.put("storage-node-1", createStats(storageNode1, 0 + 10, 1 + 11, 2 + 12, 3 + 13));
+ expectedStorageNodeStats.put("storage-node-2", createStats(storageNode2, 20 + 30, 21 + 31, 22 + 32, 23 + 33));
+
+
+ verify(updater, times(1)).updateMergeOpMetrics(expectedStorageNodeStats);
+ }
+
+ @Test
+ public void testUnknownDistributor() {
+ final int upDistributor = 1;
+ final int DownDistributorIndex = 2;
+ addDistributors(upDistributor);
+
+ final int storageNodeIndex = 11;
+ addStorageNodes(new StorageNodeSpec(storageNodeIndex, "storage-node"));
+
+ putStorageStats(storageNodeIndex, 5, 6, 7, 8);
+
+ ClusterStatsAggregator aggregator = new ClusterStatsAggregator(distributors, storageNodes, updater);
+ aggregator.updateForDistributor(hostnames, DownDistributorIndex, storageStats);
+
+ verify(updater, never()).updateMergeOpMetrics(any());
+ }
+
+ @Test
+ public void testMoreStorageNodesThanDistributors() {
+ final int distributor1 = 1;
+ addDistributors(distributor1);
+
+ final int storageNode1 = 11;
+ final int storageNode2 = 12;
+ addStorageNodes(
+ new StorageNodeSpec(storageNode1, "storage-node-1"),
+ new StorageNodeSpec(storageNode2, "storage-node-2"));
+
+ ClusterStatsAggregator aggregator = new ClusterStatsAggregator(distributors, storageNodes, updater);
+
+ // Distributor 1.
+ putStorageStats(storageNode1, 0, 1, 2, 3);
+ putStorageStats(storageNode2, 20, 21, 22, 23);
+ aggregator.updateForDistributor(hostnames, distributor1, storageStats);
+
+ Map<String, NodeMergeStats> expectedStorageNodeStats = new HashMap<>();
+ expectedStorageNodeStats.put("storage-node-1", createStats(storageNode1, 0, 1, 2, 3));
+ expectedStorageNodeStats.put("storage-node-2", createStats(storageNode2, 20, 21, 22, 23));
+
+ verify(updater, times(1)).updateMergeOpMetrics(expectedStorageNodeStats);
+ }
+
+ @Test
+ public void testMoreDistributorsThanStorageNodes() {
+ final int distributor1 = 1;
+ final int distributor2 = 2;
+ addDistributors(distributor1, distributor2);
+
+ final int storageNode1 = 11;
+ addStorageNodes(new StorageNodeSpec(storageNode1, "storage-node-1"));
+
+ ClusterStatsAggregator aggregator = new ClusterStatsAggregator(distributors, storageNodes, updater);
+
+ // Distributor 1.
+ putStorageStats(storageNode1, 0, 1, 2, 3);
+ aggregator.updateForDistributor(hostnames, distributor1, storageStats);
+
+ // Distributor 2.
+ putStorageStats(storageNode1, 10, 11, 12, 13);
+ aggregator.updateForDistributor(hostnames, distributor2, storageStats);
+
+ Map<String, NodeMergeStats> expectedStorageNodeStats = new HashMap<>();
+ expectedStorageNodeStats.put("storage-node-1", createStats(storageNode1, 0 + 10, 1 + 11, 2 + 12, 3 + 13));
+
+ verify(updater, times(1)).updateMergeOpMetrics(expectedStorageNodeStats);
+ }
+}
diff --git a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/ContentClusterHtmlRendrerTest.java b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/ContentClusterHtmlRendrerTest.java
new file mode 100644
index 00000000000..5cf5060e240
--- /dev/null
+++ b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/ContentClusterHtmlRendrerTest.java
@@ -0,0 +1,75 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.vespa.clustercontroller.core;
+
+import com.yahoo.vdslib.state.ClusterState;
+import com.yahoo.vespa.clustercontroller.core.hostinfo.HostInfo;
+import com.yahoo.vespa.clustercontroller.core.status.statuspage.VdsClusterHtmlRendrer;
+import org.codehaus.jettison.json.JSONException;
+import org.codehaus.jettison.json.JSONWriter;
+import org.junit.Before;
+import org.junit.Test;
+
+import java.io.StringWriter;
+import java.io.Writer;
+import java.text.ParseException;
+import java.util.TreeMap;
+
+import static org.hamcrest.core.Is.is;
+import static org.junit.Assert.*;
+import static org.mockito.Mockito.mock;
+
+public class ContentClusterHtmlRendrerTest {
+ VdsClusterHtmlRendrer rendrer = new VdsClusterHtmlRendrer();
+ final static int slobrokGeneration = 34;
+ final static String clusterName = "clustername";
+ final TreeMap<Integer, NodeInfo> storageNodeInfoByIndex = new TreeMap<>();
+ final TreeMap<Integer, NodeInfo> distributorNodeInfoByIndex = new TreeMap<>();
+ String result;
+
+ @Before
+ public void before() throws JSONException, ParseException {
+ final ClusterState state = new ClusterState("version:34633 bits:24 distributor:211 storage:211");
+ final EventLog eventLog = new EventLog(new FakeTimer(), null);
+
+ final VdsClusterHtmlRendrer.Table table = rendrer.createNewClusterHtmlTable(clusterName, slobrokGeneration);
+
+ final ContentCluster contentCluster = mock(ContentCluster.class);
+
+ for (int x = 0; x < 10; x++) {
+ NodeInfo nodeInfo = new DistributorNodeInfo(contentCluster, x, "dist " + x, null);
+ final Writer writer = new StringWriter();
+ new JSONWriter(writer)
+ .object().key("vtag")
+ // Let one node have a different release tag.
+ .object().key("version").value("release1" + (x == 2 ? "bad" : ""))
+ .endObject()
+ .endObject();
+ nodeInfo.setHostInfo(HostInfo.createHostInfo(writer.toString()));
+ distributorNodeInfoByIndex.put(x, nodeInfo);
+ }
+ storageNodeInfoByIndex.put(2, new StorageNodeInfo(contentCluster, 2, false, "storage" + 2, null));
+
+ table.renderNodes(
+ storageNodeInfoByIndex,
+ distributorNodeInfoByIndex,
+ new FakeTimer(),
+ state,
+ 10,
+ eventLog,
+ "pathPrefix",
+ "name");
+ final StringBuilder stringBuilder = new StringBuilder();
+ table.addTable(stringBuilder, 34);
+ result = stringBuilder.toString();
+ }
+
+ @Test
+ public void testVtagRendering() throws Exception {
+ // 9 distribution nodes should have green tag on release1.
+ assertThat(result.split("<td bgcolor=\"#c0ffc0\" align=\"right\"><nobr>release1</nobr></td>").length, is(10));
+ // 1 distribution node should have warning on release1bad.
+ assertThat(result.split("<td bgcolor=\"#ffffc0\" align=\"right\"><nobr>release1bad</nobr></td>").length, is(2));
+ // 1 storage node should should have warning on release "not set".
+ assertThat(result.split("<td bgcolor=\"#ffffc0\" align=\"right\"><nobr>not set</nobr></td>").length, is(2));
+ }
+}
diff --git a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/DatabaseTest.java b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/DatabaseTest.java
new file mode 100644
index 00000000000..65661d99d4a
--- /dev/null
+++ b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/DatabaseTest.java
@@ -0,0 +1,166 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.vespa.clustercontroller.core;
+
+import com.yahoo.jrt.*;
+import com.yahoo.jrt.StringValue;
+import com.yahoo.vdslib.state.Node;
+import com.yahoo.vdslib.state.NodeState;
+import com.yahoo.vdslib.state.NodeType;
+import com.yahoo.vdslib.state.State;
+import org.junit.After;
+import org.junit.Ignore;
+import org.junit.Test;
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertTrue;
+
+import java.util.Map;
+import java.util.TreeMap;
+import java.util.logging.Logger;
+
+public class DatabaseTest extends FleetControllerTest {
+
+ private static Logger log = Logger.getLogger(DatabaseTest.class.getName());
+
+ protected Supervisor supervisor;
+
+ @After
+ public void tearDown() throws Exception {
+ if (supervisor != null) {
+ supervisor.transport().shutdown().join();
+ }
+ super.tearDown();
+ }
+
+ private void setWantedState(Node n, NodeState ns, Map<Node, NodeState> wantedStates) {
+ int rpcPort = fleetController.getRpcPort();
+ if (supervisor == null) {
+ supervisor = new Supervisor(new Transport());
+ }
+ Target connection = supervisor.connect(new Spec(rpcPort));
+ assertTrue(connection.isValid());
+
+ Request req = new Request("setNodeState");
+ req.parameters().add(new StringValue("storage/cluster.mycluster/" + n.getType().toString() + "/" + n.getIndex()));
+ req.parameters().add(new StringValue(ns.serialize(true)));
+ connection.invokeSync(req, timeoutS);
+ assertEquals(req.toString(), ErrorCode.NONE, req.errorCode());
+ assertTrue(req.toString(), req.checkReturnTypes("s"));
+ wantedStates.put(n, ns);
+ }
+
+ // These tests work in isolation but causes other tests to hang
+ @Ignore
+ @Test
+ public void testWantedStatesInZooKeeper() throws Exception {
+ startingTest("DatabaseTest::testWantedStatesInZooKeeper");
+ FleetControllerOptions options = new FleetControllerOptions("mycluster");
+ options.zooKeeperServerAddress = "127.0.0.1";
+ setUpFleetController(true, options);
+ setUpVdsNodes(true, new DummyVdsNodeOptions());
+ log.info("WAITING FOR STABLE SYSTEM");
+ waitForStableSystem();
+
+
+ log.info("VALIDATE STARTING WANTED STATES");
+ Map<Node, NodeState> wantedStates = new TreeMap<>();
+ for (DummyVdsNode node : nodes) {
+ wantedStates.put(node.getNode(), new NodeState(node.getType(), State.UP));
+ }
+ for (DummyVdsNode node : nodes) { assertEquals(node.getNode().toString(), wantedStates.get(node.getNode()), fleetController.getWantedNodeState(node.getNode())); }
+
+ log.info("SET A WANTED STATE AND SEE THAT IT GETS PROPAGATED");
+ setWantedState(new Node(NodeType.STORAGE, 3), new NodeState(NodeType.STORAGE, State.MAINTENANCE).setDescription("Yoo"), wantedStates);
+ waitForState("version:\\d+ distributor:10 storage:10 .3.s:m");
+ for (DummyVdsNode node : nodes) { assertEquals(node.getNode().toString(), wantedStates.get(node.getNode()), fleetController.getWantedNodeState(node.getNode())); }
+
+ log.info("SET ANOTHER WANTED STATE AND SEE THAT IT GETS PROPAGATED");
+ setWantedState(new Node(NodeType.DISTRIBUTOR, 2), new NodeState(NodeType.DISTRIBUTOR, State.DOWN), wantedStates);
+ waitForState("version:\\d+ distributor:10 .2.s:d storage:10 .3.s:m");
+ for (DummyVdsNode node : nodes) { assertEquals(node.getNode().toString(), wantedStates.get(node.getNode()), fleetController.getWantedNodeState(node.getNode())); }
+
+ log.info("SET YET ANOTHER WANTED STATE AND SEE THAT IT GETS PROPAGATED");
+ setWantedState(new Node(NodeType.STORAGE, 7), new NodeState(NodeType.STORAGE, State.RETIRED).setDescription("We wanna replace this node"), wantedStates);
+ waitForState("version:\\d+ distributor:10 .2.s:d storage:10 .3.s:m .7.s:r");
+ for (DummyVdsNode node : nodes) { assertEquals(node.getNode().toString(), wantedStates.get(node.getNode()), fleetController.getWantedNodeState(node.getNode())); }
+
+ log.info("CHECK THAT WANTED STATES PERSIST FLEETCONTROLLER RESTART");
+ stopFleetController();
+ startFleetController();
+
+ waitForState("version:\\d+ distributor:10 .2.s:d storage:10 .3.s:m .7.s:r");
+ for (DummyVdsNode node : nodes) { assertEquals(node.getNode().toString(), wantedStates.get(node.getNode()), fleetController.getWantedNodeState(node.getNode())); }
+
+ log.info("CLEAR WANTED STATE");
+ setWantedState(new Node(NodeType.STORAGE, 7), new NodeState(NodeType.STORAGE, State.UP), wantedStates);
+ for (DummyVdsNode node : nodes) { assertEquals(node.getNode().toString(), wantedStates.get(node.getNode()), fleetController.getWantedNodeState(node.getNode())); }
+
+ setWantedState(new Node(NodeType.DISTRIBUTOR, 5), new NodeState(NodeType.DISTRIBUTOR, State.DOWN), wantedStates);
+ for (DummyVdsNode node : nodes) { assertEquals(node.getNode().toString(), wantedStates.get(node.getNode()), fleetController.getWantedNodeState(node.getNode())); }
+
+ setWantedState(new Node(NodeType.DISTRIBUTOR, 2), new NodeState(NodeType.DISTRIBUTOR, State.UP), wantedStates);
+ for (DummyVdsNode node : nodes) { assertEquals(node.getNode().toString(), wantedStates.get(node.getNode()), fleetController.getWantedNodeState(node.getNode())); }
+
+ setWantedState(new Node(NodeType.STORAGE, 9), new NodeState(NodeType.STORAGE, State.DOWN), wantedStates);
+ for (DummyVdsNode node : nodes) { assertEquals(node.getNode().toString(), wantedStates.get(node.getNode()), fleetController.getWantedNodeState(node.getNode())); }
+ }
+
+ // These tests work in isolation but causes other tests to hang
+ @Ignore
+ @Test
+ public void testWantedStateOfUnknownNode() throws Exception {
+ startingTest("DatabaseTest::testWantedStatesOfUnknownNode");
+ FleetControllerOptions options = new FleetControllerOptions("mycluster");
+ options.minRatioOfDistributorNodesUp = 0;
+ options.minRatioOfStorageNodesUp = 0;
+ options.zooKeeperServerAddress = "localhost";
+ setUpFleetController(true, options);
+ setUpVdsNodes(true, new DummyVdsNodeOptions());
+ waitForStableSystem();
+
+ // Populate map of wanted states we should have
+ Map<Node, NodeState> wantedStates = new TreeMap<>();
+ for (DummyVdsNode node : nodes) {
+ wantedStates.put(node.getNode(), new NodeState(node.getType(), State.UP));
+ }
+
+ for (DummyVdsNode node : nodes) { assertEquals(node.getNode().toString(), wantedStates.get(node.getNode()), fleetController.getWantedNodeState(node.getNode())); }
+
+ setWantedState(new Node(NodeType.STORAGE, 1), new NodeState(NodeType.STORAGE, State.MAINTENANCE).setDescription("Yoo"), wantedStates);
+ waitForState("version:\\d+ distributor:10 storage:10 .1.s:m");
+ for (DummyVdsNode node : nodes) { assertEquals(node.getNode().toString(), wantedStates.get(node.getNode()), fleetController.getWantedNodeState(node.getNode())); }
+
+ // This should not show up, as it is down
+ setWantedState(new Node(NodeType.DISTRIBUTOR, 8), new NodeState(NodeType.DISTRIBUTOR, State.DOWN), wantedStates);
+ waitForState("version:\\d+ distributor:10 .8.s:d storage:10 .1.s:m");
+ for (DummyVdsNode node : nodes) { assertEquals(node.getNode().toString(), wantedStates.get(node.getNode()), fleetController.getWantedNodeState(node.getNode())); }
+
+ // This should show up, as down nodes can be turned to maintenance
+ setWantedState(new Node(NodeType.STORAGE, 6), new NodeState(NodeType.STORAGE, State.MAINTENANCE).setDescription("foobar"), wantedStates);
+ waitForState("version:\\d+ distributor:10 .8.s:d storage:10 .1.s:m .6.s:m");
+ for (DummyVdsNode node : nodes) { assertEquals(node.getNode().toString(), wantedStates.get(node.getNode()), fleetController.getWantedNodeState(node.getNode())); }
+
+ // This should not show up, as we cannot turn a down node retired
+ setWantedState(new Node(NodeType.STORAGE, 7), new NodeState(NodeType.STORAGE, State.RETIRED).setDescription("foobar"), wantedStates);
+ waitForState("version:\\d+ distributor:10 .8.s:d storage:10 .1.s:m .6.s:m .7.s:r");
+ for (DummyVdsNode node : nodes) { assertEquals(node.getNode().toString(), wantedStates.get(node.getNode()), fleetController.getWantedNodeState(node.getNode())); }
+
+ // This should not show up, as it is down
+ setWantedState(new Node(NodeType.STORAGE, 8), new NodeState(NodeType.STORAGE, State.DOWN).setDescription("foobar"), wantedStates);
+ waitForState("version:\\d+ distributor:10 .8.s:d storage:10 .1.s:m .6.s:m .7.s:r .8.s:d");
+ for (DummyVdsNode node : nodes) { assertEquals(node.getNode().toString(), wantedStates.get(node.getNode()), fleetController.getWantedNodeState(node.getNode())); }
+
+ stopFleetController();
+ for (int i=6; i<nodes.size(); ++i) nodes.get(i).disconnect();
+ startFleetController();
+
+ waitForState("version:\\d+ distributor:3 storage:7 .1.s:m .3.s:d .4.s:d .5.s:d .6.s:m");
+
+ setWantedState(new Node(NodeType.STORAGE, 6), new NodeState(NodeType.STORAGE, State.UP), wantedStates);
+ waitForState("version:\\d+ distributor:3 storage:3 .1.s:m");
+
+ for (int i=6; i<nodes.size(); ++i) nodes.get(i).connect();
+ waitForState("version:\\d+ distributor:10 .8.s:d storage:10 .1.s:m .7.s:r .8.s:d");
+ for (DummyVdsNode node : nodes) { assertEquals(node.getNode().toString(), wantedStates.get(node.getNode()), fleetController.getWantedNodeState(node.getNode())); }
+ }
+
+}
diff --git a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/DistributionBitCountTest.java b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/DistributionBitCountTest.java
new file mode 100644
index 00000000000..1adb0dcad7d
--- /dev/null
+++ b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/DistributionBitCountTest.java
@@ -0,0 +1,110 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.vespa.clustercontroller.core;
+
+import com.yahoo.vdslib.distribution.ConfiguredNode;
+import com.yahoo.vdslib.state.ClusterState;
+import com.yahoo.vdslib.state.NodeState;
+import com.yahoo.vdslib.state.NodeType;
+import com.yahoo.vdslib.state.State;
+import org.junit.Test;
+import static org.junit.Assert.assertEquals;
+
+import java.util.ArrayList;
+import java.util.List;
+
+public class DistributionBitCountTest extends FleetControllerTest {
+
+ private void setUpSystem(String testName) throws Exception {
+ List<ConfiguredNode> configuredNodes = new ArrayList<>();
+ for (int i = 0 ; i < 10; i++)
+ configuredNodes.add(new ConfiguredNode(i, false));
+ FleetControllerOptions options = new FleetControllerOptions("mycluster", configuredNodes);
+ options.distributionBits = 17;
+ setUpFleetController(false, options);
+ startingTest(testName);
+ List<DummyVdsNode> nodes = setUpVdsNodes(false, new DummyVdsNodeOptions(), true, configuredNodes);
+ for (DummyVdsNode node : nodes) {
+ node.setNodeState(new NodeState(node.getType(), State.UP).setMinUsedBits(20));
+ node.connect();
+ }
+ waitForState("version:\\d+ bits:17 distributor:10 storage:10");
+ }
+
+ /**
+ * Test that then altering config to increased bit count, that a new system state is sent out if the least split storagenode use more bits.
+ * Test that then altering config to increased bit count, that a new system state is not sent out (and not altered) if a storagenode needs it to be no further split.
+ */
+ @Test
+ public void testDistributionBitCountConfigIncrease() throws Exception {
+ setUpSystem("DistributionBitCountTest::testDistributionBitCountConfigIncrease");
+ options.distributionBits = 20;
+ fleetController.updateOptions(options, 0);
+ ClusterState currentState = waitForState("version:\\d+ bits:20 distributor:10 storage:10");
+
+ int version = currentState.getVersion();
+ options.distributionBits = 23;
+ fleetController.updateOptions(options, 0);
+ assertEquals(version, currentState.getVersion());
+ }
+
+ /**
+ * Test that then altering config to decrease bit count, that a new system state is sent out with that bit count.
+ */
+ @Test
+ public void testDistributionBitCountConfigDecrease() throws Exception {
+ setUpSystem("DistributionBitCountTest::testDistributionBitCountConfigDecrease");
+ options.distributionBits = 12;
+ fleetController.updateOptions(options, 0);
+ waitForState("version:\\d+ bits:12 distributor:10 storage:10");
+ }
+
+
+ /**
+ * Test that when storage node reports higher bit count, but another storage
+ * node has equally low bitcount, the fleetcontroller does nothing.
+ *
+ * Test that when storage node reports higher bit count, but another storage
+ * node now being lowest, the fleetcontroller adjusts to use that bit in system state.
+ */
+ @Test
+ public void testStorageNodeReportingHigherBitCount() throws Exception {
+ setUpSystem("DistributionBitCountTest::testStorageNodeReportingHigherBitCount");
+
+ nodes.get(1).setNodeState(new NodeState(NodeType.STORAGE, State.UP).setMinUsedBits(11));
+ nodes.get(3).setNodeState(new NodeState(NodeType.STORAGE, State.UP).setMinUsedBits(11));
+
+ ClusterState startState = waitForState("version:\\d+ bits:11 distributor:10 storage:10");
+ ClusterState state = waitForClusterStateIncludingNodesWithMinUsedBits(11, 2);
+
+ nodes.get(1).setNodeState(new NodeState(NodeType.STORAGE, State.UP).setMinUsedBits(12));
+ assertEquals(state + "->" + fleetController.getSystemState(), startState.getVersion(), fleetController.getSystemState().getVersion());
+
+ for (int i = 0; i < 10; ++i) {
+ nodes.get(i).setNodeState(new NodeState(NodeType.STORAGE, State.UP).setMinUsedBits(17));
+ }
+ assertEquals(startState.getVersion() + 1, waitForState("version:\\d+ bits:17 distributor:10 storage:10").getVersion());
+ }
+
+ /**
+ * Test that then storage node report lower bit count, but another storage node with equally low bitcount, the fleetcontroller does nothing.
+ * Test that then storage node report lower bit count, and then becomes the smallest, the fleetcontroller adjusts to use that bit in system state.
+ */
+ @Test
+ public void testStorageNodeReportingLowerBitCount() throws Exception {
+ setUpSystem("DistributionBitCountTest::testStorageNodeReportingLowerBitCount");
+
+ nodes.get(1).setNodeState(new NodeState(NodeType.STORAGE, State.UP).setMinUsedBits(13));
+ ClusterState currentState = waitForState("version:\\d+ bits:13 distributor:10 storage:10");
+ int version = currentState.getVersion();
+
+ nodes.get(3).setNodeState(new NodeState(NodeType.STORAGE, State.UP).setMinUsedBits(15));
+ assertEquals(version, currentState.getVersion());
+
+ nodes.get(3).setNodeState(new NodeState(NodeType.STORAGE, State.UP).setMinUsedBits(13));
+ assertEquals(version, currentState.getVersion());
+
+ nodes.get(3).setNodeState(new NodeState(NodeType.STORAGE, State.UP).setMinUsedBits(12));
+ waitForState("version:\\d+ bits:12 distributor:10 storage:10");
+ }
+
+}
diff --git a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/DummyCommunicator.java b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/DummyCommunicator.java
new file mode 100644
index 00000000000..11b685d3f84
--- /dev/null
+++ b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/DummyCommunicator.java
@@ -0,0 +1,114 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.vespa.clustercontroller.core;
+
+import com.yahoo.vdslib.state.ClusterState;
+import com.yahoo.vdslib.state.Node;
+import com.yahoo.vdslib.state.NodeState;
+import com.yahoo.vdslib.state.State;
+import com.yahoo.vespa.clustercontroller.core.listeners.NodeAddedOrRemovedListener;
+
+import java.util.List;
+import java.util.Map;
+import java.util.TreeMap;
+
+public class DummyCommunicator implements Communicator, NodeLookup {
+
+ List<Node> newNodes;
+ Timer timer;
+
+ public class DummyGetNodeStateRequest extends GetNodeStateRequest {
+ Waiter<GetNodeStateRequest> waiter;
+
+ public DummyGetNodeStateRequest(NodeInfo nodeInfo, Waiter<GetNodeStateRequest> waiter) {
+ super(nodeInfo);
+
+ this.waiter = waiter;
+ }
+
+ @Override
+ public void abort() {
+
+ }
+ }
+
+ public class DummySetClusterStateRequest extends SetClusterStateRequest {
+
+ public DummySetClusterStateRequest(NodeInfo nodeInfo, ClusterState state) {
+ super(nodeInfo, state.getVersion());
+ }
+
+ }
+
+ private Map<Node, DummyGetNodeStateRequest> getNodeStateRequests = new TreeMap<>();
+
+ public DummyCommunicator(List<Node> nodeList, Timer timer) {
+ this.newNodes = nodeList;
+ this.timer = timer;
+ }
+
+ @Override
+ public synchronized void getNodeState(NodeInfo node, Waiter<GetNodeStateRequest> waiter) {
+ DummyGetNodeStateRequest req = new DummyGetNodeStateRequest(node, waiter);
+ getNodeStateRequests.put(node.getNode(), req);
+ node.setCurrentNodeStateRequest(req, timer.getCurrentTimeInMillis());
+ notifyAll();
+ }
+
+ public void propagateOptions(final FleetControllerOptions options) {
+
+ }
+
+ public boolean setNodeState(Node node, State state, String description) throws Exception {
+ return setNodeState(node, new NodeState(node.getType(), state).setDescription(description), "");
+ }
+
+ public boolean setNodeState(Node node, NodeState state, String hostInfo) throws Exception {
+ DummyGetNodeStateRequest req = getNodeStateRequests.remove(node);
+
+ if (req == null) {
+ throw new IllegalStateException("Premature set node state - wait for fleet controller to request first: " + node);
+ }
+
+ GetNodeStateRequest.Reply reply = new GetNodeStateRequest.Reply(state.serialize(), hostInfo);
+ req.setReply(reply);
+
+ req.waiter.done(req);
+
+ return true;
+ }
+
+ @Override
+ public void setSystemState(ClusterState state, NodeInfo node, Waiter<SetClusterStateRequest> waiter) {
+ DummySetClusterStateRequest req = new DummySetClusterStateRequest(node, state);
+ node.setSystemStateVersionSent(state);
+ req.setReply(new SetClusterStateRequest.Reply());
+ waiter.done(req);
+ }
+
+ @Override
+ public void shutdown() {
+ }
+
+ @Override
+ public boolean updateCluster(ContentCluster cluster, NodeAddedOrRemovedListener listener) {
+ if (newNodes != null) {
+ List<Node> tmp = newNodes;
+
+ for (Node node : tmp)
+ cluster.clusterInfo().setRpcAddress(node, "foo");
+
+ for (NodeInfo info : cluster.getNodeInfo()) {
+ if (!tmp.contains(info.getNode())) {
+ info.markRpcAddressOutdated(timer);
+ listener.handleMissingNode(info);
+ }
+ }
+
+ newNodes = null;
+ return true;
+ }
+
+ return false;
+ }
+
+}
diff --git a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/DummyVdsNode.java b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/DummyVdsNode.java
new file mode 100644
index 00000000000..ff470bc6b75
--- /dev/null
+++ b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/DummyVdsNode.java
@@ -0,0 +1,504 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.vespa.clustercontroller.core;
+
+import com.yahoo.jrt.*;
+import com.yahoo.jrt.StringValue;
+import com.yahoo.jrt.slobrok.api.BackOffPolicy;
+import com.yahoo.jrt.slobrok.api.Register;
+import com.yahoo.jrt.slobrok.api.SlobrokList;
+import com.yahoo.log.LogLevel;
+import com.yahoo.vdslib.state.*;
+
+import java.net.InetAddress;
+import java.net.UnknownHostException;
+import java.util.*;
+import java.util.logging.Logger;
+
+/**
+ *
+ * Used to fake a node in VDS, such that we can test the fleetcontroller without dummy interface for talking to
+ * VDS nodes.
+ */
+public class DummyVdsNode {
+
+ public static Logger log = Logger.getLogger(DummyVdsNode.class.getName());
+
+ private String slobrokConnectionSpecs[];
+ private String clusterName;
+ private NodeType type;
+ private int index;
+ private NodeState nodeState;
+ private Supervisor supervisor;
+ private Acceptor acceptor;
+ private Register register;
+ private int stateCommunicationVersion;
+ private boolean negotiatedHandle = false;
+ private final Timer timer;
+ private boolean failSetSystemStateRequests = false;
+ private boolean resetTimestampOnReconnect = false;
+ private long startTimestamp;
+ private Map<Node, Long> highestStartTimestamps = new TreeMap<Node, Long>();
+ public int timedOutStateReplies = 0;
+ public int outdatedStateReplies = 0;
+ public int immediateStateReplies = 0;
+ public int setNodeStateReplies = 0;
+ private boolean registeredInSlobrok = false;
+
+ class Req {
+ Request request;
+ long timeout;
+
+ Req(Request r, long timeout) {
+ request = r;
+ this.timeout = timeout;
+ }
+ }
+ class BackOff implements BackOffPolicy {
+ public void reset() {}
+ public double get() { return 0.01; }
+ public boolean shouldWarn(double v) { return false; }
+ }
+ private final List<Req> waitingRequests = new LinkedList<>();
+
+ /**
+ * History of received system states.
+ * Any access to this list or to its members must be synchronized on the timer variable.
+ */
+ private List<ClusterState> systemState = new LinkedList<>();
+
+ private Thread messageResponder = new Thread() {
+ public void run() {
+ log.log(LogLevel.DEBUG, "Dummy node " + DummyVdsNode.this.toString() + ": starting message reponder thread");
+ while (true) {
+ synchronized (timer) {
+ if (isInterrupted()) break;
+ long currentTime = timer.getCurrentTimeInMillis();
+ for (Iterator<Req> it = waitingRequests.iterator(); it.hasNext(); ) {
+ Req r = it.next();
+ if (r.timeout <= currentTime) {
+ log.log(LogLevel.DEBUG, "Dummy node " + DummyVdsNode.this.toString() + ": Responding to node state request at time " + currentTime);
+ r.request.returnValues().add(new StringValue(nodeState.serialize()));
+ if (r.request.methodName().equals("getnodestate3")) {
+ r.request.returnValues().add(new StringValue("No host info in dummy implementation"));
+ }
+ r.request.returnRequest();
+ it.remove();
+ ++timedOutStateReplies;
+ }
+ }
+ try{
+ timer.wait(100);
+ } catch (InterruptedException e) {
+ break;
+ }
+ }
+ }
+ log.log(LogLevel.DEBUG, "Dummy node " + DummyVdsNode.this.toString() + ": shut down message reponder thread");
+ }
+ };
+
+ public DummyVdsNode(Timer timer, DummyVdsNodeOptions options, String slobrokConnectionSpecs[], String clusterName, boolean distributor, int index) throws Exception {
+ this.timer = timer;
+ this.slobrokConnectionSpecs = slobrokConnectionSpecs;
+ this.clusterName = clusterName;
+ type = distributor ? NodeType.DISTRIBUTOR : NodeType.STORAGE;
+ this.index = index;
+ this.nodeState = new NodeState(type, State.UP);
+ this.stateCommunicationVersion = options.stateCommunicationVersion;
+ messageResponder.start();
+ nodeState.setStartTimestamp(timer.getCurrentTimeInMillis() / 1000);
+ }
+
+ public void resetStartTimestamp() {
+ resetTimestampOnReconnect = true;
+ }
+
+ public int getPendingNodeStateCount() { return waitingRequests.size(); }
+
+ public void shutdown() {
+ messageResponder.interrupt();
+ try{ messageResponder.join(); } catch (InterruptedException e) {}
+ disconnect();
+ }
+
+ public int connect() throws ListenFailedException, UnknownHostException {
+ if (resetTimestampOnReconnect) {
+ startTimestamp = timer.getCurrentTimeInMillis() / 1000;
+ nodeState.setStartTimestamp(startTimestamp);
+ resetTimestampOnReconnect = false;
+ }
+ supervisor = new Supervisor(new Transport());
+ addMethods();
+ acceptor = supervisor.listen(new Spec(0));
+ SlobrokList slist = new SlobrokList();
+ slist.setup(slobrokConnectionSpecs);
+ register = new Register(supervisor, slist, new Spec("localhost", acceptor.port()), new BackOff());
+ registerSlobrok();
+ negotiatedHandle = false;
+ return acceptor.port();
+ }
+
+ public boolean isConnected() {
+ return (registeredInSlobrok && supervisor != null);
+ }
+
+ public void registerSlobrok() {
+ register.registerName(getSlobrokName());
+ register.registerName(getSlobrokName() + "/default");
+ registeredInSlobrok = true;
+ }
+
+ public void disconnectSlobrok() {
+ register.unregisterName(getSlobrokName());
+ register.unregisterName(getSlobrokName() + "/default");
+ registeredInSlobrok = false;
+ }
+
+ public void disconnect() { disconnectImmediately(); }
+ public void disconnectImmediately() { disconnect(false, 0, false); }
+ public void disconnectBreakConnection() { disconnect(true, FleetControllerTest.timeoutMS, false); }
+ public void disconnectAsShutdown() { disconnect(true, FleetControllerTest.timeoutMS, true); }
+ public void disconnect(boolean waitForPendingNodeStateRequest, long timeoutms, boolean setStoppingStateFirst) {
+ log.log(LogLevel.DEBUG, "Dummy node " + DummyVdsNode.this.toString() + ": Breaking connection." + (waitForPendingNodeStateRequest ? " Waiting for pending state first." : ""));
+ if (waitForPendingNodeStateRequest) {
+ this.waitForPendingGetNodeStateRequest(timeoutms);
+ }
+ if (setStoppingStateFirst) {
+ NodeState newState = nodeState.clone();
+ newState.setState(State.STOPPING);
+ // newState.setDescription("Received signal 15 (SIGTERM - Termination signal)");
+ // Altered in storageserver implementation. Updating now to fit
+ newState.setDescription("controlled shutdown");
+ setNodeState(newState);
+ // Sleep a bit in hopes of answer being written before shutting down socket
+ try{ Thread.sleep(100); } catch (InterruptedException e) {}
+ }
+ if (supervisor == null) return;
+ register.shutdown();
+ acceptor.shutdown().join();
+ supervisor.transport().shutdown().join();
+ supervisor = null;
+ log.log(LogLevel.DEBUG, "Dummy node " + DummyVdsNode.this.toString() + ": Done breaking connection.");
+ }
+
+ public String toString() {
+ return type + "." + index;
+ }
+
+ public boolean isDistributor() { return type.equals(NodeType.DISTRIBUTOR); }
+ public NodeType getType() { return type; }
+
+ public Node getNode() {
+ return new Node(type, index);
+ }
+
+ public int getStateCommunicationVersion() { return stateCommunicationVersion; }
+
+ public void waitForSystemStateVersion(int version, long timeout) {
+ try {
+ long startTime = System.currentTimeMillis();
+ while (getLatestSystemStateVersion().orElse(-1) < version) {
+ if ( (System.currentTimeMillis() - startTime) > timeout)
+ throw new RuntimeException("Timed out waiting for state version " + version + " in " + this);
+ Thread.sleep(10);
+ }
+ }
+ catch (InterruptedException e) {
+ }
+ }
+
+ /** Returns the latest system state version received, or empty if none are received yet. */
+ private Optional<Integer> getLatestSystemStateVersion() {
+ synchronized(timer) {
+ if (systemState.isEmpty()) return Optional.empty();
+ return Optional.of(systemState.get(0).getVersion());
+ }
+ }
+
+ public boolean hasPendingGetNodeStateRequest() {
+ synchronized (timer) {
+ return !waitingRequests.isEmpty();
+ }
+ }
+
+ public void waitForPendingGetNodeStateRequest(long timeout) {
+ long startTime = System.currentTimeMillis();
+ long endTime = startTime + timeout;
+ log.log(LogLevel.DEBUG, "Dummy node " + this + " waiting for pending node state request.");
+ while (true) {
+ synchronized(timer) {
+ if (!waitingRequests.isEmpty()) {
+ log.log(LogLevel.DEBUG, "Dummy node " + this + " has pending request, returning.");
+ return;
+ }
+ try{
+ log.log(LogLevel.DEBUG, "Dummy node " + this + " waiting " + (endTime - startTime) + " ms for pending request.");
+ timer.wait(endTime - startTime);
+ } catch (InterruptedException e) {
+ }
+ log.log(LogLevel.DEBUG, "Dummy node " + this + " woke up to recheck.");
+ }
+ startTime = System.currentTimeMillis();
+ if (startTime >= endTime) {
+ log.log(LogLevel.DEBUG, "Dummy node " + this + " timeout passed. Don't have pending request.");
+ if (!waitingRequests.isEmpty()) {
+ log.log(LogLevel.DEBUG, "Dummy node " + this + ". Non-empty set of waiting requests");
+ }
+ throw new IllegalStateException("Timeout. No pending get node state request pending after waiting " + timeout + " milliseconds.");
+ }
+ }
+ }
+
+ public void replyToPendingNodeStateRequests() {
+ for(Req req : waitingRequests) {
+ log.log(LogLevel.DEBUG, "Dummy node " + this + " answering pending node state request.");
+ req.request.returnValues().add(new StringValue(nodeState.serialize()));
+ if (req.request.methodName().equals("getnodestate3")) {
+ req.request.returnValues().add(new StringValue("Dummy node host info"));
+ }
+ req.request.returnRequest();
+ ++setNodeStateReplies;
+ }
+ waitingRequests.clear();
+ }
+
+ public void setNodeState(NodeState state) {
+ log.log(LogLevel.DEBUG, "Dummy node " + this + " got new state: " + state);
+ synchronized(timer) {
+ this.nodeState = state;
+ replyToPendingNodeStateRequests();
+ }
+ }
+
+ public void setNodeState(State state) {
+ setNodeState(new NodeState(type, state));
+ }
+
+ public NodeState getNodeState() {
+ synchronized(timer) {
+ return nodeState;
+ }
+ }
+
+ public List<ClusterState> getSystemStatesReceived() {
+ List<ClusterState> states = new ArrayList<>();
+ synchronized(timer) {
+ states.addAll(systemState);
+ }
+ return states;
+ }
+
+ public ClusterState getClusterState() {
+ synchronized(timer) {
+ return (systemState.isEmpty() ? null : systemState.get(0));
+ }
+ }
+
+ public String getSlobrokName() {
+ return "storage/cluster." + clusterName + "/" + type + "/" + index;
+ }
+
+ private void addMethods() {
+ Method m;
+
+ m = new Method("vespa.storage.connect", "s", "i", this, "rpc_storageConnect");
+ m.methodDesc("Binds connection to a storage API handle");
+ m.paramDesc(0, "somearg", "Argument looking like slobrok address of the ones we're asking for some reason");
+ m.returnDesc(0, "returnCode", "Returncode of request. Should be 0 = OK");
+ supervisor.addMethod(m);
+
+ m = new Method("getnodestate", "", "issi", this, "rpc_getNodeState");
+ m.methodDesc("Get nodeState of a node");
+ m.returnDesc(0, "returnCode", "Returncode of request. Should be 1 = OK");
+ m.returnDesc(1, "returnMessage", "Textual error message if returncode is not ok.");
+ m.returnDesc(2, "nodeState", "The node state of the given node");
+ m.returnDesc(3, "progress", "Progress in percent of node initialization");
+ supervisor.addMethod(m);
+
+ m = new Method("setsystemstate", "s", "is", this, "rpc_setSystemState");
+ m.methodDesc("Set system state of entire system");
+ m.paramDesc(0, "systemState", "new systemstate");
+ m.returnDesc(0, "returnCode", "Returncode of request. Should be 1 = OK");
+ m.returnDesc(1, "returnMessage", "Textual error message if returncode is not ok.");
+ supervisor.addMethod(m);
+
+ if (stateCommunicationVersion > 0) {
+ m = new Method("getnodestate2", "si", "s", this, "rpc_getNodeState2");
+ m.methodDesc("Get nodeState of a node, answer when state changes from given state.");
+ m.paramDesc(0, "nodeStateIn", "The node state of the given node");
+ m.paramDesc(1, "timeout", "Time timeout in milliseconds set by the state requester.");
+ m.returnDesc(0, "nodeStateOut", "The node state of the given node");
+ supervisor.addMethod(m);
+
+ m = new Method("setsystemstate2", "s", "", this, "rpc_setSystemState2");
+ m.methodDesc("Set system state of entire system");
+ m.paramDesc(0, "systemState", "new systemstate");
+ supervisor.addMethod(m);
+
+ if (stateCommunicationVersion > 1) {
+ m = new Method("getnodestate3", "sii", "ss", this, "rpc_getNodeState2");
+ m.methodDesc("Get nodeState of a node, answer when state changes from given state.");
+ m.paramDesc(0, "nodeStateIn", "The node state of the given node");
+ m.paramDesc(1, "timeout", "Time timeout in milliseconds set by the state requester.");
+ m.returnDesc(0, "nodeStateOut", "The node state of the given node");
+ m.returnDesc(1, "hostinfo", "Information on the host node is running on");
+ supervisor.addMethod(m);
+ }
+ }
+ }
+
+ public void rpc_storageConnect(Request req) {
+ synchronized(timer) {
+ log.log(LogLevel.SPAM, "Dummy node " + this + " got old type handle connect message.");
+ req.returnValues().add(new Int32Value(0));
+ negotiatedHandle = true;
+ }
+ }
+
+ public void rpc_getNodeState(Request req) {
+ synchronized(timer) {
+ if (!negotiatedHandle) {
+ req.setError(75000, "Connection not bound to a handle");
+ return;
+ }
+ String stateString = nodeState.serialize(-1, true);
+ log.log(LogLevel.DEBUG, "Dummy node " + this + " got old type get node state request, answering: " + stateString);
+ req.returnValues().add(new Int32Value(1));
+ req.returnValues().add(new StringValue(""));
+ req.returnValues().add(new StringValue(stateString));
+ req.returnValues().add(new Int32Value(0));
+ }
+ }
+
+ public boolean sendGetNodeStateReply(int index) {
+ for (Iterator<Req> it = waitingRequests.iterator(); it.hasNext(); ) {
+ Req r = it.next();
+ if (r.request.parameters().size() > 2 && r.request.parameters().get(2).asInt32() == index) {
+ log.log(LogLevel.DEBUG, "Dummy node " + DummyVdsNode.this.toString() + ": Responding to node state reply from controller " + index + " as we received new one");
+ r.request.returnValues().add(new StringValue(nodeState.serialize()));
+ r.request.returnValues().add(new StringValue("No host info from dummy implementation"));
+ r.request.returnRequest();
+ it.remove();
+ ++outdatedStateReplies;
+ return true;
+ }
+ }
+ return false;
+ }
+
+ public void rpc_getNodeState2(Request req) {
+ log.log(LogLevel.DEBUG, "Dummy node " + this + ": Got " + req.methodName() + " request");
+ try{
+ String oldState = req.parameters().get(0).asString();
+ int timeout = req.parameters().get(1).asInt32();
+ int index = -1;
+ if (req.parameters().size() > 2) {
+ index = req.parameters().get(2).asInt32();
+ }
+ synchronized(timer) {
+ boolean sentReply = sendGetNodeStateReply(index);
+ NodeState givenState = (oldState.equals("unknown") ? null : NodeState.deserialize(type, oldState));
+ if (givenState != null && (givenState.equals(nodeState) || sentReply)) {
+ log.log(LogLevel.DEBUG, "Dummy node " + this + ": Has same state as reported " + givenState + ". Queing request. Timeout is " + timeout + " ms. "
+ + "Will be answered at time " + (timer.getCurrentTimeInMillis() + timeout * 800l / 1000));
+ req.detach();
+ waitingRequests.add(new Req(req, timer.getCurrentTimeInMillis() + timeout * 800l / 1000));
+ log.log(LogLevel.DEBUG, "Dummy node " + this + " has now " + waitingRequests.size() + " entries and is " + (waitingRequests.isEmpty() ? "empty" : "not empty"));
+ timer.notifyAll();
+ } else {
+ log.log(LogLevel.DEBUG, "Dummy node " + this + ": Request had " + (givenState == null ? "no state" : "different state(" + givenState +")") + ". Answering with " + nodeState);
+ req.returnValues().add(new StringValue(nodeState.serialize()));
+ if (req.methodName().equals("getnodestate3")) {
+ req.returnValues().add(new StringValue("Dummy node host info"));
+ }
+ ++immediateStateReplies;
+ }
+ }
+ } catch (Exception e) {
+ log.log(LogLevel.ERROR, "Dummy node " + this + ": An error occured when answering " + req.methodName() + " request: " + e.getMessage());
+ e.printStackTrace(System.err);
+ req.setError(ErrorCode.METHOD_FAILED, e.getMessage());
+ }
+ }
+
+ public long getStartTimestamp(Node n) {
+ Long ts = highestStartTimestamps.get(n);
+ return (ts == null ? 0 : ts);
+ }
+
+ private void updateStartTimestamps(ClusterState state) {
+ for(int i=0; i<2; ++i) {
+ NodeType nodeType = (i == 0 ? NodeType.DISTRIBUTOR : NodeType.STORAGE);
+ for (int j=0, n=state.getNodeCount(nodeType); j<n; ++j) {
+ Node node = new Node(nodeType, j);
+ NodeState ns = state.getNodeState(node);
+ if (ns.getStartTimestamp() != 0) {
+ Long oldValue = highestStartTimestamps.get(node);
+ if (oldValue != null && oldValue > ns.getStartTimestamp()) {
+ throw new Error("Somehow start timestamp of node " + node + " has gone down");
+ }
+ highestStartTimestamps.put(node, ns.getStartTimestamp());
+ }
+ }
+ }
+ }
+
+ public void failSetSystemState(boolean failSystemStateRequests) {
+ synchronized (timer) {
+ this.failSetSystemStateRequests = failSystemStateRequests;
+ }
+ }
+
+ private boolean shouldFailSetSystemStateRequests() {
+ synchronized (timer) {
+ return failSetSystemStateRequests;
+ }
+ }
+
+ public void rpc_setSystemState(Request req) {
+ try{
+ if (shouldFailSetSystemStateRequests()) {
+ req.setError(ErrorCode.GENERAL_ERROR, "Dummy node configured to fail setSystemState() calls");
+ return;
+ }
+ if (!negotiatedHandle) {
+ req.setError(75000, "Connection not bound to a handle");
+ return;
+ }
+ ClusterState newState = new ClusterState(req.parameters().get(0).asString());
+ synchronized(timer) {
+ updateStartTimestamps(newState);
+ systemState.add(0, newState);
+ timer.notifyAll();
+ }
+ req.returnValues().add(new Int32Value(1));
+ req.returnValues().add(new StringValue("OK"));
+ log.log(LogLevel.DEBUG, "Dummy node " + this + ": Got new system state (through old setsystemstate call) " + newState);
+ } catch (Exception e) {
+ log.log(LogLevel.ERROR, "Dummy node " + this + ": An error occured when answering setsystemstate request: " + e.getMessage());
+ e.printStackTrace(System.err);
+ req.returnValues().add(new Int32Value(ErrorCode.METHOD_FAILED));
+ req.returnValues().add(new StringValue(e.getMessage()));
+ }
+ }
+
+ public void rpc_setSystemState2(Request req) {
+ try{
+ if (shouldFailSetSystemStateRequests()) {
+ req.setError(ErrorCode.GENERAL_ERROR, "Dummy node configured to fail setSystemState2() calls");
+ return;
+ }
+ ClusterState newState = new ClusterState(req.parameters().get(0).asString());
+ synchronized(timer) {
+ updateStartTimestamps(newState);
+ systemState.add(0, newState);
+ timer.notifyAll();
+ }
+ log.log(LogLevel.DEBUG, "Dummy node " + this + ": Got new system state " + newState);
+ } catch (Exception e) {
+ log.log(LogLevel.ERROR, "Dummy node " + this + ": An error occured when answering setsystemstate request: " + e.getMessage());
+ e.printStackTrace(System.err);
+ req.setError(ErrorCode.METHOD_FAILED, e.getMessage());
+ }
+ }
+}
diff --git a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/DummyVdsNodeOptions.java b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/DummyVdsNodeOptions.java
new file mode 100644
index 00000000000..5436e1f62c7
--- /dev/null
+++ b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/DummyVdsNodeOptions.java
@@ -0,0 +1,9 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.vespa.clustercontroller.core;
+
+/**
+ *
+ */
+public class DummyVdsNodeOptions {
+ public int stateCommunicationVersion = 2; // 0 - 4.1, 1 - 4.2-5.0.10, 2 - 5.0.11+
+}
diff --git a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/EventLogTest.java b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/EventLogTest.java
new file mode 100644
index 00000000000..65950136675
--- /dev/null
+++ b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/EventLogTest.java
@@ -0,0 +1,63 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.vespa.clustercontroller.core;
+
+import com.yahoo.vdslib.state.Node;
+import com.yahoo.vdslib.state.NodeType;
+import com.yahoo.vespa.clustercontroller.utils.util.MetricReporter;
+import org.junit.Test;
+import org.junit.runner.RunWith;
+import org.mockito.runners.MockitoJUnitRunner;
+
+import java.util.logging.Level;
+
+import static org.junit.Assert.assertNotEquals;
+import static org.mockito.Mockito.mock;
+import static org.mockito.Mockito.verify;
+import static org.mockito.Mockito.verifyNoMoreInteractions;
+import static org.mockito.Mockito.when;
+
+@RunWith(MockitoJUnitRunner.class)
+public class EventLogTest {
+ private MetricUpdater metricUpdater = mock(MetricUpdater.class);
+ private NodeEvent nodeEvent = mock(NodeEvent.class);
+
+ private EventLog eventLog;
+
+ private void initialize(MetricUpdater updater) {
+ eventLog = new EventLog(new FakeTimer(), updater);
+
+ // Avoid NullPointerException...
+ NodeInfo nodeInfo = mock(NodeInfo.class);
+ when(nodeEvent.getNode()).thenReturn(nodeInfo);
+ Node node = mock(Node.class);
+ when(nodeInfo.getNode()).thenReturn(node);
+ }
+
+ @Test
+ public void testMetric() {
+ initialize(metricUpdater);
+
+ eventLog.addNodeOnlyEvent(nodeEvent, Level.INFO);
+
+ verify(metricUpdater).recordNewNodeEvent();
+ verifyNoMoreInteractions(metricUpdater);
+ }
+
+ @Test
+ public void testNullMetricReporter() {
+ initialize(null);
+
+ eventLog.addNodeOnlyEvent(nodeEvent, Level.INFO);
+
+ verifyNoMoreInteractions(metricUpdater);
+ }
+
+ @Test
+ public void testNoEventsDoNotThrowException() {
+ initialize(metricUpdater);
+ StringBuilder builder = new StringBuilder();
+ Node nonExistantNode = new Node(NodeType.DISTRIBUTOR, 0);
+ eventLog.writeHtmlState(builder, nonExistantNode);
+ assertNotEquals("", builder.toString());
+ }
+}
diff --git a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/FakeTimer.java b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/FakeTimer.java
new file mode 100644
index 00000000000..26c14d3f82a
--- /dev/null
+++ b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/FakeTimer.java
@@ -0,0 +1,34 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.vespa.clustercontroller.core;
+
+import com.yahoo.log.LogLevel;
+import com.yahoo.vespa.clustercontroller.core.testutils.LogFormatter;
+
+import java.util.logging.Logger;
+
+/**
+ * FakeTimer
+ *
+ * Used to fake timing for unit test purposes.
+ */
+public class FakeTimer implements Timer {
+ private static Logger log = Logger.getLogger(FakeTimer.class.getName());
+ // Don't start at zero. Clock users may initialize a 'last run' entry with 0, and we want first time to always look like a timeout
+ long currentTime = (long)30 * 365 * 24 * 60 * 60 * 1000;
+
+ public synchronized long getCurrentTimeInMillis() {
+ return currentTime;
+ }
+
+ public synchronized void advanceTime(long time) {
+ long currentTime = getCurrentTimeInMillis();
+ this.currentTime += time;
+ log.log(LogLevel.DEBUG, "Time advanced by " + time + " ms. Time increased from " + currentTime + " to " + (currentTime + time));
+ notifyAll();
+ }
+
+ static {
+ LogFormatter.initializeLogging();
+ }
+
+}
diff --git a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/FleetControllerTest.java b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/FleetControllerTest.java
new file mode 100644
index 00000000000..86248d2e1e3
--- /dev/null
+++ b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/FleetControllerTest.java
@@ -0,0 +1,555 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.vespa.clustercontroller.core;
+
+import com.yahoo.jrt.slobrok.api.BackOffPolicy;
+import com.yahoo.jrt.slobrok.server.Slobrok;
+import com.yahoo.log.LogLevel;
+import com.yahoo.log.LogSetup;
+import com.yahoo.vdslib.distribution.ConfiguredNode;
+import com.yahoo.vdslib.distribution.Distribution;
+import com.yahoo.vdslib.state.ClusterState;
+import com.yahoo.vdslib.state.Node;
+import com.yahoo.vdslib.state.NodeState;
+import com.yahoo.vespa.clustercontroller.core.database.DatabaseHandler;
+import com.yahoo.vespa.clustercontroller.core.hostinfo.HostInfo;
+import com.yahoo.vespa.clustercontroller.core.rpc.RPCCommunicator;
+import com.yahoo.vespa.clustercontroller.core.rpc.RpcServer;
+import com.yahoo.vespa.clustercontroller.core.rpc.SlobrokClient;
+import com.yahoo.vespa.clustercontroller.core.status.statuspage.StatusPageServer;
+import com.yahoo.vespa.clustercontroller.core.status.statuspage.StatusPageServerInterface;
+import com.yahoo.vespa.clustercontroller.core.testutils.WaitCondition;
+import com.yahoo.vespa.clustercontroller.core.testutils.WaitTask;
+import com.yahoo.vespa.clustercontroller.core.testutils.Waiter;
+import com.yahoo.vespa.clustercontroller.utils.util.NoMetricReporter;
+import org.junit.After;
+import org.junit.Rule;
+import org.junit.rules.TestRule;
+import org.junit.rules.TestWatcher;
+import org.junit.runner.Description;
+
+import static org.junit.Assert.fail;
+
+import java.io.ByteArrayOutputStream;
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.IOException;
+import java.util.*;
+import java.util.logging.Logger;
+import java.util.regex.Pattern;
+import java.util.stream.Collectors;
+
+/**
+ * @author humbe
+ */
+public abstract class FleetControllerTest implements Waiter {
+
+ private static Logger log = Logger.getLogger(FleetControllerTest.class.getName());
+ protected static final int DEFAULT_NODE_COUNT = 10;
+
+ protected FakeTimer timer = new FakeTimer();
+ protected boolean usingFakeTimer = false;
+ protected Slobrok slobrok;
+ protected FleetControllerOptions options;
+ protected ZooKeeperTestServer zooKeeperServer;
+ protected FleetController fleetController;
+ protected List<DummyVdsNode> nodes = new ArrayList<>();
+ protected String testName;
+
+ public final static int timeoutS;
+ public final static int timeoutMS;
+ private final Waiter waiter = new Waiter.Impl(new DataRetriever() {
+ @Override
+ public Object getMonitor() { return timer; }
+ @Override
+ public FleetController getFleetController() { return fleetController; }
+ @Override
+ public List<DummyVdsNode> getDummyNodes() { return nodes; }
+ @Override
+ public int getTimeoutMS() { return timeoutMS; }
+ });
+
+ static {
+ LogSetup.initVespaLogging("fleetcontroller");
+ timeoutS = 120;
+ timeoutMS = timeoutS * 1000;
+ }
+
+ class BackOff implements BackOffPolicy {
+ private int counter = 0;
+ public void reset() { counter = 0; }
+ public double get() { ++counter; return 0.01; }
+ public boolean shouldWarn(double v) { return ((counter % 1000) == 10); }
+ }
+
+ protected class CleanupZookeeperLogsOnSuccess extends TestWatcher {
+ @Override
+ protected void failed(Throwable e, Description description) {
+ System.err.println("TEST FAILED - NOT cleaning up zookeeper directory");
+ shutdownZooKeeper(false);
+ }
+
+ @Override
+ protected void succeeded(Description description) {
+ System.err.println("TEST SUCCEEDED - cleaning up zookeeper directory");
+ shutdownZooKeeper(true);
+ }
+
+ private void shutdownZooKeeper(boolean cleanupZooKeeperDir) {
+ if (zooKeeperServer != null) {
+ zooKeeperServer.shutdown(cleanupZooKeeperDir);
+ zooKeeperServer = null;
+ }
+ }
+ }
+
+ @Rule
+ public TestRule cleanupZookeeperLogsOnSuccess = new CleanupZookeeperLogsOnSuccess();
+
+ protected void startingTest(String name) {
+ System.err.println("STARTING TEST: " + name);
+ testName = name;
+ }
+
+ protected void setUpSystem(boolean useFakeTimer, FleetControllerOptions options) throws Exception {
+ log.log(LogLevel.DEBUG, "Setting up system");
+ slobrok = new Slobrok();
+ this.options = options;
+ if (options.zooKeeperServerAddress != null) {
+ zooKeeperServer = new ZooKeeperTestServer();
+ this.options.zooKeeperServerAddress = zooKeeperServer.getAddress();
+ log.log(LogLevel.DEBUG, "Set up new zookeeper server at " + this.options.zooKeeperServerAddress);
+ }
+ this.options.slobrokConnectionSpecs = new String[1];
+ this.options.slobrokConnectionSpecs[0] = "tcp/localhost:" + slobrok.port();
+ this.usingFakeTimer = useFakeTimer;
+ }
+
+ protected FleetController createFleetController(boolean useFakeTimer, FleetControllerOptions options, boolean startThread, StatusPageServerInterface status) throws Exception {
+ Timer timer = useFakeTimer ? this.timer : new RealTimer();
+ MetricUpdater metricUpdater = new MetricUpdater(new NoMetricReporter(), options.fleetControllerIndex);
+ EventLog log = new EventLog(timer, metricUpdater);
+ ContentCluster cluster = new ContentCluster(
+ options.clusterName,
+ options.nodes,
+ options.storageDistribution,
+ options.minStorageNodesUp,
+ options.minRatioOfStorageNodesUp);
+ NodeStateGatherer stateGatherer = new NodeStateGatherer(timer, timer, log);
+ Communicator communicator = new RPCCommunicator(
+ timer,
+ options.fleetControllerIndex,
+ options.nodeStateRequestTimeoutMS,
+ options.nodeStateRequestTimeoutEarliestPercentage,
+ options.nodeStateRequestTimeoutLatestPercentage,
+ options.nodeStateRequestRoundTripTimeMaxSeconds);
+ SlobrokClient lookUp = new SlobrokClient(timer);
+ lookUp.setSlobrokConnectionSpecs(new String[0]);
+ if (status == null) {
+ status = new StatusPageServer(timer, timer, options.httpPort);
+ }
+ RpcServer rpcServer = new RpcServer(timer, timer, options.clusterName, options.fleetControllerIndex, options.slobrokBackOffPolicy);
+ DatabaseHandler database = new DatabaseHandler(timer, options.zooKeeperServerAddress, options.fleetControllerIndex, timer);
+ SystemStateGenerator stateGenerator = new SystemStateGenerator(timer, log, metricUpdater);
+ SystemStateBroadcaster stateBroadcaster = new SystemStateBroadcaster(timer, timer);
+ MasterElectionHandler masterElectionHandler = new MasterElectionHandler(options.fleetControllerIndex, options.fleetControllerCount, timer, timer);
+ FleetController controller = new FleetController(timer, log, cluster, stateGatherer, communicator, status, rpcServer, lookUp, database, stateGenerator, stateBroadcaster, masterElectionHandler, metricUpdater, options);
+ if (startThread) {
+ controller.start();
+ }
+ return controller;
+ }
+
+ protected void setUpFleetController(boolean useFakeTimer, FleetControllerOptions options) throws Exception {
+ setUpFleetController(useFakeTimer, options, true);
+ }
+
+ protected void setUpFleetController(boolean useFakeTimer, FleetControllerOptions options, boolean startThread) throws Exception {
+ setUpFleetController(useFakeTimer, options, startThread, null);
+ }
+ protected void setUpFleetController(boolean useFakeTimer, FleetControllerOptions options, boolean startThread, StatusPageServerInterface status) throws Exception {
+ if (slobrok == null) setUpSystem(useFakeTimer, options);
+ if (fleetController == null) {
+ fleetController = createFleetController(useFakeTimer, options, startThread, status);
+ } else {
+ throw new Exception("called setUpFleetcontroller but it was already setup");
+ }
+ }
+
+ protected void stopFleetController() throws Exception {
+ if (fleetController != null) {
+ fleetController.shutdown();
+ fleetController = null;
+ }
+ }
+ protected void startFleetController() throws Exception {
+ if (fleetController == null) {
+ fleetController = createFleetController(usingFakeTimer, options, true, null);
+ } else {
+ log.log(LogLevel.WARNING, "already started fleetcontroller, not starting another");
+ }
+ }
+
+ protected void setUpVdsNodes(boolean useFakeTimer, DummyVdsNodeOptions options) throws Exception {
+ setUpVdsNodes(useFakeTimer, options, false);
+ }
+ protected void setUpVdsNodes(boolean useFakeTimer, DummyVdsNodeOptions options, boolean startDisconnected) throws Exception {
+ setUpVdsNodes(useFakeTimer, options, startDisconnected, DEFAULT_NODE_COUNT);
+ }
+ protected void setUpVdsNodes(boolean useFakeTimer, DummyVdsNodeOptions options, boolean startDisconnected, int nodeCount) throws Exception {
+ TreeSet<Integer> nodeIndexes = new TreeSet<>();
+ for (int i = 0; i < nodeCount; ++i)
+ nodeIndexes.add(this.nodes.size()/2 + i); // divide by 2 because there are 2 nodes (storage and distributor) per index
+ setUpVdsNodes(useFakeTimer, options, startDisconnected, nodeIndexes);
+ }
+ protected void setUpVdsNodes(boolean useFakeTimer, DummyVdsNodeOptions options, boolean startDisconnected, Set<Integer> nodeIndexes) throws Exception {
+ String connectionSpecs[] = new String[1];
+ connectionSpecs[0] = "tcp/localhost:" + slobrok.port();
+ for (int nodeIndex : nodeIndexes) {
+ nodes.add(new DummyVdsNode(useFakeTimer ? timer : new RealTimer(), options, connectionSpecs, this.options.clusterName, true, nodeIndex));
+ if ( ! startDisconnected) nodes.get(nodes.size() - 1).connect();
+ nodes.add(new DummyVdsNode(useFakeTimer ? timer : new RealTimer(), options, connectionSpecs, this.options.clusterName, false, nodeIndex));
+ if ( ! startDisconnected) nodes.get(nodes.size() - 1).connect();
+ }
+ }
+ // TODO: Replace all usages of the above setUp methods with this one, and remove the nodes field
+
+ /**
+ * Creates dummy vds nodes for the list of configured nodes and returns them.
+ * As two dummy nodes are created for each configured node - one distributor and one storage node -
+ * the returned list is twice as large as configuredNodes.
+ */
+ protected List<DummyVdsNode> setUpVdsNodes(boolean useFakeTimer, DummyVdsNodeOptions options, boolean startDisconnected, List<ConfiguredNode> configuredNodes) throws Exception {
+ String connectionSpecs[] = new String[1];
+ connectionSpecs[0] = "tcp/localhost:" + slobrok.port();
+ nodes = new ArrayList<>();
+ final boolean distributor = true;
+ for (ConfiguredNode configuredNode : configuredNodes) {
+ nodes.add(new DummyVdsNode(useFakeTimer ? timer : new RealTimer(), options, connectionSpecs, this.options.clusterName, distributor, configuredNode.index()));
+ if ( ! startDisconnected) nodes.get(nodes.size() - 1).connect();
+ nodes.add(new DummyVdsNode(useFakeTimer ? timer : new RealTimer(), options, connectionSpecs, this.options.clusterName, !distributor, configuredNode.index()));
+ if ( ! startDisconnected) nodes.get(nodes.size() - 1).connect();
+ }
+ return nodes;
+ }
+
+ public interface NodeModifier {
+ void modify(NodeInfo node);
+ }
+
+ NodeModifier makeDefaultTestNodeModifier() {
+ return new NodeModifier() {
+ @Override
+ public void modify(NodeInfo node) {
+ if (node.isDistributor()) {
+ if (node.getNodeIndex() == 13) {
+ node.setPrematureCrashCount(fleetController.getOptions().maxPrematureCrashes + 2);
+ }
+ return;
+ }
+ double latency = 75;
+ long count = 1000;
+ if (node.getNodeIndex() == 4) {
+ latency = 300;
+ count = 500;
+ } else if (node.getNodeIndex() == 7) {
+ latency = 120;
+ count = 800;
+ } else if (node.getNodeIndex() == 21) {
+ latency = 2000;
+ count = 600;
+ } else if (node.getNodeIndex() == 25) {
+ node.setPrematureCrashCount(fleetController.getOptions().maxPrematureCrashes + 1);
+ } else if (node.getNodeIndex() == 26) {
+ node.setPrematureCrashCount(fleetController.getOptions().maxPrematureCrashes);
+ }
+ String hostInfoString = generateHostInfo(latency, count);
+ node.setHostInfo(HostInfo.createHostInfo(hostInfoString));
+ }
+ };
+ }
+
+ NodeModifier makeStdDevTestNodeModifier() {
+ return new NodeModifier() {
+ double[] latencies = new double[] { 30, 300, 60, 270 };
+ int counter = 0;
+
+ @Override
+ public void modify(NodeInfo node) {
+ if (node.isDistributor()) {
+ return;
+ }
+ String hostInfo = generateHostInfo(latencies[counter++ % latencies.length], 1500);
+ node.setHostInfo(HostInfo.createHostInfo(hostInfo));
+ }
+ };
+ }
+
+ protected void setUpSlowDiskCluster(NodeModifier callback) throws Exception {
+ int nodeCount = 31;
+ FleetControllerOptions options = new FleetControllerOptions("mycluster");
+ // TODO: multiple groups!
+ options.setStorageDistribution(new Distribution(Distribution.getDefaultDistributionConfig(3, nodeCount)));
+ setUpFleetController(true, options);
+ setUpVdsNodes(true, new DummyVdsNodeOptions(), false, nodeCount);
+ waitForStableSystem(nodeCount);
+ // Set one node as not being up. It should not contribute to the overall
+ // latency or operation metrics, nor should its disks be included.
+ nodes.get(2*13).disconnectAsShutdown();
+ nodes.get(2*21+1).disconnectAsShutdown();
+ waiter.waitForState("version:\\d+ distributor:31 .13.s:d storage:31 .21.s:m");
+
+ for (NodeInfo node : fleetController.getCluster().getNodeInfo()) {
+ callback.modify(node);
+ }
+ }
+
+ protected void setUpSimpleCluster(int nodeCount) throws Exception {
+ FleetControllerOptions options = new FleetControllerOptions("mycluster");
+ // TODO: multiple groups!
+ options.setStorageDistribution(new Distribution(Distribution.getDefaultDistributionConfig(3, nodeCount)));
+ setUpFleetController(true, options);
+ setUpVdsNodes(true, new DummyVdsNodeOptions(), false, nodeCount);
+ waitForStableSystem(nodeCount);
+ waiter.waitForState("version:\\d+ distributor:" + nodeCount + " storage:" + nodeCount);
+
+ NodeModifier callback = makeDefaultTestNodeModifier();
+ for (NodeInfo node : fleetController.getCluster().getNodeInfo()) {
+ callback.modify(node);
+ }
+ }
+
+ protected void tearDownSystem() throws Exception {
+ if (testName != null) {
+ //log.log(LogLevel.INFO, "STOPPING TEST " + testName);
+ System.err.println("STOPPING TEST " + testName);
+ testName = null;
+ }
+ if (fleetController != null) {
+ fleetController.shutdown();
+ fleetController = null;
+ }
+ if (nodes != null) for (DummyVdsNode node : nodes) {
+ node.shutdown();
+ nodes = null;
+ }
+ if (slobrok != null) {
+ slobrok.stop();
+ slobrok = null;
+ }
+ }
+
+ @After
+ public void tearDown() throws Exception {
+ tearDownSystem();
+ }
+
+ public ClusterState waitForStableSystem() throws Exception { return waiter.waitForStableSystem(); }
+ public ClusterState waitForStableSystem(int nodeCount) throws Exception { return waiter.waitForStableSystem(nodeCount); }
+ public ClusterState waitForState(String state) throws Exception { return waiter.waitForState(state); }
+ public ClusterState waitForState(String state, int timeoutMS) throws Exception { return waiter.waitForState(state, timeoutMS); }
+ public ClusterState waitForInitProgressPassed(Node n, double progress) { return waiter.waitForInitProgressPassed(n, progress); }
+ public ClusterState waitForClusterStateIncludingNodesWithMinUsedBits(int bitcount, int nodecount) { return waiter.waitForClusterStateIncludingNodesWithMinUsedBits(bitcount, nodecount); }
+
+ protected void waitForNodeStateReported(int nodeIndex, NodeState state, int ms) {
+ long timeoutAtTime = System.currentTimeMillis() + ms;
+ while (true) {
+ Node node = nodes.get(nodeIndex).getNode();
+ NodeState ns = fleetController.getReportedNodeState(node);
+ if ((ns == null && state == null) || (ns != null && state != null && ns.equals(state))) break;
+ if (System.currentTimeMillis() > timeoutAtTime) {
+ throw new IllegalStateException("Failed to find " + node + " in nodestate " + state + " before timeout of " + ms + " milliseconds.");
+ }
+ }
+ }
+
+ public void wait(WaitCondition c, WaitTask wt, int timeoutMS) {
+ waiter.wait(c, wt, timeoutMS);
+ }
+
+ public void waitForCompleteCycle() {
+ fleetController.waitForCompleteCycle(timeoutMS);
+ }
+
+ protected void verifyNodeEvents(Node n, String exp) {
+ verifyNodeEvents(n, exp, null);
+ }
+
+ private class ExpectLine {
+ Pattern regex;
+ int matchedCount = 0;
+ int minCount = 1;
+ int maxCount = 1;
+ boolean repeatable() { return (maxCount == 0 || maxCount > matchedCount); }
+ boolean optional() { return (matchedCount >= minCount); }
+
+ boolean matches(String event) {
+ if (event == null) return false;
+ boolean m = regex.matcher(event).matches();
+ if (m) ++matchedCount;
+ return m;
+ }
+
+ ExpectLine(String pattern) {
+ if (pattern.charAt(0) == '?') {
+ pattern = pattern.substring(1);
+ minCount = 0;
+ } else if (pattern.charAt(0) == '*') {
+ pattern = pattern.substring(1);
+ minCount = 0;
+ maxCount = 0;
+ } else if (pattern.charAt(0) == '+') {
+ pattern = pattern.substring(1);
+ maxCount = 0;
+ }
+ regex = Pattern.compile(pattern);
+ }
+
+ public String toString() {
+ return "{"+minCount+","+maxCount+"}("+matchedCount+") " + regex;
+ }
+ }
+
+ /**
+ * Verifies that node event list is equal to some expected value.
+ * The format of the expected values is as follows:
+ * <ul>
+ * <li>Each line in the exp string specifies a pattern to match one or more events.
+ * <li>A line starting with ? * or + means that the line can match 0 or 1, 0 or more or 1 or more respectively.
+ * <li>The rest of the line is a regular expression.
+ * </ul>
+ */
+ protected void verifyNodeEvents(Node n, String exp, String ignoreRegex) {
+ Pattern ignorePattern = (ignoreRegex == null ? null : Pattern.compile(ignoreRegex));
+ List<NodeEvent> events = fleetController.getNodeEvents(n);
+ String[] expectLines = exp.split("\n");
+ List<ExpectLine> expected = new ArrayList<ExpectLine>();
+ for (String line : expectLines) {
+ expected.add(new ExpectLine(line));
+ }
+
+ boolean mismatch = false;
+ StringBuilder eventLog = new StringBuilder();
+ StringBuilder errors = new StringBuilder();
+
+ int gotno = 0;
+ int expno = 0;
+
+ while (gotno < events.size() || expno < expected.size()) {
+ String eventLine = null;
+ if (gotno < events.size()) {
+ NodeEvent e = events.get(gotno);
+ eventLine = e.toString();
+ }
+
+ if (ignorePattern != null && ignorePattern.matcher(eventLine).matches()) {
+ ++gotno;
+ continue;
+ }
+
+ ExpectLine pattern = null;
+ if (expno < expected.size()) {
+ pattern = expected.get(expno);
+ }
+ eventLog.append(eventLine).append("\n");
+
+ if (pattern == null) {
+ errors.append("Exhausted expected list before matching event " + gotno
+ + ": '" + eventLine + "'.");
+ mismatch = true;
+ break;
+ }
+
+ if (pattern.matches(eventLine)) {
+ if (! pattern.repeatable()) {
+ ++expno;
+ }
+ ++gotno;
+ } else {
+ if (pattern.optional()) {
+ ++expno;
+ } else {
+ errors.append("Event " + gotno + ": '" + eventLine
+ + "' did not match regex " + expno + ": " + pattern);
+ mismatch = true;
+ break;
+ }
+ }
+ }
+ if (!mismatch && expno < expected.size()) {
+ errors.append("Too few entries in event log (only matched "
+ + expno + " of " + expected.size() + ")");
+ mismatch = true;
+ }
+ if (mismatch) {
+ StringBuilder eventsGotten = new StringBuilder();
+ for (Event e : events) {
+ String eventLine = e.toString();
+ if (ignorePattern != null && ignorePattern.matcher(eventLine).matches()) {
+ continue;
+ }
+ eventsGotten.append(eventLine).append("\n");
+ }
+ errors.append("\nExpected events matching:\n" + exp + "\n");
+ errors.append("but got the following events:\n" + eventsGotten.toString());
+ fail(errors.toString());
+ }
+ }
+
+ protected String generateHostInfo(double averagePutLatency, long operationCount) {
+ return ("{\n" +
+ " \"metrics\":\n" +
+ " {\n" +
+ " \"snapshot\":\n" +
+ " {\n" +
+ " \"from\":1335527020,\n" +
+ " \"to\":1335527320\n" +
+ " },\n" +
+ " \"values\":\n" +
+ " [\n" +
+ " {\n" +
+ " \"name\":\"vds.filestor.disk_0.allthreads.put.sum.latency\",\n" +
+ " \"values\":\n" +
+ " {\n" +
+ " \"average\":" + averagePutLatency + ",\n" +
+ " \"rate\":123.00000\n" +
+ " }\n" +
+ " },\n" +
+ " {\n" +
+ " \"name\":\"vds.filestor.disk_0.allthreads.operations\",\n" +
+ " \"values\":\n" +
+ " {\n" +
+ " \"count\":" + operationCount + ",\n" +
+ " \"rate\":3.266666\n" +
+ " }\n" +
+ " }\n" +
+ " ]\n" +
+ " }\n" +
+ "}\n");
+ }
+
+ protected String readFile(String filename) throws IOException {
+ FileInputStream stream = new FileInputStream(new File(filename));
+ ByteArrayOutputStream output = new ByteArrayOutputStream();
+ try {
+ byte [] buf = new byte[4096];
+ while (true) {
+ int read = stream.read(buf);
+ if (read<=0) {
+ break;
+ }
+ output.write(buf, 0, read);
+ }
+ output.close();
+ return output.toString();
+ } finally {
+ stream.close();
+ }
+ }
+
+ public static Set<ConfiguredNode> toNodes(Integer ... indexes) {
+ return Arrays.asList(indexes).stream()
+ .map(i -> new ConfiguredNode(i, false))
+ .collect(Collectors.toSet());
+ }
+
+}
diff --git a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/LeafGroupsTest.java b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/LeafGroupsTest.java
new file mode 100644
index 00000000000..ae1f10eb61f
--- /dev/null
+++ b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/LeafGroupsTest.java
@@ -0,0 +1,67 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.vespa.clustercontroller.core;
+
+import com.yahoo.vdslib.distribution.Group;
+import org.junit.Test;
+
+import java.util.List;
+
+import static org.hamcrest.core.Is.is;
+import static org.junit.Assert.*;
+
+public class LeafGroupsTest {
+
+ @Test
+ public void rootGroupCountedAsLeafWhenNoChildren() {
+ Group g = new Group(0, "donkeykong");
+
+ List<Group> leaves = LeafGroups.enumerateFrom(g);
+ assertThat(leaves.size(), is(1));
+ assertThat(leaves.get(0).getName(), is("donkeykong"));
+ }
+
+ private Group.Distribution dummyDistribution() throws Exception {
+ return new Group.Distribution("*", 1);
+ }
+
+ @Test
+ public void singleLeafIsEnumerated() throws Exception {
+ Group g = new Group(0, "donkeykong", dummyDistribution());
+ Group child = new Group(1, "mario");
+ g.addSubGroup(child);
+
+ List<Group> leaves = LeafGroups.enumerateFrom(g);
+ assertThat(leaves.size(), is(1));
+ assertThat(leaves.get(0).getName(), is("mario"));
+ }
+
+ @Test
+ public void singleLeafIsEnumeratedInNestedCase() throws Exception {
+ Group g = new Group(0, "donkeykong", dummyDistribution());
+ Group child = new Group(1, "mario", dummyDistribution());
+ child.addSubGroup(new Group(2, "toad"));
+ g.addSubGroup(child);
+
+ List<Group> leaves = LeafGroups.enumerateFrom(g);
+ assertThat(leaves.size(), is(1));
+ assertThat(leaves.get(0).getName(), is("toad"));
+ }
+
+ @Test
+ public void multipleLeafGroupsAreEnumerated() throws Exception {
+ Group g = new Group(0, "donkeykong", dummyDistribution());
+ Group child = new Group(1, "mario", dummyDistribution());
+ child.addSubGroup(new Group(2, "toad"));
+ child.addSubGroup(new Group(3, "yoshi"));
+ g.addSubGroup(child);
+ g.addSubGroup(new Group(4, "luigi"));
+
+ List<Group> leaves = LeafGroups.enumerateFrom(g);
+ // Ensure that output order matches insertion order.
+ leaves.sort((a, b) -> Integer.compare(a.getIndex(), b.getIndex()));
+ assertThat(leaves.size(), is(3));
+ assertThat(leaves.get(0).getName(), is("toad"));
+ assertThat(leaves.get(1).getName(), is("yoshi"));
+ assertThat(leaves.get(2).getName(), is("luigi"));
+ }
+}
diff --git a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/MasterElectionTest.java b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/MasterElectionTest.java
new file mode 100644
index 00000000000..ba2cd287a9a
--- /dev/null
+++ b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/MasterElectionTest.java
@@ -0,0 +1,440 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.vespa.clustercontroller.core;
+
+import com.yahoo.jrt.*;
+import com.yahoo.jrt.slobrok.server.Slobrok;
+import com.yahoo.log.LogLevel;
+
+import java.net.InetAddress;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.List;
+import java.util.concurrent.TimeoutException;
+import java.util.logging.Logger;
+
+import com.yahoo.vdslib.state.ClusterState;
+import org.junit.Ignore;
+import org.junit.Rule;
+import org.junit.Test;
+import org.junit.rules.TestRule;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertTrue;
+import static org.junit.Assert.assertFalse;
+
+public class MasterElectionTest extends FleetControllerTest {
+
+ private static Logger log = Logger.getLogger(MasterElectionTest.class.getName());
+
+ protected Supervisor supervisor;
+ protected List<FleetController> fleetControllers = new ArrayList<>();
+
+ @Rule
+ public TestRule cleanupZookeeperLogsOnSuccess = new CleanupZookeeperLogsOnSuccess();
+
+ protected void setUpFleetController(int count, boolean useFakeTimer, FleetControllerOptions options) throws Exception {
+ if (zooKeeperServer == null) {
+ zooKeeperServer = new ZooKeeperTestServer();
+ }
+ slobrok = new Slobrok();
+ usingFakeTimer = useFakeTimer;
+ this.options = options;
+ this.options.zooKeeperSessionTimeout = 10 * timeoutMS;
+ this.options.zooKeeperServerAddress = zooKeeperServer.getAddress();
+ this.options.slobrokConnectionSpecs = new String[1];
+ this.options.slobrokConnectionSpecs[0] = "tcp/"+ InetAddress.getLocalHost().getHostName()+":" + slobrok.port();
+ this.options.fleetControllerCount = count;
+ for (int i=0; i<count; ++i) {
+ FleetControllerOptions nodeOptions = options.clone();
+ nodeOptions.fleetControllerIndex = i;
+ fleetControllers.add(createFleetController(usingFakeTimer, nodeOptions, true, null));
+ }
+ }
+
+ public FleetControllerOptions adjustConfig(FleetControllerOptions o,
+ int fleetControllerIndex, int fleetControllerCount) throws Exception
+ {
+ FleetControllerOptions options = o.clone();
+ options.zooKeeperSessionTimeout = 10 * timeoutMS;
+ options.zooKeeperServerAddress = zooKeeperServer.getAddress();
+ options.slobrokConnectionSpecs = new String[1];
+ options.slobrokConnectionSpecs[0] = "tcp/"+ InetAddress.getLocalHost().getHostName()+":" + slobrok.port();
+ options.fleetControllerIndex = fleetControllerIndex;
+ options.fleetControllerCount = fleetControllerCount;
+ return options;
+ }
+
+ protected void waitForZookeeperDisconnected() throws TimeoutException {
+ long maxTime = System.currentTimeMillis() + timeoutMS;
+ for(FleetController f : fleetControllers) {
+ while (true) {
+ if (!f.hasZookeeperConnection()) break;
+ timer.advanceTime(1000);
+ try{ Thread.sleep(1); } catch (InterruptedException e) {}
+ if (System.currentTimeMillis() > maxTime) throw new TimeoutException("Failed to notice zookeeper down within timeout of " + timeoutMS + " ms");
+ }
+ }
+ waitForCompleteCycles();
+ }
+
+ protected void waitForCompleteCycle(int findex) {
+ fleetControllers.get(findex).waitForCompleteCycle(timeoutMS);
+ }
+
+ protected void waitForCompleteCycles() {
+ for (int i = 0; i < fleetControllers.size(); ++i) {
+ waitForCompleteCycle(i);
+ }
+ }
+
+ protected void tearDownSystem() throws Exception {
+ for (FleetController fleetController : fleetControllers) {
+ if (fleetController != null) {
+ fleetController.shutdown();
+ }
+ }
+ if (slobrok != null) {
+ slobrok.stop();
+ }
+ super.tearDownSystem();
+ }
+
+ public void tearDown() throws Exception {
+ if (supervisor != null) {
+ supervisor.transport().shutdown().join();
+ }
+ super.tearDown();
+ }
+
+ /** Ignored for unknown reasons */
+ @Test
+ @Ignore
+ public void testMasterElection() throws Exception {
+ startingTest("MasterElectionTest::testMasterElection");
+ log.log(LogLevel.INFO, "STARTING TEST: MasterElectionTest::testMasterElection()");
+ FleetControllerOptions options = new FleetControllerOptions("mycluster");
+ options.masterZooKeeperCooldownPeriod = 1;
+ setUpFleetController(5, true, options);
+ waitForMaster(0);
+ log.log(LogLevel.INFO, "SHUTTING DOWN FLEET CONTROLLER 0");
+ fleetControllers.get(0).shutdown();
+ waitForMaster(1);
+ log.log(LogLevel.INFO, "SHUTTING DOWN FLEET CONTROLLER 1");
+ fleetControllers.get(1).shutdown();
+ waitForMaster(2);
+ log.log(LogLevel.INFO, "SHUTTING DOWN FLEET CONTROLLER 2");
+ fleetControllers.get(2).shutdown();
+
+ // Too few for there to be a master at this point
+ for (int i=0; i<fleetControllers.size(); ++i) {
+ if (fleetControllers.get(i).isRunning()) waitForCompleteCycle(i);
+ assertEquals("Fleet controller " + i, false, fleetControllers.get(i).isMaster());
+ }
+
+ log.log(LogLevel.INFO, "STARTING FLEET CONTROLLER 2");
+ fleetControllers.set(2, createFleetController(usingFakeTimer, fleetControllers.get(2).getOptions(), true, null));
+ waitForMaster(2);
+ log.log(LogLevel.INFO, "STARTING FLEET CONTROLLER 0");
+ fleetControllers.set(0, createFleetController(usingFakeTimer, fleetControllers.get(0).getOptions(), true, null));
+ waitForMaster(0);
+ log.log(LogLevel.INFO, "STARTING FLEET CONTROLLER 1");
+ fleetControllers.set(1, createFleetController(usingFakeTimer, fleetControllers.get(1).getOptions(), true, null));
+ waitForMaster(0);
+
+ log.log(LogLevel.INFO, "SHUTTING DOWN FLEET CONTROLLER 4");
+ fleetControllers.get(4).shutdown();
+ waitForMaster(0);
+ log.log(LogLevel.INFO, "SHUTTING DOWN FLEET CONTROLLER 3");
+ fleetControllers.get(3).shutdown();
+ waitForMaster(0);
+ log.log(LogLevel.INFO, "SHUTTING DOWN FLEET CONTROLLER 2");
+ fleetControllers.get(2).shutdown();
+
+ // Too few for there to be a master at this point
+ for (int i=0; i<fleetControllers.size(); ++i) {
+ if (fleetControllers.get(i).isRunning()) waitForCompleteCycle(i);
+ assertEquals(false, fleetControllers.get(i).isMaster());
+ }
+ }
+
+ protected void waitForMaster(int master) {
+ log.log(LogLevel.INFO, "Entering waitForMaster");
+ boolean isOnlyMaster = false;
+ for (int i=0; i < FleetControllerTest.timeoutMS; i+=100) {
+ if (!fleetControllers.get(master).isMaster()) {
+ log.log(LogLevel.INFO, "Node " + master + " is not master yet, sleeping more");
+ timer.advanceTime(100);
+ waitForCompleteCycle(master);
+ } else {
+ log.log(LogLevel.INFO, "Node " + master + " is master. Checking that noone else is master");
+ isOnlyMaster = true;
+ for (int j=0; j<fleetControllers.size(); ++j) {
+ if (j != master && fleetControllers.get(j).isMaster()) {
+ isOnlyMaster = false;
+ log.log(LogLevel.INFO, "Node " + j + " also says it is master.");
+ }
+ }
+
+ if (isOnlyMaster) {
+ break;
+ }
+ }
+ // Have to wait to get zookeeper communication chance to happen.
+ try{ Thread.sleep(100); } catch (InterruptedException e) {}
+ }
+
+ if (!isOnlyMaster) {
+ log.log(LogLevel.INFO, "Node " + master + " is not the only master");
+ throw new IllegalStateException("Node " + master + " never got to be the only master.");
+ }
+
+ log.log(LogLevel.INFO, "Leaving waitForMaster");
+ }
+
+ private static class VersionMonotonicityChecker {
+ private ClusterState lastState;
+
+ private VersionMonotonicityChecker(ClusterState initialState) {
+ this.lastState = initialState;
+ }
+
+ public static VersionMonotonicityChecker bootstrappedWith(ClusterState initialState) {
+ return new VersionMonotonicityChecker(initialState);
+ }
+
+ public void updateAndVerify(ClusterState currentState) {
+ final ClusterState last = lastState;
+ lastState = currentState;
+ if (currentState.getVersion() <= last.getVersion()) {
+ throw new IllegalStateException(
+ String.format("Cluster state version monotonicity invariant broken! " +
+ "Old state was '%s', new state is '%s'", last, currentState));
+ }
+ }
+ }
+
+ @Test
+ public void testClusterStateVersionIncreasesAcrossMasterElections() throws Exception {
+ startingTest("MasterElectionTest::testClusterStateVersionIncreasesAcrossMasterElections");
+ FleetControllerOptions options = new FleetControllerOptions("mycluster");
+ options.masterZooKeeperCooldownPeriod = 1;
+ setUpFleetController(5, true, options);
+ // Currently need to have content nodes present for the cluster controller to even bother
+ // attempting to persisting its cluster state version to ZK.
+ setUpVdsNodes(true, new DummyVdsNodeOptions());
+ fleetController = fleetControllers.get(0); // Required to prevent waitForStableSystem from NPE'ing
+ waitForStableSystem();
+ waitForMaster(0);
+ Arrays.asList(0, 1, 2, 3, 4).stream().forEach(this::waitForCompleteCycle);
+ VersionMonotonicityChecker checker = VersionMonotonicityChecker.bootstrappedWith(fleetControllers.get(0).getClusterState());
+ fleetControllers.get(0).shutdown();
+ waitForMaster(1);
+ Arrays.asList(1, 2, 3, 4).stream().forEach(this::waitForCompleteCycle);
+ checker.updateAndVerify(fleetControllers.get(1).getClusterState());
+ fleetControllers.get(1).shutdown();
+ waitForMaster(2); // Still a quorum available
+ Arrays.asList(2, 3, 4).stream().forEach(this::waitForCompleteCycle);
+ checker.updateAndVerify(fleetControllers.get(2).getClusterState());
+ }
+
+ @Test
+ public void testVotingCorrectnessInFaceOfZKDisconnect() throws Exception {
+ startingTest("MasterElectionTest::testVotingCorrectnessInFaceOfZKDisconnect");
+ FleetControllerOptions options = new FleetControllerOptions("mycluster");
+ // "Magic" port value is in range allocated to module for testing.
+ zooKeeperServer = ZooKeeperTestServer.createWithFixedPort(18342);
+ options.zooKeeperSessionTimeout = 100;
+ options.masterZooKeeperCooldownPeriod = 100;
+ setUpFleetController(2, true, options);
+ waitForMaster(0);
+
+ zooKeeperServer.shutdown(true);
+ waitForCompleteCycles();
+ timer.advanceTime(options.zooKeeperSessionTimeout);
+ waitForZookeeperDisconnected();
+
+ zooKeeperServer = ZooKeeperTestServer.createWithFixedPort(18342);
+ timer.advanceTime(10 * 1000); // Wait long enough for fleetcontroller wanting to retry zookeeper connection
+
+ log.log(LogLevel.INFO, "WAITING FOR 0 TO BE MASTER");
+ waitForMaster(0);
+ log.log(LogLevel.INFO, "SHUTTING DOWN");
+ }
+
+ @Test
+ public void testZooKeeperUnavailable() throws Exception {
+ startingTest("MasterElectionTest::testZooKeeperUnavailable");
+ FleetControllerOptions options = new FleetControllerOptions("mycluster");
+ options.zooKeeperSessionTimeout = 100;
+ options.masterZooKeeperCooldownPeriod = 100;
+ options.zooKeeperServerAddress = "localhost";
+ setUpFleetController(5, true, options);
+ waitForMaster(0);
+
+ log.log(LogLevel.INFO, "STOPPING ZOOKEEPER SERVER AT " + zooKeeperServer.getAddress());
+ zooKeeperServer.shutdown(true);
+ waitForCompleteCycles();
+ timer.advanceTime(options.zooKeeperSessionTimeout);
+ waitForZookeeperDisconnected();
+ // Noone can be master if server is unavailable
+ log.log(LogLevel.INFO, "Checking master status");
+ for (int i=0; i<fleetControllers.size(); ++i) {
+ assertEquals("Index " + i, false, fleetControllers.get(i).isMaster());
+ }
+
+ zooKeeperServer = new ZooKeeperTestServer();
+ log.log(LogLevel.INFO, "STARTED ZOOKEEPER SERVER AT " + zooKeeperServer.getAddress());
+ for (FleetController fc : fleetControllers) {
+ FleetControllerOptions myoptions = fc.getOptions();
+ myoptions.zooKeeperServerAddress = zooKeeperServer.getAddress();
+ fc.updateOptions(myoptions, 0);
+ log.log(LogLevel.INFO, "Should now have sent out new zookeeper server address " + myoptions.zooKeeperServerAddress + " to fleetcontroller " + myoptions.fleetControllerIndex);
+ }
+ timer.advanceTime(10 * 1000); // Wait long enough for fleetcontroller wanting to retry zookeeper connection
+ waitForMaster(0);
+ log.log(LogLevel.INFO, "SHUTTING DOWN");
+ }
+
+ /** Ignored for unknown reasons */
+ @Test
+ @Ignore
+ public void testMasterZooKeeperCooldown() throws Exception {
+ startingTest("MasterElectionTest::testMasterZooKeeperCooldown");
+ FleetControllerOptions options = new FleetControllerOptions("mycluster");
+ options.masterZooKeeperCooldownPeriod = 3600 * 1000; // An hour
+ setUpFleetController(3, true, options);
+ waitForMaster(0);
+ timer.advanceTime(24 * 3600 * 1000); // A day
+ waitForCompleteCycle(1);
+ log.log(LogLevel.INFO, "SHUTTING DOWN FLEET CONTROLLER 0");
+ fleetControllers.get(0).shutdown();
+ waitForCompleteCycle(1);
+ // 5 minutes is not long enough period to wait before letting this node be master.
+ timer.advanceTime(300 * 1000); // 5 minutes
+ waitForCompleteCycle(1);
+ assertFalse(fleetControllers.get(1).isMaster());
+ // But after an hour it should become one.
+ timer.advanceTime(4000 * 1000); // more than 60 minutes
+ waitForMaster(1);
+ }
+
+ private void waitForMasterReason(String reason, Integer master, List<Target> connections, int nodes[]) {
+ long endTime = System.currentTimeMillis() + timeoutMS;
+ while (System.currentTimeMillis() < endTime) {
+ boolean allOk = true;
+ for (int i=0; i<nodes.length; ++i) {
+ Request req = new Request("getMaster");
+ connections.get(nodes[i]).invokeSync(req, FleetControllerTest.timeoutS);
+ if (req.isError()) { allOk = false; break; }
+ if (master != null && master != req.returnValues().get(0).asInt32()) { allOk = false; break; }
+ if (reason != null && !reason.equals(req.returnValues().get(1).asString())) { allOk = false; break; }
+ }
+ if (allOk) return;
+ try{ Thread.sleep(100); } catch (InterruptedException e) {}
+ }
+ throw new IllegalStateException("Did not get master reason '" + reason
+ + "' within timeout of " + timeoutMS + " ms");
+ }
+
+ /** Ignored for unknown reasons */
+ @Test
+ @Ignore
+ public void testGetMaster() throws Exception {
+ startingTest("MasterElectionTest::testGetMaster");
+ FleetControllerOptions options = new FleetControllerOptions("mycluster");
+ options.masterZooKeeperCooldownPeriod = 3600 * 1000; // An hour
+ setUpFleetController(3, true, options);
+ waitForMaster(0);
+
+ supervisor = new Supervisor(new Transport());
+ List<Target> connections = new ArrayList<Target>();
+ for (FleetController fleetController : fleetControllers) {
+ int rpcPort = fleetController.getRpcPort();
+ Target connection = supervisor.connect(new Spec(rpcPort));
+ assertTrue(connection.isValid());
+ connections.add(connection);
+ }
+
+ timer.advanceTime(24 * 3600 * 1000); // A day
+ waitForCompleteCycles();
+
+ Request req = new Request("getMaster");
+
+ for (int nodeIndex = 0; nodeIndex<3; ++nodeIndex) {
+ for (int retry = 0; retry < FleetControllerTest.timeoutS * 10; ++retry) {
+ req = new Request("getMaster");
+ connections.get(nodeIndex).invokeSync(req, FleetControllerTest.timeoutS);
+ assertEquals(req.errorMessage(), false, req.isError());
+ if (req.returnValues().get(0).asInt32() == 0 &&
+ req.returnValues().get(1).asString().equals("All 3 nodes agree that 0 is current master.")) {
+ break;
+ }
+ }
+ assertEquals(req.toString(), 0, req.returnValues().get(0).asInt32());
+ assertEquals(req.toString(), "All 3 nodes agree that 0 is current master.", req.returnValues().get(1).asString());
+ }
+
+ log.log(LogLevel.INFO, "SHUTTING DOWN FLEET CONTROLLER 0");
+ fleetControllers.get(0).shutdown();
+ // Wait until fc 1 & 2 votes for node 1
+ waitForCompleteCycle(1);
+ waitForCompleteCycle(2);
+ // 5 minutes is not long enough period to wait before letting this node be master.
+ timer.advanceTime(300 * 1000); // 5 minutes
+
+ int remainingNodes[] = { 1, 2 };
+ waitForMasterReason(
+ "2 of 3 nodes agree 1 should be master, but old master cooldown period of 3600000 ms has not passed yet. To ensure it has got time to realize it is no longer master before we elect a new one, currently there is no master.",
+ -1, connections, remainingNodes);
+ // Verify that fc 1 is not master, and the correct reasons for why not
+ assertFalse(fleetControllers.get(1).isMaster());
+
+ // But after an hour it should become one.
+ timer.advanceTime(3600 * 1000); // 60 minutes
+ waitForMaster(1);
+
+ req = new Request("getMaster");
+ connections.get(0).invokeSync(req, FleetControllerTest.timeoutS);
+ assertEquals(req.toString(), 104, req.errorCode());
+ assertEquals(req.toString(), "Connection error", req.errorMessage());
+
+ for (int i=0; i<FleetControllerTest.timeoutS * 10; ++i) {
+ req = new Request("getMaster");
+ connections.get(1).invokeSync(req, FleetControllerTest.timeoutS);
+ assertEquals(req.errorMessage(), false, req.isError());
+ if (req.returnValues().get(0).asInt32() != -1) break;
+ // We may have bad timing causing node not to have realized it is master yet
+ }
+ assertEquals(req.toString(), 1, req.returnValues().get(0).asInt32());
+ assertEquals(req.toString(), "2 of 3 nodes agree 1 is master.", req.returnValues().get(1).asString());
+
+ for (int i=0; i<FleetControllerTest.timeoutS * 10; ++i) {
+ req = new Request("getMaster");
+ connections.get(2).invokeSync(req, FleetControllerTest.timeoutS);
+ assertEquals(req.errorMessage(), false, req.isError());
+ if (req.returnValues().get(0).asInt32() != -1) break;
+ }
+ assertEquals(req.toString(), 1, req.returnValues().get(0).asInt32());
+ assertEquals(req.toString(), "2 of 3 nodes agree 1 is master.", req.returnValues().get(1).asString());
+ }
+
+ @Test
+ public void testReconfigure() throws Exception {
+ startingTest("MasterElectionTest::testReconfigure");
+ FleetControllerOptions options = new FleetControllerOptions("mycluster");
+ options.masterZooKeeperCooldownPeriod = 1;
+ setUpFleetController(3, true, options);
+ waitForMaster(0);
+
+ FleetControllerOptions newOptions = options.clone();
+ for (int i=0; i<fleetControllers.size(); ++i) {
+ FleetControllerOptions nodeOptions = adjustConfig(newOptions, i, fleetControllers.size());
+ fleetControllers.get(i).updateOptions(nodeOptions, 2);
+ }
+ waitForMaster(0);
+ log.log(LogLevel.INFO, "SHUTTING DOWN FLEET CONTROLLER 0");
+ fleetControllers.get(0).shutdown();
+ waitForMaster(1);
+ }
+
+}
diff --git a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/NoZooKeeperTest.java b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/NoZooKeeperTest.java
new file mode 100644
index 00000000000..2191819858c
--- /dev/null
+++ b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/NoZooKeeperTest.java
@@ -0,0 +1,25 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.vespa.clustercontroller.core;
+
+import org.junit.Test;
+import static org.junit.Assert.assertEquals;
+
+public class NoZooKeeperTest extends FleetControllerTest {
+
+ @Test
+ public void testWantedStatesInZooKeeper() throws Exception {
+ startingTest("NoZooKeeperTest::testWantedStatesInZooKeeper");
+ FleetControllerOptions options = new FleetControllerOptions("mycluster");
+ options.zooKeeperServerAddress = null;
+ setUpFleetController(true, options);
+ setUpVdsNodes(true, new DummyVdsNodeOptions());
+ waitForStableSystem();
+
+ assertEquals(true, nodes.get(0).isDistributor());
+ nodes.get(0).disconnect();
+ waitForState("version:\\d+ distributor:10 .0.s:d storage:10");
+
+ nodes.get(0).connect();
+ waitForState("version:\\d+ distributor:10 storage:10");
+ }
+}
diff --git a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/NodeSlobrokConfigurationMembershipTest.java b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/NodeSlobrokConfigurationMembershipTest.java
new file mode 100644
index 00000000000..10305de116a
--- /dev/null
+++ b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/NodeSlobrokConfigurationMembershipTest.java
@@ -0,0 +1,117 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.vespa.clustercontroller.core;
+
+import com.yahoo.vdslib.distribution.ConfiguredNode;
+import com.yahoo.vespa.clustercontroller.core.testutils.Waiter;
+import org.junit.Test;
+
+import java.util.Arrays;
+import java.util.List;
+import java.util.Set;
+import java.util.TreeSet;
+import java.util.stream.Collectors;
+
+import static org.junit.Assert.assertTrue;
+
+public class NodeSlobrokConfigurationMembershipTest extends FleetControllerTest {
+
+ private final Set<Integer> nodeIndices = asIntSet(0, 1, 2, 3);
+ private final int foreignNode = 6;
+
+ private void waitForStateExcludingNodeSubset(String expectedState, Set<Integer> excludedNodes) throws Exception {
+ // Due to the implementation details of the test base, this.waitForState() will always
+ // wait until all nodes added in the test have received the latest cluster state. Since we
+ // want to entirely ignore node #6, it won't get a cluster state at all and the test will
+ // fail unless otherwise handled. We thus use a custom waiter which filters out nodes with
+ // the sneaky index (storage and distributors with same index are treated as different nodes
+ // in this context).
+ Waiter subsetWaiter = new Waiter.Impl(new DataRetriever() {
+ @Override
+ public Object getMonitor() { return timer; }
+ @Override
+ public FleetController getFleetController() { return fleetController; }
+ @Override
+ public List<DummyVdsNode> getDummyNodes() {
+ return nodes.stream()
+ .filter(n -> !excludedNodes.contains(n.getNode().getIndex()))
+ .collect(Collectors.toList());
+ }
+ @Override
+ public int getTimeoutMS() { return timeoutMS; }
+ });
+ subsetWaiter.waitForState(expectedState);
+ }
+
+ private static Set<Integer> asIntSet(Integer... idx) {
+ return Arrays.asList(idx).stream().collect(Collectors.toSet());
+ }
+
+ private static Set<ConfiguredNode> asConfiguredNodes(Set<Integer> indices) {
+ return indices.stream().map(idx -> new ConfiguredNode(idx, false)).collect(Collectors.toSet());
+ }
+
+ private void setUpClusterWithForeignNode(Set<Integer> validIndices, final int foreignNodeIndex) throws Exception {
+ final Set<ConfiguredNode> configuredNodes = asConfiguredNodes(validIndices);
+ FleetControllerOptions options = optionsForConfiguredNodes(configuredNodes);
+ setUpFleetController(true, options);
+ Set<Integer> nodesWithStranger = new TreeSet<>(validIndices);
+ nodesWithStranger.add(foreignNodeIndex);
+ setUpVdsNodes(true, new DummyVdsNodeOptions(), false, nodesWithStranger);
+ }
+
+ private FleetControllerOptions optionsForConfiguredNodes(Set<ConfiguredNode> configuredNodes) {
+ FleetControllerOptions options = new FleetControllerOptions("mycluster", configuredNodes);
+ options.maxSlobrokDisconnectGracePeriod = 60 * 1000;
+ options.nodeStateRequestTimeoutMS = 10000 * 60 * 1000;
+ return options;
+ }
+
+ @Test
+ public void testSlobrokNodeOutsideConfiguredIndexSetIsNotIncludedInCluster() throws Exception {
+ setUpClusterWithForeignNode(nodeIndices, foreignNode);
+ waitForStateExcludingNodeSubset("version:\\d+ distributor:4 storage:4", asIntSet(foreignNode));
+ }
+
+ @Test
+ public void testNodeSetReconfigurationForcesFreshSlobrokFetch() throws Exception {
+ setUpClusterWithForeignNode(nodeIndices, foreignNode);
+ waitForStateExcludingNodeSubset("version:\\d+ distributor:4 storage:4", asIntSet(foreignNode));
+
+ // If we get a configuration with the node present, we have to accept it into
+ // cluster. If we do not re-fetch state from slobrok we risk racing
+ nodeIndices.add(foreignNode);
+ options.nodes = asConfiguredNodes(nodeIndices);
+ fleetController.updateOptions(options, 0);
+ // Need to treat cluster as having 6 nodes due to ideal state algo semantics.
+ // Note that we do not use subsetWaiter here since we want node 6 included.
+ waitForState("version:\\d+ distributor:7 .4.s:d .5.s:d storage:7 .4.s:d .5.s:d");
+ }
+
+ @Test
+ public void test_removed_retired_node_is_not_included_in_state() throws Exception {
+ final Set<ConfiguredNode> configuredNodes = asConfiguredNodes(nodeIndices);
+ FleetControllerOptions options = optionsForConfiguredNodes(configuredNodes);
+ setUpFleetController(true, options);
+ setUpVdsNodes(true, new DummyVdsNodeOptions(), false, nodeIndices);
+
+ waitForState("version:\\d+ distributor:4 storage:4");
+
+ // Update options with 1 node config-retired
+ assertTrue(configuredNodes.remove(new ConfiguredNode(0, false)));
+ configuredNodes.add(new ConfiguredNode(0, true));
+ options.nodes = configuredNodes;
+ fleetController.updateOptions(options, 0);
+
+ waitForState("version:\\d+ distributor:4 storage:4 .0.s:r");
+
+ // Now remove the retired node entirely from config
+ assertTrue(configuredNodes.remove(new ConfiguredNode(0, true)));
+ fleetController.updateOptions(options, 0);
+
+ // The previously retired node should now be marked as done, as it no longer
+ // exists from the point of view of the content cluster. We have to use a subset
+ // state waiter, as the controller will not send the new state to node 0.
+ waitForStateExcludingNodeSubset("version:\\d+ distributor:4 .0.s:d storage:4 .0.s:d", asIntSet(0));
+ }
+
+}
diff --git a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/NodeStateChangeCheckerTest.java b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/NodeStateChangeCheckerTest.java
new file mode 100644
index 00000000000..cb5cee70486
--- /dev/null
+++ b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/NodeStateChangeCheckerTest.java
@@ -0,0 +1,349 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.vespa.clustercontroller.core;
+
+import com.yahoo.vdslib.distribution.ConfiguredNode;
+import com.yahoo.vdslib.distribution.Distribution;
+import com.yahoo.vdslib.distribution.Group;
+import com.yahoo.vdslib.state.Node;
+import com.yahoo.vdslib.state.NodeState;
+import com.yahoo.vdslib.state.NodeType;
+import com.yahoo.vdslib.state.State;
+import com.yahoo.vespa.clustercontroller.core.hostinfo.HostInfo;
+import com.yahoo.vespa.clustercontroller.utils.staterestapi.requests.SetUnitStateRequest;
+import org.junit.Before;
+import org.junit.Test;
+
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Set;
+
+import static org.hamcrest.core.StringContains.containsString;
+import static org.hamcrest.core.Is.is;
+import static org.junit.Assert.*;
+import static org.mockito.Mockito.mock;
+import static org.mockito.Mockito.when;
+
+public class NodeStateChangeCheckerTest {
+
+ private static final int minStorageNodesUp = 3;
+ private static final int requiredRedundancy = 4;
+ private static final int currentClusterState = 2;
+ private static final double minRatioOfStorageNodesUp = 0.9;
+
+ private static final Node nodeDistributor = new Node(NodeType.DISTRIBUTOR, 1);
+ private static final Node nodeStorage = new Node(NodeType.STORAGE, 1);
+
+ private static final NodeState upNodeState = new NodeState(NodeType.STORAGE, State.UP);
+ public static final NodeState maintenanceNodeState = createNodeState(State.MAINTENANCE, "Orchestrator");
+
+ private static NodeState createNodeState(State state, String description) {
+ return new NodeState(NodeType.STORAGE, state).setDescription(description);
+ }
+
+ private NodeStateChangeChecker createChangeChecker(ContentCluster cluster) {
+ return new NodeStateChangeChecker(minStorageNodesUp, minRatioOfStorageNodesUp, requiredRedundancy, cluster.clusterInfo());
+ }
+
+ private ContentCluster createCluster(Collection<ConfiguredNode> nodes) {
+ Distribution distribution = mock(Distribution.class);
+ Group group = new Group(2, "to");
+ when(distribution.getRootGroup()).thenReturn(group);
+ return new ContentCluster("Clustername", nodes, distribution, minStorageNodesUp, 0.0);
+ }
+
+ private StorageNodeInfo createStorageNodeInfo(int index, State state) {
+ Distribution distribution = mock(Distribution.class);
+ Group group = new Group(2, "to");
+ when(distribution.getRootGroup()).thenReturn(group);
+
+ String clusterName = "Clustername";
+ Set<ConfiguredNode> configuredNodeIndexes = new HashSet<>();
+ ContentCluster cluster = new ContentCluster(clusterName, configuredNodeIndexes, distribution, minStorageNodesUp, 0.0);
+
+ String rpcAddress = "";
+ StorageNodeInfo storageNodeInfo = new StorageNodeInfo(cluster, index, false, rpcAddress, distribution);
+ storageNodeInfo.setReportedState(new NodeState(NodeType.STORAGE, state), 3 /* time */);
+ return storageNodeInfo;
+ }
+
+ private String createDistributorHostInfo(int replicationfactor1, int replicationfactor2, int replicationfactor3) {
+ return "{\n" +
+ " \"cluster-state-version\": 2,\n" +
+ " \"distributor\": {\n" +
+ " \"storage-nodes\": [\n" +
+ " {\n" +
+ " \"node-index\": 0,\n" +
+ " \"min-current-replication-factor\": " + replicationfactor1 + "\n" +
+ " },\n" +
+ " {\n" +
+ " \"node-index\": 1,\n" +
+ " \"min-current-replication-factor\": " + replicationfactor2 + "\n" +
+ " },\n" +
+ " {\n" +
+ " \"node-index\": 2,\n" +
+ " \"min-current-replication-factor\": " + replicationfactor3 + "\n" +
+ " },\n" +
+ " {\n" +
+ " \"node-index\": 3\n" +
+ " }\n" +
+ " ]\n" +
+ " }\n" +
+ "}\n";
+ }
+
+ @Test
+ public void testCanUpgradeForce() {
+ NodeStateChangeChecker nodeStateChangeChecker = createChangeChecker(createCluster(createNodes(1)));
+ NodeState newState = new NodeState(NodeType.STORAGE, State.INITIALIZING);
+ NodeStateChangeChecker.Result result = nodeStateChangeChecker.evaluateTransition(
+ nodeDistributor, currentClusterState, SetUnitStateRequest.Condition.FORCE,
+ upNodeState, newState);
+ assertTrue(result.settingWantedStateIsAllowed());
+ assertTrue(!result.wantedStateAlreadySet());
+ }
+
+ @Test
+ public void testSafeSetStateDistributors() {
+ NodeStateChangeChecker nodeStateChangeChecker = createChangeChecker(createCluster(createNodes(1)));
+ NodeStateChangeChecker.Result result = nodeStateChangeChecker.evaluateTransition(
+ nodeDistributor, currentClusterState, SetUnitStateRequest.Condition.SAFE,
+ upNodeState, maintenanceNodeState);
+ assertFalse(result.settingWantedStateIsAllowed());
+ assertFalse(result.wantedStateAlreadySet());
+ assertThat(result.getReason(), containsString("Safe-set of node state is only supported for storage nodes"));
+ }
+
+ @Test
+ public void testCanUpgradeSafeMissingStorage() {
+ ContentCluster cluster = createCluster(createNodes(4));
+ setAllNodesUp(cluster, HostInfo.createHostInfo(createDistributorHostInfo(4, 5, 6)));
+ NodeStateChangeChecker nodeStateChangeChecker = new NodeStateChangeChecker(
+ 5 /* min storage nodes */, minRatioOfStorageNodesUp, requiredRedundancy, cluster.clusterInfo());
+ NodeStateChangeChecker.Result result = nodeStateChangeChecker.evaluateTransition(
+ nodeStorage, currentClusterState, SetUnitStateRequest.Condition.SAFE,
+ upNodeState, maintenanceNodeState);
+ assertFalse(result.settingWantedStateIsAllowed());
+ assertFalse(result.wantedStateAlreadySet());
+ assertThat(result.getReason(), is("There are only 4 storage nodes up, while config requires at least 5"));
+ }
+
+ @Test
+ public void testCanUpgradeStorageSafeYes() {
+ NodeStateChangeChecker.Result result = transitionToMaintenanceWithNoStorageNodesDown();
+ assertTrue(result.settingWantedStateIsAllowed());
+ assertFalse(result.wantedStateAlreadySet());
+ }
+
+ @Test
+ public void testSetUpFailsIfReportedIsDown() {
+ ContentCluster cluster = createCluster(createNodes(4));
+ NodeStateChangeChecker nodeStateChangeChecker = createChangeChecker(cluster);
+ // Not setting nodes up -> all are down
+
+ NodeStateChangeChecker.Result result = nodeStateChangeChecker.evaluateTransition(
+ nodeStorage, currentClusterState, SetUnitStateRequest.Condition.SAFE,
+ maintenanceNodeState, upNodeState);
+ assertFalse(result.settingWantedStateIsAllowed());
+ assertFalse(result.wantedStateAlreadySet());
+ }
+
+ @Test
+ public void testCannotSetUpIfUnknownOldStateAndReportedIsDown() {
+ ContentCluster cluster = createCluster(createNodes(4));
+ NodeStateChangeChecker nodeStateChangeChecker = createChangeChecker(cluster);
+ // Not setting nodes up -> all are down
+
+ NodeStateChangeChecker.Result result = nodeStateChangeChecker.evaluateTransition(
+ nodeStorage, currentClusterState, SetUnitStateRequest.Condition.SAFE,
+ new NodeState(NodeType.STORAGE, State.DOWN), upNodeState);
+ assertFalse(result.settingWantedStateIsAllowed());
+ assertFalse(result.wantedStateAlreadySet());
+ assertThat(result.getReason(), is("Refusing to set wanted state to up when it is currently in Down"));
+ }
+
+ @Test
+ public void testCanUpgradeStorageSafeNo() {
+ ContentCluster cluster = createCluster(createNodes(4));
+ NodeStateChangeChecker nodeStateChangeChecker = createChangeChecker(cluster);
+ setAllNodesUp(cluster, HostInfo.createHostInfo(createDistributorHostInfo(4, 3, 6)));
+
+ NodeStateChangeChecker.Result result = nodeStateChangeChecker.evaluateTransition(
+ nodeStorage, currentClusterState, SetUnitStateRequest.Condition.SAFE,
+ upNodeState, maintenanceNodeState);
+ assertFalse(result.settingWantedStateIsAllowed());
+ assertFalse(result.wantedStateAlreadySet());
+ assertThat(result.getReason(), is("Distributor 0 says storage node 1 " +
+ "has buckets with redundancy as low as 3, but we require at least 4"));
+ }
+
+ @Test
+ public void testCanUpgradeIfMissingMinReplicationFactor() {
+ ContentCluster cluster = createCluster(createNodes(4));
+ NodeStateChangeChecker nodeStateChangeChecker = createChangeChecker(cluster);
+ setAllNodesUp(cluster, HostInfo.createHostInfo(createDistributorHostInfo(4, 3, 6)));
+
+ NodeStateChangeChecker.Result result = nodeStateChangeChecker.evaluateTransition(
+ new Node(NodeType.STORAGE, 3), currentClusterState, SetUnitStateRequest.Condition.SAFE,
+ upNodeState, maintenanceNodeState);
+ assertTrue(result.settingWantedStateIsAllowed());
+ assertFalse(result.wantedStateAlreadySet());
+ }
+
+ @Test
+ public void testCanUpgradeIfStorageNodeMissingFromNodeInfo() {
+ ContentCluster cluster = createCluster(createNodes(4));
+ NodeStateChangeChecker nodeStateChangeChecker = createChangeChecker(cluster);
+ String hostInfo = "{\n" +
+ " \"cluster-state-version\": 2,\n" +
+ " \"distributor\": {\n" +
+ " \"storage-nodes\": [\n" +
+ " {\n" +
+ " \"node-index\": 0,\n" +
+ " \"min-current-replication-factor\": " + requiredRedundancy + "\n" +
+ " }\n" +
+ " ]\n" +
+ " }\n" +
+ "}\n";
+ setAllNodesUp(cluster, HostInfo.createHostInfo(hostInfo));
+
+ NodeStateChangeChecker.Result result = nodeStateChangeChecker.evaluateTransition(
+ new Node(NodeType.STORAGE, 1), currentClusterState, SetUnitStateRequest.Condition.SAFE,
+ upNodeState, maintenanceNodeState);
+ assertTrue(result.settingWantedStateIsAllowed());
+ assertFalse(result.wantedStateAlreadySet());
+ }
+
+ @Test
+ public void testMissingDistributorState() {
+ ContentCluster cluster = createCluster(createNodes(4));
+ NodeStateChangeChecker nodeStateChangeChecker = createChangeChecker(cluster);
+ cluster.clusterInfo().getStorageNodeInfo(1).setReportedState(new NodeState(NodeType.STORAGE, State.UP), 0);
+
+ NodeStateChangeChecker.Result result = nodeStateChangeChecker.evaluateTransition(
+ nodeStorage, currentClusterState, SetUnitStateRequest.Condition.SAFE, upNodeState, maintenanceNodeState);
+ assertFalse(result.settingWantedStateIsAllowed());
+ assertFalse(result.wantedStateAlreadySet());
+ assertThat(result.getReason(), is("Distributor node (0) has not reported any cluster state version yet."));
+ }
+
+ private NodeStateChangeChecker.Result transitionToSameState(State state, String oldDescription, String newDescription) {
+ ContentCluster cluster = createCluster(createNodes(4));
+ NodeStateChangeChecker nodeStateChangeChecker = createChangeChecker(cluster);
+
+ NodeState currentNodeState = createNodeState(state, oldDescription);
+ NodeState newNodeState = createNodeState(state, newDescription);
+ return nodeStateChangeChecker.evaluateTransition(
+ nodeStorage, currentClusterState, SetUnitStateRequest.Condition.SAFE,
+ currentNodeState, newNodeState);
+ }
+
+ private NodeStateChangeChecker.Result transitionToSameState(String oldDescription, String newDescription) {
+ return transitionToSameState(State.MAINTENANCE, oldDescription, newDescription);
+ }
+
+ @Test
+ public void testSettingUpWhenUpCausesAlreadySet() {
+ NodeStateChangeChecker.Result result = transitionToSameState(State.UP, "foo", "bar");
+ assertTrue(result.wantedStateAlreadySet());
+ }
+
+ @Test
+ public void testSettingAlreadySetState() {
+ NodeStateChangeChecker.Result result = transitionToSameState("foo", "foo");
+ assertFalse(result.settingWantedStateIsAllowed());
+ assertTrue(result.wantedStateAlreadySet());
+ }
+
+ @Test
+ public void testDifferentDescriptionImpliesAlreadySet() {
+ NodeStateChangeChecker.Result result = transitionToSameState("foo", "bar");
+ assertFalse(result.settingWantedStateIsAllowed());
+ assertTrue(result.wantedStateAlreadySet());
+ }
+
+ private NodeStateChangeChecker.Result transitionToMaintenanceWithOneStorageNodeDown(
+ int storageNodeIndex, boolean alternatingUpRetiredAndInitializing) {
+ ContentCluster cluster = createCluster(createNodes(4));
+ NodeStateChangeChecker nodeStateChangeChecker = createChangeChecker(cluster);
+
+ for (int x = 0; x < cluster.clusterInfo().getConfiguredNodes().size(); x++) {
+ State state = State.UP;
+ // Pick some retired and initializing nodes too
+ if (alternatingUpRetiredAndInitializing) { // TODO: Move this into the calling test
+ if (x % 3 == 1) state = State.RETIRED;
+ else if (x % 3 == 2) state = State.INITIALIZING;
+ }
+ cluster.clusterInfo().getDistributorNodeInfo(x).setReportedState(new NodeState(NodeType.DISTRIBUTOR, state), 0);
+ cluster.clusterInfo().getDistributorNodeInfo(x).setHostInfo(HostInfo.createHostInfo(createDistributorHostInfo(4, 5, 6)));
+ cluster.clusterInfo().getStorageNodeInfo(x).setReportedState(new NodeState(NodeType.STORAGE, state), 0);
+ }
+
+ if (storageNodeIndex >= 0) { // TODO: Move this into the calling test
+ NodeState downNodeState = new NodeState(NodeType.STORAGE, State.DOWN);
+ cluster.clusterInfo().getStorageNodeInfo(storageNodeIndex).setReportedState(downNodeState, 4 /* time */);
+ }
+
+ return nodeStateChangeChecker.evaluateTransition(
+ nodeStorage, currentClusterState, SetUnitStateRequest.Condition.SAFE, upNodeState, maintenanceNodeState);
+ }
+
+ private void setAllNodesUp(ContentCluster cluster, HostInfo distributorHostInfo) {
+ for (int x = 0; x < cluster.clusterInfo().getConfiguredNodes().size(); x++) {
+ State state = State.UP;
+ cluster.clusterInfo().getDistributorNodeInfo(x).setReportedState(new NodeState(NodeType.DISTRIBUTOR, state), 0);
+ cluster.clusterInfo().getDistributorNodeInfo(x).setHostInfo(distributorHostInfo);
+ cluster.clusterInfo().getStorageNodeInfo(x).setReportedState(new NodeState(NodeType.STORAGE, state), 0);
+ }
+ }
+
+ private NodeStateChangeChecker.Result transitionToMaintenanceWithOneStorageNodeDown(int storageNodeIndex) {
+ return transitionToMaintenanceWithOneStorageNodeDown(storageNodeIndex, false);
+ }
+
+ private NodeStateChangeChecker.Result transitionToMaintenanceWithNoStorageNodesDown() {
+ return transitionToMaintenanceWithOneStorageNodeDown(-1, false);
+ }
+
+ @Test
+ public void testCanUpgradeWhenAllUp() {
+ NodeStateChangeChecker.Result result = transitionToMaintenanceWithNoStorageNodesDown();
+ assertTrue(result.settingWantedStateIsAllowed());
+ assertFalse(result.wantedStateAlreadySet());
+ }
+
+ @Test
+ public void testCanUpgradeWhenAllUpOrRetired() {
+ NodeStateChangeChecker.Result result = transitionToMaintenanceWithNoStorageNodesDown();
+ assertTrue(result.settingWantedStateIsAllowed());
+ assertFalse(result.wantedStateAlreadySet());
+ }
+
+ @Test
+ public void testCanUpgradeWhenStorageIsDown() {
+ NodeStateChangeChecker.Result result = transitionToMaintenanceWithOneStorageNodeDown(nodeStorage.getIndex());
+ assertTrue(result.settingWantedStateIsAllowed());
+ assertFalse(result.wantedStateAlreadySet());
+ }
+
+ @Test
+ public void testCannotUpgradeWhenOtherStorageIsDown() {
+ int otherIndex = 2;
+ // If this fails, just set otherIndex to some other valid index.
+ assertNotEquals(nodeStorage.getIndex(), otherIndex);
+
+ NodeStateChangeChecker.Result result = transitionToMaintenanceWithOneStorageNodeDown(otherIndex);
+ assertFalse(result.settingWantedStateIsAllowed());
+ assertFalse(result.wantedStateAlreadySet());
+ assertThat(result.getReason(), containsString("Not enough storage nodes running"));
+ }
+
+ private List<ConfiguredNode> createNodes(int count) {
+ List<ConfiguredNode> nodes = new ArrayList<>();
+ for (int i = 0; i < count; i++)
+ nodes.add(new ConfiguredNode(i, false));
+ return nodes;
+ }
+
+} \ No newline at end of file
diff --git a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/RpcServerTest.java b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/RpcServerTest.java
new file mode 100644
index 00000000000..2816b75622e
--- /dev/null
+++ b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/RpcServerTest.java
@@ -0,0 +1,627 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.vespa.clustercontroller.core;
+
+import com.yahoo.vdslib.distribution.ConfiguredNode;
+import com.yahoo.vespa.clustercontroller.core.rpc.RpcServer;
+import com.yahoo.vespa.clustercontroller.core.testutils.LogFormatter;
+import com.yahoo.vespa.clustercontroller.core.testutils.WaitCondition;
+import com.yahoo.vespa.config.content.StorDistributionConfig;
+import com.yahoo.jrt.*;
+import com.yahoo.jrt.StringValue;
+import com.yahoo.jrt.slobrok.server.Slobrok;
+import com.yahoo.log.LogLevel;
+import com.yahoo.vdslib.distribution.Distribution;
+import com.yahoo.vdslib.state.*;
+import org.junit.Test;
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertTrue;
+import static org.junit.Assert.assertFalse;
+
+import java.net.InetAddress;
+import java.util.*;
+import java.util.logging.Logger;
+
+/**
+ * @author humbe
+ */
+public class RpcServerTest extends FleetControllerTest {
+
+ public static Logger log = Logger.getLogger(RpcServerTest.class.getName());
+
+ protected Supervisor supervisor;
+
+ public void tearDown() throws Exception {
+ if (supervisor != null) {
+ supervisor.transport().shutdown().join();
+ }
+ super.tearDown();
+ }
+
+ @Test
+ public void testRebinding() throws Exception {
+ startingTest("RpcServerTest::testRebinding");
+ Slobrok slobrok = new Slobrok();
+ String slobrokConnectionSpecs[] = new String[1];
+ slobrokConnectionSpecs[0] = "tcp/"+ InetAddress.getLocalHost().getHostName()+":" + slobrok.port();
+ RpcServer server = new RpcServer(timer, new Object(), "mycluster", 0, new BackOff());
+ server.setSlobrokConnectionSpecs(slobrokConnectionSpecs, 0);
+ int portUsed = server.getPort();
+ server.setSlobrokConnectionSpecs(slobrokConnectionSpecs, portUsed);
+ server.disconnect();
+ server.disconnect();
+ server.connect();
+ server.connect();
+ server.disconnect();
+ server.connect();
+ server.shutdown();
+ slobrok.stop();
+ }
+
+ /**
+ * For some reason, the first test trying to set up a stable system here occasionally times out.
+ * The theory is that some test run before it does something that is not cleaned up in time.
+ * Trying to add a test that should provoke the failure, but not fail due to it to see if we can verify that
+ * assumption.
+ *
+ * (testRebinding() does not seem to be that test. Tests in StateChangeTest that runs before this test tests very
+ * similar things, so strange if it should be from them too though. Maybe last test there.
+ */
+ @Test
+ public void testFailOccasionallyAndIgnoreToSeeIfOtherTestsThenWork() {
+ try{
+ startingTest("RpcServerTest::testFailOccasionallyAndIgnoreToSeeIfOtherTestsThenWork");
+ setUpFleetController(true, new FleetControllerOptions("mycluster"));
+ setUpVdsNodes(true, new DummyVdsNodeOptions());
+ waitForStableSystem();
+ } catch (Throwable t) {}
+ }
+
+ @Test
+ public void testGetSystemState() throws Exception {
+ LogFormatter.initializeLogging();
+ startingTest("RpcServerTest::testGetSystemState");
+ FleetControllerOptions options = new FleetControllerOptions("mycluster");
+ setUpFleetController(true, options);
+ setUpVdsNodes(true, new DummyVdsNodeOptions());
+ waitForStableSystem();
+
+ assertEquals(true, nodes.get(0).isDistributor());
+ log.log(LogLevel.INFO, "Disconnecting distributor 0. Waiting for state to reflect change.");
+ nodes.get(0).disconnect();
+ nodes.get(19).disconnect();
+ fleetController.waitForNodesInSlobrok(9, 9, timeoutMS);
+ timer.advanceTime(options.nodeStateRequestTimeoutMS + options.maxSlobrokDisconnectGracePeriod);
+
+ wait(new WaitCondition.StateWait(fleetController, fleetController.getMonitor()) {
+ @Override
+ public String isConditionMet() {
+ if (currentState == null) {
+ return "No cluster state defined yet";
+ }
+ NodeState distState = currentState.getNodeState(new Node(NodeType.DISTRIBUTOR, 0));
+ if (distState.getState() != State.DOWN) {
+ return "Distributor not detected down yet: " + currentState.toString();
+ }
+ NodeState storState = currentState.getNodeState(new Node(NodeType.STORAGE, 9));
+ if (!storState.getState().oneOf("md")) {
+ return "Storage node not detected down yet: " + currentState.toString();
+ }
+ return null;
+ }
+ }, null, timeoutMS);
+
+ int rpcPort = fleetController.getRpcPort();
+ supervisor = new Supervisor(new Transport());
+ Target connection = supervisor.connect(new Spec(rpcPort));
+ assertTrue(connection.isValid());
+
+ Request req = new Request("getSystemState");
+ connection.invokeSync(req, timeoutS);
+ assertEquals(req.toString(), ErrorCode.NONE, req.errorCode());
+ assertTrue(req.toString(), req.checkReturnTypes("ss"));
+ String systemState = req.returnValues().get(1).asString();
+ ClusterState retrievedClusterState = new ClusterState(systemState);
+ assertEquals(systemState, State.DOWN, retrievedClusterState.getNodeState(new Node(NodeType.DISTRIBUTOR, 0)).getState());
+ assertTrue(systemState, retrievedClusterState.getNodeState(new Node(NodeType.STORAGE, 9)).getState().oneOf("md"));
+ }
+
+ private void setWantedNodeState(State newState, NodeType nodeType, int nodeIndex) {
+ int rpcPort = fleetController.getRpcPort();
+ if (supervisor == null) {
+ supervisor = new Supervisor(new Transport());
+ }
+ Target connection = supervisor.connect(new Spec(rpcPort));
+ assertTrue(connection.isValid());
+
+ Node node = new Node(nodeType, nodeIndex);
+ NodeState newNodeState = new NodeState(nodeType, newState);
+
+ Request req = new Request("setNodeState");
+ req.parameters().add(new StringValue("storage/cluster.mycluster/" + node.getType().toString() + "/" + node.getIndex()));
+ req.parameters().add(new StringValue(newNodeState.serialize(true)));
+ connection.invokeSync(req, timeoutS);
+ assertEquals(req.toString(), ErrorCode.NONE, req.errorCode());
+ assertTrue(req.toString(), req.checkReturnTypes("s"));
+ }
+
+ @Test
+ public void testGetNodeState() throws Exception {
+ startingTest("RpcServerTest::testGetNodeState");
+ Set<ConfiguredNode> configuredNodes = new TreeSet<>();
+ for (int i = 0; i < 10; i++)
+ configuredNodes.add(new ConfiguredNode(i, false));
+ FleetControllerOptions options = new FleetControllerOptions("mycluster", configuredNodes);
+ options.minRatioOfStorageNodesUp = 0;
+ options.maxInitProgressTime = 30000;
+ options.stableStateTimePeriod = 60000;
+ setUpFleetController(true, options);
+ setUpVdsNodes(true, new DummyVdsNodeOptions());
+ waitForStableSystem();
+
+ setWantedNodeState(State.DOWN, NodeType.DISTRIBUTOR, 2);
+ setWantedNodeState(State.RETIRED, NodeType.STORAGE, 2);
+ setWantedNodeState(State.MAINTENANCE, NodeType.STORAGE, 7);
+ waitForCompleteCycle();
+ timer.advanceTime(1000000);
+ waitForCompleteCycle(); // Make fleet controller notice that time has changed before any disconnects
+ nodes.get(0).disconnect();
+ nodes.get(3).disconnect();
+ nodes.get(5).disconnect();
+ waitForState("version:\\d+ distributor:10 .0.s:d .2.s:d storage:10 .1.s:m .2.s:m .7.s:m");
+ timer.advanceTime(1000000);
+ waitForState("version:\\d+ distributor:10 .0.s:d .2.s:d storage:10 .1.s:d .2.s:d .7.s:m");
+ timer.advanceTime(1000000);
+ waitForCompleteCycle(); // Make fleet controller notice that time has changed before any disconnects
+ nodes.get(3).setNodeState(new NodeState(nodes.get(3).getType(), State.INITIALIZING).setInitProgress(0.2));
+ nodes.get(3).connect();
+ waitForState("version:\\d+ distributor:10 .0.s:d .2.s:d storage:10 .1.s:i .1.i:0.2 .2.s:d .7.s:m");
+
+ int rpcPort = fleetController.getRpcPort();
+ supervisor = new Supervisor(new Transport());
+ Target connection = supervisor.connect(new Spec(rpcPort));
+ assertTrue(connection.isValid());
+
+ Request req = new Request("getNodeState");
+ req.parameters().add(new StringValue("distributor"));
+ req.parameters().add(new Int32Value(0));
+ connection.invokeSync(req, timeoutS);
+ assertEquals(req.toString(), ErrorCode.NONE, req.errorCode());
+ assertTrue(req.toString(), req.checkReturnTypes("ssss"));
+ assertEquals(State.DOWN, NodeState.deserialize(NodeType.DISTRIBUTOR, req.returnValues().get(0).asString()).getState());
+ NodeState reported = NodeState.deserialize(NodeType.DISTRIBUTOR, req.returnValues().get(1).asString());
+ assertTrue(req.returnValues().get(1).asString(), reported.getState().oneOf("d-"));
+ assertEquals("", req.returnValues().get(2).asString());
+
+ req = new Request("getNodeState");
+ req.parameters().add(new StringValue("distributor"));
+ req.parameters().add(new Int32Value(2));
+ connection.invokeSync(req, timeoutS);
+ assertEquals(req.toString(), ErrorCode.NONE, req.errorCode());
+ assertTrue(req.toString(), req.checkReturnTypes("ssss"));
+ assertEquals(State.DOWN, NodeState.deserialize(NodeType.DISTRIBUTOR, req.returnValues().get(0).asString()).getState());
+ assertEquals("t:946080000", req.returnValues().get(1).asString());
+ assertEquals(State.DOWN, NodeState.deserialize(NodeType.DISTRIBUTOR, req.returnValues().get(2).asString()).getState());
+
+ req = new Request("getNodeState");
+ req.parameters().add(new StringValue("distributor"));
+ req.parameters().add(new Int32Value(4));
+ connection.invokeSync(req, timeoutS);
+ assertEquals(req.toString(), ErrorCode.NONE, req.errorCode());
+ assertTrue(req.toString(), req.checkReturnTypes("ssss"));
+ assertEquals("", req.returnValues().get(0).asString());
+ assertEquals("t:946080000", req.returnValues().get(1).asString());
+ assertEquals("", req.returnValues().get(2).asString());
+
+ req = new Request("getNodeState");
+ req.parameters().add(new StringValue("distributor"));
+ req.parameters().add(new Int32Value(15));
+ connection.invokeSync(req, timeoutS);
+ assertEquals(req.toString(), ErrorCode.METHOD_FAILED, req.errorCode());
+ assertEquals("No node distributor.15 exists in cluster mycluster", req.errorMessage());
+ assertFalse(req.toString(), req.checkReturnTypes("ssss"));
+
+ req = new Request("getNodeState");
+ req.parameters().add(new StringValue("storage"));
+ req.parameters().add(new Int32Value(1));
+ connection.invokeSync(req, timeoutS);
+ assertEquals(req.toString(), ErrorCode.NONE, req.errorCode());
+ assertTrue(req.toString(), req.checkReturnTypes("ssss"));
+ assertEquals("s:i i:0.2", req.returnValues().get(0).asString());
+ assertEquals("s:i i:0.2", req.returnValues().get(1).asString());
+ assertEquals("", req.returnValues().get(2).asString());
+
+ req = new Request("getNodeState");
+ req.parameters().add(new StringValue("storage"));
+ req.parameters().add(new Int32Value(2));
+ connection.invokeSync(req, timeoutS);
+ assertEquals(req.toString(), ErrorCode.NONE, req.errorCode());
+ assertTrue(req.toString(), req.checkReturnTypes("ssss"));
+ assertEquals(State.DOWN, NodeState.deserialize(NodeType.STORAGE, req.returnValues().get(0).asString()).getState());
+ reported = NodeState.deserialize(NodeType.STORAGE, req.returnValues().get(1).asString());
+ assertTrue(req.returnValues().get(1).asString(), reported.getState().oneOf("d-"));
+ assertEquals(State.RETIRED, NodeState.deserialize(NodeType.STORAGE, req.returnValues().get(2).asString()).getState());
+
+ req = new Request("getNodeState");
+ req.parameters().add(new StringValue("storage"));
+ req.parameters().add(new Int32Value(5));
+ connection.invokeSync(req, timeoutS);
+ assertEquals(req.toString(), ErrorCode.NONE, req.errorCode());
+ assertTrue(req.toString(), req.checkReturnTypes("ssss"));
+ assertEquals("", req.returnValues().get(0).asString());
+ assertEquals("t:946080000", req.returnValues().get(1).asString());
+ assertEquals("", req.returnValues().get(2).asString());
+
+ req = new Request("getNodeState");
+ req.parameters().add(new StringValue("storage"));
+ req.parameters().add(new Int32Value(7));
+ connection.invokeSync(req, timeoutS);
+ assertEquals(req.toString(), ErrorCode.NONE, req.errorCode());
+ assertTrue(req.toString(), req.checkReturnTypes("ssss"));
+ assertEquals(State.MAINTENANCE, NodeState.deserialize(NodeType.STORAGE, req.returnValues().get(0).asString()).getState());
+ assertEquals("t:946080000", req.returnValues().get(1).asString());
+ assertEquals(State.MAINTENANCE, NodeState.deserialize(NodeType.STORAGE, req.returnValues().get(2).asString()).getState());
+ }
+
+ @Test
+ public void testGetNodeStateWithConfiguredRetired() throws Exception {
+ startingTest("RpcServerTest::testGetNodeStateWithConfiguredRetired");
+ List<ConfiguredNode> configuredNodes = new ArrayList<>();
+ for (int i = 0; i < 9; i++)
+ configuredNodes.add(new ConfiguredNode(i, false));
+ configuredNodes.add(new ConfiguredNode(9, true)); // Last node is configured retired
+ FleetControllerOptions options = new FleetControllerOptions("mycluster", configuredNodes);
+ options.minRatioOfStorageNodesUp = 0;
+ options.maxInitProgressTime = 30000;
+ options.stableStateTimePeriod = 60000;
+ setUpFleetController(true, options);
+ setUpVdsNodes(true, new DummyVdsNodeOptions(), false, configuredNodes);
+ waitForState("version:\\d+ distributor:10 storage:10 .9.s:r");
+
+ setWantedNodeState(State.DOWN, NodeType.DISTRIBUTOR, 2);
+ setWantedNodeState(State.RETIRED, NodeType.STORAGE, 2);
+ setWantedNodeState(State.MAINTENANCE, NodeType.STORAGE, 7);
+ waitForCompleteCycle();
+ timer.advanceTime(1000000);
+ waitForCompleteCycle(); // Make fleet controller notice that time has changed before any disconnects
+ nodes.get(0).disconnect();
+ nodes.get(3).disconnect();
+ nodes.get(5).disconnect();
+ waitForState("version:\\d+ distributor:10 .0.s:d .2.s:d storage:10 .1.s:m .2.s:m .7.s:m .9.s:r");
+ timer.advanceTime(1000000);
+ waitForState("version:\\d+ distributor:10 .0.s:d .2.s:d storage:10 .1.s:d .2.s:d .7.s:m .9.s:r");
+ timer.advanceTime(1000000);
+ waitForCompleteCycle(); // Make fleet controller notice that time has changed before any disconnects
+ nodes.get(3).setNodeState(new NodeState(nodes.get(3).getType(), State.INITIALIZING).setInitProgress(0.2));
+ nodes.get(3).connect();
+ waitForState("version:\\d+ distributor:10 .0.s:d .2.s:d storage:10 .1.s:i .1.i:0.2 .2.s:d .7.s:m .9.s:r");
+ }
+
+ @Test
+ public void testGetNodeStateWithConfigurationChangeToRetiredWhileNodeDown() throws Exception {
+ startingTest("RpcServerTest::testGetNodeStateWithConfigurationChangeToRetiredWhileNodeDown");
+
+ { // Configuration: 5 nodes, all normal
+ List<ConfiguredNode> configuredNodes = new ArrayList<>();
+ for (int i = 0; i < 5; i++)
+ configuredNodes.add(new ConfiguredNode(i, false));
+ FleetControllerOptions options = new FleetControllerOptions("mycluster", configuredNodes);
+ options.maxInitProgressTime = 30000;
+ options.stableStateTimePeriod = 60000;
+ setUpFleetController(true, options);
+ setUpVdsNodes(true, new DummyVdsNodeOptions(), false, configuredNodes);
+ waitForState("version:\\d+ distributor:5 storage:5");
+ }
+
+ { // 2 first storage nodes go down (0 and 2 are the corresponding distributors)
+ waitForCompleteCycle();
+ timer.advanceTime(1000000);
+ waitForCompleteCycle(); // Make fleet controller notice that time has changed before any disconnects
+ nodes.get(1).disconnectImmediately();
+ nodes.get(3).disconnectImmediately();
+ waitForState("version:\\d+ distributor:5 storage:5 .0.s:m .1.s:m");
+ }
+
+ { // Configuration change: Add 2 new nodes and retire the 5 existing ones
+ setUpVdsNodes(true, new DummyVdsNodeOptions(), false, 2);
+ Set<ConfiguredNode> configuredNodes = new TreeSet<>();
+ for (int i = 0; i < 5; i++)
+ configuredNodes.add(new ConfiguredNode(i, true));
+ configuredNodes.add(new ConfiguredNode(5, false));
+ configuredNodes.add(new ConfiguredNode(6, false));
+ FleetControllerOptions options = new FleetControllerOptions("mycluster", configuredNodes);
+ options.slobrokConnectionSpecs = this.options.slobrokConnectionSpecs;
+ this.options.maxInitProgressTime = 30000;
+ this.options.stableStateTimePeriod = 60000;
+ fleetController.updateOptions(options, 0);
+ waitForState("version:\\d+ distributor:7 storage:7 .0.s:m .1.s:m .2.s:r .3.s:r .4.s:r");
+ }
+
+ { // 2 storage nodes down come up, should go to state retired
+ waitForCompleteCycle();
+ timer.advanceTime(1000000);
+ waitForCompleteCycle(); // Make fleet controller notice that time has changed before any disconnects
+ nodes.get(1).connect();
+ nodes.get(3).connect();
+ waitForState("version:\\d+ distributor:7 storage:7 .0.s:r .1.s:r .2.s:r .3.s:r .4.s:r");
+ }
+
+ { // 2 first storage nodes go down again
+ waitForCompleteCycle();
+ timer.advanceTime(1000000);
+ waitForCompleteCycle(); // Make fleet controller notice that time has changed before any disconnects
+ nodes.get(1).disconnectImmediately();
+ nodes.get(3).disconnectImmediately();
+ waitForState("version:\\d+ distributor:7 storage:7 .0.s:m .1.s:m .2.s:r .3.s:r .4.s:r");
+ }
+
+ { // Configuration change: Unretire the nodes
+ Set<ConfiguredNode> configuredNodes = new TreeSet<>();
+ for (int i = 0; i < 7; i++)
+ configuredNodes.add(new ConfiguredNode(i, false));
+ FleetControllerOptions options = new FleetControllerOptions("mycluster", configuredNodes);
+ options.slobrokConnectionSpecs = this.options.slobrokConnectionSpecs;
+ this.options.maxInitProgressTime = 30000;
+ this.options.stableStateTimePeriod = 60000;
+ fleetController.updateOptions(options, 0);
+ waitForState("version:\\d+ distributor:7 storage:7 .0.s:m .1.s:m");
+ }
+
+ { // 2 storage nodes down come up, should go to state up
+ waitForCompleteCycle();
+ timer.advanceTime(1000000);
+ waitForCompleteCycle(); // Make fleet controller notice that time has changed before any disconnects
+ nodes.get(1).connect();
+ nodes.get(3).connect();
+ waitForState("version:\\d+ distributor:7 storage:7");
+ }
+
+ }
+
+ @Test
+ public void testGetNodeStateWithConfigurationChangeToRetired() throws Exception {
+ startingTest("RpcServerTest::testGetNodeStateWithConfigurationChangeToRetired");
+
+ { // Configuration: 5 nodes, all normal
+ List<ConfiguredNode> configuredNodes = new ArrayList<>();
+ for (int i = 0; i < 5; i++)
+ configuredNodes.add(new ConfiguredNode(i, false));
+ FleetControllerOptions options = new FleetControllerOptions("mycluster", configuredNodes);
+ options.maxInitProgressTime = 30000;
+ options.stableStateTimePeriod = 60000;
+ setUpFleetController(true, options);
+ setUpVdsNodes(true, new DummyVdsNodeOptions(), false, configuredNodes);
+ waitForState("version:\\d+ distributor:5 storage:5");
+ }
+
+ { // Reconfigure with the same state
+ Set<ConfiguredNode> configuredNodes = new TreeSet<>();
+ for (int i = 0; i < 5; i++)
+ configuredNodes.add(new ConfiguredNode(i, false));
+ FleetControllerOptions options = new FleetControllerOptions("mycluster", configuredNodes);
+ options.slobrokConnectionSpecs = this.options.slobrokConnectionSpecs;
+ this.options.maxInitProgressTime = 30000;
+ this.options.stableStateTimePeriod = 60000;
+ fleetController.updateOptions(options, 0);
+ waitForState("version:\\d+ distributor:5 storage:5");
+ }
+
+ { // Configuration change: Add 2 new nodes and retire the 5 existing ones
+ setUpVdsNodes(true, new DummyVdsNodeOptions(), false, 2);
+ Set<ConfiguredNode> configuredNodes = new TreeSet<>();
+ for (int i = 0; i < 5; i++)
+ configuredNodes.add(new ConfiguredNode(i, true));
+ configuredNodes.add(new ConfiguredNode(5, false));
+ configuredNodes.add(new ConfiguredNode(6, false));
+ FleetControllerOptions options = new FleetControllerOptions("mycluster", configuredNodes);
+ options.slobrokConnectionSpecs = this.options.slobrokConnectionSpecs;
+ this.options.maxInitProgressTime = 30000;
+ this.options.stableStateTimePeriod = 60000;
+ fleetController.updateOptions(options, 0);
+ waitForState("version:\\d+ distributor:7 storage:7 .0.s:r .1.s:r .2.s:r .3.s:r .4.s:r");
+ }
+
+ { // Reconfigure with the same state
+ Set<ConfiguredNode> configuredNodes = new TreeSet<>();
+ for (int i = 0; i < 5; i++)
+ configuredNodes.add(new ConfiguredNode(i, true));
+ configuredNodes.add(new ConfiguredNode(5, false));
+ configuredNodes.add(new ConfiguredNode(6, false));
+ FleetControllerOptions options = new FleetControllerOptions("mycluster", configuredNodes);
+ options.slobrokConnectionSpecs = this.options.slobrokConnectionSpecs;
+ this.options.maxInitProgressTime = 30000;
+ this.options.stableStateTimePeriod = 60000;
+ fleetController.updateOptions(options, 0);
+ waitForState("version:\\d+ distributor:7 storage:7 .0.s:r .1.s:r .2.s:r .3.s:r .4.s:r");
+ }
+
+ { // Configuration change: Remove the previously retired nodes
+ /*
+ TODO: Verify current result: version:23 distributor:7 .0.s:d .1.s:d .2.s:d .3.s:d .4.s:d storage:7 .0.s:m .1.s:m .2.s:m .3.s:m .4.s:m
+ TODO: Make this work without stopping/disconnecting (see SystemStateGenerator.setNodes
+ Set<ConfiguredNode> configuredNodes = new TreeSet<>();
+ configuredNodes.add(new ConfiguredNode(5, false));
+ configuredNodes.add(new ConfiguredNode(6, false));
+ FleetControllerOptions options = new FleetControllerOptions("mycluster", configuredNodes);
+ options.slobrokConnectionSpecs = this.options.slobrokConnectionSpecs;
+ this.options.maxInitProgressTime = 30000;
+ this.options.stableStateTimePeriod = 60000;
+ fleetController.updateOptions(options, 0);
+ for (int i = 0; i < 5*2; i++) {
+ nodes.get(i).disconnectSlobrok();
+ nodes.get(i).disconnect();
+ }
+ waitForState("version:\\d+ distributor:7 storage:7 .0.s:d .1.s:d .2.s:d .3.s:d .4.s:d");
+ */
+ }
+ }
+
+ public StorDistributionConfig getDistConfig(Set<Integer> nodes) {
+ List<StorDistributionConfig.Group.Nodes.Builder> nodeList = new LinkedList<>();
+ for (int i : nodes) {
+ StorDistributionConfig.Group.Nodes.Builder nodeConfig = new StorDistributionConfig.Group.Nodes.Builder();
+ nodeConfig.index(i);
+ nodeList.add(nodeConfig);
+ }
+ StorDistributionConfig.Group.Builder groupConfig = new StorDistributionConfig.Group.Builder();
+ groupConfig.nodes(nodeList);
+ groupConfig.index("0");
+ groupConfig.name("foo");
+ StorDistributionConfig.Builder distConfig = new StorDistributionConfig.Builder();
+ distConfig.group(groupConfig);
+ return new StorDistributionConfig(distConfig);
+ }
+
+ @Test
+ public void testSetNodeState() throws Exception {
+ startingTest("RpcServerTest::testSetNodeState");
+ FleetControllerOptions options = new FleetControllerOptions("mycluster");
+ Set<Integer> nodeIndexes = new TreeSet<>(Arrays.asList(new Integer[]{4, 6, 9, 10, 14, 16, 21, 22, 23, 25}));
+ options.setStorageDistribution(new Distribution(getDistConfig(nodeIndexes)));
+ setUpFleetController(true, options);
+ setUpVdsNodes(true, new DummyVdsNodeOptions(), false, nodeIndexes);
+ waitForState("version:\\d+ distributor:26 .0.s:d .1.s:d .2.s:d .3.s:d .5.s:d .7.s:d .8.s:d .11.s:d .12.s:d .13.s:d .15.s:d .17.s:d .18.s:d .19.s:d .20.s:d .24.s:d storage:26 .0.s:d .1.s:d .2.s:d .3.s:d .5.s:d .7.s:d .8.s:d .11.s:d .12.s:d .13.s:d .15.s:d .17.s:d .18.s:d .19.s:d .20.s:d .24.s:d");
+
+ int rpcPort = fleetController.getRpcPort();
+ supervisor = new Supervisor(new Transport());
+ Target connection = supervisor.connect(new Spec(rpcPort));
+ assertTrue(connection.isValid());
+
+ Request req = new Request("setNodeState");
+ req.parameters().add(new StringValue("storage/cluster.mycluster/storage/14"));
+ req.parameters().add(new StringValue("s:r"));
+ connection.invokeSync(req, timeoutS);
+ assertEquals(req.toString(), ErrorCode.NONE, req.errorCode());
+ assertTrue(req.toString(), req.checkReturnTypes("s"));
+
+ waitForState("version:\\d+ distributor:26 .* storage:26 .* .14.s:r .*");
+
+ req = new Request("setNodeState");
+ req.parameters().add(new StringValue("storage/cluster.mycluster/storage/16"));
+ req.parameters().add(new StringValue("s:m"));
+ connection.invokeSync(req, timeoutS);
+ assertEquals(req.toString(), ErrorCode.NONE, req.errorCode());
+ assertTrue(req.toString(), req.checkReturnTypes("s"));
+
+ ClusterState state = waitForState("version:\\d+ distributor:26 .* storage:26 .* .14.s:r.* .16.s:m .*");
+ nodes.get(5 * 2 + 1).disconnect();
+ waitForCompleteCycle();
+ timer.advanceTime(100000000);
+ waitForCompleteCycle();
+ assertEquals(State.MAINTENANCE, fleetController.getSystemState().getNodeState(new Node(NodeType.STORAGE, 16)).getState());
+
+ nodes.get(4 * 2 + 1).disconnect();
+ waitForState("version:\\d+ distributor:26 .* storage:26 .* .14.s:m.* .16.s:m .*");
+ nodes.get(4 * 2 + 1).connect();
+ timer.advanceTime(100000000);
+ // Might need to pass more actual time while waiting below?
+ waitForState("version:\\d+ distributor:26 .* storage:26 .* .14.s:r.* .16.s:m .*");
+ }
+
+ @Test
+ public void testSetNodeStateOutOfRange() throws Exception {
+ startingTest("RpcServerTest::testSetNodeStateOutOfRange");
+ FleetControllerOptions options = new FleetControllerOptions("mycluster");
+ options.setStorageDistribution(new Distribution(Distribution.getDefaultDistributionConfig(2, 10)));
+ setUpFleetController(true, options);
+ setUpVdsNodes(true, new DummyVdsNodeOptions());
+ waitForStableSystem();
+
+ int rpcPort = fleetController.getRpcPort();
+ supervisor = new Supervisor(new Transport());
+ Target connection = supervisor.connect(new Spec(rpcPort));
+ assertTrue(connection.isValid());
+
+ Request req = new Request("setNodeState");
+ req.parameters().add(new StringValue("storage/cluster.mycluster/storage/10"));
+ req.parameters().add(new StringValue("s:m"));
+ connection.invokeSync(req, timeoutS);
+ assertEquals(req.toString(), ErrorCode.METHOD_FAILED, req.errorCode());
+ assertEquals(req.toString(), "Cannot set wanted state of node storage.10. Index does not correspond to a configured node.", req.errorMessage());
+
+ req = new Request("setNodeState");
+ req.parameters().add(new StringValue("storage/cluster.mycluster/distributor/10"));
+ req.parameters().add(new StringValue("s:m"));
+ connection.invokeSync(req, timeoutS);
+ assertEquals(req.toString(), ErrorCode.METHOD_FAILED, req.errorCode());
+ assertEquals(req.toString(), "Cannot set wanted state of node distributor.10. Index does not correspond to a configured node.", req.errorMessage());
+
+ req = new Request("setNodeState");
+ req.parameters().add(new StringValue("storage/cluster.mycluster/storage/9"));
+ req.parameters().add(new StringValue("s:m"));
+ connection.invokeSync(req, timeoutS);
+ assertEquals(req.toString(), ErrorCode.NONE, req.errorCode());
+
+ waitForState("version:\\d+ distributor:10 storage:10 .9.s:m");
+ }
+
+ @Test
+ public void testGetMaster() throws Exception {
+ startingTest("RpcServerTest::testGetMaster");
+ FleetControllerOptions options = new FleetControllerOptions("mycluster");
+ options.setStorageDistribution(new Distribution(Distribution.getDefaultDistributionConfig(2, 10)));
+ setUpFleetController(true, options);
+ setUpVdsNodes(true, new DummyVdsNodeOptions());
+ waitForStableSystem();
+
+ int rpcPort = fleetController.getRpcPort();
+ supervisor = new Supervisor(new Transport());
+ Target connection = supervisor.connect(new Spec(rpcPort));
+ assertTrue(connection.isValid());
+
+ Request req = new Request("getMaster");
+ connection.invokeSync(req, timeoutS);
+ assertEquals(req.toString(), 0, req.returnValues().get(0).asInt32());
+ assertEquals(req.toString(), "All 1 nodes agree that 0 is current master.", req.returnValues().get(1).asString());
+
+ // Note that this feature is tested better in MasterElectionTest.testGetMaster as it has multiple fleetcontrollers
+ }
+
+ @Test
+ public void testGetNodeList() throws Exception {
+ startingTest("RpcServerTest::testGetNodeList");
+ setUpFleetController(true, new FleetControllerOptions("mycluster"));
+ setUpVdsNodes(true, new DummyVdsNodeOptions());
+ waitForStableSystem();
+
+ assertEquals(true, nodes.get(0).isDistributor());
+ nodes.get(0).disconnect();
+ waitForState("version:\\d+ distributor:10 .0.s:d storage:10");
+
+ int rpcPort = fleetController.getRpcPort();
+ supervisor = new Supervisor(new Transport());
+ Target connection = supervisor.connect(new Spec(rpcPort));
+ assertTrue(connection.isValid());
+
+ // Possibly do request multiple times if we haven't lost slobrok contact first times yet.
+ for (int j=0; j<=10; ++j) {
+ Request req = new Request("getNodeList");
+ connection.invokeSync(req, timeoutS);
+ assertEquals(req.errorMessage(), ErrorCode.NONE, req.errorCode());
+ assertTrue(req.toString(), req.checkReturnTypes("SS"));
+ String slobrok[] = req.returnValues().get(0).asStringArray().clone();
+ String rpc[] = req.returnValues().get(1).asStringArray().clone();
+
+ assertEquals(20, slobrok.length);
+ assertEquals(20, rpc.length);
+
+ // Verify that we can connect to all addresses returned.
+ for (int i=0; i<20; ++i) {
+ if (slobrok[i].equals("storage/cluster.mycluster/distributor/0")) {
+ if (j < 10 && !"".equals(rpc[i])) {
+ continue;
+ }
+ assertEquals(slobrok[i], "", rpc[i]);
+ continue;
+ }
+ assertTrue(slobrok[i], !rpc[i].equals(""));
+ Request req2 = new Request("getnodestate2");
+ req2.parameters().add(new StringValue("unknown"));
+ Target connection2 = supervisor.connect(new Spec(rpc[i]));
+ connection2.invokeSync(req2, timeoutS);
+ assertEquals(req2.toString(), ErrorCode.NONE, req.errorCode());
+ }
+ break;
+ }
+ }
+
+}
diff --git a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/SlobrokTest.java b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/SlobrokTest.java
new file mode 100644
index 00000000000..f043a325fdd
--- /dev/null
+++ b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/SlobrokTest.java
@@ -0,0 +1,116 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.vespa.clustercontroller.core;
+
+import com.yahoo.jrt.slobrok.server.Slobrok;
+import com.yahoo.log.LogLevel;
+import org.junit.Test;
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertTrue;
+
+import java.util.logging.Logger;
+
+public class SlobrokTest extends FleetControllerTest {
+
+ private static Logger log = Logger.getLogger(SlobrokTest.class.getName());
+
+ private boolean clusterAvailable() {
+ boolean ok = true;
+ ContentCluster cluster = fleetController.getCluster();
+ for (NodeInfo info : cluster.getNodeInfo()) {
+ if (info.getConnectionAttemptCount() > 0) ok = false;
+ if (info.getLatestNodeStateRequestTime() == null) ok = false;
+ }
+ return ok;
+ }
+ private void assertClusterAvailable() {
+ ContentCluster cluster = fleetController.getCluster();
+ for (NodeInfo info : cluster.getNodeInfo()) {
+ assertEquals("Node " + info + " connection attempts.", 0, info.getConnectionAttemptCount());
+ assertTrue("Node " + info + " has no last request time.", info.getLatestNodeStateRequestTime() != 0);
+ }
+ }
+
+ @Test
+ public void testSingleSlobrokRestart() throws Exception {
+ startingTest("SlobrokTest::testSingleSlobrokRestart");
+ FleetControllerOptions options = new FleetControllerOptions("mycluster");
+ options.nodeStateRequestTimeoutMS = 60 * 60 * 1000;
+ options.maxSlobrokDisconnectGracePeriod = 60 * 60 * 1000;
+ setUpFleetController(true, options);
+ setUpVdsNodes(true, new DummyVdsNodeOptions());
+ waitForStableSystem();
+
+ int version = fleetController.getSystemState().getVersion();
+ int slobrokPort = slobrok.port();
+
+ // Test that we survive some slobrok instability without changing system state.
+ for (int j=0; j<4; ++j) {
+ log.log(LogLevel.INFO, "Mirror updateForDistributor count is " + fleetController.getSlobrokMirrorUpdates());
+ log.log(LogLevel.INFO, "STOPPING SLOBROK SERVER (" + (j+1) + "/4)");
+ slobrok.stop();
+ for (int i=0; i<10; ++i) {
+ // Force one node to at least notice that the slobrok server is gone
+ if (i == 5) {
+ log.log(LogLevel.INFO, "Forcing one node to initate a resend: " + nodes.get(3));
+ nodes.get(3).replyToPendingNodeStateRequests();
+ }
+ waitForCompleteCycle();
+ timer.advanceTime(100);
+ }
+ log.log(LogLevel.INFO, "STARTING SLOBROK SERVER AGAIN (" + (j+1) + "/4)");
+ slobrok = new Slobrok(slobrokPort);
+ // May take up to 30 seconds for slobrok clients to re-register. Trigger retry.
+ for (DummyVdsNode node : nodes) {
+ node.disconnectSlobrok();
+ node.registerSlobrok();
+ }
+ //fleetController.setFreshSlobrokMirror();
+ waitForCompleteCycle();
+ fleetController.waitForNodesInSlobrok(10, 10, timeoutMS);
+
+ log.log(LogLevel.INFO, "Waiting for cluster to be up and available again");
+ for (int i = 0; i < timeoutMS; i += 10) {
+ if (clusterAvailable()) break;
+ timer.advanceTime(1000);
+ waitForCompleteCycle();
+ try{
+ Thread.sleep(10);
+ } catch (InterruptedException e) {}
+ }
+ assertClusterAvailable();
+ }
+
+ assertEquals("Cluster state was affected, although it should not have been.",
+ version, fleetController.getSystemState().getVersion());
+ }
+
+ @Test
+ public void testNodeTooLongOutOfSlobrok() throws Exception {
+ startingTest("SlobrokTest::testNodeTooLongOutOfSlobrok");
+ FleetControllerOptions options = new FleetControllerOptions("mycluster");
+ options.maxSlobrokDisconnectGracePeriod = 60 * 1000;
+ options.nodeStateRequestTimeoutMS = 10000 * 60 * 1000;
+ setUpFleetController(true, options);
+ setUpVdsNodes(true, new DummyVdsNodeOptions());
+ waitForStableSystem();
+
+ int version = fleetController.getSystemState().getVersion();
+ nodes.get(0).disconnectSlobrok();
+ log.log(LogLevel.INFO, "DISCONNECTED NODE FROM SLOBROK. SHOULD BE IN COOLDOWN PERIOD");
+ fleetController.waitForNodesInSlobrok(9, 10, timeoutMS);
+ synchronized (timer) {
+ nodes.get(0).sendGetNodeStateReply(0);
+ }
+
+ // Give system a little time to possible faultily removing node not in slobrok
+ timer.advanceTime(1000);
+ try{ Thread.sleep(10); } catch (InterruptedException e) {}
+ assertEquals(version, fleetController.getSystemState().getVersion());
+ log.log(LogLevel.INFO, "JUMPING TIME. NODE SHOULD BE MARKED DOWN");
+ // At this point the fleetcontroller might not have noticed that the node is out of slobrok yet.
+ // Thus we keep advancing time another minute such that it should get down.
+ timer.advanceTime(options.nodeStateRequestTimeoutMS + options.maxSlobrokDisconnectGracePeriod);
+ waitForState("version:\\d+ distributor:10 .0.s:d storage:10");
+ }
+
+}
diff --git a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/StateChangeTest.java b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/StateChangeTest.java
new file mode 100644
index 00000000000..b94691bb880
--- /dev/null
+++ b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/StateChangeTest.java
@@ -0,0 +1,1135 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.vespa.clustercontroller.core;
+
+import com.yahoo.jrt.*;
+import com.yahoo.vdslib.distribution.ConfiguredNode;
+import com.yahoo.vdslib.state.*;
+import com.yahoo.vespa.clustercontroller.core.database.DatabaseHandler;
+import com.yahoo.vespa.clustercontroller.core.testutils.StateWaiter;
+import com.yahoo.vespa.clustercontroller.utils.util.NoMetricReporter;
+import org.junit.Before;
+import org.junit.Ignore;
+import org.junit.Test;
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertTrue;
+
+import java.util.*;
+import java.util.logging.Logger;
+
+public class StateChangeTest extends FleetControllerTest {
+
+ public static Logger log = Logger.getLogger(StateChangeTest.class.getName());
+ private Supervisor supervisor;
+ private FleetController ctrl;
+ private DummyCommunicator communicator;
+ private EventLog eventLog;
+
+ @Before
+ public void setUp() {
+ supervisor = new Supervisor(new Transport());
+ }
+
+ private void initialize(FleetControllerOptions options) throws Exception {
+ List<Node> nodes = new ArrayList<>();
+ for (int i = 0; i < options.nodes.size(); ++i) {
+ nodes.add(new Node(NodeType.STORAGE, i));
+ nodes.add(new Node(NodeType.DISTRIBUTOR, i));
+ }
+
+ communicator = new DummyCommunicator(nodes, timer);
+ MetricUpdater metricUpdater = new MetricUpdater(new NoMetricReporter(), options.fleetControllerIndex);
+ eventLog = new EventLog(timer, metricUpdater);
+ ContentCluster cluster = new ContentCluster(options.clusterName, options.nodes, options.storageDistribution,
+ options.minStorageNodesUp, options.minRatioOfStorageNodesUp);
+ NodeStateGatherer stateGatherer = new NodeStateGatherer(timer, timer, eventLog);
+ DatabaseHandler database = new DatabaseHandler(timer, options.zooKeeperServerAddress, options.fleetControllerIndex, timer);
+ SystemStateGenerator stateGenerator = new SystemStateGenerator(timer, eventLog, metricUpdater);
+ SystemStateBroadcaster stateBroadcaster = new SystemStateBroadcaster(timer, timer);
+ MasterElectionHandler masterElectionHandler = new MasterElectionHandler(options.fleetControllerIndex, options.fleetControllerCount, timer, timer);
+ ctrl = new FleetController(timer, eventLog, cluster, stateGatherer, communicator, null, null, communicator, database, stateGenerator, stateBroadcaster, masterElectionHandler, metricUpdater, options);
+
+ ctrl.tick();
+
+ for (int i = 0; i < options.nodes.size(); ++i) {
+ communicator.setNodeState(new Node(NodeType.STORAGE, i), State.UP, "");
+ communicator.setNodeState(new Node(NodeType.DISTRIBUTOR, i), State.UP, "");
+ }
+
+ ctrl.tick();
+ }
+
+ public void tearDown() throws Exception {
+ if (supervisor != null) {
+ supervisor.transport().shutdown().join();
+ supervisor = null;
+ }
+ super.tearDown();
+ }
+
+ public void verifyNodeEvents(Node n, String correct) {
+ String actual = "";
+ for (NodeEvent e : eventLog.getNodeEvents(n)) {
+ actual += e.toString() + "\n";
+ }
+
+ assertEquals(correct, actual);
+
+ }
+
+ private List<ConfiguredNode> createNodes(int count) {
+ List<ConfiguredNode> nodes = new ArrayList<>();
+ for (int i = 0; i < count; i++)
+ nodes.add(new ConfiguredNode(i, false));
+ return nodes;
+ }
+
+ @Test
+ public void testNormalStartup() throws Exception {
+ FleetControllerOptions options = new FleetControllerOptions("mycluster", createNodes(10));
+ options.maxInitProgressTime = 50000;
+
+ initialize(options);
+
+ // Should now pick up previous node states
+ ctrl.tick();
+
+
+ for (int j = 0; j < 10; ++j) {
+ communicator.setNodeState(new Node(NodeType.DISTRIBUTOR, j), new NodeState(NodeType.DISTRIBUTOR, State.INITIALIZING).setInitProgress(0.0), "");
+ }
+
+ for (int i=0; i<100; i += 10) {
+ timer.advanceTime(options.maxInitProgressTime / 20);
+ ctrl.tick();
+ for (int j = 0; j < 10; ++j) {
+ communicator.setNodeState(new Node(NodeType.STORAGE, j), new NodeState(NodeType.STORAGE, State.INITIALIZING).setInitProgress(i / 100.0), "");
+ }
+ }
+
+ // Now, fleet controller should have generated a new cluster state.
+ ctrl.tick();
+
+ assertEquals("version:6 distributor:10 .0.s:i .0.i:0.0 .1.s:i .1.i:0.0 .2.s:i .2.i:0.0 .3.s:i .3.i:0.0 .4.s:i .4.i:0.0 .5.s:i .5.i:0.0 .6.s:i .6.i:0.0 .7.s:i .7.i:0.0 .8.s:i .8.i:0.0 .9.s:i .9.i:0.0 storage:10 .0.s:i .0.i:0.9 .1.s:i .1.i:0.9 .2.s:i .2.i:0.9 .3.s:i .3.i:0.9 .4.s:i .4.i:0.9 .5.s:i .5.i:0.9 .6.s:i .6.i:0.9 .7.s:i .7.i:0.9 .8.s:i .8.i:0.9 .9.s:i .9.i:0.9",
+ ctrl.getSystemState().toString());
+
+ timer.advanceTime(options.maxInitProgressTime / 20);
+ ctrl.tick();
+
+ for (int i = 0; i < 10; ++i) {
+ communicator.setNodeState(new Node(NodeType.STORAGE, i), new NodeState(NodeType.STORAGE, State.UP), "");
+ }
+
+ timer.advanceTime(options.maxInitProgressTime / 20);
+ ctrl.tick();
+
+ for (int i = 0; i < 10; ++i) {
+ communicator.setNodeState(new Node(NodeType.DISTRIBUTOR, i), new NodeState(NodeType.STORAGE, State.UP), "");
+ }
+
+ timer.advanceTime(options.maxInitProgressTime / 20);
+ ctrl.tick();
+
+ assertEquals("version:8 distributor:10 storage:10", ctrl.getSystemState().toString());
+
+
+ verifyNodeEvents(new Node(NodeType.DISTRIBUTOR, 0),
+ "Event: distributor.0: Now reporting state U\n" +
+ "Event: distributor.0: Altered node state in cluster state from 'D' to 'U'.\n" +
+ "Event: distributor.0: Now reporting state I, i 0.00\n" +
+ "Event: distributor.0: Altered node state in cluster state from 'U' to 'I, i 0.00'.\n" +
+ "Event: distributor.0: Now reporting state U\n" +
+ "Event: distributor.0: Altered node state in cluster state from 'I, i 0.00' to 'U'.\n");
+
+ verifyNodeEvents(new Node(NodeType.STORAGE, 0),
+ "Event: storage.0: Now reporting state U\n" +
+ "Event: storage.0: Altered node state in cluster state from 'D' to 'U'.\n" +
+ "Event: storage.0: Now reporting state I, i 0.00 (ls)\n" +
+ "Event: storage.0: Altered node state in cluster state from 'U' to 'D: Listing buckets. Progress 0.0 %.'.\n" +
+ "Event: storage.0: Now reporting state I, i 0.100 (read)\n" +
+ "Event: storage.0: Altered node state in cluster state from 'D: Listing buckets. Progress 0.0 %.' to 'I, i 0.100 (read)'.\n" +
+ "Event: storage.0: Now reporting state U\n" +
+ "Event: storage.0: Altered node state in cluster state from 'I, i 0.900 (read)' to 'U'.\n");
+ }
+
+ @Test
+ public void testNodeGoingDownAndUp() throws Exception {
+ FleetControllerOptions options = new FleetControllerOptions("mycluster", createNodes(10));
+ options.nodeStateRequestTimeoutMS = 60 * 60 * 1000;
+ options.minTimeBetweenNewSystemStates = 0;
+ options.maxInitProgressTime = 50000;
+
+ initialize(options);
+
+ ctrl.tick();
+
+ communicator.setNodeState(new Node(NodeType.DISTRIBUTOR, 0), State.DOWN, "Closed at other end");
+
+ ctrl.tick();
+
+ String desc = ctrl.getReportedNodeState(new Node(NodeType.DISTRIBUTOR, 0)).getDescription();
+ assertTrue(desc, desc.indexOf("Closed at other end") != -1);
+
+ assertEquals("version:4 distributor:10 .0.s:d storage:10", ctrl.getSystemState().toString());
+
+ timer.advanceTime(1000);
+ long distStartTime = timer.getCurrentTimeInMillis() / 1000;
+
+ ctrl.tick();
+
+ communicator.setNodeState(new Node(NodeType.DISTRIBUTOR, 0), new NodeState(NodeType.DISTRIBUTOR, State.UP).setStartTimestamp(12345678), "");
+ communicator.setNodeState(new Node(NodeType.STORAGE, 0), State.DOWN, "Closed at other end");
+
+ ctrl.tick();
+
+ assertEquals("version:5 distributor:10 .0.t:12345678 storage:10 .0.s:m", ctrl.getSystemState().toString());
+
+ assert(!ctrl.getReportedNodeState(new Node(NodeType.DISTRIBUTOR, 0)).hasDescription());
+ desc = ctrl.getReportedNodeState(new Node(NodeType.STORAGE, 0)).getDescription();
+ assertTrue(desc, desc.indexOf("Closed at other end") != -1);
+
+ timer.advanceTime(options.maxTransitionTime.get(NodeType.STORAGE) + 1);
+
+ ctrl.tick();
+
+ assertEquals("version:6 distributor:10 .0.t:12345678 storage:10 .0.s:d", ctrl.getSystemState().toString());
+
+ desc = ctrl.getReportedNodeState(new Node(NodeType.STORAGE, 0)).getDescription();
+ assertTrue(desc, desc.indexOf("Closed at other end") != -1);
+
+ timer.advanceTime(1000);
+
+ ctrl.tick();
+
+ communicator.setNodeState(new Node(NodeType.STORAGE, 0), new NodeState(NodeType.STORAGE, State.UP).setStartTimestamp(12345679), "");
+
+ ctrl.tick();
+
+ assertEquals("version:7 distributor:10 storage:10 .0.t:12345679", ctrl.getSystemState().toString());
+
+ assert(!ctrl.getReportedNodeState(new Node(NodeType.STORAGE, 0)).hasDescription());
+
+ verifyNodeEvents(new Node(NodeType.DISTRIBUTOR, 0),
+ "Event: distributor.0: Now reporting state U\n" +
+ "Event: distributor.0: Altered node state in cluster state from 'D' to 'U'.\n" +
+ "Event: distributor.0: Failed to get node state: D: Closed at other end\n" +
+ "Event: distributor.0: Stopped or possibly crashed after 0 ms, which is before stable state time period. Premature crash count is now 1.\n" +
+ "Event: distributor.0: Altered node state in cluster state from 'U' to 'D: Closed at other end'.\n" +
+ "Event: distributor.0: Now reporting state U, t 12345678\n" +
+ "Event: distributor.0: Altered node state in cluster state from 'D: Closed at other end' to 'U, t 12345678'.\n");
+
+ verifyNodeEvents(new Node(NodeType.STORAGE, 0),
+ "Event: storage.0: Now reporting state U\n" +
+ "Event: storage.0: Altered node state in cluster state from 'D' to 'U'.\n" +
+ "Event: storage.0: Failed to get node state: D: Closed at other end\n" +
+ "Event: storage.0: Stopped or possibly crashed after 1000 ms, which is before stable state time period. Premature crash count is now 1.\n" +
+ "Event: storage.0: Altered node state in cluster state from 'U' to 'M: Closed at other end'.\n" +
+ "Event: storage.0: 5001 milliseconds without contact. Marking node down.\n" +
+ "Event: storage.0: Altered node state in cluster state from 'M: Closed at other end' to 'D: Closed at other end'.\n" +
+ "Event: storage.0: Now reporting state U, t 12345679\n" +
+ "Event: storage.0: Altered node state in cluster state from 'D: Closed at other end' to 'U, t 12345679'.\n");
+
+ assertEquals(1, ctrl.getCluster().getNodeInfo(new Node(NodeType.DISTRIBUTOR, 0)).getPrematureCrashCount());
+ assertEquals(1, ctrl.getCluster().getNodeInfo(new Node(NodeType.STORAGE, 0)).getPrematureCrashCount());
+ }
+
+ public void tick(int timeMs) throws Exception {
+ timer.advanceTime(timeMs);
+ ctrl.tick();
+ }
+
+ @Test
+ public void testNodeGoingDownAndUpNotifying() throws Exception {
+ // Same test as above, but node manage to notify why it is going down first.
+ FleetControllerOptions options = new FleetControllerOptions("mycluster", createNodes(10));
+ options.nodeStateRequestTimeoutMS = 60 * 60 * 1000;
+ options.maxSlobrokDisconnectGracePeriod = 100000;
+
+ initialize(options);
+
+ ctrl.tick();
+
+ tick((int)options.stableStateTimePeriod + 1);
+
+ communicator.setNodeState(new Node(NodeType.DISTRIBUTOR, 0), State.DOWN, "controlled shutdown");
+
+ ctrl.tick();
+
+ String desc = ctrl.getReportedNodeState(new Node(NodeType.DISTRIBUTOR, 0)).getDescription();
+ assertTrue(desc, desc.indexOf("Received signal 15 (SIGTERM - Termination signal)") != -1
+ || desc.indexOf("controlled shutdown") != -1);
+
+ tick(1000);
+
+ communicator.setNodeState(new Node(NodeType.DISTRIBUTOR, 0), State.UP, "");
+ communicator.setNodeState(new Node(NodeType.STORAGE, 0), State.DOWN, "controlled shutdown");
+
+ tick(1000);
+
+ assertEquals("version:5 distributor:10 storage:10 .0.s:m", ctrl.getSystemState().toString());
+
+ assert(!ctrl.getReportedNodeState(new Node(NodeType.DISTRIBUTOR, 0)).hasDescription());
+ desc = ctrl.getReportedNodeState(new Node(NodeType.STORAGE, 0)).getDescription();
+ assertTrue(desc, desc.indexOf("Received signal 15 (SIGTERM - Termination signal)") != -1
+ || desc.indexOf("controlled shutdown") != -1);
+
+ tick(options.maxTransitionTime.get(NodeType.STORAGE) + 1);
+
+ assertEquals("version:6 distributor:10 storage:10 .0.s:d", ctrl.getSystemState().toString());
+ desc = ctrl.getReportedNodeState(new Node(NodeType.STORAGE, 0)).getDescription();
+ assertTrue(desc, desc.indexOf("Received signal 15 (SIGTERM - Termination signal)") != -1
+ || desc.indexOf("controlled shutdown") != -1);
+
+ communicator.setNodeState(new Node(NodeType.STORAGE, 0), State.UP, "");
+
+ tick(1000);
+
+ assertEquals("version:7 distributor:10 storage:10", ctrl.getSystemState().toString());
+ assert(!ctrl.getReportedNodeState(new Node(NodeType.STORAGE, 0)).hasDescription());
+
+ assertEquals(0, ctrl.getCluster().getNodeInfo(new Node(NodeType.DISTRIBUTOR, 0)).getPrematureCrashCount());
+ assertEquals(0, ctrl.getCluster().getNodeInfo(new Node(NodeType.STORAGE, 0)).getPrematureCrashCount());
+
+ verifyNodeEvents(new Node(NodeType.DISTRIBUTOR, 0),
+ "Event: distributor.0: Now reporting state U\n" +
+ "Event: distributor.0: Altered node state in cluster state from 'D' to 'U'.\n" +
+ "Event: distributor.0: Failed to get node state: D: controlled shutdown\n" +
+ "Event: distributor.0: Altered node state in cluster state from 'U' to 'D: controlled shutdown'.\n" +
+ "Event: distributor.0: Now reporting state U\n" +
+ "Event: distributor.0: Altered node state in cluster state from 'D: controlled shutdown' to 'U'.\n");
+
+ verifyNodeEvents(new Node(NodeType.STORAGE, 0),
+ "Event: storage.0: Now reporting state U\n" +
+ "Event: storage.0: Altered node state in cluster state from 'D' to 'U'.\n" +
+ "Event: storage.0: Failed to get node state: D: controlled shutdown\n" +
+ "Event: storage.0: Altered node state in cluster state from 'U' to 'M: controlled shutdown'.\n" +
+ "Event: storage.0: 5001 milliseconds without contact. Marking node down.\n" +
+ "Event: storage.0: Altered node state in cluster state from 'M: controlled shutdown' to 'D: controlled shutdown'.\n" +
+ "Event: storage.0: Now reporting state U\n" +
+ "Event: storage.0: Altered node state in cluster state from 'D: controlled shutdown' to 'U'.\n");
+
+ }
+
+ @Test
+ public void testNodeGoingDownAndUpFast() throws Exception {
+ FleetControllerOptions options = new FleetControllerOptions("mycluster", createNodes(10));
+ options.maxSlobrokDisconnectGracePeriod = 60 * 1000;
+
+ initialize(options);
+
+ ctrl.tick();
+
+ // Node dropped out of slobrok
+ List<Node> nodes = new ArrayList<>();
+ for (int i = 1; i < 10; ++i) {
+ nodes.add(new Node(NodeType.STORAGE, i));
+ nodes.add(new Node(NodeType.DISTRIBUTOR, i));
+ }
+
+ communicator.newNodes = nodes;
+
+ ctrl.tick();
+ ctrl.tick();
+
+ assertEquals("version:3 distributor:10 storage:10", ctrl.getSystemState().toString());
+
+ nodes = new ArrayList<>();
+ for (int i = 0; i < 10; ++i) {
+ nodes.add(new Node(NodeType.STORAGE, i));
+ nodes.add(new Node(NodeType.DISTRIBUTOR, i));
+ }
+
+ communicator.newNodes = nodes;
+
+ ctrl.tick();
+
+ assertEquals("version:3 distributor:10 storage:10", ctrl.getSystemState().toString());
+
+ verifyNodeEvents(new Node(NodeType.STORAGE, 0),
+ "Event: storage.0: Now reporting state U\n" +
+ "Event: storage.0: Altered node state in cluster state from 'D' to 'U'.\n" +
+ "Event: storage.0: Node is no longer in slobrok, but we still have a pending state request.\n");
+ }
+
+ @Test
+ public void testMaintenanceWhileNormalStorageNodeRestart() throws Exception {
+ FleetControllerOptions options = new FleetControllerOptions("mycluster", createNodes(10));
+ options.maxSlobrokDisconnectGracePeriod = 60 * 1000;
+
+ initialize(options);
+
+ communicator.setNodeState(new Node(NodeType.STORAGE, 6), State.DOWN, "Connection error: Closed at other end");
+
+ ctrl.tick();
+
+ assertEquals("version:4 distributor:10 storage:10 .6.s:m", ctrl.getSystemState().toString());
+
+ NodeState ns = ctrl.getReportedNodeState(new Node(NodeType.STORAGE, 6));
+ assertTrue(ns.toString(), ns.getDescription().indexOf("Connection error: Closed at other end") != -1);
+
+ tick(1000);
+
+ communicator.setNodeState(new Node(NodeType.STORAGE, 6), new NodeState(NodeType.STORAGE, State.INITIALIZING).setInitProgress(0.0), "");
+
+ ctrl.tick();
+
+ // Still maintenance since .i progress 0.0 is really down.
+ assertEquals("version:4 distributor:10 storage:10 .6.s:m", ctrl.getSystemState().toString());
+
+ communicator.setNodeState(new Node(NodeType.STORAGE, 6), new NodeState(NodeType.STORAGE, State.INITIALIZING).setInitProgress(0.6), "");
+
+ ctrl.tick();
+
+ // Now it's OK
+ assertEquals("version:5 distributor:10 storage:10 .6.s:i .6.i:0.6", ctrl.getSystemState().toString());
+
+ tick(1000);
+
+ communicator.setNodeState(new Node(NodeType.STORAGE, 6), new NodeState(NodeType.STORAGE, State.UP), "");
+
+ ctrl.tick();
+
+ assertEquals("version:6 distributor:10 storage:10", ctrl.getSystemState().toString());
+ assert(!ctrl.getReportedNodeState(new Node(NodeType.STORAGE, 6)).hasDescription());
+
+ verifyNodeEvents(new Node(NodeType.STORAGE, 6),
+ "Event: storage.6: Now reporting state U\n" +
+ "Event: storage.6: Altered node state in cluster state from 'D' to 'U'.\n" +
+ "Event: storage.6: Failed to get node state: D: Connection error: Closed at other end\n" +
+ "Event: storage.6: Stopped or possibly crashed after 0 ms, which is before stable state time period. Premature crash count is now 1.\n" +
+ "Event: storage.6: Altered node state in cluster state from 'U' to 'M: Connection error: Closed at other end'.\n" +
+ "Event: storage.6: Now reporting state I, i 0.00 (ls)\n" +
+ "Event: storage.6: Now reporting state I, i 0.600 (read)\n" +
+ "Event: storage.6: Altered node state in cluster state from 'M: Connection error: Closed at other end' to 'I, i 0.600 (read)'.\n" +
+ "Event: storage.6: Now reporting state U\n" +
+ "Event: storage.6: Altered node state in cluster state from 'I, i 0.600 (read)' to 'U'.\n");
+ }
+
+ @Test
+ public void testMaintenanceWithoutInitIfRetired() throws Exception {
+ List<ConfiguredNode> nodes = new ArrayList<>();
+ for (int i = 0; i < 10; i++) {
+ boolean retired = (i == 6);
+ nodes.add(new ConfiguredNode(i, retired));
+ }
+
+ FleetControllerOptions options = new FleetControllerOptions("mycluster", nodes);
+ options.maxSlobrokDisconnectGracePeriod = 60 * 1000;
+
+ initialize(options);
+
+ communicator.setNodeState(new Node(NodeType.STORAGE, 6), State.DOWN, "Connection error: Closed at other end");
+
+ ctrl.tick();
+
+ assertEquals("version:4 distributor:10 storage:10 .6.s:m", ctrl.getSystemState().toString());
+
+ NodeState ns = ctrl.getReportedNodeState(new Node(NodeType.STORAGE, 6));
+ assertTrue(ns.toString(), ns.getDescription().indexOf("Connection error: Closed at other end") != -1);
+
+ tick(1000);
+
+ communicator.setNodeState(new Node(NodeType.STORAGE, 6), new NodeState(NodeType.STORAGE, State.INITIALIZING).setInitProgress(0.0), "");
+
+ ctrl.tick();
+
+ // Still maintenance since .i progress 0.0 is really down.
+ assertEquals("version:4 distributor:10 storage:10 .6.s:m", ctrl.getSystemState().toString());
+
+ communicator.setNodeState(new Node(NodeType.STORAGE, 6), new NodeState(NodeType.STORAGE, State.INITIALIZING).setInitProgress(0.6), "");
+
+ ctrl.tick();
+
+ // Still maintenance since configured.
+ assertEquals("version:4 distributor:10 storage:10 .6.s:m", ctrl.getSystemState().toString());
+
+ tick(1000);
+
+ communicator.setNodeState(new Node(NodeType.STORAGE, 6), new NodeState(NodeType.STORAGE, State.UP), "");
+
+ ctrl.tick();
+
+ assertEquals("version:5 distributor:10 storage:10 .6.s:r", ctrl.getSystemState().toString());
+ assert(!ctrl.getReportedNodeState(new Node(NodeType.STORAGE, 6)).hasDescription());
+
+ verifyNodeEvents(new Node(NodeType.STORAGE, 6),
+ "Event: storage.6: Now reporting state U\n" +
+ "Event: storage.6: Altered node state in cluster state from 'D' to 'R'.\n" +
+ "Event: storage.6: Failed to get node state: D: Connection error: Closed at other end\n" +
+ "Event: storage.6: Stopped or possibly crashed after 0 ms, which is before stable state time period. Premature crash count is now 1.\n" +
+ "Event: storage.6: Altered node state in cluster state from 'R' to 'M: Connection error: Closed at other end'.\n" +
+ "Event: storage.6: Now reporting state I, i 0.00 (ls)\n" +
+ "Event: storage.6: Now reporting state I, i 0.600 (read)\n" +
+ "Event: storage.6: Now reporting state U\n" +
+ "Event: storage.6: Altered node state in cluster state from 'M: Connection error: Closed at other end' to 'R: Connection error: Closed at other end'.\n");
+ }
+
+ @Test
+ public void testMaintenanceToDownIfPastTransitionTimeAndRetired() throws Exception {
+ List<ConfiguredNode> nodes = new ArrayList<>();
+ for (int i = 0; i < 10; i++) {
+ boolean retired = (i == 6);
+ nodes.add(new ConfiguredNode(i, retired));
+ }
+
+ FleetControllerOptions options = new FleetControllerOptions("mycluster", nodes);
+ options.maxSlobrokDisconnectGracePeriod = 60 * 1000;
+
+ initialize(options);
+
+ communicator.setNodeState(new Node(NodeType.STORAGE, 6), State.DOWN, "Connection error: Closed at other end");
+
+ ctrl.tick();
+
+ assertEquals("version:4 distributor:10 storage:10 .6.s:m", ctrl.getSystemState().toString());
+
+ timer.advanceTime(100000);
+
+ ctrl.tick();
+
+ assertEquals("version:5 distributor:10 storage:10 .6.s:d", ctrl.getSystemState().toString());
+ }
+
+ // Test that a node that has been down for a long time (above steady state period), actually alters cluster state to
+ // tell that it is initializing, rather than being ignored as a just restarted/unstable node should be.
+ @Test
+ public void testDownNodeInitializing() throws Exception {
+ // Actually report initializing state if node has been down steadily for a while
+ FleetControllerOptions options = new FleetControllerOptions("mycluster", createNodes(10));
+ options.maxTransitionTime.put(NodeType.STORAGE, 5000);
+ options.maxInitProgressTime = 5000;
+ options.stableStateTimePeriod = 20000;
+ options.nodeStateRequestTimeoutMS = 1000000;
+ options.maxSlobrokDisconnectGracePeriod = 1000000;
+
+ initialize(options);
+
+ timer.advanceTime(100000); // Node has been in steady state up
+ ctrl.tick();
+
+ communicator.setNodeState(new Node(NodeType.STORAGE, 6), State.DOWN, "Connection error: Closed at other end");
+
+ ctrl.tick();
+
+ assertEquals("version:4 distributor:10 storage:10 .6.s:m", ctrl.getSystemState().toString());
+
+ timer.advanceTime(100000); // Node has been in steady state down
+
+ ctrl.tick();
+
+ assertEquals("version:5 distributor:10 storage:10 .6.s:d", ctrl.getSystemState().toString());
+
+ communicator.setNodeState(new Node(NodeType.STORAGE, 6), new NodeState(NodeType.STORAGE, State.INITIALIZING).setInitProgress(0.001), "");
+
+ ctrl.tick();
+
+ assertEquals("Listing buckets. Progress 0.1 %.", ctrl.getSystemState().getNodeState(new Node(NodeType.STORAGE, 6)).getDescription());
+
+ communicator.setNodeState(new Node(NodeType.STORAGE, 6), new NodeState(NodeType.STORAGE, State.INITIALIZING).setInitProgress(0.1), "");
+
+ ctrl.tick();
+
+ assertEquals("version:6 distributor:10 storage:10 .6.s:i .6.i:0.1", ctrl.getSystemState().toString());
+
+ ctrl.tick();
+
+ assertEquals("version:6 distributor:10 storage:10 .6.s:i .6.i:0.1", ctrl.getSystemState().toString());
+
+ communicator.setNodeState(new Node(NodeType.STORAGE, 6), new NodeState(NodeType.STORAGE, State.UP), "");
+
+ ctrl.tick();
+
+ assertEquals("version:7 distributor:10 storage:10", ctrl.getSystemState().toString());
+
+ verifyNodeEvents(new Node(NodeType.STORAGE, 6),
+ "Event: storage.6: Now reporting state U\n" +
+ "Event: storage.6: Altered node state in cluster state from 'D' to 'U'.\n" +
+ "Event: storage.6: Failed to get node state: D: Connection error: Closed at other end\n" +
+ "Event: storage.6: Altered node state in cluster state from 'U' to 'M: Connection error: Closed at other end'.\n" +
+ "Event: storage.6: 100000 milliseconds without contact. Marking node down.\n" +
+ "Event: storage.6: Altered node state in cluster state from 'M: Connection error: Closed at other end' to 'D: Connection error: Closed at other end'.\n" +
+ "Event: storage.6: Now reporting state I, i 0.00100 (ls)\n" +
+ "Event: storage.6: Now reporting state I, i 0.100 (read)\n" +
+ "Event: storage.6: Altered node state in cluster state from 'D: Listing buckets. Progress 0.1 %.' to 'I, i 0.100 (read)'.\n" +
+ "Event: storage.6: Now reporting state U\n" +
+ "Event: storage.6: Altered node state in cluster state from 'I, i 0.100 (read)' to 'U'.\n");
+ }
+
+ @Test
+ public void testNodeInitializationStalled() throws Exception {
+ // Node should eventually be marked down, and not become initializing next time, but stay down until up
+ FleetControllerOptions options = new FleetControllerOptions("mycluster", createNodes(10));
+ options.maxTransitionTime.put(NodeType.STORAGE, 5000);
+ options.maxInitProgressTime = 5000;
+ options.stableStateTimePeriod = 1000000;
+ options.maxSlobrokDisconnectGracePeriod = 10000000;
+
+ initialize(options);
+
+ timer.advanceTime(1000000); // Node has been in steady state up
+
+ ctrl.tick();
+
+ communicator.setNodeState(new Node(NodeType.STORAGE, 6), State.DOWN, "Connection error: Closed at other end");
+
+ ctrl.tick();
+
+ assertEquals("version:4 distributor:10 storage:10 .6.s:m", ctrl.getSystemState().toString());
+
+ timer.advanceTime(1000000); // Node has been in steady state down
+
+ ctrl.tick();
+
+ assertEquals("version:5 distributor:10 storage:10 .6.s:d", ctrl.getSystemState().toString());
+
+ communicator.setNodeState(new Node(NodeType.STORAGE, 6), new NodeState(NodeType.STORAGE, State.INITIALIZING).setInitProgress(0.1), "");
+
+ ctrl.tick();
+
+ assertEquals("version:6 distributor:10 storage:10 .6.s:i .6.i:0.1", ctrl.getSystemState().toString());
+
+ timer.advanceTime(options.maxInitProgressTime + 1);
+
+ ctrl.tick();
+
+ // We should now get the node marked down.
+ assertEquals("version:7 distributor:10 storage:10 .6.s:d", ctrl.getSystemState().toString());
+
+ tick(1000);
+
+ communicator.setNodeState(new Node(NodeType.STORAGE, 6), State.DOWN, "Connection error: Closed at other end");
+
+ ctrl.tick();
+
+ tick(options.nodeStateRequestTimeoutMS + 1);
+
+ communicator.setNodeState(new Node(NodeType.STORAGE, 6), new NodeState(NodeType.STORAGE, State.INITIALIZING).setInitProgress(0.0), "");
+
+ tick(1000);
+
+ communicator.setNodeState(new Node(NodeType.STORAGE, 6), new NodeState(NodeType.STORAGE, State.INITIALIZING).setInitProgress(0.1), "");
+
+ tick(1000);
+
+ // Still down since it seemingly crashed during last init.
+ assertEquals("version:7 distributor:10 storage:10 .6.s:d", ctrl.getSystemState().toString());
+
+ assertEquals("Down: 5001 ms without initialize progress. Assuming node has deadlocked.",
+ ctrl.getSystemState().getNodeState(new Node(NodeType.STORAGE, 6)).toString());
+
+ ctrl.tick();
+
+ communicator.setNodeState(new Node(NodeType.STORAGE, 6), State.UP, "");
+
+ ctrl.tick();
+
+ assertEquals("version:8 distributor:10 storage:10", ctrl.getSystemState().toString());
+
+ verifyNodeEvents(new Node(NodeType.STORAGE, 6),
+ "Event: storage.6: Now reporting state U\n" +
+ "Event: storage.6: Altered node state in cluster state from 'D' to 'U'.\n" +
+ "Event: storage.6: Failed to get node state: D: Connection error: Closed at other end\n" +
+ "Event: storage.6: Altered node state in cluster state from 'U' to 'M: Connection error: Closed at other end'.\n" +
+ "Event: storage.6: 1000000 milliseconds without contact. Marking node down.\n" +
+ "Event: storage.6: Altered node state in cluster state from 'M: Connection error: Closed at other end' to 'D: Connection error: Closed at other end'.\n" +
+ "Event: storage.6: Now reporting state I, i 0.100 (read)\n" +
+ "Event: storage.6: Altered node state in cluster state from 'D: Connection error: Closed at other end' to 'I, i 0.100 (read)'.\n" +
+ "Event: storage.6: 5001 milliseconds without initialize progress. Marking node down. Premature crash count is now 1.\n" +
+ "Event: storage.6: Altered node state in cluster state from 'I, i 0.100 (read)' to 'D: 5001 ms without initialize progress. Assuming node has deadlocked.'.\n" +
+ "Event: storage.6: Failed to get node state: D: Connection error: Closed at other end\n" +
+ "Event: storage.6: Now reporting state I, i 0.00 (ls)\n" +
+ "Event: storage.6: Now reporting state I, i 0.100 (read)\n" +
+ "Event: storage.6: Now reporting state U\n" +
+ "Event: storage.6: Altered node state in cluster state from 'D: 5001 ms without initialize progress. Assuming node has deadlocked.' to 'U'.\n");
+
+ }
+
+ @Test
+ public void testBackwardsInitializationProgress() throws Exception {
+ // Same as stalled. Mark down, keep down until up
+ FleetControllerOptions options = new FleetControllerOptions("mycluster", createNodes(10));
+ options.maxTransitionTime.put(NodeType.STORAGE, 5000);
+ options.maxInitProgressTime = 5000;
+ options.stableStateTimePeriod = 1000000;
+ // Set long so we dont time out RPC requests and mark nodes down due to advancing time to get in steady state
+ options.nodeStateRequestTimeoutMS = (int) options.stableStateTimePeriod * 2;
+
+ initialize(options);
+
+ timer.advanceTime(1000000); // Node has been in steady state up
+
+ ctrl.tick();
+
+ communicator.setNodeState(new Node(NodeType.STORAGE, 6), State.DOWN, "Connection error: Closed at other end");
+
+ ctrl.tick();
+
+ assertEquals("version:4 distributor:10 storage:10 .6.s:m", ctrl.getSystemState().toString());
+
+ timer.advanceTime(1000000); // Node has been in steady state down
+
+ ctrl.tick();
+
+ assertEquals("version:5 distributor:10 storage:10 .6.s:d", ctrl.getSystemState().toString());
+
+ communicator.setNodeState(new Node(NodeType.STORAGE, 6), new NodeState(NodeType.STORAGE, State.INITIALIZING).setInitProgress(0.3), "");
+
+ ctrl.tick();
+
+ assertEquals("version:6 distributor:10 storage:10 .6.s:i .6.i:0.3", ctrl.getSystemState().toString());
+
+ ctrl.tick();
+
+ communicator.setNodeState(new Node(NodeType.STORAGE, 6), new NodeState(NodeType.STORAGE, State.INITIALIZING).setInitProgress(0.2), "");
+
+ ctrl.tick();
+
+ assertEquals("version:7 distributor:10 storage:10 .6.s:d", ctrl.getSystemState().toString());
+
+ String desc = ctrl.getSystemState().getNodeState(new Node(NodeType.STORAGE, 6)).getDescription();
+ assertEquals("Got reverse intialize progress. Assuming node have prematurely crashed", desc);
+ }
+
+ @Test
+ public void testNodeGoingDownWhileInitializing() throws Exception {
+ // Same as stalled. Mark down, keep down until up
+ FleetControllerOptions options = new FleetControllerOptions("mycluster", createNodes(10));
+ options.maxTransitionTime.put(NodeType.STORAGE, 5000);
+ options.maxInitProgressTime = 5000;
+ options.stableStateTimePeriod = 1000000;
+ options.nodeStateRequestTimeoutMS = 365 * 24 * 60 * 1000; // Set very high so the advanceTime don't start sending state replies right before we disconnect.
+
+ initialize(options);
+
+ timer.advanceTime(1000000); // Node has been in steady state up
+
+ ctrl.tick();
+
+ communicator.setNodeState(new Node(NodeType.STORAGE, 6), State.DOWN, "Connection error: Closed at other end");
+
+ ctrl.tick();
+
+ assertEquals("version:4 distributor:10 storage:10 .6.s:m", ctrl.getSystemState().toString());
+
+ timer.advanceTime(1000000); // Node has been in steady state down
+
+ ctrl.tick();
+
+ assertEquals("version:5 distributor:10 storage:10 .6.s:d", ctrl.getSystemState().toString());
+
+ ctrl.tick();
+
+ communicator.setNodeState(new Node(NodeType.STORAGE, 6), new NodeState(NodeType.STORAGE, State.INITIALIZING).setInitProgress(0.3), "");
+
+ ctrl.tick();
+
+ assertEquals("version:6 distributor:10 storage:10 .6.s:i .6.i:0.3", ctrl.getSystemState().toString());
+
+ ctrl.tick();
+
+ communicator.setNodeState(new Node(NodeType.STORAGE, 6), State.DOWN, "Connection error: Closed at other end");
+
+ ctrl.tick();
+
+ assertEquals("version:7 distributor:10 storage:10 .6.s:d", ctrl.getSystemState().toString());
+
+ tick(1000);
+
+ communicator.setNodeState(new Node(NodeType.STORAGE, 6), new NodeState(NodeType.STORAGE, State.INITIALIZING).setInitProgress(0.3), "");
+
+ ctrl.tick();
+
+ assertEquals("version:7 distributor:10 storage:10 .6.s:d", ctrl.getSystemState().toString());
+
+ communicator.setNodeState(new Node(NodeType.STORAGE, 6), State.UP, "");
+
+ ctrl.tick();
+
+ assertEquals("version:8 distributor:10 storage:10", ctrl.getSystemState().toString());
+ }
+
+ @Test
+ public void testContinuousCrashRightAfterInit() throws Exception {
+ startingTest("StateChangeTest::testContinuousCrashRightAfterInit");
+ // If node does this too many times, take it out of service
+ FleetControllerOptions options = new FleetControllerOptions("mycluster", createNodes(10));
+ options.maxTransitionTime.put(NodeType.STORAGE, 5000);
+ options.maxInitProgressTime = 5000;
+ options.maxPrematureCrashes = 2;
+ options.stableStateTimePeriod = 1000000;
+ options.maxSlobrokDisconnectGracePeriod = 10000000;
+
+ initialize(options);
+
+ timer.advanceTime(1000000); // Node has been in steady state up
+
+ ctrl.tick();
+
+ communicator.setNodeState(new Node(NodeType.STORAGE, 6), State.DOWN, "Connection error: Closed at other end");
+
+ ctrl.tick();
+
+ assertEquals("version:4 distributor:10 storage:10 .6.s:m", ctrl.getSystemState().toString());
+
+ timer.advanceTime(1000000); // Node has been in steady state down
+
+ ctrl.tick();
+
+ assertEquals("version:5 distributor:10 storage:10 .6.s:d", ctrl.getSystemState().toString());
+
+ for (int j = 0; j <= options.maxPrematureCrashes; ++j) {
+ ctrl.tick();
+
+ tick(options.nodeStateRequestTimeoutMS + 1);
+
+ communicator.setNodeState(new Node(NodeType.STORAGE, 6), State.DOWN, "Connection error: Closed at other end");
+
+ ctrl.tick();
+
+ tick(options.nodeStateRequestTimeoutMS + 1);
+
+ communicator.setNodeState(new Node(NodeType.STORAGE, 6), new NodeState(NodeType.STORAGE, State.INITIALIZING).setInitProgress(0.0), "");
+
+ ctrl.tick();
+
+ tick(options.nodeStateRequestTimeoutMS + 1);
+
+ communicator.setNodeState(new Node(NodeType.STORAGE, 6), new NodeState(NodeType.STORAGE, State.INITIALIZING).setInitProgress(0.1), "");
+
+ tick(1000);
+ }
+
+ assertEquals("version:7 distributor:10 storage:10 .6.s:d", ctrl.getSystemState().toString());
+ }
+
+ @Test
+ public void testClusterStateMinNodes() throws Exception {
+ startingTest("StateChangeTest::testClusterStateMinNodes");
+ // If node does this too many times, take it out of service
+ FleetControllerOptions options = new FleetControllerOptions("mycluster", createNodes(10));
+ options.maxTransitionTime.put(NodeType.STORAGE, 0);
+ options.maxInitProgressTime = 0;
+ options.minDistributorNodesUp = 6;
+ options.minStorageNodesUp = 8;
+ options.minRatioOfDistributorNodesUp = 0.0;
+ options.minRatioOfStorageNodesUp = 0.0;
+
+ initialize(options);
+
+ timer.advanceTime(1000000); // Node has been in steady state up
+
+ ctrl.tick();
+
+ assertEquals("version:3 distributor:10 storage:10", ctrl.getSystemState().toString());
+
+ communicator.setNodeState(new Node(NodeType.DISTRIBUTOR, 0), State.DOWN, "Connection error: Closed at other end");
+ communicator.setNodeState(new Node(NodeType.DISTRIBUTOR, 1), State.DOWN, "Connection error: Closed at other end");
+ communicator.setNodeState(new Node(NodeType.DISTRIBUTOR, 2), State.DOWN, "Connection error: Closed at other end");
+ communicator.setNodeState(new Node(NodeType.DISTRIBUTOR, 3), State.DOWN, "Connection error: Closed at other end");
+
+ communicator.setNodeState(new Node(NodeType.STORAGE, 0), State.DOWN, "Connection error: Closed at other end");
+ communicator.setNodeState(new Node(NodeType.STORAGE, 1), State.DOWN, "Connection error: Closed at other end");
+
+ ctrl.tick();
+
+ assertEquals("version:4 distributor:10 .0.s:d .1.s:d .2.s:d .3.s:d storage:10 .0.s:d .1.s:d", ctrl.getSystemState().toString());
+
+ communicator.setNodeState(new Node(NodeType.DISTRIBUTOR, 4), State.DOWN, "Connection error: Closed at other end");
+
+ ctrl.tick();
+
+ assertEquals("version:5 cluster:d distributor:10 .0.s:d .1.s:d .2.s:d .3.s:d .4.s:d storage:10 .0.s:d .1.s:d", ctrl.getSystemState().toString());
+
+ tick(1000);
+
+ communicator.setNodeState(new Node(NodeType.DISTRIBUTOR, 4), State.UP, "");
+
+ ctrl.tick();
+
+ assertEquals("version:6 distributor:10 .0.s:d .1.s:d .2.s:d .3.s:d storage:10 .0.s:d .1.s:d", ctrl.getSystemState().toString());
+
+ tick(1000);
+
+ communicator.setNodeState(new Node(NodeType.STORAGE, 2), State.DOWN, "");
+
+ ctrl.tick();
+
+ assertEquals("version:7 cluster:d distributor:10 .0.s:d .1.s:d .2.s:d .3.s:d storage:10 .0.s:d .1.s:d .2.s:d", ctrl.getSystemState().toString());
+ }
+
+ @Test
+ public void testClusterStateMinFactor() throws Exception {
+ startingTest("StateChangeTest::testClusterStateMinFactor");
+ // If node does this too many times, take it out of service
+ FleetControllerOptions options = new FleetControllerOptions("mycluster", createNodes(10));
+ options.maxTransitionTime.put(NodeType.STORAGE, 0);
+ options.maxInitProgressTime = 0;
+ options.minDistributorNodesUp = 0;
+ options.minStorageNodesUp = 0;
+ options.minRatioOfDistributorNodesUp = 0.6;
+ options.minRatioOfStorageNodesUp = 0.8;
+
+ initialize(options);
+
+ timer.advanceTime(1000000); // Node has been in steady state up
+
+ ctrl.tick();
+
+ assertEquals("version:3 distributor:10 storage:10", ctrl.getSystemState().toString());
+
+ communicator.setNodeState(new Node(NodeType.DISTRIBUTOR, 0), State.DOWN, "Connection error: Closed at other end");
+ communicator.setNodeState(new Node(NodeType.DISTRIBUTOR, 1), State.DOWN, "Connection error: Closed at other end");
+ communicator.setNodeState(new Node(NodeType.DISTRIBUTOR, 2), State.DOWN, "Connection error: Closed at other end");
+ communicator.setNodeState(new Node(NodeType.DISTRIBUTOR, 3), State.DOWN, "Connection error: Closed at other end");
+
+ communicator.setNodeState(new Node(NodeType.STORAGE, 0), State.DOWN, "Connection error: Closed at other end");
+ communicator.setNodeState(new Node(NodeType.STORAGE, 1), State.DOWN, "Connection error: Closed at other end");
+
+ ctrl.tick();
+
+ assertEquals("version:4 distributor:10 .0.s:d .1.s:d .2.s:d .3.s:d storage:10 .0.s:d .1.s:d", ctrl.getSystemState().toString());
+
+ communicator.setNodeState(new Node(NodeType.DISTRIBUTOR, 4), State.DOWN, "Connection error: Closed at other end");
+
+ ctrl.tick();
+
+ assertEquals("version:5 cluster:d distributor:10 .0.s:d .1.s:d .2.s:d .3.s:d .4.s:d storage:10 .0.s:d .1.s:d", ctrl.getSystemState().toString());
+
+ tick(1000);
+
+ communicator.setNodeState(new Node(NodeType.DISTRIBUTOR, 4), State.UP, "");
+
+ ctrl.tick();
+
+ assertEquals("version:6 distributor:10 .0.s:d .1.s:d .2.s:d .3.s:d storage:10 .0.s:d .1.s:d", ctrl.getSystemState().toString());
+
+ tick(1000);
+
+ communicator.setNodeState(new Node(NodeType.STORAGE, 2), State.DOWN, "");
+
+ ctrl.tick();
+
+ assertEquals("version:7 cluster:d distributor:10 .0.s:d .1.s:d .2.s:d .3.s:d storage:10 .0.s:d .1.s:d .2.s:d", ctrl.getSystemState().toString());
+ }
+
+ /**
+ * Class for testing states of all nodes. Will fail in constructor with
+ * debug message on non-expected results.
+ */
+ abstract class StateMessageChecker {
+ StateMessageChecker(final List<DummyVdsNode> nodes) {
+ for (final DummyVdsNode node : nodes) {
+ final List<ClusterState> states = node.getSystemStatesReceived();
+ final StringBuilder debugString = new StringBuilder();
+ debugString.append("Node ").append(node).append("\n");
+ for (ClusterState state : states) {
+ debugString.append(state.toString()).append("\n");
+ }
+ assertEquals(debugString.toString(), expectedMessageCount(node), states.size());
+ }
+ }
+ abstract int expectedMessageCount(final DummyVdsNode node);
+ }
+
+ @Test
+ public void testNoSystemStateBeforeInitialTimePeriod() throws Exception {
+ startingTest("StateChangeTest::testNoSystemStateBeforeInitialTimePeriod()");
+ FleetControllerOptions options = new FleetControllerOptions("mycluster", createNodes(10));
+ options.minTimeBeforeFirstSystemStateBroadcast = 3 * 60 * 1000;
+ setUpSystem(true, options);
+ setUpVdsNodes(true, new DummyVdsNodeOptions(), true);
+ // Leave one node down to avoid sending cluster state due to having seen all node states.
+ for (int i=0; i<nodes.size(); ++i) {
+ if (i != 3) {
+ nodes.get(i).connect();
+ }
+ }
+ setUpFleetController(true, options);
+
+ StateWaiter waiter = new StateWaiter(timer);
+ fleetController.addSystemStateListener(waiter);
+
+ // Ensure all nodes have been seen by fleetcontroller and that it has had enough time to possibly have sent a cluster state
+ waiter.waitForState("version:\\d+ distributor:10 (\\.\\d+\\.t:\\d+ )*storage:10 (\\.\\d+\\.t:\\d+ )*.1.s:d( \\.\\d+\\.t:\\d+)*", timeoutMS);
+ waitForCompleteCycle();
+ new StateMessageChecker(nodes) {
+ @Override int expectedMessageCount(final DummyVdsNode node) { return 0; }
+ };
+
+ // Pass time and see that the nodes get state
+ timer.advanceTime(3 * 60 * 1000);
+ waiter.waitForState("version:\\d+ distributor:10 storage:10 .1.s:d", timeoutMS);
+
+ int version = waiter.getCurrentSystemState().getVersion();
+ fleetController.waitForNodesHavingSystemStateVersionEqualToOrAbove(version, 19, timeoutMS);
+
+ new StateMessageChecker(nodes) {
+ @Override int expectedMessageCount(final DummyVdsNode node) {
+ return node.getNode().equals(new Node(NodeType.STORAGE, 1)) ? 0 : 2;
+ }
+ };
+ assertEquals(version, waiter.getCurrentSystemState().getVersion());
+ }
+
+ @Test
+ public void testSystemStateSentWhenNodesReplied() throws Exception {
+ startingTest("StateChangeTest::testSystemStateSentWhenNodesReplied()");
+ final FleetControllerOptions options = new FleetControllerOptions("mycluster", createNodes(10));
+ options.minTimeBeforeFirstSystemStateBroadcast = 300 * 60 * 1000;
+
+ setUpSystem(true, options);
+
+ setUpVdsNodes(true, new DummyVdsNodeOptions(), true);
+
+ for (int i=0; i<nodes.size(); ++i) {
+ nodes.get(i).connect();
+ }
+ // Marking one node as 'initializing' improves testing of state later on.
+ nodes.get(3).setNodeState(State.INITIALIZING);
+
+ setUpFleetController(true, options);
+
+ final StateWaiter waiter = new StateWaiter(timer);
+
+ fleetController.addSystemStateListener(waiter);
+ waiter.waitForState("version:\\d+ distributor:10 storage:10 .1.s:i .1.i:1.0", timeoutMS);
+ waitForCompleteCycle();
+
+ final int version = waiter.getCurrentSystemState().getVersion();
+ fleetController.waitForNodesHavingSystemStateVersionEqualToOrAbove(version, 20, timeoutMS);
+
+ // The last two versions of the cluster state should be seen (all nodes up,
+ // zero out timestate)
+ new StateMessageChecker(nodes) {
+ @Override int expectedMessageCount(final DummyVdsNode node) { return 2; }
+ };
+ }
+
+ @Test
+ public void testDontTagFailingSetSystemStateOk() throws Exception {
+ startingTest("StateChangeTest::testDontTagFailingSetSystemStateOk()");
+ FleetControllerOptions options = new FleetControllerOptions("mycluster", createNodes(10));
+ setUpFleetController(true, options);
+ setUpVdsNodes(true, new DummyVdsNodeOptions());
+ waitForStableSystem();
+
+ StateWaiter waiter = new StateWaiter(timer);
+ fleetController.addSystemStateListener(waiter);
+
+ nodes.get(1).failSetSystemState(true);
+ int versionBeforeChange = nodes.get(1).getSystemStatesReceived().get(0).getVersion();
+ nodes.get(2).disconnect(); // cause a new state
+ waiter.waitForState("version:\\d+ distributor:10 .1.s:d storage:10", timeoutMS);
+ int versionAfterChange = waiter.getCurrentSystemState().getVersion();
+ assertTrue(versionAfterChange > versionBeforeChange);
+ fleetController.waitForNodesHavingSystemStateVersionEqualToOrAbove(versionAfterChange, 18, timeoutMS);
+
+ // Assert that the failed node has not acknowledged the latest version.
+ // (The version may still be larger than versionBeforeChange if the fleet controller sends a
+ // "stable system" update without timestamps in the meantime
+ assertTrue(fleetController.getCluster().getNodeInfo(nodes.get(1).getNode()).getSystemStateVersionAcknowledged() < versionAfterChange);
+
+ // Ensure non-concurrent access to getNewestSystemStateVersionSent
+ synchronized(timer) {
+ int sentVersion = fleetController.getCluster().getNodeInfo(nodes.get(1).getNode()).getNewestSystemStateVersionSent();
+ assertTrue(sentVersion == -1 || sentVersion == versionAfterChange);
+ }
+ }
+
+ @Test
+ public void testAlteringDistributionSplitCount() throws Exception {
+ startingTest("StateChangeTest::testAlteringDistributionSplitCount");
+ FleetControllerOptions options = new FleetControllerOptions("mycluster", createNodes(10));
+ options.distributionBits = 17;
+
+ initialize(options);
+
+ timer.advanceTime(1000000); // Node has been in steady state up
+
+ ctrl.tick();
+
+ setMinUsedBitsForAllNodes(15);
+
+ ctrl.tick();
+
+ assertEquals("version:4 bits:15 distributor:10 storage:10", ctrl.getSystemState().toString());
+
+ tick(1000);
+
+ communicator.setNodeState(new Node(NodeType.STORAGE, 0), new NodeState(NodeType.STORAGE, State.UP).setMinUsedBits(13), "");
+
+ ctrl.tick();
+
+ assertEquals("version:5 bits:13 distributor:10 storage:10", ctrl.getSystemState().toString());
+
+ tick(1000);
+ setMinUsedBitsForAllNodes(16);
+ ctrl.tick();
+
+ // Don't increase dist bits until we've reached at least the wanted
+ // level, in order to avoid multiple full redistributions of data.
+ assertEquals("version:5 bits:13 distributor:10 storage:10", ctrl.getSystemState().toString());
+
+ tick(1000);
+ setMinUsedBitsForAllNodes(19);
+ ctrl.tick();
+
+ assertEquals("version:6 bits:17 distributor:10 storage:10", ctrl.getSystemState().toString());
+ }
+
+ private void setMinUsedBitsForAllNodes(int bits) throws Exception {
+ for (int i = 0; i < 10; ++i) {
+ communicator.setNodeState(new Node(NodeType.STORAGE, i), new NodeState(NodeType.STORAGE, State.UP).setMinUsedBits(bits), "");
+ }
+ }
+
+ @Test
+ public void testSetAllTimestampsAfterDowntime() throws Exception {
+ startingTest("StateChangeTest::testSetAllTimestampsAfterDowntime");
+ FleetControllerOptions options = new FleetControllerOptions("mycluster", createNodes(10));
+ setUpFleetController(true, options);
+ setUpVdsNodes(true, new DummyVdsNodeOptions());
+ waitForStableSystem();
+
+ StateWaiter waiter = new StateWaiter(timer);
+ fleetController.addSystemStateListener(waiter);
+
+ // Simulate netsplit. Take node down without node booting
+ assertEquals(true, nodes.get(0).isDistributor());
+ nodes.get(0).disconnectImmediately();
+ waiter.waitForState("version:\\d+ distributor:10 .0.s:d storage:10", timeoutMS);
+
+ // Add node back.
+ nodes.get(0).connect();
+ waitForStableSystem();
+
+ // At this time, node taken down should have cluster states with all starting timestamps set. Others node should not.
+ for (DummyVdsNode node : nodes) {
+ node.waitForSystemStateVersion(waiter.getCurrentSystemState().getVersion(), timeoutMS);
+ List<ClusterState> states = node.getSystemStatesReceived();
+ ClusterState lastState = states.get(0);
+ StringBuilder stateHistory = new StringBuilder();
+ for (ClusterState state : states) {
+ stateHistory.append(state.toString()).append("\n");
+ }
+
+ if (node.getNode().equals(new Node(NodeType.DISTRIBUTOR, 0))) {
+ for (ConfiguredNode i : options.nodes) {
+ Node nodeId = new Node(NodeType.STORAGE, i.index());
+ long ts = lastState.getNodeState(nodeId).getStartTimestamp();
+ assertTrue(nodeId + "\n" + stateHistory + "\nWas " + ts + " should be " + fleetController.getCluster().getNodeInfo(nodeId).getStartTimestamp(), ts > 0);
+ }
+ } else {
+ for (ConfiguredNode i : options.nodes) {
+ Node nodeId = new Node(NodeType.STORAGE, i.index());
+ assertTrue(nodeId.toString(), lastState.getNodeState(nodeId).getStartTimestamp() == 0);
+ }
+ }
+
+ for (ConfiguredNode i : options.nodes) {
+ Node nodeId = new Node(NodeType.DISTRIBUTOR, i.index());
+ assertTrue(nodeId.toString(), lastState.getNodeState(nodeId).getStartTimestamp() == 0);
+ }
+ }
+ }
+
+}
diff --git a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/StateGatherTest.java b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/StateGatherTest.java
new file mode 100644
index 00000000000..cf3a47b1add
--- /dev/null
+++ b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/StateGatherTest.java
@@ -0,0 +1,81 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.vespa.clustercontroller.core;
+
+import com.yahoo.log.LogLevel;
+import org.junit.Test;
+import static org.junit.Assert.assertEquals;
+
+import java.net.InetAddress;
+import java.util.concurrent.TimeoutException;
+import java.util.logging.Logger;
+
+public class StateGatherTest extends FleetControllerTest {
+
+ public static Logger log = Logger.getLogger(StateGatherTest.class.getName());
+
+ public String getGetNodeStateReplyCounts(DummyVdsNode node) {
+ StringBuilder sb = new StringBuilder();
+ sb.append("timedout ").append(node.timedOutStateReplies)
+ .append(", outdated ").append(node.outdatedStateReplies)
+ .append(", immediate ").append(node.immediateStateReplies)
+ .append(", setstate ").append(node.setNodeStateReplies)
+ .append(", pending ").append(node.getPendingNodeStateCount());
+ return sb.toString();
+ }
+
+ @Test
+ public void testAlwaysHavePendingGetNodeStateRequestTowardsNodes() throws Exception {
+ Logger.getLogger(NodeStateGatherer.class.getName()).setLevel(LogLevel.SPAM);
+ startingTest("StateGatherTest::testOverlappingGetNodeStateRequests");
+ FleetControllerOptions options = new FleetControllerOptions("mycluster");
+ options.nodeStateRequestTimeoutMS = 10 * 60 * 1000;
+ // Force actual message timeout to be lower than request timeout.
+ options.nodeStateRequestTimeoutEarliestPercentage = 80;
+ options.nodeStateRequestTimeoutLatestPercentage = 80;
+ setUpFleetController(true, options);
+ String connectionSpecs[] = new String[1];
+ connectionSpecs[0] = "tcp/" + InetAddress.getLocalHost().getHostName() + ":" + slobrok.port();
+ DummyVdsNodeOptions dummyOptions = new DummyVdsNodeOptions();
+ DummyVdsNode dnode = new DummyVdsNode(timer, dummyOptions, connectionSpecs, this.options.clusterName, true, 0);
+ DummyVdsNode snode = new DummyVdsNode(timer, dummyOptions, connectionSpecs, this.options.clusterName, false, 0);
+ dnode.connect();
+ snode.connect();
+
+ waitUntilPendingGetNodeState(dnode, snode);
+
+ assertEquals("timedout 0, outdated 0, immediate 1, setstate 0, pending 1", getGetNodeStateReplyCounts(dnode));
+ assertEquals("timedout 0, outdated 0, immediate 1, setstate 0, pending 1", getGetNodeStateReplyCounts(snode));
+
+ waitForCompleteCycle();
+ timer.advanceTime(9 * 60 * 1000); // Requests should have timed out on nodes (8 min timeout).
+
+ waitUntilTimedOutGetNodeState(dnode, snode);
+ waitForCompleteCycle(); // Send new node state requests.
+ waitUntilPendingGetNodeState(dnode, snode);
+
+ assertEquals("timedout 1, outdated 0, immediate 1, setstate 0, pending 1", getGetNodeStateReplyCounts(dnode));
+ assertEquals("timedout 1, outdated 0, immediate 1, setstate 0, pending 1", getGetNodeStateReplyCounts(snode));
+ }
+
+ private void waitUntilTimedOutGetNodeState(DummyVdsNode dnode, DummyVdsNode snode) throws TimeoutException {
+ long timeout = System.currentTimeMillis() + timeoutMS;
+ synchronized (timer) {
+ while (dnode.timedOutStateReplies != 1 || snode.timedOutStateReplies != 1) {
+ if (System.currentTimeMillis() > timeout) {
+ throw new TimeoutException("Did not get to have one timed out within timeout of " + timeoutMS + " ms"
+ + ", " + getGetNodeStateReplyCounts(dnode) + ", " + getGetNodeStateReplyCounts(snode));
+ }
+ try{ timer.wait(1); } catch (InterruptedException e) {}
+ }
+ }
+ }
+
+ private void waitUntilPendingGetNodeState(DummyVdsNode dnode, DummyVdsNode snode) throws TimeoutException {
+ long timeout = System.currentTimeMillis() + timeoutMS;
+ while (dnode.getPendingNodeStateCount() != 1 || snode.getPendingNodeStateCount() != 1) {
+ if (System.currentTimeMillis() > timeout) throw new TimeoutException("Did not get to have one pending within timeout of " + timeoutMS + " ms");
+ try{ Thread.sleep(1); } catch (InterruptedException e) {}
+ }
+ }
+
+}
diff --git a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/StatsForStorageNodeTest.java b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/StatsForStorageNodeTest.java
new file mode 100644
index 00000000000..ddf0286b0fe
--- /dev/null
+++ b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/StatsForStorageNodeTest.java
@@ -0,0 +1,44 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.vespa.clustercontroller.core;
+
+import org.junit.Test;
+
+import java.util.HashMap;
+import java.util.Map;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertNotNull;
+import static org.junit.Assert.assertNull;
+
+/**
+ * @author hakon
+ */
+public class StatsForStorageNodeTest {
+ @Test
+ public void testStatsForStorage() {
+ Map<Integer, StorageNodeStats> statsMap = new HashMap<>();
+
+ LatencyStats putLatencyForA = new LatencyStats(1, 2);
+ StorageNodeStats nodeStatsForA = new StorageNodeStats(putLatencyForA);
+ statsMap.put(5, nodeStatsForA);
+
+ LatencyStats putLatencyForB = new LatencyStats(3, 4);
+ StorageNodeStats nodeStatsForB = new StorageNodeStats(putLatencyForB);
+ statsMap.put(6, nodeStatsForB);
+
+ StatsForStorageNodes stats = new StatsForStorageNodes(statsMap);
+
+ StorageNodeStats nodeStats = stats.getStatsForStorageNode(5);
+ assertNotNull(nodeStats);
+ assertEquals(1, nodeStatsForA.getDistributorPutLatency().getLatencyMsSum());
+ assertEquals(2, nodeStatsForA.getDistributorPutLatency().getCount());
+
+ nodeStats = stats.getStatsForStorageNode(6);
+ assertNotNull(nodeStats);
+ assertEquals(3, nodeStatsForB.getDistributorPutLatency().getLatencyMsSum());
+ assertEquals(4, nodeStatsForB.getDistributorPutLatency().getCount());
+
+ nodeStats = stats.getStatsForStorageNode(7);
+ assertNull(nodeStats);
+ }
+}
diff --git a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/StatusPagesTest.java b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/StatusPagesTest.java
new file mode 100644
index 00000000000..b1ae39729ab
--- /dev/null
+++ b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/StatusPagesTest.java
@@ -0,0 +1,385 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.vespa.clustercontroller.core;
+
+import com.yahoo.vdslib.distribution.Distribution;
+import com.yahoo.vdslib.state.Node;
+import com.yahoo.vdslib.state.NodeState;
+import com.yahoo.vdslib.state.NodeType;
+import com.yahoo.vdslib.state.State;
+import com.yahoo.vespa.clustercontroller.core.status.StatusHandler;
+import com.yahoo.vespa.clustercontroller.core.status.statuspage.StatusPageResponse;
+import com.yahoo.vespa.clustercontroller.core.status.statuspage.StatusPageServer;
+import com.yahoo.vespa.clustercontroller.utils.communication.http.HttpRequest;
+import com.yahoo.vespa.clustercontroller.utils.communication.http.HttpResult;
+import org.codehaus.jettison.json.JSONObject;
+
+import java.io.*;
+import java.net.Socket;
+import java.text.DateFormat;
+import java.text.SimpleDateFormat;
+import java.util.Date;
+import java.util.HashMap;
+import java.util.Map;
+import java.util.TimeZone;
+import java.util.logging.Logger;
+import org.junit.Test;
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertTrue;
+import static org.junit.Assert.assertFalse;
+import static org.junit.Assert.assertNull;
+
+public class StatusPagesTest extends FleetControllerTest {
+
+ public static Logger log = Logger.getLogger(StatusPagesTest.class.getName());
+
+ private String doHttpGetRequest(String request, Date ifModifiedSince) throws IOException {
+ int statusPort = fleetController.getHttpPort();
+ Socket socket = new Socket("localhost", statusPort);
+
+ BufferedWriter bw = new BufferedWriter(new OutputStreamWriter(socket.getOutputStream()));
+ bw.write("GET " + request + " HTTP/1.1\r\n");
+ if (ifModifiedSince != null) {
+ DateFormat df = new SimpleDateFormat("EEE, d MMM yyyy HH:mm:ss z");
+ df.setTimeZone(TimeZone.getTimeZone("GMT"));
+ bw.write("If-Modified-Since: " + df.format(ifModifiedSince) + "\r\n");
+ }
+ bw.write("\r\n");
+ bw.flush();
+
+ InputStream stream = socket.getInputStream();
+ ByteArrayOutputStream output = new ByteArrayOutputStream();
+ try {
+ byte [] buf = new byte[4096];
+ while (true) {
+ int read = stream.read(buf);
+ if (read<=0) {
+ break;
+ }
+ output.write(buf, 0, read);
+ }
+ output.close();
+ return output.toString();
+ } finally {
+ stream.close();
+ bw.close();
+ }
+ }
+
+ private String doHttpGetRequest(String request) throws IOException {
+ return doHttpGetRequest(request, null);
+ }
+
+ @Test
+ public void testStatusThroughContainer() throws Exception {
+ startingTest("StatusPagesTest::testStatusThroughContainer()");
+ FleetControllerOptions options = new FleetControllerOptions("mycluster");
+ options.setStorageDistribution(new Distribution(Distribution.getDefaultDistributionConfig(3, 10)));
+ final StatusHandler.ContainerStatusPageServer statusServer = new StatusHandler.ContainerStatusPageServer();
+ setUpFleetController(true, options, true, statusServer);
+ setUpVdsNodes(true, new DummyVdsNodeOptions());
+ waitForStableSystem();
+
+ //ThreadPoolExecutor executor = new ThreadPoolExecutor(10, 100, 100, TimeUnit.SECONDS, new ArrayBlockingQueue<Runnable>(1000));
+ //FleetControllerComponent fcComp = new FleetControllerComponent();
+ //fcComp.addFleetController("mycluster", fleetController, statusServer);
+ StatusHandler comp = new StatusHandler(new StatusHandler.ClusterStatusPageServerSet() {
+ @Override
+ public StatusHandler.ContainerStatusPageServer get(String cluster) {
+ return ("mycluster".equals(cluster) ? statusServer : null);
+ }
+
+ @Override
+ public Map<String, StatusHandler.ContainerStatusPageServer> getAll() {
+ Map<String, StatusHandler.ContainerStatusPageServer> map = new HashMap<>();
+ map.put("mycluster", statusServer);
+ return map;
+ }
+ });
+
+ {
+ HttpRequest request = new HttpRequest().setPath("/clustercontroller-status/v1");
+ HttpResult result = comp.handleRequest(request);
+ assertEquals(result.toString(true), 200, result.getHttpReturnCode());
+ assertEquals("<title>clusters</title>\n<a href=\"./mycluster\">mycluster</a><br>\n", result.getContent().toString());
+ }
+ {
+ HttpRequest request = new HttpRequest().setPath("/clustercontroller-status/v1/");
+ HttpResult result = comp.handleRequest(request);
+ assertEquals(result.toString(true), 200, result.getHttpReturnCode());
+ assertEquals("<title>clusters</title>\n<a href=\"./mycluster\">mycluster</a><br>\n", result.getContent().toString());
+ }
+ {
+ HttpRequest request = new HttpRequest().setPath("/clustercontroller-status/v1/mycluster");
+ HttpResult result = comp.handleRequest(request);
+ assertEquals(result.toString(true), 200, result.getHttpReturnCode());
+ assertTrue(result.toString(true), result.getContent().toString().contains(
+ "mycluster Cluster Controller 0 Status Page"));
+ }
+ {
+ HttpRequest request = new HttpRequest().setPath("/clustercontroller-status/v1/mycluster/");
+ HttpResult result = comp.handleRequest(request);
+ assertEquals(result.toString(true), 200, result.getHttpReturnCode());
+ assertTrue(result.toString(true), result.getContent().toString().contains(
+ "mycluster Cluster Controller 0 Status Page"));
+ assertTrue(result.toString(true), result.getContent().toString().contains(
+ "href=\"/clustercontroller-status/v1/mycluster/node=distributor.0\""));
+ assertTrue(result.toString(true), result.getContent().toString().contains(
+ "href=\"/clustercontroller-status/v1/mycluster/node=storage.0\""));
+ }
+ {
+ HttpRequest request = new HttpRequest().setPath("/clustercontroller-status/v1/mycluster/node=storage.0");
+ HttpResult result = comp.handleRequest(request);
+ assertEquals(result.toString(true), 200, result.getHttpReturnCode());
+ assertTrue(result.toString(true), result.getContent().toString().contains(
+ "Node status for storage.0"));
+ assertTrue(result.toString(true), result.getContent().toString().contains(
+ "href=\"/clustercontroller-status/v1/mycluster\""));
+ }
+ {
+ HttpRequest request = new HttpRequest().setPath("/clustercontroller-status/v1/foo");
+ HttpResult result = comp.handleRequest(request);
+ assertEquals(result.toString(true), 404, result.getHttpReturnCode());
+ }
+ {
+ HttpRequest request = new HttpRequest().setPath("/foobar/v1/mycluster/");
+ HttpResult result = comp.handleRequest(request);
+ assertEquals(result.toString(true), 404, result.getHttpReturnCode());
+ }
+ {
+ HttpRequest request = new HttpRequest().setPath("/clustercontroller-status/v2/");
+ HttpResult result = comp.handleRequest(request);
+ assertEquals(result.toString(true), 404, result.getHttpReturnCode());
+ }
+ //executor.shutdown();
+ }
+
+ @Test
+ public void testZooKeeperAddressSplitting() {
+ String rawAddress = "conc1.foo.yahoo.com:2181,conc2.foo.yahoo.com:2181,"
+ + "dp1.foo.yahoo.com:2181,dp2.foo.yahoo.com:2181,"
+ + "dp3.foo.yahoo.com:2181";
+ String result = "conc1.foo.yahoo.com:2181, conc2.foo.yahoo.com:2181, "
+ + "dp1.foo.yahoo.com:2181, dp2.foo.yahoo.com:2181, "
+ + "dp3.foo.yahoo.com:2181";
+ String split = FleetControllerOptions.splitZooKeeperAddress(rawAddress);
+ assertEquals(result, split);
+ }
+
+ @Test
+ public void testSimpleConnectionWithSomeContent() throws Exception {
+ // Set this to true temporary if you want to check status page from browser. Should be false in checked in code always.
+ boolean haltTestToViewStatusPage = false;
+ startingTest("StatusPagesTest::testSimpleConnectionWithSomeContent()");
+ FleetControllerOptions options = new FleetControllerOptions("mycluster");
+ options.setStorageDistribution(new Distribution(Distribution.getDefaultDistributionConfig(3, 10)));
+ //options.minRatioOfStorageNodesUp = 0.99;
+ if (haltTestToViewStatusPage) {
+ options.httpPort = 19234;
+ }
+ setUpFleetController(true, options);
+ setUpVdsNodes(true, new DummyVdsNodeOptions());
+ waitForStableSystem();
+
+ nodes.get(2).disconnectBreakConnection();
+ nodes.get(5).disconnectAsShutdown();
+ nodes.get(7).disconnectSlobrok();
+
+ fleetController.getCluster().getNodeInfo(new Node(NodeType.STORAGE, 3)).setWantedState(new NodeState(NodeType.STORAGE, State.MAINTENANCE).setDescription("Test&<>special"));
+
+ String content = doHttpGetRequest("/");
+
+ assertTrue(content, content.contains("<html>"));
+ assertTrue(content, content.contains("</html>"));
+ assertTrue(content, content.contains("Current cluster state"));
+ assertTrue(content, content.contains("Cluster states"));
+ assertTrue(content, content.contains("Event log"));
+
+ if (haltTestToViewStatusPage) {
+ System.err.println(content);
+ try{
+ Thread.sleep(1000000);
+ } catch (InterruptedException e) {}
+ }
+ }
+
+ @Test
+ public void testNodePage() throws Exception {
+ startingTest("StatusPagesTest::testNodePage()");
+ FleetControllerOptions options = new FleetControllerOptions("mycluster");
+ options.setStorageDistribution(new Distribution(Distribution.getDefaultDistributionConfig(3, 10)));
+ setUpFleetController(true, options);
+ setUpVdsNodes(true, new DummyVdsNodeOptions());
+ waitForStableSystem();
+
+ String content = doHttpGetRequest("/node=storage.0");
+
+ assertTrue(content, content.contains("<html>"));
+ assertTrue(content, content.contains("</html>"));
+ assertTrue(content, content.contains("Node status for storage.0"));
+ assertTrue(content, content.contains("REPORTED"));
+ assertTrue(content, content.contains("Altered node state in cluster state from"));
+ //System.err.println(sb.toString());
+
+ }
+
+ @Test
+ public void testErrorResponseCode() throws Exception {
+ startingTest("StatusPagesTest::testNodePage()");
+ FleetControllerOptions options = new FleetControllerOptions("mycluster");
+ options.setStorageDistribution(new Distribution(Distribution.getDefaultDistributionConfig(3, 10)));
+ setUpFleetController(true, options);
+ setUpVdsNodes(true, new DummyVdsNodeOptions());
+ waitForStableSystem();
+
+ String content = doHttpGetRequest("/fraggle/rock");
+
+ assertTrue(content.contains("404 Not Found"));
+ //System.err.println(sb.toString());
+ }
+
+ private StatusPageServer.HttpRequest makeHttpRequest(String request) {
+ return new StatusPageServer.HttpRequest(request);
+ }
+
+ @Test
+ public void testHttpRequestParsing() {
+ {
+ StatusPageServer.HttpRequest request = makeHttpRequest("/") ;
+ assertEquals("/", request.getPath());
+ assertFalse(request.hasQueryParameters());
+ }
+ {
+ StatusPageServer.HttpRequest request = makeHttpRequest("/foo/bar");
+ assertEquals("/foo/bar", request.getPath());
+ assertFalse(request.hasQueryParameters());
+ }
+ {
+ StatusPageServer.HttpRequest request = makeHttpRequest("/foo/bar?baz=baff");
+ assertEquals("/foo/bar", request.getPath());
+ assertTrue(request.hasQueryParameters());
+ assertEquals("baff", request.getQueryParameter("baz"));
+ }
+ {
+ StatusPageServer.HttpRequest request = makeHttpRequest("/?baz=baff&blarg=blee");
+ assertEquals("/", request.getPath());
+ assertTrue(request.hasQueryParameters());
+ assertEquals("baff", request.getQueryParameter("baz"));
+ assertEquals("blee", request.getQueryParameter("blarg"));
+ }
+ {
+ StatusPageServer.HttpRequest request = makeHttpRequest("/node=storage.101?showlocal");
+ assertEquals("/node=storage.101", request.getPath());
+ assertTrue(request.hasQueryParameters());
+ assertTrue(request.hasQueryParameter("showlocal"));
+ assertNull(request.getQueryParameter("showlocal"));
+ }
+ }
+
+ private static class DummyRequestHandler implements StatusPageServer.RequestHandler {
+ private String returnData;
+ public DummyRequestHandler(String returnData) {
+ this.returnData = returnData;
+ }
+
+ @Override
+ public StatusPageResponse handle(StatusPageServer.HttpRequest request) {
+ StatusPageResponse response = new StatusPageResponse();
+ response.writeContent(returnData);
+ return response;
+ }
+ }
+
+ private String invokeHandler(StatusPageServer.RequestRouter router, String request) {
+ StatusPageServer.HttpRequest httpRequest = makeHttpRequest(request);
+ StatusPageServer.RequestHandler handler = router.resolveHandler(httpRequest);
+ if (handler == null) {
+ return null;
+ }
+ try {
+ return handler.handle(httpRequest).getOutputStream().toString("UTF-8");
+ } catch (UnsupportedEncodingException e) {
+ return "<ERROR>";
+ }
+ }
+
+ @Test
+ public void testRequestRouting() {
+ StatusPageServer.PatternRequestRouter router = new StatusPageServer.PatternRequestRouter();
+ router.addHandler("^/alerts/red.*", new DummyRequestHandler("red alert!"));
+ router.addHandler("^/alerts.*", new DummyRequestHandler("beige alert"));
+ router.addHandler("^/$", new DummyRequestHandler("root"));
+ assertEquals("root", invokeHandler(router, "/"));
+ assertEquals("beige alert", invokeHandler(router, "/alerts"));
+ assertEquals("beige alert", invokeHandler(router, "/alerts?foo"));
+ assertEquals("red alert!", invokeHandler(router, "/alerts/red"));
+ assertEquals("red alert!", invokeHandler(router, "/alerts/red/blue"));
+ assertNull(invokeHandler(router, "/blarg"));
+ }
+
+ public String[] getResponseParts(String response) {
+ int offset = response.indexOf("\r\n\r\n");
+ if (offset == -1) {
+ throw new IllegalStateException("No HTTP header delimiter found");
+ }
+ return new String[] {
+ response.substring(0, offset + 2), // all header lines must have linebreaks
+ response.substring(offset + 4)
+ };
+ }
+
+ private String getHeaderValue(String header, String name) {
+ int offset = header.indexOf(name + ": ");
+ if (offset == -1) {
+ throw new IllegalStateException("No HTTP header found for " + name);
+ }
+ int end = header.indexOf("\r\n", offset);
+ if (end == -1) {
+ throw new IllegalStateException("No EOL found for " + name);
+ }
+ return header.substring(offset + name.length() + 2, end);
+ }
+
+ @Test
+ public void testStateServing() throws Exception {
+ startingTest("StatusPagesTest::testStateServing()");
+ FleetControllerOptions options = new FleetControllerOptions("mycluster");
+ setUpFleetController(true, options);
+ fleetController.updateOptions(options, 5);
+ waitForCompleteCycle();
+ {
+ String content = doHttpGetRequest("/state/v1/health");
+ String[] parts = getResponseParts(content);
+ String body = parts[1];
+ String expected =
+ "{\n" +
+ " \"status\" : {\n" +
+ " \"code\" : \"up\"\n" +
+ " },\n" +
+ " \"config\" : {\n" +
+ " \"component\" : {\n" +
+ " \"generation\" : 5\n" +
+ " }\n" +
+ " }\n" +
+ "}";
+ assertEquals(expected, body);
+ // Check that it actually parses
+ JSONObject o = new JSONObject(expected);
+ }
+ }
+
+ @Test
+ public void testClusterStateServing() throws Exception {
+ startingTest("StatusPagesTest::testClusterStateServing()");
+ FleetControllerOptions options = new FleetControllerOptions("mycluster");
+ setUpFleetController(true, options);
+ fleetController.updateOptions(options, 5);
+ waitForCompleteCycle();
+ {
+ String content = doHttpGetRequest("/clusterstate");
+ String[] parts = getResponseParts(content);
+ String body = parts[1];
+ String expected = "version:2 cluster:d";
+ assertEquals(expected, body);
+ }
+ }
+}
diff --git a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/StorageNodeStatsContainerTest.java b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/StorageNodeStatsContainerTest.java
new file mode 100644
index 00000000000..e2832c5b6b9
--- /dev/null
+++ b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/StorageNodeStatsContainerTest.java
@@ -0,0 +1,43 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.vespa.clustercontroller.core;
+
+import org.junit.Test;
+
+import java.util.HashMap;
+import java.util.Map;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertNotNull;
+import static org.junit.Assert.assertNull;
+
+/**
+ * @author hakon
+ */
+public class StorageNodeStatsContainerTest {
+ @Test
+ public void testStatsForStorage() {
+ StorageNodeStatsContainer statsContainer = new StorageNodeStatsContainer();
+ Map<Integer, StorageNodeStats> statsMap = new HashMap<>();
+
+ LatencyStats putLatencyForA = new LatencyStats(1, 2);
+ StorageNodeStats nodeStatsForA = new StorageNodeStats(putLatencyForA);
+ statsContainer.put(5, nodeStatsForA);
+
+ LatencyStats putLatencyForB = new LatencyStats(3, 4);
+ StorageNodeStats nodeStatsForB = new StorageNodeStats(putLatencyForB);
+ statsContainer.put(6, nodeStatsForB);
+
+ StorageNodeStats nodeStats = statsContainer.get(5);
+ assertNotNull(nodeStats);
+ assertEquals(1, nodeStatsForA.getDistributorPutLatency().getLatencyMsSum());
+ assertEquals(2, nodeStatsForA.getDistributorPutLatency().getCount());
+
+ nodeStats = statsContainer.get(6);
+ assertNotNull(nodeStats);
+ assertEquals(3, nodeStatsForB.getDistributorPutLatency().getLatencyMsSum());
+ assertEquals(4, nodeStatsForB.getDistributorPutLatency().getCount());
+
+ nodeStats = statsContainer.get(7);
+ assertNull(nodeStats);
+ }
+}
diff --git a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/StorageNodeStatsTest.java b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/StorageNodeStatsTest.java
new file mode 100644
index 00000000000..b905cd32979
--- /dev/null
+++ b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/StorageNodeStatsTest.java
@@ -0,0 +1,25 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.vespa.clustercontroller.core;
+
+import org.junit.Test;
+
+import static org.junit.Assert.assertEquals;
+
+/**
+ * @author hakon
+ */
+public class StorageNodeStatsTest {
+ @Test
+ public void testStorageNodeStats() {
+ LatencyStats putLatency = new LatencyStats(1, 2);
+ StorageNodeStats stats = new StorageNodeStats(putLatency);
+ assertEquals(1, stats.getDistributorPutLatency().getLatencyMsSum());
+ assertEquals(2, stats.getDistributorPutLatency().getCount());
+
+ LatencyStats putLatencyToAdd = new LatencyStats(3, 4);
+ StorageNodeStats statsToAdd = new StorageNodeStats(putLatencyToAdd);
+ stats.add(statsToAdd);
+ assertEquals(1 + 3, stats.getDistributorPutLatency().getLatencyMsSum());
+ assertEquals(2 + 4, stats.getDistributorPutLatency().getCount());
+ }
+}
diff --git a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/SystemStateGeneratorTest.java b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/SystemStateGeneratorTest.java
new file mode 100644
index 00000000000..ab6185d2b56
--- /dev/null
+++ b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/SystemStateGeneratorTest.java
@@ -0,0 +1,198 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.vespa.clustercontroller.core;
+
+import com.yahoo.vdslib.distribution.ConfiguredNode;
+import com.yahoo.vdslib.distribution.Distribution;
+import com.yahoo.vdslib.state.*;
+import com.yahoo.vespa.clustercontroller.core.hostinfo.HostInfo;
+import com.yahoo.vespa.clustercontroller.core.listeners.NodeStateOrHostInfoChangeHandler;
+import com.yahoo.vespa.clustercontroller.core.listeners.SystemStateListener;
+import com.yahoo.vespa.clustercontroller.core.mocks.TestEventLog;
+import com.yahoo.vespa.clustercontroller.core.testutils.LogFormatter;
+import junit.framework.TestCase;
+
+import java.util.LinkedList;
+import java.util.Set;
+import java.util.TreeSet;
+import java.util.logging.Logger;
+
+public class SystemStateGeneratorTest extends TestCase {
+ private static final Logger log = Logger.getLogger(SystemStateGeneratorTest.class.getName());
+ class Config {
+ int nodeCount = 3;
+ int stableStateTime = 1000 * 60000;
+ int maxSlobrokDisconnectPeriod = 60000;
+ int maxPrematureCrashes = 3;
+ }
+ class TestSystemStateListener implements SystemStateListener {
+ LinkedList<ClusterState> states = new LinkedList<>();
+
+ @Override
+ public void handleNewSystemState(ClusterState state) {
+ states.add(state);
+ }
+
+ public String toString() {
+ StringBuilder sb = new StringBuilder();
+ sb.append("States(");
+ for (ClusterState state : states) sb.append('\n').append(state.toString());
+ sb.append(")");
+ return sb.toString();
+ }
+
+ }
+
+ class TestNodeStateOrHostInfoChangeHandler implements NodeStateOrHostInfoChangeHandler {
+
+ LinkedList<String> events = new LinkedList<>();
+
+ @Override
+ public void handleNewNodeState(NodeInfo node, NodeState newState) {
+ events.add(node + " - " + newState);
+ }
+
+ @Override
+ public void handleNewWantedNodeState(NodeInfo node, NodeState newState) {
+ events.add(node + " - " + newState);
+ }
+
+ @Override
+ public void handleUpdatedHostInfo(NodeInfo node, HostInfo newHostInfo) {
+ events.add(node + " - " + newHostInfo);
+ }
+
+ public String toString() {
+ StringBuilder sb = new StringBuilder();
+ sb.append("NodeChanges(");
+ for (String change : events) sb.append('\n').append(change);
+ sb.append(")");
+ return sb.toString();
+ }
+ }
+
+ private FakeTimer clock = new FakeTimer();
+ private TestEventLog eventLog = new TestEventLog();
+ private Set<ConfiguredNode> configuredNodes = new TreeSet<>();
+ private Config config;
+ private ContentCluster cluster;
+ private SystemStateGenerator generator;
+ private TestSystemStateListener systemStateListener;
+ private TestNodeStateOrHostInfoChangeHandler nodeStateUpdateListener;
+
+ public void setUp() {
+ LogFormatter.initializeLogging();
+ }
+
+ private void initialize(Config config) {
+ Distribution distribution = new Distribution(Distribution.getDefaultDistributionConfig(2, 100));
+ this.config = config;
+ for (int i=0; i<config.nodeCount; ++i) configuredNodes.add(new ConfiguredNode(i, false));
+ cluster = new ContentCluster("testcluster", configuredNodes, distribution, 0, 0.0);
+ generator = new SystemStateGenerator(clock, eventLog, null);
+ generator.setNodes(cluster.clusterInfo());
+ generator.setStableStateTimePeriod(config.stableStateTime);
+ generator.setMaxPrematureCrashes(config.maxPrematureCrashes);
+ generator.setMaxSlobrokDisconnectGracePeriod(config.maxSlobrokDisconnectPeriod);
+ generator.setMinNodesUp(1, 1, 0, 0);
+ systemStateListener = new TestSystemStateListener();
+ nodeStateUpdateListener = new TestNodeStateOrHostInfoChangeHandler();
+ }
+
+ private void assertNewClusterStateReceived() {
+ assertTrue(generator.notifyIfNewSystemState(systemStateListener));
+ assertTrue(systemStateListener.toString(), systemStateListener.states.size() == 1);
+ systemStateListener.states.clear();
+ }
+
+ private void startWithStableStateClusterWithNodesUp() {
+ for (NodeType type : NodeType.getTypes()) {
+ for (ConfiguredNode i : configuredNodes) {
+ NodeInfo nodeInfo = cluster.clusterInfo().setRpcAddress(new Node(type, i.index()), null);
+ nodeInfo.markRpcAddressLive();
+ generator.handleNewReportedNodeState(nodeInfo, new NodeState(type, State.UP), null);
+ nodeInfo.setReportedState(new NodeState(type, State.UP), clock.getCurrentTimeInMillis());
+ }
+ }
+ assertNewClusterStateReceived();
+ for (NodeType type : NodeType.getTypes()) {
+ for (ConfiguredNode i : configuredNodes) {
+ Node n = new Node(type, i.index());
+ assertEquals(State.UP, generator.getClusterState().getNodeState(n).getState());
+ }
+ }
+ clock.advanceTime(config.stableStateTime);
+ }
+
+ private void markNodeOutOfSlobrok(Node node) {
+ log.info("Marking " + node + " out of slobrok");
+ cluster.getNodeInfo(node).markRpcAddressOutdated(clock);
+ generator.handleMissingNode(cluster.getNodeInfo(node), nodeStateUpdateListener);
+ assertTrue(nodeStateUpdateListener.toString(), nodeStateUpdateListener.events.isEmpty());
+ nodeStateUpdateListener.events.clear();
+ assertTrue(eventLog.toString(), eventLog.toString().contains("Node is no longer in slobrok"));
+ eventLog.clear();
+ }
+
+ private void markNodeBackIntoSlobrok(Node node, State state) {
+ log.info("Marking " + node + " back in slobrok");
+ cluster.getNodeInfo(node).markRpcAddressLive();
+ generator.handleReturnedRpcAddress(cluster.getNodeInfo(node));
+ assertEquals(0, nodeStateUpdateListener.events.size());
+ assertEquals(0, systemStateListener.states.size());
+ generator.handleNewReportedNodeState(cluster.getNodeInfo(node), new NodeState(node.getType(), state), nodeStateUpdateListener);
+ cluster.getNodeInfo(node).setReportedState(new NodeState(node.getType(), state), clock.getCurrentTimeInMillis());
+ assertEquals(0, nodeStateUpdateListener.events.size());
+ assertEquals(0, systemStateListener.states.size());
+ }
+
+ private void verifyClusterStateChanged(Node node, State state) {
+ log.info("Verifying cluster state has been updated for " + node + " to " + state);
+ assertTrue(generator.notifyIfNewSystemState(systemStateListener));
+ assertEquals(1, systemStateListener.states.size());
+ assertEquals(state, systemStateListener.states.get(0).getNodeState(node).getState());
+ systemStateListener.states.clear();
+ assertEquals(state, generator.getClusterState().getNodeState(node).getState());
+ }
+
+ private void verifyNodeStateAfterTimerWatch(Node node, State state) {
+ log.info("Verifying state of node after timer watch.");
+ generator.watchTimers(cluster, nodeStateUpdateListener);
+ assertEquals(0, nodeStateUpdateListener.events.size());
+ verifyClusterStateChanged(node, state);
+ }
+
+ private void verifyPrematureCrashCountCleared(Node node) {
+ assertTrue(generator.watchTimers(cluster, nodeStateUpdateListener));
+ assertEquals(0, nodeStateUpdateListener.events.size());
+ assertEquals(0, cluster.getNodeInfo(node).getPrematureCrashCount());
+ }
+
+ public void testUnstableNodeInSlobrok() throws Exception {
+ initialize(new Config());
+ startWithStableStateClusterWithNodesUp();
+ Node node = new Node(NodeType.STORAGE, 0);
+ for (int j=0; j<3; ++j) {
+ log.info("Iteration " + j);
+ assertEquals(0, cluster.getNodeInfo(node).getPrematureCrashCount());
+ assertEquals(State.UP, cluster.getNodeInfo(node).getWantedState().getState());
+ assertEquals(State.UP, generator.getClusterState().getNodeState(node).getState());
+ for (int k=0; k<config.maxPrematureCrashes; ++k) {
+ log.info("Premature iteration " + k);
+ markNodeOutOfSlobrok(node);
+
+ log.info("Passing max disconnect time period. Watching timers");
+ clock.advanceTime(config.maxSlobrokDisconnectPeriod);
+
+ verifyNodeStateAfterTimerWatch(node, State.MAINTENANCE);
+ cluster.getNodeInfo(node).setReportedState(new NodeState(node.getType(), State.DOWN), clock.getCurrentTimeInMillis());
+
+ assertEquals(k, cluster.getNodeInfo(node).getPrematureCrashCount());
+ markNodeBackIntoSlobrok(node, State.UP);
+ verifyClusterStateChanged(node, State.UP);
+ }
+ log.info("Passing steady state to get premature crash count flag cleared");
+ clock.advanceTime(config.stableStateTime);
+ verifyPrematureCrashCountCleared(node);
+ }
+ }
+}
diff --git a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/WantedStateTest.java b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/WantedStateTest.java
new file mode 100644
index 00000000000..8065d701f6b
--- /dev/null
+++ b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/WantedStateTest.java
@@ -0,0 +1,82 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.vespa.clustercontroller.core;
+
+import com.yahoo.jrt.*;
+import com.yahoo.jrt.StringValue;
+import com.yahoo.vdslib.state.NodeState;
+import com.yahoo.vdslib.state.State;
+import org.junit.After;
+import org.junit.Before;
+import org.junit.Test;
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertTrue;
+
+public class WantedStateTest extends FleetControllerTest {
+
+ private Supervisor supervisor;
+
+ @Before
+ public void setUp() {
+ supervisor = new Supervisor(new Transport());
+ }
+
+ @After
+ public void tearDown() throws Exception {
+ if (supervisor != null) {
+ supervisor.transport().shutdown().join();
+ supervisor = null;
+ }
+ super.tearDown();
+ }
+
+ public void setWantedState(DummyVdsNode node, State state, String reason) {
+ NodeState ns = new NodeState(node.getType(), state);
+ if (reason != null) ns.setDescription(reason);
+ Target connection = supervisor.connect(new Spec(fleetController.getRpcPort()));
+ Request req = new Request("setNodeState");
+ req.parameters().add(new StringValue(node.getSlobrokName()));
+ req.parameters().add(new StringValue(ns.serialize()));
+ connection.invokeSync(req, timeoutS);
+ if (req.isError()) {
+ assertTrue("Failed to invoke setNodeState(): " + req.errorCode() + ": " + req.errorMessage(), false);
+ }
+ if (!req.checkReturnTypes("s")) {
+ assertTrue("Failed to invoke setNodeState(): Invalid return types.", false);
+ }
+ }
+
+ @Test
+ public void testSettingStorageNodeMaintenanceAndBack() throws Exception {
+ startingTest("WantedStateTest::testSettingStorageNodeMaintenanceAndBack()");
+ setUpFleetController(true, new FleetControllerOptions("mycluster"));
+ setUpVdsNodes(true, new DummyVdsNodeOptions());
+ waitForStableSystem();
+
+ setWantedState(nodes.get(1), State.MAINTENANCE, null);
+ waitForState("version:\\d+ distributor:10 storage:10 .0.s:m");
+
+ setWantedState(nodes.get(1), State.UP, null);
+ waitForState("version:\\d+ distributor:10 storage:10");
+ }
+
+ @Test
+ public void testOverridingWantedStateOtherReason() throws Exception {
+ startingTest("WantedStateTest::testOverridingWantedStateOtherReason()");
+ setUpFleetController(true, new FleetControllerOptions("mycluster"));
+ setUpVdsNodes(true, new DummyVdsNodeOptions());
+ waitForStableSystem();
+
+ setWantedState(nodes.get(1), State.MAINTENANCE, "Foo");
+ waitForState("version:\\d+ distributor:10 storage:10 .0.s:m");
+ assertEquals("Foo", fleetController.getWantedNodeState(nodes.get(1).getNode()).getDescription());
+
+ setWantedState(nodes.get(1), State.MAINTENANCE, "Bar");
+ waitForCompleteCycle();
+ assertEquals("Bar", fleetController.getWantedNodeState(nodes.get(1).getNode()).getDescription());
+
+ setWantedState(nodes.get(1), State.UP, null);
+ waitForState("version:\\d+ distributor:10 storage:10");
+ }
+
+
+}
diff --git a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/ZooKeeperStressTest.java b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/ZooKeeperStressTest.java
new file mode 100644
index 00000000000..bc317b78ff1
--- /dev/null
+++ b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/ZooKeeperStressTest.java
@@ -0,0 +1,157 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.vespa.clustercontroller.core;
+
+import com.yahoo.vdslib.state.NodeState;
+import com.yahoo.vdslib.state.Node;
+import com.yahoo.vespa.clustercontroller.core.database.ZooKeeperDatabase;
+
+import java.util.Map;
+
+public class ZooKeeperStressTest extends junit.framework.TestCase {
+ private Object lock = new Object();
+ private int waitTime = 0;
+
+ class LoadGiver extends Thread {
+ ZooKeeperDatabase db;
+ public int count = 0;
+ public int errors = 0;
+ public int index;
+ public boolean stopNow = false;
+
+ LoadGiver(ZooKeeperDatabase db, int index) {
+ this.db = db;
+ this.index = index;
+ }
+
+ public void doStop() {
+ stopNow = true;
+ }
+
+ public void run() {
+ try{
+ while (!this.isInterrupted() && !stopNow) {
+ // Needs to take lock for each operation. Store new mastervote can not run at the same time as
+ // another store new master vote as they kill the ephemeral node
+ synchronized (lock) {
+ if (db.isClosed()) { System.err.println(this + " Session broke"); break; }
+ ++count;
+ if (db.retrieveLatestSystemStateVersion() == null) {
+ System.err.println("retrieveLatestSystemStateVersion() failed");
+ ++errors;
+ }
+ }
+ Map<Node, NodeState> wantedStates;
+ synchronized (lock) {
+ if (db.isClosed()) { System.err.println(this + " Session broke"); break; }
+ ++count;
+ wantedStates = db.retrieveWantedStates();
+ if (wantedStates == null) {
+ System.err.println("retrieveWantedStates() failed");
+ ++errors;
+ }
+ }
+ synchronized (lock) {
+ if (db.isClosed()) { System.err.println(this + " Session broke"); break; }
+ ++count;
+ if (!db.storeLatestSystemStateVersion(5)) {
+ System.err.println("storeLastestSystemStateVersion() failed");
+ ++errors;
+ }
+ }
+ synchronized (lock) {
+ if (db.isClosed()) { System.err.println(this + " Session broke"); break; }
+ ++count;
+ if (!db.storeMasterVote(0)) {
+ System.err.println("storeMasterVote() failed");
+ ++errors;
+ }
+ }
+ synchronized (lock) {
+ if (db.isClosed()) { System.err.println(this + " Session broke"); break; }
+ if (wantedStates != null) {
+ ++count;
+ if (!db.storeWantedStates(wantedStates)) {
+ System.err.println("storeWantedState() failed");
+ ++errors;
+ }
+ }
+ }
+ try{ Thread.sleep(waitTime); } catch (Exception e) {}
+ }
+ } catch (InterruptedException e) {}
+ }
+
+ public String toString() {
+ return "LoadGiver(" + index + ": count " + count + ", errors " + errors + ")";
+ }
+ }
+
+ public void testNothing() throws Exception {
+ // Stupid junit fails if there's testclass without tests
+ }
+
+ public void testZooKeeperStressed() throws Exception {
+ // Disabled for now.: Unstable
+ /*
+ ZooKeeperTestServer zooKeeperServer = new ZooKeeperTestServer();
+ Database.DatabaseListener zksl = new Database.DatabaseListener() {
+ public void handleZooKeeperSessionDown() {
+ assertFalse("We lost session to ZooKeeper. Shouldn't happen", true);
+ }
+
+ public void handleMasterData(Map<Integer, Integer> data) {
+ }
+ };
+ VdsCluster cluster = new VdsCluster("mycluster", 10, 10, true);
+ int timeout = 30000;
+ ZooKeeperDatabase db = new ZooKeeperDatabase(cluster, 0, zooKeeperServer.getAddress(), timeout, zksl);
+
+ Collection<LoadGiver> loadGivers = new ArrayList();
+ long time = System.currentTimeMillis();
+ for (int i = 0; i<10; ++i) {
+ loadGivers.add(new LoadGiver(db, i));
+ }
+ for (LoadGiver lg : loadGivers) {
+ lg.start();
+ }
+ for (int i = 0; i<30000; i += 100) {
+ Thread.sleep(100);
+ boolean failed = false;
+ for (LoadGiver lg : loadGivers) {
+ if (lg.errors > 0) {
+ failed = true;
+ }
+ }
+ if (failed) i += 5000;
+ }
+ int throughput = 0;
+ int errors = 0;
+ for (LoadGiver lg : loadGivers) {
+ assertTrue("Error check prior to attempting to stop: " + lg.toString(), lg.errors == 0);
+ }
+ for (LoadGiver lg : loadGivers) {
+ lg.doStop();
+ throughput += lg.count;
+ errors += lg.errors;
+ }
+ time = System.currentTimeMillis() - time;
+ Double timesecs = new Double(time / 1000.0);
+ if (timesecs > 0.001) {
+ System.err.println("Throughput is " + (throughput / timesecs) + "msgs/sec, " + errors + " errors, total messages sent: " + throughput + ", waittime = " + waitTime);
+ } else {
+ System.err.println("too small time period " + time + " to calculate throughput");
+ }
+ //try{ Thread.sleep(5000); } catch (Exception e) {}
+ for (LoadGiver lg : loadGivers) {
+ lg.join();
+ }
+ for (LoadGiver lg : loadGivers) {
+ System.err.println(lg);
+ }
+ // Disabling test. This fails occasionally for some reason.
+ for (LoadGiver lg : loadGivers) {
+ // assertTrue("Error check after having stopped: " + lg.toString(), lg.errors == 0);
+ }
+ */
+ }
+}
diff --git a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/ZooKeeperTestServer.java b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/ZooKeeperTestServer.java
new file mode 100644
index 00000000000..a5191df5f73
--- /dev/null
+++ b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/ZooKeeperTestServer.java
@@ -0,0 +1,86 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.vespa.clustercontroller.core;
+
+import org.apache.zookeeper.server.ZooKeeperServer;
+import org.apache.zookeeper.server.NIOServerCnxnFactory;
+
+import java.io.File;
+import java.io.IOException;
+import java.net.InetSocketAddress;
+
+/**
+ * This class sets up a zookeeper server, such that we can test fleetcontroller zookeeper parts without stubbing in the client.
+ */
+public class ZooKeeperTestServer {
+ private File zooKeeperDir;
+ private ZooKeeperServer server;
+ private static final int tickTime = 100;
+ private NIOServerCnxnFactory factory;
+ private static final String DIR_PREFIX = "test_fltctrl_zk";
+ private static final String DIR_POSTFIX = "sdir";
+
+ public ZooKeeperTestServer() throws IOException {
+ this(0);
+ }
+
+ private ZooKeeperTestServer(int port) throws IOException {
+ zooKeeperDir = getTempDir();
+ delete(zooKeeperDir);
+ if (!zooKeeperDir.mkdir()) {
+ throw new IllegalStateException("Failed to create directory " + zooKeeperDir);
+ }
+ zooKeeperDir.deleteOnExit();
+ server = new ZooKeeperServer(zooKeeperDir, zooKeeperDir, tickTime);
+ final int maxcc = 10000; // max number of connections from the same client
+ factory = new NIOServerCnxnFactory();
+ factory.configure(new InetSocketAddress(port), maxcc); // Use any port
+ try{
+ factory.startup(server);
+ } catch (InterruptedException e) {
+ throw (RuntimeException) new IllegalStateException("Interrupted during test startup: ").initCause(e);
+ }
+ }
+
+ public static ZooKeeperTestServer createWithFixedPort(int port) throws IOException {
+ return new ZooKeeperTestServer(port);
+ }
+
+ public int getPort() {
+ return factory.getLocalPort();
+ }
+
+ public String getAddress() {
+ return factory.getLocalAddress().getHostName() + ":" + getPort();
+ }
+
+ public void shutdown(boolean cleanupZooKeeperDir) {
+ server.shutdown();
+
+ if (cleanupZooKeeperDir) {
+ delete(zooKeeperDir);
+ }
+
+ factory.shutdown();
+ }
+
+ public void delete(File f) {
+ if (f.isDirectory()) {
+ for (File file : f.listFiles()) {
+ delete(file);
+ }
+ }
+ f.delete();
+ }
+
+ private static File getTempDir() throws IOException {
+ // The pom file sets java.io.tmpdir to ${project.build.directory}. This doesn't happen within (e.g.) IntelliJ, but happens
+ // on Screwdriver (tm). So if we're running tests on Screwdriver (tm), put the log in 'surefire-reports' instead so the
+ // user can find them along with the other test reports.
+ final File surefireReportsDir = new File(System.getProperty("java.io.tmpdir") + File.separator + "surefire-reports");
+ if (surefireReportsDir.isDirectory()) {
+ return File.createTempFile(DIR_PREFIX, DIR_POSTFIX, surefireReportsDir);
+ }
+
+ return File.createTempFile(DIR_PREFIX, DIR_POSTFIX);
+ }
+}
diff --git a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/hostinfo/HostInfoTest.java b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/hostinfo/HostInfoTest.java
new file mode 100644
index 00000000000..d24b45817e0
--- /dev/null
+++ b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/hostinfo/HostInfoTest.java
@@ -0,0 +1,107 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.vespa.clustercontroller.core.hostinfo;
+
+import org.junit.Test;
+
+import java.io.IOException;
+import java.nio.charset.StandardCharsets;
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.nio.file.Paths;
+import java.util.List;
+import java.util.Map;
+import java.util.TreeMap;
+
+import static org.hamcrest.CoreMatchers.equalTo;
+import static org.hamcrest.CoreMatchers.nullValue;
+import static org.hamcrest.core.Is.is;
+import static org.junit.Assert.*;
+
+public class HostInfoTest {
+
+ private static String readDataFile(String filename) throws IOException {
+ String directory = "../protocols/getnodestate/";
+ Path path = Paths.get(directory + filename);
+ byte[] encoded;
+ encoded = Files.readAllBytes(path);
+ return new String(encoded, StandardCharsets.UTF_8);
+ }
+
+ @Test
+ public void testEmptyJson() throws IOException {
+ HostInfo hostInfo = HostInfo.createHostInfo("{}");
+ assertThat(hostInfo.getVtag().getVersionOrNull(), is(nullValue()));
+ assertThat(hostInfo.getDistributor().getStorageNodes().size(), is(0));
+ assertThat(hostInfo.getMetrics().getValues().size(), is(0));
+ assertThat(hostInfo.getClusterStateVersionOrNull(), is(nullValue()));
+ }
+
+ @Test
+ public void testExtendedJson() throws IOException {
+ HostInfo hostInfo = HostInfo.createHostInfo(readDataFile("host_info.json"));
+ assertThat(hostInfo.getVtag().getVersionOrNull(), is("5.32.76"));
+ }
+
+ @Test
+ public void testFullSet() throws IOException {
+ HostInfo hostInfo = HostInfo.createHostInfo(readDataFile("host_info.json"));
+ List<StorageNode> storageNodeList = hostInfo.getDistributor().getStorageNodes();
+ assertThat(storageNodeList.size(), is(2));
+ assertThat(storageNodeList.get(0).getIndex(), is(0));
+ assertThat(storageNodeList.get(0).getOpsLatenciesOrNull().getPut().getCount(), is(16L));
+ assertThat(storageNodeList.get(1).getOpsLatenciesOrNull().getPut().getCount(), is(18L));
+ assertThat(storageNodeList.get(0).getOpsLatenciesOrNull().getPut().getLatencyMsSum(), is(15L));
+ List<Metrics.Metric> metrics = hostInfo.getMetrics().getValues();
+ assertThat(metrics.size(), is(2));
+ Metrics.Value value = metrics.get(0).getValue();
+ assertThat(value.getLast(), is(5095L));
+ assertThat(metrics.get(0).getName(), equalTo("vds.datastored.alldisks.buckets"));
+ assertThat(hostInfo.getClusterStateVersionOrNull(), is(123));
+ }
+
+ @Test
+ public void testSpeed() throws Exception {
+ String json = readDataFile("slow_host_info.json");
+
+ long start = 0;
+ for (int x = 0; x < 100; x++) {
+ if (x == 90) {
+ start = System.currentTimeMillis();
+ }
+ HostInfo hostInfo = HostInfo.createHostInfo(json);
+ // Check a value so not all code is removed by optimizer.
+ if (hostInfo.getMetrics().getValues().size() == -1) return;
+ }
+ long end = System.currentTimeMillis();
+ System.out.println("Should take about 1.5 ms on fast machine, actually " + (end - start) / 10. + " ms.");
+ }
+
+ @Test
+ public void testSharedFile() throws Exception {
+ String json = readDataFile("distributor.json");
+ HostInfo hostInfo = HostInfo.createHostInfo(json);
+
+ List<StorageNode> storageNodeList = hostInfo.getDistributor().getStorageNodes();
+ assertThat(storageNodeList.size(), is(2));
+ Map<Integer, StorageNode> storageNodeByIndex = new TreeMap<>();
+ for (StorageNode node : storageNodeList) {
+ Integer index = node.getIndex();
+ assertFalse(storageNodeByIndex.containsKey(index));
+ storageNodeByIndex.put(index, node);
+ }
+
+ assertTrue(storageNodeByIndex.containsKey(0));
+ assertThat(storageNodeByIndex.get(0).getIndex(), is(0));
+ assertThat(storageNodeByIndex.get(0).getMinCurrentReplicationFactorOrNull(), is(2));
+ assertNotNull(storageNodeByIndex.get(0).getOpsLatenciesOrNull());
+ assertThat(storageNodeByIndex.get(0).getOpsLatenciesOrNull().getPut().getLatencyMsSum(), is(10000L));
+ assertThat(storageNodeByIndex.get(0).getOpsLatenciesOrNull().getPut().getCount(), is(3L));
+
+ assertTrue(storageNodeByIndex.containsKey(5));
+ assertThat(storageNodeByIndex.get(5).getIndex(), is(5));
+ assertThat(storageNodeByIndex.get(5).getMinCurrentReplicationFactorOrNull(), is(9));
+ assertNotNull(storageNodeByIndex.get(5).getOpsLatenciesOrNull());
+ assertThat(storageNodeByIndex.get(5).getOpsLatenciesOrNull().getPut().getLatencyMsSum(), is(25000L));
+ assertThat(storageNodeByIndex.get(5).getOpsLatenciesOrNull().getPut().getCount(), is(7L));
+ }
+} \ No newline at end of file
diff --git a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/hostinfo/StorageNodeStatsBridgeTest.java b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/hostinfo/StorageNodeStatsBridgeTest.java
new file mode 100644
index 00000000000..9d23031cd55
--- /dev/null
+++ b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/hostinfo/StorageNodeStatsBridgeTest.java
@@ -0,0 +1,67 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.vespa.clustercontroller.core.hostinfo;
+
+import com.yahoo.vespa.clustercontroller.core.NodeMergeStats;
+import com.yahoo.vespa.clustercontroller.core.StorageMergeStats;
+import com.yahoo.vespa.clustercontroller.core.StorageNodeStats;
+import com.yahoo.vespa.clustercontroller.core.StorageNodeStatsContainer;
+import org.junit.Test;
+
+import java.io.IOException;
+import java.nio.charset.StandardCharsets;
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.nio.file.Paths;
+
+import static org.hamcrest.MatcherAssert.assertThat;
+import static org.hamcrest.core.Is.is;
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertNotNull;
+
+/**
+ * @author hakon
+ */
+public class StorageNodeStatsBridgeTest {
+
+ private static String getJsonString() throws IOException {
+ Path path = Paths.get("../protocols/getnodestate/host_info.json");
+ byte[] encoded;
+ encoded = Files.readAllBytes(path);
+ return new String(encoded, StandardCharsets.UTF_8);
+ }
+
+ @Test
+ public void testStorageNodeStatsContainer() throws IOException {
+ String data = getJsonString();
+ HostInfo hostInfo = HostInfo.createHostInfo(data);
+ StorageNodeStatsContainer container = StorageNodeStatsBridge.traverseHostInfo(hostInfo);
+ assertEquals(2, container.size());
+
+ StorageNodeStats node0 = container.get(0);
+ assertNotNull(node0);
+ assertEquals(15, node0.getDistributorPutLatency().getLatencyMsSum());
+ assertEquals(16, node0.getDistributorPutLatency().getCount());
+
+ StorageNodeStats node1 = container.get(1);
+ assertNotNull(node1);
+ assertEquals(17, node1.getDistributorPutLatency().getLatencyMsSum());
+ assertEquals(18, node1.getDistributorPutLatency().getCount());
+ }
+
+ @Test
+ public void testStorageMergeStats() throws IOException {
+ String data = getJsonString();
+ HostInfo hostInfo = HostInfo.createHostInfo(data);
+
+ StorageMergeStats storageMergeStats = StorageNodeStatsBridge.generate(hostInfo.getDistributor());
+ int size = 0;
+ for (NodeMergeStats mergeStats : storageMergeStats) {
+ assertThat(mergeStats.getCopyingIn().getBuckets(), is(2L));
+ assertThat(mergeStats.getCopyingOut().getBuckets(), is(4L));
+ assertThat(mergeStats.getSyncing().getBuckets(), is(1L));
+ assertThat(mergeStats.getMovingOut().getBuckets(), is(3L));
+ size++;
+ }
+ assertThat(size, is(2));
+ }
+}
diff --git a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/mocks/TestEventLog.java b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/mocks/TestEventLog.java
new file mode 100644
index 00000000000..646421e93ae
--- /dev/null
+++ b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/mocks/TestEventLog.java
@@ -0,0 +1,56 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.vespa.clustercontroller.core.mocks;
+
+import com.yahoo.vdslib.state.Node;
+import com.yahoo.vespa.clustercontroller.core.Event;
+import com.yahoo.vespa.clustercontroller.core.EventLogInterface;
+import com.yahoo.vespa.clustercontroller.core.NodeEvent;
+
+import java.util.logging.Level;
+
+public class TestEventLog implements EventLogInterface {
+ private StringBuilder events = new StringBuilder();
+ private int eventCount = 0;
+
+ public void clear() { events = new StringBuilder(); eventCount = 0; }
+ public String toString() { return events.toString(); }
+ public int getEventCount() { return eventCount; }
+
+ @Override
+ public void add(Event e) {
+ events.append("add(" + e.getDescription() + ")\n");
+ ++eventCount;
+ }
+
+ @Override
+ public void add(Event e, boolean logInfo) {
+ events.append("add(" + e + ", log ? " + logInfo + ")\n");
+ ++eventCount;
+ }
+
+ @Override
+ public void addNodeOnlyEvent(NodeEvent e, Level level) {
+ events.append("add(" + e + ", " + level + ")\n");
+ ++eventCount;
+ }
+
+ @Override
+ public int getNodeEventsSince(Node n, long time) {
+ throw new IllegalStateException("Should never be called.");
+ }
+
+ @Override
+ public long getRecentTimePeriod() {
+ throw new IllegalStateException("Should never be called.");
+ }
+
+ @Override
+ public void writeHtmlState(StringBuilder sb, Node node) {
+ throw new IllegalStateException("Should never be called.");
+ }
+
+ @Override
+ public void setMaxSize(int size, int nodesize) {
+ throw new IllegalStateException("Should never be called.");
+ }
+}
diff --git a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/restapiv2/ClusterControllerMock.java b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/restapiv2/ClusterControllerMock.java
new file mode 100644
index 00000000000..a62b0cd9a7b
--- /dev/null
+++ b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/restapiv2/ClusterControllerMock.java
@@ -0,0 +1,85 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.vespa.clustercontroller.core.restapiv2;
+
+import com.yahoo.vdslib.state.ClusterState;
+import com.yahoo.vdslib.state.NodeState;
+import com.yahoo.vespa.clustercontroller.core.*;
+import com.yahoo.vespa.clustercontroller.core.hostinfo.HostInfo;
+import com.yahoo.vespa.clustercontroller.core.listeners.NodeAddedOrRemovedListener;
+import com.yahoo.vespa.clustercontroller.core.listeners.NodeStateOrHostInfoChangeHandler;
+
+public class ClusterControllerMock implements RemoteClusterControllerTaskScheduler {
+ public RemoteClusterControllerTask.Context context = new RemoteClusterControllerTask.Context();
+
+ public int fleetControllerIndex;
+ public Integer fleetControllerMaster;
+ public StringBuilder events = new StringBuilder();
+
+ public ClusterControllerMock(ContentCluster cluster, ClusterState state,
+ int fcIndex, Integer fcMaster) {
+ this.fleetControllerIndex = fcIndex;
+ this.fleetControllerMaster = fcMaster;
+ context.cluster = cluster;
+ context.currentState = state;
+ context.masterInfo = new MasterInterface() {
+ @Override
+ public boolean isMaster() {
+ return (fleetControllerMaster != null &&
+ fleetControllerMaster == fleetControllerIndex);
+ }
+
+ @Override
+ public Integer getMaster() {
+ return fleetControllerMaster;
+ }
+ };
+ context.nodeStateOrHostInfoChangeHandler = new NodeStateOrHostInfoChangeHandler() {
+
+ @Override
+ public void handleNewNodeState(NodeInfo currentInfo, NodeState newState) {
+ events.append("newNodeState(").append(currentInfo.getNode()).append(": ").append(newState).append("\n");
+ }
+
+ @Override
+ public void handleNewWantedNodeState(NodeInfo node, NodeState newState) {
+ events.append("newWantedNodeState(").append(node.getNode()).append(": ").append(newState).append("\n");
+ }
+
+ @Override
+ public void handleUpdatedHostInfo(NodeInfo node, HostInfo newHostInfo) {
+ events.append("updatedHostInfo(").append(node.getNode()).append(": ")
+ .append(newHostInfo).append(")\n");
+ }
+
+ };
+ context.nodeAddedOrRemovedListener = new NodeAddedOrRemovedListener() {
+
+ @Override
+ public void handleNewNode(NodeInfo node) {
+ events.append("newNode(").append(node.getNode()).append(")\n");
+ }
+
+ @Override
+ public void handleMissingNode(NodeInfo node) {
+ events.append("newMissingNode(").append(node.getNode()).append("\n");
+ }
+
+ @Override
+ public void handleNewRpcAddress(NodeInfo node) {
+ events.append("newRpcAddress(").append(node.getNode()).append("\n");
+ }
+
+ @Override
+ public void handleReturnedRpcAddress(NodeInfo node) {
+ events.append("returnedRpcAddress(").append(node.getNode()).append(")\n");
+ }
+
+ };
+ }
+
+ @Override
+ public void schedule(RemoteClusterControllerTask task) {
+ task.doRemoteFleetControllerTask(context);
+ task.notifyCompleted();
+ }
+}
diff --git a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/restapiv2/ClusterListTest.java b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/restapiv2/ClusterListTest.java
new file mode 100644
index 00000000000..4e26585177f
--- /dev/null
+++ b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/restapiv2/ClusterListTest.java
@@ -0,0 +1,51 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.vespa.clustercontroller.core.restapiv2;
+
+import com.yahoo.vespa.clustercontroller.utils.staterestapi.response.UnitResponse;
+import org.junit.Test;
+import static org.junit.Assert.assertEquals;
+
+public class ClusterListTest extends StateRestApiTest {
+
+ @Test
+ public void testClusterList() throws Exception {
+ setUp(true);
+ UnitResponse response = restAPI.getState(new StateRequest("", 0));
+ String expected =
+ "{\"cluster\": {\n" +
+ " \"books\": {\"link\": \"\\/cluster\\/v2\\/books\"},\n" +
+ " \"music\": {\"link\": \"\\/cluster\\/v2\\/music\"}\n" +
+ "}}";
+ assertEquals(expected, jsonWriter.createJson(response).toString(2));
+ }
+
+ @Test
+ public void testRecursiveClusterList() throws Exception {
+ setUp(true);
+ UnitResponse response = restAPI.getState(new StateRequest("", 1));
+ String expected =
+ "{\"cluster\": {\n" +
+ " \"books\": {\n" +
+ " \"state\": {\"generated\": {\n" +
+ " \"state\": \"up\",\n" +
+ " \"reason\": \"\"\n" +
+ " }},\n" +
+ " \"service\": {\n" +
+ " \"storage\": {\"link\": \"\\/cluster\\/v2\\/books\\/storage\"},\n" +
+ " \"distributor\": {\"link\": \"\\/cluster\\/v2\\/books\\/distributor\"}\n" +
+ " }\n" +
+ " },\n" +
+ " \"music\": {\n" +
+ " \"state\": {\"generated\": {\n" +
+ " \"state\": \"up\",\n" +
+ " \"reason\": \"\"\n" +
+ " }},\n" +
+ " \"service\": {\n" +
+ " \"storage\": {\"link\": \"\\/cluster\\/v2\\/music\\/storage\"},\n" +
+ " \"distributor\": {\"link\": \"\\/cluster\\/v2\\/music\\/distributor\"}\n" +
+ " }\n" +
+ " }\n" +
+ "}}";
+ assertEquals(expected, jsonWriter.createJson(response).toString(2));
+ }
+}
diff --git a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/restapiv2/ClusterTest.java b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/restapiv2/ClusterTest.java
new file mode 100644
index 00000000000..8fbee2c5952
--- /dev/null
+++ b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/restapiv2/ClusterTest.java
@@ -0,0 +1,58 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.vespa.clustercontroller.core.restapiv2;
+
+import com.yahoo.vespa.clustercontroller.utils.staterestapi.response.UnitResponse;
+import org.junit.Test;
+
+import static org.junit.Assert.assertEquals;
+
+public class ClusterTest extends StateRestApiTest {
+
+ @Test
+ public void testCluster() throws Exception {
+ setUp(true);
+ UnitResponse response = restAPI.getState(new StateRequest("music", 0));
+ String expected =
+ "{\n" +
+ " \"state\": {\"generated\": {\n" +
+ " \"state\": \"up\",\n" +
+ " \"reason\": \"\"\n" +
+ " }},\n" +
+ " \"service\": {\n" +
+ " \"storage\": {\"link\": \"\\/cluster\\/v2\\/music\\/storage\"},\n" +
+ " \"distributor\": {\"link\": \"\\/cluster\\/v2\\/music\\/distributor\"}\n" +
+ " }\n" +
+ "}";
+ assertEquals(expected, jsonWriter.createJson(response).toString(2));
+ }
+
+ @Test
+ public void testRecursiveCluster() throws Exception {
+ setUp(true);
+ UnitResponse response = restAPI.getState(new StateRequest("music", 1));
+ String expected =
+ "{\n" +
+ " \"state\": {\"generated\": {\n" +
+ " \"state\": \"up\",\n" +
+ " \"reason\": \"\"\n" +
+ " }},\n" +
+ " \"service\": {\n" +
+ " \"storage\": {\"node\": {\n" +
+ " \"1\": {\"link\": \"\\/cluster\\/v2\\/music\\/storage\\/1\"},\n" +
+ " \"2\": {\"link\": \"\\/cluster\\/v2\\/music\\/storage\\/2\"},\n" +
+ " \"3\": {\"link\": \"\\/cluster\\/v2\\/music\\/storage\\/3\"},\n" +
+ " \"5\": {\"link\": \"\\/cluster\\/v2\\/music\\/storage\\/5\"},\n" +
+ " \"7\": {\"link\": \"\\/cluster\\/v2\\/music\\/storage\\/7\"}\n" +
+ " }},\n" +
+ " \"distributor\": {\"node\": {\n" +
+ " \"1\": {\"link\": \"\\/cluster\\/v2\\/music\\/distributor\\/1\"},\n" +
+ " \"2\": {\"link\": \"\\/cluster\\/v2\\/music\\/distributor\\/2\"},\n" +
+ " \"3\": {\"link\": \"\\/cluster\\/v2\\/music\\/distributor\\/3\"},\n" +
+ " \"5\": {\"link\": \"\\/cluster\\/v2\\/music\\/distributor\\/5\"},\n" +
+ " \"7\": {\"link\": \"\\/cluster\\/v2\\/music\\/distributor\\/7\"}\n" +
+ " }}\n" +
+ " }\n" +
+ "}";
+ assertEquals(expected, jsonWriter.createJson(response).toString(2));
+ }
+}
diff --git a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/restapiv2/NodeTest.java b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/restapiv2/NodeTest.java
new file mode 100644
index 00000000000..10d757901e2
--- /dev/null
+++ b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/restapiv2/NodeTest.java
@@ -0,0 +1,200 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.vespa.clustercontroller.core.restapiv2;
+
+import com.yahoo.vdslib.state.Node;
+import com.yahoo.vdslib.state.NodeState;
+import com.yahoo.vdslib.state.NodeType;
+import com.yahoo.vdslib.state.State;
+import com.yahoo.vespa.clustercontroller.core.ContentCluster;
+import com.yahoo.vespa.clustercontroller.utils.staterestapi.response.UnitResponse;
+import org.codehaus.jettison.json.JSONObject;
+import org.junit.Test;
+
+import static org.junit.Assert.assertEquals;
+
+public class NodeTest extends StateRestApiTest {
+
+ @Test
+ public void testDistributor() throws Exception {
+ setUp(true);
+ UnitResponse response = restAPI.getState(new StateRequest("music/distributor/1", 0));
+ String expected =
+ "{\n" +
+ " \"attributes\": {\"hierarchical-group\": \"east.g2\"},\n" +
+ " \"state\": {\n" +
+ " \"generated\": {\n" +
+ " \"state\": \"up\",\n" +
+ " \"reason\": \"\"\n" +
+ " },\n" +
+ " \"unit\": {\n" +
+ " \"state\": \"up\",\n" +
+ " \"reason\": \"\"\n" +
+ " },\n" +
+ " \"user\": {\n" +
+ " \"state\": \"up\",\n" +
+ " \"reason\": \"\"\n" +
+ " }\n" +
+ " }\n" +
+ "}";
+ assertEquals(expected, jsonWriter.createJson(response).toString(2));
+ }
+
+ @Test
+ public void testStorage() throws Exception {
+ setUp(true);
+ UnitResponse response = restAPI.getState(new StateRequest("music/storage/1", 0));
+ String expected =
+ "{\n" +
+ " \"attributes\": {\"hierarchical-group\": \"east.g2\"},\n" +
+ " \"state\": {\n" +
+ " \"generated\": {\n" +
+ " \"state\": \"up\",\n" +
+ " \"reason\": \"\"\n" +
+ " },\n" +
+ " \"unit\": {\n" +
+ " \"state\": \"up\",\n" +
+ " \"reason\": \"\"\n" +
+ " },\n" +
+ " \"user\": {\n" +
+ " \"state\": \"up\",\n" +
+ " \"reason\": \"\"\n" +
+ " }\n" +
+ " },\n" +
+ " \"metrics\": {\n" +
+ // Why 24 and 28? There are 4 distributor nodes seen in slobrok (see StateRestApiTest).
+ // Each gets a host info with distributor-put-latency-ms-sum 6 and
+ // distributor-put-latency-count 7 (see StateRestApiTest.getHostInfo()).
+ // Therefore, in aggregate, 4*6 is 24, and 4*7 is 28.
+ " \"distributor-put-latency-ms-sum\": 24,\n" +
+ " \"distributor-put-latency-count\": 28\n" +
+ " },\n" +
+ " \"partition\": {\n" +
+ " \"0\": {\"link\": \"\\/cluster\\/v2\\/music\\/storage\\/1\\/0\"},\n" +
+ " \"1\": {\"link\": \"\\/cluster\\/v2\\/music\\/storage\\/1\\/1\"}\n" +
+ " }\n" +
+ "}";
+ assertEquals(expected, jsonWriter.createJson(response).toString(2));
+ }
+
+ @Test
+ public void testRecursiveNode() throws Exception {
+ setUp(true);
+ UnitResponse response = restAPI.getState(new StateRequest("music/storage/1", 1));
+ String expected =
+ "{\n" +
+ " \"attributes\": {\"hierarchical-group\": \"east.g2\"},\n" +
+ " \"state\": {\n" +
+ " \"generated\": {\n" +
+ " \"state\": \"up\",\n" +
+ " \"reason\": \"\"\n" +
+ " },\n" +
+ " \"unit\": {\n" +
+ " \"state\": \"up\",\n" +
+ " \"reason\": \"\"\n" +
+ " },\n" +
+ " \"user\": {\n" +
+ " \"state\": \"up\",\n" +
+ " \"reason\": \"\"\n" +
+ " }\n" +
+ " },\n" +
+ " \"metrics\": {\n" +
+ // Why 24 and 28? There are 4 distributor nodes seen in slobrok (see StateRestApiTest).
+ // Each gets a host info with distributor-put-latency-ms-sum 6 and
+ // distributor-put-latency-count 7 (see StateRestApiTest.getHostInfo()).
+ // Therefore, in aggregate, 4*6 is 24, and 4*7 is 28.
+ " \"distributor-put-latency-ms-sum\": 24,\n" +
+ " \"distributor-put-latency-count\": 28\n" +
+ " },\n" +
+ " \"partition\": {\n" +
+ " \"0\": {\n" +
+ " \"state\": {\"generated\": {\n" +
+ " \"state\": \"up\",\n" +
+ " \"reason\": \"\"\n" +
+ " }},\n" +
+ " \"metrics\": {\n" +
+ " \"bucket-count\": 1,\n" +
+ " \"unique-document-count\": 2,\n" +
+ " \"unique-document-total-size\": 3\n" +
+ " }\n" +
+ " },\n" +
+ " \"1\": {\n" +
+ " \"state\": {\"generated\": {\n" +
+ " \"state\": \"up\",\n" +
+ " \"reason\": \"\"\n" +
+ " }},\n" +
+ " \"metrics\": {\n" +
+ " \"bucket-count\": 1,\n" +
+ " \"unique-document-count\": 2,\n" +
+ " \"unique-document-total-size\": 3\n" +
+ " }\n" +
+ " }\n" +
+ " }\n" +
+ "}";
+ assertEquals(expected, jsonWriter.createJson(response).toString(2));
+ }
+
+ @Test
+ public void testNodeNotSeenInSlobrok() throws Exception {
+ setUp(true);
+ ContentCluster old = music.context.cluster;
+ music.context.cluster = new ContentCluster(old.getName(), old.getConfiguredNodes().values(), old.getDistribution(), 0, 0.0);
+ NodeState currentState = new NodeState(NodeType.STORAGE, State.DOWN);
+ currentState.setDescription("Not seen");
+ music.context.currentState.setNodeState(new Node(NodeType.STORAGE, 1), currentState);
+ UnitResponse response = restAPI.getState(new StateRequest("music/storage/1", 0));
+ String expected =
+ "{\n" +
+ " \"attributes\": {\"hierarchical-group\": \"east.g2\"},\n" +
+ " \"state\": {\n" +
+ " \"generated\": {\n" +
+ " \"state\": \"down\",\n" +
+ " \"reason\": \"Not seen\"\n" +
+ " },\n" +
+ " \"unit\": {\n" +
+ " \"state\": \"down\",\n" +
+ " \"reason\": \"Node not seen in slobrok.\"\n" +
+ " },\n" +
+ " \"user\": {\n" +
+ " \"state\": \"up\",\n" +
+ " \"reason\": \"\"\n" +
+ " }\n" +
+ " },\n" +
+ " \"metrics\": {\n" +
+ " \"distributor-put-latency-ms-sum\": 0,\n" +
+ " \"distributor-put-latency-count\": 0\n" +
+ " }\n" +
+ "}";
+ assertEquals(expected, jsonWriter.createJson(response).toString(2));
+ }
+
+ @Test
+ public void testRecursiveStorageClusterDoesNotIncludePerNodeStatsOrMetrics() throws Exception {
+ setUp(true);
+ UnitResponse response = restAPI.getState(new StateRequest("music/storage", 1));
+ String expected =
+ "{\n" +
+ " \"attributes\": {\"hierarchical-group\": \"east.g2\"},\n" +
+ " \"state\": {\n" +
+ " \"generated\": {\n" +
+ " \"state\": \"up\",\n" +
+ " \"reason\": \"\"\n" +
+ " },\n" +
+ " \"unit\": {\n" +
+ " \"state\": \"up\",\n" +
+ " \"reason\": \"\"\n" +
+ " },\n" +
+ " \"user\": {\n" +
+ " \"state\": \"up\",\n" +
+ " \"reason\": \"\"\n" +
+ " }\n" +
+ " },\n" +
+ " \"partition\": {\n" +
+ " \"0\": {\"link\": \"\\/cluster\\/v2\\/music\\/storage\\/1\\/0\"},\n" +
+ " \"1\": {\"link\": \"\\/cluster\\/v2\\/music\\/storage\\/1\\/1\"}\n" +
+ " }\n" +
+ "}";
+ JSONObject json = jsonWriter.createJson(response);
+ assertEquals(expected, json.getJSONObject("node").getJSONObject("1").toString(2));
+ }
+
+}
diff --git a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/restapiv2/NotMasterTest.java b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/restapiv2/NotMasterTest.java
new file mode 100644
index 00000000000..4b59b3426c2
--- /dev/null
+++ b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/restapiv2/NotMasterTest.java
@@ -0,0 +1,137 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.vespa.clustercontroller.core.restapiv2;
+
+import com.yahoo.vespa.clustercontroller.utils.staterestapi.errors.OtherMasterException;
+import com.yahoo.vespa.clustercontroller.utils.staterestapi.errors.UnknownMasterException;
+import com.yahoo.vespa.clustercontroller.utils.staterestapi.response.UnitResponse;
+import static com.yahoo.vespa.defaults.Defaults.getDefaults;
+import org.junit.Test;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertTrue;
+
+public class NotMasterTest extends StateRestApiTest {
+
+ @Test
+ public void testUnknownMaster() throws Exception {
+ setUp(true);
+ music.fleetControllerMaster = null;
+ // Non-recursive cluster list works, as it doesn't touches into fleetcontrollers
+ {
+ UnitResponse response = restAPI.getState(new StateRequest("", 0));
+ String expected =
+ "{\"cluster\": {\n" +
+ " \"books\": {\"link\": \"\\/cluster\\/v2\\/books\"},\n" +
+ " \"music\": {\"link\": \"\\/cluster\\/v2\\/music\"}\n" +
+ "}}";
+ assertEquals(expected, jsonWriter.createJson(response).toString(2));
+ }
+ // Recursive cluster list does not work
+ try{
+ restAPI.getState(new StateRequest("", 1));
+ assertTrue(false);
+ } catch (UnknownMasterException e) {
+ assertTrue(e.getMessage(), e.getMessage().contains("No known master cluster controller"));
+ }
+ // Other requests does not work either
+ try{
+ restAPI.getState(new StateRequest("music", 0));
+ assertTrue(false);
+ } catch (UnknownMasterException e) {
+ assertTrue(e.getMessage(), e.getMessage().contains("No known master cluster controller"));
+ }
+ try{
+ restAPI.getState(new StateRequest("music/storage", 0));
+ assertTrue(false);
+ } catch (UnknownMasterException e) {
+ assertTrue(e.getMessage(), e.getMessage().contains("No known master cluster controller"));
+ }
+ try{
+ restAPI.getState(new StateRequest("music/storage/1", 0));
+ assertTrue(false);
+ } catch (UnknownMasterException e) {
+ assertTrue(e.getMessage(), e.getMessage().contains("No known master cluster controller"));
+ }
+ try{
+ restAPI.getState(new StateRequest("music/storage/1/0", 0));
+ assertTrue(false);
+ } catch (UnknownMasterException e) {
+ assertTrue(e.getMessage(), e.getMessage().contains("No known master cluster controller"));
+ }
+ try{
+ restAPI.setUnitState(new SetNodeStateTest.SetUnitStateRequestImpl("music/storage/1")
+ .setNewState("user", "down", "test"));
+ assertTrue(false);
+ } catch (UnknownMasterException e) {
+ assertTrue(e.getMessage(), e.getMessage().contains("No known master cluster controller"));
+ }
+ }
+
+ @Test
+ public void testKnownOtherMaster() throws Exception {
+ setUp(true);
+ ccSockets.put(1, new ClusterControllerStateRestAPI.Socket("otherhost", getDefaults().vespaWebServicePort()));
+ music.fleetControllerMaster = 1;
+ // Non-recursive cluster list works, as it doesn't touches into fleetcontrollers
+ {
+ UnitResponse response = restAPI.getState(new StateRequest("", 0));
+ String expected =
+ "{\"cluster\": {\n" +
+ " \"books\": {\"link\": \"\\/cluster\\/v2\\/books\"},\n" +
+ " \"music\": {\"link\": \"\\/cluster\\/v2\\/music\"}\n" +
+ "}}";
+ assertEquals(expected, jsonWriter.createJson(response).toString(2));
+ }
+ // Recursive cluster list does not work
+ try{
+ restAPI.getState(new StateRequest("", 1));
+ assertTrue(false);
+ } catch (OtherMasterException e) {
+ assertTrue(e.getMessage(), e.getMessage().contains("Cluster controller not master. Use master at otherhost:" + getDefaults().vespaWebServicePort() + "."));
+ assertTrue(e.getHost().equals("otherhost"));
+ assertTrue(e.getPort() == getDefaults().vespaWebServicePort());
+ }
+ // Other requests does not work either
+ try{
+ restAPI.getState(new StateRequest("music", 0));
+ assertTrue(false);
+ } catch (OtherMasterException e) {
+ assertTrue(e.getMessage(), e.getMessage().contains("Cluster controller not master. Use master at otherhost:" + getDefaults().vespaWebServicePort() + "."));
+ assertTrue(e.getHost().equals("otherhost"));
+ assertTrue(e.getPort() == getDefaults().vespaWebServicePort());
+ }
+ try{
+ restAPI.getState(new StateRequest("music/storage", 0));
+ assertTrue(false);
+ } catch (OtherMasterException e) {
+ assertTrue(e.getMessage(), e.getMessage().contains("Cluster controller not master. Use master at otherhost:" + getDefaults().vespaWebServicePort() + "."));
+ assertTrue(e.getHost().equals("otherhost"));
+ assertTrue(e.getPort() == getDefaults().vespaWebServicePort());
+ }
+ try{
+ restAPI.getState(new StateRequest("music/storage/1", 0));
+ assertTrue(false);
+ } catch (OtherMasterException e) {
+ assertTrue(e.getMessage(), e.getMessage().contains("Cluster controller not master. Use master at otherhost:" + getDefaults().vespaWebServicePort() + "."));
+ assertTrue(e.getHost().equals("otherhost"));
+ assertTrue(e.getPort() == getDefaults().vespaWebServicePort());
+ }
+ try{
+ restAPI.getState(new StateRequest("music/storage/1/0", 0));
+ assertTrue(false);
+ } catch (OtherMasterException e) {
+ assertTrue(e.getMessage(), e.getMessage().contains("Cluster controller not master. Use master at otherhost:" + getDefaults().vespaWebServicePort() + "."));
+ assertTrue(e.getHost().equals("otherhost"));
+ assertTrue(e.getPort() == getDefaults().vespaWebServicePort());
+ }
+ try{
+ restAPI.setUnitState(new SetNodeStateTest.SetUnitStateRequestImpl("music/storage/1")
+ .setNewState("user", "down", "test"));
+ assertTrue(false);
+ } catch (OtherMasterException e) {
+ assertTrue(e.getMessage(), e.getMessage().contains("Cluster controller not master. Use master at otherhost:" + getDefaults().vespaWebServicePort() + "."));
+ assertTrue(e.getHost().equals("otherhost"));
+ assertTrue(e.getPort() == getDefaults().vespaWebServicePort());
+ }
+ }
+}
diff --git a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/restapiv2/PartitionTest.java b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/restapiv2/PartitionTest.java
new file mode 100644
index 00000000000..f0f7f422824
--- /dev/null
+++ b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/restapiv2/PartitionTest.java
@@ -0,0 +1,64 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.vespa.clustercontroller.core.restapiv2;
+
+import com.yahoo.vespa.clustercontroller.utils.staterestapi.response.UnitResponse;
+import org.codehaus.jettison.json.JSONObject;
+import org.junit.Test;
+
+import static org.junit.Assert.assertEquals;
+
+public class PartitionTest extends StateRestApiTest {
+
+ @Test
+ public void testPartition() throws Exception {
+ setUp(true);
+ UnitResponse response = restAPI.getState(new StateRequest("music/storage/1/0", 0));
+ String expected =
+ "{\n" +
+ " \"state\": {\"generated\": {\n" +
+ " \"state\": \"up\",\n" +
+ " \"reason\": \"\"\n" +
+ " }},\n" +
+ " \"metrics\": {\n" +
+ " \"bucket-count\": 1,\n" +
+ " \"unique-document-count\": 2,\n" +
+ " \"unique-document-total-size\": 3\n" +
+ " }\n" +
+ "}";
+ assertEquals(expected, jsonWriter.createJson(response).toString(2));
+ }
+
+ @Test
+ public void testRecursiveCluster() throws Exception {
+ setUp(true);
+ UnitResponse response = restAPI.getState(new StateRequest("music/storage/1/0", 1));
+ String expected =
+ "{\n" +
+ " \"state\": {\"generated\": {\n" +
+ " \"state\": \"up\",\n" +
+ " \"reason\": \"\"\n" +
+ " }},\n" +
+ " \"metrics\": {\n" +
+ " \"bucket-count\": 1,\n" +
+ " \"unique-document-count\": 2,\n" +
+ " \"unique-document-total-size\": 3\n" +
+ " }\n" +
+ "}";
+ assertEquals(expected, jsonWriter.createJson(response).toString(2));
+ }
+
+ @Test
+ public void testRecursiveStorageClusterDoesNotIncludePartitionMetrics() throws Exception {
+ setUp(true);
+ UnitResponse response = restAPI.getState(new StateRequest("music/storage", 2));
+ String expected =
+ "{\"state\": {\"generated\": {\n" +
+ " \"state\": \"up\",\n" +
+ " \"reason\": \"\"\n" +
+ "}}}";
+ JSONObject json = jsonWriter.createJson(response);
+ assertEquals(expected, json.getJSONObject("node").getJSONObject("1").
+ getJSONObject("partition").getJSONObject("0").
+ toString(2));
+ }
+}
diff --git a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/restapiv2/RequestTest.java b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/restapiv2/RequestTest.java
new file mode 100644
index 00000000000..5d3813f44da
--- /dev/null
+++ b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/restapiv2/RequestTest.java
@@ -0,0 +1,33 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.vespa.clustercontroller.core.restapiv2;
+
+import com.yahoo.vespa.clustercontroller.utils.staterestapi.errors.InternalFailure;
+import com.yahoo.vespa.clustercontroller.utils.staterestapi.errors.StateRestApiException;
+import junit.framework.TestCase;
+
+public class RequestTest extends TestCase {
+
+ public void testGetResultBeforeCompletion() {
+ Request<String> r = new Request<String>(Request.MasterState.MUST_BE_MASTER) {
+ @Override
+ public String calculateResult(Context context) throws StateRestApiException {
+ return "foo";
+ }
+ };
+ try{
+ r.getResult();
+ assertTrue(false);
+ } catch (InternalFailure e) {
+ } catch (Exception e) {
+ assertTrue(false);
+ }
+ r.notifyCompleted();
+ try{
+ r.getResult();
+ assertTrue(false);
+ } catch (InternalFailure e) {
+ } catch (Exception e) {
+ assertTrue(false);
+ }
+ }
+}
diff --git a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/restapiv2/ServiceTest.java b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/restapiv2/ServiceTest.java
new file mode 100644
index 00000000000..01bd7b6f033
--- /dev/null
+++ b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/restapiv2/ServiceTest.java
@@ -0,0 +1,121 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.vespa.clustercontroller.core.restapiv2;
+
+import com.yahoo.vespa.clustercontroller.utils.staterestapi.response.UnitResponse;
+import org.junit.Test;
+
+import static org.junit.Assert.assertEquals;
+
+// TODO: Author?
+public class ServiceTest extends StateRestApiTest {
+
+ @Test
+ public void testService() throws Exception {
+ setUp(true);
+ UnitResponse response = restAPI.getState(new StateRequest("music/distributor", 0));
+ String expected =
+ "{\"node\": {\n" +
+ " \"1\": {\"link\": \"\\/cluster\\/v2\\/music\\/distributor\\/1\"},\n" +
+ " \"2\": {\"link\": \"\\/cluster\\/v2\\/music\\/distributor\\/2\"},\n" +
+ " \"3\": {\"link\": \"\\/cluster\\/v2\\/music\\/distributor\\/3\"},\n" +
+ " \"5\": {\"link\": \"\\/cluster\\/v2\\/music\\/distributor\\/5\"},\n" +
+ " \"7\": {\"link\": \"\\/cluster\\/v2\\/music\\/distributor\\/7\"}\n" +
+ "}}";
+ assertEquals(expected, jsonWriter.createJson(response).toString(2));
+ }
+
+ @Test
+ public void testRecursiveCluster() throws Exception {
+ setUp(true);
+ UnitResponse response = restAPI.getState(new StateRequest("music/distributor", 1));
+ String expected =
+ "{\"node\": {\n" +
+ " \"1\": {\n" +
+ " \"attributes\": {\"hierarchical-group\": \"east.g2\"},\n" +
+ " \"state\": {\n" +
+ " \"generated\": {\n" +
+ " \"state\": \"up\",\n" +
+ " \"reason\": \"\"\n" +
+ " },\n" +
+ " \"unit\": {\n" +
+ " \"state\": \"up\",\n" +
+ " \"reason\": \"\"\n" +
+ " },\n" +
+ " \"user\": {\n" +
+ " \"state\": \"up\",\n" +
+ " \"reason\": \"\"\n" +
+ " }\n" +
+ " }\n" +
+ " },\n" +
+ " \"2\": {\n" +
+ " \"attributes\": {\"hierarchical-group\": \"east.g1\"},\n" +
+ " \"state\": {\n" +
+ " \"generated\": {\n" +
+ " \"state\": \"down\",\n" +
+ " \"reason\": \"\"\n" +
+ " },\n" +
+ " \"unit\": {\n" +
+ " \"state\": \"down\",\n" +
+ " \"reason\": \"Node not seen in slobrok.\"\n" +
+ " },\n" +
+ " \"user\": {\n" +
+ " \"state\": \"up\",\n" +
+ " \"reason\": \"\"\n" +
+ " }\n" +
+ " }\n" +
+ " },\n" +
+ " \"3\": {\n" +
+ " \"attributes\": {\"hierarchical-group\": \"east.g2\"},\n" +
+ " \"state\": {\n" +
+ " \"generated\": {\n" +
+ " \"state\": \"up\",\n" +
+ " \"reason\": \"\"\n" +
+ " },\n" +
+ " \"unit\": {\n" +
+ " \"state\": \"up\",\n" +
+ " \"reason\": \"\"\n" +
+ " },\n" +
+ " \"user\": {\n" +
+ " \"state\": \"up\",\n" +
+ " \"reason\": \"\"\n" +
+ " }\n" +
+ " }\n" +
+ " },\n" +
+ " \"5\": {\n" +
+ " \"attributes\": {\"hierarchical-group\": \"east.g2\"},\n" +
+ " \"state\": {\n" +
+ " \"generated\": {\n" +
+ " \"state\": \"up\",\n" +
+ " \"reason\": \"\"\n" +
+ " },\n" +
+ " \"unit\": {\n" +
+ " \"state\": \"up\",\n" +
+ " \"reason\": \"\"\n" +
+ " },\n" +
+ " \"user\": {\n" +
+ " \"state\": \"up\",\n" +
+ " \"reason\": \"\"\n" +
+ " }\n" +
+ " }\n" +
+ " },\n" +
+ " \"7\": {\n" +
+ " \"attributes\": {\"hierarchical-group\": \"east.g2\"},\n" +
+ " \"state\": {\n" +
+ " \"generated\": {\n" +
+ " \"state\": \"up\",\n" +
+ " \"reason\": \"\"\n" +
+ " },\n" +
+ " \"unit\": {\n" +
+ " \"state\": \"up\",\n" +
+ " \"reason\": \"\"\n" +
+ " },\n" +
+ " \"user\": {\n" +
+ " \"state\": \"up\",\n" +
+ " \"reason\": \"\"\n" +
+ " }\n" +
+ " }\n" +
+ " }\n" +
+ "}}";
+ assertEquals(expected, jsonWriter.createJson(response).toString(2));
+ }
+}
diff --git a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/restapiv2/SetNodeStateTest.java b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/restapiv2/SetNodeStateTest.java
new file mode 100644
index 00000000000..d574e949348
--- /dev/null
+++ b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/restapiv2/SetNodeStateTest.java
@@ -0,0 +1,348 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.vespa.clustercontroller.core.restapiv2;
+
+import com.yahoo.vespa.clustercontroller.utils.staterestapi.errors.InvalidContentException;
+import com.yahoo.vespa.clustercontroller.utils.staterestapi.errors.MissingUnitException;
+import com.yahoo.vespa.clustercontroller.utils.staterestapi.errors.OperationNotSupportedForUnitException;
+import com.yahoo.vespa.clustercontroller.utils.staterestapi.requests.SetUnitStateRequest;
+import com.yahoo.vespa.clustercontroller.utils.staterestapi.response.SetResponse;
+import com.yahoo.vespa.clustercontroller.utils.staterestapi.response.UnitResponse;
+import com.yahoo.vespa.clustercontroller.utils.staterestapi.response.UnitState;
+import org.junit.Test;
+
+import java.util.LinkedHashMap;
+import java.util.Map;
+
+import static org.hamcrest.MatcherAssert.assertThat;
+import static org.hamcrest.core.Is.is;
+import static org.hamcrest.core.StringContains.containsString;
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertTrue;
+
+public class SetNodeStateTest extends StateRestApiTest {
+
+ public static class SetUnitStateRequestImpl extends StateRequest implements SetUnitStateRequest {
+ private Map<String, UnitState> newStates = new LinkedHashMap<>();
+ private Condition condition = Condition.FORCE;
+
+ public SetUnitStateRequestImpl(String req) {
+ super(req, 0);
+ }
+
+ public SetUnitStateRequestImpl setCondition(Condition condition) {
+ this.condition = condition;
+ return this;
+ }
+
+ public SetUnitStateRequestImpl setNewState(
+ final String type,
+ final String state,
+ final String reason) {
+ newStates.put(type, new UnitState() {
+ @Override
+ public String getId() {
+ return state;
+ }
+
+ @Override
+ public String getReason() {
+ return reason;
+ }
+ });
+ return this;
+ }
+
+ @Override
+ public Map<String, UnitState> getNewState() {
+ return newStates;
+ }
+
+ @Override
+ public Condition getCondition() {
+ return condition;
+ }
+ }
+
+ private void verifyStateSet(String state, String reason) throws Exception {
+ restAPI.setUnitState(new SetUnitStateRequestImpl(
+ "music/distributor/1").setNewState("user", state, reason));
+ UnitResponse response = restAPI.getState(new StateRequest("music/distributor/1", 0));
+ String expected = musicClusterExpectedUserStateString("east.g2", "up", "up", state.toLowerCase(), reason);
+ assertEquals(expected, jsonWriter.createJson(response).toString(2));
+ }
+
+ private void verifyClusterSet(String state, String reason) throws Exception {
+ restAPI.setUnitState(new SetUnitStateRequestImpl("music").setNewState("user", state, reason));
+ for (int index : new int[]{1, 2, 3, 5, 7}) {
+ UnitResponse response = restAPI.getState(new StateRequest("music/storage/" + index, 0));
+ String actualState = response.getCurrentState().getStatePerType().get("user").getId();
+ assertThat(actualState, is(state.toLowerCase()));
+ String actualReason = response.getCurrentState().getStatePerType().get("user").getReason();
+ assertThat(actualReason, is(reason));
+ }
+ }
+
+ private String musicClusterExpectedUserStateStringWithUninitializedNode(String groupName,
+ String generatedState, String unitState,
+ String userState, String userReason) {
+ return "{\n" +
+ " \"attributes\": {\"hierarchical-group\": \"" + groupName + "\"},\n" +
+ " \"state\": {\n" +
+ " \"generated\": {\n" +
+ " \"state\": \"" + generatedState + "\",\n" +
+ " \"reason\": \"\"\n" +
+ " },\n" +
+ " \"unit\": {\n" +
+ " \"state\": \"" + unitState + "\",\n" +
+ " \"reason\": \"Node not seen in slobrok.\"\n" +
+ " },\n" +
+ " \"user\": {\n" +
+ " \"state\": \"" + userState + "\",\n" +
+ " \"reason\": \"" + userReason + "\"\n" +
+ " }\n" +
+ " }\n" +
+ "}";
+ }
+
+ private String musicClusterExpectedUserStateString(String groupName,
+ String generatedState, String unitState,
+ String userState, String userReason) {
+ return "{\n" +
+ " \"attributes\": {\"hierarchical-group\": \"" + groupName + "\"},\n" +
+ " \"state\": {\n" +
+ " \"generated\": {\n" +
+ " \"state\": \"" + generatedState + "\",\n" +
+ " \"reason\": \"\"\n" +
+ " },\n" +
+ " \"unit\": {\n" +
+ " \"state\": \"" + unitState + "\",\n" +
+ " \"reason\": \"\"\n" +
+ " },\n" +
+ " \"user\": {\n" +
+ " \"state\": \"" + userState + "\",\n" +
+ " \"reason\": \"" + userReason + "\"\n" +
+ " }\n" +
+ " }\n" +
+ "}";
+ }
+
+ @Test
+ public void testSimple() throws Exception {
+ setUp(true);
+ verifyStateSet("down", "testing");
+ verifyStateSet("up", "foo");
+ verifyStateSet("maintenance", "");
+ verifyStateSet("retired", "argh");
+ verifyStateSet("UP", "even uppercase");
+ }
+
+ @Test
+ public void testSetNodesForCluster() throws Exception {
+ setUp(true);
+ verifyClusterSet("maintenance", "prepare for maintenance");
+ verifyClusterSet("up", "and we're back online");
+ }
+
+ @Test
+ public void testShouldNotModifyDistributorSafe() throws Exception {
+ setUp(false);
+ SetResponse setResponse = restAPI.setUnitState(new SetUnitStateRequestImpl("music/distributor/1")
+ .setNewState("user", "up", "whatever reason.")
+ .setCondition(SetUnitStateRequest.Condition.SAFE));
+ assertThat(setResponse.getWasModified(), is(false));
+ assertThat(setResponse.getReason(), containsString(
+ "Safe-set of node state is only supported for storage nodes"));
+ }
+
+ @Test
+ public void testShouldModifyStorageSafeOk() throws Exception {
+ setUp(false);
+ SetResponse setResponse = restAPI.setUnitState(new SetUnitStateRequestImpl("music/storage/1")
+ .setNewState("user", "maintenance", "whatever reason.")
+ .setCondition(SetUnitStateRequest.Condition.SAFE));
+ assertThat(setResponse.getWasModified(), is(true));
+ assertThat(setResponse.getReason(), is("ok"));
+ }
+
+ @Test
+ public void testShouldModifyStorageSafeBlocked() throws Exception {
+ setUp(false);
+ {
+ SetResponse setResponse = restAPI.setUnitState(new SetUnitStateRequestImpl("music/storage/1")
+ .setNewState("user", "maintenance", "whatever reason.")
+ .setCondition(SetUnitStateRequest.Condition.SAFE));
+ assertThat(setResponse.getReason(), is("ok"));
+ assertThat(setResponse.getWasModified(), is(true));
+ }
+ {
+ SetResponse setResponse = restAPI.setUnitState(new SetUnitStateRequestImpl("music/storage/3")
+ .setNewState("user", "maintenance", "whatever reason.")
+ .setCondition(SetUnitStateRequest.Condition.SAFE));
+ assertThat(setResponse.getReason(), is(
+ "There is a node already in maintenance:1"));
+ assertThat(setResponse.getWasModified(), is(false));
+ }
+ }
+
+ @Test
+ public void testSetWantedStateOnNodeNotInSlobrok() throws Exception {
+ // Node 2 in cluster music does not have a valid NodeInfo due to passing true to setUp
+ setUp(true);
+ restAPI.setUnitState(new SetUnitStateRequestImpl("music/distributor/2").setNewState("user", "down", "borked node"));
+ UnitResponse response = restAPI.getState(new StateRequest("music/distributor/2", 0));
+ String expected = musicClusterExpectedUserStateStringWithUninitializedNode("east.g1", "down", "down", "down", "borked node");
+ assertEquals(expected, jsonWriter.createJson(response).toString(2));
+ }
+
+ @Test
+ public void testWrongUnit() throws Exception {
+ setUp(true);
+
+ String wrongUnitMessage = "State can only be set at cluster or node level";
+ try{
+ restAPI.setUnitState(new SetUnitStateRequestImpl(
+ "").setNewState("user", "down", "testing"));
+ assertTrue(false);
+ } catch (OperationNotSupportedForUnitException e) {
+ assertTrue(e.getMessage(), e.getMessage().contains(wrongUnitMessage));
+ }
+
+ // ... setting at cluster-level is allowed
+
+ try{
+ restAPI.setUnitState(new SetUnitStateRequestImpl(
+ "music/distributor").setNewState("user", "down", "testing"));
+ assertTrue(false);
+ } catch (OperationNotSupportedForUnitException e) {
+ assertTrue(e.getMessage(), e.getMessage().contains(wrongUnitMessage));
+ }
+
+ // ... setting at node-level is allowed
+
+ try{
+ restAPI.setUnitState(new SetUnitStateRequestImpl(
+ "music/storage/1/0").setNewState("user", "down", "testing"));
+ assertTrue(false);
+ } catch (OperationNotSupportedForUnitException e) {
+ assertTrue(e.getMessage(), e.getMessage().contains(wrongUnitMessage));
+ }
+ }
+
+ @Test
+ public void testInvalidUnit() throws Exception {
+ setUp(true);
+ try{
+ restAPI.setUnitState(new SetUnitStateRequestImpl(
+ "foo").setNewState("user", "down", "testing"));
+ assertTrue(false);
+ } catch (MissingUnitException e) {
+ }
+ try{
+ restAPI.setUnitState(new SetUnitStateRequestImpl(
+ "music/content").setNewState("user", "down", "testing"));
+ assertTrue(false);
+ } catch (MissingUnitException e) {
+ }
+ try{
+ restAPI.setUnitState(new SetUnitStateRequestImpl(
+ "music/storage/bah").setNewState("user", "down", "testing"));
+ assertTrue(false);
+ } catch (MissingUnitException e) {
+ }
+ try{
+ restAPI.setUnitState(new SetUnitStateRequestImpl(
+ "music/storage/10").setNewState("user", "down", "testing"));
+ assertTrue(false);
+ } catch (MissingUnitException e) {
+ }
+ try{
+ restAPI.setUnitState(new SetUnitStateRequestImpl(
+ "music/storage/1/0/1").setNewState("user", "down", "testing"));
+ assertTrue(false);
+ } catch (MissingUnitException e) {
+ }
+ try{
+ restAPI.setUnitState(new SetUnitStateRequestImpl(
+ "music/storage/1/bar").setNewState("user", "down", "testing"));
+ assertTrue(false);
+ } catch (MissingUnitException e) {
+ }
+ }
+
+ @Test
+ public void testSettingInvalidStateType() throws Exception {
+ setUp(true);
+ try{
+ restAPI.setUnitState(new SetUnitStateRequestImpl(
+ "music/distributor/1").setNewState("foo", "down", "testing"));
+ } catch (InvalidContentException e) {
+ assertTrue(e.getMessage(), e.getMessage().contains("No new user state given"));
+ }
+ }
+
+ @Test
+ public void testSafeIsInvalidForSetNodesStatesForCluster() throws Exception {
+ setUp(true);
+ try{
+ restAPI.setUnitState(new SetUnitStateRequestImpl("music")
+ .setNewState("user", "maintenance", "example reason")
+ .setCondition(SetUnitStateRequest.Condition.SAFE));
+ } catch (InvalidContentException e) {
+ assertTrue(e.getMessage(), e.getMessage().contains(
+ "Setting all nodes in a cluster to a state is only supported with FORCE"));
+ }
+ }
+
+ @Test
+ public void testSettingWrongStateType() throws Exception {
+ setUp(true);
+ try{
+ restAPI.setUnitState(new SetUnitStateRequestImpl(
+ "music/distributor/1").setNewState("generated", "down", "testing"));
+ } catch (InvalidContentException e) {
+ assertTrue(e.getMessage(), e.getMessage().contains("No new user state given"));
+ }
+ try{
+ restAPI.setUnitState(new SetUnitStateRequestImpl(
+ "music/distributor/1").setNewState("unit", "down", "testing"));
+ } catch (InvalidContentException e) {
+ assertTrue(e.getMessage(), e.getMessage().contains("No new user state given"));
+ }
+ }
+
+ @Test
+ public void testInvalidState() throws Exception {
+ setUp(true);
+ try{
+ restAPI.setUnitState(new SetUnitStateRequestImpl(
+ "music/distributor/1").setNewState("user", "initializing", "testing"));
+ } catch (InvalidContentException e) {
+ assertTrue(e.getMessage(), e.getMessage().contains("Invalid user state"));
+ }
+ try{
+ restAPI.setUnitState(new SetUnitStateRequestImpl(
+ "music/distributor/1").setNewState("user", "stopping", "testing"));
+ } catch (InvalidContentException e) {
+ assertTrue(e.getMessage(), e.getMessage().contains("Invalid user state"));
+ }
+ try{
+ restAPI.setUnitState(new SetUnitStateRequestImpl(
+ "music/distributor/1").setNewState("user", "foo", "testing"));
+ } catch (InvalidContentException e) {
+ assertTrue(e.getMessage(), e.getMessage().contains("Invalid user state"));
+ }
+ }
+
+ @Test
+ public void testOverwriteReason() throws Exception {
+ setUp(true);
+ restAPI.setUnitState(new SetUnitStateRequestImpl(
+ "music/distributor/1").setNewState("user", "down", "testing"));
+ restAPI.setUnitState(new SetUnitStateRequestImpl(
+ "music/distributor/1").setNewState("user", "down", "testing more"));
+ UnitResponse response = restAPI.getState(new StateRequest("music/distributor/1", 0));
+ String expected = musicClusterExpectedUserStateString("east.g2", "up", "up", "down", "testing more");
+ assertEquals(expected, jsonWriter.createJson(response).toString(2));
+ }
+
+}
diff --git a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/restapiv2/StateRestApiTest.java b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/restapiv2/StateRestApiTest.java
new file mode 100644
index 00000000000..c25fe092c2a
--- /dev/null
+++ b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/restapiv2/StateRestApiTest.java
@@ -0,0 +1,170 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.vespa.clustercontroller.core.restapiv2;
+
+import com.yahoo.vdslib.distribution.ConfiguredNode;
+import com.yahoo.vdslib.distribution.Distribution;
+import com.yahoo.vdslib.state.*;
+import com.yahoo.vespa.clustercontroller.core.FleetControllerTest;
+import com.yahoo.vespa.clustercontroller.core.NodeInfo;
+import com.yahoo.vespa.clustercontroller.core.RemoteClusterControllerTaskScheduler;
+import com.yahoo.vespa.clustercontroller.core.ContentCluster;
+import com.yahoo.vespa.clustercontroller.core.hostinfo.HostInfo;
+import com.yahoo.vespa.clustercontroller.utils.staterestapi.StateRestAPI;
+import com.yahoo.vespa.clustercontroller.utils.staterestapi.requests.UnitStateRequest;
+import com.yahoo.vespa.clustercontroller.utils.staterestapi.server.JsonWriter;
+
+import java.util.*;
+
+// TODO: Author
+public abstract class StateRestApiTest {
+
+ protected ClusterControllerMock books;
+ protected ClusterControllerMock music;
+ protected StateRestAPI restAPI;
+ protected JsonWriter jsonWriter = new JsonWriter();
+ protected Map<Integer, ClusterControllerStateRestAPI.Socket> ccSockets;
+
+ public static class StateRequest implements UnitStateRequest {
+ private String[] path;
+ private int recursive;
+
+ public StateRequest(String req, int recursive) {
+ path = req.isEmpty() ? new String[0] : req.split("/");
+ this.recursive = recursive;
+ }
+ @Override
+ public int getRecursiveLevels() { return recursive;
+ }
+ @Override
+ public String[] getUnitPath() { return path; }
+ }
+
+ protected void setUp(boolean dontInitializeNode2) throws Exception {
+ Distribution distribution = new Distribution(Distribution.getSimpleGroupConfig(2, 10));
+ jsonWriter.setDefaultPathPrefix("/cluster/v2");
+ {
+ Set<ConfiguredNode> nodes = FleetControllerTest.toNodes(0, 1, 2, 3);
+ ContentCluster cluster = new ContentCluster(
+ "books", nodes, distribution, 6 /* minStorageNodesUp*/, 0.9 /* minRatioOfStorageNodesUp */);
+ initializeCluster(cluster, nodes);
+ ClusterState state = new ClusterState("distributor:4 storage:4");
+ books = new ClusterControllerMock(cluster, state, 0, 0);
+ }
+ {
+ Set<ConfiguredNode> nodes = FleetControllerTest.toNodes(1, 2, 3, 5, 7);
+ Set<ConfiguredNode> nodesInSlobrok = FleetControllerTest.toNodes(1, 3, 5, 7);
+
+ ContentCluster cluster = new ContentCluster(
+ "music", nodes, distribution, 4 /* minStorageNodesUp*/, 0.0 /* minRatioOfStorageNodesUp */);
+ if (dontInitializeNode2) {
+ // TODO: this skips initialization of node 2 to fake that it is not answering
+ // which really leaves us in an illegal state
+ initializeCluster(cluster, nodesInSlobrok);
+ }
+ else {
+ initializeCluster(cluster, nodes);
+ }
+ ClusterState state = new ClusterState("distributor:8 .0.s:d .2.s:d .4.s:d .6.s:d "
+ + "storage:8 .0.s:d .2.s:d .4.s:d .6.s:d");
+ music = new ClusterControllerMock(cluster, state, 0, 0);
+ }
+ ccSockets = new TreeMap<>();
+ ccSockets.put(0, new ClusterControllerStateRestAPI.Socket("localhost", 80));
+ restAPI = new ClusterControllerStateRestAPI(new ClusterControllerStateRestAPI.FleetControllerResolver() {
+ @Override
+ public Map<String, RemoteClusterControllerTaskScheduler> getFleetControllers() {
+ Map<String, RemoteClusterControllerTaskScheduler> fleetControllers = new LinkedHashMap<>();
+ fleetControllers.put(books.context.cluster.getName(), books);
+ fleetControllers.put(music.context.cluster.getName(), music);
+ return fleetControllers;
+ }
+ }, ccSockets);
+ }
+
+ protected void initializeCluster(ContentCluster cluster, Collection<ConfiguredNode> nodes) {
+ for (ConfiguredNode configuredNode : nodes) {
+ for (NodeType type : NodeType.getTypes()) {
+ NodeState reported = new NodeState(type, State.UP);
+ if (type.equals(NodeType.STORAGE)) {
+ reported.setDiskCount(2);
+ }
+
+ NodeInfo nodeInfo = cluster.clusterInfo().setRpcAddress(new Node(type, configuredNode.index()), "rpc:" + type + "/" + configuredNode);
+ nodeInfo.setReportedState(reported, 10);
+ nodeInfo.setHostInfo(HostInfo.createHostInfo(getHostInfo()));
+ }
+ }
+ }
+
+ private String getHostInfo() {
+ return "{\n" +
+ " \"cluster-state-version\": 0,\n" +
+ " \"metrics\": {\n" +
+ " \"values\": [\n" +
+ " {\n" +
+ " \"name\": \"vds.datastored.alldisks.buckets\",\n" +
+ " \"values\": {\n" +
+ " \"last\": 1\n" +
+ " }\n" +
+ " },\n" +
+ " {\n" +
+ " \"name\": \"vds.datastored.alldisks.docs\",\n" +
+ " \"values\": {\n" +
+ " \"last\": 2\n" +
+ " }\n" +
+ " },\n" +
+ " {\n" +
+ " \"name\": \"vds.datastored.alldisks.bytes\",\n" +
+ " \"values\": {\n" +
+ " \"last\": 3\n" +
+ " }\n" +
+ " }\n" +
+ " ]\n" +
+ " },\n" +
+ " \"distributor\": {\n" +
+ " \"storage-nodes\": [\n" +
+ " {\n" +
+ " \"node-index\": 1,\n" +
+ " \"min-current-replication-factor\": 2,\n" +
+ " \"ops-latency\": {\n" +
+ " \"put\": {\n" +
+ " \"latency-ms-sum\": 6,\n" +
+ " \"count\": 7\n" +
+ " }\n" +
+ " }\n" +
+ " },\n" +
+ " {\n" +
+ " \"node-index\": 3,\n" +
+ " \"min-current-replication-factor\": 2,\n" +
+ " \"ops-latency\": {\n" +
+ " \"put\": {\n" +
+ " \"latency-ms-sum\": 5,\n" +
+ " \"count\": 4\n" +
+ " }\n" +
+ " }\n" +
+ " },\n" +
+ " {\n" +
+ " \"node-index\": 5,\n" +
+ " \"min-current-replication-factor\": 2,\n" +
+ " \"ops-latency\": {\n" +
+ " \"put\": {\n" +
+ " \"latency-ms-sum\": 4,\n" +
+ " \"count\": 5\n" +
+ " }\n" +
+ " }\n" +
+ " },\n" +
+ " {\n" +
+ " \"node-index\": 7,\n" +
+ " \"min-current-replication-factor\": 2,\n" +
+ " \"ops-latency\": {\n" +
+ " \"put\": {\n" +
+ " \"latency-ms-sum\": 6,\n" +
+ " \"count\": 7\n" +
+ " }\n" +
+ " }\n" +
+ " }\n" +
+ " ]\n" +
+ " }\n" +
+ "}";
+ }
+}
diff --git a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/rpc/RPCCommunicatorTest.java b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/rpc/RPCCommunicatorTest.java
new file mode 100644
index 00000000000..6c41cfd9a75
--- /dev/null
+++ b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/rpc/RPCCommunicatorTest.java
@@ -0,0 +1,94 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.vespa.clustercontroller.core.rpc;
+
+import com.yahoo.jrt.Request;
+import com.yahoo.jrt.RequestWaiter;
+import com.yahoo.jrt.Target;
+import com.yahoo.vdslib.state.Node;
+import com.yahoo.vdslib.state.NodeType;
+import com.yahoo.vespa.clustercontroller.core.*;
+import org.junit.Test;
+import org.mockito.Mockito;
+
+import java.util.HashSet;
+import java.util.Set;
+
+import static org.hamcrest.MatcherAssert.assertThat;
+import static org.hamcrest.core.Is.is;
+import static org.hamcrest.core.IsNot.not;
+import static org.junit.Assert.assertTrue;
+import static org.mockito.Matchers.any;
+import static org.mockito.Matchers.eq;
+import static org.mockito.Mockito.mock;
+import static org.mockito.Mockito.when;
+
+public class RPCCommunicatorTest {
+
+ public static final int NODE_STATE_REQUEST_TIMEOUT_INTERVAL_MAX_MS = 10000;
+ public static final int NODE_STATE_REQUEST_TIMEOUT_INTERVAL_START_PERCENTAGE = 80;
+ public static final int NODE_STATE_REQUEST_TIMEOUT_INTERVAL_STOP_PERCENTAGE = 95;
+ public static final int INDEX = 0;
+ public static final int TEST_ITERATIONS = 500;
+ public static final int ROUNDTRIP_LATENCY_SECONDS = 2000;
+
+ @Test
+ public void testGenerateNodeStateRequestTimeoutMs() throws Exception {
+ final RPCCommunicator communicator = new RPCCommunicator(
+ null /* Timer */,
+ INDEX,
+ NODE_STATE_REQUEST_TIMEOUT_INTERVAL_MAX_MS,
+ NODE_STATE_REQUEST_TIMEOUT_INTERVAL_START_PERCENTAGE,
+ NODE_STATE_REQUEST_TIMEOUT_INTERVAL_STOP_PERCENTAGE,
+ 0);
+ int max = -1;
+ int min = 100000;
+ final Set<Integer> uniqueTimeoutValues = new HashSet<>();
+ for (int x = 0; x < TEST_ITERATIONS; x++) {
+ int timeOutMs = communicator.generateNodeStateRequestTimeoutMs();
+ min = Math.min(min, timeOutMs);
+ max = Math.max(max, timeOutMs);
+ uniqueTimeoutValues.add(timeOutMs);
+ }
+ assertTrue(max <= NODE_STATE_REQUEST_TIMEOUT_INTERVAL_MAX_MS *
+ NODE_STATE_REQUEST_TIMEOUT_INTERVAL_STOP_PERCENTAGE / 100.);
+ assertThat(min, is(not(max)));
+ assertTrue(min >= NODE_STATE_REQUEST_TIMEOUT_INTERVAL_START_PERCENTAGE *
+ NODE_STATE_REQUEST_TIMEOUT_INTERVAL_MAX_MS / 100);
+ assertTrue(uniqueTimeoutValues.size()> TEST_ITERATIONS/2);
+ }
+
+ @Test
+ public void testGenerateNodeStateRequestTimeoutMsWithUpdates() throws Exception {
+ final RPCCommunicator communicator = new RPCCommunicator(null /* Timer */, INDEX, 1, 1, 100, 0);
+ FleetControllerOptions fleetControllerOptions = new FleetControllerOptions(null /*clustername*/);
+ fleetControllerOptions.nodeStateRequestTimeoutEarliestPercentage = 100;
+ fleetControllerOptions.nodeStateRequestTimeoutLatestPercentage = 100;
+ fleetControllerOptions.nodeStateRequestTimeoutMS = NODE_STATE_REQUEST_TIMEOUT_INTERVAL_MAX_MS;
+ communicator.propagateOptions(fleetControllerOptions);
+ int timeOutMs = communicator.generateNodeStateRequestTimeoutMs();
+ assertThat(timeOutMs, is(NODE_STATE_REQUEST_TIMEOUT_INTERVAL_MAX_MS));
+ }
+
+ @Test
+ public void testRoundtripLatency() throws Exception {
+ final Timer timer = new FakeTimer();
+ final RPCCommunicator communicator = new RPCCommunicator(
+ timer,
+ INDEX,
+ NODE_STATE_REQUEST_TIMEOUT_INTERVAL_MAX_MS,
+ NODE_STATE_REQUEST_TIMEOUT_INTERVAL_STOP_PERCENTAGE,
+ 100,
+ ROUNDTRIP_LATENCY_SECONDS);
+
+ final NodeInfo nodeInfo = mock(NodeInfo.class);
+ final Target target = mock(Target.class);
+
+ when(target.isValid()).thenReturn(true);
+ when(nodeInfo.getConnection()).thenReturn(target);
+ communicator.getNodeState(nodeInfo, null);
+ Mockito.verify(target).invokeAsync(
+ (Request)any(),
+ eq(ROUNDTRIP_LATENCY_SECONDS + NODE_STATE_REQUEST_TIMEOUT_INTERVAL_MAX_MS/1000.0),
+ (RequestWaiter)any());
+ }
+}
diff --git a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/testutils/LogFormatter.java b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/testutils/LogFormatter.java
new file mode 100644
index 00000000000..d437053fcd9
--- /dev/null
+++ b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/testutils/LogFormatter.java
@@ -0,0 +1,34 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.vespa.clustercontroller.core.testutils;
+
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.InputStream;
+import java.util.logging.Formatter;
+import java.util.logging.LogManager;
+import java.util.logging.LogRecord;
+
+public class LogFormatter extends Formatter {
+ @Override
+ public String format(LogRecord record) {
+ return record.getMillis() + " " + record.getLevel() + " "
+ + record.getLoggerName().substring(record.getLoggerName().lastIndexOf('.') + 1) + " " + record.getMessage() + "\n";
+ }
+
+ private static boolean initialized = false;
+ public synchronized static void initializeLogging() {
+ if (initialized) return;
+ initialized = true;
+ try {
+ File f = new File("src/test/resources/test.logging.properties");
+ if (!f.exists()) {
+ System.err.println("Test logging property file does not exist");
+ }
+ final InputStream inputStream = new FileInputStream(f);
+ LogManager.getLogManager().readConfiguration(inputStream);
+ } catch (Throwable t) {
+ System.err.println("Failed to initialize logging");
+ t.printStackTrace();
+ }
+ }
+}
diff --git a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/testutils/StateWaiter.java b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/testutils/StateWaiter.java
new file mode 100644
index 00000000000..db4879b89d4
--- /dev/null
+++ b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/testutils/StateWaiter.java
@@ -0,0 +1,114 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.vespa.clustercontroller.core.testutils;
+
+import com.yahoo.vdslib.state.ClusterState;
+import com.yahoo.vdslib.state.Node;
+import com.yahoo.vespa.clustercontroller.core.FakeTimer;
+import com.yahoo.vespa.clustercontroller.core.listeners.SystemStateListener;
+
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
+/**
+ * Old class used for waiting for something..
+ * Deprecated.. Use the Waiter class instead
+ */
+public class StateWaiter implements SystemStateListener {
+ private final FakeTimer timer;
+ protected ClusterState current;
+ private int stateUpdates = -1;
+
+ public StateWaiter(FakeTimer timer) {
+ this.timer = timer;
+ }
+
+ public void handleNewSystemState(ClusterState state) {
+ synchronized(timer) {
+ current = state;
+
+ ++stateUpdates;
+ timer.notifyAll();
+ }
+ }
+
+ public int getStateUpdates() { return Math.max(0, stateUpdates); }
+
+ public ClusterState getCurrentSystemState() {
+ synchronized(timer) {
+ return current;
+ }
+ }
+
+ public void waitForState(String stateRegex, long timeout) {
+ waitForState(stateRegex, timeout, 0);
+ }
+
+ /**
+ * WARNING: If timeIntervalToProvokeRetry is set != 0 that means time will can be set far into future
+ * and thus hit various unintended timeout periods. Only auto-step time if this is a non-issue.
+ */
+ public void waitForState(String stateRegex, long timeout, long timeIntervalToProvokeRetry) {
+ Pattern p = Pattern.compile(stateRegex);
+ long startTime = System.currentTimeMillis();
+ final long endTime = startTime + timeout;
+ int iteration = 0;
+ while (true) {
+ ClusterState currentClusterState;
+ synchronized(timer) {
+ currentClusterState = current;
+
+ if (currentClusterState != null) {
+ Matcher m = p.matcher(currentClusterState.toString());
+
+ if (m.matches()) {
+ return;
+ }
+ }
+ try{
+ if (timeIntervalToProvokeRetry == 0) {
+ timer.wait(endTime - startTime);
+ } else {
+ if (++iteration % 10 == 0) {
+ timer.advanceTime(timeIntervalToProvokeRetry);
+ }
+ timer.wait(10);
+ }
+ } catch (InterruptedException e) {
+ }
+ }
+ startTime = System.currentTimeMillis();
+ if (startTime >= endTime) {
+ throw new IllegalStateException("Timeout. Did not find a state matching " + stateRegex + " within timeout of " + timeout + " milliseconds. Current state is " + currentClusterState);
+ }
+ }
+ }
+ public void clear() {
+ synchronized(timer) {
+ current = null;
+ }
+ }
+
+ public void waitForInitProgressPassed(Node node, double minProgress, int timeoutMS) {
+ long startTime = System.currentTimeMillis();
+ long endTime = startTime + timeoutMS;
+ while (true) {
+ ClusterState currentClusterState;
+ synchronized(timer) {
+ currentClusterState = current;
+ if (currentClusterState != null) {
+ if (currentClusterState.getNodeState(node).getInitProgress() >= minProgress) {
+ return;
+ }
+ }
+ try{
+ timer.wait(endTime - startTime);
+ } catch (InterruptedException e) {
+ }
+ }
+ startTime = System.currentTimeMillis();
+ if (startTime >= endTime) {
+ throw new IllegalStateException("Timeout. Did not get to " + minProgress + " init progress on node " + node + " within timeout of " + timeoutMS + " ms. Current init progress is " + currentClusterState.getNodeState(node).getInitProgress());
+ }
+ }
+ }
+}
diff --git a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/testutils/WaitCondition.java b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/testutils/WaitCondition.java
new file mode 100644
index 00000000000..eae5f92278e
--- /dev/null
+++ b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/testutils/WaitCondition.java
@@ -0,0 +1,174 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.vespa.clustercontroller.core.testutils;
+
+import com.yahoo.vdslib.state.ClusterState;
+import com.yahoo.vdslib.state.Node;
+import com.yahoo.vdslib.state.NodeType;
+import com.yahoo.vespa.clustercontroller.core.DummyVdsNode;
+import com.yahoo.vespa.clustercontroller.core.FleetController;
+import com.yahoo.vespa.clustercontroller.core.listeners.SystemStateListener;
+
+import java.util.Collection;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
+/**
+* @author <a href="mailto:humbe@yahoo-inc.com">Haakon Humberset</a>
+*/
+public interface WaitCondition {
+
+ /** Return null if met, why not if it is not met. */
+ public String isConditionMet();
+
+ public abstract class StateWait implements WaitCondition {
+ private final Object monitor;
+ protected ClusterState currentState;
+ private final SystemStateListener listener = new SystemStateListener() {
+ @Override
+ public void handleNewSystemState(ClusterState state) {
+ synchronized (monitor) {
+ currentState = state;
+ monitor.notifyAll();
+ }
+ }
+ };
+
+ public StateWait(FleetController fc, Object monitor) {
+ this.monitor = monitor;
+ fc.addSystemStateListener(listener);
+ }
+
+ public ClusterState getCurrentState() {
+ synchronized (monitor) {
+ return currentState;
+ }
+ }
+ }
+
+ public class RegexStateMatcher extends StateWait {
+
+ private final Pattern pattern;
+ private Collection<DummyVdsNode> nodesToCheck;
+ private ClusterState lastCheckedState;
+
+ public RegexStateMatcher(String regex, FleetController fc, Object monitor) {
+ super(fc, monitor);
+ pattern = Pattern.compile(regex);
+ }
+
+ public RegexStateMatcher includeNotifyingNodes(Collection<DummyVdsNode> nodes) {
+ nodesToCheck = nodes;
+ return this;
+ }
+
+ @Override
+ public String isConditionMet() {
+ if (currentState != null) {
+ lastCheckedState = currentState;
+ Matcher m = pattern.matcher(lastCheckedState.toString());
+ if (m.matches()) {
+ if (nodesToCheck != null) {
+ for (DummyVdsNode node : nodesToCheck) {
+ if (node.getClusterState() == null) {
+ return "Node " + node + " has not received a cluster state yet";
+ }
+ if (! pattern.matcher(withoutTimestamps(node.getClusterState().toString())).matches()) {
+ return "Node " + node + " state mismatch.\n wanted: " + pattern + "\n is: " + node.getClusterState().toString();
+ }
+ if (node.getStateCommunicationVersion() > 0) {
+ if (!node.hasPendingGetNodeStateRequest()) {
+ return "Node " + node + " has not received another get node state request yet";
+ }
+ }
+ }
+ }
+ return null;
+ }
+ return "Cluster state mismatch";
+ }
+ return "No cluster state defined yet";
+ }
+
+ /** Returns the given state string with timestamps removed */
+ private String withoutTimestamps(String state) {
+ String[] parts = state.split(" ");
+ StringBuilder b = new StringBuilder();
+ for (String part : parts) {
+ if ( ! part.contains(".t"))
+ b.append(part).append(" ");
+ }
+ if (b.length() > 0)
+ b.setLength(b.length() - 1);
+ return b.toString();
+ }
+
+ @Override
+ public String toString() {
+ StringBuilder sb = new StringBuilder();
+ sb.append("RegexStateMatcher(\n wanted: '").append(pattern.pattern())
+ .append("'\n last checked: '").append(lastCheckedState).append("'")
+ .append("'\n current: '").append(currentState).append(")");
+ return sb.toString();
+ }
+ }
+
+ public class InitProgressPassedMatcher extends StateWait {
+ private final Node node;
+ private final double minProgress;
+
+ public InitProgressPassedMatcher(Node n, double minProgress, FleetController fc, Object monitor) {
+ super(fc, monitor);
+ this.node = n;
+ this.minProgress = minProgress;
+ }
+
+ @Override
+ public String isConditionMet() {
+ if (currentState == null) {
+ return "No cluster state defined yet";
+ }
+ double currentProgress = currentState.getNodeState(node).getInitProgress();
+ if (currentProgress < minProgress) {
+ return "Current progress of node " + node + " at " + currentProgress + " is less than wanted progress of " + minProgress;
+ }
+ return null;
+ }
+
+ @Override
+ public String toString() {
+ return "InitProgressPassedMatcher(" + node + ", " + minProgress + ")";
+ }
+ }
+
+ public static class MinUsedBitsMatcher extends StateWait {
+ private final int bitCount;
+ private final int nodeCount;
+
+ public MinUsedBitsMatcher(int bitCount, int nodeCount, FleetController fc, Object monitor) {
+ super(fc, monitor);
+ this.bitCount = bitCount;
+ this.nodeCount = nodeCount;
+ }
+
+ @Override
+ public String isConditionMet() {
+ if (currentState == null) {
+ return "No cluster state defined yet";
+ }
+ int nodebitcount = 0;
+ for (NodeType type : NodeType.getTypes()) {
+ int nodeCount = currentState.getNodeCount(type);
+ for (int i=0; i<nodeCount; ++i) {
+ if (currentState.getNodeState(new Node(type, i)).getMinUsedBits() == bitCount) {
+ ++nodebitcount;
+ }
+ }
+ }
+ if (nodebitcount == nodeCount) return null;
+ return "Currently, " + nodebitcount + " and not " + nodeCount + " nodes have " + bitCount + " min bits used set";
+ }
+
+ @Override
+ public String toString() { return "MinUsedBitsMatcher(" + bitCount + ", " + nodeCount + ")"; }
+ }
+}
diff --git a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/testutils/WaitTask.java b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/testutils/WaitTask.java
new file mode 100644
index 00000000000..24333e28cd6
--- /dev/null
+++ b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/testutils/WaitTask.java
@@ -0,0 +1,41 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.vespa.clustercontroller.core.testutils;
+
+import com.yahoo.vespa.clustercontroller.core.FleetController;
+import com.yahoo.vespa.clustercontroller.core.NodeInfo;
+
+/** A wait task is something that is performed once in a while while waiting for something. */
+public abstract class WaitTask {
+ public static final int defaultTaskFrequencyMillis = 1;
+
+ public abstract boolean performWaitTask();
+
+ public int getWaitTaskFrequencyInMillis() {
+ return defaultTaskFrequencyMillis;
+ }
+
+ public static class StateResender extends WaitTask {
+ public final FleetController fleetController;
+
+ public StateResender(FleetController fc) {
+ fleetController = fc;
+ }
+
+ @Override
+ public boolean performWaitTask() {
+ boolean didWork = false;
+ synchronized (fleetController.getMonitor()) {
+ for (NodeInfo info : fleetController.getCluster().getNodeInfo()) {
+ if (info.getTimeForNextStateRequestAttempt() != 0) didWork = true;
+ info.setNextGetStateAttemptTime(0);
+ }
+ }
+ return didWork;
+ }
+
+ @Override
+ public String toString() {
+ return "GetNodeStateResender";
+ }
+ }
+}
diff --git a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/testutils/Waiter.java b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/testutils/Waiter.java
new file mode 100644
index 00000000000..a6789ae22e5
--- /dev/null
+++ b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/testutils/Waiter.java
@@ -0,0 +1,105 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.vespa.clustercontroller.core.testutils;
+
+import com.yahoo.log.LogLevel;
+import com.yahoo.vdslib.state.ClusterState;
+import com.yahoo.vdslib.state.Node;
+import com.yahoo.vespa.clustercontroller.core.DummyVdsNode;
+import com.yahoo.vespa.clustercontroller.core.FleetController;
+
+import java.util.LinkedList;
+import java.util.List;
+import java.util.logging.Logger;
+
+public interface Waiter {
+
+ public interface DataRetriever {
+ public Object getMonitor();
+ public FleetController getFleetController();
+ public List<DummyVdsNode> getDummyNodes();
+ public int getTimeoutMS();
+ }
+
+ public ClusterState waitForState(String state) throws Exception;
+ public ClusterState waitForState(String state, int timeoutMS) throws Exception;
+ public ClusterState waitForStableSystem() throws Exception;
+ public ClusterState waitForStableSystem(int nodeCount) throws Exception;
+ public ClusterState waitForInitProgressPassed(Node n, double progress);
+ public ClusterState waitForClusterStateIncludingNodesWithMinUsedBits(int bitcount, int nodecount);
+ public void wait(WaitCondition c, WaitTask wt, int timeoutMS);
+
+ public static class Impl implements Waiter {
+
+ private static final Logger log = Logger.getLogger(Impl.class.getName());
+ private final DataRetriever data;
+
+ public Impl(DataRetriever data) {
+ this.data = data;
+ }
+
+ public ClusterState waitForState(String state) throws Exception { return waitForState(state, data.getTimeoutMS()); }
+ public ClusterState waitForState(String state, int timeoutMS) throws Exception {
+ LinkedList<DummyVdsNode> nodesToCheck = new LinkedList<>();
+ for(DummyVdsNode node : data.getDummyNodes()) {
+ if (node.isConnected()) nodesToCheck.add(node);
+ }
+ WaitCondition.StateWait swc = new WaitCondition.RegexStateMatcher(state, data.getFleetController(), data.getMonitor()).includeNotifyingNodes(nodesToCheck);
+ wait(swc, new WaitTask.StateResender(data.getFleetController()), timeoutMS);
+ return swc.getCurrentState();
+ }
+ public ClusterState waitForStableSystem() throws Exception {
+ return waitForStableSystem(data.getDummyNodes().size() / 2);
+ }
+ public ClusterState waitForStableSystem(int nodeCount) throws Exception {
+ WaitCondition.StateWait swc = new WaitCondition.RegexStateMatcher("version:\\d+ distributor:"+nodeCount+" storage:"+nodeCount, data.getFleetController(), data.getMonitor()).includeNotifyingNodes(data.getDummyNodes());
+ wait(swc, new WaitTask.StateResender(data.getFleetController()), data.getTimeoutMS());
+ return swc.getCurrentState();
+ }
+ public ClusterState waitForInitProgressPassed(Node n, double progress) {
+ WaitCondition.StateWait swc = new WaitCondition.InitProgressPassedMatcher(n, progress, data.getFleetController(), data.getMonitor());
+ wait(swc, new WaitTask.StateResender(data.getFleetController()), data.getTimeoutMS());
+ return swc.getCurrentState();
+ }
+ public ClusterState waitForClusterStateIncludingNodesWithMinUsedBits(int bitcount, int nodecount) {
+ WaitCondition.StateWait swc = new WaitCondition.MinUsedBitsMatcher(bitcount, nodecount, data.getFleetController(), data.getMonitor());
+ wait(swc, new WaitTask.StateResender(data.getFleetController()), data.getTimeoutMS());
+ return swc.getCurrentState();
+ }
+
+ public final void wait(WaitCondition c, WaitTask wt, int timeoutMS) {
+ log.log(LogLevel.INFO, "Waiting for " + c + (wt == null ? "" : " with wait task " + wt));
+ final long startTime = System.currentTimeMillis();
+ final long endTime = startTime + timeoutMS;
+ String lastReason = null;
+ while (true) {
+ synchronized (data.getMonitor()) {
+ String reason = c.isConditionMet();
+ if (reason == null) {
+ log.log(LogLevel.INFO, "Condition met. Returning");
+ return;
+ }
+ if (lastReason == null || !lastReason.equals(reason)) {
+ log.log(LogLevel.INFO, "Wait condition not met: " + reason);
+ lastReason = reason;
+ }
+ try {
+ boolean allowWait = true;
+ if (wt != null) {
+ if (wt.performWaitTask()) {
+ data.getMonitor().notifyAll();
+ allowWait = false;
+ }
+ }
+ final long timeLeft = endTime - System.currentTimeMillis();
+ if (timeLeft <= 0) {
+ throw new IllegalStateException("Timed out waiting max " + timeoutMS + " ms for " + c + (wt == null ? "" : "\n with wait task " + wt) + ",\n reason: " + reason);
+ }
+ if (allowWait) data.getMonitor().wait(wt == null ? WaitTask.defaultTaskFrequencyMillis : Math.min(wt.getWaitTaskFrequencyInMillis(), timeLeft));
+ } catch (InterruptedException e) {
+ }
+ }
+ }
+ }
+ }
+
+} \ No newline at end of file
diff --git a/clustercontroller-core/src/test/resources/test.logging.properties b/clustercontroller-core/src/test/resources/test.logging.properties
new file mode 100644
index 00000000000..a99598f795a
--- /dev/null
+++ b/clustercontroller-core/src/test/resources/test.logging.properties
@@ -0,0 +1,5 @@
+handlers = java.util.logging.ConsoleHandler
+.level = WARNING
+java.util.logging.ConsoleHandler.level = INFO
+java.util.logging.ConsoleHandler.formatter = com.yahoo.vespa.clustercontroller.core.testutils.LogFormatter
+com.yahoo.vespa.level = INFO