diff options
author | Håkon Hallingstad <hakon@verizonmedia.com> | 2020-01-17 12:39:12 +0100 |
---|---|---|
committer | Håkon Hallingstad <hakon@verizonmedia.com> | 2020-01-17 12:39:12 +0100 |
commit | c28f13dac59167dee1257f5b23835e6441bc5f31 (patch) | |
tree | 7910cb0ce12e37b7af828c3b13c14b84cbcb0105 /clustercontroller-core/src/test/java/com/yahoo | |
parent | b324c19e007a7a57ba731ed72a01d35cd6937ed7 (diff) |
Use bucket_space metric in retirement
This makes the Cluster Controller use the
vds.datastored.bucket_space.buckets_total metric (dimension bucketSpace=default) to
determine whether a content node manages zero buckets and, if so, allow
the node to go permanently down. This is used when a node is retiring and
is to be removed from the application.
The change is guarded by the use-bucket-space-metric flag, which defaults to true. If the new
metric doesn't work as expected, we can revert to using the current/old metric
by flipping the flag. The flag can be controlled per application.
Diffstat (limited to 'clustercontroller-core/src/test/java/com/yahoo')
8 files changed, 64 insertions, 20 deletions
diff --git a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/ClusterFixture.java b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/ClusterFixture.java index 2df9279e450..36c49fdf5e2 100644 --- a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/ClusterFixture.java +++ b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/ClusterFixture.java @@ -198,7 +198,7 @@ public class ClusterFixture { Collection<ConfiguredNode> nodes = DistributionBuilder.buildConfiguredNodes(nodeCount); Distribution distribution = DistributionBuilder.forFlatCluster(nodeCount); - ContentCluster cluster = new ContentCluster("foo", nodes, distribution, 0, 0.0); + ContentCluster cluster = new ContentCluster("foo", nodes, distribution, 0, 0.0, true); return new ClusterFixture(cluster, distribution); } @@ -206,7 +206,7 @@ public class ClusterFixture { static ClusterFixture forHierarchicCluster(DistributionBuilder.GroupBuilder root) { List<ConfiguredNode> nodes = DistributionBuilder.buildConfiguredNodes(root.totalNodeCount()); Distribution distribution = DistributionBuilder.forHierarchicCluster(root); - ContentCluster cluster = new ContentCluster("foo", nodes, distribution, 0, 0.0); + ContentCluster cluster = new ContentCluster("foo", nodes, distribution, 0, 0.0, true); return new ClusterFixture(cluster, distribution); } diff --git a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/FleetControllerTest.java b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/FleetControllerTest.java index ae64bee6bbf..d569feb6f14 100644 --- a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/FleetControllerTest.java +++ b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/FleetControllerTest.java @@ -34,8 +34,6 @@ import org.junit.rules.TestRule; import org.junit.rules.TestWatcher; import 
org.junit.runner.Description; -import static org.junit.Assert.fail; - import java.util.ArrayList; import java.util.Arrays; import java.util.Collection; @@ -49,6 +47,8 @@ import java.util.logging.Logger; import java.util.regex.Pattern; import java.util.stream.Collectors; +import static org.junit.Assert.fail; + /** * @author Håkon Humberset */ @@ -157,7 +157,7 @@ public abstract class FleetControllerTest implements Waiter { options.nodes, options.storageDistribution, options.minStorageNodesUp, - options.minRatioOfStorageNodesUp); + options.minRatioOfStorageNodesUp, true); NodeStateGatherer stateGatherer = new NodeStateGatherer(timer, timer, log); Communicator communicator = new RPCCommunicator( RPCCommunicator.createRealSupervisor(), diff --git a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/NodeStateChangeCheckerTest.java b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/NodeStateChangeCheckerTest.java index 303376e7a5e..153d570adaf 100644 --- a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/NodeStateChangeCheckerTest.java +++ b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/NodeStateChangeCheckerTest.java @@ -61,14 +61,14 @@ public class NodeStateChangeCheckerTest { } private NodeStateChangeChecker createChangeChecker(ContentCluster cluster) { - return new NodeStateChangeChecker(minStorageNodesUp, minRatioOfStorageNodesUp, requiredRedundancy, cluster.clusterInfo()); + return new NodeStateChangeChecker(minStorageNodesUp, minRatioOfStorageNodesUp, requiredRedundancy, cluster.clusterInfo(), true); } private ContentCluster createCluster(Collection<ConfiguredNode> nodes) { Distribution distribution = mock(Distribution.class); Group group = new Group(2, "to"); when(distribution.getRootGroup()).thenReturn(group); - return new ContentCluster("Clustername", nodes, distribution, minStorageNodesUp, 0.0); + return new ContentCluster("Clustername", nodes, 
distribution, minStorageNodesUp, 0.0, true); } private StorageNodeInfo createStorageNodeInfo(int index, State state) { @@ -78,7 +78,7 @@ public class NodeStateChangeCheckerTest { String clusterName = "Clustername"; Set<ConfiguredNode> configuredNodeIndexes = new HashSet<>(); - ContentCluster cluster = new ContentCluster(clusterName, configuredNodeIndexes, distribution, minStorageNodesUp, 0.0); + ContentCluster cluster = new ContentCluster(clusterName, configuredNodeIndexes, distribution, minStorageNodesUp, 0.0, true); String rpcAddress = ""; StorageNodeInfo storageNodeInfo = new StorageNodeInfo(cluster, index, false, rpcAddress, distribution); @@ -136,7 +136,7 @@ public class NodeStateChangeCheckerTest { public void testUnknownStorageNode() { ContentCluster cluster = createCluster(createNodes(4)); NodeStateChangeChecker nodeStateChangeChecker = new NodeStateChangeChecker( - 5 /* min storage nodes */, minRatioOfStorageNodesUp, requiredRedundancy, cluster.clusterInfo()); + 5 /* min storage nodes */, minRatioOfStorageNodesUp, requiredRedundancy, cluster.clusterInfo(), true); NodeStateChangeChecker.Result result = nodeStateChangeChecker.evaluateTransition( new Node(NodeType.STORAGE, 10), defaultAllUpClusterState(), SetUnitStateRequest.Condition.SAFE, UP_NODE_STATE, MAINTENANCE_NODE_STATE); @@ -161,7 +161,7 @@ public class NodeStateChangeCheckerTest { ContentCluster cluster = createCluster(createNodes(4)); setAllNodesUp(cluster, HostInfo.createHostInfo(createDistributorHostInfo(4, 5, 6))); NodeStateChangeChecker nodeStateChangeChecker = new NodeStateChangeChecker( - 5 /* min storage nodes */, minRatioOfStorageNodesUp, requiredRedundancy, cluster.clusterInfo()); + 5 /* min storage nodes */, minRatioOfStorageNodesUp, requiredRedundancy, cluster.clusterInfo(), true); NodeStateChangeChecker.Result result = nodeStateChangeChecker.evaluateTransition( nodeStorage, defaultAllUpClusterState(), SetUnitStateRequest.Condition.SAFE, UP_NODE_STATE, MAINTENANCE_NODE_STATE); @@ -549,12 
+549,48 @@ public class NodeStateChangeCheckerTest { " \"dimensions\":\n" + " {\n" + " }\n" + + " },\n" + + " {\n" + + " \"name\":\"vds.datastored.bucket_space.buckets_total\",\n" + + " \"description\":\"Total number buckets present in the bucket space (ready + not ready)\",\n" + + " \"values\":\n" + + " {\n" + + " \"average\":0.0,\n" + + " \"sum\":0.0,\n" + + " \"count\":1,\n" + + " \"rate\":0.016666,\n" + + " \"min\":0,\n" + + " \"max\":0,\n" + + " \"last\":0\n" + + " },\n" + + " \"dimensions\":\n" + + " {\n" + + " \"bucketSpace\":\"global\"\n" + + " }\n" + + " },\n" + + " {\n" + + " \"name\":\"vds.datastored.bucket_space.buckets_total\",\n" + + " \"description\":\"Total number buckets present in the bucket space (ready + not ready)\",\n" + + " \"values\":\n" + + " {\n" + + " \"average\":129.0,\n" + + " \"sum\":129.0,\n" + + " \"count\":1,\n" + + " \"rate\":0.016666,\n" + + " \"min\":129,\n" + + " \"max\":129,\n" + + " \"last\":%d\n" + + " },\n" + + " \"dimensions\":\n" + + " {\n" + + " \"bucketSpace\":\"default\"\n" + + " }\n" + " }\n" + " ]\n" + " },\n" + " \"cluster-state-version\":%d\n" + "}", - lastAlldisksBuckets, clusterStateVersion)); + lastAlldisksBuckets, lastAlldisksBuckets, clusterStateVersion)); } private List<ConfiguredNode> createNodes(int count) { diff --git a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/StateChangeHandlerTest.java b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/StateChangeHandlerTest.java index 68926b4d10d..dc76b381f51 100644 --- a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/StateChangeHandlerTest.java +++ b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/StateChangeHandlerTest.java @@ -80,7 +80,7 @@ public class StateChangeHandlerTest { Distribution distribution = new Distribution(Distribution.getDefaultDistributionConfig(2, 100)); this.config = config; for (int i=0; i<config.nodeCount; ++i) 
configuredNodes.add(new ConfiguredNode(i, false)); - cluster = new ContentCluster("testcluster", configuredNodes, distribution, 0, 0.0); + cluster = new ContentCluster("testcluster", configuredNodes, distribution, 0, 0.0, true); nodeStateChangeHandler = new StateChangeHandler(clock, eventLog, null); params.minStorageNodesUp(1).minDistributorNodesUp(1) .minRatioOfStorageNodesUp(0.0).minRatioOfDistributorNodesUp(0.0) diff --git a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/StateChangeTest.java b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/StateChangeTest.java index 5f9e0d56cfa..8b7c50cda56 100644 --- a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/StateChangeTest.java +++ b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/StateChangeTest.java @@ -53,7 +53,7 @@ public class StateChangeTest extends FleetControllerTest { MetricUpdater metricUpdater = new MetricUpdater(new NoMetricReporter(), options.fleetControllerIndex); eventLog = new EventLog(timer, metricUpdater); ContentCluster cluster = new ContentCluster(options.clusterName, options.nodes, options.storageDistribution, - options.minStorageNodesUp, options.minRatioOfStorageNodesUp); + options.minStorageNodesUp, options.minRatioOfStorageNodesUp, true); NodeStateGatherer stateGatherer = new NodeStateGatherer(timer, timer, eventLog); DatabaseHandler database = new DatabaseHandler(new ZooKeeperDatabaseFactory(), timer, options.zooKeeperServerAddress, options.fleetControllerIndex, timer); StateChangeHandler stateGenerator = new StateChangeHandler(timer, eventLog, metricUpdater); diff --git a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/hostinfo/HostInfoTest.java b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/hostinfo/HostInfoTest.java index 8804e9a9c96..696cfd42f5a 100644 --- 
a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/hostinfo/HostInfoTest.java +++ b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/hostinfo/HostInfoTest.java @@ -10,12 +10,15 @@ import java.nio.file.Path; import java.nio.file.Paths; import java.util.List; import java.util.Map; +import java.util.Optional; import java.util.TreeMap; import static org.hamcrest.CoreMatchers.equalTo; import static org.hamcrest.CoreMatchers.nullValue; import static org.hamcrest.core.Is.is; -import static org.junit.Assert.*; +import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertThat; +import static org.junit.Assert.assertTrue; public class HostInfoTest { @@ -49,11 +52,16 @@ public class HostInfoTest { assertThat(storageNodeList.size(), is(2)); assertThat(storageNodeList.get(0).getIndex(), is(0)); List<Metrics.Metric> metrics = hostInfo.getMetrics().getMetrics(); - assertThat(metrics.size(), is(2)); - Metrics.Value value = metrics.get(0).getValue(); - assertThat(value.getLast(), is(5095L)); + assertThat(metrics.size(), is(4)); + assertThat(metrics.get(0).getValue().getLast(), is(5095L)); assertThat(metrics.get(0).getName(), equalTo("vds.datastored.alldisks.buckets")); + assertThat(metrics.get(3).getValue().getLast(), is(129L)); + assertThat(metrics.get(3).getName(), equalTo("vds.datastored.bucket_space.buckets_total")); assertThat(hostInfo.getClusterStateVersionOrNull(), is(123)); + + Optional<Metrics.Value> value = hostInfo.getMetrics() + .getValueAt("vds.datastored.bucket_space.buckets_total", Map.of("bucketSpace", "default")); + assertThat(value.map(Metrics.Value::getLast), equalTo(Optional.of(129L))); } @Test diff --git a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/restapiv2/NodeTest.java b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/restapiv2/NodeTest.java index 6bd7f086249..368f64352d1 100644 --- 
a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/restapiv2/NodeTest.java +++ b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/restapiv2/NodeTest.java @@ -121,7 +121,7 @@ public class NodeTest extends StateRestApiTest { public void testNodeNotSeenInSlobrok() throws Exception { setUp(true); ContentCluster old = music.context.cluster; - music.context.cluster = new ContentCluster(old.getName(), old.getConfiguredNodes().values(), old.getDistribution(), 0, 0.0); + music.context.cluster = new ContentCluster(old.getName(), old.getConfiguredNodes().values(), old.getDistribution(), 0, 0.0, true); NodeState currentState = new NodeState(NodeType.STORAGE, State.DOWN); currentState.setDescription("Not seen"); music.context.currentConsolidatedState.setNodeState(new Node(NodeType.STORAGE, 1), currentState); diff --git a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/restapiv2/StateRestApiTest.java b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/restapiv2/StateRestApiTest.java index faebbf8755d..44dcd50ae88 100644 --- a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/restapiv2/StateRestApiTest.java +++ b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/restapiv2/StateRestApiTest.java @@ -42,7 +42,7 @@ public abstract class StateRestApiTest { { Set<ConfiguredNode> nodes = FleetControllerTest.toNodes(0, 1, 2, 3); ContentCluster cluster = new ContentCluster( - "books", nodes, distribution, 6 /* minStorageNodesUp*/, 0.9 /* minRatioOfStorageNodesUp */); + "books", nodes, distribution, 6 /* minStorageNodesUp*/, 0.9, /* minRatioOfStorageNodesUp */true); initializeCluster(cluster, nodes); AnnotatedClusterState baselineState = AnnotatedClusterState.withoutAnnotations(ClusterState.stateFromString("distributor:4 storage:4")); Map<String, AnnotatedClusterState> bucketSpaceStates = new HashMap<>(); @@ -56,7 +56,7 @@ public 
abstract class StateRestApiTest { Set<ConfiguredNode> nodesInSlobrok = FleetControllerTest.toNodes(1, 3, 5, 7); ContentCluster cluster = new ContentCluster( - "music", nodes, distribution, 4 /* minStorageNodesUp*/, 0.0 /* minRatioOfStorageNodesUp */); + "music", nodes, distribution, 4 /* minStorageNodesUp*/, 0.0, /* minRatioOfStorageNodesUp */true); if (dontInitializeNode2) { // TODO: this skips initialization of node 2 to fake that it is not answering // which really leaves us in an illegal state |