aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--clustercontroller-apps/src/main/java/com/yahoo/vespa/clustercontroller/apps/clustercontroller/ClusterControllerClusterConfigurer.java1
-rw-r--r--clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/ContentCluster.java8
-rw-r--r--clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/FleetController.java2
-rw-r--r--clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/FleetControllerOptions.java3
-rw-r--r--clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/NodeStateChangeChecker.java27
-rw-r--r--clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/hostinfo/Metrics.java26
-rw-r--r--clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/ClusterFixture.java4
-rw-r--r--clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/FleetControllerTest.java6
-rw-r--r--clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/NodeStateChangeCheckerTest.java48
-rw-r--r--clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/StateChangeHandlerTest.java2
-rw-r--r--clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/StateChangeTest.java2
-rw-r--r--clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/hostinfo/HostInfoTest.java16
-rw-r--r--clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/restapiv2/NodeTest.java2
-rw-r--r--clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/restapiv2/StateRestApiTest.java4
-rw-r--r--config-model-api/src/main/java/com/yahoo/config/model/api/ModelContext.java1
-rw-r--r--config-model/src/main/java/com/yahoo/config/model/deploy/TestProperties.java1
-rw-r--r--config-model/src/main/java/com/yahoo/vespa/model/content/ClusterControllerConfig.java13
-rw-r--r--configdefinitions/src/vespa/fleetcontroller.def8
-rw-r--r--configserver/src/main/java/com/yahoo/vespa/config/server/deploy/ModelContextImpl.java6
-rw-r--r--flags/src/main/java/com/yahoo/vespa/flags/Flags.java7
-rw-r--r--protocols/getnodestate/host_info.json34
-rw-r--r--protocols/getnodestate/slow_host_info.json34
22 files changed, 222 insertions, 33 deletions
diff --git a/clustercontroller-apps/src/main/java/com/yahoo/vespa/clustercontroller/apps/clustercontroller/ClusterControllerClusterConfigurer.java b/clustercontroller-apps/src/main/java/com/yahoo/vespa/clustercontroller/apps/clustercontroller/ClusterControllerClusterConfigurer.java
index c95d814eb99..bad1e526fba 100644
--- a/clustercontroller-apps/src/main/java/com/yahoo/vespa/clustercontroller/apps/clustercontroller/ClusterControllerClusterConfigurer.java
+++ b/clustercontroller-apps/src/main/java/com/yahoo/vespa/clustercontroller/apps/clustercontroller/ClusterControllerClusterConfigurer.java
@@ -76,6 +76,7 @@ public class ClusterControllerClusterConfigurer {
options.clusterHasGlobalDocumentTypes = config.cluster_has_global_document_types();
options.minMergeCompletionRatio = config.min_merge_completion_ratio();
options.enableTwoPhaseClusterStateActivation = config.enable_two_phase_cluster_state_transitions();
+ options.determineBucketsFromBucketSpaceMetric = config.determine_buckets_from_bucket_space_metric();
}
private void configure(SlobroksConfig config) {
diff --git a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/ContentCluster.java b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/ContentCluster.java
index ddb9357f11f..ae12a6dabb1 100644
--- a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/ContentCluster.java
+++ b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/ContentCluster.java
@@ -22,6 +22,7 @@ public class ContentCluster {
private final ClusterInfo clusterInfo = new ClusterInfo();
private final Map<Node, Long> nodeStartTimestamps = new TreeMap<>();
+ private final boolean determineBucketsFromBucketSpaceMetric;
private int slobrokGenerationCount = 0;
@@ -32,7 +33,9 @@ public class ContentCluster {
private double minRatioOfStorageNodesUp;
public ContentCluster(String clusterName, Collection<ConfiguredNode> configuredNodes, Distribution distribution,
- int minStorageNodesUp, double minRatioOfStorageNodesUp) {
+ int minStorageNodesUp, double minRatioOfStorageNodesUp,
+ boolean determineBucketsFromBucketSpaceMetric) {
+ this.determineBucketsFromBucketSpaceMetric = determineBucketsFromBucketSpaceMetric;
if (configuredNodes == null) throw new IllegalArgumentException("Nodes must be set");
this.clusterName = clusterName;
this.distribution = distribution;
@@ -183,7 +186,8 @@ public class ContentCluster {
minStorageNodesUp,
minRatioOfStorageNodesUp,
distribution.getRedundancy(),
- clusterInfo);
+ clusterInfo,
+ determineBucketsFromBucketSpaceMetric);
return nodeStateChangeChecker.evaluateTransition(node, clusterState, condition, oldState, newState);
}
diff --git a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/FleetController.java b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/FleetController.java
index d943cf27f9c..02f52d5f0c7 100644
--- a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/FleetController.java
+++ b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/FleetController.java
@@ -193,7 +193,7 @@ public class FleetController implements NodeStateOrHostInfoChangeHandler, NodeAd
options.nodes,
options.storageDistribution,
options.minStorageNodesUp,
- options.minRatioOfStorageNodesUp);
+ options.minRatioOfStorageNodesUp, true);
NodeStateGatherer stateGatherer = new NodeStateGatherer(timer, timer, log);
Communicator communicator = new RPCCommunicator(
RPCCommunicator.createRealSupervisor(),
diff --git a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/FleetControllerOptions.java b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/FleetControllerOptions.java
index 553b3332ee8..7ad6765cc47 100644
--- a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/FleetControllerOptions.java
+++ b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/FleetControllerOptions.java
@@ -128,6 +128,9 @@ public class FleetControllerOptions implements Cloneable {
public int maxDivergentNodesPrintedInTaskErrorMessages = 10;
+ // TODO: May be removed once rolled out everywhere.
+ public boolean determineBucketsFromBucketSpaceMetric = true;
+
// TODO: Replace usage of this by usage where the nodes are explicitly passed (below)
public FleetControllerOptions(String clusterName) {
this.clusterName = clusterName;
diff --git a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/NodeStateChangeChecker.java b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/NodeStateChangeChecker.java
index c31a4976827..6bcb5b07f28 100644
--- a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/NodeStateChangeChecker.java
+++ b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/NodeStateChangeChecker.java
@@ -12,6 +12,7 @@ import com.yahoo.vespa.clustercontroller.core.hostinfo.StorageNode;
import com.yahoo.vespa.clustercontroller.utils.staterestapi.requests.SetUnitStateRequest;
import java.util.List;
+import java.util.Map;
import java.util.Optional;
/**
@@ -20,22 +21,27 @@ import java.util.Optional;
* @author Haakon Dybdahl
*/
public class NodeStateChangeChecker {
- public static final String BUCKETS_METRIC_NAME = "vds.datastored.alldisks.buckets";
+ public static final String LEGACY_BUCKETS_METRIC_NAME = "vds.datastored.alldisks.buckets";
+ public static final String BUCKETS_METRIC_NAME = "vds.datastored.bucket_space.buckets_total";
+ public static final Map<String, String> BUCKETS_METRIC_DIMENSIONS = Map.of("bucketSpace", "default");
private final int minStorageNodesUp;
private double minRatioOfStorageNodesUp;
private final int requiredRedundancy;
private final ClusterInfo clusterInfo;
+ private final boolean determineBucketsFromBucketSpaceMetric;
public NodeStateChangeChecker(
int minStorageNodesUp,
double minRatioOfStorageNodesUp,
int requiredRedundancy,
- ClusterInfo clusterInfo) {
+ ClusterInfo clusterInfo,
+ boolean determineBucketsFromBucketSpaceMetric) {
this.minStorageNodesUp = minStorageNodesUp;
this.minRatioOfStorageNodesUp = minRatioOfStorageNodesUp;
this.requiredRedundancy = requiredRedundancy;
this.clusterInfo = clusterInfo;
+ this.determineBucketsFromBucketSpaceMetric = determineBucketsFromBucketSpaceMetric;
}
public static class Result {
@@ -152,10 +158,19 @@ public class NodeStateChangeChecker {
+ hostInfoNodeVersion);
}
- Optional<Metrics.Value> bucketsMetric = hostInfo.getMetrics().getValue(BUCKETS_METRIC_NAME);
- if (!bucketsMetric.isPresent() || bucketsMetric.get().getLast() == null) {
- return Result.createDisallowed("Missing last value of the " + BUCKETS_METRIC_NAME +
- " metric for storage node " + nodeInfo.getNodeIndex());
+ Optional<Metrics.Value> bucketsMetric;
+ if (determineBucketsFromBucketSpaceMetric) {
+ bucketsMetric = hostInfo.getMetrics().getValueAt(BUCKETS_METRIC_NAME, BUCKETS_METRIC_DIMENSIONS);
+ if (!bucketsMetric.isPresent() || bucketsMetric.get().getLast() == null) {
+ return Result.createDisallowed("Missing last value of the " + BUCKETS_METRIC_NAME +
+ " metric for storage node " + nodeInfo.getNodeIndex());
+ }
+ } else {
+ bucketsMetric = hostInfo.getMetrics().getValue(LEGACY_BUCKETS_METRIC_NAME);
+ if (!bucketsMetric.isPresent() || bucketsMetric.get().getLast() == null) {
+ return Result.createDisallowed("Missing last value of the " + LEGACY_BUCKETS_METRIC_NAME +
+ " metric for storage node " + nodeInfo.getNodeIndex());
+ }
}
long lastBuckets = bucketsMetric.get().getLast();
diff --git a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/hostinfo/Metrics.java b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/hostinfo/Metrics.java
index cb2a1e92612..eef3fd2e217 100644
--- a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/hostinfo/Metrics.java
+++ b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/hostinfo/Metrics.java
@@ -6,6 +6,7 @@ import com.fasterxml.jackson.annotation.JsonProperty;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
+import java.util.Map;
import java.util.Optional;
/**
@@ -25,21 +26,44 @@ public class Metrics {
return Optional.empty();
}
+ /**
+ * Returns the value of the first metric named {@code name} whose dimensions match
+ * every entry of the given dimensions map.
+ *
+ * For each entry, the metric's value for that dimension key MUST EQUAL the entry's value.
+ * To require the metric to NOT have a dimension key, set its value to null.
+ * Dimensions present on the metric but not listed as keys in the map are not checked.
+ *
+ * @param name the metric name to look up
+ * @param dimensions required dimension values, keyed by dimension name
+ * @return the value of the first matching metric, or empty if no metric matches
+ */
+ public Optional<Value> getValueAt(String name, Map<String, String> dimensions) {
+ return metricsList.stream()
+ .filter(metric -> metric.name.equals(name))
+ .filter(metric -> dimensions.entrySet().stream()
+ .allMatch(entry -> {
+ String dimensionName = entry.getKey();
+ Optional<String> requiredDimensionValue = Optional.ofNullable(entry.getValue());
+ return metric.getDimensionValue(dimensionName).equals(requiredDimensionValue);
+ }))
+ .map(Metric::getValue)
+ .findFirst();
+ }
+
public List<Metric> getMetrics() { return Collections.unmodifiableList(metricsList); }
public static class Metric {
private final String name;
private final Value value;
+ private final Map<String, String> dimensions;
public Metric(
@JsonProperty("name") String name,
- @JsonProperty("values") Value value) {
+ @JsonProperty("values") Value value,
+ @JsonProperty("dimensions") Map<String, String> dimensions) {
this.name = name;
this.value = value;
+ this.dimensions = dimensions;
}
public String getName() { return name; }
public Value getValue() { return value; }
+ /**
+ * Returns the value of the given dimension for this metric.
+ *
+ * @param dimension the dimension name to look up
+ * @return the dimension value, or empty if this metric has no such dimension
+ * (including when the metric was constructed without any dimensions map)
+ */
+ public Optional<String> getDimensionValue(String dimension) {
+ // The constructor stores the map as given, so it is null when no "dimensions" were supplied.
+ if (dimensions == null) return Optional.empty();
+ return Optional.ofNullable(dimensions.get(dimension));
+ }
}
public static class Value {
diff --git a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/ClusterFixture.java b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/ClusterFixture.java
index 2df9279e450..36c49fdf5e2 100644
--- a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/ClusterFixture.java
+++ b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/ClusterFixture.java
@@ -198,7 +198,7 @@ public class ClusterFixture {
Collection<ConfiguredNode> nodes = DistributionBuilder.buildConfiguredNodes(nodeCount);
Distribution distribution = DistributionBuilder.forFlatCluster(nodeCount);
- ContentCluster cluster = new ContentCluster("foo", nodes, distribution, 0, 0.0);
+ ContentCluster cluster = new ContentCluster("foo", nodes, distribution, 0, 0.0, true);
return new ClusterFixture(cluster, distribution);
}
@@ -206,7 +206,7 @@ public class ClusterFixture {
static ClusterFixture forHierarchicCluster(DistributionBuilder.GroupBuilder root) {
List<ConfiguredNode> nodes = DistributionBuilder.buildConfiguredNodes(root.totalNodeCount());
Distribution distribution = DistributionBuilder.forHierarchicCluster(root);
- ContentCluster cluster = new ContentCluster("foo", nodes, distribution, 0, 0.0);
+ ContentCluster cluster = new ContentCluster("foo", nodes, distribution, 0, 0.0, true);
return new ClusterFixture(cluster, distribution);
}
diff --git a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/FleetControllerTest.java b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/FleetControllerTest.java
index ae64bee6bbf..d569feb6f14 100644
--- a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/FleetControllerTest.java
+++ b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/FleetControllerTest.java
@@ -34,8 +34,6 @@ import org.junit.rules.TestRule;
import org.junit.rules.TestWatcher;
import org.junit.runner.Description;
-import static org.junit.Assert.fail;
-
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
@@ -49,6 +47,8 @@ import java.util.logging.Logger;
import java.util.regex.Pattern;
import java.util.stream.Collectors;
+import static org.junit.Assert.fail;
+
/**
* @author Håkon Humberset
*/
@@ -157,7 +157,7 @@ public abstract class FleetControllerTest implements Waiter {
options.nodes,
options.storageDistribution,
options.minStorageNodesUp,
- options.minRatioOfStorageNodesUp);
+ options.minRatioOfStorageNodesUp, true);
NodeStateGatherer stateGatherer = new NodeStateGatherer(timer, timer, log);
Communicator communicator = new RPCCommunicator(
RPCCommunicator.createRealSupervisor(),
diff --git a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/NodeStateChangeCheckerTest.java b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/NodeStateChangeCheckerTest.java
index 303376e7a5e..153d570adaf 100644
--- a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/NodeStateChangeCheckerTest.java
+++ b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/NodeStateChangeCheckerTest.java
@@ -61,14 +61,14 @@ public class NodeStateChangeCheckerTest {
}
private NodeStateChangeChecker createChangeChecker(ContentCluster cluster) {
- return new NodeStateChangeChecker(minStorageNodesUp, minRatioOfStorageNodesUp, requiredRedundancy, cluster.clusterInfo());
+ return new NodeStateChangeChecker(minStorageNodesUp, minRatioOfStorageNodesUp, requiredRedundancy, cluster.clusterInfo(), true);
}
private ContentCluster createCluster(Collection<ConfiguredNode> nodes) {
Distribution distribution = mock(Distribution.class);
Group group = new Group(2, "to");
when(distribution.getRootGroup()).thenReturn(group);
- return new ContentCluster("Clustername", nodes, distribution, minStorageNodesUp, 0.0);
+ return new ContentCluster("Clustername", nodes, distribution, minStorageNodesUp, 0.0, true);
}
private StorageNodeInfo createStorageNodeInfo(int index, State state) {
@@ -78,7 +78,7 @@ public class NodeStateChangeCheckerTest {
String clusterName = "Clustername";
Set<ConfiguredNode> configuredNodeIndexes = new HashSet<>();
- ContentCluster cluster = new ContentCluster(clusterName, configuredNodeIndexes, distribution, minStorageNodesUp, 0.0);
+ ContentCluster cluster = new ContentCluster(clusterName, configuredNodeIndexes, distribution, minStorageNodesUp, 0.0, true);
String rpcAddress = "";
StorageNodeInfo storageNodeInfo = new StorageNodeInfo(cluster, index, false, rpcAddress, distribution);
@@ -136,7 +136,7 @@ public class NodeStateChangeCheckerTest {
public void testUnknownStorageNode() {
ContentCluster cluster = createCluster(createNodes(4));
NodeStateChangeChecker nodeStateChangeChecker = new NodeStateChangeChecker(
- 5 /* min storage nodes */, minRatioOfStorageNodesUp, requiredRedundancy, cluster.clusterInfo());
+ 5 /* min storage nodes */, minRatioOfStorageNodesUp, requiredRedundancy, cluster.clusterInfo(), true);
NodeStateChangeChecker.Result result = nodeStateChangeChecker.evaluateTransition(
new Node(NodeType.STORAGE, 10), defaultAllUpClusterState(), SetUnitStateRequest.Condition.SAFE,
UP_NODE_STATE, MAINTENANCE_NODE_STATE);
@@ -161,7 +161,7 @@ public class NodeStateChangeCheckerTest {
ContentCluster cluster = createCluster(createNodes(4));
setAllNodesUp(cluster, HostInfo.createHostInfo(createDistributorHostInfo(4, 5, 6)));
NodeStateChangeChecker nodeStateChangeChecker = new NodeStateChangeChecker(
- 5 /* min storage nodes */, minRatioOfStorageNodesUp, requiredRedundancy, cluster.clusterInfo());
+ 5 /* min storage nodes */, minRatioOfStorageNodesUp, requiredRedundancy, cluster.clusterInfo(), true);
NodeStateChangeChecker.Result result = nodeStateChangeChecker.evaluateTransition(
nodeStorage, defaultAllUpClusterState(), SetUnitStateRequest.Condition.SAFE,
UP_NODE_STATE, MAINTENANCE_NODE_STATE);
@@ -549,12 +549,48 @@ public class NodeStateChangeCheckerTest {
" \"dimensions\":\n" +
" {\n" +
" }\n" +
+ " },\n" +
+ " {\n" +
+ " \"name\":\"vds.datastored.bucket_space.buckets_total\",\n" +
+ " \"description\":\"Total number buckets present in the bucket space (ready + not ready)\",\n" +
+ " \"values\":\n" +
+ " {\n" +
+ " \"average\":0.0,\n" +
+ " \"sum\":0.0,\n" +
+ " \"count\":1,\n" +
+ " \"rate\":0.016666,\n" +
+ " \"min\":0,\n" +
+ " \"max\":0,\n" +
+ " \"last\":0\n" +
+ " },\n" +
+ " \"dimensions\":\n" +
+ " {\n" +
+ " \"bucketSpace\":\"global\"\n" +
+ " }\n" +
+ " },\n" +
+ " {\n" +
+ " \"name\":\"vds.datastored.bucket_space.buckets_total\",\n" +
+ " \"description\":\"Total number buckets present in the bucket space (ready + not ready)\",\n" +
+ " \"values\":\n" +
+ " {\n" +
+ " \"average\":129.0,\n" +
+ " \"sum\":129.0,\n" +
+ " \"count\":1,\n" +
+ " \"rate\":0.016666,\n" +
+ " \"min\":129,\n" +
+ " \"max\":129,\n" +
+ " \"last\":%d\n" +
+ " },\n" +
+ " \"dimensions\":\n" +
+ " {\n" +
+ " \"bucketSpace\":\"default\"\n" +
+ " }\n" +
" }\n" +
" ]\n" +
" },\n" +
" \"cluster-state-version\":%d\n" +
"}",
- lastAlldisksBuckets, clusterStateVersion));
+ lastAlldisksBuckets, lastAlldisksBuckets, clusterStateVersion));
}
private List<ConfiguredNode> createNodes(int count) {
diff --git a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/StateChangeHandlerTest.java b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/StateChangeHandlerTest.java
index 68926b4d10d..dc76b381f51 100644
--- a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/StateChangeHandlerTest.java
+++ b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/StateChangeHandlerTest.java
@@ -80,7 +80,7 @@ public class StateChangeHandlerTest {
Distribution distribution = new Distribution(Distribution.getDefaultDistributionConfig(2, 100));
this.config = config;
for (int i=0; i<config.nodeCount; ++i) configuredNodes.add(new ConfiguredNode(i, false));
- cluster = new ContentCluster("testcluster", configuredNodes, distribution, 0, 0.0);
+ cluster = new ContentCluster("testcluster", configuredNodes, distribution, 0, 0.0, true);
nodeStateChangeHandler = new StateChangeHandler(clock, eventLog, null);
params.minStorageNodesUp(1).minDistributorNodesUp(1)
.minRatioOfStorageNodesUp(0.0).minRatioOfDistributorNodesUp(0.0)
diff --git a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/StateChangeTest.java b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/StateChangeTest.java
index 5f9e0d56cfa..8b7c50cda56 100644
--- a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/StateChangeTest.java
+++ b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/StateChangeTest.java
@@ -53,7 +53,7 @@ public class StateChangeTest extends FleetControllerTest {
MetricUpdater metricUpdater = new MetricUpdater(new NoMetricReporter(), options.fleetControllerIndex);
eventLog = new EventLog(timer, metricUpdater);
ContentCluster cluster = new ContentCluster(options.clusterName, options.nodes, options.storageDistribution,
- options.minStorageNodesUp, options.minRatioOfStorageNodesUp);
+ options.minStorageNodesUp, options.minRatioOfStorageNodesUp, true);
NodeStateGatherer stateGatherer = new NodeStateGatherer(timer, timer, eventLog);
DatabaseHandler database = new DatabaseHandler(new ZooKeeperDatabaseFactory(), timer, options.zooKeeperServerAddress, options.fleetControllerIndex, timer);
StateChangeHandler stateGenerator = new StateChangeHandler(timer, eventLog, metricUpdater);
diff --git a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/hostinfo/HostInfoTest.java b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/hostinfo/HostInfoTest.java
index 8804e9a9c96..696cfd42f5a 100644
--- a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/hostinfo/HostInfoTest.java
+++ b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/hostinfo/HostInfoTest.java
@@ -10,12 +10,15 @@ import java.nio.file.Path;
import java.nio.file.Paths;
import java.util.List;
import java.util.Map;
+import java.util.Optional;
import java.util.TreeMap;
import static org.hamcrest.CoreMatchers.equalTo;
import static org.hamcrest.CoreMatchers.nullValue;
import static org.hamcrest.core.Is.is;
-import static org.junit.Assert.*;
+import static org.junit.Assert.assertFalse;
+import static org.junit.Assert.assertThat;
+import static org.junit.Assert.assertTrue;
public class HostInfoTest {
@@ -49,11 +52,16 @@ public class HostInfoTest {
assertThat(storageNodeList.size(), is(2));
assertThat(storageNodeList.get(0).getIndex(), is(0));
List<Metrics.Metric> metrics = hostInfo.getMetrics().getMetrics();
- assertThat(metrics.size(), is(2));
- Metrics.Value value = metrics.get(0).getValue();
- assertThat(value.getLast(), is(5095L));
+ assertThat(metrics.size(), is(4));
+ assertThat(metrics.get(0).getValue().getLast(), is(5095L));
assertThat(metrics.get(0).getName(), equalTo("vds.datastored.alldisks.buckets"));
+ assertThat(metrics.get(3).getValue().getLast(), is(129L));
+ assertThat(metrics.get(3).getName(), equalTo("vds.datastored.bucket_space.buckets_total"));
assertThat(hostInfo.getClusterStateVersionOrNull(), is(123));
+
+ Optional<Metrics.Value> value = hostInfo.getMetrics()
+ .getValueAt("vds.datastored.bucket_space.buckets_total", Map.of("bucketSpace", "default"));
+ assertThat(value.map(Metrics.Value::getLast), equalTo(Optional.of(129L)));
}
@Test
diff --git a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/restapiv2/NodeTest.java b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/restapiv2/NodeTest.java
index 6bd7f086249..368f64352d1 100644
--- a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/restapiv2/NodeTest.java
+++ b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/restapiv2/NodeTest.java
@@ -121,7 +121,7 @@ public class NodeTest extends StateRestApiTest {
public void testNodeNotSeenInSlobrok() throws Exception {
setUp(true);
ContentCluster old = music.context.cluster;
- music.context.cluster = new ContentCluster(old.getName(), old.getConfiguredNodes().values(), old.getDistribution(), 0, 0.0);
+ music.context.cluster = new ContentCluster(old.getName(), old.getConfiguredNodes().values(), old.getDistribution(), 0, 0.0, true);
NodeState currentState = new NodeState(NodeType.STORAGE, State.DOWN);
currentState.setDescription("Not seen");
music.context.currentConsolidatedState.setNodeState(new Node(NodeType.STORAGE, 1), currentState);
diff --git a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/restapiv2/StateRestApiTest.java b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/restapiv2/StateRestApiTest.java
index faebbf8755d..44dcd50ae88 100644
--- a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/restapiv2/StateRestApiTest.java
+++ b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/restapiv2/StateRestApiTest.java
@@ -42,7 +42,7 @@ public abstract class StateRestApiTest {
{
Set<ConfiguredNode> nodes = FleetControllerTest.toNodes(0, 1, 2, 3);
ContentCluster cluster = new ContentCluster(
- "books", nodes, distribution, 6 /* minStorageNodesUp*/, 0.9 /* minRatioOfStorageNodesUp */);
+ "books", nodes, distribution, 6 /* minStorageNodesUp*/, 0.9, /* minRatioOfStorageNodesUp */true);
initializeCluster(cluster, nodes);
AnnotatedClusterState baselineState = AnnotatedClusterState.withoutAnnotations(ClusterState.stateFromString("distributor:4 storage:4"));
Map<String, AnnotatedClusterState> bucketSpaceStates = new HashMap<>();
@@ -56,7 +56,7 @@ public abstract class StateRestApiTest {
Set<ConfiguredNode> nodesInSlobrok = FleetControllerTest.toNodes(1, 3, 5, 7);
ContentCluster cluster = new ContentCluster(
- "music", nodes, distribution, 4 /* minStorageNodesUp*/, 0.0 /* minRatioOfStorageNodesUp */);
+ "music", nodes, distribution, 4 /* minStorageNodesUp*/, 0.0, /* minRatioOfStorageNodesUp */true);
if (dontInitializeNode2) {
// TODO: this skips initialization of node 2 to fake that it is not answering
// which really leaves us in an illegal state
diff --git a/config-model-api/src/main/java/com/yahoo/config/model/api/ModelContext.java b/config-model-api/src/main/java/com/yahoo/config/model/api/ModelContext.java
index a53b0931cc6..323aa473580 100644
--- a/config-model-api/src/main/java/com/yahoo/config/model/api/ModelContext.java
+++ b/config-model-api/src/main/java/com/yahoo/config/model/api/ModelContext.java
@@ -57,6 +57,7 @@ public interface ModelContext {
// TODO: Remove temporary default implementation
default Optional<TlsSecrets> tlsSecrets() { return Optional.empty(); }
double defaultTermwiseLimit();
+ boolean useBucketSpaceMetric();
}
}
diff --git a/config-model/src/main/java/com/yahoo/config/model/deploy/TestProperties.java b/config-model/src/main/java/com/yahoo/config/model/deploy/TestProperties.java
index cfa61560b29..9d561a79c75 100644
--- a/config-model/src/main/java/com/yahoo/config/model/deploy/TestProperties.java
+++ b/config-model/src/main/java/com/yahoo/config/model/deploy/TestProperties.java
@@ -58,6 +58,7 @@ public class TestProperties implements ModelContext.Properties {
@Override public boolean useDedicatedNodeForLogserver() { return useDedicatedNodeForLogserver; }
@Override public Optional<TlsSecrets> tlsSecrets() { return tlsSecrets; }
@Override public double defaultTermwiseLimit() { return defaultTermwiseLimit; }
+ @Override public boolean useBucketSpaceMetric() { return true; }
public TestProperties setDefaultTermwiseLimit(double limit) {
defaultTermwiseLimit = limit;
diff --git a/config-model/src/main/java/com/yahoo/vespa/model/content/ClusterControllerConfig.java b/config-model/src/main/java/com/yahoo/vespa/model/content/ClusterControllerConfig.java
index 1b0af3e9046..adff10ae580 100644
--- a/config-model/src/main/java/com/yahoo/vespa/model/content/ClusterControllerConfig.java
+++ b/config-model/src/main/java/com/yahoo/vespa/model/content/ClusterControllerConfig.java
@@ -40,6 +40,8 @@ public class ClusterControllerConfig extends AbstractConfigProducer implements F
bucketSplittingMinimumBits = clusterTuning.childAsInteger("bucket-splitting.minimum-bits");
}
+ boolean useBucketSpaceMetric = deployState.getProperties().useBucketSpaceMetric();
+
if (tuning != null) {
return new ClusterControllerConfig(ancestor, clusterName,
tuning.childAsDuration("init-progress-time"),
@@ -49,10 +51,11 @@ public class ClusterControllerConfig extends AbstractConfigProducer implements F
tuning.childAsDouble("min-distributor-up-ratio"),
tuning.childAsDouble("min-storage-up-ratio"),
bucketSplittingMinimumBits,
- minNodeRatioPerGroup);
+ minNodeRatioPerGroup,
+ useBucketSpaceMetric);
} else {
return new ClusterControllerConfig(ancestor, clusterName, null, null, null, null, null, null,
- bucketSplittingMinimumBits, minNodeRatioPerGroup);
+ bucketSplittingMinimumBits, minNodeRatioPerGroup, useBucketSpaceMetric);
}
}
}
@@ -66,6 +69,7 @@ public class ClusterControllerConfig extends AbstractConfigProducer implements F
Double minStorageUpRatio;
Integer minSplitBits;
private Double minNodeRatioPerGroup;
+ private boolean useBucketSpaceMetric;
// TODO refactor; too many args
private ClusterControllerConfig(AbstractConfigProducer parent,
@@ -77,7 +81,8 @@ public class ClusterControllerConfig extends AbstractConfigProducer implements F
Double minDistributorUpRatio,
Double minStorageUpRatio,
Integer minSplitBits,
- Double minNodeRatioPerGroup) {
+ Double minNodeRatioPerGroup,
+ boolean useBucketSpaceMetric) {
super(parent, "fleetcontroller");
this.clusterName = clusterName;
@@ -89,6 +94,7 @@ public class ClusterControllerConfig extends AbstractConfigProducer implements F
this.minStorageUpRatio = minStorageUpRatio;
this.minSplitBits = minSplitBits;
this.minNodeRatioPerGroup = minNodeRatioPerGroup;
+ this.useBucketSpaceMetric = useBucketSpaceMetric;
}
@Override
@@ -105,6 +111,7 @@ public class ClusterControllerConfig extends AbstractConfigProducer implements F
builder.index(0);
builder.cluster_name(clusterName);
builder.fleet_controller_count(getChildren().size());
+ builder.determine_buckets_from_bucket_space_metric(useBucketSpaceMetric);
if (initProgressTime != null) {
builder.init_progress_time((int) initProgressTime.getMilliSeconds());
diff --git a/configdefinitions/src/vespa/fleetcontroller.def b/configdefinitions/src/vespa/fleetcontroller.def
index 62f3b6759c3..3f2a5637a53 100644
--- a/configdefinitions/src/vespa/fleetcontroller.def
+++ b/configdefinitions/src/vespa/fleetcontroller.def
@@ -178,3 +178,11 @@ min_merge_completion_ratio double default=1.0
## activation may happen at wildly different times throughout the cluster. The 2 phase
## transition logic aims to minimize the window of time where active states diverge.
enable_two_phase_cluster_state_transitions bool default=false
+
+## Determines which metric will be used to decide whether a content node manages
+## zero buckets, when deciding whether it can be set permanently down (typically
+## to be removed from the application).
+## If true, use vds.datastored.bucket_space.buckets_total (new), otherwise use
+## vds.datastored.alldisks.buckets (legacy).
+## This setting is intended to be used to safely roll out the new metric.
+determine_buckets_from_bucket_space_metric bool default=true
diff --git a/configserver/src/main/java/com/yahoo/vespa/config/server/deploy/ModelContextImpl.java b/configserver/src/main/java/com/yahoo/vespa/config/server/deploy/ModelContextImpl.java
index 694ab8f037e..52d47a9398b 100644
--- a/configserver/src/main/java/com/yahoo/vespa/config/server/deploy/ModelContextImpl.java
+++ b/configserver/src/main/java/com/yahoo/vespa/config/server/deploy/ModelContextImpl.java
@@ -132,6 +132,7 @@ public class ModelContextImpl implements ModelContext {
private final boolean useAdaptiveDispatch;
private final Optional<TlsSecrets> tlsSecrets;
private final double defaultTermwiseLimit;
+ private final boolean useBucketSpaceMetric;
public Properties(ApplicationId applicationId,
boolean multitenantFromConfig,
@@ -162,6 +163,8 @@ public class ModelContextImpl implements ModelContext {
this.tlsSecrets = tlsSecrets;
defaultTermwiseLimit = Flags.DEFAULT_TERM_WISE_LIMIT.bindTo(flagSource)
.with(FetchVector.Dimension.APPLICATION_ID, applicationId.serializedForm()).value();
+ this.useBucketSpaceMetric = Flags.USE_BUCKET_SPACE_METRIC.bindTo(flagSource)
+ .with(FetchVector.Dimension.APPLICATION_ID, applicationId.serializedForm()).value();
}
@Override
@@ -209,6 +212,9 @@ public class ModelContextImpl implements ModelContext {
@Override
public double defaultTermwiseLimit() { return defaultTermwiseLimit; }
+
+ @Override
+ public boolean useBucketSpaceMetric() { return useBucketSpaceMetric; }
}
}
diff --git a/flags/src/main/java/com/yahoo/vespa/flags/Flags.java b/flags/src/main/java/com/yahoo/vespa/flags/Flags.java
index 532f7696893..9dd71f05565 100644
--- a/flags/src/main/java/com/yahoo/vespa/flags/Flags.java
+++ b/flags/src/main/java/com/yahoo/vespa/flags/Flags.java
@@ -83,6 +83,13 @@ public class Flags {
"Takes effect on next node agent tick. Change is orchestrated, but does NOT require container restart",
HOSTNAME, APPLICATION_ID);
+ // Selects the metric used to determine the number of buckets on a content node (new vs. legacy).
+ public static final UnboundBooleanFlag USE_BUCKET_SPACE_METRIC = defineFeatureFlag(
+ "use-bucket-space-metric", true,
+ "Whether to use vds.datastored.bucket_space.buckets_total (true) instead of " +
+ "vds.datastored.alldisks.buckets (false, legacy).",
+ "Takes effect on the next deployment of the application",
+ APPLICATION_ID);
+
public static final UnboundBooleanFlag INCLUDE_SIS_IN_TRUSTSTORE = defineFeatureFlag(
"include-sis-in-truststore", false,
"Whether to use the trust store backed by Athenz and (in public) Service Identity certificates in " +
diff --git a/protocols/getnodestate/host_info.json b/protocols/getnodestate/host_info.json
index 5bfa4f3171c..b14eaa0e13c 100644
--- a/protocols/getnodestate/host_info.json
+++ b/protocols/getnodestate/host_info.json
@@ -30,6 +30,40 @@
"last": 3069833
},
"dimensions": {}
+ },
+ {
+ "name":"vds.datastored.bucket_space.buckets_total",
+ "description":"Total number buckets present in the bucket space (ready + not ready)",
+ "values":
+ {
+ "average":0.0,
+ "sum":0.0,
+ "count":1,
+ "rate":0.016666,
+ "min":0,
+ "max":0,
+ "last":0
+ },
+ "dimensions":
+ {
+ "bucketSpace":"global"
+ }
+ },
+ {
+ "name": "vds.datastored.bucket_space.buckets_total",
+ "description": "Total number buckets present in the bucket space (ready + not ready)",
+ "values": {
+ "average": 129.0,
+ "sum": 129.0,
+ "count": 1,
+ "rate": 0.016666,
+ "min": 129,
+ "max": 129,
+ "last": 129
+ },
+ "dimensions": {
+ "bucketSpace": "default"
+ }
}
]
},
diff --git a/protocols/getnodestate/slow_host_info.json b/protocols/getnodestate/slow_host_info.json
index a489ac0a4b1..b792e45d5d6 100644
--- a/protocols/getnodestate/slow_host_info.json
+++ b/protocols/getnodestate/slow_host_info.json
@@ -9,6 +9,40 @@
"values":
[
{
+ "name":"vds.datastored.bucket_space.buckets_total",
+ "description":"Total number buckets present in the bucket space (ready + not ready)",
+ "values":
+ {
+ "average":0.0,
+ "sum":0.0,
+ "count":1,
+ "rate":0.016666,
+ "min":0,
+ "max":0,
+ "last":0
+ },
+ "dimensions":
+ {
+ "bucketSpace":"global"
+ }
+ },
+ {
+ "name": "vds.datastored.bucket_space.buckets_total",
+ "description": "Total number buckets present in the bucket space (ready + not ready)",
+ "values": {
+ "average": 129.0,
+ "sum": 129.0,
+ "count": 1,
+ "rate": 0.016666,
+ "min": 129,
+ "max": 129,
+ "last": 129
+ },
+ "dimensions": {
+ "bucketSpace": "default"
+ }
+ },
+ {
"name":"vds.datastored.alldisks.buckets",
"description":"buckets managed",
"values":