diff options
22 files changed, 222 insertions, 33 deletions
diff --git a/clustercontroller-apps/src/main/java/com/yahoo/vespa/clustercontroller/apps/clustercontroller/ClusterControllerClusterConfigurer.java b/clustercontroller-apps/src/main/java/com/yahoo/vespa/clustercontroller/apps/clustercontroller/ClusterControllerClusterConfigurer.java index c95d814eb99..bad1e526fba 100644 --- a/clustercontroller-apps/src/main/java/com/yahoo/vespa/clustercontroller/apps/clustercontroller/ClusterControllerClusterConfigurer.java +++ b/clustercontroller-apps/src/main/java/com/yahoo/vespa/clustercontroller/apps/clustercontroller/ClusterControllerClusterConfigurer.java @@ -76,6 +76,7 @@ public class ClusterControllerClusterConfigurer { options.clusterHasGlobalDocumentTypes = config.cluster_has_global_document_types(); options.minMergeCompletionRatio = config.min_merge_completion_ratio(); options.enableTwoPhaseClusterStateActivation = config.enable_two_phase_cluster_state_transitions(); + options.determineBucketsFromBucketSpaceMetric = config.determine_buckets_from_bucket_space_metric(); } private void configure(SlobroksConfig config) { diff --git a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/ContentCluster.java b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/ContentCluster.java index ddb9357f11f..ae12a6dabb1 100644 --- a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/ContentCluster.java +++ b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/ContentCluster.java @@ -22,6 +22,7 @@ public class ContentCluster { private final ClusterInfo clusterInfo = new ClusterInfo(); private final Map<Node, Long> nodeStartTimestamps = new TreeMap<>(); + private final boolean determineBucketsFromBucketSpaceMetric; private int slobrokGenerationCount = 0; @@ -32,7 +33,9 @@ public class ContentCluster { private double minRatioOfStorageNodesUp; public ContentCluster(String clusterName, Collection<ConfiguredNode> configuredNodes, Distribution 
distribution, - int minStorageNodesUp, double minRatioOfStorageNodesUp) { + int minStorageNodesUp, double minRatioOfStorageNodesUp, + boolean determineBucketsFromBucketSpaceMetric) { + this.determineBucketsFromBucketSpaceMetric = determineBucketsFromBucketSpaceMetric; if (configuredNodes == null) throw new IllegalArgumentException("Nodes must be set"); this.clusterName = clusterName; this.distribution = distribution; @@ -183,7 +186,8 @@ public class ContentCluster { minStorageNodesUp, minRatioOfStorageNodesUp, distribution.getRedundancy(), - clusterInfo); + clusterInfo, + determineBucketsFromBucketSpaceMetric); return nodeStateChangeChecker.evaluateTransition(node, clusterState, condition, oldState, newState); } diff --git a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/FleetController.java b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/FleetController.java index d943cf27f9c..02f52d5f0c7 100644 --- a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/FleetController.java +++ b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/FleetController.java @@ -193,7 +193,7 @@ public class FleetController implements NodeStateOrHostInfoChangeHandler, NodeAd options.nodes, options.storageDistribution, options.minStorageNodesUp, - options.minRatioOfStorageNodesUp); + options.minRatioOfStorageNodesUp, true); NodeStateGatherer stateGatherer = new NodeStateGatherer(timer, timer, log); Communicator communicator = new RPCCommunicator( RPCCommunicator.createRealSupervisor(), diff --git a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/FleetControllerOptions.java b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/FleetControllerOptions.java index 553b3332ee8..7ad6765cc47 100644 --- a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/FleetControllerOptions.java +++ 
b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/FleetControllerOptions.java @@ -128,6 +128,9 @@ public class FleetControllerOptions implements Cloneable { public int maxDivergentNodesPrintedInTaskErrorMessages = 10; + // TODO: May be removed once rolled out everywhere. + public boolean determineBucketsFromBucketSpaceMetric = true; + // TODO: Replace usage of this by usage where the nodes are explicitly passed (below) public FleetControllerOptions(String clusterName) { this.clusterName = clusterName; diff --git a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/NodeStateChangeChecker.java b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/NodeStateChangeChecker.java index c31a4976827..6bcb5b07f28 100644 --- a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/NodeStateChangeChecker.java +++ b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/NodeStateChangeChecker.java @@ -12,6 +12,7 @@ import com.yahoo.vespa.clustercontroller.core.hostinfo.StorageNode; import com.yahoo.vespa.clustercontroller.utils.staterestapi.requests.SetUnitStateRequest; import java.util.List; +import java.util.Map; import java.util.Optional; /** @@ -20,22 +21,27 @@ import java.util.Optional; * @author Haakon Dybdahl */ public class NodeStateChangeChecker { - public static final String BUCKETS_METRIC_NAME = "vds.datastored.alldisks.buckets"; + public static final String LEGACY_BUCKETS_METRIC_NAME = "vds.datastored.alldisks.buckets"; + public static final String BUCKETS_METRIC_NAME = "vds.datastored.bucket_space.buckets_total"; + public static final Map<String, String> BUCKETS_METRIC_DIMENSIONS = Map.of("bucketSpace", "default"); private final int minStorageNodesUp; private double minRatioOfStorageNodesUp; private final int requiredRedundancy; private final ClusterInfo clusterInfo; + private final boolean determineBucketsFromBucketSpaceMetric; public 
NodeStateChangeChecker( int minStorageNodesUp, double minRatioOfStorageNodesUp, int requiredRedundancy, - ClusterInfo clusterInfo) { + ClusterInfo clusterInfo, + boolean determineBucketsFromBucketSpaceMetric) { this.minStorageNodesUp = minStorageNodesUp; this.minRatioOfStorageNodesUp = minRatioOfStorageNodesUp; this.requiredRedundancy = requiredRedundancy; this.clusterInfo = clusterInfo; + this.determineBucketsFromBucketSpaceMetric = determineBucketsFromBucketSpaceMetric; } public static class Result { @@ -152,10 +158,19 @@ public class NodeStateChangeChecker { + hostInfoNodeVersion); } - Optional<Metrics.Value> bucketsMetric = hostInfo.getMetrics().getValue(BUCKETS_METRIC_NAME); - if (!bucketsMetric.isPresent() || bucketsMetric.get().getLast() == null) { - return Result.createDisallowed("Missing last value of the " + BUCKETS_METRIC_NAME + - " metric for storage node " + nodeInfo.getNodeIndex()); + Optional<Metrics.Value> bucketsMetric; + if (determineBucketsFromBucketSpaceMetric) { + bucketsMetric = hostInfo.getMetrics().getValueAt(BUCKETS_METRIC_NAME, BUCKETS_METRIC_DIMENSIONS); + if (!bucketsMetric.isPresent() || bucketsMetric.get().getLast() == null) { + return Result.createDisallowed("Missing last value of the " + BUCKETS_METRIC_NAME + + " metric for storage node " + nodeInfo.getNodeIndex()); + } + } else { + bucketsMetric = hostInfo.getMetrics().getValue(LEGACY_BUCKETS_METRIC_NAME); + if (!bucketsMetric.isPresent() || bucketsMetric.get().getLast() == null) { + return Result.createDisallowed("Missing last value of the " + LEGACY_BUCKETS_METRIC_NAME + + " metric for storage node " + nodeInfo.getNodeIndex()); + } } long lastBuckets = bucketsMetric.get().getLast(); diff --git a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/hostinfo/Metrics.java b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/hostinfo/Metrics.java index cb2a1e92612..eef3fd2e217 100644 --- 
a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/hostinfo/Metrics.java +++ b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/hostinfo/Metrics.java @@ -6,6 +6,7 @@ import com.fasterxml.jackson.annotation.JsonProperty; import java.util.ArrayList; import java.util.Collections; import java.util.List; +import java.util.Map; import java.util.Optional; /** @@ -25,21 +26,44 @@ public class Metrics { return Optional.empty(); } + /** + * Get the metric value whose dimensions MUST MATCH the given dimensions map. + * To require the metric to NOT have a dimension key, set its value to null. + */ + public Optional<Value> getValueAt(String name, Map<String, String> dimensions) { + return metricsList.stream() + .filter(metric -> metric.name.equals(name)) + .filter(metric -> dimensions.entrySet().stream() + .allMatch(entry -> { + String dimensionName = entry.getKey(); + Optional<String> requiredDimensionValue = Optional.ofNullable(entry.getValue()); + return metric.getDimensionValue(dimensionName).equals(requiredDimensionValue); + })) + .map(Metric::getValue) + .findFirst(); + } + public List<Metric> getMetrics() { return Collections.unmodifiableList(metricsList); } public static class Metric { private final String name; private final Value value; + private final Map<String, String> dimensions; public Metric( @JsonProperty("name") String name, - @JsonProperty("values") Value value) { + @JsonProperty("values") Value value, + @JsonProperty("dimensions") Map<String, String> dimensions) { this.name = name; this.value = value; + this.dimensions = dimensions; } public String getName() { return name; } public Value getValue() { return value; } + public Optional<String> getDimensionValue(String dimension) { + return Optional.ofNullable(dimensions.get(dimension)); + } } public static class Value { diff --git a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/ClusterFixture.java 
b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/ClusterFixture.java index 2df9279e450..36c49fdf5e2 100644 --- a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/ClusterFixture.java +++ b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/ClusterFixture.java @@ -198,7 +198,7 @@ public class ClusterFixture { Collection<ConfiguredNode> nodes = DistributionBuilder.buildConfiguredNodes(nodeCount); Distribution distribution = DistributionBuilder.forFlatCluster(nodeCount); - ContentCluster cluster = new ContentCluster("foo", nodes, distribution, 0, 0.0); + ContentCluster cluster = new ContentCluster("foo", nodes, distribution, 0, 0.0, true); return new ClusterFixture(cluster, distribution); } @@ -206,7 +206,7 @@ public class ClusterFixture { static ClusterFixture forHierarchicCluster(DistributionBuilder.GroupBuilder root) { List<ConfiguredNode> nodes = DistributionBuilder.buildConfiguredNodes(root.totalNodeCount()); Distribution distribution = DistributionBuilder.forHierarchicCluster(root); - ContentCluster cluster = new ContentCluster("foo", nodes, distribution, 0, 0.0); + ContentCluster cluster = new ContentCluster("foo", nodes, distribution, 0, 0.0, true); return new ClusterFixture(cluster, distribution); } diff --git a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/FleetControllerTest.java b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/FleetControllerTest.java index ae64bee6bbf..d569feb6f14 100644 --- a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/FleetControllerTest.java +++ b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/FleetControllerTest.java @@ -34,8 +34,6 @@ import org.junit.rules.TestRule; import org.junit.rules.TestWatcher; import org.junit.runner.Description; -import static org.junit.Assert.fail; - import java.util.ArrayList; import java.util.Arrays; import 
java.util.Collection; @@ -49,6 +47,8 @@ import java.util.logging.Logger; import java.util.regex.Pattern; import java.util.stream.Collectors; +import static org.junit.Assert.fail; + /** * @author Håkon Humberset */ @@ -157,7 +157,7 @@ public abstract class FleetControllerTest implements Waiter { options.nodes, options.storageDistribution, options.minStorageNodesUp, - options.minRatioOfStorageNodesUp); + options.minRatioOfStorageNodesUp, true); NodeStateGatherer stateGatherer = new NodeStateGatherer(timer, timer, log); Communicator communicator = new RPCCommunicator( RPCCommunicator.createRealSupervisor(), diff --git a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/NodeStateChangeCheckerTest.java b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/NodeStateChangeCheckerTest.java index 303376e7a5e..153d570adaf 100644 --- a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/NodeStateChangeCheckerTest.java +++ b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/NodeStateChangeCheckerTest.java @@ -61,14 +61,14 @@ public class NodeStateChangeCheckerTest { } private NodeStateChangeChecker createChangeChecker(ContentCluster cluster) { - return new NodeStateChangeChecker(minStorageNodesUp, minRatioOfStorageNodesUp, requiredRedundancy, cluster.clusterInfo()); + return new NodeStateChangeChecker(minStorageNodesUp, minRatioOfStorageNodesUp, requiredRedundancy, cluster.clusterInfo(), true); } private ContentCluster createCluster(Collection<ConfiguredNode> nodes) { Distribution distribution = mock(Distribution.class); Group group = new Group(2, "to"); when(distribution.getRootGroup()).thenReturn(group); - return new ContentCluster("Clustername", nodes, distribution, minStorageNodesUp, 0.0); + return new ContentCluster("Clustername", nodes, distribution, minStorageNodesUp, 0.0, true); } private StorageNodeInfo createStorageNodeInfo(int index, State state) { @@ -78,7 +78,7 @@ 
public class NodeStateChangeCheckerTest { String clusterName = "Clustername"; Set<ConfiguredNode> configuredNodeIndexes = new HashSet<>(); - ContentCluster cluster = new ContentCluster(clusterName, configuredNodeIndexes, distribution, minStorageNodesUp, 0.0); + ContentCluster cluster = new ContentCluster(clusterName, configuredNodeIndexes, distribution, minStorageNodesUp, 0.0, true); String rpcAddress = ""; StorageNodeInfo storageNodeInfo = new StorageNodeInfo(cluster, index, false, rpcAddress, distribution); @@ -136,7 +136,7 @@ public class NodeStateChangeCheckerTest { public void testUnknownStorageNode() { ContentCluster cluster = createCluster(createNodes(4)); NodeStateChangeChecker nodeStateChangeChecker = new NodeStateChangeChecker( - 5 /* min storage nodes */, minRatioOfStorageNodesUp, requiredRedundancy, cluster.clusterInfo()); + 5 /* min storage nodes */, minRatioOfStorageNodesUp, requiredRedundancy, cluster.clusterInfo(), true); NodeStateChangeChecker.Result result = nodeStateChangeChecker.evaluateTransition( new Node(NodeType.STORAGE, 10), defaultAllUpClusterState(), SetUnitStateRequest.Condition.SAFE, UP_NODE_STATE, MAINTENANCE_NODE_STATE); @@ -161,7 +161,7 @@ public class NodeStateChangeCheckerTest { ContentCluster cluster = createCluster(createNodes(4)); setAllNodesUp(cluster, HostInfo.createHostInfo(createDistributorHostInfo(4, 5, 6))); NodeStateChangeChecker nodeStateChangeChecker = new NodeStateChangeChecker( - 5 /* min storage nodes */, minRatioOfStorageNodesUp, requiredRedundancy, cluster.clusterInfo()); + 5 /* min storage nodes */, minRatioOfStorageNodesUp, requiredRedundancy, cluster.clusterInfo(), true); NodeStateChangeChecker.Result result = nodeStateChangeChecker.evaluateTransition( nodeStorage, defaultAllUpClusterState(), SetUnitStateRequest.Condition.SAFE, UP_NODE_STATE, MAINTENANCE_NODE_STATE); @@ -549,12 +549,48 @@ public class NodeStateChangeCheckerTest { " \"dimensions\":\n" + " {\n" + " }\n" + + " },\n" + + " {\n" + + " 
\"name\":\"vds.datastored.bucket_space.buckets_total\",\n" + + " \"description\":\"Total number buckets present in the bucket space (ready + not ready)\",\n" + + " \"values\":\n" + + " {\n" + + " \"average\":0.0,\n" + + " \"sum\":0.0,\n" + + " \"count\":1,\n" + + " \"rate\":0.016666,\n" + + " \"min\":0,\n" + + " \"max\":0,\n" + + " \"last\":0\n" + + " },\n" + + " \"dimensions\":\n" + + " {\n" + + " \"bucketSpace\":\"global\"\n" + + " }\n" + + " },\n" + + " {\n" + + " \"name\":\"vds.datastored.bucket_space.buckets_total\",\n" + + " \"description\":\"Total number buckets present in the bucket space (ready + not ready)\",\n" + + " \"values\":\n" + + " {\n" + + " \"average\":129.0,\n" + + " \"sum\":129.0,\n" + + " \"count\":1,\n" + + " \"rate\":0.016666,\n" + + " \"min\":129,\n" + + " \"max\":129,\n" + + " \"last\":%d\n" + + " },\n" + + " \"dimensions\":\n" + + " {\n" + + " \"bucketSpace\":\"default\"\n" + + " }\n" + " }\n" + " ]\n" + " },\n" + " \"cluster-state-version\":%d\n" + "}", - lastAlldisksBuckets, clusterStateVersion)); + lastAlldisksBuckets, lastAlldisksBuckets, clusterStateVersion)); } private List<ConfiguredNode> createNodes(int count) { diff --git a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/StateChangeHandlerTest.java b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/StateChangeHandlerTest.java index 68926b4d10d..dc76b381f51 100644 --- a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/StateChangeHandlerTest.java +++ b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/StateChangeHandlerTest.java @@ -80,7 +80,7 @@ public class StateChangeHandlerTest { Distribution distribution = new Distribution(Distribution.getDefaultDistributionConfig(2, 100)); this.config = config; for (int i=0; i<config.nodeCount; ++i) configuredNodes.add(new ConfiguredNode(i, false)); - cluster = new ContentCluster("testcluster", configuredNodes, distribution, 0, 0.0); + 
cluster = new ContentCluster("testcluster", configuredNodes, distribution, 0, 0.0, true); nodeStateChangeHandler = new StateChangeHandler(clock, eventLog, null); params.minStorageNodesUp(1).minDistributorNodesUp(1) .minRatioOfStorageNodesUp(0.0).minRatioOfDistributorNodesUp(0.0) diff --git a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/StateChangeTest.java b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/StateChangeTest.java index 5f9e0d56cfa..8b7c50cda56 100644 --- a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/StateChangeTest.java +++ b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/StateChangeTest.java @@ -53,7 +53,7 @@ public class StateChangeTest extends FleetControllerTest { MetricUpdater metricUpdater = new MetricUpdater(new NoMetricReporter(), options.fleetControllerIndex); eventLog = new EventLog(timer, metricUpdater); ContentCluster cluster = new ContentCluster(options.clusterName, options.nodes, options.storageDistribution, - options.minStorageNodesUp, options.minRatioOfStorageNodesUp); + options.minStorageNodesUp, options.minRatioOfStorageNodesUp, true); NodeStateGatherer stateGatherer = new NodeStateGatherer(timer, timer, eventLog); DatabaseHandler database = new DatabaseHandler(new ZooKeeperDatabaseFactory(), timer, options.zooKeeperServerAddress, options.fleetControllerIndex, timer); StateChangeHandler stateGenerator = new StateChangeHandler(timer, eventLog, metricUpdater); diff --git a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/hostinfo/HostInfoTest.java b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/hostinfo/HostInfoTest.java index 8804e9a9c96..696cfd42f5a 100644 --- a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/hostinfo/HostInfoTest.java +++ 
b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/hostinfo/HostInfoTest.java @@ -10,12 +10,15 @@ import java.nio.file.Path; import java.nio.file.Paths; import java.util.List; import java.util.Map; +import java.util.Optional; import java.util.TreeMap; import static org.hamcrest.CoreMatchers.equalTo; import static org.hamcrest.CoreMatchers.nullValue; import static org.hamcrest.core.Is.is; -import static org.junit.Assert.*; +import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertThat; +import static org.junit.Assert.assertTrue; public class HostInfoTest { @@ -49,11 +52,16 @@ public class HostInfoTest { assertThat(storageNodeList.size(), is(2)); assertThat(storageNodeList.get(0).getIndex(), is(0)); List<Metrics.Metric> metrics = hostInfo.getMetrics().getMetrics(); - assertThat(metrics.size(), is(2)); - Metrics.Value value = metrics.get(0).getValue(); - assertThat(value.getLast(), is(5095L)); + assertThat(metrics.size(), is(4)); + assertThat(metrics.get(0).getValue().getLast(), is(5095L)); assertThat(metrics.get(0).getName(), equalTo("vds.datastored.alldisks.buckets")); + assertThat(metrics.get(3).getValue().getLast(), is(129L)); + assertThat(metrics.get(3).getName(), equalTo("vds.datastored.bucket_space.buckets_total")); assertThat(hostInfo.getClusterStateVersionOrNull(), is(123)); + + Optional<Metrics.Value> value = hostInfo.getMetrics() + .getValueAt("vds.datastored.bucket_space.buckets_total", Map.of("bucketSpace", "default")); + assertThat(value.map(Metrics.Value::getLast), equalTo(Optional.of(129L))); } @Test diff --git a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/restapiv2/NodeTest.java b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/restapiv2/NodeTest.java index 6bd7f086249..368f64352d1 100644 --- a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/restapiv2/NodeTest.java +++ 
b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/restapiv2/NodeTest.java @@ -121,7 +121,7 @@ public class NodeTest extends StateRestApiTest { public void testNodeNotSeenInSlobrok() throws Exception { setUp(true); ContentCluster old = music.context.cluster; - music.context.cluster = new ContentCluster(old.getName(), old.getConfiguredNodes().values(), old.getDistribution(), 0, 0.0); + music.context.cluster = new ContentCluster(old.getName(), old.getConfiguredNodes().values(), old.getDistribution(), 0, 0.0, true); NodeState currentState = new NodeState(NodeType.STORAGE, State.DOWN); currentState.setDescription("Not seen"); music.context.currentConsolidatedState.setNodeState(new Node(NodeType.STORAGE, 1), currentState); diff --git a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/restapiv2/StateRestApiTest.java b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/restapiv2/StateRestApiTest.java index faebbf8755d..44dcd50ae88 100644 --- a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/restapiv2/StateRestApiTest.java +++ b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/restapiv2/StateRestApiTest.java @@ -42,7 +42,7 @@ public abstract class StateRestApiTest { { Set<ConfiguredNode> nodes = FleetControllerTest.toNodes(0, 1, 2, 3); ContentCluster cluster = new ContentCluster( - "books", nodes, distribution, 6 /* minStorageNodesUp*/, 0.9 /* minRatioOfStorageNodesUp */); + "books", nodes, distribution, 6 /* minStorageNodesUp*/, 0.9, /* minRatioOfStorageNodesUp */true); initializeCluster(cluster, nodes); AnnotatedClusterState baselineState = AnnotatedClusterState.withoutAnnotations(ClusterState.stateFromString("distributor:4 storage:4")); Map<String, AnnotatedClusterState> bucketSpaceStates = new HashMap<>(); @@ -56,7 +56,7 @@ public abstract class StateRestApiTest { Set<ConfiguredNode> nodesInSlobrok = FleetControllerTest.toNodes(1, 3, 5, 
7); ContentCluster cluster = new ContentCluster( - "music", nodes, distribution, 4 /* minStorageNodesUp*/, 0.0 /* minRatioOfStorageNodesUp */); + "music", nodes, distribution, 4 /* minStorageNodesUp*/, 0.0, /* minRatioOfStorageNodesUp */true); if (dontInitializeNode2) { // TODO: this skips initialization of node 2 to fake that it is not answering // which really leaves us in an illegal state diff --git a/config-model-api/src/main/java/com/yahoo/config/model/api/ModelContext.java b/config-model-api/src/main/java/com/yahoo/config/model/api/ModelContext.java index a53b0931cc6..323aa473580 100644 --- a/config-model-api/src/main/java/com/yahoo/config/model/api/ModelContext.java +++ b/config-model-api/src/main/java/com/yahoo/config/model/api/ModelContext.java @@ -57,6 +57,7 @@ public interface ModelContext { // TODO: Remove temporary default implementation default Optional<TlsSecrets> tlsSecrets() { return Optional.empty(); } double defaultTermwiseLimit(); + boolean useBucketSpaceMetric(); } } diff --git a/config-model/src/main/java/com/yahoo/config/model/deploy/TestProperties.java b/config-model/src/main/java/com/yahoo/config/model/deploy/TestProperties.java index cfa61560b29..9d561a79c75 100644 --- a/config-model/src/main/java/com/yahoo/config/model/deploy/TestProperties.java +++ b/config-model/src/main/java/com/yahoo/config/model/deploy/TestProperties.java @@ -58,6 +58,7 @@ public class TestProperties implements ModelContext.Properties { @Override public boolean useDedicatedNodeForLogserver() { return useDedicatedNodeForLogserver; } @Override public Optional<TlsSecrets> tlsSecrets() { return tlsSecrets; } @Override public double defaultTermwiseLimit() { return defaultTermwiseLimit; } + @Override public boolean useBucketSpaceMetric() { return true; } public TestProperties setDefaultTermwiseLimit(double limit) { defaultTermwiseLimit = limit; diff --git a/config-model/src/main/java/com/yahoo/vespa/model/content/ClusterControllerConfig.java 
b/config-model/src/main/java/com/yahoo/vespa/model/content/ClusterControllerConfig.java index 1b0af3e9046..adff10ae580 100644 --- a/config-model/src/main/java/com/yahoo/vespa/model/content/ClusterControllerConfig.java +++ b/config-model/src/main/java/com/yahoo/vespa/model/content/ClusterControllerConfig.java @@ -40,6 +40,8 @@ public class ClusterControllerConfig extends AbstractConfigProducer implements F bucketSplittingMinimumBits = clusterTuning.childAsInteger("bucket-splitting.minimum-bits"); } + boolean useBucketSpaceMetric = deployState.getProperties().useBucketSpaceMetric(); + if (tuning != null) { return new ClusterControllerConfig(ancestor, clusterName, tuning.childAsDuration("init-progress-time"), @@ -49,10 +51,11 @@ public class ClusterControllerConfig extends AbstractConfigProducer implements F tuning.childAsDouble("min-distributor-up-ratio"), tuning.childAsDouble("min-storage-up-ratio"), bucketSplittingMinimumBits, - minNodeRatioPerGroup); + minNodeRatioPerGroup, + useBucketSpaceMetric); } else { return new ClusterControllerConfig(ancestor, clusterName, null, null, null, null, null, null, - bucketSplittingMinimumBits, minNodeRatioPerGroup); + bucketSplittingMinimumBits, minNodeRatioPerGroup, useBucketSpaceMetric); } } } @@ -66,6 +69,7 @@ public class ClusterControllerConfig extends AbstractConfigProducer implements F Double minStorageUpRatio; Integer minSplitBits; private Double minNodeRatioPerGroup; + private boolean useBucketSpaceMetric; // TODO refactor; too many args private ClusterControllerConfig(AbstractConfigProducer parent, @@ -77,7 +81,8 @@ public class ClusterControllerConfig extends AbstractConfigProducer implements F Double minDistributorUpRatio, Double minStorageUpRatio, Integer minSplitBits, - Double minNodeRatioPerGroup) { + Double minNodeRatioPerGroup, + boolean useBucketSpaceMetric) { super(parent, "fleetcontroller"); this.clusterName = clusterName; @@ -89,6 +94,7 @@ public class ClusterControllerConfig extends AbstractConfigProducer 
implements F this.minStorageUpRatio = minStorageUpRatio; this.minSplitBits = minSplitBits; this.minNodeRatioPerGroup = minNodeRatioPerGroup; + this.useBucketSpaceMetric = useBucketSpaceMetric; } @Override @@ -105,6 +111,7 @@ public class ClusterControllerConfig extends AbstractConfigProducer implements F builder.index(0); builder.cluster_name(clusterName); builder.fleet_controller_count(getChildren().size()); + builder.determine_buckets_from_bucket_space_metric(useBucketSpaceMetric); if (initProgressTime != null) { builder.init_progress_time((int) initProgressTime.getMilliSeconds()); diff --git a/configdefinitions/src/vespa/fleetcontroller.def b/configdefinitions/src/vespa/fleetcontroller.def index 62f3b6759c3..3f2a5637a53 100644 --- a/configdefinitions/src/vespa/fleetcontroller.def +++ b/configdefinitions/src/vespa/fleetcontroller.def @@ -178,3 +178,11 @@ min_merge_completion_ratio double default=1.0 ## activation may happen at wildly different times throughout the cluster. The 2 phase ## transition logic aims to minimize the window of time where active states diverge. enable_two_phase_cluster_state_transitions bool default=false + +## Determines which metric will be used to decide whether a content node manages +## zero buckets, when deciding whether it can be set permanently down (typically +## to be removed from the application). +## If true, use vds.datastored.bucket_space.buckets_total (new), otherwise use +## vds.datastored.alldisks.buckets (legacy). +## This setting is intended to be used to safely roll out the new metric. 
+determine_buckets_from_bucket_space_metric bool default=true diff --git a/configserver/src/main/java/com/yahoo/vespa/config/server/deploy/ModelContextImpl.java b/configserver/src/main/java/com/yahoo/vespa/config/server/deploy/ModelContextImpl.java index 694ab8f037e..52d47a9398b 100644 --- a/configserver/src/main/java/com/yahoo/vespa/config/server/deploy/ModelContextImpl.java +++ b/configserver/src/main/java/com/yahoo/vespa/config/server/deploy/ModelContextImpl.java @@ -132,6 +132,7 @@ public class ModelContextImpl implements ModelContext { private final boolean useAdaptiveDispatch; private final Optional<TlsSecrets> tlsSecrets; private final double defaultTermwiseLimit; + private final boolean useBucketSpaceMetric; public Properties(ApplicationId applicationId, boolean multitenantFromConfig, @@ -162,6 +163,8 @@ public class ModelContextImpl implements ModelContext { this.tlsSecrets = tlsSecrets; defaultTermwiseLimit = Flags.DEFAULT_TERM_WISE_LIMIT.bindTo(flagSource) .with(FetchVector.Dimension.APPLICATION_ID, applicationId.serializedForm()).value(); + this.useBucketSpaceMetric = Flags.USE_BUCKET_SPACE_METRIC.bindTo(flagSource) + .with(FetchVector.Dimension.APPLICATION_ID, applicationId.serializedForm()).value(); } @Override @@ -209,6 +212,9 @@ public class ModelContextImpl implements ModelContext { @Override public double defaultTermwiseLimit() { return defaultTermwiseLimit; } + + @Override + public boolean useBucketSpaceMetric() { return useBucketSpaceMetric; } } } diff --git a/flags/src/main/java/com/yahoo/vespa/flags/Flags.java b/flags/src/main/java/com/yahoo/vespa/flags/Flags.java index 532f7696893..9dd71f05565 100644 --- a/flags/src/main/java/com/yahoo/vespa/flags/Flags.java +++ b/flags/src/main/java/com/yahoo/vespa/flags/Flags.java @@ -83,6 +83,13 @@ public class Flags { "Takes effect on next node agent tick. 
Change is orchestrated, but does NOT require container restart", HOSTNAME, APPLICATION_ID); + public static final UnboundBooleanFlag USE_BUCKET_SPACE_METRIC = defineFeatureFlag( + "use-bucket-space-metric", true, + "Whether to use vds.datastored.bucket_space.buckets_total (true) instead of " + + "vds.datastored.alldisks.buckets (false, legacy).", + "Takes efefct on the next deployment of the application", + APPLICATION_ID); + public static final UnboundBooleanFlag INCLUDE_SIS_IN_TRUSTSTORE = defineFeatureFlag( "include-sis-in-truststore", false, "Whether to use the trust store backed by Athenz and (in public) Service Identity certificates in " + diff --git a/protocols/getnodestate/host_info.json b/protocols/getnodestate/host_info.json index 5bfa4f3171c..b14eaa0e13c 100644 --- a/protocols/getnodestate/host_info.json +++ b/protocols/getnodestate/host_info.json @@ -30,6 +30,40 @@ "last": 3069833 }, "dimensions": {} + }, + { + "name":"vds.datastored.bucket_space.buckets_total", + "description":"Total number buckets present in the bucket space (ready + not ready)", + "values": + { + "average":0.0, + "sum":0.0, + "count":1, + "rate":0.016666, + "min":0, + "max":0, + "last":0 + }, + "dimensions": + { + "bucketSpace":"global" + } + }, + { + "name": "vds.datastored.bucket_space.buckets_total", + "description": "Total number buckets present in the bucket space (ready + not ready)", + "values": { + "average": 129.0, + "sum": 129.0, + "count": 1, + "rate": 0.016666, + "min": 129, + "max": 129, + "last": 129 + }, + "dimensions": { + "bucketSpace": "default" + } } ] }, diff --git a/protocols/getnodestate/slow_host_info.json b/protocols/getnodestate/slow_host_info.json index a489ac0a4b1..b792e45d5d6 100644 --- a/protocols/getnodestate/slow_host_info.json +++ b/protocols/getnodestate/slow_host_info.json @@ -9,6 +9,40 @@ "values": [ { + "name":"vds.datastored.bucket_space.buckets_total", + "description":"Total number buckets present in the bucket space (ready + not ready)", + 
"values": + { + "average":0.0, + "sum":0.0, + "count":1, + "rate":0.016666, + "min":0, + "max":0, + "last":0 + }, + "dimensions": + { + "bucketSpace":"global" + } + }, + { + "name": "vds.datastored.bucket_space.buckets_total", + "description": "Total number buckets present in the bucket space (ready + not ready)", + "values": { + "average": 129.0, + "sum": 129.0, + "count": 1, + "rate": 0.016666, + "min": 129, + "max": 129, + "last": 129 + }, + "dimensions": { + "bucketSpace": "default" + } + }, + { "name":"vds.datastored.alldisks.buckets", "description":"buckets managed", "values": |