diff options
author | Håkon Hallingstad <hakon@verizonmedia.com> | 2020-01-17 12:39:12 +0100 |
---|---|---|
committer | Håkon Hallingstad <hakon@verizonmedia.com> | 2020-01-17 12:39:12 +0100 |
commit | c28f13dac59167dee1257f5b23835e6441bc5f31 (patch) | |
tree | 7910cb0ce12e37b7af828c3b13c14b84cbcb0105 /clustercontroller-core/src/main/java | |
parent | b324c19e007a7a57ba731ed72a01d35cd6937ed7 (diff) |
Use bucket_space metric in retirement
This makes the Cluster Controller use the
vds.datastored.bucket_space.buckets_total metric, with dimension
bucketSpace=default, to determine whether a content node manages zero buckets;
if so, the node is allowed to go permanently down. This is used when a
retiring node is to be removed from the application.
The change is guarded by the use-bucket-space-metric flag, which defaults to
true. If the new metric doesn't work as expected, we can revert to the old
metric (vds.datastored.alldisks.buckets) by flipping the flag. The flag can be
controlled per application.
Diffstat (limited to 'clustercontroller-core/src/main/java')
5 files changed, 56 insertions, 10 deletions
diff --git a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/ContentCluster.java b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/ContentCluster.java index ddb9357f11f..ae12a6dabb1 100644 --- a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/ContentCluster.java +++ b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/ContentCluster.java @@ -22,6 +22,7 @@ public class ContentCluster { private final ClusterInfo clusterInfo = new ClusterInfo(); private final Map<Node, Long> nodeStartTimestamps = new TreeMap<>(); + private final boolean determineBucketsFromBucketSpaceMetric; private int slobrokGenerationCount = 0; @@ -32,7 +33,9 @@ public class ContentCluster { private double minRatioOfStorageNodesUp; public ContentCluster(String clusterName, Collection<ConfiguredNode> configuredNodes, Distribution distribution, - int minStorageNodesUp, double minRatioOfStorageNodesUp) { + int minStorageNodesUp, double minRatioOfStorageNodesUp, + boolean determineBucketsFromBucketSpaceMetric) { + this.determineBucketsFromBucketSpaceMetric = determineBucketsFromBucketSpaceMetric; if (configuredNodes == null) throw new IllegalArgumentException("Nodes must be set"); this.clusterName = clusterName; this.distribution = distribution; @@ -183,7 +186,8 @@ public class ContentCluster { minStorageNodesUp, minRatioOfStorageNodesUp, distribution.getRedundancy(), - clusterInfo); + clusterInfo, + determineBucketsFromBucketSpaceMetric); return nodeStateChangeChecker.evaluateTransition(node, clusterState, condition, oldState, newState); } diff --git a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/FleetController.java b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/FleetController.java index d943cf27f9c..02f52d5f0c7 100644 --- a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/FleetController.java +++ 
b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/FleetController.java @@ -193,7 +193,7 @@ public class FleetController implements NodeStateOrHostInfoChangeHandler, NodeAd options.nodes, options.storageDistribution, options.minStorageNodesUp, - options.minRatioOfStorageNodesUp); + options.minRatioOfStorageNodesUp, true); NodeStateGatherer stateGatherer = new NodeStateGatherer(timer, timer, log); Communicator communicator = new RPCCommunicator( RPCCommunicator.createRealSupervisor(), diff --git a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/FleetControllerOptions.java b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/FleetControllerOptions.java index 553b3332ee8..7ad6765cc47 100644 --- a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/FleetControllerOptions.java +++ b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/FleetControllerOptions.java @@ -128,6 +128,9 @@ public class FleetControllerOptions implements Cloneable { public int maxDivergentNodesPrintedInTaskErrorMessages = 10; + // TODO: May be removed once rolled out everywhere. 
+ public boolean determineBucketsFromBucketSpaceMetric = true; + // TODO: Replace usage of this by usage where the nodes are explicitly passed (below) public FleetControllerOptions(String clusterName) { this.clusterName = clusterName; diff --git a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/NodeStateChangeChecker.java b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/NodeStateChangeChecker.java index c31a4976827..6bcb5b07f28 100644 --- a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/NodeStateChangeChecker.java +++ b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/NodeStateChangeChecker.java @@ -12,6 +12,7 @@ import com.yahoo.vespa.clustercontroller.core.hostinfo.StorageNode; import com.yahoo.vespa.clustercontroller.utils.staterestapi.requests.SetUnitStateRequest; import java.util.List; +import java.util.Map; import java.util.Optional; /** @@ -20,22 +21,27 @@ import java.util.Optional; * @author Haakon Dybdahl */ public class NodeStateChangeChecker { - public static final String BUCKETS_METRIC_NAME = "vds.datastored.alldisks.buckets"; + public static final String LEGACY_BUCKETS_METRIC_NAME = "vds.datastored.alldisks.buckets"; + public static final String BUCKETS_METRIC_NAME = "vds.datastored.bucket_space.buckets_total"; + public static final Map<String, String> BUCKETS_METRIC_DIMENSIONS = Map.of("bucketSpace", "default"); private final int minStorageNodesUp; private double minRatioOfStorageNodesUp; private final int requiredRedundancy; private final ClusterInfo clusterInfo; + private final boolean determineBucketsFromBucketSpaceMetric; public NodeStateChangeChecker( int minStorageNodesUp, double minRatioOfStorageNodesUp, int requiredRedundancy, - ClusterInfo clusterInfo) { + ClusterInfo clusterInfo, + boolean determineBucketsFromBucketSpaceMetric) { this.minStorageNodesUp = minStorageNodesUp; this.minRatioOfStorageNodesUp = minRatioOfStorageNodesUp; 
this.requiredRedundancy = requiredRedundancy; this.clusterInfo = clusterInfo; + this.determineBucketsFromBucketSpaceMetric = determineBucketsFromBucketSpaceMetric; } public static class Result { @@ -152,10 +158,19 @@ public class NodeStateChangeChecker { + hostInfoNodeVersion); } - Optional<Metrics.Value> bucketsMetric = hostInfo.getMetrics().getValue(BUCKETS_METRIC_NAME); - if (!bucketsMetric.isPresent() || bucketsMetric.get().getLast() == null) { - return Result.createDisallowed("Missing last value of the " + BUCKETS_METRIC_NAME + - " metric for storage node " + nodeInfo.getNodeIndex()); + Optional<Metrics.Value> bucketsMetric; + if (determineBucketsFromBucketSpaceMetric) { + bucketsMetric = hostInfo.getMetrics().getValueAt(BUCKETS_METRIC_NAME, BUCKETS_METRIC_DIMENSIONS); + if (!bucketsMetric.isPresent() || bucketsMetric.get().getLast() == null) { + return Result.createDisallowed("Missing last value of the " + BUCKETS_METRIC_NAME + + " metric for storage node " + nodeInfo.getNodeIndex()); + } + } else { + bucketsMetric = hostInfo.getMetrics().getValue(LEGACY_BUCKETS_METRIC_NAME); + if (!bucketsMetric.isPresent() || bucketsMetric.get().getLast() == null) { + return Result.createDisallowed("Missing last value of the " + LEGACY_BUCKETS_METRIC_NAME + + " metric for storage node " + nodeInfo.getNodeIndex()); + } } long lastBuckets = bucketsMetric.get().getLast(); diff --git a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/hostinfo/Metrics.java b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/hostinfo/Metrics.java index cb2a1e92612..eef3fd2e217 100644 --- a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/hostinfo/Metrics.java +++ b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/hostinfo/Metrics.java @@ -6,6 +6,7 @@ import com.fasterxml.jackson.annotation.JsonProperty; import java.util.ArrayList; import java.util.Collections; import java.util.List; +import 
java.util.Map; import java.util.Optional; /** @@ -25,21 +26,44 @@ public class Metrics { return Optional.empty(); } + /** + * Get the metric value whose dimensions MUST MATCH the given dimensions map. + * To require the metric to NOT have a dimension key, set it's value to null. + */ + public Optional<Value> getValueAt(String name, Map<String, String> dimensions) { + return metricsList.stream() + .filter(metric -> metric.name.equals(name)) + .filter(metric -> dimensions.entrySet().stream() + .allMatch(entry -> { + String dimensionName = entry.getKey(); + Optional<String> requiredDimensionValue = Optional.ofNullable(entry.getValue()); + return metric.getDimensionValue(dimensionName).equals(requiredDimensionValue); + })) + .map(Metric::getValue) + .findFirst(); + } + public List<Metric> getMetrics() { return Collections.unmodifiableList(metricsList); } public static class Metric { private final String name; private final Value value; + private final Map<String, String> dimensions; public Metric( @JsonProperty("name") String name, - @JsonProperty("values") Value value) { + @JsonProperty("values") Value value, + @JsonProperty("dimensions") Map<String, String> dimensions) { this.name = name; this.value = value; + this.dimensions = dimensions; } public String getName() { return name; } public Value getValue() { return value; } + public Optional<String> getDimensionValue(String dimension) { + return Optional.ofNullable(dimensions.get(dimension)); + } } public static class Value { |