aboutsummaryrefslogtreecommitdiffstats
path: root/clustercontroller-core/src/main/java
diff options
context:
space:
mode:
authorHåkon Hallingstad <hakon@verizonmedia.com>2020-01-17 12:39:12 +0100
committerHåkon Hallingstad <hakon@verizonmedia.com>2020-01-17 12:39:12 +0100
commitc28f13dac59167dee1257f5b23835e6441bc5f31 (patch)
tree7910cb0ce12e37b7af828c3b13c14b84cbcb0105 /clustercontroller-core/src/main/java
parentb324c19e007a7a57ba731ed72a01d35cd6937ed7 (diff)
Use bucket_space metric in retirement
This makes the Cluster Controller use the vds.datastored.bucket_space.buckets_total metric, with dimension bucketSpace=default, to determine whether a content node manages zero buckets; if so, the Cluster Controller will allow the node to go permanently down. This is used when a node is retiring and is to be removed from the application. The change is guarded by the use-bucket-space-metric flag, which defaults to true. If the new metric doesn't work as expected, we can revert to using the current/old metric by flipping the flag. The flag can be controlled per application.
Diffstat (limited to 'clustercontroller-core/src/main/java')
-rw-r--r--clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/ContentCluster.java8
-rw-r--r--clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/FleetController.java2
-rw-r--r--clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/FleetControllerOptions.java3
-rw-r--r--clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/NodeStateChangeChecker.java27
-rw-r--r--clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/hostinfo/Metrics.java26
5 files changed, 56 insertions, 10 deletions
diff --git a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/ContentCluster.java b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/ContentCluster.java
index ddb9357f11f..ae12a6dabb1 100644
--- a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/ContentCluster.java
+++ b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/ContentCluster.java
@@ -22,6 +22,7 @@ public class ContentCluster {
private final ClusterInfo clusterInfo = new ClusterInfo();
private final Map<Node, Long> nodeStartTimestamps = new TreeMap<>();
+ private final boolean determineBucketsFromBucketSpaceMetric;
private int slobrokGenerationCount = 0;
@@ -32,7 +33,9 @@ public class ContentCluster {
private double minRatioOfStorageNodesUp;
public ContentCluster(String clusterName, Collection<ConfiguredNode> configuredNodes, Distribution distribution,
- int minStorageNodesUp, double minRatioOfStorageNodesUp) {
+ int minStorageNodesUp, double minRatioOfStorageNodesUp,
+ boolean determineBucketsFromBucketSpaceMetric) {
+ this.determineBucketsFromBucketSpaceMetric = determineBucketsFromBucketSpaceMetric;
if (configuredNodes == null) throw new IllegalArgumentException("Nodes must be set");
this.clusterName = clusterName;
this.distribution = distribution;
@@ -183,7 +186,8 @@ public class ContentCluster {
minStorageNodesUp,
minRatioOfStorageNodesUp,
distribution.getRedundancy(),
- clusterInfo);
+ clusterInfo,
+ determineBucketsFromBucketSpaceMetric);
return nodeStateChangeChecker.evaluateTransition(node, clusterState, condition, oldState, newState);
}
diff --git a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/FleetController.java b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/FleetController.java
index d943cf27f9c..02f52d5f0c7 100644
--- a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/FleetController.java
+++ b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/FleetController.java
@@ -193,7 +193,7 @@ public class FleetController implements NodeStateOrHostInfoChangeHandler, NodeAd
options.nodes,
options.storageDistribution,
options.minStorageNodesUp,
- options.minRatioOfStorageNodesUp);
+ options.minRatioOfStorageNodesUp, true);
NodeStateGatherer stateGatherer = new NodeStateGatherer(timer, timer, log);
Communicator communicator = new RPCCommunicator(
RPCCommunicator.createRealSupervisor(),
diff --git a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/FleetControllerOptions.java b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/FleetControllerOptions.java
index 553b3332ee8..7ad6765cc47 100644
--- a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/FleetControllerOptions.java
+++ b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/FleetControllerOptions.java
@@ -128,6 +128,9 @@ public class FleetControllerOptions implements Cloneable {
public int maxDivergentNodesPrintedInTaskErrorMessages = 10;
+ // TODO: May be removed once rolled out everywhere.
+ public boolean determineBucketsFromBucketSpaceMetric = true;
+
// TODO: Replace usage of this by usage where the nodes are explicitly passed (below)
public FleetControllerOptions(String clusterName) {
this.clusterName = clusterName;
diff --git a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/NodeStateChangeChecker.java b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/NodeStateChangeChecker.java
index c31a4976827..6bcb5b07f28 100644
--- a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/NodeStateChangeChecker.java
+++ b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/NodeStateChangeChecker.java
@@ -12,6 +12,7 @@ import com.yahoo.vespa.clustercontroller.core.hostinfo.StorageNode;
import com.yahoo.vespa.clustercontroller.utils.staterestapi.requests.SetUnitStateRequest;
import java.util.List;
+import java.util.Map;
import java.util.Optional;
/**
@@ -20,22 +21,27 @@ import java.util.Optional;
* @author Haakon Dybdahl
*/
public class NodeStateChangeChecker {
- public static final String BUCKETS_METRIC_NAME = "vds.datastored.alldisks.buckets";
+ public static final String LEGACY_BUCKETS_METRIC_NAME = "vds.datastored.alldisks.buckets";
+ public static final String BUCKETS_METRIC_NAME = "vds.datastored.bucket_space.buckets_total";
+ public static final Map<String, String> BUCKETS_METRIC_DIMENSIONS = Map.of("bucketSpace", "default");
private final int minStorageNodesUp;
private double minRatioOfStorageNodesUp;
private final int requiredRedundancy;
private final ClusterInfo clusterInfo;
+ private final boolean determineBucketsFromBucketSpaceMetric;
public NodeStateChangeChecker(
int minStorageNodesUp,
double minRatioOfStorageNodesUp,
int requiredRedundancy,
- ClusterInfo clusterInfo) {
+ ClusterInfo clusterInfo,
+ boolean determineBucketsFromBucketSpaceMetric) {
this.minStorageNodesUp = minStorageNodesUp;
this.minRatioOfStorageNodesUp = minRatioOfStorageNodesUp;
this.requiredRedundancy = requiredRedundancy;
this.clusterInfo = clusterInfo;
+ this.determineBucketsFromBucketSpaceMetric = determineBucketsFromBucketSpaceMetric;
}
public static class Result {
@@ -152,10 +158,19 @@ public class NodeStateChangeChecker {
+ hostInfoNodeVersion);
}
- Optional<Metrics.Value> bucketsMetric = hostInfo.getMetrics().getValue(BUCKETS_METRIC_NAME);
- if (!bucketsMetric.isPresent() || bucketsMetric.get().getLast() == null) {
- return Result.createDisallowed("Missing last value of the " + BUCKETS_METRIC_NAME +
- " metric for storage node " + nodeInfo.getNodeIndex());
+ Optional<Metrics.Value> bucketsMetric;
+ if (determineBucketsFromBucketSpaceMetric) {
+ bucketsMetric = hostInfo.getMetrics().getValueAt(BUCKETS_METRIC_NAME, BUCKETS_METRIC_DIMENSIONS);
+ if (!bucketsMetric.isPresent() || bucketsMetric.get().getLast() == null) {
+ return Result.createDisallowed("Missing last value of the " + BUCKETS_METRIC_NAME +
+ " metric for storage node " + nodeInfo.getNodeIndex());
+ }
+ } else {
+ bucketsMetric = hostInfo.getMetrics().getValue(LEGACY_BUCKETS_METRIC_NAME);
+ if (!bucketsMetric.isPresent() || bucketsMetric.get().getLast() == null) {
+ return Result.createDisallowed("Missing last value of the " + LEGACY_BUCKETS_METRIC_NAME +
+ " metric for storage node " + nodeInfo.getNodeIndex());
+ }
}
long lastBuckets = bucketsMetric.get().getLast();
diff --git a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/hostinfo/Metrics.java b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/hostinfo/Metrics.java
index cb2a1e92612..eef3fd2e217 100644
--- a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/hostinfo/Metrics.java
+++ b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/hostinfo/Metrics.java
@@ -6,6 +6,7 @@ import com.fasterxml.jackson.annotation.JsonProperty;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
+import java.util.Map;
import java.util.Optional;
/**
@@ -25,21 +26,44 @@ public class Metrics {
return Optional.empty();
}
+ /**
+ * Get the metric value whose dimensions MUST MATCH the given dimensions map.
+ * To require the metric to NOT have a dimension key, set its value to null.
+ */
+ public Optional<Value> getValueAt(String name, Map<String, String> dimensions) {
+ return metricsList.stream()
+ .filter(metric -> metric.name.equals(name))
+ .filter(metric -> dimensions.entrySet().stream()
+ .allMatch(entry -> {
+ String dimensionName = entry.getKey();
+ Optional<String> requiredDimensionValue = Optional.ofNullable(entry.getValue());
+ return metric.getDimensionValue(dimensionName).equals(requiredDimensionValue);
+ }))
+ .map(Metric::getValue)
+ .findFirst();
+ }
+
public List<Metric> getMetrics() { return Collections.unmodifiableList(metricsList); }
public static class Metric {
private final String name;
private final Value value;
+ private final Map<String, String> dimensions;
public Metric(
@JsonProperty("name") String name,
- @JsonProperty("values") Value value) {
+ @JsonProperty("values") Value value,
+ @JsonProperty("dimensions") Map<String, String> dimensions) {
this.name = name;
this.value = value;
+ this.dimensions = dimensions;
}
public String getName() { return name; }
public Value getValue() { return value; }
+ public Optional<String> getDimensionValue(String dimension) {
+ return Optional.ofNullable(dimensions.get(dimension));
+ }
}
public static class Value {