summary | refs | log | tree | commit | diff | stats
path: root/node-repository
diff options
context:
space:
mode:
author: Jon Bratseth <bratseth@gmail.com> 2023-12-06 16:59:33 +0100
committer: GitHub <noreply@github.com> 2023-12-06 16:59:33 +0100
commit: 10a580357998b4c750729f27d3ef2e224dd69af7 (patch)
tree: 3138aedec274b466ed38587977da19afc66ea02e /node-repository
parent: b6810d469697d2fd18941adbf355282dbc370080 (diff)
parent: 1aa5e20620c9abddd7f8f3253455620e40487aa6 (diff)
Merge pull request #29571 from vespa-engine/mpolden/detect-redist
Let distributor metric decide cluster stability
Diffstat (limited to 'node-repository')
-rw-r--r-- node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/ClusterModel.java | 4
-rw-r--r-- node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/ClusterNodesTimeseries.java | 2
-rw-r--r-- node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/MemoryMetricsDb.java | 7
-rw-r--r-- node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/MetricsResponse.java | 22
-rw-r--r-- node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/ResourceChange.java | 8
-rw-r--r-- node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/MetricsV2MetricsFetcherTest.java | 21
6 files changed, 43 insertions, 21 deletions
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/ClusterModel.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/ClusterModel.java
index 4c5ace3d51a..1e4a11fdea2 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/ClusterModel.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/ClusterModel.java
@@ -72,7 +72,7 @@ public class ClusterModel {
// Lazily initialized members
private Double queryFractionOfMax = null;
private Double maxQueryGrowthRate = null;
- private OptionalDouble averageQueryRate = null;
+ private OptionalDouble averageQueryRate = OptionalDouble.empty();
public ClusterModel(NodeRepository nodeRepository,
Application application,
@@ -307,7 +307,7 @@ public class ClusterModel {
/** Returns the average query rate in the scaling window. */
private OptionalDouble averageQueryRate() {
- if (averageQueryRate != null) return averageQueryRate;
+ if (averageQueryRate.isPresent()) return averageQueryRate;
return averageQueryRate = clusterTimeseries().queryRate(scalingDuration(), clock);
}
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/ClusterNodesTimeseries.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/ClusterNodesTimeseries.java
index 5ae184c50ca..e1ef21ebd13 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/ClusterNodesTimeseries.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/ClusterNodesTimeseries.java
@@ -51,7 +51,7 @@ public class ClusterNodesTimeseries {
/** Returns the average number of measurements per node */
public double measurementsPerNode() {
- if (clusterNodes.size() == 0) return 0;
+ if (clusterNodes.isEmpty()) return 0;
return (double) totalMeasurementsIn(timeseries) / clusterNodes.size();
}
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/MemoryMetricsDb.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/MemoryMetricsDb.java
index 6c75170ba63..940109bab8a 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/MemoryMetricsDb.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/MemoryMetricsDb.java
@@ -14,7 +14,6 @@ import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Set;
-import java.util.stream.Collectors;
/**
* An in-memory implementation of the metrics Db.
@@ -59,12 +58,6 @@ public class MemoryMetricsDb implements MetricsDb {
}
}
- public void clearClusterMetrics(ApplicationId application, ClusterSpec.Id cluster) {
- synchronized (lock) {
- clusterTimeseries.remove(new Pair<>(application, cluster));
- }
- }
-
@Override
public List<NodeTimeseries> getNodeTimeseries(Duration period, Set<String> hostnames) {
Instant startTime = clock().instant().minus(period);
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/MetricsResponse.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/MetricsResponse.java
index 01bcabeb11b..82f96375d79 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/MetricsResponse.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/MetricsResponse.java
@@ -2,6 +2,7 @@
package com.yahoo.vespa.hosted.provision.autoscale;
import ai.vespa.metrics.ContainerMetrics;
+import ai.vespa.metrics.DistributorMetrics;
import ai.vespa.metrics.HostedNodeAdminMetrics;
import ai.vespa.metrics.SearchNodeMetrics;
import ai.vespa.metrics.StorageMetrics;
@@ -78,7 +79,7 @@ public class MetricsResponse {
Metric.disk.from(nodeValues)),
(long)Metric.generation.from(nodeValues),
Metric.inService.from(nodeValues) > 0,
- clusterIsStable(node.get(), applicationNodes),
+ clusterIsStable(node.get(), applicationNodes, nodeValues),
Metric.queryRate.from(nodeValues))));
var cluster = node.get().allocation().get().membership().cluster().id();
@@ -107,7 +108,10 @@ public class MetricsResponse {
item.field("values").traverse((ObjectTraverser)(name, value) -> values.put(name, value.asDouble()));
}
- private boolean clusterIsStable(Node node, NodeList applicationNodes) {
+ private boolean clusterIsStable(Node node, NodeList applicationNodes, ListMap<String, Double> nodeValues) {
+ if (Metric.redistributing.from(nodeValues) > 0) {
+ return false;
+ }
ClusterSpec cluster = node.allocation().get().membership().cluster();
return applicationNodes.cluster(cluster.id()).retired().isEmpty();
}
@@ -207,6 +211,20 @@ public class MetricsResponse {
}
},
+ redistributing { // whether data redistribution is ongoing
+
+ @Override
+ public List<String> metricResponseNames() {
+ return List.of(DistributorMetrics.VDS_IDEALSTATE_MERGE_BUCKET_PENDING.last());
+ }
+
+ @Override
+ double computeFinal(ListMap<String, Double> values) {
+ // Really a bool. True if any node is merging buckets.
+ return values.values().stream().flatMap(List::stream).anyMatch(v -> v > 0) ? 1 : 0;
+ }
+
+ },
queryRate { // queries per second
@Override
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/ResourceChange.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/ResourceChange.java
index 9ac51407873..50e8b77fef2 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/ResourceChange.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/ResourceChange.java
@@ -34,14 +34,6 @@ public class ResourceChange {
}
}
- private boolean requiresRedistribution() {
- if ( ! model.clusterSpec().type().isContent()) return false;
- if (from.nodes() != to.nodes()) return true;
- if (from.groups() != to.groups()) return true;
- if (requiresNodeReplacement()) return true;
- return false;
- }
-
/**
* Returns the estimated number of nodes that will be retired by this change,
* given that it is a content cluster and no node replacement is necessary.
diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/MetricsV2MetricsFetcherTest.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/MetricsV2MetricsFetcherTest.java
index 127f3525cf5..4ec4ecd6d84 100644
--- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/MetricsV2MetricsFetcherTest.java
+++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/MetricsV2MetricsFetcherTest.java
@@ -99,6 +99,7 @@ public class MetricsV2MetricsFetcherTest {
assertEquals("host-3.yahoo.com", values.get(0).getFirst());
assertEquals(0.13, values.get(0).getSecond().load().cpu(), delta);
assertEquals(0.9375, values.get(0).getSecond().load().memory(), delta);
+ assertFalse("Unstable because buckets are being merged", values.get(0).getSecond().stable());
}
}
@@ -265,7 +266,25 @@ public class MetricsV2MetricsFetcherTest {
}
}
]
- }
+ },
+ "services": [
+ {
+ "name": "distributor",
+ "timestamp": 1234,
+ "status": {
+ "code": "up"
+ },
+ "metrics": [
+ {
+ "values": {
+ "vds.idealstate.merge_bucket.pending.last": 42.2
+ },
+ "dimensions": {
+ }
+ }
+ ]
+ }
+ ]
}
]
}