aboutsummaryrefslogtreecommitdiffstats
path: root/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/ClusterTimeseries.java
diff options
context:
space:
mode:
Diffstat (limited to 'node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/ClusterTimeseries.java')
-rw-r--r--node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/ClusterTimeseries.java103
1 files changed, 35 insertions, 68 deletions
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/ClusterTimeseries.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/ClusterTimeseries.java
index 150958835ac..e359579117f 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/ClusterTimeseries.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/ClusterTimeseries.java
@@ -1,103 +1,70 @@
// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
package com.yahoo.vespa.hosted.provision.autoscale;
-import com.yahoo.config.provision.ClusterSpec;
import com.yahoo.vespa.hosted.provision.NodeList;
+import com.yahoo.vespa.hosted.provision.NodeRepository;
import com.yahoo.vespa.hosted.provision.applications.Cluster;
import java.time.Duration;
import java.time.Instant;
-import java.util.ArrayList;
-import java.util.Collections;
import java.util.List;
import java.util.function.Predicate;
import java.util.stream.Collectors;
/**
- * A list of metric snapshots from a cluster, sorted by increasing time (newest last).
+ * A series of metric snapshots for all nodes in a cluster
*
* @author bratseth
*/
public class ClusterTimeseries {
- private final ClusterSpec.Id cluster;
- private final List<ClusterMetricSnapshot> snapshots;
+ private final NodeList clusterNodes;
- ClusterTimeseries(ClusterSpec.Id cluster, List<ClusterMetricSnapshot> snapshots) {
- this.cluster = cluster;
- List<ClusterMetricSnapshot> sortedSnapshots = new ArrayList<>(snapshots);
- Collections.sort(sortedSnapshots);
- this.snapshots = Collections.unmodifiableList(sortedSnapshots);
- }
-
- public boolean isEmpty() { return snapshots.isEmpty(); }
-
- public int size() { return snapshots.size(); }
-
- public ClusterMetricSnapshot get(int index) { return snapshots.get(index); }
+ /** The metric timeseries of the nodes in this cluster, one per measured node */
+ private final List<NodeTimeseries> allTimeseries;
- public List<ClusterMetricSnapshot> asList() { return snapshots; }
+ public ClusterTimeseries(Duration period, Cluster cluster, NodeList clusterNodes, MetricsDb db) { // Reads the nodes' timeseries for the period from db and filters out unwanted snapshots
+ this.clusterNodes = clusterNodes;
+ var timeseries = db.getNodeTimeseries(period, clusterNodes);
- public ClusterSpec.Id cluster() { return cluster; }
+ if (cluster.lastScalingEvent().isPresent()) // Generation filtering only applies once the cluster has a scaling event
+ timeseries = filter(timeseries, snapshot -> snapshot.generation() < 0 || // Content nodes do not yet send generation
+ snapshot.generation() >= cluster.lastScalingEvent().get().generation()); // Otherwise keep only snapshots at or after the last scaling event's generation
+ timeseries = filter(timeseries, snapshot -> snapshot.inService() && snapshot.stable()); // Not part of the if above: always keep only in-service, stable snapshots
- public ClusterTimeseries add(ClusterMetricSnapshot snapshot) {
- List<ClusterMetricSnapshot> list = new ArrayList<>(snapshots);
- list.add(snapshot);
- return new ClusterTimeseries(cluster, list);
+ this.allTimeseries = timeseries;
}
- /** The max query growth rate we can predict from this time-series as a fraction of the current traffic per minute */
- public double maxQueryGrowthRate() {
- if (snapshots.isEmpty()) return 0.1;
-
- // Find the period having the highest growth rate, where total growth exceeds 30% increase
- double maxGrowthRate = 0; // In query rate per minute
- for (int start = 0; start < snapshots.size(); start++) {
- if (start > 0) { // Optimization: Skip this point when starting from the previous is better relative to the best rate so far
- Duration duration = durationBetween(start - 1, start);
- if ( ! duration.isZero()) {
- double growthRate = (queryRateAt(start - 1) - queryRateAt(start)) / duration.toMinutes();
- if (growthRate >= maxGrowthRate)
- continue;
- }
- }
- for (int end = start + 1; end < snapshots.size(); end++) {
- if (queryRateAt(end) >= queryRateAt(start) * 1.3) {
- Duration duration = durationBetween(start, end);
- if (duration.isZero()) continue;
- double growthRate = (queryRateAt(end) - queryRateAt(start)) / duration.toMinutes();
- if (growthRate > maxGrowthRate)
- maxGrowthRate = growthRate;
- }
- }
- }
- if (maxGrowthRate == 0) { // No periods of significant growth
- if (durationBetween(0, snapshots.size() - 1).toHours() < 24)
- return 0.1; // ... because not much data
- else
- return 0.0; // ... because load is stable
- }
- if (queryRateNow() == 0) return 0.1; // Growth not expressible as a fraction of the current rate
- return maxGrowthRate / queryRateNow();
+ /** Returns the average number of measurements per cluster node (rounded down), or 0 if this cluster has no nodes */
+ public int measurementsPerNode() {
+ if (clusterNodes.size() == 0) return 0; // No nodes: integer division by zero would throw ArithmeticException
+ int measurementCount = allTimeseries.stream().mapToInt(m -> m.size()).sum();
+ return measurementCount / clusterNodes.size(); // Divides by all cluster nodes, not just those with a timeseries
}
- /** The current query rate as a fraction of the peak rate in this timeseries */
- public double currentQueryFractionOfMax() {
- if (snapshots.isEmpty()) return 0.5;
- var max = snapshots.stream().mapToDouble(ClusterMetricSnapshot::queryRate).max().getAsDouble();
- return snapshots.get(snapshots.size() - 1).queryRate() / max;
+ /** Returns how many node timeseries this holds; a node whose snapshots were all filtered out is still counted */
+ public int nodesMeasured() {
+ int measuredNodes = allTimeseries.size();
+ return measuredNodes;
}
- private double queryRateAt(int index) {
- return snapshots.get(index).queryRate();
+ /** Returns the mean value of the given resource over every snapshot of every node in this, or 0 if there are no snapshots */
+ public double averageLoad(Resource resource) {
+ int snapshotCount = allTimeseries.stream().mapToInt(series -> series.size()).sum();
+ if (snapshotCount == 0) return 0;
+ double loadSum = allTimeseries.stream()
+ .flatMap(series -> series.asList().stream())
+ .mapToDouble(snapshot -> value(resource, snapshot))
+ .sum();
+ return loadSum / snapshotCount;
}
- private double queryRateNow() {
- return queryRateAt(snapshots.size() - 1);
+ /** Picks the reading of the given resource out of a snapshot; throws IllegalArgumentException for any other resource */
+ private double value(Resource resource, MetricSnapshot snapshot) {
+ final double reading;
+ switch (resource) {
+ case cpu: reading = snapshot.cpu(); break;
+ case memory: reading = snapshot.memory(); break;
+ case disk: reading = snapshot.disk(); break;
+ default: throw new IllegalArgumentException("Got an unknown resource " + resource);
+ }
+ return reading;
}
- private Duration durationBetween(int startIndex, int endIndex) {
- return Duration.between(snapshots.get(startIndex).at(), snapshots.get(endIndex).at());
+ /** Applies the given snapshot predicate to each node's timeseries and returns the filtered series, one per input series */
+ private List<NodeTimeseries> filter(List<NodeTimeseries> timeseries, Predicate<MetricSnapshot> filter) {
+ return timeseries.stream()
+ .map(series -> series.filter(filter))
+ .collect(Collectors.toList());
}