diff options
Diffstat (limited to 'node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/ClusterTimeseries.java')
-rw-r--r-- | node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/ClusterTimeseries.java | 103 |
1 files changed, 35 insertions, 68 deletions
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/ClusterTimeseries.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/ClusterTimeseries.java index 150958835ac..e359579117f 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/ClusterTimeseries.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/ClusterTimeseries.java @@ -1,103 +1,70 @@ // Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. package com.yahoo.vespa.hosted.provision.autoscale; -import com.yahoo.config.provision.ClusterSpec; import com.yahoo.vespa.hosted.provision.NodeList; +import com.yahoo.vespa.hosted.provision.NodeRepository; import com.yahoo.vespa.hosted.provision.applications.Cluster; import java.time.Duration; import java.time.Instant; -import java.util.ArrayList; -import java.util.Collections; import java.util.List; import java.util.function.Predicate; import java.util.stream.Collectors; /** - * A list of metric snapshots from a cluster, sorted by increasing time (newest last). + * A series of metric snapshots for all nodes in a cluster * * @author bratseth */ public class ClusterTimeseries { - private final ClusterSpec.Id cluster; - private final List<ClusterMetricSnapshot> snapshots; + private final NodeList clusterNodes; - ClusterTimeseries(ClusterSpec.Id cluster, List<ClusterMetricSnapshot> snapshots) { - this.cluster = cluster; - List<ClusterMetricSnapshot> sortedSnapshots = new ArrayList<>(snapshots); - Collections.sort(sortedSnapshots); - this.snapshots = Collections.unmodifiableList(sortedSnapshots); - } - - public boolean isEmpty() { return snapshots.isEmpty(); } - - public int size() { return snapshots.size(); } - - public ClusterMetricSnapshot get(int index) { return snapshots.get(index); } + /** The measurements for all nodes in this snapshot */ + private final List<NodeTimeseries> allTimeseries; - public List<ClusterMetricSnapshot> asList() { return snapshots; } + public ClusterTimeseries(Duration period, Cluster cluster, NodeList clusterNodes, MetricsDb db) { + this.clusterNodes = clusterNodes; + var timeseries = db.getNodeTimeseries(period, clusterNodes); - public ClusterSpec.Id cluster() { return cluster; } + if (cluster.lastScalingEvent().isPresent()) + timeseries = filter(timeseries, snapshot -> snapshot.generation() < 0 || // Content nodes do not yet send generation + snapshot.generation() >= cluster.lastScalingEvent().get().generation()); + timeseries = filter(timeseries, snapshot -> snapshot.inService() && snapshot.stable()); - public ClusterTimeseries add(ClusterMetricSnapshot snapshot) { - List<ClusterMetricSnapshot> list = new ArrayList<>(snapshots); - list.add(snapshot); - return new ClusterTimeseries(cluster, list); + this.allTimeseries = timeseries; } - /** The max query growth rate we can predict from this time-series as a fraction of the current traffic per minute */ - public double maxQueryGrowthRate() { - if (snapshots.isEmpty()) return 0.1; - - // Find the period having the highest growth rate, where total growth exceeds 30% increase - double maxGrowthRate = 0; // In query rate per minute - for (int start = 0; start < snapshots.size(); start++) { - if (start > 0) { // Optimization: Skip this point when starting from the previous is better relative to the best rate so far - Duration duration = durationBetween(start - 1, start); - if ( ! duration.isZero()) { - double growthRate = (queryRateAt(start - 1) - queryRateAt(start)) / duration.toMinutes(); - if (growthRate >= maxGrowthRate) - continue; - } - } - for (int end = start + 1; end < snapshots.size(); end++) { - if (queryRateAt(end) >= queryRateAt(start) * 1.3) { - Duration duration = durationBetween(start, end); - if (duration.isZero()) continue; - double growthRate = (queryRateAt(end) - queryRateAt(start)) / duration.toMinutes(); - if (growthRate > maxGrowthRate) - maxGrowthRate = growthRate; - } - } - } - if (maxGrowthRate == 0) { // No periods of significant growth - if (durationBetween(0, snapshots.size() - 1).toHours() < 24) - return 0.1; // ... because not much data - else - return 0.0; // ... because load is stable - } - if (queryRateNow() == 0) return 0.1; // Growth not expressible as a fraction of the current rate - return maxGrowthRate / queryRateNow(); + /** Returns the average number of measurements per node */ + public int measurementsPerNode() { + int measurementCount = allTimeseries.stream().mapToInt(m -> m.size()).sum(); + return measurementCount / clusterNodes.size(); } - /** The current query rate as a fraction of the peak rate in this timeseries */ - public double currentQueryFractionOfMax() { - if (snapshots.isEmpty()) return 0.5; - var max = snapshots.stream().mapToDouble(ClusterMetricSnapshot::queryRate).max().getAsDouble(); - return snapshots.get(snapshots.size() - 1).queryRate() / max; + /** Returns the number of nodes measured in this */ + public int nodesMeasured() { + return allTimeseries.size(); } - private double queryRateAt(int index) { - return snapshots.get(index).queryRate(); + /** Returns the average load of this resource in this */ + public double averageLoad(Resource resource) { + int measurementCount = allTimeseries.stream().mapToInt(m -> m.size()).sum(); + if (measurementCount == 0) return 0; + double measurementSum = allTimeseries.stream().flatMap(m -> m.asList().stream()).mapToDouble(m -> value(resource, m)).sum(); + return measurementSum / measurementCount; } - private double queryRateNow() { - return queryRateAt(snapshots.size() - 1); + private double value(Resource resource, MetricSnapshot snapshot) { + switch (resource) { + case cpu: return snapshot.cpu(); + case memory: return snapshot.memory(); + case disk: return snapshot.disk(); + default: throw new IllegalArgumentException("Got an unknown resource " + resource); + } } - private Duration durationBetween(int startIndex, int endIndex) { - return Duration.between(snapshots.get(startIndex).at(), snapshots.get(endIndex).at()); + private List<NodeTimeseries> filter(List<NodeTimeseries> timeseries, Predicate<MetricSnapshot> filter) { + return timeseries.stream().map(nodeTimeseries -> nodeTimeseries.filter(filter)).collect(Collectors.toList()); } } |