From a51c18c8687b6cbbda38488bab51b2d315113ad6 Mon Sep 17 00:00:00 2001 From: Jon Bratseth Date: Mon, 8 Mar 2021 19:51:38 +0100 Subject: Revert "Consider growth rate vs. scaling time" --- .../model/application/validation/Validator.java | 1 + .../hosted/provision/applications/Cluster.java | 27 --- .../hosted/provision/autoscale/Autoscaler.java | 50 +++++- .../provision/autoscale/ClusterMetricSnapshot.java | 42 ----- .../autoscale/ClusterNodesTimeseries.java | 76 --------- .../provision/autoscale/ClusterTimeseries.java | 103 ++++-------- .../provision/autoscale/MemoryMetricsDb.java | 59 ++----- .../hosted/provision/autoscale/MetricSnapshot.java | 66 ++++++++ .../hosted/provision/autoscale/MetricsDb.java | 17 +- .../provision/autoscale/MetricsResponse.java | 39 ++--- .../provision/autoscale/NodeMetricSnapshot.java | 66 -------- .../hosted/provision/autoscale/NodeTimeseries.java | 16 +- .../hosted/provision/autoscale/QuestMetricsDb.java | 186 +++++---------------- .../vespa/hosted/provision/autoscale/Resource.java | 2 +- .../hosted/provision/autoscale/ResourceTarget.java | 42 ++--- .../maintenance/AutoscalingMaintainer.java | 4 +- .../maintenance/NodeMetricsDbMaintainer.java | 4 +- .../provision/maintenance/RetiredExpirer.java | 2 +- .../provision/restapi/ApplicationSerializer.java | 5 +- .../autoscale/AutoscalingIntegrationTest.java | 2 +- .../provision/autoscale/AutoscalingTest.java | 56 +------ .../provision/autoscale/AutoscalingTester.java | 91 +++------- .../provision/autoscale/ClusterTimeseriesTest.java | 109 ------------ .../autoscale/MetricsV2MetricsFetcherTest.java | 6 +- .../provision/autoscale/NodeMetricsDbTest.java | 20 +-- .../provision/autoscale/QuestMetricsDbTest.java | 112 +++---------- .../maintenance/AutoscalingMaintainerTester.java | 19 ++- .../maintenance/NodeMetricsDbMaintainerTest.java | 8 +- .../ScalingSuggestionsMaintainerTest.java | 20 +-- 29 files changed, 336 insertions(+), 914 deletions(-) delete mode 100644 node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/ClusterMetricSnapshot.java delete mode 100644 node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/ClusterNodesTimeseries.java create mode 100644 node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/MetricSnapshot.java delete mode 100644 node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/NodeMetricSnapshot.java delete mode 100644 node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/ClusterTimeseriesTest.java diff --git a/config-model/src/main/java/com/yahoo/vespa/model/application/validation/Validator.java b/config-model/src/main/java/com/yahoo/vespa/model/application/validation/Validator.java index f3bebbe7fb9..c926c1f13a0 100644 --- a/config-model/src/main/java/com/yahoo/vespa/model/application/validation/Validator.java +++ b/config-model/src/main/java/com/yahoo/vespa/model/application/validation/Validator.java @@ -8,6 +8,7 @@ import com.yahoo.vespa.model.VespaModel; * Abstract superclass of all application package validators. 
* * @author hmusum + * @since 2010-01-29 */ public abstract class Validator { diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/applications/Cluster.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/applications/Cluster.java index bddbcf43bd0..b16859fa6fb 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/applications/Cluster.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/applications/Cluster.java @@ -5,7 +5,6 @@ import com.yahoo.config.provision.ClusterResources; import com.yahoo.config.provision.ClusterSpec; import com.yahoo.vespa.hosted.provision.autoscale.Autoscaler; -import java.time.Duration; import java.time.Instant; import java.util.ArrayList; import java.util.List; @@ -128,32 +127,6 @@ public class Cluster { return new Cluster(id, exclusive, min, max, suggested, target, scalingEvents, autoscalingStatus); } - /** The predicted duration of a rescaling of this cluster */ - public Duration scalingDuration(ClusterSpec clusterSpec) { - int completedEventCount = 0; - Duration totalDuration = Duration.ZERO; - for (ScalingEvent event : scalingEvents()) { - if (event.duration().isEmpty()) continue; - completedEventCount++; - totalDuration = totalDuration.plus(event.duration().get()); - } - - if (completedEventCount == 0) { // Use defaults - if (clusterSpec.isStateful()) return Duration.ofHours(12); - return Duration.ofMinutes(10); - } - else { - Duration predictedDuration = totalDuration.dividedBy(completedEventCount); - - // TODO: Remove when we have reliable completion for content clusters - if (clusterSpec.isStateful() && predictedDuration.minus(Duration.ofHours(12)).isNegative()) - return Duration.ofHours(12); - - if (predictedDuration.minus(Duration.ofMinutes(5)).isNegative()) return Duration.ofMinutes(5); // minimum - return predictedDuration; - } - } - @Override public int hashCode() { return id.hashCode(); } diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/Autoscaler.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/Autoscaler.java index 2c0e0a2bdb0..2d192fae11f 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/Autoscaler.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/Autoscaler.java @@ -2,12 +2,14 @@ package com.yahoo.vespa.hosted.provision.autoscale; import com.yahoo.config.provision.ClusterResources; +import com.yahoo.config.provision.ClusterSpec; import com.yahoo.config.provision.NodeResources; import com.yahoo.vespa.hosted.provision.Node; import com.yahoo.vespa.hosted.provision.NodeList; import com.yahoo.vespa.hosted.provision.NodeRepository; import com.yahoo.vespa.hosted.provision.applications.Application; import com.yahoo.vespa.hosted.provision.applications.Cluster; +import com.yahoo.vespa.hosted.provision.applications.ScalingEvent; import java.time.Duration; import java.time.Instant; @@ -21,9 +23,9 @@ import java.util.Optional; */ public class Autoscaler { - /** What cost difference is worth a reallocation? */ + /** What cost difference factor is worth a reallocation? */ private static final double costDifferenceWorthReallocation = 0.1; - /** What resource difference is worth a reallocation? */ + /** What difference factor for a resource is worth a reallocation? */ private static final double resourceDifferenceWorthReallocation = 0.1; private final MetricsDb metricsDb; @@ -62,27 +64,31 @@ public class Autoscaler { if ( ! 
stable(clusterNodes, nodeRepository)) return Advice.none("Cluster change in progress"); - Duration scalingWindow = cluster.scalingDuration(clusterNodes.clusterSpec()); + Duration scalingWindow = scalingWindow(clusterNodes.clusterSpec(), cluster); if (scaledIn(scalingWindow, cluster)) return Advice.dontScale("Won't autoscale now: Less than " + scalingWindow + " since last rescaling"); - var clusterNodesTimeseries = new ClusterNodesTimeseries(scalingWindow, cluster, clusterNodes, metricsDb); - var currentAllocation = new AllocatableClusterResources(clusterNodes.asList(), nodeRepository, cluster.exclusive()); + ClusterTimeseries clusterTimeseries = + new ClusterTimeseries(scalingWindow, cluster, clusterNodes, metricsDb); + AllocatableClusterResources currentAllocation = + new AllocatableClusterResources(clusterNodes.asList(), nodeRepository, cluster.exclusive()); - int measurementsPerNode = clusterNodesTimeseries.measurementsPerNode(); + int measurementsPerNode = clusterTimeseries.measurementsPerNode(); if (measurementsPerNode < minimumMeasurementsPerNode(scalingWindow)) return Advice.none("Collecting more data before making new scaling decisions: " + "Have " + measurementsPerNode + " measurements per node but require " + minimumMeasurementsPerNode(scalingWindow)); - int nodesMeasured = clusterNodesTimeseries.nodesMeasured(); + int nodesMeasured = clusterTimeseries.nodesMeasured(); if (nodesMeasured != clusterNodes.size()) return Advice.none("Collecting more data before making new scaling decisions: " + "Have measurements from " + nodesMeasured + " but require from " + clusterNodes.size()); + double cpuLoad = clusterTimeseries.averageLoad(Resource.cpu); + double memoryLoad = clusterTimeseries.averageLoad(Resource.memory); + double diskLoad = clusterTimeseries.averageLoad(Resource.disk); - var clusterTimeseries = metricsDb.getClusterTimeseries(application.id(), cluster.id()); - var target = ResourceTarget.idealLoad(clusterTimeseries, clusterNodesTimeseries, currentAllocation, application); + var target = ResourceTarget.idealLoad(cpuLoad, memoryLoad, diskLoad, currentAllocation, application); Optional bestAllocation = allocationOptimizer.findBestAllocation(target, currentAllocation, limits); @@ -122,6 +128,32 @@ public class Autoscaler { .isAfter(nodeRepository.clock().instant().minus(delay)); } + /** The duration of the window we need to consider to make a scaling decision. 
See also minimumMeasurementsPerNode */ + private Duration scalingWindow(ClusterSpec clusterSpec, Cluster cluster) { + int completedEventCount = 0; + Duration totalDuration = Duration.ZERO; + for (ScalingEvent event : cluster.scalingEvents()) { + if (event.duration().isEmpty()) continue; + completedEventCount++; + totalDuration = totalDuration.plus(event.duration().get()); + } + + if (completedEventCount == 0) { // Use defaults + if (clusterSpec.isStateful()) return Duration.ofHours(12); + return Duration.ofMinutes(10); + } + else { + Duration predictedDuration = totalDuration.dividedBy(completedEventCount); + + // TODO: Remove when we have reliable completion for content clusters + if (clusterSpec.isStateful() && predictedDuration.minus(Duration.ofHours(12)).isNegative()) + return Duration.ofHours(12); + + if (predictedDuration.minus(Duration.ofMinutes(5)).isNegative()) return Duration.ofMinutes(5); // minimum + return predictedDuration; + } + } + static Duration maxScalingWindow() { return Duration.ofHours(48); } diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/ClusterMetricSnapshot.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/ClusterMetricSnapshot.java deleted file mode 100644 index fd8e91584c4..00000000000 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/ClusterMetricSnapshot.java +++ /dev/null @@ -1,42 +0,0 @@ -// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. -package com.yahoo.vespa.hosted.provision.autoscale; - -import java.time.Instant; - -/** - * Cluster level metrics. - * These are aggregated at fetch time over the nodes in the cluster at that point in time. - * - * @author bratseth - */ -public class ClusterMetricSnapshot implements Comparable { - - private final Instant at; - - private final double queryRate; - - public ClusterMetricSnapshot(Instant at, double queryRate) { - this.at = at; - this.queryRate = queryRate; - } - - public Instant at() { return at; } - - /** Queries per second */ - public double queryRate() { return queryRate; } - - public ClusterMetricSnapshot withQueryRate(double queryRate) { - return new ClusterMetricSnapshot(at, queryRate); - } - - @Override - public int compareTo(ClusterMetricSnapshot other) { - return at.compareTo(other.at); - } - - @Override - public String toString() { return "metrics at " + at + ":" + - " queryRate: " + queryRate; - } - -} diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/ClusterNodesTimeseries.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/ClusterNodesTimeseries.java deleted file mode 100644 index 173d76e4c26..00000000000 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/ClusterNodesTimeseries.java +++ /dev/null @@ -1,76 +0,0 @@ -// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
-package com.yahoo.vespa.hosted.provision.autoscale; - -import com.yahoo.vespa.hosted.provision.NodeList; -import com.yahoo.vespa.hosted.provision.applications.Cluster; - -import java.time.Duration; -import java.util.List; -import java.util.function.Predicate; -import java.util.stream.Collectors; - -/** - * A series of metric snapshots for the nodes of a cluster used to compute load - * - * @author bratseth - */ -public class ClusterNodesTimeseries { - - private final Cluster cluster; - private final NodeList clusterNodes; - - /** The measurements for all nodes in this snapshot */ - private final List timeseries; - - public ClusterNodesTimeseries(Duration period, Cluster cluster, NodeList clusterNodes, MetricsDb db) { - this.cluster = cluster; - this.clusterNodes = clusterNodes; - var timeseries = db.getNodeTimeseries(period, clusterNodes); - - if (cluster.lastScalingEvent().isPresent()) - timeseries = filter(timeseries, snapshot -> snapshot.generation() < 0 || // Content nodes do not yet send generation - snapshot.generation() >= cluster.lastScalingEvent().get().generation()); - timeseries = filter(timeseries, snapshot -> snapshot.inService() && snapshot.stable()); - - this.timeseries = timeseries; - } - - /** The cluster this is a timeseries for */ - public Cluster cluster() { return cluster; } - - /** The nodes of the cluster this is a timeseries for */ - public NodeList clusterNodes() { return clusterNodes; } - - /** Returns the average number of measurements per node */ - public int measurementsPerNode() { - int measurementCount = timeseries.stream().mapToInt(m -> m.size()).sum(); - return measurementCount / clusterNodes.size(); - } - - /** Returns the number of nodes measured in this */ - public int nodesMeasured() { - return timeseries.size(); - } - - /** Returns the average load of this resource in this */ - public double averageLoad(Resource resource) { - int measurementCount = timeseries.stream().mapToInt(m -> m.size()).sum(); - if (measurementCount == 0) return 0; - double measurementSum = timeseries.stream().flatMap(m -> m.asList().stream()).mapToDouble(m -> value(resource, m)).sum(); - return measurementSum / measurementCount; - } - - private double value(Resource resource, NodeMetricSnapshot snapshot) { - switch (resource) { - case cpu: return snapshot.cpu(); - case memory: return snapshot.memory(); - case disk: return snapshot.disk(); - default: throw new IllegalArgumentException("Got an unknown resource " + resource); - } - } - - private List filter(List timeseries, Predicate filter) { - return timeseries.stream().map(nodeTimeseries -> nodeTimeseries.filter(filter)).collect(Collectors.toList()); - } - -} diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/ClusterTimeseries.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/ClusterTimeseries.java index 150958835ac..e359579117f 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/ClusterTimeseries.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/ClusterTimeseries.java @@ -1,103 +1,70 @@ // Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
package com.yahoo.vespa.hosted.provision.autoscale; -import com.yahoo.config.provision.ClusterSpec; import com.yahoo.vespa.hosted.provision.NodeList; +import com.yahoo.vespa.hosted.provision.NodeRepository; import com.yahoo.vespa.hosted.provision.applications.Cluster; import java.time.Duration; import java.time.Instant; -import java.util.ArrayList; -import java.util.Collections; import java.util.List; import java.util.function.Predicate; import java.util.stream.Collectors; /** - * A list of metric snapshots from a cluster, sorted by increasing time (newest last). + * A series of metric snapshots for all nodes in a cluster * * @author bratseth */ public class ClusterTimeseries { - private final ClusterSpec.Id cluster; - private final List snapshots; + private final NodeList clusterNodes; - ClusterTimeseries(ClusterSpec.Id cluster, List snapshots) { - this.cluster = cluster; - List sortedSnapshots = new ArrayList<>(snapshots); - Collections.sort(sortedSnapshots); - this.snapshots = Collections.unmodifiableList(sortedSnapshots); - } - - public boolean isEmpty() { return snapshots.isEmpty(); } - - public int size() { return snapshots.size(); } - - public ClusterMetricSnapshot get(int index) { return snapshots.get(index); } + /** The measurements for all nodes in this snapshot */ + private final List allTimeseries; - public List asList() { return snapshots; } + public ClusterTimeseries(Duration period, Cluster cluster, NodeList clusterNodes, MetricsDb db) { + this.clusterNodes = clusterNodes; + var timeseries = db.getNodeTimeseries(period, clusterNodes); - public ClusterSpec.Id cluster() { return cluster; } + if (cluster.lastScalingEvent().isPresent()) + timeseries = filter(timeseries, snapshot -> snapshot.generation() < 0 || // Content nodes do not yet send generation + snapshot.generation() >= cluster.lastScalingEvent().get().generation()); + timeseries = filter(timeseries, snapshot -> snapshot.inService() && snapshot.stable()); - public ClusterTimeseries add(ClusterMetricSnapshot snapshot) { - List list = new ArrayList<>(snapshots); - list.add(snapshot); - return new ClusterTimeseries(cluster, list); + this.allTimeseries = timeseries; } - /** The max query growth rate we can predict from this time-series as a fraction of the current traffic per minute */ - public double maxQueryGrowthRate() { - if (snapshots.isEmpty()) return 0.1; - - // Find the period having the highest growth rate, where total growth exceeds 30% increase - double maxGrowthRate = 0; // In query rate per minute - for (int start = 0; start < snapshots.size(); start++) { - if (start > 0) { // Optimization: Skip this point when starting from the previous is better relative to the best rate so far - Duration duration = durationBetween(start - 1, start); - if ( ! duration.isZero()) { - double growthRate = (queryRateAt(start - 1) - queryRateAt(start)) / duration.toMinutes(); - if (growthRate >= maxGrowthRate) - continue; - } - } - for (int end = start + 1; end < snapshots.size(); end++) { - if (queryRateAt(end) >= queryRateAt(start) * 1.3) { - Duration duration = durationBetween(start, end); - if (duration.isZero()) continue; - double growthRate = (queryRateAt(end) - queryRateAt(start)) / duration.toMinutes(); - if (growthRate > maxGrowthRate) - maxGrowthRate = growthRate; - } - } - } - if (maxGrowthRate == 0) { // No periods of significant growth - if (durationBetween(0, snapshots.size() - 1).toHours() < 24) - return 0.1; // ... because not much data - else - return 0.0; // ... 
because load is stable - } - if (queryRateNow() == 0) return 0.1; // Growth not expressible as a fraction of the current rate - return maxGrowthRate / queryRateNow(); + /** Returns the average number of measurements per node */ + public int measurementsPerNode() { + int measurementCount = allTimeseries.stream().mapToInt(m -> m.size()).sum(); + return measurementCount / clusterNodes.size(); } - /** The current query rate as a fraction of the peak rate in this timeseries */ - public double currentQueryFractionOfMax() { - if (snapshots.isEmpty()) return 0.5; - var max = snapshots.stream().mapToDouble(ClusterMetricSnapshot::queryRate).max().getAsDouble(); - return snapshots.get(snapshots.size() - 1).queryRate() / max; + /** Returns the number of nodes measured in this */ + public int nodesMeasured() { + return allTimeseries.size(); } - private double queryRateAt(int index) { - return snapshots.get(index).queryRate(); + /** Returns the average load of this resource in this */ + public double averageLoad(Resource resource) { + int measurementCount = allTimeseries.stream().mapToInt(m -> m.size()).sum(); + if (measurementCount == 0) return 0; + double measurementSum = allTimeseries.stream().flatMap(m -> m.asList().stream()).mapToDouble(m -> value(resource, m)).sum(); + return measurementSum / measurementCount; } - private double queryRateNow() { - return queryRateAt(snapshots.size() - 1); + private double value(Resource resource, MetricSnapshot snapshot) { + switch (resource) { + case cpu: return snapshot.cpu(); + case memory: return snapshot.memory(); + case disk: return snapshot.disk(); + default: throw new IllegalArgumentException("Got an unknown resource " + resource); + } } - private Duration durationBetween(int startIndex, int endIndex) { - return Duration.between(snapshots.get(startIndex).at(), snapshots.get(endIndex).at()); + private List filter(List timeseries, Predicate filter) { + return timeseries.stream().map(nodeTimeseries -> nodeTimeseries.filter(filter)).collect(Collectors.toList()); } } diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/MemoryMetricsDb.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/MemoryMetricsDb.java index bf8d354665a..1b1e5933604 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/MemoryMetricsDb.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/MemoryMetricsDb.java @@ -2,12 +2,9 @@ package com.yahoo.vespa.hosted.provision.autoscale; import com.yahoo.collections.Pair; -import com.yahoo.config.provision.ApplicationId; -import com.yahoo.config.provision.ClusterSpec; import com.yahoo.vespa.hosted.provision.Node; import com.yahoo.vespa.hosted.provision.NodeRepository; -import java.time.Clock; import java.time.Duration; import java.time.Instant; import java.util.ArrayList; @@ -29,10 +26,8 @@ public class MemoryMetricsDb implements MetricsDb { private final NodeRepository nodeRepository; - /** Metric time series by node (hostname). Each list of metric snapshots is sorted by increasing timestamp */ - private final Map nodeTimeseries = new HashMap<>(); - - private final Map, ClusterTimeseries> clusterTimeseries = new HashMap<>(); + /** Metric time seriest by node (hostname). 
Each list of metric snapshots is sorted by increasing timestamp */ + private final Map db = new HashMap<>(); /** Lock all access for now since we modify lists inside a map */ private final Object lock = new Object(); @@ -42,10 +37,7 @@ public class MemoryMetricsDb implements MetricsDb { } @Override - public Clock clock() { return nodeRepository.clock(); } - - @Override - public void addNodeMetrics(Collection> nodeMetrics) { + public void add(Collection> nodeMetrics) { synchronized (lock) { for (var value : nodeMetrics) { add(value.getFirst(), value.getSecond()); @@ -53,49 +45,28 @@ public class MemoryMetricsDb implements MetricsDb { } } - @Override - public void addClusterMetrics(ApplicationId application, Map clusterMetrics) { - synchronized (lock) { - for (var value : clusterMetrics.entrySet()) { - add(application, value.getKey(), value.getValue()); - } - } - } - - public void clearClusterMetrics(ApplicationId application, ClusterSpec.Id cluster) { - synchronized (lock) { - clusterTimeseries.remove(new Pair<>(application, cluster)); - } - } - @Override public List getNodeTimeseries(Duration period, Set hostnames) { Instant startTime = nodeRepository.clock().instant().minus(period); synchronized (lock) { return hostnames.stream() - .map(hostname -> nodeTimeseries.getOrDefault(hostname, new NodeTimeseries(hostname, List.of())).justAfter(startTime)) + .map(hostname -> db.getOrDefault(hostname, new NodeTimeseries(hostname, List.of())).justAfter(startTime)) .collect(Collectors.toList()); } } - @Override - public ClusterTimeseries getClusterTimeseries(ApplicationId application, ClusterSpec.Id cluster) { - return clusterTimeseries.computeIfAbsent(new Pair<>(application, cluster), - __ -> new ClusterTimeseries(cluster, new ArrayList<>())); - } - @Override public void gc() { synchronized (lock) { // Each measurement is Object + long + float = 16 + 8 + 4 = 28 bytes // 12 hours with 1k nodes and 3 resources and 1 measurement/sec is about 5Gb - for (String hostname : nodeTimeseries.keySet()) { - var timeseries = nodeTimeseries.get(hostname); + for (String hostname : db.keySet()) { + var timeseries = db.get(hostname); timeseries = timeseries.justAfter(nodeRepository.clock().instant().minus(Autoscaler.maxScalingWindow())); if (timeseries.isEmpty()) - nodeTimeseries.remove(hostname); + db.remove(hostname); else - nodeTimeseries.put(hostname, timeseries); + db.put(hostname, timeseries); } } } @@ -103,22 +74,16 @@ public class MemoryMetricsDb implements MetricsDb { @Override public void close() {} - private void add(String hostname, NodeMetricSnapshot snapshot) { - NodeTimeseries timeseries = nodeTimeseries.get(hostname); + private void add(String hostname, MetricSnapshot snapshot) { + NodeTimeseries timeseries = db.get(hostname); if (timeseries == null) { // new node Optional node = nodeRepository.nodes().node(hostname); if (node.isEmpty()) return; if (node.get().allocation().isEmpty()) return; timeseries = new NodeTimeseries(hostname, new ArrayList<>()); - nodeTimeseries.put(hostname, timeseries); + db.put(hostname, timeseries); } - nodeTimeseries.put(hostname, timeseries.add(snapshot)); - } - - private void add(ApplicationId application, ClusterSpec.Id cluster, ClusterMetricSnapshot snapshot) { - var key = new Pair<>(application, cluster); - var existing = clusterTimeseries.computeIfAbsent(key, __ -> new ClusterTimeseries(cluster, new ArrayList<>())); - clusterTimeseries.put(key, existing.add(snapshot)); + db.put(hostname, timeseries.add(snapshot)); } } diff --git 
a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/MetricSnapshot.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/MetricSnapshot.java new file mode 100644 index 00000000000..82812592809 --- /dev/null +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/MetricSnapshot.java @@ -0,0 +1,66 @@ +// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.vespa.hosted.provision.autoscale; + +import java.time.Instant; + +/** + * A single measurement of all values we measure for one node. + * + * @author bratseth + */ +public class MetricSnapshot implements Comparable { + + private final Instant at; + + private final double cpu; + private final double memory; + private final double disk; + private final long generation; + private final boolean inService; + private final boolean stable; + private final double queryRate; + + public MetricSnapshot(Instant at, double cpu, double memory, double disk, + long generation, boolean inService, boolean stable, + double queryRate) { + this.at = at; + this.cpu = cpu; + this.memory = memory; + this.disk = disk; + this.generation = generation; + this.inService = inService; + this.stable = stable; + this.queryRate = queryRate; + } + + public Instant at() { return at; } + public double cpu() { return cpu; } + public double memory() { return memory; } + public double disk() { return disk; } + + /** Queries per second */ + public double queryRate() { return queryRate; } + + /** The configuration generation at the time of this measurement, or -1 if not known */ + public long generation() { return generation; } + + public boolean inService() { return inService; } + public boolean stable() { return stable; } + + @Override + public int compareTo(MetricSnapshot other) { + return at.compareTo(other.at); + } + + @Override + public String toString() { return "metrics at " + at + ":" + + " cpu: " + cpu + + " memory: " + memory + + " disk: " + disk + + " generation: " + generation + + " inService: " + inService + + " stable: " + stable + + " queryRate: " + queryRate; + } + +} diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/MetricsDb.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/MetricsDb.java index 568c5f88661..6fdc87f2448 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/MetricsDb.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/MetricsDb.java @@ -2,17 +2,15 @@ package com.yahoo.vespa.hosted.provision.autoscale; import com.yahoo.collections.Pair; -import com.yahoo.config.provision.ApplicationId; -import com.yahoo.config.provision.ClusterSpec; import com.yahoo.vespa.hosted.provision.Node; import com.yahoo.vespa.hosted.provision.NodeList; import com.yahoo.vespa.hosted.provision.NodeRepository; import java.time.Clock; import java.time.Duration; +import java.time.Instant; import java.util.Collection; import java.util.List; -import java.util.Map; import java.util.Set; import java.util.stream.Collectors; @@ -23,12 +21,8 @@ import java.util.stream.Collectors; */ public interface MetricsDb { - Clock clock(); - - /** Adds node snapshots to this. */ - void addNodeMetrics(Collection> nodeMetrics); - - void addClusterMetrics(ApplicationId application, Map clusterMetrics); + /** Adds snapshots to this. 
*/ + void add(Collection> nodeMetrics); /** * Returns a list with one entry for each hostname containing @@ -42,15 +36,12 @@ public interface MetricsDb { return getNodeTimeseries(period, nodes.stream().map(Node::hostname).collect(Collectors.toSet())); } - /** Returns all cluster level metric snapshots for a given cluster */ - ClusterTimeseries getClusterTimeseries(ApplicationId applicationId, ClusterSpec.Id clusterId); - /** Must be called intermittently (as long as add is called) to gc old data */ void gc(); void close(); - static MemoryMetricsDb createTestInstance(NodeRepository nodeRepository) { + static MetricsDb createTestInstance(NodeRepository nodeRepository) { return new MemoryMetricsDb(nodeRepository); } diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/MetricsResponse.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/MetricsResponse.java index 0fa7a0e0bb1..d6661b89536 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/MetricsResponse.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/MetricsResponse.java @@ -11,6 +11,7 @@ import com.yahoo.slime.SlimeUtils; import com.yahoo.vespa.hosted.provision.Node; import com.yahoo.vespa.hosted.provision.NodeList; import com.yahoo.vespa.hosted.provision.NodeRepository; +import com.yahoo.vespa.hosted.provision.applications.Application; import java.time.Instant; import java.util.ArrayList; @@ -27,21 +28,14 @@ import java.util.Optional; */ public class MetricsResponse { - /** Node level metrics */ - private final Collection> nodeMetrics; - - /** - * Cluster level metrics. - * Must be aggregated at fetch time to avoid issues with nodes and nodes joining/leaving the cluster over time. 
- */ - private final Map clusterMetrics = new HashMap<>(); + private final Collection> nodeMetrics; /** Creates this from a metrics/V2 response */ public MetricsResponse(String response, NodeList applicationNodes, NodeRepository nodeRepository) { this(SlimeUtils.jsonToSlime(response), applicationNodes, nodeRepository); } - public MetricsResponse(Collection> metrics) { + public MetricsResponse(Collection> metrics) { this.nodeMetrics = metrics; } @@ -52,9 +46,7 @@ public class MetricsResponse { nodes.traverse((ArrayTraverser)(__, node) -> consumeNode(node, applicationNodes, nodeRepository)); } - public Collection> nodeMetrics() { return nodeMetrics; } - - public Map clusterMetrics() { return clusterMetrics; } + public Collection> metrics() { return nodeMetrics; } private void consumeNode(Inspector node, NodeList applicationNodes, NodeRepository nodeRepository) { String hostname = node.field("hostname").asString(); @@ -67,21 +59,14 @@ public class MetricsResponse { if (node.isEmpty()) return; // Node is not part of this cluster any more long timestampSecond = nodeData.field("timestamp").asLong(); Map values = consumeMetrics(nodeData.field("metrics")); - Instant at = Instant.ofEpochMilli(timestampSecond * 1000); - - nodeMetrics.add(new Pair<>(hostname, new NodeMetricSnapshot(at, - Metric.cpu.from(values), - Metric.memory.from(values), - Metric.disk.from(values), - (long)Metric.generation.from(values), - Metric.inService.from(values) > 0, - clusterIsStable(node.get(), applicationNodes, nodeRepository), - Metric.queryRate.from(values)))); - - var cluster = node.get().allocation().get().membership().cluster().id(); - var metrics = clusterMetrics.getOrDefault(cluster, new ClusterMetricSnapshot(at, 0.0)); - metrics = metrics.withQueryRate(metrics.queryRate() + Metric.queryRate.from(values)); - clusterMetrics.put(cluster, metrics); + nodeMetrics.add(new Pair<>(hostname, new MetricSnapshot(Instant.ofEpochMilli(timestampSecond * 1000), + Metric.cpu.from(values), + Metric.memory.from(values), + Metric.disk.from(values), + (long)Metric.generation.from(values), + Metric.inService.from(values) > 0, + clusterIsStable(node.get(), applicationNodes, nodeRepository), + Metric.queryRate.from(values)))); } private boolean clusterIsStable(Node node, NodeList applicationNodes, NodeRepository nodeRepository) { diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/NodeMetricSnapshot.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/NodeMetricSnapshot.java deleted file mode 100644 index be9f7bd4819..00000000000 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/NodeMetricSnapshot.java +++ /dev/null @@ -1,66 +0,0 @@ -// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. -package com.yahoo.vespa.hosted.provision.autoscale; - -import java.time.Instant; - -/** - * A single measurement of all values we measure for one node. 
- * - * @author bratseth - */ -public class NodeMetricSnapshot implements Comparable { - - private final Instant at; - - private final double cpu; - private final double memory; - private final double disk; - private final long generation; - private final boolean inService; - private final boolean stable; - private final double queryRate; - - public NodeMetricSnapshot(Instant at, double cpu, double memory, double disk, - long generation, boolean inService, boolean stable, - double queryRate) { - this.at = at; - this.cpu = cpu; - this.memory = memory; - this.disk = disk; - this.generation = generation; - this.inService = inService; - this.stable = stable; - this.queryRate = queryRate; - } - - public Instant at() { return at; } - public double cpu() { return cpu; } - public double memory() { return memory; } - public double disk() { return disk; } - - /** Queries per second */ - public double queryRate() { return queryRate; } - - /** The configuration generation at the time of this measurement, or -1 if not known */ - public long generation() { return generation; } - - public boolean inService() { return inService; } - public boolean stable() { return stable; } - - @Override - public int compareTo(NodeMetricSnapshot other) { - return at.compareTo(other.at); - } - - @Override - public String toString() { return "metrics at " + at + ":" + - " cpu: " + cpu + - " memory: " + memory + - " disk: " + disk + - " generation: " + generation + - " inService: " + inService + - " stable: " + stable + - " queryRate: " + queryRate; - } - -} diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/NodeTimeseries.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/NodeTimeseries.java index cedc2edfe63..24876609f58 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/NodeTimeseries.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/NodeTimeseries.java @@ -16,11 +16,11 @@ import java.util.stream.Collectors; public class NodeTimeseries { private final String hostname; - private final List snapshots; + private final List snapshots; - NodeTimeseries(String hostname, List snapshots) { + NodeTimeseries(String hostname, List snapshots) { this.hostname = hostname; - List sortedSnapshots = new ArrayList<>(snapshots); + List sortedSnapshots = new ArrayList<>(snapshots); Collections.sort(sortedSnapshots); this.snapshots = Collections.unmodifiableList(sortedSnapshots); } @@ -29,19 +29,19 @@ public class NodeTimeseries { public int size() { return snapshots.size(); } - public NodeMetricSnapshot get(int index) { return snapshots.get(index); } + public MetricSnapshot get(int index) { return snapshots.get(index); } - public List asList() { return snapshots; } + public List asList() { return snapshots; } public String hostname() { return hostname; } - public NodeTimeseries add(NodeMetricSnapshot snapshot) { - List list = new ArrayList<>(snapshots); + public NodeTimeseries add(MetricSnapshot snapshot) { + List list = new ArrayList<>(snapshots); list.add(snapshot); return new NodeTimeseries(hostname(), list); } - public NodeTimeseries filter(Predicate filter) { + public NodeTimeseries filter(Predicate filter) { return new NodeTimeseries(hostname, snapshots.stream().filter(filter).collect(Collectors.toList())); } diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/QuestMetricsDb.java 
b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/QuestMetricsDb.java index efa1de6bb97..37e70e3539a 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/QuestMetricsDb.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/QuestMetricsDb.java @@ -5,8 +5,6 @@ import com.google.inject.Inject; import com.yahoo.collections.ListMap; import com.yahoo.collections.Pair; import com.yahoo.component.AbstractComponent; -import com.yahoo.config.provision.ApplicationId; -import com.yahoo.config.provision.ClusterSpec; import com.yahoo.io.IOUtils; import com.yahoo.vespa.defaults.Defaults; import io.questdb.cairo.CairoConfiguration; @@ -32,7 +30,6 @@ import java.time.format.DateTimeFormatter; import java.util.ArrayList; import java.util.Collection; import java.util.List; -import java.util.Map; import java.util.Set; import java.util.logging.Level; import java.util.logging.Logger; @@ -48,8 +45,7 @@ import java.util.stream.Collectors; public class QuestMetricsDb extends AbstractComponent implements MetricsDb { private static final Logger log = Logger.getLogger(QuestMetricsDb.class.getName()); - private static final String nodeTable = "metrics"; - private static final String clusterTable = "clusterMetrics"; + private static final String table = "metrics"; private final Clock clock; private final String dataDir; @@ -73,8 +69,7 @@ public class QuestMetricsDb extends AbstractComponent implements MetricsDb { } private void initializeDb() { - IOUtils.createDirectory(dataDir + "/" + nodeTable); - IOUtils.createDirectory(dataDir + "/" + clusterTable); + IOUtils.createDirectory(dataDir + "/" + table); // silence Questdb's custom logging system IOUtils.writeFile(new File(dataDir, "quest-log.conf"), new byte[0]); @@ -83,36 +78,32 @@ public class QuestMetricsDb extends AbstractComponent implements MetricsDb { CairoConfiguration configuration = new DefaultCairoConfiguration(dataDir); engine = new CairoEngine(configuration); - ensureTablesExist(); + ensureExists(table); } @Override - public Clock clock() { return clock; } - - @Override - public void addNodeMetrics(Collection> snapshots) { - try (TableWriter writer = engine.getWriter(newContext().getCairoSecurityContext(), nodeTable)) { - addNodeMetrics(snapshots, writer); + public void add(Collection> snapshots) { + try (TableWriter writer = engine.getWriter(newContext().getCairoSecurityContext(), table)) { + add(snapshots, writer); } catch (CairoException e) { if (e.getMessage().contains("Cannot read offset")) { // This error seems non-recoverable repair(e); - try (TableWriter writer = engine.getWriter(newContext().getCairoSecurityContext(), nodeTable)) { - addNodeMetrics(snapshots, writer); + try (TableWriter writer = engine.getWriter(newContext().getCairoSecurityContext(), table)) { + add(snapshots, writer); } } } } - private void addNodeMetrics(Collection> snapshots, TableWriter writer) { + private void add(Collection> snapshots, TableWriter writer) { for (var snapshot : snapshots) { long atMillis = adjustIfRecent(snapshot.getSecond().at().toEpochMilli(), highestTimestampAdded); if (atMillis < highestTimestampAdded) continue; // Ignore old data highestTimestampAdded = atMillis; TableWriter.Row row = writer.newRow(atMillis * 1000); // in microseconds row.putStr(0, snapshot.getFirst()); - // (1 is timestamp) row.putFloat(2, (float)snapshot.getSecond().cpu()); row.putFloat(3, (float)snapshot.getSecond().memory()); row.putFloat(4, (float)snapshot.getSecond().disk()); @@ -125,71 +116,24 
@@ public class QuestMetricsDb extends AbstractComponent implements MetricsDb { writer.commit(); } - @Override - public void addClusterMetrics(ApplicationId application, Map snapshots) { - try (TableWriter writer = engine.getWriter(newContext().getCairoSecurityContext(), clusterTable)) { - addClusterMetrics(application, snapshots, writer); - } - catch (CairoException e) { - if (e.getMessage().contains("Cannot read offset")) { - // This error seems non-recoverable - repair(e); - try (TableWriter writer = engine.getWriter(newContext().getCairoSecurityContext(), clusterTable)) { - addClusterMetrics(application, snapshots, writer); - } - } - } - } - - private void addClusterMetrics(ApplicationId applicationId, Map snapshots, TableWriter writer) { - for (var snapshot : snapshots.entrySet()) { - long atMillis = adjustIfRecent(snapshot.getValue().at().toEpochMilli(), highestTimestampAdded); - if (atMillis < highestTimestampAdded) continue; // Ignore old data - highestTimestampAdded = atMillis; - TableWriter.Row row = writer.newRow(atMillis * 1000); // in microseconds - row.putStr(0, applicationId.serializedForm()); - row.putStr(1, snapshot.getKey().value()); - // (2 is timestamp) - row.putFloat(3, (float)snapshot.getValue().queryRate()); - row.append(); - } - writer.commit(); - } - @Override public List getNodeTimeseries(Duration period, Set hostnames) { try (SqlCompiler compiler = new SqlCompiler(engine)) { SqlExecutionContext context = newContext(); - var snapshots = getNodeSnapshots(clock.instant().minus(period), hostnames, compiler, context); + var snapshots = getSnapshots(clock.instant().minus(period), hostnames, compiler, context); return snapshots.entrySet().stream() .map(entry -> new NodeTimeseries(entry.getKey(), entry.getValue())) .collect(Collectors.toList()); } catch (SqlException e) { - throw new IllegalStateException("Could not read node timeseries data in Quest stored in " + dataDir, e); - } - } - - @Override - public ClusterTimeseries getClusterTimeseries(ApplicationId applicationId, ClusterSpec.Id clusterId) { - try (SqlCompiler compiler = new SqlCompiler(engine)) { - SqlExecutionContext context = newContext(); - return getClusterSnapshots(applicationId, clusterId, compiler, context); - } - catch (SqlException e) { - throw new IllegalStateException("Could not read cluster timeseries data in Quest stored in " + dataDir, e); + throw new IllegalStateException("Could not read timeseries data in Quest stored in " + dataDir, e); } } @Override public void gc() { - gc(nodeTable); - gc(clusterTable); - } - - private void gc(String table) { - // We remove full days at once and we want to see at least three days to not every only see weekend data - Instant oldestToKeep = clock.instant().minus(Duration.ofDays(4)); + // Since we remove full days at once we need to keep at least the scaling window + 1 day + Instant oldestToKeep = clock.instant().minus(Autoscaler.maxScalingWindow().plus(Duration.ofDays(1))); SqlExecutionContext context = newContext(); int partitions = 0; try (SqlCompiler compiler = new SqlCompiler(engine)) { @@ -213,7 +157,7 @@ public class QuestMetricsDb extends AbstractComponent implements MetricsDb { context); } catch (SqlException e) { - log.log(Level.WARNING, "Failed to gc old metrics data in " + dataDir + " table " + table, e); + log.log(Level.WARNING, "Failed to gc old metrics data in " + dataDir, e); } } @@ -237,26 +181,18 @@ public class QuestMetricsDb extends AbstractComponent implements MetricsDb { initializeDb(); } - private boolean exists(String table, 
SqlExecutionContext context) { - return 0 == engine.getStatus(context.getCairoSecurityContext(), new Path(), table); - } - - private void ensureTablesExist() { + private void ensureExists(String table) { SqlExecutionContext context = newContext(); - if (exists(nodeTable, context)) - ensureNodeTableIsUpdated(context); - else - createNodeTable(context); - - if (exists(clusterTable, context)) - ensureClusterTableIsUpdated(context); - else - createClusterTable(context); + if (0 == engine.getStatus(context.getCairoSecurityContext(), new Path(), table)) { // table exists + ensureTableIsUpdated(table, context); + } else { + createTable(table, context); + } } - private void createNodeTable(SqlExecutionContext context) { + private void createTable(String table, SqlExecutionContext context) { try (SqlCompiler compiler = new SqlCompiler(engine)) { - compiler.compile("create table " + nodeTable + + compiler.compile("create table " + table + " (hostname string, at timestamp, cpu_util float, mem_total_util float, disk_util float," + " application_generation long, inService boolean, stable boolean, queries_rate float)" + " timestamp(at)" + @@ -266,39 +202,20 @@ public class QuestMetricsDb extends AbstractComponent implements MetricsDb { // compiler.compile("alter table " + tableName + " alter column hostname add index", context); } catch (SqlException e) { - throw new IllegalStateException("Could not create Quest db table '" + nodeTable + "'", e); - } - } - - private void createClusterTable(SqlExecutionContext context) { - try (SqlCompiler compiler = new SqlCompiler(engine)) { - compiler.compile("create table " + clusterTable + - " (application string, cluster string, at timestamp, queries_rate float)" + - " timestamp(at)" + - "PARTITION BY DAY;", - context); - // We should do this if we get a version where selecting on strings work embedded, see below - // compiler.compile("alter table " + tableName + " alter column cluster add index", context); - } - catch (SqlException e) { - throw new IllegalStateException("Could not create Quest db table '" + clusterTable + "'", e); + throw new IllegalStateException("Could not create Quest db table '" + table + "'", e); } } - private void ensureNodeTableIsUpdated(SqlExecutionContext context) { + private void ensureTableIsUpdated(String table, SqlExecutionContext context) { try (SqlCompiler compiler = new SqlCompiler(engine)) { - if (0 == engine.getStatus(context.getCairoSecurityContext(), new Path(), nodeTable)) { - ensureColumnExists("queries_rate", "float", nodeTable, compiler, context); // TODO: Remove after March 2021 + if (0 == engine.getStatus(context.getCairoSecurityContext(), new Path(), table)) { + ensureColumnExists("queries_rate", "float", table, compiler, context); // TODO: Remove after March 2021 } } catch (SqlException e) { repair(e); } } - private void ensureClusterTableIsUpdated(SqlExecutionContext context) { - // Nothing to do for now - } - private void ensureColumnExists(String column, String columnType, String table, SqlCompiler compiler, SqlExecutionContext context) throws SqlException { if (columnNamesOf(table, compiler, context).contains(column)) return; @@ -329,34 +246,34 @@ public class QuestMetricsDb extends AbstractComponent implements MetricsDb { return timestamp; } - private ListMap getNodeSnapshots(Instant startTime, - Set hostnames, - SqlCompiler compiler, - SqlExecutionContext context) throws SqlException { + private ListMap getSnapshots(Instant startTime, + Set hostnames, + SqlCompiler compiler, + SqlExecutionContext context) throws 
SqlException { DateTimeFormatter formatter = DateTimeFormatter.ISO_DATE_TIME.withZone(ZoneId.of("UTC")); String from = formatter.format(startTime).substring(0, 19) + ".000000Z"; String to = formatter.format(clock.instant()).substring(0, 19) + ".000000Z"; - String sql = "select * from " + nodeTable + " where at in('" + from + "', '" + to + "');"; + String sql = "select * from " + table + " where at in('" + from + "', '" + to + "');"; // WHERE clauses does not work: // String sql = "select * from " + tableName + " where hostname in('host1', 'host2', 'host3');"; try (RecordCursorFactory factory = compiler.compile(sql, context).getRecordCursorFactory()) { - ListMap snapshots = new ListMap<>(); + ListMap snapshots = new ListMap<>(); try (RecordCursor cursor = factory.getCursor(context)) { Record record = cursor.getRecord(); while (cursor.hasNext()) { String hostname = record.getStr(0).toString(); if (hostnames.contains(hostname)) { snapshots.put(hostname, - new NodeMetricSnapshot(Instant.ofEpochMilli(record.getTimestamp(1) / 1000), - record.getFloat(2), - record.getFloat(3), - record.getFloat(4), - record.getLong(5), - record.getBool(6), - record.getBool(7), - record.getFloat(8))); + new MetricSnapshot(Instant.ofEpochMilli(record.getTimestamp(1) / 1000), + record.getFloat(2), + record.getFloat(3), + record.getFloat(4), + record.getLong(5), + record.getBool(6), + record.getBool(7), + record.getFloat(8))); } } } @@ -364,29 +281,6 @@ public class QuestMetricsDb extends AbstractComponent implements MetricsDb { } } - private ClusterTimeseries getClusterSnapshots(ApplicationId application, - ClusterSpec.Id cluster, - SqlCompiler compiler, - SqlExecutionContext context) throws SqlException { - String sql = "select * from " + clusterTable; - try (RecordCursorFactory factory = compiler.compile(sql, context).getRecordCursorFactory()) { - List snapshots = new ArrayList<>(); - try (RecordCursor cursor = factory.getCursor(context)) { - Record record = cursor.getRecord(); - while (cursor.hasNext()) { - String applicationIdString = record.getStr(0).toString(); - if ( ! 
application.serializedForm().equals(applicationIdString)) continue; - String clusterId = record.getStr(1).toString(); - if (cluster.value().equals(clusterId)) { - snapshots.add(new ClusterMetricSnapshot(Instant.ofEpochMilli(record.getTimestamp(2) / 1000), - record.getFloat(3))); - } - } - } - return new ClusterTimeseries(cluster, snapshots); - } - } - private SqlExecutionContext newContext() { return new SqlExecutionContextImpl(engine, 1); } diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/Resource.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/Resource.java index b841b31833f..8353f56df91 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/Resource.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/Resource.java @@ -12,7 +12,7 @@ public enum Resource { /** Cpu utilization ratio */ cpu { - public double idealAverageLoad() { return 0.8; } + public double idealAverageLoad() { return 0.4; } double valueFrom(NodeResources resources) { return resources.vcpu(); } }, diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/ResourceTarget.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/ResourceTarget.java index c7151e3ae7b..a2fbeb3b710 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/ResourceTarget.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/ResourceTarget.java @@ -3,8 +3,6 @@ package com.yahoo.vespa.hosted.provision.autoscale; import com.yahoo.vespa.hosted.provision.applications.Application; -import java.time.Duration; - /** * A resource target to hit for the allocation optimizer. * The target is measured in cpu, memory and disk per node in the allocation given by current. @@ -49,16 +47,11 @@ public class ResourceTarget { } /** Create a target of achieving ideal load given a current load */ - public static ResourceTarget idealLoad(ClusterTimeseries clusterTimeseries, - ClusterNodesTimeseries clusterNodesTimeseries, - AllocatableClusterResources current, - Application application) { - return new ResourceTarget(nodeUsage(Resource.cpu, clusterNodesTimeseries.averageLoad(Resource.cpu), current) - / idealCpuLoad(clusterTimeseries, clusterNodesTimeseries, application), - nodeUsage(Resource.memory, clusterNodesTimeseries.averageLoad(Resource.memory), current) - / Resource.memory.idealAverageLoad(), - nodeUsage(Resource.disk, clusterNodesTimeseries.averageLoad(Resource.disk), current) - / Resource.disk.idealAverageLoad(), + public static ResourceTarget idealLoad(double currentCpuLoad, double currentMemoryLoad, double currentDiskLoad, + AllocatableClusterResources current, Application application) { + return new ResourceTarget(nodeUsage(Resource.cpu, currentCpuLoad, current) / idealCpuLoad(application), + nodeUsage(Resource.memory, currentMemoryLoad, current) / Resource.memory.idealAverageLoad(), + nodeUsage(Resource.disk, currentDiskLoad, current) / Resource.disk.idealAverageLoad(), true); } @@ -71,27 +64,16 @@ public class ResourceTarget { } /** Ideal cpu load must take the application traffic fraction into account */ - private static double idealCpuLoad(ClusterTimeseries clusterTimeseries, - ClusterNodesTimeseries clusterNodesTimeseries, - Application application) { - // What's needed to have headroom for growth during scale-up as a fraction of current resources? 
-        double maxGrowthRate = clusterTimeseries.maxQueryGrowthRate(); // in fraction per minute of the current traffic
-        Duration scalingDuration = clusterNodesTimeseries.cluster().scalingDuration(clusterNodesTimeseries.clusterNodes().clusterSpec());
-        double growthRateHeadroom = 1 + maxGrowthRate * scalingDuration.toMinutes();
-        // Cap headroom at 10% above the historical observed peak
-        growthRateHeadroom = Math.min(growthRateHeadroom, 1 / clusterTimeseries.currentQueryFractionOfMax() + 0.1);
-
-        // How much headroom is needed to handle sudden arrival of additional traffic due to another zone going down?
-        double trafficShiftHeadroom;
+    private static double idealCpuLoad(Application application) {
+        double trafficFactor;
         if (application.status().maxReadShare() == 0) // No traffic fraction data
-            trafficShiftHeadroom = 2.0; // assume we currently get half of the global share of traffic
+            trafficFactor = 0.5; // assume we currently get half of the global share of traffic
         else
-            trafficShiftHeadroom = application.status().maxReadShare() / application.status().currentReadShare();
-
-        if (trafficShiftHeadroom > 2.0) // The expectation that we have almost no load with almost no queries is incorrect due
-            trafficShiftHeadroom = 2.0; // to write traffic; once that is separated we can increase this threshold
+            trafficFactor = application.status().currentReadShare() / application.status().maxReadShare();
 
-        return 1 / growthRateHeadroom * 1 / trafficShiftHeadroom * Resource.cpu.idealAverageLoad();
+        if (trafficFactor < 0.5) // The expectation that we have almost no load with almost no queries is incorrect due
+            trafficFactor = 0.5; // to write traffic; once that is separated we can lower this threshold (but not to 0)
+        return trafficFactor * Resource.cpu.idealAverageLoad();
     }
 
 }
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/AutoscalingMaintainer.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/AutoscalingMaintainer.java
index 9d910df01d9..bcfdaefb305 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/AutoscalingMaintainer.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/AutoscalingMaintainer.java
@@ -14,7 +14,7 @@ import com.yahoo.vespa.hosted.provision.applications.Applications;
 import com.yahoo.vespa.hosted.provision.applications.Cluster;
 import com.yahoo.vespa.hosted.provision.autoscale.AllocatableClusterResources;
 import com.yahoo.vespa.hosted.provision.autoscale.Autoscaler;
-import com.yahoo.vespa.hosted.provision.autoscale.NodeMetricSnapshot;
+import com.yahoo.vespa.hosted.provision.autoscale.MetricSnapshot;
 import com.yahoo.vespa.hosted.provision.autoscale.MetricsDb;
 import com.yahoo.vespa.hosted.provision.autoscale.NodeTimeseries;
 import com.yahoo.vespa.hosted.provision.node.History;
@@ -110,7 +110,7 @@ public class AutoscalingMaintainer extends NodeRepositoryMaintainer {
         // - 2. all nodes have switched to the right config generation
         for (NodeTimeseries nodeTimeseries : metricsDb.getNodeTimeseries(Duration.between(event.at(), clock().instant()), clusterNodes)) {
-            Optional<NodeMetricSnapshot> firstOnNewGeneration =
+            Optional<MetricSnapshot> firstOnNewGeneration =
                     nodeTimeseries.asList().stream()
                                   .filter(snapshot -> snapshot.generation() >= event.generation()).findFirst();
             if (firstOnNewGeneration.isEmpty()) return cluster; // Not completed
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeMetricsDbMaintainer.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeMetricsDbMaintainer.java
index 01ab73c20b2..b8548c4c3f4 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeMetricsDbMaintainer.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeMetricsDbMaintainer.java
@@ -8,6 +8,7 @@ import com.yahoo.vespa.hosted.provision.NodeRepository;
 import com.yahoo.vespa.hosted.provision.autoscale.MetricsFetcher;
 import com.yahoo.vespa.hosted.provision.autoscale.MetricsDb;
 import com.yahoo.vespa.hosted.provision.autoscale.MetricsResponse;
+import com.yahoo.yolean.Exceptions;
 
 import java.time.Duration;
 import java.util.Set;
@@ -73,8 +74,7 @@ public class NodeMetricsDbMaintainer extends NodeRepositoryMaintainer {
                     warnings.add(1);
                 }
                 else if (response != null) {
-                    metricsDb.addNodeMetrics(response.nodeMetrics());
-                    metricsDb.addClusterMetrics(application, response.clusterMetrics());
+                    metricsDb.add(response.metrics());
                 }
             }
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/RetiredExpirer.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/RetiredExpirer.java
index 10db9a08eeb..e0a11aa5dac 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/RetiredExpirer.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/RetiredExpirer.java
@@ -59,7 +59,7 @@ public class RetiredExpirer extends NodeRepositoryMaintainer {
             List<Node> retiredNodes = entry.getValue();
 
             try (MaintenanceDeployment deployment = new MaintenanceDeployment(application, deployer, metric, nodeRepository())) {
-                if ( ! deployment.isValid()) continue;
+                if ( ! deployment.isValid()) continue; // this will be done at another config server
 
                 List<Node> nodesToRemove = retiredNodes.stream().filter(this::canRemove).collect(Collectors.toList());
                 if (nodesToRemove.isEmpty()) continue;
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/restapi/ApplicationSerializer.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/restapi/ApplicationSerializer.java
index 4235bae6850..ceaf88dd7d9 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/restapi/ApplicationSerializer.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/restapi/ApplicationSerializer.java
@@ -9,7 +9,8 @@ import com.yahoo.vespa.hosted.provision.NodeList;
 import com.yahoo.vespa.hosted.provision.applications.Application;
 import com.yahoo.vespa.hosted.provision.applications.Cluster;
 import com.yahoo.vespa.hosted.provision.applications.ScalingEvent;
-import com.yahoo.vespa.hosted.provision.autoscale.ClusterNodesTimeseries;
+import com.yahoo.vespa.hosted.provision.autoscale.AllocatableClusterResources;
+import com.yahoo.vespa.hosted.provision.autoscale.ClusterTimeseries;
 import com.yahoo.vespa.hosted.provision.autoscale.MetricsDb;
 import com.yahoo.vespa.hosted.provision.autoscale.Resource;
 
@@ -73,7 +74,7 @@ public class ApplicationSerializer {
     }
 
     private static void clusterUtilizationToSlime(Cluster cluster, NodeList nodes, MetricsDb metricsDb, Cursor utilizationObject) {
-        var timeseries = new ClusterNodesTimeseries(Duration.ofHours(1), cluster, nodes, metricsDb);
+        var timeseries = new ClusterTimeseries(Duration.ofHours(1), cluster, nodes, metricsDb);
 
         utilizationObject.setDouble("cpu", timeseries.averageLoad(Resource.cpu));
         utilizationObject.setDouble("memory", timeseries.averageLoad(Resource.memory));
diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/AutoscalingIntegrationTest.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/AutoscalingIntegrationTest.java
index 8c6c116a225..87b8ccdc348 100644
--- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/AutoscalingIntegrationTest.java
+++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/AutoscalingIntegrationTest.java
@@ -46,7 +46,7 @@ public class AutoscalingIntegrationTest {
 
         for (int i = 0; i < 1000; i++) {
             tester.clock().advance(Duration.ofSeconds(10));
-            fetcher.fetchMetrics(application1).whenComplete((r, e) -> tester.nodeMetricsDb().addNodeMetrics(r.nodeMetrics()));
+            fetcher.fetchMetrics(application1).whenComplete((r, e) -> tester.nodeMetricsDb().add(r.metrics()));
             tester.clock().advance(Duration.ofSeconds(10));
             tester.nodeMetricsDb().gc();
         }
diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/AutoscalingTest.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/AutoscalingTest.java
index 708316802bd..3fef1d9746b 100644
--- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/AutoscalingTest.java
+++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/AutoscalingTest.java
@@ -56,7 +56,7 @@ public class AutoscalingTest {
         tester.clock().advance(Duration.ofDays(1));
         tester.addCpuMeasurements(0.25f, 1f, 120, application1);
         ClusterResources scaledResources = tester.assertResources("Scaling up since resource usage is too high",
-                                                                  14, 1, 1.4, 30.8, 30.8,
+                                                                  15, 1, 1.3, 28.6, 28.6,
                                                                   tester.autoscale(application1, cluster1.id(), min, max).target());
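
The Autoscaler hunk above swaps the growth-rate and traffic-shift headroom terms for a single read-share factor clamped to at least 0.5. The following standalone Java sketch only illustrates how the two formulas differ for the same inputs; it uses plain doubles in place of the repository's Application, ClusterTimeseries and Resource types, and the 0.8 ideal average CPU load is an assumed placeholder rather than a value taken from the code base.

// Illustrative comparison only; not part of the patch and not the Vespa implementation.
public class IdealCpuLoadSketch {

    static final double IDEAL_AVERAGE_CPU_LOAD = 0.8; // assumed placeholder for Resource.cpu.idealAverageLoad()

    /** Removed variant: divides the ideal load by growth-rate and traffic-shift headroom. */
    static double withHeadroom(double maxGrowthRatePerMinute, double scalingDurationMinutes,
                               double currentQueryFractionOfMax,
                               double currentReadShare, double maxReadShare) {
        double growthRateHeadroom = 1 + maxGrowthRatePerMinute * scalingDurationMinutes;
        // Cap headroom at 10% above the historically observed peak
        growthRateHeadroom = Math.min(growthRateHeadroom, 1 / currentQueryFractionOfMax + 0.1);

        double trafficShiftHeadroom = maxReadShare == 0 ? 2.0 : maxReadShare / currentReadShare;
        trafficShiftHeadroom = Math.min(trafficShiftHeadroom, 2.0);

        return 1 / growthRateHeadroom * 1 / trafficShiftHeadroom * IDEAL_AVERAGE_CPU_LOAD;
    }

    /** Restored variant: a read-share factor only, never below 0.5. */
    static double withTrafficFactor(double currentReadShare, double maxReadShare) {
        double trafficFactor = maxReadShare == 0 ? 0.5 : currentReadShare / maxReadShare;
        return Math.max(trafficFactor, 0.5) * IDEAL_AVERAGE_CPU_LOAD;
    }

    public static void main(String[] args) {
        // Example: 1% query growth per minute, a 10 minute scaling window, currently at 50% of the
        // observed traffic peak, and this zone serving half of the read share it may have to absorb.
        System.out.println(withHeadroom(0.01, 10, 0.5, 0.5, 1.0));   // (1/1.1) * (1/2) * 0.8 = ~0.36
        System.out.println(withTrafficFactor(0.5, 1.0));             // 0.5 * 0.8 = 0.40
    }
}

For these sample inputs the removed formula targets a lower ideal load (about 0.36 versus 0.40), that is, it reserves more CPU headroom for traffic growth and zone failover.
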
tester.deploy(application1, cluster1, scaledResources); @@ -74,7 +74,7 @@ public class AutoscalingTest { tester.addCpuMeasurements(0.1f, 1f, 120, application1); tester.assertResources("Scaling down to minimum since usage has gone down significantly", - 15, 1, 1.0, 28.6, 28.6, + 14, 1, 1.0, 30.8, 30.8, tester.autoscale(application1, cluster1.id(), min, max).target()); var events = tester.nodeRepository().applications().get(application1).get().cluster(cluster1.id()).get().scalingEvents(); @@ -129,7 +129,7 @@ public class AutoscalingTest { ClusterResources max = new ClusterResources(20, 1, new NodeResources(100, 1000, 1000, 1, NodeResources.DiskSpeed.any)); ClusterResources scaledResources = tester.assertResources("Scaling up since resource usage is too high", - 14, 1, 1.4, 30.8, 30.8, + 15, 1, 1.3, 28.6, 28.6, tester.autoscale(application1, cluster1.id(), min, max).target()); assertEquals("Disk speed from min/max is used", NodeResources.DiskSpeed.any, scaledResources.nodeResources().diskSpeed()); @@ -343,7 +343,7 @@ public class AutoscalingTest { tester.clock().advance(Duration.ofDays(1)); tester.addMemMeasurements(1.0f, 1f, 1000, application1); tester.assertResources("Increase group size to reduce memory load", - 8, 2, 13.6, 89.3, 62.5, + 8, 2, 12.9, 89.3, 62.5, tester.autoscale(application1, cluster1.id(), min, max).target()); } @@ -362,7 +362,7 @@ public class AutoscalingTest { tester.clock().advance(Duration.ofDays(2)); tester.addMemMeasurements(0.02f, 0.95f, 120, application1); tester.assertResources("Scaling down", - 6, 1, 2.9, 4.0, 95.0, + 6, 1, 2.8, 4.0, 95.0, tester.autoscale(application1, cluster1.id(), min, max).target()); } @@ -386,7 +386,7 @@ public class AutoscalingTest { tester.clock().advance(Duration.ofDays(2)); tester.addMemMeasurements(0.02f, 0.95f, 120, application1); tester.assertResources("Scaling down", - 6, 1, 2.9, 4.0, 95.0, + 6, 1, 2.8, 4.0, 95.0, tester.autoscale(application1, cluster1.id(), min, max).target()); } @@ -405,7 +405,7 @@ public class AutoscalingTest { tester.deploy(application1, cluster1, min); tester.addMeasurements(1.0f, 1.0f, 0.7f, 0, 1000, application1); tester.assertResources("Scaling up", - 4, 1, 7.4, 20, 200, + 4, 1, 7.0, 20, 200, tester.autoscale(application1, cluster1.id(), min, max).target()); } @@ -418,7 +418,7 @@ public class AutoscalingTest { tester.deploy(application1, cluster1, min); tester.addMeasurements(1.0f, 1.0f, 0.7f, 0, 1000, application1); tester.assertResources("Scaling up", - 4, 1, 7.4, 34, 200, + 4, 1, 7.0, 34, 200, tester.autoscale(application1, cluster1.id(), min, max).target()); } } @@ -457,7 +457,7 @@ public class AutoscalingTest { tester.clock().advance(Duration.ofDays(2)); tester.addMemMeasurements(0.3f, 0.6f, 1000, application1); tester.assertResources("Scaling down since resource usage has gone down", - 6, 1, 3, 83, 28.8, + 5, 1, 3, 83, 36, tester.autoscale(application1, cluster1.id(), min, max).target()); } @@ -491,44 +491,6 @@ public class AutoscalingTest { } - @Test - public void test_autoscaling_considers_growth_rate() { - NodeResources resources = new NodeResources(3, 100, 100, 1); - ClusterResources min = new ClusterResources( 1, 1, resources); - ClusterResources max = new ClusterResources(10, 1, resources); - AutoscalingTester tester = new AutoscalingTester(resources.withVcpu(resources.vcpu() * 2)); - - ApplicationId application1 = tester.applicationId("application1"); - ClusterSpec cluster1 = tester.clusterSpec(ClusterSpec.Type.container, "cluster1"); - - tester.deploy(application1, cluster1, 5, 1, resources); 
- tester.addCpuMeasurements(0.25f, 1f, 120, application1); - - // (no query rate data) - tester.assertResources("Advice to scale up since we assume we need 2x cpu for growth when no data", - 7, 1, 3, 100, 100, - tester.autoscale(application1, cluster1.id(), min, max).target()); - - tester.setScalingDuration(application1, cluster1.id(), Duration.ofMinutes(5)); - tester.addQueryRateMeasurements(application1, cluster1.id(), - 100, - t -> 10.0 + (t < 50 ? t : 100 - t)); - tester.assertResources("Advice to scale down since observed growth is much slower than scaling time", - 4, 1, 3, 100, 100, - tester.autoscale(application1, cluster1.id(), min, max).target()); - - tester.clearQueryRateMeasurements(application1, cluster1.id()); - - System.out.println("The fast growth one"); - tester.setScalingDuration(application1, cluster1.id(), Duration.ofMinutes(60)); - tester.addQueryRateMeasurements(application1, cluster1.id(), - 100, - t -> 10.0 + (t < 50 ? t * t * t : 125000 - (t - 49) * (t - 49) * (t - 49))); - tester.assertResources("Advice to scale up since observed growth is much faster than scaling time", - 10, 1, 3, 100, 100, - tester.autoscale(application1, cluster1.id(), min, max).target()); - } - /** * This calculator subtracts the memory tax when forecasting overhead, but not when actually * returning information about nodes. This is allowed because the forecast is a *worst case*. diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/AutoscalingTester.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/AutoscalingTester.java index ce3293aa518..156542ef1d4 100644 --- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/AutoscalingTester.java +++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/AutoscalingTester.java @@ -20,21 +20,15 @@ import com.yahoo.vespa.hosted.provision.NodeList; import com.yahoo.vespa.hosted.provision.NodeRepository; import com.yahoo.vespa.hosted.provision.Nodelike; import com.yahoo.vespa.hosted.provision.applications.Application; -import com.yahoo.vespa.hosted.provision.applications.Cluster; -import com.yahoo.vespa.hosted.provision.applications.ScalingEvent; import com.yahoo.vespa.hosted.provision.node.Agent; import com.yahoo.vespa.hosted.provision.node.IP; import com.yahoo.vespa.hosted.provision.provisioning.HostResourcesCalculator; import com.yahoo.vespa.hosted.provision.provisioning.ProvisioningTester; import java.time.Duration; -import java.time.Instant; -import java.util.HashMap; import java.util.List; -import java.util.Map; import java.util.Optional; import java.util.Set; -import java.util.function.IntFunction; import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertTrue; @@ -46,7 +40,7 @@ class AutoscalingTester { private final ProvisioningTester provisioningTester; private final Autoscaler autoscaler; - private final MemoryMetricsDb db; + private final MetricsDb db; private final MockHostResourcesCalculator hostResourcesCalculator; /** Creates an autoscaling tester with a single host type ready */ @@ -143,14 +137,14 @@ class AutoscalingTester { float cpu = value * oneExtraNodeFactor; float memory = (float) Resource.memory.idealAverageLoad() * otherResourcesLoad * oneExtraNodeFactor; float disk = (float) Resource.disk.idealAverageLoad() * otherResourcesLoad * oneExtraNodeFactor; - db.addNodeMetrics(List.of(new Pair<>(node.hostname(), new NodeMetricSnapshot(clock().instant(), - cpu, - memory, - disk, - 0, - true, - true, - 0.0)))); + 
db.add(List.of(new Pair<>(node.hostname(), new MetricSnapshot(clock().instant(), + cpu, + memory, + disk, + 0, + true, + true, + 0.0)))); } } } @@ -175,14 +169,14 @@ class AutoscalingTester { float cpu = (float) 0.2 * otherResourcesLoad * oneExtraNodeFactor; float memory = value * oneExtraNodeFactor; float disk = (float) Resource.disk.idealAverageLoad() * otherResourcesLoad * oneExtraNodeFactor; - db.addNodeMetrics(List.of(new Pair<>(node.hostname(), new NodeMetricSnapshot(clock().instant(), - cpu, - memory, - disk, - 0, - true, - true, - 0.0)))); + db.add(List.of(new Pair<>(node.hostname(), new MetricSnapshot(clock().instant(), + cpu, + memory, + disk, + 0, + true, + true, + 0.0)))); } } } @@ -197,14 +191,14 @@ class AutoscalingTester { for (int i = 0; i < count; i++) { clock().advance(Duration.ofMinutes(1)); for (Node node : nodes) { - db.addNodeMetrics(List.of(new Pair<>(node.hostname(), new NodeMetricSnapshot(clock().instant(), - cpu, - memory, - disk, - generation, - inService, - stable, - 0.0)))); + db.add(List.of(new Pair<>(node.hostname(), new MetricSnapshot(clock().instant(), + cpu, + memory, + disk, + generation, + inService, + stable, + 0.0)))); } } } @@ -216,41 +210,6 @@ class AutoscalingTester { nodeRepository().applications().put(application, nodeRepository().nodes().lock(applicationId)); } - /** Creates a single redeployment event with bogus data except for the given duration */ - public void setScalingDuration(ApplicationId applicationId, ClusterSpec.Id clusterId, Duration duration) { - Application application = nodeRepository().applications().require(applicationId); - Cluster cluster = application.cluster(clusterId).get(); - cluster = new Cluster(clusterId, - cluster.exclusive(), - cluster.minResources(), - cluster.maxResources(), - cluster.suggestedResources(), - cluster.targetResources(), - List.of(), // Remove scaling events - cluster.autoscalingStatus()); - cluster = cluster.with(ScalingEvent.create(cluster.minResources(), cluster.minResources(), - 0, - clock().instant().minus(Duration.ofDays(1).minus(duration))).withCompletion(clock().instant().minus(Duration.ofDays(1)))); - application = application.with(cluster); - nodeRepository().applications().put(application, nodeRepository().nodes().lock(applicationId)); - } - - /** Creates the given number of measurements, spaced 5 minutes between, using the given function */ - public void addQueryRateMeasurements(ApplicationId application, - ClusterSpec.Id cluster, - int measurements, - IntFunction queryRate) { - Instant time = clock().instant(); - for (int i = 0; i < measurements; i++) { - db.addClusterMetrics(application, Map.of(cluster, new ClusterMetricSnapshot(time, queryRate.apply(i)))); - time = time.plus(Duration.ofMinutes(5)); - } - } - - public void clearQueryRateMeasurements(ApplicationId application, ClusterSpec.Id cluster) { - db.clearClusterMetrics(application, cluster); - } - public Autoscaler.Advice autoscale(ApplicationId applicationId, ClusterSpec.Id clusterId, ClusterResources min, ClusterResources max) { Application application = nodeRepository().applications().get(applicationId).orElse(Application.empty(applicationId)) diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/ClusterTimeseriesTest.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/ClusterTimeseriesTest.java deleted file mode 100644 index 89fe2d76159..00000000000 --- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/ClusterTimeseriesTest.java +++ /dev/null @@ 
-1,109 +0,0 @@ -// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. -package com.yahoo.vespa.hosted.provision.autoscale; - -import com.yahoo.config.provision.ClusterSpec; -import com.yahoo.test.ManualClock; -import org.junit.Test; - -import java.time.Duration; -import java.util.ArrayList; -import java.util.List; -import java.util.function.IntFunction; - -import static org.junit.Assert.assertEquals; - -/** - * @author bratseth - */ -public class ClusterTimeseriesTest { - - private static final double delta = 0.001; - private static final ClusterSpec.Id cluster = new ClusterSpec.Id("test"); - - @Test - public void test_empty() { - var timeseries = new ClusterTimeseries(cluster, List.of()); - assertEquals(0.1, timeseries.maxQueryGrowthRate(), delta); - } - - @Test - public void test_constant_rate_short() { - var clock = new ManualClock(); - var timeseries = new ClusterTimeseries(cluster, rate(10, clock, t -> 50.0)); - assertEquals(0.1, timeseries.maxQueryGrowthRate(), delta); - } - - @Test - public void test_constant_rate_long() { - var clock = new ManualClock(); - var timeseries = new ClusterTimeseries(cluster, rate(10000, clock, t -> 50.0)); - assertEquals(0.0, timeseries.maxQueryGrowthRate(), delta); - } - - @Test - public void test_single_spike() { - var clock = new ManualClock(); - var snapshots = new ArrayList(); - snapshots.addAll(rate(1000, clock, t -> 50.0)); - snapshots.addAll(rate( 10, clock, t -> 400.0)); - snapshots.addAll(rate(1000, clock, t -> 50.0)); - assertEquals((400-50)/5.0/50.0, new ClusterTimeseries(cluster, snapshots).maxQueryGrowthRate(), delta); - } - - @Test - public void test_three_spikes() { - var clock = new ManualClock(); - var snapshots = new ArrayList(); - snapshots.addAll(rate(1000, clock, t -> 50.0)); - snapshots.addAll(rate( 10, clock, t -> 400.0)); - snapshots.addAll(rate(1000, clock, t -> 50.0)); - snapshots.addAll(rate( 10, clock, t -> 600.0)); - snapshots.addAll(rate(1000, clock, t -> 50.0)); - snapshots.addAll(rate( 10, clock, t -> 800.0)); - snapshots.addAll(rate(1000, clock, t -> 50.0)); - assertEquals((800-50)/5.0/50.0, new ClusterTimeseries(cluster, snapshots).maxQueryGrowthRate(), delta); - } - - @Test - public void test_single_hill() { - var clock = new ManualClock(); - var snapshots = new ArrayList(); - snapshots.addAll(rate(100, clock, t -> (double)t)); - snapshots.addAll(rate(100, clock, t -> 100.0 - t)); - assertEquals(1/5.0, new ClusterTimeseries(cluster, snapshots).maxQueryGrowthRate(), delta); - } - - @Test - public void test_smooth_curve() { - var clock = new ManualClock(); - var timeseries = new ClusterTimeseries(cluster, rate(10000, clock, - t -> 10.0 + 100.0 * Math.sin(t))); - assertEquals(0.26, timeseries.maxQueryGrowthRate(), delta); - } - - @Test - public void test_smooth_curve_small_variation() { - var clock = new ManualClock(); - var timeseries = new ClusterTimeseries(cluster, rate(10000, clock, - t -> 1000.0 + 10.0 * Math.sin(t))); - assertEquals(0.0, timeseries.maxQueryGrowthRate(), delta); - } - - @Test - public void test_two_periods() { - var clock = new ManualClock(); - var timeseries = new ClusterTimeseries(cluster, rate(10000, clock, - t -> 10.0 + 100.0 * Math.sin(t) + 80.0 * Math.sin(10 * t)) ); - assertEquals(1.765, timeseries.maxQueryGrowthRate(), delta); - } - - private List rate(int count, ManualClock clock, IntFunction rate) { - List snapshots = new ArrayList<>(); - for (int i = 0; i < count; i++) { - snapshots.add(new ClusterMetricSnapshot(clock.instant(), 
rate.apply(i))); - clock.advance(Duration.ofMinutes(5)); - } - return snapshots; - } - -} diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/MetricsV2MetricsFetcherTest.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/MetricsV2MetricsFetcherTest.java index 14a9a596e78..384e8dd8439 100644 --- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/MetricsV2MetricsFetcherTest.java +++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/MetricsV2MetricsFetcherTest.java @@ -45,7 +45,7 @@ public class MetricsV2MetricsFetcherTest { { httpClient.cannedResponse = cannedResponseForApplication1; - List> values = new ArrayList<>(fetcher.fetchMetrics(application1).get().nodeMetrics()); + List> values = new ArrayList<>(fetcher.fetchMetrics(application1).get().metrics()); assertEquals("http://host-1.yahoo.com:4080/metrics/v2/values?consumer=autoscaling", httpClient.requestsReceived.get(0)); assertEquals(2, values.size()); @@ -63,7 +63,7 @@ public class MetricsV2MetricsFetcherTest { { httpClient.cannedResponse = cannedResponseForApplication2; - List> values = new ArrayList<>(fetcher.fetchMetrics(application2).get().nodeMetrics()); + List> values = new ArrayList<>(fetcher.fetchMetrics(application2).get().metrics()); assertEquals("http://host-3.yahoo.com:4080/metrics/v2/values?consumer=autoscaling", httpClient.requestsReceived.get(1)); assertEquals(1, values.size()); @@ -81,7 +81,7 @@ public class MetricsV2MetricsFetcherTest { tester.nodeRepository().nodes().write(tester.nodeRepository().nodes().list(Node.State.active).owner(application2) .first().get().retire(tester.clock().instant()), lock); } - List> values = new ArrayList<>(fetcher.fetchMetrics(application2).get().nodeMetrics()); + List> values = new ArrayList<>(fetcher.fetchMetrics(application2).get().metrics()); assertFalse(values.get(0).getSecond().stable()); } } diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/NodeMetricsDbTest.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/NodeMetricsDbTest.java index 76e56004871..c1c94c7dd24 100644 --- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/NodeMetricsDbTest.java +++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/NodeMetricsDbTest.java @@ -38,19 +38,19 @@ public class NodeMetricsDbTest { ManualClock clock = tester.clock(); MetricsDb db = MetricsDb.createTestInstance(tester.nodeRepository()); - Collection> values = new ArrayList<>(); + Collection> values = new ArrayList<>(); for (int i = 0; i < 40; i++) { - values.add(new Pair<>(node0, new NodeMetricSnapshot(clock.instant(), - 0.9f, - 0.6f, - 0.6f, - 0, - true, - false, - 0.0))); + values.add(new Pair<>(node0, new MetricSnapshot(clock.instant(), + 0.9f, + 0.6f, + 0.6f, + 0, + true, + false, + 0.0))); clock.advance(Duration.ofMinutes(120)); } - db.addNodeMetrics(values); + db.add(values); // Avoid off-by-one bug when the below windows starts exactly on one of the above getEpochSecond() timestamps. 
clock.advance(Duration.ofMinutes(1)); diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/QuestMetricsDbTest.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/QuestMetricsDbTest.java index 18b92fa6b0f..70f9d581816 100644 --- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/QuestMetricsDbTest.java +++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/QuestMetricsDbTest.java @@ -2,8 +2,6 @@ package com.yahoo.vespa.hosted.provision.autoscale; import com.yahoo.collections.Pair; -import com.yahoo.config.provision.ApplicationId; -import com.yahoo.config.provision.ClusterSpec; import com.yahoo.io.IOUtils; import com.yahoo.test.ManualClock; import org.junit.Ignore; @@ -14,9 +12,7 @@ import java.time.Duration; import java.time.Instant; import java.util.ArrayList; import java.util.Collection; -import java.util.HashMap; import java.util.List; -import java.util.Map; import java.util.Set; import java.util.stream.Collectors; @@ -33,7 +29,7 @@ public class QuestMetricsDbTest { private static final double delta = 0.0000001; @Test - public void testNodeMetricsReadWrite() { + public void testReadWrite() { String dataDir = "data/QuestMetricsDbReadWrite"; IOUtils.recursiveDeleteDir(new File(dataDir)); IOUtils.createDirectory(dataDir + "/metrics"); @@ -42,7 +38,7 @@ public class QuestMetricsDbTest { Instant startTime = clock.instant(); clock.advance(Duration.ofSeconds(1)); - db.addNodeMetrics(nodeTimeseries(1000, Duration.ofSeconds(1), clock, "host1", "host2", "host3")); + db.add(timeseries(1000, Duration.ofSeconds(1), clock, "host1", "host2", "host3")); clock.advance(Duration.ofSeconds(1)); @@ -52,7 +48,7 @@ public class QuestMetricsDbTest { assertEquals(1, nodeTimeSeries1.size()); assertEquals("host1", nodeTimeSeries1.get(0).hostname()); assertEquals(1000, nodeTimeSeries1.get(0).size()); - NodeMetricSnapshot snapshot = nodeTimeSeries1.get(0).asList().get(0); + MetricSnapshot snapshot = nodeTimeSeries1.get(0).asList().get(0); assertEquals(startTime.plus(Duration.ofSeconds(1)), snapshot.at()); assertEquals(0.1, snapshot.cpu(), delta); assertEquals(0.2, snapshot.memory(), delta); @@ -78,56 +74,6 @@ public class QuestMetricsDbTest { assertEquals(2, nodeTimeSeries3.get(2).size()); } - @Test - public void testClusterMetricsReadWrite() { - String dataDir = "data/QuestMetricsDbReadWrite"; - IOUtils.recursiveDeleteDir(new File(dataDir)); - IOUtils.createDirectory(dataDir + "/clusterMetrics"); - ManualClock clock = new ManualClock("2020-10-01T00:00:00"); - QuestMetricsDb db = new QuestMetricsDb(dataDir, clock); - Instant startTime = clock.instant(); - - var application1 = ApplicationId.from("t1", "a1", "i1"); - var application2 = ApplicationId.from("t1", "a2", "i1"); - var cluster1 = new ClusterSpec.Id("cluster1"); - var cluster2 = new ClusterSpec.Id("cluster2"); - db.addClusterMetrics(application1, Map.of(cluster1, new ClusterMetricSnapshot(clock.instant(), 30.0))); - db.addClusterMetrics(application1, Map.of(cluster2, new ClusterMetricSnapshot(clock.instant(), 60.0))); - clock.advance(Duration.ofMinutes(1)); - db.addClusterMetrics(application1, Map.of(cluster1, new ClusterMetricSnapshot(clock.instant(), 45.0))); - clock.advance(Duration.ofMinutes(1)); - db.addClusterMetrics(application2, Map.of(cluster1, new ClusterMetricSnapshot(clock.instant(), 90.0))); - - ClusterTimeseries clusterTimeseries11 = db.getClusterTimeseries(application1, cluster1); - assertEquals(cluster1, clusterTimeseries11.cluster()); 
- assertEquals(2, clusterTimeseries11.asList().size()); - - ClusterMetricSnapshot snapshot111 = clusterTimeseries11.get(0); - assertEquals(startTime, snapshot111.at()); - assertEquals(30, snapshot111.queryRate(), delta); - ClusterMetricSnapshot snapshot112 = clusterTimeseries11.get(1); - assertEquals(startTime.plus(Duration.ofMinutes(1)), snapshot112.at()); - assertEquals(45, snapshot112.queryRate(), delta); - - - ClusterTimeseries clusterTimeseries12 = db.getClusterTimeseries(application1, cluster2); - assertEquals(cluster2, clusterTimeseries12.cluster()); - assertEquals(1, clusterTimeseries12.asList().size()); - - ClusterMetricSnapshot snapshot121 = clusterTimeseries12.get(0); - assertEquals(startTime, snapshot121.at()); - assertEquals(60, snapshot121.queryRate(), delta); - - - ClusterTimeseries clusterTimeseries21 = db.getClusterTimeseries(application2, cluster1); - assertEquals(cluster1, clusterTimeseries21.cluster()); - assertEquals(1, clusterTimeseries21.asList().size()); - - ClusterMetricSnapshot snapshot211 = clusterTimeseries21.get(0); - assertEquals(startTime.plus(Duration.ofMinutes(2)), snapshot211.at()); - assertEquals(90, snapshot211.queryRate(), delta); - } - @Test public void testWriteOldData() { String dataDir = "data/QuestMetricsDbWriteOldData"; @@ -137,19 +83,19 @@ public class QuestMetricsDbTest { QuestMetricsDb db = new QuestMetricsDb(dataDir, clock); Instant startTime = clock.instant(); clock.advance(Duration.ofSeconds(300)); - db.addNodeMetrics(timeseriesAt(10, clock.instant(), "host1", "host2", "host3")); + db.add(timeseriesAt(10, clock.instant(), "host1", "host2", "host3")); clock.advance(Duration.ofSeconds(1)); List nodeTimeSeries1 = db.getNodeTimeseries(Duration.between(startTime, clock.instant()), Set.of("host1")); assertEquals(10, nodeTimeSeries1.get(0).size()); - db.addNodeMetrics(timeseriesAt(10, clock.instant().minus(Duration.ofSeconds(20)), "host1", "host2", "host3")); + db.add(timeseriesAt(10, clock.instant().minus(Duration.ofSeconds(20)), "host1", "host2", "host3")); List nodeTimeSeries2 = db.getNodeTimeseries(Duration.between(startTime, clock.instant()), Set.of("host1")); assertEquals("Recent data is accepted", 20, nodeTimeSeries2.get(0).size()); - db.addNodeMetrics(timeseriesAt(10, clock.instant().minus(Duration.ofSeconds(200)), "host1", "host2", "host3")); + db.add(timeseriesAt(10, clock.instant().minus(Duration.ofSeconds(200)), "host1", "host2", "host3")); List nodeTimeSeries3 = db.getNodeTimeseries(Duration.between(startTime, clock.instant()), Set.of("host1")); assertEquals("Too old data is rejected", 20, nodeTimeSeries3.get(0).size()); @@ -165,15 +111,15 @@ public class QuestMetricsDbTest { Instant startTime = clock.instant(); int dayOffset = 3; clock.advance(Duration.ofHours(dayOffset)); - db.addNodeMetrics(nodeTimeseries(24 * 10, Duration.ofHours(1), clock, "host1", "host2", "host3")); + db.add(timeseries(24 * 10, Duration.ofHours(1), clock, "host1", "host2", "host3")); assertEquals(24 * 10, db.getNodeTimeseries(Duration.between(startTime, clock.instant()), Set.of("host1")).get(0).size()); db.gc(); - assertEquals(75, db.getNodeTimeseries(Duration.between(startTime, clock.instant()), + assertEquals(48 * 1 + dayOffset, db.getNodeTimeseries(Duration.between(startTime, clock.instant()), Set.of("host1")).get(0).size()); db.gc(); // no-op - assertEquals(75, db.getNodeTimeseries(Duration.between(startTime, clock.instant()), + assertEquals(48 * 1 + dayOffset, db.getNodeTimeseries(Duration.between(startTime, clock.instant()), 
Set.of("host1")).get(0).size()); } @@ -200,7 +146,7 @@ public class QuestMetricsDbTest { System.out.println(" " + snapshot); clock.advance(Duration.ofSeconds(1)); - db.addNodeMetrics(nodeTimeseries(2, Duration.ofSeconds(1), clock, "host1")); + db.add(timeseries(2, Duration.ofSeconds(1), clock, "host1")); System.out.println("New data written and read:"); timeseries = db.getNodeTimeseries(Duration.ofSeconds(2), Set.of("host1")); for (var snapshot : timeseries.get(0).asList()) @@ -217,7 +163,7 @@ public class QuestMetricsDbTest { ManualClock clock = new ManualClock("2020-10-01T00:00:00"); QuestMetricsDb db = new QuestMetricsDb(dataDir, clock); Instant startTime = clock.instant(); - db.addNodeMetrics(nodeTimeseries(10, Duration.ofSeconds(1), clock, "host1")); + db.add(timeseries(10, Duration.ofSeconds(1), clock, "host1")); int added = db.getNodeTimeseries(Duration.between(startTime, clock.instant()), Set.of("host1")).get(0).asList().size(); @@ -225,46 +171,36 @@ public class QuestMetricsDbTest { db.close(); } - private Collection> nodeTimeseries(int countPerHost, Duration sampleRate, ManualClock clock, - String ... hosts) { - Collection> timeseries = new ArrayList<>(); + private Collection> timeseries(int countPerHost, Duration sampleRate, ManualClock clock, + String ... hosts) { + Collection> timeseries = new ArrayList<>(); for (int i = 1; i <= countPerHost; i++) { for (String host : hosts) - timeseries.add(new Pair<>(host, new NodeMetricSnapshot(clock.instant(), + timeseries.add(new Pair<>(host, new MetricSnapshot(clock.instant(), i * 0.1, i * 0.2, i * 0.4, i % 100, - true, - true, - 30.0))); - clock.advance(sampleRate); - } - return timeseries; - } - - private List clusterTimeseries(int count, Duration sampleRate, ManualClock clock, - ClusterSpec.Id cluster) { - List timeseries = new ArrayList<>(); - for (int i = 1; i <= count; i++) { - timeseries.add(new ClusterMetricSnapshot(clock.instant(), 30.0)); + true, + true, + 30.0))); clock.advance(sampleRate); } return timeseries; } - private Collection> timeseriesAt(int countPerHost, Instant at, String ... hosts) { - Collection> timeseries = new ArrayList<>(); + private Collection> timeseriesAt(int countPerHost, Instant at, String ... 
hosts) { + Collection> timeseries = new ArrayList<>(); for (int i = 1; i <= countPerHost; i++) { for (String host : hosts) - timeseries.add(new Pair<>(host, new NodeMetricSnapshot(at, + timeseries.add(new Pair<>(host, new MetricSnapshot(at, i * 0.1, i * 0.2, i * 0.4, i % 100, - true, - false, - 0.0))); + true, + false, + 0.0))); } return timeseries; } diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/AutoscalingMaintainerTester.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/AutoscalingMaintainerTester.java index e8cfe6a2310..1b531fd3237 100644 --- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/AutoscalingMaintainerTester.java +++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/AutoscalingMaintainerTester.java @@ -16,7 +16,8 @@ import com.yahoo.vespa.hosted.provision.Node; import com.yahoo.vespa.hosted.provision.NodeList; import com.yahoo.vespa.hosted.provision.NodeRepository; import com.yahoo.vespa.hosted.provision.applications.Cluster; -import com.yahoo.vespa.hosted.provision.autoscale.NodeMetricSnapshot; +import com.yahoo.vespa.hosted.provision.applications.ScalingEvent; +import com.yahoo.vespa.hosted.provision.autoscale.MetricSnapshot; import com.yahoo.vespa.hosted.provision.autoscale.MetricsDb; import com.yahoo.vespa.hosted.provision.provisioning.FlavorConfigBuilder; import com.yahoo.vespa.hosted.provision.provisioning.ProvisioningTester; @@ -74,14 +75,14 @@ public class AutoscalingMaintainerTester { NodeList nodes = nodeRepository().nodes().list(Node.State.active).owner(applicationId); for (int i = 0; i < count; i++) { for (Node node : nodes) - metricsDb.addNodeMetrics(List.of(new Pair<>(node.hostname(), new NodeMetricSnapshot(clock().instant(), - cpu, - mem, - disk, - generation, - true, - true, - 0.0)))); + metricsDb.add(List.of(new Pair<>(node.hostname(), new MetricSnapshot(clock().instant(), + cpu, + mem, + disk, + generation, + true, + true, + 0.0)))); } } diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/NodeMetricsDbMaintainerTest.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/NodeMetricsDbMaintainerTest.java index 5af787092d5..e99f7740c29 100644 --- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/NodeMetricsDbMaintainerTest.java +++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/NodeMetricsDbMaintainerTest.java @@ -4,7 +4,7 @@ package com.yahoo.vespa.hosted.provision.maintenance; import com.yahoo.config.provision.Capacity; import com.yahoo.config.provision.ClusterResources; import com.yahoo.config.provision.NodeResources; -import com.yahoo.vespa.hosted.provision.autoscale.NodeMetricSnapshot; +import com.yahoo.vespa.hosted.provision.autoscale.MetricSnapshot; import com.yahoo.vespa.hosted.provision.autoscale.MetricsDb; import com.yahoo.vespa.hosted.provision.autoscale.MetricsV2MetricsFetcher; import com.yahoo.vespa.hosted.provision.autoscale.NodeTimeseries; @@ -49,9 +49,9 @@ public class NodeMetricsDbMaintainerTest { List timeseriesList = db.getNodeTimeseries(Duration.ofDays(1), Set.of("host-1.yahoo.com", "host-2.yahoo.com")); assertEquals(2, timeseriesList.size()); - List allSnapshots = timeseriesList.stream() - .flatMap(timeseries -> timeseries.asList().stream()) - .collect(Collectors.toList()); + List allSnapshots = timeseriesList.stream() + .flatMap(timeseries -> timeseries.asList().stream()) + 
.collect(Collectors.toList()); assertTrue(allSnapshots.stream().anyMatch(snapshot -> snapshot.inService())); assertTrue(allSnapshots.stream().anyMatch(snapshot -> ! snapshot.inService())); } diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/ScalingSuggestionsMaintainerTest.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/ScalingSuggestionsMaintainerTest.java index 88d39e887d3..d5b7903b94c 100644 --- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/ScalingSuggestionsMaintainerTest.java +++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/ScalingSuggestionsMaintainerTest.java @@ -17,7 +17,7 @@ import com.yahoo.vespa.hosted.provision.Node; import com.yahoo.vespa.hosted.provision.NodeList; import com.yahoo.vespa.hosted.provision.NodeRepository; import com.yahoo.vespa.hosted.provision.applications.Cluster; -import com.yahoo.vespa.hosted.provision.autoscale.NodeMetricSnapshot; +import com.yahoo.vespa.hosted.provision.autoscale.MetricSnapshot; import com.yahoo.vespa.hosted.provision.autoscale.MetricsDb; import com.yahoo.vespa.hosted.provision.autoscale.Resource; import com.yahoo.vespa.hosted.provision.provisioning.FlavorConfigBuilder; @@ -74,7 +74,7 @@ public class ScalingSuggestionsMaintainerTest { assertEquals("14 nodes with [vcpu: 6.9, memory: 5.1 Gb, disk 15.0 Gb, bandwidth: 0.1 Gbps]", suggestionOf(app1, cluster1, tester).get().resources().toString()); - assertEquals("9 nodes with [vcpu: 13.8, memory: 4.0 Gb, disk 10.3 Gb, bandwidth: 0.1 Gbps]", + assertEquals("8 nodes with [vcpu: 14.7, memory: 4.0 Gb, disk 11.8 Gb, bandwidth: 0.1 Gbps]", suggestionOf(app2, cluster2, tester).get().resources().toString()); // Utilization goes way down @@ -125,14 +125,14 @@ public class ScalingSuggestionsMaintainerTest { NodeList nodes = nodeRepository.nodes().list(Node.State.active).owner(applicationId); for (int i = 0; i < count; i++) { for (Node node : nodes) - db.addNodeMetrics(List.of(new Pair<>(node.hostname(), new NodeMetricSnapshot(nodeRepository.clock().instant(), - cpu, - memory, - disk, - generation, - true, - true, - 0.0)))); + db.add(List.of(new Pair<>(node.hostname(), new MetricSnapshot(nodeRepository.clock().instant(), + cpu, + memory, + disk, + generation, + true, + true, + 0.0)))); } } -- cgit v1.2.3
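
With the revert applied, node metrics are back to a single MetricSnapshot type and one MetricsDb.add(...) entry point, as the test changes above show. The sketch below condenses that usage pattern into one place; it assumes the node-repository classes and test utilities referenced in the patch (QuestMetricsDb, MetricSnapshot, NodeTimeseries, ManualClock, Pair, IOUtils) are on the classpath, and the data directory is a hypothetical example path. It is an illustration of the restored API shape, not an addition to the patch.

import com.yahoo.collections.Pair;
import com.yahoo.io.IOUtils;
import com.yahoo.test.ManualClock;
import com.yahoo.vespa.hosted.provision.autoscale.MetricSnapshot;
import com.yahoo.vespa.hosted.provision.autoscale.NodeTimeseries;
import com.yahoo.vespa.hosted.provision.autoscale.QuestMetricsDb;

import java.io.File;
import java.time.Duration;
import java.util.ArrayList;
import java.util.List;
import java.util.Set;

// Usage sketch of the restored single-snapshot metrics API; mirrors the helpers in QuestMetricsDbTest above.
public class MetricsDbUsageSketch {

    public static void main(String[] args) {
        String dataDir = "data/MetricsDbUsageSketch"; // hypothetical directory, as in the tests above
        IOUtils.recursiveDeleteDir(new File(dataDir));
        IOUtils.createDirectory(dataDir + "/metrics");
        ManualClock clock = new ManualClock("2020-10-01T00:00:00");
        QuestMetricsDb db = new QuestMetricsDb(dataDir, clock);

        // One snapshot per host per minute, keyed by hostname, exactly as the test helpers do.
        List<Pair<String, MetricSnapshot>> samples = new ArrayList<>();
        for (int i = 0; i < 10; i++) {
            for (String host : List.of("host1", "host2")) {
                samples.add(new Pair<>(host, new MetricSnapshot(clock.instant(),
                                                                0.3,    // cpu
                                                                0.5,    // memory
                                                                0.4,    // disk
                                                                1,      // config generation
                                                                true,   // in service
                                                                true,   // stable
                                                                25.0))); // query rate
            }
            clock.advance(Duration.ofMinutes(1));
        }
        db.add(samples);

        // Read one host's time series back out of the db.
        List<NodeTimeseries> series = db.getNodeTimeseries(Duration.ofHours(1), Set.of("host1"));
        System.out.println(series.get(0).size() + " snapshots for " + series.get(0).hostname());

        db.close();
    }
}
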