author     Jon Bratseth <bratseth@gmail.com>    2021-12-06 16:03:42 +0100
committer  Jon Bratseth <bratseth@gmail.com>    2021-12-06 16:03:42 +0100
commit     b361a711b50e456831775e64642e0a7cb30b9d1c (patch)
tree       ddb5a9665abdc219666d72a8fdd9dfd60ee0409a /node-repository
parent     76a3908ea1d3475d36a7b2d15b867a18d7952862 (diff)
Ignore warmup period
Diffstat (limited to 'node-repository')
-rw-r--r--  node-repository/src/main/java/com/yahoo/vespa/hosted/provision/applications/AutoscalingStatus.java    2
-rw-r--r--  node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/Autoscaler.java    2
-rw-r--r--  node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/ClusterModel.java    9
-rw-r--r--  node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/ClusterNodesTimeseries.java    30
-rw-r--r--  node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/ClusterTimeseries.java    15
-rw-r--r--  node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/MemoryMetricsDb.java    9
-rw-r--r--  node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/NodeTimeseries.java    35
-rw-r--r--  node-repository/src/test/java/com/yahoo/vespa/hosted/provision/NodeListMicroBenchmarkTest.java    1
-rw-r--r--  node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/AutoscalingTest.java    104
-rw-r--r--  node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/AutoscalingTester.java    63
-rw-r--r--  node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/AutoscalingMaintainerTest.java    39
-rw-r--r--  node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/AutoscalingMaintainerTester.java    5
-rw-r--r--  node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/ScalingSuggestionsMaintainerTest.java    23
13 files changed, 213 insertions, 124 deletions
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/applications/AutoscalingStatus.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/applications/AutoscalingStatus.java
index f44785cfab3..71a6d661594 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/applications/AutoscalingStatus.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/applications/AutoscalingStatus.java
@@ -1,8 +1,6 @@
// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
package com.yahoo.vespa.hosted.provision.applications;
-import com.yahoo.vespa.hosted.provision.autoscale.Autoscaler;
-
import java.util.Objects;
/**
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/Autoscaler.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/Autoscaler.java
index dcfb8fb7246..80c192f8353 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/Autoscaler.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/Autoscaler.java
@@ -212,7 +212,7 @@ public class Autoscaler {
@Override
public String toString() {
return "autoscaling advice: " +
- (present ? (target.isPresent() ? "Scale to " + target.get() : "Don't scale") : " None");
+ (present ? (target.isPresent() ? "Scale to " + target.get() : "Don't scale") : "None");
}
}
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/ClusterModel.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/ClusterModel.java
index 1001ab83cc0..e1e670c5b01 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/ClusterModel.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/ClusterModel.java
@@ -24,7 +24,10 @@ public class ClusterModel {
private static final Logger log = Logger.getLogger(ClusterModel.class.getName());
- private static final Duration CURRENT_LOAD_DURATION = Duration.ofMinutes(5);
+ /** Containers typically use more cpu right after generation change, so discard those metrics */
+ public static final Duration warmupDuration = Duration.ofSeconds(90);
+
+ private static final Duration currentLoadDuration = Duration.ofMinutes(5);
static final double idealQueryCpuLoad = 0.8;
static final double idealWriteCpuLoad = 0.95;
@@ -100,8 +103,8 @@ public class ClusterModel {
return queryFractionOfMax = clusterTimeseries().queryFractionOfMax(scalingDuration(), clock);
}
- /** Returns average load during the last {@link #CURRENT_LOAD_DURATION} */
- public Load currentLoad() { return nodeTimeseries().averageLoad(clock.instant().minus(CURRENT_LOAD_DURATION)); }
+ /** Returns average load during the last {@link #currentLoadDuration} */
+ public Load currentLoad() { return nodeTimeseries().averageLoad(clock.instant().minus(currentLoadDuration)); }
/** Returns average load during the last {@link #scalingDuration()} */
public Load averageLoad() { return nodeTimeseries().averageLoad(clock.instant().minus(scalingDuration())); }
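Note: illustrative sketch only, not part of the patch. It shows how a 90-second warmup cutoff like the ClusterModel.warmupDuration introduced above can be applied to a metric timestamp; the class and method names below are made up for the example, and only java.time is assumed.
import java.time.Duration;
import java.time.Instant;

class WarmupCutoffSketch {

    // Mirrors the value introduced above; the rest of this class is hypothetical
    static final Duration warmupDuration = Duration.ofSeconds(90);

    /** True if a metric taken at 'at' falls after the warmup window that follows a generation change */
    static boolean afterWarmup(Instant at, Instant generationChangeAt) {
        return ! at.isBefore(generationChangeAt.plus(warmupDuration));
    }

    public static void main(String[] args) {
        Instant change = Instant.parse("2021-12-06T15:00:00Z");
        System.out.println(afterWarmup(change.plusSeconds(30), change));   // false: still warming up
        System.out.println(afterWarmup(change.plusSeconds(120), change));  // true: past the 90s window
    }
}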
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/ClusterNodesTimeseries.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/ClusterNodesTimeseries.java
index f5657966e5f..5ad4ef2e263 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/ClusterNodesTimeseries.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/ClusterNodesTimeseries.java
@@ -10,6 +10,8 @@ import java.util.List;
import java.util.function.Predicate;
import java.util.stream.Collectors;
+import static com.yahoo.vespa.hosted.provision.autoscale.ClusterModel.warmupDuration;
+
/**
* A series of metric snapshots for the nodes of a cluster used to compute load
*
@@ -24,13 +26,18 @@ public class ClusterNodesTimeseries {
public ClusterNodesTimeseries(Duration period, Cluster cluster, NodeList clusterNodes, MetricsDb db) {
this.clusterNodes = clusterNodes;
- var timeseries = db.getNodeTimeseries(period, clusterNodes);
-
- if (cluster.lastScalingEvent().isPresent())
- timeseries = filter(timeseries, snapshot -> snapshot.generation() < 0 || // Content nodes do not yet send generation
- snapshot.generation() >= cluster.lastScalingEvent().get().generation());
- timeseries = filter(timeseries, snapshot -> snapshot.inService() && snapshot.stable());
+ // Look warmupDuration*4 into the past so that any generation change is visible in the window.
+ // If none can be detected we assume the node is new or was down.
+ // In either of these cases, or when there is a generation change, we ignore
+ // the first warmupDuration of metrics
+ var timeseries = db.getNodeTimeseries(period.plus(warmupDuration.multipliedBy(4)), clusterNodes);
+ if (cluster.lastScalingEvent().isPresent()) {
+ long currentGeneration = cluster.lastScalingEvent().get().generation();
+ timeseries = keepCurrentGenerationAfterWarmup(timeseries, currentGeneration);
+ }
+ timeseries = keep(timeseries, snapshot -> snapshot.inService() && snapshot.stable());
+ timeseries = keep(timeseries, snapshot -> ! snapshot.at().isBefore(db.clock().instant().minus(period)));
this.timeseries = timeseries;
}
@@ -62,8 +69,15 @@ public class ClusterNodesTimeseries {
return total.divide(count);
}
- private List<NodeTimeseries> filter(List<NodeTimeseries> timeseries, Predicate<NodeMetricSnapshot> filter) {
- return timeseries.stream().map(nodeTimeseries -> nodeTimeseries.filter(filter)).collect(Collectors.toList());
+ private static List<NodeTimeseries> keep(List<NodeTimeseries> timeseries, Predicate<NodeMetricSnapshot> filter) {
+ return timeseries.stream().map(nodeTimeseries -> nodeTimeseries.keep(filter)).collect(Collectors.toList());
+ }
+
+ private static List<NodeTimeseries> keepCurrentGenerationAfterWarmup(List<NodeTimeseries> timeseries,
+ long currentGeneration) {
+ return timeseries.stream()
+ .map(nodeTimeseries -> nodeTimeseries.keepCurrentGenerationAfterWarmup(currentGeneration))
+ .collect(Collectors.toList());
}
public static ClusterNodesTimeseries empty() {
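Note: a sketch of the fetch-wider-then-trim pattern used by the constructor above, with simplified stand-in types rather than the real classes: fetch period plus four warmup windows of history so a generation change can be detected, filter by generation, warmup and stability, then keep only snapshots inside the requested period.
import java.time.Duration;
import java.time.Instant;
import java.util.List;
import java.util.stream.Collectors;

class FetchWiderThenTrimSketch {

    record Snapshot(Instant at, long generation, boolean inService, boolean stable) { }

    static final Duration warmup = Duration.ofSeconds(90);

    static List<Snapshot> select(List<Snapshot> fetched,   // covers period.plus(warmup.multipliedBy(4))
                                 long currentGeneration,
                                 Instant now,
                                 Duration period) {
        // Instant of the change to the current generation, if visible in the fetched window
        Instant changeAt = fetched.stream()
                                  .filter(s -> s.generation() == currentGeneration)
                                  .map(Snapshot::at)
                                  .findFirst()
                                  .orElse(Instant.MIN); // no change seen: keep everything on the current generation
        return fetched.stream()
                      .filter(s -> s.generation() < 0 || s.generation() >= currentGeneration)      // drop old generations
                      .filter(s -> s.generation() < 0 || ! s.at().isBefore(changeAt.plus(warmup))) // skip warmup window
                      .filter(s -> s.inService() && s.stable())
                      .filter(s -> ! s.at().isBefore(now.minus(period)))                           // finally trim to the requested period
                      .collect(Collectors.toList());
    }
}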
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/ClusterTimeseries.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/ClusterTimeseries.java
index 96896bb1ba0..131873b0137 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/ClusterTimeseries.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/ClusterTimeseries.java
@@ -47,16 +47,19 @@ public class ClusterTimeseries {
/**
* The max query growth rate we can predict from this time-series as a fraction of the average traffic in the window
+ *
+ * @return the predicted max growth of the query rate, per minute as a fraction of the current load
*/
public double maxQueryGrowthRate(Duration window, Clock clock) {
if (snapshots.isEmpty()) return 0.1;
// Find the period having the highest growth rate, where total growth exceeds 30% increase
- double maxGrowthRate = 0; // In query rate per minute
+ double maxGrowthRate = 0; // In query rate growth per second (to get good resolution)
+
for (int start = 0; start < snapshots.size(); start++) {
if (start > 0) { // Optimization: Skip this point when starting from the previous is better relative to the best rate so far
Duration duration = durationBetween(start - 1, start);
- if (duration.toMinutes() != 0) {
- double growthRate = (queryRateAt(start - 1) - queryRateAt(start)) / duration.toMinutes();
+ if (duration.toSeconds() != 0) {
+ double growthRate = (queryRateAt(start - 1) - queryRateAt(start)) / duration.toSeconds();
if (growthRate >= maxGrowthRate)
continue;
}
@@ -64,8 +67,8 @@ public class ClusterTimeseries {
for (int end = start + 1; end < snapshots.size(); end++) {
if (queryRateAt(end) >= queryRateAt(start) * 1.3) {
Duration duration = durationBetween(start, end);
- if (duration.toMinutes() == 0) continue;
- double growthRate = (queryRateAt(end) - queryRateAt(start)) / duration.toMinutes();
+ if (duration.toSeconds() == 0) continue;
+ double growthRate = (queryRateAt(end) - queryRateAt(start)) / duration.toSeconds();
if (growthRate > maxGrowthRate)
maxGrowthRate = growthRate;
}
@@ -79,7 +82,7 @@ public class ClusterTimeseries {
}
OptionalDouble queryRate = queryRate(window, clock);
if (queryRate.orElse(0) == 0) return 0.1; // Growth not expressible as a fraction of the current rate
- return maxGrowthRate / queryRate.getAsDouble();
+ return maxGrowthRate * 60 / queryRate.getAsDouble();
}
/**
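Note: a worked example with illustrative numbers (not from the patch) of the unit change above: growth is accumulated per second for better resolution, then multiplied by 60 to report growth per minute as a fraction of the current query rate.
class GrowthRateExample {
    public static void main(String[] args) {
        double startRate = 100.0;   // queries/s at the start of the steepest period
        double endRate = 130.0;     // queries/s at the end (a 30% increase, the threshold used above)
        double seconds = 120.0;     // length of that period
        double currentRate = 100.0; // average query rate in the window

        double growthPerSecond = (endRate - startRate) / seconds;            // 0.25 queries/s per second
        double growthFractionPerMinute = growthPerSecond * 60 / currentRate; // 0.15: +15% of current load per minute
        System.out.println(growthFractionPerMinute);
    }
}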
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/MemoryMetricsDb.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/MemoryMetricsDb.java
index d9544b334ea..9eefd4e60b7 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/MemoryMetricsDb.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/MemoryMetricsDb.java
@@ -4,8 +4,6 @@ package com.yahoo.vespa.hosted.provision.autoscale;
import com.yahoo.collections.Pair;
import com.yahoo.config.provision.ApplicationId;
import com.yahoo.config.provision.ClusterSpec;
-import com.yahoo.vespa.hosted.provision.Node;
-import com.yahoo.vespa.hosted.provision.NodeRepository;
import java.time.Clock;
import java.time.Duration;
@@ -15,7 +13,6 @@ import java.util.Collection;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
-import java.util.Optional;
import java.util.Set;
import java.util.stream.Collectors;
@@ -73,10 +70,10 @@ public class MemoryMetricsDb implements MetricsDb {
Instant startTime = clock().instant().minus(period);
synchronized (lock) {
if (hostnames.isEmpty())
- return nodeTimeseries.values().stream().map(ns -> ns.justAfter(startTime)).collect(Collectors.toList());
+ return nodeTimeseries.values().stream().map(ns -> ns.keepAfter(startTime)).collect(Collectors.toList());
else
return hostnames.stream()
- .map(hostname -> nodeTimeseries.getOrDefault(hostname, new NodeTimeseries(hostname, List.of())).justAfter(startTime))
+ .map(hostname -> nodeTimeseries.getOrDefault(hostname, new NodeTimeseries(hostname, List.of())).keepAfter(startTime))
.collect(Collectors.toList());
}
}
@@ -94,7 +91,7 @@ public class MemoryMetricsDb implements MetricsDb {
// 12 hours with 1k nodes and 3 resources and 1 measurement/sec is about 5Gb
for (String hostname : nodeTimeseries.keySet()) {
var timeseries = nodeTimeseries.get(hostname);
- timeseries = timeseries.justAfter(clock().instant().minus(Autoscaler.maxScalingWindow()));
+ timeseries = timeseries.keepAfter(clock().instant().minus(Autoscaler.maxScalingWindow()));
if (timeseries.isEmpty())
nodeTimeseries.remove(hostname);
else
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/NodeTimeseries.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/NodeTimeseries.java
index 864df9a16c4..4a5f8972e11 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/NodeTimeseries.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/NodeTimeseries.java
@@ -9,6 +9,8 @@ import java.util.Optional;
import java.util.function.Predicate;
import java.util.stream.Collectors;
+import static com.yahoo.vespa.hosted.provision.autoscale.ClusterModel.warmupDuration;
+
/**
* A list of metric snapshots from a node, sorted by increasing time (newest last).
*
@@ -48,15 +50,42 @@ public class NodeTimeseries {
return new NodeTimeseries(hostname(), list);
}
- public NodeTimeseries filter(Predicate<NodeMetricSnapshot> filter) {
- return new NodeTimeseries(hostname, snapshots.stream().filter(filter).collect(Collectors.toList()));
+ /** Returns the instant this changed to the given generation, or empty if no *change* to this generation is present */
+ private Optional<Instant> generationChange(long targetGeneration) {
+ if (snapshots.isEmpty()) return Optional.empty();
+ if (snapshots.get(0).generation() == targetGeneration) return Optional.of(snapshots.get(0).at());
+ for (NodeMetricSnapshot snapshot : snapshots) {
+ if (snapshot.generation() == targetGeneration)
+ return Optional.of(snapshot.at());
+ }
+ return Optional.empty();
+ }
+
+ public NodeTimeseries keep(Predicate<NodeMetricSnapshot> filter) {
+ return new NodeTimeseries(hostname, snapshots.stream()
+ .filter(snapshot -> filter.test(snapshot))
+ .collect(Collectors.toList()));
}
- public NodeTimeseries justAfter(Instant oldestTime) {
+ public NodeTimeseries keepAfter(Instant oldestTime) {
return new NodeTimeseries(hostname,
snapshots.stream()
.filter(snapshot -> snapshot.at().equals(oldestTime) || snapshot.at().isAfter(oldestTime))
.collect(Collectors.toList()));
}
+ public NodeTimeseries keepCurrentGenerationAfterWarmup(long currentGeneration) {
+ Optional<Instant> generationChange = generationChange(currentGeneration);
+ return keep(snapshot -> isOnCurrentGenerationAfterWarmup(snapshot, currentGeneration, generationChange));
+ }
+
+ private boolean isOnCurrentGenerationAfterWarmup(NodeMetricSnapshot snapshot,
+ long currentGeneration,
+ Optional<Instant> generationChange) {
+ if (snapshot.generation() < 0) return true; // Content nodes do not yet send generation
+ if (snapshot.generation() < currentGeneration) return false;
+ if (generationChange.isEmpty()) return true;
+ return ! snapshot.at().isBefore(generationChange.get().plus(warmupDuration));
+ }
+
}
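Note: a hypothetical timeline (stand-in types, made-up instants) showing which snapshots the per-snapshot rule in isOnCurrentGenerationAfterWarmup above keeps: an unknown generation always passes, older generations are dropped, and the first 90 seconds after the change to the current generation are dropped.
import java.time.Duration;
import java.time.Instant;

class KeepAfterWarmupExample {

    record Snap(Instant at, long generation) { }

    static final Duration warmup = Duration.ofSeconds(90);

    static boolean kept(Snap s, long currentGeneration, Instant changeAt) {
        if (s.generation() < 0) return true;                  // content nodes do not yet send generation
        if (s.generation() < currentGeneration) return false; // older generation
        return ! s.at().isBefore(changeAt.plus(warmup));      // must be past the warmup window
    }

    public static void main(String[] args) {
        long currentGeneration = 7;
        Instant changeAt = Instant.parse("2021-12-06T15:05:00Z"); // first snapshot on generation 7

        Snap[] snaps = {
                new Snap(changeAt.minusSeconds(300), 6),  // dropped: previous generation
                new Snap(changeAt,                   7),  // dropped: inside warmup window
                new Snap(changeAt.plusSeconds(60),   7),  // dropped: still inside warmup window
                new Snap(changeAt.plusSeconds(120),  7),  // kept: warmup over
                new Snap(changeAt.plusSeconds(180), -1)   // kept: no generation reported
        };
        for (Snap s : snaps)
            System.out.println(s + " -> kept=" + kept(s, currentGeneration, changeAt));
    }
}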
diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/NodeListMicroBenchmarkTest.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/NodeListMicroBenchmarkTest.java
index b7f9dcb8e8a..8027afc665b 100644
--- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/NodeListMicroBenchmarkTest.java
+++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/NodeListMicroBenchmarkTest.java
@@ -58,7 +58,6 @@ public class NodeListMicroBenchmarkTest {
nodeList.childrenOf(nodes.get(indexes.get(i)));
}
Duration duration = Duration.between(start, Instant.now());
- System.out.println("Calling NodeList.childrenOf took " + duration + " (" + duration.toNanos() / iterations / 1000 + " microseconds per invocation)");
}
private List<Node> createHosts() {
diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/AutoscalingTest.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/AutoscalingTest.java
index 7ade2cdf8c4..16017036e87 100644
--- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/AutoscalingTest.java
+++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/AutoscalingTest.java
@@ -15,7 +15,6 @@ import com.yahoo.config.provision.NodeType;
import com.yahoo.config.provision.RegionName;
import com.yahoo.config.provision.SystemName;
import com.yahoo.config.provision.Zone;
-import com.yahoo.vespa.hosted.provision.Node;
import com.yahoo.vespa.hosted.provision.NodeRepository;
import com.yahoo.vespa.hosted.provision.Nodelike;
import com.yahoo.vespa.hosted.provision.provisioning.CapacityPolicies;
@@ -63,7 +62,7 @@ public class AutoscalingTest {
tester.addQueryRateMeasurements(application1, cluster1.id(), 10, t -> t == 0 ? 20.0 : 10.0); // Query traffic only
ClusterResources scaledResources = tester.assertResources("Scaling up since resource usage is too high",
15, 1, 1.2, 28.6, 28.6,
- tester.autoscale(application1, cluster1, capacity).target());
+ tester.autoscale(application1, cluster1, capacity));
tester.deploy(application1, cluster1, scaledResources);
assertTrue("Cluster in flux -> No further change", tester.autoscale(application1, cluster1, capacity).isEmpty());
@@ -87,7 +86,7 @@ public class AutoscalingTest {
tester.addQueryRateMeasurements(application1, cluster1.id(), 10, t -> t == 0 ? 20.0 : 10.0); // Query traffic only
tester.assertResources("Scaling down to minimum since usage has gone down significantly",
7, 1, 1.0, 66.7, 66.7,
- tester.autoscale(application1, cluster1, capacity).target());
+ tester.autoscale(application1, cluster1, capacity));
var events = tester.nodeRepository().applications().get(application1).get().cluster(cluster1.id()).get().scalingEvents();
}
@@ -111,7 +110,7 @@ public class AutoscalingTest {
tester.addQueryRateMeasurements(application1, cluster1.id(), 10, t -> t == 0 ? 20.0 : 10.0); // Query traffic only
ClusterResources scaledResources = tester.assertResources("Scaling up since cpu usage is too high",
7, 1, 2.5, 80.0, 80.0,
- tester.autoscale(application1, cluster1, capacity).target());
+ tester.autoscale(application1, cluster1, capacity));
tester.deploy(application1, cluster1, scaledResources);
tester.deactivateRetired(application1, cluster1, scaledResources);
@@ -121,7 +120,7 @@ public class AutoscalingTest {
tester.addQueryRateMeasurements(application1, cluster1.id(), 10, t -> t == 0 ? 20.0 : 10.0); // Query traffic only
tester.assertResources("Scaling down since cpu usage has gone down",
4, 1, 2.5, 68.6, 68.6,
- tester.autoscale(application1, cluster1, capacity).target());
+ tester.autoscale(application1, cluster1, capacity));
}
@Test
@@ -148,7 +147,7 @@ public class AutoscalingTest {
var capacity = Capacity.from(min, max);
ClusterResources scaledResources = tester.assertResources("Scaling up since resource usage is too high",
14, 1, 1.4, 30.8, 30.8,
- tester.autoscale(application1, cluster1, capacity).target());
+ tester.autoscale(application1, cluster1, capacity));
assertEquals("Disk speed from min/max is used",
NodeResources.DiskSpeed.any, scaledResources.nodeResources().diskSpeed());
tester.deploy(application1, cluster1, scaledResources);
@@ -206,7 +205,7 @@ public class AutoscalingTest {
tester.addQueryRateMeasurements(application1, cluster1.id(), 10, t -> t == 0 ? 20.0 : 10.0); // Query traffic only
tester.assertResources("Scaling up to limit since resource usage is too high",
6, 1, 2.4, 78.0, 79.0,
- tester.autoscale(application1, cluster1, capacity).target());
+ tester.autoscale(application1, cluster1, capacity));
}
@Test
@@ -225,7 +224,7 @@ public class AutoscalingTest {
tester.addMeasurements(0.05f, 0.05f, 0.05f, 0, 120, application1);
tester.assertResources("Scaling down to limit since resource usage is low",
4, 1, 1.8, 7.7, 10.0,
- tester.autoscale(application1, cluster1, capacity).target());
+ tester.autoscale(application1, cluster1, capacity));
}
@Test
@@ -246,13 +245,14 @@ public class AutoscalingTest {
tester.deploy(application1, cluster1, Capacity.from(min, max));
tester.assertResources("Min number of nodes and default resources",
2, 1, defaultResources,
- Optional.of(tester.nodeRepository().nodes().list().owner(application1).toResources()));
+ tester.nodeRepository().nodes().list().owner(application1).toResources());
tester.addMeasurements(0.25f, 0.95f, 0.95f, 0, 120, application1);
tester.clock().advance(Duration.ofMinutes(-10 * 5));
tester.addQueryRateMeasurements(application1, cluster1.id(), 10, t -> t == 0 ? 20.0 : 10.0); // Query traffic only
tester.assertResources("Scaling up to limit since resource usage is too high",
- 4, 1, defaultResources,
- tester.autoscale(application1, cluster1, capacity).target());
+ 4, 1,
+ defaultResources.vcpu(), defaultResources.memoryGb(), defaultResources.diskGb(),
+ tester.autoscale(application1, cluster1, capacity));
}
@Test
@@ -273,7 +273,7 @@ public class AutoscalingTest {
tester.addQueryRateMeasurements(application1, cluster1.id(), 10, t -> t == 0 ? 20.0 : 10.0); // Query traffic only
tester.assertResources("Scaling up since resource usage is too high",
6, 6, 3.6, 8.0, 10.0,
- tester.autoscale(application1, cluster1, capacity).target());
+ tester.autoscale(application1, cluster1, capacity));
}
@Test
@@ -315,15 +315,15 @@ public class AutoscalingTest {
// deploy
tester.deploy(application1, cluster1, 3, 1, min.nodeResources());
- tester.addDiskMeasurements(0.01f, 1f, 120, application1);
- tester.clock().advance(Duration.ofMinutes(-10 * 5));
+ Duration timeAdded = tester.addDiskMeasurements(0.01f, 1f, 120, application1);
+ tester.clock().advance(timeAdded.negated());
tester.addQueryRateMeasurements(application1, cluster1.id(), 10, t -> 10.0); // Query traffic only
- Optional<ClusterResources> suggestion = tester.suggest(application1, cluster1.id(), min, max).target();
+ Autoscaler.Advice suggestion = tester.suggest(application1, cluster1.id(), min, max);
tester.assertResources("Choosing the remote disk flavor as it has less disk",
6, 1, 3.0, 100.0, 10.0,
suggestion);
assertEquals("Choosing the remote disk flavor as it has less disk",
- StorageType.remote, suggestion.get().nodeResources().storageType());
+ StorageType.remote, suggestion.target().get().nodeResources().storageType());
}
@Test
@@ -343,7 +343,7 @@ public class AutoscalingTest {
tester.addQueryRateMeasurements(application1, cluster1.id(), 10, t -> t == 0 ? 20.0 : 10.0); // Query traffic only
tester.assertResources("Scaling up since resource usage is too high",
7, 1, 2.5, 80.0, 80.0,
- tester.suggest(application1, cluster1.id(), min, max).target());
+ tester.suggest(application1, cluster1.id(), min, max));
}
@Test
@@ -400,7 +400,7 @@ public class AutoscalingTest {
tester.addQueryRateMeasurements(application1, cluster1.id(), 10, t -> t == 0 ? 20.0 : 10.0); // Query traffic only
tester.assertResources("Scaling up since resource usage is too high",
7, 7, 2.5, 80.0, 80.0,
- tester.autoscale(application1, cluster1, capacity).target());
+ tester.autoscale(application1, cluster1, capacity));
}
@Test
@@ -423,7 +423,7 @@ public class AutoscalingTest {
t -> 1.0);
tester.assertResources("Scaling up since resource usage is too high, changing to 1 group is cheaper",
8, 1, 2.6, 83.3, 83.3,
- tester.autoscale(application1, cluster1, capacity).target());
+ tester.autoscale(application1, cluster1, capacity));
}
/** Same as above but mostly write traffic, which favors smaller groups */
@@ -447,7 +447,7 @@ public class AutoscalingTest {
t -> 100.0);
tester.assertResources("Scaling down since resource usage is too high, changing to 1 group is cheaper",
4, 1, 2.1, 83.3, 83.3,
- tester.autoscale(application1, cluster1, capacity).target());
+ tester.autoscale(application1, cluster1, capacity));
}
@Test
@@ -469,7 +469,7 @@ public class AutoscalingTest {
tester.addQueryRateMeasurements(application1, cluster1.id(), 10, t -> t == 0 ? 20.0 : 10.0); // Query traffic only
tester.assertResources("Increase group size to reduce memory load",
8, 2, 12.4, 96.2, 62.5,
- tester.autoscale(application1, cluster1, capacity).target());
+ tester.autoscale(application1, cluster1, capacity));
}
@Test
@@ -490,7 +490,7 @@ public class AutoscalingTest {
tester.addMemMeasurements(0.02f, 0.95f, 120, application1);
tester.assertResources("Scaling down",
6, 1, 2.9, 4.0, 95.0,
- tester.autoscale(application1, cluster1, capacity).target());
+ tester.autoscale(application1, cluster1, capacity));
}
@Test
@@ -519,7 +519,7 @@ public class AutoscalingTest {
tester.addQueryRateMeasurements(application1, cluster1.id(), 10, t -> t == 0 ? 20.0 : 10.0); // Query traffic only
tester.assertResources("Scaling down",
6, 1, 1.4, 4.0, 95.0,
- tester.autoscale(application1, cluster1, capacity).target());
+ tester.autoscale(application1, cluster1, capacity));
}
@Test
@@ -543,7 +543,7 @@ public class AutoscalingTest {
tester.addQueryRateMeasurements(application1, cluster1.id(), 10, t -> t == 0 ? 20.0 : 10.0); // Query traffic only
tester.assertResources("Scaling up",
4, 1, 6.7, 20.5, 200,
- tester.autoscale(application1, cluster1, capacity).target());
+ tester.autoscale(application1, cluster1, capacity));
}
{ // 15 Gb memory tax
@@ -560,7 +560,7 @@ public class AutoscalingTest {
tester.addQueryRateMeasurements(application1, cluster1.id(), 10, t -> t == 0 ? 20.0 : 10.0); // Query traffic only
tester.assertResources("Scaling up",
4, 1, 6.7, 35.5, 200,
- tester.autoscale(application1, cluster1, capacity).target());
+ tester.autoscale(application1, cluster1, capacity));
}
}
@@ -591,7 +591,7 @@ public class AutoscalingTest {
tester.addMemMeasurements(0.9f, 0.6f, 120, application1);
ClusterResources scaledResources = tester.assertResources("Scaling up since resource usage is too high.",
8, 1, 3, 83, 34.3,
- tester.autoscale(application1, cluster1, capacity).target());
+ tester.autoscale(application1, cluster1, capacity));
tester.deploy(application1, cluster1, scaledResources);
tester.deactivateRetired(application1, cluster1, scaledResources);
@@ -602,7 +602,7 @@ public class AutoscalingTest {
tester.addQueryRateMeasurements(application1, cluster1.id(), 10, t -> t == 0 ? 20.0 : 10.0); // Query traffic only
tester.assertResources("Scaling down since resource usage has gone down",
5, 1, 3, 83, 36.0,
- tester.autoscale(application1, cluster1, capacity).target());
+ tester.autoscale(application1, cluster1, capacity));
}
@Test
@@ -624,17 +624,17 @@ public class AutoscalingTest {
// (no read share stored)
tester.assertResources("Advice to scale up since we set aside for bcp by default",
7, 1, 3, 100, 100,
- tester.autoscale(application1, cluster1, capacity).target());
+ tester.autoscale(application1, cluster1, capacity));
tester.storeReadShare(0.25, 0.5, application1);
tester.assertResources("Half of global share is the same as the default assumption used above",
7, 1, 3, 100, 100,
- tester.autoscale(application1, cluster1, capacity).target());
+ tester.autoscale(application1, cluster1, capacity));
tester.storeReadShare(0.5, 0.5, application1);
tester.assertResources("Advice to scale down since we don't need room for bcp",
4, 1, 3, 100, 100,
- tester.autoscale(application1, cluster1, capacity).target());
+ tester.autoscale(application1, cluster1, capacity));
}
@@ -652,36 +652,36 @@ public class AutoscalingTest {
ClusterSpec cluster1 = tester.clusterSpec(ClusterSpec.Type.container, "cluster1");
tester.deploy(application1, cluster1, 5, 1, midResources);
- tester.addQueryRateMeasurements(application1, cluster1.id(), 100, t -> t == 0 ? 20.0 : 10.0);
- tester.clock().advance(Duration.ofMinutes(-100 * 5));
- tester.addCpuMeasurements(0.25f, 1f, 100, application1);
+ Duration timeAdded = tester.addQueryRateMeasurements(application1, cluster1.id(), 100, t -> t == 0 ? 20.0 : 10.0);
+ tester.clock().advance(timeAdded.negated());
+ tester.addCpuMeasurements(0.25f, 1f, 200, application1);
// (no query rate data)
tester.assertResources("Scale up since we assume we need 2x cpu for growth when no data scaling time data",
5, 1, 6.3, 100, 100,
- tester.autoscale(application1, cluster1, capacity).target());
+ tester.autoscale(application1, cluster1, capacity));
tester.setScalingDuration(application1, cluster1.id(), Duration.ofMinutes(5));
- tester.addQueryRateMeasurements(application1, cluster1.id(),
- 100,
- t -> 10.0 + (t < 50 ? t : 100 - t));
- tester.clock().advance(Duration.ofMinutes(-100 * 5));
- tester.addCpuMeasurements(0.25f, 1f, 100, application1);
+ timeAdded = tester.addQueryRateMeasurements(application1, cluster1.id(),
+ 100,
+ t -> 10.0 + (t < 50 ? t : 100 - t));
+ tester.clock().advance(timeAdded.negated());
+ tester.addCpuMeasurements(0.25f, 1f, 200, application1);
tester.assertResources("Scale down since observed growth is slower than scaling time",
5, 1, 3.4, 100, 100,
- tester.autoscale(application1, cluster1, capacity).target());
+ tester.autoscale(application1, cluster1, capacity));
tester.clearQueryRateMeasurements(application1, cluster1.id());
tester.setScalingDuration(application1, cluster1.id(), Duration.ofMinutes(60));
- tester.addQueryRateMeasurements(application1, cluster1.id(),
- 100,
- t -> 10.0 + (t < 50 ? t * t * t : 125000 - (t - 49) * (t - 49) * (t - 49)));
- tester.clock().advance(Duration.ofMinutes(-100 * 5));
- tester.addCpuMeasurements(0.25f, 1f, 100, application1);
+ timeAdded = tester.addQueryRateMeasurements(application1, cluster1.id(),
+ 100,
+ t -> 10.0 + (t < 50 ? t * t * t : 125000 - (t - 49) * (t - 49) * (t - 49)));
+ tester.clock().advance(timeAdded.negated());
+ tester.addCpuMeasurements(0.25f, 1f, 200, application1);
tester.assertResources("Scale up since observed growth is faster than scaling time",
5, 1, 10.0, 100, 100,
- tester.autoscale(application1, cluster1, capacity).target());
+ tester.autoscale(application1, cluster1, capacity));
}
@Test
@@ -708,35 +708,35 @@ public class AutoscalingTest {
tester.addLoadMeasurements(application1, cluster1.id(), 100, t -> t == 0 ? 20.0 : 10.0, t -> 10.0);
tester.assertResources("Query and write load is equal -> scale up somewhat",
5, 1, 7.3, 100, 100,
- tester.autoscale(application1, cluster1, capacity).target());
+ tester.autoscale(application1, cluster1, capacity));
tester.addCpuMeasurements(0.4f, 1f, 100, application1);
tester.clock().advance(Duration.ofMinutes(-100 * 5));
tester.addLoadMeasurements(application1, cluster1.id(), 100, t -> t == 0 ? 80.0 : 40.0, t -> 10.0);
tester.assertResources("Query load is 4x write load -> scale up more",
5, 1, 9.5, 100, 100,
- tester.autoscale(application1, cluster1, capacity).target());
+ tester.autoscale(application1, cluster1, capacity));
tester.addCpuMeasurements(0.3f, 1f, 100, application1);
tester.clock().advance(Duration.ofMinutes(-100 * 5));
tester.addLoadMeasurements(application1, cluster1.id(), 100, t -> t == 0 ? 20.0 : 10.0, t -> 100.0);
tester.assertResources("Write load is 10x query load -> scale down",
5, 1, 2.9, 100, 100,
- tester.autoscale(application1, cluster1, capacity).target());
+ tester.autoscale(application1, cluster1, capacity));
tester.addCpuMeasurements(0.4f, 1f, 100, application1);
tester.clock().advance(Duration.ofMinutes(-100 * 5));
tester.addLoadMeasurements(application1, cluster1.id(), 100, t -> t == 0 ? 20.0 : 10.0, t-> 0.0);
tester.assertResources("Query only -> largest possible",
5, 1, 10.0, 100, 100,
- tester.autoscale(application1, cluster1, capacity).target());
+ tester.autoscale(application1, cluster1, capacity));
tester.addCpuMeasurements(0.4f, 1f, 100, application1);
tester.clock().advance(Duration.ofMinutes(-100 * 5));
tester.addLoadMeasurements(application1, cluster1.id(), 100, t -> 0.0, t -> 10.0);
tester.assertResources("Write only -> smallest possible",
5, 1, 2.1, 100, 100,
- tester.autoscale(application1, cluster1, capacity).target());
+ tester.autoscale(application1, cluster1, capacity));
}
@Test
@@ -776,7 +776,7 @@ public class AutoscalingTest {
tester.addCpuMeasurements(1.0f, 1f, 10, application1);
tester.assertResources("We scale up even in dev because resources are required",
3, 1, 1.0, 4, 50,
- tester.autoscale(application1, cluster1, capacity).target());
+ tester.autoscale(application1, cluster1, capacity));
}
/**
diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/AutoscalingTester.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/AutoscalingTester.java
index ededdf8fe7f..03a29b6613c 100644
--- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/AutoscalingTester.java
+++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/AutoscalingTester.java
@@ -29,6 +29,7 @@ import com.yahoo.vespa.hosted.provision.provisioning.HostResourcesCalculator;
import com.yahoo.vespa.hosted.provision.provisioning.ProvisioningTester;
import java.time.Duration;
+import java.time.Instant;
import java.util.List;
import java.util.Map;
import java.util.Optional;
@@ -145,7 +146,7 @@ class AutoscalingTester {
NodeList nodes = nodeRepository().nodes().list(Node.State.active).owner(applicationId);
float oneExtraNodeFactor = (float)(nodes.size() - 1.0) / (nodes.size());
for (int i = 0; i < count; i++) {
- clock().advance(Duration.ofMinutes(5));
+ clock().advance(Duration.ofSeconds(150));
for (Node node : nodes) {
Load load = new Load(value,
ClusterModel.idealMemoryLoad * otherResourcesLoad,
@@ -170,13 +171,15 @@ class AutoscalingTester {
* @param otherResourcesLoad the load factor relative to ideal to use for other resources
* @param count the number of measurements
* @param applicationId the application we're adding measurements for all nodes of
+ * @return the duration added to the current time by this
*/
- public void addDiskMeasurements(float value, float otherResourcesLoad,
- int count, ApplicationId applicationId) {
+ public Duration addDiskMeasurements(float value, float otherResourcesLoad,
+ int count, ApplicationId applicationId) {
NodeList nodes = nodeRepository().nodes().list(Node.State.active).owner(applicationId);
float oneExtraNodeFactor = (float)(nodes.size() - 1.0) / (nodes.size());
+ Instant initialTime = clock().instant();
for (int i = 0; i < count; i++) {
- clock().advance(Duration.ofMinutes(5));
+ clock().advance(Duration.ofSeconds(150));
for (Node node : nodes) {
Load load = new Load(ClusterModel.idealQueryCpuLoad * otherResourcesLoad,
ClusterModel.idealDiskLoad * otherResourcesLoad,
@@ -190,6 +193,7 @@ class AutoscalingTester {
0.0))));
}
}
+ return Duration.between(initialTime, clock().instant());
}
/**
@@ -290,10 +294,11 @@ class AutoscalingTester {
}
/** Creates the given number of measurements, spaced 5 minutes between, using the given function */
- public void addQueryRateMeasurements(ApplicationId application,
- ClusterSpec.Id cluster,
- int measurements,
- IntFunction<Double> queryRate) {
+ public Duration addQueryRateMeasurements(ApplicationId application,
+ ClusterSpec.Id cluster,
+ int measurements,
+ IntFunction<Double> queryRate) {
+ Instant initialTime = clock().instant();
for (int i = 0; i < measurements; i++) {
nodeMetricsDb().addClusterMetrics(application,
Map.of(cluster, new ClusterMetricSnapshot(clock().instant(),
@@ -301,6 +306,7 @@ class AutoscalingTester {
0.0)));
clock().advance(Duration.ofMinutes(5));
}
+ return Duration.between(initialTime, clock().instant());
}
public void clearQueryRateMeasurements(ApplicationId application, ClusterSpec.Id cluster) {
@@ -329,28 +335,37 @@ class AutoscalingTester {
nodeRepository().nodes().list(Node.State.active).owner(applicationId));
}
- public ClusterResources assertResources(String message,
- int nodeCount, int groupCount,
- NodeResources expectedResources,
- Optional<ClusterResources> resources) {
- return assertResources(message, nodeCount, groupCount,
- expectedResources.vcpu(), expectedResources.memoryGb(), expectedResources.diskGb(),
- resources);
+ public void assertResources(String message,
+ int nodeCount, int groupCount,
+ NodeResources expectedResources,
+ ClusterResources resources) {
+ assertResources(message, nodeCount, groupCount,
+ expectedResources.vcpu(), expectedResources.memoryGb(), expectedResources.diskGb(),
+ resources);
}
public ClusterResources assertResources(String message,
int nodeCount, int groupCount,
double approxCpu, double approxMemory, double approxDisk,
- Optional<ClusterResources> resources) {
+ Autoscaler.Advice advice) {
+ assertTrue("Resources are present: " + message + " (" + advice + ": " + advice.reason() + ")",
+ advice.target().isPresent());
+ var resources = advice.target().get();
+ assertResources(message, nodeCount, groupCount, approxCpu, approxMemory, approxDisk, resources);
+ return resources;
+ }
+
+ public void assertResources(String message,
+ int nodeCount, int groupCount,
+ double approxCpu, double approxMemory, double approxDisk,
+ ClusterResources resources) {
double delta = 0.0000000001;
- assertTrue("Resources are present: " + message, resources.isPresent());
- NodeResources nodeResources = resources.get().nodeResources();
- assertEquals("Node count in " + resources.get() + ": " + message, nodeCount, resources.get().nodes());
- assertEquals("Group count in " + resources.get() + ": " + message, groupCount, resources.get().groups());
- assertEquals("Cpu in " + resources.get() + ": " + message, approxCpu, Math.round(nodeResources.vcpu() * 10) / 10.0, delta);
- assertEquals("Memory in " + resources.get() + ": " + message, approxMemory, Math.round(nodeResources.memoryGb() * 10) / 10.0, delta);
- assertEquals("Disk in: " + resources.get() + ": " + message, approxDisk, Math.round(nodeResources.diskGb() * 10) / 10.0, delta);
- return resources.get();
+ NodeResources nodeResources = resources.nodeResources();
+ assertEquals("Node count in " + resources + ": " + message, nodeCount, resources.nodes());
+ assertEquals("Group count in " + resources+ ": " + message, groupCount, resources.groups());
+ assertEquals("Cpu in " + resources + ": " + message, approxCpu, Math.round(nodeResources.vcpu() * 10) / 10.0, delta);
+ assertEquals("Memory in " + resources + ": " + message, approxMemory, Math.round(nodeResources.memoryGb() * 10) / 10.0, delta);
+ assertEquals("Disk in: " + resources + ": " + message, approxDisk, Math.round(nodeResources.diskGb() * 10) / 10.0, delta);
}
public ManualClock clock() {
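Note: a minimal sketch (hypothetical helper, not the tester API) of the record-then-rewind pattern the measurement helpers above now support by returning a Duration: advance a manual clock while recording, hand back the total time added, and let the caller rewind by that amount before adding the next series.
import java.time.Duration;
import java.time.Instant;
import java.util.ArrayList;
import java.util.List;

class RecordThenRewindSketch {

    private Instant now = Instant.parse("2021-12-06T15:00:00Z"); // stands in for a manual test clock
    private final List<Instant> recordedAt = new ArrayList<>();

    Duration addMeasurements(int count, Duration spacing) {
        Instant start = now;
        for (int i = 0; i < count; i++) {
            recordedAt.add(now);
            now = now.plus(spacing);
        }
        return Duration.between(start, now); // what the caller must rewind to keep series in the same window
    }

    void advance(Duration d) { now = now.plus(d); } // advance(timeAdded.negated()) rewinds

    public static void main(String[] args) {
        RecordThenRewindSketch tester = new RecordThenRewindSketch();
        Duration timeAdded = tester.addMeasurements(100, Duration.ofMinutes(5));
        tester.advance(timeAdded.negated()); // as in the tests: clock().advance(timeAdded.negated())
        System.out.println("added " + timeAdded + ", clock back at " + tester.now);
    }
}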
diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/AutoscalingMaintainerTest.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/AutoscalingMaintainerTest.java
index 6d5677d0911..4c3274d763f 100644
--- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/AutoscalingMaintainerTest.java
+++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/AutoscalingMaintainerTest.java
@@ -89,11 +89,9 @@ public class AutoscalingMaintainerTest {
tester.deploy(app1, cluster1, app1Capacity);
// Measure overload
- tester.clock().advance(Duration.ofSeconds(1));
tester.addMeasurements(0.9f, 0.9f, 0.9f, 0, 500, app1);
// Causes autoscaling
- tester.clock().advance(Duration.ofSeconds(1));
tester.clock().advance(Duration.ofMinutes(10));
Instant firstMaintenanceTime = tester.clock().instant();
tester.maintainer().maintain();
@@ -108,16 +106,12 @@ public class AutoscalingMaintainerTest {
assertEquals(firstMaintenanceTime.toEpochMilli(), events.get(1).at().toEpochMilli());
// Measure overload still, since change is not applied, but metrics are discarded
- tester.clock().advance(Duration.ofSeconds(1));
tester.addMeasurements(0.9f, 0.9f, 0.9f, 0, 500, app1);
- tester.clock().advance(Duration.ofSeconds(1));
tester.maintainer().maintain();
assertEquals(firstMaintenanceTime.toEpochMilli(), tester.deployer().lastDeployTime(app1).get().toEpochMilli());
// Measure underload, but no autoscaling since we still haven't measured we're on the new config generation
- tester.clock().advance(Duration.ofSeconds(1));
tester.addMeasurements(0.1f, 0.1f, 0.1f, 0, 500, app1);
- tester.clock().advance(Duration.ofSeconds(1));
tester.maintainer().maintain();
assertEquals(firstMaintenanceTime.toEpochMilli(), tester.deployer().lastDeployTime(app1).get().toEpochMilli());
@@ -126,8 +120,9 @@ public class AutoscalingMaintainerTest {
tester.clock().advance(Duration.ofMinutes(5));
tester.addMeasurements(0.1f, 0.1f, 0.1f, 1, 1, app1);
tester.maintainer().maintain();
+ assertEquals(firstMaintenanceTime.toEpochMilli(), tester.deployer().lastDeployTime(app1).get().toEpochMilli());
// - measure underload
- tester.clock().advance(Duration.ofHours(1));
+ tester.clock().advance(Duration.ofDays(4)); // Exit cooling period
tester.addMeasurements(0.1f, 0.1f, 0.1f, 1, 500, app1);
Instant lastMaintenanceTime = tester.clock().instant();
tester.maintainer().maintain();
@@ -204,6 +199,34 @@ public class AutoscalingMaintainerTest {
}
@Test
+ public void test_autoscaling_ignores_high_cpu_right_after_generation_change() {
+ ApplicationId app1 = AutoscalingMaintainerTester.makeApplicationId("app1");
+ ClusterSpec cluster1 = AutoscalingMaintainerTester.containerClusterSpec();
+ NodeResources resources = new NodeResources(4, 4, 10, 1);
+ ClusterResources min = new ClusterResources(2, 1, resources);
+ ClusterResources max = new ClusterResources(20, 1, resources);
+ var capacity = Capacity.from(min, max);
+ var tester = new AutoscalingMaintainerTester(new MockDeployer.ApplicationContext(app1, cluster1, capacity));
+
+ tester.deploy(app1, cluster1, capacity);
+ // fast completion
+ tester.addMeasurements(1.0f, 0.3f, 0.3f, 0, 1, app1);
+ tester.addMeasurements(1.0f, 0.3f, 0.3f, 0, 1, app1);
+ tester.maintainer().maintain();
+ assertEquals("Scale up: " + tester.cluster(app1, cluster1).autoscalingStatus(),
+ 1,
+ tester.cluster(app1, cluster1).lastScalingEvent().get().generation());
+
+ // fast completion, with initially overloaded cpu
+ tester.addMeasurements(3.0f, 0.3f, 0.3f, 1, 1, app1);
+ tester.addMeasurements(0.2f, 0.3f, 0.3f, 1, 1, app1);
+ tester.maintainer().maintain();
+ assertEquals("No autoscaling since we ignore the (first) data point in the warup period",
+ 1,
+ tester.cluster(app1, cluster1).lastScalingEvent().get().generation());
+ }
+
+ @Test
public void test_cd_autoscaling_test() {
ApplicationId app1 = AutoscalingMaintainerTester.makeApplicationId("app1");
ClusterSpec cluster1 = AutoscalingMaintainerTester.containerClusterSpec();
@@ -240,7 +263,7 @@ public class AutoscalingMaintainerTest {
clock.advance(completionTime);
float load = down ? 0.1f : 1.0f;
- tester.addMeasurements(load, load, load, generation, 200, application);
+ tester.addMeasurements(load, load, load, generation, 1, application);
tester.maintainer().maintain();
assertEvent("Measured completion of the last scaling event, but no new autoscaling yet",
generation, Optional.of(clock.instant()),
diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/AutoscalingMaintainerTester.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/AutoscalingMaintainerTester.java
index 021ca4bdf64..e1a1a2af5fb 100644
--- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/AutoscalingMaintainerTester.java
+++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/AutoscalingMaintainerTester.java
@@ -71,8 +71,9 @@ public class AutoscalingMaintainerTester {
return provisioningTester.deploy(application, cluster, capacity);
}
- public void addMeasurements(float cpu, float mem, float disk, long generation, int count, ApplicationId applicationId) {
+ public Duration addMeasurements(float cpu, float mem, float disk, long generation, int count, ApplicationId applicationId) {
NodeList nodes = nodeRepository().nodes().list(Node.State.active).owner(applicationId);
+ Instant startTime = clock().instant();
for (int i = 0; i < count; i++) {
for (Node node : nodes)
nodeRepository().metricsDb().addNodeMetrics(List.of(new Pair<>(node.hostname(),
@@ -82,7 +83,9 @@ public class AutoscalingMaintainerTester {
true,
true,
0.0))));
+ clock().advance(Duration.ofSeconds(150));
}
+ return Duration.between(startTime, clock().instant());
}
/** Creates the given number of measurements, spaced 5 minutes between, using the given function */
diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/ScalingSuggestionsMaintainerTest.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/ScalingSuggestionsMaintainerTest.java
index 03b41412896..4c0395a0c7e 100644
--- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/ScalingSuggestionsMaintainerTest.java
+++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/ScalingSuggestionsMaintainerTest.java
@@ -13,6 +13,7 @@ import com.yahoo.config.provision.NodeType;
import com.yahoo.config.provision.RegionName;
import com.yahoo.config.provision.Zone;
import com.yahoo.config.provisioning.FlavorsConfig;
+import com.yahoo.test.ManualClock;
import com.yahoo.vespa.hosted.provision.Node;
import com.yahoo.vespa.hosted.provision.NodeList;
import com.yahoo.vespa.hosted.provision.NodeRepository;
@@ -24,6 +25,7 @@ import com.yahoo.vespa.hosted.provision.provisioning.ProvisioningTester;
import org.junit.Test;
import java.time.Duration;
+import java.time.Instant;
import java.util.List;
import java.util.Optional;
@@ -41,14 +43,13 @@ public class ScalingSuggestionsMaintainerTest {
@Test
public void testScalingSuggestionsMaintainer() {
- ProvisioningTester tester = new ProvisioningTester.Builder().zone(new Zone(Environment.prod, RegionName.from("us-east3"))).flavorsConfig(flavorsConfig()).build();
-
+ ProvisioningTester tester = new ProvisioningTester.Builder().zone(new Zone(Environment.prod, RegionName.from("us-east3")))
+ .flavorsConfig(flavorsConfig())
+ .build();
ApplicationId app1 = ProvisioningTester.applicationId("app1");
- ClusterSpec cluster1 = ProvisioningTester.containerClusterSpec();
-
ApplicationId app2 = ProvisioningTester.applicationId("app2");
+ ClusterSpec cluster1 = ProvisioningTester.containerClusterSpec();
ClusterSpec cluster2 = ProvisioningTester.contentClusterSpec();
-
tester.makeReadyNodes(20, "flt", NodeType.host, 8);
tester.activateTenantHosts();
@@ -60,7 +61,8 @@ public class ScalingSuggestionsMaintainerTest {
false, true));
tester.clock().advance(Duration.ofHours(13));
- addMeasurements(0.90f, 0.90f, 0.90f, 0, 500, app1, tester.nodeRepository());
+ Duration timeAdded = addMeasurements(0.90f, 0.90f, 0.90f, 0, 500, app1, tester.nodeRepository());
+ tester.clock().advance(timeAdded.negated());
addMeasurements(0.99f, 0.99f, 0.99f, 0, 500, app2, tester.nodeRepository());
ScalingSuggestionsMaintainer maintainer = new ScalingSuggestionsMaintainer(tester.nodeRepository(),
@@ -114,10 +116,11 @@ public class ScalingSuggestionsMaintainerTest {
.shouldSuggestResources(currentResources);
}
- public void addMeasurements(float cpu, float memory, float disk, int generation, int count,
- ApplicationId applicationId,
- NodeRepository nodeRepository) {
+ public Duration addMeasurements(float cpu, float memory, float disk, int generation, int count,
+ ApplicationId applicationId,
+ NodeRepository nodeRepository) {
NodeList nodes = nodeRepository.nodes().list(Node.State.active).owner(applicationId);
+ Instant startTime = nodeRepository.clock().instant();
for (int i = 0; i < count; i++) {
for (Node node : nodes)
nodeRepository.metricsDb().addNodeMetrics(List.of(new Pair<>(node.hostname(),
@@ -127,7 +130,9 @@ public class ScalingSuggestionsMaintainerTest {
true,
true,
0.0))));
+ ((ManualClock)nodeRepository.clock()).advance(Duration.ofSeconds(150));
}
+ return Duration.between(startTime, nodeRepository.clock().instant());
}
private FlavorsConfig flavorsConfig() {