author     Jon Bratseth <bratseth@gmail.com>    2022-12-12 15:32:12 +0100
committer  GitHub <noreply@github.com>          2022-12-12 15:32:12 +0100
commit     6cba335640ae35e45f87d7566bc339ef6eb2c235 (patch)
tree       1e2b60412b39f9399ae5c441e938652381928a05
parent     9cb4add47c48a264b9d204f59ca73d3082c2b490 (diff)
parent     2d393d038aab7b2b438f04cf01e6202e0090a4ea (diff)
Merge pull request #25216 from vespa-engine/bratseth/autoscaling-noise
Downweight traffic headroom when traffic is low
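Below is a minimal standalone sketch (not the actual ClusterModel code) of the downweighting this change introduces: headroom multipliers are pulled towards 1 when the average query rate is too low to trust the traffic statistics. The formula and the 100 queries/s saturation point are taken from adjustByConfidence in the ClusterModel.java hunk further down; the class name and example values here are illustrative only.

// Illustrative only: mirrors ClusterModel.adjustByConfidence from the diff below.
public class HeadroomConfidenceDemo {

    /** Pulls a headroom multiplier towards 1 as the average query rate drops below 100 queries/s. */
    static double adjustByConfidence(double headroom, double averageQueryRate) {
        double confidence = Math.min(1.0, averageQueryRate / 100.0);
        return (headroom - 1) * confidence + 1;
    }

    public static void main(String[] args) {
        double headroom = 1.5; // e.g. headroom for a 50% traffic increase
        System.out.println(adjustByConfidence(headroom, 1000)); // 1.5   - plenty of traffic, headroom kept as-is
        System.out.println(adjustByConfidence(headroom,   50)); // 1.25  - half confidence
        System.out.println(adjustByConfidence(headroom,    1)); // 1.005 - noise-level traffic, headroom mostly ignored
    }
}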
-rw-r--r--  node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/ClusterModel.java                      30
-rw-r--r--  node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/GroupPreparer.java                   3
-rw-r--r--  node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/AutoscalingTest.java                   39
-rw-r--r--  node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/ClusterModelTest.java                  28
-rw-r--r--  node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/Loader.java                             8
-rw-r--r--  node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/AutoscalingMaintainerTest.java       37
-rw-r--r--  node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/AutoscalingMaintainerTester.java      8
-rw-r--r--  node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/ScalingSuggestionsMaintainerTest.java  8
-rw-r--r--  node-repository/src/test/java/com/yahoo/vespa/hosted/provision/restapi/responses/application1.json               2
-rw-r--r--  node-repository/src/test/java/com/yahoo/vespa/hosted/provision/restapi/responses/application2.json               2
10 files changed, 106 insertions, 59 deletions
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/ClusterModel.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/ClusterModel.java
index 0facc6d37ea..1928a784763 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/ClusterModel.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/ClusterModel.java
@@ -57,6 +57,7 @@ public class ClusterModel {
// Lazily initialized members
private Double queryFractionOfMax = null;
private Double maxQueryGrowthRate = null;
+ private OptionalDouble averageQueryRate = null;
public ClusterModel(Zone zone,
Application application,
@@ -131,19 +132,25 @@ public class ClusterModel {
/**
* Returns the predicted max query growth rate per minute as a fraction of the average traffic
- * in the scaling window
+ * in the scaling window.
*/
public double maxQueryGrowthRate() {
if (maxQueryGrowthRate != null) return maxQueryGrowthRate;
return maxQueryGrowthRate = clusterTimeseries().maxQueryGrowthRate(scalingDuration(), clock);
}
- /** Returns the average query rate in the scaling window as a fraction of the max observed query rate */
+ /** Returns the average query rate in the scaling window as a fraction of the max observed query rate. */
public double queryFractionOfMax() {
if (queryFractionOfMax != null) return queryFractionOfMax;
return queryFractionOfMax = clusterTimeseries().queryFractionOfMax(scalingDuration(), clock);
}
+ /** Returns the average query rate in the scaling window. */
+ public OptionalDouble averageQueryRate() {
+ if (averageQueryRate != null) return averageQueryRate;
+ return averageQueryRate = clusterTimeseries().queryRate(scalingDuration(), clock);
+ }
+
/** Returns the average of the last load measurement from each node. */
public Load currentLoad() { return nodeTimeseries().currentLoad(); }
@@ -239,7 +246,8 @@ public class ClusterModel {
// Cap headroom at 10% above the historical observed peak
if (queryFractionOfMax() != 0)
growthRateHeadroom = Math.min(growthRateHeadroom, 1 / queryFractionOfMax() + 0.1);
- return growthRateHeadroom;
+
+ return adjustByConfidence(growthRateHeadroom);
}
/**
@@ -255,15 +263,23 @@ public class ClusterModel {
trafficShiftHeadroom = 1/application.status().maxReadShare();
else
trafficShiftHeadroom = application.status().maxReadShare() / application.status().currentReadShare();
- return Math.min(trafficShiftHeadroom, 1/application.status().maxReadShare());
+ return adjustByConfidence(Math.min(trafficShiftHeadroom, 1/application.status().maxReadShare()));
+ }
+
+ /**
+ * Headroom values are a multiplier of the current query rate.
+ * Adjusts this value closer to 1 when the query rate is too low to draw statistical conclusions
+ * with high confidence, to avoid large adjustments caused by random noise from low traffic.
+ */
+ private double adjustByConfidence(double headroom) {
+ return (headroom - 1) * Math.min(1, averageQueryRate().orElse(0) / 100.0) + 1;
}
/** The estimated fraction of cpu usage which goes to processing queries vs. writes */
public double queryCpuFraction() {
- OptionalDouble queryRate = clusterTimeseries().queryRate(scalingDuration(), clock);
OptionalDouble writeRate = clusterTimeseries().writeRate(scalingDuration(), clock);
- if (queryRate.orElse(0) == 0 && writeRate.orElse(0) == 0) return queryCpuFraction(0.5);
- return queryCpuFraction(queryRate.orElse(0) / (queryRate.orElse(0) + writeRate.orElse(0)));
+ if (averageQueryRate().orElse(0) == 0 && writeRate.orElse(0) == 0) return queryCpuFraction(0.5);
+ return queryCpuFraction(averageQueryRate().orElse(0) / (averageQueryRate().orElse(0) + writeRate.orElse(0)));
}
private double queryCpuFraction(double queryRateFraction) {
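A minimal sketch, under assumed names, of the query-vs-write split after this change: queryCpuFraction now reuses the lazily cached averageQueryRate() instead of reading the timeseries again, and falls back to an even 0.5 split when there is neither query nor write traffic. Only the structure is taken from the hunk above; the constructor, field names, and the omission of the final CPU-fraction mapping are simplifications for illustration.

import java.util.OptionalDouble;

class QueryCpuFractionSketch {

    private OptionalDouble averageQueryRate = null; // lazily initialized, like the new ClusterModel field

    private final double queryRateFromTimeseries; // stands in for clusterTimeseries().queryRate(...)
    private final double writeRate;               // stands in for clusterTimeseries().writeRate(...)

    QueryCpuFractionSketch(double queryRateFromTimeseries, double writeRate) {
        this.queryRateFromTimeseries = queryRateFromTimeseries;
        this.writeRate = writeRate;
    }

    /** Computed once, then reused by every caller, including the headroom confidence adjustment. */
    OptionalDouble averageQueryRate() {
        if (averageQueryRate != null) return averageQueryRate;
        return averageQueryRate = OptionalDouble.of(queryRateFromTimeseries);
    }

    /** Fraction of traffic that is queries; 0.5 when there is no traffic data at all. */
    double queryRateFraction() {
        double query = averageQueryRate().orElse(0);
        if (query == 0 && writeRate == 0) return 0.5;
        return query / (query + writeRate);
    }

    public static void main(String[] args) {
        System.out.println(new QueryCpuFractionSketch(100, 100).queryRateFraction());  // 0.5: equal query and write rates
        System.out.println(new QueryCpuFractionSketch(0, 0).queryRateFraction());      // 0.5: no data, assume an even split
        System.out.println(new QueryCpuFractionSketch(100, 1000).queryRateFraction()); // ~0.09: write-heavy cluster
    }
}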
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/GroupPreparer.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/GroupPreparer.java
index 40bad7022d6..1a3ac17c7ef 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/GroupPreparer.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/GroupPreparer.java
@@ -74,7 +74,8 @@ public class GroupPreparer {
public PrepareResult prepare(ApplicationId application, ClusterSpec cluster, NodeSpec requestedNodes,
List<Node> surplusActiveNodes, NodeIndices indices, int wantedGroups,
NodesAndHosts<LockedNodeList> allNodesAndHosts) {
- log.log(Level.FINE, () -> "Preparing " + cluster.type().name() + " " + cluster.id() + " with requested resources " + requestedNodes.resources().orElse(NodeResources.unspecified()));
+ log.log(Level.FINE, () -> "Preparing " + cluster.type().name() + " " + cluster.id() + " with requested resources " +
+ requestedNodes.resources().orElse(NodeResources.unspecified()));
// Try preparing in memory without global unallocated lock. Most of the time there should be no changes,
// and we can return nodes previously allocated.
NodeAllocation probeAllocation = prepareAllocation(application, cluster, requestedNodes, surplusActiveNodes,
diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/AutoscalingTest.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/AutoscalingTest.java
index eda677c6e59..f6c393a6f4d 100644
--- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/AutoscalingTest.java
+++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/AutoscalingTest.java
@@ -457,7 +457,7 @@ public class AutoscalingTest {
fixture.tester().clock().advance(Duration.ofDays(2));
Duration timePassed = fixture.loader().addCpuMeasurements(0.25, 120);
fixture.tester().clock().advance(timePassed.negated());
- fixture.loader().addLoadMeasurements(10, t -> t == 0 ? 20.0 : 10.0, t -> 1.0);
+ fixture.loader().addLoadMeasurements(10, t -> t == 0 ? 200.0 : 100.0, t -> 10.0);
fixture.tester().assertResources("Scaling up cpu, others down, changing to 1 group is cheaper",
8, 1, 2.8, 36.2, 56.4,
fixture.autoscale());
@@ -496,7 +496,7 @@ public class AutoscalingTest {
fixture.tester().clock().advance(Duration.ofDays(1));
fixture.loader().applyMemLoad(1.0, 1000);
fixture.tester().assertResources("Increase group size to reduce memory load",
- 8, 2, 4.5, 97.1, 74.7,
+ 8, 2, 13.9, 97.1, 66.6,
fixture.autoscale());
}
@@ -564,7 +564,7 @@ public class AutoscalingTest {
var fixture = AutoscalingTester.fixture().awsProdSetup(true).build();
fixture.tester().clock().advance(Duration.ofDays(2));
- Duration timeAdded = fixture.loader().addLoadMeasurements(100, t -> t == 0 ? 20.0 : 10.0, t -> 0.0);
+ Duration timeAdded = fixture.loader().addLoadMeasurements(100, t -> t == 0 ? 200.0 : 100.0, t -> 0.0);
fixture.tester.clock().advance(timeAdded.negated());
fixture.loader().addCpuMeasurements(0.25, 200);
@@ -574,17 +574,17 @@ public class AutoscalingTest {
fixture.setScalingDuration(Duration.ofMinutes(5));
fixture.tester().clock().advance(Duration.ofDays(2));
- timeAdded = fixture.loader().addLoadMeasurements(100, t -> 10.0 + (t < 50 ? t : 100 - t), t -> 0.0);
+ timeAdded = fixture.loader().addLoadMeasurements(100, t -> 100.0 + (t < 50 ? t : 100 - t), t -> 0.0);
fixture.tester.clock().advance(timeAdded.negated());
fixture.loader().addCpuMeasurements(0.25, 200);
fixture.tester().assertResources("Scale down since observed growth is slower than scaling time",
- 5, 1, 2.2, 13.3, 83.2,
+ 5, 1, 2.1, 13.3, 83.2,
fixture.autoscale());
fixture.setScalingDuration(Duration.ofMinutes(60));
fixture.tester().clock().advance(Duration.ofDays(2));
timeAdded = fixture.loader().addLoadMeasurements(100,
- t -> 10.0 + (t < 50 ? t * t * t : 125000 - (t - 49) * (t - 49) * (t - 49)),
+ t -> 100.0 + (t < 50 ? t * t * t : 125000 - (t - 49) * (t - 49) * (t - 49)),
t -> 0.0);
fixture.tester.clock().advance(timeAdded.negated());
fixture.loader().addCpuMeasurements(0.25, 200);
@@ -594,6 +594,23 @@ public class AutoscalingTest {
}
@Test
+ public void test_autoscaling_weights_growth_rate_by_confidence() {
+ var fixture = AutoscalingTester.fixture().awsProdSetup(true).build();
+
+ double scalingFactor = 1.0/6000; // To make the average query rate low
+ fixture.setScalingDuration(Duration.ofMinutes(60));
+ fixture.tester().clock().advance(Duration.ofDays(2));
+ Duration timeAdded = fixture.loader().addLoadMeasurements(100,
+ t -> scalingFactor * (100.0 + (t < 50 ? t * t * t : 125000 - (t - 49) * (t - 49) * (t - 49))),
+ t -> 0.0);
+ fixture.tester.clock().advance(timeAdded.negated());
+ fixture.loader().addCpuMeasurements(0.7, 200);
+ fixture.tester().assertResources("Scale up slightly since observed growth is faster than scaling time, but we are not confident",
+ 5, 1, 2.1, 13.3, 83.2,
+ fixture.autoscale());
+ }
+
+ @Test
public void test_autoscaling_considers_query_vs_write_rate() {
var fixture = AutoscalingTester.fixture().awsProdSetup(true).build();
@@ -603,7 +620,7 @@ public class AutoscalingTest {
// This makes headroom for queries doubling, which we want to observe the effect of here
fixture.tester().clock().advance(Duration.ofDays(2));
- var timeAdded = fixture.loader().addLoadMeasurements(100, t -> t == 0 ? 20.0 : 10.0, t -> 10.0);
+ var timeAdded = fixture.loader().addLoadMeasurements(100, t -> t == 0 ? 200.0 : 100.0, t -> 100.0);
fixture.tester.clock().advance(timeAdded.negated());
fixture.loader().addCpuMeasurements(0.4, 200);
fixture.tester.assertResources("Query and write load is equal -> scale up somewhat",
@@ -611,7 +628,7 @@ public class AutoscalingTest {
fixture.autoscale());
fixture.tester().clock().advance(Duration.ofDays(2));
- timeAdded = fixture.loader().addLoadMeasurements(100, t -> t == 0 ? 80.0 : 40.0, t -> 10.0);
+ timeAdded = fixture.loader().addLoadMeasurements(100, t -> t == 0 ? 800.0 : 400.0, t -> 100.0);
fixture.tester.clock().advance(timeAdded.negated());
fixture.loader().addCpuMeasurements(0.4, 200);
// TODO: Actually, we scale down here - why?
@@ -620,7 +637,7 @@ public class AutoscalingTest {
fixture.autoscale());
fixture.tester().clock().advance(Duration.ofDays(2));
- timeAdded = fixture.loader().addLoadMeasurements(100, t -> t == 0 ? 20.0 : 10.0, t -> 100.0);
+ timeAdded = fixture.loader().addLoadMeasurements(100, t -> t == 0 ? 200.0 : 100.0, t -> 1000.0);
fixture.tester.clock().advance(timeAdded.negated());
fixture.loader().addCpuMeasurements(0.4, 200);
fixture.tester().assertResources("Write load is 10x query load -> scale down",
@@ -628,7 +645,7 @@ public class AutoscalingTest {
fixture.autoscale());
fixture.tester().clock().advance(Duration.ofDays(2));
- timeAdded = fixture.loader().addLoadMeasurements(100, t -> t == 0 ? 20.0 : 10.0, t-> 0.0);
+ timeAdded = fixture.loader().addLoadMeasurements(100, t -> t == 0 ? 200.0 : 100.0, t-> 0.0);
fixture.tester.clock().advance(timeAdded.negated());
fixture.loader().addCpuMeasurements(0.4, 200);
fixture.tester().assertResources("Query only -> largest possible",
@@ -636,7 +653,7 @@ public class AutoscalingTest {
fixture.autoscale());
fixture.tester().clock().advance(Duration.ofDays(2));
- timeAdded = fixture.loader().addLoadMeasurements(100, t -> 0.0, t -> 10.0);
+ timeAdded = fixture.loader().addLoadMeasurements(100, t -> 0.0, t -> 100.0);
fixture.tester.clock().advance(timeAdded.negated());
fixture.loader().addCpuMeasurements(0.4, 200);
fixture.tester().assertResources("Write only -> smallest possible",
diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/ClusterModelTest.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/ClusterModelTest.java
index b38dbfc55ae..ed00134af55 100644
--- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/ClusterModelTest.java
+++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/ClusterModelTest.java
@@ -41,31 +41,41 @@ public class ClusterModelTest {
public void test_traffic_headroom() {
// No current traffic share: Ideal load is low but capped
var model1 = clusterModel(new Status(0.0, 1.0),
- t -> t == 0 ? 10000.0 : 0.0, t -> 0.0);
- assertEquals(0.37067209775967414, model1.idealLoad().cpu(), delta);
+ t -> t == 0 ? 10000.0 : 100.0, t -> 0.0);
+ assertEquals(0.32653061224489793, model1.idealLoad().cpu(), delta);
// Almost no current traffic share: Ideal load is low but capped
var model2 = clusterModel(new Status(0.0001, 1.0),
- t -> t == 0 ? 10000.0 : 0.0, t -> 0.0);
- assertEquals(0.37067209775967414, model2.idealLoad().cpu(), delta);
+ t -> t == 0 ? 10000.0 : 100.0, t -> 0.0);
+ assertEquals(0.32653061224489793, model2.idealLoad().cpu(), delta);
+
+ // Almost no traffic: Headroom impact is reduced due to uncertainty
+ var model3 = clusterModel(new Status(0.0001, 1.0),
+ t -> t == 0 ? 10000.0 : 1.0, t -> 0.0);
+ assertEquals(0.6465952717720751, model3.idealLoad().cpu(), delta);
}
@Test
public void test_growth_headroom() {
// No traffic data: Ideal load assumes 2 regions
var model1 = clusterModel(new Status(0.0, 0.0),
- t -> t == 0 ? 10000.0 : 0.0, t -> 0.0);
- assertEquals(0.2240325865580448, model1.idealLoad().cpu(), delta);
+ t -> t == 0 ? 10000.0 : 100.0, t -> 0.0);
+ assertEquals(0.16326530612244897, model1.idealLoad().cpu(), delta);
// No traffic: Ideal load is higher since we now know there is only one zone
var model2 = clusterModel(new Status(0.0, 1.0),
- t -> t == 0 ? 10000.0 : 0.0, t -> 0.0);
- assertEquals(0.37067209775967414, model2.idealLoad().cpu(), delta);
+ t -> t == 0 ? 10000.0 : 100.0, t -> 0.0);
+ assertEquals(0.32653061224489793, model2.idealLoad().cpu(), delta);
// Almost no current traffic: Similar number as above
var model3 = clusterModel(new Status(0.0001, 1.0),
- t -> t == 0 ? 10000.0 : 0.0001, t -> 0.0);
+ t -> t == 0 ? 10000.0 : 100.0, t -> 0.0);
assertEquals(0.32653061224489793, model3.idealLoad().cpu(), delta);
+
+ // Low query rate: Impact of growth headroom is reduced due to uncertainty
+ var model4 = clusterModel(new Status(0.0001, 1.0),
+ t -> t == 0 ? 100.0 : 1.0, t -> 0.0);
+ assertEquals(0.6465952717720751, model4.idealLoad().cpu(), delta);
}
private ClusterModel clusterModelWithNoData() {
diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/Loader.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/Loader.java
index 9158262b134..10c8c7434b1 100644
--- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/Loader.java
+++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/Loader.java
@@ -82,13 +82,13 @@ public class Loader {
public void applyCpuLoad(double cpuLoad, int measurements) {
addCpuMeasurements((float)cpuLoad, measurements);
fixture.tester().clock().advance(samplingInterval.negated().multipliedBy(measurements));
- addQueryRateMeasurements(measurements, t -> t == 0 ? 20.0 : 10.0); // Query traffic only
+ addQueryRateMeasurements(measurements, t -> t == 0 ? 200.0 : 100.0); // Query traffic only
}
public void applyMemLoad(double memLoad, int measurements) {
addMemMeasurements(memLoad, measurements);
fixture.tester().clock().advance(samplingInterval.negated().multipliedBy(measurements));
- addQueryRateMeasurements(measurements, t -> t == 0 ? 20.0 : 10.0); // Query traffic only
+ addQueryRateMeasurements(measurements, t -> t == 0 ? 200.0 : 100.0); // Query traffic only
}
/**
@@ -140,13 +140,13 @@ public class Loader {
public void applyLoad(Load load, int measurements) {
addMeasurements(load, measurements);
fixture.tester().clock().advance(samplingInterval.negated().multipliedBy(measurements));
- addQueryRateMeasurements(measurements, t -> t == 0 ? 20.0 : 10.0); // Query traffic only
+ addQueryRateMeasurements(measurements, t -> t == 0 ? 200.0 : 100.0); // Query traffic only
}
public void applyLoad(Load load, int generation, boolean inService, boolean stable, int measurements) {
addMeasurements(load, generation, inService, stable, measurements);
fixture.tester().clock().advance(samplingInterval.negated().multipliedBy(measurements));
- addQueryRateMeasurements(measurements, t -> t == 0 ? 20.0 : 10.0); // Query traffic only
+ addQueryRateMeasurements(measurements, t -> t == 0 ? 200.0 : 100.0); // Query traffic only
}
public Duration addQueryRateMeasurements(int measurements, IntFunction<Double> queryRate) {
diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/AutoscalingMaintainerTest.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/AutoscalingMaintainerTest.java
index 5ceb28d3fed..214d842e4bb 100644
--- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/AutoscalingMaintainerTest.java
+++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/AutoscalingMaintainerTest.java
@@ -70,8 +70,8 @@ public class AutoscalingMaintainerTest {
assertTrue(tester.deployer().lastDeployTime(app1).isEmpty());
assertTrue(tester.deployer().lastDeployTime(app2).isEmpty());
- tester.addMeasurements(0.9f, 0.9f, 0.9f, 0, 500, app1);
- tester.addMeasurements(0.9f, 0.9f, 0.9f, 0, 500, app2);
+ tester.addMeasurements(0.9f, 0.9f, 0.9f, 0, 500, app1, cluster1.id());
+ tester.addMeasurements(0.9f, 0.9f, 0.9f, 0, 500, app2, cluster2.id());
tester.clock().advance(Duration.ofMinutes(10));
tester.maintainer().maintain();
@@ -93,7 +93,7 @@ public class AutoscalingMaintainerTest {
tester.deploy(app1, cluster1, app1Capacity);
// Measure overload
- tester.addMeasurements(0.9f, 0.9f, 0.9f, 0, 500, app1);
+ tester.addMeasurements(0.9f, 0.9f, 0.9f, 0, 500, app1, cluster1.id());
// Causes autoscaling
tester.clock().advance(Duration.ofMinutes(10));
@@ -110,24 +110,24 @@ public class AutoscalingMaintainerTest {
assertEquals(firstMaintenanceTime.toEpochMilli(), events.get(1).at().toEpochMilli());
// Measure overload still, since change is not applied, but metrics are discarded
- tester.addMeasurements(0.9f, 0.9f, 0.9f, 0, 500, app1);
+ tester.addMeasurements(0.9f, 0.9f, 0.9f, 0, 500, app1, cluster1.id());
tester.maintainer().maintain();
assertEquals(firstMaintenanceTime.toEpochMilli(), tester.deployer().lastDeployTime(app1).get().toEpochMilli());
// Measure underload, but no autoscaling since we still haven't measured we're on the new config generation
- tester.addMeasurements(0.1f, 0.1f, 0.1f, 0, 500, app1);
+ tester.addMeasurements(0.1f, 0.1f, 0.1f, 0, 500, app1, cluster1.id());
tester.maintainer().maintain();
assertEquals(firstMaintenanceTime.toEpochMilli(), tester.deployer().lastDeployTime(app1).get().toEpochMilli());
// Add measurement of the expected generation, leading to rescaling
// - record scaling completion
tester.clock().advance(Duration.ofMinutes(5));
- tester.addMeasurements(0.1f, 0.1f, 0.1f, 1, 1, app1);
+ tester.addMeasurements(0.1f, 0.1f, 0.1f, 1, 1, app1, cluster1.id());
tester.maintainer().maintain();
assertEquals(firstMaintenanceTime.toEpochMilli(), tester.deployer().lastDeployTime(app1).get().toEpochMilli());
// - measure underload
tester.clock().advance(Duration.ofDays(4)); // Exit cooling period
- tester.addMeasurements(0.1f, 0.1f, 0.1f, 1, 500, app1);
+ tester.addMeasurements(0.1f, 0.1f, 0.1f, 1, 500, app1, cluster1.id());
Instant lastMaintenanceTime = tester.clock().instant();
tester.maintainer().maintain();
assertEquals(lastMaintenanceTime.toEpochMilli(), tester.deployer().lastDeployTime(app1).get().toEpochMilli());
@@ -161,16 +161,16 @@ public class AutoscalingMaintainerTest {
Duration samplePeriod = Duration.ofSeconds(150);
for (int i = 0; i < 20; i++) {
// Record completion to keep scaling window at minimum
- tester.addMeasurements(0.1f, 0.1f, 0.1f, i, 1, app1);
+ tester.addMeasurements(0.1f, 0.1f, 0.1f, i, 1, app1, cluster1.id());
tester.maintainer().maintain();
tester.clock().advance(Duration.ofDays(1));
if (i % 2 == 0) { // high load
- tester.addMeasurements(0.99f, 0.99f, 0.99f, i, measurements, app1);
+ tester.addMeasurements(0.99f, 0.99f, 0.99f, i, measurements, app1, cluster1.id());
}
else { // low load
- tester.addMeasurements(0.2f, 0.2f, 0.2f, i, measurements, app1);
+ tester.addMeasurements(0.2f, 0.2f, 0.2f, i, measurements, app1, cluster1.id());
}
tester.clock().advance(samplePeriod.negated().multipliedBy(measurements));
tester.addQueryRateMeasurements(app1, cluster1.id(), measurements, t -> (t == 0 ? 20.0 : 10.0 ));
@@ -180,7 +180,7 @@ public class AutoscalingMaintainerTest {
assertEquals(Cluster.maxScalingEvents, tester.cluster(app1, cluster1).scalingEvents().size());
// Complete last event
- tester.addMeasurements(0.1f, 0.1f, 0.1f, 20, 1, app1);
+ tester.addMeasurements(0.1f, 0.1f, 0.1f, 20, 1, app1, cluster1.id());
tester.maintainer().maintain();
assertEquals("Last event is completed",
tester.clock().instant(),
@@ -202,7 +202,6 @@ public class AutoscalingMaintainerTest {
autoscale(false, Duration.ofMinutes( 1), Duration.ofMinutes( 5), clock, app1, cluster1, tester);
autoscale( true, Duration.ofMinutes(19), Duration.ofMinutes(10), clock, app1, cluster1, tester);
- autoscale( true, Duration.ofMinutes(40), Duration.ofMinutes(20), clock, app1, cluster1, tester);
}
@Test
@@ -217,21 +216,21 @@ public class AutoscalingMaintainerTest {
// Add a scaling event
tester.deploy(app1, cluster1, capacity);
- tester.addMeasurements(1.0f, 0.3f, 0.3f, 0, 4, app1);
+ tester.addMeasurements(1.0f, 0.3f, 0.3f, 0, 4, app1, cluster1.id());
tester.maintainer().maintain();
assertEquals("Scale up: " + tester.cluster(app1, cluster1).autoscalingStatus(),
1,
tester.cluster(app1, cluster1).lastScalingEvent().get().generation());
// measurements with outdated generation are ignored -> no autoscaling
- var duration = tester.addMeasurements(3.0f, 0.3f, 0.3f, 0, 2, app1);
+ var duration = tester.addMeasurements(3.0f, 0.3f, 0.3f, 0, 2, app1, cluster1.id());
tester.maintainer().maintain();
assertEquals("Measurements with outdated generation are ignored -> no autoscaling",
1,
tester.cluster(app1, cluster1).lastScalingEvent().get().generation());
tester.clock().advance(duration.negated());
- duration = tester.addMeasurements(3.0f, 0.3f, 0.3f, 1, 2, app1);
+ duration = tester.addMeasurements(3.0f, 0.3f, 0.3f, 1, 2, app1, cluster1.id());
tester.maintainer().maintain();
assertEquals("Measurements right after generation change are ignored -> no autoscaling",
1,
@@ -242,7 +241,7 @@ public class AutoscalingMaintainerTest {
tester.clock().advance(ClusterModel.warmupDuration.plus(Duration.ofMinutes(1)));
tester.nodeRepository().nodes().list().owner(app1).asList().forEach(node -> recordRestart(node, tester.nodeRepository()));
- duration = tester.addMeasurements(3.0f, 0.3f, 0.3f, 1, 2, app1);
+ duration = tester.addMeasurements(3.0f, 0.3f, 0.3f, 1, 2, app1, cluster1.id());
tester.maintainer().maintain();
assertEquals("Measurements right after restart are ignored -> no autoscaling",
1,
@@ -250,7 +249,7 @@ public class AutoscalingMaintainerTest {
tester.clock().advance(duration.negated());
tester.clock().advance(ClusterModel.warmupDuration.plus(Duration.ofMinutes(1)));
- tester.addMeasurements(3.0f, 0.3f, 0.3f, 1, 2, app1);
+ tester.addMeasurements(3.0f, 0.3f, 0.3f, 1, 2, app1, cluster1.id());
tester.maintainer().maintain();
assertEquals("We have valid measurements -> scale up",
2,
@@ -310,7 +309,7 @@ public class AutoscalingMaintainerTest {
clock.advance(completionTime);
float load = down ? 0.1f : 1.0f;
- tester.addMeasurements(load, load, load, generation, 1, application);
+ tester.addMeasurements(load, load, load, generation, 1, application, cluster.id());
tester.maintainer().maintain();
assertEvent("Measured completion of the last scaling event, but no new autoscaling yet",
generation, Optional.of(clock.instant()),
@@ -320,7 +319,7 @@ public class AutoscalingMaintainerTest {
else
clock.advance(expectedWindow.minus(completionTime));
- tester.addMeasurements(load, load, load, generation, 200, application);
+ tester.addMeasurements(load, load, load, generation, 200, application, cluster.id());
tester.maintainer().maintain();
assertEquals("We passed window duration so a new autoscaling is started: " +
tester.cluster(application, cluster).autoscalingStatus(),
diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/AutoscalingMaintainerTester.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/AutoscalingMaintainerTester.java
index d921af9543e..95e36787219 100644
--- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/AutoscalingMaintainerTester.java
+++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/AutoscalingMaintainerTester.java
@@ -71,7 +71,8 @@ public class AutoscalingMaintainerTester {
return provisioningTester.deploy(application, cluster, capacity);
}
- public Duration addMeasurements(float cpu, float mem, float disk, long generation, int count, ApplicationId applicationId) {
+ public Duration addMeasurements(float cpu, float mem, float disk, long generation, int count,
+ ApplicationId applicationId, ClusterSpec.Id clusterId) {
NodeList nodes = nodeRepository().nodes().list(Node.State.active).owner(applicationId);
Instant startTime = clock().instant();
for (int i = 0; i < count; i++) {
@@ -85,7 +86,10 @@ public class AutoscalingMaintainerTester {
0.0))));
clock().advance(Duration.ofSeconds(150));
}
- return Duration.between(startTime, clock().instant());
+ var totalDuration = Duration.between(startTime, clock().instant());
+ clock().advance(totalDuration.negated());
+ addQueryRateMeasurements(applicationId, clusterId, count, t -> 100.0);
+ return totalDuration;
}
/** Creates the given number of measurements, spaced 5 minutes between, using the given function */
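The updated addMeasurements above records load samples while advancing the clock, then winds the clock back by the elapsed duration so that query-rate samples (100 qps) can be added over the same window. A self-contained sketch of that rewind-and-replay pattern, using a trivial stand-in clock rather than the tester's real clock:

import java.time.Duration;
import java.time.Instant;

class ClockRewindSketch {

    /** Trivial mutable clock, a stand-in for the tester's clock in this illustration. */
    static class ManualTestClock {
        private Instant now = Instant.EPOCH;
        Instant instant() { return now; }
        void advance(Duration d) { now = now.plus(d); }
    }

    public static void main(String[] args) {
        ManualTestClock clock = new ManualTestClock();
        int count = 4;
        Instant start = clock.instant();

        for (int i = 0; i < count; i++)              // record load samples, advancing 150 s per sample
            clock.advance(Duration.ofSeconds(150));
        Duration total = Duration.between(start, clock.instant());

        clock.advance(total.negated());              // rewind to the start of the measurement window
        System.out.println(clock.instant().equals(start)); // true: query-rate samples can now be recorded
                                                           // over the same window the load samples covered
    }
}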
diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/ScalingSuggestionsMaintainerTest.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/ScalingSuggestionsMaintainerTest.java
index b43baf444c8..f5ab822721f 100644
--- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/ScalingSuggestionsMaintainerTest.java
+++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/ScalingSuggestionsMaintainerTest.java
@@ -70,9 +70,9 @@ public class ScalingSuggestionsMaintainerTest {
new TestMetric());
maintainer.maintain();
- assertEquals("13 nodes with [vcpu: 5.5, memory: 4.5 Gb, disk 10.0 Gb, bandwidth: 0.1 Gbps, architecture: x86_64]",
+ assertEquals("8 nodes with [vcpu: 3.2, memory: 4.5 Gb, disk 10.0 Gb, bandwidth: 0.1 Gbps, architecture: x86_64]",
suggestionOf(app1, cluster1, tester).get().resources().toString());
- assertEquals("8 nodes with [vcpu: 11.0, memory: 4.4 Gb, disk 11.8 Gb, bandwidth: 0.1 Gbps, architecture: x86_64]",
+ assertEquals("8 nodes with [vcpu: 3.6, memory: 4.4 Gb, disk 11.8 Gb, bandwidth: 0.1 Gbps, architecture: x86_64]",
suggestionOf(app2, cluster2, tester).get().resources().toString());
// Utilization goes way down
@@ -80,14 +80,14 @@ public class ScalingSuggestionsMaintainerTest {
addMeasurements(0.10f, 0.10f, 0.10f, 0, 500, app1, tester.nodeRepository());
maintainer.maintain();
assertEquals("Suggestion stays at the peak value observed",
- "13 nodes with [vcpu: 5.5, memory: 4.5 Gb, disk 10.0 Gb, bandwidth: 0.1 Gbps, architecture: x86_64]",
+ "8 nodes with [vcpu: 3.2, memory: 4.5 Gb, disk 10.0 Gb, bandwidth: 0.1 Gbps, architecture: x86_64]",
suggestionOf(app1, cluster1, tester).get().resources().toString());
// Utilization is still way down and a week has passed
tester.clock().advance(Duration.ofDays(7));
addMeasurements(0.10f, 0.10f, 0.10f, 0, 500, app1, tester.nodeRepository());
maintainer.maintain();
assertEquals("Peak suggestion has been outdated",
- "5 nodes with [vcpu: 1.8, memory: 4.0 Gb, disk 10.0 Gb, bandwidth: 0.1 Gbps, architecture: x86_64]",
+ "3 nodes with [vcpu: 1.2, memory: 4.0 Gb, disk 10.0 Gb, bandwidth: 0.1 Gbps, architecture: x86_64]",
suggestionOf(app1, cluster1, tester).get().resources().toString());
assertTrue(shouldSuggest(app1, cluster1, tester));
diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/restapi/responses/application1.json b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/restapi/responses/application1.json
index 6adcb1199eb..0d640f7e3b2 100644
--- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/restapi/responses/application1.json
+++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/restapi/responses/application1.json
@@ -71,7 +71,7 @@
},
"utilization" : {
"cpu" : 0.0,
- "idealCpu": 0.1375,
+ "idealCpu": 0.40750000000000003,
"currentCpu": 0.0,
"peakCpu": 0.0,
"memory" : 0.0,
diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/restapi/responses/application2.json b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/restapi/responses/application2.json
index 5babf5fc843..80da118f620 100644
--- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/restapi/responses/application2.json
+++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/restapi/responses/application2.json
@@ -45,7 +45,7 @@
},
"utilization" : {
"cpu" : 0.0,
- "idealCpu": 0.1394913986537023,
+ "idealCpu": 0.42670157068062825,
"currentCpu": 0.0,
"peakCpu": 0.0,
"memory" : 0.0,