summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorValerij Fredriksen <freva@users.noreply.github.com>2023-02-22 14:52:41 +0100
committerGitHub <noreply@github.com>2023-02-22 14:52:41 +0100
commit890e0ac9e795ca1c95e459f98a54593ac151051c (patch)
tree29d14a90b3c9f4fd6e52fe608bf017cf778dd9d0
parenta55889ca8ba1f12b60e3e03813823483f8673c5f (diff)
parenta26eb016dd18519ce8d51779702618105f812c9a (diff)
Merge pull request #26140 from vespa-engine/bratseth/autoscaling-cpucost
Test and make more robust
-rw-r--r--node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/ClusterModel.java2
-rw-r--r--node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/AutoscalingUsingBcpGroupInfoTest.java87
2 files changed, 64 insertions, 25 deletions
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/ClusterModel.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/ClusterModel.java
index 4a4222cca6a..4f262fb8105 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/ClusterModel.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/ClusterModel.java
@@ -216,7 +216,7 @@ public class ClusterModel {
public Instant at() { return at;}
private OptionalDouble cpuCostPerQuery() {
- if (averageQueryRate().isEmpty()) return OptionalDouble.empty();
+ if (averageQueryRate().isEmpty() || averageQueryRate().getAsDouble() == 0.0) return OptionalDouble.empty();
// TODO: Query rate should generally be sampled at the time where we see the peak resource usage
int fanOut = clusterSpec.type().isContainer() ? 1 : groupSize();
return OptionalDouble.of(peakLoad().cpu() * queryCpuFraction() * fanOut * nodes.not().retired().first().get().resources().vcpu()
diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/AutoscalingUsingBcpGroupInfoTest.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/AutoscalingUsingBcpGroupInfoTest.java
index 5cef4baadd4..bd3589be9dd 100644
--- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/AutoscalingUsingBcpGroupInfoTest.java
+++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/AutoscalingUsingBcpGroupInfoTest.java
@@ -11,6 +11,8 @@ import org.junit.Test;
import java.time.Duration;
import java.util.Optional;
+import static org.junit.Assert.assertEquals;
+
/**
* Tests autoscaling using information from the BCP group this cluster deployment
* is part of to supplement local data when the local deployment lacks sufficient traffic.
@@ -28,7 +30,7 @@ public class AutoscalingUsingBcpGroupInfoTest {
fixture.store(new BcpGroupInfo(100, 1.1, 0.3));
fixture.loader().addCpuMeasurements(0.7f, 10);
fixture.tester().assertResources("Scaling up cpu using bcp group cpu info",
- 9, 1, 3.6, 6.1, 25.3,
+ 9, 1, 3.6, 6.1, 25.3,
fixture.autoscale());
// Higher query rate
@@ -36,7 +38,7 @@ public class AutoscalingUsingBcpGroupInfoTest {
fixture.store(new BcpGroupInfo(200, 1.1, 0.3));
fixture.loader().addCpuMeasurements(0.7f, 10);
fixture.tester().assertResources("Scaling up cpu using bcp group cpu info",
- 9, 1, 7.1, 6.1, 25.3,
+ 9, 1, 7.1, 6.1, 25.3,
fixture.autoscale());
// Higher headroom
@@ -44,7 +46,7 @@ public class AutoscalingUsingBcpGroupInfoTest {
fixture.store(new BcpGroupInfo(100, 1.3, 0.3));
fixture.loader().addCpuMeasurements(0.7f, 10);
fixture.tester().assertResources("Scaling up cpu using bcp group cpu info",
- 9, 1, 4.2, 6.1, 25.3,
+ 9, 1, 4.2, 6.1, 25.3,
fixture.autoscale());
// Higher per query cost
@@ -52,7 +54,7 @@ public class AutoscalingUsingBcpGroupInfoTest {
fixture.store(new BcpGroupInfo(100, 1.1, 0.45));
fixture.loader().addCpuMeasurements(0.7f, 10);
fixture.tester().assertResources("Scaling up cpu using bcp group cpu info",
- 9, 1, 5.4, 6.1, 25.3,
+ 9, 1, 5.4, 6.1, 25.3,
fixture.autoscale());
// Bcp elsewhere is 0 - use local only
@@ -60,7 +62,7 @@ public class AutoscalingUsingBcpGroupInfoTest {
fixture.store(new BcpGroupInfo(0, 1.1, 0.45));
fixture.loader().addCpuMeasurements(0.7f, 10);
fixture.tester().assertResources("Scaling using local info",
- 8, 1, 1, 7.0, 29.0,
+ 8, 1, 1, 7.0, 29.0,
fixture.autoscale());
}
@@ -81,7 +83,7 @@ public class AutoscalingUsingBcpGroupInfoTest {
fixture.store(new BcpGroupInfo(100, 1.1, 0.3));
fixture.loader().addCpuMeasurements(0.7f, 10);
fixture.tester().assertResources("Scaling up cpu using bcp group cpu info",
- 3, 3, 10.5, 41.0, 168.9,
+ 3, 3, 10.5, 41.0, 168.9,
fixture.autoscale());
// Higher query rate
@@ -89,7 +91,7 @@ public class AutoscalingUsingBcpGroupInfoTest {
fixture.store(new BcpGroupInfo(200, 1.1, 0.3));
fixture.loader().addCpuMeasurements(0.7f, 10);
fixture.tester().assertResources("Scaling up cpu using bcp group cpu info",
- 3, 3, 20.9, 41.0, 168.9,
+ 3, 3, 20.9, 41.0, 168.9,
fixture.autoscale());
// Higher headroom
@@ -97,7 +99,7 @@ public class AutoscalingUsingBcpGroupInfoTest {
fixture.store(new BcpGroupInfo(100, 1.3, 0.3));
fixture.loader().addCpuMeasurements(0.7f, 10);
fixture.tester().assertResources("Scaling up cpu using bcp group cpu info",
- 3, 3, 12.4, 41.0, 168.9,
+ 3, 3, 12.4, 41.0, 168.9,
fixture.autoscale());
// Higher per query cost
@@ -105,7 +107,7 @@ public class AutoscalingUsingBcpGroupInfoTest {
fixture.store(new BcpGroupInfo(100, 1.1, 0.45));
fixture.loader().addCpuMeasurements(0.7f, 10);
fixture.tester().assertResources("Scaling up cpu using bcp group cpu info",
- 3, 3, 15.7, 41.0, 168.9,
+ 3, 3, 15.7, 41.0, 168.9,
fixture.autoscale());
}
@@ -123,7 +125,7 @@ public class AutoscalingUsingBcpGroupInfoTest {
fixture.store(new BcpGroupInfo(100, 1.1, 0.3));
fixture.loader().addCpuMeasurements(0.7f, 10);
fixture.tester().assertResources("Scaling up cpu using bcp group cpu info",
- 8, 1, 4.0, 16.0, 40.8,
+ 8, 1, 4.0, 16.0, 40.8,
fixture.autoscale());
// Higher query rate (mem and disk changes are due to being assigned larger hosts where we get less overhead share
@@ -131,7 +133,7 @@ public class AutoscalingUsingBcpGroupInfoTest {
fixture.store(new BcpGroupInfo(200, 1.1, 0.3));
fixture.loader().addCpuMeasurements(0.7f, 10);
fixture.tester().assertResources("Scaling up cpu using bcp group cpu info",
- 8, 1, 8.0, 16.0, 40.8,
+ 8, 1, 8.0, 16.0, 40.8,
fixture.autoscale());
// Higher headroom
@@ -139,7 +141,7 @@ public class AutoscalingUsingBcpGroupInfoTest {
fixture.store(new BcpGroupInfo(100, 1.3, 0.3));
fixture.loader().addCpuMeasurements(0.7f, 10);
fixture.tester().assertResources("Scaling up cpu using bcp group cpu info",
- 5, 1, 8.0, 16.0, 40.8,
+ 5, 1, 8.0, 16.0, 40.8,
fixture.autoscale());
// Higher per query cost
@@ -147,7 +149,7 @@ public class AutoscalingUsingBcpGroupInfoTest {
fixture.store(new BcpGroupInfo(100, 1.1, 0.45));
fixture.loader().addCpuMeasurements(0.7f, 10);
fixture.tester().assertResources("Scaling up cpu using bcp group cpu info",
- 6, 1, 8.0, 16.0, 40.8,
+ 6, 1, 8.0, 16.0, 40.8,
fixture.autoscale());
}
@@ -160,7 +162,7 @@ public class AutoscalingUsingBcpGroupInfoTest {
fixture.store(new BcpGroupInfo(200, 1.3, 0.45));
fixture.loader().addCpuMeasurements(0.7f, 10);
fixture.tester().assertResources("Scaling up cpu using bcp group cpu info",
- 8, 1, 14.2, 7.0, 29.0,
+ 8, 1, 14.2, 7.0, 29.0,
fixture.autoscale());
// Some local traffic
@@ -170,7 +172,7 @@ public class AutoscalingUsingBcpGroupInfoTest {
fixture.tester().clock().advance(duration1.negated());
fixture.loader().addQueryRateMeasurements(10, __ -> 10.0);
fixture.tester().assertResources("Scaling up cpu using bcp group cpu info",
- 8, 1, 6.9, 7.0, 29.0,
+ 8, 1, 6.9, 7.0, 29.0,
fixture.autoscale());
// Enough local traffic to get half the votes
@@ -180,7 +182,7 @@ public class AutoscalingUsingBcpGroupInfoTest {
fixture.tester().clock().advance(duration2.negated());
fixture.loader().addQueryRateMeasurements(10, __ -> 50.0);
fixture.tester().assertResources("Scaling up cpu using bcp group cpu info",
- 9, 1, 2.7, 6.1, 25.3,
+ 9, 1, 2.7, 6.1, 25.3,
fixture.autoscale());
// Mostly local
@@ -190,7 +192,7 @@ public class AutoscalingUsingBcpGroupInfoTest {
fixture.tester().clock().advance(duration3.negated());
fixture.loader().addQueryRateMeasurements(10, __ -> 90.0);
fixture.tester().assertResources("Scaling up cpu using bcp group cpu info",
- 9, 1, 2.1, 6.1, 25.3,
+ 9, 1, 2.1, 6.1, 25.3,
fixture.autoscale());
// Local only
@@ -200,7 +202,7 @@ public class AutoscalingUsingBcpGroupInfoTest {
fixture.tester().clock().advance(duration4.negated());
fixture.loader().addQueryRateMeasurements(10, __ -> 100.0);
fixture.tester().assertResources("Scaling up cpu using bcp group cpu info",
- 9, 1, 2.0, 6.1, 25.3,
+ 9, 1, 2.0, 6.1, 25.3,
fixture.autoscale());
// No group info, should be the same as the above
@@ -210,7 +212,7 @@ public class AutoscalingUsingBcpGroupInfoTest {
fixture.tester().clock().advance(duration5.negated());
fixture.loader().addQueryRateMeasurements(10, __ -> 100.0);
fixture.tester().assertResources("Scaling up cpu using bcp group cpu info",
- 9, 1, 2.0, 6.1, 25.3,
+ 9, 1, 2.0, 6.1, 25.3,
fixture.autoscale());
// 40 query rate, no group info (for reference to the below)
@@ -220,28 +222,65 @@ public class AutoscalingUsingBcpGroupInfoTest {
fixture.tester().clock().advance(duration6.negated());
fixture.loader().addQueryRateMeasurements(10, __ -> 40.0);
fixture.tester().assertResources("Scaling up cpu using bcp group cpu info",
- 9, 1, 1.4, 6.1, 25.3,
+ 9, 1, 1.4, 6.1, 25.3,
fixture.autoscale());
// Local query rate is too low but global is even lower so disregard it, giving the same as above
fixture.tester().clock().advance(Duration.ofDays(2));
- fixture.store(new BcpGroupInfo(200/40.0, 1.3, 0.45*40.0));
+ fixture.store(new BcpGroupInfo(200 / 40.0, 1.3, 0.45 * 40.0));
Duration duration7 = fixture.loader().addCpuMeasurements(0.7f, 10);
fixture.tester().clock().advance(duration7.negated());
fixture.loader().addQueryRateMeasurements(10, __ -> 40.0);
fixture.tester().assertResources("Scaling up cpu using bcp group cpu info",
- 9, 1, 1.4, 6.1, 25.3,
+ 9, 1, 1.4, 6.1, 25.3,
fixture.autoscale());
// Local query rate is too low to be fully confident, and so is global but as it is slightly larger, incorporate it slightly
fixture.tester().clock().advance(Duration.ofDays(2));
- fixture.store(new BcpGroupInfo(200/4.0, 1.3, 0.45*4.0));
+ fixture.store(new BcpGroupInfo(200 / 4.0, 1.3, 0.45 * 4.0));
Duration duration8 = fixture.loader().addCpuMeasurements(0.7f, 10);
fixture.tester().clock().advance(duration8.negated());
fixture.loader().addQueryRateMeasurements(10, __ -> 40.0);
fixture.tester().assertResources("Scaling up cpu using bcp group cpu info",
- 9, 1, 1.8, 6.1, 25.3,
+ 9, 1, 1.8, 6.1, 25.3,
fixture.autoscale());
}
+ /** Tests with varying BCP group info parameters. */
+ @Test
+ public void test_autoscaling_metrics() {
+ var fixture = DynamicProvisioningTester.fixture().awsProdSetup(true).build();
+
+ // Empty has metrics at zero
+ assertEquals(new Autoscaling.Metrics(0, 0, 0),
+ fixture.autoscale().metrics());
+
+
+ // No external load mesurements -> 0
+ fixture.tester().clock().advance(Duration.ofDays(2));
+ fixture.loader().addCpuMeasurements(0.7f, 10);
+ assertEquals(new Autoscaling.Metrics(0, 1.0, 0),
+ fixture.autoscale().metrics());
+
+ // External load is measured to zero -> 0
+ fixture.tester().clock().advance(Duration.ofDays(2));
+ fixture.loader().addCpuMeasurements(0.7f, 10);
+ fixture.loader().addQueryRateMeasurements(10, i -> 0.0);
+ assertEquals(new Autoscaling.Metrics(0, 1.0, 0),
+ fixture.autoscale().metrics());
+
+ // External load
+ fixture.tester().clock().advance(Duration.ofDays(2));
+ fixture.loader().addCpuMeasurements(0.7f, 10);
+ fixture.loader().addQueryRateMeasurements(10, i -> 110.0);
+ assertEquals(new Autoscaling.Metrics(110, 1.1, 0.05),
+ round(fixture.autoscale().metrics()));
+ }
+
+ private Autoscaling.Metrics round(Autoscaling.Metrics metrics) {
+ return new Autoscaling.Metrics(Math.round(metrics.queryRate() * 100) / 100.0,
+ Math.round(metrics.growthRateHeadroom() * 100) / 100.0,
+ Math.round(metrics.cpuCostPerQuery() * 100) / 100.0);
+ }
+
}