diff options
author | Valerij Fredriksen <freva@users.noreply.github.com> | 2023-02-22 14:52:41 +0100 |
---|---|---|
committer | GitHub <noreply@github.com> | 2023-02-22 14:52:41 +0100 |
commit | 890e0ac9e795ca1c95e459f98a54593ac151051c (patch) | |
tree | 29d14a90b3c9f4fd6e52fe608bf017cf778dd9d0 | |
parent | a55889ca8ba1f12b60e3e03813823483f8673c5f (diff) | |
parent | a26eb016dd18519ce8d51779702618105f812c9a (diff) |
Merge pull request #26140 from vespa-engine/bratseth/autoscaling-cpucost
Test and make more robust
2 files changed, 64 insertions, 25 deletions
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/ClusterModel.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/ClusterModel.java index 4a4222cca6a..4f262fb8105 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/ClusterModel.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/ClusterModel.java @@ -216,7 +216,7 @@ public class ClusterModel { public Instant at() { return at;} private OptionalDouble cpuCostPerQuery() { - if (averageQueryRate().isEmpty()) return OptionalDouble.empty(); + if (averageQueryRate().isEmpty() || averageQueryRate().getAsDouble() == 0.0) return OptionalDouble.empty(); // TODO: Query rate should generally be sampled at the time where we see the peak resource usage int fanOut = clusterSpec.type().isContainer() ? 1 : groupSize(); return OptionalDouble.of(peakLoad().cpu() * queryCpuFraction() * fanOut * nodes.not().retired().first().get().resources().vcpu() diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/AutoscalingUsingBcpGroupInfoTest.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/AutoscalingUsingBcpGroupInfoTest.java index 5cef4baadd4..bd3589be9dd 100644 --- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/AutoscalingUsingBcpGroupInfoTest.java +++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/AutoscalingUsingBcpGroupInfoTest.java @@ -11,6 +11,8 @@ import org.junit.Test; import java.time.Duration; import java.util.Optional; +import static org.junit.Assert.assertEquals; + /** * Tests autoscaling using information from the BCP group this cluster deployment * is part of to supplement local data when the local deployment lacks sufficient traffic. 
@@ -28,7 +30,7 @@ public class AutoscalingUsingBcpGroupInfoTest { fixture.store(new BcpGroupInfo(100, 1.1, 0.3)); fixture.loader().addCpuMeasurements(0.7f, 10); fixture.tester().assertResources("Scaling up cpu using bcp group cpu info", - 9, 1, 3.6, 6.1, 25.3, + 9, 1, 3.6, 6.1, 25.3, fixture.autoscale()); // Higher query rate @@ -36,7 +38,7 @@ public class AutoscalingUsingBcpGroupInfoTest { fixture.store(new BcpGroupInfo(200, 1.1, 0.3)); fixture.loader().addCpuMeasurements(0.7f, 10); fixture.tester().assertResources("Scaling up cpu using bcp group cpu info", - 9, 1, 7.1, 6.1, 25.3, + 9, 1, 7.1, 6.1, 25.3, fixture.autoscale()); // Higher headroom @@ -44,7 +46,7 @@ public class AutoscalingUsingBcpGroupInfoTest { fixture.store(new BcpGroupInfo(100, 1.3, 0.3)); fixture.loader().addCpuMeasurements(0.7f, 10); fixture.tester().assertResources("Scaling up cpu using bcp group cpu info", - 9, 1, 4.2, 6.1, 25.3, + 9, 1, 4.2, 6.1, 25.3, fixture.autoscale()); // Higher per query cost @@ -52,7 +54,7 @@ public class AutoscalingUsingBcpGroupInfoTest { fixture.store(new BcpGroupInfo(100, 1.1, 0.45)); fixture.loader().addCpuMeasurements(0.7f, 10); fixture.tester().assertResources("Scaling up cpu using bcp group cpu info", - 9, 1, 5.4, 6.1, 25.3, + 9, 1, 5.4, 6.1, 25.3, fixture.autoscale()); // Bcp elsewhere is 0 - use local only @@ -60,7 +62,7 @@ public class AutoscalingUsingBcpGroupInfoTest { fixture.store(new BcpGroupInfo(0, 1.1, 0.45)); fixture.loader().addCpuMeasurements(0.7f, 10); fixture.tester().assertResources("Scaling using local info", - 8, 1, 1, 7.0, 29.0, + 8, 1, 1, 7.0, 29.0, fixture.autoscale()); } @@ -81,7 +83,7 @@ public class AutoscalingUsingBcpGroupInfoTest { fixture.store(new BcpGroupInfo(100, 1.1, 0.3)); fixture.loader().addCpuMeasurements(0.7f, 10); fixture.tester().assertResources("Scaling up cpu using bcp group cpu info", - 3, 3, 10.5, 41.0, 168.9, + 3, 3, 10.5, 41.0, 168.9, fixture.autoscale()); // Higher query rate @@ -89,7 +91,7 @@ public class 
AutoscalingUsingBcpGroupInfoTest { fixture.store(new BcpGroupInfo(200, 1.1, 0.3)); fixture.loader().addCpuMeasurements(0.7f, 10); fixture.tester().assertResources("Scaling up cpu using bcp group cpu info", - 3, 3, 20.9, 41.0, 168.9, + 3, 3, 20.9, 41.0, 168.9, fixture.autoscale()); // Higher headroom @@ -97,7 +99,7 @@ public class AutoscalingUsingBcpGroupInfoTest { fixture.store(new BcpGroupInfo(100, 1.3, 0.3)); fixture.loader().addCpuMeasurements(0.7f, 10); fixture.tester().assertResources("Scaling up cpu using bcp group cpu info", - 3, 3, 12.4, 41.0, 168.9, + 3, 3, 12.4, 41.0, 168.9, fixture.autoscale()); // Higher per query cost @@ -105,7 +107,7 @@ public class AutoscalingUsingBcpGroupInfoTest { fixture.store(new BcpGroupInfo(100, 1.1, 0.45)); fixture.loader().addCpuMeasurements(0.7f, 10); fixture.tester().assertResources("Scaling up cpu using bcp group cpu info", - 3, 3, 15.7, 41.0, 168.9, + 3, 3, 15.7, 41.0, 168.9, fixture.autoscale()); } @@ -123,7 +125,7 @@ public class AutoscalingUsingBcpGroupInfoTest { fixture.store(new BcpGroupInfo(100, 1.1, 0.3)); fixture.loader().addCpuMeasurements(0.7f, 10); fixture.tester().assertResources("Scaling up cpu using bcp group cpu info", - 8, 1, 4.0, 16.0, 40.8, + 8, 1, 4.0, 16.0, 40.8, fixture.autoscale()); // Higher query rate (mem and disk changes are due to being assigned larger hosts where we get less overhead share @@ -131,7 +133,7 @@ public class AutoscalingUsingBcpGroupInfoTest { fixture.store(new BcpGroupInfo(200, 1.1, 0.3)); fixture.loader().addCpuMeasurements(0.7f, 10); fixture.tester().assertResources("Scaling up cpu using bcp group cpu info", - 8, 1, 8.0, 16.0, 40.8, + 8, 1, 8.0, 16.0, 40.8, fixture.autoscale()); // Higher headroom @@ -139,7 +141,7 @@ public class AutoscalingUsingBcpGroupInfoTest { fixture.store(new BcpGroupInfo(100, 1.3, 0.3)); fixture.loader().addCpuMeasurements(0.7f, 10); fixture.tester().assertResources("Scaling up cpu using bcp group cpu info", - 5, 1, 8.0, 16.0, 40.8, + 5, 1, 8.0, 16.0, 
40.8, fixture.autoscale()); // Higher per query cost @@ -147,7 +149,7 @@ public class AutoscalingUsingBcpGroupInfoTest { fixture.store(new BcpGroupInfo(100, 1.1, 0.45)); fixture.loader().addCpuMeasurements(0.7f, 10); fixture.tester().assertResources("Scaling up cpu using bcp group cpu info", - 6, 1, 8.0, 16.0, 40.8, + 6, 1, 8.0, 16.0, 40.8, fixture.autoscale()); } @@ -160,7 +162,7 @@ public class AutoscalingUsingBcpGroupInfoTest { fixture.store(new BcpGroupInfo(200, 1.3, 0.45)); fixture.loader().addCpuMeasurements(0.7f, 10); fixture.tester().assertResources("Scaling up cpu using bcp group cpu info", - 8, 1, 14.2, 7.0, 29.0, + 8, 1, 14.2, 7.0, 29.0, fixture.autoscale()); // Some local traffic @@ -170,7 +172,7 @@ public class AutoscalingUsingBcpGroupInfoTest { fixture.tester().clock().advance(duration1.negated()); fixture.loader().addQueryRateMeasurements(10, __ -> 10.0); fixture.tester().assertResources("Scaling up cpu using bcp group cpu info", - 8, 1, 6.9, 7.0, 29.0, + 8, 1, 6.9, 7.0, 29.0, fixture.autoscale()); // Enough local traffic to get half the votes @@ -180,7 +182,7 @@ public class AutoscalingUsingBcpGroupInfoTest { fixture.tester().clock().advance(duration2.negated()); fixture.loader().addQueryRateMeasurements(10, __ -> 50.0); fixture.tester().assertResources("Scaling up cpu using bcp group cpu info", - 9, 1, 2.7, 6.1, 25.3, + 9, 1, 2.7, 6.1, 25.3, fixture.autoscale()); // Mostly local @@ -190,7 +192,7 @@ public class AutoscalingUsingBcpGroupInfoTest { fixture.tester().clock().advance(duration3.negated()); fixture.loader().addQueryRateMeasurements(10, __ -> 90.0); fixture.tester().assertResources("Scaling up cpu using bcp group cpu info", - 9, 1, 2.1, 6.1, 25.3, + 9, 1, 2.1, 6.1, 25.3, fixture.autoscale()); // Local only @@ -200,7 +202,7 @@ public class AutoscalingUsingBcpGroupInfoTest { fixture.tester().clock().advance(duration4.negated()); fixture.loader().addQueryRateMeasurements(10, __ -> 100.0); fixture.tester().assertResources("Scaling up cpu using 
bcp group cpu info", - 9, 1, 2.0, 6.1, 25.3, + 9, 1, 2.0, 6.1, 25.3, fixture.autoscale()); // No group info, should be the same as the above @@ -210,7 +212,7 @@ public class AutoscalingUsingBcpGroupInfoTest { fixture.tester().clock().advance(duration5.negated()); fixture.loader().addQueryRateMeasurements(10, __ -> 100.0); fixture.tester().assertResources("Scaling up cpu using bcp group cpu info", - 9, 1, 2.0, 6.1, 25.3, + 9, 1, 2.0, 6.1, 25.3, fixture.autoscale()); // 40 query rate, no group info (for reference to the below) @@ -220,28 +222,65 @@ public class AutoscalingUsingBcpGroupInfoTest { fixture.tester().clock().advance(duration6.negated()); fixture.loader().addQueryRateMeasurements(10, __ -> 40.0); fixture.tester().assertResources("Scaling up cpu using bcp group cpu info", - 9, 1, 1.4, 6.1, 25.3, + 9, 1, 1.4, 6.1, 25.3, fixture.autoscale()); // Local query rate is too low but global is even lower so disregard it, giving the same as above fixture.tester().clock().advance(Duration.ofDays(2)); - fixture.store(new BcpGroupInfo(200/40.0, 1.3, 0.45*40.0)); + fixture.store(new BcpGroupInfo(200 / 40.0, 1.3, 0.45 * 40.0)); Duration duration7 = fixture.loader().addCpuMeasurements(0.7f, 10); fixture.tester().clock().advance(duration7.negated()); fixture.loader().addQueryRateMeasurements(10, __ -> 40.0); fixture.tester().assertResources("Scaling up cpu using bcp group cpu info", - 9, 1, 1.4, 6.1, 25.3, + 9, 1, 1.4, 6.1, 25.3, fixture.autoscale()); // Local query rate is too low to be fully confident, and so is global but as it is slightly larger, incorporate it slightly fixture.tester().clock().advance(Duration.ofDays(2)); - fixture.store(new BcpGroupInfo(200/4.0, 1.3, 0.45*4.0)); + fixture.store(new BcpGroupInfo(200 / 4.0, 1.3, 0.45 * 4.0)); Duration duration8 = fixture.loader().addCpuMeasurements(0.7f, 10); fixture.tester().clock().advance(duration8.negated()); fixture.loader().addQueryRateMeasurements(10, __ -> 40.0); fixture.tester().assertResources("Scaling up cpu 
using bcp group cpu info", - 9, 1, 1.8, 6.1, 25.3, + 9, 1, 1.8, 6.1, 25.3, fixture.autoscale()); } + /** Tests with varying BCP group info parameters. */ + @Test + public void test_autoscaling_metrics() { + var fixture = DynamicProvisioningTester.fixture().awsProdSetup(true).build(); + + // Empty has metrics at zero + assertEquals(new Autoscaling.Metrics(0, 0, 0), + fixture.autoscale().metrics()); + + + // No external load measurements -> 0 + fixture.tester().clock().advance(Duration.ofDays(2)); + fixture.loader().addCpuMeasurements(0.7f, 10); + assertEquals(new Autoscaling.Metrics(0, 1.0, 0), + fixture.autoscale().metrics()); + + // External load is measured to zero -> 0 + fixture.tester().clock().advance(Duration.ofDays(2)); + fixture.loader().addCpuMeasurements(0.7f, 10); + fixture.loader().addQueryRateMeasurements(10, i -> 0.0); + assertEquals(new Autoscaling.Metrics(0, 1.0, 0), + fixture.autoscale().metrics()); + + // External load + fixture.tester().clock().advance(Duration.ofDays(2)); + fixture.loader().addCpuMeasurements(0.7f, 10); + fixture.loader().addQueryRateMeasurements(10, i -> 110.0); + assertEquals(new Autoscaling.Metrics(110, 1.1, 0.05), + round(fixture.autoscale().metrics())); + } + + private Autoscaling.Metrics round(Autoscaling.Metrics metrics) { + return new Autoscaling.Metrics(Math.round(metrics.queryRate() * 100) / 100.0, + Math.round(metrics.growthRateHeadroom() * 100) / 100.0, + Math.round(metrics.cpuCostPerQuery() * 100) / 100.0); + } + } |