diff options
author | Valerij Fredriksen <freva@users.noreply.github.com> | 2023-08-16 23:39:04 +0200 |
---|---|---|
committer | GitHub <noreply@github.com> | 2023-08-16 23:39:04 +0200 |
commit | 8d26be9da9d22a950ff52c3642cfd3b05ca331c0 (patch) | |
tree | f334708a7b76b57807ba7d8a3605ea0b00d93420 | |
parent | 1f8213997718c25942c38402202ae9e51572d89f (diff) | |
parent | 081bb95fd3f35bd97051c8738112774d8fab67b0 (diff) |
Merge pull request #28069 from vespa-engine/bratseth/dont-scale-down-from-bcpinfov8.213.13
Don't use BCPInfo to scale down
2 files changed, 35 insertions, 21 deletions
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/ClusterModel.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/ClusterModel.java index 40b0bd8d88b..8976dd9ff08 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/ClusterModel.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/ClusterModel.java @@ -272,16 +272,15 @@ public class ClusterModel { private Load adjustQueryDependentIdealLoadByBcpGroupInfo(Load ideal) { double currentClusterTotalVcpuPerGroup = nodes.not().retired().first().get().resources().vcpu() * groupSize(); - double targetQueryRateToHandle = ( canRescaleWithinBcpDeadline() ? averageQueryRate().orElse(0) : cluster.bcpGroupInfo().queryRate() ) * cluster.bcpGroupInfo().growthRateHeadroom() * trafficShiftHeadroom(); - double neededTotalVcpPerGroup = cluster.bcpGroupInfo().cpuCostPerQuery() * targetQueryRateToHandle / groupCount() + + double neededTotalVcpuPerGroup = cluster.bcpGroupInfo().cpuCostPerQuery() * targetQueryRateToHandle / groupCount() + ( 1 - cpu.queryFraction()) * cpu.idealLoad() * (clusterSpec.type().isContainer() ? 1 : groupSize()); - - double cpuAdjustment = neededTotalVcpPerGroup / currentClusterTotalVcpuPerGroup; - return ideal.withCpu(peakLoad().cpu() / cpuAdjustment); + // Max 1: Only use bcp group info if it indicates that we need to scale *up* + double cpuAdjustment = Math.max(1.0, neededTotalVcpuPerGroup / currentClusterTotalVcpuPerGroup); + return ideal.withCpu(ideal.cpu() / cpuAdjustment); } private boolean hasScaledIn(Duration period) { diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/AutoscalingUsingBcpGroupInfoTest.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/AutoscalingUsingBcpGroupInfoTest.java index 379dbb27d87..be7bc3c44a8 100644 --- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/AutoscalingUsingBcpGroupInfoTest.java +++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/AutoscalingUsingBcpGroupInfoTest.java @@ -32,7 +32,7 @@ public class AutoscalingUsingBcpGroupInfoTest { fixture.store(new BcpGroupInfo(100, 1.1, 0.3)); fixture.loader().addCpuMeasurements(0.7f, 10); fixture.tester().assertResources("Scaling up cpu using bcp group cpu info", - 8, 1, 4.0, 7.4, 29.0, + 8, 1, 3.4, 7.4, 29.0, fixture.autoscale()); // Higher query rate @@ -40,7 +40,7 @@ public class AutoscalingUsingBcpGroupInfoTest { fixture.store(new BcpGroupInfo(200, 1.1, 0.3)); fixture.loader().addCpuMeasurements(0.7f, 10); fixture.tester().assertResources("Scaling up cpu using bcp group cpu info", - 8, 1, 8.0, 7.4, 29.0, + 8, 1, 6.8, 7.4, 29.0, fixture.autoscale()); // Higher headroom @@ -48,7 +48,7 @@ public class AutoscalingUsingBcpGroupInfoTest { fixture.store(new BcpGroupInfo(100, 1.3, 0.3)); fixture.loader().addCpuMeasurements(0.7f, 10); fixture.tester().assertResources("Scaling up cpu using bcp group cpu info", - 8, 1, 4.8, 7.4, 29.0, + 8, 1, 4.0, 7.4, 29.0, fixture.autoscale()); // Higher per query cost @@ -56,7 +56,7 @@ public class AutoscalingUsingBcpGroupInfoTest { fixture.store(new BcpGroupInfo(100, 1.1, 0.45)); fixture.loader().addCpuMeasurements(0.7f, 10); fixture.tester().assertResources("Scaling up cpu using bcp group cpu info", - 8, 1, 6.0, 7.4, 29.0, + 8, 1, 5.1, 7.4, 29.0, fixture.autoscale()); // Bcp elsewhere is 0 - use local only @@ -85,7 +85,7 @@ public class AutoscalingUsingBcpGroupInfoTest { fixture.store(new BcpGroupInfo(100, 1.1, 0.3)); fixture.loader().addCpuMeasurements(0.7f, 10); fixture.tester().assertResources("Scaling up cpu using bcp group cpu info", - 3, 3, 10.5, 43.2, 190.0, + 3, 3, 11.7, 43.2, 190.0, fixture.autoscale()); // Higher query rate @@ -93,7 +93,7 @@ public class AutoscalingUsingBcpGroupInfoTest { fixture.store(new BcpGroupInfo(200, 1.1, 0.3)); fixture.loader().addCpuMeasurements(0.7f, 10); fixture.tester().assertResources("Scaling up cpu using bcp group cpu info", - 3, 3, 20.9, 43.2, 190.0, + 3, 3, 23.1, 43.2, 190.0, fixture.autoscale()); // Higher headroom @@ -101,7 +101,7 @@ public class AutoscalingUsingBcpGroupInfoTest { fixture.store(new BcpGroupInfo(100, 1.3, 0.3)); fixture.loader().addCpuMeasurements(0.7f, 10); fixture.tester().assertResources("Scaling up cpu using bcp group cpu info", - 3, 3, 12.4, 43.2, 190.0, + 3, 3, 13.8, 43.2, 190.0, fixture.autoscale()); // Higher per query cost @@ -109,7 +109,7 @@ public class AutoscalingUsingBcpGroupInfoTest { fixture.store(new BcpGroupInfo(100, 1.1, 0.45)); fixture.loader().addCpuMeasurements(0.7f, 10); fixture.tester().assertResources("Scaling up cpu using bcp group cpu info", - 3, 3, 15.7, 43.2, 190.0, + 3, 3, 17.4, 43.2, 190.0, fixture.autoscale()); } @@ -127,7 +127,7 @@ public class AutoscalingUsingBcpGroupInfoTest { fixture.store(new BcpGroupInfo(100, 1.1, 0.3)); fixture.loader().addCpuMeasurements(0.7f, 10); fixture.tester().assertResources("Scaling up cpu using bcp group cpu info", - 8, 1, 4.0, 16.0, 40.8, + 4, 1, 8.0, 16.0, 40.8, fixture.autoscale()); // Higher query rate (mem and disk changes are due to being assigned larger hosts where we get less overhead share @@ -135,7 +135,7 @@ public class AutoscalingUsingBcpGroupInfoTest { fixture.store(new BcpGroupInfo(200, 1.1, 0.3)); fixture.loader().addCpuMeasurements(0.7f, 10); fixture.tester().assertResources("Scaling up cpu using bcp group cpu info", - 8, 1, 8.0, 16.0, 40.8, + 7, 1, 8.0, 16.0, 40.8, fixture.autoscale()); // Higher headroom @@ -143,7 +143,7 @@ public class AutoscalingUsingBcpGroupInfoTest { fixture.store(new BcpGroupInfo(100, 1.3, 0.3)); fixture.loader().addCpuMeasurements(0.7f, 10); fixture.tester().assertResources("Scaling up cpu using bcp group cpu info", - 5, 1, 8.0, 16.0, 40.8, + 8, 1, 4.0, 16.0, 40.8, fixture.autoscale()); // Higher per query cost @@ -151,7 +151,7 @@ public class AutoscalingUsingBcpGroupInfoTest { fixture.store(new BcpGroupInfo(100, 1.1, 0.45)); fixture.loader().addCpuMeasurements(0.7f, 10); fixture.tester().assertResources("Scaling up cpu using bcp group cpu info", - 6, 1, 8.0, 16.0, 40.8, + 10, 1, 4.0, 16.0, 40.8, fixture.autoscale()); } @@ -173,7 +173,7 @@ public class AutoscalingUsingBcpGroupInfoTest { fixture.store(new BcpGroupInfo(100, 1.1, 0.45)); fixture.loader().addCpuMeasurements(0.7f, 10); fixture.tester().assertResources("No need for traffic shift headroom", - 2, 1, 2.0, 16.0, 40.8, + 3, 1, 4.0, 16.0, 40.8, fixture.autoscale()); } @@ -186,7 +186,7 @@ public class AutoscalingUsingBcpGroupInfoTest { fixture.store(new BcpGroupInfo(200, 1.3, 0.45)); fixture.loader().addCpuMeasurements(0.7f, 10); fixture.tester().assertResources("Scaling up cpu using bcp group cpu info", - 8, 1, 14.2, 7.4, 29.0, + 8, 1, 11.9, 7.4, 29.0, fixture.autoscale()); // Some local traffic @@ -196,7 +196,7 @@ public class AutoscalingUsingBcpGroupInfoTest { fixture.tester().clock().advance(duration1.negated()); fixture.loader().addQueryRateMeasurements(10, __ -> 10.0); fixture.tester().assertResources("Scaling up cpu using bcp group cpu info", - 8, 1, 6.9, 7.4, 29.0, + 8, 1, 6.8, 7.4, 29.0, fixture.autoscale()); // Enough local traffic to get half the votes @@ -206,7 +206,7 @@ public class AutoscalingUsingBcpGroupInfoTest { fixture.tester().clock().advance(duration2.negated()); fixture.loader().addQueryRateMeasurements(10, __ -> 50.0); fixture.tester().assertResources("Scaling up cpu using bcp group cpu info", - 8, 1, 2.9, 7.4, 29.0, + 8, 1, 3.0, 7.4, 29.0, fixture.autoscale()); // Mostly local @@ -270,6 +270,21 @@ public class AutoscalingUsingBcpGroupInfoTest { fixture.autoscale()); } + @Test + public void test_autoscaling_containers_with_some_local_traffic() { + var fixture = DynamicProvisioningTester.fixture().clusterType(ClusterSpec.Type.container).awsProdSetup(true).build(); + + // Some local traffic + fixture.tester().clock().advance(Duration.ofDays(2)); + fixture.store(new BcpGroupInfo(200, 1.9, 0.01)); + Duration duration1 = fixture.loader().addCpuMeasurements(0.58f, 10); + fixture.tester().clock().advance(duration1.negated()); + fixture.loader().addQueryRateMeasurements(10, __ -> 10.0); + fixture.tester().assertResources("Not scaling down due to group info, even though it contains much evidence queries are cheap", + 3, 1, 4.0, 16.0, 40.8, + fixture.autoscale()); + } + /** Tests with varying BCP group info parameters. */ @Test public void test_autoscaling_metrics() { |