about | summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: Valerij Fredriksen <freva@users.noreply.github.com> 2023-08-16 23:39:04 +0200
committer: GitHub <noreply@github.com> 2023-08-16 23:39:04 +0200
commit: 8d26be9da9d22a950ff52c3642cfd3b05ca331c0 (patch)
tree: f334708a7b76b57807ba7d8a3605ea0b00d93420
parent: 1f8213997718c25942c38402202ae9e51572d89f (diff)
parent: 081bb95fd3f35bd97051c8738112774d8fab67b0 (diff)
Merge pull request #28069 from vespa-engine/bratseth/dont-scale-down-from-bcpinfo v8.213.13
Don't use BCPInfo to scale down
-rw-r--r-- node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/ClusterModel.java | 9
-rw-r--r-- node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/AutoscalingUsingBcpGroupInfoTest.java | 47
2 files changed, 35 insertions, 21 deletions
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/ClusterModel.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/ClusterModel.java
index 40b0bd8d88b..8976dd9ff08 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/ClusterModel.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/ClusterModel.java
@@ -272,16 +272,15 @@ public class ClusterModel {
private Load adjustQueryDependentIdealLoadByBcpGroupInfo(Load ideal) {
double currentClusterTotalVcpuPerGroup = nodes.not().retired().first().get().resources().vcpu() * groupSize();
-
double targetQueryRateToHandle = ( canRescaleWithinBcpDeadline() ? averageQueryRate().orElse(0)
: cluster.bcpGroupInfo().queryRate() )
* cluster.bcpGroupInfo().growthRateHeadroom() * trafficShiftHeadroom();
- double neededTotalVcpPerGroup = cluster.bcpGroupInfo().cpuCostPerQuery() * targetQueryRateToHandle / groupCount() +
+ double neededTotalVcpuPerGroup = cluster.bcpGroupInfo().cpuCostPerQuery() * targetQueryRateToHandle / groupCount() +
( 1 - cpu.queryFraction()) * cpu.idealLoad() *
(clusterSpec.type().isContainer() ? 1 : groupSize());
-
- double cpuAdjustment = neededTotalVcpPerGroup / currentClusterTotalVcpuPerGroup;
- return ideal.withCpu(peakLoad().cpu() / cpuAdjustment);
+ // Max 1: Only use bcp group info if it indicates that we need to scale *up*
+ double cpuAdjustment = Math.max(1.0, neededTotalVcpuPerGroup / currentClusterTotalVcpuPerGroup);
+ return ideal.withCpu(ideal.cpu() / cpuAdjustment);
}
private boolean hasScaledIn(Duration period) {
diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/AutoscalingUsingBcpGroupInfoTest.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/AutoscalingUsingBcpGroupInfoTest.java
index 379dbb27d87..be7bc3c44a8 100644
--- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/AutoscalingUsingBcpGroupInfoTest.java
+++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/AutoscalingUsingBcpGroupInfoTest.java
@@ -32,7 +32,7 @@ public class AutoscalingUsingBcpGroupInfoTest {
fixture.store(new BcpGroupInfo(100, 1.1, 0.3));
fixture.loader().addCpuMeasurements(0.7f, 10);
fixture.tester().assertResources("Scaling up cpu using bcp group cpu info",
- 8, 1, 4.0, 7.4, 29.0,
+ 8, 1, 3.4, 7.4, 29.0,
fixture.autoscale());
// Higher query rate
@@ -40,7 +40,7 @@ public class AutoscalingUsingBcpGroupInfoTest {
fixture.store(new BcpGroupInfo(200, 1.1, 0.3));
fixture.loader().addCpuMeasurements(0.7f, 10);
fixture.tester().assertResources("Scaling up cpu using bcp group cpu info",
- 8, 1, 8.0, 7.4, 29.0,
+ 8, 1, 6.8, 7.4, 29.0,
fixture.autoscale());
// Higher headroom
@@ -48,7 +48,7 @@ public class AutoscalingUsingBcpGroupInfoTest {
fixture.store(new BcpGroupInfo(100, 1.3, 0.3));
fixture.loader().addCpuMeasurements(0.7f, 10);
fixture.tester().assertResources("Scaling up cpu using bcp group cpu info",
- 8, 1, 4.8, 7.4, 29.0,
+ 8, 1, 4.0, 7.4, 29.0,
fixture.autoscale());
// Higher per query cost
@@ -56,7 +56,7 @@ public class AutoscalingUsingBcpGroupInfoTest {
fixture.store(new BcpGroupInfo(100, 1.1, 0.45));
fixture.loader().addCpuMeasurements(0.7f, 10);
fixture.tester().assertResources("Scaling up cpu using bcp group cpu info",
- 8, 1, 6.0, 7.4, 29.0,
+ 8, 1, 5.1, 7.4, 29.0,
fixture.autoscale());
// Bcp elsewhere is 0 - use local only
@@ -85,7 +85,7 @@ public class AutoscalingUsingBcpGroupInfoTest {
fixture.store(new BcpGroupInfo(100, 1.1, 0.3));
fixture.loader().addCpuMeasurements(0.7f, 10);
fixture.tester().assertResources("Scaling up cpu using bcp group cpu info",
- 3, 3, 10.5, 43.2, 190.0,
+ 3, 3, 11.7, 43.2, 190.0,
fixture.autoscale());
// Higher query rate
@@ -93,7 +93,7 @@ public class AutoscalingUsingBcpGroupInfoTest {
fixture.store(new BcpGroupInfo(200, 1.1, 0.3));
fixture.loader().addCpuMeasurements(0.7f, 10);
fixture.tester().assertResources("Scaling up cpu using bcp group cpu info",
- 3, 3, 20.9, 43.2, 190.0,
+ 3, 3, 23.1, 43.2, 190.0,
fixture.autoscale());
// Higher headroom
@@ -101,7 +101,7 @@ public class AutoscalingUsingBcpGroupInfoTest {
fixture.store(new BcpGroupInfo(100, 1.3, 0.3));
fixture.loader().addCpuMeasurements(0.7f, 10);
fixture.tester().assertResources("Scaling up cpu using bcp group cpu info",
- 3, 3, 12.4, 43.2, 190.0,
+ 3, 3, 13.8, 43.2, 190.0,
fixture.autoscale());
// Higher per query cost
@@ -109,7 +109,7 @@ public class AutoscalingUsingBcpGroupInfoTest {
fixture.store(new BcpGroupInfo(100, 1.1, 0.45));
fixture.loader().addCpuMeasurements(0.7f, 10);
fixture.tester().assertResources("Scaling up cpu using bcp group cpu info",
- 3, 3, 15.7, 43.2, 190.0,
+ 3, 3, 17.4, 43.2, 190.0,
fixture.autoscale());
}
@@ -127,7 +127,7 @@ public class AutoscalingUsingBcpGroupInfoTest {
fixture.store(new BcpGroupInfo(100, 1.1, 0.3));
fixture.loader().addCpuMeasurements(0.7f, 10);
fixture.tester().assertResources("Scaling up cpu using bcp group cpu info",
- 8, 1, 4.0, 16.0, 40.8,
+ 4, 1, 8.0, 16.0, 40.8,
fixture.autoscale());
// Higher query rate (mem and disk changes are due to being assigned larger hosts where we get less overhead share
@@ -135,7 +135,7 @@ public class AutoscalingUsingBcpGroupInfoTest {
fixture.store(new BcpGroupInfo(200, 1.1, 0.3));
fixture.loader().addCpuMeasurements(0.7f, 10);
fixture.tester().assertResources("Scaling up cpu using bcp group cpu info",
- 8, 1, 8.0, 16.0, 40.8,
+ 7, 1, 8.0, 16.0, 40.8,
fixture.autoscale());
// Higher headroom
@@ -143,7 +143,7 @@ public class AutoscalingUsingBcpGroupInfoTest {
fixture.store(new BcpGroupInfo(100, 1.3, 0.3));
fixture.loader().addCpuMeasurements(0.7f, 10);
fixture.tester().assertResources("Scaling up cpu using bcp group cpu info",
- 5, 1, 8.0, 16.0, 40.8,
+ 8, 1, 4.0, 16.0, 40.8,
fixture.autoscale());
// Higher per query cost
@@ -151,7 +151,7 @@ public class AutoscalingUsingBcpGroupInfoTest {
fixture.store(new BcpGroupInfo(100, 1.1, 0.45));
fixture.loader().addCpuMeasurements(0.7f, 10);
fixture.tester().assertResources("Scaling up cpu using bcp group cpu info",
- 6, 1, 8.0, 16.0, 40.8,
+ 10, 1, 4.0, 16.0, 40.8,
fixture.autoscale());
}
@@ -173,7 +173,7 @@ public class AutoscalingUsingBcpGroupInfoTest {
fixture.store(new BcpGroupInfo(100, 1.1, 0.45));
fixture.loader().addCpuMeasurements(0.7f, 10);
fixture.tester().assertResources("No need for traffic shift headroom",
- 2, 1, 2.0, 16.0, 40.8,
+ 3, 1, 4.0, 16.0, 40.8,
fixture.autoscale());
}
@@ -186,7 +186,7 @@ public class AutoscalingUsingBcpGroupInfoTest {
fixture.store(new BcpGroupInfo(200, 1.3, 0.45));
fixture.loader().addCpuMeasurements(0.7f, 10);
fixture.tester().assertResources("Scaling up cpu using bcp group cpu info",
- 8, 1, 14.2, 7.4, 29.0,
+ 8, 1, 11.9, 7.4, 29.0,
fixture.autoscale());
// Some local traffic
@@ -196,7 +196,7 @@ public class AutoscalingUsingBcpGroupInfoTest {
fixture.tester().clock().advance(duration1.negated());
fixture.loader().addQueryRateMeasurements(10, __ -> 10.0);
fixture.tester().assertResources("Scaling up cpu using bcp group cpu info",
- 8, 1, 6.9, 7.4, 29.0,
+ 8, 1, 6.8, 7.4, 29.0,
fixture.autoscale());
// Enough local traffic to get half the votes
@@ -206,7 +206,7 @@ public class AutoscalingUsingBcpGroupInfoTest {
fixture.tester().clock().advance(duration2.negated());
fixture.loader().addQueryRateMeasurements(10, __ -> 50.0);
fixture.tester().assertResources("Scaling up cpu using bcp group cpu info",
- 8, 1, 2.9, 7.4, 29.0,
+ 8, 1, 3.0, 7.4, 29.0,
fixture.autoscale());
// Mostly local
@@ -270,6 +270,21 @@ public class AutoscalingUsingBcpGroupInfoTest {
fixture.autoscale());
}
+ @Test
+ public void test_autoscaling_containers_with_some_local_traffic() {
+ var fixture = DynamicProvisioningTester.fixture().clusterType(ClusterSpec.Type.container).awsProdSetup(true).build();
+
+ // Some local traffic
+ fixture.tester().clock().advance(Duration.ofDays(2));
+ fixture.store(new BcpGroupInfo(200, 1.9, 0.01));
+ Duration duration1 = fixture.loader().addCpuMeasurements(0.58f, 10);
+ fixture.tester().clock().advance(duration1.negated());
+ fixture.loader().addQueryRateMeasurements(10, __ -> 10.0);
+ fixture.tester().assertResources("Not scaling down due to group info, even though it contains much evidence queries are cheap",
+ 3, 1, 4.0, 16.0, 40.8,
+ fixture.autoscale());
+ }
+
/** Tests with varying BCP group info parameters. */
@Test
public void test_autoscaling_metrics() {