diff options
author | Jon Bratseth <bratseth@gmail.com> | 2023-02-13 12:23:55 +0100 |
---|---|---|
committer | Jon Bratseth <bratseth@gmail.com> | 2023-02-13 12:23:55 +0100 |
commit | 0ba8b0001cee9ae1aad8fbdfac863a79da212d1c (patch) | |
tree | 2903f5356a7efa3646b1bce6c16e000286335f06 /node-repository/src/test/java | |
parent | fa526bcc311ae6080905b61fb9248aca82aa4991 (diff) |
Cold autoscaling WIP
Core support for autoscaling using data from cluster deployments in other regions
when there is little or no traffic in our own deployment.
Diffstat (limited to 'node-repository/src/test/java')
5 files changed, 254 insertions, 5 deletions
diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/AutoscalingTester.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/AutoscalingTester.java index 05d0822758d..19c6ce16674 100644 --- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/AutoscalingTester.java +++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/AutoscalingTester.java @@ -3,7 +3,6 @@ package com.yahoo.vespa.hosted.provision.autoscale; import com.yahoo.config.provision.ApplicationId; import com.yahoo.config.provision.Capacity; -import com.yahoo.config.provision.Cloud; import com.yahoo.config.provision.ClusterResources; import com.yahoo.config.provision.ClusterSpec; import com.yahoo.config.provision.Flavor; @@ -135,6 +134,7 @@ class AutoscalingTester { cluster.required(), cluster.suggested(), cluster.target(), + cluster.bcpGroupInfo(), List.of()); // Remove scaling events cluster = cluster.with(ScalingEvent.create(cluster.minResources(), cluster.minResources(), 0, diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/AutoscalingUsingBcpGroupInfoTest.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/AutoscalingUsingBcpGroupInfoTest.java new file mode 100644 index 00000000000..0bd94872557 --- /dev/null +++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/AutoscalingUsingBcpGroupInfoTest.java @@ -0,0 +1,238 @@ +package com.yahoo.vespa.hosted.provision.autoscale; + +import com.yahoo.config.provision.Capacity; +import com.yahoo.config.provision.ClusterResources; +import com.yahoo.config.provision.ClusterSpec; +import com.yahoo.config.provision.NodeResources; +import com.yahoo.vespa.hosted.provision.applications.BcpGroupInfo; +import org.junit.Test; + +import java.time.Duration; +import java.util.Optional; + +/** + * Tests autoscaling using information from the BCP group this cluster deployment + * is part of to supplement local data when the local deployment lacks sufficient traffic. + * + * @author bratseth + */ +public class AutoscalingUsingBcpGroupInfoTest { + + /** Tests with varying BCP group info parameters. */ + @Test + public void test_autoscaling_single_content_group() { + var fixture = AutoscalingTester.fixture().awsProdSetup(true).build(); + + fixture.tester().clock().advance(Duration.ofDays(2)); + fixture.store(new BcpGroupInfo(100, 1.1, 0.3)); + fixture.loader().addCpuMeasurements(0.7f, 10); + fixture.tester().assertResources("Scaling up cpu using bcp group cpu info", + 8, 1, 4.0, 7.6, 37.8, + fixture.autoscale()); + + // Higher query rate (mem and disk changes are due to being assigned larger hosts where we get less overhead share + fixture.tester().clock().advance(Duration.ofDays(2)); + fixture.store(new BcpGroupInfo(200, 1.1, 0.3)); + fixture.loader().addCpuMeasurements(0.7f, 10); + fixture.tester().assertResources("Scaling up cpu using bcp group cpu info", + 8, 1, 8.0, 7.4, 32.8, + fixture.autoscale()); + + // Higher headroom + fixture.tester().clock().advance(Duration.ofDays(2)); + fixture.store(new BcpGroupInfo(100, 1.3, 0.3)); + fixture.loader().addCpuMeasurements(0.7f, 10); + fixture.tester().assertResources("Scaling up cpu using bcp group cpu info", + 9, 1, 4.2, 6.6, 33.1, + fixture.autoscale()); + + // Higher per query cost + fixture.tester().clock().advance(Duration.ofDays(2)); + fixture.store(new BcpGroupInfo(100, 1.1, 0.45)); + fixture.loader().addCpuMeasurements(0.7f, 10); + fixture.tester().assertResources("Scaling up cpu using bcp group cpu info", + 9, 1, 5.4, 6.6, 33.1, + fixture.autoscale()); + } + + /** Tests with varying BCP group info parameters. */ + @Test + public void test_autoscaling_multiple_content_groups() { + var min = new ClusterResources(3, 3, + new NodeResources(1, 4, 10, 1, NodeResources.DiskSpeed.any)); + var max = new ClusterResources(21, 3, + new NodeResources(100, 1000, 1000, 1, NodeResources.DiskSpeed.any)); + var fixture = AutoscalingTester.fixture() + .awsProdSetup(true) + .initialResources(Optional.of(new ClusterResources(9, 3, new NodeResources(2, 16, 75, 1)))) + .capacity(Capacity.from(min, max)) + .build(); + + fixture.tester().clock().advance(Duration.ofDays(2)); + fixture.store(new BcpGroupInfo(100, 1.1, 0.3)); + fixture.loader().addCpuMeasurements(0.7f, 10); + fixture.tester().assertResources("Scaling up cpu using bcp group cpu info", + 3, 3, 10.5, 42.3, 187.0, + fixture.autoscale()); + + // Higher query rate (mem and disk changes are due to being assigned larger hosts where we get less overhead share + fixture.tester().clock().advance(Duration.ofDays(2)); + fixture.store(new BcpGroupInfo(200, 1.1, 0.3)); + fixture.loader().addCpuMeasurements(0.7f, 10); + fixture.tester().assertResources("Scaling up cpu using bcp group cpu info", + 3, 3, 20.9, 42.3, 178.0, + fixture.autoscale()); + + // Higher headroom + fixture.tester().clock().advance(Duration.ofDays(2)); + fixture.store(new BcpGroupInfo(100, 1.3, 0.3)); + fixture.loader().addCpuMeasurements(0.7f, 10); + fixture.tester().assertResources("Scaling up cpu using bcp group cpu info", + 3, 3, 12.4, 42.3, 187.0, + fixture.autoscale()); + + // Higher per query cost + fixture.tester().clock().advance(Duration.ofDays(2)); + fixture.store(new BcpGroupInfo(100, 1.1, 0.45)); + fixture.loader().addCpuMeasurements(0.7f, 10); + fixture.tester().assertResources("Scaling up cpu using bcp group cpu info", + 3, 3, 15.7, 42.3, 187.0, + fixture.autoscale()); + } + + /** + * Tests with varying BCP group info parameters for containers. + * Differences from content + * - No host sharing. + * - Memory and disk is independent of cluster size. + */ + @Test + public void test_autoscaling_container() { + var fixture = AutoscalingTester.fixture().clusterType(ClusterSpec.Type.container).awsProdSetup(true).build(); + + fixture.tester().clock().advance(Duration.ofDays(2)); + fixture.store(new BcpGroupInfo(100, 1.1, 0.3)); + fixture.loader().addCpuMeasurements(0.7f, 10); + fixture.tester().assertResources("Scaling up cpu using bcp group cpu info", + 8, 1, 4.0, 16.0, 40.8, + fixture.autoscale()); + + // Higher query rate (mem and disk changes are due to being assigned larger hosts where we get less overhead share + fixture.tester().clock().advance(Duration.ofDays(2)); + fixture.store(new BcpGroupInfo(200, 1.1, 0.3)); + fixture.loader().addCpuMeasurements(0.7f, 10); + fixture.tester().assertResources("Scaling up cpu using bcp group cpu info", + 8, 1, 8.0, 16.0, 40.8, + fixture.autoscale()); + + // Higher headroom + fixture.tester().clock().advance(Duration.ofDays(2)); + fixture.store(new BcpGroupInfo(100, 1.3, 0.3)); + fixture.loader().addCpuMeasurements(0.7f, 10); + fixture.tester().assertResources("Scaling up cpu using bcp group cpu info", + 5, 1, 8.0, 16.0, 40.8, + fixture.autoscale()); + + // Higher per query cost + fixture.tester().clock().advance(Duration.ofDays(2)); + fixture.store(new BcpGroupInfo(100, 1.1, 0.45)); + fixture.loader().addCpuMeasurements(0.7f, 10); + fixture.tester().assertResources("Scaling up cpu using bcp group cpu info", + 6, 1, 8.0, 16.0, 40.8, + fixture.autoscale()); + } + + @Test + public void test_autoscaling_single_content_group_with_some_local_traffic() { + var fixture = AutoscalingTester.fixture().awsProdSetup(true).build(); + + // Baseline: No local traffic, group traffic indicates much higher cpu usage than local + fixture.tester().clock().advance(Duration.ofDays(2)); + fixture.store(new BcpGroupInfo(200, 1.3, 0.45)); + fixture.loader().addCpuMeasurements(0.7f, 10); + fixture.tester().assertResources("Scaling up cpu using bcp group cpu info", + 8, 1, 14.2, 7.4, 32.8, + fixture.autoscale()); + + // Some local traffic + fixture.tester().clock().advance(Duration.ofDays(2)); + fixture.store(new BcpGroupInfo(200, 1.3, 0.45)); + Duration duration1 = fixture.loader().addCpuMeasurements(0.7f, 10); + fixture.tester().clock().advance(duration1.negated()); + fixture.loader().addQueryRateMeasurements(10, __ -> 10.0); + fixture.tester().assertResources("Scaling up cpu using bcp group cpu info", + 8, 1, 6.9, 7.6, 37.8, + fixture.autoscale()); + + // Enough local traffic to get half the votes + fixture.tester().clock().advance(Duration.ofDays(2)); + fixture.store(new BcpGroupInfo(200, 1.3, 0.45)); + Duration duration2 = fixture.loader().addCpuMeasurements(0.7f, 10); + fixture.tester().clock().advance(duration2.negated()); + fixture.loader().addQueryRateMeasurements(10, __ -> 50.0); + fixture.tester().assertResources("Scaling up cpu using bcp group cpu info", + 7, 1, 3.5, 8.9, 55.5, + fixture.autoscale()); + + // Mostly local + fixture.tester().clock().advance(Duration.ofDays(2)); + fixture.store(new BcpGroupInfo(200, 1.3, 0.45)); + Duration duration3 = fixture.loader().addCpuMeasurements(0.7f, 10); + fixture.tester().clock().advance(duration3.negated()); + fixture.loader().addQueryRateMeasurements(10, __ -> 90.0); + fixture.tester().assertResources("Scaling up cpu using bcp group cpu info", + 7, 1, 2.7, 8.9, 55.5, + fixture.autoscale()); + + // Local only + fixture.tester().clock().advance(Duration.ofDays(2)); + fixture.store(new BcpGroupInfo(200, 1.3, 0.45)); + Duration duration4 = fixture.loader().addCpuMeasurements(0.7f, 10); + fixture.tester().clock().advance(duration4.negated()); + fixture.loader().addQueryRateMeasurements(10, __ -> 100.0); + fixture.tester().assertResources("Scaling up cpu using bcp group cpu info", + 7, 1, 2.6, 8.9, 55.5, + fixture.autoscale()); + + // No group info, should be the same as the above + fixture.tester().clock().advance(Duration.ofDays(2)); + fixture.store(BcpGroupInfo.empty()); + Duration duration5 = fixture.loader().addCpuMeasurements(0.7f, 10); + fixture.tester().clock().advance(duration5.negated()); + fixture.loader().addQueryRateMeasurements(10, __ -> 100.0); + fixture.tester().assertResources("Scaling up cpu using bcp group cpu info", + 7, 1, 2.6, 8.9, 55.5, + fixture.autoscale()); + + // 40 query rate, no group info (for reference to the below) + fixture.tester().clock().advance(Duration.ofDays(2)); + fixture.store(BcpGroupInfo.empty()); + Duration duration6 = fixture.loader().addCpuMeasurements(0.7f, 10); + fixture.tester().clock().advance(duration6.negated()); + fixture.loader().addQueryRateMeasurements(10, __ -> 40.0); + fixture.tester().assertResources("Scaling up cpu using bcp group cpu info", + 6, 1, 2.2, 10.6, 66.5, + fixture.autoscale()); + + // Local query rate is too low but global is even lower so disregard it, giving the same as above + fixture.tester().clock().advance(Duration.ofDays(2)); + fixture.store(new BcpGroupInfo(200/40.0, 1.3, 0.45*40.0)); + Duration duration7 = fixture.loader().addCpuMeasurements(0.7f, 10); + fixture.tester().clock().advance(duration7.negated()); + fixture.loader().addQueryRateMeasurements(10, __ -> 40.0); + fixture.tester().assertResources("Scaling up cpu using bcp group cpu info", + 6, 1, 2.2, 10.6, 66.5, + fixture.autoscale()); + + // Local query rate is too low to be fully confident, and so is global but as it is slightly larger, incorporate it slightly + fixture.tester().clock().advance(Duration.ofDays(2)); + fixture.store(new BcpGroupInfo(200/4.0, 1.3, 0.45*4.0)); + Duration duration8 = fixture.loader().addCpuMeasurements(0.7f, 10); + fixture.tester().clock().advance(duration8.negated()); + fixture.loader().addQueryRateMeasurements(10, __ -> 40.0); + fixture.tester().assertResources("Scaling up cpu using bcp group cpu info", + 7, 1, 2.2, 8.9, 55.5, + fixture.autoscale()); + } + +} diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/Fixture.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/Fixture.java index 1e1e00a10db..5caf50a4e83 100644 --- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/Fixture.java +++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/Fixture.java @@ -21,6 +21,7 @@ import com.yahoo.vespa.hosted.provision.Node; import com.yahoo.vespa.hosted.provision.NodeList; import com.yahoo.vespa.hosted.provision.applications.Application; import com.yahoo.vespa.hosted.provision.applications.Cluster; +import com.yahoo.vespa.hosted.provision.applications.BcpGroupInfo; import com.yahoo.vespa.hosted.provision.autoscale.awsnodes.AwsHostResourcesCalculatorImpl; import com.yahoo.vespa.hosted.provision.autoscale.awsnodes.AwsNodeTypes; import com.yahoo.vespa.hosted.provision.provisioning.HostResourcesCalculator; @@ -132,6 +133,12 @@ public class Fixture { tester.nodeRepository().applications().put(application, tester.nodeRepository().applications().lock(applicationId)); } + public void store(BcpGroupInfo bcpGroupInfo) { + var application = application(); + application = application.with(application.cluster(clusterId()).get().with(bcpGroupInfo)); + tester.nodeRepository().applications().put(application, tester.nodeRepository().applications().lock(applicationId)); + } + public static class Builder { ApplicationId application = AutoscalingTester.applicationId("application1"); diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/Loader.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/Loader.java index 10c8c7434b1..a104f0b1bc8 100644 --- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/Loader.java +++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/Loader.java @@ -79,10 +79,10 @@ public class Loader { return Duration.between(initialTime, fixture.tester().clock().instant()); } - public void applyCpuLoad(double cpuLoad, int measurements) { - addCpuMeasurements((float)cpuLoad, measurements); - fixture.tester().clock().advance(samplingInterval.negated().multipliedBy(measurements)); - addQueryRateMeasurements(measurements, t -> t == 0 ? 200.0 : 100.0); // Query traffic only + public Duration applyCpuLoad(double cpuLoad, int measurements) { + Duration duration = addCpuMeasurements((float)cpuLoad, measurements); + fixture.tester().clock().advance(duration.negated()); + return addQueryRateMeasurements(measurements, t -> t == 0 ? 200.0 : 100.0); // Query traffic only } public void applyMemLoad(double memLoad, int measurements) { diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/persistence/ApplicationSerializerTest.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/persistence/ApplicationSerializerTest.java index bce10b999bb..c8dc0d97320 100644 --- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/persistence/ApplicationSerializerTest.java +++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/persistence/ApplicationSerializerTest.java @@ -8,6 +8,7 @@ import com.yahoo.config.provision.ClusterSpec; import com.yahoo.config.provision.NodeResources; import com.yahoo.vespa.hosted.provision.applications.Application; import com.yahoo.vespa.hosted.provision.applications.Cluster; +import com.yahoo.vespa.hosted.provision.applications.BcpGroupInfo; import com.yahoo.vespa.hosted.provision.applications.ScalingEvent; import com.yahoo.vespa.hosted.provision.applications.Status; import com.yahoo.vespa.hosted.provision.autoscale.Autoscaling; @@ -39,6 +40,7 @@ public class ApplicationSerializerTest { true, Autoscaling.empty(), Autoscaling.empty(), + BcpGroupInfo.empty(), List.of())); var minResources = new NodeResources(1, 2, 3, 4); clusters.add(new Cluster(ClusterSpec.Id.from("c2"), @@ -61,6 +63,7 @@ public class ApplicationSerializerTest { Instant.ofEpochMilli(5678L), Load.zero(), Load.one()), + new BcpGroupInfo(0.1, 0.2, 0.3), List.of(new ScalingEvent(new ClusterResources(10, 5, minResources), new ClusterResources(12, 6, minResources), 7L, @@ -90,6 +93,7 @@ public class ApplicationSerializerTest { assertEquals(originalCluster.required(), serializedCluster.required()); assertEquals(originalCluster.suggested(), serializedCluster.suggested()); assertEquals(originalCluster.target(), serializedCluster.target()); + assertEquals(originalCluster.bcpGroupInfo(), serializedCluster.bcpGroupInfo()); assertEquals(originalCluster.scalingEvents(), serializedCluster.scalingEvents()); } } |