aboutsummaryrefslogtreecommitdiffstats
path: root/node-repository/src/test/java
diff options
context:
space:
mode:
author Jon Bratseth <bratseth@gmail.com> 2023-02-13 12:23:55 +0100
committer Jon Bratseth <bratseth@gmail.com> 2023-02-13 12:23:55 +0100
commit 0ba8b0001cee9ae1aad8fbdfac863a79da212d1c (patch)
tree 2903f5356a7efa3646b1bce6c16e000286335f06 /node-repository/src/test/java
parent fa526bcc311ae6080905b61fb9248aca82aa4991 (diff)
Cold autoscaling WIP
Core support for autoscaling using data from cluster deployments in other regions when there is little or no traffic in our own deployment.
Diffstat (limited to 'node-repository/src/test/java')
-rw-r--r-- node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/AutoscalingTester.java 2
-rw-r--r-- node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/AutoscalingUsingBcpGroupInfoTest.java 238
-rw-r--r-- node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/Fixture.java 7
-rw-r--r-- node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/Loader.java 8
-rw-r--r-- node-repository/src/test/java/com/yahoo/vespa/hosted/provision/persistence/ApplicationSerializerTest.java 4
5 files changed, 254 insertions, 5 deletions
diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/AutoscalingTester.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/AutoscalingTester.java
index 05d0822758d..19c6ce16674 100644
--- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/AutoscalingTester.java
+++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/AutoscalingTester.java
@@ -3,7 +3,6 @@ package com.yahoo.vespa.hosted.provision.autoscale;
import com.yahoo.config.provision.ApplicationId;
import com.yahoo.config.provision.Capacity;
-import com.yahoo.config.provision.Cloud;
import com.yahoo.config.provision.ClusterResources;
import com.yahoo.config.provision.ClusterSpec;
import com.yahoo.config.provision.Flavor;
@@ -135,6 +134,7 @@ class AutoscalingTester {
cluster.required(),
cluster.suggested(),
cluster.target(),
+ cluster.bcpGroupInfo(),
List.of()); // Remove scaling events
cluster = cluster.with(ScalingEvent.create(cluster.minResources(), cluster.minResources(),
0,
diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/AutoscalingUsingBcpGroupInfoTest.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/AutoscalingUsingBcpGroupInfoTest.java
new file mode 100644
index 00000000000..0bd94872557
--- /dev/null
+++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/AutoscalingUsingBcpGroupInfoTest.java
@@ -0,0 +1,238 @@
+package com.yahoo.vespa.hosted.provision.autoscale;
+
+import com.yahoo.config.provision.Capacity;
+import com.yahoo.config.provision.ClusterResources;
+import com.yahoo.config.provision.ClusterSpec;
+import com.yahoo.config.provision.NodeResources;
+import com.yahoo.vespa.hosted.provision.applications.BcpGroupInfo;
+import org.junit.Test;
+
+import java.time.Duration;
+import java.util.Optional;
+
+/**
+ * Tests autoscaling using information from the BCP group this cluster deployment
+ * is part of to supplement local data when the local deployment lacks sufficient traffic.
+ *
+ * @author bratseth
+ */
+public class AutoscalingUsingBcpGroupInfoTest {
+
+ /** Tests a content cluster with a single group, with varying BCP group info parameters. */
+ @Test
+ public void test_autoscaling_single_content_group() {
+ var fixture = AutoscalingTester.fixture().awsProdSetup(true).build();
+
+ fixture.tester().clock().advance(Duration.ofDays(2));
+ fixture.store(new BcpGroupInfo(100, 1.1, 0.3));
+ fixture.loader().addCpuMeasurements(0.7f, 10);
+ fixture.tester().assertResources("Scaling up cpu using bcp group cpu info",
+ 8, 1, 4.0, 7.6, 37.8,
+ fixture.autoscale());
+
+ // Higher query rate (mem and disk changes are due to being assigned larger hosts where we get less overhead share)
+ fixture.tester().clock().advance(Duration.ofDays(2));
+ fixture.store(new BcpGroupInfo(200, 1.1, 0.3));
+ fixture.loader().addCpuMeasurements(0.7f, 10);
+ fixture.tester().assertResources("Scaling up cpu using bcp group cpu info",
+ 8, 1, 8.0, 7.4, 32.8,
+ fixture.autoscale());
+
+ // Higher headroom
+ fixture.tester().clock().advance(Duration.ofDays(2));
+ fixture.store(new BcpGroupInfo(100, 1.3, 0.3));
+ fixture.loader().addCpuMeasurements(0.7f, 10);
+ fixture.tester().assertResources("Scaling up cpu using bcp group cpu info",
+ 9, 1, 4.2, 6.6, 33.1,
+ fixture.autoscale());
+
+ // Higher per query cost
+ fixture.tester().clock().advance(Duration.ofDays(2));
+ fixture.store(new BcpGroupInfo(100, 1.1, 0.45));
+ fixture.loader().addCpuMeasurements(0.7f, 10);
+ fixture.tester().assertResources("Scaling up cpu using bcp group cpu info",
+ 9, 1, 5.4, 6.6, 33.1,
+ fixture.autoscale());
+ }
+
+ /** Tests a content cluster with multiple groups, with varying BCP group info parameters. */
+ @Test
+ public void test_autoscaling_multiple_content_groups() {
+ var min = new ClusterResources(3, 3,
+ new NodeResources(1, 4, 10, 1, NodeResources.DiskSpeed.any));
+ var max = new ClusterResources(21, 3,
+ new NodeResources(100, 1000, 1000, 1, NodeResources.DiskSpeed.any));
+ var fixture = AutoscalingTester.fixture()
+ .awsProdSetup(true)
+ .initialResources(Optional.of(new ClusterResources(9, 3, new NodeResources(2, 16, 75, 1))))
+ .capacity(Capacity.from(min, max))
+ .build();
+
+ fixture.tester().clock().advance(Duration.ofDays(2));
+ fixture.store(new BcpGroupInfo(100, 1.1, 0.3));
+ fixture.loader().addCpuMeasurements(0.7f, 10);
+ fixture.tester().assertResources("Scaling up cpu using bcp group cpu info",
+ 3, 3, 10.5, 42.3, 187.0,
+ fixture.autoscale());
+
+ // Higher query rate (mem and disk changes are due to being assigned larger hosts where we get less overhead share)
+ fixture.tester().clock().advance(Duration.ofDays(2));
+ fixture.store(new BcpGroupInfo(200, 1.1, 0.3));
+ fixture.loader().addCpuMeasurements(0.7f, 10);
+ fixture.tester().assertResources("Scaling up cpu using bcp group cpu info",
+ 3, 3, 20.9, 42.3, 178.0,
+ fixture.autoscale());
+
+ // Higher headroom
+ fixture.tester().clock().advance(Duration.ofDays(2));
+ fixture.store(new BcpGroupInfo(100, 1.3, 0.3));
+ fixture.loader().addCpuMeasurements(0.7f, 10);
+ fixture.tester().assertResources("Scaling up cpu using bcp group cpu info",
+ 3, 3, 12.4, 42.3, 187.0,
+ fixture.autoscale());
+
+ // Higher per query cost
+ fixture.tester().clock().advance(Duration.ofDays(2));
+ fixture.store(new BcpGroupInfo(100, 1.1, 0.45));
+ fixture.loader().addCpuMeasurements(0.7f, 10);
+ fixture.tester().assertResources("Scaling up cpu using bcp group cpu info",
+ 3, 3, 15.7, 42.3, 187.0,
+ fixture.autoscale());
+ }
+
+ /**
+ * Tests with varying BCP group info parameters for containers.
+ * Differences from content:
+ * - No host sharing.
+ * - Memory and disk are independent of cluster size.
+ */
+ @Test
+ public void test_autoscaling_container() {
+ var fixture = AutoscalingTester.fixture().clusterType(ClusterSpec.Type.container).awsProdSetup(true).build();
+
+ fixture.tester().clock().advance(Duration.ofDays(2));
+ fixture.store(new BcpGroupInfo(100, 1.1, 0.3));
+ fixture.loader().addCpuMeasurements(0.7f, 10);
+ fixture.tester().assertResources("Scaling up cpu using bcp group cpu info",
+ 8, 1, 4.0, 16.0, 40.8,
+ fixture.autoscale());
+
+ // Higher query rate (memory and disk stay unchanged, as they are independent of cluster size for containers)
+ fixture.tester().clock().advance(Duration.ofDays(2));
+ fixture.store(new BcpGroupInfo(200, 1.1, 0.3));
+ fixture.loader().addCpuMeasurements(0.7f, 10);
+ fixture.tester().assertResources("Scaling up cpu using bcp group cpu info",
+ 8, 1, 8.0, 16.0, 40.8,
+ fixture.autoscale());
+
+ // Higher headroom
+ fixture.tester().clock().advance(Duration.ofDays(2));
+ fixture.store(new BcpGroupInfo(100, 1.3, 0.3));
+ fixture.loader().addCpuMeasurements(0.7f, 10);
+ fixture.tester().assertResources("Scaling up cpu using bcp group cpu info",
+ 5, 1, 8.0, 16.0, 40.8,
+ fixture.autoscale());
+
+ // Higher per query cost
+ fixture.tester().clock().advance(Duration.ofDays(2));
+ fixture.store(new BcpGroupInfo(100, 1.1, 0.45));
+ fixture.loader().addCpuMeasurements(0.7f, 10);
+ fixture.tester().assertResources("Scaling up cpu using bcp group cpu info",
+ 6, 1, 8.0, 16.0, 40.8,
+ fixture.autoscale());
+ }
+
+ @Test
+ public void test_autoscaling_single_content_group_with_some_local_traffic() {
+ var fixture = AutoscalingTester.fixture().awsProdSetup(true).build();
+
+ // Baseline: No local traffic, group traffic indicates much higher cpu usage than local
+ fixture.tester().clock().advance(Duration.ofDays(2));
+ fixture.store(new BcpGroupInfo(200, 1.3, 0.45));
+ fixture.loader().addCpuMeasurements(0.7f, 10);
+ fixture.tester().assertResources("Scaling up cpu using bcp group cpu info",
+ 8, 1, 14.2, 7.4, 32.8,
+ fixture.autoscale());
+
+ // Some local traffic
+ fixture.tester().clock().advance(Duration.ofDays(2));
+ fixture.store(new BcpGroupInfo(200, 1.3, 0.45));
+ Duration duration1 = fixture.loader().addCpuMeasurements(0.7f, 10);
+ fixture.tester().clock().advance(duration1.negated());
+ fixture.loader().addQueryRateMeasurements(10, __ -> 10.0);
+ fixture.tester().assertResources("Scaling up cpu using bcp group cpu info",
+ 8, 1, 6.9, 7.6, 37.8,
+ fixture.autoscale());
+
+ // Enough local traffic to get half the votes
+ fixture.tester().clock().advance(Duration.ofDays(2));
+ fixture.store(new BcpGroupInfo(200, 1.3, 0.45));
+ Duration duration2 = fixture.loader().addCpuMeasurements(0.7f, 10);
+ fixture.tester().clock().advance(duration2.negated());
+ fixture.loader().addQueryRateMeasurements(10, __ -> 50.0);
+ fixture.tester().assertResources("Scaling up cpu using bcp group cpu info",
+ 7, 1, 3.5, 8.9, 55.5,
+ fixture.autoscale());
+
+ // Mostly local
+ fixture.tester().clock().advance(Duration.ofDays(2));
+ fixture.store(new BcpGroupInfo(200, 1.3, 0.45));
+ Duration duration3 = fixture.loader().addCpuMeasurements(0.7f, 10);
+ fixture.tester().clock().advance(duration3.negated());
+ fixture.loader().addQueryRateMeasurements(10, __ -> 90.0);
+ fixture.tester().assertResources("Scaling up cpu using bcp group cpu info",
+ 7, 1, 2.7, 8.9, 55.5,
+ fixture.autoscale());
+
+ // Local only
+ fixture.tester().clock().advance(Duration.ofDays(2));
+ fixture.store(new BcpGroupInfo(200, 1.3, 0.45));
+ Duration duration4 = fixture.loader().addCpuMeasurements(0.7f, 10);
+ fixture.tester().clock().advance(duration4.negated());
+ fixture.loader().addQueryRateMeasurements(10, __ -> 100.0);
+ fixture.tester().assertResources("Scaling up cpu using bcp group cpu info",
+ 7, 1, 2.6, 8.9, 55.5,
+ fixture.autoscale());
+
+ // No group info, should be the same as the above
+ fixture.tester().clock().advance(Duration.ofDays(2));
+ fixture.store(BcpGroupInfo.empty());
+ Duration duration5 = fixture.loader().addCpuMeasurements(0.7f, 10);
+ fixture.tester().clock().advance(duration5.negated());
+ fixture.loader().addQueryRateMeasurements(10, __ -> 100.0);
+ fixture.tester().assertResources("Scaling up cpu using bcp group cpu info",
+ 7, 1, 2.6, 8.9, 55.5,
+ fixture.autoscale());
+
+ // 40 query rate, no group info (for reference to the below)
+ fixture.tester().clock().advance(Duration.ofDays(2));
+ fixture.store(BcpGroupInfo.empty());
+ Duration duration6 = fixture.loader().addCpuMeasurements(0.7f, 10);
+ fixture.tester().clock().advance(duration6.negated());
+ fixture.loader().addQueryRateMeasurements(10, __ -> 40.0);
+ fixture.tester().assertResources("Scaling up cpu using bcp group cpu info",
+ 6, 1, 2.2, 10.6, 66.5,
+ fixture.autoscale());
+
+ // Local query rate is too low but global is even lower so disregard it, giving the same as above
+ fixture.tester().clock().advance(Duration.ofDays(2));
+ fixture.store(new BcpGroupInfo(200/40.0, 1.3, 0.45*40.0));
+ Duration duration7 = fixture.loader().addCpuMeasurements(0.7f, 10);
+ fixture.tester().clock().advance(duration7.negated());
+ fixture.loader().addQueryRateMeasurements(10, __ -> 40.0);
+ fixture.tester().assertResources("Scaling up cpu using bcp group cpu info",
+ 6, 1, 2.2, 10.6, 66.5,
+ fixture.autoscale());
+
+ // Local query rate is too low to be fully confident, and so is the global rate, but as the global rate is slightly larger, incorporate it slightly
+ fixture.tester().clock().advance(Duration.ofDays(2));
+ fixture.store(new BcpGroupInfo(200/4.0, 1.3, 0.45*4.0));
+ Duration duration8 = fixture.loader().addCpuMeasurements(0.7f, 10);
+ fixture.tester().clock().advance(duration8.negated());
+ fixture.loader().addQueryRateMeasurements(10, __ -> 40.0);
+ fixture.tester().assertResources("Scaling up cpu using bcp group cpu info",
+ 7, 1, 2.2, 8.9, 55.5,
+ fixture.autoscale());
+ }
+
+}
diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/Fixture.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/Fixture.java
index 1e1e00a10db..5caf50a4e83 100644
--- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/Fixture.java
+++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/Fixture.java
@@ -21,6 +21,7 @@ import com.yahoo.vespa.hosted.provision.Node;
import com.yahoo.vespa.hosted.provision.NodeList;
import com.yahoo.vespa.hosted.provision.applications.Application;
import com.yahoo.vespa.hosted.provision.applications.Cluster;
+import com.yahoo.vespa.hosted.provision.applications.BcpGroupInfo;
import com.yahoo.vespa.hosted.provision.autoscale.awsnodes.AwsHostResourcesCalculatorImpl;
import com.yahoo.vespa.hosted.provision.autoscale.awsnodes.AwsNodeTypes;
import com.yahoo.vespa.hosted.provision.provisioning.HostResourcesCalculator;
@@ -132,6 +133,12 @@ public class Fixture {
tester.nodeRepository().applications().put(application, tester.nodeRepository().applications().lock(applicationId));
}
+ public void store(BcpGroupInfo bcpGroupInfo) {
+ var application = application();
+ application = application.with(application.cluster(clusterId()).get().with(bcpGroupInfo));
+ tester.nodeRepository().applications().put(application, tester.nodeRepository().applications().lock(applicationId));
+ }
+
public static class Builder {
ApplicationId application = AutoscalingTester.applicationId("application1");
diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/Loader.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/Loader.java
index 10c8c7434b1..a104f0b1bc8 100644
--- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/Loader.java
+++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/Loader.java
@@ -79,10 +79,10 @@ public class Loader {
return Duration.between(initialTime, fixture.tester().clock().instant());
}
- public void applyCpuLoad(double cpuLoad, int measurements) {
- addCpuMeasurements((float)cpuLoad, measurements);
- fixture.tester().clock().advance(samplingInterval.negated().multipliedBy(measurements));
- addQueryRateMeasurements(measurements, t -> t == 0 ? 200.0 : 100.0); // Query traffic only
+ public Duration applyCpuLoad(double cpuLoad, int measurements) {
+ Duration duration = addCpuMeasurements((float)cpuLoad, measurements);
+ fixture.tester().clock().advance(duration.negated());
+ return addQueryRateMeasurements(measurements, t -> t == 0 ? 200.0 : 100.0); // Query traffic only
}
public void applyMemLoad(double memLoad, int measurements) {
diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/persistence/ApplicationSerializerTest.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/persistence/ApplicationSerializerTest.java
index bce10b999bb..c8dc0d97320 100644
--- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/persistence/ApplicationSerializerTest.java
+++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/persistence/ApplicationSerializerTest.java
@@ -8,6 +8,7 @@ import com.yahoo.config.provision.ClusterSpec;
import com.yahoo.config.provision.NodeResources;
import com.yahoo.vespa.hosted.provision.applications.Application;
import com.yahoo.vespa.hosted.provision.applications.Cluster;
+import com.yahoo.vespa.hosted.provision.applications.BcpGroupInfo;
import com.yahoo.vespa.hosted.provision.applications.ScalingEvent;
import com.yahoo.vespa.hosted.provision.applications.Status;
import com.yahoo.vespa.hosted.provision.autoscale.Autoscaling;
@@ -39,6 +40,7 @@ public class ApplicationSerializerTest {
true,
Autoscaling.empty(),
Autoscaling.empty(),
+ BcpGroupInfo.empty(),
List.of()));
var minResources = new NodeResources(1, 2, 3, 4);
clusters.add(new Cluster(ClusterSpec.Id.from("c2"),
@@ -61,6 +63,7 @@ public class ApplicationSerializerTest {
Instant.ofEpochMilli(5678L),
Load.zero(),
Load.one()),
+ new BcpGroupInfo(0.1, 0.2, 0.3),
List.of(new ScalingEvent(new ClusterResources(10, 5, minResources),
new ClusterResources(12, 6, minResources),
7L,
@@ -90,6 +93,7 @@ public class ApplicationSerializerTest {
assertEquals(originalCluster.required(), serializedCluster.required());
assertEquals(originalCluster.suggested(), serializedCluster.suggested());
assertEquals(originalCluster.target(), serializedCluster.target());
+ assertEquals(originalCluster.bcpGroupInfo(), serializedCluster.bcpGroupInfo());
assertEquals(originalCluster.scalingEvents(), serializedCluster.scalingEvents());
}
}