diff options
Diffstat (limited to 'node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/AutoscalingTest.java')
-rw-r--r-- | node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/AutoscalingTest.java | 113 |
1 files changed, 48 insertions, 65 deletions
diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/AutoscalingTest.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/AutoscalingTest.java index 90ab5cba772..7ade2cdf8c4 100644 --- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/AutoscalingTest.java +++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/AutoscalingTest.java @@ -15,6 +15,7 @@ import com.yahoo.config.provision.NodeType; import com.yahoo.config.provision.RegionName; import com.yahoo.config.provision.SystemName; import com.yahoo.config.provision.Zone; +import com.yahoo.vespa.hosted.provision.Node; import com.yahoo.vespa.hosted.provision.NodeRepository; import com.yahoo.vespa.hosted.provision.Nodelike; import com.yahoo.vespa.hosted.provision.provisioning.CapacityPolicies; @@ -51,10 +52,10 @@ public class AutoscalingTest { tester.deploy(application1, cluster1, 5, 1, hostResources); tester.clock().advance(Duration.ofDays(1)); - assertTrue("No measurements -> No change", tester.autoscale(application1, cluster1.id(), capacity).isEmpty()); + assertTrue("No measurements -> No change", tester.autoscale(application1, cluster1, capacity).isEmpty()); tester.addCpuMeasurements(0.25f, 1f, 59, application1); - assertTrue("Too few measurements -> No change", tester.autoscale(application1, cluster1.id(), capacity).isEmpty()); + assertTrue("Too few measurements -> No change", tester.autoscale(application1, cluster1, capacity).isEmpty()); tester.clock().advance(Duration.ofDays(1)); tester.addCpuMeasurements(0.25f, 1f, 120, application1); @@ -62,10 +63,10 @@ public class AutoscalingTest { tester.addQueryRateMeasurements(application1, cluster1.id(), 10, t -> t == 0 ? 20.0 : 10.0); // Query traffic only ClusterResources scaledResources = tester.assertResources("Scaling up since resource usage is too high", 15, 1, 1.2, 28.6, 28.6, - tester.autoscale(application1, cluster1.id(), capacity).target()); + tester.autoscale(application1, cluster1, capacity).target()); tester.deploy(application1, cluster1, scaledResources); - assertTrue("Cluster in flux -> No further change", tester.autoscale(application1, cluster1.id(), capacity).isEmpty()); + assertTrue("Cluster in flux -> No further change", tester.autoscale(application1, cluster1, capacity).isEmpty()); tester.deactivateRetired(application1, cluster1, scaledResources); @@ -74,19 +75,19 @@ public class AutoscalingTest { tester.clock().advance(Duration.ofMinutes(-10 * 5)); tester.addQueryRateMeasurements(application1, cluster1.id(), 10, t -> t == 0 ? 20.0 : 10.0); // Query traffic only assertTrue("Load change is large, but insufficient measurements for new config -> No change", - tester.autoscale(application1, cluster1.id(), capacity).isEmpty()); + tester.autoscale(application1, cluster1, capacity).isEmpty()); tester.addCpuMeasurements(0.19f, 1f, 100, application1); tester.clock().advance(Duration.ofMinutes(-10 * 5)); tester.addQueryRateMeasurements(application1, cluster1.id(), 10, t -> t == 0 ? 20.0 : 10.0); // Query traffic only - assertEquals("Load change is small -> No change", Optional.empty(), tester.autoscale(application1, cluster1.id(), capacity).target()); + assertEquals("Load change is small -> No change", Optional.empty(), tester.autoscale(application1, cluster1, capacity).target()); tester.addCpuMeasurements(0.1f, 1f, 120, application1); tester.clock().advance(Duration.ofMinutes(-10 * 5)); tester.addQueryRateMeasurements(application1, cluster1.id(), 10, t -> t == 0 ? 20.0 : 10.0); // Query traffic only tester.assertResources("Scaling down to minimum since usage has gone down significantly", 7, 1, 1.0, 66.7, 66.7, - tester.autoscale(application1, cluster1.id(), capacity).target()); + tester.autoscale(application1, cluster1, capacity).target()); var events = tester.nodeRepository().applications().get(application1).get().cluster(cluster1.id()).get().scalingEvents(); } @@ -110,7 +111,7 @@ public class AutoscalingTest { tester.addQueryRateMeasurements(application1, cluster1.id(), 10, t -> t == 0 ? 20.0 : 10.0); // Query traffic only ClusterResources scaledResources = tester.assertResources("Scaling up since cpu usage is too high", 7, 1, 2.5, 80.0, 80.0, - tester.autoscale(application1, cluster1.id(), capacity).target()); + tester.autoscale(application1, cluster1, capacity).target()); tester.deploy(application1, cluster1, scaledResources); tester.deactivateRetired(application1, cluster1, scaledResources); @@ -120,7 +121,7 @@ public class AutoscalingTest { tester.addQueryRateMeasurements(application1, cluster1.id(), 10, t -> t == 0 ? 20.0 : 10.0); // Query traffic only tester.assertResources("Scaling down since cpu usage has gone down", 4, 1, 2.5, 68.6, 68.6, - tester.autoscale(application1, cluster1.id(), capacity).target()); + tester.autoscale(application1, cluster1, capacity).target()); } @Test @@ -147,7 +148,7 @@ public class AutoscalingTest { var capacity = Capacity.from(min, max); ClusterResources scaledResources = tester.assertResources("Scaling up since resource usage is too high", 14, 1, 1.4, 30.8, 30.8, - tester.autoscale(application1, cluster1.id(), capacity).target()); + tester.autoscale(application1, cluster1, capacity).target()); assertEquals("Disk speed from min/max is used", NodeResources.DiskSpeed.any, scaledResources.nodeResources().diskSpeed()); tester.deploy(application1, cluster1, scaledResources); @@ -180,7 +181,7 @@ public class AutoscalingTest { // Autoscaling: Uses disk-speed any as well tester.clock().advance(Duration.ofDays(2)); tester.addCpuMeasurements(0.8f, 1f, 120, application1); - Autoscaler.Advice advice = tester.autoscale(application1, cluster1.id(), capacity); + Autoscaler.Advice advice = tester.autoscale(application1, cluster1, capacity); assertEquals(NodeResources.DiskSpeed.any, advice.target().get().nodeResources().diskSpeed()); @@ -205,7 +206,7 @@ public class AutoscalingTest { tester.addQueryRateMeasurements(application1, cluster1.id(), 10, t -> t == 0 ? 20.0 : 10.0); // Query traffic only tester.assertResources("Scaling up to limit since resource usage is too high", 6, 1, 2.4, 78.0, 79.0, - tester.autoscale(application1, cluster1.id(), capacity).target()); + tester.autoscale(application1, cluster1, capacity).target()); } @Test @@ -224,7 +225,7 @@ public class AutoscalingTest { tester.addMeasurements(0.05f, 0.05f, 0.05f, 0, 120, application1); tester.assertResources("Scaling down to limit since resource usage is low", 4, 1, 1.8, 7.7, 10.0, - tester.autoscale(application1, cluster1.id(), capacity).target()); + tester.autoscale(application1, cluster1, capacity).target()); } @Test @@ -251,7 +252,7 @@ public class AutoscalingTest { tester.addQueryRateMeasurements(application1, cluster1.id(), 10, t -> t == 0 ? 20.0 : 10.0); // Query traffic only tester.assertResources("Scaling up to limit since resource usage is too high", 4, 1, defaultResources, - tester.autoscale(application1, cluster1.id(), capacity).target()); + tester.autoscale(application1, cluster1, capacity).target()); } @Test @@ -272,7 +273,7 @@ public class AutoscalingTest { tester.addQueryRateMeasurements(application1, cluster1.id(), 10, t -> t == 0 ? 20.0 : 10.0); // Query traffic only tester.assertResources("Scaling up since resource usage is too high", 6, 6, 3.6, 8.0, 10.0, - tester.autoscale(application1, cluster1.id(), capacity).target()); + tester.autoscale(application1, cluster1, capacity).target()); } @Test @@ -290,7 +291,7 @@ public class AutoscalingTest { tester.deploy(application1, cluster1, 5, 1, resources); tester.clock().advance(Duration.ofDays(1)); tester.addCpuMeasurements(0.25f, 1f, 120, application1); - assertTrue(tester.autoscale(application1, cluster1.id(), capacity).isEmpty()); + assertTrue(tester.autoscale(application1, cluster1, capacity).isEmpty()); } @Test @@ -360,7 +361,7 @@ public class AutoscalingTest { tester.deploy(application1, cluster1, 2, 1, resources); tester.addMeasurements(0.5f, 0.6f, 0.7f, 1, false, true, 120, application1); assertTrue("Not scaling up since nodes were measured while cluster was unstable", - tester.autoscale(application1, cluster1.id(), capacity).isEmpty()); + tester.autoscale(application1, cluster1, capacity).isEmpty()); } @Test @@ -378,7 +379,7 @@ public class AutoscalingTest { tester.deploy(application1, cluster1, 2, 1, resources); tester.addMeasurements(0.5f, 0.6f, 0.7f, 1, true, false, 120, application1); assertTrue("Not scaling up since nodes were measured while cluster was unstable", - tester.autoscale(application1, cluster1.id(), capacity).isEmpty()); + tester.autoscale(application1, cluster1, capacity).isEmpty()); } @Test @@ -399,7 +400,7 @@ public class AutoscalingTest { tester.addQueryRateMeasurements(application1, cluster1.id(), 10, t -> t == 0 ? 20.0 : 10.0); // Query traffic only tester.assertResources("Scaling up since resource usage is too high", 7, 7, 2.5, 80.0, 80.0, - tester.autoscale(application1, cluster1.id(), capacity).target()); + tester.autoscale(application1, cluster1, capacity).target()); } @Test @@ -422,7 +423,7 @@ public class AutoscalingTest { t -> 1.0); tester.assertResources("Scaling up since resource usage is too high, changing to 1 group is cheaper", 8, 1, 2.6, 83.3, 83.3, - tester.autoscale(application1, cluster1.id(), capacity).target()); + tester.autoscale(application1, cluster1, capacity).target()); } /** Same as above but mostly write traffic, which favors smaller groups */ @@ -446,7 +447,7 @@ public class AutoscalingTest { t -> 100.0); tester.assertResources("Scaling down since resource usage is too high, changing to 1 group is cheaper", 4, 1, 2.1, 83.3, 83.3, - tester.autoscale(application1, cluster1.id(), capacity).target()); + tester.autoscale(application1, cluster1, capacity).target()); } @Test @@ -468,7 +469,7 @@ public class AutoscalingTest { tester.addQueryRateMeasurements(application1, cluster1.id(), 10, t -> t == 0 ? 20.0 : 10.0); // Query traffic only tester.assertResources("Increase group size to reduce memory load", 8, 2, 12.4, 96.2, 62.5, - tester.autoscale(application1, cluster1.id(), capacity).target()); + tester.autoscale(application1, cluster1, capacity).target()); } @Test @@ -489,7 +490,7 @@ public class AutoscalingTest { tester.addMemMeasurements(0.02f, 0.95f, 120, application1); tester.assertResources("Scaling down", 6, 1, 2.9, 4.0, 95.0, - tester.autoscale(application1, cluster1.id(), capacity).target()); + tester.autoscale(application1, cluster1, capacity).target()); } @Test @@ -509,7 +510,7 @@ public class AutoscalingTest { tester.addMemMeasurements(0.02f, 0.95f, 120, application1); tester.clock().advance(Duration.ofMinutes(-10 * 5)); tester.addQueryRateMeasurements(application1, cluster1.id(), 10, t -> t == 0 ? 20.0 : 10.0); // Query traffic only - assertTrue(tester.autoscale(application1, cluster1.id(), capacity).target().isEmpty()); + assertTrue(tester.autoscale(application1, cluster1, capacity).target().isEmpty()); // Trying the same later causes autoscaling tester.clock().advance(Duration.ofDays(2)); @@ -518,7 +519,7 @@ public class AutoscalingTest { tester.addQueryRateMeasurements(application1, cluster1.id(), 10, t -> t == 0 ? 20.0 : 10.0); // Query traffic only tester.assertResources("Scaling down", 6, 1, 1.4, 4.0, 95.0, - tester.autoscale(application1, cluster1.id(), capacity).target()); + tester.autoscale(application1, cluster1, capacity).target()); } @Test @@ -529,7 +530,8 @@ public class AutoscalingTest { var capacity = Capacity.from(min, max); { // No memory tax - AutoscalingTester tester = new AutoscalingTester(Environment.prod, hostResources, + AutoscalingTester tester = new AutoscalingTester(new Zone(Environment.prod, RegionName.from("us-east")), + hostResources, new OnlySubtractingWhenForecastingCalculator(0)); ApplicationId application1 = tester.applicationId("app1"); @@ -541,11 +543,12 @@ public class AutoscalingTest { tester.addQueryRateMeasurements(application1, cluster1.id(), 10, t -> t == 0 ? 20.0 : 10.0); // Query traffic only tester.assertResources("Scaling up", 4, 1, 6.7, 20.5, 200, - tester.autoscale(application1, cluster1.id(), capacity).target()); + tester.autoscale(application1, cluster1, capacity).target()); } { // 15 Gb memory tax - AutoscalingTester tester = new AutoscalingTester(Environment.prod, hostResources, + AutoscalingTester tester = new AutoscalingTester(new Zone(Environment.prod, RegionName.from("us-east")), + hostResources, new OnlySubtractingWhenForecastingCalculator(15)); ApplicationId application1 = tester.applicationId("app1"); @@ -557,7 +560,7 @@ public class AutoscalingTest { tester.addQueryRateMeasurements(application1, cluster1.id(), 10, t -> t == 0 ? 20.0 : 10.0); // Query traffic only tester.assertResources("Scaling up", 4, 1, 6.7, 35.5, 200, - tester.autoscale(application1, cluster1.id(), capacity).target()); + tester.autoscale(application1, cluster1, capacity).target()); } } @@ -588,7 +591,7 @@ public class AutoscalingTest { tester.addMemMeasurements(0.9f, 0.6f, 120, application1); ClusterResources scaledResources = tester.assertResources("Scaling up since resource usage is too high.", 8, 1, 3, 83, 34.3, - tester.autoscale(application1, cluster1.id(), capacity).target()); + tester.autoscale(application1, cluster1, capacity).target()); tester.deploy(application1, cluster1, scaledResources); tester.deactivateRetired(application1, cluster1, scaledResources); @@ -599,7 +602,7 @@ public class AutoscalingTest { tester.addQueryRateMeasurements(application1, cluster1.id(), 10, t -> t == 0 ? 20.0 : 10.0); // Query traffic only tester.assertResources("Scaling down since resource usage has gone down", 5, 1, 3, 83, 36.0, - tester.autoscale(application1, cluster1.id(), capacity).target()); + tester.autoscale(application1, cluster1, capacity).target()); } @Test @@ -621,17 +624,17 @@ public class AutoscalingTest { // (no read share stored) tester.assertResources("Advice to scale up since we set aside for bcp by default", 7, 1, 3, 100, 100, - tester.autoscale(application1, cluster1.id(), capacity).target()); + tester.autoscale(application1, cluster1, capacity).target()); tester.storeReadShare(0.25, 0.5, application1); tester.assertResources("Half of global share is the same as the default assumption used above", 7, 1, 3, 100, 100, - tester.autoscale(application1, cluster1.id(), capacity).target()); + tester.autoscale(application1, cluster1, capacity).target()); tester.storeReadShare(0.5, 0.5, application1); tester.assertResources("Advice to scale down since we don't need room for bcp", 4, 1, 3, 100, 100, - tester.autoscale(application1, cluster1.id(), capacity).target()); + tester.autoscale(application1, cluster1, capacity).target()); } @@ -656,7 +659,7 @@ public class AutoscalingTest { // (no query rate data) tester.assertResources("Scale up since we assume we need 2x cpu for growth when no data scaling time data", 5, 1, 6.3, 100, 100, - tester.autoscale(application1, cluster1.id(), capacity).target()); + tester.autoscale(application1, cluster1, capacity).target()); tester.setScalingDuration(application1, cluster1.id(), Duration.ofMinutes(5)); tester.addQueryRateMeasurements(application1, cluster1.id(), @@ -666,7 +669,7 @@ public class AutoscalingTest { tester.addCpuMeasurements(0.25f, 1f, 100, application1); tester.assertResources("Scale down since observed growth is slower than scaling time", 5, 1, 3.4, 100, 100, - tester.autoscale(application1, cluster1.id(), capacity).target()); + tester.autoscale(application1, cluster1, capacity).target()); tester.clearQueryRateMeasurements(application1, cluster1.id()); @@ -678,7 +681,7 @@ public class AutoscalingTest { tester.addCpuMeasurements(0.25f, 1f, 100, application1); tester.assertResources("Scale up since observed growth is faster than scaling time", 5, 1, 10.0, 100, 100, - tester.autoscale(application1, cluster1.id(), capacity).target()); + tester.autoscale(application1, cluster1, capacity).target()); } @Test @@ -705,55 +708,35 @@ public class AutoscalingTest { tester.addLoadMeasurements(application1, cluster1.id(), 100, t -> t == 0 ? 20.0 : 10.0, t -> 10.0); tester.assertResources("Query and write load is equal -> scale up somewhat", 5, 1, 7.3, 100, 100, - tester.autoscale(application1, cluster1.id(), capacity).target()); + tester.autoscale(application1, cluster1, capacity).target()); tester.addCpuMeasurements(0.4f, 1f, 100, application1); tester.clock().advance(Duration.ofMinutes(-100 * 5)); tester.addLoadMeasurements(application1, cluster1.id(), 100, t -> t == 0 ? 80.0 : 40.0, t -> 10.0); tester.assertResources("Query load is 4x write load -> scale up more", 5, 1, 9.5, 100, 100, - tester.autoscale(application1, cluster1.id(), capacity).target()); + tester.autoscale(application1, cluster1, capacity).target()); tester.addCpuMeasurements(0.3f, 1f, 100, application1); tester.clock().advance(Duration.ofMinutes(-100 * 5)); tester.addLoadMeasurements(application1, cluster1.id(), 100, t -> t == 0 ? 20.0 : 10.0, t -> 100.0); tester.assertResources("Write load is 10x query load -> scale down", 5, 1, 2.9, 100, 100, - tester.autoscale(application1, cluster1.id(), capacity).target()); + tester.autoscale(application1, cluster1, capacity).target()); tester.addCpuMeasurements(0.4f, 1f, 100, application1); tester.clock().advance(Duration.ofMinutes(-100 * 5)); tester.addLoadMeasurements(application1, cluster1.id(), 100, t -> t == 0 ? 20.0 : 10.0, t-> 0.0); tester.assertResources("Query only -> largest possible", 5, 1, 10.0, 100, 100, - tester.autoscale(application1, cluster1.id(), capacity).target()); + tester.autoscale(application1, cluster1, capacity).target()); tester.addCpuMeasurements(0.4f, 1f, 100, application1); tester.clock().advance(Duration.ofMinutes(-100 * 5)); tester.addLoadMeasurements(application1, cluster1.id(), 100, t -> 0.0, t -> 10.0); tester.assertResources("Write only -> smallest possible", 5, 1, 2.1, 100, 100, - tester.autoscale(application1, cluster1.id(), capacity).target()); - } - - @Test - public void test_cd_autoscaling_test() { - NodeResources resources = new NodeResources(1, 4, 50, 1); - ClusterResources min = new ClusterResources( 2, 1, resources); - ClusterResources max = new ClusterResources(3, 1, resources); - var capacity = Capacity.from(min, max); - AutoscalingTester tester = new AutoscalingTester(resources.withVcpu(resources.vcpu() * 2)); - ApplicationId application1 = tester.applicationId("application1"); - ClusterSpec cluster1 = tester.clusterSpec(ClusterSpec.Type.container, "cluster1"); - tester.deploy(application1, cluster1, 2, 1, resources); - - tester.addQueryRateMeasurements(application1, cluster1.id(), - 500, t -> 0.0); - tester.addCpuMeasurements(0.5f, 1f, 10, application1); - - tester.assertResources("Advice to scale up since observed growth is much faster than scaling time", - 3, 1, 1, 4, 50, - tester.autoscale(application1, cluster1.id(), capacity).target()); + tester.autoscale(application1, cluster1, capacity).target()); } @Test @@ -771,8 +754,8 @@ public class AutoscalingTest { tester.addQueryRateMeasurements(application1, cluster1.id(), 500, t -> 100.0); tester.addCpuMeasurements(1.0f, 1f, 10, application1); - //assertTrue("Not attempting to scale up because policies dictate we'll only get one node", - // tester.autoscale(application1, cluster1.id(), capacity).target().isEmpty()); + assertTrue("Not attempting to scale up because policies dictate we'll only get one node", + tester.autoscale(application1, cluster1, capacity).target().isEmpty()); } /** Same setup as test_autoscaling_in_dev(), just with required = true */ @@ -793,7 +776,7 @@ public class AutoscalingTest { tester.addCpuMeasurements(1.0f, 1f, 10, application1); tester.assertResources("We scale up even in dev because resources are required", 3, 1, 1.0, 4, 50, - tester.autoscale(application1, cluster1.id(), capacity).target()); + tester.autoscale(application1, cluster1, capacity).target()); } /** |