diff options
author | Jon Bratseth <bratseth@gmail.com> | 2021-12-06 16:03:42 +0100 |
---|---|---|
committer | Jon Bratseth <bratseth@gmail.com> | 2021-12-06 16:03:42 +0100 |
commit | b361a711b50e456831775e64642e0a7cb30b9d1c (patch) | |
tree | ddb5a9665abdc219666d72a8fdd9dfd60ee0409a /node-repository/src/test/java/com/yahoo | |
parent | 76a3908ea1d3475d36a7b2d15b867a18d7952862 (diff) |
Ignore warmup period
Diffstat (limited to 'node-repository/src/test/java/com/yahoo')
6 files changed, 140 insertions, 95 deletions
diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/NodeListMicroBenchmarkTest.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/NodeListMicroBenchmarkTest.java index b7f9dcb8e8a..8027afc665b 100644 --- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/NodeListMicroBenchmarkTest.java +++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/NodeListMicroBenchmarkTest.java @@ -58,7 +58,6 @@ public class NodeListMicroBenchmarkTest { nodeList.childrenOf(nodes.get(indexes.get(i))); } Duration duration = Duration.between(start, Instant.now()); - System.out.println("Calling NodeList.childrenOf took " + duration + " (" + duration.toNanos() / iterations / 1000 + " microseconds per invocation)"); } private List<Node> createHosts() { diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/AutoscalingTest.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/AutoscalingTest.java index 7ade2cdf8c4..16017036e87 100644 --- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/AutoscalingTest.java +++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/AutoscalingTest.java @@ -15,7 +15,6 @@ import com.yahoo.config.provision.NodeType; import com.yahoo.config.provision.RegionName; import com.yahoo.config.provision.SystemName; import com.yahoo.config.provision.Zone; -import com.yahoo.vespa.hosted.provision.Node; import com.yahoo.vespa.hosted.provision.NodeRepository; import com.yahoo.vespa.hosted.provision.Nodelike; import com.yahoo.vespa.hosted.provision.provisioning.CapacityPolicies; @@ -63,7 +62,7 @@ public class AutoscalingTest { tester.addQueryRateMeasurements(application1, cluster1.id(), 10, t -> t == 0 ? 20.0 : 10.0); // Query traffic only ClusterResources scaledResources = tester.assertResources("Scaling up since resource usage is too high", 15, 1, 1.2, 28.6, 28.6, - tester.autoscale(application1, cluster1, capacity).target()); + tester.autoscale(application1, cluster1, capacity)); tester.deploy(application1, cluster1, scaledResources); assertTrue("Cluster in flux -> No further change", tester.autoscale(application1, cluster1, capacity).isEmpty()); @@ -87,7 +86,7 @@ public class AutoscalingTest { tester.addQueryRateMeasurements(application1, cluster1.id(), 10, t -> t == 0 ? 20.0 : 10.0); // Query traffic only tester.assertResources("Scaling down to minimum since usage has gone down significantly", 7, 1, 1.0, 66.7, 66.7, - tester.autoscale(application1, cluster1, capacity).target()); + tester.autoscale(application1, cluster1, capacity)); var events = tester.nodeRepository().applications().get(application1).get().cluster(cluster1.id()).get().scalingEvents(); } @@ -111,7 +110,7 @@ public class AutoscalingTest { tester.addQueryRateMeasurements(application1, cluster1.id(), 10, t -> t == 0 ? 20.0 : 10.0); // Query traffic only ClusterResources scaledResources = tester.assertResources("Scaling up since cpu usage is too high", 7, 1, 2.5, 80.0, 80.0, - tester.autoscale(application1, cluster1, capacity).target()); + tester.autoscale(application1, cluster1, capacity)); tester.deploy(application1, cluster1, scaledResources); tester.deactivateRetired(application1, cluster1, scaledResources); @@ -121,7 +120,7 @@ public class AutoscalingTest { tester.addQueryRateMeasurements(application1, cluster1.id(), 10, t -> t == 0 ? 20.0 : 10.0); // Query traffic only tester.assertResources("Scaling down since cpu usage has gone down", 4, 1, 2.5, 68.6, 68.6, - tester.autoscale(application1, cluster1, capacity).target()); + tester.autoscale(application1, cluster1, capacity)); } @Test @@ -148,7 +147,7 @@ public class AutoscalingTest { var capacity = Capacity.from(min, max); ClusterResources scaledResources = tester.assertResources("Scaling up since resource usage is too high", 14, 1, 1.4, 30.8, 30.8, - tester.autoscale(application1, cluster1, capacity).target()); + tester.autoscale(application1, cluster1, capacity)); assertEquals("Disk speed from min/max is used", NodeResources.DiskSpeed.any, scaledResources.nodeResources().diskSpeed()); tester.deploy(application1, cluster1, scaledResources); @@ -206,7 +205,7 @@ public class AutoscalingTest { tester.addQueryRateMeasurements(application1, cluster1.id(), 10, t -> t == 0 ? 20.0 : 10.0); // Query traffic only tester.assertResources("Scaling up to limit since resource usage is too high", 6, 1, 2.4, 78.0, 79.0, - tester.autoscale(application1, cluster1, capacity).target()); + tester.autoscale(application1, cluster1, capacity)); } @Test @@ -225,7 +224,7 @@ public class AutoscalingTest { tester.addMeasurements(0.05f, 0.05f, 0.05f, 0, 120, application1); tester.assertResources("Scaling down to limit since resource usage is low", 4, 1, 1.8, 7.7, 10.0, - tester.autoscale(application1, cluster1, capacity).target()); + tester.autoscale(application1, cluster1, capacity)); } @Test @@ -246,13 +245,14 @@ public class AutoscalingTest { tester.deploy(application1, cluster1, Capacity.from(min, max)); tester.assertResources("Min number of nodes and default resources", 2, 1, defaultResources, - Optional.of(tester.nodeRepository().nodes().list().owner(application1).toResources())); + tester.nodeRepository().nodes().list().owner(application1).toResources()); tester.addMeasurements(0.25f, 0.95f, 0.95f, 0, 120, application1); tester.clock().advance(Duration.ofMinutes(-10 * 5)); tester.addQueryRateMeasurements(application1, cluster1.id(), 10, t -> t == 0 ? 20.0 : 10.0); // Query traffic only tester.assertResources("Scaling up to limit since resource usage is too high", - 4, 1, defaultResources, - tester.autoscale(application1, cluster1, capacity).target()); + 4, 1, + defaultResources.vcpu(), defaultResources.memoryGb(), defaultResources.diskGb(), + tester.autoscale(application1, cluster1, capacity)); } @Test @@ -273,7 +273,7 @@ public class AutoscalingTest { tester.addQueryRateMeasurements(application1, cluster1.id(), 10, t -> t == 0 ? 20.0 : 10.0); // Query traffic only tester.assertResources("Scaling up since resource usage is too high", 6, 6, 3.6, 8.0, 10.0, - tester.autoscale(application1, cluster1, capacity).target()); + tester.autoscale(application1, cluster1, capacity)); } @Test @@ -315,15 +315,15 @@ public class AutoscalingTest { // deploy tester.deploy(application1, cluster1, 3, 1, min.nodeResources()); - tester.addDiskMeasurements(0.01f, 1f, 120, application1); - tester.clock().advance(Duration.ofMinutes(-10 * 5)); + Duration timeAdded = tester.addDiskMeasurements(0.01f, 1f, 120, application1); + tester.clock().advance(timeAdded.negated()); tester.addQueryRateMeasurements(application1, cluster1.id(), 10, t -> 10.0); // Query traffic only - Optional<ClusterResources> suggestion = tester.suggest(application1, cluster1.id(), min, max).target(); + Autoscaler.Advice suggestion = tester.suggest(application1, cluster1.id(), min, max); tester.assertResources("Choosing the remote disk flavor as it has less disk", 6, 1, 3.0, 100.0, 10.0, suggestion); assertEquals("Choosing the remote disk flavor as it has less disk", - StorageType.remote, suggestion.get().nodeResources().storageType()); + StorageType.remote, suggestion.target().get().nodeResources().storageType()); } @Test @@ -343,7 +343,7 @@ public class AutoscalingTest { tester.addQueryRateMeasurements(application1, cluster1.id(), 10, t -> t == 0 ? 20.0 : 10.0); // Query traffic only tester.assertResources("Scaling up since resource usage is too high", 7, 1, 2.5, 80.0, 80.0, - tester.suggest(application1, cluster1.id(), min, max).target()); + tester.suggest(application1, cluster1.id(), min, max)); } @Test @@ -400,7 +400,7 @@ public class AutoscalingTest { tester.addQueryRateMeasurements(application1, cluster1.id(), 10, t -> t == 0 ? 20.0 : 10.0); // Query traffic only tester.assertResources("Scaling up since resource usage is too high", 7, 7, 2.5, 80.0, 80.0, - tester.autoscale(application1, cluster1, capacity).target()); + tester.autoscale(application1, cluster1, capacity)); } @Test @@ -423,7 +423,7 @@ public class AutoscalingTest { t -> 1.0); tester.assertResources("Scaling up since resource usage is too high, changing to 1 group is cheaper", 8, 1, 2.6, 83.3, 83.3, - tester.autoscale(application1, cluster1, capacity).target()); + tester.autoscale(application1, cluster1, capacity)); } /** Same as above but mostly write traffic, which favors smaller groups */ @@ -447,7 +447,7 @@ public class AutoscalingTest { t -> 100.0); tester.assertResources("Scaling down since resource usage is too high, changing to 1 group is cheaper", 4, 1, 2.1, 83.3, 83.3, - tester.autoscale(application1, cluster1, capacity).target()); + tester.autoscale(application1, cluster1, capacity)); } @Test @@ -469,7 +469,7 @@ public class AutoscalingTest { tester.addQueryRateMeasurements(application1, cluster1.id(), 10, t -> t == 0 ? 20.0 : 10.0); // Query traffic only tester.assertResources("Increase group size to reduce memory load", 8, 2, 12.4, 96.2, 62.5, - tester.autoscale(application1, cluster1, capacity).target()); + tester.autoscale(application1, cluster1, capacity)); } @Test @@ -490,7 +490,7 @@ public class AutoscalingTest { tester.addMemMeasurements(0.02f, 0.95f, 120, application1); tester.assertResources("Scaling down", 6, 1, 2.9, 4.0, 95.0, - tester.autoscale(application1, cluster1, capacity).target()); + tester.autoscale(application1, cluster1, capacity)); } @Test @@ -519,7 +519,7 @@ public class AutoscalingTest { tester.addQueryRateMeasurements(application1, cluster1.id(), 10, t -> t == 0 ? 20.0 : 10.0); // Query traffic only tester.assertResources("Scaling down", 6, 1, 1.4, 4.0, 95.0, - tester.autoscale(application1, cluster1, capacity).target()); + tester.autoscale(application1, cluster1, capacity)); } @Test @@ -543,7 +543,7 @@ public class AutoscalingTest { tester.addQueryRateMeasurements(application1, cluster1.id(), 10, t -> t == 0 ? 20.0 : 10.0); // Query traffic only tester.assertResources("Scaling up", 4, 1, 6.7, 20.5, 200, - tester.autoscale(application1, cluster1, capacity).target()); + tester.autoscale(application1, cluster1, capacity)); } { // 15 Gb memory tax @@ -560,7 +560,7 @@ public class AutoscalingTest { tester.addQueryRateMeasurements(application1, cluster1.id(), 10, t -> t == 0 ? 20.0 : 10.0); // Query traffic only tester.assertResources("Scaling up", 4, 1, 6.7, 35.5, 200, - tester.autoscale(application1, cluster1, capacity).target()); + tester.autoscale(application1, cluster1, capacity)); } } @@ -591,7 +591,7 @@ public class AutoscalingTest { tester.addMemMeasurements(0.9f, 0.6f, 120, application1); ClusterResources scaledResources = tester.assertResources("Scaling up since resource usage is too high.", 8, 1, 3, 83, 34.3, - tester.autoscale(application1, cluster1, capacity).target()); + tester.autoscale(application1, cluster1, capacity)); tester.deploy(application1, cluster1, scaledResources); tester.deactivateRetired(application1, cluster1, scaledResources); @@ -602,7 +602,7 @@ public class AutoscalingTest { tester.addQueryRateMeasurements(application1, cluster1.id(), 10, t -> t == 0 ? 20.0 : 10.0); // Query traffic only tester.assertResources("Scaling down since resource usage has gone down", 5, 1, 3, 83, 36.0, - tester.autoscale(application1, cluster1, capacity).target()); + tester.autoscale(application1, cluster1, capacity)); } @Test @@ -624,17 +624,17 @@ public class AutoscalingTest { // (no read share stored) tester.assertResources("Advice to scale up since we set aside for bcp by default", 7, 1, 3, 100, 100, - tester.autoscale(application1, cluster1, capacity).target()); + tester.autoscale(application1, cluster1, capacity)); tester.storeReadShare(0.25, 0.5, application1); tester.assertResources("Half of global share is the same as the default assumption used above", 7, 1, 3, 100, 100, - tester.autoscale(application1, cluster1, capacity).target()); + tester.autoscale(application1, cluster1, capacity)); tester.storeReadShare(0.5, 0.5, application1); tester.assertResources("Advice to scale down since we don't need room for bcp", 4, 1, 3, 100, 100, - tester.autoscale(application1, cluster1, capacity).target()); + tester.autoscale(application1, cluster1, capacity)); } @@ -652,36 +652,36 @@ public class AutoscalingTest { ClusterSpec cluster1 = tester.clusterSpec(ClusterSpec.Type.container, "cluster1"); tester.deploy(application1, cluster1, 5, 1, midResources); - tester.addQueryRateMeasurements(application1, cluster1.id(), 100, t -> t == 0 ? 20.0 : 10.0); - tester.clock().advance(Duration.ofMinutes(-100 * 5)); - tester.addCpuMeasurements(0.25f, 1f, 100, application1); + Duration timeAdded = tester.addQueryRateMeasurements(application1, cluster1.id(), 100, t -> t == 0 ? 20.0 : 10.0); + tester.clock().advance(timeAdded.negated()); + tester.addCpuMeasurements(0.25f, 1f, 200, application1); // (no query rate data) tester.assertResources("Scale up since we assume we need 2x cpu for growth when no data scaling time data", 5, 1, 6.3, 100, 100, - tester.autoscale(application1, cluster1, capacity).target()); + tester.autoscale(application1, cluster1, capacity)); tester.setScalingDuration(application1, cluster1.id(), Duration.ofMinutes(5)); - tester.addQueryRateMeasurements(application1, cluster1.id(), - 100, - t -> 10.0 + (t < 50 ? t : 100 - t)); - tester.clock().advance(Duration.ofMinutes(-100 * 5)); - tester.addCpuMeasurements(0.25f, 1f, 100, application1); + timeAdded = tester.addQueryRateMeasurements(application1, cluster1.id(), + 100, + t -> 10.0 + (t < 50 ? t : 100 - t)); + tester.clock().advance(timeAdded.negated()); + tester.addCpuMeasurements(0.25f, 1f, 200, application1); tester.assertResources("Scale down since observed growth is slower than scaling time", 5, 1, 3.4, 100, 100, - tester.autoscale(application1, cluster1, capacity).target()); + tester.autoscale(application1, cluster1, capacity)); tester.clearQueryRateMeasurements(application1, cluster1.id()); tester.setScalingDuration(application1, cluster1.id(), Duration.ofMinutes(60)); - tester.addQueryRateMeasurements(application1, cluster1.id(), - 100, - t -> 10.0 + (t < 50 ? t * t * t : 125000 - (t - 49) * (t - 49) * (t - 49))); - tester.clock().advance(Duration.ofMinutes(-100 * 5)); - tester.addCpuMeasurements(0.25f, 1f, 100, application1); + timeAdded = tester.addQueryRateMeasurements(application1, cluster1.id(), + 100, + t -> 10.0 + (t < 50 ? t * t * t : 125000 - (t - 49) * (t - 49) * (t - 49))); + tester.clock().advance(timeAdded.negated()); + tester.addCpuMeasurements(0.25f, 1f, 200, application1); tester.assertResources("Scale up since observed growth is faster than scaling time", 5, 1, 10.0, 100, 100, - tester.autoscale(application1, cluster1, capacity).target()); + tester.autoscale(application1, cluster1, capacity)); } @Test @@ -708,35 +708,35 @@ public class AutoscalingTest { tester.addLoadMeasurements(application1, cluster1.id(), 100, t -> t == 0 ? 20.0 : 10.0, t -> 10.0); tester.assertResources("Query and write load is equal -> scale up somewhat", 5, 1, 7.3, 100, 100, - tester.autoscale(application1, cluster1, capacity).target()); + tester.autoscale(application1, cluster1, capacity)); tester.addCpuMeasurements(0.4f, 1f, 100, application1); tester.clock().advance(Duration.ofMinutes(-100 * 5)); tester.addLoadMeasurements(application1, cluster1.id(), 100, t -> t == 0 ? 80.0 : 40.0, t -> 10.0); tester.assertResources("Query load is 4x write load -> scale up more", 5, 1, 9.5, 100, 100, - tester.autoscale(application1, cluster1, capacity).target()); + tester.autoscale(application1, cluster1, capacity)); tester.addCpuMeasurements(0.3f, 1f, 100, application1); tester.clock().advance(Duration.ofMinutes(-100 * 5)); tester.addLoadMeasurements(application1, cluster1.id(), 100, t -> t == 0 ? 20.0 : 10.0, t -> 100.0); tester.assertResources("Write load is 10x query load -> scale down", 5, 1, 2.9, 100, 100, - tester.autoscale(application1, cluster1, capacity).target()); + tester.autoscale(application1, cluster1, capacity)); tester.addCpuMeasurements(0.4f, 1f, 100, application1); tester.clock().advance(Duration.ofMinutes(-100 * 5)); tester.addLoadMeasurements(application1, cluster1.id(), 100, t -> t == 0 ? 20.0 : 10.0, t-> 0.0); tester.assertResources("Query only -> largest possible", 5, 1, 10.0, 100, 100, - tester.autoscale(application1, cluster1, capacity).target()); + tester.autoscale(application1, cluster1, capacity)); tester.addCpuMeasurements(0.4f, 1f, 100, application1); tester.clock().advance(Duration.ofMinutes(-100 * 5)); tester.addLoadMeasurements(application1, cluster1.id(), 100, t -> 0.0, t -> 10.0); tester.assertResources("Write only -> smallest possible", 5, 1, 2.1, 100, 100, - tester.autoscale(application1, cluster1, capacity).target()); + tester.autoscale(application1, cluster1, capacity)); } @Test @@ -776,7 +776,7 @@ public class AutoscalingTest { tester.addCpuMeasurements(1.0f, 1f, 10, application1); tester.assertResources("We scale up even in dev because resources are required", 3, 1, 1.0, 4, 50, - tester.autoscale(application1, cluster1, capacity).target()); + tester.autoscale(application1, cluster1, capacity)); } /** diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/AutoscalingTester.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/AutoscalingTester.java index ededdf8fe7f..03a29b6613c 100644 --- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/AutoscalingTester.java +++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/AutoscalingTester.java @@ -29,6 +29,7 @@ import com.yahoo.vespa.hosted.provision.provisioning.HostResourcesCalculator; import com.yahoo.vespa.hosted.provision.provisioning.ProvisioningTester; import java.time.Duration; +import java.time.Instant; import java.util.List; import java.util.Map; import java.util.Optional; @@ -145,7 +146,7 @@ class AutoscalingTester { NodeList nodes = nodeRepository().nodes().list(Node.State.active).owner(applicationId); float oneExtraNodeFactor = (float)(nodes.size() - 1.0) / (nodes.size()); for (int i = 0; i < count; i++) { - clock().advance(Duration.ofMinutes(5)); + clock().advance(Duration.ofSeconds(150)); for (Node node : nodes) { Load load = new Load(value, ClusterModel.idealMemoryLoad * otherResourcesLoad, @@ -170,13 +171,15 @@ class AutoscalingTester { * @param otherResourcesLoad the load factor relative to ideal to use for other resources * @param count the number of measurements * @param applicationId the application we're adding measurements for all nodes of + * @return the duration added to the current time by this */ - public void addDiskMeasurements(float value, float otherResourcesLoad, - int count, ApplicationId applicationId) { + public Duration addDiskMeasurements(float value, float otherResourcesLoad, + int count, ApplicationId applicationId) { NodeList nodes = nodeRepository().nodes().list(Node.State.active).owner(applicationId); float oneExtraNodeFactor = (float)(nodes.size() - 1.0) / (nodes.size()); + Instant initialTime = clock().instant(); for (int i = 0; i < count; i++) { - clock().advance(Duration.ofMinutes(5)); + clock().advance(Duration.ofSeconds(150)); for (Node node : nodes) { Load load = new Load(ClusterModel.idealQueryCpuLoad * otherResourcesLoad, ClusterModel.idealDiskLoad * otherResourcesLoad, @@ -190,6 +193,7 @@ class AutoscalingTester { 0.0)))); } } + return Duration.between(initialTime, clock().instant()); } /** @@ -290,10 +294,11 @@ class AutoscalingTester { } /** Creates the given number of measurements, spaced 5 minutes between, using the given function */ - public void addQueryRateMeasurements(ApplicationId application, - ClusterSpec.Id cluster, - int measurements, - IntFunction<Double> queryRate) { + public Duration addQueryRateMeasurements(ApplicationId application, + ClusterSpec.Id cluster, + int measurements, + IntFunction<Double> queryRate) { + Instant initialTime = clock().instant(); for (int i = 0; i < measurements; i++) { nodeMetricsDb().addClusterMetrics(application, Map.of(cluster, new ClusterMetricSnapshot(clock().instant(), @@ -301,6 +306,7 @@ class AutoscalingTester { 0.0))); clock().advance(Duration.ofMinutes(5)); } + return Duration.between(initialTime, clock().instant()); } public void clearQueryRateMeasurements(ApplicationId application, ClusterSpec.Id cluster) { @@ -329,28 +335,37 @@ class AutoscalingTester { nodeRepository().nodes().list(Node.State.active).owner(applicationId)); } - public ClusterResources assertResources(String message, - int nodeCount, int groupCount, - NodeResources expectedResources, - Optional<ClusterResources> resources) { - return assertResources(message, nodeCount, groupCount, - expectedResources.vcpu(), expectedResources.memoryGb(), expectedResources.diskGb(), - resources); + public void assertResources(String message, + int nodeCount, int groupCount, + NodeResources expectedResources, + ClusterResources resources) { + assertResources(message, nodeCount, groupCount, + expectedResources.vcpu(), expectedResources.memoryGb(), expectedResources.diskGb(), + resources); } public ClusterResources assertResources(String message, int nodeCount, int groupCount, double approxCpu, double approxMemory, double approxDisk, - Optional<ClusterResources> resources) { + Autoscaler.Advice advice) { + assertTrue("Resources are present: " + message + " (" + advice + ": " + advice.reason() + ")", + advice.target().isPresent()); + var resources = advice.target().get(); + assertResources(message, nodeCount, groupCount, approxCpu, approxMemory, approxDisk, resources); + return resources; + } + + public void assertResources(String message, + int nodeCount, int groupCount, + double approxCpu, double approxMemory, double approxDisk, + ClusterResources resources) { double delta = 0.0000000001; - assertTrue("Resources are present: " + message, resources.isPresent()); - NodeResources nodeResources = resources.get().nodeResources(); - assertEquals("Node count in " + resources.get() + ": " + message, nodeCount, resources.get().nodes()); - assertEquals("Group count in " + resources.get() + ": " + message, groupCount, resources.get().groups()); - assertEquals("Cpu in " + resources.get() + ": " + message, approxCpu, Math.round(nodeResources.vcpu() * 10) / 10.0, delta); - assertEquals("Memory in " + resources.get() + ": " + message, approxMemory, Math.round(nodeResources.memoryGb() * 10) / 10.0, delta); - assertEquals("Disk in: " + resources.get() + ": " + message, approxDisk, Math.round(nodeResources.diskGb() * 10) / 10.0, delta); - return resources.get(); + NodeResources nodeResources = resources.nodeResources(); + assertEquals("Node count in " + resources + ": " + message, nodeCount, resources.nodes()); + assertEquals("Group count in " + resources+ ": " + message, groupCount, resources.groups()); + assertEquals("Cpu in " + resources + ": " + message, approxCpu, Math.round(nodeResources.vcpu() * 10) / 10.0, delta); + assertEquals("Memory in " + resources + ": " + message, approxMemory, Math.round(nodeResources.memoryGb() * 10) / 10.0, delta); + assertEquals("Disk in: " + resources + ": " + message, approxDisk, Math.round(nodeResources.diskGb() * 10) / 10.0, delta); } public ManualClock clock() { diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/AutoscalingMaintainerTest.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/AutoscalingMaintainerTest.java index 6d5677d0911..4c3274d763f 100644 --- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/AutoscalingMaintainerTest.java +++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/AutoscalingMaintainerTest.java @@ -89,11 +89,9 @@ public class AutoscalingMaintainerTest { tester.deploy(app1, cluster1, app1Capacity); // Measure overload - tester.clock().advance(Duration.ofSeconds(1)); tester.addMeasurements(0.9f, 0.9f, 0.9f, 0, 500, app1); // Causes autoscaling - tester.clock().advance(Duration.ofSeconds(1)); tester.clock().advance(Duration.ofMinutes(10)); Instant firstMaintenanceTime = tester.clock().instant(); tester.maintainer().maintain(); @@ -108,16 +106,12 @@ public class AutoscalingMaintainerTest { assertEquals(firstMaintenanceTime.toEpochMilli(), events.get(1).at().toEpochMilli()); // Measure overload still, since change is not applied, but metrics are discarded - tester.clock().advance(Duration.ofSeconds(1)); tester.addMeasurements(0.9f, 0.9f, 0.9f, 0, 500, app1); - tester.clock().advance(Duration.ofSeconds(1)); tester.maintainer().maintain(); assertEquals(firstMaintenanceTime.toEpochMilli(), tester.deployer().lastDeployTime(app1).get().toEpochMilli()); // Measure underload, but no autoscaling since we still haven't measured we're on the new config generation - tester.clock().advance(Duration.ofSeconds(1)); tester.addMeasurements(0.1f, 0.1f, 0.1f, 0, 500, app1); - tester.clock().advance(Duration.ofSeconds(1)); tester.maintainer().maintain(); assertEquals(firstMaintenanceTime.toEpochMilli(), tester.deployer().lastDeployTime(app1).get().toEpochMilli()); @@ -126,8 +120,9 @@ public class AutoscalingMaintainerTest { tester.clock().advance(Duration.ofMinutes(5)); tester.addMeasurements(0.1f, 0.1f, 0.1f, 1, 1, app1); tester.maintainer().maintain(); + assertEquals(firstMaintenanceTime.toEpochMilli(), tester.deployer().lastDeployTime(app1).get().toEpochMilli()); // - measure underload - tester.clock().advance(Duration.ofHours(1)); + tester.clock().advance(Duration.ofDays(4)); // Exit cooling period tester.addMeasurements(0.1f, 0.1f, 0.1f, 1, 500, app1); Instant lastMaintenanceTime = tester.clock().instant(); tester.maintainer().maintain(); @@ -204,6 +199,34 @@ public class AutoscalingMaintainerTest { } @Test + public void test_autoscaling_ignores_high_cpu_right_after_generation_change() { + ApplicationId app1 = AutoscalingMaintainerTester.makeApplicationId("app1"); + ClusterSpec cluster1 = AutoscalingMaintainerTester.containerClusterSpec(); + NodeResources resources = new NodeResources(4, 4, 10, 1); + ClusterResources min = new ClusterResources(2, 1, resources); + ClusterResources max = new ClusterResources(20, 1, resources); + var capacity = Capacity.from(min, max); + var tester = new AutoscalingMaintainerTester(new MockDeployer.ApplicationContext(app1, cluster1, capacity)); + + tester.deploy(app1, cluster1, capacity); + // fast completion + tester.addMeasurements(1.0f, 0.3f, 0.3f, 0, 1, app1); + tester.addMeasurements(1.0f, 0.3f, 0.3f, 0, 1, app1); + tester.maintainer().maintain(); + assertEquals("Scale up: " + tester.cluster(app1, cluster1).autoscalingStatus(), + 1, + tester.cluster(app1, cluster1).lastScalingEvent().get().generation()); + + // fast completion, with initially overloaded cpu + tester.addMeasurements(3.0f, 0.3f, 0.3f, 1, 1, app1); + tester.addMeasurements(0.2f, 0.3f, 0.3f, 1, 1, app1); + tester.maintainer().maintain(); + assertEquals("No autoscaling since we ignore the (first) data point in the warup period", + 1, + tester.cluster(app1, cluster1).lastScalingEvent().get().generation()); + } + + @Test public void test_cd_autoscaling_test() { ApplicationId app1 = AutoscalingMaintainerTester.makeApplicationId("app1"); ClusterSpec cluster1 = AutoscalingMaintainerTester.containerClusterSpec(); @@ -240,7 +263,7 @@ public class AutoscalingMaintainerTest { clock.advance(completionTime); float load = down ? 0.1f : 1.0f; - tester.addMeasurements(load, load, load, generation, 200, application); + tester.addMeasurements(load, load, load, generation, 1, application); tester.maintainer().maintain(); assertEvent("Measured completion of the last scaling event, but no new autoscaling yet", generation, Optional.of(clock.instant()), diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/AutoscalingMaintainerTester.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/AutoscalingMaintainerTester.java index 021ca4bdf64..e1a1a2af5fb 100644 --- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/AutoscalingMaintainerTester.java +++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/AutoscalingMaintainerTester.java @@ -71,8 +71,9 @@ public class AutoscalingMaintainerTester { return provisioningTester.deploy(application, cluster, capacity); } - public void addMeasurements(float cpu, float mem, float disk, long generation, int count, ApplicationId applicationId) { + public Duration addMeasurements(float cpu, float mem, float disk, long generation, int count, ApplicationId applicationId) { NodeList nodes = nodeRepository().nodes().list(Node.State.active).owner(applicationId); + Instant startTime = clock().instant(); for (int i = 0; i < count; i++) { for (Node node : nodes) nodeRepository().metricsDb().addNodeMetrics(List.of(new Pair<>(node.hostname(), @@ -82,7 +83,9 @@ public class AutoscalingMaintainerTester { true, true, 0.0)))); + clock().advance(Duration.ofSeconds(150)); } + return Duration.between(startTime, clock().instant()); } /** Creates the given number of measurements, spaced 5 minutes between, using the given function */ diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/ScalingSuggestionsMaintainerTest.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/ScalingSuggestionsMaintainerTest.java index 03b41412896..4c0395a0c7e 100644 --- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/ScalingSuggestionsMaintainerTest.java +++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/ScalingSuggestionsMaintainerTest.java @@ -13,6 +13,7 @@ import com.yahoo.config.provision.NodeType; import com.yahoo.config.provision.RegionName; import com.yahoo.config.provision.Zone; import com.yahoo.config.provisioning.FlavorsConfig; +import com.yahoo.test.ManualClock; import com.yahoo.vespa.hosted.provision.Node; import com.yahoo.vespa.hosted.provision.NodeList; import com.yahoo.vespa.hosted.provision.NodeRepository; @@ -24,6 +25,7 @@ import com.yahoo.vespa.hosted.provision.provisioning.ProvisioningTester; import org.junit.Test; import java.time.Duration; +import java.time.Instant; import java.util.List; import java.util.Optional; @@ -41,14 +43,13 @@ public class ScalingSuggestionsMaintainerTest { @Test public void testScalingSuggestionsMaintainer() { - ProvisioningTester tester = new ProvisioningTester.Builder().zone(new Zone(Environment.prod, RegionName.from("us-east3"))).flavorsConfig(flavorsConfig()).build(); - + ProvisioningTester tester = new ProvisioningTester.Builder().zone(new Zone(Environment.prod, RegionName.from("us-east3"))) + .flavorsConfig(flavorsConfig()) + .build(); ApplicationId app1 = ProvisioningTester.applicationId("app1"); - ClusterSpec cluster1 = ProvisioningTester.containerClusterSpec(); - ApplicationId app2 = ProvisioningTester.applicationId("app2"); + ClusterSpec cluster1 = ProvisioningTester.containerClusterSpec(); ClusterSpec cluster2 = ProvisioningTester.contentClusterSpec(); - tester.makeReadyNodes(20, "flt", NodeType.host, 8); tester.activateTenantHosts(); @@ -60,7 +61,8 @@ public class ScalingSuggestionsMaintainerTest { false, true)); tester.clock().advance(Duration.ofHours(13)); - addMeasurements(0.90f, 0.90f, 0.90f, 0, 500, app1, tester.nodeRepository()); + Duration timeAdded = addMeasurements(0.90f, 0.90f, 0.90f, 0, 500, app1, tester.nodeRepository()); + tester.clock().advance(timeAdded.negated()); addMeasurements(0.99f, 0.99f, 0.99f, 0, 500, app2, tester.nodeRepository()); ScalingSuggestionsMaintainer maintainer = new ScalingSuggestionsMaintainer(tester.nodeRepository(), @@ -114,10 +116,11 @@ public class ScalingSuggestionsMaintainerTest { .shouldSuggestResources(currentResources); } - public void addMeasurements(float cpu, float memory, float disk, int generation, int count, - ApplicationId applicationId, - NodeRepository nodeRepository) { + public Duration addMeasurements(float cpu, float memory, float disk, int generation, int count, + ApplicationId applicationId, + NodeRepository nodeRepository) { NodeList nodes = nodeRepository.nodes().list(Node.State.active).owner(applicationId); + Instant startTime = nodeRepository.clock().instant(); for (int i = 0; i < count; i++) { for (Node node : nodes) nodeRepository.metricsDb().addNodeMetrics(List.of(new Pair<>(node.hostname(), @@ -127,7 +130,9 @@ public class ScalingSuggestionsMaintainerTest { true, true, 0.0)))); + ((ManualClock)nodeRepository.clock()).advance(Duration.ofSeconds(150)); } + return Duration.between(startTime, nodeRepository.clock().instant()); } private FlavorsConfig flavorsConfig() { |