author     Jon Bratseth <bratseth@gmail.com>  2021-12-06 16:03:42 +0100
committer  Jon Bratseth <bratseth@gmail.com>  2021-12-06 16:03:42 +0100
commit     b361a711b50e456831775e64642e0a7cb30b9d1c (patch)
tree       ddb5a9665abdc219666d72a8fdd9dfd60ee0409a /node-repository/src/test/java/com/yahoo
parent     76a3908ea1d3475d36a7b2d15b867a18d7952862 (diff)
Ignore warmup period
Diffstat (limited to 'node-repository/src/test/java/com/yahoo')
-rw-r--r--  node-repository/src/test/java/com/yahoo/vespa/hosted/provision/NodeListMicroBenchmarkTest.java                      1
-rw-r--r--  node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/AutoscalingTest.java                      104
-rw-r--r--  node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/AutoscalingTester.java                     63
-rw-r--r--  node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/AutoscalingMaintainerTest.java           39
-rw-r--r--  node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/AutoscalingMaintainerTester.java           5
-rw-r--r--  node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/ScalingSuggestionsMaintainerTest.java     23
6 files changed, 140 insertions, 95 deletions
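
A recurring change in the diffs below is that the measurement helpers now advance the test clock themselves and return the Duration they added, so callers rewind with timeAdded.negated() instead of hard-coding offsets such as Duration.ofMinutes(-10 * 5). A minimal, self-contained sketch of that pattern, assuming illustrative names (FakeClock, addSamples) rather than the actual tester helpers or com.yahoo.test.ManualClock:

    import java.time.Duration;
    import java.time.Instant;

    // Minimal sketch (not Vespa code): a measurement helper advances a manual clock a
    // fixed step per sample and reports how far it moved, so the caller can rewind
    // before recording a second metric series over the same window.
    class ClockRewindSketch {

        /** A tiny stand-in for a mutable test clock such as com.yahoo.test.ManualClock. */
        static final class FakeClock {
            private Instant now = Instant.EPOCH;
            Instant instant() { return now; }
            void advance(Duration d) { now = now.plus(d); }
        }

        /** Records 'count' samples spaced 150 seconds apart and returns the time added. */
        static Duration addSamples(FakeClock clock, int count) {
            Instant start = clock.instant();
            for (int i = 0; i < count; i++) {
                // a real helper would store a metrics snapshot at clock.instant() here
                clock.advance(Duration.ofSeconds(150));
            }
            return Duration.between(start, clock.instant());
        }

        public static void main(String[] args) {
            FakeClock clock = new FakeClock();
            Duration timeAdded = addSamples(clock, 120);   // e.g. a cpu series
            clock.advance(timeAdded.negated());            // rewind instead of hard-coding -120 * 150s
            addSamples(clock, 120);                        // a second series over the same window
            System.out.println("each series covered " + timeAdded);
        }
    }

The rewind keeps a second metric series (for example the query rate) overlapping the same time window as the first, without the test needing to know how far the helper moved the clock.
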
diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/NodeListMicroBenchmarkTest.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/NodeListMicroBenchmarkTest.java
index b7f9dcb8e8a..8027afc665b 100644
--- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/NodeListMicroBenchmarkTest.java
+++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/NodeListMicroBenchmarkTest.java
@@ -58,7 +58,6 @@ public class NodeListMicroBenchmarkTest {
nodeList.childrenOf(nodes.get(indexes.get(i)));
}
Duration duration = Duration.between(start, Instant.now());
- System.out.println("Calling NodeList.childrenOf took " + duration + " (" + duration.toNanos() / iterations / 1000 + " microseconds per invocation)");
}
private List<Node> createHosts() {
diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/AutoscalingTest.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/AutoscalingTest.java
index 7ade2cdf8c4..16017036e87 100644
--- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/AutoscalingTest.java
+++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/AutoscalingTest.java
@@ -15,7 +15,6 @@ import com.yahoo.config.provision.NodeType;
import com.yahoo.config.provision.RegionName;
import com.yahoo.config.provision.SystemName;
import com.yahoo.config.provision.Zone;
-import com.yahoo.vespa.hosted.provision.Node;
import com.yahoo.vespa.hosted.provision.NodeRepository;
import com.yahoo.vespa.hosted.provision.Nodelike;
import com.yahoo.vespa.hosted.provision.provisioning.CapacityPolicies;
@@ -63,7 +62,7 @@ public class AutoscalingTest {
tester.addQueryRateMeasurements(application1, cluster1.id(), 10, t -> t == 0 ? 20.0 : 10.0); // Query traffic only
ClusterResources scaledResources = tester.assertResources("Scaling up since resource usage is too high",
15, 1, 1.2, 28.6, 28.6,
- tester.autoscale(application1, cluster1, capacity).target());
+ tester.autoscale(application1, cluster1, capacity));
tester.deploy(application1, cluster1, scaledResources);
assertTrue("Cluster in flux -> No further change", tester.autoscale(application1, cluster1, capacity).isEmpty());
@@ -87,7 +86,7 @@ public class AutoscalingTest {
tester.addQueryRateMeasurements(application1, cluster1.id(), 10, t -> t == 0 ? 20.0 : 10.0); // Query traffic only
tester.assertResources("Scaling down to minimum since usage has gone down significantly",
7, 1, 1.0, 66.7, 66.7,
- tester.autoscale(application1, cluster1, capacity).target());
+ tester.autoscale(application1, cluster1, capacity));
var events = tester.nodeRepository().applications().get(application1).get().cluster(cluster1.id()).get().scalingEvents();
}
@@ -111,7 +110,7 @@ public class AutoscalingTest {
tester.addQueryRateMeasurements(application1, cluster1.id(), 10, t -> t == 0 ? 20.0 : 10.0); // Query traffic only
ClusterResources scaledResources = tester.assertResources("Scaling up since cpu usage is too high",
7, 1, 2.5, 80.0, 80.0,
- tester.autoscale(application1, cluster1, capacity).target());
+ tester.autoscale(application1, cluster1, capacity));
tester.deploy(application1, cluster1, scaledResources);
tester.deactivateRetired(application1, cluster1, scaledResources);
@@ -121,7 +120,7 @@ public class AutoscalingTest {
tester.addQueryRateMeasurements(application1, cluster1.id(), 10, t -> t == 0 ? 20.0 : 10.0); // Query traffic only
tester.assertResources("Scaling down since cpu usage has gone down",
4, 1, 2.5, 68.6, 68.6,
- tester.autoscale(application1, cluster1, capacity).target());
+ tester.autoscale(application1, cluster1, capacity));
}
@Test
@@ -148,7 +147,7 @@ public class AutoscalingTest {
var capacity = Capacity.from(min, max);
ClusterResources scaledResources = tester.assertResources("Scaling up since resource usage is too high",
14, 1, 1.4, 30.8, 30.8,
- tester.autoscale(application1, cluster1, capacity).target());
+ tester.autoscale(application1, cluster1, capacity));
assertEquals("Disk speed from min/max is used",
NodeResources.DiskSpeed.any, scaledResources.nodeResources().diskSpeed());
tester.deploy(application1, cluster1, scaledResources);
@@ -206,7 +205,7 @@ public class AutoscalingTest {
tester.addQueryRateMeasurements(application1, cluster1.id(), 10, t -> t == 0 ? 20.0 : 10.0); // Query traffic only
tester.assertResources("Scaling up to limit since resource usage is too high",
6, 1, 2.4, 78.0, 79.0,
- tester.autoscale(application1, cluster1, capacity).target());
+ tester.autoscale(application1, cluster1, capacity));
}
@Test
@@ -225,7 +224,7 @@ public class AutoscalingTest {
tester.addMeasurements(0.05f, 0.05f, 0.05f, 0, 120, application1);
tester.assertResources("Scaling down to limit since resource usage is low",
4, 1, 1.8, 7.7, 10.0,
- tester.autoscale(application1, cluster1, capacity).target());
+ tester.autoscale(application1, cluster1, capacity));
}
@Test
@@ -246,13 +245,14 @@ public class AutoscalingTest {
tester.deploy(application1, cluster1, Capacity.from(min, max));
tester.assertResources("Min number of nodes and default resources",
2, 1, defaultResources,
- Optional.of(tester.nodeRepository().nodes().list().owner(application1).toResources()));
+ tester.nodeRepository().nodes().list().owner(application1).toResources());
tester.addMeasurements(0.25f, 0.95f, 0.95f, 0, 120, application1);
tester.clock().advance(Duration.ofMinutes(-10 * 5));
tester.addQueryRateMeasurements(application1, cluster1.id(), 10, t -> t == 0 ? 20.0 : 10.0); // Query traffic only
tester.assertResources("Scaling up to limit since resource usage is too high",
- 4, 1, defaultResources,
- tester.autoscale(application1, cluster1, capacity).target());
+ 4, 1,
+ defaultResources.vcpu(), defaultResources.memoryGb(), defaultResources.diskGb(),
+ tester.autoscale(application1, cluster1, capacity));
}
@Test
@@ -273,7 +273,7 @@ public class AutoscalingTest {
tester.addQueryRateMeasurements(application1, cluster1.id(), 10, t -> t == 0 ? 20.0 : 10.0); // Query traffic only
tester.assertResources("Scaling up since resource usage is too high",
6, 6, 3.6, 8.0, 10.0,
- tester.autoscale(application1, cluster1, capacity).target());
+ tester.autoscale(application1, cluster1, capacity));
}
@Test
@@ -315,15 +315,15 @@ public class AutoscalingTest {
// deploy
tester.deploy(application1, cluster1, 3, 1, min.nodeResources());
- tester.addDiskMeasurements(0.01f, 1f, 120, application1);
- tester.clock().advance(Duration.ofMinutes(-10 * 5));
+ Duration timeAdded = tester.addDiskMeasurements(0.01f, 1f, 120, application1);
+ tester.clock().advance(timeAdded.negated());
tester.addQueryRateMeasurements(application1, cluster1.id(), 10, t -> 10.0); // Query traffic only
- Optional<ClusterResources> suggestion = tester.suggest(application1, cluster1.id(), min, max).target();
+ Autoscaler.Advice suggestion = tester.suggest(application1, cluster1.id(), min, max);
tester.assertResources("Choosing the remote disk flavor as it has less disk",
6, 1, 3.0, 100.0, 10.0,
suggestion);
assertEquals("Choosing the remote disk flavor as it has less disk",
- StorageType.remote, suggestion.get().nodeResources().storageType());
+ StorageType.remote, suggestion.target().get().nodeResources().storageType());
}
@Test
@@ -343,7 +343,7 @@ public class AutoscalingTest {
tester.addQueryRateMeasurements(application1, cluster1.id(), 10, t -> t == 0 ? 20.0 : 10.0); // Query traffic only
tester.assertResources("Scaling up since resource usage is too high",
7, 1, 2.5, 80.0, 80.0,
- tester.suggest(application1, cluster1.id(), min, max).target());
+ tester.suggest(application1, cluster1.id(), min, max));
}
@Test
@@ -400,7 +400,7 @@ public class AutoscalingTest {
tester.addQueryRateMeasurements(application1, cluster1.id(), 10, t -> t == 0 ? 20.0 : 10.0); // Query traffic only
tester.assertResources("Scaling up since resource usage is too high",
7, 7, 2.5, 80.0, 80.0,
- tester.autoscale(application1, cluster1, capacity).target());
+ tester.autoscale(application1, cluster1, capacity));
}
@Test
@@ -423,7 +423,7 @@ public class AutoscalingTest {
t -> 1.0);
tester.assertResources("Scaling up since resource usage is too high, changing to 1 group is cheaper",
8, 1, 2.6, 83.3, 83.3,
- tester.autoscale(application1, cluster1, capacity).target());
+ tester.autoscale(application1, cluster1, capacity));
}
/** Same as above but mostly write traffic, which favors smaller groups */
@@ -447,7 +447,7 @@ public class AutoscalingTest {
t -> 100.0);
tester.assertResources("Scaling down since resource usage is too high, changing to 1 group is cheaper",
4, 1, 2.1, 83.3, 83.3,
- tester.autoscale(application1, cluster1, capacity).target());
+ tester.autoscale(application1, cluster1, capacity));
}
@Test
@@ -469,7 +469,7 @@ public class AutoscalingTest {
tester.addQueryRateMeasurements(application1, cluster1.id(), 10, t -> t == 0 ? 20.0 : 10.0); // Query traffic only
tester.assertResources("Increase group size to reduce memory load",
8, 2, 12.4, 96.2, 62.5,
- tester.autoscale(application1, cluster1, capacity).target());
+ tester.autoscale(application1, cluster1, capacity));
}
@Test
@@ -490,7 +490,7 @@ public class AutoscalingTest {
tester.addMemMeasurements(0.02f, 0.95f, 120, application1);
tester.assertResources("Scaling down",
6, 1, 2.9, 4.0, 95.0,
- tester.autoscale(application1, cluster1, capacity).target());
+ tester.autoscale(application1, cluster1, capacity));
}
@Test
@@ -519,7 +519,7 @@ public class AutoscalingTest {
tester.addQueryRateMeasurements(application1, cluster1.id(), 10, t -> t == 0 ? 20.0 : 10.0); // Query traffic only
tester.assertResources("Scaling down",
6, 1, 1.4, 4.0, 95.0,
- tester.autoscale(application1, cluster1, capacity).target());
+ tester.autoscale(application1, cluster1, capacity));
}
@Test
@@ -543,7 +543,7 @@ public class AutoscalingTest {
tester.addQueryRateMeasurements(application1, cluster1.id(), 10, t -> t == 0 ? 20.0 : 10.0); // Query traffic only
tester.assertResources("Scaling up",
4, 1, 6.7, 20.5, 200,
- tester.autoscale(application1, cluster1, capacity).target());
+ tester.autoscale(application1, cluster1, capacity));
}
{ // 15 Gb memory tax
@@ -560,7 +560,7 @@ public class AutoscalingTest {
tester.addQueryRateMeasurements(application1, cluster1.id(), 10, t -> t == 0 ? 20.0 : 10.0); // Query traffic only
tester.assertResources("Scaling up",
4, 1, 6.7, 35.5, 200,
- tester.autoscale(application1, cluster1, capacity).target());
+ tester.autoscale(application1, cluster1, capacity));
}
}
@@ -591,7 +591,7 @@ public class AutoscalingTest {
tester.addMemMeasurements(0.9f, 0.6f, 120, application1);
ClusterResources scaledResources = tester.assertResources("Scaling up since resource usage is too high.",
8, 1, 3, 83, 34.3,
- tester.autoscale(application1, cluster1, capacity).target());
+ tester.autoscale(application1, cluster1, capacity));
tester.deploy(application1, cluster1, scaledResources);
tester.deactivateRetired(application1, cluster1, scaledResources);
@@ -602,7 +602,7 @@ public class AutoscalingTest {
tester.addQueryRateMeasurements(application1, cluster1.id(), 10, t -> t == 0 ? 20.0 : 10.0); // Query traffic only
tester.assertResources("Scaling down since resource usage has gone down",
5, 1, 3, 83, 36.0,
- tester.autoscale(application1, cluster1, capacity).target());
+ tester.autoscale(application1, cluster1, capacity));
}
@Test
@@ -624,17 +624,17 @@ public class AutoscalingTest {
// (no read share stored)
tester.assertResources("Advice to scale up since we set aside for bcp by default",
7, 1, 3, 100, 100,
- tester.autoscale(application1, cluster1, capacity).target());
+ tester.autoscale(application1, cluster1, capacity));
tester.storeReadShare(0.25, 0.5, application1);
tester.assertResources("Half of global share is the same as the default assumption used above",
7, 1, 3, 100, 100,
- tester.autoscale(application1, cluster1, capacity).target());
+ tester.autoscale(application1, cluster1, capacity));
tester.storeReadShare(0.5, 0.5, application1);
tester.assertResources("Advice to scale down since we don't need room for bcp",
4, 1, 3, 100, 100,
- tester.autoscale(application1, cluster1, capacity).target());
+ tester.autoscale(application1, cluster1, capacity));
}
@@ -652,36 +652,36 @@ public class AutoscalingTest {
ClusterSpec cluster1 = tester.clusterSpec(ClusterSpec.Type.container, "cluster1");
tester.deploy(application1, cluster1, 5, 1, midResources);
- tester.addQueryRateMeasurements(application1, cluster1.id(), 100, t -> t == 0 ? 20.0 : 10.0);
- tester.clock().advance(Duration.ofMinutes(-100 * 5));
- tester.addCpuMeasurements(0.25f, 1f, 100, application1);
+ Duration timeAdded = tester.addQueryRateMeasurements(application1, cluster1.id(), 100, t -> t == 0 ? 20.0 : 10.0);
+ tester.clock().advance(timeAdded.negated());
+ tester.addCpuMeasurements(0.25f, 1f, 200, application1);
// (no query rate data)
tester.assertResources("Scale up since we assume we need 2x cpu for growth when no data scaling time data",
5, 1, 6.3, 100, 100,
- tester.autoscale(application1, cluster1, capacity).target());
+ tester.autoscale(application1, cluster1, capacity));
tester.setScalingDuration(application1, cluster1.id(), Duration.ofMinutes(5));
- tester.addQueryRateMeasurements(application1, cluster1.id(),
- 100,
- t -> 10.0 + (t < 50 ? t : 100 - t));
- tester.clock().advance(Duration.ofMinutes(-100 * 5));
- tester.addCpuMeasurements(0.25f, 1f, 100, application1);
+ timeAdded = tester.addQueryRateMeasurements(application1, cluster1.id(),
+ 100,
+ t -> 10.0 + (t < 50 ? t : 100 - t));
+ tester.clock().advance(timeAdded.negated());
+ tester.addCpuMeasurements(0.25f, 1f, 200, application1);
tester.assertResources("Scale down since observed growth is slower than scaling time",
5, 1, 3.4, 100, 100,
- tester.autoscale(application1, cluster1, capacity).target());
+ tester.autoscale(application1, cluster1, capacity));
tester.clearQueryRateMeasurements(application1, cluster1.id());
tester.setScalingDuration(application1, cluster1.id(), Duration.ofMinutes(60));
- tester.addQueryRateMeasurements(application1, cluster1.id(),
- 100,
- t -> 10.0 + (t < 50 ? t * t * t : 125000 - (t - 49) * (t - 49) * (t - 49)));
- tester.clock().advance(Duration.ofMinutes(-100 * 5));
- tester.addCpuMeasurements(0.25f, 1f, 100, application1);
+ timeAdded = tester.addQueryRateMeasurements(application1, cluster1.id(),
+ 100,
+ t -> 10.0 + (t < 50 ? t * t * t : 125000 - (t - 49) * (t - 49) * (t - 49)));
+ tester.clock().advance(timeAdded.negated());
+ tester.addCpuMeasurements(0.25f, 1f, 200, application1);
tester.assertResources("Scale up since observed growth is faster than scaling time",
5, 1, 10.0, 100, 100,
- tester.autoscale(application1, cluster1, capacity).target());
+ tester.autoscale(application1, cluster1, capacity));
}
@Test
@@ -708,35 +708,35 @@ public class AutoscalingTest {
tester.addLoadMeasurements(application1, cluster1.id(), 100, t -> t == 0 ? 20.0 : 10.0, t -> 10.0);
tester.assertResources("Query and write load is equal -> scale up somewhat",
5, 1, 7.3, 100, 100,
- tester.autoscale(application1, cluster1, capacity).target());
+ tester.autoscale(application1, cluster1, capacity));
tester.addCpuMeasurements(0.4f, 1f, 100, application1);
tester.clock().advance(Duration.ofMinutes(-100 * 5));
tester.addLoadMeasurements(application1, cluster1.id(), 100, t -> t == 0 ? 80.0 : 40.0, t -> 10.0);
tester.assertResources("Query load is 4x write load -> scale up more",
5, 1, 9.5, 100, 100,
- tester.autoscale(application1, cluster1, capacity).target());
+ tester.autoscale(application1, cluster1, capacity));
tester.addCpuMeasurements(0.3f, 1f, 100, application1);
tester.clock().advance(Duration.ofMinutes(-100 * 5));
tester.addLoadMeasurements(application1, cluster1.id(), 100, t -> t == 0 ? 20.0 : 10.0, t -> 100.0);
tester.assertResources("Write load is 10x query load -> scale down",
5, 1, 2.9, 100, 100,
- tester.autoscale(application1, cluster1, capacity).target());
+ tester.autoscale(application1, cluster1, capacity));
tester.addCpuMeasurements(0.4f, 1f, 100, application1);
tester.clock().advance(Duration.ofMinutes(-100 * 5));
tester.addLoadMeasurements(application1, cluster1.id(), 100, t -> t == 0 ? 20.0 : 10.0, t-> 0.0);
tester.assertResources("Query only -> largest possible",
5, 1, 10.0, 100, 100,
- tester.autoscale(application1, cluster1, capacity).target());
+ tester.autoscale(application1, cluster1, capacity));
tester.addCpuMeasurements(0.4f, 1f, 100, application1);
tester.clock().advance(Duration.ofMinutes(-100 * 5));
tester.addLoadMeasurements(application1, cluster1.id(), 100, t -> 0.0, t -> 10.0);
tester.assertResources("Write only -> smallest possible",
5, 1, 2.1, 100, 100,
- tester.autoscale(application1, cluster1, capacity).target());
+ tester.autoscale(application1, cluster1, capacity));
}
@Test
@@ -776,7 +776,7 @@ public class AutoscalingTest {
tester.addCpuMeasurements(1.0f, 1f, 10, application1);
tester.assertResources("We scale up even in dev because resources are required",
3, 1, 1.0, 4, 50,
- tester.autoscale(application1, cluster1, capacity).target());
+ tester.autoscale(application1, cluster1, capacity));
}
/**
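
The AutoscalingTest hunks above stop unwrapping the result with .target() and pass the whole autoscaling advice to the tester, whose assertion can then report the advice's reason when no target is present (see the AutoscalingTester diff below). A condensed, hypothetical sketch of that assertion shape — Advice and assertTarget are illustrative names here, not the Vespa API:

    import java.util.Optional;

    import static org.junit.Assert.assertEquals;
    import static org.junit.Assert.assertTrue;

    // Illustrative only: an advice-style result carrying an optional target plus the
    // reason it was (or was not) produced, and an assertion that surfaces that reason.
    class AdviceAssertionSketch {

        record Advice(Optional<Integer> targetNodes, String reason) { }

        /** Fails with the advice's reason when no target is present, then checks the node count. */
        static int assertTarget(String message, int expectedNodes, Advice advice) {
            assertTrue("Resources are present: " + message + " (" + advice.reason() + ")",
                       advice.targetNodes().isPresent());
            int nodes = advice.targetNodes().get();
            assertEquals("Node count: " + message, expectedNodes, nodes);
            return nodes;
        }
    }

Keeping the reason in the failure message is what makes an empty advice debuggable without rerunning the test.
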
diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/AutoscalingTester.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/AutoscalingTester.java
index ededdf8fe7f..03a29b6613c 100644
--- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/AutoscalingTester.java
+++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/AutoscalingTester.java
@@ -29,6 +29,7 @@ import com.yahoo.vespa.hosted.provision.provisioning.HostResourcesCalculator;
import com.yahoo.vespa.hosted.provision.provisioning.ProvisioningTester;
import java.time.Duration;
+import java.time.Instant;
import java.util.List;
import java.util.Map;
import java.util.Optional;
@@ -145,7 +146,7 @@ class AutoscalingTester {
NodeList nodes = nodeRepository().nodes().list(Node.State.active).owner(applicationId);
float oneExtraNodeFactor = (float)(nodes.size() - 1.0) / (nodes.size());
for (int i = 0; i < count; i++) {
- clock().advance(Duration.ofMinutes(5));
+ clock().advance(Duration.ofSeconds(150));
for (Node node : nodes) {
Load load = new Load(value,
ClusterModel.idealMemoryLoad * otherResourcesLoad,
@@ -170,13 +171,15 @@ class AutoscalingTester {
* @param otherResourcesLoad the load factor relative to ideal to use for other resources
* @param count the number of measurements
* @param applicationId the application we're adding measurements for all nodes of
+ * @return the duration added to the current time by this
*/
- public void addDiskMeasurements(float value, float otherResourcesLoad,
- int count, ApplicationId applicationId) {
+ public Duration addDiskMeasurements(float value, float otherResourcesLoad,
+ int count, ApplicationId applicationId) {
NodeList nodes = nodeRepository().nodes().list(Node.State.active).owner(applicationId);
float oneExtraNodeFactor = (float)(nodes.size() - 1.0) / (nodes.size());
+ Instant initialTime = clock().instant();
for (int i = 0; i < count; i++) {
- clock().advance(Duration.ofMinutes(5));
+ clock().advance(Duration.ofSeconds(150));
for (Node node : nodes) {
Load load = new Load(ClusterModel.idealQueryCpuLoad * otherResourcesLoad,
ClusterModel.idealDiskLoad * otherResourcesLoad,
@@ -190,6 +193,7 @@ class AutoscalingTester {
0.0))));
}
}
+ return Duration.between(initialTime, clock().instant());
}
/**
@@ -290,10 +294,11 @@ class AutoscalingTester {
}
/** Creates the given number of measurements, spaced 5 minutes between, using the given function */
- public void addQueryRateMeasurements(ApplicationId application,
- ClusterSpec.Id cluster,
- int measurements,
- IntFunction<Double> queryRate) {
+ public Duration addQueryRateMeasurements(ApplicationId application,
+ ClusterSpec.Id cluster,
+ int measurements,
+ IntFunction<Double> queryRate) {
+ Instant initialTime = clock().instant();
for (int i = 0; i < measurements; i++) {
nodeMetricsDb().addClusterMetrics(application,
Map.of(cluster, new ClusterMetricSnapshot(clock().instant(),
@@ -301,6 +306,7 @@ class AutoscalingTester {
0.0)));
clock().advance(Duration.ofMinutes(5));
}
+ return Duration.between(initialTime, clock().instant());
}
public void clearQueryRateMeasurements(ApplicationId application, ClusterSpec.Id cluster) {
@@ -329,28 +335,37 @@ class AutoscalingTester {
nodeRepository().nodes().list(Node.State.active).owner(applicationId));
}
- public ClusterResources assertResources(String message,
- int nodeCount, int groupCount,
- NodeResources expectedResources,
- Optional<ClusterResources> resources) {
- return assertResources(message, nodeCount, groupCount,
- expectedResources.vcpu(), expectedResources.memoryGb(), expectedResources.diskGb(),
- resources);
+ public void assertResources(String message,
+ int nodeCount, int groupCount,
+ NodeResources expectedResources,
+ ClusterResources resources) {
+ assertResources(message, nodeCount, groupCount,
+ expectedResources.vcpu(), expectedResources.memoryGb(), expectedResources.diskGb(),
+ resources);
}
public ClusterResources assertResources(String message,
int nodeCount, int groupCount,
double approxCpu, double approxMemory, double approxDisk,
- Optional<ClusterResources> resources) {
+ Autoscaler.Advice advice) {
+ assertTrue("Resources are present: " + message + " (" + advice + ": " + advice.reason() + ")",
+ advice.target().isPresent());
+ var resources = advice.target().get();
+ assertResources(message, nodeCount, groupCount, approxCpu, approxMemory, approxDisk, resources);
+ return resources;
+ }
+
+ public void assertResources(String message,
+ int nodeCount, int groupCount,
+ double approxCpu, double approxMemory, double approxDisk,
+ ClusterResources resources) {
double delta = 0.0000000001;
- assertTrue("Resources are present: " + message, resources.isPresent());
- NodeResources nodeResources = resources.get().nodeResources();
- assertEquals("Node count in " + resources.get() + ": " + message, nodeCount, resources.get().nodes());
- assertEquals("Group count in " + resources.get() + ": " + message, groupCount, resources.get().groups());
- assertEquals("Cpu in " + resources.get() + ": " + message, approxCpu, Math.round(nodeResources.vcpu() * 10) / 10.0, delta);
- assertEquals("Memory in " + resources.get() + ": " + message, approxMemory, Math.round(nodeResources.memoryGb() * 10) / 10.0, delta);
- assertEquals("Disk in: " + resources.get() + ": " + message, approxDisk, Math.round(nodeResources.diskGb() * 10) / 10.0, delta);
- return resources.get();
+ NodeResources nodeResources = resources.nodeResources();
+ assertEquals("Node count in " + resources + ": " + message, nodeCount, resources.nodes());
+ assertEquals("Group count in " + resources+ ": " + message, groupCount, resources.groups());
+ assertEquals("Cpu in " + resources + ": " + message, approxCpu, Math.round(nodeResources.vcpu() * 10) / 10.0, delta);
+ assertEquals("Memory in " + resources + ": " + message, approxMemory, Math.round(nodeResources.memoryGb() * 10) / 10.0, delta);
+ assertEquals("Disk in: " + resources + ": " + message, approxDisk, Math.round(nodeResources.diskGb() * 10) / 10.0, delta);
}
public ManualClock clock() {
diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/AutoscalingMaintainerTest.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/AutoscalingMaintainerTest.java
index 6d5677d0911..4c3274d763f 100644
--- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/AutoscalingMaintainerTest.java
+++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/AutoscalingMaintainerTest.java
@@ -89,11 +89,9 @@ public class AutoscalingMaintainerTest {
tester.deploy(app1, cluster1, app1Capacity);
// Measure overload
- tester.clock().advance(Duration.ofSeconds(1));
tester.addMeasurements(0.9f, 0.9f, 0.9f, 0, 500, app1);
// Causes autoscaling
- tester.clock().advance(Duration.ofSeconds(1));
tester.clock().advance(Duration.ofMinutes(10));
Instant firstMaintenanceTime = tester.clock().instant();
tester.maintainer().maintain();
@@ -108,16 +106,12 @@ public class AutoscalingMaintainerTest {
assertEquals(firstMaintenanceTime.toEpochMilli(), events.get(1).at().toEpochMilli());
// Measure overload still, since change is not applied, but metrics are discarded
- tester.clock().advance(Duration.ofSeconds(1));
tester.addMeasurements(0.9f, 0.9f, 0.9f, 0, 500, app1);
- tester.clock().advance(Duration.ofSeconds(1));
tester.maintainer().maintain();
assertEquals(firstMaintenanceTime.toEpochMilli(), tester.deployer().lastDeployTime(app1).get().toEpochMilli());
// Measure underload, but no autoscaling since we still haven't measured we're on the new config generation
- tester.clock().advance(Duration.ofSeconds(1));
tester.addMeasurements(0.1f, 0.1f, 0.1f, 0, 500, app1);
- tester.clock().advance(Duration.ofSeconds(1));
tester.maintainer().maintain();
assertEquals(firstMaintenanceTime.toEpochMilli(), tester.deployer().lastDeployTime(app1).get().toEpochMilli());
@@ -126,8 +120,9 @@ public class AutoscalingMaintainerTest {
tester.clock().advance(Duration.ofMinutes(5));
tester.addMeasurements(0.1f, 0.1f, 0.1f, 1, 1, app1);
tester.maintainer().maintain();
+ assertEquals(firstMaintenanceTime.toEpochMilli(), tester.deployer().lastDeployTime(app1).get().toEpochMilli());
// - measure underload
- tester.clock().advance(Duration.ofHours(1));
+ tester.clock().advance(Duration.ofDays(4)); // Exit cooling period
tester.addMeasurements(0.1f, 0.1f, 0.1f, 1, 500, app1);
Instant lastMaintenanceTime = tester.clock().instant();
tester.maintainer().maintain();
@@ -204,6 +199,34 @@ public class AutoscalingMaintainerTest {
}
@Test
+ public void test_autoscaling_ignores_high_cpu_right_after_generation_change() {
+ ApplicationId app1 = AutoscalingMaintainerTester.makeApplicationId("app1");
+ ClusterSpec cluster1 = AutoscalingMaintainerTester.containerClusterSpec();
+ NodeResources resources = new NodeResources(4, 4, 10, 1);
+ ClusterResources min = new ClusterResources(2, 1, resources);
+ ClusterResources max = new ClusterResources(20, 1, resources);
+ var capacity = Capacity.from(min, max);
+ var tester = new AutoscalingMaintainerTester(new MockDeployer.ApplicationContext(app1, cluster1, capacity));
+
+ tester.deploy(app1, cluster1, capacity);
+ // fast completion
+ tester.addMeasurements(1.0f, 0.3f, 0.3f, 0, 1, app1);
+ tester.addMeasurements(1.0f, 0.3f, 0.3f, 0, 1, app1);
+ tester.maintainer().maintain();
+ assertEquals("Scale up: " + tester.cluster(app1, cluster1).autoscalingStatus(),
+ 1,
+ tester.cluster(app1, cluster1).lastScalingEvent().get().generation());
+
+ // fast completion, with initially overloaded cpu
+ tester.addMeasurements(3.0f, 0.3f, 0.3f, 1, 1, app1);
+ tester.addMeasurements(0.2f, 0.3f, 0.3f, 1, 1, app1);
+ tester.maintainer().maintain();
+ assertEquals("No autoscaling since we ignore the (first) data point in the warup period",
+ 1,
+ tester.cluster(app1, cluster1).lastScalingEvent().get().generation());
+ }
+
+ @Test
public void test_cd_autoscaling_test() {
ApplicationId app1 = AutoscalingMaintainerTester.makeApplicationId("app1");
ClusterSpec cluster1 = AutoscalingMaintainerTester.containerClusterSpec();
@@ -240,7 +263,7 @@ public class AutoscalingMaintainerTest {
clock.advance(completionTime);
float load = down ? 0.1f : 1.0f;
- tester.addMeasurements(load, load, load, generation, 200, application);
+ tester.addMeasurements(load, load, load, generation, 1, application);
tester.maintainer().maintain();
assertEvent("Measured completion of the last scaling event, but no new autoscaling yet",
generation, Optional.of(clock.instant()),
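
The new test test_autoscaling_ignores_high_cpu_right_after_generation_change above pins down the behaviour named in the commit message: a CPU spike in the first data point after a config-generation change is treated as warmup and does not trigger scaling. The sketch below only illustrates that idea and is not the actual Autoscaler code; Sample, WARMUP and withoutWarmup are assumed names, and the five-minute window is an arbitrary value for the example:

    import java.time.Duration;
    import java.time.Instant;
    import java.util.List;
    import java.util.stream.Collectors;

    // Illustrative only, not the Vespa Autoscaler: drop samples taken during a warmup
    // window right after the cluster switched to the current config generation, so an
    // initial CPU spike does not drive a scaling decision.
    class WarmupFilterSketch {

        record Sample(Instant at, long generation, double cpu) { }

        static final Duration WARMUP = Duration.ofMinutes(5); // assumed window size, for illustration only

        static List<Sample> withoutWarmup(List<Sample> samples, long currentGeneration, Instant generationChangedAt) {
            Instant warmupEnd = generationChangedAt.plus(WARMUP);
            return samples.stream()
                          .filter(s -> s.generation() == currentGeneration)
                          .filter(s -> !s.at().isBefore(warmupEnd))
                          .collect(Collectors.toList());
        }
    }

With a filter of this kind the overloaded first sample is discarded as warmup data, which is why the test expects the last scaling event to stay at generation 1.
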
diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/AutoscalingMaintainerTester.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/AutoscalingMaintainerTester.java
index 021ca4bdf64..e1a1a2af5fb 100644
--- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/AutoscalingMaintainerTester.java
+++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/AutoscalingMaintainerTester.java
@@ -71,8 +71,9 @@ public class AutoscalingMaintainerTester {
return provisioningTester.deploy(application, cluster, capacity);
}
- public void addMeasurements(float cpu, float mem, float disk, long generation, int count, ApplicationId applicationId) {
+ public Duration addMeasurements(float cpu, float mem, float disk, long generation, int count, ApplicationId applicationId) {
NodeList nodes = nodeRepository().nodes().list(Node.State.active).owner(applicationId);
+ Instant startTime = clock().instant();
for (int i = 0; i < count; i++) {
for (Node node : nodes)
nodeRepository().metricsDb().addNodeMetrics(List.of(new Pair<>(node.hostname(),
@@ -82,7 +83,9 @@ public class AutoscalingMaintainerTester {
true,
true,
0.0))));
+ clock().advance(Duration.ofSeconds(150));
}
+ return Duration.between(startTime, clock().instant());
}
/** Creates the given number of measurements, spaced 5 minutes between, using the given function */
diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/ScalingSuggestionsMaintainerTest.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/ScalingSuggestionsMaintainerTest.java
index 03b41412896..4c0395a0c7e 100644
--- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/ScalingSuggestionsMaintainerTest.java
+++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/ScalingSuggestionsMaintainerTest.java
@@ -13,6 +13,7 @@ import com.yahoo.config.provision.NodeType;
import com.yahoo.config.provision.RegionName;
import com.yahoo.config.provision.Zone;
import com.yahoo.config.provisioning.FlavorsConfig;
+import com.yahoo.test.ManualClock;
import com.yahoo.vespa.hosted.provision.Node;
import com.yahoo.vespa.hosted.provision.NodeList;
import com.yahoo.vespa.hosted.provision.NodeRepository;
@@ -24,6 +25,7 @@ import com.yahoo.vespa.hosted.provision.provisioning.ProvisioningTester;
import org.junit.Test;
import java.time.Duration;
+import java.time.Instant;
import java.util.List;
import java.util.Optional;
@@ -41,14 +43,13 @@ public class ScalingSuggestionsMaintainerTest {
@Test
public void testScalingSuggestionsMaintainer() {
- ProvisioningTester tester = new ProvisioningTester.Builder().zone(new Zone(Environment.prod, RegionName.from("us-east3"))).flavorsConfig(flavorsConfig()).build();
-
+ ProvisioningTester tester = new ProvisioningTester.Builder().zone(new Zone(Environment.prod, RegionName.from("us-east3")))
+ .flavorsConfig(flavorsConfig())
+ .build();
ApplicationId app1 = ProvisioningTester.applicationId("app1");
- ClusterSpec cluster1 = ProvisioningTester.containerClusterSpec();
-
ApplicationId app2 = ProvisioningTester.applicationId("app2");
+ ClusterSpec cluster1 = ProvisioningTester.containerClusterSpec();
ClusterSpec cluster2 = ProvisioningTester.contentClusterSpec();
-
tester.makeReadyNodes(20, "flt", NodeType.host, 8);
tester.activateTenantHosts();
@@ -60,7 +61,8 @@ public class ScalingSuggestionsMaintainerTest {
false, true));
tester.clock().advance(Duration.ofHours(13));
- addMeasurements(0.90f, 0.90f, 0.90f, 0, 500, app1, tester.nodeRepository());
+ Duration timeAdded = addMeasurements(0.90f, 0.90f, 0.90f, 0, 500, app1, tester.nodeRepository());
+ tester.clock().advance(timeAdded.negated());
addMeasurements(0.99f, 0.99f, 0.99f, 0, 500, app2, tester.nodeRepository());
ScalingSuggestionsMaintainer maintainer = new ScalingSuggestionsMaintainer(tester.nodeRepository(),
@@ -114,10 +116,11 @@ public class ScalingSuggestionsMaintainerTest {
.shouldSuggestResources(currentResources);
}
- public void addMeasurements(float cpu, float memory, float disk, int generation, int count,
- ApplicationId applicationId,
- NodeRepository nodeRepository) {
+ public Duration addMeasurements(float cpu, float memory, float disk, int generation, int count,
+ ApplicationId applicationId,
+ NodeRepository nodeRepository) {
NodeList nodes = nodeRepository.nodes().list(Node.State.active).owner(applicationId);
+ Instant startTime = nodeRepository.clock().instant();
for (int i = 0; i < count; i++) {
for (Node node : nodes)
nodeRepository.metricsDb().addNodeMetrics(List.of(new Pair<>(node.hostname(),
@@ -127,7 +130,9 @@ public class ScalingSuggestionsMaintainerTest {
true,
true,
0.0))));
+ ((ManualClock)nodeRepository.clock()).advance(Duration.ofSeconds(150));
}
+ return Duration.between(startTime, nodeRepository.clock().instant());
}
private FlavorsConfig flavorsConfig() {
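
ScalingSuggestionsMaintainerTest's addMeasurements helper only receives a NodeRepository, so it drives time forward by downcasting nodeRepository.clock() to the ManualClock the test installed. A generic sketch of that downcast-and-advance pattern using only java.time types — MutableTestClock and addSamples are illustrative names, not Vespa classes:

    import java.time.Clock;
    import java.time.Duration;
    import java.time.Instant;
    import java.time.ZoneId;
    import java.time.ZoneOffset;

    // Illustrative only: a helper handed a plain java.time.Clock can still drive time
    // forward by downcasting to the mutable clock implementation the test installed,
    // mirroring the ((ManualClock) nodeRepository.clock()).advance(...) cast above.
    class ClockDowncastSketch {

        static final class MutableTestClock extends Clock {
            private Instant now = Instant.EPOCH;
            @Override public ZoneId getZone() { return ZoneOffset.UTC; }
            @Override public Clock withZone(ZoneId zone) { return this; }
            @Override public Instant instant() { return now; }
            void advance(Duration d) { now = now.plus(d); }
        }

        /** Records 'count' samples spaced 150 seconds apart and returns the time added. */
        static Duration addSamples(Clock clock, int count) {
            Instant start = clock.instant();
            for (int i = 0; i < count; i++) {
                // a real helper would store one metrics snapshot at clock.instant() here
                ((MutableTestClock) clock).advance(Duration.ofSeconds(150));
            }
            return Duration.between(start, clock.instant());
        }

        public static void main(String[] args) {
            MutableTestClock clock = new MutableTestClock();
            System.out.println("added " + addSamples(clock, 500) + " of samples");
        }
    }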