diff options
18 files changed, 208 insertions, 191 deletions
diff --git a/controller-api/src/main/java/com/yahoo/vespa/hosted/controller/api/integration/configserver/Cluster.java b/controller-api/src/main/java/com/yahoo/vespa/hosted/controller/api/integration/configserver/Cluster.java index b500cd1c133..08f975fbc29 100644 --- a/controller-api/src/main/java/com/yahoo/vespa/hosted/controller/api/integration/configserver/Cluster.java +++ b/controller-api/src/main/java/com/yahoo/vespa/hosted/controller/api/integration/configserver/Cluster.java @@ -82,35 +82,47 @@ public class Cluster { public static class Utilization { - private final double cpu, idealCpu, currentCpu, memory, idealMemory, currentMemory, disk, idealDisk, currentDisk; + private final double cpu, idealCpu, currentCpu, peakCpu; + private final double memory, idealMemory, currentMemory, peakMemory; + private final double disk, idealDisk, currentDisk, peakDisk; - public Utilization(double cpu, double idealCpu, double currentCpu, - double memory, double idealMemory, double currentMemory, - double disk, double idealDisk, double currentDisk) { + public Utilization(double cpu, double idealCpu, double currentCpu, double peakCpu, + double memory, double idealMemory, double currentMemory, double peakMemory, + double disk, double idealDisk, double currentDisk, double peakDisk) { this.cpu = cpu; this.idealCpu = idealCpu; this.currentCpu = currentCpu; + this.peakCpu = peakCpu; + this.memory = memory; this.idealMemory = idealMemory; this.currentMemory = currentMemory; + this.peakMemory = peakMemory; + this.disk = disk; this.idealDisk = idealDisk; this.currentDisk = currentDisk; + this.peakDisk = peakDisk; } public double cpu() { return cpu; } public double idealCpu() { return idealCpu; } public double currentCpu() { return currentCpu; } + public double peakCpu() { return peakCpu; } public double memory() { return memory; } public double idealMemory() { return idealMemory; } public double currentMemory() { return currentMemory; } + public double peakMemory() { return peakMemory; } public double disk() { return disk; } public double idealDisk() { return idealDisk; } public double currentDisk() { return currentDisk; } + public double peakDisk() { return peakDisk; } - public static Utilization empty() { return new Utilization(0, 0, 0, 0, 0, 0, 0, 0, 0); } + public static Utilization empty() { return new Utilization(0, 0, 0, 0, 0, + 0, 0, 0, + 0, 0, 0, 0); } } diff --git a/controller-api/src/main/java/com/yahoo/vespa/hosted/controller/api/integration/noderepository/ClusterUtilizationData.java b/controller-api/src/main/java/com/yahoo/vespa/hosted/controller/api/integration/noderepository/ClusterUtilizationData.java index 803daf6a8c6..6632ba47b7b 100644 --- a/controller-api/src/main/java/com/yahoo/vespa/hosted/controller/api/integration/noderepository/ClusterUtilizationData.java +++ b/controller-api/src/main/java/com/yahoo/vespa/hosted/controller/api/integration/noderepository/ClusterUtilizationData.java @@ -21,6 +21,8 @@ public class ClusterUtilizationData { public Double idealCpu; @JsonProperty("currentCpu") public Double currentCpu; + @JsonProperty("peakCpu") + public Double peakCpu; @JsonProperty("memory") public Double memory; @@ -28,6 +30,8 @@ public class ClusterUtilizationData { public Double idealMemory; @JsonProperty("currentMemory") public Double currentMemory; + @JsonProperty("peakMemory") + public Double peakMemory; @JsonProperty("disk") public Double disk; @@ -35,9 +39,13 @@ public class ClusterUtilizationData { public Double idealDisk; @JsonProperty("currentDisk") public Double currentDisk; + @JsonProperty("peakDisk") + public Double peakDisk; public Cluster.Utilization toClusterUtilization() { - return new Cluster.Utilization(cpu, idealCpu, currentCpu, memory, idealMemory, currentMemory, disk, idealDisk, currentDisk); + return new Cluster.Utilization(cpu, idealCpu, currentCpu, peakCpu, + memory, idealMemory, currentMemory, peakMemory, + disk, idealDisk, currentDisk, peakDisk); } } diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/restapi/application/ApplicationApiHandler.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/restapi/application/ApplicationApiHandler.java index 8d2fac84bc0..e79aafbc783 100644 --- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/restapi/application/ApplicationApiHandler.java +++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/restapi/application/ApplicationApiHandler.java @@ -2622,12 +2622,17 @@ public class ApplicationApiHandler extends AuditLoggingRequestHandler { utilizationObject.setDouble("cpu", utilization.cpu()); utilizationObject.setDouble("idealCpu", utilization.idealCpu()); utilizationObject.setDouble("currentCpu", utilization.currentCpu()); + utilizationObject.setDouble("peakCpu", utilization.peakCpu()); + utilizationObject.setDouble("memory", utilization.memory()); utilizationObject.setDouble("idealMemory", utilization.idealMemory()); utilizationObject.setDouble("currentMemory", utilization.currentMemory()); + utilizationObject.setDouble("peakMemory", utilization.peakMemory()); + utilizationObject.setDouble("disk", utilization.disk()); utilizationObject.setDouble("idealDisk", utilization.idealDisk()); utilizationObject.setDouble("currentDisk", utilization.currentDisk()); + utilizationObject.setDouble("peakDisk", utilization.peakDisk()); } private void scalingEventsToSlime(List<Cluster.ScalingEvent> scalingEvents, Cursor scalingEventsArray) { diff --git a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/integration/ConfigServerMock.java b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/integration/ConfigServerMock.java index 35a12f4b6d4..d7f83979054 100644 --- a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/integration/ConfigServerMock.java +++ b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/integration/ConfigServerMock.java @@ -115,7 +115,9 @@ public class ConfigServerMock extends AbstractComponent implements ConfigServer current, Optional.of(new ClusterResources(2, 1, new NodeResources(3, 8, 50, 1, slow, remote))), Optional.empty(), - new Cluster.Utilization(0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9), + new Cluster.Utilization(0.1, 0.2, 0.3, 0.35, + 0.4, 0.5, 0.6, 0.65, + 0.7, 0.8, 0.9, 1.0), List.of(new Cluster.ScalingEvent(new ClusterResources(0, 0, NodeResources.unspecified()), current, Instant.ofEpochMilli(1234), diff --git a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/restapi/application/responses/application-clusters.json b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/restapi/application/responses/application-clusters.json index 3eff2ab781a..137ea64eba7 100644 --- a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/restapi/application/responses/application-clusters.json +++ b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/restapi/application/responses/application-clusters.json @@ -58,12 +58,15 @@ "cpu": 0.1, "idealCpu": 0.2, "currentCpu": 0.3, + "peakCpu": 0.35, "memory": 0.4, "idealMemory": 0.5, "currentMemory": 0.6, + "peakMemory": 0.65, "disk": 0.7, "idealDisk": 0.8, - "currentDisk": 0.9 + "currentDisk": 0.9, + "peakDisk": 1.0 }, "scalingEvents": [ { diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/applications/AutoscalingStatus.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/applications/AutoscalingStatus.java index 71a6d661594..ea15b6a42cb 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/applications/AutoscalingStatus.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/applications/AutoscalingStatus.java @@ -46,9 +46,8 @@ public class AutoscalingStatus { @Override public boolean equals(Object o) { if (o == this) return true; - if ( ! ( o instanceof AutoscalingStatus)) return false; + if ( ! ( o instanceof AutoscalingStatus other)) return false; - AutoscalingStatus other = (AutoscalingStatus)o; if ( other.status != this.status ) return false; if ( ! other.description.equals(this.description) ) return false; return true; diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/Autoscaler.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/Autoscaler.java index 7a02fa9eb7e..4d50250e61a 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/Autoscaler.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/Autoscaler.java @@ -2,7 +2,6 @@ package com.yahoo.vespa.hosted.provision.autoscale; import com.yahoo.config.provision.ClusterResources; -import com.yahoo.config.provision.NodeResources; import com.yahoo.vespa.hosted.provision.Node; import com.yahoo.vespa.hosted.provision.NodeList; import com.yahoo.vespa.hosted.provision.NodeRepository; @@ -12,7 +11,6 @@ import com.yahoo.vespa.hosted.provision.applications.AutoscalingStatus.Status; import com.yahoo.vespa.hosted.provision.applications.Cluster; import java.time.Duration; -import java.time.Instant; import java.util.Objects; import java.util.Optional; @@ -70,24 +68,6 @@ public class Autoscaler { if ( ! clusterIsStable(clusterNodes, nodeRepository)) return Advice.none(Status.waiting, "Cluster change in progress"); - if (scaledIn(clusterModel.scalingDuration(), cluster)) - return Advice.dontScale(Status.waiting, - "Won't autoscale now: Less than " + clusterModel.scalingDuration() + - " since last resource change"); - - if (clusterModel.nodeTimeseries().measurementsPerNode() < minimumMeasurementsPerNode(clusterModel.scalingDuration())) - return Advice.none(Status.waiting, - "Collecting more data before making new scaling decisions: Need to measure for " + - clusterModel.scalingDuration() + " since the last resource change completed, " + - clusterModel.nodeTimeseries().measurementsPerNode() + " measurements per node found," + - " need at least " + minimumMeasurementsPerNode(clusterModel.scalingDuration())); - - if (clusterModel.nodeTimeseries().nodesMeasured() != clusterNodes.size()) - return Advice.none(Status.waiting, - "Collecting more data before making new scaling decisions:" + - " Have measurements from " + clusterModel.nodeTimeseries().nodesMeasured() + - " nodes, but require from " + clusterNodes.size()); - var currentAllocation = new AllocatableClusterResources(clusterNodes.asList(), nodeRepository); Optional<AllocatableClusterResources> bestAllocation = allocationOptimizer.findBestAllocation(clusterModel.loadAdjustment(), currentAllocation, clusterModel, limits); @@ -101,11 +81,6 @@ public class Autoscaler { return Advice.dontScale(Status.ideal, "Cluster is ideally scaled"); } - if (isDownscaling(bestAllocation.get(), currentAllocation) && scaledIn(clusterModel.scalingDuration().multipliedBy(3), cluster)) - return Advice.dontScale(Status.waiting, - "Waiting " + clusterModel.scalingDuration().multipliedBy(3) + - " since the last change before reducing resources"); - return Advice.scaleTo(bestAllocation.get().advertisedResources()); } @@ -134,7 +109,7 @@ public class Autoscaler { return ! similar(from.cost(), to.cost(), costDifferenceWorthReallocation); } - private static boolean meaningfulIncrease(double from, double to) { + public static boolean meaningfulIncrease(double from, double to) { return from < to && ! similar(from, to, resourceDifferenceWorthReallocation); } @@ -142,33 +117,10 @@ public class Autoscaler { return Math.abs(r1 - r2) / (( r1 + r2) / 2) < threshold; } - /** Returns true if this reduces total resources in any dimension */ - private boolean isDownscaling(AllocatableClusterResources target, AllocatableClusterResources current) { - NodeResources targetTotal = target.advertisedResources().totalResources(); - NodeResources currentTotal = current.advertisedResources().totalResources(); - return ! targetTotal.justNumbers().satisfies(currentTotal.justNumbers()); - } - - private boolean scaledIn(Duration delay, Cluster cluster) { - return cluster.lastScalingEvent().map(event -> event.at()).orElse(Instant.MIN) - .isAfter(nodeRepository.clock().instant().minus(delay)); - } - static Duration maxScalingWindow() { return Duration.ofHours(48); } - /** Returns the minimum measurements per node (average) we require to give autoscaling advice.*/ - private int minimumMeasurementsPerNode(Duration scalingWindow) { - // Measurements are ideally taken every minute, but no guarantees - // (network, nodes may be down, collecting is single threaded and may take longer than 1 minute to complete). - // Since the metric window is 5 minutes, we won't really improve from measuring more often. - long minimumMeasurements = scalingWindow.toMinutes() / 5; - minimumMeasurements = Math.round(0.8 * minimumMeasurements); // Allow 20% metrics collection blackout - if (minimumMeasurements < 1) minimumMeasurements = 1; - return (int)minimumMeasurements; - } - public static class Advice { private final boolean present; diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/ClusterModel.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/ClusterModel.java index ae18e7ffb91..e6f2a1216f3 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/ClusterModel.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/ClusterModel.java @@ -9,6 +9,7 @@ import com.yahoo.vespa.hosted.provision.applications.ScalingEvent; import java.time.Clock; import java.time.Duration; +import java.time.Instant; import java.util.Optional; import java.util.OptionalDouble; import java.util.logging.Level; @@ -93,16 +94,25 @@ public class ClusterModel { /** Returns the relative load adjustment that should be made to this cluster given available measurements. */ public Load loadAdjustment() { - if (nodeTimeseries().measurementsPerNode() == 0) return Load.one(); // No info, no change - /* - // Should we scale up? - Load relativePeak = nodeTimeseries().peakLoad().divide(idealLoad()); - if (relativePeak.any(v -> v > 1)) - return relativePeak.max(Load.one()); // Don't downscale any dimension if we upscale - - // Should we scale down? - */ - return averageLoad().divide(idealLoad()); + if (nodeTimeseries().isEmpty()) return Load.one(); + + Load adjustment = peakLoad().divide(idealLoad()); + if (! safeToScaleDown()) + adjustment = adjustment.map(v -> v < 1 ? 1 : v); + return adjustment; + } + + /** Are we in a position to make decisions to scale down at this point? */ + private boolean safeToScaleDown() { + if (hasScaledIn(scalingDuration().multipliedBy(3))) return false; + if (nodeTimeseries().measurementsPerNode() < 4) return false; + if (nodeTimeseries().nodesMeasured() != nodeCount()) return false; + return true; + } + + private boolean hasScaledIn(Duration period) { + return cluster.lastScalingEvent().map(event -> event.at()).orElse(Instant.MIN) + .isAfter(clock.instant().minus(period)); } /** Returns the predicted duration of a rescaling of this cluster */ @@ -127,11 +137,14 @@ public class ClusterModel { return queryFractionOfMax = clusterTimeseries().queryFractionOfMax(scalingDuration(), clock); } - /** Returns average of the last load reading from each node. */ + /** Returns the average of the last load measurement from each node. */ public Load currentLoad() { return nodeTimeseries().currentLoad(); } - /** Returns average load during the last {@link #scalingDuration()} */ - public Load averageLoad() { return nodeTimeseries().averageLoad(clock.instant().minus(scalingDuration())); } + /** Returns the average of all load measurements from all nodes*/ + public Load averageLoad() { return nodeTimeseries().averageLoad(); } + + /** Returns the average of the peak load measurement in each dimension, from each node. */ + public Load peakLoad() { return nodeTimeseries().peakLoad(); } /** The number of nodes this cluster has, or will have if not deployed yet. */ // TODO: Make this the deployed, not current count diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/ClusterNodesTimeseries.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/ClusterNodesTimeseries.java index ab5be045dd4..2eb57dcdd87 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/ClusterNodesTimeseries.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/ClusterNodesTimeseries.java @@ -48,6 +48,10 @@ public class ClusterNodesTimeseries { this.timeseries = timeseries; } + public boolean isEmpty() { + return measurementsPerNode() == 0; + } + /** Returns the average number of measurements per node */ public int measurementsPerNode() { if (clusterNodes.size() == 0) return 0; @@ -59,12 +63,11 @@ public class ClusterNodesTimeseries { public int nodesMeasured() { return timeseries.size(); } /** Returns the average load after the given instant */ - public Load averageLoad(Instant start) { + public Load averageLoad() { Load total = Load.zero(); int count = 0; for (var nodeTimeseries : timeseries) { for (var snapshot : nodeTimeseries.asList()) { - if (snapshot.at().isBefore(start)) continue; total = total.add(snapshot.load()); count++; } diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/Load.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/Load.java index 88c7e70cd35..6ab5ff731d3 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/Load.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/Load.java @@ -54,11 +54,6 @@ public class Load { return new Load(divide(cpu, resources.vcpu()), divide(memory, resources.memoryGb()), divide(disk, resources.diskGb())); } - /** Returns the load having the max value of this and the given load in each dimension. */ - public Load max(Load other) { - return join(other, (a, b) -> Math.max(a, b)); - } - /** Returns the load where the given function is applied to each dimension of this. */ public Load map(DoubleUnaryOperator f) { return new Load(f.applyAsDouble(cpu), diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/restapi/ApplicationSerializer.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/restapi/ApplicationSerializer.java index 1c10de8498a..3615b9afa97 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/restapi/ApplicationSerializer.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/restapi/ApplicationSerializer.java @@ -90,16 +90,22 @@ public class ApplicationSerializer { Load idealLoad = clusterModel.idealLoad(); Load averageLoad = clusterModel.averageLoad(); Load currentLoad = clusterModel.currentLoad(); + Load peakLoad = clusterModel.peakLoad(); utilizationObject.setDouble("cpu", averageLoad.cpu()); utilizationObject.setDouble("idealCpu", idealLoad.cpu()); utilizationObject.setDouble("currentCpu", currentLoad.cpu()); + utilizationObject.setDouble("peakCpu", peakLoad.cpu()); + utilizationObject.setDouble("memory", averageLoad.memory()); utilizationObject.setDouble("idealMemory", idealLoad.memory()); utilizationObject.setDouble("currentMemory", currentLoad.memory()); + utilizationObject.setDouble("peakMemory", peakLoad.memory()); + utilizationObject.setDouble("disk", averageLoad.disk()); utilizationObject.setDouble("idealDisk", idealLoad.disk()); utilizationObject.setDouble("currentDisk", currentLoad.disk()); + utilizationObject.setDouble("peakDisk", peakLoad.disk()); } private static void scalingEventsToSlime(List<ScalingEvent> scalingEvents, Cursor scalingEventsArray) { diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/AutoscalingTest.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/AutoscalingTest.java index 28f37546eb6..e4389e84255 100644 --- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/AutoscalingTest.java +++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/AutoscalingTest.java @@ -19,7 +19,6 @@ import com.yahoo.vespa.hosted.provision.NodeRepository; import com.yahoo.vespa.hosted.provision.Nodelike; import com.yahoo.vespa.hosted.provision.provisioning.CapacityPolicies; import com.yahoo.vespa.hosted.provision.provisioning.HostResourcesCalculator; -import org.junit.Ignore; import org.junit.Test; import java.time.Duration; @@ -37,16 +36,9 @@ public class AutoscalingTest { public void test_autoscaling_single_content_group() { var fixture = AutoscalingTester.fixture().build(); - fixture.tester().clock().advance(Duration.ofDays(1)); - assertTrue("No measurements -> No change", fixture.autoscale().isEmpty()); - - fixture.loader().applyCpuLoad(0.7f, 59); - assertTrue("Too few measurements -> No change", fixture.autoscale().isEmpty()); - - fixture.tester().clock().advance(Duration.ofDays(1)); - fixture.loader().applyCpuLoad(0.7f, 120); + fixture.loader().applyCpuLoad(0.7f, 10); ClusterResources scaledResources = fixture.tester().assertResources("Scaling up since resource usage is too high", - 9, 1, 2.8, 5.0, 50.0, + 8, 1, 6, 5.7, 57.1, fixture.autoscale()); fixture.deploy(Capacity.from(scaledResources)); @@ -54,17 +46,16 @@ public class AutoscalingTest { fixture.deactivateRetired(Capacity.from(scaledResources)); - fixture.tester().clock().advance(Duration.ofDays(2)); - fixture.loader().applyCpuLoad(0.8f, 3); - assertTrue("Load change is large, but insufficient measurements for new config -> No change", - fixture.autoscale().isEmpty()); - - fixture.loader().applyCpuLoad(0.19f, 100); + fixture.loader().applyCpuLoad(0.19f, 10); assertEquals("Load change is small -> No change", Optional.empty(), fixture.autoscale().target()); - fixture.loader().applyCpuLoad(0.1f, 120); + fixture.loader().applyCpuLoad(0.1f, 10); + assertEquals("Too little time passed for downscaling -> No change", Optional.empty(), fixture.autoscale().target()); + + fixture.tester().clock().advance(Duration.ofDays(2)); + fixture.loader().applyCpuLoad(0.1f, 10); fixture.tester().assertResources("Scaling cpu down since usage has gone down significantly", - 9, 1, 1.0, 5.0, 50.0, + 9, 1, 1.3, 4.4, 43.7, fixture.autoscale()); } @@ -72,18 +63,48 @@ public class AutoscalingTest { @Test public void test_no_autoscaling_with_no_measurements() { var fixture = AutoscalingTester.fixture().build(); - System.out.println(fixture.autoscale()); assertTrue(fixture.autoscale().target().isEmpty()); } /** Using too many resources for a short period is proof we should scale up regardless of the time that takes. */ @Test - @Ignore // TODO public void test_autoscaling_up_is_fast() { var fixture = AutoscalingTester.fixture().build(); - fixture.loader().applyLoad(1.0, 1.0, 1.0, 1); + fixture.loader().applyLoad(new Load(1.0, 1.0, 1.0), 3); fixture.tester().assertResources("Scaling up since resource usage is too high", - 10, 1, 9.4, 8.5, 92.6, + 10, 1, 7.2, 8.5, 92.6, + fixture.autoscale()); + } + + /** When scaling up, disregard underutilized dimensions (memory here) */ + @Test + public void test_only_autoscaling_up_quickly() { + var fixture = AutoscalingTester.fixture().build(); + fixture.loader().applyLoad(new Load(1.0, 0.1, 1.0), 10); + fixture.tester().assertResources("Scaling up (only) since resource usage is too high", + 10, 1, 8.6, 4.4, 92.6, + fixture.autoscale()); + } + + /** When ok to scale down, scale in both directions simultaneously (compare to test_only_autoscaling_up_quickly) */ + @Test + public void test_scale_in_both_directions_when_ok_to_scale_down() { + var fixture = AutoscalingTester.fixture().build(); + fixture.tester.clock().advance(Duration.ofDays(2)); + fixture.loader().applyLoad(new Load(1.0, 0.1, 1.0), 10); + fixture.tester().assertResources("Scaling up (only) since resource usage is too high", + 10, 1, 8.6, 4.0, 92.6, + fixture.autoscale()); + } + + @Test + public void test_autoscaling_uses_peak() { + var fixture = AutoscalingTester.fixture().build(); + fixture.loader().applyCpuLoad(0.01, 100); + fixture.loader().applyCpuLoad(0.70, 1); + fixture.loader().applyCpuLoad(0.01, 100); + fixture.tester().assertResources("Scaling up since peak resource usage is too high", + 8, 1, 6.5, 5.7, 57.1, fixture.autoscale()); } @@ -91,14 +112,16 @@ public class AutoscalingTest { @Test public void test_autoscaling_single_container_group() { var fixture = AutoscalingTester.fixture().clusterType(ClusterSpec.Type.container).build(); + fixture.loader().applyCpuLoad(0.25f, 120); - ClusterResources scaledResources = fixture.tester().assertResources("Scaling up since cpu usage is too high", - 5, 1, 3.8, 8.0, 50.5, + ClusterResources scaledResources = fixture.tester().assertResources("Scaling cpu up", + 5, 1, 3.8, 8.0, 80.0, fixture.autoscale()); fixture.deploy(Capacity.from(scaledResources)); + fixture.deactivateRetired(Capacity.from(scaledResources)); fixture.loader().applyCpuLoad(0.1f, 120); fixture.tester().assertResources("Scaling down since cpu usage has gone down", - 4, 1, 2.5, 6.4, 25.5, + 4, 1, 2.5, 6.4, 64.0, fixture.autoscale()); } @@ -124,7 +147,7 @@ public class AutoscalingTest { new NodeResources(100, 1000, 1000, 1, DiskSpeed.any)); var capacity = Capacity.from(min, max); ClusterResources scaledResources = fixture.tester().assertResources("Scaling up", - 14, 1, 1.4, 30.8, 30.8, + 13, 1, 1.5, 26.7, 26.7, fixture.autoscale(capacity)); assertEquals("Disk speed from new capacity is used", DiskSpeed.any, scaledResources.nodeResources().diskSpeed()); @@ -165,7 +188,7 @@ public class AutoscalingTest { .capacity(Capacity.from(min, max)).build(); fixture.tester().clock().advance(Duration.ofDays(1)); - fixture.loader().applyLoad(0.25, 0.95, 0.95, 120); + fixture.loader().applyLoad(new Load(0.25, 0.95, 0.95), 120); fixture.tester().assertResources("Scaling up to limit since resource usage is too high", 6, 1, 2.4, 78.0, 79.0, fixture.autoscale()); @@ -179,7 +202,7 @@ public class AutoscalingTest { // deploy fixture.tester().clock().advance(Duration.ofDays(2)); - fixture.loader().applyLoad(0.05f, 0.05f, 0.05f, 120); + fixture.loader().applyLoad(new Load(0.05f, 0.05f, 0.05f), 120); fixture.tester().assertResources("Scaling down to limit since resource usage is low", 4, 1, 1.8, 7.4, 13.9, fixture.autoscale()); @@ -201,7 +224,7 @@ public class AutoscalingTest { 2, 1, defaultResources, fixture.nodes().toResources()); fixture.tester().clock().advance(Duration.ofDays(2)); - fixture.loader().applyLoad(0.25, 0.95, 0.95, 120); + fixture.loader().applyLoad(new Load(0.25, 0.95, 0.95), 120); fixture.tester().assertResources("Scaling up", 5, 1, defaultResources.vcpu(), defaultResources.memoryGb(), defaultResources.diskGb(), @@ -218,9 +241,9 @@ public class AutoscalingTest { .capacity(Capacity.from(min, max)) .build(); fixture.tester().clock().advance(Duration.ofDays(2)); - fixture.loader().applyCpuLoad(0.3, 240); - fixture.tester().assertResources("Scaling up", - 6, 6, 3.8, 8.0, 10.0, + fixture.loader().applyCpuLoad(0.4, 240); + fixture.tester().assertResources("Scaling cpu up", + 6, 6, 5.0, 8.0, 10.0, fixture.autoscale()); } @@ -249,7 +272,7 @@ public class AutoscalingTest { .build(); fixture.tester().clock().advance(Duration.ofDays(2)); - fixture.loader().applyLoad(0.01, 0.01, 0.01, 120); + fixture.loader().applyLoad(new Load(0.01, 0.01, 0.01), 120); Autoscaler.Advice suggestion = fixture.suggest(); fixture.tester().assertResources("Choosing the remote disk flavor as it has less disk", 2, 1, 3.0, 100.0, 10.0, @@ -272,7 +295,7 @@ public class AutoscalingTest { .build(); fixture.tester().clock().advance(Duration.ofDays(2)); - fixture.loader().applyLoad(0.01, 0.01, 0.01, 120); + fixture.loader().applyLoad(new Load(0.01, 0.01, 0.01), 120); Autoscaler.Advice suggestion = fixture.suggest(); fixture.tester().assertResources("Always prefers local disk for content", 2, 1, 3.0, 100.0, 75.0, @@ -288,7 +311,7 @@ public class AutoscalingTest { fixture.tester().clock().advance(Duration.ofDays(2)); fixture.loader().applyCpuLoad(1.0, 120); fixture.tester().assertResources("Suggesting above capacity limit", - 8, 1, 9.3, 5.7, 57.1, + 8, 1, 9.3, 4.6, 45.7, fixture.tester().suggest(fixture.applicationId, fixture.clusterSpec.id(), min, min)); } @@ -296,18 +319,18 @@ public class AutoscalingTest { public void not_using_out_of_service_measurements() { var fixture = AutoscalingTester.fixture().build(); fixture.tester().clock().advance(Duration.ofDays(2)); - fixture.loader().applyLoad(0.9, 0.6, 0.7, 1, false, true, 120); + fixture.loader().applyLoad(new Load(0.9, 0.6, 0.7), 1, false, true, 120); assertTrue("Not scaling up since nodes were measured while cluster was out of service", - fixture.autoscale().isEmpty()); + fixture.autoscale().target().isEmpty()); } @Test public void not_using_unstable_measurements() { var fixture = AutoscalingTester.fixture().build(); fixture.tester().clock().advance(Duration.ofDays(2)); - fixture.loader().applyLoad(0.9, 0.6, 0.7, 1, true, false, 120); - assertTrue("Not scaling up since nodes were measured while cluster was out of service", - fixture.autoscale().isEmpty()); + fixture.loader().applyLoad(new Load(0.9, 0.6, 0.7), 1, true, false, 120); + assertTrue("Not scaling up since nodes were measured while cluster was unstable", + fixture.autoscale().target().isEmpty()); } @Test @@ -321,7 +344,7 @@ public class AutoscalingTest { .build(); fixture.tester().clock().advance(Duration.ofDays(2)); fixture.loader().applyCpuLoad(0.9, 120); - fixture.tester().assertResources("Scaling the number of groups, but nothing requires us to stay with 1 node per group", + fixture.tester().assertResources("Scaling up to 2 nodes, scaling memory and disk down at the same time", 10, 5, 7.7, 40.0, 40.0, fixture.autoscale()); } @@ -339,7 +362,7 @@ public class AutoscalingTest { Duration timePassed = fixture.loader().addCpuMeasurements(0.25, 120); fixture.tester().clock().advance(timePassed.negated()); fixture.loader().addLoadMeasurements(10, t -> t == 0 ? 20.0 : 10.0, t -> 1.0); - fixture.tester().assertResources("Scaling up since resource usage is too high, changing to 1 group is cheaper", + fixture.tester().assertResources("Scaling up cpu, others down, changing to 1 group is cheaper", 10, 1, 2.3, 27.8, 27.8, fixture.autoscale()); } @@ -375,7 +398,7 @@ public class AutoscalingTest { fixture.tester().clock().advance(Duration.ofDays(1)); fixture.loader().applyMemLoad(1.0, 1000); fixture.tester().assertResources("Increase group size to reduce memory load", - 8, 2, 6.5, 96.2, 62.5, + 8, 2, 4.5, 96.2, 62.5, fixture.autoscale()); } @@ -389,21 +412,21 @@ public class AutoscalingTest { .capacity(Capacity.from(min, max)) .build(); fixture.tester().clock().advance(Duration.ofDays(2)); - fixture.loader().applyMemLoad(0.02, 120); + fixture.loader().applyLoad(new Load(0.16, 0.02, 0.5), 120); fixture.tester().assertResources("Scaling down", - 6, 1, 3.1, 4.0, 100.0, + 6, 1, 3.0, 4.0, 100.0, fixture.autoscale()); } @Test public void scaling_down_only_after_delay() { var fixture = AutoscalingTester.fixture().build(); - fixture.loader().applyMemLoad(0.02, 120); + fixture.loader().applyCpuLoad(0.02, 120); assertTrue("Too soon after initial deployment", fixture.autoscale().target().isEmpty()); fixture.tester().clock().advance(Duration.ofDays(2)); - fixture.loader().applyMemLoad(0.02, 120); + fixture.loader().applyCpuLoad(0.02, 120); fixture.tester().assertResources("Scaling down since enough time has passed", - 6, 1, 1.2, 4.0, 80.0, + 5, 1, 1.0, 8.0, 80.0, fixture.autoscale()); } @@ -413,7 +436,7 @@ public class AutoscalingTest { var fixture = AutoscalingTester.fixture() .resourceCalculator(new OnlySubtractingWhenForecastingCalculator(0)) .build(); - fixture.loader().applyLoad(1.0, 1.0, 0.7, 1000); + fixture.loader().applyLoad(new Load(1.0, 1.0, 0.7), 1000); fixture.tester().assertResources("Scaling up", 9, 1, 5.0, 9.6, 72.9, fixture.autoscale()); @@ -423,7 +446,7 @@ public class AutoscalingTest { var fixture = AutoscalingTester.fixture() .resourceCalculator(new OnlySubtractingWhenForecastingCalculator(3)) .build(); - fixture.loader().applyLoad(1.0, 1.0, 0.7, 1000); + fixture.loader().applyLoad(new Load(1.0, 1.0, 0.7), 1000); fixture.tester().assertResources("With 3Gb memory tax, we scale up memory more", 7, 1, 6.4, 15.8, 97.2, fixture.autoscale()); @@ -448,7 +471,7 @@ public class AutoscalingTest { fixture.tester().clock().advance(Duration.ofDays(2)); fixture.loader().applyMemLoad(0.9, 120); var scaledResources = fixture.tester().assertResources("Scaling up since resource usage is too high.", - 8, 1, 3, 80, 57.1, + 8, 1, 3, 80, 45.7, fixture.autoscale()); fixture.deploy(Capacity.from(scaledResources)); fixture.deactivateRetired(Capacity.from(scaledResources)); @@ -456,7 +479,7 @@ public class AutoscalingTest { fixture.tester().clock().advance(Duration.ofDays(2)); fixture.loader().applyMemLoad(0.3, 1000); fixture.tester().assertResources("Scaling down since resource usage has gone down", - 5, 1, 3, 80, 100.0, + 5, 1, 3, 80, 70.0, fixture.autoscale()); } @@ -467,22 +490,25 @@ public class AutoscalingTest { var fixture = AutoscalingTester.fixture() .capacity(Capacity.from(min, max)) .build(); + fixture.tester.clock().advance(Duration.ofDays(1)); fixture.loader().applyCpuLoad(0.25, 120); - // (no read share stored) fixture.tester().assertResources("Advice to scale up since we set aside for bcp by default", 7, 1, 3, 100, 100, fixture.autoscale()); + fixture.loader().applyCpuLoad(0.25, 120); fixture.storeReadShare(0.25, 0.5); fixture.tester().assertResources("Half of global share is the same as the default assumption used above", 7, 1, 3, 100, 100, fixture.autoscale()); + fixture.tester.clock().advance(Duration.ofDays(1)); + fixture.loader().applyCpuLoad(0.25, 120); fixture.storeReadShare(0.5, 0.5); fixture.tester().assertResources("Advice to scale down since we don't need room for bcp", - 6, 1, 3, 100, 100, + 5, 1, 3, 100, 100, fixture.autoscale()); } @@ -496,7 +522,7 @@ public class AutoscalingTest { fixture.loader().addCpuMeasurements(0.25, 200); fixture.tester().assertResources("Scale up since we assume we need 2x cpu for growth when no data scaling time data", - 9, 1, 2.1, 5, 50, + 9, 1, 2.1, 4, 40, fixture.autoscale()); fixture.setScalingDuration(Duration.ofMinutes(5)); @@ -505,7 +531,7 @@ public class AutoscalingTest { fixture.tester.clock().advance(timeAdded.negated()); fixture.loader().addCpuMeasurements(0.25, 200); fixture.tester().assertResources("Scale down since observed growth is slower than scaling time", - 9, 1, 1.8, 5, 50, + 9, 1, 1.8, 4, 40, fixture.autoscale()); fixture.setScalingDuration(Duration.ofMinutes(60)); @@ -516,7 +542,7 @@ public class AutoscalingTest { fixture.tester.clock().advance(timeAdded.negated()); fixture.loader().addCpuMeasurements(0.25, 200); fixture.tester().assertResources("Scale up since observed growth is faster than scaling time", - 9, 1, 2.1, 5, 50, + 9, 1, 2.1, 4, 40, fixture.autoscale()); } @@ -534,7 +560,7 @@ public class AutoscalingTest { fixture.tester.clock().advance(timeAdded.negated()); fixture.loader().addCpuMeasurements(0.4, 200); fixture.tester.assertResources("Query and write load is equal -> scale up somewhat", - 9, 1, 2.4, 5, 50, + 9, 1, 2.4, 4, 40, fixture.autoscale()); fixture.tester().clock().advance(Duration.ofDays(2)); @@ -543,7 +569,7 @@ public class AutoscalingTest { fixture.loader().addCpuMeasurements(0.4, 200); // TODO: Ackhually, we scale down here - why? fixture.tester().assertResources("Query load is 4x write load -> scale up more", - 9, 1, 2.1, 5, 50, + 9, 1, 2.1, 4, 40, fixture.autoscale()); fixture.tester().clock().advance(Duration.ofDays(2)); @@ -551,7 +577,7 @@ public class AutoscalingTest { fixture.tester.clock().advance(timeAdded.negated()); fixture.loader().addCpuMeasurements(0.4, 200); fixture.tester().assertResources("Write load is 10x query load -> scale down", - 9, 1, 1.1, 5, 50, + 9, 1, 1.1, 4, 40, fixture.autoscale()); fixture.tester().clock().advance(Duration.ofDays(2)); @@ -559,7 +585,7 @@ public class AutoscalingTest { fixture.tester.clock().advance(timeAdded.negated()); fixture.loader().addCpuMeasurements(0.4, 200); fixture.tester().assertResources("Query only -> largest possible", - 8, 1, 4.9, 5.7, 57.1, + 8, 1, 4.6, 4.6, 45.7, fixture.autoscale()); fixture.tester().clock().advance(Duration.ofDays(2)); @@ -567,7 +593,7 @@ public class AutoscalingTest { fixture.tester.clock().advance(timeAdded.negated()); fixture.loader().addCpuMeasurements(0.4, 200); fixture.tester().assertResources("Write only -> smallest possible", - 6, 1, 1.0, 8, 80, + 6, 1, 1.0, 6.4, 64, fixture.autoscale()); } @@ -577,7 +603,7 @@ public class AutoscalingTest { .zone(new Zone(Environment.dev, RegionName.from("us-east"))) .build(); fixture.tester().clock().advance(Duration.ofDays(2)); - fixture.loader().applyLoad(1.0, 1.0, 1.0, 200); + fixture.loader().applyLoad(new Load(1.0, 1.0, 1.0), 200); assertTrue("Not attempting to scale up because policies dictate we'll only get one node", fixture.autoscale().target().isEmpty()); } @@ -598,7 +624,7 @@ public class AutoscalingTest { .zone(new Zone(Environment.dev, RegionName.from("us-east"))) .build(); fixture.tester().clock().advance(Duration.ofDays(2)); - fixture.loader().applyLoad(1.0, 1.0, 1.0, 200); + fixture.loader().applyLoad(new Load(1.0, 1.0, 1.0), 200); fixture.tester().assertResources("We scale even in dev because resources are required", 3, 1, 1.0, 7.7, 83.3, fixture.autoscale()); @@ -617,7 +643,7 @@ public class AutoscalingTest { .zone(new Zone(Environment.dev, RegionName.from("us-east"))) .build(); fixture.tester().clock().advance(Duration.ofDays(2)); - fixture.loader().applyLoad(1.0, 1.0, 1.0, 200); + fixture.loader().applyLoad(new Load(1.0, 1.0, 1.0), 200); fixture.tester().assertResources("We scale even in dev because resources are required", 3, 1, 1.5, 8, 50, fixture.autoscale()); diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/Loader.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/Loader.java index c0203f5f202..7aaaceb0fdd 100644 --- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/Loader.java +++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/Loader.java @@ -19,6 +19,7 @@ import java.util.function.IntFunction; public class Loader { private final Fixture fixture; + private final Duration samplingInterval = Duration.ofSeconds(150L); public Loader(Fixture fixture) { this.fixture = fixture; @@ -33,16 +34,14 @@ public class Loader { * @param count the number of measurements */ public Duration addCpuMeasurements(double value, int count) { - var idealLoad = fixture.clusterModel().idealLoad(); // TODO: Use this + var idealLoad = fixture.clusterModel().idealLoad(); NodeList nodes = fixture.nodes(); float oneExtraNodeFactor = (float)(nodes.size() - 1.0) / (nodes.size()); + Load load = new Load(value, idealLoad.memory(), idealLoad.disk()).multiply(oneExtraNodeFactor); Instant initialTime = fixture.tester().clock().instant(); for (int i = 0; i < count; i++) { - fixture.tester().clock().advance(Duration.ofSeconds(150)); + fixture.tester().clock().advance(samplingInterval); for (Node node : nodes) { - Load load = new Load(value, - ClusterModel.idealMemoryLoad, - ClusterModel.idealContentDiskLoad).multiply(oneExtraNodeFactor); fixture.tester().nodeMetricsDb().addNodeMetrics(List.of(new Pair<>(node.hostname(), new NodeMetricSnapshot(fixture.tester().clock().instant(), load, @@ -63,23 +62,21 @@ public class Loader { Map.of(fixture.clusterId(), new ClusterMetricSnapshot(fixture.tester().clock().instant(), queryRate.apply(i), writeRate.apply(i)))); - fixture.tester().clock().advance(Duration.ofMinutes(5)); + fixture.tester().clock().advance(samplingInterval); } return Duration.between(initialTime, fixture.tester().clock().instant()); } public void applyCpuLoad(double cpuLoad, int measurements) { - Duration samplingInterval = Duration.ofSeconds(150L); // in addCpuMeasurements addCpuMeasurements((float)cpuLoad, measurements); fixture.tester().clock().advance(samplingInterval.negated().multipliedBy(measurements)); - addQueryRateMeasurements(measurements, samplingInterval, t -> t == 0 ? 20.0 : 10.0); // Query traffic only + addQueryRateMeasurements(measurements, t -> t == 0 ? 20.0 : 10.0); // Query traffic only } public void applyMemLoad(double memLoad, int measurements) { - Duration samplingInterval = Duration.ofSeconds(150L); // in addMemMeasurements addMemMeasurements(memLoad, measurements); fixture.tester().clock().advance(samplingInterval.negated().multipliedBy(measurements)); - addQueryRateMeasurements(measurements, samplingInterval, t -> t == 0 ? 20.0 : 10.0); // Query traffic only + addQueryRateMeasurements(measurements, t -> t == 0 ? 20.0 : 10.0); // Query traffic only } /** @@ -89,15 +86,13 @@ public class Loader { * wanting to see the ideal load with one node missing.) */ public void addMemMeasurements(double value, int count) { - var idealLoad = fixture.clusterModel().idealLoad(); // TODO: Use this + var idealLoad = fixture.clusterModel().idealLoad(); NodeList nodes = fixture.nodes(); float oneExtraNodeFactor = (float)(nodes.size() - 1.0) / (nodes.size()); + Load load = new Load(idealLoad.cpu(), value, idealLoad.disk()).multiply(oneExtraNodeFactor); for (int i = 0; i < count; i++) { - fixture.tester().clock().advance(Duration.ofMinutes(1)); + fixture.tester().clock().advance(samplingInterval); for (Node node : nodes) { - Load load = new Load(0.2, - value, - ClusterModel.idealContentDiskLoad).multiply(oneExtraNodeFactor); fixture.tester().nodeMetricsDb().addNodeMetrics(List.of(new Pair<>(node.hostname(), new NodeMetricSnapshot(fixture.tester().clock().instant(), load, @@ -109,19 +104,18 @@ public class Loader { } } - public Duration addMeasurements(double cpu, double memory, double disk, int count) { - return addMeasurements(cpu, memory, disk, 0, true, true, count); + public Duration addMeasurements(Load load, int count) { + return addMeasurements(load, 0, true, true, count); } - public Duration addMeasurements(double cpu, double memory, double disk, int generation, boolean inService, boolean stable, - int count) { + public Duration addMeasurements(Load load, int generation, boolean inService, boolean stable, int count) { Instant initialTime = fixture.tester().clock().instant(); for (int i = 0; i < count; i++) { - fixture.tester().clock().advance(Duration.ofMinutes(1)); + fixture.tester().clock().advance(samplingInterval); for (Node node : fixture.nodes()) { fixture.tester().nodeMetricsDb().addNodeMetrics(List.of(new Pair<>(node.hostname(), new NodeMetricSnapshot(fixture.tester().clock().instant(), - new Load(cpu, memory, disk), + load, generation, inService, stable, @@ -131,21 +125,19 @@ public class Loader { return Duration.between(initialTime, fixture.tester().clock().instant()); } - public void applyLoad(double cpuLoad, double memoryLoad, double diskLoad, int measurements) { - Duration samplingInterval = Duration.ofSeconds(150L); // in addCpuMeasurements - addMeasurements(cpuLoad, memoryLoad, diskLoad, measurements); + public void applyLoad(Load load, int measurements) { + addMeasurements(load, measurements); fixture.tester().clock().advance(samplingInterval.negated().multipliedBy(measurements)); - addQueryRateMeasurements(measurements, samplingInterval, t -> t == 0 ? 20.0 : 10.0); // Query traffic only + addQueryRateMeasurements(measurements, t -> t == 0 ? 20.0 : 10.0); // Query traffic only } - public void applyLoad(double cpuLoad, double memoryLoad, double diskLoad, int generation, boolean inService, boolean stable, int measurements) { - Duration samplingInterval = Duration.ofSeconds(150L); // in addCpuMeasurements - addMeasurements(cpuLoad, memoryLoad, diskLoad, generation, inService, stable, measurements); + public void applyLoad(Load load, int generation, boolean inService, boolean stable, int measurements) { + addMeasurements(load, generation, inService, stable, measurements); fixture.tester().clock().advance(samplingInterval.negated().multipliedBy(measurements)); - addQueryRateMeasurements(measurements, samplingInterval, t -> t == 0 ? 20.0 : 10.0); // Query traffic only + addQueryRateMeasurements(measurements, t -> t == 0 ? 20.0 : 10.0); // Query traffic only } - public Duration addQueryRateMeasurements(int measurements, Duration samplingInterval, IntFunction<Double> queryRate) { + public Duration addQueryRateMeasurements(int measurements, IntFunction<Double> queryRate) { Instant initialTime = fixture.tester().clock().instant(); for (int i = 0; i < measurements; i++) { fixture.tester().nodeMetricsDb().addClusterMetrics(fixture.applicationId(), diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/AutoscalingMaintainerTest.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/AutoscalingMaintainerTest.java index cee80459176..f74ace5bd3b 100644 --- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/AutoscalingMaintainerTest.java +++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/AutoscalingMaintainerTest.java @@ -152,8 +152,9 @@ public class AutoscalingMaintainerTest { // deploy tester.deploy(app1, cluster1, app1Capacity); - tester.addQueryRateMeasurements(app1, cluster1.id(), 12, t -> t == 0 ? 20.0 : 10.0); + int measurements = 5; + Duration samplePeriod = Duration.ofSeconds(150); for (int i = 0; i < 20; i++) { // Record completion to keep scaling window at minimum tester.addMeasurements(0.1f, 0.1f, 0.1f, i, 1, app1); @@ -162,25 +163,20 @@ public class AutoscalingMaintainerTest { tester.clock().advance(Duration.ofDays(1)); if (i % 2 == 0) { // high load - for (int j = 0; j < 200; j++ ) { - tester.addMeasurements(0.99f, 0.99f, 0.99f, i, 1, app1); - tester.clock().advance(Duration.ofMinutes(1)); - } + tester.addMeasurements(0.99f, 0.99f, 0.99f, i, measurements, app1); } else { // low load - for (int j = 0; j < 200; j++ ) { - tester.addMeasurements(0.2f, 0.2f, 0.2f, i, 1, app1); - tester.clock().advance(Duration.ofMinutes(1)); - } + tester.addMeasurements(0.2f, 0.2f, 0.2f, i, measurements, app1); } - tester.addQueryRateMeasurements(app1, cluster1.id(), 2, t -> (t == 0 ? 20.0 : 10.0 )); + tester.clock().advance(samplePeriod.negated().multipliedBy(measurements)); + tester.addQueryRateMeasurements(app1, cluster1.id(), measurements, t -> (t == 0 ? 20.0 : 10.0 )); tester.maintainer().maintain(); } assertEquals(Cluster.maxScalingEvents, tester.cluster(app1, cluster1).scalingEvents().size()); assertEquals("The latest rescaling is the last event stored", tester.clock().instant(), - tester.cluster(app1, cluster1).scalingEvents().get(Cluster.maxScalingEvents - 1).at()); + tester.cluster(app1, cluster1).scalingEvents().get(Cluster.maxScalingEvents - 1).completion().get()); } @Test diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/AutoscalingMaintainerTester.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/AutoscalingMaintainerTester.java index e1a1a2af5fb..d921af9543e 100644 --- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/AutoscalingMaintainerTester.java +++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/AutoscalingMaintainerTester.java @@ -93,11 +93,10 @@ public class AutoscalingMaintainerTester { ClusterSpec.Id cluster, int measurements, IntFunction<Double> queryRate) { - Instant time = clock().instant(); for (int i = 0; i < measurements; i++) { nodeRepository().metricsDb().addClusterMetrics(application, - Map.of(cluster, new ClusterMetricSnapshot(time, queryRate.apply(i), 0.0))); - time = time.plus(Duration.ofMinutes(5)); + Map.of(cluster, new ClusterMetricSnapshot(clock().instant(), queryRate.apply(i), 0.0))); + clock().advance(Duration.ofSeconds(150)); } } diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/ScalingSuggestionsMaintainerTest.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/ScalingSuggestionsMaintainerTest.java index 6d007368db5..508168261df 100644 --- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/ScalingSuggestionsMaintainerTest.java +++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/ScalingSuggestionsMaintainerTest.java @@ -97,7 +97,7 @@ public class ScalingSuggestionsMaintainerTest { var suggested = tester.nodeRepository().applications().get(app1).get().cluster(cluster1.id()).get().suggestedResources().get().resources(); tester.deploy(app1, cluster1, Capacity.from(suggested, suggested, false, true)); tester.clock().advance(Duration.ofDays(2)); - addMeasurements(0.2f, 0.7f, 0.6f, + addMeasurements(0.2f, 0.65f, 0.6f, 0, 500, app1, tester.nodeRepository()); maintainer.maintain(); assertEquals("Suggestion is to keep the current allocation", diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/restapi/responses/application1.json b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/restapi/responses/application1.json index 40719153b9e..9ae495a7396 100644 --- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/restapi/responses/application1.json +++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/restapi/responses/application1.json @@ -73,12 +73,15 @@ "cpu" : 0.0, "idealCpu": 0.1375, "currentCpu": 0.0, + "peakCpu": 0.0, "memory" : 0.0, "idealMemory": 0.65, "currentMemory": 0.0, + "peakMemory": 0.0, "disk" : 0.0, "idealDisk": 0.95, - "currentDisk": 0.0 + "currentDisk": 0.0, + "peakDisk": 0.0 }, "scalingEvents" : [ { diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/restapi/responses/application2.json b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/restapi/responses/application2.json index 41aa4257c00..5babf5fc843 100644 --- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/restapi/responses/application2.json +++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/restapi/responses/application2.json @@ -47,12 +47,15 @@ "cpu" : 0.0, "idealCpu": 0.1394913986537023, "currentCpu": 0.0, + "peakCpu": 0.0, "memory" : 0.0, "idealMemory": 0.325, "currentMemory": 0.0, + "peakMemory": 0.0, "disk" : 0.0, "idealDisk": 0.3, - "currentDisk": 0.0 + "currentDisk": 0.0, + "peakDisk": 0.0 }, "scalingEvents" : [ { |