diff options
author | Jon Bratseth <bratseth@gmail.com> | 2022-08-10 13:39:37 +0200 |
---|---|---|
committer | Jon Bratseth <bratseth@gmail.com> | 2022-08-10 13:39:37 +0200 |
commit | f5a42e659650b10e59a90d35bc94fdb5f81155a1 (patch) | |
tree | 0f02ed3c9de6667841c0fd2a2d368ea70e1497ea /node-repository | |
parent | 180a265397ab329ae8f8f34f68cae09d48790785 (diff) |
Remove unnecessary downscaling duration check
Diffstat (limited to 'node-repository')
2 files changed, 9 insertions, 61 deletions
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/Autoscaler.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/Autoscaler.java index 69271b9b45f..4d50250e61a 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/Autoscaler.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/Autoscaler.java @@ -2,7 +2,6 @@ package com.yahoo.vespa.hosted.provision.autoscale; import com.yahoo.config.provision.ClusterResources; -import com.yahoo.config.provision.NodeResources; import com.yahoo.vespa.hosted.provision.Node; import com.yahoo.vespa.hosted.provision.NodeList; import com.yahoo.vespa.hosted.provision.NodeRepository; @@ -12,7 +11,6 @@ import com.yahoo.vespa.hosted.provision.applications.AutoscalingStatus.Status; import com.yahoo.vespa.hosted.provision.applications.Cluster; import java.time.Duration; -import java.time.Instant; import java.util.Objects; import java.util.Optional; @@ -69,25 +67,7 @@ public class Autoscaler { if ( ! clusterIsStable(clusterNodes, nodeRepository)) return Advice.none(Status.waiting, "Cluster change in progress"); -/* - if (scaledIn(clusterModel.scalingDuration(), cluster)) - return Advice.dontScale(Status.waiting, - "Won't autoscale now: Less than " + clusterModel.scalingDuration() + - " since last resource change"); - - if (clusterModel.nodeTimeseries().measurementsPerNode() < minimumMeasurementsPerNode(clusterModel.scalingDuration())) - return Advice.none(Status.waiting, - "Collecting more data before making new scaling decisions: Need to measure for " + - clusterModel.scalingDuration() + " since the last resource change completed, " + - clusterModel.nodeTimeseries().measurementsPerNode() + " measurements per node found," + - " need at least " + minimumMeasurementsPerNode(clusterModel.scalingDuration())); - - if (clusterModel.nodeTimeseries().nodesMeasured() != clusterNodes.size()) - return Advice.none(Status.waiting, - "Collecting more data before making new scaling decisions:" + - " Have measurements from " + clusterModel.nodeTimeseries().nodesMeasured() + - " nodes, but require from " + clusterNodes.size()); -*/ + var currentAllocation = new AllocatableClusterResources(clusterNodes.asList(), nodeRepository); Optional<AllocatableClusterResources> bestAllocation = allocationOptimizer.findBestAllocation(clusterModel.loadAdjustment(), currentAllocation, clusterModel, limits); @@ -101,11 +81,6 @@ public class Autoscaler { return Advice.dontScale(Status.ideal, "Cluster is ideally scaled"); } - if (isDownscaling(bestAllocation.get(), currentAllocation) && scaledIn(clusterModel.scalingDuration().multipliedBy(3), cluster)) - return Advice.dontScale(Status.waiting, - "Waiting " + clusterModel.scalingDuration().multipliedBy(3) + - " since the last change before reducing resources"); - return Advice.scaleTo(bestAllocation.get().advertisedResources()); } @@ -142,33 +117,10 @@ public class Autoscaler { return Math.abs(r1 - r2) / (( r1 + r2) / 2) < threshold; } - /** Returns true if this reduces total resources in any dimension */ - private boolean isDownscaling(AllocatableClusterResources target, AllocatableClusterResources current) { - NodeResources targetTotal = target.advertisedResources().totalResources(); - NodeResources currentTotal = current.advertisedResources().totalResources(); - return ! targetTotal.justNumbers().satisfies(currentTotal.justNumbers()); - } - - private boolean scaledIn(Duration delay, Cluster cluster) { - return cluster.lastScalingEvent().map(event -> event.at()).orElse(Instant.MIN) - .isAfter(nodeRepository.clock().instant().minus(delay)); - } - static Duration maxScalingWindow() { return Duration.ofHours(48); } - /** Returns the minimum measurements per node (average) we require to give autoscaling advice.*/ - private int minimumMeasurementsPerNode(Duration scalingWindow) { - // Measurements are ideally taken every minute, but no guarantees - // (network, nodes may be down, collecting is single threaded and may take longer than 1 minute to complete). - // Since the metric window is 5 minutes, we won't really improve from measuring more often. - long minimumMeasurements = scalingWindow.toMinutes() / 5; - minimumMeasurements = Math.round(0.8 * minimumMeasurements); // Allow 20% metrics collection blackout - if (minimumMeasurements < 1) minimumMeasurements = 1; - return (int)minimumMeasurements; - } - public static class Advice { private final boolean present; diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/AutoscalingTest.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/AutoscalingTest.java index 6f3182b6e44..bbc03083024 100644 --- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/AutoscalingTest.java +++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/AutoscalingTest.java @@ -36,16 +36,9 @@ public class AutoscalingTest { public void test_autoscaling_single_content_group() { var fixture = AutoscalingTester.fixture().build(); - fixture.tester().clock().advance(Duration.ofDays(1)); - assertTrue("No measurements -> No change", fixture.autoscale().target().isEmpty()); - - fixture.loader().applyCpuLoad(0.7f, 59); - assertTrue("Too few measurements -> No change", fixture.autoscale().target().isEmpty()); - - fixture.tester().clock().advance(Duration.ofDays(1)); - fixture.loader().applyCpuLoad(0.7f, 120); + fixture.loader().applyCpuLoad(0.7f, 10); ClusterResources scaledResources = fixture.tester().assertResources("Scaling up since resource usage is too high", - 9, 1, 2.8, 5.0, 50.0, + 8, 1, 6, 5.7, 57.1, fixture.autoscale()); fixture.deploy(Capacity.from(scaledResources)); @@ -53,13 +46,16 @@ public class AutoscalingTest { fixture.deactivateRetired(Capacity.from(scaledResources)); - fixture.loader().applyCpuLoad(0.19f, 100); + fixture.loader().applyCpuLoad(0.19f, 10); assertEquals("Load change is small -> No change", Optional.empty(), fixture.autoscale().target()); + fixture.loader().applyCpuLoad(0.1f, 10); + assertEquals("Too little time passed for downscaling -> No change", Optional.empty(), fixture.autoscale().target()); + fixture.tester().clock().advance(Duration.ofDays(2)); - fixture.loader().applyCpuLoad(0.1f, 120); + fixture.loader().applyCpuLoad(0.1f, 10); fixture.tester().assertResources("Scaling cpu down since usage has gone down significantly", - 6, 1, 1.1, 8, 80.0, + 11, 1, 1.1, 4, 40.0, fixture.autoscale()); } |