diff options
author | Jon Bratseth <bratseth@gmail.com> | 2020-12-17 10:09:15 +0100 |
---|---|---|
committer | Jon Bratseth <bratseth@gmail.com> | 2020-12-17 10:09:15 +0100 |
commit | 27bb875d1974ba50495876033ae52b03ce5ee45a (patch) | |
tree | 2097abcc8d022e4ba22997ba5b43cbeec88ecbcc /node-repository/src | |
parent | 30da7eaa2284c1029d235379b6f338819b0b4efb (diff) |
Track current resource changes in autoscaling status
Diffstat (limited to 'node-repository/src')
2 files changed, 19 insertions, 16 deletions
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/Autoscaler.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/Autoscaler.java index 409d7111b9d..eced8c59247 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/Autoscaler.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/Autoscaler.java @@ -64,6 +64,8 @@ public class Autoscaler { return Advice.none("Cluster change in progress"); Duration scalingWindow = scalingWindow(clusterNodes.clusterSpec(), cluster); + if (scaledIn(scalingWindow, cluster)) + return Advice.dontScale("Won't autoscale now: Less than " + scalingWindow + " since last rescaling"); ClusterTimeseries clusterTimeseries = new ClusterTimeseries(nodeRepository.clock().instant().minus(scalingWindow), cluster, clusterNodes, metricsDb); @@ -72,14 +74,14 @@ public class Autoscaler { int measurementsPerNode = clusterTimeseries.measurementsPerNode(); if (measurementsPerNode < minimumMeasurementsPerNode(scalingWindow)) - return Advice.none("Collecting more data before making new scaling decisions" + - " (has " + measurementsPerNode + " measurements per node but need " + - minimumMeasurementsPerNode(scalingWindow) + ")"); + return Advice.none("Collecting more data before making new scaling decisions: " + + "Have " + measurementsPerNode + " measurements per node but require " + + minimumMeasurementsPerNode(scalingWindow)); int nodesMeasured = clusterTimeseries.nodesMeasured(); if (nodesMeasured != clusterNodes.size()) - return Advice.none("Collecting more data before making new scaling decisions" + - "(has measurements from " + nodesMeasured + " but need from " + clusterNodes.size() + ")"); + return Advice.none("Collecting more data before making new scaling decisions: " + + "Have measurements from " + nodesMeasured + " but require from " + clusterNodes.size()); double cpuLoad = clusterTimeseries.averageLoad(Resource.cpu); double memoryLoad = clusterTimeseries.averageLoad(Resource.memory); @@ -93,10 +95,8 @@ public class Autoscaler { return Advice.dontScale("No allocation changes are possible within configured limits"); if (similar(bestAllocation.get(), currentAllocation)) - return Advice.dontScale("Cluster is ideally scaled (within configured limits)"); + return Advice.dontScale("Cluster is ideally scaled within configured limits"); - if (scaledIn(scalingWindow, cluster)) - return Advice.dontScale("Won't autoscale now: Less than " + scalingWindow + " since last rescaling"); if (isDownscaling(bestAllocation.get(), currentAllocation) && scaledIn(scalingWindow.multipliedBy(3), cluster)) return Advice.dontScale("Waiting " + scalingWindow.multipliedBy(3) + " since last rescaling before reducing resources"); @@ -163,8 +163,8 @@ public class Autoscaler { /** Returns the minimum measurements per node (average) we require to give autoscaling advice.*/ private int minimumMeasurementsPerNode(Duration scalingWindow) { // Measurements are ideally taken every minute, but no guarantees - // (network, nodes may be down, collecting is single threaded and may take longer than 1 minute to complete. - // Since the metric window is 5 minutes, we won't really improve from measuring more often: + // (network, nodes may be down, collecting is single threaded and may take longer than 1 minute to complete). + // Since the metric window is 5 minutes, we won't really improve from measuring more often. long minimumMeasurements = scalingWindow.toMinutes() / 5; minimumMeasurements = Math.round(0.8 * minimumMeasurements); // Allow 20% metrics collection blackout if (minimumMeasurements < 1) minimumMeasurements = 1; @@ -215,7 +215,7 @@ public class Autoscaler { private static Advice none(String reason) { return new Advice(Optional.empty(), false, reason); } private static Advice dontScale(String reason) { return new Advice(Optional.empty(), true, reason); } private static Advice scaleTo(ClusterResources target) { - return new Advice(Optional.of(target), true, "Scaling due to load changes"); + return new Advice(Optional.of(target), true, "Scaling to " + target); } @Override diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/Activator.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/Activator.java index 45a83937989..2b03a5cae8c 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/Activator.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/Activator.java @@ -104,15 +104,18 @@ class Activator { .collect(Collectors.groupingBy(node -> node.allocation().get().membership().cluster().id())); Application modified = application.get(); for (var clusterEntry : currentNodesByCluster.entrySet()) { + var cluster = modified.cluster(clusterEntry.getKey()).get(); var previousResources = oldNodes.cluster(clusterEntry.getKey()).toResources(); var currentResources = NodeList.copyOf(clusterEntry.getValue()).toResources(); if ( ! previousResources.justNumbers().equals(currentResources.justNumbers())) { - modified = modified.with(application.get().cluster(clusterEntry.getKey()).get() - .with(ScalingEvent.create(previousResources, - currentResources, - generation, - at))); + cluster = cluster.with(ScalingEvent.create(previousResources, currentResources, generation, at)); } + if (cluster.targetResources().isPresent() + && cluster.targetResources().get().justNumbers().equals(currentResources.justNumbers())) { + cluster = cluster.withAutoscalingStatus("Cluster is ideally scaled within configured limits"); + } + if (cluster != modified.cluster(clusterEntry.getKey()).get()) + modified = modified.with(cluster); } if (modified != application.get()) |