about | summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author Jon Bratseth <bratseth@gmail.com> 2023-02-19 16:48:54 +0100
committer Jon Bratseth <bratseth@gmail.com> 2023-02-19 16:48:54 +0100
commit 54849fb30ecb8d9382bada2dd54194ccd1d2f092 (patch)
tree bd55c57f85642556cbb1dc1e33e490187313c74d
parent 90714a0c0b5758c545e36a689c6eed75e1b4ae15 (diff)
Clean up autoscaling maintenance & avoid analysis paralysis
-rw-r--r-- node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/ClusterModel.java 4
-rw-r--r-- node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/AutoscalingMaintainer.java 35
2 files changed, 20 insertions, 19 deletions
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/ClusterModel.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/ClusterModel.java
index 6cc9c48883b..a30c9b588c2 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/ClusterModel.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/ClusterModel.java
@@ -127,10 +127,6 @@ public class ClusterModel {
}
public boolean isStable(NodeRepository nodeRepository) {
- // An autoscaling decision was recently made
- if (hasScaledIn(Duration.ofMinutes(5)))
- return false;
-
// The cluster is processing recent changes
if (nodes.stream().anyMatch(node -> node.status().wantToRetire() ||
node.allocation().get().membership().retired() ||
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/AutoscalingMaintainer.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/AutoscalingMaintainer.java
index 613097a71a7..674c20e25f2 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/AutoscalingMaintainer.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/AutoscalingMaintainer.java
@@ -67,26 +67,31 @@ public class AutoscalingMaintainer extends NodeRepositoryMaintainer {
try (var lock = nodeRepository().applications().lock(applicationId)) {
Optional<Application> application = nodeRepository().applications().get(applicationId);
if (application.isEmpty()) return;
- Optional<Cluster> cluster = application.get().cluster(clusterId);
- if (cluster.isEmpty()) return;
+ if (application.get().cluster(clusterId).isEmpty()) return;
+ Cluster cluster = application.get().cluster(clusterId).get();
NodeList clusterNodes = nodeRepository().nodes().list(Node.State.active).owner(applicationId).cluster(clusterId);
- Cluster updatedCluster = updateCompletion(cluster.get(), clusterNodes);
- var autoscaling = autoscaler.autoscale(application.get(), updatedCluster, clusterNodes);
-
- // 1. Update cluster info
- if ( ! autoscaling.isEmpty()) // Ignore empties we'll get from servers recently started
- updatedCluster = updatedCluster.withTarget(autoscaling);
- applications().put(application.get().with(updatedCluster), lock);
+ cluster = updateCompletion(cluster, clusterNodes);
var current = new AllocatableClusterResources(clusterNodes.not().retired(), nodeRepository()).advertisedResources();
- if (autoscaling.resources().isPresent() && !current.equals(autoscaling.resources().get())) {
- // 2. Also autoscale
+
+ // Autoscale unless an autoscaling is already in progress
+ Autoscaling autoscaling = null;
+ if (cluster.target().resources().isEmpty() || current.equals(cluster.target().resources().get())) {
+ autoscaling = autoscaler.autoscale(application.get(), cluster, clusterNodes);
+ if ( ! autoscaling.isEmpty()) // Ignore empties we'll get from servers recently started
+ cluster = cluster.withTarget(autoscaling);
+ }
+
+ // Always store updates
+ applications().put(application.get().with(cluster), lock);
+
+ // Attempt to perform the autoscaling immediately, and log it regardless
+ if (autoscaling != null && autoscaling.resources().isPresent() && !current.equals(autoscaling.resources().get())) {
try (MaintenanceDeployment deployment = new MaintenanceDeployment(applicationId, deployer, metric, nodeRepository())) {
- if (deployment.isValid()) {
+ if (deployment.isValid())
deployment.activate();
- logAutoscaling(current, autoscaling.resources().get(), applicationId, clusterNodes.not().retired());
- }
+ logAutoscaling(current, autoscaling.resources().get(), applicationId, clusterNodes.not().retired());
}
}
}
@@ -122,7 +127,7 @@ public class AutoscalingMaintainer extends NodeRepositoryMaintainer {
}
private void logAutoscaling(ClusterResources from, ClusterResources to, ApplicationId application, NodeList clusterNodes) {
- log.info("Autoscaled " + application + " " + clusterNodes.clusterSpec() + ":" +
+ log.info("Autoscaling " + application + " " + clusterNodes.clusterSpec() + ":" +
"\nfrom " + toString(from) + "\nto " + toString(to));
}