diff options
author | Jon Bratseth <bratseth@gmail.com> | 2023-02-19 16:48:54 +0100 |
---|---|---|
committer | Jon Bratseth <bratseth@gmail.com> | 2023-02-19 16:48:54 +0100 |
commit | 54849fb30ecb8d9382bada2dd54194ccd1d2f092 (patch) | |
tree | bd55c57f85642556cbb1dc1e33e490187313c74d | |
parent | 90714a0c0b5758c545e36a689c6eed75e1b4ae15 (diff) |
Clean up autoscaling maintenance & avoid analysis paralysis
2 files changed, 20 insertions, 19 deletions
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/ClusterModel.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/ClusterModel.java index 6cc9c48883b..a30c9b588c2 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/ClusterModel.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/ClusterModel.java @@ -127,10 +127,6 @@ public class ClusterModel { } public boolean isStable(NodeRepository nodeRepository) { - // An autoscaling decision was recently made - if (hasScaledIn(Duration.ofMinutes(5))) - return false; - // The cluster is processing recent changes if (nodes.stream().anyMatch(node -> node.status().wantToRetire() || node.allocation().get().membership().retired() || diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/AutoscalingMaintainer.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/AutoscalingMaintainer.java index 613097a71a7..674c20e25f2 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/AutoscalingMaintainer.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/AutoscalingMaintainer.java @@ -67,26 +67,31 @@ public class AutoscalingMaintainer extends NodeRepositoryMaintainer { try (var lock = nodeRepository().applications().lock(applicationId)) { Optional<Application> application = nodeRepository().applications().get(applicationId); if (application.isEmpty()) return; - Optional<Cluster> cluster = application.get().cluster(clusterId); - if (cluster.isEmpty()) return; + if (application.get().cluster(clusterId).isEmpty()) return; + Cluster cluster = application.get().cluster(clusterId).get(); NodeList clusterNodes = nodeRepository().nodes().list(Node.State.active).owner(applicationId).cluster(clusterId); - Cluster updatedCluster = updateCompletion(cluster.get(), clusterNodes); - var autoscaling = autoscaler.autoscale(application.get(), updatedCluster, clusterNodes); - - // 1. Update cluster info - if ( ! autoscaling.isEmpty()) // Ignore empties we'll get from servers recently started - updatedCluster = updatedCluster.withTarget(autoscaling); - applications().put(application.get().with(updatedCluster), lock); + cluster = updateCompletion(cluster, clusterNodes); var current = new AllocatableClusterResources(clusterNodes.not().retired(), nodeRepository()).advertisedResources(); - if (autoscaling.resources().isPresent() && !current.equals(autoscaling.resources().get())) { - // 2. Also autoscale + + // Autoscale unless an autoscaling is already in progress + Autoscaling autoscaling = null; + if (cluster.target().resources().isEmpty() || current.equals(cluster.target().resources().get())) { + autoscaling = autoscaler.autoscale(application.get(), cluster, clusterNodes); + if ( ! autoscaling.isEmpty()) // Ignore empties we'll get from servers recently started + cluster = cluster.withTarget(autoscaling); + } + + // Always store updates + applications().put(application.get().with(cluster), lock); + + // Attempt to perform the autoscaling immediately, and log it regardless + if (autoscaling != null && autoscaling.resources().isPresent() && !current.equals(autoscaling.resources().get())) { try (MaintenanceDeployment deployment = new MaintenanceDeployment(applicationId, deployer, metric, nodeRepository())) { - if (deployment.isValid()) { + if (deployment.isValid()) deployment.activate(); - logAutoscaling(current, autoscaling.resources().get(), applicationId, clusterNodes.not().retired()); - } + logAutoscaling(current, autoscaling.resources().get(), applicationId, clusterNodes.not().retired()); } } } @@ -122,7 +127,7 @@ public class AutoscalingMaintainer extends NodeRepositoryMaintainer { } private void logAutoscaling(ClusterResources from, ClusterResources to, ApplicationId application, NodeList clusterNodes) { - log.info("Autoscaled " + application + " " + clusterNodes.clusterSpec() + ":" + + log.info("Autoscaling " + application + " " + clusterNodes.clusterSpec() + ":" + "\nfrom " + toString(from) + "\nto " + toString(to)); } |