diff options
author | Jon Bratseth <bratseth@gmail.com> | 2023-02-19 17:20:26 +0100 |
---|---|---|
committer | GitHub <noreply@github.com> | 2023-02-19 17:20:26 +0100 |
commit | 6c2676c3f02089a03b561fc50fdba64a9f61ce44 (patch) | |
tree | bd55c57f85642556cbb1dc1e33e490187313c74d | |
parent | 5a45774d4fd321c59f854cc958d3375355eaff91 (diff) | |
parent | 54849fb30ecb8d9382bada2dd54194ccd1d2f092 (diff) |
Merge pull request #26106 from vespa-engine/bratseth/disregard-retired
Avoid retired nodes where appropriate
3 files changed, 26 insertions, 26 deletions
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/Autoscaler.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/Autoscaler.java index 410a4fcb773..4020166a132 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/Autoscaler.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/Autoscaler.java @@ -57,7 +57,7 @@ public class Autoscaler { private Autoscaling autoscale(Application application, Cluster cluster, NodeList clusterNodes, Limits limits) { ClusterModel clusterModel = new ClusterModel(nodeRepository.zone(), application, - clusterNodes.clusterSpec(), + clusterNodes.not().retired().clusterSpec(), cluster, clusterNodes, nodeRepository.metricsDb(), @@ -70,7 +70,7 @@ public class Autoscaler { if ( ! clusterModel.isStable(nodeRepository)) return Autoscaling.dontScale(Status.waiting, "Cluster change in progress", clusterModel); - var currentAllocation = new AllocatableClusterResources(clusterNodes, nodeRepository); + var currentAllocation = new AllocatableClusterResources(clusterNodes.not().retired(), nodeRepository); Optional<AllocatableClusterResources> bestAllocation = allocationOptimizer.findBestAllocation(clusterModel.loadAdjustment(), currentAllocation, clusterModel, limits); if (bestAllocation.isEmpty()) diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/ClusterModel.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/ClusterModel.java index d122f719eff..a30c9b588c2 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/ClusterModel.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/ClusterModel.java @@ -127,10 +127,6 @@ public class ClusterModel { } public boolean isStable(NodeRepository nodeRepository) { - // An autoscaling decision was recently made - if (hasScaledIn(Duration.ofMinutes(5))) - return false; - // The cluster is processing recent changes if (nodes.stream().anyMatch(node -> node.status().wantToRetire() || node.allocation().get().membership().retired() || @@ -270,14 +266,14 @@ public class ClusterModel { /** The number of nodes this cluster has, or will have if not deployed yet. */ // TODO: Make this the deployed, not current count private int nodeCount() { - if ( ! nodes.isEmpty()) return (int)nodes.stream().count(); + if ( ! nodes.isEmpty()) return (int)nodes.not().retired().stream().count(); return cluster.minResources().nodes(); } /** The number of groups this cluster has, or will have if not deployed yet. */ // TODO: Make this the deployed, not current count private int groupCount() { - if ( ! nodes.isEmpty()) return (int)nodes.stream().mapToInt(node -> node.allocation().get().membership().cluster().group().get().index()).distinct().count(); + if ( ! nodes.isEmpty()) return (int)nodes.not().retired().stream().mapToInt(node -> node.allocation().get().membership().cluster().group().get().index()).distinct().count(); return cluster.minResources().groups(); } diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/AutoscalingMaintainer.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/AutoscalingMaintainer.java index f792c511adb..674c20e25f2 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/AutoscalingMaintainer.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/AutoscalingMaintainer.java @@ -67,27 +67,31 @@ public class AutoscalingMaintainer extends NodeRepositoryMaintainer { try (var lock = nodeRepository().applications().lock(applicationId)) { Optional<Application> application = nodeRepository().applications().get(applicationId); if (application.isEmpty()) return; - Optional<Cluster> cluster = application.get().cluster(clusterId); - if (cluster.isEmpty()) return; + if (application.get().cluster(clusterId).isEmpty()) return; + Cluster cluster = application.get().cluster(clusterId).get(); NodeList clusterNodes = nodeRepository().nodes().list(Node.State.active).owner(applicationId).cluster(clusterId); - Cluster updatedCluster = updateCompletion(cluster.get(), clusterNodes); - var autoscaling = autoscaler.autoscale(application.get(), updatedCluster, clusterNodes); - - // 1. Update cluster info - updatedCluster = updateCompletion(cluster.get(), clusterNodes); - if ( ! autoscaling.isEmpty()) // Ignore empties we'll get from servers recently started - updatedCluster = updatedCluster.withTarget(autoscaling); - applications().put(application.get().with(updatedCluster), lock); - - var current = new AllocatableClusterResources(clusterNodes, nodeRepository()).advertisedResources(); - if (autoscaling.resources().isPresent() && !current.equals(autoscaling.resources().get())) { - // 2. Also autoscale + cluster = updateCompletion(cluster, clusterNodes); + + var current = new AllocatableClusterResources(clusterNodes.not().retired(), nodeRepository()).advertisedResources(); + + // Autoscale unless an autoscaling is already in progress + Autoscaling autoscaling = null; + if (cluster.target().resources().isEmpty() || current.equals(cluster.target().resources().get())) { + autoscaling = autoscaler.autoscale(application.get(), cluster, clusterNodes); + if ( ! autoscaling.isEmpty()) // Ignore empties we'll get from servers recently started + cluster = cluster.withTarget(autoscaling); + } + + // Always store updates + applications().put(application.get().with(cluster), lock); + + // Attempt to perform the autoscaling immediately, and log it regardless + if (autoscaling != null && autoscaling.resources().isPresent() && !current.equals(autoscaling.resources().get())) { try (MaintenanceDeployment deployment = new MaintenanceDeployment(applicationId, deployer, metric, nodeRepository())) { - if (deployment.isValid()) { + if (deployment.isValid()) deployment.activate(); - logAutoscaling(current, autoscaling.resources().get(), applicationId, clusterNodes); - } + logAutoscaling(current, autoscaling.resources().get(), applicationId, clusterNodes.not().retired()); } } } @@ -123,7 +127,7 @@ public class AutoscalingMaintainer extends NodeRepositoryMaintainer { } private void logAutoscaling(ClusterResources from, ClusterResources to, ApplicationId application, NodeList clusterNodes) { - log.info("Autoscaled " + application + " " + clusterNodes.clusterSpec() + ":" + + log.info("Autoscaling " + application + " " + clusterNodes.clusterSpec() + ":" + "\nfrom " + toString(from) + "\nto " + toString(to)); } |