diff options
author | Jon Bratseth <bratseth@gmail.com> | 2023-03-10 15:20:11 +0100 |
---|---|---|
committer | Jon Bratseth <bratseth@gmail.com> | 2023-03-10 15:20:11 +0100 |
commit | 3881033cffca4d8461c076e26717d9493c3bac13 (patch) | |
tree | c89d86519b76fbb08932c7e9caa4b61d7938ce85 | |
parent | ce7bcde58f5a8b80d631a11f6b19c13c36c72450 (diff) |
Don't rescale if there is an incomplete rescaling
5 files changed, 18 insertions, 10 deletions
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/applications/Cluster.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/applications/Cluster.java index 16016815b7c..6a81c17d362 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/applications/Cluster.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/applications/Cluster.java @@ -122,6 +122,11 @@ public class Cluster { return Optional.of(scalingEvents.get(scalingEvents.size() - 1)); } + /** Returns whether the last scaling event in this has yet to complete. */ + public boolean scalingInProgress() { + return lastScalingEvent().isPresent() && lastScalingEvent().get().completion().isEmpty(); + } + public Cluster withConfiguration(boolean exclusive, Capacity capacity) { return new Cluster(id, exclusive, capacity.minResources(), capacity.maxResources(), capacity.groupSize(), capacity.isRequired(), diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/applications/ScalingEvent.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/applications/ScalingEvent.java index e88989514c4..91270f14fbb 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/applications/ScalingEvent.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/applications/ScalingEvent.java @@ -64,8 +64,7 @@ public class ScalingEvent { @Override public boolean equals(Object o) { if (o == this) return true; - if ( ! (o instanceof ScalingEvent)) return true; - ScalingEvent other = (ScalingEvent)o; + if ( ! (o instanceof ScalingEvent other)) return true; if ( other.generation != this.generation) return false; if ( ! other.at.equals(this.at)) return false; if ( ! other.from.equals(this.from)) return false; diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/Autoscaling.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/Autoscaling.java index 2cc43a1eb33..0c86108b36c 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/Autoscaling.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/Autoscaling.java @@ -155,7 +155,7 @@ public class Autoscaling { /** The cluster should be rescaled further, but no better configuration is allowed by the current limits */ insufficient, - /** Rescaling of this cluster has been scheduled */ + /** This cluster should be rescaled */ rescaling } diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/AutoscalingMaintainer.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/AutoscalingMaintainer.java index 69c03dbf6dc..9fffdcf34e1 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/AutoscalingMaintainer.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/AutoscalingMaintainer.java @@ -73,6 +73,7 @@ public class AutoscalingMaintainer extends NodeRepositoryMaintainer { if (application.isEmpty()) return true; if (application.get().cluster(clusterId).isEmpty()) return true; Cluster cluster = application.get().cluster(clusterId).get(); + Cluster unchangedCluster = cluster; NodeList clusterNodes = nodeRepository().nodes().list(Node.State.active).owner(applicationId).cluster(clusterId); cluster = updateCompletion(cluster, clusterNodes); @@ -81,17 +82,20 @@ public class AutoscalingMaintainer extends NodeRepositoryMaintainer { // Autoscale unless an autoscaling is already in progress Autoscaling autoscaling = null; - if (cluster.target().resources().isEmpty() || current.equals(cluster.target().resources().get())) { + if (cluster.target().resources().isEmpty() && !cluster.scalingInProgress()) { autoscaling = autoscaler.autoscale(application.get(), cluster, clusterNodes); - if ( autoscaling.isPresent() || cluster.target().isEmpty()) // Ignore empty from recently started servers + if (autoscaling.isPresent() || cluster.target().isEmpty()) // Ignore empty from recently started servers cluster = cluster.withTarget(autoscaling); } - // Always store updates - applications().put(application.get().with(cluster), lock); + // Always store any updates + if (cluster != unchangedCluster) + applications().put(application.get().with(cluster), lock); // Attempt to perform the autoscaling immediately, and log it regardless - if (autoscaling != null && autoscaling.resources().isPresent() && !current.equals(autoscaling.resources().get())) { + if (autoscaling != null + && autoscaling.resources().isPresent() + && !current.equals(autoscaling.resources().get())) { try (MaintenanceDeployment deployment = new MaintenanceDeployment(applicationId, deployer, metric, nodeRepository())) { if (deployment.isValid()) deployment.activate(); @@ -123,7 +127,7 @@ public class AutoscalingMaintainer extends NodeRepositoryMaintainer { if (clusterNodes.retired().stream() .anyMatch(node -> node.history().hasEventAt(History.Event.Type.retired, event.at()))) return cluster; - // - 2. all nodes have switched to the right config generation (currently only measured on containers) + // - 2. all nodes have switched to the right config generation for (var nodeTimeseries : nodeRepository().metricsDb().getNodeTimeseries(Duration.between(event.at(), clock().instant()), clusterNodes)) { Optional<NodeMetricSnapshot> onNewGeneration = diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/AutoscalingTest.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/AutoscalingTest.java index d69d9267cfd..cbee7437d5d 100644 --- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/AutoscalingTest.java +++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/AutoscalingTest.java @@ -585,7 +585,7 @@ public class AutoscalingTest { var fixture = DynamicProvisioningTester.fixture().awsProdSetup(true).build(); fixture.loader().applyCpuLoad(0.02, 120); assertTrue("Too soon after initial deployment", fixture.autoscale().resources().isEmpty()); - fixture.tester().clock().advance(Duration.ofDays(2)); + fixture.tester().clock().advance(Duration.ofHours(12 * 3 + 1)); fixture.loader().applyCpuLoad(0.02, 120); fixture.tester().assertResources("Scaling down since enough time has passed", 3, 1, 1.0, 24.6, 101.4, |