summary refs log tree commit diff stats
diff options
context:
space:
mode:
author Jon Bratseth <bratseth@gmail.com> 2023-03-10 15:20:11 +0100
committer Jon Bratseth <bratseth@gmail.com> 2023-03-10 15:20:11 +0100
commit 3881033cffca4d8461c076e26717d9493c3bac13 (patch)
tree c89d86519b76fbb08932c7e9caa4b61d7938ce85
parent ce7bcde58f5a8b80d631a11f6b19c13c36c72450 (diff)
Don't rescale if there is an incomplete rescaling
-rw-r--r-- node-repository/src/main/java/com/yahoo/vespa/hosted/provision/applications/Cluster.java 5
-rw-r--r-- node-repository/src/main/java/com/yahoo/vespa/hosted/provision/applications/ScalingEvent.java 3
-rw-r--r-- node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/Autoscaling.java 2
-rw-r--r-- node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/AutoscalingMaintainer.java 16
-rw-r--r-- node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/AutoscalingTest.java 2
5 files changed, 18 insertions, 10 deletions
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/applications/Cluster.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/applications/Cluster.java
index 16016815b7c..6a81c17d362 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/applications/Cluster.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/applications/Cluster.java
@@ -122,6 +122,11 @@ public class Cluster {
return Optional.of(scalingEvents.get(scalingEvents.size() - 1));
}
+ /** Returns whether the last scaling event in this has yet to complete. */
+ public boolean scalingInProgress() {
+ return lastScalingEvent().isPresent() && lastScalingEvent().get().completion().isEmpty();
+ }
+
public Cluster withConfiguration(boolean exclusive, Capacity capacity) {
return new Cluster(id, exclusive,
capacity.minResources(), capacity.maxResources(), capacity.groupSize(), capacity.isRequired(),
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/applications/ScalingEvent.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/applications/ScalingEvent.java
index e88989514c4..91270f14fbb 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/applications/ScalingEvent.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/applications/ScalingEvent.java
@@ -64,8 +64,7 @@ public class ScalingEvent {
@Override
public boolean equals(Object o) {
if (o == this) return true;
- if ( ! (o instanceof ScalingEvent)) return true;
- ScalingEvent other = (ScalingEvent)o;
+ if ( ! (o instanceof ScalingEvent other)) return true;
if ( other.generation != this.generation) return false;
if ( ! other.at.equals(this.at)) return false;
if ( ! other.from.equals(this.from)) return false;
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/Autoscaling.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/Autoscaling.java
index 2cc43a1eb33..0c86108b36c 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/Autoscaling.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/Autoscaling.java
@@ -155,7 +155,7 @@ public class Autoscaling {
/** The cluster should be rescaled further, but no better configuration is allowed by the current limits */
insufficient,
- /** Rescaling of this cluster has been scheduled */
+ /** This cluster should be rescaled */
rescaling
}
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/AutoscalingMaintainer.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/AutoscalingMaintainer.java
index 69c03dbf6dc..9fffdcf34e1 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/AutoscalingMaintainer.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/AutoscalingMaintainer.java
@@ -73,6 +73,7 @@ public class AutoscalingMaintainer extends NodeRepositoryMaintainer {
if (application.isEmpty()) return true;
if (application.get().cluster(clusterId).isEmpty()) return true;
Cluster cluster = application.get().cluster(clusterId).get();
+ Cluster unchangedCluster = cluster;
NodeList clusterNodes = nodeRepository().nodes().list(Node.State.active).owner(applicationId).cluster(clusterId);
cluster = updateCompletion(cluster, clusterNodes);
@@ -81,17 +82,20 @@ public class AutoscalingMaintainer extends NodeRepositoryMaintainer {
// Autoscale unless an autoscaling is already in progress
Autoscaling autoscaling = null;
- if (cluster.target().resources().isEmpty() || current.equals(cluster.target().resources().get())) {
+ if (cluster.target().resources().isEmpty() && !cluster.scalingInProgress()) {
autoscaling = autoscaler.autoscale(application.get(), cluster, clusterNodes);
- if ( autoscaling.isPresent() || cluster.target().isEmpty()) // Ignore empty from recently started servers
+ if (autoscaling.isPresent() || cluster.target().isEmpty()) // Ignore empty from recently started servers
cluster = cluster.withTarget(autoscaling);
}
- // Always store updates
- applications().put(application.get().with(cluster), lock);
+ // Always store any updates
+ if (cluster != unchangedCluster)
+ applications().put(application.get().with(cluster), lock);
// Attempt to perform the autoscaling immediately, and log it regardless
- if (autoscaling != null && autoscaling.resources().isPresent() && !current.equals(autoscaling.resources().get())) {
+ if (autoscaling != null
+ && autoscaling.resources().isPresent()
+ && !current.equals(autoscaling.resources().get())) {
try (MaintenanceDeployment deployment = new MaintenanceDeployment(applicationId, deployer, metric, nodeRepository())) {
if (deployment.isValid())
deployment.activate();
@@ -123,7 +127,7 @@ public class AutoscalingMaintainer extends NodeRepositoryMaintainer {
if (clusterNodes.retired().stream()
.anyMatch(node -> node.history().hasEventAt(History.Event.Type.retired, event.at())))
return cluster;
- // - 2. all nodes have switched to the right config generation (currently only measured on containers)
+ // - 2. all nodes have switched to the right config generation
for (var nodeTimeseries : nodeRepository().metricsDb().getNodeTimeseries(Duration.between(event.at(), clock().instant()),
clusterNodes)) {
Optional<NodeMetricSnapshot> onNewGeneration =
diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/AutoscalingTest.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/AutoscalingTest.java
index d69d9267cfd..cbee7437d5d 100644
--- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/AutoscalingTest.java
+++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/AutoscalingTest.java
@@ -585,7 +585,7 @@ public class AutoscalingTest {
var fixture = DynamicProvisioningTester.fixture().awsProdSetup(true).build();
fixture.loader().applyCpuLoad(0.02, 120);
assertTrue("Too soon after initial deployment", fixture.autoscale().resources().isEmpty());
- fixture.tester().clock().advance(Duration.ofDays(2));
+ fixture.tester().clock().advance(Duration.ofHours(12 * 3 + 1));
fixture.loader().applyCpuLoad(0.02, 120);
fixture.tester().assertResources("Scaling down since enough time has passed",
3, 1, 1.0, 24.6, 101.4,