summaryrefslogtreecommitdiffstats
path: root/controller-server
diff options
context:
space:
mode:
authorJon Marius Venstad <jonmv@users.noreply.github.com>2022-06-17 10:50:19 +0200
committerGitHub <noreply@github.com>2022-06-17 10:50:19 +0200
commitbf8ab9be3b6a472e5b0fe2eee09de7833be78678 (patch)
tree159b956c45e0db249e89a255f08343d9768c353c /controller-server
parente8c0c0543ce63e4f3504e6155a245d92347c8099 (diff)
Wait no more than 2 minutes for previous runs to abort, then fail with message (#23144)
Diffstat (limited to 'controller-server')
-rw-r--r--controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/JobController.java11
1 files changed, 8 insertions, 3 deletions
diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/JobController.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/JobController.java
index 316cf2de2d4..5113d386b23 100644
--- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/JobController.java
+++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/JobController.java
@@ -4,6 +4,7 @@ package com.yahoo.vespa.hosted.controller.deployment;
import com.google.common.collect.ImmutableSortedMap;
import com.yahoo.component.Version;
import com.yahoo.component.VersionCompatibility;
+import com.yahoo.concurrent.UncheckedTimeoutException;
import com.yahoo.config.provision.ApplicationId;
import com.yahoo.config.provision.zone.ZoneId;
import com.yahoo.transaction.Mutex;
@@ -628,7 +629,7 @@ public class JobController {
DeploymentId deploymentId = new DeploymentId(id, type.zone());
Optional<Run> lastRun = last(id, type);
- lastRun.filter(run -> ! run.hasEnded()).ifPresent(run -> abortAndWait(run.id()));
+ lastRun.filter(run -> ! run.hasEnded()).ifPresent(run -> abortAndWait(run.id(), Duration.ofMinutes(2)));
long build = 1 + lastRun.map(run -> run.versions().targetRevision().number()).orElse(0L);
RevisionId revisionId = RevisionId.forDevelopment(build, new JobId(id, type));
@@ -708,13 +709,17 @@ public class JobController {
}
/** Aborts a run and waits for it to complete. */
- private void abortAndWait(RunId id) {
+ private void abortAndWait(RunId id, Duration timeout) {
abort(id, "replaced by new deployment");
runner.get().accept(last(id.application(), id.type()).get());
+ Instant doom = controller.clock().instant().plus(timeout);
+ Duration sleep = Duration.ofMillis(100);
while ( ! last(id.application(), id.type()).get().hasEnded()) {
+ if (controller.clock().instant().plus(sleep).isAfter(doom))
+ throw new UncheckedTimeoutException("timeout waiting for " + id + " to abort and finish");
try {
- Thread.sleep(100);
+ Thread.sleep(sleep.toMillis());
}
catch (InterruptedException e) {
Thread.currentThread().interrupt();