diff options
author | Jon Marius Venstad <jonmv@users.noreply.github.com> | 2022-06-17 10:50:19 +0200 |
---|---|---|
committer | GitHub <noreply@github.com> | 2022-06-17 10:50:19 +0200 |
commit | bf8ab9be3b6a472e5b0fe2eee09de7833be78678 (patch) | |
tree | 159b956c45e0db249e89a255f08343d9768c353c /controller-server | |
parent | e8c0c0543ce63e4f3504e6155a245d92347c8099 (diff) |
Wait no more than 2 minutes for previous runs to abort, then fail with message (#23144)
Diffstat (limited to 'controller-server')
-rw-r--r-- | controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/JobController.java | 11 |
1 files changed, 8 insertions, 3 deletions
diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/JobController.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/JobController.java
index 316cf2de2d4..5113d386b23 100644
--- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/JobController.java
+++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/JobController.java
@@ -4,6 +4,7 @@ package com.yahoo.vespa.hosted.controller.deployment;
 import com.google.common.collect.ImmutableSortedMap;
 import com.yahoo.component.Version;
 import com.yahoo.component.VersionCompatibility;
+import com.yahoo.concurrent.UncheckedTimeoutException;
 import com.yahoo.config.provision.ApplicationId;
 import com.yahoo.config.provision.zone.ZoneId;
 import com.yahoo.transaction.Mutex;
@@ -628,7 +629,7 @@ public class JobController {
         DeploymentId deploymentId = new DeploymentId(id, type.zone());
         Optional<Run> lastRun = last(id, type);
-        lastRun.filter(run -> ! run.hasEnded()).ifPresent(run -> abortAndWait(run.id()));
+        lastRun.filter(run -> ! run.hasEnded()).ifPresent(run -> abortAndWait(run.id(), Duration.ofMinutes(2)));
         long build = 1 + lastRun.map(run -> run.versions().targetRevision().number()).orElse(0L);
         RevisionId revisionId = RevisionId.forDevelopment(build, new JobId(id, type));
@@ -708,13 +709,17 @@ public class JobController {
     }
     /** Aborts a run and waits for it complete. */
-    private void abortAndWait(RunId id) {
+    private void abortAndWait(RunId id, Duration timeout) {
         abort(id, "replaced by new deployment");
         runner.get().accept(last(id.application(), id.type()).get());
+        Instant doom = controller.clock().instant().plus(timeout);
+        Duration sleep = Duration.ofMillis(100);
         while ( ! last(id.application(), id.type()).get().hasEnded()) {
+            if (controller.clock().instant().plus(sleep).isAfter(doom))
+                throw new UncheckedTimeoutException("timeout waiting for " + id + " to abort and finish");
             try {
-                Thread.sleep(100);
+                Thread.sleep(sleep.toMillis());
             }
             catch (InterruptedException e) {
                 Thread.currentThread().interrupt(); |