diff options
author | Jon Marius Venstad <venstad@gmail.com> | 2020-10-27 19:06:13 +0100 |
---|---|---|
committer | Jon Marius Venstad <venstad@gmail.com> | 2020-10-27 19:06:13 +0100 |
commit | 555f5f9c1c1d59308f0cc2299b81da6298b90337 (patch) | |
tree | 9495beecb6a5548f243f987c519b65fedfc2e56a /controller-server | |
parent | 54171e6464209213cbd97ccea19c080fdad388b6 (diff) |
Wait for all steps to finish before finishing a run
Diffstat (limited to 'controller-server')
4 files changed, 28 insertions, 7 deletions
diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/JobController.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/JobController.java index 3bc9bc01a76..e288127ca6d 100644 --- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/JobController.java +++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/JobController.java @@ -55,6 +55,7 @@ import static com.yahoo.vespa.hosted.controller.deployment.Step.copyVespaLogs; import static com.yahoo.vespa.hosted.controller.deployment.Step.deactivateTester; import static com.yahoo.vespa.hosted.controller.deployment.Step.endStagingSetup; import static com.yahoo.vespa.hosted.controller.deployment.Step.endTests; +import static com.yahoo.vespa.hosted.controller.deployment.Step.report; import static java.util.stream.Collectors.toList; import static java.util.stream.Collectors.toMap; import static java.util.stream.Collectors.toUnmodifiableList; @@ -353,7 +354,10 @@ public class JobController { } /** Changes the status of the given run to inactive, and stores it as a historic run. */ - public void finish(RunId id) { + public void finish(RunId id) throws TimeoutException { + // Ensure no step is still running before we finish the run — report depends transitively on all the other steps. + locked(id.application(), id.type(), report, __ -> { }); + locked(id, run -> { // Store the modified run after it has been written to history, in case the latter fails. Run finishedRun = run.finished(controller.clock().instant()); locked(id.application(), id.type(), runs -> { @@ -588,7 +592,7 @@ public class JobController { /** Locks the given step and checks none of its prerequisites are running, then performs the given actions. */ public void locked(ApplicationId id, JobType type, Step step, Consumer<LockedStep> action) throws TimeoutException { try (Lock lock = curator.lock(id, type, step)) { - for (Step prerequisite : step.prerequisites()) // Check that no prerequisite is still running. + for (Step prerequisite : step.allPrerequisites()) // Check that no prerequisite is still running. try (Lock __ = curator.lock(id, type, prerequisite)) { ; } action.accept(new LockedStep(lock, step)); diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/Step.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/Step.java index 17cfd1bdf1d..baba4771370 100644 --- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/Step.java +++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/Step.java @@ -1,9 +1,11 @@ // Copyright 2018 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. package com.yahoo.vespa.hosted.controller.deployment; -import com.google.common.collect.ImmutableList; - import java.util.List; +import java.util.stream.Collectors; +import java.util.stream.Stream; + +import static java.util.stream.Collectors.toUnmodifiableList; /** * Steps that make up a deployment job. See {@link JobProfile} for preset profiles. @@ -70,16 +72,28 @@ public enum Step { private final boolean alwaysRun; private final List<Step> prerequisites; + private final List<Step> allPrerequisites; Step(boolean alwaysRun, Step... prerequisites) { this.alwaysRun = alwaysRun; - this.prerequisites = ImmutableList.copyOf(prerequisites); + this.prerequisites = List.of(prerequisites); + this.allPrerequisites = Stream.concat(Stream.of(prerequisites), + Stream.of(prerequisites).flatMap(pre -> pre.allPrerequisites().stream())) + .sorted() + .distinct() + .collect(toUnmodifiableList()); } /** Returns whether this is a cleanup-step, and should always run, regardless of job outcome, when specified in a job. */ public boolean alwaysRun() { return alwaysRun; } - /** Returns the prerequisite steps that must be successfully completed before this, assuming the job contains these steps. */ + /** Returns all prerequisite steps for this, recursively. */ + public List<Step> allPrerequisites() { + return allPrerequisites; + } + + + /** Returns the direct prerequisite steps that must be completed before this, assuming the job contains these steps. */ public List<Step> prerequisites() { return prerequisites; } diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/JobRunner.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/JobRunner.java index e5ee06f81dd..7f21b5c483c 100644 --- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/JobRunner.java +++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/JobRunner.java @@ -90,6 +90,9 @@ public class JobRunner extends ControllerMaintainer { controller().jobController().run(id) .ifPresent(run -> controller().applications().deploymentTrigger().notifyOfCompletion(id.application())); } + catch (TimeoutException e) { + // One of the steps are still being run — that's ok, we'll try to finish the run again later. + } catch (Exception e) { log.log(Level.WARNING, "Exception finishing " + id, e); } diff --git a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/JobRunnerTest.java b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/JobRunnerTest.java index 88eab642a60..f677cc079cc 100644 --- a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/JobRunnerTest.java +++ b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/JobRunnerTest.java @@ -370,7 +370,7 @@ public class JobRunnerTest { } @Test - public void jobMetrics() { + public void jobMetrics() throws TimeoutException { DeploymentTester tester = new DeploymentTester(); JobController jobs = tester.controller().jobController(); Map<Step, RunStatus> outcomes = new EnumMap<>(Step.class); |