diff options
author | Jon Marius Venstad <jonmv@users.noreply.github.com> | 2020-10-28 12:00:34 +0100 |
---|---|---|
committer | GitHub <noreply@github.com> | 2020-10-28 12:00:34 +0100 |
commit | 91c600222b9ccde4e7113da2171b92067eeae091 (patch) | |
tree | dc31e23fd5918f1f92ef911612480202cb699408 /controller-server/src | |
parent | 50f5db07712504164ca2351a5fe17cb89f447314 (diff) | |
parent | e9bd7b1f84bfce80592c222198b7bb7a912a710e (diff) |
Merge pull request #15055 from vespa-engine/jonmv/wait-for-running-steps-when-aborting-and-finishing-ru
Jonmv/wait for running steps when aborting and finishing ru
Diffstat (limited to 'controller-server/src')
4 files changed, 30 insertions, 8 deletions
diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/JobController.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/JobController.java index 3bc9bc01a76..e288127ca6d 100644 --- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/JobController.java +++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/JobController.java @@ -55,6 +55,7 @@ import static com.yahoo.vespa.hosted.controller.deployment.Step.copyVespaLogs; import static com.yahoo.vespa.hosted.controller.deployment.Step.deactivateTester; import static com.yahoo.vespa.hosted.controller.deployment.Step.endStagingSetup; import static com.yahoo.vespa.hosted.controller.deployment.Step.endTests; +import static com.yahoo.vespa.hosted.controller.deployment.Step.report; import static java.util.stream.Collectors.toList; import static java.util.stream.Collectors.toMap; import static java.util.stream.Collectors.toUnmodifiableList; @@ -353,7 +354,10 @@ public class JobController { } /** Changes the status of the given run to inactive, and stores it as a historic run. */ - public void finish(RunId id) { + public void finish(RunId id) throws TimeoutException { + // Ensure no step is still running before we finish the run — report depends transitively on all the other steps. + locked(id.application(), id.type(), report, __ -> { }); + locked(id, run -> { // Store the modified run after it has been written to history, in case the latter fails. Run finishedRun = run.finished(controller.clock().instant()); locked(id.application(), id.type(), runs -> { @@ -588,7 +592,7 @@ public class JobController { /** Locks the given step and checks none of its prerequisites are running, then performs the given actions. */ public void locked(ApplicationId id, JobType type, Step step, Consumer<LockedStep> action) throws TimeoutException { try (Lock lock = curator.lock(id, type, step)) { - for (Step prerequisite : step.prerequisites()) // Check that no prerequisite is still running. + for (Step prerequisite : step.allPrerequisites()) // Check that no prerequisite is still running. try (Lock __ = curator.lock(id, type, prerequisite)) { ; } action.accept(new LockedStep(lock, step)); diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/Step.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/Step.java index 17cfd1bdf1d..baba4771370 100644 --- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/Step.java +++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/Step.java @@ -1,9 +1,11 @@ // Copyright 2018 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. package com.yahoo.vespa.hosted.controller.deployment; -import com.google.common.collect.ImmutableList; - import java.util.List; +import java.util.stream.Collectors; +import java.util.stream.Stream; + +import static java.util.stream.Collectors.toUnmodifiableList; /** * Steps that make up a deployment job. See {@link JobProfile} for preset profiles. @@ -70,16 +72,28 @@ public enum Step { private final boolean alwaysRun; private final List<Step> prerequisites; + private final List<Step> allPrerequisites; Step(boolean alwaysRun, Step... prerequisites) { this.alwaysRun = alwaysRun; - this.prerequisites = ImmutableList.copyOf(prerequisites); + this.prerequisites = List.of(prerequisites); + this.allPrerequisites = Stream.concat(Stream.of(prerequisites), + Stream.of(prerequisites).flatMap(pre -> pre.allPrerequisites().stream())) + .sorted() + .distinct() + .collect(toUnmodifiableList()); } /** Returns whether this is a cleanup-step, and should always run, regardless of job outcome, when specified in a job. */ public boolean alwaysRun() { return alwaysRun; } - /** Returns the prerequisite steps that must be successfully completed before this, assuming the job contains these steps. */ + /** Returns all prerequisite steps for this, recursively. */ + public List<Step> allPrerequisites() { + return allPrerequisites; + } + + + /** Returns the direct prerequisite steps that must be completed before this, assuming the job contains these steps. */ public List<Step> prerequisites() { return prerequisites; } diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/JobRunner.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/JobRunner.java index e5ee06f81dd..f08a23ab8ed 100644 --- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/JobRunner.java +++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/JobRunner.java @@ -1,6 +1,7 @@ // Copyright 2018 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. package com.yahoo.vespa.hosted.controller.maintenance; +import com.yahoo.concurrent.DaemonThreadFactory; import com.yahoo.vespa.hosted.controller.Controller; import com.yahoo.vespa.hosted.controller.api.integration.deployment.RunId; import com.yahoo.vespa.hosted.controller.deployment.InternalStepRunner; @@ -35,7 +36,7 @@ public class JobRunner extends ControllerMaintainer { private final StepRunner runner; public JobRunner(Controller controller, Duration duration) { - this(controller, duration, Executors.newFixedThreadPool(32), new InternalStepRunner(controller)); + this(controller, duration, Executors.newFixedThreadPool(128, new DaemonThreadFactory("job-runner-")), new InternalStepRunner(controller)); } @TestOnly @@ -90,6 +91,9 @@ public class JobRunner extends ControllerMaintainer { controller().jobController().run(id) .ifPresent(run -> controller().applications().deploymentTrigger().notifyOfCompletion(id.application())); } + catch (TimeoutException e) { + // One of the steps are still being run — that's ok, we'll try to finish the run again later. + } catch (Exception e) { log.log(Level.WARNING, "Exception finishing " + id, e); } diff --git a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/JobRunnerTest.java b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/JobRunnerTest.java index 88eab642a60..f677cc079cc 100644 --- a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/JobRunnerTest.java +++ b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/JobRunnerTest.java @@ -370,7 +370,7 @@ public class JobRunnerTest { } @Test - public void jobMetrics() { + public void jobMetrics() throws TimeoutException { DeploymentTester tester = new DeploymentTester(); JobController jobs = tester.controller().jobController(); Map<Step, RunStatus> outcomes = new EnumMap<>(Step.class); |