diff options
author | Jon Marius Venstad <venstad@gmail.com> | 2022-02-28 15:40:43 +0100 |
---|---|---|
committer | Jon Marius Venstad <venstad@gmail.com> | 2022-02-28 15:40:43 +0100 |
commit | 84d6b319d2d7308b9f0c85a95ff28f0c153f61c6 (patch) | |
tree | a1049bbd07a8ae732ef4e7ca057f4631877471c4 /controller-server | |
parent | 5f8948ee7bdef69a3d5d4c113e35ed5f4044dd72 (diff) |
Log why runs are aborted
Diffstat (limited to 'controller-server')
8 files changed, 23 insertions, 12 deletions
diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentTrigger.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentTrigger.java index 9f2ad0bd41c..5a99f6b1e50 100644 --- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentTrigger.java +++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentTrigger.java @@ -247,7 +247,7 @@ public class DeploymentTrigger { .collect(toList()); controller.curator().writeRetriggerEntries(newList); } - controller.jobController().abort(run.id()); + controller.jobController().abort(run.id(), "force re-triggered"); return Optional.empty(); } else { return Optional.of(reTrigger(deployment.applicationId(), jobType)); @@ -363,7 +363,10 @@ public class DeploymentTrigger { if (jobs.get(job).stream().noneMatch(versions -> versions.versions().targetsMatch(last.versions()) && versions.versions().sourcesMatchIfPresent(last.versions()))) { log.log(Level.INFO, "Aborting outdated run " + last); - controller.jobController().abort(last.id()); + controller.jobController().abort(last.id(), "run no longer scheduled, and is blocking scheduled runs: " + + jobs.get(job).stream() + .map(scheduled -> scheduled.versions().toString()) + .collect(Collectors.joining(", "))); } }); } diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/JobController.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/JobController.java index 60339a3233c..88b47194936 100644 --- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/JobController.java +++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/JobController.java @@ -61,6 +61,7 @@ import static com.yahoo.config.provision.Environment.prod; import static com.yahoo.vespa.hosted.controller.deployment.RunStatus.reset; import static com.yahoo.vespa.hosted.controller.deployment.RunStatus.running; import static com.yahoo.vespa.hosted.controller.deployment.Step.Status.succeeded; +import static com.yahoo.vespa.hosted.controller.deployment.Step.Status.unfinished; import static com.yahoo.vespa.hosted.controller.deployment.Step.copyVespaLogs; import static com.yahoo.vespa.hosted.controller.deployment.Step.deactivateTester; import static com.yahoo.vespa.hosted.controller.deployment.Step.endStagingSetup; @@ -68,6 +69,7 @@ import static com.yahoo.vespa.hosted.controller.deployment.Step.endTests; import static com.yahoo.vespa.hosted.controller.deployment.Step.report; import static java.util.Comparator.naturalOrder; import static java.util.function.Predicate.not; +import static java.util.logging.Level.INFO; import static java.util.stream.Collectors.toList; import static java.util.stream.Collectors.toMap; import static java.util.stream.Collectors.toSet; @@ -440,8 +442,13 @@ public class JobController { } /** Marks the given run as aborted; no further normal steps will run, but run-always steps will try to succeed. */ - public void abort(RunId id) { - locked(id, run -> run.aborted()); + public void abort(RunId id, String reason) { + locked(id, run -> { + run.stepStatuses().entrySet().stream() + .filter(entry -> entry.getValue() == unfinished) + .forEach(entry -> log(id, entry.getKey(), INFO, "Aborting run: " + reason)); + return run.aborted(); + }); } /** Accepts and stores a new application package and test jar pair under a generated application version key. */ @@ -560,7 +567,7 @@ public class JobController { /** Aborts a run and waits for it complete. */ private void abortAndWait(RunId id) { - abort(id); + abort(id, "replaced by new deployment"); runner.get().accept(last(id.application(), id.type()).get()); while ( ! last(id.application(), id.type()).get().hasEnded()) { diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/JobRunner.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/JobRunner.java index 7a547dbc150..369699eb3a3 100644 --- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/JobRunner.java +++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/JobRunner.java @@ -80,7 +80,7 @@ public class JobRunner extends ControllerMaintainer { if ( ! run.hasFailed() && controller().clock().instant().isAfter(run.sleepUntil().orElse(run.start()).plus(jobTimeout))) executors.execute(() -> { - jobs.abort(run.id()); + jobs.abort(run.id(), "job timeout of " + jobTimeout + " reached"); advance(jobs.run(run.id()).get()); }); diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/restapi/application/ApplicationApiHandler.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/restapi/application/ApplicationApiHandler.java index 11db0f7e3c2..576e5fe1c8a 100644 --- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/restapi/application/ApplicationApiHandler.java +++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/restapi/application/ApplicationApiHandler.java @@ -352,7 +352,7 @@ public class ApplicationApiHandler extends AuditLoggingRequestHandler { if (path.matches("/application/v4/tenant/{tenant}/application/{application}/instance/{instance}")) return deleteInstance(path.get("tenant"), path.get("application"), path.get("instance"), request); if (path.matches("/application/v4/tenant/{tenant}/application/{application}/instance/{instance}/deploying")) return cancelDeploy(path.get("tenant"), path.get("application"), path.get("instance"), "all"); if (path.matches("/application/v4/tenant/{tenant}/application/{application}/instance/{instance}/deploying/{choice}")) return cancelDeploy(path.get("tenant"), path.get("application"), path.get("instance"), path.get("choice")); - if (path.matches("/application/v4/tenant/{tenant}/application/{application}/instance/{instance}/job/{jobtype}")) return JobControllerApiHandlerHelper.abortJobResponse(controller.jobController(), appIdFromPath(path), jobTypeFromPath(path)); + if (path.matches("/application/v4/tenant/{tenant}/application/{application}/instance/{instance}/job/{jobtype}")) return JobControllerApiHandlerHelper.abortJobResponse(controller.jobController(), request, appIdFromPath(path), jobTypeFromPath(path)); if (path.matches("/application/v4/tenant/{tenant}/application/{application}/instance/{instance}/job/{jobtype}/pause")) return resume(appIdFromPath(path), jobTypeFromPath(path)); if (path.matches("/application/v4/tenant/{tenant}/application/{application}/instance/{instance}/environment/{environment}/region/{region}")) return deactivate(path.get("tenant"), path.get("application"), path.get("instance"), path.get("environment"), path.get("region"), request); if (path.matches("/application/v4/tenant/{tenant}/application/{application}/instance/{instance}/environment/{environment}/region/{region}/reindexing")) return disableReindexing(path.get("tenant"), path.get("application"), path.get("instance"), path.get("environment"), path.get("region"), request); diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/restapi/application/JobControllerApiHandlerHelper.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/restapi/application/JobControllerApiHandlerHelper.java index 659cfc6e39d..4cbbc796d04 100644 --- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/restapi/application/JobControllerApiHandlerHelper.java +++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/restapi/application/JobControllerApiHandlerHelper.java @@ -4,6 +4,7 @@ package com.yahoo.vespa.hosted.controller.restapi.application; import com.yahoo.config.application.api.DeploymentSpec; import com.yahoo.config.application.api.DeploymentSpec.ChangeBlocker; import com.yahoo.config.provision.ApplicationId; +import com.yahoo.container.jdisc.HttpRequest; import com.yahoo.container.jdisc.HttpResponse; import com.yahoo.restapi.MessageResponse; import com.yahoo.restapi.SlimeJsonResponse; @@ -218,12 +219,12 @@ class JobControllerApiHandlerHelper { } /** Aborts any job of the given type. */ - static HttpResponse abortJobResponse(JobController jobs, ApplicationId id, JobType type) { + static HttpResponse abortJobResponse(JobController jobs, HttpRequest request, ApplicationId id, JobType type) { Slime slime = new Slime(); Cursor responseObject = slime.setObject(); Optional<Run> run = jobs.last(id, type).flatMap(last -> jobs.active(last.id())); if (run.isPresent()) { - jobs.abort(run.get().id()); + jobs.abort(run.get().id(), "aborted by " + request.getJDiscRequest().getUserPrincipal()); responseObject.setString("message", "Aborting " + run.get().id()); } else diff --git a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentContext.java b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentContext.java index 08cd88cf23f..21cc69369d8 100644 --- a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentContext.java +++ b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentContext.java @@ -410,7 +410,7 @@ public class DeploymentContext { public DeploymentContext abortJob(JobType type) { var job = jobId(type); assertNotSame(RunStatus.aborted, currentRun(job).status()); - jobs.abort(currentRun(job).id()); + jobs.abort(currentRun(job).id(), "DeploymentContext.abortJob"); jobAborted(type); return this; } diff --git a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentTester.java b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentTester.java index 01e1301c8cf..39c5dd2eae0 100644 --- a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentTester.java +++ b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentTester.java @@ -135,7 +135,7 @@ public class DeploymentTester { public void abortAll() { triggerJobs(); for (Run run : jobs.active()) { - jobs.abort(run.id()); + jobs.abort(run.id(), "DeploymentTester.abortAll"); runner.advance(jobs.run(run.id()).get()); assertTrue(jobs.run(run.id()).get().hasEnded()); } diff --git a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/JobRunnerTest.java b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/JobRunnerTest.java index 6b380981e15..a339cde2b89 100644 --- a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/JobRunnerTest.java +++ b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/JobRunnerTest.java @@ -174,7 +174,7 @@ public class JobRunnerTest { assertStepsWithStartTime(run.get(), deployTester, deployReal, installTester, installReal, startTests, endTests, copyVespaLogs, deactivateTester, deactivateReal); // Abortion does nothing, as the run has already failed. - jobs.abort(run.get().id()); + jobs.abort(run.get().id(), "abort"); runner.maintain(); assertEquals(List.of(deactivateReal, deactivateTester), run.get().readySteps()); assertStepsWithStartTime(run.get(), deployTester, deployReal, installTester, installReal, startTests, endTests, copyVespaLogs, deactivateTester, deactivateReal); |