summary | refs | log | tree | commit | diff | stats
path: root/controller-server
diff options
context:
space:
mode:
author: Jon Marius Venstad <venstad@gmail.com> 2022-02-28 15:40:43 +0100
committer: Jon Marius Venstad <venstad@gmail.com> 2022-02-28 15:40:43 +0100
commit: 84d6b319d2d7308b9f0c85a95ff28f0c153f61c6 (patch)
tree: a1049bbd07a8ae732ef4e7ca057f4631877471c4 /controller-server
parent: 5f8948ee7bdef69a3d5d4c113e35ed5f4044dd72 (diff)
Log why runs are aborted
Diffstat (limited to 'controller-server')
-rw-r--r-- controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentTrigger.java | 7
-rw-r--r-- controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/JobController.java | 13
-rw-r--r-- controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/JobRunner.java | 2
-rw-r--r-- controller-server/src/main/java/com/yahoo/vespa/hosted/controller/restapi/application/ApplicationApiHandler.java | 2
-rw-r--r-- controller-server/src/main/java/com/yahoo/vespa/hosted/controller/restapi/application/JobControllerApiHandlerHelper.java | 5
-rw-r--r-- controller-server/src/test/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentContext.java | 2
-rw-r--r-- controller-server/src/test/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentTester.java | 2
-rw-r--r-- controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/JobRunnerTest.java | 2
8 files changed, 23 insertions, 12 deletions
diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentTrigger.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentTrigger.java
index 9f2ad0bd41c..5a99f6b1e50 100644
--- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentTrigger.java
+++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentTrigger.java
@@ -247,7 +247,7 @@ public class DeploymentTrigger {
.collect(toList());
controller.curator().writeRetriggerEntries(newList);
}
- controller.jobController().abort(run.id());
+ controller.jobController().abort(run.id(), "force re-triggered");
return Optional.empty();
} else {
return Optional.of(reTrigger(deployment.applicationId(), jobType));
@@ -363,7 +363,10 @@ public class DeploymentTrigger {
if (jobs.get(job).stream().noneMatch(versions -> versions.versions().targetsMatch(last.versions())
&& versions.versions().sourcesMatchIfPresent(last.versions()))) {
log.log(Level.INFO, "Aborting outdated run " + last);
- controller.jobController().abort(last.id());
+ controller.jobController().abort(last.id(), "run no longer scheduled, and is blocking scheduled runs: " +
+ jobs.get(job).stream()
+ .map(scheduled -> scheduled.versions().toString())
+ .collect(Collectors.joining(", ")));
}
});
}
diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/JobController.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/JobController.java
index 60339a3233c..88b47194936 100644
--- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/JobController.java
+++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/JobController.java
@@ -61,6 +61,7 @@ import static com.yahoo.config.provision.Environment.prod;
import static com.yahoo.vespa.hosted.controller.deployment.RunStatus.reset;
import static com.yahoo.vespa.hosted.controller.deployment.RunStatus.running;
import static com.yahoo.vespa.hosted.controller.deployment.Step.Status.succeeded;
+import static com.yahoo.vespa.hosted.controller.deployment.Step.Status.unfinished;
import static com.yahoo.vespa.hosted.controller.deployment.Step.copyVespaLogs;
import static com.yahoo.vespa.hosted.controller.deployment.Step.deactivateTester;
import static com.yahoo.vespa.hosted.controller.deployment.Step.endStagingSetup;
@@ -68,6 +69,7 @@ import static com.yahoo.vespa.hosted.controller.deployment.Step.endTests;
import static com.yahoo.vespa.hosted.controller.deployment.Step.report;
import static java.util.Comparator.naturalOrder;
import static java.util.function.Predicate.not;
+import static java.util.logging.Level.INFO;
import static java.util.stream.Collectors.toList;
import static java.util.stream.Collectors.toMap;
import static java.util.stream.Collectors.toSet;
@@ -440,8 +442,13 @@ public class JobController {
}
/** Marks the given run as aborted; no further normal steps will run, but run-always steps will try to succeed. */
- public void abort(RunId id) {
- locked(id, run -> run.aborted());
+ public void abort(RunId id, String reason) {
+ locked(id, run -> {
+ run.stepStatuses().entrySet().stream()
+ .filter(entry -> entry.getValue() == unfinished)
+ .forEach(entry -> log(id, entry.getKey(), INFO, "Aborting run: " + reason));
+ return run.aborted();
+ });
}
/** Accepts and stores a new application package and test jar pair under a generated application version key. */
@@ -560,7 +567,7 @@ public class JobController {
/** Aborts a run and waits for it complete. */
private void abortAndWait(RunId id) {
- abort(id);
+ abort(id, "replaced by new deployment");
runner.get().accept(last(id.application(), id.type()).get());
while ( ! last(id.application(), id.type()).get().hasEnded()) {
diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/JobRunner.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/JobRunner.java
index 7a547dbc150..369699eb3a3 100644
--- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/JobRunner.java
+++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/JobRunner.java
@@ -80,7 +80,7 @@ public class JobRunner extends ControllerMaintainer {
if ( ! run.hasFailed()
&& controller().clock().instant().isAfter(run.sleepUntil().orElse(run.start()).plus(jobTimeout)))
executors.execute(() -> {
- jobs.abort(run.id());
+ jobs.abort(run.id(), "job timeout of " + jobTimeout + " reached");
advance(jobs.run(run.id()).get());
});
diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/restapi/application/ApplicationApiHandler.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/restapi/application/ApplicationApiHandler.java
index 11db0f7e3c2..576e5fe1c8a 100644
--- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/restapi/application/ApplicationApiHandler.java
+++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/restapi/application/ApplicationApiHandler.java
@@ -352,7 +352,7 @@ public class ApplicationApiHandler extends AuditLoggingRequestHandler {
if (path.matches("/application/v4/tenant/{tenant}/application/{application}/instance/{instance}")) return deleteInstance(path.get("tenant"), path.get("application"), path.get("instance"), request);
if (path.matches("/application/v4/tenant/{tenant}/application/{application}/instance/{instance}/deploying")) return cancelDeploy(path.get("tenant"), path.get("application"), path.get("instance"), "all");
if (path.matches("/application/v4/tenant/{tenant}/application/{application}/instance/{instance}/deploying/{choice}")) return cancelDeploy(path.get("tenant"), path.get("application"), path.get("instance"), path.get("choice"));
- if (path.matches("/application/v4/tenant/{tenant}/application/{application}/instance/{instance}/job/{jobtype}")) return JobControllerApiHandlerHelper.abortJobResponse(controller.jobController(), appIdFromPath(path), jobTypeFromPath(path));
+ if (path.matches("/application/v4/tenant/{tenant}/application/{application}/instance/{instance}/job/{jobtype}")) return JobControllerApiHandlerHelper.abortJobResponse(controller.jobController(), request, appIdFromPath(path), jobTypeFromPath(path));
if (path.matches("/application/v4/tenant/{tenant}/application/{application}/instance/{instance}/job/{jobtype}/pause")) return resume(appIdFromPath(path), jobTypeFromPath(path));
if (path.matches("/application/v4/tenant/{tenant}/application/{application}/instance/{instance}/environment/{environment}/region/{region}")) return deactivate(path.get("tenant"), path.get("application"), path.get("instance"), path.get("environment"), path.get("region"), request);
if (path.matches("/application/v4/tenant/{tenant}/application/{application}/instance/{instance}/environment/{environment}/region/{region}/reindexing")) return disableReindexing(path.get("tenant"), path.get("application"), path.get("instance"), path.get("environment"), path.get("region"), request);
diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/restapi/application/JobControllerApiHandlerHelper.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/restapi/application/JobControllerApiHandlerHelper.java
index 659cfc6e39d..4cbbc796d04 100644
--- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/restapi/application/JobControllerApiHandlerHelper.java
+++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/restapi/application/JobControllerApiHandlerHelper.java
@@ -4,6 +4,7 @@ package com.yahoo.vespa.hosted.controller.restapi.application;
import com.yahoo.config.application.api.DeploymentSpec;
import com.yahoo.config.application.api.DeploymentSpec.ChangeBlocker;
import com.yahoo.config.provision.ApplicationId;
+import com.yahoo.container.jdisc.HttpRequest;
import com.yahoo.container.jdisc.HttpResponse;
import com.yahoo.restapi.MessageResponse;
import com.yahoo.restapi.SlimeJsonResponse;
@@ -218,12 +219,12 @@ class JobControllerApiHandlerHelper {
}
/** Aborts any job of the given type. */
- static HttpResponse abortJobResponse(JobController jobs, ApplicationId id, JobType type) {
+ static HttpResponse abortJobResponse(JobController jobs, HttpRequest request, ApplicationId id, JobType type) {
Slime slime = new Slime();
Cursor responseObject = slime.setObject();
Optional<Run> run = jobs.last(id, type).flatMap(last -> jobs.active(last.id()));
if (run.isPresent()) {
- jobs.abort(run.get().id());
+ jobs.abort(run.get().id(), "aborted by " + request.getJDiscRequest().getUserPrincipal());
responseObject.setString("message", "Aborting " + run.get().id());
}
else
diff --git a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentContext.java b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentContext.java
index 08cd88cf23f..21cc69369d8 100644
--- a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentContext.java
+++ b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentContext.java
@@ -410,7 +410,7 @@ public class DeploymentContext {
public DeploymentContext abortJob(JobType type) {
var job = jobId(type);
assertNotSame(RunStatus.aborted, currentRun(job).status());
- jobs.abort(currentRun(job).id());
+ jobs.abort(currentRun(job).id(), "DeploymentContext.abortJob");
jobAborted(type);
return this;
}
diff --git a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentTester.java b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentTester.java
index 01e1301c8cf..39c5dd2eae0 100644
--- a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentTester.java
+++ b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentTester.java
@@ -135,7 +135,7 @@ public class DeploymentTester {
public void abortAll() {
triggerJobs();
for (Run run : jobs.active()) {
- jobs.abort(run.id());
+ jobs.abort(run.id(), "DeploymentTester.abortAll");
runner.advance(jobs.run(run.id()).get());
assertTrue(jobs.run(run.id()).get().hasEnded());
}
diff --git a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/JobRunnerTest.java b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/JobRunnerTest.java
index 6b380981e15..a339cde2b89 100644
--- a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/JobRunnerTest.java
+++ b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/JobRunnerTest.java
@@ -174,7 +174,7 @@ public class JobRunnerTest {
assertStepsWithStartTime(run.get(), deployTester, deployReal, installTester, installReal, startTests, endTests, copyVespaLogs, deactivateTester, deactivateReal);
// Abortion does nothing, as the run has already failed.
- jobs.abort(run.get().id());
+ jobs.abort(run.get().id(), "abort");
runner.maintain();
assertEquals(List.of(deactivateReal, deactivateTester), run.get().readySteps());
assertStepsWithStartTime(run.get(), deployTester, deployReal, installTester, installReal, startTests, endTests, copyVespaLogs, deactivateTester, deactivateReal);