diff options
author | jonmv <venstad@gmail.com> | 2022-06-15 08:24:32 +0200 |
---|---|---|
committer | jonmv <venstad@gmail.com> | 2022-06-15 08:24:32 +0200 |
commit | d548a9088083796de6c8f315b1623962ec6a43eb (patch) | |
tree | aaa8cb424051ed1856207577f33106089a32c2a0 | |
parent | ed7e067b0866108d2d4f23c0a98e9769f25576f3 (diff) |
Handle failures when triggering individual jobs
5 files changed, 29 insertions, 16 deletions
diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentTrigger.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentTrigger.java
index 6e2ae0da46d..e3b9381c79d 100644
--- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentTrigger.java
+++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentTrigger.java
@@ -158,7 +158,7 @@ public class DeploymentTrigger {
      *
      * Only one job per type is triggered each run for test jobs, since their environments have limited capacity.
      */
-    public long triggerReadyJobs() {
+    public TriggerResult triggerReadyJobs() {
         List<Job> readyJobs = computeReadyJobs();
 
         var prodJobs = new ArrayList<Job>();
@@ -182,23 +182,35 @@ public class DeploymentTrigger {
                 .collect(groupingBy(Job::jobType));
 
         // Trigger all prod jobs
-        sortedProdJobs.forEach(this::trigger);
-        long triggeredJobs = sortedProdJobs.size();
+        long triggeredJobs = 0;
+        long failedJobs = 0;
+        for (Job job : sortedProdJobs) {
+            if (trigger(job)) ++triggeredJobs;
+            else ++failedJobs;
+        }
 
         // Trigger max one test job per type
-        for (var jobs : sortedTestJobsByType.values()) {
+        for (var jobs : sortedTestJobsByType.values())
             if (jobs.size() > 0) {
-                trigger(jobs.get(0));
-                triggeredJobs++;
+                if (trigger(jobs.get(0))) ++triggeredJobs;
+                else ++failedJobs;
             }
-        }
-        return triggeredJobs;
+
+        return new TriggerResult(triggeredJobs, failedJobs);
     }
 
+    public record TriggerResult(long triggered, long failed) { }
 
     /** Attempts to trigger the given job. */
-    private void trigger(Job job) {
-        trigger(job, null);
+    private boolean trigger(Job job) {
+        try {
+            trigger(job, null);
+            return true;
+        }
+        catch (Exception e) {
+            log.log(Level.WARNING, "Failed triggering " + job.jobType() + " for " + job.instanceId, e);
+            return false;
+        }
     }
 
     /** Attempts to trigger the given job. */
diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/JobController.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/JobController.java
index 3d4a2f40303..c83527372b2 100644
--- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/JobController.java
+++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/JobController.java
@@ -584,8 +584,8 @@ public class JobController {
         if (revision.compileVersion()
                     .map(version -> controller.applications().versionCompatibility(id).refuse(versions.targetPlatform(), version))
                     .orElse(false))
-            throw new IllegalArgumentException("Will not start a job with incompatible platform version (" + versions.targetPlatform() + ") " +
-                                               "and compile versions (" + revision.compileVersion().get() + ")");
+            throw new IllegalArgumentException("Will not start " + type + " for " + id + " with incompatible platform version (" +
+                                               versions.targetPlatform() + ") " + "and compile versions (" + revision.compileVersion().get() + ")");
 
         locked(id, type, __ -> {
             Optional<Run> last = last(id, type);
diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/ReadyJobsTrigger.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/ReadyJobsTrigger.java
index 26df8669fb1..5178918aa48 100644
--- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/ReadyJobsTrigger.java
+++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/ReadyJobsTrigger.java
@@ -2,6 +2,7 @@
 package com.yahoo.vespa.hosted.controller.maintenance;
 
 import com.yahoo.vespa.hosted.controller.Controller;
+import com.yahoo.vespa.hosted.controller.deployment.DeploymentTrigger.TriggerResult;
 
 import java.time.Duration;
 
@@ -18,8 +19,8 @@ public class ReadyJobsTrigger extends ControllerMaintainer {
 
     @Override
     public double maintain() {
-        controller().applications().deploymentTrigger().triggerReadyJobs();
-        return 1.0;
+        TriggerResult result = controller().applications().deploymentTrigger().triggerReadyJobs();
+        return result.triggered() * 1.0f / (result.triggered() + result.failed());
     }
 
 }
diff --git a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentTester.java b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentTester.java
index 78e7606d7c6..2b4a2baa17e 100644
--- a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentTester.java
+++ b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentTester.java
@@ -146,7 +146,7 @@ public class DeploymentTester {
         int triggered;
         int triggeredTotal = 0;
         do {
-            triggered = (int) deploymentTrigger().triggerReadyJobs();
+            triggered = (int) deploymentTrigger().triggerReadyJobs().triggered();
             triggeredTotal += triggered;
         } while (triggered > 0);
         return triggeredTotal;
diff --git a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/restapi/application/ApplicationApiTest.java b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/restapi/application/ApplicationApiTest.java
index d9f0f010104..c5d2dad20bc 100644
--- a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/restapi/application/ApplicationApiTest.java
+++ b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/restapi/application/ApplicationApiTest.java
@@ -842,7 +842,7 @@ public class ApplicationApiTest extends ControllerContainerTest {
                               "{\"message\":\"Marked build '2' as non-deployable\"}");
 
         // GET deployment job overview, after triggering system and staging test jobs.
-        assertEquals(2, tester.controller().applications().deploymentTrigger().triggerReadyJobs());
+        assertEquals(2, tester.controller().applications().deploymentTrigger().triggerReadyJobs().triggered());
         tester.assertResponse(request("/application/v4/tenant/tenant1/application/application1/instance/instance1/job", GET)
                                       .userIdentity(USER_ID),
                               new File("jobs.json"));