aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author jonmv <venstad@gmail.com> 2022-06-15 08:24:32 +0200
committer jonmv <venstad@gmail.com> 2022-06-15 08:24:32 +0200
commit d548a9088083796de6c8f315b1623962ec6a43eb (patch)
tree aaa8cb424051ed1856207577f33106089a32c2a0
parent ed7e067b0866108d2d4f23c0a98e9769f25576f3 (diff)
Handle failures when triggering individual jobs
-rw-r--r-- controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentTrigger.java 32
-rw-r--r-- controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/JobController.java 4
-rw-r--r-- controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/ReadyJobsTrigger.java 5
-rw-r--r-- controller-server/src/test/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentTester.java 2
-rw-r--r-- controller-server/src/test/java/com/yahoo/vespa/hosted/controller/restapi/application/ApplicationApiTest.java 2
5 files changed, 29 insertions, 16 deletions
diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentTrigger.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentTrigger.java
index 6e2ae0da46d..e3b9381c79d 100644
--- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentTrigger.java
+++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentTrigger.java
@@ -158,7 +158,7 @@ public class DeploymentTrigger {
*
* Only one job per type is triggered each run for test jobs, since their environments have limited capacity.
*/
- public long triggerReadyJobs() {
+ public TriggerResult triggerReadyJobs() {
List<Job> readyJobs = computeReadyJobs();
var prodJobs = new ArrayList<Job>();
@@ -182,23 +182,35 @@ public class DeploymentTrigger {
.collect(groupingBy(Job::jobType));
// Trigger all prod jobs
- sortedProdJobs.forEach(this::trigger);
- long triggeredJobs = sortedProdJobs.size();
+ long triggeredJobs = 0;
+ long failedJobs = 0;
+ for (Job job : sortedProdJobs) {
+ if (trigger(job)) ++triggeredJobs;
+ else ++failedJobs;
+ }
// Trigger max one test job per type
- for (var jobs : sortedTestJobsByType.values()) {
+ for (var jobs : sortedTestJobsByType.values())
if (jobs.size() > 0) {
- trigger(jobs.get(0));
- triggeredJobs++;
+ if (trigger(jobs.get(0))) ++triggeredJobs;
+ else ++failedJobs;
}
- }
- return triggeredJobs;
+
+ return new TriggerResult(triggeredJobs, failedJobs);
}
+ public record TriggerResult(long triggered, long failed) { }
/** Attempts to trigger the given job. */
- private void trigger(Job job) {
- trigger(job, null);
+ private boolean trigger(Job job) {
+ try {
+ trigger(job, null);
+ return true;
+ }
+ catch (Exception e) {
+ log.log(Level.WARNING, "Failed triggering " + job.jobType() + " for " + job.instanceId, e);
+ return false;
+ }
}
/** Attempts to trigger the given job. */
diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/JobController.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/JobController.java
index 3d4a2f40303..c83527372b2 100644
--- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/JobController.java
+++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/JobController.java
@@ -584,8 +584,8 @@ public class JobController {
if (revision.compileVersion()
.map(version -> controller.applications().versionCompatibility(id).refuse(versions.targetPlatform(), version))
.orElse(false))
- throw new IllegalArgumentException("Will not start a job with incompatible platform version (" + versions.targetPlatform() + ") " +
- "and compile versions (" + revision.compileVersion().get() + ")");
+ throw new IllegalArgumentException("Will not start " + type + " for " + id + " with incompatible platform version (" +
+ versions.targetPlatform() + ") " + "and compile versions (" + revision.compileVersion().get() + ")");
locked(id, type, __ -> {
Optional<Run> last = last(id, type);
diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/ReadyJobsTrigger.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/ReadyJobsTrigger.java
index 26df8669fb1..5178918aa48 100644
--- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/ReadyJobsTrigger.java
+++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/ReadyJobsTrigger.java
@@ -2,6 +2,7 @@
package com.yahoo.vespa.hosted.controller.maintenance;
import com.yahoo.vespa.hosted.controller.Controller;
+import com.yahoo.vespa.hosted.controller.deployment.DeploymentTrigger.TriggerResult;
import java.time.Duration;
@@ -18,8 +19,8 @@ public class ReadyJobsTrigger extends ControllerMaintainer {
@Override
public double maintain() {
- controller().applications().deploymentTrigger().triggerReadyJobs();
- return 1.0;
+ TriggerResult result = controller().applications().deploymentTrigger().triggerReadyJobs();
+ return result.triggered() * 1.0f / (result.triggered() + result.failed());
}
}
diff --git a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentTester.java b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentTester.java
index 78e7606d7c6..2b4a2baa17e 100644
--- a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentTester.java
+++ b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentTester.java
@@ -146,7 +146,7 @@ public class DeploymentTester {
int triggered;
int triggeredTotal = 0;
do {
- triggered = (int) deploymentTrigger().triggerReadyJobs();
+ triggered = (int) deploymentTrigger().triggerReadyJobs().triggered();
triggeredTotal += triggered;
} while (triggered > 0);
return triggeredTotal;
diff --git a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/restapi/application/ApplicationApiTest.java b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/restapi/application/ApplicationApiTest.java
index d9f0f010104..c5d2dad20bc 100644
--- a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/restapi/application/ApplicationApiTest.java
+++ b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/restapi/application/ApplicationApiTest.java
@@ -842,7 +842,7 @@ public class ApplicationApiTest extends ControllerContainerTest {
"{\"message\":\"Marked build '2' as non-deployable\"}");
// GET deployment job overview, after triggering system and staging test jobs.
- assertEquals(2, tester.controller().applications().deploymentTrigger().triggerReadyJobs());
+ assertEquals(2, tester.controller().applications().deploymentTrigger().triggerReadyJobs().triggered());
tester.assertResponse(request("/application/v4/tenant/tenant1/application/application1/instance/instance1/job", GET)
.userIdentity(USER_ID),
new File("jobs.json"));