summary | refs | log | tree | commit | diff | stats
path: root/controller-server
diff options
context:
space:
mode:
author Jon Bratseth <bratseth@yahoo-inc.com> 2017-10-05 14:32:10 +0200
committer Jon Bratseth <bratseth@yahoo-inc.com> 2017-10-05 14:32:10 +0200
commit 4fb17280a8fd1532965f06a624b52511351c16ca (patch)
tree 5772271ad62390214bf946b5e3bfe5a7e6d23af1 /controller-server
parent 07368c537b93757ac8b36f35838eeb0d8ef3b507 (diff)
Don't retry jobs that are running
Diffstat (limited to 'controller-server')
-rw-r--r-- controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentTrigger.java 12
-rw-r--r-- controller-server/src/test/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentTriggerTest.java 10
2 files changed, 8 insertions, 14 deletions
diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentTrigger.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentTrigger.java
index 0268e187f35..9b2158e161e 100644
--- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentTrigger.java
+++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentTrigger.java
@@ -115,6 +115,7 @@ public class DeploymentTrigger {
break;
}
}
+
// Retry dead job
Optional<JobStatus> firstDeadJob = firstDeadJob(application.deploymentJobs(), timeout);
if (firstDeadJob.isPresent()) {
@@ -213,16 +214,15 @@ public class DeploymentTrigger {
/** Decide whether the job should be triggered by the periodic trigger */
private boolean shouldRetryNow(JobStatus job) {
if (job.isSuccess()) return false;
+ if (job.inProgress()) return false;
- if ( ! job.lastCompleted().isPresent()) return true; // Retry when we don't hear back
+ // Retry after 10% of the time since it started failing
+ Duration aTenthOfFailTime = Duration.ofMillis( (clock.millis() - job.firstFailing().get().at().toEpochMilli()) / 10);
+ if (job.lastCompleted().get().at().isBefore(clock.instant().minus(aTenthOfFailTime))) return true;
- // Always retry if we haven't tried in 4 hours
+ // ... or retry anyway if we haven't tried in 4 hours
if (job.lastCompleted().get().at().isBefore(clock.instant().minus(Duration.ofHours(4)))) return true;
- // Wait for 10% of the time since it started failing
- Duration aTenthOfFailTime = Duration.ofMillis( (clock.millis() - job.firstFailing().get().at().toEpochMilli()) / 10);
- if (job.lastCompleted().get().at().isBefore(clock.instant().minus(aTenthOfFailTime))) return true;
-
return false;
}
diff --git a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentTriggerTest.java b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentTriggerTest.java
index 5a48bc54b49..5abadf28cfb 100644
--- a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentTriggerTest.java
+++ b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentTriggerTest.java
@@ -53,15 +53,9 @@ public class DeploymentTriggerTest {
tester.buildSystem().takeJobsToRun();
assertEquals("Job removed", 0, tester.buildSystem().jobs().size());
tester.clock().advance(Duration.ofHours(4).plus(Duration.ofSeconds(1)));
- tester.failureRedeployer().maintain();
+ tester.failureRedeployer().maintain(); // Causes retry of systemTests
- assertEquals("Retried job", 1, tester.buildSystem().jobs().size());
- assertEquals(JobType.systemTest.id(), tester.buildSystem().jobs().get(0).jobName());
- tester.buildSystem().takeJobsToRun();
- assertEquals("Job removed", 0, tester.buildSystem().jobs().size());
-
- // system-test succeeds and staging-test starts
- tester.failureRedeployer().maintain();
+ assertEquals("Scheduled retry", 1, tester.buildSystem().jobs().size());
tester.deployAndNotify(app, applicationPackage, true, JobType.systemTest);
// staging-test times out and is retried