summaryrefslogtreecommitdiffstats
path: root/controller-server
diff options
context:
space:
mode:
Diffstat (limited to 'controller-server')
-rw-r--r--controller-server/src/main/java/com/yahoo/vespa/hosted/controller/application/JobStatus.java43
-rw-r--r--controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentTrigger.java33
2 files changed, 41 insertions, 35 deletions
diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/application/JobStatus.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/application/JobStatus.java
index ceb04d88026..a7940076277 100644
--- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/application/JobStatus.java
+++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/application/JobStatus.java
@@ -11,14 +11,14 @@ import java.util.Optional;
/**
* The last known build status of a particular deployment job for a particular application.
* This is immutable.
- *
+ *
* @author bratseth
* @author mpolden
*/
public class JobStatus {
-
+
private final DeploymentJobs.JobType type;
-
+
private final Optional<JobRun> lastTriggered;
private final Optional<JobRun> lastCompleted;
private final Optional<JobRun> firstFailing;
@@ -42,7 +42,7 @@ public class JobStatus {
this.type = type;
this.jobError = jobError;
-
+
// Never say we triggered component because we don't:
this.lastTriggered = type == DeploymentJobs.JobType.component ? Optional.empty() : lastTriggered;
this.lastCompleted = lastCompleted;
@@ -52,7 +52,7 @@ public class JobStatus {
/** Returns an empty job status */
public static JobStatus initial(DeploymentJobs.JobType type) {
- return new JobStatus(type, Optional.empty(), Optional.empty(), Optional.empty(), Optional.empty(), Optional.empty());
+ return new JobStatus(type, Optional.empty(), Optional.empty(), Optional.empty(), Optional.empty(), Optional.empty());
}
public JobStatus withTriggering(Version version, Optional<ApplicationRevision> revision,
@@ -89,13 +89,13 @@ public class JobStatus {
Optional<JobRun> firstFailing = this.firstFailing;
if (jobError.isPresent() && ! this.firstFailing.isPresent())
firstFailing = Optional.of(thisCompletion);
-
+
Optional<JobRun> lastSuccess = this.lastSuccess;
if ( ! jobError.isPresent()) {
lastSuccess = Optional.of(thisCompletion);
firstFailing = Optional.empty();
}
-
+
return new JobStatus(type, jobError, lastTriggered, Optional.of(thisCompletion), firstFailing, lastSuccess);
}
@@ -105,7 +105,7 @@ public class JobStatus {
public boolean isSuccess() {
return lastCompleted().isPresent() && ! jobError.isPresent();
}
-
+
/** Returns true if last triggered is newer than last completed and was started after timeoutLimit */
public boolean isRunning(Instant timeoutLimit) {
if ( ! lastTriggered.isPresent()) return false;
@@ -114,6 +114,11 @@ public class JobStatus {
return ! lastTriggered.get().at().isBefore(lastCompleted.get().at());
}
+ /** Returns true if this is running and has been so since before the given limit */
+ public boolean isHanging(Instant timeoutLimit) {
+ return isRunning(Instant.MIN) && lastTriggered.get().at().isBefore(timeoutLimit.plusMillis(1));
+ }
+
/** The error of the last completion, or empty if the last run succeeded */
public Optional<DeploymentJobs.JobError> jobError() { return jobError; }
@@ -140,10 +145,10 @@ public class JobStatus {
", first failing: " + firstFailing.map(JobRun::toString).orElse("(not failing)") +
", lastSuccess: " + lastSuccess.map(JobRun::toString).orElse("(never)") + "]";
}
-
+
@Override
public int hashCode() { return Objects.hash(type, jobError, lastTriggered, lastCompleted, firstFailing, lastSuccess); }
-
+
@Override
public boolean equals(Object o) {
if (o == this) return true;
@@ -159,15 +164,15 @@ public class JobStatus {
/** Information about a particular triggering or completion of a run of a job. This is immutable. */
public static class JobRun {
-
+
private final long id;
private final Version version;
private final Optional<ApplicationRevision> revision;
private final boolean upgrade;
private final String reason;
private final Instant at;
-
- public JobRun(long id, Version version, Optional<ApplicationRevision> revision,
+
+ public JobRun(long id, Version version, Optional<ApplicationRevision> revision,
boolean upgrade, String reason, Instant at) {
Objects.requireNonNull(version, "version cannot be null");
Objects.requireNonNull(revision, "revision cannot be null");
@@ -188,16 +193,16 @@ public class JobStatus {
// TODO: Fix how this is set, and add an applicationChange() method as well, in the same vein.
/** Returns whether this job run was a Vespa upgrade */
public boolean upgrade() { return upgrade; }
-
+
/** Returns the Vespa version used on this run */
public Version version() { return version; }
-
+
/** Returns the application revision used for this run, or empty when not known */
public Optional<ApplicationRevision> revision() { return revision; }
-
+
/** Returns a human-readable reason for this particular job run */
public String reason() { return reason; }
-
+
/** Returns the time if this triggering or completion */
public Instant at() { return at; }
@@ -218,7 +223,7 @@ public class JobStatus {
public int hashCode() {
return Objects.hash(version, revision, upgrade, at);
}
-
+
@Override
public boolean equals(Object o) {
if (this == o) return true;
@@ -234,7 +239,7 @@ public class JobStatus {
@Override
public String toString() { return "job run " + id + " of version " + (upgrade() ? "upgrade " : "") + version + " "
+ revision + " at " + at; }
-
+
}
}
diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentTrigger.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentTrigger.java
index f0c950b024b..192901165be 100644
--- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentTrigger.java
+++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentTrigger.java
@@ -32,9 +32,9 @@ import java.util.logging.Logger;
/**
* Responsible for scheduling deployment jobs in a build system and keeping
* Application.deploying() in sync with what is scheduled.
- *
+ *
* This class is multithread safe.
- *
+ *
* @author bratseth
* @author mpolden
*/
@@ -60,7 +60,7 @@ public class DeploymentTrigger {
this.order = new DeploymentOrder(controller);
this.jobTimeout = controller.system().equals(SystemName.main) ? Duration.ofHours(12) : Duration.ofHours(1);
}
-
+
/** Returns the time in the past before which jobs are at this moment considered unresponsive */
public Instant jobTimeoutLimit() { return clock.instant().minus(jobTimeout); }
@@ -70,10 +70,10 @@ public class DeploymentTrigger {
//--- Start of methods which triggers deployment jobs -------------------------
- /**
+ /**
* Called each time a job completes (successfully or not) to cause triggering of one or more follow-up jobs
* (which may possibly the same job once over).
- *
+ *
* @param report information about the job that just completed
*/
public void triggerFromCompletion(JobReport report) {
@@ -143,10 +143,11 @@ public class DeploymentTrigger {
JobStatus systemTestStatus = application.deploymentJobs().jobStatus().get(JobType.systemTest);
if (application.deploying().get() instanceof Change.VersionChange) {
Version target = ((Change.VersionChange) application.deploying().get()).version();
- if (systemTestStatus == null
+ if (systemTestStatus == null
|| ! systemTestStatus.lastTriggered().isPresent()
|| ! systemTestStatus.isSuccess()
- || ! systemTestStatus.lastTriggered().get().version().equals(target)) {
+ || ! systemTestStatus.lastTriggered().get().version().equals(target)
+ || systemTestStatus.isHanging(jobTimeoutLimit())) {
application = trigger(JobType.systemTest, application, false, "Upgrade to " + target);
controller.applications().store(application);
}
@@ -170,7 +171,7 @@ public class DeploymentTrigger {
List<JobType> nextToTrigger = new ArrayList<>();
for (JobType nextJobType : order.nextAfter(jobType, application)) {
JobStatus nextStatus = application.deploymentJobs().jobStatus().get(nextJobType);
- if (changesAvailable(application, jobStatus, nextStatus))
+ if (changesAvailable(application, jobStatus, nextStatus) || nextStatus.isHanging(jobTimeoutLimit()))
nextToTrigger.add(nextJobType);
}
// Trigger them in parallel
@@ -209,10 +210,10 @@ public class DeploymentTrigger {
return true;
return false;
}
-
+
/**
* Triggers a change of this application
- *
+ *
* @param applicationId the application to trigger
* @throws IllegalArgumentException if this application already have an ongoing change
*/
@@ -267,7 +268,7 @@ public class DeploymentTrigger {
}
/**
- * Trigger a job for an application
+ * Trigger a job for an application
*
* @param jobType the type of the job to trigger, or null to trigger nothing
* @param application the application to trigger the job for
@@ -289,7 +290,7 @@ public class DeploymentTrigger {
/**
* Trigger a job for an application, if allowed
- *
+ *
* @param jobType the type of the job to trigger, or null to trigger nothing
* @param application the application to trigger the job for
* @param first whether to trigger the job before other jobs
@@ -323,7 +324,7 @@ public class DeploymentTrigger {
/** Returns true if the given proposed job triggering should be effected */
private boolean allowedTriggering(JobType jobType, LockedApplication application) {
- // Note: We could make a more fine-grained and more correct determination about whether to block
+ // Note: We could make a more fine-grained and more correct determination about whether to block
// by instead basing the decision on what is currently deployed in the zone. However,
// this leads to some additional corner cases, and the possibility of blocking an application
// fix to a version upgrade, so not doing it now
@@ -341,7 +342,7 @@ public class DeploymentTrigger {
return true;
}
-
+
private boolean isRunningProductionJob(Application application) {
return JobList.from(application)
.production()
@@ -364,7 +365,7 @@ public class DeploymentTrigger {
if (existingDeployment == null) return false;
return existingDeployment.version().isAfter(version);
}
-
+
private boolean acceptNewRevisionNow(LockedApplication application) {
if ( ! application.deploying().isPresent()) return true;
@@ -377,5 +378,5 @@ public class DeploymentTrigger {
// Otherwise, the application is currently upgrading, without failures, and we should wait with the revision.
return false;
}
-
+
}