diff options
Diffstat (limited to 'controller-server')
2 files changed, 41 insertions, 35 deletions
diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/application/JobStatus.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/application/JobStatus.java index ceb04d88026..a7940076277 100644 --- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/application/JobStatus.java +++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/application/JobStatus.java @@ -11,14 +11,14 @@ import java.util.Optional; /** * The last known build status of a particular deployment job for a particular application. * This is immutable. - * + * * @author bratseth * @author mpolden */ public class JobStatus { - + private final DeploymentJobs.JobType type; - + private final Optional<JobRun> lastTriggered; private final Optional<JobRun> lastCompleted; private final Optional<JobRun> firstFailing; @@ -42,7 +42,7 @@ public class JobStatus { this.type = type; this.jobError = jobError; - + // Never say we triggered component because we don't: this.lastTriggered = type == DeploymentJobs.JobType.component ? Optional.empty() : lastTriggered; this.lastCompleted = lastCompleted; @@ -52,7 +52,7 @@ public class JobStatus { /** Returns an empty job status */ public static JobStatus initial(DeploymentJobs.JobType type) { - return new JobStatus(type, Optional.empty(), Optional.empty(), Optional.empty(), Optional.empty(), Optional.empty()); + return new JobStatus(type, Optional.empty(), Optional.empty(), Optional.empty(), Optional.empty(), Optional.empty()); } public JobStatus withTriggering(Version version, Optional<ApplicationRevision> revision, @@ -89,13 +89,13 @@ public class JobStatus { Optional<JobRun> firstFailing = this.firstFailing; if (jobError.isPresent() && ! this.firstFailing.isPresent()) firstFailing = Optional.of(thisCompletion); - + Optional<JobRun> lastSuccess = this.lastSuccess; if ( ! jobError.isPresent()) { lastSuccess = Optional.of(thisCompletion); firstFailing = Optional.empty(); } - + return new JobStatus(type, jobError, lastTriggered, Optional.of(thisCompletion), firstFailing, lastSuccess); } @@ -105,7 +105,7 @@ public class JobStatus { public boolean isSuccess() { return lastCompleted().isPresent() && ! jobError.isPresent(); } - + /** Returns true if last triggered is newer than last completed and was started after timeoutLimit */ public boolean isRunning(Instant timeoutLimit) { if ( ! lastTriggered.isPresent()) return false; @@ -114,6 +114,11 @@ public class JobStatus { return ! lastTriggered.get().at().isBefore(lastCompleted.get().at()); } + /** Returns true if this is running and has been so since before the given limit */ + public boolean isHanging(Instant timeoutLimit) { + return isRunning(Instant.MIN) && lastTriggered.get().at().isBefore(timeoutLimit.plusMillis(1)); + } + /** The error of the last completion, or empty if the last run succeeded */ public Optional<DeploymentJobs.JobError> jobError() { return jobError; } @@ -140,10 +145,10 @@ public class JobStatus { ", first failing: " + firstFailing.map(JobRun::toString).orElse("(not failing)") + ", lastSuccess: " + lastSuccess.map(JobRun::toString).orElse("(never)") + "]"; } - + @Override public int hashCode() { return Objects.hash(type, jobError, lastTriggered, lastCompleted, firstFailing, lastSuccess); } - + @Override public boolean equals(Object o) { if (o == this) return true; @@ -159,15 +164,15 @@ public class JobStatus { /** Information about a particular triggering or completion of a run of a job. This is immutable. */ public static class JobRun { - + private final long id; private final Version version; private final Optional<ApplicationRevision> revision; private final boolean upgrade; private final String reason; private final Instant at; - - public JobRun(long id, Version version, Optional<ApplicationRevision> revision, + + public JobRun(long id, Version version, Optional<ApplicationRevision> revision, boolean upgrade, String reason, Instant at) { Objects.requireNonNull(version, "version cannot be null"); Objects.requireNonNull(revision, "revision cannot be null"); @@ -188,16 +193,16 @@ public class JobStatus { // TODO: Fix how this is set, and add an applicationChange() method as well, in the same vein. /** Returns whether this job run was a Vespa upgrade */ public boolean upgrade() { return upgrade; } - + /** Returns the Vespa version used on this run */ public Version version() { return version; } - + /** Returns the application revision used for this run, or empty when not known */ public Optional<ApplicationRevision> revision() { return revision; } - + /** Returns a human-readable reason for this particular job run */ public String reason() { return reason; } - + /** Returns the time if this triggering or completion */ public Instant at() { return at; } @@ -218,7 +223,7 @@ public class JobStatus { public int hashCode() { return Objects.hash(version, revision, upgrade, at); } - + @Override public boolean equals(Object o) { if (this == o) return true; @@ -234,7 +239,7 @@ public class JobStatus { @Override public String toString() { return "job run " + id + " of version " + (upgrade() ? "upgrade " : "") + version + " " + revision + " at " + at; } - + } } diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentTrigger.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentTrigger.java index f0c950b024b..192901165be 100644 --- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentTrigger.java +++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentTrigger.java @@ -32,9 +32,9 @@ import java.util.logging.Logger; /** * Responsible for scheduling deployment jobs in a build system and keeping * Application.deploying() in sync with what is scheduled. - * + * * This class is multithread safe. - * + * * @author bratseth * @author mpolden */ @@ -60,7 +60,7 @@ public class DeploymentTrigger { this.order = new DeploymentOrder(controller); this.jobTimeout = controller.system().equals(SystemName.main) ? Duration.ofHours(12) : Duration.ofHours(1); } - + /** Returns the time in the past before which jobs are at this moment considered unresponsive */ public Instant jobTimeoutLimit() { return clock.instant().minus(jobTimeout); } @@ -70,10 +70,10 @@ public class DeploymentTrigger { //--- Start of methods which triggers deployment jobs ------------------------- - /** + /** * Called each time a job completes (successfully or not) to cause triggering of one or more follow-up jobs * (which may possibly the same job once over). - * + * * @param report information about the job that just completed */ public void triggerFromCompletion(JobReport report) { @@ -143,10 +143,11 @@ public class DeploymentTrigger { JobStatus systemTestStatus = application.deploymentJobs().jobStatus().get(JobType.systemTest); if (application.deploying().get() instanceof Change.VersionChange) { Version target = ((Change.VersionChange) application.deploying().get()).version(); - if (systemTestStatus == null + if (systemTestStatus == null || ! systemTestStatus.lastTriggered().isPresent() || ! systemTestStatus.isSuccess() - || ! systemTestStatus.lastTriggered().get().version().equals(target)) { + || ! systemTestStatus.lastTriggered().get().version().equals(target) + || systemTestStatus.isHanging(jobTimeoutLimit())) { application = trigger(JobType.systemTest, application, false, "Upgrade to " + target); controller.applications().store(application); } @@ -170,7 +171,7 @@ public class DeploymentTrigger { List<JobType> nextToTrigger = new ArrayList<>(); for (JobType nextJobType : order.nextAfter(jobType, application)) { JobStatus nextStatus = application.deploymentJobs().jobStatus().get(nextJobType); - if (changesAvailable(application, jobStatus, nextStatus)) + if (changesAvailable(application, jobStatus, nextStatus) || nextStatus.isHanging(jobTimeoutLimit())) nextToTrigger.add(nextJobType); } // Trigger them in parallel @@ -209,10 +210,10 @@ public class DeploymentTrigger { return true; return false; } - + /** * Triggers a change of this application - * + * * @param applicationId the application to trigger * @throws IllegalArgumentException if this application already have an ongoing change */ @@ -267,7 +268,7 @@ public class DeploymentTrigger { } /** - * Trigger a job for an application + * Trigger a job for an application * * @param jobType the type of the job to trigger, or null to trigger nothing * @param application the application to trigger the job for @@ -289,7 +290,7 @@ public class DeploymentTrigger { /** * Trigger a job for an application, if allowed - * + * * @param jobType the type of the job to trigger, or null to trigger nothing * @param application the application to trigger the job for * @param first whether to trigger the job before other jobs @@ -323,7 +324,7 @@ public class DeploymentTrigger { /** Returns true if the given proposed job triggering should be effected */ private boolean allowedTriggering(JobType jobType, LockedApplication application) { - // Note: We could make a more fine-grained and more correct determination about whether to block + // Note: We could make a more fine-grained and more correct determination about whether to block // by instead basing the decision on what is currently deployed in the zone. However, // this leads to some additional corner cases, and the possibility of blocking an application // fix to a version upgrade, so not doing it now @@ -341,7 +342,7 @@ public class DeploymentTrigger { return true; } - + private boolean isRunningProductionJob(Application application) { return JobList.from(application) .production() @@ -364,7 +365,7 @@ public class DeploymentTrigger { if (existingDeployment == null) return false; return existingDeployment.version().isAfter(version); } - + private boolean acceptNewRevisionNow(LockedApplication application) { if ( ! application.deploying().isPresent()) return true; @@ -377,5 +378,5 @@ public class DeploymentTrigger { // Otherwise, the application is currently upgrading, without failures, and we should wait with the revision. return false; } - + } |