diff options
author | Jon Marius Venstad <jvenstad@yahoo-inc.com> | 2018-03-26 12:54:49 +0200 |
---|---|---|
committer | Jon Marius Venstad <jvenstad@yahoo-inc.com> | 2018-03-26 12:54:49 +0200 |
commit | 98a4db6159d9b97017685fc51ec05b913075b30e (patch) | |
tree | 7ee9016505574f379d076fc4578cef941790b653 /controller-server | |
parent | c8c913e6625fa42c0712363536d274802f267c03 (diff) |
More flexible parallel triggering
Diffstat (limited to 'controller-server')
3 files changed, 57 insertions, 72 deletions
diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentQueue.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentQueue.java index 385de7b5a30..9e33da674aa 100644 --- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentQueue.java +++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentQueue.java @@ -33,6 +33,7 @@ public class DeploymentQueue { /** Add the given application to the queue of the given job type -- in front if first, at the back otherwise. */ public void addJob(ApplicationId applicationId, JobType jobType, boolean first) { + // TODO jvenstad: Replace with direct triggering. locked(jobType, queue -> { if ( ! queue.contains(applicationId)) { if (first) diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentTrigger.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentTrigger.java index f2993ef9a0c..579574d35b5 100644 --- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentTrigger.java +++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentTrigger.java @@ -23,6 +23,8 @@ import java.time.Clock; import java.time.Duration; import java.time.Instant; import java.util.ArrayList; +import java.util.Collection; +import java.util.Collections; import java.util.List; import java.util.Objects; import java.util.Optional; @@ -106,15 +108,15 @@ public class DeploymentTrigger { // TODO jvenstad: Don't trigger. // Trigger next - if (report.success()) - application = trigger(order.nextAfter(report.jobType(), application), application, - report.jobType().jobName() + " completed"); + if (report.success()) { + List<JobType> jobs = order.nextAfter(report.jobType(), application); + for (JobType job : jobs) + application = triggerAllowParallel(new Triggering(application, job, false, report.jobType().jobName() + " completed"), jobs, false); + } else if (retryBecauseOutOfCapacity(application, report.jobType())) - application = trigger(report.jobType(), application, true, - "Retrying on out of capacity"); + application = triggerAllowParallel(new Triggering(application, report.jobType(), true, "Retrying on out of capacity"), Collections.emptySet(), false); else if (retryBecauseNewFailure(application, report.jobType())) - application = trigger(report.jobType(), application, false, - "Immediate retry on failure"); + application = triggerAllowParallel(new Triggering(application, report.jobType(), false, "Immediate retry on failure"), Collections.emptySet(), false); applications().store(application); }); @@ -145,14 +147,14 @@ public class DeploymentTrigger { || ! systemTestStatus.isSuccess() || ! systemTestStatus.lastTriggered().get().version().equals(target) || systemTestStatus.isHanging(jobTimeoutLimit())) { - application = trigger(JobType.systemTest, application, false, "Upgrade to " + target); + application = triggerAllowParallel(new Triggering(application, JobType.systemTest, false, "Upgrade to " + target), Collections.emptySet(), false); controller.applications().store(application); } } else { JobStatus componentStatus = application.deploymentJobs().jobStatus().get(JobType.component); if (componentStatus != null && changesAvailable(application, componentStatus, systemTestStatus)) { - application = trigger(JobType.systemTest, application, false, "Available change in component"); + application = triggerAllowParallel(new Triggering(application, JobType.systemTest, false, "Available change in component"), Collections.emptySet(), false); controller.applications().store(application); } } @@ -165,14 +167,13 @@ public class DeploymentTrigger { if (jobStatus.isRunning(jobTimeoutLimit())) continue; // Collect the subset of next jobs which have not run with the last changes - List<JobType> nextToTrigger = new ArrayList<>(); - for (JobType nextJobType : order.nextAfter(jobType, application)) { + // TODO jvenstad: Change to be step-centric. + List<JobType> nextJobs = order.nextAfter(jobType, application); + for (JobType nextJobType : nextJobs) { JobStatus nextStatus = application.deploymentJobs().jobStatus().get(nextJobType); if (changesAvailable(application, jobStatus, nextStatus) || nextStatus.isHanging(jobTimeoutLimit())) - nextToTrigger.add(nextJobType); + application = triggerAllowParallel(new Triggering(application, nextJobType, false, "Available change in " + jobType.jobName()), nextJobs, false); } - // Trigger them in parallel - application = trigger(nextToTrigger, application, "Available change in " + jobType.jobName()); controller.applications().store(application); } } @@ -180,36 +181,39 @@ public class DeploymentTrigger { /** * Trigger a job for an application, if allowed * - * @param jobType the type of the job to trigger, or null to trigger nothing - * @param application the application to trigger the job for - * @param first whether to trigger the job before other jobs + * @param triggering the triggering to execute, i.e., application, job type and reason * @param force true to disable checks which should normally prevent this triggering from happening - * @param reason describes why the job is triggered * @return the application in the triggered state, if actually triggered. This *must* be stored by the caller */ // TODO jvenstad: Replace with (Collection<JobType> concurrentlyWith) to allow, e.g., concurrent retries. - public LockedApplication triggerAllowParallel(JobType jobType, LockedApplication application, - boolean first, boolean force, String reason) { - if (jobType == null) return application; // we are passed null when the last job has been reached + public LockedApplication triggerAllowParallel(Triggering triggering, Collection<JobType> concurrentlyWith, boolean force) { + if (triggering.jobType == null) return triggering.application; // we are passed null when the last job has been reached + + List<JobType> runningProductionJobs = JobList.from(triggering.application) + .production() + .running(jobTimeoutLimit()) + .mapToList(JobStatus::type); + if (triggering.jobType().isProduction() && ! concurrentlyWith.containsAll(runningProductionJobs)) + return triggering.application; + // Never allow untested changes to go through // Note that this may happen because a new change catches up and prevents an older one from continuing - if ( ! application.deploymentJobs().isDeployableTo(jobType.environment(), application.change())) { - log.warning(String.format("Want to trigger %s for %s with reason %s, but change is untested", jobType, - application, reason)); - return application; + if ( ! triggering.application.deploymentJobs().isDeployableTo(triggering.jobType.environment(), triggering.application.change())) { + log.warning(String.format("Want to trigger %s for %s with reason %s, but change is untested", triggering.jobType, + triggering.application, triggering.reason)); + return triggering.application; } - if ( ! force && ! allowedTriggering(jobType, application)) return application; - log.info(String.format("Triggering %s for %s, %s: %s", jobType, application, - application.change().isPresent() ? "deploying " + application.change() : "restarted deployment", - reason)); - deploymentQueue.addJob(application.id(), jobType, first); - return application.withJobTriggering(jobType, + if ( ! force && ! allowedTriggering(triggering.jobType, triggering.application)) return triggering.application; + log.info(triggering.toString()); + deploymentQueue.addJob(triggering.application.id(), triggering.jobType, triggering.retry); + // TODO jvenstad: Let triggering set only time of triggering (and reason, for debugging?) when build system is polled for job status. + return triggering.application.withJobTriggering(triggering.jobType, clock.instant(), - application.deployVersionFor(jobType, controller), - application.deployApplicationVersionFor(jobType, controller, false) + triggering.application.deployVersionFor(triggering.jobType, controller), + triggering.application.deployApplicationVersionFor(triggering.jobType, controller, false) .orElse(ApplicationVersion.unknown), - reason); + triggering.reason); } /** @@ -227,7 +231,7 @@ public class DeploymentTrigger { if (change.application().isPresent()) application = application.withOutstandingChange(Change.empty()); // TODO jvenstad: Don't trigger. - application = trigger(JobType.systemTest, application, false, change.toString()); + application = triggerAllowParallel(new Triggering(application, JobType.systemTest, false, change.toString()), Collections.emptySet(), false); applications().store(application); }); } @@ -271,28 +275,6 @@ public class DeploymentTrigger { if ( ! jobType.isProduction()) return true; // Deployment spec only determines this for production jobs. return application.deploymentSpec().includes(jobType.environment(), jobType.region(controller.system())); } - - /** - * Trigger a job for an application - * - * @param jobType the type of the job to trigger, or null to trigger nothing - * @param application the application to trigger the job for - * @param first whether to put the job at the front of the build system queue (or the back) - * @param reason describes why the job is triggered - * @return the application in the triggered state, which *must* be stored by the caller - */ - private LockedApplication trigger(JobType jobType, LockedApplication application, boolean first, String reason) { - if (jobType.isProduction() && isRunningProductionJob(application)) return application; - return triggerAllowParallel(jobType, application, first, false, reason); - } - - private LockedApplication trigger(List<JobType> jobs, LockedApplication application, String reason) { - if (jobs.stream().anyMatch(JobType::isProduction) && isRunningProductionJob(application)) return application; - for (JobType job : jobs) - application = triggerAllowParallel(job, application, false, false, reason); - return application; - } - /** Create application version from job report */ private ApplicationVersion applicationVersionFrom(JobReport report) { return report.sourceRevision().map(sr -> ApplicationVersion.from(sr, report.buildNumber())) @@ -356,13 +338,6 @@ public class DeploymentTrigger { return true; } - private boolean isRunningProductionJob(Application application) { - return ! JobList.from(application) - .production() - .running(jobTimeoutLimit()) - .isEmpty(); - } - /** Returns whether all production zones listed in deployment spec has this change (or a newer version, if upgrade) */ private boolean deploymentComplete(LockedApplication application) { return order.jobsFrom(application.deploymentSpec()).stream() @@ -432,22 +407,22 @@ public class DeploymentTrigger { } - class Triggering { + public static class Triggering { - private final ApplicationId id; + private final LockedApplication application; // TODO jvenstad: Consider passing an ID instead. private final JobType jobType; private final boolean retry; private final String reason; - public Triggering(ApplicationId id, JobType jobType, boolean retry, String reason) { - this.id = id; + public Triggering(LockedApplication application, JobType jobType, boolean retry, String reason) { + this.application = application; this.jobType = jobType; this.retry = retry; this.reason = reason; } - public ApplicationId id() { - return id; + public LockedApplication application() { + return application; } public JobType jobType() { @@ -462,6 +437,14 @@ public class DeploymentTrigger { return reason; } + public String toString() { + return String.format("Triggering %s for %s, %s: %s", + jobType, + application, + application.change().isPresent() ? "deploying " + application.change() : "restarted deployment", + reason); + } + } } diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/restapi/screwdriver/ScrewdriverApiHandler.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/restapi/screwdriver/ScrewdriverApiHandler.java index 3a8d74d2c2e..a23206ccec4 100644 --- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/restapi/screwdriver/ScrewdriverApiHandler.java +++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/restapi/screwdriver/ScrewdriverApiHandler.java @@ -11,6 +11,7 @@ import com.yahoo.slime.Slime; import com.yahoo.vespa.hosted.controller.Controller; import com.yahoo.vespa.hosted.controller.api.integration.BuildService.BuildJob; import com.yahoo.vespa.hosted.controller.application.DeploymentJobs.JobType; +import com.yahoo.vespa.hosted.controller.deployment.DeploymentTrigger; import com.yahoo.vespa.hosted.controller.restapi.ErrorResponse; import com.yahoo.vespa.hosted.controller.restapi.Path; import com.yahoo.vespa.hosted.controller.restapi.SlimeJsonResponse; @@ -18,6 +19,7 @@ import com.yahoo.vespa.hosted.controller.versions.VespaVersion; import com.yahoo.yolean.Exceptions; import java.io.InputStream; +import java.util.Collections; import java.util.List; import java.util.Optional; import java.util.Scanner; @@ -90,8 +92,7 @@ public class ScrewdriverApiHandler extends LoggingRequestHandler { // Since this is a manual operation we likely want it to trigger as soon as possible so we add it at to the // front of the queue application = controller.applications().deploymentTrigger().triggerAllowParallel( - jobType, application, true, true, - "Triggered from screwdriver/v1" + new DeploymentTrigger.Triggering(application, jobType, true, "Triggered from screwdriver/v1"), Collections.emptySet(), true ); controller.applications().store(application); }); |