diff options
7 files changed, 197 insertions, 33 deletions
diff --git a/config-model-api/src/main/java/com/yahoo/config/application/api/DeploymentSpec.java b/config-model-api/src/main/java/com/yahoo/config/application/api/DeploymentSpec.java index eda38f95054..f46fe39e8c4 100644 --- a/config-model-api/src/main/java/com/yahoo/config/application/api/DeploymentSpec.java +++ b/config-model-api/src/main/java/com/yahoo/config/application/api/DeploymentSpec.java @@ -239,7 +239,7 @@ public class DeploymentSpec { } } - /** A delpoyment step */ + /** A deployment step */ public abstract static class Step { /** Returns whether this step deploys to the given region */ diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentOrder.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentOrder.java index 0ae69b4f2ca..2ccfcb9e460 100644 --- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentOrder.java +++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentOrder.java @@ -47,8 +47,9 @@ public class DeploymentOrder { } /** Returns a list of jobs to trigger after the given job */ + // TODO: This does too much - should just tell us the order, as advertised public List<JobType> nextAfter(JobType job, Application application) { - if (!application.deploying().isPresent()) { // Change was cancelled + if ( ! application.deploying().isPresent()) { // Change was cancelled return Collections.emptyList(); } @@ -71,7 +72,7 @@ public class DeploymentOrder { return Collections.emptyList(); } - // Postpone if step hasn't completed all it's jobs for this change + // Postpone if step hasn't completed all its jobs for this change if (!completedSuccessfully(currentStep.get(), application.deploying().get(), application)) { return Collections.emptyList(); } diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentTrigger.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentTrigger.java index 407d28264d1..76251d47703 100644 --- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentTrigger.java +++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentTrigger.java @@ -22,6 +22,7 @@ import com.yahoo.vespa.hosted.controller.persistence.CuratorDb; import java.time.Clock; import java.time.Duration; import java.time.Instant; +import java.util.ArrayList; import java.util.Comparator; import java.util.List; import java.util.Objects; @@ -130,21 +131,19 @@ public class DeploymentTrigger { private void triggerReadyJobs(Application application, Lock lock) { if ( ! application.deploying().isPresent()) return; for (JobType jobType : order.jobsFrom(application.deploymentSpec())) { - // TODO: Do this for all jobs not just staging, and (with more work) remove triggerFailing and triggerDelayed - if (jobType.environment().equals(Environment.staging)) { - JobStatus jobStatus = application.deploymentJobs().jobStatus().get(jobType); - if (jobStatus.isRunning(jobTimeoutLimit())) continue; - - for (JobType nextJobType : order.nextAfter(jobType, application)) { - JobStatus nextStatus = application.deploymentJobs().jobStatus().get(nextJobType); - - // Attempt to trigger if there are changes available - is rejected if the change is in progress, - // or is currently blocked - if (changesAvailable(jobStatus, nextStatus)) - trigger(nextJobType, application, false, "Triggering previously blocked job", lock); - } - + JobStatus jobStatus = application.deploymentJobs().jobStatus().get(jobType); + if (jobStatus.isRunning(jobTimeoutLimit())) continue; + + // Collect the subset of next jobs which have not run with the last changes + List<JobType> nextToTrigger = new ArrayList<>(); + for (JobType nextJobType : order.nextAfter(jobType, application)) { + JobStatus nextStatus = application.deploymentJobs().jobStatus().get(nextJobType); + if (changesAvailable(application, jobStatus, nextStatus)) + nextToTrigger.add(nextJobType); } + // Trigger them in parallel + application = trigger(nextToTrigger, application, "Triggering previously blocked jobs", lock); + controller.applications().store(application, lock); } } @@ -152,8 +151,15 @@ public class DeploymentTrigger { * Returns true if the previous job has completed successfully with a revision and/or version which is * newer (different) than the one last completed successfully in next */ - private boolean changesAvailable(JobStatus previous, JobStatus next) { + private boolean changesAvailable(Application application, JobStatus previous, JobStatus next) { if ( ! previous.lastSuccess().isPresent()) return false; + + if ( ! application.deploying().isPresent()) return false; + Change change = application.deploying().get(); + if (change instanceof Change.VersionChange && // the last completed is out of date - don't continue with it + ! ((Change.VersionChange)change).version().equals(previous.lastSuccess().get().version())) + return false; + if (next == null) return true; if ( ! next.lastSuccess().isPresent()) return true; @@ -327,7 +333,7 @@ public class DeploymentTrigger { /** * Trigger a job for an application - * + * * @param jobType the type of the job to trigger, or null to trigger nothing * @param application the application to trigger the job for * @param first whether to trigger the job before other jobs @@ -335,6 +341,27 @@ public class DeploymentTrigger { * @return the application in the triggered state, which *must* be stored by the caller */ private Application trigger(JobType jobType, Application application, boolean first, String cause, Lock lock) { + if (isRunningProductionJob(application)) return application; + return triggerAllowParallel(jobType, application, first, cause, lock); + } + + private Application trigger(List<JobType> jobs, Application application, String cause, Lock lock) { + if (isRunningProductionJob(application)) return application; + for (JobType job : jobs) + application = triggerAllowParallel(job, application, false, cause, lock); + return application; + } + + /** + * Trigger a job for an application + * + * @param jobType the type of the job to trigger, or null to trigger nothing + * @param application the application to trigger the job for + * @param first whether to trigger the job before other jobs + * @param cause describes why the job is triggered + * @return the application in the triggered state, which *must* be stored by the caller + */ + private Application triggerAllowParallel(JobType jobType, Application application, boolean first, String cause, Lock lock) { if (jobType == null) { // previous was last job return application; } @@ -383,12 +410,11 @@ public class DeploymentTrigger { return application.withJobTriggering(jobType, application.deploying(), clock.instant(), controller); } - private Application trigger(List<JobType> jobs, Application application, String cause, Lock lock) { - for (JobType job : jobs) - application = trigger(job, application, false, cause, lock); - return application; + private boolean isRunningProductionJob(Application application) { + return application.deploymentJobs().jobStatus().entrySet().stream() + .anyMatch(entry -> entry.getKey().isProduction() && entry.getValue().isRunning(jobTimeoutLimit())); } - + private boolean acceptNewRevisionNow(Application application) { if ( ! application.deploying().isPresent()) return true; if ( application.deploying().get() instanceof Change.ApplicationChange) return true; // more changes are ok diff --git a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/ControllerTest.java b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/ControllerTest.java index e36645175a7..30d07bed50f 100644 --- a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/ControllerTest.java +++ b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/ControllerTest.java @@ -137,7 +137,7 @@ public class ControllerTest { // system and staging test job - succeeding applications.notifyJobCompletion(mockReport(app1, component, true)); - tester.deployAndNotify(app1, applicationPackage, true, systemTest); + tester.deployAndNotify(app1, applicationPackage, true, false, systemTest); assertStatus(JobStatus.initial(systemTest) .withTriggering(version1, revision, false, tester.clock().instant()) .withCompletion(Optional.empty(), tester.clock().instant(), tester.controller()), app1.id(), tester.controller()); @@ -435,7 +435,7 @@ public class ControllerTest { // out of capacity retry mechanism tester.clock().advance(Duration.ofMinutes(15)); tester.notifyJobCompletion(component, app1, true); - tester.deployAndNotify(app1, applicationPackage, true, systemTest); + tester.deployAndNotify(app1, applicationPackage, true, false, systemTest); tester.deploy(stagingTest, app1, applicationPackage); assertEquals(1, buildSystem.takeJobsToRun().size()); tester.notifyJobCompletion(stagingTest, app1, Optional.of(JobError.outOfCapacity)); diff --git a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentTester.java b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentTester.java index e45166b1c16..5af5dac714f 100644 --- a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentTester.java +++ b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentTester.java @@ -153,7 +153,7 @@ public class DeploymentTester { jobs = jobs.stream().filter(job -> ! job.isProduction()).collect(Collectors.toList()); for (JobType job : jobs) { boolean failJob = failOnJob.map(j -> j.equals(job)).orElse(false); - deployAndNotify(application, applicationPackage, !failJob, job); + deployAndNotify(application, applicationPackage, !failJob, false, job); if (failJob) { break; } @@ -205,8 +205,14 @@ public class DeploymentTester { job.zone(controller().system()).ifPresent(zone -> tester.deploy(application, zone, applicationPackage, deployCurrentVersion)); } - public void deployAndNotify(Application application, ApplicationPackage applicationPackage, boolean success, JobType... jobs) { - assertScheduledJob(application, jobs); + public void deployAndNotify(Application application, ApplicationPackage applicationPackage, boolean success, + JobType... jobs) { + deployAndNotify(application, applicationPackage, success, true, jobs); + } + + public void deployAndNotify(Application application, ApplicationPackage applicationPackage, boolean success, + boolean expectOnlyTheseJobs, JobType... jobs) { + consumeJobs(application, expectOnlyTheseJobs, jobs); for (JobType job : jobs) { if (success) { deploy(job, application, applicationPackage); @@ -215,13 +221,16 @@ public class DeploymentTester { } } - private void assertScheduledJob(Application application, JobType... jobs) { + /** Assert that the sceduled jobs of this application are exactly those given, and take them */ + private void consumeJobs(Application application, boolean expectOnlyTheseJobs, JobType... jobs) { for (JobType job : jobs) { Optional<BuildService.BuildJob> buildJob = findJob(application, job); assertTrue(String.format("Job %s is scheduled for %s", job, application), buildJob.isPresent()); assertEquals((long) application.deploymentJobs().projectId().get(), buildJob.get().projectId()); assertEquals(job.id(), buildJob.get().jobName()); } + if (expectOnlyTheseJobs) + assertEquals(jobs.length, countJobsOf(application)); buildSystem().removeJobs(application.id()); } @@ -232,6 +241,11 @@ public class DeploymentTester { return Optional.empty(); } + private int countJobsOf(Application application) { + return (int)buildSystem().jobs().stream() + .filter(job -> job.projectId() == application.deploymentJobs().projectId().get()) + .count(); + } private DeploymentJobs.JobReport jobReport(Application application, JobType jobType, Optional<DeploymentJobs.JobError> jobError) { return new DeploymentJobs.JobReport( application.id(), diff --git a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentTriggerTest.java b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentTriggerTest.java index 65ed5eeb95b..f73373ac718 100644 --- a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentTriggerTest.java +++ b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentTriggerTest.java @@ -269,6 +269,7 @@ public class DeploymentTriggerTest { BlockedChangeDeployer blockedChangeDeployer = new BlockedChangeDeployer(tester.controller(), Duration.ofHours(1), new JobControl(tester.controllerTester().curator())); + Version version = Version.fromString("5.0"); tester.updateVersionStatus(version); @@ -276,10 +277,14 @@ public class DeploymentTriggerTest { .upgradePolicy("canary") // Block revision changes on tuesday in hours 18 and 19 .blockChange(true, false, "tue", "18-19", "UTC") - .region("us-west-1"); + .region("us-west-1") + .region("us-central-1") + .region("us-east-3"); Application app = tester.createAndDeploy("app1", 1, applicationPackageBuilder.build()); + + tester.clock().advance(Duration.ofHours(1)); // --------------- Enter block window: 18:30 blockedChangeDeployer.run(); diff --git a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/UpgraderTest.java b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/UpgraderTest.java index 29f1bce5ebe..7261bb3f61e 100644 --- a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/UpgraderTest.java +++ b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/UpgraderTest.java @@ -9,6 +9,7 @@ import com.yahoo.test.ManualClock; import com.yahoo.vespa.hosted.controller.Application; import com.yahoo.vespa.hosted.controller.ControllerTester; import com.yahoo.vespa.hosted.controller.application.ApplicationPackage; +import com.yahoo.vespa.hosted.controller.application.Deployment; import com.yahoo.vespa.hosted.controller.application.DeploymentJobs; import com.yahoo.vespa.hosted.controller.deployment.ApplicationPackageBuilder; import com.yahoo.vespa.hosted.controller.deployment.DeploymentTester; @@ -364,14 +365,14 @@ public class UpgraderTest { @Test public void testBlockVersionChange() { - ManualClock clock = new ManualClock(Instant.parse("2017-09-26T18:00:00.00Z")); // A tuesday + ManualClock clock = new ManualClock(Instant.parse("2017-09-26T18:00:00.00Z")); // Tuesday, 18:00 DeploymentTester tester = new DeploymentTester(new ControllerTester(clock)); Version version = Version.fromString("5.0"); tester.updateVersionStatus(version); ApplicationPackage applicationPackage = new ApplicationPackageBuilder() .upgradePolicy("canary") - // Block upgrades on tuesday in hours 18 and 19 + // Block upgrades on Tuesday in hours 18 and 19 .blockChange(false, true, "tue", "18-19", "UTC") .region("us-west-1") .build(); @@ -400,6 +401,123 @@ public class UpgraderTest { } @Test + public void testBlockVersionChangeHalfwayThough() { + ManualClock clock = new ManualClock(Instant.parse("2017-09-26T17:00:00.00Z")); // Tuesday, 17:00 + DeploymentTester tester = new DeploymentTester(new ControllerTester(clock)); + BlockedChangeDeployer blockedChangeDeployer = new BlockedChangeDeployer(tester.controller(), + Duration.ofHours(1), + new JobControl(tester.controllerTester().curator())); + + Version version = Version.fromString("5.0"); + tester.updateVersionStatus(version); + + ApplicationPackage applicationPackage = new ApplicationPackageBuilder() + .upgradePolicy("canary") + // Block upgrades on Tuesday in hours 18 and 19 + .blockChange(false, true, "tue", "18-19", "UTC") + .region("us-west-1") + .region("us-central-1") + .region("us-east-3") + .build(); + + Application app = tester.createAndDeploy("app1", 1, applicationPackage); + + // New version is released + version = Version.fromString("5.1"); + tester.updateVersionStatus(version); + + // Application upgrade starts + tester.upgrader().maintain(); + tester.deployAndNotify(app, applicationPackage, true, DeploymentJobs.JobType.systemTest); + tester.deployAndNotify(app, applicationPackage, true, DeploymentJobs.JobType.stagingTest); + clock.advance(Duration.ofHours(1)); // Entering block window after prod job is triggered + tester.deployAndNotify(app, applicationPackage, true, DeploymentJobs.JobType.productionUsWest1); + assertTrue(tester.buildSystem().jobs().isEmpty()); // Next job not triggered due to being in the block window + + // One hour passes, time is 19:00, still no upgrade + tester.clock().advance(Duration.ofHours(1)); + blockedChangeDeployer.maintain(); + assertTrue("No jobs scheduled", tester.buildSystem().jobs().isEmpty()); + + // Another hour pass, time is 20:00 and application upgrades + tester.clock().advance(Duration.ofHours(1)); + blockedChangeDeployer.maintain(); + tester.deployAndNotify(app, applicationPackage, true, DeploymentJobs.JobType.productionUsCentral1); + tester.deployAndNotify(app, applicationPackage, true, DeploymentJobs.JobType.productionUsEast3); + assertTrue("All jobs consumed", tester.buildSystem().jobs().isEmpty()); + } + + /** + * Tests the scenario where a release is deployed to 2 of 3 production zones, then blocked, + * followed by timeout of the upgrade and a new release. + * In this case, the blocked production zone should not progress with upgrading to the previous version, + * and should not upgrade to the new version until the other production zones have it + * (expected behavior; both requirements are debatable). + */ + @Test + public void testBlockVersionChangeHalfwayThoughThenNewVersion() { + ManualClock clock = new ManualClock(Instant.parse("2017-09-29T16:00:00.00Z")); // Friday, 16:00 + DeploymentTester tester = new DeploymentTester(new ControllerTester(clock)); + BlockedChangeDeployer blockedChangeDeployer = new BlockedChangeDeployer(tester.controller(), + Duration.ofHours(1), + new JobControl(tester.controllerTester().curator())); + + Version version = Version.fromString("5.0"); + tester.updateVersionStatus(version); + + ApplicationPackage applicationPackage = new ApplicationPackageBuilder() + .upgradePolicy("canary") + // Block upgrades on weekends and ouside working hours + .blockChange(false, true, "mon-fri", "00-09,17-23", "UTC") + .blockChange(false, true, "sat-sun", "00-23", "UTC") + .region("us-west-1") + .region("us-central-1") + .region("us-east-3") + .build(); + + Application app = tester.createAndDeploy("app1", 1, applicationPackage); + + // New version is released + version = Version.fromString("5.1"); + tester.updateVersionStatus(version); + + // Application upgrade starts + tester.upgrader().maintain(); + tester.deployAndNotify(app, applicationPackage, true, DeploymentJobs.JobType.systemTest); + tester.deployAndNotify(app, applicationPackage, true, DeploymentJobs.JobType.stagingTest); + tester.deployAndNotify(app, applicationPackage, true, DeploymentJobs.JobType.productionUsWest1); + clock.advance(Duration.ofHours(1)); // Entering block window after prod job is triggered + tester.deployAndNotify(app, applicationPackage, true, DeploymentJobs.JobType.productionUsCentral1); + assertTrue(tester.buildSystem().jobs().isEmpty()); // Next job not triggered due to being in the block window + + // A day passes and we get a new version + tester.clock().advance(Duration.ofDays(1)); + version = Version.fromString("5.2"); + tester.updateVersionStatus(version); + tester.upgrader().maintain(); + blockedChangeDeployer.maintain(); + assertTrue("Nothing is scheduled", tester.buildSystem().jobs().isEmpty()); + + // Monday morning: We are not blocked + tester.clock().advance(Duration.ofDays(1)); // Sunday, 17:00 + tester.clock().advance(Duration.ofHours(17)); // Monday, 10:00 + tester.upgrader().maintain(); + blockedChangeDeployer.maintain(); + // We proceed with the new version in the expected order, not starting with the previously blocked version: + // Test jobs are run with the new version, but not production as we are in the block window + tester.deployAndNotify(app, applicationPackage, true, DeploymentJobs.JobType.systemTest); + tester.deployAndNotify(app, applicationPackage, true, DeploymentJobs.JobType.stagingTest); + tester.deployAndNotify(app, applicationPackage, true, DeploymentJobs.JobType.productionUsWest1); + tester.deployAndNotify(app, applicationPackage, true, DeploymentJobs.JobType.productionUsCentral1); + tester.deployAndNotify(app, applicationPackage, true, DeploymentJobs.JobType.productionUsEast3); + assertTrue("All jobs consumed", tester.buildSystem().jobs().isEmpty()); + + // App is completely upgraded to the latest version + for (Deployment deployment : tester.applications().require(app.id()).deployments().values()) + assertEquals(version, deployment.version()); + } + + @Test public void testReschedulesUpgradeAfterTimeout() { DeploymentTester tester = new DeploymentTester(); Version version = Version.fromString("5.0"); |