diff options
author | Martin Polden <mpolden@mpolden.no> | 2018-04-26 14:51:56 +0200 |
---|---|---|
committer | GitHub <noreply@github.com> | 2018-04-26 14:51:56 +0200 |
commit | a0faca67dcac8dca2173a14f90b9244b59ad1388 (patch) | |
tree | 89ae24e82e5d03c98bb3dcfab09cc99043603db0 /controller-server | |
parent | f7c70472da91c7708d041e68aee29877b25ad30a (diff) | |
parent | 76a902d0e35e1bace8ddcd3bdbb2ca75211fb440 (diff) |
Merge pull request #5716 from vespa-engine/jvenstad/DO-unified
Jvenstad/suppress parts of change to roll out unsuppressed part or test full change
Diffstat (limited to 'controller-server')
5 files changed, 151 insertions, 61 deletions
diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/Application.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/Application.java index 33137202ea4..295b1adbca9 100644 --- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/Application.java +++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/Application.java @@ -130,7 +130,11 @@ public class Application { * Returns the change that should used for this application at the given instant, typically now. */ public Change changeAt(Instant now) { - return change.effectiveAt(deploymentSpec, now); } + Change change = change(); + if ( ! deploymentSpec.canUpgradeAt(now)) change = change.withoutPlatform(); + if ( ! deploymentSpec.canChangeRevisionAt(now)) change = change.withoutApplication(); + return change; + } /** * Returns whether this has an outstanding change (in the source repository), which diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/ApplicationController.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/ApplicationController.java index 5294a80464a..6249bc9f53d 100644 --- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/ApplicationController.java +++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/ApplicationController.java @@ -284,17 +284,18 @@ public class ApplicationController { } else { JobType jobType = JobType.from(controller.system(), zone) .orElseThrow(() -> new IllegalArgumentException("No job found for zone " + zone)); - Optional<JobRun> triggered = Optional.ofNullable(application.deploymentJobs().jobStatus().get(jobType)) - .flatMap(JobStatus::lastTriggered); - // TODO jvenstad: Verify this response with a test, and see that it sorts itself when triggered. - if ( ! triggered.isPresent()) + Optional<JobStatus> job = Optional.ofNullable(application.deploymentJobs().jobStatus().get(jobType)); + if ( ! job.isPresent() + || ! job.get().lastTriggered().isPresent() + || job.get().lastCompleted().isPresent() && job.get().lastCompleted().get().at().isAfter(job.get().lastTriggered().get().at())) return unexpectedDeployment(applicationId, zone); + JobRun triggered = job.get().lastTriggered().get(); platformVersion = preferOldestVersion - ? triggered.get().sourcePlatform().orElse(triggered.get().platform()) - : triggered.get().platform(); + ? triggered.sourcePlatform().orElse(triggered.platform()) + : triggered.platform(); applicationVersion = preferOldestVersion - ? triggered.get().sourceApplication().orElse(triggered.get().application()) - : triggered.get().application(); + ? triggered.sourceApplication().orElse(triggered.application()) + : triggered.application(); applicationPackage = new ApplicationPackage(artifactRepository.getApplicationPackage(application.id(), applicationVersion.id())); validateRun(application, zone, platformVersion, applicationVersion); } diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/application/Change.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/application/Change.java index 5294ac9b774..c8482af1c2e 100644 --- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/application/Change.java +++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/application/Change.java @@ -39,10 +39,12 @@ public final class Change { this.application = application; } - /** Returns this change with currently blocked parts removed. */ - public Change effectiveAt(DeploymentSpec deploymentSpec, Instant instant) { - return new Change(platform.filter(__ -> deploymentSpec.canUpgradeAt(instant)), - application.filter(__ -> deploymentSpec.canChangeRevisionAt(instant))); + public Change withoutPlatform() { + return new Change(Optional.empty(), application); + } + + public Change withoutApplication() { + return new Change(platform, Optional.empty()); } /** Returns whether a change should currently be deployed */ diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentTrigger.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentTrigger.java index 601ee18758e..403c82dd63e 100644 --- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentTrigger.java +++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentTrigger.java @@ -9,7 +9,6 @@ import com.yahoo.log.LogLevel; import com.yahoo.vespa.hosted.controller.Application; import com.yahoo.vespa.hosted.controller.ApplicationController; import com.yahoo.vespa.hosted.controller.Controller; -import com.yahoo.vespa.hosted.controller.LockedApplication; import com.yahoo.vespa.hosted.controller.api.integration.BuildService; import com.yahoo.vespa.hosted.controller.api.integration.BuildService.JobState; import com.yahoo.vespa.hosted.controller.application.ApplicationList; @@ -118,7 +117,8 @@ public class DeploymentTrigger { triggering = JobRun.triggering(controller.systemVersion(), applicationVersion, empty(), empty(), "Application commit", clock.instant()); if (report.success()) { if (acceptNewApplicationVersion(application)) - application = application.withChange(application.change().with(applicationVersion)); + application = application.withChange(application.change().with(applicationVersion)) + .withOutstandingChange(Change.empty()); else application = application.withOutstandingChange(Change.of(applicationVersion)); } @@ -208,8 +208,8 @@ public class DeploymentTrigger { public void cancelChange(ApplicationId applicationId, boolean keepApplicationChange) { applications().lockOrThrow(applicationId, application -> { applications().store(application.withChange(application.change().application() + .filter(__ -> keepApplicationChange) .map(Change::of) - .filter(change -> keepApplicationChange) .orElse(Change.empty()))); }); } @@ -290,12 +290,11 @@ public class DeploymentTrigger { private List<Job> computeReadyJobs(ApplicationId id) { List<Job> jobs = new ArrayList<>(); applications().get(id).ifPresent(application -> { - List<Step> steps = application.deploymentSpec().steps().isEmpty() - ? singletonList(new DeploymentSpec.DeclaredZone(test)) - : application.deploymentSpec().steps(); - List<Step> productionSteps = steps.stream().filter(step -> step.deploysTo(prod) || step.zones().isEmpty()).collect(toList()); + List<Step> productionSteps = application.deploymentSpec().steps().stream() + .filter(step -> step.deploysTo(prod) || step.zones().isEmpty()) + .collect(toList()); - Change change = application.change(); + Change change = application.changeAt(clock.instant()); @SuppressWarnings("cast") // Bad compiler! Optional<Instant> completedAt = max((Optional<Instant>) application.deploymentJobs().statusOf(systemTest) .flatMap(job -> job.lastSuccess().map(JobRun::at)), @@ -304,49 +303,74 @@ public class DeploymentTrigger { String reason = "New change available"; List<Job> testJobs = null; - for (Step step : productionSteps) { - Set<JobType> stepJobs = step.zones().stream().map(order::toJob).collect(toSet()); - Map<Optional<Instant>, List<JobType>> jobsByCompletion = stepJobs.stream().collect(groupingBy(job -> completedAt(change, application, job))); - if (jobsByCompletion.containsKey(empty())) { // Step not complete, because some jobs remain -- trigger these if the previous step was done. - for (JobType job : jobsByCompletion.get(empty())) { - State target = targetFor(application, change, deploymentFor(application, job)); - if (isTested(application, target)) { - if (completedAt.isPresent()) - jobs.add(deploymentJob(application, target, change, job, reason, completedAt.get(), stepJobs)); + if (change.isPresent()) + for (Step step : productionSteps) { + Set<JobType> stepJobs = step.zones().stream().map(order::toJob).collect(toSet()); + Map<Optional<Instant>, List<JobType>> jobsByCompletion = stepJobs.stream().collect(groupingBy(job -> completedAt(change, application, job))); + if (jobsByCompletion.containsKey(empty())) { // Step not complete, because some jobs remain -- trigger these if the previous step was done. + for (JobType job : jobsByCompletion.get(empty())) { + State target = targetFor(application, change, deploymentFor(application, job)); + if (isTested(application, target)) { + if (completedAt.isPresent()) + jobs.add(deploymentJob(application, target, change, job, reason, completedAt.get(), stepJobs)); + } + else if (testJobs == null) { + if ( ! alreadyTriggered(application, target)) // TODO jvenstad: This is always true now ... + testJobs = testJobsFor(application, target, "Testing deployment for " + job.jobName(), completedAt.orElse(clock.instant())); + else + testJobs = emptyList(); + } } - else if (testJobs == null) { - if ( ! alreadyTriggered(application, target)) - testJobs = testJobsFor(application, target, "Testing deployment for " + job.jobName(), completedAt.orElse(clock.instant())); - else - testJobs = emptyList(); - } - } - } - else { // All jobs are complete -- find the time of completion of this step. - if (stepJobs.isEmpty()) { // No jobs means this is delay step. - Duration delay = ((DeploymentSpec.Delay) step).duration(); - completedAt = completedAt.map(at -> at.plus(delay)).filter(at -> ! at.isAfter(clock.instant())); - reason += " after a delay of " + delay; } - else { - completedAt = jobsByCompletion.keySet().stream().map(Optional::get).max(naturalOrder()); - reason = "Available change in " + stepJobs.stream().map(JobType::jobName).collect(joining(", ")); + else { // All jobs are complete -- find the time of completion of this step. + if (stepJobs.isEmpty()) { // No jobs means this is delay step. + Duration delay = ((DeploymentSpec.Delay) step).duration(); + completedAt = completedAt.map(at -> at.plus(delay)).filter(at -> ! at.isAfter(clock.instant())); + reason += " after a delay of " + delay; + } + else { + completedAt = jobsByCompletion.keySet().stream().map(Optional::get).max(naturalOrder()); + reason = "Available change in " + stepJobs.stream().map(JobType::jobName).collect(joining(", ")); + } } } - } - if (testJobs == null) testJobs = testJobsFor(application, targetFor(application, application.change(), empty()), "Testing last changes outside prod", clock.instant()); jobs.addAll(testJobs); - // TODO jvenstad: Replace with completion of individual parts of Change. - if (steps.stream().flatMap(step -> step.zones().stream()).map(order::toJob) - .allMatch(job -> completedAt(application.change(), application, job).isPresent())) - applications().lockIfPresent(id, lockedApplication -> applications().store(lockedApplication.withChange(Change.empty()))); + removeCompletedChange(application); }); return jobs; } + private void removeCompletedChange(Application application) { + List<JobType> jobs = (application.deploymentSpec().steps().isEmpty() + ? singletonList(new DeploymentSpec.DeclaredZone(test)) + : application.deploymentSpec().steps()).stream() + .flatMap(step -> step.zones().stream()) + .map(order::toJob) + .collect(toList()); + + boolean platformComplete = application.change().platform().map(Change::of) + .map(change -> jobs.stream().allMatch(job -> completedAt(change, application, job).isPresent())) + .orElse(false); + + boolean applicationComplete = application.change().application().map(Change::of) + .map(change -> jobs.stream().allMatch(job -> completedAt(change, application, job).isPresent())) + .orElse(false); + + if (platformComplete || applicationComplete) + applications().lockIfPresent(application.id(), lockedApplication -> { + if ( ! application.change().equals(lockedApplication.change())) + return; // If new changes were added after we verified completion, we can't remove those. + + Change change = application.change(); + if (platformComplete) change = change.withoutPlatform(); + if (applicationComplete) change = change.withoutApplication(); + applications().store(lockedApplication.withChange(change)); + }); + } + /** * Returns the list of test jobs that should run now, and that need to succeed on the given target for it to be considered tested. */ @@ -404,9 +428,11 @@ public class DeploymentTrigger { * * Any job is complete if the given change is already successful on that job. * A production job is also considered complete if its current change is strictly dominated by what - * is already deployed in its zone, i.e., no parts of the change are upgrades, and at least one - * part is a downgrade, regardless of the status of the job. + * is already deployed in its zone, i.e., no parts of the change are upgrades, and the full current + * change for the application downgrades the deployment, which is an acknowledgement that the deployed + * version is broken somehow, such that the job may be locked in failure until a new version is released. */ + // TODO jvenstad: This is only used for production jobs now. private Optional<Instant> completedAt(Change change, Application application, JobType jobType) { State target = targetFor(application, change, deploymentFor(application, jobType)); Optional<JobRun> lastSuccess = successOn(application, jobType, target); @@ -416,8 +442,8 @@ public class DeploymentTrigger { return deploymentFor(application, jobType) .filter(deployment -> ! ( change.upgrades(deployment.version()) || change.upgrades(deployment.applicationVersion())) - && ( change.downgrades(deployment.version()) - || change.downgrades(deployment.applicationVersion()))) + && ( application.change().downgrades(deployment.version()) + || application.change().downgrades(deployment.applicationVersion()))) .map(Deployment::at); } @@ -440,9 +466,6 @@ public class DeploymentTrigger { if ( ! job.concurrentlyWith.containsAll(runningProductionJobsFor(application))) return false; - if ( ! job.change.effectiveAt(application.deploymentSpec(), clock.instant()).isPresent()) - return false; - return true; } @@ -457,7 +480,7 @@ public class DeploymentTrigger { return controller.applications(); } - private boolean acceptNewApplicationVersion(LockedApplication application) { + private boolean acceptNewApplicationVersion(Application application) { if (application.change().application().isPresent()) return true; // More application changes are ok. if (application.deploymentJobs().hasFailures()) return true; // Allow changes to fix upgrade problems. // Otherwise, allow an application change if not currently upgrading. diff --git a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentTriggerTest.java b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentTriggerTest.java index 3dcc16ccb88..f27cb0ba171 100644 --- a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentTriggerTest.java +++ b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentTriggerTest.java @@ -331,11 +331,71 @@ public class DeploymentTriggerTest { assertEquals(0, tester.buildService().jobs().size()); tester.clock().advance(Duration.ofHours(2)); // ---------------- Exit block window: 20:30 - tester.deploymentTrigger().triggerReadyJobs(); // Schedules the blocked production job(s) + tester.deploymentTrigger().triggerReadyJobs(); // Schedules staging test for the blocked production job(s) + // TODO jvenstad: Required now because changes during block window used empty source -- improvement to use first untested production job with change to test :) + tester.deployAndNotify(app, changedApplication, true, stagingTest); assertEquals(singletonList(buildJob(app, productionUsWest1)), tester.buildService().jobs()); } @Test + public void testCompletionOfPartOfChangeDuringBlockWindow() { + ManualClock clock = new ManualClock(Instant.parse("2017-09-26T17:30:00.00Z")); // Tuesday, 17:30 + DeploymentTester tester = new DeploymentTester(new ControllerTester(clock)); + ApplicationPackage applicationPackage = new ApplicationPackageBuilder() + .blockChange(false, true, "tue", "18", "UTC") + .region("us-west-1") + .region("us-east-3") + .build(); + Application application = tester.createAndDeploy("app1", 1, applicationPackage); + + // Application on (6.1, 1.0.42) + Version v1 = Version.fromString("6.1"); + + // Application is mid-upgrade when block window begins, and has an outstanding change. + Version v2 = Version.fromString("6.2"); + tester.upgradeSystem(v2); + tester.jobCompletion(component).application(application).nextBuildNumber().uploadArtifact(applicationPackage).submit(); + + tester.deployAndNotify(application, applicationPackage, true, stagingTest); + tester.deployAndNotify(application, applicationPackage, true, systemTest); + + // Entering block window will keep the outstanding change in place, but a new component run triggers a new change. + // This component completion should remove the older outstanding change, to avoid a later downgrade. + clock.advance(Duration.ofHours(1)); + tester.deployAndNotify(application, applicationPackage, true, productionUsWest1); + assertEquals((Long) BuildJob.defaultBuildNumber, tester.application(application.id()).deploymentJobs().jobStatus() + .get(productionUsWest1).lastSuccess().get().application().buildNumber().get()); + assertEquals((Long) (BuildJob.defaultBuildNumber + 1), tester.application(application.id()).outstandingChange().application().get().buildNumber().get()); + + tester.readyJobTrigger().maintain(); + assertTrue(tester.buildService().jobs().isEmpty()); + + // New component triggers a full deployment of new application version, leaving platform versions alone. + tester.jobCompletion(component).application(application).nextBuildNumber().nextBuildNumber().uploadArtifact(applicationPackage).submit(); + tester.deployAndNotify(application, applicationPackage, true, stagingTest); + tester.deployAndNotify(application, applicationPackage, true, systemTest); + tester.deployAndNotify(application, applicationPackage, true, productionUsWest1); + tester.deployAndNotify(application, applicationPackage, true, systemTest); + tester.deployAndNotify(application, applicationPackage, true, stagingTest); + tester.deployAndNotify(application, applicationPackage, true, productionUsEast3); + tester.deployAndNotify(application, applicationPackage, true, systemTest); + tester.deployAndNotify(application, applicationPackage, true, stagingTest); + + // All tests are done for now, and only the platform change remains. + assertTrue(tester.buildService().jobs().isEmpty()); + assertEquals(Change.of(v2), tester.application(application.id()).change()); + + // Exiting block window, staging test is re-run for the last prod zone, which has the old platform. + clock.advance(Duration.ofHours(1)); + tester.readyJobTrigger().maintain(); + tester.deployAndNotify(application, applicationPackage, true, stagingTest); + tester.deployAndNotify(application, applicationPackage, true, productionUsEast3); + + assertFalse(tester.application(application.id()).change().isPresent()); + assertFalse(tester.application(application.id()).outstandingChange().isPresent()); + } + + @Test public void testUpgradingButNoJobStarted() { DeploymentTester tester = new DeploymentTester(); ReadyJobsTrigger readyJobsTrigger = new ReadyJobsTrigger(tester.controller(), |