aboutsummaryrefslogtreecommitdiffstats
path: root/controller-server
diff options
context:
space:
mode:
authorMartin Polden <mpolden@mpolden.no>2018-04-26 14:51:56 +0200
committerGitHub <noreply@github.com>2018-04-26 14:51:56 +0200
commita0faca67dcac8dca2173a14f90b9244b59ad1388 (patch)
tree89ae24e82e5d03c98bb3dcfab09cc99043603db0 /controller-server
parentf7c70472da91c7708d041e68aee29877b25ad30a (diff)
parent76a902d0e35e1bace8ddcd3bdbb2ca75211fb440 (diff)
Merge pull request #5716 from vespa-engine/jvenstad/DO-unified
Jvenstad/suppress parts of change to roll out unsuppressed part or test full change
Diffstat (limited to 'controller-server')
-rw-r--r--controller-server/src/main/java/com/yahoo/vespa/hosted/controller/Application.java6
-rw-r--r--controller-server/src/main/java/com/yahoo/vespa/hosted/controller/ApplicationController.java17
-rw-r--r--controller-server/src/main/java/com/yahoo/vespa/hosted/controller/application/Change.java10
-rw-r--r--controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentTrigger.java117
-rw-r--r--controller-server/src/test/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentTriggerTest.java62
5 files changed, 151 insertions, 61 deletions
diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/Application.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/Application.java
index 33137202ea4..295b1adbca9 100644
--- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/Application.java
+++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/Application.java
@@ -130,7 +130,11 @@ public class Application {
* Returns the change that should used for this application at the given instant, typically now.
*/
public Change changeAt(Instant now) {
- return change.effectiveAt(deploymentSpec, now); }
+ Change change = change();
+ if ( ! deploymentSpec.canUpgradeAt(now)) change = change.withoutPlatform();
+ if ( ! deploymentSpec.canChangeRevisionAt(now)) change = change.withoutApplication();
+ return change;
+ }
/**
* Returns whether this has an outstanding change (in the source repository), which
diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/ApplicationController.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/ApplicationController.java
index 5294a80464a..6249bc9f53d 100644
--- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/ApplicationController.java
+++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/ApplicationController.java
@@ -284,17 +284,18 @@ public class ApplicationController {
} else {
JobType jobType = JobType.from(controller.system(), zone)
.orElseThrow(() -> new IllegalArgumentException("No job found for zone " + zone));
- Optional<JobRun> triggered = Optional.ofNullable(application.deploymentJobs().jobStatus().get(jobType))
- .flatMap(JobStatus::lastTriggered);
- // TODO jvenstad: Verify this response with a test, and see that it sorts itself when triggered.
- if ( ! triggered.isPresent())
+ Optional<JobStatus> job = Optional.ofNullable(application.deploymentJobs().jobStatus().get(jobType));
+ if ( ! job.isPresent()
+ || ! job.get().lastTriggered().isPresent()
+ || job.get().lastCompleted().isPresent() && job.get().lastCompleted().get().at().isAfter(job.get().lastTriggered().get().at()))
return unexpectedDeployment(applicationId, zone);
+ JobRun triggered = job.get().lastTriggered().get();
platformVersion = preferOldestVersion
- ? triggered.get().sourcePlatform().orElse(triggered.get().platform())
- : triggered.get().platform();
+ ? triggered.sourcePlatform().orElse(triggered.platform())
+ : triggered.platform();
applicationVersion = preferOldestVersion
- ? triggered.get().sourceApplication().orElse(triggered.get().application())
- : triggered.get().application();
+ ? triggered.sourceApplication().orElse(triggered.application())
+ : triggered.application();
applicationPackage = new ApplicationPackage(artifactRepository.getApplicationPackage(application.id(), applicationVersion.id()));
validateRun(application, zone, platformVersion, applicationVersion);
}
diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/application/Change.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/application/Change.java
index 5294ac9b774..c8482af1c2e 100644
--- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/application/Change.java
+++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/application/Change.java
@@ -39,10 +39,12 @@ public final class Change {
this.application = application;
}
- /** Returns this change with currently blocked parts removed. */
- public Change effectiveAt(DeploymentSpec deploymentSpec, Instant instant) {
- return new Change(platform.filter(__ -> deploymentSpec.canUpgradeAt(instant)),
- application.filter(__ -> deploymentSpec.canChangeRevisionAt(instant)));
+ public Change withoutPlatform() {
+ return new Change(Optional.empty(), application);
+ }
+
+ public Change withoutApplication() {
+ return new Change(platform, Optional.empty());
}
/** Returns whether a change should currently be deployed */
diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentTrigger.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentTrigger.java
index 601ee18758e..403c82dd63e 100644
--- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentTrigger.java
+++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentTrigger.java
@@ -9,7 +9,6 @@ import com.yahoo.log.LogLevel;
import com.yahoo.vespa.hosted.controller.Application;
import com.yahoo.vespa.hosted.controller.ApplicationController;
import com.yahoo.vespa.hosted.controller.Controller;
-import com.yahoo.vespa.hosted.controller.LockedApplication;
import com.yahoo.vespa.hosted.controller.api.integration.BuildService;
import com.yahoo.vespa.hosted.controller.api.integration.BuildService.JobState;
import com.yahoo.vespa.hosted.controller.application.ApplicationList;
@@ -118,7 +117,8 @@ public class DeploymentTrigger {
triggering = JobRun.triggering(controller.systemVersion(), applicationVersion, empty(), empty(), "Application commit", clock.instant());
if (report.success()) {
if (acceptNewApplicationVersion(application))
- application = application.withChange(application.change().with(applicationVersion));
+ application = application.withChange(application.change().with(applicationVersion))
+ .withOutstandingChange(Change.empty());
else
application = application.withOutstandingChange(Change.of(applicationVersion));
}
@@ -208,8 +208,8 @@ public class DeploymentTrigger {
public void cancelChange(ApplicationId applicationId, boolean keepApplicationChange) {
applications().lockOrThrow(applicationId, application -> {
applications().store(application.withChange(application.change().application()
+ .filter(__ -> keepApplicationChange)
.map(Change::of)
- .filter(change -> keepApplicationChange)
.orElse(Change.empty())));
});
}
@@ -290,12 +290,11 @@ public class DeploymentTrigger {
private List<Job> computeReadyJobs(ApplicationId id) {
List<Job> jobs = new ArrayList<>();
applications().get(id).ifPresent(application -> {
- List<Step> steps = application.deploymentSpec().steps().isEmpty()
- ? singletonList(new DeploymentSpec.DeclaredZone(test))
- : application.deploymentSpec().steps();
- List<Step> productionSteps = steps.stream().filter(step -> step.deploysTo(prod) || step.zones().isEmpty()).collect(toList());
+ List<Step> productionSteps = application.deploymentSpec().steps().stream()
+ .filter(step -> step.deploysTo(prod) || step.zones().isEmpty())
+ .collect(toList());
- Change change = application.change();
+ Change change = application.changeAt(clock.instant());
@SuppressWarnings("cast") // Bad compiler!
Optional<Instant> completedAt = max((Optional<Instant>) application.deploymentJobs().statusOf(systemTest)
.flatMap(job -> job.lastSuccess().map(JobRun::at)),
@@ -304,49 +303,74 @@ public class DeploymentTrigger {
String reason = "New change available";
List<Job> testJobs = null;
- for (Step step : productionSteps) {
- Set<JobType> stepJobs = step.zones().stream().map(order::toJob).collect(toSet());
- Map<Optional<Instant>, List<JobType>> jobsByCompletion = stepJobs.stream().collect(groupingBy(job -> completedAt(change, application, job)));
- if (jobsByCompletion.containsKey(empty())) { // Step not complete, because some jobs remain -- trigger these if the previous step was done.
- for (JobType job : jobsByCompletion.get(empty())) {
- State target = targetFor(application, change, deploymentFor(application, job));
- if (isTested(application, target)) {
- if (completedAt.isPresent())
- jobs.add(deploymentJob(application, target, change, job, reason, completedAt.get(), stepJobs));
+ if (change.isPresent())
+ for (Step step : productionSteps) {
+ Set<JobType> stepJobs = step.zones().stream().map(order::toJob).collect(toSet());
+ Map<Optional<Instant>, List<JobType>> jobsByCompletion = stepJobs.stream().collect(groupingBy(job -> completedAt(change, application, job)));
+ if (jobsByCompletion.containsKey(empty())) { // Step not complete, because some jobs remain -- trigger these if the previous step was done.
+ for (JobType job : jobsByCompletion.get(empty())) {
+ State target = targetFor(application, change, deploymentFor(application, job));
+ if (isTested(application, target)) {
+ if (completedAt.isPresent())
+ jobs.add(deploymentJob(application, target, change, job, reason, completedAt.get(), stepJobs));
+ }
+ else if (testJobs == null) {
+ if ( ! alreadyTriggered(application, target)) // TODO jvenstad: This is always true now ...
+ testJobs = testJobsFor(application, target, "Testing deployment for " + job.jobName(), completedAt.orElse(clock.instant()));
+ else
+ testJobs = emptyList();
+ }
}
- else if (testJobs == null) {
- if ( ! alreadyTriggered(application, target))
- testJobs = testJobsFor(application, target, "Testing deployment for " + job.jobName(), completedAt.orElse(clock.instant()));
- else
- testJobs = emptyList();
- }
- }
- }
- else { // All jobs are complete -- find the time of completion of this step.
- if (stepJobs.isEmpty()) { // No jobs means this is delay step.
- Duration delay = ((DeploymentSpec.Delay) step).duration();
- completedAt = completedAt.map(at -> at.plus(delay)).filter(at -> ! at.isAfter(clock.instant()));
- reason += " after a delay of " + delay;
}
- else {
- completedAt = jobsByCompletion.keySet().stream().map(Optional::get).max(naturalOrder());
- reason = "Available change in " + stepJobs.stream().map(JobType::jobName).collect(joining(", "));
+ else { // All jobs are complete -- find the time of completion of this step.
+ if (stepJobs.isEmpty()) { // No jobs means this is delay step.
+ Duration delay = ((DeploymentSpec.Delay) step).duration();
+ completedAt = completedAt.map(at -> at.plus(delay)).filter(at -> ! at.isAfter(clock.instant()));
+ reason += " after a delay of " + delay;
+ }
+ else {
+ completedAt = jobsByCompletion.keySet().stream().map(Optional::get).max(naturalOrder());
+ reason = "Available change in " + stepJobs.stream().map(JobType::jobName).collect(joining(", "));
+ }
}
}
- }
-
if (testJobs == null)
testJobs = testJobsFor(application, targetFor(application, application.change(), empty()), "Testing last changes outside prod", clock.instant());
jobs.addAll(testJobs);
- // TODO jvenstad: Replace with completion of individual parts of Change.
- if (steps.stream().flatMap(step -> step.zones().stream()).map(order::toJob)
- .allMatch(job -> completedAt(application.change(), application, job).isPresent()))
- applications().lockIfPresent(id, lockedApplication -> applications().store(lockedApplication.withChange(Change.empty())));
+ removeCompletedChange(application);
});
return jobs;
}
+ private void removeCompletedChange(Application application) {
+ List<JobType> jobs = (application.deploymentSpec().steps().isEmpty()
+ ? singletonList(new DeploymentSpec.DeclaredZone(test))
+ : application.deploymentSpec().steps()).stream()
+ .flatMap(step -> step.zones().stream())
+ .map(order::toJob)
+ .collect(toList());
+
+ boolean platformComplete = application.change().platform().map(Change::of)
+ .map(change -> jobs.stream().allMatch(job -> completedAt(change, application, job).isPresent()))
+ .orElse(false);
+
+ boolean applicationComplete = application.change().application().map(Change::of)
+ .map(change -> jobs.stream().allMatch(job -> completedAt(change, application, job).isPresent()))
+ .orElse(false);
+
+ if (platformComplete || applicationComplete)
+ applications().lockIfPresent(application.id(), lockedApplication -> {
+ if ( ! application.change().equals(lockedApplication.change()))
+ return; // If new changes were added after we verified completion, we can't remove those.
+
+ Change change = application.change();
+ if (platformComplete) change = change.withoutPlatform();
+ if (applicationComplete) change = change.withoutApplication();
+ applications().store(lockedApplication.withChange(change));
+ });
+ }
+
/**
* Returns the list of test jobs that should run now, and that need to succeed on the given target for it to be considered tested.
*/
@@ -404,9 +428,11 @@ public class DeploymentTrigger {
*
* Any job is complete if the given change is already successful on that job.
* A production job is also considered complete if its current change is strictly dominated by what
- * is already deployed in its zone, i.e., no parts of the change are upgrades, and at least one
- * part is a downgrade, regardless of the status of the job.
+ * is already deployed in its zone, i.e., no parts of the change are upgrades, and the full current
+ * change for the application downgrades the deployment, which is an acknowledgement that the deployed
+ * version is broken somehow, such that the job may be locked in failure until a new version is released.
*/
+ // TODO jvenstad: This is only used for production jobs now.
private Optional<Instant> completedAt(Change change, Application application, JobType jobType) {
State target = targetFor(application, change, deploymentFor(application, jobType));
Optional<JobRun> lastSuccess = successOn(application, jobType, target);
@@ -416,8 +442,8 @@ public class DeploymentTrigger {
return deploymentFor(application, jobType)
.filter(deployment -> ! ( change.upgrades(deployment.version())
|| change.upgrades(deployment.applicationVersion()))
- && ( change.downgrades(deployment.version())
- || change.downgrades(deployment.applicationVersion())))
+ && ( application.change().downgrades(deployment.version())
+ || application.change().downgrades(deployment.applicationVersion())))
.map(Deployment::at);
}
@@ -440,9 +466,6 @@ public class DeploymentTrigger {
if ( ! job.concurrentlyWith.containsAll(runningProductionJobsFor(application)))
return false;
- if ( ! job.change.effectiveAt(application.deploymentSpec(), clock.instant()).isPresent())
- return false;
-
return true;
}
@@ -457,7 +480,7 @@ public class DeploymentTrigger {
return controller.applications();
}
- private boolean acceptNewApplicationVersion(LockedApplication application) {
+ private boolean acceptNewApplicationVersion(Application application) {
if (application.change().application().isPresent()) return true; // More application changes are ok.
if (application.deploymentJobs().hasFailures()) return true; // Allow changes to fix upgrade problems.
// Otherwise, allow an application change if not currently upgrading.
diff --git a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentTriggerTest.java b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentTriggerTest.java
index 3dcc16ccb88..f27cb0ba171 100644
--- a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentTriggerTest.java
+++ b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentTriggerTest.java
@@ -331,11 +331,71 @@ public class DeploymentTriggerTest {
assertEquals(0, tester.buildService().jobs().size());
tester.clock().advance(Duration.ofHours(2)); // ---------------- Exit block window: 20:30
- tester.deploymentTrigger().triggerReadyJobs(); // Schedules the blocked production job(s)
+ tester.deploymentTrigger().triggerReadyJobs(); // Schedules staging test for the blocked production job(s)
+ // TODO jvenstad: Required now because changes during block window used empty source -- improvement to use first untested production job with change to test :)
+ tester.deployAndNotify(app, changedApplication, true, stagingTest);
assertEquals(singletonList(buildJob(app, productionUsWest1)), tester.buildService().jobs());
}
@Test
+ public void testCompletionOfPartOfChangeDuringBlockWindow() {
+ ManualClock clock = new ManualClock(Instant.parse("2017-09-26T17:30:00.00Z")); // Tuesday, 17:30
+ DeploymentTester tester = new DeploymentTester(new ControllerTester(clock));
+ ApplicationPackage applicationPackage = new ApplicationPackageBuilder()
+ .blockChange(false, true, "tue", "18", "UTC")
+ .region("us-west-1")
+ .region("us-east-3")
+ .build();
+ Application application = tester.createAndDeploy("app1", 1, applicationPackage);
+
+ // Application on (6.1, 1.0.42)
+ Version v1 = Version.fromString("6.1");
+
+ // Application is mid-upgrade when block window begins, and has an outstanding change.
+ Version v2 = Version.fromString("6.2");
+ tester.upgradeSystem(v2);
+ tester.jobCompletion(component).application(application).nextBuildNumber().uploadArtifact(applicationPackage).submit();
+
+ tester.deployAndNotify(application, applicationPackage, true, stagingTest);
+ tester.deployAndNotify(application, applicationPackage, true, systemTest);
+
+ // Entering block window will keep the outstanding change in place, but a new component run triggers a new change.
+ // This component completion should remove the older outstanding change, to avoid a later downgrade.
+ clock.advance(Duration.ofHours(1));
+ tester.deployAndNotify(application, applicationPackage, true, productionUsWest1);
+ assertEquals((Long) BuildJob.defaultBuildNumber, tester.application(application.id()).deploymentJobs().jobStatus()
+ .get(productionUsWest1).lastSuccess().get().application().buildNumber().get());
+ assertEquals((Long) (BuildJob.defaultBuildNumber + 1), tester.application(application.id()).outstandingChange().application().get().buildNumber().get());
+
+ tester.readyJobTrigger().maintain();
+ assertTrue(tester.buildService().jobs().isEmpty());
+
+ // New component triggers a full deployment of new application version, leaving platform versions alone.
+ tester.jobCompletion(component).application(application).nextBuildNumber().nextBuildNumber().uploadArtifact(applicationPackage).submit();
+ tester.deployAndNotify(application, applicationPackage, true, stagingTest);
+ tester.deployAndNotify(application, applicationPackage, true, systemTest);
+ tester.deployAndNotify(application, applicationPackage, true, productionUsWest1);
+ tester.deployAndNotify(application, applicationPackage, true, systemTest);
+ tester.deployAndNotify(application, applicationPackage, true, stagingTest);
+ tester.deployAndNotify(application, applicationPackage, true, productionUsEast3);
+ tester.deployAndNotify(application, applicationPackage, true, systemTest);
+ tester.deployAndNotify(application, applicationPackage, true, stagingTest);
+
+ // All tests are done for now, and only the platform change remains.
+ assertTrue(tester.buildService().jobs().isEmpty());
+ assertEquals(Change.of(v2), tester.application(application.id()).change());
+
+ // Exiting block window, staging test is re-run for the last prod zone, which has the old platform.
+ clock.advance(Duration.ofHours(1));
+ tester.readyJobTrigger().maintain();
+ tester.deployAndNotify(application, applicationPackage, true, stagingTest);
+ tester.deployAndNotify(application, applicationPackage, true, productionUsEast3);
+
+ assertFalse(tester.application(application.id()).change().isPresent());
+ assertFalse(tester.application(application.id()).outstandingChange().isPresent());
+ }
+
+ @Test
public void testUpgradingButNoJobStarted() {
DeploymentTester tester = new DeploymentTester();
ReadyJobsTrigger readyJobsTrigger = new ReadyJobsTrigger(tester.controller(),