summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--config-model-api/src/main/java/com/yahoo/config/application/api/DeploymentSpec.java2
-rw-r--r--controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentOrder.java5
-rw-r--r--controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentTrigger.java68
-rw-r--r--controller-server/src/test/java/com/yahoo/vespa/hosted/controller/ControllerTest.java4
-rw-r--r--controller-server/src/test/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentTester.java22
-rw-r--r--controller-server/src/test/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentTriggerTest.java7
-rw-r--r--controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/UpgraderTest.java122
7 files changed, 197 insertions, 33 deletions
diff --git a/config-model-api/src/main/java/com/yahoo/config/application/api/DeploymentSpec.java b/config-model-api/src/main/java/com/yahoo/config/application/api/DeploymentSpec.java
index eda38f95054..f46fe39e8c4 100644
--- a/config-model-api/src/main/java/com/yahoo/config/application/api/DeploymentSpec.java
+++ b/config-model-api/src/main/java/com/yahoo/config/application/api/DeploymentSpec.java
@@ -239,7 +239,7 @@ public class DeploymentSpec {
}
}
- /** A delpoyment step */
+ /** A deployment step */
public abstract static class Step {
/** Returns whether this step deploys to the given region */
diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentOrder.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentOrder.java
index 0ae69b4f2ca..2ccfcb9e460 100644
--- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentOrder.java
+++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentOrder.java
@@ -47,8 +47,9 @@ public class DeploymentOrder {
}
/** Returns a list of jobs to trigger after the given job */
+ // TODO: This does too much - should just tell us the order, as advertised
public List<JobType> nextAfter(JobType job, Application application) {
- if (!application.deploying().isPresent()) { // Change was cancelled
+ if ( ! application.deploying().isPresent()) { // Change was cancelled
return Collections.emptyList();
}
@@ -71,7 +72,7 @@ public class DeploymentOrder {
return Collections.emptyList();
}
- // Postpone if step hasn't completed all it's jobs for this change
+ // Postpone if step hasn't completed all its jobs for this change
if (!completedSuccessfully(currentStep.get(), application.deploying().get(), application)) {
return Collections.emptyList();
}
diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentTrigger.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentTrigger.java
index 407d28264d1..76251d47703 100644
--- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentTrigger.java
+++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentTrigger.java
@@ -22,6 +22,7 @@ import com.yahoo.vespa.hosted.controller.persistence.CuratorDb;
import java.time.Clock;
import java.time.Duration;
import java.time.Instant;
+import java.util.ArrayList;
import java.util.Comparator;
import java.util.List;
import java.util.Objects;
@@ -130,21 +131,19 @@ public class DeploymentTrigger {
private void triggerReadyJobs(Application application, Lock lock) {
if ( ! application.deploying().isPresent()) return;
for (JobType jobType : order.jobsFrom(application.deploymentSpec())) {
- // TODO: Do this for all jobs not just staging, and (with more work) remove triggerFailing and triggerDelayed
- if (jobType.environment().equals(Environment.staging)) {
- JobStatus jobStatus = application.deploymentJobs().jobStatus().get(jobType);
- if (jobStatus.isRunning(jobTimeoutLimit())) continue;
-
- for (JobType nextJobType : order.nextAfter(jobType, application)) {
- JobStatus nextStatus = application.deploymentJobs().jobStatus().get(nextJobType);
-
- // Attempt to trigger if there are changes available - is rejected if the change is in progress,
- // or is currently blocked
- if (changesAvailable(jobStatus, nextStatus))
- trigger(nextJobType, application, false, "Triggering previously blocked job", lock);
- }
-
+ JobStatus jobStatus = application.deploymentJobs().jobStatus().get(jobType);
+ if (jobStatus.isRunning(jobTimeoutLimit())) continue;
+
+ // Collect the subset of next jobs which have not run with the last changes
+ List<JobType> nextToTrigger = new ArrayList<>();
+ for (JobType nextJobType : order.nextAfter(jobType, application)) {
+ JobStatus nextStatus = application.deploymentJobs().jobStatus().get(nextJobType);
+ if (changesAvailable(application, jobStatus, nextStatus))
+ nextToTrigger.add(nextJobType);
}
+ // Trigger them in parallel
+ application = trigger(nextToTrigger, application, "Triggering previously blocked jobs", lock);
+ controller.applications().store(application, lock);
}
}
@@ -152,8 +151,15 @@ public class DeploymentTrigger {
* Returns true if the previous job has completed successfully with a revision and/or version which is
* newer (different) than the one last completed successfully in next
*/
- private boolean changesAvailable(JobStatus previous, JobStatus next) {
+ private boolean changesAvailable(Application application, JobStatus previous, JobStatus next) {
if ( ! previous.lastSuccess().isPresent()) return false;
+
+ if ( ! application.deploying().isPresent()) return false;
+ Change change = application.deploying().get();
+ if (change instanceof Change.VersionChange && // the last completed is out of date - don't continue with it
+ ! ((Change.VersionChange)change).version().equals(previous.lastSuccess().get().version()))
+ return false;
+
if (next == null) return true;
if ( ! next.lastSuccess().isPresent()) return true;
@@ -327,7 +333,7 @@ public class DeploymentTrigger {
/**
* Trigger a job for an application
- *
+ *
* @param jobType the type of the job to trigger, or null to trigger nothing
* @param application the application to trigger the job for
* @param first whether to trigger the job before other jobs
@@ -335,6 +341,27 @@ public class DeploymentTrigger {
* @return the application in the triggered state, which *must* be stored by the caller
*/
private Application trigger(JobType jobType, Application application, boolean first, String cause, Lock lock) {
+ if (isRunningProductionJob(application)) return application;
+ return triggerAllowParallel(jobType, application, first, cause, lock);
+ }
+
+ private Application trigger(List<JobType> jobs, Application application, String cause, Lock lock) {
+ if (isRunningProductionJob(application)) return application;
+ for (JobType job : jobs)
+ application = triggerAllowParallel(job, application, false, cause, lock);
+ return application;
+ }
+
+ /**
+ * Trigger a job for an application
+ *
+ * @param jobType the type of the job to trigger, or null to trigger nothing
+ * @param application the application to trigger the job for
+ * @param first whether to trigger the job before other jobs
+ * @param cause describes why the job is triggered
+ * @return the application in the triggered state, which *must* be stored by the caller
+ */
+ private Application triggerAllowParallel(JobType jobType, Application application, boolean first, String cause, Lock lock) {
if (jobType == null) { // previous was last job
return application;
}
@@ -383,12 +410,11 @@ public class DeploymentTrigger {
return application.withJobTriggering(jobType, application.deploying(), clock.instant(), controller);
}
- private Application trigger(List<JobType> jobs, Application application, String cause, Lock lock) {
- for (JobType job : jobs)
- application = trigger(job, application, false, cause, lock);
- return application;
+ private boolean isRunningProductionJob(Application application) {
+ return application.deploymentJobs().jobStatus().entrySet().stream()
+ .anyMatch(entry -> entry.getKey().isProduction() && entry.getValue().isRunning(jobTimeoutLimit()));
}
-
+
private boolean acceptNewRevisionNow(Application application) {
if ( ! application.deploying().isPresent()) return true;
if ( application.deploying().get() instanceof Change.ApplicationChange) return true; // more changes are ok
diff --git a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/ControllerTest.java b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/ControllerTest.java
index e36645175a7..30d07bed50f 100644
--- a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/ControllerTest.java
+++ b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/ControllerTest.java
@@ -137,7 +137,7 @@ public class ControllerTest {
// system and staging test job - succeeding
applications.notifyJobCompletion(mockReport(app1, component, true));
- tester.deployAndNotify(app1, applicationPackage, true, systemTest);
+ tester.deployAndNotify(app1, applicationPackage, true, false, systemTest);
assertStatus(JobStatus.initial(systemTest)
.withTriggering(version1, revision, false, tester.clock().instant())
.withCompletion(Optional.empty(), tester.clock().instant(), tester.controller()), app1.id(), tester.controller());
@@ -435,7 +435,7 @@ public class ControllerTest {
// out of capacity retry mechanism
tester.clock().advance(Duration.ofMinutes(15));
tester.notifyJobCompletion(component, app1, true);
- tester.deployAndNotify(app1, applicationPackage, true, systemTest);
+ tester.deployAndNotify(app1, applicationPackage, true, false, systemTest);
tester.deploy(stagingTest, app1, applicationPackage);
assertEquals(1, buildSystem.takeJobsToRun().size());
tester.notifyJobCompletion(stagingTest, app1, Optional.of(JobError.outOfCapacity));
diff --git a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentTester.java b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentTester.java
index e45166b1c16..5af5dac714f 100644
--- a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentTester.java
+++ b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentTester.java
@@ -153,7 +153,7 @@ public class DeploymentTester {
jobs = jobs.stream().filter(job -> ! job.isProduction()).collect(Collectors.toList());
for (JobType job : jobs) {
boolean failJob = failOnJob.map(j -> j.equals(job)).orElse(false);
- deployAndNotify(application, applicationPackage, !failJob, job);
+ deployAndNotify(application, applicationPackage, !failJob, false, job);
if (failJob) {
break;
}
@@ -205,8 +205,14 @@ public class DeploymentTester {
job.zone(controller().system()).ifPresent(zone -> tester.deploy(application, zone, applicationPackage, deployCurrentVersion));
}
- public void deployAndNotify(Application application, ApplicationPackage applicationPackage, boolean success, JobType... jobs) {
- assertScheduledJob(application, jobs);
+ public void deployAndNotify(Application application, ApplicationPackage applicationPackage, boolean success,
+ JobType... jobs) {
+ deployAndNotify(application, applicationPackage, success, true, jobs);
+ }
+
+ public void deployAndNotify(Application application, ApplicationPackage applicationPackage, boolean success,
+ boolean expectOnlyTheseJobs, JobType... jobs) {
+ consumeJobs(application, expectOnlyTheseJobs, jobs);
for (JobType job : jobs) {
if (success) {
deploy(job, application, applicationPackage);
@@ -215,13 +221,16 @@ public class DeploymentTester {
}
}
- private void assertScheduledJob(Application application, JobType... jobs) {
+ /** Assert that the sceduled jobs of this application are exactly those given, and take them */
+ private void consumeJobs(Application application, boolean expectOnlyTheseJobs, JobType... jobs) {
for (JobType job : jobs) {
Optional<BuildService.BuildJob> buildJob = findJob(application, job);
assertTrue(String.format("Job %s is scheduled for %s", job, application), buildJob.isPresent());
assertEquals((long) application.deploymentJobs().projectId().get(), buildJob.get().projectId());
assertEquals(job.id(), buildJob.get().jobName());
}
+ if (expectOnlyTheseJobs)
+ assertEquals(jobs.length, countJobsOf(application));
buildSystem().removeJobs(application.id());
}
@@ -232,6 +241,11 @@ public class DeploymentTester {
return Optional.empty();
}
+ private int countJobsOf(Application application) {
+ return (int)buildSystem().jobs().stream()
+ .filter(job -> job.projectId() == application.deploymentJobs().projectId().get())
+ .count();
+ }
private DeploymentJobs.JobReport jobReport(Application application, JobType jobType, Optional<DeploymentJobs.JobError> jobError) {
return new DeploymentJobs.JobReport(
application.id(),
diff --git a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentTriggerTest.java b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentTriggerTest.java
index 65ed5eeb95b..f73373ac718 100644
--- a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentTriggerTest.java
+++ b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentTriggerTest.java
@@ -269,6 +269,7 @@ public class DeploymentTriggerTest {
BlockedChangeDeployer blockedChangeDeployer = new BlockedChangeDeployer(tester.controller(),
Duration.ofHours(1),
new JobControl(tester.controllerTester().curator()));
+
Version version = Version.fromString("5.0");
tester.updateVersionStatus(version);
@@ -276,10 +277,14 @@ public class DeploymentTriggerTest {
.upgradePolicy("canary")
// Block revision changes on tuesday in hours 18 and 19
.blockChange(true, false, "tue", "18-19", "UTC")
- .region("us-west-1");
+ .region("us-west-1")
+ .region("us-central-1")
+ .region("us-east-3");
Application app = tester.createAndDeploy("app1", 1, applicationPackageBuilder.build());
+
+
tester.clock().advance(Duration.ofHours(1)); // --------------- Enter block window: 18:30
blockedChangeDeployer.run();
diff --git a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/UpgraderTest.java b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/UpgraderTest.java
index 29f1bce5ebe..7261bb3f61e 100644
--- a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/UpgraderTest.java
+++ b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/UpgraderTest.java
@@ -9,6 +9,7 @@ import com.yahoo.test.ManualClock;
import com.yahoo.vespa.hosted.controller.Application;
import com.yahoo.vespa.hosted.controller.ControllerTester;
import com.yahoo.vespa.hosted.controller.application.ApplicationPackage;
+import com.yahoo.vespa.hosted.controller.application.Deployment;
import com.yahoo.vespa.hosted.controller.application.DeploymentJobs;
import com.yahoo.vespa.hosted.controller.deployment.ApplicationPackageBuilder;
import com.yahoo.vespa.hosted.controller.deployment.DeploymentTester;
@@ -364,14 +365,14 @@ public class UpgraderTest {
@Test
public void testBlockVersionChange() {
- ManualClock clock = new ManualClock(Instant.parse("2017-09-26T18:00:00.00Z")); // A tuesday
+ ManualClock clock = new ManualClock(Instant.parse("2017-09-26T18:00:00.00Z")); // Tuesday, 18:00
DeploymentTester tester = new DeploymentTester(new ControllerTester(clock));
Version version = Version.fromString("5.0");
tester.updateVersionStatus(version);
ApplicationPackage applicationPackage = new ApplicationPackageBuilder()
.upgradePolicy("canary")
- // Block upgrades on tuesday in hours 18 and 19
+ // Block upgrades on Tuesday in hours 18 and 19
.blockChange(false, true, "tue", "18-19", "UTC")
.region("us-west-1")
.build();
@@ -400,6 +401,123 @@ public class UpgraderTest {
}
@Test
+ public void testBlockVersionChangeHalfwayThough() {
+ ManualClock clock = new ManualClock(Instant.parse("2017-09-26T17:00:00.00Z")); // Tuesday, 17:00
+ DeploymentTester tester = new DeploymentTester(new ControllerTester(clock));
+ BlockedChangeDeployer blockedChangeDeployer = new BlockedChangeDeployer(tester.controller(),
+ Duration.ofHours(1),
+ new JobControl(tester.controllerTester().curator()));
+
+ Version version = Version.fromString("5.0");
+ tester.updateVersionStatus(version);
+
+ ApplicationPackage applicationPackage = new ApplicationPackageBuilder()
+ .upgradePolicy("canary")
+ // Block upgrades on Tuesday in hours 18 and 19
+ .blockChange(false, true, "tue", "18-19", "UTC")
+ .region("us-west-1")
+ .region("us-central-1")
+ .region("us-east-3")
+ .build();
+
+ Application app = tester.createAndDeploy("app1", 1, applicationPackage);
+
+ // New version is released
+ version = Version.fromString("5.1");
+ tester.updateVersionStatus(version);
+
+ // Application upgrade starts
+ tester.upgrader().maintain();
+ tester.deployAndNotify(app, applicationPackage, true, DeploymentJobs.JobType.systemTest);
+ tester.deployAndNotify(app, applicationPackage, true, DeploymentJobs.JobType.stagingTest);
+ clock.advance(Duration.ofHours(1)); // Entering block window after prod job is triggered
+ tester.deployAndNotify(app, applicationPackage, true, DeploymentJobs.JobType.productionUsWest1);
+ assertTrue(tester.buildSystem().jobs().isEmpty()); // Next job not triggered due to being in the block window
+
+ // One hour passes, time is 19:00, still no upgrade
+ tester.clock().advance(Duration.ofHours(1));
+ blockedChangeDeployer.maintain();
+ assertTrue("No jobs scheduled", tester.buildSystem().jobs().isEmpty());
+
+ // Another hour pass, time is 20:00 and application upgrades
+ tester.clock().advance(Duration.ofHours(1));
+ blockedChangeDeployer.maintain();
+ tester.deployAndNotify(app, applicationPackage, true, DeploymentJobs.JobType.productionUsCentral1);
+ tester.deployAndNotify(app, applicationPackage, true, DeploymentJobs.JobType.productionUsEast3);
+ assertTrue("All jobs consumed", tester.buildSystem().jobs().isEmpty());
+ }
+
+ /**
+ * Tests the scenario where a release is deployed to 2 of 3 production zones, then blocked,
+ * followed by timeout of the upgrade and a new release.
+ * In this case, the blocked production zone should not progress with upgrading to the previous version,
+ * and should not upgrade to the new version until the other production zones have it
+ * (expected behavior; both requirements are debatable).
+ */
+ @Test
+ public void testBlockVersionChangeHalfwayThoughThenNewVersion() {
+ ManualClock clock = new ManualClock(Instant.parse("2017-09-29T16:00:00.00Z")); // Friday, 16:00
+ DeploymentTester tester = new DeploymentTester(new ControllerTester(clock));
+ BlockedChangeDeployer blockedChangeDeployer = new BlockedChangeDeployer(tester.controller(),
+ Duration.ofHours(1),
+ new JobControl(tester.controllerTester().curator()));
+
+ Version version = Version.fromString("5.0");
+ tester.updateVersionStatus(version);
+
+ ApplicationPackage applicationPackage = new ApplicationPackageBuilder()
+ .upgradePolicy("canary")
+ // Block upgrades on weekends and ouside working hours
+ .blockChange(false, true, "mon-fri", "00-09,17-23", "UTC")
+ .blockChange(false, true, "sat-sun", "00-23", "UTC")
+ .region("us-west-1")
+ .region("us-central-1")
+ .region("us-east-3")
+ .build();
+
+ Application app = tester.createAndDeploy("app1", 1, applicationPackage);
+
+ // New version is released
+ version = Version.fromString("5.1");
+ tester.updateVersionStatus(version);
+
+ // Application upgrade starts
+ tester.upgrader().maintain();
+ tester.deployAndNotify(app, applicationPackage, true, DeploymentJobs.JobType.systemTest);
+ tester.deployAndNotify(app, applicationPackage, true, DeploymentJobs.JobType.stagingTest);
+ tester.deployAndNotify(app, applicationPackage, true, DeploymentJobs.JobType.productionUsWest1);
+ clock.advance(Duration.ofHours(1)); // Entering block window after prod job is triggered
+ tester.deployAndNotify(app, applicationPackage, true, DeploymentJobs.JobType.productionUsCentral1);
+ assertTrue(tester.buildSystem().jobs().isEmpty()); // Next job not triggered due to being in the block window
+
+ // A day passes and we get a new version
+ tester.clock().advance(Duration.ofDays(1));
+ version = Version.fromString("5.2");
+ tester.updateVersionStatus(version);
+ tester.upgrader().maintain();
+ blockedChangeDeployer.maintain();
+ assertTrue("Nothing is scheduled", tester.buildSystem().jobs().isEmpty());
+
+ // Monday morning: We are not blocked
+ tester.clock().advance(Duration.ofDays(1)); // Sunday, 17:00
+ tester.clock().advance(Duration.ofHours(17)); // Monday, 10:00
+ tester.upgrader().maintain();
+ blockedChangeDeployer.maintain();
+ // We proceed with the new version in the expected order, not starting with the previously blocked version:
+ // Test jobs are run with the new version, but not production as we are in the block window
+ tester.deployAndNotify(app, applicationPackage, true, DeploymentJobs.JobType.systemTest);
+ tester.deployAndNotify(app, applicationPackage, true, DeploymentJobs.JobType.stagingTest);
+ tester.deployAndNotify(app, applicationPackage, true, DeploymentJobs.JobType.productionUsWest1);
+ tester.deployAndNotify(app, applicationPackage, true, DeploymentJobs.JobType.productionUsCentral1);
+ tester.deployAndNotify(app, applicationPackage, true, DeploymentJobs.JobType.productionUsEast3);
+ assertTrue("All jobs consumed", tester.buildSystem().jobs().isEmpty());
+
+ // App is completely upgraded to the latest version
+ for (Deployment deployment : tester.applications().require(app.id()).deployments().values())
+ assertEquals(version, deployment.version());
+ }
+
+ @Test
public void testReschedulesUpgradeAfterTimeout() {
DeploymentTester tester = new DeploymentTester();
Version version = Version.fromString("5.0");