summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorJon Marius Venstad <jvenstad@yahoo-inc.com>2018-09-21 09:41:12 +0200
committerJon Marius Venstad <jvenstad@yahoo-inc.com>2018-09-21 09:41:12 +0200
commit1c03478fe8f9f7c97b7a363763a72e4c37c5194b (patch)
tree15142dd5996d989ecda2c8602ec15e7e4d22bc9a
parentadc3f3eaee4e53f5c1b68ccb22a943f0148b6f0e (diff)
Allow a change which has deployed to complete when blocked
-rw-r--r--controller-server/src/main/java/com/yahoo/vespa/hosted/controller/Application.java16
-rw-r--r--controller-server/src/test/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentTriggerTest.java25
-rw-r--r--controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/UpgraderTest.java85
3 files changed, 59 insertions, 67 deletions
diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/Application.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/Application.java
index 677f2363c08..6dec9aae5dc 100644
--- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/Application.java
+++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/Application.java
@@ -129,12 +129,20 @@ public class Application {
public Change change() { return change; }
/**
- * Returns the change that should used for this application at the given instant, typically now.
+ * Returns the target that should be used for this application at the given instant, typically now.
+ *
+ * This will be any parts of current total change that aren't both blocked and not yet deployed anywhere.
*/
public Change changeAt(Instant now) {
- Change change = change();
- if ( ! deploymentSpec.canUpgradeAt(now)) change = change.withoutPlatform();
- if ( ! deploymentSpec.canChangeRevisionAt(now)) change = change.withoutApplication();
+ Change change = this.change;
+ if ( this.change.platform().isPresent()
+ && productionDeployments().values().stream().noneMatch(deployment -> deployment.version().equals(this.change.platform().get()))
+ && ! deploymentSpec.canUpgradeAt(now))
+ change = change.withoutPlatform();
+ if ( this.change.application().isPresent()
+ && productionDeployments().values().stream().noneMatch(deployment -> deployment.applicationVersion().equals(this.change.application().get()))
+ && ! deploymentSpec.canChangeRevisionAt(now))
+ change = change.withoutApplication();
return change;
}
diff --git a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentTriggerTest.java b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentTriggerTest.java
index 3b381e21b27..b82855813ba 100644
--- a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentTriggerTest.java
+++ b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentTriggerTest.java
@@ -46,6 +46,7 @@ import static com.yahoo.vespa.hosted.controller.api.integration.deployment.JobTy
import static com.yahoo.vespa.hosted.controller.api.integration.deployment.JobType.stagingTest;
import static com.yahoo.vespa.hosted.controller.api.integration.deployment.JobType.systemTest;
import static java.time.temporal.ChronoUnit.MILLIS;
+import static java.util.Collections.emptyList;
import static java.util.Collections.singletonList;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertFalse;
@@ -340,7 +341,7 @@ public class DeploymentTriggerTest {
tester.deployAndNotify(app, changedApplication, true, stagingTest);
readyJobsTrigger.run();
- assertEquals(0, tester.buildService().jobs().size());
+ assertEquals(emptyList(), tester.buildService().jobs());
tester.clock().advance(Duration.ofHours(2)); // ---------------- Exit block window: 20:30
tester.deploymentTrigger().triggerReadyJobs(); // Schedules staging test for the blocked production job(s)
@@ -380,27 +381,17 @@ public class DeploymentTriggerTest {
assertEquals((BuildJob.defaultBuildNumber + 1), tester.application(application.id()).outstandingChange().application().get().buildNumber().getAsLong());
tester.readyJobTrigger().maintain();
- assertTrue(tester.buildService().jobs().isEmpty());
+ // Platform upgrade keeps rolling, since it has already deployed in a production zone.
+ assertEquals(1, tester.buildService().jobs().size());
+ tester.deployAndNotify(application, applicationPackage, true, productionUsEast3);
+ assertEquals(emptyList(), tester.buildService().jobs());
+
- // New component triggers a full deployment of new application version, leaving platform versions alone.
+ // New component triggers a full deployment of new application version, but only after the upgrade is done.
tester.jobCompletion(component).application(application).nextBuildNumber().nextBuildNumber().uploadArtifact(applicationPackage).submit();
tester.deployAndNotify(application, applicationPackage, true, stagingTest);
tester.deployAndNotify(application, applicationPackage, true, systemTest);
tester.deployAndNotify(application, applicationPackage, true, productionUsWest1);
- tester.deployAndNotify(application, applicationPackage, true, systemTest);
- tester.deployAndNotify(application, applicationPackage, true, stagingTest);
- tester.deployAndNotify(application, applicationPackage, true, productionUsEast3);
- tester.deployAndNotify(application, applicationPackage, true, systemTest);
- tester.deployAndNotify(application, applicationPackage, true, stagingTest);
-
- // All tests are done for now, and only the platform change remains.
- assertTrue(tester.buildService().jobs().isEmpty());
- assertEquals(Change.of(v2), tester.application(application.id()).change());
-
- // Exiting block window, staging test is re-run for the last prod zone, which has the old platform.
- clock.advance(Duration.ofHours(1));
- tester.readyJobTrigger().maintain();
- tester.deployAndNotify(application, applicationPackage, true, stagingTest);
tester.deployAndNotify(application, applicationPackage, true, productionUsEast3);
assertFalse(tester.application(application.id()).change().isPresent());
diff --git a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/UpgraderTest.java b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/UpgraderTest.java
index f1b20694f3d..f2e209436dc 100644
--- a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/UpgraderTest.java
+++ b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/UpgraderTest.java
@@ -19,6 +19,7 @@ import org.junit.Test;
import java.time.Duration;
import java.time.Instant;
+import java.util.Collections;
import static com.yahoo.vespa.hosted.controller.api.integration.deployment.JobType.component;
import static com.yahoo.vespa.hosted.controller.api.integration.deployment.JobType.productionUsCentral1;
@@ -26,6 +27,7 @@ import static com.yahoo.vespa.hosted.controller.api.integration.deployment.JobTy
import static com.yahoo.vespa.hosted.controller.api.integration.deployment.JobType.productionUsWest1;
import static com.yahoo.vespa.hosted.controller.api.integration.deployment.JobType.stagingTest;
import static com.yahoo.vespa.hosted.controller.api.integration.deployment.JobType.systemTest;
+import static java.lang.Enum.valueOf;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertFalse;
import static org.junit.Assert.assertTrue;
@@ -650,30 +652,15 @@ public class UpgraderTest {
tester.deployAndNotify(app, applicationPackage, true, stagingTest);
clock.advance(Duration.ofHours(1)); // Entering block window after prod job is triggered
tester.deployAndNotify(app, applicationPackage, true, productionUsWest1);
- assertTrue(tester.buildService().jobs().isEmpty()); // Next job not triggered due to being in the block window
+ assertEquals(1, tester.buildService().jobs().size()); // Next job triggered because upgrade is already rolling out.
- // One hour passes, time is 19:00, still no upgrade
- tester.clock().advance(Duration.ofHours(1));
- tester.triggerUntilQuiescence();
- assertTrue("No jobs scheduled", tester.buildService().jobs().isEmpty());
-
- // Another hour pass, time is 20:00 and application upgrades
- tester.clock().advance(Duration.ofHours(1));
- tester.triggerUntilQuiescence();
tester.deployAndNotify(app, applicationPackage, true, productionUsCentral1);
tester.deployAndNotify(app, applicationPackage, true, productionUsEast3);
assertTrue("All jobs consumed", tester.buildService().jobs().isEmpty());
}
- /**
- * Tests the scenario where a release is deployed to 2 of 3 production zones, then blocked,
- * followed by timeout of the upgrade and a new release.
- * In this case, the blocked production zone should not progress with upgrading to the previous version,
- * and should not upgrade to the new version until the other production zones have it
- * (expected behavior; both requirements are debatable).
- */
@Test
- public void testBlockVersionChangeHalfwayThoughThenNewVersion() {
+ public void testBlockVersionChangeHalfwayThoughThenNewRevision() {
ManualClock clock = new ManualClock(Instant.parse("2017-09-29T16:00:00.00Z")); // Friday, 16:00
DeploymentTester tester = new DeploymentTester(new ControllerTester(clock));
@@ -681,7 +668,6 @@ public class UpgraderTest {
tester.upgradeSystem(version);
ApplicationPackage applicationPackage = new ApplicationPackageBuilder()
- .upgradePolicy("canary")
// Block upgrades on weekends and ouside working hours
.blockChange(false, true, "mon-fri", "00-09,17-23", "UTC")
.blockChange(false, true, "sat-sun", "00-23", "UTC")
@@ -701,20 +687,38 @@ public class UpgraderTest {
tester.triggerUntilQuiescence();
tester.deployAndNotify(app, applicationPackage, true, systemTest);
tester.deployAndNotify(app, applicationPackage, true, stagingTest);
- tester.deployAndNotify(app, applicationPackage, true, productionUsWest1);
clock.advance(Duration.ofHours(1)); // Entering block window after prod job is triggered
- tester.deployAndNotify(app, applicationPackage, true, productionUsCentral1);
- assertTrue(tester.buildService().jobs().isEmpty()); // Next job not triggered due to being in the block window
+ tester.deployAndNotify(app, applicationPackage, true, productionUsWest1);
+ assertEquals(1, tester.buildService().jobs().size()); // Next job triggered, as upgrade is already in progress.
+ tester.deployAndNotify(app, applicationPackage, false, productionUsCentral1); // us-central-1 fails, permitting a new revision.
+
+ // A new revision is submitted and starts rolling out.
+ tester.jobCompletion(component).application(app).nextBuildNumber().uploadArtifact(applicationPackage).submit();
- // A day passes and we get a new version
+ // us-central-1 fails again, and isn't re-triggered, because the target is now a revision instead.
+ tester.deployAndNotify(app, applicationPackage, false, productionUsCentral1);
+ assertEquals(2, tester.buildService().jobs().size());
+ tester.deployAndNotify(app, applicationPackage, true, systemTest);
+ tester.deployAndNotify(app, applicationPackage, true, stagingTest);
+ tester.deployAndNotify(app, applicationPackage, true, productionUsWest1);
+ // us-central-1 has an older version, and needs a new staging test to begin.
+ tester.deployAndNotify(app, applicationPackage, true, stagingTest);
+
+ // A new version is also released, cancelling the upgrade, since it is failing on a now outdated version.
tester.clock().advance(Duration.ofDays(1));
version = Version.fromString("5.2");
tester.upgradeSystem(version);
tester.upgrader().maintain();
tester.triggerUntilQuiescence();
- assertTrue("Nothing is scheduled", tester.buildService().jobs().isEmpty());
- // Monday morning: We are not blocked
+ // us-central-1 succeeds upgrade to 5.1, with the revision, but us-east-3 wants to proceed with only the revision change.
+ tester.deployAndNotify(app, applicationPackage, true, productionUsCentral1);
+ tester.deployAndNotify(app, applicationPackage, true, systemTest);
+ tester.deployAndNotify(app, applicationPackage, true, stagingTest);
+ tester.deployAndNotify(app, applicationPackage, true, productionUsEast3);
+ assertEquals(Collections.emptyList(), tester.buildService().jobs());
+
+ // Monday morning: We are not blocked, and the new version rolls out to all zones.
tester.clock().advance(Duration.ofDays(1)); // Sunday, 17:00
tester.clock().advance(Duration.ofHours(17)); // Monday, 10:00
tester.upgrader().maintain();
@@ -723,7 +727,6 @@ public class UpgraderTest {
tester.deployAndNotify(app, applicationPackage, true, stagingTest);
tester.deployAndNotify(app, applicationPackage, true, productionUsWest1);
tester.deployAndNotify(app, applicationPackage, true, productionUsCentral1);
- // us-east-3 has an older version than the other zones, and needs a new staging test run.
tester.deployAndNotify(app, applicationPackage, true, stagingTest);
tester.deployAndNotify(app, applicationPackage, true, productionUsEast3);
assertTrue("All jobs consumed", tester.buildService().jobs().isEmpty());
@@ -961,34 +964,24 @@ public class UpgraderTest {
tester.deployAndNotify(app, applicationPackage, true, stagingTest);
clock.advance(Duration.ofHours(1)); // Entering block window after prod job is triggered.
tester.deployAndNotify(app, applicationPackage, true, productionUsWest1);
- assertTrue(tester.buildService().jobs().isEmpty()); // Next job not triggered due to being in the block window.
+ assertEquals(1, tester.buildService().jobs().size()); // Next job triggered in spite of block, because it is already rolling out.
- // One hour passes, time is 19:00, still no upgrade.
- tester.clock().advance(Duration.ofHours(1));
- tester.triggerUntilQuiescence();
- assertTrue("No jobs scheduled", tester.buildService().jobs().isEmpty());
-
- // New version is released and upgrades are started in the two first production zones.
+ // New version is released, but upgrades won't start since there's already a revision rolling out.
version = Version.fromString("5.1");
tester.upgradeSystem(version);
- tester.deployAndNotify(app, applicationPackage, true, systemTest);
- tester.deployAndNotify(app, applicationPackage, true, stagingTest);
- tester.deployAndNotify(app, applicationPackage, true, productionUsWest1);
-
- // Tests for central-1.
- tester.deployAndNotify(app, applicationPackage, true, systemTest);
- tester.deployAndNotify(app, applicationPackage, true, stagingTest);
+ tester.triggerUntilQuiescence();
+ assertEquals(1, tester.buildService().jobs().size()); // Still just the revision upgrade.
- // Another hour pass, time is 20:00 and both revision and version upgrades are now allowed.
- tester.clock().advance(Duration.ofHours(1));
- tester.triggerUntilQuiescence(); // Tests that trigger now test the full upgrade, since central-1 is still on old versions.
- tester.deployAndNotify(app, applicationPackage, true, productionUsCentral1); // Only upgrade for now.
- // west-1 is now fully upgraded, central-1 only has new version, and east-3 has only old versions.
+ tester.deployAndNotify(app, applicationPackage, true, productionUsCentral1);
+ tester.deployAndNotify(app, applicationPackage, true, productionUsEast3);
+ assertEquals(Collections.emptyList(), tester.buildService().jobs()); // No jobs left.
- // These tests were triggered with an upgrade of both version and revision. Since central-1 no longer upgrades version,
- // it ignores the initial version of the staging job, and so the current staging job is OK for both zones.
+ // Upgrade may start, now that revision is rolled out.
+ tester.upgrader().maintain();
+ tester.readyJobTrigger().maintain();
tester.deployAndNotify(app, applicationPackage, true, systemTest);
tester.deployAndNotify(app, applicationPackage, true, stagingTest);
+ tester.deployAndNotify(app, applicationPackage, true, productionUsWest1);
tester.deployAndNotify(app, applicationPackage, true, productionUsCentral1);
tester.deployAndNotify(app, applicationPackage, true, productionUsEast3);
assertTrue("All jobs consumed", tester.buildService().jobs().isEmpty());