summaryrefslogtreecommitdiffstats
path: root/controller-server
diff options
context:
space:
mode:
authorjonmv <venstad@gmail.com>2022-07-01 17:19:39 +0200
committerjonmv <venstad@gmail.com>2022-07-01 17:19:39 +0200
commit0c18fdd48a7202fba90126de776c7fda86a15c44 (patch)
treebdf59e6c05752df064f8d53fd6ba07f0ce5509b9 /controller-server
parentbc61be2efe931f7d4b5357b2792c001d3266517b (diff)
Set broken revisions aside, so upgrades may be attempted
Diffstat (limited to 'controller-server')
-rw-r--r--controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentStatus.java1
-rw-r--r--controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentTrigger.java4
-rw-r--r--controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/Upgrader.java3
-rw-r--r--controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/UpgraderTest.java50
4 files changed, 56 insertions, 2 deletions
diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentStatus.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentStatus.java
index 24cd92d005f..2b764abb090 100644
--- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentStatus.java
+++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentStatus.java
@@ -518,6 +518,7 @@ public class DeploymentStatus {
boolean revisionReadyFirst = revisionReadyAt.get().isBefore(platformReadyAt.get());
boolean failingUpgradeOnlyTests = ! jobs().type(systemTest(job.type()), stagingTest(job.type()))
.failingHardOn(Versions.from(change.withoutApplication(), application, deploymentFor(job), systemVersion))
+ .matching(testJob -> ! testJob.isSuccess())
.isEmpty();
switch (rollout) {
case separate: // Let whichever change rolled out first, keep rolling first, unless upgrade alone is failing.
diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentTrigger.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentTrigger.java
index c28f94bc4d7..87e86d98da9 100644
--- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentTrigger.java
+++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentTrigger.java
@@ -448,6 +448,10 @@ public class DeploymentTrigger {
private boolean acceptNewRevision(DeploymentStatus status, InstanceName instance, RevisionId revision) {
if (status.application().deploymentSpec().instance(instance).isEmpty()) return false; // Unknown instance.
+ if ( ! status.jobs().failingApplicationChange()
+ .firstFailing().endedNoLaterThan(clock.instant().minus(Duration.ofDays(5)))
+ .firstFailing().on(revision)
+ .isEmpty()) return false; // Don't deploy a broken revision.
boolean isChangingRevision = status.application().require(instance).change().revision().isPresent();
DeploymentInstanceSpec spec = status.application().deploymentSpec().requireInstance(instance);
Predicate<RevisionId> revisionFilter = spec.revisionTarget() == DeploymentSpec.RevisionTarget.next
diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/Upgrader.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/Upgrader.java
index b45e7b046d6..4f84c86350b 100644
--- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/Upgrader.java
+++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/Upgrader.java
@@ -114,7 +114,7 @@ public class Upgrader extends ControllerMaintainer {
// Prefer the newest target for each instance.
remaining = remaining.not().matching(eligible.asList()::contains)
.not().hasCompleted(Change.of(version));
- for (ApplicationId id : outdated.and(eligible.not().upgrading()).not().changingRevision())
+ for (ApplicationId id : outdated.and(eligible.not().upgrading()))
targets.put(id, version);
}
@@ -123,6 +123,7 @@ public class Upgrader extends ControllerMaintainer {
log.log(Level.INFO, "Triggering upgrade to " + targets.get(id) + " for " + id);
if (failingRevision.contains(id))
controller().applications().deploymentTrigger().cancelChange(id, ChangesToCancel.APPLICATION);
+
controller().applications().deploymentTrigger().triggerChange(id, Change.of(targets.get(id)));
}
}
diff --git a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/UpgraderTest.java b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/UpgraderTest.java
index 3b21d3a017e..6fd2a8a6858 100644
--- a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/UpgraderTest.java
+++ b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/UpgraderTest.java
@@ -35,6 +35,7 @@ import static com.yahoo.vespa.hosted.controller.deployment.DeploymentContext.pro
import static com.yahoo.vespa.hosted.controller.deployment.DeploymentContext.stagingTest;
import static com.yahoo.vespa.hosted.controller.deployment.DeploymentContext.systemTest;
import static com.yahoo.vespa.hosted.controller.deployment.DeploymentTrigger.ChangesToCancel.ALL;
+import static com.yahoo.vespa.hosted.controller.deployment.DeploymentTrigger.ChangesToCancel.APPLICATION;
import static com.yahoo.vespa.hosted.controller.deployment.DeploymentTrigger.ChangesToCancel.PIN;
import static com.yahoo.vespa.hosted.controller.deployment.DeploymentTrigger.ChangesToCancel.PLATFORM;
import static org.junit.Assert.assertEquals;
@@ -995,6 +996,53 @@ public class UpgraderTest {
}
@Test
+ public void testSettingFailingRevisionAside() {
+ DeploymentContext app = tester.newDeploymentContext().submit().deploy();
+
+ // New revision fails.
+ app.submit();
+ Optional<RevisionId> revision1 = app.lastSubmission();
+ app.failDeployment(systemTest);
+
+ // New version is not targeted.
+ Version version1 = new Version("7");
+ tester.controllerTester().upgradeSystem(version1);
+ assertEquals(Change.of(revision1.get()), app.instance().change());
+
+ tester.upgrader().run();
+ assertEquals(Change.of(revision1.get()), app.instance().change());
+
+ // Broken revision is cancelled, and new version targeted, after some time.
+ tester.clock().advance(Duration.ofDays(6));
+ tester.upgrader().run();
+ assertEquals(Change.of(version1), app.instance().change());
+
+ // Broken revision is not targeted again.
+ app.triggerJobs();
+ tester.upgrader().run();
+ tester.outstandingChangeDeployer().run();
+ assertEquals(Change.of(version1), app.instance().change());
+
+ app.failDeployment(systemTest);
+ tester.upgrader().run();
+ tester.outstandingChangeDeployer().run();
+ assertEquals(Change.of(version1), app.instance().change());
+
+ // Revision gets a second change when upgrade fixes the failing job.
+ tester.clock().advance(Duration.ofDays(12)); // Time for retries.
+ app.runJob(systemTest).jobAborted(stagingTest).runJob(stagingTest);
+ tester.upgrader().run();
+ tester.outstandingChangeDeployer().run();
+
+ assertEquals(Change.of(version1).with(revision1.get()), app.instance().change());
+ app.runJob(productionUsCentral1); // Upgrade rolls.
+ app.runJob(systemTest).runJob(stagingTest).runJob(productionUsCentral1); // Revision rolls.
+ app.runJob(productionUsEast3).runJob(productionUsWest1); // Upgrade completes.
+ app.runJob(productionUsEast3).runJob(productionUsWest1); // Revision completes.
+ assertEquals(Change.empty(), app.instance().change());
+ }
+
+ @Test
public void testsEachUpgradeCombinationWithFailingDeployments() {
Version v1 = Version.fromString("6.1");
tester.controllerTester().upgradeSystem(v1);
@@ -1085,7 +1133,7 @@ public class UpgraderTest {
default2.instanceId(), default2);
// Throttle upgrades per run
- ((ManualClock) tester.controller().clock()).setInstant(Instant.ofEpochMilli(1589787109000L)); // Fixed random seed
+ ((ManualClock) tester.controller().clock()).setInstant(Instant.ofEpochMilli(1589787107000L)); // Fixed random seed
Upgrader upgrader = new Upgrader(tester.controller(), Duration.ofMinutes(10));
upgrader.setUpgradesPerMinute(0.1);