summaryrefslogtreecommitdiffstats
path: root/controller-server
diff options
context:
space:
mode:
authorJon Marius Venstad <venstad@gmail.com>2022-03-01 12:06:52 +0100
committerJon Marius Venstad <venstad@gmail.com>2022-03-01 12:06:52 +0100
commit0d41f8dff5c3d1343a39653cec19a24eb6c63a11 (patch)
treef0afc2e87a27223f35f78d6bdb3a1367b63ff18e /controller-server
parentc391ef917ac415c07cfa96dc382747f7e46cef72 (diff)
Detect test failure when considering change splitting for prod deployments
Diffstat (limited to 'controller-server')
-rw-r--r--controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentStatus.java9
-rw-r--r--controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/JobList.java10
-rw-r--r--controller-server/src/test/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentTriggerTest.java15
3 files changed, 31 insertions, 3 deletions
diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentStatus.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentStatus.java
index 989f305b0cc..8c933f98277 100644
--- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentStatus.java
+++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentStatus.java
@@ -351,8 +351,8 @@ public class DeploymentStatus {
|| step.completedAt(change.withoutPlatform(), Optional.of(job)).isPresent())
return List.of(change);
- // For a dual change, where both target remain, we determine what to run by looking at when the two parts became ready:
- // for deployments, we look at dependencies; for tests, this may be overridden by what is already deployed.
+ // For a dual change, where both targets remain, we determine what to run by looking at when the two parts became ready:
+ // for deployments, we look at dependencies; for production tests, this may be overridden by what is already deployed.
JobId deployment = new JobId(job.application(), JobType.from(system, job.type().zone(system)).get());
UpgradeRollout rollout = application.deploymentSpec().requireInstance(job.application().instance()).upgradeRollout();
if (job.type().isTest()) {
@@ -409,10 +409,13 @@ public class DeploymentStatus {
// Both changes are ready for this step, and we look to the specified rollout to decide.
boolean platformReadyFirst = platformReadyAt.get().isBefore(revisionReadyAt.get());
boolean revisionReadyFirst = revisionReadyAt.get().isBefore(platformReadyAt.get());
+ boolean failingUpgradeOnlyTests = ! jobs().type(systemTest, stagingTest)
+ .failingHardOn(Versions.from(change.withoutApplication(), application, deploymentFor(job), systemVersion))
+ .isEmpty();
switch (rollout) {
case separate: // Let whichever change rolled out first, keep rolling first, unless upgrade alone is failing.
return (platformReadyFirst || platformReadyAt.get().equals(Instant.EPOCH)) // Assume platform was first if no jobs have run yet.
- ? step.job().flatMap(jobs()::get).flatMap(JobStatus::firstFailing).isPresent()
+ ? step.job().flatMap(jobs()::get).flatMap(JobStatus::firstFailing).isPresent() || failingUpgradeOnlyTests
? List.of(change) // Platform was first, but is failing.
: List.of(change.withoutApplication(), change) // Platform was first, and is OK.
: revisionReadyFirst
diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/JobList.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/JobList.java
index 5de07bad859..d06bdc45583 100644
--- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/JobList.java
+++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/JobList.java
@@ -17,6 +17,7 @@ import java.util.function.Function;
import java.util.function.Predicate;
import static com.yahoo.vespa.hosted.controller.deployment.RunStatus.aborted;
+import static com.yahoo.vespa.hosted.controller.deployment.RunStatus.nodeAllocationFailure;
/**
* A list of deployment jobs that can be filtered in various ways.
@@ -102,6 +103,15 @@ public class JobList extends AbstractFilteringList<JobStatus, JobList> {
return matching(job -> job.id().type().isProduction());
}
+ /** Returns the jobs with any runs failing with non-out-of-test-capacity on the given versions — targets only for system test, everything present otherwise. */
+ public JobList failingHardOn(Versions versions) {
+ return matching(job -> ! RunList.from(job)
+ .on(versions)
+ .matching(Run::hasFailed)
+ .not().matching(run -> run.status() == nodeAllocationFailure && run.id().type().environment().isTest())
+ .isEmpty());
+ }
+
/** Returns the jobs with any runs matching the given versions — targets only for system test, everything present otherwise. */
public JobList triggeredOn(Versions versions) {
return matching(job -> ! RunList.from(job).on(versions).isEmpty());
diff --git a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentTriggerTest.java b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentTriggerTest.java
index c01827b6eb0..b2847a29654 100644
--- a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentTriggerTest.java
+++ b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentTriggerTest.java
@@ -1434,6 +1434,21 @@ public class DeploymentTriggerTest {
tester.jobs().last(app.instanceId(), productionUsWest1).get().versions());
app.runJob(productionUsWest1);
assertEquals(Change.empty(), app.instance().change());
+
+ // New upgrade fails in staging-test, and revision to fix it is submitted.
+ var version2 = new Version("7.2");
+ tester.controllerTester().upgradeSystem(version2);
+ tester.upgrader().maintain();
+ app.runJob(systemTest).failDeployment(stagingTest);
+ tester.clock().advance(Duration.ofMinutes(30));
+ app.failDeployment(stagingTest);
+ app.submit(appPackage);
+
+ app.runJob(systemTest).runJob(stagingTest) // Tests run with combined upgrade.
+ .runJob(productionUsCentral1) // Combined upgrade stays together.
+ .runJob(productionUsEast3).runJob(productionUsWest1);
+ assertEquals(Map.of(), app.deploymentStatus().jobsToRun());
+ assertEquals(Change.empty(), app.instance().change());
}
@Test