summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author Jon Marius Venstad <venstad@gmail.com> 2020-08-17 11:03:05 +0200
committer Jon Marius Venstad <venstad@gmail.com> 2020-08-17 11:03:05 +0200
commit 57c235368d9c450918e76445b8d0782564e8380e (patch)
tree d18ac3543d59c03b9321d42e9cfdf7e5ce480b3b
parent 0f03e049eaa576fadd4c3e459148099a7fc6b126 (diff)
Keep only sequences of failing production jobs in version history
-rw-r--r-- controller-server/src/main/java/com/yahoo/vespa/hosted/controller/versions/DeploymentStatistics.java 21
-rw-r--r-- controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/DeploymentIssueReporterTest.java 4
-rw-r--r-- controller-server/src/test/java/com/yahoo/vespa/hosted/controller/versions/VersionStatusTest.java 80
3 files changed, 99 insertions, 6 deletions
diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/versions/DeploymentStatistics.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/versions/DeploymentStatistics.java
index 99cf7542d53..df018d64748 100644
--- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/versions/DeploymentStatistics.java
+++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/versions/DeploymentStatistics.java
@@ -84,13 +84,14 @@ public class DeploymentStatistics {
for (Deployment deployment : instance.productionDeployments().values())
allVersions.add(deployment.version());
- JobList failing = status.jobs().failing();
+ JobList failing = status.jobs().failing()
+ .not().withStatus(RunStatus.outOfCapacity)
+ .not().withStatus(RunStatus.aborted);
- // Add all unsuccessful runs for failing jobs as any run may have resulted in an incomplete deployment
+ // Add all unsuccessful runs for failing production jobs as any run may have resulted in an incomplete deployment
// where a subset of nodes have upgraded.
failing.not().failingApplicationChange()
- .not().withStatus(RunStatus.outOfCapacity)
- .not().withStatus(RunStatus.aborted)
+ .production()
.mapToList(JobStatus::runs)
.forEach(runs -> runs.descendingMap().values().stream()
.dropWhile(run -> ! run.hasEnded())
@@ -101,9 +102,17 @@ public class DeploymentStatistics {
failingUpgrade.get(run.versions().targetPlatform()).add(run);
}));
+ // Add only the last failing run for test jobs.
+ failing.not().failingApplicationChange()
+ .not().production()
+ .lastCompleted().asList()
+ .forEach(run -> {
+ failingUpgrade.putIfAbsent(run.versions().targetPlatform(), new ArrayList<>());
+ failingUpgrade.get(run.versions().targetPlatform()).add(run);
+ });
+
+ // Add only the last failing for instances failing only an application change, i.e., no upgrade.
failing.failingApplicationChange()
- .concat(failing.withStatus(RunStatus.outOfCapacity))
- .concat(failing.withStatus(RunStatus.aborted))
.lastCompleted().asList()
.forEach(run -> {
otherFailing.putIfAbsent(run.versions().targetPlatform(), new ArrayList<>());
diff --git a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/DeploymentIssueReporterTest.java b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/DeploymentIssueReporterTest.java
index 157941bd092..dcb7a6dd42b 100644
--- a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/DeploymentIssueReporterTest.java
+++ b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/DeploymentIssueReporterTest.java
@@ -162,6 +162,10 @@ public class DeploymentIssueReporterTest {
reporter.maintain();
assertTrue("We get a platform issue when confidence is broken", issues.platformIssue());
assertFalse("No deployment issue is filed for app2, which has a version upgrade failure.", issues.isOpenFor(app2.application().id()));
+
+ app2.runJob(systemTest);
+ tester.controllerTester().upgradeSystem(version);
+ assertEquals(VespaVersion.Confidence.low, tester.controller().versionStatus().systemVersion().get().confidence());
}
diff --git a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/versions/VersionStatusTest.java b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/versions/VersionStatusTest.java
index 44e172d9f93..3ea9d038d99 100644
--- a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/versions/VersionStatusTest.java
+++ b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/versions/VersionStatusTest.java
@@ -385,6 +385,86 @@ public class VersionStatusTest {
}
@Test
+ public void testConfidenceWithLingeringVersions() {
+ DeploymentTester tester = new DeploymentTester().atMondayMorning();
+ Version version0 = new Version("6.2");
+ tester.controllerTester().upgradeSystem(version0);
+ tester.upgrader().maintain();
+ var appPackage = new ApplicationPackageBuilder().region("us-west-1").region("us-east-3").upgradePolicy("canary");
+
+ var canary0 = tester.newDeploymentContext("tenant1", "canary0", "default")
+ .submit(appPackage.build())
+ .deploy();
+
+ assertEquals("All applications running on this version: High",
+ Confidence.high, confidence(tester.controller(), version0));
+
+ // New version is released
+ Version version1 = new Version("6.3");
+ tester.controllerTester().upgradeSystem(version1);
+ tester.upgrader().maintain();
+ tester.triggerJobs();
+
+ // App upgrades to the new version and fails
+ canary0.failDeployment(systemTest);
+ canary0.abortJob(stagingTest);
+ tester.controllerTester().computeVersionStatus();
+ assertEquals("One canary failed: Broken",
+ Confidence.broken, confidence(tester.controller(), version1));
+
+ // New version is released
+ Version version2 = new Version("6.4");
+ tester.controllerTester().upgradeSystem(version2);
+ tester.upgrader().maintain();
+ assertEquals("Confidence remains unchanged for version1 until app overrides old tests: Broken",
+ Confidence.broken, confidence(tester.controller(), version1));
+ assertEquals("Confidence defaults to low for version with no applications",
+ Confidence.low, confidence(tester.controller(), version2));
+ assertEquals(version2, canary0.instance().change().platform().orElseThrow());
+
+ canary0.failDeployment(systemTest);
+ canary0.abortJob(stagingTest);
+ tester.controllerTester().computeVersionStatus();
+ assertFalse("Previous version should be forgotten, as canary only had test jobs run on it",
+ tester.controller().versionStatus().versions().stream().anyMatch(version -> version.versionNumber().equals(version1)));
+
+ // App succeeds with tests, but fails production deployment
+ canary0.runJob(systemTest)
+ .runJob(stagingTest)
+ .failDeployment(productionUsWest1);
+
+ assertEquals("One canary failed: Broken",
+ Confidence.broken, confidence(tester.controller(), version2));
+
+ // A new version is released, and the app again fails production deployment.
+ Version version3 = new Version("6.5");
+ tester.controllerTester().upgradeSystem(version3);
+ tester.upgrader().maintain();
+ assertEquals("Confidence remains unchanged for version2: Broken",
+ Confidence.broken, confidence(tester.controller(), version2));
+ assertEquals("Confidence defaults to low for version with no applications",
+ Confidence.low, confidence(tester.controller(), version3));
+ assertEquals(version3, canary0.instance().change().platform().orElseThrow());
+
+ canary0.runJob(systemTest)
+ .runJob(stagingTest)
+ .failDeployment(productionUsWest1);
+ tester.controllerTester().computeVersionStatus();
+ assertEquals("Confidence remains unchanged for version2: Broken",
+ Confidence.broken, confidence(tester.controller(), version2));
+ assertEquals("Canary broken, so confidence for version3: Broken",
+ Confidence.broken, confidence(tester.controller(), version3));
+
+ // App succeeds production deployment, clearing failure on version2
+ canary0.runJob(productionUsWest1);
+ tester.controllerTester().computeVersionStatus();
+ assertFalse("Previous version should be forgotten, as canary only had test jobs run on it",
+ tester.controller().versionStatus().versions().stream().anyMatch(version -> version.versionNumber().equals(version2)));
+ assertEquals("Canary OK, but not done upgrading, so confidence for version3: Low",
+ Confidence.low, confidence(tester.controller(), version3));
+ }
+
+ @Test
public void testConfidenceOverride() {
DeploymentTester tester = new DeploymentTester();
Version version0 = new Version("6.2");