summaryrefslogtreecommitdiffstats
path: root/controller-server
diff options
context:
space:
mode:
authorjonmv <venstad@gmail.com>2022-10-11 13:55:07 +0200
committerjonmv <venstad@gmail.com>2022-10-11 13:55:07 +0200
commita6a3f5574c0355b8f24724c3c85a0d2bf82350ef (patch)
treed52b36b6bc353f93e7b36a1d7872396c1a4aa7ff /controller-server
parenta235a0f62327020f10ead81934f749cf426a3c47 (diff)
Abandon apps which never deployed to prod, and failed for at least 30 days
Diffstat (limited to 'controller-server')
-rw-r--r--controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentTrigger.java21
-rw-r--r--controller-server/src/test/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentTriggerTest.java48
2 files changed, 69 insertions, 0 deletions
diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentTrigger.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentTrigger.java
index 2da70540b83..d5a31a07408 100644
--- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentTrigger.java
+++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentTrigger.java
@@ -91,10 +91,31 @@ public class DeploymentTrigger {
status,
false));
}
+
+ // If app has been broken since it was first submitted, and not fixed for a long time, we stop managing it until a new submission comes in.
+ if (applicationWasAlwaysBroken(status))
+ application = application.withProjectId(OptionalLong.empty());
+
applications().store(application);
});
}
+ /**
+  * Decides whether the given application has been broken ever since it was first submitted.
+  *
+  * An application counts as always broken when no instance has a production deployment, and at least
+  * one job has, counting from its first run in descending order, only runs which target the last
+  * revision and have not ended successfully, with one of those runs started more than 30 days ago.
+  *
+  * @param status the deployment status of the application to evaluate
+  * @return true if the application was always broken by the criteria above; false otherwise,
+  *         including when the application has no revisions at all
+  */
+ private boolean applicationWasAlwaysBroken(DeploymentStatus status) {
+     // If application has a production deployment, we cannot forget it.
+     if (status.application().instances().values().stream().anyMatch(instance -> ! instance.productionDeployments().isEmpty()))
+         return false;
+
+     // Without any revision there is nothing to have failed on; this also avoids an unchecked Optional.get().
+     RevisionId last = status.application().revisions().last().map(version -> version.id()).orElse(null);
+     if (last == null)
+         return false;
+
+     // Then, we need a job that always failed, and failed on the last revision for at least 30 days.
+     Instant threshold = clock.instant().minus(Duration.ofDays(30));
+     for (JobStatus job : status.jobs().asList()) {
+         for (Run run : job.runs().descendingMap().values()) {
+             // A run which ended without failing, or which targeted another revision, ends the failure streak of this job.
+             boolean succeeded = run.hasEnded() && ! run.hasFailed();
+             if (succeeded || ! run.versions().targetRevision().equals(last))
+                 break;
+
+             // Still within the streak: a run started before the threshold means it has lasted long enough.
+             if (run.start().isBefore(threshold))
+                 return true;
+         }
+     }
+
+     return false;
+ }
+
/**
* Records information when a job completes (successfully or not). This information is used when deciding what to
* trigger next.
diff --git a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentTriggerTest.java b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentTriggerTest.java
index d8cef45f124..537090c6d68 100644
--- a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentTriggerTest.java
+++ b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentTriggerTest.java
@@ -2700,6 +2700,54 @@ public class DeploymentTriggerTest {
}
@Test
+ void testBrokenApplication() {
+ // Exercises abandonment of applications that keep failing: after each scenario the outstanding change
+ // deployer is run, and the application's project ID is checked to be either cleared or still present.
+ DeploymentContext app = tester.newDeploymentContext();
+ // Scenario 1: system test passes, staging test deployment fails twice, and 31 days pass.
+ // The project ID is then expected to be cleared.
+ app.submit().runJob(systemTest).failDeployment(stagingTest).failDeployment(stagingTest);
+ tester.clock().advance(Duration.ofDays(31));
+ tester.outstandingChangeDeployer().run();
+ assertEquals(OptionalLong.empty(), app.application().projectId());
+
+ // With the project ID cleared, triggering jobs must not start staging test, even though jobs are still listed as to-run.
+ app.assertNotRunning(stagingTest);
+ tester.triggerJobs();
+ app.assertNotRunning(stagingTest);
+ assertEquals(4, app.deploymentStatus().jobsToRun().size());
+
+ // Scenario 2: two failing submissions, 20 days apart. 20 days after the second one the project ID is
+ // still set, as the last revision has only been failing for 20 days; after 20 more days it is cleared.
+ // NOTE(review): the project ID 1000 is presumably restored by submit() in the test harness — confirm.
+ app.submit().runJob(systemTest).failDeployment(stagingTest);
+ tester.clock().advance(Duration.ofDays(20));
+ app.submit().runJob(systemTest).failDeployment(stagingTest);
+ tester.clock().advance(Duration.ofDays(20));
+ tester.outstandingChangeDeployer().run();
+ assertEquals(OptionalLong.of(1000), app.application().projectId());
+ tester.clock().advance(Duration.ofDays(20));
+ tester.outstandingChangeDeployer().run();
+ assertEquals(OptionalLong.empty(), app.application().projectId());
+
+ // Again, nothing is triggered once the project ID is cleared.
+ app.assertNotRunning(stagingTest);
+ tester.triggerJobs();
+ app.assertNotRunning(stagingTest);
+ assertEquals(4, app.deploymentStatus().jobsToRun().size());
+
+ // Scenario 3: both tests pass, but the production deployment fails; 31 days later the project ID is cleared.
+ app.submit().runJob(systemTest).runJob(stagingTest).failDeployment(productionUsCentral1);
+ tester.clock().advance(Duration.ofDays(31));
+ tester.outstandingChangeDeployer().run();
+ assertEquals(OptionalLong.empty(), app.application().projectId());
+
+ // The production job is not triggered for the abandoned application.
+ app.assertNotRunning(productionUsCentral1);
+ tester.triggerJobs();
+ app.assertNotRunning(productionUsCentral1);
+ assertEquals(3, app.deploymentStatus().jobsToRun().size());
+
+ // Scenario 4: the production job times out during convergence; 31 days later the project ID is kept.
+ // NOTE(review): presumably the timed-out run still leaves a production deployment behind, which prevents abandonment — confirm.
+ app.submit().runJob(systemTest).runJob(stagingTest).timeOutConvergence(productionUsCentral1);
+ tester.clock().advance(Duration.ofDays(31));
+ tester.outstandingChangeDeployer().run();
+ assertEquals(OptionalLong.of(1000), app.application().projectId());
+
+ // Since the application is still managed, triggering jobs starts the production job again.
+ app.assertNotRunning(productionUsCentral1);
+ tester.triggerJobs();
+ app.assertRunning(productionUsCentral1);
+ }
+
+ @Test
void testJobNames() {
ZoneRegistryMock zones = new ZoneRegistryMock(SystemName.main);
List<ZoneApi> existing = new ArrayList<>(zones.zones().all().zones());