diff options
author | jonmv <venstad@gmail.com> | 2022-10-11 13:55:07 +0200 |
---|---|---|
committer | jonmv <venstad@gmail.com> | 2022-10-11 13:55:07 +0200 |
commit | a6a3f5574c0355b8f24724c3c85a0d2bf82350ef (patch) | |
tree | d52b36b6bc353f93e7b36a1d7872396c1a4aa7ff /controller-server | |
parent | a235a0f62327020f10ead81934f749cf426a3c47 (diff) |
Abandon apps which never deployed to prod, and failed for at least 30 days
Diffstat (limited to 'controller-server')
2 files changed, 69 insertions, 0 deletions
diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentTrigger.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentTrigger.java index 2da70540b83..d5a31a07408 100644 --- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentTrigger.java +++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentTrigger.java @@ -91,10 +91,31 @@ public class DeploymentTrigger { status, false)); } + + // If app has been broken since it was first submitted, and not fixed for a long time, we stop managing it until a new submission comes in. + if (applicationWasAlwaysBroken(status)) + application = application.withProjectId(OptionalLong.empty()); + applications().store(application); }); } + private boolean applicationWasAlwaysBroken(DeploymentStatus status) { + // If application has a production deployment, we cannot forget it. + if (status.application().instances().values().stream().anyMatch(instance -> ! instance.productionDeployments().isEmpty())) + return false; + + // Then, we need a job that always failed, and failed on the last revision for at least 30 days. + RevisionId last = status.application().revisions().last().get().id(); + Instant threshold = clock.instant().minus(Duration.ofDays(30)); + for (JobStatus job : status.jobs().asList()) + for (Run run : job.runs().descendingMap().values()) + if (run.hasEnded() && ! run.hasFailed() || ! run.versions().targetRevision().equals(last)) break; + else if (run.start().isBefore(threshold)) return true; + + return false; + } + /** * Records information when a job completes (successfully or not). This information is used when deciding what to * trigger next. diff --git a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentTriggerTest.java b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentTriggerTest.java index d8cef45f124..537090c6d68 100644 --- a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentTriggerTest.java +++ b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentTriggerTest.java @@ -2700,6 +2700,54 @@ public class DeploymentTriggerTest { } @Test + void testBrokenApplication() { + DeploymentContext app = tester.newDeploymentContext(); + app.submit().runJob(systemTest).failDeployment(stagingTest).failDeployment(stagingTest); + tester.clock().advance(Duration.ofDays(31)); + tester.outstandingChangeDeployer().run(); + assertEquals(OptionalLong.empty(), app.application().projectId()); + + app.assertNotRunning(stagingTest); + tester.triggerJobs(); + app.assertNotRunning(stagingTest); + assertEquals(4, app.deploymentStatus().jobsToRun().size()); + + app.submit().runJob(systemTest).failDeployment(stagingTest); + tester.clock().advance(Duration.ofDays(20)); + app.submit().runJob(systemTest).failDeployment(stagingTest); + tester.clock().advance(Duration.ofDays(20)); + tester.outstandingChangeDeployer().run(); + assertEquals(OptionalLong.of(1000), app.application().projectId()); + tester.clock().advance(Duration.ofDays(20)); + tester.outstandingChangeDeployer().run(); + assertEquals(OptionalLong.empty(), app.application().projectId()); + + app.assertNotRunning(stagingTest); + tester.triggerJobs(); + app.assertNotRunning(stagingTest); + assertEquals(4, app.deploymentStatus().jobsToRun().size()); + + app.submit().runJob(systemTest).runJob(stagingTest).failDeployment(productionUsCentral1); + tester.clock().advance(Duration.ofDays(31)); + tester.outstandingChangeDeployer().run(); + assertEquals(OptionalLong.empty(), app.application().projectId()); + + app.assertNotRunning(productionUsCentral1); + tester.triggerJobs(); + app.assertNotRunning(productionUsCentral1); + assertEquals(3, app.deploymentStatus().jobsToRun().size()); + + app.submit().runJob(systemTest).runJob(stagingTest).timeOutConvergence(productionUsCentral1); + tester.clock().advance(Duration.ofDays(31)); + tester.outstandingChangeDeployer().run(); + assertEquals(OptionalLong.of(1000), app.application().projectId()); + + app.assertNotRunning(productionUsCentral1); + tester.triggerJobs(); + app.assertRunning(productionUsCentral1); + } + + @Test void testJobNames() { ZoneRegistryMock zones = new ZoneRegistryMock(SystemName.main); List<ZoneApi> existing = new ArrayList<>(zones.zones().all().zones()); |