diff options
author | Martin Polden <mpolden@mpolden.no> | 2018-06-15 14:21:58 +0200 |
---|---|---|
committer | Martin Polden <mpolden@mpolden.no> | 2018-06-15 14:21:58 +0200 |
commit | ea00a066a41493412272dac1d03c4164796b615a (patch) | |
tree | 3ec41e89d85b1b6ee5cc08b1c147607e799361fa | |
parent | 58f9b545bac1941d71e57c1fe99d36bfee094be9 (diff) |
Always run job if targets change
4 files changed, 107 insertions, 52 deletions
diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentTrigger.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentTrigger.java index f637e96d065..00b2a5c9b52 100644 --- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentTrigger.java +++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentTrigger.java @@ -283,7 +283,7 @@ public class DeploymentTrigger { Versions versions = Versions.from(change, application, deploymentFor(application, job), controller.systemVersion()); if (isTested(application, versions)) { - if (completedAt.isPresent() && canTrigger(job, application, stepJobs)) { + if (completedAt.isPresent() && canTrigger(job, versions, application, stepJobs)) { jobs.add(deploymentJob(application, versions, change, job, reason, completedAt.get())); } if (!alreadyTriggered(application, versions)) { @@ -319,25 +319,26 @@ public class DeploymentTrigger { } /** Returns whether given job should be triggered */ - private boolean canTrigger(JobType job, Application application, List<JobType> parallelJobs) { + private boolean canTrigger(JobType job, Versions versions, Application application, List<JobType> parallelJobs) { if (jobStateOf(application, job) != idle) return false; if (parallelJobs != null && !parallelJobs.containsAll(runningProductionJobs(application))) return false; - return triggerAt(clock.instant(), job, application); + return triggerAt(clock.instant(), job, versions, application); } /** Returns whether given job should be triggered */ - private boolean canTrigger(JobType job, Application application) { - return canTrigger(job, application, null); + private boolean canTrigger(JobType job, Versions versions, Application application) { + return canTrigger(job, versions, application, null); } /** Returns whether job can trigger at given instant */ - private boolean triggerAt(Instant instant, JobType job, Application application) { + private boolean triggerAt(Instant instant, JobType job, Versions versions, Application application) { Optional<JobStatus> jobStatus = application.deploymentJobs().statusOf(job); if (!jobStatus.isPresent()) return true; if (jobStatus.get().isSuccess()) return true; // Success if (!jobStatus.get().lastCompleted().isPresent()) return true; // Never completed if (!jobStatus.get().firstFailing().isPresent()) return true; // Should not happen as firstFailing should be set for an unsuccessful job + if (!versions.targetsMatch(jobStatus.get().lastCompleted().get())) return true; // Always trigger as targets have changed Instant firstFailing = jobStatus.get().firstFailing().get().at(); Instant lastCompleted = jobStatus.get().lastCompleted().get().at(); @@ -462,7 +463,7 @@ public class DeploymentTrigger { for (JobType jobType : steps(application.deploymentSpec()).testJobs()) { Optional<JobRun> completion = successOn(application, jobType, versions) .filter(run -> versions.sourcesMatchIfPresent(run) || jobType == systemTest); - if (!completion.isPresent() && canTrigger(jobType, application)) { + if (!completion.isPresent() && canTrigger(jobType, versions, application)) { jobs.add(deploymentJob(application, versions, application.change(), jobType, reason, availableSince)); } } diff --git a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/ControllerTest.java b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/ControllerTest.java index e21a9aed508..225516644eb 100644 --- a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/ControllerTest.java +++ b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/ControllerTest.java @@ -150,12 +150,11 @@ public class ControllerTest { .withTriggering(version1, applicationVersion, productionCorpUsEast1.zone(main).map(tester.application(app1.id()).deployments()::get), "", tester.clock().instant()) .withCompletion(42, Optional.empty(), tester.clock().instant()), app1.id(), tester.controller()); + tester.clock().advance(Duration.ofHours(1)); // Stop retrying tester.jobCompletion(productionCorpUsEast1).application(app1).unsuccessful().submit(); tester.deployAndNotify(app1, applicationPackage, true, stagingTest); // production job succeeding now - tester.clock().advance(Duration.ofHours(2).plus(Duration.ofSeconds(1))); // Enough time passes for failing job to be retried - tester.readyJobTrigger().maintain(); expectedJobStatus = expectedJobStatus .withTriggering(version1, applicationVersion, productionCorpUsEast1.zone(main).map(tester.application(app1.id()).deployments()::get), "", tester.clock().instant()) .withCompletion(42, Optional.empty(), tester.clock().instant()); diff --git a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentTester.java b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentTester.java index 8fab43ef654..f91dce2b203 100644 --- a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentTester.java +++ b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentTester.java @@ -12,7 +12,6 @@ import com.yahoo.vespa.hosted.controller.ArtifactRepositoryMock; import com.yahoo.vespa.hosted.controller.ConfigServerMock; import com.yahoo.vespa.hosted.controller.Controller; import com.yahoo.vespa.hosted.controller.ControllerTester; -import com.yahoo.vespa.hosted.controller.api.integration.BuildService; import com.yahoo.vespa.hosted.controller.api.integration.stubs.MockBuildService; import com.yahoo.vespa.hosted.controller.api.integration.zone.ZoneId; import com.yahoo.vespa.hosted.controller.application.ApplicationPackage; @@ -262,6 +261,10 @@ public class DeploymentTester { deployAndNotify(application, Optional.of(applicationPackage), success, job); } + public void deployAndNotify(Application application, boolean success, JobType job) { + deployAndNotify(application, Optional.empty(), success, job); + } + public void deployAndNotify(Application application, Optional<ApplicationPackage> applicationPackage, boolean success, JobType job) { if (success) { // Staging deploys twice, once with current version and once with new version @@ -305,11 +308,7 @@ public class DeploymentTester { } private boolean isRunning(JobType job, ApplicationId application) { - return buildService().jobs().contains(BuildService.BuildJob.of(application, - application(application).deploymentJobs() - .projectId() - .getAsLong(), - job.jobName())); + return buildService().jobs().contains(ControllerTester.buildJob(application(application), job)); } } diff --git a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentTriggerTest.java b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentTriggerTest.java index 49a0bdfef3d..10f53f17153 100644 --- a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentTriggerTest.java +++ b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentTriggerTest.java @@ -21,6 +21,7 @@ import org.junit.Test; import java.time.Duration; import java.time.Instant; +import java.util.Optional; import java.util.function.Supplier; import static com.yahoo.config.provision.SystemName.main; @@ -33,7 +34,6 @@ import static com.yahoo.vespa.hosted.controller.application.DeploymentJobs.JobTy import static com.yahoo.vespa.hosted.controller.application.DeploymentJobs.JobType.stagingTest; import static com.yahoo.vespa.hosted.controller.application.DeploymentJobs.JobType.systemTest; import static java.util.Collections.singletonList; -import static java.util.Optional.empty; import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertFalse; import static org.junit.Assert.assertNotEquals; @@ -425,8 +425,8 @@ public class DeploymentTriggerTest { tester.completeDeploymentWithError(application, applicationPackage, BuildJob.defaultBuildNumber + 1, productionUsCentral1); // deployAndNotify doesn't actually deploy if the job fails, so we need to do that manually. - tester.deployAndNotify(application, empty(), false, productionUsCentral1); - tester.deploy(productionUsCentral1, application, empty(), false); + tester.deployAndNotify(application, false, productionUsCentral1); + tester.deploy(productionUsCentral1, application, Optional.empty(), false); ApplicationVersion appVersion1 = ApplicationVersion.from(BuildJob.defaultSourceRevision, BuildJob.defaultBuildNumber + 1); assertEquals(appVersion1, app.get().deployments().get(ZoneId.from("prod.us-central-1")).applicationVersion()); @@ -443,18 +443,18 @@ public class DeploymentTriggerTest { Version version1 = new Version("6.2"); tester.upgradeSystem(version1); tester.jobCompletion(productionUsCentral1).application(application).unsuccessful().submit(); - tester.deployAndNotify(application, empty(), true, systemTest); - tester.deployAndNotify(application, empty(), true, stagingTest); - tester.deployAndNotify(application, empty(), false, productionUsCentral1); + tester.deployAndNotify(application, true, systemTest); + tester.deployAndNotify(application, true, stagingTest); + tester.deployAndNotify(application, false, productionUsCentral1); // The last job has a different target, and the tests need to run again. // These may now start, since the first job has been triggered once, and thus is verified already. - tester.deployAndNotify(application, empty(), true, systemTest); - tester.deployAndNotify(application, empty(), true, stagingTest); + tester.deployAndNotify(application, true, systemTest); + tester.deployAndNotify(application, true, stagingTest); // Finally, the two production jobs complete, in order. - tester.deployAndNotify(application, empty(), true, productionUsCentral1); - tester.deployAndNotify(application, empty(), true, productionEuWest1); + tester.deployAndNotify(application, true, productionUsCentral1); + tester.deployAndNotify(application, true, productionEuWest1); assertEquals(appVersion1, app.get().deployments().get(ZoneId.from("prod.us-central-1")).applicationVersion()); } @@ -499,9 +499,7 @@ public class DeploymentTriggerTest { tester.deployAndNotify(application, applicationPackage, true, systemTest); tester.deployAndNotify(application, applicationPackage, true, stagingTest); - tester.assertNotRunning(productionUsCentral1, application.id()); - tester.clock().advance(Duration.ofHours(1).plus(Duration.ofSeconds(1))); // Enough time for prod.us-central-1 to be retried - tester.readyJobTrigger().maintain(); + tester.assertRunning(productionUsCentral1, application.id()); assertEquals(v2, app.get().deployments().get(productionUsCentral1.zone(main).get()).version()); assertEquals(Long.valueOf(42L), app.get().deployments().get(productionUsCentral1.zone(main).get()).applicationVersion().buildNumber().get()); assertNotEquals(triggered, app.get().deploymentJobs().jobStatus().get(productionUsCentral1).lastTriggered().get().at()); @@ -519,8 +517,8 @@ public class DeploymentTriggerTest { tester.assertNotRunning(productionUsCentral1, application.id()); // Last job has a different deployment target, so tests need to run again. - tester.deployAndNotify(application, empty(), true, systemTest); - tester.deployAndNotify(application, empty(), true, stagingTest); + tester.deployAndNotify(application, true, systemTest); + tester.deployAndNotify(application, true, stagingTest); tester.deployAndNotify(application, applicationPackage, true, productionEuWest1); assertFalse(app.get().change().isPresent()); assertFalse(app.get().deploymentJobs().jobStatus().get(productionUsCentral1).isSuccess()); @@ -543,8 +541,8 @@ public class DeploymentTriggerTest { Version v1 = new Version("6.1"); Version v2 = new Version("6.2"); tester.upgradeSystem(v2); - tester.deployAndNotify(application, empty(), true, systemTest); - tester.deployAndNotify(application, empty(), true, stagingTest); + tester.deployAndNotify(application, true, systemTest); + tester.deployAndNotify(application, true, stagingTest); tester.deploymentTrigger().cancelChange(application.id(), true); tester.deploy(productionEuWest1, application, applicationPackage); assertEquals(v2, app.get().deployments().get(productionEuWest1.zone(main).get()).version()); @@ -555,8 +553,8 @@ public class DeploymentTriggerTest { Version firstTested = app.get().deploymentJobs().jobStatus().get(systemTest).lastTriggered().get().platform(); assertEquals(firstTested, app.get().deploymentJobs().jobStatus().get(stagingTest).lastTriggered().get().platform()); - tester.deployAndNotify(application, empty(), true, systemTest); - tester.deployAndNotify(application, empty(), true, stagingTest); + tester.deployAndNotify(application, true, systemTest); + tester.deployAndNotify(application, true, stagingTest); // Tests are not re-triggered, because the jobs they were run for has not yet been triggered with the tested versions. assertEquals(firstTested, app.get().deploymentJobs().jobStatus().get(systemTest).lastTriggered().get().platform()); @@ -570,14 +568,14 @@ public class DeploymentTriggerTest { // New upgrade is already tested for one of the jobs, which has now been triggered, and tests may run for the other job. assertNotEquals(firstTested, app.get().deploymentJobs().jobStatus().get(systemTest).lastTriggered().get().platform()); assertNotEquals(firstTested, app.get().deploymentJobs().jobStatus().get(stagingTest).lastTriggered().get().platform()); - tester.deployAndNotify(application, empty(), true, systemTest); - tester.deployAndNotify(application, empty(), true, stagingTest); + tester.deployAndNotify(application, true, systemTest); + tester.deployAndNotify(application, true, stagingTest); // Both jobs fail again, and must be re-triggered -- this is ok, as they are both already triggered on their current targets. - tester.deployAndNotify(application, empty(), false, productionEuWest1); - tester.deployAndNotify(application, empty(), false, productionUsEast3); - tester.deployAndNotify(application, empty(), true, productionUsEast3); - tester.deployAndNotify(application, empty(), true, productionEuWest1); + tester.deployAndNotify(application, false, productionEuWest1); + tester.deployAndNotify(application, false, productionUsEast3); + tester.deployAndNotify(application, true, productionUsEast3); + tester.deployAndNotify(application, true, productionEuWest1); assertFalse(app.get().change().isPresent()); assertEquals(43, app.get().deploymentJobs().jobStatus().get(productionEuWest1).lastSuccess().get().application().buildNumber().get().longValue()); assertEquals(43, app.get().deploymentJobs().jobStatus().get(productionUsEast3).lastSuccess().get().application().buildNumber().get().longValue()); @@ -600,28 +598,86 @@ public class DeploymentTriggerTest { Version v1 = new Version("6.1"); Version v2 = new Version("6.2"); tester.upgradeSystem(v2); - tester.deployAndNotify(application, empty(), true, systemTest); - tester.deployAndNotify(application, empty(), true, stagingTest); - tester.deployAndNotify(application, empty(), true, productionUsCentral1); - tester.deployAndNotify(application, empty(), true, productionEuWest1); - tester.deployAndNotify(application, empty(), false, productionUsEast3); + tester.deployAndNotify(application, true, systemTest); + tester.deployAndNotify(application, true, stagingTest); + tester.deployAndNotify(application, true, productionUsCentral1); + tester.deployAndNotify(application, true, productionEuWest1); + tester.deployAndNotify(application, false, productionUsEast3); assertEquals(v2, app.get().deployments().get(ZoneId.from("prod", "us-central-1")).version()); assertEquals(v2, app.get().deployments().get(ZoneId.from("prod", "eu-west-1")).version()); assertEquals(v1, app.get().deployments().get(ZoneId.from("prod", "us-east-3")).version()); Version v3 = new Version("6.3"); tester.upgradeSystem(v3); - tester.deployAndNotify(application, empty(), false, productionUsEast3); + tester.deployAndNotify(application, false, productionUsEast3); // See that sources for staging are: first v2, then v1. - tester.deployAndNotify(application, empty(), true, systemTest); - tester.deployAndNotify(application, empty(), true, stagingTest); + tester.deployAndNotify(application, true, systemTest); + tester.deployAndNotify(application, true, stagingTest); assertEquals(v2, app.get().deploymentJobs().jobStatus().get(stagingTest).lastSuccess().get().sourcePlatform().get()); - tester.deployAndNotify(application, empty(), true, productionUsCentral1); + tester.deployAndNotify(application, true, productionUsCentral1); assertEquals(v1, app.get().deploymentJobs().jobStatus().get(stagingTest).lastTriggered().get().sourcePlatform().get()); - tester.deployAndNotify(application, empty(), true, stagingTest); - tester.deployAndNotify(application, empty(), true, productionEuWest1); - tester.deployAndNotify(application, empty(), true, productionUsEast3); + tester.deployAndNotify(application, true, stagingTest); + tester.deployAndNotify(application, true, productionEuWest1); + tester.deployAndNotify(application, true, productionUsEast3); + } + + @Test + public void retriesFailingJobs() { + DeploymentTester tester = new DeploymentTester(); + Application application = tester.createApplication("app1", "tenant1", 1, 1L); + ApplicationPackage applicationPackage = new ApplicationPackageBuilder() + .environment(Environment.prod) + .region("us-central-1") + .build(); + + // Deploy completely on default application and platform versions + tester.deployCompletely(application, applicationPackage); + + // New application change is deployed and fails in system-test for a while + tester.jobCompletion(component).application(application).nextBuildNumber().uploadArtifact(applicationPackage).submit(); + tester.deployAndNotify(application, false, systemTest); + tester.deployAndNotify(application, true, stagingTest); + + // Retries immediately in the first minute after failing + tester.clock().advance(Duration.ofSeconds(59)); + tester.jobCompletion(systemTest).application(application).unsuccessful().submit(); + tester.readyJobTrigger().maintain(); + tester.assertRunning(systemTest, application.id()); + + // Stops immediate retry after failing for 1 minute + tester.clock().advance(Duration.ofSeconds(1)); + tester.jobCompletion(systemTest).application(application).unsuccessful().submit(); + tester.readyJobTrigger().maintain(); + tester.assertNotRunning(systemTest, application.id()); + + // Retries after 10 minutes since previous completion as we failed within the last hour + tester.clock().advance(Duration.ofMinutes(10).plus(Duration.ofSeconds(1))); + tester.readyJobTrigger().maintain(); + tester.assertRunning(systemTest, application.id()); + + // Retries less frequently after 1 hour of failure + tester.clock().advance(Duration.ofMinutes(50)); + tester.jobCompletion(systemTest).application(application).unsuccessful().submit(); + tester.readyJobTrigger().maintain(); + tester.assertNotRunning(systemTest, application.id()); + + // Retries after two hours pass since last completion + tester.clock().advance(Duration.ofHours(2).plus(Duration.ofSeconds(1))); + tester.readyJobTrigger().maintain(); + tester.assertRunning(systemTest, application.id()); + + // Still fails and is not retried + tester.jobCompletion(systemTest).application(application).unsuccessful().submit(); + tester.readyJobTrigger().maintain(); + tester.assertNotRunning(systemTest, application.id()); + + // Another application change is deployed and fixes system-test. Change is triggered immediately as target changes + tester.jobCompletion(component).application(application).nextBuildNumber(2).uploadArtifact(applicationPackage).submit(); + tester.deployAndNotify(application, true, systemTest); + tester.deployAndNotify(application, true, stagingTest); + tester.deployAndNotify(application, true, productionUsCentral1); + assertTrue("Deployment completed", tester.buildService().jobs().isEmpty()); } } |