diff options
author | Martin Polden <mpolden@mpolden.no> | 2018-03-27 08:58:18 +0200 |
---|---|---|
committer | GitHub <noreply@github.com> | 2018-03-27 08:58:18 +0200 |
commit | 893937f64d70de70768281bd1ee8438c00c83710 (patch) | |
tree | 46c8bac0a6fc7002356b0e897a1c80ad108fbe05 | |
parent | 45f34b57d8704b3b06d76ff2029d147463b11d66 (diff) | |
parent | 6db944523a1dd6de81faaae7878066956ee2527a (diff) |
Merge pull request #5428 from vespa-engine/jvenstad/DO-unified
Jvenstad/do unified
10 files changed, 104 insertions, 68 deletions
diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentTrigger.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentTrigger.java index 54deb355836..767ffbaa7ea 100644 --- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentTrigger.java +++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentTrigger.java @@ -28,6 +28,7 @@ import java.util.List; import java.util.Objects; import java.util.Optional; import java.util.logging.Logger; +import java.util.stream.Stream; /** * Responsible for scheduling deployment jobs in a build system and keeping @@ -108,14 +109,17 @@ public class DeploymentTrigger { // TODO jvenstad: Don't trigger. // Trigger next if (report.success()) { - List<JobType> jobs = order.nextAfter(report.jobType(), application); - for (JobType job : jobs) - application = trigger(new Triggering(application, job, false, report.jobType().jobName() + " completed"), jobs, false); + triggerReadyJobs(application); + return; // Don't overwrite below. + } + else if (retryBecauseOutOfCapacity(application, report.jobType())) { + triggerReadyJobs(application); + return; // Don't overwrite below. + } + else if (retryBecauseNewFailure(application, report.jobType())) { + triggerReadyJobs(application); + return; // Don't overwrite below. } - else if (retryBecauseOutOfCapacity(application, report.jobType())) - application = trigger(new Triggering(application, report.jobType(), true, "Retrying on out of capacity"), Collections.emptySet(), false); - else if (retryBecauseNewFailure(application, report.jobType())) - application = trigger(new Triggering(application, report.jobType(), false, "Immediate retry on failure"), Collections.emptySet(), false); applications().store(application); }); @@ -133,7 +137,6 @@ public class DeploymentTrigger { /** Find the next step to trigger if any, and triggers it */ public void triggerReadyJobs(LockedApplication application) { - if ( ! application.change().isPresent()) return; List<JobType> jobs = order.jobsFrom(application.deploymentSpec()); // Should the first step be triggered? @@ -147,33 +150,27 @@ public class DeploymentTrigger { || ! systemTestStatus.lastTriggered().get().version().equals(target) || systemTestStatus.isHanging(jobTimeoutLimit())) { application = trigger(new Triggering(application, JobType.systemTest, false, "Upgrade to " + target), Collections.emptySet(), false); - controller.applications().store(application); - } - } - else { - JobStatus componentStatus = application.deploymentJobs().jobStatus().get(JobType.component); - if (componentStatus != null && changesAvailable(application, componentStatus, systemTestStatus)) { - application = trigger(new Triggering(application, JobType.systemTest, false, "Available change in component"), Collections.emptySet(), false); - controller.applications().store(application); + applications().store(application); } } } // Find next steps to trigger based on the state of the previous step - for (JobType jobType : jobs) { + for (JobType jobType : (Iterable<JobType>) Stream.concat(Stream.of(JobType.component), jobs.stream())::iterator) { JobStatus jobStatus = application.deploymentJobs().jobStatus().get(jobType); if (jobStatus == null) continue; // job has never run - if (jobStatus.isRunning(jobTimeoutLimit())) continue; // Collect the subset of next jobs which have not run with the last changes // TODO jvenstad: Change to be step-centric. List<JobType> nextJobs = order.nextAfter(jobType, application); for (JobType nextJobType : nextJobs) { JobStatus nextStatus = application.deploymentJobs().jobStatus().get(nextJobType); - if (changesAvailable(application, jobStatus, nextStatus) || nextStatus.isHanging(jobTimeoutLimit())) - application = trigger(new Triggering(application, nextJobType, false, "Available change in " + jobType.jobName()), nextJobs, false); + if (changesAvailable(application, jobStatus, nextStatus) || nextStatus.isHanging(jobTimeoutLimit())) { + boolean isRetry = nextStatus != null && nextStatus.jobError().filter(JobError.outOfCapacity::equals).isPresent(); + application = trigger(new Triggering(application, nextJobType, isRetry, isRetry ? "Retrying on out of capacity" : "Available change in " + jobType.jobName()), nextJobs, false); + } } - controller.applications().store(application); + applications().store(application); } } @@ -185,7 +182,6 @@ public class DeploymentTrigger { * @param force true to disable checks which should normally prevent this triggering from happening * @return the application in the triggered state, if actually triggered. This *must* be stored by the caller */ - // TODO jvenstad: Replace with (Collection<JobType> concurrentlyWith) to allow, e.g., concurrent retries. public LockedApplication trigger(Triggering triggering, Collection<JobType> concurrentlyWith, boolean force) { if (triggering.jobType == null) return triggering.application; // we are passed null when the last job has been reached @@ -209,11 +205,11 @@ public class DeploymentTrigger { deploymentQueue.addJob(triggering.application.id(), triggering.jobType, triggering.retry); // TODO jvenstad: Let triggering set only time of triggering (and reason, for debugging?) when build system is polled for job status. return triggering.application.withJobTriggering(triggering.jobType, - clock.instant(), - triggering.application.deployVersionFor(triggering.jobType, controller), - triggering.application.deployApplicationVersionFor(triggering.jobType, controller, false) - .orElse(ApplicationVersion.unknown), - triggering.reason); + clock.instant(), + triggering.application.deployVersionFor(triggering.jobType, controller), + triggering.application.deployApplicationVersionFor(triggering.jobType, controller, false) + .orElse(ApplicationVersion.unknown), + triggering.reason); } /** diff --git a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/ControllerTest.java b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/ControllerTest.java index bba82ce2980..895ba195b08 100644 --- a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/ControllerTest.java +++ b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/ControllerTest.java @@ -146,11 +146,13 @@ public class ControllerTest { tester.jobCompletion(productionCorpUsEast1).application(app1).unsuccessful().submit(); // system and staging test job - succeeding - tester.jobCompletion(component).application(app1).submit(); + tester.jobCompletion(component).application(app1).nextBuildNumber().uploadArtifact(applicationPackage).submit(); + applicationVersion = tester.application("app1").change().application().get(); tester.deployAndNotify(app1, applicationPackage, true, false, systemTest); assertStatus(JobStatus.initial(systemTest) .withTriggering(version1, applicationVersion, "", tester.clock().instant().minus(Duration.ofMillis(1))) - .withCompletion(42, Optional.empty(), tester.clock().instant(), tester.controller()), app1.id(), tester.controller()); + .withCompletion(42, Optional.empty(), tester.clock().instant(), tester.controller()), + app1.id(), tester.controller()); tester.deployAndNotify(app1, applicationPackage, true, stagingTest); // production job succeeding now @@ -186,7 +188,7 @@ public class ControllerTest { JobStatus jobStatus = applications.require(app1.id()).deploymentJobs().jobStatus().get(productionCorpUsEast1); assertNotNull("Deployment job was not removed", jobStatus); assertEquals(42, jobStatus.lastCompleted().get().id()); - assertEquals("staging-test completed", jobStatus.lastCompleted().get().reason()); + assertEquals("Available change in staging-test", jobStatus.lastCompleted().get().reason()); // prod zone removal is allowed with override applicationPackage = new ApplicationPackageBuilder() @@ -392,7 +394,7 @@ public class ControllerTest { // Initial failure tester.clock().advance(Duration.ofMillis(1000)); initialFailure = tester.clock().instant(); - tester.jobCompletion(component).application(app).submit(); + tester.jobCompletion(component).application(app).nextBuildNumber().uploadArtifact(applicationPackage).submit(); tester.deployAndNotify(app, applicationPackage, false, systemTest); assertEquals("Failure age is right at initial failure", initialFailure.plus(Duration.ofMillis(2)), firstFailing(app, tester).get().at()); @@ -470,14 +472,14 @@ public class ControllerTest { assertEquals(2, deploymentQueue.jobs().size()); // app1: 4 hours pass in total, staging-test job for app1 is re-queued by periodic trigger mechanism and added at the - // back of the queue + // front of the queue tester.clock().advance(Duration.ofHours(3)); tester.clock().advance(Duration.ofMinutes(50)); tester.readyJobTrigger().maintain(); + assertEquals(Collections.singletonList(new BuildService.BuildJob(project1, stagingTest.jobName())), deploymentQueue.takeJobsToRun()); assertEquals(Collections.singletonList(new BuildService.BuildJob(project2, stagingTest.jobName())), deploymentQueue.takeJobsToRun()); assertEquals(Collections.singletonList(new BuildService.BuildJob(project3, stagingTest.jobName())), deploymentQueue.takeJobsToRun()); - assertEquals(Collections.singletonList(new BuildService.BuildJob(project1, stagingTest.jobName())), deploymentQueue.takeJobsToRun()); assertEquals(Collections.emptyList(), deploymentQueue.takeJobsToRun()); } @@ -661,7 +663,7 @@ public class ControllerTest { .environment(Environment.prod) .allow(ValidationId.deploymentRemoval) .build(); - tester.jobCompletion(component).application(app1).uploadArtifact(applicationPackage).submit(); + tester.jobCompletion(component).application(app1).nextBuildNumber().uploadArtifact(applicationPackage).submit(); tester.deployAndNotify(app1, applicationPackage, true, systemTest); tester.applications().deactivate(app1, ZoneId.from(Environment.test, RegionName.from("us-east-1"))); tester.applications().deactivate(app1, ZoneId.from(Environment.staging, RegionName.from("us-east-3"))); diff --git a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/deployment/BuildJob.java b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/deployment/BuildJob.java index bb8840a7b1f..f1e6108710d 100644 --- a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/deployment/BuildJob.java +++ b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/deployment/BuildJob.java @@ -104,6 +104,7 @@ public class BuildJob { /** Upload given application package to artifact repository as part of this job */ public BuildJob uploadArtifact(ApplicationPackage applicationPackage) { Objects.requireNonNull(job, "job cannot be null"); + Objects.requireNonNull(applicationId, "applicationId cannot be null"); if (job != DeploymentJobs.JobType.component) { throw new IllegalStateException(job + " cannot upload artifact"); } diff --git a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/DeploymentIssueReporterTest.java b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/DeploymentIssueReporterTest.java index 155c9f14f77..548f7c33fa1 100644 --- a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/DeploymentIssueReporterTest.java +++ b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/DeploymentIssueReporterTest.java @@ -134,7 +134,7 @@ public class DeploymentIssueReporterTest { // app3 now has a new failure past max failure age; see that a new issue is filed. - tester.jobCompletion(component).application(app3).submit(); + tester.jobCompletion(component).application(app3).nextBuildNumber().uploadArtifact(applicationPackage).submit(); tester.deployAndNotify(app3, applicationPackage, false, systemTest); tester.clock().advance(maxInactivity.plus(maxFailureAge)); diff --git a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/DnsMaintainerTest.java b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/DnsMaintainerTest.java index 5ed4c3a186e..f6216856317 100644 --- a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/DnsMaintainerTest.java +++ b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/DnsMaintainerTest.java @@ -74,7 +74,7 @@ public class DnsMaintainerTest { .environment(Environment.prod) .allow(ValidationId.deploymentRemoval) .build(); - tester.jobCompletion(component).application(application).uploadArtifact(applicationPackage).submit(); + tester.jobCompletion(component).application(application).nextBuildNumber().uploadArtifact(applicationPackage).submit(); tester.deployAndNotify(application, applicationPackage, true, systemTest); tester.applications().deactivate(application, ZoneId.from(Environment.test, RegionName.from("us-east-1"))); diff --git a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/UpgraderTest.java b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/UpgraderTest.java index 088895408fc..021fe0954c2 100644 --- a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/UpgraderTest.java +++ b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/UpgraderTest.java @@ -12,6 +12,7 @@ import com.yahoo.vespa.hosted.controller.application.ApplicationPackage; import com.yahoo.vespa.hosted.controller.application.Deployment; import com.yahoo.vespa.hosted.controller.application.DeploymentJobs; import com.yahoo.vespa.hosted.controller.deployment.ApplicationPackageBuilder; +import com.yahoo.vespa.hosted.controller.deployment.BuildJob; import com.yahoo.vespa.hosted.controller.deployment.DeploymentTester; import com.yahoo.vespa.hosted.controller.versions.VespaVersion; import org.junit.Test; @@ -152,7 +153,7 @@ public class UpgraderTest { // --- Failing application is repaired by changing the application, causing confidence to move above 'high' threshold // Deploy application change - tester.deployCompletely("default0"); + tester.deployCompletely(tester.application("default0"), DeploymentTester.applicationPackage("default"), BuildJob.defaultBuildNumber + 1); tester.updateVersionStatus(version); assertEquals(VespaVersion.Confidence.high, tester.controller().versionStatus().systemVersion().get().confidence()); diff --git a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/testdata/application-without-project-id.json b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/testdata/application-without-project-id.json index 63832531c7d..31949cce282 100644 --- a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/testdata/application-without-project-id.json +++ b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/testdata/application-without-project-id.json @@ -1,6 +1,6 @@ { "id": "tenant1:app1:default", - "deploymentSpecField": "<deployment version='1.0'>\n <test />\n <staging />\n <prod>\n <region active=\"true\">cd-us-central-1</region>\n <region active=\"true\">cd-us-central-2</region>\n </prod>\n</deployment>\n", + "deploymentSpecField": "<deployment version='1.0'>\n <test />\n <staging />\n <prod>\n <region active=\"true\">us-central-1</region>\n <region active=\"true\">us-west-1</region>\n </prod>\n</deployment>\n", "validationOverrides": "<deployment version='1.0'/>", "deployments": [], "deploymentJobs": { diff --git a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/restapi/application/responses/application-without-change-multiple-deployments.json b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/restapi/application/responses/application-without-change-multiple-deployments.json index 9cd9329e36f..06d562d40f2 100644 --- a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/restapi/application/responses/application-without-change-multiple-deployments.json +++ b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/restapi/application/responses/application-without-change-multiple-deployments.json @@ -48,7 +48,7 @@ "gitCommit": "commit1" } }, - "reason": "component completed", + "reason": "Available change in component", "at": "(ignore)" }, "lastCompleted": { @@ -62,7 +62,7 @@ "gitCommit": "commit1" } }, - "reason": "component completed", + "reason": "Available change in component", "at": "(ignore)" }, "lastSuccess": { @@ -76,7 +76,7 @@ "gitCommit": "commit1" } }, - "reason": "component completed", + "reason": "Available change in component", "at": "(ignore)" } }, @@ -94,7 +94,7 @@ "gitCommit": "commit1" } }, - "reason": "system-test completed", + "reason": "Available change in system-test", "at": "(ignore)" }, "lastCompleted": { @@ -108,7 +108,7 @@ "gitCommit": "commit1" } }, - "reason": "system-test completed", + "reason": "Available change in system-test", "at": "(ignore)" }, "lastSuccess": { @@ -122,7 +122,7 @@ "gitCommit": "commit1" } }, - "reason": "system-test completed", + "reason": "Available change in system-test", "at": "(ignore)" } }, @@ -140,7 +140,7 @@ "gitCommit": "commit1" } }, - "reason": "staging-test completed", + "reason": "Available change in staging-test", "at": "(ignore)" }, "lastCompleted": { @@ -154,7 +154,7 @@ "gitCommit": "commit1" } }, - "reason": "staging-test completed", + "reason": "Available change in staging-test", "at": "(ignore)" }, "lastSuccess": { @@ -168,7 +168,7 @@ "gitCommit": "commit1" } }, - "reason": "staging-test completed", + "reason": "Available change in staging-test", "at": "(ignore)" } }, @@ -186,7 +186,7 @@ "gitCommit": "commit1" } }, - "reason": "production-us-west-1 completed", + "reason": "Available change in production-us-west-1", "at": "(ignore)" }, "lastCompleted": { @@ -200,7 +200,7 @@ "gitCommit": "commit1" } }, - "reason": "production-us-west-1 completed", + "reason": "Available change in production-us-west-1", "at": "(ignore)" }, "lastSuccess": { @@ -214,7 +214,7 @@ "gitCommit": "commit1" } }, - "reason": "production-us-west-1 completed", + "reason": "Available change in production-us-west-1", "at": "(ignore)" } } diff --git a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/restapi/application/responses/application.json b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/restapi/application/responses/application.json index 3d8d8a4b34d..a067d19fb6a 100644 --- a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/restapi/application/responses/application.json +++ b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/restapi/application/responses/application.json @@ -58,7 +58,7 @@ "gitCommit": "commit1" } }, - "reason": "component completed", + "reason": "Available change in component", "at": "(ignore)" }, "lastCompleted": { @@ -72,7 +72,7 @@ "gitCommit": "commit1" } }, - "reason": "component completed", + "reason": "Available change in component", "at": "(ignore)" }, "lastSuccess": { @@ -86,7 +86,7 @@ "gitCommit": "commit1" } }, - "reason": "component completed", + "reason": "Available change in component", "at": "(ignore)" } }, @@ -104,7 +104,7 @@ "gitCommit": "commit1" } }, - "reason": "system-test completed", + "reason": "Available change in system-test", "at": "(ignore)" }, "lastCompleted": { @@ -118,7 +118,7 @@ "gitCommit": "commit1" } }, - "reason": "system-test completed", + "reason": "Available change in system-test", "at": "(ignore)" }, "lastSuccess": { @@ -132,7 +132,7 @@ "gitCommit": "commit1" } }, - "reason": "system-test completed", + "reason": "Available change in system-test", "at": "(ignore)" } }, @@ -150,7 +150,7 @@ "gitCommit": "commit1" } }, - "reason": "staging-test completed", + "reason": "Available change in staging-test", "at": "(ignore)" }, "lastCompleted": { @@ -164,7 +164,7 @@ "gitCommit": "commit1" } }, - "reason": "staging-test completed", + "reason": "Available change in staging-test", "at": "(ignore)" }, "firstFailing": { @@ -178,7 +178,25 @@ "gitCommit": "commit1" } }, - "reason": "staging-test completed", + "reason": "Available change in staging-test", + "at": "(ignore)" + } + }, + { + "type": "production-us-east-3", + "success": false, + "lastTriggered": { + "id": -1, + "version": "(ignore)", + "revision": { + "hash": "(ignore)", + "source": { + "gitRepository": "repository1", + "gitBranch": "master", + "gitCommit": "commit1" + } + }, + "reason": "Available change in production-corp-us-east-1", "at": "(ignore)" } } diff --git a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/restapi/application/responses/application1-recursive.json b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/restapi/application/responses/application1-recursive.json index 16bd5c2e720..3c4cc32111c 100644 --- a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/restapi/application/responses/application1-recursive.json +++ b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/restapi/application/responses/application1-recursive.json @@ -58,7 +58,7 @@ "gitCommit": "commit1" } }, - "reason": "component completed", + "reason": "Available change in component", "at": "(ignore)" }, "lastCompleted": { @@ -72,7 +72,7 @@ "gitCommit": "commit1" } }, - "reason": "component completed", + "reason": "Available change in component", "at": "(ignore)" }, "lastSuccess": { @@ -86,7 +86,7 @@ "gitCommit": "commit1" } }, - "reason": "component completed", + "reason": "Available change in component", "at": "(ignore)" } }, @@ -104,7 +104,7 @@ "gitCommit": "commit1" } }, - "reason": "system-test completed", + "reason": "Available change in system-test", "at": "(ignore)" }, "lastCompleted": { @@ -118,7 +118,7 @@ "gitCommit": "commit1" } }, - "reason": "system-test completed", + "reason": "Available change in system-test", "at": "(ignore)" }, "lastSuccess": { @@ -132,7 +132,7 @@ "gitCommit": "commit1" } }, - "reason": "system-test completed", + "reason": "Available change in system-test", "at": "(ignore)" } }, @@ -150,7 +150,7 @@ "gitCommit": "commit1" } }, - "reason": "staging-test completed", + "reason": "Available change in staging-test", "at": "(ignore)" }, "lastCompleted": { @@ -164,7 +164,7 @@ "gitCommit": "commit1" } }, - "reason": "staging-test completed", + "reason": "Available change in staging-test", "at": "(ignore)" }, "firstFailing": { @@ -178,7 +178,25 @@ "gitCommit": "commit1" } }, - "reason": "staging-test completed", + "reason": "Available change in staging-test", + "at": "(ignore)" + } + }, + { + "type": "production-us-east-3", + "success": false, + "lastTriggered": { + "id": -1, + "version": "(ignore)", + "revision": { + "hash": "(ignore)", + "source": { + "gitRepository": "repository1", + "gitBranch": "master", + "gitCommit": "commit1" + } + }, + "reason": "Available change in production-corp-us-east-1", "at": "(ignore)" } } |