summary refs log tree commit diff stats
diff options
context:
space:
mode:
author Jon Marius Venstad <jonmv@users.noreply.github.com> 2018-03-27 14:21:30 +0200
committer GitHub <noreply@github.com> 2018-03-27 14:21:30 +0200
commit 37702d7e5c74f1da1f8a181a7406f5e0a5f9415f (patch)
tree 10362aa59c4c461c3e78ecada0d733099edfc378
parent 945db42c14146695825e7236bc40205cef8c27cd (diff)
parent 0b3b1301e75c5b56fb354b7b054a764660608f82 (diff)
Merge pull request #5430 from vespa-engine/jvenstad/DO-unified
Jvenstad/do unified
-rw-r--r-- controller-server/src/main/java/com/yahoo/vespa/hosted/controller/ApplicationController.java 5
-rw-r--r-- controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentTrigger.java 123
-rw-r--r-- controller-server/src/test/java/com/yahoo/vespa/hosted/controller/ControllerTest.java 25
-rw-r--r-- controller-server/src/test/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentTester.java 13
-rw-r--r-- controller-server/src/test/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentTriggerTest.java 39
-rw-r--r-- controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/DeploymentIssueReporterTest.java 1
-rw-r--r-- controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/FailureRedeployerTest.java 39
-rw-r--r-- controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/OutstandingChangeDeployerTest.java 3
-rw-r--r-- controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/UpgraderTest.java 107
-rw-r--r-- controller-server/src/test/java/com/yahoo/vespa/hosted/controller/restapi/ContainerControllerTester.java 1
-rw-r--r-- controller-server/src/test/java/com/yahoo/vespa/hosted/controller/restapi/application/ApplicationApiTest.java 39
-rw-r--r-- controller-server/src/test/java/com/yahoo/vespa/hosted/controller/restapi/application/responses/application-deployment-cancelled.json 2
-rw-r--r-- controller-server/src/test/java/com/yahoo/vespa/hosted/controller/restapi/deployment/DeploymentApiTest.java 1
-rw-r--r-- controller-server/src/test/java/com/yahoo/vespa/hosted/controller/versions/VersionStatusTest.java 8
14 files changed, 188 insertions, 218 deletions
diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/ApplicationController.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/ApplicationController.java
index 5823dd160c0..1d3fff57a78 100644
--- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/ApplicationController.java
+++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/ApplicationController.java
@@ -553,6 +553,11 @@ public class ApplicationController {
}
public void notifyJobCompletion(JobReport report) {
+ log.log(Level.INFO, String.format("Notified of %s of %s %d for '%s'.",
+ report.jobError().map(error -> error + " failure").orElse("success"),
+ report.jobType(),
+ report.buildNumber(),
+ report.applicationId()));
if ( ! get(report.applicationId()).isPresent()) {
log.log(Level.WARNING, "Ignoring completion of job of project '" + report.projectId() +
"': Unknown application '" + report.applicationId() + "'");
diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentTrigger.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentTrigger.java
index 767ffbaa7ea..f6f65df56b7 100644
--- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentTrigger.java
+++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentTrigger.java
@@ -87,17 +87,12 @@ public class DeploymentTrigger {
// Handle successful starting and ending
if (report.jobType() == JobType.component) {
if (report.success()) {
- if ( ! acceptNewApplicationVersionNow(application)) {
- applications().store(application.withOutstandingChange(Change.of(applicationVersion)));
- return;
- }
- // Note that in case of an ongoing upgrade this may result in both the upgrade and application
- // change being deployed together
- application = application.withChange(application.change().with(applicationVersion));
- }
- else { // don't re-trigger component on failure
- applications().store(application);
- return;
+ if ( ! acceptNewApplicationVersionNow(application))
+ application = application.withOutstandingChange(Change.of(applicationVersion));
+ else
+ // Note that in case of an ongoing upgrade this may result in both the upgrade and application
+ // change being deployed together
+ application = application.withChange(application.change().with(applicationVersion));
}
}
else if (report.jobType().isProduction() && deploymentComplete(application)) {
@@ -106,21 +101,6 @@ public class DeploymentTrigger {
application = application.withChange(Change.empty());
}
- // TODO jvenstad: Don't trigger.
- // Trigger next
- if (report.success()) {
- triggerReadyJobs(application);
- return; // Don't overwrite below.
- }
- else if (retryBecauseOutOfCapacity(application, report.jobType())) {
- triggerReadyJobs(application);
- return; // Don't overwrite below.
- }
- else if (retryBecauseNewFailure(application, report.jobType())) {
- triggerReadyJobs(application);
- return; // Don't overwrite below.
- }
-
applications().store(application);
});
}
@@ -135,45 +115,6 @@ public class DeploymentTrigger {
applications().lockIfPresent(application.id(), this::triggerReadyJobs);
}
- /** Find the next step to trigger if any, and triggers it */
- public void triggerReadyJobs(LockedApplication application) {
- List<JobType> jobs = order.jobsFrom(application.deploymentSpec());
-
- // Should the first step be triggered?
- if ( ! jobs.isEmpty() && jobs.get(0).equals(JobType.systemTest) ) {
- JobStatus systemTestStatus = application.deploymentJobs().jobStatus().get(JobType.systemTest);
- if (application.change().platform().isPresent()) {
- Version target = application.change().platform().get();
- if (systemTestStatus == null
- || ! systemTestStatus.lastTriggered().isPresent()
- || ! systemTestStatus.isSuccess()
- || ! systemTestStatus.lastTriggered().get().version().equals(target)
- || systemTestStatus.isHanging(jobTimeoutLimit())) {
- application = trigger(new Triggering(application, JobType.systemTest, false, "Upgrade to " + target), Collections.emptySet(), false);
- applications().store(application);
- }
- }
- }
-
- // Find next steps to trigger based on the state of the previous step
- for (JobType jobType : (Iterable<JobType>) Stream.concat(Stream.of(JobType.component), jobs.stream())::iterator) {
- JobStatus jobStatus = application.deploymentJobs().jobStatus().get(jobType);
- if (jobStatus == null) continue; // job has never run
-
- // Collect the subset of next jobs which have not run with the last changes
- // TODO jvenstad: Change to be step-centric.
- List<JobType> nextJobs = order.nextAfter(jobType, application);
- for (JobType nextJobType : nextJobs) {
- JobStatus nextStatus = application.deploymentJobs().jobStatus().get(nextJobType);
- if (changesAvailable(application, jobStatus, nextStatus) || nextStatus.isHanging(jobTimeoutLimit())) {
- boolean isRetry = nextStatus != null && nextStatus.jobError().filter(JobError.outOfCapacity::equals).isPresent();
- application = trigger(new Triggering(application, nextJobType, isRetry, isRetry ? "Retrying on out of capacity" : "Available change in " + jobType.jobName()), nextJobs, false);
- }
- }
- applications().store(application);
- }
- }
-
/**
* Trigger a job for an application, if allowed
*
@@ -226,8 +167,7 @@ public class DeploymentTrigger {
application = application.withChange(change);
if (change.application().isPresent())
application = application.withOutstandingChange(Change.empty());
- // TODO jvenstad: Don't trigger.
- application = trigger(new Triggering(application, JobType.systemTest, false, change.toString()), Collections.emptySet(), false);
+
applications().store(application);
});
}
@@ -250,22 +190,47 @@ public class DeploymentTrigger {
//--- End of methods which triggers deployment jobs ----------------------------
- private ApplicationController applications() { return controller.applications(); }
+ /** Find the next step to trigger if any, and triggers it */
+ private void triggerReadyJobs(LockedApplication application) {
+ List<JobType> jobs = order.jobsFrom(application.deploymentSpec());
- /** Retry immediately only if this job just started failing. Otherwise retry periodically */
- private boolean retryBecauseNewFailure(Application application, JobType jobType) {
- JobStatus jobStatus = application.deploymentJobs().jobStatus().get(jobType);
- return (jobStatus != null && jobStatus.firstFailing().get().at().isAfter(clock.instant().minus(Duration.ofSeconds(10))));
- }
+ // Should the first step be triggered?
+ if ( ! jobs.isEmpty() && jobs.get(0).equals(JobType.systemTest) ) {
+ JobStatus systemTestStatus = application.deploymentJobs().jobStatus().get(JobType.systemTest);
+ if (application.change().platform().isPresent()) {
+ Version target = application.change().platform().get();
+ if (systemTestStatus == null
+ || ! systemTestStatus.lastTriggered().isPresent()
+ || ! systemTestStatus.isSuccess()
+ || ! systemTestStatus.lastTriggered().get().version().equals(target)
+ || systemTestStatus.isHanging(jobTimeoutLimit())) {
+ application = trigger(new Triggering(application, JobType.systemTest, false, "Upgrade to " + target), Collections.emptySet(), false);
+ applications().store(application);
+ }
+ }
+ }
+
+ // Find next steps to trigger based on the state of the previous step
+ for (JobType jobType : (Iterable<JobType>) Stream.concat(Stream.of(JobType.component), jobs.stream())::iterator) {
+ JobStatus jobStatus = application.deploymentJobs().jobStatus().get(jobType);
+ if (jobStatus == null) continue; // job has never run
- /** Decide whether to retry due to capacity restrictions */
- private boolean retryBecauseOutOfCapacity(Application application, JobType jobType) {
- JobStatus jobStatus = application.deploymentJobs().jobStatus().get(jobType);
- if (jobStatus == null || ! jobStatus.jobError().equals(Optional.of(JobError.outOfCapacity))) return false;
- // Retry the job if it failed recently
- return jobStatus.firstFailing().get().at().isAfter(clock.instant().minus(Duration.ofMinutes(15)));
+ // Collect the subset of next jobs which have not run with the last changes
+ // TODO jvenstad: Change to be step-centric.
+ List<JobType> nextJobs = order.nextAfter(jobType, application);
+ for (JobType nextJobType : nextJobs) {
+ JobStatus nextStatus = application.deploymentJobs().jobStatus().get(nextJobType);
+ if (changesAvailable(application, jobStatus, nextStatus) || nextStatus.isHanging(jobTimeoutLimit())) {
+ boolean isRetry = nextStatus != null && nextStatus.jobError().filter(JobError.outOfCapacity::equals).isPresent();
+ application = trigger(new Triggering(application, nextJobType, isRetry, isRetry ? "Retrying on out of capacity" : "Available change in " + jobType.jobName()), nextJobs, false);
+ }
+ }
+ applications().store(application);
+ }
}
+ private ApplicationController applications() { return controller.applications(); }
+
/** Returns whether the given job type should be triggered according to deployment spec */
private boolean hasJob(JobType jobType, Application application) {
if ( ! jobType.isProduction()) return true; // Deployment spec only determines this for production jobs.
diff --git a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/ControllerTest.java b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/ControllerTest.java
index 895ba195b08..5c24b70fd65 100644
--- a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/ControllerTest.java
+++ b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/ControllerTest.java
@@ -142,9 +142,6 @@ public class ControllerTest {
tester.clock().advance(Duration.ofHours(1));
- // Need to complete the job, or new jobs won't start.
- tester.jobCompletion(productionCorpUsEast1).application(app1).unsuccessful().submit();
-
// system and staging test job - succeeding
tester.jobCompletion(component).application(app1).nextBuildNumber().uploadArtifact(applicationPackage).submit();
applicationVersion = tester.application("app1").change().application().get();
@@ -156,6 +153,7 @@ public class ControllerTest {
tester.deployAndNotify(app1, applicationPackage, true, stagingTest);
// production job succeeding now
+ tester.jobCompletion(productionCorpUsEast1).application(app1).unsuccessful().submit();
tester.deployAndNotify(app1, applicationPackage, true, productionCorpUsEast1);
expectedJobStatus = expectedJobStatus
.withTriggering(version1, applicationVersion, "", tester.clock().instant().minus(Duration.ofMillis(1)))
@@ -280,6 +278,7 @@ public class ControllerTest {
// Version upgrade changes system version
applications.deploymentTrigger().triggerChange(app1.id(), Change.of(newSystemVersion));
+ tester.deploymentTrigger().triggerReadyJobs();
tester.deployAndNotify(app1, applicationPackage, true, systemTest);
tester.deployAndNotify(app1, applicationPackage, true, stagingTest);
tester.deployAndNotify(app1, applicationPackage, true, productionUsWest1);
@@ -450,17 +449,6 @@ public class ControllerTest {
tester.deployAndNotify(app3, applicationPackage, true, stagingTest);
tester.deployAndNotify(app3, applicationPackage, true, productionCorpUsEast1);
- // app1: 15 minutes pass, staging-test job is still failing due out of capacity, but is no longer re-queued by
- // out of capacity retry mechanism
- tester.clock().advance(Duration.ofMinutes(15));
- tester.jobCompletion(stagingTest).application(app1).error(JobError.outOfCapacity).submit(); // Clear the previous staging test
- tester.jobCompletion(component).application(app1).nextBuildNumber().uploadArtifact(applicationPackage).submit();
- tester.deployAndNotify(app1, applicationPackage, true, false, systemTest);
- tester.deploy(stagingTest, app1, applicationPackage);
- assertEquals(1, deploymentQueue.takeJobsToRun().size());
- tester.jobCompletion(stagingTest).application(app1).error(JobError.outOfCapacity).submit();
- assertTrue("No jobs queued", deploymentQueue.jobs().isEmpty());
-
// app2 and app3: New change triggers system-test jobs
// Provide a changed application package, too, or the deployment is a no-op.
tester.jobCompletion(component).application(app2).nextBuildNumber().uploadArtifact(applicationPackage).submit();
@@ -469,14 +457,6 @@ public class ControllerTest {
tester.jobCompletion(component).application(app3).nextBuildNumber().uploadArtifact(applicationPackage).submit();
tester.deployAndNotify(app3, applicationPackage2, true, systemTest);
- assertEquals(2, deploymentQueue.jobs().size());
-
- // app1: 4 hours pass in total, staging-test job for app1 is re-queued by periodic trigger mechanism and added at the
- // front of the queue
- tester.clock().advance(Duration.ofHours(3));
- tester.clock().advance(Duration.ofMinutes(50));
- tester.readyJobTrigger().maintain();
-
assertEquals(Collections.singletonList(new BuildService.BuildJob(project1, stagingTest.jobName())), deploymentQueue.takeJobsToRun());
assertEquals(Collections.singletonList(new BuildService.BuildJob(project2, stagingTest.jobName())), deploymentQueue.takeJobsToRun());
assertEquals(Collections.singletonList(new BuildService.BuildJob(project3, stagingTest.jobName())), deploymentQueue.takeJobsToRun());
@@ -578,6 +558,7 @@ public class ControllerTest {
tester.updateVersionStatus(version);
assertEquals(version, tester.controller().versionStatus().systemVersion().get().versionNumber());
tester.upgrader().maintain();
+ tester.readyJobTrigger().maintain();
// Test environments pass
tester.deploy(DeploymentJobs.JobType.systemTest, application, applicationPackage);
diff --git a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentTester.java b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentTester.java
index 92bf22df535..6bc544605b6 100644
--- a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentTester.java
+++ b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentTester.java
@@ -101,6 +101,7 @@ public class DeploymentTester {
configServer().setDefaultVersion(version);
updateVersionStatus(version);
upgrader().maintain();
+ readyJobTrigger().maintain();
}
public Version defaultVespaVersion() {
@@ -275,7 +276,7 @@ public class DeploymentTester {
assertEquals(job.jobName(), buildJob.jobName());
}
if (expectOnlyTheseJobs)
- assertEquals(jobs.length, countJobsOf(application));
+ assertEquals("Unexpected job queue: " + jobsOf(application), jobs.length, jobsOf(application).size());
deploymentQueue().removeJobs(application.id());
}
@@ -286,15 +287,17 @@ public class DeploymentTester {
throw new IllegalArgumentException(jobType + " is not scheduled for " + application);
}
- private int countJobsOf(Application application) {
- return (int) deploymentQueue().jobs().stream()
- .filter(job -> job.projectId() == application.deploymentJobs().projectId().get())
- .count();
+ private List<JobType> jobsOf(Application application) {
+ return deploymentQueue().jobs().stream()
+ .filter(job -> job.projectId() == application.deploymentJobs().projectId().get())
+ .map(buildJob -> JobType.fromJobName(buildJob.jobName()))
+ .collect(Collectors.toList());
}
private void notifyJobCompletion(DeploymentJobs.JobReport report) {
clock().advance(Duration.ofMillis(1));
applications().notifyJobCompletion(report);
+ applications().deploymentTrigger().triggerReadyJobs();
}
public static ApplicationPackage applicationPackage(String upgradePolicy) {
diff --git a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentTriggerTest.java b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentTriggerTest.java
index ce765249b97..364cb66c3d1 100644
--- a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentTriggerTest.java
+++ b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentTriggerTest.java
@@ -55,6 +55,7 @@ public class DeploymentTriggerTest {
Version version = new Version(5, 1);
tester.updateVersionStatus(version);
tester.upgrader().maintain();
+ tester.readyJobTrigger().maintain();
// Deploy completely once
tester.jobCompletion(component).application(app).uploadArtifact(applicationPackage).submit();
@@ -66,17 +67,11 @@ public class DeploymentTriggerTest {
version = new Version(5, 2);
tester.updateVersionStatus(version);
tester.upgrader().maintain();
+ tester.readyJobTrigger().maintain();
// system-test fails and is retried
tester.deployAndNotify(app, applicationPackage, false, JobType.systemTest);
- assertEquals("Retried immediately", 1, tester.deploymentQueue().jobs().size());
- tester.clock().advance(Duration.ofHours(1));
- tester.deployAndNotify(app, applicationPackage, false, JobType.systemTest);
- tester.clock().advance(Duration.ofHours(1));
- assertEquals("Nothing scheduled", 0, tester.deploymentQueue().jobs().size());
- tester.readyJobTrigger().maintain(); // Causes retry of systemTests
-
- assertEquals("Scheduled retry", 1, tester.deploymentQueue().jobs().size());
+ assertEquals("Job is retried on failure", 1, tester.deploymentQueue().jobs().size());
tester.deployAndNotify(app, applicationPackage, true, JobType.systemTest);
// staging-test times out and is retried
@@ -390,28 +385,22 @@ public class DeploymentTriggerTest {
tester.upgradeSystem(version1);
tester.completeUpgradeWithError(application, version1, applicationPackage, productionEuWest1);
- // Exhaust the retry, so productionEuWest1 is no longer running.
- tester.clock().advance(Duration.ofHours(1));
- tester.deployAndNotify(application, Optional.empty(), false, true, productionEuWest1);
- assertTrue(tester.deploymentQueue().jobs().isEmpty());
-
// Deploy the new application version, even though the platform version is already deployed in us-central-1.
// Let it fail in us-central-1 after deployment, so we can test this zone is later skipped.
- tester.completeDeploymentWithError(application, applicationPackage, BuildJob.defaultBuildNumber + 1, productionUsCentral1);
+ tester.jobCompletion(component).application(application).nextBuildNumber().uploadArtifact(applicationPackage).submit();
+ tester.deployAndNotify(application, applicationPackage, true, false, systemTest);
+ tester.deployAndNotify(application, applicationPackage, true, false, stagingTest);
+ tester.jobCompletion(productionEuWest1).application(application).unsuccessful().submit();
tester.deploy(productionUsCentral1, application, Optional.empty(), false);
+ // Deploying before notifying here makes the job not re-trigger, but instead triggers the next job (because of triggerReadyJobs() in notification.)
+ tester.deployAndNotify(application, applicationPackage, false, productionUsCentral1);
assertEquals(ApplicationVersion.from(BuildJob.defaultSourceRevision, BuildJob.defaultBuildNumber + 1),
app.get().deployments().get(ZoneId.from("prod.us-central-1")).applicationVersion());
- // Exhaust the automatic retry.
- tester.clock().advance(Duration.ofHours(1));
- tester.deployAndNotify(application, Optional.empty(), false, true, productionUsCentral1);
- assertTrue(tester.deploymentQueue().jobs().isEmpty());
-
- // Let the ReadyJobTrigger get what it thinks is the next job -- should be the last job.
- tester.readyJobTrigger().maintain();
assertEquals(Collections.singletonList(new BuildService.BuildJob(1, productionEuWest1.jobName())),
tester.deploymentQueue().jobs());
+
tester.deploy(productionEuWest1, application, Optional.empty(), false);
tester.deployAndNotify(application, Optional.empty(), false, true, productionEuWest1);
assertFalse(app.get().change().isPresent());
@@ -438,11 +427,6 @@ public class DeploymentTriggerTest {
tester.deployAndNotify(application, Optional.empty(), false, true, productionUsCentral1);
tester.deploy(productionUsCentral1, application, Optional.empty(), false);
- // Exhaust the automatic retry.
- tester.clock().advance(Duration.ofHours(1));
- tester.deployAndNotify(application, Optional.empty(), false, true, productionUsCentral1);
- assertTrue(tester.deploymentQueue().jobs().isEmpty());
-
ApplicationVersion appVersion1 = ApplicationVersion.from(BuildJob.defaultSourceRevision, BuildJob.defaultBuildNumber + 1);
assertEquals(appVersion1, app.get().deployments().get(ZoneId.from("prod.us-central-1")).applicationVersion());
@@ -450,13 +434,14 @@ public class DeploymentTriggerTest {
tester.deploymentTrigger().cancelChange(application.id(), true);
assertEquals(Change.of(appVersion1), app.get().change());
- // Now cancel the change -- this should not normally happen.
+ // Now cancel the change as is done through the web API.
tester.deploymentTrigger().cancelChange(application.id(), false);
assertEquals(Change.empty(), app.get().change());
// A new version is released, which should now deploy the currently deployed application version to avoid downgrades.
Version version1 = new Version("6.2");
tester.upgradeSystem(version1);
+ tester.jobCompletion(productionUsCentral1).application(application).unsuccessful().submit();
tester.completeUpgrade(application, version1, applicationPackage);
assertEquals(appVersion1, app.get().deployments().get(ZoneId.from("prod.us-central-1")).applicationVersion());
}
diff --git a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/DeploymentIssueReporterTest.java b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/DeploymentIssueReporterTest.java
index 548f7c33fa1..f088c4216ba 100644
--- a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/DeploymentIssueReporterTest.java
+++ b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/DeploymentIssueReporterTest.java
@@ -148,6 +148,7 @@ public class DeploymentIssueReporterTest {
tester.updateVersionStatus(version);
assertEquals(version, tester.controller().versionStatus().systemVersion().get().versionNumber());
tester.upgrader().maintain();
+ tester.readyJobTrigger().maintain();
tester.completeUpgradeWithError(app2, version, canaryPackage, systemTest);
tester.updateVersionStatus(version);
diff --git a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/FailureRedeployerTest.java b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/FailureRedeployerTest.java
index 092cdcd6984..429a0da6543 100644
--- a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/FailureRedeployerTest.java
+++ b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/FailureRedeployerTest.java
@@ -23,6 +23,7 @@ import java.util.Collections;
import static com.yahoo.vespa.hosted.controller.application.DeploymentJobs.JobType.component;
import static com.yahoo.vespa.hosted.controller.application.DeploymentJobs.JobType.productionUsEast3;
+import static com.yahoo.vespa.hosted.controller.application.DeploymentJobs.JobType.systemTest;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertFalse;
import static org.junit.Assert.assertTrue;
@@ -54,6 +55,7 @@ public class FailureRedeployerTest {
tester.updateVersionStatus(version);
assertEquals(version, tester.controller().versionStatus().systemVersion().get().versionNumber());
tester.upgrader().maintain();
+ tester.readyJobTrigger().maintain();
// Test environments pass
tester.deployAndNotify(app, applicationPackage, true, DeploymentJobs.JobType.systemTest);
@@ -68,14 +70,14 @@ public class FailureRedeployerTest {
// Another version is released, which cancels any pending upgrades to lower versions
version = Version.fromString("5.2");
tester.updateVersionStatus(version);
- tester.deployAndNotify(app, applicationPackage, true, DeploymentJobs.JobType.productionUsEast3); // Finish previous production job.
tester.upgrader().maintain();
+ tester.readyJobTrigger().maintain();
assertEquals("Application starts upgrading to new version", 1, tester.deploymentQueue().jobs().size());
assertEquals("Application has pending upgrade to " + version, version, tester.application(app.id()).change().platform().get());
- // Failure redeployer does not retry failing job for prod.us-east-3 as there's an ongoing deployment
+ // Failure re-deployer does not retry failing job for prod.us-east-3, since it no longer has an available change
tester.clock().advance(Duration.ofMinutes(1));
- tester.readyJobTrigger().maintain();
+ tester.jobCompletion(DeploymentJobs.JobType.productionUsEast3).application(app).unsuccessful().submit();
assertFalse("Job is not retried", tester.deploymentQueue().jobs().stream()
.anyMatch(j -> j.jobName().equals(DeploymentJobs.JobType.productionUsEast3.jobName())));
@@ -83,16 +85,8 @@ public class FailureRedeployerTest {
tester.deployAndNotify(app, applicationPackage, true, DeploymentJobs.JobType.systemTest);
tester.deployAndNotify(app, applicationPackage, true, DeploymentJobs.JobType.stagingTest);
- // Production job fails again and exhausts all immediate retries
+ // Production job fails again, and is retried
tester.deployAndNotify(app, applicationPackage, false, DeploymentJobs.JobType.productionUsEast3);
- tester.deploymentQueue().takeJobsToRun();
- tester.clock().advance(Duration.ofMinutes(10));
- tester.jobCompletion(DeploymentJobs.JobType.productionUsEast3).application(app).unsuccessful().submit();
- assertTrue("Retries exhausted", tester.deploymentQueue().jobs().isEmpty());
- assertTrue("Failure is recorded", tester.application(app.id()).deploymentJobs().hasFailures());
-
- // Failure redeployer retries job
- tester.clock().advance(Duration.ofMinutes(5));
tester.readyJobTrigger().maintain();
assertEquals("Job is retried", Collections.singletonList(new BuildService.BuildJob(app.deploymentJobs().projectId().get(), productionUsEast3.jobName())), tester.deploymentQueue().jobs());
@@ -154,29 +148,25 @@ public class FailureRedeployerTest {
tester.updateVersionStatus(version);
assertEquals(version, tester.controller().versionStatus().systemVersion().get().versionNumber());
tester.upgrader().maintain();
+ tester.readyJobTrigger().maintain();
assertEquals("Application has pending upgrade to " + version, version, tester.application(app.id()).change().platform().get());
- // system-test fails and exhausts all immediate retries
+ // system-test fails and is left with a retry
tester.deployAndNotify(app, applicationPackage, false, DeploymentJobs.JobType.systemTest);
- tester.deploymentQueue().takeJobsToRun();
- tester.clock().advance(Duration.ofMinutes(10));
- tester.jobCompletion(DeploymentJobs.JobType.systemTest).application(app).unsuccessful().submit();
- assertTrue("Retries exhausted", tester.deploymentQueue().jobs().isEmpty());
// Another version is released
version = Version.fromString("5.2");
tester.updateVersionStatus(version);
assertEquals(version, tester.controller().versionStatus().systemVersion().get().versionNumber());
+
+ // Job is left "running", so needs to time out before it can be retried.
+ tester.clock().advance(Duration.ofHours(13));
tester.upgrader().maintain();
+ tester.readyJobTrigger().maintain();
assertEquals("Application has pending upgrade to " + version, version, tester.application(app.id()).change().platform().get());
- // Consume system-test job for 5.2
- tester.deploymentQueue().takeJobsToRun();
-
- // Failure re-deployer does not retry failing system-test job as it failed for an older change
- tester.clock().advance(Duration.ofMinutes(5));
- tester.readyJobTrigger().maintain();
- assertTrue("No jobs retried", tester.deploymentQueue().jobs().isEmpty());
+ // Cancellation of outdated version and triggering on a new version is done by the upgrader.
+ assertEquals(version, tester.application(app.id()).deploymentJobs().jobStatus().get(systemTest).lastTriggered().get().version());
}
@Test
@@ -207,6 +197,7 @@ public class FailureRedeployerTest {
tester.updateVersionStatus(version);
assertEquals(version, tester.controller().versionStatus().systemVersion().get().versionNumber());
tester.upgrader().maintain();
+ tester.readyJobTrigger().maintain();
// Test environments pass
tester.deploy(DeploymentJobs.JobType.systemTest, application, applicationPackage);
diff --git a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/OutstandingChangeDeployerTest.java b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/OutstandingChangeDeployerTest.java
index 12fb2b6c862..a4e464a065f 100644
--- a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/OutstandingChangeDeployerTest.java
+++ b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/OutstandingChangeDeployerTest.java
@@ -42,6 +42,7 @@ public class OutstandingChangeDeployerTest {
Version version = new Version(6, 2);
tester.deploymentTrigger().triggerChange(tester.application("app1").id(), Change.of(version));
+ tester.deploymentTrigger().triggerReadyJobs();
assertEquals(Change.of(version), tester.application("app1").change());
assertFalse(tester.application("app1").outstandingChange().isPresent());
@@ -59,6 +60,7 @@ public class OutstandingChangeDeployerTest {
assertEquals(1, tester.deploymentQueue().jobs().size());
deployer.maintain();
+ tester.deploymentTrigger().triggerReadyJobs();
assertEquals("No effect as job is in progress", 1, tester.deploymentQueue().jobs().size());
assertEquals("1.0.43-cafed00d", app.outstandingChange().application().get().id());
@@ -68,6 +70,7 @@ public class OutstandingChangeDeployerTest {
assertEquals("Upgrade done", 0, tester.deploymentQueue().jobs().size());
deployer.maintain();
+ tester.deploymentTrigger().triggerReadyJobs();
app = tester.application("app1");
assertEquals("1.0.43-cafed00d", app.change().application().get().id());
List<BuildService.BuildJob> jobs = tester.deploymentQueue().jobs();
diff --git a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/UpgraderTest.java b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/UpgraderTest.java
index 021fe0954c2..b7eff6d8448 100644
--- a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/UpgraderTest.java
+++ b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/UpgraderTest.java
@@ -21,6 +21,8 @@ import java.time.Duration;
import java.time.Instant;
import static com.yahoo.vespa.hosted.controller.application.DeploymentJobs.JobType.component;
+import static com.yahoo.vespa.hosted.controller.application.DeploymentJobs.JobType.productionEuWest1;
+import static com.yahoo.vespa.hosted.controller.application.DeploymentJobs.JobType.productionUsEast3;
import static com.yahoo.vespa.hosted.controller.application.DeploymentJobs.JobType.productionUsWest1;
import static com.yahoo.vespa.hosted.controller.application.DeploymentJobs.JobType.stagingTest;
import static com.yahoo.vespa.hosted.controller.application.DeploymentJobs.JobType.systemTest;
@@ -42,6 +44,7 @@ public class UpgraderTest {
tester.updateVersionStatus(version);
tester.upgrader().maintain();
+ tester.readyJobTrigger().maintain();
assertEquals("No applications: Nothing to do", 0, tester.deploymentQueue().jobs().size());
// Setup applications
@@ -53,6 +56,7 @@ public class UpgraderTest {
Application conservative0 = tester.createAndDeploy("conservative0", 6, "conservative");
tester.upgrader().maintain();
+ tester.readyJobTrigger().maintain();
assertEquals("All already on the right version: Nothing to do", 0, tester.deploymentQueue().jobs().size());
// --- 5.1 is released - everything goes smoothly
@@ -60,6 +64,7 @@ public class UpgraderTest {
tester.updateVersionStatus(version);
assertEquals(version, tester.controller().versionStatus().systemVersion().get().versionNumber());
tester.upgrader().maintain();
+ tester.readyJobTrigger().maintain();
assertEquals("New system version: Should upgrade Canaries", 2, tester.deploymentQueue().jobs().size());
tester.completeUpgrade(canary0, version, "canary");
@@ -67,6 +72,7 @@ public class UpgraderTest {
tester.updateVersionStatus(version);
tester.upgrader().maintain();
+ tester.readyJobTrigger().maintain();
assertEquals("One canary pending; nothing else", 1, tester.deploymentQueue().jobs().size());
tester.completeUpgrade(canary1, version, "canary");
@@ -74,6 +80,7 @@ public class UpgraderTest {
tester.updateVersionStatus(version);
assertEquals(VespaVersion.Confidence.normal, tester.controller().versionStatus().systemVersion().get().confidence());
tester.upgrader().maintain();
+ tester.readyJobTrigger().maintain();
assertEquals("Canaries done: Should upgrade defaults", 3, tester.deploymentQueue().jobs().size());
tester.completeUpgrade(default0, version, "default");
@@ -83,11 +90,13 @@ public class UpgraderTest {
tester.updateVersionStatus(version);
assertEquals(VespaVersion.Confidence.high, tester.controller().versionStatus().systemVersion().get().confidence());
tester.upgrader().maintain();
+ tester.readyJobTrigger().maintain();
assertEquals("Normals done: Should upgrade conservatives", 1, tester.deploymentQueue().jobs().size());
tester.completeUpgrade(conservative0, version, "conservative");
tester.updateVersionStatus(version);
tester.upgrader().maintain();
+ tester.readyJobTrigger().maintain();
assertEquals("Nothing to do", 0, tester.deploymentQueue().jobs().size());
// --- 5.2 is released - which fails a Canary
@@ -95,31 +104,25 @@ public class UpgraderTest {
tester.updateVersionStatus(version);
assertEquals(version, tester.controller().versionStatus().systemVersion().get().versionNumber());
tester.upgrader().maintain();
+ tester.readyJobTrigger().maintain();
assertEquals("New system version: Should upgrade Canaries", 2, tester.deploymentQueue().jobs().size());
tester.completeUpgradeWithError(canary0, version, "canary", DeploymentJobs.JobType.stagingTest);
assertEquals("Other Canary was cancelled", 2, tester.deploymentQueue().jobs().size());
- // TODO: Cancelled would mean it was triggerd, removed from the build system, but never reported in.
- // Thus, the expected number of jobs should be 1, above: the retrying canary0.
- // Further, canary1 should be retried after the timeout period of 12 hours, but verifying this is
- // not possible when jobs are consumed form the build system on notification, rather than on deploy.
tester.updateVersionStatus(version);
assertEquals(VespaVersion.Confidence.broken, tester.controller().versionStatus().systemVersion().get().confidence());
tester.upgrader().maintain();
+ tester.readyJobTrigger().maintain();
assertEquals("Version broken, but Canaries should keep trying", 2, tester.deploymentQueue().jobs().size());
- // Exhaust canary retries.
- tester.jobCompletion(systemTest).application(canary1).unsuccessful().submit();
- tester.clock().advance(Duration.ofHours(1));
- tester.deployAndNotify(canary0, DeploymentTester.applicationPackage("canary"), false, DeploymentJobs.JobType.stagingTest);
- tester.jobCompletion(systemTest).application(canary1).unsuccessful().submit();
-
// --- A new version is released - which repairs the Canary app and fails a default
+ tester.clock().advance(Duration.ofHours(13));
version = Version.fromString("5.3");
tester.updateVersionStatus(version);
assertEquals(version, tester.controller().versionStatus().systemVersion().get().versionNumber());
tester.upgrader().maintain();
+ tester.readyJobTrigger().maintain();
assertEquals("New system version: Should upgrade Canaries", 2, tester.deploymentQueue().jobs().size());
tester.completeUpgrade(canary0, version, "canary");
@@ -127,6 +130,7 @@ public class UpgraderTest {
tester.updateVersionStatus(version);
tester.upgrader().maintain();
+ tester.readyJobTrigger().maintain();
assertEquals("One canary pending; nothing else", 1, tester.deploymentQueue().jobs().size());
tester.completeUpgrade(canary1, version, "canary");
@@ -134,6 +138,7 @@ public class UpgraderTest {
tester.updateVersionStatus(version);
assertEquals(VespaVersion.Confidence.normal, tester.controller().versionStatus().systemVersion().get().confidence());
tester.upgrader().maintain();
+ tester.readyJobTrigger().maintain();
assertEquals("Canaries done: Should upgrade defaults", 3, tester.deploymentQueue().jobs().size());
@@ -147,22 +152,27 @@ public class UpgraderTest {
assertEquals("Upgrade with error should retry", 1, tester.deploymentQueue().jobs().size());
- // Finish previous run, with exhausted retry.
- tester.clock().advance(Duration.ofHours(1));
- tester.jobCompletion(stagingTest).application(default0).unsuccessful().submit();
// --- Failing application is repaired by changing the application, causing confidence to move above 'high' threshold
// Deploy application change
- tester.deployCompletely(tester.application("default0"), DeploymentTester.applicationPackage("default"), BuildJob.defaultBuildNumber + 1);
+ tester.deploymentQueue().takeJobsToRun();
+ tester.jobCompletion(component).application(default0).nextBuildNumber().uploadArtifact(DeploymentTester.applicationPackage("default")).submit();
+ tester.jobCompletion(stagingTest).application(default0).unsuccessful().submit();
+ tester.deployAndNotify(default0, "default", true, systemTest);
+ tester.deployAndNotify(default0, "default", true, stagingTest);
+ tester.deployAndNotify(default0, "default", true, productionUsWest1);
+ tester.deployAndNotify(default0, "default", true, productionUsEast3);
tester.updateVersionStatus(version);
assertEquals(VespaVersion.Confidence.high, tester.controller().versionStatus().systemVersion().get().confidence());
tester.upgrader().maintain();
+ tester.readyJobTrigger().maintain();
assertEquals("Normals done: Should upgrade conservatives", 1, tester.deploymentQueue().jobs().size());
tester.completeUpgrade(conservative0, version, "conservative");
tester.updateVersionStatus(version);
tester.upgrader().maintain();
+ tester.readyJobTrigger().maintain();
assertEquals("Applications are on 5.3 - nothing to do", 0, tester.deploymentQueue().jobs().size());
// --- Starting upgrading to a new version which breaks, causing upgrades to commence on the previous version
@@ -171,11 +181,13 @@ public class UpgraderTest {
Application default4 = tester.createAndDeploy("default4", 5, "default");
tester.updateVersionStatus(version54);
tester.upgrader().maintain(); // cause canary upgrades to 5.4
+ tester.readyJobTrigger().maintain();
tester.completeUpgrade(canary0, version54, "canary");
tester.completeUpgrade(canary1, version54, "canary");
tester.updateVersionStatus(version54);
assertEquals(VespaVersion.Confidence.normal, tester.controller().versionStatus().systemVersion().get().confidence());
tester.upgrader().maintain();
+ tester.readyJobTrigger().maintain();
assertEquals("Upgrade of defaults are scheduled", 5, tester.deploymentQueue().jobs().size());
assertEquals(version54, tester.application(default0.id()).change().platform().get());
@@ -189,11 +201,13 @@ public class UpgraderTest {
Version version55 = Version.fromString("5.5");
tester.updateVersionStatus(version55);
tester.upgrader().maintain(); // cause canary upgrades to 5.5
+ tester.readyJobTrigger().maintain();
tester.completeUpgrade(canary0, version55, "canary");
tester.completeUpgrade(canary1, version55, "canary");
tester.updateVersionStatus(version55);
assertEquals(VespaVersion.Confidence.normal, tester.controller().versionStatus().systemVersion().get().confidence());
tester.upgrader().maintain();
+ tester.readyJobTrigger().maintain();
assertEquals("Upgrade of defaults are scheduled", 5, tester.deploymentQueue().jobs().size());
assertEquals(version55, tester.application(default0.id()).change().platform().get());
@@ -206,13 +220,12 @@ public class UpgraderTest {
tester.completeUpgrade(default2, version54, "default");
tester.completeUpgradeWithError(default3, version54, "default", DeploymentJobs.JobType.stagingTest);
- // Exhaust immediate retries for upgrade
- tester.clock().advance(Duration.ofHours(1));
- tester.jobCompletion(stagingTest).application(default3).unsuccessful().submit();
tester.completeUpgradeWithError(default4, version54, "default", DeploymentJobs.JobType.productionUsWest1);
// State: Default applications started upgrading to 5.5
+ tester.clock().advance(Duration.ofHours(13));
tester.upgrader().maintain();
+ tester.readyJobTrigger().maintain();
tester.completeUpgradeWithError(default0, version55, "default", DeploymentJobs.JobType.stagingTest);
tester.completeUpgradeWithError(default1, version55, "default", DeploymentJobs.JobType.stagingTest);
tester.completeUpgradeWithError(default2, version55, "default", DeploymentJobs.JobType.stagingTest);
@@ -225,6 +238,7 @@ public class UpgraderTest {
tester.jobCompletion(DeploymentJobs.JobType.productionUsWest1).application(default3).unsuccessful().submit();
tester.upgrader().maintain();
+ tester.readyJobTrigger().maintain();
assertEquals("Upgrade of defaults are scheduled on 5.4 instead, since 5.5 broken: " +
"This is default3 since it failed upgrade on both 5.4 and 5.5",
1, tester.deploymentQueue().jobs().size());
@@ -236,12 +250,14 @@ public class UpgraderTest {
// --- Setup
DeploymentTester tester = new DeploymentTester();
tester.upgrader().maintain();
+ tester.readyJobTrigger().maintain();
assertEquals("No system version: Nothing to do", 0, tester.deploymentQueue().jobs().size());
Version version = Version.fromString("5.0"); // (lower than the hardcoded version in the config server client)
tester.updateVersionStatus(version);
tester.upgrader().maintain();
+ tester.readyJobTrigger().maintain();
assertEquals("No applications: Nothing to do", 0, tester.deploymentQueue().jobs().size());
// Setup applications
@@ -259,6 +275,7 @@ public class UpgraderTest {
Application default9 = tester.createAndDeploy("default9", 12, "default");
tester.upgrader().maintain();
+ tester.readyJobTrigger().maintain();
assertEquals("All already on the right version: Nothing to do", 0, tester.deploymentQueue().jobs().size());
// --- A new version is released
@@ -266,6 +283,7 @@ public class UpgraderTest {
tester.updateVersionStatus(version);
assertEquals(version, tester.controller().versionStatus().systemVersion().get().versionNumber());
tester.upgrader().maintain();
+ tester.readyJobTrigger().maintain();
assertEquals("New system version: Should upgrade Canaries", 2, tester.deploymentQueue().jobs().size());
tester.completeUpgrade(canary0, version, "canary");
@@ -273,6 +291,7 @@ public class UpgraderTest {
tester.updateVersionStatus(version);
tester.upgrader().maintain();
+ tester.readyJobTrigger().maintain();
assertEquals("One canary pending; nothing else", 1, tester.deploymentQueue().jobs().size());
tester.completeUpgrade(canary1, version, "canary");
@@ -280,6 +299,7 @@ public class UpgraderTest {
tester.updateVersionStatus(version);
assertEquals(VespaVersion.Confidence.normal, tester.controller().versionStatus().systemVersion().get().confidence());
tester.upgrader().maintain();
+ tester.readyJobTrigger().maintain();
assertEquals("Canaries done: Should upgrade defaults", 10, tester.deploymentQueue().jobs().size());
tester.completeUpgrade(default0, version, "default");
@@ -291,6 +311,7 @@ public class UpgraderTest {
// > 40% and at least 4 failed - version is broken
tester.updateVersionStatus(version);
tester.upgrader().maintain();
+ tester.readyJobTrigger().maintain();
assertEquals(VespaVersion.Confidence.broken, tester.controller().versionStatus().systemVersion().get().confidence());
assertEquals("Upgrades are cancelled", 0, tester.deploymentQueue().jobs().size());
}
@@ -313,6 +334,7 @@ public class UpgraderTest {
tester.deployAndNotify(app, applicationPackage, true, DeploymentJobs.JobType.productionUsEast3);
tester.upgrader().maintain();
+ tester.readyJobTrigger().maintain();
assertEquals("Application is on expected version: Nothing to do", 0,
tester.deploymentQueue().jobs().size());
@@ -321,16 +343,13 @@ public class UpgraderTest {
tester.updateVersionStatus(version);
assertEquals(version, tester.controller().versionStatus().systemVersion().get().versionNumber());
tester.upgrader().maintain();
+ tester.readyJobTrigger().maintain();
// system-test completes successfully
tester.deployAndNotify(app, applicationPackage, true, systemTest);
- // staging-test fails multiple times, exhausts retries and failure is recorded
+ // staging-test fails and failure is recorded
tester.deployAndNotify(app, applicationPackage, false, DeploymentJobs.JobType.stagingTest);
- tester.deploymentQueue().takeJobsToRun();
- tester.clock().advance(Duration.ofMinutes(10));
- tester.jobCompletion(stagingTest).application(app).unsuccessful().submit();
- assertTrue("Retries exhausted", tester.deploymentQueue().jobs().isEmpty());
assertTrue("Failure is recorded", tester.application(app.id()).deploymentJobs().hasFailures());
assertTrue("Application has pending change", tester.application(app.id()).change().isPresent());
@@ -341,12 +360,14 @@ public class UpgraderTest {
// Upgrade is scheduled. system-tests starts, but does not complete
tester.upgrader().maintain();
+ tester.readyJobTrigger().maintain();
assertTrue("Application still has failures", tester.application(app.id()).deploymentJobs().hasFailures());
assertEquals(1, tester.deploymentQueue().jobs().size());
tester.deploymentQueue().takeJobsToRun();
// Upgrader runs again, nothing happens as there's already a job in progress for this change
tester.upgrader().maintain();
+ tester.readyJobTrigger().maintain();
assertTrue("No more jobs triggered at this time", tester.deploymentQueue().jobs().isEmpty());
}
@@ -370,6 +391,7 @@ public class UpgraderTest {
tester.updateVersionStatus(version);
assertEquals(version, tester.controller().versionStatus().systemVersion().get().versionNumber());
tester.upgrader().maintain();
+ tester.readyJobTrigger().maintain();
// Canaries upgrade and raise confidence
tester.completeUpgrade(canary0, version, "canary");
@@ -379,6 +401,7 @@ public class UpgraderTest {
// Applications with default policy start upgrading
tester.upgrader().maintain();
+ tester.readyJobTrigger().maintain();
assertEquals("Upgrade scheduled for remaining apps", 5, tester.deploymentQueue().jobs().size());
// 4/5 applications fail and lowers confidence
@@ -389,6 +412,7 @@ public class UpgraderTest {
tester.updateVersionStatus(version);
assertEquals(VespaVersion.Confidence.broken, tester.controller().versionStatus().systemVersion().get().confidence());
tester.upgrader().maintain();
+ tester.readyJobTrigger().maintain();
// 5th app passes system-test, but does not trigger next job as upgrade is cancelled
assertFalse("No change present", tester.applications().require(default4.id()).change().isPresent());
@@ -424,6 +448,7 @@ public class UpgraderTest {
tester.updateVersionStatus(v1);
assertEquals(v1, tester.controller().versionStatus().systemVersion().get().versionNumber());
tester.upgrader().maintain();
+ tester.readyJobTrigger().maintain();
// Canaries upgrade and raise confidence of V+1 (other apps are not upgraded)
tester.completeUpgrade(canary0, v1, "canary");
@@ -436,6 +461,7 @@ public class UpgraderTest {
tester.updateVersionStatus(v2);
assertEquals(v2, tester.controller().versionStatus().systemVersion().get().versionNumber());
tester.upgrader().maintain();
+ tester.readyJobTrigger().maintain();
// We "manually" cancel upgrades to V1 so that we can use the applications to make V2 fail instead
// But we keep one (default4) to avoid V1 being garbage collected
@@ -453,6 +479,7 @@ public class UpgraderTest {
// Applications with default policy start upgrading to V2
tester.upgrader().maintain();
+ tester.readyJobTrigger().maintain();
assertEquals("Upgrade scheduled for remaining apps", 5, tester.deploymentQueue().jobs().size());
// 4/5 applications fail (in the last prod zone) and lowers confidence
@@ -466,6 +493,7 @@ public class UpgraderTest {
assertEquals(v2, tester.application("default0").deployments().get(ZoneId.from("prod.us-west-1")).version());
assertEquals(v0, tester.application("default0").deployments().get(ZoneId.from("prod.us-east-3")).version());
tester.upgrader().maintain();
+ tester.readyJobTrigger().maintain();
assertEquals("Upgrade to 5.1 scheduled for apps not completely on 5.1 or 5.2", 5, tester.deploymentQueue().jobs().size());
tester.deploymentTrigger().triggerReadyJobs();
@@ -514,6 +542,7 @@ public class UpgraderTest {
tester.updateVersionStatus(version);
assertEquals(version, tester.controller().versionStatus().systemVersion().get().versionNumber());
tester.upgrader().maintain();
+ tester.readyJobTrigger().maintain();
// Canaries upgrade and raise confidence
tester.completeUpgrade(canary0, version, "canary");
@@ -523,6 +552,7 @@ public class UpgraderTest {
// All applications upgrade successfully
tester.upgrader().maintain();
+ tester.readyJobTrigger().maintain();
tester.completeUpgrade(default0, version, "default");
tester.completeUpgrade(default1, version, "default");
tester.completeUpgrade(default2, version, "default");
@@ -565,16 +595,19 @@ public class UpgraderTest {
// Application is not upgraded at this time
tester.upgrader().maintain();
+ tester.readyJobTrigger().maintain();
assertTrue("No jobs scheduled", tester.deploymentQueue().jobs().isEmpty());
// One hour passes, time is 19:00, still no upgrade
tester.clock().advance(Duration.ofHours(1));
tester.upgrader().maintain();
+ tester.readyJobTrigger().maintain();
assertTrue("No jobs scheduled", tester.deploymentQueue().jobs().isEmpty());
// Two hours pass in total, time is 20:00 and application upgrades
tester.clock().advance(Duration.ofHours(1));
tester.upgrader().maintain();
+ tester.readyJobTrigger().maintain();
assertFalse("Job is scheduled", tester.deploymentQueue().jobs().isEmpty());
tester.completeUpgrade(app, version, applicationPackage);
assertTrue("All jobs consumed", tester.deploymentQueue().jobs().isEmpty());
@@ -608,6 +641,7 @@ public class UpgraderTest {
// Application upgrade starts
tester.upgrader().maintain();
+ tester.readyJobTrigger().maintain();
tester.deployAndNotify(app, applicationPackage, true, systemTest);
tester.deployAndNotify(app, applicationPackage, true, DeploymentJobs.JobType.stagingTest);
clock.advance(Duration.ofHours(1)); // Entering block window after prod job is triggered
@@ -638,9 +672,6 @@ public class UpgraderTest {
public void testBlockVersionChangeHalfwayThoughThenNewVersion() {
ManualClock clock = new ManualClock(Instant.parse("2017-09-29T16:00:00.00Z")); // Friday, 16:00
DeploymentTester tester = new DeploymentTester(new ControllerTester(clock));
- ReadyJobsTrigger readyJobsTrigger = new ReadyJobsTrigger(tester.controller(),
- Duration.ofHours(1),
- new JobControl(tester.controllerTester().curator()));
Version version = Version.fromString("5.0");
tester.updateVersionStatus(version);
@@ -663,6 +694,7 @@ public class UpgraderTest {
// Application upgrade starts
tester.upgrader().maintain();
+ tester.readyJobTrigger().maintain();
tester.deployAndNotify(app, applicationPackage, true, systemTest);
tester.deployAndNotify(app, applicationPackage, true, DeploymentJobs.JobType.stagingTest);
tester.deployAndNotify(app, applicationPackage, true, productionUsWest1);
@@ -675,14 +707,14 @@ public class UpgraderTest {
version = Version.fromString("5.2");
tester.updateVersionStatus(version);
tester.upgrader().maintain();
- readyJobsTrigger.maintain();
+ tester.readyJobTrigger().maintain();
assertTrue("Nothing is scheduled", tester.deploymentQueue().jobs().isEmpty());
// Monday morning: We are not blocked
tester.clock().advance(Duration.ofDays(1)); // Sunday, 17:00
tester.clock().advance(Duration.ofHours(17)); // Monday, 10:00
tester.upgrader().maintain();
- readyJobsTrigger.maintain();
+ tester.readyJobTrigger().maintain();
// We proceed with the new version in the expected order, not starting with the previously blocked version:
// Test jobs are run with the new version, but not production as we are in the block window
tester.deployAndNotify(app, applicationPackage, true, systemTest);
@@ -730,6 +762,7 @@ public class UpgraderTest {
tester.updateVersionStatus(version);
assertEquals(version, tester.controller().versionStatus().systemVersion().get().versionNumber());
tester.upgrader().maintain();
+ tester.readyJobTrigger().maintain();
// Canaries upgrade and raise confidence
tester.completeUpgrade(canary0, version, canaryApplicationPackage);
@@ -740,6 +773,7 @@ public class UpgraderTest {
// Applications with default policy start upgrading
tester.clock().advance(Duration.ofMinutes(1));
tester.upgrader().maintain();
+ tester.readyJobTrigger().maintain();
assertEquals("Upgrade scheduled for remaining apps", 5, tester.deploymentQueue().jobs().size());
// 4/5 applications fail, confidence is lowered and upgrade is cancelled
@@ -751,9 +785,9 @@ public class UpgraderTest {
assertEquals(VespaVersion.Confidence.broken, tester.controller().versionStatus().systemVersion().get().confidence());
tester.upgrader().maintain();
+ tester.readyJobTrigger().maintain();
- // Exhaust retries and finish runs
- tester.clock().advance(Duration.ofHours(1));
+ // Finish runs
tester.jobCompletion(systemTest).application(default0).unsuccessful().submit();
tester.jobCompletion(systemTest).application(default1).unsuccessful().submit();
tester.jobCompletion(systemTest).application(default2).unsuccessful().submit();
@@ -780,6 +814,7 @@ public class UpgraderTest {
assertEquals(VespaVersion.Confidence.normal, tester.controller().versionStatus().systemVersion().get().confidence());
tester.upgrader().maintain();
+ tester.readyJobTrigger().maintain();
assertEquals("Upgrade scheduled for previously failing apps", 4, tester.deploymentQueue().jobs().size());
tester.completeUpgrade(default0, version, defaultApplicationPackageV2);
@@ -822,6 +857,7 @@ public class UpgraderTest {
tester.updateVersionStatus(version);
assertEquals(version, tester.controller().versionStatus().systemVersion().get().versionNumber());
upgrader.maintain();
+ tester.readyJobTrigger().maintain();
assertEquals(2, tester.deploymentQueue().jobs().size());
tester.completeUpgrade(canary0, version, "canary");
@@ -830,16 +866,19 @@ public class UpgraderTest {
// Next run upgrades a subset
upgrader.maintain();
+ tester.readyJobTrigger().maintain();
assertEquals(2, tester.deploymentQueue().jobs().size());
tester.completeUpgrade(default0, version, "default");
tester.completeUpgrade(default2, version, "default");
// Remaining applications upgraded
upgrader.maintain();
+ tester.readyJobTrigger().maintain();
assertEquals(2, tester.deploymentQueue().jobs().size());
tester.completeUpgrade(default1, version, "default");
tester.completeUpgrade(default3, version, "default");
upgrader.maintain();
+ tester.readyJobTrigger().maintain();
assertTrue("All jobs consumed", tester.deploymentQueue().jobs().isEmpty());
}
@@ -860,13 +899,10 @@ public class UpgraderTest {
version = Version.fromString("5.1");
tester.updateVersionStatus(version);
tester.upgrader().maintain();
+ tester.readyJobTrigger().maintain();
tester.deployAndNotify(app, applicationPackage, true, systemTest);
tester.deployAndNotify(app, applicationPackage, true, stagingTest);
-
- // Production job fails and exhausts retries, new application changes are now accepted
- tester.deployAndNotify(app, applicationPackage, false, productionUsWest1);
- tester.clock().advance(Duration.ofHours(1));
tester.deployAndNotify(app, applicationPackage, false, productionUsWest1);
// New application change
@@ -880,8 +916,9 @@ public class UpgraderTest {
app.change().application().get().id().equals(applicationVersion));
// Deployment completes
- tester.deployAndNotify(app, applicationPackage, true, systemTest);
- tester.deployAndNotify(app, applicationPackage, true, stagingTest);
+ tester.deployAndNotify(app, applicationPackage, true, false, systemTest);
+ tester.deployAndNotify(app, applicationPackage, true, false, stagingTest);
+ tester.jobCompletion(productionUsWest1).application(app).unsuccessful().submit();
tester.deployAndNotify(app, applicationPackage, true, productionUsWest1);
assertTrue("All jobs consumed", tester.deploymentQueue().jobs().isEmpty());
diff --git a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/restapi/ContainerControllerTester.java b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/restapi/ContainerControllerTester.java
index b810c3adeb5..8e60e63e873 100644
--- a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/restapi/ContainerControllerTester.java
+++ b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/restapi/ContainerControllerTester.java
@@ -126,6 +126,7 @@ public class ContainerControllerTester {
throw new RuntimeException(e);
}
controller().applications().notifyJobCompletion(jobReport);
+ controller().applications().deploymentTrigger().triggerReadyJobs();
}
private AthenzDomain addTenantAthenzDomain(String domainName, String userName) {
diff --git a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/restapi/application/ApplicationApiTest.java b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/restapi/application/ApplicationApiTest.java
index 0ae9cf767d0..b9eef2069d9 100644
--- a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/restapi/application/ApplicationApiTest.java
+++ b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/restapi/application/ApplicationApiTest.java
@@ -1,6 +1,7 @@
// Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
package com.yahoo.vespa.hosted.controller.restapi.application;
+import com.google.common.base.Functions;
import com.yahoo.application.container.handler.Request;
import com.yahoo.application.container.handler.Response;
import com.yahoo.component.Version;
@@ -173,22 +174,6 @@ public class ApplicationApiTest extends ControllerContainerTest {
addUserToHostedOperatorRole(HostedAthenzIdentities.from(HOSTED_VESPA_OPERATOR));
- // POST triggering of a full deployment to an application (if version is omitted, current system version is used)
- tester.assertResponse(request("/application/v4/tenant/tenant1/application/application1/deploying", POST)
- .userIdentity(HOSTED_VESPA_OPERATOR)
- .data("6.1.0"),
- new File("application-deployment.json"));
-
- // DELETE (cancel) ongoing change
- tester.assertResponse(request("/application/v4/tenant/tenant1/application/application1/deploying", DELETE)
- .userIdentity(HOSTED_VESPA_OPERATOR),
- new File("application-deployment-cancelled.json"));
-
- // DELETE (cancel) again is a no-op
- tester.assertResponse(request("/application/v4/tenant/tenant1/application/application1/deploying", DELETE)
- .userIdentity(HOSTED_VESPA_OPERATOR),
- new File("application-deployment-cancelled-no-op.json"));
-
// POST (deploy) an application to a zone - manual user deployment
HttpEntity entity = createApplicationDeployData(applicationPackage, Optional.empty());
tester.assertResponse(request("/application/v4/tenant/tenant1/application/application1/environment/dev/region/us-west-1/instance/default/deploy", POST)
@@ -285,6 +270,21 @@ public class ApplicationApiTest extends ControllerContainerTest {
.recursive("true"),
new File("application1-recursive.json"));
+ // DELETE (cancel) ongoing change
+ tester.assertResponse(request("/application/v4/tenant/tenant1/application/application1/deploying", DELETE)
+ .userIdentity(HOSTED_VESPA_OPERATOR),
+ new File("application-deployment-cancelled.json"));
+
+ // DELETE (cancel) again is a no-op
+ tester.assertResponse(request("/application/v4/tenant/tenant1/application/application1/deploying", DELETE)
+ .userIdentity(HOSTED_VESPA_OPERATOR),
+ new File("application-deployment-cancelled-no-op.json"));
+
+ // POST triggering of a full deployment to an application (if version is omitted, current system version is used)
+ tester.assertResponse(request("/application/v4/tenant/tenant1/application/application1/deploying", POST)
+ .userIdentity(HOSTED_VESPA_OPERATOR)
+ .data("6.1.0"),
+ new File("application-deployment.json"));
// POST a 'restart application' command
tester.assertResponse(request("/application/v4/tenant/tenant1/application/application1/environment/prod/region/corp-us-east-1/instance/default/restart", POST)
@@ -792,7 +792,7 @@ public class ApplicationApiTest extends ControllerContainerTest {
Version vespaVersion = new Version("6.1"); // system version from mock config server client
- BuildJob job = new BuildJob(this::notifyCompletion, tester.artifactRepository())
+ BuildJob job = new BuildJob(report -> notifyCompletion(report, tester), tester.artifactRepository())
.application(app)
.projectId(projectId);
job.type(DeploymentJobs.JobType.component).uploadArtifact(applicationPackage).submit();
@@ -846,7 +846,7 @@ public class ApplicationApiTest extends ControllerContainerTest {
.build();
// Report job failing with out of capacity
- BuildJob job = new BuildJob(this::notifyCompletion, tester.artifactRepository())
+ BuildJob job = new BuildJob(report -> notifyCompletion(report, tester), tester.artifactRepository())
.application(app)
.projectId(projectId);
job.type(DeploymentJobs.JobType.component).uploadArtifact(applicationPackage).submit();
@@ -866,12 +866,13 @@ public class ApplicationApiTest extends ControllerContainerTest {
assertEquals(DeploymentJobs.JobError.outOfCapacity, jobStatus.jobError().get());
}
- private void notifyCompletion(DeploymentJobs.JobReport report) {
+ private void notifyCompletion(DeploymentJobs.JobReport report, ContainerControllerTester tester) {
assertResponse(request("/application/v4/tenant/tenant1/application/application1/jobreport", POST)
.userIdentity(HOSTED_VESPA_OPERATOR)
.data(asJson(report))
.get(),
200, "{\"message\":\"ok\"}");
+ tester.controller().applications().deploymentTrigger().triggerReadyJobs();
}
private static byte[] asJson(DeploymentJobs.JobReport report) {
diff --git a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/restapi/application/responses/application-deployment-cancelled.json b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/restapi/application/responses/application-deployment-cancelled.json
index 3b6d8ed71e9..bc09003d86f 100644
--- a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/restapi/application/responses/application-deployment-cancelled.json
+++ b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/restapi/application/responses/application-deployment-cancelled.json
@@ -1 +1 @@
-{"message":"Cancelled upgrade to 6.1 for application 'tenant1.application1'"}
+{"message":"Cancelled application change to 1.0.42-commit1 for application 'tenant1.application1'"}
diff --git a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/restapi/deployment/DeploymentApiTest.java b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/restapi/deployment/DeploymentApiTest.java
index 4de3b9abd5b..743baf76759 100644
--- a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/restapi/deployment/DeploymentApiTest.java
+++ b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/restapi/deployment/DeploymentApiTest.java
@@ -69,6 +69,7 @@ public class DeploymentApiTest extends ControllerContainerTest {
// Applications upgrade, 1/2 succeed
tester.upgrader().maintain();
+ tester.controller().applications().deploymentTrigger().triggerReadyJobs();
deployCompletely(failingApplication, applicationPackage, projectId, false);
deployCompletely(productionApplication, applicationPackage, projectId, true);
diff --git a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/versions/VersionStatusTest.java b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/versions/VersionStatusTest.java
index 27e26e3267a..14f5d00ec88 100644
--- a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/versions/VersionStatusTest.java
+++ b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/versions/VersionStatusTest.java
@@ -156,12 +156,6 @@ public class VersionStatusTest {
assertEquals("One canary failed: Broken",
Confidence.broken, confidence(tester.controller(), version1));
- // Finish running jobs
- tester.deployAndNotify(canary2, DeploymentTester.applicationPackage("canary"), false, systemTest);
- tester.clock().advance(Duration.ofHours(1));
- tester.deployAndNotify(canary1, DeploymentTester.applicationPackage("canary"), false, productionUsWest1);
- tester.deployAndNotify(canary2, DeploymentTester.applicationPackage("canary"), false, systemTest);
-
// New version is released
Version version2 = new Version("5.2");
tester.upgradeSystem(version2);
@@ -170,6 +164,7 @@ public class VersionStatusTest {
// All canaries upgrade successfully
tester.completeUpgrade(canary0, version2, "canary");
+ tester.jobCompletion(productionUsWest1).application(canary1).unsuccessful().submit();
tester.completeUpgrade(canary1, version2, "canary");
assertEquals("Confidence for remains unchanged for version1: Broken",
@@ -178,6 +173,7 @@ public class VersionStatusTest {
Confidence.low, confidence(tester.controller(), version2));
// Remaining canary upgrades to version2 which raises confidence to normal and more apps upgrade
+ tester.jobCompletion(systemTest).application(canary2).unsuccessful().submit();
tester.completeUpgrade(canary2, version2, "canary");
tester.upgradeSystem(version2);
assertEquals("Canaries have upgraded: Normal",