summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorJon Marius Venstad <jvenstad@yahoo-inc.com>2018-03-27 13:00:03 +0200
committerJon Marius Venstad <jvenstad@yahoo-inc.com>2018-03-27 13:00:03 +0200
commitcd751ee56ffa3aafac76136032dd62330c3f64ac (patch)
treec6a94d4d0b3330d723fdb517d0c197db63c356e5
parent73d8fec6290ab7056b093403e3500df4c189cefd (diff)
Removed triggering on job completion (but notify in tests call it to simulate the maintainer)
-rw-r--r--controller-server/src/main/java/com/yahoo/vespa/hosted/controller/ApplicationController.java5
-rw-r--r--controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentTrigger.java33
-rw-r--r--controller-server/src/test/java/com/yahoo/vespa/hosted/controller/ControllerTest.java23
-rw-r--r--controller-server/src/test/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentTester.java12
-rw-r--r--controller-server/src/test/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentTriggerTest.java37
-rw-r--r--controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/FailureRedeployerTest.java34
-rw-r--r--controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/UpgraderTest.java46
-rw-r--r--controller-server/src/test/java/com/yahoo/vespa/hosted/controller/restapi/ContainerControllerTester.java1
-rw-r--r--controller-server/src/test/java/com/yahoo/vespa/hosted/controller/restapi/application/ApplicationApiTest.java8
-rw-r--r--controller-server/src/test/java/com/yahoo/vespa/hosted/controller/versions/VersionStatusTest.java8
10 files changed, 63 insertions, 144 deletions
diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/ApplicationController.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/ApplicationController.java
index 5823dd160c0..1d3fff57a78 100644
--- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/ApplicationController.java
+++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/ApplicationController.java
@@ -553,6 +553,11 @@ public class ApplicationController {
}
public void notifyJobCompletion(JobReport report) {
+ log.log(Level.INFO, String.format("Notified of %s of %s %d for '%s'.",
+ report.jobError().map(error -> error + " failure").orElse("success"),
+ report.jobType(),
+ report.buildNumber(),
+ report.applicationId()));
if ( ! get(report.applicationId()).isPresent()) {
log.log(Level.WARNING, "Ignoring completion of job of project '" + report.projectId() +
"': Unknown application '" + report.applicationId() + "'");
diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentTrigger.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentTrigger.java
index 038a4a56e9e..68d80d54a3c 100644
--- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentTrigger.java
+++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentTrigger.java
@@ -87,17 +87,12 @@ public class DeploymentTrigger {
// Handle successful starting and ending
if (report.jobType() == JobType.component) {
if (report.success()) {
- if ( ! acceptNewApplicationVersionNow(application)) {
- applications().store(application.withOutstandingChange(Change.of(applicationVersion)));
- return;
- }
- // Note that in case of an ongoing upgrade this may result in both the upgrade and application
- // change being deployed together
- application = application.withChange(application.change().with(applicationVersion));
- }
- else { // don't re-trigger component on failure
- applications().store(application);
- return;
+ if ( ! acceptNewApplicationVersionNow(application))
+ application = application.withOutstandingChange(Change.of(applicationVersion));
+ else
+ // Note that in case of an ongoing upgrade this may result in both the upgrade and application
+ // change being deployed together
+ application = application.withChange(application.change().with(applicationVersion));
}
}
else if (report.jobType().isProduction() && deploymentComplete(application)) {
@@ -106,21 +101,6 @@ public class DeploymentTrigger {
application = application.withChange(Change.empty());
}
- // TODO jvenstad: Don't trigger.
- // Trigger next
- if (report.success()) {
- triggerReadyJobs(application);
- return; // Don't overwrite below.
- }
- else if (retryBecauseOutOfCapacity(application, report.jobType())) {
- triggerReadyJobs(application);
- return; // Don't overwrite below.
- }
- else if (retryBecauseNewFailure(application, report.jobType())) {
- triggerReadyJobs(application);
- return; // Don't overwrite below.
- }
-
applications().store(application);
});
}
@@ -226,7 +206,6 @@ public class DeploymentTrigger {
application = application.withChange(change);
if (change.application().isPresent())
application = application.withOutstandingChange(Change.empty());
- // TODO jvenstad: Don't trigger.
triggerReadyJobs(application);
});
}
diff --git a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/ControllerTest.java b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/ControllerTest.java
index 895ba195b08..ee3c5b80d46 100644
--- a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/ControllerTest.java
+++ b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/ControllerTest.java
@@ -142,9 +142,6 @@ public class ControllerTest {
tester.clock().advance(Duration.ofHours(1));
- // Need to complete the job, or new jobs won't start.
- tester.jobCompletion(productionCorpUsEast1).application(app1).unsuccessful().submit();
-
// system and staging test job - succeeding
tester.jobCompletion(component).application(app1).nextBuildNumber().uploadArtifact(applicationPackage).submit();
applicationVersion = tester.application("app1").change().application().get();
@@ -156,6 +153,7 @@ public class ControllerTest {
tester.deployAndNotify(app1, applicationPackage, true, stagingTest);
// production job succeeding now
+ tester.jobCompletion(productionCorpUsEast1).application(app1).unsuccessful().submit();
tester.deployAndNotify(app1, applicationPackage, true, productionCorpUsEast1);
expectedJobStatus = expectedJobStatus
.withTriggering(version1, applicationVersion, "", tester.clock().instant().minus(Duration.ofMillis(1)))
@@ -450,17 +448,6 @@ public class ControllerTest {
tester.deployAndNotify(app3, applicationPackage, true, stagingTest);
tester.deployAndNotify(app3, applicationPackage, true, productionCorpUsEast1);
- // app1: 15 minutes pass, staging-test job is still failing due out of capacity, but is no longer re-queued by
- // out of capacity retry mechanism
- tester.clock().advance(Duration.ofMinutes(15));
- tester.jobCompletion(stagingTest).application(app1).error(JobError.outOfCapacity).submit(); // Clear the previous staging test
- tester.jobCompletion(component).application(app1).nextBuildNumber().uploadArtifact(applicationPackage).submit();
- tester.deployAndNotify(app1, applicationPackage, true, false, systemTest);
- tester.deploy(stagingTest, app1, applicationPackage);
- assertEquals(1, deploymentQueue.takeJobsToRun().size());
- tester.jobCompletion(stagingTest).application(app1).error(JobError.outOfCapacity).submit();
- assertTrue("No jobs queued", deploymentQueue.jobs().isEmpty());
-
// app2 and app3: New change triggers system-test jobs
// Provide a changed application package, too, or the deployment is a no-op.
tester.jobCompletion(component).application(app2).nextBuildNumber().uploadArtifact(applicationPackage).submit();
@@ -469,14 +456,6 @@ public class ControllerTest {
tester.jobCompletion(component).application(app3).nextBuildNumber().uploadArtifact(applicationPackage).submit();
tester.deployAndNotify(app3, applicationPackage2, true, systemTest);
- assertEquals(2, deploymentQueue.jobs().size());
-
- // app1: 4 hours pass in total, staging-test job for app1 is re-queued by periodic trigger mechanism and added at the
- // front of the queue
- tester.clock().advance(Duration.ofHours(3));
- tester.clock().advance(Duration.ofMinutes(50));
- tester.readyJobTrigger().maintain();
-
assertEquals(Collections.singletonList(new BuildService.BuildJob(project1, stagingTest.jobName())), deploymentQueue.takeJobsToRun());
assertEquals(Collections.singletonList(new BuildService.BuildJob(project2, stagingTest.jobName())), deploymentQueue.takeJobsToRun());
assertEquals(Collections.singletonList(new BuildService.BuildJob(project3, stagingTest.jobName())), deploymentQueue.takeJobsToRun());
diff --git a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentTester.java b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentTester.java
index 92bf22df535..093c2dc8d50 100644
--- a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentTester.java
+++ b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentTester.java
@@ -275,7 +275,7 @@ public class DeploymentTester {
assertEquals(job.jobName(), buildJob.jobName());
}
if (expectOnlyTheseJobs)
- assertEquals(jobs.length, countJobsOf(application));
+ assertEquals("Unexpected job queue: " + jobsOf(application), jobs.length, jobsOf(application).size());
deploymentQueue().removeJobs(application.id());
}
@@ -286,15 +286,17 @@ public class DeploymentTester {
throw new IllegalArgumentException(jobType + " is not scheduled for " + application);
}
- private int countJobsOf(Application application) {
- return (int) deploymentQueue().jobs().stream()
- .filter(job -> job.projectId() == application.deploymentJobs().projectId().get())
- .count();
+ private List<JobType> jobsOf(Application application) {
+ return deploymentQueue().jobs().stream()
+ .filter(job -> job.projectId() == application.deploymentJobs().projectId().get())
+ .map(buildJob -> JobType.fromJobName(buildJob.jobName()))
+ .collect(Collectors.toList());
}
private void notifyJobCompletion(DeploymentJobs.JobReport report) {
clock().advance(Duration.ofMillis(1));
applications().notifyJobCompletion(report);
+ applications().deploymentTrigger().triggerReadyJobs();
}
public static ApplicationPackage applicationPackage(String upgradePolicy) {
diff --git a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentTriggerTest.java b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentTriggerTest.java
index ce765249b97..e3a71b40be0 100644
--- a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentTriggerTest.java
+++ b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentTriggerTest.java
@@ -69,14 +69,7 @@ public class DeploymentTriggerTest {
// system-test fails and is retried
tester.deployAndNotify(app, applicationPackage, false, JobType.systemTest);
- assertEquals("Retried immediately", 1, tester.deploymentQueue().jobs().size());
- tester.clock().advance(Duration.ofHours(1));
- tester.deployAndNotify(app, applicationPackage, false, JobType.systemTest);
- tester.clock().advance(Duration.ofHours(1));
- assertEquals("Nothing scheduled", 0, tester.deploymentQueue().jobs().size());
- tester.readyJobTrigger().maintain(); // Causes retry of systemTests
-
- assertEquals("Scheduled retry", 1, tester.deploymentQueue().jobs().size());
+ assertEquals("Job is retried on failure", 1, tester.deploymentQueue().jobs().size());
tester.deployAndNotify(app, applicationPackage, true, JobType.systemTest);
// staging-test times out and is retried
@@ -390,28 +383,22 @@ public class DeploymentTriggerTest {
tester.upgradeSystem(version1);
tester.completeUpgradeWithError(application, version1, applicationPackage, productionEuWest1);
- // Exhaust the retry, so productionEuWest1 is no longer running.
- tester.clock().advance(Duration.ofHours(1));
- tester.deployAndNotify(application, Optional.empty(), false, true, productionEuWest1);
- assertTrue(tester.deploymentQueue().jobs().isEmpty());
-
// Deploy the new application version, even though the platform version is already deployed in us-central-1.
// Let it fail in us-central-1 after deployment, so we can test this zone is later skipped.
- tester.completeDeploymentWithError(application, applicationPackage, BuildJob.defaultBuildNumber + 1, productionUsCentral1);
+ tester.jobCompletion(component).application(application).nextBuildNumber().uploadArtifact(applicationPackage).submit();
+ tester.deployAndNotify(application, applicationPackage, true, false, systemTest);
+ tester.deployAndNotify(application, applicationPackage, true, false, stagingTest);
+ tester.jobCompletion(productionEuWest1).application(application).unsuccessful().submit();
tester.deploy(productionUsCentral1, application, Optional.empty(), false);
+ // Deploying before notifying here makes the job not re-trigger, but instead triggers the next job (because of triggerReadyJobs() in notification.)
+ tester.deployAndNotify(application, applicationPackage, false, productionUsCentral1);
assertEquals(ApplicationVersion.from(BuildJob.defaultSourceRevision, BuildJob.defaultBuildNumber + 1),
app.get().deployments().get(ZoneId.from("prod.us-central-1")).applicationVersion());
- // Exhaust the automatic retry.
- tester.clock().advance(Duration.ofHours(1));
- tester.deployAndNotify(application, Optional.empty(), false, true, productionUsCentral1);
- assertTrue(tester.deploymentQueue().jobs().isEmpty());
-
- // Let the ReadyJobTrigger get what it thinks is the next job -- should be the last job.
- tester.readyJobTrigger().maintain();
assertEquals(Collections.singletonList(new BuildService.BuildJob(1, productionEuWest1.jobName())),
tester.deploymentQueue().jobs());
+
tester.deploy(productionEuWest1, application, Optional.empty(), false);
tester.deployAndNotify(application, Optional.empty(), false, true, productionEuWest1);
assertFalse(app.get().change().isPresent());
@@ -438,11 +425,6 @@ public class DeploymentTriggerTest {
tester.deployAndNotify(application, Optional.empty(), false, true, productionUsCentral1);
tester.deploy(productionUsCentral1, application, Optional.empty(), false);
- // Exhaust the automatic retry.
- tester.clock().advance(Duration.ofHours(1));
- tester.deployAndNotify(application, Optional.empty(), false, true, productionUsCentral1);
- assertTrue(tester.deploymentQueue().jobs().isEmpty());
-
ApplicationVersion appVersion1 = ApplicationVersion.from(BuildJob.defaultSourceRevision, BuildJob.defaultBuildNumber + 1);
assertEquals(appVersion1, app.get().deployments().get(ZoneId.from("prod.us-central-1")).applicationVersion());
@@ -450,13 +432,14 @@ public class DeploymentTriggerTest {
tester.deploymentTrigger().cancelChange(application.id(), true);
assertEquals(Change.of(appVersion1), app.get().change());
- // Now cancel the change -- this should not normally happen.
+ // Now cancel the change as is done through the web API.
tester.deploymentTrigger().cancelChange(application.id(), false);
assertEquals(Change.empty(), app.get().change());
// A new version is released, which should now deploy the currently deployed application version to avoid downgrades.
Version version1 = new Version("6.2");
tester.upgradeSystem(version1);
+ tester.jobCompletion(productionUsCentral1).application(application).unsuccessful().submit();
tester.completeUpgrade(application, version1, applicationPackage);
assertEquals(appVersion1, app.get().deployments().get(ZoneId.from("prod.us-central-1")).applicationVersion());
}
diff --git a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/FailureRedeployerTest.java b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/FailureRedeployerTest.java
index 092cdcd6984..94f68f04580 100644
--- a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/FailureRedeployerTest.java
+++ b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/FailureRedeployerTest.java
@@ -23,6 +23,7 @@ import java.util.Collections;
import static com.yahoo.vespa.hosted.controller.application.DeploymentJobs.JobType.component;
import static com.yahoo.vespa.hosted.controller.application.DeploymentJobs.JobType.productionUsEast3;
+import static com.yahoo.vespa.hosted.controller.application.DeploymentJobs.JobType.systemTest;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertFalse;
import static org.junit.Assert.assertTrue;
@@ -68,14 +69,13 @@ public class FailureRedeployerTest {
// Another version is released, which cancels any pending upgrades to lower versions
version = Version.fromString("5.2");
tester.updateVersionStatus(version);
- tester.deployAndNotify(app, applicationPackage, true, DeploymentJobs.JobType.productionUsEast3); // Finish previous production job.
tester.upgrader().maintain();
assertEquals("Application starts upgrading to new version", 1, tester.deploymentQueue().jobs().size());
assertEquals("Application has pending upgrade to " + version, version, tester.application(app.id()).change().platform().get());
- // Failure redeployer does not retry failing job for prod.us-east-3 as there's an ongoing deployment
+ // Failure re-deployer does not retry failing job for prod.us-east-3, since it no longer has an available change
tester.clock().advance(Duration.ofMinutes(1));
- tester.readyJobTrigger().maintain();
+ tester.jobCompletion(DeploymentJobs.JobType.productionUsEast3).application(app).unsuccessful().submit();
assertFalse("Job is not retried", tester.deploymentQueue().jobs().stream()
.anyMatch(j -> j.jobName().equals(DeploymentJobs.JobType.productionUsEast3.jobName())));
@@ -83,16 +83,8 @@ public class FailureRedeployerTest {
tester.deployAndNotify(app, applicationPackage, true, DeploymentJobs.JobType.systemTest);
tester.deployAndNotify(app, applicationPackage, true, DeploymentJobs.JobType.stagingTest);
- // Production job fails again and exhausts all immediate retries
+ // Production job fails again, and is retried
tester.deployAndNotify(app, applicationPackage, false, DeploymentJobs.JobType.productionUsEast3);
- tester.deploymentQueue().takeJobsToRun();
- tester.clock().advance(Duration.ofMinutes(10));
- tester.jobCompletion(DeploymentJobs.JobType.productionUsEast3).application(app).unsuccessful().submit();
- assertTrue("Retries exhausted", tester.deploymentQueue().jobs().isEmpty());
- assertTrue("Failure is recorded", tester.application(app.id()).deploymentJobs().hasFailures());
-
- // Failure redeployer retries job
- tester.clock().advance(Duration.ofMinutes(5));
tester.readyJobTrigger().maintain();
assertEquals("Job is retried", Collections.singletonList(new BuildService.BuildJob(app.deploymentJobs().projectId().get(), productionUsEast3.jobName())), tester.deploymentQueue().jobs());
@@ -156,27 +148,21 @@ public class FailureRedeployerTest {
tester.upgrader().maintain();
assertEquals("Application has pending upgrade to " + version, version, tester.application(app.id()).change().platform().get());
- // system-test fails and exhausts all immediate retries
+ // system-test fails and is left with a retry
tester.deployAndNotify(app, applicationPackage, false, DeploymentJobs.JobType.systemTest);
- tester.deploymentQueue().takeJobsToRun();
- tester.clock().advance(Duration.ofMinutes(10));
- tester.jobCompletion(DeploymentJobs.JobType.systemTest).application(app).unsuccessful().submit();
- assertTrue("Retries exhausted", tester.deploymentQueue().jobs().isEmpty());
// Another version is released
version = Version.fromString("5.2");
tester.updateVersionStatus(version);
assertEquals(version, tester.controller().versionStatus().systemVersion().get().versionNumber());
+
+ // Job is left "running", so needs to time out before it can be retried.
+ tester.clock().advance(Duration.ofHours(13));
tester.upgrader().maintain();
assertEquals("Application has pending upgrade to " + version, version, tester.application(app.id()).change().platform().get());
- // Consume system-test job for 5.2
- tester.deploymentQueue().takeJobsToRun();
-
- // Failure re-deployer does not retry failing system-test job as it failed for an older change
- tester.clock().advance(Duration.ofMinutes(5));
- tester.readyJobTrigger().maintain();
- assertTrue("No jobs retried", tester.deploymentQueue().jobs().isEmpty());
+ // Cancellation of outdated version and triggering on a new version is done by the upgrader.
+ assertEquals(version, tester.application(app.id()).deploymentJobs().jobStatus().get(systemTest).lastTriggered().get().version());
}
@Test
diff --git a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/UpgraderTest.java b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/UpgraderTest.java
index 021fe0954c2..5a8a4721a09 100644
--- a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/UpgraderTest.java
+++ b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/UpgraderTest.java
@@ -21,6 +21,8 @@ import java.time.Duration;
import java.time.Instant;
import static com.yahoo.vespa.hosted.controller.application.DeploymentJobs.JobType.component;
+import static com.yahoo.vespa.hosted.controller.application.DeploymentJobs.JobType.productionEuWest1;
+import static com.yahoo.vespa.hosted.controller.application.DeploymentJobs.JobType.productionUsEast3;
import static com.yahoo.vespa.hosted.controller.application.DeploymentJobs.JobType.productionUsWest1;
import static com.yahoo.vespa.hosted.controller.application.DeploymentJobs.JobType.stagingTest;
import static com.yahoo.vespa.hosted.controller.application.DeploymentJobs.JobType.systemTest;
@@ -99,23 +101,14 @@ public class UpgraderTest {
assertEquals("New system version: Should upgrade Canaries", 2, tester.deploymentQueue().jobs().size());
tester.completeUpgradeWithError(canary0, version, "canary", DeploymentJobs.JobType.stagingTest);
assertEquals("Other Canary was cancelled", 2, tester.deploymentQueue().jobs().size());
- // TODO: Cancelled would mean it was triggerd, removed from the build system, but never reported in.
- // Thus, the expected number of jobs should be 1, above: the retrying canary0.
- // Further, canary1 should be retried after the timeout period of 12 hours, but verifying this is
- // not possible when jobs are consumed form the build system on notification, rather than on deploy.
tester.updateVersionStatus(version);
assertEquals(VespaVersion.Confidence.broken, tester.controller().versionStatus().systemVersion().get().confidence());
tester.upgrader().maintain();
assertEquals("Version broken, but Canaries should keep trying", 2, tester.deploymentQueue().jobs().size());
- // Exhaust canary retries.
- tester.jobCompletion(systemTest).application(canary1).unsuccessful().submit();
- tester.clock().advance(Duration.ofHours(1));
- tester.deployAndNotify(canary0, DeploymentTester.applicationPackage("canary"), false, DeploymentJobs.JobType.stagingTest);
- tester.jobCompletion(systemTest).application(canary1).unsuccessful().submit();
-
// --- A new version is released - which repairs the Canary app and fails a default
+ tester.clock().advance(Duration.ofHours(13));
version = Version.fromString("5.3");
tester.updateVersionStatus(version);
assertEquals(version, tester.controller().versionStatus().systemVersion().get().versionNumber());
@@ -147,13 +140,16 @@ public class UpgraderTest {
assertEquals("Upgrade with error should retry", 1, tester.deploymentQueue().jobs().size());
- // Finish previous run, with exhausted retry.
- tester.clock().advance(Duration.ofHours(1));
- tester.jobCompletion(stagingTest).application(default0).unsuccessful().submit();
// --- Failing application is repaired by changing the application, causing confidence to move above 'high' threshold
// Deploy application change
- tester.deployCompletely(tester.application("default0"), DeploymentTester.applicationPackage("default"), BuildJob.defaultBuildNumber + 1);
+ tester.deploymentQueue().takeJobsToRun();
+ tester.jobCompletion(component).application(default0).nextBuildNumber().uploadArtifact(DeploymentTester.applicationPackage("default")).submit();
+ tester.jobCompletion(stagingTest).application(default0).unsuccessful().submit();
+ tester.deployAndNotify(default0, "default", true, systemTest);
+ tester.deployAndNotify(default0, "default", true, stagingTest);
+ tester.deployAndNotify(default0, "default", true, productionUsWest1);
+ tester.deployAndNotify(default0, "default", true, productionUsEast3);
tester.updateVersionStatus(version);
assertEquals(VespaVersion.Confidence.high, tester.controller().versionStatus().systemVersion().get().confidence());
@@ -206,12 +202,10 @@ public class UpgraderTest {
tester.completeUpgrade(default2, version54, "default");
tester.completeUpgradeWithError(default3, version54, "default", DeploymentJobs.JobType.stagingTest);
- // Exhaust immediate retries for upgrade
- tester.clock().advance(Duration.ofHours(1));
- tester.jobCompletion(stagingTest).application(default3).unsuccessful().submit();
tester.completeUpgradeWithError(default4, version54, "default", DeploymentJobs.JobType.productionUsWest1);
// State: Default applications started upgrading to 5.5
+ tester.clock().advance(Duration.ofHours(13));
tester.upgrader().maintain();
tester.completeUpgradeWithError(default0, version55, "default", DeploymentJobs.JobType.stagingTest);
tester.completeUpgradeWithError(default1, version55, "default", DeploymentJobs.JobType.stagingTest);
@@ -325,12 +319,8 @@ public class UpgraderTest {
// system-test completes successfully
tester.deployAndNotify(app, applicationPackage, true, systemTest);
- // staging-test fails multiple times, exhausts retries and failure is recorded
+ // staging-test fails and failure is recorded
tester.deployAndNotify(app, applicationPackage, false, DeploymentJobs.JobType.stagingTest);
- tester.deploymentQueue().takeJobsToRun();
- tester.clock().advance(Duration.ofMinutes(10));
- tester.jobCompletion(stagingTest).application(app).unsuccessful().submit();
- assertTrue("Retries exhausted", tester.deploymentQueue().jobs().isEmpty());
assertTrue("Failure is recorded", tester.application(app.id()).deploymentJobs().hasFailures());
assertTrue("Application has pending change", tester.application(app.id()).change().isPresent());
@@ -752,8 +742,7 @@ public class UpgraderTest {
tester.upgrader().maintain();
- // Exhaust retries and finish runs
- tester.clock().advance(Duration.ofHours(1));
+ // Finish runs
tester.jobCompletion(systemTest).application(default0).unsuccessful().submit();
tester.jobCompletion(systemTest).application(default1).unsuccessful().submit();
tester.jobCompletion(systemTest).application(default2).unsuccessful().submit();
@@ -863,10 +852,6 @@ public class UpgraderTest {
tester.deployAndNotify(app, applicationPackage, true, systemTest);
tester.deployAndNotify(app, applicationPackage, true, stagingTest);
-
- // Production job fails and exhausts retries, new application changes are now accepted
- tester.deployAndNotify(app, applicationPackage, false, productionUsWest1);
- tester.clock().advance(Duration.ofHours(1));
tester.deployAndNotify(app, applicationPackage, false, productionUsWest1);
// New application change
@@ -880,8 +865,9 @@ public class UpgraderTest {
app.change().application().get().id().equals(applicationVersion));
// Deployment completes
- tester.deployAndNotify(app, applicationPackage, true, systemTest);
- tester.deployAndNotify(app, applicationPackage, true, stagingTest);
+ tester.deployAndNotify(app, applicationPackage, true, false, systemTest);
+ tester.deployAndNotify(app, applicationPackage, true, false, stagingTest);
+ tester.jobCompletion(productionUsWest1).application(app).unsuccessful().submit();
tester.deployAndNotify(app, applicationPackage, true, productionUsWest1);
assertTrue("All jobs consumed", tester.deploymentQueue().jobs().isEmpty());
diff --git a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/restapi/ContainerControllerTester.java b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/restapi/ContainerControllerTester.java
index b810c3adeb5..8e60e63e873 100644
--- a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/restapi/ContainerControllerTester.java
+++ b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/restapi/ContainerControllerTester.java
@@ -126,6 +126,7 @@ public class ContainerControllerTester {
throw new RuntimeException(e);
}
controller().applications().notifyJobCompletion(jobReport);
+ controller().applications().deploymentTrigger().triggerReadyJobs();
}
private AthenzDomain addTenantAthenzDomain(String domainName, String userName) {
diff --git a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/restapi/application/ApplicationApiTest.java b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/restapi/application/ApplicationApiTest.java
index 657a8dbb091..b9eef2069d9 100644
--- a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/restapi/application/ApplicationApiTest.java
+++ b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/restapi/application/ApplicationApiTest.java
@@ -1,6 +1,7 @@
// Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
package com.yahoo.vespa.hosted.controller.restapi.application;
+import com.google.common.base.Functions;
import com.yahoo.application.container.handler.Request;
import com.yahoo.application.container.handler.Response;
import com.yahoo.component.Version;
@@ -791,7 +792,7 @@ public class ApplicationApiTest extends ControllerContainerTest {
Version vespaVersion = new Version("6.1"); // system version from mock config server client
- BuildJob job = new BuildJob(this::notifyCompletion, tester.artifactRepository())
+ BuildJob job = new BuildJob(report -> notifyCompletion(report, tester), tester.artifactRepository())
.application(app)
.projectId(projectId);
job.type(DeploymentJobs.JobType.component).uploadArtifact(applicationPackage).submit();
@@ -845,7 +846,7 @@ public class ApplicationApiTest extends ControllerContainerTest {
.build();
// Report job failing with out of capacity
- BuildJob job = new BuildJob(this::notifyCompletion, tester.artifactRepository())
+ BuildJob job = new BuildJob(report -> notifyCompletion(report, tester), tester.artifactRepository())
.application(app)
.projectId(projectId);
job.type(DeploymentJobs.JobType.component).uploadArtifact(applicationPackage).submit();
@@ -865,12 +866,13 @@ public class ApplicationApiTest extends ControllerContainerTest {
assertEquals(DeploymentJobs.JobError.outOfCapacity, jobStatus.jobError().get());
}
- private void notifyCompletion(DeploymentJobs.JobReport report) {
+ private void notifyCompletion(DeploymentJobs.JobReport report, ContainerControllerTester tester) {
assertResponse(request("/application/v4/tenant/tenant1/application/application1/jobreport", POST)
.userIdentity(HOSTED_VESPA_OPERATOR)
.data(asJson(report))
.get(),
200, "{\"message\":\"ok\"}");
+ tester.controller().applications().deploymentTrigger().triggerReadyJobs();
}
private static byte[] asJson(DeploymentJobs.JobReport report) {
diff --git a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/versions/VersionStatusTest.java b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/versions/VersionStatusTest.java
index 27e26e3267a..14f5d00ec88 100644
--- a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/versions/VersionStatusTest.java
+++ b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/versions/VersionStatusTest.java
@@ -156,12 +156,6 @@ public class VersionStatusTest {
assertEquals("One canary failed: Broken",
Confidence.broken, confidence(tester.controller(), version1));
- // Finish running jobs
- tester.deployAndNotify(canary2, DeploymentTester.applicationPackage("canary"), false, systemTest);
- tester.clock().advance(Duration.ofHours(1));
- tester.deployAndNotify(canary1, DeploymentTester.applicationPackage("canary"), false, productionUsWest1);
- tester.deployAndNotify(canary2, DeploymentTester.applicationPackage("canary"), false, systemTest);
-
// New version is released
Version version2 = new Version("5.2");
tester.upgradeSystem(version2);
@@ -170,6 +164,7 @@ public class VersionStatusTest {
// All canaries upgrade successfully
tester.completeUpgrade(canary0, version2, "canary");
+ tester.jobCompletion(productionUsWest1).application(canary1).unsuccessful().submit();
tester.completeUpgrade(canary1, version2, "canary");
assertEquals("Confidence for remains unchanged for version1: Broken",
@@ -178,6 +173,7 @@ public class VersionStatusTest {
Confidence.low, confidence(tester.controller(), version2));
// Remaining canary upgrades to version2 which raises confidence to normal and more apps upgrade
+ tester.jobCompletion(systemTest).application(canary2).unsuccessful().submit();
tester.completeUpgrade(canary2, version2, "canary");
tester.upgradeSystem(version2);
assertEquals("Canaries have upgraded: Normal",