summaryrefslogtreecommitdiffstats
path: root/controller-server
diff options
context:
space:
mode:
authorJon Marius Venstad <venstad@gmail.com>2021-08-19 15:01:58 +0200
committerJon Marius Venstad <venstad@gmail.com>2021-08-19 15:01:58 +0200
commitfa9cb345a17bcedfae450022d50cef687e4482c4 (patch)
tree58c5226c15124295357b6fe811ca9fb23416762c /controller-server
parenta36f3ee1bedc1917e8ff1aeddcaa3b81f6dfe54e (diff)
Abort running jobs less eagerly
Diffstat (limited to 'controller-server')
-rw-r--r--controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentTrigger.java51
-rw-r--r--controller-server/src/test/java/com/yahoo/vespa/hosted/controller/ControllerTest.java2
-rw-r--r--controller-server/src/test/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentTriggerTest.java31
-rw-r--r--controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/UpgraderTest.java4
-rw-r--r--controller-server/src/test/java/com/yahoo/vespa/hosted/controller/restapi/application/ApplicationApiTest.java1
-rw-r--r--controller-server/src/test/java/com/yahoo/vespa/hosted/controller/restapi/application/responses/deployment-overview.json4
-rw-r--r--controller-server/src/test/java/com/yahoo/vespa/hosted/controller/versions/VersionStatusTest.java2
7 files changed, 49 insertions, 46 deletions
diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentTrigger.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentTrigger.java
index 30e731853f2..2dbf910b49e 100644
--- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentTrigger.java
+++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentTrigger.java
@@ -105,16 +105,6 @@ public class DeploymentTrigger {
instance = instance.withChange(instance.change().with(outstanding.application().get()));
return instance.withChange(remainingChange(instance, status));
});
-
- // Abort irrelevant, running jobs to get new application out faster.
- Map<JobId, List<Versions>> newJobsToRun = jobs.deploymentStatus(application.get()).jobsToRun();
- for (Run run : jobs.active(application.get().id().instance(instanceName))) {
- if ( ! run.id().type().environment().isManuallyDeployed()
- && newJobsToRun.getOrDefault(run.id().job(), List.of()).stream()
- .noneMatch(versions -> versions.targetsMatch(run.versions())
- && versions.sourcesMatchIfPresent(run.versions())))
- jobs.abort(run.id());
- }
}
}
applications().store(application);
@@ -303,24 +293,22 @@ public class DeploymentTrigger {
.collect(toList());
}
- /**
- * Finds the next step to trigger for the given application, if any, and returns these as a list.
- */
+ /** Finds the next step to trigger for the given application, if any, and returns these as a list. */
private List<Job> computeReadyJobs(DeploymentStatus status) {
List<Job> jobs = new ArrayList<>();
status.jobsToRun().forEach((job, versionsList) -> {
- for (Versions versions : versionsList)
- status.jobSteps().get(job).readyAt(status.application().require(job.application().instance()).change())
- .filter(readyAt -> ! clock.instant().isBefore(readyAt))
- .filter(__ -> ! status.jobs().get(job).get().isRunning())
- .filter(__ -> ! (job.type().isProduction() && isUnhealthyInAnotherZone(status.application(), job)))
- .ifPresent(readyAt -> {
- jobs.add(deploymentJob(status.application().require(job.application().instance()),
- versions,
- job.type(),
- status.instanceJobs(job.application().instance()).get(job.type()),
- readyAt));
- });
+ for (Versions versions : versionsList)
+ status.jobSteps().get(job).readyAt(status.application().require(job.application().instance()).change())
+ .filter(readyAt -> ! clock.instant().isBefore(readyAt))
+ .filter(__ -> ! (job.type().isProduction() && isUnhealthyInAnotherZone(status.application(), job)))
+ .filter(__ -> abortIfRunning(versionsList, status.jobs().get(job).get())) // Abort and trigger this later if running with outdated parameters.
+ .ifPresent(readyAt -> {
+ jobs.add(deploymentJob(status.application().require(job.application().instance()),
+ versions,
+ job.type(),
+ status.instanceJobs(job.application().instance()).get(job.type()),
+ readyAt));
+ });
});
return Collections.unmodifiableList(jobs);
}
@@ -335,6 +323,19 @@ public class DeploymentTrigger {
return false;
}
+ /** Returns whether the job is not running, and also aborts it if it's running with outdated versions. */
+ private boolean abortIfRunning(List<Versions> versionsList, JobStatus status) {
+ if ( ! status.isRunning())
+ return true;
+
+ Run last = status.lastTriggered().get();
+ if (versionsList.stream().noneMatch(versions -> versions.targetsMatch(last.versions())
+ && versions.sourcesMatchIfPresent(last.versions())))
+ controller.jobController().abort(last.id());
+
+ return false;
+ }
+
// ---------- Change management o_O ----------
private boolean acceptNewApplicationVersion(DeploymentStatus status, InstanceName instance) {
diff --git a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/ControllerTest.java b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/ControllerTest.java
index e52d1900a9d..86b9370150c 100644
--- a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/ControllerTest.java
+++ b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/ControllerTest.java
@@ -114,7 +114,7 @@ public class ControllerTest {
context.runJob(stagingTest);
// production job succeeding now
- context.jobAborted(productionUsWest1);
+ context.triggerJobs().jobAborted(productionUsWest1);
context.runJob(productionUsWest1);
// causes triggering of next production job
diff --git a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentTriggerTest.java b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentTriggerTest.java
index 70a967ecef9..d4c9425fc03 100644
--- a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentTriggerTest.java
+++ b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentTriggerTest.java
@@ -127,7 +127,7 @@ public class DeploymentTriggerTest {
assertEquals(upgrade.with(app.lastSubmission().get()), app.instance().change());
}
- @Test
+ @Test
public void abortsJobsOnNewApplicationChange() {
var app = tester.newDeploymentContext();
app.submit()
@@ -141,15 +141,15 @@ public class DeploymentTriggerTest {
app.submit();
assertTrue(tester.jobs().active(id).isPresent());
+ tester.triggerJobs();
tester.runner().run();
- assertFalse(tester.jobs().active(id).isPresent());
+ assertTrue(tester.jobs().active(id).isPresent()); // old run
+ app.runJob(systemTest).runJob(stagingTest).runJob(stagingTest); // outdated run is aborted when otherwise blocking a new run
tester.triggerJobs();
- assertEquals(EnumSet.of(systemTest, stagingTest), tester.jobs().active().stream()
- .map(run -> run.id().type())
- .collect(Collectors.toCollection(() -> EnumSet.noneOf(JobType.class))));
+ app.jobAborted(productionUsCentral1);
- app.deploy();
+ app.runJob(productionUsCentral1).runJob(productionUsWest1).runJob(productionUsEast3);
assertEquals(Change.empty(), app.instance().change());
tester.controllerTester().upgradeSystem(new Version("8.9"));
@@ -159,6 +159,8 @@ public class DeploymentTriggerTest {
// Jobs are not aborted when the new submission remains outstanding.
app.submit();
+ app.runJob(systemTest).runJob(stagingTest);
+ tester.triggerJobs();
tester.runner().run();
assertEquals(EnumSet.of(productionUsCentral1), tester.jobs().active().stream()
.map(run -> run.id().type())
@@ -489,8 +491,9 @@ public class DeploymentTriggerTest {
Version version2 = Version.fromString("7.2");
tester.controllerTester().upgradeSystem(version2);
tester.upgrader().maintain();
- app1.runJob(systemTest).runJob(stagingTest) // tests for previous version — these are "reused" later.
- .runJob(systemTest).runJob(stagingTest).timeOutConvergence(productionUsCentral1);
+ tester.triggerJobs();
+ app1.jobAborted(systemTest).jobAborted(stagingTest);
+ app1.runJob(systemTest).runJob(stagingTest).timeOutConvergence(productionUsCentral1);
assertEquals(version2, app1.deployment(productionUsCentral1.zone(main)).version());
Instant triggered = app1.instanceJobs().get(productionUsCentral1).lastTriggered().get().start();
tester.clock().advance(Duration.ofHours(1));
@@ -502,7 +505,8 @@ public class DeploymentTriggerTest {
assertEquals("Change becomes latest non-broken version", Change.of(version1), app1.instance().change());
// version1 proceeds 'til the last job, where it fails; us-central-1 is skipped, as current change is strictly dominated by what's deployed there.
- app1.failDeployment(productionEuWest1);
+ app1.runJob(systemTest).runJob(stagingTest)
+ .failDeployment(productionEuWest1);
assertEquals(triggered, app1.instanceJobs().get(productionUsCentral1).lastTriggered().get().start());
// Roll out a new application version, which gives a dual change -- this should trigger us-central-1, but only as long as it hasn't yet deployed there.
@@ -569,7 +573,7 @@ public class DeploymentTriggerTest {
app.runJob(systemTest).runJob(stagingTest);
// Finish old run of the aborted production job.
- app.jobAborted(productionUsEast3);
+ app.triggerJobs().jobAborted(productionUsEast3);
// New upgrade is already tested for both jobs.
@@ -722,16 +726,15 @@ public class DeploymentTriggerTest {
// Finish deployment for apps 2 and 3, then release a new version, leaving only app1 with an application upgrade.
app2.deploy();
app3.deploy();
- app1.assertRunning(stagingTest);
- assertEquals(1, tester.jobs().active().size());
+ app1.runJob(stagingTest);
+ assertEquals(0, tester.jobs().active().size());
tester.controllerTester().upgradeSystem(new Version("6.2"));
tester.upgrader().maintain();
app1.submit(applicationPackage);
- app1.jobAborted(stagingTest);
// Tests for app1 trigger before the others since it carries an application upgrade.
- tester.readyJobsTrigger().maintain();
+ tester.readyJobsTrigger().run();
app1.assertRunning(systemTest);
app1.assertRunning(stagingTest);
assertEquals(2, tester.jobs().active().size());
diff --git a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/UpgraderTest.java b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/UpgraderTest.java
index 326f4bf311e..91b5dc232e5 100644
--- a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/UpgraderTest.java
+++ b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/UpgraderTest.java
@@ -167,7 +167,7 @@ public class UpgraderTest {
// --- Failing application is repaired by changing the application, causing confidence to move above 'high' threshold
// Deploy application change
default0.submit(applicationPackage("default"));
- default0.jobAborted(stagingTest);
+ default0.triggerJobs().jobAborted(stagingTest);
default0.deploy();
tester.controllerTester().computeVersionStatus();
@@ -232,7 +232,7 @@ public class UpgraderTest {
// State: Default applications started upgrading to version5
tester.clock().advance(Duration.ofHours(1));
tester.upgrader().maintain();
- default3.failDeployment(stagingTest);
+ default3.triggerJobs().jobAborted(stagingTest);
default0.runJob(systemTest)
.failDeployment(stagingTest);
default1.runJob(systemTest)
diff --git a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/restapi/application/ApplicationApiTest.java b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/restapi/application/ApplicationApiTest.java
index ebfd78c2764..8a182ce5eca 100644
--- a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/restapi/application/ApplicationApiTest.java
+++ b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/restapi/application/ApplicationApiTest.java
@@ -84,7 +84,6 @@ import java.io.File;
import java.math.BigInteger;
import java.net.URI;
import java.security.cert.X509Certificate;
-import java.time.Duration;
import java.time.Instant;
import java.time.format.DateTimeFormatter;
import java.time.temporal.ChronoUnit;
diff --git a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/restapi/application/responses/deployment-overview.json b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/restapi/application/responses/deployment-overview.json
index 55be0881ec2..2813bd0ab7d 100644
--- a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/restapi/application/responses/deployment-overview.json
+++ b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/restapi/application/responses/deployment-overview.json
@@ -413,7 +413,7 @@
"id": 1,
"url": "http://localhost:8080/application/v4/tenant/tenant1/application/application1/instance/instance1/job/production-us-west-1/run/1",
"start": "(ignore)",
- "status": "aborted",
+ "status": "running",
"versions": {
"targetPlatform": "6.1.0",
"targetApplication": {
@@ -469,7 +469,7 @@
"id": 2,
"url": "http://localhost:8080/application/v4/tenant/tenant1/application/application1/instance/instance1/job/production-us-east-3/run/2",
"start": "(ignore)",
- "status": "aborted",
+ "status": "running",
"versions": {
"targetPlatform": "6.1.0",
"targetApplication": {
diff --git a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/versions/VersionStatusTest.java b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/versions/VersionStatusTest.java
index 03002aee955..e3fef9f9066 100644
--- a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/versions/VersionStatusTest.java
+++ b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/versions/VersionStatusTest.java
@@ -304,7 +304,7 @@ public class VersionStatusTest {
Confidence.low, confidence(tester.controller(), version2));
// Remaining canary upgrades to version2 which raises confidence to normal and more apps upgrade
- canary2.failDeployment(systemTest);
+ canary2.triggerJobs().jobAborted(systemTest).jobAborted(stagingTest);
canary2.runJob(stagingTest);
canary2.deployPlatform(version2);
tester.controllerTester().computeVersionStatus();