summaryrefslogtreecommitdiffstats
path: root/controller-server
diff options
context:
space:
mode:
authorJon Marius Venstad <venstad@gmail.com>2019-11-08 12:17:16 +0100
committerJon Marius Venstad <venstad@gmail.com>2019-11-11 08:28:01 +0100
commitea83ed12a7583335c77329be9afd415849263354 (patch)
tree9e9b4034971ae3f270382d11a2d0a41397719538 /controller-server
parent4faca41f28583c4fd6d8d27e8cc22f0d5071bd35 (diff)
Consider all known runs when checking whether versions are verified
Diffstat (limited to 'controller-server')
-rw-r--r--controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentTrigger.java81
-rw-r--r--controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/JobList.java21
-rw-r--r--controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/RunList.java43
-rw-r--r--controller-server/src/main/java/com/yahoo/vespa/hosted/controller/restapi/application/JobControllerApiHandlerHelper.java14
-rw-r--r--controller-server/src/test/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentTriggerTest.java10
-rw-r--r--controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/UpgraderTest.java15
6 files changed, 106 insertions, 78 deletions
diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentTrigger.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentTrigger.java
index f1b93c7b3b2..e7d70793898 100644
--- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentTrigger.java
+++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentTrigger.java
@@ -205,8 +205,9 @@ public class DeploymentTrigger {
controller.systemVersion());
String reason = "Job triggered manually by " + user;
var jobStatus = jobs.deploymentStatus(application).instanceJobs(instance.name());
- return (jobType.isProduction() && ! isTested(jobStatus, versions)
- ? testJobs(application.deploymentSpec(), application.change(), instance, jobStatus, versions, reason, clock.instant(), __ -> true).stream()
+ var jobList = JobList.from(jobStatus.values());
+ return (jobType.isProduction() && ! isTested(jobList, versions)
+ ? testJobs(application.deploymentSpec(), application.change(), instance, jobList, versions, reason, clock.instant(), __ -> true).stream()
: Stream.of(deploymentJob(instance, versions, application.change(), jobType, jobStatus.get(jobType), reason, clock.instant())))
.peek(this::trigger)
.map(Job::jobType).collect(toList());
@@ -301,10 +302,11 @@ public class DeploymentTrigger {
DeploymentStatus deploymentStatus = this.jobs.deploymentStatus(application);
for (Instance instance : instances) {
var jobStatus = deploymentStatus.instanceJobs(instance.name());
+ var jobList = JobList.from(jobStatus.values());
Change change = application.change();
- Optional<Instant> completedAt = max(Optional.ofNullable(jobStatus.get(systemTest))
+ Optional<Instant> completedAt = max(jobList.type(systemTest).first()
.<Instant>flatMap(job -> job.lastSuccess().map(run -> run.end().get())),
- Optional.ofNullable(jobStatus.get(stagingTest))
+ jobList.type(stagingTest).first()
.<Instant>flatMap(job -> job.lastSuccess().map(run -> run.end().get())));
String reason = "New change available";
List<Job> testJobs = null; // null means "uninitialised", while empty means "don't run any jobs".
@@ -318,17 +320,14 @@ public class DeploymentTrigger {
for (JobType job : remainingJobs) {
Versions versions = Versions.from(change, application, deploymentFor(instance, job),
controller.systemVersion());
- if (isTested(jobStatus, versions)) {
- if (completedAt.isPresent() && canTrigger(job, jobStatus, versions, instance, application.deploymentSpec(), stepJobs)) {
+ if (isTested(jobList, versions)) {
+ if (completedAt.isPresent() && canTrigger(job, jobList, versions, instance, application.deploymentSpec(), stepJobs)) {
jobs.add(deploymentJob(instance, versions, change, job, jobStatus.get(job), reason, completedAt.get()));
}
- if ( ! alreadyTriggered(jobStatus, versions) && testJobs == null) {
- testJobs = emptyList();
- }
}
else if (testJobs == null) {
testJobs = testJobs(application.deploymentSpec(),
- change, instance, jobStatus, versions,
+ change, instance, jobList, versions,
String.format("Testing deployment for %s (%s)",
job.jobName(), versions.toString()),
completedAt.orElseGet(clock::instant));
@@ -349,7 +348,7 @@ public class DeploymentTrigger {
}
}
if (testJobs == null) { // If nothing to test, but outstanding commits, test those.
- testJobs = testJobs(application.deploymentSpec(), change, instance, jobStatus,
+ testJobs = testJobs(application.deploymentSpec(), change, instance, jobList,
Versions.from(application.outstandingChange().onTopOf(change),
application,
steps.sortedDeployments(instance.productionDeployments().values()).stream().findFirst(),
@@ -363,21 +362,22 @@ public class DeploymentTrigger {
}
/** Returns whether given job should be triggered */
- private boolean canTrigger(JobType job, Map<JobType, JobStatus> status, Versions versions, Instance instance, DeploymentSpec deploymentSpec, List<JobType> parallelJobs) {
- if (status.get(job).isRunning()) return false;
+ private boolean canTrigger(JobType type, JobList jobList, Versions versions, Instance instance, DeploymentSpec deploymentSpec, List<JobType> parallelJobs) {
+ if ( ! jobList.type(type).running().isEmpty()) return false;
// Are we already running jobs which are not in the set which can run in parallel with this?
- if (parallelJobs != null && ! parallelJobs.containsAll(runningProductionJobs(status))) return false;
+ if ( parallelJobs != null
+ && ! parallelJobs.containsAll(jobList.running().production().mapToList(job -> job.id().type()))) return false;
// Are there another suspended deployment such that we shouldn't simultaneously change this?
- if (job.isProduction() && isSuspendedInAnotherZone(instance, job.zone(controller.system()))) return false;
+ if (type.isProduction() && isSuspendedInAnotherZone(instance, type.zone(controller.system()))) return false;
- return triggerAt(clock.instant(), job, status.get(job), versions, instance, deploymentSpec);
+ return triggerAt(clock.instant(), type, jobList.type(type).first().get(), versions, instance, deploymentSpec);
}
/** Returns whether given job should be triggered */
- private boolean canTrigger(JobType job, Map<JobType, JobStatus> status, Versions versions, Instance instance, DeploymentSpec deploymentSpec) {
- return canTrigger(job, status, versions, instance, deploymentSpec, null);
+ private boolean canTrigger(JobType job, JobList jobList, Versions versions, Instance instance, DeploymentSpec deploymentSpec) {
+ return canTrigger(job, jobList, versions, instance, deploymentSpec, null);
}
private boolean isSuspendedInAnotherZone(Instance instance, ZoneId zone) {
@@ -466,28 +466,10 @@ public class DeploymentTrigger {
return change.downgrades(deployment.version()) || change.downgrades(deployment.applicationVersion());
}
- private boolean isTested(Map<JobType, JobStatus> status, Versions versions) {
- return testedIn(systemTest, status.get(systemTest), versions)
- && testedIn(stagingTest, status.get(stagingTest), versions)
- || alreadyTriggered(status, versions);
- }
-
- public boolean testedIn(JobType testType, JobStatus status, Versions versions) {
- if (testType == systemTest)
- return successOn(status, versions).isPresent();
- if (testType == stagingTest)
- return successOn(status, versions).map(Run::versions).filter(versions::sourcesMatchIfPresent).isPresent();
- throw new IllegalArgumentException(testType + " is not a test job!");
- }
-
- public boolean alreadyTriggered(Map<JobType, JobStatus> status, Versions versions) {
- return status.values().stream()
- .filter(job -> job.id().type().isProduction())
- .anyMatch(job -> job.lastTriggered()
- .map(Run::versions)
- .filter(versions::targetsMatch)
- .filter(versions::sourcesMatchIfPresent)
- .isPresent());
+ public boolean isTested(JobList jobs, Versions versions) {
+ return ! jobs.type(systemTest).successOn(versions).isEmpty()
+ && ! jobs.type(stagingTest).successOn(versions).isEmpty()
+ || ! jobs.production().triggeredOn(versions).isEmpty();
}
// ---------- Change management o_O ----------
@@ -527,23 +509,22 @@ public class DeploymentTrigger {
/**
* Returns the list of test jobs that should run now, and that need to succeed on the given versions for it to be considered tested.
*/
- private List<Job> testJobs(DeploymentSpec deploymentSpec, Change change, Instance instance, Map<JobType, JobStatus> status, Versions versions,
+ private List<Job> testJobs(DeploymentSpec deploymentSpec, Change change, Instance instance, JobList jobList, Versions versions,
String reason, Instant availableSince) {
- return testJobs(deploymentSpec, change, instance, status, versions, reason, availableSince,
- jobType -> canTrigger(jobType, status, versions, instance, deploymentSpec));
+ return testJobs(deploymentSpec, change, instance, jobList, versions, reason, availableSince,
+ jobType -> canTrigger(jobType, jobList, versions, instance, deploymentSpec));
}
/**
* Returns the list of test jobs that need to succeed on the given versions for it to be considered tested, filtered by the given condition.
*/
- private List<Job> testJobs(DeploymentSpec deploymentSpec, Change change, Instance instance, Map<JobType, JobStatus> status, Versions versions,
+ private List<Job> testJobs(DeploymentSpec deploymentSpec, Change change, Instance instance, JobList jobList, Versions versions,
String reason, Instant availableSince, Predicate<JobType> condition) {
List<Job> jobs = new ArrayList<>();
for (JobType jobType : new DeploymentSteps(deploymentSpec.requireInstance(instance.name()), controller::system).testJobs()) { // TODO jonmv: Allow cross-instance validation
- Optional<Run> completion = successOn(status.get(jobType), versions)
- .filter(run -> versions.sourcesMatchIfPresent(run.versions()) || jobType == systemTest);
- if (completion.isEmpty() && condition.test(jobType))
- jobs.add(deploymentJob(instance, versions, change, jobType, status.get(jobType), reason, availableSince));
+ if ( jobList.type(jobType).successOn(versions).isEmpty()
+ && condition.test(jobType))
+ jobs.add(deploymentJob(instance, versions, change, jobType, jobList.type(jobType).first().get(), reason, availableSince));
}
return jobs;
}
@@ -552,8 +533,8 @@ public class DeploymentTrigger {
if (jobStatus.isOutOfCapacity()) reason += "; retrying on out of capacity";
var triggering = JobRun.triggering(versions.targetPlatform(), versions.targetApplication(),
- versions.sourcePlatform(), versions.sourceApplication(),
- reason, clock.instant());
+ versions.sourcePlatform(), versions.sourceApplication(),
+ reason, clock.instant());
return new Job(instance, triggering, jobType, availableSince, jobStatus.isOutOfCapacity(), change.application().isPresent());
}
diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/JobList.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/JobList.java
index 1ef83153bef..56a6bec5777 100644
--- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/JobList.java
+++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/JobList.java
@@ -44,6 +44,10 @@ public class JobList extends AbstractFilteringList<JobStatus, JobList> {
return matching(job -> ! job.isSuccess());
}
+ public JobList running() {
+ return matching(job -> job.isRunning());
+ }
+
/** Returns the subset of jobs which must be failing due to an application change */
public JobList failingApplicationChange() {
return matching(JobList::failingApplicationChange);
@@ -55,8 +59,13 @@ public class JobList extends AbstractFilteringList<JobStatus, JobList> {
}
/** Returns the subset of jobs of the given type -- most useful when negated */
+ public JobList type(Collection<? extends JobType> types) {
+ return matching(job -> types.contains(job.id().type()));
+ }
+
+ /** Returns the subset of jobs of the given type -- most useful when negated */
public JobList type(JobType... types) {
- return matching(job -> List.of(types).contains(job.id().type()));
+ return type(List.of(types));
}
/** Returns the subset of jobs of which are production jobs */
@@ -64,6 +73,16 @@ public class JobList extends AbstractFilteringList<JobStatus, JobList> {
return matching(job -> job.id().type().isProduction());
}
+ /** Returns the jobs with any runs matching the given versions — targets only for system test, everything present otherwise. */
+ public JobList triggeredOn(Versions versions) {
+ return matching(job -> ! RunList.from(job).on(versions).isEmpty());
+ }
+
+ /** Returns the jobs with successful runs matching the given versions — targets only for system test, everything present otherwise. */
+ public JobList successOn(Versions versions) {
+ return matching(job -> ! RunList.from(job).status(RunStatus.success).on(versions).isEmpty());
+ }
+
// ----------------------------------- JobRun filtering
/** Returns the list in a state where the next filter is for the lastTriggered run type */
diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/RunList.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/RunList.java
new file mode 100644
index 00000000000..dc17e1c17bb
--- /dev/null
+++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/RunList.java
@@ -0,0 +1,43 @@
+package com.yahoo.vespa.hosted.controller.deployment;
+
+import com.yahoo.collections.AbstractFilteringList;
+import com.yahoo.vespa.hosted.controller.api.integration.deployment.JobType;
+
+import java.util.Collection;
+import java.util.List;
+
+/**
+ * List for filtering deployment job {@link Run}s.
+ *
+ * @author jonmv
+ */
+public class RunList extends AbstractFilteringList<Run, RunList> {
+
+ private RunList(Collection<? extends Run> items, boolean negate) {
+ super(items, negate, RunList::new);
+ }
+
+ public static RunList from(Collection<? extends Run> runs) {
+ return new RunList(runs, false);
+ }
+
+ public static RunList from(JobStatus job) {
+ return from(job.runs().descendingMap().values());
+ }
+
+ /** Returns the jobs with runs matching the given versions — targets only for system test, everything present otherwise. */
+ public RunList on(Versions versions) {
+ return matching(run -> matchingVersions(run, versions));
+ }
+
+ /** Returns the runs with status among the given. */
+ public RunList status(RunStatus... status) {
+ return matching(run -> List.of(status).contains(run.status()));
+ }
+
+ private static boolean matchingVersions(Run run, Versions versions) {
+ return versions.targetsMatch(run.versions())
+ && (versions.sourcesMatchIfPresent(run.versions()) || run.id().type() == JobType.systemTest);
+ }
+
+}
diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/restapi/application/JobControllerApiHandlerHelper.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/restapi/application/JobControllerApiHandlerHelper.java
index 9a9a9798c6d..ed9612a7343 100644
--- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/restapi/application/JobControllerApiHandlerHelper.java
+++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/restapi/application/JobControllerApiHandlerHelper.java
@@ -25,8 +25,10 @@ import com.yahoo.vespa.hosted.controller.application.Deployment;
import com.yahoo.vespa.hosted.controller.application.TenantAndApplicationId;
import com.yahoo.vespa.hosted.controller.deployment.DeploymentSteps;
import com.yahoo.vespa.hosted.controller.deployment.JobController;
+import com.yahoo.vespa.hosted.controller.deployment.JobList;
import com.yahoo.vespa.hosted.controller.deployment.JobStatus;
import com.yahoo.vespa.hosted.controller.deployment.Run;
+import com.yahoo.vespa.hosted.controller.deployment.RunList;
import com.yahoo.vespa.hosted.controller.deployment.RunLog;
import com.yahoo.vespa.hosted.controller.deployment.RunStatus;
import com.yahoo.vespa.hosted.controller.deployment.Step;
@@ -239,11 +241,12 @@ class JobControllerApiHandlerHelper {
jobObject.setLong("pausedUntil", until)));
int runs = 0;
Cursor runArray = jobObject.setArray("runs");
+ JobList jobList = JobList.from(status.values());
if (type.isTest()) {
Deque<List<JobType>> pending = new ArrayDeque<>();
pendingProduction.entrySet().stream()
- .filter(typeVersions -> ! controller.applications().deploymentTrigger().testedIn(type, status.get(type), typeVersions.getValue()))
- .filter(typeVersions -> ! controller.applications().deploymentTrigger().alreadyTriggered(status, typeVersions.getValue()))
+ .filter(typeVersions -> jobList.type(type).successOn(typeVersions.getValue()).isEmpty())
+ .filter(typeVersions -> jobList.production().triggeredOn(typeVersions.getValue()).isEmpty())
.collect(groupingBy(Map.Entry::getValue,
LinkedHashMap::new,
Collectors.mapping(Map.Entry::getKey, toList())))
@@ -279,12 +282,13 @@ class JobControllerApiHandlerHelper {
pendingObject.setString("cooldown", "failed");
else {
int pending = 0;
- if ( ! controller.applications().deploymentTrigger().alreadyTriggered(status, versions)) {
- if ( ! controller.applications().deploymentTrigger().testedIn(systemTest, status.get(systemTest), versions)) {
+ controller.applications().deploymentTrigger();
+ if (jobList.production().triggeredOn(versions).isEmpty()) {
+ if (jobList.type(systemTest).successOn(versions).isEmpty()) {
pending++;
pendingObject.setString(shortNameOf(systemTest, controller.system()), statusOf(controller, instance.id(), systemTest, versions));
}
- if ( ! controller.applications().deploymentTrigger().testedIn(stagingTest, status.get(stagingTest), versions)) {
+ if (jobList.type(stagingTest).successOn(versions).isEmpty()) {
pending++;
pendingObject.setString(shortNameOf(stagingTest, controller.system()), statusOf(controller, instance.id(), stagingTest, versions));
}
diff --git a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentTriggerTest.java b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentTriggerTest.java
index 8fb766164c2..9d17acc6a56 100644
--- a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentTriggerTest.java
+++ b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentTriggerTest.java
@@ -504,7 +504,7 @@ public class DeploymentTriggerTest {
Version version2 = Version.fromString("7.2");
tester.controllerTester().upgradeSystem(version2);
tester.upgrader().maintain();
- app1.runJob(systemTest).runJob(stagingTest) // tests for previous version.
+ app1.runJob(systemTest).runJob(stagingTest) // tests for previous version — these are "reused" later.
.runJob(systemTest).runJob(stagingTest).timeOutConvergence(productionUsCentral1);
assertEquals(version2, app1.deployment(productionUsCentral1.zone(main)).version());
Instant triggered = app1.instance().deploymentJobs().jobStatus().get(productionUsCentral1).lastTriggered().get().at();
@@ -517,18 +517,12 @@ public class DeploymentTriggerTest {
assertEquals("Change becomes latest non-broken version", Change.of(version1), app1.application().change());
// version1 proceeds 'til the last job, where it fails; us-central-1 is skipped, as current change is strictly dominated by what's deployed there.
- app1.runJob(systemTest)
- .runJob(stagingTest)
- .failDeployment(productionEuWest1);
+ app1.failDeployment(productionEuWest1);
assertEquals(triggered, app1.instance().deploymentJobs().jobStatus().get(productionUsCentral1).lastTriggered().get().at());
- // Eagerly triggered system and staging tests complete.
- app1.runJob(systemTest).runJob(stagingTest);
-
// Roll out a new application version, which gives a dual change -- this should trigger us-central-1, but only as long as it hasn't yet deployed there.
ApplicationVersion revision1 = app1.lastSubmission().get();
app1.submit(applicationPackage);
- app1.jobAborted(productionEuWest1);
ApplicationVersion revision2 = app1.lastSubmission().get();
app1.runJob(systemTest).runJob(stagingTest);
assertEquals(Change.of(version1).with(revision2), app1.application().change());
diff --git a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/UpgraderTest.java b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/UpgraderTest.java
index 36039c47025..880d74b0da6 100644
--- a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/UpgraderTest.java
+++ b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/UpgraderTest.java
@@ -578,7 +578,6 @@ public class UpgraderTest {
// us-central-1 succeeds upgrade to 6.3, with the revision, but us-east-3 wants to proceed with only the revision change.
app.runJob(productionUsCentral1);
app.runJob(systemTest).runJob(stagingTest).runJob(productionUsEast3);
- app.runJob(systemTest).runJob(stagingTest); // Test last changes outside prod.
assertEquals(List.of(), tester.jobs().active());
assertEquals(new Version("6.3"), app.deployment(ZoneId.from("prod", "us-central-1")).version());
@@ -936,7 +935,7 @@ public class UpgraderTest {
// Application downgrades to pinned version.
tester.abortAll();
- context.runJob(systemTest).runJob(stagingTest).runJob(productionUsCentral1);
+ context.runJob(stagingTest).runJob(productionUsCentral1);
assertTrue(context.application().change().hasTargets());
context.runJob(productionUsWest1); // us-east-3 never upgraded, so no downgrade is needed.
assertFalse(context.application().change().hasTargets());
@@ -1034,21 +1033,9 @@ public class UpgraderTest {
assertEquals(v2, application.deployment(ZoneId.from("prod", "us-west-1")).version());
assertEquals(v1, application.deployment(ZoneId.from("prod", "us-east-3")).version());
- // Need to test v2->v3 combination again before upgrading second deployment (not really, but this is a limitation of lastSuccess)
- tester.triggerJobs();
- assertEquals(v2, application.instance().deploymentJobs().jobStatus().get(stagingTest).lastTriggered().get().sourcePlatform().get());
- assertEquals(v3, application.instance().deploymentJobs().jobStatus().get(stagingTest).lastTriggered().get().platform());
- application.runJob(stagingTest);
-
// Second deployment upgrades
application.runJob(productionUsWest1);
- // ... now we have to test v1->v3 again :(
- tester.triggerJobs();
- assertEquals(v1, application.instance().deploymentJobs().jobStatus().get(stagingTest).lastTriggered().get().sourcePlatform().get());
- assertEquals(v3, application.instance().deploymentJobs().jobStatus().get(stagingTest).lastTriggered().get().platform());
- application.runJob(stagingTest);
-
// Upgrade completes
application.runJob(productionUsEast3);
assertTrue("Upgrade complete", application.application().change().isEmpty());