summaryrefslogtreecommitdiffstats
path: root/controller-server
diff options
context:
space:
mode:
authorJon Marius Venstad <venstad@gmail.com>2019-12-10 15:17:16 +0100
committerJon Marius Venstad <venstad@gmail.com>2019-12-11 10:49:29 +0100
commit16b65830f46abf2ac3ae1b9f4482daaafd8ccd42 (patch)
tree7f6001d76e643e67fdb413348860273001acf221 /controller-server
parentcb63b0fc9ebad9517ef8daaf0e836c75e5201db5 (diff)
Some cleanup and TODOs
Diffstat (limited to 'controller-server')
-rw-r--r--controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentStatus.java58
-rw-r--r--controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentTrigger.java128
2 files changed, 10 insertions, 176 deletions
diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentStatus.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentStatus.java
index 45850669210..549c188f795 100644
--- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentStatus.java
+++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentStatus.java
@@ -358,12 +358,13 @@ public class DeploymentStatus {
}
@Override
+ // TODO jonmv: Split in readyAt(change, versions), pausedUntil(), and coolingDownUntil(versions)
public Optional<Instant> readyAt(Change change, Versions versions) {
Optional<Instant> readyAt = super.readyAt(change, versions);
if (readyAt.isEmpty())
return Optional.empty();
- Optional<Instant> pausedUntil = status.application.require(job.id().application().instance()).jobPause(job.id().type());
+ Optional<Instant> pausedUntil = status.application().require(job.id().application().instance()).jobPause(job.id().type());
if (pausedUntil.isPresent() && pausedUntil.get().isAfter(readyAt.get()))
return pausedUntil;
@@ -455,61 +456,6 @@ public class DeploymentStatus {
}
- /*
- * Compute all JobIds to run: test and staging for first instance, unless declared in a parallel instance.
- * Create StepStatus for the first two, then
- *
- *
- *
- *
- *
- *
- *
- *
- *
- *
- * Prod: completedAt: change, fullChange
- * Test: completedAt: versions?
- * Delay: completedAt: change, fullChange
- * Any: readyAt: change -> versions
- * Any: testedAt: versions
- *
- * Start with system and staging for first instance as dependencies.
- * Declared test jobs replace these for intra-instance dependents.
- * Test and staging are implicitly parallel. (Well, they already are, if they don't depend on each other.)
- *
- * Map JobId to StepStatus:
- * For each prod JobId: Compute Optional<Versions> to run for current change to be done.
- * Prod jobs may wait for other prod jobs' to-do runs, but ignores their versions.
- * Prod jobs may wait for test jobs, considering their versions.
- * For each prod job, if job already triggered on desired versions, ignore the below.
- * For each prod job, add other prod job dependencies.
- * For each prod job, add explicit tests in same instance.
- * For each prod versions to run, find all prod jobs for which those versions aren't tested (before the job), then
- * for each such set of jobs, find the last common dependency instance, and add the test for that, or
- * for each such set of jobs, add tests for those versions with the first declared instance; in any case
- * add all implicit tests to some structure for tracking.
- *
- * Eliminate already running jobs.
- * Keep set of JobId x Versions for each StepStatus, in topological order, for starting jobs and for display.
- * DepTri: Needs all jobs to run that are also ready. Test jobs are always ready.
- * API: Needs all jobs to run, and what they are waiting for, like, delay (until), or other jobs, or pause.
- * To find dependency jobs, DFS and sort by topological order.
- *
- * anySysTest && anyStaTest || triggeredProd
- *
- *
- *
- * Delay after test jobs? Requires test jobs in DAG,
- * which requires completeAt, and thus readyAt, to accept Change _and_ Versions. Public with Change only perhaps?
- * Careful not to require ALL tests to run with versions for each prod job!
- *
- * readyAt and ordering of test jobs? Use lastSuccess.at for tests? Not quite right, but used today ...
- *
- * Add all possible test jobs to steps. Add them as prerequisites for first job in each instance ... ?
- *
- */
-
public static List<JobId> jobsFor(Application application, SystemName system) {
if (DeploymentSpec.empty.equals(application.deploymentSpec()))
return List.of();
diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentTrigger.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentTrigger.java
index 7bcdfd48265..d895fe27322 100644
--- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentTrigger.java
+++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentTrigger.java
@@ -3,7 +3,6 @@ package com.yahoo.vespa.hosted.controller.deployment;
import com.yahoo.config.application.api.DeploymentInstanceSpec;
import com.yahoo.config.application.api.DeploymentSpec;
-import com.yahoo.config.application.api.DeploymentSpec.Step;
import com.yahoo.config.provision.ApplicationId;
import com.yahoo.config.provision.Environment;
import com.yahoo.config.provision.zone.ZoneId;
@@ -14,7 +13,6 @@ import com.yahoo.vespa.hosted.controller.Controller;
import com.yahoo.vespa.hosted.controller.Instance;
import com.yahoo.vespa.hosted.controller.api.identifiers.DeploymentId;
import com.yahoo.vespa.hosted.controller.api.integration.deployment.ApplicationVersion;
-import com.yahoo.vespa.hosted.controller.api.integration.deployment.JobId;
import com.yahoo.vespa.hosted.controller.api.integration.deployment.JobType;
import com.yahoo.vespa.hosted.controller.application.ApplicationList;
import com.yahoo.vespa.hosted.controller.application.Change;
@@ -35,15 +33,12 @@ import java.util.OptionalLong;
import java.util.function.Predicate;
import java.util.function.Supplier;
import java.util.logging.Logger;
-import java.util.stream.Collectors;
import java.util.stream.Stream;
import static com.yahoo.vespa.hosted.controller.api.integration.deployment.JobType.stagingTest;
import static com.yahoo.vespa.hosted.controller.api.integration.deployment.JobType.systemTest;
import static java.util.Comparator.comparing;
-import static java.util.Comparator.naturalOrder;
import static java.util.stream.Collectors.groupingBy;
-import static java.util.stream.Collectors.joining;
import static java.util.stream.Collectors.partitioningBy;
import static java.util.stream.Collectors.toList;
@@ -136,7 +131,7 @@ public class DeploymentTrigger {
.collect(groupingBy(Job::applicationId)))
.values().stream()
.map(jobs -> (Supplier<Long>) jobs.stream()
- .filter(this::trigger)
+ .peek(this::trigger)
.limit(entry.getKey() ? 1 : Long.MAX_VALUE)::count))
.parallel().map(Supplier::get).reduce(0L, Long::sum);
}
@@ -147,24 +142,13 @@ public class DeploymentTrigger {
* If the build service can not find the given job, or claims it is illegal to trigger it,
* the project id is removed from the application owning the job, to prevent further trigger attempts.
*/
- public boolean trigger(Job job) {
+ public void trigger(Job job) {
log.log(LogLevel.DEBUG, "Triggering " + job);
- try {
- applications().lockApplicationOrThrow(TenantAndApplicationId.from(job.applicationId()), application -> {
- jobs.start(job.applicationId(), job.jobType, job.versions);
- applications().store(application.with(job.applicationId().instance(), instance ->
- instance.withJobPause(job.jobType, OptionalLong.empty())));
- });
-
- return true;
- }
- catch (RuntimeException e) {
- log.log(LogLevel.WARNING, "Exception triggering " + job + ": " + e);
- if (e instanceof NoSuchElementException || e instanceof IllegalArgumentException)
- applications().lockApplicationOrThrow(TenantAndApplicationId.from(job.applicationId()), application ->
- applications().store(application.withProjectId(OptionalLong.empty())));
- return false;
- }
+ applications().lockApplicationOrThrow(TenantAndApplicationId.from(job.applicationId()), application -> {
+ jobs.start(job.applicationId(), job.jobType, job.versions);
+ applications().store(application.with(job.applicationId().instance(), instance ->
+ instance.withJobPause(job.jobType, OptionalLong.empty())));
+ });
}
/** Force triggering of a job for given instance. */
@@ -177,6 +161,7 @@ public class DeploymentTrigger {
var jobStatus = jobs.deploymentStatus(application).instanceJobs(instance.name());
var jobList = JobList.from(jobStatus.values());
return (jobType.isProduction() && requireTests && ! isTested(jobList, versions)
+ // TODO jonmv: Expose adding test jobs from DeploymentStatus, and use this here.
? testJobs(application.deploymentSpec(), application.change(), instance, jobList, versions, reason, clock.instant(), __ -> true).stream()
: Stream.of(deploymentJob(instance, versions, application.change(), jobType, jobStatus.get(jobType), reason, clock.instant())))
.peek(this::trigger)
@@ -238,10 +223,6 @@ public class DeploymentTrigger {
return Optional.ofNullable(instance.deployments().get(jobType.zone(controller.system())));
}
- private static <T extends Comparable<T>> Optional<T> max(Optional<T> o1, Optional<T> o2) {
- return o1.isEmpty() ? o2 : o2.isEmpty() ? o1 : o1.get().compareTo(o2.get()) >= 0 ? o1 : o2;
- }
-
// ---------- Ready job computation ----------
/** Returns the set of all jobs which have changes to propagate from the upstream steps. */
@@ -280,94 +261,10 @@ public class DeploymentTrigger {
});
});
});
-
- /*
- applications().getApplication(id).ifPresent(application -> {
- Collection<Instance> instances = application.deploymentSpec().instances().stream()
- .flatMap(instance -> application.get(instance.name()).stream())
- .collect(Collectors.toUnmodifiableList());
- DeploymentStatus deploymentStatus = this.jobs.deploymentStatus(application);
- for (Instance instance : instances) {
- var jobStatus = deploymentStatus.instanceJobs(instance.name());
- var jobList = JobList.from(jobStatus.values());
- Change change = application.change();
- Optional<Instant> completedAt = max(jobList.type(systemTest).first()
- .<Instant>flatMap(job -> job.lastSuccess().map(run -> run.end().get())),
- jobList.type(stagingTest).first()
- .<Instant>flatMap(job -> job.lastSuccess().map(run -> run.end().get())));
- String reason = "New change available";
- List<Job> testJobs = null; // null means "uninitialised", while empty means "don't run any jobs".
- DeploymentSteps steps = steps(application.deploymentSpec().requireInstance(instance.name()));
-
- if (change.hasTargets()) {
- for (Step step : steps.production()) {
- List<JobType> stepJobs = steps.toJobs(step);
- List<JobType> remainingJobs = stepJobs.stream().filter(job -> ! isComplete(change, change, instance, job, jobStatus.get(job))).collect(toList());
- if ( ! remainingJobs.isEmpty()) { // Change is incomplete; trigger remaining jobs if ready, or their test jobs if untested.
- for (JobType job : remainingJobs) {
- Versions versions = Versions.from(change, application, deploymentFor(instance, job),
- controller.systemVersion());
- if (isTested(jobList, versions)) {
- if (completedAt.isPresent() && canTrigger(job, jobList, versions, instance, application.deploymentSpec(), stepJobs)) {
- jobs.add(deploymentJob(instance, versions, change, job, jobStatus.get(job), reason, completedAt.get()));
- }
- }
- else if (testJobs == null) {
- testJobs = testJobs(application.deploymentSpec(),
- change, instance, jobList, versions,
- String.format("Testing deployment for %s (%s)",
- job.jobName(), versions.toString()),
- completedAt.orElseGet(clock::instant));
- }
- }
- completedAt = Optional.empty();
- }
- else { // All jobs are complete; find the time of completion of this step.
- if (stepJobs.isEmpty()) { // No jobs means this is a delay step.
- completedAt = completedAt.map(at -> at.plus(step.delay())).filter(at -> ! at.isAfter(clock.instant()));
- reason += " after a delay of " + step.delay();
- }
- else {
- completedAt = stepJobs.stream().map(job -> jobStatus.get(job).lastCompleted().get().end().get()).max(naturalOrder());
- reason = "Available change in " + stepJobs.stream().map(JobType::jobName).collect(joining(", "));
- }
- }
- }
- }
- if (testJobs == null) { // If nothing to test, but outstanding commits, test those.
- testJobs = testJobs(application.deploymentSpec(), change, instance, jobList,
- Versions.from(application.outstandingChange().onTopOf(change),
- application,
- steps.sortedDeployments(instance.productionDeployments().values()).stream().findFirst(),
- controller.systemVersion()),
- "Testing last changes outside prod", clock.instant());
- }
- jobs.addAll(testJobs);
- }
- });
- */
return Collections.unmodifiableList(jobs);
}
/** Returns whether given job should be triggered */
- private boolean canTrigger(JobType type, JobList jobList, Versions versions, Instance instance, DeploymentSpec deploymentSpec, List<JobType> parallelJobs) {
- if ( ! jobList.type(type).running().isEmpty()) return false;
-
- // Are we already running jobs which are not in the set which can run in parallel with this?
- if ( parallelJobs != null
- && ! parallelJobs.containsAll(jobList.running().production().mapToList(job -> job.id().type()))) return false;
-
- // Are there another suspended deployment such that we shouldn't simultaneously change this?
- if (type.isProduction() && isSuspendedInAnotherZone(instance, type.zone(controller.system()))) return false;
-
- return triggerAt(clock.instant(), type, jobList.type(type).first().get(), versions, instance, deploymentSpec);
- }
-
- /** Returns whether given job should be triggered */
- private boolean canTrigger(JobType job, JobList jobList, Versions versions, Instance instance, DeploymentSpec deploymentSpec) {
- return canTrigger(job, jobList, versions, instance, deploymentSpec, null);
- }
-
private boolean isSuspendedInAnotherZone(Instance instance, ZoneId zone) {
for (Deployment deployment : instance.productionDeployments().values()) {
if ( ! deployment.zone().equals(zone)
@@ -473,15 +370,6 @@ public class DeploymentTrigger {
// ---------- Version and job helpers ----------
/**
- * Returns the list of test jobs that should run now, and that need to succeed on the given versions for it to be considered tested.
- */
- private List<Job> testJobs(DeploymentSpec deploymentSpec, Change change, Instance instance, JobList jobList, Versions versions,
- String reason, Instant availableSince) {
- return testJobs(deploymentSpec, change, instance, jobList, versions, reason, availableSince,
- jobType -> canTrigger(jobType, jobList, versions, instance, deploymentSpec));
- }
-
- /**
* Returns the list of test jobs that need to succeed on the given versions for it to be considered tested, filtered by the given condition.
*/
private List<Job> testJobs(DeploymentSpec deploymentSpec, Change change, Instance instance, JobList jobList, Versions versions,