diff options
author | Jon Marius Venstad <venstad@gmail.com> | 2019-12-10 15:17:16 +0100 |
---|---|---|
committer | Jon Marius Venstad <venstad@gmail.com> | 2019-12-11 10:49:29 +0100 |
commit | 16b65830f46abf2ac3ae1b9f4482daaafd8ccd42 (patch) | |
tree | 7f6001d76e643e67fdb413348860273001acf221 /controller-server | |
parent | cb63b0fc9ebad9517ef8daaf0e836c75e5201db5 (diff) |
Some cleanup and TODOs
Diffstat (limited to 'controller-server')
2 files changed, 10 insertions, 176 deletions
diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentStatus.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentStatus.java index 45850669210..549c188f795 100644 --- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentStatus.java +++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentStatus.java @@ -358,12 +358,13 @@ public class DeploymentStatus { } @Override + // TODO jonmv: Split in readyAt(change, versions), pausedUntil(), and coolingDownUntil(versions) public Optional<Instant> readyAt(Change change, Versions versions) { Optional<Instant> readyAt = super.readyAt(change, versions); if (readyAt.isEmpty()) return Optional.empty(); - Optional<Instant> pausedUntil = status.application.require(job.id().application().instance()).jobPause(job.id().type()); + Optional<Instant> pausedUntil = status.application().require(job.id().application().instance()).jobPause(job.id().type()); if (pausedUntil.isPresent() && pausedUntil.get().isAfter(readyAt.get())) return pausedUntil; @@ -455,61 +456,6 @@ public class DeploymentStatus { } - /* - * Compute all JobIds to run: test and staging for first instance, unless declared in a parallel instance. - * Create StepStatus for the first two, then - * - * - * - * - * - * - * - * - * - * - * Prod: completedAt: change, fullChange - * Test: completedAt: versions? - * Delay: completedAt: change, fullChange - * Any: readyAt: change -> versions - * Any: testedAt: versions - * - * Start with system and staging for first instance as dependencies. - * Declared test jobs replace these for intra-instance dependents. - * Test and staging are implicitly parallel. (Well, they already are, if they don't depend on each other.) - * - * Map JobId to StepStatus: - * For each prod JobId: Compute Optional<Versions> to run for current change to be done. - * Prod jobs may wait for other prod jobs' to-do runs, but ignores their versions. - * Prod jobs may wait for test jobs, considering their versions. - * For each prod job, if job already triggered on desired versions, ignore the below. - * For each prod job, add other prod job dependencies. - * For each prod job, add explicit tests in same instance. - * For each prod versions to run, find all prod jobs for which those versions aren't tested (before the job), then - * for each such set of jobs, find the last common dependency instance, and add the test for that, or - * for each such set of jobs, add tests for those versions with the first declared instance; in any case - * add all implicit tests to some structure for tracking. - * - * Eliminate already running jobs. - * Keep set of JobId x Versions for each StepStatus, in topological order, for starting jobs and for display. - * DepTri: Needs all jobs to run that are also ready. Test jobs are always ready. - * API: Needs all jobs to run, and what they are waiting for, like, delay (until), or other jobs, or pause. - * To find dependency jobs, DFS and sort by topological order. - * - * anySysTest && anyStaTest || triggeredProd - * - * - * - * Delay after test jobs? Requires test jobs in DAG, - * which requires completeAt, and thus readyAt, to accept Change _and_ Versions. Public with Change only perhaps? - * Careful not to require ALL tests to run with versions for each prod job! - * - * readyAt and ordering of test jobs? Use lastSuccess.at for tests? Not quite right, but used today ... - * - * Add all possible test jobs to steps. Add them as prerequisites for first job in each instance ... ? - * - */ - public static List<JobId> jobsFor(Application application, SystemName system) { if (DeploymentSpec.empty.equals(application.deploymentSpec())) return List.of(); diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentTrigger.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentTrigger.java index 7bcdfd48265..d895fe27322 100644 --- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentTrigger.java +++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentTrigger.java @@ -3,7 +3,6 @@ package com.yahoo.vespa.hosted.controller.deployment; import com.yahoo.config.application.api.DeploymentInstanceSpec; import com.yahoo.config.application.api.DeploymentSpec; -import com.yahoo.config.application.api.DeploymentSpec.Step; import com.yahoo.config.provision.ApplicationId; import com.yahoo.config.provision.Environment; import com.yahoo.config.provision.zone.ZoneId; @@ -14,7 +13,6 @@ import com.yahoo.vespa.hosted.controller.Controller; import com.yahoo.vespa.hosted.controller.Instance; import com.yahoo.vespa.hosted.controller.api.identifiers.DeploymentId; import com.yahoo.vespa.hosted.controller.api.integration.deployment.ApplicationVersion; -import com.yahoo.vespa.hosted.controller.api.integration.deployment.JobId; import com.yahoo.vespa.hosted.controller.api.integration.deployment.JobType; import com.yahoo.vespa.hosted.controller.application.ApplicationList; import com.yahoo.vespa.hosted.controller.application.Change; @@ -35,15 +33,12 @@ import java.util.OptionalLong; import java.util.function.Predicate; import java.util.function.Supplier; import java.util.logging.Logger; -import java.util.stream.Collectors; import java.util.stream.Stream; import static com.yahoo.vespa.hosted.controller.api.integration.deployment.JobType.stagingTest; import static com.yahoo.vespa.hosted.controller.api.integration.deployment.JobType.systemTest; import static java.util.Comparator.comparing; -import static java.util.Comparator.naturalOrder; import static java.util.stream.Collectors.groupingBy; -import static java.util.stream.Collectors.joining; import static java.util.stream.Collectors.partitioningBy; import static java.util.stream.Collectors.toList; @@ -136,7 +131,7 @@ public class DeploymentTrigger { .collect(groupingBy(Job::applicationId))) .values().stream() .map(jobs -> (Supplier<Long>) jobs.stream() - .filter(this::trigger) + .peek(this::trigger) .limit(entry.getKey() ? 1 : Long.MAX_VALUE)::count)) .parallel().map(Supplier::get).reduce(0L, Long::sum); } @@ -147,24 +142,13 @@ public class DeploymentTrigger { * If the build service can not find the given job, or claims it is illegal to trigger it, * the project id is removed from the application owning the job, to prevent further trigger attempts. */ - public boolean trigger(Job job) { + public void trigger(Job job) { log.log(LogLevel.DEBUG, "Triggering " + job); - try { - applications().lockApplicationOrThrow(TenantAndApplicationId.from(job.applicationId()), application -> { - jobs.start(job.applicationId(), job.jobType, job.versions); - applications().store(application.with(job.applicationId().instance(), instance -> - instance.withJobPause(job.jobType, OptionalLong.empty()))); - }); - - return true; - } - catch (RuntimeException e) { - log.log(LogLevel.WARNING, "Exception triggering " + job + ": " + e); - if (e instanceof NoSuchElementException || e instanceof IllegalArgumentException) - applications().lockApplicationOrThrow(TenantAndApplicationId.from(job.applicationId()), application -> - applications().store(application.withProjectId(OptionalLong.empty()))); - return false; - } + applications().lockApplicationOrThrow(TenantAndApplicationId.from(job.applicationId()), application -> { + jobs.start(job.applicationId(), job.jobType, job.versions); + applications().store(application.with(job.applicationId().instance(), instance -> + instance.withJobPause(job.jobType, OptionalLong.empty()))); + }); } /** Force triggering of a job for given instance. */ @@ -177,6 +161,7 @@ public class DeploymentTrigger { var jobStatus = jobs.deploymentStatus(application).instanceJobs(instance.name()); var jobList = JobList.from(jobStatus.values()); return (jobType.isProduction() && requireTests && ! isTested(jobList, versions) + // TODO jonmv: Expose adding test jobs from DeploymentStatus, and use this here. ? testJobs(application.deploymentSpec(), application.change(), instance, jobList, versions, reason, clock.instant(), __ -> true).stream() : Stream.of(deploymentJob(instance, versions, application.change(), jobType, jobStatus.get(jobType), reason, clock.instant()))) .peek(this::trigger) @@ -238,10 +223,6 @@ public class DeploymentTrigger { return Optional.ofNullable(instance.deployments().get(jobType.zone(controller.system()))); } - private static <T extends Comparable<T>> Optional<T> max(Optional<T> o1, Optional<T> o2) { - return o1.isEmpty() ? o2 : o2.isEmpty() ? o1 : o1.get().compareTo(o2.get()) >= 0 ? o1 : o2; - } - // ---------- Ready job computation ---------- /** Returns the set of all jobs which have changes to propagate from the upstream steps. */ @@ -280,94 +261,10 @@ public class DeploymentTrigger { }); }); }); - - /* - applications().getApplication(id).ifPresent(application -> { - Collection<Instance> instances = application.deploymentSpec().instances().stream() - .flatMap(instance -> application.get(instance.name()).stream()) - .collect(Collectors.toUnmodifiableList()); - DeploymentStatus deploymentStatus = this.jobs.deploymentStatus(application); - for (Instance instance : instances) { - var jobStatus = deploymentStatus.instanceJobs(instance.name()); - var jobList = JobList.from(jobStatus.values()); - Change change = application.change(); - Optional<Instant> completedAt = max(jobList.type(systemTest).first() - .<Instant>flatMap(job -> job.lastSuccess().map(run -> run.end().get())), - jobList.type(stagingTest).first() - .<Instant>flatMap(job -> job.lastSuccess().map(run -> run.end().get()))); - String reason = "New change available"; - List<Job> testJobs = null; // null means "uninitialised", while empty means "don't run any jobs". - DeploymentSteps steps = steps(application.deploymentSpec().requireInstance(instance.name())); - - if (change.hasTargets()) { - for (Step step : steps.production()) { - List<JobType> stepJobs = steps.toJobs(step); - List<JobType> remainingJobs = stepJobs.stream().filter(job -> ! isComplete(change, change, instance, job, jobStatus.get(job))).collect(toList()); - if ( ! remainingJobs.isEmpty()) { // Change is incomplete; trigger remaining jobs if ready, or their test jobs if untested. - for (JobType job : remainingJobs) { - Versions versions = Versions.from(change, application, deploymentFor(instance, job), - controller.systemVersion()); - if (isTested(jobList, versions)) { - if (completedAt.isPresent() && canTrigger(job, jobList, versions, instance, application.deploymentSpec(), stepJobs)) { - jobs.add(deploymentJob(instance, versions, change, job, jobStatus.get(job), reason, completedAt.get())); - } - } - else if (testJobs == null) { - testJobs = testJobs(application.deploymentSpec(), - change, instance, jobList, versions, - String.format("Testing deployment for %s (%s)", - job.jobName(), versions.toString()), - completedAt.orElseGet(clock::instant)); - } - } - completedAt = Optional.empty(); - } - else { // All jobs are complete; find the time of completion of this step. - if (stepJobs.isEmpty()) { // No jobs means this is a delay step. - completedAt = completedAt.map(at -> at.plus(step.delay())).filter(at -> ! at.isAfter(clock.instant())); - reason += " after a delay of " + step.delay(); - } - else { - completedAt = stepJobs.stream().map(job -> jobStatus.get(job).lastCompleted().get().end().get()).max(naturalOrder()); - reason = "Available change in " + stepJobs.stream().map(JobType::jobName).collect(joining(", ")); - } - } - } - } - if (testJobs == null) { // If nothing to test, but outstanding commits, test those. - testJobs = testJobs(application.deploymentSpec(), change, instance, jobList, - Versions.from(application.outstandingChange().onTopOf(change), - application, - steps.sortedDeployments(instance.productionDeployments().values()).stream().findFirst(), - controller.systemVersion()), - "Testing last changes outside prod", clock.instant()); - } - jobs.addAll(testJobs); - } - }); - */ return Collections.unmodifiableList(jobs); } /** Returns whether given job should be triggered */ - private boolean canTrigger(JobType type, JobList jobList, Versions versions, Instance instance, DeploymentSpec deploymentSpec, List<JobType> parallelJobs) { - if ( ! jobList.type(type).running().isEmpty()) return false; - - // Are we already running jobs which are not in the set which can run in parallel with this? - if ( parallelJobs != null - && ! parallelJobs.containsAll(jobList.running().production().mapToList(job -> job.id().type()))) return false; - - // Are there another suspended deployment such that we shouldn't simultaneously change this? - if (type.isProduction() && isSuspendedInAnotherZone(instance, type.zone(controller.system()))) return false; - - return triggerAt(clock.instant(), type, jobList.type(type).first().get(), versions, instance, deploymentSpec); - } - - /** Returns whether given job should be triggered */ - private boolean canTrigger(JobType job, JobList jobList, Versions versions, Instance instance, DeploymentSpec deploymentSpec) { - return canTrigger(job, jobList, versions, instance, deploymentSpec, null); - } - private boolean isSuspendedInAnotherZone(Instance instance, ZoneId zone) { for (Deployment deployment : instance.productionDeployments().values()) { if ( ! deployment.zone().equals(zone) @@ -473,15 +370,6 @@ public class DeploymentTrigger { // ---------- Version and job helpers ---------- /** - * Returns the list of test jobs that should run now, and that need to succeed on the given versions for it to be considered tested. - */ - private List<Job> testJobs(DeploymentSpec deploymentSpec, Change change, Instance instance, JobList jobList, Versions versions, - String reason, Instant availableSince) { - return testJobs(deploymentSpec, change, instance, jobList, versions, reason, availableSince, - jobType -> canTrigger(jobType, jobList, versions, instance, deploymentSpec)); - } - - /** * Returns the list of test jobs that need to succeed on the given versions for it to be considered tested, filtered by the given condition. */ private List<Job> testJobs(DeploymentSpec deploymentSpec, Change change, Instance instance, JobList jobList, Versions versions, |