diff options
14 files changed, 399 insertions, 177 deletions
diff --git a/config-model-api/abi-spec.json b/config-model-api/abi-spec.json index d946dd972f4..ae9e7a22129 100644 --- a/config-model-api/abi-spec.json +++ b/config-model-api/abi-spec.json @@ -397,7 +397,8 @@ ], "fields": [ "public static final enum com.yahoo.config.application.api.DeploymentSpec$UpgradeRollout separate", - "public static final enum com.yahoo.config.application.api.DeploymentSpec$UpgradeRollout leading" + "public static final enum com.yahoo.config.application.api.DeploymentSpec$UpgradeRollout leading", + "public static final enum com.yahoo.config.application.api.DeploymentSpec$UpgradeRollout simultaneous" ] }, "com.yahoo.config.application.api.DeploymentSpec": { diff --git a/config-model-api/src/main/java/com/yahoo/config/application/api/DeploymentSpec.java b/config-model-api/src/main/java/com/yahoo/config/application/api/DeploymentSpec.java index 4b019bd9f7a..8ad42b1d4a8 100644 --- a/config-model-api/src/main/java/com/yahoo/config/application/api/DeploymentSpec.java +++ b/config-model-api/src/main/java/com/yahoo/config/application/api/DeploymentSpec.java @@ -564,9 +564,9 @@ public class DeploymentSpec { /** Separate: Application changes wait for upgrade to complete, unless upgrade fails. */ separate, /** Leading: Application changes are allowed to start and catch up to the platform upgrade. */ - leading + leading, // /** Simultaneous: Application changes deploy independently of platform upgrades. */ - // simultaneous + simultaneous } diff --git a/config-model-api/src/main/java/com/yahoo/config/application/api/xml/DeploymentSpecXmlReader.java b/config-model-api/src/main/java/com/yahoo/config/application/api/xml/DeploymentSpecXmlReader.java index b031af9faf2..b12d4024591 100644 --- a/config-model-api/src/main/java/com/yahoo/config/application/api/xml/DeploymentSpecXmlReader.java +++ b/config-model-api/src/main/java/com/yahoo/config/application/api/xml/DeploymentSpecXmlReader.java @@ -507,7 +507,7 @@ public class DeploymentSpecXmlReader { switch (rollout) { case "separate": return DeploymentSpec.UpgradeRollout.separate; case "leading": return DeploymentSpec.UpgradeRollout.leading; - // case "simultaneous": return DeploymentSpec.UpgradePolicy.conservative; + case "simultaneous": return DeploymentSpec.UpgradeRollout.simultaneous; default: throw new IllegalArgumentException("Illegal upgrade rollout '" + rollout + "': " + "Must be one of " + Arrays.toString(DeploymentSpec.UpgradeRollout.values())); } diff --git a/config-model-api/src/test/java/com/yahoo/config/application/api/DeploymentSpecTest.java b/config-model-api/src/test/java/com/yahoo/config/application/api/DeploymentSpecTest.java index a97faf5995d..f6af155ffc2 100644 --- a/config-model-api/src/test/java/com/yahoo/config/application/api/DeploymentSpecTest.java +++ b/config-model-api/src/test/java/com/yahoo/config/application/api/DeploymentSpecTest.java @@ -386,12 +386,16 @@ public class DeploymentSpecTest { " <instance id='default'>" + " <upgrade rollout='leading' />" + " </instance>" + + " <instance id='aggressive'>" + + " <upgrade rollout='simultaneous' />" + + " </instance>" + " <instance id='custom'/>" + "</deployment>" ); DeploymentSpec spec = DeploymentSpec.fromXml(r); assertEquals("leading", spec.requireInstance("default").upgradeRollout().toString()); assertEquals("separate", spec.requireInstance("custom").upgradeRollout().toString()); + assertEquals("simultaneous", spec.requireInstance("aggressive").upgradeRollout().toString()); } @Test diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentStatus.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentStatus.java index ad237a6aa6e..7f7449fb38b 100644 --- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentStatus.java +++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentStatus.java @@ -24,10 +24,13 @@ import java.time.Instant; import java.time.temporal.ChronoUnit; import java.util.ArrayList; import java.util.Collections; +import java.util.HashSet; import java.util.LinkedHashMap; import java.util.List; import java.util.Map; +import java.util.Objects; import java.util.Optional; +import java.util.Set; import java.util.function.Function; import java.util.stream.Collectors; import java.util.stream.Stream; @@ -107,10 +110,45 @@ public class DeploymentStatus { return allJobs; } + /** Whether any jobs both dependent on the dependency, and a dependency for the dependent, are failing. */ + private boolean hasFailures(StepStatus dependency, StepStatus dependent) { + Set<StepStatus> dependents = new HashSet<>(); + fillDependents(dependency, new HashSet<>(), dependents, dependent); + Set<JobId> criticalJobs = dependents.stream().flatMap(step -> step.job().stream()).collect(Collectors.toSet()); + + return ! allJobs.matching(job -> criticalJobs.contains(job.id())) + .failing() + .not().outOfTestCapacity() + .isEmpty(); + } + + private boolean fillDependents(StepStatus dependency, Set<StepStatus> visited, Set<StepStatus> dependents, StepStatus current) { + if (visited.contains(current)) + return dependents.contains(current); + + if (dependency == current) + dependents.add(current); + else + for (StepStatus dep : current.dependencies) + if (fillDependents(dependency, visited, dependents, dep)) + dependents.add(current); + + visited.add(current); + return dependents.contains(current); + } + + /** Whether any job is failing on anything older than version, with errors other than lack of capacity in a test zone.. */ + public boolean hasFailures(ApplicationVersion version) { + return ! allJobs.failing() + .not().outOfTestCapacity() + .matching(job -> job.lastTriggered().get().versions().targetApplication().compareTo(version) < 0) + .isEmpty(); + } + /** Whether any jobs of this application are failing with other errors than lack of capacity in a test zone. */ public boolean hasFailures() { return ! allJobs.failing() - .not().withStatus(RunStatus.outOfCapacity) + .not().outOfTestCapacity() .isEmpty(); } @@ -129,13 +167,13 @@ public class DeploymentStatus { /** * The set of jobs that need to run for the changes of each instance of the application to be considered complete, - * and any test jobs for any oustanding change, which will likely be needed to lated deploy this change. + * and any test jobs for any outstanding change, which will likely be needed to later deploy this change. */ - public Map<JobId, List<Versions>> jobsToRun() { + public Map<JobId, List<Job>> jobsToRun() { Map<InstanceName, Change> changes = new LinkedHashMap<>(); for (InstanceName instance : application.deploymentSpec().instanceNames()) changes.put(instance, application.require(instance).change()); - Map<JobId, List<Versions>> jobs = jobsToRun(changes); + Map<JobId, List<Job>> jobs = jobsToRun(changes); // Add test jobs for any outstanding change. for (InstanceName instance : application.deploymentSpec().instanceNames()) @@ -151,12 +189,12 @@ public class DeploymentStatus { Collections::unmodifiableMap)); } - private Map<JobId, List<Versions>> jobsToRun(Map<InstanceName, Change> changes, boolean eagerTests) { - Map<JobId, Versions> productionJobs = new LinkedHashMap<>(); + private Map<JobId, List<Job>> jobsToRun(Map<InstanceName, Change> changes, boolean eagerTests) { + Map<JobId, List<Job>> productionJobs = new LinkedHashMap<>(); changes.forEach((instance, change) -> productionJobs.putAll(productionJobs(instance, change, eagerTests))); - Map<JobId, List<Versions>> testJobs = testJobs(productionJobs); - Map<JobId, List<Versions>> jobs = new LinkedHashMap<>(testJobs); - productionJobs.forEach((job, versions) -> jobs.put(job, List.of(versions))); + Map<JobId, List<Job>> testJobs = testJobs(productionJobs); + Map<JobId, List<Job>> jobs = new LinkedHashMap<>(testJobs); + jobs.putAll(productionJobs); // Add runs for idle, declared test jobs if they have no successes on their instance's change's versions. jobSteps.forEach((job, step) -> { if ( ! step.isDeclared() || jobs.containsKey(job)) @@ -173,13 +211,13 @@ public class DeploymentStatus { Versions versions = Versions.from(change, application, firstProductionJobWithDeployment.flatMap(this::deploymentFor), systemVersion); if (step.completedAt(change, firstProductionJobWithDeployment).isEmpty()) - jobs.merge(job, List.of(versions), DeploymentStatus::union); + jobs.merge(job, List.of(new Job(versions, step.readyAt(change), change)), DeploymentStatus::union); }); return Collections.unmodifiableMap(jobs); } /** The set of jobs that need to run for the given changes to be considered complete. */ - public Map<JobId, List<Versions>> jobsToRun(Map<InstanceName, Change> changes) { + public Map<JobId, List<Job>> jobsToRun(Map<InstanceName, Change> changes) { return jobsToRun(changes, false); } @@ -239,61 +277,151 @@ public class DeploymentStatus { .distinct(); } - /** - * True if the job has already been triggered on the given versions, or if all test types (systemTest, stagingTest), - * restricted to the job's instance if declared in that instance, have successful runs on the given versions. - */ - public boolean isTested(JobId job, Change change) { - Versions versions = Versions.from(change, application, deploymentFor(job), systemVersion); - return allJobs.triggeredOn(versions).get(job).isPresent() - || Stream.of(systemTest, stagingTest) - .noneMatch(testType -> declaredTest(job.application(), testType).map(__ -> allJobs.instance(job.application().instance())) - .orElse(allJobs) - .type(testType) - .successOn(versions).isEmpty()); - } - - private Map<JobId, Versions> productionJobs(InstanceName instance, Change change, boolean assumeUpgradesSucceed) { - Map<JobId, Versions> jobs = new LinkedHashMap<>(); + /** Earliest instant when job was triggered with given versions, or both system and staging tests were successful. */ + public Optional<Instant> verifiedAt(JobId job, Versions versions) { + Optional<Instant> triggeredAt = allJobs.get(job) + .flatMap(status -> status.runs().values().stream() + .filter(run -> run.versions().equals(versions)) + .findFirst()) + .map(Run::start); + Optional<Instant> systemTestedAt = testedAt(job.application(), systemTest, versions); + Optional<Instant> stagingTestedAt = testedAt(job.application(), stagingTest, versions); + if (systemTestedAt.isEmpty() || stagingTestedAt.isEmpty()) return triggeredAt; + Optional<Instant> testedAt = systemTestedAt.get().isAfter(stagingTestedAt.get()) ? systemTestedAt : stagingTestedAt; + return triggeredAt.isPresent() && triggeredAt.get().isBefore(testedAt.get()) ? triggeredAt : testedAt; + } + + /** Earliest instant when versions were tested for the given instance */ + private Optional<Instant> testedAt(ApplicationId instance, JobType type, Versions versions) { + return declaredTest(instance, type).map(__ -> allJobs.instance(instance.instance())) + .orElse(allJobs) + .type(type).asList().stream() + .flatMap(status -> RunList.from(status) + .on(versions) + .status(RunStatus.success) + .asList().stream() + .map(Run::start)) + .min(naturalOrder()); + } + + private Map<JobId, List<Job>> productionJobs(InstanceName instance, Change change, boolean assumeUpgradesSucceed) { + Map<JobId, List<Job>> jobs = new LinkedHashMap<>(); jobSteps.forEach((job, step) -> { // When computing eager test jobs for outstanding changes, assume current upgrade completes successfully. Optional<Deployment> deployment = deploymentFor(job) - .map(existing -> assumeUpgradesSucceed ? new Deployment(existing.zone(), - existing.applicationVersion(), - change.platform().orElse(existing.version()), - existing.at(), - existing.metrics(), - existing.activity(), - existing.quota(), - existing.cost()) - : existing); - if ( job.application().instance().equals(instance) - && job.type().isProduction() - && step.completedAt(change, Optional.of(job)).isEmpty()) // Signal strict completion criterion by depending on job itself. - jobs.put(job, Versions.from(change, application, deployment, systemVersion)); + .map(existing -> assumeUpgradesSucceed ? withChange(existing, change.withoutApplication()) : existing); + if (job.application().instance().equals(instance) && job.type().isProduction()) { + + List<Job> toRun = new ArrayList<>(); + List<Change> changes = changes(job, step, change, deployment); + if (changes.isEmpty()) return; + for (Change partial : changes) { + toRun.add(new Job(Versions.from(partial, application, deployment, systemVersion), + step.readyAt(partial, Optional.of(job)), + partial)); + // Assume first partial change is applied before the second. + deployment = deployment.map(existing -> withChange(existing, partial)); + } + jobs.put(job, toRun); + } }); return jobs; } + private static Deployment withChange(Deployment deployment, Change change) { + return new Deployment(deployment.zone(), + change.application().orElse(deployment.applicationVersion()), + change.platform().orElse(deployment.version()), + deployment.at(), + deployment.metrics(), + deployment.activity(), + deployment.quota(), + deployment.cost()); + } + + /** Changes to deploy with the given job, possibly split in two steps. */ + private List<Change> changes(JobId job, StepStatus step, Change change, Optional<Deployment> deployment) { + // Signal strict completion criterion by depending on job itself. + if (step.completedAt(change, Optional.of(job)).isPresent()) + return List.of(); + + if ( step.completedAt(change.withoutApplication(), Optional.of(job)).isPresent() + || step.completedAt(change.withoutPlatform(), Optional.of(job)).isPresent()) + return List.of(change); + + if (change.platform().isEmpty() || change.application().isEmpty() || change.isPinned()) + return List.of(change); + + Optional<Instant> platformReadyAt = step.dependenciesCompletedAt(change.withoutApplication(), Optional.of(job)); + Optional<Instant> revisionReadyAt = step.dependenciesCompletedAt(change.withoutPlatform(), Optional.of(job)); + + // If the revision is not ready for this job, we prioritise the upgrade, assuming it was triggered first, + // unless this is a production test job, and the upgrade is already failing between deployment and here, + // in which case we must abandon the production test on the pure upgrade, so the revision can be deployed. + if (revisionReadyAt.isEmpty()) { + if (isTestAndIsFailingBetweenDeploymentAndThis(job)) return List.of(change); + return List.of(change.withoutApplication(), change); + } + // If only the revision is ready, we prioritise that. + if (platformReadyAt.isEmpty()) return List.of(change.withoutPlatform(), change); + + // Both changes are ready for this step, so we use the policy to decide. + // TODO jonmv adding change.application.at and change.platform.at makes this better + boolean platformReadyFirst = platformReadyAt.get().isBefore(revisionReadyAt.get()); + boolean revisionReadyFirst = revisionReadyAt.get().isBefore(platformReadyAt.get()); + switch (application.deploymentSpec().requireInstance(job.application().instance()).upgradeRollout()) { + case separate: // Let whichever change rolled out first, keep rolling first, unless upgrade alone is failing. + return (platformReadyFirst || platformReadyAt.get().equals(Instant.EPOCH)) // Assume platform was first if no jobs have run yet. + ? step.job().flatMap(jobs()::get).flatMap(JobStatus::firstFailing).isPresent() + ? List.of(change) // Platform was first, but is failing. + : List.of(change.withoutApplication(), change) // Platform was first, and is OK. + : revisionReadyFirst + ? List.of(change.withoutPlatform(), change) // Revision was first. + : List.of(change); // Both ready at the same time, probably due to earlier failure. + case leading: // When one change catches up, they fuse and continue together. + return List.of(change); + case simultaneous: // Revisions are allowed to run ahead, but the job where it caught up should have both changes. + return platformReadyFirst ? List.of(change) : List.of(change.withoutPlatform(), change); + default: throw new IllegalStateException("Unknown upgrade rollout policy"); + } + } + + private boolean isTestAndIsFailingBetweenDeploymentAndThis(JobId job) { + if ( ! job.type().isTest()) return false; + JobId deployment = new JobId(job.application(), JobType.from(system, job.type().zone(system)).get()); + return hasFailures(jobSteps.get(deployment), jobSteps.get(job)); + } + /** The test jobs that need to run prior to the given production deployment jobs. */ - public Map<JobId, List<Versions>> testJobs(Map<JobId, Versions> jobs) { - Map<JobId, List<Versions>> testJobs = new LinkedHashMap<>(); + public Map<JobId, List<Job>> testJobs(Map<JobId, List<Job>> jobs) { + Map<JobId, List<Job>> testJobs = new LinkedHashMap<>(); for (JobType testType : List.of(systemTest, stagingTest)) { - jobs.forEach((job, versions) -> { + jobs.forEach((job, versionsList) -> { if (job.type().isProduction() && job.type().isDeployment()) { declaredTest(job.application(), testType).ifPresent(testJob -> { - if (allJobs.successOn(versions).get(testJob).isEmpty()) - testJobs.merge(testJob, List.of(versions), DeploymentStatus::union); + for (Job productionJob : versionsList) + if (allJobs.successOn(productionJob.versions()).get(testJob).isEmpty()) + testJobs.merge(testJob, List.of(new Job(productionJob.versions(), + jobSteps().get(testJob).readyAt(productionJob.change), + productionJob.change)), + DeploymentStatus::union); }); } }); - jobs.forEach((job, versions) -> { - if ( job.type().isProduction() && job.type().isDeployment() - && allJobs.successOn(versions).type(testType).isEmpty() - && testJobs.keySet().stream() - .noneMatch(test -> test.type() == testType - && testJobs.get(test).contains(versions))) - testJobs.merge(firstDeclaredOrElseImplicitTest(testType), List.of(versions), DeploymentStatus::union); + jobs.forEach((job, versionsList) -> { + for (Job productionJob : versionsList) + if ( job.type().isProduction() && job.type().isDeployment() + && allJobs.successOn(productionJob.versions()).type(testType).isEmpty() + && testJobs.keySet().stream() + .noneMatch(test -> test.type() == testType + && testJobs.get(test).stream().anyMatch(testJob -> testJob.versions().equals(productionJob.versions())))) { + JobId testJob = firstDeclaredOrElseImplicitTest(testType); + testJobs.merge(testJob, + List.of(new Job(productionJob.versions(), + jobSteps.get(testJob).readyAt(productionJob.change), + productionJob.change)), + DeploymentStatus::union); + } }); } return Collections.unmodifiableMap(testJobs); @@ -535,11 +663,10 @@ public class DeploymentStatus { Optional<Instant> completedAt(Change change, Optional<JobId> dependent) { return ( (change.platform().isEmpty() || change.platform().equals(instance.change().platform())) && (change.application().isEmpty() || change.application().equals(instance.change().application())) - || status.jobsToRun(Map.of(instance.name(), change)).isEmpty()) + || step().steps().stream().noneMatch(step -> step.concerns(prod))) ? dependenciesCompletedAt(change, dependent) : Optional.empty(); } - // TODO jonmv: complete for p-jobs: last is XXX, but ready/verified uses any is XXX. @Override public Optional<Instant> blockedUntil(Change change) { @@ -614,8 +741,10 @@ public class DeploymentStatus { @Override public Optional<Instant> readyAt(Change change, Optional<JobId> dependent) { - return super.readyAt(change, Optional.of(job.id())) - .filter(__ -> status.isTested(job.id(), change)); + Optional<Instant> readyAt = super.readyAt(change, dependent); + Optional<Instant> testedAt = status.verifiedAt(job.id(), Versions.from(change, status.application, existingDeployment, status.systemVersion)); + if (readyAt.isEmpty() || testedAt.isEmpty()) return Optional.empty(); + return readyAt.get().isAfter(testedAt.get()) ? readyAt : testedAt; } /** Complete if deployment is on pinned version, and last successful deployment, or if given versions is strictly a downgrade, and this isn't forced by a pin. */ @@ -641,8 +770,9 @@ public class DeploymentStatus { : RunList.from(job).status(RunStatus.success).asList().stream()) .filter(run -> change.platform().map(run.versions().targetPlatform()::equals).orElse(true) && change.application().map(run.versions().targetApplication()::equals).orElse(true)) - .findFirst() - .flatMap(Run::end); + .map(Run::end) + .flatMap(Optional::stream) + .min(naturalOrder()); } }; } @@ -696,4 +826,39 @@ public class DeploymentStatus { } + public static class Job { + + private final Versions versions; + private final Optional<Instant> readyAt; + private final Change change; + + public Job(Versions versions, Optional<Instant> readyAt, Change change) { + this.versions = versions; + this.readyAt = readyAt; + this.change = change; + } + + public Versions versions() { + return versions; + } + + public Optional<Instant> readyAt() { + return readyAt; + } + + @Override + public boolean equals(Object o) { + if (this == o) return true; + if (o == null || getClass() != o.getClass()) return false; + Job job = (Job) o; + return versions.equals(job.versions) && readyAt.equals(job.readyAt) && change.equals(job.change); + } + + @Override + public int hashCode() { + return Objects.hash(versions, readyAt, change); + } + + } + } diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentTrigger.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentTrigger.java index 9790ece421e..b944593bbab 100644 --- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentTrigger.java +++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentTrigger.java @@ -97,7 +97,7 @@ public class DeploymentTrigger { && status.instanceSteps().get(instanceName) .readyAt(outstanding) .map(readyAt -> ! readyAt.isAfter(clock.instant())).orElse(false) - && acceptNewApplicationVersion(status, instanceName)) { + && acceptNewApplicationVersion(status, instanceName, outstanding.application().get())) { application = application.with(instanceName, instance -> withRemainingChange(instance, instance.change().with(outstanding.application().get()), status)); } @@ -198,11 +198,12 @@ public class DeploymentTrigger { DeploymentStatus status = jobs.deploymentStatus(application); Versions versions = Versions.from(instance.change(), application, status.deploymentFor(job), controller.readSystemVersion()); - Map<JobId, List<Versions>> jobs = status.testJobs(Map.of(job, versions)); + DeploymentStatus.Job toTrigger = new DeploymentStatus.Job(versions, Optional.of(controller.clock().instant()), instance.change()); + Map<JobId, List<DeploymentStatus.Job>> jobs = status.testJobs(Map.of(job, List.of(toTrigger))); if (jobs.isEmpty() || ! requireTests) - jobs = Map.of(job, List.of(versions)); + jobs = Map.of(job, List.of(toTrigger)); jobs.forEach((jobId, versionsList) -> { - trigger(deploymentJob(instance, versionsList.get(0), jobId.type(), status.jobs().get(jobId).get(), clock.instant())); + trigger(deploymentJob(instance, versionsList.get(0).versions(), jobId.type(), status.jobs().get(jobId).get(), clock.instant())); }); return List.copyOf(jobs.keySet()); } @@ -321,19 +322,18 @@ public class DeploymentTrigger { /** Finds the next step to trigger for the given application, if any, and returns these as a list. */ private List<Job> computeReadyJobs(DeploymentStatus status) { List<Job> jobs = new ArrayList<>(); - status.jobsToRun().forEach((job, versionsList) -> { - for (Versions versions : versionsList) - status.jobSteps().get(job).readyAt(status.application().require(job.application().instance()).change()) - .filter(readyAt -> ! clock.instant().isBefore(readyAt)) - .filter(__ -> ! (job.type().isProduction() && isUnhealthyInAnotherZone(status.application(), job))) - .filter(__ -> abortIfRunning(versionsList, status.jobs().get(job).get())) // Abort and trigger this later if running with outdated parameters. - .ifPresent(readyAt -> { - jobs.add(deploymentJob(status.application().require(job.application().instance()), - versions, - job.type(), - status.instanceJobs(job.application().instance()).get(job.type()), - readyAt)); - }); + Map<JobId, List<DeploymentStatus.Job>> jobsToRun = status.jobsToRun(); + jobsToRun.forEach((job, versionsList) -> { + versionsList.get(0).readyAt() + .filter(readyAt -> ! clock.instant().isBefore(readyAt)) + .filter(__ -> ! (job.type().isProduction() && isUnhealthyInAnotherZone(status.application(), job))) + .filter(__ -> abortIfRunning(status, jobsToRun, job)) // Abort and trigger this later if running with outdated parameters. + .map(readyAt -> deploymentJob(status.application().require(job.application().instance()), + versionsList.get(0).versions(), + job.type(), + status.instanceJobs(job.application().instance()).get(job.type()), + readyAt)) + .ifPresent(jobs::add); }); return Collections.unmodifiableList(jobs); } @@ -348,29 +348,50 @@ public class DeploymentTrigger { return false; } - /** Returns whether the job is not running, and also aborts it if it's running with outdated versions. */ - private boolean abortIfRunning(List<Versions> versionsList, JobStatus status) { - if ( ! status.isRunning()) - return true; + private void abortIfOutdated(DeploymentStatus status, Map<JobId, List<DeploymentStatus.Job>> jobs, JobId job) { + status.jobs().get(job) + .flatMap(JobStatus::lastTriggered) + .filter(last -> ! last.hasEnded()) + .ifPresent(last -> { + if (jobs.get(job).stream().noneMatch(versions -> versions.versions().targetsMatch(last.versions()) + && versions.versions().sourcesMatchIfPresent(last.versions()))) { + log.log(Level.INFO, "Aborting outdated run " + last); + controller.jobController().abort(last.id()); + } + }); + } - Run last = status.lastTriggered().get(); - if (versionsList.stream().noneMatch(versions -> versions.targetsMatch(last.versions()) - && versions.sourcesMatchIfPresent(last.versions()))) - controller.jobController().abort(last.id()); + /** Returns whether the job is free to start, and also aborts it if it's running with outdated versions. */ + private boolean abortIfRunning(DeploymentStatus status, Map<JobId, List<DeploymentStatus.Job>> jobs, JobId job) { + abortIfOutdated(status, jobs, job); + boolean blocked = status.jobs().get(job).get().isRunning(); + + if ( ! job.type().isTest()) { + Optional<JobStatus> productionTest = JobType.testFrom(controller.system(), job.type().zone(controller.system()).region()) + .map(type -> new JobId(job.application(), type)) + .flatMap(status.jobs()::get); + if (productionTest.isPresent()) { + abortIfOutdated(status, jobs, productionTest.get().id()); + // Production deployments are also blocked by their declared tests, if the next versions to run + // for those are not the same as the versions we're considering running in the deployment job now. + if (productionTest.map(JobStatus::id).map(jobs::get) + .map(versions -> ! versions.get(0).versions().targetsMatch(jobs.get(job).get(0).versions())) + .orElse(false)) + blocked = true; + } + } - return false; + return ! blocked; } // ---------- Change management o_O ---------- - private boolean acceptNewApplicationVersion(DeploymentStatus status, InstanceName instance) { + private boolean acceptNewApplicationVersion(DeploymentStatus status, InstanceName instance, ApplicationVersion version) { if (status.application().deploymentSpec().instance(instance).isEmpty()) return false; // Unknown instance. - if (status.hasFailures()) return true; // Allow changes to fix upgrade or previous revision problems. + if (status.hasFailures(version)) return true; // Allow changes to fix upgrade or previous revision problems. DeploymentInstanceSpec spec = status.application().deploymentSpec().requireInstance(instance); Change change = status.application().require(instance).change(); - if (change.platform().isPresent() && spec.upgradeRollout() == DeploymentSpec.UpgradeRollout.separate) return false; - if (change.application().isPresent() && spec.upgradeRevision() == DeploymentSpec.UpgradeRevision.separate) return false; - return true; + return change.application().isEmpty() || spec.upgradeRevision() != DeploymentSpec.UpgradeRevision.separate; } private Instance withRemainingChange(Instance instance, Change change, DeploymentStatus status) { diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/JobList.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/JobList.java index 5021d6b2076..fe9c6e6f3d6 100644 --- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/JobList.java +++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/JobList.java @@ -12,6 +12,7 @@ import java.time.Instant; import java.util.Collection; import java.util.List; import java.util.Optional; +import java.util.Set; import java.util.function.Function; import java.util.function.Predicate; @@ -51,6 +52,10 @@ public class JobList extends AbstractFilteringList<JobStatus, JobList> { return matching(job -> job.lastCompleted().isPresent() && ! job.isSuccess()); } + public JobList outOfTestCapacity() { + return matching(job -> job.isOutOfCapacity() && job.id().type().environment().isTest()); + } + public JobList running() { return matching(job -> job.isRunning()); } @@ -76,8 +81,13 @@ public class JobList extends AbstractFilteringList<JobStatus, JobList> { } /** Returns the subset of jobs run for the given instance. */ - public JobList instance(InstanceName instance) { - return matching(job -> job.id().application().instance().equals(instance)); + public JobList instance(InstanceName... instances) { + return instance(Set.of(instances)); + } + + /** Returns the subset of jobs run for the given instance. */ + public JobList instance(Collection<InstanceName> instances) { + return matching(job -> instances.contains(job.id().application().instance())); } /** Returns the subset of jobs of which are production jobs. */ diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/RunList.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/RunList.java index 925bd500199..00cd4bd5c6c 100644 --- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/RunList.java +++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/RunList.java @@ -26,7 +26,7 @@ public class RunList extends AbstractFilteringList<Run, RunList> { return from(job.runs().descendingMap().values()); } - /** Returns the jobs with runs matching the given versions — targets only for system test, everything present otherwise. */ + /** Returns the jobs with runs matching the given versions — targets only for system test, everything present otherwise. */ public RunList on(Versions versions) { return matching(run -> matchingVersions(run, versions)); } diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/restapi/application/JobControllerApiHandlerHelper.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/restapi/application/JobControllerApiHandlerHelper.java index 30bd040a682..3244779abb7 100644 --- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/restapi/application/JobControllerApiHandlerHelper.java +++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/restapi/application/JobControllerApiHandlerHelper.java @@ -256,7 +256,7 @@ class JobControllerApiHandlerHelper { responseObject.setString("application", id.application().value()); application.projectId().ifPresent(projectId -> responseObject.setLong("projectId", projectId)); - Map<JobId, List<Versions>> jobsToRun = status.jobsToRun(); + Map<JobId, List<DeploymentStatus.Job>> jobsToRun = status.jobsToRun(); Cursor stepsArray = responseObject.setArray("steps"); VersionStatus versionStatus = controller.readVersionStatus(); for (DeploymentStatus.StepStatus stepStatus : status.allSteps()) { @@ -330,17 +330,17 @@ class JobControllerApiHandlerHelper { JobStatus jobStatus = status.jobs().get(job).get(); Cursor toRunArray = stepObject.setArray("toRun"); - for (Versions versions : jobsToRun.getOrDefault(job, List.of())) { + for (DeploymentStatus.Job versions : jobsToRun.getOrDefault(job, List.of())) { boolean running = jobStatus.lastTriggered() .map(run -> jobStatus.isRunning() - && versions.targetsMatch(run.versions()) - && (job.type().isProduction() || versions.sourcesMatchIfPresent(run.versions()))) + && versions.versions().targetsMatch(run.versions()) + && (job.type().isProduction() || versions.versions().sourcesMatchIfPresent(run.versions()))) .orElse(false); if (running) continue; // Run will be contained in the "runs" array. Cursor runObject = toRunArray.addObject(); - toSlime(runObject.setObject("versions"), versions); + toSlime(runObject.setObject("versions"), versions.versions()); } toSlime(stepObject.setArray("runs"), jobStatus.runs().descendingMap().values(), 10, baseUriForJob); diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/restapi/deployment/DeploymentApiHandler.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/restapi/deployment/DeploymentApiHandler.java index 5a6d929b054..158cc6caede 100644 --- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/restapi/deployment/DeploymentApiHandler.java +++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/restapi/deployment/DeploymentApiHandler.java @@ -185,6 +185,7 @@ public class DeploymentApiHandler extends ThreadedHttpRequestHandler { .ifPresent(until -> jobObject.setLong("coolingDownUntil", until.toEpochMilli())); if (jobsToRun.containsKey(job)) { List<Versions> versionsOnThisPlatform = jobsToRun.get(job).stream() + .map(DeploymentStatus.Job::versions) .filter(versions -> versions.targetPlatform().equals(statistics.version())) .collect(Collectors.toList()); if ( ! versionsOnThisPlatform.isEmpty()) diff --git a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentContext.java b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentContext.java index 699721b128c..9c5f7e3376a 100644 --- a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentContext.java +++ b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentContext.java @@ -164,14 +164,13 @@ public class DeploymentContext { } - /** Completely deploy the latest change */ + /** Completely deploy the current change */ public DeploymentContext deploy() { Application application = application(); assertTrue("Application package submitted", application.latestVersion().isPresent()); assertFalse("Submission is not already deployed", application.instances().values().stream() .anyMatch(instance -> instance.deployments().values().stream() .anyMatch(deployment -> deployment.applicationVersion().equals(lastSubmission)))); - assertEquals(application.latestVersion(), instance().change().application()); completeRollout(application.deploymentSpec().instances().size() > 1); for (var instance : application().instances().values()) { assertFalse(instance.change().hasTargets()); @@ -334,20 +333,20 @@ public class DeploymentContext { /** Runs and returns all remaining jobs for the application, at most once, and asserts the current change is rolled out. */ public DeploymentContext completeRollout(boolean multiInstance) { triggerJobs(); - Map<ApplicationId, Set<JobType>> jobsByInstance = new HashMap<>(); + Map<ApplicationId, Map<JobType, Versions>> jobsByInstance = new HashMap<>(); List<Run> activeRuns; while ( ! (activeRuns = this.jobs.active(applicationId)).isEmpty()) for (Run run : activeRuns) { - Set<JobType> jobs = jobsByInstance.computeIfAbsent(run.id().application(), k -> new HashSet<>()); - if (jobs.add(run.id().type())) { - runJob(run.id().type(), run.id().application()); - if (multiInstance) { - tester.outstandingChangeDeployer().run(); - } - triggerJobs(); - } else { - throw new AssertionError("Job '" + run.id() + "' was run twice"); + Map<JobType, Versions> jobs = jobsByInstance.computeIfAbsent(run.id().application(), k -> new HashMap<>()); + Versions previous = jobs.put(run.id().type(), run.versions()); + if (run.versions().equals(previous)) { + throw new AssertionError("Job '" + run.id() + "' was run twice on same versions"); } + runJob(run.id().type(), run.id().application()); + if (multiInstance) { + tester.outstandingChangeDeployer().run(); + } + triggerJobs(); } assertFalse("Change should have no targets, but was " + instance().change(), instance().change().hasTargets()); diff --git a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentTriggerTest.java b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentTriggerTest.java index cd1e12312a8..0ac1691829a 100644 --- a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentTriggerTest.java +++ b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentTriggerTest.java @@ -189,9 +189,10 @@ public class DeploymentTriggerTest { tester.controllerTester().upgradeSystem(new Version("8.9")); tester.upgrader().maintain(); app.runJob(systemTest).runJob(stagingTest); + tester.clock().advance(Duration.ofMinutes(1)); tester.triggerJobs(); - // Jobs are not aborted when the new submission remains outstanding. + // Upgrade is allowed to proceed ahead of revision change, and is not aborted. app.submit(); app.runJob(systemTest).runJob(stagingTest); tester.triggerJobs(); @@ -381,17 +382,15 @@ public class DeploymentTriggerTest { // Application on (6.1, 1.0.1) Version v1 = Version.fromString("6.1"); - // Application is mid-upgrade when block window begins, and has an outstanding change. + // Application is mid-upgrade when block window begins, and gets an outstanding change. Version v2 = Version.fromString("6.2"); tester.controllerTester().upgradeSystem(v2); tester.upgrader().maintain(); - app.submit(applicationPackage); - app.runJob(stagingTest).runJob(systemTest); // Entering block window will keep the outstanding change in place. tester.clock().advance(Duration.ofHours(1)); - tester.outstandingChangeDeployer().run(); + app.submit(applicationPackage); app.runJob(productionUsWest1); assertEquals(1, app.instanceJobs().get(productionUsWest1).lastSuccess().get().versions().targetApplication().buildNumber().getAsLong()); assertEquals(2, app.deploymentStatus().outstandingChange(app.instance().name()).application().get().buildNumber().getAsLong()); @@ -599,19 +598,23 @@ public class DeploymentTriggerTest { // Change has a higher application version than what is deployed -- deployment should trigger. app1.timeOutUpgrade(productionUsCentral1); - assertEquals(version2, app1.instance().deployments().get(productionUsCentral1.zone(main)).version()); + assertEquals(version2, app1.deployment(productionUsCentral1.zone(main)).version()); assertEquals(revision2, app1.deployment(productionUsCentral1.zone(main)).applicationVersion()); // Change is again strictly dominated, and us-central-1 is skipped, even though it is still failing. - tester.clock().advance(Duration.ofHours(2).plus(Duration.ofSeconds(1))); // Enough time for retry + tester.clock().advance(Duration.ofHours(3)); // Enough time for retry tester.triggerJobs(); // Failing job is not retried as change has been deployed app1.assertNotRunning(productionUsCentral1); // Last job has a different deployment target, so tests need to run again. - app1.runJob(systemTest).runJob(stagingTest).runJob(productionEuWest1); - assertFalse(app1.instance().change().hasTargets()); - assertFalse(app1.instanceJobs().get(productionUsCentral1).isSuccess()); + app1.runJob(systemTest) + .runJob(stagingTest) // Eager test of outstanding change, assuming upgrade in west succeeds. + .runJob(productionEuWest1) // Upgrade completes, and revision is the only change. + .runJob(productionUsCentral1) // With only revision change, central should run to cover a previous failure. + .runJob(productionEuWest1); // Finally, west changes revision. + assertEquals(Change.empty(), app1.instance().change()); + assertEquals(Optional.of(RunStatus.success), app1.instanceJobs().get(productionUsCentral1).lastStatus()); } @Test @@ -1168,48 +1171,65 @@ public class DeploymentTriggerTest { assertEquals(Change.empty(), app2.instance().change()); assertEquals(Change.empty(), app3.instance().change()); - // Upgrade instance 1; a failure in any instance allows an application change to accompany the upgrade. + // Upgrade instance 1; upgrade rolls out first, with revision following. // The new platform won't roll out to the conservative instance until the normal one is upgraded. - app2.failDeployment(systemTest); app1.submit(applicationPackage); assertEquals(Change.of(version).with(app1.application().latestVersion().get()), app1.instance().change()); + // Upgrade platform. app2.runJob(systemTest); - app1.jobAborted(stagingTest) - .runJob(stagingTest) + app1.runJob(stagingTest) .runJob(productionUsWest1) .runJob(productionUsEast3); - app1.runJob(stagingTest); // Tests with only the outstanding application change. - app2.runJob(systemTest); // Tests with only the outstanding application change. + // Upgrade revision + tester.clock().advance(Duration.ofSeconds(1)); // Ensure we see revision as rolling after upgrade. + app2.runJob(systemTest); // R + app1.runJob(stagingTest) // R + .runJob(productionUsWest1); // R + // productionUsEast3 won't change revision before its production test has completed for the upgrade, which is one of the last jobs! tester.clock().advance(Duration.ofHours(2)); app1.runJob(productionEuWest1); tester.clock().advance(Duration.ofHours(1)); app1.runJob(productionAwsUsEast1a); - tester.triggerJobs(); app1.runJob(testAwsUsEast1a); + tester.clock().advance(Duration.ofSeconds(1)); + app1.runJob(productionAwsUsEast1a); // R + app1.runJob(testAwsUsEast1a); // R app1.runJob(productionApNortheast2); app1.runJob(productionApNortheast1); tester.clock().advance(Duration.ofHours(1)); app1.runJob(testApNortheast1); app1.runJob(testApNortheast2); + app1.runJob(productionApNortheast2); // R + app1.runJob(productionApNortheast1); // R app1.runJob(testUsEast3); app1.runJob(productionApSoutheast1); + tester.clock().advance(Duration.ofSeconds(1)); + app1.runJob(productionUsEast3); // R + tester.clock().advance(Duration.ofHours(2)); + app1.runJob(productionEuWest1); // R + tester.clock().advance(Duration.ofMinutes(330)); + app1.runJob(testApNortheast1); // R + app1.runJob(testApNortheast2); // R + app1.runJob(testUsEast3); // R + app1.runJob(productionApSoutheast1); // R + + app1.runJob(stagingTest); // Tests with only the outstanding application change. + app2.runJob(systemTest); // Tests with only the outstanding application change. // Confidence rises to high, for the new version, and instance 2 starts to upgrade. tester.controllerTester().computeVersionStatus(); tester.upgrader().maintain(); tester.outstandingChangeDeployer().run(); tester.triggerJobs(); - assertEquals(2, tester.jobs().active().size()); + assertEquals(tester.jobs().active().toString(), 1, tester.jobs().active().size()); assertEquals(Change.empty(), app1.instance().change()); assertEquals(Change.of(version), app2.instance().change()); assertEquals(Change.empty(), app3.instance().change()); - app1.runJob(stagingTest); // Never completed successfully with just the upgrade. - app2.runJob(systemTest) // Never completed successfully with just the upgrade. - .runJob(productionEuWest1) + app2.runJob(productionEuWest1) .failDeployment(testEuWest1); - // Instance 2 failed the last job, and now exist block window, letting application change roll out with the upgrade. + // Instance 2 failed the last job, and now exits block window, letting application change roll out with the upgrade. tester.clock().advance(Duration.ofDays(1)); // Leave block window for revisions. tester.upgrader().maintain(); tester.outstandingChangeDeployer().run(); @@ -1224,41 +1244,48 @@ public class DeploymentTriggerTest { assertEquals(Change.empty(), app2.instance().change()); assertEquals(Change.empty(), app3.instance().change()); - // Two first instances upgraded and with new revision — last instance gets change from whatever maintainer runs first. + // Two first instances upgraded and with new revision — last instance gets both changes as well. tester.upgrader().maintain(); tester.outstandingChangeDeployer().run(); - assertEquals(Change.of(version), app3.instance().change()); + assertEquals(Change.of(version).with(app1.lastSubmission().get()), app3.instance().change()); tester.deploymentTrigger().cancelChange(app3.instanceId(), ALL); tester.outstandingChangeDeployer().run(); tester.upgrader().maintain(); - assertEquals(Change.of(app1.application().latestVersion().get()), app3.instance().change()); + assertEquals(Change.of(app1.lastSubmission().get()), app3.instance().change()); app3.runJob(productionEuWest1); tester.upgrader().maintain(); + app1.runJob(stagingTest); app3.runJob(productionEuWest1); tester.triggerJobs(); assertEquals(List.of(), tester.jobs().active()); assertEquals(Change.empty(), app3.instance().change()); } + // TODO test new revision allowed exactly when dependent instance is failing + // TODO test multi-tier pipeline with various policies + // TODO test multi-tier pipeline with upgrade after new version is the candidate + @Test - public void testChangeCompletion() { + public void testRevisionJoinsUpgradeWithSeparateRollout() { var app = tester.newDeploymentContext().submit().deploy(); var version = new Version("7.1"); tester.controllerTester().upgradeSystem(version); tester.upgrader().maintain(); app.runJob(systemTest).runJob(stagingTest).runJob(productionUsCentral1); + tester.clock().advance(Duration.ofMinutes(1)); app.submit(); - tester.triggerJobs(); - tester.outstandingChangeDeployer().run(); - assertEquals(Change.of(version), app.instance().change()); + assertEquals(Change.of(version).with(app.lastSubmission().get()), app.instance().change()); + app.runJob(systemTest).runJob(stagingTest).runJob(productionUsCentral1); app.runJob(productionUsEast3).runJob(productionUsWest1); tester.triggerJobs(); - tester.outstandingChangeDeployer().run(); assertEquals(Change.of(app.lastSubmission().get()), app.instance().change()); + + app.runJob(productionUsEast3).runJob(productionUsWest1); + assertEquals(Change.empty(), app.instance().change()); } @Test diff --git a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/OutstandingChangeDeployerTest.java b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/OutstandingChangeDeployerTest.java index 8e90cfc69f3..acaa8b24d9d 100644 --- a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/OutstandingChangeDeployerTest.java +++ b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/OutstandingChangeDeployerTest.java @@ -2,6 +2,7 @@ package com.yahoo.vespa.hosted.controller.maintenance; import com.yahoo.component.Version; +import com.yahoo.vespa.hosted.controller.api.integration.deployment.ApplicationVersion; import com.yahoo.vespa.hosted.controller.api.integration.deployment.JobType; import com.yahoo.vespa.hosted.controller.api.integration.deployment.SourceRevision; import com.yahoo.vespa.hosted.controller.application.pkg.ApplicationPackage; @@ -30,42 +31,34 @@ public class OutstandingChangeDeployerTest { OutstandingChangeDeployer deployer = new OutstandingChangeDeployer(tester.controller(), Duration.ofMinutes(10)); ApplicationPackage applicationPackage = new ApplicationPackageBuilder() .region("us-west-1") + .upgradeRevision("separate") .build(); - var app1 = tester.newDeploymentContext("tenant", "app1", "default").submit(applicationPackage).deploy(); + var app = tester.newDeploymentContext().submit(applicationPackage).deploy(); Version version = new Version(6, 2); - tester.deploymentTrigger().triggerChange(app1.instanceId(), Change.of(version)); - tester.deploymentTrigger().triggerReadyJobs(); + tester.deploymentTrigger().triggerChange(app.instanceId(), Change.of(version)); + assertEquals(Change.of(version), app.instance().change()); + assertFalse(app.deploymentStatus().outstandingChange(app.instance().name()).hasTargets()); - assertEquals(Change.of(version), app1.instance().change()); - assertFalse(app1.deploymentStatus().outstandingChange(app1.instance().name()).hasTargets()); + app.submit(applicationPackage); + Optional<ApplicationVersion> revision = app.lastSubmission(); + assertFalse(app.deploymentStatus().outstandingChange(app.instance().name()).hasTargets()); + assertEquals(Change.of(version).with(revision.get()), app.instance().change()); - assertEquals(1, app1.application().latestVersion().get().buildNumber().getAsLong()); - app1.submit(applicationPackage, Optional.of(new SourceRevision("repository1", "master", "cafed00d"))); + app.submit(applicationPackage); + Optional<ApplicationVersion> outstanding = app.lastSubmission(); + assertTrue(app.deploymentStatus().outstandingChange(app.instance().name()).hasTargets()); + assertEquals(Change.of(version).with(revision.get()), app.instance().change()); - assertTrue(app1.deploymentStatus().outstandingChange(app1.instance().name()).hasTargets()); - assertEquals("1.0.2-cafed00d", app1.deploymentStatus().outstandingChange(app1.instance().name()).application().get().id()); - app1.assertRunning(JobType.systemTest); - app1.assertRunning(JobType.stagingTest); - assertEquals(2, tester.jobs().active().size()); + tester.outstandingChangeDeployer().run(); + assertTrue(app.deploymentStatus().outstandingChange(app.instance().name()).hasTargets()); + assertEquals(Change.of(version).with(revision.get()), app.instance().change()); - deployer.maintain(); - tester.triggerJobs(); - assertEquals("No effect as job is in progress", 2, tester.jobs().active().size()); - assertEquals("1.0.2-cafed00d", app1.deploymentStatus().outstandingChange(app1.instance().name()).application().get().id()); - - app1.runJob(JobType.systemTest).runJob(JobType.stagingTest).runJob(JobType.productionUsWest1) - .runJob(JobType.stagingTest).runJob(JobType.systemTest); - assertEquals("Upgrade done", 0, tester.jobs().active().size()); - - deployer.maintain(); - tester.triggerJobs(); - assertEquals("1.0.2-cafed00d", app1.instance().change().application().get().id()); - List<Run> runs = tester.jobs().active(); - assertEquals(1, runs.size()); - app1.assertRunning(JobType.productionUsWest1); - assertFalse(app1.deploymentStatus().outstandingChange(app1.instance().name()).hasTargets()); + app.deploy(); + tester.outstandingChangeDeployer().run(); + assertFalse(app.deploymentStatus().outstandingChange(app.instance().name()).hasTargets()); + assertEquals(Change.of(outstanding.get()), app.instance().change()); } } diff --git a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/UpgraderTest.java b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/UpgraderTest.java index f9a976fcadc..0e61ffd3ea6 100644 --- a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/UpgraderTest.java +++ b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/UpgraderTest.java @@ -167,7 +167,6 @@ public class UpgraderTest { // --- Failing application is repaired by changing the application, causing confidence to move above 'high' threshold // Deploy application change default0.submit(applicationPackage("default")); - default0.triggerJobs().jobAborted(stagingTest); default0.deploy(); tester.controllerTester().computeVersionStatus(); @@ -570,12 +569,12 @@ public class UpgraderTest { // A new revision is submitted and starts rolling out. app.submit(applicationPackage); - // production-us-central-1 isn't triggered, as the revision + platform is the new change to roll out. + // production-us-central-1 is re-triggered with upgrade until revision catches up. tester.triggerJobs(); - assertEquals(2, tester.jobs().active().size()); + assertEquals(3, tester.jobs().active().size()); app.runJob(systemTest).runJob(stagingTest).runJob(productionUsWest1); // us-central-1 has an older version, and needs a new staging test to begin. - app.runJob(stagingTest); + app.runJob(stagingTest).triggerJobs().jobAborted(productionUsCentral1); // Retry will include revision. tester.triggerJobs(); // Triggers us-central-1 before platform upgrade is cancelled. // A new version is also released, cancelling the upgrade, since it is failing on a now outdated version. @@ -799,8 +798,10 @@ public class UpgraderTest { app.instance().change().application().get().id().equals(applicationVersion)); // Deployment completes - app.runJob(systemTest).runJob(stagingTest).runJob(productionUsWest1).runJob(productionUsEast3); - assertTrue("All jobs consumed", tester.jobs().active().isEmpty()); + app.runJob(systemTest).runJob(stagingTest) + .runJob(productionUsWest1) + .runJob(productionUsEast3); + assertEquals("All jobs consumed", List.of(), tester.jobs().active()); for (Deployment deployment : app.instance().deployments().values()) { assertEquals(version, deployment.version()); |