From ea83ed12a7583335c77329be9afd415849263354 Mon Sep 17 00:00:00 2001 From: Jon Marius Venstad Date: Fri, 8 Nov 2019 12:17:16 +0100 Subject: Consider all known runs when checking whether versions are verified --- .../controller/deployment/DeploymentTrigger.java | 81 +++++++++------------- .../hosted/controller/deployment/JobList.java | 21 +++++- .../hosted/controller/deployment/RunList.java | 43 ++++++++++++ .../application/JobControllerApiHandlerHelper.java | 14 ++-- .../deployment/DeploymentTriggerTest.java | 10 +-- .../controller/maintenance/UpgraderTest.java | 15 +--- 6 files changed, 106 insertions(+), 78 deletions(-) create mode 100644 controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/RunList.java (limited to 'controller-server') diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentTrigger.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentTrigger.java index f1b93c7b3b2..e7d70793898 100644 --- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentTrigger.java +++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentTrigger.java @@ -205,8 +205,9 @@ public class DeploymentTrigger { controller.systemVersion()); String reason = "Job triggered manually by " + user; var jobStatus = jobs.deploymentStatus(application).instanceJobs(instance.name()); - return (jobType.isProduction() && ! isTested(jobStatus, versions) - ? testJobs(application.deploymentSpec(), application.change(), instance, jobStatus, versions, reason, clock.instant(), __ -> true).stream() + var jobList = JobList.from(jobStatus.values()); + return (jobType.isProduction() && ! isTested(jobList, versions) + ? testJobs(application.deploymentSpec(), application.change(), instance, jobList, versions, reason, clock.instant(), __ -> true).stream() : Stream.of(deploymentJob(instance, versions, application.change(), jobType, jobStatus.get(jobType), reason, clock.instant()))) .peek(this::trigger) .map(Job::jobType).collect(toList()); @@ -301,10 +302,11 @@ public class DeploymentTrigger { DeploymentStatus deploymentStatus = this.jobs.deploymentStatus(application); for (Instance instance : instances) { var jobStatus = deploymentStatus.instanceJobs(instance.name()); + var jobList = JobList.from(jobStatus.values()); Change change = application.change(); - Optional completedAt = max(Optional.ofNullable(jobStatus.get(systemTest)) + Optional completedAt = max(jobList.type(systemTest).first() .flatMap(job -> job.lastSuccess().map(run -> run.end().get())), - Optional.ofNullable(jobStatus.get(stagingTest)) + jobList.type(stagingTest).first() .flatMap(job -> job.lastSuccess().map(run -> run.end().get()))); String reason = "New change available"; List testJobs = null; // null means "uninitialised", while empty means "don't run any jobs". @@ -318,17 +320,14 @@ public class DeploymentTrigger { for (JobType job : remainingJobs) { Versions versions = Versions.from(change, application, deploymentFor(instance, job), controller.systemVersion()); - if (isTested(jobStatus, versions)) { - if (completedAt.isPresent() && canTrigger(job, jobStatus, versions, instance, application.deploymentSpec(), stepJobs)) { + if (isTested(jobList, versions)) { + if (completedAt.isPresent() && canTrigger(job, jobList, versions, instance, application.deploymentSpec(), stepJobs)) { jobs.add(deploymentJob(instance, versions, change, job, jobStatus.get(job), reason, completedAt.get())); } - if ( ! alreadyTriggered(jobStatus, versions) && testJobs == null) { - testJobs = emptyList(); - } } else if (testJobs == null) { testJobs = testJobs(application.deploymentSpec(), - change, instance, jobStatus, versions, + change, instance, jobList, versions, String.format("Testing deployment for %s (%s)", job.jobName(), versions.toString()), completedAt.orElseGet(clock::instant)); @@ -349,7 +348,7 @@ public class DeploymentTrigger { } } if (testJobs == null) { // If nothing to test, but outstanding commits, test those. - testJobs = testJobs(application.deploymentSpec(), change, instance, jobStatus, + testJobs = testJobs(application.deploymentSpec(), change, instance, jobList, Versions.from(application.outstandingChange().onTopOf(change), application, steps.sortedDeployments(instance.productionDeployments().values()).stream().findFirst(), @@ -363,21 +362,22 @@ public class DeploymentTrigger { } /** Returns whether given job should be triggered */ - private boolean canTrigger(JobType job, Map status, Versions versions, Instance instance, DeploymentSpec deploymentSpec, List parallelJobs) { - if (status.get(job).isRunning()) return false; + private boolean canTrigger(JobType type, JobList jobList, Versions versions, Instance instance, DeploymentSpec deploymentSpec, List parallelJobs) { + if ( ! jobList.type(type).running().isEmpty()) return false; // Are we already running jobs which are not in the set which can run in parallel with this? - if (parallelJobs != null && ! parallelJobs.containsAll(runningProductionJobs(status))) return false; + if ( parallelJobs != null + && ! parallelJobs.containsAll(jobList.running().production().mapToList(job -> job.id().type()))) return false; // Are there another suspended deployment such that we shouldn't simultaneously change this? - if (job.isProduction() && isSuspendedInAnotherZone(instance, job.zone(controller.system()))) return false; + if (type.isProduction() && isSuspendedInAnotherZone(instance, type.zone(controller.system()))) return false; - return triggerAt(clock.instant(), job, status.get(job), versions, instance, deploymentSpec); + return triggerAt(clock.instant(), type, jobList.type(type).first().get(), versions, instance, deploymentSpec); } /** Returns whether given job should be triggered */ - private boolean canTrigger(JobType job, Map status, Versions versions, Instance instance, DeploymentSpec deploymentSpec) { - return canTrigger(job, status, versions, instance, deploymentSpec, null); + private boolean canTrigger(JobType job, JobList jobList, Versions versions, Instance instance, DeploymentSpec deploymentSpec) { + return canTrigger(job, jobList, versions, instance, deploymentSpec, null); } private boolean isSuspendedInAnotherZone(Instance instance, ZoneId zone) { @@ -466,28 +466,10 @@ public class DeploymentTrigger { return change.downgrades(deployment.version()) || change.downgrades(deployment.applicationVersion()); } - private boolean isTested(Map status, Versions versions) { - return testedIn(systemTest, status.get(systemTest), versions) - && testedIn(stagingTest, status.get(stagingTest), versions) - || alreadyTriggered(status, versions); - } - - public boolean testedIn(JobType testType, JobStatus status, Versions versions) { - if (testType == systemTest) - return successOn(status, versions).isPresent(); - if (testType == stagingTest) - return successOn(status, versions).map(Run::versions).filter(versions::sourcesMatchIfPresent).isPresent(); - throw new IllegalArgumentException(testType + " is not a test job!"); - } - - public boolean alreadyTriggered(Map status, Versions versions) { - return status.values().stream() - .filter(job -> job.id().type().isProduction()) - .anyMatch(job -> job.lastTriggered() - .map(Run::versions) - .filter(versions::targetsMatch) - .filter(versions::sourcesMatchIfPresent) - .isPresent()); + public boolean isTested(JobList jobs, Versions versions) { + return ! jobs.type(systemTest).successOn(versions).isEmpty() + && ! jobs.type(stagingTest).successOn(versions).isEmpty() + || ! jobs.production().triggeredOn(versions).isEmpty(); } // ---------- Change management o_O ---------- @@ -527,23 +509,22 @@ public class DeploymentTrigger { /** * Returns the list of test jobs that should run now, and that need to succeed on the given versions for it to be considered tested. */ - private List testJobs(DeploymentSpec deploymentSpec, Change change, Instance instance, Map status, Versions versions, + private List testJobs(DeploymentSpec deploymentSpec, Change change, Instance instance, JobList jobList, Versions versions, String reason, Instant availableSince) { - return testJobs(deploymentSpec, change, instance, status, versions, reason, availableSince, - jobType -> canTrigger(jobType, status, versions, instance, deploymentSpec)); + return testJobs(deploymentSpec, change, instance, jobList, versions, reason, availableSince, + jobType -> canTrigger(jobType, jobList, versions, instance, deploymentSpec)); } /** * Returns the list of test jobs that need to succeed on the given versions for it to be considered tested, filtered by the given condition. */ - private List testJobs(DeploymentSpec deploymentSpec, Change change, Instance instance, Map status, Versions versions, + private List testJobs(DeploymentSpec deploymentSpec, Change change, Instance instance, JobList jobList, Versions versions, String reason, Instant availableSince, Predicate condition) { List jobs = new ArrayList<>(); for (JobType jobType : new DeploymentSteps(deploymentSpec.requireInstance(instance.name()), controller::system).testJobs()) { // TODO jonmv: Allow cross-instance validation - Optional completion = successOn(status.get(jobType), versions) - .filter(run -> versions.sourcesMatchIfPresent(run.versions()) || jobType == systemTest); - if (completion.isEmpty() && condition.test(jobType)) - jobs.add(deploymentJob(instance, versions, change, jobType, status.get(jobType), reason, availableSince)); + if ( jobList.type(jobType).successOn(versions).isEmpty() + && condition.test(jobType)) + jobs.add(deploymentJob(instance, versions, change, jobType, jobList.type(jobType).first().get(), reason, availableSince)); } return jobs; } @@ -552,8 +533,8 @@ public class DeploymentTrigger { if (jobStatus.isOutOfCapacity()) reason += "; retrying on out of capacity"; var triggering = JobRun.triggering(versions.targetPlatform(), versions.targetApplication(), - versions.sourcePlatform(), versions.sourceApplication(), - reason, clock.instant()); + versions.sourcePlatform(), versions.sourceApplication(), + reason, clock.instant()); return new Job(instance, triggering, jobType, availableSince, jobStatus.isOutOfCapacity(), change.application().isPresent()); } diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/JobList.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/JobList.java index 1ef83153bef..56a6bec5777 100644 --- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/JobList.java +++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/JobList.java @@ -44,6 +44,10 @@ public class JobList extends AbstractFilteringList { return matching(job -> ! job.isSuccess()); } + public JobList running() { + return matching(job -> job.isRunning()); + } + /** Returns the subset of jobs which must be failing due to an application change */ public JobList failingApplicationChange() { return matching(JobList::failingApplicationChange); @@ -54,9 +58,14 @@ public class JobList extends AbstractFilteringList { return matching(job -> job.lastStatus().map(status::equals).orElse(false)); } + /** Returns the subset of jobs of the given type -- most useful when negated */ + public JobList type(Collection types) { + return matching(job -> types.contains(job.id().type())); + } + /** Returns the subset of jobs of the given type -- most useful when negated */ public JobList type(JobType... types) { - return matching(job -> List.of(types).contains(job.id().type())); + return type(List.of(types)); } /** Returns the subset of jobs of which are production jobs */ @@ -64,6 +73,16 @@ public class JobList extends AbstractFilteringList { return matching(job -> job.id().type().isProduction()); } + /** Returns the jobs with any runs matching the given versions — targets only for system test, everything present otherwise. */ + public JobList triggeredOn(Versions versions) { + return matching(job -> ! RunList.from(job).on(versions).isEmpty()); + } + + /** Returns the jobs with successful runs matching the given versions — targets only for system test, everything present otherwise. */ + public JobList successOn(Versions versions) { + return matching(job -> ! RunList.from(job).status(RunStatus.success).on(versions).isEmpty()); + } + // ----------------------------------- JobRun filtering /** Returns the list in a state where the next filter is for the lastTriggered run type */ diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/RunList.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/RunList.java new file mode 100644 index 00000000000..dc17e1c17bb --- /dev/null +++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/RunList.java @@ -0,0 +1,43 @@ +package com.yahoo.vespa.hosted.controller.deployment; + +import com.yahoo.collections.AbstractFilteringList; +import com.yahoo.vespa.hosted.controller.api.integration.deployment.JobType; + +import java.util.Collection; +import java.util.List; + +/** + * List for filtering deployment job {@link Run}s. + * + * @author jonmv + */ +public class RunList extends AbstractFilteringList { + + private RunList(Collection items, boolean negate) { + super(items, negate, RunList::new); + } + + public static RunList from(Collection runs) { + return new RunList(runs, false); + } + + public static RunList from(JobStatus job) { + return from(job.runs().descendingMap().values()); + } + + /** Returns the jobs with runs matching the given versions — targets only for system test, everything present otherwise. */ + public RunList on(Versions versions) { + return matching(run -> matchingVersions(run, versions)); + } + + /** Returns the runs with status among the given. */ + public RunList status(RunStatus... status) { + return matching(run -> List.of(status).contains(run.status())); + } + + private static boolean matchingVersions(Run run, Versions versions) { + return versions.targetsMatch(run.versions()) + && (versions.sourcesMatchIfPresent(run.versions()) || run.id().type() == JobType.systemTest); + } + +} diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/restapi/application/JobControllerApiHandlerHelper.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/restapi/application/JobControllerApiHandlerHelper.java index 9a9a9798c6d..ed9612a7343 100644 --- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/restapi/application/JobControllerApiHandlerHelper.java +++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/restapi/application/JobControllerApiHandlerHelper.java @@ -25,8 +25,10 @@ import com.yahoo.vespa.hosted.controller.application.Deployment; import com.yahoo.vespa.hosted.controller.application.TenantAndApplicationId; import com.yahoo.vespa.hosted.controller.deployment.DeploymentSteps; import com.yahoo.vespa.hosted.controller.deployment.JobController; +import com.yahoo.vespa.hosted.controller.deployment.JobList; import com.yahoo.vespa.hosted.controller.deployment.JobStatus; import com.yahoo.vespa.hosted.controller.deployment.Run; +import com.yahoo.vespa.hosted.controller.deployment.RunList; import com.yahoo.vespa.hosted.controller.deployment.RunLog; import com.yahoo.vespa.hosted.controller.deployment.RunStatus; import com.yahoo.vespa.hosted.controller.deployment.Step; @@ -239,11 +241,12 @@ class JobControllerApiHandlerHelper { jobObject.setLong("pausedUntil", until))); int runs = 0; Cursor runArray = jobObject.setArray("runs"); + JobList jobList = JobList.from(status.values()); if (type.isTest()) { Deque> pending = new ArrayDeque<>(); pendingProduction.entrySet().stream() - .filter(typeVersions -> ! controller.applications().deploymentTrigger().testedIn(type, status.get(type), typeVersions.getValue())) - .filter(typeVersions -> ! controller.applications().deploymentTrigger().alreadyTriggered(status, typeVersions.getValue())) + .filter(typeVersions -> jobList.type(type).successOn(typeVersions.getValue()).isEmpty()) + .filter(typeVersions -> jobList.production().triggeredOn(typeVersions.getValue()).isEmpty()) .collect(groupingBy(Map.Entry::getValue, LinkedHashMap::new, Collectors.mapping(Map.Entry::getKey, toList()))) @@ -279,12 +282,13 @@ class JobControllerApiHandlerHelper { pendingObject.setString("cooldown", "failed"); else { int pending = 0; - if ( ! controller.applications().deploymentTrigger().alreadyTriggered(status, versions)) { - if ( ! controller.applications().deploymentTrigger().testedIn(systemTest, status.get(systemTest), versions)) { + controller.applications().deploymentTrigger(); + if (jobList.production().triggeredOn(versions).isEmpty()) { + if (jobList.type(systemTest).successOn(versions).isEmpty()) { pending++; pendingObject.setString(shortNameOf(systemTest, controller.system()), statusOf(controller, instance.id(), systemTest, versions)); } - if ( ! controller.applications().deploymentTrigger().testedIn(stagingTest, status.get(stagingTest), versions)) { + if (jobList.type(stagingTest).successOn(versions).isEmpty()) { pending++; pendingObject.setString(shortNameOf(stagingTest, controller.system()), statusOf(controller, instance.id(), stagingTest, versions)); } diff --git a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentTriggerTest.java b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentTriggerTest.java index 8fb766164c2..9d17acc6a56 100644 --- a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentTriggerTest.java +++ b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentTriggerTest.java @@ -504,7 +504,7 @@ public class DeploymentTriggerTest { Version version2 = Version.fromString("7.2"); tester.controllerTester().upgradeSystem(version2); tester.upgrader().maintain(); - app1.runJob(systemTest).runJob(stagingTest) // tests for previous version. + app1.runJob(systemTest).runJob(stagingTest) // tests for previous version — these are "reused" later. .runJob(systemTest).runJob(stagingTest).timeOutConvergence(productionUsCentral1); assertEquals(version2, app1.deployment(productionUsCentral1.zone(main)).version()); Instant triggered = app1.instance().deploymentJobs().jobStatus().get(productionUsCentral1).lastTriggered().get().at(); @@ -517,18 +517,12 @@ public class DeploymentTriggerTest { assertEquals("Change becomes latest non-broken version", Change.of(version1), app1.application().change()); // version1 proceeds 'til the last job, where it fails; us-central-1 is skipped, as current change is strictly dominated by what's deployed there. - app1.runJob(systemTest) - .runJob(stagingTest) - .failDeployment(productionEuWest1); + app1.failDeployment(productionEuWest1); assertEquals(triggered, app1.instance().deploymentJobs().jobStatus().get(productionUsCentral1).lastTriggered().get().at()); - // Eagerly triggered system and staging tests complete. - app1.runJob(systemTest).runJob(stagingTest); - // Roll out a new application version, which gives a dual change -- this should trigger us-central-1, but only as long as it hasn't yet deployed there. ApplicationVersion revision1 = app1.lastSubmission().get(); app1.submit(applicationPackage); - app1.jobAborted(productionEuWest1); ApplicationVersion revision2 = app1.lastSubmission().get(); app1.runJob(systemTest).runJob(stagingTest); assertEquals(Change.of(version1).with(revision2), app1.application().change()); diff --git a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/UpgraderTest.java b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/UpgraderTest.java index 36039c47025..880d74b0da6 100644 --- a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/UpgraderTest.java +++ b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/UpgraderTest.java @@ -578,7 +578,6 @@ public class UpgraderTest { // us-central-1 succeeds upgrade to 6.3, with the revision, but us-east-3 wants to proceed with only the revision change. app.runJob(productionUsCentral1); app.runJob(systemTest).runJob(stagingTest).runJob(productionUsEast3); - app.runJob(systemTest).runJob(stagingTest); // Test last changes outside prod. assertEquals(List.of(), tester.jobs().active()); assertEquals(new Version("6.3"), app.deployment(ZoneId.from("prod", "us-central-1")).version()); @@ -936,7 +935,7 @@ public class UpgraderTest { // Application downgrades to pinned version. tester.abortAll(); - context.runJob(systemTest).runJob(stagingTest).runJob(productionUsCentral1); + context.runJob(stagingTest).runJob(productionUsCentral1); assertTrue(context.application().change().hasTargets()); context.runJob(productionUsWest1); // us-east-3 never upgraded, so no downgrade is needed. assertFalse(context.application().change().hasTargets()); @@ -1034,21 +1033,9 @@ public class UpgraderTest { assertEquals(v2, application.deployment(ZoneId.from("prod", "us-west-1")).version()); assertEquals(v1, application.deployment(ZoneId.from("prod", "us-east-3")).version()); - // Need to test v2->v3 combination again before upgrading second deployment (not really, but this is a limitation of lastSuccess) - tester.triggerJobs(); - assertEquals(v2, application.instance().deploymentJobs().jobStatus().get(stagingTest).lastTriggered().get().sourcePlatform().get()); - assertEquals(v3, application.instance().deploymentJobs().jobStatus().get(stagingTest).lastTriggered().get().platform()); - application.runJob(stagingTest); - // Second deployment upgrades application.runJob(productionUsWest1); - // ... now we have to test v1->v3 again :( - tester.triggerJobs(); - assertEquals(v1, application.instance().deploymentJobs().jobStatus().get(stagingTest).lastTriggered().get().sourcePlatform().get()); - assertEquals(v3, application.instance().deploymentJobs().jobStatus().get(stagingTest).lastTriggered().get().platform()); - application.runJob(stagingTest); - // Upgrade completes application.runJob(productionUsEast3); assertTrue("Upgrade complete", application.application().change().isEmpty()); -- cgit v1.2.3