diff options
author | Jon Marius Venstad <jonmv@users.noreply.github.com> | 2022-05-06 10:12:38 +0200 |
---|---|---|
committer | GitHub <noreply@github.com> | 2022-05-06 10:12:38 +0200 |
commit | eed0a45c502785321272fb5e5166b0c188daef04 (patch) | |
tree | 48ac5618c489f8b44d9d0b0767aa16183803dd43 /controller-server | |
parent | e0b3120c5c5b96612d899c4fe38b625f6f3aa13c (diff) | |
parent | 84bf960f5233c9a9e9e78a7a1caa48cbcb1f1da9 (diff) |
Merge pull request #22471 from vespa-engine/jonmv/fix-dep-orch-hiccup-for-tests-only-instances
Jonmv/fix dep orch hiccup for tests only instances
Diffstat (limited to 'controller-server')
5 files changed, 167 insertions, 28 deletions
diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentStatus.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentStatus.java index cc7031bab5a..52283a3e27d 100644 --- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentStatus.java +++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentStatus.java @@ -211,6 +211,11 @@ public class DeploymentStatus { /** The set of jobs that need to run for the given changes to be considered complete. */ public boolean hasCompleted(InstanceName instance, Change change) { + if ( ! application.deploymentSpec().requireInstance(instance).concerns(prod)) { + if (newestTested(instance, run -> run.versions().targetRevision()).map(change::downgrades).orElse(false)) return true; + if (newestTested(instance, run -> run.versions().targetPlatform()).map(change::downgrades).orElse(false)) return true; + } + return jobsToRun(Map.of(instance, change), false).isEmpty(); } @@ -247,6 +252,14 @@ public class DeploymentStatus { .deployments().get(job.type().zone())); } + private <T extends Comparable<T>> Optional<T> newestTested(InstanceName instance, Function<Run, T> runMapper) { + return instanceJobs().get(application.id().instance(instance)) + .type(systemTest, stagingTest) + .asList().stream().flatMap(jobs -> jobs.runs().values().stream()) + .filter(Run::hasSucceeded) + .map(runMapper) + .max(naturalOrder()); + } /** * The change to a revision which all dependencies of the given instance has completed, * which does not downgrade any deployments in the instance, @@ -263,16 +276,20 @@ public class DeploymentStatus { int nextRisk = 0; int skippedCumulativeRisk = 0; Instant readySince = now; + + Optional<RevisionId> newestRevision = application.productionDeployments() + .getOrDefault(instance, List.of()).stream() + .map(Deployment::revision).max(naturalOrder()); Change candidate = Change.empty(); for (ApplicationVersion version : application.revisions().deployable(ascending)) { // A revision is only a candidate if it upgrades, and does not downgrade, this instance. Change change = Change.of(version.id()); - if (application.productionDeployments().getOrDefault(instance, List.of()).stream() - .anyMatch(deployment -> change.downgrades(deployment.revision()))) continue; - if ( ! application.require(instance).change().revision().map(change::upgrades).orElse(true)) continue; - if (hasCompleted(instance, change)) + if ( newestRevision.isPresent() && change.downgrades(newestRevision.get()) + || ! application.require(instance).change().revision().map(change::upgrades).orElse(true) + || hasCompleted(instance, change)) { if (ascending) continue; // Keep looking for the next revision which is an upgrade, or ... else return Change.empty(); // ... if the latest is already complete, there's nothing outstanding. + } // This revision contains something new, so start aggregating the risk score. skippedCumulativeRisk += version.risk(); diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/Upgrader.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/Upgrader.java index 4ed34a91029..9cddbd0b903 100644 --- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/Upgrader.java +++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/Upgrader.java @@ -101,7 +101,8 @@ public class Upgrader extends ControllerMaintainer { cancelUpgradesOf(outdated.upgrading(), "Upgrading to outdated versions"); // Prefer the newest target for each instance. - remaining = remaining.not().matching(eligible.asList()::contains); + remaining = remaining.not().matching(eligible.asList()::contains) + .not().hasCompleted(Change.of(version)); for (ApplicationId id : outdated.and(eligible.not().upgrading()).not().changingRevision()) targets.put(id, version); } diff --git a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentContext.java b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentContext.java index fd294f9cf9f..ae4b6259da1 100644 --- a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentContext.java +++ b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentContext.java @@ -1,13 +1,12 @@ // Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. package com.yahoo.vespa.hosted.controller.deployment; -import com.google.common.base.Supplier; import com.google.common.base.Suppliers; import com.yahoo.component.Version; import com.yahoo.config.provision.ApplicationId; import com.yahoo.config.provision.AthenzDomain; import com.yahoo.config.provision.AthenzService; -import com.yahoo.config.provision.ClusterSpec; +import com.yahoo.config.provision.ClusterSpec.Id; import com.yahoo.config.provision.Environment; import com.yahoo.config.provision.HostName; import com.yahoo.config.provision.zone.ZoneId; @@ -20,19 +19,20 @@ import com.yahoo.vespa.hosted.controller.ControllerTester; import com.yahoo.vespa.hosted.controller.Instance; import com.yahoo.vespa.hosted.controller.api.identifiers.DeploymentId; import com.yahoo.vespa.hosted.controller.api.integration.configserver.ConfigServerException; +import com.yahoo.vespa.hosted.controller.api.integration.configserver.ConfigServerException.ErrorCode; import com.yahoo.vespa.hosted.controller.api.integration.configserver.NodeFilter; import com.yahoo.vespa.hosted.controller.api.integration.deployment.JobId; import com.yahoo.vespa.hosted.controller.api.integration.deployment.JobType; import com.yahoo.vespa.hosted.controller.api.integration.deployment.RevisionId; import com.yahoo.vespa.hosted.controller.api.integration.deployment.RunId; import com.yahoo.vespa.hosted.controller.api.integration.deployment.SourceRevision; -import com.yahoo.vespa.hosted.controller.api.integration.deployment.TesterCloud; import com.yahoo.vespa.hosted.controller.api.integration.deployment.TesterCloud.Status; import com.yahoo.vespa.hosted.controller.api.integration.deployment.TesterId; import com.yahoo.vespa.hosted.controller.application.Deployment; import com.yahoo.vespa.hosted.controller.application.EndpointId; import com.yahoo.vespa.hosted.controller.application.TenantAndApplicationId; import com.yahoo.vespa.hosted.controller.application.pkg.ApplicationPackage; +import com.yahoo.vespa.hosted.controller.deployment.InternalStepRunner.Timeouts; import com.yahoo.vespa.hosted.controller.integration.ConfigServerMock; import com.yahoo.vespa.hosted.controller.maintenance.JobRunner; import com.yahoo.vespa.hosted.controller.maintenance.NameServiceDispatcher; @@ -50,9 +50,11 @@ import java.util.HashMap; import java.util.LinkedHashMap; import java.util.List; import java.util.Map; +import java.util.Map.Entry; import java.util.Optional; import java.util.Set; import java.util.concurrent.atomic.AtomicLong; +import java.util.function.Supplier; import static com.yahoo.vespa.hosted.controller.deployment.Step.Status.failed; import static com.yahoo.vespa.hosted.controller.deployment.Step.Status.succeeded; @@ -110,7 +112,7 @@ public class DeploymentContext { .parallel("us-west-1", "us-east-3") .emailRole("author") .emailAddress("b@a") - .build()); + .build())::get; private static final Supplier<ApplicationPackage> publicCdApplicationPackage = Suppliers.memoize(() -> new ApplicationPackageBuilder() .athenzIdentity(AthenzDomain.from("domain"), AthenzService.from("service")) @@ -119,7 +121,7 @@ public class DeploymentContext { .emailRole("author") .emailAddress("b@a") .trust(generateCertificate()) - .build()); + .build())::get; public static final SourceRevision defaultSourceRevision = new SourceRevision("repository1", "master", "commit1"); @@ -268,7 +270,7 @@ public class DeploymentContext { /** Add a routing policy for this in given zone, with status set to inactive */ public DeploymentContext addInactiveRoutingPolicy(ZoneId zone) { var clusterId = "default-inactive"; - var id = new RoutingPolicyId(instanceId, ClusterSpec.Id.from(clusterId), zone); + var id = new RoutingPolicyId(instanceId, Id.from(clusterId), zone); var policies = new LinkedHashMap<>(tester.controller().routing().policies().read(instanceId).asMap()); policies.put(id, new RoutingPolicy(id, HostName.of("lb-host"), Optional.empty(), @@ -330,7 +332,7 @@ public class DeploymentContext { /** Fail current deployment in given job */ public DeploymentContext nodeAllocationFailure(JobType type) { return failDeployment(type, - new ConfigServerException(ConfigServerException.ErrorCode.NODE_ALLOCATION_FAILURE, + new ConfigServerException(ErrorCode.NODE_ALLOCATION_FAILURE, "Node allocation failure", "Failed to deploy application")); } @@ -359,7 +361,7 @@ public class DeploymentContext { if (messagePart.isPresent()) { Optional<Step> firstFailing = run.stepStatuses().entrySet().stream() .filter(kv -> kv.getValue() == failed) - .map(Map.Entry::getKey) + .map(Entry::getKey) .findFirst(); assertTrue("Found failing step", firstFailing.isPresent()); Optional<RunLog> details = jobs.details(id); @@ -490,7 +492,7 @@ public class DeploymentContext { triggerJobs(); RunId id = currentRun(job).id(); doDeploy(job); - tester.clock().advance(InternalStepRunner.Timeouts.of(tester.controller().system()).noNodesDown().plusSeconds(1)); + tester.clock().advance(Timeouts.of(tester.controller().system()).noNodesDown().plusSeconds(1)); runner.advance(currentRun(job)); assertTrue(jobs.run(id).get().hasFailed()); assertTrue(jobs.run(id).get().hasEnded()); @@ -504,7 +506,7 @@ public class DeploymentContext { RunId id = currentRun(job).id(); doDeploy(job); doUpgrade(job); - tester.clock().advance(InternalStepRunner.Timeouts.of(tester.controller().system()).noNodesDown().plusSeconds(1)); + tester.clock().advance(Timeouts.of(tester.controller().system()).noNodesDown().plusSeconds(1)); runner.advance(currentRun(job)); assertTrue(jobs.run(id).get().hasFailed()); assertTrue(jobs.run(id).get().hasEnded()); @@ -574,15 +576,15 @@ public class DeploymentContext { tester.configServer().nodeRepository().doUpgrade(deployment, Optional.empty(), tester.configServer().application(job.application(), zone).get().version().get()); configServer().convergeServices(id.application(), zone); runner.advance(currentRun(job)); - assertEquals(Step.Status.succeeded, jobs.run(id).get().stepStatuses().get(Step.installInitialReal)); + assertEquals(succeeded, jobs.run(id).get().stepStatuses().get(Step.installInitialReal)); // All installation is complete and endpoints are ready, so setup may begin. - assertEquals(Step.Status.succeeded, jobs.run(id).get().stepStatuses().get(Step.installInitialReal)); - assertEquals(Step.Status.succeeded, jobs.run(id).get().stepStatuses().get(Step.installTester)); - assertEquals(Step.Status.succeeded, jobs.run(id).get().stepStatuses().get(Step.startStagingSetup)); + assertEquals(succeeded, jobs.run(id).get().stepStatuses().get(Step.installInitialReal)); + assertEquals(succeeded, jobs.run(id).get().stepStatuses().get(Step.installTester)); + assertEquals(succeeded, jobs.run(id).get().stepStatuses().get(Step.startStagingSetup)); assertEquals(unfinished, jobs.run(id).get().stepStatuses().get(Step.endStagingSetup)); - tester.cloud().set(TesterCloud.Status.SUCCESS); + tester.cloud().set(Status.SUCCESS); runner.advance(currentRun(job)); assertEquals(succeeded, jobs.run(id).get().stepStatuses().get(Step.endStagingSetup)); } @@ -618,11 +620,11 @@ public class DeploymentContext { configServer().convergeServices(id.application(), zone); runner.advance(currentRun(job)); if (job.type().environment().isManuallyDeployed()) { - assertEquals(Step.Status.succeeded, jobs.run(id).get().stepStatuses().get(Step.installReal)); + assertEquals(succeeded, jobs.run(id).get().stepStatuses().get(Step.installReal)); assertTrue(jobs.run(id).get().hasEnded()); return; } - assertEquals("Status of " + id, Step.Status.succeeded, jobs.run(id).get().stepStatuses().get(Step.installReal)); + assertEquals("Status of " + id, succeeded, jobs.run(id).get().stepStatuses().get(Step.installReal)); } /** Installs tester and starts tests. */ @@ -647,12 +649,12 @@ public class DeploymentContext { // All installation is complete and endpoints are ready, so tests may begin. if (job.type().isDeployment()) - assertEquals(Step.Status.succeeded, jobs.run(id).get().stepStatuses().get(Step.installReal)); - assertEquals(Step.Status.succeeded, jobs.run(id).get().stepStatuses().get(Step.installTester)); - assertEquals(Step.Status.succeeded, jobs.run(id).get().stepStatuses().get(Step.startTests)); + assertEquals(succeeded, jobs.run(id).get().stepStatuses().get(Step.installReal)); + assertEquals(succeeded, jobs.run(id).get().stepStatuses().get(Step.installTester)); + assertEquals(succeeded, jobs.run(id).get().stepStatuses().get(Step.startTests)); assertEquals(unfinished, jobs.run(id).get().stepStatuses().get(Step.endTests)); - tester.cloud().set(TesterCloud.Status.SUCCESS); + tester.cloud().set(Status.SUCCESS); runner.advance(currentRun(job)); assertTrue(jobs.run(id).get().hasEnded()); assertFalse(jobs.run(id).get().hasFailed()); diff --git a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentTriggerTest.java b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentTriggerTest.java index 4d4d94f9e1f..beda9bd551d 100644 --- a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentTriggerTest.java +++ b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentTriggerTest.java @@ -7,6 +7,7 @@ import com.yahoo.config.provision.zone.ZoneId; import com.yahoo.vespa.flags.PermanentFlags; import com.yahoo.vespa.hosted.controller.ControllerTester; import com.yahoo.vespa.hosted.controller.Instance; +import com.yahoo.vespa.hosted.controller.api.integration.deployment.JobId; import com.yahoo.vespa.hosted.controller.api.integration.deployment.RevisionId; import com.yahoo.vespa.hosted.controller.api.integration.deployment.RunId; import com.yahoo.vespa.hosted.controller.application.Change; @@ -2147,4 +2148,123 @@ public class DeploymentTriggerTest { assertEquals(version2, app.application().revisions().get(tester.jobs().last(app.instanceId(), productionUsEast3).get().versions().targetRevision()).compileVersion().get()); } + @Test + public void testInstanceWithOnlySystemTest() { + String spec = "<deployment>\n" + + " <instance id='tests'>" + + " <test />\n" + + " <upgrade revision-target='next' />" + + " </instance>\n" + + " <instance id='main'>\n" + + " <prod>\n" + + " <region>us-east-3</region>\n" + + " </prod>\n" + + " <upgrade revision-target='next' />" + + " </instance>\n" + + "</deployment>\n"; + ApplicationPackage appPackage = ApplicationPackageBuilder.fromDeploymentXml(spec); + DeploymentContext tests = tester.newDeploymentContext("tenant", "application", "tests"); + DeploymentContext main = tester.newDeploymentContext("tenant", "application", "main"); + Version version1 = new Version("7"); + tester.controllerTester().upgradeSystem(version1); + tests.submit(appPackage).deploy(); + Optional<RevisionId> revision1 = tests.lastSubmission(); + JobId systemTestJob = new JobId(tests.instanceId(), systemTest); + JobId stagingTestJob = new JobId(tests.instanceId(), stagingTest); + JobId mainJob = new JobId(main.instanceId(), productionUsEast3); + + assertEquals(Change.empty(), tests.instance().change()); + assertEquals(Change.empty(), main.instance().change()); + assertEquals(Set.of(), tests.deploymentStatus().jobsToRun().keySet()); + + // Versions 2 and 3 become available. + // Tests instance fails on 2, then update to 3. + // Version 2 should not be a target for either instance. + // Version 2 should also not be possible to set as a forced target for the tests instance. + Version version2 = new Version("8"); + tester.controllerTester().upgradeSystem(version2); + tester.upgrader().run(); + tester.triggerJobs(); + + assertEquals(Change.of(version2), tests.instance().change()); + assertEquals(Change.empty(), main.instance().change()); + assertEquals(Set.of(systemTestJob), tests.deploymentStatus().jobsToRun().keySet()); + + Version version3 = new Version("9"); + tester.controllerTester().upgradeSystem(version3); + tests.failDeployment(systemTest); + tester.upgrader().run(); + + assertEquals(Change.of(version3), tests.instance().change()); + assertEquals(Change.empty(), main.instance().change()); + assertEquals(Set.of(systemTestJob), tests.deploymentStatus().jobsToRun().keySet()); + + tests.runJob(systemTest); + tester.upgrader().run(); + tests.runJob(stagingTest); + + assertEquals(Change.empty(), tests.instance().change()); + assertEquals(Change.of(version3), main.instance().change()); + assertEquals(Set.of(mainJob), tests.deploymentStatus().jobsToRun().keySet()); + + main.runJob(productionUsEast3); + + assertEquals(Change.empty(), tests.instance().change()); + assertEquals(Change.empty(), main.instance().change()); + assertEquals(Set.of(), tests.deploymentStatus().jobsToRun().keySet()); + + tester.deploymentTrigger().forceChange(tests.instanceId(), Change.of(version2)); + + assertEquals(Change.empty(), tests.instance().change()); + assertEquals(Change.empty(), main.instance().change()); + assertEquals(Set.of(), tests.deploymentStatus().jobsToRun().keySet()); + + // Revisions 2 and 3 become available. + // Tests instance fails on 2, then update to 3. + // Revision 2 should not be a target for either instance. + // Revision 2 should also not be possible to set as a forced target for the tests instance. + tests.submit(appPackage); + Optional<RevisionId> revision2 = tests.lastSubmission(); + + assertEquals(Change.of(revision2.get()), tests.instance().change()); + assertEquals(Change.empty(), main.instance().change()); + assertEquals(Set.of(systemTestJob), tests.deploymentStatus().jobsToRun().keySet()); + + tests.submit(appPackage); + Optional<RevisionId> revision3 = tests.lastSubmission(); + + assertEquals(Change.of(revision2.get()), tests.instance().change()); + assertEquals(Change.empty(), main.instance().change()); + assertEquals(Set.of(systemTestJob), tests.deploymentStatus().jobsToRun().keySet()); + + tests.failDeployment(systemTest); + tester.outstandingChangeDeployer().run(); + + assertEquals(Change.of(revision3.get()), tests.instance().change()); + assertEquals(Change.empty(), main.instance().change()); + assertEquals(Set.of(systemTestJob), tests.deploymentStatus().jobsToRun().keySet()); + + tests.runJob(systemTest); + tester.outstandingChangeDeployer().run(); + tester.outstandingChangeDeployer().run(); + tests.runJob(stagingTest); + + assertEquals(Change.empty(), tests.instance().change()); + assertEquals(Change.of(revision3.get()), main.instance().change()); + assertEquals(Set.of(mainJob), tests.deploymentStatus().jobsToRun().keySet()); + + main.runJob(productionUsEast3); + tester.outstandingChangeDeployer().run(); + + assertEquals(Change.empty(), tests.instance().change()); + assertEquals(Change.empty(), main.instance().change()); + assertEquals(Set.of(), tests.deploymentStatus().jobsToRun().keySet()); + + tester.deploymentTrigger().forceChange(tests.instanceId(), Change.of(revision2.get())); + + assertEquals(Change.empty(), tests.instance().change()); + assertEquals(Change.empty(), main.instance().change()); + assertEquals(Set.of(), tests.deploymentStatus().jobsToRun().keySet()); + } + } diff --git a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/UpgraderTest.java b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/UpgraderTest.java index 185c1e8c891..3b21d3a017e 100644 --- a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/UpgraderTest.java +++ b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/UpgraderTest.java @@ -905,10 +905,9 @@ public class UpgraderTest { tester.controllerTester().upgradeSystem(version0); // Create an application with pinned platform version. - var context = tester.newDeploymentContext(); + var context = tester.newDeploymentContext().submit().deploy(); tester.deploymentTrigger().forceChange(context.instanceId(), Change.empty().withPin()); - context.submit().deploy(); assertFalse(context.instance().change().hasTargets()); assertTrue(context.instance().change().isPinned()); assertEquals(3, context.instance().deployments().size()); |