summaryrefslogtreecommitdiffstats
path: root/controller-server
diff options
context:
space:
mode:
authorJon Marius Venstad <jonmv@users.noreply.github.com>2022-05-06 10:12:38 +0200
committerGitHub <noreply@github.com>2022-05-06 10:12:38 +0200
commiteed0a45c502785321272fb5e5166b0c188daef04 (patch)
tree48ac5618c489f8b44d9d0b0767aa16183803dd43 /controller-server
parente0b3120c5c5b96612d899c4fe38b625f6f3aa13c (diff)
parent84bf960f5233c9a9e9e78a7a1caa48cbcb1f1da9 (diff)
Merge pull request #22471 from vespa-engine/jonmv/fix-dep-orch-hiccup-for-tests-only-instances
Jonmv/fix dep orch hiccup for tests only instances
Diffstat (limited to 'controller-server')
-rw-r--r--controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentStatus.java25
-rw-r--r--controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/Upgrader.java3
-rw-r--r--controller-server/src/test/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentContext.java44
-rw-r--r--controller-server/src/test/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentTriggerTest.java120
-rw-r--r--controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/UpgraderTest.java3
5 files changed, 167 insertions, 28 deletions
diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentStatus.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentStatus.java
index cc7031bab5a..52283a3e27d 100644
--- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentStatus.java
+++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentStatus.java
@@ -211,6 +211,11 @@ public class DeploymentStatus {
/** The set of jobs that need to run for the given changes to be considered complete. */
public boolean hasCompleted(InstanceName instance, Change change) {
+ if ( ! application.deploymentSpec().requireInstance(instance).concerns(prod)) {
+ if (newestTested(instance, run -> run.versions().targetRevision()).map(change::downgrades).orElse(false)) return true;
+ if (newestTested(instance, run -> run.versions().targetPlatform()).map(change::downgrades).orElse(false)) return true;
+ }
+
return jobsToRun(Map.of(instance, change), false).isEmpty();
}
@@ -247,6 +252,14 @@ public class DeploymentStatus {
.deployments().get(job.type().zone()));
}
+ private <T extends Comparable<T>> Optional<T> newestTested(InstanceName instance, Function<Run, T> runMapper) {
+ return instanceJobs().get(application.id().instance(instance))
+ .type(systemTest, stagingTest)
+ .asList().stream().flatMap(jobs -> jobs.runs().values().stream())
+ .filter(Run::hasSucceeded)
+ .map(runMapper)
+ .max(naturalOrder());
+ }
/**
* The change to a revision which all dependencies of the given instance has completed,
* which does not downgrade any deployments in the instance,
@@ -263,16 +276,20 @@ public class DeploymentStatus {
int nextRisk = 0;
int skippedCumulativeRisk = 0;
Instant readySince = now;
+
+ Optional<RevisionId> newestRevision = application.productionDeployments()
+ .getOrDefault(instance, List.of()).stream()
+ .map(Deployment::revision).max(naturalOrder());
Change candidate = Change.empty();
for (ApplicationVersion version : application.revisions().deployable(ascending)) {
// A revision is only a candidate if it upgrades, and does not downgrade, this instance.
Change change = Change.of(version.id());
- if (application.productionDeployments().getOrDefault(instance, List.of()).stream()
- .anyMatch(deployment -> change.downgrades(deployment.revision()))) continue;
- if ( ! application.require(instance).change().revision().map(change::upgrades).orElse(true)) continue;
- if (hasCompleted(instance, change))
+ if ( newestRevision.isPresent() && change.downgrades(newestRevision.get())
+ || ! application.require(instance).change().revision().map(change::upgrades).orElse(true)
+ || hasCompleted(instance, change)) {
if (ascending) continue; // Keep looking for the next revision which is an upgrade, or ...
else return Change.empty(); // ... if the latest is already complete, there's nothing outstanding.
+ }
// This revision contains something new, so start aggregating the risk score.
skippedCumulativeRisk += version.risk();
diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/Upgrader.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/Upgrader.java
index 4ed34a91029..9cddbd0b903 100644
--- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/Upgrader.java
+++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/Upgrader.java
@@ -101,7 +101,8 @@ public class Upgrader extends ControllerMaintainer {
cancelUpgradesOf(outdated.upgrading(), "Upgrading to outdated versions");
// Prefer the newest target for each instance.
- remaining = remaining.not().matching(eligible.asList()::contains);
+ remaining = remaining.not().matching(eligible.asList()::contains)
+ .not().hasCompleted(Change.of(version));
for (ApplicationId id : outdated.and(eligible.not().upgrading()).not().changingRevision())
targets.put(id, version);
}
diff --git a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentContext.java b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentContext.java
index fd294f9cf9f..ae4b6259da1 100644
--- a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentContext.java
+++ b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentContext.java
@@ -1,13 +1,12 @@
// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
package com.yahoo.vespa.hosted.controller.deployment;
-import com.google.common.base.Supplier;
import com.google.common.base.Suppliers;
import com.yahoo.component.Version;
import com.yahoo.config.provision.ApplicationId;
import com.yahoo.config.provision.AthenzDomain;
import com.yahoo.config.provision.AthenzService;
-import com.yahoo.config.provision.ClusterSpec;
+import com.yahoo.config.provision.ClusterSpec.Id;
import com.yahoo.config.provision.Environment;
import com.yahoo.config.provision.HostName;
import com.yahoo.config.provision.zone.ZoneId;
@@ -20,19 +19,20 @@ import com.yahoo.vespa.hosted.controller.ControllerTester;
import com.yahoo.vespa.hosted.controller.Instance;
import com.yahoo.vespa.hosted.controller.api.identifiers.DeploymentId;
import com.yahoo.vespa.hosted.controller.api.integration.configserver.ConfigServerException;
+import com.yahoo.vespa.hosted.controller.api.integration.configserver.ConfigServerException.ErrorCode;
import com.yahoo.vespa.hosted.controller.api.integration.configserver.NodeFilter;
import com.yahoo.vespa.hosted.controller.api.integration.deployment.JobId;
import com.yahoo.vespa.hosted.controller.api.integration.deployment.JobType;
import com.yahoo.vespa.hosted.controller.api.integration.deployment.RevisionId;
import com.yahoo.vespa.hosted.controller.api.integration.deployment.RunId;
import com.yahoo.vespa.hosted.controller.api.integration.deployment.SourceRevision;
-import com.yahoo.vespa.hosted.controller.api.integration.deployment.TesterCloud;
import com.yahoo.vespa.hosted.controller.api.integration.deployment.TesterCloud.Status;
import com.yahoo.vespa.hosted.controller.api.integration.deployment.TesterId;
import com.yahoo.vespa.hosted.controller.application.Deployment;
import com.yahoo.vespa.hosted.controller.application.EndpointId;
import com.yahoo.vespa.hosted.controller.application.TenantAndApplicationId;
import com.yahoo.vespa.hosted.controller.application.pkg.ApplicationPackage;
+import com.yahoo.vespa.hosted.controller.deployment.InternalStepRunner.Timeouts;
import com.yahoo.vespa.hosted.controller.integration.ConfigServerMock;
import com.yahoo.vespa.hosted.controller.maintenance.JobRunner;
import com.yahoo.vespa.hosted.controller.maintenance.NameServiceDispatcher;
@@ -50,9 +50,11 @@ import java.util.HashMap;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
+import java.util.Map.Entry;
import java.util.Optional;
import java.util.Set;
import java.util.concurrent.atomic.AtomicLong;
+import java.util.function.Supplier;
import static com.yahoo.vespa.hosted.controller.deployment.Step.Status.failed;
import static com.yahoo.vespa.hosted.controller.deployment.Step.Status.succeeded;
@@ -110,7 +112,7 @@ public class DeploymentContext {
.parallel("us-west-1", "us-east-3")
.emailRole("author")
.emailAddress("b@a")
- .build());
+ .build())::get;
private static final Supplier<ApplicationPackage> publicCdApplicationPackage = Suppliers.memoize(() -> new ApplicationPackageBuilder()
.athenzIdentity(AthenzDomain.from("domain"), AthenzService.from("service"))
@@ -119,7 +121,7 @@ public class DeploymentContext {
.emailRole("author")
.emailAddress("b@a")
.trust(generateCertificate())
- .build());
+ .build())::get;
public static final SourceRevision defaultSourceRevision = new SourceRevision("repository1", "master", "commit1");
@@ -268,7 +270,7 @@ public class DeploymentContext {
/** Add a routing policy for this in given zone, with status set to inactive */
public DeploymentContext addInactiveRoutingPolicy(ZoneId zone) {
var clusterId = "default-inactive";
- var id = new RoutingPolicyId(instanceId, ClusterSpec.Id.from(clusterId), zone);
+ var id = new RoutingPolicyId(instanceId, Id.from(clusterId), zone);
var policies = new LinkedHashMap<>(tester.controller().routing().policies().read(instanceId).asMap());
policies.put(id, new RoutingPolicy(id, HostName.of("lb-host"),
Optional.empty(),
@@ -330,7 +332,7 @@ public class DeploymentContext {
/** Fail current deployment in given job */
public DeploymentContext nodeAllocationFailure(JobType type) {
return failDeployment(type,
- new ConfigServerException(ConfigServerException.ErrorCode.NODE_ALLOCATION_FAILURE,
+ new ConfigServerException(ErrorCode.NODE_ALLOCATION_FAILURE,
"Node allocation failure",
"Failed to deploy application"));
}
@@ -359,7 +361,7 @@ public class DeploymentContext {
if (messagePart.isPresent()) {
Optional<Step> firstFailing = run.stepStatuses().entrySet().stream()
.filter(kv -> kv.getValue() == failed)
- .map(Map.Entry::getKey)
+ .map(Entry::getKey)
.findFirst();
assertTrue("Found failing step", firstFailing.isPresent());
Optional<RunLog> details = jobs.details(id);
@@ -490,7 +492,7 @@ public class DeploymentContext {
triggerJobs();
RunId id = currentRun(job).id();
doDeploy(job);
- tester.clock().advance(InternalStepRunner.Timeouts.of(tester.controller().system()).noNodesDown().plusSeconds(1));
+ tester.clock().advance(Timeouts.of(tester.controller().system()).noNodesDown().plusSeconds(1));
runner.advance(currentRun(job));
assertTrue(jobs.run(id).get().hasFailed());
assertTrue(jobs.run(id).get().hasEnded());
@@ -504,7 +506,7 @@ public class DeploymentContext {
RunId id = currentRun(job).id();
doDeploy(job);
doUpgrade(job);
- tester.clock().advance(InternalStepRunner.Timeouts.of(tester.controller().system()).noNodesDown().plusSeconds(1));
+ tester.clock().advance(Timeouts.of(tester.controller().system()).noNodesDown().plusSeconds(1));
runner.advance(currentRun(job));
assertTrue(jobs.run(id).get().hasFailed());
assertTrue(jobs.run(id).get().hasEnded());
@@ -574,15 +576,15 @@ public class DeploymentContext {
tester.configServer().nodeRepository().doUpgrade(deployment, Optional.empty(), tester.configServer().application(job.application(), zone).get().version().get());
configServer().convergeServices(id.application(), zone);
runner.advance(currentRun(job));
- assertEquals(Step.Status.succeeded, jobs.run(id).get().stepStatuses().get(Step.installInitialReal));
+ assertEquals(succeeded, jobs.run(id).get().stepStatuses().get(Step.installInitialReal));
// All installation is complete and endpoints are ready, so setup may begin.
- assertEquals(Step.Status.succeeded, jobs.run(id).get().stepStatuses().get(Step.installInitialReal));
- assertEquals(Step.Status.succeeded, jobs.run(id).get().stepStatuses().get(Step.installTester));
- assertEquals(Step.Status.succeeded, jobs.run(id).get().stepStatuses().get(Step.startStagingSetup));
+ assertEquals(succeeded, jobs.run(id).get().stepStatuses().get(Step.installInitialReal));
+ assertEquals(succeeded, jobs.run(id).get().stepStatuses().get(Step.installTester));
+ assertEquals(succeeded, jobs.run(id).get().stepStatuses().get(Step.startStagingSetup));
assertEquals(unfinished, jobs.run(id).get().stepStatuses().get(Step.endStagingSetup));
- tester.cloud().set(TesterCloud.Status.SUCCESS);
+ tester.cloud().set(Status.SUCCESS);
runner.advance(currentRun(job));
assertEquals(succeeded, jobs.run(id).get().stepStatuses().get(Step.endStagingSetup));
}
@@ -618,11 +620,11 @@ public class DeploymentContext {
configServer().convergeServices(id.application(), zone);
runner.advance(currentRun(job));
if (job.type().environment().isManuallyDeployed()) {
- assertEquals(Step.Status.succeeded, jobs.run(id).get().stepStatuses().get(Step.installReal));
+ assertEquals(succeeded, jobs.run(id).get().stepStatuses().get(Step.installReal));
assertTrue(jobs.run(id).get().hasEnded());
return;
}
- assertEquals("Status of " + id, Step.Status.succeeded, jobs.run(id).get().stepStatuses().get(Step.installReal));
+ assertEquals("Status of " + id, succeeded, jobs.run(id).get().stepStatuses().get(Step.installReal));
}
/** Installs tester and starts tests. */
@@ -647,12 +649,12 @@ public class DeploymentContext {
// All installation is complete and endpoints are ready, so tests may begin.
if (job.type().isDeployment())
- assertEquals(Step.Status.succeeded, jobs.run(id).get().stepStatuses().get(Step.installReal));
- assertEquals(Step.Status.succeeded, jobs.run(id).get().stepStatuses().get(Step.installTester));
- assertEquals(Step.Status.succeeded, jobs.run(id).get().stepStatuses().get(Step.startTests));
+ assertEquals(succeeded, jobs.run(id).get().stepStatuses().get(Step.installReal));
+ assertEquals(succeeded, jobs.run(id).get().stepStatuses().get(Step.installTester));
+ assertEquals(succeeded, jobs.run(id).get().stepStatuses().get(Step.startTests));
assertEquals(unfinished, jobs.run(id).get().stepStatuses().get(Step.endTests));
- tester.cloud().set(TesterCloud.Status.SUCCESS);
+ tester.cloud().set(Status.SUCCESS);
runner.advance(currentRun(job));
assertTrue(jobs.run(id).get().hasEnded());
assertFalse(jobs.run(id).get().hasFailed());
diff --git a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentTriggerTest.java b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentTriggerTest.java
index 4d4d94f9e1f..beda9bd551d 100644
--- a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentTriggerTest.java
+++ b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentTriggerTest.java
@@ -7,6 +7,7 @@ import com.yahoo.config.provision.zone.ZoneId;
import com.yahoo.vespa.flags.PermanentFlags;
import com.yahoo.vespa.hosted.controller.ControllerTester;
import com.yahoo.vespa.hosted.controller.Instance;
+import com.yahoo.vespa.hosted.controller.api.integration.deployment.JobId;
import com.yahoo.vespa.hosted.controller.api.integration.deployment.RevisionId;
import com.yahoo.vespa.hosted.controller.api.integration.deployment.RunId;
import com.yahoo.vespa.hosted.controller.application.Change;
@@ -2147,4 +2148,123 @@ public class DeploymentTriggerTest {
assertEquals(version2, app.application().revisions().get(tester.jobs().last(app.instanceId(), productionUsEast3).get().versions().targetRevision()).compileVersion().get());
}
+ @Test
+ public void testInstanceWithOnlySystemTest() {
+ String spec = "<deployment>\n" +
+ " <instance id='tests'>" +
+ " <test />\n" +
+ " <upgrade revision-target='next' />" +
+ " </instance>\n" +
+ " <instance id='main'>\n" +
+ " <prod>\n" +
+ " <region>us-east-3</region>\n" +
+ " </prod>\n" +
+ " <upgrade revision-target='next' />" +
+ " </instance>\n" +
+ "</deployment>\n";
+ ApplicationPackage appPackage = ApplicationPackageBuilder.fromDeploymentXml(spec);
+ DeploymentContext tests = tester.newDeploymentContext("tenant", "application", "tests");
+ DeploymentContext main = tester.newDeploymentContext("tenant", "application", "main");
+ Version version1 = new Version("7");
+ tester.controllerTester().upgradeSystem(version1);
+ tests.submit(appPackage).deploy();
+ Optional<RevisionId> revision1 = tests.lastSubmission();
+ JobId systemTestJob = new JobId(tests.instanceId(), systemTest);
+ JobId stagingTestJob = new JobId(tests.instanceId(), stagingTest);
+ JobId mainJob = new JobId(main.instanceId(), productionUsEast3);
+
+ assertEquals(Change.empty(), tests.instance().change());
+ assertEquals(Change.empty(), main.instance().change());
+ assertEquals(Set.of(), tests.deploymentStatus().jobsToRun().keySet());
+
+ // Versions 2 and 3 become available.
+ // Tests instance fails on 2, then update to 3.
+ // Version 2 should not be a target for either instance.
+ // Version 2 should also not be possible to set as a forced target for the tests instance.
+ Version version2 = new Version("8");
+ tester.controllerTester().upgradeSystem(version2);
+ tester.upgrader().run();
+ tester.triggerJobs();
+
+ assertEquals(Change.of(version2), tests.instance().change());
+ assertEquals(Change.empty(), main.instance().change());
+ assertEquals(Set.of(systemTestJob), tests.deploymentStatus().jobsToRun().keySet());
+
+ Version version3 = new Version("9");
+ tester.controllerTester().upgradeSystem(version3);
+ tests.failDeployment(systemTest);
+ tester.upgrader().run();
+
+ assertEquals(Change.of(version3), tests.instance().change());
+ assertEquals(Change.empty(), main.instance().change());
+ assertEquals(Set.of(systemTestJob), tests.deploymentStatus().jobsToRun().keySet());
+
+ tests.runJob(systemTest);
+ tester.upgrader().run();
+ tests.runJob(stagingTest);
+
+ assertEquals(Change.empty(), tests.instance().change());
+ assertEquals(Change.of(version3), main.instance().change());
+ assertEquals(Set.of(mainJob), tests.deploymentStatus().jobsToRun().keySet());
+
+ main.runJob(productionUsEast3);
+
+ assertEquals(Change.empty(), tests.instance().change());
+ assertEquals(Change.empty(), main.instance().change());
+ assertEquals(Set.of(), tests.deploymentStatus().jobsToRun().keySet());
+
+ tester.deploymentTrigger().forceChange(tests.instanceId(), Change.of(version2));
+
+ assertEquals(Change.empty(), tests.instance().change());
+ assertEquals(Change.empty(), main.instance().change());
+ assertEquals(Set.of(), tests.deploymentStatus().jobsToRun().keySet());
+
+ // Revisions 2 and 3 become available.
+ // Tests instance fails on 2, then update to 3.
+ // Revision 2 should not be a target for either instance.
+ // Revision 2 should also not be possible to set as a forced target for the tests instance.
+ tests.submit(appPackage);
+ Optional<RevisionId> revision2 = tests.lastSubmission();
+
+ assertEquals(Change.of(revision2.get()), tests.instance().change());
+ assertEquals(Change.empty(), main.instance().change());
+ assertEquals(Set.of(systemTestJob), tests.deploymentStatus().jobsToRun().keySet());
+
+ tests.submit(appPackage);
+ Optional<RevisionId> revision3 = tests.lastSubmission();
+
+ assertEquals(Change.of(revision2.get()), tests.instance().change());
+ assertEquals(Change.empty(), main.instance().change());
+ assertEquals(Set.of(systemTestJob), tests.deploymentStatus().jobsToRun().keySet());
+
+ tests.failDeployment(systemTest);
+ tester.outstandingChangeDeployer().run();
+
+ assertEquals(Change.of(revision3.get()), tests.instance().change());
+ assertEquals(Change.empty(), main.instance().change());
+ assertEquals(Set.of(systemTestJob), tests.deploymentStatus().jobsToRun().keySet());
+
+ tests.runJob(systemTest);
+ tester.outstandingChangeDeployer().run();
+ tester.outstandingChangeDeployer().run();
+ tests.runJob(stagingTest);
+
+ assertEquals(Change.empty(), tests.instance().change());
+ assertEquals(Change.of(revision3.get()), main.instance().change());
+ assertEquals(Set.of(mainJob), tests.deploymentStatus().jobsToRun().keySet());
+
+ main.runJob(productionUsEast3);
+ tester.outstandingChangeDeployer().run();
+
+ assertEquals(Change.empty(), tests.instance().change());
+ assertEquals(Change.empty(), main.instance().change());
+ assertEquals(Set.of(), tests.deploymentStatus().jobsToRun().keySet());
+
+ tester.deploymentTrigger().forceChange(tests.instanceId(), Change.of(revision2.get()));
+
+ assertEquals(Change.empty(), tests.instance().change());
+ assertEquals(Change.empty(), main.instance().change());
+ assertEquals(Set.of(), tests.deploymentStatus().jobsToRun().keySet());
+ }
+
}
diff --git a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/UpgraderTest.java b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/UpgraderTest.java
index 185c1e8c891..3b21d3a017e 100644
--- a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/UpgraderTest.java
+++ b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/UpgraderTest.java
@@ -905,10 +905,9 @@ public class UpgraderTest {
tester.controllerTester().upgradeSystem(version0);
// Create an application with pinned platform version.
- var context = tester.newDeploymentContext();
+ var context = tester.newDeploymentContext().submit().deploy();
tester.deploymentTrigger().forceChange(context.instanceId(), Change.empty().withPin());
- context.submit().deploy();
assertFalse(context.instance().change().hasTargets());
assertTrue(context.instance().change().isPinned());
assertEquals(3, context.instance().deployments().size());