diff options
author | Martin Polden <mpolden@mpolden.no> | 2019-02-08 16:40:48 +0100 |
---|---|---|
committer | GitHub <noreply@github.com> | 2019-02-08 16:40:48 +0100 |
commit | 63cd60257711b02a435a92fa0cc3c2fb7b976d9b (patch) | |
tree | 317e4f5a7e13621edf5b687cb16d91789e670b7b | |
parent | e263492cbe9c97a13def5d10334a7138db625d83 (diff) | |
parent | 4b4f379df411301e195e3fc097a5f7e107ed3098 (diff) |
Merge pull request #8447 from vespa-engine/mpolden/avoid-upgrade-deadlock
Avoid potential upgrade deadlock when deployments fail
7 files changed, 112 insertions, 36 deletions
diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/Application.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/Application.java index 963dca8d3b5..910d4ba6508 100644 --- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/Application.java +++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/Application.java @@ -10,11 +10,11 @@ import com.yahoo.config.provision.Environment; import com.yahoo.config.provision.HostName; import com.yahoo.config.provision.SystemName; import com.yahoo.vespa.hosted.controller.api.integration.MetricsService.ApplicationMetrics; +import com.yahoo.vespa.hosted.controller.api.integration.deployment.ApplicationVersion; import com.yahoo.vespa.hosted.controller.api.integration.organization.IssueId; import com.yahoo.vespa.hosted.controller.api.integration.organization.User; import com.yahoo.vespa.hosted.controller.api.integration.zone.ZoneId; import com.yahoo.vespa.hosted.controller.application.ApplicationActivity; -import com.yahoo.vespa.hosted.controller.api.integration.deployment.ApplicationVersion; import com.yahoo.vespa.hosted.controller.application.Change; import com.yahoo.vespa.hosted.controller.application.Deployment; import com.yahoo.vespa.hosted.controller.application.DeploymentJobs; @@ -29,6 +29,7 @@ import java.util.List; import java.util.Map; import java.util.Objects; import java.util.Optional; +import java.util.OptionalInt; import java.util.OptionalLong; import java.util.function.Function; import java.util.stream.Collectors; @@ -52,7 +53,7 @@ public class Application { private final Change outstandingChange; private final Optional<IssueId> ownershipIssueId; private final Optional<User> owner; - private final Optional<Integer> majorVersion; + private final OptionalInt majorVersion; private final ApplicationMetrics metrics; private final Optional<RotationId> rotation; private final Map<HostName, RotationStatus> rotationStatus; @@ -61,7 +62,7 @@ public class Application { public Application(ApplicationId id, Instant now) { this(id, now, DeploymentSpec.empty, ValidationOverrides.empty, Collections.emptyMap(), new DeploymentJobs(OptionalLong.empty(), Collections.emptyList(), Optional.empty(), false), - Change.empty(), Change.empty(), Optional.empty(), Optional.empty(), Optional.empty(), + Change.empty(), Change.empty(), Optional.empty(), Optional.empty(), OptionalInt.empty(), new ApplicationMetrics(0, 0), Optional.empty(), Collections.emptyMap()); } @@ -70,7 +71,7 @@ public class Application { public Application(ApplicationId id, Instant createdAt, DeploymentSpec deploymentSpec, ValidationOverrides validationOverrides, List<Deployment> deployments, DeploymentJobs deploymentJobs, Change change, Change outstandingChange, Optional<IssueId> ownershipIssueId, Optional<User> owner, - Optional<Integer> majorVersion, ApplicationMetrics metrics, + OptionalInt majorVersion, ApplicationMetrics metrics, Optional<RotationId> rotation, Map<HostName, RotationStatus> rotationStatus) { this(id, createdAt, deploymentSpec, validationOverrides, deployments.stream().collect(Collectors.toMap(Deployment::zone, d -> d)), @@ -81,7 +82,7 @@ public class Application { Application(ApplicationId id, Instant createdAt, DeploymentSpec deploymentSpec, ValidationOverrides validationOverrides, Map<ZoneId, Deployment> deployments, DeploymentJobs deploymentJobs, Change change, Change outstandingChange, Optional<IssueId> ownershipIssueId, Optional<User> owner, - Optional<Integer> majorVersion, ApplicationMetrics metrics, + OptionalInt majorVersion, ApplicationMetrics metrics, Optional<RotationId> rotation, Map<HostName, RotationStatus> rotationStatus) { this.id = Objects.requireNonNull(id, "id cannot be null"); this.createdAt = Objects.requireNonNull(createdAt, "instant of creation cannot be null"); @@ -156,7 +157,7 @@ public class Application { * Overrides the system major version for this application. This override takes effect if the deployment * spec does not specify a major version. */ - public Optional<Integer> majorVersion() { return majorVersion; } + public OptionalInt majorVersion() { return majorVersion; } /** Returns metrics for this */ public ApplicationMetrics metrics() { diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/LockedApplication.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/LockedApplication.java index 130519335ce..1fb22d28ac7 100644 --- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/LockedApplication.java +++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/LockedApplication.java @@ -32,6 +32,7 @@ import java.util.List; import java.util.Map; import java.util.Objects; import java.util.Optional; +import java.util.OptionalInt; import java.util.OptionalLong; import java.util.stream.Collectors; @@ -54,7 +55,7 @@ public class LockedApplication { private final Change outstandingChange; private final Optional<IssueId> ownershipIssueId; private final Optional<User> owner; - private final Optional<Integer> majorVersion; + private final OptionalInt majorVersion; private final ApplicationMetrics metrics; private final Optional<RotationId> rotation; private final Map<HostName, RotationStatus> rotationStatus; @@ -78,7 +79,7 @@ public class LockedApplication { DeploymentSpec deploymentSpec, ValidationOverrides validationOverrides, Map<ZoneId, Deployment> deployments, DeploymentJobs deploymentJobs, Change change, Change outstandingChange, Optional<IssueId> ownershipIssueId, Optional<User> owner, - Optional<Integer> majorVersion, ApplicationMetrics metrics, + OptionalInt majorVersion, ApplicationMetrics metrics, Optional<RotationId> rotation, Map<HostName, RotationStatus> rotationStatus) { this.lock = lock; this.id = id; @@ -231,7 +232,9 @@ public class LockedApplication { public LockedApplication withMajorVersion(Integer majorVersion) { return new LockedApplication(lock, id, createdAt, deploymentSpec, validationOverrides, deployments, deploymentJobs, change, outstandingChange, - ownershipIssueId, owner, Optional.ofNullable(majorVersion), metrics, rotation, rotationStatus); + ownershipIssueId, owner, + majorVersion == null ? OptionalInt.empty() : OptionalInt.of(majorVersion), + metrics, rotation, rotationStatus); } public LockedApplication with(MetricsService.ApplicationMetrics metrics) { diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentTrigger.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentTrigger.java index c8ac0c1b0e2..16f08ad1e15 100644 --- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentTrigger.java +++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentTrigger.java @@ -11,13 +11,13 @@ import com.yahoo.vespa.hosted.controller.Controller; import com.yahoo.vespa.hosted.controller.api.identifiers.DeploymentId; import com.yahoo.vespa.hosted.controller.api.integration.BuildService; import com.yahoo.vespa.hosted.controller.api.integration.BuildService.JobState; +import com.yahoo.vespa.hosted.controller.api.integration.deployment.ApplicationVersion; +import com.yahoo.vespa.hosted.controller.api.integration.deployment.JobType; import com.yahoo.vespa.hosted.controller.api.integration.zone.ZoneId; import com.yahoo.vespa.hosted.controller.application.ApplicationList; -import com.yahoo.vespa.hosted.controller.api.integration.deployment.ApplicationVersion; import com.yahoo.vespa.hosted.controller.application.Change; import com.yahoo.vespa.hosted.controller.application.Deployment; import com.yahoo.vespa.hosted.controller.application.DeploymentJobs.JobReport; -import com.yahoo.vespa.hosted.controller.api.integration.deployment.JobType; import com.yahoo.vespa.hosted.controller.application.JobStatus; import com.yahoo.vespa.hosted.controller.application.JobStatus.JobRun; @@ -45,7 +45,6 @@ import static com.yahoo.vespa.hosted.controller.api.integration.BuildService.Job import static com.yahoo.vespa.hosted.controller.api.integration.deployment.JobType.component; import static com.yahoo.vespa.hosted.controller.api.integration.deployment.JobType.stagingTest; import static com.yahoo.vespa.hosted.controller.api.integration.deployment.JobType.systemTest; -import static java.util.Collections.emptyList; import static java.util.Collections.singletonList; import static java.util.Comparator.comparing; import static java.util.Comparator.naturalOrder; @@ -328,7 +327,10 @@ public class DeploymentTrigger { jobs.add(deploymentJob(application, versions, change, job, reason, completedAt.get())); } if ( ! alreadyTriggered(application, versions)) { - testJobs = emptyList(); + // Only remove test jobs that target this combination + if (testJobs == null) testJobs = new ArrayList<>(); + testJobs.removeIf(j -> versions.sourcesMatchIfPresent(j.triggering) && + versions.targetsMatch(j.triggering)); } } else if (testJobs == null) { diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/persistence/ApplicationSerializer.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/persistence/ApplicationSerializer.java index 74edfcea6f7..7f052fd7574 100644 --- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/persistence/ApplicationSerializer.java +++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/persistence/ApplicationSerializer.java @@ -14,11 +14,12 @@ import com.yahoo.slime.Slime; import com.yahoo.vespa.config.SlimeUtils; import com.yahoo.vespa.hosted.controller.Application; import com.yahoo.vespa.hosted.controller.api.integration.MetricsService.ApplicationMetrics; +import com.yahoo.vespa.hosted.controller.api.integration.deployment.ApplicationVersion; import com.yahoo.vespa.hosted.controller.api.integration.deployment.JobType; +import com.yahoo.vespa.hosted.controller.api.integration.deployment.SourceRevision; import com.yahoo.vespa.hosted.controller.api.integration.organization.IssueId; import com.yahoo.vespa.hosted.controller.api.integration.organization.User; import com.yahoo.vespa.hosted.controller.api.integration.zone.ZoneId; -import com.yahoo.vespa.hosted.controller.api.integration.deployment.ApplicationVersion; import com.yahoo.vespa.hosted.controller.application.Change; import com.yahoo.vespa.hosted.controller.application.ClusterInfo; import com.yahoo.vespa.hosted.controller.application.ClusterUtilization; @@ -29,7 +30,6 @@ import com.yahoo.vespa.hosted.controller.application.DeploymentJobs.JobError; import com.yahoo.vespa.hosted.controller.application.DeploymentMetrics; import com.yahoo.vespa.hosted.controller.application.JobStatus; import com.yahoo.vespa.hosted.controller.application.RotationStatus; -import com.yahoo.vespa.hosted.controller.api.integration.deployment.SourceRevision; import com.yahoo.vespa.hosted.controller.rotation.RotationId; import java.time.Instant; @@ -41,6 +41,7 @@ import java.util.List; import java.util.Map; import java.util.Optional; import java.util.OptionalDouble; +import java.util.OptionalInt; import java.util.OptionalLong; import java.util.TreeMap; @@ -325,7 +326,7 @@ public class ApplicationSerializer { Change outstandingChange = changeFromSlime(root.field(outstandingChangeField)); Optional<IssueId> ownershipIssueId = optionalString(root.field(ownershipIssueIdField)).map(IssueId::from); Optional<User> owner = optionalString(root.field(ownerField)).map(User::from); - Optional<Integer> majorVersion = optionalInteger(root.field(majorVersionField)); + OptionalInt majorVersion = optionalInteger(root.field(majorVersionField)); ApplicationMetrics metrics = new ApplicationMetrics(root.field(queryQualityField).asDouble(), root.field(writeQualityField).asDouble()); Optional<RotationId> rotation = rotationFromSlime(root.field(rotationField)); @@ -519,8 +520,8 @@ public class ApplicationSerializer { return field.valid() ? OptionalLong.of(field.asLong()) : OptionalLong.empty(); } - private Optional<Integer> optionalInteger(Inspector field) { - return field.valid() ? Optional.of((int)field.asLong()) : Optional.empty(); + private OptionalInt optionalInteger(Inspector field) { + return field.valid() ? OptionalInt.of((int) field.asLong()) : OptionalInt.empty(); } private OptionalDouble optionalDouble(Inspector field) { diff --git a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentTriggerTest.java b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentTriggerTest.java index aa2b4d03028..36261eb05fe 100644 --- a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentTriggerTest.java +++ b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentTriggerTest.java @@ -23,7 +23,6 @@ import com.yahoo.vespa.hosted.controller.application.DeploymentJobs; import com.yahoo.vespa.hosted.controller.maintenance.JobControl; import com.yahoo.vespa.hosted.controller.maintenance.ReadyJobsTrigger; import com.yahoo.vespa.hosted.controller.versions.VespaVersion; -import org.junit.Before; import org.junit.Test; import java.nio.file.Files; @@ -66,12 +65,7 @@ import static org.junit.Assert.assertTrue; */ public class DeploymentTriggerTest { - private DeploymentTester tester; - - @Before - public void before() { - tester = new DeploymentTester(); - } + private final DeploymentTester tester = new DeploymentTester(); @Test public void testTriggerFailing() { @@ -117,7 +111,7 @@ public class DeploymentTriggerTest { @Test public void abortsInternalJobsOnNewApplicationChange() { InternalDeploymentTester iTester = new InternalDeploymentTester(); - tester = iTester.tester(); + DeploymentTester tester = iTester.tester(); Application app = iTester.app(); ApplicationPackage applicationPackage = InternalDeploymentTester.applicationPackage; @@ -730,7 +724,7 @@ public class DeploymentTriggerTest { tester.deployAndNotify(application, true, systemTest); tester.deployAndNotify(application, true, stagingTest); - // Tests are not re-triggered, because the jobs they were run for has not yet been triggered with the tested versions. + // Tests are not re-triggered, because the deployments that were tested have not yet been triggered on the tested versions. assertEquals(firstTested, app.get().deploymentJobs().jobStatus().get(systemTest).lastTriggered().get().platform()); assertEquals(firstTested, app.get().deploymentJobs().jobStatus().get(stagingTest).lastTriggered().get().platform()); diff --git a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/UpgraderTest.java b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/UpgraderTest.java index 7953b462ab2..2a13d73e42d 100644 --- a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/UpgraderTest.java +++ b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/UpgraderTest.java @@ -16,14 +16,16 @@ import com.yahoo.vespa.hosted.controller.deployment.ApplicationPackageBuilder; import com.yahoo.vespa.hosted.controller.deployment.BuildJob; import com.yahoo.vespa.hosted.controller.deployment.DeploymentTester; import com.yahoo.vespa.hosted.controller.versions.VespaVersion; -import org.junit.Before; import org.junit.Test; import java.time.Duration; import java.time.Instant; import java.util.Collections; import java.util.Optional; +import java.util.OptionalInt; +import java.util.function.Supplier; +import static com.yahoo.config.provision.SystemName.main; import static com.yahoo.vespa.hosted.controller.api.integration.deployment.JobType.component; import static com.yahoo.vespa.hosted.controller.api.integration.deployment.JobType.productionUsCentral1; import static com.yahoo.vespa.hosted.controller.api.integration.deployment.JobType.productionUsEast3; @@ -41,12 +43,7 @@ import static org.junit.Assert.assertTrue; */ public class UpgraderTest { - private DeploymentTester tester; - - @Before - public void before() { - tester = new DeploymentTester(); - } + private final DeploymentTester tester = new DeploymentTester(); @Test public void testUpgrading() { @@ -943,7 +940,7 @@ public class UpgraderTest { Application canary0 = tester.createAndDeploy("canary0", 1, "canary"); Application default0 = tester.createAndDeploy("default0", 2, default0ApplicationPackage); tester.applications().lockOrThrow(default0.id(), a -> tester.applications().store(a.withMajorVersion(6))); - assertEquals(Optional.of(6), tester.applications().get(default0.id()).get().majorVersion()); + assertEquals(OptionalInt.of(6), tester.applications().get(default0.id()).get().majorVersion()); // New major version is released version = Version.fromString("7.0"); @@ -1273,4 +1270,81 @@ public class UpgraderTest { tester.controller().applications().require(app1.id()).change().platform().get()); } + @Test + public void testsEachUpgradeCombinationWithFailingDeployments() { + Application application = tester.createApplication("app1", "tenant1", 1, 1L); + Supplier<Application> app = () -> tester.application(application.id()); + ApplicationPackage applicationPackage = new ApplicationPackageBuilder() + .environment(Environment.prod) + .region("us-central-1") + .region("us-west-1") + .region("us-east-3") + .build(); + + // Application deploys on system version + Version v1 = Version.fromString("6.1"); + tester.deployCompletely(application, applicationPackage); + + // Next version is released and 2/3 deployments upgrade + Version v2 = Version.fromString("6.2"); + tester.upgradeSystem(v2); + tester.upgrader().maintain(); + assertEquals(Change.of(v2), app.get().change()); + tester.deployAndNotify(application, true, systemTest); + tester.deployAndNotify(application, true, stagingTest); + tester.deployAndNotify(application, true, productionUsCentral1); + + // While second deployment completes upgrade, version confidence becomes broken and upgrade is cancelled + tester.upgrader().overrideConfidence(v2, VespaVersion.Confidence.broken); + tester.computeVersionStatus(); + tester.upgrader().maintain(); + tester.deployAndNotify(application, true, productionUsWest1); + assertTrue(app.get().change().isEmpty()); + + // Next version is released + Version v3 = Version.fromString("6.3"); + tester.upgradeSystem(v3); + tester.upgrader().maintain(); + assertEquals(Change.of(v3), app.get().change()); + tester.deployAndNotify(application, true, systemTest); + tester.deployAndNotify(application, true, stagingTest); + + // First deployment starts upgrading + tester.deploy(productionUsCentral1, application, applicationPackage); + + // Before deployment completes, v1->v3 combination is tested as us-east-3 is still on v1 + tester.readyJobTrigger().maintain(); + tester.deployAndNotify(application, true, stagingTest); + assertEquals(v1, app.get().deploymentJobs().jobStatus().get(stagingTest).lastSuccess().get().sourcePlatform().get()); + assertEquals(v3, app.get().deploymentJobs().jobStatus().get(stagingTest).lastSuccess().get().platform()); + + // First deployment fails and then successfully upgrades to v3 + tester.jobCompletion(productionUsCentral1).application(application).unsuccessful().submit(); + tester.jobCompletion(productionUsCentral1).application(application).submit(); + + // Deployments are now on 3 versions + assertEquals(v3, app.get().deployments().get(productionUsCentral1.zone(main)).version()); + assertEquals(v2, app.get().deployments().get(productionUsWest1.zone(main)).version()); + assertEquals(v1, app.get().deployments().get(productionUsEast3.zone(main)).version()); + + // Need to test v2->v3 combination again before upgrading second deployment + tester.readyJobTrigger().maintain(); + assertEquals(v2, app.get().deploymentJobs().jobStatus().get(stagingTest).lastTriggered().get().sourcePlatform().get()); + assertEquals(v3, app.get().deploymentJobs().jobStatus().get(stagingTest).lastTriggered().get().platform()); + tester.deployAndNotify(application, true, stagingTest); + + // Second deployment upgrades + tester.deployAndNotify(application, true, productionUsWest1); + + // ... now we have to test v1->v3 again :( + tester.readyJobTrigger().maintain(); + assertEquals(v1, app.get().deploymentJobs().jobStatus().get(stagingTest).lastTriggered().get().sourcePlatform().get()); + assertEquals(v3, app.get().deploymentJobs().jobStatus().get(stagingTest).lastTriggered().get().platform()); + tester.deployAndNotify(application, true, stagingTest); + + // Upgrade completes + tester.deployAndNotify(application, true, productionUsEast3); + assertTrue("Upgrade complete", app.get().change().isEmpty()); + } + } diff --git a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/persistence/ApplicationSerializerTest.java b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/persistence/ApplicationSerializerTest.java index e46119dc0b5..849b251a230 100644 --- a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/persistence/ApplicationSerializerTest.java +++ b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/persistence/ApplicationSerializerTest.java @@ -41,6 +41,7 @@ import java.util.List; import java.util.Map; import java.util.Optional; import java.util.OptionalDouble; +import java.util.OptionalInt; import java.util.OptionalLong; import java.util.TreeMap; @@ -110,7 +111,7 @@ public class ApplicationSerializerTest { Change.of(ApplicationVersion.from(new SourceRevision("repo", "master", "deadcafe"), 42)), Optional.of(IssueId.from("1234")), Optional.of(User.from("by-username")), - Optional.of(7), + OptionalInt.of(7), new MetricsService.ApplicationMetrics(0.5, 0.9), Optional.of(new RotationId("my-rotation")), rotationStatus); |