summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorMartin Polden <mpolden@mpolden.no>2019-02-08 16:40:48 +0100
committerGitHub <noreply@github.com>2019-02-08 16:40:48 +0100
commit63cd60257711b02a435a92fa0cc3c2fb7b976d9b (patch)
tree317e4f5a7e13621edf5b687cb16d91789e670b7b
parente263492cbe9c97a13def5d10334a7138db625d83 (diff)
parent4b4f379df411301e195e3fc097a5f7e107ed3098 (diff)
Merge pull request #8447 from vespa-engine/mpolden/avoid-upgrade-deadlock
Avoid potential upgrade deadlock when deployments fail
-rw-r--r--controller-server/src/main/java/com/yahoo/vespa/hosted/controller/Application.java13
-rw-r--r--controller-server/src/main/java/com/yahoo/vespa/hosted/controller/LockedApplication.java9
-rw-r--r--controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentTrigger.java10
-rw-r--r--controller-server/src/main/java/com/yahoo/vespa/hosted/controller/persistence/ApplicationSerializer.java11
-rw-r--r--controller-server/src/test/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentTriggerTest.java12
-rw-r--r--controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/UpgraderTest.java90
-rw-r--r--controller-server/src/test/java/com/yahoo/vespa/hosted/controller/persistence/ApplicationSerializerTest.java3
7 files changed, 112 insertions, 36 deletions
diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/Application.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/Application.java
index 963dca8d3b5..910d4ba6508 100644
--- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/Application.java
+++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/Application.java
@@ -10,11 +10,11 @@ import com.yahoo.config.provision.Environment;
import com.yahoo.config.provision.HostName;
import com.yahoo.config.provision.SystemName;
import com.yahoo.vespa.hosted.controller.api.integration.MetricsService.ApplicationMetrics;
+import com.yahoo.vespa.hosted.controller.api.integration.deployment.ApplicationVersion;
import com.yahoo.vespa.hosted.controller.api.integration.organization.IssueId;
import com.yahoo.vespa.hosted.controller.api.integration.organization.User;
import com.yahoo.vespa.hosted.controller.api.integration.zone.ZoneId;
import com.yahoo.vespa.hosted.controller.application.ApplicationActivity;
-import com.yahoo.vespa.hosted.controller.api.integration.deployment.ApplicationVersion;
import com.yahoo.vespa.hosted.controller.application.Change;
import com.yahoo.vespa.hosted.controller.application.Deployment;
import com.yahoo.vespa.hosted.controller.application.DeploymentJobs;
@@ -29,6 +29,7 @@ import java.util.List;
import java.util.Map;
import java.util.Objects;
import java.util.Optional;
+import java.util.OptionalInt;
import java.util.OptionalLong;
import java.util.function.Function;
import java.util.stream.Collectors;
@@ -52,7 +53,7 @@ public class Application {
private final Change outstandingChange;
private final Optional<IssueId> ownershipIssueId;
private final Optional<User> owner;
- private final Optional<Integer> majorVersion;
+ private final OptionalInt majorVersion;
private final ApplicationMetrics metrics;
private final Optional<RotationId> rotation;
private final Map<HostName, RotationStatus> rotationStatus;
@@ -61,7 +62,7 @@ public class Application {
public Application(ApplicationId id, Instant now) {
this(id, now, DeploymentSpec.empty, ValidationOverrides.empty, Collections.emptyMap(),
new DeploymentJobs(OptionalLong.empty(), Collections.emptyList(), Optional.empty(), false),
- Change.empty(), Change.empty(), Optional.empty(), Optional.empty(), Optional.empty(),
+ Change.empty(), Change.empty(), Optional.empty(), Optional.empty(), OptionalInt.empty(),
new ApplicationMetrics(0, 0),
Optional.empty(), Collections.emptyMap());
}
@@ -70,7 +71,7 @@ public class Application {
public Application(ApplicationId id, Instant createdAt, DeploymentSpec deploymentSpec, ValidationOverrides validationOverrides,
List<Deployment> deployments, DeploymentJobs deploymentJobs, Change change,
Change outstandingChange, Optional<IssueId> ownershipIssueId, Optional<User> owner,
- Optional<Integer> majorVersion, ApplicationMetrics metrics,
+ OptionalInt majorVersion, ApplicationMetrics metrics,
Optional<RotationId> rotation, Map<HostName, RotationStatus> rotationStatus) {
this(id, createdAt, deploymentSpec, validationOverrides,
deployments.stream().collect(Collectors.toMap(Deployment::zone, d -> d)),
@@ -81,7 +82,7 @@ public class Application {
Application(ApplicationId id, Instant createdAt, DeploymentSpec deploymentSpec, ValidationOverrides validationOverrides,
Map<ZoneId, Deployment> deployments, DeploymentJobs deploymentJobs, Change change,
Change outstandingChange, Optional<IssueId> ownershipIssueId, Optional<User> owner,
- Optional<Integer> majorVersion, ApplicationMetrics metrics,
+ OptionalInt majorVersion, ApplicationMetrics metrics,
Optional<RotationId> rotation, Map<HostName, RotationStatus> rotationStatus) {
this.id = Objects.requireNonNull(id, "id cannot be null");
this.createdAt = Objects.requireNonNull(createdAt, "instant of creation cannot be null");
@@ -156,7 +157,7 @@ public class Application {
* Overrides the system major version for this application. This override takes effect if the deployment
* spec does not specify a major version.
*/
- public Optional<Integer> majorVersion() { return majorVersion; }
+ public OptionalInt majorVersion() { return majorVersion; }
/** Returns metrics for this */
public ApplicationMetrics metrics() {
diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/LockedApplication.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/LockedApplication.java
index 130519335ce..1fb22d28ac7 100644
--- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/LockedApplication.java
+++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/LockedApplication.java
@@ -32,6 +32,7 @@ import java.util.List;
import java.util.Map;
import java.util.Objects;
import java.util.Optional;
+import java.util.OptionalInt;
import java.util.OptionalLong;
import java.util.stream.Collectors;
@@ -54,7 +55,7 @@ public class LockedApplication {
private final Change outstandingChange;
private final Optional<IssueId> ownershipIssueId;
private final Optional<User> owner;
- private final Optional<Integer> majorVersion;
+ private final OptionalInt majorVersion;
private final ApplicationMetrics metrics;
private final Optional<RotationId> rotation;
private final Map<HostName, RotationStatus> rotationStatus;
@@ -78,7 +79,7 @@ public class LockedApplication {
DeploymentSpec deploymentSpec, ValidationOverrides validationOverrides,
Map<ZoneId, Deployment> deployments, DeploymentJobs deploymentJobs, Change change,
Change outstandingChange, Optional<IssueId> ownershipIssueId, Optional<User> owner,
- Optional<Integer> majorVersion, ApplicationMetrics metrics,
+ OptionalInt majorVersion, ApplicationMetrics metrics,
Optional<RotationId> rotation, Map<HostName, RotationStatus> rotationStatus) {
this.lock = lock;
this.id = id;
@@ -231,7 +232,9 @@ public class LockedApplication {
public LockedApplication withMajorVersion(Integer majorVersion) {
return new LockedApplication(lock, id, createdAt, deploymentSpec, validationOverrides, deployments,
deploymentJobs, change, outstandingChange,
- ownershipIssueId, owner, Optional.ofNullable(majorVersion), metrics, rotation, rotationStatus);
+ ownershipIssueId, owner,
+ majorVersion == null ? OptionalInt.empty() : OptionalInt.of(majorVersion),
+ metrics, rotation, rotationStatus);
}
public LockedApplication with(MetricsService.ApplicationMetrics metrics) {
diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentTrigger.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentTrigger.java
index c8ac0c1b0e2..16f08ad1e15 100644
--- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentTrigger.java
+++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentTrigger.java
@@ -11,13 +11,13 @@ import com.yahoo.vespa.hosted.controller.Controller;
import com.yahoo.vespa.hosted.controller.api.identifiers.DeploymentId;
import com.yahoo.vespa.hosted.controller.api.integration.BuildService;
import com.yahoo.vespa.hosted.controller.api.integration.BuildService.JobState;
+import com.yahoo.vespa.hosted.controller.api.integration.deployment.ApplicationVersion;
+import com.yahoo.vespa.hosted.controller.api.integration.deployment.JobType;
import com.yahoo.vespa.hosted.controller.api.integration.zone.ZoneId;
import com.yahoo.vespa.hosted.controller.application.ApplicationList;
-import com.yahoo.vespa.hosted.controller.api.integration.deployment.ApplicationVersion;
import com.yahoo.vespa.hosted.controller.application.Change;
import com.yahoo.vespa.hosted.controller.application.Deployment;
import com.yahoo.vespa.hosted.controller.application.DeploymentJobs.JobReport;
-import com.yahoo.vespa.hosted.controller.api.integration.deployment.JobType;
import com.yahoo.vespa.hosted.controller.application.JobStatus;
import com.yahoo.vespa.hosted.controller.application.JobStatus.JobRun;
@@ -45,7 +45,6 @@ import static com.yahoo.vespa.hosted.controller.api.integration.BuildService.Job
import static com.yahoo.vespa.hosted.controller.api.integration.deployment.JobType.component;
import static com.yahoo.vespa.hosted.controller.api.integration.deployment.JobType.stagingTest;
import static com.yahoo.vespa.hosted.controller.api.integration.deployment.JobType.systemTest;
-import static java.util.Collections.emptyList;
import static java.util.Collections.singletonList;
import static java.util.Comparator.comparing;
import static java.util.Comparator.naturalOrder;
@@ -328,7 +327,10 @@ public class DeploymentTrigger {
jobs.add(deploymentJob(application, versions, change, job, reason, completedAt.get()));
}
if ( ! alreadyTriggered(application, versions)) {
- testJobs = emptyList();
+ // Only remove test jobs that target this combination
+ if (testJobs == null) testJobs = new ArrayList<>();
+ testJobs.removeIf(j -> versions.sourcesMatchIfPresent(j.triggering) &&
+ versions.targetsMatch(j.triggering));
}
}
else if (testJobs == null) {
diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/persistence/ApplicationSerializer.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/persistence/ApplicationSerializer.java
index 74edfcea6f7..7f052fd7574 100644
--- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/persistence/ApplicationSerializer.java
+++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/persistence/ApplicationSerializer.java
@@ -14,11 +14,12 @@ import com.yahoo.slime.Slime;
import com.yahoo.vespa.config.SlimeUtils;
import com.yahoo.vespa.hosted.controller.Application;
import com.yahoo.vespa.hosted.controller.api.integration.MetricsService.ApplicationMetrics;
+import com.yahoo.vespa.hosted.controller.api.integration.deployment.ApplicationVersion;
import com.yahoo.vespa.hosted.controller.api.integration.deployment.JobType;
+import com.yahoo.vespa.hosted.controller.api.integration.deployment.SourceRevision;
import com.yahoo.vespa.hosted.controller.api.integration.organization.IssueId;
import com.yahoo.vespa.hosted.controller.api.integration.organization.User;
import com.yahoo.vespa.hosted.controller.api.integration.zone.ZoneId;
-import com.yahoo.vespa.hosted.controller.api.integration.deployment.ApplicationVersion;
import com.yahoo.vespa.hosted.controller.application.Change;
import com.yahoo.vespa.hosted.controller.application.ClusterInfo;
import com.yahoo.vespa.hosted.controller.application.ClusterUtilization;
@@ -29,7 +30,6 @@ import com.yahoo.vespa.hosted.controller.application.DeploymentJobs.JobError;
import com.yahoo.vespa.hosted.controller.application.DeploymentMetrics;
import com.yahoo.vespa.hosted.controller.application.JobStatus;
import com.yahoo.vespa.hosted.controller.application.RotationStatus;
-import com.yahoo.vespa.hosted.controller.api.integration.deployment.SourceRevision;
import com.yahoo.vespa.hosted.controller.rotation.RotationId;
import java.time.Instant;
@@ -41,6 +41,7 @@ import java.util.List;
import java.util.Map;
import java.util.Optional;
import java.util.OptionalDouble;
+import java.util.OptionalInt;
import java.util.OptionalLong;
import java.util.TreeMap;
@@ -325,7 +326,7 @@ public class ApplicationSerializer {
Change outstandingChange = changeFromSlime(root.field(outstandingChangeField));
Optional<IssueId> ownershipIssueId = optionalString(root.field(ownershipIssueIdField)).map(IssueId::from);
Optional<User> owner = optionalString(root.field(ownerField)).map(User::from);
- Optional<Integer> majorVersion = optionalInteger(root.field(majorVersionField));
+ OptionalInt majorVersion = optionalInteger(root.field(majorVersionField));
ApplicationMetrics metrics = new ApplicationMetrics(root.field(queryQualityField).asDouble(),
root.field(writeQualityField).asDouble());
Optional<RotationId> rotation = rotationFromSlime(root.field(rotationField));
@@ -519,8 +520,8 @@ public class ApplicationSerializer {
return field.valid() ? OptionalLong.of(field.asLong()) : OptionalLong.empty();
}
- private Optional<Integer> optionalInteger(Inspector field) {
- return field.valid() ? Optional.of((int)field.asLong()) : Optional.empty();
+ private OptionalInt optionalInteger(Inspector field) {
+ return field.valid() ? OptionalInt.of((int) field.asLong()) : OptionalInt.empty();
}
private OptionalDouble optionalDouble(Inspector field) {
diff --git a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentTriggerTest.java b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentTriggerTest.java
index aa2b4d03028..36261eb05fe 100644
--- a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentTriggerTest.java
+++ b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentTriggerTest.java
@@ -23,7 +23,6 @@ import com.yahoo.vespa.hosted.controller.application.DeploymentJobs;
import com.yahoo.vespa.hosted.controller.maintenance.JobControl;
import com.yahoo.vespa.hosted.controller.maintenance.ReadyJobsTrigger;
import com.yahoo.vespa.hosted.controller.versions.VespaVersion;
-import org.junit.Before;
import org.junit.Test;
import java.nio.file.Files;
@@ -66,12 +65,7 @@ import static org.junit.Assert.assertTrue;
*/
public class DeploymentTriggerTest {
- private DeploymentTester tester;
-
- @Before
- public void before() {
- tester = new DeploymentTester();
- }
+ private final DeploymentTester tester = new DeploymentTester();
@Test
public void testTriggerFailing() {
@@ -117,7 +111,7 @@ public class DeploymentTriggerTest {
@Test
public void abortsInternalJobsOnNewApplicationChange() {
InternalDeploymentTester iTester = new InternalDeploymentTester();
- tester = iTester.tester();
+ DeploymentTester tester = iTester.tester();
Application app = iTester.app();
ApplicationPackage applicationPackage = InternalDeploymentTester.applicationPackage;
@@ -730,7 +724,7 @@ public class DeploymentTriggerTest {
tester.deployAndNotify(application, true, systemTest);
tester.deployAndNotify(application, true, stagingTest);
- // Tests are not re-triggered, because the jobs they were run for has not yet been triggered with the tested versions.
+ // Tests are not re-triggered, because the deployments that were tested have not yet been triggered on the tested versions.
assertEquals(firstTested, app.get().deploymentJobs().jobStatus().get(systemTest).lastTriggered().get().platform());
assertEquals(firstTested, app.get().deploymentJobs().jobStatus().get(stagingTest).lastTriggered().get().platform());
diff --git a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/UpgraderTest.java b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/UpgraderTest.java
index 7953b462ab2..2a13d73e42d 100644
--- a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/UpgraderTest.java
+++ b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/UpgraderTest.java
@@ -16,14 +16,16 @@ import com.yahoo.vespa.hosted.controller.deployment.ApplicationPackageBuilder;
import com.yahoo.vespa.hosted.controller.deployment.BuildJob;
import com.yahoo.vespa.hosted.controller.deployment.DeploymentTester;
import com.yahoo.vespa.hosted.controller.versions.VespaVersion;
-import org.junit.Before;
import org.junit.Test;
import java.time.Duration;
import java.time.Instant;
import java.util.Collections;
import java.util.Optional;
+import java.util.OptionalInt;
+import java.util.function.Supplier;
+import static com.yahoo.config.provision.SystemName.main;
import static com.yahoo.vespa.hosted.controller.api.integration.deployment.JobType.component;
import static com.yahoo.vespa.hosted.controller.api.integration.deployment.JobType.productionUsCentral1;
import static com.yahoo.vespa.hosted.controller.api.integration.deployment.JobType.productionUsEast3;
@@ -41,12 +43,7 @@ import static org.junit.Assert.assertTrue;
*/
public class UpgraderTest {
- private DeploymentTester tester;
-
- @Before
- public void before() {
- tester = new DeploymentTester();
- }
+ private final DeploymentTester tester = new DeploymentTester();
@Test
public void testUpgrading() {
@@ -943,7 +940,7 @@ public class UpgraderTest {
Application canary0 = tester.createAndDeploy("canary0", 1, "canary");
Application default0 = tester.createAndDeploy("default0", 2, default0ApplicationPackage);
tester.applications().lockOrThrow(default0.id(), a -> tester.applications().store(a.withMajorVersion(6)));
- assertEquals(Optional.of(6), tester.applications().get(default0.id()).get().majorVersion());
+ assertEquals(OptionalInt.of(6), tester.applications().get(default0.id()).get().majorVersion());
// New major version is released
version = Version.fromString("7.0");
@@ -1273,4 +1270,81 @@ public class UpgraderTest {
tester.controller().applications().require(app1.id()).change().platform().get());
}
+ @Test
+ public void testsEachUpgradeCombinationWithFailingDeployments() {
+ Application application = tester.createApplication("app1", "tenant1", 1, 1L);
+ Supplier<Application> app = () -> tester.application(application.id());
+ ApplicationPackage applicationPackage = new ApplicationPackageBuilder()
+ .environment(Environment.prod)
+ .region("us-central-1")
+ .region("us-west-1")
+ .region("us-east-3")
+ .build();
+
+ // Application deploys on system version
+ Version v1 = Version.fromString("6.1");
+ tester.deployCompletely(application, applicationPackage);
+
+ // Next version is released and 2/3 deployments upgrade
+ Version v2 = Version.fromString("6.2");
+ tester.upgradeSystem(v2);
+ tester.upgrader().maintain();
+ assertEquals(Change.of(v2), app.get().change());
+ tester.deployAndNotify(application, true, systemTest);
+ tester.deployAndNotify(application, true, stagingTest);
+ tester.deployAndNotify(application, true, productionUsCentral1);
+
+ // While second deployment completes upgrade, version confidence becomes broken and upgrade is cancelled
+ tester.upgrader().overrideConfidence(v2, VespaVersion.Confidence.broken);
+ tester.computeVersionStatus();
+ tester.upgrader().maintain();
+ tester.deployAndNotify(application, true, productionUsWest1);
+ assertTrue(app.get().change().isEmpty());
+
+ // Next version is released
+ Version v3 = Version.fromString("6.3");
+ tester.upgradeSystem(v3);
+ tester.upgrader().maintain();
+ assertEquals(Change.of(v3), app.get().change());
+ tester.deployAndNotify(application, true, systemTest);
+ tester.deployAndNotify(application, true, stagingTest);
+
+ // First deployment starts upgrading
+ tester.deploy(productionUsCentral1, application, applicationPackage);
+
+ // Before deployment completes, v1->v3 combination is tested as us-east-3 is still on v1
+ tester.readyJobTrigger().maintain();
+ tester.deployAndNotify(application, true, stagingTest);
+ assertEquals(v1, app.get().deploymentJobs().jobStatus().get(stagingTest).lastSuccess().get().sourcePlatform().get());
+ assertEquals(v3, app.get().deploymentJobs().jobStatus().get(stagingTest).lastSuccess().get().platform());
+
+ // First deployment fails and then successfully upgrades to v3
+ tester.jobCompletion(productionUsCentral1).application(application).unsuccessful().submit();
+ tester.jobCompletion(productionUsCentral1).application(application).submit();
+
+ // Deployments are now on 3 versions
+ assertEquals(v3, app.get().deployments().get(productionUsCentral1.zone(main)).version());
+ assertEquals(v2, app.get().deployments().get(productionUsWest1.zone(main)).version());
+ assertEquals(v1, app.get().deployments().get(productionUsEast3.zone(main)).version());
+
+ // Need to test v2->v3 combination again before upgrading second deployment
+ tester.readyJobTrigger().maintain();
+ assertEquals(v2, app.get().deploymentJobs().jobStatus().get(stagingTest).lastTriggered().get().sourcePlatform().get());
+ assertEquals(v3, app.get().deploymentJobs().jobStatus().get(stagingTest).lastTriggered().get().platform());
+ tester.deployAndNotify(application, true, stagingTest);
+
+ // Second deployment upgrades
+ tester.deployAndNotify(application, true, productionUsWest1);
+
+ // ... now we have to test v1->v3 again :(
+ tester.readyJobTrigger().maintain();
+ assertEquals(v1, app.get().deploymentJobs().jobStatus().get(stagingTest).lastTriggered().get().sourcePlatform().get());
+ assertEquals(v3, app.get().deploymentJobs().jobStatus().get(stagingTest).lastTriggered().get().platform());
+ tester.deployAndNotify(application, true, stagingTest);
+
+ // Upgrade completes
+ tester.deployAndNotify(application, true, productionUsEast3);
+ assertTrue("Upgrade complete", app.get().change().isEmpty());
+ }
+
}
diff --git a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/persistence/ApplicationSerializerTest.java b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/persistence/ApplicationSerializerTest.java
index e46119dc0b5..849b251a230 100644
--- a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/persistence/ApplicationSerializerTest.java
+++ b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/persistence/ApplicationSerializerTest.java
@@ -41,6 +41,7 @@ import java.util.List;
import java.util.Map;
import java.util.Optional;
import java.util.OptionalDouble;
+import java.util.OptionalInt;
import java.util.OptionalLong;
import java.util.TreeMap;
@@ -110,7 +111,7 @@ public class ApplicationSerializerTest {
Change.of(ApplicationVersion.from(new SourceRevision("repo", "master", "deadcafe"), 42)),
Optional.of(IssueId.from("1234")),
Optional.of(User.from("by-username")),
- Optional.of(7),
+ OptionalInt.of(7),
new MetricsService.ApplicationMetrics(0.5, 0.9),
Optional.of(new RotationId("my-rotation")),
rotationStatus);