summaryrefslogtreecommitdiffstats
path: root/controller-server
diff options
context:
space:
mode:
authorHarald Musum <musum@verizonmedia.com>2019-02-06 14:11:20 +0100
committerHarald Musum <musum@verizonmedia.com>2019-02-06 14:11:20 +0100
commit183c77de4a560fa730a559675b8f9ff1ebdf28fb (patch)
tree817b9b82c72c1171fb2d5c2daf597699cc8b3969 /controller-server
parent934d2962613da65b12a54732889cb255938ce1e2 (diff)
Use a separate timeout for deployment lock
Diffstat (limited to 'controller-server')
-rw-r--r--controller-server/src/main/java/com/yahoo/vespa/hosted/controller/ApplicationController.java14
-rw-r--r--controller-server/src/main/java/com/yahoo/vespa/hosted/controller/persistence/CuratorDb.java13
2 files changed, 20 insertions, 7 deletions
diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/ApplicationController.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/ApplicationController.java
index 4a8670b9f9e..f9254e9c467 100644
--- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/ApplicationController.java
+++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/ApplicationController.java
@@ -68,10 +68,8 @@ import java.net.URI;
import java.time.Clock;
import java.time.Duration;
import java.time.Instant;
-import java.util.ArrayDeque;
import java.util.Collections;
import java.util.Comparator;
-import java.util.Deque;
import java.util.EnumSet;
import java.util.HashSet;
import java.util.List;
@@ -267,7 +265,7 @@ public class ApplicationController {
if (applicationId.instance().isTester())
throw new IllegalArgumentException("'" + applicationId + "' is a tester application!");
- try (Lock lock = lock(applicationId)) {
+ try (Lock lock = lockForDeployment(applicationId)) {
LockedApplication application = get(applicationId)
.map(app -> new LockedApplication(app, lock))
.orElseGet(() -> new LockedApplication(createApplication(applicationId, Optional.empty()), lock));
@@ -659,6 +657,16 @@ public class ApplicationController {
return curator.lock(application);
}
+ /**
+ * Returns a lock which provides exclusive rights to changing this application, with a timeout that
+ * is suitable for deployments.
+ * Any operation which stores an application need to first acquire this lock, then read, modify
+ * and store the application, and finally release (close) the lock.
+ */
+ private Lock lockForDeployment(ApplicationId application) {
+ return curator.lockForDeployment(application);
+ }
+
/** Verify that each of the production zones listed in the deployment spec exist in this system. */
private void validate(DeploymentSpec deploymentSpec) {
new DeploymentSteps(deploymentSpec, controller::system).jobs();
diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/persistence/CuratorDb.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/persistence/CuratorDb.java
index d84bc3baa26..d509b2b62d4 100644
--- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/persistence/CuratorDb.java
+++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/persistence/CuratorDb.java
@@ -63,6 +63,7 @@ import static java.util.stream.Collectors.collectingAndThen;
public class CuratorDb {
private static final Logger log = Logger.getLogger(CuratorDb.class.getName());
+ private static final Duration deployLockTimeout = Duration.ofMinutes(20);
private static final Duration defaultLockTimeout = Duration.ofMinutes(5);
private static final Duration defaultTryLockTimeout = Duration.ofSeconds(1);
@@ -127,10 +128,14 @@ public class CuratorDb {
}
public Lock lock(ApplicationId id) {
- // Timeout should be higher than a deployment takes, since there might be deployments wanting
- // to run in parallel, too low timeout in that case has been seen to lead to deployments not
- // getting the lock before it times out
- return lock(lockPath(id), defaultLockTimeout.multipliedBy(4));
+ return lock(lockPath(id), defaultLockTimeout.multipliedBy(2));
+ }
+
+ // Timeout should be higher than the time deployment takes, since there might be deployments wanting
+ // to run in parallel, too low timeout in that case has been seen to lead to deployments not
+ // getting the lock before it times out
+ public Lock lockForDeployment(ApplicationId id) {
+ return lock(lockPath(id), deployLockTimeout);
}
public Lock lock(ApplicationId id, JobType type) {