From 934d2962613da65b12a54732889cb255938ce1e2 Mon Sep 17 00:00:00 2001 From: Harald Musum Date: Sun, 27 Jan 2019 18:45:54 +0100 Subject: Increase timeout for application lock --- .../com/yahoo/vespa/hosted/controller/persistence/CuratorDb.java | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'controller-server') diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/persistence/CuratorDb.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/persistence/CuratorDb.java index 9c498233809..d84bc3baa26 100644 --- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/persistence/CuratorDb.java +++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/persistence/CuratorDb.java @@ -115,7 +115,7 @@ public class CuratorDb { // -------------- Locks --------------------------------------------------- - /** Create a reentrant lock */ + /** Creates a reentrant lock */ private Lock lock(Path path, Duration timeout) { Lock lock = locks.computeIfAbsent(path, (pathArg) -> new Lock(pathArg.getAbsolute(), curator)); lock.acquire(timeout); @@ -127,7 +127,10 @@ public class CuratorDb { } public Lock lock(ApplicationId id) { - return lock(lockPath(id), defaultLockTimeout.multipliedBy(2)); + // Timeout should be higher than a deployment takes, since there might be deployments wanting + // to run in parallel, too low timeout in that case has been seen to lead to deployments not + // getting the lock before it times out + return lock(lockPath(id), defaultLockTimeout.multipliedBy(4)); } public Lock lock(ApplicationId id, JobType type) { -- cgit v1.2.3 From 183c77de4a560fa730a559675b8f9ff1ebdf28fb Mon Sep 17 00:00:00 2001 From: Harald Musum Date: Wed, 6 Feb 2019 14:11:20 +0100 Subject: Use a separate timeout for deployment lock --- .../vespa/hosted/controller/ApplicationController.java | 14 +++++++++++--- .../vespa/hosted/controller/persistence/CuratorDb.java | 13 +++++++++---- 2 files changed, 20 insertions(+), 7 deletions(-) (limited to 'controller-server') diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/ApplicationController.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/ApplicationController.java index 4a8670b9f9e..f9254e9c467 100644 --- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/ApplicationController.java +++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/ApplicationController.java @@ -68,10 +68,8 @@ import java.net.URI; import java.time.Clock; import java.time.Duration; import java.time.Instant; -import java.util.ArrayDeque; import java.util.Collections; import java.util.Comparator; -import java.util.Deque; import java.util.EnumSet; import java.util.HashSet; import java.util.List; @@ -267,7 +265,7 @@ public class ApplicationController { if (applicationId.instance().isTester()) throw new IllegalArgumentException("'" + applicationId + "' is a tester application!"); - try (Lock lock = lock(applicationId)) { + try (Lock lock = lockForDeployment(applicationId)) { LockedApplication application = get(applicationId) .map(app -> new LockedApplication(app, lock)) .orElseGet(() -> new LockedApplication(createApplication(applicationId, Optional.empty()), lock)); @@ -659,6 +657,16 @@ public class ApplicationController { return curator.lock(application); } + /** + * Returns a lock which provides exclusive rights to changing this application, with a timeout that + * is suitable for deployments. + * Any operation which stores an application need to first acquire this lock, then read, modify + * and store the application, and finally release (close) the lock. + */ + private Lock lockForDeployment(ApplicationId application) { + return curator.lockForDeployment(application); + } + /** Verify that each of the production zones listed in the deployment spec exist in this system. */ private void validate(DeploymentSpec deploymentSpec) { new DeploymentSteps(deploymentSpec, controller::system).jobs(); diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/persistence/CuratorDb.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/persistence/CuratorDb.java index d84bc3baa26..d509b2b62d4 100644 --- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/persistence/CuratorDb.java +++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/persistence/CuratorDb.java @@ -63,6 +63,7 @@ import static java.util.stream.Collectors.collectingAndThen; public class CuratorDb { private static final Logger log = Logger.getLogger(CuratorDb.class.getName()); + private static final Duration deployLockTimeout = Duration.ofMinutes(20); private static final Duration defaultLockTimeout = Duration.ofMinutes(5); private static final Duration defaultTryLockTimeout = Duration.ofSeconds(1); @@ -127,10 +128,14 @@ public class CuratorDb { } public Lock lock(ApplicationId id) { - // Timeout should be higher than a deployment takes, since there might be deployments wanting - // to run in parallel, too low timeout in that case has been seen to lead to deployments not - // getting the lock before it times out - return lock(lockPath(id), defaultLockTimeout.multipliedBy(4)); + return lock(lockPath(id), defaultLockTimeout.multipliedBy(2)); + } + + // Timeout should be higher than the time deployment takes, since there might be deployments wanting + // to run in parallel, too low timeout in that case has been seen to lead to deployments not + // getting the lock before it times out + public Lock lockForDeployment(ApplicationId id) { + return lock(lockPath(id), deployLockTimeout); } public Lock lock(ApplicationId id, JobType type) { -- cgit v1.2.3