diff options
author | Harald Musum <musum@verizonmedia.com> | 2019-02-06 14:48:02 +0100 |
---|---|---|
committer | GitHub <noreply@github.com> | 2019-02-06 14:48:02 +0100 |
commit | 07437b3ea3525a877d4bd42cf0cde9891df352a9 (patch) | |
tree | 3188ad4ad7f37df279cb49376e00d6e91f3e216d | |
parent | 7e93d5dac4c722f1efa0e85e511a90084264a99f (diff) | |
parent | 183c77de4a560fa730a559675b8f9ff1ebdf28fb (diff) |
Merge pull request #8246 from vespa-engine/hmusum/increase-application-lock-timeout
Increase timeout for application lock when deploying
2 files changed, 20 insertions, 4 deletions
diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/ApplicationController.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/ApplicationController.java index 4a8670b9f9e..f9254e9c467 100644 --- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/ApplicationController.java +++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/ApplicationController.java @@ -68,10 +68,8 @@ import java.net.URI; import java.time.Clock; import java.time.Duration; import java.time.Instant; -import java.util.ArrayDeque; import java.util.Collections; import java.util.Comparator; -import java.util.Deque; import java.util.EnumSet; import java.util.HashSet; import java.util.List; @@ -267,7 +265,7 @@ public class ApplicationController { if (applicationId.instance().isTester()) throw new IllegalArgumentException("'" + applicationId + "' is a tester application!"); - try (Lock lock = lock(applicationId)) { + try (Lock lock = lockForDeployment(applicationId)) { LockedApplication application = get(applicationId) .map(app -> new LockedApplication(app, lock)) .orElseGet(() -> new LockedApplication(createApplication(applicationId, Optional.empty()), lock)); @@ -659,6 +657,16 @@ public class ApplicationController { return curator.lock(application); } + /** + * Returns a lock which provides exclusive rights to changing this application, with a timeout that + * is suitable for deployments. + * Any operation which stores an application need to first acquire this lock, then read, modify + * and store the application, and finally release (close) the lock. + */ + private Lock lockForDeployment(ApplicationId application) { + return curator.lockForDeployment(application); + } + /** Verify that each of the production zones listed in the deployment spec exist in this system. */ private void validate(DeploymentSpec deploymentSpec) { new DeploymentSteps(deploymentSpec, controller::system).jobs(); diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/persistence/CuratorDb.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/persistence/CuratorDb.java index d0fc2f9163f..466e00cef2a 100644 --- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/persistence/CuratorDb.java +++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/persistence/CuratorDb.java @@ -63,6 +63,7 @@ import static java.util.stream.Collectors.collectingAndThen; public class CuratorDb { private static final Logger log = Logger.getLogger(CuratorDb.class.getName()); + private static final Duration deployLockTimeout = Duration.ofMinutes(20); private static final Duration defaultLockTimeout = Duration.ofMinutes(5); private static final Duration defaultTryLockTimeout = Duration.ofSeconds(1); @@ -115,7 +116,7 @@ public class CuratorDb { // -------------- Locks --------------------------------------------------- - /** Create a reentrant lock */ + /** Creates a reentrant lock */ private Lock lock(Path path, Duration timeout) { Lock lock = locks.computeIfAbsent(path, (pathArg) -> new Lock(pathArg.getAbsolute(), curator)); lock.acquire(timeout); @@ -130,6 +131,13 @@ public class CuratorDb { return lock(lockPath(id), defaultLockTimeout.multipliedBy(2)); } + // Timeout should be higher than the time deployment takes, since there might be deployments wanting + // to run in parallel, too low timeout in that case has been seen to lead to deployments not + // getting the lock before it times out + public Lock lockForDeployment(ApplicationId id) { + return lock(lockPath(id), deployLockTimeout); + } + public Lock lock(ApplicationId id, JobType type) { return lock(lockPath(id, type), defaultLockTimeout); } |