summaryrefslogtreecommitdiffstats
path: root/controller-server
diff options
context:
space:
mode:
authorHarald Musum <musum@verizonmedia.com>2019-02-06 14:48:02 +0100
committerGitHub <noreply@github.com>2019-02-06 14:48:02 +0100
commit07437b3ea3525a877d4bd42cf0cde9891df352a9 (patch)
tree3188ad4ad7f37df279cb49376e00d6e91f3e216d /controller-server
parent7e93d5dac4c722f1efa0e85e511a90084264a99f (diff)
parent183c77de4a560fa730a559675b8f9ff1ebdf28fb (diff)
Merge pull request #8246 from vespa-engine/hmusum/increase-application-lock-timeout
Increase timeout for application lock when deploying
Diffstat (limited to 'controller-server')
-rw-r--r--controller-server/src/main/java/com/yahoo/vespa/hosted/controller/ApplicationController.java14
-rw-r--r--controller-server/src/main/java/com/yahoo/vespa/hosted/controller/persistence/CuratorDb.java10
2 files changed, 20 insertions, 4 deletions
diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/ApplicationController.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/ApplicationController.java
index 4a8670b9f9e..f9254e9c467 100644
--- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/ApplicationController.java
+++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/ApplicationController.java
@@ -68,10 +68,8 @@ import java.net.URI;
import java.time.Clock;
import java.time.Duration;
import java.time.Instant;
-import java.util.ArrayDeque;
import java.util.Collections;
import java.util.Comparator;
-import java.util.Deque;
import java.util.EnumSet;
import java.util.HashSet;
import java.util.List;
@@ -267,7 +265,7 @@ public class ApplicationController {
if (applicationId.instance().isTester())
throw new IllegalArgumentException("'" + applicationId + "' is a tester application!");
- try (Lock lock = lock(applicationId)) {
+ try (Lock lock = lockForDeployment(applicationId)) {
LockedApplication application = get(applicationId)
.map(app -> new LockedApplication(app, lock))
.orElseGet(() -> new LockedApplication(createApplication(applicationId, Optional.empty()), lock));
@@ -659,6 +657,16 @@ public class ApplicationController {
return curator.lock(application);
}
+ /**
+ * Returns a lock which provides exclusive rights to changing this application, with a timeout that
+ * is suitable for deployments.
+ * Any operation which stores an application need to first acquire this lock, then read, modify
+ * and store the application, and finally release (close) the lock.
+ */
+ private Lock lockForDeployment(ApplicationId application) {
+ return curator.lockForDeployment(application);
+ }
+
/** Verify that each of the production zones listed in the deployment spec exist in this system. */
private void validate(DeploymentSpec deploymentSpec) {
new DeploymentSteps(deploymentSpec, controller::system).jobs();
diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/persistence/CuratorDb.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/persistence/CuratorDb.java
index d0fc2f9163f..466e00cef2a 100644
--- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/persistence/CuratorDb.java
+++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/persistence/CuratorDb.java
@@ -63,6 +63,7 @@ import static java.util.stream.Collectors.collectingAndThen;
public class CuratorDb {
private static final Logger log = Logger.getLogger(CuratorDb.class.getName());
+ private static final Duration deployLockTimeout = Duration.ofMinutes(20);
private static final Duration defaultLockTimeout = Duration.ofMinutes(5);
private static final Duration defaultTryLockTimeout = Duration.ofSeconds(1);
@@ -115,7 +116,7 @@ public class CuratorDb {
// -------------- Locks ---------------------------------------------------
- /** Create a reentrant lock */
+ /** Creates a reentrant lock */
private Lock lock(Path path, Duration timeout) {
Lock lock = locks.computeIfAbsent(path, (pathArg) -> new Lock(pathArg.getAbsolute(), curator));
lock.acquire(timeout);
@@ -130,6 +131,13 @@ public class CuratorDb {
return lock(lockPath(id), defaultLockTimeout.multipliedBy(2));
}
+ // Timeout should be higher than the time deployment takes, since there might be deployments wanting
+ // to run in parallel, too low timeout in that case has been seen to lead to deployments not
+ // getting the lock before it times out
+ public Lock lockForDeployment(ApplicationId id) {
+ return lock(lockPath(id), deployLockTimeout);
+ }
+
public Lock lock(ApplicationId id, JobType type) {
return lock(lockPath(id, type), defaultLockTimeout);
}