summaryrefslogtreecommitdiffstats
path: root/node-repository
diff options
context:
space:
mode:
authorHarald Musum <musum@yahooinc.com>2022-04-19 14:09:21 +0200
committerHarald Musum <musum@yahooinc.com>2022-04-19 14:09:21 +0200
commit9ebad8ca92af962da93731b7054f5e5302b0aaf2 (patch)
treed11befad5ed555e2ce8e81784edb066e08ae5dec /node-repository
parent1e4a7bd6c22f52d7c6cd57b6f48dbfa5a89388b5 (diff)
Use timeout when getting locks with NodePatcher
Client has a timeout of 10 seconds, using a much larger timeout (as is the default) might lead to lots of requests waiting for the lock
Diffstat (limited to 'node-repository')
-rw-r--r--node-repository/src/main/java/com/yahoo/vespa/hosted/provision/node/Nodes.java21
-rw-r--r--node-repository/src/main/java/com/yahoo/vespa/hosted/provision/persistence/CuratorDatabaseClient.java8
-rw-r--r--node-repository/src/main/java/com/yahoo/vespa/hosted/provision/restapi/NodePatcher.java3
3 files changed, 28 insertions, 4 deletions
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/node/Nodes.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/node/Nodes.java
index d544ea76983..3aa09b1b667 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/node/Nodes.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/node/Nodes.java
@@ -773,12 +773,15 @@ public class Nodes {
public Mutex lockUnallocated() { return db.lockInactive(); }
/** Returns the unallocated/application lock, and the node acquired under that lock. */
- public Optional<NodeMutex> lockAndGet(Node node) {
+ public Optional<NodeMutex> lockAndGet(Node node) { return lockAndGet(node, Optional.empty()); }
+
+ /** Returns the unallocated/application lock, and the node acquired under that lock. */
+ public Optional<NodeMutex> lockAndGet(Node node, Optional<Duration> timeout) {
Node staleNode = node;
final int maxRetries = 4;
for (int i = 0; i < maxRetries; ++i) {
- Mutex lockToClose = lock(staleNode);
+ Mutex lockToClose = timeout.isPresent() ? lock(staleNode, timeout.get()) : lock(staleNode);
try {
// As an optimization we first try finding the node in the same state
Optional<Node> freshNode = node(staleNode.hostname(), staleNode.state());
@@ -813,6 +816,11 @@ public class Nodes {
}
/** Returns the unallocated/application lock, and the node acquired under that lock. */
+ public Optional<NodeMutex> lockAndGet(String hostname, Duration timeout) {
+ return node(hostname).flatMap(node -> lockAndGet(node, Optional.of(timeout)));
+ }
+
+ /** Returns the unallocated/application lock, and the node acquired under that lock. */
public NodeMutex lockAndGetRequired(Node node) {
return lockAndGet(node).orElseThrow(() -> new NoSuchNodeException("No node with hostname '" + node.hostname() + "'"));
}
@@ -822,10 +830,19 @@ public class Nodes {
return lockAndGet(hostname).orElseThrow(() -> new NoSuchNodeException("No node with hostname '" + hostname + "'"));
}
+ /** Returns the unallocated/application lock, and the node acquired under that lock. */
+ public NodeMutex lockAndGetRequired(String hostname, Duration timeout) {
+ return lockAndGet(hostname, timeout).orElseThrow(() -> new NoSuchNodeException("No node with hostname '" + hostname + "'"));
+ }
+
private Mutex lock(Node node) {
return node.allocation().isPresent() ? lock(node.allocation().get().owner()) : lockUnallocated();
}
+ private Mutex lock(Node node, Duration timeout) {
+ return node.allocation().isPresent() ? lock(node.allocation().get().owner(), timeout) : lockUnallocated();
+ }
+
private Node requireNode(String hostname) {
return node(hostname).orElseThrow(() -> new NoSuchNodeException("No node with hostname '" + hostname + "'"));
}
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/persistence/CuratorDatabaseClient.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/persistence/CuratorDatabaseClient.java
index a9abc352d8c..13dd458c041 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/persistence/CuratorDatabaseClient.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/persistence/CuratorDatabaseClient.java
@@ -69,6 +69,7 @@ public class CuratorDatabaseClient {
private static final Path firmwareCheckPath = root.append("firmwareCheck");
private static final Path archiveUrisPath = root.append("archiveUris");
+ // TODO: Explain reasoning behind timeout value (why its it as high as 10 minutes?)
private static final Duration defaultLockTimeout = Duration.ofMinutes(10);
private final NodeSerializer nodeSerializer;
@@ -319,7 +320,12 @@ public class CuratorDatabaseClient {
/** Acquires the single cluster-global, reentrant lock for all non-active nodes */
public Lock lockInactive() {
- return db.lock(lockPath.append("unallocatedLock"), defaultLockTimeout);
+ return lockInactive(defaultLockTimeout);
+ }
+
+ /** Acquires the single cluster-global, reentrant lock for all non-active nodes */
+ public Lock lockInactive(Duration timeout) {
+ return db.lock(lockPath.append("unallocatedLock"), timeout);
}
/** Acquires the single cluster-global, reentrant lock for active nodes of this application */
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/restapi/NodePatcher.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/restapi/NodePatcher.java
index fa6c44e6851..d8e1828b10c 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/restapi/NodePatcher.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/restapi/NodePatcher.java
@@ -29,6 +29,7 @@ import com.yahoo.yolean.Exceptions;
import java.io.InputStream;
import java.time.Clock;
+import java.time.Duration;
import java.time.Instant;
import java.util.ArrayList;
import java.util.HashMap;
@@ -86,7 +87,7 @@ public class NodePatcher {
Map<String, Inspector> recursiveFields = Maps.filterKeys(fields, RECURSIVE_FIELDS::contains);
// Patch
- NodeMutex nodeMutex = nodeRepository.nodes().lockAndGetRequired(hostname);
+ NodeMutex nodeMutex = nodeRepository.nodes().lockAndGetRequired(hostname, Duration.ofSeconds(10)); // timeout should match the one used by clients
patch(nodeMutex, regularFields, root, false);
patchIpConfig(hostname, ipConfigFields);
if (nodeMutex.node().type().isHost()) {