summaryrefslogtreecommitdiffstats
path: root/node-repository
diff options
context:
space:
mode:
authorMartin Polden <mpolden@mpolden.no>2020-07-22 10:29:55 +0200
committerMartin Polden <mpolden@mpolden.no>2020-07-22 10:29:55 +0200
commit8c00e73031b5205c6c29a69bc672887e7e9e776f (patch)
tree91a386fa98848b5e734131b6edf16867a834a730 /node-repository
parente85ff8599fd9cfad5802405bd4e1aa4324073e9c (diff)
Take config server lock in node-repository
Diffstat (limited to 'node-repository')
-rw-r--r--node-repository/src/main/java/com/yahoo/vespa/hosted/provision/NodeRepository.java53
-rw-r--r--node-repository/src/main/java/com/yahoo/vespa/hosted/provision/persistence/CuratorDatabaseClient.java20
-rw-r--r--node-repository/src/test/java/com/yahoo/vespa/hosted/provision/persistence/CuratorDatabaseClientTest.java2
3 files changed, 63 insertions, 12 deletions
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/NodeRepository.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/NodeRepository.java
index 99b6c48c90b..7bf76b7c2c2 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/NodeRepository.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/NodeRepository.java
@@ -18,6 +18,7 @@ import com.yahoo.transaction.Mutex;
import com.yahoo.transaction.NestedTransaction;
import com.yahoo.vespa.curator.Curator;
import com.yahoo.vespa.flags.FlagSource;
+import com.yahoo.vespa.flags.Flags;
import com.yahoo.vespa.hosted.provision.Node.State;
import com.yahoo.vespa.hosted.provision.applications.Applications;
import com.yahoo.vespa.hosted.provision.lb.LoadBalancer;
@@ -46,6 +47,7 @@ import com.yahoo.vespa.hosted.provision.restapi.NotFoundException;
import java.time.Clock;
import java.time.Duration;
+import java.time.Instant;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
@@ -58,6 +60,8 @@ import java.util.Set;
import java.util.TreeSet;
import java.util.function.BiFunction;
import java.util.function.Predicate;
+import java.util.logging.Level;
+import java.util.logging.Logger;
import java.util.stream.Collectors;
import java.util.stream.Stream;
@@ -92,6 +96,8 @@ import java.util.stream.Stream;
// Nodes might have an application assigned in dirty.
public class NodeRepository extends AbstractComponent {
+ private static final Logger log = Logger.getLogger(NodeRepository.class.getName());
+
private final CuratorDatabaseClient db;
private final Clock clock;
private final Zone zone;
@@ -106,6 +112,7 @@ public class NodeRepository extends AbstractComponent {
private final Applications applications;
private final boolean canProvisionHosts;
private final int spareCount;
+ private final boolean useConfigServerLock;
/**
* Creates a node repository from a zookeeper provider.
@@ -146,7 +153,9 @@ public class NodeRepository extends AbstractComponent {
boolean useCuratorClientCache,
boolean canProvisionHosts,
int spareCount) {
- this.db = new CuratorDatabaseClient(flavors, curator, clock, zone, useCuratorClientCache);
+ // Flag is read once here as it shouldn't not change at runtime
+ this.useConfigServerLock = Flags.USE_CONFIG_SERVER_LOCK.bindTo(flagSource).value();
+ this.db = new CuratorDatabaseClient(flavors, curator, clock, zone, useCuratorClientCache, useConfigServerLock);
this.zone = zone;
this.clock = clock;
this.flavors = flavors;
@@ -160,13 +169,33 @@ public class NodeRepository extends AbstractComponent {
this.applications = new Applications(db);
this.canProvisionHosts = canProvisionHosts;
this.spareCount = spareCount;
-
- // read and write all nodes to make sure they are stored in the latest version of the serialized format
- for (State state : State.values())
- // TODO(mpolden): Add per-node locking. In its current state this may collide with other callers making
- // node state changes. Example: A redeployment on another config server which moves a node
- // to another state while this is constructed.
- db.writeTo(state, db.readNodes(state), Agent.system, Optional.empty());
+ rewriteNodes();
+ }
+
+ /** Read and write all nodes to make sure they are stored in the latest version of the serialized format */
+ private void rewriteNodes() {
+ Instant start = clock.instant();
+ int nodesWritten = 0;
+ if (useConfigServerLock) {
+ for (var state : State.values()) {
+ for (var node : db.readNodes(state)) {
+ try (var lock = lock(node)) {
+ var currentNode = db.readNode(node.hostname());
+ if (currentNode.isEmpty()) continue; // Node disappeared while running this loop
+ db.writeTo(currentNode.get().state(), currentNode.get(), Agent.system, Optional.empty());
+ nodesWritten++;
+ }
+ }
+ }
+ } else {
+ for (State state : State.values()) {
+ List<Node> nodes = db.readNodes(state);
+ db.writeTo(state, nodes, Agent.system, Optional.empty());
+ nodesWritten += nodes.size();
+ }
+ }
+ Instant end = clock.instant();
+ log.log(Level.INFO, String.format("Rewrote %d nodes in %s", nodesWritten, Duration.between(start, end)));
}
/** Returns the curator database client used by this */
@@ -829,10 +858,14 @@ public class NodeRepository extends AbstractComponent {
public Zone zone() { return zone; }
/** Create a lock which provides exclusive rights to making changes to the given application */
- public Mutex lock(ApplicationId application) { return db.legacyLock(application); }
+ public Mutex lock(ApplicationId application) {
+ return useConfigServerLock ? db.lock(application) : db.legacyLock(application);
+ }
/** Create a lock with a timeout which provides exclusive rights to making changes to the given application */
- public Mutex lock(ApplicationId application, Duration timeout) { return db.legacyLock(application, timeout); }
+ public Mutex lock(ApplicationId application, Duration timeout) {
+ return useConfigServerLock ? db.lock(application, timeout) : db.legacyLock(application, timeout);
+ }
/** Create a lock which provides exclusive rights to modifying unallocated nodes */
public Mutex lockUnallocated() { return db.lockInactive(); }
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/persistence/CuratorDatabaseClient.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/persistence/CuratorDatabaseClient.java
index 5e2f0bd4761..cc62ae67e84 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/persistence/CuratorDatabaseClient.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/persistence/CuratorDatabaseClient.java
@@ -77,13 +77,15 @@ public class CuratorDatabaseClient {
private final Clock clock;
private final Zone zone;
private final CuratorCounter provisionIndexCounter;
+ private final boolean logStackTracesOnLockTimeout;
- public CuratorDatabaseClient(NodeFlavors flavors, Curator curator, Clock clock, Zone zone, boolean useCache) {
+ public CuratorDatabaseClient(NodeFlavors flavors, Curator curator, Clock clock, Zone zone, boolean useCache, boolean logStackTracesOnLockTimeout) {
this.nodeSerializer = new NodeSerializer(flavors);
this.zone = zone;
this.db = new CuratorDatabase(curator, root, useCache);
this.clock = clock;
this.provisionIndexCounter = new CuratorCounter(curator, root.append("provisionIndexCounter").getAbsolute());
+ this.logStackTracesOnLockTimeout = logStackTracesOnLockTimeout;
initZK();
}
@@ -394,6 +396,22 @@ public class CuratorDatabaseClient {
try {
return db.lock(lockPath(application), timeout);
} catch (UncheckedTimeoutException e) {
+ if (logStackTracesOnLockTimeout) {
+ log.log(Level.WARNING, "Logging stack trace from all threads due to lock timeout");
+ Map<Thread, StackTraceElement[]> stackTraces = Thread.getAllStackTraces();
+ for (Map.Entry<Thread, StackTraceElement[]> kv : stackTraces.entrySet()) {
+ StringBuilder sb = new StringBuilder();
+ sb.append("Thread '")
+ .append(kv.getKey().getName())
+ .append("'\n");
+ for (var stackTraceElement : kv.getValue()) {
+ sb.append("\tat ")
+ .append(stackTraceElement)
+ .append("\n");
+ }
+ log.log(Level.WARNING, sb.toString());
+ }
+ }
throw new ApplicationLockException(e);
}
}
diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/persistence/CuratorDatabaseClientTest.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/persistence/CuratorDatabaseClientTest.java
index 880f5af5653..b3d567c8d58 100644
--- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/persistence/CuratorDatabaseClientTest.java
+++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/persistence/CuratorDatabaseClientTest.java
@@ -26,7 +26,7 @@ public class CuratorDatabaseClientTest {
private final Curator curator = new MockCurator();
private final CuratorDatabaseClient zkClient = new CuratorDatabaseClient(
- FlavorConfigBuilder.createDummies("default"), curator, Clock.systemUTC(), Zone.defaultZone(), true);
+ FlavorConfigBuilder.createDummies("default"), curator, Clock.systemUTC(), Zone.defaultZone(), true, false);
@Test
public void can_read_stored_host_information() throws Exception {