diff options
4 files changed, 64 insertions, 82 deletions
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/NodeRepository.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/NodeRepository.java index 56564fed782..40cd6b03e0a 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/NodeRepository.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/NodeRepository.java @@ -127,15 +127,11 @@ public class NodeRepository extends AbstractComponent { this.applications = new Applications(); // read and write all nodes to make sure they are stored in the latest version of the serialized format - for (var state : State.values()) { - for (var node : db.getNodes(state)) { - try (var lock = lock(node)) { - var currentNode = db.getNode(node.hostname()); - if (currentNode.isEmpty()) continue; // Node was removed during this loop - db.writeTo(currentNode.get().state(), currentNode.get(), Agent.system, Optional.empty()); - } - } - } + for (State state : State.values()) + // TODO(mpolden): Add per-node locking. In its current state this may collide with other callers making + // node state changes. Example: A redeployment on another config server which moves a node + // to another state while this is constructed. + db.writeTo(state, db.getNodes(state), Agent.system, Optional.empty()); } /** Returns the curator database client used by this */ @@ -779,10 +775,11 @@ public class NodeRepository extends AbstractComponent { public Zone zone() { return zone; } /** Create a lock which provides exclusive rights to making changes to the given application */ - public Mutex lock(ApplicationId application) { return db.legacyLock(application); } + // TODO(mpolden): Make this delegate to CuratorDatabaseClient#lockConfig instead + public Mutex lock(ApplicationId application) { return db.lock(application); } /** Create a lock with a timeout which provides exclusive rights to making changes to the given application */ - public Mutex lock(ApplicationId application, Duration timeout) { return db.legacyLock(application, timeout); } + public Mutex lock(ApplicationId application, Duration timeout) { return db.lock(application, timeout); } /** Create a lock which provides exclusive rights to modifying unallocated nodes */ public Mutex lockUnallocated() { return db.lockInactive(); } diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/LoadBalancerExpirer.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/LoadBalancerExpirer.java index 8b8ac939423..7beb717ea8b 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/LoadBalancerExpirer.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/LoadBalancerExpirer.java @@ -120,7 +120,7 @@ public class LoadBalancerExpirer extends Maintainer { /** Apply operation to all load balancers that exist in given state, while holding lock */ private void withLoadBalancersIn(LoadBalancer.State state, Consumer<LoadBalancer> operation) { for (var id : db.readLoadBalancerIds()) { - try (var lock = db.lock(id.application())) { + try (var lock = db.lockConfig(id.application())) { try (var legacyLock = db.lockLoadBalancers(id.application())) { var loadBalancer = db.readLoadBalancer(id); if (loadBalancer.isEmpty()) continue; // Load balancer was removed during loop diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/persistence/CuratorDatabaseClient.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/persistence/CuratorDatabaseClient.java index d869cff68bc..6036cc2366f 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/persistence/CuratorDatabaseClient.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/persistence/CuratorDatabaseClient.java @@ -12,7 +12,6 @@ import com.yahoo.config.provision.NodeType; import com.yahoo.config.provision.Zone; import com.yahoo.log.LogLevel; import com.yahoo.path.Path; -import com.yahoo.transaction.Mutex; import com.yahoo.transaction.NestedTransaction; import com.yahoo.transaction.Transaction; import com.yahoo.vespa.curator.Curator; @@ -69,7 +68,7 @@ public class CuratorDatabaseClient { private final NodeSerializer nodeSerializer; private final StringSetSerializer stringSetSerializer = new StringSetSerializer(); - private final CuratorDatabase db; + private final CuratorDatabase curatorDatabase; private final Clock clock; private final Zone zone; private final CuratorCounter provisionIndexCounter; @@ -77,26 +76,26 @@ public class CuratorDatabaseClient { public CuratorDatabaseClient(NodeFlavors flavors, Curator curator, Clock clock, Zone zone, boolean useCache) { this.nodeSerializer = new NodeSerializer(flavors); this.zone = zone; - this.db = new CuratorDatabase(curator, root, useCache); + this.curatorDatabase = new CuratorDatabase(curator, root, useCache); this.clock = clock; this.provisionIndexCounter = new CuratorCounter(curator, root.append("provisionIndexCounter").getAbsolute()); initZK(); } public List<HostName> cluster() { - return db.cluster(); + return curatorDatabase.cluster(); } private void initZK() { - db.create(root); + curatorDatabase.create(root); for (Node.State state : Node.State.values()) - db.create(toPath(state)); - db.create(inactiveJobsPath()); - db.create(infrastructureVersionsPath()); - db.create(osVersionsPath()); - db.create(dockerImagesPath()); - db.create(firmwareCheckPath()); - db.create(loadBalancersRoot); + curatorDatabase.create(toPath(state)); + curatorDatabase.create(inactiveJobsPath()); + curatorDatabase.create(infrastructureVersionsPath()); + curatorDatabase.create(osVersionsPath()); + curatorDatabase.create(dockerImagesPath()); + curatorDatabase.create(firmwareCheckPath()); + curatorDatabase.create(loadBalancersRoot); provisionIndexCounter.initialize(100); } @@ -105,7 +104,7 @@ public class CuratorDatabaseClient { */ public List<Node> addNodesInState(List<Node> nodes, Node.State expectedState) { NestedTransaction transaction = new NestedTransaction(); - CuratorTransaction curatorTransaction = db.newCuratorTransactionIn(transaction); + CuratorTransaction curatorTransaction = curatorDatabase.newCuratorTransactionIn(transaction); for (Node node : nodes) { if (node.state() != expectedState) throw new IllegalArgumentException(node + " is not in the " + expectedState + " state"); @@ -131,7 +130,7 @@ public class CuratorDatabaseClient { for (Node node : nodes) { Path path = toPath(node.state(), node.hostname()); - CuratorTransaction curatorTransaction = db.newCuratorTransactionIn(transaction); + CuratorTransaction curatorTransaction = curatorDatabase.newCuratorTransactionIn(transaction); curatorTransaction.add(CuratorOperations.delete(path.getAbsolute())); } @@ -201,7 +200,7 @@ public class CuratorDatabaseClient { List<Node> writtenNodes = new ArrayList<>(nodes.size()); - CuratorTransaction curatorTransaction = db.newCuratorTransactionIn(transaction); + CuratorTransaction curatorTransaction = curatorDatabase.newCuratorTransactionIn(transaction); for (Node node : nodes) { Node newNode = new Node(node.id(), node.ipConfig(), node.hostname(), node.parentHostname(), node.flavor(), @@ -261,7 +260,7 @@ public class CuratorDatabaseClient { List<Node> nodes = new ArrayList<>(); if (states.length == 0) states = Node.State.values(); - CuratorDatabase.Session session = db.getSession(); + CuratorDatabase.Session session = curatorDatabase.getSession(); for (Node.State state : states) { for (String hostname : session.getChildren(toPath(state))) { Optional<Node> node = getNode(session, hostname, state); @@ -301,7 +300,7 @@ public class CuratorDatabaseClient { * If no states are given this returns the node if it is present in any state. */ public Optional<Node> getNode(String hostname, Node.State ... states) { - return getNode(db.getSession(), hostname, states); + return getNode(curatorDatabase.getSession(), hostname, states); } private Path toPath(Node.State nodeState) { return root.append(toDir(nodeState)); } @@ -315,22 +314,21 @@ public class CuratorDatabaseClient { } /** Creates and returns the path to the lock for this application */ - // TODO(mpolden): Remove when all config servers take the new lock - private Path legacyLockPath(ApplicationId application) { + private Path lockPath(ApplicationId application) { Path lockPath = lockRoot .append(application.tenant().value()) .append(application.application().value()) .append(application.instance().value()); - db.create(lockPath); + curatorDatabase.create(lockPath); return lockPath; } - /** Creates and returns the lock path for this application */ - private Path lockPath(ApplicationId application) { + /** Creates and returns the path to the config server lock for this application */ + private Path configLockPath(ApplicationId application) { // This must match the lock path used by com.yahoo.vespa.config.server.application.TenantApplications Path lockPath = configLockRoot.append(application.tenant().value()).append(application.serializedForm()); - db.create(lockPath); + curatorDatabase.create(lockPath); return lockPath; } @@ -351,39 +349,26 @@ public class CuratorDatabaseClient { /** Acquires the single cluster-global, reentrant lock for all non-active nodes */ public Lock lockInactive() { - return db.lock(lockRoot.append("unallocatedLock"), defaultLockTimeout); + return lock(lockRoot.append("unallocatedLock"), defaultLockTimeout); } /** Acquires the single cluster-global, reentrant lock for active nodes of this application */ - // TODO(mpolden): Remove when all config servers take the new lock - public Mutex legacyLock(ApplicationId application) { - return legacyLock(application, defaultLockTimeout); + public Lock lock(ApplicationId application) { + return lock(application, defaultLockTimeout); } /** Acquires the single cluster-global, reentrant lock with the specified timeout for active nodes of this application */ - // TODO(mpolden): Remove when all config servers take the new lock - public Mutex legacyLock(ApplicationId application, Duration timeout) { - Mutex legacyLock; - Mutex lock; + public Lock lock(ApplicationId application, Duration timeout) { try { - legacyLock = db.lock(legacyLockPath(application), timeout); - } catch (UncheckedTimeoutException e) { - throw new ApplicationLockException(e); + return lock(lockPath(application), timeout); } - try { - lock = db.lock(lockPath(application), timeout); - } catch (UncheckedTimeoutException e) { - legacyLock.close(); + catch (UncheckedTimeoutException e) { throw new ApplicationLockException(e); } - return () -> { - lock.close(); - legacyLock.close(); - }; } /** - * Acquires the single cluster-global, re-entrant lock for given application. Note that this is the same lock + * Acquires the single cluster-global, re-entrant config lock for given application. Note that this is the same lock * that configserver itself takes when modifying applications. * * This lock must be taken when writes to paths owned by this class may happen on both the configserver and @@ -396,25 +381,25 @@ public class CuratorDatabaseClient { * transaction. The config server then commits (writes) the transaction which may include operations that modify * data in paths owned by this class. */ - public Lock lock(ApplicationId application, Duration timeout) { + public Lock lockConfig(ApplicationId application) { try { - return db.lock(lockPath(application), timeout); + return lock(configLockPath(application), defaultLockTimeout); } catch (UncheckedTimeoutException e) { throw new ApplicationLockException(e); } } - public Lock lock(ApplicationId application) { - return lock(application, defaultLockTimeout); + private Lock lock(Path path, Duration timeout) { + return curatorDatabase.lock(path, timeout); } private <T> Optional<T> read(Path path, Function<byte[], T> mapper) { - return db.getData(path).filter(data -> data.length > 0).map(mapper); + return curatorDatabase.getData(path).filter(data -> data.length > 0).map(mapper); } // Maintenance jobs public Lock lockMaintenanceJob(String jobName) { - return db.lock(lockRoot.append("maintenanceJobLocks").append(jobName), defaultLockTimeout); + return lock(lockRoot.append("maintenanceJobLocks").append(jobName), defaultLockTimeout); } public Set<String> readInactiveJobs() { @@ -430,14 +415,14 @@ public class CuratorDatabaseClient { public void writeInactiveJobs(Set<String> inactiveJobs) { NestedTransaction transaction = new NestedTransaction(); - CuratorTransaction curatorTransaction = db.newCuratorTransactionIn(transaction); + CuratorTransaction curatorTransaction = curatorDatabase.newCuratorTransactionIn(transaction); curatorTransaction.add(CuratorOperations.setData(inactiveJobsPath().getAbsolute(), stringSetSerializer.toJson(inactiveJobs))); transaction.commit(); } public Lock lockInactiveJobs() { - return db.lock(lockRoot.append("inactiveJobsLock"), defaultLockTimeout); + return lock(lockRoot.append("inactiveJobsLock"), defaultLockTimeout); } private Path inactiveJobsPath() { @@ -452,14 +437,14 @@ public class CuratorDatabaseClient { public void writeInfrastructureVersions(Map<NodeType, Version> infrastructureVersions) { NestedTransaction transaction = new NestedTransaction(); - CuratorTransaction curatorTransaction = db.newCuratorTransactionIn(transaction); + CuratorTransaction curatorTransaction = curatorDatabase.newCuratorTransactionIn(transaction); curatorTransaction.add(CuratorOperations.setData(infrastructureVersionsPath().getAbsolute(), NodeTypeVersionsSerializer.toJson(infrastructureVersions))); transaction.commit(); } public Lock lockInfrastructureVersions() { - return db.lock(lockRoot.append("infrastructureVersionsLock"), defaultLockTimeout); + return lock(lockRoot.append("infrastructureVersionsLock"), defaultLockTimeout); } private Path infrastructureVersionsPath() { @@ -474,14 +459,14 @@ public class CuratorDatabaseClient { public void writeOsVersions(Map<NodeType, Version> versions) { NestedTransaction transaction = new NestedTransaction(); - CuratorTransaction curatorTransaction = db.newCuratorTransactionIn(transaction); + CuratorTransaction curatorTransaction = curatorDatabase.newCuratorTransactionIn(transaction); curatorTransaction.add(CuratorOperations.setData(osVersionsPath().getAbsolute(), OsVersionsSerializer.toJson(versions))); transaction.commit(); } public Lock lockOsVersions() { - return db.lock(lockRoot.append("osVersionsLock"), defaultLockTimeout); + return lock(lockRoot.append("osVersionsLock"), defaultLockTimeout); } private Path osVersionsPath() { @@ -496,14 +481,14 @@ public class CuratorDatabaseClient { public void writeDockerImages(Map<NodeType, DockerImage> dockerImages) { NestedTransaction transaction = new NestedTransaction(); - CuratorTransaction curatorTransaction = db.newCuratorTransactionIn(transaction); + CuratorTransaction curatorTransaction = curatorDatabase.newCuratorTransactionIn(transaction); curatorTransaction.add(CuratorOperations.setData(dockerImagesPath().getAbsolute(), NodeTypeDockerImagesSerializer.toJson(dockerImages))); transaction.commit(); } public Lock lockDockerImages() { - return db.lock(lockRoot.append("dockerImagesLock"), defaultLockTimeout); + return lock(lockRoot.append("dockerImagesLock"), defaultLockTimeout); } private Path dockerImagesPath() { @@ -517,7 +502,7 @@ public class CuratorDatabaseClient { byte[] data = after.map(instant -> Long.toString(instant.toEpochMilli()).getBytes()) .orElse(new byte[0]); NestedTransaction transaction = new NestedTransaction(); - CuratorTransaction curatorTransaction = db.newCuratorTransactionIn(transaction); + CuratorTransaction curatorTransaction = curatorDatabase.newCuratorTransactionIn(transaction); curatorTransaction.add(CuratorOperations.setData(firmwareCheckPath().getAbsolute(), data)); transaction.commit(); } @@ -557,7 +542,7 @@ public class CuratorDatabaseClient { } public void writeLoadBalancers(Collection<LoadBalancer> loadBalancers, NestedTransaction transaction) { - CuratorTransaction curatorTransaction = db.newCuratorTransactionIn(transaction); + CuratorTransaction curatorTransaction = curatorDatabase.newCuratorTransactionIn(transaction); loadBalancers.forEach(loadBalancer -> { curatorTransaction.add(createOrSet(loadBalancerPath(loadBalancer.id()), LoadBalancerSerializer.toJson(loadBalancer))); @@ -566,14 +551,14 @@ public class CuratorDatabaseClient { public void removeLoadBalancer(LoadBalancerId loadBalancer) { NestedTransaction transaction = new NestedTransaction(); - CuratorTransaction curatorTransaction = db.newCuratorTransactionIn(transaction); + CuratorTransaction curatorTransaction = curatorDatabase.newCuratorTransactionIn(transaction); curatorTransaction.add(CuratorOperations.delete(loadBalancerPath(loadBalancer).getAbsolute())); transaction.commit(); } // TODO(mpolden): Remove this and usages after April 2020 public Lock lockLoadBalancers(ApplicationId application) { - return db.lock(lockRoot.append("loadBalancersLock2").append(application.serializedForm()), defaultLockTimeout); + return lock(lockRoot.append("loadBalancersLock2").append(application.serializedForm()), defaultLockTimeout); } private Path loadBalancerPath(LoadBalancerId id) { @@ -581,14 +566,14 @@ public class CuratorDatabaseClient { } private List<LoadBalancerId> readLoadBalancerIds(Predicate<LoadBalancerId> predicate) { - return db.getChildren(loadBalancersRoot).stream() - .map(LoadBalancerId::fromSerializedForm) - .filter(predicate) - .collect(Collectors.toUnmodifiableList()); + return curatorDatabase.getChildren(loadBalancersRoot).stream() + .map(LoadBalancerId::fromSerializedForm) + .filter(predicate) + .collect(Collectors.toUnmodifiableList()); } private Transaction.Operation createOrSet(Path path, byte[] data) { - if (db.exists(path)) { + if (curatorDatabase.exists(path)) { return CuratorOperations.setData(path.getAbsolute(), data); } return CuratorOperations.create(path.getAbsolute(), data); diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/LoadBalancerProvisioner.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/LoadBalancerProvisioner.java index 77d7faf7778..b694a7c3cd4 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/LoadBalancerProvisioner.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/LoadBalancerProvisioner.java @@ -52,7 +52,7 @@ public class LoadBalancerProvisioner { this.service = service; // Read and write all load balancers to make sure they are stored in the latest version of the serialization format for (var id : db.readLoadBalancerIds()) { - try (var lock = db.lock(id.application())) { + try (var lock = db.lockConfig(id.application())) { try (var legacyLock = db.lockLoadBalancers(id.application())) { var loadBalancer = db.readLoadBalancer(id); loadBalancer.ifPresent(db::writeLoadBalancer); @@ -75,7 +75,7 @@ public class LoadBalancerProvisioner { if (requestedNodes.type() != NodeType.tenant) return; // Nothing to provision for this node type if (!cluster.type().isContainer()) return; // Nothing to provision for this cluster type if (application.instance().isTester()) return; // Do not provision for tester instances - try (var lock = db.lock(application)) { + try (var lock = db.lockConfig(application)) { try (var legacyLock = db.lockLoadBalancers(application)) { provision(application, effectiveId(cluster), false, lock); } @@ -94,7 +94,7 @@ public class LoadBalancerProvisioner { */ public void activate(ApplicationId application, Set<ClusterSpec> clusters, @SuppressWarnings("unused") Mutex applicationLock, NestedTransaction transaction) { - try (var lock = db.lock(application)) { + try (var lock = db.lockConfig(application)) { try (var legacyLock = db.lockLoadBalancers(application)) { var containerClusters = containerClustersOf(clusters); for (var clusterId : containerClusters) { @@ -114,7 +114,7 @@ public class LoadBalancerProvisioner { */ public void deactivate(ApplicationId application, NestedTransaction transaction) { try (var applicationLock = nodeRepository.lock(application)) { - try (var lock = db.lock(application)) { + try (var lock = db.lockConfig(application)) { try (var legacyLock = db.lockLoadBalancers(application)) { deactivate(nodeRepository.loadBalancers(application).asList(), transaction); } |