diff options
author | Martin Polden <mpolden@mpolden.no> | 2020-04-14 09:44:06 +0200 |
---|---|---|
committer | GitHub <noreply@github.com> | 2020-04-14 09:44:06 +0200 |
commit | 028da87d62146b22738172c2c5231e203a149c54 (patch) | |
tree | baf1e8081bc516cf5460370974668256395d2b01 | |
parent | 6aac938f0d89f644bebcb629cae4efa4536911b5 (diff) | |
parent | bc3b0922b32eafbff64391654fdf64d2fd74ffa4 (diff) |
Merge pull request #12877 from vespa-engine/mpolden/use-config-lock
Hold config lock when modifying load balancers
4 files changed, 68 insertions, 24 deletions
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/NodeRepository.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/NodeRepository.java index 6f7be09aa0e..1b5b5469302 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/NodeRepository.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/NodeRepository.java @@ -5,7 +5,6 @@ import com.google.inject.Inject; import com.yahoo.collections.ListMap; import com.yahoo.component.AbstractComponent; import com.yahoo.component.Version; -import com.yahoo.vespa.hosted.provision.Node.State; import com.yahoo.config.provision.ApplicationId; import com.yahoo.config.provision.DockerImage; import com.yahoo.config.provision.Flavor; @@ -17,8 +16,7 @@ import com.yahoo.config.provisioning.NodeRepositoryConfig; import com.yahoo.transaction.Mutex; import com.yahoo.transaction.NestedTransaction; import com.yahoo.vespa.curator.Curator; -import com.yahoo.vespa.flags.FlagSource; -import com.yahoo.vespa.flags.Flags; +import com.yahoo.vespa.hosted.provision.Node.State; import com.yahoo.vespa.hosted.provision.applications.Applications; import com.yahoo.vespa.hosted.provision.lb.LoadBalancer; import com.yahoo.vespa.hosted.provision.lb.LoadBalancerId; @@ -130,6 +128,9 @@ public class NodeRepository extends AbstractComponent { // read and write all nodes to make sure they are stored in the latest version of the serialized format for (State state : State.values()) + // TODO(mpolden): Add per-node locking. In its current state this may collide with other callers making + // node state changes. Example: A redeployment on another config server which moves a node + // to another state while this is constructed. db.writeTo(state, db.getNodes(state), Agent.system, Optional.empty()); } @@ -763,6 +764,7 @@ public class NodeRepository extends AbstractComponent { public Zone zone() { return zone; } /** Create a lock which provides exclusive rights to making changes to the given application */ + // TODO(mpolden): Make this delegate to CuratorDatabaseClient#lockConfig instead public Mutex lock(ApplicationId application) { return db.lock(application); } /** Create a lock with a timeout which provides exclusive rights to making changes to the given application */ diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/LoadBalancerExpirer.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/LoadBalancerExpirer.java index e2b70608d58..7beb717ea8b 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/LoadBalancerExpirer.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/LoadBalancerExpirer.java @@ -120,11 +120,13 @@ public class LoadBalancerExpirer extends Maintainer { /** Apply operation to all load balancers that exist in given state, while holding lock */ private void withLoadBalancersIn(LoadBalancer.State state, Consumer<LoadBalancer> operation) { for (var id : db.readLoadBalancerIds()) { - try (var lock = db.lockLoadBalancers(id.application())) { - var loadBalancer = db.readLoadBalancer(id); - if (loadBalancer.isEmpty()) continue; // Load balancer was removed during loop - if (loadBalancer.get().state() != state) continue; // Wrong state - operation.accept(loadBalancer.get()); + try (var lock = db.lockConfig(id.application())) { + try (var legacyLock = db.lockLoadBalancers(id.application())) { + var loadBalancer = db.readLoadBalancer(id); + if (loadBalancer.isEmpty()) continue; // Load balancer was removed during loop + if (loadBalancer.get().state() != state) continue; // Wrong state + operation.accept(loadBalancer.get()); + } } } } diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/persistence/CuratorDatabaseClient.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/persistence/CuratorDatabaseClient.java index 8ecdb0cbb1f..6036cc2366f 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/persistence/CuratorDatabaseClient.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/persistence/CuratorDatabaseClient.java @@ -62,6 +62,7 @@ public class CuratorDatabaseClient { private static final Path root = Path.fromString("/provision/v1"); private static final Path lockRoot = root.append("locks"); + private static final Path configLockRoot = Path.fromString("/config/v2/locks/"); private static final Path loadBalancersRoot = root.append("loadBalancers"); private static final Duration defaultLockTimeout = Duration.ofMinutes(2); @@ -312,7 +313,7 @@ public class CuratorDatabaseClient { return root.append(toDir(nodeState)).append(nodeName); } - /** Creates an returns the path to the lock for this application */ + /** Creates and returns the path to the lock for this application */ private Path lockPath(ApplicationId application) { Path lockPath = lockRoot @@ -323,6 +324,14 @@ public class CuratorDatabaseClient { return lockPath; } + /** Creates and returns the path to the config server lock for this application */ + private Path configLockPath(ApplicationId application) { + // This must match the lock path used by com.yahoo.vespa.config.server.application.TenantApplications + Path lockPath = configLockRoot.append(application.tenant().value()).append(application.serializedForm()); + curatorDatabase.create(lockPath); + return lockPath; + } + private String toDir(Node.State state) { switch (state) { case active: return "allocated"; // legacy name @@ -358,6 +367,28 @@ public class CuratorDatabaseClient { } } + /** + * Acquires the single cluster-global, re-entrant config lock for given application. Note that this is the same lock + * that configserver itself takes when modifying applications. + * + * This lock must be taken when writes to paths owned by this class may happen on both the configserver and + * node-repository side. This behaviour is obviously wrong, but since we pass a NestedTransaction across the + * configserver and node-repository boundary, the ownership semantics of the transaction (and its operations) + * becomes unclear. + * + * Example of when to use: The config server creates a new transaction and passes the transaction to + * {@link com.yahoo.vespa.hosted.provision.provisioning.NodeRepositoryProvisioner}, which appends operations to the + * transaction. The config server then commits (writes) the transaction which may include operations that modify + * data in paths owned by this class. + */ + public Lock lockConfig(ApplicationId application) { + try { + return lock(configLockPath(application), defaultLockTimeout); + } catch (UncheckedTimeoutException e) { + throw new ApplicationLockException(e); + } + } + private Lock lock(Path path, Duration timeout) { return curatorDatabase.lock(path, timeout); } @@ -525,6 +556,7 @@ public class CuratorDatabaseClient { transaction.commit(); } + // TODO(mpolden): Remove this and usages after April 2020 public Lock lockLoadBalancers(ApplicationId application) { return lock(lockRoot.append("loadBalancersLock2").append(application.serializedForm()), defaultLockTimeout); } diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/LoadBalancerProvisioner.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/LoadBalancerProvisioner.java index 1d3fe114e23..b694a7c3cd4 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/LoadBalancerProvisioner.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/LoadBalancerProvisioner.java @@ -52,9 +52,11 @@ public class LoadBalancerProvisioner { this.service = service; // Read and write all load balancers to make sure they are stored in the latest version of the serialization format for (var id : db.readLoadBalancerIds()) { - try (var lock = db.lockLoadBalancers(id.application())) { - var loadBalancer = db.readLoadBalancer(id); - loadBalancer.ifPresent(db::writeLoadBalancer); + try (var lock = db.lockConfig(id.application())) { + try (var legacyLock = db.lockLoadBalancers(id.application())) { + var loadBalancer = db.readLoadBalancer(id); + loadBalancer.ifPresent(db::writeLoadBalancer); + } } } } @@ -73,8 +75,10 @@ public class LoadBalancerProvisioner { if (requestedNodes.type() != NodeType.tenant) return; // Nothing to provision for this node type if (!cluster.type().isContainer()) return; // Nothing to provision for this cluster type if (application.instance().isTester()) return; // Do not provision for tester instances - try (var lock = db.lockLoadBalancers(application)) { - provision(application, effectiveId(cluster), false, lock); + try (var lock = db.lockConfig(application)) { + try (var legacyLock = db.lockLoadBalancers(application)) { + provision(application, effectiveId(cluster), false, lock); + } } } @@ -90,15 +94,17 @@ public class LoadBalancerProvisioner { */ public void activate(ApplicationId application, Set<ClusterSpec> clusters, @SuppressWarnings("unused") Mutex applicationLock, NestedTransaction transaction) { - try (var lock = db.lockLoadBalancers(application)) { - var containerClusters = containerClustersOf(clusters); - for (var clusterId : containerClusters) { - // Provision again to ensure that load balancer instance is re-configured with correct nodes - provision(application, clusterId, true, lock); + try (var lock = db.lockConfig(application)) { + try (var legacyLock = db.lockLoadBalancers(application)) { + var containerClusters = containerClustersOf(clusters); + for (var clusterId : containerClusters) { + // Provision again to ensure that load balancer instance is re-configured with correct nodes + provision(application, clusterId, true, legacyLock); + } + // Deactivate any surplus load balancers, i.e. load balancers for clusters that have been removed + var surplusLoadBalancers = surplusLoadBalancersOf(application, containerClusters); + deactivate(surplusLoadBalancers, transaction); } - // Deactivate any surplus load balancers, i.e. load balancers for clusters that have been removed - var surplusLoadBalancers = surplusLoadBalancersOf(application, containerClusters); - deactivate(surplusLoadBalancers, transaction); } } @@ -108,8 +114,10 @@ public class LoadBalancerProvisioner { */ public void deactivate(ApplicationId application, NestedTransaction transaction) { try (var applicationLock = nodeRepository.lock(application)) { - try (var lock = db.lockLoadBalancers(application)) { - deactivate(nodeRepository.loadBalancers(application).asList(), transaction); + try (var lock = db.lockConfig(application)) { + try (var legacyLock = db.lockLoadBalancers(application)) { + deactivate(nodeRepository.loadBalancers(application).asList(), transaction); + } } } } |