diff options
author | Jon Bratseth <bratseth@oath.com> | 2020-04-14 15:47:58 +0200 |
---|---|---|
committer | GitHub <noreply@github.com> | 2020-04-14 15:47:58 +0200 |
commit | 60f845464bb5332428737086282157500726f247 (patch) | |
tree | 018d6f311edf3ede3c56209bb120e440c406151b /node-repository | |
parent | decc08cb94e432008ab0466b1c773f0786725846 (diff) | |
parent | 60ea36a95f51e01e83a453c60e8dff661260e106 (diff) |
Merge pull request #12904 from vespa-engine/mpolden/config-lock
Hold config lock when modifying load balancers
Diffstat (limited to 'node-repository')
5 files changed, 70 insertions, 35 deletions
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/NodeRepository.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/NodeRepository.java index 6f7be09aa0e..1b5b5469302 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/NodeRepository.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/NodeRepository.java @@ -5,7 +5,6 @@ import com.google.inject.Inject; import com.yahoo.collections.ListMap; import com.yahoo.component.AbstractComponent; import com.yahoo.component.Version; -import com.yahoo.vespa.hosted.provision.Node.State; import com.yahoo.config.provision.ApplicationId; import com.yahoo.config.provision.DockerImage; import com.yahoo.config.provision.Flavor; @@ -17,8 +16,7 @@ import com.yahoo.config.provisioning.NodeRepositoryConfig; import com.yahoo.transaction.Mutex; import com.yahoo.transaction.NestedTransaction; import com.yahoo.vespa.curator.Curator; -import com.yahoo.vespa.flags.FlagSource; -import com.yahoo.vespa.flags.Flags; +import com.yahoo.vespa.hosted.provision.Node.State; import com.yahoo.vespa.hosted.provision.applications.Applications; import com.yahoo.vespa.hosted.provision.lb.LoadBalancer; import com.yahoo.vespa.hosted.provision.lb.LoadBalancerId; @@ -130,6 +128,9 @@ public class NodeRepository extends AbstractComponent { // read and write all nodes to make sure they are stored in the latest version of the serialized format for (State state : State.values()) + // TODO(mpolden): Add per-node locking. In its current state this may collide with other callers making + // node state changes. Example: A redeployment on another config server which moves a node + // to another state while this is constructed. db.writeTo(state, db.getNodes(state), Agent.system, Optional.empty()); } @@ -763,6 +764,7 @@ public class NodeRepository extends AbstractComponent { public Zone zone() { return zone; } /** Create a lock which provides exclusive rights to making changes to the given application */ + // TODO(mpolden): Make this delegate to CuratorDatabaseClient#lockConfig instead public Mutex lock(ApplicationId application) { return db.lock(application); } /** Create a lock with a timeout which provides exclusive rights to making changes to the given application */ diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/LoadBalancerExpirer.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/LoadBalancerExpirer.java index e2b70608d58..7beb717ea8b 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/LoadBalancerExpirer.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/LoadBalancerExpirer.java @@ -120,11 +120,13 @@ public class LoadBalancerExpirer extends Maintainer { /** Apply operation to all load balancers that exist in given state, while holding lock */ private void withLoadBalancersIn(LoadBalancer.State state, Consumer<LoadBalancer> operation) { for (var id : db.readLoadBalancerIds()) { - try (var lock = db.lockLoadBalancers(id.application())) { - var loadBalancer = db.readLoadBalancer(id); - if (loadBalancer.isEmpty()) continue; // Load balancer was removed during loop - if (loadBalancer.get().state() != state) continue; // Wrong state - operation.accept(loadBalancer.get()); + try (var lock = db.lockConfig(id.application())) { + try (var legacyLock = db.lockLoadBalancers(id.application())) { + var loadBalancer = db.readLoadBalancer(id); + if (loadBalancer.isEmpty()) continue; // Load balancer was removed during loop + if (loadBalancer.get().state() != state) continue; // Wrong state + operation.accept(loadBalancer.get()); + } } } } diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/persistence/CuratorDatabase.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/persistence/CuratorDatabase.java index e12cd3e53b9..08f2cfec40f 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/persistence/CuratorDatabase.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/persistence/CuratorDatabase.java @@ -38,18 +38,12 @@ public class CuratorDatabase { /** A partial cache of the Curator database, which is only valid if generations match */ private final AtomicReference<Cache> cache = new AtomicReference<>(); - /** Whether we should return data from the cache or always read fro ZooKeeper */ + /** Whether we should return data from the cache or always read from ZooKeeper */ private final boolean useCache; private final Object cacheCreationLock = new Object(); /** - * All keys, to allow reentrancy. - * This will grow forever with the number of applications seen, but this should be too slow to be a problem. - */ - private final ConcurrentHashMap<Path, Lock> locks = new ConcurrentHashMap<>(); - - /** * Creates a curator database * * @param curator the curator instance @@ -72,11 +66,8 @@ public class CuratorDatabase { } /** Create a reentrant lock */ - // Locks are not cached in the in-memory state public Lock lock(Path path, Duration timeout) { - Lock lock = locks.computeIfAbsent(path, (pathArg) -> new Lock(pathArg.getAbsolute(), curator)); - lock.acquire(timeout); - return lock; + return curator.lock(path, timeout); } // --------- Write operations ------------------------------------------------------------------------------ diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/persistence/CuratorDatabaseClient.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/persistence/CuratorDatabaseClient.java index 8ecdb0cbb1f..6036cc2366f 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/persistence/CuratorDatabaseClient.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/persistence/CuratorDatabaseClient.java @@ -62,6 +62,7 @@ public class CuratorDatabaseClient { private static final Path root = Path.fromString("/provision/v1"); private static final Path lockRoot = root.append("locks"); + private static final Path configLockRoot = Path.fromString("/config/v2/locks/"); private static final Path loadBalancersRoot = root.append("loadBalancers"); private static final Duration defaultLockTimeout = Duration.ofMinutes(2); @@ -312,7 +313,7 @@ public class CuratorDatabaseClient { return root.append(toDir(nodeState)).append(nodeName); } - /** Creates an returns the path to the lock for this application */ + /** Creates and returns the path to the lock for this application */ private Path lockPath(ApplicationId application) { Path lockPath = lockRoot @@ -323,6 +324,14 @@ public class CuratorDatabaseClient { return lockPath; } + /** Creates and returns the path to the config server lock for this application */ + private Path configLockPath(ApplicationId application) { + // This must match the lock path used by com.yahoo.vespa.config.server.application.TenantApplications + Path lockPath = configLockRoot.append(application.tenant().value()).append(application.serializedForm()); + curatorDatabase.create(lockPath); + return lockPath; + } + private String toDir(Node.State state) { switch (state) { case active: return "allocated"; // legacy name @@ -358,6 +367,28 @@ public class CuratorDatabaseClient { } } + /** + * Acquires the single cluster-global, re-entrant config lock for given application. Note that this is the same lock + * that configserver itself takes when modifying applications. + * + * This lock must be taken when writes to paths owned by this class may happen on both the configserver and + * node-repository side. This behaviour is obviously wrong, but since we pass a NestedTransaction across the + * configserver and node-repository boundary, the ownership semantics of the transaction (and its operations) + * becomes unclear. + * + * Example of when to use: The config server creates a new transaction and passes the transaction to + * {@link com.yahoo.vespa.hosted.provision.provisioning.NodeRepositoryProvisioner}, which appends operations to the + * transaction. The config server then commits (writes) the transaction which may include operations that modify + * data in paths owned by this class. + */ + public Lock lockConfig(ApplicationId application) { + try { + return lock(configLockPath(application), defaultLockTimeout); + } catch (UncheckedTimeoutException e) { + throw new ApplicationLockException(e); + } + } + private Lock lock(Path path, Duration timeout) { return curatorDatabase.lock(path, timeout); } @@ -525,6 +556,7 @@ public class CuratorDatabaseClient { transaction.commit(); } + // TODO(mpolden): Remove this and usages after April 2020 public Lock lockLoadBalancers(ApplicationId application) { return lock(lockRoot.append("loadBalancersLock2").append(application.serializedForm()), defaultLockTimeout); } diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/LoadBalancerProvisioner.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/LoadBalancerProvisioner.java index 1d3fe114e23..b694a7c3cd4 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/LoadBalancerProvisioner.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/LoadBalancerProvisioner.java @@ -52,9 +52,11 @@ public class LoadBalancerProvisioner { this.service = service; // Read and write all load balancers to make sure they are stored in the latest version of the serialization format for (var id : db.readLoadBalancerIds()) { - try (var lock = db.lockLoadBalancers(id.application())) { - var loadBalancer = db.readLoadBalancer(id); - loadBalancer.ifPresent(db::writeLoadBalancer); + try (var lock = db.lockConfig(id.application())) { + try (var legacyLock = db.lockLoadBalancers(id.application())) { + var loadBalancer = db.readLoadBalancer(id); + loadBalancer.ifPresent(db::writeLoadBalancer); + } } } } @@ -73,8 +75,10 @@ public class LoadBalancerProvisioner { if (requestedNodes.type() != NodeType.tenant) return; // Nothing to provision for this node type if (!cluster.type().isContainer()) return; // Nothing to provision for this cluster type if (application.instance().isTester()) return; // Do not provision for tester instances - try (var lock = db.lockLoadBalancers(application)) { - provision(application, effectiveId(cluster), false, lock); + try (var lock = db.lockConfig(application)) { + try (var legacyLock = db.lockLoadBalancers(application)) { + provision(application, effectiveId(cluster), false, lock); + } } } @@ -90,15 +94,17 @@ public class LoadBalancerProvisioner { */ public void activate(ApplicationId application, Set<ClusterSpec> clusters, @SuppressWarnings("unused") Mutex applicationLock, NestedTransaction transaction) { - try (var lock = db.lockLoadBalancers(application)) { - var containerClusters = containerClustersOf(clusters); - for (var clusterId : containerClusters) { - // Provision again to ensure that load balancer instance is re-configured with correct nodes - provision(application, clusterId, true, lock); + try (var lock = db.lockConfig(application)) { + try (var legacyLock = db.lockLoadBalancers(application)) { + var containerClusters = containerClustersOf(clusters); + for (var clusterId : containerClusters) { + // Provision again to ensure that load balancer instance is re-configured with correct nodes + provision(application, clusterId, true, legacyLock); + } + // Deactivate any surplus load balancers, i.e. load balancers for clusters that have been removed + var surplusLoadBalancers = surplusLoadBalancersOf(application, containerClusters); + deactivate(surplusLoadBalancers, transaction); } - // Deactivate any surplus load balancers, i.e. load balancers for clusters that have been removed - var surplusLoadBalancers = surplusLoadBalancersOf(application, containerClusters); - deactivate(surplusLoadBalancers, transaction); } } @@ -108,8 +114,10 @@ public class LoadBalancerProvisioner { */ public void deactivate(ApplicationId application, NestedTransaction transaction) { try (var applicationLock = nodeRepository.lock(application)) { - try (var lock = db.lockLoadBalancers(application)) { - deactivate(nodeRepository.loadBalancers(application).asList(), transaction); + try (var lock = db.lockConfig(application)) { + try (var legacyLock = db.lockLoadBalancers(application)) { + deactivate(nodeRepository.loadBalancers(application).asList(), transaction); + } } } } |