diff options
author | Martin Polden <mpolden@mpolden.no> | 2020-10-07 09:07:50 +0200 |
---|---|---|
committer | GitHub <noreply@github.com> | 2020-10-07 09:07:50 +0200 |
commit | b09f95f1d811b4a14f727a8f6fcde989dcf9b67c (patch) | |
tree | 3920496fb3fab97064aef7bbeba9a2b3379e1f86 /node-repository/src | |
parent | b2f9f7ecf5be7164a5803a7be19348eea992db95 (diff) | |
parent | 3aedbdfc275fd16081c78b1fee0b95ff8fd0990b (diff) |
Merge pull request #14740 from vespa-engine/mpolden/lock-migration
Stop taking config lock exclusively in node-repository
Diffstat (limited to 'node-repository/src')
6 files changed, 63 insertions, 83 deletions
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/NodeRepository.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/NodeRepository.java index 0155f1dd7c2..455d9e59734 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/NodeRepository.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/NodeRepository.java @@ -18,7 +18,6 @@ import com.yahoo.transaction.Mutex; import com.yahoo.transaction.NestedTransaction; import com.yahoo.vespa.curator.Curator; import com.yahoo.vespa.flags.FlagSource; -import com.yahoo.vespa.flags.Flags; import com.yahoo.vespa.hosted.provision.Node.State; import com.yahoo.vespa.hosted.provision.applications.Applications; import com.yahoo.vespa.hosted.provision.lb.LoadBalancer; @@ -112,7 +111,6 @@ public class NodeRepository extends AbstractComponent { private final Applications applications; private final boolean canProvisionHosts; private final int spareCount; - private final boolean useConfigServerLock; /** * Creates a node repository from a zookeeper provider. @@ -155,8 +153,6 @@ public class NodeRepository extends AbstractComponent { boolean canProvisionHosts, int spareCount, long nodeCacheSize) { - // Flag is read once here as it shouldn't not change at runtime - this.useConfigServerLock = Flags.USE_CONFIG_SERVER_LOCK.bindTo(flagSource).value(); this.db = new CuratorDatabaseClient(flavors, curator, clock, zone, useCuratorClientCache, nodeCacheSize); this.zone = zone; this.clock = clock; @@ -178,23 +174,11 @@ public class NodeRepository extends AbstractComponent { private void rewriteNodes() { Instant start = clock.instant(); int nodesWritten = 0; - if (useConfigServerLock) { - for (var state : State.values()) { - for (var node : db.readNodes(state)) { - try (var lock = lock(node)) { - var currentNode = db.readNode(node.hostname()); - if (currentNode.isEmpty()) continue; // Node disappeared while running this loop - db.writeTo(currentNode.get().state(), currentNode.get(), Agent.system, Optional.empty()); - nodesWritten++; - } - } - } - } else { - for (State state : State.values()) { - List<Node> nodes = db.readNodes(state); - db.writeTo(state, nodes, Agent.system, Optional.empty()); - nodesWritten += nodes.size(); - } + for (State state : State.values()) { + List<Node> nodes = db.readNodes(state); + // TODO(mpolden): This should take the lock before writing + db.writeTo(state, nodes, Agent.system, Optional.empty()); + nodesWritten += nodes.size(); } Instant end = clock.instant(); log.log(Level.INFO, String.format("Rewrote %d nodes in %s", nodesWritten, Duration.between(start, end))); @@ -860,12 +844,12 @@ public class NodeRepository extends AbstractComponent { /** Create a lock which provides exclusive rights to making changes to the given application */ public Mutex lock(ApplicationId application) { - return useConfigServerLock ? db.lock(application) : db.legacyLock(application); + return db.lock(application); } /** Create a lock with a timeout which provides exclusive rights to making changes to the given application */ public Mutex lock(ApplicationId application, Duration timeout) { - return useConfigServerLock ? db.lock(application, timeout) : db.legacyLock(application, timeout); + return db.lock(application, timeout); } /** Create a lock which provides exclusive rights to modifying unallocated nodes */ diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/applications/Applications.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/applications/Applications.java index 17f8d73c88f..56e52d9f658 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/applications/Applications.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/applications/Applications.java @@ -25,7 +25,10 @@ public class Applications { // read and write all to make sure they are stored in the latest version of the serialized format for (ApplicationId id : ids()) { try (Mutex lock = db.lock(id)) { - get(id).ifPresent(application -> put(application, lock)); + // TODO(mpolden): Remove inner lock + try (Mutex innerLock = db.configLock(id)) { + get(id).ifPresent(application -> put(application, lock)); + } } } } diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/LoadBalancerExpirer.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/LoadBalancerExpirer.java index 90cf3ba8f54..55d17f3e080 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/LoadBalancerExpirer.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/LoadBalancerExpirer.java @@ -124,10 +124,13 @@ public class LoadBalancerExpirer extends NodeRepositoryMaintainer { private void withLoadBalancersIn(LoadBalancer.State state, Consumer<LoadBalancer> operation) { for (var id : db.readLoadBalancerIds()) { try (var lock = db.lock(id.application())) { - var loadBalancer = db.readLoadBalancer(id); - if (loadBalancer.isEmpty()) continue; // Load balancer was removed during loop - if (loadBalancer.get().state() != state) continue; // Wrong state - operation.accept(loadBalancer.get()); + // TODO(mpolden): Remove inner lock + try (var innerLock = db.configLock(id.application())) { + var loadBalancer = db.readLoadBalancer(id); + if (loadBalancer.isEmpty()) continue; // Load balancer was removed during loop + if (loadBalancer.get().state() != state) continue; // Wrong state + operation.accept(loadBalancer.get()); + } } } } diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/persistence/CuratorDatabaseClient.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/persistence/CuratorDatabaseClient.java index 5d6a4668263..2c00b4bab68 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/persistence/CuratorDatabaseClient.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/persistence/CuratorDatabaseClient.java @@ -320,19 +320,17 @@ public class CuratorDatabaseClient { } /** Creates and returns the path to the lock for this application */ - // TODO(mpolden): Remove when all config servers take the new lock - private Path legacyLockPath(ApplicationId application) { - Path lockPath = - CuratorDatabaseClient.lockPath - .append(application.tenant().value()) - .append(application.application().value()) - .append(application.instance().value()); + private Path lockPath(ApplicationId application) { + Path lockPath = CuratorDatabaseClient.lockPath.append(application.tenant().value()) + .append(application.application().value()) + .append(application.instance().value()); db.create(lockPath); return lockPath; } - /** Creates and returns the lock path for this application */ - private Path lockPath(ApplicationId application) { + /** Creates and returns the config lock path for this application */ + // TODO(mpolden): Remove + private Path configLockPath(ApplicationId application) { // This must match the lock path used by com.yahoo.vespa.config.server.application.TenantApplications Path lockPath = configLockPath.append(application.tenant().value()).append(application.serializedForm()); db.create(lockPath); @@ -360,46 +358,32 @@ public class CuratorDatabaseClient { } /** Acquires the single cluster-global, reentrant lock for active nodes of this application */ - // TODO(mpolden): Remove when all config servers take the new lock - public Lock legacyLock(ApplicationId application) { - return legacyLock(application, defaultLockTimeout); + public Lock lock(ApplicationId application) { + return lock(application, defaultLockTimeout); } /** Acquires the single cluster-global, reentrant lock with the specified timeout for active nodes of this application */ - // TODO(mpolden): Remove when all config servers take the new lock - public Lock legacyLock(ApplicationId application, Duration timeout) { + public Lock lock(ApplicationId application, Duration timeout) { try { - return db.lock(legacyLockPath(application), timeout); + return db.lock(lockPath(application), timeout); } catch (UncheckedTimeoutException e) { throw new ApplicationLockException(e); } } - /** - * Acquires the single cluster-global, re-entrant lock for given application. Note that this is the same lock - * that configserver itself takes when modifying applications. - * - * This lock must be taken when writes to paths owned by this class may happen on both the configserver and - * node-repository side. This behaviour is obviously wrong, but since we pass a NestedTransaction across the - * configserver and node-repository boundary, the ownership semantics of the transaction (and its operations) - * becomes unclear. - * - * Example of when to use: The config server creates a new transaction and passes the transaction to - * {@link com.yahoo.vespa.hosted.provision.provisioning.NodeRepositoryProvisioner}, which appends operations to the - * transaction. The config server then commits (writes) the transaction which may include operations that modify - * data in paths owned by this class. - */ - public Lock lock(ApplicationId application, Duration timeout) { + // TODO(mpolden): Remove + private Lock configLock(ApplicationId application, Duration timeout) { try { - return db.lock(lockPath(application), timeout); + return db.lock(configLockPath(application), timeout); } catch (UncheckedTimeoutException e) { throw new ApplicationLockException(e); } } - public Lock lock(ApplicationId application) { - return lock(application, defaultLockTimeout); + // TODO(mpolden): Remove + public Lock configLock(ApplicationId application) { + return configLock(application, defaultLockTimeout); } // Applications ----------------------------------------------------------- diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/LoadBalancerProvisioner.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/LoadBalancerProvisioner.java index 6403bbe2b0c..09a33093654 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/LoadBalancerProvisioner.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/LoadBalancerProvisioner.java @@ -60,8 +60,11 @@ public class LoadBalancerProvisioner { // Read and write all load balancers to make sure they are stored in the latest version of the serialization format for (var id : db.readLoadBalancerIds()) { try (var lock = db.lock(id.application())) { - var loadBalancer = db.readLoadBalancer(id); - loadBalancer.ifPresent(db::writeLoadBalancer); + // TODO(mpolden): Remove inner lock + try (var innerLock = db.configLock(id.application())) { + var loadBalancer = db.readLoadBalancer(id); + loadBalancer.ifPresent(db::writeLoadBalancer); + } } } } @@ -80,9 +83,12 @@ public class LoadBalancerProvisioner { if (!canForwardTo(requestedNodes.type(), cluster)) return; // Nothing to provision for this node and cluster type if (application.instance().isTester()) return; // Do not provision for tester instances try (var lock = db.lock(application)) { - ClusterSpec.Id clusterId = effectiveId(cluster); - List<Node> nodes = nodesOf(clusterId, application); - provision(application, clusterId, nodes,false, lock); + // TODO(mpolden): Remove inner lock + try (var innerLock = db.configLock(application)) { + ClusterSpec.Id clusterId = effectiveId(cluster); + List<Node> nodes = nodesOf(clusterId, application); + provision(application, clusterId, nodes, false, lock); + } } } @@ -99,15 +105,18 @@ public class LoadBalancerProvisioner { public void activate(ApplicationId application, Set<ClusterSpec> clusters, @SuppressWarnings("unused") Mutex applicationLock, NestedTransaction transaction) { try (var lock = db.lock(application)) { - for (var cluster : loadBalancedClustersOf(application).entrySet()) { - // Provision again to ensure that load balancer instance is re-configured with correct nodes - provision(application, cluster.getKey(), cluster.getValue(), true, lock); + // TODO(mpolden): Remove inner lock + try (var innerLock = db.configLock(application)) { + for (var cluster : loadBalancedClustersOf(application).entrySet()) { + // Provision again to ensure that load balancer instance is re-configured with correct nodes + provision(application, cluster.getKey(), cluster.getValue(), true, lock); + } + // Deactivate any surplus load balancers, i.e. load balancers for clusters that have been removed + var surplusLoadBalancers = surplusLoadBalancersOf(application, clusters.stream() + .map(LoadBalancerProvisioner::effectiveId) + .collect(Collectors.toSet())); + deactivate(surplusLoadBalancers, transaction); } - // Deactivate any surplus load balancers, i.e. load balancers for clusters that have been removed - var surplusLoadBalancers = surplusLoadBalancersOf(application, clusters.stream() - .map(LoadBalancerProvisioner::effectiveId) - .collect(Collectors.toSet())); - deactivate(surplusLoadBalancers, transaction); } } @@ -116,8 +125,9 @@ public class LoadBalancerProvisioner { * load balancer(s). */ public void deactivate(ApplicationId application, NestedTransaction transaction) { - try (var applicationLock = nodeRepository.lock(application)) { - try (var lock = db.lock(application)) { + try (var lock = nodeRepository.lock(application)) { + // TODO(mpolden): Remove inner lock + try (var innerLock = db.configLock(application)) { deactivate(nodeRepository.loadBalancers(application).asList(), transaction); } } diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/persistence/CuratorDatabaseClientTest.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/persistence/CuratorDatabaseClientTest.java index 36625f3559e..11df26e4da6 100644 --- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/persistence/CuratorDatabaseClientTest.java +++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/persistence/CuratorDatabaseClientTest.java @@ -8,7 +8,6 @@ import com.yahoo.config.provision.NodeType; import com.yahoo.config.provision.TenantName; import com.yahoo.config.provision.Zone; import com.yahoo.vespa.curator.Curator; -import com.yahoo.vespa.curator.Lock; import com.yahoo.vespa.curator.mock.MockCurator; import com.yahoo.vespa.hosted.provision.Node; import com.yahoo.vespa.hosted.provision.provisioning.FlavorConfigBuilder; @@ -42,19 +41,16 @@ public class CuratorDatabaseClientTest { public void locks_can_be_acquired_and_released() { ApplicationId app = ApplicationId.from(TenantName.from("testTenant"), ApplicationName.from("testApp"), InstanceName.from("testInstance")); - try (Lock mutex1 = zkClient.lock(app)) { - mutex1.toString(); // reference to avoid warning + try (var ignored = zkClient.lock(app)) { throw new RuntimeException(); } catch (RuntimeException expected) { } - try (Lock mutex2 = zkClient.lock(app)) { - mutex2.toString(); // reference to avoid warning + try (var ignored = zkClient.lock(app)) { } - try (Lock mutex3 = zkClient.lock(app)) { - mutex3.toString(); // reference to avoid warning + try (var ignored = zkClient.lock(app)) { } } |