diff options
author | Martin Polden <mpolden@mpolden.no> | 2020-02-12 16:01:44 +0100 |
---|---|---|
committer | Martin Polden <mpolden@mpolden.no> | 2020-02-13 08:35:46 +0100 |
commit | 1c6eb408e8317d8d1a50d77d69fa41dac748c3f8 (patch) | |
tree | 300dcd46216678dabbe7fbd42edb94fa794ece9f | |
parent | b2df0af1d3e4cbb888e0cbe425f9994dcafbacf9 (diff) |
Take load balancer lock per application
6 files changed, 83 insertions, 69 deletions
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/lb/LoadBalancerList.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/lb/LoadBalancerList.java index 870184b0d9e..479fd328162 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/lb/LoadBalancerList.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/lb/LoadBalancerList.java @@ -1,7 +1,6 @@ // Copyright 2020 Oath Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. package com.yahoo.vespa.hosted.provision.lb; -import java.time.Instant; import java.util.Collection; import java.util.Iterator; import java.util.List; @@ -29,11 +28,6 @@ public class LoadBalancerList implements Iterable<LoadBalancer> { return of(loadBalancers.stream().filter(lb -> lb.state() == state)); } - /** Returns the subset of load balancers that last changed before given instant */ - public LoadBalancerList changedBefore(Instant instant) { - return of(loadBalancers.stream().filter(lb -> lb.changedAt().isBefore(instant))); - } - public List<LoadBalancer> asList() { return loadBalancers; } diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/LoadBalancerExpirer.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/LoadBalancerExpirer.java index bcb0c901f14..0f363991310 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/LoadBalancerExpirer.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/LoadBalancerExpirer.java @@ -15,6 +15,8 @@ import java.util.ArrayList; import java.util.LinkedHashSet; import java.util.List; import java.util.Objects; +import java.util.concurrent.atomic.AtomicReference; +import java.util.function.Consumer; import java.util.stream.Collectors; /** @@ -51,37 +53,31 @@ public class LoadBalancerExpirer extends Maintainer { /** Move reserved load balancer that have expired to inactive */ private void expireReserved() { - try (var lock = db.lockLoadBalancers()) { - var now = nodeRepository().clock().instant(); - var expirationTime = now.minus(reservedExpiry); - var expired = nodeRepository().loadBalancers() - .in(State.reserved) - .changedBefore(expirationTime); - expired.forEach(lb -> db.writeLoadBalancer(lb.with(State.inactive, now))); - } + var now = nodeRepository().clock().instant(); + withLoadBalancersIn(State.reserved, lb -> { + var gracePeriod = now.minus(reservedExpiry); + if (!lb.changedAt().isBefore(gracePeriod)) return; // Should not move to inactive yet + db.writeLoadBalancer(lb.with(State.inactive, now)); + }); } /** Deprovision inactive load balancers that have expired */ private void removeInactive() { var failed = new ArrayList<LoadBalancerId>(); - Exception lastException = null; - try (var lock = db.lockLoadBalancers()) { - var now = nodeRepository().clock().instant(); - var expirationTime = now.minus(inactiveExpiry); - var expired = nodeRepository().loadBalancers() - .in(State.inactive) - .changedBefore(expirationTime); - for (var lb : expired) { - if (!allocatedNodes(lb.id()).isEmpty()) continue; // Defer removal if there are still nodes allocated to application - try { - service.remove(lb.id().application(), lb.id().cluster()); - db.removeLoadBalancer(lb.id()); - } catch (Exception e) { - failed.add(lb.id()); - lastException = e; - } + var lastException = new AtomicReference<Exception>(); + var now = nodeRepository().clock().instant(); + withLoadBalancersIn(State.inactive, lb -> { + var gracePeriod = now.minus(inactiveExpiry); + if (!lb.changedAt().isBefore(gracePeriod)) return; // Should not be removed yet + if (!allocatedNodes(lb.id()).isEmpty()) return; // Still has nodes, do not remove + try { + service.remove(lb.id().application(), lb.id().cluster()); + db.removeLoadBalancer(lb.id()); + } catch (Exception e){ + failed.add(lb.id()); + lastException.set(e); } - } + }); if (!failed.isEmpty()) { log.log(LogLevel.WARNING, String.format("Failed to remove %d load balancers: %s, retrying in %s", failed.size(), @@ -89,30 +85,27 @@ public class LoadBalancerExpirer extends Maintainer { .map(LoadBalancerId::serializedForm) .collect(Collectors.joining(", ")), interval()), - lastException); + lastException.get()); } } /** Remove reals from inactive load balancers */ private void pruneReals() { var failed = new ArrayList<LoadBalancerId>(); - Exception lastException = null; - try (var lock = db.lockLoadBalancers()) { - var deactivated = nodeRepository().loadBalancers().in(State.inactive); - for (var lb : deactivated) { - var allocatedNodes = allocatedNodes(lb.id()).stream().map(Node::hostname).collect(Collectors.toSet()); - var reals = new LinkedHashSet<>(lb.instance().reals()); - // Remove any real no longer allocated to this application - reals.removeIf(real -> !allocatedNodes.contains(real.hostname().value())); - try { - service.create(lb.id().application(), lb.id().cluster(), reals, true); - db.writeLoadBalancer(lb.with(lb.instance().withReals(reals))); - } catch (Exception e) { - failed.add(lb.id()); - lastException = e; - } + var lastException = new AtomicReference<Exception>(); + withLoadBalancersIn(State.inactive, lb -> { + var allocatedNodes = allocatedNodes(lb.id()).stream().map(Node::hostname).collect(Collectors.toSet()); + var reals = new LinkedHashSet<>(lb.instance().reals()); + // Remove any real no longer allocated to this application + reals.removeIf(real -> !allocatedNodes.contains(real.hostname().value())); + try { + service.create(lb.id().application(), lb.id().cluster(), reals, true); + db.writeLoadBalancer(lb.with(lb.instance().withReals(reals))); + } catch (Exception e) { + failed.add(lb.id()); + lastException.set(e); } - } + }); if (!failed.isEmpty()) { log.log(LogLevel.WARNING, String.format("Failed to remove reals from %d load balancers: %s, retrying in %s", failed.size(), @@ -120,7 +113,21 @@ public class LoadBalancerExpirer extends Maintainer { .map(LoadBalancerId::serializedForm) .collect(Collectors.joining(", ")), interval()), - lastException); + lastException.get()); + } + } + + /** Apply operation to all load balancers that exist in given state, while holding lock */ + private void withLoadBalancersIn(LoadBalancer.State state, Consumer<LoadBalancer> operation) { + try (var legacyLock = db.lockLoadBalancers()) { + for (var id : db.readLoadBalancerIds()) { + try (var lock = db.lockLoadBalancers(id.application())) { + var loadBalancer = db.readLoadBalancer(id); + if (loadBalancer.isEmpty()) continue; // Load balancer was removed during loop + if (loadBalancer.get().state() != state) continue; // Wrong state + operation.accept(loadBalancer.get()); + } + } } } diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/persistence/CuratorDatabaseClient.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/persistence/CuratorDatabaseClient.java index 1b90b8d3461..0a8575578ce 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/persistence/CuratorDatabaseClient.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/persistence/CuratorDatabaseClient.java @@ -521,10 +521,15 @@ public class CuratorDatabaseClient { transaction.commit(); } + // TODO(mpolden): Remove this and all usages once migration to per-application lock is complete public Lock lockLoadBalancers() { return lock(lockRoot.append("loadBalancersLock"), defaultLockTimeout); } + public Lock lockLoadBalancers(ApplicationId application) { + return lock(lockRoot.append("loadBalancersLock2").append(application.serializedForm()), defaultLockTimeout); + } + private Path loadBalancerPath(LoadBalancerId id) { return loadBalancersRoot.append(id.serializedForm()); } diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/LoadBalancerProvisioner.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/LoadBalancerProvisioner.java index 25f245120ea..d4d5b46dfdf 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/LoadBalancerProvisioner.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/LoadBalancerProvisioner.java @@ -51,10 +51,12 @@ public class LoadBalancerProvisioner { this.db = nodeRepository.database(); this.service = service; // Read and write all load balancers to make sure they are stored in the latest version of the serialization format - try (var lock = db.lockLoadBalancers()) { + try (var legacyLock = db.lockLoadBalancers()) { for (var id : db.readLoadBalancerIds()) { - var loadBalancer = db.readLoadBalancer(id); - loadBalancer.ifPresent(db::writeLoadBalancer); + try (var lock = db.lockLoadBalancers(id.application())) { + var loadBalancer = db.readLoadBalancer(id); + loadBalancer.ifPresent(db::writeLoadBalancer); + } } } } @@ -73,8 +75,10 @@ public class LoadBalancerProvisioner { if (requestedNodes.type() != NodeType.tenant) return; // Nothing to provision for this node type if (!cluster.type().isContainer()) return; // Nothing to provision for this cluster type if (application.instance().isTester()) return; // Do not provision for tester instances - try (var loadBalancersLock = db.lockLoadBalancers()) { - provision(application, cluster.id(), false, loadBalancersLock); + try (var legacyLock = db.lockLoadBalancers()) { + try (var lock = db.lockLoadBalancers(application)) { + provision(application, cluster.id(), false, lock); + } } } @@ -90,15 +94,17 @@ public class LoadBalancerProvisioner { */ public void activate(ApplicationId application, Set<ClusterSpec> clusters, @SuppressWarnings("unused") Mutex applicationLock, NestedTransaction transaction) { - try (var loadBalancersLock = db.lockLoadBalancers()) { - var containerClusters = containerClusterOf(clusters); - for (var clusterId : containerClusters) { - // Provision again to ensure that load balancer instance is re-configured with correct nodes - provision(application, clusterId, true, loadBalancersLock); + try (var legacyLock = db.lockLoadBalancers()) { + try (var lock = db.lockLoadBalancers(application)) { + var containerClusters = containerClusterOf(clusters); + for (var clusterId : containerClusters) { + // Provision again to ensure that load balancer instance is re-configured with correct nodes + provision(application, clusterId, true, lock); + } + // Deactivate any surplus load balancers, i.e. load balancers for clusters that have been removed + var surplusLoadBalancers = surplusLoadBalancersOf(application, containerClusters); + deactivate(surplusLoadBalancers, transaction); } - // Deactivate any surplus load balancers, i.e. load balancers for clusters that have been removed - var surplusLoadBalancers = surplusLoadBalancersOf(application, containerClusters); - deactivate(surplusLoadBalancers, transaction); } } @@ -108,8 +114,10 @@ public class LoadBalancerProvisioner { */ public void deactivate(ApplicationId application, NestedTransaction transaction) { try (var applicationLock = nodeRepository.lock(application)) { - try (Mutex loadBalancersLock = db.lockLoadBalancers()) { - deactivate(nodeRepository.loadBalancers(application).asList(), transaction); + try (var legacyLock = db.lockLoadBalancers()) { + try (var lock = db.lockLoadBalancers(application)) { + deactivate(nodeRepository.loadBalancers(application).asList(), transaction); + } } } } diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/LoadBalancerExpirerTest.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/LoadBalancerExpirerTest.java index f0faf36fc5d..4b7534b431e 100644 --- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/LoadBalancerExpirerTest.java +++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/LoadBalancerExpirerTest.java @@ -32,7 +32,7 @@ import static org.junit.Assert.assertTrue; */ public class LoadBalancerExpirerTest { - private ProvisioningTester tester = new ProvisioningTester.Builder().build(); + private final ProvisioningTester tester = new ProvisioningTester.Builder().build(); @Test public void expire_inactive() { @@ -67,10 +67,11 @@ public class LoadBalancerExpirerTest { // Expirer prunes reals before expiration time of load balancer itself expirer.maintain(); assertEquals(Set.of(), tester.loadBalancerService().instances().get(lb1).reals()); - assertEquals(Set.of(), tester.nodeRepository().loadBalancers(lb1.application()).asList().get(0).instance().reals()); + assertEquals(Set.of(), loadBalancers.get().get(lb1).instance().reals()); // Expirer defers removal of load balancer until expiration time passes expirer.maintain(); + assertSame(LoadBalancer.State.inactive, loadBalancers.get().get(lb1).state()); assertTrue("Inactive load balancer not removed", tester.loadBalancerService().instances().containsKey(lb1)); // Expirer removes load balancers once expiration time passes @@ -85,7 +86,7 @@ public class LoadBalancerExpirerTest { // A single cluster is removed deployApplication(app2, cluster1); expirer.maintain(); - assertEquals(LoadBalancer.State.inactive, loadBalancers.get().get(lb3).state()); + assertSame(LoadBalancer.State.inactive, loadBalancers.get().get(lb3).state()); // Expirer defers removal while nodes are still allocated to cluster expirer.maintain(); diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/provisioning/LoadBalancerProvisionerTest.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/provisioning/LoadBalancerProvisionerTest.java index 7031c368e1d..ee9a582c4db 100644 --- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/provisioning/LoadBalancerProvisionerTest.java +++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/provisioning/LoadBalancerProvisionerTest.java @@ -40,10 +40,9 @@ public class LoadBalancerProvisionerTest { private final ApplicationId app1 = ApplicationId.from("tenant1", "application1", "default"); private final ApplicationId app2 = ApplicationId.from("tenant2", "application2", "default"); - private final ApplicationId infraApp1 = ApplicationId.from("vespa", "tenant-host", "default"); - private ProvisioningTester tester = new ProvisioningTester.Builder().build(); + private final ProvisioningTester tester = new ProvisioningTester.Builder().build(); @Test public void provision_load_balancer() { |