diff options
author | Valerij Fredriksen <valerijf@vespa.ai> | 2023-10-25 23:32:30 +0200 |
---|---|---|
committer | Valerij Fredriksen <valerijf@vespa.ai> | 2023-10-25 23:32:30 +0200 |
commit | c6a0e3f87a67c2d8836630589c0c9d6ae5a5cbb1 (patch) | |
tree | 03cfa1ee7822c0879a12985b2e939505a28efc1a | |
parent | 1e4db6b537090b41e1582dd03b958301260ae475 (diff) |
Wait for provisioning without holding the locks
4 files changed, 15 insertions, 6 deletions
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/HostCapacityMaintainer.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/HostCapacityMaintainer.java index 4526933fb4a..be6c420c63b 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/HostCapacityMaintainer.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/HostCapacityMaintainer.java @@ -229,8 +229,9 @@ public class HostCapacityMaintainer extends NodeRepositoryMaintainer { sharingMode, clusterType.map(ClusterSpec.Type::valueOf), Optional.empty(), nodeRepository().zone().cloud().account(), false); List<Node> hosts = new ArrayList<>(); + Runnable waiter; try (var lock = nodeRepository().nodes().lockUnallocated()) { - hostProvisioner.provisionHosts(request, + waiter = hostProvisioner.provisionHosts(request, resources -> true, provisionedHosts -> { hosts.addAll(provisionedHosts.stream() @@ -240,6 +241,7 @@ public class HostCapacityMaintainer extends NodeRepositoryMaintainer { nodeRepository().nodes().addNodes(hosts, Agent.HostCapacityMaintainer); }); } + waiter.run(); return hosts; } catch (NodeAllocationException | IllegalArgumentException | IllegalStateException e) { throw new NodeAllocationException("Failed to provision " + count + " " + nodeResources + ": " + e.getMessage(), diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/HostProvisioner.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/HostProvisioner.java index a876999e80b..38cbfa7fe5f 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/HostProvisioner.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/HostProvisioner.java @@ -50,8 +50,10 @@ public interface HostProvisioner { * written to ZK immediately in case the config server goes down while waiting * for the provisioning to finish. * @throws NodeAllocationException if the cloud provider cannot satisfy the request + * @return a runnable that waits for the provisioning request to finish. It can be run without holding any locks, + * but may fail with an exception that should be propagated to the user initiating prepare() */ - void provisionHosts(HostProvisionRequest request, Predicate<NodeResources> realHostResourcesWithinLimits, Consumer<List<ProvisionedHost>> whenProvisioned) throws NodeAllocationException; + Runnable provisionHosts(HostProvisionRequest request, Predicate<NodeResources> realHostResourcesWithinLimits, Consumer<List<ProvisionedHost>> whenProvisioned) throws NodeAllocationException; /** * Continue provisioning of given list of Nodes. diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/Preparer.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/Preparer.java index fad42449285..cf99adc647f 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/Preparer.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/Preparer.java @@ -108,6 +108,8 @@ public class Preparer { /// Note that this will write to the node repo. private List<Node> prepareWithLocks(ApplicationId application, ClusterSpec cluster, NodeSpec requested, NodeIndices indices, boolean makeExclusive) { + Runnable waiter = null; + List<Node> acceptedNodes; try (Mutex lock = nodeRepository.applications().lock(application); ApplicationMutex parentLockOrNull = parentLockOrNull(makeExclusive, requested.type()); Mutex allocationLock = nodeRepository.nodes().lockUnallocated()) { @@ -149,7 +151,7 @@ public class Preparer { requested.cloudAccount(), deficit.dueToFlavorUpgrade()); Predicate<NodeResources> realHostResourcesWithinLimits = resources -> nodeRepository.nodeResourceLimits().isWithinRealLimits(resources, application, cluster); - hostProvisioner.get().provisionHosts(request, realHostResourcesWithinLimits, whenProvisioned); + waiter = hostProvisioner.get().provisionHosts(request, realHostResourcesWithinLimits, whenProvisioned); } catch (NodeAllocationException e) { // Mark the nodes that were written to ZK in the consumer for deprovisioning. While these hosts do // not exist, we cannot remove them from ZK here because other nodes may already have been @@ -177,7 +179,7 @@ public class Preparer { nodeRepository.nodes().setExclusiveToApplicationId(exclusiveParents, parentLockOrNull); // TODO: also update tags } - List<Node> acceptedNodes = allocation.finalNodes(); + acceptedNodes = allocation.finalNodes(); nodeRepository.nodes().reserve(allocation.reservableNodes()); nodeRepository.nodes().addReservedNodes(new LockedNodeList(allocation.newNodes(), allocationLock)); @@ -187,8 +189,10 @@ public class Preparer { .filter(node -> node.parentHostname().isEmpty() || activeHosts.parentOf(node).isPresent()) .toList(); } - return acceptedNodes; } + + if (waiter != null) waiter.run(); + return acceptedNodes; } private NodeAllocation prepareAllocation(ApplicationId application, ClusterSpec cluster, NodeSpec requested, diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/testutils/MockHostProvisioner.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/testutils/MockHostProvisioner.java index f7710ca7019..b5bb91af71a 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/testutils/MockHostProvisioner.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/testutils/MockHostProvisioner.java @@ -73,7 +73,7 @@ public class MockHostProvisioner implements HostProvisioner { } @Override - public void provisionHosts(HostProvisionRequest request, Predicate<NodeResources> realHostResourcesWithinLimits, Consumer<List<ProvisionedHost>> whenProvisioned) throws NodeAllocationException { + public Runnable provisionHosts(HostProvisionRequest request, Predicate<NodeResources> realHostResourcesWithinLimits, Consumer<List<ProvisionedHost>> whenProvisioned) throws NodeAllocationException { if (behaviour(Behaviour.failProvisionRequest)) throw new NodeAllocationException("No capacity for provision request", true); Flavor hostFlavor = hostFlavors.get(request.clusterType().orElse(ClusterSpec.Type.content)); if (hostFlavor == null) @@ -101,6 +101,7 @@ public class MockHostProvisioner implements HostProvisioner { } provisionedHosts.addAll(hosts); whenProvisioned.accept(hosts); + return () -> {}; } @Override |