diff options
author | Martin Polden <mpolden@mpolden.no> | 2022-07-08 13:58:06 +0200 |
---|---|---|
committer | Martin Polden <mpolden@mpolden.no> | 2022-07-08 13:58:06 +0200 |
commit | c1b7cb7af2f3a5c7138572c9a7186dd55573c064 (patch) | |
tree | 3f5dbde5921d949f1299ba72481e06c33781c9e8 /node-repository | |
parent | 4e7ba5a2ef53e90581acc376d6fc726fe554db34 (diff) |
Reduce scope of unallocated lock and avoid deadlock
Before this change a call to `failOrMarkRecursively` could cause a deadlock
because we would then take the application lock while holding unallocatedLock,
but a deployment (e.g. by `InfrastructureProvisioner`) does the opposite.
Diffstat (limited to 'node-repository')
-rw-r--r-- | node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/DynamicProvisioningMaintainer.java | 36 |
1 files changed, 15 insertions, 21 deletions
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/DynamicProvisioningMaintainer.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/DynamicProvisioningMaintainer.java index 9d9a1304418..368a8da0f90 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/DynamicProvisioningMaintainer.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/DynamicProvisioningMaintainer.java @@ -12,7 +12,6 @@ import com.yahoo.config.provision.NodeResources; import com.yahoo.config.provision.NodeType; import com.yahoo.jdisc.Metric; import com.yahoo.lang.MutableInteger; -import com.yahoo.transaction.Mutex; import com.yahoo.vespa.flags.FlagSource; import com.yahoo.vespa.flags.JacksonFlag; import com.yahoo.vespa.flags.ListFlag; @@ -77,16 +76,14 @@ public class DynamicProvisioningMaintainer extends NodeRepositoryMaintainer { @Override protected double maintain() { - try (Mutex lock = nodeRepository().nodes().lockUnallocated()) { - NodeList nodes = nodeRepository().nodes().list(); - resumeProvisioning(nodes, lock); - convergeToCapacity(nodes); - } + NodeList nodes = nodeRepository().nodes().list(); + resumeProvisioning(nodes); + convergeToCapacity(nodes); return 1.0; } /** Resume provisioning of already provisioned hosts and their children */ - private void resumeProvisioning(NodeList nodes, Mutex lock) { + private void resumeProvisioning(NodeList nodes) { Map<String, Set<Node>> nodesByProvisionedParentHostname = nodes.nodeType(NodeType.tenant, NodeType.config, NodeType.controller) .asList() @@ -97,9 +94,11 @@ public class DynamicProvisioningMaintainer extends NodeRepositoryMaintainer { nodes.state(Node.State.provisioned).nodeType(NodeType.host, NodeType.confighost, NodeType.controllerhost).forEach(host -> { Set<Node> children = nodesByProvisionedParentHostname.getOrDefault(host.hostname(), Set.of()); try { - List<Node> updatedNodes = hostProvisioner.provision(host, children); - verifyDns(updatedNodes); - nodeRepository().nodes().write(updatedNodes, lock); + try (var lock = nodeRepository().nodes().lockUnallocated()) { + List<Node> updatedNodes = hostProvisioner.provision(host, children); + verifyDns(updatedNodes); + nodeRepository().nodes().write(updatedNodes, lock); + } } catch (IllegalArgumentException | IllegalStateException e) { log.log(Level.INFO, "Could not provision " + host.hostname() + " with " + children.size() + " children, will retry in " + interval() + ": " + Exceptions.toMessageString(e)); @@ -189,17 +188,12 @@ public class DynamicProvisioningMaintainer extends NodeRepositoryMaintainer { private List<Node> candidatesForRemoval(List<Node> nodes) { Map<String, Node> hostsByHostname = new HashMap<>(nodes.stream() - .filter(node -> { - switch (node.type()) { - case host: - // TODO: Mark empty tenant hosts as wanttoretire & wanttodeprovision elsewhere, then handle as confighost here - return node.state() != Node.State.parked || node.status().wantToDeprovision(); - case confighost: - case controllerhost: - return node.state() == Node.State.parked && node.status().wantToDeprovision(); - default: - return false; - } + .filter(node -> switch (node.type()) { + case host -> + // TODO: Mark empty tenant hosts as wanttoretire & wanttodeprovision elsewhere, then handle as confighost here + node.state() != Node.State.parked || node.status().wantToDeprovision(); + case confighost, controllerhost -> node.state() == Node.State.parked && node.status().wantToDeprovision(); + default -> false; }) .collect(Collectors.toMap(Node::hostname, Function.identity()))); |