diff options
author | Håkon Hallingstad <hakon@yahooinc.com> | 2022-06-09 14:16:30 +0200 |
---|---|---|
committer | Håkon Hallingstad <hakon@yahooinc.com> | 2022-06-09 14:16:30 +0200 |
commit | a7e32ae0e17ba1c93e9142b5528fd5a79ab046d0 (patch) | |
tree | baf40fde66bd4431dcf39e90a2f7419595c43eab /node-repository/src/main/java | |
parent | 46ef33bd14f2c12ccdeac9b7c588f12b7cfa9c8c (diff) |
Deprovision host with host lock and sanity-check
Diffstat (limited to 'node-repository/src/main/java')
-rw-r--r-- | node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/DynamicProvisioningMaintainer.java | 18 |
1 file changed, 12 insertions, 6 deletions
```diff
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/DynamicProvisioningMaintainer.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/DynamicProvisioningMaintainer.java
index 4651cc85710..a461d98697d 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/DynamicProvisioningMaintainer.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/DynamicProvisioningMaintainer.java
@@ -3,6 +3,7 @@ package com.yahoo.vespa.hosted.provision.maintenance;
 
 import com.yahoo.component.Version;
 import com.yahoo.component.Vtag;
+import com.yahoo.concurrent.UncheckedTimeoutException;
 import com.yahoo.config.provision.ApplicationId;
 import com.yahoo.config.provision.ClusterMembership;
 import com.yahoo.config.provision.ClusterSpec;
@@ -21,6 +22,7 @@ import com.yahoo.vespa.flags.custom.SharedHost;
 import com.yahoo.vespa.hosted.provision.LockedNodeList;
 import com.yahoo.vespa.hosted.provision.Node;
 import com.yahoo.vespa.hosted.provision.NodeList;
+import com.yahoo.vespa.hosted.provision.NodeMutex;
 import com.yahoo.vespa.hosted.provision.NodeRepository;
 import com.yahoo.vespa.hosted.provision.NodesAndHosts;
 import com.yahoo.vespa.hosted.provision.node.Agent;
@@ -130,13 +132,20 @@ public class DynamicProvisioningMaintainer extends NodeRepositoryMaintainer {
         }
 
         excessHosts.forEach(host -> {
-            try {
+            Optional<NodeMutex> optionalMutex = nodeRepository().nodes().lockAndGet(host, Optional.of(Duration.ofSeconds(10)));
+            if (optionalMutex.isEmpty()) return;
+            try (NodeMutex mutex = optionalMutex.get()) {
+                if (host.state() != mutex.node().state()) return;
+                host = mutex.node();
                 // First mark the host as wantToDeprovision so that if hostProvisioner fails, this host
                 // * wont get new nodes allocated to it
                 // * will be selected as excess on next iteration of this maintainer
                 nodeRepository().nodes().deprovision(host.hostname(), Agent.DynamicProvisioningMaintainer, nodeRepository().clock().instant());
                 hostProvisioner.deprovision(host);
                 nodeRepository().nodes().removeRecursively(host, true);
+            } catch (UncheckedTimeoutException e) {
+                log.log(Level.WARNING, "Failed to deprovision " + host.hostname() +
+                                       ": Failed to get lock on node, will retry later");
             } catch (RuntimeException e) {
                 log.log(Level.WARNING, "Failed to deprovision " + host.hostname() + ", will retry in " + interval(), e);
             }
@@ -152,11 +161,8 @@ public class DynamicProvisioningMaintainer extends NodeRepositoryMaintainer {
      * without wantToDeprovision (which means an operator is looking at the node).
      */
     private List<Node> provision(NodeList nodeList) {
-        final List<Node> nodes = new ArrayList<>(provisionUntilNoDeficit(nodeList));
-
-
-        Map<String, Node> sharedHosts = new HashMap<>(findSharedHosts(nodeList));
-
+        var nodes = new ArrayList<>(provisionUntilNoDeficit(nodeList));
+        var sharedHosts = new HashMap<>(findSharedHosts(nodeList));
         int minCount = sharedHostFlag.value().getMinCount();
         int deficit = minCount - sharedHosts.size();
         if (deficit > 0) {
```