author    | valerijf <valerijf@yahoo-inc.com> | 2017-06-09 16:17:17 +0200
committer | valerijf <valerijf@yahoo-inc.com> | 2017-06-09 16:17:17 +0200
commit    | f0444218e1c7d295e2e817c755a8b65d52bb65d8 (patch)
tree      | 7700bcca13fd57e85530c857d0fb9019317cb5d9 /node-repository
parent    | 447871a4c0965704f97a54f0ba5d29eb5084e9f6 (diff)
Only make 1 call against NodeRepository.getNodes() in retireAllocated
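In short, the change fetches the full node list once and derives each application's nodes locally, instead of calling NodeRepository.getNodes(applicationId) once per application under the lock. A minimal sketch of that pattern, using simplified stand-in types rather than the real Vespa Node and ApplicationId classes:

```java
import java.util.List;
import java.util.Map;
import java.util.stream.Collectors;

class SingleFetchSketch {
    // Simplified stand-in for a node; `owner` is the application id,
    // or null if the node is unallocated.
    static class Node {
        final String hostname;
        final String owner;
        Node(String hostname, String owner) { this.hostname = hostname; this.owner = owner; }
    }

    // One repository call for the full list, followed by a local group-by,
    // replaces one getNodes(applicationId) round trip per application.
    static Map<String, List<Node>> nodesByApplication(List<Node> allNodes) {
        return allNodes.stream()
                .filter(node -> node.owner != null)
                .collect(Collectors.groupingBy(node -> node.owner));
    }
}
```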
Diffstat (limited to 'node-repository')
-rw-r--r-- | node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeRetirer.java | 75
1 file changed, 51 insertions, 24 deletions
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeRetirer.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeRetirer.java
index 1a901509edc..856b542bdb8 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeRetirer.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeRetirer.java
@@ -15,6 +15,7 @@ import com.yahoo.vespa.hosted.provision.provisioning.FlavorSpareChecker;
 
 import java.time.Duration;
 import java.util.Arrays;
+import java.util.HashMap;
 import java.util.Iterator;
 import java.util.List;
 import java.util.Map;
@@ -22,6 +23,7 @@ import java.util.Optional;
 import java.util.Set;
 import java.util.logging.Logger;
 import java.util.stream.Collectors;
+import java.util.stream.Stream;
 
 /**
  * Automatically retires ready and active nodes if they meet a certain criteria given by the {@link RetirementPolicy}
@@ -109,35 +111,60 @@ public class NodeRetirer extends Maintainer {
         Map<Flavor, Map<Node.State, Long>> numSpareNodesByFlavorByState = getNumberOfNodesByFlavorByNodeState(allNodes);
         flavorSpareChecker.updateReadyAndActiveCountsByFlavor(numSpareNodesByFlavorByState);
 
+        // Get all the nodes that we could retire along with their deployments
+        Map<Deployment, Set<Node>> nodesToRetireByDeployment = new HashMap<>();
         for (ApplicationId applicationId : activeApplications) {
+            List<Node> applicationNodes = getNodesBelongingToApplication(allNodes, applicationId);
+            Set<Node> retireableNodes = getRetireableNodesForApplication(applicationNodes);
+            long numNodesAllowedToRetire = getNumberNodesAllowToRetireForApplication(applicationNodes, MAX_SIMULTANEOUS_RETIRES_PER_APPLICATION);
+            if (retireableNodes.isEmpty() || numNodesAllowedToRetire == 0) continue;
+
             Optional<Deployment> deployment = deployer.deployFromLocalActive(applicationId, Duration.ofMinutes(30));
             if ( ! deployment.isPresent()) continue; // this will be done at another config server
 
-            long numNodesWantedToRetire = 0;
-            try (Mutex lock = nodeRepository().lock(applicationId)) {
-                // Get nodes for current application under lock
-                List<Node> applicationNodes = nodeRepository().getNodes(applicationId);
-                Set<Node> retireableNodes = getRetireableNodesForApplication(applicationNodes);
-                long numNodesAllowedToRetire = getNumberNodesAllowToRetireForApplication(applicationNodes, MAX_SIMULTANEOUS_RETIRES_PER_APPLICATION);
-
-                for (Iterator<Node> iterator = retireableNodes.iterator(); iterator.hasNext() && numNodesAllowedToRetire > numNodesWantedToRetire; ) {
-                    Node retireableNode = iterator.next();
-
-                    if (flavorSpareChecker.canRetireAllocatedNodeWithFlavor(retireableNode.flavor())) {
-                        log.info("Setting wantToRetire and wantToDeprovision for host " + retireableNode.hostname() +
-                                " with flavor " + retireableNode.flavor().name() +
-                                " allocated to " + retireableNode.allocation().get().owner() + ". Policy: " +
-                                retirementPolicy.getClass().getSimpleName());
-                        Node updatedNode = retireableNode.with(retireableNode.status()
-                                .withWantToRetire(true)
-                                .withWantToDeprovision(true));
-                        nodeRepository().write(updatedNode);
-                        numNodesWantedToRetire++;
-                    }
-                }
-            }
-            if (numNodesWantedToRetire > 0) deployment.get().activate();
+            Set<Node> replaceableNodes = retireableNodes.stream()
+                    .filter(node -> flavorSpareChecker.canRetireAllocatedNodeWithFlavor(node.flavor()))
+                    .limit(numNodesAllowedToRetire)
+                    .collect(Collectors.toSet());
+            if (! replaceableNodes.isEmpty()) nodesToRetireByDeployment.put(deployment.get(), replaceableNodes);
         }
+
+        // While under application lock, make sure that the state and the owner of the node has not changed
+        // in the mean time, then retire the node and redeploy.
+        nodesToRetireByDeployment.forEach(((deployment, nodes) -> {
+            ApplicationId app = nodes.iterator().next().allocation().get().owner();
+            Set<Node> nodesToRetire;
+
+            try (Mutex lock = nodeRepository().lock(app)) {
+                nodesToRetire = nodes.stream()
+                        .map(node ->
+                                nodeRepository().getNode(node.hostname())
+                                        .filter(upToDateNode -> node.state() == Node.State.active)
+                                        .filter(upToDateNode -> node.allocation().get().owner().equals(upToDateNode.allocation().get().owner())))
+                        .flatMap(node -> node.map(Stream::of).orElseGet(Stream::empty))
+                        .collect(Collectors.toSet());
+
+                nodesToRetire.forEach(node -> {
+                    log.info("Setting wantToRetire and wantToDeprovision for host " + node.hostname() +
+                            " with flavor " + node.flavor().name() +
+                            " allocated to " + node.allocation().get().owner() + ". Policy: " +
+                            retirementPolicy.getClass().getSimpleName());
+                    Node updatedNode = node.with(node.status()
+                            .withWantToRetire(true)
+                            .withWantToDeprovision(true));
+                    nodeRepository().write(updatedNode);
+                });
+            }
+
+            if (! nodesToRetire.isEmpty()) deployment.activate();
+        }));
+    }
+
+    private List<Node> getNodesBelongingToApplication(List<Node> allNodes, ApplicationId applicationId) {
+        return allNodes.stream()
+                .filter(node -> node.allocation().isPresent())
+                .filter(node -> node.allocation().get().owner().equals(applicationId))
+                .collect(Collectors.toList());
     }
 
     /**
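Structurally, the new code is a two-phase, optimistic pattern: candidates are selected from a lock-free snapshot, and only the short write phase runs under the application lock, re-reading each node so stale entries are dropped rather than written back. A self-contained sketch of that pattern, with a hypothetical Repo interface and a plain ReentrantLock standing in for NodeRepository and the per-application Mutex:

```java
import java.util.Optional;
import java.util.Set;
import java.util.concurrent.locks.ReentrantLock;
import java.util.stream.Collectors;

class RevalidateUnderLockSketch {
    // Hypothetical stand-in for the node repository: look up a node's current
    // state by hostname (empty if the node is gone), and mark it for retirement.
    interface Repo {
        Optional<String> getNodeState(String hostname);
        void markWantToRetire(String hostname);
    }

    private final Repo repo;
    private final ReentrantLock lock = new ReentrantLock(); // stands in for the per-application Mutex

    RevalidateUnderLockSketch(Repo repo) { this.repo = repo; }

    // Phase 1 (not shown) picked `candidates` from a lock-free snapshot.
    // Phase 2 re-reads each node under the lock and keeps only those still
    // active, so nodes that changed state in the meantime are skipped.
    void retire(Set<String> candidates) {
        lock.lock();
        try {
            Set<String> stillActive = candidates.stream()
                    .filter(host -> repo.getNodeState(host)
                            .map("active"::equals)
                            .orElse(false))
                    .collect(Collectors.toSet());
            stillActive.forEach(repo::markWantToRetire);
        } finally {
            lock.unlock();
        }
    }
}
```

The payoff is that the expensive work (scanning all nodes, preparing deployments) happens outside the lock, which is then held only for the brief revalidation and writes.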