diff options
author | Valerij Fredriksen <valerij92@gmail.com> | 2019-06-20 19:03:27 +0200 |
---|---|---|
committer | Valerij Fredriksen <valerij92@gmail.com> | 2019-06-20 19:03:27 +0200 |
commit | da767a3f5d2f95d9b17e47446a06294f74522e0c (patch) | |
tree | c7a0b9a3744217651d0e4721f91e77085ab6fd86 /node-repository/src/main/java | |
parent | 86ddeed7f3d62f9703f100a013f7f4b39957f3b9 (diff) |
Remove NodeRetirer
Diffstat (limited to 'node-repository/src/main/java')
10 files changed, 3 insertions, 618 deletions
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/FailedExpirer.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/FailedExpirer.java index 684f6dbcd50..d7f41c4d8e2 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/FailedExpirer.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/FailedExpirer.java @@ -49,7 +49,7 @@ import java.util.stream.Collectors; */ public class FailedExpirer extends Maintainer { - private static final Logger log = Logger.getLogger(NodeRetirer.class.getName()); + private static final Logger log = Logger.getLogger(FailedExpirer.class.getName()); private static final int maxAllowedFailures = 5; // Stop recycling nodes after this number of failures private final NodeRepository nodeRepository; diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeRepositoryMaintenance.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeRepositoryMaintenance.java index b71c7c7ec81..0ecbfab2b99 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeRepositoryMaintenance.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeRepositoryMaintenance.java @@ -11,11 +11,6 @@ import com.yahoo.config.provision.Zone; import com.yahoo.jdisc.Metric; import com.yahoo.vespa.flags.FlagSource; import com.yahoo.vespa.hosted.provision.NodeRepository; -import com.yahoo.vespa.hosted.provision.maintenance.retire.RetireIPv4OnlyNodes; -import com.yahoo.vespa.hosted.provision.maintenance.retire.RetirementPolicy; -import com.yahoo.vespa.hosted.provision.maintenance.retire.RetirementPolicyList; -import com.yahoo.vespa.hosted.provision.provisioning.FlavorSpareChecker; -import com.yahoo.vespa.hosted.provision.provisioning.FlavorSpareCount; import com.yahoo.vespa.hosted.provision.provisioning.ProvisionServiceProvider; import com.yahoo.vespa.orchestrator.Orchestrator; import com.yahoo.vespa.service.monitor.ServiceMonitor; @@ -46,7 +41,6 @@ public class NodeRepositoryMaintenance extends AbstractComponent { private final DirtyExpirer dirtyExpirer; private final ProvisionedExpirer provisionedExpirer; private final NodeRebooter nodeRebooter; - private final NodeRetirer nodeRetirer; private final MetricsReporter metricsReporter; private final InfrastructureProvisioner infrastructureProvisioner; private final Optional<LoadBalancerExpirer> loadBalancerExpirer; @@ -90,11 +84,6 @@ public class NodeRepositoryMaintenance extends AbstractComponent { // The DuperModel is filled with infrastructure applications by the infrastructure provisioner, so explicitly run that now infrastructureProvisioner.maintain(); - - RetirementPolicy policy = new RetirementPolicyList(new RetireIPv4OnlyNodes(zone)); - FlavorSpareChecker flavorSpareChecker = new FlavorSpareChecker( - NodeRetirer.SPARE_NODES_POLICY, FlavorSpareCount.constructFlavorSpareCountGraph(zone.nodeFlavors().get().getFlavors())); - nodeRetirer = new NodeRetirer(nodeRepository, flavorSpareChecker, durationFromEnv("retire_interval").orElse(defaults.nodeRetirerInterval), deployer, policy); } @Override @@ -108,7 +97,6 @@ public class NodeRepositoryMaintenance extends AbstractComponent { failedExpirer.deconstruct(); dirtyExpirer.deconstruct(); nodeRebooter.deconstruct(); - nodeRetirer.deconstruct(); provisionedExpirer.deconstruct(); metricsReporter.deconstruct(); infrastructureProvisioner.deconstruct(); @@ -152,7 +140,6 @@ public class NodeRepositoryMaintenance extends AbstractComponent { private final Duration dirtyExpiry; private final Duration provisionedExpiry; private final Duration rebootInterval; - private final Duration nodeRetirerInterval; private final Duration metricsInterval; private final Duration retiredInterval; private final Duration infrastructureProvisionInterval; @@ -171,7 +158,6 @@ public class NodeRepositoryMaintenance extends AbstractComponent { failedExpirerInterval = Duration.ofMinutes(10); provisionedExpiry = Duration.ofHours(4); rebootInterval = Duration.ofDays(30); - nodeRetirerInterval = Duration.ofMinutes(30); metricsInterval = Duration.ofMinutes(1); infrastructureProvisionInterval = Duration.ofMinutes(1); throttlePolicy = NodeFailer.ThrottlePolicy.hosted; diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeRetirer.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeRetirer.java deleted file mode 100644 index 0245f2a92a3..00000000000 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeRetirer.java +++ /dev/null @@ -1,231 +0,0 @@ -// Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. -package com.yahoo.vespa.hosted.provision.maintenance; - -import com.yahoo.config.provision.ApplicationId; -import com.yahoo.config.provision.ClusterSpec; -import com.yahoo.config.provision.Deployer; -import com.yahoo.config.provision.Deployment; -import com.yahoo.config.provision.Flavor; -import com.yahoo.config.provision.NodeType; -import com.yahoo.log.LogLevel; -import com.yahoo.transaction.Mutex; -import com.yahoo.vespa.hosted.provision.Node; -import com.yahoo.vespa.hosted.provision.NodeRepository; -import com.yahoo.vespa.hosted.provision.maintenance.retire.RetirementPolicy; -import com.yahoo.vespa.hosted.provision.node.Agent; -import com.yahoo.vespa.hosted.provision.provisioning.FlavorSpareChecker; - -import java.time.Duration; -import java.util.Collection; -import java.util.HashMap; -import java.util.Iterator; -import java.util.List; -import java.util.Map; -import java.util.Optional; -import java.util.Set; -import java.util.logging.Logger; -import java.util.stream.Collectors; -import java.util.stream.Stream; - -/** - * Automatically retires ready and active nodes if they meet a certain criteria given by the {@link RetirementPolicy} - * and if there are enough remaining nodes to both replace the retiring node as well as to keep enough in spare. - * - * @author freva - */ -public class NodeRetirer extends Maintainer { - - public static final FlavorSpareChecker.SpareNodesPolicy SPARE_NODES_POLICY = flavorSpareCount -> - flavorSpareCount.getNumReadyAmongReplacees() > 2; - - private static final long MAX_SIMULTANEOUS_RETIRES_PER_CLUSTER = 1; - private static final Logger log = Logger.getLogger(NodeRetirer.class.getName()); - - private final Deployer deployer; - private final FlavorSpareChecker flavorSpareChecker; - private final RetirementPolicy retirementPolicy; - - NodeRetirer(NodeRepository nodeRepository, FlavorSpareChecker flavorSpareChecker, Duration interval, - Deployer deployer, RetirementPolicy retirementPolicy) { - super(nodeRepository, interval); - this.deployer = deployer; - this.retirementPolicy = retirementPolicy; - this.flavorSpareChecker = flavorSpareChecker; - } - - @Override - protected void maintain() { - if (! retirementPolicy.isActive()) return; - - if (retireUnallocated()) { - retireAllocated(); - } - } - - /** - * Retires unallocated nodes by moving them directly to parked. - * Returns true iff all there are no unallocated nodes that match the retirement policy - */ - boolean retireUnallocated() { - try (Mutex lock = nodeRepository().lockAllocation()) { - List<Node> allNodes = nodeRepository().getNodes(NodeType.tenant); - Map<Flavor, Map<Node.State, Long>> numSpareNodesByFlavorByState = getNumberOfNodesByFlavorByNodeState(allNodes); - flavorSpareChecker.updateReadyAndActiveCountsByFlavor(numSpareNodesByFlavorByState); - - long numFlavorsWithUnsuccessfullyRetiredNodes = allNodes.stream() - .filter(node -> node.state() == Node.State.ready) - .filter(node -> retirementPolicy.shouldRetire(node).isPresent()) - .collect(Collectors.groupingBy( - Node::flavor, - Collectors.toSet())) - .entrySet().stream() - .filter(entry -> { - Set<Node> nodesThatShouldBeRetiredForFlavor = entry.getValue(); - for (Iterator<Node> iter = nodesThatShouldBeRetiredForFlavor.iterator(); iter.hasNext(); ) { - Node nodeToRetire = iter.next(); - if (! flavorSpareChecker.canRetireUnallocatedNodeWithFlavor(nodeToRetire.flavor())) break; - - retirementPolicy.shouldRetire(nodeToRetire).ifPresent(reason -> { - nodeRepository().write(nodeToRetire.with(nodeToRetire.status().withWantToDeprovision(true)), lock); - nodeRepository().park(nodeToRetire.hostname(), false, Agent.NodeRetirer, reason); - iter.remove(); - }); - } - - if (! nodesThatShouldBeRetiredForFlavor.isEmpty()) { - String commaSeparatedHostnames = nodesThatShouldBeRetiredForFlavor.stream().map(Node::hostname) - .collect(Collectors.joining(", ")); - log.info(String.format("Failed to retire %s, wanted to retire %d nodes (%s), but there are no spare nodes left.", - entry.getKey(), nodesThatShouldBeRetiredForFlavor.size(), commaSeparatedHostnames)); - } - return ! nodesThatShouldBeRetiredForFlavor.isEmpty(); - }).count(); - - return numFlavorsWithUnsuccessfullyRetiredNodes == 0; - } - } - - void retireAllocated() { - List<Node> allNodes = nodeRepository().getNodes(NodeType.tenant); - List<ApplicationId> activeApplications = getActiveApplicationIds(allNodes); - Map<Flavor, Map<Node.State, Long>> numSpareNodesByFlavorByState = getNumberOfNodesByFlavorByNodeState(allNodes); - flavorSpareChecker.updateReadyAndActiveCountsByFlavor(numSpareNodesByFlavorByState); - - // Get all the nodes that we could retire along with their deployments - Map<Deployment, Set<Node>> nodesToRetireByDeployment = new HashMap<>(); - for (ApplicationId applicationId : activeApplications) { - Map<ClusterSpec.Id, Set<Node>> nodesByCluster = getNodesBelongingToApplication(allNodes, applicationId).stream() - .collect(Collectors.groupingBy( - node -> node.allocation().get().membership().cluster().id(), - Collectors.toSet())); - Map<ClusterSpec.Id, Set<Node>> retireableNodesByCluster = nodesByCluster.entrySet().stream() - .collect(Collectors.toMap( - Map.Entry::getKey, - entry -> filterRetireableNodes(entry.getValue()))); - if (retireableNodesByCluster.values().stream().mapToInt(Set::size).sum() == 0) continue; - - Optional<Deployment> deployment = deployer.deployFromLocalActive(applicationId); - if ( ! deployment.isPresent()) continue; // this will be done at another config server - - Set<Node> replaceableNodes = retireableNodesByCluster.entrySet().stream() - .flatMap(entry -> entry.getValue().stream() - .filter(node -> flavorSpareChecker.canRetireAllocatedNodeWithFlavor(node.flavor())) - .limit(getNumberNodesAllowToRetireForCluster(nodesByCluster.get(entry.getKey()), MAX_SIMULTANEOUS_RETIRES_PER_CLUSTER))) - .collect(Collectors.toSet()); - if (! replaceableNodes.isEmpty()) nodesToRetireByDeployment.put(deployment.get(), replaceableNodes); - } - - nodesToRetireByDeployment.forEach(((deployment, nodes) -> { - ApplicationId app = nodes.iterator().next().allocation().get().owner(); - Set<Node> nodesToRetire; - - // While under application lock, get up-to-date node, and make sure that the state and the owner of the - // node has not changed in the meantime, mutate the up-to-date node (so to not overwrite other fields - // that may have changed) with wantToRetire and wantToDeprovision. - try (Mutex lock = nodeRepository().lock(app)) { - nodesToRetire = nodes.stream() - .map(node -> - nodeRepository().getNode(node.hostname()) - .filter(upToDateNode -> node.state() == Node.State.active) - .filter(upToDateNode -> node.allocation().get().owner().equals(upToDateNode.allocation().get().owner()))) - .flatMap(node -> node.map(Stream::of).orElseGet(Stream::empty)) - .collect(Collectors.toSet()); - - nodesToRetire.forEach(node -> - retirementPolicy.shouldRetire(node).ifPresent(reason -> { - log.info("Setting wantToRetire and wantToDeprovision for host " + node.hostname() + - " with flavor " + node.flavor().name() + - " allocated to " + node.allocation().get().owner() + ". Reason: " + reason); - - Node updatedNode = node.with(node.status() - .withWantToRetire(true) - .withWantToDeprovision(true)); - nodeRepository().write(updatedNode, lock); - })); - } - - // This takes a while, so do it outside of the application lock - if (! nodesToRetire.isEmpty()) { - try { - deployment.activate(); - } catch (Exception e) { - log.log(LogLevel.INFO, "Failed to redeploy " + app.serializedForm() + ", will be redeployed later by application maintainer", e); - } - } - })); - } - - private List<Node> getNodesBelongingToApplication(Collection<Node> allNodes, ApplicationId applicationId) { - return allNodes.stream() - .filter(node -> node.allocation().isPresent()) - .filter(node -> node.allocation().get().owner().equals(applicationId)) - .collect(Collectors.toList()); - } - - /** - * Returns a list of ApplicationIds sorted by number of active nodes the application has allocated to it - */ - List<ApplicationId> getActiveApplicationIds(Collection<Node> nodes) { - return nodes.stream() - .filter(node -> node.state() == Node.State.active) - .collect(Collectors.groupingBy( - node -> node.allocation().get().owner(), - Collectors.counting())) - .entrySet().stream() - .sorted((c1, c2) -> c2.getValue().compareTo(c1.getValue())) - .map(Map.Entry::getKey) - .collect(Collectors.toList()); - } - - /** - * @param nodes Collection of nodes that are considered for retirement - * @return Set of nodes that all should eventually be retired - */ - Set<Node> filterRetireableNodes(Collection<Node> nodes) { - return nodes.stream() - .filter(node -> node.state() == Node.State.active) - .filter(node -> !node.status().wantToRetire()) - .filter(node -> retirementPolicy.shouldRetire(node).isPresent()) - .collect(Collectors.toSet()); - } - - /** - * @param clusterNodes All the nodes allocated to an application belonging to a single cluster - * @return number of nodes we can safely start retiring - */ - long getNumberNodesAllowToRetireForCluster(Collection<Node> clusterNodes, long maxSimultaneousRetires) { - long numNodesInWantToRetire = clusterNodes.stream() - .filter(node -> node.status().wantToRetire()) - .filter(node -> node.state() != Node.State.parked) - .count(); - return Math.max(0, maxSimultaneousRetires - numNodesInWantToRetire); - } - - private Map<Flavor, Map<Node.State, Long>> getNumberOfNodesByFlavorByNodeState(Collection<Node> allNodes) { - return allNodes.stream() - .collect(Collectors.groupingBy( - Node::flavor, - Collectors.groupingBy(Node::state, Collectors.counting()))); - } - -} diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/retire/RetireIPv4OnlyNodes.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/retire/RetireIPv4OnlyNodes.java deleted file mode 100644 index 6562a89c2d6..00000000000 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/retire/RetireIPv4OnlyNodes.java +++ /dev/null @@ -1,61 +0,0 @@ -// Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. -package com.yahoo.vespa.hosted.provision.maintenance.retire; - -import com.google.common.net.InetAddresses; -import com.yahoo.config.provision.Environment; -import com.yahoo.config.provision.Flavor; -import com.yahoo.config.provision.RegionName; -import com.yahoo.config.provision.SystemName; -import com.yahoo.config.provision.Zone; -import com.yahoo.vespa.hosted.provision.Node; - -import java.net.Inet4Address; -import java.util.Optional; - -/** - * @author freva - */ -public class RetireIPv4OnlyNodes implements RetirementPolicy { - private final Zone zone; - - public RetireIPv4OnlyNodes(Zone zone) { - this.zone = zone; - } - - @Override - public boolean isActive() { - if(zone.system() == SystemName.cd) { - return zone.environment() == Environment.dev || zone.environment() == Environment.prod; - } - - if (zone.system() == SystemName.main) { - if (zone.region().equals(RegionName.from("us-east-3"))) { - return zone.environment() == Environment.perf || zone.environment() == Environment.prod; - } else if (zone.region().equals(RegionName.from("us-west-1"))) { - return zone.environment() == Environment.prod; - } else if (zone.region().equals(RegionName.from("us-central-1"))) { - return zone.environment() == Environment.prod; - } else if (zone.region().equals(RegionName.from("ap-southeast-1"))) { - return zone.environment() == Environment.prod; - } else if (zone.region().equals(RegionName.from("ap-northeast-1"))) { - return zone.environment() == Environment.prod; - } else if (zone.region().equals(RegionName.from("ap-northeast-2"))) { - return zone.environment() == Environment.prod; - } else if (zone.region().equals(RegionName.from("eu-west-1"))) { - return zone.environment() == Environment.prod; - } - } - - return false; - } - - @Override - public Optional<String> shouldRetire(Node node) { - if (node.flavor().getType() == Flavor.Type.VIRTUAL_MACHINE) return Optional.empty(); - boolean shouldRetire = node.ipAddresses().stream() - .map(InetAddresses::forString) - .allMatch(address -> address instanceof Inet4Address); - - return shouldRetire ? Optional.of("Node is IPv4-only") : Optional.empty(); - } -} diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/retire/RetirementPolicy.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/retire/RetirementPolicy.java deleted file mode 100644 index ca0419f11c3..00000000000 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/retire/RetirementPolicy.java +++ /dev/null @@ -1,22 +0,0 @@ -// Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. -package com.yahoo.vespa.hosted.provision.maintenance.retire; - -import com.yahoo.vespa.hosted.provision.Node; - -import java.util.Optional; - -/** - * @author freva - */ -public interface RetirementPolicy { - - /** - * Returns whether the policy is currently active. NodeRetirer ask every time before executing. - */ - boolean isActive(); - - /** - * Returns reason for retiring the node, empty if node should not be retired - */ - Optional<String> shouldRetire(Node node); -} diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/retire/RetirementPolicyCache.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/retire/RetirementPolicyCache.java deleted file mode 100644 index c112daadcc9..00000000000 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/retire/RetirementPolicyCache.java +++ /dev/null @@ -1,28 +0,0 @@ -// Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. -package com.yahoo.vespa.hosted.provision.maintenance.retire; - -import com.yahoo.vespa.hosted.provision.Node; - -import java.util.Optional; - -/** - * @author freva - */ -public class RetirementPolicyCache implements RetirementPolicy { - private final RetirementPolicy policy; - private final boolean isActiveCached; - - RetirementPolicyCache(RetirementPolicy policy) { - this.policy = policy; - this.isActiveCached = policy.isActive(); - } - - @Override - public boolean isActive() { - return isActiveCached; - } - - public Optional<String> shouldRetire(Node node) { - return policy.shouldRetire(node); - } -} diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/retire/RetirementPolicyList.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/retire/RetirementPolicyList.java deleted file mode 100644 index 5f4d887b029..00000000000 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/retire/RetirementPolicyList.java +++ /dev/null @@ -1,39 +0,0 @@ -// Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. -package com.yahoo.vespa.hosted.provision.maintenance.retire; - -import com.yahoo.vespa.hosted.provision.Node; - -import java.util.List; -import java.util.Optional; -import java.util.stream.Collectors; -import java.util.stream.Stream; - -/** - * @author freva - */ -public class RetirementPolicyList implements RetirementPolicy { - private final List<RetirementPolicy> retirementPolicies; - - public RetirementPolicyList(RetirementPolicy... retirementPolicies) { - this.retirementPolicies = Stream.of(retirementPolicies) - .map(RetirementPolicyCache::new) - .collect(Collectors.toList()); - } - - @Override - public boolean isActive() { - return retirementPolicies.stream().anyMatch(RetirementPolicy::isActive); - } - - @Override - public Optional<String> shouldRetire(Node node) { - List<String> retirementReasons = retirementPolicies.stream() - .filter(RetirementPolicy::isActive) - .map(retirementPolicy -> retirementPolicy.shouldRetire(node)) - .flatMap(reason -> reason.map(Stream::of).orElse(Stream.empty())) - .collect(Collectors.toList()); - - return retirementReasons.isEmpty() ? Optional.empty() : - Optional.of("[" + String.join(", ", retirementReasons) + "]"); - } -} diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/persistence/NodeSerializer.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/persistence/NodeSerializer.java index 424889caf72..45fb1e050a7 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/persistence/NodeSerializer.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/persistence/NodeSerializer.java @@ -385,7 +385,7 @@ public class NodeSerializer { case "application" : return Agent.application; case "system" : return Agent.system; case "operator" : return Agent.operator; - case "NodeRetirer" : return Agent.NodeRetirer; + case "NodeRetirer" : return Agent.system; // TODO: Remove after 7.67 case "NodeFailer" : return Agent.NodeFailer; } throw new IllegalArgumentException("Unknown node event agent '" + eventAgentField.asString() + "'"); @@ -395,7 +395,7 @@ public class NodeSerializer { case application : return "application"; case system : return "system"; case operator : return "operator"; - case NodeRetirer : return "NodeRetirer"; + case NodeRetirer : return "system"; // TODO: Remove after 7.67 case NodeFailer : return "NodeFailer"; } throw new IllegalArgumentException("Serialized form of '" + agent + "' not defined"); diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/FlavorSpareChecker.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/FlavorSpareChecker.java deleted file mode 100644 index 5f81fed2a04..00000000000 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/FlavorSpareChecker.java +++ /dev/null @@ -1,117 +0,0 @@ -// Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. -package com.yahoo.vespa.hosted.provision.provisioning; - -import com.yahoo.config.provision.Flavor; -import com.yahoo.vespa.hosted.provision.Node; - -import java.util.Collections; -import java.util.HashSet; -import java.util.Map; -import java.util.Set; - -/** - * This class helps answer the question if there are enough nodes to retire a node with flavor f by: - * <ul> - * <li>Finding all the possible flavors that the replacement node could end up on</li> - * <li>Making sure that regardless of which flavor it ends up on, there is still enough spare nodes - * to handle at unexpected node failures.</li> - * </ul> - * <p> - * Definitions: - * <ul> - * <li>Wanted flavor: The flavor that is the node prefers, for example by specifying in services.xml</li> - * <li>Node-repo flavor: The flavor that the node actually has (Either the wanted flavor or a flavor that transitively - * replaces the wanted flavor)</li> - * <li>Replacee flavor: Flavor x is replacee of y iff x transitively replaces y</li> - * <li>Immediate replacee flavor: Flavor x is an immediate replacee of flavor y iff x directly replaces y.</li> - * </ul> - * - * @author freva - */ -public class FlavorSpareChecker { - - private final SpareNodesPolicy spareNodesPolicy; - private final Map<Flavor, FlavorSpareCount> spareCountByFlavor; - - public FlavorSpareChecker(SpareNodesPolicy spareNodesPolicy, Map<Flavor, FlavorSpareCount> spareCountByFlavor) { - this.spareNodesPolicy = spareNodesPolicy; - this.spareCountByFlavor = spareCountByFlavor; - } - - public void updateReadyAndActiveCountsByFlavor(Map<Flavor, Map<Node.State, Long>> numberOfNodesByFlavorByState) { - spareCountByFlavor.forEach((flavor, flavorSpareCount) -> { - Map<Node.State, Long> numberOfNodesByState = numberOfNodesByFlavorByState.getOrDefault(flavor, Collections.emptyMap()); - flavorSpareCount.updateReadyAndActiveCounts( - numberOfNodesByState.getOrDefault(Node.State.ready, 0L), - numberOfNodesByState.getOrDefault(Node.State.active, 0L)); - }); - } - - public boolean canRetireAllocatedNodeWithFlavor(Flavor flavor) { - Set<FlavorSpareCount> possibleNewFlavors = findPossibleReplacementFlavorFor(spareCountByFlavor.get(flavor)); - possibleNewFlavors.forEach(FlavorSpareCount::decrementNumberOfReady); - return !possibleNewFlavors.isEmpty(); - } - - public boolean canRetireUnallocatedNodeWithFlavor(Flavor flavor) { - FlavorSpareCount flavorSpareCount = spareCountByFlavor.get(flavor); - if (flavorSpareCount.hasReady() && spareNodesPolicy.hasSpare(flavorSpareCount)) { - flavorSpareCount.decrementNumberOfReady(); - return true; - } - - return false; - } - - - /** - * Returns a set of possible new flavors that can replace this flavor given current node allocation. - * If the set is empty, there are not enough spare nodes to safely retire this flavor. - * <p> - * The algorithm is: - * for all possible wanted flavor, check: - * <ul> - * <li>1: Sum of ready nodes of flavor f and all replacee flavors of f is > reserved (set by {@link SpareNodesPolicy}</li> - * <li>2a: Number of ready nodes of flavor f is > 0</li> - * <li>2b: Verify 1 & 2 for all immediate replacee of f, f_i, where sum of ready nodes of f_i and all - * replacee flavors of f_i is > 0</li> - * </ul> - * Only 2a OR 2b need to be satisfied. - */ - private Set<FlavorSpareCount> findPossibleReplacementFlavorFor(FlavorSpareCount flavorSpareCount) { - Set<FlavorSpareCount> possibleReplacementFlavors = new HashSet<>(); - for (FlavorSpareCount possibleWantedFlavor : flavorSpareCount.getPossibleWantedFlavors()) { - Set<FlavorSpareCount> replacementFlavors = verifyReplacementConditions(possibleWantedFlavor); - if (replacementFlavors.isEmpty()) return Collections.emptySet(); - else possibleReplacementFlavors.addAll(replacementFlavors); - } - - return possibleReplacementFlavors; - } - - private Set<FlavorSpareCount> verifyReplacementConditions(FlavorSpareCount flavorSpareCount) { - Set<FlavorSpareCount> possibleReplacementFlavors = new HashSet<>(); - // Breaks condition 1, end - if (! spareNodesPolicy.hasSpare(flavorSpareCount)) return Collections.emptySet(); - - // Condition 2a - if (flavorSpareCount.hasReady()) { - possibleReplacementFlavors.add(flavorSpareCount); - - // Condition 2b - } else { - for (FlavorSpareCount possibleNewFlavor : flavorSpareCount.getImmediateReplacees()) { - if (possibleNewFlavor.getNumReadyAmongReplacees() == 0) continue; - - Set<FlavorSpareCount> replacementFlavors = verifyReplacementConditions(possibleNewFlavor); - if (replacementFlavors.isEmpty()) return Collections.emptySet(); - else possibleReplacementFlavors.addAll(replacementFlavors); - } - } - return possibleReplacementFlavors; - } - - public interface SpareNodesPolicy { - boolean hasSpare(FlavorSpareCount flavorSpareCount); - } -} diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/FlavorSpareCount.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/FlavorSpareCount.java deleted file mode 100644 index 217f4999bfb..00000000000 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/FlavorSpareCount.java +++ /dev/null @@ -1,103 +0,0 @@ -// Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. -package com.yahoo.vespa.hosted.provision.provisioning; - -import com.yahoo.config.provision.Flavor; - -import java.util.Collections; -import java.util.HashMap; -import java.util.HashSet; -import java.util.List; -import java.util.Map; -import java.util.Set; -import java.util.stream.Collectors; - -/** - * Keeps track of number of ready and active nodes for a flavor and its replaces neighbors - * - * @author freva - */ -public class FlavorSpareCount { - - private final Flavor flavor; - private Set<FlavorSpareCount> possibleWantedFlavors; - private Set<FlavorSpareCount> immediateReplacees; - private long numReady; - private long numActive; - - public static Map<Flavor, FlavorSpareCount> constructFlavorSpareCountGraph(List<Flavor> flavors) { - Map<Flavor, FlavorSpareCount> spareCountByFlavor = new HashMap<>(); - Map<Flavor, Set<Flavor>> immediateReplaceeFlavorsByFlavor = new HashMap<>(); - for (Flavor flavor : flavors) { - for (Flavor replaces : flavor.replaces()) { - if (! immediateReplaceeFlavorsByFlavor.containsKey(replaces)) { - immediateReplaceeFlavorsByFlavor.put(replaces, new HashSet<>()); - } - immediateReplaceeFlavorsByFlavor.get(replaces).add(flavor); - } - - spareCountByFlavor.put(flavor, new FlavorSpareCount(flavor)); - } - - spareCountByFlavor.forEach((flavor, flavorSpareCount) -> { - flavorSpareCount.immediateReplacees = ! immediateReplaceeFlavorsByFlavor.containsKey(flavor) ? - Collections.emptySet() : - immediateReplaceeFlavorsByFlavor.get(flavor).stream().map(spareCountByFlavor::get).collect(Collectors.toSet()); - flavorSpareCount.possibleWantedFlavors = recursiveReplacements(flavor, new HashSet<>()) - .stream().map(spareCountByFlavor::get).collect(Collectors.toSet()); - }); - - return spareCountByFlavor; - } - - private static Set<Flavor> recursiveReplacements(Flavor flavor, Set<Flavor> replacements) { - replacements.add(flavor); - for (Flavor replaces : flavor.replaces()) { - recursiveReplacements(replaces, replacements); - } - - return replacements; - } - - private FlavorSpareCount(Flavor flavor) { - this.flavor = flavor; - } - - public Flavor getFlavor() { - return flavor; - } - - void updateReadyAndActiveCounts(long numReady, long numActive) { - this.numReady = numReady; - this.numActive = numActive; - } - - boolean hasReady() { - return numReady > 0; - } - - public long getNumReadyAmongReplacees() { - long sumReadyNodes = numReady; - for (FlavorSpareCount replacee : immediateReplacees) { - sumReadyNodes += replacee.getNumReadyAmongReplacees(); - } - - return sumReadyNodes; - } - - Set<FlavorSpareCount> getPossibleWantedFlavors() { - return possibleWantedFlavors; - } - - Set<FlavorSpareCount> getImmediateReplacees() { - return immediateReplacees; - } - - void decrementNumberOfReady() { - numReady--; - } - - @Override - public String toString() { - return flavor.name() + " has " + numReady + " ready nodes and " + numActive + " active nodes"; - } -} |