diff options
author | Martin Polden <mpolden@mpolden.no> | 2023-07-12 14:36:12 +0200 |
---|---|---|
committer | GitHub <noreply@github.com> | 2023-07-12 14:36:12 +0200 |
commit | 97f0cf32edba5e6545dc027cdcdaaec125bad37d (patch) | |
tree | 46256c1f97d406818521ebd397939dac8461529c | |
parent | 393ed3b6ee5ef58094a7980363089bdd82ead7b5 (diff) | |
parent | 8daf09b502135f120e2f1fab23b86de597cac982 (diff) |
Merge pull request #27745 from vespa-engine/bratseth/provisioning-cleanup
Bratseth/provisioning cleanup
10 files changed, 209 insertions, 345 deletions
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/HostCapacityMaintainer.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/HostCapacityMaintainer.java index 331759127e4..8213286639c 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/HostCapacityMaintainer.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/HostCapacityMaintainer.java @@ -273,7 +273,7 @@ public class HostCapacityMaintainer extends NodeRepositoryMaintainer { NodePrioritizer prioritizer = new NodePrioritizer(allNodes, applicationId, clusterSpec, nodeSpec, true, nodeRepository().nameResolver(), nodeRepository().nodes(), nodeRepository().resourcesCalculator(), nodeRepository().spareCount(), nodeSpec.cloudAccount().isExclave(nodeRepository().zone())); - List<NodeCandidate> nodeCandidates = prioritizer.collect(List.of()); + List<NodeCandidate> nodeCandidates = prioritizer.collect(); MutableInteger index = new MutableInteger(0); return nodeCandidates .stream() diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/GroupIndices.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/GroupAssigner.java index 44f371be293..4d7114520b3 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/GroupIndices.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/GroupAssigner.java @@ -20,13 +20,13 @@ import java.util.Optional; * * @author bratseth */ -class GroupIndices { +class GroupAssigner { private final NodeSpec requested; private final NodeList allNodes; private final Clock clock; - GroupIndices(NodeSpec requested, NodeList allNodes, Clock clock) { + GroupAssigner(NodeSpec requested, NodeList allNodes, Clock clock) { if (requested.groups() > 1 && requested.count().isEmpty()) throw new IllegalArgumentException("Unlimited nodes cannot be grouped"); this.requested = requested; @@ -78,7 +78,7 @@ class GroupIndices { private NodeCandidate moveNodeInSurplusGroup(NodeCandidate node, int[] countInGroup) { var currentGroup = node.allocation().get().membership().cluster().group(); - if (currentGroup.isEmpty()) return node; // Shouldn't happen + if (currentGroup.isEmpty()) return node; if (currentGroup.get().index() < requested.groups()) return node; return inFirstGroupWithDeficiency(node, countInGroup); } @@ -89,7 +89,7 @@ class GroupIndices { if (currentGroup.isEmpty()) return node; if (currentGroup.get().index() >= requested.groups()) return node; if (requested.count().isEmpty()) return node; // Can't retire - if (countInGroup[currentGroup.get().index()] <= requested.count().get() / requested.groups()) return node; + if (countInGroup[currentGroup.get().index()] <= requested.groupSize()) return node; countInGroup[currentGroup.get().index()]--; return node.withNode(node.toNode().retire(Agent.application, clock.instant())); } @@ -101,7 +101,7 @@ class GroupIndices { if (currentGroup.isEmpty()) return node; if (currentGroup.get().index() >= requested.groups()) return node; if (node.preferToRetire() || node.wantToRetire()) return node; - if (requested.count().isPresent() && countInGroup[currentGroup.get().index()] >= requested.count().get() / requested.groups()) return node; + if (requested.count().isPresent() && countInGroup[currentGroup.get().index()] >= requested.groupSize()) return node; node = unretire(node); if (node.allocation().get().membership().retired()) return node; countInGroup[currentGroup.get().index()]++; @@ -110,7 +110,7 @@ class GroupIndices { private NodeCandidate inFirstGroupWithDeficiency(NodeCandidate node, int[] countInGroup) { for (int group = 0; group < requested.groups(); group++) { - if (requested.count().isEmpty() || countInGroup[group] < requested.count().get() / requested.groups()) { + if (requested.count().isEmpty() || countInGroup[group] < requested.groupSize()) { return inGroup(group, node, countInGroup); } } diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/GroupPreparer.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/GroupPreparer.java deleted file mode 100644 index e6b47d38779..00000000000 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/GroupPreparer.java +++ /dev/null @@ -1,185 +0,0 @@ -// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. -package com.yahoo.vespa.hosted.provision.provisioning; - -import com.yahoo.component.Version; -import com.yahoo.config.provision.ApplicationId; -import com.yahoo.config.provision.ClusterSpec; -import com.yahoo.config.provision.NodeAllocationException; -import com.yahoo.config.provision.NodeResources; -import com.yahoo.config.provision.NodeType; -import com.yahoo.transaction.Mutex; -import com.yahoo.vespa.hosted.provision.LockedNodeList; -import com.yahoo.vespa.hosted.provision.Node; -import com.yahoo.vespa.hosted.provision.NodeRepository; -import com.yahoo.vespa.hosted.provision.node.Agent; -import com.yahoo.vespa.hosted.provision.provisioning.HostProvisioner.HostSharing; - -import java.util.ArrayList; -import java.util.List; -import java.util.Optional; -import java.util.function.Consumer; -import java.util.function.Supplier; -import java.util.logging.Level; -import java.util.logging.Logger; - -/** - * Performs preparation of node activation changes for a single host group in an application. - * - * @author bratseth - */ -public class GroupPreparer { - - private static final Mutex PROBE_LOCK = () -> {}; - private static final Logger log = Logger.getLogger(GroupPreparer.class.getName()); - - private final NodeRepository nodeRepository; - private final Optional<HostProvisioner> hostProvisioner; - - /** Contains list of prepared nodes and the {@link LockedNodeList} object to use for next prepare call */ - record PrepareResult(List<Node> prepared, LockedNodeList allNodes) {} - - public GroupPreparer(NodeRepository nodeRepository, Optional<HostProvisioner> hostProvisioner) { - this.nodeRepository = nodeRepository; - this.hostProvisioner = hostProvisioner; - } - - /** - * Ensure sufficient nodes are reserved or active for the given application, group and cluster - * - * @param application the application we are allocating to - * @param cluster the cluster and group we are allocating to - * @param requestedNodes a specification of the requested nodes - * @param surplusActiveNodes currently active nodes which are available to be assigned to this group. - * This method will remove from this list if it finds it needs additional nodes - * @param indices the next available node indices for this cluster. - * This method will consume these when it allocates new nodes to the cluster. - * @param allNodes list of all nodes and hosts. Use {@link #createUnlockedNodeList()} to create param for - * first invocation. Then use previous {@link PrepareResult#allNodes()} for the following. - * @return the list of nodes this cluster group will have allocated if activated, and - */ - // Note: This operation may make persisted changes to the set of reserved and inactive nodes, - // but it may not change the set of active nodes, as the active nodes must stay in sync with the - // active config model which is changed on activate - public PrepareResult prepare(ApplicationId application, ClusterSpec cluster, NodeSpec requestedNodes, - List<Node> surplusActiveNodes, NodeIndices indices, - LockedNodeList allNodes) { - log.log(Level.FINE, () -> "Preparing " + cluster.type().name() + " " + cluster.id() + " with requested resources " + - requestedNodes.resources().orElse(NodeResources.unspecified())); - // Try preparing in memory without global unallocated lock. Most of the time there should be no changes, - // and we can return nodes previously allocated. - NodeAllocation probeAllocation = prepareAllocation(application, cluster, requestedNodes, surplusActiveNodes, - indices::probeNext, allNodes); - if (probeAllocation.fulfilledAndNoChanges()) { - List<Node> acceptedNodes = probeAllocation.finalNodes(); - surplusActiveNodes.removeAll(acceptedNodes); - indices.commitProbe(); - return new PrepareResult(acceptedNodes, allNodes); - } else { - // There were some changes, so re-do the allocation with locks - indices.resetProbe(); - List<Node> prepared = prepareWithLocks(application, cluster, requestedNodes, surplusActiveNodes, indices); - return new PrepareResult(prepared, createUnlockedNodeList()); - } - } - - // Use this to create allNodes param to prepare method for first invocation of prepare - LockedNodeList createUnlockedNodeList() { return nodeRepository.nodes().list(PROBE_LOCK); } - - /// Note that this will write to the node repo. - private List<Node> prepareWithLocks(ApplicationId application, ClusterSpec cluster, NodeSpec requestedNodes, - List<Node> surplusActiveNodes, NodeIndices indices) { - try (Mutex lock = nodeRepository.applications().lock(application); - Mutex allocationLock = nodeRepository.nodes().lockUnallocated()) { - LockedNodeList allNodes = nodeRepository.nodes().list(allocationLock); - NodeAllocation allocation = prepareAllocation(application, cluster, requestedNodes, surplusActiveNodes, - indices::next, allNodes); - NodeType hostType = allocation.nodeType().hostType(); - if (canProvisionDynamically(hostType) && allocation.hostDeficit().isPresent()) { - HostSharing sharing = hostSharing(cluster, hostType); - Version osVersion = nodeRepository.osVersions().targetFor(hostType).orElse(Version.emptyVersion); - NodeAllocation.HostDeficit deficit = allocation.hostDeficit().get(); - List<Node> hosts = new ArrayList<>(); - Consumer<List<ProvisionedHost>> whenProvisioned = provisionedHosts -> { - hosts.addAll(provisionedHosts.stream().map(host -> host.generateHost(requestedNodes.hostTTL())).toList()); - nodeRepository.nodes().addNodes(hosts, Agent.application); - - // Offer the nodes on the newly provisioned hosts, this should be enough to cover the deficit - List<NodeCandidate> candidates = provisionedHosts.stream() - .map(host -> NodeCandidate.createNewExclusiveChild(host.generateNode(), - host.generateHost(requestedNodes.hostTTL()))) - .toList(); - allocation.offer(candidates); - }; - try { - HostProvisionRequest request = new HostProvisionRequest(allocation.provisionIndices(deficit.count()), - hostType, - deficit.resources(), - application, - osVersion, - sharing, - Optional.of(cluster.type()), - Optional.of(cluster.id()), - requestedNodes.cloudAccount(), - deficit.dueToFlavorUpgrade()); - hostProvisioner.get().provisionHosts(request, whenProvisioned); - } catch (NodeAllocationException e) { - // Mark the nodes that were written to ZK in the consumer for deprovisioning. While these hosts do - // not exist, we cannot remove them from ZK here because other nodes may already have been - // allocated on them, so let HostDeprovisioner deal with it - hosts.forEach(host -> nodeRepository.nodes().deprovision(host.hostname(), Agent.system, nodeRepository.clock().instant())); - throw e; - } - } else if (allocation.hostDeficit().isPresent() && requestedNodes.canFail() && - allocation.hasRetiredJustNow() && requestedNodes instanceof NodeSpec.CountNodeSpec cns) { - // Non-dynamically provisioned zone with a deficit because we just now retired some nodes. - // Try again, but without retiring - indices.resetProbe(); - List<Node> accepted = prepareWithLocks(application, cluster, cns.withoutRetiring(), surplusActiveNodes, indices); - log.warning("Prepared " + application + " " + cluster.id() + " without retirement due to lack of capacity"); - return accepted; - } - - if (! allocation.fulfilled() && requestedNodes.canFail()) - throw new NodeAllocationException(allocation.allocationFailureDetails(), true); - - // Carry out and return allocation - List<Node> acceptedNodes = allocation.finalNodes(); - nodeRepository.nodes().reserve(allocation.reservableNodes()); - nodeRepository.nodes().addReservedNodes(new LockedNodeList(allocation.newNodes(), allocationLock)); - surplusActiveNodes.removeAll(acceptedNodes); - return acceptedNodes; - } - } - - private NodeAllocation prepareAllocation(ApplicationId application, ClusterSpec cluster, NodeSpec requestedNodes, - List<Node> surplusActiveNodes, Supplier<Integer> nextIndex, - LockedNodeList allNodes) { - - NodeAllocation allocation = new NodeAllocation(allNodes, application, cluster, requestedNodes, nextIndex, nodeRepository); - NodePrioritizer prioritizer = new NodePrioritizer(allNodes, - application, - cluster, - requestedNodes, - nodeRepository.zone().cloud().dynamicProvisioning(), - nodeRepository.nameResolver(), - nodeRepository.nodes(), - nodeRepository.resourcesCalculator(), - nodeRepository.spareCount(), - requestedNodes.cloudAccount().isExclave(nodeRepository.zone())); - allocation.offer(prioritizer.collect(surplusActiveNodes)); - return allocation; - } - - private boolean canProvisionDynamically(NodeType hostType) { - return nodeRepository.zone().cloud().dynamicProvisioning() && - (hostType == NodeType.host || hostType.isConfigServerHostLike()); - } - - private HostSharing hostSharing(ClusterSpec cluster, NodeType hostType) { - if ( hostType.isSharable()) - return nodeRepository.exclusiveAllocation(cluster) ? HostSharing.exclusive : HostSharing.any; - else - return HostSharing.any; - } - -} diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/LoadBalancerProvisioner.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/LoadBalancerProvisioner.java index 10aae94a986..5a35ed1cc42 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/LoadBalancerProvisioner.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/LoadBalancerProvisioner.java @@ -88,12 +88,12 @@ public class LoadBalancerProvisioner { * <p> * Calling this for irrelevant node or cluster types is a no-op. */ - public void prepare(ApplicationId application, ClusterSpec cluster, NodeSpec requestedNodes) { - if (!shouldProvision(application, requestedNodes.type(), cluster.type())) return; + public void prepare(ApplicationId application, ClusterSpec cluster, NodeSpec requested) { + if (!shouldProvision(application, requested.type(), cluster.type())) return; try (var lock = db.lock(application)) { ClusterSpec.Id clusterId = effectiveId(cluster); LoadBalancerId loadBalancerId = requireNonClashing(new LoadBalancerId(application, clusterId)); - prepare(loadBalancerId, cluster.zoneEndpoint(), requestedNodes.cloudAccount()); + prepare(loadBalancerId, cluster.zoneEndpoint(), requested.cloudAccount()); } } diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/NodeAllocation.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/NodeAllocation.java index 40e5909d4d9..1d05548e571 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/NodeAllocation.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/NodeAllocation.java @@ -51,7 +51,7 @@ class NodeAllocation { private final ClusterSpec cluster; /** The requested nodes of this list */ - private final NodeSpec requestedNodes; + private final NodeSpec requested; /** The node candidates this has accepted so far, keyed on hostname */ private final Map<String, NodeCandidate> nodes = new LinkedHashMap<>(); @@ -86,12 +86,12 @@ class NodeAllocation { private final NodeResourceLimits nodeResourceLimits; private final Optional<String> requiredHostFlavor; - NodeAllocation(NodeList allNodes, ApplicationId application, ClusterSpec cluster, NodeSpec requestedNodes, + NodeAllocation(NodeList allNodes, ApplicationId application, ClusterSpec cluster, NodeSpec requested, Supplier<Integer> nextIndex, NodeRepository nodeRepository) { this.allNodes = allNodes; this.application = application; this.cluster = cluster; - this.requestedNodes = requestedNodes; + this.requested = requested; this.nextIndex = nextIndex; this.nodeRepository = nodeRepository; this.nodeResourceLimits = new NodeResourceLimits(nodeRepository); @@ -122,11 +122,11 @@ class NodeAllocation { if ( candidate.state() == Node.State.active && allocation.removable()) continue; // don't accept; causes removal if ( candidate.state() == Node.State.active && candidate.wantToFail()) continue; // don't accept; causes failing if ( indexes.contains(membership.index())) continue; // duplicate index (just to be sure) - if (nodeRepository.zone().cloud().allowEnclave() && candidate.parent.isPresent() && ! candidate.parent.get().cloudAccount().equals(requestedNodes.cloudAccount())) continue; // wrong account + if (nodeRepository.zone().cloud().allowEnclave() && candidate.parent.isPresent() && ! candidate.parent.get().cloudAccount().equals(requested.cloudAccount())) continue; // wrong account - boolean resizeable = requestedNodes.considerRetiring() && candidate.isResizable; + boolean resizeable = requested.considerRetiring() && candidate.isResizable; - if ((! saturated() && hasCompatibleResources(candidate) && requestedNodes.acceptable(candidate)) || acceptIncompatible(candidate)) { + if ((! saturated() && hasCompatibleResources(candidate) && requested.acceptable(candidate)) || acceptIncompatible(candidate)) { candidate = candidate.withNode(); if (candidate.isValid()) acceptNode(candidate, shouldRetire(candidate, candidates), resizeable); @@ -150,7 +150,7 @@ class NodeAllocation { } candidate = candidate.allocate(application, ClusterMembership.from(cluster, nextIndex.get()), - requestedNodes.resources().orElse(candidate.resources()), + requested.resources().orElse(candidate.resources()), nodeRepository.clock().instant()); if (candidate.isValid()) { acceptNode(candidate, Retirement.none, false); @@ -161,7 +161,7 @@ class NodeAllocation { /** Returns the cause of retirement for given candidate */ private Retirement shouldRetire(NodeCandidate candidate, List<NodeCandidate> candidates) { - if ( ! requestedNodes.considerRetiring()) { + if ( ! requested.considerRetiring()) { boolean alreadyRetired = candidate.allocation().map(a -> a.membership().retired()).orElse(false); return alreadyRetired ? Retirement.alreadyRetired : Retirement.none; } @@ -199,7 +199,7 @@ class NodeAllocation { private boolean violatesExclusivity(NodeCandidate candidate) { if (candidate.parentHostname().isEmpty()) return false; - if (requestedNodes.type() != NodeType.tenant) return false; + if (requested.type() != NodeType.tenant) return false; // In zones which does not allow host sharing, exclusivity is violated if... if ( ! nodeRepository.zone().cloud().allowHostSharing()) { @@ -244,20 +244,20 @@ class NodeAllocation { if (candidate.state() != Node.State.active) return false; if (candidate.allocation().get().membership().retired()) return true; // don't second-guess if already retired - if ( ! requestedNodes.considerRetiring()) // the node is active and we are not allowed to remove gracefully, so keep + if ( ! requested.considerRetiring()) // the node is active and we are not allowed to remove gracefully, so keep return true; return cluster.isStateful() || (cluster.type() == ClusterSpec.Type.container && !hasCompatibleResources(candidate)); } private boolean hasCompatibleResources(NodeCandidate candidate) { - return requestedNodes.isCompatible(candidate.resources()) || candidate.isResizable; + return requested.isCompatible(candidate.resources()) || candidate.isResizable; } private Node acceptNode(NodeCandidate candidate, Retirement retirement, boolean resizeable) { Node node = candidate.toNode(); if (node.allocation().isPresent()) // Record the currently requested resources - node = node.with(node.allocation().get().withRequestedResources(requestedNodes.resources().orElse(node.resources()))); + node = node.with(node.allocation().get().withRequestedResources(requested.resources().orElse(node.resources()))); if (retirement == Retirement.none) { @@ -265,7 +265,7 @@ class NodeAllocation { // for the purpose of deciding when to stop accepting nodes (saturation) if (node.allocation().isEmpty() || (canBeUsedInGroupWithDeficiency(node) && - ! ( requestedNodes.needsResize(node) && (node.allocation().get().membership().retired() || ! requestedNodes.considerRetiring())))) { + ! (requested.needsResize(node) && (node.allocation().get().membership().retired() || ! requested.considerRetiring())))) { acceptedAndCompatible++; } @@ -296,12 +296,12 @@ class NodeAllocation { } private boolean canBeUsedInGroupWithDeficiency(Node node) { - if (requestedNodes.count().isEmpty()) return true; + if (requested.count().isEmpty()) return true; if (node.allocation().isEmpty()) return true; var group = node.allocation().get().membership().cluster().group(); if (group.isEmpty()) return true; long nodesInGroup = nodes.values().stream().filter(n -> groupOf(n).equals(group)).count(); - return nodesInGroup < requestedNodes.count().get() / requestedNodes.groups(); + return nodesInGroup < requested.groupSize(); } private Optional<ClusterSpec.Group> groupOf(NodeCandidate candidate) { @@ -310,10 +310,10 @@ class NodeAllocation { private Node resize(Node node) { NodeResources hostResources = allNodes.parentOf(node).get().flavor().resources(); - return node.with(new Flavor(requestedNodes.resources().get() - .with(hostResources.diskSpeed()) - .with(hostResources.storageType()) - .with(hostResources.architecture())), + return node.with(new Flavor(requested.resources().get() + .with(hostResources.diskSpeed()) + .with(hostResources.storageType()) + .with(hostResources.architecture())), Agent.application, nodeRepository.clock().instant()); } @@ -324,12 +324,12 @@ class NodeAllocation { /** Returns true if no more nodes are needed in this list */ public boolean saturated() { - return requestedNodes.saturatedBy(acceptedAndCompatible); + return requested.saturatedBy(acceptedAndCompatible); } /** Returns true if the content of this list is sufficient to meet the request */ boolean fulfilled() { - return requestedNodes.fulfilledBy(acceptedAndCompatibleOrResizable()); + return requested.fulfilledBy(acceptedAndCompatibleOrResizable()); } /** Returns true if this allocation was already fulfilled and resulted in no new changes */ @@ -352,10 +352,10 @@ class NodeAllocation { if (nodeType().isHost()) { return Optional.empty(); // Hosts are provisioned as required by the child application } - int deficit = requestedNodes.fulfilledDeficitCount(acceptedAndCompatibleOrResizable()); + int deficit = requested.fulfilledDeficitCount(acceptedAndCompatibleOrResizable()); // We can only require flavor upgrade if the entire deficit is caused by upgrades boolean dueToFlavorUpgrade = deficit == wasRetiredDueToFlavorUpgrade; - return Optional.of(new HostDeficit(requestedNodes.resources().orElseGet(NodeResources::unspecified), + return Optional.of(new HostDeficit(requested.resources().orElseGet(NodeResources::unspecified), deficit, dueToFlavorUpgrade)) .filter(hostDeficit -> hostDeficit.count() > 0); @@ -364,7 +364,7 @@ class NodeAllocation { /** Returns the indices to use when provisioning hosts for this */ List<Integer> provisionIndices(int count) { if (count < 1) throw new IllegalArgumentException("Count must be positive"); - NodeType hostType = requestedNodes.type().hostType(); + NodeType hostType = requested.type().hostType(); // Tenant hosts have a continuously increasing index if (hostType == NodeType.host) return nodeRepository.database().readProvisionIndices(count); @@ -398,7 +398,7 @@ class NodeAllocation { /** The node type this is allocating */ NodeType nodeType() { - return requestedNodes.type(); + return requested.type(); } List<Node> finalNodes() { @@ -411,10 +411,13 @@ class NodeAllocation { nodes.put(candidate.toNode().hostname(), candidate); } - GroupIndices groupIndices = new GroupIndices(requestedNodes, allNodes, nodeRepository.clock()); - Collection<NodeCandidate> finalNodes = groupIndices.assignTo(nodes.values()); + // Place in groups + GroupAssigner groupAssigner = new GroupAssigner(requested, allNodes, nodeRepository.clock()); + Collection<NodeCandidate> finalNodes = groupAssigner.assignTo(nodes.values()); nodes.clear(); finalNodes.forEach(candidate -> nodes.put(candidate.toNode().hostname(), candidate)); + + // Set cluster ID and index return finalNodes.stream().map(NodeCandidate::toNode).toList(); } @@ -452,7 +455,7 @@ class NodeAllocation { reasons.add("insufficient real resources on hosts"); if (reasons.isEmpty()) return ""; - return "Not enough suitable nodes available due to " + String.join(", ", reasons); + return ": Not enough suitable nodes available due to " + String.join(", ", reasons); } private static Integer parseIndex(String hostname) { diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/NodeIndices.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/NodeIndices.java index a5a098dbfd6..1ffd54872d3 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/NodeIndices.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/NodeIndices.java @@ -1,9 +1,10 @@ // Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. package com.yahoo.vespa.hosted.provision.provisioning; -import java.util.List; +import com.yahoo.config.provision.ClusterSpec; +import com.yahoo.vespa.hosted.provision.NodeList; -import static java.util.Comparator.naturalOrder; +import java.util.List; /** * Tracks indices of a node cluster, and proposes the index of the next allocation. @@ -18,8 +19,12 @@ class NodeIndices { private int probe; /** Pass the list of current indices in the cluster. */ + NodeIndices(ClusterSpec.Id cluster, NodeList allNodes) { + this(allNodes.cluster(cluster).mapToList(node -> node.allocation().get().membership().index())); + } + NodeIndices(List<Integer> used) { - this.used = List.copyOf(used); + this.used = used; this.last = -1; this.probe = last; } diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/NodePrioritizer.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/NodePrioritizer.java index 9f00e5fdbba..4ac90753ed1 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/NodePrioritizer.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/NodePrioritizer.java @@ -33,7 +33,7 @@ public class NodePrioritizer { private final LockedNodeList allNodes; private final HostCapacity capacity; private final HostResourcesCalculator calculator; - private final NodeSpec requestedNodes; + private final NodeSpec requested; private final ApplicationId application; private final ClusterSpec clusterSpec; private final NameResolver nameResolver; @@ -51,7 +51,7 @@ public class NodePrioritizer { this.allNodes = allNodes; this.calculator = hostResourcesCalculator; this.capacity = new HostCapacity(this.allNodes, hostResourcesCalculator); - this.requestedNodes = nodeSpec; + this.requested = nodeSpec; this.clusterSpec = clusterSpec; this.application = application; this.dynamicProvisioning = dynamicProvisioning; @@ -70,7 +70,7 @@ public class NodePrioritizer { .stream()) .distinct() .count(); - this.topologyChange = currentGroups != requestedNodes.groups(); + this.topologyChange = currentGroups != requested.groups(); this.currentClusterSize = (int) nonRetiredNodesInCluster.state(Node.State.active).stream().count(); @@ -81,9 +81,8 @@ public class NodePrioritizer { } /** Collects all node candidates for this application and returns them in the most-to-least preferred order */ - public List<NodeCandidate> collect(List<Node> surplusActiveNodes) { + public List<NodeCandidate> collect() { addApplicationNodes(); - addSurplusNodes(surplusActiveNodes); addReadyNodes(); addCandidatesOnExistingHosts(); return prioritize(); @@ -115,22 +114,9 @@ public class NodePrioritizer { return nodes; } - /** - * Add nodes that have been previously reserved to the same application from - * an earlier downsizing of a cluster - */ - private void addSurplusNodes(List<Node> surplusNodes) { - for (Node node : surplusNodes) { - NodeCandidate candidate = candidateFrom(node, true); - if (!candidate.violatesSpares || canAllocateToSpareHosts) { - candidates.add(candidate); - } - } - } - /** Add a node on each host with enough capacity for the requested flavor */ private void addCandidatesOnExistingHosts() { - if (requestedNodes.resources().isEmpty()) return; + if (requested.resources().isEmpty()) return; for (Node host : allNodes) { if ( ! nodes.canAllocateTenantNodeTo(host, dynamicProvisioning)) continue; @@ -140,10 +126,10 @@ public class NodePrioritizer { if (host.exclusiveToApplicationId().isPresent() && ! fitsPerfectly(host)) continue; if ( ! host.exclusiveToClusterType().map(clusterSpec.type()::equals).orElse(true)) continue; if (spareHosts.contains(host) && !canAllocateToSpareHosts) continue; - if ( ! capacity.hasCapacity(host, requestedNodes.resources().get())) continue; + if ( ! capacity.hasCapacity(host, requested.resources().get())) continue; if ( ! allNodes.childrenOf(host).owner(application).cluster(clusterSpec.id()).isEmpty()) continue; - candidates.add(NodeCandidate.createNewChild(requestedNodes.resources().get(), + candidates.add(NodeCandidate.createNewChild(requested.resources().get(), capacity.availableCapacityOf(host), host, spareHosts.contains(host), @@ -154,14 +140,14 @@ public class NodePrioritizer { } private boolean fitsPerfectly(Node host) { - return calculator.advertisedResourcesOf(host.flavor()).compatibleWith(requestedNodes.resources().get()); + return calculator.advertisedResourcesOf(host.flavor()).compatibleWith(requested.resources().get()); } /** Add existing nodes allocated to the application */ private void addApplicationNodes() { EnumSet<Node.State> legalStates = EnumSet.of(Node.State.active, Node.State.inactive, Node.State.reserved); allNodes.stream() - .filter(node -> node.type() == requestedNodes.type()) + .filter(node -> node.type() == requested.type()) .filter(node -> legalStates.contains(node.state())) .filter(node -> node.allocation().isPresent()) .filter(node -> node.allocation().get().owner().equals(application)) @@ -174,7 +160,7 @@ public class NodePrioritizer { /** Add nodes already provisioned, but not allocated to any application */ private void addReadyNodes() { allNodes.stream() - .filter(node -> node.type() == requestedNodes.type()) + .filter(node -> node.type() == requested.type()) .filter(node -> node.state() == Node.State.ready) .map(node -> candidateFrom(node, false)) .filter(n -> !n.violatesSpares || canAllocateToSpareHosts) @@ -193,11 +179,11 @@ public class NodePrioritizer { isSurplus, false, parent.exclusiveToApplicationId().isEmpty() - && requestedNodes.canResize(node.resources(), - capacity.unusedCapacityOf(parent), - clusterSpec.type(), - topologyChange, - currentClusterSize)); + && requested.canResize(node.resources(), + capacity.unusedCapacityOf(parent), + clusterSpec.type(), + topologyChange, + currentClusterSize)); } else { return NodeCandidate.createStandalone(node, isSurplus, false); } @@ -210,7 +196,7 @@ public class NodePrioritizer { .orElse(nodesInCluster); int failedNodesInGroup = nodesInGroup.failing().size() + nodesInGroup.state(Node.State.failed).size(); if (failedNodesInGroup == 0) return false; - return ! requestedNodes.fulfilledBy(nodesInGroup.size() - failedNodesInGroup); + return ! requested.fulfilledBy(nodesInGroup.size() - failedNodesInGroup); } /** diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/NodeRepositoryProvisioner.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/NodeRepositoryProvisioner.java index c29c51ccbd5..43b8cd08989 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/NodeRepositoryProvisioner.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/NodeRepositoryProvisioner.java @@ -86,7 +86,6 @@ public class NodeRepositoryProvisioner implements Provisioner { " for application " + application + ", cluster " + cluster); validate(application, cluster, requested, logger); - int groups; NodeResources resources; NodeSpec nodeSpec; if (requested.type() == NodeType.tenant) { @@ -239,6 +238,7 @@ public class NodeRepositoryProvisioner implements Provisioner { } private List<HostSpec> asSortedHosts(List<Node> nodes, NodeResources requestedResources) { + nodes = new ArrayList<>(nodes); nodes.sort(Comparator.comparingInt(node -> node.allocation().get().membership().index())); List<HostSpec> hosts = new ArrayList<>(nodes.size()); for (Node node : nodes) { diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/NodeSpec.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/NodeSpec.java index f4b2c4ceee0..cea0608013d 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/NodeSpec.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/NodeSpec.java @@ -40,6 +40,9 @@ public interface NodeSpec { int groups(); + /** Returns the group size requested if count() is present. Throws RuntimeException otherwise. */ + default int groupSize() { return count().get() / groups(); } + /** Returns whether this should throw an exception if the requested nodes are not fully available */ boolean canFail(); diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/Preparer.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/Preparer.java index 25efcabfe8e..8975dda8e60 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/Preparer.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/Preparer.java @@ -1,128 +1,180 @@ // Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. package com.yahoo.vespa.hosted.provision.provisioning; +import com.yahoo.component.Version; import com.yahoo.config.provision.ApplicationId; -import com.yahoo.config.provision.ClusterMembership; import com.yahoo.config.provision.ClusterSpec; import com.yahoo.config.provision.NodeAllocationException; +import com.yahoo.config.provision.NodeResources; +import com.yahoo.config.provision.NodeType; +import com.yahoo.transaction.Mutex; import com.yahoo.vespa.hosted.provision.LockedNodeList; import com.yahoo.vespa.hosted.provision.Node; import com.yahoo.vespa.hosted.provision.NodeList; import com.yahoo.vespa.hosted.provision.NodeRepository; +import com.yahoo.vespa.hosted.provision.node.Agent; +import com.yahoo.vespa.hosted.provision.provisioning.HostProvisioner.HostSharing; -import java.time.Clock; import java.util.ArrayList; import java.util.List; -import java.util.ListIterator; import java.util.Optional; +import java.util.function.Consumer; +import java.util.function.Supplier; +import java.util.logging.Level; +import java.util.logging.Logger; /** - * Performs preparation of node activation changes for an application. + * Performs preparation of node activation changes for a cluster of an application. * * @author bratseth */ -class Preparer { +public class Preparer { - private final GroupPreparer groupPreparer; + private static final Mutex PROBE_LOCK = () -> {}; + private static final Logger log = Logger.getLogger(Preparer.class.getName()); + + private final NodeRepository nodeRepository; + private final Optional<HostProvisioner> hostProvisioner; private final Optional<LoadBalancerProvisioner> loadBalancerProvisioner; - public Preparer(NodeRepository nodeRepository, Optional<HostProvisioner> hostProvisioner, - Optional<LoadBalancerProvisioner> loadBalancerProvisioner) { + public Preparer(NodeRepository nodeRepository, Optional<HostProvisioner> hostProvisioner, Optional<LoadBalancerProvisioner> loadBalancerProvisioner) { + this.nodeRepository = nodeRepository; + this.hostProvisioner = hostProvisioner; this.loadBalancerProvisioner = loadBalancerProvisioner; - this.groupPreparer = new GroupPreparer(nodeRepository, hostProvisioner); - } - - /** Prepare all required resources for the given application and cluster */ - public List<Node> prepare(ApplicationId application, ClusterSpec cluster, NodeSpec requestedNodes) { - try { - var nodes = prepareNodes(application, cluster, requestedNodes); - prepareLoadBalancer(application, cluster, requestedNodes); - return nodes; - } - catch (NodeAllocationException e) { - throw new NodeAllocationException("Could not satisfy " + requestedNodes + - " in " + application + " " + cluster, e, - e.retryable()); - } } /** - * Ensure sufficient nodes are reserved or active for the given application and cluster + * Ensure sufficient nodes are reserved or active for the given application, group and cluster * - * @return the list of nodes this cluster will have allocated if activated + * @param application the application we are allocating to + * @param cluster the cluster and group we are allocating to + * @param requested a specification of the requested nodes + * @return the list of nodes this cluster group will have allocated if activated */ - // Note: This operation may make persisted changes to the set of reserved and inactive nodes, - // but it may not change the set of active nodes, as the active nodes must stay in sync with the - // active config model which is changed on activate - private List<Node> prepareNodes(ApplicationId application, ClusterSpec cluster, NodeSpec requestedNodes) { - LockedNodeList allNodes = groupPreparer.createUnlockedNodeList(); - NodeList clusterNodes = allNodes.owner(application); - List<Node> surplusNodes = findNodesInRemovableGroups(clusterNodes, requestedNodes.groups()); - - List<Integer> usedIndices = clusterNodes.mapToList(node -> node.allocation().get().membership().index()); - NodeIndices indices = new NodeIndices(usedIndices); - List<Node> acceptedNodes = new ArrayList<>(); - - GroupPreparer.PrepareResult result = groupPreparer.prepare(application, cluster, - requestedNodes, - surplusNodes, indices, - allNodes); - List<Node> accepted = result.prepared(); - if (requestedNodes.rejectNonActiveParent()) { - NodeList activeHosts = result.allNodes().state(Node.State.active).parents().nodeType(requestedNodes.type().hostType()); - accepted = accepted.stream() - .filter(node -> node.parentHostname().isEmpty() || activeHosts.parentOf(node).isPresent()) - .toList(); - } + // Note: This operation may make persisted changes to the set of reserved and inactive nodes, + // but it may not change the set of active nodes, as the active nodes must stay in sync with the + // active config model which is changed on activate + public List<Node> prepare(ApplicationId application, ClusterSpec cluster, NodeSpec requested) { + log.log(Level.FINE, () -> "Preparing " + cluster.type().name() + " " + cluster.id() + " with requested resources " + + requested.resources().orElse(NodeResources.unspecified())); - replace(acceptedNodes, accepted); - moveToActiveGroup(surplusNodes, requestedNodes.groups(), cluster.group()); - acceptedNodes.removeAll(surplusNodes); - return acceptedNodes; - } - - /** Prepare a load balancer for given application and cluster */ - public void prepareLoadBalancer(ApplicationId application, ClusterSpec cluster, NodeSpec requestedNodes) { - loadBalancerProvisioner.ifPresent(provisioner -> provisioner.prepare(application, cluster, requestedNodes)); - } + loadBalancerProvisioner.ifPresent(provisioner -> provisioner.prepare(application, cluster, requested)); - /** - * Returns a list of the nodes which are - * in groups with index number above or equal the group count - */ - private List<Node> findNodesInRemovableGroups(NodeList clusterNodes, int wantedGroups) { - List<Node> surplusNodes = new ArrayList<>(); - for (Node node : clusterNodes.state(Node.State.active)) { - ClusterSpec nodeCluster = node.allocation().get().membership().cluster(); - if (nodeCluster.group().get().index() >= wantedGroups) - surplusNodes.add(node); + // Try preparing in memory without global unallocated lock. Most of the time there should be no changes, + // and we can return nodes previously allocated. + LockedNodeList allNodes = nodeRepository.nodes().list(PROBE_LOCK); + NodeIndices indices = new NodeIndices(cluster.id(), allNodes); + NodeAllocation probeAllocation = prepareAllocation(application, cluster, requested, indices::probeNext, allNodes); + if (probeAllocation.fulfilledAndNoChanges()) { + List<Node> acceptedNodes = probeAllocation.finalNodes(); + indices.commitProbe(); + return acceptedNodes; + } else { + // There were some changes, so re-do the allocation with locks + indices.resetProbe(); + return prepareWithLocks(application, cluster, requested, indices); } - return surplusNodes; } - /** Move nodes from unwanted groups to wanted groups to avoid lingering groups consisting of retired nodes */ - private void moveToActiveGroup(List<Node> surplusNodes, int wantedGroups, Optional<ClusterSpec.Group> targetGroup) { - for (ListIterator<Node> i = surplusNodes.listIterator(); i.hasNext(); ) { - Node node = i.next(); - ClusterMembership membership = node.allocation().get().membership(); - ClusterSpec cluster = membership.cluster(); - if (cluster.group().get().index() >= wantedGroups) { - ClusterSpec.Group newGroup = targetGroup.orElse(ClusterSpec.Group.from(0)); - ClusterMembership newGroupMembership = membership.with(cluster.with(Optional.of(newGroup))); - i.set(node.with(node.allocation().get().with(newGroupMembership))); + /// Note that this will write to the node repo. + private List<Node> prepareWithLocks(ApplicationId application, ClusterSpec cluster, NodeSpec requested, NodeIndices indices) { + try (Mutex lock = nodeRepository.applications().lock(application); + Mutex allocationLock = nodeRepository.nodes().lockUnallocated()) { + LockedNodeList allNodes = nodeRepository.nodes().list(allocationLock); + NodeAllocation allocation = prepareAllocation(application, cluster, requested, indices::next, allNodes); + NodeType hostType = allocation.nodeType().hostType(); + if (canProvisionDynamically(hostType) && allocation.hostDeficit().isPresent()) { + HostSharing sharing = hostSharing(cluster, hostType); + Version osVersion = nodeRepository.osVersions().targetFor(hostType).orElse(Version.emptyVersion); + NodeAllocation.HostDeficit deficit = allocation.hostDeficit().get(); + List<Node> hosts = new ArrayList<>(); + Consumer<List<ProvisionedHost>> whenProvisioned = provisionedHosts -> { + hosts.addAll(provisionedHosts.stream().map(host -> host.generateHost(requested.hostTTL())).toList()); + nodeRepository.nodes().addNodes(hosts, Agent.application); + + // Offer the nodes on the newly provisioned hosts, this should be enough to cover the deficit + List<NodeCandidate> candidates = provisionedHosts.stream() + .map(host -> NodeCandidate.createNewExclusiveChild(host.generateNode(), + host.generateHost(requested.hostTTL()))) + .toList(); + allocation.offer(candidates); + }; + try { + HostProvisionRequest request = new HostProvisionRequest(allocation.provisionIndices(deficit.count()), + hostType, + deficit.resources(), + application, + osVersion, + sharing, + Optional.of(cluster.type()), + Optional.of(cluster.id()), + requested.cloudAccount(), + deficit.dueToFlavorUpgrade()); + hostProvisioner.get().provisionHosts(request, whenProvisioned); + } catch (NodeAllocationException e) { + // Mark the nodes that were written to ZK in the consumer for deprovisioning. While these hosts do + // not exist, we cannot remove them from ZK here because other nodes may already have been + // allocated on them, so let HostDeprovisioner deal with it + hosts.forEach(host -> nodeRepository.nodes().deprovision(host.hostname(), Agent.system, nodeRepository.clock().instant())); + throw e; + } + } else if (allocation.hostDeficit().isPresent() && requested.canFail() && + allocation.hasRetiredJustNow() && requested instanceof NodeSpec.CountNodeSpec cns) { + // Non-dynamically provisioned zone with a deficit because we just now retired some nodes. + // Try again, but without retiring + indices.resetProbe(); + List<Node> accepted = prepareWithLocks(application, cluster, cns.withoutRetiring(), indices); + log.warning("Prepared " + application + " " + cluster.id() + " without retirement due to lack of capacity"); + return accepted; } + + if (! allocation.fulfilled() && requested.canFail()) + throw new NodeAllocationException("Could not satisfy " + requested + " in " + application + " " + cluster + + allocation.allocationFailureDetails(), true); + + // Carry out and return allocation + List<Node> acceptedNodes = allocation.finalNodes(); + nodeRepository.nodes().reserve(allocation.reservableNodes()); + nodeRepository.nodes().addReservedNodes(new LockedNodeList(allocation.newNodes(), allocationLock)); + + if (requested.rejectNonActiveParent()) { // TODO: Move into offer() - currently this must be done *after* reserving + NodeList activeHosts = allNodes.state(Node.State.active).parents().nodeType(requested.type().hostType()); + acceptedNodes = acceptedNodes.stream() + .filter(node -> node.parentHostname().isEmpty() || activeHosts.parentOf(node).isPresent()) + .toList(); + } + return acceptedNodes; } } - /** - * Nodes are immutable so when changing attributes to the node we create a new instance. - * - * This method is used to both add new nodes and replaces old node references with the new references. - */ - private List<Node> replace(List<Node> list, List<Node> changed) { - list.removeAll(changed); - list.addAll(changed); - return list; + private NodeAllocation prepareAllocation(ApplicationId application, ClusterSpec cluster, NodeSpec requested, + Supplier<Integer> nextIndex, LockedNodeList allNodes) { + + NodeAllocation allocation = new NodeAllocation(allNodes, application, cluster, requested, nextIndex, nodeRepository); + NodePrioritizer prioritizer = new NodePrioritizer(allNodes, + application, + cluster, + requested, + nodeRepository.zone().cloud().dynamicProvisioning(), + nodeRepository.nameResolver(), + nodeRepository.nodes(), + nodeRepository.resourcesCalculator(), + nodeRepository.spareCount(), + requested.cloudAccount().isExclave(nodeRepository.zone())); + allocation.offer(prioritizer.collect()); + return allocation; + } + + private boolean canProvisionDynamically(NodeType hostType) { + return nodeRepository.zone().cloud().dynamicProvisioning() && + (hostType == NodeType.host || hostType.isConfigServerHostLike()); + } + + private HostSharing hostSharing(ClusterSpec cluster, NodeType hostType) { + if ( hostType.isSharable()) + return nodeRepository.exclusiveAllocation(cluster) ? HostSharing.exclusive : HostSharing.any; + else + return HostSharing.any; } } |