diff options
Diffstat (limited to 'node-repository/src/main/java/com')
19 files changed, 394 insertions, 185 deletions
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/LockedNodeList.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/LockedNodeList.java index 9bc18533ddf..e760e36f90b 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/LockedNodeList.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/LockedNodeList.java @@ -24,7 +24,7 @@ public final class LockedNodeList extends NodeList { this.lock = Objects.requireNonNull(lock, "lock must be non-null"); } - /** Returns a new LockedNodeList with the for the same lock. */ + /** Returns a new LockedNodeList with the same lock. */ public LockedNodeList childList(List<Node> nodes) { return new LockedNodeList(nodes, lock); } diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/NodeRepository.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/NodeRepository.java index 9da66413b9c..f3d69fdf103 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/NodeRepository.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/NodeRepository.java @@ -205,7 +205,7 @@ public class NodeRepository extends AbstractComponent { */ public boolean exclusiveAllocation(ClusterSpec clusterSpec) { return clusterSpec.isExclusive() || - ( clusterSpec.type().isContainer() && zone.system().isPublic() && !zone.environment().isTest() ) || + ( clusterSpec.type().isContainer() && zone.system().isPublic() && !zone.environment().isTest() ) || ( !zone().cloud().allowHostSharing() && !sharedHosts.value().isEnabled(clusterSpec.type().name())); } diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/AllocatableClusterResources.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/AllocatableClusterResources.java index a2ef76e84d0..40d1d50e0e8 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/AllocatableClusterResources.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/AllocatableClusterResources.java @@ -195,6 +195,7 @@ public class AllocatableClusterResources { else { // Return the cheapest flavor satisfying the requested resources, if any NodeResources cappedWantedResources = applicationLimits.cap(wantedResources.nodeResources()); Optional<AllocatableClusterResources> best = Optional.empty(); + Optional<AllocatableClusterResources> bestDisregardingDiskLimit = Optional.empty(); for (Flavor flavor : nodeRepository.flavors().getFlavors()) { // Flavor decide resources: Real resources are the worst case real resources we'll get if we ask for these advertised resources NodeResources advertisedResources = nodeRepository.resourcesCalculator().advertisedResourcesOf(flavor); @@ -202,7 +203,9 @@ public class AllocatableClusterResources { // Adjust where we don't need exact match to the flavor if (flavor.resources().storageType() == NodeResources.StorageType.remote) { - double diskGb = systemLimits.enlargeToLegal(cappedWantedResources, applicationId, clusterSpec, exclusive).diskGb(); + double diskGb = systemLimits.enlargeToLegal(cappedWantedResources, applicationId, clusterSpec, exclusive, true).diskGb(); + if (diskGb > applicationLimits.max().nodeResources().diskGb() || diskGb < applicationLimits.min().nodeResources().diskGb()) // TODO: Remove when disk limit is enforced + diskGb = systemLimits.enlargeToLegal(cappedWantedResources, applicationId, clusterSpec, exclusive, false).diskGb(); advertisedResources = advertisedResources.withDiskGb(diskGb); realResources = realResources.withDiskGb(diskGb); } @@ -213,14 +216,24 @@ public class AllocatableClusterResources { if ( ! between(applicationLimits.min().nodeResources(), applicationLimits.max().nodeResources(), advertisedResources)) continue; if ( ! systemLimits.isWithinRealLimits(realResources, applicationId, clusterSpec)) continue; + var candidate = new AllocatableClusterResources(wantedResources.with(realResources), advertisedResources, wantedResources, clusterSpec); + + if ( ! systemLimits.isWithinAdvertisedDiskLimits(advertisedResources, clusterSpec)) { // TODO: Remove when disk limit is enforced + if (bestDisregardingDiskLimit.isEmpty() || candidate.preferableTo(bestDisregardingDiskLimit.get())) { + bestDisregardingDiskLimit = Optional.of(candidate); + } + continue; + } if (best.isEmpty() || candidate.preferableTo(best.get())) { best = Optional.of(candidate); } } + if (best.isEmpty()) + best = bestDisregardingDiskLimit; return best; } } @@ -234,7 +247,7 @@ public class AllocatableClusterResources { boolean bestCase) { var systemLimits = new NodeResourceLimits(nodeRepository); var advertisedResources = nodeRepository.resourcesCalculator().realToRequest(wantedResources.nodeResources(), exclusive, bestCase); - advertisedResources = systemLimits.enlargeToLegal(advertisedResources, applicationId, clusterSpec, exclusive); // Ask for something legal + advertisedResources = systemLimits.enlargeToLegal(advertisedResources, applicationId, clusterSpec, exclusive, true); // Ask for something legal advertisedResources = applicationLimits.cap(advertisedResources); // Overrides other conditions, even if it will then fail var realResources = nodeRepository.resourcesCalculator().requestToReal(advertisedResources, exclusive, bestCase); // What we'll really get if ( ! systemLimits.isWithinRealLimits(realResources, applicationId, clusterSpec) diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/AllocationOptimizer.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/AllocationOptimizer.java index b56e8d1b247..2287b768dee 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/AllocationOptimizer.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/AllocationOptimizer.java @@ -5,6 +5,7 @@ import com.yahoo.config.provision.ClusterResources; import com.yahoo.config.provision.IntRange; import com.yahoo.config.provision.NodeResources; import com.yahoo.vespa.hosted.provision.NodeRepository; +import com.yahoo.vespa.hosted.provision.provisioning.NodeResourceLimits; import java.util.Optional; @@ -63,9 +64,8 @@ public class AllocationOptimizer { availableRealHostResources, nodeRepository); if (allocatableResources.isEmpty()) continue; - if (bestAllocation.isEmpty() || allocatableResources.get().preferableTo(bestAllocation.get())) { + if (bestAllocation.isEmpty() || allocatableResources.get().preferableTo(bestAllocation.get())) bestAllocation = allocatableResources; - } } } return bestAllocation; diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/HostCapacityMaintainer.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/HostCapacityMaintainer.java index 2a0b4f02b20..331759127e4 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/HostCapacityMaintainer.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/HostCapacityMaintainer.java @@ -268,11 +268,9 @@ public class HostCapacityMaintainer extends NodeRepositoryMaintainer { // build() requires a version, even though it is not (should not be) used .vespaVersion(Vtag.currentVersion) .build(); - NodeSpec nodeSpec = NodeSpec.from(clusterCapacity.count(), nodeResources, false, true, + NodeSpec nodeSpec = NodeSpec.from(clusterCapacity.count(), 1, nodeResources, false, true, nodeRepository().zone().cloud().account(), Duration.ZERO); - int wantedGroups = 1; - - NodePrioritizer prioritizer = new NodePrioritizer(allNodes, applicationId, clusterSpec, nodeSpec, wantedGroups, + NodePrioritizer prioritizer = new NodePrioritizer(allNodes, applicationId, clusterSpec, nodeSpec, true, nodeRepository().nameResolver(), nodeRepository().nodes(), nodeRepository().resourcesCalculator(), nodeRepository().spareCount(), nodeSpec.cloudAccount().isExclave(nodeRepository().zone())); List<NodeCandidate> nodeCandidates = prioritizer.collect(List.of()); diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/node/Nodes.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/node/Nodes.java index eec195ccfcb..0bb045dc6a1 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/node/Nodes.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/node/Nodes.java @@ -24,7 +24,6 @@ import com.yahoo.vespa.hosted.provision.applications.Applications; import com.yahoo.vespa.hosted.provision.maintenance.NodeFailer; import com.yahoo.vespa.hosted.provision.node.filter.NodeFilter; import com.yahoo.vespa.hosted.provision.persistence.CuratorDb; -import com.yahoo.vespa.hosted.provision.provisioning.HostIpConfig; import com.yahoo.vespa.orchestrator.HostNameNotFoundException; import com.yahoo.vespa.orchestrator.Orchestrator; @@ -36,7 +35,6 @@ import java.util.Collection; import java.util.Comparator; import java.util.EnumSet; import java.util.HashSet; -import java.util.Iterator; import java.util.List; import java.util.NavigableSet; import java.util.Optional; @@ -48,6 +46,7 @@ import java.util.function.Predicate; import java.util.logging.Level; import java.util.logging.Logger; +import static com.yahoo.collections.Iterables.reversed; import static com.yahoo.vespa.hosted.provision.restapi.NodePatcher.DROP_DOCUMENTS_REPORT; import static java.util.Comparator.comparing; import static java.util.stream.Collectors.groupingBy; @@ -968,7 +967,7 @@ public class Nodes { // If the first node is now earlier in lock order than some other locks we have, we need to close those and re-acquire them. Node next = unlocked.pollFirst(); Set<NodeMutex> outOfOrder = locked.tailSet(new NodeMutex(next, () -> { }), false); - NodeMutexes.close(outOfOrder.iterator()); + NodeMutexes.close(outOfOrder); for (NodeMutex node : outOfOrder) unlocked.add(node.node()); outOfOrder.clear(); @@ -1002,15 +1001,25 @@ public class Nodes { } finally { // If we didn't manage to lock all nodes, we must close the ones we did lock before we throw. - NodeMutexes.close(locked.iterator()); + NodeMutexes.close(locked); } } /** A node with their locks, acquired in a universal order. */ public record NodeMutexes(List<NodeMutex> nodes) implements AutoCloseable { - @Override public void close() { close(nodes.iterator()); } - private static void close(Iterator<NodeMutex> nodes) { - if (nodes.hasNext()) try (NodeMutex node = nodes.next()) { close(nodes); } + @Override public void close() { close(nodes); } + private static void close(Collection<NodeMutex> nodes) { + RuntimeException thrown = null; + for (NodeMutex node : reversed(List.copyOf(nodes))) { + try { + node.close(); + } + catch (RuntimeException e) { + if (thrown == null) thrown = e; + else thrown.addSuppressed(e); + } + } + if (thrown != null) throw thrown; } } diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/Activator.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/Activator.java index c25f33bc8c2..9adff9f9d7a 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/Activator.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/Activator.java @@ -14,7 +14,6 @@ import com.yahoo.vespa.hosted.provision.NodeMutex; import com.yahoo.vespa.hosted.provision.NodeRepository; import com.yahoo.vespa.hosted.provision.applications.Application; import com.yahoo.vespa.hosted.provision.applications.ScalingEvent; -import com.yahoo.vespa.hosted.provision.autoscale.Autoscaling; import com.yahoo.vespa.hosted.provision.node.Agent; import com.yahoo.vespa.hosted.provision.node.Allocation; @@ -71,8 +70,8 @@ class Activator { NodeList allNodes = nodeRepository.nodes().list(); NodeList applicationNodes = allNodes.owner(application); - NodeList reserved = updatePortsFrom(hosts, applicationNodes.state(Node.State.reserved) - .matching(node -> hostnames.contains(node.hostname()))); + NodeList reserved = applicationNodes.state(Node.State.reserved).matching(node -> hostnames.contains(node.hostname())); + reserved = updatePortsFrom(hosts, reserved); nodeRepository.nodes().reserve(reserved.asList()); // Re-reserve nodes to avoid reservation expiry NodeList oldActive = applicationNodes.state(Node.State.active); // All nodes active now diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/CapacityPolicies.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/CapacityPolicies.java index bfd06d744f6..8a39f309935 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/CapacityPolicies.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/CapacityPolicies.java @@ -9,11 +9,11 @@ import com.yahoo.config.provision.ClusterResources; import com.yahoo.config.provision.ClusterSpec; import com.yahoo.config.provision.Environment; import com.yahoo.config.provision.NodeResources; -import com.yahoo.config.provision.SystemName; import com.yahoo.config.provision.Zone; import com.yahoo.vespa.flags.PermanentFlags; import com.yahoo.vespa.flags.StringFlag; import com.yahoo.vespa.hosted.provision.NodeRepository; + import java.util.Map; import java.util.TreeMap; @@ -115,10 +115,6 @@ public class CapacityPolicies { return versioned(clusterSpec, Map.of(new Version(0), smallestSharedResources())).with(architecture); } - if (zone.environment() == Environment.dev && zone.system() == SystemName.cd) { - return versioned(clusterSpec, Map.of(new Version(0), new NodeResources(1.5, 4, 50, 0.3))); - } - if (clusterSpec.type() == ClusterSpec.Type.content) { return zone.cloud().dynamicProvisioning() ? versioned(clusterSpec, Map.of(new Version(0), new NodeResources(2, 16, 300, 0.3))) diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/GroupIndices.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/GroupIndices.java new file mode 100644 index 00000000000..44f371be293 --- /dev/null +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/GroupIndices.java @@ -0,0 +1,163 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.vespa.hosted.provision.provisioning; + +import com.yahoo.config.provision.ClusterSpec; +import com.yahoo.config.provision.Flavor; +import com.yahoo.config.provision.NodeResources; +import com.yahoo.vespa.hosted.provision.Node; +import com.yahoo.vespa.hosted.provision.NodeList; +import com.yahoo.vespa.hosted.provision.node.Agent; + +import java.time.Clock; +import java.util.Collection; +import java.util.Comparator; +import java.util.List; +import java.util.Optional; + +/** + * Knows how to assign a group index to a number of nodes (some of which have an index already), + * such that the nodes are placed in the desired groups with minimal group movement. + * + * @author bratseth + */ +class GroupIndices { + + private final NodeSpec requested; + private final NodeList allNodes; + private final Clock clock; + + GroupIndices(NodeSpec requested, NodeList allNodes, Clock clock) { + if (requested.groups() > 1 && requested.count().isEmpty()) + throw new IllegalArgumentException("Unlimited nodes cannot be grouped"); + this.requested = requested; + this.allNodes = allNodes; + this.clock = clock; + } + + Collection<NodeCandidate> assignTo(Collection<NodeCandidate> nodes) { + int[] countInGroup = countInEachGroup(nodes); + nodes = byUnretiringPriority(nodes).stream().map(node -> unretireNodeInExpandedGroup(node, countInGroup)).toList(); + nodes = nodes.stream().map(node -> assignGroupToNewNode(node, countInGroup)).toList(); + nodes = byUnretiringPriority(nodes).stream().map(node -> moveNodeInSurplusGroup(node, countInGroup)).toList(); + nodes = byRetiringPriority(nodes).stream().map(node -> retireSurplusNodeInGroup(node, countInGroup)).toList(); + nodes = nodes.stream().filter(node -> ! shouldRemove(node)).toList(); + return nodes; + } + + /** Prefer to retire nodes we want the least */ + private List<NodeCandidate> byRetiringPriority(Collection<NodeCandidate> candidates) { + return candidates.stream().sorted(Comparator.reverseOrder()).toList(); + } + + /** Prefer to unretire nodes we don't want to retire, and otherwise those with lower index */ + private List<NodeCandidate> byUnretiringPriority(Collection<NodeCandidate> candidates) { + return candidates.stream() + .sorted(Comparator.comparing(NodeCandidate::wantToRetire) + .thenComparing(n -> n.allocation().get().membership().index())) + .toList(); + } + + private int[] countInEachGroup(Collection<NodeCandidate> nodes) { + int[] countInGroup = new int[requested.groups()]; + for (var node : nodes) { + if (node.allocation().get().membership().retired()) continue; + var currentGroup = node.allocation().get().membership().cluster().group(); + if (currentGroup.isEmpty()) continue; + if (currentGroup.get().index() >= requested.groups()) continue; + countInGroup[currentGroup.get().index()]++; + } + return countInGroup; + } + + /** Assign a group to new or to be reactivated nodes. */ + private NodeCandidate assignGroupToNewNode(NodeCandidate node, int[] countInGroup) { + if (node.state() == Node.State.active && node.allocation().get().membership().retired()) return node; + if (node.state() == Node.State.active && node.allocation().get().membership().cluster().group().isPresent()) return node; + return inFirstGroupWithDeficiency(node, countInGroup); + } + + private NodeCandidate moveNodeInSurplusGroup(NodeCandidate node, int[] countInGroup) { + var currentGroup = node.allocation().get().membership().cluster().group(); + if (currentGroup.isEmpty()) return node; // Shouldn't happen + if (currentGroup.get().index() < requested.groups()) return node; + return inFirstGroupWithDeficiency(node, countInGroup); + } + + private NodeCandidate retireSurplusNodeInGroup(NodeCandidate node, int[] countInGroup) { + if (node.allocation().get().membership().retired()) return node; + var currentGroup = node.allocation().get().membership().cluster().group(); + if (currentGroup.isEmpty()) return node; + if (currentGroup.get().index() >= requested.groups()) return node; + if (requested.count().isEmpty()) return node; // Can't retire + if (countInGroup[currentGroup.get().index()] <= requested.count().get() / requested.groups()) return node; + countInGroup[currentGroup.get().index()]--; + return node.withNode(node.toNode().retire(Agent.application, clock.instant())); + } + + /** Unretire nodes that are already in the correct group when the group is deficient. */ + private NodeCandidate unretireNodeInExpandedGroup(NodeCandidate node, int[] countInGroup) { + if ( ! node.allocation().get().membership().retired()) return node; + var currentGroup = node.allocation().get().membership().cluster().group(); + if (currentGroup.isEmpty()) return node; + if (currentGroup.get().index() >= requested.groups()) return node; + if (node.preferToRetire() || node.wantToRetire()) return node; + if (requested.count().isPresent() && countInGroup[currentGroup.get().index()] >= requested.count().get() / requested.groups()) return node; + node = unretire(node); + if (node.allocation().get().membership().retired()) return node; + countInGroup[currentGroup.get().index()]++; + return node; + } + + private NodeCandidate inFirstGroupWithDeficiency(NodeCandidate node, int[] countInGroup) { + for (int group = 0; group < requested.groups(); group++) { + if (requested.count().isEmpty() || countInGroup[group] < requested.count().get() / requested.groups()) { + return inGroup(group, node, countInGroup); + } + } + return node; + } + + private boolean shouldRemove(NodeCandidate node) { + var currentGroup = node.allocation().get().membership().cluster().group(); + if (currentGroup.isEmpty()) return true; // new and not assigned an index: Not needed + return currentGroup.get().index() >= requested.groups(); + } + + private NodeCandidate inGroup(int group, NodeCandidate node, int[] countInGroup) { + node = unretire(node); + if (node.allocation().get().membership().retired()) return node; + var membership = node.allocation().get().membership(); + var currentGroup = membership.cluster().group(); + countInGroup[group]++; + if ( ! currentGroup.isEmpty() && currentGroup.get().index() < requested.groups()) + countInGroup[membership.cluster().group().get().index()]--; + return node.withNode(node.toNode().with(node.allocation().get().with(membership.with(membership.cluster().with(Optional.of(ClusterSpec.Group.from(group))))))); + } + + /** Attempt to unretire the given node if it is retired. */ + private NodeCandidate unretire(NodeCandidate node) { + if (node.retiredNow()) return node; + if ( ! node.allocation().get().membership().retired()) return node; + if ( ! hasCompatibleResources(node) ) return node; + var parent = node.parentHostname().flatMap(hostname -> allNodes.node(hostname)); + if (parent.isPresent() && (parent.get().status().wantToRetire() || parent.get().status().preferToRetire())) return node; + node = node.withNode(); + if ( ! requested.isCompatible(node.resources())) + node = node.withNode(resize(node.toNode())); + return node.withNode(node.toNode().unretire()); + } + + private Node resize(Node node) { + NodeResources hostResources = allNodes.parentOf(node).get().flavor().resources(); + return node.with(new Flavor(requested.resources().get() + .with(hostResources.diskSpeed()) + .with(hostResources.storageType()) + .with(hostResources.architecture())), + Agent.application, clock.instant()); + } + + private boolean hasCompatibleResources(NodeCandidate candidate) { + return requested.isCompatible(candidate.resources()) || candidate.isResizable; + } + +} diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/GroupPreparer.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/GroupPreparer.java index 0c4838abe4d..e6b47d38779 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/GroupPreparer.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/GroupPreparer.java @@ -61,14 +61,14 @@ public class GroupPreparer { // but it may not change the set of active nodes, as the active nodes must stay in sync with the // active config model which is changed on activate public PrepareResult prepare(ApplicationId application, ClusterSpec cluster, NodeSpec requestedNodes, - List<Node> surplusActiveNodes, NodeIndices indices, int wantedGroups, + List<Node> surplusActiveNodes, NodeIndices indices, LockedNodeList allNodes) { log.log(Level.FINE, () -> "Preparing " + cluster.type().name() + " " + cluster.id() + " with requested resources " + requestedNodes.resources().orElse(NodeResources.unspecified())); // Try preparing in memory without global unallocated lock. Most of the time there should be no changes, // and we can return nodes previously allocated. NodeAllocation probeAllocation = prepareAllocation(application, cluster, requestedNodes, surplusActiveNodes, - indices::probeNext, wantedGroups, allNodes); + indices::probeNext, allNodes); if (probeAllocation.fulfilledAndNoChanges()) { List<Node> acceptedNodes = probeAllocation.finalNodes(); surplusActiveNodes.removeAll(acceptedNodes); @@ -77,7 +77,7 @@ public class GroupPreparer { } else { // There were some changes, so re-do the allocation with locks indices.resetProbe(); - List<Node> prepared = prepareWithLocks(application, cluster, requestedNodes, surplusActiveNodes, indices, wantedGroups); + List<Node> prepared = prepareWithLocks(application, cluster, requestedNodes, surplusActiveNodes, indices); return new PrepareResult(prepared, createUnlockedNodeList()); } } @@ -87,12 +87,12 @@ public class GroupPreparer { /// Note that this will write to the node repo. private List<Node> prepareWithLocks(ApplicationId application, ClusterSpec cluster, NodeSpec requestedNodes, - List<Node> surplusActiveNodes, NodeIndices indices, int wantedGroups) { + List<Node> surplusActiveNodes, NodeIndices indices) { try (Mutex lock = nodeRepository.applications().lock(application); Mutex allocationLock = nodeRepository.nodes().lockUnallocated()) { LockedNodeList allNodes = nodeRepository.nodes().list(allocationLock); NodeAllocation allocation = prepareAllocation(application, cluster, requestedNodes, surplusActiveNodes, - indices::next, wantedGroups, allNodes); + indices::next, allNodes); NodeType hostType = allocation.nodeType().hostType(); if (canProvisionDynamically(hostType) && allocation.hostDeficit().isPresent()) { HostSharing sharing = hostSharing(cluster, hostType); @@ -134,27 +134,25 @@ public class GroupPreparer { // Non-dynamically provisioned zone with a deficit because we just now retired some nodes. // Try again, but without retiring indices.resetProbe(); - List<Node> accepted = prepareWithLocks(application, cluster, cns.withoutRetiring(), surplusActiveNodes, indices, wantedGroups); + List<Node> accepted = prepareWithLocks(application, cluster, cns.withoutRetiring(), surplusActiveNodes, indices); log.warning("Prepared " + application + " " + cluster.id() + " without retirement due to lack of capacity"); return accepted; } if (! allocation.fulfilled() && requestedNodes.canFail()) - throw new NodeAllocationException((cluster.group().isPresent() ? "Node allocation failure on " + cluster.group().get() - : "") + allocation.allocationFailureDetails(), - true); + throw new NodeAllocationException(allocation.allocationFailureDetails(), true); // Carry out and return allocation + List<Node> acceptedNodes = allocation.finalNodes(); nodeRepository.nodes().reserve(allocation.reservableNodes()); nodeRepository.nodes().addReservedNodes(new LockedNodeList(allocation.newNodes(), allocationLock)); - List<Node> acceptedNodes = allocation.finalNodes(); surplusActiveNodes.removeAll(acceptedNodes); return acceptedNodes; } } private NodeAllocation prepareAllocation(ApplicationId application, ClusterSpec cluster, NodeSpec requestedNodes, - List<Node> surplusActiveNodes, Supplier<Integer> nextIndex, int wantedGroups, + List<Node> surplusActiveNodes, Supplier<Integer> nextIndex, LockedNodeList allNodes) { NodeAllocation allocation = new NodeAllocation(allNodes, application, cluster, requestedNodes, nextIndex, nodeRepository); @@ -162,7 +160,6 @@ public class GroupPreparer { application, cluster, requestedNodes, - wantedGroups, nodeRepository.zone().cloud().dynamicProvisioning(), nodeRepository.nameResolver(), nodeRepository.nodes(), diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/NodeAllocation.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/NodeAllocation.java index a2e0e59e329..40e5909d4d9 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/NodeAllocation.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/NodeAllocation.java @@ -19,7 +19,6 @@ import com.yahoo.vespa.hosted.provision.node.Allocation; import java.util.ArrayList; import java.util.Collection; -import java.util.Comparator; import java.util.EnumSet; import java.util.HashSet; import java.util.LinkedHashMap; @@ -60,7 +59,7 @@ class NodeAllocation { /** The number of already allocated nodes of compatible size */ private int acceptedAndCompatible = 0; - /** The number of already allocated nodes which can be made compatible*/ + /** The number of already allocated nodes which can be made compatible */ private int acceptedAndCompatibleOrResizable = 0; /** The number of nodes rejected because of clashing parentHostname */ @@ -120,7 +119,6 @@ class NodeAllocation { ClusterMembership membership = allocation.membership(); if ( ! allocation.owner().equals(application)) continue; // wrong application if ( ! membership.cluster().satisfies(cluster)) continue; // wrong cluster id/type - if ((! candidate.isSurplus || saturated()) && ! membership.cluster().group().equals(cluster.group())) continue; // wrong group, and we can't or have no reason to change it if ( candidate.state() == Node.State.active && allocation.removable()) continue; // don't accept; causes removal if ( candidate.state() == Node.State.active && candidate.wantToFail()) continue; // don't accept; causes failing if ( indexes.contains(membership.index())) continue; // duplicate index (just to be sure) @@ -175,6 +173,7 @@ class NodeAllocation { if (candidate.preferToRetire() && candidate.replaceableBy(candidates)) return Retirement.softRequest; if (violatesExclusivity(candidate)) return Retirement.violatesExclusivity; if (requiredHostFlavor.isPresent() && ! candidate.parent.map(node -> node.flavor().name()).equals(requiredHostFlavor)) return Retirement.violatesHostFlavor; + if (candidate.violatesSpares) return Retirement.violatesSpares; return Retirement.none; } @@ -243,12 +242,10 @@ class NodeAllocation { */ private boolean acceptIncompatible(NodeCandidate candidate) { if (candidate.state() != Node.State.active) return false; - if (! candidate.allocation().get().membership().cluster().group().equals(cluster.group())) return false; if (candidate.allocation().get().membership().retired()) return true; // don't second-guess if already retired if ( ! requestedNodes.considerRetiring()) // the node is active and we are not allowed to remove gracefully, so keep return true; - return cluster.isStateful() || (cluster.type() == ClusterSpec.Type.container && !hasCompatibleResources(candidate)); } @@ -259,7 +256,6 @@ class NodeAllocation { private Node acceptNode(NodeCandidate candidate, Retirement retirement, boolean resizeable) { Node node = candidate.toNode(); - if (node.allocation().isPresent()) // Record the currently requested resources node = node.with(node.allocation().get().withRequestedResources(requestedNodes.resources().orElse(node.resources()))); @@ -268,10 +264,11 @@ class NodeAllocation { // We want to allocate new nodes rather than unretiring with resize, so count without those // for the purpose of deciding when to stop accepting nodes (saturation) if (node.allocation().isEmpty() - || ! ( requestedNodes.needsResize(node) && - (node.allocation().get().membership().retired() || ! requestedNodes.considerRetiring()))) { + || (canBeUsedInGroupWithDeficiency(node) && + ! ( requestedNodes.needsResize(node) && (node.allocation().get().membership().retired() || ! requestedNodes.considerRetiring())))) { acceptedAndCompatible++; } + if (hasCompatibleResources(candidate)) acceptedAndCompatibleOrResizable++; @@ -289,15 +286,28 @@ class NodeAllocation { node = node.retire(nodeRepository.clock().instant()); } if ( ! node.allocation().get().membership().cluster().equals(cluster)) { - // group may be different - node = setCluster(cluster, node); + // Cluster has the updated settings but do not set a group + node = setCluster(cluster.with(node.allocation().get().membership().cluster().group()), node); } - candidate = candidate.withNode(node); + candidate = candidate.withNode(node, retirement != Retirement.none && retirement != Retirement.alreadyRetired ); indexes.add(node.allocation().get().membership().index()); nodes.put(node.hostname(), candidate); return node; } + private boolean canBeUsedInGroupWithDeficiency(Node node) { + if (requestedNodes.count().isEmpty()) return true; + if (node.allocation().isEmpty()) return true; + var group = node.allocation().get().membership().cluster().group(); + if (group.isEmpty()) return true; + long nodesInGroup = nodes.values().stream().filter(n -> groupOf(n).equals(group)).count(); + return nodesInGroup < requestedNodes.count().get() / requestedNodes.groups(); + } + + private Optional<ClusterSpec.Group> groupOf(NodeCandidate candidate) { + return candidate.allocation().flatMap(a -> a.membership().cluster().group()); + } + private Node resize(Node node) { NodeResources hostResources = allNodes.parentOf(node).get().flavor().resources(); return node.with(new Flavor(requestedNodes.resources().get() @@ -391,52 +401,21 @@ class NodeAllocation { return requestedNodes.type(); } - /** - * Make the number of <i>non-retired</i> nodes in the list equal to the requested number - * of nodes, and retire the rest of the list. Only retire currently active nodes. - * Prefer to retire nodes of the wrong flavor. - * Make as few changes to the retired set as possible. - * - * @return the final list of nodes - */ List<Node> finalNodes() { - int wantToRetireCount = (int) matching(NodeCandidate::wantToRetire).count(); - int currentRetiredCount = (int) matching(node -> node.allocation().get().membership().retired()).count(); - int deltaRetiredCount = requestedNodes.idealRetiredCount(nodes.size(), wantToRetireCount, currentRetiredCount); - - if (deltaRetiredCount > 0) { // retire until deltaRetiredCount is 0 - for (NodeCandidate candidate : byRetiringPriority(nodes.values())) { - if ( ! candidate.allocation().get().membership().retired() && candidate.state() == Node.State.active) { - candidate = candidate.withNode(); - candidate = candidate.withNode(candidate.toNode().retire(Agent.application, nodeRepository.clock().instant())); - nodes.put(candidate.toNode().hostname(), candidate); - if (--deltaRetiredCount == 0) break; - } - } - } - else if (deltaRetiredCount < 0) { // unretire until deltaRetiredCount is 0 - for (NodeCandidate candidate : byUnretiringPriority(nodes.values())) { - if (candidate.allocation().get().membership().retired() && hasCompatibleResources(candidate) ) { - candidate = candidate.withNode(); - if (candidate.isResizable) - candidate = candidate.withNode(resize(candidate.toNode())); - candidate = candidate.withNode(candidate.toNode().unretire()); - nodes.put(candidate.toNode().hostname(), candidate); - if (++deltaRetiredCount == 0) break; - } - } - } - + // Set whether the node is exclusive for (NodeCandidate candidate : nodes.values()) { - // Set whether the node is exclusive candidate = candidate.withNode(); Allocation allocation = candidate.allocation().get(); candidate = candidate.withNode(candidate.toNode().with(allocation.with(allocation.membership() - .with(allocation.membership().cluster().exclusive(cluster.isExclusive()))))); + .with(allocation.membership().cluster().exclusive(cluster.isExclusive()))))); nodes.put(candidate.toNode().hostname(), candidate); } - return nodes.values().stream().map(NodeCandidate::toNode).toList(); + GroupIndices groupIndices = new GroupIndices(requestedNodes, allNodes, nodeRepository.clock()); + Collection<NodeCandidate> finalNodes = groupIndices.assignTo(nodes.values()); + nodes.clear(); + finalNodes.forEach(candidate -> nodes.put(candidate.toNode().hostname(), candidate)); + return finalNodes.stream().map(NodeCandidate::toNode).toList(); } List<Node> reservableNodes() { @@ -461,19 +440,6 @@ class NodeAllocation { return allNodes.nodeType(nodeType()).size(); } - /** Prefer to retire nodes we want the least */ - private List<NodeCandidate> byRetiringPriority(Collection<NodeCandidate> candidates) { - return candidates.stream().sorted(Comparator.reverseOrder()).toList(); - } - - /** Prefer to unretire nodes we don't want to retire, and otherwise those with lower index */ - private List<NodeCandidate> byUnretiringPriority(Collection<NodeCandidate> candidates) { - return candidates.stream() - .sorted(Comparator.comparing(NodeCandidate::wantToRetire) - .thenComparing(n -> n.allocation().get().membership().index())) - .toList(); - } - String allocationFailureDetails() { List<String> reasons = new ArrayList<>(); if (rejectedDueToExclusivity > 0) @@ -486,7 +452,7 @@ class NodeAllocation { reasons.add("insufficient real resources on hosts"); if (reasons.isEmpty()) return ""; - return ": Not enough suitable nodes available due to " + String.join(", ", reasons); + return "Not enough suitable nodes available due to " + String.join(", ", reasons); } private static Integer parseIndex(String hostname) { @@ -510,6 +476,7 @@ class NodeAllocation { violatesExclusivity("node violates host exclusivity"), violatesHostFlavor("node violates host flavor"), violatesHostFlavorGeneration("node violates host flavor generation"), + violatesSpares("node is assigned to a host we want to use as a spare"), none(""); private final String description; diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/NodeCandidate.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/NodeCandidate.java index 8462e23fbfd..adc04c491e2 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/NodeCandidate.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/NodeCandidate.java @@ -81,6 +81,9 @@ public abstract class NodeCandidate implements Nodelike, Comparable<NodeCandidat public abstract boolean preferToRetire(); + /** Returns true if we have decided to retire this node as part of this deployment */ + public boolean retiredNow() { return false; } + public abstract boolean wantToFail(); public abstract Flavor flavor(); @@ -217,7 +220,12 @@ public abstract class NodeCandidate implements Nodelike, Comparable<NodeCandidat /** Returns a copy of this with node set to given value */ NodeCandidate withNode(Node node) { - return new ConcreteNodeCandidate(node, freeParentCapacity, parent, violatesSpares, exclusiveSwitch, isSurplus, isNew, isResizable); + return withNode(node, retiredNow()); + } + + /** Returns a copy of this with node set to given value */ + NodeCandidate withNode(Node node, boolean retiredNow) { + return new ConcreteNodeCandidate(node, retiredNow, freeParentCapacity, parent, violatesSpares, exclusiveSwitch, isSurplus, isNew, isResizable); } /** Returns the switch priority, based on switch exclusivity, of this compared to other */ @@ -260,7 +268,7 @@ public abstract class NodeCandidate implements Nodelike, Comparable<NodeCandidat boolean isSurplus, boolean isNew, boolean isResizeable) { - return new ConcreteNodeCandidate(node, freeParentCapacity, Optional.of(parent), violatesSpares, true, isSurplus, isNew, isResizeable); + return new ConcreteNodeCandidate(node, false, freeParentCapacity, Optional.of(parent), violatesSpares, true, isSurplus, isNew, isResizeable); } public static NodeCandidate createNewChild(NodeResources resources, @@ -274,26 +282,33 @@ public abstract class NodeCandidate implements Nodelike, Comparable<NodeCandidat } public static NodeCandidate createNewExclusiveChild(Node node, Node parent) { - return new ConcreteNodeCandidate(node, node.resources(), Optional.of(parent), false, true, false, true, false); + return new ConcreteNodeCandidate(node, false, node.resources(), Optional.of(parent), false, true, false, true, false); } public static NodeCandidate createStandalone(Node node, boolean isSurplus, boolean isNew) { - return new ConcreteNodeCandidate(node, node.resources(), Optional.empty(), false, true, isSurplus, isNew, false); + return new ConcreteNodeCandidate(node, false, node.resources(), Optional.empty(), false, true, isSurplus, isNew, false); } /** A candidate backed by a node */ static class ConcreteNodeCandidate extends NodeCandidate { private final Node node; + private final boolean retiredNow; - ConcreteNodeCandidate(Node node, NodeResources freeParentCapacity, Optional<Node> parent, + ConcreteNodeCandidate(Node node, + boolean retiredNow, + NodeResources freeParentCapacity, Optional<Node> parent, boolean violatesSpares, boolean exclusiveSwitch, boolean isSurplus, boolean isNew, boolean isResizeable) { super(freeParentCapacity, parent, violatesSpares, exclusiveSwitch, isSurplus, isNew, isResizeable); + this.retiredNow = retiredNow; this.node = Objects.requireNonNull(node, "Node cannot be null"); } @Override + public boolean retiredNow() { return retiredNow; } + + @Override public NodeResources resources() { return node.resources(); } @Override @@ -322,7 +337,7 @@ public abstract class NodeCandidate implements Nodelike, Comparable<NodeCandidat @Override public NodeCandidate allocate(ApplicationId owner, ClusterMembership membership, NodeResources requestedResources, Instant at) { - return new ConcreteNodeCandidate(node.allocate(owner, membership, requestedResources, at), + return new ConcreteNodeCandidate(node.allocate(owner, membership, requestedResources, at), retiredNow, freeParentCapacity, parent, violatesSpares, exclusiveSwitch, isSurplus, isNew, isResizable); } @@ -332,7 +347,7 @@ public abstract class NodeCandidate implements Nodelike, Comparable<NodeCandidat @Override public NodeCandidate withExclusiveSwitch(boolean exclusiveSwitch) { - return new ConcreteNodeCandidate(node, freeParentCapacity, parent, violatesSpares, exclusiveSwitch, + return new ConcreteNodeCandidate(node, retiredNow, freeParentCapacity, parent, violatesSpares, exclusiveSwitch, isSurplus, isNew, isResizable); } @@ -439,7 +454,7 @@ public abstract class NodeCandidate implements Nodelike, Comparable<NodeCandidat NodeType.tenant) .cloudAccount(parent.get().cloudAccount()) .build(); - return new ConcreteNodeCandidate(node, freeParentCapacity, parent, violatesSpares, exclusiveSwitch, isSurplus, isNew, isResizable); + return new ConcreteNodeCandidate(node, false, freeParentCapacity, parent, violatesSpares, exclusiveSwitch, isSurplus, isNew, isResizable); } diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/NodePrioritizer.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/NodePrioritizer.java index 4f21c8dcd50..9f00e5fdbba 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/NodePrioritizer.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/NodePrioritizer.java @@ -46,7 +46,7 @@ public class NodePrioritizer { private final boolean enclave; public NodePrioritizer(LockedNodeList allNodes, ApplicationId application, ClusterSpec clusterSpec, NodeSpec nodeSpec, - int wantedGroups, boolean dynamicProvisioning, NameResolver nameResolver, Nodes nodes, + boolean dynamicProvisioning, NameResolver nameResolver, Nodes nodes, HostResourcesCalculator hostResourcesCalculator, int spareCount, boolean enclave) { this.allNodes = allNodes; this.calculator = hostResourcesCalculator; @@ -70,12 +70,9 @@ public class NodePrioritizer { .stream()) .distinct() .count(); - this.topologyChange = currentGroups != wantedGroups; + this.topologyChange = currentGroups != requestedNodes.groups(); - this.currentClusterSize = (int) nonRetiredNodesInCluster.state(Node.State.active).stream() - .map(node -> node.allocation().flatMap(alloc -> alloc.membership().cluster().group())) - .filter(clusterSpec.group()::equals) - .count(); + this.currentClusterSize = (int) nonRetiredNodesInCluster.state(Node.State.active).stream().count(); // In dynamically provisioned zones, we can always take spare hosts since we can provision new on-demand, // NodeCandidate::compareTo will ensure that they will not be used until there is no room elsewhere. diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/NodeRepositoryProvisioner.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/NodeRepositoryProvisioner.java index ffd2805bcff..c29c51ccbd5 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/NodeRepositoryProvisioner.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/NodeRepositoryProvisioner.java @@ -81,11 +81,10 @@ public class NodeRepositoryProvisioner implements Provisioner { * The nodes are ordered by increasing index number. */ @Override - public List<HostSpec> prepare(ApplicationId application, ClusterSpec cluster, Capacity requested, - ProvisionLogger logger) { + public List<HostSpec> prepare(ApplicationId application, ClusterSpec cluster, Capacity requested, ProvisionLogger logger) { log.log(Level.FINE, "Received deploy prepare request for " + requested + " for application " + application + ", cluster " + cluster); - validate(application, cluster, requested); + validate(application, cluster, requested, logger); int groups; NodeResources resources; @@ -97,23 +96,21 @@ public class NodeRepositoryProvisioner implements Provisioner { validate(actual, target, cluster, application); logIfDownscaled(requested.minResources().nodes(), actual.minResources().nodes(), cluster, logger); - groups = target.groups(); resources = getNodeResources(cluster, target.nodeResources(), application); - nodeSpec = NodeSpec.from(target.nodes(), resources, cluster.isExclusive(), actual.canFail(), + nodeSpec = NodeSpec.from(target.nodes(), target.groups(), resources, cluster.isExclusive(), actual.canFail(), requested.cloudAccount().orElse(nodeRepository.zone().cloud().account()), requested.clusterInfo().hostTTL()); } else { - groups = 1; // type request with multiple groups is not supported cluster = cluster.withExclusivity(true); resources = getNodeResources(cluster, requested.minResources().nodeResources(), application); nodeSpec = NodeSpec.from(requested.type(), nodeRepository.zone().cloud().account()); } - return asSortedHosts(preparer.prepare(application, cluster, nodeSpec, groups), + return asSortedHosts(preparer.prepare(application, cluster, nodeSpec), requireCompatibleResources(resources, cluster)); } - private void validate(ApplicationId application, ClusterSpec cluster, Capacity requested) { + private void validate(ApplicationId application, ClusterSpec cluster, Capacity requested, ProvisionLogger logger) { if (cluster.group().isPresent()) throw new IllegalArgumentException("Node requests cannot specify a group"); nodeResourceLimits.ensureWithinAdvertisedLimits("Min", requested.minResources().nodeResources(), application, cluster); @@ -121,6 +118,18 @@ public class NodeRepositoryProvisioner implements Provisioner { if ( ! requested.minResources().nodeResources().gpuResources().equals(requested.maxResources().nodeResources().gpuResources())) throw new IllegalArgumentException(requested + " is invalid: Gpu capacity cannot have ranges"); + + logInsufficientDiskResources(cluster, requested, logger); + } + + private void logInsufficientDiskResources(ClusterSpec cluster, Capacity requested, ProvisionLogger logger) { + var resources = requested.minResources().nodeResources(); + if ( ! nodeResourceLimits.isWithinAdvertisedDiskLimits(resources, cluster)) { + logger.logApplicationPackage(Level.WARNING, "Requested disk (" + resources.diskGb() + + "Gb) in " + cluster.id() + " is not large enough to fit " + + "core/heap dumps. Minimum recommended disk resources " + + "is 2x memory for containers and 3x memory for content"); + } } private NodeResources getNodeResources(ClusterSpec cluster, NodeResources nodeResources, ApplicationId applicationId) { diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/NodeResourceLimits.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/NodeResourceLimits.java index 9ded1a2735c..8c5a7b6c61e 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/NodeResourceLimits.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/NodeResourceLimits.java @@ -2,14 +2,17 @@ package com.yahoo.vespa.hosted.provision.provisioning; import com.yahoo.config.provision.ApplicationId; +import com.yahoo.config.provision.Capacity; import com.yahoo.config.provision.ClusterSpec; import com.yahoo.config.provision.Environment; import com.yahoo.config.provision.NodeResources; import com.yahoo.config.provision.NodeType; +import com.yahoo.config.provision.ProvisionLogger; import com.yahoo.config.provision.Zone; import com.yahoo.vespa.hosted.provision.NodeRepository; import java.util.Locale; +import java.util.logging.Level; /** * Defines the resource limits for nodes in various zones @@ -35,6 +38,12 @@ public class NodeResourceLimits { illegal(type, "diskGb", "Gb", cluster, requested.diskGb(), minAdvertisedDiskGb(requested, cluster.isExclusive())); } + // TODO: Remove this when we are ready to fail, not just warn on this. */ + public boolean isWithinAdvertisedDiskLimits(NodeResources requested, ClusterSpec cluster) { + if (requested.diskGbIsUnspecified() || requested.memoryGbIsUnspecified()) return true; + return requested.diskGb() >= minAdvertisedDiskGb(requested, cluster); + } + /** Returns whether the real resources we'll end up with on a given tenant node are within limits */ public boolean isWithinRealLimits(NodeCandidate candidateNode, ApplicationId applicationId, ClusterSpec cluster) { if (candidateNode.type() != NodeType.tenant) return true; // Resource limits only apply to tenant nodes @@ -52,9 +61,12 @@ public class NodeResourceLimits { return true; } - public NodeResources enlargeToLegal(NodeResources requested, ApplicationId applicationId, ClusterSpec cluster, boolean exclusive) { + public NodeResources enlargeToLegal(NodeResources requested, ApplicationId applicationId, ClusterSpec cluster, boolean exclusive, boolean followRecommendations) { if (requested.isUnspecified()) return requested; + if (followRecommendations) // TODO: Do unconditionally when we enforce this limit + requested = requested.withDiskGb(Math.max(minAdvertisedDiskGb(requested, cluster), requested.diskGb())); + return requested.withVcpu(Math.max(minAdvertisedVcpu(applicationId, cluster), requested.vcpu())) .withMemoryGb(Math.max(minAdvertisedMemoryGb(cluster), requested.memoryGb())) .withDiskGb(Math.max(minAdvertisedDiskGb(requested, exclusive), requested.diskGb())); @@ -78,6 +90,15 @@ public class NodeResourceLimits { return minRealDiskGb() + reservedDiskSpaceGb(requested.storageType(), exclusive); } + // TODO: Move this check into the above when we are ready to fail, not just warn on this. */ + private double minAdvertisedDiskGb(NodeResources requested, ClusterSpec cluster) { + return requested.memoryGb() * switch (cluster.type()) { + case combined, content -> 3; + case container -> 2; + default -> 0; // No constraint on other types + }; + } + // Note: Assumes node type 'host' private long reservedDiskSpaceGb(NodeResources.StorageType storageType, boolean exclusive) { if (storageType == NodeResources.StorageType.local && ! zone().cloud().allowHostSharing()) diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/NodeSpec.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/NodeSpec.java index f32928a9ec4..f4b2c4ceee0 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/NodeSpec.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/NodeSpec.java @@ -35,21 +35,20 @@ public interface NodeSpec { return fulfilledDeficitCount(count) == 0; } + /** Returns the total number of nodes this is requesting, or empty if not specified */ + Optional<Integer> count(); + + int groups(); + /** Returns whether this should throw an exception if the requested nodes are not fully available */ boolean canFail(); /** Returns whether we should retire nodes at all when fulfilling this spec */ boolean considerRetiring(); - /** Returns the ideal number of nodes that should be retired to fulfill this spec */ - int idealRetiredCount(int acceptedCount, int wantToRetireCount, int currentRetiredCount); - /** Returns number of additional nodes needed for this spec to be fulfilled given the current node count */ int fulfilledDeficitCount(int count); - /** Returns a specification of a fraction of all the nodes of this. It is assumed the argument is a valid divisor. */ - NodeSpec fraction(int divisor); - /** Returns the resources requested by this or empty if none are explicitly requested */ Optional<NodeResources> resources(); @@ -77,9 +76,9 @@ public interface NodeSpec { return false; } - static NodeSpec from(int nodeCount, NodeResources resources, boolean exclusive, boolean canFail, + static NodeSpec from(int nodeCount, int groupCount, NodeResources resources, boolean exclusive, boolean canFail, CloudAccount cloudAccount, Duration hostTTL) { - return new CountNodeSpec(nodeCount, resources, exclusive, canFail, canFail, cloudAccount, hostTTL); + return new CountNodeSpec(nodeCount, groupCount, resources, exclusive, canFail, canFail, cloudAccount, hostTTL); } static NodeSpec from(NodeType type, CloudAccount cloudAccount) { @@ -90,6 +89,7 @@ public interface NodeSpec { class CountNodeSpec implements NodeSpec { private final int count; + private final int groups; private final NodeResources requestedNodeResources; private final boolean exclusive; private final boolean canFail; @@ -97,9 +97,10 @@ public interface NodeSpec { private final CloudAccount cloudAccount; private final Duration hostTTL; - private CountNodeSpec(int count, NodeResources resources, boolean exclusive, boolean canFail, + private CountNodeSpec(int count, int groups, NodeResources resources, boolean exclusive, boolean canFail, boolean considerRetiring, CloudAccount cloudAccount, Duration hostTTL) { this.count = count; + this.groups = groups; this.requestedNodeResources = Objects.requireNonNull(resources, "Resources must be specified"); this.exclusive = exclusive; this.canFail = canFail; @@ -112,6 +113,12 @@ public interface NodeSpec { } @Override + public Optional<Integer> count() { return Optional.of(count); } + + @Override + public int groups() { return groups; } + + @Override public Optional<NodeResources> resources() { return Optional.of(requestedNodeResources); } @@ -136,22 +143,12 @@ public interface NodeSpec { } @Override - public int idealRetiredCount(int acceptedCount, int wantToRetireCount, int currentRetiredCount) { - return acceptedCount - this.count - currentRetiredCount; - } - - @Override public int fulfilledDeficitCount(int count) { return Math.max(this.count - count, 0); } - @Override - public NodeSpec fraction(int divisor) { - return new CountNodeSpec(count/divisor, requestedNodeResources, exclusive, canFail, considerRetiring, cloudAccount, hostTTL); - } - public NodeSpec withoutRetiring() { - return new CountNodeSpec(count, requestedNodeResources, exclusive, canFail, false, cloudAccount, hostTTL); + return new CountNodeSpec(count, groups, requestedNodeResources, exclusive, canFail, false, cloudAccount, hostTTL); } @Override @@ -163,7 +160,6 @@ public interface NodeSpec { public boolean canResize(NodeResources currentNodeResources, NodeResources currentSpareHostResources, ClusterSpec.Type type, boolean hasTopologyChange, int currentClusterSize) { if (exclusive) return false; // exclusive resources must match the host - // Never allow in-place resize when also changing topology or decreasing cluster size if (hasTopologyChange || count < currentClusterSize) return false; @@ -192,7 +188,10 @@ public interface NodeSpec { public Duration hostTTL() { return hostTTL; } @Override - public String toString() { return "request for " + count + " nodes with " + requestedNodeResources; } + public String toString() { + return "request for " + count + " nodes" + + ( groups > 1 ? " (in " + groups + " groups)" : "") + + " with " + requestedNodeResources; } } @@ -211,6 +210,12 @@ public interface NodeSpec { } @Override + public Optional<Integer> count() { return Optional.empty(); } + + @Override + public int groups() { return 1; } + + @Override public NodeType type() { return type; } @Override @@ -226,20 +231,12 @@ public interface NodeSpec { public boolean considerRetiring() { return true; } @Override - public int idealRetiredCount(int acceptedCount, int wantToRetireCount, int currentRetiredCount) { - return wantToRetireCount - currentRetiredCount; - } - - @Override public int fulfilledDeficitCount(int count) { // If no wanted count is specified for this node type, then any count fulfills the deficit return Math.max(0, WANTED_NODE_COUNT.getOrDefault(type, 0) - count); } @Override - public NodeSpec fraction(int divisor) { return this; } - - @Override public Optional<NodeResources> resources() { return Optional.empty(); } diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/Preparer.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/Preparer.java index b6c7324c75c..25efcabfe8e 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/Preparer.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/Preparer.java @@ -10,6 +10,7 @@ import com.yahoo.vespa.hosted.provision.Node; import com.yahoo.vespa.hosted.provision.NodeList; import com.yahoo.vespa.hosted.provision.NodeRepository; +import java.time.Clock; import java.util.ArrayList; import java.util.List; import java.util.ListIterator; @@ -32,16 +33,15 @@ class Preparer { } /** Prepare all required resources for the given application and cluster */ - public List<Node> prepare(ApplicationId application, ClusterSpec cluster, NodeSpec requestedNodes, int wantedGroups) { + public List<Node> prepare(ApplicationId application, ClusterSpec cluster, NodeSpec requestedNodes) { try { - var nodes = prepareNodes(application, cluster, requestedNodes, wantedGroups); + var nodes = prepareNodes(application, cluster, requestedNodes); prepareLoadBalancer(application, cluster, requestedNodes); return nodes; } catch (NodeAllocationException e) { throw new NodeAllocationException("Could not satisfy " + requestedNodes + - ( wantedGroups > 1 ? " (in " + wantedGroups + " groups)" : "") + - " in " + application + " " + cluster + ": " + e.getMessage(), + " in " + application + " " + cluster, e, e.retryable()); } } @@ -54,34 +54,29 @@ class Preparer { // Note: This operation may make persisted changes to the set of reserved and inactive nodes, // but it may not change the set of active nodes, as the active nodes must stay in sync with the // active config model which is changed on activate - private List<Node> prepareNodes(ApplicationId application, ClusterSpec cluster, NodeSpec requestedNodes, - int wantedGroups) { + private List<Node> prepareNodes(ApplicationId application, ClusterSpec cluster, NodeSpec requestedNodes) { LockedNodeList allNodes = groupPreparer.createUnlockedNodeList(); - NodeList appNodes = allNodes.owner(application); - List<Node> surplusNodes = findNodesInRemovableGroups(appNodes, cluster, wantedGroups); + NodeList clusterNodes = allNodes.owner(application); + List<Node> surplusNodes = findNodesInRemovableGroups(clusterNodes, requestedNodes.groups()); - List<Integer> usedIndices = appNodes.cluster(cluster.id()).mapToList(node -> node.allocation().get().membership().index()); + List<Integer> usedIndices = clusterNodes.mapToList(node -> node.allocation().get().membership().index()); NodeIndices indices = new NodeIndices(usedIndices); List<Node> acceptedNodes = new ArrayList<>(); - for (int groupIndex = 0; groupIndex < wantedGroups; groupIndex++) { - ClusterSpec clusterGroup = cluster.with(Optional.of(ClusterSpec.Group.from(groupIndex))); - GroupPreparer.PrepareResult result = groupPreparer.prepare(application, clusterGroup, - requestedNodes.fraction(wantedGroups), - surplusNodes, indices, wantedGroups, - allNodes); - allNodes = result.allNodes(); // Might have changed - List<Node> accepted = result.prepared(); - if (requestedNodes.rejectNonActiveParent()) { - NodeList activeHosts = allNodes.state(Node.State.active).parents().nodeType(requestedNodes.type().hostType()); - accepted = accepted.stream() - .filter(node -> node.parentHostname().isEmpty() || activeHosts.parentOf(node).isPresent()) - .toList(); - } - - replace(acceptedNodes, accepted); + GroupPreparer.PrepareResult result = groupPreparer.prepare(application, cluster, + requestedNodes, + surplusNodes, indices, + allNodes); + List<Node> accepted = result.prepared(); + if (requestedNodes.rejectNonActiveParent()) { + NodeList activeHosts = result.allNodes().state(Node.State.active).parents().nodeType(requestedNodes.type().hostType()); + accepted = accepted.stream() + .filter(node -> node.parentHostname().isEmpty() || activeHosts.parentOf(node).isPresent()) + .toList(); } - moveToActiveGroup(surplusNodes, wantedGroups, cluster.group()); + + replace(acceptedNodes, accepted); + moveToActiveGroup(surplusNodes, requestedNodes.groups(), cluster.group()); acceptedNodes.removeAll(surplusNodes); return acceptedNodes; } @@ -95,18 +90,16 @@ class Preparer { * Returns a list of the nodes which are * in groups with index number above or equal the group count */ - private List<Node> findNodesInRemovableGroups(NodeList appNodes, ClusterSpec requestedCluster, int wantedGroups) { + private List<Node> findNodesInRemovableGroups(NodeList clusterNodes, int wantedGroups) { List<Node> surplusNodes = new ArrayList<>(); - for (Node node : appNodes.state(Node.State.active)) { + for (Node node : clusterNodes.state(Node.State.active)) { ClusterSpec nodeCluster = node.allocation().get().membership().cluster(); - if ( ! nodeCluster.id().equals(requestedCluster.id())) continue; - if ( ! nodeCluster.type().equals(requestedCluster.type())) continue; if (nodeCluster.group().get().index() >= wantedGroups) surplusNodes.add(node); } return surplusNodes; } - + /** Move nodes from unwanted groups to wanted groups to avoid lingering groups consisting of retired nodes */ private void moveToActiveGroup(List<Node> surplusNodes, int wantedGroups, Optional<ClusterSpec.Group> targetGroup) { for (ListIterator<Node> i = surplusNodes.listIterator(); i.hasNext(); ) { diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/testutils/InMemoryProvisionLogger.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/testutils/InMemoryProvisionLogger.java new file mode 100644 index 00000000000..65abcbef698 --- /dev/null +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/testutils/InMemoryProvisionLogger.java @@ -0,0 +1,35 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.vespa.hosted.provision.testutils; + +import com.yahoo.config.provision.ProvisionLogger; + +import java.util.ArrayList; +import java.util.List; +import java.util.logging.Level; + +/** + * A logger which remembers all messages logged in addition to writing them to standard out. + * + * @author bratseth + */ +public class InMemoryProvisionLogger implements ProvisionLogger { + + private final List<String> systemLog = new ArrayList<>(); + private final List<String> applicationLog = new ArrayList<>(); + + @Override + public void log(Level level, String message) { + System.out.println("ProvisionLogger system " + level + ": " + message); + systemLog.add(level + ": " + message); + } + + @Override + public void logApplicationPackage(Level level, String message) { + System.out.println("ProvisionLogger application " + level + ": " + message); + applicationLog.add(level + ": " + message); + } + + public List<String> systemLog() { return systemLog; } + public List<String> applicationLog() { return applicationLog; } + +} diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/testutils/MockDeployer.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/testutils/MockDeployer.java index dcde521bfda..3ed01e00ee6 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/testutils/MockDeployer.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/testutils/MockDeployer.java @@ -261,7 +261,7 @@ public class MockDeployer implements Deployer { public ClusterSpec cluster() { return cluster; } private List<HostSpec> prepare(NodeRepositoryProvisioner provisioner) { - return provisioner.prepare(id, cluster, capacity, null); + return provisioner.prepare(id, cluster, capacity, new InMemoryProvisionLogger()); } } |