diff options
Diffstat (limited to 'node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/HostCapacityMaintainer.java')
-rw-r--r-- | node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/HostCapacityMaintainer.java | 80 |
1 files changed, 52 insertions, 28 deletions
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/HostCapacityMaintainer.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/HostCapacityMaintainer.java index ea5ef276b95..be6c420c63b 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/HostCapacityMaintainer.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/HostCapacityMaintainer.java @@ -1,4 +1,4 @@ -// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. package com.yahoo.vespa.hosted.provision.maintenance; import com.yahoo.component.Version; @@ -13,7 +13,9 @@ import com.yahoo.config.provision.NodeType; import com.yahoo.jdisc.Metric; import com.yahoo.lang.MutableInteger; import com.yahoo.transaction.Mutex; +import com.yahoo.vespa.flags.BooleanFlag; import com.yahoo.vespa.flags.FlagSource; +import com.yahoo.vespa.flags.Flags; import com.yahoo.vespa.flags.ListFlag; import com.yahoo.vespa.flags.PermanentFlags; import com.yahoo.vespa.flags.custom.ClusterCapacity; @@ -59,6 +61,7 @@ public class HostCapacityMaintainer extends NodeRepositoryMaintainer { private final HostProvisioner hostProvisioner; private final ListFlag<ClusterCapacity> preprovisionCapacityFlag; + private final BooleanFlag makeExclusiveFlag; private final ProvisioningThrottler throttler; HostCapacityMaintainer(NodeRepository nodeRepository, @@ -69,6 +72,7 @@ public class HostCapacityMaintainer extends NodeRepositoryMaintainer { super(nodeRepository, interval, metric); this.hostProvisioner = hostProvisioner; this.preprovisionCapacityFlag = PermanentFlags.PREPROVISION_CAPACITY.bindTo(flagSource); + this.makeExclusiveFlag = Flags.MAKE_EXCLUSIVE.bindTo(flagSource); this.throttler = new ProvisioningThrottler(nodeRepository, metric); } @@ -187,6 +191,7 @@ public class HostCapacityMaintainer extends NodeRepositoryMaintainer { */ private List<Node> provisionUntilNoDeficit(NodeList nodeList) { List<ClusterCapacity> preprovisionCapacity = preprovisionCapacityFlag.value(); + boolean makeExclusive = makeExclusiveFlag.value(); // Worst-case each ClusterCapacity in preprovisionCapacity will require an allocation. int maxProvisions = preprovisionCapacity.size(); @@ -194,7 +199,7 @@ public class HostCapacityMaintainer extends NodeRepositoryMaintainer { var nodesPlusProvisioned = new ArrayList<>(nodeList.asList()); for (int numProvisions = 0;; ++numProvisions) { var nodesPlusProvisionedPlusAllocated = new ArrayList<>(nodesPlusProvisioned); - Optional<ClusterCapacity> deficit = allocatePreprovisionCapacity(preprovisionCapacity, nodesPlusProvisionedPlusAllocated); + Optional<ClusterCapacity> deficit = allocatePreprovisionCapacity(preprovisionCapacity, nodesPlusProvisionedPlusAllocated, makeExclusive); if (deficit.isEmpty()) { return nodesPlusProvisionedPlusAllocated; } @@ -204,32 +209,39 @@ public class HostCapacityMaintainer extends NodeRepositoryMaintainer { } ClusterCapacity clusterCapacityDeficit = deficit.get(); - var clusterType = Optional.ofNullable(clusterCapacityDeficit.clusterType()); nodesPlusProvisioned.addAll(provisionHosts(clusterCapacityDeficit.count(), toNodeResources(clusterCapacityDeficit), - clusterType.map(ClusterSpec.Type::from), + Optional.ofNullable(clusterCapacityDeficit.clusterType()), nodeList)); } } - private List<Node> provisionHosts(int count, NodeResources nodeResources, Optional<ClusterSpec.Type> clusterType, NodeList allNodes) { + private List<Node> provisionHosts(int count, NodeResources nodeResources, Optional<String> clusterType, NodeList allNodes) { try { if (throttler.throttle(allNodes, Agent.HostCapacityMaintainer)) { throw new NodeAllocationException("Host provisioning is being throttled", true); } Version osVersion = nodeRepository().osVersions().targetFor(NodeType.host).orElse(Version.emptyVersion); List<Integer> provisionIndices = nodeRepository().database().readProvisionIndices(count); + HostSharing sharingMode = nodeRepository().exclusiveAllocation(asSpec(clusterType, 0)) ? HostSharing.exclusive : HostSharing.shared; HostProvisionRequest request = new HostProvisionRequest(provisionIndices, NodeType.host, nodeResources, ApplicationId.defaultId(), osVersion, - HostSharing.shared, clusterType, Optional.empty(), + sharingMode, clusterType.map(ClusterSpec.Type::valueOf), Optional.empty(), nodeRepository().zone().cloud().account(), false); List<Node> hosts = new ArrayList<>(); - hostProvisioner.provisionHosts(request, - resources -> true, - provisionedHosts -> { - hosts.addAll(provisionedHosts.stream().map(host -> host.generateHost(Duration.ZERO)).toList()); - nodeRepository().nodes().addNodes(hosts, Agent.HostCapacityMaintainer); - }); + Runnable waiter; + try (var lock = nodeRepository().nodes().lockUnallocated()) { + waiter = hostProvisioner.provisionHosts(request, + resources -> true, + provisionedHosts -> { + hosts.addAll(provisionedHosts.stream() + .map(host -> host.generateHost(Duration.ZERO)) + .map(host -> host.withExclusiveToApplicationId(null)) + .toList()); + nodeRepository().nodes().addNodes(hosts, Agent.HostCapacityMaintainer); + }); + } + waiter.run(); return hosts; } catch (NodeAllocationException | IllegalArgumentException | IllegalStateException e) { throw new NodeAllocationException("Failed to provision " + count + " " + nodeResources + ": " + e.getMessage(), @@ -247,11 +259,12 @@ public class HostCapacityMaintainer extends NodeRepositoryMaintainer { * @return the part of a cluster capacity it was unable to allocate, if any */ private Optional<ClusterCapacity> allocatePreprovisionCapacity(List<ClusterCapacity> preprovisionCapacity, - ArrayList<Node> mutableNodes) { + ArrayList<Node> mutableNodes, + boolean makeExclusive) { for (int clusterIndex = 0; clusterIndex < preprovisionCapacity.size(); ++clusterIndex) { ClusterCapacity clusterCapacity = preprovisionCapacity.get(clusterIndex); LockedNodeList allNodes = new LockedNodeList(mutableNodes, () -> {}); - List<Node> candidates = findCandidates(clusterCapacity, clusterIndex, allNodes); + List<Node> candidates = findCandidates(clusterCapacity, clusterIndex, allNodes, makeExclusive); int deficit = Math.max(0, clusterCapacity.count() - candidates.size()); if (deficit > 0) { return Optional.of(clusterCapacity.withCount(deficit)); @@ -264,40 +277,51 @@ public class HostCapacityMaintainer extends NodeRepositoryMaintainer { return Optional.empty(); } - private List<Node> findCandidates(ClusterCapacity clusterCapacity, int clusterIndex, LockedNodeList allNodes) { + private List<Node> findCandidates(ClusterCapacity clusterCapacity, int clusterIndex, LockedNodeList allNodes, boolean makeExclusive) { NodeResources nodeResources = toNodeResources(clusterCapacity); // We'll allocate each ClusterCapacity as a unique cluster in a dummy application ApplicationId applicationId = ApplicationId.defaultId(); - ClusterSpec.Id clusterId = ClusterSpec.Id.from(String.valueOf(clusterIndex)); - ClusterSpec.Type type = clusterCapacity.clusterType() != null - ? ClusterSpec.Type.from(clusterCapacity.clusterType()) - : ClusterSpec.Type.content; - ClusterSpec clusterSpec = ClusterSpec.request(type, clusterId) - // build() requires a version, even though it is not (should not be) used - .vespaVersion(Vtag.currentVersion) - .build(); + ClusterSpec cluster = asSpec(Optional.ofNullable(clusterCapacity.clusterType()), clusterIndex); NodeSpec nodeSpec = NodeSpec.from(clusterCapacity.count(), 1, nodeResources, false, true, nodeRepository().zone().cloud().account(), Duration.ZERO); var allocationContext = IP.Allocation.Context.from(nodeRepository().zone().cloud().name(), nodeSpec.cloudAccount().isExclave(nodeRepository().zone()), nodeRepository().nameResolver()); - NodePrioritizer prioritizer = new NodePrioritizer(allNodes, applicationId, clusterSpec, nodeSpec, - true, allocationContext, nodeRepository().nodes(), nodeRepository().resourcesCalculator(), - nodeRepository().spareCount()); - List<NodeCandidate> nodeCandidates = prioritizer.collect(); + NodePrioritizer prioritizer = new NodePrioritizer(allNodes, applicationId, cluster, nodeSpec, + true, false, allocationContext, nodeRepository().nodes(), + nodeRepository().resourcesCalculator(), nodeRepository().spareCount(), + nodeRepository().exclusiveAllocation(cluster), makeExclusive); + List<NodeCandidate> nodeCandidates = prioritizer.collect() + .stream() + .filter(node -> node.violatesExclusivity(cluster, + applicationId, + nodeRepository().exclusiveAllocation(cluster), + false, + nodeRepository().zone().cloud().allowHostSharing(), + allNodes, + makeExclusive) + != NodeCandidate.ExclusivityViolation.YES) + .toList(); MutableInteger index = new MutableInteger(0); return nodeCandidates .stream() .limit(clusterCapacity.count()) .map(candidate -> candidate.toNode() .allocate(applicationId, - ClusterMembership.from(clusterSpec, index.next()), + ClusterMembership.from(cluster, index.next()), nodeResources, nodeRepository().clock().instant())) .toList(); } + private static ClusterSpec asSpec(Optional<String> clusterType, int index) { + return ClusterSpec.request(clusterType.map(ClusterSpec.Type::from).orElse(ClusterSpec.Type.content), + ClusterSpec.Id.from(String.valueOf(index))) + .vespaVersion(Vtag.currentVersion) // Needed, but should not be used here. + .build(); + } + private static NodeResources toNodeResources(ClusterCapacity clusterCapacity) { return new NodeResources(clusterCapacity.vcpu(), clusterCapacity.memoryGb(), |