Diffstat (limited to 'node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/Preparer.java')
-rw-r--r--  node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/Preparer.java  58
1 file changed, 44 insertions, 14 deletions
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/Preparer.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/Preparer.java
index 67dae48cff7..270fda8314e 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/Preparer.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/Preparer.java
@@ -1,4 +1,4 @@
-// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
package com.yahoo.vespa.hosted.provision.provisioning;
import com.yahoo.component.Version;
@@ -8,9 +8,13 @@ import com.yahoo.config.provision.ClusterSpec;
import com.yahoo.config.provision.NodeAllocationException;
import com.yahoo.config.provision.NodeResources;
import com.yahoo.config.provision.NodeType;
+import com.yahoo.config.provision.ApplicationMutex;
import com.yahoo.jdisc.Metric;
import com.yahoo.text.internal.SnippetGenerator;
import com.yahoo.transaction.Mutex;
+import com.yahoo.vespa.applicationmodel.InfrastructureApplication;
+import com.yahoo.vespa.flags.BooleanFlag;
+import com.yahoo.vespa.flags.Flags;
import com.yahoo.vespa.hosted.provision.LockedNodeList;
import com.yahoo.vespa.hosted.provision.Node;
import com.yahoo.vespa.hosted.provision.NodeList;
@@ -18,6 +22,7 @@ import com.yahoo.vespa.hosted.provision.NodeRepository;
import com.yahoo.vespa.hosted.provision.node.Agent;
import com.yahoo.vespa.hosted.provision.node.IP;
import com.yahoo.vespa.hosted.provision.provisioning.HostProvisioner.HostSharing;
+import com.yahoo.yolean.Exceptions;
import java.util.LinkedHashSet;
import java.util.List;
@@ -44,12 +49,14 @@ public class Preparer {
private final Optional<HostProvisioner> hostProvisioner;
private final Optional<LoadBalancerProvisioner> loadBalancerProvisioner;
private final ProvisioningThrottler throttler;
+ private final BooleanFlag makeExclusiveFlag;
public Preparer(NodeRepository nodeRepository, Optional<HostProvisioner> hostProvisioner, Optional<LoadBalancerProvisioner> loadBalancerProvisioner, Metric metric) {
this.nodeRepository = nodeRepository;
this.hostProvisioner = hostProvisioner;
this.loadBalancerProvisioner = loadBalancerProvisioner;
this.throttler = new ProvisioningThrottler(nodeRepository, metric);
+ this.makeExclusiveFlag = Flags.MAKE_EXCLUSIVE.bindTo(nodeRepository.flagSource());
}
/**
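The constructor above binds MAKE_EXCLUSIVE once; prepare(...) below then reads the flag a single time per call and threads the captured boolean through every helper. A schematic of that bind-once, read-once-per-operation discipline, using hypothetical stand-in types rather than the com.yahoo.vespa.flags API:

// Stand-in types (not the com.yahoo.vespa.flags API) showing the
// bind-once, read-once-per-operation flag discipline used above.
interface FlagSource {
    boolean currentValue(String flagId);
}

class BoundBooleanFlag {
    private final FlagSource source;
    private final String flagId;

    BoundBooleanFlag(FlagSource source, String flagId) {
        this.source = source;
        this.flagId = flagId;
    }

    boolean value() { return source.currentValue(flagId); }
}

class PreparerSketch {
    private final BoundBooleanFlag makeExclusiveFlag; // bound once, as above

    PreparerSketch(FlagSource flagSource) {
        this.makeExclusiveFlag = new BoundBooleanFlag(flagSource, "make-exclusive");
    }

    void prepare() {
        // Read the flag once per prepare call and pass the captured boolean
        // down, so a mid-operation flag flip cannot split one allocation
        // across two behaviors.
        boolean makeExclusive = makeExclusiveFlag.value();
        prepareWithLocks(makeExclusive);
    }

    private void prepareWithLocks(boolean makeExclusive) { /* ... */ }
}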
@@ -69,11 +76,12 @@ public class Preparer {
loadBalancerProvisioner.ifPresent(provisioner -> provisioner.prepare(application, cluster, requested));
+ boolean makeExclusive = makeExclusiveFlag.value();
// Try preparing in memory without global unallocated lock. Most of the time there should be no changes,
// and we can return nodes previously allocated.
LockedNodeList allNodes = nodeRepository.nodes().list(PROBE_LOCK);
NodeIndices indices = new NodeIndices(cluster.id(), allNodes);
- NodeAllocation probeAllocation = prepareAllocation(application, cluster, requested, indices::probeNext, allNodes);
+ NodeAllocation probeAllocation = prepareAllocation(application, cluster, requested, indices::probeNext, allNodes, makeExclusive);
if (probeAllocation.fulfilledAndNoChanges()) {
List<Node> acceptedNodes = probeAllocation.finalNodes();
indices.commitProbe();
@@ -81,16 +89,25 @@ public class Preparer {
} else {
// There were some changes, so re-do the allocation with locks
indices.resetProbe();
- return prepareWithLocks(application, cluster, requested, indices);
+ return prepareWithLocks(application, cluster, requested, indices, makeExclusive);
}
}
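prepare(...) above is an instance of optimistic concurrency: do the allocation in memory without the global unallocated lock, and repeat it under locks only when the probe detects changes that must be written. A minimal sketch of that shape, with illustrative names that are not node-repository API:

import java.util.concurrent.locks.ReentrantLock;
import java.util.function.Supplier;

// Minimal sketch of the probe-then-lock shape of prepare(...) above:
// compute the result optimistically without the global unallocated lock,
// and only redo the work under the lock when the probe found changes.
public class ProbeThenLock {
    private final ReentrantLock unallocatedLock = new ReentrantLock();

    interface Probe<T> {
        boolean fulfilledAndNoChanges();
        T result();
    }

    <T> T prepare(Supplier<Probe<T>> probe, Supplier<T> prepareUnderLock) {
        Probe<T> probed = probe.get();       // fast path, no lock held
        if (probed.fulfilledAndNoChanges())
            return probed.result();
        unallocatedLock.lock();              // probe saw changes: lock and redo
        try {
            return prepareUnderLock.get();
        } finally {
            unallocatedLock.unlock();
        }
    }
}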
+ private ApplicationMutex parentLockOrNull(boolean makeExclusive, NodeType type) {
+ return NodeCandidate.canMakeHostExclusive(makeExclusive, type, nodeRepository.zone().cloud().allowHostSharing()) ?
+ nodeRepository.applications().lock(InfrastructureApplication.withNodeType(type.parentNodeType()).id()) :
+ null;
+ }
+
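prepareWithLocks(...) below puts the possibly-null result of parentLockOrNull(...) straight into its try-with-resources list. That is legal Java: a null resource is permitted and its close() is simply skipped (JLS 14.20.3). A standalone demonstration, with AppLock as a hypothetical stand-in:

// Standalone demo: try-with-resources skips close() for null resources,
// which is what lets parentLockOrNull(...) return null when no parent
// lock is needed while still sitting in the resource list.
public class NullResourceDemo {
    static class AppLock implements AutoCloseable {
        @Override public void close() { System.out.println("parent lock released"); }
    }

    static AppLock parentLockOrNull(boolean exclusiveHostsPossible) {
        return exclusiveHostsPossible ? new AppLock() : null;
    }

    public static void main(String[] args) {
        try (AppLock lock = parentLockOrNull(false)) { // null: close() skipped
            System.out.println("prepared without parent lock, lock = " + lock);
        }
        try (AppLock lock = parentLockOrNull(true)) {  // non-null: closed on exit
            System.out.println("prepared holding parent lock");
        }
    }
}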
/// Note that this will write to the node repo.
- private List<Node> prepareWithLocks(ApplicationId application, ClusterSpec cluster, NodeSpec requested, NodeIndices indices) {
+ private List<Node> prepareWithLocks(ApplicationId application, ClusterSpec cluster, NodeSpec requested, NodeIndices indices, boolean makeExclusive) {
+ Runnable waiter = null;
+ List<Node> acceptedNodes;
try (Mutex lock = nodeRepository.applications().lock(application);
+ ApplicationMutex parentLockOrNull = parentLockOrNull(makeExclusive, requested.type());
Mutex allocationLock = nodeRepository.nodes().lockUnallocated()) {
LockedNodeList allNodes = nodeRepository.nodes().list(allocationLock);
- NodeAllocation allocation = prepareAllocation(application, cluster, requested, indices::next, allNodes);
+ NodeAllocation allocation = prepareAllocation(application, cluster, requested, indices::next, allNodes, makeExclusive);
NodeType hostType = allocation.nodeType().hostType();
if (canProvisionDynamically(hostType) && allocation.hostDeficit().isPresent()) {
HostSharing sharing = hostSharing(cluster, hostType);
@@ -127,12 +144,13 @@ public class Preparer {
requested.cloudAccount(),
deficit.dueToFlavorUpgrade());
Predicate<NodeResources> realHostResourcesWithinLimits = resources -> nodeRepository.nodeResourceLimits().isWithinRealLimits(resources, application, cluster);
- hostProvisioner.get().provisionHosts(request, realHostResourcesWithinLimits, whenProvisioned);
+ waiter = hostProvisioner.get().provisionHosts(request, realHostResourcesWithinLimits, whenProvisioned);
} catch (NodeAllocationException e) {
// Mark the nodes that were written to ZK in the consumer for deprovisioning. While these hosts do
// not exist, we cannot remove them from ZK here because other nodes may already have been
// allocated on them, so let HostDeprovisioner deal with it
- hosts.forEach(host -> nodeRepository.nodes().deprovision(host.hostname(), Agent.system, nodeRepository.clock().instant()));
+ hosts.forEach(host -> nodeRepository.nodes().parkRecursively(host.hostname(), Agent.system, true,
+ "Failed to provision: " + Exceptions.toMessageString(e)));
throw e;
}
} else if (allocation.hostDeficit().isPresent() && requested.canFail() &&
@@ -140,7 +158,7 @@ public class Preparer {
// Non-dynamically provisioned zone with a deficit because we just now retired some nodes.
// Try again, but without retiring
indices.resetProbe();
- List<Node> accepted = prepareWithLocks(application, cluster, cns.withoutRetiring(), indices);
+ List<Node> accepted = prepareWithLocks(application, cluster, cns.withoutRetiring(), indices, makeExclusive);
log.warning("Prepared " + application + " " + cluster.id() + " without retirement due to lack of capacity");
return accepted;
}
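The NodeAllocationException handler above now parks the failed hosts with the failure reason attached instead of deprovisioning them outright, then rethrows. An isolated sketch of that record-reason-and-rethrow shape; Registry and messageChain are hypothetical stand-ins, not node-repository API:

import java.util.List;

// Isolated sketch of the catch block above: on NodeAllocationException,
// keep the provisioned-but-failed hosts around, annotated with the reason,
// and rethrow so the caller still sees the failure.
public class ParkOnFailure {
    interface Registry { void parkRecursively(String hostname, String reason); }

    static class NodeAllocationException extends RuntimeException {
        NodeAllocationException(String message, Throwable cause) { super(message, cause); }
    }

    static void provisionHosts(List<String> hostnames, Registry registry, Runnable provision) {
        try {
            provision.run();
        } catch (NodeAllocationException e) {
            String reason = "Failed to provision: " + messageChain(e);
            hostnames.forEach(host -> registry.parkRecursively(host, reason));
            throw e; // other nodes may already be allocated on these hosts,
                     // so removal is left to a separate deprovisioner
        }
    }

    // Joins the message of e with the messages of its causes, similar in
    // spirit to com.yahoo.yolean.Exceptions.toMessageString.
    static String messageChain(Throwable e) {
        StringBuilder chain = new StringBuilder();
        for (Throwable t = e; t != null; t = t.getCause()) {
            if (chain.length() > 0) chain.append(": ");
            chain.append(t.getMessage() != null ? t.getMessage() : t.getClass().getSimpleName());
        }
        return chain.toString();
    }
}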
@@ -150,7 +168,12 @@ public class Preparer {
allocation.allocationFailureDetails(), true);
// Carry out and return allocation
- List<Node> acceptedNodes = allocation.finalNodes();
+ if (parentLockOrNull != null) {
+ List<Node> exclusiveParents = allocation.parentsRequiredToBeExclusive();
+ nodeRepository.nodes().setExclusiveToApplicationId(exclusiveParents, parentLockOrNull);
+ // TODO: also update tags
+ }
+ acceptedNodes = allocation.finalNodes();
nodeRepository.nodes().reserve(allocation.reservableNodes());
nodeRepository.nodes().addReservedNodes(new LockedNodeList(allocation.newNodes(), allocationLock));
@@ -160,14 +183,16 @@ public class Preparer {
.filter(node -> node.parentHostname().isEmpty() || activeHosts.parentOf(node).isPresent())
.toList();
}
- return acceptedNodes;
}
+
+ if (waiter != null) waiter.run();
+ return acceptedNodes;
}
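provisionHosts(...) now returns a waiter that is captured while the locks are held but run only after the try-with-resources releases them, so a slow provisioning wait never blocks other threads contending for the same locks. A minimal sketch of that capture-under-lock, wait-after-unlock shape, under the assumption that the returned Runnable blocks until provisioning completes:

import java.util.concurrent.locks.ReentrantLock;

// Sketch of the deferred-waiter shape introduced above: work is started
// while the lock is held, but the blocking wait happens only after the
// lock is released.
public class DeferredWaiter {
    private final ReentrantLock allocationLock = new ReentrantLock();

    // Hypothetical provisioner: starts work, returns something to wait on.
    private Runnable provisionAsync() {
        Thread worker = new Thread(() -> { /* slow host provisioning */ });
        worker.start();
        return () -> {
            try {
                worker.join();
            } catch (InterruptedException e) {
                Thread.currentThread().interrupt();
            }
        };
    }

    void prepare() {
        Runnable waiter = null;
        allocationLock.lock();
        try {
            waiter = provisionAsync();     // kicked off under the lock
        } finally {
            allocationLock.unlock();       // release before blocking
        }
        if (waiter != null) waiter.run();  // safe to wait: no locks held
    }
}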
private NodeAllocation prepareAllocation(ApplicationId application, ClusterSpec cluster, NodeSpec requested,
- Supplier<Integer> nextIndex, LockedNodeList allNodes) {
+ Supplier<Integer> nextIndex, LockedNodeList allNodes, boolean makeExclusive) {
validateAccount(requested.cloudAccount(), application, allNodes);
- NodeAllocation allocation = new NodeAllocation(allNodes, application, cluster, requested, nextIndex, nodeRepository);
+ NodeAllocation allocation = new NodeAllocation(allNodes, application, cluster, requested, nextIndex, nodeRepository, makeExclusive);
var allocationContext = IP.Allocation.Context.from(nodeRepository.zone().cloud().name(),
requested.cloudAccount().isExclave(nodeRepository.zone()),
nodeRepository.nameResolver());
@@ -176,10 +201,13 @@ public class Preparer {
cluster,
requested,
nodeRepository.zone().cloud().dynamicProvisioning(),
+ nodeRepository.zone().cloud().allowHostSharing(),
allocationContext,
nodeRepository.nodes(),
nodeRepository.resourcesCalculator(),
- nodeRepository.spareCount());
+ nodeRepository.spareCount(),
+ nodeRepository.exclusiveAllocation(cluster),
+ makeExclusive);
allocation.offer(prioritizer.collect());
return allocation;
}
@@ -208,7 +236,9 @@ public class Preparer {
private HostSharing hostSharing(ClusterSpec cluster, NodeType hostType) {
if ( hostType.isSharable())
- return nodeRepository.exclusiveAllocation(cluster) ? HostSharing.exclusive : HostSharing.any;
+ return nodeRepository.exclusiveProvisioning(cluster) ? HostSharing.provision :
+ nodeRepository.exclusiveAllocation(cluster) ? HostSharing.exclusive :
+ HostSharing.any;
else
return HostSharing.any;
}
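The new hostSharing(...) ladder checks the strictest requirement first: exclusive provisioning beats exclusive allocation, which beats plain sharing, and non-sharable host types always fall through to HostSharing.any. A self-contained restatement with the precedence made explicit (booleans stand in for the nodeRepository queries):

// Self-contained restatement of the hostSharing(...) ladder above, with
// the precedence spelled out: provision > exclusive > any.
public class HostSharingLadder {
    enum HostSharing { provision, exclusive, any }

    static HostSharing hostSharing(boolean sharable,
                                   boolean exclusiveProvisioning,
                                   boolean exclusiveAllocation) {
        if (!sharable) return HostSharing.any;
        if (exclusiveProvisioning) return HostSharing.provision;
        if (exclusiveAllocation) return HostSharing.exclusive;
        return HostSharing.any;
    }

    public static void main(String[] args) {
        System.out.println(hostSharing(true, true, true));   // provision
        System.out.println(hostSharing(true, false, true));  // exclusive
        System.out.println(hostSharing(true, false, false)); // any
    }
}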