diff options
Diffstat (limited to 'node-repository/src/main/java/com/yahoo')
59 files changed, 1396 insertions, 909 deletions
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/LockedNodeList.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/LockedNodeList.java index dc86daf2c67..e760e36f90b 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/LockedNodeList.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/LockedNodeList.java @@ -17,9 +17,16 @@ import java.util.Objects; */ public final class LockedNodeList extends NodeList { + private final Mutex lock; + public LockedNodeList(List<Node> nodes, Mutex lock) { super(nodes, false); - Objects.requireNonNull(lock, "lock must be non-null"); + this.lock = Objects.requireNonNull(lock, "lock must be non-null"); + } + + /** Returns a new LockedNodeList with the same lock. */ + public LockedNodeList childList(List<Node> nodes) { + return new LockedNodeList(nodes, lock); } } diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/Node.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/Node.java index 81ac4544223..a80f07acba2 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/Node.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/Node.java @@ -46,6 +46,7 @@ public final class Node implements Nodelike { private final String hostname; private final IP.Config ipConfig; private final String id; + private final Optional<String> extraId; private final Optional<String> parentHostname; private final Flavor flavor; private final Status status; @@ -89,13 +90,14 @@ public final class Node implements Nodelike { } /** DO NOT USE: public for serialization purposes. See {@code create} helper methods. */ - public Node(String id, IP.Config ipConfig, String hostname, Optional<String> parentHostname, + public Node(String id, Optional<String> extraId, IP.Config ipConfig, String hostname, Optional<String> parentHostname, Flavor flavor, Status status, State state, Optional<Allocation> allocation, History history, NodeType type, Reports reports, Optional<String> modelName, Optional<TenantName> reservedTo, Optional<ApplicationId> exclusiveToApplicationId, Optional<Duration> hostTTL, Optional<Instant> hostEmptyAt, Optional<ClusterSpec.Type> exclusiveToClusterType, Optional<String> switchHostname, List<TrustStoreItem> trustStoreItems, CloudAccount cloudAccount, Optional<WireguardKey> wireguardPubKey) { this.id = Objects.requireNonNull(id, "A node must have an ID"); + this.extraId = Objects.requireNonNull(extraId, "Extra ID cannot be null"); this.hostname = requireNonEmptyString(hostname, "A node must have a hostname"); this.ipConfig = Objects.requireNonNull(ipConfig, "A node must a have an IP config"); this.parentHostname = requireNonEmptyString(parentHostname, "A parent host name must be a proper value"); @@ -160,9 +162,13 @@ public final class Node implements Nodelike { * - OpenStack: UUID * - AWS: Instance ID * - Linux containers: UUID + * - GCP: Instance name */ public String id() { return id; } + /** Additional unique identifier for this node, if any, as above. GCP instance ID. */ + public Optional<String> extraId() { return extraId; } + @Override public Optional<String> parentHostname() { return parentHostname; } @@ -276,7 +282,7 @@ public final class Node implements Nodelike { * If both given wantToRetire and wantToDeprovision are equal to the current values, the method is no-op. */ public Node withWantToRetire(boolean wantToRetire, boolean wantToDeprovision, Agent agent, Instant at) { - return withWantToRetire(wantToRetire, wantToDeprovision, false, false, agent, at); + return withWantToRetire(wantToRetire, wantToDeprovision, status.wantToRebuild(), status.wantToUpgradeFlavor(), agent, at); } /** @@ -351,14 +357,14 @@ public final class Node implements Nodelike { /** Returns a node with the status assigned to the given value */ public Node with(Status status) { - return new Node(id, ipConfig, hostname, parentHostname, flavor, status, state, allocation, history, type, + return new Node(id, extraId, ipConfig, hostname, parentHostname, flavor, status, state, allocation, history, type, reports, modelName, reservedTo, exclusiveToApplicationId, hostTTL, hostEmptyAt, exclusiveToClusterType, switchHostname, trustStoreItems, cloudAccount, wireguardPubKey); } /** Returns a node with the type assigned to the given value */ public Node with(NodeType type) { - return new Node(id, ipConfig, hostname, parentHostname, flavor, status, state, allocation, history, type, + return new Node(id, extraId, ipConfig, hostname, parentHostname, flavor, status, state, allocation, history, type, reports, modelName, reservedTo, exclusiveToApplicationId, hostTTL, hostEmptyAt, exclusiveToClusterType, switchHostname, trustStoreItems, cloudAccount, wireguardPubKey); } @@ -367,35 +373,35 @@ public final class Node implements Nodelike { public Node with(Flavor flavor, Agent agent, Instant instant) { if (flavor.equals(this.flavor)) return this; History updateHistory = history.with(new History.Event(History.Event.Type.resized, agent, instant)); - return new Node(id, ipConfig, hostname, parentHostname, flavor, status, state, allocation, updateHistory, type, + return new Node(id, extraId, ipConfig, hostname, parentHostname, flavor, status, state, allocation, updateHistory, type, reports, modelName, reservedTo, exclusiveToApplicationId, hostTTL, hostEmptyAt, exclusiveToClusterType, switchHostname, trustStoreItems, cloudAccount, wireguardPubKey); } /** Returns a copy of this with the reboot generation set to generation */ public Node withReboot(Generation generation) { - return new Node(id, ipConfig, hostname, parentHostname, flavor, status.withReboot(generation), state, allocation, + return new Node(id, extraId, ipConfig, hostname, parentHostname, flavor, status.withReboot(generation), state, allocation, history, type, reports, modelName, reservedTo, exclusiveToApplicationId, hostTTL, hostEmptyAt, exclusiveToClusterType, switchHostname, trustStoreItems, cloudAccount, wireguardPubKey); } /** Returns a copy of this with given id set */ public Node withId(String id) { - return new Node(id, ipConfig, hostname, parentHostname, flavor, status, state, allocation, + return new Node(id, extraId, ipConfig, hostname, parentHostname, flavor, status, state, allocation, history, type, reports, modelName, reservedTo, exclusiveToApplicationId, hostTTL, hostEmptyAt, exclusiveToClusterType, switchHostname, trustStoreItems, cloudAccount, wireguardPubKey); } /** Returns a copy of this with model name set to given value */ public Node withModelName(String modelName) { - return new Node(id, ipConfig, hostname, parentHostname, flavor, status, state, allocation, history, + return new Node(id, extraId, ipConfig, hostname, parentHostname, flavor, status, state, allocation, history, type, reports, Optional.of(modelName), reservedTo, exclusiveToApplicationId, hostTTL, hostEmptyAt, exclusiveToClusterType, switchHostname, trustStoreItems, cloudAccount, wireguardPubKey); } /** Returns a copy of this with model name cleared */ public Node withoutModelName() { - return new Node(id, ipConfig, hostname, parentHostname, flavor, status, state, allocation, history, + return new Node(id, extraId, ipConfig, hostname, parentHostname, flavor, status, state, allocation, history, type, reports, Optional.empty(), reservedTo, exclusiveToApplicationId, hostTTL, hostEmptyAt, exclusiveToClusterType, switchHostname, trustStoreItems, cloudAccount, wireguardPubKey); } @@ -437,21 +443,21 @@ public final class Node implements Nodelike { * Do not use this to allocate a node. */ public Node with(Allocation allocation) { - return new Node(id, ipConfig, hostname, parentHostname, flavor, status, state, Optional.of(allocation), history, + return new Node(id, extraId, ipConfig, hostname, parentHostname, flavor, status, state, Optional.of(allocation), history, type, reports, modelName, reservedTo, exclusiveToApplicationId, hostTTL, hostEmptyAt, exclusiveToClusterType, switchHostname, trustStoreItems, cloudAccount, wireguardPubKey); } /** Returns a copy of this node with IP config set to the given value. */ public Node with(IP.Config ipConfig) { - return new Node(id, ipConfig, hostname, parentHostname, flavor, status, state, allocation, history, + return new Node(id, extraId, ipConfig, hostname, parentHostname, flavor, status, state, allocation, history, type, reports, modelName, reservedTo, exclusiveToApplicationId, hostTTL, hostEmptyAt, exclusiveToClusterType, switchHostname, trustStoreItems, cloudAccount, wireguardPubKey); } /** Returns a copy of this node with the parent hostname assigned to the given value. */ public Node withParentHostname(String parentHostname) { - return new Node(id, ipConfig, hostname, Optional.of(parentHostname), flavor, status, state, allocation, + return new Node(id, extraId, ipConfig, hostname, Optional.of(parentHostname), flavor, status, state, allocation, history, type, reports, modelName, reservedTo, exclusiveToApplicationId, hostTTL, hostEmptyAt, exclusiveToClusterType, switchHostname, trustStoreItems, cloudAccount, wireguardPubKey); } @@ -459,51 +465,57 @@ public final class Node implements Nodelike { public Node withReservedTo(TenantName tenant) { if (type != NodeType.host) throw new IllegalArgumentException("Only host nodes can be reserved, " + hostname + " has type " + type); - return new Node(id, ipConfig, hostname, parentHostname, flavor, status, state, allocation, history, + return new Node(id, extraId, ipConfig, hostname, parentHostname, flavor, status, state, allocation, history, type, reports, modelName, Optional.of(tenant), exclusiveToApplicationId, hostTTL, hostEmptyAt, exclusiveToClusterType, switchHostname, trustStoreItems, cloudAccount, wireguardPubKey); } /** Returns a copy of this node which is not reserved to a tenant */ public Node withoutReservedTo() { - return new Node(id, ipConfig, hostname, parentHostname, flavor, status, state, allocation, history, + return new Node(id, extraId, ipConfig, hostname, parentHostname, flavor, status, state, allocation, history, type, reports, modelName, Optional.empty(), exclusiveToApplicationId, hostTTL, hostEmptyAt, exclusiveToClusterType, switchHostname, trustStoreItems, cloudAccount, wireguardPubKey); } public Node withExclusiveToApplicationId(ApplicationId exclusiveTo) { - return new Node(id, ipConfig, hostname, parentHostname, flavor, status, state, allocation, history, + return new Node(id, extraId, ipConfig, hostname, parentHostname, flavor, status, state, allocation, history, type, reports, modelName, reservedTo, Optional.ofNullable(exclusiveTo), hostTTL, hostEmptyAt, exclusiveToClusterType, switchHostname, trustStoreItems, cloudAccount, wireguardPubKey); } + public Node withExtraId(Optional<String> extraId) { + return new Node(id, extraId, ipConfig, hostname, parentHostname, flavor, status, state, allocation, history, + type, reports, modelName, reservedTo, exclusiveToApplicationId, hostTTL, hostEmptyAt, + exclusiveToClusterType, switchHostname, trustStoreItems, cloudAccount, wireguardPubKey); + } + public Node withHostTTL(Duration hostTTL) { - return new Node(id, ipConfig, hostname, parentHostname, flavor, status, state, allocation, history, + return new Node(id, extraId, ipConfig, hostname, parentHostname, flavor, status, state, allocation, history, type, reports, modelName, reservedTo, exclusiveToApplicationId, Optional.ofNullable(hostTTL), hostEmptyAt, exclusiveToClusterType, switchHostname, trustStoreItems, cloudAccount, wireguardPubKey); } public Node withHostEmptyAt(Instant hostEmptyAt) { - return new Node(id, ipConfig, hostname, parentHostname, flavor, status, state, allocation, history, + return new Node(id, extraId, ipConfig, hostname, parentHostname, flavor, status, state, allocation, history, type, reports, modelName, reservedTo, exclusiveToApplicationId, hostTTL, Optional.ofNullable(hostEmptyAt), exclusiveToClusterType, switchHostname, trustStoreItems, cloudAccount, wireguardPubKey); } public Node withExclusiveToClusterType(ClusterSpec.Type exclusiveTo) { - return new Node(id, ipConfig, hostname, parentHostname, flavor, status, state, allocation, history, + return new Node(id, extraId, ipConfig, hostname, parentHostname, flavor, status, state, allocation, history, type, reports, modelName, reservedTo, exclusiveToApplicationId, hostTTL, hostEmptyAt, Optional.ofNullable(exclusiveTo), switchHostname, trustStoreItems, cloudAccount, wireguardPubKey); } public Node withWireguardPubkey(WireguardKey wireguardPubkey) { - return new Node(id, ipConfig, hostname, parentHostname, flavor, status, state, allocation, history, + return new Node(id, extraId, ipConfig, hostname, parentHostname, flavor, status, state, allocation, history, type, reports, modelName, reservedTo, exclusiveToApplicationId, hostTTL, hostEmptyAt, exclusiveToClusterType, switchHostname, trustStoreItems, cloudAccount, Optional.ofNullable(wireguardPubkey)); } /** Returns a copy of this node with switch hostname set to given value */ public Node withSwitchHostname(String switchHostname) { - return new Node(id, ipConfig, hostname, parentHostname, flavor, status, state, allocation, history, + return new Node(id, extraId, ipConfig, hostname, parentHostname, flavor, status, state, allocation, history, type, reports, modelName, reservedTo, exclusiveToApplicationId, hostTTL, hostEmptyAt, exclusiveToClusterType, Optional.ofNullable(switchHostname), trustStoreItems, cloudAccount, wireguardPubKey); } @@ -556,19 +568,19 @@ public final class Node implements Nodelike { /** Returns a copy of this node with the given history. */ public Node with(History history) { - return new Node(id, ipConfig, hostname, parentHostname, flavor, status, state, allocation, history, + return new Node(id, extraId, ipConfig, hostname, parentHostname, flavor, status, state, allocation, history, type, reports, modelName, reservedTo, exclusiveToApplicationId, hostTTL, hostEmptyAt, exclusiveToClusterType, switchHostname, trustStoreItems, cloudAccount, wireguardPubKey); } public Node with(Reports reports) { - return new Node(id, ipConfig, hostname, parentHostname, flavor, status, state, allocation, history, + return new Node(id, extraId, ipConfig, hostname, parentHostname, flavor, status, state, allocation, history, type, reports, modelName, reservedTo, exclusiveToApplicationId, hostTTL, hostEmptyAt, exclusiveToClusterType, switchHostname, trustStoreItems, cloudAccount, wireguardPubKey); } public Node with(List<TrustStoreItem> trustStoreItems) { - return new Node(id, ipConfig, hostname, parentHostname, flavor, status, state, allocation, history, + return new Node(id, extraId, ipConfig, hostname, parentHostname, flavor, status, state, allocation, history, type, reports, modelName, reservedTo, exclusiveToApplicationId, hostTTL, hostEmptyAt, exclusiveToClusterType, switchHostname, trustStoreItems, cloudAccount, wireguardPubKey); } @@ -814,7 +826,7 @@ public final class Node implements Nodelike { } public Node build() { - return new Node(id, Optional.ofNullable(ipConfig).orElse(IP.Config.EMPTY), hostname, Optional.ofNullable(parentHostname), + return new Node(id, Optional.empty(), Optional.ofNullable(ipConfig).orElse(IP.Config.EMPTY), hostname, Optional.ofNullable(parentHostname), flavor, Optional.ofNullable(status).orElseGet(Status::initial), state, Optional.ofNullable(allocation), Optional.ofNullable(history).orElseGet(History::empty), type, Optional.ofNullable(reports).orElseGet(Reports::new), Optional.ofNullable(modelName), Optional.ofNullable(reservedTo), Optional.ofNullable(exclusiveToApplicationId), diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/NodeList.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/NodeList.java index d1b900b9969..f355e66b1e4 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/NodeList.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/NodeList.java @@ -238,7 +238,7 @@ public class NodeList extends AbstractFilteringList<Node, NodeList> { /** Returns the parent node of the given child node */ public Optional<Node> parentOf(Node child) { - return child.parentHostname().flatMap(this::get).map(NodeFamily::node); + return child.parentHostname().flatMap(this::node); } /** Returns the nodes contained in the group identified by given index */ @@ -391,26 +391,19 @@ public class NodeList extends AbstractFilteringList<Node, NodeList> { } private Map<String, NodeFamily> cache() { - return nodeCache.updateAndGet((oldValue) -> { - if (oldValue != null) { - return oldValue; - } - Map<String, NodeFamily> newValue = new HashMap<>(); - for (var node : this) { - NodeFamily family; - if (node.parentHostname().isEmpty()) { - family = new NodeFamily(node, new ArrayList<>()); - for (var child : this) { - if (child.hasParent(node.hostname())) { - family.children.add(child); - } - } - } else { - family = new NodeFamily(node, List.of()); - } - newValue.put(node.hostname(), family); - } - return newValue; + return nodeCache.updateAndGet((cached) -> { + if (cached != null) + return cached; + + Map<String, List<Node>> children = new HashMap<>(); + for (Node node : this) + node.parentHostname().ifPresent(parent -> children.computeIfAbsent(parent, __ -> new ArrayList<>()).add(node)); + + Map<String, NodeFamily> families = new HashMap<>(); + for (Node node : this) + families.put(node.hostname(), new NodeFamily(node, children.getOrDefault(node.hostname(), List.of()))); + + return families; }); } diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/NodeMutex.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/NodeMutex.java index 60fd07951c6..20c246b3ebd 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/NodeMutex.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/NodeMutex.java @@ -28,4 +28,5 @@ public class NodeMutex implements Mutex { return new NodeMutex(updatedNode, mutex); } + } diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/NodeRepository.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/NodeRepository.java index 8be3fbd1bf2..2d4e7142622 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/NodeRepository.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/NodeRepository.java @@ -205,7 +205,7 @@ public class NodeRepository extends AbstractComponent { */ public boolean exclusiveAllocation(ClusterSpec clusterSpec) { return clusterSpec.isExclusive() || - ( clusterSpec.type().isContainer() && zone.system().isPublic() && !zone.environment().isTest() ) || + ( clusterSpec.type().isContainer() && zone.system().isPublic() && !zone.environment().isTest() ) || ( !zone().cloud().allowHostSharing() && !sharedHosts.value().isEnabled(clusterSpec.type().name())); } @@ -229,7 +229,7 @@ public class NodeRepository extends AbstractComponent { applicationNodes.asList(), Agent.system, Optional.of("Application is removed"), - transaction.nested()); + transaction); applications.remove(transaction); } diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/AllocatableClusterResources.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/AllocatableClusterResources.java index a2ef76e84d0..40d1d50e0e8 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/AllocatableClusterResources.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/AllocatableClusterResources.java @@ -195,6 +195,7 @@ public class AllocatableClusterResources { else { // Return the cheapest flavor satisfying the requested resources, if any NodeResources cappedWantedResources = applicationLimits.cap(wantedResources.nodeResources()); Optional<AllocatableClusterResources> best = Optional.empty(); + Optional<AllocatableClusterResources> bestDisregardingDiskLimit = Optional.empty(); for (Flavor flavor : nodeRepository.flavors().getFlavors()) { // Flavor decide resources: Real resources are the worst case real resources we'll get if we ask for these advertised resources NodeResources advertisedResources = nodeRepository.resourcesCalculator().advertisedResourcesOf(flavor); @@ -202,7 +203,9 @@ public class AllocatableClusterResources { // Adjust where we don't need exact match to the flavor if (flavor.resources().storageType() == NodeResources.StorageType.remote) { - double diskGb = systemLimits.enlargeToLegal(cappedWantedResources, applicationId, clusterSpec, exclusive).diskGb(); + double diskGb = systemLimits.enlargeToLegal(cappedWantedResources, applicationId, clusterSpec, exclusive, true).diskGb(); + if (diskGb > applicationLimits.max().nodeResources().diskGb() || diskGb < applicationLimits.min().nodeResources().diskGb()) // TODO: Remove when disk limit is enforced + diskGb = systemLimits.enlargeToLegal(cappedWantedResources, applicationId, clusterSpec, exclusive, false).diskGb(); advertisedResources = advertisedResources.withDiskGb(diskGb); realResources = realResources.withDiskGb(diskGb); } @@ -213,14 +216,24 @@ public class AllocatableClusterResources { if ( ! between(applicationLimits.min().nodeResources(), applicationLimits.max().nodeResources(), advertisedResources)) continue; if ( ! systemLimits.isWithinRealLimits(realResources, applicationId, clusterSpec)) continue; + var candidate = new AllocatableClusterResources(wantedResources.with(realResources), advertisedResources, wantedResources, clusterSpec); + + if ( ! systemLimits.isWithinAdvertisedDiskLimits(advertisedResources, clusterSpec)) { // TODO: Remove when disk limit is enforced + if (bestDisregardingDiskLimit.isEmpty() || candidate.preferableTo(bestDisregardingDiskLimit.get())) { + bestDisregardingDiskLimit = Optional.of(candidate); + } + continue; + } if (best.isEmpty() || candidate.preferableTo(best.get())) { best = Optional.of(candidate); } } + if (best.isEmpty()) + best = bestDisregardingDiskLimit; return best; } } @@ -234,7 +247,7 @@ public class AllocatableClusterResources { boolean bestCase) { var systemLimits = new NodeResourceLimits(nodeRepository); var advertisedResources = nodeRepository.resourcesCalculator().realToRequest(wantedResources.nodeResources(), exclusive, bestCase); - advertisedResources = systemLimits.enlargeToLegal(advertisedResources, applicationId, clusterSpec, exclusive); // Ask for something legal + advertisedResources = systemLimits.enlargeToLegal(advertisedResources, applicationId, clusterSpec, exclusive, true); // Ask for something legal advertisedResources = applicationLimits.cap(advertisedResources); // Overrides other conditions, even if it will then fail var realResources = nodeRepository.resourcesCalculator().requestToReal(advertisedResources, exclusive, bestCase); // What we'll really get if ( ! systemLimits.isWithinRealLimits(realResources, applicationId, clusterSpec) diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/AllocationOptimizer.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/AllocationOptimizer.java index b56e8d1b247..e586e6277d5 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/AllocationOptimizer.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/AllocationOptimizer.java @@ -5,6 +5,7 @@ import com.yahoo.config.provision.ClusterResources; import com.yahoo.config.provision.IntRange; import com.yahoo.config.provision.NodeResources; import com.yahoo.vespa.hosted.provision.NodeRepository; +import com.yahoo.vespa.hosted.provision.provisioning.NodeResourceLimits; import java.util.Optional; @@ -63,9 +64,8 @@ public class AllocationOptimizer { availableRealHostResources, nodeRepository); if (allocatableResources.isEmpty()) continue; - if (bestAllocation.isEmpty() || allocatableResources.get().preferableTo(bestAllocation.get())) { + if (bestAllocation.isEmpty() || allocatableResources.get().preferableTo(bestAllocation.get())) bestAllocation = allocatableResources; - } } } return bestAllocation; @@ -92,6 +92,7 @@ public class AllocationOptimizer { .multiply(clusterModel.loadWith(nodes, groups)) // redundancy aware adjustment with these counts .divide(clusterModel.redundancyAdjustment()) // correct for double redundancy adjustment .scaled(current.realResources().nodeResources()); + // Combine the scaled resource values computed here // with the currently configured non-scaled values, given in the limits, if any var nonScaled = limits.isEmpty() || limits.min().nodeResources().isUnspecified() diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/Autoscaler.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/Autoscaler.java index a7d5cc50828..795cbd59c4b 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/Autoscaler.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/Autoscaler.java @@ -2,7 +2,6 @@ package com.yahoo.vespa.hosted.provision.autoscale; import com.yahoo.config.provision.ClusterResources; -import com.yahoo.vespa.hosted.provision.Node; import com.yahoo.vespa.hosted.provision.NodeList; import com.yahoo.vespa.hosted.provision.NodeRepository; import com.yahoo.vespa.hosted.provision.applications.Application; @@ -10,7 +9,6 @@ import com.yahoo.vespa.hosted.provision.applications.Cluster; import com.yahoo.vespa.hosted.provision.autoscale.Autoscaling.Status; import java.time.Duration; -import java.time.Instant; import java.util.Optional; /** @@ -23,7 +21,9 @@ public class Autoscaler { /** What cost difference is worth a reallocation? */ private static final double costDifferenceWorthReallocation = 0.1; /** What resource difference is worth a reallocation? */ - private static final double resourceDifferenceWorthReallocation = 0.03; + private static final double resourceIncreaseWorthReallocation = 0.03; + /** The load increase headroom (as a fraction) we should have before needing to scale up, to decide to scale down */ + private static final double headroomRequiredToScaleDown = 0.1; private final NodeRepository nodeRepository; private final AllocationOptimizer allocationOptimizer; @@ -70,22 +70,53 @@ public class Autoscaler { if ( ! clusterModel.isStable(nodeRepository)) return Autoscaling.dontScale(Status.waiting, "Cluster change in progress", clusterModel); - var currentAllocation = new AllocatableClusterResources(clusterNodes.not().retired(), nodeRepository); - Optional<AllocatableClusterResources> bestAllocation = - allocationOptimizer.findBestAllocation(clusterModel.loadAdjustment(), currentAllocation, clusterModel, limits); - if (bestAllocation.isEmpty()) + var current = new AllocatableClusterResources(clusterNodes.not().retired(), nodeRepository); + var loadAdjustment = clusterModel.loadAdjustment(); + + // Ensure we only scale down if we'll have enough headroom to not scale up again given a small load increase + var target = allocationOptimizer.findBestAllocation(loadAdjustment, current, clusterModel, limits); + var headroomAdjustedLoadAdjustment = adjustForHeadroom(loadAdjustment, clusterModel, target); + if ( ! headroomAdjustedLoadAdjustment.equals(loadAdjustment)) { + loadAdjustment = headroomAdjustedLoadAdjustment; + target = allocationOptimizer.findBestAllocation(loadAdjustment, current, clusterModel, limits); + } + + if (target.isEmpty()) return Autoscaling.dontScale(Status.insufficient, "No allocations are possible within configured limits", clusterModel); - if (! worthRescaling(currentAllocation.realResources(), bestAllocation.get().realResources())) { - if (bestAllocation.get().fulfilment() < 0.9999999) + if (! worthRescaling(current.realResources(), target.get().realResources())) { + if (target.get().fulfilment() < 0.9999999) return Autoscaling.dontScale(Status.insufficient, "Configured limits prevents ideal scaling of this cluster", clusterModel); else if ( ! clusterModel.safeToScaleDown() && clusterModel.idealLoad().any(v -> v < 1.0)) return Autoscaling.dontScale(Status.ideal, "Cooling off before considering to scale down", clusterModel); else - return Autoscaling.dontScale(Status.ideal, "Cluster is ideally scaled (within limits)", clusterModel); + return Autoscaling.dontScale(Status.ideal, "Cluster is ideally scaled (within configured limits)", clusterModel); } - return Autoscaling.scaleTo(bestAllocation.get().advertisedResources(), clusterModel); + return Autoscaling.scaleTo(target.get().advertisedResources(), clusterModel); + } + + /** + * When scaling down we may end up with resources that are just barely below the new ideal with the new number + * of nodes, as fewer nodes leads to a lower ideal load (due to redundancy). + * If that headroom is too small, then do not scale down as it will likely lead to scaling back up again soon. + */ + private Load adjustForHeadroom(Load loadAdjustment, ClusterModel clusterModel, + Optional<AllocatableClusterResources> target) { + if (target.isEmpty()) return loadAdjustment; + + // If we change to this target, what would our current peak be compared to the ideal + var relativeLoadWithTarget = + loadAdjustment // redundancy aware target relative to current load + .multiply(clusterModel.loadWith(target.get().nodes(), target.get().groups())) // redundancy aware adjustment with target + .divide(clusterModel.redundancyAdjustment()); // correct for double redundancy adjustment + if (loadAdjustment.cpu() < 1 && (1.0 - relativeLoadWithTarget.cpu()) < headroomRequiredToScaleDown) + loadAdjustment = loadAdjustment.withCpu(1.0); + if (loadAdjustment.memory() < 1 && (1.0 - relativeLoadWithTarget.memory()) < headroomRequiredToScaleDown) + loadAdjustment = loadAdjustment.withMemory(1.0); + if (loadAdjustment.disk() < 1 && (1.0 - relativeLoadWithTarget.disk()) < headroomRequiredToScaleDown) + loadAdjustment = loadAdjustment.withDisk(1.0); + return loadAdjustment; } /** Returns true if it is worthwhile to make the given resource change, false if it is too insignificant */ @@ -95,12 +126,14 @@ public class Autoscaler { if (meaningfulIncrease(from.totalResources().memoryGb(), to.totalResources().memoryGb())) return true; if (meaningfulIncrease(from.totalResources().diskGb(), to.totalResources().diskGb())) return true; - // Otherwise, only *decrease* if it reduces cost meaningfully + // Otherwise, only *decrease* if + // - cost is reduced meaningfully + // - the new resources won't be so much smaller that a small fluctuation in load will cause an increase return ! similar(from.cost(), to.cost(), costDifferenceWorthReallocation); } public static boolean meaningfulIncrease(double from, double to) { - return from < to && ! similar(from, to, resourceDifferenceWorthReallocation); + return from < to && ! similar(from, to, resourceIncreaseWorthReallocation); } private static boolean similar(double r1, double r2, double threshold) { diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/ClusterModel.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/ClusterModel.java index c75a5ca0b26..a5490996a2c 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/ClusterModel.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/ClusterModel.java @@ -168,7 +168,6 @@ public class ClusterModel { } public static Duration minScalingDuration(ClusterSpec clusterSpec) { - if (clusterSpec.isStateful()) return Duration.ofHours(6); return Duration.ofMinutes(5); } @@ -176,9 +175,9 @@ public class ClusterModel { * Returns the relative load adjustment accounting for redundancy given these nodes+groups * relative to node nodes+groups in this. */ - public Load loadWith(int trueNodes, int trueGroups) { - int nodes = nodesAdjustedForRedundancy(trueNodes, trueGroups); - int groups = groupsAdjustedForRedundancy(trueNodes, trueGroups); + public Load loadWith(int givenNodes, int givenGroups) { + int nodes = nodesAdjustedForRedundancy(givenNodes, givenGroups); + int groups = groupsAdjustedForRedundancy(givenNodes, givenGroups); if (clusterSpec().type() == ClusterSpec.Type.content) { // load scales with node share of content int groupSize = nodes / groups; @@ -273,7 +272,7 @@ public class ClusterModel { /** The number of nodes this cluster has, or will have if not deployed yet. */ // TODO: Make this the deployed, not current count - private int nodeCount() { + public int nodeCount() { if ( ! nodes.isEmpty()) return (int)nodes.not().retired().stream().count(); return cluster.minResources().nodes(); } @@ -290,12 +289,12 @@ public class ClusterModel { return (int)Math.ceil((double)nodeCount() / groupCount()); } - private int nodesAdjustedForRedundancy(int nodes, int groups) { + private static int nodesAdjustedForRedundancy(int nodes, int groups) { int groupSize = (int)Math.ceil((double)nodes / groups); return nodes > 1 ? (groups == 1 ? nodes - 1 : nodes - groupSize) : nodes; } - private int groupsAdjustedForRedundancy(int nodes, int groups) { + private static int groupsAdjustedForRedundancy(int nodes, int groups) { return nodes > 1 ? (groups == 1 ? 1 : groups - 1) : groups; } @@ -341,8 +340,7 @@ public class ClusterModel { /** Ideal cpu load must take the application traffic fraction into account. */ double idealLoad() { double queryCpuFraction = queryFraction(); - - // Assumptions: 1) Write load is not organic so we should not grow to handle more. + // Assumptions: 1) Write load is not organic so we should not increase to handle potential future growth. // (TODO: But allow applications to set their target write rate and size for that) // 2) Write load does not change in BCP scenarios. return queryCpuFraction * 1/growthRateHeadroom() * 1/trafficShiftHeadroom() * idealQueryCpuLoad + diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/ApplicationMaintainer.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/ApplicationMaintainer.java index a1eae42da38..91a10a1d08e 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/ApplicationMaintainer.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/ApplicationMaintainer.java @@ -80,16 +80,16 @@ public abstract class ApplicationMaintainer extends NodeRepositoryMaintainer { if ( ! canDeployNow(application)) return; // redeployment is no longer needed log.log(Level.INFO, () -> application + " will be redeployed" + (reason == null || reason.isBlank() ? "" : " due to " + reason) + - ", last deploy time " + getLastDeployTime(application)); + ", last activated " + activationTime(application)); deployment.activate(); } finally { pendingDeployments.remove(application); } } - /** Returns the last time application was activated. Epoch is returned if the application has never been deployed. */ - protected final Instant getLastDeployTime(ApplicationId application) { - return deployer.lastDeployTime(application).orElse(Instant.EPOCH); + /** Returns the last time application was activated. Epoch is returned if the application has never been activated. */ + protected final Instant activationTime(ApplicationId application) { + return deployer.activationTime(application).orElse(Instant.EPOCH); } @Override diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/AutoscalingMaintainer.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/AutoscalingMaintainer.java index 856d6e07156..92f86325cf7 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/AutoscalingMaintainer.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/AutoscalingMaintainer.java @@ -7,6 +7,9 @@ import com.yahoo.config.provision.ClusterResources; import com.yahoo.config.provision.ClusterSpec; import com.yahoo.config.provision.Deployer; import com.yahoo.jdisc.Metric; +import com.yahoo.vespa.flags.BooleanFlag; +import com.yahoo.vespa.flags.FetchVector; +import com.yahoo.vespa.flags.PermanentFlags; import com.yahoo.vespa.hosted.provision.Node; import com.yahoo.vespa.hosted.provision.NodeList; import com.yahoo.vespa.hosted.provision.NodeRepository; @@ -34,6 +37,7 @@ public class AutoscalingMaintainer extends NodeRepositoryMaintainer { private final Autoscaler autoscaler; private final Deployer deployer; private final Metric metric; + private final BooleanFlag enabledFlag; public AutoscalingMaintainer(NodeRepository nodeRepository, Deployer deployer, @@ -43,6 +47,7 @@ public class AutoscalingMaintainer extends NodeRepositoryMaintainer { this.autoscaler = new Autoscaler(nodeRepository); this.deployer = deployer; this.metric = metric; + this.enabledFlag = PermanentFlags.AUTOSCALING.bindTo(nodeRepository.flagSource()); } @Override @@ -53,6 +58,9 @@ public class AutoscalingMaintainer extends NodeRepositoryMaintainer { int attempts = 0; int failures = 0; for (var applicationNodes : activeNodesByApplication().entrySet()) { + boolean enabled = enabledFlag.with(FetchVector.Dimension.APPLICATION_ID, + applicationNodes.getKey().serializedForm()).value(); + if (!enabled) continue; for (var clusterNodes : nodesByCluster(applicationNodes.getValue()).entrySet()) { attempts++; if ( ! autoscale(applicationNodes.getKey(), clusterNodes.getKey())) diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/DeprovisionedExpirer.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/DeprovisionedExpirer.java index fcfde53c0df..92062f13f1a 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/DeprovisionedExpirer.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/DeprovisionedExpirer.java @@ -3,29 +3,58 @@ package com.yahoo.vespa.hosted.provision.maintenance; import com.yahoo.jdisc.Metric; import com.yahoo.vespa.hosted.provision.Node; +import com.yahoo.vespa.hosted.provision.NodeList; import com.yahoo.vespa.hosted.provision.NodeRepository; import com.yahoo.vespa.hosted.provision.node.History; +import com.yahoo.vespa.hosted.provision.node.History.Event.Type; import java.time.Duration; +import java.time.Instant; +import java.util.ArrayDeque; +import java.util.Deque; import java.util.List; +import static java.util.Comparator.comparing; + /** - * This removes hosts from {@link com.yahoo.vespa.hosted.provision.Node.State#deprovisioned}, after a grace period. + * This removes hosts from {@link com.yahoo.vespa.hosted.provision.Node.State#deprovisioned}, in dynamically provisioned + * zones, after a grace period. * * @author mpolden */ public class DeprovisionedExpirer extends Expirer { + private static final int maxDeprovisionedNodes = 1000; + DeprovisionedExpirer(NodeRepository nodeRepository, Duration expiryTime, Metric metric) { super(Node.State.deprovisioned, History.Event.Type.deprovisioned, nodeRepository, expiryTime, metric); } @Override - protected void expire(List<Node> expired) { - if (!nodeRepository().zone().cloud().dynamicProvisioning()) return; // Never expire in statically provisioned zones - for (var node : expired) { - nodeRepository().nodes().forget(node); + protected boolean isExpired(Node node) { + return nodeRepository().zone().cloud().dynamicProvisioning() && super.isExpired(node); + } + + @Override + protected NodeList getExpiredNodes() { + List<Node> deprovisioned = nodeRepository().nodes().list(Node.State.deprovisioned) + .sortedBy(comparing(node -> node.history().event(Type.deprovisioned) + .map(History.Event::at) + .orElse(Instant.EPOCH))) + .asList(); + Deque<Node> expired = new ArrayDeque<>(deprovisioned); + int kept = 0; + while ( ! expired.isEmpty()) { + if (isExpired(expired.getLast()) || kept++ >= maxDeprovisionedNodes) break; // If we encounter an expired node, the rest are also expired. + expired.removeLast(); } + return NodeList.copyOf(List.copyOf(expired)); + } + + @Override + protected void expire(List<Node> expired) { + nodeRepository().nodes().performOn(NodeList.copyOf(expired), + (node, lock) -> { nodeRepository().nodes().forget(node); return node; }); } } diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/DirtyExpirer.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/DirtyExpirer.java index 8766dea3d61..e300591fbb2 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/DirtyExpirer.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/DirtyExpirer.java @@ -3,6 +3,8 @@ package com.yahoo.vespa.hosted.provision.maintenance; import com.yahoo.jdisc.Metric; import com.yahoo.vespa.hosted.provision.Node; +import com.yahoo.vespa.hosted.provision.Node.State; +import com.yahoo.vespa.hosted.provision.NodeList; import com.yahoo.vespa.hosted.provision.NodeRepository; import com.yahoo.vespa.hosted.provision.node.Agent; import com.yahoo.vespa.hosted.provision.node.History; @@ -33,8 +35,12 @@ public class DirtyExpirer extends Expirer { @Override protected void expire(List<Node> expired) { - for (Node expiredNode : expired) - nodeRepository().nodes().fail(expiredNode.hostname(), wantToDeprovisionOnExpiry, Agent.DirtyExpirer, "Node is stuck in dirty"); + nodeRepository().nodes().performOn(NodeList.copyOf(expired), + node -> node.state() == State.dirty && isExpired(node), + (node, lock) -> nodeRepository().nodes().fail(node.hostname(), + wantToDeprovisionOnExpiry, + Agent.DirtyExpirer, + "Node is stuck in dirty")); } } diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/DiskReplacer.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/DiskReplacer.java index 6f2eb726e91..5baa4f63867 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/DiskReplacer.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/DiskReplacer.java @@ -7,6 +7,7 @@ import com.yahoo.vespa.hosted.provision.NodeList; import com.yahoo.vespa.hosted.provision.NodeMutex; import com.yahoo.vespa.hosted.provision.NodeRepository; import com.yahoo.vespa.hosted.provision.provisioning.HostProvisioner; +import com.yahoo.yolean.Exceptions; import java.time.Duration; import java.util.Optional; @@ -48,7 +49,8 @@ public class DiskReplacer extends NodeRepositoryMaintainer { } } catch (RuntimeException e) { failures++; - log.log(Level.WARNING, "Failed to rebuild " + host.hostname() + ", will retry in " + interval(), e); + log.log(Level.WARNING, "Failed to rebuild " + host.hostname() + ", will retry in " + + interval() + ": " + Exceptions.toMessageString(e)); } } return this.asSuccessFactorDeviation(nodes.size(), failures); diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/ExpeditedChangeApplicationMaintainer.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/ExpeditedChangeApplicationMaintainer.java index ffa125230a8..1b81a977019 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/ExpeditedChangeApplicationMaintainer.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/ExpeditedChangeApplicationMaintainer.java @@ -68,15 +68,15 @@ public class ExpeditedChangeApplicationMaintainer extends ApplicationMaintainer /** Returns the reason for doing an expedited deploy. */ private Optional<String> hasNodesWithChanges(ApplicationId applicationId, NodeList nodes) { - Optional<Instant> lastDeployTime = deployer().lastDeployTime(applicationId); - if (lastDeployTime.isEmpty()) return Optional.empty(); + Optional<Instant> activationTime = deployer().activationTime(applicationId); + if (activationTime.isEmpty()) return Optional.empty(); List<String> reasons = nodes.stream() .flatMap(node -> node.history() .events() .stream() .filter(event -> expediteChangeBy(event.agent())) - .filter(event -> lastDeployTime.get().isBefore(event.at())) + .filter(event -> activationTime.get().isBefore(event.at())) .map(event -> event.type() + (event.agent() == Agent.system ? "" : " by " + event.agent()))) .sorted() .distinct() diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/Expirer.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/Expirer.java index a8929cf9d22..1684ebbb38f 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/Expirer.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/Expirer.java @@ -40,7 +40,7 @@ public abstract class Expirer extends NodeRepositoryMaintainer { @Override protected double maintain() { - NodeList expired = nodeRepository().nodes().list(fromState).matching(this::isExpired); + NodeList expired = getExpiredNodes(); if ( ! expired.isEmpty()) { log.info(fromState + " expirer found " + expired.size() + " expired nodes: " + expired); @@ -51,6 +51,10 @@ public abstract class Expirer extends NodeRepositoryMaintainer { return 1.0; } + protected NodeList getExpiredNodes() { + return nodeRepository().nodes().list(fromState).matching(this::isExpired); + } + protected boolean isExpired(Node node) { return isExpired(node, expiryTime); } diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/FailedExpirer.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/FailedExpirer.java index fa3f9435c70..4f2201adba0 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/FailedExpirer.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/FailedExpirer.java @@ -6,13 +6,14 @@ import com.yahoo.config.provision.NodeType; import com.yahoo.config.provision.Zone; import com.yahoo.jdisc.Metric; import com.yahoo.vespa.hosted.provision.Node; +import com.yahoo.vespa.hosted.provision.Node.State; import com.yahoo.vespa.hosted.provision.NodeList; +import com.yahoo.vespa.hosted.provision.NodeMutex; import com.yahoo.vespa.hosted.provision.NodeRepository; import com.yahoo.vespa.hosted.provision.node.Agent; -import com.yahoo.vespa.hosted.provision.node.History; +import com.yahoo.vespa.hosted.provision.node.History.Event.Type; import java.time.Duration; -import java.util.ArrayList; import java.util.List; import java.util.Optional; import java.util.function.Predicate; @@ -67,55 +68,47 @@ public class FailedExpirer extends NodeRepositoryMaintainer { @Override protected double maintain() { - NodeList allNodes = nodeRepository.nodes().list(); - List<Node> remainingNodes = new ArrayList<>(allNodes.state(Node.State.failed) - .nodeType(NodeType.tenant, NodeType.host) - .asList()); + Predicate<Node> isExpired = node -> node.state() == State.failed + && node.history().hasEventBefore(Type.failed, clock().instant().minus(expiryFor(node))); + NodeList allNodes = nodeRepository.nodes().list(); // Stale snapshot, not critical. - recycleIf(node -> node.allocation().isEmpty(), remainingNodes, allNodes); - recycleIf(node -> !node.allocation().get().membership().cluster().isStateful() && - node.history().hasEventBefore(History.Event.Type.failed, clock().instant().minus(statelessExpiry)), - remainingNodes, - allNodes); - recycleIf(node -> node.allocation().get().membership().cluster().isStateful() && - node.history().hasEventBefore(History.Event.Type.failed, clock().instant().minus(statefulExpiry)), - remainingNodes, - allNodes); + nodeRepository.nodes().performOn(allNodes.nodeType(NodeType.tenant), + isExpired, + (node, lock) -> recycle(node, List.of(), allNodes).get()); + + nodeRepository.nodes().performOnRecursively(allNodes.nodeType(NodeType.host).matching(isExpired), + nodes -> isExpired.test(nodes.parent().node()), + nodes -> recycle(nodes.parent().node(), + nodes.children().stream().map(NodeMutex::node).toList(), + allNodes) + .map(List::of).orElse(List.of())); return 1.0; } - /** Recycle the nodes matching condition, and remove those nodes from the nodes list. */ - private void recycleIf(Predicate<Node> condition, List<Node> failedNodes, NodeList allNodes) { - List<Node> nodesToRecycle = failedNodes.stream().filter(condition).toList(); - failedNodes.removeAll(nodesToRecycle); - recycle(nodesToRecycle, allNodes); + private Duration expiryFor(Node node) { + return node.allocation().isEmpty() ? Duration.ZERO + : node.allocation().get().membership().cluster().isStateful() ? statefulExpiry + : statelessExpiry; } - /** Move eligible nodes to dirty or parked. This may be a subset of the given nodes */ - private void recycle(List<Node> nodes, NodeList allNodes) { - List<Node> nodesToRecycle = new ArrayList<>(); - for (Node candidate : nodes) { - Optional<String> reason = shouldPark(candidate, allNodes); - if (reason.isPresent()) { - List<String> unparkedChildren = candidate.type().isHost() ? - allNodes.childrenOf(candidate) - .not() - .state(Node.State.parked) - .mapToList(Node::hostname) : - List.of(); - - if (unparkedChildren.isEmpty()) { - nodeRepository.nodes().park(candidate.hostname(), true, Agent.FailedExpirer, - "Parked by FailedExpirer due to " + reason.get()); - } else { - log.info(String.format("Expired failed node %s was not parked because of unparked children: %s", - candidate.hostname(), String.join(", ", unparkedChildren))); - } + private Optional<Node> recycle(Node node, List<Node> children, NodeList allNodes) { + Optional<String> reason = shouldPark(node, allNodes); + if (reason.isPresent()) { + List<String> unparkedChildren = children.stream() + .filter(child -> child.state() != Node.State.parked) + .map(Node::hostname) + .toList(); + if (unparkedChildren.isEmpty()) { + return Optional.of(nodeRepository.nodes().park(node.hostname(), true, Agent.FailedExpirer, + "Parked by FailedExpirer due to " + reason.get())); } else { - nodesToRecycle.add(candidate); + log.info(String.format("Expired failed node %s was not parked because of unparked children: %s", + node.hostname(), String.join(", ", unparkedChildren))); + return Optional.empty(); } + } else { + return Optional.of(nodeRepository.nodes().deallocate(node, Agent.FailedExpirer, "Expired by FailedExpirer")); } - nodeRepository.nodes().deallocate(nodesToRecycle, Agent.FailedExpirer, "Expired by FailedExpirer"); } /** Returns whether the node should be parked instead of recycled */ diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/HostCapacityMaintainer.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/HostCapacityMaintainer.java index d70ee825860..8a9a29f58c6 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/HostCapacityMaintainer.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/HostCapacityMaintainer.java @@ -30,6 +30,7 @@ import com.yahoo.vespa.hosted.provision.provisioning.HostProvisioner.HostSharing import com.yahoo.vespa.hosted.provision.provisioning.NodeCandidate; import com.yahoo.vespa.hosted.provision.provisioning.NodePrioritizer; import com.yahoo.vespa.hosted.provision.provisioning.NodeSpec; +import com.yahoo.vespa.hosted.provision.provisioning.ProvisioningThrottler; import java.time.Duration; import java.time.Instant; @@ -57,6 +58,7 @@ public class HostCapacityMaintainer extends NodeRepositoryMaintainer { private final HostProvisioner hostProvisioner; private final ListFlag<ClusterCapacity> preprovisionCapacityFlag; + private final ProvisioningThrottler throttler; HostCapacityMaintainer(NodeRepository nodeRepository, Duration interval, @@ -66,6 +68,7 @@ public class HostCapacityMaintainer extends NodeRepositoryMaintainer { super(nodeRepository, interval, metric); this.hostProvisioner = hostProvisioner; this.preprovisionCapacityFlag = PermanentFlags.PREPROVISION_CAPACITY.bindTo(flagSource); + this.throttler = new ProvisioningThrottler(nodeRepository, metric); } @Override @@ -199,18 +202,27 @@ public class HostCapacityMaintainer extends NodeRepositoryMaintainer { throw new IllegalStateException("Have provisioned " + numProvisions + " times but there's still deficit: aborting"); } - nodesPlusProvisioned.addAll(provisionHosts(deficit.get().count(), toNodeResources(deficit.get()))); + ClusterCapacity clusterCapacityDeficit = deficit.get(); + var clusterType = Optional.ofNullable(clusterCapacityDeficit.clusterType()); + nodesPlusProvisioned.addAll(provisionHosts(clusterCapacityDeficit.count(), + toNodeResources(clusterCapacityDeficit), + clusterType.map(ClusterSpec.Type::from), + nodeList)); } } - private List<Node> provisionHosts(int count, NodeResources nodeResources) { + private List<Node> provisionHosts(int count, NodeResources nodeResources, Optional<ClusterSpec.Type> clusterType, NodeList allNodes) { try { + if (throttler.throttle(allNodes, Agent.HostCapacityMaintainer)) { + throw new NodeAllocationException("Host provisioning is being throttled", true); + } Version osVersion = nodeRepository().osVersions().targetFor(NodeType.host).orElse(Version.emptyVersion); List<Integer> provisionIndices = nodeRepository().database().readProvisionIndices(count); - List<Node> hosts = new ArrayList<>(); - HostProvisionRequest request = new HostProvisionRequest(provisionIndices, NodeType.host, nodeResources, ApplicationId.defaultId(), osVersion, - HostSharing.shared, Optional.empty(), Optional.empty(), + HostProvisionRequest request = new HostProvisionRequest(provisionIndices, NodeType.host, nodeResources, + ApplicationId.defaultId(), osVersion, + HostSharing.shared, clusterType, Optional.empty(), nodeRepository().zone().cloud().account(), false); + List<Node> hosts = new ArrayList<>(); hostProvisioner.provisionHosts(request, provisionedHosts -> { hosts.addAll(provisionedHosts.stream().map(host -> host.generateHost(Duration.ZERO)).toList()); @@ -256,18 +268,19 @@ public class HostCapacityMaintainer extends NodeRepositoryMaintainer { // We'll allocate each ClusterCapacity as a unique cluster in a dummy application ApplicationId applicationId = ApplicationId.defaultId(); ClusterSpec.Id clusterId = ClusterSpec.Id.from(String.valueOf(clusterIndex)); - ClusterSpec clusterSpec = ClusterSpec.request(ClusterSpec.Type.content, clusterId) + ClusterSpec.Type type = clusterCapacity.clusterType() != null + ? ClusterSpec.Type.from(clusterCapacity.clusterType()) + : ClusterSpec.Type.content; + ClusterSpec clusterSpec = ClusterSpec.request(type, clusterId) // build() requires a version, even though it is not (should not be) used .vespaVersion(Vtag.currentVersion) .build(); - NodeSpec nodeSpec = NodeSpec.from(clusterCapacity.count(), nodeResources, false, true, + NodeSpec nodeSpec = NodeSpec.from(clusterCapacity.count(), 1, nodeResources, false, true, nodeRepository().zone().cloud().account(), Duration.ZERO); - int wantedGroups = 1; - - NodePrioritizer prioritizer = new NodePrioritizer(allNodes, applicationId, clusterSpec, nodeSpec, wantedGroups, + NodePrioritizer prioritizer = new NodePrioritizer(allNodes, applicationId, clusterSpec, nodeSpec, true, nodeRepository().nameResolver(), nodeRepository().nodes(), nodeRepository().resourcesCalculator(), nodeRepository().spareCount(), nodeSpec.cloudAccount().isExclave(nodeRepository().zone())); - List<NodeCandidate> nodeCandidates = prioritizer.collect(List.of()); + List<NodeCandidate> nodeCandidates = prioritizer.collect(); MutableInteger index = new MutableInteger(0); return nodeCandidates .stream() diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/HostResumeProvisioner.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/HostResumeProvisioner.java index fe89ba17469..f78d717e010 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/HostResumeProvisioner.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/HostResumeProvisioner.java @@ -7,6 +7,7 @@ import com.yahoo.jdisc.Metric; import com.yahoo.transaction.Mutex; import com.yahoo.vespa.hosted.provision.Node; import com.yahoo.vespa.hosted.provision.NodeList; +import com.yahoo.vespa.hosted.provision.NodeMutex; import com.yahoo.vespa.hosted.provision.NodeRepository; import com.yahoo.vespa.hosted.provision.node.Agent; import com.yahoo.vespa.hosted.provision.node.IP; @@ -17,6 +18,8 @@ import com.yahoo.yolean.Exceptions; import javax.naming.NamingException; import java.time.Duration; +import java.util.ArrayList; +import java.util.List; import java.util.logging.Level; import java.util.logging.Logger; @@ -77,7 +80,19 @@ public class HostResumeProvisioner extends NodeRepositoryMaintainer { if (hostIpConfig.isEmpty()) return; hostIpConfig.asMap().forEach((hostname, ipConfig) -> verifyDns(hostname, host.type(), host.cloudAccount(), ipConfig)); - nodeRepository().nodes().setIpConfig(hostIpConfig); + + nodeRepository().nodes().performOnRecursively(NodeList.of(host), __ -> true, nodes -> { + List<Node> updated = new ArrayList<>(); + for (NodeMutex mutex : nodes.children()) + updated.add(nodeRepository().nodes().write(mutex.node().with(hostIpConfig.require(mutex.node().hostname())), mutex)); + + updated.add(nodeRepository().nodes().write(nodes.parent().node() + .with(hostIpConfig.require(nodes.parent().node().hostname())) + .withExtraId(hostIpConfig.hostId()), + nodes.parent())); + + return updated; + }); } /** Verify DNS configuration of given node */ diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/InactiveExpirer.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/InactiveExpirer.java index aa7aac34389..503ac4be86c 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/InactiveExpirer.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/InactiveExpirer.java @@ -3,6 +3,8 @@ package com.yahoo.vespa.hosted.provision.maintenance; import com.yahoo.jdisc.Metric; import com.yahoo.vespa.hosted.provision.Node; +import com.yahoo.vespa.hosted.provision.Node.State; +import com.yahoo.vespa.hosted.provision.NodeList; import com.yahoo.vespa.hosted.provision.NodeRepository; import com.yahoo.vespa.hosted.provision.node.Agent; import com.yahoo.vespa.hosted.provision.node.History; @@ -40,9 +42,9 @@ public class InactiveExpirer extends Expirer { @Override protected void expire(List<Node> expired) { - expired.forEach(node -> { - nodeRepository.nodes().deallocate(node, Agent.InactiveExpirer, "Expired by InactiveExpirer"); - }); + nodeRepository.nodes().performOn(NodeList.copyOf(expired), + node -> node.state() == State.inactive && isExpired(node), + (node, lock) -> nodeRepository.nodes().deallocate(node, Agent.InactiveExpirer, "Expired by InactiveExpirer")); } @Override diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/MetricsReporter.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/MetricsReporter.java index 3b846351b36..15913fec5ed 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/MetricsReporter.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/MetricsReporter.java @@ -21,11 +21,13 @@ import com.yahoo.vespa.hosted.provision.NodeList; import com.yahoo.vespa.hosted.provision.NodeRepository; import com.yahoo.vespa.hosted.provision.node.Allocation; import com.yahoo.vespa.hosted.provision.node.ClusterId; +import com.yahoo.vespa.hosted.provision.node.History; import com.yahoo.vespa.hosted.provision.persistence.CacheStats; import com.yahoo.vespa.service.monitor.ServiceModel; import com.yahoo.vespa.service.monitor.ServiceMonitor; import java.time.Duration; +import java.time.Instant; import java.util.Comparator; import java.util.HashMap; import java.util.HashSet; @@ -64,7 +66,7 @@ public class MetricsReporter extends NodeRepositoryMaintainer { @Override public double maintain() { // Sort by hostname to get deterministic metric reporting order (and hopefully avoid changes - // to metric reporting time so we get double reporting or no reporting within a minute) + // to metric reporting time, so we get double reporting or no reporting within a minute) NodeList nodes = nodeRepository().nodes().list().sortedBy(Comparator.comparing(Node::hostname)); ServiceModel serviceModel = serviceMonitor.getServiceModelSnapshot(); @@ -79,6 +81,7 @@ public class MetricsReporter extends NodeRepositoryMaintainer { updateRepairTicketMetrics(nodes); updateAllocationMetrics(nodes); updateClusterMetrics(nodes); + updateEmptyExclusiveHosts(nodes); return 1.0; } @@ -386,6 +389,19 @@ public class MetricsReporter extends NodeRepositoryMaintainer { .forEach((status, number) -> metric.set(ConfigServerMetrics.HOSTED_VESPA_BREAKFIXED_HOSTS.baseName(), number, getContext(Map.of("status", status)))); } + private void updateEmptyExclusiveHosts(NodeList nodes) { + Instant now = nodeRepository().clock().instant(); + Duration minActivePeriod = Duration.ofMinutes(10); + int emptyHosts = nodes.parents().state(State.active) + .matching(node -> (node.type() != NodeType.host && node.type().isHost()) || + node.exclusiveToApplicationId().isPresent()) + .matching(host -> host.history().hasEventBefore(History.Event.Type.activated, + now.minus(minActivePeriod))) + .matching(host -> nodes.childrenOf(host).state(State.active).isEmpty()) + .size(); + metric.set(ConfigServerMetrics.NODES_EMPTY_EXCLUSIVE.baseName(), emptyHosts, null); + } + static Map<String, String> dimensions(ApplicationId application, ClusterSpec.Id cluster) { Map<String, String> dimensions = new HashMap<>(dimensions(application)); dimensions.put("clusterid", cluster.value()); diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeFailer.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeFailer.java index 766bc688c62..585a7f341b5 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeFailer.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeFailer.java @@ -195,39 +195,48 @@ public class NodeFailer extends NodeRepositoryMaintainer { /** * Called when a node should be moved to the failed state: Do that if it seems safe, * which is when the node repo has available capacity to replace the node (and all its tenant nodes if host). - * Otherwise not replacing the node ensures (by Orchestrator check) that no further action will be taken. + * Otherwise, not replacing the node ensures (by Orchestrator check) that no further action will be taken. */ private void failActive(FailingNode failing) { Optional<Deployment> deployment = deployer.deployFromLocalActive(failing.node().allocation().get().owner(), Duration.ofMinutes(5)); if (deployment.isEmpty()) return; + boolean redeploy = false; // If the active node that we are trying to fail is of type host, we need to successfully fail all // the children nodes running on it before we fail the host. Failing a child node in a dynamically // provisioned zone may require provisioning new hosts that require the host application lock to be held, // so we must release ours before failing the children. - List<FailingNode> activeChildrenToFail = new ArrayList<>(); - boolean redeploy = false; - try (NodeMutex lock = nodeRepository().nodes().lockAndGetRequired(failing.node())) { - // Now that we have gotten the node object under the proper lock, sanity-check it still makes sense to fail - if (!Objects.equals(failing.node().allocation().map(Allocation::owner), lock.node().allocation().map(Allocation::owner))) - return; - if (lock.node().state() == Node.State.failed) - return; - if (!Objects.equals(failing.node().state(), lock.node().state())) - return; - failing = new FailingNode(lock.node(), failing.reason); - - String reasonForChildFailure = "Failing due to parent host " + failing.node().hostname() + " failure: " + failing.reason(); - for (Node failingTenantNode : nodeRepository().nodes().list().childrenOf(failing.node())) { - if (failingTenantNode.state() == Node.State.active) { - activeChildrenToFail.add(new FailingNode(failingTenantNode, reasonForChildFailure)); - } else if (failingTenantNode.state() != Node.State.failed) { - nodeRepository().nodes().fail(failingTenantNode.hostname(), Agent.NodeFailer, reasonForChildFailure); + if (failing.node.type().isHost()) { + List<FailingNode> activeChildrenToFail = new ArrayList<>(); + try (var lock = nodeRepository().nodes().lockAndGetRecursively(failing.node.hostname(), Optional.empty())) { + failing = shouldFail(lock.parent().node(), failing); + if (failing == null) return; + + String reasonForChildFailure = "Failing due to parent host " + failing.node().hostname() + " failure: " + failing.reason(); + for (var failingTenantNode : lock.children()) { + if (failingTenantNode.node().state() == Node.State.active) { + activeChildrenToFail.add(new FailingNode(failingTenantNode.node(), reasonForChildFailure)); + } else if (failingTenantNode.node().state() != Node.State.failed) { + nodeRepository().nodes().fail(failingTenantNode.node().hostname(), Agent.NodeFailer, reasonForChildFailure); + } + } + + if (activeChildrenToFail.isEmpty()) { + log.log(Level.INFO, "Failing out " + failing.node + ": " + failing.reason); + markWantToFail(failing.node(), true, lock.parent()); + redeploy = true; } } + // In a dynamically provisioned zone the failing of the first child may require a new host to be provisioned, + // so failActive() may take a long time to complete, but the remaining children should be fast. + activeChildrenToFail.forEach(this::failActive); + } + else { + try (var lock = nodeRepository().nodes().lockAndGetRequired(failing.node)) { + failing = shouldFail(lock.node(), failing); + if (failing == null) return; - if (activeChildrenToFail.isEmpty()) { log.log(Level.INFO, "Failing out " + failing.node + ": " + failing.reason); markWantToFail(failing.node(), true, lock); redeploy = true; @@ -237,13 +246,19 @@ public class NodeFailer extends NodeRepositoryMaintainer { // Redeploy to replace failing node if (redeploy) { redeploy(deployment.get(), failing); - return; } + } - // In a dynamically provisioned zone the failing of the first child may require a new host to be provisioned, - // so failActive() may take a long time to complete, but the remaining children should be fast. - activeChildrenToFail.forEach(this::failActive); - + // Returns an updated FailingNode if we should still fail the node, otherwise null + private static FailingNode shouldFail(Node fresh, FailingNode stale) { + // Now that we have gotten the node object under the proper lock, sanity-check it still makes sense to fail + if (!Objects.equals(stale.node.allocation().map(Allocation::owner), fresh.allocation().map(Allocation::owner))) + return null; + if (fresh.state() == Node.State.failed) + return null; + if (!Objects.equals(stale.node.state(), fresh.state())) + return null; + return new FailingNode(fresh, stale.reason); } private void redeploy(Deployment deployment, FailingNode failing) { diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeMover.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeMover.java index fcc8e904a47..36db8bff8e9 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeMover.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeMover.java @@ -90,9 +90,9 @@ public abstract class NodeMover<MOVE> extends NodeRepositoryMaintainer { private boolean deployedRecently(ApplicationId application) { Instant now = nodeRepository().clock().instant(); - return deployer.lastDeployTime(application) - .map(lastDeployTime -> lastDeployTime.isAfter(now.minus(waitTimeAfterPreviousDeployment))) - // We only know last deploy time for applications that were deployed on this config server, + return deployer.activationTime(application) + .map(lastActivatedTime -> lastActivatedTime.isAfter(now.minus(waitTimeAfterPreviousDeployment))) + // We only know last activated time for applications that were deployed on this config server, // the rest will be deployed on another config server .orElse(true); } diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeRepositoryMaintenance.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeRepositoryMaintenance.java index f6391a7d475..573298c142c 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeRepositoryMaintenance.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeRepositoryMaintenance.java @@ -128,6 +128,8 @@ public class NodeRepositoryMaintenance extends AbstractComponent { DefaultTimes(Zone zone, Deployer deployer) { boolean isCdZone = zone.system().isCd(); + boolean isProduction = zone.environment().isProduction(); + boolean isTest = zone.environment().isTest(); autoscalingInterval = Duration.ofMinutes(5); dynamicProvisionerInterval = Duration.ofMinutes(3); @@ -157,10 +159,10 @@ public class NodeRepositoryMaintenance extends AbstractComponent { throttlePolicy = NodeFailer.ThrottlePolicy.hosted; hostRetirerInterval = Duration.ofMinutes(30); hostFlavorUpgraderInterval = Duration.ofMinutes(30); - // CD (de)provisions hosts frequently. Expire deprovisioned ones earlier - deprovisionedExpiry = isCdZone ? Duration.ofDays(1) : Duration.ofDays(30); + // CD, test and staging (de)provisions hosts frequently. Expire deprovisioned ones earlier + deprovisionedExpiry = (isCdZone || isTest) ? Duration.ofDays(3) : Duration.ofDays(30); - if (zone.environment().isProduction() && ! isCdZone) { + if (isProduction && ! isCdZone) { inactiveExpiry = Duration.ofHours(4); // enough time for the application owner to discover and redeploy retiredInterval = Duration.ofMinutes(15); dirtyExpiry = Duration.ofHours(2); // enough time to clean the node diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/PeriodicApplicationMaintainer.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/PeriodicApplicationMaintainer.java index 10a828c887a..28679b504aa 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/PeriodicApplicationMaintainer.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/PeriodicApplicationMaintainer.java @@ -39,7 +39,7 @@ public class PeriodicApplicationMaintainer extends ApplicationMaintainer { @Override protected boolean canDeployNow(ApplicationId application) { - return deployer().lastDeployTime(application) + return deployer().activationTime(application) // Don't deploy if a regular deploy just happened .map(lastDeployTime -> lastDeployTime.isBefore(nodeRepository().clock().instant().minus(minTimeBetweenRedeployments))) // We only know last deploy time for applications that were deployed on this config server, @@ -57,7 +57,7 @@ public class PeriodicApplicationMaintainer extends ApplicationMaintainer { .map(node -> node.allocation().get().owner()) .distinct() .filter(this::canDeployNow) - .collect(Collectors.toMap(Function.identity(), this::getLastDeployTime)); + .collect(Collectors.toMap(Function.identity(), this::activationTime)); return deploymentTimes.entrySet().stream() .sorted(Map.Entry.comparingByValue()) diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/Rebalancer.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/Rebalancer.java index 8ef0f107eb0..98407c9d13c 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/Rebalancer.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/Rebalancer.java @@ -37,9 +37,9 @@ public class Rebalancer extends NodeMover<Rebalancer.Move> { protected double maintain() { if ( ! nodeRepository().nodes().isWorking()) return 0.0; - if ( ! nodeRepository().zone().cloud().allowHostSharing()) return 1.0; // Rebalancing not necessary - if (nodeRepository().zone().environment().isTest()) return 1.0; // Short lived deployments; no need to rebalance - if (nodeRepository().zone().system().isCd()) return 1.0; // CD tests assert on # of nodes, avoid rebalnacing as it make tests unstable + if ( ! nodeRepository().zone().cloud().allowHostSharing()) return 1.0; // Re-balancing not necessary + if (nodeRepository().zone().environment().isTest()) return 1.0; // Short-lived deployments; no need to rebalance + if (nodeRepository().zone().system().isCd()) return 1.0; // CD tests assert on # of nodes, avoid re-balancing as it make tests unstable // Work with an unlocked snapshot as this can take a long time and full consistency is not needed NodeList allNodes = nodeRepository().nodes().list(); diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/ReservationExpirer.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/ReservationExpirer.java index 6f06a2ac22e..2484f496ece 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/ReservationExpirer.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/ReservationExpirer.java @@ -25,6 +25,8 @@ public class ReservationExpirer extends Expirer { } @Override - protected void expire(List<Node> expired) { nodeRepository().nodes().deallocate(expired, Agent.ReservationExpirer, "Expired by ReservationExpirer"); } + protected void expire(List<Node> expired) { + nodeRepository().nodes().deallocate(expired, Agent.ReservationExpirer, "Expired by ReservationExpirer"); + } } diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/node/IP.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/node/IP.java index 8e27e6d34a8..c34b357b758 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/node/IP.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/node/IP.java @@ -113,7 +113,7 @@ public record IP() { * * @throws IllegalArgumentException if there are IP conflicts with existing nodes */ - public static List<Node> verify(List<Node> nodes, LockedNodeList allNodes) { + public static LockedNodeList verify(List<Node> nodes, LockedNodeList allNodes) { NodeList sortedNodes = allNodes.sortedBy(Comparator.comparing(Node::hostname)); for (var node : nodes) { for (var other : sortedNodes) { @@ -135,7 +135,7 @@ public record IP() { other.hostname()); } } - return nodes; + return allNodes.childList(nodes); } /** Returns whether IP address of existing node can be assigned to node */ @@ -152,7 +152,7 @@ public record IP() { } public static Node verify(Node node, LockedNodeList allNodes) { - return verify(List.of(node), allNodes).get(0); + return verify(List.of(node), allNodes).asList().get(0); } } diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/node/Nodes.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/node/Nodes.java index deaf3054362..0bb045dc6a1 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/node/Nodes.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/node/Nodes.java @@ -1,7 +1,6 @@ // Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. package com.yahoo.vespa.hosted.provision.node; -import com.yahoo.collections.ListMap; import com.yahoo.component.Version; import com.yahoo.config.provision.ApplicationId; import com.yahoo.config.provision.ApplicationTransaction; @@ -10,6 +9,7 @@ import com.yahoo.config.provision.Flavor; import com.yahoo.config.provision.NodeResources; import com.yahoo.config.provision.NodeType; import com.yahoo.config.provision.Zone; +import com.yahoo.time.TimeBudget; import com.yahoo.transaction.Mutex; import com.yahoo.transaction.NestedTransaction; import com.yahoo.vespa.applicationmodel.HostName; @@ -17,13 +17,13 @@ import com.yahoo.vespa.applicationmodel.InfrastructureApplication; import com.yahoo.vespa.hosted.provision.LockedNodeList; import com.yahoo.vespa.hosted.provision.NoSuchNodeException; import com.yahoo.vespa.hosted.provision.Node; +import com.yahoo.vespa.hosted.provision.Node.State; import com.yahoo.vespa.hosted.provision.NodeList; import com.yahoo.vespa.hosted.provision.NodeMutex; import com.yahoo.vespa.hosted.provision.applications.Applications; import com.yahoo.vespa.hosted.provision.maintenance.NodeFailer; import com.yahoo.vespa.hosted.provision.node.filter.NodeFilter; import com.yahoo.vespa.hosted.provision.persistence.CuratorDb; -import com.yahoo.vespa.hosted.provision.provisioning.HostIpConfig; import com.yahoo.vespa.orchestrator.HostNameNotFoundException; import com.yahoo.vespa.orchestrator.Orchestrator; @@ -31,20 +31,26 @@ import java.time.Clock; import java.time.Duration; import java.time.Instant; import java.util.ArrayList; +import java.util.Collection; +import java.util.Comparator; import java.util.EnumSet; +import java.util.HashSet; import java.util.List; -import java.util.Map; -import java.util.Objects; +import java.util.NavigableSet; import java.util.Optional; import java.util.Set; +import java.util.TreeSet; import java.util.function.BiFunction; +import java.util.function.Function; import java.util.function.Predicate; import java.util.logging.Level; import java.util.logging.Logger; -import java.util.stream.Collectors; -import java.util.stream.Stream; +import static com.yahoo.collections.Iterables.reversed; import static com.yahoo.vespa.hosted.provision.restapi.NodePatcher.DROP_DOCUMENTS_REPORT; +import static java.util.Comparator.comparing; +import static java.util.stream.Collectors.groupingBy; +import static java.util.stream.Collectors.joining; /** * The nodes in the node repo and their state transitions @@ -148,7 +154,7 @@ public class Nodes { if (existing.isPresent()) throw new IllegalStateException("Cannot add " + node + ": A node with this name already exists"); } - return db.addNodesInState(nodes.asList(), Node.State.reserved, Agent.system); + return db.addNodesInState(nodes, Node.State.reserved, Agent.system); } /** @@ -157,7 +163,7 @@ public class Nodes { * with the history of that node. */ public List<Node> addNodes(List<Node> nodes, Agent agent) { - try (Mutex lock = lockUnallocated()) { + try (Mutex allocationLock = lockUnallocated()) { List<Node> nodesToAdd = new ArrayList<>(); List<Node> nodesToRemove = new ArrayList<>(); for (int i = 0; i < nodes.size(); i++) { @@ -194,7 +200,7 @@ public class Nodes { } NestedTransaction transaction = new NestedTransaction(); db.removeNodes(nodesToRemove, transaction); - List<Node> resultingNodes = db.addNodesInState(IP.Config.verify(nodesToAdd, list(lock)), Node.State.provisioned, agent, transaction); + List<Node> resultingNodes = db.addNodesInState(IP.Config.verify(nodesToAdd, list(allocationLock)), Node.State.provisioned, agent, transaction); transaction.commit(); return resultingNodes; } @@ -218,7 +224,7 @@ public class Nodes { } /** Activate nodes. This method does <b>not</b> lock the node repository. */ - public List<Node> activate(List<Node> nodes, NestedTransaction transaction) { + public List<Node> activate(List<Node> nodes, ApplicationTransaction transaction) { return db.writeTo(Node.State.active, nodes, Agent.application, Optional.empty(), transaction); } @@ -229,8 +235,7 @@ public class Nodes { * @param reusable move the node directly to {@link Node.State#dirty} after removal */ public void setRemovable(NodeList nodes, boolean reusable) { - performOn(nodes, (node, mutex) -> write(node.with(node.allocation().get().removable(true, reusable)), - mutex)); + performOn(nodes, (node, mutex) -> write(node.with(node.allocation().get().removable(true, reusable)), mutex)); } /** @@ -239,7 +244,7 @@ public class Nodes { */ public List<Node> deactivate(List<Node> nodes, ApplicationTransaction transaction) { if ( ! zone.environment().isProduction() || zone.system().isCd()) - return deallocate(nodes, Agent.application, "Deactivated by application", transaction.nested()); + return deallocate(nodes, Agent.application, "Deactivated by application", transaction); NodeList nodeList = NodeList.copyOf(nodes); NodeList stateless = nodeList.stateless(); @@ -247,9 +252,9 @@ public class Nodes { NodeList statefulToInactive = stateful.not().reusable(); NodeList statefulToDirty = stateful.reusable(); List<Node> written = new ArrayList<>(); - written.addAll(deallocate(stateless.asList(), Agent.application, "Deactivated by application", transaction.nested())); - written.addAll(deallocate(statefulToDirty.asList(), Agent.application, "Deactivated by application (recycled)", transaction.nested())); - written.addAll(db.writeTo(Node.State.inactive, statefulToInactive.asList(), Agent.application, Optional.empty(), transaction.nested())); + written.addAll(deallocate(stateless.asList(), Agent.application, "Deactivated by application", transaction)); + written.addAll(deallocate(statefulToDirty.asList(), Agent.application, "Deactivated by application (recycled)", transaction)); + written.addAll(db.writeTo(Node.State.inactive, statefulToInactive.asList(), Agent.application, Optional.empty(), transaction)); return written; } @@ -258,21 +263,9 @@ public class Nodes { * transaction commits. */ public List<Node> fail(List<Node> nodes, ApplicationTransaction transaction) { - return fail(nodes, Agent.application, "Failed by application", transaction.nested()); - } - - public List<Node> fail(List<Node> nodes, Agent agent, String reason) { - NestedTransaction transaction = new NestedTransaction(); - nodes = fail(nodes, agent, reason, transaction); - transaction.commit(); - return nodes; - } - - private List<Node> fail(List<Node> nodes, Agent agent, String reason, NestedTransaction transaction) { - nodes = nodes.stream() - .map(n -> n.withWantToFail(false, agent, clock.instant())) - .toList(); - return db.writeTo(Node.State.failed, nodes, agent, Optional.of(reason), transaction); + return db.writeTo(Node.State.failed, + nodes.stream().map(n -> n.withWantToFail(false, Agent.application, clock.instant())).toList(), + Agent.application, Optional.of("Failed by application"), transaction); } /** Move nodes to the dirty state */ @@ -282,40 +275,48 @@ public class Nodes { public List<Node> deallocateRecursively(String hostname, Agent agent, String reason) { Node nodeToDirty = node(hostname).orElseThrow(() -> new NoSuchNodeException("Could not deallocate " + hostname + ": Node not found")); - - List<Node> nodesToDirty = - (nodeToDirty.type().isHost() ? - Stream.concat(list().childrenOf(hostname).asList().stream(), Stream.of(nodeToDirty)) : - Stream.of(nodeToDirty)).filter(node -> node.state() != Node.State.dirty).toList(); - List<String> hostnamesNotAllowedToDirty = nodesToDirty.stream() - .filter(node -> node.state() != Node.State.provisioned) - .filter(node -> node.state() != Node.State.failed) - .filter(node -> node.state() != Node.State.parked) - .filter(node -> node.state() != Node.State.breakfixed) - .map(Node::hostname).toList(); - if ( ! hostnamesNotAllowedToDirty.isEmpty()) - illegal("Could not deallocate " + nodeToDirty + ": " + - hostnamesNotAllowedToDirty + " are not in states [provisioned, failed, parked, breakfixed]"); - - return nodesToDirty.stream().map(node -> deallocate(node, agent, reason)).toList(); + List<Node> nodesToDirty = new ArrayList<>(); + try (RecursiveNodeMutexes locked = lockAndGetRecursively(hostname, Optional.empty())) { + for (NodeMutex child : locked.children()) + if (child.node().state() != Node.State.dirty) + nodesToDirty.add(child.node()); + + if (locked.parent().node().state() != State.dirty) + nodesToDirty.add(locked.parent().node()); + + List<String> hostnamesNotAllowedToDirty = nodesToDirty.stream() + .filter(node -> node.state() != Node.State.provisioned) + .filter(node -> node.state() != Node.State.failed) + .filter(node -> node.state() != Node.State.parked) + .filter(node -> node.state() != Node.State.breakfixed) + .map(Node::hostname).toList(); + if ( ! hostnamesNotAllowedToDirty.isEmpty()) + illegal("Could not deallocate " + nodeToDirty + ": " + + hostnamesNotAllowedToDirty + " are not in states [provisioned, failed, parked, breakfixed]"); + + return nodesToDirty.stream().map(node -> deallocate(node, agent, reason)).toList(); + } } /** - * Set a node dirty or parked, allowed if it is in the provisioned, inactive, failed or parked state. + * Set a node dirty or parked, allowed if it is in the provisioned, inactive, failed or parked state. * Use this to clean newly provisioned nodes or to recycle failed nodes which have been repaired or put on hold. */ public Node deallocate(Node node, Agent agent, String reason) { - NestedTransaction transaction = new NestedTransaction(); - Node deallocated = deallocate(node, agent, reason, transaction); - transaction.commit(); - return deallocated; + try (NodeMutex locked = lockAndGetRequired(node)) { + NestedTransaction transaction = new NestedTransaction(); + Node deallocated = deallocate(locked.node(), agent, reason, transaction); + transaction.commit(); + return deallocated; + } } - public List<Node> deallocate(List<Node> nodes, Agent agent, String reason, NestedTransaction transaction) { - return nodes.stream().map(node -> deallocate(node, agent, reason, transaction)).toList(); + public List<Node> deallocate(List<Node> nodes, Agent agent, String reason, ApplicationTransaction transaction) { + return nodes.stream().map(node -> deallocate(node, agent, reason, transaction.nested())).toList(); } - public Node deallocate(Node node, Agent agent, String reason, NestedTransaction transaction) { + // Be sure to hold the right lock! + private Node deallocate(Node node, Agent agent, String reason, NestedTransaction transaction) { if (parkOnDeallocationOf(node, agent)) { return park(node.hostname(), false, agent, reason, transaction); } else { @@ -339,7 +340,9 @@ public class Nodes { } public Node fail(String hostname, boolean forceDeprovision, Agent agent, String reason) { - return move(hostname, Node.State.failed, agent, forceDeprovision, Optional.of(reason)); + try (NodeMutex lock = lockAndGetRequired(hostname)) { + return move(hostname, Node.State.failed, agent, forceDeprovision, Optional.of(reason), lock); + } } /** @@ -350,14 +353,16 @@ public class Nodes { * @return all the nodes that were changed by this request */ public List<Node> failOrMarkRecursively(String hostname, Agent agent, String reason) { - NodeList children = list().childrenOf(hostname); - List<Node> changed = performOn(children, (node, lock) -> failOrMark(node, agent, reason, lock)); - - if (children.state(Node.State.active).isEmpty()) - changed.add(move(hostname, Node.State.failed, agent, false, Optional.of(reason))); - else - changed.addAll(performOn(NodeList.of(node(hostname).orElseThrow()), (node, lock) -> failOrMark(node, agent, reason, lock))); + List<Node> changed = new ArrayList<>(); + try (RecursiveNodeMutexes nodes = lockAndGetRecursively(hostname, Optional.empty())) { + for (NodeMutex child : nodes.children()) + changed.add(failOrMark(child.node(), agent, reason, child)); + if (changed.stream().noneMatch(child -> child.state() == Node.State.active)) + changed.add(move(hostname, Node.State.failed, agent, false, Optional.of(reason), nodes.parent())); + else + changed.add(failOrMark(nodes.parent().node(), agent, reason, nodes.parent())); + } return changed; } @@ -367,21 +372,10 @@ public class Nodes { write(node, lock); return node; } else { - return move(node.hostname(), Node.State.failed, agent, false, Optional.of(reason)); + return move(node.hostname(), Node.State.failed, agent, false, Optional.of(reason), lock); } } - /** Update IP config for nodes in given config */ - public void setIpConfig(HostIpConfig hostIpConfig) { - // Ideally this should hold the unallocated lock over the entire method, but unallocated lock must be taken - // after the application lock, making this impossible - Predicate<Node> nodeInConfig = (node) -> hostIpConfig.contains(node.hostname()); - performOn(nodeInConfig, (node, lock) -> { - IP.Config ipConfig = hostIpConfig.require(node.hostname()); - return write(node.with(ipConfig), lock); - }); - } - /** * Parks this node and returns it in its new state. * @@ -389,10 +383,12 @@ public class Nodes { * @throws NoSuchNodeException if the node is not found */ public Node park(String hostname, boolean forceDeprovision, Agent agent, String reason) { - NestedTransaction transaction = new NestedTransaction(); - Node parked = park(hostname, forceDeprovision, agent, reason, transaction); - transaction.commit(); - return parked; + try (NodeMutex locked = lockAndGetRequired(hostname)) { + NestedTransaction transaction = new NestedTransaction(); + Node parked = park(hostname, forceDeprovision, agent, reason, transaction); + transaction.commit(); + return parked; + } } private Node park(String hostname, boolean forceDeprovision, Agent agent, String reason, NestedTransaction transaction) { @@ -415,36 +411,38 @@ public class Nodes { * @throws NoSuchNodeException if the node is not found */ public Node reactivate(String hostname, Agent agent, String reason) { - return move(hostname, Node.State.active, agent, false, Optional.of(reason)); + try (NodeMutex lock = lockAndGetRequired(hostname)) { + return move(hostname, Node.State.active, agent, false, Optional.of(reason), lock); + } } /** * Moves a host to breakfixed state, removing any children. */ public List<Node> breakfixRecursively(String hostname, Agent agent, String reason) { - Node node = requireNode(hostname); - try (Mutex lock = lockUnallocated()) { - requireBreakfixable(node); + try (RecursiveNodeMutexes locked = lockAndGetRecursively(hostname, Optional.empty())) { + requireBreakfixable(locked.parent().node()); NestedTransaction transaction = new NestedTransaction(); - List<Node> removed = removeChildren(node, false, transaction); - removed.add(move(node.hostname(), Node.State.breakfixed, agent, false, Optional.of(reason), transaction)); + removeChildren(locked, false, transaction); + move(hostname, Node.State.breakfixed, agent, false, Optional.of(reason), transaction); transaction.commit(); - return removed; + return locked.nodes().nodes().stream().map(NodeMutex::node).toList(); } } private List<Node> moveRecursively(String hostname, Node.State toState, Agent agent, Optional<String> reason) { - NestedTransaction transaction = new NestedTransaction(); - List<Node> moved = list().childrenOf(hostname).asList().stream() - .map(child -> move(child.hostname(), toState, agent, false, reason, transaction)) - .collect(Collectors.toCollection(ArrayList::new)); - moved.add(move(hostname, toState, agent, false, reason, transaction)); - transaction.commit(); - return moved; + try (RecursiveNodeMutexes locked = lockAndGetRecursively(hostname, Optional.empty())) { + List<Node> moved = new ArrayList<>(); + NestedTransaction transaction = new NestedTransaction(); + for (NodeMutex node : locked.nodes().nodes()) + moved.add(move(node.node().hostname(), toState, agent, false, reason, transaction)); + transaction.commit(); + return moved; + } } /** Move a node to given state */ - private Node move(String hostname, Node.State toState, Agent agent, boolean forceDeprovision, Optional<String> reason) { + private Node move(String hostname, Node.State toState, Agent agent, boolean forceDeprovision, Optional<String> reason, Mutex lock) { NestedTransaction transaction = new NestedTransaction(); Node moved = move(hostname, toState, agent, forceDeprovision, reason, transaction); transaction.commit(); @@ -453,8 +451,7 @@ public class Nodes { /** Move a node to given state as part of a transaction */ private Node move(String hostname, Node.State toState, Agent agent, boolean forceDeprovision, Optional<String> reason, NestedTransaction transaction) { - // TODO: Work out a safe lock acquisition strategy for moves. Lock is only held while adding operations to - // transaction, but lock must also be held while committing + // TODO: This lock is already held here, but we still need to read the node. Perhaps change to requireNode(hostname) later. try (NodeMutex lock = lockAndGetRequired(hostname)) { Node node = lock.node(); if (toState == Node.State.active) { @@ -476,7 +473,7 @@ public class Nodes { .withPreferToRetire(false, agent, now); } if (forceDeprovision) - node = node.withWantToRetire(true, true, agent, now); + node = node.withWantToRetire(true, true, false, false, agent, now); if (toState == Node.State.deprovisioned) { node = node.with(IP.Config.EMPTY); } @@ -523,17 +520,18 @@ public class Nodes { } public List<Node> removeRecursively(Node node, boolean force) { - try (Mutex lock = lockUnallocated()) { - requireRemovable(node, false, force); + try (RecursiveNodeMutexes locked = lockAndGetRecursively(node.hostname(), Optional.empty())) { + requireRemovable(locked.parent().node(), false, force); NestedTransaction transaction = new NestedTransaction(); List<Node> removed; - if (!node.type().isHost()) { + if ( ! node.type().isHost()) { removed = List.of(node); db.removeNodes(removed, transaction); - } else { - removed = removeChildren(node, force, transaction); + } + else { + removeChildren(locked, force, transaction); move(node.hostname(), Node.State.deprovisioned, Agent.system, false, Optional.empty(), transaction); - removed.add(node); + removed = locked.nodes().nodes().stream().map(NodeMutex::node).toList(); } transaction.commit(); return removed; @@ -542,20 +540,22 @@ public class Nodes { /** Forgets a deprovisioned node. This removes all traces of the node in the node repository. */ public void forget(Node node) { - if (node.state() != Node.State.deprovisioned) - throw new IllegalArgumentException(node + " must be deprovisioned before it can be forgotten"); - if (node.status().wantToRebuild()) - throw new IllegalArgumentException(node + " is rebuilding and cannot be forgotten"); - NestedTransaction transaction = new NestedTransaction(); - db.removeNodes(List.of(node), transaction); - transaction.commit(); + try (NodeMutex locked = lockAndGetRequired(node.hostname())) { + if (node.state() != Node.State.deprovisioned) + throw new IllegalArgumentException(node + " must be deprovisioned before it can be forgotten"); + if (node.status().wantToRebuild()) + throw new IllegalArgumentException(node + " is rebuilding and cannot be forgotten"); + NestedTransaction transaction = new NestedTransaction(); + db.removeNodes(List.of(node), transaction); + transaction.commit(); + } } - private List<Node> removeChildren(Node node, boolean force, NestedTransaction transaction) { - List<Node> children = list().childrenOf(node).asList(); + private void removeChildren(RecursiveNodeMutexes nodes, boolean force, NestedTransaction transaction) { + if (nodes.children().isEmpty()) return; + List<Node> children = nodes.children().stream().map(NodeMutex::node).toList(); children.forEach(child -> requireRemovable(child, true, force)); db.removeNodes(children, transaction); - return new ArrayList<>(children); } /** @@ -717,8 +717,8 @@ public class Nodes { return db.writeTo(nodes, Agent.system, Optional.empty()); } - private List<Node> performOn(Predicate<Node> filter, BiFunction<Node, Mutex, Node> action) { - return performOn(list().matching(filter), action); + public List<Node> performOn(Predicate<Node> filter, BiFunction<Node, Mutex, Node> action) { + return performOn(list(), filter, action); } /** @@ -727,35 +727,33 @@ public class Nodes { * @param action the action to perform * @return the set of nodes on which the action was performed, as they became as a result of the operation */ - private List<Node> performOn(NodeList nodes, BiFunction<Node, Mutex, Node> action) { - List<Node> unallocatedNodes = new ArrayList<>(); - ListMap<ApplicationId, Node> allocatedNodes = new ListMap<>(); + public List<Node> performOn(NodeList nodes, BiFunction<Node, Mutex, Node> action) { + return performOn(nodes, __ -> true, action); + } - // Group matching nodes by the lock needed - for (Node node : nodes) { - Optional<ApplicationId> applicationId = applicationIdForLock(node); - if (applicationId.isPresent()) - allocatedNodes.put(applicationId.get(), node); - else - unallocatedNodes.add(node); - } + public List<Node> performOn(NodeList nodes, Predicate<Node> filter, BiFunction<Node, Mutex, Node> action) { + List<Node> resultingNodes = new ArrayList<>(); + nodes.matching(filter).stream().collect(groupingBy(Nodes::applicationIdForLock)) + .forEach((applicationId, nodeList) -> { // Grouped only to reduce number of lock acquire/release cycles. + try (NodeMutexes locked = lockAndGetAll(nodeList, Optional.empty())) { + for (NodeMutex node : locked.nodes()) + if (filter.test(node.node())) + resultingNodes.add(action.apply(node.node(), node)); + } + }); + return resultingNodes; + } + + public List<Node> performOnRecursively(NodeList parents, Predicate<RecursiveNodeMutexes> filter, Function<RecursiveNodeMutexes, List<Node>> action) { + for (Node node : parents) + if (node.parentHostname().isPresent()) + throw new IllegalArgumentException(node + " is not a parent host"); - // Perform operation while holding appropriate lock List<Node> resultingNodes = new ArrayList<>(); - try (Mutex lock = lockUnallocated()) { - for (Node node : unallocatedNodes) { - Optional<Node> currentNode = db.readNode(node.hostname()); // Re-read while holding lock - if (currentNode.isEmpty()) continue; - resultingNodes.add(action.apply(currentNode.get(), lock)); - } - } - for (Map.Entry<ApplicationId, List<Node>> applicationNodes : allocatedNodes.entrySet()) { - try (Mutex lock = applications.lock(applicationNodes.getKey())) { - for (Node node : applicationNodes.getValue()) { - Optional<Node> currentNode = db.readNode(node.hostname()); // Re-read while holding lock - if (currentNode.isEmpty()) continue; - resultingNodes.add(action.apply(currentNode.get(), lock)); - } + for (Node parent : parents) { + try (RecursiveNodeMutexes locked = lockAndGetRecursively(parent.hostname(), Optional.empty())) { + if (filter.test(locked)) + resultingNodes.addAll(action.apply(locked)); } } return resultingNodes; @@ -818,9 +816,7 @@ public class Nodes { return Optional.empty(); } - if (node.type() != NodeType.tenant || - Objects.equals(freshNode.get().allocation().map(Allocation::owner), - staleNode.allocation().map(Allocation::owner))) { + if (applicationIdForLock(freshNode.get()).equals(applicationIdForLock(staleNode))) { NodeMutex nodeMutex = new NodeMutex(freshNode.get(), lockToClose); lockToClose = null; return Optional.of(nodeMutex); @@ -881,6 +877,179 @@ public class Nodes { return node(hostname).orElseThrow(() -> new NoSuchNodeException("No node with hostname '" + hostname + "'")); } + /** + * Locks the children of the given node, the node itself, and finally takes the unallocated lock. + * <br> + * When taking multiple locks, it's crucial that we always take them in the same order, to avoid deadlocks. + * We want to take the most contended locks last, so that we don't block other operations for longer than necessary. + * This method does that, by first taking the locks for any children the given node may have, and then the node itself. + * (This is enforced by taking host locks after tenant node locks, in {@link #lockAndGetAll(Collection, Optional)}.) + * Finally, the allocation lock is taken, to ensure no new children are added while we hold this snapshot. + * Unfortunately, since that lock is taken last, we may detect new nodes after taking it, and then we have to retry. + * Closing the returned {@link RecursiveNodeMutexes} will release all the locks, and the locks should not be closed elsewhere. + */ + public RecursiveNodeMutexes lockAndGetRecursively(String hostname, Optional<Duration> timeout) { + TimeBudget budget = TimeBudget.fromNow(clock, timeout.orElse(Duration.ofMinutes(2))); + Set<Node> children = new HashSet<>(list().childrenOf(hostname).asList()); + Optional<Node> node = node(hostname); + + int attempts = 5; // We'll retry locking the whole list of children this many times, in case new children appear. + for (int attempt = 0; attempt < attempts; attempt++) { + NodeMutexes mutexes = null; + Mutex unallocatedLock = null; + try { + // First, we lock all the children, and the host; then we take the allocation lock to ensure our snapshot is valid. + List<Node> nodes = new ArrayList<>(children.size() + 1); + nodes.addAll(children); + node.ifPresent(nodes::add); + mutexes = lockAndGetAll(nodes, budget.timeLeftOrThrow()); + unallocatedLock = db.lockInactive(budget.timeLeftOrThrow().get()); + RecursiveNodeMutexes recursive = new RecursiveNodeMutexes(hostname, mutexes, unallocatedLock); + Set<Node> freshChildren = list().childrenOf(hostname).asSet(); + Optional<Node> freshNode = recursive.parent.map(NodeMutex::node); + if (children.equals(freshChildren) && node.equals(freshNode)) { + // No new nodes have appeared, and none will now, so we have a consistent snapshot. + if (node.isEmpty() && ! children.isEmpty()) + throw new IllegalStateException("node '" + hostname + "' was not found, but it has children: " + children); + + mutexes = null; + unallocatedLock = null; + return recursive; + } + else { + // New nodes have appeared, so we need to let go of the locks and try again with the new set of nodes. + children = freshChildren; + node = freshNode; + } + } + finally { + if (unallocatedLock != null) unallocatedLock.close(); + if (mutexes != null) mutexes.close(); + } + } + throw new IllegalStateException("giving up (after " + attempts + " attempts) fetching an up to " + + "date recursive node set under lock for node " + hostname); + } + + /** Locks all nodes in the given list, in a universal order, and returns the locks and nodes required. */ + public NodeMutexes lockAndRequireAll(Collection<Node> nodes, Optional<Duration> timeout) { + return lockAndGetAll(nodes, timeout, true); + } + + /** Locks all nodes in the given list, in a universal order, and returns the locks and nodes acquired. */ + public NodeMutexes lockAndGetAll(Collection<Node> nodes, Optional<Duration> timeout) { + return lockAndGetAll(nodes, timeout, false); + } + + /** Locks all nodes in the given list, in a universal order, and returns the locks and nodes. */ + private NodeMutexes lockAndGetAll(Collection<Node> nodes, Optional<Duration> timeout, boolean required) { + TimeBudget budget = TimeBudget.fromNow(clock, timeout.orElse(Duration.ofMinutes(2))); + Comparator<Node> universalOrder = (a, b) -> { + Optional<ApplicationId> idA = applicationIdForLock(a); + Optional<ApplicationId> idB = applicationIdForLock(b); + if (idA.isPresent() != idB.isPresent()) return idA.isPresent() ? -1 : 1; // Allocated nodes first. + if (a.type() != b.type()) return a.type().compareTo(b.type()); // Tenant nodes first among those. + if ( ! idA.equals(idB)) return idA.get().compareTo(idB.get()); // Sort primarily by tenant owner id. + return a.hostname().compareTo(b.hostname()); // Sort secondarily by hostname. + }; + NavigableSet<NodeMutex> locked = new TreeSet<>(comparing(NodeMutex::node, universalOrder)); + NavigableSet<Node> unlocked = new TreeSet<>(universalOrder); + unlocked.addAll(nodes); + try { + int attempts = 10; // We'll accept getting the wrong lock at most this many times before giving up. + for (int attempt = 0; attempt < attempts; ) { + if (unlocked.isEmpty()) { + NodeMutexes mutexes = new NodeMutexes(List.copyOf(locked)); + locked.clear(); + return mutexes; + } + + // If the first node is now earlier in lock order than some other locks we have, we need to close those and re-acquire them. + Node next = unlocked.pollFirst(); + Set<NodeMutex> outOfOrder = locked.tailSet(new NodeMutex(next, () -> { }), false); + NodeMutexes.close(outOfOrder); + for (NodeMutex node : outOfOrder) unlocked.add(node.node()); + outOfOrder.clear(); + + boolean nextLockSameAsPrevious = ! locked.isEmpty() && applicationIdForLock(locked.last().node()).equals(applicationIdForLock(next)); + Mutex lock = nextLockSameAsPrevious ? () -> { } : lock(next, budget.timeLeftOrThrow()); + try { + Optional<Node> fresh = node(next.hostname()); + if (fresh.isEmpty()) { + if (required) throw new NoSuchNodeException("No node with hostname '" + next.hostname() + "'"); + continue; // Node is gone; skip to close lock. + } + + if (applicationIdForLock(fresh.get()).equals(applicationIdForLock(next))) { + // We held the right lock, so this node is ours now. + locked.add(new NodeMutex(fresh.get(), lock)); + lock = null; + } + else { + // We held the wrong lock, and need to try again. + ++attempt; + unlocked.add(fresh.get()); + } + } + finally { + // If we didn't hold the right lock, we must close the wrong one before we continue. + if (lock != null) lock.close(); + } + } + throw new IllegalStateException("giving up (after " + attempts + " extra attempts) to lock nodes: " + + nodes.stream().map(Node::hostname).collect(joining(", "))); + } + finally { + // If we didn't manage to lock all nodes, we must close the ones we did lock before we throw. + NodeMutexes.close(locked); + } + } + + /** A node with their locks, acquired in a universal order. */ + public record NodeMutexes(List<NodeMutex> nodes) implements AutoCloseable { + @Override public void close() { close(nodes); } + private static void close(Collection<NodeMutex> nodes) { + RuntimeException thrown = null; + for (NodeMutex node : reversed(List.copyOf(nodes))) { + try { + node.close(); + } + catch (RuntimeException e) { + if (thrown == null) thrown = e; + else thrown.addSuppressed(e); + } + } + if (thrown != null) throw thrown; + } + } + + /** A parent node, all its children, their locks acquired in a universal order, and then the unallocated lock. */ + public static class RecursiveNodeMutexes implements AutoCloseable { + + private final String hostname; + private final NodeMutexes nodes; + private final Mutex unallocatedLock; + private final List<NodeMutex> children; + private final Optional<NodeMutex> parent; + + public RecursiveNodeMutexes(String hostname, NodeMutexes nodes, Mutex unallocatedLock) { + this.hostname = hostname; + this.nodes = nodes; + this.unallocatedLock = unallocatedLock; + this.children = nodes.nodes().stream().filter(node -> ! node.node().hostname().equals(hostname)).toList(); + this.parent = nodes.nodes().stream().filter(node -> node.node().hostname().equals(hostname)).findFirst(); + } + + /** Any children of the node. */ + public List<NodeMutex> children() { return children; } + /** The node itself, or throws if the node was not found. */ + public NodeMutex parent() { return parent.orElseThrow(() -> new NoSuchNodeException("No node with hostname '" + hostname + "'")); } + /** Empty if the node was not found, or the node, and any children. */ + public NodeMutexes nodes() { return nodes; } + /** Closes the allocation lock, and all the node locks. */ + @Override public void close() { try (nodes; unallocatedLock) { } } + } + /** Returns the application ID that should be used for locking when modifying this node */ private static Optional<ApplicationId> applicationIdForLock(Node node) { return switch (node.type()) { diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/node/OsVersion.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/node/OsVersion.java index 2e0e780182b..ffb9477c88e 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/node/OsVersion.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/node/OsVersion.java @@ -43,6 +43,11 @@ public class OsVersion { return wanted.isPresent() && !current.equals(wanted); } + /** Returns whether this node is downgrading its version */ + public boolean downgrading() { + return (wanted.isPresent() && current.isPresent()) && wanted.get().isBefore(current.get()); + } + /** Returns whether this is before the given version */ public boolean isBefore(Version version) { return current.isEmpty() || current.get().isBefore(version); diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/os/DelegatingOsUpgrader.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/os/DelegatingOsUpgrader.java index 23aa03a5315..4ee0774db8f 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/os/DelegatingOsUpgrader.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/os/DelegatingOsUpgrader.java @@ -12,8 +12,8 @@ import java.util.Optional; import java.util.logging.Logger; /** - * An upgrader that delegates the upgrade to the node itself, triggered by changing its wanted OS version. This - * implementation limits the number of parallel upgrades to avoid overloading the orchestrator with suspension requests. + * An upgrader that delegates the upgrade to the node itself, triggered by changing its wanted OS version. Downgrades + * are not supported. * * Used in clouds where nodes can upgrade themselves in-place, without data loss. * @@ -32,6 +32,8 @@ public class DelegatingOsUpgrader extends OsUpgrader { NodeList activeNodes = nodeRepository.nodes().list(Node.State.active).nodeType(target.nodeType()); Instant now = nodeRepository.clock().instant(); NodeList nodesToUpgrade = activeNodes.not().changingOsVersionTo(target.version()) + // This upgrader cannot downgrade nodes. We therefore consider only nodes + // on a lower version than the target .osVersionIsBefore(target.version()) .matching(node -> canUpgradeAt(now, node)) .byIncreasingOsVersion() diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/os/OsUpgrader.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/os/OsUpgrader.java index 5def863113c..f8becd31792 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/os/OsUpgrader.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/os/OsUpgrader.java @@ -43,9 +43,10 @@ public abstract class OsUpgrader { return Math.max(0, max - upgrading); } - /** Returns whether node can upgrade at given instant */ + /** Returns whether node can change version at given instant */ final boolean canUpgradeAt(Instant instant, Node node) { - return node.history().age(instant).compareTo(gracePeriod()) > 0; + return node.status().osVersion().downgrading() || // Fast-track downgrades + node.history().age(instant).compareTo(gracePeriod()) > 0; } /** The duration this leaves new nodes alone before scheduling any upgrade */ diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/os/RebuildingOsUpgrader.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/os/RebuildingOsUpgrader.java index e0affaae666..805793b41a4 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/os/RebuildingOsUpgrader.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/os/RebuildingOsUpgrader.java @@ -66,13 +66,13 @@ public class RebuildingOsUpgrader extends OsUpgrader { .statefulClusters()); // Rebuild hosts not containing stateful clusters with retiring nodes, up to rebuild limit - NodeList activeHosts = hostsOfTargetType.state(Node.State.active); - int rebuildLimit = upgradeSlots(target, activeHosts.rebuilding(softRebuild)); + NodeList hosts = hostsOfTargetType.state(Node.State.active, Node.State.provisioned); + int rebuildLimit = upgradeSlots(target, hosts.rebuilding(softRebuild)); List<Node> hostsToRebuild = new ArrayList<>(rebuildLimit); - NodeList candidates = activeHosts.not().rebuilding(softRebuild) - .osVersionIsBefore(target.version()) - .matching(node -> canUpgradeAt(now, node)) - .byIncreasingOsVersion(); + NodeList candidates = hosts.not().rebuilding(softRebuild) + .not().onOsVersion(target.version()) + .matching(node -> canUpgradeAt(now, node)) + .byIncreasingOsVersion(); for (Node host : candidates) { if (hostsToRebuild.size() == rebuildLimit) break; Set<ClusterId> clustersOnHost = activeNodes.childrenOf(host).statefulClusters(); diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/os/RetiringOsUpgrader.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/os/RetiringOsUpgrader.java index de4915d60aa..ccb7f40b0de 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/os/RetiringOsUpgrader.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/os/RetiringOsUpgrader.java @@ -47,16 +47,16 @@ public class RetiringOsUpgrader extends OsUpgrader { /** Returns nodes that are candidates for upgrade */ private NodeList candidates(Instant instant, OsVersionTarget target, NodeList allNodes) { - NodeList activeNodes = allNodes.state(Node.State.active).nodeType(target.nodeType()); + NodeList nodes = allNodes.state(Node.State.active, Node.State.provisioned).nodeType(target.nodeType()); if (softRebuild) { // Retire only hosts which do not have a replaceable root disk - activeNodes = activeNodes.not().replaceableRootDisk(); + nodes = nodes.not().replaceableRootDisk(); } - return activeNodes.not().deprovisioning() - .osVersionIsBefore(target.version()) - .matching(node -> canUpgradeAt(instant, node)) - .byIncreasingOsVersion() - .first(upgradeSlots(target, activeNodes.deprovisioning())); + return nodes.not().deprovisioning() + .not().onOsVersion(target.version()) + .matching(node -> canUpgradeAt(instant, node)) + .byIncreasingOsVersion() + .first(upgradeSlots(target, nodes.deprovisioning())); } /** Upgrade given host by retiring and deprovisioning it */ diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/persistence/CachingCurator.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/persistence/CachingCurator.java index 9b2517cd421..f26703dc598 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/persistence/CachingCurator.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/persistence/CachingCurator.java @@ -9,12 +9,14 @@ import com.yahoo.vespa.curator.Curator; import com.yahoo.vespa.curator.Lock; import com.yahoo.vespa.curator.recipes.CuratorCounter; import com.yahoo.vespa.curator.transaction.CuratorTransaction; +import org.apache.zookeeper.data.Stat; import java.time.Duration; import java.util.Arrays; import java.util.List; import java.util.Map; import java.util.Optional; +import java.util.OptionalInt; import java.util.concurrent.ConcurrentHashMap; import java.util.concurrent.atomic.AtomicReference; import java.util.function.Supplier; @@ -145,8 +147,9 @@ public class CachingCurator { // The content of the map is immutable. private final Map<Path, List<String>> children = new ConcurrentHashMap<>(); private final Map<Path, Optional<byte[]>> data = new ConcurrentHashMap<>(); + private final Map<Path, Optional<Integer>> stats = new ConcurrentHashMap<>(); - private final AbstractCache.SimpleStatsCounter stats = new AbstractCache.SimpleStatsCounter(); + private final AbstractCache.SimpleStatsCounter statistics = new AbstractCache.SimpleStatsCounter(); /** Create an empty snapshot at a given generation (as an empty snapshot is a valid partial snapshot) */ private Cache(long generation, Curator curator) { @@ -164,19 +167,24 @@ public class CachingCurator { return get(data, path, () -> curator.getData(path)).map(data -> Arrays.copyOf(data, data.length)); } + @Override + public Optional<Integer> getStat(Path path) { + return get(stats, path, () -> curator.getStat(path).map(Stat::getVersion)); + } + private <T> T get(Map<Path, T> values, Path path, Supplier<T> loader) { return values.compute(path, (key, value) -> { if (value == null) { - stats.recordMisses(1); + statistics.recordMisses(1); return loader.get(); } - stats.recordHits(1); + statistics.recordHits(1); return value; }); } public CacheStats stats() { - var stats = this.stats.snapshot(); + var stats = this.statistics.snapshot(); return new CacheStats(stats.hitRate(), stats.evictionCount(), children.size() + data.size()); } @@ -193,6 +201,11 @@ public class CachingCurator { @Override public Optional<byte[]> getData(Path path) { return curator.getData(path); } + @Override + public Optional<Integer> getStat(Path path) { + return curator.getStat(path).map(Stat::getVersion); + } + } interface Session { @@ -207,6 +220,8 @@ public class CachingCurator { */ Optional<byte[]> getData(Path path); + Optional<Integer> getStat(Path path); + } } diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/persistence/CuratorDb.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/persistence/CuratorDb.java index fc008b7b9dc..c388273b1a6 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/persistence/CuratorDb.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/persistence/CuratorDb.java @@ -2,6 +2,10 @@ package com.yahoo.vespa.hosted.provision.persistence; import ai.vespa.http.DomainName; +import com.google.common.cache.Cache; +import com.google.common.cache.CacheBuilder; +import com.google.common.util.concurrent.UncheckedExecutionException; +import com.yahoo.collections.Pair; import com.yahoo.component.Version; import com.yahoo.concurrent.UncheckedTimeoutException; import com.yahoo.config.provision.ApplicationId; @@ -18,6 +22,7 @@ import com.yahoo.vespa.curator.Lock; import com.yahoo.vespa.curator.recipes.CuratorCounter; import com.yahoo.vespa.curator.transaction.CuratorOperations; import com.yahoo.vespa.curator.transaction.CuratorTransaction; +import com.yahoo.vespa.hosted.provision.LockedNodeList; import com.yahoo.vespa.hosted.provision.Node; import com.yahoo.vespa.hosted.provision.applications.Application; import com.yahoo.vespa.hosted.provision.archive.ArchiveUris; @@ -37,6 +42,7 @@ import java.util.List; import java.util.Map; import java.util.Optional; import java.util.TreeMap; +import java.util.concurrent.ExecutionException; import java.util.function.Function; import java.util.function.Predicate; import java.util.logging.Level; @@ -79,6 +85,9 @@ public class CuratorDb { private final Clock clock; private final CuratorCounter provisionIndexCounter; + /** Simple cache for deserialized node objects, based on their ZK node version. */ + private final Cache<Path, Pair<Integer, Node>> cachedNodes = CacheBuilder.newBuilder().recordStats().build(); + public CuratorDb(NodeFlavors flavors, Curator curator, Clock clock, boolean useCache, long nodeCacheSize) { this.nodeSerializer = new NodeSerializer(flavors, nodeCacheSize); this.db = new CachingCurator(curator, root, useCache); @@ -105,7 +114,7 @@ public class CuratorDb { } /** Adds a set of nodes. Rollbacks/fails transaction if any node is not in the expected state. */ - public List<Node> addNodesInState(List<Node> nodes, Node.State expectedState, Agent agent, NestedTransaction transaction) { + public List<Node> addNodesInState(LockedNodeList nodes, Node.State expectedState, Agent agent, NestedTransaction transaction) { CuratorTransaction curatorTransaction = db.newCuratorTransactionIn(transaction); for (Node node : nodes) { if (node.state() != expectedState) @@ -116,10 +125,10 @@ public class CuratorDb { curatorTransaction.add(CuratorOperations.create(nodePath(node).getAbsolute(), serialized)); } transaction.onCommitted(() -> nodes.forEach(node -> log.log(Level.INFO, "Added " + node))); - return nodes; + return nodes.asList(); } - public List<Node> addNodesInState(List<Node> nodes, Node.State expectedState, Agent agent) { + public List<Node> addNodesInState(LockedNodeList nodes, Node.State expectedState, Agent agent) { NestedTransaction transaction = new NestedTransaction(); List<Node> writtenNodes = addNodesInState(nodes, expectedState, agent, transaction); transaction.commit(); @@ -175,6 +184,7 @@ public class CuratorDb { return writtenNodes; } } + public Node writeTo(Node.State toState, Node node, Agent agent, Optional<String> reason) { return writeTo(toState, Collections.singletonList(node), agent, reason).get(0); } @@ -192,6 +202,12 @@ public class CuratorDb { */ public List<Node> writeTo(Node.State toState, List<Node> nodes, Agent agent, Optional<String> reason, + ApplicationTransaction transaction) { + return writeTo(toState, nodes, agent, reason, transaction.nested()); + } + + public List<Node> writeTo(Node.State toState, List<Node> nodes, + Agent agent, Optional<String> reason, NestedTransaction transaction) { if (nodes.isEmpty()) return nodes; @@ -199,7 +215,7 @@ public class CuratorDb { CuratorTransaction curatorTransaction = db.newCuratorTransactionIn(transaction); for (Node node : nodes) { - Node newNode = new Node(node.id(), node.ipConfig(), node.hostname(), + Node newNode = new Node(node.id(), node.extraId(), node.ipConfig(), node.hostname(), node.parentHostname(), node.flavor(), newNodeStatus(node, toState), toState, @@ -236,7 +252,17 @@ public class CuratorDb { } private Optional<Node> readNode(CachingCurator.Session session, String hostname) { - return session.getData(nodePath(hostname)).map(nodeSerializer::fromJson); + Path path = nodePath(hostname); + return session.getStat(path) + .map(stat -> { + Pair<Integer, Node> cached = cachedNodes.getIfPresent(path); + if (cached != null && cached.getFirst().equals(stat)) return cached.getSecond(); + cachedNodes.invalidate(path); + Optional<Node> node = session.getData(path).filter(data -> data.length > 0).map(nodeSerializer::fromJson); + if (node.isEmpty()) return null; + cachedNodes.put(path, new Pair<>(stat, node.get())); + return node.get(); + }); } /** Read node with given hostname, if any such node exists */ @@ -478,7 +504,8 @@ public class CuratorDb { } public CacheStats nodeSerializerCacheStats() { - return nodeSerializer.cacheStats(); + var stats = cachedNodes.stats(); + return new CacheStats(stats.hitRate(), stats.evictionCount(), cachedNodes.size()); } private <T> Optional<T> read(Path path, Function<byte[], T> mapper) { diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/persistence/NodeSerializer.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/persistence/NodeSerializer.java index f52ce5d690d..7e82ef55917 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/persistence/NodeSerializer.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/persistence/NodeSerializer.java @@ -1,11 +1,7 @@ // Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. package com.yahoo.vespa.hosted.provision.persistence; -import com.google.common.cache.Cache; -import com.google.common.cache.CacheBuilder; import com.google.common.collect.ImmutableSet; -import com.google.common.hash.Hashing; -import com.google.common.util.concurrent.UncheckedExecutionException; import com.yahoo.component.Version; import com.yahoo.config.provision.ApplicationId; import com.yahoo.config.provision.ApplicationName; @@ -46,7 +42,6 @@ import java.util.Collection; import java.util.List; import java.util.Optional; import java.util.Set; -import java.util.concurrent.ExecutionException; /** * Serializes a node to/from JSON. @@ -74,6 +69,7 @@ public class NodeSerializer { private static final String containersKey = "containers"; private static final String containerHostnameKey = "hostname"; private static final String idKey = "openStackId"; + private static final String extraIdKey = "extraId"; private static final String parentHostnameKey = "parentHostname"; private static final String historyKey = "history"; private static final String logKey = "log"; @@ -136,17 +132,10 @@ public class NodeSerializer { private static final String fingerprintKey = "fingerprint"; private static final String expiresKey = "expires"; - // A cache of deserialized Node objects. The cache is keyed on the hash of serialized node data. - // - // Deserializing a Node from slime is expensive, and happens frequently. Node instances that have already been - // deserialized are returned from this cache instead of being deserialized again. - private final Cache<Long, Node> cache; - // ---------------- Serialization ---------------------------------------------------- public NodeSerializer(NodeFlavors flavors, long cacheSize) { this.flavors = flavors; - this.cache = CacheBuilder.newBuilder().maximumSize(cacheSize).recordStats().build(); } public byte[] toJson(Node node) { @@ -160,12 +149,6 @@ public class NodeSerializer { } } - /** Returns cache statistics for this serializer */ - public CacheStats cacheStats() { - var stats = cache.stats(); - return new CacheStats(stats.hitRate(), stats.evictionCount(), cache.size()); - } - private void toSlime(Node node, Cursor object) { object.setString(hostnameKey, node.hostname()); object.setString(stateKey, toString(node.state())); @@ -173,6 +156,7 @@ public class NodeSerializer { toSlime(node.ipConfig().pool().asSet(), object.setArray(ipAddressPoolKey)); toSlime(node.ipConfig().pool().hostnames(), object); object.setString(idKey, node.id()); + node.extraId().ifPresent(id -> object.setString(extraIdKey, id)); node.parentHostname().ifPresent(hostname -> object.setString(parentHostnameKey, hostname)); toSlime(node.flavor(), object); object.setLong(rebootGenerationKey, node.status().reboot().wanted()); @@ -272,17 +256,13 @@ public class NodeSerializer { // ---------------- Deserialization -------------------------------------------------- public Node fromJson(byte[] data) { - long key = Hashing.sipHash24().newHasher().putBytes(data).hash().asLong(); - try { - return cache.get(key, () -> nodeFromSlime(SlimeUtils.jsonToSlime(data).get())); - } catch (ExecutionException e) { - throw new UncheckedExecutionException(e); - } + return nodeFromSlime(SlimeUtils.jsonToSlime(data).get()); } private Node nodeFromSlime(Inspector object) { Flavor flavor = flavorFromSlime(object); return new Node(object.field(idKey).asString(), + SlimeUtils.optionalString(object.field(extraIdKey)), IP.Config.of(ipAddressesFromSlime(object, ipAddressesKey), ipAddressesFromSlime(object, ipAddressPoolKey), hostnamesFromSlime(object)), diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/Activator.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/Activator.java index caf936e8aeb..9adff9f9d7a 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/Activator.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/Activator.java @@ -14,7 +14,6 @@ import com.yahoo.vespa.hosted.provision.NodeMutex; import com.yahoo.vespa.hosted.provision.NodeRepository; import com.yahoo.vespa.hosted.provision.applications.Application; import com.yahoo.vespa.hosted.provision.applications.ScalingEvent; -import com.yahoo.vespa.hosted.provision.autoscale.Autoscaling; import com.yahoo.vespa.hosted.provision.node.Agent; import com.yahoo.vespa.hosted.provision.node.Allocation; @@ -71,8 +70,8 @@ class Activator { NodeList allNodes = nodeRepository.nodes().list(); NodeList applicationNodes = allNodes.owner(application); - NodeList reserved = updatePortsFrom(hosts, applicationNodes.state(Node.State.reserved) - .matching(node -> hostnames.contains(node.hostname()))); + NodeList reserved = applicationNodes.state(Node.State.reserved).matching(node -> hostnames.contains(node.hostname())); + reserved = updatePortsFrom(hosts, reserved); nodeRepository.nodes().reserve(reserved.asList()); // Re-reserve nodes to avoid reservation expiry NodeList oldActive = applicationNodes.state(Node.State.active); // All nodes active now @@ -88,7 +87,7 @@ class Activator { NodeList activeToRemove = oldActive.matching(node -> ! hostnames.contains(node.hostname())); remove(activeToRemove, transaction); // TODO: Pass activation time in this call and next line - nodeRepository.nodes().activate(newActive.asList(), transaction.nested()); // activate also continued active to update node state + nodeRepository.nodes().activate(newActive.asList(), transaction); // activate also continued active to update node state rememberResourceChange(transaction, generation, activationTime, oldActive.not().retired(), diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/CapacityPolicies.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/CapacityPolicies.java index fc2873318d9..8a39f309935 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/CapacityPolicies.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/CapacityPolicies.java @@ -9,11 +9,11 @@ import com.yahoo.config.provision.ClusterResources; import com.yahoo.config.provision.ClusterSpec; import com.yahoo.config.provision.Environment; import com.yahoo.config.provision.NodeResources; -import com.yahoo.config.provision.SystemName; import com.yahoo.config.provision.Zone; import com.yahoo.vespa.flags.PermanentFlags; import com.yahoo.vespa.flags.StringFlag; import com.yahoo.vespa.hosted.provision.NodeRepository; + import java.util.Map; import java.util.TreeMap; @@ -98,9 +98,10 @@ public class CapacityPolicies { Architecture architecture = adminClusterArchitecture(applicationId); if (nodeRepository.exclusiveAllocation(clusterSpec)) { - var resources = smallestExclusiveResources(); + var resources = legacySmallestExclusiveResources(); //TODO: use 8Gb as default when no apps are using 4Gb return versioned(clusterSpec, Map.of(new Version(0), resources, - new Version(8, 182, 12), resources.with(architecture))); + new Version(8, 182, 12), resources.with(architecture), + new Version(8, 187), smallestExclusiveResources().with(architecture))); } if (clusterSpec.id().value().equals("cluster-controllers")) { @@ -114,10 +115,6 @@ public class CapacityPolicies { return versioned(clusterSpec, Map.of(new Version(0), smallestSharedResources())).with(architecture); } - if (zone.environment() == Environment.dev && zone.system() == SystemName.cd) { - return versioned(clusterSpec, Map.of(new Version(0), new NodeResources(1.5, 4, 50, 0.3))); - } - if (clusterSpec.type() == ClusterSpec.Type.content) { return zone.cloud().dynamicProvisioning() ? versioned(clusterSpec, Map.of(new Version(0), new NodeResources(2, 16, 300, 0.3))) @@ -162,10 +159,17 @@ public class CapacityPolicies { } // The lowest amount of resources that can be exclusive allocated (i.e. a matching host flavor for this exists) + private NodeResources legacySmallestExclusiveResources() { + return (zone.cloud().name().equals(CloudName.GCP)) + ? new NodeResources(1, 4, 50, 0.3) + : new NodeResources(0.5, 4, 50, 0.3); + } + + // The lowest amount of resources that can be exclusive allocated (i.e. a matching host flavor for this exists) private NodeResources smallestExclusiveResources() { return (zone.cloud().name().equals(CloudName.GCP)) - ? new NodeResources(1, 4, 50, 0.3) - : new NodeResources(0.5, 4, 50, 0.3); + ? new NodeResources(2, 8, 50, 0.3) + : new NodeResources(0.5, 8, 50, 0.3); } // The lowest amount of resources that can be shared (i.e. a matching host flavor for this exists) diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/GroupAssigner.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/GroupAssigner.java new file mode 100644 index 00000000000..542f2c00f95 --- /dev/null +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/GroupAssigner.java @@ -0,0 +1,163 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.vespa.hosted.provision.provisioning; + +import com.yahoo.config.provision.ClusterSpec; +import com.yahoo.config.provision.Flavor; +import com.yahoo.config.provision.NodeResources; +import com.yahoo.vespa.hosted.provision.Node; +import com.yahoo.vespa.hosted.provision.NodeList; +import com.yahoo.vespa.hosted.provision.node.Agent; + +import java.time.Clock; +import java.util.Collection; +import java.util.Comparator; +import java.util.List; +import java.util.Optional; + +/** + * Knows how to assign a group index to a number of nodes (some of which have an index already), + * such that the nodes are placed in the desired groups with minimal group movement. + * + * @author bratseth + */ +class GroupAssigner { + + private final NodeSpec requested; + private final NodeList allNodes; + private final Clock clock; + + GroupAssigner(NodeSpec requested, NodeList allNodes, Clock clock) { + if (requested.groups() > 1 && requested.count().isEmpty()) + throw new IllegalArgumentException("Unlimited nodes cannot be grouped"); + this.requested = requested; + this.allNodes = allNodes; + this.clock = clock; + } + + Collection<NodeCandidate> assignTo(Collection<NodeCandidate> candidates) { + int[] countInGroup = countInEachGroup(candidates); + candidates = byUnretiringPriority(candidates).stream().map(node -> unretireNodeInExpandedGroup(node, countInGroup)).toList(); + candidates = candidates.stream().map(node -> assignGroupToNewNode(node, countInGroup)).toList(); + candidates = byUnretiringPriority(candidates).stream().map(node -> moveNodeInSurplusGroup(node, countInGroup)).toList(); + candidates = byRetiringPriority(candidates).stream().map(node -> retireSurplusNodeInGroup(node, countInGroup)).toList(); + candidates = candidates.stream().filter(node -> ! shouldRemove(node)).toList(); + return candidates; + } + + /** Prefer to retire nodes we want the least */ + private List<NodeCandidate> byRetiringPriority(Collection<NodeCandidate> candidates) { + return candidates.stream().sorted(Comparator.reverseOrder()).toList(); + } + + /** Prefer to unretire nodes we don't want to retire, and otherwise those with lower index */ + private List<NodeCandidate> byUnretiringPriority(Collection<NodeCandidate> candidates) { + return candidates.stream() + .sorted(Comparator.comparing(NodeCandidate::wantToRetire) + .thenComparing(n -> n.allocation().get().membership().index())) + .toList(); + } + + private int[] countInEachGroup(Collection<NodeCandidate> candidates) { + int[] countInGroup = new int[requested.groups()]; + for (var candidate : candidates) { + if (candidate.allocation().get().membership().retired()) continue; + var currentGroup = candidate.allocation().get().membership().cluster().group(); + if (currentGroup.isEmpty()) continue; + if (currentGroup.get().index() >= requested.groups()) continue; + countInGroup[currentGroup.get().index()]++; + } + return countInGroup; + } + + /** Assign a group to new or to be reactivated nodes. */ + private NodeCandidate assignGroupToNewNode(NodeCandidate candidate, int[] countInGroup) { + if (candidate.state() == Node.State.active && candidate.allocation().get().membership().retired()) return candidate; + if (candidate.state() == Node.State.active && candidate.allocation().get().membership().cluster().group().isPresent()) return candidate; + return inFirstGroupWithDeficiency(candidate, countInGroup); + } + + private NodeCandidate moveNodeInSurplusGroup(NodeCandidate candidate, int[] countInGroup) { + var currentGroup = candidate.allocation().get().membership().cluster().group(); + if (currentGroup.isEmpty()) return candidate; + if (currentGroup.get().index() < requested.groups()) return candidate; + return inFirstGroupWithDeficiency(candidate, countInGroup); + } + + private NodeCandidate retireSurplusNodeInGroup(NodeCandidate candidate, int[] countInGroup) { + if (candidate.allocation().get().membership().retired()) return candidate; + var currentGroup = candidate.allocation().get().membership().cluster().group(); + if (currentGroup.isEmpty()) return candidate; + if (currentGroup.get().index() >= requested.groups()) return candidate; + if (requested.count().isEmpty()) return candidate; // Can't retire + if (countInGroup[currentGroup.get().index()] <= requested.groupSize()) return candidate; + countInGroup[currentGroup.get().index()]--; + return candidate.withNode(candidate.toNode().retire(Agent.application, clock.instant())); + } + + /** Unretire nodes that are already in the correct group when the group is deficient. */ + private NodeCandidate unretireNodeInExpandedGroup(NodeCandidate candidate, int[] countInGroup) { + if ( ! candidate.allocation().get().membership().retired()) return candidate; + var currentGroup = candidate.allocation().get().membership().cluster().group(); + if (currentGroup.isEmpty()) return candidate; + if (currentGroup.get().index() >= requested.groups()) return candidate; + if (candidate.preferToRetire() || candidate.wantToRetire()) return candidate; + if (requested.count().isPresent() && countInGroup[currentGroup.get().index()] >= requested.groupSize()) return candidate; + candidate = unretire(candidate); + if (candidate.allocation().get().membership().retired()) return candidate; + countInGroup[currentGroup.get().index()]++; + return candidate; + } + + private NodeCandidate inFirstGroupWithDeficiency(NodeCandidate candidate, int[] countInGroup) { + for (int group = 0; group < requested.groups(); group++) { + if (requested.count().isEmpty() || countInGroup[group] < requested.groupSize()) { + return inGroup(group, candidate, countInGroup); + } + } + return candidate; + } + + private boolean shouldRemove(NodeCandidate candidate) { + var currentGroup = candidate.allocation().get().membership().cluster().group(); + if (currentGroup.isEmpty()) return true; // new and not assigned an index: Not needed + return currentGroup.get().index() >= requested.groups(); + } + + private NodeCandidate inGroup(int group, NodeCandidate candidate, int[] countInGroup) { + candidate = unretire(candidate); + if (candidate.allocation().get().membership().retired()) return candidate; + var membership = candidate.allocation().get().membership(); + var currentGroup = membership.cluster().group(); + countInGroup[group]++; + if ( ! currentGroup.isEmpty() && currentGroup.get().index() < requested.groups()) + countInGroup[membership.cluster().group().get().index()]--; + return candidate.withNode(candidate.toNode().with(candidate.allocation().get().with(membership.with(membership.cluster().with(Optional.of(ClusterSpec.Group.from(group))))))); + } + + /** Attempt to unretire the given node if it is retired. */ + private NodeCandidate unretire(NodeCandidate candidate) { + if (candidate.retiredNow()) return candidate; + if ( ! candidate.allocation().get().membership().retired()) return candidate; + if ( ! hasCompatibleResources(candidate) ) return candidate; + var parent = candidate.parentHostname().flatMap(hostname -> allNodes.node(hostname)); + if (parent.isPresent() && (parent.get().status().wantToRetire() || parent.get().status().preferToRetire())) return candidate; + candidate = candidate.withNode(); + if ( ! requested.isCompatible(candidate.resources())) + candidate = candidate.withNode(resize(candidate.toNode())); + return candidate.withNode(candidate.toNode().unretire()); + } + + private Node resize(Node node) { + NodeResources hostResources = allNodes.parentOf(node).get().flavor().resources(); + return node.with(new Flavor(requested.resources().get() + .with(hostResources.diskSpeed()) + .with(hostResources.storageType()) + .with(hostResources.architecture())), + Agent.application, clock.instant()); + } + + private boolean hasCompatibleResources(NodeCandidate candidate) { + return requested.isCompatible(candidate.resources()) || candidate.isResizable; + } + +} diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/GroupPreparer.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/GroupPreparer.java deleted file mode 100644 index 0c4838abe4d..00000000000 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/GroupPreparer.java +++ /dev/null @@ -1,188 +0,0 @@ -// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. -package com.yahoo.vespa.hosted.provision.provisioning; - -import com.yahoo.component.Version; -import com.yahoo.config.provision.ApplicationId; -import com.yahoo.config.provision.ClusterSpec; -import com.yahoo.config.provision.NodeAllocationException; -import com.yahoo.config.provision.NodeResources; -import com.yahoo.config.provision.NodeType; -import com.yahoo.transaction.Mutex; -import com.yahoo.vespa.hosted.provision.LockedNodeList; -import com.yahoo.vespa.hosted.provision.Node; -import com.yahoo.vespa.hosted.provision.NodeRepository; -import com.yahoo.vespa.hosted.provision.node.Agent; -import com.yahoo.vespa.hosted.provision.provisioning.HostProvisioner.HostSharing; - -import java.util.ArrayList; -import java.util.List; -import java.util.Optional; -import java.util.function.Consumer; -import java.util.function.Supplier; -import java.util.logging.Level; -import java.util.logging.Logger; - -/** - * Performs preparation of node activation changes for a single host group in an application. - * - * @author bratseth - */ -public class GroupPreparer { - - private static final Mutex PROBE_LOCK = () -> {}; - private static final Logger log = Logger.getLogger(GroupPreparer.class.getName()); - - private final NodeRepository nodeRepository; - private final Optional<HostProvisioner> hostProvisioner; - - /** Contains list of prepared nodes and the {@link LockedNodeList} object to use for next prepare call */ - record PrepareResult(List<Node> prepared, LockedNodeList allNodes) {} - - public GroupPreparer(NodeRepository nodeRepository, Optional<HostProvisioner> hostProvisioner) { - this.nodeRepository = nodeRepository; - this.hostProvisioner = hostProvisioner; - } - - /** - * Ensure sufficient nodes are reserved or active for the given application, group and cluster - * - * @param application the application we are allocating to - * @param cluster the cluster and group we are allocating to - * @param requestedNodes a specification of the requested nodes - * @param surplusActiveNodes currently active nodes which are available to be assigned to this group. - * This method will remove from this list if it finds it needs additional nodes - * @param indices the next available node indices for this cluster. - * This method will consume these when it allocates new nodes to the cluster. - * @param allNodes list of all nodes and hosts. Use {@link #createUnlockedNodeList()} to create param for - * first invocation. Then use previous {@link PrepareResult#allNodes()} for the following. - * @return the list of nodes this cluster group will have allocated if activated, and - */ - // Note: This operation may make persisted changes to the set of reserved and inactive nodes, - // but it may not change the set of active nodes, as the active nodes must stay in sync with the - // active config model which is changed on activate - public PrepareResult prepare(ApplicationId application, ClusterSpec cluster, NodeSpec requestedNodes, - List<Node> surplusActiveNodes, NodeIndices indices, int wantedGroups, - LockedNodeList allNodes) { - log.log(Level.FINE, () -> "Preparing " + cluster.type().name() + " " + cluster.id() + " with requested resources " + - requestedNodes.resources().orElse(NodeResources.unspecified())); - // Try preparing in memory without global unallocated lock. Most of the time there should be no changes, - // and we can return nodes previously allocated. - NodeAllocation probeAllocation = prepareAllocation(application, cluster, requestedNodes, surplusActiveNodes, - indices::probeNext, wantedGroups, allNodes); - if (probeAllocation.fulfilledAndNoChanges()) { - List<Node> acceptedNodes = probeAllocation.finalNodes(); - surplusActiveNodes.removeAll(acceptedNodes); - indices.commitProbe(); - return new PrepareResult(acceptedNodes, allNodes); - } else { - // There were some changes, so re-do the allocation with locks - indices.resetProbe(); - List<Node> prepared = prepareWithLocks(application, cluster, requestedNodes, surplusActiveNodes, indices, wantedGroups); - return new PrepareResult(prepared, createUnlockedNodeList()); - } - } - - // Use this to create allNodes param to prepare method for first invocation of prepare - LockedNodeList createUnlockedNodeList() { return nodeRepository.nodes().list(PROBE_LOCK); } - - /// Note that this will write to the node repo. - private List<Node> prepareWithLocks(ApplicationId application, ClusterSpec cluster, NodeSpec requestedNodes, - List<Node> surplusActiveNodes, NodeIndices indices, int wantedGroups) { - try (Mutex lock = nodeRepository.applications().lock(application); - Mutex allocationLock = nodeRepository.nodes().lockUnallocated()) { - LockedNodeList allNodes = nodeRepository.nodes().list(allocationLock); - NodeAllocation allocation = prepareAllocation(application, cluster, requestedNodes, surplusActiveNodes, - indices::next, wantedGroups, allNodes); - NodeType hostType = allocation.nodeType().hostType(); - if (canProvisionDynamically(hostType) && allocation.hostDeficit().isPresent()) { - HostSharing sharing = hostSharing(cluster, hostType); - Version osVersion = nodeRepository.osVersions().targetFor(hostType).orElse(Version.emptyVersion); - NodeAllocation.HostDeficit deficit = allocation.hostDeficit().get(); - List<Node> hosts = new ArrayList<>(); - Consumer<List<ProvisionedHost>> whenProvisioned = provisionedHosts -> { - hosts.addAll(provisionedHosts.stream().map(host -> host.generateHost(requestedNodes.hostTTL())).toList()); - nodeRepository.nodes().addNodes(hosts, Agent.application); - - // Offer the nodes on the newly provisioned hosts, this should be enough to cover the deficit - List<NodeCandidate> candidates = provisionedHosts.stream() - .map(host -> NodeCandidate.createNewExclusiveChild(host.generateNode(), - host.generateHost(requestedNodes.hostTTL()))) - .toList(); - allocation.offer(candidates); - }; - try { - HostProvisionRequest request = new HostProvisionRequest(allocation.provisionIndices(deficit.count()), - hostType, - deficit.resources(), - application, - osVersion, - sharing, - Optional.of(cluster.type()), - Optional.of(cluster.id()), - requestedNodes.cloudAccount(), - deficit.dueToFlavorUpgrade()); - hostProvisioner.get().provisionHosts(request, whenProvisioned); - } catch (NodeAllocationException e) { - // Mark the nodes that were written to ZK in the consumer for deprovisioning. While these hosts do - // not exist, we cannot remove them from ZK here because other nodes may already have been - // allocated on them, so let HostDeprovisioner deal with it - hosts.forEach(host -> nodeRepository.nodes().deprovision(host.hostname(), Agent.system, nodeRepository.clock().instant())); - throw e; - } - } else if (allocation.hostDeficit().isPresent() && requestedNodes.canFail() && - allocation.hasRetiredJustNow() && requestedNodes instanceof NodeSpec.CountNodeSpec cns) { - // Non-dynamically provisioned zone with a deficit because we just now retired some nodes. - // Try again, but without retiring - indices.resetProbe(); - List<Node> accepted = prepareWithLocks(application, cluster, cns.withoutRetiring(), surplusActiveNodes, indices, wantedGroups); - log.warning("Prepared " + application + " " + cluster.id() + " without retirement due to lack of capacity"); - return accepted; - } - - if (! allocation.fulfilled() && requestedNodes.canFail()) - throw new NodeAllocationException((cluster.group().isPresent() ? "Node allocation failure on " + cluster.group().get() - : "") + allocation.allocationFailureDetails(), - true); - - // Carry out and return allocation - nodeRepository.nodes().reserve(allocation.reservableNodes()); - nodeRepository.nodes().addReservedNodes(new LockedNodeList(allocation.newNodes(), allocationLock)); - List<Node> acceptedNodes = allocation.finalNodes(); - surplusActiveNodes.removeAll(acceptedNodes); - return acceptedNodes; - } - } - - private NodeAllocation prepareAllocation(ApplicationId application, ClusterSpec cluster, NodeSpec requestedNodes, - List<Node> surplusActiveNodes, Supplier<Integer> nextIndex, int wantedGroups, - LockedNodeList allNodes) { - - NodeAllocation allocation = new NodeAllocation(allNodes, application, cluster, requestedNodes, nextIndex, nodeRepository); - NodePrioritizer prioritizer = new NodePrioritizer(allNodes, - application, - cluster, - requestedNodes, - wantedGroups, - nodeRepository.zone().cloud().dynamicProvisioning(), - nodeRepository.nameResolver(), - nodeRepository.nodes(), - nodeRepository.resourcesCalculator(), - nodeRepository.spareCount(), - requestedNodes.cloudAccount().isExclave(nodeRepository.zone())); - allocation.offer(prioritizer.collect(surplusActiveNodes)); - return allocation; - } - - private boolean canProvisionDynamically(NodeType hostType) { - return nodeRepository.zone().cloud().dynamicProvisioning() && - (hostType == NodeType.host || hostType.isConfigServerHostLike()); - } - - private HostSharing hostSharing(ClusterSpec cluster, NodeType hostType) { - if ( hostType.isSharable()) - return nodeRepository.exclusiveAllocation(cluster) ? HostSharing.exclusive : HostSharing.any; - else - return HostSharing.any; - } - -} diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/HostIpConfig.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/HostIpConfig.java index 3c28d9d1196..225f4827a18 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/HostIpConfig.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/HostIpConfig.java @@ -5,18 +5,20 @@ import com.yahoo.vespa.hosted.provision.node.IP; import java.util.Map; import java.util.Objects; +import java.util.Optional; /** - * IP config of a host and its children. + * IP config of a host and its children, and an optional extra host ID. * * @author mpolden */ -public record HostIpConfig(Map<String, IP.Config> ipConfigByHostname) { +public record HostIpConfig(Map<String, IP.Config> ipConfigByHostname, Optional<String> hostId) { - public static final HostIpConfig EMPTY = new HostIpConfig(Map.of()); + public static final HostIpConfig EMPTY = new HostIpConfig(Map.of(), Optional.empty()); - public HostIpConfig(Map<String, IP.Config> ipConfigByHostname) { + public HostIpConfig(Map<String, IP.Config> ipConfigByHostname, Optional<String> hostId) { this.ipConfigByHostname = Map.copyOf(Objects.requireNonNull(ipConfigByHostname)); + this.hostId = Objects.requireNonNull(hostId); } public Map<String, IP.Config> asMap() { diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/InfraDeployerImpl.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/InfraDeployerImpl.java index 35b2fef2c78..1f424a1e1d5 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/InfraDeployerImpl.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/InfraDeployerImpl.java @@ -89,23 +89,20 @@ public class InfraDeployerImpl implements InfraDeployer { public void prepare() { if (prepared) return; - try (Mutex lock = nodeRepository.applications().lock(application.getApplicationId())) { - NodeType nodeType = application.getCapacity().type(); - Version targetVersion = infrastructureVersions.getTargetVersionFor(nodeType); - hostSpecs = provisioner.prepare(application.getApplicationId(), - application.getClusterSpecWithVersion(targetVersion), - application.getCapacity(), - logger::log); - - prepared = true; - } + NodeType nodeType = application.getCapacity().type(); + Version targetVersion = infrastructureVersions.getTargetVersionFor(nodeType); + hostSpecs = provisioner.prepare(application.getApplicationId(), + application.getClusterSpecWithVersion(targetVersion), + application.getCapacity(), + logger::log); + + prepared = true; } @Override public long activate() { + prepare(); try (var lock = provisioner.lock(application.getApplicationId())) { - prepare(); - if (hostSpecs.isEmpty()) { logger.log(Level.FINE, () -> "No nodes to provision for " + application.getCapacity().type() + ", removing application"); removeApplication(application.getApplicationId()); diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/LoadBalancerProvisioner.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/LoadBalancerProvisioner.java index 10aae94a986..5a35ed1cc42 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/LoadBalancerProvisioner.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/LoadBalancerProvisioner.java @@ -88,12 +88,12 @@ public class LoadBalancerProvisioner { * <p> * Calling this for irrelevant node or cluster types is a no-op. */ - public void prepare(ApplicationId application, ClusterSpec cluster, NodeSpec requestedNodes) { - if (!shouldProvision(application, requestedNodes.type(), cluster.type())) return; + public void prepare(ApplicationId application, ClusterSpec cluster, NodeSpec requested) { + if (!shouldProvision(application, requested.type(), cluster.type())) return; try (var lock = db.lock(application)) { ClusterSpec.Id clusterId = effectiveId(cluster); LoadBalancerId loadBalancerId = requireNonClashing(new LoadBalancerId(application, clusterId)); - prepare(loadBalancerId, cluster.zoneEndpoint(), requestedNodes.cloudAccount()); + prepare(loadBalancerId, cluster.zoneEndpoint(), requested.cloudAccount()); } } diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/NodeAllocation.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/NodeAllocation.java index b202a10c4d3..28caf26353b 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/NodeAllocation.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/NodeAllocation.java @@ -19,7 +19,6 @@ import com.yahoo.vespa.hosted.provision.node.Allocation; import java.util.ArrayList; import java.util.Collection; -import java.util.Comparator; import java.util.EnumSet; import java.util.HashSet; import java.util.LinkedHashMap; @@ -52,18 +51,15 @@ class NodeAllocation { private final ClusterSpec cluster; /** The requested nodes of this list */ - private final NodeSpec requestedNodes; + private final NodeSpec requested; /** The node candidates this has accepted so far, keyed on hostname */ private final Map<String, NodeCandidate> nodes = new LinkedHashMap<>(); - /** The number of already allocated nodes accepted and not retired */ - private int accepted = 0; - /** The number of already allocated nodes of compatible size */ private int acceptedAndCompatible = 0; - /** The number of already allocated nodes which can be made compatible*/ + /** The number of already allocated nodes which can be made compatible */ private int acceptedAndCompatibleOrResizable = 0; /** The number of nodes rejected because of clashing parentHostname */ @@ -90,12 +86,12 @@ class NodeAllocation { private final NodeResourceLimits nodeResourceLimits; private final Optional<String> requiredHostFlavor; - NodeAllocation(NodeList allNodes, ApplicationId application, ClusterSpec cluster, NodeSpec requestedNodes, + NodeAllocation(NodeList allNodes, ApplicationId application, ClusterSpec cluster, NodeSpec requested, Supplier<Integer> nextIndex, NodeRepository nodeRepository) { this.allNodes = allNodes; this.application = application; this.cluster = cluster; - this.requestedNodes = requestedNodes; + this.requested = requested; this.nextIndex = nextIndex; this.nodeRepository = nodeRepository; this.nodeResourceLimits = new NodeResourceLimits(nodeRepository); @@ -123,15 +119,14 @@ class NodeAllocation { ClusterMembership membership = allocation.membership(); if ( ! allocation.owner().equals(application)) continue; // wrong application if ( ! membership.cluster().satisfies(cluster)) continue; // wrong cluster id/type - if ((! candidate.isSurplus || saturated()) && ! membership.cluster().group().equals(cluster.group())) continue; // wrong group, and we can't or have no reason to change it if ( candidate.state() == Node.State.active && allocation.removable()) continue; // don't accept; causes removal if ( candidate.state() == Node.State.active && candidate.wantToFail()) continue; // don't accept; causes failing if ( indexes.contains(membership.index())) continue; // duplicate index (just to be sure) - if (nodeRepository.zone().cloud().allowEnclave() && candidate.parent.isPresent() && ! candidate.parent.get().cloudAccount().equals(requestedNodes.cloudAccount())) continue; // wrong account + if (nodeRepository.zone().cloud().allowEnclave() && candidate.parent.isPresent() && ! candidate.parent.get().cloudAccount().equals(requested.cloudAccount())) continue; // wrong account - boolean resizeable = requestedNodes.considerRetiring() && candidate.isResizable; + boolean resizeable = requested.considerRetiring() && candidate.isResizable; - if ((! saturated() && hasCompatibleResources(candidate) && requestedNodes.acceptable(candidate)) || acceptIncompatible(candidate)) { + if ((! saturated() && hasCompatibleResources(candidate) && requested.acceptable(candidate)) || acceptIncompatible(candidate)) { candidate = candidate.withNode(); if (candidate.isValid()) acceptNode(candidate, shouldRetire(candidate, candidates), resizeable); @@ -155,7 +150,7 @@ class NodeAllocation { } candidate = candidate.allocate(application, ClusterMembership.from(cluster, nextIndex.get()), - requestedNodes.resources().orElse(candidate.resources()), + requested.resources().orElse(candidate.resources()), nodeRepository.clock().instant()); if (candidate.isValid()) { acceptNode(candidate, Retirement.none, false); @@ -166,7 +161,7 @@ class NodeAllocation { /** Returns the cause of retirement for given candidate */ private Retirement shouldRetire(NodeCandidate candidate, List<NodeCandidate> candidates) { - if ( ! requestedNodes.considerRetiring()) { + if ( ! requested.considerRetiring()) { boolean alreadyRetired = candidate.allocation().map(a -> a.membership().retired()).orElse(false); return alreadyRetired ? Retirement.alreadyRetired : Retirement.none; } @@ -178,6 +173,7 @@ class NodeAllocation { if (candidate.preferToRetire() && candidate.replaceableBy(candidates)) return Retirement.softRequest; if (violatesExclusivity(candidate)) return Retirement.violatesExclusivity; if (requiredHostFlavor.isPresent() && ! candidate.parent.map(node -> node.flavor().name()).equals(requiredHostFlavor)) return Retirement.violatesHostFlavor; + if (candidate.violatesSpares) return Retirement.violatesSpares; return Retirement.none; } @@ -203,21 +199,24 @@ class NodeAllocation { private boolean violatesExclusivity(NodeCandidate candidate) { if (candidate.parentHostname().isEmpty()) return false; + if (requested.type() != NodeType.tenant) return false; - // In nodes which does not allow host sharing, exclusivity is violated if... + // In zones which does not allow host sharing, exclusivity is violated if... if ( ! nodeRepository.zone().cloud().allowHostSharing()) { // TODO: Write this in a way that is simple to read // If either the parent is dedicated to a cluster type different from this cluster return ! candidate.parent.flatMap(Node::exclusiveToClusterType).map(cluster.type()::equals).orElse(true) || - // or this cluster is requiring exclusivity, but the host is exclusive to a different owner - (requestedNodes.isExclusive() && !candidate.parent.flatMap(Node::exclusiveToApplicationId).map(application::equals).orElse(false)); + // or the parent is dedicated to a different application + ! candidate.parent.flatMap(Node::exclusiveToApplicationId).map(application::equals).orElse(true) || + // or this cluster requires exclusivity, but the host is not exclusive + (nodeRepository.exclusiveAllocation(cluster) && candidate.parent.flatMap(Node::exclusiveToApplicationId).isEmpty()); } // In zones with shared hosts we require that if either of the nodes on the host requires exclusivity, // then all the nodes on the host must have the same owner for (Node nodeOnHost : allNodes.childrenOf(candidate.parentHostname().get())) { if (nodeOnHost.allocation().isEmpty()) continue; - if (requestedNodes.isExclusive() || nodeOnHost.allocation().get().membership().cluster().isExclusive()) { + if (nodeRepository.exclusiveAllocation(cluster) || nodeOnHost.allocation().get().membership().cluster().isExclusive()) { if ( ! nodeOnHost.allocation().get().owner().equals(application)) return true; } } @@ -243,36 +242,33 @@ class NodeAllocation { */ private boolean acceptIncompatible(NodeCandidate candidate) { if (candidate.state() != Node.State.active) return false; - if (! candidate.allocation().get().membership().cluster().group().equals(cluster.group())) return false; if (candidate.allocation().get().membership().retired()) return true; // don't second-guess if already retired - if ( ! requestedNodes.considerRetiring()) // the node is active and we are not allowed to remove gracefully, so keep + if ( ! requested.considerRetiring()) // the node is active and we are not allowed to remove gracefully, so keep return true; - return cluster.isStateful() || (cluster.type() == ClusterSpec.Type.container && !hasCompatibleResources(candidate)); } private boolean hasCompatibleResources(NodeCandidate candidate) { - return requestedNodes.isCompatible(candidate.resources()) || candidate.isResizable; + return requested.isCompatible(candidate.resources()) || candidate.isResizable; } private Node acceptNode(NodeCandidate candidate, Retirement retirement, boolean resizeable) { Node node = candidate.toNode(); - if (node.allocation().isPresent()) // Record the currently requested resources - node = node.with(node.allocation().get().withRequestedResources(requestedNodes.resources().orElse(node.resources()))); + node = node.with(node.allocation().get().withRequestedResources(requested.resources().orElse(node.resources()))); if (retirement == Retirement.none) { - accepted++; // We want to allocate new nodes rather than unretiring with resize, so count without those // for the purpose of deciding when to stop accepting nodes (saturation) if (node.allocation().isEmpty() - || ! ( requestedNodes.needsResize(node) && - (node.allocation().get().membership().retired() || ! requestedNodes.considerRetiring()))) { + || (canBeUsedInGroupWithDeficiency(node) && + ! (requested.needsResize(node) && (node.allocation().get().membership().retired() || ! requested.considerRetiring())))) { acceptedAndCompatible++; } + if (hasCompatibleResources(candidate)) acceptedAndCompatibleOrResizable++; @@ -290,21 +286,34 @@ class NodeAllocation { node = node.retire(nodeRepository.clock().instant()); } if ( ! node.allocation().get().membership().cluster().equals(cluster)) { - // group may be different - node = setCluster(cluster, node); + // Cluster has the updated settings but do not set a group + node = setCluster(cluster.with(node.allocation().get().membership().cluster().group()), node); } - candidate = candidate.withNode(node); + candidate = candidate.withNode(node, retirement != Retirement.none && retirement != Retirement.alreadyRetired ); indexes.add(node.allocation().get().membership().index()); nodes.put(node.hostname(), candidate); return node; } + private boolean canBeUsedInGroupWithDeficiency(Node node) { + if (requested.count().isEmpty()) return true; + if (node.allocation().isEmpty()) return true; + var group = node.allocation().get().membership().cluster().group(); + if (group.isEmpty()) return true; + long nodesInGroup = nodes.values().stream().filter(n -> groupOf(n).equals(group)).count(); + return nodesInGroup < requested.groupSize(); + } + + private Optional<ClusterSpec.Group> groupOf(NodeCandidate candidate) { + return candidate.allocation().flatMap(a -> a.membership().cluster().group()); + } + private Node resize(Node node) { NodeResources hostResources = allNodes.parentOf(node).get().flavor().resources(); - return node.with(new Flavor(requestedNodes.resources().get() - .with(hostResources.diskSpeed()) - .with(hostResources.storageType()) - .with(hostResources.architecture())), + return node.with(new Flavor(requested.resources().get() + .with(hostResources.diskSpeed()) + .with(hostResources.storageType()) + .with(hostResources.architecture())), Agent.application, nodeRepository.clock().instant()); } @@ -315,12 +324,12 @@ class NodeAllocation { /** Returns true if no more nodes are needed in this list */ public boolean saturated() { - return requestedNodes.saturatedBy(acceptedAndCompatible); + return requested.saturatedBy(acceptedAndCompatible); } /** Returns true if the content of this list is sufficient to meet the request */ boolean fulfilled() { - return requestedNodes.fulfilledBy(acceptedAndCompatibleOrResizable()); + return requested.fulfilledBy(acceptedAndCompatibleOrResizable()); } /** Returns true if this allocation was already fulfilled and resulted in no new changes */ @@ -343,10 +352,10 @@ class NodeAllocation { if (nodeType().isHost()) { return Optional.empty(); // Hosts are provisioned as required by the child application } - int deficit = requestedNodes.fulfilledDeficitCount(acceptedAndCompatibleOrResizable()); + int deficit = requested.fulfilledDeficitCount(acceptedAndCompatibleOrResizable()); // We can only require flavor upgrade if the entire deficit is caused by upgrades boolean dueToFlavorUpgrade = deficit == wasRetiredDueToFlavorUpgrade; - return Optional.of(new HostDeficit(requestedNodes.resources().orElseGet(NodeResources::unspecified), + return Optional.of(new HostDeficit(requested.resources().orElseGet(NodeResources::unspecified), deficit, dueToFlavorUpgrade)) .filter(hostDeficit -> hostDeficit.count() > 0); @@ -355,7 +364,7 @@ class NodeAllocation { /** Returns the indices to use when provisioning hosts for this */ List<Integer> provisionIndices(int count) { if (count < 1) throw new IllegalArgumentException("Count must be positive"); - NodeType hostType = requestedNodes.type().hostType(); + NodeType hostType = requested.type().hostType(); // Tenant hosts have a continuously increasing index if (hostType == NodeType.host) return nodeRepository.database().readProvisionIndices(count); @@ -389,55 +398,15 @@ class NodeAllocation { /** The node type this is allocating */ NodeType nodeType() { - return requestedNodes.type(); + return requested.type(); } - /** - * Make the number of <i>non-retired</i> nodes in the list equal to the requested number - * of nodes, and retire the rest of the list. Only retire currently active nodes. - * Prefer to retire nodes of the wrong flavor. - * Make as few changes to the retired set as possible. - * - * @return the final list of nodes - */ List<Node> finalNodes() { - int wantToRetireCount = (int) matching(NodeCandidate::wantToRetire).count(); - int currentRetiredCount = (int) matching(node -> node.allocation().get().membership().retired()).count(); - int deltaRetiredCount = requestedNodes.idealRetiredCount(nodes.size(), wantToRetireCount, currentRetiredCount); - - if (deltaRetiredCount > 0) { // retire until deltaRetiredCount is 0 - for (NodeCandidate candidate : byRetiringPriority(nodes.values())) { - if ( ! candidate.allocation().get().membership().retired() && candidate.state() == Node.State.active) { - candidate = candidate.withNode(); - candidate = candidate.withNode(candidate.toNode().retire(Agent.application, nodeRepository.clock().instant())); - nodes.put(candidate.toNode().hostname(), candidate); - if (--deltaRetiredCount == 0) break; - } - } - } - else if (deltaRetiredCount < 0) { // unretire until deltaRetiredCount is 0 - for (NodeCandidate candidate : byUnretiringPriority(nodes.values())) { - if (candidate.allocation().get().membership().retired() && hasCompatibleResources(candidate) ) { - candidate = candidate.withNode(); - if (candidate.isResizable) - candidate = candidate.withNode(resize(candidate.toNode())); - candidate = candidate.withNode(candidate.toNode().unretire()); - nodes.put(candidate.toNode().hostname(), candidate); - if (++deltaRetiredCount == 0) break; - } - } - } - - for (NodeCandidate candidate : nodes.values()) { - // Set whether the node is exclusive - candidate = candidate.withNode(); - Allocation allocation = candidate.allocation().get(); - candidate = candidate.withNode(candidate.toNode().with(allocation.with(allocation.membership() - .with(allocation.membership().cluster().exclusive(requestedNodes.isExclusive()))))); - nodes.put(candidate.toNode().hostname(), candidate); - } - - return nodes.values().stream().map(NodeCandidate::toNode).toList(); + GroupAssigner groupAssigner = new GroupAssigner(requested, allNodes, nodeRepository.clock()); + Collection<NodeCandidate> finalNodes = groupAssigner.assignTo(nodes.values()); + nodes.clear(); + finalNodes.forEach(candidate -> nodes.put(candidate.toNode().hostname(), candidate)); + return finalNodes.stream().map(NodeCandidate::toNode).toList(); } List<Node> reservableNodes() { @@ -462,19 +431,6 @@ class NodeAllocation { return allNodes.nodeType(nodeType()).size(); } - /** Prefer to retire nodes we want the least */ - private List<NodeCandidate> byRetiringPriority(Collection<NodeCandidate> candidates) { - return candidates.stream().sorted(Comparator.reverseOrder()).toList(); - } - - /** Prefer to unretire nodes we don't want to retire, and otherwise those with lower index */ - private List<NodeCandidate> byUnretiringPriority(Collection<NodeCandidate> candidates) { - return candidates.stream() - .sorted(Comparator.comparing(NodeCandidate::wantToRetire) - .thenComparing(n -> n.allocation().get().membership().index())) - .toList(); - } - String allocationFailureDetails() { List<String> reasons = new ArrayList<>(); if (rejectedDueToExclusivity > 0) @@ -511,6 +467,7 @@ class NodeAllocation { violatesExclusivity("node violates host exclusivity"), violatesHostFlavor("node violates host flavor"), violatesHostFlavorGeneration("node violates host flavor generation"), + violatesSpares("node is assigned to a host we want to use as a spare"), none(""); private final String description; diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/NodeCandidate.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/NodeCandidate.java index 8462e23fbfd..adc04c491e2 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/NodeCandidate.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/NodeCandidate.java @@ -81,6 +81,9 @@ public abstract class NodeCandidate implements Nodelike, Comparable<NodeCandidat public abstract boolean preferToRetire(); + /** Returns true if we have decided to retire this node as part of this deployment */ + public boolean retiredNow() { return false; } + public abstract boolean wantToFail(); public abstract Flavor flavor(); @@ -217,7 +220,12 @@ public abstract class NodeCandidate implements Nodelike, Comparable<NodeCandidat /** Returns a copy of this with node set to given value */ NodeCandidate withNode(Node node) { - return new ConcreteNodeCandidate(node, freeParentCapacity, parent, violatesSpares, exclusiveSwitch, isSurplus, isNew, isResizable); + return withNode(node, retiredNow()); + } + + /** Returns a copy of this with node set to given value */ + NodeCandidate withNode(Node node, boolean retiredNow) { + return new ConcreteNodeCandidate(node, retiredNow, freeParentCapacity, parent, violatesSpares, exclusiveSwitch, isSurplus, isNew, isResizable); } /** Returns the switch priority, based on switch exclusivity, of this compared to other */ @@ -260,7 +268,7 @@ public abstract class NodeCandidate implements Nodelike, Comparable<NodeCandidat boolean isSurplus, boolean isNew, boolean isResizeable) { - return new ConcreteNodeCandidate(node, freeParentCapacity, Optional.of(parent), violatesSpares, true, isSurplus, isNew, isResizeable); + return new ConcreteNodeCandidate(node, false, freeParentCapacity, Optional.of(parent), violatesSpares, true, isSurplus, isNew, isResizeable); } public static NodeCandidate createNewChild(NodeResources resources, @@ -274,26 +282,33 @@ public abstract class NodeCandidate implements Nodelike, Comparable<NodeCandidat } public static NodeCandidate createNewExclusiveChild(Node node, Node parent) { - return new ConcreteNodeCandidate(node, node.resources(), Optional.of(parent), false, true, false, true, false); + return new ConcreteNodeCandidate(node, false, node.resources(), Optional.of(parent), false, true, false, true, false); } public static NodeCandidate createStandalone(Node node, boolean isSurplus, boolean isNew) { - return new ConcreteNodeCandidate(node, node.resources(), Optional.empty(), false, true, isSurplus, isNew, false); + return new ConcreteNodeCandidate(node, false, node.resources(), Optional.empty(), false, true, isSurplus, isNew, false); } /** A candidate backed by a node */ static class ConcreteNodeCandidate extends NodeCandidate { private final Node node; + private final boolean retiredNow; - ConcreteNodeCandidate(Node node, NodeResources freeParentCapacity, Optional<Node> parent, + ConcreteNodeCandidate(Node node, + boolean retiredNow, + NodeResources freeParentCapacity, Optional<Node> parent, boolean violatesSpares, boolean exclusiveSwitch, boolean isSurplus, boolean isNew, boolean isResizeable) { super(freeParentCapacity, parent, violatesSpares, exclusiveSwitch, isSurplus, isNew, isResizeable); + this.retiredNow = retiredNow; this.node = Objects.requireNonNull(node, "Node cannot be null"); } @Override + public boolean retiredNow() { return retiredNow; } + + @Override public NodeResources resources() { return node.resources(); } @Override @@ -322,7 +337,7 @@ public abstract class NodeCandidate implements Nodelike, Comparable<NodeCandidat @Override public NodeCandidate allocate(ApplicationId owner, ClusterMembership membership, NodeResources requestedResources, Instant at) { - return new ConcreteNodeCandidate(node.allocate(owner, membership, requestedResources, at), + return new ConcreteNodeCandidate(node.allocate(owner, membership, requestedResources, at), retiredNow, freeParentCapacity, parent, violatesSpares, exclusiveSwitch, isSurplus, isNew, isResizable); } @@ -332,7 +347,7 @@ public abstract class NodeCandidate implements Nodelike, Comparable<NodeCandidat @Override public NodeCandidate withExclusiveSwitch(boolean exclusiveSwitch) { - return new ConcreteNodeCandidate(node, freeParentCapacity, parent, violatesSpares, exclusiveSwitch, + return new ConcreteNodeCandidate(node, retiredNow, freeParentCapacity, parent, violatesSpares, exclusiveSwitch, isSurplus, isNew, isResizable); } @@ -439,7 +454,7 @@ public abstract class NodeCandidate implements Nodelike, Comparable<NodeCandidat NodeType.tenant) .cloudAccount(parent.get().cloudAccount()) .build(); - return new ConcreteNodeCandidate(node, freeParentCapacity, parent, violatesSpares, exclusiveSwitch, isSurplus, isNew, isResizable); + return new ConcreteNodeCandidate(node, false, freeParentCapacity, parent, violatesSpares, exclusiveSwitch, isSurplus, isNew, isResizable); } diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/NodeIndices.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/NodeIndices.java index a5a098dbfd6..1ffd54872d3 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/NodeIndices.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/NodeIndices.java @@ -1,9 +1,10 @@ // Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. package com.yahoo.vespa.hosted.provision.provisioning; -import java.util.List; +import com.yahoo.config.provision.ClusterSpec; +import com.yahoo.vespa.hosted.provision.NodeList; -import static java.util.Comparator.naturalOrder; +import java.util.List; /** * Tracks indices of a node cluster, and proposes the index of the next allocation. @@ -18,8 +19,12 @@ class NodeIndices { private int probe; /** Pass the list of current indices in the cluster. */ + NodeIndices(ClusterSpec.Id cluster, NodeList allNodes) { + this(allNodes.cluster(cluster).mapToList(node -> node.allocation().get().membership().index())); + } + NodeIndices(List<Integer> used) { - this.used = List.copyOf(used); + this.used = used; this.last = -1; this.probe = last; } diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/NodePrioritizer.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/NodePrioritizer.java index 4f21c8dcd50..4ac90753ed1 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/NodePrioritizer.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/NodePrioritizer.java @@ -33,7 +33,7 @@ public class NodePrioritizer { private final LockedNodeList allNodes; private final HostCapacity capacity; private final HostResourcesCalculator calculator; - private final NodeSpec requestedNodes; + private final NodeSpec requested; private final ApplicationId application; private final ClusterSpec clusterSpec; private final NameResolver nameResolver; @@ -46,12 +46,12 @@ public class NodePrioritizer { private final boolean enclave; public NodePrioritizer(LockedNodeList allNodes, ApplicationId application, ClusterSpec clusterSpec, NodeSpec nodeSpec, - int wantedGroups, boolean dynamicProvisioning, NameResolver nameResolver, Nodes nodes, + boolean dynamicProvisioning, NameResolver nameResolver, Nodes nodes, HostResourcesCalculator hostResourcesCalculator, int spareCount, boolean enclave) { this.allNodes = allNodes; this.calculator = hostResourcesCalculator; this.capacity = new HostCapacity(this.allNodes, hostResourcesCalculator); - this.requestedNodes = nodeSpec; + this.requested = nodeSpec; this.clusterSpec = clusterSpec; this.application = application; this.dynamicProvisioning = dynamicProvisioning; @@ -70,12 +70,9 @@ public class NodePrioritizer { .stream()) .distinct() .count(); - this.topologyChange = currentGroups != wantedGroups; + this.topologyChange = currentGroups != requested.groups(); - this.currentClusterSize = (int) nonRetiredNodesInCluster.state(Node.State.active).stream() - .map(node -> node.allocation().flatMap(alloc -> alloc.membership().cluster().group())) - .filter(clusterSpec.group()::equals) - .count(); + this.currentClusterSize = (int) nonRetiredNodesInCluster.state(Node.State.active).stream().count(); // In dynamically provisioned zones, we can always take spare hosts since we can provision new on-demand, // NodeCandidate::compareTo will ensure that they will not be used until there is no room elsewhere. @@ -84,9 +81,8 @@ public class NodePrioritizer { } /** Collects all node candidates for this application and returns them in the most-to-least preferred order */ - public List<NodeCandidate> collect(List<Node> surplusActiveNodes) { + public List<NodeCandidate> collect() { addApplicationNodes(); - addSurplusNodes(surplusActiveNodes); addReadyNodes(); addCandidatesOnExistingHosts(); return prioritize(); @@ -118,22 +114,9 @@ public class NodePrioritizer { return nodes; } - /** - * Add nodes that have been previously reserved to the same application from - * an earlier downsizing of a cluster - */ - private void addSurplusNodes(List<Node> surplusNodes) { - for (Node node : surplusNodes) { - NodeCandidate candidate = candidateFrom(node, true); - if (!candidate.violatesSpares || canAllocateToSpareHosts) { - candidates.add(candidate); - } - } - } - /** Add a node on each host with enough capacity for the requested flavor */ private void addCandidatesOnExistingHosts() { - if (requestedNodes.resources().isEmpty()) return; + if (requested.resources().isEmpty()) return; for (Node host : allNodes) { if ( ! nodes.canAllocateTenantNodeTo(host, dynamicProvisioning)) continue; @@ -143,10 +126,10 @@ public class NodePrioritizer { if (host.exclusiveToApplicationId().isPresent() && ! fitsPerfectly(host)) continue; if ( ! host.exclusiveToClusterType().map(clusterSpec.type()::equals).orElse(true)) continue; if (spareHosts.contains(host) && !canAllocateToSpareHosts) continue; - if ( ! capacity.hasCapacity(host, requestedNodes.resources().get())) continue; + if ( ! capacity.hasCapacity(host, requested.resources().get())) continue; if ( ! allNodes.childrenOf(host).owner(application).cluster(clusterSpec.id()).isEmpty()) continue; - candidates.add(NodeCandidate.createNewChild(requestedNodes.resources().get(), + candidates.add(NodeCandidate.createNewChild(requested.resources().get(), capacity.availableCapacityOf(host), host, spareHosts.contains(host), @@ -157,14 +140,14 @@ public class NodePrioritizer { } private boolean fitsPerfectly(Node host) { - return calculator.advertisedResourcesOf(host.flavor()).compatibleWith(requestedNodes.resources().get()); + return calculator.advertisedResourcesOf(host.flavor()).compatibleWith(requested.resources().get()); } /** Add existing nodes allocated to the application */ private void addApplicationNodes() { EnumSet<Node.State> legalStates = EnumSet.of(Node.State.active, Node.State.inactive, Node.State.reserved); allNodes.stream() - .filter(node -> node.type() == requestedNodes.type()) + .filter(node -> node.type() == requested.type()) .filter(node -> legalStates.contains(node.state())) .filter(node -> node.allocation().isPresent()) .filter(node -> node.allocation().get().owner().equals(application)) @@ -177,7 +160,7 @@ public class NodePrioritizer { /** Add nodes already provisioned, but not allocated to any application */ private void addReadyNodes() { allNodes.stream() - .filter(node -> node.type() == requestedNodes.type()) + .filter(node -> node.type() == requested.type()) .filter(node -> node.state() == Node.State.ready) .map(node -> candidateFrom(node, false)) .filter(n -> !n.violatesSpares || canAllocateToSpareHosts) @@ -196,11 +179,11 @@ public class NodePrioritizer { isSurplus, false, parent.exclusiveToApplicationId().isEmpty() - && requestedNodes.canResize(node.resources(), - capacity.unusedCapacityOf(parent), - clusterSpec.type(), - topologyChange, - currentClusterSize)); + && requested.canResize(node.resources(), + capacity.unusedCapacityOf(parent), + clusterSpec.type(), + topologyChange, + currentClusterSize)); } else { return NodeCandidate.createStandalone(node, isSurplus, false); } @@ -213,7 +196,7 @@ public class NodePrioritizer { .orElse(nodesInCluster); int failedNodesInGroup = nodesInGroup.failing().size() + nodesInGroup.state(Node.State.failed).size(); if (failedNodesInGroup == 0) return false; - return ! requestedNodes.fulfilledBy(nodesInGroup.size() - failedNodesInGroup); + return ! requested.fulfilledBy(nodesInGroup.size() - failedNodesInGroup); } /** diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/NodeRepositoryProvisioner.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/NodeRepositoryProvisioner.java index ffd2805bcff..bcc63a6704a 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/NodeRepositoryProvisioner.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/NodeRepositoryProvisioner.java @@ -16,6 +16,7 @@ import com.yahoo.config.provision.ProvisionLock; import com.yahoo.config.provision.ProvisionLogger; import com.yahoo.config.provision.Provisioner; import com.yahoo.config.provision.Zone; +import com.yahoo.jdisc.Metric; import com.yahoo.transaction.Mutex; import com.yahoo.vespa.hosted.provision.Node; import com.yahoo.vespa.hosted.provision.NodeList; @@ -61,7 +62,8 @@ public class NodeRepositoryProvisioner implements Provisioner { @Inject public NodeRepositoryProvisioner(NodeRepository nodeRepository, Zone zone, - ProvisionServiceProvider provisionServiceProvider) { + ProvisionServiceProvider provisionServiceProvider, + Metric metric) { this.nodeRepository = nodeRepository; this.allocationOptimizer = new AllocationOptimizer(nodeRepository); this.capacityPolicies = new CapacityPolicies(nodeRepository); @@ -71,7 +73,8 @@ public class NodeRepositoryProvisioner implements Provisioner { this.nodeResourceLimits = new NodeResourceLimits(nodeRepository); this.preparer = new Preparer(nodeRepository, provisionServiceProvider.getHostProvisioner(), - loadBalancerProvisioner); + loadBalancerProvisioner, + metric); this.activator = new Activator(nodeRepository, loadBalancerProvisioner); } @@ -81,13 +84,11 @@ public class NodeRepositoryProvisioner implements Provisioner { * The nodes are ordered by increasing index number. */ @Override - public List<HostSpec> prepare(ApplicationId application, ClusterSpec cluster, Capacity requested, - ProvisionLogger logger) { + public List<HostSpec> prepare(ApplicationId application, ClusterSpec cluster, Capacity requested, ProvisionLogger logger) { log.log(Level.FINE, "Received deploy prepare request for " + requested + " for application " + application + ", cluster " + cluster); - validate(application, cluster, requested); + validate(application, cluster, requested, logger); - int groups; NodeResources resources; NodeSpec nodeSpec; if (requested.type() == NodeType.tenant) { @@ -97,23 +98,21 @@ public class NodeRepositoryProvisioner implements Provisioner { validate(actual, target, cluster, application); logIfDownscaled(requested.minResources().nodes(), actual.minResources().nodes(), cluster, logger); - groups = target.groups(); resources = getNodeResources(cluster, target.nodeResources(), application); - nodeSpec = NodeSpec.from(target.nodes(), resources, cluster.isExclusive(), actual.canFail(), + nodeSpec = NodeSpec.from(target.nodes(), target.groups(), resources, cluster.isExclusive(), actual.canFail(), requested.cloudAccount().orElse(nodeRepository.zone().cloud().account()), requested.clusterInfo().hostTTL()); } else { - groups = 1; // type request with multiple groups is not supported cluster = cluster.withExclusivity(true); resources = getNodeResources(cluster, requested.minResources().nodeResources(), application); nodeSpec = NodeSpec.from(requested.type(), nodeRepository.zone().cloud().account()); } - return asSortedHosts(preparer.prepare(application, cluster, nodeSpec, groups), + return asSortedHosts(preparer.prepare(application, cluster, nodeSpec), requireCompatibleResources(resources, cluster)); } - private void validate(ApplicationId application, ClusterSpec cluster, Capacity requested) { + private void validate(ApplicationId application, ClusterSpec cluster, Capacity requested, ProvisionLogger logger) { if (cluster.group().isPresent()) throw new IllegalArgumentException("Node requests cannot specify a group"); nodeResourceLimits.ensureWithinAdvertisedLimits("Min", requested.minResources().nodeResources(), application, cluster); @@ -121,6 +120,18 @@ public class NodeRepositoryProvisioner implements Provisioner { if ( ! requested.minResources().nodeResources().gpuResources().equals(requested.maxResources().nodeResources().gpuResources())) throw new IllegalArgumentException(requested + " is invalid: Gpu capacity cannot have ranges"); + + logInsufficientDiskResources(cluster, requested, logger); + } + + private void logInsufficientDiskResources(ClusterSpec cluster, Capacity requested, ProvisionLogger logger) { + var resources = requested.minResources().nodeResources(); + if ( ! nodeResourceLimits.isWithinAdvertisedDiskLimits(resources, cluster)) { + logger.logApplicationPackage(Level.WARNING, "Requested disk (" + resources.diskGb() + + "Gb) in " + cluster.id() + " is not large enough to fit " + + "core/heap dumps. Minimum recommended disk resources " + + "is 2x memory for containers and 3x memory for content"); + } } private NodeResources getNodeResources(ClusterSpec cluster, NodeResources nodeResources, ApplicationId applicationId) { @@ -230,6 +241,7 @@ public class NodeRepositoryProvisioner implements Provisioner { } private List<HostSpec> asSortedHosts(List<Node> nodes, NodeResources requestedResources) { + nodes = new ArrayList<>(nodes); nodes.sort(Comparator.comparingInt(node -> node.allocation().get().membership().index())); List<HostSpec> hosts = new ArrayList<>(nodes.size()); for (Node node : nodes) { diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/NodeResourceLimits.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/NodeResourceLimits.java index 9ded1a2735c..8c5a7b6c61e 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/NodeResourceLimits.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/NodeResourceLimits.java @@ -2,14 +2,17 @@ package com.yahoo.vespa.hosted.provision.provisioning; import com.yahoo.config.provision.ApplicationId; +import com.yahoo.config.provision.Capacity; import com.yahoo.config.provision.ClusterSpec; import com.yahoo.config.provision.Environment; import com.yahoo.config.provision.NodeResources; import com.yahoo.config.provision.NodeType; +import com.yahoo.config.provision.ProvisionLogger; import com.yahoo.config.provision.Zone; import com.yahoo.vespa.hosted.provision.NodeRepository; import java.util.Locale; +import java.util.logging.Level; /** * Defines the resource limits for nodes in various zones @@ -35,6 +38,12 @@ public class NodeResourceLimits { illegal(type, "diskGb", "Gb", cluster, requested.diskGb(), minAdvertisedDiskGb(requested, cluster.isExclusive())); } + // TODO: Remove this when we are ready to fail, not just warn on this. */ + public boolean isWithinAdvertisedDiskLimits(NodeResources requested, ClusterSpec cluster) { + if (requested.diskGbIsUnspecified() || requested.memoryGbIsUnspecified()) return true; + return requested.diskGb() >= minAdvertisedDiskGb(requested, cluster); + } + /** Returns whether the real resources we'll end up with on a given tenant node are within limits */ public boolean isWithinRealLimits(NodeCandidate candidateNode, ApplicationId applicationId, ClusterSpec cluster) { if (candidateNode.type() != NodeType.tenant) return true; // Resource limits only apply to tenant nodes @@ -52,9 +61,12 @@ public class NodeResourceLimits { return true; } - public NodeResources enlargeToLegal(NodeResources requested, ApplicationId applicationId, ClusterSpec cluster, boolean exclusive) { + public NodeResources enlargeToLegal(NodeResources requested, ApplicationId applicationId, ClusterSpec cluster, boolean exclusive, boolean followRecommendations) { if (requested.isUnspecified()) return requested; + if (followRecommendations) // TODO: Do unconditionally when we enforce this limit + requested = requested.withDiskGb(Math.max(minAdvertisedDiskGb(requested, cluster), requested.diskGb())); + return requested.withVcpu(Math.max(minAdvertisedVcpu(applicationId, cluster), requested.vcpu())) .withMemoryGb(Math.max(minAdvertisedMemoryGb(cluster), requested.memoryGb())) .withDiskGb(Math.max(minAdvertisedDiskGb(requested, exclusive), requested.diskGb())); @@ -78,6 +90,15 @@ public class NodeResourceLimits { return minRealDiskGb() + reservedDiskSpaceGb(requested.storageType(), exclusive); } + // TODO: Move this check into the above when we are ready to fail, not just warn on this. */ + private double minAdvertisedDiskGb(NodeResources requested, ClusterSpec cluster) { + return requested.memoryGb() * switch (cluster.type()) { + case combined, content -> 3; + case container -> 2; + default -> 0; // No constraint on other types + }; + } + // Note: Assumes node type 'host' private long reservedDiskSpaceGb(NodeResources.StorageType storageType, boolean exclusive) { if (storageType == NodeResources.StorageType.local && ! zone().cloud().allowHostSharing()) diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/NodeSpec.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/NodeSpec.java index bfe6f4211cb..cea0608013d 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/NodeSpec.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/NodeSpec.java @@ -24,9 +24,6 @@ public interface NodeSpec { /** The node type this requests */ NodeType type(); - /** Returns whether the hosts running the nodes of this application can also run nodes of other applications. */ - boolean isExclusive(); - /** Returns whether the given node resources is compatible with this spec */ boolean isCompatible(NodeResources resources); @@ -38,21 +35,23 @@ public interface NodeSpec { return fulfilledDeficitCount(count) == 0; } + /** Returns the total number of nodes this is requesting, or empty if not specified */ + Optional<Integer> count(); + + int groups(); + + /** Returns the group size requested if count() is present. Throws RuntimeException otherwise. */ + default int groupSize() { return count().get() / groups(); } + /** Returns whether this should throw an exception if the requested nodes are not fully available */ boolean canFail(); /** Returns whether we should retire nodes at all when fulfilling this spec */ boolean considerRetiring(); - /** Returns the ideal number of nodes that should be retired to fulfill this spec */ - int idealRetiredCount(int acceptedCount, int wantToRetireCount, int currentRetiredCount); - /** Returns number of additional nodes needed for this spec to be fulfilled given the current node count */ int fulfilledDeficitCount(int count); - /** Returns a specification of a fraction of all the nodes of this. It is assumed the argument is a valid divisor. */ - NodeSpec fraction(int divisor); - /** Returns the resources requested by this or empty if none are explicitly requested */ Optional<NodeResources> resources(); @@ -80,9 +79,9 @@ public interface NodeSpec { return false; } - static NodeSpec from(int nodeCount, NodeResources resources, boolean exclusive, boolean canFail, + static NodeSpec from(int nodeCount, int groupCount, NodeResources resources, boolean exclusive, boolean canFail, CloudAccount cloudAccount, Duration hostTTL) { - return new CountNodeSpec(nodeCount, resources, exclusive, canFail, canFail, cloudAccount, hostTTL); + return new CountNodeSpec(nodeCount, groupCount, resources, exclusive, canFail, canFail, cloudAccount, hostTTL); } static NodeSpec from(NodeType type, CloudAccount cloudAccount) { @@ -93,6 +92,7 @@ public interface NodeSpec { class CountNodeSpec implements NodeSpec { private final int count; + private final int groups; private final NodeResources requestedNodeResources; private final boolean exclusive; private final boolean canFail; @@ -100,9 +100,10 @@ public interface NodeSpec { private final CloudAccount cloudAccount; private final Duration hostTTL; - private CountNodeSpec(int count, NodeResources resources, boolean exclusive, boolean canFail, + private CountNodeSpec(int count, int groups, NodeResources resources, boolean exclusive, boolean canFail, boolean considerRetiring, CloudAccount cloudAccount, Duration hostTTL) { this.count = count; + this.groups = groups; this.requestedNodeResources = Objects.requireNonNull(resources, "Resources must be specified"); this.exclusive = exclusive; this.canFail = canFail; @@ -115,14 +116,17 @@ public interface NodeSpec { } @Override + public Optional<Integer> count() { return Optional.of(count); } + + @Override + public int groups() { return groups; } + + @Override public Optional<NodeResources> resources() { return Optional.of(requestedNodeResources); } @Override - public boolean isExclusive() { return exclusive; } - - @Override public NodeType type() { return NodeType.tenant; } @Override @@ -142,22 +146,12 @@ public interface NodeSpec { } @Override - public int idealRetiredCount(int acceptedCount, int wantToRetireCount, int currentRetiredCount) { - return acceptedCount - this.count - currentRetiredCount; - } - - @Override public int fulfilledDeficitCount(int count) { return Math.max(this.count - count, 0); } - @Override - public NodeSpec fraction(int divisor) { - return new CountNodeSpec(count/divisor, requestedNodeResources, exclusive, canFail, considerRetiring, cloudAccount, hostTTL); - } - public NodeSpec withoutRetiring() { - return new CountNodeSpec(count, requestedNodeResources, exclusive, canFail, false, cloudAccount, hostTTL); + return new CountNodeSpec(count, groups, requestedNodeResources, exclusive, canFail, false, cloudAccount, hostTTL); } @Override @@ -169,7 +163,6 @@ public interface NodeSpec { public boolean canResize(NodeResources currentNodeResources, NodeResources currentSpareHostResources, ClusterSpec.Type type, boolean hasTopologyChange, int currentClusterSize) { if (exclusive) return false; // exclusive resources must match the host - // Never allow in-place resize when also changing topology or decreasing cluster size if (hasTopologyChange || count < currentClusterSize) return false; @@ -198,7 +191,10 @@ public interface NodeSpec { public Duration hostTTL() { return hostTTL; } @Override - public String toString() { return "request for " + count + " nodes with " + requestedNodeResources; } + public String toString() { + return "request for " + count + " nodes" + + ( groups > 1 ? " (in " + groups + " groups)" : "") + + " with " + requestedNodeResources; } } @@ -211,16 +207,19 @@ public interface NodeSpec { private final NodeType type; private final CloudAccount cloudAccount; - public TypeNodeSpec(NodeType type, CloudAccount cloudAccount) { + private TypeNodeSpec(NodeType type, CloudAccount cloudAccount) { this.type = type; this.cloudAccount = cloudAccount; } @Override - public NodeType type() { return type; } + public Optional<Integer> count() { return Optional.empty(); } + + @Override + public int groups() { return 1; } @Override - public boolean isExclusive() { return false; } + public NodeType type() { return type; } @Override public boolean isCompatible(NodeResources resources) { return true; } @@ -235,20 +234,12 @@ public interface NodeSpec { public boolean considerRetiring() { return true; } @Override - public int idealRetiredCount(int acceptedCount, int wantToRetireCount, int currentRetiredCount) { - return wantToRetireCount - currentRetiredCount; - } - - @Override public int fulfilledDeficitCount(int count) { // If no wanted count is specified for this node type, then any count fulfills the deficit return Math.max(0, WANTED_NODE_COUNT.getOrDefault(type, 0) - count); } @Override - public NodeSpec fraction(int divisor) { return this; } - - @Override public Optional<NodeResources> resources() { return Optional.empty(); } diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/Preparer.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/Preparer.java index b6c7324c75c..79b1bccbbde 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/Preparer.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/Preparer.java @@ -1,135 +1,186 @@ // Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. package com.yahoo.vespa.hosted.provision.provisioning; +import com.yahoo.component.Version; import com.yahoo.config.provision.ApplicationId; -import com.yahoo.config.provision.ClusterMembership; import com.yahoo.config.provision.ClusterSpec; import com.yahoo.config.provision.NodeAllocationException; +import com.yahoo.config.provision.NodeResources; +import com.yahoo.config.provision.NodeType; +import com.yahoo.jdisc.Metric; +import com.yahoo.transaction.Mutex; import com.yahoo.vespa.hosted.provision.LockedNodeList; import com.yahoo.vespa.hosted.provision.Node; import com.yahoo.vespa.hosted.provision.NodeList; import com.yahoo.vespa.hosted.provision.NodeRepository; +import com.yahoo.vespa.hosted.provision.node.Agent; +import com.yahoo.vespa.hosted.provision.provisioning.HostProvisioner.HostSharing; import java.util.ArrayList; import java.util.List; -import java.util.ListIterator; import java.util.Optional; +import java.util.function.Consumer; +import java.util.function.Supplier; +import java.util.logging.Level; +import java.util.logging.Logger; /** - * Performs preparation of node activation changes for an application. + * Performs preparation of node activation changes for a cluster of an application. * * @author bratseth */ -class Preparer { +public class Preparer { - private final GroupPreparer groupPreparer; + private static final Mutex PROBE_LOCK = () -> {}; + private static final Logger log = Logger.getLogger(Preparer.class.getName()); + + private final NodeRepository nodeRepository; + private final Optional<HostProvisioner> hostProvisioner; private final Optional<LoadBalancerProvisioner> loadBalancerProvisioner; + private final ProvisioningThrottler throttler; - public Preparer(NodeRepository nodeRepository, Optional<HostProvisioner> hostProvisioner, - Optional<LoadBalancerProvisioner> loadBalancerProvisioner) { + public Preparer(NodeRepository nodeRepository, Optional<HostProvisioner> hostProvisioner, Optional<LoadBalancerProvisioner> loadBalancerProvisioner, Metric metric) { + this.nodeRepository = nodeRepository; + this.hostProvisioner = hostProvisioner; this.loadBalancerProvisioner = loadBalancerProvisioner; - this.groupPreparer = new GroupPreparer(nodeRepository, hostProvisioner); - } - - /** Prepare all required resources for the given application and cluster */ - public List<Node> prepare(ApplicationId application, ClusterSpec cluster, NodeSpec requestedNodes, int wantedGroups) { - try { - var nodes = prepareNodes(application, cluster, requestedNodes, wantedGroups); - prepareLoadBalancer(application, cluster, requestedNodes); - return nodes; - } - catch (NodeAllocationException e) { - throw new NodeAllocationException("Could not satisfy " + requestedNodes + - ( wantedGroups > 1 ? " (in " + wantedGroups + " groups)" : "") + - " in " + application + " " + cluster + ": " + e.getMessage(), - e.retryable()); - } + this.throttler = new ProvisioningThrottler(nodeRepository, metric); } /** - * Ensure sufficient nodes are reserved or active for the given application and cluster + * Ensure sufficient nodes are reserved or active for the given application, group and cluster * - * @return the list of nodes this cluster will have allocated if activated + * @param application the application we are allocating to + * @param cluster the cluster and group we are allocating to + * @param requested a specification of the requested nodes + * @return the list of nodes this cluster group will have allocated if activated */ - // Note: This operation may make persisted changes to the set of reserved and inactive nodes, - // but it may not change the set of active nodes, as the active nodes must stay in sync with the - // active config model which is changed on activate - private List<Node> prepareNodes(ApplicationId application, ClusterSpec cluster, NodeSpec requestedNodes, - int wantedGroups) { - LockedNodeList allNodes = groupPreparer.createUnlockedNodeList(); - NodeList appNodes = allNodes.owner(application); - List<Node> surplusNodes = findNodesInRemovableGroups(appNodes, cluster, wantedGroups); - - List<Integer> usedIndices = appNodes.cluster(cluster.id()).mapToList(node -> node.allocation().get().membership().index()); - NodeIndices indices = new NodeIndices(usedIndices); - List<Node> acceptedNodes = new ArrayList<>(); - - for (int groupIndex = 0; groupIndex < wantedGroups; groupIndex++) { - ClusterSpec clusterGroup = cluster.with(Optional.of(ClusterSpec.Group.from(groupIndex))); - GroupPreparer.PrepareResult result = groupPreparer.prepare(application, clusterGroup, - requestedNodes.fraction(wantedGroups), - surplusNodes, indices, wantedGroups, - allNodes); - allNodes = result.allNodes(); // Might have changed - List<Node> accepted = result.prepared(); - if (requestedNodes.rejectNonActiveParent()) { - NodeList activeHosts = allNodes.state(Node.State.active).parents().nodeType(requestedNodes.type().hostType()); - accepted = accepted.stream() - .filter(node -> node.parentHostname().isEmpty() || activeHosts.parentOf(node).isPresent()) - .toList(); - } + // Note: This operation may make persisted changes to the set of reserved and inactive nodes, + // but it may not change the set of active nodes, as the active nodes must stay in sync with the + // active config model which is changed on activate + public List<Node> prepare(ApplicationId application, ClusterSpec cluster, NodeSpec requested) { + log.log(Level.FINE, () -> "Preparing " + cluster.type().name() + " " + cluster.id() + " with requested resources " + + requested.resources().orElse(NodeResources.unspecified())); - replace(acceptedNodes, accepted); + loadBalancerProvisioner.ifPresent(provisioner -> provisioner.prepare(application, cluster, requested)); + + // Try preparing in memory without global unallocated lock. Most of the time there should be no changes, + // and we can return nodes previously allocated. + LockedNodeList allNodes = nodeRepository.nodes().list(PROBE_LOCK); + NodeIndices indices = new NodeIndices(cluster.id(), allNodes); + NodeAllocation probeAllocation = prepareAllocation(application, cluster, requested, indices::probeNext, allNodes); + if (probeAllocation.fulfilledAndNoChanges()) { + List<Node> acceptedNodes = probeAllocation.finalNodes(); + indices.commitProbe(); + return acceptedNodes; + } else { + // There were some changes, so re-do the allocation with locks + indices.resetProbe(); + return prepareWithLocks(application, cluster, requested, indices); } - moveToActiveGroup(surplusNodes, wantedGroups, cluster.group()); - acceptedNodes.removeAll(surplusNodes); - return acceptedNodes; } - /** Prepare a load balancer for given application and cluster */ - public void prepareLoadBalancer(ApplicationId application, ClusterSpec cluster, NodeSpec requestedNodes) { - loadBalancerProvisioner.ifPresent(provisioner -> provisioner.prepare(application, cluster, requestedNodes)); - } + /// Note that this will write to the node repo. + private List<Node> prepareWithLocks(ApplicationId application, ClusterSpec cluster, NodeSpec requested, NodeIndices indices) { + try (Mutex lock = nodeRepository.applications().lock(application); + Mutex allocationLock = nodeRepository.nodes().lockUnallocated()) { + LockedNodeList allNodes = nodeRepository.nodes().list(allocationLock); + NodeAllocation allocation = prepareAllocation(application, cluster, requested, indices::next, allNodes); + NodeType hostType = allocation.nodeType().hostType(); + if (canProvisionDynamically(hostType) && allocation.hostDeficit().isPresent()) { + HostSharing sharing = hostSharing(cluster, hostType); + Version osVersion = nodeRepository.osVersions().targetFor(hostType).orElse(Version.emptyVersion); + NodeAllocation.HostDeficit deficit = allocation.hostDeficit().get(); + List<Node> hosts = new ArrayList<>(); + Consumer<List<ProvisionedHost>> whenProvisioned = provisionedHosts -> { + hosts.addAll(provisionedHosts.stream().map(host -> host.generateHost(requested.hostTTL())).toList()); + nodeRepository.nodes().addNodes(hosts, Agent.application); - /** - * Returns a list of the nodes which are - * in groups with index number above or equal the group count - */ - private List<Node> findNodesInRemovableGroups(NodeList appNodes, ClusterSpec requestedCluster, int wantedGroups) { - List<Node> surplusNodes = new ArrayList<>(); - for (Node node : appNodes.state(Node.State.active)) { - ClusterSpec nodeCluster = node.allocation().get().membership().cluster(); - if ( ! nodeCluster.id().equals(requestedCluster.id())) continue; - if ( ! nodeCluster.type().equals(requestedCluster.type())) continue; - if (nodeCluster.group().get().index() >= wantedGroups) - surplusNodes.add(node); - } - return surplusNodes; - } - - /** Move nodes from unwanted groups to wanted groups to avoid lingering groups consisting of retired nodes */ - private void moveToActiveGroup(List<Node> surplusNodes, int wantedGroups, Optional<ClusterSpec.Group> targetGroup) { - for (ListIterator<Node> i = surplusNodes.listIterator(); i.hasNext(); ) { - Node node = i.next(); - ClusterMembership membership = node.allocation().get().membership(); - ClusterSpec cluster = membership.cluster(); - if (cluster.group().get().index() >= wantedGroups) { - ClusterSpec.Group newGroup = targetGroup.orElse(ClusterSpec.Group.from(0)); - ClusterMembership newGroupMembership = membership.with(cluster.with(Optional.of(newGroup))); - i.set(node.with(node.allocation().get().with(newGroupMembership))); + // Offer the nodes on the newly provisioned hosts, this should be enough to cover the deficit + List<NodeCandidate> candidates = provisionedHosts.stream() + .map(host -> NodeCandidate.createNewExclusiveChild(host.generateNode(), + host.generateHost(requested.hostTTL()))) + .toList(); + allocation.offer(candidates); + }; + try { + HostProvisionRequest request = new HostProvisionRequest(allocation.provisionIndices(deficit.count()), + hostType, + deficit.resources(), + application, + osVersion, + sharing, + Optional.of(cluster.type()), + Optional.of(cluster.id()), + requested.cloudAccount(), + deficit.dueToFlavorUpgrade()); + if (throttler.throttle(allNodes, Agent.system)) { + throw new NodeAllocationException("Host provisioning is being throttled", true); + } + hostProvisioner.get().provisionHosts(request, whenProvisioned); + } catch (NodeAllocationException e) { + // Mark the nodes that were written to ZK in the consumer for deprovisioning. While these hosts do + // not exist, we cannot remove them from ZK here because other nodes may already have been + // allocated on them, so let HostDeprovisioner deal with it + hosts.forEach(host -> nodeRepository.nodes().deprovision(host.hostname(), Agent.system, nodeRepository.clock().instant())); + throw e; + } + } else if (allocation.hostDeficit().isPresent() && requested.canFail() && + allocation.hasRetiredJustNow() && requested instanceof NodeSpec.CountNodeSpec cns) { + // Non-dynamically provisioned zone with a deficit because we just now retired some nodes. + // Try again, but without retiring + indices.resetProbe(); + List<Node> accepted = prepareWithLocks(application, cluster, cns.withoutRetiring(), indices); + log.warning("Prepared " + application + " " + cluster.id() + " without retirement due to lack of capacity"); + return accepted; } + + if (! allocation.fulfilled() && requested.canFail()) + throw new NodeAllocationException("Could not satisfy " + requested + " in " + application + " " + cluster + + allocation.allocationFailureDetails(), true); + + // Carry out and return allocation + List<Node> acceptedNodes = allocation.finalNodes(); + nodeRepository.nodes().reserve(allocation.reservableNodes()); + nodeRepository.nodes().addReservedNodes(new LockedNodeList(allocation.newNodes(), allocationLock)); + + if (requested.rejectNonActiveParent()) { // TODO: Move into offer() - currently this must be done *after* reserving + NodeList activeHosts = allNodes.state(Node.State.active).parents().nodeType(requested.type().hostType()); + acceptedNodes = acceptedNodes.stream() + .filter(node -> node.parentHostname().isEmpty() || activeHosts.parentOf(node).isPresent()) + .toList(); + } + return acceptedNodes; } } - /** - * Nodes are immutable so when changing attributes to the node we create a new instance. - * - * This method is used to both add new nodes and replaces old node references with the new references. - */ - private List<Node> replace(List<Node> list, List<Node> changed) { - list.removeAll(changed); - list.addAll(changed); - return list; + private NodeAllocation prepareAllocation(ApplicationId application, ClusterSpec cluster, NodeSpec requested, + Supplier<Integer> nextIndex, LockedNodeList allNodes) { + + NodeAllocation allocation = new NodeAllocation(allNodes, application, cluster, requested, nextIndex, nodeRepository); + NodePrioritizer prioritizer = new NodePrioritizer(allNodes, + application, + cluster, + requested, + nodeRepository.zone().cloud().dynamicProvisioning(), + nodeRepository.nameResolver(), + nodeRepository.nodes(), + nodeRepository.resourcesCalculator(), + nodeRepository.spareCount(), + requested.cloudAccount().isExclave(nodeRepository.zone())); + allocation.offer(prioritizer.collect()); + return allocation; + } + + private boolean canProvisionDynamically(NodeType hostType) { + return nodeRepository.zone().cloud().dynamicProvisioning() && + (hostType == NodeType.host || hostType.isConfigServerHostLike()); + } + + private HostSharing hostSharing(ClusterSpec cluster, NodeType hostType) { + if ( hostType.isSharable()) + return nodeRepository.exclusiveAllocation(cluster) ? HostSharing.exclusive : HostSharing.any; + else + return HostSharing.any; } } diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/ProvisioningThrottler.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/ProvisioningThrottler.java new file mode 100644 index 00000000000..cfdc3e8dbe8 --- /dev/null +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/ProvisioningThrottler.java @@ -0,0 +1,62 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.vespa.hosted.provision.provisioning; + +import com.yahoo.jdisc.Metric; +import com.yahoo.vespa.flags.IntFlag; +import com.yahoo.vespa.flags.PermanentFlags; +import com.yahoo.vespa.hosted.provision.NodeList; +import com.yahoo.vespa.hosted.provision.NodeRepository; +import com.yahoo.vespa.hosted.provision.node.Agent; +import com.yahoo.vespa.hosted.provision.node.History; + +import java.time.Duration; +import java.time.Instant; +import java.util.Objects; +import java.util.logging.Logger; + +/** + * Throttles provisioning of new hosts in dynamically provisioned zones. + * + * @author mpolden + */ +public class ProvisioningThrottler { + + /** Metric that indicates whether throttling is active where 1 means active and 0 means inactive */ + private static final String throttlingActiveMetric = "throttledHostProvisioning"; + + private static final Logger LOG = Logger.getLogger(ProvisioningThrottler.class.getName()); + private static final Duration WINDOW = Duration.ofDays(1); + + private final NodeRepository nodeRepository; + private final Metric metric; + private final IntFlag maxHostsPerHourFlag; + + public ProvisioningThrottler(NodeRepository nodeRepository, Metric metric) { + this.nodeRepository = Objects.requireNonNull(nodeRepository); + this.metric = Objects.requireNonNull(metric); + this.maxHostsPerHourFlag = PermanentFlags.MAX_HOSTS_PER_HOUR.bindTo(nodeRepository.flagSource()); + } + + /** Returns whether provisioning should be throttled at given instant */ + public boolean throttle(NodeList allNodes, Agent agent) { + Instant startOfWindow = nodeRepository.clock().instant().minus(WINDOW); + int provisionedRecently = allNodes.matching(host -> host.history().hasEventAfter(History.Event.Type.provisioned, + startOfWindow)) + .size(); + boolean throttle = throttle(provisionedRecently, maxHostsPerHourFlag.value(), agent); + metric.set(throttlingActiveMetric, throttle ? 1 : 0, null); + return throttle; + } + + static boolean throttle(int recentHosts, int maxPerHour, Agent agent) { + double rate = recentHosts / (double) WINDOW.toMillis(); + boolean throttle = (rate * Duration.ofHours(1).toMillis()) > maxPerHour; + if (throttle) { + LOG.warning(String.format("Throttling provisioning of new hosts by %s: %d hosts have been provisioned " + + "in the past %s, which exceeds maximum rate of %d hosts/hour", agent, + recentHosts, WINDOW, maxPerHour)); + } + return throttle; + } + +} diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/restapi/NodePatcher.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/restapi/NodePatcher.java index 407961dc054..32bba9336a2 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/restapi/NodePatcher.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/restapi/NodePatcher.java @@ -223,6 +223,7 @@ public class NodePatcher { case WANT_TO_RETIRE: case WANT_TO_DEPROVISION: case WANT_TO_REBUILD: + case WANT_TO_UPGRADE_FLAVOR: // These needs to be handled as one, because certain combinations are not allowed. return node.withWantToRetire(asOptionalBoolean(root.field(WANT_TO_RETIRE)) .orElseGet(node.status()::wantToRetire), diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/restapi/NodesResponse.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/restapi/NodesResponse.java index ff814af7390..5221fa8875b 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/restapi/NodesResponse.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/restapi/NodesResponse.java @@ -177,6 +177,7 @@ class NodesResponse extends SlimeJsonResponse { object.setBool("preferToRetire", node.status().preferToRetire()); object.setBool("wantToDeprovision", node.status().wantToDeprovision()); object.setBool("wantToRebuild", node.status().wantToRebuild()); + object.setBool("wantToUpgradeFlavor", node.status().wantToUpgradeFlavor()); object.setBool("down", node.isDown()); toSlime(node.history().events(), object.setArray("history")); toSlime(node.history().log(), object.setArray("log")); diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/testutils/InMemoryProvisionLogger.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/testutils/InMemoryProvisionLogger.java new file mode 100644 index 00000000000..7ded74b7451 --- /dev/null +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/testutils/InMemoryProvisionLogger.java @@ -0,0 +1,38 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.vespa.hosted.provision.testutils; + +import com.yahoo.config.provision.ProvisionLogger; + +import java.util.ArrayList; +import java.util.List; +import java.util.logging.Level; +import java.util.logging.Logger; + +/** + * A logger which remembers all messages logged in addition to writing them to standard out. + * + * @author bratseth + */ +public class InMemoryProvisionLogger implements ProvisionLogger { + + private static final Logger LOG = Logger.getLogger(InMemoryProvisionLogger.class.getName()); + + private final List<String> systemLog = new ArrayList<>(); + private final List<String> applicationLog = new ArrayList<>(); + + @Override + public void log(Level level, String message) { + LOG.info("ProvisionLogger system " + level + ": " + message); + systemLog.add(level + ": " + message); + } + + @Override + public void logApplicationPackage(Level level, String message) { + LOG.info("ProvisionLogger application " + level + ": " + message); + applicationLog.add(level + ": " + message); + } + + public List<String> systemLog() { return systemLog; } + public List<String> applicationLog() { return applicationLog; } + +} diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/testutils/MockDeployer.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/testutils/MockDeployer.java index 63386449f0c..3ed01e00ee6 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/testutils/MockDeployer.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/testutils/MockDeployer.java @@ -1,6 +1,7 @@ // Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. package com.yahoo.vespa.hosted.provision.testutils; +import com.yahoo.component.Version; import com.yahoo.component.annotation.Inject; import com.yahoo.config.provision.ActivationContext; import com.yahoo.config.provision.ApplicationId; @@ -16,6 +17,7 @@ import com.yahoo.vespa.hosted.provision.Node; import com.yahoo.vespa.hosted.provision.NodeMutex; import com.yahoo.vespa.hosted.provision.NodeRepository; import com.yahoo.vespa.hosted.provision.provisioning.NodeRepositoryProvisioner; +import com.yahoo.vespa.service.duper.InfraApplication; import java.time.Clock; import java.time.Duration; @@ -52,7 +54,7 @@ public class MockDeployer implements Deployer { @Inject @SuppressWarnings("unused") public MockDeployer() { - this(null, Clock.systemUTC(), Map.of()); + this(null, Clock.systemUTC(), List.of()); } /** @@ -74,9 +76,9 @@ public class MockDeployer implements Deployer { */ public MockDeployer(NodeRepositoryProvisioner provisioner, Clock clock, - Map<ApplicationId, ApplicationContext> applications) { + List<ApplicationContext> applications) { this.provisioner = provisioner; - this.applications = new HashMap<>(applications); + this.applications = applications.stream().collect(Collectors.toMap(ApplicationContext::id, c -> c)); this.nodeRepository = null; this.clock = clock; @@ -118,7 +120,7 @@ public class MockDeployer implements Deployer { } @Override - public Optional<Instant> lastDeployTime(ApplicationId application) { + public Optional<Instant> activationTime(ApplicationId application) { return Optional.ofNullable(lastDeployTimes.get(application)); } @@ -223,6 +225,11 @@ public class MockDeployer implements Deployer { this(id, List.of(new ClusterContext(id, cluster, capacity))); } + public ApplicationContext(InfraApplication application, Version version) { + this(application.getApplicationId(), List.of(new ClusterContext( + application.getApplicationId(), application.getClusterSpecWithVersion(version), application.getCapacity()))); + } + public ApplicationId id() { return id; } /** Returns list of cluster specs of this application. */ @@ -254,7 +261,7 @@ public class MockDeployer implements Deployer { public ClusterSpec cluster() { return cluster; } private List<HostSpec> prepare(NodeRepositoryProvisioner provisioner) { - return provisioner.prepare(id, cluster, capacity, null); + return provisioner.prepare(id, cluster, capacity, new InMemoryProvisionLogger()); } } diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/testutils/MockHostProvisioner.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/testutils/MockHostProvisioner.java index bc10a97068e..e6a064c7bf5 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/testutils/MockHostProvisioner.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/testutils/MockHostProvisioner.java @@ -104,7 +104,7 @@ public class MockHostProvisioner implements HostProvisioner { result.put(host.hostname(), createIpConfig(host)); host.ipConfig().pool().hostnames().forEach(hostname -> result.put(hostname.value(), IP.Config.ofEmptyPool(nameResolver.resolveAll(hostname.value())))); - return new HostIpConfig(result); + return new HostIpConfig(result, Optional.empty()); } @Override diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/testutils/MockNodeRepository.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/testutils/MockNodeRepository.java index 5ad0dd327c8..e3f67721eb5 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/testutils/MockNodeRepository.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/testutils/MockNodeRepository.java @@ -1,8 +1,6 @@ // Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. package com.yahoo.vespa.hosted.provision.testutils; -import com.yahoo.config.provision.ClusterInfo; -import com.yahoo.config.provision.IntRange; import com.yahoo.component.Version; import com.yahoo.config.provision.ActivationContext; import com.yahoo.config.provision.ApplicationId; @@ -10,12 +8,14 @@ import com.yahoo.config.provision.ApplicationName; import com.yahoo.config.provision.ApplicationTransaction; import com.yahoo.config.provision.Capacity; import com.yahoo.config.provision.CloudAccount; +import com.yahoo.config.provision.ClusterInfo; import com.yahoo.config.provision.ClusterResources; import com.yahoo.config.provision.ClusterSpec; import com.yahoo.config.provision.DockerImage; import com.yahoo.config.provision.Flavor; import com.yahoo.config.provision.HostSpec; import com.yahoo.config.provision.InstanceName; +import com.yahoo.config.provision.IntRange; import com.yahoo.config.provision.NodeFlavors; import com.yahoo.config.provision.NodeResources; import com.yahoo.config.provision.NodeType; @@ -25,6 +25,7 @@ import com.yahoo.config.provision.Zone; import com.yahoo.config.provision.ZoneEndpoint; import com.yahoo.config.provision.ZoneEndpoint.AccessType; import com.yahoo.config.provision.ZoneEndpoint.AllowedUrn; +import com.yahoo.jdisc.test.MockMetric; import com.yahoo.transaction.Mutex; import com.yahoo.transaction.NestedTransaction; import com.yahoo.vespa.curator.mock.MockCurator; @@ -42,6 +43,9 @@ import com.yahoo.vespa.hosted.provision.node.IP; import com.yahoo.vespa.hosted.provision.node.Status; import com.yahoo.vespa.hosted.provision.provisioning.EmptyProvisionServiceProvider; import com.yahoo.vespa.hosted.provision.provisioning.NodeRepositoryProvisioner; +import com.yahoo.vespa.service.duper.ConfigServerApplication; +import com.yahoo.vespa.service.duper.InfraApplication; +import com.yahoo.vespa.service.duper.TenantHostApplication; import javax.inject.Inject; import java.time.Clock; @@ -101,7 +105,7 @@ public class MockNodeRepository extends NodeRepository { } private void populate() { - NodeRepositoryProvisioner provisioner = new NodeRepositoryProvisioner(this, Zone.defaultZone(), new MockProvisionServiceProvider()); + NodeRepositoryProvisioner provisioner = new NodeRepositoryProvisioner(this, Zone.defaultZone(), new MockProvisionServiceProvider(), new MockMetric()); List<Node> nodes = new ArrayList<>(); // Regular nodes @@ -178,7 +182,7 @@ public class MockNodeRepository extends NodeRepository { .build()); // Ready all nodes, except 7 and 55 - nodes = nodes().addNodes(nodes, Agent.system); + nodes = new ArrayList<>(nodes().addNodes(nodes, Agent.system)); nodes.remove(node7); nodes.remove(node55); nodes = nodes().deallocate(nodes, Agent.system, getClass().getSimpleName()); @@ -191,13 +195,11 @@ public class MockNodeRepository extends NodeRepository { nodes().removeRecursively("dockerhost6.yahoo.com"); // Activate config servers - ApplicationId cfgApp = ApplicationId.from("cfg", "cfg", "cfg"); - ClusterSpec cfgCluster = ClusterSpec.request(ClusterSpec.Type.container, ClusterSpec.Id.from("configservers")).vespaVersion("6.42").build(); - activate(provisioner.prepare(cfgApp, cfgCluster, Capacity.fromRequiredNodeType(NodeType.config), null), cfgApp, provisioner); + InfraApplication cfgApp = new ConfigServerApplication(); + activate(provisioner.prepare(cfgApp.getApplicationId(), cfgApp.getClusterSpecWithVersion(Version.fromString("6.42")), cfgApp.getCapacity(), null), cfgApp.getApplicationId(), provisioner); - ApplicationId zoneApp = ApplicationId.from(TenantName.from("zoneapp"), ApplicationName.from("zoneapp"), InstanceName.from("zoneapp")); - ClusterSpec zoneCluster = ClusterSpec.request(ClusterSpec.Type.container, ClusterSpec.Id.from("node-admin")).vespaVersion("6.42").build(); - activate(provisioner.prepare(zoneApp, zoneCluster, Capacity.fromRequiredNodeType(NodeType.host), null), zoneApp, provisioner); + InfraApplication tenantHostApp = new TenantHostApplication(); + activate(provisioner.prepare(tenantHostApp.getApplicationId(), tenantHostApp.getClusterSpecWithVersion(Version.fromString("6.42")), tenantHostApp.getCapacity(), null), tenantHostApp.getApplicationId(), provisioner); ApplicationId app1Id = ApplicationId.from(TenantName.from("tenant1"), ApplicationName.from("application1"), InstanceName.from("instance1")); ClusterSpec cluster1Id = ClusterSpec.request(ClusterSpec.Type.container, ClusterSpec.Id.from("id1")) |