diff options
author | Jon Bratseth <bratseth@gmail.com> | 2021-12-09 22:42:19 +0100 |
---|---|---|
committer | Jon Bratseth <bratseth@gmail.com> | 2021-12-09 22:42:19 +0100 |
commit | 73d01f281f0951d3e215b67bd8ce7eb653bd1d4f (patch) | |
tree | 5037abf30cb550dd6da3a4dca2283122c526d5a6 /node-repository | |
parent | 8b50a53485999cbae22852cd1a11489d50d018f5 (diff) | |
parent | 68dbb0d83a1846fc729cef36985956b002f6d7e4 (diff) |
Merge branch 'master' into bratseth/ignore-warmup-period
Diffstat (limited to 'node-repository')
25 files changed, 279 insertions, 173 deletions
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/applications/Cluster.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/applications/Cluster.java index e0ccbe10b10..ad20f68ca33 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/applications/Cluster.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/applications/Cluster.java @@ -26,7 +26,7 @@ public class Cluster { private final ClusterSpec.Id id; private final boolean exclusive; private final ClusterResources min, max; - private boolean required; + private final boolean required; private final Optional<Suggestion> suggested; private final Optional<ClusterResources> target; diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/AllocatableClusterResources.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/AllocatableClusterResources.java index 078b0621a99..849ea03665b 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/AllocatableClusterResources.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/AllocatableClusterResources.java @@ -13,6 +13,7 @@ import com.yahoo.vespa.hosted.provision.provisioning.NodeResourceLimits; import java.util.List; import java.util.Optional; +import java.util.stream.Collectors; /** * @author bratseth @@ -139,23 +140,21 @@ public class AllocatableClusterResources { public static Optional<AllocatableClusterResources> from(ClusterResources wantedResources, ClusterSpec clusterSpec, Limits applicationLimits, - boolean required, NodeList hosts, NodeRepository nodeRepository) { - var capacityPolicies = new CapacityPolicies(nodeRepository); var systemLimits = new NodeResourceLimits(nodeRepository); boolean exclusive = clusterSpec.isExclusive(); - int actualNodes = capacityPolicies.decideSize(wantedResources.nodes(), required, true, false, clusterSpec); if ( !clusterSpec.isExclusive() && !nodeRepository.zone().getCloud().dynamicProvisioning()) { // We decide resources: Add overhead to what we'll request (advertised) to make sure real becomes (at least) cappedNodeResources var advertisedResources = nodeRepository.resourcesCalculator().realToRequest(wantedResources.nodeResources(), exclusive); advertisedResources = systemLimits.enlargeToLegal(advertisedResources, clusterSpec.type(), exclusive); // Ask for something legal advertisedResources = applicationLimits.cap(advertisedResources); // Overrides other conditions, even if it will then fail - advertisedResources = capacityPolicies.decideNodeResources(advertisedResources, required, clusterSpec); // Adjust to what we can request var realResources = nodeRepository.resourcesCalculator().requestToReal(advertisedResources, exclusive); // What we'll really get - if ( ! systemLimits.isWithinRealLimits(realResources, clusterSpec.type())) return Optional.empty(); + if ( ! systemLimits.isWithinRealLimits(realResources, clusterSpec.type())) + return Optional.empty(); + if (matchesAny(hosts, advertisedResources)) - return Optional.of(new AllocatableClusterResources(wantedResources.withNodes(actualNodes).with(realResources), + return Optional.of(new AllocatableClusterResources(wantedResources.with(realResources), advertisedResources, wantedResources, clusterSpec)); @@ -168,7 +167,6 @@ public class AllocatableClusterResources { for (Flavor flavor : nodeRepository.flavors().getFlavors()) { // Flavor decide resources: Real resources are the worst case real resources we'll get if we ask for these advertised resources NodeResources advertisedResources = nodeRepository.resourcesCalculator().advertisedResourcesOf(flavor); - advertisedResources = capacityPolicies.decideNodeResources(advertisedResources, required, clusterSpec); // Adjust to what we can get NodeResources realResources = nodeRepository.resourcesCalculator().requestToReal(advertisedResources, exclusive); // Adjust where we don't need exact match to the flavor @@ -184,7 +182,7 @@ public class AllocatableClusterResources { if ( ! between(applicationLimits.min().nodeResources(), applicationLimits.max().nodeResources(), advertisedResources)) continue; if ( ! systemLimits.isWithinRealLimits(realResources, clusterSpec.type())) continue; - var candidate = new AllocatableClusterResources(wantedResources.withNodes(actualNodes).with(realResources), + var candidate = new AllocatableClusterResources(wantedResources.with(realResources), advertisedResources, wantedResources, clusterSpec); diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/AllocationOptimizer.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/AllocationOptimizer.java index b8a80a9bd2b..30432c1c078 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/AllocationOptimizer.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/AllocationOptimizer.java @@ -66,15 +66,12 @@ public class AllocationOptimizer { groupsAdjustedForRedundancy, limits, target, current, clusterModel)); var allocatableResources = AllocatableClusterResources.from(next, current.clusterSpec(), limits, - clusterModel.cluster().required(), hosts, nodeRepository); - if (allocatableResources.isEmpty()) continue; if (bestAllocation.isEmpty() || allocatableResources.get().preferableTo(bestAllocation.get())) bestAllocation = allocatableResources; } } - return bestAllocation; } diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/ClusterModel.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/ClusterModel.java index e1e670c5b01..3c26eef41d9 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/ClusterModel.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/ClusterModel.java @@ -32,9 +32,11 @@ public class ClusterModel { static final double idealQueryCpuLoad = 0.8; static final double idealWriteCpuLoad = 0.95; static final double idealMemoryLoad = 0.65; - static final double idealDiskLoad = 0.6; + static final double idealContainerDiskLoad = 0.95; + static final double idealContentDiskLoad = 0.6; private final Application application; + private final ClusterSpec clusterSpec; private final Cluster cluster; /** The current nodes of this cluster, or empty if this models a new cluster not yet deployed */ private final NodeList nodes; @@ -54,6 +56,7 @@ public class ClusterModel { MetricsDb metricsDb, Clock clock) { this.application = application; + this.clusterSpec = clusterSpec; this.cluster = cluster; this.nodes = clusterNodes; this.clock = clock; @@ -64,12 +67,14 @@ public class ClusterModel { /** For testing */ ClusterModel(Application application, + ClusterSpec clusterSpec, Cluster cluster, Clock clock, Duration scalingDuration, ClusterTimeseries clusterTimeseries, ClusterNodesTimeseries nodeTimeseries) { this.application = application; + this.clusterSpec = clusterSpec; this.cluster = cluster; this.nodes = null; this.clock = clock; @@ -79,6 +84,8 @@ public class ClusterModel { this.nodeTimeseries = nodeTimeseries; } + public Application application() { return application; } + public ClusterSpec clusterSpec() { return clusterSpec; } public Cluster cluster() { return cluster; } /** Returns the predicted duration of a rescaling of this cluster */ @@ -110,7 +117,7 @@ public class ClusterModel { public Load averageLoad() { return nodeTimeseries().averageLoad(clock.instant().minus(scalingDuration())); } public Load idealLoad() { - return new Load(idealCpuLoad(), idealMemoryLoad, idealDiskLoad); + return new Load(idealCpuLoad(), idealMemoryLoad, idealDiskLoad()); } /** Ideal cpu load must take the application traffic fraction into account */ @@ -193,6 +200,12 @@ public class ClusterModel { return duration; } + private double idealDiskLoad() { + // Stateless clusters are not expected to consume more disk over time - + // if they do it is due to logs which will be rotated away right before the disk is full + return clusterSpec.isStateful() ? idealContentDiskLoad : idealContainerDiskLoad; + } + /** * Create a cluster model if possible and logs a warning and returns empty otherwise. * This is useful in cases where it's possible to continue without the cluser model, diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/LoadBalancerExpirer.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/LoadBalancerExpirer.java index 3b74533772b..fbc3d236421 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/LoadBalancerExpirer.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/LoadBalancerExpirer.java @@ -79,7 +79,7 @@ public class LoadBalancerExpirer extends NodeRepositoryMaintainer { allocatedNodes(lb.id()).isEmpty(), lb -> { try { attempts.add(1); - log.log(Level.INFO, () -> "Removing expired inactive load balancer " + lb.id()); + log.log(Level.INFO, () -> "Removing expired inactive " + lb.id()); service.remove(lb.id().application(), lb.id().cluster()); db.removeLoadBalancer(lb.id()); } catch (Exception e){ diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeMover.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeMover.java index 6c103627ad4..57db874fb84 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeMover.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeMover.java @@ -59,7 +59,7 @@ public abstract class NodeMover<MOVE> extends NodeRepositoryMaintainer { protected final MOVE findBestMove(NodesAndHosts<? extends NodeList> allNodes) { HostCapacity capacity = new HostCapacity(allNodes, nodeRepository().resourcesCalculator()); MOVE bestMove = emptyMove; - // Shuffle nodes so we did not get stuck if the chosen move is consistently discarded. Node moves happen through + // Shuffle nodes to not get stuck if the chosen move is consistently discarded. Node moves happen through // a soft request to retire (preferToRetire), which node allocation can disregard NodeList activeNodes = allNodes.nodes().nodeType(NodeType.tenant) .state(Node.State.active) diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/SwitchRebalancer.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/SwitchRebalancer.java index 7bea671fbac..f01e8ecd301 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/SwitchRebalancer.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/SwitchRebalancer.java @@ -16,6 +16,7 @@ import java.time.Duration; import java.util.HashSet; import java.util.List; import java.util.Set; +import java.util.logging.Logger; /** * Ensure that nodes within a cluster a spread across hosts on exclusive network switches. @@ -24,6 +25,8 @@ import java.util.Set; */ public class SwitchRebalancer extends NodeMover<Move> { + private static final Logger LOG = Logger.getLogger(SwitchRebalancer.class.getName()); + private final Metric metric; private final Deployer deployer; @@ -40,7 +43,12 @@ public class SwitchRebalancer extends NodeMover<Move> { NodesAndHosts<NodeList> allNodes = NodesAndHosts.create(nodeRepository().nodes().list()); // Lockless as strong consistency is not needed if (!zoneIsStable(allNodes.nodes())) return 1.0; - findBestMove(allNodes).execute(false, Agent.SwitchRebalancer, deployer, metric, nodeRepository()); + Move bestMove = findBestMove(allNodes); + if (!bestMove.isEmpty()) { + LOG.info("Trying " + bestMove + " (" + bestMove.fromHost().switchHostname().orElse("<none>") + + " -> " + bestMove.toHost().switchHostname().orElse("<none>") + ")"); + } + bestMove.execute(false, Agent.SwitchRebalancer, deployer, metric, nodeRepository()); return 1.0; } diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/persistence/NodeResourcesSerializer.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/persistence/NodeResourcesSerializer.java index 8c421443a65..1c3d3f5c489 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/persistence/NodeResourcesSerializer.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/persistence/NodeResourcesSerializer.java @@ -20,6 +20,7 @@ public class NodeResourcesSerializer { private static final String storageTypeKey = "storageType"; static void toSlime(NodeResources resources, Cursor resourcesObject) { + if (resources.isUnspecified()) return; resourcesObject.setDouble(vcpuKey, resources.vcpu()); resourcesObject.setDouble(memoryKey, resources.memoryGb()); resourcesObject.setDouble(diskKey, resources.diskGb()); @@ -29,6 +30,7 @@ public class NodeResourcesSerializer { } static NodeResources resourcesFromSlime(Inspector resources) { + if ( ! resources.field(vcpuKey).valid()) return NodeResources.unspecified(); return new NodeResources(resources.field(vcpuKey).asDouble(), resources.field(memoryKey).asDouble(), resources.field(diskKey).asDouble(), diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/Activator.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/Activator.java index 0d32b21016c..8c358301b85 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/Activator.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/Activator.java @@ -113,7 +113,8 @@ class Activator { var cluster = modified.cluster(clusterEntry.getKey()).get(); var previousResources = oldNodes.cluster(clusterEntry.getKey()).toResources(); var currentResources = clusterEntry.getValue().toResources(); - if ( ! previousResources.justNumbers().equals(currentResources.justNumbers())) { + if ( previousResources.nodeResources().isUnspecified() + || ! previousResources.justNumbers().equals(currentResources.justNumbers())) { cluster = cluster.with(ScalingEvent.create(previousResources, currentResources, generation, at)); } if (cluster.targetResources().isPresent() diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/CapacityPolicies.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/CapacityPolicies.java index 0c2c3c48df1..4088d717a67 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/CapacityPolicies.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/CapacityPolicies.java @@ -3,6 +3,7 @@ package com.yahoo.vespa.hosted.provision.provisioning; import com.yahoo.config.provision.ApplicationId; import com.yahoo.config.provision.Capacity; +import com.yahoo.config.provision.ClusterResources; import com.yahoo.config.provision.ClusterSpec; import com.yahoo.config.provision.Environment; import com.yahoo.config.provision.NodeResources; @@ -29,10 +30,21 @@ public class CapacityPolicies { this.sharedHosts = type -> PermanentFlags.SHARED_HOST.bindTo(nodeRepository.flagSource()).value().isEnabled(type.name()); } - public int decideSize(int requested, boolean required, boolean canFail, boolean isTester, ClusterSpec cluster) { + public Capacity applyOn(Capacity capacity, ApplicationId application) { + return capacity.withLimits(applyOn(capacity.minResources(), capacity, application), + applyOn(capacity.maxResources(), capacity, application)); + } + + private ClusterResources applyOn(ClusterResources resources, Capacity capacity, ApplicationId application) { + int nodes = decideSize(resources.nodes(), capacity.isRequired(), application.instance().isTester()); + int groups = Math.min(resources.groups(), nodes); // cannot have more groups than nodes + var nodeResources = decideNodeResources(resources.nodeResources(), capacity.isRequired()); + return new ClusterResources(nodes, groups, nodeResources); + } + + private int decideSize(int requested, boolean required, boolean isTester) { if (isTester) return 1; - ensureRedundancy(requested, cluster, canFail); if (required) return requested; switch(zone.environment()) { case dev : case test : return 1; @@ -43,11 +55,9 @@ public class CapacityPolicies { } } - public NodeResources decideNodeResources(NodeResources target, boolean required, ClusterSpec cluster) { - if (target.isUnspecified()) - target = defaultNodeResources(cluster.type()); - + private NodeResources decideNodeResources(NodeResources target, boolean required) { if (required) return target; + if (target.isUnspecified()) return target; // Cannot be modified // Dev does not cap the cpu or network of containers since usage is spotty: Allocate just a small amount exclusively if (zone.environment() == Environment.dev && !zone.getCloud().dynamicProvisioning()) @@ -77,28 +87,11 @@ public class CapacityPolicies { } /** - * Whether or not the nodes requested can share physical host with other applications. + * Returns whether the nodes requested can share physical host with other applications. * A security feature which only makes sense for prod. */ public boolean decideExclusivity(Capacity capacity, boolean requestedExclusivity) { return requestedExclusivity && (capacity.isRequired() || zone.environment() == Environment.prod); } - /** - * Throw if the node count is 1 for container and content clusters and we're in a production zone - * - * @throws IllegalArgumentException if only one node is requested and we can fail - */ - private void ensureRedundancy(int nodeCount, ClusterSpec cluster, boolean canFail) { - if (canFail && - nodeCount == 1 && - requiresRedundancy(cluster.type()) && - zone.environment().isProduction()) - throw new IllegalArgumentException("Deployments to prod require at least 2 nodes per cluster for redundancy. Not fulfilled for " + cluster); - } - - private static boolean requiresRedundancy(ClusterSpec.Type clusterType) { - return clusterType.isContent() || clusterType.isContainer(); - } - } diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/GroupPreparer.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/GroupPreparer.java index ba46f0a9535..2d93763c631 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/GroupPreparer.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/GroupPreparer.java @@ -71,7 +71,7 @@ public class GroupPreparer { // Try preparing in memory without global unallocated lock. Most of the time there should be no changes and we // can return nodes previously allocated. NodeAllocation probeAllocation = prepareAllocation(application, cluster, requestedNodes, surplusActiveNodes, - indices::probeNext, wantedGroups, allNodesAndHosts); + indices::probeNext, wantedGroups, allNodesAndHosts); if (probeAllocation.fulfilledAndNoChanges()) { List<Node> acceptedNodes = probeAllocation.finalNodes(); surplusActiveNodes.removeAll(acceptedNodes); diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/NodeAllocation.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/NodeAllocation.java index 7cc4acc20b0..6c22a26d88a 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/NodeAllocation.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/NodeAllocation.java @@ -99,12 +99,12 @@ class NodeAllocation { * Note that if unallocated nodes are offered before allocated nodes, this will unnecessarily * reject allocated nodes due to index duplicates. * - * @param nodesPrioritized the nodes which are potentially on offer. These may belong to a different application etc. + * @param candidates the nodes which are potentially on offer. These may belong to a different application etc. * @return the subset of offeredNodes which was accepted, with the correct allocation assigned */ - List<Node> offer(List<NodeCandidate> nodesPrioritized) { + List<Node> offer(List<NodeCandidate> candidates) { List<Node> accepted = new ArrayList<>(); - for (NodeCandidate candidate : nodesPrioritized) { + for (NodeCandidate candidate : candidates) { if (candidate.allocation().isPresent()) { Allocation allocation = candidate.allocation().get(); ClusterMembership membership = allocation.membership(); @@ -121,7 +121,7 @@ class NodeAllocation { if ((! saturated() && hasCompatibleFlavor(candidate) && requestedNodes.acceptable(candidate)) || acceptToRetire) { candidate = candidate.withNode(); if (candidate.isValid()) - accepted.add(acceptNode(candidate, shouldRetire(candidate, nodesPrioritized), resizeable)); + accepted.add(acceptNode(candidate, shouldRetire(candidate, candidates), resizeable)); } } else if (! saturated() && hasCompatibleFlavor(candidate)) { diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/NodeCandidate.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/NodeCandidate.java index 4f0ae688b1c..62ac1f0d0e6 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/NodeCandidate.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/NodeCandidate.java @@ -238,7 +238,6 @@ public abstract class NodeCandidate implements Nodelike, Comparable<NodeCandidat private double skewWith(NodeResources resources) { if (parent.isEmpty()) return 0; - NodeResources free = freeParentCapacity.justNumbers().subtract(resources.justNumbers()); return Node.skew(parent.get().flavor().resources(), free); } diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/NodeRepositoryProvisioner.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/NodeRepositoryProvisioner.java index b35b0a5e301..7d15a2b30b1 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/NodeRepositoryProvisioner.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/NodeRepositoryProvisioner.java @@ -84,8 +84,8 @@ public class NodeRepositoryProvisioner implements Provisioner { @Override public List<HostSpec> prepare(ApplicationId application, ClusterSpec cluster, Capacity requested, ProvisionLogger logger) { - log.log(Level.FINE, () -> "Received deploy prepare request for " + requested + - " for application " + application + ", cluster " + cluster); + log.log(Level.FINE, "Received deploy prepare request for " + requested + + " for application " + application + ", cluster " + cluster); if (cluster.group().isPresent()) throw new IllegalArgumentException("Node requests cannot specify a group"); @@ -96,21 +96,21 @@ public class NodeRepositoryProvisioner implements Provisioner { NodeResources resources; NodeSpec nodeSpec; if (requested.type() == NodeType.tenant) { - ClusterResources target = decideTargetResources(application, cluster, requested); - int nodeCount = capacityPolicies.decideSize(target.nodes(), - requested.isRequired(), - requested.canFail(), - application.instance().isTester(), - cluster); - groups = Math.min(target.groups(), nodeCount); // cannot have more groups than nodes - resources = capacityPolicies.decideNodeResources(target.nodeResources(), requested.isRequired(), cluster); - boolean exclusive = capacityPolicies.decideExclusivity(requested, cluster.isExclusive()); - nodeSpec = NodeSpec.from(nodeCount, resources, exclusive, requested.canFail()); - logIfDownscaled(target.nodes(), nodeCount, cluster, logger); + var actual = capacityPolicies.applyOn(requested, application); + ClusterResources target = decideTargetResources(application, cluster, actual); + boolean exclusive = capacityPolicies.decideExclusivity(actual, cluster.isExclusive()); + ensureRedundancy(target.nodes(), cluster, actual.canFail(), application); + logIfDownscaled(requested.minResources().nodes(), actual.minResources().nodes(), cluster, logger); + + groups = target.groups(); + resources = target.nodeResources().isUnspecified() ? capacityPolicies.defaultNodeResources(cluster.type()) + : target.nodeResources(); + nodeSpec = NodeSpec.from(target.nodes(), resources, exclusive, actual.canFail()); } else { groups = 1; // type request with multiple groups is not supported - resources = requested.minResources().nodeResources(); + resources = requested.minResources().nodeResources().isUnspecified() ? capacityPolicies.defaultNodeResources(cluster.type()) + : requested.minResources().nodeResources(); nodeSpec = NodeSpec.from(requested.type()); } return asSortedHosts(preparer.prepare(application, cluster, nodeSpec, groups), resources); @@ -164,12 +164,20 @@ public class NodeRepositoryProvisioner implements Provisioner { boolean firstDeployment = nodes.isEmpty(); AllocatableClusterResources currentResources = firstDeployment // start at min, preserve current resources otherwise - ? new AllocatableClusterResources(requested.minResources(), clusterSpec, nodeRepository) + ? new AllocatableClusterResources(initialResourcesFrom(requested, clusterSpec), clusterSpec, nodeRepository) : new AllocatableClusterResources(nodes.asList(), nodeRepository); var clusterModel = new ClusterModel(application, cluster, clusterSpec, nodes, nodeRepository.metricsDb(), nodeRepository.clock()); return within(Limits.of(requested), currentResources, firstDeployment, clusterModel); } + private ClusterResources initialResourcesFrom(Capacity requested, ClusterSpec clusterSpec) { + var initial = requested.minResources(); + if (initial.nodeResources().isUnspecified()) + initial = initial.with(capacityPolicies.defaultNodeResources(clusterSpec.type())); + return initial; + } + + /** Make the minimal adjustments needed to the current resources to stay within the limits */ private ClusterResources within(Limits limits, AllocatableClusterResources current, @@ -190,10 +198,28 @@ public class NodeRepositoryProvisioner implements Provisioner { .advertisedResources(); } - private void logIfDownscaled(int targetNodes, int actualNodes, ClusterSpec cluster, ProvisionLogger logger) { - if (zone.environment().isManuallyDeployed() && actualNodes < targetNodes) - logger.log(Level.INFO, "Requested " + targetNodes + " nodes for " + cluster + - ", downscaling to " + actualNodes + " nodes in " + zone.environment()); + /** + * Throw if the node count is 1 for container and content clusters and we're in a production zone + * + * @throws IllegalArgumentException if only one node is requested and we can fail + */ + private void ensureRedundancy(int nodeCount, ClusterSpec cluster, boolean canFail, ApplicationId application) { + if (! application.instance().isTester() && + canFail && + nodeCount == 1 && + requiresRedundancy(cluster.type()) && + zone.environment().isProduction()) + throw new IllegalArgumentException("Deployments to prod require at least 2 nodes per cluster for redundancy. Not fulfilled for " + cluster); + } + + private static boolean requiresRedundancy(ClusterSpec.Type clusterType) { + return clusterType.isContent() || clusterType.isContainer(); + } + + private void logIfDownscaled(int requestedMinNodes, int actualMinNodes, ClusterSpec cluster, ProvisionLogger logger) { + if (zone.environment().isManuallyDeployed() && actualMinNodes < requestedMinNodes) + logger.log(Level.INFO, "Requested " + requestedMinNodes + " nodes for " + cluster + + ", downscaling to " + actualMinNodes + " nodes in " + zone.environment()); } private List<HostSpec> asSortedHosts(List<Node> nodes, NodeResources requestedResources) { diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/Preparer.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/Preparer.java index 282b0d96cf4..b12368b2834 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/Preparer.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/Preparer.java @@ -10,7 +10,6 @@ import com.yahoo.vespa.hosted.provision.Node; import com.yahoo.vespa.hosted.provision.NodeList; import com.yahoo.vespa.hosted.provision.NodeRepository; import com.yahoo.vespa.hosted.provision.NodesAndHosts; -import com.yahoo.vespa.hosted.provision.node.Nodes; import java.util.ArrayList; import java.util.List; @@ -25,13 +24,11 @@ import java.util.stream.Collectors; */ class Preparer { - private final NodeRepository nodeRepository; private final GroupPreparer groupPreparer; private final Optional<LoadBalancerProvisioner> loadBalancerProvisioner; public Preparer(NodeRepository nodeRepository, Optional<HostProvisioner> hostProvisioner, Optional<LoadBalancerProvisioner> loadBalancerProvisioner) { - this.nodeRepository = nodeRepository; this.loadBalancerProvisioner = loadBalancerProvisioner; this.groupPreparer = new GroupPreparer(nodeRepository, hostProvisioner); } @@ -69,9 +66,10 @@ class Preparer { for (int groupIndex = 0; groupIndex < wantedGroups; groupIndex++) { ClusterSpec clusterGroup = cluster.with(Optional.of(ClusterSpec.Group.from(groupIndex))); - GroupPreparer.PrepareResult result = groupPreparer.prepare( - application, clusterGroup, requestedNodes.fraction(wantedGroups), - surplusNodes, indices, wantedGroups, allNodesAndHosts); + GroupPreparer.PrepareResult result = groupPreparer.prepare(application, clusterGroup, + requestedNodes.fraction(wantedGroups), + surplusNodes, indices, wantedGroups, + allNodesAndHosts); allNodesAndHosts = result.allNodesAndHosts; // Might have changed List<Node> accepted = result.prepared; if (requestedNodes.rejectNonActiveParent()) { diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/AutoscalingTest.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/AutoscalingTest.java index 230278878b4..a04a3828f13 100644 --- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/AutoscalingTest.java +++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/AutoscalingTest.java @@ -15,6 +15,7 @@ import com.yahoo.config.provision.NodeType; import com.yahoo.config.provision.RegionName; import com.yahoo.config.provision.SystemName; import com.yahoo.config.provision.Zone; +import com.yahoo.vespa.hosted.provision.Node; import com.yahoo.vespa.hosted.provision.NodeRepository; import com.yahoo.vespa.hosted.provision.Nodelike; import com.yahoo.vespa.hosted.provision.provisioning.CapacityPolicies; @@ -51,10 +52,10 @@ public class AutoscalingTest { tester.deploy(application1, cluster1, 5, 1, hostResources); tester.clock().advance(Duration.ofDays(1)); - assertTrue("No measurements -> No change", tester.autoscale(application1, cluster1.id(), capacity).isEmpty()); + assertTrue("No measurements -> No change", tester.autoscale(application1, cluster1, capacity).isEmpty()); tester.addCpuMeasurements(0.25f, 1f, 59, application1); - assertTrue("Too few measurements -> No change", tester.autoscale(application1, cluster1.id(), capacity).isEmpty()); + assertTrue("Too few measurements -> No change", tester.autoscale(application1, cluster1, capacity).isEmpty()); tester.clock().advance(Duration.ofDays(1)); tester.addCpuMeasurements(0.25f, 1f, 120, application1); @@ -62,10 +63,10 @@ public class AutoscalingTest { tester.addQueryRateMeasurements(application1, cluster1.id(), 10, t -> t == 0 ? 20.0 : 10.0); // Query traffic only ClusterResources scaledResources = tester.assertResources("Scaling up since resource usage is too high", 15, 1, 1.2, 28.6, 28.6, - tester.autoscale(application1, cluster1.id(), capacity)); + tester.autoscale(application1, cluster1, capacity)); tester.deploy(application1, cluster1, scaledResources); - assertTrue("Cluster in flux -> No further change", tester.autoscale(application1, cluster1.id(), capacity).isEmpty()); + assertTrue("Cluster in flux -> No further change", tester.autoscale(application1, cluster1, capacity).isEmpty()); tester.deactivateRetired(application1, cluster1, scaledResources); @@ -74,19 +75,19 @@ public class AutoscalingTest { tester.clock().advance(Duration.ofMinutes(-10 * 5)); tester.addQueryRateMeasurements(application1, cluster1.id(), 10, t -> t == 0 ? 20.0 : 10.0); // Query traffic only assertTrue("Load change is large, but insufficient measurements for new config -> No change", - tester.autoscale(application1, cluster1.id(), capacity).isEmpty()); + tester.autoscale(application1, cluster1, capacity).isEmpty()); tester.addCpuMeasurements(0.19f, 1f, 100, application1); tester.clock().advance(Duration.ofMinutes(-10 * 5)); tester.addQueryRateMeasurements(application1, cluster1.id(), 10, t -> t == 0 ? 20.0 : 10.0); // Query traffic only - assertEquals("Load change is small -> No change", Optional.empty(), tester.autoscale(application1, cluster1.id(), capacity).target()); + assertEquals("Load change is small -> No change", Optional.empty(), tester.autoscale(application1, cluster1, capacity).target()); tester.addCpuMeasurements(0.1f, 1f, 120, application1); tester.clock().advance(Duration.ofMinutes(-10 * 5)); tester.addQueryRateMeasurements(application1, cluster1.id(), 10, t -> t == 0 ? 20.0 : 10.0); // Query traffic only tester.assertResources("Scaling down to minimum since usage has gone down significantly", 7, 1, 1.0, 66.7, 66.7, - tester.autoscale(application1, cluster1.id(), capacity)); + tester.autoscale(application1, cluster1, capacity)); var events = tester.nodeRepository().applications().get(application1).get().cluster(cluster1.id()).get().scalingEvents(); } @@ -109,8 +110,8 @@ public class AutoscalingTest { tester.clock().advance(Duration.ofMinutes(-10 * 5)); tester.addQueryRateMeasurements(application1, cluster1.id(), 10, t -> t == 0 ? 20.0 : 10.0); // Query traffic only ClusterResources scaledResources = tester.assertResources("Scaling up since cpu usage is too high", - 7, 1, 2.5, 80.0, 80.0, - tester.autoscale(application1, cluster1.id(), capacity)); + 7, 1, 2.5, 80.0, 50.5, + tester.autoscale(application1, cluster1, capacity)); tester.deploy(application1, cluster1, scaledResources); tester.deactivateRetired(application1, cluster1, scaledResources); @@ -119,8 +120,8 @@ public class AutoscalingTest { tester.clock().advance(Duration.ofMinutes(-10 * 5)); tester.addQueryRateMeasurements(application1, cluster1.id(), 10, t -> t == 0 ? 20.0 : 10.0); // Query traffic only tester.assertResources("Scaling down since cpu usage has gone down", - 4, 1, 2.5, 68.6, 68.6, - tester.autoscale(application1, cluster1.id(), capacity)); + 4, 1, 2.5, 68.6, 27.4, + tester.autoscale(application1, cluster1, capacity)); } @Test @@ -147,7 +148,7 @@ public class AutoscalingTest { var capacity = Capacity.from(min, max); ClusterResources scaledResources = tester.assertResources("Scaling up since resource usage is too high", 14, 1, 1.4, 30.8, 30.8, - tester.autoscale(application1, cluster1.id(), capacity)); + tester.autoscale(application1, cluster1, capacity)); assertEquals("Disk speed from min/max is used", NodeResources.DiskSpeed.any, scaledResources.nodeResources().diskSpeed()); tester.deploy(application1, cluster1, scaledResources); @@ -180,7 +181,7 @@ public class AutoscalingTest { // Autoscaling: Uses disk-speed any as well tester.clock().advance(Duration.ofDays(2)); tester.addCpuMeasurements(0.8f, 1f, 120, application1); - Autoscaler.Advice advice = tester.autoscale(application1, cluster1.id(), capacity); + Autoscaler.Advice advice = tester.autoscale(application1, cluster1, capacity); assertEquals(NodeResources.DiskSpeed.any, advice.target().get().nodeResources().diskSpeed()); @@ -204,8 +205,8 @@ public class AutoscalingTest { tester.clock().advance(Duration.ofMinutes(-10 * 5)); tester.addQueryRateMeasurements(application1, cluster1.id(), 10, t -> t == 0 ? 20.0 : 10.0); // Query traffic only tester.assertResources("Scaling up to limit since resource usage is too high", - 6, 1, 2.4, 78.0, 79.0, - tester.autoscale(application1, cluster1.id(), capacity)); + 6, 1, 2.4, 78.0, 70.0, + tester.autoscale(application1, cluster1, capacity)); } @Test @@ -224,7 +225,7 @@ public class AutoscalingTest { tester.addMeasurements(0.05f, 0.05f, 0.05f, 0, 120, application1); tester.assertResources("Scaling down to limit since resource usage is low", 4, 1, 1.8, 7.7, 10.0, - tester.autoscale(application1, cluster1.id(), capacity)); + tester.autoscale(application1, cluster1, capacity)); } @Test @@ -252,7 +253,7 @@ public class AutoscalingTest { tester.assertResources("Scaling up to limit since resource usage is too high", 4, 1, defaultResources.vcpu(), defaultResources.memoryGb(), defaultResources.diskGb(), - tester.autoscale(application1, cluster1.id(), capacity)); + tester.autoscale(application1, cluster1, capacity)); } @Test @@ -273,7 +274,7 @@ public class AutoscalingTest { tester.addQueryRateMeasurements(application1, cluster1.id(), 10, t -> t == 0 ? 20.0 : 10.0); // Query traffic only tester.assertResources("Scaling up since resource usage is too high", 6, 6, 3.6, 8.0, 10.0, - tester.autoscale(application1, cluster1.id(), capacity)); + tester.autoscale(application1, cluster1, capacity)); } @Test @@ -291,7 +292,7 @@ public class AutoscalingTest { tester.deploy(application1, cluster1, 5, 1, resources); tester.clock().advance(Duration.ofDays(1)); tester.addCpuMeasurements(0.25f, 1f, 120, application1); - assertTrue(tester.autoscale(application1, cluster1.id(), capacity).isEmpty()); + assertTrue(tester.autoscale(application1, cluster1, capacity).isEmpty()); } @Test @@ -342,7 +343,7 @@ public class AutoscalingTest { tester.clock().advance(Duration.ofMinutes(-10 * 5)); tester.addQueryRateMeasurements(application1, cluster1.id(), 10, t -> t == 0 ? 20.0 : 10.0); // Query traffic only tester.assertResources("Scaling up since resource usage is too high", - 7, 1, 2.5, 80.0, 80.0, + 7, 1, 2.5, 80.0, 50.5, tester.suggest(application1, cluster1.id(), min, max)); } @@ -361,7 +362,7 @@ public class AutoscalingTest { tester.deploy(application1, cluster1, 2, 1, resources); tester.addMeasurements(0.5f, 0.6f, 0.7f, 1, false, true, 120, application1); assertTrue("Not scaling up since nodes were measured while cluster was unstable", - tester.autoscale(application1, cluster1.id(), capacity).isEmpty()); + tester.autoscale(application1, cluster1, capacity).isEmpty()); } @Test @@ -379,7 +380,7 @@ public class AutoscalingTest { tester.deploy(application1, cluster1, 2, 1, resources); tester.addMeasurements(0.5f, 0.6f, 0.7f, 1, true, false, 120, application1); assertTrue("Not scaling up since nodes were measured while cluster was unstable", - tester.autoscale(application1, cluster1.id(), capacity).isEmpty()); + tester.autoscale(application1, cluster1, capacity).isEmpty()); } @Test @@ -399,8 +400,8 @@ public class AutoscalingTest { tester.clock().advance(Duration.ofMinutes(-10 * 5)); tester.addQueryRateMeasurements(application1, cluster1.id(), 10, t -> t == 0 ? 20.0 : 10.0); // Query traffic only tester.assertResources("Scaling up since resource usage is too high", - 7, 7, 2.5, 80.0, 80.0, - tester.autoscale(application1, cluster1.id(), capacity)); + 7, 7, 2.5, 80.0, 50.5, + tester.autoscale(application1, cluster1, capacity)); } @Test @@ -422,8 +423,8 @@ public class AutoscalingTest { t -> t == 0 ? 20.0 : 10.0, t -> 1.0); tester.assertResources("Scaling up since resource usage is too high, changing to 1 group is cheaper", - 8, 1, 2.6, 83.3, 83.3, - tester.autoscale(application1, cluster1.id(), capacity)); + 8, 1, 2.6, 83.3, 52.6, + tester.autoscale(application1, cluster1, capacity)); } /** Same as above but mostly write traffic, which favors smaller groups */ @@ -446,8 +447,8 @@ public class AutoscalingTest { t -> t == 0 ? 20.0 : 10.0, t -> 100.0); tester.assertResources("Scaling down since resource usage is too high, changing to 1 group is cheaper", - 4, 1, 2.1, 83.3, 83.3, - tester.autoscale(application1, cluster1.id(), capacity)); + 4, 1, 2.1, 83.3, 52.6, + tester.autoscale(application1, cluster1, capacity)); } @Test @@ -469,7 +470,7 @@ public class AutoscalingTest { tester.addQueryRateMeasurements(application1, cluster1.id(), 10, t -> t == 0 ? 20.0 : 10.0); // Query traffic only tester.assertResources("Increase group size to reduce memory load", 8, 2, 12.4, 96.2, 62.5, - tester.autoscale(application1, cluster1.id(), capacity)); + tester.autoscale(application1, cluster1, capacity)); } @Test @@ -490,7 +491,7 @@ public class AutoscalingTest { tester.addMemMeasurements(0.02f, 0.95f, 120, application1); tester.assertResources("Scaling down", 6, 1, 2.9, 4.0, 95.0, - tester.autoscale(application1, cluster1.id(), capacity)); + tester.autoscale(application1, cluster1, capacity)); } @Test @@ -510,7 +511,7 @@ public class AutoscalingTest { tester.addMemMeasurements(0.02f, 0.95f, 120, application1); tester.clock().advance(Duration.ofMinutes(-10 * 5)); tester.addQueryRateMeasurements(application1, cluster1.id(), 10, t -> t == 0 ? 20.0 : 10.0); // Query traffic only - assertTrue(tester.autoscale(application1, cluster1.id(), capacity).target().isEmpty()); + assertTrue(tester.autoscale(application1, cluster1, capacity).target().isEmpty()); // Trying the same later causes autoscaling tester.clock().advance(Duration.ofDays(2)); @@ -519,7 +520,7 @@ public class AutoscalingTest { tester.addQueryRateMeasurements(application1, cluster1.id(), 10, t -> t == 0 ? 20.0 : 10.0); // Query traffic only tester.assertResources("Scaling down", 6, 1, 1.4, 4.0, 95.0, - tester.autoscale(application1, cluster1.id(), capacity)); + tester.autoscale(application1, cluster1, capacity)); } @Test @@ -530,7 +531,8 @@ public class AutoscalingTest { var capacity = Capacity.from(min, max); { // No memory tax - AutoscalingTester tester = new AutoscalingTester(Environment.prod, hostResources, + AutoscalingTester tester = new AutoscalingTester(new Zone(Environment.prod, RegionName.from("us-east")), + hostResources, new OnlySubtractingWhenForecastingCalculator(0)); ApplicationId application1 = tester.applicationId("app1"); @@ -542,11 +544,12 @@ public class AutoscalingTest { tester.addQueryRateMeasurements(application1, cluster1.id(), 10, t -> t == 0 ? 20.0 : 10.0); // Query traffic only tester.assertResources("Scaling up", 4, 1, 6.7, 20.5, 200, - tester.autoscale(application1, cluster1.id(), capacity)); + tester.autoscale(application1, cluster1, capacity)); } { // 15 Gb memory tax - AutoscalingTester tester = new AutoscalingTester(Environment.prod, hostResources, + AutoscalingTester tester = new AutoscalingTester(new Zone(Environment.prod, RegionName.from("us-east")), + hostResources, new OnlySubtractingWhenForecastingCalculator(15)); ApplicationId application1 = tester.applicationId("app1"); @@ -558,7 +561,7 @@ public class AutoscalingTest { tester.addQueryRateMeasurements(application1, cluster1.id(), 10, t -> t == 0 ? 20.0 : 10.0); // Query traffic only tester.assertResources("Scaling up", 4, 1, 6.7, 35.5, 200, - tester.autoscale(application1, cluster1.id(), capacity)); + tester.autoscale(application1, cluster1, capacity)); } } @@ -589,7 +592,7 @@ public class AutoscalingTest { tester.addMemMeasurements(0.9f, 0.6f, 120, application1); ClusterResources scaledResources = tester.assertResources("Scaling up since resource usage is too high.", 8, 1, 3, 83, 34.3, - tester.autoscale(application1, cluster1.id(), capacity)); + tester.autoscale(application1, cluster1, capacity)); tester.deploy(application1, cluster1, scaledResources); tester.deactivateRetired(application1, cluster1, scaledResources); @@ -600,7 +603,7 @@ public class AutoscalingTest { tester.addQueryRateMeasurements(application1, cluster1.id(), 10, t -> t == 0 ? 20.0 : 10.0); // Query traffic only tester.assertResources("Scaling down since resource usage has gone down", 5, 1, 3, 83, 36.0, - tester.autoscale(application1, cluster1.id(), capacity)); + tester.autoscale(application1, cluster1, capacity)); } @Test @@ -622,17 +625,17 @@ public class AutoscalingTest { // (no read share stored) tester.assertResources("Advice to scale up since we set aside for bcp by default", 7, 1, 3, 100, 100, - tester.autoscale(application1, cluster1.id(), capacity)); + tester.autoscale(application1, cluster1, capacity)); tester.storeReadShare(0.25, 0.5, application1); tester.assertResources("Half of global share is the same as the default assumption used above", 7, 1, 3, 100, 100, - tester.autoscale(application1, cluster1.id(), capacity)); + tester.autoscale(application1, cluster1, capacity)); tester.storeReadShare(0.5, 0.5, application1); tester.assertResources("Advice to scale down since we don't need room for bcp", 4, 1, 3, 100, 100, - tester.autoscale(application1, cluster1.id(), capacity)); + tester.autoscale(application1, cluster1, capacity)); } @Test @@ -656,7 +659,7 @@ public class AutoscalingTest { // (no query rate data) tester.assertResources("Scale up since we assume we need 2x cpu for growth when no data scaling time data", 5, 1, 6.3, 100, 100, - tester.autoscale(application1, cluster1.id(), capacity)); + tester.autoscale(application1, cluster1, capacity)); tester.setScalingDuration(application1, cluster1.id(), Duration.ofMinutes(5)); timeAdded = tester.addQueryRateMeasurements(application1, cluster1.id(), @@ -666,7 +669,7 @@ public class AutoscalingTest { tester.addCpuMeasurements(0.25f, 1f, 200, application1); tester.assertResources("Scale down since observed growth is slower than scaling time", 5, 1, 3.4, 100, 100, - tester.autoscale(application1, cluster1.id(), capacity)); + tester.autoscale(application1, cluster1, capacity)); tester.clearQueryRateMeasurements(application1, cluster1.id()); @@ -678,7 +681,7 @@ public class AutoscalingTest { tester.addCpuMeasurements(0.25f, 1f, 200, application1); tester.assertResources("Scale up since observed growth is faster than scaling time", 5, 1, 10.0, 100, 100, - tester.autoscale(application1, cluster1.id(), capacity)); + tester.autoscale(application1, cluster1, capacity)); } @Test @@ -705,63 +708,63 @@ public class AutoscalingTest { tester.addLoadMeasurements(application1, cluster1.id(), 100, t -> t == 0 ? 20.0 : 10.0, t -> 10.0); tester.assertResources("Query and write load is equal -> scale up somewhat", 5, 1, 7.3, 100, 100, - tester.autoscale(application1, cluster1.id(), capacity)); + tester.autoscale(application1, cluster1, capacity)); tester.addCpuMeasurements(0.4f, 1f, 100, application1); tester.clock().advance(Duration.ofMinutes(-100 * 5)); tester.addLoadMeasurements(application1, cluster1.id(), 100, t -> t == 0 ? 80.0 : 40.0, t -> 10.0); tester.assertResources("Query load is 4x write load -> scale up more", 5, 1, 9.5, 100, 100, - tester.autoscale(application1, cluster1.id(), capacity)); + tester.autoscale(application1, cluster1, capacity)); tester.addCpuMeasurements(0.3f, 1f, 100, application1); tester.clock().advance(Duration.ofMinutes(-100 * 5)); tester.addLoadMeasurements(application1, cluster1.id(), 100, t -> t == 0 ? 20.0 : 10.0, t -> 100.0); tester.assertResources("Write load is 10x query load -> scale down", 5, 1, 2.9, 100, 100, - tester.autoscale(application1, cluster1.id(), capacity)); + tester.autoscale(application1, cluster1, capacity)); tester.addCpuMeasurements(0.4f, 1f, 100, application1); tester.clock().advance(Duration.ofMinutes(-100 * 5)); tester.addLoadMeasurements(application1, cluster1.id(), 100, t -> t == 0 ? 20.0 : 10.0, t-> 0.0); tester.assertResources("Query only -> largest possible", 5, 1, 10.0, 100, 100, - tester.autoscale(application1, cluster1.id(), capacity)); + tester.autoscale(application1, cluster1, capacity)); tester.addCpuMeasurements(0.4f, 1f, 100, application1); tester.clock().advance(Duration.ofMinutes(-100 * 5)); tester.addLoadMeasurements(application1, cluster1.id(), 100, t -> 0.0, t -> 10.0); tester.assertResources("Write only -> smallest possible", 5, 1, 2.1, 100, 100, - tester.autoscale(application1, cluster1.id(), capacity)); + tester.autoscale(application1, cluster1, capacity)); } @Test - public void test_cd_autoscaling_test() { + public void test_autoscaling_in_dev() { NodeResources resources = new NodeResources(1, 4, 50, 1); - ClusterResources min = new ClusterResources( 2, 1, resources); + ClusterResources min = new ClusterResources( 1, 1, resources); ClusterResources max = new ClusterResources(3, 1, resources); - var capacity = Capacity.from(min, max); - AutoscalingTester tester = new AutoscalingTester(resources.withVcpu(resources.vcpu() * 2)); + Capacity capacity = Capacity.from(min, max, false, true); + + AutoscalingTester tester = new AutoscalingTester(Environment.dev, resources.withVcpu(resources.vcpu() * 2)); ApplicationId application1 = tester.applicationId("application1"); ClusterSpec cluster1 = tester.clusterSpec(ClusterSpec.Type.container, "cluster1"); - tester.deploy(application1, cluster1, 2, 1, resources); + tester.deploy(application1, cluster1, capacity); tester.addQueryRateMeasurements(application1, cluster1.id(), - 500, t -> 0.0); - tester.addCpuMeasurements(0.5f, 1f, 10, application1); - - tester.assertResources("Advice to scale up since observed growth is much faster than scaling time", - 3, 1, 1, 4, 50, - tester.autoscale(application1, cluster1.id(), capacity)); + 500, t -> 100.0); + tester.addCpuMeasurements(1.0f, 1f, 10, application1); + assertTrue("Not attempting to scale up because policies dictate we'll only get one node", + tester.autoscale(application1, cluster1, capacity).target().isEmpty()); } + /** Same setup as test_autoscaling_in_dev(), just with required = true */ @Test - public void test_autoscaling_in_dev() { + public void test_autoscaling_in_dev_with_required_resources() { NodeResources resources = new NodeResources(1, 4, 50, 1); ClusterResources min = new ClusterResources( 1, 1, resources); ClusterResources max = new ClusterResources(3, 1, resources); - Capacity capacity = Capacity.from(min, max, false, true); + Capacity capacity = Capacity.from(min, max, true, true); AutoscalingTester tester = new AutoscalingTester(Environment.dev, resources.withVcpu(resources.vcpu() * 2)); ApplicationId application1 = tester.applicationId("application1"); @@ -771,19 +774,20 @@ public class AutoscalingTest { tester.addQueryRateMeasurements(application1, cluster1.id(), 500, t -> 100.0); tester.addCpuMeasurements(1.0f, 1f, 10, application1); - assertTrue("Not attempting to scale up because policies dictate we'll only get one node", - tester.autoscale(application1, cluster1.id(), capacity).target().isEmpty()); + tester.assertResources("We scale up even in dev because resources are required", + 3, 1, 1.0, 4, 50, + tester.autoscale(application1, cluster1, capacity)); } - /** Same setup as test_autoscaling_in_dev(), just with required = true */ @Test - public void test_autoscaling_in_dev_with_required_resources() { - NodeResources resources = new NodeResources(1, 4, 50, 1); + public void test_autoscaling_in_dev_with_required_unspecified_resources() { + NodeResources resources = NodeResources.unspecified(); ClusterResources min = new ClusterResources( 1, 1, resources); ClusterResources max = new ClusterResources(3, 1, resources); Capacity capacity = Capacity.from(min, max, true, true); - AutoscalingTester tester = new AutoscalingTester(Environment.dev, resources.withVcpu(resources.vcpu() * 2)); + AutoscalingTester tester = new AutoscalingTester(Environment.dev, + new NodeResources(10, 16, 100, 2)); ApplicationId application1 = tester.applicationId("application1"); ClusterSpec cluster1 = tester.clusterSpec(ClusterSpec.Type.container, "cluster1"); @@ -792,8 +796,8 @@ public class AutoscalingTest { 500, t -> 100.0); tester.addCpuMeasurements(1.0f, 1f, 10, application1); tester.assertResources("We scale up even in dev because resources are required", - 3, 1, 1.0, 4, 50, - tester.autoscale(application1, cluster1.id(), capacity)); + 3, 1, 1.5, 8, 50, + tester.autoscale(application1, cluster1, capacity)); } /** diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/AutoscalingTester.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/AutoscalingTester.java index 998a1e86c3e..8586704a426 100644 --- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/AutoscalingTester.java +++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/AutoscalingTester.java @@ -24,6 +24,7 @@ import com.yahoo.vespa.hosted.provision.applications.Cluster; import com.yahoo.vespa.hosted.provision.applications.ScalingEvent; import com.yahoo.vespa.hosted.provision.node.Agent; import com.yahoo.vespa.hosted.provision.node.IP; +import com.yahoo.vespa.hosted.provision.provisioning.CapacityPolicies; import com.yahoo.vespa.hosted.provision.provisioning.HostResourcesCalculator; import com.yahoo.vespa.hosted.provision.provisioning.ProvisioningTester; @@ -46,6 +47,7 @@ class AutoscalingTester { private final ProvisioningTester provisioningTester; private final Autoscaler autoscaler; private final MockHostResourcesCalculator hostResourcesCalculator; + private final CapacityPolicies capacityPolicies; /** Creates an autoscaling tester with a single host type ready */ public AutoscalingTester(NodeResources hostResources) { @@ -53,11 +55,15 @@ class AutoscalingTester { } public AutoscalingTester(Environment environment, NodeResources hostResources) { - this(environment, hostResources, null); + this(new Zone(environment, RegionName.from("us-east")), hostResources, null); } - public AutoscalingTester(Environment environment, NodeResources hostResources, HostResourcesCalculator resourcesCalculator) { - this(new Zone(environment, RegionName.from("us-east")), List.of(new Flavor("hostFlavor", hostResources)), resourcesCalculator); + public AutoscalingTester(Zone zone, NodeResources hostResources) { + this(zone, hostResources, null); + } + + public AutoscalingTester(Zone zone, NodeResources hostResources, HostResourcesCalculator resourcesCalculator) { + this(zone, List.of(new Flavor("hostFlavor", hostResources)), resourcesCalculator); provisioningTester.makeReadyNodes(20, "hostFlavor", NodeType.host, 8); provisioningTester.activateTenantHosts(); } @@ -76,6 +82,7 @@ class AutoscalingTester { hostResourcesCalculator = new MockHostResourcesCalculator(zone); autoscaler = new Autoscaler(nodeRepository()); + capacityPolicies = new CapacityPolicies(provisioningTester.nodeRepository()); } public ProvisioningTester provisioning() { return provisioningTester; } @@ -143,7 +150,7 @@ class AutoscalingTester { for (Node node : nodes) { Load load = new Load(value, ClusterModel.idealMemoryLoad * otherResourcesLoad, - ClusterModel.idealDiskLoad * otherResourcesLoad).multiply(oneExtraNodeFactor); + ClusterModel.idealContentDiskLoad * otherResourcesLoad).multiply(oneExtraNodeFactor); nodeMetricsDb().addNodeMetrics(List.of(new Pair<>(node.hostname(), new NodeMetricSnapshot(clock().instant(), load, @@ -175,7 +182,7 @@ class AutoscalingTester { clock().advance(Duration.ofSeconds(150)); for (Node node : nodes) { Load load = new Load(ClusterModel.idealQueryCpuLoad * otherResourcesLoad, - ClusterModel.idealDiskLoad * otherResourcesLoad, + ClusterModel.idealContentDiskLoad * otherResourcesLoad, value).multiply(oneExtraNodeFactor); nodeMetricsDb().addNodeMetrics(List.of(new Pair<>(node.hostname(), new NodeMetricSnapshot(clock().instant(), @@ -208,10 +215,10 @@ class AutoscalingTester { for (Node node : nodes) { float cpu = (float) 0.2 * otherResourcesLoad * oneExtraNodeFactor; float memory = value * oneExtraNodeFactor; - float disk = (float) ClusterModel.idealDiskLoad * otherResourcesLoad * oneExtraNodeFactor; + float disk = (float) ClusterModel.idealContentDiskLoad * otherResourcesLoad * oneExtraNodeFactor; Load load = new Load(0.2 * otherResourcesLoad, value, - ClusterModel.idealDiskLoad * otherResourcesLoad).multiply(oneExtraNodeFactor); + ClusterModel.idealContentDiskLoad * otherResourcesLoad).multiply(oneExtraNodeFactor); nodeMetricsDb().addNodeMetrics(List.of(new Pair<>(node.hostname(), new NodeMetricSnapshot(clock().instant(), load, @@ -306,13 +313,14 @@ class AutoscalingTester { ((MemoryMetricsDb)nodeMetricsDb()).clearClusterMetrics(application, cluster); } - public Autoscaler.Advice autoscale(ApplicationId applicationId, ClusterSpec.Id clusterId, Capacity capacity) { + public Autoscaler.Advice autoscale(ApplicationId applicationId, ClusterSpec cluster, Capacity capacity) { + capacity = capacityPolicies.applyOn(capacity, applicationId); Application application = nodeRepository().applications().get(applicationId).orElse(Application.empty(applicationId)) - .withCluster(clusterId, false, capacity); + .withCluster(cluster.id(), false, capacity); try (Mutex lock = nodeRepository().nodes().lock(applicationId)) { nodeRepository().applications().put(application, lock); } - return autoscaler.autoscale(application, application.clusters().get(clusterId), + return autoscaler.autoscale(application, application.clusters().get(cluster.id()), nodeRepository().nodes().list(Node.State.active).owner(applicationId)); } diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/ClusterModelTest.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/ClusterModelTest.java index bd7300ad6bf..516a7a92d04 100644 --- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/ClusterModelTest.java +++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/ClusterModelTest.java @@ -30,19 +30,20 @@ public class ClusterModelTest { public void test_traffic_headroom() { ManualClock clock = new ManualClock(); Application application = Application.empty(ApplicationId.from("t1", "a1", "i1")); + ClusterSpec clusterSpec = clusterSpec(); Cluster cluster = cluster(new NodeResources(1, 10, 100, 1)); application = application.with(cluster); // No current traffic share: Ideal load is low but capped var model1 = new ClusterModel(application.with(new Status(0.0, 1.0)), - cluster, clock, Duration.ofMinutes(10), + clusterSpec, cluster, clock, Duration.ofMinutes(10), timeseries(cluster,100, t -> t == 0 ? 10000.0 : 0.0, t -> 0.0, clock), ClusterNodesTimeseries.empty()); assertEquals(0.131, model1.idealLoad().cpu(), delta); // Almost no current traffic share: Ideal load is low but capped var model2 = new ClusterModel(application.with(new Status(0.0001, 1.0)), - cluster, clock, Duration.ofMinutes(10), + clusterSpec, cluster, clock, Duration.ofMinutes(10), timeseries(cluster,100, t -> t == 0 ? 10000.0 : 0.0, t -> 0.0, clock), ClusterNodesTimeseries.empty()); assertEquals(0.131, model2.idealLoad().cpu(), delta); @@ -53,24 +54,32 @@ public class ClusterModelTest { ManualClock clock = new ManualClock(); Application application = Application.empty(ApplicationId.from("t1", "a1", "i1")); + ClusterSpec clusterSpec = clusterSpec(); Cluster cluster = cluster(new NodeResources(1, 10, 100, 1)); application = application.with(cluster); // No current traffic: Ideal load is low but capped var model1 = new ClusterModel(application, - cluster, clock, Duration.ofMinutes(10), + clusterSpec, cluster, clock, Duration.ofMinutes(10), timeseries(cluster,100, t -> t == 0 ? 10000.0 : 0.0, t -> 0.0, clock), ClusterNodesTimeseries.empty()); assertEquals(0.275, model1.idealLoad().cpu(), delta); // Almost no current traffic: Ideal load is low but capped var model2 = new ClusterModel(application.with(new Status(0.0001, 1.0)), - cluster, clock, Duration.ofMinutes(10), + clusterSpec, cluster, clock, Duration.ofMinutes(10), timeseries(cluster,100, t -> t == 0 ? 10000.0 : 0.0001, t -> 0.0, clock), ClusterNodesTimeseries.empty()); assertEquals(0.040, model2.idealLoad().cpu(), delta); } + private ClusterSpec clusterSpec() { + return ClusterSpec.specification(ClusterSpec.Type.content, ClusterSpec.Id.from("test")) + .group(ClusterSpec.Group.from(0)) + .vespaVersion("7.1.1") + .build(); + } + private Cluster cluster(NodeResources resources) { return Cluster.create(ClusterSpec.Id.from("test"), false, diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/AutoscalingMaintainerTest.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/AutoscalingMaintainerTest.java index 6a5b45db8ff..d9037181f59 100644 --- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/AutoscalingMaintainerTest.java +++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/AutoscalingMaintainerTest.java @@ -5,8 +5,13 @@ import com.yahoo.config.provision.ApplicationId; import com.yahoo.config.provision.Capacity; import com.yahoo.config.provision.ClusterResources; import com.yahoo.config.provision.ClusterSpec; +import com.yahoo.config.provision.Environment; import com.yahoo.config.provision.NodeResources; +import com.yahoo.config.provision.RegionName; +import com.yahoo.config.provision.SystemName; +import com.yahoo.config.provision.Zone; import com.yahoo.test.ManualClock; +import com.yahoo.vespa.hosted.provision.Node; import com.yahoo.vespa.hosted.provision.applications.Cluster; import com.yahoo.vespa.hosted.provision.applications.ScalingEvent; import com.yahoo.vespa.hosted.provision.testutils.MockDeployer; @@ -186,7 +191,6 @@ public class AutoscalingMaintainerTest { var tester = new AutoscalingMaintainerTester(new MockDeployer.ApplicationContext(app1, cluster1, app1Capacity)); ManualClock clock = tester.clock(); - // deploy tester.deploy(app1, cluster1, app1Capacity); autoscale(false, Duration.ofMinutes( 1), Duration.ofMinutes( 5), clock, app1, cluster1, tester); @@ -222,6 +226,49 @@ public class AutoscalingMaintainerTest { tester.cluster(app1, cluster1).lastScalingEvent().get().generation()); } + @Test + public void test_cd_autoscaling_test() { + ApplicationId app1 = AutoscalingMaintainerTester.makeApplicationId("app1"); + ClusterSpec cluster1 = AutoscalingMaintainerTester.containerClusterSpec(); + NodeResources resources = new NodeResources(1, 4, 50, 1); + ClusterResources min = new ClusterResources( 2, 1, resources); + ClusterResources max = new ClusterResources(3, 1, resources); + var capacity = Capacity.from(min, max); + var tester = new AutoscalingMaintainerTester(new Zone(SystemName.cd, Environment.prod, RegionName.from("us-east3")), + new MockDeployer.ApplicationContext(app1, cluster1, capacity)); + ManualClock clock = tester.clock(); + + tester.deploy(app1, cluster1, capacity); + assertEquals(2, + tester.nodeRepository().nodes().list(Node.State.active) + .owner(app1) + .cluster(cluster1.id()) + .size()); + + autoscale(false, Duration.ofMinutes( 1), Duration.ofMinutes( 5), clock, app1, cluster1, tester); + assertEquals(3, + tester.nodeRepository().nodes().list(Node.State.active) + .owner(app1) + .cluster(cluster1.id()) + .size()); + } + + @Test + public void test_cd_test_not_specifying_node_resources() { + ApplicationId app1 = AutoscalingMaintainerTester.makeApplicationId("app1"); + ClusterSpec cluster1 = AutoscalingMaintainerTester.containerClusterSpec(); + ClusterResources resources = new ClusterResources( 2, 1, NodeResources.unspecified()); + var capacity = Capacity.from(resources); + var tester = new AutoscalingMaintainerTester(new Zone(SystemName.cd, Environment.prod, RegionName.from("us-east3")), + new MockDeployer.ApplicationContext(app1, cluster1, capacity)); + tester.deploy(app1, cluster1, capacity); // Deploy should succeed and allocate the nodes + assertEquals(2, + tester.nodeRepository().nodes().list(Node.State.active) + .owner(app1) + .cluster(cluster1.id()) + .size()); + } + private void autoscale(boolean down, Duration completionTime, Duration expectedWindow, ManualClock clock, ApplicationId application, ClusterSpec cluster, AutoscalingMaintainerTester tester) { diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/AutoscalingMaintainerTester.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/AutoscalingMaintainerTester.java index a47fb983d21..e1a1a2af5fb 100644 --- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/AutoscalingMaintainerTester.java +++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/AutoscalingMaintainerTester.java @@ -42,9 +42,11 @@ public class AutoscalingMaintainerTester { private final MockDeployer deployer; public AutoscalingMaintainerTester(MockDeployer.ApplicationContext ... appContexts) { - provisioningTester = new ProvisioningTester.Builder().zone(new Zone(Environment.prod, RegionName.from("us-east3"))) - .flavorsConfig(flavorsConfig()) - .build(); + this(new Zone(Environment.prod, RegionName.from("us-east3")), appContexts); + } + + public AutoscalingMaintainerTester(Zone zone, MockDeployer.ApplicationContext ... appContexts) { + provisioningTester = new ProvisioningTester.Builder().zone(zone).flavorsConfig(flavorsConfig()).build(); provisioningTester.clock().setInstant(Instant.ofEpochMilli(0)); Map<ApplicationId, MockDeployer.ApplicationContext> apps = Arrays.stream(appContexts) .collect(Collectors.toMap(c -> c.id(), c -> c)); @@ -105,7 +107,7 @@ public class AutoscalingMaintainerTester { private FlavorsConfig flavorsConfig() { FlavorConfigBuilder b = new FlavorConfigBuilder(); - b.addFlavor("flt", 30, 30, 40, 3, Flavor.Type.BARE_METAL); + b.addFlavor("flt", 30, 30, 50, 3, Flavor.Type.BARE_METAL); b.addFlavor("cpu", 40, 20, 40, 3, Flavor.Type.BARE_METAL); b.addFlavor("mem", 20, 40, 40, 3, Flavor.Type.BARE_METAL); return b.build(); diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/DynamicProvisioningMaintainerTest.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/DynamicProvisioningMaintainerTest.java index 316655e11fb..7ce26354739 100644 --- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/DynamicProvisioningMaintainerTest.java +++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/DynamicProvisioningMaintainerTest.java @@ -458,7 +458,7 @@ public class DynamicProvisioningMaintainerTest { // Provision config servers for (int i = 0; i < provisionedHosts.size(); i++) { - tester.makeReadyChildren(1, i + 1, NodeResources.unspecified(), hostType.childNodeType(), + tester.makeReadyChildren(1, i + 1, new NodeResources(1.5, 8, 50, 0.3), hostType.childNodeType(), provisionedHosts.get(i).hostname(), (nodeIndex) -> "cfg" + nodeIndex); } tester.prepareAndActivateInfraApplication(configSrvApp, hostType.childNodeType()); diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/ScalingSuggestionsMaintainerTest.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/ScalingSuggestionsMaintainerTest.java index 4c0395a0c7e..b51f4403756 100644 --- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/ScalingSuggestionsMaintainerTest.java +++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/ScalingSuggestionsMaintainerTest.java @@ -70,7 +70,7 @@ public class ScalingSuggestionsMaintainerTest { new TestMetric()); maintainer.maintain(); - assertEquals("11 nodes with [vcpu: 6.5, memory: 5.5 Gb, disk 15.0 Gb, bandwidth: 0.1 Gbps]", + assertEquals("12 nodes with [vcpu: 6.0, memory: 5.5 Gb, disk 10.0 Gb, bandwidth: 0.1 Gbps]", suggestionOf(app1, cluster1, tester).get().resources().toString()); assertEquals("8 nodes with [vcpu: 11.0, memory: 4.4 Gb, disk 11.8 Gb, bandwidth: 0.1 Gbps]", suggestionOf(app2, cluster2, tester).get().resources().toString()); @@ -80,7 +80,7 @@ public class ScalingSuggestionsMaintainerTest { addMeasurements(0.10f, 0.10f, 0.10f, 0, 500, app1, tester.nodeRepository()); maintainer.maintain(); assertEquals("Suggestion stays at the peak value observed", - "11 nodes with [vcpu: 6.5, memory: 5.5 Gb, disk 15.0 Gb, bandwidth: 0.1 Gbps]", + "12 nodes with [vcpu: 6.0, memory: 5.5 Gb, disk 10.0 Gb, bandwidth: 0.1 Gbps]", suggestionOf(app1, cluster1, tester).get().resources().toString()); // Utilization is still way down and a week has passed tester.clock().advance(Duration.ofDays(7)); diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/provisioning/ProvisioningTest.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/provisioning/ProvisioningTest.java index db165aae919..95f25612dd7 100644 --- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/provisioning/ProvisioningTest.java +++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/provisioning/ProvisioningTest.java @@ -523,7 +523,7 @@ public class ProvisioningTest { ApplicationId application = ProvisioningTester.applicationId(); tester.makeReadyHosts(10, defaultResources).activateTenantHosts(); - prepare(application, 1, 2, 3, 3, defaultResources, tester); + prepare(application, 1, 1, 1, 1, defaultResources, tester); } @Test @@ -1015,10 +1015,10 @@ public class ProvisioningTest { allHosts.addAll(content1); Function<Integer, Capacity> capacity = count -> Capacity.from(new ClusterResources(count, 1, NodeResources.unspecified()), required, true); - int expectedContainer0Size = tester.decideSize(container0Size, capacity.apply(container0Size), containerCluster0, application); - int expectedContainer1Size = tester.decideSize(container1Size, capacity.apply(container1Size), containerCluster1, application); - int expectedContent0Size = tester.decideSize(content0Size, capacity.apply(content0Size), contentCluster0, application); - int expectedContent1Size = tester.decideSize(content1Size, capacity.apply(content1Size), contentCluster1, application); + int expectedContainer0Size = tester.decideSize(capacity.apply(container0Size), application); + int expectedContainer1Size = tester.decideSize(capacity.apply(container1Size), application); + int expectedContent0Size = tester.decideSize(capacity.apply(content0Size), application); + int expectedContent1Size = tester.decideSize(capacity.apply(content1Size), application); assertEquals("Hosts in each group cluster is disjunct and the total number of unretired nodes is correct", expectedContainer0Size + expectedContainer1Size + expectedContent0Size + expectedContent1Size, diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/provisioning/ProvisioningTester.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/provisioning/ProvisioningTester.java index 6ca93671087..c478840780f 100644 --- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/provisioning/ProvisioningTester.java +++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/provisioning/ProvisioningTester.java @@ -152,8 +152,8 @@ public class ProvisioningTester { public NodeList getNodes(ApplicationId id, Node.State ... inState) { return nodeRepository.nodes().list(inState).owner(id); } public InMemoryFlagSource flagSource() { return (InMemoryFlagSource) nodeRepository.flagSource(); } - public int decideSize(int size, Capacity capacity, ClusterSpec cluster, ApplicationId application) { - return capacityPolicies.decideSize(size, capacity.isRequired(), capacity.canFail(), application.instance().isTester(), cluster); + public int decideSize(Capacity capacity, ApplicationId application) { + return capacityPolicies.applyOn(capacity, application).minResources().nodes(); } public Node patchNode(Node node, UnaryOperator<Node> patcher) { @@ -493,6 +493,7 @@ public class ProvisioningTester { public List<Node> makeReadyNodes(int n, Flavor flavor, Optional<TenantName> reservedTo, NodeType type, int ipAddressPoolSize, boolean dualStack) { List<Node> nodes = makeProvisionedNodes(n, flavor, reservedTo, type, ipAddressPoolSize, dualStack); nodes = nodeRepository.nodes().deallocate(nodes, Agent.system, getClass().getSimpleName()); + nodes.forEach(node -> { if (node.resources().isUnspecified()) throw new IllegalArgumentException(); }); return nodeRepository.nodes().setReady(nodes, Agent.system, getClass().getSimpleName()); } diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/restapi/responses/application1.json b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/restapi/responses/application1.json index 689b6f3816b..fcdcdf1a8ca 100644 --- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/restapi/responses/application1.json +++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/restapi/responses/application1.json @@ -72,7 +72,7 @@ "idealMemory": 0.65, "currentMemory": 0.0, "disk" : 0.0, - "idealDisk": 0.6, + "idealDisk": 0.95, "currentDisk": 0.0 }, "scalingEvents" : [ |