diff options
16 files changed, 471 insertions, 282 deletions
diff --git a/config-provisioning/src/main/java/com/yahoo/config/provision/ClusterResources.java b/config-provisioning/src/main/java/com/yahoo/config/provision/ClusterResources.java index 11ae0845fb0..9aaf0d365cd 100644 --- a/config-provisioning/src/main/java/com/yahoo/config/provision/ClusterResources.java +++ b/config-provisioning/src/main/java/com/yahoo/config/provision/ClusterResources.java @@ -37,6 +37,8 @@ public class ClusterResources { public boolean smallerThan(ClusterResources other) { if (this.nodes < other.nodes) return true; if (this.groups < other.groups) return true; + if (this.nodeResources.isUnspecified() || other.nodeResources.isUnspecified()) return false; + if ( ! this.nodeResources.justNumbers().satisfies(other.nodeResources.justNumbers())) return true; return false; } diff --git a/config-provisioning/src/main/java/com/yahoo/config/provision/NodeResources.java b/config-provisioning/src/main/java/com/yahoo/config/provision/NodeResources.java index 05b604b263f..bed36d0d5d2 100644 --- a/config-provisioning/src/main/java/com/yahoo/config/provision/NodeResources.java +++ b/config-provisioning/src/main/java/com/yahoo/config/provision/NodeResources.java @@ -241,6 +241,8 @@ public class NodeResources { return true; } + public boolean isUnspecified() { return this == unspecified; } + /** * Create this from serial form. * diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/applications/Cluster.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/applications/Cluster.java index 15a5545bc2c..847ec1290f6 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/applications/Cluster.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/applications/Cluster.java @@ -72,19 +72,6 @@ public class Cluster { return new Cluster(id, min, max, suggested, target); } - public NodeResources capAtLimits(NodeResources resources) { - resources = resources.withVcpu(between(min.nodeResources().vcpu(), max.nodeResources().vcpu(), resources.vcpu())); - resources = resources.withMemoryGb(between(min.nodeResources().memoryGb(), max.nodeResources().memoryGb(), resources.memoryGb())); - resources = resources.withDiskGb(between(min.nodeResources().diskGb(), max.nodeResources().diskGb(), resources.diskGb())); - return resources; - } - - private double between(double min, double max, double value) { - value = Math.max(min, value); - value = Math.min(max, value); - return value; - } - @Override public int hashCode() { return id.hashCode(); } diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/AllocatableClusterResources.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/AllocatableClusterResources.java index 3d2af221d5b..6249020a8a5 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/AllocatableClusterResources.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/AllocatableClusterResources.java @@ -133,11 +133,9 @@ public class AllocatableClusterResources { */ public static Optional<AllocatableClusterResources> from(ClusterResources resources, ClusterSpec.Type clusterType, - Optional<Cluster> limits, + Limits limits, NodeRepository nodeRepository) { - NodeResources nodeResources = resources.nodeResources(); - if (limits.isPresent()) - nodeResources = limits.get().capAtLimits(nodeResources); + NodeResources nodeResources = limits.cap(resources.nodeResources()); nodeResources = new NodeResourceLimits(nodeRepository.zone()).enlargeToLegal(nodeResources, clusterType); if (allowsHostSharing(nodeRepository.zone().cloud())) { diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/AllocationBasedResourceTarget.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/AllocationBasedResourceTarget.java new file mode 100644 index 00000000000..28704cb93fc --- /dev/null +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/AllocationBasedResourceTarget.java @@ -0,0 +1,45 @@ +// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.vespa.hosted.provision.autoscale; + +import com.yahoo.config.provision.ClusterResources; + +/** + * This is used when the target of an allocation search is to come as close as possible to the current allocation + * + * @author bratseth + */ +public class AllocationBasedResourceTarget extends ResourceTarget { + + private final AllocatableClusterResources resources; + + public AllocationBasedResourceTarget(AllocatableClusterResources resources) { + super(resources.nodes(), resources.groups()); + this.resources = resources; + } + + @Override + public double clusterCpu() { + return resources.toAdvertisedClusterResources().nodeResources().vcpu() * resources.nodes(); + } + + @Override + public double groupMemory() { + return resources.toAdvertisedClusterResources().nodeResources().memoryGb() * sourceGroupSize(); + } + + @Override + public double groupDisk() { + return resources.toAdvertisedClusterResources().nodeResources().diskGb() * sourceGroupSize(); + } + + @Override + public double nodeMemory() { + return resources.toAdvertisedClusterResources().nodeResources().memoryGb(); + } + + @Override + public double nodeDisk() { + return resources.toAdvertisedClusterResources().nodeResources().diskGb(); + } + +} diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/AllocationOptimizer.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/AllocationOptimizer.java new file mode 100644 index 00000000000..fb603960fb3 --- /dev/null +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/AllocationOptimizer.java @@ -0,0 +1,170 @@ +// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.vespa.hosted.provision.autoscale; + +import com.yahoo.config.provision.ClusterResources; +import com.yahoo.config.provision.ClusterSpec; +import com.yahoo.config.provision.NodeResources; +import com.yahoo.vespa.hosted.provision.NodeRepository; + +import java.util.Optional; + +/** + * A searcher of the space of possible allocation + * + * @author bratseth + */ +public class AllocationOptimizer { + + private final NodeRepository nodeRepository; + + public AllocationOptimizer(NodeRepository nodeRepository) { + this.nodeRepository = nodeRepository; + } + + /** + * An AllocationSearcher searches the space of possible allocations given a target + * and (optionally) cluster limits and returns the best alternative. + * + * @return the best allocation, if there are any possible legal allocations, fulfilling the target + * fully or partially, within the limits + */ + public Optional<AllocatableClusterResources> findBestAllocation(ResourceTarget target, + AllocatableClusterResources current, + Limits limits) { + Optional<AllocatableClusterResources> bestAllocation = Optional.empty(); + for (ResourceIterator i = new ResourceIterator(target, current, limits); i.hasNext(); ) { + var allocatableResources = AllocatableClusterResources.from(i.next(), current.clusterType(), limits, nodeRepository); + if (allocatableResources.isEmpty()) continue; + if (bestAllocation.isEmpty() || allocatableResources.get().preferableTo(bestAllocation.get())) + bestAllocation = allocatableResources; + } + return bestAllocation; + } + + /** + * Provides iteration over possible cluster resource allocations given a target total load + * and current groups/nodes allocation. + */ + private static class ResourceIterator { + + // The min and max nodes to consider when not using application supplied limits + private static final int minimumNodes = 3; // Since this number includes redundancy it cannot be lower than 2 + private static final int maximumNodes = 150; + + // When a query is issued on a node the cost is the sum of a fixed cost component and a cost component + // proportional to document count. We must account for this when comparing configurations with more or fewer nodes. + // TODO: Measure this, and only take it into account with queries + private static final double fixedCpuCostFraction = 0.1; + + // Given state + private final Limits limits; + private final AllocatableClusterResources current; + private final ResourceTarget target; + + // Derived from the observed state + private final int nodeIncrement; + private final boolean singleGroupMode; + + // Iterator state + private int currentNodes; + + public ResourceIterator(ResourceTarget target, AllocatableClusterResources current, Limits limits) { + this.target = target; + this.current = current; + this.limits = limits; + + // What number of nodes is it effective to add or remove at the time from this cluster? + // This is the group size, since we (for now) assume the group size is decided by someone wiser than us + // and we decide the number of groups. + // The exception is when we only have one group, where we can add and remove single nodes in it. + singleGroupMode = target.sourceGroups() == 1; + nodeIncrement = singleGroupMode ? 1 : target.sourceGroupSize(); + + // Step to the right starting point + currentNodes = target.sourceNodes(); + if (currentNodes < minNodes()) { // step up + while (currentNodes < minNodes() + && (singleGroupMode || currentNodes + nodeIncrement > target.sourceGroupSize())) // group level redundancy + currentNodes += nodeIncrement; + } + else { // step down + while (currentNodes - nodeIncrement >= minNodes() + && (singleGroupMode || currentNodes - nodeIncrement > target.sourceGroupSize())) // group level redundancy + currentNodes -= nodeIncrement; + } + } + + public ClusterResources next() { + ClusterResources next = resourcesWith(currentNodes); + currentNodes += nodeIncrement; + return next; + } + + public boolean hasNext() { + return currentNodes <= maxNodes(); + } + + private int minNodes() { + if (limits.isEmpty()) return minimumNodes; + if (singleGroupMode) return limits.min().nodes(); + return Math.max(limits.min().nodes(), limits.min().groups() * target.sourceGroupSize() ); + } + + private int maxNodes() { + if (limits.isEmpty()) return maximumNodes; + if (singleGroupMode) return limits.max().nodes(); + return Math.min(limits.max().nodes(), limits.max().groups() * target.sourceGroupSize() ); + } + + private ClusterResources resourcesWith(int nodes) { + int nodesWithRedundancy = nodes - (singleGroupMode ? 1 : target.sourceGroupSize()); + return new ClusterResources(nodes, + singleGroupMode ? 1 : nodes / target.sourceGroupSize(), + nodeResourcesWith(nodesWithRedundancy)); + } + + /** + * For the observed load this instance is initialized with, returns the resources needed per node to be at + * ideal load given a target node count + */ + private NodeResources nodeResourcesWith(int nodeCount) { + // Cpu: Scales with cluster size (TODO: Only reads, writes scales with group size) + // Memory and disk: Scales with group size + + double cpu, memory, disk; + if (singleGroupMode) { + // The fixed cost portion of cpu does not scale with changes to the node count + // TODO: Only for the portion of cpu consumed by queries + cpu = fixedCpuCostFraction * target.clusterCpu() / target.sourceGroupSize() + + (1 - fixedCpuCostFraction) * target.clusterCpu() / nodeCount; + + if (current.clusterType().isContent()) { // load scales with node share of content + memory = target.groupMemory() / nodeCount; + disk = target.groupDisk() / nodeCount; + } + else { + memory = target.nodeMemory(); + disk = target.nodeDisk(); + } + } + else { + cpu = target.clusterCpu() / nodeCount; + if (current.clusterType().isContent()) { // load scales with node share of content + memory = target.groupMemory() / target.sourceGroupSize(); + disk = target.groupDisk() / target.sourceGroupSize(); + } + else { + memory = target.nodeMemory(); + disk = target.nodeDisk(); + } + } + + // Combine the scaled resource values computed here + // with the currently configured non-scaled values, given in the limits, if any + NodeResources nonScaled = limits.isEmpty() ? current.toAdvertisedClusterResources().nodeResources() + : limits.min().nodeResources(); // min=max for non-scaled + return nonScaled.withVcpu(cpu).withMemoryGb(memory).withDiskGb(disk); + } + + } +} diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/Autoscaler.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/Autoscaler.java index 6238299e1c8..bfeb77b026f 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/Autoscaler.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/Autoscaler.java @@ -1,18 +1,11 @@ // Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. package com.yahoo.vespa.hosted.provision.autoscale; -import com.yahoo.config.provision.CloudName; import com.yahoo.config.provision.ClusterResources; import com.yahoo.config.provision.ClusterSpec; -import com.yahoo.config.provision.Flavor; -import com.yahoo.config.provision.NodeResources; -import com.yahoo.config.provision.Zone; -import com.yahoo.config.provision.host.FlavorOverrides; import com.yahoo.vespa.hosted.provision.Node; import com.yahoo.vespa.hosted.provision.NodeRepository; import com.yahoo.vespa.hosted.provision.applications.Cluster; -import com.yahoo.vespa.hosted.provision.provisioning.HostResourcesCalculator; -import com.yahoo.vespa.hosted.provision.provisioning.NodeResourceLimits; import java.time.Duration; import java.util.List; @@ -44,10 +37,12 @@ public class Autoscaler { private final NodeMetricsDb metricsDb; private final NodeRepository nodeRepository; + private final AllocationOptimizer allocationOptimizer; public Autoscaler(NodeMetricsDb metricsDb, NodeRepository nodeRepository) { this.metricsDb = metricsDb; this.nodeRepository = nodeRepository; + this.allocationOptimizer = new AllocationOptimizer(nodeRepository); } /** @@ -58,24 +53,24 @@ public class Autoscaler { * @return a new suggested allocation for this cluster, or empty if it should not be rescaled at this time */ public Optional<ClusterResources> suggest(Cluster cluster, List<Node> clusterNodes) { - return autoscale(cluster, clusterNodes, false) + return autoscale(clusterNodes, Limits.empty()) .map(AllocatableClusterResources::toAdvertisedClusterResources); } /** - * Autoscale a cluster. This returns a better allocation (if found) inside the min and max limits. + * Autoscale a cluster by load. This returns a better allocation (if found) inside the min and max limits. * * @param clusterNodes the list of all the active nodes in a cluster * @return a new suggested allocation for this cluster, or empty if it should not be rescaled at this time */ public Optional<ClusterResources> autoscale(Cluster cluster, List<Node> clusterNodes) { if (cluster.minResources().equals(cluster.maxResources())) return Optional.empty(); // Shortcut - return autoscale(cluster, clusterNodes, true) + return autoscale(clusterNodes, Limits.of(cluster)) .map(AllocatableClusterResources::toAdvertisedClusterResources); } - private Optional<AllocatableClusterResources> autoscale(Cluster cluster, List<Node> clusterNodes, boolean respectLimits) { + private Optional<AllocatableClusterResources> autoscale(List<Node> clusterNodes, Limits limits) { if (unstable(clusterNodes)) return Optional.empty(); ClusterSpec.Type clusterType = clusterNodes.get(0).allocation().get().membership().cluster().type(); @@ -85,35 +80,15 @@ public class Autoscaler { Optional<Double> memoryLoad = averageLoad(Resource.memory, clusterNodes, clusterType); Optional<Double> diskLoad = averageLoad(Resource.disk, clusterNodes, clusterType); if (cpuLoad.isEmpty() || memoryLoad.isEmpty() || diskLoad.isEmpty()) return Optional.empty(); + var target = new LoadBasedResourceTarget(cpuLoad.get(), memoryLoad.get(), diskLoad.get(), currentAllocation); - Optional<AllocatableClusterResources> bestAllocation = findBestAllocation(cpuLoad.get(), - memoryLoad.get(), - diskLoad.get(), - currentAllocation, - cluster, - respectLimits); + Optional<AllocatableClusterResources> bestAllocation = + allocationOptimizer.findBestAllocation(target, currentAllocation, limits); if (bestAllocation.isEmpty()) return Optional.empty(); if (similar(bestAllocation.get(), currentAllocation)) return Optional.empty(); return bestAllocation; } - private Optional<AllocatableClusterResources> findBestAllocation(double cpuLoad, double memoryLoad, double diskLoad, - AllocatableClusterResources currentAllocation, - Cluster cluster, boolean respectLimits) { - Optional<AllocatableClusterResources> bestAllocation = Optional.empty(); - for (ResourceIterator i = new ResourceIterator(cpuLoad, memoryLoad, diskLoad, currentAllocation, cluster, respectLimits); - i.hasNext(); ) { - var allocatableResources = AllocatableClusterResources.from(i.next(), - currentAllocation.clusterType(), - respectLimits ? Optional.of(cluster) : Optional.empty(), - nodeRepository); - if (allocatableResources.isEmpty()) continue; - if (bestAllocation.isEmpty() || allocatableResources.get().preferableTo(bestAllocation.get())) - bestAllocation = allocatableResources; - } - return bestAllocation; - } - /** Returns true if both total real resources and total cost are similar */ private boolean similar(AllocatableClusterResources a, AllocatableClusterResources b) { return similar(a.cost(), b.cost(), costDifferenceWorthReallocation) && diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/Limits.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/Limits.java new file mode 100644 index 00000000000..68c4df7f5f6 --- /dev/null +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/Limits.java @@ -0,0 +1,68 @@ +// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.vespa.hosted.provision.autoscale; + +import com.yahoo.config.provision.Capacity; +import com.yahoo.config.provision.ClusterResources; +import com.yahoo.config.provision.NodeResources; +import com.yahoo.vespa.hosted.provision.applications.Cluster; + +/** + * Optional allocation limits + * + * @author bratseth + */ +public class Limits { + + private static final Limits empty = new Limits(null, null); + + private final ClusterResources min, max; + + private Limits(ClusterResources min, ClusterResources max) { + this.min = min; + this.max = max; + } + + public static Limits empty() { return empty; } + + public boolean isEmpty() { return this == empty; } + + public ClusterResources min() { + if (isEmpty()) throw new IllegalStateException("Empty: No min"); + return min; + } + + public ClusterResources max() { + if (isEmpty()) throw new IllegalStateException("Empty: No max"); + return max; + } + + /** Caps the given resources at the limits of this. If it is empty the node resources are returned as-is */ + public NodeResources cap(NodeResources resources) { + if (isEmpty()) return resources; + resources = resources.withVcpu(between(min.nodeResources().vcpu(), max.nodeResources().vcpu(), resources.vcpu())); + resources = resources.withMemoryGb(between(min.nodeResources().memoryGb(), max.nodeResources().memoryGb(), resources.memoryGb())); + resources = resources.withDiskGb(between(min.nodeResources().diskGb(), max.nodeResources().diskGb(), resources.diskGb())); + return resources; + } + + private double between(double min, double max, double value) { + value = Math.max(min, value); + value = Math.min(max, value); + return value; + } + + public static Limits of(Cluster cluster) { + return new Limits(cluster.minResources(), cluster.maxResources()); + } + + public static Limits of(Capacity capacity) { + return new Limits(capacity.minResources(), capacity.maxResources()); + } + + @Override + public String toString() { + if (isEmpty()) return "no limits"; + return "limits: from " + min + " to " + max; + } + +} diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/LoadBasedResourceTarget.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/LoadBasedResourceTarget.java new file mode 100644 index 00000000000..d1c726d01df --- /dev/null +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/LoadBasedResourceTarget.java @@ -0,0 +1,57 @@ +// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.vespa.hosted.provision.autoscale; + +class LoadBasedResourceTarget extends ResourceTarget { + + private final double cpuLoad; + private final double memoryLoad; + private final double diskLoad; + private final AllocatableClusterResources allocation; + + public LoadBasedResourceTarget(double cpuLoad, double memoryLoad, double diskLoad, + AllocatableClusterResources sourceAllocation) { + super(sourceAllocation.nodes(), sourceAllocation.groups()); + this.cpuLoad = cpuLoad; + this.memoryLoad = memoryLoad; + this.diskLoad = diskLoad; + this.allocation = sourceAllocation; + } + + @Override + public double clusterCpu() { + return clusterUsage(Resource.cpu, cpuLoad) / Resource.cpu.idealAverageLoad(); + } + + @Override + public double groupMemory() { + return groupUsage(Resource.memory, memoryLoad) / Resource.memory.idealAverageLoad(); + } + + @Override + public double groupDisk() { + return groupUsage(Resource.disk, diskLoad) / Resource.disk.idealAverageLoad(); + } + + @Override + public double nodeMemory() { + return nodeUsage(Resource.memory, memoryLoad) / Resource.memory.idealAverageLoad(); + } + + @Override + public double nodeDisk() { + return nodeUsage(Resource.disk, diskLoad) / Resource.disk.idealAverageLoad(); + } + + private double clusterUsage(Resource resource, double load) { + return nodeUsage(resource, load) * allocation.nodes(); + } + + private double groupUsage(Resource resource, double load) { + return nodeUsage(resource, load) * sourceGroupSize(); + } + + private double nodeUsage(Resource resource, double load) { + return load * resource.valueFrom(allocation.realResources()); + } + +} diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/ResourceIterator.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/ResourceIterator.java deleted file mode 100644 index 207eecc1871..00000000000 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/ResourceIterator.java +++ /dev/null @@ -1,155 +0,0 @@ -// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. -package com.yahoo.vespa.hosted.provision.autoscale; - -import com.yahoo.config.provision.ClusterResources; -import com.yahoo.config.provision.NodeResources; -import com.yahoo.vespa.hosted.provision.applications.Cluster; - -/** - * Provides iteration over possible cluster resource allocations given a target total load - * and current groups/nodes allocation. - */ -public class ResourceIterator { - - // The min and max nodes to consider when not using application supplied limits - private static final int minimumNodes = 3; // Since this number includes redundancy it cannot be lower than 2 - private static final int maximumNodes = 150; - - // When a query is issued on a node the cost is the sum of a fixed cost component and a cost component - // proportional to document count. We must account for this when comparing configurations with more or fewer nodes. - // TODO: Measure this, and only take it into account with queries - private static final double fixedCpuCostFraction = 0.1; - - // Prescribed state - private final Cluster cluster; - private final boolean respectLimits; - - // Observed state - private final AllocatableClusterResources allocation; - private final double cpuLoad; - private final double memoryLoad; - private final double diskLoad; - private final int groupSize; - - // Derived from the observed state - private final int nodeIncrement; - private final boolean singleGroupMode; - - // Iterator state - private int currentNodes; - - public ResourceIterator(double cpuLoad, double memoryLoad, double diskLoad, - AllocatableClusterResources currentAllocation, - Cluster cluster, - boolean respectLimits) { - this.cpuLoad = cpuLoad; - this.memoryLoad = memoryLoad; - this.diskLoad = diskLoad; - this.respectLimits = respectLimits; - - // ceil: If the division does not produce a whole number we assume some node is missing - groupSize = (int)Math.ceil((double)currentAllocation.nodes() / currentAllocation.groups()); - allocation = currentAllocation; - - this.cluster = cluster; - - // What number of nodes is it effective to add or remove at the time from this cluster? - // This is the group size, since we (for now) assume the group size is decided by someone wiser than us - // and we decide the number of groups. - // The exception is when we only have one group, where we can add and remove single nodes in it. - singleGroupMode = currentAllocation.groups() == 1; - nodeIncrement = singleGroupMode ? 1 : groupSize; - - // Step down to the right starting point - currentNodes = currentAllocation.nodes(); - while (currentNodes - nodeIncrement >= minNodes() - && ( singleGroupMode || currentNodes - nodeIncrement > groupSize)) // group level redundancy - currentNodes -= nodeIncrement; - } - - public ClusterResources next() { - ClusterResources next = resourcesWith(currentNodes); - currentNodes += nodeIncrement; - return next; - } - - public boolean hasNext() { - return currentNodes <= maxNodes(); - } - - private int minNodes() { - if ( ! respectLimits) return minimumNodes; - if (singleGroupMode) return cluster.minResources().nodes(); - return Math.max(cluster.minResources().nodes(), cluster.minResources().groups() * groupSize ); - } - - private int maxNodes() { - if ( ! respectLimits) return maximumNodes; - if (singleGroupMode) return cluster.maxResources().nodes(); - return Math.min(cluster.maxResources().nodes(), cluster.maxResources().groups() * groupSize ); - } - - private ClusterResources resourcesWith(int nodes) { - int nodesWithRedundancy = nodes - (singleGroupMode ? 1 : groupSize); - return new ClusterResources(nodes, - singleGroupMode ? 1 : nodes / groupSize, - nodeResourcesWith(nodesWithRedundancy)); - } - - /** - * For the observed load this instance is initialized with, returns the resources needed per node to be at - * ideal load given a target node count - */ - private NodeResources nodeResourcesWith(int nodeCount) { - // Cpu: Scales with cluster size (TODO: Only reads, writes scales with group size) - // Memory and disk: Scales with group size - - double cpu, memory, disk; - if (singleGroupMode) { - // The fixed cost portion of cpu does not scale with changes to the node count - // TODO: Only for the portion of cpu consumed by queries - double totalCpu = clusterUsage(Resource.cpu, cpuLoad); - cpu = fixedCpuCostFraction * totalCpu / groupSize / Resource.cpu.idealAverageLoad() + - (1 - fixedCpuCostFraction) * totalCpu / nodeCount / Resource.cpu.idealAverageLoad(); - if (allocation.clusterType().isContent()) { // load scales with node share of content - memory = groupUsage(Resource.memory, memoryLoad) / nodeCount / Resource.memory.idealAverageLoad(); - disk = groupUsage(Resource.disk, diskLoad) / nodeCount / Resource.disk.idealAverageLoad(); - } - else { - memory = nodeUsage(Resource.memory, memoryLoad) / Resource.memory.idealAverageLoad(); - disk = nodeUsage(Resource.disk, diskLoad) / Resource.disk.idealAverageLoad(); - } - } - else { - cpu = clusterUsage(Resource.cpu, cpuLoad) / nodeCount / Resource.cpu.idealAverageLoad(); - if (allocation.clusterType().isContent()) { // load scales with node share of content - memory = groupUsage(Resource.memory, memoryLoad) / groupSize / Resource.memory.idealAverageLoad(); - disk = groupUsage(Resource.disk, diskLoad) / groupSize / Resource.disk.idealAverageLoad(); - } - else { - memory = nodeUsage(Resource.memory, memoryLoad) / Resource.memory.idealAverageLoad(); - disk = nodeUsage(Resource.disk, diskLoad) / Resource.disk.idealAverageLoad(); - } - } - - // Combine the scaled resource values computed here - // and the currently combined values of non-scaled resources - return new NodeResources(cpu, memory, disk, - cluster.minResources().nodeResources().bandwidthGbps(), - cluster.minResources().nodeResources().diskSpeed(), - cluster.minResources().nodeResources().storageType()); - } - - private double clusterUsage(Resource resource, double load) { - return nodeUsage(resource, load) * allocation.nodes(); - } - - private double groupUsage(Resource resource, double load) { - return nodeUsage(resource, load) * groupSize; - } - - private double nodeUsage(Resource resource, double load) { - return load * resource.valueFrom(allocation.realResources()); - } - -} diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/ResourceTarget.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/ResourceTarget.java new file mode 100644 index 00000000000..8af3ab17ca4 --- /dev/null +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/ResourceTarget.java @@ -0,0 +1,41 @@ +// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.vespa.hosted.provision.autoscale; + +public abstract class ResourceTarget { + + private final int sourceNodes; + private final int sourceGroups; + + public ResourceTarget(int sourceNodes, int sourceGroups) { + this.sourceNodes = sourceNodes; + this.sourceGroups = sourceGroups; + } + + /** Returns the number of nodes of the *source* allocation causing this target */ + public int sourceNodes() { return sourceNodes; } + + /** Returns the number of groups of the *source* allocation causing this target */ + public int sourceGroups() { return sourceGroups; } + + /** Returns the group size of the source allocation producing this target */ + public int sourceGroupSize() { + // ceil: If the division does not produce a whole number we assume some node is missing + return (int)Math.ceil((double)sourceNodes / sourceGroups); + } + + /** Returns the target total cpu to allocate to the entire cluster */ + public abstract double clusterCpu(); + + /** Returns the target total memory to allocate to each group */ + public abstract double groupMemory(); + + /** Returns the target total disk to allocate to each group */ + public abstract double groupDisk(); + + /** Returns the target memory to allocate to each node */ + public abstract double nodeMemory(); + + /** Returns the target disk to allocate to each node */ + public abstract double nodeDisk(); + +} diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/NodeRepositoryProvisioner.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/NodeRepositoryProvisioner.java index 4000354243f..8838c45a8a7 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/NodeRepositoryProvisioner.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/NodeRepositoryProvisioner.java @@ -22,6 +22,10 @@ import com.yahoo.vespa.hosted.provision.Node; import com.yahoo.vespa.hosted.provision.NodeList; import com.yahoo.vespa.hosted.provision.NodeRepository; import com.yahoo.vespa.hosted.provision.applications.Application; +import com.yahoo.vespa.hosted.provision.autoscale.AllocatableClusterResources; +import com.yahoo.vespa.hosted.provision.autoscale.AllocationBasedResourceTarget; +import com.yahoo.vespa.hosted.provision.autoscale.AllocationOptimizer; +import com.yahoo.vespa.hosted.provision.autoscale.Limits; import com.yahoo.vespa.hosted.provision.node.Allocation; import com.yahoo.vespa.hosted.provision.node.filter.ApplicationFilter; import com.yahoo.vespa.hosted.provision.node.filter.NodeHostFilter; @@ -31,7 +35,6 @@ import java.util.Collection; import java.util.Comparator; import java.util.List; import java.util.Optional; -import java.util.logging.Level; import java.util.logging.Logger; /** @@ -47,6 +50,7 @@ public class NodeRepositoryProvisioner implements Provisioner { private static final int SPARE_CAPACITY_NONPROD = 0; private final NodeRepository nodeRepository; + private final AllocationOptimizer allocationOptimizer; private final CapacityPolicies capacityPolicies; private final Zone zone; private final Preparer preparer; @@ -61,6 +65,7 @@ public class NodeRepositoryProvisioner implements Provisioner { public NodeRepositoryProvisioner(NodeRepository nodeRepository, Zone zone, ProvisionServiceProvider provisionServiceProvider, FlagSource flagSource) { this.nodeRepository = nodeRepository; + this.allocationOptimizer = new AllocationOptimizer(nodeRepository); this.capacityPolicies = new CapacityPolicies(zone); this.zone = zone; this.loadBalancerProvisioner = provisionServiceProvider.getLoadBalancerService().map(lbService -> new LoadBalancerProvisioner(nodeRepository, lbService)); @@ -94,7 +99,7 @@ public class NodeRepositoryProvisioner implements Provisioner { NodeResources resources; NodeSpec nodeSpec; if ( requested.type() == NodeType.tenant) { - ClusterResources target = decideTargetResources(application, cluster.id(), requested); + ClusterResources target = decideTargetResources(application, cluster, requested); int nodeCount = capacityPolicies.decideSize(target.nodes(), requested, cluster, application); resources = capacityPolicies.decideNodeResources(target.nodeResources(), requested, cluster); boolean exclusive = capacityPolicies.decideExclusivity(cluster.isExclusive()); @@ -131,66 +136,50 @@ public class NodeRepositoryProvisioner implements Provisioner { * Returns the target cluster resources, a value between the min and max in the requested capacity, * and updates the application store with the received min and max. */ - private ClusterResources decideTargetResources(ApplicationId applicationId, ClusterSpec.Id clusterId, Capacity requested) { + private ClusterResources decideTargetResources(ApplicationId applicationId, ClusterSpec clusterSpec, Capacity requested) { try (Mutex lock = nodeRepository.lock(applicationId)) { Application application = nodeRepository.applications().get(applicationId).orElse(new Application(applicationId)); - application = application.withClusterLimits(clusterId, requested.minResources(), requested.maxResources()); + application = application.withClusterLimits(clusterSpec.id(), requested.minResources(), requested.maxResources()); nodeRepository.applications().put(application, lock); - return application.clusters().get(clusterId).targetResources() - .orElseGet(() -> currentResources(applicationId, clusterId, requested)); + return application.clusters().get(clusterSpec.id()).targetResources() + .orElseGet(() -> currentResources(applicationId, clusterSpec, requested)); } } /** Returns the current resources of this cluster, or the closes */ private ClusterResources currentResources(ApplicationId applicationId, - ClusterSpec.Id clusterId, + ClusterSpec clusterSpec, Capacity requested) { List<Node> nodes = NodeList.copyOf(nodeRepository.getNodes(applicationId, Node.State.active)) - .cluster(clusterId) + .cluster(clusterSpec.id()) .not().retired() .not().removable() .asList(); if (nodes.isEmpty()) return requested.minResources(); // New deployment: Start at min - long groups = nodes.stream().map(node -> node.allocation().get().membership().cluster().group()).distinct().count(); - var currentResources = new ClusterResources(nodes.size(), (int)groups, nodes.get(0).flavor().resources()); - return ensureWithin(requested.minResources(), requested.maxResources(), currentResources); + AllocatableClusterResources currentResources = new AllocatableClusterResources(nodes, nodeRepository.resourcesCalculator()); + return ensureWithin(Limits.of(requested), currentResources); } /** Make the minimal adjustments needed to the current resources to stay within the limits */ - private ClusterResources ensureWithin(ClusterResources min, ClusterResources max, ClusterResources current) { - int nodes = between(min.nodes(), max.nodes(), current.nodes()); - int groups = between(min.groups(), max.groups(), current.groups()); - if (nodes % groups != 0) { - // That didn't work - try to preserve current group size instead. - // Rounding here is needed because a node may be missing due to node failing. - int currentGroupsSize = Math.round((float)current.nodes() / current.groups()); - nodes = currentGroupsSize * groups; - if (nodes != between(min.nodes(), max.nodes(), nodes)) { - // Give up: Use max - nodes = max.nodes(); - groups = max.groups(); - } - } - if (min.nodeResources() != NodeResources.unspecified && max.nodeResources() != NodeResources.unspecified) { - double vcpu = between(min.nodeResources().vcpu(), max.nodeResources().vcpu(), current.nodeResources().vcpu()); - double memoryGb = between(min.nodeResources().memoryGb(), max.nodeResources().memoryGb(), current.nodeResources().memoryGb()); - double diskGb = between(min.nodeResources().diskGb(), max.nodeResources().diskGb(), current.nodeResources().diskGb()); - // Combine computed scaled resources with requested non-scaled resources (for which min=max) - NodeResources nodeResources = min.nodeResources().withVcpu(vcpu).withMemoryGb(memoryGb).withDiskGb(diskGb); - return new ClusterResources(nodes, groups, nodeResources); - } - else { - return new ClusterResources(nodes, groups, current.nodeResources()); - } - } + private ClusterResources ensureWithin(Limits limits, AllocatableClusterResources current) { + if (limits.isEmpty()) return current.toAdvertisedClusterResources(); + if (limits.min().equals(limits.max())) return limits.min(); + + if (current.toAdvertisedClusterResources().isWithin(limits.min(), limits.max())) + return combine(current.toAdvertisedClusterResources(), limits.min()); // for unscaled values min==max - private int between(int min, int max, int n) { - return Math.min(max, Math.max(min, n)); + return allocationOptimizer.findBestAllocation(new AllocationBasedResourceTarget(current), current, limits) + .orElseThrow(() -> new IllegalArgumentException("No allocation possible within " + limits)) + .toAdvertisedClusterResources(); } - private double between(double min, double max, double n) { - return Math.min(max, Math.max(min, n)); + /** Combine autoscaled values with unscaled values, such that the latter can be changed by a deployment. */ + private ClusterResources combine(ClusterResources scaledValues, ClusterResources unscaledValues) { + if (unscaledValues.nodeResources() == NodeResources.unspecified) return scaledValues; + return scaledValues.with(unscaledValues.nodeResources().withVcpu(scaledValues.nodeResources().vcpu()) + .withMemoryGb(scaledValues.nodeResources().memoryGb()) + .withDiskGb(scaledValues.nodeResources().diskGb())); } private void logIfDownscaled(int targetNodes, int actualNodes, ClusterSpec cluster, ProvisionLogger logger) { diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/AutoscalingTester.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/AutoscalingTester.java index 830bf8b686d..c0896826502 100644 --- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/AutoscalingTester.java +++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/AutoscalingTester.java @@ -47,21 +47,20 @@ class AutoscalingTester { /** Creates an autoscaling tester with a single host type ready */ public AutoscalingTester(NodeResources hostResources) { - this(new Zone(Environment.prod, RegionName.from("us-east")), null, null, asConfig(hostResources)); - provisioningTester.makeReadyNodes(20, "hostFlavor", NodeType.host, 8); // "hostFlavor" generated by asConfig + this(new Zone(Environment.prod, RegionName.from("us-east")), List.of(new Flavor("hostFlavor", hostResources)), null); + provisioningTester.makeReadyNodes(20, "hostFlavor", NodeType.host, 8); provisioningTester.deployZoneApp(); } public AutoscalingTester(Zone zone, List<Flavor> flavors) { this(zone, flavors, - new InMemoryFlagSource().withBooleanFlag(Flags.ENABLE_DYNAMIC_PROVISIONING.id(), true), - asConfig(flavors)); + new InMemoryFlagSource().withBooleanFlag(Flags.ENABLE_DYNAMIC_PROVISIONING.id(), true)); } - private AutoscalingTester(Zone zone, List<Flavor> flavors, FlagSource flagSource, FlavorsConfig flavorsConfig) { + private AutoscalingTester(Zone zone, List<Flavor> flavors, FlagSource flagSource) { provisioningTester = new ProvisioningTester.Builder().zone(zone) - .flavorsConfig(flavorsConfig) + .flavors(flavors) .resourcesCalculator(new MockHostResourcesCalculator(zone)) .hostProvisioner(new MockHostProvisioner(flavors)) .flagSource(flagSource) @@ -197,31 +196,6 @@ class AutoscalingTester { public NodeMetricsDb nodeMetricsDb() { return db; } - private static FlavorsConfig asConfig(NodeResources hostResources) { - FlavorsConfig.Builder b = new FlavorsConfig.Builder(); - b.flavor(asFlavorConfig("hostFlavor", hostResources)); - return b.build(); - } - - private static FlavorsConfig asConfig(List<Flavor> flavors) { - FlavorsConfig.Builder b = new FlavorsConfig.Builder(); - for (Flavor flavor : flavors) - b.flavor(asFlavorConfig(flavor.name(), flavor.resources())); - return b.build(); - } - - private static FlavorsConfig.Flavor.Builder asFlavorConfig(String flavorName, NodeResources resources) { - FlavorsConfig.Flavor.Builder flavor = new FlavorsConfig.Flavor.Builder(); - flavor.name(flavorName); - flavor.minCpuCores(resources.vcpu()); - flavor.minMainMemoryAvailableGb(resources.memoryGb()); - flavor.minDiskAvailableGb(resources.diskGb()); - flavor.bandwidth(resources.bandwidthGbps() * 1000); - flavor.fastDisk(resources.diskSpeed().compatibleWith(NodeResources.DiskSpeed.fast)); - flavor.remoteStorage(resources.storageType().compatibleWith(NodeResources.StorageType.remote)); - return flavor; - } - private static class MockHostResourcesCalculator implements HostResourcesCalculator { private final Zone zone; diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/ScalingSuggestionsMaintainerTest.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/ScalingSuggestionsMaintainerTest.java index a9473866b4e..59da88790d7 100644 --- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/ScalingSuggestionsMaintainerTest.java +++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/ScalingSuggestionsMaintainerTest.java @@ -71,9 +71,9 @@ public class ScalingSuggestionsMaintainerTest { Duration.ofMinutes(1)); maintainer.maintain(); - assertEquals("7 nodes with [vcpu: 15.3, memory: 5.1 Gb, disk 15.0 Gb, bandwidth: 0.1 Gbps]", + assertEquals("7 nodes with [vcpu: 15.3, memory: 5.1 Gb, disk 15.0 Gb, bandwidth: 0.1 Gbps, storage type: remote]", tester.nodeRepository().applications().get(app1).get().cluster(cluster1.id()).get().suggestedResources().get().toString()); - assertEquals("7 nodes with [vcpu: 16.8, memory: 5.7 Gb, disk 16.5 Gb, bandwidth: 0.1 Gbps]", + assertEquals("7 nodes with [vcpu: 16.8, memory: 5.7 Gb, disk 16.5 Gb, bandwidth: 0.1 Gbps, storage type: remote]", tester.nodeRepository().applications().get(app2).get().cluster(cluster2.id()).get().suggestedResources().get().toString()); } diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/provisioning/ProvisioningTest.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/provisioning/ProvisioningTest.java index 463a3ef3fb9..573a05baab5 100644 --- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/provisioning/ProvisioningTest.java +++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/provisioning/ProvisioningTest.java @@ -9,6 +9,7 @@ import com.yahoo.config.provision.ClusterResources; import com.yahoo.config.provision.ClusterSpec; import com.yahoo.config.provision.DockerImage; import com.yahoo.config.provision.Environment; +import com.yahoo.config.provision.Flavor; import com.yahoo.config.provision.HostFilter; import com.yahoo.config.provision.HostSpec; import com.yahoo.config.provision.NodeResources; @@ -408,8 +409,11 @@ public class ProvisioningTest { @Test public void test_changing_limits() { - ProvisioningTester tester = new ProvisioningTester.Builder().zone(new Zone(Environment.prod, RegionName.from("us-east"))).build(); - tester.makeReadyHosts(30, new NodeResources(20, 40, 100, 4)).deployZoneApp(); + Flavor hostFlavor = new Flavor(new NodeResources(20, 40, 100, 4)); + ProvisioningTester tester = new ProvisioningTester.Builder().zone(new Zone(Environment.prod, RegionName.from("us-east"))) + .flavors(List.of(hostFlavor)) + .build(); + tester.makeReadyHosts(30, hostFlavor.resources()).deployZoneApp(); ApplicationId app1 = tester.makeApplicationId("app1"); ClusterSpec cluster1 = ClusterSpec.request(ClusterSpec.Type.content, new ClusterSpec.Id("cluster1")).vespaVersion("7").build(); @@ -455,6 +459,13 @@ public class ProvisioningTest { tester.assertNodes("Groups changed", 6, 3, 10, 30, 10, app1, cluster1); + + // Stop specifying node resources + tester.activate(app1, cluster1, Capacity.from(new ClusterResources(6, 3, NodeResources.unspecified), + new ClusterResources(9, 3, NodeResources.unspecified))); + tester.assertNodes("Groups changed", + 6, 3, 10, 30, 10, + app1, cluster1); } @Test(expected = IllegalArgumentException.class) diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/provisioning/ProvisioningTester.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/provisioning/ProvisioningTester.java index ef988cad39d..128edce5465 100644 --- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/provisioning/ProvisioningTester.java +++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/provisioning/ProvisioningTester.java @@ -509,6 +509,11 @@ public class ProvisioningTester { return this; } + public Builder flavors(List<Flavor> flavors) { + this.flavorsConfig = asConfig(flavors); + return this; + } + public Builder resourcesCalculator(HostResourcesCalculator resourcesCalculator) { this.resourcesCalculator = resourcesCalculator; return this; @@ -566,6 +571,26 @@ public class ProvisioningTester { Optional.ofNullable(loadBalancerService).orElseGet(LoadBalancerServiceMock::new), Optional.ofNullable(flagSource).orElseGet(InMemoryFlagSource::new)); } + + private static FlavorsConfig asConfig(List<Flavor> flavors) { + FlavorsConfig.Builder b = new FlavorsConfig.Builder(); + for (Flavor flavor : flavors) + b.flavor(asFlavorConfig(flavor.name(), flavor.resources())); + return b.build(); + } + + private static FlavorsConfig.Flavor.Builder asFlavorConfig(String flavorName, NodeResources resources) { + FlavorsConfig.Flavor.Builder flavor = new FlavorsConfig.Flavor.Builder(); + flavor.name(flavorName); + flavor.minCpuCores(resources.vcpu()); + flavor.minMainMemoryAvailableGb(resources.memoryGb()); + flavor.minDiskAvailableGb(resources.diskGb()); + flavor.bandwidth(resources.bandwidthGbps() * 1000); + flavor.fastDisk(resources.diskSpeed().compatibleWith(NodeResources.DiskSpeed.fast)); + flavor.remoteStorage(resources.storageType().compatibleWith(NodeResources.StorageType.remote)); + return flavor; + } + } private static class NullProvisionLogger implements ProvisionLogger { |