From 0ba8b0001cee9ae1aad8fbdfac863a79da212d1c Mon Sep 17 00:00:00 2001 From: Jon Bratseth Date: Mon, 13 Feb 2023 12:23:55 +0100 Subject: Cold autoscaling WIP Core support for autoscaling using data from cluster deployments in other regions when there is little or no traffic in our own deployment. --- .../provision/applications/BcpGroupInfo.java | 66 +++++++++ .../hosted/provision/applications/Cluster.java | 22 ++- .../hosted/provision/autoscale/ClusterModel.java | 157 +++++++++++++-------- .../vespa/hosted/provision/autoscale/Load.java | 4 + .../persistence/ApplicationSerializer.java | 22 +++ 5 files changed, 206 insertions(+), 65 deletions(-) create mode 100644 node-repository/src/main/java/com/yahoo/vespa/hosted/provision/applications/BcpGroupInfo.java (limited to 'node-repository/src/main/java/com') diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/applications/BcpGroupInfo.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/applications/BcpGroupInfo.java new file mode 100644 index 00000000000..6b0ea8532be --- /dev/null +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/applications/BcpGroupInfo.java @@ -0,0 +1,66 @@ +package com.yahoo.vespa.hosted.provision.applications; + +import java.util.Objects; + +/** + * When there are multiple deployments of an application in different regions, + * instances of the cluster across regions may form a "BCP group". + * By default the clusters in all production regions form such a group, but other arrangements + * may be specified in deployment.xml, see com.yahoo.config.application.api.Bcp. + * + * This contains metrics averaged over the other clusters in the group this belongs to, + * which is used to amend scaling decisions in this cluster when it has little traffic on its own. + * + * @author bratseth + */ +public class BcpGroupInfo { + + private static final BcpGroupInfo empty = new BcpGroupInfo(0, 0, 0); + + private final double queryRate; + private final double growthRateHeadroom; + private final double cpuCostPerQuery; + + public BcpGroupInfo(double queryRate, double growthRateHeadroom, double cpuCostPerQuery) { + this.queryRate = queryRate; + this.growthRateHeadroom = growthRateHeadroom; + this.cpuCostPerQuery = cpuCostPerQuery; + } + + /** Returns the average query rate (queries/second) of the other clusters in the group this belongs to. */ + public double queryRate() { return queryRate; } + + /** Returns the average growth rate headroom of the other clusters in the group this belongs to. */ + public double growthRateHeadroom() { return growthRateHeadroom; } + + /** Returns the average total cluster CPU cost per query of the other clusters in the group this belongs to. */ + public double cpuCostPerQuery() { return cpuCostPerQuery; } + + public boolean isEmpty() { + return queryRate == 0 && growthRateHeadroom == 0 && cpuCostPerQuery == 0; + } + + @Override + public boolean equals(Object o) { + if (this == o) return true; + if ( ! (o instanceof BcpGroupInfo other)) return false; + if ( other.queryRate != this.queryRate) return false; + if ( other.growthRateHeadroom != this.growthRateHeadroom) return false; + if ( other.cpuCostPerQuery != this.cpuCostPerQuery) return false; + return true; + } + + @Override + public int hashCode() { + return Objects.hash(queryRate, growthRateHeadroom, cpuCostPerQuery); + } + + @Override + public String toString() { + return "BCP group info: " + queryRate + " q/s, " + growthRateHeadroom + " q/s headroom, " + + cpuCostPerQuery + " CPU cost per q/s"; + } + + public static BcpGroupInfo empty() { return empty; } + +} diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/applications/Cluster.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/applications/Cluster.java index 8da6bd6937b..ea4944c2bd5 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/applications/Cluster.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/applications/Cluster.java @@ -33,6 +33,7 @@ public class Cluster { private final boolean required; private final Autoscaling suggested; private final Autoscaling target; + private final BcpGroupInfo bcpGroupInfo; /** The maxScalingEvents last scaling events of this, sorted by increasing time (newest last) */ private final List scalingEvents; @@ -45,6 +46,7 @@ public class Cluster { boolean required, Autoscaling suggested, Autoscaling target, + BcpGroupInfo bcpGroupInfo, List scalingEvents) { this.id = Objects.requireNonNull(id); this.exclusive = exclusive; @@ -58,6 +60,7 @@ public class Cluster { this.target = Autoscaling.empty(); else this.target = target; + this.bcpGroupInfo = Objects.requireNonNull(bcpGroupInfo); this.scalingEvents = List.copyOf(scalingEvents); } @@ -77,7 +80,7 @@ public class Cluster { /** * Returns whether the resources of this cluster are required to be within the specified min and max. - * Otherwise they may be adjusted by capacity policies. + * Otherwise, they may be adjusted by capacity policies. */ public boolean required() { return required; } @@ -102,6 +105,9 @@ public class Cluster { return true; } + /** Returns info about the BCP group of clusters this belongs to. */ + public BcpGroupInfo bcpGroupInfo() { return bcpGroupInfo; } + /** Returns the recent scaling events in this cluster */ public List scalingEvents() { return scalingEvents; } @@ -113,15 +119,19 @@ public class Cluster { public Cluster withConfiguration(boolean exclusive, Capacity capacity) { return new Cluster(id, exclusive, capacity.minResources(), capacity.maxResources(), capacity.groupSize(), capacity.isRequired(), - suggested, target, scalingEvents); + suggested, target, bcpGroupInfo, scalingEvents); } public Cluster withSuggested(Autoscaling suggested) { - return new Cluster(id, exclusive, min, max, groupSize, required, suggested, target, scalingEvents); + return new Cluster(id, exclusive, min, max, groupSize, required, suggested, target, bcpGroupInfo, scalingEvents); } public Cluster withTarget(Autoscaling target) { - return new Cluster(id, exclusive, min, max, groupSize, required, suggested, target, scalingEvents); + return new Cluster(id, exclusive, min, max, groupSize, required, suggested, target, bcpGroupInfo, scalingEvents); + } + + public Cluster with(BcpGroupInfo bcpGroupInfo) { + return new Cluster(id, exclusive, min, max, groupSize, required, suggested, target, bcpGroupInfo, scalingEvents); } /** Add or update (based on "at" time) a scaling event */ @@ -135,7 +145,7 @@ public class Cluster { scalingEvents.add(scalingEvent); prune(scalingEvents); - return new Cluster(id, exclusive, min, max, groupSize, required, suggested, target, scalingEvents); + return new Cluster(id, exclusive, min, max, groupSize, required, suggested, target, bcpGroupInfo, scalingEvents); } @Override @@ -167,7 +177,7 @@ public class Cluster { public static Cluster create(ClusterSpec.Id id, boolean exclusive, Capacity requested) { return new Cluster(id, exclusive, requested.minResources(), requested.maxResources(), requested.groupSize(), requested.isRequired(), - Autoscaling.empty(), Autoscaling.empty(), List.of()); + Autoscaling.empty(), Autoscaling.empty(), BcpGroupInfo.empty(), List.of()); } /** The predicted time it will take to rescale this cluster. */ diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/ClusterModel.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/ClusterModel.java index da74ad0b63b..4edcdbd3fa5 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/ClusterModel.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/ClusterModel.java @@ -30,6 +30,9 @@ public class ClusterModel { /** Containers typically use more cpu right after generation change, so discard those metrics */ public static final Duration warmupDuration = Duration.ofMinutes(7); + /** If we have less than this query rate, we cannot be fully confident in our load data, which influences some decisions. */ + public static final double queryRateGivingFullConfidence = 100.0; + static final double idealQueryCpuLoad = 0.8; static final double idealWriteCpuLoad = 0.95; @@ -48,8 +51,13 @@ public class ClusterModel { private final Application application; private final ClusterSpec clusterSpec; private final Cluster cluster; - /** The current nodes of this cluster, or empty if this models a new cluster not yet deployed */ + + /** + * The current active nodes of this cluster, including retired, + * or empty if this models a new cluster not yet deployed. + */ private final NodeList nodes; + private final Clock clock; private final Duration scalingDuration; private final ClusterTimeseries clusterTimeseries; @@ -118,6 +126,14 @@ public class ClusterModel { return adjustment; } + public OptionalDouble cpuCostPerQuery() { + if (averageQueryRate().isEmpty()) return OptionalDouble.empty(); + // TODO: Query rate should generally be sampled at the time where we see the peak resource usage + int fanOut = clusterSpec.type().isContainer() ? 1 : groupSize(); + return OptionalDouble.of(peakLoad().cpu() * queryCpuFraction() * fanOut * nodes.not().retired().first().get().resources().vcpu() + / averageQueryRate().getAsDouble() / groupCount()); + } + public boolean isStable(NodeRepository nodeRepository) { // An autoscaling decision was recently made if (hasScaledIn(Duration.ofMinutes(5))) @@ -143,59 +159,12 @@ public class ClusterModel { return true; } - private boolean hasScaledIn(Duration period) { - return cluster.lastScalingEvent().map(event -> event.at()).orElse(Instant.MIN) - .isAfter(clock.instant().minus(period)); - } - /** Returns the predicted duration of a rescaling of this cluster */ public Duration scalingDuration() { return scalingDuration; } - public ClusterNodesTimeseries nodeTimeseries() { return nodeTimeseries; } - - public ClusterTimeseries clusterTimeseries() { return clusterTimeseries; } - - /** - * Returns the predicted max query growth rate per minute as a fraction of the average traffic - * in the scaling window. - */ - public double maxQueryGrowthRate() { - if (maxQueryGrowthRate != null) return maxQueryGrowthRate; - return maxQueryGrowthRate = clusterTimeseries().maxQueryGrowthRate(scalingDuration(), clock); - } - - /** Returns the average query rate in the scaling window as a fraction of the max observed query rate. */ - public double queryFractionOfMax() { - if (queryFractionOfMax != null) return queryFractionOfMax; - return queryFractionOfMax = clusterTimeseries().queryFractionOfMax(scalingDuration(), clock); - } - - /** Returns the average query rate in the scaling window. */ - public OptionalDouble averageQueryRate() { - if (averageQueryRate != null) return averageQueryRate; - return averageQueryRate = clusterTimeseries().queryRate(scalingDuration(), clock); - } - /** Returns the average of the peak load measurement in each dimension, from each node. */ - public Load peakLoad() { return nodeTimeseries().peakLoad(); } - - /** The number of nodes this cluster has, or will have if not deployed yet. */ - // TODO: Make this the deployed, not current count - public int nodeCount() { - if ( ! nodes.isEmpty()) return (int)nodes.stream().count(); - return cluster.minResources().nodes(); - } - - /** The number of groups this cluster has, or will have if not deployed yet. */ - // TODO: Make this the deployed, not current count - public int groupCount() { - if ( ! nodes.isEmpty()) return (int)nodes.stream().mapToInt(node -> node.allocation().get().membership().cluster().group().get().index()).distinct().count(); - return cluster.minResources().groups(); - } - - public int groupSize() { - // ceil: If the division does not produce a whole number we assume some node is missing - return (int)Math.ceil((double)nodeCount() / groupCount()); + public Load peakLoad() { + return nodeTimeseries().peakLoad(); } /** Returns the relative load adjustment accounting for redundancy in this. */ @@ -235,15 +204,88 @@ public class ClusterModel { * if one of the nodes go down. */ public Load idealLoad() { - return new Load(idealCpuLoad(), idealMemoryLoad(), idealDiskLoad()).divide(redundancyAdjustment()); + var ideal = new Load(idealCpuLoad(), idealMemoryLoad(), idealDiskLoad()).divide(redundancyAdjustment()); + if (! cluster.bcpGroupInfo().isEmpty()) { + // Do a weighted sum of the ideal "vote" based on local and bcp group info. + // This avoids any discontinuities with a near-zero local query rate. + double localInformationWeight = Math.min(1, averageQueryRate().orElse(0) / + Math.min(queryRateGivingFullConfidence, cluster.bcpGroupInfo().queryRate())); + Load bcpGroupIdeal = adjustQueryDependentIdealLoadByBcpGroupInfo(ideal); + ideal = ideal.multiply(localInformationWeight).add(bcpGroupIdeal.multiply(1 - localInformationWeight)); + } + return ideal; } - public int nodesAdjustedForRedundancy(int nodes, int groups) { + /** Returns the instant this model was created. */ + public Instant at() { return at;} + + private Load adjustQueryDependentIdealLoadByBcpGroupInfo(Load ideal) { + double currentClusterTotalVcpuPerGroup = nodes.not().retired().first().get().resources().vcpu() * groupSize(); + + double targetQueryRateToHandle = cluster.bcpGroupInfo().queryRate() * cluster.bcpGroupInfo().growthRateHeadroom() * trafficShiftHeadroom(); + double neededTotalVcpPerGroup = cluster.bcpGroupInfo().cpuCostPerQuery() * targetQueryRateToHandle / groupCount() + + ( 1 - queryCpuFraction()) * idealCpuLoad() * + (clusterSpec.type().isContainer() ? 1 : groupSize()); + + double cpuAdjustment = neededTotalVcpPerGroup / currentClusterTotalVcpuPerGroup; + return ideal.withCpu(peakLoad().cpu() / cpuAdjustment); + } + + private boolean hasScaledIn(Duration period) { + return cluster.lastScalingEvent().map(event -> event.at()).orElse(Instant.MIN) + .isAfter(clock.instant().minus(period)); + } + + private ClusterNodesTimeseries nodeTimeseries() { return nodeTimeseries; } + + private ClusterTimeseries clusterTimeseries() { return clusterTimeseries; } + + /** + * Returns the predicted max query growth rate per minute as a fraction of the average traffic + * in the scaling window. + */ + private double maxQueryGrowthRate() { + if (maxQueryGrowthRate != null) return maxQueryGrowthRate; + return maxQueryGrowthRate = clusterTimeseries().maxQueryGrowthRate(scalingDuration(), clock); + } + + /** Returns the average query rate in the scaling window as a fraction of the max observed query rate. */ + private double queryFractionOfMax() { + if (queryFractionOfMax != null) return queryFractionOfMax; + return queryFractionOfMax = clusterTimeseries().queryFractionOfMax(scalingDuration(), clock); + } + + /** Returns the average query rate in the scaling window. */ + private OptionalDouble averageQueryRate() { + if (averageQueryRate != null) return averageQueryRate; + return averageQueryRate = clusterTimeseries().queryRate(scalingDuration(), clock); + } + + /** The number of nodes this cluster has, or will have if not deployed yet. */ + // TODO: Make this the deployed, not current count + private int nodeCount() { + if ( ! nodes.isEmpty()) return (int)nodes.stream().count(); + return cluster.minResources().nodes(); + } + + /** The number of groups this cluster has, or will have if not deployed yet. */ + // TODO: Make this the deployed, not current count + private int groupCount() { + if ( ! nodes.isEmpty()) return (int)nodes.stream().mapToInt(node -> node.allocation().get().membership().cluster().group().get().index()).distinct().count(); + return cluster.minResources().groups(); + } + + private int groupSize() { + // ceil: If the division does not produce a whole number we assume some node is missing + return (int)Math.ceil((double)nodeCount() / groupCount()); + } + + private int nodesAdjustedForRedundancy(int nodes, int groups) { int groupSize = (int)Math.ceil((double)nodes / groups); return nodes > 1 ? (groups == 1 ? nodes - 1 : nodes - groupSize) : nodes; } - public int groupsAdjustedForRedundancy(int nodes, int groups) { + private int groupsAdjustedForRedundancy(int nodes, int groups) { return nodes > 1 ? (groups == 1 ? 1 : groups - 1) : groups; } @@ -258,9 +300,6 @@ public class ClusterModel { (1 - queryCpuFraction) * idealWriteCpuLoad; } - /** Returns the instant this model was created. */ - public Instant at() { return at;} - /** Returns the headroom for growth during organic traffic growth as a multiple of current resources. */ private double growthRateHeadroom() { if ( ! zone.environment().isProduction()) return 1; @@ -280,7 +319,7 @@ public class ClusterModel { if ( ! zone.environment().isProduction()) return 1; double trafficShiftHeadroom; if (application.status().maxReadShare() == 0) // No traffic fraction data - trafficShiftHeadroom = 2.0; // assume we currently get half of the global share of traffic + trafficShiftHeadroom = 2.0; // assume we currently get half of the max possible share of traffic else if (application.status().currentReadShare() == 0) trafficShiftHeadroom = 1/application.status().maxReadShare(); else @@ -294,11 +333,11 @@ public class ClusterModel { * with high confidence to avoid large adjustments caused by random noise due to low traffic numbers. */ private double adjustByConfidence(double headroom) { - return ( (headroom -1 ) * Math.min(1, averageQueryRate().orElse(0) / 100.0) ) + 1; + return ( (headroom -1 ) * Math.min(1, averageQueryRate().orElse(0) / queryRateGivingFullConfidence) ) + 1; } /** The estimated fraction of cpu usage which goes to processing queries vs. writes */ - public double queryCpuFraction() { + private double queryCpuFraction() { OptionalDouble writeRate = clusterTimeseries().writeRate(scalingDuration(), clock); if (averageQueryRate().orElse(0) == 0 && writeRate.orElse(0) == 0) return queryCpuFraction(0.5); return queryCpuFraction(averageQueryRate().orElse(0) / (averageQueryRate().orElse(0) + writeRate.orElse(0))); diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/Load.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/Load.java index 6ab5ff731d3..a2fa6e63922 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/Load.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/Load.java @@ -30,6 +30,10 @@ public class Load { public double memory() { return memory; } public double disk() { return disk; } + public Load withCpu(double cpu) { return new Load(cpu, memory, disk); } + public Load withMemory(double memory) { return new Load(cpu, memory, disk); } + public Load withDisk(double disk) { return new Load(cpu, memory, disk); } + public Load add(Load other) { return join(other, (a, b) -> a + b); } diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/persistence/ApplicationSerializer.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/persistence/ApplicationSerializer.java index 469cabc4ee4..1b73dee8b6c 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/persistence/ApplicationSerializer.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/persistence/ApplicationSerializer.java @@ -12,6 +12,7 @@ import com.yahoo.slime.Slime; import com.yahoo.slime.SlimeUtils; import com.yahoo.vespa.hosted.provision.applications.Application; import com.yahoo.vespa.hosted.provision.applications.Cluster; +import com.yahoo.vespa.hosted.provision.applications.BcpGroupInfo; import com.yahoo.vespa.hosted.provision.applications.ScalingEvent; import com.yahoo.vespa.hosted.provision.applications.Status; import com.yahoo.vespa.hosted.provision.autoscale.Autoscaling; @@ -53,6 +54,10 @@ public class ApplicationSerializer { private static final String groupSizeKey = "groupSize"; private static final String requiredKey = "required"; private static final String suggestedKey = "suggested"; + private static final String bcpGroupInfoKey = "bcpGroupInfo"; + private static final String queryRateKey = "queryRateKey"; + private static final String growthRateHeadroomKey = "growthRateHeadroomKey"; + private static final String cpuCostPerQueryKey = "cpuCostPerQueryKey"; private static final String resourcesKey = "resources"; private static final String targetKey = "target"; private static final String nodesKey = "nodes"; @@ -129,6 +134,8 @@ public class ApplicationSerializer { clusterObject.setBool(requiredKey, cluster.required()); toSlime(cluster.suggested(), clusterObject.setObject(suggestedKey)); toSlime(cluster.target(), clusterObject.setObject(targetKey)); + if (! cluster.bcpGroupInfo().isEmpty()) + toSlime(cluster.bcpGroupInfo(), clusterObject.setObject(bcpGroupInfoKey)); scalingEventsToSlime(cluster.scalingEvents(), clusterObject.setArray(scalingEventsKey)); } @@ -141,6 +148,7 @@ public class ApplicationSerializer { clusterObject.field(requiredKey).asBool(), autoscalingFromSlime(clusterObject.field(suggestedKey), clusterObject.field("nonExisting")), autoscalingFromSlime(clusterObject.field(targetKey), clusterObject.field(autoscalingStatusObjectKey)), + bcpGroupInfoFromSlime(clusterObject.field(bcpGroupInfoKey)), scalingEventsFromSlime(clusterObject.field(scalingEventsKey))); } @@ -222,6 +230,20 @@ public class ApplicationSerializer { loadFromSlime(autoscalingObject.field(idealKey))); } + private static void toSlime(BcpGroupInfo bcpGroupInfo, Cursor bcpGroupInfoObject) { + if (bcpGroupInfo.isEmpty()) return; + bcpGroupInfoObject.setDouble(queryRateKey, bcpGroupInfo.queryRate()); + bcpGroupInfoObject.setDouble(growthRateHeadroomKey, bcpGroupInfo.growthRateHeadroom()); + bcpGroupInfoObject.setDouble(cpuCostPerQueryKey, bcpGroupInfo.cpuCostPerQuery()); + } + + private static BcpGroupInfo bcpGroupInfoFromSlime(Inspector bcpGroupInfoObject) { + if ( ! bcpGroupInfoObject.valid()) return BcpGroupInfo.empty(); + return new BcpGroupInfo(bcpGroupInfoObject.field(queryRateKey).asDouble(), + bcpGroupInfoObject.field(growthRateHeadroomKey).asDouble(), + bcpGroupInfoObject.field(cpuCostPerQueryKey).asDouble()); + } + private static void scalingEventsToSlime(List scalingEvents, Cursor eventArray) { scalingEvents.forEach(event -> toSlime(event, eventArray.addObject())); } -- cgit v1.2.3