summary | refs | log | tree | commit | diff | stats
path: root/node-repository/src/main/java
diff options
context:
space:
mode:
author Jon Bratseth <bratseth@gmail.com> 2023-02-13 12:23:55 +0100
committer Jon Bratseth <bratseth@gmail.com> 2023-02-13 12:23:55 +0100
commit 0ba8b0001cee9ae1aad8fbdfac863a79da212d1c (patch)
tree 2903f5356a7efa3646b1bce6c16e000286335f06 /node-repository/src/main/java
parent fa526bcc311ae6080905b61fb9248aca82aa4991 (diff)
Cold autoscaling WIP
Core support for autoscaling using data from cluster deployments in other regions when there is little or no traffic in our own deployment.
Diffstat (limited to 'node-repository/src/main/java')
-rw-r--r-- node-repository/src/main/java/com/yahoo/vespa/hosted/provision/applications/BcpGroupInfo.java 66
-rw-r--r-- node-repository/src/main/java/com/yahoo/vespa/hosted/provision/applications/Cluster.java 22
-rw-r--r-- node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/ClusterModel.java 157
-rw-r--r-- node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/Load.java 4
-rw-r--r-- node-repository/src/main/java/com/yahoo/vespa/hosted/provision/persistence/ApplicationSerializer.java 22
5 files changed, 206 insertions, 65 deletions
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/applications/BcpGroupInfo.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/applications/BcpGroupInfo.java
new file mode 100644
index 00000000000..6b0ea8532be
--- /dev/null
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/applications/BcpGroupInfo.java
@@ -0,0 +1,66 @@
+package com.yahoo.vespa.hosted.provision.applications;
+
+import java.util.Objects;
+
+/**
+ * When there are multiple deployments of an application in different regions,
+ * instances of the cluster across regions may form a "BCP group".
+ * By default the clusters in all production regions form such a group, but other arrangements
+ * may be specified in deployment.xml, see com.yahoo.config.application.api.Bcp.
+ *
+ * This contains metrics averaged over the other clusters in the group this belongs to,
+ * which is used to amend scaling decisions in this cluster when it has little traffic on its own.
+ *
+ * @author bratseth
+ */
+public class BcpGroupInfo {
+
+    private static final BcpGroupInfo empty = new BcpGroupInfo(0, 0, 0);
+
+    private final double queryRate;
+    private final double growthRateHeadroom;
+    private final double cpuCostPerQuery;
+
+    public BcpGroupInfo(double queryRate, double growthRateHeadroom, double cpuCostPerQuery) {
+        this.queryRate = queryRate;
+        this.growthRateHeadroom = growthRateHeadroom;
+        this.cpuCostPerQuery = cpuCostPerQuery;
+    }
+
+    /** Returns the average query rate (queries/second) of the other clusters in the group this belongs to. */
+    public double queryRate() { return queryRate; }
+
+    /** Returns the average growth rate headroom of the other clusters in the group this belongs to. */
+    public double growthRateHeadroom() { return growthRateHeadroom; }
+
+    /** Returns the average total cluster CPU cost per query of the other clusters in the group this belongs to. */
+    public double cpuCostPerQuery() { return cpuCostPerQuery; }
+
+    /** Returns whether all three values are zero, which is what the instance returned by empty() holds. */
+    public boolean isEmpty() {
+        return queryRate == 0 && growthRateHeadroom == 0 && cpuCostPerQuery == 0;
+    }
+
+    /** Compares by value with Double.compare so equal instances always hash alike (0.0 vs -0.0, NaN vs NaN). */
+    @Override
+    public boolean equals(Object o) {
+        if (this == o) return true;
+        if ( ! (o instanceof BcpGroupInfo other)) return false;
+        return Double.compare(other.queryRate, this.queryRate) == 0 &&
+               Double.compare(other.growthRateHeadroom, this.growthRateHeadroom) == 0 &&
+               Double.compare(other.cpuCostPerQuery, this.cpuCostPerQuery) == 0;
+    }
+
+    @Override
+    public int hashCode() { return Objects.hash(queryRate, growthRateHeadroom, cpuCostPerQuery); }
+
+    @Override
+    public String toString() {
+        return "BCP group info: " + queryRate + " q/s, " + growthRateHeadroom + " q/s headroom, " +
+               cpuCostPerQuery + " CPU cost per q/s";
+    }
+
+    /** Returns the shared all-zero instance used when no BCP group info is available. */
+    public static BcpGroupInfo empty() { return empty; }
+
+}
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/applications/Cluster.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/applications/Cluster.java
index 8da6bd6937b..ea4944c2bd5 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/applications/Cluster.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/applications/Cluster.java
@@ -33,6 +33,7 @@ public class Cluster {
private final boolean required;
private final Autoscaling suggested;
private final Autoscaling target;
+ private final BcpGroupInfo bcpGroupInfo;
/** The maxScalingEvents last scaling events of this, sorted by increasing time (newest last) */
private final List<ScalingEvent> scalingEvents;
@@ -45,6 +46,7 @@ public class Cluster {
boolean required,
Autoscaling suggested,
Autoscaling target,
+ BcpGroupInfo bcpGroupInfo,
List<ScalingEvent> scalingEvents) {
this.id = Objects.requireNonNull(id);
this.exclusive = exclusive;
@@ -58,6 +60,7 @@ public class Cluster {
this.target = Autoscaling.empty();
else
this.target = target;
+ this.bcpGroupInfo = Objects.requireNonNull(bcpGroupInfo);
this.scalingEvents = List.copyOf(scalingEvents);
}
@@ -77,7 +80,7 @@ public class Cluster {
/**
* Returns whether the resources of this cluster are required to be within the specified min and max.
- * Otherwise they may be adjusted by capacity policies.
+ * Otherwise, they may be adjusted by capacity policies.
*/
public boolean required() { return required; }
@@ -102,6 +105,9 @@ public class Cluster {
return true;
}
+ /** Returns info about the BCP group of clusters this belongs to. */
+ public BcpGroupInfo bcpGroupInfo() { return bcpGroupInfo; }
+
/** Returns the recent scaling events in this cluster */
public List<ScalingEvent> scalingEvents() { return scalingEvents; }
@@ -113,15 +119,19 @@ public class Cluster {
public Cluster withConfiguration(boolean exclusive, Capacity capacity) {
return new Cluster(id, exclusive,
capacity.minResources(), capacity.maxResources(), capacity.groupSize(), capacity.isRequired(),
- suggested, target, scalingEvents);
+ suggested, target, bcpGroupInfo, scalingEvents);
}
public Cluster withSuggested(Autoscaling suggested) {
- return new Cluster(id, exclusive, min, max, groupSize, required, suggested, target, scalingEvents);
+ return new Cluster(id, exclusive, min, max, groupSize, required, suggested, target, bcpGroupInfo, scalingEvents);
}
public Cluster withTarget(Autoscaling target) {
- return new Cluster(id, exclusive, min, max, groupSize, required, suggested, target, scalingEvents);
+ return new Cluster(id, exclusive, min, max, groupSize, required, suggested, target, bcpGroupInfo, scalingEvents);
+ }
+
+ public Cluster with(BcpGroupInfo bcpGroupInfo) {
+ return new Cluster(id, exclusive, min, max, groupSize, required, suggested, target, bcpGroupInfo, scalingEvents);
}
/** Add or update (based on "at" time) a scaling event */
@@ -135,7 +145,7 @@ public class Cluster {
scalingEvents.add(scalingEvent);
prune(scalingEvents);
- return new Cluster(id, exclusive, min, max, groupSize, required, suggested, target, scalingEvents);
+ return new Cluster(id, exclusive, min, max, groupSize, required, suggested, target, bcpGroupInfo, scalingEvents);
}
@Override
@@ -167,7 +177,7 @@ public class Cluster {
public static Cluster create(ClusterSpec.Id id, boolean exclusive, Capacity requested) {
return new Cluster(id, exclusive,
requested.minResources(), requested.maxResources(), requested.groupSize(), requested.isRequired(),
- Autoscaling.empty(), Autoscaling.empty(), List.of());
+ Autoscaling.empty(), Autoscaling.empty(), BcpGroupInfo.empty(), List.of());
}
/** The predicted time it will take to rescale this cluster. */
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/ClusterModel.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/ClusterModel.java
index da74ad0b63b..4edcdbd3fa5 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/ClusterModel.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/ClusterModel.java
@@ -30,6 +30,9 @@ public class ClusterModel {
/** Containers typically use more cpu right after generation change, so discard those metrics */
public static final Duration warmupDuration = Duration.ofMinutes(7);
+ /** If we have less than this query rate, we cannot be fully confident in our load data, which influences some decisions. */
+ public static final double queryRateGivingFullConfidence = 100.0;
+
static final double idealQueryCpuLoad = 0.8;
static final double idealWriteCpuLoad = 0.95;
@@ -48,8 +51,13 @@ public class ClusterModel {
private final Application application;
private final ClusterSpec clusterSpec;
private final Cluster cluster;
- /** The current nodes of this cluster, or empty if this models a new cluster not yet deployed */
+
+ /**
+ * The current active nodes of this cluster, including retired,
+ * or empty if this models a new cluster not yet deployed.
+ */
private final NodeList nodes;
+
private final Clock clock;
private final Duration scalingDuration;
private final ClusterTimeseries clusterTimeseries;
@@ -118,6 +126,14 @@ public class ClusterModel {
return adjustment;
}
+ public OptionalDouble cpuCostPerQuery() {
+     if (averageQueryRate().isEmpty() || averageQueryRate().getAsDouble() == 0) return OptionalDouble.empty(); // a zero rate would divide to Infinity/NaN
+     // TODO: Query rate should generally be sampled at the time where we see the peak resource usage
+     int fanOut = clusterSpec.type().isContainer() ? 1 : groupSize();
+     return OptionalDouble.of(peakLoad().cpu() * queryCpuFraction() * fanOut * nodes.not().retired().first().get().resources().vcpu()
+                              / averageQueryRate().getAsDouble() / groupCount());
+ }
+
public boolean isStable(NodeRepository nodeRepository) {
// An autoscaling decision was recently made
if (hasScaledIn(Duration.ofMinutes(5)))
@@ -143,59 +159,12 @@ public class ClusterModel {
return true;
}
- private boolean hasScaledIn(Duration period) {
- return cluster.lastScalingEvent().map(event -> event.at()).orElse(Instant.MIN)
- .isAfter(clock.instant().minus(period));
- }
-
/** Returns the predicted duration of a rescaling of this cluster */
public Duration scalingDuration() { return scalingDuration; }
- public ClusterNodesTimeseries nodeTimeseries() { return nodeTimeseries; }
-
- public ClusterTimeseries clusterTimeseries() { return clusterTimeseries; }
-
- /**
- * Returns the predicted max query growth rate per minute as a fraction of the average traffic
- * in the scaling window.
- */
- public double maxQueryGrowthRate() {
- if (maxQueryGrowthRate != null) return maxQueryGrowthRate;
- return maxQueryGrowthRate = clusterTimeseries().maxQueryGrowthRate(scalingDuration(), clock);
- }
-
- /** Returns the average query rate in the scaling window as a fraction of the max observed query rate. */
- public double queryFractionOfMax() {
- if (queryFractionOfMax != null) return queryFractionOfMax;
- return queryFractionOfMax = clusterTimeseries().queryFractionOfMax(scalingDuration(), clock);
- }
-
- /** Returns the average query rate in the scaling window. */
- public OptionalDouble averageQueryRate() {
- if (averageQueryRate != null) return averageQueryRate;
- return averageQueryRate = clusterTimeseries().queryRate(scalingDuration(), clock);
- }
-
/** Returns the average of the peak load measurement in each dimension, from each node. */
- public Load peakLoad() { return nodeTimeseries().peakLoad(); }
-
- /** The number of nodes this cluster has, or will have if not deployed yet. */
- // TODO: Make this the deployed, not current count
- public int nodeCount() {
- if ( ! nodes.isEmpty()) return (int)nodes.stream().count();
- return cluster.minResources().nodes();
- }
-
- /** The number of groups this cluster has, or will have if not deployed yet. */
- // TODO: Make this the deployed, not current count
- public int groupCount() {
- if ( ! nodes.isEmpty()) return (int)nodes.stream().mapToInt(node -> node.allocation().get().membership().cluster().group().get().index()).distinct().count();
- return cluster.minResources().groups();
- }
-
- public int groupSize() {
- // ceil: If the division does not produce a whole number we assume some node is missing
- return (int)Math.ceil((double)nodeCount() / groupCount());
+ public Load peakLoad() {
+ return nodeTimeseries().peakLoad();
}
/** Returns the relative load adjustment accounting for redundancy in this. */
@@ -235,15 +204,88 @@ public class ClusterModel {
* if one of the nodes go down.
*/
public Load idealLoad() {
-    return new Load(idealCpuLoad(), idealMemoryLoad(), idealDiskLoad()).divide(redundancyAdjustment());
+    var ideal = new Load(idealCpuLoad(), idealMemoryLoad(), idealDiskLoad()).divide(redundancyAdjustment());
+    // Use BCP group info only if the group has traffic: dividing by a zero group rate below yields NaN/Infinity.
+    if ( ! cluster.bcpGroupInfo().isEmpty() && cluster.bcpGroupInfo().queryRate() > 0) {
+        // Weighted sum of the local and BCP group ideal "votes": avoids discontinuities near a zero local query rate.
+        double localInformationWeight = Math.min(1, averageQueryRate().orElse(0) /
+                                                    Math.min(queryRateGivingFullConfidence, cluster.bcpGroupInfo().queryRate()));
+        Load bcpGroupIdeal = adjustQueryDependentIdealLoadByBcpGroupInfo(ideal);
+        ideal = ideal.multiply(localInformationWeight).add(bcpGroupIdeal.multiply(1 - localInformationWeight));
+    }
+    return ideal;
}
- public int nodesAdjustedForRedundancy(int nodes, int groups) {
+ /** Returns the instant this model was created. */
+ public Instant at() { return at;}
+
+ private Load adjustQueryDependentIdealLoadByBcpGroupInfo(Load ideal) {
+ double currentClusterTotalVcpuPerGroup = nodes.not().retired().first().get().resources().vcpu() * groupSize();
+
+ double targetQueryRateToHandle = cluster.bcpGroupInfo().queryRate() * cluster.bcpGroupInfo().growthRateHeadroom() * trafficShiftHeadroom();
+ double neededTotalVcpPerGroup = cluster.bcpGroupInfo().cpuCostPerQuery() * targetQueryRateToHandle / groupCount() +
+ ( 1 - queryCpuFraction()) * idealCpuLoad() *
+ (clusterSpec.type().isContainer() ? 1 : groupSize());
+
+ double cpuAdjustment = neededTotalVcpPerGroup / currentClusterTotalVcpuPerGroup;
+ return ideal.withCpu(peakLoad().cpu() / cpuAdjustment);
+ }
+
+ private boolean hasScaledIn(Duration period) {
+ return cluster.lastScalingEvent().map(event -> event.at()).orElse(Instant.MIN)
+ .isAfter(clock.instant().minus(period));
+ }
+
+ private ClusterNodesTimeseries nodeTimeseries() { return nodeTimeseries; }
+
+ private ClusterTimeseries clusterTimeseries() { return clusterTimeseries; }
+
+ /**
+ * Returns the predicted max query growth rate per minute as a fraction of the average traffic
+ * in the scaling window.
+ */
+ private double maxQueryGrowthRate() {
+ if (maxQueryGrowthRate != null) return maxQueryGrowthRate;
+ return maxQueryGrowthRate = clusterTimeseries().maxQueryGrowthRate(scalingDuration(), clock);
+ }
+
+ /** Returns the average query rate in the scaling window as a fraction of the max observed query rate. */
+ private double queryFractionOfMax() {
+ if (queryFractionOfMax != null) return queryFractionOfMax;
+ return queryFractionOfMax = clusterTimeseries().queryFractionOfMax(scalingDuration(), clock);
+ }
+
+ /** Returns the average query rate in the scaling window. */
+ private OptionalDouble averageQueryRate() {
+ if (averageQueryRate != null) return averageQueryRate;
+ return averageQueryRate = clusterTimeseries().queryRate(scalingDuration(), clock);
+ }
+
+ /** The number of nodes this cluster has, or will have if not deployed yet. */
+ // TODO: Make this the deployed, not current count
+ private int nodeCount() {
+ if ( ! nodes.isEmpty()) return (int)nodes.stream().count();
+ return cluster.minResources().nodes();
+ }
+
+ /** The number of groups this cluster has, or will have if not deployed yet. */
+ // TODO: Make this the deployed, not current count
+ private int groupCount() {
+ if ( ! nodes.isEmpty()) return (int)nodes.stream().mapToInt(node -> node.allocation().get().membership().cluster().group().get().index()).distinct().count();
+ return cluster.minResources().groups();
+ }
+
+ private int groupSize() {
+ // ceil: If the division does not produce a whole number we assume some node is missing
+ return (int)Math.ceil((double)nodeCount() / groupCount());
+ }
+
+ private int nodesAdjustedForRedundancy(int nodes, int groups) {
int groupSize = (int)Math.ceil((double)nodes / groups);
return nodes > 1 ? (groups == 1 ? nodes - 1 : nodes - groupSize) : nodes;
}
- public int groupsAdjustedForRedundancy(int nodes, int groups) {
+ private int groupsAdjustedForRedundancy(int nodes, int groups) {
return nodes > 1 ? (groups == 1 ? 1 : groups - 1) : groups;
}
@@ -258,9 +300,6 @@ public class ClusterModel {
(1 - queryCpuFraction) * idealWriteCpuLoad;
}
- /** Returns the instant this model was created. */
- public Instant at() { return at;}
-
/** Returns the headroom for growth during organic traffic growth as a multiple of current resources. */
private double growthRateHeadroom() {
if ( ! zone.environment().isProduction()) return 1;
@@ -280,7 +319,7 @@ public class ClusterModel {
if ( ! zone.environment().isProduction()) return 1;
double trafficShiftHeadroom;
if (application.status().maxReadShare() == 0) // No traffic fraction data
- trafficShiftHeadroom = 2.0; // assume we currently get half of the global share of traffic
+ trafficShiftHeadroom = 2.0; // assume we currently get half of the max possible share of traffic
else if (application.status().currentReadShare() == 0)
trafficShiftHeadroom = 1/application.status().maxReadShare();
else
@@ -294,11 +333,11 @@ public class ClusterModel {
* with high confidence to avoid large adjustments caused by random noise due to low traffic numbers.
*/
private double adjustByConfidence(double headroom) {
- return ( (headroom -1 ) * Math.min(1, averageQueryRate().orElse(0) / 100.0) ) + 1;
+ return ( (headroom -1 ) * Math.min(1, averageQueryRate().orElse(0) / queryRateGivingFullConfidence) ) + 1;
}
/** The estimated fraction of cpu usage which goes to processing queries vs. writes */
- public double queryCpuFraction() {
+ private double queryCpuFraction() {
OptionalDouble writeRate = clusterTimeseries().writeRate(scalingDuration(), clock);
if (averageQueryRate().orElse(0) == 0 && writeRate.orElse(0) == 0) return queryCpuFraction(0.5);
return queryCpuFraction(averageQueryRate().orElse(0) / (averageQueryRate().orElse(0) + writeRate.orElse(0)));
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/Load.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/Load.java
index 6ab5ff731d3..a2fa6e63922 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/Load.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/Load.java
@@ -30,6 +30,10 @@ public class Load {
public double memory() { return memory; }
public double disk() { return disk; }
+ public Load withCpu(double cpu) { return new Load(cpu, memory, disk); }
+ public Load withMemory(double memory) { return new Load(cpu, memory, disk); }
+ public Load withDisk(double disk) { return new Load(cpu, memory, disk); }
+
public Load add(Load other) {
return join(other, (a, b) -> a + b);
}
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/persistence/ApplicationSerializer.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/persistence/ApplicationSerializer.java
index 469cabc4ee4..1b73dee8b6c 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/persistence/ApplicationSerializer.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/persistence/ApplicationSerializer.java
@@ -12,6 +12,7 @@ import com.yahoo.slime.Slime;
import com.yahoo.slime.SlimeUtils;
import com.yahoo.vespa.hosted.provision.applications.Application;
import com.yahoo.vespa.hosted.provision.applications.Cluster;
+import com.yahoo.vespa.hosted.provision.applications.BcpGroupInfo;
import com.yahoo.vespa.hosted.provision.applications.ScalingEvent;
import com.yahoo.vespa.hosted.provision.applications.Status;
import com.yahoo.vespa.hosted.provision.autoscale.Autoscaling;
@@ -53,6 +54,10 @@ public class ApplicationSerializer {
private static final String groupSizeKey = "groupSize";
private static final String requiredKey = "required";
private static final String suggestedKey = "suggested";
+ private static final String bcpGroupInfoKey = "bcpGroupInfo";
+ private static final String queryRateKey = "queryRateKey";
+ private static final String growthRateHeadroomKey = "growthRateHeadroomKey";
+ private static final String cpuCostPerQueryKey = "cpuCostPerQueryKey";
private static final String resourcesKey = "resources";
private static final String targetKey = "target";
private static final String nodesKey = "nodes";
@@ -129,6 +134,8 @@ public class ApplicationSerializer {
clusterObject.setBool(requiredKey, cluster.required());
toSlime(cluster.suggested(), clusterObject.setObject(suggestedKey));
toSlime(cluster.target(), clusterObject.setObject(targetKey));
+ if (! cluster.bcpGroupInfo().isEmpty())
+ toSlime(cluster.bcpGroupInfo(), clusterObject.setObject(bcpGroupInfoKey));
scalingEventsToSlime(cluster.scalingEvents(), clusterObject.setArray(scalingEventsKey));
}
@@ -141,6 +148,7 @@ public class ApplicationSerializer {
clusterObject.field(requiredKey).asBool(),
autoscalingFromSlime(clusterObject.field(suggestedKey), clusterObject.field("nonExisting")),
autoscalingFromSlime(clusterObject.field(targetKey), clusterObject.field(autoscalingStatusObjectKey)),
+ bcpGroupInfoFromSlime(clusterObject.field(bcpGroupInfoKey)),
scalingEventsFromSlime(clusterObject.field(scalingEventsKey)));
}
@@ -222,6 +230,20 @@ public class ApplicationSerializer {
loadFromSlime(autoscalingObject.field(idealKey)));
}
+ private static void toSlime(BcpGroupInfo bcpGroupInfo, Cursor bcpGroupInfoObject) {
+ if (bcpGroupInfo.isEmpty()) return;
+ bcpGroupInfoObject.setDouble(queryRateKey, bcpGroupInfo.queryRate());
+ bcpGroupInfoObject.setDouble(growthRateHeadroomKey, bcpGroupInfo.growthRateHeadroom());
+ bcpGroupInfoObject.setDouble(cpuCostPerQueryKey, bcpGroupInfo.cpuCostPerQuery());
+ }
+
+ private static BcpGroupInfo bcpGroupInfoFromSlime(Inspector bcpGroupInfoObject) {
+ if ( ! bcpGroupInfoObject.valid()) return BcpGroupInfo.empty();
+ return new BcpGroupInfo(bcpGroupInfoObject.field(queryRateKey).asDouble(),
+ bcpGroupInfoObject.field(growthRateHeadroomKey).asDouble(),
+ bcpGroupInfoObject.field(cpuCostPerQueryKey).asDouble());
+ }
+
private static void scalingEventsToSlime(List<ScalingEvent> scalingEvents, Cursor eventArray) {
scalingEvents.forEach(event -> toSlime(event, eventArray.addObject()));
}