From 0ba8b0001cee9ae1aad8fbdfac863a79da212d1c Mon Sep 17 00:00:00 2001
From: Jon Bratseth <bratseth@gmail.com>
Date: Mon, 13 Feb 2023 12:23:55 +0100
Subject: Cold autoscaling WIP

Core support for autoscaling using data from cluster deployments in other regions
when there is little or no traffic in our own deployment.
---
 .../provision/applications/BcpGroupInfo.java       |  66 ++++++
 .../hosted/provision/applications/Cluster.java     |  22 +-
 .../hosted/provision/autoscale/ClusterModel.java   | 157 +++++++++-----
 .../vespa/hosted/provision/autoscale/Load.java     |   4 +
 .../persistence/ApplicationSerializer.java         |  22 ++
 .../provision/autoscale/AutoscalingTester.java     |   2 +-
 .../AutoscalingUsingBcpGroupInfoTest.java          | 238 +++++++++++++++++++++
 .../vespa/hosted/provision/autoscale/Fixture.java  |   7 +
 .../vespa/hosted/provision/autoscale/Loader.java   |   8 +-
 .../persistence/ApplicationSerializerTest.java     |   4 +
 10 files changed, 460 insertions(+), 70 deletions(-)
 create mode 100644 node-repository/src/main/java/com/yahoo/vespa/hosted/provision/applications/BcpGroupInfo.java
 create mode 100644 node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/AutoscalingUsingBcpGroupInfoTest.java

(limited to 'node-repository')

diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/applications/BcpGroupInfo.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/applications/BcpGroupInfo.java
new file mode 100644
index 00000000000..6b0ea8532be
--- /dev/null
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/applications/BcpGroupInfo.java
@@ -0,0 +1,66 @@
+package com.yahoo.vespa.hosted.provision.applications;
+
+import java.util.Objects;
+
+/**
+ * When there are multiple deployments of an application in different regions,
+ * instances of the cluster across regions may form a "BCP group".
+ * By default the clusters in all production regions form such a group, but other arrangements
+ * may be specified in deployment.xml, see com.yahoo.config.application.api.Bcp.
+ *
+ * This contains metrics averaged over the other clusters in the group this belongs to,
+ * which are used to amend scaling decisions in this cluster when it has little traffic of its own.
+ *
+ * @author bratseth
+ */
+public class BcpGroupInfo {
+
+    private static final BcpGroupInfo empty = new BcpGroupInfo(0, 0, 0);
+
+    private final double queryRate;
+    private final double growthRateHeadroom;
+    private final double cpuCostPerQuery;
+
+    public BcpGroupInfo(double queryRate, double growthRateHeadroom, double cpuCostPerQuery) {
+        this.queryRate = queryRate;
+        this.growthRateHeadroom = growthRateHeadroom;
+        this.cpuCostPerQuery = cpuCostPerQuery;
+    }
+
+    /** Returns the average query rate (queries/second) of the other clusters in the group this belongs to. */
+    public double queryRate() { return queryRate; }
+
+    /** Returns the average growth rate headroom of the other clusters in the group this belongs to. */
+    public double growthRateHeadroom() { return growthRateHeadroom; }
+
+    /** Returns the average total cluster CPU cost per query of the other clusters in the group this belongs to. */
+    public double cpuCostPerQuery() { return cpuCostPerQuery; }
+
+    public boolean isEmpty() {
+        return queryRate == 0 && growthRateHeadroom == 0 && cpuCostPerQuery == 0;
+    }
+
+    @Override
+    public boolean equals(Object o) {
+        if (this == o) return true;
+        if ( ! (o instanceof BcpGroupInfo other)) return false;
+        // Double.compare agrees with the boxed hashing in hashCode(): NaN equals NaN, and 0.0 differs from -0.0
+        return Double.compare(other.queryRate, this.queryRate) == 0 &&
+               Double.compare(other.growthRateHeadroom, this.growthRateHeadroom) == 0 &&
+               Double.compare(other.cpuCostPerQuery, this.cpuCostPerQuery) == 0;
+    }
+
+    @Override
+    public int hashCode() {
+        return Objects.hash(queryRate, growthRateHeadroom, cpuCostPerQuery);
+    }
+
+    @Override
+    public String toString() {
+        return "BCP group info: " + queryRate + " q/s, " + growthRateHeadroom + " q/s headroom, " +
+               cpuCostPerQuery + " CPU cost per q/s";
+    }
+
+    public static BcpGroupInfo empty() { return empty; }
+
+}
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/applications/Cluster.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/applications/Cluster.java
index 8da6bd6937b..ea4944c2bd5 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/applications/Cluster.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/applications/Cluster.java
@@ -33,6 +33,7 @@ public class Cluster {
     private final boolean required;
     private final Autoscaling suggested;
     private final Autoscaling target;
+    private final BcpGroupInfo bcpGroupInfo;
 
     /** The maxScalingEvents last scaling events of this, sorted by increasing time (newest last) */
     private final List<ScalingEvent> scalingEvents;
@@ -45,6 +46,7 @@ public class Cluster {
                    boolean required,
                    Autoscaling suggested,
                    Autoscaling target,
+                   BcpGroupInfo bcpGroupInfo,
                    List<ScalingEvent> scalingEvents) {
         this.id = Objects.requireNonNull(id);
         this.exclusive = exclusive;
@@ -58,6 +60,7 @@ public class Cluster {
             this.target = Autoscaling.empty();
         else
             this.target = target;
+        this.bcpGroupInfo = Objects.requireNonNull(bcpGroupInfo);
         this.scalingEvents = List.copyOf(scalingEvents);
     }
 
@@ -77,7 +80,7 @@ public class Cluster {
 
     /**
      * Returns whether the resources of this cluster are required to be within the specified min and max.
-     * Otherwise they may be adjusted by capacity policies.
+     * Otherwise, they may be adjusted by capacity policies.
      */
     public boolean required() { return required; }
 
@@ -102,6 +105,9 @@ public class Cluster {
         return true;
     }
 
+    /** Returns info about the BCP group of clusters this belongs to. */
+    public BcpGroupInfo bcpGroupInfo() { return bcpGroupInfo; }
+
     /** Returns the recent scaling events in this cluster */
     public List<ScalingEvent> scalingEvents() { return scalingEvents; }
 
@@ -113,15 +119,19 @@ public class Cluster {
     public Cluster withConfiguration(boolean exclusive, Capacity capacity) {
         return new Cluster(id, exclusive,
                            capacity.minResources(), capacity.maxResources(), capacity.groupSize(), capacity.isRequired(),
-                           suggested, target, scalingEvents);
+                           suggested, target, bcpGroupInfo, scalingEvents);
     }
 
     public Cluster withSuggested(Autoscaling suggested) {
-        return new Cluster(id, exclusive, min, max, groupSize, required, suggested, target, scalingEvents);
+        return new Cluster(id, exclusive, min, max, groupSize, required, suggested, target, bcpGroupInfo, scalingEvents);
     }
 
     public Cluster withTarget(Autoscaling target) {
-        return new Cluster(id, exclusive, min, max, groupSize, required, suggested, target, scalingEvents);
+        return new Cluster(id, exclusive, min, max, groupSize, required, suggested, target, bcpGroupInfo, scalingEvents);
+    }
+
+    public Cluster with(BcpGroupInfo bcpGroupInfo) {
+        return new Cluster(id, exclusive, min, max, groupSize, required, suggested, target, bcpGroupInfo, scalingEvents);
     }
 
     /** Add or update (based on "at" time) a scaling event */
@@ -135,7 +145,7 @@ public class Cluster {
             scalingEvents.add(scalingEvent);
 
         prune(scalingEvents);
-        return new Cluster(id, exclusive, min, max, groupSize, required, suggested, target, scalingEvents);
+        return new Cluster(id, exclusive, min, max, groupSize, required, suggested, target, bcpGroupInfo, scalingEvents);
     }
 
     @Override
@@ -167,7 +177,7 @@ public class Cluster {
     public static Cluster create(ClusterSpec.Id id, boolean exclusive, Capacity requested) {
         return new Cluster(id, exclusive,
                            requested.minResources(), requested.maxResources(), requested.groupSize(), requested.isRequired(),
-                           Autoscaling.empty(), Autoscaling.empty(), List.of());
+                           Autoscaling.empty(), Autoscaling.empty(), BcpGroupInfo.empty(), List.of());
     }
 
     /** The predicted time it will take to rescale this cluster. */
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/ClusterModel.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/ClusterModel.java
index da74ad0b63b..4edcdbd3fa5 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/ClusterModel.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/ClusterModel.java
@@ -30,6 +30,9 @@ public class ClusterModel {
     /** Containers typically use more cpu right after generation change, so discard those metrics */
     public static final Duration warmupDuration = Duration.ofMinutes(7);
 
+    /** If we have less than this query rate, we cannot be fully confident in our load data, which influences some decisions. */
+    public static final double queryRateGivingFullConfidence = 100.0;
+
     static final double idealQueryCpuLoad = 0.8;
     static final double idealWriteCpuLoad = 0.95;
 
@@ -48,8 +51,13 @@ public class ClusterModel {
     private final Application application;
     private final ClusterSpec clusterSpec;
     private final Cluster cluster;
-    /** The current nodes of this cluster, or empty if this models a new cluster not yet deployed */
+
+    /**
+     * The current active nodes of this cluster, including retired,
+     * or empty if this models a new cluster not yet deployed.
+     */
     private final NodeList nodes;
+
     private final Clock clock;
     private final Duration scalingDuration;
     private final ClusterTimeseries clusterTimeseries;
@@ -118,6 +126,14 @@ public class ClusterModel {
         return adjustment;
     }
 
+    public OptionalDouble cpuCostPerQuery() {
+        if (averageQueryRate().orElse(0) == 0) return OptionalDouble.empty(); // no traffic: cost unknown (and x/0 is NaN/Infinity)
+        // TODO: Query rate should generally be sampled at the time where we see the peak resource usage
+        int fanOut = clusterSpec.type().isContainer() ? 1 : groupSize();
+        return OptionalDouble.of(peakLoad().cpu() * queryCpuFraction() * fanOut * nodes.not().retired().first().get().resources().vcpu()
+                                 / averageQueryRate().getAsDouble() / groupCount());
+    }
+
     public boolean isStable(NodeRepository nodeRepository) {
         // An autoscaling decision was recently made
         if (hasScaledIn(Duration.ofMinutes(5)))
@@ -143,59 +159,12 @@ public class ClusterModel {
         return true;
     }
 
-    private boolean hasScaledIn(Duration period) {
-        return cluster.lastScalingEvent().map(event -> event.at()).orElse(Instant.MIN)
-                      .isAfter(clock.instant().minus(period));
-    }
-
     /** Returns the predicted duration of a rescaling of this cluster */
     public Duration scalingDuration() { return scalingDuration; }
 
-    public ClusterNodesTimeseries nodeTimeseries() { return nodeTimeseries; }
-
-    public ClusterTimeseries clusterTimeseries() { return clusterTimeseries; }
-
-    /**
-     * Returns the predicted max query growth rate per minute as a fraction of the average traffic
-     * in the scaling window.
-     */
-    public double maxQueryGrowthRate() {
-        if (maxQueryGrowthRate != null) return maxQueryGrowthRate;
-        return maxQueryGrowthRate = clusterTimeseries().maxQueryGrowthRate(scalingDuration(), clock);
-    }
-
-    /** Returns the average query rate in the scaling window as a fraction of the max observed query rate. */
-    public double queryFractionOfMax() {
-        if (queryFractionOfMax != null) return queryFractionOfMax;
-        return queryFractionOfMax = clusterTimeseries().queryFractionOfMax(scalingDuration(), clock);
-    }
-
-    /** Returns the average query rate in the scaling window. */
-    public OptionalDouble averageQueryRate() {
-        if (averageQueryRate != null) return averageQueryRate;
-        return averageQueryRate = clusterTimeseries().queryRate(scalingDuration(), clock);
-    }
-
     /** Returns the average of the peak load measurement in each dimension, from each node. */
-    public Load peakLoad() { return nodeTimeseries().peakLoad(); }
-
-    /** The number of nodes this cluster has, or will have if not deployed yet. */
-    // TODO: Make this the deployed, not current count
-    public int nodeCount() {
-        if ( ! nodes.isEmpty()) return (int)nodes.stream().count();
-        return cluster.minResources().nodes();
-    }
-
-    /** The number of groups this cluster has, or will have if not deployed yet. */
-    // TODO: Make this the deployed, not current count
-    public int groupCount() {
-        if ( ! nodes.isEmpty()) return (int)nodes.stream().mapToInt(node -> node.allocation().get().membership().cluster().group().get().index()).distinct().count();
-        return cluster.minResources().groups();
-    }
-
-    public int groupSize() {
-        // ceil: If the division does not produce a whole number we assume some node is missing
-        return (int)Math.ceil((double)nodeCount() / groupCount());
+    public Load peakLoad() {
+        return nodeTimeseries().peakLoad();
     }
 
     /** Returns the relative load adjustment accounting for redundancy in this. */
@@ -235,15 +204,88 @@ public class ClusterModel {
      * if one of  the nodes go down.
      */
     public Load idealLoad() {
-        return new Load(idealCpuLoad(), idealMemoryLoad(), idealDiskLoad()).divide(redundancyAdjustment());
+        var ideal = new Load(idealCpuLoad(), idealMemoryLoad(), idealDiskLoad()).divide(redundancyAdjustment());
+        if (! cluster.bcpGroupInfo().isEmpty() && cluster.bcpGroupInfo().queryRate() > 0) {
+            // Do a weighted sum of the ideal "vote" based on local and bcp group info, to avoid discontinuities with a
+            // near-zero local query rate. A group without query traffic is skipped: the weight would be 0/0 = NaN.
+            double localInformationWeight = Math.min(1, averageQueryRate().orElse(0) /
+                                                        Math.min(queryRateGivingFullConfidence, cluster.bcpGroupInfo().queryRate()));
+            Load bcpGroupIdeal = adjustQueryDependentIdealLoadByBcpGroupInfo(ideal);
+            ideal = ideal.multiply(localInformationWeight).add(bcpGroupIdeal.multiply(1 - localInformationWeight));
+        }
+        return ideal;
     }
 
-    public int nodesAdjustedForRedundancy(int nodes, int groups) {
+    /** Returns the instant this model was created. */
+    public Instant at() { return at;}
+
+    private Load adjustQueryDependentIdealLoadByBcpGroupInfo(Load ideal) {
+        // Replaces the cpu component by the observed peak cpu load divided by (needed / current) vcpu per group
+        var groupInfo = cluster.bcpGroupInfo();
+        double currentVcpuPerGroup = nodes.not().retired().first().get().resources().vcpu() * groupSize();
+        double queryRateToHandle = groupInfo.queryRate() * groupInfo.growthRateHeadroom() * trafficShiftHeadroom();
+        double queryPart = groupInfo.cpuCostPerQuery() * queryRateToHandle / groupCount();
+        double nonQueryPart = ( 1 - queryCpuFraction()) * idealCpuLoad() * (clusterSpec.type().isContainer() ? 1 : groupSize());
+        double neededVcpuPerGroup = queryPart + nonQueryPart;
+        double cpuAdjustment = neededVcpuPerGroup / currentVcpuPerGroup;
+        return ideal.withCpu(peakLoad().cpu() / cpuAdjustment);
+    }
+
+    private boolean hasScaledIn(Duration period) {
+        return cluster.lastScalingEvent().map(event -> event.at()).orElse(Instant.MIN)
+                      .isAfter(clock.instant().minus(period));
+    }
+
+    private ClusterNodesTimeseries nodeTimeseries() { return nodeTimeseries; }
+
+    private ClusterTimeseries clusterTimeseries() { return clusterTimeseries; }
+
+    /**
+     * Returns the predicted max query growth rate per minute as a fraction of the average traffic
+     * in the scaling window.
+     */
+    private double maxQueryGrowthRate() {
+        if (maxQueryGrowthRate != null) return maxQueryGrowthRate;
+        return maxQueryGrowthRate = clusterTimeseries().maxQueryGrowthRate(scalingDuration(), clock);
+    }
+
+    /** Returns the average query rate in the scaling window as a fraction of the max observed query rate. */
+    private double queryFractionOfMax() {
+        if (queryFractionOfMax != null) return queryFractionOfMax;
+        return queryFractionOfMax = clusterTimeseries().queryFractionOfMax(scalingDuration(), clock);
+    }
+
+    /** Returns the average query rate in the scaling window. */
+    private OptionalDouble averageQueryRate() {
+        if (averageQueryRate != null) return averageQueryRate;
+        return averageQueryRate = clusterTimeseries().queryRate(scalingDuration(), clock);
+    }
+
+    /** The number of nodes this cluster has, or will have if not deployed yet. */
+    // TODO: Make this the deployed, not current count
+    private int nodeCount() {
+        if ( ! nodes.isEmpty()) return (int)nodes.stream().count();
+        return cluster.minResources().nodes();
+    }
+
+    /** The number of groups this cluster has, or will have if not deployed yet. */
+    // TODO: Make this the deployed, not current count
+    private int groupCount() {
+        if ( ! nodes.isEmpty()) return (int)nodes.stream().mapToInt(node -> node.allocation().get().membership().cluster().group().get().index()).distinct().count();
+        return cluster.minResources().groups();
+    }
+
+    private int groupSize() {
+        // ceil: If the division does not produce a whole number we assume some node is missing
+        return (int)Math.ceil((double)nodeCount() / groupCount());
+    }
+
+    private int nodesAdjustedForRedundancy(int nodes, int groups) {
         int groupSize = (int)Math.ceil((double)nodes / groups);
         return nodes > 1 ? (groups == 1 ? nodes - 1 : nodes - groupSize) : nodes;
     }
 
-    public int groupsAdjustedForRedundancy(int nodes, int groups) {
+    private int groupsAdjustedForRedundancy(int nodes, int groups) {
         return nodes > 1 ? (groups == 1 ? 1 : groups - 1) : groups;
     }
 
@@ -258,9 +300,6 @@ public class ClusterModel {
                (1 - queryCpuFraction) * idealWriteCpuLoad;
     }
 
-    /** Returns the instant this model was created. */
-    public Instant at() { return at;}
-
     /** Returns the headroom for growth during organic traffic growth as a multiple of current resources. */
     private double growthRateHeadroom() {
         if ( ! zone.environment().isProduction()) return 1;
@@ -280,7 +319,7 @@ public class ClusterModel {
         if ( ! zone.environment().isProduction()) return 1;
         double trafficShiftHeadroom;
         if (application.status().maxReadShare() == 0) // No traffic fraction data
-            trafficShiftHeadroom = 2.0; // assume we currently get half of the global share of traffic
+            trafficShiftHeadroom = 2.0; // assume we currently get half of the max possible share of traffic
         else if (application.status().currentReadShare() == 0)
             trafficShiftHeadroom = 1/application.status().maxReadShare();
         else
@@ -294,11 +333,11 @@ public class ClusterModel {
      * with high confidence to avoid large adjustments caused by random noise due to low traffic numbers.
      */
     private double adjustByConfidence(double headroom) {
-        return ( (headroom -1 ) * Math.min(1, averageQueryRate().orElse(0) / 100.0) ) + 1;
+        return ( (headroom -1 ) * Math.min(1, averageQueryRate().orElse(0) / queryRateGivingFullConfidence) ) + 1;
     }
 
     /** The estimated fraction of cpu usage which goes to processing queries vs. writes */
-    public double queryCpuFraction() {
+    private double queryCpuFraction() {
         OptionalDouble writeRate = clusterTimeseries().writeRate(scalingDuration(), clock);
         if (averageQueryRate().orElse(0) == 0 && writeRate.orElse(0) == 0) return queryCpuFraction(0.5);
         return queryCpuFraction(averageQueryRate().orElse(0) / (averageQueryRate().orElse(0) + writeRate.orElse(0)));
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/Load.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/Load.java
index 6ab5ff731d3..a2fa6e63922 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/Load.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/Load.java
@@ -30,6 +30,10 @@ public class Load {
     public double memory() { return memory; }
     public double disk() { return disk; }
 
+    public Load withCpu(double cpu) { return new Load(cpu, memory, disk); }
+    public Load withMemory(double memory) { return new Load(cpu, memory, disk); }
+    public Load withDisk(double disk) { return new Load(cpu, memory, disk); }
+
     public Load add(Load other) {
         return join(other, (a, b) -> a + b);
     }
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/persistence/ApplicationSerializer.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/persistence/ApplicationSerializer.java
index 469cabc4ee4..1b73dee8b6c 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/persistence/ApplicationSerializer.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/persistence/ApplicationSerializer.java
@@ -12,6 +12,7 @@ import com.yahoo.slime.Slime;
 import com.yahoo.slime.SlimeUtils;
 import com.yahoo.vespa.hosted.provision.applications.Application;
 import com.yahoo.vespa.hosted.provision.applications.Cluster;
+import com.yahoo.vespa.hosted.provision.applications.BcpGroupInfo;
 import com.yahoo.vespa.hosted.provision.applications.ScalingEvent;
 import com.yahoo.vespa.hosted.provision.applications.Status;
 import com.yahoo.vespa.hosted.provision.autoscale.Autoscaling;
@@ -53,6 +54,10 @@ public class ApplicationSerializer {
     private static final String groupSizeKey = "groupSize";
     private static final String requiredKey = "required";
     private static final String suggestedKey = "suggested";
+    private static final String bcpGroupInfoKey = "bcpGroupInfo";
+    private static final String queryRateKey = "queryRate";
+    private static final String growthRateHeadroomKey = "growthRateHeadroom";
+    private static final String cpuCostPerQueryKey = "cpuCostPerQuery";
     private static final String resourcesKey = "resources";
     private static final String targetKey = "target";
     private static final String nodesKey = "nodes";
@@ -129,6 +134,8 @@ public class ApplicationSerializer {
         clusterObject.setBool(requiredKey, cluster.required());
         toSlime(cluster.suggested(), clusterObject.setObject(suggestedKey));
         toSlime(cluster.target(), clusterObject.setObject(targetKey));
+        if (! cluster.bcpGroupInfo().isEmpty())
+            toSlime(cluster.bcpGroupInfo(), clusterObject.setObject(bcpGroupInfoKey));
         scalingEventsToSlime(cluster.scalingEvents(), clusterObject.setArray(scalingEventsKey));
     }
 
@@ -141,6 +148,7 @@ public class ApplicationSerializer {
                            clusterObject.field(requiredKey).asBool(),
                            autoscalingFromSlime(clusterObject.field(suggestedKey), clusterObject.field("nonExisting")),
                            autoscalingFromSlime(clusterObject.field(targetKey), clusterObject.field(autoscalingStatusObjectKey)),
+                           bcpGroupInfoFromSlime(clusterObject.field(bcpGroupInfoKey)),
                            scalingEventsFromSlime(clusterObject.field(scalingEventsKey)));
     }
 
@@ -222,6 +230,20 @@ public class ApplicationSerializer {
                                loadFromSlime(autoscalingObject.field(idealKey)));
     }
 
+    private static void toSlime(BcpGroupInfo bcpGroupInfo, Cursor bcpGroupInfoObject) {
+        if (bcpGroupInfo.isEmpty()) return;
+        bcpGroupInfoObject.setDouble(queryRateKey, bcpGroupInfo.queryRate());
+        bcpGroupInfoObject.setDouble(growthRateHeadroomKey, bcpGroupInfo.growthRateHeadroom());
+        bcpGroupInfoObject.setDouble(cpuCostPerQueryKey, bcpGroupInfo.cpuCostPerQuery());
+    }
+
+    private static BcpGroupInfo bcpGroupInfoFromSlime(Inspector bcpGroupInfoObject) {
+        if ( ! bcpGroupInfoObject.valid()) return BcpGroupInfo.empty();
+        return new BcpGroupInfo(bcpGroupInfoObject.field(queryRateKey).asDouble(),
+                                bcpGroupInfoObject.field(growthRateHeadroomKey).asDouble(),
+                                bcpGroupInfoObject.field(cpuCostPerQueryKey).asDouble());
+    }
+
     private static void scalingEventsToSlime(List<ScalingEvent> scalingEvents, Cursor eventArray) {
         scalingEvents.forEach(event -> toSlime(event, eventArray.addObject()));
     }
diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/AutoscalingTester.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/AutoscalingTester.java
index 05d0822758d..19c6ce16674 100644
--- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/AutoscalingTester.java
+++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/AutoscalingTester.java
@@ -3,7 +3,6 @@ package com.yahoo.vespa.hosted.provision.autoscale;
 
 import com.yahoo.config.provision.ApplicationId;
 import com.yahoo.config.provision.Capacity;
-import com.yahoo.config.provision.Cloud;
 import com.yahoo.config.provision.ClusterResources;
 import com.yahoo.config.provision.ClusterSpec;
 import com.yahoo.config.provision.Flavor;
@@ -135,6 +134,7 @@ class AutoscalingTester {
                               cluster.required(),
                               cluster.suggested(),
                               cluster.target(),
+                              cluster.bcpGroupInfo(),
                               List.of()); // Remove scaling events
         cluster = cluster.with(ScalingEvent.create(cluster.minResources(), cluster.minResources(),
                                                    0,
diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/AutoscalingUsingBcpGroupInfoTest.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/AutoscalingUsingBcpGroupInfoTest.java
new file mode 100644
index 00000000000..0bd94872557
--- /dev/null
+++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/AutoscalingUsingBcpGroupInfoTest.java
@@ -0,0 +1,238 @@
+package com.yahoo.vespa.hosted.provision.autoscale;
+
+import com.yahoo.config.provision.Capacity;
+import com.yahoo.config.provision.ClusterResources;
+import com.yahoo.config.provision.ClusterSpec;
+import com.yahoo.config.provision.NodeResources;
+import com.yahoo.vespa.hosted.provision.applications.BcpGroupInfo;
+import org.junit.Test;
+
+import java.time.Duration;
+import java.util.Optional;
+
+/**
+ * Tests autoscaling using information from the BCP group this cluster deployment
+ * is part of to supplement local data when the local deployment lacks sufficient traffic.
+ *
+ * @author bratseth
+ */
+public class AutoscalingUsingBcpGroupInfoTest {
+
+    /** Tests with varying BCP group info parameters on a cluster with a single content group. */
+    @Test
+    public void test_autoscaling_single_content_group() {
+        var fixture = AutoscalingTester.fixture().awsProdSetup(true).build();
+
+        fixture.tester().clock().advance(Duration.ofDays(2));
+        fixture.store(new BcpGroupInfo(100, 1.1, 0.3));
+        fixture.loader().addCpuMeasurements(0.7f, 10);
+        fixture.tester().assertResources("Scaling up cpu using bcp group cpu info",
+                                         8, 1, 4.0,  7.6, 37.8,
+                                         fixture.autoscale());
+
+        // Higher query rate (mem and disk changes are due to being assigned larger hosts where we get less overhead share)
+        fixture.tester().clock().advance(Duration.ofDays(2));
+        fixture.store(new BcpGroupInfo(200, 1.1, 0.3));
+        fixture.loader().addCpuMeasurements(0.7f, 10);
+        fixture.tester().assertResources("Scaling up cpu using bcp group cpu info",
+                                         8, 1, 8.0,  7.4, 32.8,
+                                         fixture.autoscale());
+
+        // Higher headroom
+        fixture.tester().clock().advance(Duration.ofDays(2));
+        fixture.store(new BcpGroupInfo(100, 1.3, 0.3));
+        fixture.loader().addCpuMeasurements(0.7f, 10);
+        fixture.tester().assertResources("Scaling up cpu using bcp group cpu info",
+                                         9, 1, 4.2,  6.6, 33.1,
+                                         fixture.autoscale());
+
+        // Higher per query cost
+        fixture.tester().clock().advance(Duration.ofDays(2));
+        fixture.store(new BcpGroupInfo(100, 1.1, 0.45));
+        fixture.loader().addCpuMeasurements(0.7f, 10);
+        fixture.tester().assertResources("Scaling up cpu using bcp group cpu info",
+                                         9, 1, 5.4,  6.6, 33.1,
+                                         fixture.autoscale());
+    }
+
+    /** Tests with varying BCP group info parameters on a cluster with multiple content groups. */
+    @Test
+    public void test_autoscaling_multiple_content_groups() {
+        var min = new ClusterResources(3, 3,
+                                       new NodeResources(1, 4, 10, 1, NodeResources.DiskSpeed.any));
+        var max = new ClusterResources(21, 3,
+                                       new NodeResources(100, 1000, 1000, 1, NodeResources.DiskSpeed.any));
+        var fixture = AutoscalingTester.fixture()
+                                       .awsProdSetup(true)
+                                       .initialResources(Optional.of(new ClusterResources(9, 3, new NodeResources(2, 16, 75, 1))))
+                                       .capacity(Capacity.from(min, max))
+                                       .build();
+
+        fixture.tester().clock().advance(Duration.ofDays(2));
+        fixture.store(new BcpGroupInfo(100, 1.1, 0.3));
+        fixture.loader().addCpuMeasurements(0.7f, 10);
+        fixture.tester().assertResources("Scaling up cpu using bcp group cpu info",
+                                         3, 3, 10.5,  42.3, 187.0,
+                                         fixture.autoscale());
+
+        // Higher query rate (the disk change is due to being assigned larger hosts where we get less overhead share)
+        fixture.tester().clock().advance(Duration.ofDays(2));
+        fixture.store(new BcpGroupInfo(200, 1.1, 0.3));
+        fixture.loader().addCpuMeasurements(0.7f, 10);
+        fixture.tester().assertResources("Scaling up cpu using bcp group cpu info",
+                                         3, 3, 20.9,  42.3, 178.0,
+                                         fixture.autoscale());
+
+        // Higher headroom
+        fixture.tester().clock().advance(Duration.ofDays(2));
+        fixture.store(new BcpGroupInfo(100, 1.3, 0.3));
+        fixture.loader().addCpuMeasurements(0.7f, 10);
+        fixture.tester().assertResources("Scaling up cpu using bcp group cpu info",
+                                         3, 3, 12.4,  42.3, 187.0,
+                                         fixture.autoscale());
+
+        // Higher per query cost
+        fixture.tester().clock().advance(Duration.ofDays(2));
+        fixture.store(new BcpGroupInfo(100, 1.1, 0.45));
+        fixture.loader().addCpuMeasurements(0.7f, 10);
+        fixture.tester().assertResources("Scaling up cpu using bcp group cpu info",
+                                         3, 3, 15.7,  42.3, 187.0,
+                                         fixture.autoscale());
+    }
+
+    /**
+     * Tests with varying BCP group info parameters for containers.
+     * Differences from content:
+     * - No host sharing.
+     * - Memory and disk are independent of cluster size.
+     */
+    @Test
+    public void test_autoscaling_container() {
+        var fixture = AutoscalingTester.fixture().clusterType(ClusterSpec.Type.container).awsProdSetup(true).build();
+
+        fixture.tester().clock().advance(Duration.ofDays(2));
+        fixture.store(new BcpGroupInfo(100, 1.1, 0.3));
+        fixture.loader().addCpuMeasurements(0.7f, 10);
+        fixture.tester().assertResources("Scaling up cpu using bcp group cpu info",
+                                         8, 1, 4.0,  16.0, 40.8,
+                                         fixture.autoscale());
+
+        // Higher query rate (mem and disk are unchanged, as they are independent of cluster size for containers)
+        fixture.tester().clock().advance(Duration.ofDays(2));
+        fixture.store(new BcpGroupInfo(200, 1.1, 0.3));
+        fixture.loader().addCpuMeasurements(0.7f, 10);
+        fixture.tester().assertResources("Scaling up cpu using bcp group cpu info",
+                                         8, 1, 8.0,  16.0, 40.8,
+                                         fixture.autoscale());
+
+        // Higher headroom
+        fixture.tester().clock().advance(Duration.ofDays(2));
+        fixture.store(new BcpGroupInfo(100, 1.3, 0.3));
+        fixture.loader().addCpuMeasurements(0.7f, 10);
+        fixture.tester().assertResources("Scaling up cpu using bcp group cpu info",
+                                         5, 1, 8.0,  16.0, 40.8,
+                                         fixture.autoscale());
+
+        // Higher per query cost
+        fixture.tester().clock().advance(Duration.ofDays(2));
+        fixture.store(new BcpGroupInfo(100, 1.1, 0.45));
+        fixture.loader().addCpuMeasurements(0.7f, 10);
+        fixture.tester().assertResources("Scaling up cpu using bcp group cpu info",
+                                         6, 1, 8.0,  16.0, 40.8,
+                                         fixture.autoscale());
+    }
+
+    @Test
+    public void test_autoscaling_single_content_group_with_some_local_traffic() {
+        var fixture = AutoscalingTester.fixture().awsProdSetup(true).build();
+
+        // Baseline: No local traffic, group traffic indicates much higher cpu usage than local
+        fixture.tester().clock().advance(Duration.ofDays(2));
+        fixture.store(new BcpGroupInfo(200, 1.3, 0.45));
+        fixture.loader().addCpuMeasurements(0.7f, 10);
+        fixture.tester().assertResources("Scaling up cpu using bcp group cpu info",
+                                         8, 1, 14.2,  7.4, 32.8,
+                                         fixture.autoscale());
+
+        // Some local traffic
+        fixture.tester().clock().advance(Duration.ofDays(2));
+        fixture.store(new BcpGroupInfo(200, 1.3, 0.45));
+        Duration duration1 = fixture.loader().addCpuMeasurements(0.7f, 10);
+        fixture.tester().clock().advance(duration1.negated());
+        fixture.loader().addQueryRateMeasurements(10, __ -> 10.0);
+        fixture.tester().assertResources("Scaling up cpu using bcp group cpu info",
+                                         8, 1, 6.9,  7.6, 37.8,
+                                         fixture.autoscale());
+
+        // Enough local traffic to get half the votes
+        fixture.tester().clock().advance(Duration.ofDays(2));
+        fixture.store(new BcpGroupInfo(200, 1.3, 0.45));
+        Duration duration2 = fixture.loader().addCpuMeasurements(0.7f, 10);
+        fixture.tester().clock().advance(duration2.negated());
+        fixture.loader().addQueryRateMeasurements(10, __ -> 50.0);
+        fixture.tester().assertResources("Scaling up cpu using bcp group cpu info",
+                                         7, 1, 3.5,  8.9, 55.5,
+                                         fixture.autoscale());
+
+        // Mostly local
+        fixture.tester().clock().advance(Duration.ofDays(2));
+        fixture.store(new BcpGroupInfo(200, 1.3, 0.45));
+        Duration duration3 = fixture.loader().addCpuMeasurements(0.7f, 10);
+        fixture.tester().clock().advance(duration3.negated());
+        fixture.loader().addQueryRateMeasurements(10, __ -> 90.0);
+        fixture.tester().assertResources("Scaling up cpu using bcp group cpu info",
+                                         7, 1, 2.7,  8.9, 55.5,
+                                         fixture.autoscale());
+
+        // Local only
+        fixture.tester().clock().advance(Duration.ofDays(2));
+        fixture.store(new BcpGroupInfo(200, 1.3, 0.45));
+        Duration duration4 = fixture.loader().addCpuMeasurements(0.7f, 10);
+        fixture.tester().clock().advance(duration4.negated());
+        fixture.loader().addQueryRateMeasurements(10, __ -> 100.0);
+        fixture.tester().assertResources("Scaling up cpu using bcp group cpu info",
+                                         7, 1, 2.6,  8.9, 55.5,
+                                         fixture.autoscale());
+
+        // No group info, should be the same as the above
+        fixture.tester().clock().advance(Duration.ofDays(2));
+        fixture.store(BcpGroupInfo.empty());
+        Duration duration5 = fixture.loader().addCpuMeasurements(0.7f, 10);
+        fixture.tester().clock().advance(duration5.negated());
+        fixture.loader().addQueryRateMeasurements(10, __ -> 100.0);
+        fixture.tester().assertResources("Scaling up cpu using bcp group cpu info",
+                                         7, 1, 2.6,  8.9, 55.5,
+                                         fixture.autoscale());
+
+        // 40 query rate, no group info (for reference to the below)
+        fixture.tester().clock().advance(Duration.ofDays(2));
+        fixture.store(BcpGroupInfo.empty());
+        Duration duration6 = fixture.loader().addCpuMeasurements(0.7f, 10);
+        fixture.tester().clock().advance(duration6.negated());
+        fixture.loader().addQueryRateMeasurements(10, __ -> 40.0);
+        fixture.tester().assertResources("Scaling up cpu using bcp group cpu info",
+                                         6, 1, 2.2,  10.6, 66.5,
+                                         fixture.autoscale());
+
+        // Local query rate is too low, but the global rate is even lower, so disregard the group info, giving the same as above
+        fixture.tester().clock().advance(Duration.ofDays(2));
+        fixture.store(new BcpGroupInfo(200/40.0, 1.3, 0.45*40.0));
+        Duration duration7 = fixture.loader().addCpuMeasurements(0.7f, 10);
+        fixture.tester().clock().advance(duration7.negated());
+        fixture.loader().addQueryRateMeasurements(10, __ -> 40.0);
+        fixture.tester().assertResources("Scaling up cpu using bcp group cpu info",
+                                         6, 1, 2.2,  10.6, 66.5,
+                                         fixture.autoscale());
+
+        // Local query rate is too low to be fully confident, and so is the global rate, but as the global rate is slightly larger, incorporate it slightly
+        fixture.tester().clock().advance(Duration.ofDays(2));
+        fixture.store(new BcpGroupInfo(200/4.0, 1.3, 0.45*4.0));
+        Duration duration8 = fixture.loader().addCpuMeasurements(0.7f, 10);
+        fixture.tester().clock().advance(duration8.negated());
+        fixture.loader().addQueryRateMeasurements(10, __ -> 40.0);
+        fixture.tester().assertResources("Scaling up cpu using bcp group cpu info",
+                                         7, 1, 2.2,  8.9, 55.5,
+                                         fixture.autoscale());
+    }
+
+}
diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/Fixture.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/Fixture.java
index 1e1e00a10db..5caf50a4e83 100644
--- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/Fixture.java
+++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/Fixture.java
@@ -21,6 +21,7 @@ import com.yahoo.vespa.hosted.provision.Node;
 import com.yahoo.vespa.hosted.provision.NodeList;
 import com.yahoo.vespa.hosted.provision.applications.Application;
 import com.yahoo.vespa.hosted.provision.applications.Cluster;
+import com.yahoo.vespa.hosted.provision.applications.BcpGroupInfo;
 import com.yahoo.vespa.hosted.provision.autoscale.awsnodes.AwsHostResourcesCalculatorImpl;
 import com.yahoo.vespa.hosted.provision.autoscale.awsnodes.AwsNodeTypes;
 import com.yahoo.vespa.hosted.provision.provisioning.HostResourcesCalculator;
@@ -132,6 +133,12 @@ public class Fixture {
         tester.nodeRepository().applications().put(application, tester.nodeRepository().applications().lock(applicationId));
     }
 
+    public void store(BcpGroupInfo bcpGroupInfo) {
+        var application = application();
+        application = application.with(application.cluster(clusterId()).get().with(bcpGroupInfo));
+        tester.nodeRepository().applications().put(application, tester.nodeRepository().applications().lock(applicationId));
+    }
+
     public static class Builder {
 
         ApplicationId application = AutoscalingTester.applicationId("application1");
diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/Loader.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/Loader.java
index 10c8c7434b1..a104f0b1bc8 100644
--- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/Loader.java
+++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/Loader.java
@@ -79,10 +79,10 @@ public class Loader {
         return Duration.between(initialTime, fixture.tester().clock().instant());
     }
 
-    public void applyCpuLoad(double cpuLoad, int measurements) {
-        addCpuMeasurements((float)cpuLoad, measurements);
-        fixture.tester().clock().advance(samplingInterval.negated().multipliedBy(measurements));
-        addQueryRateMeasurements(measurements, t -> t == 0 ? 200.0 : 100.0); // Query traffic only
+    public Duration applyCpuLoad(double cpuLoad, int measurements) {
+        Duration duration = addCpuMeasurements((float)cpuLoad, measurements);
+        fixture.tester().clock().advance(duration.negated());
+        return addQueryRateMeasurements(measurements, t -> t == 0 ? 200.0 : 100.0); // Query traffic only
     }
 
     public void applyMemLoad(double memLoad, int measurements) {
diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/persistence/ApplicationSerializerTest.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/persistence/ApplicationSerializerTest.java
index bce10b999bb..c8dc0d97320 100644
--- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/persistence/ApplicationSerializerTest.java
+++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/persistence/ApplicationSerializerTest.java
@@ -8,6 +8,7 @@ import com.yahoo.config.provision.ClusterSpec;
 import com.yahoo.config.provision.NodeResources;
 import com.yahoo.vespa.hosted.provision.applications.Application;
 import com.yahoo.vespa.hosted.provision.applications.Cluster;
+import com.yahoo.vespa.hosted.provision.applications.BcpGroupInfo;
 import com.yahoo.vespa.hosted.provision.applications.ScalingEvent;
 import com.yahoo.vespa.hosted.provision.applications.Status;
 import com.yahoo.vespa.hosted.provision.autoscale.Autoscaling;
@@ -39,6 +40,7 @@ public class ApplicationSerializerTest {
                                  true,
                                  Autoscaling.empty(),
                                  Autoscaling.empty(),
+                                 BcpGroupInfo.empty(),
                                  List.of()));
         var minResources = new NodeResources(1, 2, 3, 4);
         clusters.add(new Cluster(ClusterSpec.Id.from("c2"),
@@ -61,6 +63,7 @@ public class ApplicationSerializerTest {
                                                  Instant.ofEpochMilli(5678L),
                                                  Load.zero(),
                                                  Load.one()),
+                                 new BcpGroupInfo(0.1, 0.2, 0.3),
                                  List.of(new ScalingEvent(new ClusterResources(10, 5, minResources),
                                                           new ClusterResources(12, 6, minResources),
                                                           7L,
@@ -90,6 +93,7 @@ public class ApplicationSerializerTest {
             assertEquals(originalCluster.required(), serializedCluster.required());
             assertEquals(originalCluster.suggested(), serializedCluster.suggested());
             assertEquals(originalCluster.target(), serializedCluster.target());
+            assertEquals(originalCluster.bcpGroupInfo(), serializedCluster.bcpGroupInfo());
             assertEquals(originalCluster.scalingEvents(), serializedCluster.scalingEvents());
         }
     }
-- 
cgit v1.2.3