Diffstat (limited to 'node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/ClusterModel.java')
-rw-r--r-- node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/ClusterModel.java | 87
1 file changed, 85 insertions(+), 2 deletions(-)
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/ClusterModel.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/ClusterModel.java
index 5b1ee6cc496..ae18e7ffb91 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/ClusterModel.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/ClusterModel.java
@@ -33,6 +33,11 @@ public class ClusterModel {
static final double idealContainerDiskLoad = 0.95;
static final double idealContentDiskLoad = 0.6;
+ // When a query is issued on a node, the cost is the sum of a fixed cost component and a cost component
+ // proportional to document count. We must account for this when comparing configurations with more or fewer nodes.
+ // TODO: Measure this, and only take it into account with queries
+ private static final double fixedCpuCostFraction = 0.1;
+
private final Application application;
private final ClusterSpec clusterSpec;
private final Cluster cluster;
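
The constant added above encodes a simple per-node query cost model. A minimal sketch of that model, using the same ratio the later loadWith() hunk applies (the method name and parameters here are illustrative, not part of this change): the fixed fraction of query cpu is paid regardless of how few documents a node holds, so growing a group only shrinks the remaining, document-proportional fraction.

    // Illustrative sketch: relative query cpu per node when a group grows from
    // currentGroupSize to newGroupSize nodes. Only the document-proportional part shrinks.
    static double relativeQueryCpuPerNode(double fixedCpuCostFraction, int currentGroupSize, int newGroupSize) {
        return fixedCpuCostFraction
               + (1 - fixedCpuCostFraction) * (double) currentGroupSize / newGroupSize;
    }

For example, relativeQueryCpuPerNode(0.1, 4, 8) is 0.1 + 0.9 * 0.5 = 0.55: doubling the group size roughly halves only the 90% of query cpu that is proportional to document count.
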
@@ -74,7 +79,7 @@ public class ClusterModel {
this.application = application;
this.clusterSpec = clusterSpec;
this.cluster = cluster;
- this.nodes = null;
+ this.nodes = NodeList.of();
this.clock = clock;
this.scalingDuration = scalingDuration;
@@ -86,6 +91,20 @@ public class ClusterModel {
public ClusterSpec clusterSpec() { return clusterSpec; }
public Cluster cluster() { return cluster; }
+ /** Returns the relative load adjustment that should be made to this cluster given available measurements. */
+ public Load loadAdjustment() {
+ if (nodeTimeseries().measurementsPerNode() == 0) return Load.one(); // No info, no change
+ /*
+ // Should we scale up?
+ Load relativePeak = nodeTimeseries().peakLoad().divide(idealLoad());
+ if (relativePeak.any(v -> v > 1))
+ return relativePeak.max(Load.one()); // Don't downscale any dimension if we upscale
+
+ // Should we scale down?
+ */
+ return averageLoad().divide(idealLoad());
+ }
+
/** Returns the predicted duration of a rescaling of this cluster */
public Duration scalingDuration() { return scalingDuration; }
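
loadAdjustment() above divides, compares and caps per-resource Load values. A rough sketch of such a value type, only to make those operations concrete; the field names and methods below are assumptions for illustration and may differ from the actual Load class in this package.

    // Assumed shape of a per-resource load value; one entry per resource dimension.
    record Load(double cpu, double memory, double disk) {
        static Load one() { return new Load(1, 1, 1); }
        Load divide(Load other) {
            return new Load(cpu / other.cpu, memory / other.memory, disk / other.disk);
        }
        Load max(Load other) {
            return new Load(Math.max(cpu, other.cpu), Math.max(memory, other.memory), Math.max(disk, other.disk));
        }
        boolean any(java.util.function.DoublePredicate p) {
            return p.test(cpu) || p.test(memory) || p.test(disk);
        }
    }

Read with that in mind, a loadAdjustment() of (1.3, 0.8, 0.9) would mean cpu runs 30% above its ideal while memory and disk have headroom, and a value of one in every dimension means no change is indicated.
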
@@ -114,8 +133,72 @@ public class ClusterModel {
/** Returns average load during the last {@link #scalingDuration()} */
public Load averageLoad() { return nodeTimeseries().averageLoad(clock.instant().minus(scalingDuration())); }
+ /** The number of nodes this cluster has, or will have if not deployed yet. */
+ // TODO: Make this the deployed, not current count
+ public int nodeCount() {
+ if ( ! nodes.isEmpty()) return (int)nodes.stream().count();
+ return cluster.minResources().nodes();
+ }
+
+ /** The number of groups this cluster has, or will have if not deployed yet. */
+ // TODO: Make this the deployed, not current count
+ public int groupCount() {
+ if ( ! nodes.isEmpty()) return (int)nodes.stream().mapToInt(node -> node.allocation().get().membership().cluster().group().get().index()).distinct().count();
+ return cluster.minResources().groups();
+ }
+
+ public int groupSize() {
+ // ceil: If the division does not produce a whole number we assume some node is missing
+ return (int)Math.ceil((double)nodeCount() / groupCount());
+ }
+
+ /** Returns the relative load adjustment accounting for redundancy in this. */
+ public Load redundancyAdjustment() {
+ return loadWith(nodeCount(), groupCount());
+ }
+
+ /**
+ * Returns the relative load adjustment accounting for redundancy given these nodes+groups
+ * relative to the current nodes+groups in this.
+ */
+ public Load loadWith(int trueNodes, int trueGroups) {
+ int nodes = nodesAdjustedForRedundancy(trueNodes, trueGroups);
+ int groups = groupsAdjustedForRedundancy(trueNodes, trueGroups);
+ if (clusterSpec().type() == ClusterSpec.Type.content) { // load scales with node share of content
+ int groupSize = nodes / groups;
+
+ // Cpu: Query cpu scales with cluster size, write cpu scales with group size
+ // Memory and disk: Scales with group size
+
+ // The fixed cost portion of cpu does not scale with changes to the node count
+ double queryCpuPerGroup = fixedCpuCostFraction + (1 - fixedCpuCostFraction) * groupSize() / groupSize;
+
+ double queryCpu = queryCpuPerGroup * groupCount() / groups;
+ double writeCpu = (double)groupSize() / groupSize;
+ return new Load(queryCpuFraction() * queryCpu + (1 - queryCpuFraction()) * writeCpu,
+ (double)groupSize() / groupSize,
+ (double)groupSize() / groupSize);
+ }
+ else {
+ return new Load((double)nodeCount() / nodes, 1, 1);
+ }
+ }
+
+ /**
+ * Returns the ideal load across the nodes of this such that each node will be at ideal load
+ * if one of the nodes goes down.
+ */
public Load idealLoad() {
- return new Load(idealCpuLoad(), idealMemoryLoad, idealDiskLoad());
+ return new Load(idealCpuLoad(), idealMemoryLoad, idealDiskLoad()).divide(redundancyAdjustment());
+ }
+
+ public int nodesAdjustedForRedundancy(int nodes, int groups) {
+ int groupSize = (int)Math.ceil((double)nodes / groups);
+ return nodes > 1 ? (groups == 1 ? nodes - 1 : nodes - groupSize) : nodes;
+ }
+
+ public int groupsAdjustedForRedundancy(int nodes, int groups) {
+ return nodes > 1 ? (groups == 1 ? 1 : groups - 1) : groups;
}
/** Ideal cpu load must take the application traffic fraction into account */
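
Since idealLoad() now divides by redundancyAdjustment(), a worked example of the two adjustment methods added above helps; the snippet below just restates them outside the class so the numbers can be checked by hand (it is not the production code path):

    // Restatement of the redundancy adjustment, for a hand-checked example.
    static int nodesAdjustedForRedundancy(int nodes, int groups) {
        int groupSize = (int) Math.ceil((double) nodes / groups);
        return nodes > 1 ? (groups == 1 ? nodes - 1 : nodes - groupSize) : nodes;
    }
    static int groupsAdjustedForRedundancy(int nodes, int groups) {
        return nodes > 1 ? (groups == 1 ? 1 : groups - 1) : groups;
    }
    // nodesAdjustedForRedundancy(6, 1) == 5, groupsAdjustedForRedundancy(6, 1) == 1: size for one node down
    // nodesAdjustedForRedundancy(8, 2) == 4, groupsAdjustedForRedundancy(8, 2) == 1: size for one whole group down

redundancyAdjustment() is loadWith(nodeCount(), groupCount()), and loadWith() applies these reductions internally, so the resulting per-node load factor is above one; dividing the ideal load by it lowers each node's target just enough that the remaining nodes sit at ideal load after losing one node (single group) or one whole group (multiple groups).
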