Merge pull request #23573 from vespa-engine/bratseth/autoscale-faster

Bratseth/autoscale faster
author: Valerij Fredriksen <freva@users.noreply.github.com> 2022-08-03 17:35:11 +0200
committer: GitHub <noreply@github.com> 2022-08-03 17:35:11 +0200
commit: cc0e0a6919a4fccd7ef6d6016ba186136d49c956 (patch)
tree: af409afdb07d6fe2290124ae7b57ee1dba767d9b
parent: 66df56662aaa775732c5b2f23c49ffaed668a276 (diff)
parent: 38f54c8d1ae746377ce2260c39a9cce377148e84 (diff)
16 files changed, 289 insertions, 121 deletions
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/AllocatableClusterResources.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/AllocatableClusterResources.java
index 88a4b492a0b..4a1545cc66c 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/AllocatableClusterResources.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/AllocatableClusterResources.java
@@ -64,6 +64,33 @@ public class AllocatableClusterResources {
         this.fulfilment = fulfilment(realResources, idealResources);
     }
 
+    private AllocatableClusterResources(int nodes,
+                                        int groups,
+                                        NodeResources realResources,
+                                        NodeResources advertisedResources,
+                                        ClusterSpec clusterSpec,
+                                        double fulfilment) {
+        this.nodes = nodes;
+        this.groups = groups;
+        this.realResources = realResources;
+        this.advertisedResources = advertisedResources;
+        this.clusterSpec = clusterSpec;
+        this.fulfilment = fulfilment;
+    }
+
+    /** Returns this with the redundant node or group removed from counts. */
+    public AllocatableClusterResources withoutRedundancy() {
+        int groupSize = nodes / groups;
+        int nodesAdjustedForRedundancy   = nodes > 1 ? (groups == 1 ? nodes - 1 : nodes - groupSize) : nodes;
+        int groupsAdjustedForRedundancy  = nodes > 1 ? (groups == 1 ? 1 : groups - 1) : groups;
+        return new AllocatableClusterResources(nodesAdjustedForRedundancy,
+                                               groupsAdjustedForRedundancy,
+                                               realResources,
+                                               advertisedResources,
+                                               clusterSpec,
+                                               fulfilment);
+    }
+
     /**
      * Returns the resources which will actually be available per node in this cluster with this allocation.
      * These should be used for reasoning about allocation to meet measured demand.
@@ -83,11 +110,6 @@ public class AllocatableClusterResources {
     public int nodes() { return nodes; }
     public int groups() { return groups; }
 
-    public int groupSize() {
-        // ceil: If the division does not produce a whole number we assume some node is missing
-        return (int)Math.ceil((double)nodes / groups);
-    }
-
     public ClusterSpec clusterSpec() { return clusterSpec; }
 
     public double cost() { return nodes * advertisedResources.cost(); }
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/AllocationOptimizer.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/AllocationOptimizer.java
index 5bebd346bdb..29f53f0336d 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/AllocationOptimizer.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/AllocationOptimizer.java
@@ -2,7 +2,6 @@
 package com.yahoo.vespa.hosted.provision.autoscale;
 
 import com.yahoo.config.provision.ClusterResources;
-import com.yahoo.config.provision.ClusterSpec;
 import com.yahoo.config.provision.NodeResources;
 import com.yahoo.vespa.hosted.provision.NodeList;
 import com.yahoo.vespa.hosted.provision.NodeRepository;
@@ -20,11 +19,6 @@ public class AllocationOptimizer {
     private static final int minimumNodes = 2; // Since this number includes redundancy it cannot be lower than 2
     private static final int maximumNodes = 150;
 
-    // When a query is issued on a node the cost is the sum of a fixed cost component and a cost component
-    // proportional to document count. We must account for this when comparing configurations with more or fewer nodes.
-    // TODO: Measure this, and only take it into account with queries
-    private static final double fixedCpuCostFraction = 0.1;
-
     private final NodeRepository nodeRepository;
 
     public AllocationOptimizer(NodeRepository nodeRepository) {
@@ -53,17 +47,10 @@ public class AllocationOptimizer {
         for (int groups = limits.min().groups(); groups <= limits.max().groups(); groups++) {
             for (int nodes = limits.min().nodes(); nodes <= limits.max().nodes(); nodes++) {
                 if (nodes % groups != 0) continue;
-                int groupSize = nodes / groups;
-
-                // Adjust for redundancy: Node in group if groups = 1, an extra group if multiple groups
-                // TODO: Make the best choice based on size and redundancy setting instead
-                int nodesAdjustedForRedundancy =  target.adjustForRedundancy() && nodes > 1 ? (groups == 1 ? nodes - 1 : nodes - groupSize) : nodes;
-                int groupsAdjustedForRedundancy = target.adjustForRedundancy() && nodes > 1 ? (groups == 1 ? 1 : groups - 1) : groups;
 
                 ClusterResources next = new ClusterResources(nodes,
                                                              groups,
-                                                             nodeResourcesWith(nodesAdjustedForRedundancy,
-                                                                               groupsAdjustedForRedundancy,
+                                                             nodeResourcesWith(nodes, groups,
                                                                                limits, target, current, clusterModel));
                 var allocatableResources = AllocatableClusterResources.from(next, current.clusterSpec(), limits,
                                                                             hosts, nodeRepository);
@@ -85,34 +72,14 @@ public class AllocationOptimizer {
                                             ResourceTarget target,
                                             AllocatableClusterResources current,
                                             ClusterModel clusterModel) {
-        double cpu, memory, disk;
-        int groupSize = nodes / groups;
-
-        if (current.clusterSpec().type() == ClusterSpec.Type.content) { // load scales with node share of content
-            // Cpu: Query cpu scales with cluster size, write cpu scales with group size
-            // Memory and disk: Scales with group size
-
-            // The fixed cost portion of cpu does not scale with changes to the node count
-            double queryCpuPerGroup = fixedCpuCostFraction * target.resources().vcpu() +
-                                      (1 - fixedCpuCostFraction) * target.resources().vcpu() * current.groupSize() / groupSize;
-
-            double queryCpu = queryCpuPerGroup * current.groups() / groups;
-            double writeCpu = target.resources().vcpu() * current.groupSize() / groupSize;
-            cpu = clusterModel.queryCpuFraction() * queryCpu + (1 - clusterModel.queryCpuFraction()) * writeCpu;
-            memory = target.resources().memoryGb() * current.groupSize() / groupSize;
-            disk = target.resources().diskGb() * current.groupSize() / groupSize;
-        }
-        else {
-            cpu = target.resources().vcpu() * current.nodes() / nodes;
-            memory = target.resources().memoryGb();
-            disk = target.resources().diskGb();
-        }
+        var scaled = clusterModel.loadWith(nodes, groups)
+                                 .scaled(Load.one().divide(clusterModel.redundancyAdjustment()).scaled(target.resources()));
         // Combine the scaled resource values computed here
         // with the currently configured non-scaled values, given in the limits, if any
-        NodeResources nonScaled = limits.isEmpty() || limits.min().nodeResources().isUnspecified()
-                                  ? current.advertisedResources().nodeResources()
-                                  : limits.min().nodeResources(); // min=max for non-scaled
-        return nonScaled.withVcpu(cpu).withMemoryGb(memory).withDiskGb(disk);
+        var nonScaled = limits.isEmpty() || limits.min().nodeResources().isUnspecified()
+                        ? current.advertisedResources().nodeResources()
+                        : limits.min().nodeResources(); // min=max for non-scaled
+        return nonScaled.withVcpu(scaled.vcpu()).withMemoryGb(scaled.memoryGb()).withDiskGb(scaled.diskGb());
     }
 
     /** Returns a copy of the given limits where the minimum nodes are at least the given value when allowed */
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/ClusterModel.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/ClusterModel.java
index 5b1ee6cc496..ae18e7ffb91 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/ClusterModel.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/ClusterModel.java
@@ -33,6 +33,11 @@ public class ClusterModel {
     static final double idealContainerDiskLoad = 0.95;
     static final double idealContentDiskLoad = 0.6;
 
+    // When a query is issued on a node the cost is the sum of a fixed cost component and a cost component
+    // proportional to document count. We must account for this when comparing configurations with more or fewer nodes.
+    // TODO: Measure this, and only take it into account with queries
+    private static final double fixedCpuCostFraction = 0.1;
+
     private final Application application;
     private final ClusterSpec clusterSpec;
     private final Cluster cluster;
@@ -74,7 +79,7 @@ public class ClusterModel {
         this.application = application;
         this.clusterSpec = clusterSpec;
         this.cluster = cluster;
-        this.nodes = null;
+        this.nodes = NodeList.of();
         this.clock = clock;
 
         this.scalingDuration = scalingDuration;
@@ -86,6 +91,20 @@ public class ClusterModel {
     public ClusterSpec clusterSpec() { return clusterSpec; }
     public Cluster cluster() { return cluster; }
 
+    /** Returns the relative load adjustment that should be made to this cluster given available measurements. */
+    public Load loadAdjustment() {
+        if (nodeTimeseries().measurementsPerNode() == 0) return Load.one(); // No info, no change
+        /*
+        // Should we scale up?
+        Load relativePeak = nodeTimeseries().peakLoad().divide(idealLoad());
+        if (relativePeak.any(v -> v > 1))
+            return relativePeak.max(Load.one()); // Don't downscale any dimension if we upscale
+
+        // Should we scale down?
+        */
+        return averageLoad().divide(idealLoad());
+    }
+
     /** Returns the predicted duration of a rescaling of this cluster */
     public Duration scalingDuration() { return scalingDuration; }
 
@@ -114,8 +133,72 @@ public class ClusterModel {
     /** Returns average load during the last {@link #scalingDuration()} */
     public Load averageLoad() { return nodeTimeseries().averageLoad(clock.instant().minus(scalingDuration())); }
 
+    /** The number of nodes this cluster has, or will have if not deployed yet. */
+    // TODO: Make this the deployed, not current count
+    public int nodeCount() {
+        if ( ! nodes.isEmpty()) return (int)nodes.stream().count();
+        return cluster.minResources().nodes();
+    }
+
+    /** The number of groups this cluster has, or will have if not deployed yet. */
+    // TODO: Make this the deployed, not current count
+    public int groupCount() {
+        if ( ! nodes.isEmpty()) return (int)nodes.stream().mapToInt(node -> node.allocation().get().membership().cluster().group().get().index()).distinct().count();
+        return cluster.minResources().groups();
+    }
+
+    public int groupSize() {
+        // ceil: If the division does not produce a whole number we assume some node is missing
+        return (int)Math.ceil((double)nodeCount() / groupCount());
+    }
+
+    /** Returns the relative load adjustment accounting for redundancy in this. */
+    public Load redundancyAdjustment() {
+        return loadWith(nodeCount(), groupCount());
+    }
+
+    /**
+     * Returns the relative load adjustment accounting for redundancy given these nodes+groups
+     * relative to the nodes+groups in this.
+     */
+    public Load loadWith(int trueNodes, int trueGroups) {
+        int nodes = nodesAdjustedForRedundancy(trueNodes, trueGroups);
+        int groups = groupsAdjustedForRedundancy(trueNodes, trueGroups);
+        if (clusterSpec().type() == ClusterSpec.Type.content) { // load scales with node share of content
+            int groupSize = nodes / groups;
+
+            // Cpu: Query cpu scales with cluster size, write cpu scales with group size
+            // Memory and disk: Scales with group size
+
+            // The fixed cost portion of cpu does not scale with changes to the node count
+            double queryCpuPerGroup = fixedCpuCostFraction + (1 - fixedCpuCostFraction) * groupSize() / groupSize;
+
+            double queryCpu = queryCpuPerGroup * groupCount() / groups;
+            double writeCpu = (double)groupSize() / groupSize;
+            return new Load(queryCpuFraction() * queryCpu + (1 - queryCpuFraction()) * writeCpu,
+                            (double)groupSize() / groupSize,
+                            (double)groupSize() / groupSize);
+        }
+        else {
+            return new Load((double)nodeCount() / nodes, 1, 1);
+        }
+    }
+
+    /**
+     * Returns the ideal load across the nodes of this such that each node will be at ideal load
+     * if one of the nodes goes down.
+     */
     public Load idealLoad() {
-        return new Load(idealCpuLoad(), idealMemoryLoad, idealDiskLoad());
+        return new Load(idealCpuLoad(), idealMemoryLoad, idealDiskLoad()).divide(redundancyAdjustment());
+    }
+
+    public int nodesAdjustedForRedundancy(int nodes, int groups) {
+        int groupSize = (int)Math.ceil((double)nodes / groups);
+        return nodes > 1 ? (groups == 1 ? nodes - 1 : nodes - groupSize) : nodes;
+    }
+
+    public int groupsAdjustedForRedundancy(int nodes, int groups) {
+        return nodes > 1 ? (groups == 1 ? 1 : groups - 1) : groups;
     }
 
     /** Ideal cpu load must take the application traffic fraction into account */
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/ClusterNodesTimeseries.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/ClusterNodesTimeseries.java
index 36056665a15..ab5be045dd4 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/ClusterNodesTimeseries.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/ClusterNodesTimeseries.java
@@ -8,6 +8,7 @@ import java.time.Duration;
 import java.time.Instant;
 import java.util.List;
 import java.util.Optional;
+import java.util.OptionalDouble;
 import java.util.function.Predicate;
 import java.util.stream.Collectors;
 
@@ -49,6 +50,7 @@ public class ClusterNodesTimeseries {
 
     /** Returns the average number of measurements per node */
     public int measurementsPerNode() {
+        if (clusterNodes.size() == 0) return 0;
         int measurementCount = timeseries.stream().mapToInt(m -> m.size()).sum();
         return measurementCount / clusterNodes.size();
     }
@@ -84,6 +86,27 @@ public class ClusterNodesTimeseries {
         return total.divide(count);
     }
 
+    /**
+     * Returns the "peak load" in this, which is, for each load dimension,
+     * the average over nodes of each node's highest reading for that dimension.
+     */
+    public Load peakLoad() {
+        return new Load(peakLoad(Load.Dimension.cpu), peakLoad(Load.Dimension.memory), peakLoad(Load.Dimension.disk));
+    }
+
+    private double peakLoad(Load.Dimension dimension) {
+        double total = 0;
+        int count = 0;
+        for (var nodeTimeseries : timeseries) {
+            OptionalDouble value = nodeTimeseries.peak(dimension);
+            if (value.isEmpty()) continue;
+            total += value.getAsDouble();
+            count++;
+        }
+        if (count == 0) return 0;
+        return total / count;
+    }
+
     private static List<NodeTimeseries> keep(List<NodeTimeseries> timeseries, Predicate<NodeMetricSnapshot> filter) {
         return timeseries.stream().map(nodeTimeseries -> nodeTimeseries.keep(filter)).collect(Collectors.toList());
     }
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/Load.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/Load.java
index a52b048a9e0..88c7e70cd35 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/Load.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/Load.java
@@ -3,6 +3,12 @@ package com.yahoo.vespa.hosted.provision.autoscale;
 
 import com.yahoo.config.provision.NodeResources;
 
+import java.util.Objects;
+import java.util.function.DoubleBinaryOperator;
+import java.util.function.DoubleFunction;
+import java.util.function.DoubleUnaryOperator;
+import java.util.function.Predicate;
+
 /**
  * The load of a node or system, measured as fractions of max (1.0) in three dimensions.
  *
@@ -10,6 +16,8 @@ import com.yahoo.config.provision.NodeResources;
  */
 public class Load {
 
+    public enum Dimension { cpu, memory, disk }
+
     private final double cpu, memory, disk;
 
     public Load(double cpu, double memory, double disk) {
@@ -23,27 +31,51 @@ public class Load {
     public double disk() { return disk; }
 
     public Load add(Load other) {
-        return new Load(cpu + other.cpu(), memory + other.memory(), disk + other.disk());
+        return join(other, (a, b) -> a + b);
     }
 
     public Load multiply(NodeResources resources) {
         return new Load(cpu * resources.vcpu(), memory * resources.memoryGb(), disk * resources.diskGb());
     }
-
     public Load multiply(double factor) {
-        return new Load(cpu * factor, memory * factor, disk * factor);
+        return map(v -> v * factor);
+    }
+    public Load multiply(Load other) {
+        return join(other, (a, b) -> a * b);
     }
 
+    public Load divide(Load divisor) {
+        return join(divisor, (a, b) -> divide(a, b));
+    }
+    public Load divide(double divisor) {
+        return map(v -> divide(v, divisor));
+    }
     public Load divide(NodeResources resources) {
         return new Load(divide(cpu, resources.vcpu()), divide(memory, resources.memoryGb()), divide(disk, resources.diskGb()));
     }
 
-    public Load divide(Load divisor) {
-        return new Load(divide(cpu, divisor.cpu()), divide(memory, divisor.memory()), divide(disk, divisor.disk()));
+    /** Returns the load having the max value of this and the given load in each dimension. */
+    public Load max(Load other) {
+        return join(other, (a, b) -> Math.max(a, b));
     }
 
-    public Load divide(double divisor) {
-        return new Load(divide(cpu, divisor), divide(memory, divisor), divide(disk, divisor));
+    /** Returns the load where the given function is applied to each dimension of this. */
+    public Load map(DoubleUnaryOperator f) {
+        return new Load(f.applyAsDouble(cpu),
+                        f.applyAsDouble(memory),
+                        f.applyAsDouble(disk));
+    }
+
+    /** Returns the load where the given function is applied to each dimension of this and the given load. */
+    public Load join(Load other, DoubleBinaryOperator f) {
+        return new Load(f.applyAsDouble(this.cpu(), other.cpu()),
+                        f.applyAsDouble(this.memory(), other.memory()),
+                        f.applyAsDouble(this.disk(), other.disk()));
+    }
+
+    /** Returns true if any dimension matches the predicate. */
+    public boolean any(Predicate<Double> test) {
+        return test.test(cpu) || test.test(memory) || test.test(disk);
     }
 
     public NodeResources scaled(NodeResources resources) {
@@ -52,6 +84,14 @@ public class Load {
                         .withDiskGb(disk * resources.diskGb());
     }
 
+    public double get(Dimension dimension) {
+        return switch (dimension) {
+            case cpu -> cpu();
+            case memory -> memory();
+            case disk -> disk();
+        };
+    }
+
     private double requireNormalized(double value, String name) {
         if (Double.isNaN(value))
             throw new IllegalArgumentException(name + " must be a number but is NaN");
@@ -60,17 +100,31 @@ public class Load {
         return value;
     }
 
+    private static double divide(double a, double b) {
+        if (a == 0 && b == 0) return 0;
+        return a / b;
+    }
+
+    @Override
+    public boolean equals(Object o) {
+        if (o == this) return true;
+        if ( ! (o instanceof Load other)) return false;
+        if (other.cpu() != this.cpu()) return false;
+        if (other.memory() != this.memory()) return false;
+        if (other.disk() != this.disk()) return false;
+        return true;
+    }
+
+    @Override
+    public int hashCode() { return Objects.hash(cpu, memory, disk); }
+
     @Override
     public String toString() {
         return "load: " + cpu + " cpu, " + memory + " memory, " + disk + " disk";
     }
 
     public static Load zero() { return new Load(0, 0, 0); }
-
-    private static double divide(double a, double b) {
-        if (a == 0 && b == 0) return 0;
-        return a / b;
-    }
+    public static Load one() { return new Load(1, 1, 1); }
 
     public static Load byDividing(NodeResources a, NodeResources b) {
         return new Load(divide(a.vcpu(), b.vcpu()), divide(a.memoryGb(), b.memoryGb()), divide(a.diskGb(), b.diskGb()));
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/NodeTimeseries.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/NodeTimeseries.java
index 4a5f8972e11..500dbf0f66f 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/NodeTimeseries.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/NodeTimeseries.java
@@ -6,6 +6,7 @@ import java.util.ArrayList;
 import java.util.Collections;
 import java.util.List;
 import java.util.Optional;
+import java.util.OptionalDouble;
 import java.util.function.Predicate;
 import java.util.stream.Collectors;
 
@@ -40,6 +41,10 @@ public class NodeTimeseries {
         return Optional.of(snapshots.get(snapshots.size() - 1));
     }
 
+    public OptionalDouble peak(Load.Dimension dimension) {
+        return snapshots.stream().mapToDouble(snapshot -> snapshot.load().get(dimension)).max();
+    }
+
     public List<NodeMetricSnapshot> asList() { return snapshots; }
 
     public String hostname() { return hostname; }
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/ResourceTarget.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/ResourceTarget.java
index 72836baaf5b..7bc019caabb 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/ResourceTarget.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/ResourceTarget.java
@@ -10,43 +10,37 @@ import java.util.OptionalDouble;
 
 /**
  * A resource target to hit for the allocation optimizer.
- * The target is measured in cpu, memory and disk per node in the allocation given by current.
+ * The target is measured in cpu, memory and disk per node in the current allocation.
  *
  * @author bratseth
  */
 public class ResourceTarget {
 
-    private final boolean adjustForRedundancy;
-
     /** The target real resources per node, assuming the node assignment where this was decided */
     private final NodeResources resources;
 
-    private ResourceTarget(NodeResources resources, boolean adjustForRedundancy) {
+    private ResourceTarget(NodeResources resources) {
         this.resources = resources;
-        this.adjustForRedundancy = adjustForRedundancy;
     }
 
-    /** Are the target resources given by this including redundancy or not */
-    public boolean adjustForRedundancy() { return adjustForRedundancy; }
-    
     /** Returns the target resources per node in terms of the current allocation */
     public NodeResources resources() { return resources; }
 
     @Override
     public String toString() {
-        return "target " + resources + (adjustForRedundancy ? "(with redundancy adjustment) " : "");
+        return "target " + resources;
     }
 
     /** Create a target of achieving ideal load given a current load */
     public static ResourceTarget idealLoad(ClusterModel clusterModel,
                                            AllocatableClusterResources current) {
-        var loadAdjustment = clusterModel.averageLoad().divide(clusterModel.idealLoad());
-        return new ResourceTarget(loadAdjustment.scaled(current.realResources().nodeResources()), true);
+        return new ResourceTarget(clusterModel.loadAdjustment().scaled(current.realResources().nodeResources()));
     }
 
     /** Crete a target of preserving a current allocation */
-    public static ResourceTarget preserve(AllocatableClusterResources current) {
-        return new ResourceTarget(current.realResources().nodeResources(), false);
+    public static ResourceTarget preserve(ClusterModel clusterModel,
+                                          AllocatableClusterResources current) {
+        return new ResourceTarget(current.realResources().nodeResources());
     }
 
 }
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/NodeRepositoryProvisioner.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/NodeRepositoryProvisioner.java
index 4ffe04d748c..8e00a623e1c 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/NodeRepositoryProvisioner.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/NodeRepositoryProvisioner.java
@@ -196,7 +196,7 @@ public class NodeRepositoryProvisioner implements Provisioner {
         if (! firstDeployment && currentAsAdvertised.isWithin(limits.min(), limits.max())) return currentAsAdvertised;
 
         // Otherwise, find an allocation that preserves the current resources as well as possible
-        return allocationOptimizer.findBestAllocation(ResourceTarget.preserve(current),
+        return allocationOptimizer.findBestAllocation(ResourceTarget.preserve(clusterModel, current),
                                                       current,
                                                       clusterModel,
                                                       limits)
diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/AutoscalingTest.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/AutoscalingTest.java
index e6873e7118f..28f37546eb6 100644
--- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/AutoscalingTest.java
+++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/AutoscalingTest.java
@@ -19,6 +19,7 @@ import com.yahoo.vespa.hosted.provision.NodeRepository;
 import com.yahoo.vespa.hosted.provision.Nodelike;
 import com.yahoo.vespa.hosted.provision.provisioning.CapacityPolicies;
 import com.yahoo.vespa.hosted.provision.provisioning.HostResourcesCalculator;
+import org.junit.Ignore;
 import org.junit.Test;
 
 import java.time.Duration;
@@ -69,10 +70,18 @@ public class AutoscalingTest {
 
     /** Using too many resources for a short period is proof we should scale up regardless of the time that takes. */
     @Test
-    public void test_autoscaling_up_is_fast_TODO() {
+    public void test_no_autoscaling_with_no_measurements() {
         var fixture = AutoscalingTester.fixture().build();
-        fixture.tester().clock().advance(Duration.ofDays(1)); // TODO: Remove the need for this
-        fixture.loader().applyLoad(1.0, 1.0, 1.0, 120); // TODO: Make this low
+        System.out.println(fixture.autoscale());
+        assertTrue(fixture.autoscale().target().isEmpty());
+    }
+
+    /** Using too many resources for a short period is proof we should scale up regardless of the time that takes. */
+    @Test
+    @Ignore // TODO
+    public void test_autoscaling_up_is_fast() {
+        var fixture = AutoscalingTester.fixture().build();
+        fixture.loader().applyLoad(1.0, 1.0, 1.0, 1);
         fixture.tester().assertResources("Scaling up since resource usage is too high",
                                          10, 1, 9.4, 8.5, 92.6,
                                          fixture.autoscale());
diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/ClusterModelTest.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/ClusterModelTest.java
index 516a7a92d04..0559a232065 100644
--- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/ClusterModelTest.java
+++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/ClusterModelTest.java
@@ -27,50 +27,60 @@ public class ClusterModelTest {
     private static final double delta = 0.001;
 
     @Test
-    public void test_traffic_headroom() {
-        ManualClock clock = new ManualClock();
-        Application application = Application.empty(ApplicationId.from("t1", "a1", "i1"));
-        ClusterSpec clusterSpec = clusterSpec();
-        Cluster cluster = cluster(new NodeResources(1, 10, 100, 1));
-        application = application.with(cluster);
+    public void unit_adjustment_should_cause_no_change() {
+        var model = clusterModelWithNoData(); // 5 nodes, 1 group
+        assertEquals(Load.one(), model.loadAdjustment()); // with no measurements the adjustment is expected to equal Load.one()
+        var target = model.loadAdjustment().scaled(resources()); // the adjustment applied to the node resources from resources()
+        int testingNodes = 5 - 1; // presumably the 5 nodes minus one redundant node - TODO confirm
+        int currentNodes = 5 - 1; // same count as testingNodes, so both loadWith(...) calls get identical arguments
+        assertEquals(resources(), model.loadWith(testingNodes, 1).scaled(Load.one().divide(model.loadWith(currentNodes, 1)).scaled(target))); // expect the resources back unchanged
+    }
 
+    @Test
+    public void test_traffic_headroom() {
         // No current traffic share: Ideal load is low but capped
-        var model1 = new ClusterModel(application.with(new Status(0.0, 1.0)),
-                                      clusterSpec, cluster, clock, Duration.ofMinutes(10),
-                                      timeseries(cluster,100, t -> t == 0 ? 10000.0 : 0.0, t -> 0.0, clock),
-                                      ClusterNodesTimeseries.empty());
-        assertEquals(0.131, model1.idealLoad().cpu(), delta);
+        var model1 = clusterModel(new Status(0.0, 1.0),
+                                  t -> t == 0 ? 10000.0 : 0.0, t -> 0.0); // query rate is 10000 at t == 0 and 0 afterwards; write rate is always 0
+        assertEquals(0.10672097759674132, model1.idealLoad().cpu(), delta); // compared with tolerance delta (0.001)
 
         // Almost no current traffic share: Ideal load is low but capped
-        var model2 = new ClusterModel(application.with(new Status(0.0001, 1.0)),
-                                      clusterSpec, cluster, clock, Duration.ofMinutes(10),
-                                      timeseries(cluster,100, t -> t == 0 ? 10000.0 : 0.0, t -> 0.0, clock),
-                                      ClusterNodesTimeseries.empty());
-        assertEquals(0.131, model2.idealLoad().cpu(), delta);
+        var model2 = clusterModel(new Status(0.0001, 1.0),
+                                  t -> t == 0 ? 10000.0 : 0.0, t -> 0.0); // same query and write rates as for model1
+        assertEquals(0.10672097759674132, model2.idealLoad().cpu(), delta); // same expected value as for model1
     }
 
     @Test
     public void test_growth_headroom() {
-        ManualClock clock = new ManualClock();
+        // No current traffic: Ideal load is low but capped
+        var model1 = clusterModel(new Status(0.0, 0.0), // NOTE(review): the removed code passed the application without an explicit Status; presumably Status(0.0, 0.0) matches that default - confirm
+                                  t -> t == 0 ? 10000.0 : 0.0, t -> 0.0); // query rate is 10000 at t == 0 and 0 afterwards; write rate is always 0
+        assertEquals(0.2240325865580448, model1.idealLoad().cpu(), delta); // compared with tolerance delta (0.001)
 
+        // Almost no current traffic: Ideal load is low but capped
+        var model2 = clusterModel(new Status(0.0001, 1.0),
+                                  t -> t == 0 ? 10000.0 : 0.0001, t -> 0.0); // query rate falls to 0.0001 (not 0) after t == 0
+        assertEquals(0.0326530612244898, model2.idealLoad().cpu(), delta); // compared with tolerance delta (0.001)
+    }
+
+    private ClusterModel clusterModelWithNoData() { // builds a model whose query and write rates are zero throughout
+        return clusterModel(new Status(0.0, 1.0), t -> 0.0, t -> 0.0); // both rate functions return 0.0 for every t
+    }
+
+    private ClusterModel clusterModel(Status status, IntFunction<Double> queryRate, IntFunction<Double> writeRate) { // shared builder for the ClusterModel instances used by the tests in this class
+        ManualClock clock = new ManualClock();
         Application application = Application.empty(ApplicationId.from("t1", "a1", "i1"));
         ClusterSpec clusterSpec = clusterSpec();
-        Cluster cluster = cluster(new NodeResources(1, 10, 100, 1));
+        Cluster cluster = cluster(resources()); // same node resources the removed code constructed inline
         application = application.with(cluster);
 
-        // No current traffic: Ideal load is low but capped
-        var model1 = new ClusterModel(application,
-                                      clusterSpec, cluster, clock, Duration.ofMinutes(10),
-                                      timeseries(cluster,100, t -> t == 0 ? 10000.0 : 0.0, t -> 0.0, clock),
-                                      ClusterNodesTimeseries.empty());
-        assertEquals(0.275, model1.idealLoad().cpu(), delta);
+        return new ClusterModel(application.with(status), // the given status is attached to the application here
+                                clusterSpec, cluster, clock, Duration.ofMinutes(10),
+                                timeseries(cluster,100, queryRate, writeRate, clock), // 100 is presumably the number of generated snapshots - TODO confirm
+                                ClusterNodesTimeseries.empty()); // no node-level measurements are supplied
+    }
 
-        // Almost no current traffic: Ideal load is low but capped
-        var model2 = new ClusterModel(application.with(new Status(0.0001, 1.0)),
-                                      clusterSpec, cluster, clock, Duration.ofMinutes(10),
-                                      timeseries(cluster,100, t -> t == 0 ? 10000.0 : 0.0001, t -> 0.0, clock),
-                                      ClusterNodesTimeseries.empty());
-        assertEquals(0.040, model2.idealLoad().cpu(), delta);
+    private NodeResources resources() { // node resources shared by the test cluster and the expectations in this class
+        return new NodeResources(1, 10, 100, 1); // same values the removed code passed inline to cluster(...)
     }
 
     private ClusterSpec clusterSpec() {
diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/Loader.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/Loader.java
index db4fe917b53..c0203f5f202 100644
--- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/Loader.java
+++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/Loader.java
@@ -33,6 +33,7 @@ public class Loader {
      * @param count the number of measurements
      */
     public Duration addCpuMeasurements(double value, int count) {
+        var idealLoad = fixture.clusterModel().idealLoad(); // TODO: Use this
         NodeList nodes = fixture.nodes();
         float oneExtraNodeFactor = (float)(nodes.size() - 1.0) / (nodes.size());
         Instant initialTime = fixture.tester().clock().instant();
@@ -88,6 +89,7 @@ public class Loader {
      * wanting to see the ideal load with one node missing.)
      */
     public void addMemMeasurements(double value, int count) {
+        var idealLoad = fixture.clusterModel().idealLoad(); // TODO: Use this
         NodeList nodes = fixture.nodes();
         float oneExtraNodeFactor = (float)(nodes.size() - 1.0) / (nodes.size());
         for (int i = 0; i < count; i++) {
diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/provisioning/ProvisioningTest.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/provisioning/ProvisioningTest.java
index 34219a15caa..8c9c8939616 100644
--- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/provisioning/ProvisioningTest.java
+++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/provisioning/ProvisioningTest.java
@@ -546,14 +546,14 @@ public class ProvisioningTest {
         tester.activate(app1, cluster1, Capacity.from(resources(6, 3, 8, 25,  10),
                                                       resources(9, 3, 12, 35, 15)));
         tester.assertNodes("Groups changed",
-                           9, 3, 8, 35, 15,
+                           9, 3, 8, 30, 13,
                            app1, cluster1);
 
         // Stop specifying node resources
         tester.activate(app1, cluster1, Capacity.from(new ClusterResources(6, 3, NodeResources.unspecified()),
                                                       new ClusterResources(9, 3, NodeResources.unspecified())));
         tester.assertNodes("No change",
-                           9, 3, 8, 35, 15,
+                           9, 3, 8, 30, 13,
                            app1, cluster1);
     }
 
diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/provisioning/VirtualNodeProvisioningCompleteHostCalculatorTest.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/provisioning/VirtualNodeProvisioningCompleteHostCalculatorTest.java
index 2f0caf8092f..d703ecf44e8 100644
--- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/provisioning/VirtualNodeProvisioningCompleteHostCalculatorTest.java
+++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/provisioning/VirtualNodeProvisioningCompleteHostCalculatorTest.java
@@ -37,24 +37,23 @@ public class VirtualNodeProvisioningCompleteHostCalculatorTest {
         ClusterSpec cluster1 = ClusterSpec.request(ClusterSpec.Type.content, new ClusterSpec.Id("cluster1")).vespaVersion("7").build();
 
         var initialResources = new NodeResources(20, 16, 50, 1);
-        tester.activate(app1, cluster1, Capacity.from(new ClusterResources(2, 1, initialResources),
-                                                      new ClusterResources(2, 1, initialResources)));
+        tester.activate(app1, cluster1, Capacity.from(new ClusterResources(2, 1, initialResources)));
         tester.assertNodes("Initial allocation",
                            2, 1, 20, 16, 50, 1.0,
                            app1, cluster1);
 
         var newMinResources = new NodeResources( 5,  4, 11, 1);
         var newMaxResources = new NodeResources(20, 10, 30, 1);
+
         tester.activate(app1, cluster1, Capacity.from(new ClusterResources(7, 1, newMinResources),
                                                       new ClusterResources(7, 1, newMaxResources)));
-        tester.assertNodes("New allocation preserves total resources",
-                           7, 1, 7, 4.6, 14.3, 1.0,
+        tester.assertNodes("New allocation preserves (redundancy adjusted) total resources",
+                           7, 1, 5, 4.0, 11, 1.0,
                            app1, cluster1);
-
         tester.activate(app1, cluster1, Capacity.from(new ClusterResources(7, 1, newMinResources),
                                                       new ClusterResources(7, 1, newMaxResources)));
         tester.assertNodes("Redeploying the same ranges does not cause changes",
-                           7, 1, 7, 4.6, 14.3, 1.0,
+                           7, 1, 5, 4.0, 11, 1.0,
                            app1, cluster1);
     }
 
diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/provisioning/VirtualNodeProvisioningTest.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/provisioning/VirtualNodeProvisioningTest.java
index a1c55833862..7728e0ac9c8 100644
--- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/provisioning/VirtualNodeProvisioningTest.java
+++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/provisioning/VirtualNodeProvisioningTest.java
@@ -522,14 +522,14 @@ public class VirtualNodeProvisioningTest {
         var newMaxResources = new NodeResources(20, 10, 30, 1);
         tester.activate(app1, cluster1, Capacity.from(new ClusterResources(7, 1, newMinResources),
                                                       new ClusterResources(7, 1, newMaxResources)));
-        tester.assertNodes("New allocation preserves total resources",
-                           7, 1, 7, 6.7, 14.3, 1.0,
+        tester.assertNodes("New allocation preserves total (redundancy adjusted) resources",
+                           7, 1, 5, 6.0, 11, 1.0,
                            app1, cluster1);
 
         tester.activate(app1, cluster1, Capacity.from(new ClusterResources(7, 1, newMinResources),
                                                       new ClusterResources(7, 1, newMaxResources)));
         tester.assertNodes("Redeploying does not cause changes",
-                           7, 1, 7, 6.7, 14.3, 1.0,
+                           7, 1, 5, 6.0, 11, 1.0,
                            app1, cluster1);
     }
 
diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/restapi/responses/application1.json b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/restapi/responses/application1.json
index 63a604bf4eb..40719153b9e 100644
--- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/restapi/responses/application1.json
+++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/restapi/responses/application1.json
@@ -71,7 +71,7 @@
       },
       "utilization" : {
           "cpu" : 0.0,
-          "idealCpu": 0.275,
+          "idealCpu": 0.1375,
           "currentCpu": 0.0,
           "memory" : 0.0,
           "idealMemory": 0.65,
diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/restapi/responses/application2.json b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/restapi/responses/application2.json
index eddf9b957a7..41aa4257c00 100644
--- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/restapi/responses/application2.json
+++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/restapi/responses/application2.json
@@ -45,13 +45,13 @@
       },
       "utilization" : {
         "cpu" : 0.0,
-        "idealCpu": 0.2664285714285714,
+        "idealCpu": 0.1394913986537023,
         "currentCpu": 0.0,
         "memory" : 0.0,
-        "idealMemory": 0.65,
+        "idealMemory": 0.325,
         "currentMemory": 0.0,
         "disk" : 0.0,
-        "idealDisk": 0.6,
+        "idealDisk": 0.3,
         "currentDisk": 0.0
       },
       "scalingEvents" : [
author	Valerij Fredriksen <freva@users.noreply.github.com>	2022-08-03 17:35:11 +0200
committer	GitHub <noreply@github.com>	2022-08-03 17:35:11 +0200
commit	cc0e0a6919a4fccd7ef6d6016ba186136d49c956 (patch)
tree	af409afdb07d6fe2290124ae7b57ee1dba767d9b
parent	66df56662aaa775732c5b2f23c49ffaed668a276 (diff)
parent	38f54c8d1ae746377ce2260c39a9cce377148e84 (diff)