summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorJon Bratseth <bratseth@vespa.ai>2023-07-20 14:50:22 +0200
committerJon Bratseth <bratseth@vespa.ai>2023-07-20 14:50:22 +0200
commit97bd65b51e942fb81eeb43b14b03cad8d2474c6d (patch)
tree3dee5ca95c3ac4380e716ba0959e09a97493a820
parent06b3744b44d2a2d4fbe18f9121af2a0c57fd9683 (diff)
Don't scale down if we are likely to scale back up
When we decide to scale the number of nodes down, we'll see a lower ideal load because we need to be able to handle one node going down. This may lead us to be closer to ideal (at current peak load) than we would otherwise anticipate, such that we are quite likely to soon scale back up. This checks for that and avoids scaling down dimensions where this is the case.
-rw-r--r--config-application-package/src/test/java/com/yahoo/config/application/XmlPreprocessorTest.java1
-rw-r--r--config-model/src/main/java/com/yahoo/schema/parser/ConvertParsedFields.java8
-rw-r--r--config-provisioning/src/main/java/com/yahoo/config/provision/Capacity.java6
-rw-r--r--node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/Autoscaler.java59
-rw-r--r--node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/ClusterModel.java15
-rw-r--r--node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/AutoscalingTest.java28
-rw-r--r--node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/AutoscalingUsingBcpGroupInfoTest.java8
-rw-r--r--node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/Fixture.java15
-rw-r--r--node-repository/src/test/java/com/yahoo/vespa/hosted/provision/provisioning/DynamicProvisioningTester.java11
9 files changed, 114 insertions, 37 deletions
diff --git a/config-application-package/src/test/java/com/yahoo/config/application/XmlPreprocessorTest.java b/config-application-package/src/test/java/com/yahoo/config/application/XmlPreprocessorTest.java
index bbccc8343a1..37a0cceda22 100644
--- a/config-application-package/src/test/java/com/yahoo/config/application/XmlPreprocessorTest.java
+++ b/config-application-package/src/test/java/com/yahoo/config/application/XmlPreprocessorTest.java
@@ -10,7 +10,6 @@ import org.w3c.dom.Document;
import java.io.File;
import java.io.StringReader;
-import java.util.Set;
/**
* @author hmusum
diff --git a/config-model/src/main/java/com/yahoo/schema/parser/ConvertParsedFields.java b/config-model/src/main/java/com/yahoo/schema/parser/ConvertParsedFields.java
index 8a95edd0467..7fd16826667 100644
--- a/config-model/src/main/java/com/yahoo/schema/parser/ConvertParsedFields.java
+++ b/config-model/src/main/java/com/yahoo/schema/parser/ConvertParsedFields.java
@@ -188,10 +188,10 @@ public class ConvertParsedFields {
for (var dictOp : parsed.getDictionaryOptions()) {
var dictionary = field.getOrSetDictionary();
switch (dictOp) {
- case HASH: dictionary.updateType(Dictionary.Type.HASH); break;
- case BTREE: dictionary.updateType(Dictionary.Type.BTREE); break;
- case CASED: dictionary.updateMatch(Case.CASED); break;
- case UNCASED: dictionary.updateMatch(Case.UNCASED); break;
+ case HASH -> dictionary.updateType(Dictionary.Type.HASH);
+ case BTREE -> dictionary.updateType(Dictionary.Type.BTREE);
+ case CASED -> dictionary.updateMatch(Case.CASED);
+ case UNCASED -> dictionary.updateMatch(Case.UNCASED);
}
}
for (var index : parsed.getIndexes()) {
diff --git a/config-provisioning/src/main/java/com/yahoo/config/provision/Capacity.java b/config-provisioning/src/main/java/com/yahoo/config/provision/Capacity.java
index 735f4afd974..58b14b3b38a 100644
--- a/config-provisioning/src/main/java/com/yahoo/config/provision/Capacity.java
+++ b/config-provisioning/src/main/java/com/yahoo/config/provision/Capacity.java
@@ -103,7 +103,11 @@ public final class Capacity {
/** Create a non-required, failable capacity request */
public static Capacity from(ClusterResources min, ClusterResources max) {
- return from(min, max, IntRange.empty(), false, true, Optional.empty(), ClusterInfo.empty());
+ return from(min, max, IntRange.empty());
+ }
+
+ public static Capacity from(ClusterResources min, ClusterResources max, IntRange groupSize) {
+ return from(min, max, groupSize, false, true, Optional.empty(), ClusterInfo.empty());
}
public static Capacity from(ClusterResources resources, boolean required, boolean canFail) {
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/Autoscaler.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/Autoscaler.java
index a7d5cc50828..795cbd59c4b 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/Autoscaler.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/Autoscaler.java
@@ -2,7 +2,6 @@
package com.yahoo.vespa.hosted.provision.autoscale;
import com.yahoo.config.provision.ClusterResources;
-import com.yahoo.vespa.hosted.provision.Node;
import com.yahoo.vespa.hosted.provision.NodeList;
import com.yahoo.vespa.hosted.provision.NodeRepository;
import com.yahoo.vespa.hosted.provision.applications.Application;
@@ -10,7 +9,6 @@ import com.yahoo.vespa.hosted.provision.applications.Cluster;
import com.yahoo.vespa.hosted.provision.autoscale.Autoscaling.Status;
import java.time.Duration;
-import java.time.Instant;
import java.util.Optional;
/**
@@ -23,7 +21,9 @@ public class Autoscaler {
/** What cost difference is worth a reallocation? */
private static final double costDifferenceWorthReallocation = 0.1;
/** What resource difference is worth a reallocation? */
- private static final double resourceDifferenceWorthReallocation = 0.03;
+ private static final double resourceIncreaseWorthReallocation = 0.03;
+ /** The load increase headroom (as a fraction) we should have before needing to scale up, to decide to scale down */
+ private static final double headroomRequiredToScaleDown = 0.1;
private final NodeRepository nodeRepository;
private final AllocationOptimizer allocationOptimizer;
@@ -70,22 +70,53 @@ public class Autoscaler {
if ( ! clusterModel.isStable(nodeRepository))
return Autoscaling.dontScale(Status.waiting, "Cluster change in progress", clusterModel);
- var currentAllocation = new AllocatableClusterResources(clusterNodes.not().retired(), nodeRepository);
- Optional<AllocatableClusterResources> bestAllocation =
- allocationOptimizer.findBestAllocation(clusterModel.loadAdjustment(), currentAllocation, clusterModel, limits);
- if (bestAllocation.isEmpty())
+ var current = new AllocatableClusterResources(clusterNodes.not().retired(), nodeRepository);
+ var loadAdjustment = clusterModel.loadAdjustment();
+
+ // Ensure we only scale down if we'll have enough headroom to not scale up again given a small load increase
+ var target = allocationOptimizer.findBestAllocation(loadAdjustment, current, clusterModel, limits);
+ var headroomAdjustedLoadAdjustment = adjustForHeadroom(loadAdjustment, clusterModel, target);
+ if ( ! headroomAdjustedLoadAdjustment.equals(loadAdjustment)) {
+ loadAdjustment = headroomAdjustedLoadAdjustment;
+ target = allocationOptimizer.findBestAllocation(loadAdjustment, current, clusterModel, limits);
+ }
+
+ if (target.isEmpty())
return Autoscaling.dontScale(Status.insufficient, "No allocations are possible within configured limits", clusterModel);
- if (! worthRescaling(currentAllocation.realResources(), bestAllocation.get().realResources())) {
- if (bestAllocation.get().fulfilment() < 0.9999999)
+ if (! worthRescaling(current.realResources(), target.get().realResources())) {
+ if (target.get().fulfilment() < 0.9999999)
return Autoscaling.dontScale(Status.insufficient, "Configured limits prevents ideal scaling of this cluster", clusterModel);
else if ( ! clusterModel.safeToScaleDown() && clusterModel.idealLoad().any(v -> v < 1.0))
return Autoscaling.dontScale(Status.ideal, "Cooling off before considering to scale down", clusterModel);
else
- return Autoscaling.dontScale(Status.ideal, "Cluster is ideally scaled (within limits)", clusterModel);
+ return Autoscaling.dontScale(Status.ideal, "Cluster is ideally scaled (within configured limits)", clusterModel);
}
- return Autoscaling.scaleTo(bestAllocation.get().advertisedResources(), clusterModel);
+ return Autoscaling.scaleTo(target.get().advertisedResources(), clusterModel);
+ }
+
+ /**
+ * When scaling down we may end up with resources that are just barely below the new ideal with the new number
+ * of nodes, as fewer nodes leads to a lower ideal load (due to redundancy).
+ * If that headroom is too small, then do not scale down as it will likely lead to scaling back up again soon.
+ */
+ private Load adjustForHeadroom(Load loadAdjustment, ClusterModel clusterModel,
+ Optional<AllocatableClusterResources> target) {
+ if (target.isEmpty()) return loadAdjustment;
+
+ // If we change to this target, what would our current peak be compared to the ideal
+ var relativeLoadWithTarget =
+ loadAdjustment // redundancy aware target relative to current load
+ .multiply(clusterModel.loadWith(target.get().nodes(), target.get().groups())) // redundancy aware adjustment with target
+ .divide(clusterModel.redundancyAdjustment()); // correct for double redundancy adjustment
+ if (loadAdjustment.cpu() < 1 && (1.0 - relativeLoadWithTarget.cpu()) < headroomRequiredToScaleDown)
+ loadAdjustment = loadAdjustment.withCpu(1.0);
+ if (loadAdjustment.memory() < 1 && (1.0 - relativeLoadWithTarget.memory()) < headroomRequiredToScaleDown)
+ loadAdjustment = loadAdjustment.withMemory(1.0);
+ if (loadAdjustment.disk() < 1 && (1.0 - relativeLoadWithTarget.disk()) < headroomRequiredToScaleDown)
+ loadAdjustment = loadAdjustment.withDisk(1.0);
+ return loadAdjustment;
}
/** Returns true if it is worthwhile to make the given resource change, false if it is too insignificant */
@@ -95,12 +126,14 @@ public class Autoscaler {
if (meaningfulIncrease(from.totalResources().memoryGb(), to.totalResources().memoryGb())) return true;
if (meaningfulIncrease(from.totalResources().diskGb(), to.totalResources().diskGb())) return true;
- // Otherwise, only *decrease* if it reduces cost meaningfully
+ // Otherwise, only *decrease* if
+ // - cost is reduced meaningfully
+ // - the new resources won't be so much smaller that a small fluctuation in load will cause an increase
return ! similar(from.cost(), to.cost(), costDifferenceWorthReallocation);
}
public static boolean meaningfulIncrease(double from, double to) {
- return from < to && ! similar(from, to, resourceDifferenceWorthReallocation);
+ return from < to && ! similar(from, to, resourceIncreaseWorthReallocation);
}
private static boolean similar(double r1, double r2, double threshold) {
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/ClusterModel.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/ClusterModel.java
index 289025f9d21..a5490996a2c 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/ClusterModel.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/ClusterModel.java
@@ -175,9 +175,9 @@ public class ClusterModel {
* Returns the relative load adjustment accounting for redundancy given these nodes+groups
* relative to node nodes+groups in this.
*/
- public Load loadWith(int trueNodes, int trueGroups) {
- int nodes = nodesAdjustedForRedundancy(trueNodes, trueGroups);
- int groups = groupsAdjustedForRedundancy(trueNodes, trueGroups);
+ public Load loadWith(int givenNodes, int givenGroups) {
+ int nodes = nodesAdjustedForRedundancy(givenNodes, givenGroups);
+ int groups = groupsAdjustedForRedundancy(givenNodes, givenGroups);
if (clusterSpec().type() == ClusterSpec.Type.content) { // load scales with node share of content
int groupSize = nodes / groups;
@@ -272,7 +272,7 @@ public class ClusterModel {
/** The number of nodes this cluster has, or will have if not deployed yet. */
// TODO: Make this the deployed, not current count
- private int nodeCount() {
+ public int nodeCount() {
if ( ! nodes.isEmpty()) return (int)nodes.not().retired().stream().count();
return cluster.minResources().nodes();
}
@@ -289,12 +289,12 @@ public class ClusterModel {
return (int)Math.ceil((double)nodeCount() / groupCount());
}
- private int nodesAdjustedForRedundancy(int nodes, int groups) {
+ private static int nodesAdjustedForRedundancy(int nodes, int groups) {
int groupSize = (int)Math.ceil((double)nodes / groups);
return nodes > 1 ? (groups == 1 ? nodes - 1 : nodes - groupSize) : nodes;
}
- private int groupsAdjustedForRedundancy(int nodes, int groups) {
+ private static int groupsAdjustedForRedundancy(int nodes, int groups) {
return nodes > 1 ? (groups == 1 ? 1 : groups - 1) : groups;
}
@@ -340,8 +340,7 @@ public class ClusterModel {
/** Ideal cpu load must take the application traffic fraction into account. */
double idealLoad() {
double queryCpuFraction = queryFraction();
-
- // Assumptions: 1) Write load is not organic so we should not grow to handle more.
+ // Assumptions: 1) Write load is not organic so we should not increase to handle potential future growth.
// (TODO: But allow applications to set their target write rate and size for that)
// 2) Write load does not change in BCP scenarios.
return queryCpuFraction * 1/growthRateHeadroom() * 1/trafficShiftHeadroom() * idealQueryCpuLoad +
diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/AutoscalingTest.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/AutoscalingTest.java
index 47206265c68..54178865693 100644
--- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/AutoscalingTest.java
+++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/AutoscalingTest.java
@@ -12,6 +12,7 @@ import com.yahoo.config.provision.NodeResources.DiskSpeed;
import com.yahoo.config.provision.NodeResources.StorageType;
import com.yahoo.config.provision.RegionName;
import com.yahoo.config.provision.Zone;
+import com.yahoo.vespa.hosted.provision.Node;
import com.yahoo.vespa.hosted.provision.provisioning.CapacityPolicies;
import com.yahoo.vespa.hosted.provision.provisioning.DynamicProvisioningTester;
import org.junit.Test;
@@ -87,7 +88,7 @@ public class AutoscalingTest {
fixture.tester().clock().advance(Duration.ofDays(7));
fixture.loader().applyCpuLoad(0.1f, 10);
fixture.tester().assertResources("Scaling cpu down since usage has gone down significantly",
- 6, 1, 1.1, 8.8, 346.8,
+ 6, 1, 1.1, 9.8, 390.2,
fixture.autoscale());
}
@@ -585,7 +586,7 @@ public class AutoscalingTest {
@Test
public void test_autoscaling_groupsize_by_cpu_read_dominated() {
var min = new ClusterResources( 3, 1, new NodeResources(1, 1, 1, 1));
- var now = new ClusterResources(6, 2, new NodeResources(3, 100, 100, 1));
+ var now = new ClusterResources( 6, 2, new NodeResources(3, 100, 100, 1));
var max = new ClusterResources(21, 7, new NodeResources(100, 1000, 1000, 1));
var fixture = DynamicProvisioningTester.fixture()
.awsProdSetup(true)
@@ -665,7 +666,7 @@ public class AutoscalingTest {
fixture.tester().clock().advance(Duration.ofHours(12 * 3 + 1));
fixture.loader().applyCpuLoad(0.02, 5);
fixture.tester().assertResources("Scaling down since enough time has passed",
- 3, 1, 1.0, 23.6, 101.4,
+ 3, 1, 1.0, 29.5, 126.7,
fixture.autoscale());
}
@@ -798,7 +799,7 @@ public class AutoscalingTest {
fixture.tester.clock().advance(timeAdded.negated());
fixture.loader().addCpuMeasurements(0.4, 200);
fixture.tester().assertResources("Write only -> smallest possible",
- 4, 1, 1.1, 16.1, 67.6,
+ 4, 1, 1.1, 20.1, 84.5,
fixture.autoscale());
}
@@ -881,6 +882,23 @@ public class AutoscalingTest {
}
@Test
+ public void test_scaling_down_leaves_too_little_headroom() {
+ var r = new NodeResources(16, 32, 100, 1, NodeResources.DiskSpeed.any);
+ var min = new ClusterResources( 3, 3, r);
+ var now = new ClusterResources( 4, 4, r);
+ var max = new ClusterResources( 5, 5, r);
+ var fixture = DynamicProvisioningTester.fixture()
+ .awsProdSetup(false)
+ .capacity(Capacity.from(min, max, IntRange.from(1)))
+ .clusterType(ClusterSpec.Type.content)
+ .initialResources(Optional.of(now))
+ .build();
+ fixture.loader().applyCpuLoad(0.17, 10);
+ assertTrue("Not scaling down as that would leave just 4.5% headroom before needing to scale up again",
+ fixture.autoscale().resources().isEmpty());
+ }
+
+ @Test
public void test_changing_exclusivity() {
var min = new ClusterResources( 2, 1, new NodeResources( 3, 4, 100, 1));
var max = new ClusterResources(20, 1, new NodeResources(100, 1000, 1000, 1));
@@ -923,7 +941,7 @@ public class AutoscalingTest {
fixture.loader().applyLoad(new Load(0.06, 0.52, 0.27), 100);
var autoscaling = fixture.autoscale();
fixture.tester().assertResources("Scaling down",
- 7, 1, 2, 14.5, 384.0,
+ 7, 1, 2, 15.8, 384.0,
autoscaling);
fixture.deploy(Capacity.from(autoscaling.resources().get()));
assertEquals("Initial nodes are kept", initialNodes, fixture.nodes().asList());
diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/AutoscalingUsingBcpGroupInfoTest.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/AutoscalingUsingBcpGroupInfoTest.java
index 637932681ee..379dbb27d87 100644
--- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/AutoscalingUsingBcpGroupInfoTest.java
+++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/AutoscalingUsingBcpGroupInfoTest.java
@@ -85,7 +85,7 @@ public class AutoscalingUsingBcpGroupInfoTest {
fixture.store(new BcpGroupInfo(100, 1.1, 0.3));
fixture.loader().addCpuMeasurements(0.7f, 10);
fixture.tester().assertResources("Scaling up cpu using bcp group cpu info",
- 3, 3, 10.5, 38.4, 168.9,
+ 3, 3, 10.5, 43.2, 190.0,
fixture.autoscale());
// Higher query rate
@@ -93,7 +93,7 @@ public class AutoscalingUsingBcpGroupInfoTest {
fixture.store(new BcpGroupInfo(200, 1.1, 0.3));
fixture.loader().addCpuMeasurements(0.7f, 10);
fixture.tester().assertResources("Scaling up cpu using bcp group cpu info",
- 3, 3, 20.9, 38.4, 168.9,
+ 3, 3, 20.9, 43.2, 190.0,
fixture.autoscale());
// Higher headroom
@@ -101,7 +101,7 @@ public class AutoscalingUsingBcpGroupInfoTest {
fixture.store(new BcpGroupInfo(100, 1.3, 0.3));
fixture.loader().addCpuMeasurements(0.7f, 10);
fixture.tester().assertResources("Scaling up cpu using bcp group cpu info",
- 3, 3, 12.4, 38.4, 168.9,
+ 3, 3, 12.4, 43.2, 190.0,
fixture.autoscale());
// Higher per query cost
@@ -109,7 +109,7 @@ public class AutoscalingUsingBcpGroupInfoTest {
fixture.store(new BcpGroupInfo(100, 1.1, 0.45));
fixture.loader().addCpuMeasurements(0.7f, 10);
fixture.tester().assertResources("Scaling up cpu using bcp group cpu info",
- 3, 3, 15.7, 38.4, 168.9,
+ 3, 3, 15.7, 43.2, 190.0,
fixture.autoscale());
}
diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/Fixture.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/Fixture.java
index b150b372fe8..33d3d3d50dc 100644
--- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/Fixture.java
+++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/Fixture.java
@@ -49,6 +49,8 @@ public class Fixture {
final Capacity capacity;
final Loader loader;
+ Autoscaling lastAutoscaling = Autoscaling.empty();
+
public Fixture(Fixture.Builder builder, Optional<ClusterResources> initialResources, int hostCount) {
applicationId = builder.application;
clusterSpec = builder.cluster;
@@ -105,7 +107,7 @@ public class Fixture {
/** Autoscale within the given capacity. */
public Autoscaling autoscale(Capacity capacity) {
- return tester().autoscale(applicationId, clusterSpec, capacity);
+ return lastAutoscaling = tester().autoscale(applicationId, clusterSpec, capacity);
}
/** Compute an autoscaling suggestion for this. */
@@ -123,6 +125,17 @@ public class Fixture {
tester().deploy(applicationId, clusterSpec, capacity);
}
+ public void deployTarget() {
+ if (lastAutoscaling.isEmpty()) throw new IllegalStateException("Autoscaling is empty");
+ if (lastAutoscaling.resources().isEmpty()) throw new IllegalStateException("Autoscaling target is empty: " + lastAutoscaling);
+ try (var lock = tester().nodeRepository().applications().lock(applicationId)) {
+ var updated = tester().nodeRepository().applications().require(applicationId).with(cluster().withTarget(lastAutoscaling));
+ tester().nodeRepository().applications().put(updated, lock);
+ }
+ deploy(capacity);
+ deactivateRetired(capacity);
+ }
+
public void deactivateRetired(Capacity capacity) {
tester().deactivateRetired(applicationId, clusterSpec, capacity);
}
diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/provisioning/DynamicProvisioningTester.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/provisioning/DynamicProvisioningTester.java
index 4799d3b5577..c982b195787 100644
--- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/provisioning/DynamicProvisioningTester.java
+++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/provisioning/DynamicProvisioningTester.java
@@ -186,6 +186,17 @@ public class DynamicProvisioningTester {
resources);
}
+ public void assertResources(String message,
+ int nodeCount, int groupCount,
+ NodeResources expectedResources,
+ Autoscaling autoscaling) {
+ assertTrue("Resources are present: " + message + " (" + autoscaling + ": " + autoscaling.status() + ")",
+ autoscaling.resources().isPresent());
+ assertResources(message, nodeCount, groupCount,
+ expectedResources.vcpu(), expectedResources.memoryGb(), expectedResources.diskGb(),
+ autoscaling.resources().get());
+ }
+
public ClusterResources assertResources(String message,
int nodeCount, int groupCount,
double approxCpu, double approxMemory, double approxDisk,