Merge pull request #31444 from vespa-engine/bratseth/test-unfulfilable-memory

Test that we don't rescale when container memory is unfulfilled
author: Martin Polden <mpolden@mpolden.no> 2024-06-05 12:54:50 +0200
committer: GitHub <noreply@github.com> 2024-06-05 12:54:50 +0200
commit: 31b51c7a2057d8dce70c7185b84bc379be336c68 (patch)
tree: b8881435a7daa1d292a8be64cde16af68b92cecd
parent: 52e29fb822412399938ff1eb32770feabd8d20b3 (diff)
parent: 66ee600aa41ebd0a51a4db2d1ca17bfe343c9088 (diff)
6 files changed, 47 insertions, 27 deletions
diff --git a/container-search/src/main/java/com/yahoo/prelude/fastsearch/VespaBackend.java b/container-search/src/main/java/com/yahoo/prelude/fastsearch/VespaBackend.java
index a463fb9d0e6..47a4530b9d1 100644
--- a/container-search/src/main/java/com/yahoo/prelude/fastsearch/VespaBackend.java
+++ b/container-search/src/main/java/com/yahoo/prelude/fastsearch/VespaBackend.java
@@ -311,10 +311,10 @@ public abstract class VespaBackend {
         if (query.getTrace().isTraceable(level + 1) && query.getTrace().getQuery()) {
             query.trace("Current state of query tree: "
                             + new TextualQueryRepresentation(query.getModel().getQueryTree().getRoot()),
-                    false, level+1);
+                    false, level + 1);
         }
         if (query.getTrace().isTraceable(level + 2) && query.getTrace().getQuery()) {
-            query.trace("YQL+ representation: " + query.yqlRepresentation(), level+2);
+            query.trace("YQL+ representation: " + query.yqlRepresentation(), level + 2);
         }
     }
 
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/AllocatableResources.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/AllocatableResources.java
index 6d3de628601..7609851d7fe 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/AllocatableResources.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/AllocatableResources.java
@@ -116,6 +116,10 @@ public class AllocatableResources {
      */
     public double fulfilment() { return fulfilment; }
 
+    public boolean notFulfiled() { // True when the fulfilment value is below 0.9999999, i.e. short of 1
+        return fulfilment < 0.9999999; // NOTE(review): the small margin presumably absorbs floating-point rounding - confirm
+    }
+
     private static double fulfilment(ClusterResources realResources, ClusterResources idealResources) {
         double vcpuFulfilment     = Math.min(1, realResources.totalResources().vcpu()     / idealResources.totalResources().vcpu());
         double memoryGbFulfilment = Math.min(1, realResources.totalResources().memoryGb() / idealResources.totalResources().memoryGb());
@@ -128,7 +132,7 @@ public class AllocatableResources {
 
     public boolean preferableTo(AllocatableResources other, ClusterModel model) {
         // always fulfil as much as possible unless fulfilment is considered to be equal
-        if (!equal(this.fulfilment(), other.fulfilment()) && (other.fulfilment() < 1 || this.fulfilment() < 1))
+        if ((other.fulfilment() < 1 || this.fulfilment() < 1) && ! equal(this.fulfilment(), other.fulfilment()))
             return this.fulfilment() > other.fulfilment();
 
         return this.cost() * toHours(model.allocationDuration()) + this.costChangingFrom(model)
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/Autoscaler.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/Autoscaler.java
index 29ab6d65b9f..d0ea406c91e 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/Autoscaler.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/Autoscaler.java
@@ -69,11 +69,9 @@ public class Autoscaler {
             return Autoscaling.dontScale(Status.waiting, "Cluster change in progress", model);
 
         var loadAdjustment = model.loadAdjustment();
-        if (enableDetailedLogging) {
+        if (enableDetailedLogging)
             log.info("Application: " + application.id().toShortString() + ", loadAdjustment: " + loadAdjustment.toString());
-        }
 
-        // Ensure we only scale down if we'll have enough headroom to not scale up again given a small load increase
         var target = allocationOptimizer.findBestAllocation(loadAdjustment, model, limits, enableDetailedLogging);
 
         if (target.isEmpty())
@@ -98,8 +96,8 @@ public class Autoscaler {
             return Autoscaling.dontScale(Status.unavailable, "Autoscaling is disabled in single node clusters", model);
 
         if (! worthRescaling(model.current().realResources(), target.realResources())) {
-            if (target.fulfilment() < 0.9999999)
-                return Autoscaling.dontScale(Status.insufficient, "Configured limits prevents ideal scaling of this cluster", model);
+            if (target.notFulfiled())
+                return Autoscaling.dontScale(Status.insufficient, "Cluster cannot be scaled to achieve ideal load", model);
             else if ( ! model.safeToScaleDown() && model.idealLoad().any(v -> v < 1.0))
                 return Autoscaling.dontScale(Status.ideal, "Cooling off before considering to scale down", model);
             else
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/ClusterModel.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/ClusterModel.java
index 504965f1992..10207ea87d5 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/ClusterModel.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/ClusterModel.java
@@ -125,6 +125,21 @@ public class ClusterModel {
         this.at = clock.instant();
     }
 
+
+    /**
+     * The central decision made in autoscaling.
+     *
+     * @return the relative load adjustment that should be made to this cluster given available measurements.
+     *         For example, a load adjustment of 2 means we should allocate twice the amount of that resource.
+     */
+    public Load loadAdjustment() {
+        if (nodeTimeseries().measurementsPerNode() < 0.5) return Load.one(); // Don't change based on very little data
+        Load adjustment = peakLoad().divide(idealLoad()); // Peak load divided by ideal load
+        if (! safeToScaleDown())
+            adjustment = adjustment.map(v -> v < 1 ? 1 : v); // Not safe to scale down: raise adjustments below 1 up to 1
+        return adjustment;
+    }
+
     public Application application() { return application; }
     public ClusterSpec clusterSpec() { return clusterSpec; }
     public CloudAccount cloudAccount() { return cluster.cloudAccount().orElse(CloudAccount.empty); }
@@ -133,11 +148,7 @@ public class ClusterModel {
     private ClusterTimeseries clusterTimeseries() { return clusterTimeseries; }
 
     /** Returns the instant this model was created. */
-    public Instant at() { return at;}
-
-    public boolean isEmpty() {
-        return nodeTimeseries().isEmpty();
-    }
+    public Instant at() { return at; }
 
     /** Returns the predicted duration of a rescaling of this cluster */
     public Duration scalingDuration() { return scalingDuration; }
@@ -148,9 +159,9 @@ public class ClusterModel {
      */
     public Duration allocationDuration() { return allocationDuration; }
 
-    public boolean isContent() {
-        return clusterSpec.type().isContent();
-    }
+    public boolean isEmpty() { return nodeTimeseries().isEmpty(); }
+
+    public boolean isContent() { return clusterSpec.type().isContent(); }
 
     /** Returns the predicted duration of data redistribution in this cluster. */
     public Duration redistributionDuration() {
@@ -177,15 +188,6 @@ public class ClusterModel {
         return nodeRepository.exclusivity().allocation(clusterSpec);
     }
 
-    /** Returns the relative load adjustment that should be made to this cluster given available measurements. */
-    public Load loadAdjustment() {
-        if (nodeTimeseries().measurementsPerNode() < 0.5) return Load.one(); // Don't change based on very little data
-        Load adjustment = peakLoad().divide(idealLoad());
-        if (! safeToScaleDown())
-            adjustment = adjustment.map(v -> v < 1 ? 1 : v);
-        return adjustment;
-    }
-
     public boolean isStable(NodeRepository nodeRepository) {
         // The cluster is processing recent changes
         if (nodes.stream().anyMatch(node -> node.status().wantToRetire() ||
@@ -218,7 +220,6 @@ public class ClusterModel {
                .divide(redundancyAdjustment());   // correct for double redundancy adjustment
     }
 
-
     /**
      * Returns the relative load adjustment accounting for redundancy given these nodes+groups
      * relative to node nodes+groups in this.
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/Load.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/Load.java
index 22c13795d18..e00bf17c89f 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/Load.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/Load.java
@@ -114,7 +114,7 @@ public record Load(double cpu, double memory, double disk, double gpu, double gp
 
     @Override
     public String toString() {
-        return "load: " + cpu + " cpu, " + memory + " memory, " + disk + " disk," + gpu + " gpu," + gpuMemory + " gpuMemory";
+        return "load: " + cpu + " cpu, " + memory + " memory, " + disk + " disk, " + gpu + " gpu, " + gpuMemory + " gpuMemory";
     }
 
     public static Load zero() { return new Load(0, 0, 0, 0, 0); }
diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/AutoscalingTest.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/AutoscalingTest.java
index e0c8199a882..5b15327556f 100644
--- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/AutoscalingTest.java
+++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/AutoscalingTest.java
@@ -154,6 +154,23 @@ public class AutoscalingTest {
     }
 
     @Test
+    public void test_containers_wont_scale_up_on_memory() { // Expects Status.insufficient for a container cluster at memory load 1.0
+        var min = new ClusterResources(2, 1, new NodeResources(4, 8, 50, 0.1));
+        var now = new ClusterResources(4, 1, new NodeResources(4, 8, 50, 0.1)); // Initial resources, passed to initialResources below
+        var max = new ClusterResources(8, 1, new NodeResources(4, 8, 50, 0.1)); // min/now/max share node resources; only the first argument differs
+        var fixture = DynamicProvisioningTester.fixture()
+                                               .awsProdSetup(false)
+                                               .clusterType(ClusterSpec.Type.container)
+                                               .initialResources(Optional.of(now))
+                                               .capacity(Capacity.from(min, max))
+                                               .build();
+        fixture.tester().setScalingDuration(fixture.applicationId(), fixture.clusterSpec.id(), Duration.ofMinutes(5));

+        fixture.loader().applyLoad(new Load(0.1875, 1.0, 0.95, 0, 0), 50); // Load components: cpu, memory, disk, gpu, gpuMemory
+        assertEquals(Autoscaling.Status.insufficient, fixture.autoscale().status());
+    }
+
+    @Test
     public void initial_deployment_with_host_sharing_flag() {
         var min = new ClusterResources(7, 1, new NodeResources(2.0, 10.0, 384.0, 0.1));
         var max = new ClusterResources(7, 1, new NodeResources(2.4, 32.0, 768.0, 0.1));
author	Martin Polden <mpolden@mpolden.no>	2024-06-05 12:54:50 +0200
committer	GitHub <noreply@github.com>	2024-06-05 12:54:50 +0200
commit	31b51c7a2057d8dce70c7185b84bc379be336c68 (patch)
tree	b8881435a7daa1d292a8be64cde16af68b92cecd
parent	52e29fb822412399938ff1eb32770feabd8d20b3 (diff)
parent	66ee600aa41ebd0a51a4db2d1ca17bfe343c9088 (diff)