about summary refs log tree commit diff stats
diff options
context:
space:
mode:
author Martin Polden <mpolden@mpolden.no> 2024-06-05 12:54:50 +0200
committer GitHub <noreply@github.com> 2024-06-05 12:54:50 +0200
commit 31b51c7a2057d8dce70c7185b84bc379be336c68 (patch)
tree b8881435a7daa1d292a8be64cde16af68b92cecd
parent 52e29fb822412399938ff1eb32770feabd8d20b3 (diff)
parent 66ee600aa41ebd0a51a4db2d1ca17bfe343c9088 (diff)
Merge pull request #31444 from vespa-engine/bratseth/test-unfulfilable-memory
Test that we don't rescale when container memory is unfulfiled
-rw-r--r-- container-search/src/main/java/com/yahoo/prelude/fastsearch/VespaBackend.java 4
-rw-r--r-- node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/AllocatableResources.java 6
-rw-r--r-- node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/Autoscaler.java 8
-rw-r--r-- node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/ClusterModel.java 37
-rw-r--r-- node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/Load.java 2
-rw-r--r-- node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/AutoscalingTest.java 17
6 files changed, 47 insertions, 27 deletions
diff --git a/container-search/src/main/java/com/yahoo/prelude/fastsearch/VespaBackend.java b/container-search/src/main/java/com/yahoo/prelude/fastsearch/VespaBackend.java
index a463fb9d0e6..47a4530b9d1 100644
--- a/container-search/src/main/java/com/yahoo/prelude/fastsearch/VespaBackend.java
+++ b/container-search/src/main/java/com/yahoo/prelude/fastsearch/VespaBackend.java
@@ -311,10 +311,10 @@ public abstract class VespaBackend {
if (query.getTrace().isTraceable(level + 1) && query.getTrace().getQuery()) {
query.trace("Current state of query tree: "
+ new TextualQueryRepresentation(query.getModel().getQueryTree().getRoot()),
- false, level+1);
+ false, level + 1);
}
if (query.getTrace().isTraceable(level + 2) && query.getTrace().getQuery()) {
- query.trace("YQL+ representation: " + query.yqlRepresentation(), level+2);
+ query.trace("YQL+ representation: " + query.yqlRepresentation(), level + 2);
}
}
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/AllocatableResources.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/AllocatableResources.java
index 6d3de628601..7609851d7fe 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/AllocatableResources.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/AllocatableResources.java
@@ -116,6 +116,10 @@ public class AllocatableResources {
*/
public double fulfilment() { return fulfilment; }
+ public boolean notFulfiled() {
+ return fulfilment < 0.9999999;
+ }
+
private static double fulfilment(ClusterResources realResources, ClusterResources idealResources) {
double vcpuFulfilment = Math.min(1, realResources.totalResources().vcpu() / idealResources.totalResources().vcpu());
double memoryGbFulfilment = Math.min(1, realResources.totalResources().memoryGb() / idealResources.totalResources().memoryGb());
@@ -128,7 +132,7 @@ public class AllocatableResources {
public boolean preferableTo(AllocatableResources other, ClusterModel model) {
// always fulfil as much as possible unless fulfilment is considered to be equal
- if (!equal(this.fulfilment(), other.fulfilment()) && (other.fulfilment() < 1 || this.fulfilment() < 1))
+ if ((other.fulfilment() < 1 || this.fulfilment() < 1) && ! equal(this.fulfilment(), other.fulfilment()))
return this.fulfilment() > other.fulfilment();
return this.cost() * toHours(model.allocationDuration()) + this.costChangingFrom(model)
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/Autoscaler.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/Autoscaler.java
index 29ab6d65b9f..d0ea406c91e 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/Autoscaler.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/Autoscaler.java
@@ -69,11 +69,9 @@ public class Autoscaler {
return Autoscaling.dontScale(Status.waiting, "Cluster change in progress", model);
var loadAdjustment = model.loadAdjustment();
- if (enableDetailedLogging) {
+ if (enableDetailedLogging)
log.info("Application: " + application.id().toShortString() + ", loadAdjustment: " + loadAdjustment.toString());
- }
- // Ensure we only scale down if we'll have enough headroom to not scale up again given a small load increase
var target = allocationOptimizer.findBestAllocation(loadAdjustment, model, limits, enableDetailedLogging);
if (target.isEmpty())
@@ -98,8 +96,8 @@ public class Autoscaler {
return Autoscaling.dontScale(Status.unavailable, "Autoscaling is disabled in single node clusters", model);
if (! worthRescaling(model.current().realResources(), target.realResources())) {
- if (target.fulfilment() < 0.9999999)
- return Autoscaling.dontScale(Status.insufficient, "Configured limits prevents ideal scaling of this cluster", model);
+ if (target.notFulfiled())
+ return Autoscaling.dontScale(Status.insufficient, "Cluster cannot be scaled to achieve ideal load", model);
else if ( ! model.safeToScaleDown() && model.idealLoad().any(v -> v < 1.0))
return Autoscaling.dontScale(Status.ideal, "Cooling off before considering to scale down", model);
else
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/ClusterModel.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/ClusterModel.java
index 504965f1992..10207ea87d5 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/ClusterModel.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/ClusterModel.java
@@ -125,6 +125,21 @@ public class ClusterModel {
this.at = clock.instant();
}
+
+ /**
+ * The central decision made in autoscaling.
+ *
+ * @return the relative load adjustment that should be made to this cluster given available measurements.
+ * For example, a load adjustment of 2 means we should allocate twice the amount of that resources.
+ */
+ public Load loadAdjustment() {
+ if (nodeTimeseries().measurementsPerNode() < 0.5) return Load.one(); // Don't change based on very little data
+ Load adjustment = peakLoad().divide(idealLoad());
+ if (! safeToScaleDown())
+ adjustment = adjustment.map(v -> v < 1 ? 1 : v);
+ return adjustment;
+ }
+
public Application application() { return application; }
public ClusterSpec clusterSpec() { return clusterSpec; }
public CloudAccount cloudAccount() { return cluster.cloudAccount().orElse(CloudAccount.empty); }
@@ -133,11 +148,7 @@ public class ClusterModel {
private ClusterTimeseries clusterTimeseries() { return clusterTimeseries; }
/** Returns the instant this model was created. */
- public Instant at() { return at;}
-
- public boolean isEmpty() {
- return nodeTimeseries().isEmpty();
- }
+ public Instant at() { return at; }
/** Returns the predicted duration of a rescaling of this cluster */
public Duration scalingDuration() { return scalingDuration; }
@@ -148,9 +159,9 @@ public class ClusterModel {
*/
public Duration allocationDuration() { return allocationDuration; }
- public boolean isContent() {
- return clusterSpec.type().isContent();
- }
+ public boolean isEmpty() { return nodeTimeseries().isEmpty(); }
+
+ public boolean isContent() { return clusterSpec.type().isContent(); }
/** Returns the predicted duration of data redistribution in this cluster. */
public Duration redistributionDuration() {
@@ -177,15 +188,6 @@ public class ClusterModel {
return nodeRepository.exclusivity().allocation(clusterSpec);
}
- /** Returns the relative load adjustment that should be made to this cluster given available measurements. */
- public Load loadAdjustment() {
- if (nodeTimeseries().measurementsPerNode() < 0.5) return Load.one(); // Don't change based on very little data
- Load adjustment = peakLoad().divide(idealLoad());
- if (! safeToScaleDown())
- adjustment = adjustment.map(v -> v < 1 ? 1 : v);
- return adjustment;
- }
-
public boolean isStable(NodeRepository nodeRepository) {
// The cluster is processing recent changes
if (nodes.stream().anyMatch(node -> node.status().wantToRetire() ||
@@ -218,7 +220,6 @@ public class ClusterModel {
.divide(redundancyAdjustment()); // correct for double redundancy adjustment
}
-
/**
* Returns the relative load adjustment accounting for redundancy given these nodes+groups
* relative to node nodes+groups in this.
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/Load.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/Load.java
index 22c13795d18..e00bf17c89f 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/Load.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/Load.java
@@ -114,7 +114,7 @@ public record Load(double cpu, double memory, double disk, double gpu, double gp
@Override
public String toString() {
- return "load: " + cpu + " cpu, " + memory + " memory, " + disk + " disk," + gpu + " gpu," + gpuMemory + " gpuMemory";
+ return "load: " + cpu + " cpu, " + memory + " memory, " + disk + " disk, " + gpu + " gpu, " + gpuMemory + " gpuMemory";
}
public static Load zero() { return new Load(0, 0, 0, 0, 0); }
diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/AutoscalingTest.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/AutoscalingTest.java
index e0c8199a882..5b15327556f 100644
--- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/AutoscalingTest.java
+++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/AutoscalingTest.java
@@ -154,6 +154,23 @@ public class AutoscalingTest {
}
@Test
+ public void test_containers_wont_scale_up_on_memory() {
+ var min = new ClusterResources(2, 1, new NodeResources(4, 8, 50, 0.1));
+ var now = new ClusterResources(4, 1, new NodeResources(4, 8, 50, 0.1));
+ var max = new ClusterResources(8, 1, new NodeResources(4, 8, 50, 0.1));
+ var fixture = DynamicProvisioningTester.fixture()
+ .awsProdSetup(false)
+ .clusterType(ClusterSpec.Type.container)
+ .initialResources(Optional.of(now))
+ .capacity(Capacity.from(min, max))
+ .build();
+ fixture.tester().setScalingDuration(fixture.applicationId(), fixture.clusterSpec.id(), Duration.ofMinutes(5));
+
+ fixture.loader().applyLoad(new Load(0.1875, 1.0, 0.95, 0, 0), 50);
+ assertEquals(Autoscaling.Status.insufficient, fixture.autoscale().status());
+ }
+
+ @Test
public void initial_deployment_with_host_sharing_flag() {
var min = new ClusterResources(7, 1, new NodeResources(2.0, 10.0, 384.0, 0.1));
var max = new ClusterResources(7, 1, new NodeResources(2.4, 32.0, 768.0, 0.1));