diff options
author | Jon Bratseth <bratseth@vespa.ai> | 2024-06-05 12:31:46 +0200 |
---|---|---|
committer | Jon Bratseth <bratseth@vespa.ai> | 2024-06-05 12:31:46 +0200 |
commit | 2b783fb86dacca43c7a72b0a940b667d0183b54e (patch) | |
tree | 4d916892a70038dffe0342100fa1950cce5cddc3 | |
parent | 1165e5c5515fbb04489daa2e6f37b71a39c22255 (diff) |
Test that we don't rescale when container memory is unfulfilled
7 files changed, 49 insertions, 27 deletions
diff --git a/container-search/src/main/java/com/yahoo/prelude/fastsearch/VespaBackend.java b/container-search/src/main/java/com/yahoo/prelude/fastsearch/VespaBackend.java index a463fb9d0e6..47a4530b9d1 100644 --- a/container-search/src/main/java/com/yahoo/prelude/fastsearch/VespaBackend.java +++ b/container-search/src/main/java/com/yahoo/prelude/fastsearch/VespaBackend.java @@ -311,10 +311,10 @@ public abstract class VespaBackend { if (query.getTrace().isTraceable(level + 1) && query.getTrace().getQuery()) { query.trace("Current state of query tree: " + new TextualQueryRepresentation(query.getModel().getQueryTree().getRoot()), - false, level+1); + false, level + 1); } if (query.getTrace().isTraceable(level + 2) && query.getTrace().getQuery()) { - query.trace("YQL+ representation: " + query.yqlRepresentation(), level+2); + query.trace("YQL+ representation: " + query.yqlRepresentation(), level + 2); } } diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/AllocatableResources.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/AllocatableResources.java index 6d3de628601..7609851d7fe 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/AllocatableResources.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/AllocatableResources.java @@ -116,6 +116,10 @@ public class AllocatableResources { */ public double fulfilment() { return fulfilment; } + public boolean notFulfiled() { + return fulfilment < 0.9999999; + } + private static double fulfilment(ClusterResources realResources, ClusterResources idealResources) { double vcpuFulfilment = Math.min(1, realResources.totalResources().vcpu() / idealResources.totalResources().vcpu()); double memoryGbFulfilment = Math.min(1, realResources.totalResources().memoryGb() / idealResources.totalResources().memoryGb()); @@ -128,7 +132,7 @@ public class AllocatableResources { public boolean 
preferableTo(AllocatableResources other, ClusterModel model) { // always fulfil as much as possible unless fulfilment is considered to be equal - if (!equal(this.fulfilment(), other.fulfilment()) && (other.fulfilment() < 1 || this.fulfilment() < 1)) + if ((other.fulfilment() < 1 || this.fulfilment() < 1) && ! equal(this.fulfilment(), other.fulfilment())) return this.fulfilment() > other.fulfilment(); return this.cost() * toHours(model.allocationDuration()) + this.costChangingFrom(model) diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/Autoscaler.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/Autoscaler.java index 29ab6d65b9f..d0ea406c91e 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/Autoscaler.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/Autoscaler.java @@ -69,11 +69,9 @@ public class Autoscaler { return Autoscaling.dontScale(Status.waiting, "Cluster change in progress", model); var loadAdjustment = model.loadAdjustment(); - if (enableDetailedLogging) { + if (enableDetailedLogging) log.info("Application: " + application.id().toShortString() + ", loadAdjustment: " + loadAdjustment.toString()); - } - // Ensure we only scale down if we'll have enough headroom to not scale up again given a small load increase var target = allocationOptimizer.findBestAllocation(loadAdjustment, model, limits, enableDetailedLogging); if (target.isEmpty()) @@ -98,8 +96,8 @@ public class Autoscaler { return Autoscaling.dontScale(Status.unavailable, "Autoscaling is disabled in single node clusters", model); if (! 
worthRescaling(model.current().realResources(), target.realResources())) { - if (target.fulfilment() < 0.9999999) - return Autoscaling.dontScale(Status.insufficient, "Configured limits prevents ideal scaling of this cluster", model); + if (target.notFulfiled()) + return Autoscaling.dontScale(Status.insufficient, "Cluster cannot be scaled to achieve ideal load", model); else if ( ! model.safeToScaleDown() && model.idealLoad().any(v -> v < 1.0)) return Autoscaling.dontScale(Status.ideal, "Cooling off before considering to scale down", model); else diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/ClusterModel.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/ClusterModel.java index 504965f1992..10207ea87d5 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/ClusterModel.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/ClusterModel.java @@ -125,6 +125,21 @@ public class ClusterModel { this.at = clock.instant(); } + + /** + * The central decision made in autoscaling. + * + * @return the relative load adjustment that should be made to this cluster given available measurements. + * For example, a load adjustment of 2 means we should allocate twice the amount of that resources. + */ + public Load loadAdjustment() { + if (nodeTimeseries().measurementsPerNode() < 0.5) return Load.one(); // Don't change based on very little data + Load adjustment = peakLoad().divide(idealLoad()); + if (! safeToScaleDown()) + adjustment = adjustment.map(v -> v < 1 ? 
1 : v); + return adjustment; + } + public Application application() { return application; } public ClusterSpec clusterSpec() { return clusterSpec; } public CloudAccount cloudAccount() { return cluster.cloudAccount().orElse(CloudAccount.empty); } @@ -133,11 +148,7 @@ public class ClusterModel { private ClusterTimeseries clusterTimeseries() { return clusterTimeseries; } /** Returns the instant this model was created. */ - public Instant at() { return at;} - - public boolean isEmpty() { - return nodeTimeseries().isEmpty(); - } + public Instant at() { return at; } /** Returns the predicted duration of a rescaling of this cluster */ public Duration scalingDuration() { return scalingDuration; } @@ -148,9 +159,9 @@ public class ClusterModel { */ public Duration allocationDuration() { return allocationDuration; } - public boolean isContent() { - return clusterSpec.type().isContent(); - } + public boolean isEmpty() { return nodeTimeseries().isEmpty(); } + + public boolean isContent() { return clusterSpec.type().isContent(); } /** Returns the predicted duration of data redistribution in this cluster. */ public Duration redistributionDuration() { @@ -177,15 +188,6 @@ public class ClusterModel { return nodeRepository.exclusivity().allocation(clusterSpec); } - /** Returns the relative load adjustment that should be made to this cluster given available measurements. */ - public Load loadAdjustment() { - if (nodeTimeseries().measurementsPerNode() < 0.5) return Load.one(); // Don't change based on very little data - Load adjustment = peakLoad().divide(idealLoad()); - if (! safeToScaleDown()) - adjustment = adjustment.map(v -> v < 1 ? 
1 : v); - return adjustment; - } - public boolean isStable(NodeRepository nodeRepository) { // The cluster is processing recent changes if (nodes.stream().anyMatch(node -> node.status().wantToRetire() || @@ -218,7 +220,6 @@ public class ClusterModel { .divide(redundancyAdjustment()); // correct for double redundancy adjustment } - /** * Returns the relative load adjustment accounting for redundancy given these nodes+groups * relative to node nodes+groups in this. diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/Load.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/Load.java index 22c13795d18..e00bf17c89f 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/Load.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/Load.java @@ -114,7 +114,7 @@ public record Load(double cpu, double memory, double disk, double gpu, double gp @Override public String toString() { - return "load: " + cpu + " cpu, " + memory + " memory, " + disk + " disk," + gpu + " gpu," + gpuMemory + " gpuMemory"; + return "load: " + cpu + " cpu, " + memory + " memory, " + disk + " disk, " + gpu + " gpu, " + gpuMemory + " gpuMemory"; } public static Load zero() { return new Load(0, 0, 0, 0, 0); } diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/AutoscalingTest.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/AutoscalingTest.java index e0c8199a882..5b15327556f 100644 --- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/AutoscalingTest.java +++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/AutoscalingTest.java @@ -154,6 +154,23 @@ public class AutoscalingTest { } @Test + public void test_containers_wont_scale_up_on_memory() { + var min = new ClusterResources(2, 1, new NodeResources(4, 8, 50, 0.1)); + var now = new ClusterResources(4, 1, new 
NodeResources(4, 8, 50, 0.1)); + var max = new ClusterResources(8, 1, new NodeResources(4, 8, 50, 0.1)); + var fixture = DynamicProvisioningTester.fixture() + .awsProdSetup(false) + .clusterType(ClusterSpec.Type.container) + .initialResources(Optional.of(now)) + .capacity(Capacity.from(min, max)) + .build(); + fixture.tester().setScalingDuration(fixture.applicationId(), fixture.clusterSpec.id(), Duration.ofMinutes(5)); + + fixture.loader().applyLoad(new Load(0.1875, 1.0, 0.95, 0, 0), 50); + assertEquals(Autoscaling.Status.insufficient, fixture.autoscale().status()); + } + + @Test public void initial_deployment_with_host_sharing_flag() { var min = new ClusterResources(7, 1, new NodeResources(2.0, 10.0, 384.0, 0.1)); var max = new ClusterResources(7, 1, new NodeResources(2.4, 32.0, 768.0, 0.1)); diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/Loader.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/Loader.java index 8dc3945223f..8f053e427e2 100644 --- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/Loader.java +++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/Loader.java @@ -102,6 +102,8 @@ public class Loader { NodeList nodes = fixture.nodes(); float oneExtraNodeFactor = (float)(nodes.size() - 1.0) / (nodes.size()); Load load = new Load(idealLoad.cpu(), value, idealLoad.disk(), 0, 0).multiply(oneExtraNodeFactor); + System.out.println(" idealLoad: " + idealLoad); + System.out.println("adjusted idealLoad: " + load); for (int i = 0; i < count; i++) { fixture.tester().clock().advance(samplingInterval); for (Node node : nodes) { |