summary | refs | log | tree | commit | diff | stats
path: root/node-repository/src
diff options
context:
space:
mode:
author: Jon Bratseth <bratseth@gmail.com> 2022-08-09 13:46:56 +0200
committer: Jon Bratseth <bratseth@gmail.com> 2022-08-09 13:46:56 +0200
commit: 180a265397ab329ae8f8f34f68cae09d48790785 (patch)
tree: edb0ebb37b9607b6011dc9e8c5bd9d5d986130ba /node-repository/src
parent: 5be0e33d7e749858f107e9ffa1446fb615801e69 (diff)
Scale down when we have sufficient confidence
Diffstat (limited to 'node-repository/src')
-rw-r--r-- node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/ClusterModel.java 27
-rw-r--r-- node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/Load.java 5
-rw-r--r-- node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/AutoscalingTest.java 6
-rw-r--r-- node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/AutoscalingMaintainerTest.java 18
-rw-r--r-- node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/AutoscalingMaintainerTester.java 5
5 files changed, 36 insertions, 25 deletions
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/ClusterModel.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/ClusterModel.java
index 9b9b7dcecb0..5dbc6465411 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/ClusterModel.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/ClusterModel.java
@@ -9,6 +9,7 @@ import com.yahoo.vespa.hosted.provision.applications.ScalingEvent;
import java.time.Clock;
import java.time.Duration;
+import java.time.Instant;
import java.util.Optional;
import java.util.OptionalDouble;
import java.util.logging.Level;
@@ -94,15 +95,31 @@ public class ClusterModel {
/** Returns the relative load adjustment that should be made to this cluster given available measurements. */
public Load loadAdjustment() {
if (nodeTimeseries().measurementsPerNode() == 0) return Load.one(); // No info, no change
+
+ Load peak = nodeTimeseries().peakLoad().divide(idealLoad()); // Peak relative to ideal
+
// Should we scale up?
- Load relativePeak = nodeTimeseries().peakLoad().divide(idealLoad());
- if (relativePeak.any(v -> v > 1.01)) // "meaningful growth": 1% over status quo.
- return relativePeak.max(Load.one()); // Don't downscale any dimension if we upscale
+ if (peak.any(v -> v > 1.01)) // "meaningful growth": 1% over status quo.
+ return peak.map(v -> v < 1 ? 1 : v); // Don't downscale any dimension if we upscale
// Should we scale down?
- // TODO
+ if (canScaleDown())
+ return averageLoad().divide(idealLoad());
+
+ return Load.one();
+ }
+
+ /** Are we in a position to make decisions to scale down at this point? */
+ private boolean canScaleDown() {
+ if (hasScaledIn(scalingDuration().multipliedBy(3))) return false;
+ if (nodeTimeseries().measurementsPerNode() < 4) return false;
+ if (nodeTimeseries().nodesMeasured() != nodeCount()) return false;
+ return true;
+ }
- return averageLoad().divide(idealLoad());
+ private boolean hasScaledIn(Duration period) {
+ return cluster.lastScalingEvent().map(event -> event.at()).orElse(Instant.MIN)
+ .isAfter(clock.instant().minus(period));
}
/** Returns the predicted duration of a rescaling of this cluster */
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/Load.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/Load.java
index 88c7e70cd35..6ab5ff731d3 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/Load.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/Load.java
@@ -54,11 +54,6 @@ public class Load {
return new Load(divide(cpu, resources.vcpu()), divide(memory, resources.memoryGb()), divide(disk, resources.diskGb()));
}
- /** Returns the load having the max value of this and the given load in each dimension. */
- public Load max(Load other) {
- return join(other, (a, b) -> Math.max(a, b));
- }
-
/** Returns the load where the given function is applied to each dimension of this. */
public Load map(DoubleUnaryOperator f) {
return new Load(f.applyAsDouble(cpu),
diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/AutoscalingTest.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/AutoscalingTest.java
index 007b2629952..6f3182b6e44 100644
--- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/AutoscalingTest.java
+++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/AutoscalingTest.java
@@ -247,6 +247,7 @@ public class AutoscalingTest {
fixture.tester().clock().advance(Duration.ofDays(2));
fixture.loader().applyLoad(0.01, 0.01, 0.01, 120);
+ System.out.println("Asking for suggestion ...");
Autoscaler.Advice suggestion = fixture.suggest();
fixture.tester().assertResources("Choosing the remote disk flavor as it has less disk",
2, 1, 3.0, 100.0, 10.0,
@@ -464,19 +465,22 @@ public class AutoscalingTest {
var fixture = AutoscalingTester.fixture()
.capacity(Capacity.from(min, max))
.build();
+
fixture.tester.clock().advance(Duration.ofDays(1));
fixture.loader().applyCpuLoad(0.25, 120);
-
// (no read share stored)
fixture.tester().assertResources("Advice to scale up since we set aside for bcp by default",
7, 1, 3, 100, 100,
fixture.autoscale());
+ fixture.loader().applyCpuLoad(0.25, 120);
fixture.storeReadShare(0.25, 0.5);
fixture.tester().assertResources("Half of global share is the same as the default assumption used above",
7, 1, 3, 100, 100,
fixture.autoscale());
+ fixture.tester.clock().advance(Duration.ofDays(1));
+ fixture.loader().applyCpuLoad(0.25, 120);
fixture.storeReadShare(0.5, 0.5);
fixture.tester().assertResources("Advice to scale down since we don't need room for bcp",
6, 1, 3, 100, 100,
diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/AutoscalingMaintainerTest.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/AutoscalingMaintainerTest.java
index cee80459176..f74ace5bd3b 100644
--- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/AutoscalingMaintainerTest.java
+++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/AutoscalingMaintainerTest.java
@@ -152,8 +152,9 @@ public class AutoscalingMaintainerTest {
// deploy
tester.deploy(app1, cluster1, app1Capacity);
- tester.addQueryRateMeasurements(app1, cluster1.id(), 12, t -> t == 0 ? 20.0 : 10.0);
+ int measurements = 5;
+ Duration samplePeriod = Duration.ofSeconds(150);
for (int i = 0; i < 20; i++) {
// Record completion to keep scaling window at minimum
tester.addMeasurements(0.1f, 0.1f, 0.1f, i, 1, app1);
@@ -162,25 +163,20 @@ public class AutoscalingMaintainerTest {
tester.clock().advance(Duration.ofDays(1));
if (i % 2 == 0) { // high load
- for (int j = 0; j < 200; j++ ) {
- tester.addMeasurements(0.99f, 0.99f, 0.99f, i, 1, app1);
- tester.clock().advance(Duration.ofMinutes(1));
- }
+ tester.addMeasurements(0.99f, 0.99f, 0.99f, i, measurements, app1);
}
else { // low load
- for (int j = 0; j < 200; j++ ) {
- tester.addMeasurements(0.2f, 0.2f, 0.2f, i, 1, app1);
- tester.clock().advance(Duration.ofMinutes(1));
- }
+ tester.addMeasurements(0.2f, 0.2f, 0.2f, i, measurements, app1);
}
- tester.addQueryRateMeasurements(app1, cluster1.id(), 2, t -> (t == 0 ? 20.0 : 10.0 ));
+ tester.clock().advance(samplePeriod.negated().multipliedBy(measurements));
+ tester.addQueryRateMeasurements(app1, cluster1.id(), measurements, t -> (t == 0 ? 20.0 : 10.0 ));
tester.maintainer().maintain();
}
assertEquals(Cluster.maxScalingEvents, tester.cluster(app1, cluster1).scalingEvents().size());
assertEquals("The latest rescaling is the last event stored",
tester.clock().instant(),
- tester.cluster(app1, cluster1).scalingEvents().get(Cluster.maxScalingEvents - 1).at());
+ tester.cluster(app1, cluster1).scalingEvents().get(Cluster.maxScalingEvents - 1).completion().get());
}
@Test
diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/AutoscalingMaintainerTester.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/AutoscalingMaintainerTester.java
index e1a1a2af5fb..d921af9543e 100644
--- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/AutoscalingMaintainerTester.java
+++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/AutoscalingMaintainerTester.java
@@ -93,11 +93,10 @@ public class AutoscalingMaintainerTester {
ClusterSpec.Id cluster,
int measurements,
IntFunction<Double> queryRate) {
- Instant time = clock().instant();
for (int i = 0; i < measurements; i++) {
nodeRepository().metricsDb().addClusterMetrics(application,
- Map.of(cluster, new ClusterMetricSnapshot(time, queryRate.apply(i), 0.0)));
- time = time.plus(Duration.ofMinutes(5));
+ Map.of(cluster, new ClusterMetricSnapshot(clock().instant(), queryRate.apply(i), 0.0)));
+ clock().advance(Duration.ofSeconds(150));
}
}