author    Martin Polden <mpolden@mpolden.no>  2023-07-21 09:11:53 +0200
committer GitHub <noreply@github.com>  2023-07-21 09:11:53 +0200
commit    8d315ba956eb0dc814e92e180e3b8533b81c6e61 (patch)
tree      b5e62cdc21315b84e92214858b0e193a46d88468 /node-repository
parent    3e40e5363a3b76a72910dbe701ec05294d17ec30 (diff)
parent    97bd65b51e942fb81eeb43b14b03cad8d2474c6d (diff)
Merge pull request #27854 from vespa-engine/bratseth/scale-down-less
Don't scale down if we are likely to scale back up
Diffstat (limited to 'node-repository')
-rw-r--r--  node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/Autoscaler.java                       | 59
-rw-r--r--  node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/ClusterModel.java                     | 15
-rw-r--r--  node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/AutoscalingTest.java                  | 28
-rw-r--r--  node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/AutoscalingUsingBcpGroupInfoTest.java |  8
-rw-r--r--  node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/Fixture.java                          | 15
-rw-r--r--  node-repository/src/test/java/com/yahoo/vespa/hosted/provision/provisioning/DynamicProvisioningTester.java     | 11
6 files changed, 105 insertions, 31 deletions
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/Autoscaler.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/Autoscaler.java
index a7d5cc50828..795cbd59c4b 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/Autoscaler.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/Autoscaler.java
@@ -2,7 +2,6 @@
package com.yahoo.vespa.hosted.provision.autoscale;
import com.yahoo.config.provision.ClusterResources;
-import com.yahoo.vespa.hosted.provision.Node;
import com.yahoo.vespa.hosted.provision.NodeList;
import com.yahoo.vespa.hosted.provision.NodeRepository;
import com.yahoo.vespa.hosted.provision.applications.Application;
@@ -10,7 +9,6 @@ import com.yahoo.vespa.hosted.provision.applications.Cluster;
import com.yahoo.vespa.hosted.provision.autoscale.Autoscaling.Status;
import java.time.Duration;
-import java.time.Instant;
import java.util.Optional;
/**
@@ -23,7 +21,9 @@ public class Autoscaler {
/** What cost difference is worth a reallocation? */
private static final double costDifferenceWorthReallocation = 0.1;
/** What resource difference is worth a reallocation? */
- private static final double resourceDifferenceWorthReallocation = 0.03;
+ private static final double resourceIncreaseWorthReallocation = 0.03;
+ /** The load increase headroom (as a fraction) we must have before needing to scale up, in order to decide to scale down */
+ private static final double headroomRequiredToScaleDown = 0.1;
private final NodeRepository nodeRepository;
private final AllocationOptimizer allocationOptimizer;
@@ -70,22 +70,53 @@ public class Autoscaler {
if ( ! clusterModel.isStable(nodeRepository))
return Autoscaling.dontScale(Status.waiting, "Cluster change in progress", clusterModel);
- var currentAllocation = new AllocatableClusterResources(clusterNodes.not().retired(), nodeRepository);
- Optional<AllocatableClusterResources> bestAllocation =
- allocationOptimizer.findBestAllocation(clusterModel.loadAdjustment(), currentAllocation, clusterModel, limits);
- if (bestAllocation.isEmpty())
+ var current = new AllocatableClusterResources(clusterNodes.not().retired(), nodeRepository);
+ var loadAdjustment = clusterModel.loadAdjustment();
+
+ // Ensure we only scale down if we'll have enough headroom to not scale up again given a small load increase
+ var target = allocationOptimizer.findBestAllocation(loadAdjustment, current, clusterModel, limits);
+ var headroomAdjustedLoadAdjustment = adjustForHeadroom(loadAdjustment, clusterModel, target);
+ if ( ! headroomAdjustedLoadAdjustment.equals(loadAdjustment)) {
+ loadAdjustment = headroomAdjustedLoadAdjustment;
+ target = allocationOptimizer.findBestAllocation(loadAdjustment, current, clusterModel, limits);
+ }
+
+ if (target.isEmpty())
return Autoscaling.dontScale(Status.insufficient, "No allocations are possible within configured limits", clusterModel);
- if (! worthRescaling(currentAllocation.realResources(), bestAllocation.get().realResources())) {
- if (bestAllocation.get().fulfilment() < 0.9999999)
+ if (! worthRescaling(current.realResources(), target.get().realResources())) {
+ if (target.get().fulfilment() < 0.9999999)
return Autoscaling.dontScale(Status.insufficient, "Configured limits prevent ideal scaling of this cluster", clusterModel);
else if ( ! clusterModel.safeToScaleDown() && clusterModel.idealLoad().any(v -> v < 1.0))
return Autoscaling.dontScale(Status.ideal, "Cooling off before considering to scale down", clusterModel);
else
- return Autoscaling.dontScale(Status.ideal, "Cluster is ideally scaled (within limits)", clusterModel);
+ return Autoscaling.dontScale(Status.ideal, "Cluster is ideally scaled (within configured limits)", clusterModel);
}
- return Autoscaling.scaleTo(bestAllocation.get().advertisedResources(), clusterModel);
+ return Autoscaling.scaleTo(target.get().advertisedResources(), clusterModel);
+ }
+
+ /**
+ * When scaling down we may end up with resources that are just barely below the new ideal with the new number
+ * of nodes, as fewer nodes lead to a lower ideal load (due to redundancy).
+ * If that headroom is too small, do not scale down, as doing so would likely lead to scaling back up again soon.
+ */
+ private Load adjustForHeadroom(Load loadAdjustment, ClusterModel clusterModel,
+ Optional<AllocatableClusterResources> target) {
+ if (target.isEmpty()) return loadAdjustment;
+
+ // If we change to this target, what would our current peak load be compared to the ideal?
+ var relativeLoadWithTarget =
+ loadAdjustment // redundancy aware target relative to current load
+ .multiply(clusterModel.loadWith(target.get().nodes(), target.get().groups())) // redundancy aware adjustment with target
+ .divide(clusterModel.redundancyAdjustment()); // correct for double redundancy adjustment
+ if (loadAdjustment.cpu() < 1 && (1.0 - relativeLoadWithTarget.cpu()) < headroomRequiredToScaleDown)
+ loadAdjustment = loadAdjustment.withCpu(1.0);
+ if (loadAdjustment.memory() < 1 && (1.0 - relativeLoadWithTarget.memory()) < headroomRequiredToScaleDown)
+ loadAdjustment = loadAdjustment.withMemory(1.0);
+ if (loadAdjustment.disk() < 1 && (1.0 - relativeLoadWithTarget.disk()) < headroomRequiredToScaleDown)
+ loadAdjustment = loadAdjustment.withDisk(1.0);
+ return loadAdjustment;
}
/** Returns true if it is worthwhile to make the given resource change, false if it is too insignificant */
@@ -95,12 +126,14 @@ public class Autoscaler {
if (meaningfulIncrease(from.totalResources().memoryGb(), to.totalResources().memoryGb())) return true;
if (meaningfulIncrease(from.totalResources().diskGb(), to.totalResources().diskGb())) return true;
- // Otherwise, only *decrease* if it reduces cost meaningfully
+ // Otherwise, only *decrease* if
+ // - cost is reduced meaningfully
+ // - the new resources won't be so much smaller that a small fluctuation in load will cause an increase
return ! similar(from.cost(), to.cost(), costDifferenceWorthReallocation);
}
public static boolean meaningfulIncrease(double from, double to) {
- return from < to && ! similar(from, to, resourceDifferenceWorthReallocation);
+ return from < to && ! similar(from, to, resourceIncreaseWorthReallocation);
}
private static boolean similar(double r1, double r2, double threshold) {
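
The adjustForHeadroom step added above reads as: estimate the peak-to-ideal load ratio the cluster would have after moving to the candidate target, and cancel the scale-down in any dimension where the remaining headroom is below headroomRequiredToScaleDown (10%). A minimal standalone sketch of that rule, with a simplified Load record standing in for the real class (illustrative only, not the commit's code):

    // Illustrative sketch; this Load record is a simplified stand-in for the real class.
    class HeadroomRule {
        record Load(double cpu, double memory, double disk) {
            Load withCpu(double v)    { return new Load(v, memory, disk); }
            Load withMemory(double v) { return new Load(cpu, v, disk); }
            Load withDisk(double v)   { return new Load(cpu, memory, v); }
        }

        static final double headroomRequiredToScaleDown = 0.1; // threshold from the commit

        // loadAdjustment < 1 in a dimension means "scale that dimension down";
        // relativeLoadWithTarget is the expected peak load relative to ideal on the target.
        static Load cancelTightScaleDowns(Load loadAdjustment, Load relativeLoadWithTarget) {
            if (loadAdjustment.cpu() < 1 && (1.0 - relativeLoadWithTarget.cpu()) < headroomRequiredToScaleDown)
                loadAdjustment = loadAdjustment.withCpu(1.0);    // too tight: keep current cpu
            if (loadAdjustment.memory() < 1 && (1.0 - relativeLoadWithTarget.memory()) < headroomRequiredToScaleDown)
                loadAdjustment = loadAdjustment.withMemory(1.0); // too tight: keep current memory
            if (loadAdjustment.disk() < 1 && (1.0 - relativeLoadWithTarget.disk()) < headroomRequiredToScaleDown)
                loadAdjustment = loadAdjustment.withDisk(1.0);   // too tight: keep current disk
            return loadAdjustment;
        }
    }

For example, a cpu scale-down that would land the target at 0.96 of its new ideal (4% headroom) is cancelled, while one landing at 0.85 (15% headroom) goes ahead.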
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/ClusterModel.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/ClusterModel.java
index 289025f9d21..a5490996a2c 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/ClusterModel.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/ClusterModel.java
@@ -175,9 +175,9 @@ public class ClusterModel {
* Returns the relative load adjustment accounting for redundancy given these nodes+groups
* relative to the nodes+groups in this.
*/
- public Load loadWith(int trueNodes, int trueGroups) {
- int nodes = nodesAdjustedForRedundancy(trueNodes, trueGroups);
- int groups = groupsAdjustedForRedundancy(trueNodes, trueGroups);
+ public Load loadWith(int givenNodes, int givenGroups) {
+ int nodes = nodesAdjustedForRedundancy(givenNodes, givenGroups);
+ int groups = groupsAdjustedForRedundancy(givenNodes, givenGroups);
if (clusterSpec().type() == ClusterSpec.Type.content) { // load scales with node share of content
int groupSize = nodes / groups;
@@ -272,7 +272,7 @@ public class ClusterModel {
/** The number of nodes this cluster has, or will have if not deployed yet. */
// TODO: Make this the deployed, not current count
- private int nodeCount() {
+ public int nodeCount() {
if ( ! nodes.isEmpty()) return (int)nodes.not().retired().stream().count();
return cluster.minResources().nodes();
}
@@ -289,12 +289,12 @@ public class ClusterModel {
return (int)Math.ceil((double)nodeCount() / groupCount());
}
- private int nodesAdjustedForRedundancy(int nodes, int groups) {
+ private static int nodesAdjustedForRedundancy(int nodes, int groups) {
int groupSize = (int)Math.ceil((double)nodes / groups);
return nodes > 1 ? (groups == 1 ? nodes - 1 : nodes - groupSize) : nodes;
}
- private int groupsAdjustedForRedundancy(int nodes, int groups) {
+ private static int groupsAdjustedForRedundancy(int nodes, int groups) {
return nodes > 1 ? (groups == 1 ? 1 : groups - 1) : groups;
}
@@ -340,8 +340,7 @@ public class ClusterModel {
/** Ideal cpu load must take the application traffic fraction into account. */
double idealCpuLoad() {
double queryCpuFraction = queryFraction();
-
- // Assumptions: 1) Write load is not organic so we should not grow to handle more.
+ // Assumptions: 1) Write load is not organic so we should not increase to handle potential future growth.
// (TODO: But allow applications to set their target write rate and size for that)
// 2) Write load does not change in BCP scenarios.
return queryCpuFraction * 1/growthRateHeadroom() * 1/trafficShiftHeadroom() * idealQueryCpuLoad +
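
The two redundancy helpers made static above compute the capacity that remains after losing one group (or one node, in single-group clusters), which is why the ideal load depends on the node count. They are self-contained, so a worked example is easy (the example values are illustrative):

    class RedundancyMath {
        // Copied from the diff above.
        static int nodesAdjustedForRedundancy(int nodes, int groups) {
            int groupSize = (int) Math.ceil((double) nodes / groups);
            return nodes > 1 ? (groups == 1 ? nodes - 1 : nodes - groupSize) : nodes;
        }

        static int groupsAdjustedForRedundancy(int nodes, int groups) {
            return nodes > 1 ? (groups == 1 ? 1 : groups - 1) : groups;
        }

        public static void main(String[] args) {
            // 6 nodes in 2 groups: losing a whole group of 3 leaves 3 nodes in 1 group
            System.out.println(nodesAdjustedForRedundancy(6, 2));  // 3
            System.out.println(groupsAdjustedForRedundancy(6, 2)); // 1
            // 4 nodes in 4 one-node groups (the 'now' of the new test): one group lost leaves 3/3
            System.out.println(nodesAdjustedForRedundancy(4, 4));  // 3
            System.out.println(groupsAdjustedForRedundancy(4, 4)); // 3
        }
    }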
diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/AutoscalingTest.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/AutoscalingTest.java
index 47206265c68..54178865693 100644
--- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/AutoscalingTest.java
+++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/AutoscalingTest.java
@@ -12,6 +12,7 @@ import com.yahoo.config.provision.NodeResources.DiskSpeed;
import com.yahoo.config.provision.NodeResources.StorageType;
import com.yahoo.config.provision.RegionName;
import com.yahoo.config.provision.Zone;
+import com.yahoo.vespa.hosted.provision.Node;
import com.yahoo.vespa.hosted.provision.provisioning.CapacityPolicies;
import com.yahoo.vespa.hosted.provision.provisioning.DynamicProvisioningTester;
import org.junit.Test;
@@ -87,7 +88,7 @@ public class AutoscalingTest {
fixture.tester().clock().advance(Duration.ofDays(7));
fixture.loader().applyCpuLoad(0.1f, 10);
fixture.tester().assertResources("Scaling cpu down since usage has gone down significantly",
- 6, 1, 1.1, 8.8, 346.8,
+ 6, 1, 1.1, 9.8, 390.2,
fixture.autoscale());
}
@@ -585,7 +586,7 @@ public class AutoscalingTest {
@Test
public void test_autoscaling_groupsize_by_cpu_read_dominated() {
var min = new ClusterResources( 3, 1, new NodeResources(1, 1, 1, 1));
- var now = new ClusterResources(6, 2, new NodeResources(3, 100, 100, 1));
+ var now = new ClusterResources( 6, 2, new NodeResources(3, 100, 100, 1));
var max = new ClusterResources(21, 7, new NodeResources(100, 1000, 1000, 1));
var fixture = DynamicProvisioningTester.fixture()
.awsProdSetup(true)
@@ -665,7 +666,7 @@ public class AutoscalingTest {
fixture.tester().clock().advance(Duration.ofHours(12 * 3 + 1));
fixture.loader().applyCpuLoad(0.02, 5);
fixture.tester().assertResources("Scaling down since enough time has passed",
- 3, 1, 1.0, 23.6, 101.4,
+ 3, 1, 1.0, 29.5, 126.7,
fixture.autoscale());
}
@@ -798,7 +799,7 @@ public class AutoscalingTest {
fixture.tester.clock().advance(timeAdded.negated());
fixture.loader().addCpuMeasurements(0.4, 200);
fixture.tester().assertResources("Write only -> smallest possible",
- 4, 1, 1.1, 16.1, 67.6,
+ 4, 1, 1.1, 20.1, 84.5,
fixture.autoscale());
}
@@ -881,6 +882,23 @@ public class AutoscalingTest {
}
@Test
+ public void test_scaling_down_leaves_too_little_headroom() {
+ var r = new NodeResources(16, 32, 100, 1, NodeResources.DiskSpeed.any);
+ var min = new ClusterResources( 3, 3, r);
+ var now = new ClusterResources( 4, 4, r);
+ var max = new ClusterResources( 5, 5, r);
+ var fixture = DynamicProvisioningTester.fixture()
+ .awsProdSetup(false)
+ .capacity(Capacity.from(min, max, IntRange.from(1)))
+ .clusterType(ClusterSpec.Type.content)
+ .initialResources(Optional.of(now))
+ .build();
+ fixture.loader().applyCpuLoad(0.17, 10);
+ assertTrue("Not scaling down as that would leave just 4.5% headroom before needing to scale up again",
+ fixture.autoscale().resources().isEmpty());
+ }
+
+ @Test
public void test_changing_exclusivity() {
var min = new ClusterResources( 2, 1, new NodeResources( 3, 4, 100, 1));
var max = new ClusterResources(20, 1, new NodeResources(100, 1000, 1000, 1));
@@ -923,7 +941,7 @@ public class AutoscalingTest {
fixture.loader().applyLoad(new Load(0.06, 0.52, 0.27), 100);
var autoscaling = fixture.autoscale();
fixture.tester().assertResources("Scaling down",
- 7, 1, 2, 14.5, 384.0,
+ 7, 1, 2, 15.8, 384.0,
autoscaling);
fixture.deploy(Capacity.from(autoscaling.resources().get()));
assertEquals("Initial nodes are kept", initialNodes, fixture.nodes().asList());
diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/AutoscalingUsingBcpGroupInfoTest.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/AutoscalingUsingBcpGroupInfoTest.java
index 637932681ee..379dbb27d87 100644
--- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/AutoscalingUsingBcpGroupInfoTest.java
+++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/AutoscalingUsingBcpGroupInfoTest.java
@@ -85,7 +85,7 @@ public class AutoscalingUsingBcpGroupInfoTest {
fixture.store(new BcpGroupInfo(100, 1.1, 0.3));
fixture.loader().addCpuMeasurements(0.7f, 10);
fixture.tester().assertResources("Scaling up cpu using bcp group cpu info",
- 3, 3, 10.5, 38.4, 168.9,
+ 3, 3, 10.5, 43.2, 190.0,
fixture.autoscale());
// Higher query rate
@@ -93,7 +93,7 @@ public class AutoscalingUsingBcpGroupInfoTest {
fixture.store(new BcpGroupInfo(200, 1.1, 0.3));
fixture.loader().addCpuMeasurements(0.7f, 10);
fixture.tester().assertResources("Scaling up cpu using bcp group cpu info",
- 3, 3, 20.9, 38.4, 168.9,
+ 3, 3, 20.9, 43.2, 190.0,
fixture.autoscale());
// Higher headroom
@@ -101,7 +101,7 @@ public class AutoscalingUsingBcpGroupInfoTest {
fixture.store(new BcpGroupInfo(100, 1.3, 0.3));
fixture.loader().addCpuMeasurements(0.7f, 10);
fixture.tester().assertResources("Scaling up cpu using bcp group cpu info",
- 3, 3, 12.4, 38.4, 168.9,
+ 3, 3, 12.4, 43.2, 190.0,
fixture.autoscale());
// Higher per query cost
@@ -109,7 +109,7 @@ public class AutoscalingUsingBcpGroupInfoTest {
fixture.store(new BcpGroupInfo(100, 1.1, 0.45));
fixture.loader().addCpuMeasurements(0.7f, 10);
fixture.tester().assertResources("Scaling up cpu using bcp group cpu info",
- 3, 3, 15.7, 38.4, 168.9,
+ 3, 3, 15.7, 43.2, 190.0,
fixture.autoscale());
}
diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/Fixture.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/Fixture.java
index b150b372fe8..33d3d3d50dc 100644
--- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/Fixture.java
+++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/Fixture.java
@@ -49,6 +49,8 @@ public class Fixture {
final Capacity capacity;
final Loader loader;
+ Autoscaling lastAutoscaling = Autoscaling.empty();
+
public Fixture(Fixture.Builder builder, Optional<ClusterResources> initialResources, int hostCount) {
applicationId = builder.application;
clusterSpec = builder.cluster;
@@ -105,7 +107,7 @@ public class Fixture {
/** Autoscale within the given capacity. */
public Autoscaling autoscale(Capacity capacity) {
- return tester().autoscale(applicationId, clusterSpec, capacity);
+ return lastAutoscaling = tester().autoscale(applicationId, clusterSpec, capacity);
}
/** Compute an autoscaling suggestion for this. */
@@ -123,6 +125,17 @@ public class Fixture {
tester().deploy(applicationId, clusterSpec, capacity);
}
+ public void deployTarget() {
+ if (lastAutoscaling.isEmpty()) throw new IllegalStateException("Autoscaling is empty");
+ if (lastAutoscaling.resources().isEmpty()) throw new IllegalStateException("Autoscaling target is empty: " + lastAutoscaling);
+ try (var lock = tester().nodeRepository().applications().lock(applicationId)) {
+ var updated = tester().nodeRepository().applications().require(applicationId).with(cluster().withTarget(lastAutoscaling));
+ tester().nodeRepository().applications().put(updated, lock);
+ }
+ deploy(capacity);
+ deactivateRetired(capacity);
+ }
+
public void deactivateRetired(Capacity capacity) {
tester().deactivateRetired(applicationId, clusterSpec, capacity);
}
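
The new deployTarget helper lets a test commit the most recent autoscaling decision back into the node repository and redeploy, so follow-up autoscaling runs see the new allocation. A hypothetical usage sketch (fixture setup elided):

    fixture.loader().applyCpuLoad(0.7, 100);  // drive cpu load up
    var autoscaling = fixture.autoscale();    // also recorded as lastAutoscaling
    if (autoscaling.resources().isPresent())
        fixture.deployTarget();               // store the target, deploy, deactivate retired nodes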
diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/provisioning/DynamicProvisioningTester.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/provisioning/DynamicProvisioningTester.java
index 4799d3b5577..c982b195787 100644
--- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/provisioning/DynamicProvisioningTester.java
+++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/provisioning/DynamicProvisioningTester.java
@@ -186,6 +186,17 @@ public class DynamicProvisioningTester {
resources);
}
+ public void assertResources(String message,
+ int nodeCount, int groupCount,
+ NodeResources expectedResources,
+ Autoscaling autoscaling) {
+ assertTrue("Resources are present: " + message + " (" + autoscaling + ": " + autoscaling.status() + ")",
+ autoscaling.resources().isPresent());
+ assertResources(message, nodeCount, groupCount,
+ expectedResources.vcpu(), expectedResources.memoryGb(), expectedResources.diskGb(),
+ autoscaling.resources().get());
+ }
+
public ClusterResources assertResources(String message,
int nodeCount, int groupCount,
double approxCpu, double approxMemory, double approxDisk,
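
The new overload accepts a whole NodeResources instead of three separate doubles. A hypothetical call site (the bandwidth value 0.1 is illustrative; the other numbers match the updated AutoscalingTest expectation above):

    fixture.tester().assertResources("Scaling cpu down since usage has gone down significantly",
                                     6, 1,
                                     new NodeResources(1.1, 9.8, 390.2, 0.1),
                                     fixture.autoscale());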