author    Jon Bratseth <bratseth@vespa.ai>  2023-07-25 11:31:10 +0200
committer Jon Bratseth <bratseth@vespa.ai>  2023-07-25 11:31:10 +0200
commit    6b4d2e073788a6c9340d468302faa75581315095
tree      5eecae61627d5e4edfd58180550736defb6840f9 /node-repository
parent    7dcdf5a892a57f02df7edb53500133e9648d33c9
Set aside space for headroom in one pass
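
The motivation, spelled out in the Autoscaler javadoc this commit deletes below, is that scaling down lowers the ideal load itself: with fewer nodes, the survivors of a node failure must each absorb a larger share of the traffic, so less utilization headroom remains. A minimal sketch of that effect, assuming a simplified one-group redundancy model (the real adjustment lives in ClusterModel.redundancyAdjustment() and is more involved):

    public class RedundancyEffect {

        /** With one group of n nodes, the n-1 survivors of a node failure must
            absorb its load, so target utilization scales by (n-1)/n. This is an
            assumed simplification, not the actual ClusterModel computation. */
        static double idealFraction(int nodes) {
            return (nodes - 1) / (double) nodes;
        }

        public static void main(String[] args) {
            System.out.println(idealFraction(10)); // 0.9
            System.out.println(idealFraction(4));  // 0.75: fewer nodes -> lower ideal load
        }
    }
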
Diffstat (limited to 'node-repository')
 node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/AllocationOptimizer.java | 27
 node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/Autoscaler.java          | 31
 node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/ClusterModel.java        |  1
 node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/AutoscalingTest.java     |  6
 4 files changed, 25 insertions(+), 40 deletions(-)
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/AllocationOptimizer.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/AllocationOptimizer.java
index e586e6277d5..bbce0442e19 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/AllocationOptimizer.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/AllocationOptimizer.java
@@ -9,6 +9,8 @@ import com.yahoo.vespa.hosted.provision.provisioning.NodeResourceLimits;
import java.util.Optional;
+import static com.yahoo.vespa.hosted.provision.autoscale.Autoscaler.headroomRequiredToScaleDown;
+
/**
* A searcher of the space of possible allocations
*
@@ -88,13 +90,26 @@ public class AllocationOptimizer {
Load loadAdjustment,
AllocatableClusterResources current,
ClusterModel clusterModel) {
- var scaled = loadAdjustment // redundancy aware target relative to current load
- .multiply(clusterModel.loadWith(nodes, groups)) // redundancy aware adjustment with these counts
- .divide(clusterModel.redundancyAdjustment()) // correct for double redundancy adjustment
- .scaled(current.realResources().nodeResources());
+ var loadWithTarget = loadAdjustment // redundancy adjusted target relative to current load
+ .multiply(clusterModel.loadWith(nodes, groups)) // redundancy aware adjustment with these counts
+ .divide(clusterModel.redundancyAdjustment()); // correct for double redundancy adjustment
+
+ // Don't scale down all the way to the ideal as that leaves no headroom before needing to scale back up
+ if (loadAdjustment.cpu() < 1 && (1.0 - loadWithTarget.cpu()) < headroomRequiredToScaleDown)
+ loadAdjustment = loadAdjustment.withCpu(1.0);
+ if (loadAdjustment.memory() < 1 && (1.0 - loadWithTarget.memory()) < headroomRequiredToScaleDown)
+ loadAdjustment = loadAdjustment.withMemory(1.0);
+ if (loadAdjustment.disk() < 1 && (1.0 - loadWithTarget.disk()) < headroomRequiredToScaleDown)
+ loadAdjustment = loadAdjustment.withDisk(1.0);
+
+ loadWithTarget = loadAdjustment // redundancy adjusted target relative to current load
+ .multiply(clusterModel.loadWith(nodes, groups)) // redundancy aware adjustment with these counts
+ .divide(clusterModel.redundancyAdjustment()); // correct for double redundancy adjustment
+
- // Combine the scaled resource values computed here
- // with the currently configured non-scaled values, given in the limits, if any
+ var scaled = loadWithTarget.scaled(current.realResources().nodeResources());
var nonScaled = limits.isEmpty() || limits.min().nodeResources().isUnspecified()
? current.advertisedResources().nodeResources()
: limits.min().nodeResources(); // min=max for non-scaled
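
The clamp added above can be read in isolation: a dimension is only allowed to scale down (adjustment < 1) if the load projected at the candidate node count still leaves at least headroomRequiredToScaleDown of free capacity. A self-contained sketch, with Load reduced to a hypothetical three-field record (the real com.yahoo.vespa.hosted.provision.autoscale.Load class has many more operations):

    public class HeadroomClamp {

        // Simplified stand-in for the real Load class; only what the clamp needs.
        record Load(double cpu, double memory, double disk) {
            Load withCpu(double v)    { return new Load(v, memory, disk); }
            Load withMemory(double v) { return new Load(cpu, v, disk); }
            Load withDisk(double v)   { return new Load(cpu, memory, v); }
        }

        static final double headroomRequiredToScaleDown = 0.1;

        /** Cancels the scale-down in each dimension whose projected relative load
            would leave less than 10% headroom, mirroring the ifs in the diff. */
        static Load clamp(Load loadAdjustment, Load loadWithTarget) {
            if (loadAdjustment.cpu() < 1 && 1.0 - loadWithTarget.cpu() < headroomRequiredToScaleDown)
                loadAdjustment = loadAdjustment.withCpu(1.0);
            if (loadAdjustment.memory() < 1 && 1.0 - loadWithTarget.memory() < headroomRequiredToScaleDown)
                loadAdjustment = loadAdjustment.withMemory(1.0);
            if (loadAdjustment.disk() < 1 && 1.0 - loadWithTarget.disk() < headroomRequiredToScaleDown)
                loadAdjustment = loadAdjustment.withDisk(1.0);
            return loadAdjustment;
        }

        public static void main(String[] args) {
            // Memory would end up at 95% of ideal (5% headroom), so its scale-down
            // is cancelled; cpu keeps 25% headroom and is allowed to shrink.
            System.out.println(clamp(new Load(0.8, 0.9, 1.0), new Load(0.75, 0.95, 0.6)));
            // -> Load[cpu=0.8, memory=1.0, disk=1.0]
        }
    }

Because the clamp now runs inside the search, loadWith(nodes, groups) is evaluated for each candidate count rather than only for the final target, which is what makes the second findBestAllocation pass removed below unnecessary.
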
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/Autoscaler.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/Autoscaler.java
index 795cbd59c4b..c207e3c7ecc 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/Autoscaler.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/Autoscaler.java
@@ -9,7 +9,6 @@ import com.yahoo.vespa.hosted.provision.applications.Cluster;
import com.yahoo.vespa.hosted.provision.autoscale.Autoscaling.Status;
import java.time.Duration;
-import java.util.Optional;
/**
* The autoscaler gives advice about what resources should be allocated to a cluster based on observed behavior.
@@ -23,7 +22,7 @@ public class Autoscaler {
/** What resource difference is worth a reallocation? */
private static final double resourceIncreaseWorthReallocation = 0.03;
/** The load increase headroom (as a fraction) we should have before needing to scale up, to decide to scale down */
- private static final double headroomRequiredToScaleDown = 0.1;
+ static final double headroomRequiredToScaleDown = 0.1;
private final NodeRepository nodeRepository;
private final AllocationOptimizer allocationOptimizer;
@@ -75,11 +74,6 @@ public class Autoscaler {
// Ensure we only scale down if we'll have enough headroom to not scale up again given a small load increase
var target = allocationOptimizer.findBestAllocation(loadAdjustment, current, clusterModel, limits);
- var headroomAdjustedLoadAdjustment = adjustForHeadroom(loadAdjustment, clusterModel, target);
- if ( ! headroomAdjustedLoadAdjustment.equals(loadAdjustment)) {
- loadAdjustment = headroomAdjustedLoadAdjustment;
- target = allocationOptimizer.findBestAllocation(loadAdjustment, current, clusterModel, limits);
- }
if (target.isEmpty())
return Autoscaling.dontScale(Status.insufficient, "No allocations are possible within configured limits", clusterModel);
@@ -96,29 +90,6 @@ public class Autoscaler {
return Autoscaling.scaleTo(target.get().advertisedResources(), clusterModel);
}
- /**
- * When scaling down we may end up with resources that are just barely below the new ideal with the new number
- * of nodes, as fewer nodes leads to a lower ideal load (due to redundancy).
- * If that headroom is too small, then do not scale down as it will likely lead to scaling back up again soon.
- */
- private Load adjustForHeadroom(Load loadAdjustment, ClusterModel clusterModel,
- Optional<AllocatableClusterResources> target) {
- if (target.isEmpty()) return loadAdjustment;
-
- // If we change to this target, what would our current peak be compared to the ideal
- var relativeLoadWithTarget =
- loadAdjustment // redundancy aware target relative to current load
- .multiply(clusterModel.loadWith(target.get().nodes(), target.get().groups())) // redundancy aware adjustment with target
- .divide(clusterModel.redundancyAdjustment()); // correct for double redundancy adjustment
- if (loadAdjustment.cpu() < 1 && (1.0 - relativeLoadWithTarget.cpu()) < headroomRequiredToScaleDown)
- loadAdjustment = loadAdjustment.withCpu(1.0);
- if (loadAdjustment.memory() < 1 && (1.0 - relativeLoadWithTarget.memory()) < headroomRequiredToScaleDown)
- loadAdjustment = loadAdjustment.withMemory(1.0);
- if (loadAdjustment.disk() < 1 && (1.0 - relativeLoadWithTarget.disk()) < headroomRequiredToScaleDown)
- loadAdjustment = loadAdjustment.withDisk(1.0);
- return loadAdjustment;
- }
-
/** Returns true if it is worthwhile to make the given resource change, false if it is too insignificant */
public static boolean worthRescaling(ClusterResources from, ClusterResources to) {
// *Increase* if needed with no regard for cost difference to prevent running out of a resource
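
For the 3% threshold retained above, a hedged sketch of the kind of relative-change test worthRescaling gates on. The real method compares ClusterResources and, per its comment, always permits increases to avoid running out of a resource; this toy version only shows the threshold idea:

    public class RescalingThreshold {

        static final double resourceIncreaseWorthReallocation = 0.03;

        /** True if the relative change exceeds the 3% threshold; an assumed
            simplification of the ClusterResources comparison in worthRescaling. */
        static boolean meaningfulChange(double from, double to) {
            return Math.abs(to - from) / from > resourceIncreaseWorthReallocation;
        }

        public static void main(String[] args) {
            System.out.println(meaningfulChange(100, 102)); // false: 2% is not worth a reallocation
            System.out.println(meaningfulChange(100, 104)); // true: 4% is
        }
    }
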
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/ClusterModel.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/ClusterModel.java
index a5490996a2c..61f3dc57d31 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/ClusterModel.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/ClusterModel.java
@@ -206,7 +206,6 @@ public class ClusterModel {
var ideal = new Load(cpu.idealLoad(), memory.idealLoad(), disk.idealLoad()).divide(redundancyAdjustment());
if ( !cluster.bcpGroupInfo().isEmpty() && cluster.bcpGroupInfo().queryRate() > 0) {
// Since we have little local information, use information about query cost in other groups
-
Load bcpGroupIdeal = adjustQueryDependentIdealLoadByBcpGroupInfo(ideal);
// Do a weighted sum of the ideal "vote" based on local and bcp group info.
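
The comment above describes a weighted vote between the locally derived ideal load and the one implied by bcp group traffic. A sketch of one plausible combination; the weight and the formula are assumptions for illustration, not the actual ClusterModel code:

    public class IdealLoadVote {

        /** Hypothetical weighted sum of the local ideal-load "vote" and the
            bcp-group-derived one; bcpWeight is an assumed parameter in [0,1]. */
        static double weightedIdeal(double localIdeal, double bcpGroupIdeal, double bcpWeight) {
            return localIdeal * (1 - bcpWeight) + bcpGroupIdeal * bcpWeight;
        }

        public static void main(String[] args) {
            // With little local information, weight the bcp group's evidence heavily.
            System.out.println(weightedIdeal(0.5, 0.3, 0.8)); // 0.34
        }
    }
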
diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/AutoscalingTest.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/AutoscalingTest.java
index 54178865693..d33857d1a1e 100644
--- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/AutoscalingTest.java
+++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/AutoscalingTest.java
@@ -654,7 +654,7 @@ public class AutoscalingTest {
fixture.tester().clock().advance(Duration.ofDays(2));
fixture.loader().applyLoad(new Load(0.16, 0.02, 0.5), 120);
fixture.tester().assertResources("Scaling down memory",
- 6, 1, 3.0, 4.0, 96.2,
+ 7, 1, 2.5, 4.0, 80.2,
fixture.autoscale());
}
@@ -666,7 +666,7 @@ public class AutoscalingTest {
fixture.tester().clock().advance(Duration.ofHours(12 * 3 + 1));
fixture.loader().applyCpuLoad(0.02, 5);
fixture.tester().assertResources("Scaling down since enough time has passed",
- 3, 1, 1.0, 29.5, 126.7,
+ 5, 1, 1.0, 12.3, 50.7,
fixture.autoscale());
}
@@ -799,7 +799,7 @@ public class AutoscalingTest {
fixture.tester.clock().advance(timeAdded.negated());
fixture.loader().addCpuMeasurements(0.4, 200);
fixture.tester().assertResources("Write only -> smallest possible",
- 4, 1, 1.1, 20.1, 84.5,
+ 5, 1, 1.0, 12.3, 50.7,
fixture.autoscale());
}