summary refs log tree commit diff stats
path: root/node-repository/src
diff options
context:
space:
mode:
author Jon Bratseth <bratseth@gmail.com> 2020-11-04 22:07:30 +0100
committer Jon Bratseth <bratseth@gmail.com> 2020-11-04 22:07:30 +0100
commit 318f9e54453f4fa5e8e0337f2054bd0fc309906e (patch)
tree 8a062c0402671038d83f30225ce1d7467226aa97 /node-repository/src
parent 95ca6fdcb45b1d3ae805c5c9f3d20cc7972f136d (diff)
Distinguish between "no opinion" and "keep current allocation"
This is necessary when multiple config servers run autoscaling in parallel and redeployment takes a long time.
Diffstat (limited to 'node-repository/src')
-rw-r--r-- node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/Autoscaler.java 50
-rw-r--r-- node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/AutoscalingMaintainer.java 13
-rw-r--r-- node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/ScalingSuggestionsMaintainer.java 5
-rw-r--r-- node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/AutoscalingTest.java 36
-rw-r--r-- node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/AutoscalingTester.java 4
5 files changed, 64 insertions, 44 deletions
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/Autoscaler.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/Autoscaler.java
index eace7457615..bc13118c5ec 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/Autoscaler.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/Autoscaler.java
@@ -39,29 +39,25 @@ public class Autoscaler {
* without taking min and max limits into account.
*
* @param clusterNodes the list of all the active nodes in a cluster
- * @return a new suggested allocation for this cluster, or empty if it should not be rescaled at this time
+ * @return scaling advice for this cluster
*/
- public Optional<ClusterResources> suggest(Cluster cluster, List<Node> clusterNodes) {
- return autoscale(cluster, clusterNodes, Limits.empty(), cluster.exclusive())
- .map(AllocatableClusterResources::toAdvertisedClusterResources);
-
+ public Advice suggest(Cluster cluster, List<Node> clusterNodes) {
+ return autoscale(cluster, clusterNodes, Limits.empty(), cluster.exclusive());
}
/**
* Autoscale a cluster by load. This returns a better allocation (if found) inside the min and max limits.
*
* @param clusterNodes the list of all the active nodes in a cluster
- * @return a new suggested allocation for this cluster, or empty if it should not be rescaled at this time
+ * @return scaling advice for this cluster
*/
- public Optional<ClusterResources> autoscale(Cluster cluster, List<Node> clusterNodes) {
- if (cluster.minResources().equals(cluster.maxResources())) return Optional.empty(); // Shortcut
- return autoscale(cluster, clusterNodes, Limits.of(cluster), cluster.exclusive())
- .map(AllocatableClusterResources::toAdvertisedClusterResources);
+ public Advice autoscale(Cluster cluster, List<Node> clusterNodes) {
+ if (cluster.minResources().equals(cluster.maxResources())) return Advice.none(); // Shortcut
+ return autoscale(cluster, clusterNodes, Limits.of(cluster), cluster.exclusive());
}
- private Optional<AllocatableClusterResources> autoscale(Cluster cluster,
- List<Node> clusterNodes, Limits limits, boolean exclusive) {
- if (unstable(clusterNodes, nodeRepository)) return Optional.empty();
+ private Advice autoscale(Cluster cluster, List<Node> clusterNodes, Limits limits, boolean exclusive) {
+ if (unstable(clusterNodes, nodeRepository)) return Advice.none();
AllocatableClusterResources currentAllocation = new AllocatableClusterResources(clusterNodes, nodeRepository);
@@ -70,14 +66,14 @@ public class Autoscaler {
Optional<Double> cpuLoad = clusterTimeseries.averageLoad(Resource.cpu);
Optional<Double> memoryLoad = clusterTimeseries.averageLoad(Resource.memory);
Optional<Double> diskLoad = clusterTimeseries.averageLoad(Resource.disk);
- if (cpuLoad.isEmpty() || memoryLoad.isEmpty() || diskLoad.isEmpty()) return Optional.empty();
+ if (cpuLoad.isEmpty() || memoryLoad.isEmpty() || diskLoad.isEmpty()) return Advice.none();
var target = ResourceTarget.idealLoad(cpuLoad.get(), memoryLoad.get(), diskLoad.get(), currentAllocation);
Optional<AllocatableClusterResources> bestAllocation =
allocationOptimizer.findBestAllocation(target, currentAllocation, limits, exclusive);
- if (bestAllocation.isEmpty()) return Optional.empty();
- if (similar(bestAllocation.get(), currentAllocation)) return Optional.empty();
- return bestAllocation;
+ if (bestAllocation.isEmpty()) return Advice.dontScale();
+ if (similar(bestAllocation.get(), currentAllocation)) return Advice.dontScale();
+ return Advice.scaleTo(bestAllocation.get().toAdvertisedClusterResources());
}
/** Returns true if both total real resources and total cost are similar */
@@ -124,5 +120,25 @@ public class Autoscaler {
return false;
}
+
+ public static class Advice {
+
+ private final boolean present;
+ private final Optional<ClusterResources> target;
+
+ private Advice(Optional<ClusterResources> target, boolean present) {
+ this.target = target;
+ this.present = present;
+ }
+
+ public Optional<ClusterResources> target() { return target; }
+ public boolean isEmpty() { return ! present; }
+ public boolean isPresent() { return present; }
+
+ public static Advice none() { return new Advice(Optional.empty(), false); }
+ public static Advice dontScale() { return new Advice(Optional.empty(), true); }
+ public static Advice scaleTo(ClusterResources target) { return new Advice(Optional.of(target), true); }
+
+ }
}
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/AutoscalingMaintainer.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/AutoscalingMaintainer.java
index c0fd7df9b2e..67758dc13b2 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/AutoscalingMaintainer.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/AutoscalingMaintainer.java
@@ -66,11 +66,14 @@ public class AutoscalingMaintainer extends NodeRepositoryMaintainer {
Application application = nodeRepository().applications().get(applicationId).orElse(new Application(applicationId));
Optional<Cluster> cluster = application.cluster(clusterId);
if (cluster.isEmpty()) return;
- Optional<ClusterResources> target = autoscaler.autoscale(cluster.get(), clusterNodes);
- if ( ! cluster.get().targetResources().equals(target)) { // New target: Log and try to deploy now
- applications().put(application.with(cluster.get().withTarget(target)), deployment.applicationLock().get());
- if (target.isPresent()) {
- logAutoscaling(target.get(), applicationId, clusterId, clusterNodes);
+ var advice = autoscaler.autoscale(cluster.get(), clusterNodes);
+
+ if (advice.isEmpty()) return;
+
+ if ( ! cluster.get().targetResources().equals(advice.target())) {
+ applications().put(application.with(cluster.get().withTarget(advice.target())), deployment.applicationLock().get());
+ if (advice.target().isPresent()) {
+ logAutoscaling(advice.target().get(), applicationId, clusterId, clusterNodes);
deployment.activate();
}
}
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/ScalingSuggestionsMaintainer.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/ScalingSuggestionsMaintainer.java
index 9ef5a841a7a..3546c8d8afb 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/ScalingSuggestionsMaintainer.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/ScalingSuggestionsMaintainer.java
@@ -65,10 +65,11 @@ public class ScalingSuggestionsMaintainer extends NodeRepositoryMaintainer {
Application application = applications().get(applicationId).orElse(new Application(applicationId));
Optional<Cluster> cluster = application.cluster(clusterId);
if (cluster.isEmpty()) return true;
- Optional<ClusterResources> suggestion = autoscaler.suggest(cluster.get(), clusterNodes);
+ var suggestion = autoscaler.suggest(cluster.get(), clusterNodes);
+ if (suggestion.isEmpty()) return false;
// Wait only a short time for the lock to avoid interfering with change deployments
try (Mutex lock = nodeRepository().lock(applicationId, Duration.ofSeconds(1))) {
- applications().get(applicationId).ifPresent(a -> storeSuggestion(suggestion, clusterId, a, lock));
+ applications().get(applicationId).ifPresent(a -> storeSuggestion(suggestion.target(), clusterId, a, lock));
return true;
}
catch (ApplicationLockException e) {
diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/AutoscalingTest.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/AutoscalingTest.java
index 33e2ec88d0a..9c0c67f7aed 100644
--- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/AutoscalingTest.java
+++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/AutoscalingTest.java
@@ -52,7 +52,7 @@ public class AutoscalingTest {
tester.addCpuMeasurements(0.25f, 1f, 60, application1);
ClusterResources scaledResources = tester.assertResources("Scaling up since resource usage is too high",
15, 1, 1.3, 28.6, 28.6,
- tester.autoscale(application1, cluster1.id(), min, max));
+ tester.autoscale(application1, cluster1.id(), min, max).target());
tester.deploy(application1, cluster1, scaledResources);
assertTrue("Cluster in flux -> No further change", tester.autoscale(application1, cluster1.id(), min, max).isEmpty());
@@ -63,12 +63,12 @@ public class AutoscalingTest {
tester.autoscale(application1, cluster1.id(), min, max).isEmpty());
tester.addCpuMeasurements(0.19f, 1f, 100, application1);
- assertEquals("Load change is small -> No change", Optional.empty(), tester.autoscale(application1, cluster1.id(), min, max));
+ assertEquals("Load change is small -> No change", Optional.empty(), tester.autoscale(application1, cluster1.id(), min, max).target());
tester.addCpuMeasurements(0.1f, 1f, 120, application1);
tester.assertResources("Scaling down to minimum since usage has gone down significantly",
14, 1, 1.0, 30.8, 30.8,
- tester.autoscale(application1, cluster1.id(), min, max));
+ tester.autoscale(application1, cluster1.id(), min, max).target());
}
/** We prefer fewer nodes for container clusters as (we assume) they all use the same disk and memory */
@@ -88,7 +88,7 @@ public class AutoscalingTest {
tester.addCpuMeasurements(0.25f, 1f, 120, application1);
ClusterResources scaledResources = tester.assertResources("Scaling up since cpu usage is too high",
7, 1, 2.5, 80.0, 80.0,
- tester.autoscale(application1, cluster1.id(), min, max));
+ tester.autoscale(application1, cluster1.id(), min, max).target());
tester.deploy(application1, cluster1, scaledResources);
tester.deactivateRetired(application1, cluster1, scaledResources);
@@ -96,7 +96,7 @@ public class AutoscalingTest {
tester.addCpuMeasurements(0.1f, 1f, 120, application1);
tester.assertResources("Scaling down since cpu usage has gone down",
4, 1, 2.5, 68.6, 68.6,
- tester.autoscale(application1, cluster1.id(), min, max));
+ tester.autoscale(application1, cluster1.id(), min, max).target());
}
@Test
@@ -120,7 +120,7 @@ public class AutoscalingTest {
new NodeResources(100, 1000, 1000, 1, NodeResources.DiskSpeed.any));
ClusterResources scaledResources = tester.assertResources("Scaling up since resource usage is too high",
15, 1, 1.3, 28.6, 28.6,
- tester.autoscale(application1, cluster1.id(), min, max));
+ tester.autoscale(application1, cluster1.id(), min, max).target());
assertEquals("Disk speed from min/max is used",
NodeResources.DiskSpeed.any, scaledResources.nodeResources().diskSpeed());
tester.deploy(application1, cluster1, scaledResources);
@@ -144,7 +144,7 @@ public class AutoscalingTest {
tester.addMeasurements(0.25f, 0.95f, 0.95f, 0, 120, application1);
tester.assertResources("Scaling up to limit since resource usage is too high",
6, 1, 2.4, 78.0, 79.0,
- tester.autoscale(application1, cluster1.id(), min, max));
+ tester.autoscale(application1, cluster1.id(), min, max).target());
}
@Test
@@ -162,7 +162,7 @@ public class AutoscalingTest {
tester.addMeasurements(0.05f, 0.05f, 0.05f, 0, 120, application1);
tester.assertResources("Scaling down to limit since resource usage is low",
4, 1, 1.8, 7.4, 10.0,
- tester.autoscale(application1, cluster1.id(), min, max));
+ tester.autoscale(application1, cluster1.id(), min, max).target());
}
@Test
@@ -180,7 +180,7 @@ public class AutoscalingTest {
tester.addCpuMeasurements( 0.3f, 1f, 240, application1);
tester.assertResources("Scaling up since resource usage is too high",
6, 6, 3.6, 8.0, 10.0,
- tester.autoscale(application1, cluster1.id(), min, max));
+ tester.autoscale(application1, cluster1.id(), min, max).target());
}
@Test
@@ -214,7 +214,7 @@ public class AutoscalingTest {
tester.addCpuMeasurements(0.25f, 1f, 120, application1);
tester.assertResources("Scaling up since resource usage is too high",
7, 1, 2.5, 80.0, 80.0,
- tester.suggest(application1, cluster1.id(), min, max));
+ tester.suggest(application1, cluster1.id(), min, max).target());
}
@Test
@@ -232,7 +232,7 @@ public class AutoscalingTest {
tester.addCpuMeasurements(0.25f, 1f, 120, application1);
tester.assertResources("Scaling up since resource usage is too high",
7, 7, 2.5, 80.0, 80.0,
- tester.autoscale(application1, cluster1.id(), min, max));
+ tester.autoscale(application1, cluster1.id(), min, max).target());
}
@Test
@@ -250,7 +250,7 @@ public class AutoscalingTest {
tester.addCpuMeasurements(0.25f, 1f, 120, application1);
tester.assertResources("Scaling up since resource usage is too high, changing to 1 group is cheaper",
8, 1, 2.7, 83.3, 83.3,
- tester.autoscale(application1, cluster1.id(), min, max));
+ tester.autoscale(application1, cluster1.id(), min, max).target());
}
@Test
@@ -268,7 +268,7 @@ public class AutoscalingTest {
tester.addMemMeasurements(1.0f, 1f, 1000, application1);
tester.assertResources("Increase group size to reduce memory load",
8, 2, 12.9, 89.3, 62.5,
- tester.autoscale(application1, cluster1.id(), min, max));
+ tester.autoscale(application1, cluster1.id(), min, max).target());
}
@Test
@@ -286,7 +286,7 @@ public class AutoscalingTest {
tester.addMemMeasurements(0.02f, 0.95f, 120, application1);
tester.assertResources("Scaling down",
6, 1, 2.8, 4.0, 95.0,
- tester.autoscale(application1, cluster1.id(), min, max));
+ tester.autoscale(application1, cluster1.id(), min, max).target());
}
@Test
@@ -305,7 +305,7 @@ public class AutoscalingTest {
tester.addMeasurements(1.0f, 1.0f, 0.7f, 0, 1000, application1);
tester.assertResources("Scaling up",
4, 1, 7.0, 20, 200,
- tester.autoscale(application1, cluster1.id(), min, max));
+ tester.autoscale(application1, cluster1.id(), min, max).target());
}
{ // 15 Gb memory tax
@@ -318,7 +318,7 @@ public class AutoscalingTest {
tester.addMeasurements(1.0f, 1.0f, 0.7f, 0, 1000, application1);
tester.assertResources("Scaling up",
4, 1, 7.0, 34, 200,
- tester.autoscale(application1, cluster1.id(), min, max));
+ tester.autoscale(application1, cluster1.id(), min, max).target());
}
}
@@ -347,7 +347,7 @@ public class AutoscalingTest {
tester.addMemMeasurements(0.9f, 0.6f, 120, application1);
ClusterResources scaledResources = tester.assertResources("Scaling up since resource usage is too high.",
8, 1, 3, 83, 34.3,
- tester.autoscale(application1, cluster1.id(), min, max));
+ tester.autoscale(application1, cluster1.id(), min, max).target());
tester.deploy(application1, cluster1, scaledResources);
tester.deactivateRetired(application1, cluster1, scaledResources);
@@ -355,7 +355,7 @@ public class AutoscalingTest {
tester.addMemMeasurements(0.3f, 0.6f, 1000, application1);
tester.assertResources("Scaling down since resource usage has gone down",
5, 1, 3, 83, 36,
- tester.autoscale(application1, cluster1.id(), min, max));
+ tester.autoscale(application1, cluster1.id(), min, max).target());
}
/**
diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/AutoscalingTester.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/AutoscalingTester.java
index 61e7bdfc546..cee9ad6965e 100644
--- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/AutoscalingTester.java
+++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/AutoscalingTester.java
@@ -184,7 +184,7 @@ class AutoscalingTester {
}
}
- public Optional<ClusterResources> autoscale(ApplicationId applicationId, ClusterSpec.Id clusterId,
+ public Autoscaler.Advice autoscale(ApplicationId applicationId, ClusterSpec.Id clusterId,
ClusterResources min, ClusterResources max) {
Application application = nodeRepository().applications().get(applicationId).orElse(new Application(applicationId))
.withCluster(clusterId, false, min, max);
@@ -195,7 +195,7 @@ class AutoscalingTester {
nodeRepository().getNodes(applicationId, Node.State.active));
}
- public Optional<ClusterResources> suggest(ApplicationId applicationId, ClusterSpec.Id clusterId,
+ public Autoscaler.Advice suggest(ApplicationId applicationId, ClusterSpec.Id clusterId,
ClusterResources min, ClusterResources max) {
Application application = nodeRepository().applications().get(applicationId).orElse(new Application(applicationId))
.withCluster(clusterId, false, min, max);