diff options
author | Jon Bratseth <bratseth@gmail.com> | 2020-11-04 22:07:30 +0100 |
---|---|---|
committer | Jon Bratseth <bratseth@gmail.com> | 2020-11-04 22:07:30 +0100 |
commit | 318f9e54453f4fa5e8e0337f2054bd0fc309906e (patch) | |
tree | 8a062c0402671038d83f30225ce1d7467226aa97 /node-repository/src | |
parent | 95ca6fdcb45b1d3ae805c5c9f3d20cc7972f136d (diff) |
Distinguish between "no opinion" and "keep current allocation"
This is necessary when multiple config servers run autoscaling in
parallel and redeployment takes a long time.
Diffstat (limited to 'node-repository/src')
5 files changed, 64 insertions, 44 deletions
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/Autoscaler.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/Autoscaler.java index eace7457615..bc13118c5ec 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/Autoscaler.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/Autoscaler.java @@ -39,29 +39,25 @@ public class Autoscaler { * without taking min and max limits into account. * * @param clusterNodes the list of all the active nodes in a cluster - * @return a new suggested allocation for this cluster, or empty if it should not be rescaled at this time + * @return scaling advice for this cluster */ - public Optional<ClusterResources> suggest(Cluster cluster, List<Node> clusterNodes) { - return autoscale(cluster, clusterNodes, Limits.empty(), cluster.exclusive()) - .map(AllocatableClusterResources::toAdvertisedClusterResources); - + public Advice suggest(Cluster cluster, List<Node> clusterNodes) { + return autoscale(cluster, clusterNodes, Limits.empty(), cluster.exclusive()); } /** * Autoscale a cluster by load. This returns a better allocation (if found) inside the min and max limits. 
* * @param clusterNodes the list of all the active nodes in a cluster - * @return a new suggested allocation for this cluster, or empty if it should not be rescaled at this time + * @return scaling advice for this cluster */ - public Optional<ClusterResources> autoscale(Cluster cluster, List<Node> clusterNodes) { - if (cluster.minResources().equals(cluster.maxResources())) return Optional.empty(); // Shortcut - return autoscale(cluster, clusterNodes, Limits.of(cluster), cluster.exclusive()) - .map(AllocatableClusterResources::toAdvertisedClusterResources); + public Advice autoscale(Cluster cluster, List<Node> clusterNodes) { + if (cluster.minResources().equals(cluster.maxResources())) return Advice.none(); // Shortcut + return autoscale(cluster, clusterNodes, Limits.of(cluster), cluster.exclusive()); } - private Optional<AllocatableClusterResources> autoscale(Cluster cluster, - List<Node> clusterNodes, Limits limits, boolean exclusive) { - if (unstable(clusterNodes, nodeRepository)) return Optional.empty(); + private Advice autoscale(Cluster cluster, List<Node> clusterNodes, Limits limits, boolean exclusive) { + if (unstable(clusterNodes, nodeRepository)) return Advice.none(); AllocatableClusterResources currentAllocation = new AllocatableClusterResources(clusterNodes, nodeRepository); @@ -70,14 +66,14 @@ public class Autoscaler { Optional<Double> cpuLoad = clusterTimeseries.averageLoad(Resource.cpu); Optional<Double> memoryLoad = clusterTimeseries.averageLoad(Resource.memory); Optional<Double> diskLoad = clusterTimeseries.averageLoad(Resource.disk); - if (cpuLoad.isEmpty() || memoryLoad.isEmpty() || diskLoad.isEmpty()) return Optional.empty(); + if (cpuLoad.isEmpty() || memoryLoad.isEmpty() || diskLoad.isEmpty()) return Advice.none(); var target = ResourceTarget.idealLoad(cpuLoad.get(), memoryLoad.get(), diskLoad.get(), currentAllocation); Optional<AllocatableClusterResources> bestAllocation = allocationOptimizer.findBestAllocation(target, currentAllocation, 
limits, exclusive); - if (bestAllocation.isEmpty()) return Optional.empty(); - if (similar(bestAllocation.get(), currentAllocation)) return Optional.empty(); - return bestAllocation; + if (bestAllocation.isEmpty()) return Advice.dontScale(); + if (similar(bestAllocation.get(), currentAllocation)) return Advice.dontScale(); + return Advice.scaleTo(bestAllocation.get().toAdvertisedClusterResources()); } /** Returns true if both total real resources and total cost are similar */ @@ -124,5 +120,25 @@ public class Autoscaler { return false; } + + public static class Advice { + + private final boolean present; + private final Optional<ClusterResources> target; + + private Advice(Optional<ClusterResources> target, boolean present) { + this.target = target; + this.present = present; + } + + public Optional<ClusterResources> target() { return target; } + public boolean isEmpty() { return ! present; } + public boolean isPresent() { return present; } + + public static Advice none() { return new Advice(Optional.empty(), false); } + public static Advice dontScale() { return new Advice(Optional.empty(), true); } + public static Advice scaleTo(ClusterResources target) { return new Advice(Optional.of(target), true); } + + } } diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/AutoscalingMaintainer.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/AutoscalingMaintainer.java index c0fd7df9b2e..67758dc13b2 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/AutoscalingMaintainer.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/AutoscalingMaintainer.java @@ -66,11 +66,14 @@ public class AutoscalingMaintainer extends NodeRepositoryMaintainer { Application application = nodeRepository().applications().get(applicationId).orElse(new Application(applicationId)); Optional<Cluster> cluster = application.cluster(clusterId); if (cluster.isEmpty()) 
return; - Optional<ClusterResources> target = autoscaler.autoscale(cluster.get(), clusterNodes); - if ( ! cluster.get().targetResources().equals(target)) { // New target: Log and try to deploy now - applications().put(application.with(cluster.get().withTarget(target)), deployment.applicationLock().get()); - if (target.isPresent()) { - logAutoscaling(target.get(), applicationId, clusterId, clusterNodes); + var advice = autoscaler.autoscale(cluster.get(), clusterNodes); + + if (advice.isEmpty()) return; + + if ( ! cluster.get().targetResources().equals(advice.target())) { + applications().put(application.with(cluster.get().withTarget(advice.target())), deployment.applicationLock().get()); + if (advice.target().isPresent()) { + logAutoscaling(advice.target().get(), applicationId, clusterId, clusterNodes); deployment.activate(); } } diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/ScalingSuggestionsMaintainer.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/ScalingSuggestionsMaintainer.java index 9ef5a841a7a..3546c8d8afb 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/ScalingSuggestionsMaintainer.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/ScalingSuggestionsMaintainer.java @@ -65,10 +65,11 @@ public class ScalingSuggestionsMaintainer extends NodeRepositoryMaintainer { Application application = applications().get(applicationId).orElse(new Application(applicationId)); Optional<Cluster> cluster = application.cluster(clusterId); if (cluster.isEmpty()) return true; - Optional<ClusterResources> suggestion = autoscaler.suggest(cluster.get(), clusterNodes); + var suggestion = autoscaler.suggest(cluster.get(), clusterNodes); + if (suggestion.isEmpty()) return false; // Wait only a short time for the lock to avoid interfering with change deployments try (Mutex lock = nodeRepository().lock(applicationId, Duration.ofSeconds(1))) 
{ - applications().get(applicationId).ifPresent(a -> storeSuggestion(suggestion, clusterId, a, lock)); + applications().get(applicationId).ifPresent(a -> storeSuggestion(suggestion.target(), clusterId, a, lock)); return true; } catch (ApplicationLockException e) { diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/AutoscalingTest.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/AutoscalingTest.java index 33e2ec88d0a..9c0c67f7aed 100644 --- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/AutoscalingTest.java +++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/AutoscalingTest.java @@ -52,7 +52,7 @@ public class AutoscalingTest { tester.addCpuMeasurements(0.25f, 1f, 60, application1); ClusterResources scaledResources = tester.assertResources("Scaling up since resource usage is too high", 15, 1, 1.3, 28.6, 28.6, - tester.autoscale(application1, cluster1.id(), min, max)); + tester.autoscale(application1, cluster1.id(), min, max).target()); tester.deploy(application1, cluster1, scaledResources); assertTrue("Cluster in flux -> No further change", tester.autoscale(application1, cluster1.id(), min, max).isEmpty()); @@ -63,12 +63,12 @@ public class AutoscalingTest { tester.autoscale(application1, cluster1.id(), min, max).isEmpty()); tester.addCpuMeasurements(0.19f, 1f, 100, application1); - assertEquals("Load change is small -> No change", Optional.empty(), tester.autoscale(application1, cluster1.id(), min, max)); + assertEquals("Load change is small -> No change", Optional.empty(), tester.autoscale(application1, cluster1.id(), min, max).target()); tester.addCpuMeasurements(0.1f, 1f, 120, application1); tester.assertResources("Scaling down to minimum since usage has gone down significantly", 14, 1, 1.0, 30.8, 30.8, - tester.autoscale(application1, cluster1.id(), min, max)); + tester.autoscale(application1, cluster1.id(), min, max).target()); } /** We prefer 
fewer nodes for container clusters as (we assume) they all use the same disk and memory */ @@ -88,7 +88,7 @@ public class AutoscalingTest { tester.addCpuMeasurements(0.25f, 1f, 120, application1); ClusterResources scaledResources = tester.assertResources("Scaling up since cpu usage is too high", 7, 1, 2.5, 80.0, 80.0, - tester.autoscale(application1, cluster1.id(), min, max)); + tester.autoscale(application1, cluster1.id(), min, max).target()); tester.deploy(application1, cluster1, scaledResources); tester.deactivateRetired(application1, cluster1, scaledResources); @@ -96,7 +96,7 @@ public class AutoscalingTest { tester.addCpuMeasurements(0.1f, 1f, 120, application1); tester.assertResources("Scaling down since cpu usage has gone down", 4, 1, 2.5, 68.6, 68.6, - tester.autoscale(application1, cluster1.id(), min, max)); + tester.autoscale(application1, cluster1.id(), min, max).target()); } @Test @@ -120,7 +120,7 @@ public class AutoscalingTest { new NodeResources(100, 1000, 1000, 1, NodeResources.DiskSpeed.any)); ClusterResources scaledResources = tester.assertResources("Scaling up since resource usage is too high", 15, 1, 1.3, 28.6, 28.6, - tester.autoscale(application1, cluster1.id(), min, max)); + tester.autoscale(application1, cluster1.id(), min, max).target()); assertEquals("Disk speed from min/max is used", NodeResources.DiskSpeed.any, scaledResources.nodeResources().diskSpeed()); tester.deploy(application1, cluster1, scaledResources); @@ -144,7 +144,7 @@ public class AutoscalingTest { tester.addMeasurements(0.25f, 0.95f, 0.95f, 0, 120, application1); tester.assertResources("Scaling up to limit since resource usage is too high", 6, 1, 2.4, 78.0, 79.0, - tester.autoscale(application1, cluster1.id(), min, max)); + tester.autoscale(application1, cluster1.id(), min, max).target()); } @Test @@ -162,7 +162,7 @@ public class AutoscalingTest { tester.addMeasurements(0.05f, 0.05f, 0.05f, 0, 120, application1); tester.assertResources("Scaling down to limit since resource 
usage is low", 4, 1, 1.8, 7.4, 10.0, - tester.autoscale(application1, cluster1.id(), min, max)); + tester.autoscale(application1, cluster1.id(), min, max).target()); } @Test @@ -180,7 +180,7 @@ public class AutoscalingTest { tester.addCpuMeasurements( 0.3f, 1f, 240, application1); tester.assertResources("Scaling up since resource usage is too high", 6, 6, 3.6, 8.0, 10.0, - tester.autoscale(application1, cluster1.id(), min, max)); + tester.autoscale(application1, cluster1.id(), min, max).target()); } @Test @@ -214,7 +214,7 @@ public class AutoscalingTest { tester.addCpuMeasurements(0.25f, 1f, 120, application1); tester.assertResources("Scaling up since resource usage is too high", 7, 1, 2.5, 80.0, 80.0, - tester.suggest(application1, cluster1.id(), min, max)); + tester.suggest(application1, cluster1.id(), min, max).target()); } @Test @@ -232,7 +232,7 @@ public class AutoscalingTest { tester.addCpuMeasurements(0.25f, 1f, 120, application1); tester.assertResources("Scaling up since resource usage is too high", 7, 7, 2.5, 80.0, 80.0, - tester.autoscale(application1, cluster1.id(), min, max)); + tester.autoscale(application1, cluster1.id(), min, max).target()); } @Test @@ -250,7 +250,7 @@ public class AutoscalingTest { tester.addCpuMeasurements(0.25f, 1f, 120, application1); tester.assertResources("Scaling up since resource usage is too high, changing to 1 group is cheaper", 8, 1, 2.7, 83.3, 83.3, - tester.autoscale(application1, cluster1.id(), min, max)); + tester.autoscale(application1, cluster1.id(), min, max).target()); } @Test @@ -268,7 +268,7 @@ public class AutoscalingTest { tester.addMemMeasurements(1.0f, 1f, 1000, application1); tester.assertResources("Increase group size to reduce memory load", 8, 2, 12.9, 89.3, 62.5, - tester.autoscale(application1, cluster1.id(), min, max)); + tester.autoscale(application1, cluster1.id(), min, max).target()); } @Test @@ -286,7 +286,7 @@ public class AutoscalingTest { tester.addMemMeasurements(0.02f, 0.95f, 120, 
application1); tester.assertResources("Scaling down", 6, 1, 2.8, 4.0, 95.0, - tester.autoscale(application1, cluster1.id(), min, max)); + tester.autoscale(application1, cluster1.id(), min, max).target()); } @Test @@ -305,7 +305,7 @@ public class AutoscalingTest { tester.addMeasurements(1.0f, 1.0f, 0.7f, 0, 1000, application1); tester.assertResources("Scaling up", 4, 1, 7.0, 20, 200, - tester.autoscale(application1, cluster1.id(), min, max)); + tester.autoscale(application1, cluster1.id(), min, max).target()); } { // 15 Gb memory tax @@ -318,7 +318,7 @@ public class AutoscalingTest { tester.addMeasurements(1.0f, 1.0f, 0.7f, 0, 1000, application1); tester.assertResources("Scaling up", 4, 1, 7.0, 34, 200, - tester.autoscale(application1, cluster1.id(), min, max)); + tester.autoscale(application1, cluster1.id(), min, max).target()); } } @@ -347,7 +347,7 @@ public class AutoscalingTest { tester.addMemMeasurements(0.9f, 0.6f, 120, application1); ClusterResources scaledResources = tester.assertResources("Scaling up since resource usage is too high.", 8, 1, 3, 83, 34.3, - tester.autoscale(application1, cluster1.id(), min, max)); + tester.autoscale(application1, cluster1.id(), min, max).target()); tester.deploy(application1, cluster1, scaledResources); tester.deactivateRetired(application1, cluster1, scaledResources); @@ -355,7 +355,7 @@ public class AutoscalingTest { tester.addMemMeasurements(0.3f, 0.6f, 1000, application1); tester.assertResources("Scaling down since resource usage has gone down", 5, 1, 3, 83, 36, - tester.autoscale(application1, cluster1.id(), min, max)); + tester.autoscale(application1, cluster1.id(), min, max).target()); } /** diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/AutoscalingTester.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/AutoscalingTester.java index 61e7bdfc546..cee9ad6965e 100644 --- 
a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/AutoscalingTester.java +++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/AutoscalingTester.java @@ -184,7 +184,7 @@ class AutoscalingTester { } } - public Optional<ClusterResources> autoscale(ApplicationId applicationId, ClusterSpec.Id clusterId, + public Autoscaler.Advice autoscale(ApplicationId applicationId, ClusterSpec.Id clusterId, ClusterResources min, ClusterResources max) { Application application = nodeRepository().applications().get(applicationId).orElse(new Application(applicationId)) .withCluster(clusterId, false, min, max); @@ -195,7 +195,7 @@ class AutoscalingTester { nodeRepository().getNodes(applicationId, Node.State.active)); } - public Optional<ClusterResources> suggest(ApplicationId applicationId, ClusterSpec.Id clusterId, + public Autoscaler.Advice suggest(ApplicationId applicationId, ClusterSpec.Id clusterId, ClusterResources min, ClusterResources max) { Application application = nodeRepository().applications().get(applicationId).orElse(new Application(applicationId)) .withCluster(clusterId, false, min, max); |