diff options
author | Ola Aunronning <olaa@yahooinc.com> | 2024-01-12 16:22:01 +0100 |
---|---|---|
committer | Ola Aunronning <olaa@yahooinc.com> | 2024-01-12 16:22:01 +0100 |
commit | e6f30b96ad5e1d32be4aa29db4c526f5ece50625 (patch) | |
tree | 9f5a1c9503d8e940a7141aa3dbdda87d2141cb37 /node-repository/src/main/java/com/yahoo/vespa | |
parent | f2ded8dd8ebfc2c567fffea98b5e750ab1ed0da1 (diff) |
Store multiple resource suggestions
Diffstat (limited to 'node-repository/src/main/java/com/yahoo/vespa')
7 files changed, 140 insertions, 41 deletions
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/applications/Cluster.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/applications/Cluster.java index 606605ed1e4..4134ea337ab 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/applications/Cluster.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/applications/Cluster.java @@ -34,6 +34,7 @@ public class Cluster { private final IntRange groupSize; private final boolean required; private final Autoscaling suggested; + private final List<Autoscaling> suggestions; private final Autoscaling target; private final ClusterInfo clusterInfo; private final BcpGroupInfo bcpGroupInfo; @@ -48,6 +49,7 @@ public class Cluster { IntRange groupSize, boolean required, Autoscaling suggested, + List<Autoscaling> suggestions, Autoscaling target, ClusterInfo clusterInfo, BcpGroupInfo bcpGroupInfo, @@ -59,6 +61,7 @@ public class Cluster { this.groupSize = Objects.requireNonNull(groupSize); this.required = required; this.suggested = Objects.requireNonNull(suggested); + this.suggestions = Objects.requireNonNull(suggestions); Objects.requireNonNull(target); if (target.resources().isPresent() && ! target.resources().get().isWithin(minResources, maxResources)) this.target = target.withResources(Optional.empty()); // Delete illegal target @@ -102,12 +105,21 @@ public class Cluster { */ public Autoscaling suggested() { return suggested; } + /** + * The list of suggested resources, which may or may not be within the min and max limits, + * or empty if there is currently no recorded suggestion. + * List is sorted by preference + */ + public List<Autoscaling> suggestions() { return suggestions; } + /** Returns true if there is a current suggestion and we should actually make this suggestion to users. */ public boolean shouldSuggestResources(ClusterResources currentResources) { - if (suggested.resources().isEmpty()) return false; - if (suggested.resources().get().isWithin(min, max)) return false; - if ( ! Autoscaler.worthRescaling(currentResources, suggested.resources().get())) return false; - return true; + if (suggestions.isEmpty()) return false; + return suggestions.stream().noneMatch(suggestion -> + suggestion.resources().isEmpty() + || suggestion.resources().get().isWithin(min, max) + || ! Autoscaler.worthRescaling(currentResources, suggestion.resources().get()) + ); } public ClusterInfo clusterInfo() { return clusterInfo; } @@ -131,19 +143,23 @@ public class Cluster { public Cluster withConfiguration(boolean exclusive, Capacity capacity) { return new Cluster(id, exclusive, capacity.minResources(), capacity.maxResources(), capacity.groupSize(), capacity.isRequired(), - suggested, target, capacity.clusterInfo(), bcpGroupInfo, scalingEvents); + suggested, suggestions, target, capacity.clusterInfo(), bcpGroupInfo, scalingEvents); } public Cluster withSuggested(Autoscaling suggested) { - return new Cluster(id, exclusive, min, max, groupSize, required, suggested, target, clusterInfo, bcpGroupInfo, scalingEvents); + return new Cluster(id, exclusive, min, max, groupSize, required, suggested, suggestions, target, clusterInfo, bcpGroupInfo, scalingEvents); + } + + public Cluster withSuggestions(List<Autoscaling> suggestions) { + return new Cluster(id, exclusive, min, max, groupSize, required, suggested, suggestions, target, clusterInfo, bcpGroupInfo, scalingEvents); } public Cluster withTarget(Autoscaling target) { - return new Cluster(id, exclusive, min, max, groupSize, required, suggested, target, clusterInfo, bcpGroupInfo, scalingEvents); + return new Cluster(id, exclusive, min, max, groupSize, required, suggested, suggestions, target, clusterInfo, bcpGroupInfo, scalingEvents); } public Cluster with(BcpGroupInfo bcpGroupInfo) { - return new Cluster(id, exclusive, min, max, groupSize, required, suggested, target, clusterInfo, bcpGroupInfo, scalingEvents); + return new Cluster(id, exclusive, min, max, groupSize, required, suggested, suggestions, target, clusterInfo, bcpGroupInfo, scalingEvents); } /** Add or update (based on "at" time) a scaling event */ @@ -157,7 +173,7 @@ public class Cluster { scalingEvents.add(scalingEvent); prune(scalingEvents); - return new Cluster(id, exclusive, min, max, groupSize, required, suggested, target, clusterInfo, bcpGroupInfo, scalingEvents); + return new Cluster(id, exclusive, min, max, groupSize, required, suggested, suggestions, target, clusterInfo, bcpGroupInfo, scalingEvents); } @Override @@ -189,7 +205,7 @@ public class Cluster { public static Cluster create(ClusterSpec.Id id, boolean exclusive, Capacity requested) { return new Cluster(id, exclusive, requested.minResources(), requested.maxResources(), requested.groupSize(), requested.isRequired(), - Autoscaling.empty(), Autoscaling.empty(), requested.clusterInfo(), BcpGroupInfo.empty(), List.of()); + Autoscaling.empty(), List.of(), Autoscaling.empty(), requested.clusterInfo(), BcpGroupInfo.empty(), List.of()); } /** The predicted time it will take to rescale this cluster. */ diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/AllocationOptimizer.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/AllocationOptimizer.java index ff30f9d6163..1c160beadf4 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/AllocationOptimizer.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/AllocationOptimizer.java @@ -6,7 +6,10 @@ import com.yahoo.config.provision.IntRange; import com.yahoo.config.provision.NodeResources; import com.yahoo.vespa.hosted.provision.NodeRepository; +import java.util.ArrayList; +import java.util.List; import java.util.Optional; +import java.util.stream.Collectors; import static com.yahoo.vespa.hosted.provision.autoscale.Autoscaler.headroomRequiredToScaleDown; @@ -37,13 +40,26 @@ public class AllocationOptimizer { public Optional<AllocatableResources> findBestAllocation(Load loadAdjustment, ClusterModel model, Limits limits) { + return findBestAllocations(loadAdjustment, model, limits).stream().findFirst(); + } + + /** + * Searches the space of possible allocations given a target relative load + * and (optionally) cluster limits and returns the best alternative. + * + * @return the best allocations sorted by preference, if there are any possible legal allocations, fulfilling the target + * fully or partially, within the limits + */ + public List<AllocatableResources> findBestAllocations(Load loadAdjustment, + ClusterModel model, + Limits limits) { if (limits.isEmpty()) limits = Limits.of(new ClusterResources(minimumNodes, 1, NodeResources.unspecified()), new ClusterResources(maximumNodes, maximumNodes, NodeResources.unspecified()), IntRange.empty()); else limits = atLeast(minimumNodes, limits).fullySpecified(model.current().clusterSpec(), nodeRepository, model.application().id()); - Optional<AllocatableResources> bestAllocation = Optional.empty(); + List<AllocatableResources> bestAllocations = new ArrayList<>(); var availableRealHostResources = nodeRepository.zone().cloud().dynamicProvisioning() ? nodeRepository.flavors().getFlavors().stream().map(flavor -> flavor.resources()).toList() : nodeRepository.nodes().list().hosts().stream().map(host -> host.flavor().resources()) @@ -65,11 +81,20 @@ public class AllocationOptimizer { model, nodeRepository); if (allocatableResources.isEmpty()) continue; - if (bestAllocation.isEmpty() || allocatableResources.get().preferableTo(bestAllocation.get(), model)) - bestAllocation = allocatableResources; + bestAllocations.add(allocatableResources.get()); } } - return bestAllocation; + return bestAllocations.stream() + .sorted((one, other) -> { + if (one.preferableTo(other, model)) + return -1; + else if (other.preferableTo(one, model)) { + return 1; + } + return 0; + }) + .limit(3) + .toList(); } /** Returns the max resources of a host one node may allocate. */ diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/Autoscaler.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/Autoscaler.java index 738abddc31a..40819e709de 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/Autoscaler.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/Autoscaler.java @@ -9,6 +9,7 @@ import com.yahoo.vespa.hosted.provision.applications.Cluster; import com.yahoo.vespa.hosted.provision.autoscale.Autoscaling.Status; import java.time.Duration; +import java.util.List; /** * The autoscaler gives advice about what resources should be allocated to a cluster based on observed behavior. @@ -39,8 +40,14 @@ public class Autoscaler { * @param clusterNodes the list of all the active nodes in a cluster * @return scaling advice for this cluster */ - public Autoscaling suggest(Application application, Cluster cluster, NodeList clusterNodes) { - return autoscale(application, cluster, clusterNodes, Limits.empty()); + public List<Autoscaling> suggest(Application application, Cluster cluster, NodeList clusterNodes) { + var model = model(application, cluster, clusterNodes); + if (model.isEmpty() || ! model.isStable(nodeRepository)) return List.of(); + + var targets = allocationOptimizer.findBestAllocations(model.loadAdjustment(), model, Limits.empty()); + return targets.stream() + .map(target -> toAutoscaling(target, model)) + .toList(); } /** @@ -50,18 +57,8 @@ public class Autoscaler { * @return scaling advice for this cluster */ public Autoscaling autoscale(Application application, Cluster cluster, NodeList clusterNodes) { - return autoscale(application, cluster, clusterNodes, Limits.of(cluster)); - } - - private Autoscaling autoscale(Application application, Cluster cluster, NodeList clusterNodes, Limits limits) { - var model = new ClusterModel(nodeRepository, - application, - clusterNodes.not().retired().clusterSpec(), - cluster, - clusterNodes, - new AllocatableResources(clusterNodes.not().retired(), nodeRepository), - nodeRepository.metricsDb(), - nodeRepository.clock()); + var limits = Limits.of(cluster); + var model = model(application, cluster, clusterNodes); if (model.isEmpty()) return Autoscaling.empty(); if (! limits.isEmpty() && cluster.minResources().equals(cluster.maxResources())) @@ -78,18 +75,33 @@ public class Autoscaler { if (target.isEmpty()) return Autoscaling.dontScale(Status.insufficient, "No allocations are possible within configured limits", model); - if (target.get().nodes() == 1) + return toAutoscaling(target.get(), model); + } + + private ClusterModel model(Application application, Cluster cluster, NodeList clusterNodes) { + return new ClusterModel(nodeRepository, + application, + clusterNodes.not().retired().clusterSpec(), + cluster, + clusterNodes, + new AllocatableResources(clusterNodes.not().retired(), nodeRepository), + nodeRepository.metricsDb(), + nodeRepository.clock()); + } + + private Autoscaling toAutoscaling(AllocatableResources target, ClusterModel model) { + if (target.nodes() == 1) return Autoscaling.dontScale(Status.unavailable, "Autoscaling is disabled in single node clusters", model); - if (! worthRescaling(model.current().realResources(), target.get().realResources())) { - if (target.get().fulfilment() < 0.9999999) + if (! worthRescaling(model.current().realResources(), target.realResources())) { + if (target.fulfilment() < 0.9999999) return Autoscaling.dontScale(Status.insufficient, "Configured limits prevents ideal scaling of this cluster", model); else if ( ! model.safeToScaleDown() && model.idealLoad().any(v -> v < 1.0)) return Autoscaling.dontScale(Status.ideal, "Cooling off before considering to scale down", model); else return Autoscaling.dontScale(Status.ideal, "Cluster is ideally scaled (within configured limits)", model); } - return Autoscaling.scaleTo(target.get().advertisedResources(), model); + return Autoscaling.scaleTo(target.advertisedResources(), model); } /** Returns true if it is worthwhile to make the given resource change, false if it is too insignificant */ diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/ScalingSuggestionsMaintainer.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/ScalingSuggestionsMaintainer.java index fd93d202795..c2199de247c 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/ScalingSuggestionsMaintainer.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/ScalingSuggestionsMaintainer.java @@ -16,6 +16,7 @@ import com.yahoo.vespa.hosted.provision.autoscale.Autoscaler; import com.yahoo.vespa.hosted.provision.autoscale.Autoscaling; import java.time.Duration; +import java.util.List; import java.util.Map; import java.util.Optional; @@ -64,9 +65,9 @@ public class ScalingSuggestionsMaintainer extends NodeRepositoryMaintainer { Optional<Cluster> cluster = application.cluster(clusterId); if (cluster.isEmpty()) return true; var suggestion = autoscaler.suggest(application, cluster.get(), clusterNodes); - if (suggestion.status() == Autoscaling.Status.waiting) return true; - if ( ! shouldUpdateSuggestion(cluster.get().suggested(), suggestion)) return true; + if ( ! shouldUpdateSuggestion(cluster.get().suggestions(), suggestion)) + return true; // Wait only a short time for the lock to avoid interfering with change deployments try (Mutex lock = nodeRepository().applications().lock(applicationId, Duration.ofSeconds(1))) { applications().get(applicationId).ifPresent(a -> updateSuggestion(suggestion, clusterId, a, lock)); @@ -77,19 +78,28 @@ public class ScalingSuggestionsMaintainer extends NodeRepositoryMaintainer { } } - private boolean shouldUpdateSuggestion(Autoscaling currentSuggestion, Autoscaling newSuggestion) { - return currentSuggestion.resources().isEmpty() - || currentSuggestion.at().isBefore(nodeRepository().clock().instant().minus(Duration.ofDays(7))) - || (newSuggestion.resources().isPresent() && isHigher(newSuggestion.resources().get(), currentSuggestion.resources().get())); + private boolean shouldUpdateSuggestion(List<Autoscaling> currentSuggestions, List<Autoscaling> newSuggestions) { + // Only compare previous best suggestion with current best suggestion + var currentSuggestion = currentSuggestions.stream().findFirst(); + var newSuggestion = newSuggestions.stream().findFirst(); + + if (currentSuggestion.isEmpty()) return true; + if (newSuggestion.isEmpty()) return false; + + return newSuggestion.get().status() != Autoscaling.Status.waiting + && (currentSuggestion.get().resources().isEmpty() + || currentSuggestion.get().at().isBefore(nodeRepository().clock().instant().minus(Duration.ofDays(7))) + || (newSuggestion.get().resources().isPresent() && isHigher(newSuggestion.get().resources().get(), currentSuggestion.get().resources().get()))); } - private void updateSuggestion(Autoscaling autoscaling, + private void updateSuggestion(List<Autoscaling> suggestions, ClusterSpec.Id clusterId, Application application, Mutex lock) { Optional<Cluster> cluster = application.cluster(clusterId); if (cluster.isEmpty()) return; - applications().put(application.with(cluster.get().withSuggested(autoscaling)), lock); + applications().put(application.with(cluster.get().withSuggestions(suggestions) + .withSuggested(suggestions.stream().findFirst().orElse(Autoscaling.empty()))), lock); } private boolean isHigher(ClusterResources r1, ClusterResources r2) { diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/persistence/ApplicationSerializer.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/persistence/ApplicationSerializer.java index 6f325700401..7aee0610051 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/persistence/ApplicationSerializer.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/persistence/ApplicationSerializer.java @@ -6,6 +6,7 @@ import com.yahoo.config.provision.IntRange; import com.yahoo.config.provision.ApplicationId; import com.yahoo.config.provision.ClusterResources; import com.yahoo.config.provision.ClusterSpec; +import com.yahoo.slime.ArrayTraverser; import com.yahoo.slime.Cursor; import com.yahoo.slime.Inspector; import com.yahoo.slime.ObjectTraverser; @@ -56,6 +57,7 @@ public class ApplicationSerializer { private static final String groupSizeKey = "groupSize"; private static final String requiredKey = "required"; private static final String suggestedKey = "suggested"; + private static final String suggestionsKey = "suggestionsKey"; private static final String clusterInfoKey = "clusterInfo"; private static final String bcpDeadlineKey = "bcpDeadline"; private static final String hostTTLKey = "hostTTL"; @@ -140,6 +142,7 @@ public class ApplicationSerializer { toSlime(cluster.groupSize(), clusterObject.setObject(groupSizeKey)); clusterObject.setBool(requiredKey, cluster.required()); toSlime(cluster.suggested(), clusterObject.setObject(suggestedKey)); + toSlime(cluster.suggestions(), clusterObject.setArray(suggestionsKey)); toSlime(cluster.target(), clusterObject.setObject(targetKey)); if (! cluster.clusterInfo().isEmpty()) toSlime(cluster.clusterInfo(), clusterObject.setObject(clusterInfoKey)); @@ -156,12 +159,20 @@ public class ApplicationSerializer { intRangeFromSlime(clusterObject.field(groupSizeKey)), clusterObject.field(requiredKey).asBool(), autoscalingFromSlime(clusterObject.field(suggestedKey)), + suggestionsFromSlime(clusterObject.field(suggestionsKey)), autoscalingFromSlime(clusterObject.field(targetKey)), clusterInfoFromSlime(clusterObject.field(clusterInfoKey)), bcpGroupInfoFromSlime(clusterObject.field(bcpGroupInfoKey)), scalingEventsFromSlime(clusterObject.field(scalingEventsKey))); } + private static void toSlime(List<Autoscaling> suggestions, Cursor suggestionsArray) { + suggestions.forEach(suggestion -> { + var suggestionObject = suggestionsArray.addObject(); + toSlime(suggestion, suggestionObject); + }); + } + private static void toSlime(Autoscaling autoscaling, Cursor autoscalingObject) { autoscalingObject.setString(statusKey, toAutoscalingStatusCode(autoscaling.status())); autoscalingObject.setString(descriptionKey, autoscaling.description()); @@ -227,6 +238,13 @@ public class ApplicationSerializer { metricsObject.field(cpuCostPerQueryKey).asDouble()); } + private static List<Autoscaling> suggestionsFromSlime(Inspector suggestionsObject) { + var suggestions = new ArrayList<Autoscaling>(); + if (!suggestionsObject.valid()) return suggestions; + suggestionsObject.traverse((ArrayTraverser) (id, suggestion) -> suggestions.add(autoscalingFromSlime(suggestion))); + return suggestions; + } + private static Autoscaling autoscalingFromSlime(Inspector autoscalingObject) { if ( ! autoscalingObject.valid()) return Autoscaling.empty(); diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/restapi/ApplicationSerializer.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/restapi/ApplicationSerializer.java index 89853896104..0adddb33e6b 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/restapi/ApplicationSerializer.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/restapi/ApplicationSerializer.java @@ -66,13 +66,23 @@ public class ApplicationSerializer { if ( ! cluster.groupSize().isEmpty()) toSlime(cluster.groupSize(), clusterObject.setObject("groupSize")); toSlime(currentResources, clusterObject.setObject("current")); - if (cluster.shouldSuggestResources(currentResources)) + if (cluster.shouldSuggestResources(currentResources)) { toSlime(cluster.suggested(), clusterObject.setObject("suggested")); + toSlime(cluster.suggestions(), clusterObject.setArray("suggestions")); + + } toSlime(cluster.target(), clusterObject.setObject("target")); scalingEventsToSlime(cluster.scalingEvents(), clusterObject.setArray("scalingEvents")); clusterObject.setLong("scalingDuration", cluster.scalingDuration(nodes.clusterSpec()).toMillis()); } + private static void toSlime(List<Autoscaling> suggestions, Cursor autoscalingArray) { + suggestions.forEach(suggestion -> { + var autoscalingObject = autoscalingArray.addObject(); + toSlime(suggestion, autoscalingObject); + }); + } + private static void toSlime(Autoscaling autoscaling, Cursor autoscalingObject) { autoscalingObject.setString("status", autoscaling.status().name()); autoscalingObject.setString("description", autoscaling.description()); diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/testutils/MockNodeRepository.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/testutils/MockNodeRepository.java index d3b88997059..e7c9d1079fb 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/testutils/MockNodeRepository.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/testutils/MockNodeRepository.java @@ -233,6 +233,14 @@ public class MockNodeRepository extends NodeRepository { Load.zero(), Load.zero(), Autoscaling.Metrics.zero())); + cluster1 = cluster1.withSuggestions(List.of(new Autoscaling(Autoscaling.Status.unavailable, + "", + Optional.of(new ClusterResources(6, 2, + new NodeResources(3, 20, 100, 1))), + clock().instant(), + Load.zero(), + Load.zero(), + Autoscaling.Metrics.zero()))); cluster1 = cluster1.withTarget(new Autoscaling(Autoscaling.Status.unavailable, "", Optional.of(new ClusterResources(4, 1, |