diff options
author | Jon Bratseth <bratseth@vespa.ai> | 2023-08-15 13:06:33 +0200 |
---|---|---|
committer | Jon Bratseth <bratseth@vespa.ai> | 2023-08-15 13:06:33 +0200 |
commit | 9c42a3a1bce24444801a5aa62f1650772fdf2209 (patch) | |
tree | 91d7134fd65e9a8efd346f49e404f18d4dabe2bd /node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale | |
parent | 69e37f2811297b7f6fbaa6ee1995b8880ba448fc (diff) |
Non-functional changes only: Wrap current resources in model
Diffstat (limited to 'node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale')
6 files changed, 69 insertions, 68 deletions
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/AllocatableClusterResources.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/AllocatableClusterResources.java index a4e1a62f5b9..69f844e5f5c 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/AllocatableClusterResources.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/AllocatableClusterResources.java @@ -130,14 +130,13 @@ public class AllocatableClusterResources { return (vcpuFulfilment + memoryGbFulfilment + diskGbFulfilment) / 3; } - public boolean preferableTo(AllocatableClusterResources other, - AllocatableClusterResources current, ClusterModel clusterModel) { + public boolean preferableTo(AllocatableClusterResources other, ClusterModel model) { if (other.fulfilment() < 1 || this.fulfilment() < 1) // always fulfil as much as possible return this.fulfilment() > other.fulfilment(); - return this.cost() * toHours(clusterModel.allocationDuration()) + this.costChangingFrom(current, clusterModel) + return this.cost() * toHours(model.allocationDuration()) + this.costChangingFrom(model) < - other.cost() * toHours(clusterModel.allocationDuration()) + other.costChangingFrom(current, clusterModel); + other.cost() * toHours(model.allocationDuration()) + other.costChangingFrom(model); } private double toHours(Duration duration) { @@ -145,8 +144,8 @@ public class AllocatableClusterResources { } /** The estimated cost of changing from the given current resources to this. 
*/ - public double costChangingFrom(AllocatableClusterResources current, ClusterModel clusterModel) { - return new ResourceChange(current, this, clusterModel).cost(); + public double costChangingFrom(ClusterModel model) { + return new ResourceChange(model, this).cost(); } @Override @@ -173,8 +172,7 @@ public class AllocatableClusterResources { ClusterSpec clusterSpec, Limits applicationLimits, List<NodeResources> availableRealHostResources, - AllocatableClusterResources current, - ClusterModel clusterModel, + ClusterModel model, NodeRepository nodeRepository) { var systemLimits = nodeRepository.nodeResourceLimits(); boolean exclusive = nodeRepository.exclusiveAllocation(clusterSpec); @@ -238,12 +236,12 @@ public class AllocatableClusterResources { clusterSpec); if ( ! systemLimits.isWithinAdvertisedDiskLimits(advertisedResources, clusterSpec)) { // TODO: Remove when disk limit is enforced - if (bestDisregardingDiskLimit.isEmpty() || candidate.preferableTo(bestDisregardingDiskLimit.get(), current, clusterModel)) { + if (bestDisregardingDiskLimit.isEmpty() || candidate.preferableTo(bestDisregardingDiskLimit.get(), model)) { bestDisregardingDiskLimit = Optional.of(candidate); } continue; } - if (best.isEmpty() || candidate.preferableTo(best.get(), current, clusterModel)) { + if (best.isEmpty() || candidate.preferableTo(best.get(), model)) { best = Optional.of(candidate); } } diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/AllocationOptimizer.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/AllocationOptimizer.java index deff92628e7..707abd0f4df 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/AllocationOptimizer.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/AllocationOptimizer.java @@ -37,20 +37,19 @@ public class AllocationOptimizer { * fully or partially, within the limits */ public Optional<AllocatableClusterResources> 
findBestAllocation(Load loadAdjustment, - AllocatableClusterResources current, - ClusterModel clusterModel, + ClusterModel model, Limits limits) { if (limits.isEmpty()) limits = Limits.of(new ClusterResources(minimumNodes, 1, NodeResources.unspecified()), new ClusterResources(maximumNodes, maximumNodes, NodeResources.unspecified()), IntRange.empty()); else - limits = atLeast(minimumNodes, limits).fullySpecified(current.clusterSpec(), nodeRepository, clusterModel.application().id()); + limits = atLeast(minimumNodes, limits).fullySpecified(model.current().clusterSpec(), nodeRepository, model.application().id()); Optional<AllocatableClusterResources> bestAllocation = Optional.empty(); var availableRealHostResources = nodeRepository.zone().cloud().dynamicProvisioning() ? nodeRepository.flavors().getFlavors().stream().map(flavor -> flavor.resources()).toList() : nodeRepository.nodes().list().hosts().stream().map(host -> host.flavor().resources()) - .map(hostResources -> maxResourcesOf(hostResources, clusterModel)) + .map(hostResources -> maxResourcesOf(hostResources, model)) .toList(); for (int groups = limits.min().groups(); groups <= limits.max().groups(); groups++) { for (int nodes = limits.min().nodes(); nodes <= limits.max().nodes(); nodes++) { @@ -59,17 +58,16 @@ public class AllocationOptimizer { var resources = new ClusterResources(nodes, groups, nodeResourcesWith(nodes, groups, - limits, loadAdjustment, current, clusterModel)); + limits, loadAdjustment, model)); var allocatableResources = AllocatableClusterResources.from(resources, - clusterModel.application().id(), - current.clusterSpec(), + model.application().id(), + model.current().clusterSpec(), limits, availableRealHostResources, - current, - clusterModel, + model, nodeRepository); if (allocatableResources.isEmpty()) continue; - if (bestAllocation.isEmpty() || allocatableResources.get().preferableTo(bestAllocation.get(), current, clusterModel)) + if (bestAllocation.isEmpty() || 
allocatableResources.get().preferableTo(bestAllocation.get(), model)) bestAllocation = allocatableResources; } } @@ -77,8 +75,8 @@ public class AllocationOptimizer { } /** Returns the max resources of a host one node may allocate. */ - private NodeResources maxResourcesOf(NodeResources hostResources, ClusterModel clusterModel) { - if (nodeRepository.exclusiveAllocation(clusterModel.clusterSpec())) return hostResources; + private NodeResources maxResourcesOf(NodeResources hostResources, ClusterModel model) { + if (nodeRepository.exclusiveAllocation(model.clusterSpec())) return hostResources; // static, shared hosts: Allocate at most half of the host cpu to simplify management return hostResources.withVcpu(hostResources.vcpu() / 2); } @@ -91,9 +89,8 @@ public class AllocationOptimizer { int groups, Limits limits, Load loadAdjustment, - AllocatableClusterResources current, - ClusterModel clusterModel) { - var loadWithTarget = clusterModel.loadAdjustmentWith(nodes, groups, loadAdjustment); + ClusterModel model) { + var loadWithTarget = model.loadAdjustmentWith(nodes, groups, loadAdjustment); // Leave some headroom above the ideal allocation to avoid immediately needing to scale back up if (loadAdjustment.cpu() < 1 && (1.0 - loadWithTarget.cpu()) < headroomRequiredToScaleDown) @@ -103,11 +100,11 @@ public class AllocationOptimizer { if (loadAdjustment.disk() < 1 && (1.0 - loadWithTarget.disk()) < headroomRequiredToScaleDown) loadAdjustment = loadAdjustment.withDisk(Math.min(1.0, loadAdjustment.disk() * (1.0 + headroomRequiredToScaleDown))); - loadWithTarget = clusterModel.loadAdjustmentWith(nodes, groups, loadAdjustment); + loadWithTarget = model.loadAdjustmentWith(nodes, groups, loadAdjustment); - var scaled = loadWithTarget.scaled(current.realResources().nodeResources()); + var scaled = loadWithTarget.scaled(model.current().realResources().nodeResources()); var nonScaled = limits.isEmpty() || limits.min().nodeResources().isUnspecified() - ? 
current.advertisedResources().nodeResources() + ? model.current().advertisedResources().nodeResources() : limits.min().nodeResources(); // min=max for non-scaled return nonScaled.withVcpu(scaled.vcpu()).withMemoryGb(scaled.memoryGb()).withDiskGb(scaled.diskGb()); } diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/Autoscaler.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/Autoscaler.java index 32b59319a88..091c15dea69 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/Autoscaler.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/Autoscaler.java @@ -54,40 +54,40 @@ public class Autoscaler { } private Autoscaling autoscale(Application application, Cluster cluster, NodeList clusterNodes, Limits limits) { - ClusterModel clusterModel = new ClusterModel(nodeRepository, - application, - clusterNodes.not().retired().clusterSpec(), - cluster, - clusterNodes, - nodeRepository.metricsDb(), - nodeRepository.clock()); - if (clusterModel.isEmpty()) return Autoscaling.empty(); + var model = new ClusterModel(nodeRepository, + application, + clusterNodes.not().retired().clusterSpec(), + cluster, + clusterNodes, + new AllocatableClusterResources(clusterNodes.not().retired(), nodeRepository), + nodeRepository.metricsDb(), + nodeRepository.clock()); + if (model.isEmpty()) return Autoscaling.empty(); if (! limits.isEmpty() && cluster.minResources().equals(cluster.maxResources())) - return Autoscaling.dontScale(Autoscaling.Status.unavailable, "Autoscaling is not enabled", clusterModel); + return Autoscaling.dontScale(Autoscaling.Status.unavailable, "Autoscaling is not enabled", model); - if ( ! clusterModel.isStable(nodeRepository)) - return Autoscaling.dontScale(Status.waiting, "Cluster change in progress", clusterModel); + if ( ! 
model.isStable(nodeRepository)) + return Autoscaling.dontScale(Status.waiting, "Cluster change in progress", model); - var current = new AllocatableClusterResources(clusterNodes.not().retired(), nodeRepository); - var loadAdjustment = clusterModel.loadAdjustment(); + var loadAdjustment = model.loadAdjustment(); // Ensure we only scale down if we'll have enough headroom to not scale up again given a small load increase - var target = allocationOptimizer.findBestAllocation(loadAdjustment, current, clusterModel, limits); + var target = allocationOptimizer.findBestAllocation(loadAdjustment, model, limits); if (target.isEmpty()) - return Autoscaling.dontScale(Status.insufficient, "No allocations are possible within configured limits", clusterModel); + return Autoscaling.dontScale(Status.insufficient, "No allocations are possible within configured limits", model); - if (! worthRescaling(current.realResources(), target.get().realResources())) { + if (! worthRescaling(model.current().realResources(), target.get().realResources())) { if (target.get().fulfilment() < 0.9999999) - return Autoscaling.dontScale(Status.insufficient, "Configured limits prevents ideal scaling of this cluster", clusterModel); - else if ( ! clusterModel.safeToScaleDown() && clusterModel.idealLoad().any(v -> v < 1.0)) - return Autoscaling.dontScale(Status.ideal, "Cooling off before considering to scale down", clusterModel); + return Autoscaling.dontScale(Status.insufficient, "Configured limits prevents ideal scaling of this cluster", model); + else if ( ! 
model.safeToScaleDown() && model.idealLoad().any(v -> v < 1.0)) + return Autoscaling.dontScale(Status.ideal, "Cooling off before considering to scale down", model); else - return Autoscaling.dontScale(Status.ideal, "Cluster is ideally scaled (within configured limits)", clusterModel); + return Autoscaling.dontScale(Status.ideal, "Cluster is ideally scaled (within configured limits)", model); } - return Autoscaling.scaleTo(target.get().advertisedResources(), clusterModel); + return Autoscaling.scaleTo(target.get().advertisedResources(), model); } /** Returns true if it is worthwhile to make the given resource change, false if it is too insignificant */ diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/Autoscaling.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/Autoscaling.java index 0c86108b36c..fad280d6c29 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/Autoscaling.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/Autoscaling.java @@ -120,25 +120,25 @@ public class Autoscaling { } /** Creates an autoscaling conclusion which does not change the current allocation for a specified reason. */ - public static Autoscaling dontScale(Status status, String description, ClusterModel clusterModel) { + public static Autoscaling dontScale(Status status, String description, ClusterModel model) { return new Autoscaling(status, description, Optional.empty(), - clusterModel.at(), - clusterModel.peakLoad(), - clusterModel.idealLoad(), - clusterModel.metrics()); + model.at(), + model.peakLoad(), + model.idealLoad(), + model.metrics()); } /** Creates an autoscaling conclusion to scale. 
*/ - public static Autoscaling scaleTo(ClusterResources target, ClusterModel clusterModel) { + public static Autoscaling scaleTo(ClusterResources target, ClusterModel model) { return new Autoscaling(Status.rescaling, "Rescaling initiated due to load changes", Optional.of(target), - clusterModel.at(), - clusterModel.peakLoad(), - clusterModel.idealLoad(), - clusterModel.metrics()); + model.at(), + model.peakLoad(), + model.idealLoad(), + model.metrics()); } public enum Status { diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/ClusterModel.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/ClusterModel.java index 0bb8a4c3222..27352376be1 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/ClusterModel.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/ClusterModel.java @@ -50,6 +50,7 @@ public class ClusterModel { private final Application application; private final ClusterSpec clusterSpec; private final Cluster cluster; + private final AllocatableClusterResources current; private final CpuModel cpu = new CpuModel(); private final MemoryModel memory = new MemoryModel(); @@ -78,6 +79,7 @@ public class ClusterModel { ClusterSpec clusterSpec, Cluster cluster, NodeList clusterNodes, + AllocatableClusterResources current, MetricsDb metricsDb, Clock clock) { this.nodeRepository = nodeRepository; @@ -85,6 +87,7 @@ public class ClusterModel { this.clusterSpec = clusterSpec; this.cluster = cluster; this.nodes = clusterNodes; + this.current = current; this.clock = clock; this.scalingDuration = cluster.scalingDuration(clusterSpec); this.allocationDuration = cluster.allocationDuration(clusterSpec); @@ -97,6 +100,7 @@ public class ClusterModel { Application application, ClusterSpec clusterSpec, Cluster cluster, + AllocatableClusterResources current, Clock clock, Duration scalingDuration, Duration allocationDuration, @@ -107,6 +111,7 @@ public class 
ClusterModel { this.clusterSpec = clusterSpec; this.cluster = cluster; this.nodes = NodeList.of(); + this.current = current; this.clock = clock; this.scalingDuration = scalingDuration; @@ -118,6 +123,7 @@ public class ClusterModel { public Application application() { return application; } public ClusterSpec clusterSpec() { return clusterSpec; } + public AllocatableClusterResources current() { return current; } private ClusterNodesTimeseries nodeTimeseries() { return nodeTimeseries; } private ClusterTimeseries clusterTimeseries() { return clusterTimeseries; } diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/ResourceChange.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/ResourceChange.java index 3073c22aea7..cd3a052ee49 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/ResourceChange.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/ResourceChange.java @@ -14,23 +14,23 @@ import java.time.Duration; public class ResourceChange { private final AllocatableClusterResources from, to; - private final ClusterModel clusterModel; + private final ClusterModel model; - public ResourceChange(AllocatableClusterResources from, AllocatableClusterResources to, ClusterModel clusterModel) { - this.from = from; + public ResourceChange(ClusterModel model, AllocatableClusterResources to) { + this.from = model.current(); this.to = to; - this.clusterModel = clusterModel; + this.model = model; } /** Returns the estimated total cost of this resource change (coming in addition to the "to" resource cost). 
*/ public double cost() { - if (requiresRedistribution()) return toHours(clusterModel.redistributionDuration()) * from.cost(); - if (requiresNodeReplacement()) return toHours(clusterModel.nodeReplacementDuration()) * from.cost(); + if (requiresRedistribution()) return toHours(model.redistributionDuration()) * from.cost(); + if (requiresNodeReplacement()) return toHours(model.nodeReplacementDuration()) * from.cost(); return 0; } private boolean requiresRedistribution() { - if ( ! clusterModel.clusterSpec().type().isContent()) return false; + if ( ! model.clusterSpec().type().isContent()) return false; if (from.nodes() != to.nodes()) return true; if (from.groups() != to.groups()) return true; if (requiresNodeReplacement()) return true; @@ -42,7 +42,7 @@ public class ResourceChange { var fromNodes = from.advertisedResources().nodeResources(); var toNodes = to.advertisedResources().nodeResources(); - if (clusterModel.isExclusive()) { + if (model.isExclusive()) { return ! fromNodes.equals(toNodes); } else { @@ -59,7 +59,7 @@ public class ResourceChange { private boolean canInPlaceResize() { return canInPlaceResize(from.nodes(), from.advertisedResources().nodeResources(), to.nodes(), to.advertisedResources().nodeResources(), - clusterModel.clusterSpec().type(), clusterModel.isExclusive(), from.groups() != to.groups()); + model.clusterSpec().type(), model.isExclusive(), from.groups() != to.groups()); } public static boolean canInPlaceResize(int fromCount, NodeResources fromResources, |