aboutsummaryrefslogtreecommitdiffstats
path: root/node-repository/src/main/java/com
diff options
context:
space:
mode:
authorJon Bratseth <bratseth@gmail.com>2023-02-18 14:25:23 +0100
committerJon Bratseth <bratseth@gmail.com>2023-02-18 14:25:23 +0100
commit370b8742744344fc38dabff4ae7bae65c9316dfc (patch)
tree55282fdee06ee14420de79aa12ef02093116312b /node-repository/src/main/java/com
parent21e0c4913dd0bc88dfec3016d4552f57fc0e7c4b (diff)
Predict best case overhead during autoscaling
Predicting worst case has the unavoidable consequence that we will conclude we cannot fulfill requested resources in the case where there are no resource ranges (only node count ranges), or where they are too narrow, and thus refrain from scaling down when we should. Instead, mostly predict the best case and let the ideal < 1 headroom absorb the deficiency in what we are actually allocated.
Diffstat (limited to 'node-repository/src/main/java/com')
-rw-r--r--node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/AllocatableClusterResources.java66
-rw-r--r--node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/AllocationOptimizer.java19
-rw-r--r--node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/EmptyProvisionServiceProvider.java4
-rw-r--r--node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/HostResourcesCalculator.java4
4 files changed, 63 insertions, 30 deletions
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/AllocatableClusterResources.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/AllocatableClusterResources.java
index 2ab0ac9d0d3..a2ef76e84d0 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/AllocatableClusterResources.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/AllocatableClusterResources.java
@@ -38,7 +38,7 @@ public class AllocatableClusterResources {
NodeRepository nodeRepository) {
this.nodes = requested.nodes();
this.groups = requested.groups();
- this.realResources = nodeRepository.resourcesCalculator().requestToReal(requested.nodeResources(), nodeRepository.exclusiveAllocation(clusterSpec));
+ this.realResources = nodeRepository.resourcesCalculator().requestToReal(requested.nodeResources(), nodeRepository.exclusiveAllocation(clusterSpec), false);
this.advertisedResources = requested.nodeResources();
this.clusterSpec = clusterSpec;
this.fulfilment = 1;
@@ -165,24 +165,32 @@ public class AllocatableClusterResources {
boolean exclusive = nodeRepository.exclusiveAllocation(clusterSpec);
if (! exclusive) {
// We decide resources: Add overhead to what we'll request (advertised) to make sure real becomes (at least) cappedNodeResources
- var advertisedResources = nodeRepository.resourcesCalculator().realToRequest(wantedResources.nodeResources(), exclusive);
- advertisedResources = systemLimits.enlargeToLegal(advertisedResources, applicationId, clusterSpec, exclusive); // Ask for something legal
- advertisedResources = applicationLimits.cap(advertisedResources); // Overrides other conditions, even if it will then fail
- var realResources = nodeRepository.resourcesCalculator().requestToReal(advertisedResources, exclusive); // What we'll really get
- if ( ! systemLimits.isWithinRealLimits(realResources, applicationId, clusterSpec) && advertisedResources.storageType() == NodeResources.StorageType.any) {
- // Since local disk resreves some of the storage, try to constrain to remote disk
- advertisedResources = advertisedResources.with(NodeResources.StorageType.remote);
- realResources = nodeRepository.resourcesCalculator().requestToReal(advertisedResources, exclusive);
+ var allocatableResources = calculateAllocatableResources(wantedResources,
+ nodeRepository,
+ applicationId,
+ clusterSpec,
+ applicationLimits,
+ exclusive,
+ true);
+
+ var worstCaseRealResources = nodeRepository.resourcesCalculator().requestToReal(allocatableResources.advertisedResources,
+ exclusive,
+ false);
+ if ( ! systemLimits.isWithinRealLimits(worstCaseRealResources, applicationId, clusterSpec)) {
+ allocatableResources = calculateAllocatableResources(wantedResources,
+ nodeRepository,
+ applicationId,
+ clusterSpec,
+ applicationLimits,
+ exclusive,
+ false);
}
- if ( ! systemLimits.isWithinRealLimits(realResources, applicationId, clusterSpec))
+
+ if ( ! systemLimits.isWithinRealLimits(allocatableResources.realResources, applicationId, clusterSpec))
return Optional.empty();
- if (anySatisfies(realResources, availableRealHostResources))
- return Optional.of(new AllocatableClusterResources(wantedResources.with(realResources),
- advertisedResources,
- wantedResources,
- clusterSpec));
- else
+ if ( ! anySatisfies(allocatableResources.realResources, availableRealHostResources))
return Optional.empty();
+ return Optional.of(allocatableResources);
}
else { // Return the cheapest flavor satisfying the requested resources, if any
NodeResources cappedWantedResources = applicationLimits.cap(wantedResources.nodeResources());
@@ -190,7 +198,7 @@ public class AllocatableClusterResources {
for (Flavor flavor : nodeRepository.flavors().getFlavors()) {
// Flavor decide resources: Real resources are the worst case real resources we'll get if we ask for these advertised resources
NodeResources advertisedResources = nodeRepository.resourcesCalculator().advertisedResourcesOf(flavor);
- NodeResources realResources = nodeRepository.resourcesCalculator().requestToReal(advertisedResources, exclusive);
+ NodeResources realResources = nodeRepository.resourcesCalculator().requestToReal(advertisedResources, exclusive, false);
// Adjust where we don't need exact match to the flavor
if (flavor.resources().storageType() == NodeResources.StorageType.remote) {
@@ -217,6 +225,30 @@ public class AllocatableClusterResources {
}
}
+ private static AllocatableClusterResources calculateAllocatableResources(ClusterResources wantedResources,
+ NodeRepository nodeRepository,
+ ApplicationId applicationId,
+ ClusterSpec clusterSpec,
+ Limits applicationLimits,
+ boolean exclusive,
+ boolean bestCase) {
+ var systemLimits = new NodeResourceLimits(nodeRepository);
+ var advertisedResources = nodeRepository.resourcesCalculator().realToRequest(wantedResources.nodeResources(), exclusive, bestCase);
+ advertisedResources = systemLimits.enlargeToLegal(advertisedResources, applicationId, clusterSpec, exclusive); // Ask for something legal
+ advertisedResources = applicationLimits.cap(advertisedResources); // Overrides other conditions, even if it will then fail
+ var realResources = nodeRepository.resourcesCalculator().requestToReal(advertisedResources, exclusive, bestCase); // What we'll really get
+ if ( ! systemLimits.isWithinRealLimits(realResources, applicationId, clusterSpec)
+ && advertisedResources.storageType() == NodeResources.StorageType.any) {
+ // Since local disk reserves some of the storage, try to constrain to remote disk
+ advertisedResources = advertisedResources.with(NodeResources.StorageType.remote);
+ realResources = nodeRepository.resourcesCalculator().requestToReal(advertisedResources, exclusive, bestCase);
+ }
+ return new AllocatableClusterResources(wantedResources.with(realResources),
+ advertisedResources,
+ wantedResources,
+ clusterSpec);
+ }
+
/** Returns true if the given resources could be allocated on any of the given host flavors */
private static boolean anySatisfies(NodeResources realResources, List<NodeResources> availableRealHostResources) {
return availableRealHostResources.stream().anyMatch(realHostResources -> realHostResources.satisfies(realResources));
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/AllocationOptimizer.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/AllocationOptimizer.java
index b3372aee356..b56e8d1b247 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/AllocationOptimizer.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/AllocationOptimizer.java
@@ -32,11 +32,10 @@ public class AllocationOptimizer {
* @return the best allocation, if there are any possible legal allocations, fulfilling the target
* fully or partially, within the limits
*/
- public Optional<AllocatableClusterResources> findBestAllocation(Load targetLoad,
+ public Optional<AllocatableClusterResources> findBestAllocation(Load loadAdjustment,
AllocatableClusterResources current,
ClusterModel clusterModel,
Limits limits) {
- int minimumNodes = AllocationOptimizer.minimumNodes;
if (limits.isEmpty())
limits = Limits.of(new ClusterResources(minimumNodes, 1, NodeResources.unspecified()),
new ClusterResources(maximumNodes, maximumNodes, NodeResources.unspecified()),
@@ -53,14 +52,16 @@ public class AllocationOptimizer {
for (int nodes = limits.min().nodes(); nodes <= limits.max().nodes(); nodes++) {
if (nodes % groups != 0) continue;
if ( ! limits.groupSize().includes(nodes / groups)) continue;
-
var resources = new ClusterResources(nodes,
groups,
nodeResourcesWith(nodes, groups,
- limits, targetLoad, current, clusterModel));
- var allocatableResources = AllocatableClusterResources.from(resources, clusterModel.application().id(),
- current.clusterSpec(), limits,
- availableRealHostResources, nodeRepository);
+ limits, loadAdjustment, current, clusterModel));
+ var allocatableResources = AllocatableClusterResources.from(resources,
+ clusterModel.application().id(),
+ current.clusterSpec(),
+ limits,
+ availableRealHostResources,
+ nodeRepository);
if (allocatableResources.isEmpty()) continue;
if (bestAllocation.isEmpty() || allocatableResources.get().preferableTo(bestAllocation.get())) {
bestAllocation = allocatableResources;
@@ -84,10 +85,10 @@ public class AllocationOptimizer {
private NodeResources nodeResourcesWith(int nodes,
int groups,
Limits limits,
- Load targetLoad,
+ Load loadAdjustment,
AllocatableClusterResources current,
ClusterModel clusterModel) {
- var scaled = targetLoad // redundancy aware target relative to current load
+ var scaled = loadAdjustment // redundancy aware target relative to current load
.multiply(clusterModel.loadWith(nodes, groups)) // redundancy aware adjustment with these counts
.divide(clusterModel.redundancyAdjustment()) // correct for double redundancy adjustment
.scaled(current.realResources().nodeResources());
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/EmptyProvisionServiceProvider.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/EmptyProvisionServiceProvider.java
index e7332f6474d..b7e7ac7ee4b 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/EmptyProvisionServiceProvider.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/EmptyProvisionServiceProvider.java
@@ -41,10 +41,10 @@ public class EmptyProvisionServiceProvider implements ProvisionServiceProvider {
public NodeResources advertisedResourcesOf(Flavor flavor) { return flavor.resources(); }
@Override
- public NodeResources requestToReal(NodeResources resources, boolean exclusive) { return resources; }
+ public NodeResources requestToReal(NodeResources resources, boolean exclusive, boolean bestCase) { return resources; }
@Override
- public NodeResources realToRequest(NodeResources resources, boolean exclusive) { return resources; }
+ public NodeResources realToRequest(NodeResources resources, boolean exclusive, boolean bestCase) { return resources; }
@Override
public long reservedDiskSpaceInBase2Gb(NodeType nodeType, boolean sharedHost) { return 0; }
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/HostResourcesCalculator.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/HostResourcesCalculator.java
index 0f186337b6d..90cdf932f17 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/HostResourcesCalculator.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/HostResourcesCalculator.java
@@ -28,13 +28,13 @@ public interface HostResourcesCalculator {
* Used with exclusive hosts:
* Returns the lowest possible real resources we'll get if requesting the given advertised resources
*/
- NodeResources requestToReal(NodeResources advertisedResources, boolean exclusiveAllocation);
+ NodeResources requestToReal(NodeResources advertisedResources, boolean exclusiveAllocation, boolean bestCase);
/**
* Used with shared hosts:
* Returns the advertised resources we need to request to be sure to get at least the given real resources.
*/
- NodeResources realToRequest(NodeResources realResources, boolean exclusiveAllocation);
+ NodeResources realToRequest(NodeResources realResources, boolean exclusiveAllocation, boolean bestCase);
/**
* Returns the disk space to reserve in base2 GB. This space is reserved for use by the host, e.g. for storing