summaryrefslogtreecommitdiffstats
path: root/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale
diff options
context:
space:
mode:
Diffstat (limited to 'node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale')
-rw-r--r--node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/AllocatableClusterResources.java32
-rw-r--r--node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/AllocationOptimizer.java47
-rw-r--r--node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/ClusterModel.java87
-rw-r--r--node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/ClusterNodesTimeseries.java23
-rw-r--r--node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/Load.java78
-rw-r--r--node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/NodeTimeseries.java5
-rw-r--r--node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/ResourceTarget.java20
7 files changed, 220 insertions, 72 deletions
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/AllocatableClusterResources.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/AllocatableClusterResources.java
index 88a4b492a0b..4a1545cc66c 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/AllocatableClusterResources.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/AllocatableClusterResources.java
@@ -64,6 +64,33 @@ public class AllocatableClusterResources {
this.fulfilment = fulfilment(realResources, idealResources);
}
+ private AllocatableClusterResources(int nodes,
+ int groups,
+ NodeResources realResources,
+ NodeResources advertisedResources,
+ ClusterSpec clusterSpec,
+ double fulfilment) {
+ this.nodes = nodes;
+ this.groups = groups;
+ this.realResources = realResources;
+ this.advertisedResources = advertisedResources;
+ this.clusterSpec = clusterSpec;
+ this.fulfilment = fulfilment;
+ }
+
+ /** Returns this with the redundant node or group removed from counts. */
+ public AllocatableClusterResources withoutRedundancy() {
+ int groupSize = nodes / groups;
+ int nodesAdjustedForRedundancy = nodes > 1 ? (groups == 1 ? nodes - 1 : nodes - groupSize) : nodes;
+ int groupsAdjustedForRedundancy = nodes > 1 ? (groups == 1 ? 1 : groups - 1) : groups;
+ return new AllocatableClusterResources(nodesAdjustedForRedundancy,
+ groupsAdjustedForRedundancy,
+ realResources,
+ advertisedResources,
+ clusterSpec,
+ fulfilment);
+ }
+
/**
* Returns the resources which will actually be available per node in this cluster with this allocation.
* These should be used for reasoning about allocation to meet measured demand.
@@ -83,11 +110,6 @@ public class AllocatableClusterResources {
public int nodes() { return nodes; }
public int groups() { return groups; }
- public int groupSize() {
- // ceil: If the division does not produce a whole number we assume some node is missing
- return (int)Math.ceil((double)nodes / groups);
- }
-
public ClusterSpec clusterSpec() { return clusterSpec; }
public double cost() { return nodes * advertisedResources.cost(); }
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/AllocationOptimizer.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/AllocationOptimizer.java
index 5bebd346bdb..29f53f0336d 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/AllocationOptimizer.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/AllocationOptimizer.java
@@ -2,7 +2,6 @@
package com.yahoo.vespa.hosted.provision.autoscale;
import com.yahoo.config.provision.ClusterResources;
-import com.yahoo.config.provision.ClusterSpec;
import com.yahoo.config.provision.NodeResources;
import com.yahoo.vespa.hosted.provision.NodeList;
import com.yahoo.vespa.hosted.provision.NodeRepository;
@@ -20,11 +19,6 @@ public class AllocationOptimizer {
private static final int minimumNodes = 2; // Since this number includes redundancy it cannot be lower than 2
private static final int maximumNodes = 150;
- // When a query is issued on a node the cost is the sum of a fixed cost component and a cost component
- // proportional to document count. We must account for this when comparing configurations with more or fewer nodes.
- // TODO: Measure this, and only take it into account with queries
- private static final double fixedCpuCostFraction = 0.1;
-
private final NodeRepository nodeRepository;
public AllocationOptimizer(NodeRepository nodeRepository) {
@@ -53,17 +47,10 @@ public class AllocationOptimizer {
for (int groups = limits.min().groups(); groups <= limits.max().groups(); groups++) {
for (int nodes = limits.min().nodes(); nodes <= limits.max().nodes(); nodes++) {
if (nodes % groups != 0) continue;
- int groupSize = nodes / groups;
-
- // Adjust for redundancy: Node in group if groups = 1, an extra group if multiple groups
- // TODO: Make the best choice based on size and redundancy setting instead
- int nodesAdjustedForRedundancy = target.adjustForRedundancy() && nodes > 1 ? (groups == 1 ? nodes - 1 : nodes - groupSize) : nodes;
- int groupsAdjustedForRedundancy = target.adjustForRedundancy() && nodes > 1 ? (groups == 1 ? 1 : groups - 1) : groups;
ClusterResources next = new ClusterResources(nodes,
groups,
- nodeResourcesWith(nodesAdjustedForRedundancy,
- groupsAdjustedForRedundancy,
+ nodeResourcesWith(nodes, groups,
limits, target, current, clusterModel));
var allocatableResources = AllocatableClusterResources.from(next, current.clusterSpec(), limits,
hosts, nodeRepository);
@@ -85,34 +72,14 @@ public class AllocationOptimizer {
ResourceTarget target,
AllocatableClusterResources current,
ClusterModel clusterModel) {
- double cpu, memory, disk;
- int groupSize = nodes / groups;
-
- if (current.clusterSpec().type() == ClusterSpec.Type.content) { // load scales with node share of content
- // Cpu: Query cpu scales with cluster size, write cpu scales with group size
- // Memory and disk: Scales with group size
-
- // The fixed cost portion of cpu does not scale with changes to the node count
- double queryCpuPerGroup = fixedCpuCostFraction * target.resources().vcpu() +
- (1 - fixedCpuCostFraction) * target.resources().vcpu() * current.groupSize() / groupSize;
-
- double queryCpu = queryCpuPerGroup * current.groups() / groups;
- double writeCpu = target.resources().vcpu() * current.groupSize() / groupSize;
- cpu = clusterModel.queryCpuFraction() * queryCpu + (1 - clusterModel.queryCpuFraction()) * writeCpu;
- memory = target.resources().memoryGb() * current.groupSize() / groupSize;
- disk = target.resources().diskGb() * current.groupSize() / groupSize;
- }
- else {
- cpu = target.resources().vcpu() * current.nodes() / nodes;
- memory = target.resources().memoryGb();
- disk = target.resources().diskGb();
- }
+ var scaled = clusterModel.loadWith(nodes, groups)
+ .scaled(Load.one().divide(clusterModel.redundancyAdjustment()).scaled(target.resources()));
// Combine the scaled resource values computed here
// with the currently configured non-scaled values, given in the limits, if any
- NodeResources nonScaled = limits.isEmpty() || limits.min().nodeResources().isUnspecified()
- ? current.advertisedResources().nodeResources()
- : limits.min().nodeResources(); // min=max for non-scaled
- return nonScaled.withVcpu(cpu).withMemoryGb(memory).withDiskGb(disk);
+ var nonScaled = limits.isEmpty() || limits.min().nodeResources().isUnspecified()
+ ? current.advertisedResources().nodeResources()
+ : limits.min().nodeResources(); // min=max for non-scaled
+ return nonScaled.withVcpu(scaled.vcpu()).withMemoryGb(scaled.memoryGb()).withDiskGb(scaled.diskGb());
}
/** Returns a copy of the given limits where the minimum nodes are at least the given value when allowed */
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/ClusterModel.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/ClusterModel.java
index 5b1ee6cc496..ae18e7ffb91 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/ClusterModel.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/ClusterModel.java
@@ -33,6 +33,11 @@ public class ClusterModel {
static final double idealContainerDiskLoad = 0.95;
static final double idealContentDiskLoad = 0.6;
+ // When a query is issued on a node the cost is the sum of a fixed cost component and a cost component
+ // proportional to document count. We must account for this when comparing configurations with more or fewer nodes.
+ // TODO: Measure this, and only take it into account with queries
+ private static final double fixedCpuCostFraction = 0.1;
+
private final Application application;
private final ClusterSpec clusterSpec;
private final Cluster cluster;
@@ -74,7 +79,7 @@ public class ClusterModel {
this.application = application;
this.clusterSpec = clusterSpec;
this.cluster = cluster;
- this.nodes = null;
+ this.nodes = NodeList.of();
this.clock = clock;
this.scalingDuration = scalingDuration;
@@ -86,6 +91,20 @@ public class ClusterModel {
public ClusterSpec clusterSpec() { return clusterSpec; }
public Cluster cluster() { return cluster; }
+ /** Returns the relative load adjustment that should be made to this cluster given available measurements. */
+ public Load loadAdjustment() {
+ if (nodeTimeseries().measurementsPerNode() == 0) return Load.one(); // No info, no change
+ /*
+ // Should we scale up?
+ Load relativePeak = nodeTimeseries().peakLoad().divide(idealLoad());
+ if (relativePeak.any(v -> v > 1))
+ return relativePeak.max(Load.one()); // Don't downscale any dimension if we upscale
+
+ // Should we scale down?
+ */
+ return averageLoad().divide(idealLoad());
+ }
+
/** Returns the predicted duration of a rescaling of this cluster */
public Duration scalingDuration() { return scalingDuration; }
@@ -114,8 +133,72 @@ public class ClusterModel {
/** Returns average load during the last {@link #scalingDuration()} */
public Load averageLoad() { return nodeTimeseries().averageLoad(clock.instant().minus(scalingDuration())); }
+ /** The number of nodes this cluster has, or will have if not deployed yet. */
+ // TODO: Make this the deployed, not current count
+ public int nodeCount() {
+ if ( ! nodes.isEmpty()) return (int)nodes.stream().count();
+ return cluster.minResources().nodes();
+ }
+
+ /** The number of groups this cluster has, or will have if not deployed yet. */
+ // TODO: Make this the deployed, not current count
+ public int groupCount() {
+ if ( ! nodes.isEmpty()) return (int)nodes.stream().mapToInt(node -> node.allocation().get().membership().cluster().group().get().index()).distinct().count();
+ return cluster.minResources().groups();
+ }
+
+ public int groupSize() {
+ // ceil: If the division does not produce a whole number we assume some node is missing
+ return (int)Math.ceil((double)nodeCount() / groupCount());
+ }
+
+ /** Returns the relative load adjustment accounting for redundancy in this. */
+ public Load redundancyAdjustment() {
+ return loadWith(nodeCount(), groupCount());
+ }
+
+ /**
+ * Returns the relative load adjustment accounting for redundancy given these nodes+groups
+ * relative to the nodes+groups in this.
+ */
+ public Load loadWith(int trueNodes, int trueGroups) {
+ int nodes = nodesAdjustedForRedundancy(trueNodes, trueGroups);
+ int groups = groupsAdjustedForRedundancy(trueNodes, trueGroups);
+ if (clusterSpec().type() == ClusterSpec.Type.content) { // load scales with node share of content
+ int groupSize = nodes / groups;
+
+ // Cpu: Query cpu scales with cluster size, write cpu scales with group size
+ // Memory and disk: Scales with group size
+
+ // The fixed cost portion of cpu does not scale with changes to the node count
+ double queryCpuPerGroup = fixedCpuCostFraction + (1 - fixedCpuCostFraction) * groupSize() / groupSize;
+
+ double queryCpu = queryCpuPerGroup * groupCount() / groups;
+ double writeCpu = (double)groupSize() / groupSize;
+ return new Load(queryCpuFraction() * queryCpu + (1 - queryCpuFraction()) * writeCpu,
+ (double)groupSize() / groupSize,
+ (double)groupSize() / groupSize);
+ }
+ else {
+ return new Load((double)nodeCount() / nodes, 1, 1);
+ }
+ }
+
+ /**
+ * Returns the ideal load across the nodes of this such that each node will be at ideal load
+ * if one of the nodes goes down.
+ */
public Load idealLoad() {
- return new Load(idealCpuLoad(), idealMemoryLoad, idealDiskLoad());
+ return new Load(idealCpuLoad(), idealMemoryLoad, idealDiskLoad()).divide(redundancyAdjustment());
+ }
+
+ public int nodesAdjustedForRedundancy(int nodes, int groups) {
+ int groupSize = (int)Math.ceil((double)nodes / groups);
+ return nodes > 1 ? (groups == 1 ? nodes - 1 : nodes - groupSize) : nodes;
+ }
+
+ public int groupsAdjustedForRedundancy(int nodes, int groups) {
+ return nodes > 1 ? (groups == 1 ? 1 : groups - 1) : groups;
}
/** Ideal cpu load must take the application traffic fraction into account */
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/ClusterNodesTimeseries.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/ClusterNodesTimeseries.java
index 36056665a15..ab5be045dd4 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/ClusterNodesTimeseries.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/ClusterNodesTimeseries.java
@@ -8,6 +8,7 @@ import java.time.Duration;
import java.time.Instant;
import java.util.List;
import java.util.Optional;
+import java.util.OptionalDouble;
import java.util.function.Predicate;
import java.util.stream.Collectors;
@@ -49,6 +50,7 @@ public class ClusterNodesTimeseries {
/** Returns the average number of measurements per node */
public int measurementsPerNode() {
+ if (clusterNodes.size() == 0) return 0;
int measurementCount = timeseries.stream().mapToInt(m -> m.size()).sum();
return measurementCount / clusterNodes.size();
}
@@ -84,6 +86,27 @@ public class ClusterNodesTimeseries {
return total.divide(count);
}
+ /**
+ * Returns the "peak load" in this: Which is for each load dimension,
+ * the average of the highest reading for that dimension on each node.
+ */
+ public Load peakLoad() {
+ return new Load(peakLoad(Load.Dimension.cpu), peakLoad(Load.Dimension.memory), peakLoad(Load.Dimension.disk));
+ }
+
+ private double peakLoad(Load.Dimension dimension) {
+ double total = 0;
+ int count = 0;
+ for (var nodeTimeseries : timeseries) {
+ OptionalDouble value = nodeTimeseries.peak(dimension);
+ if (value.isEmpty()) continue;
+ total += value.getAsDouble();
+ count++;
+ }
+ if (count == 0) return 0;
+ return total / count;
+ }
+
private static List<NodeTimeseries> keep(List<NodeTimeseries> timeseries, Predicate<NodeMetricSnapshot> filter) {
return timeseries.stream().map(nodeTimeseries -> nodeTimeseries.keep(filter)).collect(Collectors.toList());
}
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/Load.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/Load.java
index a52b048a9e0..88c7e70cd35 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/Load.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/Load.java
@@ -3,6 +3,12 @@ package com.yahoo.vespa.hosted.provision.autoscale;
import com.yahoo.config.provision.NodeResources;
+import java.util.Objects;
+import java.util.function.DoubleBinaryOperator;
+import java.util.function.DoubleFunction;
+import java.util.function.DoubleUnaryOperator;
+import java.util.function.Predicate;
+
/**
* The load of a node or system, measured as fractions of max (1.0) in three dimensions.
*
@@ -10,6 +16,8 @@ import com.yahoo.config.provision.NodeResources;
*/
public class Load {
+ public enum Dimension { cpu, memory, disk }
+
private final double cpu, memory, disk;
public Load(double cpu, double memory, double disk) {
@@ -23,27 +31,51 @@ public class Load {
public double disk() { return disk; }
public Load add(Load other) {
- return new Load(cpu + other.cpu(), memory + other.memory(), disk + other.disk());
+ return join(other, (a, b) -> a + b);
}
public Load multiply(NodeResources resources) {
return new Load(cpu * resources.vcpu(), memory * resources.memoryGb(), disk * resources.diskGb());
}
-
public Load multiply(double factor) {
- return new Load(cpu * factor, memory * factor, disk * factor);
+ return map(v -> v * factor);
+ }
+ public Load multiply(Load other) {
+ return join(other, (a, b) -> a * b);
}
+ public Load divide(Load divisor) {
+ return join(divisor, (a, b) -> divide(a, b));
+ }
+ public Load divide(double divisor) {
+ return map(v -> divide(v, divisor));
+ }
public Load divide(NodeResources resources) {
return new Load(divide(cpu, resources.vcpu()), divide(memory, resources.memoryGb()), divide(disk, resources.diskGb()));
}
- public Load divide(Load divisor) {
- return new Load(divide(cpu, divisor.cpu()), divide(memory, divisor.memory()), divide(disk, divisor.disk()));
+ /** Returns the load having the max value of this and the given load in each dimension. */
+ public Load max(Load other) {
+ return join(other, (a, b) -> Math.max(a, b));
}
- public Load divide(double divisor) {
- return new Load(divide(cpu, divisor), divide(memory, divisor), divide(disk, divisor));
+ /** Returns the load where the given function is applied to each dimension of this. */
+ public Load map(DoubleUnaryOperator f) {
+ return new Load(f.applyAsDouble(cpu),
+ f.applyAsDouble(memory),
+ f.applyAsDouble(disk));
+ }
+
+ /** Returns the load where the given function is applied to each dimension of this and the given load. */
+ public Load join(Load other, DoubleBinaryOperator f) {
+ return new Load(f.applyAsDouble(this.cpu(), other.cpu()),
+ f.applyAsDouble(this.memory(), other.memory()),
+ f.applyAsDouble(this.disk(), other.disk()));
+ }
+
+ /** Returns true if any dimension matches the predicate. */
+ public boolean any(Predicate<Double> test) {
+ return test.test(cpu) || test.test(memory) || test.test(disk);
}
public NodeResources scaled(NodeResources resources) {
@@ -52,6 +84,14 @@ public class Load {
.withDiskGb(disk * resources.diskGb());
}
+ public double get(Dimension dimension) {
+ return switch (dimension) {
+ case cpu -> cpu();
+ case memory -> memory();
+ case disk -> disk();
+ };
+ }
+
private double requireNormalized(double value, String name) {
if (Double.isNaN(value))
throw new IllegalArgumentException(name + " must be a number but is NaN");
@@ -60,17 +100,31 @@ public class Load {
return value;
}
+ private static double divide(double a, double b) {
+ if (a == 0 && b == 0) return 0;
+ return a / b;
+ }
+
+ @Override
+ public boolean equals(Object o) {
+ if (o == this) return true;
+ if ( ! (o instanceof Load other)) return false;
+ if (other.cpu() != this.cpu()) return false;
+ if (other.memory() != this.memory()) return false;
+ if (other.disk() != this.disk()) return false;
+ return true;
+ }
+
+ @Override
+ public int hashCode() { return Objects.hash(cpu, memory, disk); }
+
@Override
public String toString() {
return "load: " + cpu + " cpu, " + memory + " memory, " + disk + " disk";
}
public static Load zero() { return new Load(0, 0, 0); }
-
- private static double divide(double a, double b) {
- if (a == 0 && b == 0) return 0;
- return a / b;
- }
+ public static Load one() { return new Load(1, 1, 1); }
public static Load byDividing(NodeResources a, NodeResources b) {
return new Load(divide(a.vcpu(), b.vcpu()), divide(a.memoryGb(), b.memoryGb()), divide(a.diskGb(), b.diskGb()));
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/NodeTimeseries.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/NodeTimeseries.java
index 4a5f8972e11..500dbf0f66f 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/NodeTimeseries.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/NodeTimeseries.java
@@ -6,6 +6,7 @@ import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import java.util.Optional;
+import java.util.OptionalDouble;
import java.util.function.Predicate;
import java.util.stream.Collectors;
@@ -40,6 +41,10 @@ public class NodeTimeseries {
return Optional.of(snapshots.get(snapshots.size() - 1));
}
+ public OptionalDouble peak(Load.Dimension dimension) {
+ return snapshots.stream().mapToDouble(snapshot -> snapshot.load().get(dimension)).max();
+ }
+
public List<NodeMetricSnapshot> asList() { return snapshots; }
public String hostname() { return hostname; }
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/ResourceTarget.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/ResourceTarget.java
index 72836baaf5b..7bc019caabb 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/ResourceTarget.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/ResourceTarget.java
@@ -10,43 +10,37 @@ import java.util.OptionalDouble;
/**
* A resource target to hit for the allocation optimizer.
- * The target is measured in cpu, memory and disk per node in the allocation given by current.
+ * The target is measured in cpu, memory and disk per node in the current allocation.
*
* @author bratseth
*/
public class ResourceTarget {
- private final boolean adjustForRedundancy;
-
/** The target real resources per node, assuming the node assignment where this was decided */
private final NodeResources resources;
- private ResourceTarget(NodeResources resources, boolean adjustForRedundancy) {
+ private ResourceTarget(NodeResources resources) {
this.resources = resources;
- this.adjustForRedundancy = adjustForRedundancy;
}
- /** Are the target resources given by this including redundancy or not */
- public boolean adjustForRedundancy() { return adjustForRedundancy; }
-
/** Returns the target resources per node in terms of the current allocation */
public NodeResources resources() { return resources; }
@Override
public String toString() {
- return "target " + resources + (adjustForRedundancy ? "(with redundancy adjustment) " : "");
+ return "target " + resources;
}
/** Create a target of achieving ideal load given a current load */
public static ResourceTarget idealLoad(ClusterModel clusterModel,
AllocatableClusterResources current) {
- var loadAdjustment = clusterModel.averageLoad().divide(clusterModel.idealLoad());
- return new ResourceTarget(loadAdjustment.scaled(current.realResources().nodeResources()), true);
+ return new ResourceTarget(clusterModel.loadAdjustment().scaled(current.realResources().nodeResources()));
}
/** Create a target of preserving a current allocation */
- public static ResourceTarget preserve(AllocatableClusterResources current) {
- return new ResourceTarget(current.realResources().nodeResources(), false);
+ public static ResourceTarget preserve(ClusterModel clusterModel,
+ AllocatableClusterResources current) {
+ return new ResourceTarget(current.realResources().nodeResources());
}
}