author Valerij Fredriksen <freva@users.noreply.github.com> 2023-06-06 00:01:11 +0200
committer GitHub <noreply@github.com> 2023-06-06 00:01:11 +0200
commit 4878116a848f0ceff01c49b67657d63a4113789d (patch)
tree 1105d9db2b6a3ad8b8bee8ba8887c4ad35129fa8
parent edd6b117da2621eb1e71fc91e92e230ab15e3de1 (diff)
parent 76e17a0deb9afd901d6c9157fc7e26e4a5aa8850 (diff)
Merge pull request #27298 from vespa-engine/bratseth/clustermodel-refactor
Bratseth/clustermodel refactor
-rw-r--r-- node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/ClusterModel.java | 204
1 file changed, 98 insertions(+), 106 deletions(-)
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/ClusterModel.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/ClusterModel.java
index 578f76654e9..dac9a0f3518 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/ClusterModel.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/ClusterModel.java
@@ -58,6 +58,10 @@ public class ClusterModel {
private final ClusterSpec clusterSpec;
private final Cluster cluster;
+ private final CpuModel cpu = new CpuModel();
+ private final MemoryModel memory = new MemoryModel();
+ private final DiskModel disk = new DiskModel();
+
/**
* The current active nodes of this cluster, including retired nodes,
* or empty if this models a new cluster not yet deployed.
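The three fields added above hang per-resource estimators off the model. In miniature, and with hypothetical names rather than the Vespa classes, the pattern is: non-static inner classes read the enclosing model's state directly, so related estimators can be grouped without passing shared fields around.

public class OuterModelSketch {

    private final double measuredLoad = 0.7; // shared state on the outer model

    // A non-static inner class sees the outer instance's fields directly.
    private class CpuEstimator {
        double ideal() { return measuredLoad * 0.8; }
    }

    private final CpuEstimator cpu = new CpuEstimator();

    public static void main(String[] args) {
        System.out.printf("ideal: %.2f%n", new OuterModelSketch().cpu.ideal()); // 0.56
    }
}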
@@ -117,12 +121,29 @@ public class ClusterModel {
public Application application() { return application; }
public ClusterSpec clusterSpec() { return clusterSpec; }
- public Cluster cluster() { return cluster; }
+ private ClusterNodesTimeseries nodeTimeseries() { return nodeTimeseries; }
+ private ClusterTimeseries clusterTimeseries() { return clusterTimeseries; }
+
+ /** Returns the instant this model was created. */
+ public Instant at() { return at;}
public boolean isEmpty() {
return nodeTimeseries().isEmpty();
}
+ /** Returns the predicted duration of a rescaling of this cluster */
+ public Duration scalingDuration() { return scalingDuration; }
+
+ /** Returns the average of the peak load measurement in each dimension, from each node. */
+ public Load peakLoad() {
+ return nodeTimeseries().peakLoad();
+ }
+
+ /** Returns the relative load adjustment accounting for redundancy in this. */
+ public Load redundancyAdjustment() {
+ return loadWith(nodeCount(), groupCount());
+ }
+
/** Returns the relative load adjustment that should be made to this cluster given available measurements. */
public Load loadAdjustment() {
if (nodeTimeseries().measurementsPerNode() < 0.5) return Load.one(); // Don't change based on very little data
@@ -158,19 +179,6 @@ public class ClusterModel {
return Duration.ofMinutes(5);
}
- /** Returns the predicted duration of a rescaling of this cluster */
- public Duration scalingDuration() { return scalingDuration; }
-
- /** Returns the average of the peak load measurement in each dimension, from each node. */
- public Load peakLoad() {
- return nodeTimeseries().peakLoad();
- }
-
- /** Returns the relative load adjustment accounting for redundancy in this. */
- public Load redundancyAdjustment() {
- return loadWith(nodeCount(), groupCount());
- }
-
/**
* Returns the relative load adjustment accounting for redundancy given these nodes+groups
* relative to the nodes+groups in this.
@@ -189,8 +197,8 @@ public class ClusterModel {
double queryCpu = queryCpuPerGroup * groupCount() / groups;
double writeCpu = (double)groupSize() / groupSize;
- return new Load(queryCpuFraction() * queryCpu + (1 - queryCpuFraction()) * writeCpu,
- (1 - fixedMemoryFraction()) * (double)groupSize() / groupSize + fixedMemoryFraction() * 1,
+ return new Load(cpu.queryFraction() * queryCpu + (1 - cpu.queryFraction()) * writeCpu,
+ (1 - memory.fixedFraction()) * (double)groupSize() / groupSize + memory.fixedFraction() * 1,
(double)groupSize() / groupSize);
}
else {
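A worked numeric sketch of the content-cluster branch above, under invented inputs: queryCpuPerGroup, the query/memory fractions, and both topologies are assumptions, since their derivation sits outside this hunk. Queries fan out across groups while writes land on every group, which is why the two cpu terms scale oppositely.

public class LoadWithSketch {
    public static void main(String[] args) {
        // Assumed current topology: 2 groups of 4 nodes; evaluating 4 groups of 2 nodes.
        int currentGroupCount = 2, currentGroupSize = 4;
        int targetGroupCount = 4, targetGroupSize = 2;
        double queryCpuPerGroup = 0.5; // taken as given; derived earlier in ClusterModel
        double queryFraction = 0.9;    // stand-in for cpu.queryFraction()
        double fixedFraction = 0.1;    // stand-in for memory.fixedFraction()

        double queryCpu = queryCpuPerGroup * currentGroupCount / targetGroupCount; // queries spread across groups
        double writeCpu = (double) currentGroupSize / targetGroupSize;             // writes scale with group size
        double cpu    = queryFraction * queryCpu + (1 - queryFraction) * writeCpu;
        double memory = (1 - fixedFraction) * ((double) currentGroupSize / targetGroupSize) + fixedFraction;
        double disk   = (double) currentGroupSize / targetGroupSize;
        System.out.printf("relative load: cpu=%.3f memory=%.3f disk=%.3f%n", cpu, memory, disk);
        // -> cpu=0.425 memory=1.900 disk=2.000: halving group size doubles per-node write and disk load
    }
}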
@@ -203,7 +211,7 @@ public class ClusterModel {
* if one of the nodes goes down.
*/
public Load idealLoad() {
- var ideal = new Load(idealCpuLoad(), idealMemoryLoad(), idealDiskLoad()).divide(redundancyAdjustment());
+ var ideal = new Load(cpu.idealLoad(), memory.idealLoad(), disk.idealLoad()).divide(redundancyAdjustment());
if ( !cluster.bcpGroupInfo().isEmpty() && cluster.bcpGroupInfo().queryRate() > 0) {
// Since we have little local information, use information about query cost in other groups
@@ -225,18 +233,7 @@ public class ClusterModel {
public Autoscaling.Metrics metrics() {
return new Autoscaling.Metrics(averageQueryRate().orElse(0),
growthRateHeadroom(),
- cpuCostPerQuery().orElse(0));
- }
-
- /** Returns the instant this model was created. */
- public Instant at() { return at;}
-
- private OptionalDouble cpuCostPerQuery() {
- if (averageQueryRate().isEmpty() || averageQueryRate().getAsDouble() == 0.0) return OptionalDouble.empty();
- // TODO: Query rate should generally be sampled at the time when we see the peak resource usage
- int fanOut = clusterSpec.type().isContainer() ? 1 : groupSize();
- return OptionalDouble.of(peakLoad().cpu() * queryCpuFraction() * fanOut * nodes.not().retired().first().get().resources().vcpu()
- / averageQueryRate().getAsDouble() / groupCount());
+ cpu.costPerQuery().orElse(0));
}
private Load adjustQueryDependentIdealLoadByBcpGroupInfo(Load ideal) {
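metrics() now reads cpu.costPerQuery(), whose body appears further down in this diff. Restated as standalone arithmetic with invented inputs (every value here is an assumption):

public class CostPerQuerySketch {
    public static void main(String[] args) {
        double peakCpuLoad = 0.6;    // peakLoad().cpu(): fraction of allocated vcpu in use at peak
        double queryFraction = 0.8;  // cpu.queryFraction(): share of cpu spent on queries
        int    fanOut = 4;           // group size for a content cluster, 1 for a container cluster
        double vcpuPerNode = 8.0;    // resources of one non-retired node
        double queryRate = 100.0;    // average queries/s
        int    groupCount = 2;

        double costPerQuery = peakCpuLoad * queryFraction * fanOut * vcpuPerNode
                              / queryRate / groupCount;
        System.out.printf("cpu cost per query: %.4f vcpu%n", costPerQuery); // 0.0768
    }
}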
@@ -246,7 +243,7 @@ public class ClusterModel {
: cluster.bcpGroupInfo().queryRate() )
* cluster.bcpGroupInfo().growthRateHeadroom() * trafficShiftHeadroom();
double neededTotalVcpPerGroup = cluster.bcpGroupInfo().cpuCostPerQuery() * targetQueryRateToHandle / groupCount() +
- ( 1 - queryCpuFraction()) * idealCpuLoad() *
+ ( 1 - cpu.queryFraction()) * cpu.idealLoad() *
(clusterSpec.type().isContainer() ? 1 : groupSize());
double cpuAdjustment = neededTotalVcpPerGroup / currentClusterTotalVcpuPerGroup;
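The adjustment above compares the vcpu a group would need to meet the BCP target against what it has. A rough numeric restatement with invented inputs; currentClusterTotalVcpuPerGroup and the headroom-scaled target rate are computed elsewhere in ClusterModel.

public class BcpCpuAdjustmentSketch {
    public static void main(String[] args) {
        double cpuCostPerQuery = 0.08;  // vcpu per query, reported by the BCP group info
        double targetQueryRate = 500;   // queries/s to absorb, headroom already applied
        int    groupCount = 2;
        double queryFraction = 0.8;     // cpu.queryFraction()
        double idealCpuLoad = 0.55;     // cpu.idealLoad()
        int    groupSize = 4;           // content cluster fan-out; 1 for containers
        double currentTotalVcpuPerGroup = 32;

        double neededTotalVcpuPerGroup = cpuCostPerQuery * targetQueryRate / groupCount
                                         + (1 - queryFraction) * idealCpuLoad * groupSize;
        double cpuAdjustment = neededTotalVcpuPerGroup / currentTotalVcpuPerGroup;
        System.out.printf("cpu adjustment: %.3f%n", cpuAdjustment); // (20 + 0.44) / 32 = 0.639
    }
}

An adjustment below 1 means the group already has more vcpu than the BCP target demands.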
@@ -260,10 +257,6 @@ public class ClusterModel {
return lastCompletion.get().isAfter(clock.instant().minus(period));
}
- private ClusterNodesTimeseries nodeTimeseries() { return nodeTimeseries; }
-
- private ClusterTimeseries clusterTimeseries() { return clusterTimeseries; }
-
/**
* Returns the predicted max query growth rate per minute as a fraction of the average traffic
* in the scaling window.
@@ -313,17 +306,6 @@ public class ClusterModel {
return nodes > 1 ? (groups == 1 ? 1 : groups - 1) : groups;
}
- /** Ideal cpu load must take the application traffic fraction into account. */
- private double idealCpuLoad() {
- double queryCpuFraction = queryCpuFraction();
-
- // Assumptions: 1) Write load is not organic so we should not grow to handle more.
- // (TODO: But allow applications to set their target write rate and size for that)
- // 2) Write load does not change in BCP scenarios.
- return queryCpuFraction * 1/growthRateHeadroom() * 1/trafficShiftHeadroom() * idealQueryCpuLoad +
- (1 - queryCpuFraction) * idealWriteCpuLoad;
- }
-
/** Returns the headroom for growth during organic traffic growth as a multiple of current resources. */
private double growthRateHeadroom() {
if ( ! nodeRepository.zone().environment().isProduction()) return 1;
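The idealCpuLoad() removed above reappears below as CpuModel.idealLoad(). Here is its formula with assumed numbers; the two headroom factors and both ideal-load constants are invented, not the values defined in ClusterModel. Only the query-driven share of cpu reserves headroom, per the assumptions in the code comment.

public class IdealCpuLoadSketch {
    public static void main(String[] args) {
        double queryFraction = 0.8;        // estimated share of cpu spent on queries
        double growthRateHeadroom = 1.2;   // multiple reserved for organic growth
        double trafficShiftHeadroom = 1.5; // multiple reserved for BCP traffic shifts
        double idealQueryCpuLoad = 0.8;    // assumed constant
        double idealWriteCpuLoad = 0.95;   // assumed constant

        // Write load is taken as non-organic and BCP-stable, so it gets no headroom.
        double ideal = queryFraction * (1 / growthRateHeadroom) * (1 / trafficShiftHeadroom) * idealQueryCpuLoad
                       + (1 - queryFraction) * idealWriteCpuLoad;
        System.out.printf("ideal cpu load: %.3f%n", ideal); // 0.546
    }
}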
@@ -361,78 +343,88 @@ public class ClusterModel {
return ( (headroom -1 ) * Math.min(1, averageQueryRate().orElse(0) / queryRateGivingFullConfidence) ) + 1;
}
- /** The estimated fraction of cpu usage which goes to processing queries vs. writes */
- private double queryCpuFraction() {
- OptionalDouble writeRate = clusterTimeseries().writeRate(scalingDuration(), clock);
- if (averageQueryRate().orElse(0) == 0 && writeRate.orElse(0) == 0) return queryCpuFraction(0.5);
- return queryCpuFraction(averageQueryRate().orElse(0) / (averageQueryRate().orElse(0) + writeRate.orElse(0)));
- }
+ private class CpuModel {
- private double queryCpuFraction(double queryRateFraction) {
- double relativeQueryCost = 9; // How much more expensive are queries than writes? TODO: Measure
- double writeFraction = 1 - queryRateFraction;
- return queryRateFraction * relativeQueryCost / (queryRateFraction * relativeQueryCost + writeFraction);
- }
+ /** Ideal cpu load must take the application traffic fraction into account. */
+ double idealLoad() {
+ double queryCpuFraction = queryFraction();
- private double idealMemoryLoad() {
- if (clusterSpec.type().isContainer()) return idealContainerMemoryLoad;
- if (clusterSpec.type() == ClusterSpec.Type.admin) return idealContainerMemoryLoad; // Not autoscaled, but ideal shown in console
- return idealContentMemoryLoad;
- }
+ // Assumptions: 1) Write load is not organic so we should not grow to handle more.
+ // (TODO: But allow applications to set their target write rate and size for that)
+ // 2) Write load does not change in BCP scenarios.
+ return queryCpuFraction * 1/growthRateHeadroom() * 1/trafficShiftHeadroom() * idealQueryCpuLoad +
+ (1 - queryCpuFraction) * idealWriteCpuLoad;
+ }
- /**
- * Returns the fraction of memory of the current allocation which is currently consumed by
- * fixed data structures which take the same amount of space regardless of document volume.
- */
- private double fixedMemoryFraction() {
- if (clusterSpec().type().isContainer()) return 1.0;
- double fixedMemory = nodeMemoryOverheadGb +
- (averageRealMemory() - nodeMemoryOverheadGb) * 0.05; // TODO: Measure actual content node usage
- return fixedMemory / averageRealMemory();
- }
+ OptionalDouble costPerQuery() {
+ if (averageQueryRate().isEmpty() || averageQueryRate().getAsDouble() == 0.0) return OptionalDouble.empty();
+ // TODO: Query rate should generally be sampled at the time when we see the peak resource usage
+ int fanOut = clusterSpec.type().isContainer() ? 1 : groupSize();
+ return OptionalDouble.of(peakLoad().cpu() * cpu.queryFraction() * fanOut * nodes.not().retired().first().get().resources().vcpu()
+ / averageQueryRate().getAsDouble() / groupCount());
+ }
- private double averageRealMemory() {
- if (nodes.isEmpty()) { // we're estimating
- var initialResources = new CapacityPolicies(nodeRepository).specifyFully(cluster.minResources().nodeResources(),
- clusterSpec,
- application.id());
- return nodeRepository.resourcesCalculator().requestToReal(initialResources,
- nodeRepository.exclusiveAllocation(clusterSpec),
- false).memoryGb();
+ /** The estimated fraction of cpu usage which goes to processing queries vs. writes */
+ double queryFraction() {
+ OptionalDouble writeRate = clusterTimeseries().writeRate(scalingDuration(), clock);
+ if (averageQueryRate().orElse(0) == 0 && writeRate.orElse(0) == 0) return queryFraction(0.5);
+ return queryFraction(averageQueryRate().orElse(0) / (averageQueryRate().orElse(0) + writeRate.orElse(0)));
}
- else {
- return nodes.stream()
- .mapToDouble(node -> nodeRepository.resourcesCalculator().realResourcesOf(node, nodeRepository).memoryGb())
- .average()
- .getAsDouble();
+
+ double queryFraction(double queryRateFraction) {
+ double relativeQueryCost = 9; // How much more expensive are queries than writes? TODO: Measure
+ double writeFraction = 1 - queryRateFraction;
+ return queryRateFraction * relativeQueryCost / (queryRateFraction * relativeQueryCost + writeFraction);
}
- }
- private double idealDiskLoad() {
- // Stateless clusters are not expected to consume more disk over time -
- // if they do it is due to logs which will be rotated away right before the disk is full
- return clusterSpec.isStateful() ? idealContentDiskLoad : idealContainerDiskLoad;
}
- /**
- * Creates a cluster model if possible, and logs a warning and returns empty otherwise.
- * This is useful in cases where it's possible to continue without the cluster model,
- * as QuestDb is known to temporarily fail while reading data.
- */
- public static Optional<ClusterModel> create(NodeRepository nodeRepository,
- Application application,
- ClusterSpec clusterSpec,
- Cluster cluster,
- NodeList clusterNodes,
- MetricsDb metricsDb,
- Clock clock) {
- try {
- return Optional.of(new ClusterModel(nodeRepository, application, clusterSpec, cluster, clusterNodes, metricsDb, clock));
+ private class MemoryModel {
+
+ double idealLoad() {
+ if (clusterSpec.type().isContainer()) return idealContainerMemoryLoad;
+ if (clusterSpec.type() == ClusterSpec.Type.admin) return idealContainerMemoryLoad; // Not autoscaled, but ideal shown in console
+ return idealContentMemoryLoad;
+ }
+
+ /**
+ * Returns the fraction of memory of the current allocation which is currently consumed by
+ * fixed data structures which take the same amount of space regardless of document volume.
+ */
+ double fixedFraction() {
+ if (clusterSpec().type().isContainer()) return 1.0;
+ double fixedMemory = nodeMemoryOverheadGb +
+ (averageReal() - nodeMemoryOverheadGb) * 0.05; // TODO: Measure actual content node usage
+ return fixedMemory / averageReal();
+ }
+
+ double averageReal() {
+ if (nodes.isEmpty()) { // we're estimating
+ var initialResources = new CapacityPolicies(nodeRepository).specifyFully(cluster.minResources().nodeResources(),
+ clusterSpec,
+ application.id());
+ return nodeRepository.resourcesCalculator().requestToReal(initialResources,
+ nodeRepository.exclusiveAllocation(clusterSpec),
+ false).memoryGb();
+ }
+ else {
+ return nodes.stream()
+ .mapToDouble(node -> nodeRepository.resourcesCalculator().realResourcesOf(node, nodeRepository).memoryGb())
+ .average()
+ .getAsDouble();
+ }
}
- catch (Exception e) {
- log.log(Level.WARNING, "Failed creating a cluster model for " + application + " " + cluster, e);
- return Optional.empty();
+
+ }
+
+ private class DiskModel {
+
+ double idealLoad() {
+ // Stateless clusters are not expected to consume more disk over time -
+ // if they do it is due to logs which will be rotated away right before the disk is full
+ return clusterSpec.isStateful() ? idealContentDiskLoad : idealContainerDiskLoad;
}
+
}
}
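Finally, queryFraction(double) above turns a rate fraction into a cpu fraction by weighting each query at relativeQueryCost = 9 writes. A tiny standalone check of that weighting, assuming equal query and write rates:

public class QueryCpuFractionSketch {
    public static void main(String[] args) {
        double relativeQueryCost = 9;   // one query costs roughly nine writes (TODO in the code: measure)
        double queryRateFraction = 0.5; // equal query and write rates
        double writeFraction = 1 - queryRateFraction;

        double cpuFraction = queryRateFraction * relativeQueryCost
                             / (queryRateFraction * relativeQueryCost + writeFraction);
        System.out.printf("query cpu fraction: %.2f%n", cpuFraction); // 0.90
    }
}

Even at equal rates, queries are estimated to consume 90% of cpu, which is why the query fraction dominates the ideal cpu target.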