author | Valerij Fredriksen <freva@users.noreply.github.com> | 2023-06-06 00:01:11 +0200 |
---|---|---|
committer | GitHub <noreply@github.com> | 2023-06-06 00:01:11 +0200 |
commit | 4878116a848f0ceff01c49b67657d63a4113789d (patch) | |
tree | 1105d9db2b6a3ad8b8bee8ba8887c4ad35129fa8 | |
parent | edd6b117da2621eb1e71fc91e92e230ab15e3de1 (diff) | |
parent | 76e17a0deb9afd901d6c9157fc7e26e4a5aa8850 (diff) | |
Merge pull request #27298 from vespa-engine/bratseth/clustermodel-refactor
Bratseth/clustermodel refactor
-rw-r--r-- | node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/ClusterModel.java | 204 |
1 file changed, 98 insertions, 106 deletions
```diff
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/ClusterModel.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/ClusterModel.java
index 578f76654e9..dac9a0f3518 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/ClusterModel.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/ClusterModel.java
@@ -58,6 +58,10 @@ public class ClusterModel {
     private final ClusterSpec clusterSpec;
     private final Cluster cluster;
 
+    private final CpuModel cpu = new CpuModel();
+    private final MemoryModel memory = new MemoryModel();
+    private final DiskModel disk = new DiskModel();
+
     /**
      * The current active nodes of this cluster, including retired,
      * or empty if this models a new cluster not yet deployed.
@@ -117,12 +121,29 @@ public class ClusterModel {
     public Application application() { return application; }
     public ClusterSpec clusterSpec() { return clusterSpec; }
-    public Cluster cluster() { return cluster; }
+    private ClusterNodesTimeseries nodeTimeseries() { return nodeTimeseries; }
+    private ClusterTimeseries clusterTimeseries() { return clusterTimeseries; }
+
+    /** Returns the instant this model was created. */
+    public Instant at() { return at;}
 
     public boolean isEmpty() { return nodeTimeseries().isEmpty(); }
 
+    /** Returns the predicted duration of a rescaling of this cluster */
+    public Duration scalingDuration() { return scalingDuration; }
+
+    /** Returns the average of the peak load measurement in each dimension, from each node. */
+    public Load peakLoad() {
+        return nodeTimeseries().peakLoad();
+    }
+
+    /** Returns the relative load adjustment accounting for redundancy in this. */
+    public Load redundancyAdjustment() {
+        return loadWith(nodeCount(), groupCount());
+    }
+
     /** Returns the relative load adjustment that should be made to this cluster given available measurements. */
     public Load loadAdjustment() {
         if (nodeTimeseries().measurementsPerNode() < 0.5) return Load.one(); // Don't change based on very little data
@@ -158,19 +179,6 @@ public class ClusterModel {
         return Duration.ofMinutes(5);
     }
 
-    /** Returns the predicted duration of a rescaling of this cluster */
-    public Duration scalingDuration() { return scalingDuration; }
-
-    /** Returns the average of the peak load measurement in each dimension, from each node. */
-    public Load peakLoad() {
-        return nodeTimeseries().peakLoad();
-    }
-
-    /** Returns the relative load adjustment accounting for redundancy in this. */
-    public Load redundancyAdjustment() {
-        return loadWith(nodeCount(), groupCount());
-    }
-
     /**
      * Returns the relative load adjustment accounting for redundancy given these nodes+groups
      * relative to node nodes+groups in this.
@@ -189,8 +197,8 @@ public class ClusterModel {
             double queryCpu = queryCpuPerGroup * groupCount() / groups;
             double writeCpu = (double)groupSize() / groupSize;
-            return new Load(queryCpuFraction() * queryCpu + (1 - queryCpuFraction()) * writeCpu,
-                            (1 - fixedMemoryFraction()) * (double)groupSize() / groupSize + fixedMemoryFraction() * 1,
+            return new Load(cpu.queryFraction() * queryCpu + (1 - cpu.queryFraction()) * writeCpu,
+                            (1 - memory.fixedFraction()) * (double)groupSize() / groupSize + memory.fixedFraction() * 1,
                             (double)groupSize() / groupSize);
         }
         else {
@@ -203,7 +211,7 @@ public class ClusterModel {
      * if one of the nodes go down.
      */
     public Load idealLoad() {
-        var ideal = new Load(idealCpuLoad(), idealMemoryLoad(), idealDiskLoad()).divide(redundancyAdjustment());
+        var ideal = new Load(cpu.idealLoad(), memory.idealLoad(), disk.idealLoad()).divide(redundancyAdjustment());
         if ( !cluster.bcpGroupInfo().isEmpty() && cluster.bcpGroupInfo().queryRate() > 0) {
             // Since we have little local information, use information about query cost in other groups
@@ -225,18 +233,7 @@ public class ClusterModel {
     public Autoscaling.Metrics metrics() {
         return new Autoscaling.Metrics(averageQueryRate().orElse(0),
                                        growthRateHeadroom(),
-                                       cpuCostPerQuery().orElse(0));
-    }
-
-    /** Returns the instant this model was created. */
-    public Instant at() { return at;}
-
-    private OptionalDouble cpuCostPerQuery() {
-        if (averageQueryRate().isEmpty() || averageQueryRate().getAsDouble() == 0.0) return OptionalDouble.empty();
-        // TODO: Query rate should generally be sampled at the time where we see the peak resource usage
-        int fanOut = clusterSpec.type().isContainer() ? 1 : groupSize();
-        return OptionalDouble.of(peakLoad().cpu() * queryCpuFraction() * fanOut * nodes.not().retired().first().get().resources().vcpu()
-                                 / averageQueryRate().getAsDouble() / groupCount());
+                                       cpu.costPerQuery().orElse(0));
     }
 
     private Load adjustQueryDependentIdealLoadByBcpGroupInfo(Load ideal) {
@@ -246,7 +243,7 @@ public class ClusterModel {
                                                  : cluster.bcpGroupInfo().queryRate() )
                                          * cluster.bcpGroupInfo().growthRateHeadroom() * trafficShiftHeadroom();
         double neededTotalVcpPerGroup = cluster.bcpGroupInfo().cpuCostPerQuery() * targetQueryRateToHandle / groupCount() +
-                                        ( 1 - queryCpuFraction()) * idealCpuLoad() *
+                                        ( 1 - cpu.queryFraction()) * cpu.idealLoad() *
                                         (clusterSpec.type().isContainer() ? 1 : groupSize());
         double cpuAdjustment = neededTotalVcpPerGroup / currentClusterTotalVcpuPerGroup;
@@ -260,10 +257,6 @@ public class ClusterModel {
         return lastCompletion.get().isAfter(clock.instant().minus(period));
     }
 
-    private ClusterNodesTimeseries nodeTimeseries() { return nodeTimeseries; }
-
-    private ClusterTimeseries clusterTimeseries() { return clusterTimeseries; }
-
     /**
      * Returns the predicted max query growth rate per minute as a fraction of the average traffic
     * in the scaling window.
@@ -313,17 +306,6 @@ public class ClusterModel {
         return nodes > 1 ? (groups == 1 ? 1 : groups - 1) : groups;
     }
 
-    /** Ideal cpu load must take the application traffic fraction into account. */
-    private double idealCpuLoad() {
-        double queryCpuFraction = queryCpuFraction();
-
-        // Assumptions: 1) Write load is not organic so we should not grow to handle more.
-        //                 (TODO: But allow applications to set their target write rate and size for that)
-        //              2) Write load does not change in BCP scenarios.
-        return queryCpuFraction * 1/growthRateHeadroom() * 1/trafficShiftHeadroom() * idealQueryCpuLoad +
-               (1 - queryCpuFraction) * idealWriteCpuLoad;
-    }
-
     /** Returns the headroom for growth during organic traffic growth as a multiple of current resources. */
     private double growthRateHeadroom() {
         if ( ! nodeRepository.zone().environment().isProduction()) return 1;
@@ -361,78 +343,88 @@ public class ClusterModel {
         return ( (headroom -1 ) * Math.min(1, averageQueryRate().orElse(0) / queryRateGivingFullConfidence) ) + 1;
     }
 
-    /** The estimated fraction of cpu usage which goes to processing queries vs. writes */
-    private double queryCpuFraction() {
-        OptionalDouble writeRate = clusterTimeseries().writeRate(scalingDuration(), clock);
-        if (averageQueryRate().orElse(0) == 0 && writeRate.orElse(0) == 0) return queryCpuFraction(0.5);
-        return queryCpuFraction(averageQueryRate().orElse(0) / (averageQueryRate().orElse(0) + writeRate.orElse(0)));
-    }
-
-    private double queryCpuFraction(double queryRateFraction) {
-        double relativeQueryCost = 9; // How much more expensive are queries than writes? TODO: Measure
-        double writeFraction = 1 - queryRateFraction;
-        return queryRateFraction * relativeQueryCost / (queryRateFraction * relativeQueryCost + writeFraction);
-    }
-
-    private double idealMemoryLoad() {
-        if (clusterSpec.type().isContainer()) return idealContainerMemoryLoad;
-        if (clusterSpec.type() == ClusterSpec.Type.admin) return idealContainerMemoryLoad; // Not autoscaled, but ideal shown in console
-        return idealContentMemoryLoad;
-    }
-
-    /**
-     * Returns the fraction of memory of the current allocation which is currently consumed by
-     * fixed data structures which take the same amount of space regardless of document volume.
-     */
-    private double fixedMemoryFraction() {
-        if (clusterSpec().type().isContainer()) return 1.0;
-        double fixedMemory = nodeMemoryOverheadGb +
-                             (averageRealMemory() - nodeMemoryOverheadGb) * 0.05; // TODO: Measure actual content node usage
-        return fixedMemory / averageRealMemory();
-    }
-
-    private double averageRealMemory() {
-        if (nodes.isEmpty()) { // we're estimating
-            var initialResources = new CapacityPolicies(nodeRepository).specifyFully(cluster.minResources().nodeResources(),
-                                                                                     clusterSpec,
-                                                                                     application.id());
-            return nodeRepository.resourcesCalculator().requestToReal(initialResources,
-                                                                      nodeRepository.exclusiveAllocation(clusterSpec),
-                                                                      false).memoryGb();
-        }
-        else {
-            return nodes.stream()
-                        .mapToDouble(node -> nodeRepository.resourcesCalculator().realResourcesOf(node, nodeRepository).memoryGb())
-                        .average()
-                        .getAsDouble();
-        }
-    }
-
-    private double idealDiskLoad() {
-        // Stateless clusters are not expected to consume more disk over time -
-        // if they do it is due to logs which will be rotated away right before the disk is full
-        return clusterSpec.isStateful() ? idealContentDiskLoad : idealContainerDiskLoad;
-    }
+    private class CpuModel {
+
+        /** Ideal cpu load must take the application traffic fraction into account. */
+        double idealLoad() {
+            double queryCpuFraction = queryFraction();
+
+            // Assumptions: 1) Write load is not organic so we should not grow to handle more.
+            //                 (TODO: But allow applications to set their target write rate and size for that)
+            //              2) Write load does not change in BCP scenarios.
+            return queryCpuFraction * 1/growthRateHeadroom() * 1/trafficShiftHeadroom() * idealQueryCpuLoad +
+                   (1 - queryCpuFraction) * idealWriteCpuLoad;
+        }
+
+        OptionalDouble costPerQuery() {
+            if (averageQueryRate().isEmpty() || averageQueryRate().getAsDouble() == 0.0) return OptionalDouble.empty();
+            // TODO: Query rate should generally be sampled at the time where we see the peak resource usage
+            int fanOut = clusterSpec.type().isContainer() ? 1 : groupSize();
+            return OptionalDouble.of(peakLoad().cpu() * cpu.queryFraction() * fanOut * nodes.not().retired().first().get().resources().vcpu()
+                                     / averageQueryRate().getAsDouble() / groupCount());
+        }
+
+        /** The estimated fraction of cpu usage which goes to processing queries vs. writes */
+        double queryFraction() {
+            OptionalDouble writeRate = clusterTimeseries().writeRate(scalingDuration(), clock);
+            if (averageQueryRate().orElse(0) == 0 && writeRate.orElse(0) == 0) return queryFraction(0.5);
+            return queryFraction(averageQueryRate().orElse(0) / (averageQueryRate().orElse(0) + writeRate.orElse(0)));
+        }
+
+        double queryFraction(double queryRateFraction) {
+            double relativeQueryCost = 9; // How much more expensive are queries than writes? TODO: Measure
+            double writeFraction = 1 - queryRateFraction;
+            return queryRateFraction * relativeQueryCost / (queryRateFraction * relativeQueryCost + writeFraction);
+        }
+
+    }
+
+    private class MemoryModel {
+
+        double idealLoad() {
+            if (clusterSpec.type().isContainer()) return idealContainerMemoryLoad;
+            if (clusterSpec.type() == ClusterSpec.Type.admin) return idealContainerMemoryLoad; // Not autoscaled, but ideal shown in console
+            return idealContentMemoryLoad;
+        }
+
+        /**
+         * Returns the fraction of memory of the current allocation which is currently consumed by
+         * fixed data structures which take the same amount of space regardless of document volume.
+         */
+        double fixedFraction() {
+            if (clusterSpec().type().isContainer()) return 1.0;
+            double fixedMemory = nodeMemoryOverheadGb +
+                                 (averageReal() - nodeMemoryOverheadGb) * 0.05; // TODO: Measure actual content node usage
+            return fixedMemory / averageReal();
+        }
+
+        double averageReal() {
+            if (nodes.isEmpty()) { // we're estimating
+                var initialResources = new CapacityPolicies(nodeRepository).specifyFully(cluster.minResources().nodeResources(),
+                                                                                         clusterSpec,
+                                                                                         application.id());
+                return nodeRepository.resourcesCalculator().requestToReal(initialResources,
+                                                                          nodeRepository.exclusiveAllocation(clusterSpec),
+                                                                          false).memoryGb();
+            }
+            else {
+                return nodes.stream()
+                            .mapToDouble(node -> nodeRepository.resourcesCalculator().realResourcesOf(node, nodeRepository).memoryGb())
+                            .average()
+                            .getAsDouble();
+            }
+        }
+
+    }
+
+    private class DiskModel {
+
+        double idealLoad() {
+            // Stateless clusters are not expected to consume more disk over time -
+            // if they do it is due to logs which will be rotated away right before the disk is full
+            return clusterSpec.isStateful() ? idealContentDiskLoad : idealContainerDiskLoad;
+        }
+
+    }
 
     /**
      * Create a cluster model if possible and logs a warning and returns empty otherwise.
      * This is useful in cases where it's possible to continue without the cluster model,
      * as QuestDb is known to temporarily fail during reading of data.
      */
     public static Optional<ClusterModel> create(NodeRepository nodeRepository,
                                                 Application application,
                                                 ClusterSpec clusterSpec,
                                                 Cluster cluster,
                                                 NodeList clusterNodes,
                                                 MetricsDb metricsDb,
                                                 Clock clock) {
         try {
             return Optional.of(new ClusterModel(nodeRepository, application, clusterSpec, cluster, clusterNodes, metricsDb, clock));
         }
         catch (Exception e) {
             log.log(Level.WARNING, "Failed creating a cluster model for " + application + " " + cluster, e);
             return Optional.empty();
         }
     }
 
 }
```
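For readers unfamiliar with the pattern this commit applies, here is a minimal, self-contained Java sketch of the idea: per-resource calculations that lived as flat private methods on ClusterModel are grouped into nested per-dimension classes. All names, fields, and numbers below are illustrative assumptions for the sketch, not the actual Vespa implementation.

```java
import java.util.Arrays;

// Sketch of the ClusterModel refactor: one nested class per resource dimension.
// Hypothetical, simplified names and target values throughout.
public class ClusterModelSketch {

    private final boolean stateful;   // content cluster vs. stateless container (assumed field)
    private final double queryShare;  // fraction of cpu spent on queries (assumed field)

    // The owner holds one model per dimension, mirroring cpu/memory/disk in the diff.
    private final Cpu cpu = new Cpu();
    private final Memory memory = new Memory();
    private final Disk disk = new Disk();

    public ClusterModelSketch(boolean stateful, double queryShare) {
        this.stateful = stateful;
        this.queryShare = queryShare;
    }

    /** Each dimension contributes its own ideal load; the owner only aggregates. */
    public double[] idealLoad() {
        return new double[] { cpu.idealLoad(), memory.idealLoad(), disk.idealLoad() };
    }

    // Non-static inner classes close over the enclosing instance, so the
    // per-dimension logic reads shared state directly instead of taking parameters.
    private class Cpu {
        double idealLoad() {
            double idealQuery = 0.8, idealWrite = 0.95; // illustrative targets
            return queryShare * idealQuery + (1 - queryShare) * idealWrite;
        }
    }

    private class Memory {
        double idealLoad() { return stateful ? 0.65 : 0.85; } // illustrative targets
    }

    private class Disk {
        double idealLoad() { return stateful ? 0.75 : 0.95; } // illustrative targets
    }

    public static void main(String[] args) {
        ClusterModelSketch model = new ClusterModelSketch(true, 0.7);
        System.out.println(Arrays.toString(model.idealLoad())); // [0.845, 0.65, 0.75]
    }
}
```

Because the nested classes are non-static, they can still call enclosing members such as growthRateHeadroom() and read fields such as clusterSpec, which is why the moved methods in the diff keep their bodies essentially unchanged while gaining shorter names like cpu.queryFraction() and memory.fixedFraction().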