author | Valerij Fredriksen <freva@users.noreply.github.com> | 2023-06-06 00:01:11 +0200 |
---|---|---|
committer | GitHub <noreply@github.com> | 2023-06-06 00:01:11 +0200 |
commit | 4878116a848f0ceff01c49b67657d63a4113789d (patch) | |
tree | 1105d9db2b6a3ad8b8bee8ba8887c4ad35129fa8 | |
parent | edd6b117da2621eb1e71fc91e92e230ab15e3de1 (diff) | |
parent | 76e17a0deb9afd901d6c9157fc7e26e4a5aa8850 (diff) | |
Merge pull request #27298 from vespa-engine/bratseth/clustermodel-refactor
Bratseth/clustermodel refactor
-rw-r--r-- | node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/ClusterModel.java | 204 |
1 file changed, 98 insertions, 106 deletions
```diff
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/ClusterModel.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/ClusterModel.java
index 578f76654e9..dac9a0f3518 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/ClusterModel.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/ClusterModel.java
@@ -58,6 +58,10 @@ public class ClusterModel {
     private final ClusterSpec clusterSpec;
     private final Cluster cluster;
 
+    private final CpuModel cpu = new CpuModel();
+    private final MemoryModel memory = new MemoryModel();
+    private final DiskModel disk = new DiskModel();
+
     /**
      * The current active nodes of this cluster, including retired,
      * or empty if this models a new cluster not yet deployed.
@@ -117,12 +121,29 @@ public class ClusterModel {
     public Application application() { return application; }
     public ClusterSpec clusterSpec() { return clusterSpec; }
-    public Cluster cluster() { return cluster; }
+    private ClusterNodesTimeseries nodeTimeseries() { return nodeTimeseries; }
+    private ClusterTimeseries clusterTimeseries() { return clusterTimeseries; }
+
+    /** Returns the instant this model was created. */
+    public Instant at() { return at;}
 
     public boolean isEmpty() { return nodeTimeseries().isEmpty(); }
 
+    /** Returns the predicted duration of a rescaling of this cluster */
+    public Duration scalingDuration() { return scalingDuration; }
+
+    /** Returns the average of the peak load measurement in each dimension, from each node. */
+    public Load peakLoad() {
+        return nodeTimeseries().peakLoad();
+    }
+
+    /** Returns the relative load adjustment accounting for redundancy in this. */
+    public Load redundancyAdjustment() {
+        return loadWith(nodeCount(), groupCount());
+    }
+
     /** Returns the relative load adjustment that should be made to this cluster given available measurements. */
     public Load loadAdjustment() {
         if (nodeTimeseries().measurementsPerNode() < 0.5) return Load.one(); // Don't change based on very little data
@@ -158,19 +179,6 @@ public class ClusterModel {
         return Duration.ofMinutes(5);
     }
 
-    /** Returns the predicted duration of a rescaling of this cluster */
-    public Duration scalingDuration() { return scalingDuration; }
-
-    /** Returns the average of the peak load measurement in each dimension, from each node. */
-    public Load peakLoad() {
-        return nodeTimeseries().peakLoad();
-    }
-
-    /** Returns the relative load adjustment accounting for redundancy in this. */
-    public Load redundancyAdjustment() {
-        return loadWith(nodeCount(), groupCount());
-    }
-
     /**
      * Returns the relative load adjustment accounting for redundancy given these nodes+groups
      * relative to node nodes+groups in this.
@@ -189,8 +197,8 @@ public class ClusterModel {
             double queryCpu = queryCpuPerGroup * groupCount() / groups;
             double writeCpu = (double)groupSize() / groupSize;
-            return new Load(queryCpuFraction() * queryCpu + (1 - queryCpuFraction()) * writeCpu,
-                            (1 - fixedMemoryFraction()) * (double)groupSize() / groupSize + fixedMemoryFraction() * 1,
+            return new Load(cpu.queryFraction() * queryCpu + (1 - cpu.queryFraction()) * writeCpu,
+                            (1 - memory.fixedFraction()) * (double)groupSize() / groupSize + memory.fixedFraction() * 1,
                             (double)groupSize() / groupSize);
         }
         else {
@@ -203,7 +211,7 @@ public class ClusterModel {
      * if one of the nodes go down.
      */
     public Load idealLoad() {
-        var ideal = new Load(idealCpuLoad(), idealMemoryLoad(), idealDiskLoad()).divide(redundancyAdjustment());
+        var ideal = new Load(cpu.idealLoad(), memory.idealLoad(), disk.idealLoad()).divide(redundancyAdjustment());
         if ( !cluster.bcpGroupInfo().isEmpty() && cluster.bcpGroupInfo().queryRate() > 0) {
             // Since we have little local information, use information about query cost in other groups
@@ -225,18 +233,7 @@ public class ClusterModel {
     public Autoscaling.Metrics metrics() {
         return new Autoscaling.Metrics(averageQueryRate().orElse(0),
                                        growthRateHeadroom(),
-                                       cpuCostPerQuery().orElse(0));
-    }
-
-    /** Returns the instant this model was created. */
-    public Instant at() { return at;}
-
-    private OptionalDouble cpuCostPerQuery() {
-        if (averageQueryRate().isEmpty() || averageQueryRate().getAsDouble() == 0.0) return OptionalDouble.empty();
-        // TODO: Query rate should generally be sampled at the time where we see the peak resource usage
-        int fanOut = clusterSpec.type().isContainer() ? 1 : groupSize();
-        return OptionalDouble.of(peakLoad().cpu() * queryCpuFraction() * fanOut * nodes.not().retired().first().get().resources().vcpu()
-                                 / averageQueryRate().getAsDouble() / groupCount());
+                                       cpu.costPerQuery().orElse(0));
     }
 
     private Load adjustQueryDependentIdealLoadByBcpGroupInfo(Load ideal) {
@@ -246,7 +243,7 @@ public class ClusterModel {
                                                  : cluster.bcpGroupInfo().queryRate() )
                                          * cluster.bcpGroupInfo().growthRateHeadroom() * trafficShiftHeadroom();
         double neededTotalVcpPerGroup = cluster.bcpGroupInfo().cpuCostPerQuery() * targetQueryRateToHandle / groupCount() +
-                                        ( 1 - queryCpuFraction()) * idealCpuLoad() *
+                                        ( 1 - cpu.queryFraction()) * cpu.idealLoad() *
                                         (clusterSpec.type().isContainer() ? 1 : groupSize());
         double cpuAdjustment = neededTotalVcpPerGroup / currentClusterTotalVcpuPerGroup;
@@ -260,10 +257,6 @@ public class ClusterModel {
         return lastCompletion.get().isAfter(clock.instant().minus(period));
     }
 
-    private ClusterNodesTimeseries nodeTimeseries() { return nodeTimeseries; }
-
-    private ClusterTimeseries clusterTimeseries() { return clusterTimeseries; }
-
     /**
      * Returns the predicted max query growth rate per minute as a fraction of the average traffic
     * in the scaling window.
@@ -313,17 +306,6 @@ public class ClusterModel {
         return nodes > 1 ? (groups == 1 ? 1 : groups - 1) : groups;
     }
 
-    /** Ideal cpu load must take the application traffic fraction into account. */
-    private double idealCpuLoad() {
-        double queryCpuFraction = queryCpuFraction();
-
-        // Assumptions: 1) Write load is not organic so we should not grow to handle more.
-        //                 (TODO: But allow applications to set their target write rate and size for that)
-        //              2) Write load does not change in BCP scenarios.
-        return queryCpuFraction * 1/growthRateHeadroom() * 1/trafficShiftHeadroom() * idealQueryCpuLoad +
-               (1 - queryCpuFraction) * idealWriteCpuLoad;
-    }
-
     /** Returns the headroom for growth during organic traffic growth as a multiple of current resources. */
     private double growthRateHeadroom() {
         if ( ! nodeRepository.zone().environment().isProduction()) return 1;
@@ -361,78 +343,88 @@ public class ClusterModel {
         return ( (headroom -1 ) * Math.min(1, averageQueryRate().orElse(0) / queryRateGivingFullConfidence) ) + 1;
     }
 
-    /** The estimated fraction of cpu usage which goes to processing queries vs. writes */
-    private double queryCpuFraction() {
-        OptionalDouble writeRate = clusterTimeseries().writeRate(scalingDuration(), clock);
-        if (averageQueryRate().orElse(0) == 0 && writeRate.orElse(0) == 0) return queryCpuFraction(0.5);
-        return queryCpuFraction(averageQueryRate().orElse(0) / (averageQueryRate().orElse(0) + writeRate.orElse(0)));
-    }
-
-    private double queryCpuFraction(double queryRateFraction) {
-        double relativeQueryCost = 9; // How much more expensive are queries than writes? TODO: Measure
-        double writeFraction = 1 - queryRateFraction;
-        return queryRateFraction * relativeQueryCost / (queryRateFraction * relativeQueryCost + writeFraction);
-    }
-
-    private double idealMemoryLoad() {
-        if (clusterSpec.type().isContainer()) return idealContainerMemoryLoad;
-        if (clusterSpec.type() == ClusterSpec.Type.admin) return idealContainerMemoryLoad; // Not autoscaled, but ideal shown in console
-        return idealContentMemoryLoad;
-    }
-
-    /**
-     * Returns the fraction of memory of the current allocation which is currently consumed by
-     * fixed data structures which take the same amount of space regardless of document volume.
-     */
-    private double fixedMemoryFraction() {
-        if (clusterSpec().type().isContainer()) return 1.0;
-        double fixedMemory = nodeMemoryOverheadGb +
-                             (averageRealMemory() - nodeMemoryOverheadGb) * 0.05; // TODO: Measure actual content node usage
-        return fixedMemory / averageRealMemory();
-    }
-
-    private double averageRealMemory() {
-        if (nodes.isEmpty()) { // we're estimating
-            var initialResources = new CapacityPolicies(nodeRepository).specifyFully(cluster.minResources().nodeResources(),
-                                                                                     clusterSpec,
-                                                                                     application.id());
-            return nodeRepository.resourcesCalculator().requestToReal(initialResources,
-                                                                      nodeRepository.exclusiveAllocation(clusterSpec),
-                                                                      false).memoryGb();
-        }
-        else {
-            return nodes.stream()
-                        .mapToDouble(node -> nodeRepository.resourcesCalculator().realResourcesOf(node, nodeRepository).memoryGb())
-                        .average()
-                        .getAsDouble();
-        }
-    }
-
-    private double idealDiskLoad() {
-        // Stateless clusters are not expected to consume more disk over time -
-        // if they do it is due to logs which will be rotated away right before the disk is full
-        return clusterSpec.isStateful() ? idealContentDiskLoad : idealContainerDiskLoad;
-    }
+    private class CpuModel {
+
+        /** Ideal cpu load must take the application traffic fraction into account. */
+        double idealLoad() {
+            double queryCpuFraction = queryFraction();
+
+            // Assumptions: 1) Write load is not organic so we should not grow to handle more.
+            //                 (TODO: But allow applications to set their target write rate and size for that)
+            //              2) Write load does not change in BCP scenarios.
+            return queryCpuFraction * 1/growthRateHeadroom() * 1/trafficShiftHeadroom() * idealQueryCpuLoad +
+                   (1 - queryCpuFraction) * idealWriteCpuLoad;
+        }
+
+        OptionalDouble costPerQuery() {
+            if (averageQueryRate().isEmpty() || averageQueryRate().getAsDouble() == 0.0) return OptionalDouble.empty();
+            // TODO: Query rate should generally be sampled at the time where we see the peak resource usage
+            int fanOut = clusterSpec.type().isContainer() ? 1 : groupSize();
+            return OptionalDouble.of(peakLoad().cpu() * cpu.queryFraction() * fanOut * nodes.not().retired().first().get().resources().vcpu()
+                                     / averageQueryRate().getAsDouble() / groupCount());
+        }
+
+        /** The estimated fraction of cpu usage which goes to processing queries vs. writes */
+        double queryFraction() {
+            OptionalDouble writeRate = clusterTimeseries().writeRate(scalingDuration(), clock);
+            if (averageQueryRate().orElse(0) == 0 && writeRate.orElse(0) == 0) return queryFraction(0.5);
+            return queryFraction(averageQueryRate().orElse(0) / (averageQueryRate().orElse(0) + writeRate.orElse(0)));
+        }
+
+        double queryFraction(double queryRateFraction) {
+            double relativeQueryCost = 9; // How much more expensive are queries than writes? TODO: Measure
+            double writeFraction = 1 - queryRateFraction;
+            return queryRateFraction * relativeQueryCost / (queryRateFraction * relativeQueryCost + writeFraction);
+        }
+
+    }
+
+    private class MemoryModel {
+
+        double idealLoad() {
+            if (clusterSpec.type().isContainer()) return idealContainerMemoryLoad;
+            if (clusterSpec.type() == ClusterSpec.Type.admin) return idealContainerMemoryLoad; // Not autoscaled, but ideal shown in console
+            return idealContentMemoryLoad;
+        }
+
+        /**
+         * Returns the fraction of memory of the current allocation which is currently consumed by
+         * fixed data structures which take the same amount of space regardless of document volume.
+         */
+        double fixedFraction() {
+            if (clusterSpec().type().isContainer()) return 1.0;
+            double fixedMemory = nodeMemoryOverheadGb +
+                                 (averageReal() - nodeMemoryOverheadGb) * 0.05; // TODO: Measure actual content node usage
+            return fixedMemory / averageReal();
+        }
+
+        double averageReal() {
+            if (nodes.isEmpty()) { // we're estimating
+                var initialResources = new CapacityPolicies(nodeRepository).specifyFully(cluster.minResources().nodeResources(),
+                                                                                         clusterSpec,
+                                                                                         application.id());
+                return nodeRepository.resourcesCalculator().requestToReal(initialResources,
+                                                                          nodeRepository.exclusiveAllocation(clusterSpec),
+                                                                          false).memoryGb();
+            }
+            else {
+                return nodes.stream()
+                            .mapToDouble(node -> nodeRepository.resourcesCalculator().realResourcesOf(node, nodeRepository).memoryGb())
+                            .average()
+                            .getAsDouble();
+            }
+        }
+
+    }
+
+    private class DiskModel {
+
+        double idealLoad() {
+            // Stateless clusters are not expected to consume more disk over time -
+            // if they do it is due to logs which will be rotated away right before the disk is full
+            return clusterSpec.isStateful() ? idealContentDiskLoad : idealContainerDiskLoad;
+        }
+
+    }
 
     /**
      * Create a cluster model if possible and logs a warning and returns empty otherwise.
      * This is useful in cases where it's possible to continue without the cluster model,
      * as QuestDb is known to temporarily fail during reading of data.
      */
     public static Optional<ClusterModel> create(NodeRepository nodeRepository,
                                                 Application application,
                                                 ClusterSpec clusterSpec,
                                                 Cluster cluster,
                                                 NodeList clusterNodes,
                                                 MetricsDb metricsDb,
                                                 Clock clock) {
         try {
             return Optional.of(new ClusterModel(nodeRepository, application, clusterSpec, cluster, clusterNodes, metricsDb, clock));
         }
         catch (Exception e) {
             log.log(Level.WARNING, "Failed creating a cluster model for " + application + " " + cluster, e);
             return Optional.empty();
         }
     }
 
 }
```
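For readers unfamiliar with the pattern this commit applies, here is a minimal, self-contained Java sketch of the idea: per-resource calculations that lived as flat private methods on ClusterModel are grouped into nested per-dimension classes. All names, fields, and numbers below are illustrative assumptions for the sketch, not the actual Vespa implementation.

```java
import java.util.Arrays;

// Sketch of the ClusterModel refactor: one nested class per resource dimension.
// Hypothetical, simplified names and target values throughout.
public class ClusterModelSketch {

    private final boolean stateful;   // content cluster vs. stateless container (assumed field)
    private final double queryShare;  // fraction of cpu spent on queries (assumed field)

    // The owner holds one model per dimension, mirroring cpu/memory/disk in the diff.
    private final Cpu cpu = new Cpu();
    private final Memory memory = new Memory();
    private final Disk disk = new Disk();

    public ClusterModelSketch(boolean stateful, double queryShare) {
        this.stateful = stateful;
        this.queryShare = queryShare;
    }

    /** Each dimension contributes its own ideal load; the owner only aggregates. */
    public double[] idealLoad() {
        return new double[] { cpu.idealLoad(), memory.idealLoad(), disk.idealLoad() };
    }

    // Non-static inner classes close over the enclosing instance, so the
    // per-dimension logic reads shared state directly instead of taking parameters.
    private class Cpu {
        double idealLoad() {
            double idealQuery = 0.8, idealWrite = 0.95; // illustrative targets
            return queryShare * idealQuery + (1 - queryShare) * idealWrite;
        }
    }

    private class Memory {
        double idealLoad() { return stateful ? 0.65 : 0.85; } // illustrative targets
    }

    private class Disk {
        double idealLoad() { return stateful ? 0.75 : 0.95; } // illustrative targets
    }

    public static void main(String[] args) {
        ClusterModelSketch model = new ClusterModelSketch(true, 0.7);
        System.out.println(Arrays.toString(model.idealLoad())); // [0.845, 0.65, 0.75]
    }
}
```

Because the nested classes are non-static, they can still call enclosing members such as growthRateHeadroom() and read fields such as clusterSpec, which is why the moved methods in the diff keep their bodies essentially unchanged while gaining shorter names like cpu.queryFraction() and memory.fixedFraction().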