about | summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author Jon Bratseth <bratseth@gmail.com> 2023-02-20 13:35:42 +0100
committer GitHub <noreply@github.com> 2023-02-20 13:35:42 +0100
commit ea2aaf074611153783b0fbfd10659af39e3566a3 (patch)
tree 9ee5d2c2cf71729122f6593fc57e8b33cfa20210
parent 8895f9401559fe00dbeacfc9eee9f89d4dd8374b (diff)
parent 3469c30677bc5f66b09dbb24426db2a33c0f4fbe (diff)
Merge pull request #26111 from vespa-engine/bratseth/autoscaling-measurements
Bratseth/autoscaling measurements
-rw-r--r-- node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/ClusterModel.java 2
-rw-r--r-- node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/ClusterNodesTimeseries.java 35
2 files changed, 5 insertions, 32 deletions
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/ClusterModel.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/ClusterModel.java
index a30c9b588c2..264664f91b2 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/ClusterModel.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/ClusterModel.java
@@ -119,7 +119,7 @@ public class ClusterModel {
/** Returns the relative load adjustment that should be made to this cluster given available measurements. */
public Load loadAdjustment() {
- if (nodeTimeseries().isEmpty()) return Load.one();
+ if (nodeTimeseries().measurementsPerNode() < 0.5) return Load.one(); // Don't change based on very little data
Load adjustment = peakLoad().divide(idealLoad());
if (! safeToScaleDown())
adjustment = adjustment.map(v -> v < 1 ? 1 : v);
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/ClusterNodesTimeseries.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/ClusterNodesTimeseries.java
index b86a24af5c9..0be4175c2c1 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/ClusterNodesTimeseries.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/ClusterNodesTimeseries.java
@@ -28,10 +28,10 @@ public class ClusterNodesTimeseries {
public ClusterNodesTimeseries(Duration period, Cluster cluster, NodeList clusterNodes, MetricsDb db) {
this.clusterNodes = clusterNodes;
- // See warmupSeconds*4 into the past to see any generation change in it
+ // See warmupDuration*4 into the past to see any generation change in it.
// If none can be detected we assume the node is new/was down.
// If either this is the case, or there is a generation change, we ignore
- // the first warmupWindow metrics
+ // the first warmupWindow metrics.
var timeseries = db.getNodeTimeseries(period.plus(warmupDuration.multipliedBy(4)), clusterNodes);
if (cluster.lastScalingEvent().isPresent()) {
long currentGeneration = cluster.lastScalingEvent().get().generation();
@@ -52,42 +52,15 @@ public class ClusterNodesTimeseries {
}
/** Returns the average number of measurements per node */
- public int measurementsPerNode() {
+ public double measurementsPerNode() {
if (clusterNodes.size() == 0) return 0;
int measurementCount = timeseries.stream().mapToInt(m -> m.size()).sum();
- return measurementCount / clusterNodes.size();
+ return (double)measurementCount / clusterNodes.size();
}
/** Returns the number of nodes measured in this */
public int nodesMeasured() { return timeseries.size(); }
- /** Returns the average load after the given instant */
- public Load averageLoad() {
- Load total = Load.zero();
- int count = 0;
- for (var nodeTimeseries : timeseries) {
- for (var snapshot : nodeTimeseries.asList()) {
- total = total.add(snapshot.load());
- count++;
- }
- }
- return total.divide(count);
- }
-
- /** Returns average of the latest load reading from each node */
- public Load currentLoad() {
- Load total = Load.zero();
- int count = 0;
- for (var nodeTimeseries : timeseries) {
- Optional<NodeMetricSnapshot> last = nodeTimeseries.last();
- if (last.isEmpty()) continue;
-
- total = total.add(last.get().load());
- count++;
- }
- return total.divide(count);
- }
-
/**
* Returns the "peak load" in this: Which is for each load dimension,
* the average of the highest reading for that dimension on each node.