summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: Jon Bratseth <bratseth@oath.com> 2021-04-26 22:05:45 +0200
committer: GitHub <noreply@github.com> 2021-04-26 22:05:45 +0200
commit: cb860b594152af5fdf22ec7f8db6d96299daffdb (patch)
tree: c159662912db7c66cee6d81851b3bb4d32343638
parent: c742485abd7ebaa5d325a6b59c9bffc393b907e6 (diff)
parent: 448becd3307eabea4a7d30c82ca7dfa8b9034975 (diff)
Merge pull request #17609 from vespa-engine/bratseth/max-duration
Set a max scaling duration
-rw-r--r-- node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/ClusterModel.java | 23
-rw-r--r-- node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/AutoscalingMaintainer.java | 9
2 files changed, 22 insertions, 10 deletions
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/ClusterModel.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/ClusterModel.java
index e3622c8f076..f5eb67f0979 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/ClusterModel.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/ClusterModel.java
@@ -150,7 +150,8 @@ public class ClusterModel {
for (ScalingEvent event : cluster.scalingEvents()) {
if (event.duration().isEmpty()) continue;
completedEventCount++;
- totalDuration = totalDuration.plus(event.duration().get());
+ // Assume we have missed timely recording completion if it is longer than 4 days
+ totalDuration = totalDuration.plus(maximum(Duration.ofDays(4), event.duration().get()));
}
if (completedEventCount == 0) { // Use defaults
@@ -160,13 +161,25 @@ public class ClusterModel {
else {
Duration predictedDuration = totalDuration.dividedBy(completedEventCount);
- // TODO: Remove when we have reliable completion for content clusters
- if (clusterSpec.isStateful() && predictedDuration.minus(Duration.ofHours(12)).isNegative())
- return Duration.ofHours(12);
+ if ( clusterSpec.isStateful() ) // TODO: Remove when we have reliable completion for content clusters
+ predictedDuration = minimum(Duration.ofHours(12), predictedDuration);
+
+ predictedDuration = minimum(Duration.ofMinutes(5), predictedDuration);
- if (predictedDuration.minus(Duration.ofMinutes(5)).isNegative()) return Duration.ofMinutes(5); // minimum
return predictedDuration;
}
}
+ private static Duration minimum(Duration smallestAllowed, Duration duration) {
+ if (duration.minus(smallestAllowed).isNegative())
+ return smallestAllowed;
+ return duration;
+ }
+
+ private static Duration maximum(Duration largestAllowed, Duration duration) {
+ if ( ! duration.minus(largestAllowed).isNegative())
+ return largestAllowed;
+ return duration;
+ }
+
}
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/AutoscalingMaintainer.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/AutoscalingMaintainer.java
index 17d33ef501c..7da6e0d3ebe 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/AutoscalingMaintainer.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/AutoscalingMaintainer.java
@@ -111,16 +111,15 @@ public class AutoscalingMaintainer extends NodeRepositoryMaintainer {
if (clusterNodes.retired().stream()
.anyMatch(node -> node.history().hasEventAt(History.Event.Type.retired, event.at())))
return cluster;
- // - 2. all nodes have switched to the right config generation
+ // - 2. all nodes have switched to the right config generation (currently only measured on containers)
for (var nodeTimeseries : nodeRepository().metricsDb().getNodeTimeseries(Duration.between(event.at(), clock().instant()),
clusterNodes)) {
- Optional<NodeMetricSnapshot> firstOnNewGeneration =
+ Optional<NodeMetricSnapshot> onNewGeneration =
nodeTimeseries.asList().stream()
- .filter(snapshot -> snapshot.generation() >= event.generation()).findFirst();
- if (firstOnNewGeneration.isEmpty()) return cluster; // Not completed
+ .filter(snapshot -> snapshot.generation() >= event.generation()).findAny();
+ if (onNewGeneration.isEmpty()) return cluster; // Not completed
}
-
// Set the completion time to the instant we notice completion.
Instant completionTime = nodeRepository().clock().instant();
return cluster.with(event.withCompletion(completionTime));