From c006b1fca074a6bc40245a16ae4630a27580ae3c Mon Sep 17 00:00:00 2001 From: Jon Bratseth Date: Wed, 17 Mar 2021 18:03:40 +0100 Subject: Adjust growth rate by average rate over window --- .../provision/autoscale/ClusterTimeseries.java | 52 +++++++++++++++------- .../hosted/provision/autoscale/ResourceTarget.java | 15 ++++--- .../provision/restapi/ApplicationSerializer.java | 2 +- .../provision/autoscale/ClusterTimeseriesTest.java | 19 ++++---- 4 files changed, 56 insertions(+), 32 deletions(-) (limited to 'node-repository') diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/ClusterTimeseries.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/ClusterTimeseries.java index 75270f8afc6..f8b6d59f20f 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/ClusterTimeseries.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/ClusterTimeseries.java @@ -9,6 +9,8 @@ import java.time.Instant; import java.util.ArrayList; import java.util.Collections; import java.util.List; +import java.util.Optional; +import java.util.OptionalDouble; /** * A list of metric snapshots from a cluster, sorted by increasing time (newest last). 
@@ -45,14 +47,16 @@ public class ClusterTimeseries { return new ClusterTimeseries(cluster, list); } - /** The max query growth rate we can predict from this time-series as a fraction of the current traffic per minute */ - public double maxQueryGrowthRate() { + /** + * The max query growth rate we can predict from this time-series as a fraction of the average traffic in the window + */ + public double maxQueryGrowthRate(Duration window, Clock clock) { if (cachedMaxQueryGrowthRate != null) return cachedMaxQueryGrowthRate; - return cachedMaxQueryGrowthRate = computeMaxQueryGrowthRate(); + return cachedMaxQueryGrowthRate = computeMaxQueryGrowthRate(window, clock); } - private double computeMaxQueryGrowthRate() { + private double computeMaxQueryGrowthRate(Duration window, Clock clock) { if (snapshots.isEmpty()) return 0.1; // Find the period having the highest growth rate, where total growth exceeds 30% increase @@ -82,8 +86,9 @@ public class ClusterTimeseries { else return 0.0; // ... because load is stable } - if (currentQueryRate() == 0) return 0.1; // Growth not expressible as a fraction of the current rate - return maxGrowthRate / currentQueryRate(); + OptionalDouble queryRate = queryRate(window, clock); + if (queryRate.orElse(0) == 0) return 0.1; // Growth not expressible as a fraction of the current rate + return maxGrowthRate / queryRate.getAsDouble(); } /** @@ -94,21 +99,38 @@ public class ClusterTimeseries { if (snapshots.isEmpty()) return 0.5; var max = snapshots.stream().mapToDouble(ClusterMetricSnapshot::queryRate).max().getAsDouble(); if (max == 0) return 1.0; - Instant oldest = clock.instant().minus(window); - var average = snapshots.stream() - .filter(snapshot -> snapshot.at().isAfter(oldest)) - .mapToDouble(snapshot -> snapshot.queryRate()) - .average(); + var average = queryRateTemp(window, clock); if (average.isEmpty()) return 0.5; // No measurements in the relevant time period return average.getAsDouble() / max; } - public double currentQueryRate() { 
- return queryRateAt(snapshots.size() - 1); + /** Returns the average query rate in the given window, or empty if there are no measurements in it */ + public OptionalDouble queryRateTemp(Duration window, Clock clock) { + Instant oldest = clock.instant().minus(window); + return snapshots.stream() + .filter(snapshot -> snapshot.at().isAfter(oldest)) + .mapToDouble(snapshot -> snapshot.queryRate()) + .average(); + } + + /** Returns the average query rate in the given window, or empty if there are no measurements in it */ + public OptionalDouble queryRate(Duration window, Clock clock) { + if (1==1) return snapshots.isEmpty() ? OptionalDouble.empty() : OptionalDouble.of(queryRateAt(snapshots.size() - 1)); // TODO + Instant oldest = clock.instant().minus(window); + return snapshots.stream() + .filter(snapshot -> snapshot.at().isAfter(oldest)) + .mapToDouble(snapshot -> snapshot.queryRate()) + .average(); } - public double currentWriteRate() { - return writeRateAt(snapshots.size() - 1); + /** Returns the average write rate in the given window, or empty if there are no measurements in it */ + public OptionalDouble writeRate(Duration window, Clock clock) { + if (1==1) return snapshots.isEmpty() ? 
OptionalDouble.empty() : OptionalDouble.of(writeRateAt(snapshots.size() - 1)); // TODO + Instant oldest = clock.instant().minus(window); + return snapshots.stream() + .filter(snapshot -> snapshot.at().isAfter(oldest)) + .mapToDouble(snapshot -> snapshot.queryRate()) + .average(); } private double queryRateAt(int index) { diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/ResourceTarget.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/ResourceTarget.java index d7dd0fc3197..9f6a4fc77cd 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/ResourceTarget.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/ResourceTarget.java @@ -5,6 +5,7 @@ import com.yahoo.vespa.hosted.provision.applications.Application; import java.time.Clock; import java.time.Duration; +import java.util.OptionalDouble; /** * A resource target to hit for the allocation optimizer. @@ -78,10 +79,10 @@ public class ResourceTarget { ClusterTimeseries clusterTimeseries, Application application, Clock clock) { - double queryCpuFraction = queryCpuFraction(clusterTimeseries); + double queryCpuFraction = queryCpuFraction(clusterTimeseries, scalingDuration, clock); // What's needed to have headroom for growth during scale-up as a fraction of current resources? 
- double maxGrowthRate = clusterTimeseries.maxQueryGrowthRate(); // in fraction per minute of the current traffic + double maxGrowthRate = clusterTimeseries.maxQueryGrowthRate(scalingDuration, clock); // in fraction per minute of the current traffic double growthRateHeadroom = 1 + maxGrowthRate * scalingDuration.toMinutes(); // Cap headroom at 10% above the historical observed peak double fractionOfMax = clusterTimeseries.queryFractionOfMax(scalingDuration, clock); @@ -106,11 +107,11 @@ public class ResourceTarget { (1 - queryCpuFraction) * idealWriteCpuLoad(); } - private static double queryCpuFraction(ClusterTimeseries clusterTimeseries) { - double queryRate = clusterTimeseries.currentQueryRate(); - double writeRate = clusterTimeseries.currentWriteRate(); - if (queryRate == 0 && writeRate == 0) return queryCpuFraction(0.5); - return queryCpuFraction(queryRate / (queryRate + writeRate)); + private static double queryCpuFraction(ClusterTimeseries clusterTimeseries, Duration scalingDuration, Clock clock) { + OptionalDouble queryRate = clusterTimeseries.queryRate(scalingDuration, clock); + OptionalDouble writeRate = clusterTimeseries.writeRate(scalingDuration, clock); + if (queryRate.orElse(0) == 0 && writeRate.orElse(0) == 0) return queryCpuFraction(0.5); + return queryCpuFraction(queryRate.orElse(0) / (queryRate.orElse(0) + writeRate.orElse(0))); } private static double queryCpuFraction(double queryFraction) { diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/restapi/ApplicationSerializer.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/restapi/ApplicationSerializer.java index 90b898640cf..8d8d7e01049 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/restapi/ApplicationSerializer.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/restapi/ApplicationSerializer.java @@ -76,7 +76,7 @@ public class ApplicationSerializer { scalingEventsToSlime(cluster.scalingEvents(), 
clusterObject.setArray("scalingEvents")); clusterObject.setString("autoscalingStatus", cluster.autoscalingStatus()); clusterObject.setLong("scalingDuration", scalingDuration.toMillis()); - clusterObject.setDouble("maxQueryGrowthRate", clusterTimeseries.maxQueryGrowthRate()); + clusterObject.setDouble("maxQueryGrowthRate", clusterTimeseries.maxQueryGrowthRate(scalingDuration, metricsDb.clock())); clusterObject.setDouble("currentQueryFractionOfMax", clusterTimeseries.queryFractionOfMax(scalingDuration, metricsDb.clock())); } diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/ClusterTimeseriesTest.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/ClusterTimeseriesTest.java index 9a08e7b3279..988edb71684 100644 --- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/ClusterTimeseriesTest.java +++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/ClusterTimeseriesTest.java @@ -22,22 +22,23 @@ public class ClusterTimeseriesTest { @Test public void test_empty() { + ManualClock clock = new ManualClock(); var timeseries = new ClusterTimeseries(cluster, List.of()); - assertEquals(0.1, timeseries.maxQueryGrowthRate(), delta); + assertEquals(0.1, timeseries.maxQueryGrowthRate(Duration.ofMinutes(5), clock), delta); } @Test public void test_constant_rate_short() { var clock = new ManualClock(); var timeseries = new ClusterTimeseries(cluster, queryRate(10, clock, t -> 50.0)); - assertEquals(0.1, timeseries.maxQueryGrowthRate(), delta); + assertEquals(0.1, timeseries.maxQueryGrowthRate(Duration.ofMinutes(5), clock), delta); } @Test public void test_constant_rate_long() { var clock = new ManualClock(); var timeseries = new ClusterTimeseries(cluster, queryRate(10000, clock, t -> 50.0)); - assertEquals(0.0, timeseries.maxQueryGrowthRate(), delta); + assertEquals(0.0, timeseries.maxQueryGrowthRate(Duration.ofMinutes(5), clock), delta); } @Test @@ -47,7 +48,7 @@ public class 
ClusterTimeseriesTest { snapshots.addAll(queryRate(1000, clock, t -> 50.0)); snapshots.addAll(queryRate(10, clock, t -> 400.0)); snapshots.addAll(queryRate(1000, clock, t -> 50.0)); - assertEquals((400-50)/5.0/50.0, new ClusterTimeseries(cluster, snapshots).maxQueryGrowthRate(), delta); + assertEquals((400-50)/5.0/50.0, new ClusterTimeseries(cluster, snapshots).maxQueryGrowthRate(Duration.ofMinutes(5), clock), delta); } @Test @@ -61,7 +62,7 @@ public class ClusterTimeseriesTest { snapshots.addAll(queryRate(1000, clock, t -> 50.0)); snapshots.addAll(queryRate(10, clock, t -> 800.0)); snapshots.addAll(queryRate(1000, clock, t -> 50.0)); - assertEquals((800-50)/5.0/50.0, new ClusterTimeseries(cluster, snapshots).maxQueryGrowthRate(), delta); + assertEquals((800-50)/5.0/50.0, new ClusterTimeseries(cluster, snapshots).maxQueryGrowthRate(Duration.ofMinutes(5), clock), delta); } @Test @@ -70,7 +71,7 @@ public class ClusterTimeseriesTest { var snapshots = new ArrayList(); snapshots.addAll(queryRate(100, clock, t -> (double)t)); snapshots.addAll(queryRate(100, clock, t -> 100.0 - t)); - assertEquals(1/5.0, new ClusterTimeseries(cluster, snapshots).maxQueryGrowthRate(), delta); + assertEquals(1/5.0, new ClusterTimeseries(cluster, snapshots).maxQueryGrowthRate(Duration.ofMinutes(1), clock), delta); } @Test @@ -78,7 +79,7 @@ public class ClusterTimeseriesTest { var clock = new ManualClock(); var timeseries = new ClusterTimeseries(cluster, queryRate(10000, clock, t -> 10.0 + 100.0 * Math.sin(t))); - assertEquals(0.26, timeseries.maxQueryGrowthRate(), delta); + assertEquals(0.26, timeseries.maxQueryGrowthRate(Duration.ofMinutes(5), clock), delta); } @Test @@ -86,7 +87,7 @@ public class ClusterTimeseriesTest { var clock = new ManualClock(); var timeseries = new ClusterTimeseries(cluster, queryRate(10000, clock, t -> 1000.0 + 10.0 * Math.sin(t))); - assertEquals(0.0, timeseries.maxQueryGrowthRate(), delta); + assertEquals(0.0, timeseries.maxQueryGrowthRate(Duration.ofMinutes(5), 
clock), delta); } @Test @@ -94,7 +95,7 @@ public class ClusterTimeseriesTest { var clock = new ManualClock(); var timeseries = new ClusterTimeseries(cluster, queryRate(10000, clock, t -> 10.0 + 100.0 * Math.sin(t) + 80.0 * Math.sin(10 * t)) ); - assertEquals(1.765, timeseries.maxQueryGrowthRate(), delta); + assertEquals(1.765, timeseries.maxQueryGrowthRate(Duration.ofMinutes(5), clock), delta); } private List queryRate(int count, ManualClock clock, IntFunction rate) { -- cgit v1.2.3