diff options
author | Jon Bratseth <bratseth@oath.com> | 2021-06-02 23:11:34 +0200 |
---|---|---|
committer | GitHub <noreply@github.com> | 2021-06-02 23:11:34 +0200 |
commit | 6d7ba4368a8d81247fa66bfe60557dadaffb4108 (patch) | |
tree | b21118a7b63851f726b121705d9fdfa6506df5e1 | |
parent | a8b3591b02fa02520ceb1d00833215e4146a7a6b (diff) | |
parent | 5e9f864dc3f5eb115173d6123abe6febede8064a (diff) |
Merge pull request #18097 from vespa-engine/bratseth/use-proton-metrics
Use proton metrics for memory and disk utilization
3 files changed, 96 insertions, 28 deletions
diff --git a/config-model/src/main/java/com/yahoo/vespa/model/admin/monitoring/AutoscalingMetrics.java b/config-model/src/main/java/com/yahoo/vespa/model/admin/monitoring/AutoscalingMetrics.java index 6f467b21535..e2aa325c380 100644 --- a/config-model/src/main/java/com/yahoo/vespa/model/admin/monitoring/AutoscalingMetrics.java +++ b/config-model/src/main/java/com/yahoo/vespa/model/admin/monitoring/AutoscalingMetrics.java @@ -19,19 +19,31 @@ public class AutoscalingMetrics { private static MetricSet create() { List<String> metrics = new ArrayList<>(); + metrics.add("cpu.util"); - metrics.add("mem.util"); - metrics.add("disk.util"); + + // Memory util + metrics.add("mem.util"); // node level - default + metrics.add("content.proton.resource_usage.memory.average"); // better for content as it is the basis for blocking + + // Disk util + metrics.add("disk.util"); // node level - default + metrics.add("content.proton.resource_usage.disk.average"); // better for content as it is the basis for blocking + metrics.add("application_generation"); + metrics.add("in_service"); + // Query rate metrics.add("queries.rate"); // container metrics.add("content.proton.documentdb.matching.queries.rate"); // content + // Write rate metrics.add("feed.http-requests.rate"); // container metrics.add("vds.filestor.alldisks.allthreads.put.sum.count.rate"); // content metrics.add("vds.filestor.alldisks.allthreads.remove.sum.count.rate"); // content metrics.add("vds.filestor.alldisks.allthreads.update.sum.count.rate"); // content + return new MetricSet("autoscaling", toMetrics(metrics)); } diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/MetricsResponse.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/MetricsResponse.java index 665155ecf68..3a037efed98 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/MetricsResponse.java +++ 
b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/MetricsResponse.java @@ -112,64 +112,112 @@ public class MetricsResponse { private enum Metric { cpu { // a node resource + + @Override public List<String> metricResponseNames() { return List.of("cpu.util"); } - double computeFinal(List<Double> values) { - return values.stream().mapToDouble(v -> v).average().orElse(0) / 100; // % to ratio + + @Override + double computeFinal(ListMap<String, Double> values) { + return values.values().stream().flatMap(List::stream).mapToDouble(v -> v).average().orElse(0) / 100; // % to ratio } + }, memory { // a node resource - public List<String> metricResponseNames() { return List.of("mem.util"); } - double computeFinal(List<Double> values) { - return values.stream().mapToDouble(v -> v).average().orElse(0) / 100; // % to ratio + + @Override + public List<String> metricResponseNames() { + return List.of("content.proton.resource_usage.memory.average", "mem.util"); } + + @Override + double computeFinal(ListMap<String, Double> values) { + var valueList = values.get("content.proton.resource_usage.memory.average"); // prefer over mem.util + if ( ! valueList.isEmpty()) return valueList.get(0); + + valueList = values.get("mem.util"); + if ( ! valueList.isEmpty()) return valueList.get(0) / 100; // % to ratio + + return 0; + } + }, disk { // a node resource - public List<String> metricResponseNames() { return List.of("disk.util"); } - double computeFinal(List<Double> values) { - return values.stream().mapToDouble(v -> v).average().orElse(0) / 100; // % to ratio + + @Override + public List<String> metricResponseNames() { + return List.of("content.proton.resource_usage.disk.average", "disk.util"); } + + @Override + double computeFinal(ListMap<String, Double> values) { + var valueList = values.get("content.proton.resource_usage.disk.average"); // prefer over disk.util + if ( ! valueList.isEmpty()) return valueList.get(0); + + valueList = values.get("disk.util"); + if ( ! 
valueList.isEmpty()) return valueList.get(0) / 100; // % to ratio + + return 0; + } + }, generation { // application config generation active on the node + + @Override public List<String> metricResponseNames() { return List.of("application_generation"); } - double computeFinal(List<Double> values) { - return values.stream().mapToDouble(v -> v).min().orElse(-1); + + @Override + double computeFinal(ListMap<String, Double> values) { + return values.values().stream().flatMap(List::stream).mapToDouble(v -> v).min().orElse(-1); } + }, inService { + + @Override public List<String> metricResponseNames() { return List.of("in_service"); } - double computeFinal(List<Double> values) { + + @Override + double computeFinal(ListMap<String, Double> values) { // Really a boolean. Default true. If any is oos -> oos. - return values.stream().anyMatch(v -> v == 0) ? 0 : 1; + return values.values().stream().flatMap(List::stream).anyMatch(v -> v == 0) ? 0 : 1; } + }, queryRate { // queries per second + + @Override public List<String> metricResponseNames() { return List.of("queries.rate", "content.proton.documentdb.matching.queries.rate"); } + }, writeRate { // writes per second + + @Override public List<String> metricResponseNames() { return List.of("feed.http-requests.rate", "vds.filestor.alldisks.allthreads.put.sum.count.rate", "vds.filestor.alldisks.allthreads.remove.sum.count.rate", "vds.filestor.alldisks.allthreads.update.sum.count.rate"); } + }; - /** The name of this metric as emitted from its source */ + /** + * The names of this metric as emitted from its source. + * A map of the values of these names which were present in the response will + * be provided to computeFinal to decide on a single value. 
+ */ public abstract List<String> metricResponseNames(); - double computeFinal(List<Double> values) { return values.stream().mapToDouble(v -> v).sum(); } + /** Computes the final metric value */ + double computeFinal(ListMap<String, Double> values) { + return values.values().stream().flatMap(List::stream).mapToDouble(v -> v).sum(); + } public double from(ListMap<String, Double> metricValues) { - // Multiple metric names may contribute to the same logical metric. - // Usually one per service, but we aggregate here to not require that. - List<Double> values = new ArrayList<>(1); - for (String metricName : metricResponseNames()) { - List<Double> valuesForName = metricValues.get(metricName); - if (valuesForName == null) continue; - values.addAll(valuesForName); - } + ListMap<String, Double> values = new ListMap<>(metricValues); + values.keySet().retainAll(metricResponseNames()); return computeFinal(values); } diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/MetricsV2MetricsFetcherTest.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/MetricsV2MetricsFetcherTest.java index 5f1a36e7b56..07f8fa41d24 100644 --- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/MetricsV2MetricsFetcherTest.java +++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/MetricsV2MetricsFetcherTest.java @@ -56,9 +56,9 @@ public class MetricsV2MetricsFetcherTest { assertEquals(0.820, values.get(0).getSecond().load().disk(), delta); assertEquals("host-2.yahoo.com", values.get(1).getFirst()); - assertEquals(0.2, values.get(1).getSecond().load().cpu(), delta); - assertEquals(0.0, values.get(1).getSecond().load().memory(), delta); - assertEquals(0.4, values.get(1).getSecond().load().disk(), delta); + assertEquals(0.0, values.get(1).getSecond().load().cpu(), delta); + assertEquals(0.35, values.get(1).getSecond().load().memory(), delta); + assertEquals(0.45, 
values.get(1).getSecond().load().disk(), delta); assertEquals(45.0, values.get(1).getSecond().queryRate(), delta); } @@ -134,7 +134,7 @@ public class MetricsV2MetricsFetcherTest { " \"metrics\": [\n" + " {\n" + " \"values\": {\n" + - " \"cpu.util\": 20,\n" + + " \"mem.util\": 30,\n" + " \"disk.util\": 40\n" + " },\n" + " \"dimensions\": {\n" + @@ -161,6 +161,14 @@ public class MetricsV2MetricsFetcherTest { " },\n" + " {\n" + " \"values\": {\n" + + " \"content.proton.resource_usage.memory.average\": 0.35,\n" + + " \"content.proton.resource_usage.disk.average\": 0.45\n" + + " },\n" + + " \"dimensions\": {\n" + + " }\n" + + " },\n" + + " {\n" + + " \"values\": {\n" + " \"content.proton.documentdb.matching.queries.rate\": 13.5\n" + " },\n" + " \"dimensions\": {\n" + |