diff options
author | Jon Marius Venstad <jonmv@users.noreply.github.com> | 2023-10-30 11:02:26 +0100 |
---|---|---|
committer | GitHub <noreply@github.com> | 2023-10-30 11:02:26 +0100 |
commit | 8c7f1f8febf12a5b8468819ba9ff1ede014143d1 (patch) | |
tree | 992e5ae415cc0bdab039456f09ad2e31c83203fc | |
parent | b74e8dc450f9b14e53225775692ddd35e6ee0d5b (diff) | |
parent | 2137e53839cf0b7649b26ff5c2ebf7c598f0daf3 (diff) |
Merge pull request #29152 from vespa-engine/revert-29137-olaa/infrastructure-metric-sets-prometheus
Revert "Allow filtering prometheus metrics" MERGEOK
5 files changed, 59 insertions, 94 deletions
diff --git a/container-core/src/main/java/com/yahoo/container/jdisc/state/MetricsPacketsHandler.java b/container-core/src/main/java/com/yahoo/container/jdisc/state/MetricsPacketsHandler.java index b94fadec213..13584082ab8 100644 --- a/container-core/src/main/java/com/yahoo/container/jdisc/state/MetricsPacketsHandler.java +++ b/container-core/src/main/java/com/yahoo/container/jdisc/state/MetricsPacketsHandler.java @@ -28,6 +28,7 @@ import java.util.Collections; import java.util.HashMap; import java.util.List; import java.util.Map; +import java.util.Optional; import java.util.Set; import java.util.concurrent.TimeUnit; import java.util.stream.Collectors; @@ -107,7 +108,7 @@ public class MetricsPacketsHandler extends AbstractRequestHandler { return getMetricsArray(metricSetId); } if ("prometheus".equals(format)) { - return buildPrometheusOutput(metricSetId); + return buildPrometheusOutput(); } String output = getAllMetricsPackets(metricSetId) + "\n"; @@ -132,9 +133,8 @@ public class MetricsPacketsHandler extends AbstractRequestHandler { /** * Returns metrics in Prometheus format */ - private byte[] buildPrometheusOutput(String metricSetId) throws IOException { - var metrics = getPacketsForSnapshot(getSnapshot(), metricSetId, applicationName, timer.currentTimeMillis()); - return PrometheusHelper.buildPrometheusOutput(metrics, timer.currentTimeMillis()); + private byte[] buildPrometheusOutput() throws IOException { + return PrometheusHelper.buildPrometheusOutput(getSnapshot(), applicationName, timer.currentTimeMillis()); } private static String jsonToString(JsonNode jsonObject) throws JsonProcessingException { diff --git a/container-core/src/main/java/com/yahoo/container/jdisc/state/PrometheusHelper.java b/container-core/src/main/java/com/yahoo/container/jdisc/state/PrometheusHelper.java index b38bcfebb48..11ca9e228ec 100644 --- a/container-core/src/main/java/com/yahoo/container/jdisc/state/PrometheusHelper.java +++ b/container-core/src/main/java/com/yahoo/container/jdisc/state/PrometheusHelper.java @@ -1,56 +1,82 @@ // Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. package com.yahoo.container.jdisc.state; -import com.fasterxml.jackson.databind.JsonNode; - import java.io.ByteArrayOutputStream; import java.io.IOException; -import java.util.List; +import java.io.OutputStream; +import java.util.Map; +import static com.yahoo.container.jdisc.state.JsonUtil.sanitizeDouble; /** * @author olaa */ public class PrometheusHelper { - private static final String HELP_LINE = "# HELP %s\n# TYPE %s untyped\n"; + private static final String HELP_LINE = "# HELP %s \n# TYPE %s untyped\n"; private static final String METRIC_LINE = "%s{%s} %s %d\n"; - private static final String DIMENSION_KEY = "dimensions"; - private static final String METRIC_KEY = "metrics"; - private static final String APPLICATION_KEY = "application"; - protected static byte[] buildPrometheusOutput(List<JsonNode> metrics, long timestamp) throws IOException { + protected static byte[] buildPrometheusOutput(MetricSnapshot metricSnapshot, String application, long timestamp) throws IOException { var outputStream = new ByteArrayOutputStream(); - for (var metric : metrics) { - var metricDimensions = metric.get(DIMENSION_KEY); + for (Map.Entry<MetricDimensions, MetricSet> snapshotEntry : metricSnapshot) { + var metricDimensions = snapshotEntry.getKey(); + var metricSet = snapshotEntry.getValue(); + var dimensionBuilder = new StringBuilder(); - for (var it = metricDimensions.fieldNames(); it.hasNext(); ) { - var dimension = it.next(); + for (var dimension : metricDimensions) { dimensionBuilder - .append(sanitize(dimension)) + .append(sanitize(dimension.getKey())) .append("=\"") - .append(metricDimensions.get(dimension).asText()) + .append(dimension.getValue()) .append("\","); } - var application = metric.get(APPLICATION_KEY).asText(); dimensionBuilder.append("vespa_service=\"").append(application).append("\","); var dimensions = dimensionBuilder.toString(); - var metricValues = metric.get(METRIC_KEY); - for (var it = metricValues.fieldNames(); it.hasNext(); ) { - var metricName = it.next(); - var metricVal = metricValues.get(metricName).numberValue(); - outputStream.write(getMetricLines(sanitize(metricName), dimensions, metricVal, timestamp)); + + for (var metric : metricSet) { + var metricName = metric.getKey(); + var metricValue = metric.getValue(); + + if (metricValue instanceof CountMetric) { + var sanitizedMetricName = getSanitizedMetricName(metricName, "count"); + var value = ((CountMetric) metricValue).getCount(); + outputStream.write(getMetricLines(sanitizedMetricName, dimensions, value, timestamp)); + } else if (metricValue instanceof GaugeMetric) { + var gauge = (GaugeMetric) metricValue; + writeGaugeMetrics(outputStream, metricName, gauge, dimensions, timestamp); + } } } return outputStream.toByteArray(); } + private static void writeGaugeMetrics(OutputStream outputStream, String metricName, GaugeMetric gaugeMetric, String dimensions, long timestamp) throws IOException { + var sanitizedMetricName = getSanitizedMetricName(metricName, "last"); + var value = sanitizeDouble(gaugeMetric.getLast()); + outputStream.write(getMetricLines(sanitizedMetricName, dimensions, value, timestamp)); + + /* + For now - only push "last" value - to limit metric volume + sanitizedMetricName = getSanitizedMetricName(metricName, "average"); + value = sanitizeDouble(gaugeMetric.getAverage()); + outputStream.write(getMetricLines(sanitizedMetricName, dimensions, value, timestamp)); + + sanitizedMetricName = getSanitizedMetricName(metricName, "max"); + value = sanitizeDouble(gaugeMetric.getMax()); + outputStream.write(getMetricLines(sanitizedMetricName, dimensions, value, timestamp)); + */ + } + private static byte[] getMetricLines(String metricName, String dimensions, Number value, long timestamp) { return (String.format(HELP_LINE, metricName, metricName) + String.format(METRIC_LINE, metricName, dimensions, value, timestamp)).getBytes(); } + private static String getSanitizedMetricName(String metricName, String suffix) { + return sanitize(metricName) + "_" + suffix; + } + private static String sanitize(String name) { return name.replaceAll("([-.])", "_"); } diff --git a/container-core/src/test/java/com/yahoo/container/jdisc/state/MetricsPacketsHandlerTest.java b/container-core/src/test/java/com/yahoo/container/jdisc/state/MetricsPacketsHandlerTest.java index 160d6452473..807e58918da 100644 --- a/container-core/src/test/java/com/yahoo/container/jdisc/state/MetricsPacketsHandlerTest.java +++ b/container-core/src/test/java/com/yahoo/container/jdisc/state/MetricsPacketsHandlerTest.java @@ -7,8 +7,6 @@ import com.yahoo.container.jdisc.RequestHandlerTestDriver; import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.Test; -import java.nio.file.Files; -import java.nio.file.Path; import java.util.ArrayList; import java.util.List; import java.util.Map; @@ -148,22 +146,21 @@ public class MetricsPacketsHandlerTest extends StateHandlerTestBase { } @Test - public void prometheus_metrics() throws Exception { + public void prometheus_metrics() { var context = StateMetricContext.newInstance(Map.of("dim-1", "value1")); var snapshot = new MetricSnapshot(); snapshot.set(context, "gauge.metric", 0.2); snapshot.add(context, "counter.metric", 5); - snapshot.add(context, "configserver.requests", 120); - // Infrastructure set only contains max and average - snapshot.set(context, "lockAttempt.lockedLoad", 500); snapshotProvider.setSnapshot(snapshot); - var response = requestAsString("http://localhost/metrics-packets?format=prometheus"); - var expectedResponse = readFile("prometheus-unfiltered"); - assertEquals(expectedResponse, response); - - response = requestAsString("http://localhost/metrics-packets?format=prometheus&metric-set=infrastructure"); - expectedResponse = readFile("prometheus-filtered"); + var expectedResponse = """ + # HELP gauge_metric_last\s + # TYPE gauge_metric_last untyped + gauge_metric_last{dim_1="value1",vespa_service="state-handler-test-base",} 0.2 0 + # HELP counter_metric_count\s + # TYPE counter_metric_count untyped + counter_metric_count{dim_1="value1",vespa_service="state-handler-test-base",} 5 0 + """; assertEquals(expectedResponse, response); } @@ -265,8 +262,4 @@ public class MetricsPacketsHandlerTest extends StateHandlerTestBase { snapshotProvider.setSnapshot(snapshot); } - private String readFile(String fileName) throws Exception { - return Files.readString(Path.of("src/test/resources/metrics-packets-handler-responses/" + fileName + ".txt")); - } - } diff --git a/container-core/src/test/resources/metrics-packets-handler-responses/prometheus-filtered.txt b/container-core/src/test/resources/metrics-packets-handler-responses/prometheus-filtered.txt deleted file mode 100644 index 73b825b2712..00000000000 --- a/container-core/src/test/resources/metrics-packets-handler-responses/prometheus-filtered.txt +++ /dev/null @@ -1,12 +0,0 @@ -# HELP configserver_requests_count -# TYPE configserver_requests_count untyped -configserver_requests_count{dim_1="value1",host="some-hostname",vespa_service="state-handler-test-base",} 120 0 -# HELP lockAttempt_lockedLoad_average -# TYPE lockAttempt_lockedLoad_average untyped -lockAttempt_lockedLoad_average{dim_1="value1",host="some-hostname",vespa_service="state-handler-test-base",} 500.0 0 -# HELP lockAttempt_lockedLoad_max -# TYPE lockAttempt_lockedLoad_max untyped -lockAttempt_lockedLoad_max{dim_1="value1",host="some-hostname",vespa_service="state-handler-test-base",} 500.0 0 -# HELP alive -# TYPE alive untyped -alive{vespaVersion="8.0.0",vespa_service="host_life",} 1 0 diff --git a/container-core/src/test/resources/metrics-packets-handler-responses/prometheus-unfiltered.txt b/container-core/src/test/resources/metrics-packets-handler-responses/prometheus-unfiltered.txt deleted file mode 100644 index 1fa14284bf5..00000000000 --- a/container-core/src/test/resources/metrics-packets-handler-responses/prometheus-unfiltered.txt +++ /dev/null @@ -1,42 +0,0 @@ -# HELP gauge_metric_average -# TYPE gauge_metric_average untyped -gauge_metric_average{dim_1="value1",host="some-hostname",vespa_service="state-handler-test-base",} 0.2 0 -# HELP gauge_metric_last -# TYPE gauge_metric_last untyped -gauge_metric_last{dim_1="value1",host="some-hostname",vespa_service="state-handler-test-base",} 0.2 0 -# HELP gauge_metric_max -# TYPE gauge_metric_max untyped -gauge_metric_max{dim_1="value1",host="some-hostname",vespa_service="state-handler-test-base",} 0.2 0 -# HELP gauge_metric_min -# TYPE gauge_metric_min untyped -gauge_metric_min{dim_1="value1",host="some-hostname",vespa_service="state-handler-test-base",} 0.2 0 -# HELP gauge_metric_sum -# TYPE gauge_metric_sum untyped -gauge_metric_sum{dim_1="value1",host="some-hostname",vespa_service="state-handler-test-base",} 0.2 0 -# HELP gauge_metric_count -# TYPE gauge_metric_count untyped -gauge_metric_count{dim_1="value1",host="some-hostname",vespa_service="state-handler-test-base",} 1 0 -# HELP configserver_requests_count -# TYPE configserver_requests_count untyped -configserver_requests_count{dim_1="value1",host="some-hostname",vespa_service="state-handler-test-base",} 120 0 -# HELP lockAttempt_lockedLoad_average -# TYPE lockAttempt_lockedLoad_average untyped -lockAttempt_lockedLoad_average{dim_1="value1",host="some-hostname",vespa_service="state-handler-test-base",} 500.0 0 -# HELP lockAttempt_lockedLoad_last -# TYPE lockAttempt_lockedLoad_last untyped -lockAttempt_lockedLoad_last{dim_1="value1",host="some-hostname",vespa_service="state-handler-test-base",} 500.0 0 -# HELP lockAttempt_lockedLoad_max -# TYPE lockAttempt_lockedLoad_max untyped -lockAttempt_lockedLoad_max{dim_1="value1",host="some-hostname",vespa_service="state-handler-test-base",} 500.0 0 -# HELP lockAttempt_lockedLoad_min -# TYPE lockAttempt_lockedLoad_min untyped -lockAttempt_lockedLoad_min{dim_1="value1",host="some-hostname",vespa_service="state-handler-test-base",} 500.0 0 -# HELP lockAttempt_lockedLoad_sum -# TYPE lockAttempt_lockedLoad_sum untyped -lockAttempt_lockedLoad_sum{dim_1="value1",host="some-hostname",vespa_service="state-handler-test-base",} 500.0 0 -# HELP lockAttempt_lockedLoad_count -# TYPE lockAttempt_lockedLoad_count untyped -lockAttempt_lockedLoad_count{dim_1="value1",host="some-hostname",vespa_service="state-handler-test-base",} 1 0 -# HELP counter_metric_count -# TYPE counter_metric_count untyped -counter_metric_count{dim_1="value1",host="some-hostname",vespa_service="state-handler-test-base",} 5 0 |