diff options
author | gjoranv <gv@verizonmedia.com> | 2020-01-03 00:13:21 +0100 |
---|---|---|
committer | gjoranv <gv@verizonmedia.com> | 2020-01-03 11:25:32 +0100 |
commit | 0c301c98d930a5e0a48b62bf8a5eda6abfc58257 (patch) | |
tree | 4d6e6788104441cddd8fbc2373a7db24432e7b72 /metrics-proxy | |
parent | 869e9e83274e037cae548ac5eb3c72881e90859a (diff) |
Properly support multiple consumers when caching metrics.
Diffstat (limited to 'metrics-proxy')
2 files changed, 72 insertions, 14 deletions
diff --git a/metrics-proxy/src/main/java/ai/vespa/metricsproxy/http/application/NodeMetricsClient.java b/metrics-proxy/src/main/java/ai/vespa/metricsproxy/http/application/NodeMetricsClient.java index 8987ca25be1..d1ff47f3c3f 100644 --- a/metrics-proxy/src/main/java/ai/vespa/metricsproxy/http/application/NodeMetricsClient.java +++ b/metrics-proxy/src/main/java/ai/vespa/metricsproxy/http/application/NodeMetricsClient.java @@ -16,7 +16,9 @@ import java.io.IOException; import java.time.Clock; import java.time.Duration; import java.time.Instant; +import java.util.HashMap; import java.util.List; +import java.util.Map; import java.util.logging.Logger; import static ai.vespa.metricsproxy.http.ValuesFetcher.DEFAULT_PUBLIC_CONSUMER_ID; @@ -24,9 +26,12 @@ import static com.yahoo.log.LogLevel.DEBUG; import static java.util.Collections.emptyList; /** - * This class is used to retrieve metrics from a single Vespa node over http. - * Keeps and serves a snapshot of the node's metrics, with a fixed TTL, to - * avoid unnecessary load on metrics proxies. + * Retrieves metrics from a single Vespa node over http. To avoid unnecessary load on metrics + * proxies, a cached snapshot per consumer is retained and served for a fixed TTL period. + * Upon failure to retrieve metrics, an empty snapshot is cached. + * + * This class assumes that the consumer id is a valid and existing one, which is already + * ensured by the {@link ApplicationMetricsHandler}. * * @author gjoranv */ @@ -39,8 +44,7 @@ public class NodeMetricsClient { private final HttpClient httpClient; private final Clock clock; - private List<MetricsPacket.Builder> metrics = emptyList(); - private Instant metricsTimestamp = Instant.EPOCH; + private final Map<ConsumerId, Snapshot> snapshots = new HashMap<>(); private long snapshotsRetrieved = 0; public NodeMetricsClient(HttpClient httpClient, Node node, Clock clock) { @@ -54,26 +58,29 @@ public class NodeMetricsClient { } public List<MetricsPacket.Builder> getMetrics(ConsumerId consumer) { - if (Instant.now(clock).isAfter(metricsTimestamp.plus(METRICS_TTL))) { - retrieveMetrics(consumer); + var currentSnapshot = snapshots.get(consumer); + if (currentSnapshot == null || currentSnapshot.isStale(clock) || currentSnapshot.metrics.isEmpty()) { + Snapshot snapshot = retrieveMetrics(consumer); + snapshots.put(consumer, snapshot); + return snapshot.metrics; + } else { + return snapshots.get(consumer).metrics; } - return metrics; } - private void retrieveMetrics(ConsumerId consumer) { + private Snapshot retrieveMetrics(ConsumerId consumer) { String metricsUri = node.metricsUri(consumer).toString(); log.log(DEBUG, () -> "Retrieving metrics from host " + metricsUri); try { String metricsJson = httpClient.execute(new HttpGet(metricsUri), new BasicResponseHandler()); - metrics = GenericJsonUtil.toMetricsPackets(metricsJson); - metricsTimestamp = Instant.now(clock); + var newMetrics = GenericJsonUtil.toMetricsPackets(metricsJson); snapshotsRetrieved ++; - log.log(DEBUG, () -> "Successfully retrieved " + metrics.size() + " metrics packets from " + metricsUri); - + log.log(DEBUG, () -> "Successfully retrieved " + newMetrics.size() + " metrics packets from " + metricsUri); + return new Snapshot(Instant.now(clock), newMetrics); } catch (IOException e) { log.warning("Unable to retrieve metrics from " + metricsUri + ": " + Exceptions.toMessageString(e)); - metrics = emptyList(); + return new Snapshot(Instant.now(clock), emptyList()); } } @@ -81,4 +88,23 @@ public class NodeMetricsClient { return snapshotsRetrieved; } + + /** + * Convenience class for storing a metrics snapshot with its timestamp. + */ + static class Snapshot { + + final Instant timestamp; + final List<MetricsPacket.Builder> metrics; + + Snapshot(Instant timestamp, List<MetricsPacket.Builder> metrics) { + this.timestamp = timestamp; + this.metrics = metrics; + } + + boolean isStale(Clock clock) { + return Instant.now(clock).isAfter(timestamp.plus(METRICS_TTL)); + } + } + } diff --git a/metrics-proxy/src/test/java/ai/vespa/metricsproxy/http/application/NodeMetricsClientTest.java b/metrics-proxy/src/test/java/ai/vespa/metricsproxy/http/application/NodeMetricsClientTest.java index d028db93d43..6c319a4d74c 100644 --- a/metrics-proxy/src/test/java/ai/vespa/metricsproxy/http/application/NodeMetricsClientTest.java +++ b/metrics-proxy/src/test/java/ai/vespa/metricsproxy/http/application/NodeMetricsClientTest.java @@ -20,11 +20,15 @@ import java.util.List; import static ai.vespa.metricsproxy.TestUtil.getFileContents; import static ai.vespa.metricsproxy.http.ValuesFetcher.DEFAULT_PUBLIC_CONSUMER_ID; +import static ai.vespa.metricsproxy.metric.model.ConsumerId.toConsumerId; +import static ai.vespa.metricsproxy.metric.model.MetricId.toMetricId; import static com.github.tomakehurst.wiremock.client.WireMock.aResponse; +import static com.github.tomakehurst.wiremock.client.WireMock.equalTo; import static com.github.tomakehurst.wiremock.client.WireMock.get; import static com.github.tomakehurst.wiremock.client.WireMock.urlPathEqualTo; import static com.github.tomakehurst.wiremock.core.WireMockConfiguration.options; import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertTrue; /** * Two optimizations worth noting: @@ -40,6 +44,10 @@ public class NodeMetricsClientTest { private static final String RESPONSE = getFileContents(TEST_FILE); private static final CloseableHttpClient httpClient = HttpClients.createDefault(); + private static final String CPU_METRIC = "cpu.util"; + private static final String REPLACED_CPU_METRIC = "replaced_cpu_util"; + private static final String CUSTOM_CONSUMER = "custom-consumer"; + private static Node node; private ManualClock clock; @@ -54,6 +62,17 @@ public class NodeMetricsClientTest { URI metricsUri = node.metricsUri(DEFAULT_PUBLIC_CONSUMER_ID); wireMockRule.stubFor(get(urlPathEqualTo(metricsUri.getPath())) .willReturn(aResponse().withBody(RESPONSE))); + + wireMockRule.stubFor(get(urlPathEqualTo(metricsUri.getPath())) + .withQueryParam("consumer", equalTo(DEFAULT_PUBLIC_CONSUMER_ID.id)) + .willReturn(aResponse().withBody(RESPONSE))); + + // Add a slightly different response for a custom consumer. + String myConsumerResponse = RESPONSE.replaceAll(CPU_METRIC, REPLACED_CPU_METRIC); + wireMockRule.stubFor(get(urlPathEqualTo(metricsUri.getPath())) + .withQueryParam("consumer", equalTo(CUSTOM_CONSUMER)) + .willReturn(aResponse().withBody(myConsumerResponse))); + } @Before @@ -94,4 +113,17 @@ public class NodeMetricsClientTest { assertEquals(2, nodeMetricsClient.snapshotsRetrieved()); } + @Test + public void metrics_for_different_consumers_are_cached_separately() { + List<MetricsPacket.Builder> defaultMetrics = nodeMetricsClient.getMetrics(DEFAULT_PUBLIC_CONSUMER_ID); + assertEquals(1, nodeMetricsClient.snapshotsRetrieved()); + assertEquals(4, defaultMetrics.size()); + + List<MetricsPacket.Builder> customMetrics = nodeMetricsClient.getMetrics(toConsumerId(CUSTOM_CONSUMER)); + assertEquals(2, nodeMetricsClient.snapshotsRetrieved()); + assertEquals(4, customMetrics.size()); + + MetricsPacket replacedCpuMetric = customMetrics.get(0).build(); + assertTrue(replacedCpuMetric.metrics().containsKey(toMetricId(REPLACED_CPU_METRIC))); + } } |