summaryrefslogtreecommitdiffstats
path: root/metrics-proxy
diff options
context:
space:
mode:
authorgjoranv <gv@verizonmedia.com>2020-01-03 00:13:21 +0100
committergjoranv <gv@verizonmedia.com>2020-01-03 11:25:32 +0100
commit0c301c98d930a5e0a48b62bf8a5eda6abfc58257 (patch)
tree4d6e6788104441cddd8fbc2373a7db24432e7b72 /metrics-proxy
parent869e9e83274e037cae548ac5eb3c72881e90859a (diff)
Properly support multiple consumers when caching metrics.
Diffstat (limited to 'metrics-proxy')
-rw-r--r--metrics-proxy/src/main/java/ai/vespa/metricsproxy/http/application/NodeMetricsClient.java54
-rw-r--r--metrics-proxy/src/test/java/ai/vespa/metricsproxy/http/application/NodeMetricsClientTest.java32
2 files changed, 72 insertions, 14 deletions
diff --git a/metrics-proxy/src/main/java/ai/vespa/metricsproxy/http/application/NodeMetricsClient.java b/metrics-proxy/src/main/java/ai/vespa/metricsproxy/http/application/NodeMetricsClient.java
index 8987ca25be1..d1ff47f3c3f 100644
--- a/metrics-proxy/src/main/java/ai/vespa/metricsproxy/http/application/NodeMetricsClient.java
+++ b/metrics-proxy/src/main/java/ai/vespa/metricsproxy/http/application/NodeMetricsClient.java
@@ -16,7 +16,9 @@ import java.io.IOException;
import java.time.Clock;
import java.time.Duration;
import java.time.Instant;
+import java.util.HashMap;
import java.util.List;
+import java.util.Map;
import java.util.logging.Logger;
import static ai.vespa.metricsproxy.http.ValuesFetcher.DEFAULT_PUBLIC_CONSUMER_ID;
@@ -24,9 +26,12 @@ import static com.yahoo.log.LogLevel.DEBUG;
import static java.util.Collections.emptyList;
/**
- * This class is used to retrieve metrics from a single Vespa node over http.
- * Keeps and serves a snapshot of the node's metrics, with a fixed TTL, to
- * avoid unnecessary load on metrics proxies.
+ * Retrieves metrics from a single Vespa node over http. To avoid unnecessary load on metrics
+ * proxies, a cached snapshot per consumer is retained and served for a fixed TTL period.
+ * Upon failure to retrieve metrics, an empty snapshot is cached.
+ *
+ * This class assumes that the consumer id is a valid and existing one, which is already
+ * ensured by the {@link ApplicationMetricsHandler}.
*
* @author gjoranv
*/
@@ -39,8 +44,7 @@ public class NodeMetricsClient {
private final HttpClient httpClient;
private final Clock clock;
- private List<MetricsPacket.Builder> metrics = emptyList();
- private Instant metricsTimestamp = Instant.EPOCH;
+ private final Map<ConsumerId, Snapshot> snapshots = new HashMap<>();
private long snapshotsRetrieved = 0;
public NodeMetricsClient(HttpClient httpClient, Node node, Clock clock) {
@@ -54,26 +58,29 @@ public class NodeMetricsClient {
}
public List<MetricsPacket.Builder> getMetrics(ConsumerId consumer) {
- if (Instant.now(clock).isAfter(metricsTimestamp.plus(METRICS_TTL))) {
- retrieveMetrics(consumer);
+ var currentSnapshot = snapshots.get(consumer);
+ if (currentSnapshot == null || currentSnapshot.isStale(clock) || currentSnapshot.metrics.isEmpty()) {
+ Snapshot snapshot = retrieveMetrics(consumer);
+ snapshots.put(consumer, snapshot);
+ return snapshot.metrics;
+ } else {
+ return snapshots.get(consumer).metrics;
}
- return metrics;
}
- private void retrieveMetrics(ConsumerId consumer) {
+ private Snapshot retrieveMetrics(ConsumerId consumer) {
String metricsUri = node.metricsUri(consumer).toString();
log.log(DEBUG, () -> "Retrieving metrics from host " + metricsUri);
try {
String metricsJson = httpClient.execute(new HttpGet(metricsUri), new BasicResponseHandler());
- metrics = GenericJsonUtil.toMetricsPackets(metricsJson);
- metricsTimestamp = Instant.now(clock);
+ var newMetrics = GenericJsonUtil.toMetricsPackets(metricsJson);
snapshotsRetrieved ++;
- log.log(DEBUG, () -> "Successfully retrieved " + metrics.size() + " metrics packets from " + metricsUri);
-
+ log.log(DEBUG, () -> "Successfully retrieved " + newMetrics.size() + " metrics packets from " + metricsUri);
+ return new Snapshot(Instant.now(clock), newMetrics);
} catch (IOException e) {
log.warning("Unable to retrieve metrics from " + metricsUri + ": " + Exceptions.toMessageString(e));
- metrics = emptyList();
+ return new Snapshot(Instant.now(clock), emptyList());
}
}
@@ -81,4 +88,23 @@ public class NodeMetricsClient {
return snapshotsRetrieved;
}
+
+ /**
+ * Convenience class for storing a metrics snapshot with its timestamp.
+ */
+ static class Snapshot {
+
+ final Instant timestamp;
+ final List<MetricsPacket.Builder> metrics;
+
+ Snapshot(Instant timestamp, List<MetricsPacket.Builder> metrics) {
+ this.timestamp = timestamp;
+ this.metrics = metrics;
+ }
+
+ boolean isStale(Clock clock) {
+ return Instant.now(clock).isAfter(timestamp.plus(METRICS_TTL));
+ }
+ }
+
}
diff --git a/metrics-proxy/src/test/java/ai/vespa/metricsproxy/http/application/NodeMetricsClientTest.java b/metrics-proxy/src/test/java/ai/vespa/metricsproxy/http/application/NodeMetricsClientTest.java
index d028db93d43..6c319a4d74c 100644
--- a/metrics-proxy/src/test/java/ai/vespa/metricsproxy/http/application/NodeMetricsClientTest.java
+++ b/metrics-proxy/src/test/java/ai/vespa/metricsproxy/http/application/NodeMetricsClientTest.java
@@ -20,11 +20,15 @@ import java.util.List;
import static ai.vespa.metricsproxy.TestUtil.getFileContents;
import static ai.vespa.metricsproxy.http.ValuesFetcher.DEFAULT_PUBLIC_CONSUMER_ID;
+import static ai.vespa.metricsproxy.metric.model.ConsumerId.toConsumerId;
+import static ai.vespa.metricsproxy.metric.model.MetricId.toMetricId;
import static com.github.tomakehurst.wiremock.client.WireMock.aResponse;
+import static com.github.tomakehurst.wiremock.client.WireMock.equalTo;
import static com.github.tomakehurst.wiremock.client.WireMock.get;
import static com.github.tomakehurst.wiremock.client.WireMock.urlPathEqualTo;
import static com.github.tomakehurst.wiremock.core.WireMockConfiguration.options;
import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertTrue;
/**
* Two optimizations worth noting:
@@ -40,6 +44,10 @@ public class NodeMetricsClientTest {
private static final String RESPONSE = getFileContents(TEST_FILE);
private static final CloseableHttpClient httpClient = HttpClients.createDefault();
+ private static final String CPU_METRIC = "cpu.util";
+ private static final String REPLACED_CPU_METRIC = "replaced_cpu_util";
+ private static final String CUSTOM_CONSUMER = "custom-consumer";
+
private static Node node;
private ManualClock clock;
@@ -54,6 +62,17 @@ public class NodeMetricsClientTest {
URI metricsUri = node.metricsUri(DEFAULT_PUBLIC_CONSUMER_ID);
wireMockRule.stubFor(get(urlPathEqualTo(metricsUri.getPath()))
.willReturn(aResponse().withBody(RESPONSE)));
+
+ wireMockRule.stubFor(get(urlPathEqualTo(metricsUri.getPath()))
+ .withQueryParam("consumer", equalTo(DEFAULT_PUBLIC_CONSUMER_ID.id))
+ .willReturn(aResponse().withBody(RESPONSE)));
+
+ // Add a slightly different response for a custom consumer.
+ String myConsumerResponse = RESPONSE.replaceAll(CPU_METRIC, REPLACED_CPU_METRIC);
+ wireMockRule.stubFor(get(urlPathEqualTo(metricsUri.getPath()))
+ .withQueryParam("consumer", equalTo(CUSTOM_CONSUMER))
+ .willReturn(aResponse().withBody(myConsumerResponse)));
+
}
@Before
@@ -94,4 +113,17 @@ public class NodeMetricsClientTest {
assertEquals(2, nodeMetricsClient.snapshotsRetrieved());
}
+ @Test
+ public void metrics_for_different_consumers_are_cached_separately() {
+ List<MetricsPacket.Builder> defaultMetrics = nodeMetricsClient.getMetrics(DEFAULT_PUBLIC_CONSUMER_ID);
+ assertEquals(1, nodeMetricsClient.snapshotsRetrieved());
+ assertEquals(4, defaultMetrics.size());
+
+ List<MetricsPacket.Builder> customMetrics = nodeMetricsClient.getMetrics(toConsumerId(CUSTOM_CONSUMER));
+ assertEquals(2, nodeMetricsClient.snapshotsRetrieved());
+ assertEquals(4, customMetrics.size());
+
+ MetricsPacket replacedCpuMetric = customMetrics.get(0).build();
+ assertTrue(replacedCpuMetric.metrics().containsKey(toMetricId(REPLACED_CPU_METRIC)));
+ }
}