summary | refs | log | tree | commit | diff | stats
path: root/configserver
diff options
context:
space:
mode:
author Valerij Fredriksen <valerij92@gmail.com> 2021-04-27 17:25:13 +0200
committer Valerij Fredriksen <valerij92@gmail.com> 2021-04-29 21:07:37 +0200
commit f4b248296b9322bbd59f8802ffe9f5d5b56c5ef0 (patch)
tree c5ae2eaf270e818848fe2fef6eb88f29df45bbc5 /configserver
parent c878b148c9bf13cd7e6475217a68d8f47df5df88 (diff)
Expose content resource usage metrics from cluster-controller
Diffstat (limited to 'configserver')
-rw-r--r-- configserver/src/main/java/com/yahoo/vespa/config/server/http/v2/DeploymentMetricsResponse.java | 9
-rw-r--r-- configserver/src/main/java/com/yahoo/vespa/config/server/metrics/ClusterDeploymentMetricsRetriever.java | 12
-rw-r--r-- configserver/src/main/java/com/yahoo/vespa/config/server/metrics/DeploymentMetricsAggregator.java | 70
-rw-r--r-- configserver/src/test/java/com/yahoo/vespa/config/server/metrics/ClusterDeploymentMetricsRetrieverTest.java | 21
-rw-r--r-- configserver/src/test/resources/metrics/clustercontroller_metrics.json | 24
5 files changed, 112 insertions, 24 deletions
diff --git a/configserver/src/main/java/com/yahoo/vespa/config/server/http/v2/DeploymentMetricsResponse.java b/configserver/src/main/java/com/yahoo/vespa/config/server/http/v2/DeploymentMetricsResponse.java
index 8244a486f1c..062a21b1f80 100644
--- a/configserver/src/main/java/com/yahoo/vespa/config/server/http/v2/DeploymentMetricsResponse.java
+++ b/configserver/src/main/java/com/yahoo/vespa/config/server/http/v2/DeploymentMetricsResponse.java
@@ -32,7 +32,14 @@ public class DeploymentMetricsResponse extends SlimeJsonResponse {
aggregator.aggregateDocumentCount().ifPresent(documentCount -> metrics.setDouble("documentCount", documentCount));
aggregator.aggregateQueryLatency().ifPresent(queryLatency -> metrics.setDouble("queryLatency",queryLatency));
aggregator.aggregateFeedLatency().ifPresent(feedLatency -> metrics.setDouble("feedLatency", feedLatency));
- aggregator.feedingBlocked().ifPresent(feedingBlocked -> metrics.setDouble("feedingBlocked", feedingBlocked));
+ aggregator.memoryUsage().ifPresent(memory -> {
+ metrics.setDouble("memoryUtil", memory.util());
+ metrics.setDouble("memoryFeedBlockLimit", memory.feedBlockLimit());
+ });
+ aggregator.diskUsage().ifPresent(disk -> {
+ metrics.setDouble("diskUtil", disk.util());
+ metrics.setDouble("diskFeedBlockLimit", disk.feedBlockLimit());
+ });
}
}
}
diff --git a/configserver/src/main/java/com/yahoo/vespa/config/server/metrics/ClusterDeploymentMetricsRetriever.java b/configserver/src/main/java/com/yahoo/vespa/config/server/metrics/ClusterDeploymentMetricsRetriever.java
index 2903f0fadcc..0c8b33b9002 100644
--- a/configserver/src/main/java/com/yahoo/vespa/config/server/metrics/ClusterDeploymentMetricsRetriever.java
+++ b/configserver/src/main/java/com/yahoo/vespa/config/server/metrics/ClusterDeploymentMetricsRetriever.java
@@ -46,8 +46,8 @@ public class ClusterDeploymentMetricsRetriever {
private static final String VESPA_CONTAINER = "vespa.container";
private static final String VESPA_QRSERVER = "vespa.qrserver";
private static final String VESPA_DISTRIBUTOR = "vespa.distributor";
- private static final String VESPA_SEARCHNODE = "vespa.searchnode";
- private static final List<String> WANTED_METRIC_SERVICES = List.of(VESPA_CONTAINER, VESPA_QRSERVER, VESPA_DISTRIBUTOR, VESPA_SEARCHNODE);
+ private static final String VESPA_CONTAINER_CLUSTERCONTROLLER = "vespa.container-clustercontroller";
+ private static final List<String> WANTED_METRIC_SERVICES = List.of(VESPA_CONTAINER, VESPA_QRSERVER, VESPA_DISTRIBUTOR, VESPA_CONTAINER_CLUSTERCONTROLLER);
private static final ExecutorService executor = Executors.newFixedThreadPool(10, new DaemonThreadFactory("cluster-deployment-metrics-retriever-"));
@@ -138,8 +138,12 @@ public class ClusterDeploymentMetricsRetriever {
case VESPA_DISTRIBUTOR:
deploymentMetricsAggregator.addDocumentCount(values.field("vds.distributor.docsstored.average").asDouble());
break;
- case VESPA_SEARCHNODE:
- deploymentMetricsAggregator.addFeedingBlocked((int) values.field("content.proton.resource_usage.feeding_blocked.last").asLong());
+ case VESPA_CONTAINER_CLUSTERCONTROLLER:
+ deploymentMetricsAggregator
+ .addMemoryUsage(values.field("cluster-controller.resource_usage.max_memory_utilization.last").asDouble(),
+ values.field("cluster-controller.resource_usage.memory_limit.last").asDouble())
+ .addDiskUsage(values.field("cluster-controller.resource_usage.max_disk_utilization.last").asDouble(),
+ values.field("cluster-controller.resource_usage.disk_limit.last").asDouble());
break;
}
}
diff --git a/configserver/src/main/java/com/yahoo/vespa/config/server/metrics/DeploymentMetricsAggregator.java b/configserver/src/main/java/com/yahoo/vespa/config/server/metrics/DeploymentMetricsAggregator.java
index 916f5ff5613..f27cf942dd8 100644
--- a/configserver/src/main/java/com/yahoo/vespa/config/server/metrics/DeploymentMetricsAggregator.java
+++ b/configserver/src/main/java/com/yahoo/vespa/config/server/metrics/DeploymentMetricsAggregator.java
@@ -13,7 +13,8 @@ public class DeploymentMetricsAggregator {
private LatencyMetrics qr;
private LatencyMetrics container;
private Double documentCount;
- private Integer feedingBlocked;
+ private ResourceUsage memoryUsage;
+ private ResourceUsage diskUsage;
public synchronized DeploymentMetricsAggregator addFeedLatency(double sum, double count) {
this.feed = combineLatency(this.feed, sum, count);
@@ -35,50 +36,87 @@ public class DeploymentMetricsAggregator {
return this;
}
- public synchronized DeploymentMetricsAggregator addFeedingBlocked(int feedingBlocked) {
- this.feedingBlocked = Math.max(Optional.ofNullable(this.feedingBlocked).orElse(0), feedingBlocked);
+ public synchronized DeploymentMetricsAggregator addDiskUsage(double feedBlockUtil, double feedBlockLimit) {
+ this.diskUsage = combineResourceUtil(this.diskUsage, feedBlockUtil, feedBlockLimit);
+ return this;
+ }
+
+ public synchronized DeploymentMetricsAggregator addMemoryUsage(double feedBlockUtil, double feedBlockLimit) {
+ this.memoryUsage = combineResourceUtil(this.memoryUsage, feedBlockUtil, feedBlockLimit);
return this;
}
public Optional<Double> aggregateFeedLatency() {
- return Optional.ofNullable(feed).map(m -> m.latencySum / m.latencyCount).filter(num -> !num.isNaN());
+ return Optional.ofNullable(feed).map(m -> m.sum / m.count).filter(num -> !num.isNaN());
}
public Optional<Double> aggregateFeedRate() {
- return Optional.ofNullable(feed).map(m -> m.latencyCount / 60);
+ return Optional.ofNullable(feed).map(m -> m.count / 60);
}
public Optional<Double> aggregateQueryLatency() {
if (container == null && qr == null) return Optional.empty();
var c = Optional.ofNullable(container).orElseGet(LatencyMetrics::new);
var q = Optional.ofNullable(qr).orElseGet(LatencyMetrics::new);
- return Optional.of((c.latencySum + q.latencySum) / (c.latencyCount + q.latencyCount)).filter(num -> !num.isNaN());
+ return Optional.of((c.sum + q.sum) / (c.count + q.count)).filter(num -> !num.isNaN());
}
public Optional<Double> aggregateQueryRate() {
if (container == null && qr == null) return Optional.empty();
var c = Optional.ofNullable(container).orElseGet(LatencyMetrics::new);
var q = Optional.ofNullable(qr).orElseGet(LatencyMetrics::new);
- return Optional.of((c.latencyCount + q.latencyCount) / 60);
+ return Optional.of((c.count + q.count) / 60);
}
public Optional<Double> aggregateDocumentCount() {
return Optional.ofNullable(documentCount);
}
- public Optional<Integer> feedingBlocked() {
- return Optional.ofNullable(feedingBlocked);
+ public Optional<ResourceUsage> memoryUsage() {
+ return Optional.ofNullable(memoryUsage);
}
- private LatencyMetrics combineLatency(LatencyMetrics metricsOrNull, double sum, double count) {
- var metrics = Optional.ofNullable(metricsOrNull).orElseGet(LatencyMetrics::new);
- metrics.latencyCount += count;
- metrics.latencySum += sum;
- return metrics;
+ public Optional<ResourceUsage> diskUsage() {
+ return Optional.ofNullable(diskUsage);
+ }
+
+
+ private static LatencyMetrics combineLatency(LatencyMetrics metricsOrNull, double sum, double count) {
+ return Optional.ofNullable(metricsOrNull).orElseGet(LatencyMetrics::new).combine(sum, count);
+ }
+
+ private static ResourceUsage combineResourceUtil(ResourceUsage resourceUsageOrNull, double util, double limit) {
+ return Optional.ofNullable(resourceUsageOrNull).orElseGet(ResourceUsage::new).combine(util, limit);
}
private static class LatencyMetrics {
- double latencySum;
- double latencyCount;
+ private double sum;
+ private double count;
+
+ private LatencyMetrics combine(double sum, double count) {
+ this.sum += sum;
+ this.count += count;
+ return this;
+ }
+ }
+
+ public static class ResourceUsage {
+ /**
+ * Current resource utilization relative to feed block limit, i.e. value of >= 1 means utilization at or above
+ * feed block limit.
+ */
+ private double feedBlockUtil;
+
+ /** Resource utilization limit at which further external feed is blocked */
+ private double feedBlockLimit;
+
+ private ResourceUsage combine(double feedBlockUtil, double feedBlockLimit) {
+ if (feedBlockUtil > this.feedBlockUtil) this.feedBlockUtil = feedBlockUtil;
+ if (feedBlockLimit > this.feedBlockLimit) this.feedBlockLimit = feedBlockLimit;
+ return this;
+ }
+
+ public double util() { return feedBlockUtil * feedBlockLimit; }
+ public double feedBlockLimit() { return feedBlockLimit; }
}
}
diff --git a/configserver/src/test/java/com/yahoo/vespa/config/server/metrics/ClusterDeploymentMetricsRetrieverTest.java b/configserver/src/test/java/com/yahoo/vespa/config/server/metrics/ClusterDeploymentMetricsRetrieverTest.java
index 5aa3e196222..a5a46b67d13 100644
--- a/configserver/src/test/java/com/yahoo/vespa/config/server/metrics/ClusterDeploymentMetricsRetrieverTest.java
+++ b/configserver/src/test/java/com/yahoo/vespa/config/server/metrics/ClusterDeploymentMetricsRetrieverTest.java
@@ -35,7 +35,7 @@ public class ClusterDeploymentMetricsRetrieverTest {
@Test
public void testMetricAggregation() throws IOException {
- List<URI> hosts = Stream.of(1, 2, 3)
+ List<URI> hosts = Stream.of(1, 2, 3, 4)
.map(item -> URI.create("http://localhost:" + wireMock.port() + "/" + item))
.collect(Collectors.toList());
@@ -54,13 +54,21 @@ public class ClusterDeploymentMetricsRetrieverTest {
.withStatus(200)
.withBody(containerMetrics())));
+ stubFor(get(urlEqualTo("/4"))
+ .willReturn(aResponse()
+ .withStatus(200)
+ .withBody(clustercontrollerMetrics())));
+
ClusterInfo expectedContentCluster = new ClusterInfo("content_cluster_id", "content");
ClusterInfo expectedContainerCluster = new ClusterInfo("container_cluster_id", "container");
Map<ClusterInfo, DeploymentMetricsAggregator> aggregatorMap = new ClusterDeploymentMetricsRetriever().requestMetricsGroupedByCluster(hosts);
compareAggregators(
- new DeploymentMetricsAggregator().addDocumentCount(6000.0).addFeedingBlocked(0),
+ new DeploymentMetricsAggregator()
+ .addDocumentCount(6000.0)
+ .addMemoryUsage(0.89074, 0.8)
+ .addDiskUsage(0.83517, 0.75),
aggregatorMap.get(expectedContentCluster)
);
@@ -84,6 +92,10 @@ public class ClusterDeploymentMetricsRetrieverTest {
return Files.readString(Path.of("src/test/resources/metrics/content_metrics.json"));
}
+ private String clustercontrollerMetrics() throws IOException {
+ return Files.readString(Path.of("src/test/resources/metrics/clustercontroller_metrics.json"));
+ }
+
// Same tolerance value as used internally in MetricsAggregator.isZero
private static final double metricsTolerance = 0.001;
@@ -95,7 +107,10 @@ public class ClusterDeploymentMetricsRetrieverTest {
compareOptionals(expected.aggregateFeedRate(), actual.aggregateFeedRate(), assertDoubles);
compareOptionals(expected.aggregateQueryLatency(), actual.aggregateQueryLatency(), assertDoubles);
compareOptionals(expected.aggregateFeedLatency(), actual.aggregateFeedLatency(), assertDoubles);
- assertEquals(expected.feedingBlocked(), actual.feedingBlocked());
+ compareOptionals(expected.diskUsage(), actual.diskUsage(), (a, b) -> assertDoubles.accept(a.util(), b.util()));
+ compareOptionals(expected.diskUsage(), actual.diskUsage(), (a, b) -> assertDoubles.accept(a.feedBlockLimit(), b.feedBlockLimit()));
+ compareOptionals(expected.memoryUsage(), actual.memoryUsage(), (a, b) -> assertDoubles.accept(a.util(), b.util()));
+ compareOptionals(expected.memoryUsage(), actual.memoryUsage(), (a, b) -> assertDoubles.accept(a.feedBlockLimit(), b.feedBlockLimit()));
}
@SuppressWarnings("OptionalUsedAsFieldOrParameterType")
diff --git a/configserver/src/test/resources/metrics/clustercontroller_metrics.json b/configserver/src/test/resources/metrics/clustercontroller_metrics.json
new file mode 100644
index 00000000000..f487f95802c
--- /dev/null
+++ b/configserver/src/test/resources/metrics/clustercontroller_metrics.json
@@ -0,0 +1,24 @@
+{
+ "services": [
+ {
+ "name": "vespa.container-clustercontroller",
+ "timestamp": 1619529109,
+ "metrics": [
+ {
+ "values": {
+ "cluster-controller.resource_usage.disk_limit.last": 0.75,
+ "cluster-controller.resource_usage.nodes_above_limit.last": 0,
+ "cluster-controller.resource_usage.max_memory_utilization.last": 0.8907474348626,
+ "cluster-controller.resource_usage.max_disk_utilization.last": 0.8351705494609,
+ "cluster-controller.cluster-state-change.count": 2,
+ "cluster-controller.resource_usage.memory_limit.last": 0.8
+ },
+ "dimensions": {
+ "clustertype": "content",
+ "clusterid": "content_cluster_id"
+ }
+ }
+ ]
+ }
+ ]
+}