diff options
author | Ola Aunrønning <olaa@verizonmedia.com> | 2019-09-04 12:09:56 +0200 |
---|---|---|
committer | Ola Aunrønning <olaa@verizonmedia.com> | 2019-09-04 12:10:06 +0200 |
commit | d0af058cec48504bc95aa98e970fe92f9e4cb746 (patch) | |
tree | ffc92295fd5582104a2b966540828413b9265932 /configserver | |
parent | 1b7d245acd4862b31930e419b5e0ad61589d6b3f (diff) |
Find clusters using metric dimensions instead of service info.
Diffstat (limited to 'configserver')
10 files changed, 250 insertions, 134 deletions
diff --git a/configserver/src/main/java/com/yahoo/vespa/config/server/ApplicationRepository.java b/configserver/src/main/java/com/yahoo/vespa/config/server/ApplicationRepository.java index c165705dc8c..db2c737a8f3 100644 --- a/configserver/src/main/java/com/yahoo/vespa/config/server/ApplicationRepository.java +++ b/configserver/src/main/java/com/yahoo/vespa/config/server/ApplicationRepository.java @@ -644,7 +644,8 @@ public class ApplicationRepository implements com.yahoo.config.provision.Deploye public MetricsResponse getMetrics(ApplicationId applicationId) { Application application = getApplication(applicationId); - return ClusterMetricsRetriever.getMetrics(application); + ClusterMetricsRetriever clusterMetricsRetriever = new ClusterMetricsRetriever(); + return clusterMetricsRetriever.getMetrics(application); } // ---------------- Misc operations ---------------------------------------------------------------- diff --git a/configserver/src/main/java/com/yahoo/vespa/config/server/http/v2/MetricsResponse.java b/configserver/src/main/java/com/yahoo/vespa/config/server/http/v2/MetricsResponse.java index 0abc59f54b2..7cf9357217e 100644 --- a/configserver/src/main/java/com/yahoo/vespa/config/server/http/v2/MetricsResponse.java +++ b/configserver/src/main/java/com/yahoo/vespa/config/server/http/v2/MetricsResponse.java @@ -32,7 +32,7 @@ public class MetricsResponse extends HttpResponse { for (var entry : aggregatedMetrics.entrySet()) { Cursor cluster = clusters.addObject(); cluster.setString("clusterId", entry.getKey().getClusterId()); - cluster.setString("clusterType", entry.getKey().getClusterType().name()); + cluster.setString("clusterType", entry.getKey().getClusterType()); MetricsAggregator aggregator = entry.getValue(); Cursor metrics = cluster.setObject("metrics"); diff --git a/configserver/src/main/java/com/yahoo/vespa/config/server/metrics/ClusterInfo.java b/configserver/src/main/java/com/yahoo/vespa/config/server/metrics/ClusterInfo.java index bd453dddd74..86144c1ddc3 100644 --- a/configserver/src/main/java/com/yahoo/vespa/config/server/metrics/ClusterInfo.java +++ b/configserver/src/main/java/com/yahoo/vespa/config/server/metrics/ClusterInfo.java @@ -1,64 +1,40 @@ // Copyright 2019 Oath Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. package com.yahoo.vespa.config.server.metrics; -import com.yahoo.config.model.api.ServiceInfo; -import java.net.URI; -import java.util.ArrayList; -import java.util.List; -import java.util.Optional; -import java.util.Set; +import java.util.Objects; /** * @author olaa */ public class ClusterInfo { - private static final Set<String> CONTENT_SERVICES = Set.of("storagenode", "searchnode", "distributor", "container-clustercontroller"); - private static final Set<String> CONTAINER_SERVICES = Set.of("qrserver", "container"); - private final String clusterId; - private final ClusterType clusterType; - private final List<URI> hostnames; - - public ClusterInfo(String clusterId, ClusterType clusterType) { - this(clusterId, clusterType, new ArrayList<>()); - } + private final String clusterType; - public ClusterInfo(String clusterId, ClusterType clusterType, List<URI> hostnames) { + public ClusterInfo(String clusterId, String clusterType) { this.clusterId = clusterId; this.clusterType = clusterType; - this.hostnames = hostnames; } public String getClusterId() { return clusterId; } - public ClusterType getClusterType() { + public String getClusterType() { return clusterType; } - public List<URI> getHostnames() { - return hostnames; + @Override + public int hashCode() { + return Objects.hash(clusterId, clusterType); } - public void addHost(URI host) { - hostnames.add(host); + @Override + public boolean equals(Object o) { + if (!(o instanceof ClusterInfo)) return false; + ClusterInfo other = (ClusterInfo) o; + return clusterId.equals(other.clusterId) && clusterType.equals(other.clusterType); } - // Try to determine whether host is content or container based on service - public static Optional<ClusterInfo> fromServiceInfo(ServiceInfo serviceInfo) { - String serviceType = serviceInfo.getServiceType(); - ClusterType clusterType; - if (CONTENT_SERVICES.contains(serviceType)) clusterType = ClusterType.content; - else if (CONTAINER_SERVICES.contains(serviceType)) clusterType = ClusterType.container; - else return Optional.empty(); - return Optional.of(new ClusterInfo(serviceInfo.getServiceName(), clusterType)); - } - - public enum ClusterType { - content, - container - } } diff --git a/configserver/src/main/java/com/yahoo/vespa/config/server/metrics/ClusterMetricsRetriever.java b/configserver/src/main/java/com/yahoo/vespa/config/server/metrics/ClusterMetricsRetriever.java index 50d5dd41d9c..1fb45c65df9 100644 --- a/configserver/src/main/java/com/yahoo/vespa/config/server/metrics/ClusterMetricsRetriever.java +++ b/configserver/src/main/java/com/yahoo/vespa/config/server/metrics/ClusterMetricsRetriever.java @@ -9,57 +9,39 @@ import com.yahoo.vespa.model.admin.metricsproxy.MetricsProxyContainer; import java.net.URI; import java.util.Collection; -import java.util.HashMap; -import java.util.Map; -import java.util.Optional; -import java.util.concurrent.ConcurrentHashMap; -import java.util.concurrent.ForkJoinPool; -import java.util.concurrent.TimeUnit; import java.util.function.Predicate; +import java.util.stream.Collectors; /** - * @author olaa + * Finds all hosts we want to fetch metrics for, generates the appropriate URIs + * and returns the metrics grouped by cluster. * - * Retrieves metrics for given application, grouped by cluster + * @author olaa */ public class ClusterMetricsRetriever { - public static MetricsResponse getMetrics(Application application) { - var clusters = getClustersOfApplication(application); - var clusterMetrics = new ConcurrentHashMap<ClusterInfo, MetricsAggregator>(); + private final MetricsRetriever metricsRetriever; - Runnable retrieveMetricsJob = () -> - clusters.parallelStream().forEach(cluster -> { - MetricsAggregator metrics = MetricsRetriever.requestMetricsForCluster(cluster); - clusterMetrics.put(cluster, metrics); - }); - - ForkJoinPool threadPool = new ForkJoinPool(5); - threadPool.submit(retrieveMetricsJob); - threadPool.shutdown(); + public ClusterMetricsRetriever() { + this(new MetricsRetriever()); + } - try { - threadPool.awaitTermination(1, TimeUnit.MINUTES); - } catch (InterruptedException e) { - throw new RuntimeException(e); - } + public ClusterMetricsRetriever(MetricsRetriever metricsRetriever) { + this.metricsRetriever = metricsRetriever; + } + public MetricsResponse getMetrics(Application application) { + var hosts = getHostsOfApplication(application); + var clusterMetrics = metricsRetriever.requestMetricsGroupedByCluster(hosts); return new MetricsResponse(200, application.getId(), clusterMetrics); } - /** Finds the hosts of an application, grouped by cluster name */ - private static Collection<ClusterInfo> getClustersOfApplication(Application application) { - Map<String, ClusterInfo> clusters = new HashMap<>(); - - application.getModel().getHosts().stream() + private static Collection<URI> getHostsOfApplication(Application application) { + return application.getModel().getHosts().stream() .filter(host -> host.getServices().stream().noneMatch(isLogserver())) - .forEach(hostInfo -> { - ClusterInfo clusterInfo = createClusterInfo(hostInfo); - URI metricsProxyURI = createMetricsProxyURI(hostInfo.getHostname()); - clusters.computeIfAbsent(clusterInfo.getClusterId(), c -> clusterInfo).addHost(metricsProxyURI); - } - ); - return clusters.values(); + .map(HostInfo::getHostname) + .map(ClusterMetricsRetriever::createMetricsProxyURI) + .collect(Collectors.toList()); } @@ -71,10 +53,4 @@ public class ClusterMetricsRetriever { return URI.create("http://" + hostname + ":" + MetricsProxyContainer.BASEPORT + "/metrics/v1/values?consumer=Vespa"); } - private static ClusterInfo createClusterInfo(HostInfo hostInfo) { - return hostInfo.getServices().stream() - .map(ClusterInfo::fromServiceInfo) - .filter(Optional::isPresent) - .findFirst().get().orElseThrow(); - } } diff --git a/configserver/src/main/java/com/yahoo/vespa/config/server/metrics/MetricsAggregator.java b/configserver/src/main/java/com/yahoo/vespa/config/server/metrics/MetricsAggregator.java index 8fa08275ad5..9934c074b15 100644 --- a/configserver/src/main/java/com/yahoo/vespa/config/server/metrics/MetricsAggregator.java +++ b/configserver/src/main/java/com/yahoo/vespa/config/server/metrics/MetricsAggregator.java @@ -14,7 +14,6 @@ public class MetricsAggregator { private LatencyMetrics qr; private LatencyMetrics container; private Double documentCount; - private Instant timestamp; public MetricsAggregator addFeedLatency(double sum, double count) { this.feed = combineLatency(this.feed, sum, count); @@ -36,11 +35,6 @@ public class MetricsAggregator { return this; } - public MetricsAggregator setTimestamp(Instant timestamp) { - this.timestamp = timestamp; - return this; - } - public Optional<Double> aggregateFeedLatency() { return Optional.ofNullable(feed).map(m -> m.latencySum / m.latencyCount).filter(num -> !num.isNaN()); @@ -68,10 +62,6 @@ public class MetricsAggregator { return Optional.ofNullable(documentCount); } - public Instant getTimestamp() { - return timestamp; - } - private LatencyMetrics combineLatency(LatencyMetrics metricsOrNull, double sum, double count) { var metrics = Optional.ofNullable(metricsOrNull).orElseGet(LatencyMetrics::new); metrics.latencyCount += count; diff --git a/configserver/src/main/java/com/yahoo/vespa/config/server/metrics/MetricsRetriever.java b/configserver/src/main/java/com/yahoo/vespa/config/server/metrics/MetricsRetriever.java index 8a18843db5f..b58afc727d6 100644 --- a/configserver/src/main/java/com/yahoo/vespa/config/server/metrics/MetricsRetriever.java +++ b/configserver/src/main/java/com/yahoo/vespa/config/server/metrics/MetricsRetriever.java @@ -15,6 +15,15 @@ import java.io.InputStream; import java.io.UncheckedIOException; import java.net.URI; import java.time.Instant; +import java.util.Collection; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.Optional; +import java.util.concurrent.ConcurrentHashMap; +import java.util.concurrent.ConcurrentMap; +import java.util.concurrent.ForkJoinPool; +import java.util.concurrent.TimeUnit; import java.util.logging.Logger; @@ -28,17 +37,36 @@ import java.util.logging.Logger; public class MetricsRetriever { private static final Logger log = Logger.getLogger(MetricsRetriever.class.getName()); + private static final String VESPA_CONTAINER = "vespa.container"; + private static final String VESPA_QRSERVER = "vespa.qrserver"; + private static final String VESPA_DISTRIBUTOR = "vespa.distributor"; + private static final List<String> WANTED_METRIC_SERVICES = List.of(VESPA_CONTAINER, VESPA_QRSERVER, VESPA_DISTRIBUTOR); + /** - * Call the metrics API on each host in the cluster and aggregate the metrics - * into a single value. + * Call the metrics API on each host and aggregate the metrics + * into a single value, grouped by cluster. */ - public static MetricsAggregator requestMetricsForCluster(ClusterInfo clusterInfo) { - var aggregator = new MetricsAggregator(); - clusterInfo.getHostnames().forEach(host -> getHostMetrics(host, aggregator)); - return aggregator; + public Map<ClusterInfo, MetricsAggregator> requestMetricsGroupedByCluster(Collection<URI> hosts) { + Map<ClusterInfo, MetricsAggregator> clusterMetricsMap = new ConcurrentHashMap<>(); + + Runnable retrieveMetricsJob = () -> + hosts.parallelStream().forEach(host -> + getHostMetrics(host, clusterMetricsMap) + ); + + ForkJoinPool threadPool = new ForkJoinPool(5); + threadPool.submit(retrieveMetricsJob); + threadPool.shutdown(); + + try { + threadPool.awaitTermination(1, TimeUnit.MINUTES); + } catch (InterruptedException e) { + throw new RuntimeException(e); + } + return clusterMetricsMap; } - private static void getHostMetrics(URI hostURI, MetricsAggregator metrics) { + private static void getHostMetrics(URI hostURI, Map<ClusterInfo, MetricsAggregator> clusterMetricsMap) { Slime responseBody = doMetricsRequest(hostURI); var parseError = responseBody.get().field("error_message"); @@ -47,9 +75,9 @@ public class MetricsRetriever { } Inspector services = responseBody.get().field("services"); - services.traverse((ArrayTraverser) (i, servicesInspector) -> { - parseService(servicesInspector, metrics); - }); + services.traverse((ArrayTraverser) (i, servicesInspector) -> + parseService(servicesInspector, clusterMetricsMap) + ); } private static Slime doMetricsRequest(URI hostURI) { @@ -66,32 +94,40 @@ public class MetricsRetriever { } } - private static void parseService(Inspector service, MetricsAggregator metrics) { + private static void parseService(Inspector service, Map<ClusterInfo, MetricsAggregator> clusterMetricsMap) { String serviceName = service.field("name").asString(); - Instant timestamp = Instant.ofEpochSecond(service.field("timestamp").asLong()); - metrics.setTimestamp(timestamp); - service.field("metrics").traverse((ArrayTraverser) (i, m) -> { - Inspector values = m.field("values"); - switch (serviceName) { - case "vespa.container": - metrics.addContainerLatency( - values.field("query_latency.sum").asDouble(), - values.field("query_latency.count").asDouble()); - metrics.addFeedLatency( - values.field("feed_latency.sum").asDouble(), - values.field("feed_latency.count").asDouble()); - break; - case "vespa.qrserver": - metrics.addQrLatency( - values.field("query_latency.sum").asDouble(), - values.field("query_latency.count").asDouble()); - break; - case "vespa.distributor": - metrics.addDocumentCount(values.field("vds.distributor.docsstored.average").asDouble()); - break; - } - }); + service.field("metrics").traverse((ArrayTraverser) (i, metric) -> + addMetricsToAggeregator(serviceName, metric, clusterMetricsMap) + ); + } + + private static void addMetricsToAggeregator(String serviceName, Inspector metric, Map<ClusterInfo, MetricsAggregator> clusterMetricsMap) { + if (!WANTED_METRIC_SERVICES.contains(serviceName)) return; + Inspector values = metric.field("values"); + ClusterInfo clusterInfo = getClusterInfoFromDimensions(metric.field("dimensions")); + MetricsAggregator metricsAggregator = clusterMetricsMap.computeIfAbsent(clusterInfo, c -> new MetricsAggregator()); + switch (serviceName) { + case "vespa.container": + metricsAggregator.addContainerLatency( + values.field("query_latency.sum").asDouble(), + values.field("query_latency.count").asDouble()); + metricsAggregator.addFeedLatency( + values.field("feed_latency.sum").asDouble(), + values.field("feed_latency.count").asDouble()); + break; + case "vespa.qrserver": + metricsAggregator.addQrLatency( + values.field("query_latency.sum").asDouble(), + values.field("query_latency.count").asDouble()); + break; + case "vespa.distributor": + metricsAggregator.addDocumentCount(values.field("vds.distributor.docsstored.average").asDouble()); + break; + } } + private static ClusterInfo getClusterInfoFromDimensions(Inspector dimensions) { + return new ClusterInfo(dimensions.field("clusterid").asString(), dimensions.field("clustertype").asString()); + } } diff --git a/configserver/src/test/java/com/yahoo/vespa/config/server/metrics/ClusterMetricsRetrieverTest.java b/configserver/src/test/java/com/yahoo/vespa/config/server/metrics/ClusterMetricsRetrieverTest.java new file mode 100644 index 00000000000..4d9a20ef6d8 --- /dev/null +++ b/configserver/src/test/java/com/yahoo/vespa/config/server/metrics/ClusterMetricsRetrieverTest.java @@ -0,0 +1,117 @@ +package com.yahoo.vespa.config.server.metrics; + +import com.yahoo.config.FileReference; +import com.yahoo.config.model.api.FileDistribution; +import com.yahoo.config.model.api.HostInfo; +import com.yahoo.config.model.api.Model; +import com.yahoo.config.model.api.ServiceInfo; +import com.yahoo.config.provision.AllocatedHosts; +import com.yahoo.config.provision.ApplicationId; +import com.yahoo.vespa.config.ConfigKey; +import com.yahoo.vespa.config.ConfigPayload; +import com.yahoo.vespa.config.buildergen.ConfigDefinition; +import com.yahoo.vespa.config.server.application.Application; +import org.junit.Test; + +import java.net.URI; +import java.util.ArrayList; +import java.util.Collection; +import java.util.HashSet; +import java.util.List; +import java.util.Map; +import java.util.Set; + +import static org.junit.Assert.*; + +/** + * @author olaa + */ +public class ClusterMetricsRetrieverTest { + + @Test + public void getMetrics() { + MockModel mockModel = new MockModel(mockHosts()); + MockMetricsRetriever mockMetricsRetriever = new MockMetricsRetriever(); + Application application = new Application(mockModel, null, 0, false, + null, null, ApplicationId.fromSerializedForm("tenant:app:instance")); + + ClusterMetricsRetriever clusterMetricsRetriever = new ClusterMetricsRetriever(mockMetricsRetriever); + clusterMetricsRetriever.getMetrics(application); + + assertEquals(2, mockMetricsRetriever.hosts.size()); // Verify that logserver was ignored + } + + private Collection<HostInfo> mockHosts() { + + HostInfo hostInfo1 = new HostInfo("host1", + List.of(new ServiceInfo("content", "searchnode", null, null, "", "host1")) + ); + HostInfo hostInfo2 = new HostInfo("host2", + List.of(new ServiceInfo("default", "container", null, null, "", "host2")) + ); + HostInfo hostInfo3 = new HostInfo("host3", + List.of(new ServiceInfo("default", "logserver", null, null, "", "host3")) + ); + + return List.of(hostInfo1, hostInfo2, hostInfo3); + } + + class MockMetricsRetriever extends MetricsRetriever { + + Collection<URI> hosts = new ArrayList<>(); + + @Override + public Map<ClusterInfo, MetricsAggregator> requestMetricsGroupedByCluster(Collection<URI> hosts) { + this.hosts = hosts; + + return Map.of( + new ClusterInfo("content_cluster_id", "content"), + new MetricsAggregator().addDocumentCount(1000), + new ClusterInfo("container_cluster_id", "container"), + new MetricsAggregator().addContainerLatency(123, 5) + ); + } + } + + class MockModel implements Model { + + Collection<HostInfo> hosts; + + MockModel(Collection<HostInfo> hosts) { + this.hosts = hosts; + } + + @Override + public ConfigPayload getConfig(ConfigKey<?> configKey, ConfigDefinition targetDef) { + throw new UnsupportedOperationException(); + } + + @Override + public Set<ConfigKey<?>> allConfigsProduced() { + throw new UnsupportedOperationException(); + } + + @Override + public Collection<HostInfo> getHosts() { + return hosts; + } + + @Override + public Set<String> allConfigIds() { + throw new UnsupportedOperationException(); + } + + @Override + public void distributeFiles(FileDistribution fileDistribution) { + throw new UnsupportedOperationException(); + } + + @Override + public Set<FileReference> fileReferences() { return new HashSet<>(); } + + @Override + public AllocatedHosts allocatedHosts() { + throw new UnsupportedOperationException(); + } + } +}
\ No newline at end of file diff --git a/configserver/src/test/java/com/yahoo/vespa/config/server/metrics/MetricsRetrieverTest.java b/configserver/src/test/java/com/yahoo/vespa/config/server/metrics/MetricsRetrieverTest.java index 67e91b90803..9187b90e894 100644 --- a/configserver/src/test/java/com/yahoo/vespa/config/server/metrics/MetricsRetrieverTest.java +++ b/configserver/src/test/java/com/yahoo/vespa/config/server/metrics/MetricsRetrieverTest.java @@ -9,7 +9,9 @@ import java.io.IOException; import java.net.URI; import java.nio.file.Files; import java.nio.file.Path; +import java.time.Instant; import java.util.List; +import java.util.Map; import java.util.Optional; import java.util.function.BiConsumer; @@ -31,12 +33,8 @@ public class MetricsRetrieverTest { @Test public void testMetricAggregation() throws IOException { - var metricsRetriever = new MetricsRetriever(); - var clusters = List.of( - new ClusterInfo("cluster1", ClusterInfo.ClusterType.content, List.of(URI.create("http://localhost:8080/1"), URI.create("http://localhost:8080/2"))), - new ClusterInfo("cluster2", ClusterInfo.ClusterType.container, List.of(URI.create("http://localhost:8080/3"))) - ); + List<URI> hosts = List.of(URI.create("http://localhost:8080/1"), URI.create("http://localhost:8080/2"), URI.create("http://localhost:8080/3")); stubFor(get(urlEqualTo("/1")) .willReturn(aResponse() @@ -53,9 +51,14 @@ public class MetricsRetrieverTest { .withStatus(200) .withBody(containerMetrics()))); + ClusterInfo expectedContentCluster = new ClusterInfo("content_cluster_id", "content"); + ClusterInfo expectedContainerCluster = new ClusterInfo("container_cluster_id", "container"); + + Map<ClusterInfo, MetricsAggregator> aggregatorMap = new MetricsRetriever().requestMetricsGroupedByCluster(hosts); + compareAggregators( new MetricsAggregator().addDocumentCount(6000.0), - MetricsRetriever.requestMetricsForCluster(clusters.get(0)) + aggregatorMap.get(expectedContentCluster) ); compareAggregators( @@ -64,7 +67,8 @@ public class MetricsRetrieverTest { .addContainerLatency(2000, 0) .addQrLatency(3000, 43) .addFeedLatency(3000, 43), - MetricsRetriever.requestMetricsForCluster(clusters.get(1)) + aggregatorMap.get(expectedContainerCluster) + ); wireMock.stop(); diff --git a/configserver/src/test/resources/metrics/container_metrics b/configserver/src/test/resources/metrics/container_metrics index a6a5828934c..c2118fa39a5 100644 --- a/configserver/src/test/resources/metrics/container_metrics +++ b/configserver/src/test/resources/metrics/container_metrics @@ -11,6 +11,10 @@ "document.count": 300000, "feed.rate": 23.0, "write_latency.sum": 2000 + }, + "dimensions": { + "clustertype": "container", + "clusterid": "container_cluster_id" } }, { @@ -20,7 +24,11 @@ "feed_latency.count": 43.0, "feed_latency.sum": 3000 - } + }, + "dimensions": { + "clustertype": "container", + "clusterid": "container_cluster_id" + } } ] }, @@ -33,6 +41,10 @@ "values": { "query_latency.count": 43.0, "query_latency.sum": 3000 + }, + "dimensions": { + "clustertype": "container", + "clusterid": "container_cluster_id" } } ] diff --git a/configserver/src/test/resources/metrics/content_metrics b/configserver/src/test/resources/metrics/content_metrics index c881e574f8b..4a1deced181 100644 --- a/configserver/src/test/resources/metrics/content_metrics +++ b/configserver/src/test/resources/metrics/content_metrics @@ -8,7 +8,11 @@ "values": { "vds.distributor.docsstored.average": 3000 - } + }, + "dimensions": { + "clustertype": "content", + "clusterid": "content_cluster_id" + } } ] } |