diff options
27 files changed, 1715 insertions, 89 deletions
diff --git a/configserver/src/main/java/com/yahoo/vespa/config/server/ApplicationRepository.java b/configserver/src/main/java/com/yahoo/vespa/config/server/ApplicationRepository.java index 935b455c947..c11d7b12a66 100644 --- a/configserver/src/main/java/com/yahoo/vespa/config/server/ApplicationRepository.java +++ b/configserver/src/main/java/com/yahoo/vespa/config/server/ApplicationRepository.java @@ -19,6 +19,7 @@ import com.yahoo.config.provision.RegionName; import com.yahoo.config.provision.SystemName; import com.yahoo.config.provision.TenantName; import com.yahoo.config.provision.Zone; +import com.yahoo.container.handler.metrics.JsonResponse; import com.yahoo.container.jdisc.HttpResponse; import com.yahoo.docproc.jdisc.metric.NullMetric; import com.yahoo.io.IOUtils; @@ -44,9 +45,11 @@ import com.yahoo.vespa.config.server.http.InternalServerException; import com.yahoo.vespa.config.server.http.LogRetriever; import com.yahoo.vespa.config.server.http.SimpleHttpFetcher; import com.yahoo.vespa.config.server.http.TesterClient; -import com.yahoo.vespa.config.server.http.v2.MetricsResponse; +import com.yahoo.vespa.config.server.http.v2.DeploymentMetricsResponse; import com.yahoo.vespa.config.server.http.v2.PrepareResult; -import com.yahoo.vespa.config.server.metrics.ApplicationMetricsRetriever; +import com.yahoo.vespa.config.server.http.v2.ProtonMetricsResponse; +import com.yahoo.vespa.config.server.metrics.DeploymentMetricsRetriever; +import com.yahoo.vespa.config.server.metrics.ProtonMetricsRetriever; import com.yahoo.vespa.config.server.provision.HostProvisionerProvider; import com.yahoo.vespa.config.server.session.LocalSession; import com.yahoo.vespa.config.server.session.PrepareParams; @@ -745,13 +748,21 @@ public class ApplicationRepository implements com.yahoo.config.provision.Deploye private List<ApplicationId> activeApplications(TenantName tenantName) { return tenantRepository.getTenant(tenantName).getApplicationRepo().activeApplications(); } + // 
---------------- Proton Metrics V1 ------------------------------------------------------------------------ - // ---------------- Metrics ------------------------------------------------------------------------ + public ProtonMetricsResponse getProtonMetrics(ApplicationId applicationId) { + Application application = getApplication(applicationId); + ProtonMetricsRetriever protonMetricsRetriever = new ProtonMetricsRetriever(); + return protonMetricsRetriever.getMetrics(application); + } + + + // ---------------- Deployment Metrics V1 ------------------------------------------------------------------------ - public MetricsResponse getMetrics(ApplicationId applicationId) { + public DeploymentMetricsResponse getDeploymentMetrics(ApplicationId applicationId) { Application application = getApplication(applicationId); - ApplicationMetricsRetriever applicationMetricsRetriever = new ApplicationMetricsRetriever(); - return applicationMetricsRetriever.getMetrics(application); + DeploymentMetricsRetriever deploymentMetricsRetriever = new DeploymentMetricsRetriever(); + return deploymentMetricsRetriever.getMetrics(application); } // ---------------- Misc operations ---------------------------------------------------------------- diff --git a/configserver/src/main/java/com/yahoo/vespa/config/server/http/v2/ApplicationHandler.java b/configserver/src/main/java/com/yahoo/vespa/config/server/http/v2/ApplicationHandler.java index 749f57b3104..8426d6b56cf 100644 --- a/configserver/src/main/java/com/yahoo/vespa/config/server/http/v2/ApplicationHandler.java +++ b/configserver/src/main/java/com/yahoo/vespa/config/server/http/v2/ApplicationHandler.java @@ -55,7 +55,8 @@ public class ApplicationHandler extends HttpHandler { "http://*/application/v2/tenant/*/application/*/environment/*/region/*/instance/*/serviceconverge", "http://*/application/v2/tenant/*/application/*/environment/*/region/*/instance/*/serviceconverge/*", 
"http://*/application/v2/tenant/*/application/*/environment/*/region/*/instance/*/clustercontroller/*/status/*", - "http://*/application/v2/tenant/*/application/*/environment/*/region/*/instance/*/metrics", + "http://*/application/v2/tenant/*/application/*/environment/*/region/*/instance/*/metrics/*", + "http://*/application/v2/tenant/*/application/*/environment/*/region/*/instance/*/metrics/*", "http://*/application/v2/tenant/*/application/*/environment/*/region/*/instance/*/logs", "http://*/application/v2/tenant/*/application/*/environment/*/region/*/instance/*/tester/*/*", "http://*/application/v2/tenant/*/application/*/environment/*/region/*/instance/*/tester/*", @@ -137,8 +138,12 @@ public class ApplicationHandler extends HttpHandler { return applicationRepository.getLogs(applicationId, hostname, apiParams); } - if (isMetricsRequest(request)) { - return applicationRepository.getMetrics(applicationId); + if (isProtonMetricsRequest(request)) { + return applicationRepository.getProtonMetrics(applicationId); + } + + if (isDeploymentMetricsRequest(request)) { + return applicationRepository.getDeploymentMetrics(applicationId); } if (isIsSuspendedRequest(request)) { @@ -231,9 +236,14 @@ public class ApplicationHandler extends HttpHandler { request.getUri().getPath().endsWith("/suspended"); } - private static boolean isMetricsRequest(HttpRequest request) { - return getBindingMatch(request).groupCount() == 7 && - request.getUri().getPath().endsWith("/metrics"); + private static boolean isProtonMetricsRequest(HttpRequest request) { + return getBindingMatch(request).groupCount() == 8 && + request.getUri().getPath().endsWith("/metrics/proton"); + } + + private static boolean isDeploymentMetricsRequest(HttpRequest request) { + return getBindingMatch(request).groupCount() == 8 && + request.getUri().getPath().endsWith("/metrics/deployment"); } private static boolean isLogRequest(HttpRequest request) { diff --git 
a/configserver/src/main/java/com/yahoo/vespa/config/server/http/v2/MetricsResponse.java b/configserver/src/main/java/com/yahoo/vespa/config/server/http/v2/DeploymentMetricsResponse.java index 7cf9357217e..c503b60b3a3 100644 --- a/configserver/src/main/java/com/yahoo/vespa/config/server/http/v2/MetricsResponse.java +++ b/configserver/src/main/java/com/yahoo/vespa/config/server/http/v2/DeploymentMetricsResponse.java @@ -8,7 +8,7 @@ import com.yahoo.slime.JsonFormat; import com.yahoo.slime.Slime; import com.yahoo.vespa.config.server.http.HttpConfigResponse; import com.yahoo.vespa.config.server.metrics.ClusterInfo; -import com.yahoo.vespa.config.server.metrics.MetricsAggregator; +import com.yahoo.vespa.config.server.metrics.DeploymentMetricsAggregator; import java.io.IOException; import java.io.OutputStream; @@ -17,11 +17,11 @@ import java.util.Map; /** * @author olaa */ -public class MetricsResponse extends HttpResponse { +public class DeploymentMetricsResponse extends HttpResponse { private final Slime slime = new Slime(); - public MetricsResponse(int status, ApplicationId applicationId, Map<ClusterInfo, MetricsAggregator> aggregatedMetrics) { + public DeploymentMetricsResponse(int status, ApplicationId applicationId, Map<ClusterInfo, DeploymentMetricsAggregator> aggregatedMetrics) { super(status); Cursor application = slime.setObject(); @@ -34,7 +34,7 @@ public class MetricsResponse extends HttpResponse { cluster.setString("clusterId", entry.getKey().getClusterId()); cluster.setString("clusterType", entry.getKey().getClusterType()); - MetricsAggregator aggregator = entry.getValue(); + DeploymentMetricsAggregator aggregator = entry.getValue(); Cursor metrics = cluster.setObject("metrics"); aggregator.aggregateQueryRate().ifPresent(queryRate -> metrics.setDouble("queriesPerSecond", queryRate)); aggregator.aggregateFeedRate().ifPresent(feedRate -> metrics.setDouble("feedPerSecond", feedRate)); diff --git 
a/configserver/src/main/java/com/yahoo/vespa/config/server/http/v2/ProtonMetricsResponse.java b/configserver/src/main/java/com/yahoo/vespa/config/server/http/v2/ProtonMetricsResponse.java new file mode 100644 index 00000000000..a0ad87a39c9 --- /dev/null +++ b/configserver/src/main/java/com/yahoo/vespa/config/server/http/v2/ProtonMetricsResponse.java @@ -0,0 +1,51 @@ +package com.yahoo.vespa.config.server.http.v2; + +import com.yahoo.config.provision.ApplicationId; +import com.yahoo.container.jdisc.HttpResponse; +import com.yahoo.slime.Cursor; +import com.yahoo.slime.JsonFormat; +import com.yahoo.slime.Slime; +import com.yahoo.vespa.config.server.http.HttpConfigResponse; +import com.yahoo.vespa.config.server.metrics.ProtonMetricsAggregator; +import java.io.IOException; +import java.io.OutputStream; +import java.util.Map; + +public class ProtonMetricsResponse extends HttpResponse { + + private final Slime slime = new Slime(); + + /** + * @author akvalsvik + */ + public ProtonMetricsResponse(int status, ApplicationId applicationId, Map<String, ProtonMetricsAggregator> aggregatedProtonMetrics) { + super(status); + + Cursor application = slime.setObject(); + application.setString("applicationId", applicationId.serializedForm()); + + Cursor clusters = application.setArray("clusters"); + + for (var entry : aggregatedProtonMetrics.entrySet()) { + Cursor cluster = clusters.addObject(); + cluster.setString("clusterId", entry.getKey()); + + ProtonMetricsAggregator aggregator = entry.getValue(); + Cursor metrics = cluster.setObject("metrics"); + metrics.setDouble("documentsActiveCount", aggregator.aggregateDocumentActiveCount()); + metrics.setDouble("documentsReadyCount", aggregator.aggregateDocumentReadyCount()); + metrics.setDouble("documentsTotalCount", aggregator.aggregateDocumentTotalCount()); + metrics.setDouble("documentDiskUsage", aggregator.aggregateDocumentDiskUsage()); + metrics.setDouble("resourceDiskUsageAverage", aggregator.aggregateResourceDiskUsageAverage()); + 
metrics.setDouble("resourceMemoryUsageAverage", aggregator.aggregateResourceMemoryUsageAverage()); + } + } + + @Override + public void render(OutputStream outputStream) throws IOException { + new JsonFormat(false).encode(outputStream, slime); + } + + @Override + public String getContentType() { return HttpConfigResponse.JSON_CONTENT_TYPE; } +} diff --git a/configserver/src/main/java/com/yahoo/vespa/config/server/metrics/ClusterMetricsRetriever.java b/configserver/src/main/java/com/yahoo/vespa/config/server/metrics/ClusterDeploymentMetricsRetriever.java index 42d3ace6bd9..8e7e4eec9b0 100644 --- a/configserver/src/main/java/com/yahoo/vespa/config/server/metrics/ClusterMetricsRetriever.java +++ b/configserver/src/main/java/com/yahoo/vespa/config/server/metrics/ClusterDeploymentMetricsRetriever.java @@ -6,16 +6,12 @@ import java.util.logging.Level; import com.yahoo.slime.ArrayTraverser; import com.yahoo.slime.Inspector; import com.yahoo.slime.Slime; -import com.yahoo.slime.SlimeUtils; import com.yahoo.yolean.Exceptions; import org.apache.http.client.config.RequestConfig; -import org.apache.http.client.methods.CloseableHttpResponse; -import org.apache.http.client.methods.HttpGet; import org.apache.http.impl.client.CloseableHttpClient; import org.apache.http.impl.conn.PoolingHttpClientConnectionManager; import java.io.IOException; -import java.io.InputStream; import java.net.URI; import java.util.Collection; import java.util.List; @@ -33,9 +29,9 @@ import java.util.logging.Logger; * @author olaa * @author ogronnesby */ -public class ClusterMetricsRetriever { +public class ClusterDeploymentMetricsRetriever { - private static final Logger log = Logger.getLogger(ClusterMetricsRetriever.class.getName()); + private static final Logger log = Logger.getLogger(ClusterDeploymentMetricsRetriever.class.getName()); private static final String VESPA_CONTAINER = "vespa.container"; private static final String VESPA_QRSERVER = "vespa.qrserver"; @@ -57,8 +53,8 @@ public class 
ClusterMetricsRetriever { * Call the metrics API on each host and aggregate the metrics * into a single value, grouped by cluster. */ - public Map<ClusterInfo, MetricsAggregator> requestMetricsGroupedByCluster(Collection<URI> hosts) { - Map<ClusterInfo, MetricsAggregator> clusterMetricsMap = new ConcurrentHashMap<>(); + public Map<ClusterInfo, DeploymentMetricsAggregator> requestMetricsGroupedByCluster(Collection<URI> hosts) { + Map<ClusterInfo, DeploymentMetricsAggregator> clusterMetricsMap = new ConcurrentHashMap<>(); long startTime = System.currentTimeMillis(); Runnable retrieveMetricsJob = () -> @@ -83,68 +79,59 @@ public class ClusterMetricsRetriever { return clusterMetricsMap; } - private static void getHostMetrics(URI hostURI, Map<ClusterInfo, MetricsAggregator> clusterMetricsMap) { - Slime responseBody = doMetricsRequest(hostURI); - var parseError = responseBody.get().field("error_message"); - - if (parseError.valid()) { - log.info("Failed to retrieve metrics from " + hostURI + ": " + parseError.asString()); - } - - Inspector services = responseBody.get().field("services"); - services.traverse((ArrayTraverser) (i, servicesInspector) -> - parseService(servicesInspector, clusterMetricsMap) - ); - } - - private static Slime doMetricsRequest(URI hostURI) { - HttpGet get = new HttpGet(hostURI); - try (CloseableHttpResponse response = httpClient.execute(get)) { - InputStream is = response.getEntity().getContent(); - Slime slime = SlimeUtils.jsonToSlime(is.readAllBytes()); - is.close(); - return slime; + private static void getHostMetrics(URI hostURI, Map<ClusterInfo, DeploymentMetricsAggregator> clusterMetricsMap) { + Slime responseBody; + try { + responseBody = MetricsSlime.doMetricsRequest(hostURI, httpClient); } catch (IOException e) { // Usually caused by applications being deleted during metric retrieval log.info("Was unable to fetch metrics from " + hostURI + " : " + Exceptions.toMessageString(e)); - return new Slime(); + responseBody = new Slime(); + } + 
var parseError = responseBody.get().field("error_message"); + + if (parseError.valid()) { + log.info("Failed to retrieve metrics from " + hostURI + ": " + parseError.asString()); } + + Inspector services = responseBody.get().field("services"); + services.traverse((ArrayTraverser) (i, servicesInspector) -> + parseService(servicesInspector, clusterMetricsMap) + ); } - private static void parseService(Inspector service, Map<ClusterInfo, MetricsAggregator> clusterMetricsMap) { + + + private static void parseService(Inspector service, Map<ClusterInfo, DeploymentMetricsAggregator> clusterMetricsMap) { String serviceName = service.field("name").asString(); service.field("metrics").traverse((ArrayTraverser) (i, metric) -> addMetricsToAggeregator(serviceName, metric, clusterMetricsMap) ); } - private static void addMetricsToAggeregator(String serviceName, Inspector metric, Map<ClusterInfo, MetricsAggregator> clusterMetricsMap) { + private static void addMetricsToAggeregator(String serviceName, Inspector metric, Map<ClusterInfo, DeploymentMetricsAggregator> clusterMetricsMap) { if (!WANTED_METRIC_SERVICES.contains(serviceName)) return; Inspector values = metric.field("values"); - ClusterInfo clusterInfo = getClusterInfoFromDimensions(metric.field("dimensions")); - MetricsAggregator metricsAggregator = clusterMetricsMap.computeIfAbsent(clusterInfo, c -> new MetricsAggregator()); + ClusterInfo clusterInfo = MetricsSlime.getClusterInfoFromDimensions(metric.field("dimensions")); + DeploymentMetricsAggregator deploymentMetricsAggregator = clusterMetricsMap.computeIfAbsent(clusterInfo, c -> new DeploymentMetricsAggregator()); switch (serviceName) { case "vespa.container": - metricsAggregator.addContainerLatency( + deploymentMetricsAggregator.addContainerLatency( values.field("query_latency.sum").asDouble(), values.field("query_latency.count").asDouble()); - metricsAggregator.addFeedLatency( + deploymentMetricsAggregator.addFeedLatency( values.field("feed.latency.sum").asDouble(), 
values.field("feed.latency.count").asDouble()); break; case "vespa.qrserver": - metricsAggregator.addQrLatency( + deploymentMetricsAggregator.addQrLatency( values.field("query_latency.sum").asDouble(), values.field("query_latency.count").asDouble()); break; case "vespa.distributor": - metricsAggregator.addDocumentCount(values.field("vds.distributor.docsstored.average").asDouble()); + deploymentMetricsAggregator.addDocumentCount(values.field("vds.distributor.docsstored.average").asDouble()); break; } } - - private static ClusterInfo getClusterInfoFromDimensions(Inspector dimensions) { - return new ClusterInfo(dimensions.field("clusterid").asString(), dimensions.field("clustertype").asString()); - } } diff --git a/configserver/src/main/java/com/yahoo/vespa/config/server/metrics/ClusterProtonMetricsRetriever.java b/configserver/src/main/java/com/yahoo/vespa/config/server/metrics/ClusterProtonMetricsRetriever.java new file mode 100644 index 00000000000..e859bdadd28 --- /dev/null +++ b/configserver/src/main/java/com/yahoo/vespa/config/server/metrics/ClusterProtonMetricsRetriever.java @@ -0,0 +1,105 @@ +package com.yahoo.vespa.config.server.metrics; + +import ai.vespa.util.http.VespaHttpClientBuilder; +import com.yahoo.slime.ArrayTraverser; +import com.yahoo.slime.Inspector; +import com.yahoo.slime.Slime; +import com.yahoo.yolean.Exceptions; +import java.io.IOException; +import java.net.URI; +import java.util.Collection; +import java.util.Map; +import java.util.concurrent.ConcurrentHashMap; +import java.util.concurrent.ForkJoinPool; +import java.util.concurrent.TimeUnit; +import java.util.logging.Level; +import java.util.logging.Logger; +import org.apache.http.client.config.RequestConfig; +import org.apache.http.impl.client.CloseableHttpClient; +import org.apache.http.impl.conn.PoolingHttpClientConnectionManager; + +import static com.yahoo.vespa.config.server.metrics.MetricsSlime.doMetricsRequest; + +public class ClusterProtonMetricsRetriever { + + private static final 
Logger log = Logger.getLogger(ClusterProtonMetricsRetriever.class.getName()); + + private static final CloseableHttpClient httpClient = VespaHttpClientBuilder + .create(PoolingHttpClientConnectionManager::new) + .setDefaultRequestConfig( + RequestConfig.custom() + .setConnectTimeout(10 * 1000) + .setSocketTimeout(10 * 1000) + .build()) + .build(); + + + public Map<String, ProtonMetricsAggregator> requestMetricsGroupedByCluster(Collection<URI> hosts) { + Map<String, ProtonMetricsAggregator> clusterMetricsMap = new ConcurrentHashMap<>(); + for (URI uri : hosts) { + addMetricsFromHost(uri, clusterMetricsMap); + } +/* long startTime = System.currentTimeMillis(); + Runnable retrieveMetricsJob = () -> + hosts.parallelStream().forEach(host -> + addMetricsFromHost(host, clusterMetricsMap) + ); + + ForkJoinPool threadPool = new ForkJoinPool(10); + threadPool.submit(retrieveMetricsJob); + threadPool.shutdown(); + + try { + threadPool.awaitTermination(1, TimeUnit.MINUTES); + } catch (InterruptedException e) { + throw new RuntimeException(e); + } + + log.log(Level.FINE, () -> + String.format("Proton metric retrieval for %d nodes took %d milliseconds", hosts.size(), System.currentTimeMillis() - startTime) + );*/ + + return clusterMetricsMap; + } + + private static void addMetricsFromHost(URI hostURI, Map<String, ProtonMetricsAggregator> clusterMetricsMap) { + Slime hostResponseBody; + try { + hostResponseBody = doMetricsRequest(hostURI, httpClient); + } catch (IOException e) { + log.info("Was unable to fetch metrics from " + hostURI + " : " + Exceptions.toMessageString(e)); + hostResponseBody = new Slime(); + } + var parseError = hostResponseBody.get().field("error_message"); + + if (parseError.valid()) { + log.info("Failed to retrieve metrics from " + hostURI + ": " + parseError.asString()); + } + + Inspector nodes = hostResponseBody.get().field("nodes"); + nodes.traverse((ArrayTraverser) (i, nodesInspector) -> + parseNode(nodesInspector, clusterMetricsMap) + ); + } + + 
private static void parseNode(Inspector node, Map<String, ProtonMetricsAggregator> clusterMetricsMap) { + String nodeRole = node.field("role").asString(); + if(nodeRole.contains("content")) { + ProtonMetricsAggregator aggregator = new ProtonMetricsAggregator(); + clusterMetricsMap.put(nodeRole, aggregator); + node.field("services").traverse((ArrayTraverser) (i, servicesInspector) -> + addServicesToAggregator(servicesInspector, aggregator) + ); + } + } + + private static void addServicesToAggregator(Inspector services, ProtonMetricsAggregator aggregator) { + services.field("metrics").traverse((ArrayTraverser) (i, metricsInspector) -> + addMetricsToAggregator(metricsInspector, aggregator) + ); + } + + private static void addMetricsToAggregator(Inspector metrics, ProtonMetricsAggregator aggregator) { + aggregator.addAll(metrics.field("values")); + } +} diff --git a/configserver/src/main/java/com/yahoo/vespa/config/server/metrics/MetricsAggregator.java b/configserver/src/main/java/com/yahoo/vespa/config/server/metrics/DeploymentMetricsAggregator.java index 9ecec471b07..a4066fc212d 100644 --- a/configserver/src/main/java/com/yahoo/vespa/config/server/metrics/MetricsAggregator.java +++ b/configserver/src/main/java/com/yahoo/vespa/config/server/metrics/DeploymentMetricsAggregator.java @@ -7,29 +7,29 @@ import java.util.Optional; * @author olaa * @author ogronnesby */ -public class MetricsAggregator { +public class DeploymentMetricsAggregator { private LatencyMetrics feed; private LatencyMetrics qr; private LatencyMetrics container; private Double documentCount; - public synchronized MetricsAggregator addFeedLatency(double sum, double count) { + public synchronized DeploymentMetricsAggregator addFeedLatency(double sum, double count) { this.feed = combineLatency(this.feed, sum, count); return this; } - public synchronized MetricsAggregator addQrLatency(double sum, double count) { + public synchronized DeploymentMetricsAggregator addQrLatency(double sum, double count) { 
this.qr = combineLatency(this.qr, sum, count); return this; } - public synchronized MetricsAggregator addContainerLatency(double sum, double count) { + public synchronized DeploymentMetricsAggregator addContainerLatency(double sum, double count) { this.container = combineLatency(this.container, sum, count); return this; } - public synchronized MetricsAggregator addDocumentCount(double count) { + public synchronized DeploymentMetricsAggregator addDocumentCount(double count) { this.documentCount = (this.documentCount == null ? 0.0 : this.documentCount) + count; return this; } diff --git a/configserver/src/main/java/com/yahoo/vespa/config/server/metrics/ApplicationMetricsRetriever.java b/configserver/src/main/java/com/yahoo/vespa/config/server/metrics/DeploymentMetricsRetriever.java index c6daaaae2f5..43847cd9c3d 100644 --- a/configserver/src/main/java/com/yahoo/vespa/config/server/metrics/ApplicationMetricsRetriever.java +++ b/configserver/src/main/java/com/yahoo/vespa/config/server/metrics/DeploymentMetricsRetriever.java @@ -4,7 +4,7 @@ package com.yahoo.vespa.config.server.metrics; import com.yahoo.config.model.api.HostInfo; import com.yahoo.config.model.api.ServiceInfo; import com.yahoo.vespa.config.server.application.Application; -import com.yahoo.vespa.config.server.http.v2.MetricsResponse; +import com.yahoo.vespa.config.server.http.v2.DeploymentMetricsResponse; import java.net.URI; import java.util.Collection; @@ -17,29 +17,29 @@ import java.util.stream.Collectors; * * @author olaa */ -public class ApplicationMetricsRetriever { +public class DeploymentMetricsRetriever { - private final ClusterMetricsRetriever metricsRetriever; + private final ClusterDeploymentMetricsRetriever metricsRetriever; - public ApplicationMetricsRetriever() { - this(new ClusterMetricsRetriever()); + public DeploymentMetricsRetriever() { + this(new ClusterDeploymentMetricsRetriever()); } - public ApplicationMetricsRetriever(ClusterMetricsRetriever metricsRetriever) { + public 
DeploymentMetricsRetriever(ClusterDeploymentMetricsRetriever metricsRetriever) { this.metricsRetriever = metricsRetriever; } - public MetricsResponse getMetrics(Application application) { + public DeploymentMetricsResponse getMetrics(Application application) { var hosts = getHostsOfApplication(application); var clusterMetrics = metricsRetriever.requestMetricsGroupedByCluster(hosts); - return new MetricsResponse(200, application.getId(), clusterMetrics); + return new DeploymentMetricsResponse(200, application.getId(), clusterMetrics); } private static Collection<URI> getHostsOfApplication(Application application) { return application.getModel().getHosts().stream() .filter(host -> host.getServices().stream().noneMatch(isLogserver())) .map(HostInfo::getHostname) - .map(ApplicationMetricsRetriever::createMetricsProxyURI) + .map(DeploymentMetricsRetriever::createMetricsProxyURI) .collect(Collectors.toList()); } diff --git a/configserver/src/main/java/com/yahoo/vespa/config/server/metrics/MetricsSlime.java b/configserver/src/main/java/com/yahoo/vespa/config/server/metrics/MetricsSlime.java new file mode 100644 index 00000000000..617767bc6a7 --- /dev/null +++ b/configserver/src/main/java/com/yahoo/vespa/config/server/metrics/MetricsSlime.java @@ -0,0 +1,28 @@ +package com.yahoo.vespa.config.server.metrics; + +import com.yahoo.slime.Inspector; +import com.yahoo.slime.Slime; +import com.yahoo.slime.SlimeUtils; +import com.yahoo.yolean.Exceptions; +import java.io.IOException; +import java.io.InputStream; +import java.net.URI; +import org.apache.http.client.methods.CloseableHttpResponse; +import org.apache.http.client.methods.HttpGet; +import org.apache.http.impl.client.CloseableHttpClient; + +public class MetricsSlime { + + static Slime doMetricsRequest(URI hostURI, CloseableHttpClient httpClient) throws IOException { + HttpGet get = new HttpGet(hostURI); + CloseableHttpResponse response = httpClient.execute(get); + InputStream is = response.getEntity().getContent(); + Slime 
slime = SlimeUtils.jsonToSlime(is.readAllBytes()); + is.close(); + return slime; + } + + static ClusterInfo getClusterInfoFromDimensions(Inspector dimensions) { + return new ClusterInfo(dimensions.field("clusterid").asString(), dimensions.field("clustertype").asString()); + } +} diff --git a/configserver/src/main/java/com/yahoo/vespa/config/server/metrics/ProtonMetricsAggregator.java b/configserver/src/main/java/com/yahoo/vespa/config/server/metrics/ProtonMetricsAggregator.java new file mode 100644 index 00000000000..e1a0c2dc253 --- /dev/null +++ b/configserver/src/main/java/com/yahoo/vespa/config/server/metrics/ProtonMetricsAggregator.java @@ -0,0 +1,113 @@ +package com.yahoo.vespa.config.server.metrics; + +import com.yahoo.slime.Inspector; + +public class ProtonMetricsAggregator { + + private static final String DOCUMENT_ACTIVE = "content.proton.documentdb.documents.active.last"; + private static final String DOCUMENT_READY = "content.proton.documentdb.documents.ready.last"; + private static final String DOCUMENT_TOTAL = "content.proton.documentdb.documents.total.last"; + private static final String DOCUMENT_DISK_USAGE = "content.proton.documentdb.disk_usage.last"; + private static final String RESOURCE_DISK_AVERAGE = "content.proton.resource_usage.disk.average"; + private static final String RESOURCE_MEMORY_AVERAGE = "content.proton.resource_usage.memory.average"; + + private Double documentActiveCount = 0.0; + private Double documentReadyCount = 0.0; + private Double documentTotalCount = 0.0; + private Double documentDiskUsage = 0.0; + + private final AverageMetric resourceDiskUsageAverage = new AverageMetric(); + private final AverageMetric resourceMemoryUsageAverage = new AverageMetric(); + + public synchronized ProtonMetricsAggregator addAll(Inspector metric) { + if (metric.field(DOCUMENT_ACTIVE).valid()) addDocumentActiveCount(metric.field(DOCUMENT_ACTIVE).asDouble()); + if (metric.field(DOCUMENT_READY).valid()) 
addDocumentReadyCount(metric.field(DOCUMENT_READY).asDouble()); + if (metric.field(DOCUMENT_TOTAL).valid()) addDocumentTotalCount(metric.field(DOCUMENT_TOTAL).asDouble()); + if (metric.field(DOCUMENT_DISK_USAGE).valid()) addDocumentDiskUsage(metric.field(DOCUMENT_DISK_USAGE).asDouble()); + if (metric.field(RESOURCE_DISK_AVERAGE).valid()) addResourceDiskUsageAverage(metric.field(RESOURCE_DISK_AVERAGE).asDouble()); + if (metric.field(RESOURCE_MEMORY_AVERAGE).valid()) addResourceMemoryUsageAverage(metric.field(RESOURCE_MEMORY_AVERAGE).asDouble()); + return this; + } + + public ProtonMetricsAggregator addAll(ProtonMetricsAggregator aggregator) { + this.documentActiveCount += aggregator.aggregateDocumentActiveCount(); + this.documentReadyCount += aggregator.aggregateDocumentReadyCount(); + this.documentTotalCount += aggregator.aggregateDocumentTotalCount(); + this.documentDiskUsage += aggregator.aggregateDocumentDiskUsage(); + addResourceDiskUsageAverage(aggregator); + addResourceMemoryUsageAverage(aggregator); + return this; + } + + public ProtonMetricsAggregator addResourceDiskUsageAverage(ProtonMetricsAggregator aggregator) { + this.resourceDiskUsageAverage.averageCount += aggregator.resourceDiskUsageAverage.averageCount; + this.resourceDiskUsageAverage.averageSum += aggregator.resourceDiskUsageAverage.averageSum; + return this; + } + + public ProtonMetricsAggregator addResourceMemoryUsageAverage(ProtonMetricsAggregator aggregator) { + this.resourceMemoryUsageAverage.averageCount += aggregator.resourceMemoryUsageAverage.averageCount; + this.resourceMemoryUsageAverage.averageSum += aggregator.resourceMemoryUsageAverage.averageSum; + return this; + } + + public synchronized ProtonMetricsAggregator addDocumentActiveCount(double documentActiveCount) { + this.documentActiveCount += documentActiveCount; + return this; + } + + public synchronized ProtonMetricsAggregator addDocumentReadyCount(double documentReadyCount) { + this.documentReadyCount += documentReadyCount; + 
return this; + } + + public synchronized ProtonMetricsAggregator addDocumentTotalCount(double documentTotalCount) { + this.documentTotalCount += documentTotalCount; + return this; + } + + public synchronized ProtonMetricsAggregator addDocumentDiskUsage(double documentDiskUsage) { + this.documentDiskUsage += documentDiskUsage; + return this; + } + + public synchronized ProtonMetricsAggregator addResourceDiskUsageAverage(double resourceDiskUsageAverage) { + this.resourceDiskUsageAverage.averageCount++; + this.resourceDiskUsageAverage.averageSum += resourceDiskUsageAverage; + return this; + } + + public synchronized ProtonMetricsAggregator addResourceMemoryUsageAverage(double resourceMemoryUsageAverage) { + this.resourceMemoryUsageAverage.averageCount++; + this.resourceMemoryUsageAverage.averageSum += resourceMemoryUsageAverage; + return this; + } + + public Double aggregateDocumentActiveCount() { + return this.documentActiveCount; + } + + public Double aggregateDocumentReadyCount() { + return this.documentReadyCount; + } + + public Double aggregateDocumentTotalCount() { + return this.documentTotalCount; + } + + public Double aggregateDocumentDiskUsage() {return this.documentDiskUsage;} + + public Double aggregateResourceDiskUsageAverage() { + return this.resourceDiskUsageAverage.averageSum / this.resourceDiskUsageAverage.averageCount; + } + + public Double aggregateResourceMemoryUsageAverage() { + return this.resourceMemoryUsageAverage.averageSum / this.resourceMemoryUsageAverage.averageCount; + } + + private static class AverageMetric { + double averageSum = 0.0; + double averageCount = 0.0; + } + +} diff --git a/configserver/src/main/java/com/yahoo/vespa/config/server/metrics/ProtonMetricsRetriever.java b/configserver/src/main/java/com/yahoo/vespa/config/server/metrics/ProtonMetricsRetriever.java new file mode 100644 index 00000000000..91295ca8bee --- /dev/null +++ b/configserver/src/main/java/com/yahoo/vespa/config/server/metrics/ProtonMetricsRetriever.java @@ 
-0,0 +1,43 @@ +package com.yahoo.vespa.config.server.metrics; + +import com.yahoo.config.model.api.HostInfo; +import com.yahoo.config.model.api.ServiceInfo; +import com.yahoo.vespa.config.server.application.Application; +import com.yahoo.vespa.config.server.http.v2.ProtonMetricsResponse; +import java.net.URI; +import java.util.Collection; +import java.util.function.Predicate; +import java.util.stream.Collectors; + +public class ProtonMetricsRetriever { + + private final ClusterProtonMetricsRetriever metricsRetriever; + public ProtonMetricsRetriever() { + this( new ClusterProtonMetricsRetriever()); + } + + public ProtonMetricsRetriever(ClusterProtonMetricsRetriever metricsRetriever) { + this.metricsRetriever = metricsRetriever; + } + + public ProtonMetricsResponse getMetrics(Application application) { + var hosts = getHostsOfApplication(application); + var clusterMetrics = metricsRetriever.requestMetricsGroupedByCluster(hosts); + return new ProtonMetricsResponse(200, application.getId(), clusterMetrics); + } + + private static Collection<URI> getHostsOfApplication(Application application) { + return application.getModel().getHosts().stream() + .filter(host -> host.getServices().stream().anyMatch(isSearchNode())) + .map(HostInfo::getHostname) + .map(ProtonMetricsRetriever::createMetricsProxyURI) + .collect(Collectors.toList()); + } + + private static Predicate<ServiceInfo> isSearchNode() { + return serviceInfo -> serviceInfo.getServiceType().equalsIgnoreCase("searchnode"); + } + private static URI createMetricsProxyURI(String hostname) { + return URI.create("http://" + hostname + ":19092/metrics/v2/values"); + } +} diff --git a/configserver/src/test/java/com/yahoo/vespa/config/server/metrics/ClusterMetricsRetrieverTest.java b/configserver/src/test/java/com/yahoo/vespa/config/server/metrics/ClusterDeploymentMetricsRetrieverTest.java index 3f67d8e2cac..27934da683d 100644 --- 
a/configserver/src/test/java/com/yahoo/vespa/config/server/metrics/ClusterMetricsRetrieverTest.java +++ b/configserver/src/test/java/com/yahoo/vespa/config/server/metrics/ClusterDeploymentMetricsRetrieverTest.java @@ -28,7 +28,7 @@ import static org.junit.Assert.assertEquals; /** * @author olaa */ -public class ClusterMetricsRetrieverTest { +public class ClusterDeploymentMetricsRetrieverTest { @Rule public final WireMockRule wireMock = new WireMockRule(options().dynamicPort(), true); @@ -57,15 +57,15 @@ public class ClusterMetricsRetrieverTest { ClusterInfo expectedContentCluster = new ClusterInfo("content_cluster_id", "content"); ClusterInfo expectedContainerCluster = new ClusterInfo("container_cluster_id", "container"); - Map<ClusterInfo, MetricsAggregator> aggregatorMap = new ClusterMetricsRetriever().requestMetricsGroupedByCluster(hosts); + Map<ClusterInfo, DeploymentMetricsAggregator> aggregatorMap = new ClusterDeploymentMetricsRetriever().requestMetricsGroupedByCluster(hosts); compareAggregators( - new MetricsAggregator().addDocumentCount(6000.0), + new DeploymentMetricsAggregator().addDocumentCount(6000.0), aggregatorMap.get(expectedContentCluster) ); compareAggregators( - new MetricsAggregator() + new DeploymentMetricsAggregator() .addContainerLatency(3000, 43) .addContainerLatency(2000, 0) .addQrLatency(3000, 43) @@ -87,7 +87,7 @@ public class ClusterMetricsRetrieverTest { // Same tolerance value as used internally in MetricsAggregator.isZero private static final double metricsTolerance = 0.001; - private void compareAggregators(MetricsAggregator expected, MetricsAggregator actual) { + private void compareAggregators(DeploymentMetricsAggregator expected, DeploymentMetricsAggregator actual) { BiConsumer<Double, Double> assertDoubles = (a, b) -> assertEquals(a.doubleValue(), b.doubleValue(), metricsTolerance); compareOptionals(expected.aggregateDocumentCount(), actual.aggregateDocumentCount(), assertDoubles); diff --git 
a/configserver/src/test/java/com/yahoo/vespa/config/server/metrics/ClusterProtonMetricsRetrieverTest.java b/configserver/src/test/java/com/yahoo/vespa/config/server/metrics/ClusterProtonMetricsRetrieverTest.java new file mode 100644 index 00000000000..211a14acd76 --- /dev/null +++ b/configserver/src/test/java/com/yahoo/vespa/config/server/metrics/ClusterProtonMetricsRetrieverTest.java @@ -0,0 +1,86 @@ +// Copyright 2020 Oath Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.vespa.config.server.metrics; + +import com.github.tomakehurst.wiremock.junit.WireMockRule; +import java.io.IOException; +import java.net.URI; +import java.nio.file.Files; +import java.nio.file.Path; +import java.util.Collection; +import java.util.List; +import java.util.Map; +import java.util.stream.Collectors; +import java.util.stream.Stream; +import org.junit.Rule; +import org.junit.Test; + +import static com.github.tomakehurst.wiremock.client.WireMock.aResponse; +import static com.github.tomakehurst.wiremock.client.WireMock.get; +import static com.github.tomakehurst.wiremock.client.WireMock.stubFor; +import static com.github.tomakehurst.wiremock.client.WireMock.urlEqualTo; +import static com.github.tomakehurst.wiremock.core.WireMockConfiguration.options; +import static org.junit.Assert.assertEquals; + +public class ClusterProtonMetricsRetrieverTest { + + @Rule + public final WireMockRule wireMock = new WireMockRule(options().dynamicPort(), true); + + + @Test + public void testMetricAggregation() throws IOException { + List<URI> hosts = Stream.of(1, 2) + .map(item -> URI.create("http://localhost:" + wireMock.port() + "/metrics" + item + "/v2/values")) + .collect(Collectors.toList()); + + stubFor(get(urlEqualTo("/metrics1/v2/values")) + .willReturn(aResponse() + .withStatus(200) + .withBody(nodeMetrics("_1")))); + + stubFor(get(urlEqualTo("/metrics2/v2/values")) + .willReturn(aResponse() + .withStatus(200) + 
.withBody(nodeMetrics("_2")))); + + String expectedClusterNameContent = "content/content/0/0"; + String expectedClusterNameMusic = "content/music/0/0"; + Map<String, ProtonMetricsAggregator> aggregatorMap = new ClusterProtonMetricsRetriever().requestMetricsGroupedByCluster(hosts); + + compareAggregators( + new ProtonMetricsAggregator() + .addDocumentReadyCount(1275) + .addDocumentActiveCount(1275) + .addDocumentTotalCount(1275) + .addDocumentDiskUsage(14781856) + .addResourceDiskUsageAverage(0.0009083386306) + .addResourceMemoryUsageAverage(0.0183488434436), + aggregatorMap.get(expectedClusterNameContent) + ); + + compareAggregators( + new ProtonMetricsAggregator() + .addDocumentReadyCount(3008) + .addDocumentActiveCount(3008) + .addDocumentTotalCount(3008) + .addDocumentDiskUsage(331157) + .addResourceDiskUsageAverage(0.0000152263558) + .addResourceMemoryUsageAverage(0.0156505524171), + aggregatorMap.get(expectedClusterNameMusic) + ); + + wireMock.stop(); + } + + private String nodeMetrics(String extension) throws IOException { + return Files.readString(Path.of("src/test/resources/metrics/node_metrics" + extension)); + } + + // Same tolerance value as used internally in DeploymentMetricsAggregator.isZero + private static final double metricsTolerance = 0.001; + + private void compareAggregators(ProtonMetricsAggregator expected, ProtonMetricsAggregator actual) { + assertEquals(expected.aggregateDocumentDiskUsage(), actual.aggregateDocumentDiskUsage(), metricsTolerance); + } + +} diff --git a/configserver/src/test/java/com/yahoo/vespa/config/server/metrics/ApplicationMetricsRetrieverTest.java b/configserver/src/test/java/com/yahoo/vespa/config/server/metrics/DeploymentMetricsRetrieverTest.java index 49de9b41ca1..0bafb3de168 100644 --- a/configserver/src/test/java/com/yahoo/vespa/config/server/metrics/ApplicationMetricsRetrieverTest.java +++ b/configserver/src/test/java/com/yahoo/vespa/config/server/metrics/DeploymentMetricsRetrieverTest.java @@ -27,16 +27,16 @@ import static 
org.junit.Assert.assertEquals; /** * @author olaa */ -public class ApplicationMetricsRetrieverTest { +public class DeploymentMetricsRetrieverTest { @Test public void getMetrics() { MockModel mockModel = new MockModel(mockHosts()); - MockMetricsRetriever mockMetricsRetriever = new MockMetricsRetriever(); + MockDeploymentMetricsRetriever mockMetricsRetriever = new MockDeploymentMetricsRetriever(); Application application = new Application(mockModel, null, 0, false, null, null, ApplicationId.fromSerializedForm("tenant:app:instance")); - ApplicationMetricsRetriever clusterMetricsRetriever = new ApplicationMetricsRetriever(mockMetricsRetriever); + DeploymentMetricsRetriever clusterMetricsRetriever = new DeploymentMetricsRetriever(mockMetricsRetriever); clusterMetricsRetriever.getMetrics(application); assertEquals(2, mockMetricsRetriever.hosts.size()); // Verify that logserver was ignored @@ -57,19 +57,19 @@ public class ApplicationMetricsRetrieverTest { return List.of(hostInfo1, hostInfo2, hostInfo3); } - class MockMetricsRetriever extends ClusterMetricsRetriever { + class MockDeploymentMetricsRetriever extends ClusterDeploymentMetricsRetriever { Collection<URI> hosts = new ArrayList<>(); @Override - public Map<ClusterInfo, MetricsAggregator> requestMetricsGroupedByCluster(Collection<URI> hosts) { + public Map<ClusterInfo, DeploymentMetricsAggregator> requestMetricsGroupedByCluster(Collection<URI> hosts) { this.hosts = hosts; return Map.of( new ClusterInfo("content_cluster_id", "content"), - new MetricsAggregator().addDocumentCount(1000), + new DeploymentMetricsAggregator().addDocumentCount(1000), new ClusterInfo("container_cluster_id", "container"), - new MetricsAggregator().addContainerLatency(123, 5) + new DeploymentMetricsAggregator().addContainerLatency(123, 5) ); } } diff --git a/configserver/src/test/java/com/yahoo/vespa/config/server/metrics/ProtonMetricsRetrieverTest.java 
b/configserver/src/test/java/com/yahoo/vespa/config/server/metrics/ProtonMetricsRetrieverTest.java new file mode 100644 index 00000000000..e9789578b5e --- /dev/null +++ b/configserver/src/test/java/com/yahoo/vespa/config/server/metrics/ProtonMetricsRetrieverTest.java @@ -0,0 +1,111 @@ +package com.yahoo.vespa.config.server.metrics; + +import com.yahoo.config.FileReference; +import com.yahoo.config.model.api.FileDistribution; +import com.yahoo.config.model.api.HostInfo; +import com.yahoo.config.model.api.Model; +import com.yahoo.config.model.api.ServiceInfo; +import com.yahoo.config.provision.AllocatedHosts; +import com.yahoo.config.provision.ApplicationId; +import com.yahoo.vespa.config.ConfigKey; +import com.yahoo.vespa.config.ConfigPayload; +import com.yahoo.vespa.config.buildergen.ConfigDefinition; +import com.yahoo.vespa.config.server.application.Application; +import java.net.URI; +import java.util.ArrayList; +import java.util.Collection; +import java.util.HashSet; +import java.util.List; +import java.util.Map; +import java.util.Set; +import org.junit.Test; + +import static org.junit.Assert.assertEquals; + +public class ProtonMetricsRetrieverTest { + + @Test + public void getMetrics() { + ProtonMetricsRetrieverTest.MockModel mockModel = new ProtonMetricsRetrieverTest.MockModel(mockHosts()); + ProtonMetricsRetrieverTest.MockProtonMetricsRetriever mockMetricsRetriever = new ProtonMetricsRetrieverTest.MockProtonMetricsRetriever(); + Application application = new Application(mockModel, null, 0, false, + null, null, ApplicationId.fromSerializedForm("tenant:app:instance")); + + ProtonMetricsRetriever clusterMetricsRetriever = new ProtonMetricsRetriever(mockMetricsRetriever); + clusterMetricsRetriever.getMetrics(application); + + assertEquals(1, mockMetricsRetriever.hosts.size()); // Verify that only the searchnode host was queried; the container and logserver hosts are ignored + } + + private Collection<HostInfo> mockHosts() { + + HostInfo hostInfo1 = new HostInfo("host1", + List.of(new ServiceInfo("content", "searchnode", 
null, null, "", "host1")) + ); + HostInfo hostInfo2 = new HostInfo("host2", + List.of(new ServiceInfo("default", "container", null, null, "", "host2")) + ); + HostInfo hostInfo3 = new HostInfo("host3", + List.of(new ServiceInfo("default", "logserver", null, null, "", "host3")) + ); + + return List.of(hostInfo1, hostInfo2, hostInfo3); + } + + class MockProtonMetricsRetriever extends ClusterProtonMetricsRetriever { + + Collection<URI> hosts = new ArrayList<>(); + + @Override + public Map<String, ProtonMetricsAggregator> requestMetricsGroupedByCluster(Collection<URI> hosts) { + this.hosts = hosts; + + return Map.of( + ("content_cluster_id"), + new ProtonMetricsAggregator() + ); + } + } + + class MockModel implements Model { + + Collection<HostInfo> hosts; + + MockModel(Collection<HostInfo> hosts) { + this.hosts = hosts; + } + + @Override + public ConfigPayload getConfig(ConfigKey<?> configKey, ConfigDefinition targetDef) { + throw new UnsupportedOperationException(); + } + + @Override + public Set<ConfigKey<?>> allConfigsProduced() { + throw new UnsupportedOperationException(); + } + + @Override + public Collection<HostInfo> getHosts() { + return hosts; + } + + @Override + public Set<String> allConfigIds() { + throw new UnsupportedOperationException(); + } + + @Override + public void distributeFiles(FileDistribution fileDistribution) { + throw new UnsupportedOperationException(); + } + + @Override + public Set<FileReference> fileReferences() { return new HashSet<>(); } + + @Override + public AllocatedHosts allocatedHosts() { + throw new UnsupportedOperationException(); + } + } +} diff --git a/configserver/src/test/resources/metrics/node_metrics_1 b/configserver/src/test/resources/metrics/node_metrics_1 new file mode 100644 index 00000000000..4c75f0c4c02 --- /dev/null +++ b/configserver/src/test/resources/metrics/node_metrics_1 @@ -0,0 +1,441 @@ +{ + "nodes": [ + { + "hostname": "generic-host-name.test.com", + "role": "content/content/0/0", + "node": { + "timestamp": 
1596117620, + "metrics": [ + { + "values": { + "cpu.util": 269.1793453856039, + "cpu.sys.util": 83.3299189868703, + "cpu.vcpus": 0.1, + "disk.limit": 50000000000, + "disk.used": 659972096, + "disk.util": 1.319944192, + "mem.limit": 8589934592, + "mem.used": 1048584192, + "mem.util": 12.2071266174316, + "mem_total.used": 1572433920, + "mem_total.util": 18.3055400848389, + "bandwidth.limit": 300000000 + }, + "dimensions": { + "applicationId": "tenant.name-of-app.user", + "host": "generic_host_name", + "zone": "dev.generic-1", + "clusterId": "content/content" + } + }, + { + "values": { + "net.in.bytes": 24793122, + "net.in.errors": 0, + "net.in.dropped": 0, + "net.out.bytes": 19822788, + "net.out.errors": 0, + "net.out.dropped": 0 + }, + "dimensions": { + "applicationId": "tenant.name-of-app.user", + "host": "generic_host_name", + "zone": "dev.generic-1", + "clusterId": "content/content" + } + } + ] + }, + "services": [ + { + "name": "vespa.searchnode", + "timestamp": 1596117620, + "status": { + "code": "up", + "description": "Data collected successfully" + }, + "metrics": [ + { + "values": { + "memory_virt": 460963840, + "memory_rss": 206643200, + "cpu": 5.7167235494881 + }, + "dimensions": { + "zone": "dev.generic-1", + "applicationId": "tenant.name-of-app.user", + "serviceId": "searchnode", + "clusterId": "content/content" + } + }, + { + "values": { + "content.proton.documentdb.matching.rank_profile.query_setup_time.average": 0 + }, + "dimensions": { + "rankProfile": "unranked", + "documenttype": "doc", + "zone": "dev.generic-1", + "applicationId": "tenant.name-of-app.user", + "serviceId": "searchnode", + "clusterId": "content/content" + } + }, + { + "values": { + "content.proton.documentdb.matching.rank_profile.rerank_time.average": 0, + "content.proton.documentdb.matching.rank_profile.query_latency.average": 0 + }, + "dimensions": { + "rankProfile": "default", + "documenttype": "doc", + "zone": "dev.generic-1", + "applicationId": "tenant.name-of-app.user", + 
"serviceId": "searchnode", + "clusterId": "content/content" + } + }, + { + "values": { + "content.proton.documentdb.matching.docs_reranked.rate": 0, + "content.proton.documentdb.memory_usage.allocated_bytes.last": 8167610, + "content.proton.transactionlog.disk_usage.last": 31315422 + }, + "dimensions": { + "documenttype": "doc", + "zone": "dev.generic-1", + "applicationId": "tenant.name-of-app.user", + "serviceId": "searchnode", + "clusterId": "content/content" + } + }, + { + "values": { + "content.proton.search_protocol.query.latency.average": 0, + "content.proton.search_protocol.docsum.latency.average": 0, + "content.proton.search_protocol.docsum.requested_documents.rate": 0 + }, + "dimensions": { + "zone": "dev.generic-1", + "applicationId": "tenant.name-of-app.user", + "serviceId": "searchnode", + "clusterId": "content/content" + } + }, + { + "values": { + "content.proton.documentdb.matching.docs_matched.rate": 0, + "content.proton.documentdb.documents.active.last": 1275, + "content.proton.documentdb.documents.ready.last": 1275, + "content.proton.documentdb.documents.total.last": 1275, + "content.proton.documentdb.disk_usage.last": 14781856 + }, + "dimensions": { + "documenttype": "doc", + "zone": "dev.generic-1", + "applicationId": "tenant.name-of-app.user", + "serviceId": "searchnode", + "clusterId": "content/content" + } + }, + { + "values": { + "content.proton.resource_usage.disk.average": 0.0009083386306, + "content.proton.resource_usage.memory.average": 0.0183488434436, + "content.proton.resource_usage.feeding_blocked.last": 0 + }, + "dimensions": { + "zone": "dev.generic-1", + "applicationId": "tenant.name-of-app.user", + "serviceId": "searchnode", + "clusterId": "content/content" + } + }, + { + "values": { + "content.proton.documentdb.matching.rank_profile.query_setup_time.average": 0 + }, + "dimensions": { + "rankProfile": "default", + "documenttype": "doc", + "zone": "dev.generic-1", + "applicationId": "tenant.name-of-app.user", + "serviceId": 
"searchnode", + "clusterId": "content/content" + } + }, + { + "values": { + "content.proton.documentdb.matching.rank_profile.rerank_time.average": 0, + "content.proton.documentdb.matching.rank_profile.query_latency.average": 0 + }, + "dimensions": { + "rankProfile": "unranked", + "documenttype": "doc", + "zone": "dev.generic-1", + "applicationId": "tenant.name-of-app.user", + "serviceId": "searchnode", + "clusterId": "content/content" + } + } + ] + }, + { + "name": "vespa.distributor", + "timestamp": 1596117620, + "status": { + "code": "up", + "description": "Data collected successfully" + }, + "metrics": [ + { + "values": { + "memory_virt": 323432448, + "memory_rss": 110497792, + "cpu": 6.6040955631399 + }, + "dimensions": { + "zone": "dev.generic-1", + "applicationId": "tenant.name-of-app.user", + "serviceId": "distributor", + "clusterId": "content/content" + } + } + ] + }, + { + "name": "vespa.container-clustercontroller", + "timestamp": 1596117620, + "status": { + "code": "up", + "description": "Data collected successfully" + }, + "metrics": [ + { + "values": { + "memory_virt": 1375522816, + "memory_rss": 327823360, + "cpu": 1.3139931740614 + }, + "dimensions": { + "zone": "dev.generic-1", + "applicationId": "tenant.name-of-app.user", + "serviceId": "container-clustercontroller", + "clusterId": "content/content" + } + }, + { + "values": { + "jdisc.gc.ms.average": 1.6666666666667 + }, + "dimensions": { + "gcName": "G1YoungGeneration", + "zone": "dev.generic-1", + "applicationId": "tenant.name-of-app.user", + "serviceId": "container-clustercontroller", + "clusterId": "content/content" + } + }, + { + "values": { + "jdisc.gc.ms.average": 0 + }, + "dimensions": { + "gcName": "G1OldGeneration", + "zone": "dev.generic-1", + "applicationId": "tenant.name-of-app.user", + "serviceId": "container-clustercontroller", + "clusterId": "content/content" + } + }, + { + "values": { + "serverActiveThreads.average": 0 + }, + "dimensions": { + "zone": "dev.generic-1", + 
"applicationId": "tenant.name-of-app.user", + "serviceId": "container-clustercontroller", + "clusterId": "content/content" + } + }, + { + "values": { + "http.status.2xx.rate": 0.0666666666667 + }, + "dimensions": { + "httpMethod": "GET", + "zone": "dev.generic-1", + "applicationId": "tenant.name-of-app.user", + "serviceId": "container-clustercontroller", + "clusterId": "content/content" + } + }, + { + "values": { + "http.status.2xx.rate": 0 + }, + "dimensions": { + "httpMethod": "POST", + "zone": "dev.generic-1", + "applicationId": "tenant.name-of-app.user", + "serviceId": "container-clustercontroller", + "clusterId": "content/content" + } + }, + { + "values": { + "mem.heap.free.average": 15434608 + }, + "dimensions": { + "zone": "dev.generic-1", + "applicationId": "tenant.name-of-app.user", + "serviceId": "container-clustercontroller", + "clusterId": "content/content" + } + } + ] + }, + { + "name": "vespa.metricsproxy-container", + "timestamp": 1596117620, + "status": { + "code": "up", + "description": "Data collected successfully" + }, + "metrics": [ + { + "values": { + "memory_virt": 1364717568, + "memory_rss": 379650048, + "cpu": 1.3993174061433 + }, + "dimensions": { + "zone": "dev.generic-1", + "applicationId": "tenant.name-of-app.user", + "serviceId": "metricsproxy-container", + "clusterId": "content/content" + } + }, + { + "values": { + "jdisc.gc.ms.average": 0 + }, + "dimensions": { + "gcName": "G1OldGeneration", + "zone": "dev.generic-1", + "applicationId": "tenant.name-of-app.user", + "serviceId": "metricsproxy-container", + "clusterId": "content/content" + } + }, + { + "values": { + "mem.heap.free.average": 38444168 + }, + "dimensions": { + "zone": "dev.generic-1", + "applicationId": "tenant.name-of-app.user", + "serviceId": "metricsproxy-container", + "clusterId": "content/content" + } + }, + { + "values": { + "http.status.2xx.rate": 0.0666666666667 + }, + "dimensions": { + "httpMethod": "GET", + "zone": "dev.generic-1", + "applicationId": 
"tenant.name-of-app.user", + "serviceId": "metricsproxy-container", + "clusterId": "content/content" + } + }, + { + "values": { + "serverActiveThreads.average": 0.05 + }, + "dimensions": { + "zone": "dev.generic-1", + "applicationId": "tenant.name-of-app.user", + "serviceId": "metricsproxy-container", + "clusterId": "content/content" + } + }, + { + "values": { + "jdisc.gc.ms.average": 4.6666666666667 + }, + "dimensions": { + "gcName": "G1YoungGeneration", + "zone": "dev.generic-1", + "applicationId": "tenant.name-of-app.user", + "serviceId": "metricsproxy-container", + "clusterId": "content/content" + } + }, + { + "values": { + "http.status.2xx.rate": 0.0333333333333 + }, + "dimensions": { + "httpMethod": "GET", + "zone": "dev.generic-1", + "applicationId": "tenant.name-of-app.user", + "serviceId": "metricsproxy-container", + "clusterId": "content/content" + } + } + ] + }, + { + "name": "vespa.config-sentinel", + "timestamp": 1596117620, + "status": { + "code": "up", + "description": "Data collected successfully" + }, + "metrics": [ + { + "values": { + "memory_virt": 0, + "memory_rss": 0, + "cpu": 0 + }, + "dimensions": { + "zone": "dev.generic-1", + "applicationId": "tenant.name-of-app.user", + "serviceId": "config-sentinel", + "clusterId": "content/content" + } + } + ] + }, + { + "name": "vespa.logd", + "timestamp": 1596117620, + "status": { + "code": "up", + "description": "Data collected successfully" + }, + "metrics": [ + { + "values": { + "memory_virt": 120094720, + "memory_rss": 13119488, + "cpu": 0.3242320819113 + }, + "dimensions": { + "zone": "dev.generic-1", + "applicationId": "tenant.name-of-app.user", + "serviceId": "logd", + "clusterId": "content/content" + } + } + ] + } + ] + } + ] +}
\ No newline at end of file diff --git a/configserver/src/test/resources/metrics/node_metrics_2 b/configserver/src/test/resources/metrics/node_metrics_2 new file mode 100644 index 00000000000..d916cec54fd --- /dev/null +++ b/configserver/src/test/resources/metrics/node_metrics_2 @@ -0,0 +1,456 @@ +{ + "nodes": [ + { + "hostname": "generic-hostname2.vespa.com", + "role": "content/music/0/0", + "node": { + "timestamp": 1596209314, + "metrics": [ + { + "values": { + "cpu.util": 6.2395760990999, + "cpu.sys.util": 1.6980606847917, + "cpu.vcpus": 2, + "disk.limit": 50000000000, + "disk.used": 431521792, + "disk.util": 0.863043584, + "mem.limit": 7890911232, + "mem.used": 1030008832, + "mem.util": 13.0531037761901, + "mem_total.used": 1098739712, + "mem_total.util": 13.9241169960737, + "bandwidth.limit": 300000000 + }, + "dimensions": { + "applicationId": "generic-tenant.generic-app.generic-deploy", + "host": "generic-host.vespa.com", + "zone": "generic.dev-generic-2", + "clusterId": "content/music" + } + }, + { + "values": { + "net.in.bytes": 9170973, + "net.in.errors": 0, + "net.in.dropped": 0, + "net.out.bytes": 5858380, + "net.out.errors": 0, + "net.out.dropped": 0 + }, + "dimensions": { + "applicationId": "generic-tenant.generic-app.generic-deploy", + "host": "generic-host.vespa.com", + "zone": "generic.dev-generic-2", + "clusterId": "content/music" + } + } + ] + }, + "services": [ + { + "name": "vespa.searchnode", + "timestamp": 1596209314, + "status": { + "code": "up", + "description": "Data collected successfully" + }, + "metrics": [ + { + "values": { + "memory_virt": 399941632, + "memory_rss": 151085056, + "cpu": 3.8755736868944 + }, + "dimensions": { + "zone": "generic.dev-generic-2", + "applicationId": "generic-tenant.generic-app.generic-deploy", + "serviceId": "searchnode", + "clusterId": "content/music" + } + }, + { + "values": { + "content.proton.documentdb.matching.docs_reranked.rate": 0, + "content.proton.documentdb.memory_usage.allocated_bytes.last": 
8372852, + "content.proton.transactionlog.disk_usage.last": 1128681 + }, + "dimensions": { + "documenttype": "music", + "zone": "generic.dev-generic-2", + "applicationId": "generic-tenant.generic-app.generic-deploy", + "serviceId": "searchnode", + "clusterId": "content/music" + } + }, + { + "values": { + "content.proton.documentdb.matching.rank_profile.rerank_time.average": 0, + "content.proton.documentdb.matching.rank_profile.query_latency.average": 0 + }, + "dimensions": { + "rankProfile": "unranked", + "documenttype": "music", + "zone": "generic.dev-generic-2", + "applicationId": "generic-tenant.generic-app.generic-deploy", + "serviceId": "searchnode", + "clusterId": "content/music" + } + }, + { + "values": { + "content.proton.documentdb.matching.rank_profile.query_setup_time.average": 0 + }, + "dimensions": { + "rankProfile": "unranked", + "documenttype": "music", + "zone": "generic.dev-generic-2", + "applicationId": "generic-tenant.generic-app.generic-deploy", + "serviceId": "searchnode", + "clusterId": "content/music" + } + }, + { + "values": { + "content.proton.documentdb.matching.rank_profile.rerank_time.average": 0, + "content.proton.documentdb.matching.rank_profile.query_latency.average": 0 + }, + "dimensions": { + "rankProfile": "default", + "documenttype": "music", + "zone": "generic.dev-generic-2", + "applicationId": "generic-tenant.generic-app.generic-deploy", + "serviceId": "searchnode", + "clusterId": "content/music" + } + }, + { + "values": { + "content.proton.documentdb.matching.rank_profile.query_setup_time.average": 0 + }, + "dimensions": { + "rankProfile": "default", + "documenttype": "music", + "zone": "generic.dev-generic-2", + "applicationId": "generic-tenant.generic-app.generic-deploy", + "serviceId": "searchnode", + "clusterId": "content/music" + } + }, + { + "values": { + "content.proton.documentdb.matching.rank_profile.query_setup_time.average": 0 + }, + "dimensions": { + "rankProfile": "rank_albums", + "documenttype": "music", + "zone": 
"generic.dev-generic-2", + "applicationId": "generic-tenant.generic-app.generic-deploy", + "serviceId": "searchnode", + "clusterId": "content/music" + } + }, + { + "values": { + "content.proton.documentdb.matching.rank_profile.rerank_time.average": 0, + "content.proton.documentdb.matching.rank_profile.query_latency.average": 0 + }, + "dimensions": { + "rankProfile": "rank_albums", + "documenttype": "music", + "zone": "generic.dev-generic-2", + "applicationId": "generic-tenant.generic-app.generic-deploy", + "serviceId": "searchnode", + "clusterId": "content/music" + } + }, + { + "values": { + "content.proton.documentdb.matching.docs_matched.rate": 0, + "content.proton.documentdb.documents.active.last": 3008, + "content.proton.documentdb.documents.ready.last": 3008, + "content.proton.documentdb.documents.total.last": 3008, + "content.proton.documentdb.disk_usage.last": 331157 + }, + "dimensions": { + "documenttype": "music", + "zone": "generic.dev-generic-2", + "applicationId": "generic-tenant.generic-app.generic-deploy", + "serviceId": "searchnode", + "clusterId": "content/music" + } + }, + { + "values": { + "content.proton.resource_usage.disk.average": 0.0000152263558, + "content.proton.resource_usage.memory.average": 0.0156505524171, + "content.proton.resource_usage.feeding_blocked.last": 0 + }, + "dimensions": { + "zone": "generic.dev-generic-2", + "applicationId": "generic-tenant.generic-app.generic-deploy", + "serviceId": "searchnode", + "clusterId": "content/music" + } + }, + { + "values": { + "content.proton.search_protocol.query.latency.average": 0, + "content.proton.search_protocol.docsum.latency.average": 0, + "content.proton.search_protocol.docsum.requested_documents.rate": 0 + }, + "dimensions": { + "zone": "generic.dev-generic-2", + "applicationId": "generic-tenant.generic-app.generic-deploy", + "serviceId": "searchnode", + "clusterId": "content/music" + } + } + ] + }, + { + "name": "vespa.distributor", + "timestamp": 1596209314, + "status": { + "code": 
"up", + "description": "Data collected successfully" + }, + "metrics": [ + { + "values": { + "memory_virt": 323338240, + "memory_rss": 110653440, + "cpu": 3.9945605983342 + }, + "dimensions": { + "zone": "generic.dev-generic-2", + "applicationId": "generic-tenant.generic-app.generic-deploy", + "serviceId": "distributor", + "clusterId": "content/music" + } + } + ] + }, + { + "name": "vespa.container-clustercontroller", + "timestamp": 1596209314, + "status": { + "code": "up", + "description": "Data collected successfully" + }, + "metrics": [ + { + "values": { + "memory_virt": 1384386560, + "memory_rss": 357957632, + "cpu": 0.6799252082271 + }, + "dimensions": { + "zone": "generic.dev-generic-2", + "applicationId": "generic-tenant.generic-app.generic-deploy", + "serviceId": "container-clustercontroller", + "clusterId": "content/music" + } + }, + { + "values": { + "jdisc.gc.ms.average": 0 + }, + "dimensions": { + "gcName": "G1OldGeneration", + "zone": "generic.dev-generic-2", + "applicationId": "generic-tenant.generic-app.generic-deploy", + "serviceId": "container-clustercontroller", + "clusterId": "content/music" + } + }, + { + "values": { + "serverActiveThreads.average": 0 + }, + "dimensions": { + "zone": "generic.dev-generic-2", + "applicationId": "generic-tenant.generic-app.generic-deploy", + "serviceId": "container-clustercontroller", + "clusterId": "content/music" + } + }, + { + "values": { + "mem.heap.free.average": 39484545.333333336 + }, + "dimensions": { + "zone": "generic.dev-generic-2", + "applicationId": "generic-tenant.generic-app.generic-deploy", + "serviceId": "container-clustercontroller", + "clusterId": "content/music" + } + }, + { + "values": { + "http.status.2xx.rate": 0.0833333333333 + }, + "dimensions": { + "httpMethod": "GET", + "zone": "generic.dev-generic-2", + "applicationId": "generic-tenant.generic-app.generic-deploy", + "serviceId": "container-clustercontroller", + "clusterId": "content/music" + } + }, + { + "values": { + 
"jdisc.gc.ms.average": 0.6666666666667 + }, + "dimensions": { + "gcName": "G1YoungGeneration", + "zone": "generic.dev-generic-2", + "applicationId": "generic-tenant.generic-app.generic-deploy", + "serviceId": "container-clustercontroller", + "clusterId": "content/music" + } + } + ] + }, + { + "name": "vespa.metricsproxy-container", + "timestamp": 1596209314, + "status": { + "code": "up", + "description": "Data collected successfully" + }, + "metrics": [ + { + "values": { + "memory_virt": 1375203328, + "memory_rss": 406228992, + "cpu": 1.1558728539861 + }, + "dimensions": { + "zone": "generic.dev-generic-2", + "applicationId": "generic-tenant.generic-app.generic-deploy", + "serviceId": "metricsproxy-container", + "clusterId": "content/music" + } + }, + { + "values": { + "jdisc.gc.ms.average": 0 + }, + "dimensions": { + "gcName": "G1OldGeneration", + "zone": "generic.dev-generic-2", + "applicationId": "generic-tenant.generic-app.generic-deploy", + "serviceId": "metricsproxy-container", + "clusterId": "content/music" + } + }, + { + "values": { + "mem.heap.free.average": 43048620 + }, + "dimensions": { + "zone": "generic.dev-generic-2", + "applicationId": "generic-tenant.generic-app.generic-deploy", + "serviceId": "metricsproxy-container", + "clusterId": "content/music" + } + }, + { + "values": { + "http.status.2xx.rate": 0.0833333333333 + }, + "dimensions": { + "httpMethod": "GET", + "zone": "generic.dev-generic-2", + "applicationId": "generic-tenant.generic-app.generic-deploy", + "serviceId": "metricsproxy-container", + "clusterId": "content/music" + } + }, + { + "values": { + "serverActiveThreads.average": 0.0433333333333 + }, + "dimensions": { + "zone": "generic.dev-generic-2", + "applicationId": "generic-tenant.generic-app.generic-deploy", + "serviceId": "metricsproxy-container", + "clusterId": "content/music" + } + }, + { + "values": { + "jdisc.gc.ms.average": 6.5 + }, + "dimensions": { + "gcName": "G1YoungGeneration", + "zone": "generic.dev-generic-2", + 
"applicationId": "generic-tenant.generic-app.generic-deploy", + "serviceId": "metricsproxy-container", + "clusterId": "content/music" + } + }, + { + "values": { + "http.status.2xx.rate": 0.05 + }, + "dimensions": { + "httpMethod": "GET", + "zone": "generic.dev-generic-2", + "applicationId": "generic-tenant.generic-app.generic-deploy", + "serviceId": "metricsproxy-container", + "clusterId": "content/music" + } + } + ] + }, + { + "name": "vespa.config-sentinel", + "timestamp": 1596209314, + "status": { + "code": "up", + "description": "Data collected successfully" + }, + "metrics": [ + { + "values": { + "memory_virt": 0, + "memory_rss": 0, + "cpu": 0 + }, + "dimensions": { + "zone": "generic.dev-generic-2", + "applicationId": "generic-tenant.generic-app.generic-deploy", + "serviceId": "config-sentinel", + "clusterId": "content/music" + } + } + ] + }, + { + "name": "vespa.logd", + "timestamp": 1596209314, + "status": { + "code": "up", + "description": "Data collected successfully" + }, + "metrics": [ + { + "values": { + "memory_virt": 119803904, + "memory_rss": 12697600, + "cpu": 0.1699813020568 + }, + "dimensions": { + "zone": "generic.dev-generic-2", + "applicationId": "generic-tenant.generic-app.generic-deploy", + "serviceId": "logd", + "clusterId": "content/music" + } + } + ] + } + ] + } + ] +}
\ No newline at end of file diff --git a/controller-api/src/main/java/com/yahoo/vespa/hosted/controller/api/application/v4/model/ClusterMetrics.java b/controller-api/src/main/java/com/yahoo/vespa/hosted/controller/api/application/v4/model/ClusterMetrics.java index f16e2e403ed..cc10041992c 100644 --- a/controller-api/src/main/java/com/yahoo/vespa/hosted/controller/api/application/v4/model/ClusterMetrics.java +++ b/controller-api/src/main/java/com/yahoo/vespa/hosted/controller/api/application/v4/model/ClusterMetrics.java @@ -10,7 +10,7 @@ import java.util.Optional; */ public class ClusterMetrics { - // These field names originate from the MetricsResponse class + // These field names originate from the DeploymentMetricsResponse class public static final String QUERIES_PER_SECOND = "queriesPerSecond"; public static final String FEED_PER_SECOND = "feedPerSecond"; public static final String DOCUMENT_COUNT = "documentCount"; diff --git a/controller-api/src/main/java/com/yahoo/vespa/hosted/controller/api/application/v4/model/ProtonMetrics.java b/controller-api/src/main/java/com/yahoo/vespa/hosted/controller/api/application/v4/model/ProtonMetrics.java new file mode 100644 index 00000000000..c6d907ec7fc --- /dev/null +++ b/controller-api/src/main/java/com/yahoo/vespa/hosted/controller/api/application/v4/model/ProtonMetrics.java @@ -0,0 +1,63 @@ +package com.yahoo.vespa.hosted.controller.api.application.v4.model; + +import java.util.HashMap; +import java.util.Map; +import java.util.logging.LogManager; +import java.util.logging.Logger; +import org.json.JSONException; +import org.json.JSONObject; + +public class ProtonMetrics { + + private static final Logger logger = LogManager.getLogManager().getLogger(ProtonMetrics.class.getName()); + + public static final String DOCUMENTS_ACTIVE_COUNT = "documentsActiveCount"; + public static final String DOCUMENTS_READY_COUNT = "documentsReadyCount"; + public static final String DOCUMENTS_TOTAL_COUNT = "documentsTotalCount"; + public 
static final String DOCUMENT_DISK_USAGE = "documentDiskUsage"; + public static final String RESOURCE_DISK_USAGE_AVERAGE = "resourceDiskUsageAverage"; + public static final String RESOURCE_MEMORY_USAGE_AVERAGE = "resourceMemoryUsageAverage"; + + private final String clusterId; + private final Map<String, Double> metrics; + + public ProtonMetrics(String clusterId) { + this.clusterId = clusterId; + metrics = new HashMap<>(); + } + + public String getClusterId() { return clusterId; } + + public double documentsActiveCount() { return metrics.get(DOCUMENTS_ACTIVE_COUNT); } + + public double documentsReadyCount() { return metrics.get(DOCUMENTS_READY_COUNT); } + + public double documentsTotalCount() { return metrics.get(DOCUMENTS_TOTAL_COUNT); } + + public double documentDiskUsage() { return metrics.get(DOCUMENT_DISK_USAGE); } + + public double resourceDiskUsageAverage() { return metrics.get(RESOURCE_DISK_USAGE_AVERAGE); } + + public double resourceMemoryUsageAverage() { return metrics.get(RESOURCE_MEMORY_USAGE_AVERAGE); } + + public ProtonMetrics addMetric(String name, double value) { + metrics.put(name, value); + return this; + } + + public JSONObject toJson() { + try { + JSONObject protonMetrics = new JSONObject(); + protonMetrics.put("clusterId", clusterId); + JSONObject jsonMetrics = new JSONObject(); + for (Map.Entry<String, Double> entry : metrics.entrySet()) { + jsonMetrics.put(entry.getKey(), entry.getValue()); + } + protonMetrics.put("metrics", jsonMetrics); + return protonMetrics; + } catch (JSONException e) { + logger.severe("Unable to convert Proton Metrics to JSON Object"); + } + return new JSONObject(); + } +} diff --git a/controller-api/src/main/java/com/yahoo/vespa/hosted/controller/api/identifiers/MetricsType.java b/controller-api/src/main/java/com/yahoo/vespa/hosted/controller/api/identifiers/MetricsType.java new file mode 100644 index 00000000000..aeb828a21f3 --- /dev/null +++ 
b/controller-api/src/main/java/com/yahoo/vespa/hosted/controller/api/identifiers/MetricsType.java @@ -0,0 +1,24 @@ +// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.vespa.hosted.controller.api.identifiers; + +/** + * @author akvalsvik + */ +public class MetricsType extends SerializedIdentifier { + + public MetricsType(String id) { + super(id); + } + + @Override + public void validate() { + super.validate(); + validateNoUpperCase(); + } + + public static void validate(String id) { + if (!(id.equals("deployment") || id.equals("proton"))) { + throwInvalidId(id, "MetricsType be \"deployment\" or \"proton\""); + } + } +} diff --git a/controller-api/src/main/java/com/yahoo/vespa/hosted/controller/api/integration/configserver/ConfigServer.java b/controller-api/src/main/java/com/yahoo/vespa/hosted/controller/api/integration/configserver/ConfigServer.java index c04adcec594..9ad9324e216 100644 --- a/controller-api/src/main/java/com/yahoo/vespa/hosted/controller/api/integration/configserver/ConfigServer.java +++ b/controller-api/src/main/java/com/yahoo/vespa/hosted/controller/api/integration/configserver/ConfigServer.java @@ -8,6 +8,7 @@ import com.yahoo.vespa.flags.json.FlagData; import com.yahoo.vespa.hosted.controller.api.application.v4.model.ClusterMetrics; import com.yahoo.vespa.hosted.controller.api.application.v4.model.DeploymentData; import com.yahoo.vespa.hosted.controller.api.application.v4.model.EndpointStatus; +import com.yahoo.vespa.hosted.controller.api.application.v4.model.ProtonMetrics; import com.yahoo.vespa.hosted.controller.api.identifiers.DeploymentId; import com.yahoo.vespa.hosted.controller.api.identifiers.Hostname; import com.yahoo.vespa.hosted.controller.api.integration.LogEntry; @@ -54,7 +55,9 @@ public interface ConfigServer { */ InputStream getLogs(DeploymentId deployment, Map<String, String> queryParameters); - List<ClusterMetrics> getMetrics(DeploymentId 
deployment); + List<ClusterMetrics> getDeploymentMetrics(DeploymentId deployment); + + List<ProtonMetrics> getProtonMetrics(DeploymentId deployment); List<String> getContentClusters(DeploymentId deployment); diff --git a/controller-api/src/main/java/com/yahoo/vespa/hosted/controller/api/role/PathGroup.java b/controller-api/src/main/java/com/yahoo/vespa/hosted/controller/api/role/PathGroup.java index aaddd3811bc..571fd649f04 100644 --- a/controller-api/src/main/java/com/yahoo/vespa/hosted/controller/api/role/PathGroup.java +++ b/controller-api/src/main/java/com/yahoo/vespa/hosted/controller/api/role/PathGroup.java @@ -118,6 +118,7 @@ enum PathGroup { "/application/v4/tenant/{tenant}/application/{application}/environment/{environment}/region/{region}/instance/{ignored}/nodes", "/application/v4/tenant/{tenant}/application/{application}/environment/{environment}/region/{region}/instance/{ignored}/clusters", "/application/v4/tenant/{tenant}/application/{application}/environment/{environment}/region/{region}/instance/{ignored}/logs", + "/application/v4/tenant/{tenant}/application/{application}/environment/{environment}/region/{region}/instance/{ignored}/metrics", "/application/v4/tenant/{tenant}/application/{application}/environment/{environment}/region/{region}/instance/{ignored}/suspended", "/application/v4/tenant/{tenant}/application/{application}/environment/{environment}/region/{region}/instance/{ignored}/service/{*}", "/application/v4/tenant/{tenant}/application/{application}/environment/{environment}/region/{region}/instance/{ignored}/global-rotation/{*}", diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/metric/ConfigServerMetrics.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/metric/ConfigServerMetrics.java index 567cb664788..266af5e35fe 100644 --- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/metric/ConfigServerMetrics.java +++ 
b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/metric/ConfigServerMetrics.java @@ -31,7 +31,7 @@ public class ConfigServerMetrics { public DeploymentMetrics getDeploymentMetrics(ApplicationId application, ZoneId zone) { var deploymentId = new DeploymentId(application, zone); - var metrics = configServer.getMetrics(deploymentId); + var metrics = configServer.getDeploymentMetrics(deploymentId); // The field names here come from the MetricsResponse class. return new DeploymentMetrics( @@ -61,5 +61,4 @@ public class ConfigServerMetrics { return weightedLatency / rateSum; } - } diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/restapi/application/ApplicationApiHandler.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/restapi/application/ApplicationApiHandler.java index 427d1d66ee8..20aa96e890d 100644 --- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/restapi/application/ApplicationApiHandler.java +++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/restapi/application/ApplicationApiHandler.java @@ -18,6 +18,7 @@ import com.yahoo.config.provision.NodeResources; import com.yahoo.config.provision.TenantName; import com.yahoo.config.provision.zone.RoutingMethod; import com.yahoo.config.provision.zone.ZoneId; +import com.yahoo.container.handler.metrics.JsonResponse; import com.yahoo.container.jdisc.HttpRequest; import com.yahoo.container.jdisc.HttpResponse; import com.yahoo.container.jdisc.LoggingRequestHandler; @@ -40,6 +41,7 @@ import com.yahoo.vespa.hosted.controller.api.ActivateResult; import com.yahoo.vespa.hosted.controller.api.application.v4.EnvironmentResource; import com.yahoo.vespa.hosted.controller.api.application.v4.model.DeployOptions; import com.yahoo.vespa.hosted.controller.api.application.v4.model.EndpointStatus; +import com.yahoo.vespa.hosted.controller.api.application.v4.model.ProtonMetrics; import 
com.yahoo.vespa.hosted.controller.api.application.v4.model.configserverbindings.RefeedAction; import com.yahoo.vespa.hosted.controller.api.application.v4.model.configserverbindings.RestartAction; import com.yahoo.vespa.hosted.controller.api.application.v4.model.configserverbindings.ServiceInfo; @@ -118,6 +120,9 @@ import java.util.Scanner; import java.util.StringJoiner; import java.util.logging.Level; import java.util.stream.Collectors; +import org.json.JSONArray; +import org.json.JSONException; +import org.json.JSONObject; import static com.yahoo.jdisc.Response.Status.BAD_REQUEST; import static com.yahoo.jdisc.Response.Status.CONFLICT; @@ -228,6 +233,7 @@ public class ApplicationApiHandler extends LoggingRequestHandler { if (path.matches("/application/v4/tenant/{tenant}/application/{application}/instance/{instance}/environment/{environment}/region/{region}/nodes")) return nodes(path.get("tenant"), path.get("application"), path.get("instance"), path.get("environment"), path.get("region")); if (path.matches("/application/v4/tenant/{tenant}/application/{application}/instance/{instance}/environment/{environment}/region/{region}/clusters")) return clusters(path.get("tenant"), path.get("application"), path.get("instance"), path.get("environment"), path.get("region")); if (path.matches("/application/v4/tenant/{tenant}/application/{application}/instance/{instance}/environment/{environment}/region/{region}/logs")) return logs(path.get("tenant"), path.get("application"), path.get("instance"), path.get("environment"), path.get("region"), request.propertyMap()); + if (path.matches("/application/v4/tenant/{tenant}/application/{application}/environment/{environment}/region/{region}/instance/{instance}/metrics")) return metrics(path.get("tenant"), path.get("application"), path.get("instance"), path.get("environment"), path.get("region")); if 
(path.matches("/application/v4/tenant/{tenant}/application/{application}/instance/{instance}/environment/{environment}/region/{region}/global-rotation")) return rotationStatus(path.get("tenant"), path.get("application"), path.get("instance"), path.get("environment"), path.get("region"), Optional.ofNullable(request.getProperty("endpointId"))); if (path.matches("/application/v4/tenant/{tenant}/application/{application}/instance/{instance}/environment/{environment}/region/{region}/global-rotation/override")) return getGlobalRotationOverride(path.get("tenant"), path.get("application"), path.get("instance"), path.get("environment"), path.get("region")); if (path.matches("/application/v4/tenant/{tenant}/application/{application}/environment/{environment}/region/{region}/instance/{instance}")) return deployment(path.get("tenant"), path.get("application"), path.get("instance"), path.get("environment"), path.get("region"), request); @@ -647,6 +653,31 @@ public class ApplicationApiHandler extends LoggingRequestHandler { }; } + private HttpResponse metrics(String tenantName, String applicationName, String instanceName, String environment, String region) { + ApplicationId application = ApplicationId.from(tenantName, applicationName, instanceName); + ZoneId zone = ZoneId.from(environment, region); + DeploymentId deployment = new DeploymentId(application, zone); + List<ProtonMetrics> protonMetrics = controller.serviceRegistry().configServer().getProtonMetrics(deployment); + return buildResponseFromProtonMetrics(protonMetrics); + } + + private JsonResponse buildResponseFromProtonMetrics(List<ProtonMetrics> protonMetrics) { + try { + var jsonObject = new JSONObject(); + var jsonArray = new JSONArray(); + for (ProtonMetrics metrics : protonMetrics) { + jsonArray.put(metrics.toJson()); + } + jsonObject.put("metrics", jsonArray); + return new JsonResponse(200, jsonObject.toString()); + } catch (JSONException e) { + log.severe("Unable to build JsonResponse with Proton data"); + return 
new JsonResponse(500, ""); + } + } + + + private HttpResponse trigger(ApplicationId id, JobType type, HttpRequest request) { Inspector requestObject = toSlime(request.getData()).get(); boolean requireTests = ! requestObject.field("skipTests").asBool(); diff --git a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/integration/ConfigServerMock.java b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/integration/ConfigServerMock.java index 251a5ce9acb..ec513e770fa 100644 --- a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/integration/ConfigServerMock.java +++ b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/integration/ConfigServerMock.java @@ -17,6 +17,7 @@ import com.yahoo.vespa.flags.json.FlagData; import com.yahoo.vespa.hosted.controller.api.application.v4.model.ClusterMetrics; import com.yahoo.vespa.hosted.controller.api.application.v4.model.DeploymentData; import com.yahoo.vespa.hosted.controller.api.application.v4.model.EndpointStatus; +import com.yahoo.vespa.hosted.controller.api.application.v4.model.ProtonMetrics; import com.yahoo.vespa.hosted.controller.api.application.v4.model.configserverbindings.ConfigChangeActions; import com.yahoo.vespa.hosted.controller.api.identifiers.DeploymentId; import com.yahoo.vespa.hosted.controller.api.identifiers.TenantId; @@ -33,6 +34,7 @@ import com.yahoo.vespa.hosted.controller.api.integration.configserver.ServiceCon import com.yahoo.vespa.hosted.controller.api.integration.deployment.TesterCloud; import com.yahoo.vespa.hosted.controller.api.integration.noderepository.RestartFilter; import com.yahoo.vespa.hosted.controller.application.ApplicationPackage; +import com.yahoo.vespa.hosted.controller.application.Deployment; import com.yahoo.vespa.hosted.controller.application.SystemApplication; import com.yahoo.vespa.serviceview.bindings.ApplicationView; import com.yahoo.vespa.serviceview.bindings.ClusterView; @@ -57,6 +59,7 @@ import java.util.UUID; import 
java.util.logging.Level; import java.util.stream.Collectors; import java.util.stream.IntStream; +import org.json.JSONObject; import static com.yahoo.config.provision.NodeResources.DiskSpeed.slow; import static com.yahoo.config.provision.NodeResources.StorageType.remote; @@ -81,6 +84,7 @@ public class ConfigServerMock extends AbstractComponent implements ConfigServer private final Map<DeploymentId, List<Log>> warnings = new HashMap<>(); private final Map<DeploymentId, Set<String>> rotationNames = new HashMap<>(); private final Map<DeploymentId, List<ClusterMetrics>> clusterMetrics = new HashMap<>(); + private List<ProtonMetrics> protonMetrics; private Version lastPrepareVersion = null; private RuntimeException prepareException = null; @@ -261,6 +265,10 @@ public class ConfigServerMock extends AbstractComponent implements ConfigServer this.clusterMetrics.put(deployment, clusterMetrics); } + public void setProtonMetrics(List<ProtonMetrics> protonMetrics) { + this.protonMetrics = protonMetrics; + } + public void deferLoadBalancerProvisioningIn(Set<Environment> environments) { deferLoadBalancerProvisioning.addAll(environments); } @@ -444,10 +452,15 @@ public class ConfigServerMock extends AbstractComponent implements ConfigServer } @Override - public List<ClusterMetrics> getMetrics(DeploymentId deployment) { + public List<ClusterMetrics> getDeploymentMetrics(DeploymentId deployment) { return Collections.unmodifiableList(clusterMetrics.getOrDefault(deployment, List.of())); } + @Override + public List<ProtonMetrics> getProtonMetrics(DeploymentId deployment) { + return this.protonMetrics; + } + // Returns a canned example response @Override public Map<?,?> getServiceApiResponse(DeploymentId deployment, String serviceName, String restPath) { diff --git a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/restapi/application/ApplicationApiTest.java b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/restapi/application/ApplicationApiTest.java 
index 10682218353..57397cfd856 100644 --- a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/restapi/application/ApplicationApiTest.java +++ b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/restapi/application/ApplicationApiTest.java @@ -27,6 +27,7 @@ import com.yahoo.vespa.hosted.controller.LockedTenant; import com.yahoo.vespa.hosted.controller.RoutingController; import com.yahoo.vespa.hosted.controller.api.application.v4.EnvironmentResource; import com.yahoo.vespa.hosted.controller.api.application.v4.model.DeployOptions; +import com.yahoo.vespa.hosted.controller.api.application.v4.model.ProtonMetrics; import com.yahoo.vespa.hosted.controller.api.identifiers.DeploymentId; import com.yahoo.vespa.hosted.controller.api.identifiers.Property; import com.yahoo.vespa.hosted.controller.api.identifiers.PropertyId; @@ -494,6 +495,13 @@ public class ApplicationApiTest extends ControllerContainerTest { .userIdentity(USER_ID), "INFO - All good"); + updateMetrics(); + + // GET metrics + tester.assertResponse(request("/application/v4/tenant/tenant2/application/application1/environment/dev/region/us-central-1/instance/default/metrics", GET) + .userIdentity(USER_ID), + new File("proton-metrics.json")); + // DELETE (cancel) ongoing change tester.assertResponse(request("/application/v4/tenant/tenant1/application/application1/instance/instance1/deploying", DELETE) .userIdentity(HOSTED_VESPA_OPERATOR), @@ -1584,6 +1592,25 @@ public class ApplicationApiTest extends ControllerContainerTest { "queue", Optional.empty())); } + private void updateMetrics() { + tester.serviceRegistry().configServerMock().setProtonMetrics(List.of( + (new ProtonMetrics("content/doc/")) + .addMetric(ProtonMetrics.DOCUMENTS_ACTIVE_COUNT, 11430) + .addMetric(ProtonMetrics.DOCUMENTS_READY_COUNT, 11430) + .addMetric(ProtonMetrics.DOCUMENTS_TOTAL_COUNT, 11430) + .addMetric(ProtonMetrics.DOCUMENT_DISK_USAGE, 44021) + .addMetric(ProtonMetrics.RESOURCE_DISK_USAGE_AVERAGE, 0.0168421) 
+ .addMetric(ProtonMetrics.RESOURCE_MEMORY_USAGE_AVERAGE, 0.103482), + (new ProtonMetrics("content/music/")) + .addMetric(ProtonMetrics.DOCUMENTS_ACTIVE_COUNT, 32210) + .addMetric(ProtonMetrics.DOCUMENTS_READY_COUNT, 32000) + .addMetric(ProtonMetrics.DOCUMENTS_TOTAL_COUNT, 32210) + .addMetric(ProtonMetrics.DOCUMENT_DISK_USAGE, 90113) + .addMetric(ProtonMetrics.RESOURCE_DISK_USAGE_AVERAGE, 0.23912) + .addMetric(ProtonMetrics.RESOURCE_MEMORY_USAGE_AVERAGE, 0.00912) + )); + } + private void assertGlobalRouting(DeploymentId deployment, GlobalRouting.Status status, GlobalRouting.Agent agent) { var changedAt = tester.controller().clock().instant(); var westPolicies = tester.controller().routing().policies().get(deployment); diff --git a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/restapi/application/responses/proton-metrics.json b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/restapi/application/responses/proton-metrics.json new file mode 100644 index 00000000000..a7e5b3918d8 --- /dev/null +++ b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/restapi/application/responses/proton-metrics.json @@ -0,0 +1,23 @@ +{ + "metrics": [{ + "clusterId": "content/doc/", + "metrics": { + "resourceMemoryUsageAverage": 0.103482, + "documentsReadyCount": 11430, + "documentDiskUsage": 44021, + "resourceDiskUsageAverage": 0.0168421, + "documentsTotalCount": 11430, + "documentsActiveCount": 11430 + } + }, { + "clusterId": "content/music/", + "metrics": { + "resourceMemoryUsageAverage": 0.00912, + "documentsReadyCount": 32000, + "documentDiskUsage": 90113, + "resourceDiskUsageAverage": 0.23912, + "documentsTotalCount": 32210, + "documentsActiveCount": 32210 + } + }] +}
\ No newline at end of file |