diff options
Diffstat (limited to 'metrics')
25 files changed, 1355 insertions, 249 deletions
diff --git a/metrics/pom.xml b/metrics/pom.xml index e8303e5a01f..a3c045ec2f9 100644 --- a/metrics/pom.xml +++ b/metrics/pom.xml @@ -12,8 +12,14 @@ <packaging>jar</packaging> <version>8-SNAPSHOT</version> <name>metrics</name> + <dependencies> <dependency> + <groupId>com.google.guava</groupId> + <artifactId>guava</artifactId> + <scope>test</scope> + </dependency> + <dependency> <groupId>com.yahoo.vespa</groupId> <artifactId>annotations</artifactId> <version>${project.version}</version> @@ -25,7 +31,18 @@ <version>${project.version}</version> <scope>provided</scope> </dependency> + <dependency> + <groupId>org.junit.jupiter</groupId> + <artifactId>junit-jupiter-api</artifactId> + <scope>test</scope> + </dependency> + <dependency> + <groupId>org.junit.jupiter</groupId> + <artifactId>junit-jupiter-engine</artifactId> + <scope>test</scope> + </dependency> </dependencies> + <build> <plugins> <plugin> @@ -86,11 +103,9 @@ <artifactId>maven-install-plugin</artifactId> </plugin> <plugin> - <!-- Remove when v2.1 is the default - - it is required by maven-project-info-reports-plugin v2.2 --> <groupId>org.apache.maven.plugins</groupId> <artifactId>maven-site-plugin</artifactId> - <version>2.1</version> + <version>${maven-site-plugin.vespa.version}</version> </plugin> </plugins> </build> diff --git a/metrics/src/main/java/ai/vespa/metrics/ContainerMetrics.java b/metrics/src/main/java/ai/vespa/metrics/ContainerMetrics.java index e515bbf9275..ac7ecfa124a 100644 --- a/metrics/src/main/java/ai/vespa/metrics/ContainerMetrics.java +++ b/metrics/src/main/java/ai/vespa/metrics/ContainerMetrics.java @@ -17,6 +17,7 @@ public enum ContainerMetrics implements VespaMetrics { JDISC_GC_COUNT("jdisc.gc.count", Unit.OPERATION, "Number of JVM garbage collections done"), JDISC_GC_MS("jdisc.gc.ms", Unit.MILLISECOND, "Time spent in JVM garbage collection"), JDISC_JVM("jdisc.jvm", Unit.VERSION, "JVM runtime version"), + CPU("cpu", Unit.THREAD, "Container service CPU pressure"), JDISC_MEMORY_MAPPINGS("jdisc.memory_mappings", Unit.OPERATION, "JDISC Memory mappings"), JDISC_OPEN_FILE_DESCRIPTORS("jdisc.open_file_descriptors", Unit.ITEM, "JDISC Open file descriptors"), @@ -28,7 +29,7 @@ public enum ContainerMetrics implements VespaMetrics { JDISC_THREAD_POOL_MAX_ALLOWED_SIZE("jdisc.thread_pool.max_allowed_size", Unit.THREAD, "The maximum allowed number of threads in the pool"), JDISC_THREAD_POOL_ACTIVE_THREADS("jdisc.thread_pool.active_threads", Unit.THREAD, "Number of threads that are active"), - JDISC_DEACTIVATED_CONTAINERS("jdisc.deactivated_containers.total", Unit.ITEM, "JDISC Deactivated container instances"), + JDISC_DEACTIVATED_CONTAINERS_TOTAL("jdisc.deactivated_containers.total", Unit.ITEM, "JDISC Deactivated container instances"), JDISC_DEACTIVATED_CONTAINERS_WITH_RETAINED_REFS("jdisc.deactivated_containers.with_retained_refs.last", Unit.ITEM, "JDISC Deactivated container nodes with retained refs"), JDISC_APPLICATION_FAILED_COMPONENT_GRAPHS("jdisc.application.failed_component_graphs", Unit.ITEM, "JDISC Application failed component graphs"), JDISC_APPLICATION_COMPONENT_GRAPH_CREATION_TIME_MILLIS("jdisc.application.component_graph.creation_time_millis", Unit.MILLISECOND, "JDISC Application component graph creation time"), @@ -45,7 +46,7 @@ public enum ContainerMetrics implements VespaMetrics { JDISC_HTTP_SSL_HANDSHAKE_FAILURE_MISSING_CLIENT_CERT("jdisc.http.ssl.handshake.failure.missing_client_cert", Unit.OPERATION, "JDISC HTTP SSL Handshake failures due to missing client certificate"), JDISC_HTTP_SSL_HANDSHAKE_FAILURE_EXPIRED_CLIENT_CERT("jdisc.http.ssl.handshake.failure.expired_client_cert", Unit.OPERATION, "JDISC HTTP SSL Handshake failures due to expired client certificate"), JDISC_HTTP_SSL_HANDSHAKE_FAILURE_INVALID_CLIENT_CERT("jdisc.http.ssl.handshake.failure.invalid_client_cert", Unit.OPERATION, "JDISC HTTP SSL Handshake failures due to invalid client certificate"), - JDISC_HTTP_SSL_HANDSHAKE_FAILURE_INCOMPATIBLE_PROTOCOLS("jdisc.http.ssl.handshake.failure.incompatible_protocols", Unit.OPERATION, "JDISC HTTP SSL Handshake failures due to inincompatible protocols"), + JDISC_HTTP_SSL_HANDSHAKE_FAILURE_INCOMPATIBLE_PROTOCOLS("jdisc.http.ssl.handshake.failure.incompatible_protocols", Unit.OPERATION, "JDISC HTTP SSL Handshake failures due to incompatible protocols"), JDISC_HTTP_SSL_HANDSHAKE_FAILURE_INCOMPATIBLE_CHIFERS("jdisc.http.ssl.handshake.failure.incompatible_chifers", Unit.OPERATION, "JDISC HTTP SSL Handshake failures due to incompatible chifers"), JDISC_HTTP_SSL_HANDSHAKE_FAILURE_CONNECTION_CLOSED("jdisc.http.ssl.handshake.failure.connection_closed", Unit.OPERATION, "JDISC HTTP SSL Handshake failures due to connection closed"), JDISC_HTTP_SSL_HANDSHAKE_FAILURE_UNKNOWN("jdisc.http.ssl.handshake.failure.unknown", Unit.OPERATION, "JDISC HTTP SSL Handshake failures for unknown reason"), @@ -117,7 +118,7 @@ public enum ContainerMetrics implements VespaMetrics { // SearchChain metrics - PEAK_QPS("peak_qps", Unit.QUERY_PER_SECOND, "The highest number of qps for a second for this metrics shapshot"), + PEAK_QPS("peak_qps", Unit.QUERY_PER_SECOND, "The highest number of qps for a second for this metrics snapshot"), SEARCH_CONNECTIONS("search_connections", Unit.CONNECTION, "Number of search connections"), FEED_OPERATIONS("feed.operations", Unit.OPERATION, "Number of document feed operations"), FEED_LATENCY("feed.latency", Unit.MILLISECOND, "Feed latency"), @@ -125,9 +126,9 @@ public enum ContainerMetrics implements VespaMetrics { QUERIES("queries", Unit.OPERATION, "Query volume"), QUERY_CONTAINER_LATENCY("query_container_latency", Unit.MILLISECOND, "The query execution time consumed in the container"), QUERY_LATENCY("query_latency", Unit.MILLISECOND, "The overall query latency as seen by the container"), - QUERY_TIMEOUT("query_timeout", Unit.MILLISECOND, "The amount of time allowed for query execytion, from the client"), + QUERY_TIMEOUT("query_timeout", Unit.MILLISECOND, "The amount of time allowed for query execution, from the client"), FAILED_QUERIES("failed_queries", Unit.OPERATION, "The number of failed queries"), - DEGRADED_QUERIES("degraded_queries", Unit.OPERATION, "The number of degraded queries, e.g. due to some conent nodes not responding in time"), + DEGRADED_QUERIES("degraded_queries", Unit.OPERATION, "The number of degraded queries, e.g. due to some content nodes not responding in time"), HITS_PER_QUERY("hits_per_query", Unit.HIT_PER_QUERY, "The number of hits returned"), QUERY_HIT_OFFSET("query_hit_offset", Unit.HIT, "The offset for hits returned"), DOCUMENTS_COVERED("documents_covered", Unit.DOCUMENT, "The combined number of documents considered during query evaluation"), @@ -169,7 +170,7 @@ public enum ContainerMetrics implements VespaMetrics { // Java (JRT) TLS metrics JRT_TRANSPORT_TLS_CERTIFICATE_VERIFICATION_FAILURES("jrt.transport.tls-certificate-verification-failures", Unit.FAILURE, "TLS certificate verification failures"), JRT_TRANSPORT_PEER_AUTHORIZATION_FAILURES("jrt.transport.peer-authorization-failures", Unit.FAILURE, "TLS peer authorization failures"), - JRT_TRANSPORT_SERVER_TLS_CONNECIONTS_ESTABLISHED("jrt.transport.server.tls-connections-established", Unit.CONNECTION, "TLS server connections established"), + JRT_TRANSPORT_SERVER_TLS_CONNECTIONS_ESTABLISHED("jrt.transport.server.tls-connections-established", Unit.CONNECTION, "TLS server connections established"), JRT_TRANSPORT_CLIENT_TLS_CONNECTIONS_ESTABLISHED("jrt.transport.client.tls-connections-established", Unit.CONNECTION, "TLS client connections established"), JRT_TRANSPORT_SERVER_UNENCRYPTED_CONNECTIONS_ESTABLISHED("jrt.transport.server.unencrypted-connections-established", Unit.CONNECTION, "Unencrypted server connections established"), JRT_TRANSPORT_CLIENT_UNENCRYPTED_CONNECTIONS_ESTABLISHED("jrt.transport.client.unencrypted-connections-established", Unit.CONNECTION, "Unencrypted client connections established"), diff --git a/metrics/src/main/java/ai/vespa/metrics/ControllerMetrics.java b/metrics/src/main/java/ai/vespa/metrics/ControllerMetrics.java index 4770fe51830..0f200308862 100644 --- a/metrics/src/main/java/ai/vespa/metrics/ControllerMetrics.java +++ b/metrics/src/main/java/ai/vespa/metrics/ControllerMetrics.java @@ -37,6 +37,7 @@ public enum ControllerMetrics implements VespaMetrics { DNS_QUEUED_REQUESTS("dns.queuedRequests", Unit.REQUEST, "Queued DNS requests"), ZMS_QUOTA_USAGE("zms.quota.usage", Unit.FRACTION, "ZMS Quota usage per resource type"), COREDUMP_PROCESSED("coredump.processed", Unit.FAILURE,"Controller: Core dumps processed"), + AUTH0_EXCEPTIONS("auth0.exceptions", Unit.FAILURE, "Controller: Auth0 exceptions"), // Metrics per API, metrics names generated in ControllerMaintainer/MetricsReporter OPERATION_APPLICATION("operation.application", Unit.REQUEST, "Controller: Requests for /application API"), diff --git a/metrics/src/main/java/ai/vespa/metrics/HostedNodeAdminMetrics.java b/metrics/src/main/java/ai/vespa/metrics/HostedNodeAdminMetrics.java index 97185e9c703..a5f21eeba44 100644 --- a/metrics/src/main/java/ai/vespa/metrics/HostedNodeAdminMetrics.java +++ b/metrics/src/main/java/ai/vespa/metrics/HostedNodeAdminMetrics.java @@ -21,6 +21,10 @@ public enum HostedNodeAdminMetrics implements VespaMetrics { MEM_UTIL("mem.util", Unit.PERCENTAGE, "Memory utilisation"), MEM_TOTAL_USED("mem_total.used", Unit.BYTE, "Total amount of memory used by the node, including OS buffer caches"), MEM_TOTAL_UTIL("mem_total.util", Unit.PERCENTAGE, "Total memory utilisation"), + MEM_SOCK("mem.sock", Unit.BYTE, "Amount of memory used in network transmission buffers"), + MEM_SLAB_RECLAIMABLE("mem.slab_reclaimable", Unit.BYTE, "Amount of 'slab' that might be reclaimed"), + MEM_SLAB("mem.slab", Unit.BYTE, "Amount of memory used for storing in-kernel data structures"), + MEM_ANON("mem.anon", Unit.BYTE, "Amount of memory used in anonymous mappings"), GPU_UTIL("gpu.util", Unit.PERCENTAGE, "GPU utilisation"), GPU_MEM_USED("gpu.memory.used", Unit.BYTE, "GPU memory used"), GPU_MEM_TOTAL("gpu.memory.total", Unit.BYTE, "GPU memory available"), diff --git a/metrics/src/main/java/ai/vespa/metrics/VespaMetrics.java b/metrics/src/main/java/ai/vespa/metrics/VespaMetrics.java index 3a17d8a3155..9a498abc911 100644 --- a/metrics/src/main/java/ai/vespa/metrics/VespaMetrics.java +++ b/metrics/src/main/java/ai/vespa/metrics/VespaMetrics.java @@ -17,6 +17,8 @@ public interface VespaMetrics { return baseName() + "." + suffix.suffix(); } + // TODO: make the below methods return Metric objects instead of Strings. + default String ninety_five_percentile() { return withSuffix(Suffix.ninety_five_percentile); } diff --git a/metrics/src/main/java/ai/vespa/metrics/set/BasicMetricSets.java b/metrics/src/main/java/ai/vespa/metrics/set/BasicMetricSets.java new file mode 100644 index 00000000000..f167e654e6f --- /dev/null +++ b/metrics/src/main/java/ai/vespa/metrics/set/BasicMetricSets.java @@ -0,0 +1,23 @@ +package ai.vespa.metrics.set; + +import ai.vespa.metrics.ContainerMetrics; + +/** + * Defines metric sets that are meant to be used as building blocks for other metric sets. + * + * @author gjoranv + */ +public class BasicMetricSets { + + static MetricSet containerHttpStatusMetrics() { + return new MetricSet.Builder("basic-container-http-status") + .metric(ContainerMetrics.HTTP_STATUS_1XX.rate()) + + .metric(ContainerMetrics.HTTP_STATUS_2XX.rate()) + .metric(ContainerMetrics.HTTP_STATUS_3XX.rate()) + .metric(ContainerMetrics.HTTP_STATUS_4XX.rate()) + .metric(ContainerMetrics.HTTP_STATUS_5XX.rate()) + .build(); + } + +} diff --git a/metrics/src/main/java/ai/vespa/metrics/set/DefaultMetrics.java b/metrics/src/main/java/ai/vespa/metrics/set/DefaultMetrics.java index 9e23a7625cb..d75f94e7c7e 100644 --- a/metrics/src/main/java/ai/vespa/metrics/set/DefaultMetrics.java +++ b/metrics/src/main/java/ai/vespa/metrics/set/DefaultMetrics.java @@ -1,21 +1,19 @@ // Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +// TODO: Keep the set of metrics in this set stable until Vespa 9. +// TODO: Vespa 9: Let this class be replaced by Vespa9DefaultMetricSet. package ai.vespa.metrics.set; +import ai.vespa.metrics.ClusterControllerMetrics; import ai.vespa.metrics.ContainerMetrics; -import ai.vespa.metrics.SearchNodeMetrics; -import ai.vespa.metrics.StorageMetrics; import ai.vespa.metrics.DistributorMetrics; -import ai.vespa.metrics.ClusterControllerMetrics; -import ai.vespa.metrics.SentinelMetrics; import ai.vespa.metrics.NodeAdminMetrics; -import ai.vespa.metrics.Suffix; -import ai.vespa.metrics.VespaMetrics; +import ai.vespa.metrics.SearchNodeMetrics; +import ai.vespa.metrics.SentinelMetrics; +import ai.vespa.metrics.StorageMetrics; -import java.util.Collections; import java.util.EnumSet; -import java.util.LinkedHashSet; -import java.util.Set; +import java.util.List; import static ai.vespa.metrics.Suffix.average; import static ai.vespa.metrics.Suffix.count; @@ -41,135 +39,141 @@ public class DefaultMetrics { private static MetricSet createMetricSet() { return new MetricSet(defaultMetricSetId, - getAllMetrics(), - Set.of(defaultVespaMetricSet)); + List.of(), + List.of(defaultVespaMetricSet, + getContainerMetrics(), + getSearchChainMetrics(), + getDocprocMetrics(), + getSearchNodeMetrics(), + getContentMetrics(), + getStorageMetrics(), + getDistributorMetrics(), + getClusterControllerMetrics(), + getSentinelMetrics(), + getOtherMetrics())); } - private static Set<Metric> getAllMetrics() { - Set<Metric> metrics = new LinkedHashSet<>(); - - addContainerMetrics(metrics); - addSearchChainMetrics(metrics); - addDocprocMetrics(metrics); - addSearchNodeMetrics(metrics); - addContentMetrics(metrics); - addStorageMetrics(metrics); - addDistributorMetrics(metrics); - addClusterControllerMetrics(metrics); - addOtherMetrics(metrics); - return Collections.unmodifiableSet(metrics); + private static MetricSet getContainerMetrics() { + return new MetricSet.Builder("default-container") + .metric(ContainerMetrics.HTTP_STATUS_1XX.rate()) + .metric(ContainerMetrics.HTTP_STATUS_2XX.rate()) + .metric(ContainerMetrics.HTTP_STATUS_3XX.rate()) + .metric(ContainerMetrics.HTTP_STATUS_4XX.rate()) + .metric(ContainerMetrics.HTTP_STATUS_5XX.rate()) + .metric(ContainerMetrics.JDISC_GC_MS, EnumSet.of(max, average)) + .metric(ContainerMetrics.MEM_HEAP_FREE.average()) + .metric(ContainerMetrics.FEED_LATENCY, EnumSet.of(sum, count)) + // .metric(ContainerMetrics.CPU.baseName()) // TODO: Add to container metrics + .metric(ContainerMetrics.JDISC_THREAD_POOL_SIZE.max()) + .metric(ContainerMetrics.JDISC_THREAD_POOL_ACTIVE_THREADS, EnumSet.of(sum, count, min, max)) + .metric(ContainerMetrics.JDISC_THREAD_POOL_WORK_QUEUE_CAPACITY.max()) + .metric(ContainerMetrics.JDISC_THREAD_POOL_WORK_QUEUE_SIZE, EnumSet.of(sum, count, min, max)) + .metric(ContainerMetrics.SERVER_ACTIVE_THREADS.average()) + + // Metrics needed for alerting + .metric(ContainerMetrics.JDISC_SINGLETON_IS_ACTIVE, EnumSet.of(max, last)) // TODO: Vespa 9: Remove last + .metric(ContainerMetrics.JDISC_HTTP_SSL_HANDSHAKE_FAILURE_MISSING_CLIENT_CERT.rate()) + .metric(ContainerMetrics.JDISC_HTTP_SSL_HANDSHAKE_FAILURE_INCOMPATIBLE_PROTOCOLS.rate()) + .metric(ContainerMetrics.JDISC_HTTP_SSL_HANDSHAKE_FAILURE_INCOMPATIBLE_CHIFERS.rate()) + .metric(ContainerMetrics.JDISC_HTTP_SSL_HANDSHAKE_FAILURE_UNKNOWN.rate()) + .metric(ContainerMetrics.JDISC_APPLICATION_FAILED_COMPONENT_GRAPHS.rate()) + .metric(ContainerMetrics.ATHENZ_TENANT_CERT_EXPIRY_SECONDS, EnumSet.of(min, max, last)) // TODO: Vespa 9: Remove max, last + .build(); } - private static void addContainerMetrics(Set<Metric> metrics) { - addMetric(metrics, ContainerMetrics.HTTP_STATUS_1XX.rate()); - addMetric(metrics, ContainerMetrics.HTTP_STATUS_2XX.rate()); - addMetric(metrics, ContainerMetrics.HTTP_STATUS_3XX.rate()); - addMetric(metrics, ContainerMetrics.HTTP_STATUS_4XX.rate()); - addMetric(metrics, ContainerMetrics.HTTP_STATUS_5XX.rate()); - addMetric(metrics, ContainerMetrics.JDISC_GC_MS.average()); - addMetric(metrics, ContainerMetrics.MEM_HEAP_FREE.average()); - addMetric(metrics, ContainerMetrics.FEED_LATENCY, EnumSet.of(sum, count)); - addMetric(metrics, ContainerMetrics.JDISC_GC_MS.max()); - // addMetric(metrics, ContainerMetrics.CPU.baseName()); // TODO: Add to container metrics - addMetric(metrics, ContainerMetrics.JDISC_THREAD_POOL_SIZE.max()); - addMetric(metrics, ContainerMetrics.JDISC_THREAD_POOL_ACTIVE_THREADS, EnumSet.of(sum, count, min, max)); - addMetric(metrics, ContainerMetrics.JDISC_THREAD_POOL_WORK_QUEUE_CAPACITY.max()); - addMetric(metrics, ContainerMetrics.JDISC_THREAD_POOL_WORK_QUEUE_SIZE, EnumSet.of(sum, count, min, max)); - addMetric(metrics, ContainerMetrics.SERVER_ACTIVE_THREADS.average()); - - // Metrics needed for alerting - addMetric(metrics, ContainerMetrics.JDISC_SINGLETON_IS_ACTIVE.last()); - addMetric(metrics, ContainerMetrics.JDISC_HTTP_SSL_HANDSHAKE_FAILURE_MISSING_CLIENT_CERT.rate()); - addMetric(metrics, ContainerMetrics.JDISC_HTTP_SSL_HANDSHAKE_FAILURE_INCOMPATIBLE_PROTOCOLS.rate()); - addMetric(metrics, ContainerMetrics.JDISC_HTTP_SSL_HANDSHAKE_FAILURE_INCOMPATIBLE_CHIFERS.rate()); - addMetric(metrics, ContainerMetrics.JDISC_HTTP_SSL_HANDSHAKE_FAILURE_UNKNOWN.rate()); - addMetric(metrics, ContainerMetrics.JDISC_APPLICATION_FAILED_COMPONENT_GRAPHS.rate()); - addMetric(metrics, ContainerMetrics.ATHENZ_TENANT_CERT_EXPIRY_SECONDS.last()); - } - - private static void addSearchChainMetrics(Set<Metric> metrics) { - addMetric(metrics, ContainerMetrics.QUERIES.rate()); - addMetric(metrics, ContainerMetrics.QUERY_LATENCY, EnumSet.of(sum, count, max, ninety_five_percentile, ninety_nine_percentile, average)); // TODO: Remove average with Vespa 9 - addMetric(metrics, ContainerMetrics.HITS_PER_QUERY, EnumSet.of(sum, count, max, average)); // TODO: Remove average with Vespa 9 - addMetric(metrics, ContainerMetrics.TOTAL_HITS_PER_QUERY, EnumSet.of(sum, count, max, average)); // TODO: Remove average with Vespa 9 - addMetric(metrics, ContainerMetrics.DEGRADED_QUERIES.rate()); - addMetric(metrics, ContainerMetrics.FAILED_QUERIES.rate()); + private static MetricSet getSearchChainMetrics() { + return new MetricSet.Builder("default-search-chain") + .metric(ContainerMetrics.QUERIES.rate()) + .metric(ContainerMetrics.QUERY_LATENCY, EnumSet.of(sum, count, max, ninety_five_percentile, ninety_nine_percentile, average)) // TODO: Remove average with Vespa 9 + .metric(ContainerMetrics.HITS_PER_QUERY, EnumSet.of(sum, count, max, average)) // TODO: Remove average with Vespa 9 + .metric(ContainerMetrics.TOTAL_HITS_PER_QUERY, EnumSet.of(sum, count, max, average)) // TODO: Remove average with Vespa 9 + .metric(ContainerMetrics.DEGRADED_QUERIES.rate()) + .metric(ContainerMetrics.FAILED_QUERIES.rate()) + .build(); } - private static void addDocprocMetrics(Set<Metric> metrics) { - addMetric(metrics, ContainerMetrics.DOCPROC_DOCUMENTS.sum()); + private static MetricSet getDocprocMetrics() { + return new MetricSet.Builder("default-docproc") + .metric(ContainerMetrics.DOCPROC_DOCUMENTS.sum()) + .build(); } - private static void addSearchNodeMetrics(Set<Metric> metrics) { + private static MetricSet getSearchNodeMetrics() { // Metrics needed for alerting - addMetric(metrics, SearchNodeMetrics.CONTENT_PROTON_RESOURCE_USAGE_DISK.average()); - addMetric(metrics, SearchNodeMetrics.CONTENT_PROTON_RESOURCE_USAGE_MEMORY.average()); - addMetric(metrics, SearchNodeMetrics.CONTENT_PROTON_RESOURCE_USAGE_FEEDING_BLOCKED, EnumSet.of(max, last)); // TODO: Vespa 9: Remove last + return new MetricSet.Builder("default-search-node") + .metric(SearchNodeMetrics.CONTENT_PROTON_RESOURCE_USAGE_DISK.average()) + .metric(SearchNodeMetrics.CONTENT_PROTON_RESOURCE_USAGE_MEMORY.average()) + .metric(SearchNodeMetrics.CONTENT_PROTON_RESOURCE_USAGE_FEEDING_BLOCKED, EnumSet.of(max, last)) // TODO: Vespa 9: Remove last + .build(); } - private static void addContentMetrics(Set<Metric> metrics) { - addMetric(metrics, SearchNodeMetrics.CONTENT_PROTON_SEARCH_PROTOCOL_DOCSUM_REQUESTED_DOCUMENTS.rate()); - addMetric(metrics, SearchNodeMetrics.CONTENT_PROTON_SEARCH_PROTOCOL_DOCSUM_LATENCY, EnumSet.of(sum, count, max, average)); // TODO: Remove average with Vespa 9 - addMetric(metrics, SearchNodeMetrics.CONTENT_PROTON_SEARCH_PROTOCOL_QUERY_LATENCY, EnumSet.of(sum, count, max, average)); // TODO: Remove average with Vespa 9 - - addMetric(metrics, SearchNodeMetrics.CONTENT_PROTON_DOCUMENTDB_DOCUMENTS_TOTAL.last()); - addMetric(metrics, SearchNodeMetrics.CONTENT_PROTON_DOCUMENTDB_DOCUMENTS_READY.last()); - addMetric(metrics, SearchNodeMetrics.CONTENT_PROTON_DOCUMENTDB_DOCUMENTS_ACTIVE.last()); - addMetric(metrics, SearchNodeMetrics.CONTENT_PROTON_DOCUMENTDB_DISK_USAGE.last()); - addMetric(metrics, SearchNodeMetrics.CONTENT_PROTON_DOCUMENTDB_MEMORY_USAGE_ALLOCATED_BYTES.last()); - - addMetric(metrics, SearchNodeMetrics.CONTENT_PROTON_RESOURCE_USAGE_DISK.average()); - addMetric(metrics, SearchNodeMetrics.CONTENT_PROTON_RESOURCE_USAGE_MEMORY.average()); - - addMetric(metrics, SearchNodeMetrics.CONTENT_PROTON_DOCUMENTDB_MATCHING_DOCS_MATCHED.rate()); - addMetric(metrics, SearchNodeMetrics.CONTENT_PROTON_DOCUMENTDB_MATCHING_DOCS_RERANKED.rate()); - addMetric(metrics, SearchNodeMetrics.CONTENT_PROTON_DOCUMENTDB_MATCHING_RANK_PROFILE_QUERY_SETUP_TIME, EnumSet.of(sum, count, max, average)); // TODO: Remove average with Vespa 9 - addMetric(metrics, SearchNodeMetrics.CONTENT_PROTON_DOCUMENTDB_MATCHING_RANK_PROFILE_QUERY_LATENCY, EnumSet.of(sum, count, max, average)); // TODO: Remove average with Vespa 9 - addMetric(metrics, SearchNodeMetrics.CONTENT_PROTON_DOCUMENTDB_MATCHING_RANK_PROFILE_RERANK_TIME, EnumSet.of(sum, count, max, average)); // TODO: Remove average with Vespa 9 - addMetric(metrics, SearchNodeMetrics.CONTENT_PROTON_TRANSACTIONLOG_DISK_USAGE.last()); + private static MetricSet getContentMetrics() { + return new MetricSet.Builder("default-content") + .metric(SearchNodeMetrics.CONTENT_PROTON_SEARCH_PROTOCOL_DOCSUM_REQUESTED_DOCUMENTS.rate()) + .metric(SearchNodeMetrics.CONTENT_PROTON_SEARCH_PROTOCOL_DOCSUM_LATENCY, EnumSet.of(sum, count, max, average)) // TODO: Remove average with Vespa 9 + .metric(SearchNodeMetrics.CONTENT_PROTON_SEARCH_PROTOCOL_QUERY_LATENCY, EnumSet.of(sum, count, max, average)) // TODO: Remove average with Vespa 9 + + .metric(SearchNodeMetrics.CONTENT_PROTON_DOCUMENTDB_DOCUMENTS_TOTAL, EnumSet.of(max,last)) // TODO: Vespa 9: Remove last + .metric(SearchNodeMetrics.CONTENT_PROTON_DOCUMENTDB_DOCUMENTS_READY, EnumSet.of(max,last)) // TODO: Vespa 9: Remove last + .metric(SearchNodeMetrics.CONTENT_PROTON_DOCUMENTDB_DOCUMENTS_ACTIVE, EnumSet.of(max,last)) // TODO: Vespa 9: Remove last + .metric(SearchNodeMetrics.CONTENT_PROTON_DOCUMENTDB_DISK_USAGE.last()) + .metric(SearchNodeMetrics.CONTENT_PROTON_DOCUMENTDB_MEMORY_USAGE_ALLOCATED_BYTES.last()) + + .metric(SearchNodeMetrics.CONTENT_PROTON_RESOURCE_USAGE_DISK.average()) + .metric(SearchNodeMetrics.CONTENT_PROTON_RESOURCE_USAGE_MEMORY.average()) + + .metric(SearchNodeMetrics.CONTENT_PROTON_DOCUMENTDB_MATCHING_DOCS_MATCHED.rate()) + .metric(SearchNodeMetrics.CONTENT_PROTON_DOCUMENTDB_MATCHING_DOCS_RERANKED.rate()) + .metric(SearchNodeMetrics.CONTENT_PROTON_DOCUMENTDB_MATCHING_RANK_PROFILE_QUERY_SETUP_TIME, EnumSet.of(sum, count, max, average)) // TODO: Remove average with Vespa 9 + .metric(SearchNodeMetrics.CONTENT_PROTON_DOCUMENTDB_MATCHING_RANK_PROFILE_QUERY_LATENCY, EnumSet.of(sum, count, max, average)) // TODO: Remove average with Vespa 9 + .metric(SearchNodeMetrics.CONTENT_PROTON_DOCUMENTDB_MATCHING_RANK_PROFILE_RERANK_TIME, EnumSet.of(sum, count, max, average)) // TODO: Remove average with Vespa 9 + .metric(SearchNodeMetrics.CONTENT_PROTON_TRANSACTIONLOG_DISK_USAGE.last()) + .build(); } - private static void addStorageMetrics(Set<Metric> metrics) { - addMetric(metrics, StorageMetrics.VDS_FILESTOR_ALLTHREADS_PUT_COUNT.rate()); - addMetric(metrics, StorageMetrics.VDS_FILESTOR_ALLTHREADS_UPDATE_COUNT.rate()); - addMetric(metrics, StorageMetrics.VDS_FILESTOR_ALLTHREADS_REMOVE_COUNT.rate()); + private static MetricSet getStorageMetrics() { + return new MetricSet.Builder("default-storage") + .metric(StorageMetrics.VDS_FILESTOR_ALLTHREADS_PUT_COUNT.rate()) + .metric(StorageMetrics.VDS_FILESTOR_ALLTHREADS_UPDATE_COUNT.rate()) + .metric(StorageMetrics.VDS_FILESTOR_ALLTHREADS_REMOVE_COUNT.rate()) + .build(); } - private static void addDistributorMetrics(Set<Metric> metrics) { - addMetric(metrics, DistributorMetrics.VDS_DISTRIBUTOR_DOCSSTORED.average()); + private static MetricSet getDistributorMetrics() { + return new MetricSet.Builder("default-distributor") + .metric(DistributorMetrics.VDS_DISTRIBUTOR_DOCSSTORED.average()) - // Metrics needed for alerting - addMetric(metrics, DistributorMetrics.VDS_BOUNCER_CLOCK_SKEW_ABORTS.count()); + // Metrics needed for alerting + .metric(DistributorMetrics.VDS_BOUNCER_CLOCK_SKEW_ABORTS.count()) + .build(); } - private static void addClusterControllerMetrics(Set<Metric> metrics) { + private static MetricSet getClusterControllerMetrics() { // Metrics needed for alerting - addMetric(metrics, ClusterControllerMetrics.DOWN_COUNT.last()); - addMetric(metrics, ClusterControllerMetrics.MAINTENANCE_COUNT.last()); - addMetric(metrics, ClusterControllerMetrics.UP_COUNT.last()); - addMetric(metrics, ClusterControllerMetrics.IS_MASTER.last()); - addMetric(metrics, ClusterControllerMetrics.RESOURCE_USAGE_NODES_ABOVE_LIMIT, EnumSet.of(max, last)); // TODO: Vespa 9: Remove last - addMetric(metrics, ClusterControllerMetrics.RESOURCE_USAGE_MAX_MEMORY_UTILIZATION, EnumSet.of(last, max)); // TODO: Vespa 9: Remove last - addMetric(metrics, ClusterControllerMetrics.RESOURCE_USAGE_MAX_DISK_UTILIZATION, EnumSet.of(last, max)); // TODO: Vespa 9: Remove last + return new MetricSet.Builder("default-cluster-controller") + .metric(ClusterControllerMetrics.DOWN_COUNT, EnumSet.of(max, last)) // TODO: Vespa 9: Remove last + .metric(ClusterControllerMetrics.MAINTENANCE_COUNT, EnumSet.of(max, last)) // TODO: Vespa 9: Remove last + .metric(ClusterControllerMetrics.UP_COUNT, EnumSet.of(max, last)) // TODO: Vespa 9: Remove last + .metric(ClusterControllerMetrics.IS_MASTER, EnumSet.of(max, last)) // TODO: Vespa 9: Remove last + .metric(ClusterControllerMetrics.RESOURCE_USAGE_NODES_ABOVE_LIMIT, EnumSet.of(max, last)) // TODO: Vespa 9: Remove last + .metric(ClusterControllerMetrics.RESOURCE_USAGE_MAX_MEMORY_UTILIZATION, EnumSet.of(last, max)) // TODO: Vespa 9: Remove last + .metric(ClusterControllerMetrics.RESOURCE_USAGE_MAX_DISK_UTILIZATION, EnumSet.of(last, max)) // TODO: Vespa 9: Remove last + .build(); } - private static void addSentinelMetrics(Set<Metric> metrics) { + private static MetricSet getSentinelMetrics() { // Metrics needed for alerting - addMetric(metrics, SentinelMetrics.SENTINEL_TOTAL_RESTARTS.last()); + return new MetricSet.Builder("default-sentinel") + .metric(SentinelMetrics.SENTINEL_TOTAL_RESTARTS, EnumSet.of(max, sum, last)) // TODO: Vespa 9: Remove last, sum? + .build(); } - private static void addOtherMetrics(Set<Metric> metrics) { + private static MetricSet getOtherMetrics() { // Metrics needed for alerting - addMetric(metrics, NodeAdminMetrics.ENDPOINT_CERTIFICATE_EXPIRY_SECONDS.baseName()); - addMetric(metrics, NodeAdminMetrics.NODE_CERTIFICATE_EXPIRY_SECONDS.baseName()); - } - - private static void addMetric(Set<Metric> metrics, String nameWithSuffix) { - metrics.add(new Metric(nameWithSuffix)); - } - - private static void addMetric(Set<Metric> metrics, VespaMetrics metric, EnumSet<Suffix> suffixes) { - suffixes.forEach(suffix -> metrics.add(new Metric(metric.baseName() + "." + suffix.suffix()))); + return new MetricSet.Builder("default-other") + .metric(NodeAdminMetrics.ENDPOINT_CERTIFICATE_EXPIRY_SECONDS.baseName()) + .metric(NodeAdminMetrics.NODE_CERTIFICATE_EXPIRY_SECONDS.baseName()) + .build(); } private DefaultMetrics() { } diff --git a/metrics/src/main/java/ai/vespa/metrics/set/DefaultVespaMetrics.java b/metrics/src/main/java/ai/vespa/metrics/set/DefaultVespaMetrics.java index 93b6bfab002..e34c8ee68eb 100644 --- a/metrics/src/main/java/ai/vespa/metrics/set/DefaultVespaMetrics.java +++ b/metrics/src/main/java/ai/vespa/metrics/set/DefaultVespaMetrics.java @@ -4,9 +4,6 @@ package ai.vespa.metrics.set; import ai.vespa.metrics.ContainerMetrics; import ai.vespa.metrics.SearchNodeMetrics; -import java.util.LinkedHashSet; -import java.util.Set; - /** * Encapsulates a minimal set of Vespa metrics to be used as default for all metrics consumers. * @@ -19,11 +16,11 @@ public class DefaultVespaMetrics { public static final MetricSet defaultVespaMetricSet = createDefaultVespaMetricSet(); private static MetricSet createDefaultVespaMetricSet() { - Set<Metric> metrics = new LinkedHashSet<>(); - - metrics.add(new Metric(ContainerMetrics.FEED_OPERATIONS.rate())); - metrics.add(new Metric(SearchNodeMetrics.CONTENT_PROTON_RESOURCE_USAGE_FEEDING_BLOCKED.last())); - return new MetricSet("default-vespa", metrics); + return new MetricSet.Builder("default-vespa") + .metric(ContainerMetrics.FEED_OPERATIONS.rate()) + .metric(SearchNodeMetrics.CONTENT_PROTON_RESOURCE_USAGE_FEEDING_BLOCKED.last()) + .build(); } + } diff --git a/metrics/src/main/java/ai/vespa/metrics/set/InfrastructureMetricSet.java b/metrics/src/main/java/ai/vespa/metrics/set/InfrastructureMetricSet.java index 571d292b54d..b3f27fa6117 100644 --- a/metrics/src/main/java/ai/vespa/metrics/set/InfrastructureMetricSet.java +++ b/metrics/src/main/java/ai/vespa/metrics/set/InfrastructureMetricSet.java @@ -17,6 +17,7 @@ import static ai.vespa.metrics.Suffix.average; import static ai.vespa.metrics.Suffix.count; import static ai.vespa.metrics.Suffix.last; import static ai.vespa.metrics.Suffix.max; +import static ai.vespa.metrics.Suffix.min; import static ai.vespa.metrics.Suffix.sum; /** @@ -51,72 +52,73 @@ public class InfrastructureMetricSet { addMetric(metrics, ConfigServerMetrics.DELAYED_RESPONSES.count()); addMetric(metrics, ConfigServerMetrics.SESSION_CHANGE_ERRORS.count()); - addMetric(metrics, ConfigServerMetrics.ZK_Z_NODES, EnumSet.of(max, last)); // TODO: Vespa 9: Remove last. - addMetric(metrics, ConfigServerMetrics.ZK_AVG_LATENCY, EnumSet.of(max, last)); // TODO: Vespa 9: Remove last. - addMetric(metrics, ConfigServerMetrics.ZK_MAX_LATENCY, EnumSet.of(max, last)); // TODO: Vespa 9: Remove last. - addMetric(metrics, ConfigServerMetrics.ZK_CONNECTIONS, EnumSet.of(max, last)); // TODO: Vespa 9: Remove last. + addMetric(metrics, ConfigServerMetrics.ZK_Z_NODES.max()); + addMetric(metrics, ConfigServerMetrics.ZK_MAX_LATENCY, EnumSet.of(max, average)); + addMetric(metrics, ConfigServerMetrics.ZK_CONNECTIONS.max()); addMetric(metrics, ConfigServerMetrics.ZK_CONNECTION_LOST.count()); addMetric(metrics, ConfigServerMetrics.ZK_RECONNECTED.count()); addMetric(metrics, ConfigServerMetrics.ZK_SUSPENDED.count()); - addMetric(metrics, ConfigServerMetrics.ZK_OUTSTANDING_REQUESTS, EnumSet.of(max, last)); // TODO: Vespa 9: Remove last. + addMetric(metrics, ConfigServerMetrics.ZK_OUTSTANDING_REQUESTS.max()); // Node repository metrics - addMetric(metrics, ConfigServerMetrics.NODES_NON_ACTIVE_FRACTION.last()); - addMetric(metrics, ConfigServerMetrics.CLUSTER_COST.last()); - addMetric(metrics, ConfigServerMetrics.CLUSTER_LOAD_IDEAL_CPU.last()); - addMetric(metrics, ConfigServerMetrics.CLUSTER_LOAD_IDEAL_MEMORY.last()); - addMetric(metrics, ConfigServerMetrics.CLUSTER_LOAD_IDEAL_DISK.last()); + addMetric(metrics, ConfigServerMetrics.NODES_ACTIVE.max()); + addMetric(metrics, ConfigServerMetrics.NODES_NON_ACTIVE_FRACTION.max()); + addMetric(metrics, ConfigServerMetrics.CLUSTER_COST.max()); + addMetric(metrics, ConfigServerMetrics.CLUSTER_LOAD_IDEAL_CPU.max()); + addMetric(metrics, ConfigServerMetrics.CLUSTER_LOAD_IDEAL_MEMORY.max()); + addMetric(metrics, ConfigServerMetrics.CLUSTER_LOAD_IDEAL_DISK.max()); addMetric(metrics, ConfigServerMetrics.WANT_TO_REBOOT.max()); addMetric(metrics, ConfigServerMetrics.WANT_TO_RESTART.max()); addMetric(metrics, ConfigServerMetrics.WANT_TO_RETIRE.max()); addMetric(metrics, ConfigServerMetrics.RETIRED.max()); addMetric(metrics, ConfigServerMetrics.WANT_TO_CHANGE_VESPA_VERSION.max()); - addMetric(metrics, ConfigServerMetrics.HAS_WIRE_GUARD_KEY.last()); + addMetric(metrics, ConfigServerMetrics.HAS_WIRE_GUARD_KEY.max()); addMetric(metrics, ConfigServerMetrics.WANT_TO_DEPROVISION.max()); - addMetric(metrics, ConfigServerMetrics.SUSPENDED, EnumSet.of(max, last)); // TODO: Vespa 9: Remove last + addMetric(metrics, ConfigServerMetrics.SUSPENDED.max()); addMetric(metrics, ConfigServerMetrics.SOME_SERVICES_DOWN.max()); - addMetric(metrics, ConfigServerMetrics.NODE_FAILER_BAD_NODE.last()); + addMetric(metrics, ConfigServerMetrics.NODE_FAILER_BAD_NODE.max()); addMetric(metrics, ConfigServerMetrics.LOCK_ATTEMPT_LOCKED_LOAD, EnumSet.of(max,average)); - addMetric(metrics, ConfigServerMetrics.HOSTED_VESPA_DOCKER_ALLOCATED_CAPACITY_CPU, EnumSet.of(average, last)); // TODO: Vespa 9: Remove last? - addMetric(metrics, ConfigServerMetrics.HOSTED_VESPA_DOCKER_ALLOCATED_CAPACITY_MEM, EnumSet.of(average, last)); // TODO: Vespa 9: Remove last? - addMetric(metrics, ConfigServerMetrics.HOSTED_VESPA_DOCKER_ALLOCATED_CAPACITY_DISK, EnumSet.of(average, last)); // TODO: Vespa 9: Remove last? + addMetric(metrics, ConfigServerMetrics.HOSTED_VESPA_DOCKER_ALLOCATED_CAPACITY_CPU.max()); + addMetric(metrics, ConfigServerMetrics.HOSTED_VESPA_DOCKER_ALLOCATED_CAPACITY_MEM.max()); + addMetric(metrics, ConfigServerMetrics.HOSTED_VESPA_DOCKER_ALLOCATED_CAPACITY_DISK.max()); addMetric(metrics, ConfigServerMetrics.HOSTED_VESPA_DOCKER_FREE_CAPACITY_CPU.max()); addMetric(metrics, ConfigServerMetrics.HOSTED_VESPA_DOCKER_FREE_CAPACITY_MEM.max()); addMetric(metrics, ConfigServerMetrics.HOSTED_VESPA_DOCKER_FREE_CAPACITY_DISK.max()); - addMetric(metrics, ConfigServerMetrics.HOSTED_VESPA_DOCKER_TOTAL_CAPACITY_CPU, EnumSet.of(max,average)); - addMetric(metrics, ConfigServerMetrics.HOSTED_VESPA_DOCKER_TOTAL_CAPACITY_DISK, EnumSet.of(max,average)); - addMetric(metrics, ConfigServerMetrics.HOSTED_VESPA_DOCKER_TOTAL_CAPACITY_MEM, EnumSet.of(max,average)); - addMetric(metrics, ConfigServerMetrics.HOSTED_VESPA_DOCKER_SKEW.last()); - addMetric(metrics, ConfigServerMetrics.HOSTED_VESPA_PENDING_REDEPLOYMENTS.last()); + addMetric(metrics, ConfigServerMetrics.HOSTED_VESPA_DOCKER_TOTAL_CAPACITY_CPU.max()); + addMetric(metrics, ConfigServerMetrics.HOSTED_VESPA_DOCKER_TOTAL_CAPACITY_DISK.max()); + addMetric(metrics, ConfigServerMetrics.HOSTED_VESPA_DOCKER_TOTAL_CAPACITY_MEM.max()); + addMetric(metrics, ConfigServerMetrics.HOSTED_VESPA_DOCKER_SKEW.max()); + addMetric(metrics, ConfigServerMetrics.HOSTED_VESPA_PENDING_REDEPLOYMENTS.max()); addMetric(metrics, ConfigServerMetrics.HOSTED_VESPA_ACTIVE_HOSTS.max()); addMetric(metrics, ConfigServerMetrics.HOSTED_VESPA_DIRTY_HOSTS.max()); addMetric(metrics, ConfigServerMetrics.HOSTED_VESPA_FAILED_HOSTS.max()); addMetric(metrics, ConfigServerMetrics.HOSTED_VESPA_INACTIVE_HOSTS.max()); - addMetric(metrics, ConfigServerMetrics.HOSTED_VESPA_PROVISIONED_HOSTS.last()); + addMetric(metrics, ConfigServerMetrics.HOSTED_VESPA_PROVISIONED_HOSTS.max()); addMetric(metrics, ConfigServerMetrics.HOSTED_VESPA_READY_HOSTS.max()); addMetric(metrics, ConfigServerMetrics.HOSTED_VESPA_RESERVED_HOSTS.max()); - addMetric(metrics, ConfigServerMetrics.HOSTED_VESPA_PARKED_HOSTS, EnumSet.of(max, last)); // TODO: Vespa 9: Remove last + addMetric(metrics, ConfigServerMetrics.HOSTED_VESPA_PARKED_HOSTS.max()); addMetric(metrics, ConfigServerMetrics.HOSTED_VESPA_ACTIVE_NODES.max()); addMetric(metrics, ConfigServerMetrics.HOSTED_VESPA_FAILED_NODES.max()); - addMetric(metrics, ConfigServerMetrics.HOSTED_VESPA_PARKED_NODES, EnumSet.of(max, last)); // TODO: Vespa 9: Remove last + addMetric(metrics, ConfigServerMetrics.HOSTED_VESPA_PARKED_NODES.max()); addMetric(metrics, ConfigServerMetrics.RPC_SERVER_WORK_QUEUE_SIZE.average()); - addMetric(metrics, ConfigServerMetrics.DEPLOYMENT_ACTIVATE_MILLIS.last()); - addMetric(metrics, ConfigServerMetrics.DEPLOYMENT_PREPARE_MILLIS.last()); + addMetric(metrics, ConfigServerMetrics.DEPLOYMENT_ACTIVATE_MILLIS.max()); + addMetric(metrics, ConfigServerMetrics.DEPLOYMENT_PREPARE_MILLIS.max()); addMetric(metrics, ConfigServerMetrics.LOCK_ATTEMPT_LOCKED_LOAD, EnumSet.of(max, average)); - addMetric(metrics, ConfigServerMetrics.MAINTENANCE_SUCCESS_FACTOR_DEVIATION.last()); + addMetric(metrics, ConfigServerMetrics.MAINTENANCE_SUCCESS_FACTOR_DEVIATION.max()); addMetric(metrics, ConfigServerMetrics.MAINTENANCE_DEPLOYMENT_FAILURE.count()); addMetric(metrics, ConfigServerMetrics.MAINTENANCE_DEPLOYMENT_TRANSIENT_FAILURE.count()); addMetric(metrics, ConfigServerMetrics.OVERCOMMITTED_HOSTS.max()); - addMetric(metrics, ConfigServerMetrics.SPARE_HOST_CAPACITY.last()); - addMetric(metrics, ConfigServerMetrics.THROTTLED_NODE_FAILURES, EnumSet.of(max, last)); // TODO: Vespa 9: Remove last - addMetric(metrics, ConfigServerMetrics.NODE_FAIL_THROTTLING.last()); + addMetric(metrics, ConfigServerMetrics.SPARE_HOST_CAPACITY, EnumSet.of(min, max, last)); // TODO: Vespa 9: Remove last. WAIT + addMetric(metrics, ConfigServerMetrics.THROTTLED_HOST_FAILURES.max()); + addMetric(metrics, ConfigServerMetrics.THROTTLED_NODE_FAILURES.max()); + addMetric(metrics, ConfigServerMetrics.NODE_FAIL_THROTTLING.max()); addMetric(metrics, ConfigServerMetrics.ORCHESTRATOR_LOCK_ACQUIRE_SUCCESS.count()); addMetric(metrics, ConfigServerMetrics.ORCHESTRATOR_LOCK_ACQUIRE_TIMEOUT.count()); - addMetric(metrics, ConfigServerMetrics.ZONE_WORKING.last()); + addMetric(metrics, ConfigServerMetrics.ZONE_WORKING.max()); addMetric(metrics, ConfigServerMetrics.THROTTLED_HOST_PROVISIONING.max()); // Container metrics that should be stored for the config-server @@ -125,11 +127,11 @@ public class InfrastructureMetricSet { addMetric(metrics, ContainerMetrics.HTTP_STATUS_2XX.count()); addMetric(metrics, ContainerMetrics.HTTP_STATUS_4XX.count()); addMetric(metrics, ContainerMetrics.HTTP_STATUS_5XX.count()); - addMetric(metrics, ContainerMetrics.JDISC_GC_MS.last()); + addMetric(metrics, ContainerMetrics.JDISC_GC_MS.max()); addMetric(metrics, ContainerMetrics.MEM_HEAP_USED.average()); addMetric(metrics, ContainerMetrics.SERVER_NUM_REQUESTS.count()); - addMetric(metrics, ContainerMetrics.SERVER_STARTED_MILLIS.last()); - addMetric(metrics, ContainerMetrics.SERVER_TOTAL_SUCCESSFUL_RESPONSE_LATENCY.last()); + addMetric(metrics, ContainerMetrics.SERVER_STARTED_MILLIS.max()); + addMetric(metrics, ContainerMetrics.SERVER_TOTAL_SUCCESSFUL_RESPONSE_LATENCY.max()); return metrics; } @@ -138,40 +140,42 @@ public class InfrastructureMetricSet { Set<Metric> metrics = new LinkedHashSet<>(); addMetric(metrics, ControllerMetrics.ATHENZ_REQUEST_ERROR.count()); - addMetric(metrics, ControllerMetrics.ARCHIVE_BUCKET_COUNT.last()); - addMetric(metrics, ControllerMetrics.BILLING_TENANTS.last()); + addMetric(metrics, ControllerMetrics.ARCHIVE_BUCKET_COUNT.max()); + addMetric(metrics, ControllerMetrics.BILLING_TENANTS.max()); addMetric(metrics, ControllerMetrics.DEPLOYMENT_ABORT.count()); - addMetric(metrics, ControllerMetrics.DEPLOYMENT_AVERAGE_DURATION, EnumSet.of(max, last)); // TODO: Vespa 9: Remove last. + addMetric(metrics, ControllerMetrics.DEPLOYMENT_AVERAGE_DURATION.max()); addMetric(metrics, ControllerMetrics.DEPLOYMENT_CONVERGENCE_FAILURE.count()); addMetric(metrics, ControllerMetrics.DEPLOYMENT_DEPLOYMENT_FAILURE.count()); addMetric(metrics, ControllerMetrics.DEPLOYMENT_ERROR.count()); - addMetric(metrics, ControllerMetrics.DEPLOYMENT_FAILING_UPGRADES.last()); - addMetric(metrics, ControllerMetrics.DEPLOYMENT_FAILURE_PERCENTAGE.last()); + addMetric(metrics, ControllerMetrics.DEPLOYMENT_FAILING_UPGRADES.min()); + addMetric(metrics, ControllerMetrics.DEPLOYMENT_FAILURE_PERCENTAGE.max()); addMetric(metrics, ControllerMetrics.DEPLOYMENT_NODE_COUNT_BY_OS_VERSION.max()); addMetric(metrics, ControllerMetrics.DEPLOYMENT_OS_CHANGE_DURATION.max()); addMetric(metrics, ControllerMetrics.DEPLOYMENT_START.count()); addMetric(metrics, ControllerMetrics.DEPLOYMENT_SUCCESS.count()); addMetric(metrics, ControllerMetrics.DEPLOYMENT_TEST_FAILURE.count()); - addMetric(metrics, ControllerMetrics.DEPLOYMENT_WARNINGS.last()); + addMetric(metrics, ControllerMetrics.DEPLOYMENT_WARNINGS.max()); addMetric(metrics, ControllerMetrics.DEPLOYMENT_ENDPOINT_CERTIFICATE_TIMEOUT.count()); - addMetric(metrics, ControllerMetrics.DEPLOYMENT_BROKEN_SYSTEM_VERSION.last()); - - addMetric(metrics, ControllerMetrics.OPERATION_APPLICATION.last()); - addMetric(metrics, ControllerMetrics.OPERATION_CHANGEMANAGEMENT.last()); - addMetric(metrics, ControllerMetrics.OPERATION_CONFIGSERVER.last()); - addMetric(metrics, ControllerMetrics.OPERATION_CONTROLLER.last()); - addMetric(metrics, ControllerMetrics.OPERATION_FLAGS.last()); - addMetric(metrics, ControllerMetrics.OPERATION_OS.last()); - addMetric(metrics, ControllerMetrics.OPERATION_ROUTING.last()); - addMetric(metrics, ControllerMetrics.OPERATION_ZONE.last()); - - addMetric(metrics, ControllerMetrics.REMAINING_ROTATIONS.last()); - addMetric(metrics, ControllerMetrics.DNS_QUEUED_REQUESTS.last()); - addMetric(metrics, ControllerMetrics.ZMS_QUOTA_USAGE.last()); + addMetric(metrics, ControllerMetrics.DEPLOYMENT_BROKEN_SYSTEM_VERSION.max()); + + addMetric(metrics, ControllerMetrics.OPERATION_APPLICATION.max()); + addMetric(metrics, ControllerMetrics.OPERATION_CHANGEMANAGEMENT.max()); + addMetric(metrics, ControllerMetrics.OPERATION_CONFIGSERVER.max()); + addMetric(metrics, ControllerMetrics.OPERATION_CONTROLLER.max()); + addMetric(metrics, ControllerMetrics.OPERATION_FLAGS.max()); + addMetric(metrics, ControllerMetrics.OPERATION_OS.max()); + addMetric(metrics, ControllerMetrics.OPERATION_ROUTING.max()); + addMetric(metrics, ControllerMetrics.OPERATION_ZONE.max()); + + addMetric(metrics, ControllerMetrics.REMAINING_ROTATIONS, EnumSet.of(min, max, last)); // TODO: Vespa 9: Remove last WAIT + addMetric(metrics, ControllerMetrics.DNS_QUEUED_REQUESTS.max()); + addMetric(metrics, ControllerMetrics.ZMS_QUOTA_USAGE.max()); addMetric(metrics, ControllerMetrics.COREDUMP_PROCESSED.count()); + addMetric(metrics, ControllerMetrics.AUTH0_EXCEPTIONS.count()); - addMetric(metrics, ControllerMetrics.METERING_AGE_SECONDS.last()); + addMetric(metrics, ControllerMetrics.METERING_AGE_SECONDS.min()); + addMetric(metrics, ControllerMetrics.METERING_LAST_REPORTED.max()); return metrics; } diff --git a/metrics/src/main/java/ai/vespa/metrics/set/MetricSet.java b/metrics/src/main/java/ai/vespa/metrics/set/MetricSet.java index b8409fb7663..f334690a7ca 100644 --- a/metrics/src/main/java/ai/vespa/metrics/set/MetricSet.java +++ b/metrics/src/main/java/ai/vespa/metrics/set/MetricSet.java @@ -1,7 +1,11 @@ // Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. package ai.vespa.metrics.set; +import ai.vespa.metrics.Suffix; +import ai.vespa.metrics.VespaMetrics; + import java.util.Collection; +import java.util.EnumSet; import java.util.LinkedHashMap; import java.util.LinkedHashSet; import java.util.Map; @@ -88,4 +92,44 @@ public class MetricSet { return metricMap; } + + public static class Builder { + private final String id; + private final Set<Metric> metrics = new LinkedHashSet<>(); + private final Set<MetricSet> children = new LinkedHashSet<>(); + + public Builder(String id) { + this.id = id; + } + + public Builder metric(String metric) { + return metric(new Metric(metric)); + } + + /** Adds all given suffixes of the given metric to this set. */ + public Builder metric(VespaMetrics metric, EnumSet<Suffix> suffixes) { + suffixes.forEach(suffix -> metrics.add(new Metric(metric.baseName() + "." + suffix.suffix()))); + return this; + } + + public Builder metric(Metric metric) { + metrics.add(metric); + return this; + } + + public Builder metrics(Collection<Metric> metrics) { + this.metrics.addAll(metrics); + return this; + } + + public Builder metricSet(MetricSet child) { + children.add(child); + return this; + } + + public MetricSet build() { + return new MetricSet(id, metrics, children); + } + } + } diff --git a/metrics/src/main/java/ai/vespa/metrics/set/SystemMetrics.java b/metrics/src/main/java/ai/vespa/metrics/set/SystemMetrics.java index 0560daebc43..a86deb3830b 100644 --- a/metrics/src/main/java/ai/vespa/metrics/set/SystemMetrics.java +++ b/metrics/src/main/java/ai/vespa/metrics/set/SystemMetrics.java @@ -30,6 +30,10 @@ public class SystemMetrics { new Metric(HostedNodeAdminMetrics.MEM_UTIL.baseName()), new Metric(HostedNodeAdminMetrics.MEM_TOTAL_USED.baseName()), new Metric(HostedNodeAdminMetrics.MEM_TOTAL_UTIL.baseName()), + new Metric(HostedNodeAdminMetrics.MEM_SOCK.baseName()), + new Metric(HostedNodeAdminMetrics.MEM_SLAB_RECLAIMABLE.baseName()), + new Metric(HostedNodeAdminMetrics.MEM_SLAB.baseName()), + new Metric(HostedNodeAdminMetrics.MEM_ANON.baseName()), new Metric(HostedNodeAdminMetrics.GPU_UTIL.baseName()), new Metric(HostedNodeAdminMetrics.GPU_MEM_USED.baseName()), new Metric(HostedNodeAdminMetrics.GPU_MEM_TOTAL.baseName()) diff --git a/metrics/src/main/java/ai/vespa/metrics/set/Vespa9DefaultMetricSet.java b/metrics/src/main/java/ai/vespa/metrics/set/Vespa9DefaultMetricSet.java new file mode 100644 index 00000000000..a87557981b7 --- /dev/null +++ b/metrics/src/main/java/ai/vespa/metrics/set/Vespa9DefaultMetricSet.java @@ -0,0 +1,175 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +// TODO: This class to be used for managed Vespa. +// TODO: Vespa 9: Let this class replace DefaultMetrics. +package ai.vespa.metrics.set; + +import ai.vespa.metrics.ClusterControllerMetrics; +import ai.vespa.metrics.ContainerMetrics; +import ai.vespa.metrics.DistributorMetrics; +import ai.vespa.metrics.NodeAdminMetrics; +import ai.vespa.metrics.SearchNodeMetrics; +import ai.vespa.metrics.SentinelMetrics; +import ai.vespa.metrics.StorageMetrics; + +import java.util.EnumSet; +import java.util.List; + +import static ai.vespa.metrics.Suffix.average; +import static ai.vespa.metrics.Suffix.count; +import static ai.vespa.metrics.Suffix.last; +import static ai.vespa.metrics.Suffix.max; +import static ai.vespa.metrics.Suffix.min; +import static ai.vespa.metrics.Suffix.ninety_five_percentile; +import static ai.vespa.metrics.Suffix.ninety_nine_percentile; +import static ai.vespa.metrics.Suffix.sum; +import static ai.vespa.metrics.set.DefaultVespaMetrics.defaultVespaMetricSet; + +/** + * Metrics for the 'default' consumer, which is used by default for the generic metrics api and + * other user facing apis, e.g. 'prometheus/'. + * + * @author gjoranv + * @author yngve + */ +public class Vespa9DefaultMetricSet { + + public static final String defaultMetricSetId = "vespa9default"; + + public static final MetricSet vespa9defaultMetricSet = createMetricSet(); + + private static MetricSet createMetricSet() { + return new MetricSet(defaultMetricSetId, + List.of(), + List.of(defaultVespaMetricSet, + BasicMetricSets.containerHttpStatusMetrics(), + getContainerMetrics(), + getSearchChainMetrics(), + getDocprocMetrics(), + getSearchNodeMetrics(), + getContentMetrics(), + getStorageMetrics(), + getDistributorMetrics(), + getClusterControllerMetrics(), + getSentinelMetrics(), + getOtherMetrics())); + } + + private static MetricSet getContainerMetrics() { + return new MetricSet.Builder("default-container") + .metric(ContainerMetrics.JDISC_GC_MS, EnumSet.of(max, average)) + .metric(ContainerMetrics.MEM_HEAP_FREE.average()) + .metric(ContainerMetrics.FEED_LATENCY, EnumSet.of(sum, count)) + .metric(ContainerMetrics.CPU.baseName()) + .metric(ContainerMetrics.JDISC_THREAD_POOL_SIZE.max()) + .metric(ContainerMetrics.JDISC_THREAD_POOL_ACTIVE_THREADS, EnumSet.of(sum, count, min, max)) + .metric(ContainerMetrics.JDISC_THREAD_POOL_WORK_QUEUE_CAPACITY.max()) + .metric(ContainerMetrics.JDISC_THREAD_POOL_WORK_QUEUE_SIZE, EnumSet.of(sum, count, min, max)) + .metric(ContainerMetrics.SERVER_ACTIVE_THREADS.average()) + + // Metrics needed for alerting + .metric(ContainerMetrics.JDISC_SINGLETON_IS_ACTIVE.max()) + .metric(ContainerMetrics.JDISC_HTTP_SSL_HANDSHAKE_FAILURE_MISSING_CLIENT_CERT.rate()) + .metric(ContainerMetrics.JDISC_HTTP_SSL_HANDSHAKE_FAILURE_INCOMPATIBLE_PROTOCOLS.rate()) + .metric(ContainerMetrics.JDISC_HTTP_SSL_HANDSHAKE_FAILURE_INCOMPATIBLE_CHIFERS.rate()) + .metric(ContainerMetrics.JDISC_HTTP_SSL_HANDSHAKE_FAILURE_UNKNOWN.rate()) + .metric(ContainerMetrics.JDISC_APPLICATION_FAILED_COMPONENT_GRAPHS.rate()) + .metric(ContainerMetrics.ATHENZ_TENANT_CERT_EXPIRY_SECONDS, EnumSet.of(min, max, last)) // TODO: Vespa 9: Remove max, last + .build(); + } + + private static MetricSet getSearchChainMetrics() { + return new MetricSet.Builder("default-search-chain") + .metric(ContainerMetrics.QUERIES.rate()) + .metric(ContainerMetrics.QUERY_LATENCY, EnumSet.of(sum, count, max, ninety_five_percentile, ninety_nine_percentile)) + .metric(ContainerMetrics.HITS_PER_QUERY, EnumSet.of(sum, count, max)) + .metric(ContainerMetrics.TOTAL_HITS_PER_QUERY, EnumSet.of(sum, count, max)) + .metric(ContainerMetrics.DEGRADED_QUERIES.rate()) + .metric(ContainerMetrics.FAILED_QUERIES.rate()) + .build(); + } + + private static MetricSet getDocprocMetrics() { + return new MetricSet.Builder("default-docproc") + .metric(ContainerMetrics.DOCPROC_DOCUMENTS.sum()) + .build(); + } + + private static MetricSet getSearchNodeMetrics() { + // Metrics needed for alerting + return new MetricSet.Builder("default-search-node") + .metric(SearchNodeMetrics.CONTENT_PROTON_RESOURCE_USAGE_DISK.average()) + .metric(SearchNodeMetrics.CONTENT_PROTON_RESOURCE_USAGE_MEMORY.average()) + .metric(SearchNodeMetrics.CONTENT_PROTON_RESOURCE_USAGE_FEEDING_BLOCKED.max()) + .build(); + } + + private static MetricSet getContentMetrics() { + return new MetricSet.Builder("default-content") + .metric(SearchNodeMetrics.CONTENT_PROTON_SEARCH_PROTOCOL_DOCSUM_REQUESTED_DOCUMENTS.rate()) + .metric(SearchNodeMetrics.CONTENT_PROTON_SEARCH_PROTOCOL_DOCSUM_LATENCY, EnumSet.of(sum, count, max)) + .metric(SearchNodeMetrics.CONTENT_PROTON_SEARCH_PROTOCOL_QUERY_LATENCY, EnumSet.of(sum, count, max)) + + .metric(SearchNodeMetrics.CONTENT_PROTON_DOCUMENTDB_DOCUMENTS_TOTAL.max()) + .metric(SearchNodeMetrics.CONTENT_PROTON_DOCUMENTDB_DOCUMENTS_READY.max()) + .metric(SearchNodeMetrics.CONTENT_PROTON_DOCUMENTDB_DOCUMENTS_ACTIVE.max()) + + .metric(SearchNodeMetrics.CONTENT_PROTON_RESOURCE_USAGE_DISK.average()) + .metric(SearchNodeMetrics.CONTENT_PROTON_RESOURCE_USAGE_MEMORY.average()) + + .metric(SearchNodeMetrics.CONTENT_PROTON_DOCUMENTDB_MATCHING_DOCS_MATCHED.rate()) + .metric(SearchNodeMetrics.CONTENT_PROTON_DOCUMENTDB_MATCHING_DOCS_RERANKED.rate()) + .metric(SearchNodeMetrics.CONTENT_PROTON_DOCUMENTDB_MATCHING_RANK_PROFILE_QUERY_SETUP_TIME, EnumSet.of(sum, count, max)) + .metric(SearchNodeMetrics.CONTENT_PROTON_DOCUMENTDB_MATCHING_RANK_PROFILE_QUERY_LATENCY, EnumSet.of(sum, count, max)) + .metric(SearchNodeMetrics.CONTENT_PROTON_DOCUMENTDB_MATCHING_RANK_PROFILE_RERANK_TIME, EnumSet.of(sum, count, max)) + .build(); + } + + private static MetricSet getStorageMetrics() { + return new MetricSet.Builder("default-storage") + .metric(StorageMetrics.VDS_FILESTOR_ALLTHREADS_PUT_COUNT.rate()) + .metric(StorageMetrics.VDS_FILESTOR_ALLTHREADS_UPDATE_COUNT.rate()) + .metric(StorageMetrics.VDS_FILESTOR_ALLTHREADS_REMOVE_COUNT.rate()) + .build(); + } + + private static MetricSet getDistributorMetrics() { + return new MetricSet.Builder("default-distributor") + .metric(DistributorMetrics.VDS_DISTRIBUTOR_DOCSSTORED.average()) + + // Metrics needed for alerting + .metric(DistributorMetrics.VDS_BOUNCER_CLOCK_SKEW_ABORTS.count()) + .build(); + } + + private static MetricSet getClusterControllerMetrics() { + // Metrics needed for alerting + return new MetricSet.Builder("default-cluster-controller") + .metric(ClusterControllerMetrics.DOWN_COUNT.max()) + .metric(ClusterControllerMetrics.MAINTENANCE_COUNT.max()) + .metric(ClusterControllerMetrics.UP_COUNT, EnumSet.of(max, last)) // TODO: Remove last + .metric(ClusterControllerMetrics.IS_MASTER.max()) + .metric(ClusterControllerMetrics.RESOURCE_USAGE_NODES_ABOVE_LIMIT.max()) + .metric(ClusterControllerMetrics.RESOURCE_USAGE_MAX_MEMORY_UTILIZATION.max()) + .metric(ClusterControllerMetrics.RESOURCE_USAGE_MAX_DISK_UTILIZATION.max()) + .build(); + } + + private static MetricSet getSentinelMetrics() { + // Metrics needed for alerting + return new MetricSet.Builder("default-sentinel") + .metric(SentinelMetrics.SENTINEL_TOTAL_RESTARTS.max()) + .build(); + } + + private static MetricSet getOtherMetrics() { + // Metrics needed for alerting + return new MetricSet.Builder("default-other") + .metric(NodeAdminMetrics.ENDPOINT_CERTIFICATE_EXPIRY_SECONDS.baseName()) + .metric(NodeAdminMetrics.NODE_CERTIFICATE_EXPIRY_SECONDS.baseName()) + .build(); + } + + private Vespa9DefaultMetricSet() { } + +} diff --git a/metrics/src/main/java/ai/vespa/metrics/set/Vespa9VespaMetricSet.java b/metrics/src/main/java/ai/vespa/metrics/set/Vespa9VespaMetricSet.java new file mode 100644 index 00000000000..2744f6e4ae0 --- /dev/null +++ b/metrics/src/main/java/ai/vespa/metrics/set/Vespa9VespaMetricSet.java @@ -0,0 +1,640 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +// TODO: This class to be used for managed Vespa. +// TODO: Vespa 9: Let this class replace VespaMetricSet. +package ai.vespa.metrics.set; + +import ai.vespa.metrics.ClusterControllerMetrics; +import ai.vespa.metrics.ContainerMetrics; +import ai.vespa.metrics.DistributorMetrics; +import ai.vespa.metrics.LogdMetrics; +import ai.vespa.metrics.NodeAdminMetrics; +import ai.vespa.metrics.RoutingLayerMetrics; +import ai.vespa.metrics.SearchNodeMetrics; +import ai.vespa.metrics.SentinelMetrics; +import ai.vespa.metrics.SlobrokMetrics; +import ai.vespa.metrics.StorageMetrics; +import ai.vespa.metrics.Suffix; +import ai.vespa.metrics.VespaMetrics; + +import java.util.Collections; +import java.util.EnumSet; +import java.util.LinkedHashSet; +import java.util.List; +import java.util.Set; + +import static ai.vespa.metrics.Suffix.average; +import static ai.vespa.metrics.Suffix.count; +import static ai.vespa.metrics.Suffix.last; +import static ai.vespa.metrics.Suffix.max; +import static ai.vespa.metrics.Suffix.min; +import static ai.vespa.metrics.Suffix.ninety_five_percentile; +import static ai.vespa.metrics.Suffix.ninety_nine_percentile; +import static ai.vespa.metrics.Suffix.rate; +import static ai.vespa.metrics.Suffix.sum; +import static ai.vespa.metrics.set.DefaultVespaMetrics.defaultVespaMetricSet; + +/** + * Encapsulates vespa service metrics. + * + * @author gjoranv + * @author yngve + */ +public class Vespa9VespaMetricSet { + + public static final MetricSet vespa9vespaMetricSet = createMetricSet(); + + private static MetricSet createMetricSet() { + return new MetricSet("vespa9vespa", + getVespaMetrics(), + List.of(defaultVespaMetricSet, + BasicMetricSets.containerHttpStatusMetrics())); + } + + private static Set<Metric> getVespaMetrics() { + Set<Metric> metrics = new LinkedHashSet<>(); + + metrics.addAll(getSearchNodeMetrics()); + metrics.addAll(getStorageMetrics()); + metrics.addAll(getDistributorMetrics()); + metrics.addAll(getDocprocMetrics()); + metrics.addAll(getClusterControllerMetrics()); + metrics.addAll(getSearchChainMetrics()); + metrics.addAll(getContainerMetrics()); + metrics.addAll(getSentinelMetrics()); + metrics.addAll(getOtherMetrics()); + + return Collections.unmodifiableSet(metrics); + } + + private static Set<Metric> getSentinelMetrics() { + Set<Metric> metrics = new LinkedHashSet<>(); + + addMetric(metrics, SentinelMetrics.SENTINEL_RESTARTS.count()); + addMetric(metrics, SentinelMetrics.SENTINEL_TOTAL_RESTARTS.max()); + + return metrics; + } + + private static Set<Metric> getOtherMetrics() { + Set<Metric> metrics = new LinkedHashSet<>(); + + addMetric(metrics, SlobrokMetrics.SLOBROK_HEARTBEATS_FAILED.count()); + addMetric(metrics, SlobrokMetrics.SLOBROK_MISSING_CONSENSUS.count()); + + addMetric(metrics, LogdMetrics.LOGD_PROCESSED_LINES.count()); + + // Java (JRT) TLS metrics + addMetric(metrics, ContainerMetrics.JRT_TRANSPORT_TLS_CERTIFICATE_VERIFICATION_FAILURES.baseName()); + addMetric(metrics, ContainerMetrics.JRT_TRANSPORT_PEER_AUTHORIZATION_FAILURES.baseName()); + addMetric(metrics, ContainerMetrics.JRT_TRANSPORT_SERVER_TLS_CONNECTIONS_ESTABLISHED.baseName()); + addMetric(metrics, ContainerMetrics.JRT_TRANSPORT_CLIENT_TLS_CONNECTIONS_ESTABLISHED.baseName()); + addMetric(metrics, ContainerMetrics.JRT_TRANSPORT_SERVER_UNENCRYPTED_CONNECTIONS_ESTABLISHED.baseName()); + addMetric(metrics, ContainerMetrics.JRT_TRANSPORT_CLIENT_UNENCRYPTED_CONNECTIONS_ESTABLISHED.baseName()); + + // C++ TLS metrics + addMetric(metrics, StorageMetrics.VDS_SERVER_NETWORK_TLS_HANDSHAKES_FAILED.count()); + addMetric(metrics, StorageMetrics.VDS_SERVER_NETWORK_PEER_AUTHORIZATION_FAILURES.count()); + addMetric(metrics, StorageMetrics.VDS_SERVER_NETWORK_CLIENT_TLS_CONNECTIONS_ESTABLISHED.count()); + addMetric(metrics, StorageMetrics.VDS_SERVER_NETWORK_SERVER_TLS_CONNECTIONS_ESTABLISHED.count()); + addMetric(metrics, StorageMetrics.VDS_SERVER_NETWORK_CLIENT_INSECURE_CONNECTIONS_ESTABLISHED.count()); + addMetric(metrics, StorageMetrics.VDS_SERVER_NETWORK_SERVER_INSECURE_CONNECTIONS_ESTABLISHED.count()); + addMetric(metrics, StorageMetrics.VDS_SERVER_NETWORK_TLS_CONNECTIONS_BROKEN.count()); + addMetric(metrics, StorageMetrics.VDS_SERVER_NETWORK_FAILED_TLS_CONFIG_RELOADS.count()); + // C++ capability metrics + addMetric(metrics, StorageMetrics.VDS_SERVER_NETWORK_RPC_CAPABILITY_CHECKS_FAILED.count()); + addMetric(metrics, StorageMetrics.VDS_SERVER_NETWORK_STATUS_CAPABILITY_CHECKS_FAILED.count()); + + // C++ Fnet metrics + addMetric(metrics, StorageMetrics.VDS_SERVER_FNET_NUM_CONNECTIONS.count()); + + // NodeAdmin certificate + addMetric(metrics, NodeAdminMetrics.ENDPOINT_CERTIFICATE_EXPIRY_SECONDS.baseName()); + addMetric(metrics, NodeAdminMetrics.NODE_CERTIFICATE_EXPIRY_SECONDS.baseName()); + + // Routing layer metrics + addMetric(metrics, RoutingLayerMetrics.WORKER_CONNECTIONS.max()); // Hosted Vespa only (routing layer) + + return metrics; + } + + + private static Set<Metric> getContainerMetrics() { + Set<Metric> metrics = new LinkedHashSet<>(); + + addMetric(metrics, ContainerMetrics.APPLICATION_GENERATION.baseName()); + + addMetric(metrics, ContainerMetrics.HANDLED_REQUESTS.count()); + addMetric(metrics, ContainerMetrics.HANDLED_LATENCY, EnumSet.of(sum, count, max)); + + addMetric(metrics, ContainerMetrics.SERVER_NUM_OPEN_CONNECTIONS, EnumSet.of(max, average)); + addMetric(metrics, ContainerMetrics.SERVER_NUM_CONNECTIONS, EnumSet.of(max, average)); + + addMetric(metrics, ContainerMetrics.SERVER_BYTES_RECEIVED, EnumSet.of(sum, count)); + addMetric(metrics, ContainerMetrics.SERVER_BYTES_SENT, EnumSet.of(sum, count)); + + addMetric(metrics, ContainerMetrics.JDISC_THREAD_POOL_UNHANDLED_EXCEPTIONS, EnumSet.of(sum, count)); + addMetric(metrics, ContainerMetrics.JDISC_THREAD_POOL_WORK_QUEUE_CAPACITY, EnumSet.of(max)); + addMetric(metrics, ContainerMetrics.JDISC_THREAD_POOL_WORK_QUEUE_SIZE, EnumSet.of(sum, count, min, max)); + addMetric(metrics, ContainerMetrics.JDISC_THREAD_POOL_REJECTED_TASKS, EnumSet.of(sum, count)); + addMetric(metrics, ContainerMetrics.JDISC_THREAD_POOL_SIZE.max()); + addMetric(metrics, ContainerMetrics.JDISC_THREAD_POOL_MAX_ALLOWED_SIZE.max()); + addMetric(metrics, ContainerMetrics.JDISC_THREAD_POOL_ACTIVE_THREADS, EnumSet.of(sum, count, min, max)); + + addMetric(metrics, ContainerMetrics.JETTY_THREADPOOL_BUSY_THREADS, EnumSet.of(sum, count, max)); + addMetric(metrics, ContainerMetrics.JETTY_THREADPOOL_TOTAL_THREADS.max()); + addMetric(metrics, ContainerMetrics.JETTY_THREADPOOL_QUEUE_SIZE.max()); + + addMetric(metrics, ContainerMetrics.HTTPAPI_LATENCY, EnumSet.of(max, sum, count)); + addMetric(metrics, ContainerMetrics.HTTPAPI_PENDING, EnumSet.of(max, sum, count)); + addMetric(metrics, ContainerMetrics.HTTPAPI_NUM_OPERATIONS.rate()); + addMetric(metrics, ContainerMetrics.HTTPAPI_NUM_UPDATES.rate()); + addMetric(metrics, ContainerMetrics.HTTPAPI_NUM_REMOVES.rate()); + addMetric(metrics, ContainerMetrics.HTTPAPI_NUM_PUTS.rate()); + addMetric(metrics, ContainerMetrics.HTTPAPI_SUCCEEDED.rate()); + addMetric(metrics, ContainerMetrics.HTTPAPI_FAILED.rate()); + addMetric(metrics, ContainerMetrics.HTTPAPI_PARSE_ERROR.rate()); + addMetric(metrics, ContainerMetrics.HTTPAPI_CONDITION_NOT_MET.rate()); + addMetric(metrics, ContainerMetrics.HTTPAPI_NOT_FOUND.rate()); + addMetric(metrics, ContainerMetrics.HTTPAPI_FAILED_UNKNOWN.rate()); + addMetric(metrics, ContainerMetrics.HTTPAPI_FAILED_INSUFFICIENT_STORAGE.rate()); + addMetric(metrics, ContainerMetrics.HTTPAPI_FAILED_TIMEOUT.rate()); + + addMetric(metrics, ContainerMetrics.MEM_HEAP_TOTAL.average()); + addMetric(metrics, ContainerMetrics.MEM_HEAP_FREE.average()); + addMetric(metrics, ContainerMetrics.MEM_HEAP_USED, EnumSet.of(average, max)); + addMetric(metrics, ContainerMetrics.MEM_DIRECT_TOTAL.average()); + addMetric(metrics, ContainerMetrics.MEM_DIRECT_FREE.average()); + addMetric(metrics, ContainerMetrics.MEM_DIRECT_USED, EnumSet.of(average, max)); + addMetric(metrics, ContainerMetrics.MEM_DIRECT_COUNT.max()); + addMetric(metrics, ContainerMetrics.MEM_NATIVE_TOTAL.average()); + addMetric(metrics, ContainerMetrics.MEM_NATIVE_FREE.average()); + addMetric(metrics, ContainerMetrics.MEM_NATIVE_USED.average()); + + addMetric(metrics, ContainerMetrics.JDISC_MEMORY_MAPPINGS.max()); + addMetric(metrics, ContainerMetrics.JDISC_OPEN_FILE_DESCRIPTORS.max()); + + addMetric(metrics, ContainerMetrics.JDISC_GC_MS.max()); + addMetric(metrics, ContainerMetrics.CPU.baseName()); + + addMetric(metrics, ContainerMetrics.JDISC_DEACTIVATED_CONTAINERS_TOTAL.sum()); + + addMetric(metrics, ContainerMetrics.JDISC_SINGLETON_IS_ACTIVE.max()); + + addMetric(metrics, ContainerMetrics.ATHENZ_TENANT_CERT_EXPIRY_SECONDS, EnumSet.of(min, max, last)); // TODO: Vespa 9: Remove max, last + addMetric(metrics, ContainerMetrics.CONTAINER_IAM_ROLE_EXPIRY_SECONDS.baseName()); + + addMetric(metrics, ContainerMetrics.JDISC_HTTP_REQUEST_PREMATURELY_CLOSED.rate()); + addMetric(metrics, ContainerMetrics.JDISC_HTTP_REQUEST_REQUESTS_PER_CONNECTION, EnumSet.of(sum, count, min, max, average)); + addMetric(metrics, ContainerMetrics.JDISC_HTTP_REQUEST_URI_LENGTH, EnumSet.of(sum, count, max)); + addMetric(metrics, ContainerMetrics.JDISC_HTTP_REQUEST_CONTENT_SIZE, EnumSet.of(sum, count, max)); + addMetric(metrics, ContainerMetrics.JDISC_HTTP_REQUESTS, EnumSet.of(rate, count)); + + addMetric(metrics, ContainerMetrics.JDISC_HTTP_SSL_HANDSHAKE_FAILURE_MISSING_CLIENT_CERT.rate()); + addMetric(metrics, ContainerMetrics.JDISC_HTTP_SSL_HANDSHAKE_FAILURE_EXPIRED_CLIENT_CERT.rate()); + addMetric(metrics, ContainerMetrics.JDISC_HTTP_SSL_HANDSHAKE_FAILURE_INVALID_CLIENT_CERT.rate()); + addMetric(metrics, ContainerMetrics.JDISC_HTTP_SSL_HANDSHAKE_FAILURE_INCOMPATIBLE_PROTOCOLS.rate()); + addMetric(metrics, ContainerMetrics.JDISC_HTTP_SSL_HANDSHAKE_FAILURE_INCOMPATIBLE_CHIFERS.rate()); + addMetric(metrics, ContainerMetrics.JDISC_HTTP_SSL_HANDSHAKE_FAILURE_CONNECTION_CLOSED.rate()); + addMetric(metrics, ContainerMetrics.JDISC_HTTP_SSL_HANDSHAKE_FAILURE_UNKNOWN.rate()); + + addMetric(metrics, ContainerMetrics.JDISC_HTTP_FILTER_RULE_BLOCKED_REQUESTS.rate()); + addMetric(metrics, ContainerMetrics.JDISC_HTTP_FILTER_RULE_ALLOWED_REQUESTS.rate()); + addMetric(metrics, ContainerMetrics.JDISC_HTTP_FILTERING_REQUEST_HANDLED.rate()); + addMetric(metrics, ContainerMetrics.JDISC_HTTP_FILTERING_REQUEST_UNHANDLED.rate()); + addMetric(metrics, ContainerMetrics.JDISC_HTTP_FILTERING_RESPONSE_HANDLED.rate()); + addMetric(metrics, ContainerMetrics.JDISC_HTTP_FILTERING_RESPONSE_UNHANDLED.rate()); + + addMetric(metrics, ContainerMetrics.JDISC_HTTP_HANDLER_UNHANDLED_EXCEPTIONS.rate()); + + addMetric(metrics, ContainerMetrics.JDISC_APPLICATION_FAILED_COMPONENT_GRAPHS.rate()); + addMetric(metrics, ContainerMetrics.JDISC_APPLICATION_COMPONENT_GRAPH_RECONFIGURATIONS.rate()); + + addMetric(metrics, ContainerMetrics.FEED_LATENCY, EnumSet.of(sum, count, max)); + addMetric(metrics, ContainerMetrics.FEED_HTTP_REQUESTS, EnumSet.of(count, rate)); + + addMetric(metrics, ContainerMetrics.JDISC_TLS_CAPABILITY_CHECKS_SUCCEEDED.rate()); + addMetric(metrics, ContainerMetrics.JDISC_TLS_CAPABILITY_CHECKS_FAILED.rate()); + + return metrics; + } + + private static Set<Metric> getClusterControllerMetrics() { + Set<Metric> metrics = new LinkedHashSet<>(); + + addMetric(metrics, ClusterControllerMetrics.DOWN_COUNT.max()); + addMetric(metrics, ClusterControllerMetrics.INITIALIZING_COUNT.max()); + addMetric(metrics, ClusterControllerMetrics.MAINTENANCE_COUNT.max()); + addMetric(metrics, ClusterControllerMetrics.RETIRED_COUNT.max()); + addMetric(metrics, ClusterControllerMetrics.UP_COUNT, EnumSet.of(max, last)); // TODO: Remove last + addMetric(metrics, ClusterControllerMetrics.CLUSTER_STATE_CHANGE_COUNT.baseName()); + addMetric(metrics, ClusterControllerMetrics.BUSY_TICK_TIME_MS, EnumSet.of(max, sum, count)); + addMetric(metrics, ClusterControllerMetrics.IDLE_TICK_TIME_MS, EnumSet.of(max, sum, count)); + + addMetric(metrics, ClusterControllerMetrics.WORK_MS, EnumSet.of(sum, count)); + + addMetric(metrics, ClusterControllerMetrics.IS_MASTER.max()); + + // TODO(hakonhall): Update this name once persistent "count" metrics has been implemented. + // DO NOT RELY ON THIS METRIC YET. + addMetric(metrics, ClusterControllerMetrics.NODE_EVENT_COUNT.baseName()); + addMetric(metrics, ClusterControllerMetrics.RESOURCE_USAGE_NODES_ABOVE_LIMIT.max()); + addMetric(metrics, ClusterControllerMetrics.RESOURCE_USAGE_MAX_MEMORY_UTILIZATION.max()); + addMetric(metrics, ClusterControllerMetrics.RESOURCE_USAGE_MAX_DISK_UTILIZATION.max()); + addMetric(metrics, ClusterControllerMetrics.RESOURCE_USAGE_MEMORY_LIMIT.max()); + addMetric(metrics, ClusterControllerMetrics.RESOURCE_USAGE_DISK_LIMIT.max()); + addMetric(metrics, ClusterControllerMetrics.REINDEXING_PROGRESS.max()); + + return metrics; + } + + private static Set<Metric> getDocprocMetrics() { + Set<Metric> metrics = new LinkedHashSet<>(); + + // per chain + metrics.add(new Metric("documents_processed.rate")); + + addMetric(metrics, ContainerMetrics.DOCPROC_PROC_TIME, EnumSet.of(sum, count, max)); + addMetric(metrics, ContainerMetrics.DOCPROC_DOCUMENTS, EnumSet.of(sum, count, max, min)); + + return metrics; + } + + private static Set<Metric> getSearchChainMetrics() { + Set<Metric> metrics = new LinkedHashSet<>(); + + addMetric(metrics, ContainerMetrics.PEAK_QPS.max()); + addMetric(metrics, ContainerMetrics.SEARCH_CONNECTIONS, EnumSet.of(sum, count, max)); + addMetric(metrics, ContainerMetrics.QUERIES.rate()); + addMetric(metrics, ContainerMetrics.QUERY_CONTAINER_LATENCY, EnumSet.of(sum, count, max)); + addMetric(metrics, ContainerMetrics.QUERY_LATENCY, EnumSet.of(sum, count, max, ninety_five_percentile, ninety_nine_percentile)); + addMetric(metrics, ContainerMetrics.QUERY_TIMEOUT, EnumSet.of(sum, count, max, min, ninety_five_percentile, ninety_nine_percentile)); + addMetric(metrics, ContainerMetrics.FAILED_QUERIES.rate()); + addMetric(metrics, ContainerMetrics.DEGRADED_QUERIES.rate()); + addMetric(metrics, ContainerMetrics.HITS_PER_QUERY, EnumSet.of(sum, count, max, ninety_five_percentile, ninety_nine_percentile)); + addMetric(metrics, ContainerMetrics.SEARCH_CONNECTIONS, EnumSet.of(sum, count, max)); + addMetric(metrics, ContainerMetrics.QUERY_HIT_OFFSET, EnumSet.of(sum, count, max)); + addMetric(metrics, ContainerMetrics.DOCUMENTS_COVERED.count()); + addMetric(metrics, ContainerMetrics.DOCUMENTS_TOTAL.count()); + addMetric(metrics, ContainerMetrics.DOCUMENTS_TARGET_TOTAL.count()); + addMetric(metrics, ContainerMetrics.JDISC_RENDER_LATENCY, EnumSet.of(min, max, count, sum)); + addMetric(metrics, ContainerMetrics.QUERY_ITEM_COUNT, EnumSet.of(max, sum, count)); + addMetric(metrics, ContainerMetrics.TOTAL_HITS_PER_QUERY, EnumSet.of(sum, count, max, ninety_five_percentile, ninety_nine_percentile)); + addMetric(metrics, ContainerMetrics.EMPTY_RESULTS.rate()); + addMetric(metrics, ContainerMetrics.REQUESTS_OVER_QUOTA, EnumSet.of(rate, count)); + + addMetric(metrics, ContainerMetrics.RELEVANCE_AT_1, EnumSet.of(sum, count)); + addMetric(metrics, ContainerMetrics.RELEVANCE_AT_3, EnumSet.of(sum, count)); + addMetric(metrics, ContainerMetrics.RELEVANCE_AT_10, EnumSet.of(sum, count)); + + // Errors from search container + addMetric(metrics, ContainerMetrics.ERROR_TIMEOUT.rate()); + addMetric(metrics, ContainerMetrics.ERROR_BACKENDS_OOS.rate()); + addMetric(metrics, ContainerMetrics.ERROR_PLUGIN_FAILURE.rate()); + addMetric(metrics, ContainerMetrics.ERROR_BACKEND_COMMUNICATION_ERROR.rate()); + addMetric(metrics, ContainerMetrics.ERROR_EMPTY_DOCUMENT_SUMMARIES.rate()); + addMetric(metrics, ContainerMetrics.ERROR_INVALID_QUERY_PARAMETER.rate()); + addMetric(metrics, ContainerMetrics.ERROR_INTERNAL_SERVER_ERROR.rate()); + addMetric(metrics, ContainerMetrics.ERROR_MISCONFIGURED_SERVER.rate()); + addMetric(metrics, ContainerMetrics.ERROR_INVALID_QUERY_TRANSFORMATION.rate()); + addMetric(metrics, ContainerMetrics.ERROR_RESULTS_WITH_ERRORS.rate()); + addMetric(metrics, ContainerMetrics.ERROR_UNSPECIFIED.rate()); + addMetric(metrics, ContainerMetrics.ERROR_UNHANDLED_EXCEPTION.rate()); + + return metrics; + } + + private static Set<Metric> getSearchNodeMetrics() { + Set<Metric> metrics = new LinkedHashSet<>(); + + addMetric(metrics, SearchNodeMetrics.CONTENT_PROTON_DOCUMENTDB_DOCUMENTS_TOTAL.max()); + addMetric(metrics, SearchNodeMetrics.CONTENT_PROTON_DOCUMENTDB_DOCUMENTS_READY.max()); + addMetric(metrics, SearchNodeMetrics.CONTENT_PROTON_DOCUMENTDB_DOCUMENTS_ACTIVE.max()); + addMetric(metrics, SearchNodeMetrics.CONTENT_PROTON_DOCUMENTDB_DOCUMENTS_REMOVED.max()); + + addMetric(metrics, SearchNodeMetrics.CONTENT_PROTON_DOCUMENTDB_INDEX_DOCS_IN_MEMORY.max()); + addMetric(metrics, SearchNodeMetrics.CONTENT_PROTON_DOCUMENTDB_HEART_BEAT_AGE.max()); + addMetric(metrics, SearchNodeMetrics.CONTENT_PROTON_DOCSUM_DOCS.rate()); + addMetric(metrics, SearchNodeMetrics.CONTENT_PROTON_DOCSUM_LATENCY, EnumSet.of(max, sum, count)); + + // Search protocol + addMetric(metrics, SearchNodeMetrics.CONTENT_PROTON_SEARCH_PROTOCOL_QUERY_LATENCY, EnumSet.of(max, sum, count)); + addMetric(metrics, SearchNodeMetrics.CONTENT_PROTON_SEARCH_PROTOCOL_QUERY_REQUEST_SIZE, EnumSet.of(max, sum, count)); + addMetric(metrics, SearchNodeMetrics.CONTENT_PROTON_SEARCH_PROTOCOL_QUERY_REPLY_SIZE, EnumSet.of(max, sum, count)); + addMetric(metrics, SearchNodeMetrics.CONTENT_PROTON_SEARCH_PROTOCOL_DOCSUM_LATENCY, EnumSet.of(max, sum, average)); + addMetric(metrics, SearchNodeMetrics.CONTENT_PROTON_SEARCH_PROTOCOL_DOCSUM_REQUEST_SIZE, EnumSet.of(max, sum, count)); + addMetric(metrics, SearchNodeMetrics.CONTENT_PROTON_SEARCH_PROTOCOL_DOCSUM_REPLY_SIZE, EnumSet.of(max, sum, count)); + addMetric(metrics, SearchNodeMetrics.CONTENT_PROTON_SEARCH_PROTOCOL_DOCSUM_REQUESTED_DOCUMENTS.count()); + + // Executors shared between all document dbs + addMetric(metrics, SearchNodeMetrics.CONTENT_PROTON_EXECUTOR_PROTON_QUEUESIZE, EnumSet.of(max, sum, count)); + addMetric(metrics, SearchNodeMetrics.CONTENT_PROTON_EXECUTOR_PROTON_ACCEPTED.rate()); + addMetric(metrics, SearchNodeMetrics.CONTENT_PROTON_EXECUTOR_PROTON_UTILIZATION, EnumSet.of(max, sum, count)); + addMetric(metrics, SearchNodeMetrics.CONTENT_PROTON_EXECUTOR_FLUSH_QUEUESIZE, EnumSet.of(max, sum, count)); + addMetric(metrics, SearchNodeMetrics.CONTENT_PROTON_EXECUTOR_FLUSH_ACCEPTED.rate()); + addMetric(metrics, SearchNodeMetrics.CONTENT_PROTON_EXECUTOR_FLUSH_UTILIZATION, EnumSet.of(max, sum, count)); + addMetric(metrics, SearchNodeMetrics.CONTENT_PROTON_EXECUTOR_MATCH_QUEUESIZE, EnumSet.of(max, sum, count)); + addMetric(metrics, SearchNodeMetrics.CONTENT_PROTON_EXECUTOR_MATCH_ACCEPTED.rate()); + addMetric(metrics, SearchNodeMetrics.CONTENT_PROTON_EXECUTOR_MATCH_WAKEUPS.rate()); + addMetric(metrics, SearchNodeMetrics.CONTENT_PROTON_EXECUTOR_MATCH_UTILIZATION, EnumSet.of(max, sum, count)); + addMetric(metrics, SearchNodeMetrics.CONTENT_PROTON_EXECUTOR_DOCSUM_QUEUESIZE, EnumSet.of(max, sum, count)); + addMetric(metrics, SearchNodeMetrics.CONTENT_PROTON_EXECUTOR_DOCSUM_ACCEPTED.rate()); + addMetric(metrics, SearchNodeMetrics.CONTENT_PROTON_EXECUTOR_DOCSUM_UTILIZATION, EnumSet.of(max, sum, count)); + addMetric(metrics, SearchNodeMetrics.CONTENT_PROTON_EXECUTOR_SHARED_QUEUESIZE, EnumSet.of(max, sum, count)); + addMetric(metrics, SearchNodeMetrics.CONTENT_PROTON_EXECUTOR_SHARED_ACCEPTED.rate()); + addMetric(metrics, SearchNodeMetrics.CONTENT_PROTON_EXECUTOR_SHARED_UTILIZATION, EnumSet.of(max, sum, count)); + addMetric(metrics, SearchNodeMetrics.CONTENT_PROTON_EXECUTOR_WARMUP_QUEUESIZE, EnumSet.of(max, sum, count)); + addMetric(metrics, SearchNodeMetrics.CONTENT_PROTON_EXECUTOR_WARMUP_ACCEPTED.rate()); + addMetric(metrics, SearchNodeMetrics.CONTENT_PROTON_EXECUTOR_WARMUP_UTILIZATION, EnumSet.of(max, sum, count)); + addMetric(metrics, SearchNodeMetrics.CONTENT_PROTON_EXECUTOR_FIELD_WRITER_QUEUESIZE, EnumSet.of(max, sum, count)); + addMetric(metrics, SearchNodeMetrics.CONTENT_PROTON_EXECUTOR_FIELD_WRITER_ACCEPTED.rate()); + addMetric(metrics, SearchNodeMetrics.CONTENT_PROTON_EXECUTOR_FIELD_WRITER_UTILIZATION, EnumSet.of(max, sum, count)); + + // jobs + addMetric(metrics, SearchNodeMetrics.CONTENT_PROTON_DOCUMENTDB_JOB_TOTAL.average()); + addMetric(metrics, SearchNodeMetrics.CONTENT_PROTON_DOCUMENTDB_JOB_ATTRIBUTE_FLUSH.average()); + addMetric(metrics, SearchNodeMetrics.CONTENT_PROTON_DOCUMENTDB_JOB_MEMORY_INDEX_FLUSH.average()); + addMetric(metrics, SearchNodeMetrics.CONTENT_PROTON_DOCUMENTDB_JOB_DISK_INDEX_FUSION.average()); + addMetric(metrics, SearchNodeMetrics.CONTENT_PROTON_DOCUMENTDB_JOB_DOCUMENT_STORE_FLUSH.average()); + addMetric(metrics, SearchNodeMetrics.CONTENT_PROTON_DOCUMENTDB_JOB_DOCUMENT_STORE_COMPACT.average()); + addMetric(metrics, SearchNodeMetrics.CONTENT_PROTON_DOCUMENTDB_JOB_BUCKET_MOVE.average()); + addMetric(metrics, SearchNodeMetrics.CONTENT_PROTON_DOCUMENTDB_JOB_LID_SPACE_COMPACT.average()); + addMetric(metrics, SearchNodeMetrics.CONTENT_PROTON_DOCUMENTDB_JOB_REMOVED_DOCUMENTS_PRUNE.average()); + + // Threading service (per document db) + addMetric(metrics, SearchNodeMetrics.CONTENT_PROTON_DOCUMENTDB_THREADING_SERVICE_MASTER_QUEUESIZE, EnumSet.of(max, sum, count)); + addMetric(metrics, SearchNodeMetrics.CONTENT_PROTON_DOCUMENTDB_THREADING_SERVICE_MASTER_ACCEPTED.rate()); + addMetric(metrics, SearchNodeMetrics.CONTENT_PROTON_DOCUMENTDB_THREADING_SERVICE_MASTER_UTILIZATION, EnumSet.of(max, sum, count)); + addMetric(metrics, SearchNodeMetrics.CONTENT_PROTON_DOCUMENTDB_THREADING_SERVICE_INDEX_QUEUESIZE, EnumSet.of(max, sum, count)); + addMetric(metrics, SearchNodeMetrics.CONTENT_PROTON_DOCUMENTDB_THREADING_SERVICE_INDEX_ACCEPTED.rate()); + addMetric(metrics, SearchNodeMetrics.CONTENT_PROTON_DOCUMENTDB_THREADING_SERVICE_INDEX_UTILIZATION, EnumSet.of(max, sum, count)); + addMetric(metrics, SearchNodeMetrics.CONTENT_PROTON_DOCUMENTDB_THREADING_SERVICE_SUMMARY_QUEUESIZE, EnumSet.of(max, sum, count)); + addMetric(metrics, SearchNodeMetrics.CONTENT_PROTON_DOCUMENTDB_THREADING_SERVICE_SUMMARY_ACCEPTED.rate()); + addMetric(metrics, SearchNodeMetrics.CONTENT_PROTON_DOCUMENTDB_THREADING_SERVICE_SUMMARY_UTILIZATION, EnumSet.of(max, sum, count)); + + // lid space + addMetric(metrics, SearchNodeMetrics.CONTENT_PROTON_DOCUMENTDB_READY_LID_SPACE_LID_LIMIT.max()); + addMetric(metrics, SearchNodeMetrics.CONTENT_PROTON_DOCUMENTDB_READY_LID_SPACE_USED_LIDS.max()); + addMetric(metrics, SearchNodeMetrics.CONTENT_PROTON_DOCUMENTDB_NOTREADY_LID_SPACE_LID_LIMIT.max()); + addMetric(metrics, SearchNodeMetrics.CONTENT_PROTON_DOCUMENTDB_NOTREADY_LID_SPACE_USED_LIDS.max()); + addMetric(metrics, SearchNodeMetrics.CONTENT_PROTON_DOCUMENTDB_REMOVED_LID_SPACE_LID_LIMIT.max()); + addMetric(metrics, SearchNodeMetrics.CONTENT_PROTON_DOCUMENTDB_REMOVED_LID_SPACE_USED_LIDS.max()); + + // bucket move + addMetric(metrics, SearchNodeMetrics.CONTENT_PROTON_DOCUMENTDB_BUCKET_MOVE_BUCKETS_PENDING.max()); + + // resource usage + addMetric(metrics, SearchNodeMetrics.CONTENT_PROTON_RESOURCE_USAGE_DISK.average()); + addMetric(metrics, SearchNodeMetrics.CONTENT_PROTON_RESOURCE_USAGE_DISK_USAGE_TOTAL.max()); + addMetric(metrics, SearchNodeMetrics.CONTENT_PROTON_RESOURCE_USAGE_DISK_USAGE_TOTAL_UTILIZATION.max()); + addMetric(metrics, SearchNodeMetrics.CONTENT_PROTON_RESOURCE_USAGE_DISK_USAGE_TRANSIENT.max()); + addMetric(metrics, SearchNodeMetrics.CONTENT_PROTON_RESOURCE_USAGE_MEMORY.average()); + addMetric(metrics, SearchNodeMetrics.CONTENT_PROTON_RESOURCE_USAGE_MEMORY_USAGE_TOTAL.max()); + addMetric(metrics, SearchNodeMetrics.CONTENT_PROTON_RESOURCE_USAGE_MEMORY_USAGE_TOTAL_UTILIZATION.max()); + addMetric(metrics, SearchNodeMetrics.CONTENT_PROTON_RESOURCE_USAGE_MEMORY_USAGE_TRANSIENT.max()); + addMetric(metrics, SearchNodeMetrics.CONTENT_PROTON_RESOURCE_USAGE_MEMORY_MAPPINGS.max()); + addMetric(metrics, SearchNodeMetrics.CONTENT_PROTON_RESOURCE_USAGE_OPEN_FILE_DESCRIPTORS.max()); + addMetric(metrics, SearchNodeMetrics.CONTENT_PROTON_RESOURCE_USAGE_FEEDING_BLOCKED.max()); + addMetric(metrics, SearchNodeMetrics.CONTENT_PROTON_RESOURCE_USAGE_MALLOC_ARENA.max()); + addMetric(metrics, SearchNodeMetrics.CONTENT_PROTON_DOCUMENTDB_ATTRIBUTE_RESOURCE_USAGE_ADDRESS_SPACE.max()); + addMetric(metrics, SearchNodeMetrics.CONTENT_PROTON_DOCUMENTDB_ATTRIBUTE_RESOURCE_USAGE_FEEDING_BLOCKED.max()); + + // CPU util + addMetric(metrics, SearchNodeMetrics.CONTENT_PROTON_RESOURCE_USAGE_CPU_UTIL_SETUP, EnumSet.of(max, sum, count)); + addMetric(metrics, SearchNodeMetrics.CONTENT_PROTON_RESOURCE_USAGE_CPU_UTIL_READ, EnumSet.of(max, sum, count)); + addMetric(metrics, SearchNodeMetrics.CONTENT_PROTON_RESOURCE_USAGE_CPU_UTIL_WRITE, EnumSet.of(max, sum, count)); + addMetric(metrics, SearchNodeMetrics.CONTENT_PROTON_RESOURCE_USAGE_CPU_UTIL_COMPACT, EnumSet.of(max, sum, count)); + addMetric(metrics, SearchNodeMetrics.CONTENT_PROTON_RESOURCE_USAGE_CPU_UTIL_OTHER, EnumSet.of(max, sum, count)); + + // transaction log + addMetric(metrics, SearchNodeMetrics.CONTENT_PROTON_TRANSACTIONLOG_ENTRIES.average()); + addMetric(metrics, SearchNodeMetrics.CONTENT_PROTON_TRANSACTIONLOG_DISK_USAGE.average()); + addMetric(metrics, SearchNodeMetrics.CONTENT_PROTON_TRANSACTIONLOG_REPLAY_TIME.max()); + + // document store + addMetric(metrics, SearchNodeMetrics.CONTENT_PROTON_DOCUMENTDB_READY_DOCUMENT_STORE_DISK_USAGE.average()); + addMetric(metrics, SearchNodeMetrics.CONTENT_PROTON_DOCUMENTDB_READY_DOCUMENT_STORE_DISK_BLOAT.average()); + addMetric(metrics, SearchNodeMetrics.CONTENT_PROTON_DOCUMENTDB_READY_DOCUMENT_STORE_MAX_BUCKET_SPREAD.average()); + addMetric(metrics, SearchNodeMetrics.CONTENT_PROTON_DOCUMENTDB_READY_DOCUMENT_STORE_MEMORY_USAGE_ALLOCATED_BYTES.average()); + addMetric(metrics, SearchNodeMetrics.CONTENT_PROTON_DOCUMENTDB_NOTREADY_DOCUMENT_STORE_DISK_USAGE.average()); + addMetric(metrics, SearchNodeMetrics.CONTENT_PROTON_DOCUMENTDB_NOTREADY_DOCUMENT_STORE_DISK_BLOAT.average()); + addMetric(metrics, SearchNodeMetrics.CONTENT_PROTON_DOCUMENTDB_NOTREADY_DOCUMENT_STORE_MAX_BUCKET_SPREAD.average()); + addMetric(metrics, SearchNodeMetrics.CONTENT_PROTON_DOCUMENTDB_NOTREADY_DOCUMENT_STORE_MEMORY_USAGE_ALLOCATED_BYTES.average()); + addMetric(metrics, SearchNodeMetrics.CONTENT_PROTON_DOCUMENTDB_REMOVED_DOCUMENT_STORE_DISK_USAGE.average()); + addMetric(metrics, SearchNodeMetrics.CONTENT_PROTON_DOCUMENTDB_REMOVED_DOCUMENT_STORE_DISK_BLOAT.average()); + addMetric(metrics, SearchNodeMetrics.CONTENT_PROTON_DOCUMENTDB_REMOVED_DOCUMENT_STORE_MAX_BUCKET_SPREAD.average()); + addMetric(metrics, SearchNodeMetrics.CONTENT_PROTON_DOCUMENTDB_REMOVED_DOCUMENT_STORE_MEMORY_USAGE_ALLOCATED_BYTES.average()); + + // document store cache + addMetric(metrics, SearchNodeMetrics.CONTENT_PROTON_DOCUMENTDB_READY_DOCUMENT_STORE_CACHE_MEMORY_USAGE.average()); + addMetric(metrics, SearchNodeMetrics.CONTENT_PROTON_DOCUMENTDB_READY_DOCUMENT_STORE_CACHE_HIT_RATE.average()); + addMetric(metrics, SearchNodeMetrics.CONTENT_PROTON_DOCUMENTDB_READY_DOCUMENT_STORE_CACHE_LOOKUPS.rate()); + addMetric(metrics, SearchNodeMetrics.CONTENT_PROTON_DOCUMENTDB_READY_DOCUMENT_STORE_CACHE_INVALIDATIONS.rate()); + addMetric(metrics, SearchNodeMetrics.CONTENT_PROTON_DOCUMENTDB_NOTREADY_DOCUMENT_STORE_CACHE_MEMORY_USAGE.average()); + addMetric(metrics, SearchNodeMetrics.CONTENT_PROTON_DOCUMENTDB_NOTREADY_DOCUMENT_STORE_CACHE_HIT_RATE.average()); + addMetric(metrics, SearchNodeMetrics.CONTENT_PROTON_DOCUMENTDB_NOTREADY_DOCUMENT_STORE_CACHE_LOOKUPS.rate()); + addMetric(metrics, SearchNodeMetrics.CONTENT_PROTON_DOCUMENTDB_NOTREADY_DOCUMENT_STORE_CACHE_INVALIDATIONS.rate()); + + // attribute + addMetric(metrics, SearchNodeMetrics.CONTENT_PROTON_DOCUMENTDB_READY_ATTRIBUTE_MEMORY_USAGE_ALLOCATED_BYTES.average()); + addMetric(metrics, SearchNodeMetrics.CONTENT_PROTON_DOCUMENTDB_NOTREADY_ATTRIBUTE_MEMORY_USAGE_ALLOCATED_BYTES.average()); + + // index + addMetric(metrics, SearchNodeMetrics.CONTENT_PROTON_DOCUMENTDB_INDEX_MEMORY_USAGE_ALLOCATED_BYTES.average()); + + // matching + addMetric(metrics, SearchNodeMetrics.CONTENT_PROTON_DOCUMENTDB_MATCHING_QUERIES.rate()); + addMetric(metrics, SearchNodeMetrics.CONTENT_PROTON_DOCUMENTDB_MATCHING_SOFT_DOOMED_QUERIES.rate()); + addMetric(metrics, SearchNodeMetrics.CONTENT_PROTON_DOCUMENTDB_MATCHING_QUERY_LATENCY, EnumSet.of(max, sum, count)); + addMetric(metrics, SearchNodeMetrics.CONTENT_PROTON_DOCUMENTDB_MATCHING_QUERY_SETUP_TIME, EnumSet.of(max, sum, count)); + addMetric(metrics, SearchNodeMetrics.CONTENT_PROTON_DOCUMENTDB_MATCHING_DOCS_MATCHED, EnumSet.of(rate, count)); + addMetric(metrics, SearchNodeMetrics.CONTENT_PROTON_DOCUMENTDB_MATCHING_RANK_PROFILE_QUERIES.rate()); + addMetric(metrics, SearchNodeMetrics.CONTENT_PROTON_DOCUMENTDB_MATCHING_RANK_PROFILE_SOFT_DOOMED_QUERIES.rate()); + addMetric(metrics, SearchNodeMetrics.CONTENT_PROTON_DOCUMENTDB_MATCHING_RANK_PROFILE_SOFT_DOOM_FACTOR, EnumSet.of(min, max, sum, count)); + addMetric(metrics, SearchNodeMetrics.CONTENT_PROTON_DOCUMENTDB_MATCHING_RANK_PROFILE_QUERY_LATENCY, EnumSet.of(max, sum, count)); + addMetric(metrics, SearchNodeMetrics.CONTENT_PROTON_DOCUMENTDB_MATCHING_RANK_PROFILE_QUERY_SETUP_TIME, EnumSet.of(max, sum, count)); + addMetric(metrics, SearchNodeMetrics.CONTENT_PROTON_DOCUMENTDB_MATCHING_RANK_PROFILE_GROUPING_TIME, EnumSet.of(max, sum, count)); + addMetric(metrics, SearchNodeMetrics.CONTENT_PROTON_DOCUMENTDB_MATCHING_RANK_PROFILE_RERANK_TIME, EnumSet.of(max, sum, count)); + addMetric(metrics, SearchNodeMetrics.CONTENT_PROTON_DOCUMENTDB_MATCHING_RANK_PROFILE_DOCS_MATCHED, EnumSet.of(rate, count)); + addMetric(metrics, SearchNodeMetrics.CONTENT_PROTON_DOCUMENTDB_MATCHING_RANK_PROFILE_LIMITED_QUERIES.rate()); + + // feeding + addMetric(metrics, SearchNodeMetrics.CONTENT_PROTON_DOCUMENTDB_FEEDING_COMMIT_OPERATIONS, EnumSet.of(max, sum, count, rate)); + addMetric(metrics, SearchNodeMetrics.CONTENT_PROTON_DOCUMENTDB_FEEDING_COMMIT_LATENCY, EnumSet.of(max, sum, count)); + + return metrics; + } + + private static Set<Metric> getStorageMetrics() { + Set<Metric> metrics = new LinkedHashSet<>(); + + // TODO - Vespa 9: For the purpose of this file and likely elsewhere, all but the last aggregate specifier, + // TODO - Vespa 9: such as 'average' and 'sum' in the metric names below are just confusing and can be mentally + // TODO - Vespa 9: disregarded when considering metric names. Clean up for Vespa 9. + addMetric(metrics, StorageMetrics.VDS_DATASTORED_ALLDISKS_BUCKETS.average()); + addMetric(metrics, StorageMetrics.VDS_DATASTORED_ALLDISKS_DOCS.average()); + addMetric(metrics, StorageMetrics.VDS_DATASTORED_ALLDISKS_BYTES.average()); + addMetric(metrics, StorageMetrics.VDS_VISITOR_ALLTHREADS_AVERAGEVISITORLIFETIME, EnumSet.of(max, sum, count)); + addMetric(metrics, StorageMetrics.VDS_VISITOR_ALLTHREADS_AVERAGEQUEUEWAIT, EnumSet.of(max, sum, count)); + addMetric(metrics, StorageMetrics.VDS_VISITOR_ALLTHREADS_QUEUESIZE, EnumSet.of(max, sum, count)); + addMetric(metrics, StorageMetrics.VDS_VISITOR_ALLTHREADS_COMPLETED.rate()); + addMetric(metrics, StorageMetrics.VDS_VISITOR_ALLTHREADS_CREATED.rate()); + addMetric(metrics, StorageMetrics.VDS_VISITOR_ALLTHREADS_FAILED.rate()); + addMetric(metrics, StorageMetrics.VDS_VISITOR_ALLTHREADS_AVERAGEMESSAGESENDTIME, EnumSet.of(max, sum, count)); + addMetric(metrics, StorageMetrics.VDS_VISITOR_ALLTHREADS_AVERAGEPROCESSINGTIME, EnumSet.of(max, sum, count)); + + addMetric(metrics, StorageMetrics.VDS_FILESTOR_QUEUESIZE, EnumSet.of(max, sum, count)); + addMetric(metrics, StorageMetrics.VDS_FILESTOR_AVERAGEQUEUEWAIT, EnumSet.of(max, sum, count)); + addMetric(metrics, StorageMetrics.VDS_FILESTOR_ACTIVE_OPERATIONS_SIZE, EnumSet.of(max, sum, count)); + addMetric(metrics, StorageMetrics.VDS_FILESTOR_ACTIVE_OPERATIONS_LATENCY, EnumSet.of(max, sum, count)); + addMetric(metrics, StorageMetrics.VDS_FILESTOR_THROTTLE_WINDOW_SIZE, EnumSet.of(max, sum, count)); + addMetric(metrics, StorageMetrics.VDS_FILESTOR_THROTTLE_WAITING_THREADS, EnumSet.of(max, sum, count)); + addMetric(metrics, StorageMetrics.VDS_FILESTOR_THROTTLE_ACTIVE_TOKENS, EnumSet.of(max, sum, count)); + addMetric(metrics, StorageMetrics.VDS_FILESTOR_ALLTHREADS_MERGEMETADATAREADLATENCY, EnumSet.of(max, sum, count)); + addMetric(metrics, StorageMetrics.VDS_FILESTOR_ALLTHREADS_MERGEDATAREADLATENCY, EnumSet.of(max, sum, count)); + addMetric(metrics, StorageMetrics.VDS_FILESTOR_ALLTHREADS_MERGEDATAWRITELATENCY, EnumSet.of(max, sum, count)); + addMetric(metrics, StorageMetrics.VDS_FILESTOR_ALLTHREADS_MERGE_PUT_LATENCY, EnumSet.of(max, sum, count)); + addMetric(metrics, StorageMetrics.VDS_FILESTOR_ALLTHREADS_MERGE_REMOVE_LATENCY, EnumSet.of(max, sum, count)); + addMetric(metrics, StorageMetrics.VDS_FILESTOR_ALLSTRIPES_THROTTLED_RPC_DIRECT_DISPATCHES.rate()); + addMetric(metrics, StorageMetrics.VDS_FILESTOR_ALLSTRIPES_THROTTLED_PERSISTENCE_THREAD_POLLS.rate()); + addMetric(metrics, StorageMetrics.VDS_FILESTOR_ALLSTRIPES_TIMEOUTS_WAITING_FOR_THROTTLE_TOKEN.rate()); + + addMetric(metrics, StorageMetrics.VDS_FILESTOR_ALLTHREADS_PUT_COUNT.rate()); + addMetric(metrics, StorageMetrics.VDS_FILESTOR_ALLTHREADS_PUT_FAILED.rate()); + addMetric(metrics, StorageMetrics.VDS_FILESTOR_ALLTHREADS_PUT_TEST_AND_SET_FAILED.rate()); + addMetric(metrics, StorageMetrics.VDS_FILESTOR_ALLTHREADS_PUT_LATENCY, EnumSet.of(max, sum, count)); + addMetric(metrics, StorageMetrics.VDS_FILESTOR_ALLTHREADS_PUT_REQUEST_SIZE, EnumSet.of(max, sum, count)); + addMetric(metrics, StorageMetrics.VDS_FILESTOR_ALLTHREADS_REMOVE_COUNT.rate()); + addMetric(metrics, StorageMetrics.VDS_FILESTOR_ALLTHREADS_REMOVE_FAILED.rate()); + addMetric(metrics, StorageMetrics.VDS_FILESTOR_ALLTHREADS_REMOVE_TEST_AND_SET_FAILED.rate()); + addMetric(metrics, StorageMetrics.VDS_FILESTOR_ALLTHREADS_REMOVE_LATENCY, EnumSet.of(max, sum, count)); + addMetric(metrics, StorageMetrics.VDS_FILESTOR_ALLTHREADS_REMOVE_REQUEST_SIZE, EnumSet.of(max, sum, count)); + addMetric(metrics, StorageMetrics.VDS_FILESTOR_ALLTHREADS_GET_COUNT.rate()); + addMetric(metrics, StorageMetrics.VDS_FILESTOR_ALLTHREADS_GET_FAILED.rate()); + addMetric(metrics, StorageMetrics.VDS_FILESTOR_ALLTHREADS_GET_LATENCY, EnumSet.of(max, sum, count)); + addMetric(metrics, StorageMetrics.VDS_FILESTOR_ALLTHREADS_GET_REQUEST_SIZE, EnumSet.of(max, sum, count)); + addMetric(metrics, StorageMetrics.VDS_FILESTOR_ALLTHREADS_UPDATE_COUNT.rate()); + addMetric(metrics, StorageMetrics.VDS_FILESTOR_ALLTHREADS_UPDATE_FAILED.rate()); + addMetric(metrics, StorageMetrics.VDS_FILESTOR_ALLTHREADS_UPDATE_TEST_AND_SET_FAILED.rate()); + addMetric(metrics, StorageMetrics.VDS_FILESTOR_ALLTHREADS_UPDATE_LATENCY, EnumSet.of(max, sum, count)); + addMetric(metrics, StorageMetrics.VDS_FILESTOR_ALLTHREADS_UPDATE_REQUEST_SIZE, EnumSet.of(max, sum, count)); + addMetric(metrics, StorageMetrics.VDS_FILESTOR_ALLTHREADS_CREATEITERATOR_COUNT.rate()); + addMetric(metrics, StorageMetrics.VDS_FILESTOR_ALLTHREADS_CREATEITERATOR_LATENCY, EnumSet.of(max, sum, count)); + addMetric(metrics, StorageMetrics.VDS_FILESTOR_ALLTHREADS_VISIT_COUNT.rate()); + addMetric(metrics, StorageMetrics.VDS_FILESTOR_ALLTHREADS_VISIT_LATENCY, EnumSet.of(max, sum, count)); + addMetric(metrics, StorageMetrics.VDS_FILESTOR_ALLTHREADS_REMOVE_LOCATION_COUNT.rate()); + addMetric(metrics, StorageMetrics.VDS_FILESTOR_ALLTHREADS_REMOVE_LOCATION_LATENCY, EnumSet.of(max, sum, count)); + addMetric(metrics, StorageMetrics.VDS_FILESTOR_ALLTHREADS_SPLITBUCKETS_COUNT.rate()); + addMetric(metrics, StorageMetrics.VDS_FILESTOR_ALLTHREADS_JOINBUCKETS_COUNT.rate()); + addMetric(metrics, StorageMetrics.VDS_FILESTOR_ALLTHREADS_DELETEBUCKETS_COUNT.rate()); + addMetric(metrics, StorageMetrics.VDS_FILESTOR_ALLTHREADS_DELETEBUCKETS_FAILED.rate()); + addMetric(metrics, StorageMetrics.VDS_FILESTOR_ALLTHREADS_DELETEBUCKETS_LATENCY, EnumSet.of(max, sum, count)); + addMetric(metrics, StorageMetrics.VDS_FILESTOR_ALLTHREADS_SETBUCKETSTATES_COUNT.rate()); + + addMetric(metrics, StorageMetrics.VDS_MERGETHROTTLER_AVERAGEQUEUEWAITINGTIME, EnumSet.of(max, sum, count)); + addMetric(metrics, StorageMetrics.VDS_MERGETHROTTLER_QUEUESIZE, EnumSet.of(max, sum, count)); + addMetric(metrics, StorageMetrics.VDS_MERGETHROTTLER_ACTIVE_WINDOW_SIZE, EnumSet.of(max, sum, count)); + addMetric(metrics, StorageMetrics.VDS_MERGETHROTTLER_BOUNCED_DUE_TO_BACK_PRESSURE.rate()); + addMetric(metrics, StorageMetrics.VDS_MERGETHROTTLER_LOCALLYEXECUTEDMERGES_OK.rate()); + addMetric(metrics, StorageMetrics.VDS_MERGETHROTTLER_MERGECHAINS_OK.rate()); + addMetric(metrics, StorageMetrics.VDS_MERGETHROTTLER_MERGECHAINS_FAILURES_BUSY.rate()); + addMetric(metrics, StorageMetrics.VDS_MERGETHROTTLER_MERGECHAINS_FAILURES_TOTAL.rate()); + + return metrics; + } + + private static Set<Metric> getDistributorMetrics() { + Set<Metric> metrics = new LinkedHashSet<>(); + addMetric(metrics, DistributorMetrics.VDS_IDEALSTATE_BUCKETS_RECHECKING.average()); + addMetric(metrics, DistributorMetrics.VDS_IDEALSTATE_IDEALSTATE_DIFF.average()); + addMetric(metrics, DistributorMetrics.VDS_IDEALSTATE_BUCKETS_TOOFEWCOPIES.average()); + addMetric(metrics, DistributorMetrics.VDS_IDEALSTATE_BUCKETS_TOOMANYCOPIES.average()); + addMetric(metrics, DistributorMetrics.VDS_IDEALSTATE_BUCKETS.average()); + addMetric(metrics, DistributorMetrics.VDS_IDEALSTATE_BUCKETS_NOTRUSTED.average()); + addMetric(metrics, DistributorMetrics.VDS_IDEALSTATE_MAX_OBSERVED_TIME_SINCE_LAST_GC_SEC.average()); + addMetric(metrics, DistributorMetrics.VDS_IDEALSTATE_DELETE_BUCKET_DONE_OK.rate()); + addMetric(metrics, DistributorMetrics.VDS_IDEALSTATE_DELETE_BUCKET_DONE_FAILED.rate()); + addMetric(metrics, DistributorMetrics.VDS_IDEALSTATE_DELETE_BUCKET_PENDING.average()); + addMetric(metrics, DistributorMetrics.VDS_IDEALSTATE_MERGE_BUCKET_DONE_OK.rate()); + addMetric(metrics, DistributorMetrics.VDS_IDEALSTATE_MERGE_BUCKET_DONE_FAILED.rate()); + addMetric(metrics, DistributorMetrics.VDS_IDEALSTATE_MERGE_BUCKET_PENDING.average()); + addMetric(metrics, DistributorMetrics.VDS_IDEALSTATE_SPLIT_BUCKET_DONE_OK.rate()); + addMetric(metrics, DistributorMetrics.VDS_IDEALSTATE_SPLIT_BUCKET_DONE_FAILED.rate()); + addMetric(metrics, DistributorMetrics.VDS_IDEALSTATE_SPLIT_BUCKET_PENDING.average()); + addMetric(metrics, DistributorMetrics.VDS_IDEALSTATE_JOIN_BUCKET_DONE_OK.rate()); + addMetric(metrics, DistributorMetrics.VDS_IDEALSTATE_JOIN_BUCKET_DONE_FAILED.rate()); + addMetric(metrics, DistributorMetrics.VDS_IDEALSTATE_JOIN_BUCKET_PENDING.average()); + addMetric(metrics, DistributorMetrics.VDS_IDEALSTATE_GARBAGE_COLLECTION_DONE_OK.rate()); + addMetric(metrics, DistributorMetrics.VDS_IDEALSTATE_GARBAGE_COLLECTION_DONE_FAILED.rate()); + addMetric(metrics, DistributorMetrics.VDS_IDEALSTATE_GARBAGE_COLLECTION_PENDING.average()); + addMetric(metrics, DistributorMetrics.VDS_IDEALSTATE_GARBAGE_COLLECTION_DOCUMENTS_REMOVED, EnumSet.of(count, rate)); + + addMetric(metrics, DistributorMetrics.VDS_DISTRIBUTOR_PUTS_LATENCY, EnumSet.of(max, sum, count)); + addMetric(metrics, DistributorMetrics.VDS_DISTRIBUTOR_PUTS_OK.rate()); + addMetric(metrics, DistributorMetrics.VDS_DISTRIBUTOR_PUTS_FAILURES_TOTAL.rate()); + addMetric(metrics, DistributorMetrics.VDS_DISTRIBUTOR_PUTS_FAILURES_NOTFOUND.rate()); + addMetric(metrics, DistributorMetrics.VDS_DISTRIBUTOR_PUTS_FAILURES_TEST_AND_SET_FAILED.rate()); + addMetric(metrics, DistributorMetrics.VDS_DISTRIBUTOR_PUTS_FAILURES_CONCURRENT_MUTATIONS.rate()); + addMetric(metrics, DistributorMetrics.VDS_DISTRIBUTOR_PUTS_FAILURES_NOTCONNECTED.rate()); + addMetric(metrics, DistributorMetrics.VDS_DISTRIBUTOR_PUTS_FAILURES_NOTREADY.rate()); + addMetric(metrics, DistributorMetrics.VDS_DISTRIBUTOR_PUTS_FAILURES_WRONGDISTRIBUTOR.rate()); + addMetric(metrics, DistributorMetrics.VDS_DISTRIBUTOR_PUTS_FAILURES_SAFE_TIME_NOT_REACHED.rate()); + addMetric(metrics, DistributorMetrics.VDS_DISTRIBUTOR_PUTS_FAILURES_STORAGEFAILURE.rate()); + addMetric(metrics, DistributorMetrics.VDS_DISTRIBUTOR_PUTS_FAILURES_TIMEOUT.rate()); + addMetric(metrics, DistributorMetrics.VDS_DISTRIBUTOR_PUTS_FAILURES_BUSY.rate()); + addMetric(metrics, DistributorMetrics.VDS_DISTRIBUTOR_PUTS_FAILURES_INCONSISTENT_BUCKET.rate()); + addMetric(metrics, DistributorMetrics.VDS_DISTRIBUTOR_REMOVES_LATENCY, EnumSet.of(max, sum, count)); + addMetric(metrics, DistributorMetrics.VDS_DISTRIBUTOR_REMOVES_OK.rate()); + addMetric(metrics, DistributorMetrics.VDS_DISTRIBUTOR_REMOVES_FAILURES_TOTAL.rate()); + addMetric(metrics, DistributorMetrics.VDS_DISTRIBUTOR_REMOVES_FAILURES_NOTFOUND.rate()); + addMetric(metrics, DistributorMetrics.VDS_DISTRIBUTOR_REMOVES_FAILURES_TEST_AND_SET_FAILED.rate()); + addMetric(metrics, DistributorMetrics.VDS_DISTRIBUTOR_REMOVES_FAILURES_CONCURRENT_MUTATIONS.rate()); + addMetric(metrics, DistributorMetrics.VDS_DISTRIBUTOR_UPDATES_LATENCY, EnumSet.of(max, sum, count)); + addMetric(metrics, DistributorMetrics.VDS_DISTRIBUTOR_UPDATES_OK.rate()); + addMetric(metrics, DistributorMetrics.VDS_DISTRIBUTOR_UPDATES_FAILURES_TOTAL.rate()); + addMetric(metrics, DistributorMetrics.VDS_DISTRIBUTOR_UPDATES_FAILURES_NOTFOUND.rate()); + addMetric(metrics, DistributorMetrics.VDS_DISTRIBUTOR_UPDATES_FAILURES_TEST_AND_SET_FAILED.rate()); + addMetric(metrics, DistributorMetrics.VDS_DISTRIBUTOR_UPDATES_FAILURES_CONCURRENT_MUTATIONS.rate()); + addMetric(metrics, DistributorMetrics.VDS_DISTRIBUTOR_UPDATES_DIVERGING_TIMESTAMP_UPDATES.rate()); + addMetric(metrics, DistributorMetrics.VDS_DISTRIBUTOR_REMOVELOCATIONS_OK.rate()); + addMetric(metrics, DistributorMetrics.VDS_DISTRIBUTOR_REMOVELOCATIONS_FAILURES_TOTAL.rate()); + addMetric(metrics, DistributorMetrics.VDS_DISTRIBUTOR_GETS_LATENCY, EnumSet.of(max, sum, count)); + addMetric(metrics, DistributorMetrics.VDS_DISTRIBUTOR_GETS_OK.rate()); + addMetric(metrics, DistributorMetrics.VDS_DISTRIBUTOR_GETS_FAILURES_TOTAL.rate()); + addMetric(metrics, DistributorMetrics.VDS_DISTRIBUTOR_GETS_FAILURES_NOTFOUND.rate()); + addMetric(metrics, DistributorMetrics.VDS_DISTRIBUTOR_VISITOR_LATENCY, EnumSet.of(max, sum, count)); + addMetric(metrics, DistributorMetrics.VDS_DISTRIBUTOR_VISITOR_OK.rate()); + addMetric(metrics, DistributorMetrics.VDS_DISTRIBUTOR_VISITOR_FAILURES_TOTAL.rate()); + addMetric(metrics, DistributorMetrics.VDS_DISTRIBUTOR_VISITOR_FAILURES_NOTREADY.rate()); + addMetric(metrics, DistributorMetrics.VDS_DISTRIBUTOR_VISITOR_FAILURES_NOTCONNECTED.rate()); + addMetric(metrics, DistributorMetrics.VDS_DISTRIBUTOR_VISITOR_FAILURES_WRONGDISTRIBUTOR.rate()); + addMetric(metrics, DistributorMetrics.VDS_DISTRIBUTOR_VISITOR_FAILURES_SAFE_TIME_NOT_REACHED.rate()); + addMetric(metrics, DistributorMetrics.VDS_DISTRIBUTOR_VISITOR_FAILURES_STORAGEFAILURE.rate()); + addMetric(metrics, DistributorMetrics.VDS_DISTRIBUTOR_VISITOR_FAILURES_TIMEOUT.rate()); + addMetric(metrics, DistributorMetrics.VDS_DISTRIBUTOR_VISITOR_FAILURES_BUSY.rate()); + addMetric(metrics, DistributorMetrics.VDS_DISTRIBUTOR_VISITOR_FAILURES_INCONSISTENT_BUCKET.rate()); + addMetric(metrics, DistributorMetrics.VDS_DISTRIBUTOR_VISITOR_FAILURES_NOTFOUND.rate()); + + addMetric(metrics, DistributorMetrics.VDS_DISTRIBUTOR_DOCSSTORED.average()); + addMetric(metrics, DistributorMetrics.VDS_DISTRIBUTOR_BYTESSTORED.average()); + + addMetric(metrics, DistributorMetrics.VDS_BOUNCER_CLOCK_SKEW_ABORTS.count()); + + return metrics; + } + + private static void addMetric(Set<Metric> metrics, String nameWithSuffix) { + metrics.add(new Metric(nameWithSuffix)); + } + + private static void addMetric(Set<Metric> metrics, VespaMetrics metric, EnumSet<Suffix> suffixes) { + suffixes.forEach(suffix -> metrics.add(new Metric(metric.baseName() + "." + suffix.suffix()))); + } + + private static void addMetric(Set<Metric> metrics, String metricName, Iterable<String> aggregateSuffices) { + for (String suffix : aggregateSuffices) { + metrics.add(new Metric(metricName + "." + suffix)); + } + } + +} diff --git a/metrics/src/main/java/ai/vespa/metrics/set/VespaMetricSet.java b/metrics/src/main/java/ai/vespa/metrics/set/VespaMetricSet.java index b7ed7293d6c..6c4626238eb 100644 --- a/metrics/src/main/java/ai/vespa/metrics/set/VespaMetricSet.java +++ b/metrics/src/main/java/ai/vespa/metrics/set/VespaMetricSet.java @@ -1,4 +1,7 @@ // Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +// TODO: Keep the set of metrics in this set stable until Vespa 9. +// TODO: Vespa 9: Let this class be replaced by Vespa9VespaMetricSet. package ai.vespa.metrics.set; import ai.vespa.metrics.ClusterControllerMetrics; @@ -17,6 +20,7 @@ import ai.vespa.metrics.VespaMetrics; import java.util.Collections; import java.util.EnumSet; import java.util.LinkedHashSet; +import java.util.List; import java.util.Set; import static ai.vespa.metrics.Suffix.average; @@ -29,7 +33,6 @@ import static ai.vespa.metrics.Suffix.ninety_nine_percentile; import static ai.vespa.metrics.Suffix.rate; import static ai.vespa.metrics.Suffix.sum; import static ai.vespa.metrics.set.DefaultVespaMetrics.defaultVespaMetricSet; -import static java.util.Collections.singleton; /** * Encapsulates vespa service metrics. @@ -38,9 +41,14 @@ import static java.util.Collections.singleton; */ public class VespaMetricSet { - public static final MetricSet vespaMetricSet = new MetricSet("vespa", - getVespaMetrics(), - singleton(defaultVespaMetricSet)); + public static final MetricSet vespaMetricSet = createMetricSet(); + + private static MetricSet createMetricSet() { + return new MetricSet("vespa", + getVespaMetrics(), + List.of(defaultVespaMetricSet, + BasicMetricSets.containerHttpStatusMetrics())); + } private static Set<Metric> getVespaMetrics() { Set<Metric> metrics = new LinkedHashSet<>(); @@ -62,7 +70,7 @@ public class VespaMetricSet { Set<Metric> metrics = new LinkedHashSet<>(); addMetric(metrics, SentinelMetrics.SENTINEL_RESTARTS.count()); - addMetric(metrics, SentinelMetrics.SENTINEL_TOTAL_RESTARTS.last()); + addMetric(metrics, SentinelMetrics.SENTINEL_TOTAL_RESTARTS, EnumSet.of(max, sum, last)); // TODO: Vespa 9: Remove last, sum? addMetric(metrics, SentinelMetrics.SENTINEL_UPTIME.last()); addMetric(metrics, SentinelMetrics.SENTINEL_RUNNING, EnumSet.of(count, last)); @@ -80,7 +88,7 @@ public class VespaMetricSet { // Java (JRT) TLS metrics addMetric(metrics, ContainerMetrics.JRT_TRANSPORT_TLS_CERTIFICATE_VERIFICATION_FAILURES.baseName()); addMetric(metrics, ContainerMetrics.JRT_TRANSPORT_PEER_AUTHORIZATION_FAILURES.baseName()); - addMetric(metrics, ContainerMetrics.JRT_TRANSPORT_SERVER_TLS_CONNECIONTS_ESTABLISHED.baseName()); + addMetric(metrics, ContainerMetrics.JRT_TRANSPORT_SERVER_TLS_CONNECTIONS_ESTABLISHED.baseName()); addMetric(metrics, ContainerMetrics.JRT_TRANSPORT_CLIENT_TLS_CONNECTIONS_ESTABLISHED.baseName()); addMetric(metrics, ContainerMetrics.JRT_TRANSPORT_SERVER_UNENCRYPTED_CONNECTIONS_ESTABLISHED.baseName()); addMetric(metrics, ContainerMetrics.JRT_TRANSPORT_CLIENT_UNENCRYPTED_CONNECTIONS_ESTABLISHED.baseName()); @@ -119,6 +127,12 @@ public class VespaMetricSet { private static Set<Metric> getContainerMetrics() { Set<Metric> metrics = new LinkedHashSet<>(); + addMetric(metrics, ContainerMetrics.HTTP_STATUS_1XX.rate()); + addMetric(metrics, ContainerMetrics.HTTP_STATUS_2XX.rate()); + addMetric(metrics, ContainerMetrics.HTTP_STATUS_3XX.rate()); + addMetric(metrics, ContainerMetrics.HTTP_STATUS_4XX.rate()); + addMetric(metrics, ContainerMetrics.HTTP_STATUS_5XX.rate()); + addMetric(metrics, ContainerMetrics.APPLICATION_GENERATION.baseName()); addMetric(metrics, ContainerMetrics.HANDLED_REQUESTS.count()); @@ -177,10 +191,10 @@ public class VespaMetricSet { addMetric(metrics, ContainerMetrics.JDISC_GC_COUNT, EnumSet.of(average, max, last)); // TODO: Vespa 9: Remove last addMetric(metrics, ContainerMetrics.JDISC_GC_MS, EnumSet.of(average, max, last)); // TODO: Vespa 9: Remove last - addMetric(metrics, ContainerMetrics.JDISC_DEACTIVATED_CONTAINERS.last()); + addMetric(metrics, ContainerMetrics.JDISC_DEACTIVATED_CONTAINERS_TOTAL, EnumSet.of(sum, last)); // TODO: Vespa 9: Remove last addMetric(metrics, ContainerMetrics.JDISC_DEACTIVATED_CONTAINERS_WITH_RETAINED_REFS.last()); - addMetric(metrics, ContainerMetrics.JDISC_SINGLETON_IS_ACTIVE.last()); + addMetric(metrics, ContainerMetrics.JDISC_SINGLETON_IS_ACTIVE, EnumSet.of(max, last)); // TODO: Vespa 9: Remove last addMetric(metrics, ContainerMetrics.JDISC_SINGLETON_ACTIVATION_COUNT.last()); addMetric(metrics, ContainerMetrics.JDISC_SINGLETON_ACTIVATION_FAILURE_COUNT.last()); addMetric(metrics, ContainerMetrics.JDISC_SINGLETON_ACTIVATION_MILLIS.last()); @@ -188,15 +202,9 @@ public class VespaMetricSet { addMetric(metrics, ContainerMetrics.JDISC_SINGLETON_DEACTIVATION_FAILURE_COUNT.last()); addMetric(metrics, ContainerMetrics.JDISC_SINGLETON_DEACTIVATION_MILLIS.last()); - addMetric(metrics, ContainerMetrics.ATHENZ_TENANT_CERT_EXPIRY_SECONDS.last()); + addMetric(metrics, ContainerMetrics.ATHENZ_TENANT_CERT_EXPIRY_SECONDS, EnumSet.of(min, max, last)); // TODO: Vespa 9: Remove last, max addMetric(metrics, ContainerMetrics.CONTAINER_IAM_ROLE_EXPIRY_SECONDS.baseName()); - addMetric(metrics, ContainerMetrics.HTTP_STATUS_1XX.rate()); - addMetric(metrics, ContainerMetrics.HTTP_STATUS_2XX.rate()); - addMetric(metrics, ContainerMetrics.HTTP_STATUS_3XX.rate()); - addMetric(metrics, ContainerMetrics.HTTP_STATUS_4XX.rate()); - addMetric(metrics, ContainerMetrics.HTTP_STATUS_5XX.rate()); - addMetric(metrics, ContainerMetrics.JDISC_HTTP_REQUEST_PREMATURELY_CLOSED.rate()); addMetric(metrics, ContainerMetrics.JDISC_HTTP_REQUEST_REQUESTS_PER_CONNECTION, EnumSet.of(sum, count, min, max, average)); addMetric(metrics, ContainerMetrics.JDISC_HTTP_REQUEST_URI_LENGTH, EnumSet.of(sum, count, max)); @@ -243,19 +251,19 @@ public class VespaMetricSet { private static Set<Metric> getClusterControllerMetrics() { Set<Metric> metrics = new LinkedHashSet<>(); - addMetric(metrics, ClusterControllerMetrics.DOWN_COUNT.last()); - addMetric(metrics, ClusterControllerMetrics.INITIALIZING_COUNT.last()); - addMetric(metrics, ClusterControllerMetrics.MAINTENANCE_COUNT.last()); - addMetric(metrics, ClusterControllerMetrics.RETIRED_COUNT.last()); + addMetric(metrics, ClusterControllerMetrics.DOWN_COUNT, EnumSet.of(max, last)); // TODO: Vespa 9: Remove last + addMetric(metrics, ClusterControllerMetrics.INITIALIZING_COUNT, EnumSet.of(max, last)); // TODO: Vespa 9: Remove last + addMetric(metrics, ClusterControllerMetrics.MAINTENANCE_COUNT, EnumSet.of(max, last)); // TODO: Vespa 9: Remove last + addMetric(metrics, ClusterControllerMetrics.RETIRED_COUNT, EnumSet.of(max, last)); // TODO: Vespa 9: Remove last addMetric(metrics, ClusterControllerMetrics.STOPPING_COUNT.last()); - addMetric(metrics, ClusterControllerMetrics.UP_COUNT.last()); + addMetric(metrics, ClusterControllerMetrics.UP_COUNT, EnumSet.of(max, last)); // TODO: Vespa 9: Remove last addMetric(metrics, ClusterControllerMetrics.CLUSTER_STATE_CHANGE_COUNT.baseName()); addMetric(metrics, ClusterControllerMetrics.BUSY_TICK_TIME_MS, EnumSet.of(last, max, sum, count)); // TODO: Vespa 9: Remove last addMetric(metrics, ClusterControllerMetrics.IDLE_TICK_TIME_MS, EnumSet.of(last, max, sum, count)); // TODO: Vespa 9: Remove last addMetric(metrics, ClusterControllerMetrics.WORK_MS, EnumSet.of(last, sum, count)); // TODO: Vespa 9: Remove last - addMetric(metrics, ClusterControllerMetrics.IS_MASTER.last()); + addMetric(metrics, ClusterControllerMetrics.IS_MASTER, EnumSet.of(max, last)); // TODO: Vespa 9: Remove last addMetric(metrics, ClusterControllerMetrics.REMOTE_TASK_QUEUE_SIZE.last()); // TODO(hakonhall): Update this name once persistent "count" metrics has been implemented. // DO NOT RELY ON THIS METRIC YET. @@ -263,9 +271,9 @@ public class VespaMetricSet { addMetric(metrics, ClusterControllerMetrics.RESOURCE_USAGE_NODES_ABOVE_LIMIT, EnumSet.of(last, max)); // TODO: Vespa 9: Remove last addMetric(metrics, ClusterControllerMetrics.RESOURCE_USAGE_MAX_MEMORY_UTILIZATION, EnumSet.of(last, max)); // TODO: Vespa 9: Remove last addMetric(metrics, ClusterControllerMetrics.RESOURCE_USAGE_MAX_DISK_UTILIZATION, EnumSet.of(last, max)); // TODO: Vespa 9: Remove last - addMetric(metrics, ClusterControllerMetrics.RESOURCE_USAGE_MEMORY_LIMIT.last()); - addMetric(metrics, ClusterControllerMetrics.RESOURCE_USAGE_DISK_LIMIT.last()); - addMetric(metrics, ClusterControllerMetrics.REINDEXING_PROGRESS.last()); + addMetric(metrics, ClusterControllerMetrics.RESOURCE_USAGE_MEMORY_LIMIT, EnumSet.of(max, last)); // TODO: Vespa 9: Remove last + addMetric(metrics, ClusterControllerMetrics.RESOURCE_USAGE_DISK_LIMIT, EnumSet.of(max, last)); // TODO: Vespa 9: Remove last + addMetric(metrics, ClusterControllerMetrics.REINDEXING_PROGRESS, EnumSet.of(max, last)); // TODO: Vespa 9: Remove last return metrics; } @@ -331,15 +339,15 @@ public class VespaMetricSet { addMetric(metrics, SearchNodeMetrics.CONTENT_PROTON_CONFIG_GENERATION.last()); - addMetric(metrics, SearchNodeMetrics.CONTENT_PROTON_DOCUMENTDB_DOCUMENTS_TOTAL.last()); - addMetric(metrics, SearchNodeMetrics.CONTENT_PROTON_DOCUMENTDB_DOCUMENTS_READY.last()); - addMetric(metrics, SearchNodeMetrics.CONTENT_PROTON_DOCUMENTDB_DOCUMENTS_ACTIVE.last()); - addMetric(metrics, SearchNodeMetrics.CONTENT_PROTON_DOCUMENTDB_DOCUMENTS_REMOVED.last()); + addMetric(metrics, SearchNodeMetrics.CONTENT_PROTON_DOCUMENTDB_DOCUMENTS_TOTAL, EnumSet.of(max, last)); // TODO: Vespa 9: Remove last + addMetric(metrics, SearchNodeMetrics.CONTENT_PROTON_DOCUMENTDB_DOCUMENTS_READY, EnumSet.of(max, last)); // TODO: Vespa 9: Remove last + addMetric(metrics, SearchNodeMetrics.CONTENT_PROTON_DOCUMENTDB_DOCUMENTS_ACTIVE, EnumSet.of(max, last)); // TODO: Vespa 9: Remove last + addMetric(metrics, SearchNodeMetrics.CONTENT_PROTON_DOCUMENTDB_DOCUMENTS_REMOVED, EnumSet.of(max, last)); // TODO: Vespa 9: Remove last - addMetric(metrics, SearchNodeMetrics.CONTENT_PROTON_DOCUMENTDB_INDEX_DOCS_IN_MEMORY.last()); + addMetric(metrics, SearchNodeMetrics.CONTENT_PROTON_DOCUMENTDB_INDEX_DOCS_IN_MEMORY, EnumSet.of(max, last)); // TODO: Vespa 9: Remove last addMetric(metrics, SearchNodeMetrics.CONTENT_PROTON_DOCUMENTDB_DISK_USAGE.last()); addMetric(metrics, SearchNodeMetrics.CONTENT_PROTON_DOCUMENTDB_MEMORY_USAGE_ALLOCATED_BYTES.max()); - addMetric(metrics, SearchNodeMetrics.CONTENT_PROTON_DOCUMENTDB_HEART_BEAT_AGE.last()); + addMetric(metrics, SearchNodeMetrics.CONTENT_PROTON_DOCUMENTDB_HEART_BEAT_AGE, EnumSet.of(min, last)); // TODO: Vespa 9: Remove last addMetric(metrics, SearchNodeMetrics.CONTENT_PROTON_DOCSUM_DOCS.rate()); addMetric(metrics, SearchNodeMetrics.CONTENT_PROTON_DOCSUM_LATENCY, EnumSet.of(max, sum, count)); @@ -410,22 +418,22 @@ public class VespaMetricSet { // lid space addMetric(metrics, SearchNodeMetrics.CONTENT_PROTON_DOCUMENTDB_READY_LID_SPACE_LID_BLOAT_FACTOR.average()); addMetric(metrics, SearchNodeMetrics.CONTENT_PROTON_DOCUMENTDB_READY_LID_SPACE_LID_FRAGMENTATION_FACTOR.average()); - addMetric(metrics, SearchNodeMetrics.CONTENT_PROTON_DOCUMENTDB_READY_LID_SPACE_LID_LIMIT.last()); - addMetric(metrics, SearchNodeMetrics.CONTENT_PROTON_DOCUMENTDB_READY_LID_SPACE_HIGHEST_USED_LID.last()); - addMetric(metrics, SearchNodeMetrics.CONTENT_PROTON_DOCUMENTDB_READY_LID_SPACE_USED_LIDS.last()); + addMetric(metrics, SearchNodeMetrics.CONTENT_PROTON_DOCUMENTDB_READY_LID_SPACE_LID_LIMIT, EnumSet.of(max, last)); // TODO: Vespa 9: Remove last + addMetric(metrics, SearchNodeMetrics.CONTENT_PROTON_DOCUMENTDB_READY_LID_SPACE_HIGHEST_USED_LID, EnumSet.of(max, last)); // TODO: Vespa 9: Remove last + addMetric(metrics, SearchNodeMetrics.CONTENT_PROTON_DOCUMENTDB_READY_LID_SPACE_USED_LIDS, EnumSet.of(max, last)); // TODO: Vespa 9: Remove last addMetric(metrics, SearchNodeMetrics.CONTENT_PROTON_DOCUMENTDB_NOTREADY_LID_SPACE_LID_BLOAT_FACTOR.average()); addMetric(metrics, SearchNodeMetrics.CONTENT_PROTON_DOCUMENTDB_NOTREADY_LID_SPACE_LID_FRAGMENTATION_FACTOR.average()); - addMetric(metrics, SearchNodeMetrics.CONTENT_PROTON_DOCUMENTDB_NOTREADY_LID_SPACE_LID_LIMIT.last()); - addMetric(metrics, SearchNodeMetrics.CONTENT_PROTON_DOCUMENTDB_NOTREADY_LID_SPACE_HIGHEST_USED_LID.last()); - addMetric(metrics, SearchNodeMetrics.CONTENT_PROTON_DOCUMENTDB_NOTREADY_LID_SPACE_USED_LIDS.last()); + addMetric(metrics, SearchNodeMetrics.CONTENT_PROTON_DOCUMENTDB_NOTREADY_LID_SPACE_LID_LIMIT, EnumSet.of(max, last)); // TODO: Vespa 9: Remove last + addMetric(metrics, SearchNodeMetrics.CONTENT_PROTON_DOCUMENTDB_NOTREADY_LID_SPACE_HIGHEST_USED_LID, EnumSet.of(max, last)); // TODO: Vespa 9: Remove last + addMetric(metrics, SearchNodeMetrics.CONTENT_PROTON_DOCUMENTDB_NOTREADY_LID_SPACE_USED_LIDS, EnumSet.of(max, last)); // TODO: Vespa 9: Remove last addMetric(metrics, SearchNodeMetrics.CONTENT_PROTON_DOCUMENTDB_REMOVED_LID_SPACE_LID_BLOAT_FACTOR.average()); addMetric(metrics, SearchNodeMetrics.CONTENT_PROTON_DOCUMENTDB_REMOVED_LID_SPACE_LID_FRAGMENTATION_FACTOR.average()); - addMetric(metrics, SearchNodeMetrics.CONTENT_PROTON_DOCUMENTDB_REMOVED_LID_SPACE_LID_LIMIT.last()); - addMetric(metrics, SearchNodeMetrics.CONTENT_PROTON_DOCUMENTDB_REMOVED_LID_SPACE_HIGHEST_USED_LID.last()); - addMetric(metrics, SearchNodeMetrics.CONTENT_PROTON_DOCUMENTDB_REMOVED_LID_SPACE_USED_LIDS.last()); + addMetric(metrics, SearchNodeMetrics.CONTENT_PROTON_DOCUMENTDB_REMOVED_LID_SPACE_LID_LIMIT, EnumSet.of(max, last)); // TODO: Vespa 9: Remove last + addMetric(metrics, SearchNodeMetrics.CONTENT_PROTON_DOCUMENTDB_REMOVED_LID_SPACE_HIGHEST_USED_LID, EnumSet.of(max, last)); // TODO: Vespa 9: Remove last + addMetric(metrics, SearchNodeMetrics.CONTENT_PROTON_DOCUMENTDB_REMOVED_LID_SPACE_USED_LIDS, EnumSet.of(max, last)); // TODO: Vespa 9: Remove last // bucket move - addMetric(metrics, SearchNodeMetrics.CONTENT_PROTON_DOCUMENTDB_BUCKET_MOVE_BUCKETS_PENDING.last()); + addMetric(metrics, SearchNodeMetrics.CONTENT_PROTON_DOCUMENTDB_BUCKET_MOVE_BUCKETS_PENDING, EnumSet.of(max, sum, last)); // TODO: Vespa 9: Remove last // resource usage addMetric(metrics, SearchNodeMetrics.CONTENT_PROTON_RESOURCE_USAGE_DISK.average()); @@ -438,7 +446,7 @@ public class VespaMetricSet { addMetric(metrics, SearchNodeMetrics.CONTENT_PROTON_RESOURCE_USAGE_MEMORY_USAGE_TRANSIENT.max()); addMetric(metrics, SearchNodeMetrics.CONTENT_PROTON_RESOURCE_USAGE_MEMORY_MAPPINGS.max()); addMetric(metrics, SearchNodeMetrics.CONTENT_PROTON_RESOURCE_USAGE_OPEN_FILE_DESCRIPTORS.max()); - addMetric(metrics, SearchNodeMetrics.CONTENT_PROTON_RESOURCE_USAGE_FEEDING_BLOCKED, EnumSet.of(max,last)); // TODO: Vespa 9: Remove last + addMetric(metrics, SearchNodeMetrics.CONTENT_PROTON_RESOURCE_USAGE_FEEDING_BLOCKED, EnumSet.of(max, last)); // TODO: Vespa 9: Remove last addMetric(metrics, SearchNodeMetrics.CONTENT_PROTON_RESOURCE_USAGE_MALLOC_ARENA.max()); addMetric(metrics, SearchNodeMetrics.CONTENT_PROTON_DOCUMENTDB_ATTRIBUTE_RESOURCE_USAGE_ADDRESS_SPACE.max()); addMetric(metrics, SearchNodeMetrics.CONTENT_PROTON_DOCUMENTDB_ATTRIBUTE_RESOURCE_USAGE_FEEDING_BLOCKED.max()); @@ -453,7 +461,7 @@ public class VespaMetricSet { // transaction log addMetric(metrics, SearchNodeMetrics.CONTENT_PROTON_TRANSACTIONLOG_ENTRIES.average()); addMetric(metrics, SearchNodeMetrics.CONTENT_PROTON_TRANSACTIONLOG_DISK_USAGE.average()); - addMetric(metrics, SearchNodeMetrics.CONTENT_PROTON_TRANSACTIONLOG_REPLAY_TIME.last()); + addMetric(metrics, SearchNodeMetrics.CONTENT_PROTON_TRANSACTIONLOG_REPLAY_TIME, EnumSet.of(max, last)); // TODO: Vespa 9: Remove last // document store addMetric(metrics, SearchNodeMetrics.CONTENT_PROTON_DOCUMENTDB_READY_DOCUMENT_STORE_DISK_USAGE.average()); diff --git a/metrics/src/test/java/ai/vespa/metrics/MetricSetTest.java b/metrics/src/test/java/ai/vespa/metrics/MetricSetTest.java new file mode 100644 index 00000000000..788e9e9836c --- /dev/null +++ b/metrics/src/test/java/ai/vespa/metrics/MetricSetTest.java @@ -0,0 +1,114 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package ai.vespa.metrics; + +import ai.vespa.metrics.set.Metric; +import ai.vespa.metrics.set.MetricSet; +import com.google.common.collect.Sets; +import org.junit.jupiter.api.Test; + +import java.util.EnumSet; +import java.util.List; +import java.util.Map; + +import static java.util.Collections.emptyList; +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertNotNull; + +/** + * @author gjoranv + */ +public class MetricSetTest { + + @Test + void metrics_from_children_are_added() { + MetricSet child1 = new MetricSet("child1", List.of(new Metric("child1_metric"))); + MetricSet child2 = new MetricSet("child2", List.of(new Metric("child2_metric"))); + MetricSet parent = new MetricSet("parent", emptyList(), List.of(child1, child2)); + + Map<String, Metric> parentMetrics = parent.getMetrics(); + assertEquals(2, parentMetrics.size()); + assertNotNull(parentMetrics.get("child1_metric")); + assertNotNull(parentMetrics.get("child2_metric")); + } + + @Test + void adding_the_same_child_set_twice_has_no_effect() { + MetricSet child = new MetricSet("child", List.of(new Metric("child_metric"))); + MetricSet parent = new MetricSet("parent", emptyList(), List.of(child, child)); + + Map<String, Metric> parentMetrics = parent.getMetrics(); + assertEquals(1, parentMetrics.size()); + assertNotNull(parentMetrics.get("child_metric")); + } + + @Test + void internal_metrics_take_precedence_over_metrics_from_children() { + String METRIC_NAME = "metric1"; + String COMMON_DIMENSION_KEY = "commonKey"; + + Map<String, String> childDimensions = Map.of(COMMON_DIMENSION_KEY, "childCommonVal", "childKey", "childVal"); + Metric childMetric = new Metric(METRIC_NAME, "child-output-name", "child-description", childDimensions); + + Map<String, String> parentDimensions = Map.of(COMMON_DIMENSION_KEY, "parentCommonVal","parentKey", "parentVal"); + Metric parentMetric = new Metric(METRIC_NAME, "parent-output-name", "parent-description", parentDimensions); + + MetricSet child = new MetricSet("set1", Sets.newHashSet(childMetric)); + MetricSet parent = new MetricSet("set1", Sets.newHashSet(parentMetric), Sets.newHashSet(child)); + + Metric combinedMetric = parent.getMetrics().get(METRIC_NAME); + assertEquals("parent-output-name", combinedMetric.outputName); + assertEquals("parent-description", combinedMetric.description); + assertEquals(3, combinedMetric.dimensions.size()); + assertEquals("parentCommonVal", combinedMetric.dimensions.get(COMMON_DIMENSION_KEY)); + } + + @Test + void it_can_be_generated_from_builder() { + MetricSet metricSet = new MetricSet.Builder("test") + .metric("metric1") + .metric(TestMetrics.ENUM_METRIC1.last()) + .metric(TestMetrics.ENUM_METRIC2, EnumSet.of(Suffix.sum, Suffix.count)) + .metric(new Metric("metric2")) + .metrics(List.of(new Metric("metric3"))) + .metricSet(new MetricSet.Builder("child") + .metric("child_metric1") + .metric("child_metric2") + .build()) + .build(); + + Map<String, Metric> metrics = metricSet.getMetrics(); + assertEquals(8, metrics.size()); + assertNotNull(metrics.get("metric1")); + assertNotNull(metrics.get("emum-metric1.last")); + assertNotNull(metrics.get("emum-metric2.sum")); + assertNotNull(metrics.get("emum-metric2.count")); + assertNotNull(metrics.get("metric2")); + assertNotNull(metrics.get("metric3")); + assertNotNull(metrics.get("child_metric1")); + assertNotNull(metrics.get("child_metric1")); + } + + enum TestMetrics implements VespaMetrics { + ENUM_METRIC1("emum-metric1"), + ENUM_METRIC2("emum-metric2"); + + private final String name; + + TestMetrics(String name) { + this.name = name; + } + + public String baseName() { + return name; + } + + public Unit unit() { + return null; + } + + public String description() { + return null; + } + + } +} diff --git a/metrics/src/test/java/ai/vespa/metrics/MetricTest.java b/metrics/src/test/java/ai/vespa/metrics/MetricTest.java new file mode 100644 index 00000000000..7a0b85f82cc --- /dev/null +++ b/metrics/src/test/java/ai/vespa/metrics/MetricTest.java @@ -0,0 +1,39 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package ai.vespa.metrics; + +import ai.vespa.metrics.set.Metric; +import com.google.common.collect.ImmutableMap; +import org.junit.jupiter.api.Test; + +import java.util.Map; + +import static org.junit.jupiter.api.Assertions.assertEquals; + +/** + * @author gjoranv + */ +public class MetricTest { + + @Test + void this_metric_takes_precedence_when_combined_with_another_metric() { + String COMMON_DIMENSION_KEY = "commonKey"; + + Map<String, String> thisDimensions = ImmutableMap.<String, String>builder() + .put(COMMON_DIMENSION_KEY, "thisCommonVal") + .put("thisKey", "thisVal") + .build(); + Metric thisMetric = new Metric("thisMetric", "this-output-name", "this-description", thisDimensions); + + Map<String, String> thatDimensions = ImmutableMap.<String, String>builder() + .put(COMMON_DIMENSION_KEY, "thatCommonVal") + .put("thatKey", "thatVal") + .build(); + Metric thatMetric = new Metric("thatMetric", "that-output-name", "that-description", thatDimensions); + + Metric combinedMetric = thisMetric.addDimensionsFrom(thatMetric); + assertEquals("this-output-name", combinedMetric.outputName); + assertEquals("this-description", combinedMetric.description); + assertEquals(3, combinedMetric.dimensions.size()); + assertEquals("thisCommonVal", combinedMetric.dimensions.get(COMMON_DIMENSION_KEY)); + } +} diff --git a/metrics/src/tests/metricmanagertest.cpp b/metrics/src/tests/metricmanagertest.cpp index 9e6b0f40be3..a6dd141576f 100644 --- a/metrics/src/tests/metricmanagertest.cpp +++ b/metrics/src/tests/metricmanagertest.cpp @@ -462,6 +462,7 @@ TEST_F(MetricManagerTest, test_snapshots) { MetricLockGuard lockGuard(mm.getMetricLock()); mm.registerMetric(lockGuard, mySet.set); + EXPECT_FALSE(mm.any_snapshots_taken(lockGuard)); // well-defined prior to init() } mm.init(ConfigUri("raw:" "consumer[2]\n" @@ -474,6 +475,7 @@ TEST_F(MetricManagerTest, test_snapshots) MetricNameVisitor visitor; { MetricLockGuard lockGuard(mm.getMetricLock()); + EXPECT_FALSE(mm.any_snapshots_taken(lockGuard)); // No snapshots yet mm.visit(lockGuard, mm.getActiveMetrics(lockGuard), visitor, "snapper"); const MetricManager::ConsumerSpec * consumerSpec = mm.getConsumerSpec(lockGuard, "snapper"); EXPECT_EQ(std::string("\n" @@ -506,6 +508,10 @@ TEST_F(MetricManagerTest, test_snapshots) ASSERT_VALUES(mm, 5 * 60s, "2,4,4,1,7,9,1,1,8,2,10"); ASSERT_VALUES(mm, 60 * 60s, ""); ASSERT_VALUES(mm, 0 * 60s, "2,4,4,1,7,9,1,1,8,2,10"); + { + auto guard = mm.getMetricLock(); + EXPECT_TRUE(mm.any_snapshots_taken(guard)); // At least one snapshot has been taken + } // Adding metrics done in second five minute period. Total should // be updated to account for both @@ -567,6 +573,14 @@ TEST_F(MetricManagerTest, test_json_output) "consumer[0].tags[1]\n" "consumer[0].tags[0] snaptest\n")); + { + // No snapshots have been taken yet, so the non-total getMetrics call should return + // the empty string (i.e. no metrics produced). + metrics::StateApiAdapter adapter(mm); + auto json_str = adapter.getMetrics("snapper"); + EXPECT_EQ(json_str, ""); + } + takeSnapshots(mm, 1000); // Adding metrics to have some values in them diff --git a/metrics/src/vespa/metrics/countmetric.h b/metrics/src/vespa/metrics/countmetric.h index 5225cd58f12..fe1f613fbf7 100644 --- a/metrics/src/vespa/metrics/countmetric.h +++ b/metrics/src/vespa/metrics/countmetric.h @@ -19,7 +19,7 @@ namespace metrics { struct AbstractCountMetric : public Metric { - bool visit(MetricVisitor& visitor, bool tagAsAutoGenerated = false) const override + bool visit(MetricVisitor& visitor, bool tagAsAutoGenerated) const override { return visitor.visitCountMetric(*this, tagAsAutoGenerated); } diff --git a/metrics/src/vespa/metrics/jsonwriter.h b/metrics/src/vespa/metrics/jsonwriter.h index e4a2e7ca10b..379ed215198 100644 --- a/metrics/src/vespa/metrics/jsonwriter.h +++ b/metrics/src/vespa/metrics/jsonwriter.h @@ -18,7 +18,7 @@ class JsonWriter : public MetricVisitor, public vespalib::JsonStreamTypes { uint64_t _period; public: - JsonWriter(vespalib::JsonStream&); + explicit JsonWriter(vespalib::JsonStream&); private: bool visitSnapshot(const MetricSnapshot&) override; @@ -29,7 +29,6 @@ private: bool visitValueMetric(const AbstractValueMetric&, bool autoGenerated) override; void doneVisiting() override; - void checkIfArrayNeedsToBeStarted(); void writeCommonPrefix(const Metric& m); void writeCommonPostfix(const Metric& m); @@ -37,7 +36,7 @@ private: void writeInheritedDimensions(); void writeMetricSpecificDimensions(const Metric&); - bool isLeafMetric(const Metric& m) const { return !m.isMetricSet(); } + static bool isLeafMetric(const Metric& m) { return !m.isMetricSet(); } }; } diff --git a/metrics/src/vespa/metrics/metricmanager.cpp b/metrics/src/vespa/metrics/metricmanager.cpp index 2f6fe4c6ba6..fa18ddc383b 100644 --- a/metrics/src/vespa/metrics/metricmanager.cpp +++ b/metrics/src/vespa/metrics/metricmanager.cpp @@ -168,6 +168,11 @@ MetricManager::isInitialized() const { return static_cast<bool>(_configHandle); } +bool +MetricManager::any_snapshots_taken(const MetricLockGuard&) const noexcept { + return (!_snapshots.empty() && _snapshots[0]->current_is_assigned()); +} + void MetricManager::init(const config::ConfigUri & uri, bool startThread) { @@ -200,7 +205,7 @@ namespace { struct Path { vespalib::StringTokenizer _path; - Path(vespalib::stringref fullpath) : _path(fullpath, ".") { } + explicit Path(vespalib::stringref fullpath) : _path(fullpath, ".") { } vespalib::string toString() const { vespalib::asciistream ost; @@ -246,7 +251,7 @@ struct ConsumerMetricBuilder : public MetricVisitor { }; std::list<Result> result; - ConsumerMetricBuilder(const Config::Consumer& c) __attribute__((noinline)); + explicit ConsumerMetricBuilder(const Config::Consumer& c) __attribute__((noinline)); ~ConsumerMetricBuilder() __attribute__((noinline)); bool tagAdded(const Metric& metric) { @@ -486,7 +491,7 @@ MetricManager::configure(const MetricLockGuard & , std::unique_ptr<Config> confi _totalMetrics = std::make_shared<MetricSnapshot>("All time snapshot", 0s, _activeMetrics.getMetrics(), _snapshotUnsetMetrics); _totalMetrics->reset(currentTime); } - if (_config.get() == 0 || (_config->consumer.size() != config->consumer.size())) { + if ( !_config || (_config->consumer.size() != config->consumer.size())) { _consumerConfigChanged = true; } else { for (uint32_t i=0; i<_config->consumer.size(); ++i) { @@ -553,7 +558,7 @@ MetricManager::visit(const MetricLockGuard & guard, const MetricSnapshot& snapsh MetricVisitor& visitor, const std::string& consumer) const { if (visitor.visitSnapshot(snapshot)) { - if (consumer == "") { + if (consumer.empty()) { snapshot.getMetrics().visit(visitor); } else { const ConsumerSpec * consumerSpec = getConsumerSpec(guard, consumer); @@ -795,6 +800,7 @@ MetricManager::takeSnapshots(const MetricLockGuard & guard, system_time timeToPr _activeMetrics.addToSnapshot(firstTarget, false, timeToProcess); _activeMetrics.addToSnapshot(*_totalMetrics, false, timeToProcess); _activeMetrics.reset(timeToProcess); + _snapshots[0]->tag_current_as_assigned(); LOG(debug, "After snapshotting, active metrics goes from %s to %s, and 5 minute metrics goes from %s to %s.", to_string(_activeMetrics.getFromTime()).c_str(), to_string(_activeMetrics.getToTime()).c_str(), to_string(firstTarget.getFromTime()).c_str(), to_string(firstTarget.getToTime()).c_str()); diff --git a/metrics/src/vespa/metrics/metricmanager.h b/metrics/src/vespa/metrics/metricmanager.h index 6f40e7961f4..cfb3ab2137a 100644 --- a/metrics/src/vespa/metrics/metricmanager.h +++ b/metrics/src/vespa/metrics/metricmanager.h @@ -123,7 +123,7 @@ private: public: MetricManager(); - MetricManager(std::unique_ptr<Timer> timer); + explicit MetricManager(std::unique_ptr<Timer> timer); ~MetricManager(); void stop(); @@ -218,7 +218,7 @@ public: * snapshots while you are accessing them. */ MetricLockGuard getMetricLock() const { - return MetricLockGuard(_waiter); + return {_waiter}; } /** While accessing the active metrics you should have the metric lock. */ @@ -267,6 +267,8 @@ public: bool isInitialized() const; + [[nodiscard]] bool any_snapshots_taken(const MetricLockGuard&) const noexcept; + private: void takeSnapshots(const MetricLockGuard &, system_time timeToProcess); diff --git a/metrics/src/vespa/metrics/metricsnapshot.cpp b/metrics/src/vespa/metrics/metricsnapshot.cpp index 6bcdcc60995..104ad858e43 100644 --- a/metrics/src/vespa/metrics/metricsnapshot.cpp +++ b/metrics/src/vespa/metrics/metricsnapshot.cpp @@ -32,7 +32,7 @@ MetricSnapshot::MetricSnapshot(const Metric::String& name, system_time::duration _snapshot(), _metrics() { - _snapshot.reset(source.clone(_metrics, Metric::INACTIVE, 0, copyUnset)); + _snapshot.reset(source.clone(_metrics, Metric::INACTIVE, nullptr, copyUnset)); _metrics.shrink_to_fit(); } @@ -54,7 +54,7 @@ void MetricSnapshot::recreateSnapshot(const MetricSet& metrics, bool copyUnset) { std::vector<Metric::UP> newMetrics; - Metric* m = metrics.clone(newMetrics, Metric::INACTIVE, 0, copyUnset); + Metric* m = metrics.clone(newMetrics, Metric::INACTIVE, nullptr, copyUnset); assert(m->isMetricSet()); std::unique_ptr<MetricSet> newSnapshot(static_cast<MetricSet*>(m)); newSnapshot->reset(); @@ -78,12 +78,15 @@ MetricSnapshotSet::MetricSnapshotSet(const Metric::String& name, system_time::du : _count(count), _builderCount(0), _current(std::make_unique<MetricSnapshot>(name, period, source, snapshotUnsetMetrics)), - _building(count == 1 ? nullptr : new MetricSnapshot(name, period, source, snapshotUnsetMetrics)) + _building(count == 1 ? nullptr : new MetricSnapshot(name, period, source, snapshotUnsetMetrics)), + _current_is_assigned(false) { _current->reset(); - if (_building.get()) _building->reset(); + if (_building) _building->reset(); } +MetricSnapshotSet::~MetricSnapshotSet() = default; + MetricSnapshot& MetricSnapshotSet::getNextTarget() { diff --git a/metrics/src/vespa/metrics/metricsnapshot.h b/metrics/src/vespa/metrics/metricsnapshot.h index 859ee4a4a97..a6a68b43015 100644 --- a/metrics/src/vespa/metrics/metricsnapshot.h +++ b/metrics/src/vespa/metrics/metricsnapshot.h @@ -71,15 +71,17 @@ public: }; class MetricSnapshotSet { - uint32_t _count; // Number of times we need to add to building period - // before we have a full time window. + const uint32_t _count; // Number of times we need to add to building period + // before we have a full time window. uint32_t _builderCount; // Number of times we've currently added to the // building instance. std::unique_ptr<MetricSnapshot> _current; // The last full period std::unique_ptr<MetricSnapshot> _building; // The building period + bool _current_is_assigned; public: MetricSnapshotSet(const Metric::String& name, system_time::duration period, uint32_t count, const MetricSet& source, bool snapshotUnsetMetrics); + ~MetricSnapshotSet(); const Metric::String& getName() const { return _current->getName(); } system_time::duration getPeriod() const { return _current->getPeriod(); } @@ -94,9 +96,7 @@ public: MetricSnapshot& getSnapshot(bool temporary) { return *((temporary && _count > 1) ? _building : _current); } - const MetricSnapshot& getSnapshot() const { - return getSnapshot(false); - } + const MetricSnapshot& getSnapshot(bool temporary) const { return *((temporary && _count > 1) ? _building : _current); } @@ -111,6 +111,13 @@ public: void recreateSnapshot(const MetricSet& metrics, bool copyUnset); void addMemoryUsage(MemoryConsumption&) const; void setFromTime(system_time fromTime); + + [[nodiscard]] bool current_is_assigned() const noexcept { + return _current_is_assigned; + } + void tag_current_as_assigned() noexcept { + _current_is_assigned = true; + } }; } // metrics diff --git a/metrics/src/vespa/metrics/state_api_adapter.cpp b/metrics/src/vespa/metrics/state_api_adapter.cpp index 136ccf6e06a..6c0cb9a6013 100644 --- a/metrics/src/vespa/metrics/state_api_adapter.cpp +++ b/metrics/src/vespa/metrics/state_api_adapter.cpp @@ -11,8 +11,8 @@ StateApiAdapter::getMetrics(const vespalib::string &consumer) { MetricLockGuard guard(_manager.getMetricLock()); auto periods = _manager.getSnapshotPeriods(guard); - if (periods.empty()) { - return ""; // no configuration yet + if (periods.empty() || !_manager.any_snapshots_taken(guard)) { + return ""; // no configuration or snapshots yet } const MetricSnapshot &snapshot(_manager.getMetricSnapshot(guard, periods[0])); vespalib::asciistream json; diff --git a/metrics/src/vespa/metrics/textwriter.h b/metrics/src/vespa/metrics/textwriter.h index f23d1cf585c..7feffdf22fc 100644 --- a/metrics/src/vespa/metrics/textwriter.h +++ b/metrics/src/vespa/metrics/textwriter.h @@ -19,7 +19,7 @@ class TextWriter : public MetricVisitor { public: TextWriter(std::ostream& out, vespalib::duration period, const std::string& regex, bool verbose); - ~TextWriter(); + ~TextWriter() override; bool visitSnapshot(const MetricSnapshot&) override; void doneVisitingSnapshot(const MetricSnapshot&) override; |