diff options
author | yngveaasheim <yngve@yahooinc.com> | 2023-03-01 16:58:07 +0100 |
---|---|---|
committer | yngveaasheim <yngve@yahooinc.com> | 2023-03-03 08:00:34 +0100 |
commit | a09c1276f2cdb952a1155852a96c8f9955002421 (patch) | |
tree | 6313839c60e6e583d5d9e519f6a2c8fd2774779b | |
parent | 0295dd9d2841c45fb02eb97c8c2e87e4fd9c9120 (diff) |
Add enums for most remaining metrics in VespaMetricSet
5 files changed, 98 insertions, 51 deletions
diff --git a/config-model/src/main/java/com/yahoo/vespa/model/admin/monitoring/VespaMetricSet.java b/config-model/src/main/java/com/yahoo/vespa/model/admin/monitoring/VespaMetricSet.java index 000ac92a8a0..091c0559d71 100644 --- a/config-model/src/main/java/com/yahoo/vespa/model/admin/monitoring/VespaMetricSet.java +++ b/config-model/src/main/java/com/yahoo/vespa/model/admin/monitoring/VespaMetricSet.java @@ -5,6 +5,10 @@ import com.yahoo.metrics.ContainerMetrics; import com.yahoo.metrics.DistributorMetrics; import com.yahoo.metrics.SearchNodeMetrics; import com.yahoo.metrics.StorageMetrics; +import com.yahoo.metrics.LogdMetrics; +import com.yahoo.metrics.SentinelMetrics; +import com.yahoo.metrics.SlobrokMetrics; +import com.yahoo.metrics.ConfigServerMetrics; import com.yahoo.metrics.Suffix; import java.util.Collections; @@ -55,12 +59,10 @@ public class VespaMetricSet { private static Set<Metric> getSentinelMetrics() { Set<Metric> metrics = new LinkedHashSet<>(); - addMetric(metrics, "sentinel.restarts.count"); - addMetric(metrics, "sentinel.totalRestarts.last"); - addMetric(metrics, "sentinel.uptime.last"); - - addMetric(metrics, "sentinel.running.count"); - addMetric(metrics, "sentinel.running.last"); + addMetric(metrics, SentinelMetrics.SENTINEL_RESTARTS.count()); + addMetric(metrics, SentinelMetrics.SENTINEL_TOTAL_RESTARTS.last()); + addMetric(metrics, SentinelMetrics.SENTINEL_UPTIME.last()); + addMetric(metrics, SentinelMetrics.SENTINEL_RUNNING, EnumSet.of(count, last)); return metrics; } @@ -68,36 +70,36 @@ public class VespaMetricSet { private static Set<Metric> getOtherMetrics() { Set<Metric> metrics = new LinkedHashSet<>(); - addMetric(metrics, "slobrok.heartbeats.failed.count"); - addMetric(metrics, "slobrok.missing.consensus.count"); + addMetric(metrics, SlobrokMetrics.SLOBROK_HEARTBEATS_FAILED.count()); + addMetric(metrics, SlobrokMetrics.SLOBROK_MISSING_CONSENSUS.count()); - addMetric(metrics, "logd.processed.lines.count"); - addMetric(metrics, 
"worker.connections.max"); + addMetric(metrics, LogdMetrics.LOGD_PROCESSED_LINES.count()); + addMetric(metrics, "worker.connections.max"); // Internal (routing layer) addMetric(metrics, "endpoint.certificate.expiry.seconds"); // Java (JRT) TLS metrics - addMetric(metrics, "jrt.transport.tls-certificate-verification-failures"); - addMetric(metrics, "jrt.transport.peer-authorization-failures"); - addMetric(metrics, "jrt.transport.server.tls-connections-established"); - addMetric(metrics, "jrt.transport.client.tls-connections-established"); - addMetric(metrics, "jrt.transport.server.unencrypted-connections-established"); - addMetric(metrics, "jrt.transport.client.unencrypted-connections-established"); + addMetric(metrics, ContainerMetrics.JRT_TRANSPORT_TLS_CERTIFICATE_VERIFICATION_FAILURES.baseName()); + addMetric(metrics, ContainerMetrics.JRT_TRANSPORT_PEER_AUTHORIZATION_FAILURES.baseName()); + addMetric(metrics, ContainerMetrics.JRT_TRANSPORT_SERVER_TLS_CONNECIONTS_ESTABLISHED.baseName()); + addMetric(metrics, ContainerMetrics.JRT_TRANSPORT_CLIENT_TLS_CONNECTIONS_ESTABLISHED.baseName()); + addMetric(metrics, ContainerMetrics.JRT_TRANSPORT_SERVER_UNENCRYPTED_CONNECTIONS_ESTABLISHED.baseName()); + addMetric(metrics, ContainerMetrics. JRT_TRANSPORT_CLIENT_UNENCRYPTED_CONNECTIONS_ESTABLISHED. 
baseName()); // C++ TLS metrics - addMetric(metrics, "vds.server.network.tls-handshakes-failed"); - addMetric(metrics, "vds.server.network.peer-authorization-failures"); - addMetric(metrics, "vds.server.network.client.tls-connections-established"); - addMetric(metrics, "vds.server.network.server.tls-connections-established"); - addMetric(metrics, "vds.server.network.client.insecure-connections-established"); - addMetric(metrics, "vds.server.network.server.insecure-connections-established"); - addMetric(metrics, "vds.server.network.tls-connections-broken"); - addMetric(metrics, "vds.server.network.failed-tls-config-reloads"); + addMetric(metrics, StorageMetrics.VDS_SERVER_NETWORK_TLS_HANDSHAKES_FAILED.count()); + addMetric(metrics, StorageMetrics.VDS_SERVER_NETWORK_PEER_AUTHORIZATION_FAILURES.count()); + addMetric(metrics, StorageMetrics.VDS_SERVER_NETWORK_CLIENT_TLS_CONNECTIONS_ESTABLISHED.count()); + addMetric(metrics, StorageMetrics.VDS_SERVER_NETWORK_SERVER_TLS_CONNECTIONS_ESTABLISHED.count()); + addMetric(metrics, StorageMetrics.VDS_SERVER_NETWORK_CLIENT_INSECURE_CONNECTIONS_ESTABLISHED.count()); + addMetric(metrics, StorageMetrics.VDS_SERVER_NETWORK_SERVER_INSECURE_CONNECTIONS_ESTABLISHED.count()); + addMetric(metrics, StorageMetrics.VDS_SERVER_NETWORK_TLS_CONNECTIONS_BROKEN.count()); + addMetric(metrics, StorageMetrics.VDS_SERVER_NETWORK_FAILED_TLS_CONFIG_RELOADS.count()); // C++ capability metrics - addMetric(metrics, "vds.server.network.rpc-capability-checks-failed"); - addMetric(metrics, "vds.server.network.status-capability-checks-failed"); + addMetric(metrics, StorageMetrics.VDS_SERVER_NETWORK_RPC_CAPABILITY_CHECKS_FAILED.count()); + addMetric(metrics, StorageMetrics.VDS_SERVER_NETWORK_STATUS_CAPABILITY_CHECKS_FAILED.count()); // C++ Fnet metrics - addMetric(metrics, "vds.server.fnet.num-connections"); + addMetric(metrics, StorageMetrics.VDS_SERVER_FNET_NUM_CONNECTIONS.count()); // Node certificate addMetric(metrics, "node-certificate.expiry.seconds"); 
@@ -108,22 +110,20 @@ public class VespaMetricSet { private static Set<Metric> getConfigServerMetrics() { Set<Metric> metrics =new LinkedHashSet<>(); - addMetric(metrics, "configserver.requests.count"); - addMetric(metrics, "configserver.failedRequests.count"); - addMetric(metrics, "configserver.latency.max"); - addMetric(metrics, "configserver.latency.sum"); - addMetric(metrics, "configserver.latency.count"); - addMetric(metrics, "configserver.cacheConfigElems.last"); - addMetric(metrics, "configserver.cacheChecksumElems.last"); - addMetric(metrics, "configserver.hosts.last"); - addMetric(metrics, "configserver.delayedResponses.count"); - addMetric(metrics, "configserver.sessionChangeErrors.count"); - - addMetric(metrics, "configserver.zkZNodes.last"); - addMetric(metrics, "configserver.zkAvgLatency.last"); - addMetric(metrics, "configserver.zkMaxLatency.last"); - addMetric(metrics, "configserver.zkConnections.last"); - addMetric(metrics, "configserver.zkOutstandingRequests.last"); + addMetric(metrics, ConfigServerMetrics.CONFIGSERVER_REQUESTS.count()); + addMetric(metrics, ConfigServerMetrics.CONFIGSERVER_FAILED_REQUESTS.count()); + addMetric(metrics, ConfigServerMetrics.CONFIGSERVER_LATENCY, EnumSet.of(max, sum, count)); + addMetric(metrics, ConfigServerMetrics.CONFIGSERVER_CACHE_CONFIG_ELEMS.last()); + addMetric(metrics, ConfigServerMetrics.CONFIGSERVER_CACHE_CHECKSUM_ELEMS.last()); + addMetric(metrics, ConfigServerMetrics.CONFIGSERVER_HOSTS.last()); + addMetric(metrics, ConfigServerMetrics.CONFIGSERVER_DELAYED_RESPONSES.count()); + addMetric(metrics, ConfigServerMetrics.CONFIGSERVER_SESSION_CHANGE_ERRORS.count()); + + addMetric(metrics, ConfigServerMetrics.CONFIGSERVER_ZK_Z_NODES.last()); + addMetric(metrics, ConfigServerMetrics.CONFIGSERVER_ZK_AVG_LATENCY.last()); + addMetric(metrics, ConfigServerMetrics.CONFIGSERVER_ZK_MAX_LATENCY.last()); + addMetric(metrics, ConfigServerMetrics.CONFIGSERVER_ZK_CONNECTIONS.last()); + addMetric(metrics, 
ConfigServerMetrics.CONFIGSERVER_ZK_OUTSTANDING_REQUESTS.last()); return metrics; } @@ -721,6 +721,18 @@ public class VespaMetricSet { private static void addMetric(Set<Metric> metrics, DistributorMetrics metric, EnumSet<Suffix> suffixes) { suffixes.forEach(suffix -> metrics.add(new Metric(metric.baseName() + "." + suffix.suffix()))); } + private static void addMetric(Set<Metric> metrics, SentinelMetrics metric, EnumSet<Suffix> suffixes) { + suffixes.forEach(suffix -> metrics.add(new Metric(metric.baseName() + "." + suffix.suffix()))); + } + private static void addMetric(Set<Metric> metrics, SlobrokMetrics metric, EnumSet<Suffix> suffixes) { + suffixes.forEach(suffix -> metrics.add(new Metric(metric.baseName() + "." + suffix.suffix()))); + } + private static void addMetric(Set<Metric> metrics, LogdMetrics metric, EnumSet<Suffix> suffixes) { + suffixes.forEach(suffix -> metrics.add(new Metric(metric.baseName() + "." + suffix.suffix()))); + } + private static void addMetric(Set<Metric> metrics, ConfigServerMetrics metric, EnumSet<Suffix> suffixes) { + suffixes.forEach(suffix -> metrics.add(new Metric(metric.baseName() + "." + suffix.suffix()))); + } private static void addMetric(Set<Metric> metrics, String metricName, Iterable<String> aggregateSuffices) { for (String suffix : aggregateSuffices) { metrics.add(new Metric(metricName + "." 
+ suffix)); diff --git a/container-core/src/main/java/com/yahoo/metrics/ContainerMetrics.java b/container-core/src/main/java/com/yahoo/metrics/ContainerMetrics.java index 0b61c937cb8..9c0b8cd7d71 100644 --- a/container-core/src/main/java/com/yahoo/metrics/ContainerMetrics.java +++ b/container-core/src/main/java/com/yahoo/metrics/ContainerMetrics.java @@ -184,7 +184,15 @@ public enum ContainerMetrics implements VespaMetrics { CLUSTER_CONTROLLER_RESOURCE_USAGE_MAX_DISK_UTILIZATION("cluster-controller.resource_usage.max_disk_utilization", Unit.FRACTION, "Current disk space utilisation, per content node"), CLUSTER_CONTROLLER_RESOURCE_USAGE_MEMORY_LIMIT("cluster-controller.resource_usage.memory_limit", Unit.FRACTION, "Disk space limit as a fraction of available disk space"), CLUSTER_CONTROLLER_RESOURCE_USAGE_DISK_LIMIT("cluster-controller.resource_usage.disk_limit", Unit.FRACTION, "Memory space limit as a fraction of available memory"), - CLUSTER_CONTROLLER_REINDEXING_PROGRESS("reindexing.progress", Unit.FRACTION, "Re-indexing progress"); + CLUSTER_CONTROLLER_REINDEXING_PROGRESS("reindexing.progress", Unit.FRACTION, "Re-indexing progress"), + + // Java (JRT) TLS metrics + JRT_TRANSPORT_TLS_CERTIFICATE_VERIFICATION_FAILURES("jrt.transport.tls-certificate-verification-failures", Unit.FAILURE, "TLS certificate verification failures"), + JRT_TRANSPORT_PEER_AUTHORIZATION_FAILURES("jrt.transport.peer-authorization-failures", Unit.FAILURE, "TLS peer authorization failures"), + JRT_TRANSPORT_SERVER_TLS_CONNECIONTS_ESTABLISHED("jrt.transport.server.tls-connections-established", Unit.CONNECTION, "TLS server connections established"), + JRT_TRANSPORT_CLIENT_TLS_CONNECTIONS_ESTABLISHED("jrt.transport.client.tls-connections-established", Unit.CONNECTION, "TLS client connections established"), + JRT_TRANSPORT_SERVER_UNENCRYPTED_CONNECTIONS_ESTABLISHED("jrt.transport.server.unencrypted-connections-established", Unit.CONNECTION, "Unencrypted server connections established"), + 
JRT_TRANSPORT_CLIENT_UNENCRYPTED_CONNECTIONS_ESTABLISHED("jrt.transport.client.unencrypted-connections-established", Unit.CONNECTION, "Unencrypted client connections established"); private final String name; diff --git a/container-core/src/main/java/com/yahoo/metrics/StorageMetrics.java b/container-core/src/main/java/com/yahoo/metrics/StorageMetrics.java index 2a59e5a9d92..d67b67d04b7 100644 --- a/container-core/src/main/java/com/yahoo/metrics/StorageMetrics.java +++ b/container-core/src/main/java/com/yahoo/metrics/StorageMetrics.java @@ -75,7 +75,26 @@ public enum StorageMetrics implements VespaMetrics { VDS_MERGETHROTTLER_LOCALLYEXECUTEDMERGES_OK("vds.mergethrottler.locallyexecutedmerges.ok", Unit.INSTANCE, "The number of successful merges for 'locallyexecutedmerges'"), VDS_MERGETHROTTLER_MERGECHAINS_OK("vds.mergethrottler.mergechains.ok", Unit.INSTANCE, "The number of successful merges for 'mergechains'"), VDS_MERGETHROTTLER_MERGECHAINS_FAILURES_BUSY("vds.mergethrottler.mergechains.failures.busy", Unit.INSTANCE, "The number of merges that failed because the storage node was busy"), - VDS_MERGETHROTTLER_MERGECHAINS_FAILURES_TOTAL("vds.mergethrottler.mergechains.failures.total", Unit.INSTANCE, "Sum of all failures"); + VDS_MERGETHROTTLER_MERGECHAINS_FAILURES_TOTAL("vds.mergethrottler.mergechains.failures.total", Unit.INSTANCE, "Sum of all failures"), + + + // C++ TLS metrics - these come from both the distributor and storage + VDS_SERVER_NETWORK_TLS_HANDSHAKES_FAILED("vds.server.network.tls-handshakes-failed", Unit.OPERATION, "Number of client or server connection attempts that failed during TLS handshaking"), + VDS_SERVER_NETWORK_PEER_AUTHORIZATION_FAILURES("vds.server.network.peer-authorization-failures", Unit.FAILURE, "Number of TLS connection attempts failed due to bad or missing peer certificate credentials"), + VDS_SERVER_NETWORK_CLIENT_TLS_CONNECTIONS_ESTABLISHED("vds.server.network.client.tls-connections-established", Unit.CONNECTION, "Number of secure 
mTLS connections established"), + VDS_SERVER_NETWORK_SERVER_TLS_CONNECTIONS_ESTABLISHED("vds.server.network.server.tls-connections-established", Unit.CONNECTION, "Number of secure mTLS connections established"), + VDS_SERVER_NETWORK_CLIENT_INSECURE_CONNECTIONS_ESTABLISHED("vds.server.network.client.insecure-connections-established", Unit.CONNECTION, "Number of insecure (plaintext) connections established"), + VDS_SERVER_NETWORK_SERVER_INSECURE_CONNECTIONS_ESTABLISHED("vds.server.network.server.insecure-connections-established", Unit.CONNECTION, "Number of insecure (plaintext) connections established"), + VDS_SERVER_NETWORK_TLS_CONNECTIONS_BROKEN("vds.server.network.tls-connections-broken", Unit.CONNECTION, "Number of TLS connections broken due to failures during frame encoding or decoding"), + VDS_SERVER_NETWORK_FAILED_TLS_CONFIG_RELOADS("vds.server.network.failed-tls-config-reloads", Unit.FAILURE, "Number of times background reloading of TLS config has failed"), + + // C++ capability metrics + VDS_SERVER_NETWORK_RPC_CAPABILITY_CHECKS_FAILED("vds.server.network.rpc-capability-checks-failed", Unit.FAILURE, "Number of RPC operations that failed due to one or more missing capabilities"), + VDS_SERVER_NETWORK_STATUS_CAPABILITY_CHECKS_FAILED("vds.server.network.status-capability-checks-failed", Unit.FAILURE, "Number of status page operations that failed due to one or more missing capabilities"), + + // C++ Fnet metrics + VDS_SERVER_FNET_NUM_CONNECTIONS("vds.server.fnet.num-connections", Unit.CONNECTION, "Total number of connection objects"); + private final String name; private final Unit unit; diff --git a/container-core/src/main/java/com/yahoo/metrics/Unit.java b/container-core/src/main/java/com/yahoo/metrics/Unit.java index bb7718ddb4c..d3f139cd976 100644 --- a/container-core/src/main/java/com/yahoo/metrics/Unit.java +++ b/container-core/src/main/java/com/yahoo/metrics/Unit.java @@ -12,6 +12,7 @@ public enum Unit { CONNECTION(BaseUnit.CONNECTION), 
DOCUMENT(BaseUnit.DOCUMENT), DOCUMENTID(BaseUnit.DOCUMENTID), + FAILURE(BaseUnit.FAILURE), FILE(BaseUnit.FILE), FRACTION(BaseUnit.FRACTION), HIT(BaseUnit.HIT), @@ -28,8 +29,10 @@ public enum Unit { RECORD(BaseUnit.RECORD), REQUEST(BaseUnit.REQUEST), RESPONSE(BaseUnit.RESPONSE), + RESTART(BaseUnit.RESTART), SCORE(BaseUnit.SCORE), SECOND(BaseUnit.SECOND), + SESSION(BaseUnit.SESSION), TASK(BaseUnit.TASK), THREAD(BaseUnit.THREAD), VERSION(BaseUnit.VERSION), @@ -69,6 +72,7 @@ public enum Unit { CONNECTION("connection"), DOCUMENT("document"), DOCUMENTID("documentid"), + FAILURE("failure"), FILE("file"), FRACTION("fraction"), HIT("hit"), @@ -82,8 +86,10 @@ public enum Unit { RECORD("record"), REQUEST("request"), RESPONSE("response"), + RESTART("restart"), SCORE("score"), SECOND("second", "s"), + SESSION("session"), TASK("task"), THREAD("thread"), VERSION("version"), diff --git a/container-disc/src/main/java/com/yahoo/container/jdisc/metric/JrtMetrics.java b/container-disc/src/main/java/com/yahoo/container/jdisc/metric/JrtMetrics.java index ca6b41962fe..80949df9ee9 100644 --- a/container-disc/src/main/java/com/yahoo/container/jdisc/metric/JrtMetrics.java +++ b/container-disc/src/main/java/com/yahoo/container/jdisc/metric/JrtMetrics.java @@ -1,8 +1,10 @@ // Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
package com.yahoo.container.jdisc.metric; +import com.yahoo.jdisc.Container; import com.yahoo.jdisc.Metric; import com.yahoo.jrt.TransportMetrics; +import com.yahoo.metrics.ContainerMetrics; import static com.yahoo.jrt.TransportMetrics.Snapshot; @@ -24,12 +26,12 @@ class JrtMetrics { void emitMetrics() { Snapshot snapshot = transportMetrics.snapshot(); Snapshot changesSincePrevious = snapshot.changesSince(previousSnapshot); - increment("jrt.transport.tls-certificate-verification-failures", changesSincePrevious.tlsCertificateVerificationFailures()); - increment("jrt.transport.peer-authorization-failures", changesSincePrevious.peerAuthorizationFailures()); - increment("jrt.transport.server.tls-connections-established", changesSincePrevious.serverTlsConnectionsEstablished()); - increment("jrt.transport.client.tls-connections-established", changesSincePrevious.clientTlsConnectionsEstablished()); - increment("jrt.transport.server.unencrypted-connections-established", changesSincePrevious.serverUnencryptedConnectionsEstablished()); - increment("jrt.transport.client.unencrypted-connections-established", changesSincePrevious.clientUnencryptedConnectionsEstablished()); + increment(ContainerMetrics.JRT_TRANSPORT_TLS_CERTIFICATE_VERIFICATION_FAILURES.baseName(), changesSincePrevious.tlsCertificateVerificationFailures()); + increment(ContainerMetrics.JRT_TRANSPORT_PEER_AUTHORIZATION_FAILURES.baseName(), changesSincePrevious.peerAuthorizationFailures()); + increment(ContainerMetrics.JRT_TRANSPORT_SERVER_TLS_CONNECIONTS_ESTABLISHED.baseName(), changesSincePrevious.serverTlsConnectionsEstablished()); + increment(ContainerMetrics.JRT_TRANSPORT_CLIENT_TLS_CONNECTIONS_ESTABLISHED.baseName(), changesSincePrevious.clientTlsConnectionsEstablished()); + increment(ContainerMetrics.JRT_TRANSPORT_SERVER_UNENCRYPTED_CONNECTIONS_ESTABLISHED.baseName(), changesSincePrevious.serverUnencryptedConnectionsEstablished()); + 
increment(ContainerMetrics.JRT_TRANSPORT_CLIENT_UNENCRYPTED_CONNECTIONS_ESTABLISHED.baseName(), changesSincePrevious.clientUnencryptedConnectionsEstablished()); previousSnapshot = snapshot; } |