summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--config-model/src/main/java/com/yahoo/vespa/model/admin/monitoring/VespaMetricSet.java98
-rw-r--r--container-core/src/main/java/com/yahoo/metrics/ContainerMetrics.java10
-rw-r--r--container-core/src/main/java/com/yahoo/metrics/StorageMetrics.java21
-rw-r--r--container-core/src/main/java/com/yahoo/metrics/Unit.java6
-rw-r--r--container-disc/src/main/java/com/yahoo/container/jdisc/metric/JrtMetrics.java14
5 files changed, 98 insertions, 51 deletions
diff --git a/config-model/src/main/java/com/yahoo/vespa/model/admin/monitoring/VespaMetricSet.java b/config-model/src/main/java/com/yahoo/vespa/model/admin/monitoring/VespaMetricSet.java
index 000ac92a8a0..091c0559d71 100644
--- a/config-model/src/main/java/com/yahoo/vespa/model/admin/monitoring/VespaMetricSet.java
+++ b/config-model/src/main/java/com/yahoo/vespa/model/admin/monitoring/VespaMetricSet.java
@@ -5,6 +5,10 @@ import com.yahoo.metrics.ContainerMetrics;
import com.yahoo.metrics.DistributorMetrics;
import com.yahoo.metrics.SearchNodeMetrics;
import com.yahoo.metrics.StorageMetrics;
+import com.yahoo.metrics.LogdMetrics;
+import com.yahoo.metrics.SentinelMetrics;
+import com.yahoo.metrics.SlobrokMetrics;
+import com.yahoo.metrics.ConfigServerMetrics;
import com.yahoo.metrics.Suffix;
import java.util.Collections;
@@ -55,12 +59,10 @@ public class VespaMetricSet {
private static Set<Metric> getSentinelMetrics() {
Set<Metric> metrics = new LinkedHashSet<>();
- addMetric(metrics, "sentinel.restarts.count");
- addMetric(metrics, "sentinel.totalRestarts.last");
- addMetric(metrics, "sentinel.uptime.last");
-
- addMetric(metrics, "sentinel.running.count");
- addMetric(metrics, "sentinel.running.last");
+ addMetric(metrics, SentinelMetrics.SENTINEL_RESTARTS.count());
+ addMetric(metrics, SentinelMetrics.SENTINEL_TOTAL_RESTARTS.last());
+ addMetric(metrics, SentinelMetrics.SENTINEL_UPTIME.last());
+ addMetric(metrics, SentinelMetrics.SENTINEL_RUNNING, EnumSet.of(count, last));
return metrics;
}
@@ -68,36 +70,36 @@ public class VespaMetricSet {
private static Set<Metric> getOtherMetrics() {
Set<Metric> metrics = new LinkedHashSet<>();
- addMetric(metrics, "slobrok.heartbeats.failed.count");
- addMetric(metrics, "slobrok.missing.consensus.count");
+ addMetric(metrics, SlobrokMetrics.SLOBROK_HEARTBEATS_FAILED.count());
+ addMetric(metrics, SlobrokMetrics.SLOBROK_MISSING_CONSENSUS.count());
- addMetric(metrics, "logd.processed.lines.count");
- addMetric(metrics, "worker.connections.max");
+ addMetric(metrics, LogdMetrics.LOGD_PROCESSED_LINES.count());
+ addMetric(metrics, "worker.connections.max"); // Internal (routing layer)
addMetric(metrics, "endpoint.certificate.expiry.seconds");
// Java (JRT) TLS metrics
- addMetric(metrics, "jrt.transport.tls-certificate-verification-failures");
- addMetric(metrics, "jrt.transport.peer-authorization-failures");
- addMetric(metrics, "jrt.transport.server.tls-connections-established");
- addMetric(metrics, "jrt.transport.client.tls-connections-established");
- addMetric(metrics, "jrt.transport.server.unencrypted-connections-established");
- addMetric(metrics, "jrt.transport.client.unencrypted-connections-established");
+ addMetric(metrics, ContainerMetrics.JRT_TRANSPORT_TLS_CERTIFICATE_VERIFICATION_FAILURES.baseName());
+ addMetric(metrics, ContainerMetrics.JRT_TRANSPORT_PEER_AUTHORIZATION_FAILURES.baseName());
+ addMetric(metrics, ContainerMetrics.JRT_TRANSPORT_SERVER_TLS_CONNECIONTS_ESTABLISHED.baseName());
+ addMetric(metrics, ContainerMetrics.JRT_TRANSPORT_CLIENT_TLS_CONNECTIONS_ESTABLISHED.baseName());
+ addMetric(metrics, ContainerMetrics.JRT_TRANSPORT_SERVER_UNENCRYPTED_CONNECTIONS_ESTABLISHED.baseName());
+ addMetric(metrics, ContainerMetrics. JRT_TRANSPORT_CLIENT_UNENCRYPTED_CONNECTIONS_ESTABLISHED. baseName());
// C++ TLS metrics
- addMetric(metrics, "vds.server.network.tls-handshakes-failed");
- addMetric(metrics, "vds.server.network.peer-authorization-failures");
- addMetric(metrics, "vds.server.network.client.tls-connections-established");
- addMetric(metrics, "vds.server.network.server.tls-connections-established");
- addMetric(metrics, "vds.server.network.client.insecure-connections-established");
- addMetric(metrics, "vds.server.network.server.insecure-connections-established");
- addMetric(metrics, "vds.server.network.tls-connections-broken");
- addMetric(metrics, "vds.server.network.failed-tls-config-reloads");
+ addMetric(metrics, StorageMetrics.VDS_SERVER_NETWORK_TLS_HANDSHAKES_FAILED.count());
+ addMetric(metrics, StorageMetrics.VDS_SERVER_NETWORK_PEER_AUTHORIZATION_FAILURES.count());
+ addMetric(metrics, StorageMetrics.VDS_SERVER_NETWORK_CLIENT_TLS_CONNECTIONS_ESTABLISHED.count());
+ addMetric(metrics, StorageMetrics.VDS_SERVER_NETWORK_SERVER_TLS_CONNECTIONS_ESTABLISHED.count());
+ addMetric(metrics, StorageMetrics.VDS_SERVER_NETWORK_CLIENT_INSECURE_CONNECTIONS_ESTABLISHED.count());
+ addMetric(metrics, StorageMetrics.VDS_SERVER_NETWORK_SERVER_INSECURE_CONNECTIONS_ESTABLISHED.count());
+ addMetric(metrics, StorageMetrics.VDS_SERVER_NETWORK_TLS_CONNECTIONS_BROKEN.count());
+ addMetric(metrics, StorageMetrics.VDS_SERVER_NETWORK_FAILED_TLS_CONFIG_RELOADS.count());
// C++ capability metrics
- addMetric(metrics, "vds.server.network.rpc-capability-checks-failed");
- addMetric(metrics, "vds.server.network.status-capability-checks-failed");
+ addMetric(metrics, StorageMetrics.VDS_SERVER_NETWORK_RPC_CAPABILITY_CHECKS_FAILED.count());
+ addMetric(metrics, StorageMetrics.VDS_SERVER_NETWORK_STATUS_CAPABILITY_CHECKS_FAILED.count());
// C++ Fnet metrics
- addMetric(metrics, "vds.server.fnet.num-connections");
+ addMetric(metrics, StorageMetrics.VDS_SERVER_FNET_NUM_CONNECTIONS.count());
// Node certificate
addMetric(metrics, "node-certificate.expiry.seconds");
@@ -108,22 +110,20 @@ public class VespaMetricSet {
private static Set<Metric> getConfigServerMetrics() {
Set<Metric> metrics =new LinkedHashSet<>();
- addMetric(metrics, "configserver.requests.count");
- addMetric(metrics, "configserver.failedRequests.count");
- addMetric(metrics, "configserver.latency.max");
- addMetric(metrics, "configserver.latency.sum");
- addMetric(metrics, "configserver.latency.count");
- addMetric(metrics, "configserver.cacheConfigElems.last");
- addMetric(metrics, "configserver.cacheChecksumElems.last");
- addMetric(metrics, "configserver.hosts.last");
- addMetric(metrics, "configserver.delayedResponses.count");
- addMetric(metrics, "configserver.sessionChangeErrors.count");
-
- addMetric(metrics, "configserver.zkZNodes.last");
- addMetric(metrics, "configserver.zkAvgLatency.last");
- addMetric(metrics, "configserver.zkMaxLatency.last");
- addMetric(metrics, "configserver.zkConnections.last");
- addMetric(metrics, "configserver.zkOutstandingRequests.last");
+ addMetric(metrics, ConfigServerMetrics.CONFIGSERVER_REQUESTS.count());
+ addMetric(metrics, ConfigServerMetrics.CONFIGSERVER_FAILED_REQUESTS.count());
+ addMetric(metrics, ConfigServerMetrics.CONFIGSERVER_LATENCY, EnumSet.of(max, sum, count));
+ addMetric(metrics, ConfigServerMetrics.CONFIGSERVER_CACHE_CONFIG_ELEMS.last());
+ addMetric(metrics, ConfigServerMetrics.CONFIGSERVER_CACHE_CHECKSUM_ELEMS.last());
+ addMetric(metrics, ConfigServerMetrics.CONFIGSERVER_HOSTS.last());
+ addMetric(metrics, ConfigServerMetrics.CONFIGSERVER_DELAYED_RESPONSES.count());
+ addMetric(metrics, ConfigServerMetrics.CONFIGSERVER_SESSION_CHANGE_ERRORS.count());
+
+ addMetric(metrics, ConfigServerMetrics.CONFIGSERVER_ZK_Z_NODES.last());
+ addMetric(metrics, ConfigServerMetrics.CONFIGSERVER_ZK_AVG_LATENCY.last());
+ addMetric(metrics, ConfigServerMetrics.CONFIGSERVER_ZK_MAX_LATENCY.last());
+ addMetric(metrics, ConfigServerMetrics.CONFIGSERVER_ZK_CONNECTIONS.last());
+ addMetric(metrics, ConfigServerMetrics.CONFIGSERVER_ZK_OUTSTANDING_REQUESTS.last());
return metrics;
}
@@ -721,6 +721,18 @@ public class VespaMetricSet {
private static void addMetric(Set<Metric> metrics, DistributorMetrics metric, EnumSet<Suffix> suffixes) {
suffixes.forEach(suffix -> metrics.add(new Metric(metric.baseName() + "." + suffix.suffix())));
}
+ private static void addMetric(Set<Metric> metrics, SentinelMetrics metric, EnumSet<Suffix> suffixes) {
+ suffixes.forEach(suffix -> metrics.add(new Metric(metric.baseName() + "." + suffix.suffix())));
+ }
+ private static void addMetric(Set<Metric> metrics, SlobrokMetrics metric, EnumSet<Suffix> suffixes) {
+ suffixes.forEach(suffix -> metrics.add(new Metric(metric.baseName() + "." + suffix.suffix())));
+ }
+ private static void addMetric(Set<Metric> metrics, LogdMetrics metric, EnumSet<Suffix> suffixes) {
+ suffixes.forEach(suffix -> metrics.add(new Metric(metric.baseName() + "." + suffix.suffix())));
+ }
+ private static void addMetric(Set<Metric> metrics, ConfigServerMetrics metric, EnumSet<Suffix> suffixes) {
+ suffixes.forEach(suffix -> metrics.add(new Metric(metric.baseName() + "." + suffix.suffix())));
+ }
private static void addMetric(Set<Metric> metrics, String metricName, Iterable<String> aggregateSuffices) {
for (String suffix : aggregateSuffices) {
metrics.add(new Metric(metricName + "." + suffix));
diff --git a/container-core/src/main/java/com/yahoo/metrics/ContainerMetrics.java b/container-core/src/main/java/com/yahoo/metrics/ContainerMetrics.java
index 0b61c937cb8..9c0b8cd7d71 100644
--- a/container-core/src/main/java/com/yahoo/metrics/ContainerMetrics.java
+++ b/container-core/src/main/java/com/yahoo/metrics/ContainerMetrics.java
@@ -184,7 +184,15 @@ public enum ContainerMetrics implements VespaMetrics {
CLUSTER_CONTROLLER_RESOURCE_USAGE_MAX_DISK_UTILIZATION("cluster-controller.resource_usage.max_disk_utilization", Unit.FRACTION, "Current disk space utilisation, per content node"),
CLUSTER_CONTROLLER_RESOURCE_USAGE_MEMORY_LIMIT("cluster-controller.resource_usage.memory_limit", Unit.FRACTION, "Disk space limit as a fraction of available disk space"),
CLUSTER_CONTROLLER_RESOURCE_USAGE_DISK_LIMIT("cluster-controller.resource_usage.disk_limit", Unit.FRACTION, "Memory space limit as a fraction of available memory"),
- CLUSTER_CONTROLLER_REINDEXING_PROGRESS("reindexing.progress", Unit.FRACTION, "Re-indexing progress");
+ CLUSTER_CONTROLLER_REINDEXING_PROGRESS("reindexing.progress", Unit.FRACTION, "Re-indexing progress"),
+
+ // Java (JRT) TLS metrics
+ JRT_TRANSPORT_TLS_CERTIFICATE_VERIFICATION_FAILURES("jrt.transport.tls-certificate-verification-failures", Unit.FAILURE, "TLS certificate verification failures"),
+ JRT_TRANSPORT_PEER_AUTHORIZATION_FAILURES("jrt.transport.peer-authorization-failures", Unit.FAILURE, "TLS peer authorization failures"),
+ JRT_TRANSPORT_SERVER_TLS_CONNECIONTS_ESTABLISHED("jrt.transport.server.tls-connections-established", Unit.CONNECTION, "TLS server connections established"),
+ JRT_TRANSPORT_CLIENT_TLS_CONNECTIONS_ESTABLISHED("jrt.transport.client.tls-connections-established", Unit.CONNECTION, "TLS client connections established"),
+ JRT_TRANSPORT_SERVER_UNENCRYPTED_CONNECTIONS_ESTABLISHED("jrt.transport.server.unencrypted-connections-established", Unit.CONNECTION, "Unencrypted server connections established"),
+ JRT_TRANSPORT_CLIENT_UNENCRYPTED_CONNECTIONS_ESTABLISHED("jrt.transport.client.unencrypted-connections-established", Unit.CONNECTION, "Unencrypted client connections established");
private final String name;
diff --git a/container-core/src/main/java/com/yahoo/metrics/StorageMetrics.java b/container-core/src/main/java/com/yahoo/metrics/StorageMetrics.java
index 2a59e5a9d92..d67b67d04b7 100644
--- a/container-core/src/main/java/com/yahoo/metrics/StorageMetrics.java
+++ b/container-core/src/main/java/com/yahoo/metrics/StorageMetrics.java
@@ -75,7 +75,26 @@ public enum StorageMetrics implements VespaMetrics {
VDS_MERGETHROTTLER_LOCALLYEXECUTEDMERGES_OK("vds.mergethrottler.locallyexecutedmerges.ok", Unit.INSTANCE, "The number of successful merges for 'locallyexecutedmerges'"),
VDS_MERGETHROTTLER_MERGECHAINS_OK("vds.mergethrottler.mergechains.ok", Unit.INSTANCE, "The number of successful merges for 'mergechains'"),
VDS_MERGETHROTTLER_MERGECHAINS_FAILURES_BUSY("vds.mergethrottler.mergechains.failures.busy", Unit.INSTANCE, "The number of merges that failed because the storage node was busy"),
- VDS_MERGETHROTTLER_MERGECHAINS_FAILURES_TOTAL("vds.mergethrottler.mergechains.failures.total", Unit.INSTANCE, "Sum of all failures");
+ VDS_MERGETHROTTLER_MERGECHAINS_FAILURES_TOTAL("vds.mergethrottler.mergechains.failures.total", Unit.INSTANCE, "Sum of all failures"),
+
+
+ // C++ TLS metrics - these come from both the distributor and storage
+ VDS_SERVER_NETWORK_TLS_HANDSHAKES_FAILED("vds.server.network.tls-handshakes-failed", Unit.OPERATION, "Number of client or server connection attempts that failed during TLS handshaking"),
+ VDS_SERVER_NETWORK_PEER_AUTHORIZATION_FAILURES("vds.server.network.peer-authorization-failures", Unit.FAILURE, "Number of TLS connection attempts failed due to bad or missing peer certificate credentials"),
+ VDS_SERVER_NETWORK_CLIENT_TLS_CONNECTIONS_ESTABLISHED("vds.server.network.client.tls-connections-established", Unit.CONNECTION, "Number of secure mTLS connections established"),
+ VDS_SERVER_NETWORK_SERVER_TLS_CONNECTIONS_ESTABLISHED("vds.server.network.server.tls-connections-established", Unit.CONNECTION, "Number of secure mTLS connections established"),
+ VDS_SERVER_NETWORK_CLIENT_INSECURE_CONNECTIONS_ESTABLISHED("vds.server.network.client.insecure-connections-established", Unit.CONNECTION, "Number of insecure (plaintext) connections established"),
+ VDS_SERVER_NETWORK_SERVER_INSECURE_CONNECTIONS_ESTABLISHED("vds.server.network.server.insecure-connections-established", Unit.CONNECTION, "Number of insecure (plaintext) connections established"),
+ VDS_SERVER_NETWORK_TLS_CONNECTIONS_BROKEN("vds.server.network.tls-connections-broken", Unit.CONNECTION, "Number of TLS connections broken due to failures during frame encoding or decoding"),
+ VDS_SERVER_NETWORK_FAILED_TLS_CONFIG_RELOADS("vds.server.network.failed-tls-config-reloads", Unit.FAILURE, "Number of times background reloading of TLS config has failed"),
+
+ // C++ capability metrics
+ VDS_SERVER_NETWORK_RPC_CAPABILITY_CHECKS_FAILED("vds.server.network.rpc-capability-checks-failed", Unit.FAILURE, "Number of RPC operations that failed due to one or more missing capabilities"),
+ VDS_SERVER_NETWORK_STATUS_CAPABILITY_CHECKS_FAILED("vds.server.network.status-capability-checks-failed", Unit.FAILURE, "Number of status page operations that failed due to one or more missing capabilities"),
+
+ // C++ Fnet metrics
+ VDS_SERVER_FNET_NUM_CONNECTIONS("vds.server.fnet.num-connections", Unit.CONNECTION, "Total number of connection objects");
+
private final String name;
private final Unit unit;
diff --git a/container-core/src/main/java/com/yahoo/metrics/Unit.java b/container-core/src/main/java/com/yahoo/metrics/Unit.java
index bb7718ddb4c..d3f139cd976 100644
--- a/container-core/src/main/java/com/yahoo/metrics/Unit.java
+++ b/container-core/src/main/java/com/yahoo/metrics/Unit.java
@@ -12,6 +12,7 @@ public enum Unit {
CONNECTION(BaseUnit.CONNECTION),
DOCUMENT(BaseUnit.DOCUMENT),
DOCUMENTID(BaseUnit.DOCUMENTID),
+ FAILURE(BaseUnit.FAILURE),
FILE(BaseUnit.FILE),
FRACTION(BaseUnit.FRACTION),
HIT(BaseUnit.HIT),
@@ -28,8 +29,10 @@ public enum Unit {
RECORD(BaseUnit.RECORD),
REQUEST(BaseUnit.REQUEST),
RESPONSE(BaseUnit.RESPONSE),
+ RESTART(BaseUnit.RESTART),
SCORE(BaseUnit.SCORE),
SECOND(BaseUnit.SECOND),
+ SESSION(BaseUnit.SESSION),
TASK(BaseUnit.TASK),
THREAD(BaseUnit.THREAD),
VERSION(BaseUnit.VERSION),
@@ -69,6 +72,7 @@ public enum Unit {
CONNECTION("connection"),
DOCUMENT("document"),
DOCUMENTID("documentid"),
+ FAILURE("failure"),
FILE("file"),
FRACTION("fraction"),
HIT("hit"),
@@ -82,8 +86,10 @@ public enum Unit {
RECORD("record"),
REQUEST("request"),
RESPONSE("response"),
+ RESTART("restart"),
SCORE("score"),
SECOND("second", "s"),
+ SESSION("session"),
TASK("task"),
THREAD("thread"),
VERSION("version"),
diff --git a/container-disc/src/main/java/com/yahoo/container/jdisc/metric/JrtMetrics.java b/container-disc/src/main/java/com/yahoo/container/jdisc/metric/JrtMetrics.java
index ca6b41962fe..80949df9ee9 100644
--- a/container-disc/src/main/java/com/yahoo/container/jdisc/metric/JrtMetrics.java
+++ b/container-disc/src/main/java/com/yahoo/container/jdisc/metric/JrtMetrics.java
@@ -1,8 +1,10 @@
// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
package com.yahoo.container.jdisc.metric;
+import com.yahoo.jdisc.Container;
import com.yahoo.jdisc.Metric;
import com.yahoo.jrt.TransportMetrics;
+import com.yahoo.metrics.ContainerMetrics;
import static com.yahoo.jrt.TransportMetrics.Snapshot;
@@ -24,12 +26,12 @@ class JrtMetrics {
void emitMetrics() {
Snapshot snapshot = transportMetrics.snapshot();
Snapshot changesSincePrevious = snapshot.changesSince(previousSnapshot);
- increment("jrt.transport.tls-certificate-verification-failures", changesSincePrevious.tlsCertificateVerificationFailures());
- increment("jrt.transport.peer-authorization-failures", changesSincePrevious.peerAuthorizationFailures());
- increment("jrt.transport.server.tls-connections-established", changesSincePrevious.serverTlsConnectionsEstablished());
- increment("jrt.transport.client.tls-connections-established", changesSincePrevious.clientTlsConnectionsEstablished());
- increment("jrt.transport.server.unencrypted-connections-established", changesSincePrevious.serverUnencryptedConnectionsEstablished());
- increment("jrt.transport.client.unencrypted-connections-established", changesSincePrevious.clientUnencryptedConnectionsEstablished());
+ increment(ContainerMetrics.JRT_TRANSPORT_TLS_CERTIFICATE_VERIFICATION_FAILURES.baseName(), changesSincePrevious.tlsCertificateVerificationFailures());
+ increment(ContainerMetrics.JRT_TRANSPORT_PEER_AUTHORIZATION_FAILURES.baseName(), changesSincePrevious.peerAuthorizationFailures());
+ increment(ContainerMetrics.JRT_TRANSPORT_SERVER_TLS_CONNECIONTS_ESTABLISHED.baseName(), changesSincePrevious.serverTlsConnectionsEstablished());
+ increment(ContainerMetrics.JRT_TRANSPORT_CLIENT_TLS_CONNECTIONS_ESTABLISHED.baseName(), changesSincePrevious.clientTlsConnectionsEstablished());
+ increment(ContainerMetrics.JRT_TRANSPORT_SERVER_UNENCRYPTED_CONNECTIONS_ESTABLISHED.baseName(), changesSincePrevious.serverUnencryptedConnectionsEstablished());
+ increment(ContainerMetrics.JRT_TRANSPORT_CLIENT_UNENCRYPTED_CONNECTIONS_ESTABLISHED.baseName(), changesSincePrevious.clientUnencryptedConnectionsEstablished());
previousSnapshot = snapshot;
}