aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authoryngveaasheim <yngve@yahooinc.com>2023-03-15 14:30:56 +0100
committeryngveaasheim <yngve@yahooinc.com>2023-03-15 14:30:56 +0100
commitcbe644a95e41118008124b77b9ff7f2c3580dd6f (patch)
treeb693604c358de88123b6b6bbae257fe653b8df8c
parent33118797862c2025aeafdf11422754e6df4396fc (diff)
Move cluster controller metrics to a separate enum file
-rw-r--r--config-model/src/main/java/com/yahoo/vespa/model/admin/monitoring/VespaMetricSet.java47
-rw-r--r--configserver/src/main/java/com/yahoo/vespa/config/server/metrics/ClusterDeploymentMetricsRetriever.java9
-rw-r--r--container-core/src/main/java/com/yahoo/metrics/ClusterControllerMetrics.java53
-rw-r--r--container-core/src/main/java/com/yahoo/metrics/ContainerMetrics.java24
4 files changed, 83 insertions, 50 deletions
diff --git a/config-model/src/main/java/com/yahoo/vespa/model/admin/monitoring/VespaMetricSet.java b/config-model/src/main/java/com/yahoo/vespa/model/admin/monitoring/VespaMetricSet.java
index 5f8d8148b41..f9f7f3a00ae 100644
--- a/config-model/src/main/java/com/yahoo/vespa/model/admin/monitoring/VespaMetricSet.java
+++ b/config-model/src/main/java/com/yahoo/vespa/model/admin/monitoring/VespaMetricSet.java
@@ -1,6 +1,7 @@
// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
package com.yahoo.vespa.model.admin.monitoring;
+import com.yahoo.metrics.ClusterControllerMetrics;
import com.yahoo.metrics.ConfigServerMetrics;
import com.yahoo.metrics.ContainerMetrics;
import com.yahoo.metrics.DistributorMetrics;
@@ -254,31 +255,29 @@ public class VespaMetricSet {
private static Set<Metric> getClusterControllerMetrics() {
Set<Metric> metrics = new LinkedHashSet<>();
- addMetric(metrics, ContainerMetrics.CLUSTER_CONTROLLER_DOWN_COUNT.last());
- addMetric(metrics, ContainerMetrics.CLUSTER_CONTROLLER_INITIALIZING_COUNT.last());
- addMetric(metrics, ContainerMetrics.CLUSTER_CONTROLLER_MAINTENANCE_COUNT.last());
- addMetric(metrics, ContainerMetrics.CLUSTER_CONTROLLER_RETIRED_COUNT.last());
- addMetric(metrics, ContainerMetrics.CLUSTER_CONTROLLER_STOPPING_COUNT.last());
- addMetric(metrics, ContainerMetrics.CLUSTER_CONTROLLER_UP_COUNT.last());
- addMetric(metrics, ContainerMetrics.CLUSTER_CONTROLLER_CLUSTER_STATE_CHANGE_COUNT.baseName());
- addMetric(metrics, ContainerMetrics.CLUSTER_CONTROLLER_BUSY_TICK_TIME_MS, EnumSet.of(last, max, sum, count));
- addMetric(metrics, ContainerMetrics.CLUSTER_CONTROLLER_IDLE_TICK_TIME_MS, EnumSet.of(last, max, sum, count));
+ addMetric(metrics, ClusterControllerMetrics.DOWN_COUNT.last());
+ addMetric(metrics, ClusterControllerMetrics.INITIALIZING_COUNT.last());
+ addMetric(metrics, ClusterControllerMetrics.MAINTENANCE_COUNT.last());
+ addMetric(metrics, ClusterControllerMetrics.RETIRED_COUNT.last());
+ addMetric(metrics, ClusterControllerMetrics.STOPPING_COUNT.last());
+ addMetric(metrics, ClusterControllerMetrics.UP_COUNT.last());
+ addMetric(metrics, ClusterControllerMetrics.CLUSTER_STATE_CHANGE_COUNT.baseName());
+ addMetric(metrics, ClusterControllerMetrics.BUSY_TICK_TIME_MS, EnumSet.of(last, max, sum, count));
+ addMetric(metrics, ClusterControllerMetrics.IDLE_TICK_TIME_MS, EnumSet.of(last, max, sum, count));
- addMetric(metrics, ContainerMetrics.CLUSTER_CONTROLLER_WORK_MS, EnumSet.of(last, sum, count));
-
- addMetric(metrics, ContainerMetrics.CLUSTER_CONTROLLER_IS_MASTER.last());
- addMetric(metrics, ContainerMetrics.CLUSTER_CONTROLLER_REMOTE_TASK_QUEUE_SIZE.last());
+ addMetric(metrics, ClusterControllerMetrics.WORK_MS, EnumSet.of(last, sum, count));
+
+ addMetric(metrics, ClusterControllerMetrics.IS_MASTER.last());
+ addMetric(metrics, ClusterControllerMetrics.REMOTE_TASK_QUEUE_SIZE.last());
// TODO(hakonhall): Update this name once persistent "count" metrics has been implemented.
// DO NOT RELY ON THIS METRIC YET.
- addMetric(metrics, ContainerMetrics.CLUSTER_CONTROLLER_NODE_EVENT_COUNT.baseName());
-
- addMetric(metrics, ContainerMetrics.CLUSTER_CONTROLLER_RESOURCE_USAGE_NODES_ABOVE_LIMIT, EnumSet.of(last, max));
- addMetric(metrics, ContainerMetrics.CLUSTER_CONTROLLER_RESOURCE_USAGE_MAX_MEMORY_UTILIZATION, EnumSet.of(last, max));
- addMetric(metrics, ContainerMetrics.CLUSTER_CONTROLLER_RESOURCE_USAGE_MAX_DISK_UTILIZATION, EnumSet.of(last, max));
- addMetric(metrics, ContainerMetrics.CLUSTER_CONTROLLER_RESOURCE_USAGE_MEMORY_LIMIT.last());
- addMetric(metrics, ContainerMetrics.CLUSTER_CONTROLLER_RESOURCE_USAGE_DISK_LIMIT.last());
-
- addMetric(metrics, ContainerMetrics.CLUSTER_CONTROLLER_REINDEXING_PROGRESS.last());
+ addMetric(metrics, ClusterControllerMetrics.NODE_EVENT_COUNT.baseName());
+ addMetric(metrics, ClusterControllerMetrics.RESOURCE_USAGE_NODES_ABOVE_LIMIT, EnumSet.of(last, max));
+ addMetric(metrics, ClusterControllerMetrics.RESOURCE_USAGE_MAX_MEMORY_UTILIZATION, EnumSet.of(last, max));
+ addMetric(metrics, ClusterControllerMetrics.RESOURCE_USAGE_MAX_DISK_UTILIZATION, EnumSet.of(last, max));
+ addMetric(metrics, ClusterControllerMetrics.RESOURCE_USAGE_MEMORY_LIMIT.last());
+ addMetric(metrics, ClusterControllerMetrics.RESOURCE_USAGE_DISK_LIMIT.last());
+ addMetric(metrics, ClusterControllerMetrics.REINDEXING_PROGRESS.last());
return metrics;
}
@@ -710,6 +709,10 @@ public class VespaMetricSet {
metrics.add(new Metric(nameWithSuffix));
}
+ private static void addMetric(Set<Metric> metrics, ClusterControllerMetrics metric, EnumSet<Suffix> suffixes) {
+ suffixes.forEach(suffix -> metrics.add(new Metric(metric.baseName() + "." + suffix.suffix())));
+ }
+
private static void addMetric(Set<Metric> metrics, ContainerMetrics metric, EnumSet<Suffix> suffixes) {
suffixes.forEach(suffix -> metrics.add(new Metric(metric.baseName() + "." + suffix.suffix())));
}
diff --git a/configserver/src/main/java/com/yahoo/vespa/config/server/metrics/ClusterDeploymentMetricsRetriever.java b/configserver/src/main/java/com/yahoo/vespa/config/server/metrics/ClusterDeploymentMetricsRetriever.java
index 61c0c17264c..7920bbed763 100644
--- a/configserver/src/main/java/com/yahoo/vespa/config/server/metrics/ClusterDeploymentMetricsRetriever.java
+++ b/configserver/src/main/java/com/yahoo/vespa/config/server/metrics/ClusterDeploymentMetricsRetriever.java
@@ -3,6 +3,7 @@ package com.yahoo.vespa.config.server.metrics;
import ai.vespa.util.http.hc5.VespaHttpClientBuilder;
import com.yahoo.concurrent.DaemonThreadFactory;
+import com.yahoo.metrics.ClusterControllerMetrics;
import com.yahoo.metrics.ContainerMetrics;
import com.yahoo.slime.ArrayTraverser;
import com.yahoo.slime.Cursor;
@@ -137,11 +138,11 @@ public class ClusterDeploymentMetricsRetriever {
case VESPA_DISTRIBUTOR -> optionalDouble(values.field("vds.distributor.docsstored.average"))
.ifPresent(docCount -> aggregator.get().addDocumentCount(docCount));
case VESPA_CONTAINER_CLUSTERCONTROLLER ->
- optionalDouble(values.field(ContainerMetrics.CLUSTER_CONTROLLER_RESOURCE_USAGE_MAX_MEMORY_UTILIZATION.max())).ifPresent(memoryUtil ->
+ optionalDouble(values.field(ClusterControllerMetrics.RESOURCE_USAGE_MAX_MEMORY_UTILIZATION.max())).ifPresent(memoryUtil ->
aggregator.get()
- .addMemoryUsage(memoryUtil, values.field(ContainerMetrics.CLUSTER_CONTROLLER_RESOURCE_USAGE_MEMORY_LIMIT.last()).asDouble())
- .addDiskUsage(values.field(ContainerMetrics.CLUSTER_CONTROLLER_RESOURCE_USAGE_MAX_DISK_UTILIZATION.max()).asDouble(),
- values.field(ContainerMetrics.CLUSTER_CONTROLLER_RESOURCE_USAGE_DISK_LIMIT.last()).asDouble()));
+ .addMemoryUsage(memoryUtil, values.field(ClusterControllerMetrics.RESOURCE_USAGE_MEMORY_LIMIT.last()).asDouble())
+ .addDiskUsage(values.field(ClusterControllerMetrics.RESOURCE_USAGE_MAX_DISK_UTILIZATION.max()).asDouble(),
+ values.field(ClusterControllerMetrics.RESOURCE_USAGE_DISK_LIMIT.last()).asDouble()));
}
}
diff --git a/container-core/src/main/java/com/yahoo/metrics/ClusterControllerMetrics.java b/container-core/src/main/java/com/yahoo/metrics/ClusterControllerMetrics.java
new file mode 100644
index 00000000000..fabfd5504f7
--- /dev/null
+++ b/container-core/src/main/java/com/yahoo/metrics/ClusterControllerMetrics.java
@@ -0,0 +1,53 @@
+package com.yahoo.metrics;
+
+/**
+ * @author yngve
+ */
+public enum ClusterControllerMetrics implements VespaMetrics {
+
+ DOWN_COUNT("cluster-controller.down.count", Unit.NODE, "Number of content nodes down"),
+ INITIALIZING_COUNT("cluster-controller.initializing.count", Unit.NODE, "Number of content nodes initializing"),
+ MAINTENANCE_COUNT("cluster-controller.maintenance.count", Unit.NODE, "Number of content nodes in maintenance"),
+ RETIRED_COUNT("cluster-controller.retired.count", Unit.NODE, "Number of content nodes that are retired"),
+ STOPPING_COUNT("cluster-controller.stopping.count", Unit.NODE, "Number of content nodes currently stopping"),
+ UP_COUNT("cluster-controller.up.count", Unit.NODE, "Number of content nodes up"),
+ CLUSTER_STATE_CHANGE_COUNT("cluster-controller.cluster-state-change.count", Unit.NODE, "Number of nodes changing state"),
+ BUSY_TICK_TIME_MS("cluster-controller.busy-tick-time-ms", Unit.MILLISECOND, "Time busy"),
+ IDLE_TICK_TIME_MS("cluster-controller.idle-tick-time-ms", Unit.MILLISECOND, "Time idle"),
+ WORK_MS("cluster-controller.work-ms", Unit.MILLISECOND, "Time used for actual work"),
+ IS_MASTER("cluster-controller.is-master", Unit.BINARY, "1 if this cluster controller is currently the master, or 0 if not"),
+ REMOTE_TASK_QUEUE_SIZE("cluster-controller.remote-task-queue.size", Unit.OPERATION, "Number of remote tasks queued"),
+ // TODO(hakonhall): Update this name once persistent "count" metrics has been implemented.
+ // DO NOT RELY ON THIS METRIC YET.
+ NODE_EVENT_COUNT("cluster-controller.node-event.count", Unit.OPERATION, "Number of node events"),
+ RESOURCE_USAGE_NODES_ABOVE_LIMIT("cluster-controller.resource_usage.nodes_above_limit", Unit.NODE, "The number of content nodes above resource limit, blocking feed"),
+ RESOURCE_USAGE_MAX_MEMORY_UTILIZATION("cluster-controller.resource_usage.max_memory_utilization", Unit.FRACTION, "Current memory utilisation, per content node"),
+ RESOURCE_USAGE_MAX_DISK_UTILIZATION("cluster-controller.resource_usage.max_disk_utilization", Unit.FRACTION, "Current disk space utilisation, per content node"),
+ RESOURCE_USAGE_MEMORY_LIMIT("cluster-controller.resource_usage.memory_limit", Unit.FRACTION, "Disk space limit as a fraction of available disk space"),
+ RESOURCE_USAGE_DISK_LIMIT("cluster-controller.resource_usage.disk_limit", Unit.FRACTION, "Memory space limit as a fraction of available memory"),
+ REINDEXING_PROGRESS("reindexing.progress", Unit.FRACTION, "Re-indexing progress");
+
+
+ private final String name;
+ private final Unit unit;
+ private final String description;
+
+ ClusterControllerMetrics(String name, Unit unit, String description) {
+ this.name = name;
+ this.unit = unit;
+ this.description = description;
+ }
+
+ public String baseName() {
+ return name;
+ }
+
+ public Unit unit() {
+ return unit;
+ }
+
+ public String description() {
+ return description;
+ }
+
+}
diff --git a/container-core/src/main/java/com/yahoo/metrics/ContainerMetrics.java b/container-core/src/main/java/com/yahoo/metrics/ContainerMetrics.java
index ed1d6f7a001..4da028a8b05 100644
--- a/container-core/src/main/java/com/yahoo/metrics/ContainerMetrics.java
+++ b/container-core/src/main/java/com/yahoo/metrics/ContainerMetrics.java
@@ -163,30 +163,6 @@ public enum ContainerMetrics implements VespaMetrics {
SERVER_THREAD_POOL_SIZE("serverThreadPoolSize", Unit.THREAD, "Deprecated. Use jdisc.thread_pool.size instead."),
SERVER_ACTIVE_THREADS("serverActiveThreads", Unit.THREAD, "Deprecated. Use jdisc.thread_pool.active_threads instead."),
-
- // Metrics from the cluster controller
- CLUSTER_CONTROLLER_DOWN_COUNT("cluster-controller.down.count", Unit.NODE, "Number of content nodes down"),
- CLUSTER_CONTROLLER_INITIALIZING_COUNT("cluster-controller.initializing.count", Unit.NODE, "Number of content nodes initializing"),
- CLUSTER_CONTROLLER_MAINTENANCE_COUNT("cluster-controller.maintenance.count", Unit.NODE, "Number of content nodes in maintenance"),
- CLUSTER_CONTROLLER_RETIRED_COUNT("cluster-controller.retired.count", Unit.NODE, "Number of content nodes that are retired"),
- CLUSTER_CONTROLLER_STOPPING_COUNT("cluster-controller.stopping.count", Unit.NODE, "Number of content nodes currently stopping"),
- CLUSTER_CONTROLLER_UP_COUNT("cluster-controller.up.count", Unit.NODE, "Number of content nodes up"),
- CLUSTER_CONTROLLER_CLUSTER_STATE_CHANGE_COUNT("cluster-controller.cluster-state-change.count", Unit.NODE, "Number of nodes changing state"),
- CLUSTER_CONTROLLER_BUSY_TICK_TIME_MS("cluster-controller.busy-tick-time-ms", Unit.MILLISECOND, "Time busy"),
- CLUSTER_CONTROLLER_IDLE_TICK_TIME_MS("cluster-controller.idle-tick-time-ms", Unit.MILLISECOND, "Time idle"),
- CLUSTER_CONTROLLER_WORK_MS("cluster-controller.work-ms", Unit.MILLISECOND, "Time used for actual work"),
- CLUSTER_CONTROLLER_IS_MASTER("cluster-controller.is-master", Unit.BINARY, "1 if this cluster controller is currently the master, or 0 if not"),
- CLUSTER_CONTROLLER_REMOTE_TASK_QUEUE_SIZE("cluster-controller.remote-task-queue.size", Unit.OPERATION, "Number of remote tasks queued"),
- // TODO(hakonhall): Update this name once persistent "count" metrics has been implemented.
- // DO NOT RELY ON THIS METRIC YET.
- CLUSTER_CONTROLLER_NODE_EVENT_COUNT("cluster-controller.node-event.count", Unit.OPERATION, "Number of node events"),
- CLUSTER_CONTROLLER_RESOURCE_USAGE_NODES_ABOVE_LIMIT("cluster-controller.resource_usage.nodes_above_limit", Unit.NODE, "The number of content nodes above resource limit, blocking feed"),
- CLUSTER_CONTROLLER_RESOURCE_USAGE_MAX_MEMORY_UTILIZATION("cluster-controller.resource_usage.max_memory_utilization", Unit.FRACTION, "Current memory utilisation, per content node"),
- CLUSTER_CONTROLLER_RESOURCE_USAGE_MAX_DISK_UTILIZATION("cluster-controller.resource_usage.max_disk_utilization", Unit.FRACTION, "Current disk space utilisation, per content node"),
- CLUSTER_CONTROLLER_RESOURCE_USAGE_MEMORY_LIMIT("cluster-controller.resource_usage.memory_limit", Unit.FRACTION, "Disk space limit as a fraction of available disk space"),
- CLUSTER_CONTROLLER_RESOURCE_USAGE_DISK_LIMIT("cluster-controller.resource_usage.disk_limit", Unit.FRACTION, "Memory space limit as a fraction of available memory"),
- CLUSTER_CONTROLLER_REINDEXING_PROGRESS("reindexing.progress", Unit.FRACTION, "Re-indexing progress"),
-
// Java (JRT) TLS metrics
JRT_TRANSPORT_TLS_CERTIFICATE_VERIFICATION_FAILURES("jrt.transport.tls-certificate-verification-failures", Unit.FAILURE, "TLS certificate verification failures"),
JRT_TRANSPORT_PEER_AUTHORIZATION_FAILURES("jrt.transport.peer-authorization-failures", Unit.FAILURE, "TLS peer authorization failures"),