From dd7cda514d6b49d9c26aea498a3c010cf35d43d5 Mon Sep 17 00:00:00 2001 From: jonmv Date: Wed, 11 Oct 2023 13:38:25 +0200 Subject: Add metrics for job-runner threads --- metrics/src/main/java/ai/vespa/metrics/ControllerMetrics.java | 2 ++ metrics/src/main/java/ai/vespa/metrics/set/InfrastructureMetricSet.java | 2 ++ 2 files changed, 4 insertions(+) (limited to 'metrics') diff --git a/metrics/src/main/java/ai/vespa/metrics/ControllerMetrics.java b/metrics/src/main/java/ai/vespa/metrics/ControllerMetrics.java index 3676be90cd4..f03c54aa822 100644 --- a/metrics/src/main/java/ai/vespa/metrics/ControllerMetrics.java +++ b/metrics/src/main/java/ai/vespa/metrics/ControllerMetrics.java @@ -9,6 +9,8 @@ public enum ControllerMetrics implements VespaMetrics { ATHENZ_REQUEST_ERROR("athenz.request.error", Unit.REQUEST, "Controller: Athenz request error"), ARCHIVE_BUCKET_COUNT("archive.bucketCount", Unit.BUCKET, "Controller: Archive bucket count"), + DEPLOYMENT_JOBS_QUEUED("deployment.jobsQueued", Unit.TASK, "The number of deployment jobs queued"), + DEPLOYMENT_JOBS_ACTIVE("deployment.jobsActive", Unit.TASK, "The number of deployment jobs active"), DEPLOYMENT_START("deployment.start", Unit.DEPLOYMENT, "The number of started deployment jobs"), DEPLOYMENT_NODE_ALLOCATION_FAILURE("deployment.nodeAllocationFailure", Unit.DEPLOYMENT, "The number of deployments failed due to node allocation failures"), DEPLOYMENT_ENDPOINT_CERTIFICATE_TIMEOUT("deployment.endpointCertificateTimeout", Unit.DEPLOYMENT, "The number of deployments failed due to timeout acquiring endpoint certificate"), diff --git a/metrics/src/main/java/ai/vespa/metrics/set/InfrastructureMetricSet.java b/metrics/src/main/java/ai/vespa/metrics/set/InfrastructureMetricSet.java index 6bffddb885a..9443a08e28b 100644 --- a/metrics/src/main/java/ai/vespa/metrics/set/InfrastructureMetricSet.java +++ b/metrics/src/main/java/ai/vespa/metrics/set/InfrastructureMetricSet.java @@ -143,6 +143,8 @@ public class InfrastructureMetricSet { addMetric(metrics, ControllerMetrics.ARCHIVE_BUCKET_COUNT.max()); addMetric(metrics, ControllerMetrics.BILLING_TENANTS.max()); + addMetric(metrics, ControllerMetrics.DEPLOYMENT_JOBS_QUEUED, EnumSet.of(count, sum)); + addMetric(metrics, ControllerMetrics.DEPLOYMENT_JOBS_ACTIVE, EnumSet.of(count, sum)); addMetric(metrics, ControllerMetrics.DEPLOYMENT_ABORT.count()); addMetric(metrics, ControllerMetrics.DEPLOYMENT_AVERAGE_DURATION.max()); addMetric(metrics, ControllerMetrics.DEPLOYMENT_CONVERGENCE_FAILURE.count()); -- cgit v1.2.3