diff options
Diffstat (limited to 'metrics/src')
-rw-r--r-- | metrics/src/main/java/ai/vespa/metrics/ConfigServerMetrics.java | 48 | ||||
-rw-r--r-- | metrics/src/main/java/ai/vespa/metrics/ControllerMetrics.java | 74 |
2 files changed, 74 insertions, 48 deletions
diff --git a/metrics/src/main/java/ai/vespa/metrics/ConfigServerMetrics.java b/metrics/src/main/java/ai/vespa/metrics/ConfigServerMetrics.java index 7a52c3384cd..a277f6b4dda 100644 --- a/metrics/src/main/java/ai/vespa/metrics/ConfigServerMetrics.java +++ b/metrics/src/main/java/ai/vespa/metrics/ConfigServerMetrics.java @@ -131,53 +131,7 @@ public enum ConfigServerMetrics implements VespaMetrics { NODE_FAIL_THROTTLING("nodeFailThrottling", Unit.BINARY, "Metric indicating when node failure throttling is active. The value 1 means active, 0 means inactive"), DEPLOYMENT_PREPARE_MILLIS("deployment.prepareMillis", Unit.MILLISECOND, "Duration of deployment preparations"), - DEPLOYMENT_ACTIVATE_MILLIS("deployment.activateMillis", Unit.MILLISECOND, "Duration of deployment activations"), - - - // Controller specific metrics - // TODO: Separate out to separate class, don't document - DEPLOYMENT_START("deployment.start", Unit.DEPLOYMENT, "The number of started deployment jobs"), - DEPLOYMENT_NODE_ALLOCATION_FAILURE("deployment.nodeAllocationFailure", Unit.DEPLOYMENT, "The number of deployments failed due to node allocation failures"), - DEPLOYMENT_ENDPOINT_CERTIFICATE_TIMEOUT("deployment.endpointCertificateTimeout", Unit.DEPLOYMENT, "The number of deployments failed due to timeout acquiring endpoint certificate"), - DEPLOYMENT_DEPLOYMENT_FAILURE("deployment.deploymentFailure", Unit.DEPLOYMENT, "The number of deployments that failed"), - DEPLOYMENT_INVALID_APPLICATION("deployment.invalidApplication", Unit.DEPLOYMENT, "Deployments with invalid application package"), - DEPLOYMENT_CONVERGENCE_FAILURE("deployment.convergenceFailure", Unit.DEPLOYMENT, "The number of deployments with convergence failure"), - DEPLOYMENT_TEST_FAILURE("deployment.testFailure", Unit.DEPLOYMENT, "The number of test deployments with test failure"), - DEPLOYMENT_NO_TESTS("deployment.noTests", Unit.DEPLOYMENT, "Deployments with no tests"), - DEPLOYMENT_ERROR("deployment.error", Unit.DEPLOYMENT, "Deployments with error"), - DEPLOYMENT_ABORT("deployment.abort", Unit.DEPLOYMENT, "Deployments that were aborted"), - DEPLOYMENT_CANCEL("deployment.cancel", Unit.DEPLOYMENT, "Deployments that were canceled"), - DEPLOYMENT_SUCCESS("deployment.success", Unit.DEPLOYMENT, "Successful deployments"), - DEPLOYMENT_QUOTA_EXCEEDED("deployment.quotaExceeded", Unit.DEPLOYMENT, "Deployments stopped due to exceeding quota"), - - - BILLING_TENANTS("billing.tenants", Unit.TENANT, "Billing tenants"), - DEPLOYMENT_FAILURE_PERCENTAGE("deployment.failurePercentage", Unit.PERCENTAGE, "Deployment: Failure percentage"), - DEPLOYMENT_AVERAGE_DURATION("deployment.averageDuration", Unit.SECOND, "Deployment duration"), - DEPLOYMENT_FAILING_UPGRADES("deployment.failingUpgrades", Unit.DEPLOYMENT, "Deployment: Failing upgrades"), - DEPLOYMENT_BUILDING_AGE_SECONDS("deployment.buildAgeSeconds", Unit.SECOND, "Deployment: Build age"), - DEPLOYMENT_WARNINGS("deployment.warnings", Unit.ITEM, "The number of application related warnings during deployments"), - DEPLOYMENT_OVERDUE_UPGRADE_SECONDS("deployment.overdueUpgradeSeconds", Unit.SECOND, "Deployment: Overdue upgrade period"), - DEPLOYMENT_OS_CHANGE_DURATION("deployment.osChangeDuration", Unit.SECOND, "Deployment: OS change duration"), - DEPLOYMENT_PLATFORM_CHANGE_DURATION("deployment.platformChangeDuration", Unit.SECOND, "Deployment: Platform change duration"), - DEPLOYMENT_NODE_COUNT_BY_OS_VERSION("deployment.nodeCountByOsVersion", Unit.NODE, "Deployment: Node count by OS version"), - DEPLOYMENT_NODE_COUNT_BY_PLATFORM_VERSION("deployment.nodeCountByPlatformVersion", Unit.NODE, "Deployment: Node count by platform version"), - DEPLOYMENT_BROKEN_SYSTEM_VERSION("deployment.brokenSystemVersion", Unit.BINARY, "Deployment: Value 1 for broken system versions, 0 if not"), - REMAINING_ROTATIONS("remaining_rotations", Unit.ROTATION, "Remaining rotations"), - DNS_QUEUED_REQUESTS("dns.queuedRequests", Unit.REQUEST, "Queued DNS requests"), - ZMS_QUOTA_USAGE("zms.quota.usage", Unit.FRACTION, "ZMS Quota usage per resource type"), - - - // Metrics per API, metrics created in ControllerMaintainer/MetricsReporter - OPERATION_API("operation.api.last", Unit.REQUEST, "Controller: Requests for /api API"), - OPERATION_APPLICATION("operation.application", Unit.REQUEST, "Controller: Requests for /application API"), - OPERATION_CONFIGSERVER("operation.configserver", Unit.REQUEST, "Controller: Requests for /configserver API"), - OPERATION_CONTROLLER("operation.controller", Unit.REQUEST, "Controller: Requests for /controller API"), - OPERATION_FLAGS("operation.flags", Unit.REQUEST, "Controller: Requests for /flags API"), - OPERATION_OS("operation.os", Unit.REQUEST, "Controller: Requests for /os API"), - OPERATION_ROUTING("operation.routing", Unit.REQUEST, "Controller: Requests for /routing API"), - OPERATION_ZONE("operation.zone", Unit.REQUEST, "Controller: Requests for /zone API"); - + DEPLOYMENT_ACTIVATE_MILLIS("deployment.activateMillis", Unit.MILLISECOND, "Duration of deployment activations"); private final String name; diff --git a/metrics/src/main/java/ai/vespa/metrics/ControllerMetrics.java b/metrics/src/main/java/ai/vespa/metrics/ControllerMetrics.java index e0cb0abd593..aa2ebd62284 100644 --- a/metrics/src/main/java/ai/vespa/metrics/ControllerMetrics.java +++ b/metrics/src/main/java/ai/vespa/metrics/ControllerMetrics.java @@ -1,2 +1,74 @@ -package ai.vespa.metrics;public class ControllerMetrics { +package ai.vespa.metrics; + +/** + * @author yngveaasheim + */ +public enum ControllerMetrics implements VespaMetrics { + + // The metrics enumerated in this file is intentionally not added to the metrics reference documentation. + DEPLOYMENT_START("deployment.start", Unit.DEPLOYMENT, "The number of started deployment jobs"), + DEPLOYMENT_NODE_ALLOCATION_FAILURE("deployment.nodeAllocationFailure", Unit.DEPLOYMENT, "The number of deployments failed due to node allocation failures"), + DEPLOYMENT_ENDPOINT_CERTIFICATE_TIMEOUT("deployment.endpointCertificateTimeout", Unit.DEPLOYMENT, "The number of deployments failed due to timeout acquiring endpoint certificate"), + DEPLOYMENT_DEPLOYMENT_FAILURE("deployment.deploymentFailure", Unit.DEPLOYMENT, "The number of deployments that failed"), + DEPLOYMENT_INVALID_APPLICATION("deployment.invalidApplication", Unit.DEPLOYMENT, "Deployments with invalid application package"), + DEPLOYMENT_CONVERGENCE_FAILURE("deployment.convergenceFailure", Unit.DEPLOYMENT, "The number of deployments with convergence failure"), + DEPLOYMENT_TEST_FAILURE("deployment.testFailure", Unit.DEPLOYMENT, "The number of test deployments with test failure"), + DEPLOYMENT_NO_TESTS("deployment.noTests", Unit.DEPLOYMENT, "Deployments with no tests"), + DEPLOYMENT_ERROR("deployment.error", Unit.DEPLOYMENT, "Deployments with error"), + DEPLOYMENT_ABORT("deployment.abort", Unit.DEPLOYMENT, "Deployments that were aborted"), + DEPLOYMENT_CANCEL("deployment.cancel", Unit.DEPLOYMENT, "Deployments that were canceled"), + DEPLOYMENT_SUCCESS("deployment.success", Unit.DEPLOYMENT, "Successful deployments"), + DEPLOYMENT_QUOTA_EXCEEDED("deployment.quotaExceeded", Unit.DEPLOYMENT, "Deployments stopped due to exceeding quota"), + + + BILLING_TENANTS("billing.tenants", Unit.TENANT, "Billing tenants"), + DEPLOYMENT_FAILURE_PERCENTAGE("deployment.failurePercentage", Unit.PERCENTAGE, "Deployment: Failure percentage"), + DEPLOYMENT_AVERAGE_DURATION("deployment.averageDuration", Unit.SECOND, "Deployment duration"), + DEPLOYMENT_FAILING_UPGRADES("deployment.failingUpgrades", Unit.DEPLOYMENT, "Deployment: Failing upgrades"), + DEPLOYMENT_BUILD_AGE_SECONDS("deployment.buildAgeSeconds", Unit.SECOND, "Deployment: The age of a build deployed"), + DEPLOYMENT_WARNINGS("deployment.warnings", Unit.ITEM, "The number of application related warnings during deployments"), + DEPLOYMENT_OVERDUE_UPGRADE_SECONDS("deployment.overdueUpgradeSeconds", Unit.SECOND, "Deployment: Overdue upgrade period"), + DEPLOYMENT_OS_CHANGE_DURATION("deployment.osChangeDuration", Unit.SECOND, "Deployment: OS change duration"), + DEPLOYMENT_PLATFORM_CHANGE_DURATION("deployment.platformChangeDuration", Unit.SECOND, "Deployment: Platform change duration"), + DEPLOYMENT_NODE_COUNT_BY_OS_VERSION("deployment.nodeCountByOsVersion", Unit.NODE, "Deployment: Node count by OS version"), + DEPLOYMENT_NODE_COUNT_BY_PLATFORM_VERSION("deployment.nodeCountByPlatformVersion", Unit.NODE, "Deployment: Node count by platform version"), + DEPLOYMENT_BROKEN_SYSTEM_VERSION("deployment.brokenSystemVersion", Unit.BINARY, "Deployment: Value 1 for broken system versions, 0 if not"), + REMAINING_ROTATIONS("remaining_rotations", Unit.ROTATION, "Remaining rotations"), + DNS_QUEUED_REQUESTS("dns.queuedRequests", Unit.REQUEST, "Queued DNS requests"), + ZMS_QUOTA_USAGE("zms.quota.usage", Unit.FRACTION, "ZMS Quota usage per resource type"), + + + // Metrics per API, metrics created in ControllerMaintainer/MetricsReporter + OPERATION_API("operation.api.last", Unit.REQUEST, "Controller: Requests for /api API"), + OPERATION_APPLICATION("operation.application", Unit.REQUEST, "Controller: Requests for /application API"), + OPERATION_CONFIGSERVER("operation.configserver", Unit.REQUEST, "Controller: Requests for /configserver API"), + OPERATION_CONTROLLER("operation.controller", Unit.REQUEST, "Controller: Requests for /controller API"), + OPERATION_FLAGS("operation.flags", Unit.REQUEST, "Controller: Requests for /flags API"), + OPERATION_OS("operation.os", Unit.REQUEST, "Controller: Requests for /os API"), + OPERATION_ROUTING("operation.routing", Unit.REQUEST, "Controller: Requests for /routing API"), + OPERATION_ZONE("operation.zone", Unit.REQUEST, "Controller: Requests for /zone API"); + + + private final String name; + private final Unit unit; + private final String description; + + ControllerMetrics(String name, Unit unit, String description) { + this.name = name; + this.unit = unit; + this.description = description; + } + + public String baseName() { + return name; + } + + public Unit unit() { + return unit; + } + + public String description() { + return description; + } + } |