summary | refs | log | tree | commit | diff | stats
path: root/metrics
diff options
context:
space:
mode:
author Ola Aunrønning <olaa@yahooinc.com> 2023-08-31 11:15:30 +0200
committer GitHub <noreply@github.com> 2023-08-31 11:15:30 +0200
commit c5c458f910f45b6d6c52b87e0a93c2ed17f87217 (patch)
tree fa60c947fa65762960db1d6a8dbaa3100e19fc67 /metrics
parent 06faa0b17b887877a68bb6fc3ab9431825644636 (diff)
parent 5138fda92de02a3b35b0105936f2d8a87e174607 (diff)
Merge pull request #28302 from vespa-engine/yngveaasheim/gc-infrastructure-metrics-for-vespa9
Garbage collect infrastructure metrics for Vespa 9
Diffstat (limited to 'metrics')
-rw-r--r-- metrics/src/main/java/ai/vespa/metrics/set/InfrastructureMetricSet.java 115
1 files changed, 57 insertions, 58 deletions
diff --git a/metrics/src/main/java/ai/vespa/metrics/set/InfrastructureMetricSet.java b/metrics/src/main/java/ai/vespa/metrics/set/InfrastructureMetricSet.java
index 38533f40950..b3f27fa6117 100644
--- a/metrics/src/main/java/ai/vespa/metrics/set/InfrastructureMetricSet.java
+++ b/metrics/src/main/java/ai/vespa/metrics/set/InfrastructureMetricSet.java
@@ -52,74 +52,73 @@ public class InfrastructureMetricSet {
addMetric(metrics, ConfigServerMetrics.DELAYED_RESPONSES.count());
addMetric(metrics, ConfigServerMetrics.SESSION_CHANGE_ERRORS.count());
- addMetric(metrics, ConfigServerMetrics.ZK_Z_NODES, EnumSet.of(max, last)); // TODO: Vespa 9: Remove last.
- addMetric(metrics, ConfigServerMetrics.ZK_AVG_LATENCY, EnumSet.of(max, last)); // TODO: Vespa 9: Remove last.
- addMetric(metrics, ConfigServerMetrics.ZK_MAX_LATENCY, EnumSet.of(max, last)); // TODO: Vespa 9: Remove last.
- addMetric(metrics, ConfigServerMetrics.ZK_CONNECTIONS, EnumSet.of(max, last)); // TODO: Vespa 9: Remove last.
+ addMetric(metrics, ConfigServerMetrics.ZK_Z_NODES.max());
+ addMetric(metrics, ConfigServerMetrics.ZK_MAX_LATENCY, EnumSet.of(max, average));
+ addMetric(metrics, ConfigServerMetrics.ZK_CONNECTIONS.max());
addMetric(metrics, ConfigServerMetrics.ZK_CONNECTION_LOST.count());
addMetric(metrics, ConfigServerMetrics.ZK_RECONNECTED.count());
addMetric(metrics, ConfigServerMetrics.ZK_SUSPENDED.count());
- addMetric(metrics, ConfigServerMetrics.ZK_OUTSTANDING_REQUESTS, EnumSet.of(max, last)); // TODO: Vespa 9: Remove last.
+ addMetric(metrics, ConfigServerMetrics.ZK_OUTSTANDING_REQUESTS.max());
// Node repository metrics
- addMetric(metrics, ConfigServerMetrics.NODES_ACTIVE, EnumSet.of(max, last)); // TODO: Vespa 9: Remove last
- addMetric(metrics, ConfigServerMetrics.NODES_NON_ACTIVE_FRACTION, EnumSet.of(max, last)); // TODO: Vespa 9: Remove last
- addMetric(metrics, ConfigServerMetrics.CLUSTER_COST.last());
- addMetric(metrics, ConfigServerMetrics.CLUSTER_LOAD_IDEAL_CPU.last());
- addMetric(metrics, ConfigServerMetrics.CLUSTER_LOAD_IDEAL_MEMORY.last());
- addMetric(metrics, ConfigServerMetrics.CLUSTER_LOAD_IDEAL_DISK.last());
+ addMetric(metrics, ConfigServerMetrics.NODES_ACTIVE.max());
+ addMetric(metrics, ConfigServerMetrics.NODES_NON_ACTIVE_FRACTION.max());
+ addMetric(metrics, ConfigServerMetrics.CLUSTER_COST.max());
+ addMetric(metrics, ConfigServerMetrics.CLUSTER_LOAD_IDEAL_CPU.max());
+ addMetric(metrics, ConfigServerMetrics.CLUSTER_LOAD_IDEAL_MEMORY.max());
+ addMetric(metrics, ConfigServerMetrics.CLUSTER_LOAD_IDEAL_DISK.max());
addMetric(metrics, ConfigServerMetrics.WANT_TO_REBOOT.max());
addMetric(metrics, ConfigServerMetrics.WANT_TO_RESTART.max());
addMetric(metrics, ConfigServerMetrics.WANT_TO_RETIRE.max());
addMetric(metrics, ConfigServerMetrics.RETIRED.max());
addMetric(metrics, ConfigServerMetrics.WANT_TO_CHANGE_VESPA_VERSION.max());
- addMetric(metrics, ConfigServerMetrics.HAS_WIRE_GUARD_KEY, EnumSet.of(max, last)); // TODO: Vespa 9: Remove last
+ addMetric(metrics, ConfigServerMetrics.HAS_WIRE_GUARD_KEY.max());
addMetric(metrics, ConfigServerMetrics.WANT_TO_DEPROVISION.max());
- addMetric(metrics, ConfigServerMetrics.SUSPENDED, EnumSet.of(max, last)); // TODO: Vespa 9: Remove last
+ addMetric(metrics, ConfigServerMetrics.SUSPENDED.max());
addMetric(metrics, ConfigServerMetrics.SOME_SERVICES_DOWN.max());
- addMetric(metrics, ConfigServerMetrics.NODE_FAILER_BAD_NODE, EnumSet.of(max, last)); // TODO: Vespa 9: Remove last.
+ addMetric(metrics, ConfigServerMetrics.NODE_FAILER_BAD_NODE.max());
addMetric(metrics, ConfigServerMetrics.LOCK_ATTEMPT_LOCKED_LOAD, EnumSet.of(max,average));
- addMetric(metrics, ConfigServerMetrics.HOSTED_VESPA_DOCKER_ALLOCATED_CAPACITY_CPU, EnumSet.of(max, average, last)); // TODO: Vespa 9: Remove last, average?
- addMetric(metrics, ConfigServerMetrics.HOSTED_VESPA_DOCKER_ALLOCATED_CAPACITY_MEM, EnumSet.of(max, average, last)); // TODO: Vespa 9: Remove last, average?
- addMetric(metrics, ConfigServerMetrics.HOSTED_VESPA_DOCKER_ALLOCATED_CAPACITY_DISK, EnumSet.of(max, average, last)); // TODO: Vespa 9: Remove last, average?
+ addMetric(metrics, ConfigServerMetrics.HOSTED_VESPA_DOCKER_ALLOCATED_CAPACITY_CPU.max());
+ addMetric(metrics, ConfigServerMetrics.HOSTED_VESPA_DOCKER_ALLOCATED_CAPACITY_MEM.max());
+ addMetric(metrics, ConfigServerMetrics.HOSTED_VESPA_DOCKER_ALLOCATED_CAPACITY_DISK.max());
addMetric(metrics, ConfigServerMetrics.HOSTED_VESPA_DOCKER_FREE_CAPACITY_CPU.max());
addMetric(metrics, ConfigServerMetrics.HOSTED_VESPA_DOCKER_FREE_CAPACITY_MEM.max());
addMetric(metrics, ConfigServerMetrics.HOSTED_VESPA_DOCKER_FREE_CAPACITY_DISK.max());
- addMetric(metrics, ConfigServerMetrics.HOSTED_VESPA_DOCKER_TOTAL_CAPACITY_CPU, EnumSet.of(max,average));
- addMetric(metrics, ConfigServerMetrics.HOSTED_VESPA_DOCKER_TOTAL_CAPACITY_DISK, EnumSet.of(max,average));
- addMetric(metrics, ConfigServerMetrics.HOSTED_VESPA_DOCKER_TOTAL_CAPACITY_MEM, EnumSet.of(max,average));
- addMetric(metrics, ConfigServerMetrics.HOSTED_VESPA_DOCKER_SKEW, EnumSet.of(max, last)); // TODO: Vespa 9: Remove last
- addMetric(metrics, ConfigServerMetrics.HOSTED_VESPA_PENDING_REDEPLOYMENTS, EnumSet.of(max, last)); // TODO: Vespa 9: Remove last
+ addMetric(metrics, ConfigServerMetrics.HOSTED_VESPA_DOCKER_TOTAL_CAPACITY_CPU.max());
+ addMetric(metrics, ConfigServerMetrics.HOSTED_VESPA_DOCKER_TOTAL_CAPACITY_DISK.max());
+ addMetric(metrics, ConfigServerMetrics.HOSTED_VESPA_DOCKER_TOTAL_CAPACITY_MEM.max());
+ addMetric(metrics, ConfigServerMetrics.HOSTED_VESPA_DOCKER_SKEW.max());
+ addMetric(metrics, ConfigServerMetrics.HOSTED_VESPA_PENDING_REDEPLOYMENTS.max());
addMetric(metrics, ConfigServerMetrics.HOSTED_VESPA_ACTIVE_HOSTS.max());
addMetric(metrics, ConfigServerMetrics.HOSTED_VESPA_DIRTY_HOSTS.max());
addMetric(metrics, ConfigServerMetrics.HOSTED_VESPA_FAILED_HOSTS.max());
addMetric(metrics, ConfigServerMetrics.HOSTED_VESPA_INACTIVE_HOSTS.max());
- addMetric(metrics, ConfigServerMetrics.HOSTED_VESPA_PROVISIONED_HOSTS, EnumSet.of(max, last)); // TODO: Vespa 9: Remove last
+ addMetric(metrics, ConfigServerMetrics.HOSTED_VESPA_PROVISIONED_HOSTS.max());
addMetric(metrics, ConfigServerMetrics.HOSTED_VESPA_READY_HOSTS.max());
addMetric(metrics, ConfigServerMetrics.HOSTED_VESPA_RESERVED_HOSTS.max());
- addMetric(metrics, ConfigServerMetrics.HOSTED_VESPA_PARKED_HOSTS, EnumSet.of(max, last)); // TODO: Vespa 9: Remove last
+ addMetric(metrics, ConfigServerMetrics.HOSTED_VESPA_PARKED_HOSTS.max());
addMetric(metrics, ConfigServerMetrics.HOSTED_VESPA_ACTIVE_NODES.max());
addMetric(metrics, ConfigServerMetrics.HOSTED_VESPA_FAILED_NODES.max());
- addMetric(metrics, ConfigServerMetrics.HOSTED_VESPA_PARKED_NODES, EnumSet.of(max, last)); // TODO: Vespa 9: Remove last
+ addMetric(metrics, ConfigServerMetrics.HOSTED_VESPA_PARKED_NODES.max());
addMetric(metrics, ConfigServerMetrics.RPC_SERVER_WORK_QUEUE_SIZE.average());
- addMetric(metrics, ConfigServerMetrics.DEPLOYMENT_ACTIVATE_MILLIS, EnumSet.of(max, last)); // TODO: Vespa 9: Remove last
- addMetric(metrics, ConfigServerMetrics.DEPLOYMENT_PREPARE_MILLIS, EnumSet.of(max, last)); // TODO: Vespa 9: Remove last
+ addMetric(metrics, ConfigServerMetrics.DEPLOYMENT_ACTIVATE_MILLIS.max());
+ addMetric(metrics, ConfigServerMetrics.DEPLOYMENT_PREPARE_MILLIS.max());
addMetric(metrics, ConfigServerMetrics.LOCK_ATTEMPT_LOCKED_LOAD, EnumSet.of(max, average));
- addMetric(metrics, ConfigServerMetrics.MAINTENANCE_SUCCESS_FACTOR_DEVIATION, EnumSet.of(max, last)); // TODO: Vespa 9: Remove last
+ addMetric(metrics, ConfigServerMetrics.MAINTENANCE_SUCCESS_FACTOR_DEVIATION.max());
addMetric(metrics, ConfigServerMetrics.MAINTENANCE_DEPLOYMENT_FAILURE.count());
addMetric(metrics, ConfigServerMetrics.MAINTENANCE_DEPLOYMENT_TRANSIENT_FAILURE.count());
addMetric(metrics, ConfigServerMetrics.OVERCOMMITTED_HOSTS.max());
- addMetric(metrics, ConfigServerMetrics.SPARE_HOST_CAPACITY, EnumSet.of(max, last)); // TODO: Vespa 9: Remove last.
- addMetric(metrics, ConfigServerMetrics.THROTTLED_HOST_FAILURES, EnumSet.of(max, last)); // TODO: Vespa 9: Remove last
- addMetric(metrics, ConfigServerMetrics.THROTTLED_NODE_FAILURES, EnumSet.of(max, last)); // TODO: Vespa 9: Remove last
- addMetric(metrics, ConfigServerMetrics.NODE_FAIL_THROTTLING, EnumSet.of(max, last)); // TODO: Vespa 9: Remove last
+ addMetric(metrics, ConfigServerMetrics.SPARE_HOST_CAPACITY, EnumSet.of(min, max, last)); // TODO: Vespa 9: Remove last. WAIT
+ addMetric(metrics, ConfigServerMetrics.THROTTLED_HOST_FAILURES.max());
+ addMetric(metrics, ConfigServerMetrics.THROTTLED_NODE_FAILURES.max());
+ addMetric(metrics, ConfigServerMetrics.NODE_FAIL_THROTTLING.max());
addMetric(metrics, ConfigServerMetrics.ORCHESTRATOR_LOCK_ACQUIRE_SUCCESS.count());
addMetric(metrics, ConfigServerMetrics.ORCHESTRATOR_LOCK_ACQUIRE_TIMEOUT.count());
- addMetric(metrics, ConfigServerMetrics.ZONE_WORKING, EnumSet.of(max, last)); // TODO: Vespa 9: Remove last
+ addMetric(metrics, ConfigServerMetrics.ZONE_WORKING.max());
addMetric(metrics, ConfigServerMetrics.THROTTLED_HOST_PROVISIONING.max());
// Container metrics that should be stored for the config-server
@@ -128,11 +127,11 @@ public class InfrastructureMetricSet {
addMetric(metrics, ContainerMetrics.HTTP_STATUS_2XX.count());
addMetric(metrics, ContainerMetrics.HTTP_STATUS_4XX.count());
addMetric(metrics, ContainerMetrics.HTTP_STATUS_5XX.count());
- addMetric(metrics, ContainerMetrics.JDISC_GC_MS, EnumSet.of(max, last)); // TODO: Vespa 9: Remove last.
+ addMetric(metrics, ContainerMetrics.JDISC_GC_MS.max());
addMetric(metrics, ContainerMetrics.MEM_HEAP_USED.average());
addMetric(metrics, ContainerMetrics.SERVER_NUM_REQUESTS.count());
- addMetric(metrics, ContainerMetrics.SERVER_STARTED_MILLIS, EnumSet.of(max, last)); // TODO: Vespa 9: Remove last.
- addMetric(metrics, ContainerMetrics.SERVER_TOTAL_SUCCESSFUL_RESPONSE_LATENCY, EnumSet.of(max, sum, count, last)); // TODO: Vespa 9: Remove last.
+ addMetric(metrics, ContainerMetrics.SERVER_STARTED_MILLIS.max());
+ addMetric(metrics, ContainerMetrics.SERVER_TOTAL_SUCCESSFUL_RESPONSE_LATENCY.max());
return metrics;
}
@@ -141,42 +140,42 @@ public class InfrastructureMetricSet {
Set<Metric> metrics = new LinkedHashSet<>();
addMetric(metrics, ControllerMetrics.ATHENZ_REQUEST_ERROR.count());
- addMetric(metrics, ControllerMetrics.ARCHIVE_BUCKET_COUNT, EnumSet.of(max, last)); // TODO: Vespa 9: Remove last
- addMetric(metrics, ControllerMetrics.BILLING_TENANTS, EnumSet.of(max, last)); // TODO: Vespa 9: Remove last
+ addMetric(metrics, ControllerMetrics.ARCHIVE_BUCKET_COUNT.max());
+ addMetric(metrics, ControllerMetrics.BILLING_TENANTS.max());
addMetric(metrics, ControllerMetrics.DEPLOYMENT_ABORT.count());
- addMetric(metrics, ControllerMetrics.DEPLOYMENT_AVERAGE_DURATION, EnumSet.of(max, min, last)); // TODO: Vespa 9: Remove last.
+ addMetric(metrics, ControllerMetrics.DEPLOYMENT_AVERAGE_DURATION.max());
addMetric(metrics, ControllerMetrics.DEPLOYMENT_CONVERGENCE_FAILURE.count());
addMetric(metrics, ControllerMetrics.DEPLOYMENT_DEPLOYMENT_FAILURE.count());
addMetric(metrics, ControllerMetrics.DEPLOYMENT_ERROR.count());
- addMetric(metrics, ControllerMetrics.DEPLOYMENT_FAILING_UPGRADES, EnumSet.of(min, last)); // TODO: Vespa 9: Remove last.
- addMetric(metrics, ControllerMetrics.DEPLOYMENT_FAILURE_PERCENTAGE, EnumSet.of(max, last)); // TODO: Vespa 9: Remove last.
+ addMetric(metrics, ControllerMetrics.DEPLOYMENT_FAILING_UPGRADES.min());
+ addMetric(metrics, ControllerMetrics.DEPLOYMENT_FAILURE_PERCENTAGE.max());
addMetric(metrics, ControllerMetrics.DEPLOYMENT_NODE_COUNT_BY_OS_VERSION.max());
addMetric(metrics, ControllerMetrics.DEPLOYMENT_OS_CHANGE_DURATION.max());
addMetric(metrics, ControllerMetrics.DEPLOYMENT_START.count());
addMetric(metrics, ControllerMetrics.DEPLOYMENT_SUCCESS.count());
addMetric(metrics, ControllerMetrics.DEPLOYMENT_TEST_FAILURE.count());
- addMetric(metrics, ControllerMetrics.DEPLOYMENT_WARNINGS, EnumSet.of(max, last)); // TODO: Vespa 9: Remove last.
+ addMetric(metrics, ControllerMetrics.DEPLOYMENT_WARNINGS.max());
addMetric(metrics, ControllerMetrics.DEPLOYMENT_ENDPOINT_CERTIFICATE_TIMEOUT.count());
- addMetric(metrics, ControllerMetrics.DEPLOYMENT_BROKEN_SYSTEM_VERSION, EnumSet.of(max, last)); // TODO: Vespa 9: Remove last
-
- addMetric(metrics, ControllerMetrics.OPERATION_APPLICATION, EnumSet.of(max, last)); // TODO: Vespa 9: Remove last
- addMetric(metrics, ControllerMetrics.OPERATION_CHANGEMANAGEMENT, EnumSet.of(max, last)); // TODO: Vespa 9: Remove last
- addMetric(metrics, ControllerMetrics.OPERATION_CONFIGSERVER, EnumSet.of(max, last)); // TODO: Vespa 9: Remove last
- addMetric(metrics, ControllerMetrics.OPERATION_CONTROLLER, EnumSet.of(max, last)); // TODO: Vespa 9: Remove last
- addMetric(metrics, ControllerMetrics.OPERATION_FLAGS, EnumSet.of(max, last)); // TODO: Vespa 9: Remove last
- addMetric(metrics, ControllerMetrics.OPERATION_OS, EnumSet.of(max, last)); // TODO: Vespa 9: Remove last
- addMetric(metrics, ControllerMetrics.OPERATION_ROUTING, EnumSet.of(max, last)); // TODO: Vespa 9: Remove last
- addMetric(metrics, ControllerMetrics.OPERATION_ZONE, EnumSet.of(max, last)); // TODO: Vespa 9: Remove last
-
- addMetric(metrics, ControllerMetrics.REMAINING_ROTATIONS, EnumSet.of(max, last)); // TODO: Vespa 9: Remove last
- addMetric(metrics, ControllerMetrics.DNS_QUEUED_REQUESTS, EnumSet.of(max, last)); // TODO: Vespa 9: Remove last
- addMetric(metrics, ControllerMetrics.ZMS_QUOTA_USAGE, EnumSet.of(max, last)); // TODO: Vespa 9: Remove last
+ addMetric(metrics, ControllerMetrics.DEPLOYMENT_BROKEN_SYSTEM_VERSION.max());
+
+ addMetric(metrics, ControllerMetrics.OPERATION_APPLICATION.max());
+ addMetric(metrics, ControllerMetrics.OPERATION_CHANGEMANAGEMENT.max());
+ addMetric(metrics, ControllerMetrics.OPERATION_CONFIGSERVER.max());
+ addMetric(metrics, ControllerMetrics.OPERATION_CONTROLLER.max());
+ addMetric(metrics, ControllerMetrics.OPERATION_FLAGS.max());
+ addMetric(metrics, ControllerMetrics.OPERATION_OS.max());
+ addMetric(metrics, ControllerMetrics.OPERATION_ROUTING.max());
+ addMetric(metrics, ControllerMetrics.OPERATION_ZONE.max());
+
+ addMetric(metrics, ControllerMetrics.REMAINING_ROTATIONS, EnumSet.of(min, max, last)); // TODO: Vespa 9: Remove last WAIT
+ addMetric(metrics, ControllerMetrics.DNS_QUEUED_REQUESTS.max());
+ addMetric(metrics, ControllerMetrics.ZMS_QUOTA_USAGE.max());
addMetric(metrics, ControllerMetrics.COREDUMP_PROCESSED.count());
addMetric(metrics, ControllerMetrics.AUTH0_EXCEPTIONS.count());
- addMetric(metrics, ControllerMetrics.METERING_AGE_SECONDS, EnumSet.of(min, last)); // TODO: Vespa 9: Remove last
- addMetric(metrics, ControllerMetrics.METERING_LAST_REPORTED, EnumSet.of(max, last)); // TODO: Vespa 9: Remove last
+ addMetric(metrics, ControllerMetrics.METERING_AGE_SECONDS.min());
+ addMetric(metrics, ControllerMetrics.METERING_LAST_REPORTED.max());
return metrics;
}