diff options
author | yngveaasheim <yngve@yahooinc.com> | 2023-08-31 10:31:30 +0200 |
---|---|---|
committer | yngveaasheim <yngve@yahooinc.com> | 2023-08-31 10:31:30 +0200 |
commit | 5f17c0959f49847debbc86327feb81169f011782 (patch) | |
tree | f43d91bd9b1f48bbfcf43c299842bfed9a90827f /metrics | |
parent | 2d3a1be956b24f3eda343bddcecea6b418f4cd7c (diff) |
Garbage collect infrastructure metrics for Vespa 9
Diffstat (limited to 'metrics')
-rw-r--r-- | metrics/src/main/java/ai/vespa/metrics/set/InfrastructureMetricSet.java | 115 |
1 files changed, 57 insertions, 58 deletions
diff --git a/metrics/src/main/java/ai/vespa/metrics/set/InfrastructureMetricSet.java b/metrics/src/main/java/ai/vespa/metrics/set/InfrastructureMetricSet.java index 38533f40950..2aeb2199def 100644 --- a/metrics/src/main/java/ai/vespa/metrics/set/InfrastructureMetricSet.java +++ b/metrics/src/main/java/ai/vespa/metrics/set/InfrastructureMetricSet.java @@ -52,74 +52,73 @@ public class InfrastructureMetricSet { addMetric(metrics, ConfigServerMetrics.DELAYED_RESPONSES.count()); addMetric(metrics, ConfigServerMetrics.SESSION_CHANGE_ERRORS.count()); - addMetric(metrics, ConfigServerMetrics.ZK_Z_NODES, EnumSet.of(max, last)); // TODO: Vespa 9: Remove last. - addMetric(metrics, ConfigServerMetrics.ZK_AVG_LATENCY, EnumSet.of(max, last)); // TODO: Vespa 9: Remove last. - addMetric(metrics, ConfigServerMetrics.ZK_MAX_LATENCY, EnumSet.of(max, last)); // TODO: Vespa 9: Remove last. - addMetric(metrics, ConfigServerMetrics.ZK_CONNECTIONS, EnumSet.of(max, last)); // TODO: Vespa 9: Remove last. + addMetric(metrics, ConfigServerMetrics.ZK_Z_NODES.max()); + addMetric(metrics, ConfigServerMetrics.ZK_MAX_LATENCY, EnumSet.of(max, average)); + addMetric(metrics, ConfigServerMetrics.ZK_CONNECTIONS.max()); addMetric(metrics, ConfigServerMetrics.ZK_CONNECTION_LOST.count()); addMetric(metrics, ConfigServerMetrics.ZK_RECONNECTED.count()); addMetric(metrics, ConfigServerMetrics.ZK_SUSPENDED.count()); - addMetric(metrics, ConfigServerMetrics.ZK_OUTSTANDING_REQUESTS, EnumSet.of(max, last)); // TODO: Vespa 9: Remove last. 
+ addMetric(metrics, ConfigServerMetrics.ZK_OUTSTANDING_REQUESTS.max()); // Node repository metrics - addMetric(metrics, ConfigServerMetrics.NODES_ACTIVE, EnumSet.of(max, last)); // TODO: Vespa 9: Remove last - addMetric(metrics, ConfigServerMetrics.NODES_NON_ACTIVE_FRACTION, EnumSet.of(max, last)); // TODO: Vespa 9: Remove last - addMetric(metrics, ConfigServerMetrics.CLUSTER_COST.last()); - addMetric(metrics, ConfigServerMetrics.CLUSTER_LOAD_IDEAL_CPU.last()); - addMetric(metrics, ConfigServerMetrics.CLUSTER_LOAD_IDEAL_MEMORY.last()); - addMetric(metrics, ConfigServerMetrics.CLUSTER_LOAD_IDEAL_DISK.last()); + addMetric(metrics, ConfigServerMetrics.NODES_ACTIVE.max()); + addMetric(metrics, ConfigServerMetrics.NODES_NON_ACTIVE_FRACTION.max()); + addMetric(metrics, ConfigServerMetrics.CLUSTER_COST.max()); + addMetric(metrics, ConfigServerMetrics.CLUSTER_LOAD_IDEAL_CPU.max()); + addMetric(metrics, ConfigServerMetrics.CLUSTER_LOAD_IDEAL_MEMORY.max()); + addMetric(metrics, ConfigServerMetrics.CLUSTER_LOAD_IDEAL_DISK.max()); addMetric(metrics, ConfigServerMetrics.WANT_TO_REBOOT.max()); addMetric(metrics, ConfigServerMetrics.WANT_TO_RESTART.max()); addMetric(metrics, ConfigServerMetrics.WANT_TO_RETIRE.max()); addMetric(metrics, ConfigServerMetrics.RETIRED.max()); addMetric(metrics, ConfigServerMetrics.WANT_TO_CHANGE_VESPA_VERSION.max()); - addMetric(metrics, ConfigServerMetrics.HAS_WIRE_GUARD_KEY, EnumSet.of(max, last)); // TODO: Vespa 9: Remove last + addMetric(metrics, ConfigServerMetrics.HAS_WIRE_GUARD_KEY.max()); addMetric(metrics, ConfigServerMetrics.WANT_TO_DEPROVISION.max()); - addMetric(metrics, ConfigServerMetrics.SUSPENDED, EnumSet.of(max, last)); // TODO: Vespa 9: Remove last + addMetric(metrics, ConfigServerMetrics.SUSPENDED.max()); addMetric(metrics, ConfigServerMetrics.SOME_SERVICES_DOWN.max()); - addMetric(metrics, ConfigServerMetrics.NODE_FAILER_BAD_NODE, EnumSet.of(max, last)); // TODO: Vespa 9: Remove last. 
+ addMetric(metrics, ConfigServerMetrics.NODE_FAILER_BAD_NODE.max()); addMetric(metrics, ConfigServerMetrics.LOCK_ATTEMPT_LOCKED_LOAD, EnumSet.of(max,average)); - addMetric(metrics, ConfigServerMetrics.HOSTED_VESPA_DOCKER_ALLOCATED_CAPACITY_CPU, EnumSet.of(max, average, last)); // TODO: Vespa 9: Remove last, average? - addMetric(metrics, ConfigServerMetrics.HOSTED_VESPA_DOCKER_ALLOCATED_CAPACITY_MEM, EnumSet.of(max, average, last)); // TODO: Vespa 9: Remove last, average? - addMetric(metrics, ConfigServerMetrics.HOSTED_VESPA_DOCKER_ALLOCATED_CAPACITY_DISK, EnumSet.of(max, average, last)); // TODO: Vespa 9: Remove last, average? + addMetric(metrics, ConfigServerMetrics.HOSTED_VESPA_DOCKER_ALLOCATED_CAPACITY_CPU, EnumSet.of(max, sum)); + addMetric(metrics, ConfigServerMetrics.HOSTED_VESPA_DOCKER_ALLOCATED_CAPACITY_MEM, EnumSet.of(max, sum)); + addMetric(metrics, ConfigServerMetrics.HOSTED_VESPA_DOCKER_ALLOCATED_CAPACITY_DISK, EnumSet.of(max, sum)); addMetric(metrics, ConfigServerMetrics.HOSTED_VESPA_DOCKER_FREE_CAPACITY_CPU.max()); addMetric(metrics, ConfigServerMetrics.HOSTED_VESPA_DOCKER_FREE_CAPACITY_MEM.max()); addMetric(metrics, ConfigServerMetrics.HOSTED_VESPA_DOCKER_FREE_CAPACITY_DISK.max()); - addMetric(metrics, ConfigServerMetrics.HOSTED_VESPA_DOCKER_TOTAL_CAPACITY_CPU, EnumSet.of(max,average)); - addMetric(metrics, ConfigServerMetrics.HOSTED_VESPA_DOCKER_TOTAL_CAPACITY_DISK, EnumSet.of(max,average)); - addMetric(metrics, ConfigServerMetrics.HOSTED_VESPA_DOCKER_TOTAL_CAPACITY_MEM, EnumSet.of(max,average)); - addMetric(metrics, ConfigServerMetrics.HOSTED_VESPA_DOCKER_SKEW, EnumSet.of(max, last)); // TODO: Vespa 9: Remove last - addMetric(metrics, ConfigServerMetrics.HOSTED_VESPA_PENDING_REDEPLOYMENTS, EnumSet.of(max, last)); // TODO: Vespa 9: Remove last + addMetric(metrics, ConfigServerMetrics.HOSTED_VESPA_DOCKER_TOTAL_CAPACITY_CPU, EnumSet.of(max,sum)); + addMetric(metrics, ConfigServerMetrics.HOSTED_VESPA_DOCKER_TOTAL_CAPACITY_DISK, EnumSet.of(max,sum)); + 
addMetric(metrics, ConfigServerMetrics.HOSTED_VESPA_DOCKER_TOTAL_CAPACITY_MEM, EnumSet.of(max,sum)); + addMetric(metrics, ConfigServerMetrics.HOSTED_VESPA_DOCKER_SKEW.max()); + addMetric(metrics, ConfigServerMetrics.HOSTED_VESPA_PENDING_REDEPLOYMENTS.max()); addMetric(metrics, ConfigServerMetrics.HOSTED_VESPA_ACTIVE_HOSTS.max()); addMetric(metrics, ConfigServerMetrics.HOSTED_VESPA_DIRTY_HOSTS.max()); addMetric(metrics, ConfigServerMetrics.HOSTED_VESPA_FAILED_HOSTS.max()); addMetric(metrics, ConfigServerMetrics.HOSTED_VESPA_INACTIVE_HOSTS.max()); - addMetric(metrics, ConfigServerMetrics.HOSTED_VESPA_PROVISIONED_HOSTS, EnumSet.of(max, last)); // TODO: Vespa 9: Remove last + addMetric(metrics, ConfigServerMetrics.HOSTED_VESPA_PROVISIONED_HOSTS.max()); addMetric(metrics, ConfigServerMetrics.HOSTED_VESPA_READY_HOSTS.max()); addMetric(metrics, ConfigServerMetrics.HOSTED_VESPA_RESERVED_HOSTS.max()); - addMetric(metrics, ConfigServerMetrics.HOSTED_VESPA_PARKED_HOSTS, EnumSet.of(max, last)); // TODO: Vespa 9: Remove last + addMetric(metrics, ConfigServerMetrics.HOSTED_VESPA_PARKED_HOSTS.max()); addMetric(metrics, ConfigServerMetrics.HOSTED_VESPA_ACTIVE_NODES.max()); addMetric(metrics, ConfigServerMetrics.HOSTED_VESPA_FAILED_NODES.max()); - addMetric(metrics, ConfigServerMetrics.HOSTED_VESPA_PARKED_NODES, EnumSet.of(max, last)); // TODO: Vespa 9: Remove last + addMetric(metrics, ConfigServerMetrics.HOSTED_VESPA_PARKED_NODES.max()); addMetric(metrics, ConfigServerMetrics.RPC_SERVER_WORK_QUEUE_SIZE.average()); - addMetric(metrics, ConfigServerMetrics.DEPLOYMENT_ACTIVATE_MILLIS, EnumSet.of(max, last)); // TODO: Vespa 9: Remove last - addMetric(metrics, ConfigServerMetrics.DEPLOYMENT_PREPARE_MILLIS, EnumSet.of(max, last)); // TODO: Vespa 9: Remove last + addMetric(metrics, ConfigServerMetrics.DEPLOYMENT_ACTIVATE_MILLIS.max()); + addMetric(metrics, ConfigServerMetrics.DEPLOYMENT_PREPARE_MILLIS.max()); addMetric(metrics, ConfigServerMetrics.LOCK_ATTEMPT_LOCKED_LOAD, 
EnumSet.of(max, average)); - addMetric(metrics, ConfigServerMetrics.MAINTENANCE_SUCCESS_FACTOR_DEVIATION, EnumSet.of(max, last)); // TODO: Vespa 9: Remove last + addMetric(metrics, ConfigServerMetrics.MAINTENANCE_SUCCESS_FACTOR_DEVIATION.max()); addMetric(metrics, ConfigServerMetrics.MAINTENANCE_DEPLOYMENT_FAILURE.count()); addMetric(metrics, ConfigServerMetrics.MAINTENANCE_DEPLOYMENT_TRANSIENT_FAILURE.count()); addMetric(metrics, ConfigServerMetrics.OVERCOMMITTED_HOSTS.max()); - addMetric(metrics, ConfigServerMetrics.SPARE_HOST_CAPACITY, EnumSet.of(max, last)); // TODO: Vespa 9: Remove last. - addMetric(metrics, ConfigServerMetrics.THROTTLED_HOST_FAILURES, EnumSet.of(max, last)); // TODO: Vespa 9: Remove last - addMetric(metrics, ConfigServerMetrics.THROTTLED_NODE_FAILURES, EnumSet.of(max, last)); // TODO: Vespa 9: Remove last - addMetric(metrics, ConfigServerMetrics.NODE_FAIL_THROTTLING, EnumSet.of(max, last)); // TODO: Vespa 9: Remove last + addMetric(metrics, ConfigServerMetrics.SPARE_HOST_CAPACITY, EnumSet.of(min, max, last)); // TODO: Vespa 9: Remove last. 
WAIT + addMetric(metrics, ConfigServerMetrics.THROTTLED_HOST_FAILURES.max()); + addMetric(metrics, ConfigServerMetrics.THROTTLED_NODE_FAILURES.max()); + addMetric(metrics, ConfigServerMetrics.NODE_FAIL_THROTTLING.max()); addMetric(metrics, ConfigServerMetrics.ORCHESTRATOR_LOCK_ACQUIRE_SUCCESS.count()); addMetric(metrics, ConfigServerMetrics.ORCHESTRATOR_LOCK_ACQUIRE_TIMEOUT.count()); - addMetric(metrics, ConfigServerMetrics.ZONE_WORKING, EnumSet.of(max, last)); // TODO: Vespa 9: Remove last + addMetric(metrics, ConfigServerMetrics.ZONE_WORKING.max()); addMetric(metrics, ConfigServerMetrics.THROTTLED_HOST_PROVISIONING.max()); // Container metrics that should be stored for the config-server @@ -128,11 +127,11 @@ public class InfrastructureMetricSet { addMetric(metrics, ContainerMetrics.HTTP_STATUS_2XX.count()); addMetric(metrics, ContainerMetrics.HTTP_STATUS_4XX.count()); addMetric(metrics, ContainerMetrics.HTTP_STATUS_5XX.count()); - addMetric(metrics, ContainerMetrics.JDISC_GC_MS, EnumSet.of(max, last)); // TODO: Vespa 9: Remove last. + addMetric(metrics, ContainerMetrics.JDISC_GC_MS.max()); addMetric(metrics, ContainerMetrics.MEM_HEAP_USED.average()); addMetric(metrics, ContainerMetrics.SERVER_NUM_REQUESTS.count()); - addMetric(metrics, ContainerMetrics.SERVER_STARTED_MILLIS, EnumSet.of(max, last)); // TODO: Vespa 9: Remove last. - addMetric(metrics, ContainerMetrics.SERVER_TOTAL_SUCCESSFUL_RESPONSE_LATENCY, EnumSet.of(max, sum, count, last)); // TODO: Vespa 9: Remove last. 
+ addMetric(metrics, ContainerMetrics.SERVER_STARTED_MILLIS.max()); + addMetric(metrics, ContainerMetrics.SERVER_TOTAL_SUCCESSFUL_RESPONSE_LATENCY.max()); return metrics; } @@ -141,42 +140,42 @@ public class InfrastructureMetricSet { Set<Metric> metrics = new LinkedHashSet<>(); addMetric(metrics, ControllerMetrics.ATHENZ_REQUEST_ERROR.count()); - addMetric(metrics, ControllerMetrics.ARCHIVE_BUCKET_COUNT, EnumSet.of(max, last)); // TODO: Vespa 9: Remove last - addMetric(metrics, ControllerMetrics.BILLING_TENANTS, EnumSet.of(max, last)); // TODO: Vespa 9: Remove last + addMetric(metrics, ControllerMetrics.ARCHIVE_BUCKET_COUNT.max()); + addMetric(metrics, ControllerMetrics.BILLING_TENANTS.max()); addMetric(metrics, ControllerMetrics.DEPLOYMENT_ABORT.count()); - addMetric(metrics, ControllerMetrics.DEPLOYMENT_AVERAGE_DURATION, EnumSet.of(max, min, last)); // TODO: Vespa 9: Remove last. + addMetric(metrics, ControllerMetrics.DEPLOYMENT_AVERAGE_DURATION.max()); addMetric(metrics, ControllerMetrics.DEPLOYMENT_CONVERGENCE_FAILURE.count()); addMetric(metrics, ControllerMetrics.DEPLOYMENT_DEPLOYMENT_FAILURE.count()); addMetric(metrics, ControllerMetrics.DEPLOYMENT_ERROR.count()); - addMetric(metrics, ControllerMetrics.DEPLOYMENT_FAILING_UPGRADES, EnumSet.of(min, last)); // TODO: Vespa 9: Remove last. - addMetric(metrics, ControllerMetrics.DEPLOYMENT_FAILURE_PERCENTAGE, EnumSet.of(max, last)); // TODO: Vespa 9: Remove last. 
+ addMetric(metrics, ControllerMetrics.DEPLOYMENT_FAILING_UPGRADES.min()); + addMetric(metrics, ControllerMetrics.DEPLOYMENT_FAILURE_PERCENTAGE.max()); addMetric(metrics, ControllerMetrics.DEPLOYMENT_NODE_COUNT_BY_OS_VERSION.max()); addMetric(metrics, ControllerMetrics.DEPLOYMENT_OS_CHANGE_DURATION.max()); addMetric(metrics, ControllerMetrics.DEPLOYMENT_START.count()); addMetric(metrics, ControllerMetrics.DEPLOYMENT_SUCCESS.count()); addMetric(metrics, ControllerMetrics.DEPLOYMENT_TEST_FAILURE.count()); - addMetric(metrics, ControllerMetrics.DEPLOYMENT_WARNINGS, EnumSet.of(max, last)); // TODO: Vespa 9: Remove last. + addMetric(metrics, ControllerMetrics.DEPLOYMENT_WARNINGS.max()); addMetric(metrics, ControllerMetrics.DEPLOYMENT_ENDPOINT_CERTIFICATE_TIMEOUT.count()); - addMetric(metrics, ControllerMetrics.DEPLOYMENT_BROKEN_SYSTEM_VERSION, EnumSet.of(max, last)); // TODO: Vespa 9: Remove last - - addMetric(metrics, ControllerMetrics.OPERATION_APPLICATION, EnumSet.of(max, last)); // TODO: Vespa 9: Remove last - addMetric(metrics, ControllerMetrics.OPERATION_CHANGEMANAGEMENT, EnumSet.of(max, last)); // TODO: Vespa 9: Remove last - addMetric(metrics, ControllerMetrics.OPERATION_CONFIGSERVER, EnumSet.of(max, last)); // TODO: Vespa 9: Remove last - addMetric(metrics, ControllerMetrics.OPERATION_CONTROLLER, EnumSet.of(max, last)); // TODO: Vespa 9: Remove last - addMetric(metrics, ControllerMetrics.OPERATION_FLAGS, EnumSet.of(max, last)); // TODO: Vespa 9: Remove last - addMetric(metrics, ControllerMetrics.OPERATION_OS, EnumSet.of(max, last)); // TODO: Vespa 9: Remove last - addMetric(metrics, ControllerMetrics.OPERATION_ROUTING, EnumSet.of(max, last)); // TODO: Vespa 9: Remove last - addMetric(metrics, ControllerMetrics.OPERATION_ZONE, EnumSet.of(max, last)); // TODO: Vespa 9: Remove last - - addMetric(metrics, ControllerMetrics.REMAINING_ROTATIONS, EnumSet.of(max, last)); // TODO: Vespa 9: Remove last - addMetric(metrics, ControllerMetrics.DNS_QUEUED_REQUESTS, 
EnumSet.of(max, last)); // TODO: Vespa 9: Remove last - addMetric(metrics, ControllerMetrics.ZMS_QUOTA_USAGE, EnumSet.of(max, last)); // TODO: Vespa 9: Remove last + addMetric(metrics, ControllerMetrics.DEPLOYMENT_BROKEN_SYSTEM_VERSION.max()); + + addMetric(metrics, ControllerMetrics.OPERATION_APPLICATION.max()); + addMetric(metrics, ControllerMetrics.OPERATION_CHANGEMANAGEMENT.max()); + addMetric(metrics, ControllerMetrics.OPERATION_CONFIGSERVER.max()); + addMetric(metrics, ControllerMetrics.OPERATION_CONTROLLER.max()); + addMetric(metrics, ControllerMetrics.OPERATION_FLAGS.max()); + addMetric(metrics, ControllerMetrics.OPERATION_OS.max()); + addMetric(metrics, ControllerMetrics.OPERATION_ROUTING.max()); + addMetric(metrics, ControllerMetrics.OPERATION_ZONE.max()); + + addMetric(metrics, ControllerMetrics.REMAINING_ROTATIONS, EnumSet.of(min, max, last)); // TODO: Vespa 9: Remove last WAIT + addMetric(metrics, ControllerMetrics.DNS_QUEUED_REQUESTS.max()); + addMetric(metrics, ControllerMetrics.ZMS_QUOTA_USAGE.max()); addMetric(metrics, ControllerMetrics.COREDUMP_PROCESSED.count()); addMetric(metrics, ControllerMetrics.AUTH0_EXCEPTIONS.count()); - addMetric(metrics, ControllerMetrics.METERING_AGE_SECONDS, EnumSet.of(min, last)); // TODO: Vespa 9: Remove last - addMetric(metrics, ControllerMetrics.METERING_LAST_REPORTED, EnumSet.of(max, last)); // TODO: Vespa 9: Remove last + addMetric(metrics, ControllerMetrics.METERING_AGE_SECONDS.min()); + addMetric(metrics, ControllerMetrics.METERING_LAST_REPORTED.max()); return metrics; } |