From e33ac17c1b7683a514f72556d75d7593a1b11bff Mon Sep 17 00:00:00 2001 From: bjormel Date: Tue, 11 Jun 2024 15:36:17 +0000 Subject: peak load metrics --- metrics/src/main/java/ai/vespa/metrics/ConfigServerMetrics.java | 3 +++ .../src/main/java/ai/vespa/metrics/set/InfrastructureMetricSet.java | 3 +++ .../com/yahoo/vespa/hosted/provision/maintenance/MetricsReporter.java | 3 +++ 3 files changed, 9 insertions(+) diff --git a/metrics/src/main/java/ai/vespa/metrics/ConfigServerMetrics.java b/metrics/src/main/java/ai/vespa/metrics/ConfigServerMetrics.java index c413dc5e7d7..119f56a613d 100644 --- a/metrics/src/main/java/ai/vespa/metrics/ConfigServerMetrics.java +++ b/metrics/src/main/java/ai/vespa/metrics/ConfigServerMetrics.java @@ -66,6 +66,9 @@ public enum ConfigServerMetrics implements VespaMetrics { CLUSTER_LOAD_IDEAL_CPU("cluster.load.ideal.cpu", Unit.FRACTION, "The ideal cpu load of a certain cluster"), CLUSTER_LOAD_IDEAL_MEMORY("cluster.load.ideal.memory", Unit.FRACTION, "The ideal memory load of a certain cluster"), CLUSTER_LOAD_IDEAL_DISK("cluster.load.ideal.disk", Unit.FRACTION, "The ideal disk load of a certain cluster"), + CLUSTER_LOAD_PEAK_CPU("cluster.load.peak.cpu", Unit.FRACTION, "The peak cpu load in the period considered of a certain cluster"), + CLUSTER_LOAD_PEAK_MEMORY("cluster.load.peak.memory", Unit.FRACTION, "The peak memory load in the period considered of a certain cluster"), + CLUSTER_LOAD_PEAK_DISK("cluster.load.peak.disk", Unit.FRACTION, "The peak disk load in the period considered of a certain cluster"), ZONE_WORKING("zone.working", Unit.BINARY, "The value 1 if zone is considered healthy, 0 if not. This is decided by considering the number of non-active nodes vs the number of active nodes in a zone"), CACHE_NODE_OBJECT_HIT_RATE("cache.nodeObject.hitRate", Unit.FRACTION, "The fraction of cache hits vs cache lookups for the node object cache"), diff --git a/metrics/src/main/java/ai/vespa/metrics/set/InfrastructureMetricSet.java b/metrics/src/main/java/ai/vespa/metrics/set/InfrastructureMetricSet.java index 2b9bc0b6422..ce3cdda7a11 100644 --- a/metrics/src/main/java/ai/vespa/metrics/set/InfrastructureMetricSet.java +++ b/metrics/src/main/java/ai/vespa/metrics/set/InfrastructureMetricSet.java @@ -67,6 +67,9 @@ public class InfrastructureMetricSet { addMetric(metrics, ConfigServerMetrics.CLUSTER_LOAD_IDEAL_CPU.max()); addMetric(metrics, ConfigServerMetrics.CLUSTER_LOAD_IDEAL_MEMORY.max()); addMetric(metrics, ConfigServerMetrics.CLUSTER_LOAD_IDEAL_DISK.max()); + addMetric(metrics, ConfigServerMetrics.CLUSTER_LOAD_PEAK_CPU.max()); + addMetric(metrics, ConfigServerMetrics.CLUSTER_LOAD_PEAK_MEMORY.max()); + addMetric(metrics, ConfigServerMetrics.CLUSTER_LOAD_PEAK_DISK.max()); addMetric(metrics, ConfigServerMetrics.NODES_EMPTY_EXCLUSIVE.max()); addMetric(metrics, ConfigServerMetrics.NODES_EXPIRED_DEPROVISIONED.count()); addMetric(metrics, ConfigServerMetrics.NODES_EXPIRED_DIRTY.count()); diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/MetricsReporter.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/MetricsReporter.java index a2cb27246c6..cf7e0cf8fa3 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/MetricsReporter.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/MetricsReporter.java @@ -143,6 +143,9 @@ public class MetricsReporter extends NodeRepositoryMaintainer { metric.set(ConfigServerMetrics.CLUSTER_LOAD_IDEAL_CPU.baseName(), cluster.get().target().ideal().cpu(), context); metric.set(ConfigServerMetrics.CLUSTER_LOAD_IDEAL_MEMORY.baseName(), cluster.get().target().ideal().memory(), context); metric.set(ConfigServerMetrics.CLUSTER_LOAD_IDEAL_DISK.baseName(), cluster.get().target().ideal().disk(), context); + metric.set(ConfigServerMetrics.CLUSTER_LOAD_PEAK_CPU.baseName(), cluster.get().target().peak().cpu(), context); + metric.set(ConfigServerMetrics.CLUSTER_LOAD_PEAK_MEMORY.baseName(), cluster.get().target().peak().memory(), context); + metric.set(ConfigServerMetrics.CLUSTER_LOAD_PEAK_DISK.baseName(), cluster.get().target().peak().disk(), context); } private void updateZoneMetrics() { -- cgit v1.2.3