diff options
author | Jon Bratseth <bratseth@vespa.ai> | 2023-05-18 16:11:53 +0200 |
---|---|---|
committer | Jon Bratseth <bratseth@vespa.ai> | 2023-05-18 16:11:53 +0200 |
commit | ff426d5cf4d02a259719770a360cce664087503d (patch) | |
tree | 876d62bba13f3db7cfcead2afff6b1a33d5440a8 /metrics/src | |
parent | 3a88b880f0b6323959dafeeb8e0076a7f515e311 (diff) |
Add cluster load metrics
Diffstat (limited to 'metrics/src')
-rw-r--r-- | metrics/src/main/java/ai/vespa/metrics/ConfigServerMetrics.java | 8 | ||||
-rw-r--r-- | metrics/src/main/java/ai/vespa/metrics/Unit.java | 2 |
2 files changed, 8 insertions, 2 deletions
```diff
diff --git a/metrics/src/main/java/ai/vespa/metrics/ConfigServerMetrics.java b/metrics/src/main/java/ai/vespa/metrics/ConfigServerMetrics.java
index 013c50e77cf..d323026e4ca 100644
--- a/metrics/src/main/java/ai/vespa/metrics/ConfigServerMetrics.java
+++ b/metrics/src/main/java/ai/vespa/metrics/ConfigServerMetrics.java
@@ -27,7 +27,6 @@ public enum ConfigServerMetrics implements VespaMetrics {
     MAINTENANCE_DEPLOYMENT_TRANSIENT_FAILURE("maintenanceDeployment.transientFailure", Unit.OPERATION, "Number of maintenance deployments that failed with a transient failure"),
     MAINTENANCE_DEPLOYMENT_FAILURE("maintenanceDeployment.failure", Unit.OPERATION, "Number of maintenance deployments that failed with a permanent failure"),
 
-
     // ZooKeeper related metrics
     ZK_CONNECTIONS_LOST("configserver.zkConnectionLost", Unit.CONNECTION, "Number of ZooKeeper connections lost"),
     ZK_RECONNECTED("configserver.zkReconnected", Unit.CONNECTION, "Number of ZooKeeper reconnections"),
@@ -45,8 +44,13 @@ public enum ConfigServerMetrics implements VespaMetrics {
     ORCHESTRATOR_LOCK_ACQUIRE_TIMEOUT("orchestrator.lock.acquire-timedout", Unit.OPERATION, "Number of times zookeeper lock couldn't be acquired within timeout"),
     ORCHESTRATOR_LOCK_ACQUIRE("orchestrator.lock.acquire", Unit.OPERATION, "Number of attempts to acquire zookeeper lock"),
     ORCHESTRATOR_LOCK_ACQUIRED("orchestrator.lock.acquired", Unit.OPERATION, "Number of times zookeeper lock was acquired"),
-    ORCHESTRATOR_LOCK_HOLD_LATENCY("orchestrator.lock.hold-latency", Unit.SECOND, "Time zookeeper lock was held before it was released");
+    ORCHESTRATOR_LOCK_HOLD_LATENCY("orchestrator.lock.hold-latency", Unit.SECOND, "Time zookeeper lock was held before it was released"),
 
+    // Node repository metrics
+    CLUSTER_COST("cluster.cost", Unit.DOLLAR_PER_HOUR, "The cost of the nodes allocated to a certain cluster, in $/hr"),
+    CLUSTER_LOAD_IDEAL_CPU("cluster.load.ideal.cpu", Unit.FRACTION, "The ideal cpu load of a certain cluster"),
+    CLUSTER_LOAD_IDEAL_MEMORY("cluster.load.ideal.memory", Unit.FRACTION, "The ideal memory load of a certain cluster"),
+    CLUSTER_LOAD_IDEAL_DISK("cluster.load.ideal.disk", Unit.FRACTION, "The ideal disk load of a certain cluster");
 
     private final String name;
     private final Unit unit;
diff --git a/metrics/src/main/java/ai/vespa/metrics/Unit.java b/metrics/src/main/java/ai/vespa/metrics/Unit.java
index a2123d72246..d5769707b76 100644
--- a/metrics/src/main/java/ai/vespa/metrics/Unit.java
+++ b/metrics/src/main/java/ai/vespa/metrics/Unit.java
@@ -12,6 +12,7 @@ public enum Unit {
     CONNECTION(BaseUnit.CONNECTION, "A link used for communication between a client and a server"),
     DOCUMENT(BaseUnit.DOCUMENT, "Vespa document, a collection of fields defined in a schema file"),
     DOCUMENTID(BaseUnit.DOCUMENTID, "A unique document identifier"),
+    DOLLAR_PER_HOUR(BaseUnit.MAGNITUDE, "$/hr"),
     FAILURE(BaseUnit.FAILURE, "Failures, typically for requests, operations or nodes"),
     FILE(BaseUnit.FILE, "Data file stored on the disk on a node"),
     FRACTION(BaseUnit.FRACTION, "A value in the range [0..1]. Higher values can occur for some metrics, but would indicate the value is outside of the allowed range."),
@@ -86,6 +87,7 @@ public enum Unit {
         HIT("hit"),
         INSTANCE("instance"),
         ITEM("item"),
+        MAGNITUDE("magnitude"),
         MILLISECOND("millisecond", "ms"),
         NANOSECOND("nanosecond", "ns"),
         NODE("node"),
```