summaryrefslogtreecommitdiffstats
path: root/metrics
diff options
context:
space:
mode:
authorJon Bratseth <bratseth@vespa.ai>2023-05-18 16:11:53 +0200
committerJon Bratseth <bratseth@vespa.ai>2023-05-18 16:11:53 +0200
commitff426d5cf4d02a259719770a360cce664087503d (patch)
tree876d62bba13f3db7cfcead2afff6b1a33d5440a8 /metrics
parent3a88b880f0b6323959dafeeb8e0076a7f515e311 (diff)
Add cluster load metrics
Diffstat (limited to 'metrics')
-rw-r--r--metrics/src/main/java/ai/vespa/metrics/ConfigServerMetrics.java8
-rw-r--r--metrics/src/main/java/ai/vespa/metrics/Unit.java2
2 files changed, 8 insertions, 2 deletions
diff --git a/metrics/src/main/java/ai/vespa/metrics/ConfigServerMetrics.java b/metrics/src/main/java/ai/vespa/metrics/ConfigServerMetrics.java
index 013c50e77cf..d323026e4ca 100644
--- a/metrics/src/main/java/ai/vespa/metrics/ConfigServerMetrics.java
+++ b/metrics/src/main/java/ai/vespa/metrics/ConfigServerMetrics.java
@@ -27,7 +27,6 @@ public enum ConfigServerMetrics implements VespaMetrics {
MAINTENANCE_DEPLOYMENT_TRANSIENT_FAILURE("maintenanceDeployment.transientFailure", Unit.OPERATION, "Number of maintenance deployments that failed with a transient failure"),
MAINTENANCE_DEPLOYMENT_FAILURE("maintenanceDeployment.failure", Unit.OPERATION, "Number of maintenance deployments that failed with a permanent failure"),
-
// ZooKeeper related metrics
ZK_CONNECTIONS_LOST("configserver.zkConnectionLost", Unit.CONNECTION, "Number of ZooKeeper connections lost"),
ZK_RECONNECTED("configserver.zkReconnected", Unit.CONNECTION, "Number of ZooKeeper reconnections"),
@@ -45,8 +44,13 @@ public enum ConfigServerMetrics implements VespaMetrics {
ORCHESTRATOR_LOCK_ACQUIRE_TIMEOUT("orchestrator.lock.acquire-timedout", Unit.OPERATION, "Number of times zookeeper lock couldn't be acquired within timeout"),
ORCHESTRATOR_LOCK_ACQUIRE("orchestrator.lock.acquire", Unit.OPERATION, "Number of attempts to acquire zookeeper lock"),
ORCHESTRATOR_LOCK_ACQUIRED("orchestrator.lock.acquired", Unit.OPERATION, "Number of times zookeeper lock was acquired"),
- ORCHESTRATOR_LOCK_HOLD_LATENCY("orchestrator.lock.hold-latency", Unit.SECOND, "Time zookeeper lock was held before it was released");
+ ORCHESTRATOR_LOCK_HOLD_LATENCY("orchestrator.lock.hold-latency", Unit.SECOND, "Time zookeeper lock was held before it was released"),
+ // Node repository metrics
+ CLUSTER_COST("cluster.cost", Unit.DOLLAR_PER_HOUR, "The cost of the nodes allocated to a certain cluster, in $/hr"),
+ CLUSTER_LOAD_IDEAL_CPU("cluster.load.ideal.cpu", Unit.FRACTION, "The ideal cpu load of a certain cluster"),
+ CLUSTER_LOAD_IDEAL_MEMORY("cluster.load.ideal.memory", Unit.FRACTION, "The ideal memory load of a certain cluster"),
+ CLUSTER_LOAD_IDEAL_DISK("cluster.load.ideal.disk", Unit.FRACTION, "The ideal disk load of a certain cluster");
private final String name;
private final Unit unit;
diff --git a/metrics/src/main/java/ai/vespa/metrics/Unit.java b/metrics/src/main/java/ai/vespa/metrics/Unit.java
index a2123d72246..d5769707b76 100644
--- a/metrics/src/main/java/ai/vespa/metrics/Unit.java
+++ b/metrics/src/main/java/ai/vespa/metrics/Unit.java
@@ -12,6 +12,7 @@ public enum Unit {
CONNECTION(BaseUnit.CONNECTION, "A link used for communication between a client and a server"),
DOCUMENT(BaseUnit.DOCUMENT, "Vespa document, a collection of fields defined in a schema file"),
DOCUMENTID(BaseUnit.DOCUMENTID, "A unique document identifier"),
+ DOLLAR_PER_HOUR(BaseUnit.MAGNITUDE, "$/hr"),
FAILURE(BaseUnit.FAILURE, "Failures, typically for requests, operations or nodes"),
FILE(BaseUnit.FILE, "Data file stored on the disk on a node"),
FRACTION(BaseUnit.FRACTION, "A value in the range [0..1]. Higher values can occur for some metrics, but would indicate the value is outside of the allowed range."),
@@ -86,6 +87,7 @@ public enum Unit {
HIT("hit"),
INSTANCE("instance"),
ITEM("item"),
+ MAGNITUDE("magnitude"),
MILLISECOND("millisecond", "ms"),
NANOSECOND("nanosecond", "ns"),
NODE("node"),