diff options
author | Kristian Aune <kkraune@users.noreply.github.com> | 2023-03-09 12:53:22 +0100 |
---|---|---|
committer | GitHub <noreply@github.com> | 2023-03-09 12:53:22 +0100 |
commit | 5f4bf637a8ede90fa66ecc18ed2864c0c826f80b (patch) | |
tree | 30072369d9bf5b26bc723ed44a93b5f338eb4fc4 | |
parent | f91cb63554a8eab5bb3401177d2634b238fd957b (diff) | |
parent | e9e863cb10fe68cf63c0f753b315d51745aea237 (diff) |
Merge pull request #26377 from vespa-engine/yngveaasheim/add-enums-for-all-storage-metrics
Add enums for all storage and distributor metrics.
-rw-r--r-- | container-core/src/main/java/com/yahoo/metrics/DistributorMetrics.java | 142 | ||||
-rw-r--r-- | container-core/src/main/java/com/yahoo/metrics/StorageMetrics.java | 163 |
2 files changed, 291 insertions, 14 deletions
diff --git a/container-core/src/main/java/com/yahoo/metrics/DistributorMetrics.java b/container-core/src/main/java/com/yahoo/metrics/DistributorMetrics.java index 7a88607c7f6..82e9aff74a8 100644 --- a/container-core/src/main/java/com/yahoo/metrics/DistributorMetrics.java +++ b/container-core/src/main/java/com/yahoo/metrics/DistributorMetrics.java @@ -21,6 +21,8 @@ public enum DistributorMetrics implements VespaMetrics { VDS_IDEALSTATE_DELETE_BUCKET_DONE_OK("vds.idealstate.delete_bucket.done_ok", Unit.OPERATION, "The number of operations successfully performed"), VDS_IDEALSTATE_DELETE_BUCKET_DONE_FAILED("vds.idealstate.delete_bucket.done_failed", Unit.OPERATION, "The number of operations that failed"), VDS_IDEALSTATE_DELETE_BUCKET_PENDING("vds.idealstate.delete_bucket.pending", Unit.OPERATION, "The number of operations pending"), + VDS_IDEALSTATE_DELETE_BUCKET_BLOCKED("vds.idealstate.delete_bucket.blocked", Unit.OPERATION, "The number of operations blocked by blocking operation starter"), + VDS_IDEALSTATE_DELETE_BUCKET_THROTTLED("vds.idealstate.delete_bucket.throttled", Unit.OPERATION, "The number of operations throttled by throttling operation starter"), VDS_IDEALSTATE_MERGE_BUCKET_DONE_OK("vds.idealstate.merge_bucket.done_ok", Unit.OPERATION, "The number of operations successfully performed"), VDS_IDEALSTATE_MERGE_BUCKET_DONE_FAILED("vds.idealstate.merge_bucket.done_failed", Unit.OPERATION, "The number of operations that failed"), VDS_IDEALSTATE_MERGE_BUCKET_PENDING("vds.idealstate.merge_bucket.pending", Unit.OPERATION, "The number of operations pending"), @@ -32,13 +34,19 @@ public enum DistributorMetrics implements VespaMetrics { VDS_IDEALSTATE_SPLIT_BUCKET_DONE_OK("vds.idealstate.split_bucket.done_ok", Unit.OPERATION, "The number of operations successfully performed"), VDS_IDEALSTATE_SPLIT_BUCKET_DONE_FAILED("vds.idealstate.split_bucket.done_failed", Unit.OPERATION, "The number of operations that failed"), VDS_IDEALSTATE_SPLIT_BUCKET_PENDING("vds.idealstate.split_bucket.pending", Unit.OPERATION, "The number of operations pending"), + VDS_IDEALSTATE_SPLIT_BUCKET_BLOCKED("vds.idealstate.split_bucket.blocked", Unit.OPERATION, "The number of operations blocked by blocking operation starter"), + VDS_IDEALSTATE_SPLIT_BUCKET_THROTTLED("vds.idealstate.split_bucket.throttled", Unit.OPERATION, "The number of operations throttled by throttling operation starter"), VDS_IDEALSTATE_JOIN_BUCKET_DONE_OK("vds.idealstate.join_bucket.done_ok", Unit.OPERATION, "The number of operations successfully performed"), VDS_IDEALSTATE_JOIN_BUCKET_DONE_FAILED("vds.idealstate.join_bucket.done_failed", Unit.OPERATION, "The number of operations that failed"), VDS_IDEALSTATE_JOIN_BUCKET_PENDING("vds.idealstate.join_bucket.pending", Unit.OPERATION, "The number of operations pending"), + VDS_IDEALSTATE_JOIN_BUCKET_BLOCKED("vds.idealstate.join_bucket.blocked", Unit.OPERATION, "The number of operations blocked by blocking operation starter"), + VDS_IDEALSTATE_JOIN_BUCKET_THROTTLED("vds.idealstate.join_bucket.throttled", Unit.OPERATION, "The number of operations throttled by throttling operation starter"), VDS_IDEALSTATE_GARBAGE_COLLECTION_DONE_OK("vds.idealstate.garbage_collection.done_ok", Unit.OPERATION, "The number of operations successfully performed"), VDS_IDEALSTATE_GARBAGE_COLLECTION_DONE_FAILED("vds.idealstate.garbage_collection.done_failed", Unit.OPERATION, "The number of operations that failed"), VDS_IDEALSTATE_GARBAGE_COLLECTION_PENDING("vds.idealstate.garbage_collection.pending", Unit.OPERATION, "The number of operations pending"), VDS_IDEALSTATE_GARBAGE_COLLECTION_DOCUMENTS_REMOVED("vds.idealstate.garbage_collection.documents_removed", Unit.DOCUMENT, "Number of documents removed by GC operations"), + VDS_IDEALSTATE_GARBAGE_COLLECTION_BLOCKED("vds.idealstate.garbage_collection.blocked", Unit.OPERATION, "The number of operations blocked by blocking operation starter"), + VDS_IDEALSTATE_GARBAGE_COLLECTION_THROTTLED("vds.idealstate.garbage_collection.throttled", Unit.OPERATION, "The number of operations throttled by throttling operation starter"), VDS_DISTRIBUTOR_PUTS_LATENCY("vds.distributor.puts.latency", Unit.MILLISECOND, "The latency of put operations"), VDS_DISTRIBUTOR_PUTS_OK("vds.distributor.puts.ok", Unit.OPERATION, "The number of successful put operations performed"), @@ -60,6 +68,14 @@ public enum DistributorMetrics implements VespaMetrics { VDS_DISTRIBUTOR_REMOVES_FAILURES_NOTFOUND("vds.distributor.removes.failures.notfound", Unit.OPERATION, "The number of operations that failed because the document did not exist"), VDS_DISTRIBUTOR_REMOVES_FAILURES_TEST_AND_SET_FAILED("vds.distributor.removes.failures.test_and_set_failed", Unit.OPERATION, "The number of mutating operations that failed because they specified a test-and-set condition that did not match the existing document"), VDS_DISTRIBUTOR_REMOVES_FAILURES_CONCURRENT_MUTATIONS("vds.distributor.removes.failures.concurrent_mutations", Unit.OPERATION, "The number of operations that were transiently failed due to a mutating operation already being in progress for its document ID"), + VDS_DISTRIBUTOR_REMOVES_FAILURES_BUSY("vds.distributor.removes.failures.busy", Unit.OPERATION, "The number of messages from storage that failed because the storage node was busy"), + VDS_DISTRIBUTOR_REMOVES_FAILURES_INCONSISTENT_BUCKET("vds.distributor.removes.failures.inconsistent_bucket", Unit.OPERATION, "The number of operations failed due to buckets being in an inconsistent state or not found"), + VDS_DISTRIBUTOR_REMOVES_FAILURES_NOTCONNECTED("vds.distributor.removes.failures.notconnected", Unit.OPERATION, "The number of operations discarded because there were no available storage nodes to send to"), + VDS_DISTRIBUTOR_REMOVES_FAILURES_NOTREADY("vds.distributor.removes.failures.notready", Unit.OPERATION, "The number of operations discarded because distributor was not ready"), + VDS_DISTRIBUTOR_REMOVES_FAILURES_SAFE_TIME_NOT_REACHED("vds.distributor.removes.failures.safe_time_not_reached", Unit.OPERATION, "The number of operations that were transiently failed due to them arriving before the safe time point for bucket ownership handovers has passed"), + VDS_DISTRIBUTOR_REMOVES_FAILURES_STORAGEFAILURE("vds.distributor.removes.failures.storagefailure", Unit.OPERATION, "The number of operations that failed in storage"), + VDS_DISTRIBUTOR_REMOVES_FAILURES_TIMEOUT("vds.distributor.removes.failures.timeout", Unit.OPERATION, "The number of operations that failed because the operation timed out towards storage"), + VDS_DISTRIBUTOR_REMOVES_FAILURES_WRONGDISTRIBUTOR("vds.distributor.removes.failures.wrongdistributor", Unit.OPERATION, "The number of operations discarded because they were sent to the wrong distributor"), VDS_DISTRIBUTOR_UPDATES_LATENCY("vds.distributor.updates.latency", Unit.MILLISECOND, "The latency of update operations"), VDS_DISTRIBUTOR_UPDATES_OK("vds.distributor.updates.ok", Unit.OPERATION, "The number of successful updates operations performed"), VDS_DISTRIBUTOR_UPDATES_FAILURES_TOTAL("vds.distributor.updates.failures.total", Unit.OPERATION, "Sum of all failures"), @@ -67,12 +83,43 @@ public enum DistributorMetrics implements VespaMetrics { VDS_DISTRIBUTOR_UPDATES_FAILURES_TEST_AND_SET_FAILED("vds.distributor.updates.failures.test_and_set_failed", Unit.OPERATION, "The number of mutating operations that failed because they specified a test-and-set condition that did not match the existing document"), VDS_DISTRIBUTOR_UPDATES_FAILURES_CONCURRENT_MUTATIONS("vds.distributor.updates.failures.concurrent_mutations", Unit.OPERATION, "The number of operations that were transiently failed due to a mutating operation already being in progress for its document ID"), VDS_DISTRIBUTOR_UPDATES_DIVERGING_TIMESTAMP_UPDATES("vds.distributor.updates.diverging_timestamp_updates", Unit.OPERATION, "Number of updates that report they were performed against divergent version timestamps on different replicas"), + VDS_DISTRIBUTOR_UPDATES_FAILURES_BUSY("vds.distributor.updates.failures.busy", Unit.OPERATION, "The number of messages from storage that failed because the storage node was busy"), + VDS_DISTRIBUTOR_UPDATES_FAILURES_INCONSISTENT_BUCKET("vds.distributor.updates.failures.inconsistent_bucket", Unit.OPERATION, "The number of operations failed due to buckets being in an inconsistent state or not found"), + VDS_DISTRIBUTOR_UPDATES_FAILURES_NOTCONNECTED("vds.distributor.updates.failures.notconnected", Unit.OPERATION, "The number of operations discarded because there were no available storage nodes to send to"), + VDS_DISTRIBUTOR_UPDATES_FAILURES_NOTREADY("vds.distributor.updates.failures.notready", Unit.OPERATION, "The number of operations discarded because distributor was not ready"), + VDS_DISTRIBUTOR_UPDATES_FAILURES_SAFE_TIME_NOT_REACHED("vds.distributor.updates.failures.safe_time_not_reached", Unit.OPERATION, "The number of operations that were transiently failed due to them arriving before the safe time point for bucket ownership handovers has passed"), + VDS_DISTRIBUTOR_UPDATES_FAILURES_STORAGEFAILURE("vds.distributor.updates.failures.storagefailure", Unit.OPERATION, "The number of operations that failed in storage"), + VDS_DISTRIBUTOR_UPDATES_FAILURES_TIMEOUT("vds.distributor.updates.failures.timeout", Unit.OPERATION, "The number of operations that failed because the operation timed out towards storage"), + VDS_DISTRIBUTOR_UPDATES_FAILURES_WRONGDISTRIBUTOR("vds.distributor.updates.failures.wrongdistributor", Unit.OPERATION, "The number of operations discarded because they were sent to the wrong distributor"), + VDS_DISTRIBUTOR_UPDATES_FAST_PATH_RESTARTS("vds.distributor.updates.fast_path_restarts", Unit.OPERATION, "Number of safe path (write repair) updates that were restarted as fast path updates because all replicas returned documents with the same timestamp in the initial read phase"), VDS_DISTRIBUTOR_REMOVELOCATIONS_OK("vds.distributor.removelocations.ok", Unit.OPERATION, "The number of successful removelocations operations performed"), VDS_DISTRIBUTOR_REMOVELOCATIONS_FAILURES_TOTAL("vds.distributor.removelocations.failures.total", Unit.OPERATION, "Sum of all failures"), + VDS_DISTRIBUTOR_REMOVELOCATIONS_FAILURES_BUSY("vds.distributor.removelocations.failures.busy", Unit.OPERATION, "The number of messages from storage that failed because the storage node was busy"), + VDS_DISTRIBUTOR_REMOVELOCATIONS_FAILURES_CONCURRENT_MUTATIONS("vds.distributor.removelocations.failures.concurrent_mutations", Unit.OPERATION, "The number of operations that were transiently failed due to a mutating operation already being in progress for its document ID"), + VDS_DISTRIBUTOR_REMOVELOCATIONS_FAILURES_INCONSISTENT_BUCKET("vds.distributor.removelocations.failures.inconsistent_bucket", Unit.OPERATION, "The number of operations failed due to buckets being in an inconsistent state or not found"), + VDS_DISTRIBUTOR_REMOVELOCATIONS_FAILURES_NOTCONNECTED("vds.distributor.removelocations.failures.notconnected", Unit.OPERATION, "The number of operations discarded because there were no available storage nodes to send to"), + VDS_DISTRIBUTOR_REMOVELOCATIONS_FAILURES_NOTFOUND("vds.distributor.removelocations.failures.notfound", Unit.OPERATION, "The number of operations that failed because the document did not exist"), + VDS_DISTRIBUTOR_REMOVELOCATIONS_FAILURES_NOTREADY("vds.distributor.removelocations.failures.notready", Unit.OPERATION, "The number of operations discarded because distributor was not ready"), + VDS_DISTRIBUTOR_REMOVELOCATIONS_FAILURES_SAFE_TIME_NOT_REACHED("vds.distributor.removelocations.failures.safe_time_not_reached", Unit.OPERATION, "The number of operations that were transiently failed due to them arriving before the safe time point for bucket ownership handovers has passed"), + VDS_DISTRIBUTOR_REMOVELOCATIONS_FAILURES_STORAGEFAILURE("vds.distributor.removelocations.failures.storagefailure", Unit.OPERATION, "The number of operations that failed in storage"), + VDS_DISTRIBUTOR_REMOVELOCATIONS_FAILURES_TEST_AND_SET_FAILED("vds.distributor.removelocations.failures.test_and_set_failed", Unit.OPERATION, "The number of mutating operations that failed because they specified a test-and-set condition that did not match the existing document"), + VDS_DISTRIBUTOR_REMOVELOCATIONS_FAILURES_TIMEOUT("vds.distributor.removelocations.failures.timeout", Unit.OPERATION, "The number of operations that failed because the operation timed out towards storage"), + VDS_DISTRIBUTOR_REMOVELOCATIONS_FAILURES_WRONGDISTRIBUTOR("vds.distributor.removelocations.failures.wrongdistributor", Unit.OPERATION, "The number of operations discarded because they were sent to the wrong distributor"), + VDS_DISTRIBUTOR_REMOVELOCATIONS_LATENCY("vds.distributor.removelocations.latency", Unit.MILLISECOND, "The average latency of removelocations operations"), VDS_DISTRIBUTOR_GETS_LATENCY("vds.distributor.gets.latency", Unit.MILLISECOND, "The average latency of gets operations"), VDS_DISTRIBUTOR_GETS_OK("vds.distributor.gets.ok", Unit.OPERATION, "The number of successful gets operations performed"), VDS_DISTRIBUTOR_GETS_FAILURES_TOTAL("vds.distributor.gets.failures.total", Unit.OPERATION, "Sum of all failures"), VDS_DISTRIBUTOR_GETS_FAILURES_NOTFOUND("vds.distributor.gets.failures.notfound", Unit.OPERATION, "The number of operations that failed because the document did not exist"), + VDS_DISTRIBUTOR_GETS_FAILURES_BUSY("vds.distributor.gets.failures.busy", Unit.OPERATION, "The number of messages from storage that failed because the storage node was busy"), + VDS_DISTRIBUTOR_GETS_FAILURES_CONCURRENT_MUTATIONS("vds.distributor.gets.failures.concurrent_mutations", Unit.OPERATION, "The number of operations that were transiently failed due to a mutating operation already being in progress for its document ID"), + VDS_DISTRIBUTOR_GETS_FAILURES_INCONSISTENT_BUCKET("vds.distributor.gets.failures.inconsistent_bucket", Unit.OPERATION, "The number of operations failed due to buckets being in an inconsistent state or not found"), + VDS_DISTRIBUTOR_GETS_FAILURES_NOTCONNECTED("vds.distributor.gets.failures.notconnected", Unit.OPERATION, "The number of operations discarded because there were no available storage nodes to send to"), + VDS_DISTRIBUTOR_GETS_FAILURES_NOTREADY("vds.distributor.gets.failures.notready", Unit.OPERATION, "The number of operations discarded because distributor was not ready"), + VDS_DISTRIBUTOR_GETS_FAILURES_SAFE_TIME_NOT_REACHED("vds.distributor.gets.failures.safe_time_not_reached", Unit.OPERATION, "The number of operations that were transiently failed due to them arriving before the safe time point for bucket ownership handovers has passed"), + VDS_DISTRIBUTOR_GETS_FAILURES_STORAGEFAILURE("vds.distributor.gets.failures.storagefailure", Unit.OPERATION, "The number of operations that failed in storage"), + VDS_DISTRIBUTOR_GETS_FAILURES_TEST_AND_SET_FAILED("vds.distributor.gets.failures.test_and_set_failed", Unit.OPERATION, "The number of mutating operations that failed because they specified a test-and-set condition that did not match the existing document"), + VDS_DISTRIBUTOR_GETS_FAILURES_TIMEOUT("vds.distributor.gets.failures.timeout", Unit.OPERATION, "The number of operations that failed because the operation timed out towards storage"), + VDS_DISTRIBUTOR_GETS_FAILURES_WRONGDISTRIBUTOR("vds.distributor.gets.failures.wrongdistributor", Unit.OPERATION, "The number of operations discarded because they were sent to the wrong distributor"), VDS_DISTRIBUTOR_VISITOR_LATENCY("vds.distributor.visitor.latency", Unit.MILLISECOND, "The average latency of visitor operations"), VDS_DISTRIBUTOR_VISITOR_OK("vds.distributor.visitor.ok", Unit.OPERATION, "The number of successful visitor operations performed"), VDS_DISTRIBUTOR_VISITOR_FAILURES_TOTAL("vds.distributor.visitor.failures.total", Unit.OPERATION, "Sum of all failures"), @@ -85,10 +132,105 @@ public enum DistributorMetrics implements VespaMetrics { VDS_DISTRIBUTOR_VISITOR_FAILURES_BUSY("vds.distributor.visitor.failures.busy", Unit.OPERATION, "The number of messages from storage that failed because the storage node was busy"), VDS_DISTRIBUTOR_VISITOR_FAILURES_INCONSISTENT_BUCKET("vds.distributor.visitor.failures.inconsistent_bucket", Unit.OPERATION, "The number of operations failed due to buckets being in an inconsistent state or not found"), VDS_DISTRIBUTOR_VISITOR_FAILURES_NOTFOUND("vds.distributor.visitor.failures.notfound", Unit.OPERATION, "The number of operations that failed because the document did not exist"), + VDS_DISTRIBUTOR_VISITOR_BYTES_PER_VISITOR("vds.distributor.visitor.bytes_per_visitor", Unit.OPERATION, "The number of bytes visited on content nodes as part of a single client visitor command"), + VDS_DISTRIBUTOR_VISITOR_DOCS_PER_VISITOR("vds.distributor.visitor.docs_per_visitor", Unit.OPERATION, "The number of documents visited on content nodes as part of a single client visitor command"), + VDS_DISTRIBUTOR_VISITOR_FAILURES_CONCURRENT_MUTATIONS("vds.distributor.visitor.failures.concurrent_mutations", Unit.OPERATION, "The number of operations that were transiently failed due to a mutating operation already being in progress for its document ID"), + VDS_DISTRIBUTOR_VISITOR_FAILURES_TEST_AND_SET_FAILED("vds.distributor.visitor.failures.test_and_set_failed", Unit.OPERATION, "The number of mutating operations that failed because they specified a test-and-set condition that did not match the existing document"), VDS_DISTRIBUTOR_DOCSSTORED("vds.distributor.docsstored", Unit.DOCUMENT, "Number of documents stored in all buckets controlled by this distributor"), VDS_DISTRIBUTOR_BYTESSTORED("vds.distributor.bytesstored", Unit.BYTE, "Number of bytes stored in all buckets controlled by this distributor"), + METRICMANAGER_PERIODICHOOKLATENCY("metricmanager.periodichooklatency", Unit.MILLISECOND, "Time in ms used to update a single periodic hook"), + METRICMANAGER_RESETLATENCY("metricmanager.resetlatency", Unit.MILLISECOND, "Time in ms used to reset all metrics."), + METRICMANAGER_SLEEPTIME("metricmanager.sleeptime", Unit.MILLISECOND, "Time in ms worker thread is sleeping"), + METRICMANAGER_SNAPSHOTHOOKLATENCY("metricmanager.snapshothooklatency", Unit.MILLISECOND, "Time in ms used to update a single snapshot hook"), + METRICMANAGER_SNAPSHOTLATENCY("metricmanager.snapshotlatency", Unit.MILLISECOND, "Time in ms used to take a snapshot"), + VDS_DISTRIBUTOR_ACTIVATE_CLUSTER_STATE_PROCESSING_TIME("vds.distributor.activate_cluster_state_processing_time", Unit.MILLISECOND, "Elapsed time where the distributor thread is blocked on merging pending bucket info into its bucket database upon activating a cluster state"), + VDS_DISTRIBUTOR_BUCKET_DB_MEMORY_USAGE_ALLOCATED_BYTES("vds.distributor.bucket_db.memory_usage.allocated_bytes", Unit.BYTE, "The number of allocated bytes"), + VDS_DISTRIBUTOR_BUCKET_DB_MEMORY_USAGE_DEAD_BYTES("vds.distributor.bucket_db.memory_usage.dead_bytes", Unit.BYTE, "The number of dead bytes (<= used_bytes)"), + VDS_DISTRIBUTOR_BUCKET_DB_MEMORY_USAGE_ONHOLD_BYTES("vds.distributor.bucket_db.memory_usage.onhold_bytes", Unit.BYTE, "The number of bytes on hold"), + VDS_DISTRIBUTOR_BUCKET_DB_MEMORY_USAGE_USED_BYTES("vds.distributor.bucket_db.memory_usage.used_bytes", Unit.BYTE, "The number of used bytes (<= allocated_bytes)"), + VDS_DISTRIBUTOR_GETBUCKETLISTS_FAILURES_BUSY("vds.distributor.getbucketlists.failures.busy", Unit.OPERATION, "The number of messages from storage that failed because the storage node was busy"), + VDS_DISTRIBUTOR_GETBUCKETLISTS_FAILURES_CONCURRENT_MUTATIONS("vds.distributor.getbucketlists.failures.concurrent_mutations", Unit.OPERATION, "The number of operations that were transiently failed due to a mutating operation already being in progress for its document ID"), + VDS_DISTRIBUTOR_GETBUCKETLISTS_FAILURES_INCONSISTENT_BUCKET("vds.distributor.getbucketlists.failures.inconsistent_bucket", Unit.OPERATION, "The number of operations failed due to buckets being in an inconsistent state or not found"), + VDS_DISTRIBUTOR_GETBUCKETLISTS_FAILURES_NOTCONNECTED("vds.distributor.getbucketlists.failures.notconnected", Unit.OPERATION, "The number of operations discarded because there were no available storage nodes to send to"), + VDS_DISTRIBUTOR_GETBUCKETLISTS_FAILURES_NOTFOUND("vds.distributor.getbucketlists.failures.notfound", Unit.OPERATION, "The number of operations that failed because the document did not exist"), + VDS_DISTRIBUTOR_GETBUCKETLISTS_FAILURES_NOTREADY("vds.distributor.getbucketlists.failures.notready", Unit.OPERATION, "The number of operations discarded because distributor was not ready"), + VDS_DISTRIBUTOR_GETBUCKETLISTS_FAILURES_SAFE_TIME_NOT_REACHED("vds.distributor.getbucketlists.failures.safe_time_not_reached", Unit.OPERATION, "The number of operations that were transiently failed due to them arriving before the safe time point for bucket ownership handovers has passed"), + VDS_DISTRIBUTOR_GETBUCKETLISTS_FAILURES_STORAGEFAILURE("vds.distributor.getbucketlists.failures.storagefailure", Unit.OPERATION, "The number of operations that failed in storage"), + VDS_DISTRIBUTOR_GETBUCKETLISTS_FAILURES_TEST_AND_SET_FAILED("vds.distributor.getbucketlists.failures.test_and_set_failed", Unit.OPERATION, "The number of mutating operations that failed because they specified a test-and-set condition that did not match the existing document"), + VDS_DISTRIBUTOR_GETBUCKETLISTS_FAILURES_TIMEOUT("vds.distributor.getbucketlists.failures.timeout", Unit.OPERATION, "The number of operations that failed because the operation timed out towards storage"), + VDS_DISTRIBUTOR_GETBUCKETLISTS_FAILURES_TOTAL("vds.distributor.getbucketlists.failures.total", Unit.OPERATION, "Total number of failures"), + VDS_DISTRIBUTOR_GETBUCKETLISTS_FAILURES_WRONGDISTRIBUTOR("vds.distributor.getbucketlists.failures.wrongdistributor", Unit.OPERATION, "The number of operations discarded because they were sent to the wrong distributor"), + VDS_DISTRIBUTOR_GETBUCKETLISTS_LATENCY("vds.distributor.getbucketlists.latency", Unit.MILLISECOND, "The average latency of getbucketlists operations"), + VDS_DISTRIBUTOR_GETBUCKETLISTS_OK("vds.distributor.getbucketlists.ok", Unit.OPERATION, "The number of successful getbucketlists operations performed"), + VDS_DISTRIBUTOR_RECOVERYMODESCHEDULINGTIME("vds.distributor.recoverymodeschedulingtime", Unit.MILLISECOND, "Time spent scheduling operations in recovery mode after receiving new cluster state"), + VDS_DISTRIBUTOR_SET_CLUSTER_STATE_PROCESSING_TIME("vds.distributor.set_cluster_state_processing_time", Unit.MILLISECOND, "Elapsed time where the distributor thread is blocked on processing its bucket database upon receiving a new cluster state"), + VDS_DISTRIBUTOR_STATE_TRANSITION_TIME("vds.distributor.state_transition_time", Unit.MILLISECOND, "Time it takes to complete a cluster state transition. If a state transition is preempted before completing, its elapsed time is counted as part of the total time spent for the final, completed state transition"), + VDS_DISTRIBUTOR_STATS_FAILURES_BUSY("vds.distributor.stats.failures.busy", Unit.OPERATION, "The number of messages from storage that failed because the storage node was busy"), + VDS_DISTRIBUTOR_STATS_FAILURES_CONCURRENT_MUTATIONS("vds.distributor.stats.failures.concurrent_mutations", Unit.OPERATION, "The number of operations that were transiently failed due to a mutating operation already being in progress for its document ID"), + VDS_DISTRIBUTOR_STATS_FAILURES_INCONSISTENT_BUCKET("vds.distributor.stats.failures.inconsistent_bucket", Unit.OPERATION, "The number of operations failed due to buckets being in an inconsistent state or not found"), + VDS_DISTRIBUTOR_STATS_FAILURES_NOTCONNECTED("vds.distributor.stats.failures.notconnected", Unit.OPERATION, "The number of operations discarded because there were no available storage nodes to send to"), + VDS_DISTRIBUTOR_STATS_FAILURES_NOTFOUND("vds.distributor.stats.failures.notfound", Unit.OPERATION, "The number of operations that failed because the document did not exist"), + VDS_DISTRIBUTOR_STATS_FAILURES_NOTREADY("vds.distributor.stats.failures.notready", Unit.OPERATION, "The number of operations discarded because distributor was not ready"), + VDS_DISTRIBUTOR_STATS_FAILURES_SAFE_TIME_NOT_REACHED("vds.distributor.stats.failures.safe_time_not_reached", Unit.OPERATION, "The number of operations that were transiently failed due to them arriving before the safe time point for bucket ownership handovers has passed"), + VDS_DISTRIBUTOR_STATS_FAILURES_STORAGEFAILURE("vds.distributor.stats.failures.storagefailure", Unit.OPERATION, "The number of operations that failed in storage"), + VDS_DISTRIBUTOR_STATS_FAILURES_TEST_AND_SET_FAILED("vds.distributor.stats.failures.test_and_set_failed", Unit.OPERATION, "The number of mutating operations that failed because they specified a test-and-set condition that did not match the existing document"), + VDS_DISTRIBUTOR_STATS_FAILURES_TIMEOUT("vds.distributor.stats.failures.timeout", Unit.OPERATION, "The number of operations that failed because the operation timed out towards storage"), + VDS_DISTRIBUTOR_STATS_FAILURES_TOTAL("vds.distributor.stats.failures.total", Unit.OPERATION, "The total number of failures"), + VDS_DISTRIBUTOR_STATS_FAILURES_WRONGDISTRIBUTOR("vds.distributor.stats.failures.wrongdistributor", Unit.OPERATION, "The number of operations discarded because they were sent to the wrong distributor"), + VDS_DISTRIBUTOR_STATS_LATENCY("vds.distributor.stats.latency", Unit.MILLISECOND, "The average latency of stats operations"), + VDS_DISTRIBUTOR_STATS_OK("vds.distributor.stats.ok", Unit.OPERATION, "The number of successful stats operations performed"), + VDS_DISTRIBUTOR_UPDATE_GETS_FAILURES_BUSY("vds.distributor.update_gets.failures.busy", Unit.OPERATION, "The number of messages from storage that failed because the storage node was busy"), + VDS_DISTRIBUTOR_UPDATE_GETS_FAILURES_CONCURRENT_MUTATIONS("vds.distributor.update_gets.failures.concurrent_mutations", Unit.OPERATION, "The number of operations that were transiently failed due to a mutating operation already being in progress for its document ID"), + VDS_DISTRIBUTOR_UPDATE_GETS_FAILURES_INCONSISTENT_BUCKET("vds.distributor.update_gets.failures.inconsistent_bucket", Unit.OPERATION, "The number of operations failed due to buckets being in an inconsistent state or not found"), + VDS_DISTRIBUTOR_UPDATE_GETS_FAILURES_NOTCONNECTED("vds.distributor.update_gets.failures.notconnected", Unit.OPERATION, "The number of operations discarded because there were no available storage nodes to send to"), + VDS_DISTRIBUTOR_UPDATE_GETS_FAILURES_NOTFOUND("vds.distributor.update_gets.failures.notfound", Unit.OPERATION, "The number of operations that failed because the document did not exist"), + VDS_DISTRIBUTOR_UPDATE_GETS_FAILURES_NOTREADY("vds.distributor.update_gets.failures.notready", Unit.OPERATION, "The number of operations discarded because distributor was not ready"), + VDS_DISTRIBUTOR_UPDATE_GETS_FAILURES_SAFE_TIME_NOT_REACHED("vds.distributor.update_gets.failures.safe_time_not_reached", Unit.OPERATION, "The number of operations that were transiently failed due to them arriving before the safe time point for bucket ownership handovers has passed"), + VDS_DISTRIBUTOR_UPDATE_GETS_FAILURES_STORAGEFAILURE("vds.distributor.update_gets.failures.storagefailure", Unit.OPERATION, "The number of operations that failed in storage"), + VDS_DISTRIBUTOR_UPDATE_GETS_FAILURES_TEST_AND_SET_FAILED("vds.distributor.update_gets.failures.test_and_set_failed", Unit.OPERATION, "The number of mutating operations that failed because they specified a test-and-set condition that did not match the existing document"), + VDS_DISTRIBUTOR_UPDATE_GETS_FAILURES_TIMEOUT("vds.distributor.update_gets.failures.timeout", Unit.OPERATION, "The number of operations that failed because the operation timed out towards storage"), + VDS_DISTRIBUTOR_UPDATE_GETS_FAILURES_TOTAL("vds.distributor.update_gets.failures.total", Unit.OPERATION, "The total number of failures"), + VDS_DISTRIBUTOR_UPDATE_GETS_FAILURES_WRONGDISTRIBUTOR("vds.distributor.update_gets.failures.wrongdistributor", Unit.OPERATION, "The number of operations discarded because they were sent to the wrong distributor"), + VDS_DISTRIBUTOR_UPDATE_GETS_LATENCY("vds.distributor.update_gets.latency", Unit.MILLISECOND, "The average latency of update_gets operations"), + VDS_DISTRIBUTOR_UPDATE_GETS_OK("vds.distributor.update_gets.ok", Unit.OPERATION, "The number of successful update_gets operations performed"), + VDS_DISTRIBUTOR_UPDATE_METADATA_GETS_FAILURES_BUSY("vds.distributor.update_metadata_gets.failures.busy", Unit.OPERATION, "The number of messages from storage that failed because the storage node was busy"), + VDS_DISTRIBUTOR_UPDATE_METADATA_GETS_FAILURES_CONCURRENT_MUTATIONS("vds.distributor.update_metadata_gets.failures.concurrent_mutations", Unit.OPERATION, "The number of operations that were transiently failed due to a mutating operation already being in progress for its document ID"), + VDS_DISTRIBUTOR_UPDATE_METADATA_GETS_FAILURES_INCONSISTENT_BUCKET("vds.distributor.update_metadata_gets.failures.inconsistent_bucket", Unit.OPERATION, "The number of operations failed due to buckets being in an inconsistent state or not found"), + VDS_DISTRIBUTOR_UPDATE_METADATA_GETS_FAILURES_NOTCONNECTED("vds.distributor.update_metadata_gets.failures.notconnected", Unit.OPERATION, "The number of operations discarded because there were no available storage nodes to send to"), + VDS_DISTRIBUTOR_UPDATE_METADATA_GETS_FAILURES_NOTFOUND("vds.distributor.update_metadata_gets.failures.notfound", Unit.OPERATION, "The number of operations that failed because the document did not exist"), + VDS_DISTRIBUTOR_UPDATE_METADATA_GETS_FAILURES_NOTREADY("vds.distributor.update_metadata_gets.failures.notready", Unit.OPERATION, "The number of operations discarded because distributor was not ready"), + VDS_DISTRIBUTOR_UPDATE_METADATA_GETS_FAILURES_SAFE_TIME_NOT_REACHED("vds.distributor.update_metadata_gets.failures.safe_time_not_reached", Unit.OPERATION, "The number of operations that were transiently failed due to them arriving before the safe time point for bucket ownership handovers has passed"), + VDS_DISTRIBUTOR_UPDATE_METADATA_GETS_FAILURES_STORAGEFAILURE("vds.distributor.update_metadata_gets.failures.storagefailure", Unit.OPERATION, "The number of operations that failed in storage"), + VDS_DISTRIBUTOR_UPDATE_METADATA_GETS_FAILURES_TEST_AND_SET_FAILED("vds.distributor.update_metadata_gets.failures.test_and_set_failed", Unit.OPERATION, "The number of mutating operations that failed because they specified a test-and-set condition that did not match the existing document"), + VDS_DISTRIBUTOR_UPDATE_METADATA_GETS_FAILURES_TIMEOUT("vds.distributor.update_metadata_gets.failures.timeout", Unit.OPERATION, "The number of operations that failed because the operation timed out towards storage"), + VDS_DISTRIBUTOR_UPDATE_METADATA_GETS_FAILURES_TOTAL("vds.distributor.update_metadata_gets.failures.total", Unit.OPERATION, "The total number of failures"), + VDS_DISTRIBUTOR_UPDATE_METADATA_GETS_FAILURES_WRONGDISTRIBUTOR("vds.distributor.update_metadata_gets.failures.wrongdistributor", Unit.OPERATION, "The number of operations discarded because they were sent to the wrong distributor"), + VDS_DISTRIBUTOR_UPDATE_METADATA_GETS_LATENCY("vds.distributor.update_metadata_gets.latency", Unit.MILLISECOND, "The average latency of update_metadata_gets operations"), + VDS_DISTRIBUTOR_UPDATE_METADATA_GETS_OK("vds.distributor.update_metadata_gets.ok", Unit.OPERATION, "The number of successful update_metadata_gets operations performed"), + VDS_DISTRIBUTOR_UPDATE_PUTS_FAILURES_BUSY("vds.distributor.update_puts.failures.busy", Unit.OPERATION, "The number of messages from storage that failed because the storage node was busy"), + VDS_DISTRIBUTOR_UPDATE_PUTS_FAILURES_CONCURRENT_MUTATIONS("vds.distributor.update_puts.failures.concurrent_mutations", Unit.OPERATION, "The number of operations that were transiently failed due to a mutating operation already being in progress for its document ID"), + VDS_DISTRIBUTOR_UPDATE_PUTS_FAILURES_INCONSISTENT_BUCKET("vds.distributor.update_puts.failures.inconsistent_bucket", Unit.OPERATION, "The number of operations failed due to buckets being in an inconsistent state or not found"), + VDS_DISTRIBUTOR_UPDATE_PUTS_FAILURES_NOTCONNECTED("vds.distributor.update_puts.failures.notconnected", Unit.OPERATION, "The number of operations discarded because there were no available storage nodes to send to"), + VDS_DISTRIBUTOR_UPDATE_PUTS_FAILURES_NOTFOUND("vds.distributor.update_puts.failures.notfound", Unit.OPERATION, "The number of operations that failed because the document did not exist"), + VDS_DISTRIBUTOR_UPDATE_PUTS_FAILURES_NOTREADY("vds.distributor.update_puts.failures.notready", Unit.OPERATION, "The number of operations discarded because distributor was not ready"), + VDS_DISTRIBUTOR_UPDATE_PUTS_FAILURES_SAFE_TIME_NOT_REACHED("vds.distributor.update_puts.failures.safe_time_not_reached", Unit.OPERATION, "The number of operations that were transiently failed due to them arriving before the safe time point for bucket ownership handovers has passed"), + VDS_DISTRIBUTOR_UPDATE_PUTS_FAILURES_STORAGEFAILURE("vds.distributor.update_puts.failures.storagefailure", Unit.OPERATION, "The number of operations that failed in storage"), + VDS_DISTRIBUTOR_UPDATE_PUTS_FAILURES_TEST_AND_SET_FAILED("vds.distributor.update_puts.failures.test_and_set_failed", Unit.OPERATION, "The number of mutating operations that failed because they specified a test-and-set condition that did not match the existing document"), + VDS_DISTRIBUTOR_UPDATE_PUTS_FAILURES_TIMEOUT("vds.distributor.update_puts.failures.timeout", Unit.OPERATION, "The number of operations that failed because the operation timed out towards storage"), + VDS_DISTRIBUTOR_UPDATE_PUTS_FAILURES_TOTAL("vds.distributor.update_puts.failures.total", Unit.OPERATION, "The total number of put failures"), + VDS_DISTRIBUTOR_UPDATE_PUTS_FAILURES_WRONGDISTRIBUTOR("vds.distributor.update_puts.failures.wrongdistributor", Unit.OPERATION, "The number of operations discarded because they were sent to the wrong distributor"), + VDS_DISTRIBUTOR_UPDATE_PUTS_LATENCY("vds.distributor.update_puts.latency", Unit.MILLISECOND, "The average latency of update_puts operations"), + VDS_DISTRIBUTOR_UPDATE_PUTS_OK("vds.distributor.update_puts.ok", Unit.OPERATION, "The number of successful update_puts operations performed"), + + VDS_IDEALSTATE_NODES_PER_MERGE("vds.idealstate.nodes_per_merge", Unit.NODE, "The number of nodes involved in a single merge operation."), + VDS_IDEALSTATE_SET_BUCKET_STATE_BLOCKED("vds.idealstate.set_bucket_state.blocked", Unit.OPERATION, "The number of operations blocked by blocking operation starter"), + VDS_IDEALSTATE_SET_BUCKET_STATE_DONE_FAILED("vds.idealstate.set_bucket_state.done_failed", Unit.OPERATION, "The number of operations that failed"), + VDS_IDEALSTATE_SET_BUCKET_STATE_DONE_OK("vds.idealstate.set_bucket_state.done_ok", Unit.OPERATION, "The number of operations successfully performed"), + VDS_IDEALSTATE_SET_BUCKET_STATE_PENDING("vds.idealstate.set_bucket_state.pending", Unit.OPERATION, "The number of operations pending"), + VDS_IDEALSTATE_SET_BUCKET_STATE_THROTTLED("vds.idealstate.set_bucket_state.throttled", Unit.OPERATION, "The number of operations throttled by throttling operation starter"), + VDS_BOUNCER_CLOCK_SKEW_ABORTS("vds.bouncer.clock_skew_aborts", Unit.OPERATION, "Number of client operations that were aborted due to clock skew between sender and receiver exceeding acceptable range"); diff --git a/container-core/src/main/java/com/yahoo/metrics/StorageMetrics.java b/container-core/src/main/java/com/yahoo/metrics/StorageMetrics.java index d67b67d04b7..05ae5180d3b 100644 --- a/container-core/src/main/java/com/yahoo/metrics/StorageMetrics.java +++ b/container-core/src/main/java/com/yahoo/metrics/StorageMetrics.java @@ -11,6 +11,9 @@ public enum StorageMetrics implements VespaMetrics { VDS_DATASTORED_ALLDISKS_BUCKETS("vds.datastored.alldisks.buckets", Unit.BUCKET, "Number of buckets managed"), VDS_DATASTORED_ALLDISKS_DOCS("vds.datastored.alldisks.docs", Unit.DOCUMENT, "Number of documents stored"), VDS_DATASTORED_ALLDISKS_BYTES("vds.datastored.alldisks.bytes", Unit.BYTE, "Number of bytes stored"), + VDS_DATASTORED_ALLDISKS_ACTIVEBUCKETS("vds.datastored.alldisks.activebuckets", Unit.BUCKET, "Number of active buckets on the node"), + VDS_DATASTORED_ALLDISKS_READYBUCKETS("vds.datastored.alldisks.readybuckets", Unit.BUCKET, "Number of ready buckets on the node"), + VDS_VISITOR_ALLTHREADS_AVERAGEVISITORLIFETIME("vds.visitor.allthreads.averagevisitorlifetime", Unit.MILLISECOND, "Average lifetime of a visitor"), VDS_VISITOR_ALLTHREADS_AVERAGEQUEUEWAIT("vds.visitor.allthreads.averagequeuewait", Unit.MILLISECOND, "Average time an operation spends in input queue."), VDS_VISITOR_ALLTHREADS_QUEUESIZE("vds.visitor.allthreads.queuesize", Unit.OPERATION, "Size of input message queue."), @@ -19,6 +22,9 @@ public enum StorageMetrics implements VespaMetrics { VDS_VISITOR_ALLTHREADS_FAILED("vds.visitor.allthreads.failed", Unit.OPERATION, "Number of visitors failed"), VDS_VISITOR_ALLTHREADS_AVERAGEMESSAGESENDTIME("vds.visitor.allthreads.averagemessagesendtime", Unit.MILLISECOND, "Average time it takes for messages to be sent to their target (and be replied to)"), VDS_VISITOR_ALLTHREADS_AVERAGEPROCESSINGTIME("vds.visitor.allthreads.averageprocessingtime", Unit.MILLISECOND, "Average time used to process visitor requests"), + VDS_VISITOR_ALLTHREADS_ABORTED("vds.visitor.allthreads.aborted", Unit.INSTANCE, "Number of visitors aborted."), + VDS_VISITOR_ALLTHREADS_AVERAGEVISITORCREATIONTIME("vds.visitor.allthreads.averagevisitorcreationtime", Unit.MILLISECOND, "Average time spent creating a visitor instance"), + VDS_VISITOR_ALLTHREADS_DESTINATION_FAILURE_REPLIES("vds.visitor.allthreads.destination_failure_replies", Unit.INSTANCE, "Number of failure replies received from the visitor destination"), VDS_FILESTOR_QUEUESIZE("vds.filestor.queuesize", Unit.OPERATION, "Size of input message queue."), VDS_FILESTOR_AVERAGEQUEUEWAIT("vds.filestor.averagequeuewait", Unit.MILLISECOND, "Average time an operation spends in input queue."), @@ -30,11 +36,17 @@ public enum StorageMetrics implements VespaMetrics { VDS_FILESTOR_ALLTHREADS_MERGEMETADATAREADLATENCY("vds.filestor.allthreads.mergemetadatareadlatency", Unit.MILLISECOND, "Time spent in a merge step to check metadata of current node to see what data it has."), VDS_FILESTOR_ALLTHREADS_MERGEDATAREADLATENCY("vds.filestor.allthreads.mergedatareadlatency", Unit.MILLISECOND, "Time spent in a merge step to read data other nodes need."), VDS_FILESTOR_ALLTHREADS_MERGEDATAWRITELATENCY("vds.filestor.allthreads.mergedatawritelatency", Unit.MILLISECOND, "Time spent in a merge step to write data needed to current node."), + VDS_FILESTOR_ALLTHREADS_MERGEAVGDATARECEIVEDNEEDED("vds.filestor.allthreads.mergeavgdatareceivedneeded", Unit.BYTE, "Amount of data transferred from previous node in chain that we needed to apply locally."), + VDS_FILESTOR_ALLTHREADS_MERGEBUCKETS_COUNT("vds.filestor.allthreads.mergebuckets.count", Unit.REQUEST, "Number of requests processed."), + VDS_FILESTOR_ALLTHREADS_MERGEBUCKETS_FAILED("vds.filestor.allthreads.mergebuckets.failed", Unit.REQUEST, "Number of failed requests."), + VDS_FILESTOR_ALLTHREADS_MERGEBUCKETS_LATENCY("vds.filestor.allthreads.mergebuckets.latency", Unit.MILLISECOND, "Latency of successful requests."), + VDS_FILESTOR_ALLTHREADS_MERGELATENCYTOTAL("vds.filestor.allthreads.mergelatencytotal", Unit.MILLISECOND, "Latency of total merge operation, from master node receives it, until merge is complete and master node replies."), VDS_FILESTOR_ALLTHREADS_MERGE_PUT_LATENCY("vds.filestor.allthreads.put_latency", Unit.MILLISECOND, "Latency of individual puts that are part of merge operations"), // TODO Vespa 9: Update metric name to include 'merge' VDS_FILESTOR_ALLTHREADS_MERGE_REMOVE_LATENCY("vds.filestor.allthreads.remove_latency", Unit.MILLISECOND, "Latency of individual removes that are part of merge operations"), // TODO Vespa 9: Update metric name to include 'merge' VDS_FILESTOR_ALLSTRIPES_THROTTLED_RPC_DIRECT_DISPATCHES("vds.filestor.allstripes.throttled_rpc_direct_dispatches", Unit.INSTANCE, "Number of times an RPC thread could not directly dispatch an async operation directly to Proton because it was disallowed by the throttle policy"), VDS_FILESTOR_ALLSTRIPES_THROTTLED_PERSISTENCE_THREAD_POLLS("vds.filestor.allstripes.throttled_persistence_thread_polls", Unit.INSTANCE, "Number of times a persistence thread could not immediately dispatch a queued async operation because it was disallowed by the throttle policy"), VDS_FILESTOR_ALLSTRIPES_TIMEOUTS_WAITING_FOR_THROTTLE_TOKEN("vds.filestor.allstripes.timeouts_waiting_for_throttle_token", Unit.INSTANCE, "Number of times a persistence thread timed out waiting for an available throttle policy token"), + VDS_FILESTOR_ALLSTRIPES_AVERAGEQUEUEWAIT("vds.filestor.allstripes.averagequeuewait", Unit.MILLISECOND, "Average time an operation spends in input queue."), VDS_FILESTOR_ALLTHREADS_PUT_COUNT("vds.filestor.allthreads.put.count", Unit.OPERATION, "Number of requests processed."), VDS_FILESTOR_ALLTHREADS_PUT_FAILED("vds.filestor.allthreads.put.failed", Unit.OPERATION, "Number of failed requests."), @@ -46,36 +58,67 @@ public enum StorageMetrics implements VespaMetrics { VDS_FILESTOR_ALLTHREADS_REMOVE_TEST_AND_SET_FAILED("vds.filestor.allthreads.remove.test_and_set_failed", Unit.OPERATION, "Number of operations that were skipped due to a test-and-set condition not met"), VDS_FILESTOR_ALLTHREADS_REMOVE_LATENCY("vds.filestor.allthreads.remove.latency", Unit.MILLISECOND, "Latency of successful requests."), VDS_FILESTOR_ALLTHREADS_REMOVE_REQUEST_SIZE("vds.filestor.allthreads.remove.request_size", Unit.BYTE, "Size of requests, in bytes"), + VDS_FILESTOR_ALLTHREADS_REMOVE_NOT_FOUND("vds.filestor.allthreads.remove.not_found", Unit.REQUEST, "Number of requests that could not be completed due to source document not found."), + VDS_FILESTOR_ALLTHREADS_GET_COUNT("vds.filestor.allthreads.get.count", Unit.OPERATION, "Number of requests processed."), VDS_FILESTOR_ALLTHREADS_GET_FAILED("vds.filestor.allthreads.get.failed", Unit.OPERATION, "Number of failed requests."), VDS_FILESTOR_ALLTHREADS_GET_LATENCY("vds.filestor.allthreads.get.latency", Unit.MILLISECOND, "Latency of successful requests."), VDS_FILESTOR_ALLTHREADS_GET_REQUEST_SIZE("vds.filestor.allthreads.get.request_size", Unit.BYTE, "Size of requests, in bytes"), - VDS_FILESTOR_ALLTHREADS_UPDATE_COUNT("vds.filestor.allthreads.update.count", Unit.OPERATION, "Number of requests processed."), - VDS_FILESTOR_ALLTHREADS_UPDATE_FAILED("vds.filestor.allthreads.update.failed", Unit.OPERATION, "Number of failed requests."), - VDS_FILESTOR_ALLTHREADS_UPDATE_TEST_AND_SET_FAILED("vds.filestor.allthreads.update.test_and_set_failed", Unit.OPERATION, "Number of operations that were skipped due to a test-and-set condition not met"), + VDS_FILESTOR_ALLTHREADS_GET_NOT_FOUND("vds.filestor.allthreads.get.not_found", Unit.REQUEST, "Number of requests that could not be completed due to source document not found."), + VDS_FILESTOR_ALLTHREADS_UPDATE_COUNT("vds.filestor.allthreads.update.count", Unit.REQUEST, "Number of requests processed."), + VDS_FILESTOR_ALLTHREADS_UPDATE_FAILED("vds.filestor.allthreads.update.failed", Unit.REQUEST, "Number of failed requests."), + VDS_FILESTOR_ALLTHREADS_UPDATE_TEST_AND_SET_FAILED("vds.filestor.allthreads.update.test_and_set_failed", Unit.REQUEST, "Number of requests that were skipped due to a test-and-set condition not met"), VDS_FILESTOR_ALLTHREADS_UPDATE_LATENCY("vds.filestor.allthreads.update.latency", Unit.MILLISECOND, "Latency of successful requests."), VDS_FILESTOR_ALLTHREADS_UPDATE_REQUEST_SIZE("vds.filestor.allthreads.update.request_size", Unit.BYTE, "Size of requests, in bytes"), - VDS_FILESTOR_ALLTHREADS_CREATEITERATOR_COUNT("vds.filestor.allthreads.createiterator.count", Unit.OPERATION, "Number of requests processed."), + VDS_FILESTOR_ALLTHREADS_UPDATE_LATENCY_READ("vds.filestor.allthreads.update.latency_read", Unit.MILLISECOND, "Latency of the source read in the request."), + VDS_FILESTOR_ALLTHREADS_UPDATE_NOT_FOUND("vds.filestor.allthreads.update.not_found", Unit.REQUEST, "Number of requests that could not be completed due to source document not found."), + VDS_FILESTOR_ALLTHREADS_CREATEITERATOR_COUNT("vds.filestor.allthreads.createiterator.count", Unit.REQUEST, "Number of requests processed."), VDS_FILESTOR_ALLTHREADS_CREATEITERATOR_LATENCY("vds.filestor.allthreads.createiterator.latency", Unit.MILLISECOND, "Latency of successful requests."), - VDS_FILESTOR_ALLTHREADS_VISIT_COUNT("vds.filestor.allthreads.visit.count", Unit.OPERATION, "Number of requests processed."), + VDS_FILESTOR_ALLTHREADS_CREATEITERATOR_FAILED("vds.filestor.allthreads.createiterator.failed", Unit.REQUEST, "Number of failed requests."), + VDS_FILESTOR_ALLTHREADS_VISIT_COUNT("vds.filestor.allthreads.visit.count", Unit.REQUEST, "Number of requests processed."), VDS_FILESTOR_ALLTHREADS_VISIT_LATENCY("vds.filestor.allthreads.visit.latency", Unit.MILLISECOND, "Latency of successful requests."), - VDS_FILESTOR_ALLTHREADS_REMOVE_LOCATION_COUNT("vds.filestor.allthreads.remove_location.count", Unit.OPERATION, "Number of requests processed."), + VDS_FILESTOR_ALLTHREADS_VISIT_DOCS("vds.filestor.allthreads.visit.docs", Unit.DOCUMENT, "Number of entries read per iterate call"), + VDS_FILESTOR_ALLTHREADS_VISIT_FAILED("vds.filestor.allthreads.visit.failed", Unit.REQUEST, "Number of failed requests."), + VDS_FILESTOR_ALLTHREADS_REMOVE_LOCATION_COUNT("vds.filestor.allthreads.remove_location.count", Unit.REQUEST, "Number of requests processed."), VDS_FILESTOR_ALLTHREADS_REMOVE_LOCATION_LATENCY("vds.filestor.allthreads.remove_location.latency", Unit.MILLISECOND, "Latency of successful requests."), - VDS_FILESTOR_ALLTHREADS_SPLITBUCKETS_COUNT("vds.filestor.allthreads.splitbuckets.count", Unit.OPERATION, "Number of requests processed."), - VDS_FILESTOR_ALLTHREADS_JOINBUCKETS_COUNT("vds.filestor.allthreads.joinbuckets.count", Unit.OPERATION, "Number of requests processed."), - VDS_FILESTOR_ALLTHREADS_DELETEBUCKETS_COUNT("vds.filestor.allthreads.deletebuckets.count", Unit.OPERATION, "Number of requests processed."), - VDS_FILESTOR_ALLTHREADS_DELETEBUCKETS_FAILED("vds.filestor.allthreads.deletebuckets.failed", Unit.OPERATION, "Number of failed requests."), + VDS_FILESTOR_ALLTHREADS_REMOVE_LOCATION_FAILED("vds.filestor.allthreads.remove_location.failed", Unit.REQUEST, "Number of failed requests."), + VDS_FILESTOR_ALLTHREADS_SPLITBUCKETS_COUNT("vds.filestor.allthreads.splitbuckets.count", Unit.REQUEST, "Number of requests processed."), + VDS_FILESTOR_ALLTHREADS_SPLITBUCKETS_FAILED("vds.filestor.allthreads.splitbuckets.failed", Unit.REQUEST, "Number of failed requests."), + VDS_FILESTOR_ALLTHREADS_SPLITBUCKETS_LATENCY("vds.filestor.allthreads.splitbuckets.latency", Unit.REQUEST, "Latency of successful requests."), + VDS_FILESTOR_ALLTHREADS_JOINBUCKETS_COUNT("vds.filestor.allthreads.joinbuckets.count", Unit.REQUEST, "Number of requests processed."), + VDS_FILESTOR_ALLTHREADS_JOINBUCKETS_FAILED("vds.filestor.allthreads.joinbuckets.failed", Unit.REQUEST, "Number of failed requests."), + VDS_FILESTOR_ALLTHREADS_JOINBUCKETS_LATENCY("vds.filestor.allthreads.joinbuckets.latency", Unit.MILLISECOND, "Latency of successful requests."), + VDS_FILESTOR_ALLTHREADS_DELETEBUCKETS_COUNT("vds.filestor.allthreads.deletebuckets.count", Unit.REQUEST, "Number of requests processed."), + VDS_FILESTOR_ALLTHREADS_DELETEBUCKETS_FAILED("vds.filestor.allthreads.deletebuckets.failed", Unit.REQUEST, "Number of failed requests."), VDS_FILESTOR_ALLTHREADS_DELETEBUCKETS_LATENCY("vds.filestor.allthreads.deletebuckets.latency", Unit.MILLISECOND, "Latency of successful requests."), - VDS_FILESTOR_ALLTHREADS_SETBUCKETSTATES_COUNT("vds.filestor.allthreads.setbucketstates.count", Unit.OPERATION, "Number of requests processed."), + VDS_FILESTOR_ALLTHREADS_SETBUCKETSTATES_COUNT("vds.filestor.allthreads.setbucketstates.count", Unit.REQUEST, "Number of requests processed."), + VDS_FILESTOR_ALLTHREADS_SETBUCKETSTATES_FAILED("vds.filestor.allthreads.setbucketstates.failed", Unit.REQUEST, "Number of failed requests."), + VDS_FILESTOR_ALLTHREADS_SETBUCKETSTATES_LATENCY("vds.filestor.allthreads.setbucketstates.latency", Unit.MILLISECOND, "Latency of successful requests."), VDS_MERGETHROTTLER_AVERAGEQUEUEWAITINGTIME("vds.mergethrottler.averagequeuewaitingtime", Unit.MILLISECOND, "Time merges spent in the throttler queue"), VDS_MERGETHROTTLER_QUEUESIZE("vds.mergethrottler.queuesize", Unit.INSTANCE, "Length of merge queue"), VDS_MERGETHROTTLER_ACTIVE_WINDOW_SIZE("vds.mergethrottler.active_window_size", Unit.INSTANCE, "Number of merges active within the pending window size"), VDS_MERGETHROTTLER_BOUNCED_DUE_TO_BACK_PRESSURE("vds.mergethrottler.bounced_due_to_back_pressure", Unit.INSTANCE, "Number of merges bounced due to resource exhaustion back-pressure"), VDS_MERGETHROTTLER_LOCALLYEXECUTEDMERGES_OK("vds.mergethrottler.locallyexecutedmerges.ok", Unit.INSTANCE, "The number of successful merges for 'locallyexecutedmerges'"), - VDS_MERGETHROTTLER_MERGECHAINS_OK("vds.mergethrottler.mergechains.ok", Unit.INSTANCE, "The number of successful merges for 'mergechains'"), - VDS_MERGETHROTTLER_MERGECHAINS_FAILURES_BUSY("vds.mergethrottler.mergechains.failures.busy", Unit.INSTANCE, "The number of merges that failed because the storage node was busy"), - VDS_MERGETHROTTLER_MERGECHAINS_FAILURES_TOTAL("vds.mergethrottler.mergechains.failures.total", Unit.INSTANCE, "Sum of all failures"), + VDS_MERGETHROTTLER_LOCALLYEXECUTEDMERGES_FAILURES_ABORTED("vds.mergethrottler.locallyexecutedmerges.failures.aborted", Unit.OPERATION, "The number of merges that failed because the storage node was (most likely) shutting down"), + VDS_MERGETHROTTLER_LOCALLYEXECUTEDMERGES_FAILURES_BUCKETNOTFOUND("vds.mergethrottler.locallyexecutedmerges.failures.bucketnotfound", Unit.OPERATION, "The number of operations that failed because the bucket did not exist"), + VDS_MERGETHROTTLER_LOCALLYEXECUTEDMERGES_FAILURES_BUSY("vds.mergethrottler.locallyexecutedmerges.failures.busy", Unit.OPERATION, "The number of merges that failed because the storage node was busy"), + VDS_MERGETHROTTLER_LOCALLYEXECUTEDMERGES_FAILURES_EXISTS("vds.mergethrottler.locallyexecutedmerges.failures.exists", Unit.OPERATION, "The number of merges that were rejected due to a merge operation for their bucket already being processed"), + VDS_MERGETHROTTLER_LOCALLYEXECUTEDMERGES_FAILURES_NOTREADY("vds.mergethrottler.locallyexecutedmerges.failures.notready", Unit.OPERATION, "The number of merges discarded because distributor was not ready"), + VDS_MERGETHROTTLER_LOCALLYEXECUTEDMERGES_FAILURES_OTHER("vds.mergethrottler.locallyexecutedmerges.failures.other", Unit.OPERATION, "The number of other failures"), + VDS_MERGETHROTTLER_LOCALLYEXECUTEDMERGES_FAILURES_REJECTED("vds.mergethrottler.locallyexecutedmerges.failures.rejected", Unit.OPERATION, "The number of merges that were rejected"), + VDS_MERGETHROTTLER_LOCALLYEXECUTEDMERGES_FAILURES_TIMEOUT("vds.mergethrottler.locallyexecutedmerges.failures.timeout", Unit.OPERATION, "The number of merges that failed because they timed out towards storage"), + VDS_MERGETHROTTLER_LOCALLYEXECUTEDMERGES_FAILURES_TOTAL("vds.mergethrottler.locallyexecutedmerges.failures.total", Unit.OPERATION, "Sum of all failures"), + VDS_MERGETHROTTLER_LOCALLYEXECUTEDMERGES_FAILURES_WRONGDISTRIBUTION("vds.mergethrottler.locallyexecutedmerges.failures.wrongdistribution", Unit.OPERATION, "The number of merges that were discarded (flushed) because they were initiated at an older cluster state than the current"), + VDS_MERGETHROTTLER_MERGECHAINS_OK("vds.mergethrottler.mergechains.ok", Unit.OPERATION, "The number of successful merges for 'mergechains'"), + VDS_MERGETHROTTLER_MERGECHAINS_FAILURES_BUSY("vds.mergethrottler.mergechains.failures.busy", Unit.OPERATION, "The number of merges that failed because the storage node was busy"), + VDS_MERGETHROTTLER_MERGECHAINS_FAILURES_TOTAL("vds.mergethrottler.mergechains.failures.total", Unit.OPERATION, "Sum of all failures"), + VDS_MERGETHROTTLER_MERGECHAINS_FAILURES_EXISTS("vds.mergethrottler.mergechains.failures.exists", Unit.OPERATION, "The number of merges that were rejected due to a merge operation for their bucket already being processed"), + VDS_MERGETHROTTLER_MERGECHAINS_FAILURES_NOTREADY("vds.mergethrottler.mergechains.failures.notready", Unit.OPERATION, "The number of merges discarded because distributor was not ready"), + VDS_MERGETHROTTLER_MERGECHAINS_FAILURES_OTHER("vds.mergethrottler.mergechains.failures.other", Unit.OPERATION, "The number of other failures"), + VDS_MERGETHROTTLER_MERGECHAINS_FAILURES_REJECTED("vds.mergethrottler.mergechains.failures.rejected", Unit.OPERATION, "The number of merges that were rejected"), + VDS_MERGETHROTTLER_MERGECHAINS_FAILURES_TIMEOUT("vds.mergethrottler.mergechains.failures.timeout", Unit.OPERATION, "The number of merges that failed because they timed out towards storage"), + VDS_MERGETHROTTLER_MERGECHAINS_FAILURES_WRONGDISTRIBUTION("vds.mergethrottler.mergechains.failures.wrongdistribution", Unit.OPERATION, "The number of merges that were discarded (flushed) because they were initiated at an older cluster state than the current"), // C++ TLS metrics - these come from both the distributor and storage @@ -88,6 +131,98 @@ public enum StorageMetrics implements VespaMetrics { VDS_SERVER_NETWORK_TLS_CONNECTIONS_BROKEN("vds.server.network.tls-connections-broken", Unit.CONNECTION, "Number of TLS connections broken due to failures during frame encoding or decoding"), VDS_SERVER_NETWORK_FAILED_TLS_CONFIG_RELOADS("vds.server.network.failed-tls-config-reloads", Unit.FAILURE, "Number of times background reloading of TLS config has failed"), + VDS_BOUNCER_UNAVAILABLE_NODE_ABORTS("vds.bouncer.unavailable_node_aborts", Unit.OPERATION, "Number of operations that were aborted due to the node (or target bucket space) being unavailable"), + VDS_CHANGEDBUCKETOWNERSHIPHANDLER_AVG_ABORT_PROCESSING_TIME("vds.changedbucketownershiphandler.avg_abort_processing_time", Unit.MILLISECOND, "Average time spent aborting operations for changed buckets"), + VDS_CHANGEDBUCKETOWNERSHIPHANDLER_EXTERNAL_LOAD_OPS_ABORTED("vds.changedbucketownershiphandler.external_load_ops_aborted", Unit.OPERATION, "Number of outdated external load operations aborted"), + VDS_CHANGEDBUCKETOWNERSHIPHANDLER_IDEAL_STATE_OPS_ABORTED("vds.changedbucketownershiphandler.ideal_state_ops_aborted", Unit.OPERATION, "Number of outdated ideal state operations aborted"), + VDS_COMMUNICATION_BUCKET_SPACE_MAPPING_FAILURES("vds.communication.bucket_space_mapping_failures", Unit.OPERATION, "Number of messages that could not be resolved to a known bucket space"), + VDS_COMMUNICATION_CONVERTFAILURES("vds.communication.convertfailures", Unit.OPERATION, "Number of messages that failed to get converted to storage API messages"), + VDS_COMMUNICATION_EXCEPTIONMESSAGEPROCESSTIME("vds.communication.exceptionmessageprocesstime", Unit.MILLISECOND, "Time transport thread uses to process a single message that fails with an exception thrown into communication manager"), + VDS_COMMUNICATION_MESSAGEPROCESSTIME("vds.communication.messageprocesstime", Unit.MILLISECOND, "Time transport thread uses to process a single message"), + VDS_COMMUNICATION_MESSAGEQUEUE("vds.communication.messagequeue", Unit.ITEM, "Size of input message queue."), + VDS_COMMUNICATION_SENDCOMMANDLATENCY("vds.communication.sendcommandlatency", Unit.MILLISECOND, "Average ms used to send commands to MBUS"), + VDS_COMMUNICATION_SENDREPLYLATENCY("vds.communication.sendreplylatency", Unit.MILLISECOND, "Average ms used to send replies to MBUS"), + VDS_COMMUNICATION_TOOLITTLEMEMORY("vds.communication.toolittlememory", Unit.OPERATION, "Number of messages failed due to too little memory available"), + + VDS_DATASTORED_BUCKET_SPACE_ACTIVE_BUCKETS("vds.datastored.bucket_space.active_buckets", Unit.BUCKET, "Number of active buckets in the bucket space"), + VDS_DATASTORED_BUCKET_SPACE_BUCKET_DB_MEMORY_USAGE_ALLOCATED_BYTES("vds.datastored.bucket_space.bucket_db.memory_usage.allocated_bytes", Unit.BYTE, "The number of allocated bytes"), + VDS_DATASTORED_BUCKET_SPACE_BUCKET_DB_MEMORY_USAGE_DEAD_BYTES("vds.datastored.bucket_space.bucket_db.memory_usage.dead_bytes", Unit.BYTE, "The number of dead bytes (<= used_bytes)"), + VDS_DATASTORED_BUCKET_SPACE_BUCKET_DB_MEMORY_USAGE_ONHOLD_BYTES("vds.datastored.bucket_space.bucket_db.memory_usage.onhold_bytes", Unit.BYTE, "The number of bytes on hold"), + VDS_DATASTORED_BUCKET_SPACE_BUCKET_DB_MEMORY_USAGE_USED_BYTES("vds.datastored.bucket_space.bucket_db.memory_usage.used_bytes", Unit.BYTE, "The number of used bytes (<= allocated_bytes)"), + VDS_DATASTORED_BUCKET_SPACE_BUCKETS_TOTAL("vds.datastored.bucket_space.buckets_total", Unit.BUCKET, "Total number buckets present in the bucket space (ready + not ready)"), + VDS_DATASTORED_BUCKET_SPACE_BYTES("vds.datastored.bucket_space.bytes", Unit.BYTE, "Bytes stored across all documents in the bucket space"), + VDS_DATASTORED_BUCKET_SPACE_DOCS("vds.datastored.bucket_space.docs", Unit.DOCUMENT, "Documents stored in the bucket space"), + VDS_DATASTORED_BUCKET_SPACE_READY_BUCKETS("vds.datastored.bucket_space.ready_buckets", Unit.BUCKET, "Number of ready buckets in the bucket space"), + VDS_DATASTORED_FULLBUCKETINFOLATENCY("vds.datastored.fullbucketinfolatency", Unit.MILLISECOND, "Amount of time spent to process a full bucket info request"), + VDS_DATASTORED_FULLBUCKETINFOREQSIZE("vds.datastored.fullbucketinforeqsize", Unit.NODE, "Amount of distributors answered at once in full bucket info requests."), + VDS_DATASTORED_SIMPLEBUCKETINFOREQSIZE("vds.datastored.simplebucketinforeqsize", Unit.BUCKET, "Amount of buckets returned in simple bucket info requests"), + + VDS_FILESTOR_ALLTHREADS_APPLYBUCKETDIFF_COUNT("vds.filestor.allthreads.applybucketdiff.count", Unit.REQUEST, "Number of requests processed."), + VDS_FILESTOR_ALLTHREADS_APPLYBUCKETDIFF_FAILED("vds.filestor.allthreads.applybucketdiff.failed", Unit.REQUEST, "Number of failed requests."), + VDS_FILESTOR_ALLTHREADS_APPLYBUCKETDIFF_LATENCY("vds.filestor.allthreads.applybucketdiff.latency", Unit.MILLISECOND, "Latency of successful requests."), + VDS_FILESTOR_ALLTHREADS_APPLYBUCKETDIFFREPLY("vds.filestor.allthreads.applybucketdiffreply", Unit.REQUEST, "Number of applybucketdiff replies that have been processed."), + VDS_FILESTOR_ALLTHREADS_BUCKETFIXED("vds.filestor.allthreads.bucketfixed", Unit.BUCKET, "Number of times bucket has been fixed because of corruption"), + VDS_FILESTOR_ALLTHREADS_BUCKETVERIFIED_COUNT("vds.filestor.allthreads.bucketverified.count", Unit.REQUEST, "Number of requests processed."), + VDS_FILESTOR_ALLTHREADS_BUCKETVERIFIED_FAILED("vds.filestor.allthreads.bucketverified.failed", Unit.REQUEST, "Number of failed requests."), + VDS_FILESTOR_ALLTHREADS_BUCKETVERIFIED_LATENCY("vds.filestor.allthreads.bucketverified.latency", Unit.REQUEST, "Latency of successful requests."), + VDS_FILESTOR_ALLTHREADS_BYTESMERGED("vds.filestor.allthreads.bytesmerged", Unit.BYTE, "Total number of bytes merged into this node."), + VDS_FILESTOR_ALLTHREADS_CREATEBUCKETS_COUNT("vds.filestor.allthreads.createbuckets.count", Unit.REQUEST, "Number of requests processed."), + VDS_FILESTOR_ALLTHREADS_CREATEBUCKETS_FAILED("vds.filestor.allthreads.createbuckets.failed", Unit.REQUEST, "Number of failed requests."), + VDS_FILESTOR_ALLTHREADS_CREATEBUCKETS_LATENCY("vds.filestor.allthreads.createbuckets.latency", Unit.REQUEST, "Latency of successful requests."), + VDS_FILESTOR_ALLTHREADS_FAILEDOPERATIONS("vds.filestor.allthreads.failedoperations", Unit.OPERATION, "Number of operations throwing exceptions."), + VDS_FILESTOR_ALLTHREADS_GETBUCKETDIFF_COUNT("vds.filestor.allthreads.getbucketdiff.count", Unit.REQUEST, "Number of requests processed."), + VDS_FILESTOR_ALLTHREADS_GETBUCKETDIFF_FAILED("vds.filestor.allthreads.getbucketdiff.failed", Unit.REQUEST, "Number of failed requests."), + VDS_FILESTOR_ALLTHREADS_GETBUCKETDIFF_LATENCY("vds.filestor.allthreads.getbucketdiff.latency", Unit.REQUEST, "Latency of successful requests."), + VDS_FILESTOR_ALLTHREADS_GETBUCKETDIFFREPLY("vds.filestor.allthreads.getbucketdiffreply", Unit.REQUEST, "Number of getbucketdiff replies that have been processed."), + VDS_FILESTOR_ALLTHREADS_INTERNALJOIN_COUNT("vds.filestor.allthreads.internaljoin.count", Unit.REQUEST, "Number of requests processed."), + VDS_FILESTOR_ALLTHREADS_INTERNALJOIN_FAILED("vds.filestor.allthreads.internaljoin.failed", Unit.REQUEST, "Number of failed requests."), + VDS_FILESTOR_ALLTHREADS_INTERNALJOIN_LATENCY("vds.filestor.allthreads.internaljoin.latency", Unit.MILLISECOND, "Latency of successful requests."), + VDS_FILESTOR_ALLTHREADS_MOVEDBUCKETS_COUNT("vds.filestor.allthreads.movedbuckets.count", Unit.REQUEST, "Number of requests processed."), + VDS_FILESTOR_ALLTHREADS_MOVEDBUCKETS_FAILED("vds.filestor.allthreads.movedbuckets.failed", Unit.REQUEST, "Number of failed requests."), + VDS_FILESTOR_ALLTHREADS_MOVEDBUCKETS_LATENCY("vds.filestor.allthreads.movedbuckets.latency", Unit.MILLISECOND, "Latency of successful requests."), + VDS_FILESTOR_ALLTHREADS_OPERATIONS("vds.filestor.allthreads.operations", Unit.OPERATION, "Number of operations processed."), + + VDS_FILESTOR_ALLTHREADS_READBUCKETINFO_COUNT("vds.filestor.allthreads.readbucketinfo.count", Unit.REQUEST, "Number of requests processed."), + VDS_FILESTOR_ALLTHREADS_READBUCKETINFO_FAILED("vds.filestor.allthreads.readbucketinfo.failed", Unit.REQUEST, "Number of failed requests."), + VDS_FILESTOR_ALLTHREADS_READBUCKETINFO_LATENCY("vds.filestor.allthreads.readbucketinfo.latency", Unit.REQUEST, "Latency of successful requests."), + VDS_FILESTOR_ALLTHREADS_READBUCKETLIST_COUNT("vds.filestor.allthreads.readbucketlist.count", Unit.REQUEST, "Number of requests processed."), + VDS_FILESTOR_ALLTHREADS_READBUCKETLIST_FAILED("vds.filestor.allthreads.readbucketlist.failed", Unit.REQUEST, "Number of failed requests."), + VDS_FILESTOR_ALLTHREADS_READBUCKETLIST_LATENCY("vds.filestor.allthreads.readbucketlist.latency", Unit.MILLISECOND, "Latency of successful requests."), + + VDS_FILESTOR_ALLTHREADS_RECHECKBUCKETINFO_COUNT("vds.filestor.allthreads.recheckbucketinfo.count", Unit.REQUEST, "Number of requests processed."), + VDS_FILESTOR_ALLTHREADS_RECHECKBUCKETINFO_FAILED("vds.filestor.allthreads.recheckbucketinfo.failed", Unit.REQUEST, "Number of failed requests."), + VDS_FILESTOR_ALLTHREADS_RECHECKBUCKETINFO_LATENCY("vds.filestor.allthreads.recheckbucketinfo.latency", Unit.MILLISECOND, "Latency of successful requests."), + + VDS_FILESTOR_ALLTHREADS_REVERT_COUNT("vds.filestor.allthreads.revert.count", Unit.REQUEST, "Number of requests processed."), + VDS_FILESTOR_ALLTHREADS_REVERT_FAILED("vds.filestor.allthreads.revert.failed", Unit.REQUEST, "Number of failed requests."), + VDS_FILESTOR_ALLTHREADS_REVERT_LATENCY("vds.filestor.allthreads.revert.latency", Unit.MILLISECOND, "Latency of successful requests."), + VDS_FILESTOR_ALLTHREADS_REVERT_NOT_FOUND("vds.filestor.allthreads.revert.not_found", Unit.REQUEST, "Number of requests that could not be completed due to source document not found."), + VDS_FILESTOR_ALLTHREADS_STAT_BUCKET_COUNT("vds.filestor.allthreads.stat_bucket.count", Unit.REQUEST, "Number of requests processed."), + VDS_FILESTOR_ALLTHREADS_STAT_BUCKET_FAILED("vds.filestor.allthreads.stat_bucket.failed", Unit.REQUEST, "Number of failed requests."), + VDS_FILESTOR_ALLTHREADS_STAT_BUCKET_LATENCY("vds.filestor.allthreads.stat_bucket.latency", Unit.REQUEST, "Latency of successful requests."), + VDS_FILESTOR_BUCKET_DB_INIT_LATENCY("vds.filestor.bucket_db_init_latency", Unit.MILLISECOND, "Time taken (in ms) to initialize bucket databases with information from the persistence provider"), + VDS_FILESTOR_DIRECTORYEVENTS("vds.filestor.directoryevents", Unit.OPERATION, "Number of directory events received."), + VDS_FILESTOR_DISKEVENTS("vds.filestor.diskevents", Unit.OPERATION, "Number of disk events received."), + VDS_FILESTOR_PARTITIONEVENTS("vds.filestor.partitionevents", Unit.OPERATION, "Number of partition events received."), + VDS_FILESTOR_PENDINGMERGE("vds.filestor.pendingmerge", Unit.BUCKET, "Number of buckets currently being merged."), + VDS_FILESTOR_WAITINGFORLOCKRATE("vds.filestor.waitingforlockrate", Unit.OPERATION, "Amount of times a filestor thread has needed to wait for lock to take next message in queue."), + VDS_MERGETHROTTLER_MERGECHAINS_FAILURES_ABORTED("vds.mergethrottler.mergechains.failures.aborted", Unit.OPERATION, "The number of merges that failed because the storage node was (most likely) shutting down"), + VDS_MERGETHROTTLER_MERGECHAINS_FAILURES_BUCKETNOTFOUND("vds.mergethrottler.mergechains.failures.bucketnotfound", Unit.OPERATION, "The number of operations that failed because the bucket did not exist"), + VDS_SERVER_MEMORYUSAGE("vds.server.memoryusage", Unit.BYTE, "Amount of memory used by the storage subsystem"), + VDS_SERVER_MEMORYUSAGE_VISITING("vds.server.memoryusage_visiting", Unit.BYTE, "Message use from visiting"), + VDS_SERVER_MESSAGE_MEMORY_USE_HIGHPRI("vds.server.message_memory_use.highpri", Unit.BYTE, "Message use from high priority storage messages"), + VDS_SERVER_MESSAGE_MEMORY_USE_LOWPRI("vds.server.message_memory_use.lowpri", Unit.BYTE, "Message use from low priority storage messages"), + VDS_SERVER_MESSAGE_MEMORY_USE_NORMALPRI("vds.server.message_memory_use.normalpri", Unit.BYTE, "Message use from normal priority storage messages"), + VDS_SERVER_MESSAGE_MEMORY_USE_TOTAL("vds.server.message_memory_use.total", Unit.BYTE, "Message use from storage messages"), + VDS_SERVER_MESSAGE_MEMORY_USE_VERYHIGHPRI("vds.server.message_memory_use.veryhighpri", Unit.BYTE, "Message use from very high priority storage messages"), + VDS_STATE_MANAGER_INVOKE_STATE_LISTENERS_LATENCY("vds.state_manager.invoke_state_listeners_latency", Unit.MILLISECOND, "Time spent (in ms) propagating state changes to internal state listeners"), + VDS_VISITOR_CV_QUEUEEVICTEDWAITTIME("vds.visitor.cv_queueevictedwaittime", Unit.MILLISECOND, "Milliseconds waiting in create visitor queue, for visitors that was evicted from queue due to higher priority visitors coming"), + VDS_VISITOR_CV_QUEUEFULL("vds.visitor.cv_queuefull", Unit.OPERATION, "Number of create visitor messages failed as queue is full"), + VDS_VISITOR_CV_QUEUESIZE("vds.visitor.cv_queuesize", Unit.ITEM, "Size of create visitor queue"), + VDS_VISITOR_CV_QUEUETIMEOUTWAITTIME("vds.visitor.cv_queuetimeoutwaittime", Unit.MILLISECOND, "Milliseconds waiting in create visitor queue, for visitors that timed out while in the visitor quueue"), + VDS_VISITOR_CV_QUEUEWAITTIME("vds.visitor.cv_queuewaittime", Unit.MILLISECOND, "Milliseconds waiting in create visitor queue, for visitors that was added to visitor queue but scheduled later"), + VDS_VISITOR_CV_SKIPQUEUE("vds.visitor.cv_skipqueue", Unit.OPERATION, "Number of times we could skip queue as we had free visitor spots"), + // C++ capability metrics VDS_SERVER_NETWORK_RPC_CAPABILITY_CHECKS_FAILED("vds.server.network.rpc-capability-checks-failed", Unit.FAILURE, "Number of RPC operations that failed to due one or more missing capabilities"), VDS_SERVER_NETWORK_STATUS_CAPABILITY_CHECKS_FAILED("vds.server.network.status-capability-checks-failed", Unit.FAILURE, "Number of status page operations that failed to due one or more missing capabilities"), |