From 5ef77f7cf280a6d7a22dc01ebecd3299e4b95bbb Mon Sep 17 00:00:00 2001 From: yngveaasheim Date: Tue, 7 Feb 2023 18:06:03 +0100 Subject: Use enum for distributor metrics. And move some storage metrics where they belong. --- .../model/admin/monitoring/VespaMetricSet.java | 184 +++++++++++---------- 1 file changed, 94 insertions(+), 90 deletions(-) (limited to 'config-model') diff --git a/config-model/src/main/java/com/yahoo/vespa/model/admin/monitoring/VespaMetricSet.java b/config-model/src/main/java/com/yahoo/vespa/model/admin/monitoring/VespaMetricSet.java index f3ad181887d..e1bd114c4d3 100644 --- a/config-model/src/main/java/com/yahoo/vespa/model/admin/monitoring/VespaMetricSet.java +++ b/config-model/src/main/java/com/yahoo/vespa/model/admin/monitoring/VespaMetricSet.java @@ -603,102 +603,103 @@ public class VespaMetricSet { addMetric(metrics, StorageMetrics.VDS_FILESTOR_ALLTHREADS_DELETEBUCKETS_LATENCY, EnumSet.of(max, sum, count)); addMetric(metrics, StorageMetrics.VDS_FILESTOR_ALLTHREADS_SETBUCKETSTATES_COUNT.rate()); + addMetric(metrics, StorageMetrics.VDS_MERGETHROTTLER_AVERAGEQUEUEWAITINGTIME, EnumSet.of(max, sum, count)); + addMetric(metrics, StorageMetrics.VDS_MERGETHROTTLER_QUEUESIZE, EnumSet.of(max, sum, count)); + addMetric(metrics, StorageMetrics.VDS_MERGETHROTTLER_ACTIVE_WINDOW_SIZE, EnumSet.of(max, sum, count)); + addMetric(metrics, StorageMetrics.VDS_MERGETHROTTLER_BOUNCED_DUE_TO_BACK_PRESSURE.rate()); + addMetric(metrics, StorageMetrics.VDS_MERGETHROTTLER_LOCALLYEXECUTEDMERGES_OK.rate()); + addMetric(metrics, StorageMetrics.VDS_MERGETHROTTLER_MERGECHAINS_OK.rate()); + addMetric(metrics, StorageMetrics.VDS_MERGETHROTTLER_MERGECHAINS_FAILURES_BUSY.rate()); + addMetric(metrics, StorageMetrics.VDS_MERGETHROTTLER_MERGECHAINS_FAILURES_TOTAL.rate()); + return metrics; } private static Set getDistributorMetrics() { Set metrics = new LinkedHashSet<>(); addMetric(metrics, DistributorMetrics.VDS_IDEALSTATE_BUCKETS_RECHECKING.average()); addMetric(metrics, DistributorMetrics.VDS_IDEALSTATE_IDEALSTATE_DIFF.average()); - addMetric(metrics, "vds.idealstate.buckets_toofewcopies.average"); - addMetric(metrics, "vds.idealstate.buckets_toomanycopies.average"); - addMetric(metrics, "vds.idealstate.buckets.average"); - addMetric(metrics, "vds.idealstate.buckets_notrusted.average"); - addMetric(metrics, "vds.idealstate.bucket_replicas_moving_out.average"); - addMetric(metrics, "vds.idealstate.bucket_replicas_copying_out.average"); - addMetric(metrics, "vds.idealstate.bucket_replicas_copying_in.average"); - addMetric(metrics, "vds.idealstate.bucket_replicas_syncing.average"); - addMetric(metrics, "vds.idealstate.max_observed_time_since_last_gc_sec.average"); - addMetric(metrics, "vds.idealstate.delete_bucket.done_ok.rate"); - addMetric(metrics, "vds.idealstate.delete_bucket.done_failed.rate"); - addMetric(metrics, "vds.idealstate.delete_bucket.pending.average"); - addMetric(metrics, "vds.idealstate.merge_bucket.done_ok.rate"); - addMetric(metrics, "vds.idealstate.merge_bucket.done_failed.rate"); - addMetric(metrics, "vds.idealstate.merge_bucket.pending.average"); - addMetric(metrics, "vds.idealstate.merge_bucket.blocked.rate"); - addMetric(metrics, "vds.idealstate.merge_bucket.throttled.rate"); - addMetric(metrics, "vds.idealstate.merge_bucket.source_only_copy_changed.rate"); - addMetric(metrics, "vds.idealstate.merge_bucket.source_only_copy_delete_blocked.rate"); - addMetric(metrics, "vds.idealstate.merge_bucket.source_only_copy_delete_failed.rate"); - addMetric(metrics, "vds.idealstate.split_bucket.done_ok.rate"); - addMetric(metrics, "vds.idealstate.split_bucket.done_failed.rate"); - addMetric(metrics, "vds.idealstate.split_bucket.pending.average"); - addMetric(metrics, "vds.idealstate.join_bucket.done_ok.rate"); - addMetric(metrics, "vds.idealstate.join_bucket.done_failed.rate"); - addMetric(metrics, "vds.idealstate.join_bucket.pending.average"); - addMetric(metrics, "vds.idealstate.garbage_collection.done_ok.rate"); - addMetric(metrics, "vds.idealstate.garbage_collection.done_failed.rate"); - addMetric(metrics, "vds.idealstate.garbage_collection.pending.average"); - addMetric(metrics, "vds.idealstate.garbage_collection.documents_removed", List.of("count", "rate")); - - addMetric(metrics, "vds.distributor.puts.latency", List.of("max", "sum", "count")); - addMetric(metrics, "vds.distributor.puts.ok.rate"); - addMetric(metrics, "vds.distributor.puts.failures.total.rate"); - addMetric(metrics, "vds.distributor.puts.failures.notfound.rate"); - addMetric(metrics, "vds.distributor.puts.failures.test_and_set_failed.rate"); - addMetric(metrics, "vds.distributor.puts.failures.concurrent_mutations.rate"); - addMetric(metrics, "vds.distributor.puts.failures.notconnected.rate"); - addMetric(metrics, "vds.distributor.puts.failures.notready.rate"); - addMetric(metrics, "vds.distributor.puts.failures.wrongdistributor.rate"); - addMetric(metrics, "vds.distributor.puts.failures.safe_time_not_reached.rate"); - addMetric(metrics, "vds.distributor.puts.failures.storagefailure.rate"); - addMetric(metrics, "vds.distributor.puts.failures.timeout.rate"); - addMetric(metrics, "vds.distributor.puts.failures.busy.rate"); - addMetric(metrics, "vds.distributor.puts.failures.inconsistent_bucket.rate"); - addMetric(metrics, "vds.distributor.removes.latency", List.of("max", "sum", "count")); - addMetric(metrics, "vds.distributor.removes.ok.rate"); - addMetric(metrics, "vds.distributor.removes.failures.total.rate"); - addMetric(metrics, "vds.distributor.removes.failures.notfound.rate"); - addMetric(metrics, "vds.distributor.removes.failures.test_and_set_failed.rate"); - addMetric(metrics, "vds.distributor.removes.failures.concurrent_mutations.rate"); - addMetric(metrics, "vds.distributor.updates.latency", List.of("max", "sum", "count")); - addMetric(metrics, "vds.distributor.updates.ok.rate"); - addMetric(metrics, "vds.distributor.updates.failures.total.rate"); - addMetric(metrics, "vds.distributor.updates.failures.notfound.rate"); - addMetric(metrics, "vds.distributor.updates.failures.test_and_set_failed.rate"); - addMetric(metrics, "vds.distributor.updates.failures.concurrent_mutations.rate"); - addMetric(metrics, "vds.distributor.updates.diverging_timestamp_updates.rate"); - addMetric(metrics, "vds.distributor.removelocations.ok.rate"); - addMetric(metrics, "vds.distributor.removelocations.failures.total.rate"); - addMetric(metrics, "vds.distributor.gets.latency", List.of("max", "sum", "count")); - addMetric(metrics, "vds.distributor.gets.ok.rate"); - addMetric(metrics, "vds.distributor.gets.failures.total.rate"); - addMetric(metrics, "vds.distributor.gets.failures.notfound.rate"); - addMetric(metrics, "vds.distributor.visitor.latency", List.of("max", "sum", "count")); - addMetric(metrics, "vds.distributor.visitor.ok.rate"); - addMetric(metrics, "vds.distributor.visitor.failures.total.rate"); - addMetric(metrics, "vds.distributor.visitor.failures.notready.rate"); - addMetric(metrics, "vds.distributor.visitor.failures.notconnected.rate"); - addMetric(metrics, "vds.distributor.visitor.failures.wrongdistributor.rate"); - addMetric(metrics, "vds.distributor.visitor.failures.safe_time_not_reached.rate"); - addMetric(metrics, "vds.distributor.visitor.failures.storagefailure.rate"); - addMetric(metrics, "vds.distributor.visitor.failures.timeout.rate"); - addMetric(metrics, "vds.distributor.visitor.failures.busy.rate"); - addMetric(metrics, "vds.distributor.visitor.failures.inconsistent_bucket.rate"); - addMetric(metrics, "vds.distributor.visitor.failures.notfound.rate"); - - addMetric(metrics, "vds.distributor.docsstored.average"); - addMetric(metrics, "vds.distributor.bytesstored.average"); - - addMetric(metrics, "vds.bouncer.clock_skew_aborts.count"); - - addMetric(metrics, "vds.mergethrottler.averagequeuewaitingtime", List.of("max", "sum", "count")); - addMetric(metrics, "vds.mergethrottler.queuesize", List.of("max", "sum", "count")); - addMetric(metrics, "vds.mergethrottler.active_window_size", List.of("max", "sum", "count")); - addMetric(metrics, "vds.mergethrottler.bounced_due_to_back_pressure.rate"); - addMetric(metrics, "vds.mergethrottler.locallyexecutedmerges.ok.rate"); - addMetric(metrics, "vds.mergethrottler.mergechains.ok.rate"); - addMetric(metrics, "vds.mergethrottler.mergechains.failures.busy.rate"); - addMetric(metrics, "vds.mergethrottler.mergechains.failures.total.rate"); + addMetric(metrics, DistributorMetrics.VDS_IDEALSTATE_BUCKETS_TOOFEWCOPIES.average()); + addMetric(metrics, DistributorMetrics.VDS_IDEALSTATE_BUCKETS_TOOMANYCOPIES.average()); + addMetric(metrics, DistributorMetrics.VDS_IDEALSTATE_BUCKETS.average()); + addMetric(metrics, DistributorMetrics.VDS_IDEALSTATE_BUCKETS_NOTRUSTED.average()); + addMetric(metrics, DistributorMetrics.VDS_IDEALSTATE_BUCKET_REPLICAS_MOVING_OUT.average()); + addMetric(metrics, DistributorMetrics.VDS_IDEALSTATE_BUCKET_REPLICAS_COPYING_OUT.average()); + addMetric(metrics, DistributorMetrics.VDS_IDEALSTATE_BUCKET_REPLICAS_COPYING_IN.average()); + addMetric(metrics, DistributorMetrics.VDS_IDEALSTATE_BUCKET_REPLICAS_SYNCING.average()); + addMetric(metrics, DistributorMetrics.VDS_IDEALSTATE_MAX_OBSERVED_TIME_SINCE_LAST_GC_SEC.average()); + addMetric(metrics, DistributorMetrics.VDS_IDEALSTATE_DELETE_BUCKET_DONE_OK.rate()); + addMetric(metrics, DistributorMetrics.VDS_IDEALSTATE_DELETE_BUCKET_DONE_FAILED.rate()); + addMetric(metrics, DistributorMetrics.VDS_IDEALSTATE_DELETE_BUCKET_PENDING.average()); + addMetric(metrics, DistributorMetrics.VDS_IDEALSTATE_MERGE_BUCKET_DONE_OK.rate()); + addMetric(metrics, DistributorMetrics.VDS_IDEALSTATE_MERGE_BUCKET_DONE_FAILED.rate()); + addMetric(metrics, DistributorMetrics.VDS_IDEALSTATE_MERGE_BUCKET_PENDING.average()); + addMetric(metrics, DistributorMetrics.VDS_IDEALSTATE_MERGE_BUCKET_BLOCKED.rate()); + addMetric(metrics, DistributorMetrics.VDS_IDEALSTATE_MERGE_BUCKET_THROTTLED.rate()); + addMetric(metrics, DistributorMetrics.VDS_IDEALSTATE_MERGE_BUCKET_SOURCE_ONLY_COPY_CHANGED.rate()); + addMetric(metrics, DistributorMetrics.VDS_IDEALSTATE_MERGE_BUCKET_SOURCE_ONLY_COPY_DELETE_BLOCKED.rate()); + addMetric(metrics, DistributorMetrics.VDS_IDEALSTATE_MERGE_BUCKET_SOURCE_ONLY_COPY_DELETE_FAILED.rate()); + addMetric(metrics, DistributorMetrics.VDS_IDEALSTATE_SPLIT_BUCKET_DONE_OK.rate()); + addMetric(metrics, DistributorMetrics.VDS_IDEALSTATE_SPLIT_BUCKET_DONE_FAILED.rate()); + addMetric(metrics, DistributorMetrics.VDS_IDEALSTATE_SPLIT_BUCKET_PENDING.average()); + addMetric(metrics, DistributorMetrics.VDS_IDEALSTATE_JOIN_BUCKET_DONE_OK.rate()); + addMetric(metrics, DistributorMetrics.VDS_IDEALSTATE_JOIN_BUCKET_DONE_FAILED.rate()); + addMetric(metrics, DistributorMetrics.VDS_IDEALSTATE_JOIN_BUCKET_PENDING.average()); + addMetric(metrics, DistributorMetrics.VDS_IDEALSTATE_GARBAGE_COLLECTION_DONE_OK.rate()); + addMetric(metrics, DistributorMetrics.VDS_IDEALSTATE_GARBAGE_COLLECTION_DONE_FAILED.rate()); + addMetric(metrics, DistributorMetrics.VDS_IDEALSTATE_GARBAGE_COLLECTION_PENDING.average()); + addMetric(metrics, DistributorMetrics.VDS_IDEALSTATE_GARBAGE_COLLECTION_DOCUMENTS_REMOVED, EnumSet.of(count, rate)); + + addMetric(metrics, DistributorMetrics.VDS_DISTRIBUTOR_PUTS_LATENCY, EnumSet.of(max, sum, count)); + addMetric(metrics, DistributorMetrics.VDS_DISTRIBUTOR_PUTS_OK.rate()); + addMetric(metrics, DistributorMetrics.VDS_DISTRIBUTOR_PUTS_FAILURES_TOTAL.rate()); + addMetric(metrics, DistributorMetrics.VDS_DISTRIBUTOR_PUTS_FAILURES_NOTFOUND.rate()); + addMetric(metrics, DistributorMetrics.VDS_DISTRIBUTOR_PUTS_FAILURES_TEST_AND_SET_FAILED.rate()); + addMetric(metrics, DistributorMetrics.VDS_DISTRIBUTOR_PUTS_FAILURES_CONCURRENT_MUTATIONS.rate()); + addMetric(metrics, DistributorMetrics.VDS_DISTRIBUTOR_PUTS_FAILURES_NOTCONNECTED.rate()); + addMetric(metrics, DistributorMetrics.VDS_DISTRIBUTOR_PUTS_FAILURES_NOTREADY.rate()); + addMetric(metrics, DistributorMetrics.VDS_DISTRIBUTOR_PUTS_FAILURES_WRONGDISTRIBUTOR.rate()); + addMetric(metrics, DistributorMetrics.VDS_DISTRIBUTOR_PUTS_FAILURES_SAFE_TIME_NOT_REACHED.rate()); + addMetric(metrics, DistributorMetrics.VDS_DISTRIBUTOR_PUTS_FAILURES_STORAGEFAILURE.rate()); + addMetric(metrics, DistributorMetrics.VDS_DISTRIBUTOR_PUTS_FAILURES_TIMEOUT.rate()); + addMetric(metrics, DistributorMetrics.VDS_DISTRIBUTOR_PUTS_FAILURES_BUSY.rate()); + addMetric(metrics, DistributorMetrics.VDS_DISTRIBUTOR_PUTS_FAILURES_INCONSISTENT_BUCKET.rate()); + addMetric(metrics, DistributorMetrics.VDS_DISTRIBUTOR_REMOVES_LATENCY, EnumSet.of(max, sum, count)); + addMetric(metrics, DistributorMetrics.VDS_DISTRIBUTOR_REMOVES_OK.rate()); + addMetric(metrics, DistributorMetrics.VDS_DISTRIBUTOR_REMOVES_FAILURES_TOTAL.rate()); + addMetric(metrics, DistributorMetrics.VDS_DISTRIBUTOR_REMOVES_FAILURES_NOTFOUND.rate()); + addMetric(metrics, DistributorMetrics.VDS_DISTRIBUTOR_REMOVES_FAILURES_TEST_AND_SET_FAILED.rate()); + addMetric(metrics, DistributorMetrics.VDS_DISTRIBUTOR_REMOVES_FAILURES_CONCURRENT_MUTATIONS.rate()); + addMetric(metrics, DistributorMetrics.VDS_DISTRIBUTOR_UPDATES_LATENCY, EnumSet.of(max, sum, count)); + addMetric(metrics, DistributorMetrics.VDS_DISTRIBUTOR_UPDATES_OK.rate()); + addMetric(metrics, DistributorMetrics.VDS_DISTRIBUTOR_UPDATES_FAILURES_TOTAL.rate()); + addMetric(metrics, DistributorMetrics.VDS_DISTRIBUTOR_UPDATES_FAILURES_NOTFOUND.rate()); + addMetric(metrics, DistributorMetrics.VDS_DISTRIBUTOR_UPDATES_FAILURES_TEST_AND_SET_FAILED.rate()); + addMetric(metrics, DistributorMetrics.VDS_DISTRIBUTOR_UPDATES_FAILURES_CONCURRENT_MUTATIONS.rate()); + addMetric(metrics, DistributorMetrics.VDS_DISTRIBUTOR_UPDATES_DIVERGING_TIMESTAMP_UPDATES.rate()); + addMetric(metrics, DistributorMetrics.VDS_DISTRIBUTOR_REMOVELOCATIONS_OK.rate()); + addMetric(metrics, DistributorMetrics.VDS_DISTRIBUTOR_REMOVELOCATIONS_FAILURES_TOTAL.rate()); + addMetric(metrics, DistributorMetrics.VDS_DISTRIBUTOR_GETS_LATENCY, EnumSet.of(max, sum, count)); + addMetric(metrics, DistributorMetrics.VDS_DISTRIBUTOR_GETS_OK.rate()); + addMetric(metrics, DistributorMetrics.VDS_DISTRIBUTOR_GETS_FAILURES_TOTAL.rate()); + addMetric(metrics, DistributorMetrics.VDS_DISTRIBUTOR_GETS_FAILURES_NOTFOUND.rate()); + addMetric(metrics, DistributorMetrics.VDS_DISTRIBUTOR_VISITOR_LATENCY, EnumSet.of(max, sum, count)); + addMetric(metrics, DistributorMetrics.VDS_DISTRIBUTOR_VISITOR_OK.rate()); + addMetric(metrics, DistributorMetrics.VDS_DISTRIBUTOR_VISITOR_FAILURES_TOTAL.rate()); + addMetric(metrics, DistributorMetrics.VDS_DISTRIBUTOR_VISITOR_FAILURES_NOTREADY.rate()); + addMetric(metrics, DistributorMetrics.VDS_DISTRIBUTOR_VISITOR_FAILURES_NOTCONNECTED.rate()); + addMetric(metrics, DistributorMetrics.VDS_DISTRIBUTOR_VISITOR_FAILURES_WRONGDISTRIBUTOR.rate()); + addMetric(metrics, DistributorMetrics.VDS_DISTRIBUTOR_VISITOR_FAILURES_SAFE_TIME_NOT_REACHED.rate()); + addMetric(metrics, DistributorMetrics.VDS_DISTRIBUTOR_VISITOR_FAILURES_STORAGEFAILURE.rate()); + addMetric(metrics, DistributorMetrics.VDS_DISTRIBUTOR_VISITOR_FAILURES_TIMEOUT.rate()); + addMetric(metrics, DistributorMetrics.VDS_DISTRIBUTOR_VISITOR_FAILURES_BUSY.rate()); + addMetric(metrics, DistributorMetrics.VDS_DISTRIBUTOR_VISITOR_FAILURES_INCONSISTENT_BUCKET.rate()); + addMetric(metrics, DistributorMetrics.VDS_DISTRIBUTOR_VISITOR_FAILURES_NOTFOUND.rate()); + + addMetric(metrics, DistributorMetrics.VDS_DISTRIBUTOR_DOCSSTORED.average()); + addMetric(metrics, DistributorMetrics.VDS_DISTRIBUTOR_BYTESSTORED.average()); + + addMetric(metrics, DistributorMetrics.VDS_BOUNCER_CLOCK_SKEW_ABORTS.count()); + return metrics; } @@ -718,6 +719,9 @@ public class VespaMetricSet { suffixes.forEach(suffix -> metrics.add(new Metric(metric.baseName() + "." + suffix.suffix()))); } + private static void addMetric(Set metrics, DistributorMetrics metric, EnumSet suffixes) { + suffixes.forEach(suffix -> metrics.add(new Metric(metric.baseName() + "." + suffix.suffix()))); + } private static void addMetric(Set metrics, String metricName, Iterable aggregateSuffices) { for (String suffix : aggregateSuffices) { metrics.add(new Metric(metricName + "." + suffix)); -- cgit v1.2.3