diff options
author | Håkon Hallingstad <hakon@verizonmedia.com> | 2020-12-01 14:18:10 +0100 |
---|---|---|
committer | GitHub <noreply@github.com> | 2020-12-01 14:18:10 +0100 |
commit | 2fe1c026d1fdc6a9f35307e8915a2de809d0c5fb (patch) | |
tree | 53ec5afde6ef7ff4d58145630b070401cb5ce6e7 /node-repository | |
parent | 008477111ac92e2eec81596a5037a205bbea53ff (diff) | |
parent | cf26d1df944520528280f2b5a1f5cd83fdd07507 (diff) |
Merge pull request #15564 from vespa-engine/hakonhall/remove-lock-metrics-not-used-in-dashboard
Remove lock metrics not used in dashboard
Diffstat (limited to 'node-repository')
2 files changed, 20 insertions, 32 deletions
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/MetricsReporter.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/MetricsReporter.java index d2dcaaeae5b..685fa3727a1 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/MetricsReporter.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/MetricsReporter.java @@ -261,30 +261,29 @@ public class MetricsReporter extends NodeRepositoryMaintainer { .forEach((lockPath, lockMetrics) -> { Metric.Context context = getContext(Map.of("lockPath", lockPath)); - metric.set("lockAttempt.acquire", lockMetrics.getAndResetAcquireCount(), context); - metric.set("lockAttempt.acquireFailed", lockMetrics.getAndResetAcquireFailedCount(), context); + LatencyMetrics acquireLatencyMetrics = lockMetrics.getAndResetAcquireLatencyMetrics(); + metric.set("lockAttempt.acquireMaxActiveLatency", acquireLatencyMetrics.maxActiveLatencySeconds(), context); + metric.set("lockAttempt.acquireHz", acquireLatencyMetrics.startHz(), context); + metric.set("lockAttempt.acquireLoad", acquireLatencyMetrics.load(), context); + + LatencyMetrics lockedLatencyMetrics = lockMetrics.getAndResetLockedLatencyMetrics(); + metric.set("lockAttempt.lockedLatency", lockedLatencyMetrics.maxLatencySeconds(), context); + metric.set("lockAttempt.lockedLoad", lockedLatencyMetrics.load(), context); + metric.set("lockAttempt.acquireTimedOut", lockMetrics.getAndResetAcquireTimedOutCount(), context); - metric.set("lockAttempt.locked", lockMetrics.getAndResetAcquireSucceededCount(), context); - metric.set("lockAttempt.release", lockMetrics.getAndResetReleaseCount(), context); - metric.set("lockAttempt.releaseFailed", lockMetrics.getAndResetReleaseFailedCount(), context); - metric.set("lockAttempt.reentry", lockMetrics.getAndResetReentryCount(), context); metric.set("lockAttempt.deadlock", lockMetrics.getAndResetDeadlockCount(), context); - 
metric.set("lockAttempt.nakedRelease", lockMetrics.getAndResetNakedReleaseCount(), context); - metric.set("lockAttempt.acquireWithoutRelease", lockMetrics.getAndResetAcquireWithoutReleaseCount(), context); - metric.set("lockAttempt.foreignRelease", lockMetrics.getAndResetForeignReleaseCount(), context); - setLockLatencyMetrics("acquire", lockMetrics.getAndResetAcquireLatencyMetrics(), context); - setLockLatencyMetrics("locked", lockMetrics.getAndResetLockedLatencyMetrics(), context); + // bucket for various rare errors - to reduce #metrics + metric.set("lockAttempt.errors", + lockMetrics.getAndResetAcquireFailedCount() + + lockMetrics.getAndResetReleaseFailedCount() + + lockMetrics.getAndResetNakedReleaseCount() + + lockMetrics.getAndResetAcquireWithoutReleaseCount() + + lockMetrics.getAndResetForeignReleaseCount(), + context); }); } - private void setLockLatencyMetrics(String name, LatencyMetrics latencyMetrics, Metric.Context context) { - metric.set("lockAttempt." + name + "Latency", latencyMetrics.latencySeconds(), context); - metric.set("lockAttempt." + name + "MaxActiveLatency", latencyMetrics.maxActiveLatencySeconds(), context); - metric.set("lockAttempt." + name + "Hz", latencyMetrics.startHz(), context); - metric.set("lockAttempt." 
+ name + "Load", latencyMetrics.load(), context); - } - private void updateDockerMetrics(NodeList nodes) { NodeResources totalCapacity = getCapacityTotal(nodes); metric.set("hostedVespa.docker.totalCapacityCpu", totalCapacity.vcpu(), null); diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/MetricsReporterTest.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/MetricsReporterTest.java index a25858c034f..3e4887b6998 100644 --- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/MetricsReporterTest.java +++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/MetricsReporterTest.java @@ -147,25 +147,14 @@ public class MetricsReporterTest { // Verify sum of values across dimensions, and remove these metrics to avoid checking against // metric.values below, which is not sensitive to dimensions. - verifyAndRemoveIntegerMetricSum(metric, "lockAttempt.acquire", 3); - verifyAndRemoveIntegerMetricSum(metric, "lockAttempt.acquireFailed", 0); - verifyAndRemoveIntegerMetricSum(metric, "lockAttempt.acquireTimedOut", 0); - verifyAndRemoveIntegerMetricSum(metric, "lockAttempt.locked", 3); - verifyAndRemoveIntegerMetricSum(metric, "lockAttempt.release", 3); - verifyAndRemoveIntegerMetricSum(metric, "lockAttempt.releaseFailed", 0); - verifyAndRemoveIntegerMetricSum(metric, "lockAttempt.reentry", 0); - verifyAndRemoveIntegerMetricSum(metric, "lockAttempt.deadlock", 0); - verifyAndRemoveIntegerMetricSum(metric, "lockAttempt.nakedRelease", 0); - verifyAndRemoveIntegerMetricSum(metric, "lockAttempt.acquireWithoutRelease", 0); - verifyAndRemoveIntegerMetricSum(metric, "lockAttempt.foreignRelease", 0); - metric.remove("lockAttempt.acquireLatency"); metric.remove("lockAttempt.acquireMaxActiveLatency"); metric.remove("lockAttempt.acquireHz"); metric.remove("lockAttempt.acquireLoad"); metric.remove("lockAttempt.lockedLatency"); - 
metric.remove("lockAttempt.lockedMaxActiveLatency"); - metric.remove("lockAttempt.lockedHz"); metric.remove("lockAttempt.lockedLoad"); + verifyAndRemoveIntegerMetricSum(metric, "lockAttempt.acquireTimedOut", 0); + verifyAndRemoveIntegerMetricSum(metric, "lockAttempt.deadlock", 0); + verifyAndRemoveIntegerMetricSum(metric, "lockAttempt.errors", 0); assertEquals(expectedMetrics, new TreeMap<>(metric.values)); } |