aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Håkon Hallingstad <hakon@verizonmedia.com> 2020-12-01 14:18:10 +0100
committer GitHub <noreply@github.com> 2020-12-01 14:18:10 +0100
commit 2fe1c026d1fdc6a9f35307e8915a2de809d0c5fb (patch)
tree 53ec5afde6ef7ff4d58145630b070401cb5ce6e7
parent 008477111ac92e2eec81596a5037a205bbea53ff (diff)
parent cf26d1df944520528280f2b5a1f5cd83fdd07507 (diff)
Merge pull request #15564 from vespa-engine/hakonhall/remove-lock-metrics-not-used-in-dashboard
Remove lock metrics not used in dashboard
-rw-r--r-- node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/MetricsReporter.java 35
-rw-r--r-- node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/MetricsReporterTest.java 17
2 files changed, 20 insertions, 32 deletions
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/MetricsReporter.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/MetricsReporter.java
index d2dcaaeae5b..685fa3727a1 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/MetricsReporter.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/MetricsReporter.java
@@ -261,30 +261,29 @@ public class MetricsReporter extends NodeRepositoryMaintainer {
.forEach((lockPath, lockMetrics) -> {
Metric.Context context = getContext(Map.of("lockPath", lockPath));
- metric.set("lockAttempt.acquire", lockMetrics.getAndResetAcquireCount(), context);
- metric.set("lockAttempt.acquireFailed", lockMetrics.getAndResetAcquireFailedCount(), context);
+ LatencyMetrics acquireLatencyMetrics = lockMetrics.getAndResetAcquireLatencyMetrics();
+ metric.set("lockAttempt.acquireMaxActiveLatency", acquireLatencyMetrics.maxActiveLatencySeconds(), context);
+ metric.set("lockAttempt.acquireHz", acquireLatencyMetrics.startHz(), context);
+ metric.set("lockAttempt.acquireLoad", acquireLatencyMetrics.load(), context);
+
+ LatencyMetrics lockedLatencyMetrics = lockMetrics.getAndResetLockedLatencyMetrics();
+ metric.set("lockAttempt.lockedLatency", lockedLatencyMetrics.maxLatencySeconds(), context);
+ metric.set("lockAttempt.lockedLoad", lockedLatencyMetrics.load(), context);
+
metric.set("lockAttempt.acquireTimedOut", lockMetrics.getAndResetAcquireTimedOutCount(), context);
- metric.set("lockAttempt.locked", lockMetrics.getAndResetAcquireSucceededCount(), context);
- metric.set("lockAttempt.release", lockMetrics.getAndResetReleaseCount(), context);
- metric.set("lockAttempt.releaseFailed", lockMetrics.getAndResetReleaseFailedCount(), context);
- metric.set("lockAttempt.reentry", lockMetrics.getAndResetReentryCount(), context);
metric.set("lockAttempt.deadlock", lockMetrics.getAndResetDeadlockCount(), context);
- metric.set("lockAttempt.nakedRelease", lockMetrics.getAndResetNakedReleaseCount(), context);
- metric.set("lockAttempt.acquireWithoutRelease", lockMetrics.getAndResetAcquireWithoutReleaseCount(), context);
- metric.set("lockAttempt.foreignRelease", lockMetrics.getAndResetForeignReleaseCount(), context);
- setLockLatencyMetrics("acquire", lockMetrics.getAndResetAcquireLatencyMetrics(), context);
- setLockLatencyMetrics("locked", lockMetrics.getAndResetLockedLatencyMetrics(), context);
+ // bucket for various rare errors - to reduce #metrics
+ metric.set("lockAttempt.errors",
+ lockMetrics.getAndResetAcquireFailedCount() +
+ lockMetrics.getAndResetReleaseFailedCount() +
+ lockMetrics.getAndResetNakedReleaseCount() +
+ lockMetrics.getAndResetAcquireWithoutReleaseCount() +
+ lockMetrics.getAndResetForeignReleaseCount(),
+ context);
});
}
- private void setLockLatencyMetrics(String name, LatencyMetrics latencyMetrics, Metric.Context context) {
- metric.set("lockAttempt." + name + "Latency", latencyMetrics.latencySeconds(), context);
- metric.set("lockAttempt." + name + "MaxActiveLatency", latencyMetrics.maxActiveLatencySeconds(), context);
- metric.set("lockAttempt." + name + "Hz", latencyMetrics.startHz(), context);
- metric.set("lockAttempt." + name + "Load", latencyMetrics.load(), context);
- }
-
private void updateDockerMetrics(NodeList nodes) {
NodeResources totalCapacity = getCapacityTotal(nodes);
metric.set("hostedVespa.docker.totalCapacityCpu", totalCapacity.vcpu(), null);
diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/MetricsReporterTest.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/MetricsReporterTest.java
index a25858c034f..3e4887b6998 100644
--- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/MetricsReporterTest.java
+++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/MetricsReporterTest.java
@@ -147,25 +147,14 @@ public class MetricsReporterTest {
// Verify sum of values across dimensions, and remove these metrics to avoid checking against
// metric.values below, which is not sensitive to dimensions.
- verifyAndRemoveIntegerMetricSum(metric, "lockAttempt.acquire", 3);
- verifyAndRemoveIntegerMetricSum(metric, "lockAttempt.acquireFailed", 0);
- verifyAndRemoveIntegerMetricSum(metric, "lockAttempt.acquireTimedOut", 0);
- verifyAndRemoveIntegerMetricSum(metric, "lockAttempt.locked", 3);
- verifyAndRemoveIntegerMetricSum(metric, "lockAttempt.release", 3);
- verifyAndRemoveIntegerMetricSum(metric, "lockAttempt.releaseFailed", 0);
- verifyAndRemoveIntegerMetricSum(metric, "lockAttempt.reentry", 0);
- verifyAndRemoveIntegerMetricSum(metric, "lockAttempt.deadlock", 0);
- verifyAndRemoveIntegerMetricSum(metric, "lockAttempt.nakedRelease", 0);
- verifyAndRemoveIntegerMetricSum(metric, "lockAttempt.acquireWithoutRelease", 0);
- verifyAndRemoveIntegerMetricSum(metric, "lockAttempt.foreignRelease", 0);
- metric.remove("lockAttempt.acquireLatency");
metric.remove("lockAttempt.acquireMaxActiveLatency");
metric.remove("lockAttempt.acquireHz");
metric.remove("lockAttempt.acquireLoad");
metric.remove("lockAttempt.lockedLatency");
- metric.remove("lockAttempt.lockedMaxActiveLatency");
- metric.remove("lockAttempt.lockedHz");
metric.remove("lockAttempt.lockedLoad");
+ verifyAndRemoveIntegerMetricSum(metric, "lockAttempt.acquireTimedOut", 0);
+ verifyAndRemoveIntegerMetricSum(metric, "lockAttempt.deadlock", 0);
+ verifyAndRemoveIntegerMetricSum(metric, "lockAttempt.errors", 0);
assertEquals(expectedMetrics, new TreeMap<>(metric.values));
}