aboutsummaryrefslogtreecommitdiffstats
path: root/zkfacade/src/main/java/com/yahoo
diff options
context:
space:
mode:
authorHåkon Hallingstad <hakon@verizonmedia.com>2020-10-03 15:02:53 +0200
committerHåkon Hallingstad <hakon@verizonmedia.com>2020-10-03 15:02:53 +0200
commite925ef8b0a33ed0e67e09ca9320b386339e08cea (patch)
tree066150a4a9f829b3c4935e5f702bb1e17cfefa03 /zkfacade/src/main/java/com/yahoo
parent58c15afd3ea77235e2ea26f4baa2e171574dd3b6 (diff)
Move lock metrics to MetricsReporter
Adds two new metrics: - The load of acquiring each lock path: The average number of threads waiting to acquire the lock within the last minute (or unit of time). Aka the lock queue (depth). - The load of the lock for each lock path: The average number of threads holding the lock within the last minute (or unit of time). This is always <= 1. Aka the lock utilization. Changes the LockCounters to LockMetrics, and exporting those once every minute through MetricReporter which is designed for this.
Diffstat (limited to 'zkfacade/src/main/java/com/yahoo')
-rw-r--r--zkfacade/src/main/java/com/yahoo/vespa/curator/Curator.java33
-rw-r--r--zkfacade/src/main/java/com/yahoo/vespa/curator/Lock.java25
-rw-r--r--zkfacade/src/main/java/com/yahoo/vespa/curator/mock/MockCurator.java2
-rw-r--r--zkfacade/src/main/java/com/yahoo/vespa/curator/stats/AtomicDurationSum.java86
-rw-r--r--zkfacade/src/main/java/com/yahoo/vespa/curator/stats/DurationSum.java41
-rw-r--r--zkfacade/src/main/java/com/yahoo/vespa/curator/stats/LatencyMetrics.java39
-rw-r--r--zkfacade/src/main/java/com/yahoo/vespa/curator/stats/LatencyStore.java56
-rw-r--r--zkfacade/src/main/java/com/yahoo/vespa/curator/stats/LockAttempt.java55
-rw-r--r--zkfacade/src/main/java/com/yahoo/vespa/curator/stats/LockCounters.java66
-rw-r--r--zkfacade/src/main/java/com/yahoo/vespa/curator/stats/LockMetrics.java138
-rw-r--r--zkfacade/src/main/java/com/yahoo/vespa/curator/stats/LockStats.java10
-rw-r--r--zkfacade/src/main/java/com/yahoo/vespa/curator/stats/ThreadLockStats.java58
12 files changed, 423 insertions, 186 deletions
diff --git a/zkfacade/src/main/java/com/yahoo/vespa/curator/Curator.java b/zkfacade/src/main/java/com/yahoo/vespa/curator/Curator.java
index f3fe0316682..6cbfa274c56 100644
--- a/zkfacade/src/main/java/com/yahoo/vespa/curator/Curator.java
+++ b/zkfacade/src/main/java/com/yahoo/vespa/curator/Curator.java
@@ -4,7 +4,6 @@ package com.yahoo.vespa.curator;
import com.google.inject.Inject;
import com.yahoo.cloud.config.ConfigserverConfig;
import com.yahoo.io.IOUtils;
-import com.yahoo.jdisc.Metric;
import com.yahoo.net.HostName;
import com.yahoo.path.Path;
import com.yahoo.text.Utf8;
@@ -67,31 +66,28 @@ public class Curator implements AutoCloseable {
// All lock keys, to allow re-entrancy. This will grow forever, but this should be too slow to be a problem
private final ConcurrentHashMap<Path, Lock> locks = new ConcurrentHashMap<>();
- private final Optional<Metric> metric;
-
/** Creates a curator instance from a comma-separated string of ZooKeeper host:port strings */
public static Curator create(String connectionSpec) {
- return new Curator(connectionSpec, connectionSpec, Optional.empty(), Optional.of(ZK_CLIENT_CONFIG_FILE));
+ return new Curator(connectionSpec, connectionSpec, Optional.of(ZK_CLIENT_CONFIG_FILE));
}
// For testing only, use Optional.empty for clientConfigFile parameter to create default zookeeper client config
public static Curator create(String connectionSpec, Optional<File> clientConfigFile) {
- return new Curator(connectionSpec, connectionSpec, Optional.empty(), clientConfigFile);
+ return new Curator(connectionSpec, connectionSpec, clientConfigFile);
}
// Depend on ZooKeeperServer to make sure it is started first
// TODO: Move zookeeperserver config out of configserverconfig (requires update of controller services.xml as well)
@Inject
- public Curator(ConfigserverConfig configserverConfig, Metric metric, VespaZooKeeperServer server) {
- this(configserverConfig, Optional.of(metric), Optional.of(ZK_CLIENT_CONFIG_FILE));
+ public Curator(ConfigserverConfig configserverConfig, VespaZooKeeperServer server) {
+ this(configserverConfig, Optional.of(ZK_CLIENT_CONFIG_FILE));
}
- Curator(ConfigserverConfig configserverConfig, Optional<Metric> metric, Optional<File> clientConfigFile) {
- this(createConnectionSpec(configserverConfig), createEnsembleConnectionSpec(configserverConfig), metric, clientConfigFile);
+ Curator(ConfigserverConfig configserverConfig, Optional<File> clientConfigFile) {
+ this(createConnectionSpec(configserverConfig), createEnsembleConnectionSpec(configserverConfig), clientConfigFile);
}
- private Curator(String connectionSpec, String zooKeeperEnsembleConnectionSpec, Optional<Metric> metric,
- Optional<File> clientConfigFile) {
+ private Curator(String connectionSpec, String zooKeeperEnsembleConnectionSpec, Optional<File> clientConfigFile) {
this(connectionSpec,
zooKeeperEnsembleConnectionSpec,
(retryPolicy) -> CuratorFrameworkFactory
@@ -102,24 +98,20 @@ public class Curator implements AutoCloseable {
.connectString(connectionSpec)
.zookeeperFactory(new VespaZooKeeperFactory(createClientConfig(clientConfigFile)))
.dontUseContainerParents() // TODO: Remove when we know ZooKeeper 3.5 works fine, consider waiting until Vespa 8
- .build(),
- metric);
+ .build());
}
protected Curator(String connectionSpec,
String zooKeeperEnsembleConnectionSpec,
- Function<RetryPolicy, CuratorFramework> curatorFactory,
- Optional<Metric> metric) {
+ Function<RetryPolicy, CuratorFramework> curatorFactory) {
this(connectionSpec, zooKeeperEnsembleConnectionSpec, curatorFactory,
- new ExponentialBackoffRetry((int) BASE_SLEEP_TIME.toMillis(), MAX_RETRIES),
- metric);
+ new ExponentialBackoffRetry((int) BASE_SLEEP_TIME.toMillis(), MAX_RETRIES));
}
private Curator(String connectionSpec,
String zooKeeperEnsembleConnectionSpec,
Function<RetryPolicy, CuratorFramework> curatorFactory,
- RetryPolicy retryPolicy,
- Optional<Metric> metric) {
+ RetryPolicy retryPolicy) {
this.connectionSpec = connectionSpec;
this.retryPolicy = retryPolicy;
this.curatorFramework = curatorFactory.apply(retryPolicy);
@@ -132,7 +124,6 @@ public class Curator implements AutoCloseable {
this.zooKeeperEnsembleConnectionSpec = zooKeeperEnsembleConnectionSpec;
this.zooKeeperEnsembleCount = zooKeeperEnsembleConnectionSpec.split(",").length;
- this.metric = metric;
}
private static String createConnectionSpec(ConfigserverConfig configserverConfig) {
@@ -363,7 +354,7 @@ public class Curator implements AutoCloseable {
/** Create and acquire a re-entrant lock in given path */
public Lock lock(Path path, Duration timeout) {
create(path);
- Lock lock = locks.computeIfAbsent(path, (pathArg) -> new Lock(pathArg.getAbsolute(), this, metric));
+ Lock lock = locks.computeIfAbsent(path, (pathArg) -> new Lock(pathArg.getAbsolute(), this));
lock.acquire(timeout);
return lock;
}
diff --git a/zkfacade/src/main/java/com/yahoo/vespa/curator/Lock.java b/zkfacade/src/main/java/com/yahoo/vespa/curator/Lock.java
index 305e5d459fc..451389694c6 100644
--- a/zkfacade/src/main/java/com/yahoo/vespa/curator/Lock.java
+++ b/zkfacade/src/main/java/com/yahoo/vespa/curator/Lock.java
@@ -2,7 +2,6 @@
package com.yahoo.vespa.curator;
import com.google.common.util.concurrent.UncheckedTimeoutException;
-import com.yahoo.jdisc.Metric;
import com.yahoo.path.Path;
import com.yahoo.transaction.Mutex;
import com.yahoo.vespa.curator.stats.LockStats;
@@ -10,8 +9,6 @@ import com.yahoo.vespa.curator.stats.ThreadLockStats;
import org.apache.curator.framework.recipes.locks.InterProcessLock;
import java.time.Duration;
-import java.util.Map;
-import java.util.Optional;
import java.util.concurrent.TimeUnit;
/**
@@ -26,50 +23,46 @@ public class Lock implements Mutex {
private final InterProcessLock mutex;
private final String lockPath;
- private final Optional<Metric> metric;
- private final Optional<Metric.Context> metricContext;
- public Lock(String lockPath, Curator curator, Optional<Metric> metric) {
- this(lockPath, curator.createMutex(lockPath), metric);
+ public Lock(String lockPath, Curator curator) {
+ this(lockPath, curator.createMutex(lockPath));
}
/** Public for testing only */
- public Lock(String lockPath, InterProcessLock mutex, Optional<Metric> metric) {
+ public Lock(String lockPath, InterProcessLock mutex) {
this.lockPath = lockPath;
this.mutex = mutex;
- this.metric = metric;
- this.metricContext = metric.map(aMetric -> aMetric.createContext(Map.of("lockPath", lockPath)));
}
/** Take the lock with the given timeout. This may be called multiple times from the same thread - each matched by a close */
public void acquire(Duration timeout) throws UncheckedTimeoutException {
ThreadLockStats threadLockStats = LockStats.getForCurrentThread();
- threadLockStats.invokingAcquire(lockPath, timeout, metric, metricContext);
+ threadLockStats.invokingAcquire(lockPath, timeout);
final boolean acquired;
try {
acquired = mutex.acquire(timeout.toMillis(), TimeUnit.MILLISECONDS);
} catch (Exception e) {
- threadLockStats.acquireFailed(lockPath);
+ threadLockStats.acquireFailed();
throw new RuntimeException("Exception acquiring lock '" + lockPath + "'", e);
}
if (!acquired) {
- threadLockStats.acquireTimedOut(lockPath);
+ threadLockStats.acquireTimedOut();
throw new UncheckedTimeoutException("Timed out after waiting " + timeout +
" to acquire lock '" + lockPath + "'");
}
- threadLockStats.lockAcquired(lockPath);
+ threadLockStats.lockAcquired();
}
@Override
public void close() {
try {
mutex.release();
- LockStats.getForCurrentThread().lockReleased(lockPath);
+ LockStats.getForCurrentThread().lockReleased();
}
catch (Exception e) {
- LockStats.getForCurrentThread().lockReleaseFailed(lockPath);
+ LockStats.getForCurrentThread().lockReleaseFailed();
throw new RuntimeException("Exception releasing lock '" + lockPath + "'");
}
}
diff --git a/zkfacade/src/main/java/com/yahoo/vespa/curator/mock/MockCurator.java b/zkfacade/src/main/java/com/yahoo/vespa/curator/mock/MockCurator.java
index 2ef42f539d2..3da7678c44e 100644
--- a/zkfacade/src/main/java/com/yahoo/vespa/curator/mock/MockCurator.java
+++ b/zkfacade/src/main/java/com/yahoo/vespa/curator/mock/MockCurator.java
@@ -137,7 +137,7 @@ public class MockCurator extends Curator {
* This is not what ZooKeeper does.
*/
public MockCurator(boolean stableOrdering) {
- super("", "", (retryPolicy) -> null, Optional.empty());
+ super("", "", (retryPolicy) -> null);
this.stableOrdering = stableOrdering;
curatorFramework = new MockCuratorFramework();
curatorFramework.start();
diff --git a/zkfacade/src/main/java/com/yahoo/vespa/curator/stats/AtomicDurationSum.java b/zkfacade/src/main/java/com/yahoo/vespa/curator/stats/AtomicDurationSum.java
new file mode 100644
index 00000000000..7addd661b79
--- /dev/null
+++ b/zkfacade/src/main/java/com/yahoo/vespa/curator/stats/AtomicDurationSum.java
@@ -0,0 +1,86 @@
+// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.vespa.curator.stats;
+
+import java.time.Duration;
+import java.util.concurrent.atomic.AtomicLong;
+
+/**
+ * An instance of {@link AtomicDurationSum} provides atomic operations on a {@link Duration}
+ * and integer counter doublet: You can add a duration and increment the counter atomically,
+ * and get the doublet and reset the duration and counter to zero atomically.
+ *
+ * <p>The duration and counter must be small: You can add the equivalent of 16M durations of 1 minute each:
+ * The cumulative duration must be between -17 and 17 years, and the maximum count is 16M.
+ * The duration will have millisecond resolution. Overflow of count affects duration.</p>
+ *
+ * <p>Motivation: Metrics are snapshot and reset to zero every minute. Durations of typically 1 minute
+ * are then summed to produce a cumulative {@link Duration} and an associated count, both of which are
+ * therefore small numbers and can be represented in a compact and atomic form. The alternative is to
+ * use synchronization (which is slow) or allow inconsistencies between the duration and count
+ * (e.g. a sum of 2 latencies but a count of 1 makes the metrics noisy).</p>
+ *
+ * @author hakon
+ */
+public class AtomicDurationSum {
+
+ // Why 40? The duration-part requires 16 bits to represent 1 minute. If we require 1 minute
+ // durations can be added until both the duration-part and count-part are full, the remaining
+ // 48 bits must be divided equally, hence 16 + 24 = 40. Seems to give a nice balance.
+ static final long DURATION_BITS = 40;
+ static final long COUNT_BITS = Long.SIZE - DURATION_BITS;
+ static final long DURATION_MASK = -1L << COUNT_BITS;
+ static final long COUNT_MASK = -1L >>> DURATION_BITS;
+ // The most significant bit of duration is a sign bit, which complicates the initializer.
+ static final long MIN_DURATION = -1L << (DURATION_BITS - 1);
+ static final long MAX_DURATION = (DURATION_MASK << 1) >>> (COUNT_BITS + 1);
+ static final long MAX_COUNT = COUNT_MASK;
+ static final long MIN_COUNT = 0L;
+
+ private static final long ZERO_DURATION_AND_COUNT = 0L;
+
+ // Representation:
+ // - A signed long of 40 bits storing the duration in milliseconds
+ // - An unsigned int of 24 bits storing the count
+ private final AtomicLong encodedAtomic = new AtomicLong(ZERO_DURATION_AND_COUNT);
+
+ /** Initializes to zero duration and count. */
+ public AtomicDurationSum() {}
+
+ /** Add duration and increment count. */
+ void add(Duration duration) {
+ encodedAtomic.addAndGet(encodeDuration(duration) | 1L);
+ }
+
+ public DurationSum get() {
+ long snapshot = encodedAtomic.get();
+ return new DurationSum(decodeDuration(snapshot), decodeCount(snapshot));
+ }
+
+ /** Get the current {@link DurationSum} and reset the duration and count doublet to zero. */
+ public DurationSum getAndReset() {
+ long snapshot = encodedAtomic.getAndSet(ZERO_DURATION_AND_COUNT);
+ return new DurationSum(decodeDuration(snapshot), decodeCount(snapshot));
+ }
+
+ static long encodeDuration(Duration duration) {
+ long millis = duration.toMillis();
+ if (millis < MIN_DURATION || millis > MAX_DURATION) {
+ throw new IllegalArgumentException("Duration outside legal range: " + duration);
+ }
+
+ return millis << COUNT_BITS;
+ }
+
+ static Duration decodeDuration(long encoded) {
+ return Duration.ofMillis(encoded >> COUNT_BITS);
+ }
+
+ static int decodeCount(long encoded) {
+ return (int) (encoded & COUNT_MASK);
+ }
+
+ @Override
+ public String toString() {
+ return get().toString();
+ }
+}
diff --git a/zkfacade/src/main/java/com/yahoo/vespa/curator/stats/DurationSum.java b/zkfacade/src/main/java/com/yahoo/vespa/curator/stats/DurationSum.java
new file mode 100644
index 00000000000..3c1f0ec7f43
--- /dev/null
+++ b/zkfacade/src/main/java/com/yahoo/vespa/curator/stats/DurationSum.java
@@ -0,0 +1,41 @@
+// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.vespa.curator.stats;
+
+import java.time.Duration;
+import java.util.Optional;
+
+/**
+ * Represents a sum and count of Duration.
+ *
+ * @author hakon
+ */
+public class DurationSum {
+
+ private final Duration duration;
+ private final int count;
+
+ DurationSum(Duration duration, int count) {
+ this.duration = duration;
+ this.count = count;
+ }
+
+ public Duration duration() { return duration; }
+ public int count() { return count; }
+
+ public Optional<Duration> averageDuration() {
+ if (count <= 0) {
+ return Optional.empty();
+ }
+
+ long averageMillis = Math.round(duration.toMillis() / (double) count);
+ return Optional.of(Duration.ofMillis(averageMillis));
+ }
+
+ @Override
+ public String toString() {
+ return "DurationSum{" +
+ "duration=" + duration +
+ ", count=" + count +
+ '}';
+ }
+}
diff --git a/zkfacade/src/main/java/com/yahoo/vespa/curator/stats/LatencyMetrics.java b/zkfacade/src/main/java/com/yahoo/vespa/curator/stats/LatencyMetrics.java
new file mode 100644
index 00000000000..61807b5db19
--- /dev/null
+++ b/zkfacade/src/main/java/com/yahoo/vespa/curator/stats/LatencyMetrics.java
@@ -0,0 +1,39 @@
+// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.vespa.curator.stats;
+
+/**
+ * Metrics on the latency of execution of some piece of code, e.g. the acquiring of a lock.
+ *
+ * @author hakon
+ */
+public class LatencyMetrics {
+ private final DurationSum cumulativeLatency;
+ private final float load;
+
+ public LatencyMetrics(DurationSum cumulativeLatency, float load) {
+ this.cumulativeLatency = cumulativeLatency;
+ this.load = load;
+ }
+
+ /**
+ * The total time spent by all threads accumulating latency in an implicit time period,
+ * e.g. a metric snapshot window, divided by the duration of the time period.
+ */
+ public float load() { return load; }
+
+ /** Returns the average latency in seconds with milliseconds resolution, or 0.0 by default. */
+ public float averageInSeconds() {
+ return cumulativeLatency.averageDuration().map(average -> average.toMillis() / 1000f).orElse(0f);
+ }
+
+ /** The number of latency-producing events. */
+ public int count() { return cumulativeLatency.count(); }
+
+ @Override
+ public String toString() {
+ return "LatencyMetrics{" +
+ "cumulativeLatency=" + cumulativeLatency +
+ ", load=" + load +
+ '}';
+ }
+}
diff --git a/zkfacade/src/main/java/com/yahoo/vespa/curator/stats/LatencyStore.java b/zkfacade/src/main/java/com/yahoo/vespa/curator/stats/LatencyStore.java
new file mode 100644
index 00000000000..0b1056ddeba
--- /dev/null
+++ b/zkfacade/src/main/java/com/yahoo/vespa/curator/stats/LatencyStore.java
@@ -0,0 +1,56 @@
+// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.vespa.curator.stats;
+
+import java.time.Clock;
+import java.time.Duration;
+import java.time.Instant;
+
+/**
+ * Stores the latency of e.g. acquiring the lock.
+ *
+ * @author hakon
+ */
+public class LatencyStore {
+
+ private final AtomicDurationSum latencySum = new AtomicDurationSum();
+ private final Clock clock;
+ private volatile Instant startOfPeriod;
+
+ LatencyStore() { this(Clock.systemUTC()); }
+
+ LatencyStore(Clock clock) {
+ this.clock = clock;
+ startOfPeriod = clock.instant();
+ }
+
+ void reportLatency(Duration latency) {
+ latencySum.add(latency);
+ }
+
+ public LatencyMetrics getLatencyMetrics() {
+ return makeMetricsForPeriod(latencySum.get(), startOfPeriod, clock.instant());
+ }
+
+ public LatencyMetrics getAndResetLatencyMetrics() {
+ Instant newStartOfPeriod = clock.instant();
+ DurationSum latencySumOfPeriod = latencySum.getAndReset();
+ LatencyMetrics latencyMetrics = makeMetricsForPeriod(latencySumOfPeriod, startOfPeriod, newStartOfPeriod);
+ startOfPeriod = newStartOfPeriod;
+ return latencyMetrics;
+ }
+
+ private static LatencyMetrics makeMetricsForPeriod(DurationSum latencySum, Instant start, Instant end) {
+ long millisPeriod = Duration.between(start, end).toMillis();
+ long normalizedMillisPeriod = Math.max(1L, millisPeriod);
+ double load = Math.round(latencySum.duration().toMillis() * 1000.0 / normalizedMillisPeriod) / 1000.0;
+ return new LatencyMetrics(latencySum, (float) load);
+ }
+
+ @Override
+ public String toString() {
+ return "LatencyStore{" +
+ "latencySum=" + latencySum +
+ ", startOfPeriod=" + startOfPeriod +
+ '}';
+ }
+}
diff --git a/zkfacade/src/main/java/com/yahoo/vespa/curator/stats/LockAttempt.java b/zkfacade/src/main/java/com/yahoo/vespa/curator/stats/LockAttempt.java
index bec7c08b9b3..e25470ac227 100644
--- a/zkfacade/src/main/java/com/yahoo/vespa/curator/stats/LockAttempt.java
+++ b/zkfacade/src/main/java/com/yahoo/vespa/curator/stats/LockAttempt.java
@@ -1,8 +1,6 @@
// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
package com.yahoo.vespa.curator.stats;
-import com.yahoo.jdisc.Metric;
-
import java.time.Duration;
import java.time.Instant;
import java.util.ArrayList;
@@ -23,17 +21,16 @@ public class LockAttempt {
private final String lockPath;
private final Instant callAcquireInstant;
private final Duration timeout;
- private final Optional<Metric> metric;
- private final Optional<Metric.Context> metricContext;
+ private final LockMetrics lockMetrics;
private final List<LockAttempt> nestedLockAttempts = new ArrayList<>();
private volatile Optional<Instant> lockAcquiredInstant = Optional.empty();
private volatile Optional<Instant> terminalStateInstant = Optional.empty();
private volatile Optional<String> stackTrace = Optional.empty();
- public static LockAttempt invokingAcquire(ThreadLockStats threadLockStats, String lockPath, Duration timeout,
- Optional<Metric> metric, Optional<Metric.Context> metricContext) {
- return new LockAttempt(threadLockStats, lockPath, timeout, Instant.now(), metric, metricContext);
+ public static LockAttempt invokingAcquire(ThreadLockStats threadLockStats, String lockPath,
+ Duration timeout, LockMetrics lockMetrics) {
+ return new LockAttempt(threadLockStats, lockPath, timeout, Instant.now(), lockMetrics);
}
public enum LockState {
@@ -49,16 +46,15 @@ public class LockAttempt {
private volatile LockState lockState = LockState.ACQUIRING;
- private LockAttempt(ThreadLockStats threadLockStats, String lockPath, Duration timeout, Instant callAcquireInstant,
- Optional<Metric> metric, Optional<Metric.Context> metricContext) {
+ private LockAttempt(ThreadLockStats threadLockStats, String lockPath, Duration timeout,
+ Instant callAcquireInstant, LockMetrics lockMetrics) {
this.threadLockStats = threadLockStats;
this.lockPath = lockPath;
this.callAcquireInstant = callAcquireInstant;
this.timeout = timeout;
- this.metric = metric;
- this.metricContext = metricContext;
+ this.lockMetrics = lockMetrics;
- addToMetric("lockAttempt.acquiring", 1);
+ lockMetrics.acquireInvoked();
}
public String getThreadName() { return threadLockStats.getThreadName(); }
@@ -73,8 +69,6 @@ public class LockAttempt {
public Optional<Instant> getTimeTerminalStateWasReached() { return terminalStateInstant; }
public Optional<String> getStackTrace() { return stackTrace; }
public List<LockAttempt> getNestedLockAttempts() { return List.copyOf(nestedLockAttempts); }
- public Optional<Metric> metric() { return metric; }
- public Optional<Metric.Context> metricContext() { return metricContext; }
public Duration getDurationOfAcquire() { return Duration.between(callAcquireInstant, getTimeAcquireEndedOrNow()); }
@@ -105,39 +99,28 @@ public class LockAttempt {
void acquireFailed() {
setTerminalState(LockState.ACQUIRE_FAILED);
- addToMetric("lockAttempt.acquiring", -1);
- addToMetric("lockAttempt.acquireFailed", 1);
- setMetricTo("lockAttempt.acquiringLatency", getDurationOfAcquire().toMillis() / 1000.);
+ lockMetrics.acquireFailed(getDurationOfAcquire());
}
void timedOut() {
setTerminalState(LockState.TIMED_OUT);
- addToMetric("lockAttempt.acquiring", -1);
- addToMetric("lockAttempt.acquireTimedOut", 1);
- setMetricTo("lockAttempt.acquiringLatency", getDurationOfAcquire().toMillis() / 1000.);
+ lockMetrics.acquireTimedOut(getDurationOfAcquire());
}
void lockAcquired() {
lockState = LockState.ACQUIRED;
lockAcquiredInstant = Optional.of(Instant.now());
- addToMetric("lockAttempt.acquiring", -1);
- addToMetric("lockAttempt.acquired", 1);
- setMetricTo("lockAttempt.locked", 1);
- setMetricTo("lockAttempt.acquiringLatency", getDurationOfAcquire().toMillis() / 1000.);
+ lockMetrics.lockAcquired(getDurationOfAcquire());
}
void released() {
setTerminalState(LockState.RELEASED);
- setMetricTo("lockAttempt.locked", 0);
- addToMetric("lockAttempt.released", 1);
- setMetricTo("lockAttempt.lockedLatency", getDurationWithLock().toMillis() / 1000.);
+ lockMetrics.release(getDurationWithLock(), getDuration());
}
void releasedWithError() {
setTerminalState(LockState.RELEASED_WITH_ERROR);
- setMetricTo("lockAttempt.locked", 0);
- addToMetric("lockAttempt.releaseError", 1);
- setMetricTo("lockAttempt.lockedLatency", getDurationWithLock().toMillis() / 1000.);
+ lockMetrics.releaseFailed(getDurationWithLock(), getDuration());
}
void setTerminalState(LockState terminalState) { setTerminalState(terminalState, Instant.now()); }
@@ -146,16 +129,4 @@ public class LockAttempt {
lockState = terminalState;
terminalStateInstant = Optional.of(instant);
}
-
- private void addToMetric(String key, Number value) {
- if (metric.isPresent() && metricContext.isPresent()) {
- metric.get().add(key, value, metricContext.get());
- }
- }
-
- private void setMetricTo(String key, Number value) {
- if (metric.isPresent() && metricContext.isPresent()) {
- metric.get().set(key, value, metricContext.get());
- }
- }
}
diff --git a/zkfacade/src/main/java/com/yahoo/vespa/curator/stats/LockCounters.java b/zkfacade/src/main/java/com/yahoo/vespa/curator/stats/LockCounters.java
deleted file mode 100644
index 561ea9a7ed2..00000000000
--- a/zkfacade/src/main/java/com/yahoo/vespa/curator/stats/LockCounters.java
+++ /dev/null
@@ -1,66 +0,0 @@
-// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
-package com.yahoo.vespa.curator.stats;
-
-import java.util.Objects;
-import java.util.concurrent.atomic.AtomicInteger;
-
-/**
- * A collection of counters for events related to lock acquisition and release.
- *
- * @author hakon
- */
-public class LockCounters {
- final AtomicInteger invokeAcquireCount = new AtomicInteger(0);
- final AtomicInteger inCriticalRegionCount = new AtomicInteger(0);
- final AtomicInteger acquireFailedCount = new AtomicInteger(0);
- final AtomicInteger acquireTimedOutCount = new AtomicInteger(0);
- final AtomicInteger lockAcquiredCount = new AtomicInteger(0);
- final AtomicInteger locksReleasedCount = new AtomicInteger(0);
-
- final AtomicInteger noLocksErrorCount = new AtomicInteger(0);
- final AtomicInteger lockReleaseErrorCount = new AtomicInteger(0);
-
- public int invokeAcquireCount() { return invokeAcquireCount.get(); }
- public int inCriticalRegionCount() { return inCriticalRegionCount.get(); }
- public int acquireFailedCount() { return acquireFailedCount.get(); }
- public int acquireTimedOutCount() { return acquireTimedOutCount.get(); }
- public int lockAcquiredCount() { return lockAcquiredCount.get(); }
- public int locksReleasedCount() { return locksReleasedCount.get(); }
- public int noLocksErrorCount() { return noLocksErrorCount.get(); }
- public int lockReleaseErrorCount() { return lockReleaseErrorCount.get(); }
-
- @Override
- public String toString() {
- return "LockCounters{" +
- "invokeAcquireCount=" + invokeAcquireCount +
- ", inCriticalRegionCount=" + inCriticalRegionCount +
- ", acquireFailedCount=" + acquireFailedCount +
- ", acquireTimedOutCount=" + acquireTimedOutCount +
- ", lockAcquiredCount=" + lockAcquiredCount +
- ", locksReleasedCount=" + locksReleasedCount +
- ", noLocksErrorCount=" + noLocksErrorCount +
- ", locksReleaseErrorCount=" + lockReleaseErrorCount +
- '}';
- }
-
- @Override
- public boolean equals(Object o) {
- if (this == o) return true;
- if (o == null || getClass() != o.getClass()) return false;
- LockCounters that = (LockCounters) o;
- return invokeAcquireCount.get() == that.invokeAcquireCount.get() &&
- inCriticalRegionCount.get() == that.inCriticalRegionCount.get() &&
- acquireFailedCount.get() == that.acquireFailedCount.get() &&
- acquireTimedOutCount.get() == that.acquireTimedOutCount.get() &&
- lockAcquiredCount.get() == that.lockAcquiredCount.get() &&
- locksReleasedCount.get() == that.locksReleasedCount.get() &&
- noLocksErrorCount.get() == that.noLocksErrorCount.get() &&
- lockReleaseErrorCount.get() == that.lockReleaseErrorCount.get();
- }
-
- @Override
- public int hashCode() {
- return Objects.hash(invokeAcquireCount, inCriticalRegionCount, acquireFailedCount, acquireTimedOutCount,
- lockAcquiredCount, locksReleasedCount, noLocksErrorCount, lockReleaseErrorCount);
- }
-}
diff --git a/zkfacade/src/main/java/com/yahoo/vespa/curator/stats/LockMetrics.java b/zkfacade/src/main/java/com/yahoo/vespa/curator/stats/LockMetrics.java
new file mode 100644
index 00000000000..a5bf0124a54
--- /dev/null
+++ b/zkfacade/src/main/java/com/yahoo/vespa/curator/stats/LockMetrics.java
@@ -0,0 +1,138 @@
+// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.vespa.curator.stats;
+
+import java.time.Duration;
+import java.util.concurrent.atomic.AtomicInteger;
+
+/**
+ * A collection of counters for events related to lock acquisition and release.
+ *
+ * @author hakon
+ */
+public class LockMetrics {
+ private final AtomicInteger acquireCount = new AtomicInteger(0);
+ private final AtomicInteger acquireFailedCount = new AtomicInteger(0);
+ private final AtomicInteger acquireTimedOutCount = new AtomicInteger(0);
+ private final AtomicInteger acquireSucceededCount = new AtomicInteger(0);
+ private final AtomicInteger releaseCount = new AtomicInteger(0);
+ private final AtomicInteger releaseFailedCount = new AtomicInteger(0);
+
+ private final AtomicInteger cumulativeAcquireCount = new AtomicInteger(0);
+ private final AtomicInteger cumulativeAcquireFailedCount = new AtomicInteger(0);
+ private final AtomicInteger cumulativeAcquireTimedOutCount = new AtomicInteger(0);
+ private final AtomicInteger cumulativeAcquireSucceededCount = new AtomicInteger(0);
+ private final AtomicInteger cumulativeReleaseCount = new AtomicInteger(0);
+ private final AtomicInteger cumulativeReleaseFailedCount = new AtomicInteger(0);
+
+ private final AtomicInteger acquiringNow = new AtomicInteger(0);
+ private final AtomicInteger lockedNow = new AtomicInteger(0);
+
+ private final LatencyStore acquireLatencyStore = new LatencyStore();
+ private final LatencyStore lockedLatencyStore = new LatencyStore();
+
+ void acquireInvoked() {
+ acquireCount.incrementAndGet();
+ cumulativeAcquireCount.incrementAndGet();
+ acquiringNow.incrementAndGet();
+ }
+
+ void acquireFailed(Duration acquireLatency) {
+ acquiringNow.decrementAndGet();
+ acquireFailedCount.incrementAndGet();
+ cumulativeAcquireFailedCount.incrementAndGet();
+ acquireLatencyStore.reportLatency(acquireLatency);
+ }
+
+ void acquireTimedOut(Duration acquireLatency) {
+ acquiringNow.decrementAndGet();
+ acquireTimedOutCount.incrementAndGet();
+ cumulativeAcquireTimedOutCount.incrementAndGet();
+ acquireLatencyStore.reportLatency(acquireLatency);
+ }
+
+ void lockAcquired(Duration acquireLatency) {
+ acquiringNow.decrementAndGet();
+ acquireSucceededCount.incrementAndGet();
+ cumulativeAcquireSucceededCount.incrementAndGet();
+ acquireLatencyStore.reportLatency(acquireLatency);
+ lockedNow.incrementAndGet();
+ }
+
+ void release(Duration lockedLatency, Duration totalLatency) {
+ lockedNow.decrementAndGet();
+ releaseCount.incrementAndGet();
+ cumulativeReleaseCount.incrementAndGet();
+ lockedLatencyStore.reportLatency(lockedLatency);
+ }
+
+ void releaseFailed(Duration lockedLatency, Duration totalLatency) {
+ release(lockedLatency, totalLatency);
+ releaseFailedCount.incrementAndGet();
+ cumulativeReleaseFailedCount.incrementAndGet();
+ }
+
+ public int getAndResetAcquireCount() { return acquireCount.getAndSet(0); }
+ public int getAndResetAcquireFailedCount() { return acquireFailedCount.getAndSet(0); }
+ public int getAndResetAcquireTimedOutCount() { return acquireTimedOutCount.getAndSet(0); }
+ public int getAndResetAcquireSucceededCount() { return acquireSucceededCount.getAndSet(0); }
+ public int getAndResetReleaseCount() { return releaseCount.getAndSet(0); }
+ public int getAndResetReleaseFailedCount() { return releaseFailedCount.getAndSet(0); }
+
+ public int getCumulativeAcquireCount() { return cumulativeAcquireCount.get(); }
+ public int getCumulativeAcquireFailedCount() { return cumulativeAcquireFailedCount.get(); }
+ public int getCumulativeAcquireTimedOutCount() { return cumulativeAcquireTimedOutCount.get(); }
+ public int getCumulativeAcquireSucceededCount() { return cumulativeAcquireSucceededCount.get(); }
+ public int getCumulativeReleaseCount() { return cumulativeReleaseCount.get(); }
+ public int getCumulativeReleaseFailedCount() { return cumulativeReleaseFailedCount.get(); }
+
+ public int getAcquiringNow() { return acquiringNow.get(); }
+ public int getLockedNow() { return lockedNow.get(); }
+
+ public LatencyMetrics getAcquireLatencyMetrics() { return acquireLatencyStore.getLatencyMetrics(); }
+ public LatencyMetrics getLockedLatencyMetrics() { return lockedLatencyStore.getLatencyMetrics(); }
+
+ public LatencyMetrics getAndResetAcquireLatencyMetrics() { return acquireLatencyStore.getAndResetLatencyMetrics(); }
+ public LatencyMetrics getAndResetLockedLatencyMetrics() { return lockedLatencyStore.getAndResetLatencyMetrics(); }
+
+ // For tests
+ void setAcquireCount(int count) { acquireCount.set(count); }
+ void setAcquireFailedCount(int count) { acquireFailedCount.set(count); }
+ void setAcquireTimedOutCount(int count) { acquireTimedOutCount.set(count); }
+ void setAcquireSucceededCount(int count) { acquireSucceededCount.set(count); }
+ void setReleaseCount(int count) { releaseCount.set(count); }
+ void setReleaseFailedCount(int count) { releaseFailedCount.set(count); }
+
+ // For tests
+ void setCumulativeAcquireCount(int count) { cumulativeAcquireCount.set(count); }
+ void setCumulativeAcquireFailedCount(int count) { cumulativeAcquireFailedCount.set(count); }
+ void setCumulativeAcquireTimedOutCount(int count) { cumulativeAcquireTimedOutCount.set(count); }
+ void setCumulativeAcquireSucceededCount(int count) { cumulativeAcquireSucceededCount.set(count); }
+ void setCumulativeReleaseCount(int count) { cumulativeReleaseCount.set(count); }
+ void setCumulativeReleaseFailedCount(int count) { cumulativeReleaseFailedCount.set(count); }
+
+ // For tests
+ void setAcquiringNow(int count) { acquiringNow.set(count); }
+ void setLockedNow(int count) { lockedNow.set(count); }
+
+ @Override
+ public String toString() {
+ return "LockMetrics{" +
+ "acquireCount=" + acquireCount +
+ ", acquireFailedCount=" + acquireFailedCount +
+ ", acquireTimedOutCount=" + acquireTimedOutCount +
+ ", acquireSucceededCount=" + acquireSucceededCount +
+ ", releaseCount=" + releaseCount +
+ ", releaseFailedCount=" + releaseFailedCount +
+ ", cumulativeAcquireCount=" + cumulativeAcquireCount +
+ ", cumulativeAcquireFailedCount=" + cumulativeAcquireFailedCount +
+ ", cumulativeAcquireTimedOutCount=" + cumulativeAcquireTimedOutCount +
+ ", cumulativeAcquireSucceededCount=" + cumulativeAcquireSucceededCount +
+ ", cumulativeReleaseCount=" + cumulativeReleaseCount +
+ ", cumulativeReleaseFailedCount=" + cumulativeReleaseFailedCount +
+ ", acquiringNow=" + acquiringNow +
+ ", lockedNow=" + lockedNow +
+ ", acquireLatencyStore=" + acquireLatencyStore +
+ ", lockedLatencyStore=" + lockedLatencyStore +
+ '}';
+ }
+}
diff --git a/zkfacade/src/main/java/com/yahoo/vespa/curator/stats/LockStats.java b/zkfacade/src/main/java/com/yahoo/vespa/curator/stats/LockStats.java
index 3cb5af80c96..ee464e0918d 100644
--- a/zkfacade/src/main/java/com/yahoo/vespa/curator/stats/LockStats.java
+++ b/zkfacade/src/main/java/com/yahoo/vespa/curator/stats/LockStats.java
@@ -27,7 +27,7 @@ public class LockStats {
private final PriorityQueue<RecordedLockAttempts> interestingRecordings =
new PriorityQueue<>(MAX_RECORDINGS, Comparator.comparing(RecordedLockAttempts::duration));
- private final ConcurrentHashMap<String, LockCounters> countersByLockPath = new ConcurrentHashMap<>();
+ private final ConcurrentHashMap<String, LockMetrics> metricsByLockPath = new ConcurrentHashMap<>();
/** Returns global stats. */
public static LockStats getGlobal() { return stats; }
@@ -37,13 +37,13 @@ public class LockStats {
return stats.statsByThread.computeIfAbsent(Thread.currentThread(), ThreadLockStats::new);
}
- static void clearForTesting() {
+ public static void clearForTesting() {
stats = new LockStats();
}
private LockStats() {}
- public Map<String, LockCounters> getLockCountersByPath() { return Map.copyOf(countersByLockPath); }
+ public Map<String, LockMetrics> getLockMetricsByPath() { return Map.copyOf(metricsByLockPath); }
public List<ThreadLockStats> getThreadLockStats() { return List.copyOf(statsByThread.values()); }
public List<LockAttempt> getLockAttemptSamples() { return completedLockAttemptSamples.asList(); }
@@ -53,8 +53,8 @@ public class LockStats {
}
}
- LockCounters getLockCounters(String lockPath) {
- return countersByLockPath.computeIfAbsent(lockPath, __ -> new LockCounters());
+ LockMetrics getLockMetrics(String lockPath) {
+ return metricsByLockPath.computeIfAbsent(lockPath, __ -> new LockMetrics());
}
void maybeSample(LockAttempt lockAttempt) {
diff --git a/zkfacade/src/main/java/com/yahoo/vespa/curator/stats/ThreadLockStats.java b/zkfacade/src/main/java/com/yahoo/vespa/curator/stats/ThreadLockStats.java
index 90a2c2f8d25..cd6ae3b1e68 100644
--- a/zkfacade/src/main/java/com/yahoo/vespa/curator/stats/ThreadLockStats.java
+++ b/zkfacade/src/main/java/com/yahoo/vespa/curator/stats/ThreadLockStats.java
@@ -1,7 +1,6 @@
// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
package com.yahoo.vespa.curator.stats;
-import com.yahoo.jdisc.Metric;
import com.yahoo.vespa.curator.Lock;
import java.time.Duration;
@@ -9,6 +8,7 @@ import java.util.List;
import java.util.Optional;
import java.util.concurrent.ConcurrentLinkedDeque;
import java.util.function.Consumer;
+import java.util.logging.Logger;
/**
* This class manages thread-specific statistics and information related to acquiring and releasing
@@ -20,6 +20,8 @@ import java.util.function.Consumer;
*/
public class ThreadLockStats {
+ private static final Logger logger = Logger.getLogger(ThreadLockStats.class.getName());
+
private final Thread thread;
/**
@@ -61,12 +63,8 @@ public class ThreadLockStats {
public Optional<RecordedLockAttempts> getOngoingRecording() { return ongoingRecording; }
/** Mutable method (see class doc) */
- public void invokingAcquire(String lockPath, Duration timeout,
- Optional<Metric> metric, Optional<Metric.Context> metricContext) {
- LockCounters lockCounters = getGlobalLockCounters(lockPath);
- lockCounters.invokeAcquireCount.incrementAndGet();
- lockCounters.inCriticalRegionCount.incrementAndGet();
- LockAttempt lockAttempt = LockAttempt.invokingAcquire(this, lockPath, timeout, metric, metricContext);
+ public void invokingAcquire(String lockPath, Duration timeout) {
+ LockAttempt lockAttempt = LockAttempt.invokingAcquire(this, lockPath, timeout, getGlobalLockMetrics(lockPath));
LockAttempt lastLockAttempt = lockAttemptsStack.peekLast();
if (lastLockAttempt == null) {
@@ -78,42 +76,34 @@ public class ThreadLockStats {
}
/** Mutable method (see class doc) */
- public void acquireFailed(String lockPath) {
- LockCounters lockCounters = getGlobalLockCounters(lockPath);
- lockCounters.acquireFailedCount.incrementAndGet();
- removeLastLockAttempt(lockCounters, LockAttempt::acquireFailed);
+ public void acquireFailed() {
+ removeLastLockAttempt(LockAttempt::acquireFailed);
}
/** Mutable method (see class doc) */
- public void acquireTimedOut(String lockPath) {
- LockCounters lockCounters = getGlobalLockCounters(lockPath);
-
- lockCounters.acquireTimedOutCount.incrementAndGet();
- removeLastLockAttempt(lockCounters, LockAttempt::timedOut);
+ public void acquireTimedOut() {
+ removeLastLockAttempt(LockAttempt::timedOut);
}
/** Mutable method (see class doc) */
- public void lockAcquired(String lockPath) {
- getGlobalLockCounters(lockPath).lockAcquiredCount.incrementAndGet();
+ public void lockAcquired() {
LockAttempt lockAttempt = lockAttemptsStack.peekLast();
if (lockAttempt == null) {
- throw new IllegalStateException("lockAcquired invoked without lockAttempts");
+ logger.warning("Unable to get last lock attempt as the lock attempt stack is empty");
+ return;
}
+
lockAttempt.lockAcquired();
}
/** Mutable method (see class doc) */
- public void lockReleased(String lockPath) {
- LockCounters lockCounters = getGlobalLockCounters(lockPath);
- lockCounters.locksReleasedCount.incrementAndGet();
- removeLastLockAttempt(lockCounters, LockAttempt::released);
+ public void lockReleased() {
+ removeLastLockAttempt(LockAttempt::released);
}
/** Mutable method (see class doc) */
- public void lockReleaseFailed(String lockPath) {
- LockCounters lockCounters = getGlobalLockCounters(lockPath);
- lockCounters.lockReleaseErrorCount.incrementAndGet();
- removeLastLockAttempt(lockCounters, LockAttempt::releasedWithError);
+ public void lockReleaseFailed() {
+ removeLastLockAttempt(LockAttempt::releasedWithError);
}
/** Mutable method (see class doc) */
@@ -133,19 +123,17 @@ public class ThreadLockStats {
}
}
- private LockCounters getGlobalLockCounters(String lockPath) {
- return LockStats.getGlobal().getLockCounters(lockPath);
+ private LockMetrics getGlobalLockMetrics(String lockPath) {
+ return LockStats.getGlobal().getLockMetrics(lockPath);
}
- private void removeLastLockAttempt(LockCounters lockCounters, Consumer<LockAttempt> completeLockAttempt) {
- lockCounters.inCriticalRegionCount.decrementAndGet();
-
- if (lockAttemptsStack.isEmpty()) {
- lockCounters.noLocksErrorCount.incrementAndGet();
+ private void removeLastLockAttempt(Consumer<LockAttempt> completeLockAttempt) {
+ LockAttempt lockAttempt = lockAttemptsStack.pollLast();
+ if (lockAttempt == null) {
+ logger.warning("Unable to remove last lock attempt as the lock attempt stack is empty");
return;
}
- LockAttempt lockAttempt = lockAttemptsStack.pollLast();
completeLockAttempt.accept(lockAttempt);
LockStats.getGlobal().maybeSample(lockAttempt);