diff options
author | Håkon Hallingstad <hakon@verizonmedia.com> | 2020-09-24 13:53:40 +0200 |
---|---|---|
committer | Håkon Hallingstad <hakon@verizonmedia.com> | 2020-09-24 13:53:40 +0200 |
commit | 069716d610dbea6fa58e009cac8c5495c5e7d06d (patch) | |
tree | 2ee22c776e85084330da7dd1fad0a54d643e49f6 /zkfacade/src | |
parent | 181b198578ce66e2759a450a9ee6adc8ca4f9134 (diff) |
Expose locks info in REST API
Diffstat (limited to 'zkfacade/src')
3 files changed, 203 insertions, 2 deletions
diff --git a/zkfacade/src/main/java/com/yahoo/vespa/curator/Lock.java b/zkfacade/src/main/java/com/yahoo/vespa/curator/Lock.java index d97d8f5ed71..299fceb8d7f 100644 --- a/zkfacade/src/main/java/com/yahoo/vespa/curator/Lock.java +++ b/zkfacade/src/main/java/com/yahoo/vespa/curator/Lock.java @@ -32,12 +32,20 @@ public class Lock implements Mutex { /** Take the lock with the given timeout. This may be called multiple times from the same thread - each matched by a close */ public void acquire(Duration timeout) throws UncheckedTimeoutException { + ThreadLockInfo threadLockInfo = getThreadLockInfo(); + threadLockInfo.invokingAcquire(timeout); try { - if ( ! mutex.acquire(timeout.toMillis(), TimeUnit.MILLISECONDS)) + if ( ! mutex.acquire(timeout.toMillis(), TimeUnit.MILLISECONDS)) { + threadLockInfo.acquireTimedOut(); + throw new UncheckedTimeoutException("Timed out after waiting " + timeout + - " to acquire lock '" + lockPath + "'"); + " to acquire lock '" + lockPath + "'"); + } + threadLockInfo.lockAcquired(); + if ( ! lock.tryLock()) { // Should be available to only this thread, while holding the above mutex. release(); + threadLockInfo.failedToAcquireReentrantLock(); throw new IllegalStateException("InterProcessMutex acquired, but guarded lock held by someone else, for lock '" + lockPath + "'"); } } @@ -60,6 +68,7 @@ public class Lock implements Mutex { } private void release() { + getThreadLockInfo().lockReleased(); try { mutex.release(); } @@ -68,6 +77,9 @@ public class Lock implements Mutex { } } + private ThreadLockInfo getThreadLockInfo() { + return ThreadLockInfo.getCurrentThreadLockInfo(lockPath, lock); + } } diff --git a/zkfacade/src/main/java/com/yahoo/vespa/curator/LockInfo.java b/zkfacade/src/main/java/com/yahoo/vespa/curator/LockInfo.java new file mode 100644 index 00000000000..870ee12ebda --- /dev/null +++ b/zkfacade/src/main/java/com/yahoo/vespa/curator/LockInfo.java @@ -0,0 +1,64 @@ +// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.vespa.curator; + +import java.time.Duration; +import java.time.Instant; +import java.util.Optional; + +/** + * Information about a lock. + * + * <p>Should be mutated by a single thread. Other threads may see an inconsistent state of this instance.</p> + */ +public class LockInfo { + + private final int threadHoldCountOnAcquire; + private final Instant acquireInstant; + private final Duration timeout; + + private volatile Optional<Instant> lockAcquiredInstant = Optional.empty(); + private volatile Optional<Instant> terminalStateInstant = Optional.empty(); + + public static LockInfo invokingAcquire(int holdCount, Duration timeout) { + return new LockInfo(holdCount, timeout); + } + + public enum LockState { + ACQUIRING(false), TIMED_OUT(true), ACQUIRED(false), FAILED_TO_REENTER(true), RELEASED(true); + + private final boolean terminal; + + LockState(boolean terminal) { this.terminal = terminal; } + + public boolean isTerminal() { return terminal; } + } + + private volatile LockState lockState = LockState.ACQUIRING; + + private LockInfo(int threadHoldCountOnAcquire, Duration timeout) { + this.threadHoldCountOnAcquire = threadHoldCountOnAcquire; + this.acquireInstant = Instant.now(); + this.timeout = timeout; + } + + public int getThreadHoldCountOnAcquire() { return threadHoldCountOnAcquire; } + public Instant getTimeAcquiredWasInvoked() { return acquireInstant; } + public Duration getAcquireTimeout() { return timeout; } + public LockState getLockState() { return lockState; } + public Optional<Instant> getTimeLockWasAcquired() { return lockAcquiredInstant; } + public Optional<Instant> getTimeTerminalStateWasReached() { return terminalStateInstant; } + + void timedOut() { setTerminalState(LockState.TIMED_OUT); } + void failedToAcquireReentrantLock() { setTerminalState(LockState.FAILED_TO_REENTER); } + void released() { setTerminalState(LockState.RELEASED); } + + void lockAcquired() { + lockState = LockState.ACQUIRED; + lockAcquiredInstant = Optional.of(Instant.now()); + } + + void setTerminalState(LockState terminalState) { + lockState = terminalState; + terminalStateInstant = Optional.of(Instant.now()); + } +} diff --git a/zkfacade/src/main/java/com/yahoo/vespa/curator/ThreadLockInfo.java b/zkfacade/src/main/java/com/yahoo/vespa/curator/ThreadLockInfo.java new file mode 100644 index 00000000000..9fbcb550ddd --- /dev/null +++ b/zkfacade/src/main/java/com/yahoo/vespa/curator/ThreadLockInfo.java @@ -0,0 +1,125 @@ +// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.vespa.curator; + +import java.time.Duration; +import java.util.List; +import java.util.Optional; +import java.util.concurrent.ConcurrentHashMap; +import java.util.concurrent.ConcurrentLinkedDeque; +import java.util.concurrent.ConcurrentLinkedQueue; +import java.util.concurrent.atomic.AtomicInteger; +import java.util.concurrent.locks.ReentrantLock; +import java.util.function.Consumer; + +/** + * This class contains process-wide statistics and information related to acquiring and releasing + * {@link Lock}. Instances of this class contain information tied to a specific thread and lock path. + * + * <p>Instances of this class are thread-safe as long as foreign threads (!= this.thread) avoid mutable methods.</p> + */ +public class ThreadLockInfo { + + private static final ConcurrentHashMap<Thread, ThreadLockInfo> locks = new ConcurrentHashMap<>(); + + private static final int MAX_COMPLETED_LOCK_INFOS_SIZE = 10; + private static final ConcurrentLinkedDeque<LockInfo> completedLockInfos = new ConcurrentLinkedDeque<>(); + + private static final AtomicInteger invokeAcquireCount = new AtomicInteger(0); + private static final AtomicInteger inCriticalRegionCount = new AtomicInteger(0); + private static final AtomicInteger acquireTimedOutCount = new AtomicInteger(0); + private static final AtomicInteger lockAcquiredCount = new AtomicInteger(0); + private static final AtomicInteger locksReleasedCount = new AtomicInteger(0); + + private static final AtomicInteger failedToAcquireReentrantLockCount = new AtomicInteger(0); + private static final AtomicInteger noLocksErrorCount = new AtomicInteger(0); + private static final AtomicInteger timeoutOnReentrancyErrorCount = new AtomicInteger(0); + + private final Thread thread; + private final String lockPath; + private final ReentrantLock lock; + + /** The locks are reentrant so there may be more than 1 lock for this thread. */ + private final ConcurrentLinkedQueue<LockInfo> lockInfos = new ConcurrentLinkedQueue<>(); + + public static int invokeAcquireCount() { return invokeAcquireCount.get(); } + public static int inCriticalRegionCount() { return inCriticalRegionCount.get(); } + public static int acquireTimedOutCount() { return acquireTimedOutCount.get(); } + public static int lockAcquiredCount() { return lockAcquiredCount.get(); } + public static int locksReleasedCount() { return locksReleasedCount.get(); } + public static int noLocksErrorCount() { return noLocksErrorCount.get(); } + public static int failedToAcquireReentrantLockCount() { return failedToAcquireReentrantLockCount.get(); } + public static int timeoutOnReentrancyErrorCount() { return timeoutOnReentrancyErrorCount.get(); } + public static List<ThreadLockInfo> getThreadLockInfos() { return List.copyOf(locks.values()); } + + /** Returns the per-thread singleton ThreadLockInfo. */ + static ThreadLockInfo getCurrentThreadLockInfo(String lockPath, ReentrantLock lock) { + return locks.computeIfAbsent( + Thread.currentThread(), + currentThread -> new ThreadLockInfo(currentThread, lockPath, lock)); + } + + ThreadLockInfo(Thread currentThread, String lockPath, ReentrantLock lock) { + this.thread = currentThread; + this.lockPath = lockPath; + this.lock = lock; + } + + public String getThreadName() { return thread.getName(); } + public String getLockPath() { return lockPath; } + public List<LockInfo> getLockInfos() { return List.copyOf(lockInfos); } + + /** Mutable method (see class doc) */ + void invokingAcquire(Duration timeout) { + invokeAcquireCount.incrementAndGet(); + inCriticalRegionCount.incrementAndGet(); + lockInfos.add(LockInfo.invokingAcquire(lock.getHoldCount(), timeout)); + } + + /** Mutable method (see class doc) */ + void acquireTimedOut() { + if (lockInfos.size() > 1) { + timeoutOnReentrancyErrorCount.incrementAndGet(); + } + + removeLastLockInfo(acquireTimedOutCount, LockInfo::timedOut); + } + + /** Mutable method (see class doc) */ + void lockAcquired() { + lockAcquiredCount.incrementAndGet(); + getLastLockInfo().ifPresent(LockInfo::lockAcquired); + } + + /** Mutable method (see class doc) */ + void failedToAcquireReentrantLock() { + removeLastLockInfo(failedToAcquireReentrantLockCount, LockInfo::failedToAcquireReentrantLock); + } + + /** Mutable method (see class doc) */ + void lockReleased() { + removeLastLockInfo(locksReleasedCount, LockInfo::released); + } + + private Optional<LockInfo> getLastLockInfo() { + return lockInfos.isEmpty() ? Optional.empty() : Optional.of(lockInfos.peek()); + } + + private void removeLastLockInfo(AtomicInteger metricToIncrement, Consumer<LockInfo> completeLockInfo) { + metricToIncrement.incrementAndGet(); + inCriticalRegionCount.decrementAndGet(); + + if (lockInfos.isEmpty()) { + noLocksErrorCount.incrementAndGet(); + return; + } + + LockInfo lockInfo = lockInfos.poll(); + completeLockInfo.accept(lockInfo); + + if (completedLockInfos.size() >= MAX_COMPLETED_LOCK_INFOS_SIZE) { + // This is thread-safe, as no-one but currentThread mutates completedLockInfos + completedLockInfos.removeLast(); + } + completedLockInfos.addFirst(lockInfo); + } +} |