From 069716d610dbea6fa58e009cac8c5495c5e7d06d Mon Sep 17 00:00:00 2001 From: HÃ¥kon Hallingstad Date: Thu, 24 Sep 2020 13:53:40 +0200 Subject: Expose locks info in REST API --- zkfacade/abi-spec.json | 60 ++++++++++ .../main/java/com/yahoo/vespa/curator/Lock.java | 16 ++- .../java/com/yahoo/vespa/curator/LockInfo.java | 64 +++++++++++ .../com/yahoo/vespa/curator/ThreadLockInfo.java | 125 +++++++++++++++++++++ 4 files changed, 263 insertions(+), 2 deletions(-) create mode 100644 zkfacade/src/main/java/com/yahoo/vespa/curator/LockInfo.java create mode 100644 zkfacade/src/main/java/com/yahoo/vespa/curator/ThreadLockInfo.java (limited to 'zkfacade') diff --git a/zkfacade/abi-spec.json b/zkfacade/abi-spec.json index f4ad1ab4372..614c58f3cf6 100644 --- a/zkfacade/abi-spec.json +++ b/zkfacade/abi-spec.json @@ -109,5 +109,65 @@ "public void close()" ], "fields": [] + }, + "com.yahoo.vespa.curator.LockInfo$LockState": { + "superClass": "java.lang.Enum", + "interfaces": [], + "attributes": [ + "public", + "final", + "enum" + ], + "methods": [ + "public static com.yahoo.vespa.curator.LockInfo$LockState[] values()", + "public static com.yahoo.vespa.curator.LockInfo$LockState valueOf(java.lang.String)", + "public boolean isTerminal()" + ], + "fields": [ + "public static final enum com.yahoo.vespa.curator.LockInfo$LockState ACQUIRING", + "public static final enum com.yahoo.vespa.curator.LockInfo$LockState TIMED_OUT", + "public static final enum com.yahoo.vespa.curator.LockInfo$LockState ACQUIRED", + "public static final enum com.yahoo.vespa.curator.LockInfo$LockState FAILED_TO_REENTER", + "public static final enum com.yahoo.vespa.curator.LockInfo$LockState RELEASED" + ] + }, + "com.yahoo.vespa.curator.LockInfo": { + "superClass": "java.lang.Object", + "interfaces": [], + "attributes": [ + "public" + ], + "methods": [ + "public static com.yahoo.vespa.curator.LockInfo invokingAcquire(int, java.time.Duration)", + "public int getThreadHoldCountOnAcquire()", + "public java.time.Instant getTimeAcquiredWasInvoked()", + "public java.time.Duration getAcquireTimeout()", + "public com.yahoo.vespa.curator.LockInfo$LockState getLockState()", + "public java.util.Optional getTimeLockWasAcquired()", + "public java.util.Optional getTimeTerminalStateWasReached()" + ], + "fields": [] + }, + "com.yahoo.vespa.curator.ThreadLockInfo": { + "superClass": "java.lang.Object", + "interfaces": [], + "attributes": [ + "public" + ], + "methods": [ + "public static int invokeAcquireCount()", + "public static int inCriticalRegionCount()", + "public static int acquireTimedOutCount()", + "public static int lockAcquiredCount()", + "public static int locksReleasedCount()", + "public static int noLocksErrorCount()", + "public static int failedToAcquireReentrantLockCount()", + "public static int timeoutOnReentrancyErrorCount()", + "public static java.util.List getThreadLockInfos()", + "public java.lang.String getThreadName()", + "public java.lang.String getLockPath()", + "public java.util.List getLockInfos()" + ], + "fields": [] } } \ No newline at end of file diff --git a/zkfacade/src/main/java/com/yahoo/vespa/curator/Lock.java b/zkfacade/src/main/java/com/yahoo/vespa/curator/Lock.java index d97d8f5ed71..299fceb8d7f 100644 --- a/zkfacade/src/main/java/com/yahoo/vespa/curator/Lock.java +++ b/zkfacade/src/main/java/com/yahoo/vespa/curator/Lock.java @@ -32,12 +32,20 @@ public class Lock implements Mutex { /** Take the lock with the given timeout. This may be called multiple times from the same thread - each matched by a close */ public void acquire(Duration timeout) throws UncheckedTimeoutException { + ThreadLockInfo threadLockInfo = getThreadLockInfo(); + threadLockInfo.invokingAcquire(timeout); try { - if ( ! mutex.acquire(timeout.toMillis(), TimeUnit.MILLISECONDS)) + if ( ! mutex.acquire(timeout.toMillis(), TimeUnit.MILLISECONDS)) { + threadLockInfo.acquireTimedOut(); + throw new UncheckedTimeoutException("Timed out after waiting " + timeout + - " to acquire lock '" + lockPath + "'"); + " to acquire lock '" + lockPath + "'"); + } + threadLockInfo.lockAcquired(); + if ( ! lock.tryLock()) { // Should be available to only this thread, while holding the above mutex. release(); + threadLockInfo.failedToAcquireReentrantLock(); throw new IllegalStateException("InterProcessMutex acquired, but guarded lock held by someone else, for lock '" + lockPath + "'"); } } @@ -60,6 +68,7 @@ public class Lock implements Mutex { } private void release() { + getThreadLockInfo().lockReleased(); try { mutex.release(); } @@ -68,6 +77,9 @@ public class Lock implements Mutex { } } + private ThreadLockInfo getThreadLockInfo() { + return ThreadLockInfo.getCurrentThreadLockInfo(lockPath, lock); + } } diff --git a/zkfacade/src/main/java/com/yahoo/vespa/curator/LockInfo.java b/zkfacade/src/main/java/com/yahoo/vespa/curator/LockInfo.java new file mode 100644 index 00000000000..870ee12ebda --- /dev/null +++ b/zkfacade/src/main/java/com/yahoo/vespa/curator/LockInfo.java @@ -0,0 +1,64 @@ +// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.vespa.curator; + +import java.time.Duration; +import java.time.Instant; +import java.util.Optional; + +/** + * Information about a lock. + * + *

Should be mutated by a single thread. Other threads may see an inconsistent state of this instance.

+ */ +public class LockInfo { + + private final int threadHoldCountOnAcquire; + private final Instant acquireInstant; + private final Duration timeout; + + private volatile Optional lockAcquiredInstant = Optional.empty(); + private volatile Optional terminalStateInstant = Optional.empty(); + + public static LockInfo invokingAcquire(int holdCount, Duration timeout) { + return new LockInfo(holdCount, timeout); + } + + public enum LockState { + ACQUIRING(false), TIMED_OUT(true), ACQUIRED(false), FAILED_TO_REENTER(true), RELEASED(true); + + private final boolean terminal; + + LockState(boolean terminal) { this.terminal = terminal; } + + public boolean isTerminal() { return terminal; } + } + + private volatile LockState lockState = LockState.ACQUIRING; + + private LockInfo(int threadHoldCountOnAcquire, Duration timeout) { + this.threadHoldCountOnAcquire = threadHoldCountOnAcquire; + this.acquireInstant = Instant.now(); + this.timeout = timeout; + } + + public int getThreadHoldCountOnAcquire() { return threadHoldCountOnAcquire; } + public Instant getTimeAcquiredWasInvoked() { return acquireInstant; } + public Duration getAcquireTimeout() { return timeout; } + public LockState getLockState() { return lockState; } + public Optional getTimeLockWasAcquired() { return lockAcquiredInstant; } + public Optional getTimeTerminalStateWasReached() { return terminalStateInstant; } + + void timedOut() { setTerminalState(LockState.TIMED_OUT); } + void failedToAcquireReentrantLock() { setTerminalState(LockState.FAILED_TO_REENTER); } + void released() { setTerminalState(LockState.RELEASED); } + + void lockAcquired() { + lockState = LockState.ACQUIRED; + lockAcquiredInstant = Optional.of(Instant.now()); + } + + void setTerminalState(LockState terminalState) { + lockState = terminalState; + terminalStateInstant = Optional.of(Instant.now()); + } +} diff --git a/zkfacade/src/main/java/com/yahoo/vespa/curator/ThreadLockInfo.java b/zkfacade/src/main/java/com/yahoo/vespa/curator/ThreadLockInfo.java new file mode 100644 index 00000000000..9fbcb550ddd --- /dev/null +++ b/zkfacade/src/main/java/com/yahoo/vespa/curator/ThreadLockInfo.java @@ -0,0 +1,125 @@ +// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.vespa.curator; + +import java.time.Duration; +import java.util.List; +import java.util.Optional; +import java.util.concurrent.ConcurrentHashMap; +import java.util.concurrent.ConcurrentLinkedDeque; +import java.util.concurrent.ConcurrentLinkedQueue; +import java.util.concurrent.atomic.AtomicInteger; +import java.util.concurrent.locks.ReentrantLock; +import java.util.function.Consumer; + +/** + * This class contains process-wide statistics and information related to acquiring and releasing + * {@link Lock}. Instances of this class contain information tied to a specific thread and lock path. + * + *

Instances of this class are thread-safe as long as foreign threads (!= this.thread) avoid mutable methods.

+ */ +public class ThreadLockInfo { + + private static final ConcurrentHashMap locks = new ConcurrentHashMap<>(); + + private static final int MAX_COMPLETED_LOCK_INFOS_SIZE = 10; + private static final ConcurrentLinkedDeque completedLockInfos = new ConcurrentLinkedDeque<>(); + + private static final AtomicInteger invokeAcquireCount = new AtomicInteger(0); + private static final AtomicInteger inCriticalRegionCount = new AtomicInteger(0); + private static final AtomicInteger acquireTimedOutCount = new AtomicInteger(0); + private static final AtomicInteger lockAcquiredCount = new AtomicInteger(0); + private static final AtomicInteger locksReleasedCount = new AtomicInteger(0); + + private static final AtomicInteger failedToAcquireReentrantLockCount = new AtomicInteger(0); + private static final AtomicInteger noLocksErrorCount = new AtomicInteger(0); + private static final AtomicInteger timeoutOnReentrancyErrorCount = new AtomicInteger(0); + + private final Thread thread; + private final String lockPath; + private final ReentrantLock lock; + + /** The locks are reentrant so there may be more than 1 lock for this thread. */ + private final ConcurrentLinkedQueue lockInfos = new ConcurrentLinkedQueue<>(); + + public static int invokeAcquireCount() { return invokeAcquireCount.get(); } + public static int inCriticalRegionCount() { return inCriticalRegionCount.get(); } + public static int acquireTimedOutCount() { return acquireTimedOutCount.get(); } + public static int lockAcquiredCount() { return lockAcquiredCount.get(); } + public static int locksReleasedCount() { return locksReleasedCount.get(); } + public static int noLocksErrorCount() { return noLocksErrorCount.get(); } + public static int failedToAcquireReentrantLockCount() { return failedToAcquireReentrantLockCount.get(); } + public static int timeoutOnReentrancyErrorCount() { return timeoutOnReentrancyErrorCount.get(); } + public static List getThreadLockInfos() { return List.copyOf(locks.values()); } + + /** Returns the per-thread singleton ThreadLockInfo. */ + static ThreadLockInfo getCurrentThreadLockInfo(String lockPath, ReentrantLock lock) { + return locks.computeIfAbsent( + Thread.currentThread(), + currentThread -> new ThreadLockInfo(currentThread, lockPath, lock)); + } + + ThreadLockInfo(Thread currentThread, String lockPath, ReentrantLock lock) { + this.thread = currentThread; + this.lockPath = lockPath; + this.lock = lock; + } + + public String getThreadName() { return thread.getName(); } + public String getLockPath() { return lockPath; } + public List getLockInfos() { return List.copyOf(lockInfos); } + + /** Mutable method (see class doc) */ + void invokingAcquire(Duration timeout) { + invokeAcquireCount.incrementAndGet(); + inCriticalRegionCount.incrementAndGet(); + lockInfos.add(LockInfo.invokingAcquire(lock.getHoldCount(), timeout)); + } + + /** Mutable method (see class doc) */ + void acquireTimedOut() { + if (lockInfos.size() > 1) { + timeoutOnReentrancyErrorCount.incrementAndGet(); + } + + removeLastLockInfo(acquireTimedOutCount, LockInfo::timedOut); + } + + /** Mutable method (see class doc) */ + void lockAcquired() { + lockAcquiredCount.incrementAndGet(); + getLastLockInfo().ifPresent(LockInfo::lockAcquired); + } + + /** Mutable method (see class doc) */ + void failedToAcquireReentrantLock() { + removeLastLockInfo(failedToAcquireReentrantLockCount, LockInfo::failedToAcquireReentrantLock); + } + + /** Mutable method (see class doc) */ + void lockReleased() { + removeLastLockInfo(locksReleasedCount, LockInfo::released); + } + + private Optional getLastLockInfo() { + return lockInfos.isEmpty() ? Optional.empty() : Optional.of(lockInfos.peek()); + } + + private void removeLastLockInfo(AtomicInteger metricToIncrement, Consumer completeLockInfo) { + metricToIncrement.incrementAndGet(); + inCriticalRegionCount.decrementAndGet(); + + if (lockInfos.isEmpty()) { + noLocksErrorCount.incrementAndGet(); + return; + } + + LockInfo lockInfo = lockInfos.poll(); + completeLockInfo.accept(lockInfo); + + if (completedLockInfos.size() >= MAX_COMPLETED_LOCK_INFOS_SIZE) { + // This is thread-safe, as no-one but currentThread mutates completedLockInfos + completedLockInfos.removeLast(); + } + completedLockInfos.addFirst(lockInfo); + } +} -- cgit v1.2.3