diff options
3 files changed, 79 insertions, 23 deletions
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/restapi/LocksResponse.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/restapi/LocksResponse.java index bc4401ee03a..502bd6f3cd7 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/restapi/LocksResponse.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/restapi/LocksResponse.java @@ -55,19 +55,13 @@ public class LocksResponse extends HttpResponse { threadLockInfoCursor.setString("lock-path", threadLockInfo.getLockPath()); List<LockInfo> lockInfos = threadLockInfo.getLockInfos(); - if (!lockInfos.isEmpty()) { - Cursor lockInfosCursor = threadLockInfoCursor.setArray("locks"); - lockInfos.forEach(lockInfo -> { - Cursor lockInfoCursor = lockInfosCursor.addObject(); - lockInfoCursor.setString("invoke-acquire-time", toString(lockInfo.getTimeAcquiredWasInvoked())); - lockInfoCursor.setLong("reentrancy-hold-count-on-acquire", lockInfo.getThreadHoldCountOnAcquire()); - lockInfoCursor.setString("acquire-timeout", lockInfo.getAcquireTimeout().toString()); - lockInfo.getTimeLockWasAcquired().ifPresent(instant -> lockInfoCursor.setString("lock-acquired-time", toString(instant))); - lockInfoCursor.setString("lock-state", lockInfo.getLockState().name()); - lockInfo.getTimeTerminalStateWasReached().ifPresent(instant -> lockInfoCursor.setString("terminal-state-time", toString(instant))); - }); - } + Cursor lockInfosCursor = threadLockInfoCursor.setArray("locks"); + lockInfos.forEach(lockInfo -> setLockInfo(lockInfosCursor.addObject(), lockInfo, false)); }); + + List<LockInfo> slowLockInfos = ThreadLockInfo.getSlowLockInfos(); + Cursor slowLocksCursor = root.setArray("slow-locks"); + slowLockInfos.forEach(lockInfo -> setLockInfo(slowLocksCursor.addObject(), lockInfo, true)); } @Override @@ -80,6 +74,20 @@ public class LocksResponse extends HttpResponse { return "application/json"; } + private void setLockInfo(Cursor lockInfoCursor, LockInfo lockInfo, boolean includeThreadInfo) { + if (includeThreadInfo) { + lockInfoCursor.setString("thread-name", lockInfo.getThreadName()); + lockInfoCursor.setString("lock-path", lockInfo.getLockPath()); + } + lockInfoCursor.setString("invoke-acquire-time", toString(lockInfo.getTimeAcquiredWasInvoked())); + lockInfoCursor.setLong("reentrancy-hold-count-on-acquire", lockInfo.getThreadHoldCountOnAcquire()); + lockInfoCursor.setString("acquire-timeout", lockInfo.getAcquireTimeout().toString()); + lockInfo.getTimeLockWasAcquired().ifPresent(instant -> lockInfoCursor.setString("lock-acquired-time", toString(instant))); + lockInfoCursor.setString("lock-state", lockInfo.getLockState().name()); + lockInfo.getTimeTerminalStateWasReached().ifPresent(instant -> lockInfoCursor.setString("terminal-state-time", toString(instant))); + lockInfo.getStackTrace().ifPresent(stackTrace -> lockInfoCursor.setString("stack-trace", stackTrace)); + } + private static String toString(Instant time) { return Instant.ofEpochMilli(time.toEpochMilli()).toString(); } diff --git a/zkfacade/src/main/java/com/yahoo/vespa/curator/stats/LockInfo.java b/zkfacade/src/main/java/com/yahoo/vespa/curator/stats/LockInfo.java index b192210c3de..6e58c1ea81b 100644 --- a/zkfacade/src/main/java/com/yahoo/vespa/curator/stats/LockInfo.java +++ b/zkfacade/src/main/java/com/yahoo/vespa/curator/stats/LockInfo.java @@ -4,6 +4,7 @@ package com.yahoo.vespa.curator.stats; import java.time.Duration; import java.time.Instant; import java.util.Optional; +import java.util.stream.Stream; /** * Information about a lock. @@ -14,15 +15,18 @@ import java.util.Optional; */ public class LockInfo { + private final String threadName; + private final String lockPath; private final int threadHoldCountOnAcquire; private final Instant acquireInstant; private final Duration timeout; private volatile Optional<Instant> lockAcquiredInstant = Optional.empty(); private volatile Optional<Instant> terminalStateInstant = Optional.empty(); + private volatile Optional<String> stackTrace = Optional.empty(); - public static LockInfo invokingAcquire(int holdCount, Duration timeout) { - return new LockInfo(holdCount, timeout); + public static LockInfo invokingAcquire(String threadName, String lockPath, int holdCount, Duration timeout) { + return new LockInfo(threadName, lockPath, holdCount, timeout); } public enum LockState { @@ -37,18 +41,28 @@ public class LockInfo { private volatile LockState lockState = LockState.ACQUIRING; - private LockInfo(int threadHoldCountOnAcquire, Duration timeout) { + private LockInfo(String threadName, String lockPath, int threadHoldCountOnAcquire, Duration timeout) { + this.threadName = threadName; + this.lockPath = lockPath; this.threadHoldCountOnAcquire = threadHoldCountOnAcquire; this.acquireInstant = Instant.now(); this.timeout = timeout; } + public String getThreadName() { return threadName; } + public String getLockPath() { return lockPath; } public int getThreadHoldCountOnAcquire() { return threadHoldCountOnAcquire; } public Instant getTimeAcquiredWasInvoked() { return acquireInstant; } public Duration getAcquireTimeout() { return timeout; } public LockState getLockState() { return lockState; } public Optional<Instant> getTimeLockWasAcquired() { return lockAcquiredInstant; } public Optional<Instant> getTimeTerminalStateWasReached() { return terminalStateInstant; } + public Optional<String> getStackTrace() { return stackTrace; } + + /** Get time from just before trying to acquire lock to the time the terminal state was reached, or ZERO. */ + public Duration getTotalTime() { + return terminalStateInstant.map(instant -> Duration.between(acquireInstant, instant)).orElse(Duration.ZERO); + } void timedOut() { setTerminalState(LockState.TIMED_OUT); } void failedToAcquireReentrantLock() { setTerminalState(LockState.FAILED_TO_REENTER); } @@ -63,4 +77,24 @@ public class LockInfo { lockState = terminalState; terminalStateInstant = Optional.of(Instant.now()); } + + /** Fill in the stack trace starting at the caller's stack frame. */ + void fillStackTrace() { + final int elementsToIgnore = 1; + + var stackTrace = new StringBuilder(); + + StackTraceElement[] elements = Thread.currentThread().getStackTrace(); + for (int i = elementsToIgnore; i < elements.length; ++i) { + Stream.of(elements).forEach(element -> + stackTrace.append(element.getClassName()) + .append('(') + .append(element.getFileName()) + .append(':') + .append(element.getLineNumber()) + .append(")\n")); + } + + this.stackTrace = Optional.of(stackTrace.toString()); + } } diff --git a/zkfacade/src/main/java/com/yahoo/vespa/curator/stats/ThreadLockInfo.java b/zkfacade/src/main/java/com/yahoo/vespa/curator/stats/ThreadLockInfo.java index bba39e6dc49..5e77ec76290 100644 --- a/zkfacade/src/main/java/com/yahoo/vespa/curator/stats/ThreadLockInfo.java +++ b/zkfacade/src/main/java/com/yahoo/vespa/curator/stats/ThreadLockInfo.java @@ -4,11 +4,12 @@ package com.yahoo.vespa.curator.stats; import com.yahoo.vespa.curator.Lock; import java.time.Duration; +import java.util.Comparator; import java.util.List; import java.util.Map; import java.util.Optional; +import java.util.PriorityQueue; import java.util.concurrent.ConcurrentHashMap; -import java.util.concurrent.ConcurrentLinkedDeque; import java.util.concurrent.ConcurrentLinkedQueue; import java.util.concurrent.atomic.AtomicInteger; import java.util.concurrent.locks.ReentrantLock; @@ -26,8 +27,10 @@ public class ThreadLockInfo { private static final ConcurrentHashMap<Thread, ThreadLockInfo> locks = new ConcurrentHashMap<>(); - private static final int MAX_COMPLETED_LOCK_INFOS_SIZE = 10; - private static final ConcurrentLinkedDeque<LockInfo> completedLockInfos = new ConcurrentLinkedDeque<>(); + private static final int MAX_COMPLETED_LOCK_INFOS_SIZE = 5; + /** Would have used a thread-safe priority queue. */ + private static final Object completedLockInfosMonitor = new Object(); + private static final PriorityQueue<LockInfo> completedLockInfos = new PriorityQueue<>(Comparator.comparing(LockInfo::getTotalTime)); private static final ConcurrentHashMap<String, LockCounters> countersByLockPath = new ConcurrentHashMap<>(); @@ -43,6 +46,12 @@ public class ThreadLockInfo { public static List<ThreadLockInfo> getThreadLockInfos() { return List.copyOf(locks.values()); } + public static List<LockInfo> getSlowLockInfos() { + synchronized (completedLockInfosMonitor) { + return List.copyOf(completedLockInfos); + } + } + /** Returns the per-thread singleton ThreadLockInfo. */ public static ThreadLockInfo getCurrentThreadLockInfo(String lockPath, ReentrantLock lock) { return locks.computeIfAbsent( @@ -68,7 +77,7 @@ public class ThreadLockInfo { public void invokingAcquire(Duration timeout) { lockCountersForPath.invokeAcquireCount.incrementAndGet(); lockCountersForPath.inCriticalRegionCount.incrementAndGet(); - lockInfos.add(LockInfo.invokingAcquire(lock.getHoldCount(), timeout)); + lockInfos.add(LockInfo.invokingAcquire(getThreadName(), lockPath, lock.getHoldCount(), timeout)); } /** Mutable method (see class doc) */ @@ -113,10 +122,15 @@ public class ThreadLockInfo { LockInfo lockInfo = lockInfos.poll(); completeLockInfo.accept(lockInfo); - if (completedLockInfos.size() >= MAX_COMPLETED_LOCK_INFOS_SIZE) { - // This is thread-safe, as no-one but currentThread mutates completedLockInfos - completedLockInfos.removeLast(); + synchronized (completedLockInfosMonitor) { + if (completedLockInfos.size() < MAX_COMPLETED_LOCK_INFOS_SIZE) { + lockInfo.fillStackTrace(); + completedLockInfos.add(lockInfo); + } else if (lockInfo.getTotalTime().compareTo(completedLockInfos.peek().getTotalTime()) > 0) { + completedLockInfos.poll(); + lockInfo.fillStackTrace(); + completedLockInfos.add(lockInfo); + } } - completedLockInfos.addFirst(lockInfo); } } |