summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--node-repository/src/main/java/com/yahoo/vespa/hosted/provision/restapi/LocksResponse.java32
-rw-r--r--zkfacade/src/main/java/com/yahoo/vespa/curator/stats/LockInfo.java40
-rw-r--r--zkfacade/src/main/java/com/yahoo/vespa/curator/stats/ThreadLockInfo.java30
3 files changed, 79 insertions, 23 deletions
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/restapi/LocksResponse.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/restapi/LocksResponse.java
index bc4401ee03a..502bd6f3cd7 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/restapi/LocksResponse.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/restapi/LocksResponse.java
@@ -55,19 +55,13 @@ public class LocksResponse extends HttpResponse {
threadLockInfoCursor.setString("lock-path", threadLockInfo.getLockPath());
List<LockInfo> lockInfos = threadLockInfo.getLockInfos();
- if (!lockInfos.isEmpty()) {
- Cursor lockInfosCursor = threadLockInfoCursor.setArray("locks");
- lockInfos.forEach(lockInfo -> {
- Cursor lockInfoCursor = lockInfosCursor.addObject();
- lockInfoCursor.setString("invoke-acquire-time", toString(lockInfo.getTimeAcquiredWasInvoked()));
- lockInfoCursor.setLong("reentrancy-hold-count-on-acquire", lockInfo.getThreadHoldCountOnAcquire());
- lockInfoCursor.setString("acquire-timeout", lockInfo.getAcquireTimeout().toString());
- lockInfo.getTimeLockWasAcquired().ifPresent(instant -> lockInfoCursor.setString("lock-acquired-time", toString(instant)));
- lockInfoCursor.setString("lock-state", lockInfo.getLockState().name());
- lockInfo.getTimeTerminalStateWasReached().ifPresent(instant -> lockInfoCursor.setString("terminal-state-time", toString(instant)));
- });
- }
+ Cursor lockInfosCursor = threadLockInfoCursor.setArray("locks");
+ lockInfos.forEach(lockInfo -> setLockInfo(lockInfosCursor.addObject(), lockInfo, false));
});
+
+ List<LockInfo> slowLockInfos = ThreadLockInfo.getSlowLockInfos();
+ Cursor slowLocksCursor = root.setArray("slow-locks");
+ slowLockInfos.forEach(lockInfo -> setLockInfo(slowLocksCursor.addObject(), lockInfo, true));
}
@Override
@@ -80,6 +74,20 @@ public class LocksResponse extends HttpResponse {
return "application/json";
}
+ private void setLockInfo(Cursor lockInfoCursor, LockInfo lockInfo, boolean includeThreadInfo) {
+ if (includeThreadInfo) {
+ lockInfoCursor.setString("thread-name", lockInfo.getThreadName());
+ lockInfoCursor.setString("lock-path", lockInfo.getLockPath());
+ }
+ lockInfoCursor.setString("invoke-acquire-time", toString(lockInfo.getTimeAcquiredWasInvoked()));
+ lockInfoCursor.setLong("reentrancy-hold-count-on-acquire", lockInfo.getThreadHoldCountOnAcquire());
+ lockInfoCursor.setString("acquire-timeout", lockInfo.getAcquireTimeout().toString());
+ lockInfo.getTimeLockWasAcquired().ifPresent(instant -> lockInfoCursor.setString("lock-acquired-time", toString(instant)));
+ lockInfoCursor.setString("lock-state", lockInfo.getLockState().name());
+ lockInfo.getTimeTerminalStateWasReached().ifPresent(instant -> lockInfoCursor.setString("terminal-state-time", toString(instant)));
+ lockInfo.getStackTrace().ifPresent(stackTrace -> lockInfoCursor.setString("stack-trace", stackTrace));
+ }
+
private static String toString(Instant time) {
return Instant.ofEpochMilli(time.toEpochMilli()).toString();
}
diff --git a/zkfacade/src/main/java/com/yahoo/vespa/curator/stats/LockInfo.java b/zkfacade/src/main/java/com/yahoo/vespa/curator/stats/LockInfo.java
index b192210c3de..6e58c1ea81b 100644
--- a/zkfacade/src/main/java/com/yahoo/vespa/curator/stats/LockInfo.java
+++ b/zkfacade/src/main/java/com/yahoo/vespa/curator/stats/LockInfo.java
@@ -4,6 +4,7 @@ package com.yahoo.vespa.curator.stats;
import java.time.Duration;
import java.time.Instant;
import java.util.Optional;
+import java.util.stream.Stream;
/**
* Information about a lock.
@@ -14,15 +15,18 @@ import java.util.Optional;
*/
public class LockInfo {
+ private final String threadName;
+ private final String lockPath;
private final int threadHoldCountOnAcquire;
private final Instant acquireInstant;
private final Duration timeout;
private volatile Optional<Instant> lockAcquiredInstant = Optional.empty();
private volatile Optional<Instant> terminalStateInstant = Optional.empty();
+ private volatile Optional<String> stackTrace = Optional.empty();
- public static LockInfo invokingAcquire(int holdCount, Duration timeout) {
- return new LockInfo(holdCount, timeout);
+ public static LockInfo invokingAcquire(String threadName, String lockPath, int holdCount, Duration timeout) {
+ return new LockInfo(threadName, lockPath, holdCount, timeout);
}
public enum LockState {
@@ -37,18 +41,28 @@ public class LockInfo {
private volatile LockState lockState = LockState.ACQUIRING;
- private LockInfo(int threadHoldCountOnAcquire, Duration timeout) {
+ private LockInfo(String threadName, String lockPath, int threadHoldCountOnAcquire, Duration timeout) {
+ this.threadName = threadName;
+ this.lockPath = lockPath;
this.threadHoldCountOnAcquire = threadHoldCountOnAcquire;
this.acquireInstant = Instant.now();
this.timeout = timeout;
}
+ public String getThreadName() { return threadName; }
+ public String getLockPath() { return lockPath; }
public int getThreadHoldCountOnAcquire() { return threadHoldCountOnAcquire; }
public Instant getTimeAcquiredWasInvoked() { return acquireInstant; }
public Duration getAcquireTimeout() { return timeout; }
public LockState getLockState() { return lockState; }
public Optional<Instant> getTimeLockWasAcquired() { return lockAcquiredInstant; }
public Optional<Instant> getTimeTerminalStateWasReached() { return terminalStateInstant; }
+ public Optional<String> getStackTrace() { return stackTrace; }
+
+ /** Get time from just before trying to acquire lock to the time the terminal state was reached, or ZERO. */
+ public Duration getTotalTime() {
+ return terminalStateInstant.map(instant -> Duration.between(acquireInstant, instant)).orElse(Duration.ZERO);
+ }
void timedOut() { setTerminalState(LockState.TIMED_OUT); }
void failedToAcquireReentrantLock() { setTerminalState(LockState.FAILED_TO_REENTER); }
@@ -63,4 +77,24 @@ public class LockInfo {
lockState = terminalState;
terminalStateInstant = Optional.of(Instant.now());
}
+
+ /** Fill in the stack trace starting at the caller's stack frame. */
+ void fillStackTrace() {
+ final int elementsToIgnore = 1;
+
+ var stackTrace = new StringBuilder();
+
+ StackTraceElement[] elements = Thread.currentThread().getStackTrace();
+ for (int i = elementsToIgnore; i < elements.length; ++i) {
+ Stream.of(elements).forEach(element ->
+ stackTrace.append(element.getClassName())
+ .append('(')
+ .append(element.getFileName())
+ .append(':')
+ .append(element.getLineNumber())
+ .append(")\n"));
+ }
+
+ this.stackTrace = Optional.of(stackTrace.toString());
+ }
}
diff --git a/zkfacade/src/main/java/com/yahoo/vespa/curator/stats/ThreadLockInfo.java b/zkfacade/src/main/java/com/yahoo/vespa/curator/stats/ThreadLockInfo.java
index bba39e6dc49..5e77ec76290 100644
--- a/zkfacade/src/main/java/com/yahoo/vespa/curator/stats/ThreadLockInfo.java
+++ b/zkfacade/src/main/java/com/yahoo/vespa/curator/stats/ThreadLockInfo.java
@@ -4,11 +4,12 @@ package com.yahoo.vespa.curator.stats;
import com.yahoo.vespa.curator.Lock;
import java.time.Duration;
+import java.util.Comparator;
import java.util.List;
import java.util.Map;
import java.util.Optional;
+import java.util.PriorityQueue;
import java.util.concurrent.ConcurrentHashMap;
-import java.util.concurrent.ConcurrentLinkedDeque;
import java.util.concurrent.ConcurrentLinkedQueue;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.concurrent.locks.ReentrantLock;
@@ -26,8 +27,10 @@ public class ThreadLockInfo {
private static final ConcurrentHashMap<Thread, ThreadLockInfo> locks = new ConcurrentHashMap<>();
- private static final int MAX_COMPLETED_LOCK_INFOS_SIZE = 10;
- private static final ConcurrentLinkedDeque<LockInfo> completedLockInfos = new ConcurrentLinkedDeque<>();
+ private static final int MAX_COMPLETED_LOCK_INFOS_SIZE = 5;
+ /** Would have used a thread-safe priority queue. */
+ private static final Object completedLockInfosMonitor = new Object();
+ private static final PriorityQueue<LockInfo> completedLockInfos = new PriorityQueue<>(Comparator.comparing(LockInfo::getTotalTime));
private static final ConcurrentHashMap<String, LockCounters> countersByLockPath = new ConcurrentHashMap<>();
@@ -43,6 +46,12 @@ public class ThreadLockInfo {
public static List<ThreadLockInfo> getThreadLockInfos() { return List.copyOf(locks.values()); }
+ public static List<LockInfo> getSlowLockInfos() {
+ synchronized (completedLockInfosMonitor) {
+ return List.copyOf(completedLockInfos);
+ }
+ }
+
/** Returns the per-thread singleton ThreadLockInfo. */
public static ThreadLockInfo getCurrentThreadLockInfo(String lockPath, ReentrantLock lock) {
return locks.computeIfAbsent(
@@ -68,7 +77,7 @@ public class ThreadLockInfo {
public void invokingAcquire(Duration timeout) {
lockCountersForPath.invokeAcquireCount.incrementAndGet();
lockCountersForPath.inCriticalRegionCount.incrementAndGet();
- lockInfos.add(LockInfo.invokingAcquire(lock.getHoldCount(), timeout));
+ lockInfos.add(LockInfo.invokingAcquire(getThreadName(), lockPath, lock.getHoldCount(), timeout));
}
/** Mutable method (see class doc) */
@@ -113,10 +122,15 @@ public class ThreadLockInfo {
LockInfo lockInfo = lockInfos.poll();
completeLockInfo.accept(lockInfo);
- if (completedLockInfos.size() >= MAX_COMPLETED_LOCK_INFOS_SIZE) {
- // This is thread-safe, as no-one but currentThread mutates completedLockInfos
- completedLockInfos.removeLast();
+ synchronized (completedLockInfosMonitor) {
+ if (completedLockInfos.size() < MAX_COMPLETED_LOCK_INFOS_SIZE) {
+ lockInfo.fillStackTrace();
+ completedLockInfos.add(lockInfo);
+ } else if (lockInfo.getTotalTime().compareTo(completedLockInfos.peek().getTotalTime()) > 0) {
+ completedLockInfos.poll();
+ lockInfo.fillStackTrace();
+ completedLockInfos.add(lockInfo);
+ }
}
- completedLockInfos.addFirst(lockInfo);
}
}