From 438b74814c8144f7284007a8680c277b03938e44 Mon Sep 17 00:00:00 2001 From: Håkon Hallingstad Date: Wed, 27 Apr 2022 15:24:39 +0200 Subject: Fix LockStats warnings: Lock releases may come out-of-order --- .../main/java/com/yahoo/vespa/curator/Lock.java | 20 +++++++---- .../yahoo/vespa/curator/stats/ThreadLockStats.java | 42 ++++++++++++++++++---- 2 files changed, 48 insertions(+), 14 deletions(-) diff --git a/zkfacade/src/main/java/com/yahoo/vespa/curator/Lock.java b/zkfacade/src/main/java/com/yahoo/vespa/curator/Lock.java index 3e9c586f43c..a5d2397dadb 100644 --- a/zkfacade/src/main/java/com/yahoo/vespa/curator/Lock.java +++ b/zkfacade/src/main/java/com/yahoo/vespa/curator/Lock.java @@ -13,7 +13,6 @@ import java.time.Instant; import java.util.HashMap; import java.util.Map; import java.util.concurrent.TimeUnit; -import java.util.function.Consumer; /** * A cluster-wide re-entrant mutex which is released on (the last symmetric) close. @@ -66,11 +65,16 @@ public class Lock implements Mutex { " to acquire lock '" + lockPath + "'"); } - invoke(+1L, threadLockStats::lockAcquired); + invoke(+1L, (lockPath, debug) -> threadLockStats.lockAcquired(debug), lockPath); + } + + @FunctionalInterface + private interface BiConsumer2 { + void accept(String lockPath, String debug); } // TODO(hakon): Remove once debugging is unnecessary - private void invoke(long reentryCountDiff, Consumer consumer) { + private void invoke(long reentryCountDiff, BiConsumer2 consumer, String lockPath) { long threadId = Thread.currentThread().getId(); final long sequenceNumber; final Map reentriesByThreadIdCopy; @@ -92,20 +96,22 @@ public class Lock implements Mutex { "@" + created + " Curator 0x" + Integer.toHexString(System.identityHashCode(curator)) + " lock " + lockPath + " #" + sequenceNumber + ", reentries by thread ID = " + reentriesByThreadIdCopy; - consumer.accept(debug); + consumer.accept(lockPath, debug); } @Override public void close() { ThreadLockStats threadLockStats = LockStats.getForCurrentThread(); // Update metrics now before release() to avoid double-counting time in locked state. - invoke(-1L, threadLockStats::preRelease); + // The lockPath must be sent down as close() may be invoked in an order not necessarily + // equal to the reverse order of acquires. + invoke(-1L, threadLockStats::preRelease, lockPath); try { mutex.release(); - threadLockStats.postRelease(); + threadLockStats.postRelease(lockPath); } catch (Exception e) { - threadLockStats.releaseFailed(); + threadLockStats.releaseFailed(lockPath); throw new RuntimeException("Exception releasing lock '" + lockPath + "'", e); } } diff --git a/zkfacade/src/main/java/com/yahoo/vespa/curator/stats/ThreadLockStats.java b/zkfacade/src/main/java/com/yahoo/vespa/curator/stats/ThreadLockStats.java index beda7eaa142..24305539be5 100644 --- a/zkfacade/src/main/java/com/yahoo/vespa/curator/stats/ThreadLockStats.java +++ b/zkfacade/src/main/java/com/yahoo/vespa/curator/stats/ThreadLockStats.java @@ -5,8 +5,8 @@ import com.yahoo.vespa.curator.Lock; import java.time.Duration; import java.util.HashSet; +import java.util.Iterator; import java.util.List; -import java.util.Map; import java.util.Optional; import java.util.Set; import java.util.concurrent.ConcurrentLinkedDeque; @@ -113,8 +113,8 @@ public class ThreadLockStats { } /** Mutable method (see class doc) */ - public void preRelease(String debug) { - withLastLockAttempt(lockAttempt -> { + public void preRelease(String path, String debug) { + withLastLockAttemptFor(path, lockAttempt -> { // Note on the order of these two statement: Same concerns apply here as in lockAcquired(). if (!lockAttempt.isReentry()) { @@ -126,13 +126,13 @@ public class ThreadLockStats { } /** Mutable method (see class doc) */ - public void postRelease() { - removeLastLockAttempt(LockAttempt::postRelease); + public void postRelease(String lockPath) { + removeLastLockAttemptFor(lockPath, LockAttempt::postRelease); } /** Mutable method (see class doc) */ - public void releaseFailed() { - removeLastLockAttempt(LockAttempt::releaseFailed); + public void releaseFailed(String lockPath) { + removeLastLockAttemptFor(lockPath, LockAttempt::releaseFailed); } /** Mutable method (see class doc) */ @@ -240,4 +240,32 @@ public class ThreadLockStats { LockStats.getGlobal().maybeSample(lockAttempt); } + + private void withLastLockAttemptFor(String lockPath, Consumer consumer) { + Iterator lockAttemptIterator = lockAttemptsStack.descendingIterator(); + while (lockAttemptIterator.hasNext()) { + LockAttempt lockAttempt = lockAttemptIterator.next(); + if (lockAttempt.getLockPath().equals(lockPath)) { + consumer.accept(lockAttempt); + return; + } + } + + logger.warning("Unable to find any lock attempts for " + lockPath); + } + + private void removeLastLockAttemptFor(String lockPath, Consumer consumer) { + Iterator lockAttemptIterator = lockAttemptsStack.descendingIterator(); + while (lockAttemptIterator.hasNext()) { + LockAttempt lockAttempt = lockAttemptIterator.next(); + if (lockAttempt.getLockPath().equals(lockPath)) { + lockAttemptIterator.remove(); + consumer.accept(lockAttempt); + LockStats.getGlobal().maybeSample(lockAttempt); + return; + } + } + + logger.warning("Unable to remove last lock attempt as no locks were found for " + lockPath); + } } -- cgit v1.2.3 From 9e4cb8d801ef919ef4bb0152b92af8040ee33e46 Mon Sep 17 00:00:00 2001 From: Håkon Hallingstad Date: Wed, 27 Apr 2022 15:35:53 +0200 Subject: Test out-of-order release --- .../com/yahoo/vespa/curator/stats/LockTest.java | 38 ++++++++++++++++++++++ 1 file changed, 38 insertions(+) diff --git a/zkfacade/src/test/java/com/yahoo/vespa/curator/stats/LockTest.java b/zkfacade/src/test/java/com/yahoo/vespa/curator/stats/LockTest.java index e5a1ea7c683..524fcd3abd1 100644 --- a/zkfacade/src/test/java/com/yahoo/vespa/curator/stats/LockTest.java +++ b/zkfacade/src/test/java/com/yahoo/vespa/curator/stats/LockTest.java @@ -200,8 +200,46 @@ public class LockTest { assertEquals(lock2Path, lockAttempts.get(1).getLockPath()); assertEquals(LockAttempt.LockState.ACQUIRED, lockAttempts.get(1).getLockState()); + lock2.close(); lock.close(); + } + + @Test + public void locksReleaseOutOfOrder() throws Exception { + when(mutex.acquire(anyLong(), any())).thenReturn(true); + + lock.acquire(acquireTimeout); + assertLockMetricsIs(new LockMetrics().setAcquireCount(1) + .setCumulativeAcquireCount(1) + .setAcquireSucceededCount(1) + .setCumulativeAcquireSucceededCount(1)); + lock2.acquire(acquireTimeout); + assertLock2MetricsIs(new LockMetrics().setAcquireCount(1) + .setCumulativeAcquireCount(1) + .setAcquireSucceededCount(1) + .setCumulativeAcquireSucceededCount(1)); lock.close(); + assertLockMetricsIs(new LockMetrics().setAcquireCount(0) + .setCumulativeAcquireCount(1) + .setAcquireSucceededCount(0) + .setCumulativeAcquireSucceededCount(1) + .setReleaseCount(1) + .setCumulativeReleaseCount(1)); + + List threadLockStats = LockStats.getGlobal().getThreadLockStats(); + assertEquals(1, threadLockStats.size()); + List lockAttempts = threadLockStats.get(0).getOngoingLockAttempts(); + assertEquals(1, lockAttempts.size()); + assertEquals(lock2Path, lockAttempts.get(0).getLockPath()); + assertEquals(LockAttempt.LockState.ACQUIRED, lockAttempts.get(0).getLockState()); + + lock2.close(); + assertLock2MetricsIs(new LockMetrics().setAcquireCount(0) + .setCumulativeAcquireCount(1) + .setAcquireSucceededCount(0) + .setCumulativeAcquireSucceededCount(1) + .setReleaseCount(1) + .setCumulativeReleaseCount(1)); } @Test -- cgit v1.2.3