diff options
author | Håkon Hallingstad <hakon@verizonmedia.com> | 2020-10-03 15:02:53 +0200 |
---|---|---|
committer | Håkon Hallingstad <hakon@verizonmedia.com> | 2020-10-03 15:02:53 +0200 |
commit | e925ef8b0a33ed0e67e09ca9320b386339e08cea (patch) | |
tree | 066150a4a9f829b3c4935e5f702bb1e17cfefa03 /zkfacade/src | |
parent | 58c15afd3ea77235e2ea26f4baa2e171574dd3b6 (diff) |
Move lock metrics to MetricsReporter
Adds two new metrics:
- The load of acquiring each lock path: The average number of threads waiting
to acquire the lock within the last minute (or unit of time). Aka the lock
queue (depth).
- The load of the lock for each lock path: The average number of threads
holding the lock within the last minute (or unit of time). This is always
<= 1. Aka the lock utilization.
Changes the LockCounters to LockMetrics, and exporting those once every minute
through MetricReporter which is designed for this.
Diffstat (limited to 'zkfacade/src')
17 files changed, 619 insertions, 215 deletions
diff --git a/zkfacade/src/main/java/com/yahoo/vespa/curator/Curator.java b/zkfacade/src/main/java/com/yahoo/vespa/curator/Curator.java index f3fe0316682..6cbfa274c56 100644 --- a/zkfacade/src/main/java/com/yahoo/vespa/curator/Curator.java +++ b/zkfacade/src/main/java/com/yahoo/vespa/curator/Curator.java @@ -4,7 +4,6 @@ package com.yahoo.vespa.curator; import com.google.inject.Inject; import com.yahoo.cloud.config.ConfigserverConfig; import com.yahoo.io.IOUtils; -import com.yahoo.jdisc.Metric; import com.yahoo.net.HostName; import com.yahoo.path.Path; import com.yahoo.text.Utf8; @@ -67,31 +66,28 @@ public class Curator implements AutoCloseable { // All lock keys, to allow re-entrancy. This will grow forever, but this should be too slow to be a problem private final ConcurrentHashMap<Path, Lock> locks = new ConcurrentHashMap<>(); - private final Optional<Metric> metric; - /** Creates a curator instance from a comma-separated string of ZooKeeper host:port strings */ public static Curator create(String connectionSpec) { - return new Curator(connectionSpec, connectionSpec, Optional.empty(), Optional.of(ZK_CLIENT_CONFIG_FILE)); + return new Curator(connectionSpec, connectionSpec, Optional.of(ZK_CLIENT_CONFIG_FILE)); } // For testing only, use Optional.empty for clientConfigFile parameter to create default zookeeper client config public static Curator create(String connectionSpec, Optional<File> clientConfigFile) { - return new Curator(connectionSpec, connectionSpec, Optional.empty(), clientConfigFile); + return new Curator(connectionSpec, connectionSpec, clientConfigFile); } // Depend on ZooKeeperServer to make sure it is started first // TODO: Move zookeeperserver config out of configserverconfig (requires update of controller services.xml as well) @Inject - public Curator(ConfigserverConfig configserverConfig, Metric metric, VespaZooKeeperServer server) { - this(configserverConfig, Optional.of(metric), Optional.of(ZK_CLIENT_CONFIG_FILE)); + public Curator(ConfigserverConfig configserverConfig, VespaZooKeeperServer server) { + this(configserverConfig, Optional.of(ZK_CLIENT_CONFIG_FILE)); } - Curator(ConfigserverConfig configserverConfig, Optional<Metric> metric, Optional<File> clientConfigFile) { - this(createConnectionSpec(configserverConfig), createEnsembleConnectionSpec(configserverConfig), metric, clientConfigFile); + Curator(ConfigserverConfig configserverConfig, Optional<File> clientConfigFile) { + this(createConnectionSpec(configserverConfig), createEnsembleConnectionSpec(configserverConfig), clientConfigFile); } - private Curator(String connectionSpec, String zooKeeperEnsembleConnectionSpec, Optional<Metric> metric, - Optional<File> clientConfigFile) { + private Curator(String connectionSpec, String zooKeeperEnsembleConnectionSpec, Optional<File> clientConfigFile) { this(connectionSpec, zooKeeperEnsembleConnectionSpec, (retryPolicy) -> CuratorFrameworkFactory @@ -102,24 +98,20 @@ public class Curator implements AutoCloseable { .connectString(connectionSpec) .zookeeperFactory(new VespaZooKeeperFactory(createClientConfig(clientConfigFile))) .dontUseContainerParents() // TODO: Remove when we know ZooKeeper 3.5 works fine, consider waiting until Vespa 8 - .build(), - metric); + .build()); } protected Curator(String connectionSpec, String zooKeeperEnsembleConnectionSpec, - Function<RetryPolicy, CuratorFramework> curatorFactory, - Optional<Metric> metric) { + Function<RetryPolicy, CuratorFramework> curatorFactory) { this(connectionSpec, zooKeeperEnsembleConnectionSpec, curatorFactory, - new ExponentialBackoffRetry((int) BASE_SLEEP_TIME.toMillis(), MAX_RETRIES), - metric); + new ExponentialBackoffRetry((int) BASE_SLEEP_TIME.toMillis(), MAX_RETRIES)); } private Curator(String connectionSpec, String zooKeeperEnsembleConnectionSpec, Function<RetryPolicy, CuratorFramework> curatorFactory, - RetryPolicy retryPolicy, - Optional<Metric> metric) { + RetryPolicy retryPolicy) { this.connectionSpec = connectionSpec; this.retryPolicy = retryPolicy; this.curatorFramework = curatorFactory.apply(retryPolicy); @@ -132,7 +124,6 @@ public class Curator implements AutoCloseable { this.zooKeeperEnsembleConnectionSpec = zooKeeperEnsembleConnectionSpec; this.zooKeeperEnsembleCount = zooKeeperEnsembleConnectionSpec.split(",").length; - this.metric = metric; } private static String createConnectionSpec(ConfigserverConfig configserverConfig) { @@ -363,7 +354,7 @@ public class Curator implements AutoCloseable { /** Create and acquire a re-entrant lock in given path */ public Lock lock(Path path, Duration timeout) { create(path); - Lock lock = locks.computeIfAbsent(path, (pathArg) -> new Lock(pathArg.getAbsolute(), this, metric)); + Lock lock = locks.computeIfAbsent(path, (pathArg) -> new Lock(pathArg.getAbsolute(), this)); lock.acquire(timeout); return lock; } diff --git a/zkfacade/src/main/java/com/yahoo/vespa/curator/Lock.java b/zkfacade/src/main/java/com/yahoo/vespa/curator/Lock.java index 305e5d459fc..451389694c6 100644 --- a/zkfacade/src/main/java/com/yahoo/vespa/curator/Lock.java +++ b/zkfacade/src/main/java/com/yahoo/vespa/curator/Lock.java @@ -2,7 +2,6 @@ package com.yahoo.vespa.curator; import com.google.common.util.concurrent.UncheckedTimeoutException; -import com.yahoo.jdisc.Metric; import com.yahoo.path.Path; import com.yahoo.transaction.Mutex; import com.yahoo.vespa.curator.stats.LockStats; @@ -10,8 +9,6 @@ import com.yahoo.vespa.curator.stats.ThreadLockStats; import org.apache.curator.framework.recipes.locks.InterProcessLock; import java.time.Duration; -import java.util.Map; -import java.util.Optional; import java.util.concurrent.TimeUnit; /** @@ -26,50 +23,46 @@ public class Lock implements Mutex { private final InterProcessLock mutex; private final String lockPath; - private final Optional<Metric> metric; - private final Optional<Metric.Context> metricContext; - public Lock(String lockPath, Curator curator, Optional<Metric> metric) { - this(lockPath, curator.createMutex(lockPath), metric); + public Lock(String lockPath, Curator curator) { + this(lockPath, curator.createMutex(lockPath)); } /** Public for testing only */ - public Lock(String lockPath, InterProcessLock mutex, Optional<Metric> metric) { + public Lock(String lockPath, InterProcessLock mutex) { this.lockPath = lockPath; this.mutex = mutex; - this.metric = metric; - this.metricContext = metric.map(aMetric -> aMetric.createContext(Map.of("lockPath", lockPath))); } /** Take the lock with the given timeout. This may be called multiple times from the same thread - each matched by a close */ public void acquire(Duration timeout) throws UncheckedTimeoutException { ThreadLockStats threadLockStats = LockStats.getForCurrentThread(); - threadLockStats.invokingAcquire(lockPath, timeout, metric, metricContext); + threadLockStats.invokingAcquire(lockPath, timeout); final boolean acquired; try { acquired = mutex.acquire(timeout.toMillis(), TimeUnit.MILLISECONDS); } catch (Exception e) { - threadLockStats.acquireFailed(lockPath); + threadLockStats.acquireFailed(); throw new RuntimeException("Exception acquiring lock '" + lockPath + "'", e); } if (!acquired) { - threadLockStats.acquireTimedOut(lockPath); + threadLockStats.acquireTimedOut(); throw new UncheckedTimeoutException("Timed out after waiting " + timeout + " to acquire lock '" + lockPath + "'"); } - threadLockStats.lockAcquired(lockPath); + threadLockStats.lockAcquired(); } @Override public void close() { try { mutex.release(); - LockStats.getForCurrentThread().lockReleased(lockPath); + LockStats.getForCurrentThread().lockReleased(); } catch (Exception e) { - LockStats.getForCurrentThread().lockReleaseFailed(lockPath); + LockStats.getForCurrentThread().lockReleaseFailed(); throw new RuntimeException("Exception releasing lock '" + lockPath + "'"); } } diff --git a/zkfacade/src/main/java/com/yahoo/vespa/curator/mock/MockCurator.java b/zkfacade/src/main/java/com/yahoo/vespa/curator/mock/MockCurator.java index 2ef42f539d2..3da7678c44e 100644 --- a/zkfacade/src/main/java/com/yahoo/vespa/curator/mock/MockCurator.java +++ b/zkfacade/src/main/java/com/yahoo/vespa/curator/mock/MockCurator.java @@ -137,7 +137,7 @@ public class MockCurator extends Curator { * This is not what ZooKeeper does. */ public MockCurator(boolean stableOrdering) { - super("", "", (retryPolicy) -> null, Optional.empty()); + super("", "", (retryPolicy) -> null); this.stableOrdering = stableOrdering; curatorFramework = new MockCuratorFramework(); curatorFramework.start(); diff --git a/zkfacade/src/main/java/com/yahoo/vespa/curator/stats/AtomicDurationSum.java b/zkfacade/src/main/java/com/yahoo/vespa/curator/stats/AtomicDurationSum.java new file mode 100644 index 00000000000..7addd661b79 --- /dev/null +++ b/zkfacade/src/main/java/com/yahoo/vespa/curator/stats/AtomicDurationSum.java @@ -0,0 +1,86 @@ +// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.vespa.curator.stats; + +import java.time.Duration; +import java.util.concurrent.atomic.AtomicLong; + +/** + * An instance of {@link AtomicDurationSum} provides atomic operations on a {@link Duration} + * and integer counter doublet: You can add a duration and increment the counter atomically, + * and get the doublet and reset the duration and counter to zero atomically. + * + * <p>The duration and counter must be small: You can add the equivalent of 16M durations of 1 minute each: + * The cumulative duration must be between -17 and 17 years, and the maximum count is 16M. + * The duration will have millisecond resolution. Overflow of count affects duration.</p> + * + * <p>Motivation: Metrics are snapshot and reset to zero every minute. Durations of typically 1 minute + * are then summed to produce a cumulative {@link Duration} and an associated count, both of which are + * therefore small numbers and can be represented in a compact and atomic form. The alternative is to + * use synchronization (which is slow) or allow inconsistencies between the duration and count + * (e.g. a sum of 2 latencies but a count of 1 makes the metrics noisy).</p> + * + * @author hakon + */ +public class AtomicDurationSum { + + // Why 40? The duration-part requires 16 bits to represent 1 minute. If we require 1 minute + // durations can be added until both the duration-part and count-part are full, the remaining + // 48 bits must be divided equally, hence 16 + 24 = 40. Seems to give a nice balance. + static final long DURATION_BITS = 40; + static final long COUNT_BITS = Long.SIZE - DURATION_BITS; + static final long DURATION_MASK = -1L << COUNT_BITS; + static final long COUNT_MASK = -1L >>> DURATION_BITS; + // The most significant bit of duration is a sign bit, which complicates the initializer. + static final long MIN_DURATION = -1L << (DURATION_BITS - 1); + static final long MAX_DURATION = (DURATION_MASK << 1) >>> (COUNT_BITS + 1); + static final long MAX_COUNT = COUNT_MASK; + static final long MIN_COUNT = 0L; + + private static final long ZERO_DURATION_AND_COUNT = 0L; + + // Representation: + // - A signed long of 40 bits storing the duration in milliseconds + // - An unsigned int of 24 bits storing the count + private final AtomicLong encodedAtomic = new AtomicLong(ZERO_DURATION_AND_COUNT); + + /** Initializes to zero duration and count. */ + public AtomicDurationSum() {} + + /** Add duration and increment count. */ + void add(Duration duration) { + encodedAtomic.addAndGet(encodeDuration(duration) | 1L); + } + + public DurationSum get() { + long snapshot = encodedAtomic.get(); + return new DurationSum(decodeDuration(snapshot), decodeCount(snapshot)); + } + + /** Get the current {@link DurationSum} and reset the duration and count doublet to zero. */ + public DurationSum getAndReset() { + long snapshot = encodedAtomic.getAndSet(ZERO_DURATION_AND_COUNT); + return new DurationSum(decodeDuration(snapshot), decodeCount(snapshot)); + } + + static long encodeDuration(Duration duration) { + long millis = duration.toMillis(); + if (millis < MIN_DURATION || millis > MAX_DURATION) { + throw new IllegalArgumentException("Duration outside legal range: " + duration); + } + + return millis << COUNT_BITS; + } + + static Duration decodeDuration(long encoded) { + return Duration.ofMillis(encoded >> COUNT_BITS); + } + + static int decodeCount(long encoded) { + return (int) (encoded & COUNT_MASK); + } + + @Override + public String toString() { + return get().toString(); + } +} diff --git a/zkfacade/src/main/java/com/yahoo/vespa/curator/stats/DurationSum.java b/zkfacade/src/main/java/com/yahoo/vespa/curator/stats/DurationSum.java new file mode 100644 index 00000000000..3c1f0ec7f43 --- /dev/null +++ b/zkfacade/src/main/java/com/yahoo/vespa/curator/stats/DurationSum.java @@ -0,0 +1,41 @@ +// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.vespa.curator.stats; + +import java.time.Duration; +import java.util.Optional; + +/** + * Represents a sum and count of Duration. + * + * @author hakon + */ +public class DurationSum { + + private final Duration duration; + private final int count; + + DurationSum(Duration duration, int count) { + this.duration = duration; + this.count = count; + } + + public Duration duration() { return duration; } + public int count() { return count; } + + public Optional<Duration> averageDuration() { + if (count <= 0) { + return Optional.empty(); + } + + long averageMillis = Math.round(duration.toMillis() / (double) count); + return Optional.of(Duration.ofMillis(averageMillis)); + } + + @Override + public String toString() { + return "DurationSum{" + + "duration=" + duration + + ", count=" + count + + '}'; + } +} diff --git a/zkfacade/src/main/java/com/yahoo/vespa/curator/stats/LatencyMetrics.java b/zkfacade/src/main/java/com/yahoo/vespa/curator/stats/LatencyMetrics.java new file mode 100644 index 00000000000..61807b5db19 --- /dev/null +++ b/zkfacade/src/main/java/com/yahoo/vespa/curator/stats/LatencyMetrics.java @@ -0,0 +1,39 @@ +// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.vespa.curator.stats; + +/** + * Metrics on the latency of execution of some piece of code, e.g. the acquiring of a lock. + * + * @author hakon + */ +public class LatencyMetrics { + private final DurationSum cumulativeLatency; + private final float load; + + public LatencyMetrics(DurationSum cumulativeLatency, float load) { + this.cumulativeLatency = cumulativeLatency; + this.load = load; + } + + /** + * The total time spent by all threads accumulating latency in an implicit time period, + * e.g. a metric snapshot window, divided by the duration of the time period. + */ + public float load() { return load; } + + /** Returns the average latency in seconds with milliseconds resolution, or 0.0 by default. */ + public float averageInSeconds() { + return cumulativeLatency.averageDuration().map(average -> average.toMillis() / 1000f).orElse(0f); + } + + /** The number of latency-producing events. */ + public int count() { return cumulativeLatency.count(); } + + @Override + public String toString() { + return "LatencyMetrics{" + + "cumulativeLatency=" + cumulativeLatency + + ", load=" + load + + '}'; + } +} diff --git a/zkfacade/src/main/java/com/yahoo/vespa/curator/stats/LatencyStore.java b/zkfacade/src/main/java/com/yahoo/vespa/curator/stats/LatencyStore.java new file mode 100644 index 00000000000..0b1056ddeba --- /dev/null +++ b/zkfacade/src/main/java/com/yahoo/vespa/curator/stats/LatencyStore.java @@ -0,0 +1,56 @@ +// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.vespa.curator.stats; + +import java.time.Clock; +import java.time.Duration; +import java.time.Instant; + +/** + * Stores the latency of e.g. acquiring the lock. + * + * @author hakon + */ +public class LatencyStore { + + private final AtomicDurationSum latencySum = new AtomicDurationSum(); + private final Clock clock; + private volatile Instant startOfPeriod; + + LatencyStore() { this(Clock.systemUTC()); } + + LatencyStore(Clock clock) { + this.clock = clock; + startOfPeriod = clock.instant(); + } + + void reportLatency(Duration latency) { + latencySum.add(latency); + } + + public LatencyMetrics getLatencyMetrics() { + return makeMetricsForPeriod(latencySum.get(), startOfPeriod, clock.instant()); + } + + public LatencyMetrics getAndResetLatencyMetrics() { + Instant newStartOfPeriod = clock.instant(); + DurationSum latencySumOfPeriod = latencySum.getAndReset(); + LatencyMetrics latencyMetrics = makeMetricsForPeriod(latencySumOfPeriod, startOfPeriod, newStartOfPeriod); + startOfPeriod = newStartOfPeriod; + return latencyMetrics; + } + + private static LatencyMetrics makeMetricsForPeriod(DurationSum latencySum, Instant start, Instant end) { + long millisPeriod = Duration.between(start, end).toMillis(); + long normalizedMillisPeriod = Math.max(1L, millisPeriod); + double load = Math.round(latencySum.duration().toMillis() * 1000.0 / normalizedMillisPeriod) / 1000.0; + return new LatencyMetrics(latencySum, (float) load); + } + + @Override + public String toString() { + return "LatencyStore{" + + "latencySum=" + latencySum + + ", startOfPeriod=" + startOfPeriod + + '}'; + } +} diff --git a/zkfacade/src/main/java/com/yahoo/vespa/curator/stats/LockAttempt.java b/zkfacade/src/main/java/com/yahoo/vespa/curator/stats/LockAttempt.java index bec7c08b9b3..e25470ac227 100644 --- a/zkfacade/src/main/java/com/yahoo/vespa/curator/stats/LockAttempt.java +++ b/zkfacade/src/main/java/com/yahoo/vespa/curator/stats/LockAttempt.java @@ -1,8 +1,6 @@ // Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. package com.yahoo.vespa.curator.stats; -import com.yahoo.jdisc.Metric; - import java.time.Duration; import java.time.Instant; import java.util.ArrayList; @@ -23,17 +21,16 @@ public class LockAttempt { private final String lockPath; private final Instant callAcquireInstant; private final Duration timeout; - private final Optional<Metric> metric; - private final Optional<Metric.Context> metricContext; + private final LockMetrics lockMetrics; private final List<LockAttempt> nestedLockAttempts = new ArrayList<>(); private volatile Optional<Instant> lockAcquiredInstant = Optional.empty(); private volatile Optional<Instant> terminalStateInstant = Optional.empty(); private volatile Optional<String> stackTrace = Optional.empty(); - public static LockAttempt invokingAcquire(ThreadLockStats threadLockStats, String lockPath, Duration timeout, - Optional<Metric> metric, Optional<Metric.Context> metricContext) { - return new LockAttempt(threadLockStats, lockPath, timeout, Instant.now(), metric, metricContext); + public static LockAttempt invokingAcquire(ThreadLockStats threadLockStats, String lockPath, + Duration timeout, LockMetrics lockMetrics) { + return new LockAttempt(threadLockStats, lockPath, timeout, Instant.now(), lockMetrics); } public enum LockState { @@ -49,16 +46,15 @@ public class LockAttempt { private volatile LockState lockState = LockState.ACQUIRING; - private LockAttempt(ThreadLockStats threadLockStats, String lockPath, Duration timeout, Instant callAcquireInstant, - Optional<Metric> metric, Optional<Metric.Context> metricContext) { + private LockAttempt(ThreadLockStats threadLockStats, String lockPath, Duration timeout, + Instant callAcquireInstant, LockMetrics lockMetrics) { this.threadLockStats = threadLockStats; this.lockPath = lockPath; this.callAcquireInstant = callAcquireInstant; this.timeout = timeout; - this.metric = metric; - this.metricContext = metricContext; + this.lockMetrics = lockMetrics; - addToMetric("lockAttempt.acquiring", 1); + lockMetrics.acquireInvoked(); } public String getThreadName() { return threadLockStats.getThreadName(); } @@ -73,8 +69,6 @@ public class LockAttempt { public Optional<Instant> getTimeTerminalStateWasReached() { return terminalStateInstant; } public Optional<String> getStackTrace() { return stackTrace; } public List<LockAttempt> getNestedLockAttempts() { return List.copyOf(nestedLockAttempts); } - public Optional<Metric> metric() { return metric; } - public Optional<Metric.Context> metricContext() { return metricContext; } public Duration getDurationOfAcquire() { return Duration.between(callAcquireInstant, getTimeAcquireEndedOrNow()); } @@ -105,39 +99,28 @@ public class LockAttempt { void acquireFailed() { setTerminalState(LockState.ACQUIRE_FAILED); - addToMetric("lockAttempt.acquiring", -1); - addToMetric("lockAttempt.acquireFailed", 1); - setMetricTo("lockAttempt.acquiringLatency", getDurationOfAcquire().toMillis() / 1000.); + lockMetrics.acquireFailed(getDurationOfAcquire()); } void timedOut() { setTerminalState(LockState.TIMED_OUT); - addToMetric("lockAttempt.acquiring", -1); - addToMetric("lockAttempt.acquireTimedOut", 1); - setMetricTo("lockAttempt.acquiringLatency", getDurationOfAcquire().toMillis() / 1000.); + lockMetrics.acquireTimedOut(getDurationOfAcquire()); } void lockAcquired() { lockState = LockState.ACQUIRED; lockAcquiredInstant = Optional.of(Instant.now()); - addToMetric("lockAttempt.acquiring", -1); - addToMetric("lockAttempt.acquired", 1); - setMetricTo("lockAttempt.locked", 1); - setMetricTo("lockAttempt.acquiringLatency", getDurationOfAcquire().toMillis() / 1000.); + lockMetrics.lockAcquired(getDurationOfAcquire()); } void released() { setTerminalState(LockState.RELEASED); - setMetricTo("lockAttempt.locked", 0); - addToMetric("lockAttempt.released", 1); - setMetricTo("lockAttempt.lockedLatency", getDurationWithLock().toMillis() / 1000.); + lockMetrics.release(getDurationWithLock(), getDuration()); } void releasedWithError() { setTerminalState(LockState.RELEASED_WITH_ERROR); - setMetricTo("lockAttempt.locked", 0); - addToMetric("lockAttempt.releaseError", 1); - setMetricTo("lockAttempt.lockedLatency", getDurationWithLock().toMillis() / 1000.); + lockMetrics.releaseFailed(getDurationWithLock(), getDuration()); } void setTerminalState(LockState terminalState) { setTerminalState(terminalState, Instant.now()); } @@ -146,16 +129,4 @@ public class LockAttempt { lockState = terminalState; terminalStateInstant = Optional.of(instant); } - - private void addToMetric(String key, Number value) { - if (metric.isPresent() && metricContext.isPresent()) { - metric.get().add(key, value, metricContext.get()); - } - } - - private void setMetricTo(String key, Number value) { - if (metric.isPresent() && metricContext.isPresent()) { - metric.get().set(key, value, metricContext.get()); - } - } } diff --git a/zkfacade/src/main/java/com/yahoo/vespa/curator/stats/LockCounters.java b/zkfacade/src/main/java/com/yahoo/vespa/curator/stats/LockCounters.java deleted file mode 100644 index 561ea9a7ed2..00000000000 --- a/zkfacade/src/main/java/com/yahoo/vespa/curator/stats/LockCounters.java +++ /dev/null @@ -1,66 +0,0 @@ -// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. -package com.yahoo.vespa.curator.stats; - -import java.util.Objects; -import java.util.concurrent.atomic.AtomicInteger; - -/** - * A collection of counters for events related to lock acquisition and release. - * - * @author hakon - */ -public class LockCounters { - final AtomicInteger invokeAcquireCount = new AtomicInteger(0); - final AtomicInteger inCriticalRegionCount = new AtomicInteger(0); - final AtomicInteger acquireFailedCount = new AtomicInteger(0); - final AtomicInteger acquireTimedOutCount = new AtomicInteger(0); - final AtomicInteger lockAcquiredCount = new AtomicInteger(0); - final AtomicInteger locksReleasedCount = new AtomicInteger(0); - - final AtomicInteger noLocksErrorCount = new AtomicInteger(0); - final AtomicInteger lockReleaseErrorCount = new AtomicInteger(0); - - public int invokeAcquireCount() { return invokeAcquireCount.get(); } - public int inCriticalRegionCount() { return inCriticalRegionCount.get(); } - public int acquireFailedCount() { return acquireFailedCount.get(); } - public int acquireTimedOutCount() { return acquireTimedOutCount.get(); } - public int lockAcquiredCount() { return lockAcquiredCount.get(); } - public int locksReleasedCount() { return locksReleasedCount.get(); } - public int noLocksErrorCount() { return noLocksErrorCount.get(); } - public int lockReleaseErrorCount() { return lockReleaseErrorCount.get(); } - - @Override - public String toString() { - return "LockCounters{" + - "invokeAcquireCount=" + invokeAcquireCount + - ", inCriticalRegionCount=" + inCriticalRegionCount + - ", acquireFailedCount=" + acquireFailedCount + - ", acquireTimedOutCount=" + acquireTimedOutCount + - ", lockAcquiredCount=" + lockAcquiredCount + - ", locksReleasedCount=" + locksReleasedCount + - ", noLocksErrorCount=" + noLocksErrorCount + - ", locksReleaseErrorCount=" + lockReleaseErrorCount + - '}'; - } - - @Override - public boolean equals(Object o) { - if (this == o) return true; - if (o == null || getClass() != o.getClass()) return false; - LockCounters that = (LockCounters) o; - return invokeAcquireCount.get() == that.invokeAcquireCount.get() && - inCriticalRegionCount.get() == that.inCriticalRegionCount.get() && - acquireFailedCount.get() == that.acquireFailedCount.get() && - acquireTimedOutCount.get() == that.acquireTimedOutCount.get() && - lockAcquiredCount.get() == that.lockAcquiredCount.get() && - locksReleasedCount.get() == that.locksReleasedCount.get() && - noLocksErrorCount.get() == that.noLocksErrorCount.get() && - lockReleaseErrorCount.get() == that.lockReleaseErrorCount.get(); - } - - @Override - public int hashCode() { - return Objects.hash(invokeAcquireCount, inCriticalRegionCount, acquireFailedCount, acquireTimedOutCount, - lockAcquiredCount, locksReleasedCount, noLocksErrorCount, lockReleaseErrorCount); - } -} diff --git a/zkfacade/src/main/java/com/yahoo/vespa/curator/stats/LockMetrics.java b/zkfacade/src/main/java/com/yahoo/vespa/curator/stats/LockMetrics.java new file mode 100644 index 00000000000..a5bf0124a54 --- /dev/null +++ b/zkfacade/src/main/java/com/yahoo/vespa/curator/stats/LockMetrics.java @@ -0,0 +1,138 @@ +// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.vespa.curator.stats; + +import java.time.Duration; +import java.util.concurrent.atomic.AtomicInteger; + +/** + * A collection of counters for events related to lock acquisition and release. + * + * @author hakon + */ +public class LockMetrics { + private final AtomicInteger acquireCount = new AtomicInteger(0); + private final AtomicInteger acquireFailedCount = new AtomicInteger(0); + private final AtomicInteger acquireTimedOutCount = new AtomicInteger(0); + private final AtomicInteger acquireSucceededCount = new AtomicInteger(0); + private final AtomicInteger releaseCount = new AtomicInteger(0); + private final AtomicInteger releaseFailedCount = new AtomicInteger(0); + + private final AtomicInteger cumulativeAcquireCount = new AtomicInteger(0); + private final AtomicInteger cumulativeAcquireFailedCount = new AtomicInteger(0); + private final AtomicInteger cumulativeAcquireTimedOutCount = new AtomicInteger(0); + private final AtomicInteger cumulativeAcquireSucceededCount = new AtomicInteger(0); + private final AtomicInteger cumulativeReleaseCount = new AtomicInteger(0); + private final AtomicInteger cumulativeReleaseFailedCount = new AtomicInteger(0); + + private final AtomicInteger acquiringNow = new AtomicInteger(0); + private final AtomicInteger lockedNow = new AtomicInteger(0); + + private final LatencyStore acquireLatencyStore = new LatencyStore(); + private final LatencyStore lockedLatencyStore = new LatencyStore(); + + void acquireInvoked() { + acquireCount.incrementAndGet(); + cumulativeAcquireCount.incrementAndGet(); + acquiringNow.incrementAndGet(); + } + + void acquireFailed(Duration acquireLatency) { + acquiringNow.decrementAndGet(); + acquireFailedCount.incrementAndGet(); + cumulativeAcquireFailedCount.incrementAndGet(); + acquireLatencyStore.reportLatency(acquireLatency); + } + + void acquireTimedOut(Duration acquireLatency) { + acquiringNow.decrementAndGet(); + acquireTimedOutCount.incrementAndGet(); + cumulativeAcquireTimedOutCount.incrementAndGet(); + acquireLatencyStore.reportLatency(acquireLatency); + } + + void lockAcquired(Duration acquireLatency) { + acquiringNow.decrementAndGet(); + acquireSucceededCount.incrementAndGet(); + cumulativeAcquireSucceededCount.incrementAndGet(); + acquireLatencyStore.reportLatency(acquireLatency); + lockedNow.incrementAndGet(); + } + + void release(Duration lockedLatency, Duration totalLatency) { + lockedNow.decrementAndGet(); + releaseCount.incrementAndGet(); + cumulativeReleaseCount.incrementAndGet(); + lockedLatencyStore.reportLatency(lockedLatency); + } + + void releaseFailed(Duration lockedLatency, Duration totalLatency) { + release(lockedLatency, totalLatency); + releaseFailedCount.incrementAndGet(); + cumulativeReleaseFailedCount.incrementAndGet(); + } + + public int getAndResetAcquireCount() { return acquireCount.getAndSet(0); } + public int getAndResetAcquireFailedCount() { return acquireFailedCount.getAndSet(0); } + public int getAndResetAcquireTimedOutCount() { return acquireTimedOutCount.getAndSet(0); } + public int getAndResetAcquireSucceededCount() { return acquireSucceededCount.getAndSet(0); } + public int getAndResetReleaseCount() { return releaseCount.getAndSet(0); } + public int getAndResetReleaseFailedCount() { return releaseFailedCount.getAndSet(0); } + + public int getCumulativeAcquireCount() { return cumulativeAcquireCount.get(); } + public int getCumulativeAcquireFailedCount() { return cumulativeAcquireFailedCount.get(); } + public int getCumulativeAcquireTimedOutCount() { return cumulativeAcquireTimedOutCount.get(); } + public int getCumulativeAcquireSucceededCount() { return cumulativeAcquireSucceededCount.get(); } + public int getCumulativeReleaseCount() { return cumulativeReleaseCount.get(); } + public int getCumulativeReleaseFailedCount() { return cumulativeReleaseFailedCount.get(); } + + public int getAcquiringNow() { return acquiringNow.get(); } + public int getLockedNow() { return lockedNow.get(); } + + public LatencyMetrics getAcquireLatencyMetrics() { return acquireLatencyStore.getLatencyMetrics(); } + public LatencyMetrics getLockedLatencyMetrics() { return lockedLatencyStore.getLatencyMetrics(); } + + public LatencyMetrics getAndResetAcquireLatencyMetrics() { return acquireLatencyStore.getAndResetLatencyMetrics(); } + public LatencyMetrics getAndResetLockedLatencyMetrics() { return lockedLatencyStore.getAndResetLatencyMetrics(); } + + // For tests + void setAcquireCount(int count) { acquireCount.set(count); } + void setAcquireFailedCount(int count) { acquireFailedCount.set(count); } + void setAcquireTimedOutCount(int count) { acquireTimedOutCount.set(count); } + void setAcquireSucceededCount(int count) { acquireSucceededCount.set(count); } + void setReleaseCount(int count) { releaseCount.set(count); } + void setReleaseFailedCount(int count) { releaseFailedCount.set(count); } + + // For tests + void setCumulativeAcquireCount(int count) { cumulativeAcquireCount.set(count); } + void setCumulativeAcquireFailedCount(int count) { cumulativeAcquireFailedCount.set(count); } + void setCumulativeAcquireTimedOutCount(int count) { cumulativeAcquireTimedOutCount.set(count); } + void setCumulativeAcquireSucceededCount(int count) { cumulativeAcquireSucceededCount.set(count); } + void setCumulativeReleaseCount(int count) { cumulativeReleaseCount.set(count); } + void setCumulativeReleaseFailedCount(int count) { cumulativeReleaseFailedCount.set(count); } + + // For tests + void setAcquiringNow(int count) { acquiringNow.set(count); } + void setLockedNow(int count) { lockedNow.set(count); } + + @Override + public String toString() { + return "LockMetrics{" + + "acquireCount=" + acquireCount + + ", acquireFailedCount=" + acquireFailedCount + + ", acquireTimedOutCount=" + acquireTimedOutCount + + ", acquireSucceededCount=" + acquireSucceededCount + + ", releaseCount=" + releaseCount + + ", releaseFailedCount=" + releaseFailedCount + + ", cumulativeAcquireCount=" + cumulativeAcquireCount + + ", cumulativeAcquireFailedCount=" + cumulativeAcquireFailedCount + + ", cumulativeAcquireTimedOutCount=" + cumulativeAcquireTimedOutCount + + ", cumulativeAcquireSucceededCount=" + cumulativeAcquireSucceededCount + + ", cumulativeReleaseCount=" + cumulativeReleaseCount + + ", cumulativeReleaseFailedCount=" + cumulativeReleaseFailedCount + + ", acquiringNow=" + acquiringNow + + ", lockedNow=" + lockedNow + + ", acquireLatencyStore=" + acquireLatencyStore + + ", lockedLatencyStore=" + lockedLatencyStore + + '}'; + } +} diff --git a/zkfacade/src/main/java/com/yahoo/vespa/curator/stats/LockStats.java b/zkfacade/src/main/java/com/yahoo/vespa/curator/stats/LockStats.java index 3cb5af80c96..ee464e0918d 100644 --- a/zkfacade/src/main/java/com/yahoo/vespa/curator/stats/LockStats.java +++ b/zkfacade/src/main/java/com/yahoo/vespa/curator/stats/LockStats.java @@ -27,7 +27,7 @@ public class LockStats { private final PriorityQueue<RecordedLockAttempts> interestingRecordings = new PriorityQueue<>(MAX_RECORDINGS, Comparator.comparing(RecordedLockAttempts::duration)); - private final ConcurrentHashMap<String, LockCounters> countersByLockPath = new ConcurrentHashMap<>(); + private final ConcurrentHashMap<String, LockMetrics> metricsByLockPath = new ConcurrentHashMap<>(); /** Returns global stats. */ public static LockStats getGlobal() { return stats; } @@ -37,13 +37,13 @@ public class LockStats { return stats.statsByThread.computeIfAbsent(Thread.currentThread(), ThreadLockStats::new); } - static void clearForTesting() { + public static void clearForTesting() { stats = new LockStats(); } private LockStats() {} - public Map<String, LockCounters> getLockCountersByPath() { return Map.copyOf(countersByLockPath); } + public Map<String, LockMetrics> getLockMetricsByPath() { return Map.copyOf(metricsByLockPath); } public List<ThreadLockStats> getThreadLockStats() { return List.copyOf(statsByThread.values()); } public List<LockAttempt> getLockAttemptSamples() { return completedLockAttemptSamples.asList(); } @@ -53,8 +53,8 @@ public class LockStats { } } - LockCounters getLockCounters(String lockPath) { - return countersByLockPath.computeIfAbsent(lockPath, __ -> new LockCounters()); + LockMetrics getLockMetrics(String lockPath) { + return metricsByLockPath.computeIfAbsent(lockPath, __ -> new LockMetrics()); } void maybeSample(LockAttempt lockAttempt) { diff --git a/zkfacade/src/main/java/com/yahoo/vespa/curator/stats/ThreadLockStats.java b/zkfacade/src/main/java/com/yahoo/vespa/curator/stats/ThreadLockStats.java index 90a2c2f8d25..cd6ae3b1e68 100644 --- a/zkfacade/src/main/java/com/yahoo/vespa/curator/stats/ThreadLockStats.java +++ b/zkfacade/src/main/java/com/yahoo/vespa/curator/stats/ThreadLockStats.java @@ -1,7 +1,6 @@ // Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. package com.yahoo.vespa.curator.stats; -import com.yahoo.jdisc.Metric; import com.yahoo.vespa.curator.Lock; import java.time.Duration; @@ -9,6 +8,7 @@ import java.util.List; import java.util.Optional; import java.util.concurrent.ConcurrentLinkedDeque; import java.util.function.Consumer; +import java.util.logging.Logger; /** * This class manages thread-specific statistics and information related to acquiring and releasing @@ -20,6 +20,8 @@ import java.util.function.Consumer; */ public class ThreadLockStats { + private static final Logger logger = Logger.getLogger(ThreadLockStats.class.getName()); + private final Thread thread; /** @@ -61,12 +63,8 @@ public class ThreadLockStats { public Optional<RecordedLockAttempts> getOngoingRecording() { return ongoingRecording; } /** Mutable method (see class doc) */ - public void invokingAcquire(String lockPath, Duration timeout, - Optional<Metric> metric, Optional<Metric.Context> metricContext) { - LockCounters lockCounters = getGlobalLockCounters(lockPath); - lockCounters.invokeAcquireCount.incrementAndGet(); - lockCounters.inCriticalRegionCount.incrementAndGet(); - LockAttempt lockAttempt = LockAttempt.invokingAcquire(this, lockPath, timeout, metric, metricContext); + public void invokingAcquire(String lockPath, Duration timeout) { + LockAttempt lockAttempt = LockAttempt.invokingAcquire(this, lockPath, timeout, getGlobalLockMetrics(lockPath)); LockAttempt lastLockAttempt = lockAttemptsStack.peekLast(); if (lastLockAttempt == null) { @@ -78,42 +76,34 @@ public class ThreadLockStats { } /** Mutable method (see class doc) */ - public void acquireFailed(String lockPath) { - LockCounters lockCounters = getGlobalLockCounters(lockPath); - lockCounters.acquireFailedCount.incrementAndGet(); - removeLastLockAttempt(lockCounters, LockAttempt::acquireFailed); + public void acquireFailed() { + removeLastLockAttempt(LockAttempt::acquireFailed); } /** Mutable method (see class doc) */ - public void acquireTimedOut(String lockPath) { - LockCounters lockCounters = getGlobalLockCounters(lockPath); - - lockCounters.acquireTimedOutCount.incrementAndGet(); - removeLastLockAttempt(lockCounters, LockAttempt::timedOut); + public void acquireTimedOut() { + removeLastLockAttempt(LockAttempt::timedOut); } /** Mutable method (see class doc) */ - public void lockAcquired(String lockPath) { - getGlobalLockCounters(lockPath).lockAcquiredCount.incrementAndGet(); + public void lockAcquired() { LockAttempt lockAttempt = lockAttemptsStack.peekLast(); if (lockAttempt == null) { - throw new IllegalStateException("lockAcquired invoked without lockAttempts"); + logger.warning("Unable to get last lock attempt as the lock attempt stack is empty"); + return; } + lockAttempt.lockAcquired(); } /** Mutable method (see class doc) */ - public void lockReleased(String lockPath) { - LockCounters lockCounters = getGlobalLockCounters(lockPath); - lockCounters.locksReleasedCount.incrementAndGet(); - removeLastLockAttempt(lockCounters, LockAttempt::released); + public void lockReleased() { + removeLastLockAttempt(LockAttempt::released); } /** Mutable method (see class doc) */ - public void lockReleaseFailed(String lockPath) { - LockCounters lockCounters = getGlobalLockCounters(lockPath); - lockCounters.lockReleaseErrorCount.incrementAndGet(); - removeLastLockAttempt(lockCounters, LockAttempt::releasedWithError); + public void lockReleaseFailed() { + removeLastLockAttempt(LockAttempt::releasedWithError); } /** Mutable method (see class doc) */ @@ -133,19 +123,17 @@ public class ThreadLockStats { } } - private LockCounters getGlobalLockCounters(String lockPath) { - return LockStats.getGlobal().getLockCounters(lockPath); + private LockMetrics getGlobalLockMetrics(String lockPath) { + return LockStats.getGlobal().getLockMetrics(lockPath); } - private void removeLastLockAttempt(LockCounters lockCounters, Consumer<LockAttempt> completeLockAttempt) { - lockCounters.inCriticalRegionCount.decrementAndGet(); - - if (lockAttemptsStack.isEmpty()) { - lockCounters.noLocksErrorCount.incrementAndGet(); + private void removeLastLockAttempt(Consumer<LockAttempt> completeLockAttempt) { + LockAttempt lockAttempt = lockAttemptsStack.pollLast(); + if (lockAttempt == null) { + logger.warning("Unable to remove last lock attempt as the lock attempt stack is empty"); return; } - LockAttempt lockAttempt = lockAttemptsStack.pollLast(); completeLockAttempt.accept(lockAttempt); LockStats.getGlobal().maybeSample(lockAttempt); diff --git a/zkfacade/src/test/java/com/yahoo/vespa/curator/CuratorTest.java b/zkfacade/src/test/java/com/yahoo/vespa/curator/CuratorTest.java index 06440098c52..2bf40c4e2bb 100644 --- a/zkfacade/src/test/java/com/yahoo/vespa/curator/CuratorTest.java +++ b/zkfacade/src/test/java/com/yahoo/vespa/curator/CuratorTest.java @@ -99,7 +99,7 @@ public class CuratorTest { } private Curator createCurator(ConfigserverConfig configserverConfig) { - return new Curator(configserverConfig, Optional.empty(), Optional.empty()); + return new Curator(configserverConfig, Optional.empty()); } private static class PortAllocator { diff --git a/zkfacade/src/test/java/com/yahoo/vespa/curator/stats/AtomicDurationSumTest.java b/zkfacade/src/test/java/com/yahoo/vespa/curator/stats/AtomicDurationSumTest.java new file mode 100644 index 00000000000..f1f9ce5950b --- /dev/null +++ b/zkfacade/src/test/java/com/yahoo/vespa/curator/stats/AtomicDurationSumTest.java @@ -0,0 +1,80 @@ +package com.yahoo.vespa.curator.stats;// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +import org.junit.Test; + +import java.time.Duration; + +import static org.junit.Assert.assertEquals; + +/** + * @author hakon + */ +public class AtomicDurationSumTest { + private final AtomicDurationSum atomicDurationSum = new AtomicDurationSum(); + + @Test + public void test() { + assertAtomicDurationSum(Duration.ZERO, 0); + atomicDurationSum.add(Duration.ofMillis(3)); + assertAtomicDurationSum(Duration.ofMillis(3), 1); + atomicDurationSum.add(Duration.ofMillis(5)); + assertAtomicDurationSum(Duration.ofMillis(8), 2); + assertEquals(0.004, atomicDurationSum.get().averageDuration().get().toMillis() / 1000., 0.00001); + + DurationSum durationSum = atomicDurationSum.getAndReset(); + assertEquals(Duration.ofMillis(8), durationSum.duration()); + assertEquals(2, durationSum.count()); + assertAtomicDurationSum(Duration.ZERO, 0); + } + + @Test + public void testNegatives() { + atomicDurationSum.add(Duration.ofMillis(-1)); + assertAtomicDurationSum(Duration.ofMillis(-1), 1); + } + + private void assertAtomicDurationSum(Duration expectedDuration, int expectedCount) { + DurationSum durationSum = atomicDurationSum.get(); + assertEquals(expectedDuration, durationSum.duration()); + assertEquals(expectedCount, durationSum.count()); + } + + @Test + public void encoding() { + assertEquals(40, AtomicDurationSum.DURATION_BITS); + assertEquals(24, AtomicDurationSum.COUNT_BITS); + + assertEquals(0xFFFFFFFFFF000000L, AtomicDurationSum.DURATION_MASK); + assertEquals(0x0000000000FFFFFFL, AtomicDurationSum.COUNT_MASK); + + // duration is signed + assertEquals(0xFFFFFF8000000000L, AtomicDurationSum.MIN_DURATION); + assertEquals(0x0000007FFFFFFFFFL, AtomicDurationSum.MAX_DURATION); + + // count is unsigned + assertEquals(0x0000000000000000L, AtomicDurationSum.MIN_COUNT); + assertEquals(0x0000000000FFFFFFL, AtomicDurationSum.MAX_COUNT); + + assertDurationEncoding(Duration.ZERO); + assertDurationEncoding(Duration.ofMillis(1)); + assertDurationEncoding(Duration.ofMillis(-1)); + assertDurationEncoding(Duration.ofMillis(AtomicDurationSum.MIN_DURATION)); + assertDurationEncoding(Duration.ofMillis(AtomicDurationSum.MAX_DURATION)); + + assertCountEncoding(1L); + assertCountEncoding(AtomicDurationSum.MIN_COUNT); + assertCountEncoding(AtomicDurationSum.MAX_COUNT); + assertEquals(0L, AtomicDurationSum.decodeCount(AtomicDurationSum.MAX_COUNT + 1)); + } + + private void assertDurationEncoding(Duration duration) { + long encoded = AtomicDurationSum.encodeDuration(duration); + Duration decodedDuration = AtomicDurationSum.decodeDuration(encoded); + assertEquals(duration, decodedDuration); + } + + private void assertCountEncoding(long count) { + int actualCount = AtomicDurationSum.decodeCount(count); + assertEquals(count, actualCount); + } +}
\ No newline at end of file diff --git a/zkfacade/src/test/java/com/yahoo/vespa/curator/stats/LatencyStoreTest.java b/zkfacade/src/test/java/com/yahoo/vespa/curator/stats/LatencyStoreTest.java new file mode 100644 index 00000000000..119dccca229 --- /dev/null +++ b/zkfacade/src/test/java/com/yahoo/vespa/curator/stats/LatencyStoreTest.java @@ -0,0 +1,52 @@ +package com.yahoo.vespa.curator.stats;// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +import com.yahoo.test.ManualClock; +import org.junit.Test; + +import java.time.Duration; + +import static org.junit.Assert.assertEquals; + +/** + * @author hakon + */ +public class LatencyStoreTest { + private final ManualClock clock = new ManualClock(); + private final LatencyStore store = new LatencyStore(clock); + + @Test + public void verifyDefaultAndEmpty() { + assertGetLatencyMetrics(0, 0.000f, 0f); + assertGetAndResetLatencyMetrics(0, 0.000f, 0f); + assertGetLatencyMetrics(0, 0.000f, 0f); + } + + @Test + public void commonCase() { + store.reportLatency(Duration.ofMillis(2)); + store.reportLatency(Duration.ofMillis(6)); + clock.advance(Duration.ofMillis(2)); + assertGetLatencyMetrics(2, 0.004f, 4f); + clock.advance(Duration.ofMillis(14)); + assertGetAndResetLatencyMetrics(2, 0.004f, 0.5f); + assertGetLatencyMetrics(0, 0.000f, 0f); + } + + private void assertGetLatencyMetrics(int count, float average, float load) { + LatencyMetrics latencyMetrics = store.getLatencyMetrics(); + assertEquals(count, latencyMetrics.count()); + assertDoubleEquals(average, latencyMetrics.averageInSeconds()); + assertDoubleEquals(load, latencyMetrics.load()); + } + + private void assertGetAndResetLatencyMetrics(int count, float average, float load) { + LatencyMetrics latencyMetrics = store.getAndResetLatencyMetrics(); + assertEquals(count, latencyMetrics.count()); + assertDoubleEquals(average, latencyMetrics.averageInSeconds()); + assertDoubleEquals(load, latencyMetrics.load()); + } + + private static void assertDoubleEquals(float expected, float actual) { + assertEquals(expected, actual, 1e-6); + } +}
\ No newline at end of file diff --git a/zkfacade/src/test/java/com/yahoo/vespa/curator/stats/LockAttemptSamplesTest.java b/zkfacade/src/test/java/com/yahoo/vespa/curator/stats/LockAttemptSamplesTest.java index bc731169f96..252a90f8bb4 100644 --- a/zkfacade/src/test/java/com/yahoo/vespa/curator/stats/LockAttemptSamplesTest.java +++ b/zkfacade/src/test/java/com/yahoo/vespa/curator/stats/LockAttemptSamplesTest.java @@ -6,7 +6,6 @@ import org.junit.Test; import java.time.Duration; import java.time.Instant; import java.util.Map; -import java.util.Optional; import java.util.stream.Collectors; import static org.junit.Assert.assertEquals; @@ -54,8 +53,8 @@ public class LockAttemptSamplesTest { } private boolean maybeSample(String lockPath, int secondsDuration) { - LockAttempt lockAttempt = LockAttempt.invokingAcquire(threadLockStats, lockPath, Duration.ofSeconds(1), - Optional.empty(), Optional.empty()); + LockAttempt lockAttempt = LockAttempt.invokingAcquire(threadLockStats, lockPath, + Duration.ofSeconds(1), new LockMetrics()); Instant instant = lockAttempt.getTimeAcquiredWasInvoked().plus(Duration.ofSeconds(secondsDuration)); lockAttempt.setTerminalState(LockAttempt.LockState.RELEASED, instant); return samples.maybeSample(lockAttempt); diff --git a/zkfacade/src/test/java/com/yahoo/vespa/curator/stats/LockTest.java b/zkfacade/src/test/java/com/yahoo/vespa/curator/stats/LockTest.java index a1f95dc0735..a7715eb9756 100644 --- a/zkfacade/src/test/java/com/yahoo/vespa/curator/stats/LockTest.java +++ b/zkfacade/src/test/java/com/yahoo/vespa/curator/stats/LockTest.java @@ -8,10 +8,10 @@ import org.junit.Test; import java.time.Duration; import java.util.List; -import java.util.Map; import java.util.Optional; import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertNotNull; import static org.junit.Assert.assertSame; import static org.junit.Assert.assertTrue; import static org.junit.Assert.fail; @@ -27,7 +27,7 @@ public class LockTest { private final InterProcessLock mutex = mock(InterProcessLock.class); private final String lockPath = "/lock/path"; private final Duration acquireTimeout = Duration.ofSeconds(10); - private final Lock lock = new Lock(lockPath, mutex, Optional.empty()); + private final Lock lock = new Lock(lockPath, mutex); @Before public void setUp() { @@ -46,10 +46,12 @@ public class LockTest { assertSame(e.getCause(), exception); } - var expectedCounters = new LockCounters(); - expectedCounters.invokeAcquireCount.set(1); - expectedCounters.acquireFailedCount.set(1); - assertEquals(Map.of(lockPath, expectedCounters), LockStats.getGlobal().getLockCountersByPath()); + var expectedMetrics = new LockMetrics(); + expectedMetrics.setAcquireCount(1); + expectedMetrics.setCumulativeAcquireCount(1); + expectedMetrics.setAcquireFailedCount(1); + expectedMetrics.setCumulativeAcquireFailedCount(1); + assertLockMetrics(expectedMetrics); List<LockAttempt> slowLockAttempts = LockStats.getGlobal().getLockAttemptSamples(); assertEquals(1, slowLockAttempts.size()); @@ -67,6 +69,28 @@ public class LockTest { assertEquals(0, threadLockStats.getOngoingLockAttempts().size()); } + private void assertLockMetrics(LockMetrics expected) { + LockMetrics actual = LockStats.getGlobal().getLockMetricsByPath().get(lockPath); + assertNotNull(actual); + + assertEquals(expected.getCumulativeAcquireCount(), actual.getCumulativeAcquireCount()); + assertEquals(expected.getCumulativeAcquireFailedCount(), actual.getCumulativeAcquireFailedCount()); + assertEquals(expected.getCumulativeAcquireTimedOutCount(), actual.getCumulativeAcquireTimedOutCount()); + assertEquals(expected.getCumulativeAcquireSucceededCount(), actual.getCumulativeAcquireSucceededCount()); + assertEquals(expected.getCumulativeReleaseCount(), actual.getCumulativeReleaseCount()); + assertEquals(expected.getCumulativeReleaseFailedCount(), actual.getCumulativeReleaseFailedCount()); + + assertEquals(expected.getAndResetAcquireCount(), actual.getAndResetAcquireCount()); + assertEquals(expected.getAndResetAcquireFailedCount(), actual.getAndResetAcquireFailedCount()); + assertEquals(expected.getAndResetAcquireTimedOutCount(), actual.getAndResetAcquireTimedOutCount()); + assertEquals(expected.getAndResetAcquireSucceededCount(), actual.getAndResetAcquireSucceededCount()); + assertEquals(expected.getAndResetReleaseCount(), actual.getAndResetReleaseCount()); + assertEquals(expected.getAndResetReleaseFailedCount(), actual.getAndResetReleaseFailedCount()); + + assertEquals(expected.getAcquiringNow(), actual.getAcquiringNow()); + assertEquals(expected.getLockedNow(), actual.getLockedNow()); + } + @Test public void acquireTimesOut() throws Exception { when(mutex.acquire(anyLong(), any())).thenReturn(false); @@ -78,10 +102,12 @@ public class LockTest { assertTrue("unexpected exception: " + e.getMessage(), e.getMessage().contains("Timed out")); } - var expectedCounters = new LockCounters(); - expectedCounters.invokeAcquireCount.set(1); - expectedCounters.acquireTimedOutCount.set(1); - assertEquals(Map.of(lockPath, expectedCounters), LockStats.getGlobal().getLockCountersByPath()); + var expectedMetrics = new LockMetrics(); + expectedMetrics.setAcquireCount(1); + expectedMetrics.setCumulativeAcquireCount(1); + expectedMetrics.setAcquireTimedOutCount(1); + expectedMetrics.setCumulativeAcquireTimedOutCount(1); + assertLockMetrics(expectedMetrics); } @Test @@ -90,29 +116,39 @@ public class LockTest { lock.acquire(acquireTimeout); - var expectedCounters = new LockCounters(); - expectedCounters.invokeAcquireCount.set(1); - expectedCounters.lockAcquiredCount.set(1); - expectedCounters.inCriticalRegionCount.set(1); - assertEquals(Map.of(lockPath, expectedCounters), LockStats.getGlobal().getLockCountersByPath()); + var expectedMetrics = new LockMetrics(); + expectedMetrics.setAcquireCount(1); + expectedMetrics.setCumulativeAcquireCount(1); + expectedMetrics.setAcquireSucceededCount(1); + expectedMetrics.setCumulativeAcquireSucceededCount(1); + expectedMetrics.setLockedNow(1); + assertLockMetrics(expectedMetrics); // reenter + // NB: non-cumulative counters are reset on fetch lock.acquire(acquireTimeout); - expectedCounters.invokeAcquireCount.set(2); - expectedCounters.lockAcquiredCount.set(2); - expectedCounters.inCriticalRegionCount.set(2); + expectedMetrics.setAcquireCount(1); // reset to 0 above, + 1 + expectedMetrics.setCumulativeAcquireCount(2); + expectedMetrics.setAcquireSucceededCount(1); // reset to 0 above, +1 + expectedMetrics.setCumulativeAcquireSucceededCount(2); + expectedMetrics.setLockedNow(2); + assertLockMetrics(expectedMetrics); // inner-most closes lock.close(); - expectedCounters.inCriticalRegionCount.set(1); - expectedCounters.locksReleasedCount.set(1); - assertEquals(Map.of(lockPath, expectedCounters), LockStats.getGlobal().getLockCountersByPath()); + expectedMetrics.setAcquireCount(0); // reset to 0 above + expectedMetrics.setAcquireSucceededCount(0); // reset to 0 above + expectedMetrics.setReleaseCount(1); + expectedMetrics.setCumulativeReleaseCount(1); + expectedMetrics.setLockedNow(1); + assertLockMetrics(expectedMetrics); // outer-most closes lock.close(); - expectedCounters.inCriticalRegionCount.set(0); - expectedCounters.locksReleasedCount.set(2); - assertEquals(Map.of(lockPath, expectedCounters), LockStats.getGlobal().getLockCountersByPath()); + expectedMetrics.setReleaseCount(1); // reset to 0 above, +1 + expectedMetrics.setCumulativeReleaseCount(2); + expectedMetrics.setLockedNow(0); + assertLockMetrics(expectedMetrics); } @Test @@ -120,7 +156,7 @@ public class LockTest { when(mutex.acquire(anyLong(), any())).thenReturn(true); String lockPath2 = "/lock/path/2"; - Lock lock2 = new Lock(lockPath2, mutex, Optional.empty()); + Lock lock2 = new Lock(lockPath2, mutex); lock.acquire(acquireTimeout); lock2.acquire(acquireTimeout); |