diff options
author | Håkon Hallingstad <hakon@verizonmedia.com> | 2020-10-03 15:02:53 +0200 |
---|---|---|
committer | Håkon Hallingstad <hakon@verizonmedia.com> | 2020-10-03 15:02:53 +0200 |
commit | e925ef8b0a33ed0e67e09ca9320b386339e08cea (patch) | |
tree | 066150a4a9f829b3c4935e5f702bb1e17cfefa03 /zkfacade | |
parent | 58c15afd3ea77235e2ea26f4baa2e171574dd3b6 (diff) |
Move lock metrics to MetricsReporter
Adds two new metrics:
- The load of acquiring each lock path: The average number of threads waiting
to acquire the lock within the last minute (or unit of time). Aka the lock
queue (depth).
- The load of the lock for each lock path: The average number of threads
holding the lock within the last minute (or unit of time). This is always
<= 1. Aka the lock utilization.
Changes the LockCounters to LockMetrics, and exporting those once every minute
through MetricReporter which is designed for this.
Diffstat (limited to 'zkfacade')
19 files changed, 629 insertions, 219 deletions
diff --git a/zkfacade/abi-spec.json b/zkfacade/abi-spec.json index 0f421621523..e026559b283 100644 --- a/zkfacade/abi-spec.json +++ b/zkfacade/abi-spec.json @@ -68,8 +68,8 @@ "methods": [ "public static com.yahoo.vespa.curator.Curator create(java.lang.String)", "public static com.yahoo.vespa.curator.Curator create(java.lang.String, java.util.Optional)", - "public void <init>(com.yahoo.cloud.config.ConfigserverConfig, com.yahoo.jdisc.Metric, com.yahoo.vespa.zookeeper.VespaZooKeeperServer)", - "protected void <init>(java.lang.String, java.lang.String, java.util.function.Function, java.util.Optional)", + "public void <init>(com.yahoo.cloud.config.ConfigserverConfig, com.yahoo.vespa.zookeeper.VespaZooKeeperServer)", + "protected void <init>(java.lang.String, java.lang.String, java.util.function.Function)", "public java.lang.String connectionSpec()", "public org.apache.curator.framework.recipes.atomic.DistributedAtomicLong createAtomicCounter(java.lang.String)", "public org.apache.curator.framework.recipes.locks.InterProcessLock createMutex(java.lang.String)", @@ -104,8 +104,8 @@ "public" ], "methods": [ - "public void <init>(java.lang.String, com.yahoo.vespa.curator.Curator, java.util.Optional)", - "public void <init>(java.lang.String, org.apache.curator.framework.recipes.locks.InterProcessLock, java.util.Optional)", + "public void <init>(java.lang.String, com.yahoo.vespa.curator.Curator)", + "public void <init>(java.lang.String, org.apache.curator.framework.recipes.locks.InterProcessLock)", "public void acquire(java.time.Duration)", "public void close()" ], diff --git a/zkfacade/pom.xml b/zkfacade/pom.xml index 7f335467751..70079e53c78 100644 --- a/zkfacade/pom.xml +++ b/zkfacade/pom.xml @@ -42,6 +42,12 @@ <scope>provided</scope> </dependency> <dependency> + <groupId>com.yahoo.vespa</groupId> + <artifactId>testutil</artifactId> + <version>${project.version}</version> + <scope>test</scope> + </dependency> + <dependency> <groupId>org.apache.curator</groupId> <artifactId>curator-recipes</artifactId> </dependency> diff --git a/zkfacade/src/main/java/com/yahoo/vespa/curator/Curator.java b/zkfacade/src/main/java/com/yahoo/vespa/curator/Curator.java index f3fe0316682..6cbfa274c56 100644 --- a/zkfacade/src/main/java/com/yahoo/vespa/curator/Curator.java +++ b/zkfacade/src/main/java/com/yahoo/vespa/curator/Curator.java @@ -4,7 +4,6 @@ package com.yahoo.vespa.curator; import com.google.inject.Inject; import com.yahoo.cloud.config.ConfigserverConfig; import com.yahoo.io.IOUtils; -import com.yahoo.jdisc.Metric; import com.yahoo.net.HostName; import com.yahoo.path.Path; import com.yahoo.text.Utf8; @@ -67,31 +66,28 @@ public class Curator implements AutoCloseable { // All lock keys, to allow re-entrancy. This will grow forever, but this should be too slow to be a problem private final ConcurrentHashMap<Path, Lock> locks = new ConcurrentHashMap<>(); - private final Optional<Metric> metric; - /** Creates a curator instance from a comma-separated string of ZooKeeper host:port strings */ public static Curator create(String connectionSpec) { - return new Curator(connectionSpec, connectionSpec, Optional.empty(), Optional.of(ZK_CLIENT_CONFIG_FILE)); + return new Curator(connectionSpec, connectionSpec, Optional.of(ZK_CLIENT_CONFIG_FILE)); } // For testing only, use Optional.empty for clientConfigFile parameter to create default zookeeper client config public static Curator create(String connectionSpec, Optional<File> clientConfigFile) { - return new Curator(connectionSpec, connectionSpec, Optional.empty(), clientConfigFile); + return new Curator(connectionSpec, connectionSpec, clientConfigFile); } // Depend on ZooKeeperServer to make sure it is started first // TODO: Move zookeeperserver config out of configserverconfig (requires update of controller services.xml as well) @Inject - public Curator(ConfigserverConfig configserverConfig, Metric metric, VespaZooKeeperServer server) { - this(configserverConfig, Optional.of(metric), Optional.of(ZK_CLIENT_CONFIG_FILE)); + public Curator(ConfigserverConfig configserverConfig, VespaZooKeeperServer server) { + this(configserverConfig, Optional.of(ZK_CLIENT_CONFIG_FILE)); } - Curator(ConfigserverConfig configserverConfig, Optional<Metric> metric, Optional<File> clientConfigFile) { - this(createConnectionSpec(configserverConfig), createEnsembleConnectionSpec(configserverConfig), metric, clientConfigFile); + Curator(ConfigserverConfig configserverConfig, Optional<File> clientConfigFile) { + this(createConnectionSpec(configserverConfig), createEnsembleConnectionSpec(configserverConfig), clientConfigFile); } - private Curator(String connectionSpec, String zooKeeperEnsembleConnectionSpec, Optional<Metric> metric, - Optional<File> clientConfigFile) { + private Curator(String connectionSpec, String zooKeeperEnsembleConnectionSpec, Optional<File> clientConfigFile) { this(connectionSpec, zooKeeperEnsembleConnectionSpec, (retryPolicy) -> CuratorFrameworkFactory @@ -102,24 +98,20 @@ public class Curator implements AutoCloseable { .connectString(connectionSpec) .zookeeperFactory(new VespaZooKeeperFactory(createClientConfig(clientConfigFile))) .dontUseContainerParents() // TODO: Remove when we know ZooKeeper 3.5 works fine, consider waiting until Vespa 8 - .build(), - metric); + .build()); } protected Curator(String connectionSpec, String zooKeeperEnsembleConnectionSpec, - Function<RetryPolicy, CuratorFramework> curatorFactory, - Optional<Metric> metric) { + Function<RetryPolicy, CuratorFramework> curatorFactory) { this(connectionSpec, zooKeeperEnsembleConnectionSpec, curatorFactory, - new ExponentialBackoffRetry((int) BASE_SLEEP_TIME.toMillis(), MAX_RETRIES), - metric); + new ExponentialBackoffRetry((int) BASE_SLEEP_TIME.toMillis(), MAX_RETRIES)); } private Curator(String connectionSpec, String zooKeeperEnsembleConnectionSpec, Function<RetryPolicy, CuratorFramework> curatorFactory, - RetryPolicy retryPolicy, - Optional<Metric> metric) { + RetryPolicy retryPolicy) { this.connectionSpec = connectionSpec; this.retryPolicy = retryPolicy; this.curatorFramework = curatorFactory.apply(retryPolicy); @@ -132,7 +124,6 @@ public class Curator implements AutoCloseable { this.zooKeeperEnsembleConnectionSpec = zooKeeperEnsembleConnectionSpec; this.zooKeeperEnsembleCount = zooKeeperEnsembleConnectionSpec.split(",").length; - this.metric = metric; } private static String createConnectionSpec(ConfigserverConfig configserverConfig) { @@ -363,7 +354,7 @@ public class Curator implements AutoCloseable { /** Create and acquire a re-entrant lock in given path */ public Lock lock(Path path, Duration timeout) { create(path); - Lock lock = locks.computeIfAbsent(path, (pathArg) -> new Lock(pathArg.getAbsolute(), this, metric)); + Lock lock = locks.computeIfAbsent(path, (pathArg) -> new Lock(pathArg.getAbsolute(), this)); lock.acquire(timeout); return lock; } diff --git a/zkfacade/src/main/java/com/yahoo/vespa/curator/Lock.java b/zkfacade/src/main/java/com/yahoo/vespa/curator/Lock.java index 305e5d459fc..451389694c6 100644 --- a/zkfacade/src/main/java/com/yahoo/vespa/curator/Lock.java +++ b/zkfacade/src/main/java/com/yahoo/vespa/curator/Lock.java @@ -2,7 +2,6 @@ package com.yahoo.vespa.curator; import com.google.common.util.concurrent.UncheckedTimeoutException; -import com.yahoo.jdisc.Metric; import com.yahoo.path.Path; import com.yahoo.transaction.Mutex; import com.yahoo.vespa.curator.stats.LockStats; @@ -10,8 +9,6 @@ import com.yahoo.vespa.curator.stats.ThreadLockStats; import org.apache.curator.framework.recipes.locks.InterProcessLock; import java.time.Duration; -import java.util.Map; -import java.util.Optional; import java.util.concurrent.TimeUnit; /** @@ -26,50 +23,46 @@ public class Lock implements Mutex { private final InterProcessLock mutex; private final String lockPath; - private final Optional<Metric> metric; - private final Optional<Metric.Context> metricContext; - public Lock(String lockPath, Curator curator, Optional<Metric> metric) { - this(lockPath, curator.createMutex(lockPath), metric); + public Lock(String lockPath, Curator curator) { + this(lockPath, curator.createMutex(lockPath)); } /** Public for testing only */ - public Lock(String lockPath, InterProcessLock mutex, Optional<Metric> metric) { + public Lock(String lockPath, InterProcessLock mutex) { this.lockPath = lockPath; this.mutex = mutex; - this.metric = metric; - this.metricContext = metric.map(aMetric -> aMetric.createContext(Map.of("lockPath", lockPath))); } /** Take the lock with the given timeout. This may be called multiple times from the same thread - each matched by a close */ public void acquire(Duration timeout) throws UncheckedTimeoutException { ThreadLockStats threadLockStats = LockStats.getForCurrentThread(); - threadLockStats.invokingAcquire(lockPath, timeout, metric, metricContext); + threadLockStats.invokingAcquire(lockPath, timeout); final boolean acquired; try { acquired = mutex.acquire(timeout.toMillis(), TimeUnit.MILLISECONDS); } catch (Exception e) { - threadLockStats.acquireFailed(lockPath); + threadLockStats.acquireFailed(); throw new RuntimeException("Exception acquiring lock '" + lockPath + "'", e); } if (!acquired) { - threadLockStats.acquireTimedOut(lockPath); + threadLockStats.acquireTimedOut(); throw new UncheckedTimeoutException("Timed out after waiting " + timeout + " to acquire lock '" + lockPath + "'"); } - threadLockStats.lockAcquired(lockPath); + threadLockStats.lockAcquired(); } @Override public void close() { try { mutex.release(); - LockStats.getForCurrentThread().lockReleased(lockPath); + LockStats.getForCurrentThread().lockReleased(); } catch (Exception e) { - LockStats.getForCurrentThread().lockReleaseFailed(lockPath); + LockStats.getForCurrentThread().lockReleaseFailed(); throw new RuntimeException("Exception releasing lock '" + lockPath + "'"); } } diff --git a/zkfacade/src/main/java/com/yahoo/vespa/curator/mock/MockCurator.java b/zkfacade/src/main/java/com/yahoo/vespa/curator/mock/MockCurator.java index 2ef42f539d2..3da7678c44e 100644 --- a/zkfacade/src/main/java/com/yahoo/vespa/curator/mock/MockCurator.java +++ b/zkfacade/src/main/java/com/yahoo/vespa/curator/mock/MockCurator.java @@ -137,7 +137,7 @@ public class MockCurator extends Curator { * This is not what ZooKeeper does. */ public MockCurator(boolean stableOrdering) { - super("", "", (retryPolicy) -> null, Optional.empty()); + super("", "", (retryPolicy) -> null); this.stableOrdering = stableOrdering; curatorFramework = new MockCuratorFramework(); curatorFramework.start(); diff --git a/zkfacade/src/main/java/com/yahoo/vespa/curator/stats/AtomicDurationSum.java b/zkfacade/src/main/java/com/yahoo/vespa/curator/stats/AtomicDurationSum.java new file mode 100644 index 00000000000..7addd661b79 --- /dev/null +++ b/zkfacade/src/main/java/com/yahoo/vespa/curator/stats/AtomicDurationSum.java @@ -0,0 +1,86 @@ +// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.vespa.curator.stats; + +import java.time.Duration; +import java.util.concurrent.atomic.AtomicLong; + +/** + * An instance of {@link AtomicDurationSum} provides atomic operations on a {@link Duration} + * and integer counter doublet: You can add a duration and increment the counter atomically, + * and get the doublet and reset the duration and counter to zero atomically. + * + * <p>The duration and counter must be small: You can add the equivalent of 16M durations of 1 minute each: + * The cumulative duration must be between -17 and 17 years, and the maximum count is 16M. + * The duration will have millisecond resolution. Overflow of count affects duration.</p> + * + * <p>Motivation: Metrics are snapshot and reset to zero every minute. Durations of typically 1 minute + * are then summed to produce a cumulative {@link Duration} and an associated count, both of which are + * therefore small numbers and can be represented in a compact and atomic form. The alternative is to + * use synchronization (which is slow) or allow inconsistencies between the duration and count + * (e.g. a sum of 2 latencies but a count of 1 makes the metrics noisy).</p> + * + * @author hakon + */ +public class AtomicDurationSum { + + // Why 40? The duration-part requires 16 bits to represent 1 minute. If we require 1 minute + // durations can be added until both the duration-part and count-part are full, the remaining + // 48 bits must be divided equally, hence 16 + 24 = 40. Seems to give a nice balance. + static final long DURATION_BITS = 40; + static final long COUNT_BITS = Long.SIZE - DURATION_BITS; + static final long DURATION_MASK = -1L << COUNT_BITS; + static final long COUNT_MASK = -1L >>> DURATION_BITS; + // The most significant bit of duration is a sign bit, which complicates the initializer. + static final long MIN_DURATION = -1L << (DURATION_BITS - 1); + static final long MAX_DURATION = (DURATION_MASK << 1) >>> (COUNT_BITS + 1); + static final long MAX_COUNT = COUNT_MASK; + static final long MIN_COUNT = 0L; + + private static final long ZERO_DURATION_AND_COUNT = 0L; + + // Representation: + // - A signed long of 40 bits storing the duration in milliseconds + // - An unsigned int of 24 bits storing the count + private final AtomicLong encodedAtomic = new AtomicLong(ZERO_DURATION_AND_COUNT); + + /** Initializes to zero duration and count. */ + public AtomicDurationSum() {} + + /** Add duration and increment count. */ + void add(Duration duration) { + encodedAtomic.addAndGet(encodeDuration(duration) | 1L); + } + + public DurationSum get() { + long snapshot = encodedAtomic.get(); + return new DurationSum(decodeDuration(snapshot), decodeCount(snapshot)); + } + + /** Get the current {@link DurationSum} and reset the duration and count doublet to zero. */ + public DurationSum getAndReset() { + long snapshot = encodedAtomic.getAndSet(ZERO_DURATION_AND_COUNT); + return new DurationSum(decodeDuration(snapshot), decodeCount(snapshot)); + } + + static long encodeDuration(Duration duration) { + long millis = duration.toMillis(); + if (millis < MIN_DURATION || millis > MAX_DURATION) { + throw new IllegalArgumentException("Duration outside legal range: " + duration); + } + + return millis << COUNT_BITS; + } + + static Duration decodeDuration(long encoded) { + return Duration.ofMillis(encoded >> COUNT_BITS); + } + + static int decodeCount(long encoded) { + return (int) (encoded & COUNT_MASK); + } + + @Override + public String toString() { + return get().toString(); + } +} diff --git a/zkfacade/src/main/java/com/yahoo/vespa/curator/stats/DurationSum.java b/zkfacade/src/main/java/com/yahoo/vespa/curator/stats/DurationSum.java new file mode 100644 index 00000000000..3c1f0ec7f43 --- /dev/null +++ b/zkfacade/src/main/java/com/yahoo/vespa/curator/stats/DurationSum.java @@ -0,0 +1,41 @@ +// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.vespa.curator.stats; + +import java.time.Duration; +import java.util.Optional; + +/** + * Represents a sum and count of Duration. + * + * @author hakon + */ +public class DurationSum { + + private final Duration duration; + private final int count; + + DurationSum(Duration duration, int count) { + this.duration = duration; + this.count = count; + } + + public Duration duration() { return duration; } + public int count() { return count; } + + public Optional<Duration> averageDuration() { + if (count <= 0) { + return Optional.empty(); + } + + long averageMillis = Math.round(duration.toMillis() / (double) count); + return Optional.of(Duration.ofMillis(averageMillis)); + } + + @Override + public String toString() { + return "DurationSum{" + + "duration=" + duration + + ", count=" + count + + '}'; + } +} diff --git a/zkfacade/src/main/java/com/yahoo/vespa/curator/stats/LatencyMetrics.java b/zkfacade/src/main/java/com/yahoo/vespa/curator/stats/LatencyMetrics.java new file mode 100644 index 00000000000..61807b5db19 --- /dev/null +++ b/zkfacade/src/main/java/com/yahoo/vespa/curator/stats/LatencyMetrics.java @@ -0,0 +1,39 @@ +// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.vespa.curator.stats; + +/** + * Metrics on the latency of execution of some piece of code, e.g. the acquiring of a lock. + * + * @author hakon + */ +public class LatencyMetrics { + private final DurationSum cumulativeLatency; + private final float load; + + public LatencyMetrics(DurationSum cumulativeLatency, float load) { + this.cumulativeLatency = cumulativeLatency; + this.load = load; + } + + /** + * The total time spent by all threads accumulating latency in an implicit time period, + * e.g. a metric snapshot window, divided by the duration of the time period. + */ + public float load() { return load; } + + /** Returns the average latency in seconds with milliseconds resolution, or 0.0 by default. */ + public float averageInSeconds() { + return cumulativeLatency.averageDuration().map(average -> average.toMillis() / 1000f).orElse(0f); + } + + /** The number of latency-producing events. */ + public int count() { return cumulativeLatency.count(); } + + @Override + public String toString() { + return "LatencyMetrics{" + + "cumulativeLatency=" + cumulativeLatency + + ", load=" + load + + '}'; + } +} diff --git a/zkfacade/src/main/java/com/yahoo/vespa/curator/stats/LatencyStore.java b/zkfacade/src/main/java/com/yahoo/vespa/curator/stats/LatencyStore.java new file mode 100644 index 00000000000..0b1056ddeba --- /dev/null +++ b/zkfacade/src/main/java/com/yahoo/vespa/curator/stats/LatencyStore.java @@ -0,0 +1,56 @@ +// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.vespa.curator.stats; + +import java.time.Clock; +import java.time.Duration; +import java.time.Instant; + +/** + * Stores the latency of e.g. acquiring the lock. + * + * @author hakon + */ +public class LatencyStore { + + private final AtomicDurationSum latencySum = new AtomicDurationSum(); + private final Clock clock; + private volatile Instant startOfPeriod; + + LatencyStore() { this(Clock.systemUTC()); } + + LatencyStore(Clock clock) { + this.clock = clock; + startOfPeriod = clock.instant(); + } + + void reportLatency(Duration latency) { + latencySum.add(latency); + } + + public LatencyMetrics getLatencyMetrics() { + return makeMetricsForPeriod(latencySum.get(), startOfPeriod, clock.instant()); + } + + public LatencyMetrics getAndResetLatencyMetrics() { + Instant newStartOfPeriod = clock.instant(); + DurationSum latencySumOfPeriod = latencySum.getAndReset(); + LatencyMetrics latencyMetrics = makeMetricsForPeriod(latencySumOfPeriod, startOfPeriod, newStartOfPeriod); + startOfPeriod = newStartOfPeriod; + return latencyMetrics; + } + + private static LatencyMetrics makeMetricsForPeriod(DurationSum latencySum, Instant start, Instant end) { + long millisPeriod = Duration.between(start, end).toMillis(); + long normalizedMillisPeriod = Math.max(1L, millisPeriod); + double load = Math.round(latencySum.duration().toMillis() * 1000.0 / normalizedMillisPeriod) / 1000.0; + return new LatencyMetrics(latencySum, (float) load); + } + + @Override + public String toString() { + return "LatencyStore{" + + "latencySum=" + latencySum + + ", startOfPeriod=" + startOfPeriod + + '}'; + } +} diff --git a/zkfacade/src/main/java/com/yahoo/vespa/curator/stats/LockAttempt.java b/zkfacade/src/main/java/com/yahoo/vespa/curator/stats/LockAttempt.java index bec7c08b9b3..e25470ac227 100644 --- a/zkfacade/src/main/java/com/yahoo/vespa/curator/stats/LockAttempt.java +++ b/zkfacade/src/main/java/com/yahoo/vespa/curator/stats/LockAttempt.java @@ -1,8 +1,6 @@ // Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. package com.yahoo.vespa.curator.stats; -import com.yahoo.jdisc.Metric; - import java.time.Duration; import java.time.Instant; import java.util.ArrayList; @@ -23,17 +21,16 @@ public class LockAttempt { private final String lockPath; private final Instant callAcquireInstant; private final Duration timeout; - private final Optional<Metric> metric; - private final Optional<Metric.Context> metricContext; + private final LockMetrics lockMetrics; private final List<LockAttempt> nestedLockAttempts = new ArrayList<>(); private volatile Optional<Instant> lockAcquiredInstant = Optional.empty(); private volatile Optional<Instant> terminalStateInstant = Optional.empty(); private volatile Optional<String> stackTrace = Optional.empty(); - public static LockAttempt invokingAcquire(ThreadLockStats threadLockStats, String lockPath, Duration timeout, - Optional<Metric> metric, Optional<Metric.Context> metricContext) { - return new LockAttempt(threadLockStats, lockPath, timeout, Instant.now(), metric, metricContext); + public static LockAttempt invokingAcquire(ThreadLockStats threadLockStats, String lockPath, + Duration timeout, LockMetrics lockMetrics) { + return new LockAttempt(threadLockStats, lockPath, timeout, Instant.now(), lockMetrics); } public enum LockState { @@ -49,16 +46,15 @@ public class LockAttempt { private volatile LockState lockState = LockState.ACQUIRING; - private LockAttempt(ThreadLockStats threadLockStats, String lockPath, Duration timeout, Instant callAcquireInstant, - Optional<Metric> metric, Optional<Metric.Context> metricContext) { + private LockAttempt(ThreadLockStats threadLockStats, String lockPath, Duration timeout, + Instant callAcquireInstant, LockMetrics lockMetrics) { this.threadLockStats = threadLockStats; this.lockPath = lockPath; this.callAcquireInstant = callAcquireInstant; this.timeout = timeout; - this.metric = metric; - this.metricContext = metricContext; + this.lockMetrics = lockMetrics; - addToMetric("lockAttempt.acquiring", 1); + lockMetrics.acquireInvoked(); } public String getThreadName() { return threadLockStats.getThreadName(); } @@ -73,8 +69,6 @@ public class LockAttempt { public Optional<Instant> getTimeTerminalStateWasReached() { return terminalStateInstant; } public Optional<String> getStackTrace() { return stackTrace; } public List<LockAttempt> getNestedLockAttempts() { return List.copyOf(nestedLockAttempts); } - public Optional<Metric> metric() { return metric; } - public Optional<Metric.Context> metricContext() { return metricContext; } public Duration getDurationOfAcquire() { return Duration.between(callAcquireInstant, getTimeAcquireEndedOrNow()); } @@ -105,39 +99,28 @@ public class LockAttempt { void acquireFailed() { setTerminalState(LockState.ACQUIRE_FAILED); - addToMetric("lockAttempt.acquiring", -1); - addToMetric("lockAttempt.acquireFailed", 1); - setMetricTo("lockAttempt.acquiringLatency", getDurationOfAcquire().toMillis() / 1000.); + lockMetrics.acquireFailed(getDurationOfAcquire()); } void timedOut() { setTerminalState(LockState.TIMED_OUT); - addToMetric("lockAttempt.acquiring", -1); - addToMetric("lockAttempt.acquireTimedOut", 1); - setMetricTo("lockAttempt.acquiringLatency", getDurationOfAcquire().toMillis() / 1000.); + lockMetrics.acquireTimedOut(getDurationOfAcquire()); } void lockAcquired() { lockState = LockState.ACQUIRED; lockAcquiredInstant = Optional.of(Instant.now()); - addToMetric("lockAttempt.acquiring", -1); - addToMetric("lockAttempt.acquired", 1); - setMetricTo("lockAttempt.locked", 1); - setMetricTo("lockAttempt.acquiringLatency", getDurationOfAcquire().toMillis() / 1000.); + lockMetrics.lockAcquired(getDurationOfAcquire()); } void released() { setTerminalState(LockState.RELEASED); - setMetricTo("lockAttempt.locked", 0); - addToMetric("lockAttempt.released", 1); - setMetricTo("lockAttempt.lockedLatency", getDurationWithLock().toMillis() / 1000.); + lockMetrics.release(getDurationWithLock(), getDuration()); } void releasedWithError() { setTerminalState(LockState.RELEASED_WITH_ERROR); - setMetricTo("lockAttempt.locked", 0); - addToMetric("lockAttempt.releaseError", 1); - setMetricTo("lockAttempt.lockedLatency", getDurationWithLock().toMillis() / 1000.); + lockMetrics.releaseFailed(getDurationWithLock(), getDuration()); } void setTerminalState(LockState terminalState) { setTerminalState(terminalState, Instant.now()); } @@ -146,16 +129,4 @@ public class LockAttempt { lockState = terminalState; terminalStateInstant = Optional.of(instant); } - - private void addToMetric(String key, Number value) { - if (metric.isPresent() && metricContext.isPresent()) { - metric.get().add(key, value, metricContext.get()); - } - } - - private void setMetricTo(String key, Number value) { - if (metric.isPresent() && metricContext.isPresent()) { - metric.get().set(key, value, metricContext.get()); - } - } } diff --git a/zkfacade/src/main/java/com/yahoo/vespa/curator/stats/LockCounters.java b/zkfacade/src/main/java/com/yahoo/vespa/curator/stats/LockCounters.java deleted file mode 100644 index 561ea9a7ed2..00000000000 --- a/zkfacade/src/main/java/com/yahoo/vespa/curator/stats/LockCounters.java +++ /dev/null @@ -1,66 +0,0 @@ -// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. -package com.yahoo.vespa.curator.stats; - -import java.util.Objects; -import java.util.concurrent.atomic.AtomicInteger; - -/** - * A collection of counters for events related to lock acquisition and release. - * - * @author hakon - */ -public class LockCounters { - final AtomicInteger invokeAcquireCount = new AtomicInteger(0); - final AtomicInteger inCriticalRegionCount = new AtomicInteger(0); - final AtomicInteger acquireFailedCount = new AtomicInteger(0); - final AtomicInteger acquireTimedOutCount = new AtomicInteger(0); - final AtomicInteger lockAcquiredCount = new AtomicInteger(0); - final AtomicInteger locksReleasedCount = new AtomicInteger(0); - - final AtomicInteger noLocksErrorCount = new AtomicInteger(0); - final AtomicInteger lockReleaseErrorCount = new AtomicInteger(0); - - public int invokeAcquireCount() { return invokeAcquireCount.get(); } - public int inCriticalRegionCount() { return inCriticalRegionCount.get(); } - public int acquireFailedCount() { return acquireFailedCount.get(); } - public int acquireTimedOutCount() { return acquireTimedOutCount.get(); } - public int lockAcquiredCount() { return lockAcquiredCount.get(); } - public int locksReleasedCount() { return locksReleasedCount.get(); } - public int noLocksErrorCount() { return noLocksErrorCount.get(); } - public int lockReleaseErrorCount() { return lockReleaseErrorCount.get(); } - - @Override - public String toString() { - return "LockCounters{" + - "invokeAcquireCount=" + invokeAcquireCount + - ", inCriticalRegionCount=" + inCriticalRegionCount + - ", acquireFailedCount=" + acquireFailedCount + - ", acquireTimedOutCount=" + acquireTimedOutCount + - ", lockAcquiredCount=" + lockAcquiredCount + - ", locksReleasedCount=" + locksReleasedCount + - ", noLocksErrorCount=" + noLocksErrorCount + - ", locksReleaseErrorCount=" + lockReleaseErrorCount + - '}'; - } - - @Override - public boolean equals(Object o) { - if (this == o) return true; - if (o == null || getClass() != o.getClass()) return false; - LockCounters that = (LockCounters) o; - return invokeAcquireCount.get() == that.invokeAcquireCount.get() && - inCriticalRegionCount.get() == that.inCriticalRegionCount.get() && - acquireFailedCount.get() == that.acquireFailedCount.get() && - acquireTimedOutCount.get() == that.acquireTimedOutCount.get() && - lockAcquiredCount.get() == that.lockAcquiredCount.get() && - locksReleasedCount.get() == that.locksReleasedCount.get() && - noLocksErrorCount.get() == that.noLocksErrorCount.get() && - lockReleaseErrorCount.get() == that.lockReleaseErrorCount.get(); - } - - @Override - public int hashCode() { - return Objects.hash(invokeAcquireCount, inCriticalRegionCount, acquireFailedCount, acquireTimedOutCount, - lockAcquiredCount, locksReleasedCount, noLocksErrorCount, lockReleaseErrorCount); - } -} diff --git a/zkfacade/src/main/java/com/yahoo/vespa/curator/stats/LockMetrics.java b/zkfacade/src/main/java/com/yahoo/vespa/curator/stats/LockMetrics.java new file mode 100644 index 00000000000..a5bf0124a54 --- /dev/null +++ b/zkfacade/src/main/java/com/yahoo/vespa/curator/stats/LockMetrics.java @@ -0,0 +1,138 @@ +// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.vespa.curator.stats; + +import java.time.Duration; +import java.util.concurrent.atomic.AtomicInteger; + +/** + * A collection of counters for events related to lock acquisition and release. + * + * @author hakon + */ +public class LockMetrics { + private final AtomicInteger acquireCount = new AtomicInteger(0); + private final AtomicInteger acquireFailedCount = new AtomicInteger(0); + private final AtomicInteger acquireTimedOutCount = new AtomicInteger(0); + private final AtomicInteger acquireSucceededCount = new AtomicInteger(0); + private final AtomicInteger releaseCount = new AtomicInteger(0); + private final AtomicInteger releaseFailedCount = new AtomicInteger(0); + + private final AtomicInteger cumulativeAcquireCount = new AtomicInteger(0); + private final AtomicInteger cumulativeAcquireFailedCount = new AtomicInteger(0); + private final AtomicInteger cumulativeAcquireTimedOutCount = new AtomicInteger(0); + private final AtomicInteger cumulativeAcquireSucceededCount = new AtomicInteger(0); + private final AtomicInteger cumulativeReleaseCount = new AtomicInteger(0); + private final AtomicInteger cumulativeReleaseFailedCount = new AtomicInteger(0); + + private final AtomicInteger acquiringNow = new AtomicInteger(0); + private final AtomicInteger lockedNow = new AtomicInteger(0); + + private final LatencyStore acquireLatencyStore = new LatencyStore(); + private final LatencyStore lockedLatencyStore = new LatencyStore(); + + void acquireInvoked() { + acquireCount.incrementAndGet(); + cumulativeAcquireCount.incrementAndGet(); + acquiringNow.incrementAndGet(); + } + + void acquireFailed(Duration acquireLatency) { + acquiringNow.decrementAndGet(); + acquireFailedCount.incrementAndGet(); + cumulativeAcquireFailedCount.incrementAndGet(); + acquireLatencyStore.reportLatency(acquireLatency); + } + + void acquireTimedOut(Duration acquireLatency) { + acquiringNow.decrementAndGet(); + acquireTimedOutCount.incrementAndGet(); + cumulativeAcquireTimedOutCount.incrementAndGet(); + acquireLatencyStore.reportLatency(acquireLatency); + } + + void lockAcquired(Duration acquireLatency) { + acquiringNow.decrementAndGet(); + acquireSucceededCount.incrementAndGet(); + cumulativeAcquireSucceededCount.incrementAndGet(); + acquireLatencyStore.reportLatency(acquireLatency); + lockedNow.incrementAndGet(); + } + + void release(Duration lockedLatency, Duration totalLatency) { + lockedNow.decrementAndGet(); + releaseCount.incrementAndGet(); + cumulativeReleaseCount.incrementAndGet(); + lockedLatencyStore.reportLatency(lockedLatency); + } + + void releaseFailed(Duration lockedLatency, Duration totalLatency) { + release(lockedLatency, totalLatency); + releaseFailedCount.incrementAndGet(); + cumulativeReleaseFailedCount.incrementAndGet(); + } + + public int getAndResetAcquireCount() { return acquireCount.getAndSet(0); } + public int getAndResetAcquireFailedCount() { return acquireFailedCount.getAndSet(0); } + public int getAndResetAcquireTimedOutCount() { return acquireTimedOutCount.getAndSet(0); } + public int getAndResetAcquireSucceededCount() { return acquireSucceededCount.getAndSet(0); } + public int getAndResetReleaseCount() { return releaseCount.getAndSet(0); } + public int getAndResetReleaseFailedCount() { return releaseFailedCount.getAndSet(0); } + + public int getCumulativeAcquireCount() { return cumulativeAcquireCount.get(); } + public int getCumulativeAcquireFailedCount() { return cumulativeAcquireFailedCount.get(); } + public int getCumulativeAcquireTimedOutCount() { return cumulativeAcquireTimedOutCount.get(); } + public int getCumulativeAcquireSucceededCount() { return cumulativeAcquireSucceededCount.get(); } + public int getCumulativeReleaseCount() { return cumulativeReleaseCount.get(); } + public int getCumulativeReleaseFailedCount() { return cumulativeReleaseFailedCount.get(); } + + public int getAcquiringNow() { return acquiringNow.get(); } + public int getLockedNow() { return lockedNow.get(); } + + public LatencyMetrics getAcquireLatencyMetrics() { return acquireLatencyStore.getLatencyMetrics(); } + public LatencyMetrics getLockedLatencyMetrics() { return lockedLatencyStore.getLatencyMetrics(); } + + public LatencyMetrics getAndResetAcquireLatencyMetrics() { return acquireLatencyStore.getAndResetLatencyMetrics(); } + public LatencyMetrics getAndResetLockedLatencyMetrics() { return lockedLatencyStore.getAndResetLatencyMetrics(); } + + // For tests + void setAcquireCount(int count) { acquireCount.set(count); } + void setAcquireFailedCount(int count) { acquireFailedCount.set(count); } + void setAcquireTimedOutCount(int count) { acquireTimedOutCount.set(count); } + void setAcquireSucceededCount(int count) { acquireSucceededCount.set(count); } + void setReleaseCount(int count) { releaseCount.set(count); } + void setReleaseFailedCount(int count) { releaseFailedCount.set(count); } + + // For tests + void setCumulativeAcquireCount(int count) { cumulativeAcquireCount.set(count); } + void setCumulativeAcquireFailedCount(int count) { cumulativeAcquireFailedCount.set(count); } + void setCumulativeAcquireTimedOutCount(int count) { cumulativeAcquireTimedOutCount.set(count); } + void setCumulativeAcquireSucceededCount(int count) { cumulativeAcquireSucceededCount.set(count); } + void setCumulativeReleaseCount(int count) { cumulativeReleaseCount.set(count); } + void setCumulativeReleaseFailedCount(int count) { cumulativeReleaseFailedCount.set(count); } + + // For tests + void setAcquiringNow(int count) { acquiringNow.set(count); } + void setLockedNow(int count) { lockedNow.set(count); } + + @Override + public String toString() { + return "LockMetrics{" + + "acquireCount=" + acquireCount + + ", acquireFailedCount=" + acquireFailedCount + + ", acquireTimedOutCount=" + acquireTimedOutCount + + ", acquireSucceededCount=" + acquireSucceededCount + + ", releaseCount=" + releaseCount + + ", releaseFailedCount=" + releaseFailedCount + + ", cumulativeAcquireCount=" + cumulativeAcquireCount + + ", cumulativeAcquireFailedCount=" + cumulativeAcquireFailedCount + + ", cumulativeAcquireTimedOutCount=" + cumulativeAcquireTimedOutCount + + ", cumulativeAcquireSucceededCount=" + cumulativeAcquireSucceededCount + + ", cumulativeReleaseCount=" + cumulativeReleaseCount + + ", cumulativeReleaseFailedCount=" + cumulativeReleaseFailedCount + + ", acquiringNow=" + acquiringNow + + ", lockedNow=" + lockedNow + + ", acquireLatencyStore=" + acquireLatencyStore + + ", lockedLatencyStore=" + lockedLatencyStore + + '}'; + } +} diff --git a/zkfacade/src/main/java/com/yahoo/vespa/curator/stats/LockStats.java b/zkfacade/src/main/java/com/yahoo/vespa/curator/stats/LockStats.java index 3cb5af80c96..ee464e0918d 100644 --- a/zkfacade/src/main/java/com/yahoo/vespa/curator/stats/LockStats.java +++ b/zkfacade/src/main/java/com/yahoo/vespa/curator/stats/LockStats.java @@ -27,7 +27,7 @@ public class LockStats { private final PriorityQueue<RecordedLockAttempts> interestingRecordings = new PriorityQueue<>(MAX_RECORDINGS, Comparator.comparing(RecordedLockAttempts::duration)); - private final ConcurrentHashMap<String, LockCounters> countersByLockPath = new ConcurrentHashMap<>(); + private final ConcurrentHashMap<String, LockMetrics> metricsByLockPath = new ConcurrentHashMap<>(); /** Returns global stats. */ public static LockStats getGlobal() { return stats; } @@ -37,13 +37,13 @@ public class LockStats { return stats.statsByThread.computeIfAbsent(Thread.currentThread(), ThreadLockStats::new); } - static void clearForTesting() { + public static void clearForTesting() { stats = new LockStats(); } private LockStats() {} - public Map<String, LockCounters> getLockCountersByPath() { return Map.copyOf(countersByLockPath); } + public Map<String, LockMetrics> getLockMetricsByPath() { return Map.copyOf(metricsByLockPath); } public List<ThreadLockStats> getThreadLockStats() { return List.copyOf(statsByThread.values()); } public List<LockAttempt> getLockAttemptSamples() { return completedLockAttemptSamples.asList(); } @@ -53,8 +53,8 @@ public class LockStats { } } - LockCounters getLockCounters(String lockPath) { - return countersByLockPath.computeIfAbsent(lockPath, __ -> new LockCounters()); + LockMetrics getLockMetrics(String lockPath) { + return metricsByLockPath.computeIfAbsent(lockPath, __ -> new LockMetrics()); } void maybeSample(LockAttempt lockAttempt) { diff --git a/zkfacade/src/main/java/com/yahoo/vespa/curator/stats/ThreadLockStats.java b/zkfacade/src/main/java/com/yahoo/vespa/curator/stats/ThreadLockStats.java index 90a2c2f8d25..cd6ae3b1e68 100644 --- a/zkfacade/src/main/java/com/yahoo/vespa/curator/stats/ThreadLockStats.java +++ b/zkfacade/src/main/java/com/yahoo/vespa/curator/stats/ThreadLockStats.java @@ -1,7 +1,6 @@ // Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. package com.yahoo.vespa.curator.stats; -import com.yahoo.jdisc.Metric; import com.yahoo.vespa.curator.Lock; import java.time.Duration; @@ -9,6 +8,7 @@ import java.util.List; import java.util.Optional; import java.util.concurrent.ConcurrentLinkedDeque; import java.util.function.Consumer; +import java.util.logging.Logger; /** * This class manages thread-specific statistics and information related to acquiring and releasing @@ -20,6 +20,8 @@ import java.util.function.Consumer; */ public class ThreadLockStats { + private static final Logger logger = Logger.getLogger(ThreadLockStats.class.getName()); + private final Thread thread; /** @@ -61,12 +63,8 @@ public class ThreadLockStats { public Optional<RecordedLockAttempts> getOngoingRecording() { return ongoingRecording; } /** Mutable method (see class doc) */ - public void invokingAcquire(String lockPath, Duration timeout, - Optional<Metric> metric, Optional<Metric.Context> metricContext) { - LockCounters lockCounters = getGlobalLockCounters(lockPath); - lockCounters.invokeAcquireCount.incrementAndGet(); - lockCounters.inCriticalRegionCount.incrementAndGet(); - LockAttempt lockAttempt = LockAttempt.invokingAcquire(this, lockPath, timeout, metric, metricContext); + public void invokingAcquire(String lockPath, Duration timeout) { + LockAttempt lockAttempt = LockAttempt.invokingAcquire(this, lockPath, timeout, getGlobalLockMetrics(lockPath)); LockAttempt lastLockAttempt = lockAttemptsStack.peekLast(); if (lastLockAttempt == null) { @@ -78,42 +76,34 @@ public class ThreadLockStats { } /** Mutable method (see class doc) */ - public void acquireFailed(String lockPath) { - LockCounters lockCounters = getGlobalLockCounters(lockPath); - lockCounters.acquireFailedCount.incrementAndGet(); - removeLastLockAttempt(lockCounters, LockAttempt::acquireFailed); + public void acquireFailed() { + removeLastLockAttempt(LockAttempt::acquireFailed); } /** Mutable method (see class doc) */ - public void acquireTimedOut(String lockPath) { - LockCounters lockCounters = getGlobalLockCounters(lockPath); - - lockCounters.acquireTimedOutCount.incrementAndGet(); - removeLastLockAttempt(lockCounters, LockAttempt::timedOut); + public void acquireTimedOut() { + removeLastLockAttempt(LockAttempt::timedOut); } /** Mutable method (see class doc) */ - public void lockAcquired(String lockPath) { - getGlobalLockCounters(lockPath).lockAcquiredCount.incrementAndGet(); + public void lockAcquired() { LockAttempt lockAttempt = lockAttemptsStack.peekLast(); if (lockAttempt == null) { - throw new IllegalStateException("lockAcquired invoked without lockAttempts"); + logger.warning("Unable to get last lock attempt as the lock attempt stack is empty"); + return; } + lockAttempt.lockAcquired(); } /** Mutable method (see class doc) */ - public void lockReleased(String lockPath) { - LockCounters lockCounters = getGlobalLockCounters(lockPath); - lockCounters.locksReleasedCount.incrementAndGet(); - removeLastLockAttempt(lockCounters, LockAttempt::released); + public void lockReleased() { + removeLastLockAttempt(LockAttempt::released); } /** Mutable method (see class doc) */ - public void lockReleaseFailed(String lockPath) { - LockCounters lockCounters = getGlobalLockCounters(lockPath); - lockCounters.lockReleaseErrorCount.incrementAndGet(); - removeLastLockAttempt(lockCounters, LockAttempt::releasedWithError); + public void lockReleaseFailed() { + removeLastLockAttempt(LockAttempt::releasedWithError); } /** Mutable method (see class doc) */ @@ -133,19 +123,17 @@ public class ThreadLockStats { } } - private LockCounters getGlobalLockCounters(String lockPath) { - return LockStats.getGlobal().getLockCounters(lockPath); + private LockMetrics getGlobalLockMetrics(String lockPath) { + return LockStats.getGlobal().getLockMetrics(lockPath); } - private void removeLastLockAttempt(LockCounters lockCounters, Consumer<LockAttempt> completeLockAttempt) { - lockCounters.inCriticalRegionCount.decrementAndGet(); - - if (lockAttemptsStack.isEmpty()) { - lockCounters.noLocksErrorCount.incrementAndGet(); + private void removeLastLockAttempt(Consumer<LockAttempt> completeLockAttempt) { + LockAttempt lockAttempt = lockAttemptsStack.pollLast(); + if (lockAttempt == null) { + logger.warning("Unable to remove last lock attempt as the lock attempt stack is empty"); return; } - LockAttempt lockAttempt = lockAttemptsStack.pollLast(); completeLockAttempt.accept(lockAttempt); LockStats.getGlobal().maybeSample(lockAttempt); diff --git a/zkfacade/src/test/java/com/yahoo/vespa/curator/CuratorTest.java b/zkfacade/src/test/java/com/yahoo/vespa/curator/CuratorTest.java index 06440098c52..2bf40c4e2bb 100644 --- a/zkfacade/src/test/java/com/yahoo/vespa/curator/CuratorTest.java +++ b/zkfacade/src/test/java/com/yahoo/vespa/curator/CuratorTest.java @@ -99,7 +99,7 @@ public class CuratorTest { } private Curator createCurator(ConfigserverConfig configserverConfig) { - return new Curator(configserverConfig, Optional.empty(), Optional.empty()); + return new Curator(configserverConfig, Optional.empty()); } private static class PortAllocator { diff --git a/zkfacade/src/test/java/com/yahoo/vespa/curator/stats/AtomicDurationSumTest.java b/zkfacade/src/test/java/com/yahoo/vespa/curator/stats/AtomicDurationSumTest.java new file mode 100644 index 00000000000..f1f9ce5950b --- /dev/null +++ b/zkfacade/src/test/java/com/yahoo/vespa/curator/stats/AtomicDurationSumTest.java @@ -0,0 +1,80 @@ +package com.yahoo.vespa.curator.stats;// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +import org.junit.Test; + +import java.time.Duration; + +import static org.junit.Assert.assertEquals; + +/** + * @author hakon + */ +public class AtomicDurationSumTest { + private final AtomicDurationSum atomicDurationSum = new AtomicDurationSum(); + + @Test + public void test() { + assertAtomicDurationSum(Duration.ZERO, 0); + atomicDurationSum.add(Duration.ofMillis(3)); + assertAtomicDurationSum(Duration.ofMillis(3), 1); + atomicDurationSum.add(Duration.ofMillis(5)); + assertAtomicDurationSum(Duration.ofMillis(8), 2); + assertEquals(0.004, atomicDurationSum.get().averageDuration().get().toMillis() / 1000., 0.00001); + + DurationSum durationSum = atomicDurationSum.getAndReset(); + assertEquals(Duration.ofMillis(8), durationSum.duration()); + assertEquals(2, durationSum.count()); + assertAtomicDurationSum(Duration.ZERO, 0); + } + + @Test + public void testNegatives() { + atomicDurationSum.add(Duration.ofMillis(-1)); + assertAtomicDurationSum(Duration.ofMillis(-1), 1); + } + + private void assertAtomicDurationSum(Duration expectedDuration, int expectedCount) { + DurationSum durationSum = atomicDurationSum.get(); + assertEquals(expectedDuration, durationSum.duration()); + assertEquals(expectedCount, durationSum.count()); + } + + @Test + public void encoding() { + assertEquals(40, AtomicDurationSum.DURATION_BITS); + assertEquals(24, AtomicDurationSum.COUNT_BITS); + + assertEquals(0xFFFFFFFFFF000000L, AtomicDurationSum.DURATION_MASK); + assertEquals(0x0000000000FFFFFFL, AtomicDurationSum.COUNT_MASK); + + // duration is signed + assertEquals(0xFFFFFF8000000000L, AtomicDurationSum.MIN_DURATION); + assertEquals(0x0000007FFFFFFFFFL, AtomicDurationSum.MAX_DURATION); + + // count is unsigned + assertEquals(0x0000000000000000L, AtomicDurationSum.MIN_COUNT); + assertEquals(0x0000000000FFFFFFL, AtomicDurationSum.MAX_COUNT); + + assertDurationEncoding(Duration.ZERO); + assertDurationEncoding(Duration.ofMillis(1)); + assertDurationEncoding(Duration.ofMillis(-1)); + assertDurationEncoding(Duration.ofMillis(AtomicDurationSum.MIN_DURATION)); + assertDurationEncoding(Duration.ofMillis(AtomicDurationSum.MAX_DURATION)); + + assertCountEncoding(1L); + assertCountEncoding(AtomicDurationSum.MIN_COUNT); + assertCountEncoding(AtomicDurationSum.MAX_COUNT); + assertEquals(0L, AtomicDurationSum.decodeCount(AtomicDurationSum.MAX_COUNT + 1)); + } + + private void assertDurationEncoding(Duration duration) { + long encoded = AtomicDurationSum.encodeDuration(duration); + Duration decodedDuration = AtomicDurationSum.decodeDuration(encoded); + assertEquals(duration, decodedDuration); + } + + private void assertCountEncoding(long count) { + int actualCount = AtomicDurationSum.decodeCount(count); + assertEquals(count, actualCount); + } +}
\ No newline at end of file diff --git a/zkfacade/src/test/java/com/yahoo/vespa/curator/stats/LatencyStoreTest.java b/zkfacade/src/test/java/com/yahoo/vespa/curator/stats/LatencyStoreTest.java new file mode 100644 index 00000000000..119dccca229 --- /dev/null +++ b/zkfacade/src/test/java/com/yahoo/vespa/curator/stats/LatencyStoreTest.java @@ -0,0 +1,52 @@ +package com.yahoo.vespa.curator.stats;// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +import com.yahoo.test.ManualClock; +import org.junit.Test; + +import java.time.Duration; + +import static org.junit.Assert.assertEquals; + +/** + * @author hakon + */ +public class LatencyStoreTest { + private final ManualClock clock = new ManualClock(); + private final LatencyStore store = new LatencyStore(clock); + + @Test + public void verifyDefaultAndEmpty() { + assertGetLatencyMetrics(0, 0.000f, 0f); + assertGetAndResetLatencyMetrics(0, 0.000f, 0f); + assertGetLatencyMetrics(0, 0.000f, 0f); + } + + @Test + public void commonCase() { + store.reportLatency(Duration.ofMillis(2)); + store.reportLatency(Duration.ofMillis(6)); + clock.advance(Duration.ofMillis(2)); + assertGetLatencyMetrics(2, 0.004f, 4f); + clock.advance(Duration.ofMillis(14)); + assertGetAndResetLatencyMetrics(2, 0.004f, 0.5f); + assertGetLatencyMetrics(0, 0.000f, 0f); + } + + private void assertGetLatencyMetrics(int count, float average, float load) { + LatencyMetrics latencyMetrics = store.getLatencyMetrics(); + assertEquals(count, latencyMetrics.count()); + assertDoubleEquals(average, latencyMetrics.averageInSeconds()); + assertDoubleEquals(load, latencyMetrics.load()); + } + + private void assertGetAndResetLatencyMetrics(int count, float average, float load) { + LatencyMetrics latencyMetrics = store.getAndResetLatencyMetrics(); + assertEquals(count, latencyMetrics.count()); + assertDoubleEquals(average, latencyMetrics.averageInSeconds()); + assertDoubleEquals(load, latencyMetrics.load()); + } + + private static void assertDoubleEquals(float expected, float actual) { + assertEquals(expected, actual, 1e-6); + } +}
\ No newline at end of file diff --git a/zkfacade/src/test/java/com/yahoo/vespa/curator/stats/LockAttemptSamplesTest.java b/zkfacade/src/test/java/com/yahoo/vespa/curator/stats/LockAttemptSamplesTest.java index bc731169f96..252a90f8bb4 100644 --- a/zkfacade/src/test/java/com/yahoo/vespa/curator/stats/LockAttemptSamplesTest.java +++ b/zkfacade/src/test/java/com/yahoo/vespa/curator/stats/LockAttemptSamplesTest.java @@ -6,7 +6,6 @@ import org.junit.Test; import java.time.Duration; import java.time.Instant; import java.util.Map; -import java.util.Optional; import java.util.stream.Collectors; import static org.junit.Assert.assertEquals; @@ -54,8 +53,8 @@ public class LockAttemptSamplesTest { } private boolean maybeSample(String lockPath, int secondsDuration) { - LockAttempt lockAttempt = LockAttempt.invokingAcquire(threadLockStats, lockPath, Duration.ofSeconds(1), - Optional.empty(), Optional.empty()); + LockAttempt lockAttempt = LockAttempt.invokingAcquire(threadLockStats, lockPath, + Duration.ofSeconds(1), new LockMetrics()); Instant instant = lockAttempt.getTimeAcquiredWasInvoked().plus(Duration.ofSeconds(secondsDuration)); lockAttempt.setTerminalState(LockAttempt.LockState.RELEASED, instant); return samples.maybeSample(lockAttempt); diff --git a/zkfacade/src/test/java/com/yahoo/vespa/curator/stats/LockTest.java b/zkfacade/src/test/java/com/yahoo/vespa/curator/stats/LockTest.java index a1f95dc0735..a7715eb9756 100644 --- a/zkfacade/src/test/java/com/yahoo/vespa/curator/stats/LockTest.java +++ b/zkfacade/src/test/java/com/yahoo/vespa/curator/stats/LockTest.java @@ -8,10 +8,10 @@ import org.junit.Test; import java.time.Duration; import java.util.List; -import java.util.Map; import java.util.Optional; import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertNotNull; import static org.junit.Assert.assertSame; import static org.junit.Assert.assertTrue; import static org.junit.Assert.fail; @@ -27,7 +27,7 @@ public class LockTest { private final InterProcessLock mutex = mock(InterProcessLock.class); private final String lockPath = "/lock/path"; private final Duration acquireTimeout = Duration.ofSeconds(10); - private final Lock lock = new Lock(lockPath, mutex, Optional.empty()); + private final Lock lock = new Lock(lockPath, mutex); @Before public void setUp() { @@ -46,10 +46,12 @@ public class LockTest { assertSame(e.getCause(), exception); } - var expectedCounters = new LockCounters(); - expectedCounters.invokeAcquireCount.set(1); - expectedCounters.acquireFailedCount.set(1); - assertEquals(Map.of(lockPath, expectedCounters), LockStats.getGlobal().getLockCountersByPath()); + var expectedMetrics = new LockMetrics(); + expectedMetrics.setAcquireCount(1); + expectedMetrics.setCumulativeAcquireCount(1); + expectedMetrics.setAcquireFailedCount(1); + expectedMetrics.setCumulativeAcquireFailedCount(1); + assertLockMetrics(expectedMetrics); List<LockAttempt> slowLockAttempts = LockStats.getGlobal().getLockAttemptSamples(); assertEquals(1, slowLockAttempts.size()); @@ -67,6 +69,28 @@ public class LockTest { assertEquals(0, threadLockStats.getOngoingLockAttempts().size()); } + private void assertLockMetrics(LockMetrics expected) { + LockMetrics actual = LockStats.getGlobal().getLockMetricsByPath().get(lockPath); + assertNotNull(actual); + + assertEquals(expected.getCumulativeAcquireCount(), actual.getCumulativeAcquireCount()); + assertEquals(expected.getCumulativeAcquireFailedCount(), actual.getCumulativeAcquireFailedCount()); + assertEquals(expected.getCumulativeAcquireTimedOutCount(), actual.getCumulativeAcquireTimedOutCount()); + assertEquals(expected.getCumulativeAcquireSucceededCount(), actual.getCumulativeAcquireSucceededCount()); + assertEquals(expected.getCumulativeReleaseCount(), actual.getCumulativeReleaseCount()); + assertEquals(expected.getCumulativeReleaseFailedCount(), actual.getCumulativeReleaseFailedCount()); + + assertEquals(expected.getAndResetAcquireCount(), actual.getAndResetAcquireCount()); + assertEquals(expected.getAndResetAcquireFailedCount(), actual.getAndResetAcquireFailedCount()); + assertEquals(expected.getAndResetAcquireTimedOutCount(), actual.getAndResetAcquireTimedOutCount()); + assertEquals(expected.getAndResetAcquireSucceededCount(), actual.getAndResetAcquireSucceededCount()); + assertEquals(expected.getAndResetReleaseCount(), actual.getAndResetReleaseCount()); + assertEquals(expected.getAndResetReleaseFailedCount(), actual.getAndResetReleaseFailedCount()); + + assertEquals(expected.getAcquiringNow(), actual.getAcquiringNow()); + assertEquals(expected.getLockedNow(), actual.getLockedNow()); + } + @Test public void acquireTimesOut() throws Exception { when(mutex.acquire(anyLong(), any())).thenReturn(false); @@ -78,10 +102,12 @@ public class LockTest { assertTrue("unexpected exception: " + e.getMessage(), e.getMessage().contains("Timed out")); } - var expectedCounters = new LockCounters(); - expectedCounters.invokeAcquireCount.set(1); - expectedCounters.acquireTimedOutCount.set(1); - assertEquals(Map.of(lockPath, expectedCounters), LockStats.getGlobal().getLockCountersByPath()); + var expectedMetrics = new LockMetrics(); + expectedMetrics.setAcquireCount(1); + expectedMetrics.setCumulativeAcquireCount(1); + expectedMetrics.setAcquireTimedOutCount(1); + expectedMetrics.setCumulativeAcquireTimedOutCount(1); + assertLockMetrics(expectedMetrics); } @Test @@ -90,29 +116,39 @@ public class LockTest { lock.acquire(acquireTimeout); - var expectedCounters = new LockCounters(); - expectedCounters.invokeAcquireCount.set(1); - expectedCounters.lockAcquiredCount.set(1); - expectedCounters.inCriticalRegionCount.set(1); - assertEquals(Map.of(lockPath, expectedCounters), LockStats.getGlobal().getLockCountersByPath()); + var expectedMetrics = new LockMetrics(); + expectedMetrics.setAcquireCount(1); + expectedMetrics.setCumulativeAcquireCount(1); + expectedMetrics.setAcquireSucceededCount(1); + expectedMetrics.setCumulativeAcquireSucceededCount(1); + expectedMetrics.setLockedNow(1); + assertLockMetrics(expectedMetrics); // reenter + // NB: non-cumulative counters are reset on fetch lock.acquire(acquireTimeout); - expectedCounters.invokeAcquireCount.set(2); - expectedCounters.lockAcquiredCount.set(2); - expectedCounters.inCriticalRegionCount.set(2); + expectedMetrics.setAcquireCount(1); // reset to 0 above, + 1 + expectedMetrics.setCumulativeAcquireCount(2); + expectedMetrics.setAcquireSucceededCount(1); // reset to 0 above, +1 + expectedMetrics.setCumulativeAcquireSucceededCount(2); + expectedMetrics.setLockedNow(2); + assertLockMetrics(expectedMetrics); // inner-most closes lock.close(); - expectedCounters.inCriticalRegionCount.set(1); - expectedCounters.locksReleasedCount.set(1); - assertEquals(Map.of(lockPath, expectedCounters), LockStats.getGlobal().getLockCountersByPath()); + expectedMetrics.setAcquireCount(0); // reset to 0 above + expectedMetrics.setAcquireSucceededCount(0); // reset to 0 above + expectedMetrics.setReleaseCount(1); + expectedMetrics.setCumulativeReleaseCount(1); + expectedMetrics.setLockedNow(1); + assertLockMetrics(expectedMetrics); // outer-most closes lock.close(); - expectedCounters.inCriticalRegionCount.set(0); - expectedCounters.locksReleasedCount.set(2); - assertEquals(Map.of(lockPath, expectedCounters), LockStats.getGlobal().getLockCountersByPath()); + expectedMetrics.setReleaseCount(1); // reset to 0 above, +1 + expectedMetrics.setCumulativeReleaseCount(2); + expectedMetrics.setLockedNow(0); + assertLockMetrics(expectedMetrics); } @Test @@ -120,7 +156,7 @@ public class LockTest { when(mutex.acquire(anyLong(), any())).thenReturn(true); String lockPath2 = "/lock/path/2"; - Lock lock2 = new Lock(lockPath2, mutex, Optional.empty()); + Lock lock2 = new Lock(lockPath2, mutex); lock.acquire(acquireTimeout); lock2.acquire(acquireTimeout); |