diff options
author | Jon Bratseth <bratseth@gmail.com> | 2022-07-31 17:17:26 +0200 |
---|---|---|
committer | GitHub <noreply@github.com> | 2022-07-31 17:17:26 +0200 |
commit | e56abfa611b2b30db98df393bd6ce62207e8fc25 (patch) | |
tree | 712c16589958f10a35d792ac826d1b8e10f879d3 | |
parent | 372f6d7371d42ef23b543d4349cfeabf25ac400b (diff) | |
parent | 93da61f4951b908409fb0dc76facf2c26ca9958a (diff) |
Merge pull request #23540 from vespa-engine/bjorncs/stale-threads-watchdog
Bjorncs/stale threads watchdog
5 files changed, 122 insertions, 5 deletions
diff --git a/container-disc/src/main/java/com/yahoo/container/jdisc/metric/MetricUpdater.java b/container-disc/src/main/java/com/yahoo/container/jdisc/metric/MetricUpdater.java index 4b75bc233d6..3f7990288f5 100644 --- a/container-disc/src/main/java/com/yahoo/container/jdisc/metric/MetricUpdater.java +++ b/container-disc/src/main/java/com/yahoo/container/jdisc/metric/MetricUpdater.java @@ -9,6 +9,7 @@ import com.yahoo.nativec.NativeHeap; import java.lang.management.BufferPoolMXBean; import java.lang.management.ManagementFactory; +import java.lang.management.ThreadMXBean; import java.nio.file.DirectoryStream; import java.nio.file.Files; import java.nio.file.Path; @@ -39,6 +40,7 @@ public class MetricUpdater extends AbstractComponent { private static final String DIRECT_COUNT = "mem.direct.count"; private static final String MEMORY_MAPPINGS_COUNT = "jdisc.memory_mappings"; private static final String OPEN_FILE_DESCRIPTORS = "jdisc.open_file_descriptors"; + private static final String TOTAL_THREADS = "jdisc.threads.total"; private final Scheduler scheduler; @@ -99,6 +101,7 @@ public class MetricUpdater extends AbstractComponent { private final ContainerWatchdogMetrics containerWatchdogMetrics; private final GarbageCollectionMetrics garbageCollectionMetrics; private final JrtMetrics jrtMetrics; + private final ThreadMXBean threadMXBean = ManagementFactory.getThreadMXBean(); public UpdaterTask(Metric metric, ContainerWatchdogMetrics containerWatchdogMetrics) { this.metric = metric; @@ -148,6 +151,7 @@ public class MetricUpdater extends AbstractComponent { metric.set(HEAP_TOTAL_MEMORY_BYTES, totalMemory, null); metric.set(MEMORY_MAPPINGS_COUNT, count_mappings(), null); metric.set(OPEN_FILE_DESCRIPTORS, count_open_files(), null); + metric.set(TOTAL_THREADS, threadMXBean.getThreadCount(), null); directMemoryUsed(); nativeHeapUsed(); diff --git a/container-disc/src/test/java/com/yahoo/container/jdisc/metric/MetricUpdaterTest.java b/container-disc/src/test/java/com/yahoo/container/jdisc/metric/MetricUpdaterTest.java index 4f9e8dd2462..bc196b08ab9 100644 --- a/container-disc/src/test/java/com/yahoo/container/jdisc/metric/MetricUpdaterTest.java +++ b/container-disc/src/test/java/com/yahoo/container/jdisc/metric/MetricUpdaterTest.java @@ -27,7 +27,7 @@ public class MetricUpdaterTest { ContainerWatchdogMetrics containerWatchdogMetrics = mock(ContainerWatchdogMetrics.class); new MetricUpdater(new MockScheduler(), metric, containerWatchdogMetrics); verify(containerWatchdogMetrics, times(1)).emitMetrics(any()); - verify(metric, times(13 + 2 * gcCount)).set(anyString(), any(), any()); + verify(metric, times(14 + 2 * gcCount)).set(anyString(), any(), any()); } private static class MockScheduler implements MetricUpdater.Scheduler { diff --git a/jdisc_core/src/main/java/com/yahoo/jdisc/core/ApplicationLoader.java b/jdisc_core/src/main/java/com/yahoo/jdisc/core/ApplicationLoader.java index dc6ee180321..cdde43bb3c2 100644 --- a/jdisc_core/src/main/java/com/yahoo/jdisc/core/ApplicationLoader.java +++ b/jdisc_core/src/main/java/com/yahoo/jdisc/core/ApplicationLoader.java @@ -127,6 +127,7 @@ public class ApplicationLoader implements BootstrapLoader, ContainerActivator, C @Override public void start() throws Exception { log.finer("Initializing application."); + watchdog.start(); Injector injector = guiceModules.activate(); Application app; if (!appBundles.isEmpty()) { diff --git a/jdisc_core/src/main/java/com/yahoo/jdisc/core/ContainerWatchdog.java b/jdisc_core/src/main/java/com/yahoo/jdisc/core/ContainerWatchdog.java index c010eac02b5..f28d5ea2b26 100644 --- a/jdisc_core/src/main/java/com/yahoo/jdisc/core/ContainerWatchdog.java +++ b/jdisc_core/src/main/java/com/yahoo/jdisc/core/ContainerWatchdog.java @@ -1,18 +1,29 @@ // Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. package com.yahoo.jdisc.core; +import com.yahoo.concurrent.Threads; import com.yahoo.jdisc.Metric; import com.yahoo.jdisc.statistics.ContainerWatchdogMetrics; +import com.yahoo.lang.MutableInteger; +import org.apache.felix.framework.BundleWiringImpl; +import org.osgi.framework.Bundle; +import java.lang.reflect.Field; import java.time.Clock; import java.time.Duration; import java.time.Instant; +import java.util.ArrayList; +import java.util.Collection; +import java.util.Comparator; import java.util.Iterator; import java.util.LinkedList; import java.util.List; +import java.util.Optional; import java.util.concurrent.ScheduledExecutorService; +import java.util.concurrent.ScheduledFuture; import java.util.concurrent.ScheduledThreadPoolExecutor; import java.util.concurrent.TimeUnit; +import java.util.logging.Level; import java.util.logging.Logger; /** @@ -24,7 +35,7 @@ class ContainerWatchdog implements ContainerWatchdogMetrics, AutoCloseable { static final Duration GRACE_PERIOD = Duration.ofMinutes(5); static final Duration UPDATE_PERIOD = Duration.ofMinutes(5); - static final Duration CHECK_PERIOD = Duration.ofSeconds(1); + static final Duration CONTAINER_CHECK_PERIOD = Duration.ofSeconds(1); private static final Logger log = Logger.getLogger(ContainerWatchdog.class.getName()); @@ -37,6 +48,9 @@ class ContainerWatchdog implements ContainerWatchdogMetrics, AutoCloseable { private Instant currentContainerActivationTime; private int numStaleContainers; private Instant lastLogTime; + private ScheduledFuture<?> threadMonitoringTask; + private ScheduledFuture<?> containerMontoringTask; + ContainerWatchdog() { this(new ScheduledThreadPoolExecutor( @@ -53,7 +67,11 @@ class ContainerWatchdog implements ContainerWatchdogMetrics, AutoCloseable { this.scheduler = scheduler; this.clock = clock; this.lastLogTime = clock.instant(); - scheduler.scheduleAtFixedRate(this::monitorDeactivatedContainers, CHECK_PERIOD.getSeconds(), CHECK_PERIOD.getSeconds(), TimeUnit.SECONDS); + } + + void start() { + this.containerMontoringTask = scheduler.scheduleAtFixedRate(this::monitorDeactivatedContainers, + CONTAINER_CHECK_PERIOD.getSeconds(), CONTAINER_CHECK_PERIOD.getSeconds(), TimeUnit.SECONDS); } @Override @@ -67,8 +85,8 @@ class ContainerWatchdog implements ContainerWatchdogMetrics, AutoCloseable { @Override public void close() throws InterruptedException { - scheduler.shutdownNow(); - scheduler.awaitTermination(1, TimeUnit.MINUTES); + if (containerMontoringTask != null) containerMontoringTask.cancel(false); + if (threadMonitoringTask != null) threadMonitoringTask.cancel(false); synchronized (monitor) { deactivatedContainers.clear(); currentContainer = null; @@ -83,6 +101,8 @@ class ContainerWatchdog implements ContainerWatchdogMetrics, AutoCloseable { } currentContainer = nextContainer; currentContainerActivationTime = clock.instant(); + if (threadMonitoringTask != null) threadMonitoringTask.cancel(false); + threadMonitoringTask = scheduler.schedule(this::monitorThreads, 1, TimeUnit.MINUTES); } } @@ -126,6 +146,68 @@ class ContainerWatchdog implements ContainerWatchdogMetrics, AutoCloseable { return clock.instant().isAfter(container.timeDeactivated.plus(GRACE_PERIOD)); } + private void monitorThreads() { + Collection<Thread> threads = Threads.getAllThreads(); + warnOnThreadsHavingClassloaderFromUninstalledBundles(threads); + } + + /** + * Detect and warn on threads leaked by defunct application components. + * These are threads launched by components of application bundles and not stopped after subsequent component deconstruction. + * The below algorithm is a heuristic and will not detect all leaked threads. + */ + private void warnOnThreadsHavingClassloaderFromUninstalledBundles(Collection<Thread> threads) { + record ThreadDetails(Thread thread, Bundle bundle) {} + List<ThreadDetails> staleThreads = new ArrayList<>(); + for (Thread t : threads) { + // Find threads which are sub-classes of java.lang.Thread from an uninstalled bundle + Bundle b = hasClassloaderForUninstalledBundle(t).orElse(null); + if (b != null) { + staleThreads.add(new ThreadDetails(t, b)); + continue; + } + // Find threads which Runnable is a class from an uninstalled bundle + // This may create false positives + b = getTargetFieldOfThread(t).flatMap(ContainerWatchdog::hasClassloaderForUninstalledBundle).orElse(null); + if (b != null) { + staleThreads.add(new ThreadDetails(t, b)); + } + // Note: no reliable mechanism for detecting threads owned by leaked executors (e.g. ScheduledThreadPoolExecutor) + } + if (!staleThreads.isEmpty()) { + StringBuilder msg = new StringBuilder( + ("Found %d stale threads that should have been stopped during previous reconfiguration(s). " + + "These threads have a classloader for a bundle that has been uninstalled: \n") + .formatted(staleThreads.size())); + MutableInteger i = new MutableInteger(1); + Comparator<ThreadDetails> outputOrdering = + Comparator.<ThreadDetails, Long>comparing(td -> td.bundle().getBundleId()) + .thenComparing(td -> td.thread().getName()).thenComparing(td -> td.thread().getId()); + staleThreads.stream().sorted(outputOrdering).forEach(t -> + msg.append("%d) Thread '%s' using bundle '%s'. \n" + .formatted(i.next(), t.thread().getName(), t.bundle().toString()))); + log.log(Level.INFO, msg::toString); // Level 'info' until deemed reliable enough as 'warning' + } + } + + private static Optional<Runnable> getTargetFieldOfThread(Thread t) { + try { + Field f = Thread.class.getDeclaredField("target"); + f.setAccessible(true); + return Optional.ofNullable((Runnable)f.get(t)); + } catch (ReflectiveOperationException e) { + return Optional.empty(); + } + } + + private static Optional<Bundle> hasClassloaderForUninstalledBundle(Object o) { + if (o.getClass().getClassLoader() instanceof BundleWiringImpl.BundleClassLoader cl) { + Bundle b = cl.getBundle(); + if (b.getState() == Bundle.UNINSTALLED) return Optional.of(b); + } + return Optional.empty(); + } + private static class DeactivatedContainer { final ActiveContainer instance; final Instant timeActivated; diff --git a/vespajlib/src/main/java/com/yahoo/concurrent/Threads.java b/vespajlib/src/main/java/com/yahoo/concurrent/Threads.java new file mode 100644 index 00000000000..d30750692e9 --- /dev/null +++ b/vespajlib/src/main/java/com/yahoo/concurrent/Threads.java @@ -0,0 +1,30 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.concurrent; + +import java.util.Arrays; +import java.util.Collection; +import java.util.List; + +/** + * @author bjorncs + */ +public class Threads { + + private Threads() {} + + /** Returns all threads in JVM */ + public static Collection<Thread> getAllThreads() { + ThreadGroup root = Thread.currentThread().getThreadGroup(); + ThreadGroup parent; + while ((parent = root.getParent()) != null) { + root = parent; + } + // The number of threads may increase between activeCount() and enumerate() + Thread[] threads = new Thread[root.activeCount() + 100]; + int count; + while ((count = root.enumerate(threads, true)) == threads.length) { + threads = new Thread[threads.length + 1000]; + } + return List.of(Arrays.copyOf(threads, count)); + } +} |