diff options
author | Bjørn Christian Seime <bjorncs@yahoo-inc.com> | 2017-06-07 16:13:40 +0200 |
---|---|---|
committer | Bjørn Christian Seime <bjorncs@yahoo-inc.com> | 2017-06-08 12:50:07 +0200 |
commit | eab973deb3dddf18fd3b806e04313a469c30a77d (patch) | |
tree | 8eb389d52e598135583b4171b34f228d79f66946 /jdisc_core/src/main/java/com/yahoo/jdisc/core | |
parent | 025eae59e89a473dbff75b435df3dc4710b8a789 (diff) |
Use a single thread to monitor all deactivated ActiveContainers
Add a new watchdog class that combines monitoring and reporting of stale
ActiveContainer instances. Introduce a grace period that gives deactivated
containers some time to be garbage collected before they are reported as stale.
Diffstat (limited to 'jdisc_core/src/main/java/com/yahoo/jdisc/core')
3 files changed, 164 insertions, 42 deletions
diff --git a/jdisc_core/src/main/java/com/yahoo/jdisc/core/ActiveContainerDeactivationWatchdog.java b/jdisc_core/src/main/java/com/yahoo/jdisc/core/ActiveContainerDeactivationWatchdog.java new file mode 100644 index 00000000000..7902a89b88e --- /dev/null +++ b/jdisc_core/src/main/java/com/yahoo/jdisc/core/ActiveContainerDeactivationWatchdog.java @@ -0,0 +1,154 @@ +// Copyright 2017 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.jdisc.core; + +import com.google.inject.Inject; +import com.yahoo.jdisc.Metric; +import com.yahoo.jdisc.statistics.ActiveContainerMetrics; + +import java.time.Clock; +import java.time.Duration; +import java.time.Instant; +import java.util.List; +import java.util.WeakHashMap; +import java.util.concurrent.ScheduledExecutorService; +import java.util.concurrent.ScheduledThreadPoolExecutor; +import java.util.concurrent.TimeUnit; +import java.util.logging.Level; +import java.util.logging.Logger; + +import static java.util.Comparator.comparing; +import static java.util.stream.Collectors.toList; + +/** + * A watchdog that monitors all deactivated {@link ActiveContainer} instances with the purpose of detecting containers + * that are unable to be garbage collected by the JVM. 
+ * + * @author bjorncs + */ +class ActiveContainerDeactivationWatchdog implements ActiveContainerMetrics, AutoCloseable { + static final Duration WATCHDOG_FREQUENCY = Duration.ofMinutes(20); + static final Duration ACTIVE_CONTAINER_GRACE_PERIOD = Duration.ofHours(1); + + private static final Logger log = Logger.getLogger(ActiveContainerDeactivationWatchdog.class.getName()); + + private final Object monitor = new Object(); + private final WeakHashMap<ActiveContainer, LifecycleStats> deactivatedContainers = new WeakHashMap<>(); + private final ScheduledExecutorService scheduler; + private final Clock clock; + + private ActiveContainer currentContainer; + private Instant currentContainerActivationTime; + + @Inject + ActiveContainerDeactivationWatchdog() { + this( + Clock.systemUTC(), + new ScheduledThreadPoolExecutor(1, runnable -> { + Thread thread = new Thread(runnable, "active-container-deactivation-watchdog"); + thread.setDaemon(true); + return thread; + })); + } + + ActiveContainerDeactivationWatchdog(Clock clock, ScheduledExecutorService scheduler) { + this.clock = clock; + this.scheduler = scheduler; + this.scheduler.scheduleWithFixedDelay( + this::warnOnStaleContainers, + WATCHDOG_FREQUENCY.getSeconds(), + WATCHDOG_FREQUENCY.getSeconds(), + TimeUnit.SECONDS); + } + + void onContainerActivation(ActiveContainer nextContainer) { + synchronized (monitor) { + Instant now = clock.instant(); + if (currentContainer != null) { + deactivatedContainers.put(currentContainer, new LifecycleStats(currentContainerActivationTime, now)); + } + currentContainer = nextContainer; + currentContainerActivationTime = now; + } + } + + @Override + public void emitMetrics(Metric metric) { + List<DeactivatedContainer> snapshot = getDeactivatedContainersSnapshot(); + long containersWithRetainedRefsCount = snapshot.stream() + .filter(c -> c.activeContainer.retainCount() > 0) + .count(); + metric.set(TOTAL_DEACTIVATED_CONTAINERS, snapshot.size(), null); + 
metric.set(DEACTIVATED_CONTAINERS_WITH_RETAINED_REFERENCES, containersWithRetainedRefsCount, null); + } + + @Override + public void close() { + synchronized (monitor) { + scheduler.shutdown(); + deactivatedContainers.clear(); + currentContainer = null; + currentContainerActivationTime = null; + } + } + + private void warnOnStaleContainers() { + try { + List<DeactivatedContainer> snapshot = getDeactivatedContainersSnapshot(); + if (snapshot.isEmpty()) return; + logWarning(snapshot); + } catch (Throwable t) { + log.log(Level.WARNING, "Watchdog task died!", t); + } + } + + private List<DeactivatedContainer> getDeactivatedContainersSnapshot() { + Instant now = clock.instant(); + synchronized (monitor) { + return deactivatedContainers.entrySet().stream() + .filter(e -> e.getValue().isPastGracePeriod(now)) + .map(e -> new DeactivatedContainer(e.getKey(), e.getValue())) + .sorted(comparing(e -> e.lifecycleStats.timeActivated)) + .collect(toList()); + } + } + + private static void logWarning(List<DeactivatedContainer> snapshot) { + log.warning(String.format("%s instances of deactivated containers are still alive.", snapshot.size())); + for (DeactivatedContainer deactivatedContainer : snapshot) { + log.warning(" - " + deactivatedContainer.toSummaryString()); + } + } + + private static class LifecycleStats { + public final Instant timeActivated; + public final Instant timeDeactivated; + + public LifecycleStats(Instant timeActivated, Instant timeDeactivated) { + this.timeActivated = timeActivated; + this.timeDeactivated = timeDeactivated; + } + + public boolean isPastGracePeriod(Instant instant) { + return timeDeactivated.plus(ACTIVE_CONTAINER_GRACE_PERIOD).isBefore(instant); + } + } + + private static class DeactivatedContainer { + public final ActiveContainer activeContainer; + public final LifecycleStats lifecycleStats; + + public DeactivatedContainer(ActiveContainer activeContainer, LifecycleStats lifecycleStats) { + this.activeContainer = activeContainer; + 
this.lifecycleStats = lifecycleStats; + } + + public String toSummaryString() { + return String.format("%s: time activated = %s, time deactivated = %s, reference count = %d", + activeContainer.toString(), + lifecycleStats.timeActivated.toString(), + lifecycleStats.timeDeactivated.toString(), + activeContainer.retainCount()); + } + } + +} diff --git a/jdisc_core/src/main/java/com/yahoo/jdisc/core/ApplicationEnvironmentModule.java b/jdisc_core/src/main/java/com/yahoo/jdisc/core/ApplicationEnvironmentModule.java index c6acde814eb..956b2483ee0 100644 --- a/jdisc_core/src/main/java/com/yahoo/jdisc/core/ApplicationEnvironmentModule.java +++ b/jdisc_core/src/main/java/com/yahoo/jdisc/core/ApplicationEnvironmentModule.java @@ -8,7 +8,7 @@ import com.yahoo.jdisc.application.ContainerBuilder; import com.yahoo.jdisc.application.ContainerThread; import com.yahoo.jdisc.application.OsgiFramework; import com.yahoo.jdisc.service.CurrentContainer; -import com.yahoo.jdisc.statistics.ActiveContainerStatistics; +import com.yahoo.jdisc.statistics.ActiveContainerMetrics; import java.util.concurrent.ThreadFactory; @@ -29,7 +29,7 @@ class ApplicationEnvironmentModule extends AbstractModule { bind(CurrentContainer.class).toInstance(loader); bind(OsgiFramework.class).toInstance(loader.osgiFramework()); bind(ThreadFactory.class).to(ContainerThread.Factory.class); - bind(ActiveContainerStatistics.class).toInstance(loader.getActiveContainerStatistics()); + bind(ActiveContainerMetrics.class).toInstance(loader.getActiveContainerMetrics()); } @Provides diff --git a/jdisc_core/src/main/java/com/yahoo/jdisc/core/ApplicationLoader.java b/jdisc_core/src/main/java/com/yahoo/jdisc/core/ApplicationLoader.java index 4e63bc77c9a..8ddc62a6e19 100644 --- a/jdisc_core/src/main/java/com/yahoo/jdisc/core/ApplicationLoader.java +++ b/jdisc_core/src/main/java/com/yahoo/jdisc/core/ApplicationLoader.java @@ -15,23 +15,22 @@ import com.yahoo.jdisc.application.OsgiFramework; import 
com.yahoo.jdisc.application.OsgiHeader; import com.yahoo.jdisc.service.ContainerNotReadyException; import com.yahoo.jdisc.service.CurrentContainer; -import com.yahoo.jdisc.statistics.ActiveContainerStatistics; +import com.yahoo.jdisc.statistics.ActiveContainerMetrics; import org.osgi.framework.Bundle; import org.osgi.framework.BundleContext; import org.osgi.framework.BundleException; -import java.lang.ref.WeakReference; import java.net.URI; import java.util.ArrayList; import java.util.List; import java.util.concurrent.CountDownLatch; -import java.util.concurrent.TimeUnit; import java.util.concurrent.atomic.AtomicReference; import java.util.logging.Level; import java.util.logging.Logger; /** * @author <a href="mailto:simon@yahoo-inc.com">Simon Thoresen</a> + * @author bjorncs */ public class ApplicationLoader implements BootstrapLoader, ContainerActivator, CurrentContainer { @@ -42,7 +41,7 @@ public class ApplicationLoader implements BootstrapLoader, ContainerActivator, C private final AtomicReference<ActiveContainer> containerRef = new AtomicReference<>(); private final Object appLock = new Object(); private final List<Bundle> appBundles = new ArrayList<>(); - private final ActiveContainerStatistics statistics = new ActiveContainerStatistics(); + private final ActiveContainerDeactivationWatchdog watchdog = new ActiveContainerDeactivationWatchdog(); private Application application; private ApplicationInUseTracker applicationInUseTracker; @@ -71,46 +70,14 @@ public class ApplicationLoader implements BootstrapLoader, ContainerActivator, C next.retainReference(applicationInUseTracker); } + watchdog.onContainerActivation(next); prev = containerRef.getAndSet(next); - statistics.onActivated(next); if (prev == null) { return null; } - statistics.onDeactivated(prev); } prev.release(); - DeactivatedContainer deactivatedContainer = prev.shutdown(); - - final WeakReference<ActiveContainer> prevContainerReference = new WeakReference<>(prev); - final Runnable deactivationMonitor 
= () -> { - long waitTimeSeconds = 30L; - long totalTimeWaited = 0L; - - while (!Thread.interrupted()) { - final long currentWaitTimeSeconds = waitTimeSeconds; - totalTimeWaited += currentWaitTimeSeconds; - - Interruption.mapExceptionToThreadState(() -> - Thread.sleep(TimeUnit.MILLISECONDS.convert(currentWaitTimeSeconds, TimeUnit.SECONDS)) - ); - - statistics.printSummaryToLog(); - final ActiveContainer prevContainer = prevContainerReference.get(); - if (prevContainer == null || prevContainer.retainCount() == 0) { - return; - } - log.warning("Previous container not terminated in the last " + totalTimeWaited + " seconds." - + " Reference state={ " + prevContainer.currentState() + " }"); - - waitTimeSeconds = (long) (waitTimeSeconds * 1.2); - } - log.warning("Deactivation monitor thread unexpectedly interrupted"); - }; - final Thread deactivationMonitorThread = new Thread(deactivationMonitor, "Container deactivation monitor"); - deactivationMonitorThread.setDaemon(true); - deactivationMonitorThread.start(); - - return deactivatedContainer; + return prev.shutdown(); } @Override @@ -228,6 +195,7 @@ public class ApplicationLoader implements BootstrapLoader, ContainerActivator, C @Override public void destroy() { log.finer("Destroying application loader."); + watchdog.close(); try { osgiFramework.stop(); } catch (BundleException e) { @@ -241,8 +209,8 @@ public class ApplicationLoader implements BootstrapLoader, ContainerActivator, C } } - public ActiveContainerStatistics getActiveContainerStatistics() { - return statistics; + public ActiveContainerMetrics getActiveContainerMetrics() { + return watchdog; } public OsgiFramework osgiFramework() { |