aboutsummaryrefslogtreecommitdiffstats
path: root/jdisc_core/src/main/java/com/yahoo/jdisc/core/ActiveContainerDeactivationWatchdog.java
blob: a6b4ef03c614a8018a14dde1c496d615a6cd1c61 (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
// Copyright 2017 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
package com.yahoo.jdisc.core;

import com.google.inject.Inject;
import com.yahoo.jdisc.Metric;
import com.yahoo.jdisc.statistics.ActiveContainerMetrics;

import java.time.Clock;
import java.time.Duration;
import java.time.Instant;
import java.util.List;
import java.util.WeakHashMap;
import java.util.concurrent.ScheduledExecutorService;
import java.util.concurrent.ScheduledThreadPoolExecutor;
import java.util.concurrent.TimeUnit;
import java.util.logging.Level;
import java.util.logging.Logger;

import static java.util.Comparator.comparing;
import static java.util.stream.Collectors.toList;

/**
 * Watchdog that keeps track of every {@link ActiveContainer} that has been replaced by a newer one, with the aim
 * of revealing containers that the JVM never manages to garbage collect.
 *
 * <p>Replaced containers are stored as weak keys, so an entry only remains for as long as its container is still
 * reachable from somewhere else. Containers that are still present after {@link #ACTIVE_CONTAINER_GRACE_PERIOD}
 * are logged periodically and reported through {@link #emitMetrics(Metric)}.</p>
 *
 * @author bjorncs
 */
class ActiveContainerDeactivationWatchdog implements ActiveContainerMetrics, AutoCloseable {

    /** Interval between each check for (and logging of) stale containers. */
    static final Duration WATCHDOG_FREQUENCY = Duration.ofMinutes(20);

    /** How long a container may stay alive after deactivation before it is considered stale. */
    static final Duration ACTIVE_CONTAINER_GRACE_PERIOD = Duration.ofHours(1);

    /** Interval between each explicit GC request; five minutes shorter than the grace period. */
    static final Duration GC_TRIGGER_FREQUENCY = ACTIVE_CONTAINER_GRACE_PERIOD.minusMinutes(5);

    private static final Logger log = Logger.getLogger(ActiveContainerDeactivationWatchdog.class.getName());

    private final Clock clock;
    private final ScheduledExecutorService scheduler;

    /** Guards the weak map below as well as the two "current container" fields. */
    private final Object lock = new Object();
    private final WeakHashMap<ActiveContainer, LifecycleStats> deactivatedContainers = new WeakHashMap<>();

    private ActiveContainer current;
    private Instant currentActivatedAt;

    /** Creates a watchdog backed by the system UTC clock and a two-thread scheduler using daemon threads. */
    @Inject
    ActiveContainerDeactivationWatchdog() {
        this(Clock.systemUTC(),
             new ScheduledThreadPoolExecutor(2, ActiveContainerDeactivationWatchdog::newWatchdogThread));
    }

    /**
     * Creates a watchdog and immediately registers its two periodic tasks on the given scheduler: first the stale
     * container check, then the GC trigger.
     *
     * @param clock     source of the timestamps recorded for activation and deactivation
     * @param scheduler executor that runs the periodic tasks; it is shut down by {@link #close()}
     */
    ActiveContainerDeactivationWatchdog(Clock clock, ScheduledExecutorService scheduler) {
        this.clock = clock;
        this.scheduler = scheduler;

        long staleCheckSeconds = WATCHDOG_FREQUENCY.getSeconds();
        scheduler.scheduleAtFixedRate(
                this::warnOnStaleContainers, staleCheckSeconds, staleCheckSeconds, TimeUnit.SECONDS);

        long gcSeconds = GC_TRIGGER_FREQUENCY.getSeconds();
        scheduler.scheduleAtFixedRate(
                ActiveContainerDeactivationWatchdog::triggerGc, gcSeconds, gcSeconds, TimeUnit.SECONDS);
    }

    /** Thread factory for the default scheduler: a named daemon thread per worker. */
    private static Thread newWatchdogThread(Runnable task) {
        Thread worker = new Thread(task, "active-container-deactivation-watchdog");
        worker.setDaemon(true);
        return worker;
    }

    /**
     * Registers {@code nextContainer} as the current container. The container it replaces, if any, is moved to
     * the set of deactivated containers with the current time as its deactivation time.
     *
     * @param nextContainer the container that has just been activated
     */
    void onContainerActivation(ActiveContainer nextContainer) {
        synchronized (lock) {
            Instant switchTime = clock.instant();
            ActiveContainer previous = current;
            if (previous != null) {
                deactivatedContainers.put(previous, new LifecycleStats(currentActivatedAt, switchTime));
            }
            current = nextContainer;
            currentActivatedAt = switchTime;
        }
    }

    /**
     * Reports the number of deactivated containers that have outlived the grace period, and how many of those
     * still have a positive retain count.
     *
     * @param metric the sink receiving the two values
     */
    @Override
    public void emitMetrics(Metric metric) {
        List<DeactivatedContainer> stale = staleContainersSnapshot();
        long stillRetained = 0;
        for (DeactivatedContainer candidate : stale) {
            if (candidate.activeContainer.retainCount() > 0) {
                stillRetained++;
            }
        }
        metric.set(TOTAL_DEACTIVATED_CONTAINERS, stale.size(), null);
        metric.set(DEACTIVATED_CONTAINERS_WITH_RETAINED_REFERENCES, stillRetained, null);
    }

    /** Shuts down the scheduler and drops every container reference held by this watchdog. */
    @Override
    public void close() {
        synchronized (lock) {
            scheduler.shutdown();
            deactivatedContainers.clear();
            current = null;
            currentActivatedAt = null;
        }
    }

    /**
     * Periodic task: logs a warning listing the stale containers, if there are any. Nothing is allowed to
     * propagate out of this method; any failure is logged instead.
     */
    private void warnOnStaleContainers() {
        try {
            List<DeactivatedContainer> stale = staleContainersSnapshot();
            if (!stale.isEmpty()) {
                logWarning(stale);
            }
        } catch (Throwable t) {
            log.log(Level.WARNING, "Watchdog task died!", t);
        }
    }

    /** Periodic task: asks the JVM to collect garbage and run pending finalizers. */
    private static void triggerGc() {
        // ActiveContainer has a finalizer, so gc -> finalizer -> gc is required.
        System.gc();
        System.runFinalization();
        System.gc();
    }

    /**
     * Collects the deactivated containers that are still present and past their grace period.
     *
     * @return a new list ordered by activation time, oldest first
     */
    private List<DeactivatedContainer> staleContainersSnapshot() {
        Instant now = clock.instant();
        List<DeactivatedContainer> stale;
        synchronized (lock) {
            stale = deactivatedContainers.entrySet().stream()
                    .filter(entry -> entry.getValue().isPastGracePeriod(now))
                    .map(entry -> new DeactivatedContainer(entry.getKey(), entry.getValue()))
                    .sorted(comparing(DeactivatedContainer::activatedAt))
                    .collect(toList());
        }
        return stale;
    }

    /** Logs one header line with the number of stale containers, followed by one summary line per container. */
    private static void logWarning(List<DeactivatedContainer> snapshot) {
        String header = String.format("%s instances of deactivated containers are still alive.", snapshot.size());
        log.warning(header);
        for (DeactivatedContainer stale : snapshot) {
            String line = " - " + stale.toSummaryString();
            log.warning(line);
        }
    }

    /** Activation and deactivation timestamps of a single container. */
    private static final class LifecycleStats {
        final Instant timeActivated;
        final Instant timeDeactivated;

        LifecycleStats(Instant timeActivated, Instant timeDeactivated) {
            this.timeActivated = timeActivated;
            this.timeDeactivated = timeDeactivated;
        }

        /** Returns whether the grace period following deactivation ended strictly before {@code instant}. */
        boolean isPastGracePeriod(Instant instant) {
            Instant gracePeriodEnd = timeDeactivated.plus(ACTIVE_CONTAINER_GRACE_PERIOD);
            return gracePeriodEnd.isBefore(instant);
        }
    }

    /** A deactivated container paired with its lifecycle timestamps. */
    private static final class DeactivatedContainer {
        final ActiveContainer activeContainer;
        final LifecycleStats lifecycleStats;

        DeactivatedContainer(ActiveContainer activeContainer, LifecycleStats lifecycleStats) {
            this.activeContainer = activeContainer;
            this.lifecycleStats = lifecycleStats;
        }

        /** Sort key used when ordering a snapshot. */
        Instant activatedAt() {
            return lifecycleStats.timeActivated;
        }

        /** Renders the container, both timestamps and its current retain count on a single line. */
        String toSummaryString() {
            String container = activeContainer.toString();
            String activated = lifecycleStats.timeActivated.toString();
            String deactivated = lifecycleStats.timeDeactivated.toString();
            return String.format("%s: time activated = %s, time deactivated = %s, reference count = %d",
                    container, activated, deactivated, activeContainer.retainCount());
        }
    }

}