diff options
author | Jon Bratseth <bratseth@yahoo-inc.com> | 2016-06-15 23:09:44 +0200 |
---|---|---|
committer | Jon Bratseth <bratseth@yahoo-inc.com> | 2016-06-15 23:09:44 +0200 |
commit | 72231250ed81e10d66bfe70701e64fa5fe50f712 (patch) | |
tree | 2728bba1131a6f6e5bdf95afec7d7ff9358dac50 /container-core/src/main/java/com/yahoo/container/protect |
Publish
Diffstat (limited to 'container-core/src/main/java/com/yahoo/container/protect')
7 files changed, 373 insertions, 0 deletions
diff --git a/container-core/src/main/java/com/yahoo/container/protect/Error.java b/container-core/src/main/java/com/yahoo/container/protect/Error.java new file mode 100644 index 00000000000..08ef8a0393d --- /dev/null +++ b/container-core/src/main/java/com/yahoo/container/protect/Error.java @@ -0,0 +1,37 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.container.protect; + +/** + * Error codes to use in ErrorMessage instances for container applications. + * + * @author <a href="mailto:steinar@yahoo-inc.com">Steinar Knutsen</a> + */ +public enum Error { + + NO_BACKENDS_IN_SERVICE(0), + NULL_QUERY(1), + REQUEST_TOO_LARGE(2), + ILLEGAL_QUERY(3), + INVALID_QUERY_PARAMETER(4), + UNSPECIFIED(5), + ERROR_IN_PLUGIN(6), + INVALID_QUERY_TRANSFORMATION(7), + RESULT_HAS_ERRORS(8), + SERVER_IS_MISCONFIGURED(9), + BACKEND_COMMUNICATION_ERROR(10), + NO_ANSWER_WHEN_PINGING_NODE(11), + TIMEOUT(12), + EMPTY_DOCUMENTS(13), + UNAUTHORIZED(14), + FORBIDDEN(15), + NOT_FOUND(16), + BAD_REQUEST(17), + INTERNAL_SERVER_ERROR(18); + + public final int code; + + Error(int code) { + this.code = code; + } + +} diff --git a/container-core/src/main/java/com/yahoo/container/protect/FreezeDetector.java b/container-core/src/main/java/com/yahoo/container/protect/FreezeDetector.java new file mode 100644 index 00000000000..97b8304babc --- /dev/null +++ b/container-core/src/main/java/com/yahoo/container/protect/FreezeDetector.java @@ -0,0 +1,64 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.container.protect; + +import java.util.Timer; + +import com.yahoo.component.AbstractComponent; +import com.yahoo.concurrent.ThreadLocalDirectory; +import com.yahoo.container.core.DiagnosticsConfig; + +/** + * Runs and initializes a {@link Watchdog} instance. 
+ * + * @author <a href="mailto:steinar@yahoo-inc.com">Steinar Knutsen</a> + * @deprecated this is not in use and will be removed in the next major release + */ +@Deprecated +public class FreezeDetector extends AbstractComponent { + + private final Timer timeoutWatchdog; + private final Watchdog watchdog; + + public FreezeDetector(DiagnosticsConfig diagnosticsConfig) { + if (diagnosticsConfig.disabled()) { + timeoutWatchdog = null; + watchdog = null; + } else { + timeoutWatchdog = new Timer("TimeoutWatchdog", true); + watchdog = new Watchdog(diagnosticsConfig.timeoutfraction(), + diagnosticsConfig.minimumqps(), + diagnosticsConfig.shutdown()); + timeoutWatchdog.schedule(watchdog, 10L * 1000L, 100L); + } + } + + public void register(ThreadLocalDirectory<TimeoutRate, Boolean> timeouts) { + if (watchdog == null) { + return; + } + watchdog.addTimeouts(timeouts); + } + + public boolean isBreakdown() { + if (watchdog == null) { + return false; + } + return watchdog.isBreakdown(); + } + + public void unRegister(ThreadLocalDirectory<TimeoutRate, Boolean> timeouts) { + if (watchdog == null) { + return; + } + watchdog.removeTimeouts(timeouts); + } + + @Override + public void deconstruct() { + super.deconstruct(); + if (timeoutWatchdog != null) { + timeoutWatchdog.cancel(); + } + } + +} diff --git a/container-core/src/main/java/com/yahoo/container/protect/ProcessTerminator.java b/container-core/src/main/java/com/yahoo/container/protect/ProcessTerminator.java new file mode 100644 index 00000000000..7ff077f9f30 --- /dev/null +++ b/container-core/src/main/java/com/yahoo/container/protect/ProcessTerminator.java @@ -0,0 +1,32 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.container.protect; + +import com.yahoo.protect.Process; + +/** + * An injectable terminator of the Java vm. 
+ * Components that encounter conditions where the vm should be terminated should + * request that an instance of this be injected. That makes termination testable + * as tests can create subclasses of this which register the termination request + * rather than terminating. + * + * @author bratseth + */ +public class ProcessTerminator { + + /** Logs and dies without taking a thread dump */ + public void logAndDie(String message) { + logAndDie(message, false); + } + + /** + * Logs and dies + * + * @param dumpThreads if true the stack trace of all threads is dumped to the + * log with level info before shutting down + */ + public void logAndDie(String message, boolean dumpThreads) { + Process.logAndDie(message, dumpThreads); + } + +} diff --git a/container-core/src/main/java/com/yahoo/container/protect/TimeoutCollector.java b/container-core/src/main/java/com/yahoo/container/protect/TimeoutCollector.java new file mode 100644 index 00000000000..ee2f6419423 --- /dev/null +++ b/container-core/src/main/java/com/yahoo/container/protect/TimeoutCollector.java @@ -0,0 +1,26 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.container.protect; + +import com.yahoo.concurrent.ThreadLocalDirectory.Updater; + +/** + * Allocator and glue for sampling timeouts in SearchHandler. 
+ * + * @author <a href="mailto:steinar@yahoo-inc.com">Steinar Knutsen</a> + * @deprecated this is not in use and will be removed in the next major release + */ +@Deprecated +public final class TimeoutCollector implements Updater<TimeoutRate, Boolean> { + + @Override + public TimeoutRate createGenerationInstance(TimeoutRate previous) { + return new TimeoutRate(); + } + + @Override + public TimeoutRate update(TimeoutRate current, Boolean x) { + current.addQuery(x); + return current; + } + +} diff --git a/container-core/src/main/java/com/yahoo/container/protect/TimeoutRate.java b/container-core/src/main/java/com/yahoo/container/protect/TimeoutRate.java new file mode 100644 index 00000000000..79e52b49183 --- /dev/null +++ b/container-core/src/main/java/com/yahoo/container/protect/TimeoutRate.java @@ -0,0 +1,40 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.container.protect; + +/** + * Helper class for measuring how many queries time out. 
+ * + * @author <a href="mailto:steinar@yahoo-inc.com">Steinar Knutsen</a> + * @deprecated this is not in use and will be removed in the next major release + */ +@Deprecated +public final class TimeoutRate { + + private int timeouts = 0; + private int total = 0; + + public void addQuery(Boolean timeout) { + if (timeout) { + timeouts += 1; + } + total += 1; + } + + public void merge(TimeoutRate other) { + timeouts += other.timeouts; + total += other.total; + } + + public double timeoutFraction() { + if (total == 0) { + return 0.0d; + } else { + return ((double) timeouts) / ((double) total); + } + } + + public int getTotal() { + return total; + } + +} diff --git a/container-core/src/main/java/com/yahoo/container/protect/Watchdog.java b/container-core/src/main/java/com/yahoo/container/protect/Watchdog.java new file mode 100644 index 00000000000..b86da523a0a --- /dev/null +++ b/container-core/src/main/java/com/yahoo/container/protect/Watchdog.java @@ -0,0 +1,167 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.container.protect; + +import java.util.ArrayList; +import java.util.List; +import java.util.Map; +import java.util.TimerTask; +import java.util.logging.Level; +import java.util.logging.Logger; + +import com.yahoo.concurrent.ThreadLocalDirectory; +import com.yahoo.log.LogLevel; +import com.yahoo.protect.Process; + +/** + * Watchdog for a frozen process, too many timeouts, etc. 
+ * + * @author <a href="mailto:steinar@yahoo-inc.com">Steinar Knutsen</a> + * @deprecated this is not in use and will be removed in the next major release + */ +@Deprecated +class Watchdog extends TimerTask { + + public static final String FREEZEDETECTOR_DISABLE = "vespa.freezedetector.disable"; + Logger log = Logger.getLogger(Watchdog.class.getName()); + private long lastRun = 0L; + private long lastQpsCheck = 0L; + // Local copy to avoid ever _reading_ the volatile version + private boolean breakdownCopy = false; + private volatile boolean breakdown; + // The fraction of queries which must time out to view the QRS as being + // in breakdown + private final double timeoutThreshold; + // The minimal QPS to care about timeoutThreshold + private final int minimalQps; + private final boolean disableSevereBreakdownCheck; + private final List<ThreadLocalDirectory<TimeoutRate, Boolean>> timeoutRegistry = new ArrayList<>(); + private final boolean shutdownIfFrozen; + + Watchdog(double timeoutThreshold, int minimalQps, boolean shutdownIfFrozen) { + this.timeoutThreshold = timeoutThreshold; + this.minimalQps = minimalQps; + if (System.getProperty(FREEZEDETECTOR_DISABLE) != null) { + disableSevereBreakdownCheck = true; + } else { + disableSevereBreakdownCheck = false; + } + this.shutdownIfFrozen = shutdownIfFrozen; + } + + @Override + public void run() { + long now = System.currentTimeMillis(); + if (lastRun != 0L) { + severeBreakdown(now); + queryTimeouts(now); + } else { + lastQpsCheck = now; + } + lastRun = now; + } + + private void severeBreakdown(final long now) { + if (disableSevereBreakdownCheck) { + return; + } + if (now - lastRun < 5000L) { + return; + } + + threadStackMessage(); + + if (shutdownIfFrozen) { + Process.logAndDie("Watchdog timer meant to run ten times per second" + + " not run for five seconds or more." 
+ + " Assuming severe failure or overloaded node, shutting down container."); + } else { + log.log(LogLevel.ERROR, + "A watchdog meant to run 10 times a second has not been invoked for 5 seconds." + + " This usually means this machine is swapping or otherwise severely overloaded."); + } + } + + private void threadStackMessage() { + log.log(LogLevel.INFO, "System seems unresponsive, performing full thread dump for diagnostics."); + threadDump(); + log.log(LogLevel.INFO, "End of diagnostic thread dump."); + } + + private void threadDump() { + try { + Map<Thread, StackTraceElement[]> allStackTraces = Thread.getAllStackTraces(); + for (Map.Entry<Thread, StackTraceElement[]> e : allStackTraces.entrySet()) { + Thread t = e.getKey(); + StackTraceElement[] stack = e.getValue(); + StringBuilder forOneThread = new StringBuilder(); + int initLen; + forOneThread.append("Stack for thread: ").append(t.getName()).append(": "); + initLen = forOneThread.length(); + for (StackTraceElement s : stack) { + if (forOneThread.length() > initLen) { + forOneThread.append(" "); + } + forOneThread.append(s.toString()); + } + log.log(LogLevel.INFO, forOneThread.toString()); + } + } catch (Exception e) { + // just give up... + } + } + + private void queryTimeouts(final long now) { + // only check query timeout every 10s + if (now - lastQpsCheck < 10000L) { + return; + } else { + lastQpsCheck = now; + } + + final TimeoutRate globalState = new TimeoutRate(); + synchronized (timeoutRegistry) { + for (ThreadLocalDirectory<TimeoutRate, Boolean> timeouts : timeoutRegistry) { + final List<TimeoutRate> threadStates = timeouts.fetch(); + for (final TimeoutRate t : threadStates) { + globalState.merge(t); + } + } + } + if (globalState.timeoutFraction() > timeoutThreshold && globalState.getTotal() > (10 * minimalQps)) { + setBreakdown(true); + log.log(Level.WARNING, "Too many queries timed out. 
Assuming container is in breakdown."); + } else { + if (!breakdown()) { + return; + } + setBreakdown(false); + log.log(Level.WARNING, "Fewer queries timed out. Assuming container is no longer in breakdown."); + } + } + + private void setBreakdown(final boolean state) { + breakdown = state; + breakdownCopy = state; + } + + private boolean breakdown() { + return breakdownCopy; + } + + boolean isBreakdown() { + return breakdown; + } + + void addTimeouts(ThreadLocalDirectory<TimeoutRate, Boolean> t) { + synchronized (timeoutRegistry) { + timeoutRegistry.add(t); + } + } + + void removeTimeouts(ThreadLocalDirectory<TimeoutRate, Boolean> timeouts) { + synchronized (timeoutRegistry) { + timeoutRegistry.remove(timeouts); + } + } + +} diff --git a/container-core/src/main/java/com/yahoo/container/protect/package-info.java b/container-core/src/main/java/com/yahoo/container/protect/package-info.java new file mode 100644 index 00000000000..a026833cc21 --- /dev/null +++ b/container-core/src/main/java/com/yahoo/container/protect/package-info.java @@ -0,0 +1,7 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +@ExportPackage +@PublicApi +package com.yahoo.container.protect; + +import com.yahoo.api.annotations.PublicApi; +import com.yahoo.osgi.annotation.ExportPackage; |