summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorHenning Baldersheim <balder@yahoo-inc.com>2021-09-27 08:16:25 +0200
committerGitHub <noreply@github.com>2021-09-27 08:16:25 +0200
commit9377da84086392e118d69b467006e73fe9ae3f70 (patch)
tree33680b4a48de80b5a6014fcbd525ddf4e0cc046a
parent9dabf1ce317325a334fd5f6aac531456acea17c4 (diff)
parentea831b2d3eb0e34e5240b30ec86b0c45dc33e928 (diff)
Merge pull request #19293 from vespa-engine/balder/disable-heapdump-of-shutdown-timeout-by-default
Disable heapdumps on shutdown timeout by default. MERGEOK
-rw-r--r--config-model-api/src/main/java/com/yahoo/config/model/api/ModelContext.java2
-rw-r--r--config-model/src/main/java/com/yahoo/config/model/deploy/TestProperties.java13
-rw-r--r--config-model/src/main/java/com/yahoo/vespa/model/container/Container.java8
-rw-r--r--config-model/src/test/java/com/yahoo/vespa/model/container/xml/ContainerModelBuilderTest.java50
-rw-r--r--configserver/src/main/java/com/yahoo/vespa/config/server/deploy/ModelContextImpl.java6
-rw-r--r--container-core/src/main/resources/configdefinitions/container.qr.def6
-rw-r--r--container-disc/src/main/java/com/yahoo/container/jdisc/ConfiguredApplication.java21
-rw-r--r--flags/src/main/java/com/yahoo/vespa/flags/Flags.java14
-rw-r--r--vespajlib/src/main/java/com/yahoo/protect/Process.java8
9 files changed, 98 insertions, 30 deletions
diff --git a/config-model-api/src/main/java/com/yahoo/config/model/api/ModelContext.java b/config-model-api/src/main/java/com/yahoo/config/model/api/ModelContext.java
index 9ee36831d6a..2870b88d105 100644
--- a/config-model-api/src/main/java/com/yahoo/config/model/api/ModelContext.java
+++ b/config-model-api/src/main/java/com/yahoo/config/model/api/ModelContext.java
@@ -87,6 +87,8 @@ public interface ModelContext {
@ModelFeatureFlag(owners = {"baldersheim"}) default int maxConcurrentMergesPerNode() { throw new UnsupportedOperationException("TODO specify default value"); }
@ModelFeatureFlag(owners = {"baldersheim"}) default int maxMergeQueueSize() { throw new UnsupportedOperationException("TODO specify default value"); }
@ModelFeatureFlag(owners = {"baldersheim"}) default boolean dryRunOnnxOnSetup() { return true; }
+ @ModelFeatureFlag(owners = {"baldersheim"}) default boolean containerDumpHeapOnShutdownTimeout() { throw new UnsupportedOperationException("TODO specify default value"); }
+ @ModelFeatureFlag(owners = {"baldersheim"}) default double containerShutdownTimeout() { throw new UnsupportedOperationException("TODO specify default value"); }
@ModelFeatureFlag(owners = {"geirst"}) default boolean enableFeedBlockInDistributor() { return true; }
@ModelFeatureFlag(owners = {"bjorncs", "tokle"}) default List<String> allowedAthenzProxyIdentities() { return List.of(); }
@ModelFeatureFlag(owners = {"vekterli"}) default int maxActivationInhibitedOutOfSyncGroups() { return 0; }
diff --git a/config-model/src/main/java/com/yahoo/config/model/deploy/TestProperties.java b/config-model/src/main/java/com/yahoo/config/model/deploy/TestProperties.java
index 4bf20e75a5d..dea6b980692 100644
--- a/config-model/src/main/java/com/yahoo/config/model/deploy/TestProperties.java
+++ b/config-model/src/main/java/com/yahoo/config/model/deploy/TestProperties.java
@@ -63,6 +63,8 @@ public class TestProperties implements ModelContext.Properties, ModelContext.Fea
private double resourceLimitDisk = 0.8;
private double resourceLimitMemory = 0.8;
private double minNodeRatioPerGroup = 0.0;
+ private boolean containerDumpHeapOnShutdownTimeout = false;
+ private double containerShutdownTimeout = 50.0;
@Override public ModelContext.FeatureFlags featureFlags() { return this; }
@Override public boolean multitenant() { return multitenant; }
@@ -106,7 +108,16 @@ public class TestProperties implements ModelContext.Properties, ModelContext.Fea
@Override public double minNodeRatioPerGroup() { return minNodeRatioPerGroup; }
@Override public int metricsproxyNumThreads() { return 1; }
@Override public boolean enforceRankProfileInheritance() { return enforceRankProfileInheritance; }
-
+ @Override public double containerShutdownTimeout() { return containerShutdownTimeout; }
+ @Override public boolean containerDumpHeapOnShutdownTimeout() { return containerDumpHeapOnShutdownTimeout; }
+ public TestProperties containerDumpHeapOnShutdownTimeout(boolean value) {
+ containerDumpHeapOnShutdownTimeout = value;
+ return this;
+ }
+ public TestProperties containerShutdownTimeout(double value) {
+ containerShutdownTimeout = value;
+ return this;
+ }
public TestProperties enforceRankProfileInheritance(boolean value) {
enforceRankProfileInheritance = value;
return this;
diff --git a/config-model/src/main/java/com/yahoo/vespa/model/container/Container.java b/config-model/src/main/java/com/yahoo/vespa/model/container/Container.java
index b915453b593..cdf8d592391 100644
--- a/config-model/src/main/java/com/yahoo/vespa/model/container/Container.java
+++ b/config-model/src/main/java/com/yahoo/vespa/model/container/Container.java
@@ -74,6 +74,8 @@ public abstract class Container extends AbstractService implements
private final boolean retired;
/** The unique index of this node */
private final int index;
+ private final boolean dumpHeapOnShutdownTimeout;
+ private final double shutdownTimeoutS;
private final ComponentGroup<Handler<?>> handlers = new ComponentGroup<>(this, "handler");
private final ComponentGroup<Component<?, ?>> components = new ComponentGroup<>(this, "components");
@@ -90,6 +92,8 @@ public abstract class Container extends AbstractService implements
this.parent = parent;
this.retired = retired;
this.index = index;
+ dumpHeapOnShutdownTimeout = deployState.featureFlags().containerDumpHeapOnShutdownTimeout();
+ shutdownTimeoutS = deployState.featureFlags().containerShutdownTimeout();
this.defaultHttpServer = new JettyHttpServer("DefaultHttpServer", containerClusterOrNull(parent), deployState.isHosted());
if (getHttp() == null) {
addChild(defaultHttpServer);
@@ -315,7 +319,9 @@ public abstract class Container extends AbstractService implements
.slobrokId(serviceSlobrokId()))
.filedistributor(filedistributorConfig())
.discriminator((clusterName != null ? clusterName + "." : "" ) + name)
- .nodeIndex(index);
+ .nodeIndex(index)
+ .shutdown.dumpHeapOnTimeout(dumpHeapOnShutdownTimeout)
+ .timeout(shutdownTimeoutS);
}
/** Returns the jvm args set explicitly for this node */
diff --git a/config-model/src/test/java/com/yahoo/vespa/model/container/xml/ContainerModelBuilderTest.java b/config-model/src/test/java/com/yahoo/vespa/model/container/xml/ContainerModelBuilderTest.java
index 2505aa3b01e..912ca23dce2 100644
--- a/config-model/src/test/java/com/yahoo/vespa/model/container/xml/ContainerModelBuilderTest.java
+++ b/config-model/src/test/java/com/yahoo/vespa/model/container/xml/ContainerModelBuilderTest.java
@@ -7,6 +7,7 @@ import com.yahoo.config.application.api.ApplicationPackage;
import com.yahoo.config.model.NullConfigModelRegistry;
import com.yahoo.config.model.api.ContainerEndpoint;
import com.yahoo.config.model.api.EndpointCertificateSecrets;
+import com.yahoo.config.model.api.ModelContext;
import com.yahoo.config.model.api.TenantSecretStore;
import com.yahoo.config.model.builder.xml.test.DomBuilderTest;
import com.yahoo.config.model.deploy.DeployState;
@@ -681,36 +682,47 @@ public class ContainerModelBuilderTest extends ContainerModelBuilderTestBase {
@Test
public void qrconfig_is_produced() throws IOException, SAXException {
+ QrConfig qr = getQrConfig(new TestProperties());
+ String hostname = HostName.getLocalhost(); // Using the same way of getting hostname as filedistribution model
+ assertEquals("default.container.0", qr.discriminator());
+ assertEquals(19102, qr.rpc().port());
+ assertEquals("vespa/service/default/container.0", qr.rpc().slobrokId());
+ assertTrue(qr.rpc().enabled());
+ assertEquals("", qr.rpc().host());
+ assertFalse(qr.restartOnDeploy());
+ assertEquals("filedistribution/" + hostname, qr.filedistributor().configid());
+ assertEquals(50.0, qr.shutdown().timeout(), 0.00000000000001);
+ assertFalse(qr.shutdown().dumpHeapOnTimeout());
+ }
+ private QrConfig getQrConfig(ModelContext.Properties properties) throws IOException, SAXException {
String servicesXml =
"<services>" +
- "<admin version='3.0'>" +
- " <nodes count='2'/>" +
- "</admin>" +
- "<container id ='default' version='1.0'>" +
- " <nodes>" +
- " <node hostalias='node1' />" +
- " </nodes>" +
- "</container>" +
- "</services>";
+ " <admin version='3.0'>" +
+ " <nodes count='2'/>" +
+ " </admin>" +
+ " <container id ='default' version='1.0'>" +
+ " <nodes>" +
+ " <node hostalias='node1' />" +
+ " </nodes>" +
+ " </container>" +
+ "</services>";
ApplicationPackage applicationPackage = new MockApplicationPackage.Builder()
.withServices(servicesXml)
.build();
VespaModel model = new VespaModel(new NullConfigModelRegistry(), new DeployState.Builder()
.applicationPackage(applicationPackage)
- .properties(new TestProperties())
+ .properties(properties)
.build());
- String hostname = HostName.getLocalhost(); // Using the same way of getting hostname as filedistribution model
+ return model.getConfig(QrConfig.class, "default/container.0");
+ }
- QrConfig config = model.getConfig(QrConfig.class, "default/container.0");
- assertEquals("default.container.0", config.discriminator());
- assertEquals(19102, config.rpc().port());
- assertEquals("vespa/service/default/container.0", config.rpc().slobrokId());
- assertTrue(config.rpc().enabled());
- assertEquals("", config.rpc().host());
- assertFalse(config.restartOnDeploy());
- assertEquals("filedistribution/" + hostname, config.filedistributor().configid());
+ @Test
+ public void control_container_shutdown() throws IOException, SAXException {
+ QrConfig qr = getQrConfig(new TestProperties().containerShutdownTimeout(133).containerDumpHeapOnShutdownTimeout(true));
+ assertEquals(133.0, qr.shutdown().timeout(), 0.00000000000001);
+ assertTrue(qr.shutdown().dumpHeapOnTimeout());
}
@Test
diff --git a/configserver/src/main/java/com/yahoo/vespa/config/server/deploy/ModelContextImpl.java b/configserver/src/main/java/com/yahoo/vespa/config/server/deploy/ModelContextImpl.java
index 89987891c61..0198e5899da 100644
--- a/configserver/src/main/java/com/yahoo/vespa/config/server/deploy/ModelContextImpl.java
+++ b/configserver/src/main/java/com/yahoo/vespa/config/server/deploy/ModelContextImpl.java
@@ -189,6 +189,8 @@ public class ModelContextImpl implements ModelContext {
private final int metricsproxyNumThreads;
private final boolean enforceRankProfileInheritance;
private final boolean newLocationBrokerLogic;
+ private final boolean containerDumpHeapOnShutdownTimeout;
+ private final double containerShutdownTimeout;
public FeatureFlags(FlagSource source, ApplicationId appId) {
this.defaultTermwiseLimit = flagValue(source, appId, Flags.DEFAULT_TERM_WISE_LIMIT);
@@ -216,6 +218,8 @@ public class ModelContextImpl implements ModelContext {
this.metricsproxyNumThreads = flagValue(source, appId, Flags.METRICSPROXY_NUM_THREADS);
this.enforceRankProfileInheritance = flagValue(source, appId, Flags.ENFORCE_RANK_PROFILE_INHERITANCE);
this.newLocationBrokerLogic = flagValue(source, appId, Flags.NEW_LOCATION_BROKER_LOGIC);
+ this.containerDumpHeapOnShutdownTimeout = flagValue(source, appId, Flags.CONTAINER_DUMP_HEAP_ON_SHUTDOWN_TIMEOUT);
+ this.containerShutdownTimeout = flagValue(source, appId,Flags.CONTAINER_SHUTDOWN_TIMEOUT);
}
@Override public double defaultTermwiseLimit() { return defaultTermwiseLimit; }
@@ -245,6 +249,8 @@ public class ModelContextImpl implements ModelContext {
@Override public double minNodeRatioPerGroup() { return minNodeRatioPerGroup; }
@Override public int metricsproxyNumThreads() { return metricsproxyNumThreads; }
@Override public boolean newLocationBrokerLogic() { return newLocationBrokerLogic; }
+ @Override public double containerShutdownTimeout() { return containerShutdownTimeout; }
+ @Override public boolean containerDumpHeapOnShutdownTimeout() { return containerDumpHeapOnShutdownTimeout; }
@Override public boolean enforceRankProfileInheritance() { return enforceRankProfileInheritance; }
diff --git a/container-core/src/main/resources/configdefinitions/container.qr.def b/container-core/src/main/resources/configdefinitions/container.qr.def
index 9d9b84eb428..08a598bf4bf 100644
--- a/container-core/src/main/resources/configdefinitions/container.qr.def
+++ b/container-core/src/main/resources/configdefinitions/container.qr.def
@@ -30,3 +30,9 @@ nodeIndex int default=0
## Force restart of container on deploy, and defer any changes until restart
restartOnDeploy bool default=false restart
+
+## Force heapdump if process is not able to stop within shutdown.timeout
+shutdown.dumpHeapOnTimeout bool default=false
+
+## Timeout for clean shutdown, in seconds
+shutdown.timeout double default=50.0
diff --git a/container-disc/src/main/java/com/yahoo/container/jdisc/ConfiguredApplication.java b/container-disc/src/main/java/com/yahoo/container/jdisc/ConfiguredApplication.java
index 853224a5b91..457d4fc96a9 100644
--- a/container-disc/src/main/java/com/yahoo/container/jdisc/ConfiguredApplication.java
+++ b/container-disc/src/main/java/com/yahoo/container/jdisc/ConfiguredApplication.java
@@ -1,6 +1,7 @@
// Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
package com.yahoo.container.jdisc;
+import com.google.common.util.concurrent.AtomicDouble;
import com.google.inject.AbstractModule;
import com.google.inject.Inject;
import com.google.inject.Injector;
@@ -43,7 +44,6 @@ import com.yahoo.vespa.config.ConfigKey;
import com.yahoo.vespa.defaults.Defaults;
import com.yahoo.yolean.Exceptions;
-import java.io.IOException;
import java.util.Collections;
import java.util.HashSet;
import java.util.IdentityHashMap;
@@ -54,6 +54,7 @@ import java.util.Set;
import java.util.WeakHashMap;
import java.util.concurrent.ScheduledThreadPoolExecutor;
import java.util.concurrent.TimeUnit;
+import java.util.concurrent.atomic.AtomicBoolean;
import java.util.logging.Level;
import java.util.logging.Logger;
@@ -74,6 +75,8 @@ public final class ConfiguredApplication implements Application {
private final String configId;
private final OsgiFramework osgiFramework;
private final com.yahoo.jdisc.Timer timerSingleton;
+ private final AtomicBoolean dumpHeapOnShutdownTimeout = new AtomicBoolean(false);
+ private final AtomicDouble shudownTimeoutS = new AtomicDouble(50.0);
// Subscriber that is used when this is not a standalone-container. Subscribes
// to config to make sure that container will be registered in slobrok (by {@link com.yahoo.jrt.slobrok.api.Register})
// if slobrok config changes (typically slobroks moving to other nodes)
@@ -133,7 +136,7 @@ public final class ConfiguredApplication implements Application {
@Override
public void start() {
qrConfig = getConfig(QrConfig.class, true);
-
+ reconfigure(qrConfig);
hackToInitializeServer(qrConfig);
ContainerBuilder builder = createBuilderWithGuiceBindings();
@@ -222,6 +225,7 @@ public final class ConfiguredApplication implements Application {
while (true) {
subscriber.waitNextGeneration(false);
QrConfig newConfig = QrConfig.class.cast(first(subscriber.config().values()));
+ reconfigure(newConfig);
if (qrConfig.rpc().port() != newConfig.rpc().port()) {
com.yahoo.protect.Process.logAndDie(
"Rpc port config has changed from " +
@@ -235,6 +239,11 @@ public final class ConfiguredApplication implements Application {
}
}
+ /** Copies the shutdown settings (timeout and dump-heap-on-timeout) of the given config into this application's atomic holders. */
+ void reconfigure(QrConfig qrConfig) {
+ shudownTimeoutS.set(qrConfig.shutdown().timeout());
+ dumpHeapOnShutdownTimeout.set(qrConfig.shutdown().dumpHeapOnTimeout());
+ }
+
private void initializeAndActivateContainer(ContainerBuilder builder) {
addHandlerBindings(builder, Container.get().getRequestHandlerRegistry(),
configurer.getComponent(ApplicationContext.class).discBindingsConfig);
@@ -401,13 +410,11 @@ public final class ConfiguredApplication implements Application {
private void startShutdownDeadlineExecutor() {
shutdownDeadlineExecutor = new ScheduledThreadPoolExecutor(1, new DaemonThreadFactory("Shutdown deadline timer"));
shutdownDeadlineExecutor.setExecuteExistingDelayedTasksAfterShutdownPolicy(false);
- long delayMillis = 50 * 1000;
+ long delayMillis = (long)(shudownTimeoutS.get() * 1000.0);
shutdownDeadlineExecutor.schedule(() -> {
- String heapDumpName = Defaults.getDefaults().underVespaHome("var/crash/java_pid.") + ProcessHandle.current().pid() + ".hprof";
- try {
+ if (dumpHeapOnShutdownTimeout.get()) {
+ String heapDumpName = Defaults.getDefaults().underVespaHome("var/crash/java_pid.") + ProcessHandle.current().pid() + ".hprof";
com.yahoo.protect.Process.dumpHeap(heapDumpName, true);
- } catch (IOException e) {
- log.log(Level.WARNING, "Failed writing heap dump:", e);
}
com.yahoo.protect.Process.logAndDie(
"Timed out waiting for application shutdown. Please check that all your request handlers " +
diff --git a/flags/src/main/java/com/yahoo/vespa/flags/Flags.java b/flags/src/main/java/com/yahoo/vespa/flags/Flags.java
index d4157b659ae..69d325d4841 100644
--- a/flags/src/main/java/com/yahoo/vespa/flags/Flags.java
+++ b/flags/src/main/java/com/yahoo/vespa/flags/Flags.java
@@ -172,6 +172,20 @@ public class Flags {
"Takes effect at redeployment",
ZONE_ID, APPLICATION_ID);
+ // Controls whether a container dumps its heap when shutdown times out; off by default.
+ public static final UnboundBooleanFlag CONTAINER_DUMP_HEAP_ON_SHUTDOWN_TIMEOUT = defineFeatureFlag(
+ "container-dump-heap-on-shutdown-timeout", false,
+ List.of("baldersheim"), "2021-09-25", "2021-11-01",
+ "Will trigger a heap dump if container shutdown times out",
+ "Takes effect at redeployment",
+ ZONE_ID, APPLICATION_ID);
+
+ public static final UnboundDoubleFlag CONTAINER_SHUTDOWN_TIMEOUT = defineDoubleFlag(
+ "container-shutdown-timeout", 50.0,
+ List.of("baldersheim"), "2021-09-25", "2021-11-01",
+ "Timeout for shutdown of a jdisc container",
+ "Takes effect at redeployment",
+ ZONE_ID, APPLICATION_ID);
+
public static final UnboundListFlag<String> ALLOWED_ATHENZ_PROXY_IDENTITIES = defineListFlag(
"allowed-athenz-proxy-identities", List.of(), String.class,
List.of("bjorncs", "tokle"), "2021-02-10", "2021-12-01",
diff --git a/vespajlib/src/main/java/com/yahoo/protect/Process.java b/vespajlib/src/main/java/com/yahoo/protect/Process.java
index f3674f665b2..8038382c348 100644
--- a/vespajlib/src/main/java/com/yahoo/protect/Process.java
+++ b/vespajlib/src/main/java/com/yahoo/protect/Process.java
@@ -74,9 +74,13 @@ public final class Process {
}
}
- public static void dumpHeap(String filePath, boolean live) throws IOException {
+ public static void dumpHeap(String filePath, boolean live) {
log.log(Level.INFO, "Will dump the heap to '" + filePath + "', with the live = " + live);
- getHotspotMXBean().dumpHeap(filePath, live);
+ try {
+ getHotspotMXBean().dumpHeap(filePath, live);
+ } catch (IOException e) {
+ log.log(Level.WARNING, "Failed writing heap dump:", e);
+ }
}
private static HotSpotDiagnosticMXBean getHotspotMXBean() throws IOException {