diff options
9 files changed, 98 insertions, 30 deletions
diff --git a/config-model-api/src/main/java/com/yahoo/config/model/api/ModelContext.java b/config-model-api/src/main/java/com/yahoo/config/model/api/ModelContext.java index 9ee36831d6a..2870b88d105 100644 --- a/config-model-api/src/main/java/com/yahoo/config/model/api/ModelContext.java +++ b/config-model-api/src/main/java/com/yahoo/config/model/api/ModelContext.java @@ -87,6 +87,8 @@ public interface ModelContext { @ModelFeatureFlag(owners = {"baldersheim"}) default int maxConcurrentMergesPerNode() { throw new UnsupportedOperationException("TODO specify default value"); } @ModelFeatureFlag(owners = {"baldersheim"}) default int maxMergeQueueSize() { throw new UnsupportedOperationException("TODO specify default value"); } @ModelFeatureFlag(owners = {"baldersheim"}) default boolean dryRunOnnxOnSetup() { return true; } + @ModelFeatureFlag(owners = {"baldersheim"}) default boolean containerDumpHeapOnShutdownTimeout() { throw new UnsupportedOperationException("TODO specify default value"); } + @ModelFeatureFlag(owners = {"baldersheim"}) default double containerShutdownTimeout() { throw new UnsupportedOperationException("TODO specify default value"); } @ModelFeatureFlag(owners = {"geirst"}) default boolean enableFeedBlockInDistributor() { return true; } @ModelFeatureFlag(owners = {"bjorncs", "tokle"}) default List<String> allowedAthenzProxyIdentities() { return List.of(); } @ModelFeatureFlag(owners = {"vekterli"}) default int maxActivationInhibitedOutOfSyncGroups() { return 0; } diff --git a/config-model/src/main/java/com/yahoo/config/model/deploy/TestProperties.java b/config-model/src/main/java/com/yahoo/config/model/deploy/TestProperties.java index 4bf20e75a5d..dea6b980692 100644 --- a/config-model/src/main/java/com/yahoo/config/model/deploy/TestProperties.java +++ b/config-model/src/main/java/com/yahoo/config/model/deploy/TestProperties.java @@ -63,6 +63,8 @@ public class TestProperties implements ModelContext.Properties, ModelContext.Fea private double 
resourceLimitDisk = 0.8; private double resourceLimitMemory = 0.8; private double minNodeRatioPerGroup = 0.0; + private boolean containerDumpHeapOnShutdownTimeout = false; + private double containerShutdownTimeout = 50.0; @Override public ModelContext.FeatureFlags featureFlags() { return this; } @Override public boolean multitenant() { return multitenant; } @@ -106,7 +108,16 @@ public class TestProperties implements ModelContext.Properties, ModelContext.Fea @Override public double minNodeRatioPerGroup() { return minNodeRatioPerGroup; } @Override public int metricsproxyNumThreads() { return 1; } @Override public boolean enforceRankProfileInheritance() { return enforceRankProfileInheritance; } - + @Override public double containerShutdownTimeout() { return containerShutdownTimeout; } + @Override public boolean containerDumpHeapOnShutdownTimeout() { return containerDumpHeapOnShutdownTimeout; } + public TestProperties containerDumpHeapOnShutdownTimeout(boolean value) { + containerDumpHeapOnShutdownTimeout = value; + return this; + } + public TestProperties containerShutdownTimeout(double value) { + containerShutdownTimeout = value; + return this; + } public TestProperties enforceRankProfileInheritance(boolean value) { enforceRankProfileInheritance = value; return this; diff --git a/config-model/src/main/java/com/yahoo/vespa/model/container/Container.java b/config-model/src/main/java/com/yahoo/vespa/model/container/Container.java index b915453b593..cdf8d592391 100644 --- a/config-model/src/main/java/com/yahoo/vespa/model/container/Container.java +++ b/config-model/src/main/java/com/yahoo/vespa/model/container/Container.java @@ -74,6 +74,8 @@ public abstract class Container extends AbstractService implements private final boolean retired; /** The unique index of this node */ private final int index; + private final boolean dumpHeapOnShutdownTimeout; + private final double shutdownTimeoutS; private final ComponentGroup<Handler<?>> handlers = new ComponentGroup<>(this, 
"handler"); private final ComponentGroup<Component<?, ?>> components = new ComponentGroup<>(this, "components"); @@ -90,6 +92,8 @@ public abstract class Container extends AbstractService implements this.parent = parent; this.retired = retired; this.index = index; + dumpHeapOnShutdownTimeout = deployState.featureFlags().containerDumpHeapOnShutdownTimeout(); + shutdownTimeoutS = deployState.featureFlags().containerShutdownTimeout(); this.defaultHttpServer = new JettyHttpServer("DefaultHttpServer", containerClusterOrNull(parent), deployState.isHosted()); if (getHttp() == null) { addChild(defaultHttpServer); @@ -315,7 +319,9 @@ public abstract class Container extends AbstractService implements .slobrokId(serviceSlobrokId())) .filedistributor(filedistributorConfig()) .discriminator((clusterName != null ? clusterName + "." : "" ) + name) - .nodeIndex(index); + .nodeIndex(index) + .shutdown.dumpHeapOnTimeout(dumpHeapOnShutdownTimeout) + .timeout(shutdownTimeoutS); } /** Returns the jvm args set explicitly for this node */ diff --git a/config-model/src/test/java/com/yahoo/vespa/model/container/xml/ContainerModelBuilderTest.java b/config-model/src/test/java/com/yahoo/vespa/model/container/xml/ContainerModelBuilderTest.java index 2505aa3b01e..912ca23dce2 100644 --- a/config-model/src/test/java/com/yahoo/vespa/model/container/xml/ContainerModelBuilderTest.java +++ b/config-model/src/test/java/com/yahoo/vespa/model/container/xml/ContainerModelBuilderTest.java @@ -7,6 +7,7 @@ import com.yahoo.config.application.api.ApplicationPackage; import com.yahoo.config.model.NullConfigModelRegistry; import com.yahoo.config.model.api.ContainerEndpoint; import com.yahoo.config.model.api.EndpointCertificateSecrets; +import com.yahoo.config.model.api.ModelContext; import com.yahoo.config.model.api.TenantSecretStore; import com.yahoo.config.model.builder.xml.test.DomBuilderTest; import com.yahoo.config.model.deploy.DeployState; @@ -681,36 +682,47 @@ public class ContainerModelBuilderTest 
extends ContainerModelBuilderTestBase { @Test public void qrconfig_is_produced() throws IOException, SAXException { + QrConfig qr = getQrConfig(new TestProperties()); + String hostname = HostName.getLocalhost(); // Using the same way of getting hostname as filedistribution model + assertEquals("default.container.0", qr.discriminator()); + assertEquals(19102, qr.rpc().port()); + assertEquals("vespa/service/default/container.0", qr.rpc().slobrokId()); + assertTrue(qr.rpc().enabled()); + assertEquals("", qr.rpc().host()); + assertFalse(qr.restartOnDeploy()); + assertEquals("filedistribution/" + hostname, qr.filedistributor().configid()); + assertEquals(50.0, qr.shutdown().timeout(), 0.00000000000001); + assertFalse(qr.shutdown().dumpHeapOnTimeout()); + } + private QrConfig getQrConfig(ModelContext.Properties properties) throws IOException, SAXException { String servicesXml = "<services>" + - "<admin version='3.0'>" + - " <nodes count='2'/>" + - "</admin>" + - "<container id ='default' version='1.0'>" + - " <nodes>" + - " <node hostalias='node1' />" + - " </nodes>" + - "</container>" + - "</services>"; + " <admin version='3.0'>" + + " <nodes count='2'/>" + + " </admin>" + + " <container id ='default' version='1.0'>" + + " <nodes>" + + " <node hostalias='node1' />" + + " </nodes>" + + " </container>" + + "</services>"; ApplicationPackage applicationPackage = new MockApplicationPackage.Builder() .withServices(servicesXml) .build(); VespaModel model = new VespaModel(new NullConfigModelRegistry(), new DeployState.Builder() .applicationPackage(applicationPackage) - .properties(new TestProperties()) + .properties(properties) .build()); - String hostname = HostName.getLocalhost(); // Using the same way of getting hostname as filedistribution model + return model.getConfig(QrConfig.class, "default/container.0"); + } - QrConfig config = model.getConfig(QrConfig.class, "default/container.0"); - assertEquals("default.container.0", config.discriminator()); - assertEquals(19102, 
config.rpc().port()); - assertEquals("vespa/service/default/container.0", config.rpc().slobrokId()); - assertTrue(config.rpc().enabled()); - assertEquals("", config.rpc().host()); - assertFalse(config.restartOnDeploy()); - assertEquals("filedistribution/" + hostname, config.filedistributor().configid()); + @Test + public void control_container_shutdown() throws IOException, SAXException { + QrConfig qr = getQrConfig(new TestProperties().containerShutdownTimeout(133).containerDumpHeapOnShutdownTimeout(true)); + assertEquals(133.0, qr.shutdown().timeout(), 0.00000000000001); + assertTrue(qr.shutdown().dumpHeapOnTimeout()); } @Test diff --git a/configserver/src/main/java/com/yahoo/vespa/config/server/deploy/ModelContextImpl.java b/configserver/src/main/java/com/yahoo/vespa/config/server/deploy/ModelContextImpl.java index 89987891c61..0198e5899da 100644 --- a/configserver/src/main/java/com/yahoo/vespa/config/server/deploy/ModelContextImpl.java +++ b/configserver/src/main/java/com/yahoo/vespa/config/server/deploy/ModelContextImpl.java @@ -189,6 +189,8 @@ public class ModelContextImpl implements ModelContext { private final int metricsproxyNumThreads; private final boolean enforceRankProfileInheritance; private final boolean newLocationBrokerLogic; + private final boolean containerDumpHeapOnShutdownTimeout; + private final double containerShutdownTimeout; public FeatureFlags(FlagSource source, ApplicationId appId) { this.defaultTermwiseLimit = flagValue(source, appId, Flags.DEFAULT_TERM_WISE_LIMIT); @@ -216,6 +218,8 @@ public class ModelContextImpl implements ModelContext { this.metricsproxyNumThreads = flagValue(source, appId, Flags.METRICSPROXY_NUM_THREADS); this.enforceRankProfileInheritance = flagValue(source, appId, Flags.ENFORCE_RANK_PROFILE_INHERITANCE); this.newLocationBrokerLogic = flagValue(source, appId, Flags.NEW_LOCATION_BROKER_LOGIC); + this.containerDumpHeapOnShutdownTimeout = flagValue(source, appId, Flags.CONTAINER_DUMP_HEAP_ON_SHUTDOWN_TIMEOUT); + 
this.containerShutdownTimeout = flagValue(source, appId,Flags.CONTAINER_SHUTDOWN_TIMEOUT); } @Override public double defaultTermwiseLimit() { return defaultTermwiseLimit; } @@ -245,6 +249,8 @@ public class ModelContextImpl implements ModelContext { @Override public double minNodeRatioPerGroup() { return minNodeRatioPerGroup; } @Override public int metricsproxyNumThreads() { return metricsproxyNumThreads; } @Override public boolean newLocationBrokerLogic() { return newLocationBrokerLogic; } + @Override public double containerShutdownTimeout() { return containerShutdownTimeout; } + @Override public boolean containerDumpHeapOnShutdownTimeout() { return containerDumpHeapOnShutdownTimeout; } @Override public boolean enforceRankProfileInheritance() { return enforceRankProfileInheritance; } diff --git a/container-core/src/main/resources/configdefinitions/container.qr.def b/container-core/src/main/resources/configdefinitions/container.qr.def index 9d9b84eb428..08a598bf4bf 100644 --- a/container-core/src/main/resources/configdefinitions/container.qr.def +++ b/container-core/src/main/resources/configdefinitions/container.qr.def @@ -30,3 +30,9 @@ nodeIndex int default=0 ## Force restart of container on deploy, and defer any changes until restart restartOnDeploy bool default=false restart + +## Force heapdump if process is not able to stop within shutdown.timeout +shutdown.dumpHeapOnTimeout bool default=false + +## Timeout for clean shutdown +shutdown.timeout double default=50.0 diff --git a/container-disc/src/main/java/com/yahoo/container/jdisc/ConfiguredApplication.java b/container-disc/src/main/java/com/yahoo/container/jdisc/ConfiguredApplication.java index 853224a5b91..457d4fc96a9 100644 --- a/container-disc/src/main/java/com/yahoo/container/jdisc/ConfiguredApplication.java +++ b/container-disc/src/main/java/com/yahoo/container/jdisc/ConfiguredApplication.java @@ -1,6 +1,7 @@ // Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. 
See LICENSE in the project root. package com.yahoo.container.jdisc; +import com.google.common.util.concurrent.AtomicDouble; import com.google.inject.AbstractModule; import com.google.inject.Inject; import com.google.inject.Injector; @@ -43,7 +44,6 @@ import com.yahoo.vespa.config.ConfigKey; import com.yahoo.vespa.defaults.Defaults; import com.yahoo.yolean.Exceptions; -import java.io.IOException; import java.util.Collections; import java.util.HashSet; import java.util.IdentityHashMap; @@ -54,6 +54,7 @@ import java.util.Set; import java.util.WeakHashMap; import java.util.concurrent.ScheduledThreadPoolExecutor; import java.util.concurrent.TimeUnit; +import java.util.concurrent.atomic.AtomicBoolean; import java.util.logging.Level; import java.util.logging.Logger; @@ -74,6 +75,8 @@ public final class ConfiguredApplication implements Application { private final String configId; private final OsgiFramework osgiFramework; private final com.yahoo.jdisc.Timer timerSingleton; + private final AtomicBoolean dumpHeapOnShutdownTimeout = new AtomicBoolean(false); + private final AtomicDouble shutdownTimeoutS = new AtomicDouble(50.0); // Subscriber that is used when this is not a standalone-container. 
Subscribes // to config to make sure that container will be registered in slobrok (by {@link com.yahoo.jrt.slobrok.api.Register}) // if slobrok config changes (typically slobroks moving to other nodes) @@ -133,7 +136,7 @@ public final class ConfiguredApplication implements Application { @Override public void start() { qrConfig = getConfig(QrConfig.class, true); - + reconfigure(qrConfig); hackToInitializeServer(qrConfig); ContainerBuilder builder = createBuilderWithGuiceBindings(); @@ -222,6 +225,7 @@ public final class ConfiguredApplication implements Application { while (true) { subscriber.waitNextGeneration(false); QrConfig newConfig = QrConfig.class.cast(first(subscriber.config().values())); + reconfigure(newConfig); if (qrConfig.rpc().port() != newConfig.rpc().port()) { com.yahoo.protect.Process.logAndDie( "Rpc port config has changed from " + @@ -235,6 +239,11 @@ public final class ConfiguredApplication implements Application { } } + void reconfigure(QrConfig qrConfig) { + dumpHeapOnShutdownTimeout.set(qrConfig.shutdown().dumpHeapOnTimeout()); + shutdownTimeoutS.set(qrConfig.shutdown().timeout()); + } + private void initializeAndActivateContainer(ContainerBuilder builder) { addHandlerBindings(builder, Container.get().getRequestHandlerRegistry(), configurer.getComponent(ApplicationContext.class).discBindingsConfig); @@ -401,13 +410,11 @@ public final class ConfiguredApplication implements Application { private void startShutdownDeadlineExecutor() { shutdownDeadlineExecutor = new ScheduledThreadPoolExecutor(1, new DaemonThreadFactory("Shutdown deadline timer")); shutdownDeadlineExecutor.setExecuteExistingDelayedTasksAfterShutdownPolicy(false); - long delayMillis = 50 * 1000; + long delayMillis = (long)(shutdownTimeoutS.get() * 1000.0); shutdownDeadlineExecutor.schedule(() -> { - String heapDumpName = Defaults.getDefaults().underVespaHome("var/crash/java_pid.") + ProcessHandle.current().pid() + ".hprof"; - try { + if (dumpHeapOnShutdownTimeout.get()) { + String 
heapDumpName = Defaults.getDefaults().underVespaHome("var/crash/java_pid.") + ProcessHandle.current().pid() + ".hprof"; com.yahoo.protect.Process.dumpHeap(heapDumpName, true); - } catch (IOException e) { - log.log(Level.WARNING, "Failed writing heap dump:", e); } com.yahoo.protect.Process.logAndDie( "Timed out waiting for application shutdown. Please check that all your request handlers " + diff --git a/flags/src/main/java/com/yahoo/vespa/flags/Flags.java b/flags/src/main/java/com/yahoo/vespa/flags/Flags.java index d4157b659ae..69d325d4841 100644 --- a/flags/src/main/java/com/yahoo/vespa/flags/Flags.java +++ b/flags/src/main/java/com/yahoo/vespa/flags/Flags.java @@ -172,6 +172,20 @@ public class Flags { "Takes effect at redeployment", ZONE_ID, APPLICATION_ID); + public static final UnboundBooleanFlag CONTAINER_DUMP_HEAP_ON_SHUTDOWN_TIMEOUT = defineFeatureFlag( + "container-dump-heap-on-shutdown-timeout", false, + List.of("baldersheim"), "2021-09-25", "2021-11-01", + "Will trigger a heap dump if container shutdown times out", + "Takes effect at redeployment", + ZONE_ID, APPLICATION_ID); + + public static final UnboundDoubleFlag CONTAINER_SHUTDOWN_TIMEOUT = defineDoubleFlag( + "container-shutdown-timeout", 50.0, + List.of("baldersheim"), "2021-09-25", "2021-11-01", + "Timeout for shutdown of a jdisc container", + "Takes effect at redeployment", + ZONE_ID, APPLICATION_ID); + public static final UnboundListFlag<String> ALLOWED_ATHENZ_PROXY_IDENTITIES = defineListFlag( "allowed-athenz-proxy-identities", List.of(), String.class, List.of("bjorncs", "tokle"), "2021-02-10", "2021-12-01", diff --git a/vespajlib/src/main/java/com/yahoo/protect/Process.java b/vespajlib/src/main/java/com/yahoo/protect/Process.java index f3674f665b2..8038382c348 100644 --- a/vespajlib/src/main/java/com/yahoo/protect/Process.java +++ b/vespajlib/src/main/java/com/yahoo/protect/Process.java @@ -74,9 +74,13 @@ public final class Process { } } - public static void dumpHeap(String filePath, 
boolean live) throws IOException { + public static void dumpHeap(String filePath, boolean live) { log.log(Level.INFO, "Will dump the heap to '" + filePath + "', with the live = " + live); - getHotspotMXBean().dumpHeap(filePath, live); + try { + getHotspotMXBean().dumpHeap(filePath, live); + } catch (IOException e) { + log.log(Level.WARNING, "Failed writing heap dump:", e); + } } private static HotSpotDiagnosticMXBean getHotspotMXBean() throws IOException { |