diff options
author | valerijf <valerijf@yahoo-inc.com> | 2017-06-14 13:52:46 +0200 |
---|---|---|
committer | valerijf <valerijf@yahoo-inc.com> | 2017-06-14 14:15:17 +0200 |
commit | 86f49f9de0d0c964baa96bb41dccc0533ca3b2a8 (patch) | |
tree | e9e7a83c7b0e09f160589ddac9de0fc592bbbe13 /node-admin | |
parent | 9e6b3f5d678832e382dd8c01972b1af2d541282a (diff) |
Node-Admin restart filebeat service once a day
Diffstat (limited to 'node-admin')
-rw-r--r-- | node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgentImpl.java | 49 |
1 files changed, 40 insertions, 9 deletions
diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgentImpl.java b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgentImpl.java index 0fbd69708db..10afd6c2545 100644 --- a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgentImpl.java +++ b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgentImpl.java @@ -2,11 +2,13 @@ package com.yahoo.vespa.hosted.node.admin.nodeagent; import com.fasterxml.jackson.core.JsonProcessingException; +import com.yahoo.concurrent.ThreadFactoryFactory; import com.yahoo.vespa.hosted.dockerapi.Container; import com.yahoo.vespa.hosted.dockerapi.ContainerName; import com.yahoo.vespa.hosted.dockerapi.Docker; import com.yahoo.vespa.hosted.dockerapi.DockerExecTimeoutException; import com.yahoo.vespa.hosted.dockerapi.DockerImage; +import com.yahoo.vespa.hosted.dockerapi.ProcessResult; import com.yahoo.vespa.hosted.dockerapi.metrics.Dimensions; import com.yahoo.vespa.hosted.dockerapi.metrics.MetricReceiverWrapper; import com.yahoo.vespa.hosted.node.admin.ContainerNodeSpec; @@ -29,7 +31,12 @@ import java.util.LinkedHashMap; import java.util.LinkedList; import java.util.Map; import java.util.Optional; +import java.util.concurrent.Executors; +import java.util.concurrent.Future; +import java.util.concurrent.ScheduledExecutorService; +import java.util.concurrent.TimeUnit; import java.util.concurrent.atomic.AtomicBoolean; +import java.util.function.Consumer; import static com.yahoo.vespa.hosted.node.admin.nodeagent.NodeAgentImpl.ContainerState.ABSENT; import static com.yahoo.vespa.hosted.node.admin.nodeagent.NodeAgentImpl.ContainerState.RUNNING; @@ -70,6 +77,11 @@ public class NodeAgentImpl implements NodeAgent { private Thread loopThread; + private final ScheduledExecutorService filebeatRestarter = + Executors.newScheduledThreadPool(1, ThreadFactoryFactory.getDaemonThreadFactory("filebeatrestarter")); + private final Consumer<String> serviceRestarter; + private Future<?> currentFilebeatRestarter; + enum ContainerState { ABSENT, RUNNING_HOWEVER_RESUME_SCRIPT_NOT_RUN, @@ -105,6 +117,18 @@ public class NodeAgentImpl implements NodeAgent { this.clock = clock; this.aclMaintainer = aclMaintainer; this.lastConverge = clock.instant(); + this.serviceRestarter = service -> { + try { + ProcessResult processResult = dockerOperations.executeCommandInContainerAsRoot( + containerName, "service", service, "restart"); + + if (!processResult.isSuccess()) { + logger.error("Failed to restart service " + service + ": " + processResult); + } + } catch (Exception e) { + logger.error("Failed to restart service " + service, e); + } + }; // If the container is already running, initialize vespaVersion and lastCpuMetric lastCpuMetric = new CpuUsageReporter(clock.instant()); @@ -175,6 +199,7 @@ public class NodeAgentImpl implements NodeAgent { @Override public void stop() { addDebugMessage("Stopping"); + filebeatRestarter.shutdown(); if (!terminated.compareAndSet(false, true)) { throw new RuntimeException("Can not re-stop a node agent."); } @@ -187,6 +212,11 @@ public class NodeAgentImpl implements NodeAgent { } catch (InterruptedException e1) { logger.error("Interrupted; Could not stop host thread " + hostname); } + try { + filebeatRestarter.awaitTermination(10, TimeUnit.SECONDS); + } catch (InterruptedException e) { + logger.error("Interrupted; Could not stop filebeatrestarter thread"); + } } private void runLocalResumeScriptIfNeeded(final ContainerNodeSpec nodeSpec) { @@ -229,7 +259,12 @@ public class NodeAgentImpl implements NodeAgent { dockerOperations.startContainer(containerName, nodeSpec); lastCpuMetric = new CpuUsageReporter(clock.instant()); - writeConfigs(nodeSpec); + currentFilebeatRestarter = filebeatRestarter.scheduleWithFixedDelay(() -> serviceRestarter.accept("filebeat"), 1, 1, TimeUnit.DAYS); + storageMaintainer.ifPresent(maintainer -> { + maintainer.writeMetricsConfig(containerName, nodeSpec); + maintainer.writeFilebeatConfig(containerName, nodeSpec); + }); + addDebugMessage("startContainerIfNeeded: containerState " + containerState + " -> " + RUNNING_HOWEVER_RESUME_SCRIPT_NOT_RUN); @@ -308,6 +343,7 @@ public class NodeAgentImpl implements NodeAgent { logger.info("Failed stopping services, ignoring", e); } } + if (currentFilebeatRestarter != null) currentFilebeatRestarter.cancel(true); dockerOperations.removeContainer(existingContainer); metricReceiver.unsetMetricsForContainer(hostname); containerState = ABSENT; @@ -402,7 +438,9 @@ public class NodeAgentImpl implements NodeAgent { // TODO: Should be retried if writing fails metricReceiver.unsetMetricsForContainer(hostname); if (container.isPresent()) { - writeConfigs(nodeSpec); + storageMaintainer.ifPresent(maintainer -> { + maintainer.writeMetricsConfig(containerName, nodeSpec); + }); } } @@ -602,13 +640,6 @@ public class NodeAgentImpl implements NodeAgent { } } - private void writeConfigs(ContainerNodeSpec nodeSpec) { - storageMaintainer.ifPresent(maintainer -> { - maintainer.writeMetricsConfig(containerName, nodeSpec); - maintainer.writeFilebeatConfig(containerName, nodeSpec); - }); - } - private Optional<Container> getContainer() { if (containerState == ABSENT) return Optional.empty(); return dockerOperations.getContainer(containerName); |