summaryrefslogtreecommitdiffstats
path: root/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgentImpl.java
diff options
context:
space:
mode:
Diffstat (limited to 'node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgentImpl.java')
-rw-r--r--node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgentImpl.java65
1 files changed, 33 insertions, 32 deletions
diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgentImpl.java b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgentImpl.java
index 77348a9dc45..6ea65be6799 100644
--- a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgentImpl.java
+++ b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgentImpl.java
@@ -64,20 +64,20 @@ public class NodeAgentImpl implements NodeAgent {
private final PrefixLogger logger;
private DockerImage imageBeingDownloaded = null;
- private final ContainerName containerName;
private final String hostname;
+ private final ContainerName containerName;
private final NodeRepository nodeRepository;
private final Orchestrator orchestrator;
private final DockerOperations dockerOperations;
private final StorageMaintainer storageMaintainer;
- private final AclMaintainer aclMaintainer;
private final Environment environment;
private final Clock clock;
- private final Duration timeBetweenEachConverge;
+ private final AclMaintainer aclMaintainer;
private final SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
private final LinkedList<String> debugMessages = new LinkedList<>();
+ private long delaysBetweenEachConvergeMillis = 30_000;
private int numberOfUnhandledException = 0;
private Instant lastConverge;
@@ -85,7 +85,7 @@ public class NodeAgentImpl implements NodeAgent {
private final ScheduledExecutorService filebeatRestarter =
Executors.newScheduledThreadPool(1, ThreadFactoryFactory.getDaemonThreadFactory("filebeatrestarter"));
- private Consumer<String> serviceRestarter;
+ private final Consumer<String> serviceRestarter;
private Future<?> currentFilebeatRestarter;
private boolean resumeScriptRun = false;
@@ -117,8 +117,7 @@ public class NodeAgentImpl implements NodeAgent {
final StorageMaintainer storageMaintainer,
final AclMaintainer aclMaintainer,
final Environment environment,
- final Clock clock,
- final Duration timeBetweenEachConverge) {
+ final Clock clock) {
this.containerName = ContainerName.fromHostname(hostName);
this.logger = PrefixLogger.getNodeAgentLogger(NodeAgentImpl.class, containerName);
this.hostname = hostName;
@@ -129,8 +128,19 @@ public class NodeAgentImpl implements NodeAgent {
this.aclMaintainer = aclMaintainer;
this.environment = environment;
this.clock = clock;
- this.timeBetweenEachConverge = timeBetweenEachConverge;
this.lastConverge = clock.instant();
+ this.serviceRestarter = service -> {
+ try {
+ ProcessResult processResult = dockerOperations.executeCommandInContainerAsRoot(
+ containerName, "service", service, "restart");
+
+ if (!processResult.isSuccess()) {
+ logger.error("Failed to restart service " + service + ": " + processResult);
+ }
+ } catch (Exception e) {
+ logger.error("Failed to restart service " + service, e);
+ }
+ };
}
@Override
@@ -173,11 +183,11 @@ public class NodeAgentImpl implements NodeAgent {
}
@Override
- public void start() {
- String message = "Starting with interval " + timeBetweenEachConverge.toMillis() + " ms";
+ public void start(int intervalMillis) {
+ String message = "Starting with interval " + intervalMillis + " ms";
logger.info(message);
addDebugMessage(message);
-
+ delaysBetweenEachConvergeMillis = intervalMillis;
if (loopThread != null) {
throw new RuntimeException("Can not restart a node agent.");
}
@@ -187,19 +197,6 @@ public class NodeAgentImpl implements NodeAgent {
});
loopThread.setName("tick-" + hostname);
loopThread.start();
-
- serviceRestarter = service -> {
- try {
- ProcessResult processResult = dockerOperations.executeCommandInContainerAsRoot(
- containerName, "service", service, "restart");
-
- if (!processResult.isSuccess()) {
- logger.error("Failed to restart service " + service + ": " + processResult);
- }
- } catch (Exception e) {
- logger.error("Failed to restart service " + service, e);
- }
- };
}
@Override
@@ -210,15 +207,19 @@ public class NodeAgentImpl implements NodeAgent {
throw new RuntimeException("Can not re-stop a node agent.");
}
signalWorkToBeDone();
-
- do {
- try {
- loopThread.join();
- filebeatRestarter.awaitTermination(Long.MAX_VALUE, TimeUnit.NANOSECONDS);
- } catch (InterruptedException e) {
- logger.error("Interrupted while waiting for converge thread and filebeatRestarter scheduler to shutdown");
+ try {
+ loopThread.join(10000);
+ if (loopThread.isAlive()) {
+ logger.error("Could not stop host thread " + hostname);
}
- } while (loopThread.isAlive() || !filebeatRestarter.isTerminated());
+ } catch (InterruptedException e1) {
+ logger.error("Interrupted; Could not stop host thread " + hostname);
+ }
+ try {
+ filebeatRestarter.awaitTermination(10, TimeUnit.SECONDS);
+ } catch (InterruptedException e) {
+ logger.error("Interrupted; Could not stop filebeatrestarter thread");
+ }
logger.info("Stopped");
}
@@ -374,7 +375,7 @@ public class NodeAgentImpl implements NodeAgent {
boolean isFrozenCopy;
synchronized (monitor) {
while (!workToDoNow) {
- long remainder = timeBetweenEachConverge.minus(Duration.between(lastConverge, clock.instant())).toMillis();
+ long remainder = delaysBetweenEachConvergeMillis - Duration.between(lastConverge, clock.instant()).toMillis();
if (remainder > 0) {
try {
monitor.wait(remainder);