summaryrefslogtreecommitdiffstats
path: root/node-admin
diff options
context:
space:
mode:
authorvalerijf <valerijf@yahoo-inc.com>2017-05-24 09:33:29 +0200
committervalerijf <valerijf@yahoo-inc.com>2017-05-24 09:33:29 +0200
commit179d274a23f5699130864e3e2067449a30ee2f5c (patch)
tree1ada9b06fcb27c419556aeabfc8e6b391dd290c0 /node-admin
parentfdfd15fea0fa6be43dfa7015fa5e92bbbae5b708 (diff)
getContainer once per converge loop
Diffstat (limited to 'node-admin')
-rw-r--r--node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgentImpl.java76
1 files changed, 40 insertions, 36 deletions
diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgentImpl.java b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgentImpl.java
index d8ee3550272..8f5540577c5 100644
--- a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgentImpl.java
+++ b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgentImpl.java
@@ -234,25 +234,27 @@ public class NodeAgentImpl implements NodeAgent {
}
}
- private void startContainerIfNeeded(final ContainerNodeSpec nodeSpec) {
- if (!getContainer().isPresent()) {
- aclMaintainer.ifPresent(AclMaintainer::run);
- dockerOperations.startContainer(containerName, nodeSpec);
- lastCpuMetric = new CpuUsageReporter(clock.instant());
-
- addDebugMessage("startContainerIfNeeded: containerState " + containerState + " -> " +
- RUNNING_HOWEVER_RESUME_SCRIPT_NOT_RUN);
- containerState = RUNNING_HOWEVER_RESUME_SCRIPT_NOT_RUN;
- logger.info("Container successfully started, new containerState is " + containerState);
- }
+ private void startContainer(ContainerNodeSpec nodeSpec) {
+ aclMaintainer.ifPresent(AclMaintainer::run);
+ dockerOperations.startContainer(containerName, nodeSpec);
+ lastCpuMetric = new CpuUsageReporter(clock.instant());
+
+ addDebugMessage("startContainerIfNeeded: containerState " + containerState + " -> " +
+ RUNNING_HOWEVER_RESUME_SCRIPT_NOT_RUN);
+ containerState = RUNNING_HOWEVER_RESUME_SCRIPT_NOT_RUN;
+ logger.info("Container successfully started, new containerState is " + containerState);
}
- private void removeContainerIfNeededUpdateContainerState(ContainerNodeSpec nodeSpec) {
- removeContainerIfNeeded(nodeSpec).ifPresent(existingContainer ->
- shouldRestartServices(nodeSpec).ifPresent(restartReason -> {
- logger.info("Will restart services for container " + existingContainer + ": " + restartReason);
- restartServices(nodeSpec, existingContainer);
- }));
+ private Optional<Container> removeContainerIfNeededUpdateContainerState(ContainerNodeSpec nodeSpec, Optional<Container> existingContainer) {
+ return existingContainer
+ .flatMap(container -> removeContainerIfNeeded(nodeSpec, container))
+ .map(container -> {
+ shouldRestartServices(nodeSpec).ifPresent(restartReason -> {
+ logger.info("Will restart services for container " + container + ": " + restartReason);
+ restartServices(nodeSpec, container);
+ });
+ return container;
+ });
}
private Optional<String> shouldRestartServices(ContainerNodeSpec nodeSpec) {
@@ -298,16 +300,12 @@ public class NodeAgentImpl implements NodeAgent {
return Optional.empty();
}
- // Returns true if container is absent on return
- private Optional<Container> removeContainerIfNeeded(ContainerNodeSpec nodeSpec) {
- Optional<Container> existingContainer = getContainer();
- if (!existingContainer.isPresent()) return Optional.empty();
-
- Optional<String> removeReason = shouldRemoveContainer(nodeSpec, existingContainer.get());
+ private Optional<Container> removeContainerIfNeeded(ContainerNodeSpec nodeSpec, Container existingContainer) {
+ Optional<String> removeReason = shouldRemoveContainer(nodeSpec, existingContainer);
if (removeReason.isPresent()) {
- logger.info("Will remove container " + existingContainer.get() + ": " + removeReason.get());
+ logger.info("Will remove container " + existingContainer + ": " + removeReason.get());
- if (existingContainer.get().state.isRunning()) {
+ if (existingContainer.state.isRunning()) {
if (nodeSpec.nodeState == Node.State.active) {
orchestratorSuspendNode();
}
@@ -318,13 +316,13 @@ public class NodeAgentImpl implements NodeAgent {
logger.info("Failed stopping services, ignoring", e);
}
}
- dockerOperations.removeContainer(existingContainer.get());
+ dockerOperations.removeContainer(existingContainer);
metricReceiver.unsetMetricsForContainer(hostname);
containerState = ABSENT;
logger.info("Container successfully removed, new containerState is " + containerState);
return Optional.empty();
}
- return existingContainer;
+ return Optional.of(existingContainer);
}
@@ -400,17 +398,21 @@ public class NodeAgentImpl implements NodeAgent {
.orElseThrow(() ->
new IllegalStateException(String.format("Node '%s' missing from node repository.", hostname)));
+ Optional<Container> container = getContainer();
if (!nodeSpec.equals(lastNodeSpec)) {
addDebugMessage("Loading new node spec: " + nodeSpec.toString());
lastNodeSpec = nodeSpec;
// Every time the node spec changes, we should clear the metrics for this container as the dimensions
// will change and we will be reporting duplicate metrics.
+ // TODO: Should be retried if writing fails
metricReceiver.unsetMetricsForContainer(hostname);
- storageMaintainer.ifPresent(maintainer -> {
- maintainer.writeMetricsConfig(containerName, nodeSpec);
- maintainer.writeFilebeatConfig(containerName, nodeSpec);
- });
+ if (container.isPresent()) {
+ storageMaintainer.ifPresent(maintainer -> {
+ maintainer.writeMetricsConfig(containerName, nodeSpec);
+ maintainer.writeFilebeatConfig(containerName, nodeSpec);
+ });
+ }
}
switch (nodeSpec.nodeState) {
@@ -418,7 +420,7 @@ public class NodeAgentImpl implements NodeAgent {
case reserved:
case parked:
case failed:
- removeContainerIfNeededUpdateContainerState(nodeSpec);
+ removeContainerIfNeededUpdateContainerState(nodeSpec, container);
updateNodeRepoWithCurrentAttributes(nodeSpec);
break;
case active:
@@ -431,9 +433,11 @@ public class NodeAgentImpl implements NodeAgent {
addDebugMessage("Waiting for image to download " + imageBeingDownloaded.asString());
return;
}
- removeContainerIfNeededUpdateContainerState(nodeSpec);
+ container = removeContainerIfNeededUpdateContainerState(nodeSpec, container);
+ if (! container.isPresent()) {
+ startContainer(nodeSpec);
+ }
- startContainerIfNeeded(nodeSpec);
runLocalResumeScriptIfNeeded(nodeSpec);
// Because it's more important to stop a bad release from rolling out in prod,
// we put the resume call last. So if we fail after updating the node repo attributes
@@ -451,7 +455,7 @@ public class NodeAgentImpl implements NodeAgent {
break;
case inactive:
storageMaintainer.ifPresent(maintainer -> maintainer.removeOldFilesFromNode(containerName));
- removeContainerIfNeededUpdateContainerState(nodeSpec);
+ removeContainerIfNeededUpdateContainerState(nodeSpec, container);
updateNodeRepoWithCurrentAttributes(nodeSpec);
break;
case provisioned:
@@ -459,7 +463,7 @@ public class NodeAgentImpl implements NodeAgent {
break;
case dirty:
storageMaintainer.ifPresent(maintainer -> maintainer.removeOldFilesFromNode(containerName));
- removeContainerIfNeededUpdateContainerState(nodeSpec);
+ removeContainerIfNeededUpdateContainerState(nodeSpec, container);
logger.info("State is " + nodeSpec.nodeState + ", will delete application storage and mark node as ready");
storageMaintainer.ifPresent(maintainer -> maintainer.archiveNodeData(containerName));
updateNodeRepoAndMarkNodeAsReady(nodeSpec);