diff options
author | Håkon Hallingstad <hakon@oath.com> | 2018-01-29 10:34:52 +0100 |
---|---|---|
committer | GitHub <noreply@github.com> | 2018-01-29 10:34:52 +0100 |
commit | 28e3545728977a0be82159b8f278be8e772cb59b (patch) | |
tree | 85a67ddd05e5f8c42041653018b97711ed5ce7c6 | |
parent | 4027310dcef4a098ff1b93c1d14bca4daf805522 (diff) | |
parent | 5b10008ff51a5320d6f8be69454c537b721d0aaa (diff) |
Merge pull request #4775 from vespa-engine/freva/set-converge-time-at-the-end
Assume NodeAgent fails to retrieve node from node-repo after dirty
-rw-r--r-- | node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgentImpl.java | 20 |
1 files changed, 15 insertions, 5 deletions
diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgentImpl.java b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgentImpl.java index 32f6186707a..edf4f059fc2 100644 --- a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgentImpl.java +++ b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgentImpl.java @@ -58,6 +58,7 @@ public class NodeAgentImpl implements NodeAgent { private boolean isFrozen = true; private boolean wantFrozen = false; private boolean workToDoNow = true; + private boolean expectNodeNotInNodeRepo = false; private final Object monitor = new Object(); @@ -378,7 +379,9 @@ public class NodeAgentImpl implements NodeAgent { boolean isFrozenCopy; synchronized (monitor) { while (!workToDoNow) { - long remainder = timeBetweenEachConverge.minus(Duration.between(lastConverge, clock.instant())).toMillis(); + long remainder = timeBetweenEachConverge + .minus(Duration.between(lastConverge, clock.instant())) + .toMillis(); if (remainder > 0) { try { monitor.wait(remainder); @@ -413,7 +416,7 @@ public class NodeAgentImpl implements NodeAgent { // therefore be reset if we get an exception from docker. 
numberOfUnhandledException++; containerState = UNKNOWN; - logger.error("Caught a DockerExecption, resetting containerState to " + containerState, e); + logger.error("Caught a DockerException, resetting containerState to " + containerState, e); } catch (Exception e) { numberOfUnhandledException++; logger.error("Unhandled exception, ignoring.", e); @@ -427,9 +430,15 @@ public class NodeAgentImpl implements NodeAgent { // Public for testing void converge() { - final ContainerNodeSpec nodeSpec = nodeRepository.getContainerNodeSpec(hostname) - .orElseThrow(() -> - new IllegalStateException(String.format("Node '%s' missing from node repository.", hostname))); + final Optional<ContainerNodeSpec> nodeSpecOptional = nodeRepository.getContainerNodeSpec(hostname); + + // We just removed the node from node repo, so this is expected until NodeAdmin stop this NodeAgent + if (!nodeSpecOptional.isPresent() && expectNodeNotInNodeRepo) return; + + final ContainerNodeSpec nodeSpec = nodeSpecOptional.orElseThrow(() -> + new IllegalStateException(String.format("Node '%s' missing from node repository.", hostname))); + expectNodeNotInNodeRepo = false; + Optional<Container> container = getContainer(); if (!nodeSpec.equals(lastNodeSpec)) { @@ -499,6 +508,7 @@ public class NodeAgentImpl implements NodeAgent { storageMaintainer.cleanupNodeStorage(containerName, nodeSpec); updateNodeRepoWithCurrentAttributes(nodeSpec); nodeRepository.markNodeAvailableForNewAllocation(hostname); + expectNodeNotInNodeRepo = true; break; default: throw new RuntimeException("UNKNOWN STATE " + nodeSpec.nodeState.name()); |