diff options
author | Valerij Fredriksen <valerijf@oath.com> | 2018-10-10 11:13:22 +0200 |
---|---|---|
committer | Valerij Fredriksen <valerijf@oath.com> | 2018-10-10 12:05:29 +0200 |
commit | 6762f25ebed733514225ece5b05d739fa9fd5a37 (patch) | |
tree | 6a729a665d4415dc6db0fc28dc799dba32138f50 /node-admin | |
parent | 8fccd2a17d3169613efb1f563b5e8ed1f5373f4c (diff) |
Pass NodeAgentContext to NodeAgent
Diffstat (limited to 'node-admin')
3 files changed, 134 insertions, 131 deletions
diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgentImpl.java b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgentImpl.java index e3547ed7f10..86adee0d577 100644 --- a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgentImpl.java +++ b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgentImpl.java @@ -3,8 +3,8 @@ package com.yahoo.vespa.hosted.node.admin.nodeagent; import com.fasterxml.jackson.core.JsonProcessingException; import com.yahoo.concurrent.ThreadFactoryFactory; +import com.yahoo.log.LogLevel; import com.yahoo.vespa.hosted.dockerapi.Container; -import com.yahoo.vespa.hosted.dockerapi.ContainerName; import com.yahoo.vespa.hosted.dockerapi.ContainerResources; import com.yahoo.vespa.hosted.dockerapi.ContainerStats; import com.yahoo.vespa.hosted.dockerapi.exception.ContainerNotFoundException; @@ -24,7 +24,6 @@ import com.yahoo.vespa.hosted.node.admin.configserver.orchestrator.Orchestrator; import com.yahoo.vespa.hosted.node.admin.configserver.orchestrator.OrchestratorException; import com.yahoo.vespa.hosted.node.admin.component.Environment; import com.yahoo.vespa.hosted.node.admin.maintenance.identity.AthenzCredentialsMaintainer; -import com.yahoo.vespa.hosted.node.admin.util.PrefixLogger; import com.yahoo.vespa.hosted.provision.Node; import java.time.Clock; @@ -41,6 +40,7 @@ import java.util.concurrent.ScheduledExecutorService; import java.util.concurrent.TimeUnit; import java.util.concurrent.atomic.AtomicBoolean; import java.util.function.Consumer; +import java.util.logging.Logger; import static com.yahoo.vespa.hosted.node.admin.nodeagent.NodeAgentImpl.ContainerState.ABSENT; import static com.yahoo.vespa.hosted.node.admin.nodeagent.NodeAgentImpl.ContainerState.STARTING; @@ -54,6 +54,8 @@ public class NodeAgentImpl implements NodeAgent { // This is used as a definition of 1 GB when comparing flavor specs in node-repo private static final long BYTES_IN_GB = 1_000_000_000L; + private static final Logger logger = Logger.getLogger(NodeAgentImpl.class.getName()); + private final AtomicBoolean terminated = new AtomicBoolean(false); private boolean isFrozen = true; @@ -63,11 +65,9 @@ public class NodeAgentImpl implements NodeAgent { private final Object monitor = new Object(); - private final PrefixLogger logger; private DockerImage imageBeingDownloaded = null; - private final ContainerName containerName; - private final String hostname; + private final NodeAgentContext context; private final NodeRepository nodeRepository; private final Orchestrator orchestrator; private final DockerOperations dockerOperations; @@ -111,7 +111,7 @@ public class NodeAgentImpl implements NodeAgent { private CpuUsageReporter lastCpuMetric = new CpuUsageReporter(); public NodeAgentImpl( - final String hostName, + final NodeAgentContext context, final NodeRepository nodeRepository, final Orchestrator orchestrator, final DockerOperations dockerOperations, @@ -121,9 +121,7 @@ public class NodeAgentImpl implements NodeAgent { final Clock clock, final Duration timeBetweenEachConverge, final AthenzCredentialsMaintainer athenzCredentialsMaintainer) { - this.containerName = ContainerName.fromHostname(hostName); - this.logger = PrefixLogger.getNodeAgentLogger(NodeAgentImpl.class, containerName); - this.hostname = hostName; + this.context = context; this.nodeRepository = nodeRepository; this.orchestrator = orchestrator; this.dockerOperations = dockerOperations; @@ -139,11 +137,11 @@ public class NodeAgentImpl implements NodeAgent { try { while (!terminated.get()) tick(); } catch (Throwable t) { - logger.error("Unhandled throwable, taking down system.", t); + context.log(logger, LogLevel.ERROR, "Unhandled throwable, taking down system.", t); System.exit(234); } }); - this.loopThread.setName("tick-" + hostname); + this.loopThread.setName("tick-" + context.hostname()); } @Override @@ -151,7 +149,7 @@ public class NodeAgentImpl implements NodeAgent { synchronized (monitor) { if (wantFrozen != frozen) { wantFrozen = frozen; - logger.debug(wantFrozen ? "Freezing" : "Unfreezing"); + context.log(logger, LogLevel.DEBUG, wantFrozen ? "Freezing" : "Unfreezing"); signalWorkToBeDone(); } @@ -162,7 +160,7 @@ public class NodeAgentImpl implements NodeAgent { @Override public Map<String, Object> debugInfo() { Map<String, Object> debug = new LinkedHashMap<>(); - debug.put("hostname", hostname); + debug.put("hostname", context.hostname()); debug.put("isFrozen", isFrozen); debug.put("wantFrozen", wantFrozen); debug.put("terminated", terminated); @@ -173,20 +171,20 @@ public class NodeAgentImpl implements NodeAgent { @Override public void start() { - logger.info("Starting with interval " + timeBetweenEachConverge.toMillis() + " ms"); + context.log(logger, "Starting with interval " + timeBetweenEachConverge.toMillis() + " ms"); loopThread.start(); serviceRestarter = service -> { try { ProcessResult processResult = dockerOperations.executeCommandInContainerAsRoot( - containerName, "service", service, "restart"); + context.containerName(), "service", service, "restart"); if (!processResult.isSuccess()) { - logger.error("Failed to restart service " + service + ": " + processResult); + context.log(logger, LogLevel.ERROR, "Failed to restart service " + service + ": " + processResult); } } catch (Exception e) { - logger.error("Failed to restart service " + service, e); + context.log(logger, LogLevel.ERROR, "Failed to restart service " + service, e); } }; } @@ -204,11 +202,12 @@ public class NodeAgentImpl implements NodeAgent { loopThread.join(); filebeatRestarter.awaitTermination(Long.MAX_VALUE, TimeUnit.NANOSECONDS); } catch (InterruptedException e) { - logger.error("Interrupted while waiting for converge thread and filebeatRestarter scheduler to shutdown"); + context.log(logger, LogLevel.ERROR, + "Interrupted while waiting for converge thread and filebeatRestarter scheduler to shutdown"); } } while (loopThread.isAlive() || !filebeatRestarter.isTerminated()); - logger.info("Stopped"); + context.log(logger, "Stopped"); } /** @@ -219,8 +218,8 @@ public class NodeAgentImpl implements NodeAgent { void startServicesIfNeeded() { if (!hasStartedServices) { - logger.info("Starting services"); - dockerOperations.startServices(containerName); + context.log(logger, "Starting services"); + dockerOperations.startServices(context.containerName()); hasStartedServices = true; } } @@ -228,14 +227,14 @@ public class NodeAgentImpl implements NodeAgent { void resumeNodeIfNeeded(NodeSpec node) { if (!hasResumedNode) { if (!currentFilebeatRestarter.isPresent()) { - storageMaintainer.writeMetricsConfig(containerName, node); - storageMaintainer.writeFilebeatConfig(containerName, node); + storageMaintainer.writeMetricsConfig(context.containerName(), node); + storageMaintainer.writeFilebeatConfig(context.containerName(), node); currentFilebeatRestarter = Optional.of(filebeatRestarter.scheduleWithFixedDelay( () -> serviceRestarter.accept("filebeat"), 1, 1, TimeUnit.DAYS)); } - logger.debug("Starting optional node program resume command"); - dockerOperations.resumeNode(containerName); + context.log(logger, LogLevel.DEBUG, "Starting optional node program resume command"); + dockerOperations.resumeNode(context.containerName()); hasResumedNode = true; } } @@ -258,21 +257,21 @@ public class NodeAgentImpl implements NodeAgent { private void publishStateToNodeRepoIfChanged(NodeAttributes currentAttributes, NodeAttributes wantedAttributes) { if (!currentAttributes.equals(wantedAttributes)) { - logger.info("Publishing new set of attributes to node repo: " - + currentAttributes + " -> " + wantedAttributes); - nodeRepository.updateNodeAttributes(hostname, wantedAttributes); + context.log(logger, "Publishing new set of attributes to node repo: %s -> %s", + currentAttributes, wantedAttributes); + nodeRepository.updateNodeAttributes(context.hostname().value(), wantedAttributes); } } private void startContainer(NodeSpec node) { ContainerData containerData = createContainerData(environment, node); - dockerOperations.createContainer(containerName, node, containerData); - dockerOperations.startContainer(containerName); + dockerOperations.createContainer(context.containerName(), node, containerData); + dockerOperations.startContainer(context.containerName()); lastCpuMetric = new CpuUsageReporter(); hasStartedServices = true; // Automatically started with the container hasResumedNode = false; - logger.info("Container successfully started, new containerState is " + containerState); + context.log(logger, "Container successfully started, new containerState is " + containerState); } private Optional<Container> removeContainerIfNeededUpdateContainerState(NodeSpec node, Optional<Container> existingContainer) { @@ -280,7 +279,7 @@ public class NodeAgentImpl implements NodeAgent { .flatMap(container -> removeContainerIfNeeded(node, container)) .map(container -> { shouldRestartServices(node).ifPresent(restartReason -> { - logger.info("Will restart services: " + restartReason); + context.log(logger, "Will restart services: " + restartReason); restartServices(node, container); }); return container; @@ -300,21 +299,20 @@ public class NodeAgentImpl implements NodeAgent { private void restartServices(NodeSpec node, Container existingContainer) { if (existingContainer.state.isRunning() && node.getState() == Node.State.active) { - ContainerName containerName = existingContainer.name; - logger.info("Restarting services"); + context.log(logger, "Restarting services"); // Since we are restarting the services we need to suspend the node. orchestratorSuspendNode(); - dockerOperations.restartVespa(containerName); + dockerOperations.restartVespa(context.containerName()); } } @Override public void stopServices() { - logger.info("Stopping services"); + context.log(logger, "Stopping services"); if (containerState == ABSENT) return; try { hasStartedServices = hasResumedNode = false; - dockerOperations.stopServices(containerName); + dockerOperations.stopServices(context.containerName()); } catch (ContainerNotFoundException e) { containerState = ABSENT; } @@ -322,17 +320,17 @@ public class NodeAgentImpl implements NodeAgent { @Override public void suspend() { - logger.info("Suspending services on node"); + context.log(logger, "Suspending services on node"); if (containerState == ABSENT) return; try { hasResumedNode = false; - dockerOperations.suspendNode(containerName); + dockerOperations.suspendNode(context.containerName()); } catch (ContainerNotFoundException e) { containerState = ABSENT; } catch (RuntimeException e) { // It's bad to continue as-if nothing happened, but on the other hand if we do not proceed to // remove container, we will not be able to upgrade to fix any problems in the suspend logic! - logger.warning("Failed trying to suspend container " + containerName.asString(), e); + context.log(logger, LogLevel.WARNING, "Failed trying to suspend container", e); } } @@ -363,7 +361,7 @@ public class NodeAgentImpl implements NodeAgent { private Optional<Container> removeContainerIfNeeded(NodeSpec node, Container existingContainer) { Optional<String> removeReason = shouldRemoveContainer(node, existingContainer); if (removeReason.isPresent()) { - logger.info("Will remove container: " + removeReason.get()); + context.log(logger, "Will remove container: " + removeReason.get()); if (existingContainer.state.isRunning()) { if (node.getState() == Node.State.active) { @@ -376,13 +374,13 @@ public class NodeAgentImpl implements NodeAgent { } stopServices(); } catch (Exception e) { - logger.info("Failed stopping services, ignoring", e); + context.log(logger, LogLevel.WARNING, "Failed stopping services, ignoring", e); } } stopFilebeatSchedulerIfNeeded(); dockerOperations.removeContainer(existingContainer); containerState = ABSENT; - logger.info("Container successfully removed, new containerState is " + containerState); + context.log(logger, "Container successfully removed, new containerState is " + containerState); return Optional.empty(); } return Optional.of(existingContainer); @@ -403,7 +401,7 @@ public class NodeAgentImpl implements NodeAgent { synchronized (monitor) { if (!workToDoNow) { workToDoNow = true; - logger.debug("Signaling work to be done"); + context.log(logger, LogLevel.DEBUG, "Signaling work to be done"); monitor.notifyAll(); } } @@ -420,7 +418,7 @@ public class NodeAgentImpl implements NodeAgent { try { monitor.wait(remainder); } catch (InterruptedException e) { - logger.error("Interrupted while sleeping before tick, ignoring"); + context.log(logger, LogLevel.ERROR, "Interrupted while sleeping before tick, ignoring"); } } else break; } @@ -429,7 +427,7 @@ public class NodeAgentImpl implements NodeAgent { if (isFrozen != wantFrozen) { isFrozen = wantFrozen; - logger.info("Updated NodeAgent's frozen state, new value: isFrozen: " + isFrozen); + context.log(logger, "Updated NodeAgent's frozen state, new value: isFrozen: " + isFrozen); } isFrozenCopy = isFrozen; } @@ -438,22 +436,22 @@ public class NodeAgentImpl implements NodeAgent { boolean converged = false; if (isFrozenCopy) { - logger.debug("tick: isFrozen"); + context.log(logger, LogLevel.DEBUG, "tick: isFrozen"); } else { try { converge(); converged = true; } catch (OrchestratorException e) { - logger.info(e.getMessage()); + context.log(logger, e.getMessage()); } catch (ContainerNotFoundException e) { containerState = ABSENT; - logger.warning("Container unexpectedly gone, resetting containerState to " + containerState); + context.log(logger, LogLevel.WARNING, "Container unexpectedly gone, resetting containerState to " + containerState); } catch (DockerException e) { numberOfUnhandledException++; - logger.error("Caught a DockerException", e); + context.log(logger, LogLevel.ERROR, "Caught a DockerException", e); } catch (Exception e) { numberOfUnhandledException++; - logger.error("Unhandled exception, ignoring.", e); + context.log(logger, LogLevel.ERROR, "Unhandled exception, ignoring.", e); } } @@ -462,13 +460,13 @@ public class NodeAgentImpl implements NodeAgent { // Public for testing void converge() { - final Optional<NodeSpec> optionalNode = nodeRepository.getOptionalNode(hostname); + final Optional<NodeSpec> optionalNode = nodeRepository.getOptionalNode(context.hostname().value()); // We just removed the node from node repo, so this is expected until NodeAdmin stop this NodeAgent if (!optionalNode.isPresent() && expectNodeNotInNodeRepo) return; final NodeSpec node = optionalNode.orElseThrow(() -> - new IllegalStateException(String.format("Node '%s' missing from node repository.", hostname))); + new IllegalStateException(String.format("Node '%s' missing from node repository", context.hostname()))); expectNodeNotInNodeRepo = false; @@ -477,10 +475,10 @@ public class NodeAgentImpl implements NodeAgent { // Every time the node spec changes, we should clear the metrics for this container as the dimensions // will change and we will be reporting duplicate metrics. if (container.map(c -> c.state.isRunning()).orElse(false)) { - storageMaintainer.writeMetricsConfig(containerName, node); + storageMaintainer.writeMetricsConfig(context.containerName(), node); } - logger.debug("Loading new node spec: " + node.toString()); + context.log(logger, LogLevel.DEBUG, "Loading new node spec: " + node.toString()); lastNode = node; } @@ -493,16 +491,16 @@ public class NodeAgentImpl implements NodeAgent { updateNodeRepoWithCurrentAttributes(node); break; case active: - storageMaintainer.handleCoreDumpsForContainer(containerName, node); + storageMaintainer.handleCoreDumpsForContainer(context.containerName(), node); - storageMaintainer.getDiskUsageFor(containerName) + storageMaintainer.getDiskUsageFor(context.containerName()) .map(diskUsage -> (double) diskUsage / BYTES_IN_GB / node.getMinDiskAvailableGb()) .filter(diskUtil -> diskUtil >= 0.8) - .ifPresent(diskUtil -> storageMaintainer.removeOldFilesFromNode(containerName)); + .ifPresent(diskUtil -> storageMaintainer.removeOldFilesFromNode(context.containerName())); scheduleDownLoadIfNeeded(node); if (isDownloadingImage()) { - logger.debug("Waiting for image to download " + imageBeingDownloaded.asString()); + context.log(logger, LogLevel.DEBUG, "Waiting for image to download " + imageBeingDownloaded.asString()); return; } container = removeContainerIfNeededUpdateContainerState(node, container); @@ -532,23 +530,23 @@ public class NodeAgentImpl implements NodeAgent { // - Slobrok and internal orchestrator state is used to determine whether // to allow upgrade (suspend). updateNodeRepoWithCurrentAttributes(node); - logger.info("Call resume against Orchestrator"); - orchestrator.resume(hostname); + context.log(logger, "Call resume against Orchestrator"); + orchestrator.resume(context.hostname().value()); break; case inactive: removeContainerIfNeededUpdateContainerState(node, container); updateNodeRepoWithCurrentAttributes(node); break; case provisioned: - nodeRepository.setNodeState(hostname, Node.State.dirty); + nodeRepository.setNodeState(context.hostname().value(), Node.State.dirty); break; case dirty: removeContainerIfNeededUpdateContainerState(node, container); - logger.info("State is " + node.getState() + ", will delete application storage and mark node as ready"); + context.log(logger, "State is " + node.getState() + ", will delete application storage and mark node as ready"); athenzCredentialsMaintainer.clearCredentials(); - storageMaintainer.cleanupNodeStorage(containerName, node); + storageMaintainer.cleanupNodeStorage(context.containerName(), node); updateNodeRepoWithCurrentAttributes(node); - nodeRepository.setNodeState(hostname, Node.State.ready); + nodeRepository.setNodeState(context.hostname().value(), Node.State.ready); expectNodeNotInNodeRepo = true; break; default: @@ -601,11 +599,11 @@ public class NodeAgentImpl implements NodeAgent { final NodeSpec node = lastNode; if (node == null || containerState != UNKNOWN) return; - Optional<ContainerStats> containerStats = dockerOperations.getContainerStats(containerName); + Optional<ContainerStats> containerStats = dockerOperations.getContainerStats(context.containerName()); if (!containerStats.isPresent()) return; Dimensions.Builder dimensionsBuilder = new Dimensions.Builder() - .add("host", hostname) + .add("host", context.hostname().value()) .add("role", "tenants") .add("state", node.getState().toString()) .add("parentHostname", environment.getParentHostHostname()); @@ -623,7 +621,7 @@ public class NodeAgentImpl implements NodeAgent { final long memoryTotalBytesUsage = ((Number) stats.getMemoryStats().get("usage")).longValue(); final long memoryTotalBytesCache = ((Number) ((Map) stats.getMemoryStats().get("stats")).get("cache")).longValue(); final long diskTotalBytes = (long) (node.getMinDiskAvailableGb() * BYTES_IN_GB); - final Optional<Long> diskTotalBytesUsed = storageMaintainer.getDiskUsageFor(containerName); + final Optional<Long> diskTotalBytesUsed = storageMaintainer.getDiskUsageFor(context.containerName()); lastCpuMetric.updateCpuDeltas(cpuSystemTotalTime, cpuContainerTotalTime, cpuContainerKernelTime); @@ -676,15 +674,15 @@ public class NodeAgentImpl implements NodeAgent { // Push metrics to the metrics proxy in each container - give it maximum 1 seconds to complete String[] command = {"vespa-rpc-invoke", "-t", "2", "tcp/localhost:19091", "setExtraMetrics", wrappedMetrics}; - dockerOperations.executeCommandInContainerAsRoot(containerName, 5L, command); + dockerOperations.executeCommandInContainerAsRoot(context.containerName(), 5L, command); } catch (DockerExecTimeoutException | JsonProcessingException e) { - logger.warning("Failed to push metrics to container", e); + context.log(logger, LogLevel.WARNING, "Failed to push metrics to container", e); } } private Optional<Container> getContainer() { if (containerState == ABSENT) return Optional.empty(); - Optional<Container> container = dockerOperations.getContainer(containerName); + Optional<Container> container = dockerOperations.getContainer(context.containerName()); if (! container.isPresent()) containerState = ABSENT; return container; } @@ -747,8 +745,8 @@ public class NodeAgentImpl implements NodeAgent { // to allow the node admin to make decisions that depend on the docker image. Or, each docker image // needs to contain routines for drain and suspend. For many images, these can just be dummy routines. private void orchestratorSuspendNode() { - logger.info("Ask Orchestrator for permission to suspend node " + hostname); - orchestrator.suspend(hostname); + context.log(logger, "Ask Orchestrator for permission to suspend node"); + orchestrator.suspend(context.hostname().value()); } protected ContainerData createContainerData(Environment environment, NodeSpec node) { diff --git a/node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/integrationTests/DockerTester.java b/node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/integrationTests/DockerTester.java index 15bb2825738..da119b756b8 100644 --- a/node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/integrationTests/DockerTester.java +++ b/node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/integrationTests/DockerTester.java @@ -18,12 +18,15 @@ import com.yahoo.vespa.hosted.node.admin.maintenance.identity.AthenzCredentialsM import com.yahoo.vespa.hosted.node.admin.nodeadmin.NodeAdminImpl; import com.yahoo.vespa.hosted.node.admin.nodeadmin.NodeAdminStateUpdaterImpl; import com.yahoo.vespa.hosted.node.admin.nodeagent.NodeAgent; +import com.yahoo.vespa.hosted.node.admin.nodeagent.NodeAgentContextImplTest; import com.yahoo.vespa.hosted.node.admin.nodeagent.NodeAgentImpl; import com.yahoo.vespa.hosted.node.admin.component.Environment; import com.yahoo.vespa.hosted.node.admin.component.PathResolver; import com.yahoo.vespa.hosted.node.admin.task.util.network.IPAddressesMock; import com.yahoo.vespa.hosted.provision.Node; +import com.yahoo.vespa.test.file.TestFileSystem; +import java.nio.file.FileSystem; import java.nio.file.Path; import java.nio.file.Paths; import java.time.Clock; @@ -53,6 +56,7 @@ public class DockerTester implements AutoCloseable { final NodeAdminStateUpdaterImpl nodeAdminStateUpdater; final NodeAdminImpl nodeAdmin; private final OrchestratorMock orchestratorMock = new OrchestratorMock(callOrderVerifier); + private final FileSystem fileSystem = TestFileSystem.create(); DockerTester() { @@ -93,7 +97,8 @@ public class DockerTester implements AutoCloseable { MetricReceiverWrapper mr = new MetricReceiverWrapper(MetricReceiver.nullImplementation); - Function<String, NodeAgent> nodeAgentFactory = (hostName) -> new NodeAgentImpl(hostName, nodeRepositoryMock, + Function<String, NodeAgent> nodeAgentFactory = (hostName) -> new NodeAgentImpl( + NodeAgentContextImplTest.nodeAgentFromHostname(fileSystem, hostName), nodeRepositoryMock, orchestratorMock, dockerOperations, storageMaintainer, aclMaintainer, environment, clock, NODE_AGENT_SCAN_INTERVAL, athenzCredentialsMaintainer); nodeAdmin = new NodeAdminImpl(dockerOperations, nodeAgentFactory, storageMaintainer, aclMaintainer, mr, Clock.systemUTC()); nodeAdminStateUpdater = new NodeAdminStateUpdaterImpl(nodeRepositoryMock, orchestratorMock, storageMaintainer, diff --git a/node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgentImplTest.java b/node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgentImplTest.java index 62f69aa12cf..24fcb363f9b 100644 --- a/node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgentImplTest.java +++ b/node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgentImplTest.java @@ -69,7 +69,7 @@ public class NodeAgentImplTest { private static final String vespaVersion = "1.2.3"; private final String hostName = "host1.test.yahoo.com"; - private final ContainerName containerName = new ContainerName("host1"); + private final NodeAgentContext context = NodeAgentContextImplTest.nodeAgentFromHostname(hostName); private final DockerImage dockerImage = new DockerImage("dockerImage"); private final DockerOperations dockerOperations = mock(DockerOperations.class); private final NodeRepository nodeRepository = mock(NodeRepository.class); @@ -95,7 +95,7 @@ public class NodeAgentImplTest { .build(); private final NodeSpec.Builder nodeBuilder = new NodeSpec.Builder() - .hostname(hostName) + .hostname(context.hostname().value()) .nodeType(NodeType.tenant) .flavor("docker") .minCpuCores(MIN_CPU_CORES) @@ -120,19 +120,19 @@ public class NodeAgentImplTest { NodeAgentImpl nodeAgent = makeNodeAgent(dockerImage, true); when(nodeRepository.getOptionalNode(hostName)).thenReturn(Optional.of(node)); - when(storageMaintainer.getDiskUsageFor(eq(containerName))).thenReturn(Optional.of(187500000000L)); + when(storageMaintainer.getDiskUsageFor(eq(context.containerName()))).thenReturn(Optional.of(187500000000L)); nodeAgent.converge(); verify(dockerOperations, never()).removeContainer(any()); verify(orchestrator, never()).suspend(any(String.class)); verify(dockerOperations, never()).pullImageAsyncIfNeeded(any()); - verify(storageMaintainer, never()).removeOldFilesFromNode(eq(containerName)); + verify(storageMaintainer, never()).removeOldFilesFromNode(eq(context.containerName())); final InOrder inOrder = inOrder(dockerOperations, orchestrator, nodeRepository); // TODO: Verify this isn't run unless 1st time - inOrder.verify(dockerOperations, never()).startServices(eq(containerName)); - inOrder.verify(dockerOperations, times(1)).resumeNode(eq(containerName)); + inOrder.verify(dockerOperations, never()).startServices(eq(context.containerName())); + inOrder.verify(dockerOperations, times(1)).resumeNode(eq(context.containerName())); inOrder.verify(orchestrator).resume(hostName); } @@ -153,11 +153,11 @@ public class NodeAgentImplTest { NodeAgentImpl nodeAgent = makeNodeAgent(dockerImage, true); when(nodeRepository.getOptionalNode(hostName)).thenReturn(Optional.of(node)); - when(storageMaintainer.getDiskUsageFor(eq(containerName))).thenReturn(Optional.of(217432719360L)); + when(storageMaintainer.getDiskUsageFor(eq(context.containerName()))).thenReturn(Optional.of(217432719360L)); nodeAgent.converge(); - verify(storageMaintainer, times(1)).removeOldFilesFromNode(eq(containerName)); + verify(storageMaintainer, times(1)).removeOldFilesFromNode(eq(context.containerName())); } @Test @@ -173,27 +173,27 @@ public class NodeAgentImplTest { NodeAgentImpl nodeAgent = makeNodeAgent(dockerImage, true); when(nodeRepository.getOptionalNode(hostName)).thenReturn(Optional.of(node)); - when(storageMaintainer.getDiskUsageFor(eq(containerName))).thenReturn(Optional.of(187500000000L)); + when(storageMaintainer.getDiskUsageFor(eq(context.containerName()))).thenReturn(Optional.of(187500000000L)); nodeAgent.converge(); - inOrder.verify(dockerOperations, never()).startServices(eq(containerName)); - inOrder.verify(dockerOperations, times(1)).resumeNode(eq(containerName)); + inOrder.verify(dockerOperations, never()).startServices(eq(context.containerName())); + inOrder.verify(dockerOperations, times(1)).resumeNode(eq(context.containerName())); nodeAgent.suspend(); nodeAgent.converge(); - inOrder.verify(dockerOperations, never()).startServices(eq(containerName)); - inOrder.verify(dockerOperations, times(1)).resumeNode(eq(containerName)); // Expect a resume, but no start services + inOrder.verify(dockerOperations, never()).startServices(eq(context.containerName())); + inOrder.verify(dockerOperations, times(1)).resumeNode(eq(context.containerName())); // Expect a resume, but no start services // No new suspends/stops, so no need to resume/start nodeAgent.converge(); - inOrder.verify(dockerOperations, never()).startServices(eq(containerName)); - inOrder.verify(dockerOperations, never()).resumeNode(eq(containerName)); + inOrder.verify(dockerOperations, never()).startServices(eq(context.containerName())); + inOrder.verify(dockerOperations, never()).resumeNode(eq(context.containerName())); nodeAgent.suspend(); nodeAgent.stopServices(); nodeAgent.converge(); - inOrder.verify(dockerOperations, times(1)).startServices(eq(containerName)); - inOrder.verify(dockerOperations, times(1)).resumeNode(eq(containerName)); + inOrder.verify(dockerOperations, times(1)).startServices(eq(context.containerName())); + inOrder.verify(dockerOperations, times(1)).resumeNode(eq(context.containerName())); } @Test @@ -215,7 +215,7 @@ public class NodeAgentImplTest { when(pathResolver.getApplicationStoragePathForNodeAdmin()).thenReturn(Files.createTempDirectory("foo")); when(pathResolver.getApplicationStoragePathForHost()).thenReturn(Files.createTempDirectory("bar")); when(dockerOperations.pullImageAsyncIfNeeded(eq(dockerImage))).thenReturn(false); - when(storageMaintainer.getDiskUsageFor(eq(containerName))).thenReturn(Optional.of(201326592000L)); + when(storageMaintainer.getDiskUsageFor(eq(context.containerName()))).thenReturn(Optional.of(201326592000L)); nodeAgent.converge(); @@ -225,10 +225,10 @@ public class NodeAgentImplTest { final InOrder inOrder = inOrder(dockerOperations, orchestrator, nodeRepository, aclMaintainer); inOrder.verify(dockerOperations, times(1)).pullImageAsyncIfNeeded(eq(dockerImage)); - inOrder.verify(dockerOperations, times(1)).createContainer(eq(containerName), eq(node), any()); - inOrder.verify(dockerOperations, times(1)).startContainer(eq(containerName)); + inOrder.verify(dockerOperations, times(1)).createContainer(eq(context.containerName()), eq(node), any()); + inOrder.verify(dockerOperations, times(1)).startContainer(eq(context.containerName())); inOrder.verify(aclMaintainer, times(1)).run(); - inOrder.verify(dockerOperations, times(1)).resumeNode(eq(containerName)); + inOrder.verify(dockerOperations, times(1)).resumeNode(eq(context.containerName())); inOrder.verify(nodeRepository).updateNodeAttributes( hostName, new NodeAttributes() .withRestartGeneration(restartGeneration) @@ -256,7 +256,7 @@ public class NodeAgentImplTest { when(nodeRepository.getOptionalNode(hostName)).thenReturn(Optional.of(node)); when(dockerOperations.pullImageAsyncIfNeeded(any())).thenReturn(true); - when(storageMaintainer.getDiskUsageFor(eq(containerName))).thenReturn(Optional.of(201326592000L)); + when(storageMaintainer.getDiskUsageFor(eq(context.containerName()))).thenReturn(Optional.of(201326592000L)); nodeAgent.converge(); @@ -291,7 +291,7 @@ public class NodeAgentImplTest { .thenReturn(Optional.of(secondSpec)) .thenReturn(Optional.of(thirdSpec)); when(dockerOperations.pullImageAsyncIfNeeded(any())).thenReturn(true); - when(storageMaintainer.getDiskUsageFor(eq(containerName))).thenReturn(Optional.of(201326592000L)); + when(storageMaintainer.getDiskUsageFor(eq(context.containerName()))).thenReturn(Optional.of(201326592000L)); when(pathResolver.getApplicationStoragePathForHost()).thenReturn(Files.createTempDirectory("bar")); nodeAgent.converge(); @@ -303,8 +303,8 @@ public class NodeAgentImplTest { inOrder.verify(orchestrator).resume(any(String.class)); inOrder.verify(orchestrator).suspend(any(String.class)); inOrder.verify(dockerOperations).removeContainer(any()); - inOrder.verify(dockerOperations, times(1)).createContainer(eq(containerName), eq(thirdSpec), any()); - inOrder.verify(dockerOperations).startContainer(eq(containerName)); + inOrder.verify(dockerOperations, times(1)).createContainer(eq(context.containerName()), eq(thirdSpec), any()); + inOrder.verify(dockerOperations).startContainer(eq(context.containerName())); inOrder.verify(orchestrator).resume(any(String.class)); } @@ -329,8 +329,8 @@ public class NodeAgentImplTest { fail("Expected to throw an exception"); } catch (Exception ignored) { } - verify(dockerOperations, never()).createContainer(eq(containerName), eq(node), any()); - verify(dockerOperations, never()).startContainer(eq(containerName)); + verify(dockerOperations, never()).createContainer(eq(context.containerName()), eq(node), any()); + verify(dockerOperations, never()).startContainer(eq(context.containerName())); verify(orchestrator, never()).resume(any(String.class)); verify(nodeRepository, never()).updateNodeAttributes(any(String.class), any(NodeAttributes.class)); } @@ -379,10 +379,10 @@ public class NodeAgentImplTest { nodeAgent.converge(); // Should only be called once, when we initialize - verify(dockerOperations, times(1)).getContainer(eq(containerName)); + verify(dockerOperations, times(1)).getContainer(eq(context.containerName())); verify(dockerOperations, never()).removeContainer(any()); - verify(dockerOperations, never()).createContainer(eq(containerName), eq(node), any()); - verify(dockerOperations, never()).startContainer(eq(containerName)); + verify(dockerOperations, never()).createContainer(eq(context.containerName()), eq(node), any()); + verify(dockerOperations, never()).startContainer(eq(context.containerName())); verify(orchestrator, never()).resume(any(String.class)); verify(nodeRepository, never()).updateNodeAttributes(eq(hostName), any()); } @@ -458,13 +458,13 @@ public class NodeAgentImplTest { final InOrder inOrder = inOrder(storageMaintainer, dockerOperations, nodeRepository); inOrder.verify(dockerOperations, times(1)).removeContainer(any()); - inOrder.verify(storageMaintainer, times(1)).cleanupNodeStorage(eq(containerName), eq(node)); + inOrder.verify(storageMaintainer, times(1)).cleanupNodeStorage(eq(context.containerName()), eq(node)); inOrder.verify(nodeRepository, times(1)).setNodeState(eq(hostName), eq(Node.State.ready)); - verify(dockerOperations, never()).createContainer(eq(containerName), any(), any()); - verify(dockerOperations, never()).startContainer(eq(containerName)); - verify(dockerOperations, never()).suspendNode(eq(containerName)); - verify(dockerOperations, times(1)).stopServices(eq(containerName)); + verify(dockerOperations, never()).createContainer(eq(context.containerName()), any(), any()); + verify(dockerOperations, never()).startContainer(eq(context.containerName())); + verify(dockerOperations, never()).suspendNode(eq(context.containerName())); + verify(dockerOperations, times(1)).stopServices(eq(context.containerName())); verify(orchestrator, never()).resume(any(String.class)); verify(orchestrator, never()).suspend(any(String.class)); // current Docker image and vespa version should be cleared @@ -513,13 +513,13 @@ public class NodeAgentImplTest { when(nodeRepository.getOptionalNode(eq(hostName))).thenReturn(Optional.of(node)); when(pathResolver.getApplicationStoragePathForNodeAdmin()).thenReturn(Files.createTempDirectory("foo")); when(pathResolver.getApplicationStoragePathForHost()).thenReturn(Files.createTempDirectory("bar")); - when(storageMaintainer.getDiskUsageFor(eq(containerName))).thenReturn(Optional.of(201326592000L)); + when(storageMaintainer.getDiskUsageFor(eq(context.containerName()))).thenReturn(Optional.of(201326592000L)); nodeAgent.tick(); verify(dockerOperations, times(1)).removeContainer(any()); - verify(dockerOperations, times(1)).createContainer(eq(containerName), eq(node), any()); - verify(dockerOperations, times(1)).startContainer(eq(containerName)); + verify(dockerOperations, times(1)).createContainer(eq(context.containerName()), eq(node), any()); + verify(dockerOperations, times(1)).startContainer(eq(context.containerName())); } @Test @@ -537,12 +537,12 @@ public class NodeAgentImplTest { NodeAgentImpl nodeAgent = makeNodeAgent(dockerImage, true); when(nodeRepository.getOptionalNode(eq(hostName))).thenReturn(Optional.of(node)); - when(storageMaintainer.getDiskUsageFor(eq(containerName))).thenReturn(Optional.of(201326592000L)); + when(storageMaintainer.getDiskUsageFor(eq(context.containerName()))).thenReturn(Optional.of(201326592000L)); final InOrder inOrder = inOrder(orchestrator, dockerOperations, nodeRepository); doThrow(new RuntimeException("Failed 1st time")) .doNothing() - .when(dockerOperations).resumeNode(eq(containerName)); + .when(dockerOperations).resumeNode(eq(context.containerName())); // 1st try try { @@ -607,8 +607,8 @@ public class NodeAgentImplTest { when(pathResolver.getApplicationStoragePathForNodeAdmin()).thenReturn(Files.createTempDirectory("foo")); when(pathResolver.getApplicationStoragePathForHost()).thenReturn(Files.createTempDirectory("bar")); when(dockerOperations.pullImageAsyncIfNeeded(eq(dockerImage))).thenReturn(false); - when(storageMaintainer.getDiskUsageFor(eq(containerName))).thenReturn(Optional.of(201326592000L)); - doThrow(new DockerException("Failed to set up network")).doNothing().when(dockerOperations).startContainer(eq(containerName)); + when(storageMaintainer.getDiskUsageFor(eq(context.containerName()))).thenReturn(Optional.of(201326592000L)); + doThrow(new DockerException("Failed to set up network")).doNothing().when(dockerOperations).startContainer(eq(context.containerName())); try { nodeAgent.converge(); @@ -616,8 +616,8 @@ public class NodeAgentImplTest { } catch (DockerException ignored) { } verify(dockerOperations, never()).removeContainer(any()); - verify(dockerOperations, times(1)).createContainer(eq(containerName), eq(node), any()); - verify(dockerOperations, times(1)).startContainer(eq(containerName)); + verify(dockerOperations, times(1)).createContainer(eq(context.containerName()), eq(node), any()); + verify(dockerOperations, times(1)).startContainer(eq(context.containerName())); verify(nodeAgent, never()).resumeNodeIfNeeded(any()); // The docker container was actually started and is running, but subsequent exec calls to set up @@ -626,8 +626,8 @@ public class NodeAgentImplTest { nodeAgent.converge(); verify(dockerOperations, times(1)).removeContainer(any()); - verify(dockerOperations, times(2)).createContainer(eq(containerName), eq(node), any()); - verify(dockerOperations, times(2)).startContainer(eq(containerName)); + verify(dockerOperations, times(2)).createContainer(eq(context.containerName()), eq(node), any()); + verify(dockerOperations, times(2)).startContainer(eq(context.containerName())); verify(nodeAgent, times(1)).resumeNodeIfNeeded(any()); } @@ -663,8 +663,8 @@ public class NodeAgentImplTest { NodeAgentImpl nodeAgent = makeNodeAgent(dockerImage, true); when(nodeRepository.getOptionalNode(eq(hostName))).thenReturn(Optional.of(node)); - when(storageMaintainer.getDiskUsageFor(eq(containerName))).thenReturn(Optional.of(39625000000L)); - when(dockerOperations.getContainerStats(eq(containerName))) + when(storageMaintainer.getDiskUsageFor(eq(context.containerName()))).thenReturn(Optional.of(39625000000L)); + when(dockerOperations.getContainerStats(eq(context.containerName()))) .thenReturn(Optional.of(stats1)) .thenReturn(Optional.of(stats2)); when(pathResolver.getApplicationStoragePathForHost()).thenReturn(Files.createTempDirectory("bar")); @@ -687,7 +687,7 @@ public class NodeAgentImplTest { System.arraycopy(invocation.getArguments(), 2, calledCommand, 0, calledCommand.length); calledCommand[calledCommand.length - 1] = calledCommand[calledCommand.length - 1].replaceAll("\"timestamp\":\\d+", "\"timestamp\":0"); - assertEquals(containerName, calledContainerName); + assertEquals(context.containerName(), calledContainerName); assertEquals(5L, calledTimeout); assertArrayEquals(expectedCommand, calledCommand); return null; @@ -705,7 +705,7 @@ public class NodeAgentImplTest { NodeAgentImpl nodeAgent = makeNodeAgent(null, false); when(nodeRepository.getOptionalNode(eq(hostName))).thenReturn(Optional.of(node)); - when(dockerOperations.getContainerStats(eq(containerName))).thenReturn(Optional.empty()); + when(dockerOperations.getContainerStats(eq(context.containerName()))).thenReturn(Optional.empty()); nodeAgent.converge(); // Run the converge loop once to initialize lastNode @@ -731,7 +731,7 @@ public class NodeAgentImplTest { Path tempDirectory = Files.createTempDirectory("foo"); when(pathResolver.getApplicationStoragePathForHost()).thenReturn(tempDirectory); when(dockerOperations.pullImageAsyncIfNeeded(eq(dockerImage))).thenReturn(false); - when(storageMaintainer.getDiskUsageFor(eq(containerName))).thenReturn(Optional.of(201326592000L)); + when(storageMaintainer.getDiskUsageFor(eq(context.containerName()))).thenReturn(Optional.of(201326592000L)); nodeAgent.converge(); @@ -740,10 +740,10 @@ public class NodeAgentImplTest { final InOrder inOrder = inOrder(dockerOperations, orchestrator, nodeRepository, aclMaintainer); inOrder.verify(dockerOperations, times(1)).pullImageAsyncIfNeeded(eq(dockerImage)); - inOrder.verify(dockerOperations, times(1)).createContainer(eq(containerName), eq(node), any()); - inOrder.verify(dockerOperations, times(1)).startContainer(eq(containerName)); + inOrder.verify(dockerOperations, times(1)).createContainer(eq(context.containerName()), eq(node), any()); + inOrder.verify(dockerOperations, times(1)).startContainer(eq(context.containerName())); inOrder.verify(aclMaintainer, times(1)).run(); - inOrder.verify(dockerOperations, times(1)).resumeNode(eq(containerName)); + inOrder.verify(dockerOperations, times(1)).resumeNode(eq(context.containerName())); inOrder.verify(nodeRepository).updateNodeAttributes( hostName, new NodeAttributes() .withRebootGeneration(rebootGeneration) @@ -758,7 +758,7 @@ public class NodeAgentImplTest { doNothing().when(storageMaintainer).writeFilebeatConfig(any(), any()); doNothing().when(storageMaintainer).writeMetricsConfig(any(), any()); - return new NodeAgentImpl(hostName, nodeRepository, orchestrator, dockerOperations, + return new NodeAgentImpl(context, nodeRepository, orchestrator, dockerOperations, storageMaintainer, aclMaintainer, environment, clock, NODE_AGENT_SCAN_INTERVAL, athenzCredentialsMaintainer); } @@ -768,11 +768,11 @@ public class NodeAgentImplTest { hostName, dockerImage, ContainerResources.from(MIN_CPU_CORES, MIN_MAIN_MEMORY_AVAILABLE_GB), - containerName, + context.containerName(), isRunning ? Container.State.RUNNING : Container.State.EXITED, isRunning ? 1 : 0)) : Optional.empty(); - when(dockerOperations.getContainer(eq(containerName))).thenReturn(container); + when(dockerOperations.getContainer(eq(context.containerName()))).thenReturn(container); } } |