about | summary | refs | log | tree | commit | diff | stats
path: root/node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgentImplTest.java
diff options
context:
space:
mode:
Diffstat (limited to 'node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgentImplTest.java')
-rw-r--r-- node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgentImplTest.java 889
1 files changed, 889 insertions, 0 deletions
diff --git a/node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgentImplTest.java b/node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgentImplTest.java
new file mode 100644
index 00000000000..709326cc3b8
--- /dev/null
+++ b/node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgentImplTest.java
@@ -0,0 +1,889 @@
+// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.vespa.hosted.node.admin.nodeagent;
+
+import com.yahoo.component.Version;
+import com.yahoo.config.provision.ApplicationId;
+import com.yahoo.config.provision.DockerImage;
+import com.yahoo.config.provision.NodeResources;
+import com.yahoo.config.provision.NodeType;
+import com.yahoo.jdisc.test.TestTimer;
+import com.yahoo.vespa.flags.InMemoryFlagSource;
+import com.yahoo.vespa.flags.PermanentFlags;
+import com.yahoo.vespa.hosted.node.admin.configserver.noderepository.NodeAttributes;
+import com.yahoo.vespa.hosted.node.admin.configserver.noderepository.NodeRepository;
+import com.yahoo.vespa.hosted.node.admin.configserver.noderepository.NodeSpec;
+import com.yahoo.vespa.hosted.node.admin.configserver.noderepository.NodeState;
+import com.yahoo.vespa.hosted.node.admin.configserver.noderepository.OrchestratorStatus;
+import com.yahoo.vespa.hosted.node.admin.configserver.noderepository.reports.DropDocumentsReport;
+import com.yahoo.vespa.hosted.node.admin.configserver.orchestrator.Orchestrator;
+import com.yahoo.vespa.hosted.node.admin.configserver.orchestrator.OrchestratorException;
+import com.yahoo.vespa.hosted.node.admin.container.Container;
+import com.yahoo.vespa.hosted.node.admin.container.ContainerId;
+import com.yahoo.vespa.hosted.node.admin.container.ContainerName;
+import com.yahoo.vespa.hosted.node.admin.container.ContainerOperations;
+import com.yahoo.vespa.hosted.node.admin.container.ContainerResources;
+import com.yahoo.vespa.hosted.node.admin.container.RegistryCredentials;
+import com.yahoo.vespa.hosted.node.admin.maintenance.StorageMaintainer;
+import com.yahoo.vespa.hosted.node.admin.maintenance.acl.AclMaintainer;
+import com.yahoo.vespa.hosted.node.admin.maintenance.identity.CredentialsMaintainer;
+import com.yahoo.vespa.hosted.node.admin.maintenance.servicedump.VespaServiceDumper;
+import com.yahoo.vespa.hosted.node.admin.nodeadmin.ConvergenceException;
+import com.yahoo.vespa.hosted.node.admin.task.util.file.UnixPath;
+import com.yahoo.vespa.test.file.TestFileSystem;
+import org.junit.jupiter.api.BeforeEach;
+import org.junit.jupiter.api.Test;
+import org.mockito.InOrder;
+
+import java.nio.file.FileSystem;
+import java.time.Duration;
+import java.time.Instant;
+import java.util.List;
+import java.util.Map;
+import java.util.Optional;
+import java.util.function.BiFunction;
+
+import static org.junit.jupiter.api.Assertions.assertEquals;
+import static org.junit.jupiter.api.Assertions.assertFalse;
+import static org.junit.jupiter.api.Assertions.assertTrue;
+import static org.junit.jupiter.api.Assertions.fail;
+import static org.mockito.ArgumentMatchers.any;
+import static org.mockito.ArgumentMatchers.eq;
+import static org.mockito.Mockito.doAnswer;
+import static org.mockito.Mockito.doNothing;
+import static org.mockito.Mockito.doThrow;
+import static org.mockito.Mockito.inOrder;
+import static org.mockito.Mockito.mock;
+import static org.mockito.Mockito.never;
+import static org.mockito.Mockito.spy;
+import static org.mockito.Mockito.times;
+import static org.mockito.Mockito.verify;
+import static org.mockito.Mockito.when;
+
+/**
+ * @author Øyvind Bakksjø
+ */
+public class NodeAgentImplTest {
+ private static final NodeResources resources = new NodeResources(2, 16, 250, 1, NodeResources.DiskSpeed.fast, NodeResources.StorageType.local); // presumably vcpu/memory/disk/bandwidth defaults for the node specs under test - confirm against NodeResources
+ private static final Version vespaVersion = Version.fromString("1.2.3"); // Vespa version used as wanted/current version in the tests
+ private static final ContainerId containerId = new ContainerId("af23"); // container id expected in updateContainer(...) verifications
+ private static final String hostName = "host1.test.yahoo.com"; // hostname used for node repository and orchestrator stubbing/verification
+
+ private final NodeAgentContextSupplier contextSupplier = mock(NodeAgentContextSupplier.class); // Mockito mock
+ private final DockerImage dockerImage = DockerImage.fromString("registry.example.com/repo/image"); // image used as wanted/current image in the tests
+ private final ContainerOperations containerOperations = mock(ContainerOperations.class); // Mockito mock; string-returning operations are stubbed in setUp()
+ private final NodeRepository nodeRepository = mock(NodeRepository.class); // Mockito mock
+ private final Orchestrator orchestrator = mock(Orchestrator.class); // Mockito mock
+ private final StorageMaintainer storageMaintainer = mock(StorageMaintainer.class); // Mockito mock
+ private final AclMaintainer aclMaintainer = mock(AclMaintainer.class); // Mockito mock
+ private final HealthChecker healthChecker = mock(HealthChecker.class); // Mockito mock; tests stub verifyHealth(...) to throw or do nothing
+ private final CredentialsMaintainer credentialsMaintainer = mock(CredentialsMaintainer.class); // Mockito mock
+ private final InMemoryFlagSource flagSource = new InMemoryFlagSource(); // tests set feature flags here, e.g. PermanentFlags.CONTAINER_CPU_CAP
+ private final TestTimer timer = new TestTimer(Instant.now()); // advanced manually by the warm-up tests via timer.advance(...)
+ private final FileSystem fileSystem = TestFileSystem.create(); // test file system passed to NodeAgentContextImpl.builder(...).fileSystem(...)
+
+    /**
+     * Stubs the service-control operations on the {@code containerOperations} mock so that each
+     * of them returns an empty string for any argument.
+     */
+    @BeforeEach
+    public void setUp() {
+        // Service lifecycle operations
+        when(containerOperations.startServices(any())).thenReturn("");
+        when(containerOperations.stopServices(any())).thenReturn("");
+        when(containerOperations.restartVespa(any())).thenReturn("");
+
+        // Suspend/resume operations
+        when(containerOperations.suspendNode(any())).thenReturn("");
+        when(containerOperations.resumeNode(any())).thenReturn("");
+    }
+
+    /**
+     * Converging an active node whose current image and Vespa version already equal the wanted
+     * ones must not remove the container, suspend via the orchestrator, or pull an image.
+     * Exactly one {@code resumeNode} is expected; services are not started and the orchestrator
+     * is not asked to resume.
+     */
+    @Test
+    void upToDateContainerIsUntouched() {
+        NodeSpec spec = nodeBuilder(NodeState.active)
+                .wantedDockerImage(dockerImage)
+                .currentDockerImage(dockerImage)
+                .wantedVespaVersion(vespaVersion)
+                .currentVespaVersion(vespaVersion)
+                .orchestratorStatus(OrchestratorStatus.NO_REMARKS)
+                .build();
+        NodeAgentContext context = createContext(spec);
+        NodeAgentImpl agent = makeNodeAgent(dockerImage, true);
+        when(nodeRepository.getOptionalNode(hostName)).thenReturn(Optional.of(spec));
+
+        agent.doConverge(context);
+
+        // Nothing disruptive may have happened
+        verify(containerOperations, never()).pullImageAsyncIfNeeded(any(), any(), any());
+        verify(containerOperations, never()).removeContainer(eq(context), any());
+        verify(orchestrator, never()).suspend(any(String.class));
+
+        InOrder ordered = inOrder(containerOperations, orchestrator, nodeRepository);
+        // TODO: Verify this isn't run unless 1st time
+        ordered.verify(containerOperations, never()).startServices(context);
+        ordered.verify(containerOperations).resumeNode(context);
+        ordered.verify(orchestrator, never()).resume(hostName);
+    }
+
+    /**
+     * A converge of an up-to-date active node must call
+     * {@code storageMaintainer.cleanDiskIfFull} exactly once with the converged context.
+     */
+    @Test
+    void verifyRemoveOldFilesIfDiskFull() {
+        NodeSpec spec = nodeBuilder(NodeState.active)
+                .wantedDockerImage(dockerImage)
+                .currentDockerImage(dockerImage)
+                .wantedVespaVersion(vespaVersion)
+                .currentVespaVersion(vespaVersion)
+                .build();
+        NodeAgentContext context = createContext(spec);
+        NodeAgentImpl agent = makeNodeAgent(dockerImage, true);
+        when(nodeRepository.getOptionalNode(hostName)).thenReturn(Optional.of(spec));
+
+        agent.doConverge(context);
+
+        verify(storageMaintainer).cleanDiskIfFull(context);
+    }
+
+    /**
+     * Walks through converge / host-suspension cycles and checks, in order, which of
+     * {@code startServices}, {@code resumeNode}, {@code createContainer} and {@code startContainer}
+     * are invoked on the container operations mock after each step.
+     */
+    @Test
+    void startsAfterStoppingServices() {
+        InOrder ordered = inOrder(containerOperations);
+        NodeSpec spec = nodeBuilder(NodeState.active)
+                .wantedDockerImage(dockerImage)
+                .currentDockerImage(dockerImage)
+                .wantedVespaVersion(vespaVersion)
+                .currentVespaVersion(vespaVersion)
+                .build();
+        NodeAgentContext context = createContext(spec);
+        NodeAgentImpl agent = makeNodeAgent(dockerImage, true);
+        when(nodeRepository.getOptionalNode(hostName)).thenReturn(Optional.of(spec));
+
+        // Initial converge: a resume, but no start services
+        agent.doConverge(context);
+        ordered.verify(containerOperations, never()).startServices(context);
+        ordered.verify(containerOperations).resumeNode(context);
+
+        // Converge after a host suspension
+        agent.stopForHostSuspension(context);
+        agent.doConverge(context);
+        ordered.verify(containerOperations, never()).startServices(context);
+        ordered.verify(containerOperations).resumeNode(context); // Expect a resume, but no start services
+
+        // No new suspends/stops, so no need to resume/start
+        agent.doConverge(context);
+        ordered.verify(containerOperations, never()).startServices(context);
+        ordered.verify(containerOperations, never()).resumeNode(context);
+
+        // Converge after another host suspension: container is created and started
+        agent.stopForHostSuspension(context);
+        agent.doConverge(context);
+        ordered.verify(containerOperations).createContainer(eq(context), any());
+        ordered.verify(containerOperations).startContainer(context);
+        ordered.verify(containerOperations, never()).startServices(context); // done as part of startContainer
+        ordered.verify(containerOperations).resumeNode(context);
+    }
+
+    /**
+     * With a node agent created via {@code makeNodeAgent(null, false)}, converging an active node
+     * must pull the image, create and start the container, converge ACLs, resume the node, verify
+     * health and report the new attributes to the node repository, in that order, without
+     * removing a container, starting services separately or suspending.
+     */
+    @Test
+    void absentContainerCausesStart() {
+        NodeSpec spec = nodeBuilder(NodeState.active)
+                .wantedDockerImage(dockerImage)
+                .wantedVespaVersion(vespaVersion)
+                .build();
+        NodeAgentContext context = createContext(spec);
+        NodeAgentImpl agent = makeNodeAgent(null, false);
+
+        when(nodeRepository.getOptionalNode(hostName)).thenReturn(Optional.of(spec));
+        when(containerOperations.pullImageAsyncIfNeeded(any(), eq(dockerImage), any())).thenReturn(false);
+
+        agent.doConverge(context);
+
+        verify(orchestrator, never()).suspend(any(String.class));
+        verify(containerOperations, never()).startServices(any());
+        verify(containerOperations, never()).removeContainer(eq(context), any());
+
+        NodeAttributes reportedAttributes = new NodeAttributes()
+                .withDockerImage(dockerImage)
+                .withVespaVersion(vespaVersion)
+                .withRebootGeneration(0);
+        InOrder ordered = inOrder(containerOperations, orchestrator, nodeRepository, aclMaintainer, healthChecker);
+        ordered.verify(containerOperations).pullImageAsyncIfNeeded(any(), eq(dockerImage), any());
+        ordered.verify(containerOperations).createContainer(eq(context), any());
+        ordered.verify(containerOperations).startContainer(context);
+        ordered.verify(aclMaintainer).converge(context);
+        ordered.verify(containerOperations).resumeNode(context);
+        ordered.verify(healthChecker).verifyHealth(context);
+        ordered.verify(nodeRepository).updateNodeAttributes(hostName, reportedAttributes);
+        ordered.verify(orchestrator, never()).resume(hostName);
+    }
+
+    /**
+     * When the wanted image differs from the current one and {@code pullImageAsyncIfNeeded}
+     * returns {@code true}, the converge must only request the pull of the new image:
+     * no orchestrator suspend/resume and no container removal.
+     */
+    @Test
+    void containerIsNotStoppedIfNewImageMustBePulled() {
+        DockerImage wantedImage = DockerImage.fromString("registry.example.com/repo/new-image");
+        NodeSpec spec = nodeBuilder(NodeState.active)
+                .wantedDockerImage(wantedImage)
+                .currentDockerImage(dockerImage)
+                .wantedVespaVersion(vespaVersion)
+                .currentVespaVersion(vespaVersion)
+                .build();
+        NodeAgentContext context = createContext(spec);
+        NodeAgentImpl agent = makeNodeAgent(dockerImage, true);
+
+        when(nodeRepository.getOptionalNode(hostName)).thenReturn(Optional.of(spec));
+        when(containerOperations.pullImageAsyncIfNeeded(any(), any(), any())).thenReturn(true);
+
+        agent.doConverge(context);
+
+        verify(containerOperations, never()).removeContainer(eq(context), any());
+        verify(orchestrator, never()).resume(any(String.class));
+        verify(orchestrator, never()).suspend(any(String.class));
+
+        InOrder ordered = inOrder(containerOperations);
+        ordered.verify(containerOperations).pullImageAsyncIfNeeded(any(), eq(wantedImage), any());
+    }
+
+    /**
+     * Converges the same node with successively changed specs (disk, then vcpu) and verifies that
+     * the vcpu change results in an in-place {@code updateContainer} rather than a container
+     * removal/start or any orchestrator suspend/resume. Finally sets the
+     * {@code CONTAINER_CPU_CAP} flag and expects another {@code updateContainer} with new resources.
+     */
+    @Test
+    void containerIsUpdatedIfCpuChanged() {
+        NodeSpec.Builder specBuilder = nodeBuilder(NodeState.active)
+                .wantedDockerImage(dockerImage)
+                .currentDockerImage(dockerImage)
+                .wantedVespaVersion(vespaVersion)
+                .currentVespaVersion(vespaVersion)
+                .orchestratorStatus(OrchestratorStatus.NO_REMARKS);
+
+        NodeAgentContext firstContext = createContext(specBuilder.build());
+        NodeAgentImpl agent = makeNodeAgent(dockerImage, true);
+
+        when(containerOperations.pullImageAsyncIfNeeded(any(), any(), any())).thenReturn(true);
+
+        InOrder ordered = inOrder(orchestrator, containerOperations);
+
+        // Unchanged spec
+        agent.doConverge(firstContext);
+        ordered.verify(orchestrator, never()).resume(any(String.class));
+
+        // Disk changed to 200
+        NodeSpec smallerDiskSpec = specBuilder.diskGb(200).build();
+        NodeAgentContext secondContext = createContext(smallerDiskSpec);
+        agent.doConverge(secondContext);
+        ordered.verify(orchestrator, never()).resume(any(String.class));
+
+        // vcpu changed to 5, context built with a CPU speed-up of 1.25
+        NodeSpec fiveVcpuSpec = specBuilder.vcpu(5).build();
+        NodeAgentContext thirdContext = NodeAgentContextImpl.builder(fiveVcpuSpec)
+                .fileSystem(fileSystem)
+                .cpuSpeedUp(1.25)
+                .build();
+        agent.doConverge(thirdContext);
+        ContainerResources updatedResources = ContainerResources.from(0, 4, 16);
+        mockGetContainer(dockerImage, updatedResources, true);
+
+        ordered.verify(orchestrator, never()).suspend(any());
+        ordered.verify(containerOperations).updateContainer(thirdContext, containerId, updatedResources);
+        ordered.verify(containerOperations, never()).removeContainer(any(), any());
+        ordered.verify(containerOperations, never()).startContainer(any());
+        ordered.verify(orchestrator, never()).resume(any());
+
+        // No changes
+        agent.converge(thirdContext);
+        ordered.verify(orchestrator, never()).suspend(any());
+        ordered.verify(containerOperations, never()).updateContainer(eq(thirdContext), eq(containerId), any());
+        ordered.verify(containerOperations, never()).removeContainer(any(), any());
+        ordered.verify(orchestrator, never()).resume(any());
+
+        // Set the feature flag
+        flagSource.withDoubleFlag(PermanentFlags.CONTAINER_CPU_CAP.id(), 2.3);
+
+        agent.doConverge(thirdContext);
+        ContainerResources cappedResources = ContainerResources.from(9.2, 4, 16);
+        ordered.verify(containerOperations).updateContainer(thirdContext, containerId, cappedResources);
+        ordered.verify(orchestrator, never()).resume(any());
+    }
+
+    /**
+     * Changing the node's memory between two converges must lead to the container being removed
+     * (not updated in place, and without a Vespa restart), followed by the restart/reboot
+     * generations being reported to the node repository. A further converge with the same context
+     * must neither update nor remove the container.
+     */
+    @Test
+    void containerIsRecreatedIfMemoryChanged() {
+        NodeSpec.Builder specBuilder = nodeBuilder(NodeState.active)
+                .wantedDockerImage(dockerImage)
+                .currentDockerImage(dockerImage)
+                .wantedVespaVersion(vespaVersion)
+                .currentVespaVersion(vespaVersion)
+                .wantedRestartGeneration(2)
+                .currentRestartGeneration(1);
+
+        NodeAgentContext firstContext = createContext(specBuilder.build());
+        NodeAgentImpl agent = makeNodeAgent(dockerImage, true);
+
+        when(containerOperations.pullImageAsyncIfNeeded(any(), any(), any())).thenReturn(true);
+
+        agent.doConverge(firstContext);
+
+        // Memory changed to 20
+        NodeSpec moreMemorySpec = specBuilder.memoryGb(20).build();
+        NodeAgentContext secondContext = createContext(moreMemorySpec);
+        agent.doConverge(secondContext);
+        ContainerResources resourcesAfterResize = ContainerResources.from(0, 2, 20);
+        mockGetContainer(dockerImage, resourcesAfterResize, true);
+
+        NodeAttributes reportedAttributes = new NodeAttributes().withRestartGeneration(2).withRebootGeneration(0);
+        InOrder ordered = inOrder(orchestrator, containerOperations, nodeRepository);
+        ordered.verify(orchestrator).resume(any(String.class));
+        ordered.verify(containerOperations).removeContainer(eq(secondContext), any());
+        ordered.verify(containerOperations, never()).updateContainer(any(), any(), any());
+        ordered.verify(containerOperations, never()).restartVespa(any());
+        ordered.verify(nodeRepository).updateNodeAttributes(hostName, reportedAttributes);
+
+        // Converging again with the same context
+        agent.doConverge(secondContext);
+        ordered.verify(orchestrator).resume(any(String.class));
+        ordered.verify(containerOperations, never()).updateContainer(any(), any(), any());
+        ordered.verify(containerOperations, never()).removeContainer(any(), any());
+    }
+
+    /**
+     * When a restart is wanted but {@code orchestrator.suspend} throws, the converge must
+     * propagate the {@link OrchestratorException} without creating/starting a container,
+     * resuming, or updating node attributes. The ACL maintainer must still have been converged.
+     */
+    @Test
+    void noRestartIfOrchestratorSuspendFails() {
+        NodeSpec spec = nodeBuilder(NodeState.active)
+                .wantedDockerImage(dockerImage)
+                .currentDockerImage(dockerImage)
+                .wantedVespaVersion(vespaVersion)
+                .currentVespaVersion(vespaVersion)
+                .wantedRestartGeneration(2)
+                .currentRestartGeneration(1)
+                .build();
+        NodeAgentContext context = createContext(spec);
+        NodeAgentImpl agent = makeNodeAgent(dockerImage, true);
+
+        OrchestratorException denied = new OrchestratorException("Denied");
+        doThrow(denied).when(orchestrator).suspend(hostName);
+        try {
+            agent.doConverge(context);
+            fail("Expected to throw an exception");
+        } catch (OrchestratorException ignored) {
+            // expected: suspension was denied
+        }
+
+        verify(nodeRepository, never()).updateNodeAttributes(any(String.class), any(NodeAttributes.class));
+        verify(orchestrator, never()).resume(any(String.class));
+        verify(containerOperations, never()).startContainer(context);
+        verify(containerOperations, never()).createContainer(eq(context), any());
+
+        // Verify aclMaintainer is called even if suspension fails
+        verify(aclMaintainer).converge(context);
+    }
+
+    /**
+     * When the wanted reboot generation is ahead of the current one, the container must be
+     * removed, re-created and started exactly once. Health verification is stubbed to fail on the
+     * first call and succeed afterwards: the first converge must therefore throw before resuming or
+     * updating node attributes, and the second converge must resume and report the new reboot
+     * generation without recreating the container again.
+     */
+    @Test
+    void recreatesContainerIfRebootWanted() {
+        final long wantedRebootGeneration = 2;
+        final NodeSpec node = nodeBuilder(NodeState.active)
+                .wantedDockerImage(dockerImage).currentDockerImage(dockerImage)
+                .wantedVespaVersion(vespaVersion).currentVespaVersion(vespaVersion)
+                .wantedRebootGeneration(wantedRebootGeneration).currentRebootGeneration(1)
+                .build();
+
+        NodeAgentContext context = createContext(node);
+        NodeAgentImpl nodeAgent = makeNodeAgent(dockerImage, true);
+
+        when(nodeRepository.getOptionalNode(hostName)).thenReturn(Optional.of(node));
+        when(containerOperations.pullImageAsyncIfNeeded(any(), eq(dockerImage), any())).thenReturn(false);
+        // First health check fails, every later one succeeds
+        doThrow(ConvergenceException.ofTransient("Connection refused")).doNothing()
+                .when(healthChecker).verifyHealth(eq(context));
+
+        try {
+            nodeAgent.doConverge(context);
+            // Consistent with the other expected-exception tests in this class: make a missing
+            // exception an explicit failure instead of relying on the later verifications.
+            fail("Expected first converge to fail on health verification");
+        } catch (ConvergenceException ignored) {
+            // expected: health verification failed
+        }
+
+        // First time we fail to resume because health verification fails
+        verify(orchestrator, times(1)).suspend(eq(hostName));
+        verify(containerOperations, times(1)).removeContainer(eq(context), any());
+        verify(containerOperations, times(1)).createContainer(eq(context), any());
+        verify(containerOperations, times(1)).startContainer(eq(context));
+        verify(orchestrator, never()).resume(eq(hostName));
+        verify(nodeRepository, never()).updateNodeAttributes(any(), any());
+
+        nodeAgent.doConverge(context);
+
+        // Do not reboot the container again
+        verify(containerOperations, times(1)).removeContainer(eq(context), any());
+        verify(containerOperations, times(1)).createContainer(eq(context), any());
+        verify(orchestrator, times(1)).resume(eq(hostName));
+        verify(nodeRepository, times(1)).updateNodeAttributes(eq(hostName), eq(new NodeAttributes()
+                .withRebootGeneration(wantedRebootGeneration)));
+    }
+
+    /**
+     * Converging a node in state {@code failed} must not remove its container, resume it in the
+     * orchestrator, or update its node attributes.
+     */
+    @Test
+    void failedNodeRunningContainerShouldStillBeRunning() {
+        NodeSpec spec = nodeBuilder(NodeState.failed)
+                .wantedDockerImage(dockerImage)
+                .currentDockerImage(dockerImage)
+                .wantedVespaVersion(vespaVersion)
+                .currentVespaVersion(vespaVersion)
+                .build();
+        NodeAgentContext context = createContext(spec);
+        NodeAgentImpl agent = makeNodeAgent(dockerImage, true);
+        when(nodeRepository.getOptionalNode(hostName)).thenReturn(Optional.of(spec));
+
+        agent.doConverge(context);
+
+        verify(nodeRepository, never()).updateNodeAttributes(eq(hostName), any());
+        verify(orchestrator, never()).resume(any(String.class));
+        verify(containerOperations, never()).removeContainer(eq(context), any());
+    }
+
+    /**
+     * Converging a node in state {@code ready} three times must look up the container only once
+     * and must never remove, create or start a container, resume via the orchestrator, or update
+     * node attributes.
+     */
+    @Test
+    void readyNodeLeadsToNoAction() {
+        NodeSpec spec = nodeBuilder(NodeState.ready).build();
+        NodeAgentContext context = createContext(spec);
+        NodeAgentImpl agent = makeNodeAgent(null, false);
+        when(nodeRepository.getOptionalNode(hostName)).thenReturn(Optional.of(spec));
+
+        for (int round = 0; round < 3; round++) {
+            agent.doConverge(context);
+        }
+
+        // Should only be called once, when we initialize
+        verify(containerOperations).getContainer(context);
+
+        verify(containerOperations, never()).removeContainer(eq(context), any());
+        verify(containerOperations, never()).createContainer(eq(context), any());
+        verify(containerOperations, never()).startContainer(context);
+        verify(orchestrator, never()).resume(any(String.class));
+        verify(nodeRepository, never()).updateNodeAttributes(eq(hostName), any());
+    }
+
+    /**
+     * Converging a node in state {@code inactive} must not remove its container, resume it in
+     * the orchestrator, or update its node attributes.
+     */
+    @Test
+    void inactiveNodeRunningContainerShouldStillBeRunning() {
+        NodeSpec spec = nodeBuilder(NodeState.inactive)
+                .wantedDockerImage(dockerImage)
+                .currentDockerImage(dockerImage)
+                .wantedVespaVersion(vespaVersion)
+                .currentVespaVersion(vespaVersion)
+                .build();
+        NodeAgentContext context = createContext(spec);
+        NodeAgentImpl agent = makeNodeAgent(dockerImage, true);
+        when(nodeRepository.getOptionalNode(hostName)).thenReturn(Optional.of(spec));
+
+        agent.doConverge(context);
+
+        InOrder ordered = inOrder(storageMaintainer, containerOperations);
+        ordered.verify(containerOperations, never()).removeContainer(eq(context), any());
+
+        verify(nodeRepository, never()).updateNodeAttributes(eq(hostName), any());
+        verify(orchestrator, never()).resume(any(String.class));
+    }
+
+    /**
+     * Converging a node in state {@code reserved} must not update its attributes in the node
+     * repository.
+     */
+    @Test
+    void reservedNodeDoesNotUpdateNodeRepoWithVersion() {
+        NodeSpec spec = nodeBuilder(NodeState.reserved)
+                .wantedDockerImage(dockerImage)
+                .wantedVespaVersion(vespaVersion)
+                .build();
+        NodeAgentContext context = createContext(spec);
+        NodeAgentImpl agent = makeNodeAgent(null, false);
+        when(nodeRepository.getOptionalNode(hostName)).thenReturn(Optional.of(spec));
+
+        agent.doConverge(context);
+
+        verify(nodeRepository, never()).updateNodeAttributes(eq(hostName), any());
+    }
+
+    /**
+     * Converges a node in the given state and verifies the teardown sequence: services stopped,
+     * core dumps handled, container removed, node storage archived and the node set to
+     * {@code ready}. Also verifies that no container is created or started, nothing is
+     * suspended or resumed, and that the current image and Vespa version are cleared in the
+     * node repository.
+     *
+     * @param nodeState               state of the node to converge
+     * @param wantedRestartGeneration if present, used as both the wanted and the current restart generation
+     */
+    private void nodeRunningContainerIsTakenDownAndCleanedAndRecycled(NodeState nodeState, Optional<Long> wantedRestartGeneration) {
+        NodeSpec.Builder specBuilder = nodeBuilder(nodeState)
+                .wantedDockerImage(dockerImage)
+                .currentDockerImage(dockerImage);
+        if (wantedRestartGeneration.isPresent()) {
+            Long generation = wantedRestartGeneration.get();
+            specBuilder.wantedRestartGeneration(generation).currentRestartGeneration(generation);
+        }
+
+        NodeSpec spec = specBuilder.build();
+        NodeAgentContext context = createContext(spec);
+        NodeAgentImpl agent = makeNodeAgent(dockerImage, true);
+        when(nodeRepository.getOptionalNode(hostName)).thenReturn(Optional.of(spec));
+
+        agent.doConverge(context);
+
+        // Teardown must happen in exactly this order
+        InOrder ordered = inOrder(storageMaintainer, containerOperations, nodeRepository);
+        ordered.verify(containerOperations).stopServices(context);
+        ordered.verify(storageMaintainer).handleCoreDumpsForContainer(eq(context), any(), eq(true));
+        ordered.verify(containerOperations).removeContainer(eq(context), any());
+        ordered.verify(storageMaintainer).archiveNodeStorage(context);
+        ordered.verify(nodeRepository).setNodeState(hostName, NodeState.ready);
+
+        verify(containerOperations, never()).createContainer(eq(context), any());
+        verify(containerOperations, never()).startContainer(context);
+        verify(containerOperations, never()).suspendNode(context);
+        verify(containerOperations).stopServices(context);
+        verify(orchestrator, never()).resume(any(String.class));
+        verify(orchestrator, never()).suspend(any(String.class));
+
+        // current Docker image and vespa version should be cleared
+        NodeAttributes clearedAttributes = new NodeAttributes()
+                .withDockerImage(DockerImage.EMPTY)
+                .withVespaVersion(Version.emptyVersion);
+        verify(nodeRepository).updateNodeAttributes(hostName, clearedAttributes);
+    }
+
+    /** Runs the teardown scenario for a {@code dirty} node with restart generation 1. */
+    @Test
+    void dirtyNodeRunningContainerIsTakenDownAndCleanedAndRecycled() {
+        Optional<Long> restartGeneration = Optional.of(1L);
+        nodeRunningContainerIsTakenDownAndCleanedAndRecycled(NodeState.dirty, restartGeneration);
+    }
+
+    /** Runs the teardown scenario for a {@code dirty} node without any restart generation. */
+    @Test
+    void dirtyNodeRunningContainerIsTakenDownAndCleanedAndRecycledNoRestartGeneration() {
+        Optional<Long> noRestartGeneration = Optional.empty();
+        nodeRunningContainerIsTakenDownAndCleanedAndRecycled(NodeState.dirty, noRestartGeneration);
+    }
+
+    /**
+     * With a node agent created via {@code makeNodeAgent(dockerImage, false)} (presumably an
+     * existing container that is not running - confirm against the helper), converging an
+     * active node must remove, create and start the container exactly once each.
+     */
+    @Test
+    void testRestartDeadContainerAfterNodeAdminRestart() {
+        NodeSpec spec = nodeBuilder(NodeState.active)
+                .currentDockerImage(dockerImage)
+                .wantedDockerImage(dockerImage)
+                .currentVespaVersion(vespaVersion)
+                .build();
+        NodeAgentContext context = createContext(spec);
+        NodeAgentImpl agent = makeNodeAgent(dockerImage, false);
+        when(nodeRepository.getOptionalNode(eq(hostName))).thenReturn(Optional.of(spec));
+
+        agent.doConverge(context);
+
+        verify(containerOperations).removeContainer(eq(context), any());
+        verify(containerOperations).createContainer(eq(context), any());
+        verify(containerOperations).startContainer(context);
+    }
+
+    /**
+     * {@code resumeNode} is stubbed to throw on the first call and return an empty string on the
+     * second. The first converge must propagate the exception after a single {@code resumeNode}
+     * call; the second converge must call {@code resumeNode} again and then resume the host in
+     * the orchestrator, with no other interactions on the verified mocks.
+     */
+    @Test
+    void resumeProgramRunsUntilSuccess() {
+        NodeSpec spec = nodeBuilder(NodeState.active)
+                .wantedDockerImage(dockerImage)
+                .currentDockerImage(dockerImage)
+                .currentVespaVersion(vespaVersion)
+                .wantedRestartGeneration(1)
+                .currentRestartGeneration(1)
+                .orchestratorStatus(OrchestratorStatus.ALLOWED_TO_BE_DOWN)
+                .build();
+        NodeAgentContext context = createContext(spec);
+        NodeAgentImpl agent = makeNodeAgent(dockerImage, true);
+        when(nodeRepository.getOptionalNode(eq(hostName))).thenReturn(Optional.of(spec));
+
+        InOrder ordered = inOrder(orchestrator, containerOperations, nodeRepository);
+        RuntimeException firstFailure = new RuntimeException("Failed 1st time");
+        doThrow(firstFailure)
+                .doReturn("")
+                .when(containerOperations).resumeNode(context);
+
+        // 1st try
+        try {
+            agent.doConverge(context);
+            fail("Expected to throw an exception");
+        } catch (RuntimeException ignored) {
+            // expected: resumeNode failed
+        }
+
+        ordered.verify(containerOperations).resumeNode(any());
+        ordered.verifyNoMoreInteractions();
+
+        // 2nd try
+        agent.doConverge(context);
+
+        ordered.verify(containerOperations).resumeNode(any());
+        ordered.verify(orchestrator).resume(hostName);
+        ordered.verifyNoMoreInteractions();
+    }
+
+    /**
+     * {@code startContainer} is stubbed to throw on the first call and succeed afterwards.
+     * The first converge must fail after creating and attempting to start the container, without
+     * reaching {@code resumeNodeIfNeeded}. After the container is mocked as running, the second
+     * converge must remove it, create and start it again, and then reach {@code resumeNodeIfNeeded}.
+     */
+    @Test
+    void start_container_subtask_failure_leads_to_container_restart() {
+        NodeSpec spec = nodeBuilder(NodeState.active)
+                .wantedDockerImage(dockerImage)
+                .wantedVespaVersion(vespaVersion)
+                .wantedRestartGeneration(1)
+                .currentRestartGeneration(1)
+                .build();
+        NodeAgentContext context = createContext(spec);
+        NodeAgentImpl agent = spy(makeNodeAgent(null, false));
+
+        when(containerOperations.pullImageAsyncIfNeeded(any(), eq(dockerImage), any())).thenReturn(false);
+        RuntimeException networkFailure = new RuntimeException("Failed to set up network");
+        doThrow(networkFailure).doNothing().when(containerOperations).startContainer(context);
+
+        try {
+            agent.doConverge(context);
+            fail("Expected to get RuntimeException");
+        } catch (RuntimeException ignored) {
+            // expected: startContainer failed
+        }
+
+        verify(containerOperations, never()).removeContainer(eq(context), any());
+        verify(containerOperations).createContainer(eq(context), any());
+        verify(containerOperations).startContainer(context);
+        verify(agent, never()).resumeNodeIfNeeded(any());
+
+        // The docker container was actually started and is running, but subsequent exec calls to set up
+        // networking failed
+        mockGetContainer(dockerImage, true);
+        agent.doConverge(context);
+
+        verify(containerOperations).removeContainer(eq(context), any());
+        verify(containerOperations, times(2)).createContainer(eq(context), any());
+        verify(containerOperations, times(2)).startContainer(context);
+        verify(agent).resumeNodeIfNeeded(any());
+    }
+
+    /**
+     * Converging an active node of type {@code config} with orchestrator status
+     * {@code ALLOWED_TO_BE_DOWN} must pull the image, create and start the container, converge
+     * ACLs, resume the node, report attributes and finally resume in the orchestrator, in that
+     * order, without removing a container or suspending.
+     */
+    @Test
+    void testRunningConfigServer() {
+        NodeSpec spec = nodeBuilder(NodeState.active)
+                .type(NodeType.config)
+                .wantedDockerImage(dockerImage)
+                .wantedVespaVersion(vespaVersion)
+                .orchestratorStatus(OrchestratorStatus.ALLOWED_TO_BE_DOWN)
+                .build();
+        NodeAgentContext context = createContext(spec);
+        NodeAgentImpl agent = makeNodeAgent(null, false);
+
+        when(nodeRepository.getOptionalNode(hostName)).thenReturn(Optional.of(spec));
+        when(containerOperations.pullImageAsyncIfNeeded(any(), eq(dockerImage), any())).thenReturn(false);
+
+        agent.doConverge(context);
+
+        verify(orchestrator, never()).suspend(any(String.class));
+        verify(containerOperations, never()).removeContainer(eq(context), any());
+
+        NodeAttributes reportedAttributes = new NodeAttributes()
+                .withDockerImage(dockerImage)
+                .withVespaVersion(vespaVersion)
+                .withRebootGeneration(0);
+        InOrder ordered = inOrder(containerOperations, orchestrator, nodeRepository, aclMaintainer);
+        ordered.verify(containerOperations).pullImageAsyncIfNeeded(any(), eq(dockerImage), any());
+        ordered.verify(containerOperations).createContainer(eq(context), any());
+        ordered.verify(containerOperations).startContainer(context);
+        ordered.verify(aclMaintainer).converge(context);
+        ordered.verify(containerOperations).resumeNode(context);
+        ordered.verify(nodeRepository).updateNodeAttributes(hostName, reportedAttributes);
+        ordered.verify(orchestrator).resume(hostName);
+    }
+
+
+    /**
+     * Runs {@code verifyThatContainerIsStopped} for parked and failed nodes, each without an
+     * owner and with the default application as owner, and finally for an inactive node with
+     * the default application as owner.
+     */
+    @Test
+    void testThatStopContainerDependsOnOwnerPresent() {
+        for (NodeState state : List.of(NodeState.parked, NodeState.failed)) {
+            verifyThatContainerIsStopped(state, Optional.empty());
+            verifyThatContainerIsStopped(state, Optional.of(ApplicationId.defaultId()));
+        }
+        verifyThatContainerIsStopped(NodeState.inactive, Optional.of(ApplicationId.defaultId()));
+    }
+
+    /**
+     * Uses a node agent created with a 30 second duration argument (the warm-up period, judging
+     * by the asserted message). While health verification keeps failing, each converge must
+     * rethrow that exception. Once health verification passes, the converge must still refuse to
+     * resume until the warm-up period ends; after advancing the timer past it, the converge must
+     * update the container resources in place without suspend, remove, start or resume.
+     */
+    @Test
+    void initial_cpu_cap_test() {
+        NodeSpec.Builder specBuilder = nodeBuilder(NodeState.active)
+                .wantedDockerImage(dockerImage)
+                .currentDockerImage(dockerImage)
+                .wantedVespaVersion(vespaVersion)
+                .currentVespaVersion(vespaVersion);
+
+        NodeAgentContext context = createContext(specBuilder.build());
+        Duration warmUp = Duration.ofSeconds(30);
+        NodeAgentImpl agent = makeNodeAgent(null, false, warmUp);
+
+        InOrder ordered = inOrder(orchestrator, containerOperations);
+
+        // Health check keeps failing for three converge attempts
+        ConvergenceException notYetUp = ConvergenceException.ofTransient("Not yet up");
+        doThrow(notYetUp).when(healthChecker).verifyHealth(any());
+        for (int attempt = 0; attempt < 3; attempt++) {
+            try {
+                agent.doConverge(context);
+                fail("Expected to fail with health check exception");
+            } catch (ConvergenceException thrown) {
+                assertEquals(notYetUp, thrown);
+            }
+            timer.advance(warmUp);
+        }
+
+        // Health check passes, but the warm up period has not ended
+        doNothing().when(healthChecker).verifyHealth(any());
+        try {
+            agent.doConverge(context);
+            fail("Expected to fail due to warm up period not yet done");
+        } catch (ConvergenceException thrown) {
+            assertEquals("Refusing to resume until warm up period ends (in PT30S)", thrown.getMessage());
+        }
+        ordered.verify(orchestrator, never()).resume(any());
+        ordered.verify(orchestrator, never()).suspend(any());
+        ordered.verify(containerOperations, never()).updateContainer(any(), any(), any());
+
+        // Past the warm up period
+        timer.advance(Duration.ofSeconds(31));
+        agent.doConverge(context);
+
+        ContainerResources expectedResources = ContainerResources.from(0, 2, 16);
+        ordered.verify(orchestrator, never()).suspend(any());
+        ordered.verify(containerOperations).updateContainer(context, containerId, expectedResources);
+        ordered.verify(containerOperations, never()).removeContainer(any(), any());
+        ordered.verify(containerOperations, never()).startContainer(any());
+        ordered.verify(orchestrator, never()).resume(any());
+
+        // No changes
+        agent.converge(context);
+        ordered.verify(orchestrator, never()).suspend(any());
+        ordered.verify(containerOperations, never()).updateContainer(eq(context), eq(containerId), any());
+        ordered.verify(containerOperations, never()).removeContainer(any(), any());
+        ordered.verify(orchestrator, never()).resume(any());
+    }
+
+    /**
+     * With the existing container mocked to already have resources {@code from(0, 2, 16)},
+     * converging must not suspend, update or remove the container, nor resume in the orchestrator.
+     */
+    @Test
+    void resumes_normally_if_container_is_already_capped_on_start() {
+        NodeSpec.Builder specBuilder = nodeBuilder(NodeState.active)
+                .wantedDockerImage(dockerImage)
+                .currentDockerImage(dockerImage)
+                .wantedVespaVersion(vespaVersion)
+                .currentVespaVersion(vespaVersion)
+                .wantedRestartGeneration(1)
+                .currentRestartGeneration(1);
+
+        NodeAgentContext context = createContext(specBuilder.build());
+        NodeAgentImpl agent = makeNodeAgent(dockerImage, true, Duration.ofSeconds(30));
+        ContainerResources existingResources = ContainerResources.from(0, 2, 16);
+        mockGetContainer(dockerImage, existingResources, true);
+
+        InOrder ordered = inOrder(orchestrator, containerOperations);
+
+        agent.doConverge(context);
+        agent.converge(context);
+
+        ordered.verify(orchestrator, never()).suspend(any(String.class));
+        ordered.verify(containerOperations, never()).updateContainer(eq(context), eq(containerId), any());
+        ordered.verify(containerOperations, never()).removeContainer(any(), any());
+        ordered.verify(orchestrator, never()).resume(any(String.class));
+    }
+
+    /**
+     * Exercises a services restart (wanted restart generation 2, current 1) on an agent with a
+     * 30 second warm-up duration.
+     *
+     * Expected sequence: suspend, update the container to {@code ContainerResources.from(0, 0, 16)},
+     * restart Vespa; then a refusal to resume while the warm-up period is running; and finally a
+     * resume once the timer has been advanced past the warm-up period.
+     */
+    @Test
+    void uncaps_and_caps_cpu_for_services_restart() {
+        Duration warmUp = Duration.ofSeconds(30);
+        NodeSpec spec = nodeBuilder(NodeState.active)
+                .wantedDockerImage(dockerImage).currentDockerImage(dockerImage)
+                .wantedVespaVersion(vespaVersion).currentVespaVersion(vespaVersion)
+                .wantedRestartGeneration(2).currentRestartGeneration(1)
+                .build();
+
+        NodeAgentContext context = createContext(spec);
+        NodeAgentImpl agent = makeNodeAgent(dockerImage, true, warmUp);
+        mockGetContainer(dockerImage, ContainerResources.from(2, 2, 16), true);
+
+        InOrder ordered = inOrder(orchestrator, containerOperations);
+
+        // Restart generation mismatch: suspend, change container resources, restart services.
+        agent.converge(context);
+        ordered.verify(orchestrator).suspend(hostName);
+        ordered.verify(containerOperations).updateContainer(context, containerId, ContainerResources.from(0, 0, 16));
+        ordered.verify(containerOperations).restartVespa(context);
+
+        // Container now reports the updated resources and the health check passes,
+        // yet the agent must still refuse to resume until the warm-up period is over.
+        mockGetContainer(dockerImage, ContainerResources.from(0, 0, 16), true);
+        doNothing().when(healthChecker).verifyHealth(any());
+        try {
+            agent.doConverge(context);
+            fail("Expected to fail due to warm up period not yet done");
+        } catch (ConvergenceException expected) {
+            assertEquals("Refusing to resume until warm up period ends (in PT30S)", expected.getMessage());
+        }
+        ordered.verify(orchestrator, never()).resume(any());
+        ordered.verify(orchestrator, never()).suspend(any());
+        ordered.verify(containerOperations, never()).updateContainer(any(), any(), any());
+
+        // One second past the warm-up duration: converging now resumes the node.
+        timer.advance(Duration.ofSeconds(31));
+        agent.doConverge(context);
+        ordered.verify(orchestrator).resume(hostName);
+    }
+
+    /**
+     * Compares converging a node that already has a current docker image with converging one that
+     * has none, on an agent built with a 30 second warm-up duration.
+     */
+    @Test
+    void resume_during_first_warmup() {
+        InOrder ordered = inOrder(orchestrator, nodeRepository);
+        NodeAgentImpl agent = makeNodeAgent(dockerImage, true, Duration.ofSeconds(30));
+        mockGetContainer(dockerImage, ContainerResources.from(2, 2, 16), true);
+
+        // Warmup period prevents resume when node has a current docker image, i.e., already existed.
+        NodeAgentContext existingNode = createContext(nodeBuilder(NodeState.active)
+                .wantedDockerImage(dockerImage)
+                .currentDockerImage(dockerImage)
+                .build());
+        agent.converge(existingNode);
+        ordered.verifyNoMoreInteractions();
+
+        // Without a current docker image the node attributes are reported to the node repository.
+        NodeAgentContext newNode = createContext(nodeBuilder(NodeState.active)
+                .wantedDockerImage(dockerImage)
+                .build());
+        agent.converge(newNode);
+
+        NodeAttributes expectedAttributes = new NodeAttributes()
+                .withDockerImage(dockerImage)
+                .withRebootGeneration(0)
+                .withVespaVersion(Version.fromString("7.1.1"));
+        ordered.verify(nodeRepository).updateNodeAttributes(hostName, expectedAttributes);
+        ordered.verifyNoMoreInteractions();
+    }
+
+
+    /**
+     * Walks through the phases of a {@link DropDocumentsReport}, using one context per phase:
+     * no report, report requested, report with the second field set, report with the third
+     * field set, and finally the fourth field being written back by the agent.
+     */
+    @Test
+    void drop_all_documents() {
+        InOrder ordered = inOrder(orchestrator, nodeRepository);
+
+        // Builds an active-image spec for the given state, attaching the report only when given.
+        BiFunction<NodeState, DropDocumentsReport, NodeSpec> specBuilder = (state, report) -> {
+            NodeSpec.Builder builder = nodeBuilder(state);
+            if (report != null) {
+                builder = builder.report(DropDocumentsReport.reportId(), report.toJsonNode());
+            }
+            return builder.wantedDockerImage(dockerImage).currentDockerImage(dockerImage).build();
+        };
+        NodeAgentImpl agent = makeNodeAgent(dockerImage, true, Duration.ofSeconds(30));
+
+        NodeAgentContext noReport = createContext(specBuilder.apply(NodeState.active, null));
+        UnixPath indexPath = new UnixPath(noReport.paths().underVespaHome("var/db/vespa/search/cluster.foo/0/doc"))
+                .createParents()
+                .createNewFile();
+        mockGetContainer(dockerImage, ContainerResources.from(2, 2, 16), true);
+        assertTrue(indexPath.exists());
+
+        // Initially no changes, index is not dropped
+        agent.converge(noReport);
+        assertTrue(indexPath.exists());
+        ordered.verifyNoMoreInteractions();
+
+        // Report requested: the container is removed, the index file deleted, and the report is
+        // written back with the current timer value as its second field.
+        NodeAgentContext dropRequested = createContext(
+                specBuilder.apply(NodeState.active, new DropDocumentsReport(1L, null, null, null)));
+        agent.converge(dropRequested);
+        verify(containerOperations).removeContainer(eq(dropRequested), any());
+        assertFalse(indexPath.exists());
+        ordered.verify(nodeRepository).updateNodeAttributes(
+                hostName,
+                new NodeAttributes().withReport(
+                        DropDocumentsReport.reportId(),
+                        new DropDocumentsReport(1L, timer.currentTimeMillis(), null, null).toJsonNode()));
+        ordered.verifyNoMoreInteractions();
+
+        // After droppedAt and before readiedAt are set, we cannot proceed
+        mockGetContainer(null, false);
+        NodeAgentContext dropped = createContext(
+                specBuilder.apply(NodeState.active, new DropDocumentsReport(1L, 2L, null, null)));
+        agent.converge(dropped);
+        verify(containerOperations, never()).removeContainer(eq(dropped), any());
+        verify(containerOperations, never()).startContainer(dropped);
+        ordered.verifyNoMoreInteractions();
+
+        // With the third report field set, the container is started again.
+        NodeAgentContext readied = createContext(
+                specBuilder.apply(NodeState.active, new DropDocumentsReport(1L, 2L, 3L, null)));
+        agent.converge(readied);
+        verify(containerOperations).startContainer(readied);
+        ordered.verifyNoMoreInteractions();
+
+        // Container is running and the timer is past the 30 second warm-up: no further
+        // start/remove, and the report is written back with the fourth field filled in.
+        mockGetContainer(dockerImage, ContainerResources.from(0, 2, 16), true);
+        timer.advance(Duration.ofSeconds(31));
+        agent.converge(readied);
+        verify(containerOperations).startContainer(readied);
+        verify(containerOperations, never()).removeContainer(eq(readied), any());
+        ordered.verify(nodeRepository).updateNodeAttributes(
+                hostName,
+                new NodeAttributes()
+                        .withRebootGeneration(0)
+                        .withReport(
+                                DropDocumentsReport.reportId(),
+                                new DropDocumentsReport(1L, 2L, 3L, timer.currentTimeMillis()).toJsonNode()));
+        ordered.verifyNoMoreInteractions();
+    }
+
+    /**
+     * Converges a tenant node in the given state and checks how its container is handled.
+     *
+     * The container must never be removed. When {@code owner} is present, services must not be
+     * stopped; when it is absent, services must be stopped exactly once even if the agent
+     * converges a second time.
+     *
+     * @param nodeState state of the node under test
+     * @param owner     owning application, if any, to set on the node spec
+     */
+    private void verifyThatContainerIsStopped(NodeState nodeState, Optional<ApplicationId> owner) {
+        NodeSpec.Builder specBuilder = nodeBuilder(nodeState)
+                .type(NodeType.tenant)
+                .flavor("docker")
+                .wantedDockerImage(dockerImage)
+                .currentDockerImage(dockerImage);
+        owner.ifPresent(applicationId -> specBuilder.owner(applicationId));
+        NodeSpec node = specBuilder.build();
+
+        NodeAgentContext context = createContext(node);
+        NodeAgentImpl agent = makeNodeAgent(dockerImage, true);
+
+        when(nodeRepository.getOptionalNode(hostName)).thenReturn(Optional.of(node));
+
+        agent.doConverge(context);
+
+        verify(containerOperations, never()).removeContainer(eq(context), any());
+        if (owner.isPresent()) {
+            verify(containerOperations, never()).stopServices(context);
+            return;
+        }
+
+        verify(containerOperations).stopServices(context);
+        agent.doConverge(context);
+        // Should not be called more than once, have already been stopped
+        verify(containerOperations).stopServices(context);
+    }
+
+    /**
+     * Creates a node agent with a warm-up duration of -1 seconds.
+     * NOTE(review): presumably a negative duration means the warm-up period never blocks a
+     * resume; confirm against NodeAgentImpl.
+     *
+     * @param dockerImage image reported by the mocked container, or {@code null} for no container
+     * @param isRunning   whether the mocked container is reported as running
+     */
+    private NodeAgentImpl makeNodeAgent(DockerImage dockerImage, boolean isRunning) {
+        Duration warmUpDuration = Duration.ofSeconds(-1);
+        return makeNodeAgent(dockerImage, isRunning, warmUpDuration);
+    }
+
+    /**
+     * Stubs the container lookup, then creates the node agent under test from the mocked
+     * collaborators held by this test class.
+     *
+     * {@code createContainer} and {@code updateContainer} are stubbed so that, once called, later
+     * {@code getContainer} lookups return a running container with the node's wanted image and
+     * the resources passed to the call.
+     *
+     * @param dockerImage    image reported by the mocked container, or {@code null} for no container
+     * @param isRunning      whether the mocked container is reported as running
+     * @param warmUpDuration warm-up duration handed to the agent
+     */
+    private NodeAgentImpl makeNodeAgent(DockerImage dockerImage, boolean isRunning, Duration warmUpDuration) {
+        mockGetContainer(dockerImage, isRunning);
+
+        // createContainer(context, resources)
+        doAnswer(invocation -> {
+            NodeAgentContext createdFor = invocation.getArgument(0);
+            ContainerResources created = invocation.getArgument(1);
+            mockGetContainer(createdFor.node().wantedDockerImage().get(), created, true);
+            return null;
+        }).when(containerOperations).createContainer(any(), any());
+
+        // updateContainer(context, containerId, resources)
+        doAnswer(invocation -> {
+            NodeAgentContext updatedFor = invocation.getArgument(0);
+            ContainerResources updated = invocation.getArgument(2);
+            mockGetContainer(updatedFor.node().wantedDockerImage().get(), updated, true);
+            return null;
+        }).when(containerOperations).updateContainer(any(), any(), any());
+
+        return new NodeAgentImpl(
+                contextSupplier, nodeRepository, orchestrator, containerOperations,
+                () -> RegistryCredentials.none, storageMaintainer, flagSource,
+                List.of(credentialsMaintainer), Optional.of(aclMaintainer), Optional.of(healthChecker),
+                timer, warmUpDuration, VespaServiceDumper.DUMMY_INSTANCE, List.of());
+    }
+
+    /**
+     * Stubs the container lookup using resources built from 0 plus the vcpu and memory of this
+     * test's {@code resources} field.
+     *
+     * @param dockerImage image of the mocked container, or {@code null} for no container
+     * @param isRunning   whether the mocked container is reported as running
+     */
+    private void mockGetContainer(DockerImage dockerImage, boolean isRunning) {
+        ContainerResources defaultResources = ContainerResources.from(0, resources.vcpu(), resources.memoryGb());
+        mockGetContainer(dockerImage, defaultResources, isRunning);
+    }
+
+    /**
+     * Stubs {@code containerOperations.getContainer(...)} for this test's host.
+     *
+     * The stub throws {@link IllegalArgumentException} when asked about any other hostname. It
+     * returns an empty Optional when {@code dockerImage} is {@code null}, and otherwise a
+     * container with the given image and resources, created at the timer's current time.
+     *
+     * @param dockerImage        image of the mocked container, or {@code null} for no container
+     * @param containerResources resources reported by the mocked container
+     * @param isRunning          {@code true} for state running, {@code false} for state exited
+     */
+    private void mockGetContainer(DockerImage dockerImage, ContainerResources containerResources, boolean isRunning) {
+        Container.State state = isRunning ? Container.State.running : Container.State.exited;
+
+        doAnswer(invocation -> {
+            NodeAgentContext requestedFor = invocation.getArgument(0);
+            if (!hostName.equals(requestedFor.hostname().value())) {
+                throw new IllegalArgumentException();
+            }
+            if (dockerImage == null) {
+                return Optional.empty();
+            }
+            Container container = new Container(
+                    containerId,
+                    ContainerName.fromHostname(hostName),
+                    timer.currentTime(),
+                    state,
+                    "image-id-1",
+                    dockerImage,
+                    Map.of(),
+                    42,
+                    43,
+                    hostName,
+                    containerResources,
+                    List.of(),
+                    true);
+            return Optional.of(container);
+        }).when(containerOperations).getContainer(any());
+    }
+
+    /**
+     * Builds a node agent context for the given spec, backed by this test's file system.
+     *
+     * @param nodeSpec spec of the node the context describes
+     */
+    private NodeAgentContext createContext(NodeSpec nodeSpec) {
+        return NodeAgentContextImpl
+                .builder(nodeSpec)
+                .fileSystem(fileSystem)
+                .build();
+    }
+
+    /**
+     * Starts a test node spec for this test's host in the given state, with real resources set
+     * to this test's {@code resources} field.
+     *
+     * @param state node state to give the spec
+     */
+    private NodeSpec.Builder nodeBuilder(NodeState state) {
+        NodeSpec.Builder builder = NodeSpec.Builder.testSpec(hostName, state);
+        return builder.realResources(resources);
+    }
+}