diff options
author | Valerij Fredriksen <valerijf@yahooinc.com> | 2022-01-06 23:29:51 +0100 |
---|---|---|
committer | Valerij Fredriksen <valerijf@yahooinc.com> | 2022-01-06 23:29:51 +0100 |
commit | 01d747007bfeac918c33b19fd3a2c2e9c8bc0039 (patch) | |
tree | c0c0f341651da72117b869a0fce3eff79b743429 /node-admin | |
parent | eb85f48b3a6e4e69b2d45f2d9393d8b4d8e27daa (diff) |
Run with uncapped CPU during warmup period after restarting services
Diffstat (limited to 'node-admin')
2 files changed, 45 insertions, 0 deletions
diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgentImpl.java b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgentImpl.java
index f184deab375..f3b58b47ef0 100644
--- a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgentImpl.java
+++ b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgentImpl.java
@@ -262,11 +262,20 @@ public class NodeAgentImpl implements NodeAgent {
             context.log(logger, "Invoking vespa-nodectl to restart services: " + restartReason);
             orchestratorSuspendNode(context);

+            ContainerResources currentResources = existingContainer.get().resources();
+            ContainerResources wantedResources = currentResources.withUnlimitedCpus();
+            if ( ! wantedResources.equals(currentResources)) {
+                context.log(logger, "Updating container resources: %s -> %s",
+                        existingContainer.get().resources().toStringCpu(), wantedResources.toStringCpu());
+                containerOperations.updateContainer(context, existingContainer.get().id(), wantedResources);
+            }
+
             String output = containerOperations.restartVespa(context);
             if (!output.isBlank()) {
                 context.log(logger, "Restart services output: " + output);
             }
             currentRestartGeneration = context.node().wantedRestartGeneration();
+            firstSuccessfulHealthCheckInstant = Optional.empty();
         });
     }

diff --git a/node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgentImplTest.java b/node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgentImplTest.java
index 13f101be6e8..d87a60e4a44 100644
--- a/node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgentImplTest.java
+++ b/node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgentImplTest.java
@@ -699,6 +699,42 @@ public class NodeAgentImplTest {
         inOrder.verify(orchestrator, never()).resume(any(String.class));
     }

+    @Test
+    public void uncaps_and_caps_cpu_for_services_restart() {
+        NodeSpec.Builder specBuilder = nodeBuilder(NodeState.active)
+                .wantedDockerImage(dockerImage).currentDockerImage(dockerImage)
+                .wantedVespaVersion(vespaVersion).currentVespaVersion(vespaVersion)
+                .wantedRestartGeneration(2).currentRestartGeneration(1);
+
+        NodeAgentContext context = createContext(specBuilder.build());
+        NodeAgentImpl nodeAgent = makeNodeAgent(dockerImage, true, Duration.ofSeconds(30));
+        mockGetContainer(dockerImage, ContainerResources.from(2, 2, 16), true);
+
+        InOrder inOrder = inOrder(orchestrator, containerOperations);
+
+        nodeAgent.converge(context);
+        inOrder.verify(orchestrator, times(1)).suspend(eq(hostName));
+        inOrder.verify(containerOperations, times(1)).updateContainer(eq(context), eq(containerId), eq(ContainerResources.from(0, 0, 16)));
+        inOrder.verify(containerOperations, times(1)).restartVespa(eq(context));
+
+        mockGetContainer(dockerImage, ContainerResources.from(0, 0, 16), true);
+        doNothing().when(healthChecker).verifyHealth(any());
+        try {
+            nodeAgent.doConverge(context);
+            fail("Expected to fail due to warm up period not yet done");
+        } catch (ConvergenceException e) {
+            assertEquals("Refusing to resume until warm up period ends (in PT30S)", e.getMessage());
+        }
+        inOrder.verify(orchestrator, never()).resume(any());
+        inOrder.verify(orchestrator, never()).suspend(any());
+        inOrder.verify(containerOperations, never()).updateContainer(any(), any(), any());
+
+
+        clock.advance(Duration.ofSeconds(31));
+        nodeAgent.doConverge(context);
+        inOrder.verify(orchestrator, times(1)).resume(eq(hostName));
+    }
+
     private void verifyThatContainerIsStopped(NodeState nodeState, Optional<ApplicationId> owner) {
         NodeSpec.Builder nodeBuilder = nodeBuilder(nodeState)
                 .type(NodeType.tenant)