summary | refs | log | tree | commit | diff | stats
path: root/node-admin
diff options
context:
space:
mode:
author: Valerij Fredriksen <valerijf@yahooinc.com>, 2022-01-06 23:29:51 +0100
committer: Valerij Fredriksen <valerijf@yahooinc.com>, 2022-01-06 23:29:51 +0100
commit: 01d747007bfeac918c33b19fd3a2c2e9c8bc0039 (patch)
tree: c0c0f341651da72117b869a0fce3eff79b743429 /node-admin
parent: eb85f48b3a6e4e69b2d45f2d9393d8b4d8e27daa (diff)
Run with uncapped CPU during warmup period after restarting services
Diffstat (limited to 'node-admin')
-rw-r--r-- node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgentImpl.java | 9
-rw-r--r-- node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgentImplTest.java | 36
2 files changed, 45 insertions, 0 deletions
diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgentImpl.java b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgentImpl.java
index f184deab375..f3b58b47ef0 100644
--- a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgentImpl.java
+++ b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgentImpl.java
@@ -262,11 +262,20 @@ public class NodeAgentImpl implements NodeAgent {
context.log(logger, "Invoking vespa-nodectl to restart services: " + restartReason);
orchestratorSuspendNode(context);
+ ContainerResources currentResources = existingContainer.get().resources();
+ ContainerResources wantedResources = currentResources.withUnlimitedCpus();
+ if ( ! wantedResources.equals(currentResources)) {
+ context.log(logger, "Updating container resources: %s -> %s",
+ existingContainer.get().resources().toStringCpu(), wantedResources.toStringCpu());
+ containerOperations.updateContainer(context, existingContainer.get().id(), wantedResources);
+ }
+
String output = containerOperations.restartVespa(context);
if (!output.isBlank()) {
context.log(logger, "Restart services output: " + output);
}
currentRestartGeneration = context.node().wantedRestartGeneration();
+ firstSuccessfulHealthCheckInstant = Optional.empty();
});
}
diff --git a/node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgentImplTest.java b/node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgentImplTest.java
index 13f101be6e8..d87a60e4a44 100644
--- a/node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgentImplTest.java
+++ b/node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgentImplTest.java
@@ -699,6 +699,42 @@ public class NodeAgentImplTest {
inOrder.verify(orchestrator, never()).resume(any(String.class));
}
+ @Test
+ public void uncaps_and_caps_cpu_for_services_restart() { // a services restart should lift the CPU cap first and hold back resume until warm-up ends
+ NodeSpec.Builder specBuilder = nodeBuilder(NodeState.active)
+ .wantedDockerImage(dockerImage).currentDockerImage(dockerImage)
+ .wantedVespaVersion(vespaVersion).currentVespaVersion(vespaVersion)
+ .wantedRestartGeneration(2).currentRestartGeneration(1); // of the values set here, only the restart generation differs (wanted 2, current 1)
+ // The agent is built with a 30 s duration (presumably the warm-up period; it matches the PT30S asserted below) and the container starts with resources (2, 2, 16).
+ NodeAgentContext context = createContext(specBuilder.build());
+ NodeAgentImpl nodeAgent = makeNodeAgent(dockerImage, true, Duration.ofSeconds(30));
+ mockGetContainer(dockerImage, ContainerResources.from(2, 2, 16), true);
+
+ InOrder inOrder = inOrder(orchestrator, containerOperations);
+ // First converge: expect suspend, then a container update to (0, 0, 16), then restartVespa -- verified in exactly that order.
+ nodeAgent.converge(context);
+ inOrder.verify(orchestrator, times(1)).suspend(eq(hostName));
+ inOrder.verify(containerOperations, times(1)).updateContainer(eq(context), eq(containerId), eq(ContainerResources.from(0, 0, 16)));
+ inOrder.verify(containerOperations, times(1)).restartVespa(eq(context));
+ // Second pass: the container now reports (0, 0, 16) and the health check is stubbed to succeed, yet resume must still be refused.
+ mockGetContainer(dockerImage, ContainerResources.from(0, 0, 16), true);
+ doNothing().when(healthChecker).verifyHealth(any());
+ try {
+ nodeAgent.doConverge(context);
+ fail("Expected to fail due to warm up period not yet done");
+ } catch (ConvergenceException e) {
+ assertEquals("Refusing to resume until warm up period ends (in PT30S)", e.getMessage());
+ }
+ inOrder.verify(orchestrator, never()).resume(any());
+ inOrder.verify(orchestrator, never()).suspend(any());
+ inOrder.verify(containerOperations, never()).updateContainer(any(), any(), any());
+ // Once the clock is 31 s further on (past the 30 s above), converging is expected to resume the node.
+ // NOTE(review): despite "caps" in the test name, no updateContainer call restoring the CPU cap is verified after this point -- confirm whether that is intended.
+ clock.advance(Duration.ofSeconds(31));
+ nodeAgent.doConverge(context);
+ inOrder.verify(orchestrator, times(1)).resume(eq(hostName));
+ }
+
private void verifyThatContainerIsStopped(NodeState nodeState, Optional<ApplicationId> owner) {
NodeSpec.Builder nodeBuilder = nodeBuilder(nodeState)
.type(NodeType.tenant)