aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorHåkon Hallingstad <hakon@verizonmedia.com>2020-02-23 16:41:10 +0100
committerHåkon Hallingstad <hakon@verizonmedia.com>2020-02-23 16:41:10 +0100
commitf91ce5cbbe344834af616ca8c20eafafdbe720bb (patch)
tree56d56004568e35c658c43b9da6d0740bd1e6fff1
parent5c105fa6ffe3ed27c633e48f17206d2b31307936 (diff)
Avoid resume unless suspended
This is to reduce the contention for the application lock in Orchestrator.
-rw-r--r--node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgentImpl.java12
-rw-r--r--node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgentImplTest.java26
2 files changed, 26 insertions, 12 deletions
diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgentImpl.java b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgentImpl.java
index 2f92ef8affe..3b6145aa6a8 100644
--- a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgentImpl.java
+++ b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgentImpl.java
@@ -76,6 +76,7 @@ public class NodeAgentImpl implements NodeAgent {
private boolean hasResumedNode = false;
private boolean hasStartedServices = true;
private Optional<Instant> firstSuccessfulHealthCheckInstant = Optional.empty();
+ private boolean suspendedInOrchestrator = false;
private int numberOfUnhandledException = 0;
private long currentRebootGeneration = 0;
@@ -397,6 +398,7 @@ public class NodeAgentImpl implements NodeAgent {
public void converge(NodeAgentContext context) {
try {
doConverge(context);
+ context.log(logger, LogLevel.INFO, "Converged");
} catch (ConvergenceException e) {
context.log(logger, e.getMessage());
} catch (ContainerNotFoundException e) {
@@ -411,7 +413,7 @@ public class NodeAgentImpl implements NodeAgent {
}
}
- // Public for testing
+ // Non-private for testing
void doConverge(NodeAgentContext context) {
NodeSpec node = context.node();
Optional<Container> container = getContainer(context);
@@ -488,8 +490,11 @@ public class NodeAgentImpl implements NodeAgent {
// - Slobrok and internal orchestrator state is used to determine whether
// to allow upgrade (suspend).
updateNodeRepoWithCurrentAttributes(context);
- context.log(logger, "Call resume against Orchestrator");
- orchestrator.resume(context.hostname().value());
+ if (suspendedInOrchestrator || node.allowedToBeDown().orElse(false)) {
+ context.log(logger, "Call resume against Orchestrator");
+ orchestrator.resume(context.hostname().value());
+ suspendedInOrchestrator = false;
+ }
break;
case provisioned:
nodeRepository.setNodeState(context.hostname().value(), NodeState.dirty);
@@ -562,6 +567,7 @@ public class NodeAgentImpl implements NodeAgent {
context.log(logger, "Ask Orchestrator for permission to suspend node");
try {
orchestrator.suspend(context.hostname().value());
+ suspendedInOrchestrator = true;
} catch (OrchestratorException e) {
// Ensure the ACLs are up to date: The reason we're unable to suspend may be because some other
// node is unable to resume because the ACL rules of SOME Docker container is wrong...
diff --git a/node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgentImplTest.java b/node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgentImplTest.java
index f65ad379a63..06d15a3f577 100644
--- a/node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgentImplTest.java
+++ b/node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgentImplTest.java
@@ -80,6 +80,7 @@ public class NodeAgentImplTest {
.wantedDockerImage(dockerImage).currentDockerImage(dockerImage)
.wantedVespaVersion(vespaVersion).currentVespaVersion(vespaVersion)
.wantedRestartGeneration(1).currentRestartGeneration(1)
+ .allowedToBeDown(false)
.build();
NodeAgentContext context = createContext(node);
@@ -98,7 +99,7 @@ public class NodeAgentImplTest {
// TODO: Verify this isn't run unless 1st time
inOrder.verify(dockerOperations, never()).startServices(eq(context));
inOrder.verify(dockerOperations, times(1)).resumeNode(eq(context));
- inOrder.verify(orchestrator).resume(hostName);
+ inOrder.verify(orchestrator, never()).resume(hostName);
}
@Test
@@ -188,7 +189,7 @@ public class NodeAgentImplTest {
inOrder.verify(healthChecker, times(1)).verifyHealth(eq(context));
inOrder.verify(nodeRepository).updateNodeAttributes(
hostName, new NodeAttributes().withDockerImage(dockerImage).withVespaVersion(dockerImage.tagAsVersion()));
- inOrder.verify(orchestrator).resume(hostName);
+ inOrder.verify(orchestrator, never()).resume(hostName);
}
@Test
@@ -224,7 +225,8 @@ public class NodeAgentImplTest {
.state(NodeState.active)
.wantedDockerImage(dockerImage).currentDockerImage(dockerImage)
.wantedVespaVersion(vespaVersion).currentVespaVersion(vespaVersion)
- .wantedRestartGeneration(1).currentRestartGeneration(1);
+ .wantedRestartGeneration(1).currentRestartGeneration(1)
+ .allowedToBeDown(false);
NodeAgentContext firstContext = createContext(specBuilder.build());
NodeAgentImpl nodeAgent = makeNodeAgent(dockerImage, true);
@@ -232,17 +234,20 @@ public class NodeAgentImplTest {
when(dockerOperations.pullImageAsyncIfNeeded(any())).thenReturn(true);
when(storageMaintainer.getDiskUsageFor(any())).thenReturn(Optional.of(201326592000L));
+ InOrder inOrder = inOrder(orchestrator, dockerOperations);
+
nodeAgent.doConverge(firstContext);
+ inOrder.verify(orchestrator, never()).resume(any(String.class));
+
NodeAgentContext secondContext = createContext(specBuilder.diskGb(200).build());
nodeAgent.doConverge(secondContext);
+ inOrder.verify(orchestrator, never()).resume(any(String.class));
+
NodeAgentContext thirdContext = new NodeAgentContextImpl.Builder(specBuilder.vcpu(5).build()).cpuSpeedUp(1.25).build();
nodeAgent.doConverge(thirdContext);
ContainerResources resourcesAfterThird = ContainerResources.from(0, 4, 16);
mockGetContainer(dockerImage, resourcesAfterThird, true);
- InOrder inOrder = inOrder(orchestrator, dockerOperations);
- inOrder.verify(orchestrator).resume(any(String.class));
- inOrder.verify(orchestrator).resume(any(String.class));
inOrder.verify(orchestrator).suspend(any(String.class));
inOrder.verify(dockerOperations).updateContainer(eq(thirdContext), eq(resourcesAfterThird));
inOrder.verify(dockerOperations, never()).removeContainer(any(), any());
@@ -254,7 +259,7 @@ public class NodeAgentImplTest {
inOrder.verify(orchestrator, never()).suspend(any(String.class));
inOrder.verify(dockerOperations, never()).updateContainer(eq(thirdContext), any());
inOrder.verify(dockerOperations, never()).removeContainer(any(), any());
- inOrder.verify(orchestrator).resume(any(String.class));
+ inOrder.verify(orchestrator, never()).resume(any(String.class));
// Set the feature flag
flagSource.withDoubleFlag(Flags.CONTAINER_CPU_CAP.id(), 2.3);
@@ -537,6 +542,7 @@ public class NodeAgentImplTest {
.wantedDockerImage(dockerImage).currentDockerImage(dockerImage)
.currentVespaVersion(vespaVersion)
.wantedRestartGeneration(1).currentRestartGeneration(1)
+ .allowedToBeDown(true)
.build();
NodeAgentContext context = createContext(node);
@@ -611,6 +617,7 @@ public class NodeAgentImplTest {
.type(NodeType.config)
.wantedDockerImage(dockerImage)
.wantedVespaVersion(vespaVersion)
+ .allowedToBeDown(true)
.build();
NodeAgentContext context = createContext(node);
@@ -689,6 +696,7 @@ public class NodeAgentImplTest {
clock.advance(Duration.ofSeconds(31));
nodeAgent.doConverge(context);
+ inOrder.verify(orchestrator).suspend(hostName);
inOrder.verify(dockerOperations).updateContainer(eq(context), eq(ContainerResources.from(0, 2, 16)));
inOrder.verify(dockerOperations, never()).removeContainer(any(), any());
inOrder.verify(dockerOperations, never()).startContainer(any());
@@ -699,7 +707,7 @@ public class NodeAgentImplTest {
inOrder.verify(orchestrator, never()).suspend(any(String.class));
inOrder.verify(dockerOperations, never()).updateContainer(eq(context), any());
inOrder.verify(dockerOperations, never()).removeContainer(any(), any());
- inOrder.verify(orchestrator).resume(any(String.class));
+ inOrder.verify(orchestrator, never()).resume(any(String.class));
}
@Test
@@ -724,7 +732,7 @@ public class NodeAgentImplTest {
inOrder.verify(orchestrator, never()).suspend(any(String.class));
inOrder.verify(dockerOperations, never()).updateContainer(eq(context), any());
inOrder.verify(dockerOperations, never()).removeContainer(any(), any());
- inOrder.verify(orchestrator).resume(any(String.class));
+ inOrder.verify(orchestrator, never()).resume(any(String.class));
}
private void verifyThatContainerIsStopped(NodeState nodeState, Optional<ApplicationId> owner) {