diff options
| Field | Value | Date |
|---|---|---|
| author | valerijf <valerijf@yahoo-inc.com> | 2017-04-06 13:40:47 +0200 |
| committer | valerijf <valerijf@yahoo-inc.com> | 2017-04-06 13:47:29 +0200 |
| commit | c8be5b731240b11e001cf83cae967d336f8da33b (patch) | |
| tree | fbc38d14c1312221d2f78b9bbac5fb52b1b89b02 /node-admin | |
| parent | 76dec9e9f4031deadc171914eec86c34ba19d93f (diff) | |
Set initial node-admin state to frozen to force convergence
Diffstat (limited to 'node-admin')
6 files changed, 25 insertions, 12 deletions
diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeadmin/NodeAdminImpl.java b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeadmin/NodeAdminImpl.java index e165b2476f4..42d5536ea05 100644 --- a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeadmin/NodeAdminImpl.java +++ b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeadmin/NodeAdminImpl.java @@ -41,7 +41,7 @@ public class NodeAdminImpl implements NodeAdmin { private final DockerOperations dockerOperations; private final Function<String, NodeAgent> nodeAgentFactory; private final Optional<StorageMaintainer> storageMaintainer; - private boolean isFrozen = false; + private boolean isFrozen = true; private final Map<String, NodeAgent> nodeAgents = new ConcurrentHashMap<>(); diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeadmin/NodeAdminStateUpdater.java b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeadmin/NodeAdminStateUpdater.java index 7287c374723..a19aacda846 100644 --- a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeadmin/NodeAdminStateUpdater.java +++ b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeadmin/NodeAdminStateUpdater.java @@ -34,7 +34,7 @@ import static java.util.concurrent.TimeUnit.MILLISECONDS; */ public class NodeAdminStateUpdater extends AbstractComponent { private final AtomicBoolean terminated = new AtomicBoolean(false); - private State currentState = RESUMED; + private State currentState = SUSPENDED_NODE_ADMIN; private State wantedState = RESUMED; private boolean workToDoNow = true; diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgentImpl.java b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgentImpl.java index 8128bb47eac..c80506a0a90 100644 --- a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgentImpl.java +++ 
b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgentImpl.java @@ -46,7 +46,7 @@ import static com.yahoo.vespa.hosted.node.admin.nodeagent.NodeAgentImpl.Containe */ public class NodeAgentImpl implements NodeAgent { private final AtomicBoolean terminated = new AtomicBoolean(false); - private boolean isFrozen = false; + private boolean isFrozen = true; private boolean wantFrozen = false; private boolean workToDoNow = true; diff --git a/node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/integrationTests/RunInContainerTest.java b/node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/integrationTests/RunInContainerTest.java index 09a749709f4..11d666b567c 100644 --- a/node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/integrationTests/RunInContainerTest.java +++ b/node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/integrationTests/RunInContainerTest.java @@ -112,7 +112,10 @@ public class RunInContainerTest { waitForJdiscContainerToServe(); final String parentHostname = "localhost.test.yahoo.com"; - assertThat(doPutCall("resume"), is(true)); + assertFalse(doPutCall("resume")); // Initial is false to force convergence + when(ComponentsProviderWithMocks.orchestratorMock.resume(parentHostname)).thenReturn(true); + Thread.sleep(50); + assertTrue(doPutCall("resume")); // No nodes are allocated to this host yet, so freezing should be fine, but orchestrator doesnt allow node-admin suspend when(ComponentsProviderWithMocks.orchestratorMock.suspend(parentHostname, Collections.singletonList(parentHostname))) @@ -132,7 +135,6 @@ public class RunInContainerTest { assertTrue(doPutCall("suspend")); // Back to resume - when(ComponentsProviderWithMocks.orchestratorMock.resume(parentHostname)).thenReturn(true); assertFalse(doPutCall("resume")); Thread.sleep(50); assertTrue(doPutCall("resume")); diff --git a/node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/nodeadmin/NodeAdminImplTest.java 
b/node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/nodeadmin/NodeAdminImplTest.java index 8bd43024556..7dc3d0e8ac2 100644 --- a/node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/nodeadmin/NodeAdminImplTest.java +++ b/node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/nodeadmin/NodeAdminImplTest.java @@ -112,6 +112,11 @@ public class NodeAdminImplTest { nodeAdmin.synchronizeNodeSpecsToNodeAgents(existingContainerHostnames, existingContainerHostnames); + assertTrue(nodeAdmin.isFrozen()); // Initially everything is frozen to force convergence + mockNodeAgentSetFrozenResponse(nodeAgents, true, true, true); + assertTrue(nodeAdmin.setFrozen(false)); // Unfreeze everything + + mockNodeAgentSetFrozenResponse(nodeAgents, false, false, false); assertFalse(nodeAdmin.setFrozen(true)); // NodeAdmin freezes only when all the NodeAgents are frozen diff --git a/node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/nodeadmin/NodeAdminStateUpdaterTest.java b/node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/nodeadmin/NodeAdminStateUpdaterTest.java index 974f450ddc0..ef463b103e5 100644 --- a/node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/nodeadmin/NodeAdminStateUpdaterTest.java +++ b/node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/nodeadmin/NodeAdminStateUpdaterTest.java @@ -62,15 +62,21 @@ public class NodeAdminStateUpdaterTest { when(nodeRepository.getContainersToRun()).thenReturn(containersToRun); - // Initially we start with everything running and we want to continue running, therefore we are converged - // and ticks should complete without ever calling NodeAdmin - tickAfter(0); + // Initially everything is frozen to force convergence + assertFalse(refresher.setResumeStateAndCheckIfResumed(NodeAdminStateUpdater.State.RESUMED)); + when(nodeAdmin.setFrozen(eq(false))).thenReturn(true); + when(orchestrator.resume(parentHostname)).thenReturn(true); + tickAfter(0); // The first tick should unfreeze + verify(orchestrator, 
times(1)).resume(parentHostname); // Resume host + verify(orchestrator, times(1)).resume(parentHostname); + + // Everything is running and we want to continue running, therefore we have converged assertTrue(refresher.setResumeStateAndCheckIfResumed(NodeAdminStateUpdater.State.RESUMED)); tickAfter(35); tickAfter(35); assertTrue(refresher.setResumeStateAndCheckIfResumed(NodeAdminStateUpdater.State.RESUMED)); verify(refresher, never()).signalWorkToBeDone(); // No attempt in changing state - verify(orchestrator, never()).resume(parentHostname); // Already resumed + verify(orchestrator, times(1)).resume(parentHostname); // Already resumed // Lets try to suspend node admin only, immediately we get false back, and need to wait until next // tick before any change can happen @@ -91,18 +97,18 @@ public class NodeAdminStateUpdaterTest { tickAfter(35); assertFalse(refresher.setResumeStateAndCheckIfResumed(NodeAdminStateUpdater.State.SUSPENDED_NODE_ADMIN)); verify(refresher, times(1)).signalWorkToBeDone(); - verify(nodeAdmin, times(1)).setFrozen(eq(false)); // Roll back + verify(nodeAdmin, times(2)).setFrozen(eq(false)); // Roll back tickAfter(35); assertTrue(refresher.setResumeStateAndCheckIfResumed(NodeAdminStateUpdater.State.SUSPENDED_NODE_ADMIN)); - verify(nodeAdmin, times(1)).setFrozen(eq(false)); + verify(nodeAdmin, times(2)).setFrozen(eq(false)); // At this point orchestrator says its OK to suspend, but something goes wrong when we try to stop services doThrow(new RuntimeException("Failed to stop services")).doNothing().when(nodeAdmin).stopNodeAgentServices(eq(activeHostnames)); assertFalse(refresher.setResumeStateAndCheckIfResumed(NodeAdminStateUpdater.State.SUSPENDED)); tickAfter(0); // Change in wanted state, no need to wait verify(refresher, times(2)).signalWorkToBeDone(); // No change in desired state - verify(nodeAdmin, times(1)).setFrozen(eq(false)); // Make sure we dont roll back + verify(nodeAdmin, times(2)).setFrozen(eq(false)); // Make sure we dont roll back 
// Finally we are successful in transitioning to frozen tickAfter(35); |