about | summary | refs | log | tree | commit | diff | stats
path: root/node-admin
diff options
context:
space:
mode:
author: valerijf <valerijf@yahoo-inc.com> 2017-04-06 13:40:47 +0200
committer: valerijf <valerijf@yahoo-inc.com> 2017-04-06 13:47:29 +0200
commit: c8be5b731240b11e001cf83cae967d336f8da33b (patch)
tree: fbc38d14c1312221d2f78b9bbac5fb52b1b89b02 /node-admin
parent: 76dec9e9f4031deadc171914eec86c34ba19d93f (diff)
Set initial node-admin state to frozen to force convergence
Diffstat (limited to 'node-admin')
-rw-r--r-- node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeadmin/NodeAdminImpl.java | 2
-rw-r--r-- node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeadmin/NodeAdminStateUpdater.java | 2
-rw-r--r-- node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgentImpl.java | 2
-rw-r--r-- node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/integrationTests/RunInContainerTest.java | 6
-rw-r--r-- node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/nodeadmin/NodeAdminImplTest.java | 5
-rw-r--r-- node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/nodeadmin/NodeAdminStateUpdaterTest.java | 20
6 files changed, 25 insertions, 12 deletions
diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeadmin/NodeAdminImpl.java b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeadmin/NodeAdminImpl.java
index e165b2476f4..42d5536ea05 100644
--- a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeadmin/NodeAdminImpl.java
+++ b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeadmin/NodeAdminImpl.java
@@ -41,7 +41,7 @@ public class NodeAdminImpl implements NodeAdmin {
private final DockerOperations dockerOperations;
private final Function<String, NodeAgent> nodeAgentFactory;
private final Optional<StorageMaintainer> storageMaintainer;
- private boolean isFrozen = false;
+ private boolean isFrozen = true;
private final Map<String, NodeAgent> nodeAgents = new ConcurrentHashMap<>();
diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeadmin/NodeAdminStateUpdater.java b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeadmin/NodeAdminStateUpdater.java
index 7287c374723..a19aacda846 100644
--- a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeadmin/NodeAdminStateUpdater.java
+++ b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeadmin/NodeAdminStateUpdater.java
@@ -34,7 +34,7 @@ import static java.util.concurrent.TimeUnit.MILLISECONDS;
*/
public class NodeAdminStateUpdater extends AbstractComponent {
private final AtomicBoolean terminated = new AtomicBoolean(false);
- private State currentState = RESUMED;
+ private State currentState = SUSPENDED_NODE_ADMIN;
private State wantedState = RESUMED;
private boolean workToDoNow = true;
diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgentImpl.java b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgentImpl.java
index 8128bb47eac..c80506a0a90 100644
--- a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgentImpl.java
+++ b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgentImpl.java
@@ -46,7 +46,7 @@ import static com.yahoo.vespa.hosted.node.admin.nodeagent.NodeAgentImpl.Containe
*/
public class NodeAgentImpl implements NodeAgent {
private final AtomicBoolean terminated = new AtomicBoolean(false);
- private boolean isFrozen = false;
+ private boolean isFrozen = true;
private boolean wantFrozen = false;
private boolean workToDoNow = true;
diff --git a/node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/integrationTests/RunInContainerTest.java b/node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/integrationTests/RunInContainerTest.java
index 09a749709f4..11d666b567c 100644
--- a/node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/integrationTests/RunInContainerTest.java
+++ b/node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/integrationTests/RunInContainerTest.java
@@ -112,7 +112,10 @@ public class RunInContainerTest {
waitForJdiscContainerToServe();
final String parentHostname = "localhost.test.yahoo.com";
- assertThat(doPutCall("resume"), is(true));
+ assertFalse(doPutCall("resume")); // Initial is false to force convergence
+ when(ComponentsProviderWithMocks.orchestratorMock.resume(parentHostname)).thenReturn(true);
+ Thread.sleep(50);
+ assertTrue(doPutCall("resume"));
// No nodes are allocated to this host yet, so freezing should be fine, but orchestrator doesnt allow node-admin suspend
when(ComponentsProviderWithMocks.orchestratorMock.suspend(parentHostname, Collections.singletonList(parentHostname)))
@@ -132,7 +135,6 @@ public class RunInContainerTest {
assertTrue(doPutCall("suspend"));
// Back to resume
- when(ComponentsProviderWithMocks.orchestratorMock.resume(parentHostname)).thenReturn(true);
assertFalse(doPutCall("resume"));
Thread.sleep(50);
assertTrue(doPutCall("resume"));
diff --git a/node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/nodeadmin/NodeAdminImplTest.java b/node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/nodeadmin/NodeAdminImplTest.java
index 8bd43024556..7dc3d0e8ac2 100644
--- a/node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/nodeadmin/NodeAdminImplTest.java
+++ b/node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/nodeadmin/NodeAdminImplTest.java
@@ -112,6 +112,11 @@ public class NodeAdminImplTest {
nodeAdmin.synchronizeNodeSpecsToNodeAgents(existingContainerHostnames, existingContainerHostnames);
+ assertTrue(nodeAdmin.isFrozen()); // Initially everything is frozen to force convergence
+ mockNodeAgentSetFrozenResponse(nodeAgents, true, true, true);
+ assertTrue(nodeAdmin.setFrozen(false)); // Unfreeze everything
+
+
mockNodeAgentSetFrozenResponse(nodeAgents, false, false, false);
assertFalse(nodeAdmin.setFrozen(true)); // NodeAdmin freezes only when all the NodeAgents are frozen
diff --git a/node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/nodeadmin/NodeAdminStateUpdaterTest.java b/node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/nodeadmin/NodeAdminStateUpdaterTest.java
index 974f450ddc0..ef463b103e5 100644
--- a/node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/nodeadmin/NodeAdminStateUpdaterTest.java
+++ b/node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/nodeadmin/NodeAdminStateUpdaterTest.java
@@ -62,15 +62,21 @@ public class NodeAdminStateUpdaterTest {
when(nodeRepository.getContainersToRun()).thenReturn(containersToRun);
- // Initially we start with everything running and we want to continue running, therefore we are converged
- // and ticks should complete without ever calling NodeAdmin
- tickAfter(0);
+ // Initially everything is frozen to force convergence
+ assertFalse(refresher.setResumeStateAndCheckIfResumed(NodeAdminStateUpdater.State.RESUMED));
+ when(nodeAdmin.setFrozen(eq(false))).thenReturn(true);
+ when(orchestrator.resume(parentHostname)).thenReturn(true);
+ tickAfter(0); // The first tick should unfreeze
+ verify(orchestrator, times(1)).resume(parentHostname); // Resume host
+ verify(orchestrator, times(1)).resume(parentHostname);
+
+ // Everything is running and we want to continue running, therefore we have converged
assertTrue(refresher.setResumeStateAndCheckIfResumed(NodeAdminStateUpdater.State.RESUMED));
tickAfter(35);
tickAfter(35);
assertTrue(refresher.setResumeStateAndCheckIfResumed(NodeAdminStateUpdater.State.RESUMED));
verify(refresher, never()).signalWorkToBeDone(); // No attempt in changing state
- verify(orchestrator, never()).resume(parentHostname); // Already resumed
+ verify(orchestrator, times(1)).resume(parentHostname); // Already resumed
// Lets try to suspend node admin only, immediately we get false back, and need to wait until next
// tick before any change can happen
@@ -91,18 +97,18 @@ public class NodeAdminStateUpdaterTest {
tickAfter(35);
assertFalse(refresher.setResumeStateAndCheckIfResumed(NodeAdminStateUpdater.State.SUSPENDED_NODE_ADMIN));
verify(refresher, times(1)).signalWorkToBeDone();
- verify(nodeAdmin, times(1)).setFrozen(eq(false)); // Roll back
+ verify(nodeAdmin, times(2)).setFrozen(eq(false)); // Roll back
tickAfter(35);
assertTrue(refresher.setResumeStateAndCheckIfResumed(NodeAdminStateUpdater.State.SUSPENDED_NODE_ADMIN));
- verify(nodeAdmin, times(1)).setFrozen(eq(false));
+ verify(nodeAdmin, times(2)).setFrozen(eq(false));
// At this point orchestrator says its OK to suspend, but something goes wrong when we try to stop services
doThrow(new RuntimeException("Failed to stop services")).doNothing().when(nodeAdmin).stopNodeAgentServices(eq(activeHostnames));
assertFalse(refresher.setResumeStateAndCheckIfResumed(NodeAdminStateUpdater.State.SUSPENDED));
tickAfter(0); // Change in wanted state, no need to wait
verify(refresher, times(2)).signalWorkToBeDone(); // No change in desired state
- verify(nodeAdmin, times(1)).setFrozen(eq(false)); // Make sure we dont roll back
+ verify(nodeAdmin, times(2)).setFrozen(eq(false)); // Make sure we dont roll back
// Finally we are successful in transitioning to frozen
tickAfter(35);