summary refs log tree commit diff stats
path: root/node-admin
diff options
context:
space:
mode:
author valerijf <valerijf@yahoo-inc.com> 2017-04-10 11:59:27 +0200
committer valerijf <valerijf@yahoo-inc.com> 2017-04-10 11:59:27 +0200
commit 6c2039bef54af450c696b2a5be1958231ceb4599 (patch)
tree 62bf584ce8416b74078bf7b8da2629266b4c1c48 /node-admin
parent 6b92af3f370a84a87472af361590512e9822f39a (diff)
Ask orchestrator to suspend before freezing
Diffstat (limited to 'node-admin')
-rw-r--r-- node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeadmin/NodeAdminStateUpdater.java 44
-rw-r--r-- node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/nodeadmin/NodeAdminStateUpdaterTest.java 6
2 files changed, 21 insertions, 29 deletions
diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeadmin/NodeAdminStateUpdater.java b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeadmin/NodeAdminStateUpdater.java
index 3b4f3bc471c..bdc5994c89e 100644
--- a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeadmin/NodeAdminStateUpdater.java
+++ b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeadmin/NodeAdminStateUpdater.java
@@ -131,25 +131,16 @@ public class NodeAdminStateUpdater extends AbstractComponent {
}
/**
- * This method attempts to converge NodeAgent's and NodeAdmin's frozen state with their orchestrator
- * state. When trying to suspend node-admin, this method will first attempt to freeze all NodeAgents and
- * NodeAdmin, then asking orchestrator for permission to suspend all active nodes on this host, including
- * node-admin itself, if the request is denied, this method will unfreeze NodeAgents and NodeAdmin.
+ * This method attempts to converge node-admin towards one of the {@link State}
*/
private void convergeState(State wantedState) {
- boolean wantFrozen = wantedState != RESUMED;
- if (!nodeAdmin.setFrozen(wantFrozen)) {
- throw new RuntimeException("NodeAdmin has not yet converged to " + (wantFrozen ? "frozen" : "unfrozen"));
- }
-
- // To get to resumed state, we only need to converge NodeAdmins frozen state
if (wantedState == RESUMED) {
- orchestrator.resume(dockerHostHostName);
-
- synchronized (monitor) {
- currentState = RESUMED;
+ if (!nodeAdmin.setFrozen(false)) {
+ throw new RuntimeException("NodeAdmin has not yet converged to unfrozen");
}
- return;
+
+ orchestrator.resume(dockerHostHostName);
+ if (wantedState == updateAndGetCurrentState(RESUMED)) return;
}
// Fetch active nodes from node repo before suspending nodes.
@@ -162,28 +153,29 @@ public class NodeAdminStateUpdater extends AbstractComponent {
try {
nodesInActiveState = getNodesInActiveState();
} catch (IOException e) {
- throw new RuntimeException("Failed to get nodes from node repo:" + e.getMessage());
+ throw new RuntimeException("Failed to get nodes from node repo: " + e.getMessage());
}
if (currentState == RESUMED) {
List<String> nodesToSuspend = new ArrayList<>(nodesInActiveState);
nodesToSuspend.add(dockerHostHostName);
- try {
- orchestrator.suspend(dockerHostHostName, nodesToSuspend);
- } catch (Exception e) {
- nodeAdmin.setFrozen(false);
- throw e;
- }
+ orchestrator.suspend(dockerHostHostName, nodesToSuspend);
- synchronized (monitor) {
- currentState = SUSPENDED_NODE_ADMIN;
+ if (!nodeAdmin.setFrozen(true)) {
+ throw new RuntimeException("NodeAdmin has not yet converged to frozen");
}
- if (wantedState == currentState) return;
+
+ if (wantedState == updateAndGetCurrentState(SUSPENDED_NODE_ADMIN)) return;
}
nodeAdmin.stopNodeAgentServices(nodesInActiveState);
+ updateAndGetCurrentState(SUSPENDED);
+ }
+
+ private State updateAndGetCurrentState(State currentState) {
synchronized (monitor) {
- currentState = SUSPENDED;
+ this.currentState = currentState;
+ return currentState;
}
}
diff --git a/node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/nodeadmin/NodeAdminStateUpdaterTest.java b/node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/nodeadmin/NodeAdminStateUpdaterTest.java
index 25fa23b8951..2ce7e2a3699 100644
--- a/node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/nodeadmin/NodeAdminStateUpdaterTest.java
+++ b/node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/nodeadmin/NodeAdminStateUpdaterTest.java
@@ -97,18 +97,18 @@ public class NodeAdminStateUpdaterTest {
tickAfter(35);
assertFalse(refresher.setResumeStateAndCheckIfResumed(NodeAdminStateUpdater.State.SUSPENDED_NODE_ADMIN));
verify(refresher, times(1)).signalWorkToBeDone();
- verify(nodeAdmin, times(2)).setFrozen(eq(false)); // Roll back
+ verify(nodeAdmin, times(1)).setFrozen(eq(false));
tickAfter(35);
assertTrue(refresher.setResumeStateAndCheckIfResumed(NodeAdminStateUpdater.State.SUSPENDED_NODE_ADMIN));
- verify(nodeAdmin, times(2)).setFrozen(eq(false));
+ verify(nodeAdmin, times(1)).setFrozen(eq(false));
// At this point orchestrator says its OK to suspend, but something goes wrong when we try to stop services
doThrow(new RuntimeException("Failed to stop services")).doNothing().when(nodeAdmin).stopNodeAgentServices(eq(activeHostnames));
assertFalse(refresher.setResumeStateAndCheckIfResumed(NodeAdminStateUpdater.State.SUSPENDED));
tickAfter(0); // Change in wanted state, no need to wait
verify(refresher, times(2)).signalWorkToBeDone(); // No change in desired state
- verify(nodeAdmin, times(2)).setFrozen(eq(false)); // Make sure we dont roll back
+ verify(nodeAdmin, times(1)).setFrozen(eq(false)); // Make sure we dont roll back
// Finally we are successful in transitioning to frozen
tickAfter(35);