diff options
| | | |
|---|---|---|
| author | Valerij Fredriksen <valerijf@verizonmedia.com> | 2021-09-13 11:18:14 +0200 |
| committer | Valerij Fredriksen <valerijf@verizonmedia.com> | 2021-09-13 11:18:31 +0200 |
| commit | 5826f0f2895409e57a30462c4eee3db8dbf29020 (patch) | |
| tree | 8704e70dc7d76b168eb8553083fb75d223f649a3 /node-admin | |
| parent | b4a485abc154aaa3b18be19a306bbd34d3e382f2 (diff) | |
Ensure internal state is in sync with orchestrator state
Diffstat (limited to 'node-admin')
2 files changed, 27 insertions, 6 deletions
```diff
diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeadmin/NodeAdminStateUpdater.java b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeadmin/NodeAdminStateUpdater.java
index 726e23a3fe4..37ecc6c4e56 100644
--- a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeadmin/NodeAdminStateUpdater.java
+++ b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeadmin/NodeAdminStateUpdater.java
@@ -117,10 +117,10 @@ public class NodeAdminStateUpdater {
             throw new ConvergenceException("Timed out trying to freeze all nodes: will force an unfrozen tick");
         }
 
-        if (currentState == wantedState) return;
+        boolean wantFrozen = wantedState != RESUMED;
+        if (currentState == wantedState && wantFrozen == node.orchestratorStatus().isSuspended()) return;
         currentState = TRANSITIONING;
 
-        boolean wantFrozen = wantedState != RESUMED;
         if (!nodeAdmin.setFrozen(wantFrozen))
             throw new ConvergenceException("NodeAdmin is not yet " + (wantFrozen ? "frozen" : "unfrozen"));
 
diff --git a/node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/nodeadmin/NodeAdminStateUpdaterTest.java b/node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/nodeadmin/NodeAdminStateUpdaterTest.java
index 9ab340a2421..e6fa4118542 100644
--- a/node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/nodeadmin/NodeAdminStateUpdaterTest.java
+++ b/node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/nodeadmin/NodeAdminStateUpdaterTest.java
@@ -79,10 +79,17 @@ public class NodeAdminStateUpdaterTest {
             verify(orchestrator, times(1)).resume(hostHostname.value());
             verify(nodeAdmin, times(2)).setFrozen(eq(false));
 
+            // Host is externally suspended in orchestrator, should be resumed by node-admin
+            setHostOrchestratorStatus(hostHostname, OrchestratorStatus.ALLOWED_TO_BE_DOWN);
+            updater.converge(RESUMED);
+            verify(orchestrator, times(2)).resume(hostHostname.value());
+            verify(nodeAdmin, times(3)).setFrozen(eq(false));
+            setHostOrchestratorStatus(hostHostname, OrchestratorStatus.NO_REMARKS);
+
             // Lets try to suspend node admin only
             when(nodeAdmin.setFrozen(eq(true))).thenReturn(false);
             assertConvergeError(SUSPENDED_NODE_ADMIN, "NodeAdmin is not yet frozen");
-            verify(nodeAdmin, times(2)).setFrozen(eq(false));
+            verify(nodeAdmin, times(3)).setFrozen(eq(false));
         }
 
         {
@@ -93,10 +100,24 @@ public class NodeAdminStateUpdaterTest {
             doThrow(new RuntimeException(exceptionMessage)).doNothing()
                     .when(orchestrator).suspend(eq(hostHostname.value()));
             assertConvergeError(SUSPENDED_NODE_ADMIN, exceptionMessage);
-            verify(nodeAdmin, times(2)).setFrozen(eq(false));
+            verify(nodeAdmin, times(3)).setFrozen(eq(false));
 
             updater.converge(SUSPENDED_NODE_ADMIN);
-            verify(nodeAdmin, times(2)).setFrozen(eq(false));
+            verify(nodeAdmin, times(3)).setFrozen(eq(false));
+            verify(orchestrator, times(2)).suspend(hostHostname.value());
+            setHostOrchestratorStatus(hostHostname, OrchestratorStatus.ALLOWED_TO_BE_DOWN);
+
+            // Already suspended, no changes
+            updater.converge(SUSPENDED_NODE_ADMIN);
+            verify(nodeAdmin, times(3)).setFrozen(eq(false));
+            verify(orchestrator, times(2)).suspend(hostHostname.value());
+
+            // Host is externally resumed
+            setHostOrchestratorStatus(hostHostname, OrchestratorStatus.NO_REMARKS);
+            updater.converge(SUSPENDED_NODE_ADMIN);
+            verify(nodeAdmin, times(3)).setFrozen(eq(false));
+            verify(orchestrator, times(3)).suspend(hostHostname.value());
+            setHostOrchestratorStatus(hostHostname, OrchestratorStatus.ALLOWED_TO_BE_DOWN);
         }
 
         {
@@ -107,7 +128,7 @@ public class NodeAdminStateUpdaterTest {
             assertConvergeError(SUSPENDED, exceptionMessage);
             verify(orchestrator, times(1)).suspend(eq(hostHostname.value()), eq(suspendHostnames));
             // Make sure we dont roll back if we fail to stop services - we will try to stop again next tick
-            verify(nodeAdmin, times(2)).setFrozen(eq(false));
+            verify(nodeAdmin, times(3)).setFrozen(eq(false));
 
             // Finally we are successful in transitioning to frozen
             updater.converge(SUSPENDED);
```