aboutsummaryrefslogtreecommitdiffstats
path: root/node-admin
diff options
context:
space:
mode:
authorValerij Fredriksen <valerijf@oath.com>2018-08-09 15:42:31 +0200
committerValerij Fredriksen <valerijf@oath.com>2018-08-09 15:42:31 +0200
commit5b37ad7a17a8a4214910e4481acce01562c96a15 (patch)
treec9436290afae31ccce624d53da19866fb00c3b25 /node-admin
parentdbd0acddebf68e728569dc9d5cd2a174ca4fbdaa (diff)
Only orchestrate host if it's active
Diffstat (limited to 'node-admin')
-rw-r--r--node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeadmin/NodeAdminStateUpdaterImpl.java16
-rw-r--r--node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/nodeadmin/NodeAdminStateUpdaterImplTest.java44
2 files changed, 50 insertions, 10 deletions
diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeadmin/NodeAdminStateUpdaterImpl.java b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeadmin/NodeAdminStateUpdaterImpl.java
index 427588c287d..5ab73788299 100644
--- a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeadmin/NodeAdminStateUpdaterImpl.java
+++ b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeadmin/NodeAdminStateUpdaterImpl.java
@@ -236,12 +236,13 @@ public class NodeAdminStateUpdaterImpl implements NodeAdminStateUpdater {
throw new ConvergenceException("NodeAdmin is not yet " + (wantFrozen ? "frozen" : "unfrozen"));
}
+ boolean hostIsActiveInNR = nodeRepository.getNode(dockerHostHostName).getState() == Node.State.active;
switch (wantedState) {
case RESUMED:
- orchestrator.resume(dockerHostHostName);
+ if (hostIsActiveInNR) orchestrator.resume(dockerHostHostName);
break;
case SUSPENDED_NODE_ADMIN:
- orchestrator.suspend(dockerHostHostName);
+ if (hostIsActiveInNR) orchestrator.suspend(dockerHostHostName);
break;
case SUSPENDED:
// Fetch active nodes from node repo before suspending nodes.
@@ -252,11 +253,12 @@ public class NodeAdminStateUpdaterImpl implements NodeAdminStateUpdater {
// We should also suspend host's hostname to suspend node-admin
List<String> nodesInActiveState = getNodesInActiveState();
- List<String> nodesToSuspend = new ArrayList<>();
- nodesToSuspend.addAll(nodesInActiveState);
- nodesToSuspend.add(dockerHostHostName);
- orchestrator.suspend(dockerHostHostName, nodesToSuspend);
- log.info("Orchestrator allows suspension of " + nodesToSuspend);
+ List<String> nodesToSuspend = new ArrayList<>(nodesInActiveState);
+ if (hostIsActiveInNR) nodesToSuspend.add(dockerHostHostName);
+ if (!nodesToSuspend.isEmpty()) {
+ orchestrator.suspend(dockerHostHostName, nodesToSuspend);
+ log.info("Orchestrator allows suspension of " + nodesToSuspend);
+ }
// The node agent services are stopped by this thread, which is OK only
// because the node agents are frozen (see above).
diff --git a/node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/nodeadmin/NodeAdminStateUpdaterImplTest.java b/node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/nodeadmin/NodeAdminStateUpdaterImplTest.java
index 02baf5959c9..cfdb576fb34 100644
--- a/node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/nodeadmin/NodeAdminStateUpdaterImplTest.java
+++ b/node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/nodeadmin/NodeAdminStateUpdaterImplTest.java
@@ -22,6 +22,7 @@ import java.util.stream.IntStream;
import static com.yahoo.vespa.hosted.node.admin.nodeadmin.NodeAdminStateUpdaterImpl.TRANSITION_EXCEPTION_MESSAGE;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.fail;
+import static org.mockito.Matchers.anyBoolean;
import static org.mockito.Matchers.eq;
import static org.mockito.Mockito.doNothing;
import static org.mockito.Mockito.doThrow;
@@ -55,7 +56,7 @@ public class NodeAdminStateUpdaterImplTest {
@Test
public void testStateConvergence() {
- mockNodeRepo(4);
+ mockNodeRepo(Node.State.active, 4);
List<String> activeHostnames = nodeRepository.getNodes(parentHostname).stream()
.map(NodeSpec::getHostname)
.collect(Collectors.toList());
@@ -159,7 +160,7 @@ public class NodeAdminStateUpdaterImplTest {
@Test
public void half_transition_revert() {
final String exceptionMsg = "Cannot allow to suspend because some reason";
- mockNodeRepo(3);
+ mockNodeRepo(Node.State.active, 3);
// Initially everything is frozen to force convergence
when(nodeAdmin.setFrozen(eq(false))).thenReturn(true);
@@ -186,6 +187,33 @@ public class NodeAdminStateUpdaterImplTest {
verify(nodeAdmin, times(2)).setFrozen(eq(false)); // Make sure that we unfreeze!
}
+ @Test
+ public void do_not_orchestrate_host_when_not_active() {
+ when(nodeAdmin.subsystemFreezeDuration()).thenReturn(Duration.ofHours(1));
+ when(nodeAdmin.setFrozen(anyBoolean())).thenReturn(true);
+ mockNodeRepo(Node.State.ready, 3);
+
+ // Resume and suspend only require that node-agents are frozen and permission from
+ // orchestrator to resume/suspend host. Therefore, if host is not active, we only need to freeze.
+ tickAfter(0);
+ refresher.setResumeStateAndCheckIfResumed(NodeAdminStateUpdater.State.RESUMED);
+ verify(orchestrator, never()).resume(eq(parentHostname));
+
+ assertResumeStateError(NodeAdminStateUpdater.State.SUSPENDED_NODE_ADMIN, TRANSITION_EXCEPTION_MESSAGE);
+ tickAfter(0);
+ refresher.setResumeStateAndCheckIfResumed(NodeAdminStateUpdater.State.SUSPENDED_NODE_ADMIN);
+ verify(orchestrator, never()).suspend(eq(parentHostname));
+
+ // When doing batch suspend, only suspend the containers if the host is not active
+ List<String> activeHostnames = nodeRepository.getNodes(parentHostname).stream()
+ .map(NodeSpec::getHostname)
+ .collect(Collectors.toList());
+ assertResumeStateError(NodeAdminStateUpdater.State.SUSPENDED, TRANSITION_EXCEPTION_MESSAGE);
+ tickAfter(0);
+ refresher.setResumeStateAndCheckIfResumed(NodeAdminStateUpdater.State.SUSPENDED);
+ verify(orchestrator, times(1)).suspend(eq(parentHostname), eq(activeHostnames));
+ }
+
private void assertResumeStateError(NodeAdminStateUpdater.State targetState, String reason) {
try {
refresher.setResumeStateAndCheckIfResumed(targetState);
@@ -195,7 +223,7 @@ public class NodeAdminStateUpdaterImplTest {
}
}
- private void mockNodeRepo(int numberOfNodes) {
+ private void mockNodeRepo(Node.State hostState, int numberOfNodes) {
List<NodeSpec> containersToRun = IntStream.range(0, numberOfNodes)
.mapToObj(i -> new NodeSpec.Builder()
.hostname("host" + i + ".test.yahoo.com")
@@ -209,6 +237,16 @@ public class NodeAdminStateUpdaterImplTest {
.collect(Collectors.toList());
when(nodeRepository.getNodes(eq(parentHostname))).thenReturn(containersToRun);
+
+ when(nodeRepository.getNode(eq(parentHostname))).thenReturn(new NodeSpec.Builder()
+ .hostname(parentHostname)
+ .state(hostState)
+ .nodeType(NodeType.tenant)
+ .flavor("default")
+ .minCpuCores(1)
+ .minMainMemoryAvailableGb(1)
+ .minDiskAvailableGb(1)
+ .build());
}
private void tickAfter(int seconds) {