diff options
Diffstat (limited to 'node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/nodeadmin/NodeAdminStateUpdaterTest.java')
-rw-r--r-- | node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/nodeadmin/NodeAdminStateUpdaterTest.java | 277 |
1 files changed, 0 insertions, 277 deletions
// diff --git a/node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/nodeadmin/NodeAdminStateUpdaterTest.java b/node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/nodeadmin/NodeAdminStateUpdaterTest.java deleted file mode 100644 index 420146b52f0..00000000000 --- a/node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/nodeadmin/NodeAdminStateUpdaterTest.java +++ /dev/null @@ -1,277 +0,0 @@
// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
package com.yahoo.vespa.hosted.node.admin.nodeadmin;

import com.yahoo.config.provision.HostName;
import com.yahoo.config.provision.NodeType;
import com.yahoo.vespa.hosted.node.admin.configserver.noderepository.Acl;
import com.yahoo.vespa.hosted.node.admin.configserver.noderepository.NodeSpec;
import com.yahoo.vespa.hosted.node.admin.configserver.noderepository.NodeState;
import com.yahoo.vespa.hosted.node.admin.configserver.noderepository.OrchestratorStatus;
import com.yahoo.vespa.hosted.node.admin.configserver.orchestrator.Orchestrator;
import com.yahoo.vespa.hosted.node.admin.integration.NodeRepoMock;
import com.yahoo.vespa.hosted.node.admin.nodeagent.NodeAgentContextFactory;
import org.junit.jupiter.api.Test;

import java.time.Duration;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.function.Function;
import java.util.stream.Collectors;
import java.util.stream.IntStream;

import static com.yahoo.vespa.hosted.node.admin.nodeadmin.NodeAdminStateUpdater.State.RESUMED;
import static com.yahoo.vespa.hosted.node.admin.nodeadmin.NodeAdminStateUpdater.State.SUSPENDED;
import static com.yahoo.vespa.hosted.node.admin.nodeadmin.NodeAdminStateUpdater.State.SUSPENDED_NODE_ADMIN;
import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.fail;
import static org.mockito.ArgumentMatchers.any;
import static org.mockito.ArgumentMatchers.anyBoolean;
import static org.mockito.ArgumentMatchers.argThat;
import static org.mockito.ArgumentMatchers.eq;
import static org.mockito.Mockito.doThrow;
import static org.mockito.Mockito.mock;
import static org.mockito.Mockito.never;
import static org.mockito.Mockito.spy;
import static org.mockito.Mockito.times;
import static org.mockito.Mockito.verify;
import static org.mockito.Mockito.when;

/**
 * Basic test of NodeAdminStateUpdater.
 *
 * <p>The updater under test is wired to a mocked {@link NodeAdmin}, a mocked {@link Orchestrator},
 * a mocked {@link NodeAgentContextFactory} and a spied {@link NodeRepoMock}. Most assertions are
 * <em>cumulative</em> Mockito invocation counts, so the order of the statements inside each test matters.
 *
 * @author freva
 */
public class NodeAdminStateUpdaterTest {
    private final NodeAgentContextFactory nodeAgentContextFactory = mock(NodeAgentContextFactory.class);
    private final NodeRepoMock nodeRepository = spy(new NodeRepoMock());
    private final Orchestrator orchestrator = mock(Orchestrator.class);
    private final NodeAdmin nodeAdmin = mock(NodeAdmin.class);
    private final HostName hostHostname = HostName.of("basehost1.test.yahoo.com");

    private final NodeAdminStateUpdater updater = spy(new NodeAdminStateUpdater(
            nodeAgentContextFactory, nodeRepository, orchestrator, nodeAdmin, hostHostname));

    /**
     * Drives the updater through RESUMED, SUSPENDED_NODE_ADMIN and SUSPENDED in turn and checks,
     * after every converge attempt, how often the orchestrator and NodeAdmin mocks have been called.
     */
    @Test
    void state_convergence() {
        mockNodeRepo(NodeState.active, 4);
        // The batch suspend is expected to receive the node repository hostnames followed by the host itself
        List<String> batchSuspendHostnames = new ArrayList<>(nodeRepoHostnames());
        batchSuspendHostnames.add(host());
        when(nodeAdmin.subsystemFreezeDuration()).thenReturn(Duration.ofSeconds(1));

        {
            // Initially everything is frozen to force convergence
            assertConvergeError(RESUMED, "NodeAdmin is not yet unfrozen");
            when(nodeAdmin.setFrozen(eq(false))).thenReturn(true);
            updater.converge(RESUMED);
            expectHostResumes(1);

            // Already resumed: converging again must not trigger another resume
            updater.converge(RESUMED);
            expectHostResumes(1);
            expectUnfreezeCalls(2);

            // Host is externally suspended in orchestrator, should be resumed by node-admin
            setHostOrchestratorStatus(hostHostname, OrchestratorStatus.ALLOWED_TO_BE_DOWN);
            updater.converge(RESUMED);
            expectHostResumes(2);
            expectUnfreezeCalls(3);
            setHostOrchestratorStatus(hostHostname, OrchestratorStatus.NO_REMARKS);

            // Let's try to suspend node admin only
            when(nodeAdmin.setFrozen(eq(true))).thenReturn(false);
            assertConvergeError(SUSPENDED_NODE_ADMIN, "NodeAdmin is not yet frozen");
            expectUnfreezeCalls(3);
        }

        {
            // First orchestration failure happens within the freeze convergence timeout,
            // and so should not call setFrozen(false)
            final String orchestratorDenial = "Cannot allow to suspend because some reason";
            when(nodeAdmin.setFrozen(eq(true))).thenReturn(true);
            // The first suspend call throws, every later one succeeds
            doThrow(new RuntimeException(orchestratorDenial)).doNothing()
                    .when(orchestrator).suspend(eq(host()));
            assertConvergeError(SUSPENDED_NODE_ADMIN, orchestratorDenial);
            expectUnfreezeCalls(3);

            updater.converge(SUSPENDED_NODE_ADMIN);
            expectUnfreezeCalls(3);
            expectHostSuspends(2);
            setHostOrchestratorStatus(hostHostname, OrchestratorStatus.ALLOWED_TO_BE_DOWN);

            // Already suspended, no changes
            updater.converge(SUSPENDED_NODE_ADMIN);
            expectUnfreezeCalls(3);
            expectHostSuspends(2);

            // Host is externally resumed
            setHostOrchestratorStatus(hostHostname, OrchestratorStatus.NO_REMARKS);
            updater.converge(SUSPENDED_NODE_ADMIN);
            expectUnfreezeCalls(3);
            expectHostSuspends(3);
            setHostOrchestratorStatus(hostHostname, OrchestratorStatus.ALLOWED_TO_BE_DOWN);
        }

        {
            // At this point orchestrator will say it's OK to suspend, but something goes wrong
            // when we try to stop services
            final String stopFailure = "Failed to stop services";
            expectBatchSuspends(0, batchSuspendHostnames);
            doThrow(new RuntimeException(stopFailure)).doNothing().when(nodeAdmin).stopNodeAgentServices();
            assertConvergeError(SUSPENDED, stopFailure);
            expectBatchSuspends(1, batchSuspendHostnames);
            // Make sure we don't roll back if we fail to stop services - we will try to stop again next tick
            expectUnfreezeCalls(3);

            // Finally we are successful in transitioning to frozen
            updater.converge(SUSPENDED);
        }
    }

    /**
     * Repeatedly fails to suspend because the orchestrator refuses, then checks that an unfreeze and a
     * container refresh only happen once the reported subsystem freeze duration is raised to one hour.
     */
    @Test
    void half_transition_revert() {
        final String orchestratorDenial = "Cannot allow to suspend because some reason";
        mockNodeRepo(NodeState.active, 3);

        // Initially everything is frozen to force convergence
        when(nodeAdmin.setFrozen(eq(false))).thenReturn(true);
        updater.converge(RESUMED);
        expectUnfreezeCalls(1);
        expectContainerRefreshes(1);

        // Let's start suspending, we are able to freeze the nodes, but orchestrator denies suspension
        when(nodeAdmin.subsystemFreezeDuration()).thenReturn(Duration.ofSeconds(1));
        when(nodeAdmin.setFrozen(eq(true))).thenReturn(true);
        doThrow(new RuntimeException(orchestratorDenial)).when(orchestrator).suspend(eq(host()));

        // Each failed attempt adds exactly one freeze call and one suspend call
        for (int attempt = 1; attempt <= 3; attempt++) {
            assertConvergeError(SUSPENDED_NODE_ADMIN, orchestratorDenial);
            expectFreezeCalls(attempt);
            expectHostSuspends(attempt);
        }

        // No new unfreezes nor refresh while trying to freeze
        expectUnfreezeCalls(1);
        expectContainerRefreshes(1);

        // Only resume and fetch containers when subsystem freeze duration expires
        when(nodeAdmin.subsystemFreezeDuration()).thenReturn(Duration.ofHours(1));
        assertConvergeError(SUSPENDED_NODE_ADMIN, "Timed out trying to freeze all nodes: will force an unfrozen tick");
        expectUnfreezeCalls(2);
        expectHostSuspends(3); // no new suspend calls
        expectContainerRefreshes(2);

        // We change our mind, want to remain resumed
        updater.converge(RESUMED);
        expectUnfreezeCalls(3); // Make sure that we unfreeze!
    }

    /**
     * With the host in state {@code ready}, checks that the single-host resume/suspend calls on the
     * orchestrator are never made, while the batch suspend is still made exactly once.
     */
    @Test
    void do_not_orchestrate_host_when_not_active() {
        when(nodeAdmin.subsystemFreezeDuration()).thenReturn(Duration.ofHours(1));
        when(nodeAdmin.setFrozen(anyBoolean())).thenReturn(true);
        mockNodeRepo(NodeState.ready, 3);

        // Resume and suspend only require that node-agents are frozen and permission from
        // orchestrator to resume/suspend host. Therefore, if host is not active, we only need to freeze.
        updater.converge(RESUMED);
        verify(orchestrator, never()).resume(eq(host()));

        updater.converge(SUSPENDED_NODE_ADMIN);
        verify(orchestrator, never()).suspend(eq(host()));

        // When doing batch suspend, only suspend the containers if the host is not active
        List<String> containerHostnames = nodeRepoHostnames();
        updater.converge(SUSPENDED);
        expectBatchSuspends(1, containerHostnames);
    }

    /** Three nodes, three ACLs: every tick must create a context for each node with the configured ACL. */
    @Test
    void node_spec_and_acl_aligned() {
        Acl trustedSsh = new Acl.Builder().withTrustedPorts(22).build();
        mockNodeRepo(NodeState.active, 3);
        mockAcl(trustedSsh, 1, 2, 3);

        runAdjustTicks(3);

        expectContextCreated(3, "host1.yahoo.com", trustedSsh);
        expectContextCreated(3, "host2.yahoo.com", trustedSsh);
        expectContextCreated(3, "host3.yahoo.com", trustedSsh);
        expectNodeRepoLookups(3);
    }

    /** A node without an ACL is expected to get {@code Acl.EMPTY} for the one tick in which its spec is returned. */
    @Test
    void node_spec_and_acl_mismatch_missing_one_acl() {
        Acl trustedSsh = new Acl.Builder().withTrustedPorts(22).build();
        mockNodeRepo(NodeState.active, 3);
        mockAcl(trustedSsh, 1, 2); // Acl for 3 is missing

        runAdjustTicks(1);
        mockNodeRepo(NodeState.active, 2); // Next tick, the spec for 3 is no longer returned
        runAdjustTicks(2);

        expectContextCreated(3, "host1.yahoo.com", trustedSsh);
        expectContextCreated(3, "host2.yahoo.com", trustedSsh);
        expectContextCreated(1, "host3.yahoo.com", Acl.EMPTY);
        expectNodeRepoLookups(3);
    }

    /** An ACL without a matching node spec: contexts are still verified for the two nodes that do exist. */
    @Test
    void node_spec_and_acl_mismatch_additional_acl() {
        Acl trustedSsh = new Acl.Builder().withTrustedPorts(22).build();
        mockNodeRepo(NodeState.active, 2);
        mockAcl(trustedSsh, 1, 2, 3); // Acl for 3 is extra

        runAdjustTicks(3);

        expectContextCreated(3, "host1.yahoo.com", trustedSsh);
        expectContextCreated(3, "host2.yahoo.com", trustedSsh);
        expectNodeRepoLookups(3);
    }

    /**
     * Asserts that converging to {@code targetState} throws a {@link RuntimeException} whose message
     * equals {@code reason}; fails the test when no exception is thrown.
     */
    private void assertConvergeError(NodeAdminStateUpdater.State targetState, String reason) {
        RuntimeException failure = null;
        try {
            updater.converge(targetState);
        } catch (RuntimeException e) {
            failure = e;
        }

        if (failure == null) {
            fail("Expected converging to " + targetState + " to fail with \"" + reason + "\", but it succeeded without error");
        } else {
            assertEquals(reason, failure.getMessage());
        }
    }

    /**
     * Resets the node repository and repopulates it with {@code numberOfNodes} test specs
     * ({@code host1.yahoo.com} .. {@code hostN.yahoo.com}) whose parent is the host, followed by
     * the host's own spec in state {@code hostState}.
     */
    private void mockNodeRepo(NodeState hostState, int numberOfNodes) {
        nodeRepository.resetNodeSpecs();

        IntStream.rangeClosed(1, numberOfNodes).forEach(id ->
                nodeRepository.updateNodeSpec(
                        NodeSpec.Builder.testSpec(childHostname(id)).parentHostname(host()).build()));

        nodeRepository.updateNodeSpec(
                NodeSpec.Builder.testSpec(host(), hostState).type(NodeType.host).build());
    }

    /** Registers {@code acl} in the node repository for each of the given node ids. */
    private void mockAcl(Acl acl, int... nodeIds) {
        nodeRepository.setAcl(Arrays.stream(nodeIds)
                .mapToObj(NodeAdminStateUpdaterTest::childHostname)
                .collect(Collectors.toMap(Function.identity(), name -> acl)));
    }

    /** Sets the orchestrator status on the node repository's spec for {@code hostname}. */
    private void setHostOrchestratorStatus(HostName hostname, OrchestratorStatus orchestratorStatus) {
        nodeRepository.updateNodeSpec(hostname.value(), spec -> spec.orchestratorStatus(orchestratorStatus));
    }

    /** Hostname used for the node with the given id. */
    private static String childHostname(int id) {
        return "host" + id + ".yahoo.com";
    }

    /** String form of the host's hostname. */
    private String host() {
        return hostHostname.value();
    }

    /** Hostnames of the specs the node repository currently returns for the host. */
    private List<String> nodeRepoHostnames() {
        return nodeRepository.getNodes(host()).stream()
                .map(NodeSpec::hostname)
                .toList();
    }

    /** Calls {@code adjustNodeAgentsToRunFromNodeRepository()} on the updater {@code ticks} times. */
    private void runAdjustTicks(int ticks) {
        for (int tick = 0; tick < ticks; tick++) {
            updater.adjustNodeAgentsToRunFromNodeRepository();
        }
    }

    /** Verifies the total number of {@code setFrozen(false)} calls made on NodeAdmin so far. */
    private void expectUnfreezeCalls(int count) {
        verify(nodeAdmin, times(count)).setFrozen(eq(false));
    }

    /** Verifies the total number of {@code setFrozen(true)} calls made on NodeAdmin so far. */
    private void expectFreezeCalls(int count) {
        verify(nodeAdmin, times(count)).setFrozen(eq(true));
    }

    /** Verifies the total number of {@code refreshContainersToRun} calls made on NodeAdmin so far. */
    private void expectContainerRefreshes(int count) {
        verify(nodeAdmin, times(count)).refreshContainersToRun(any());
    }

    /** Verifies the total number of single-host {@code resume} calls made on the orchestrator so far. */
    private void expectHostResumes(int count) {
        verify(orchestrator, times(count)).resume(eq(host()));
    }

    /** Verifies the total number of single-host {@code suspend} calls made on the orchestrator so far. */
    private void expectHostSuspends(int count) {
        verify(orchestrator, times(count)).suspend(eq(host()));
    }

    /** Verifies the total number of batch {@code suspend} calls made for the host with exactly {@code hostnames}. */
    private void expectBatchSuspends(int count, List<String> hostnames) {
        verify(orchestrator, times(count)).suspend(eq(host()), eq(hostnames));
    }

    /** Verifies how often a node agent context was created for the spec named {@code hostname} with {@code acl}. */
    private void expectContextCreated(int count, String hostname, Acl acl) {
        verify(nodeAgentContextFactory, times(count))
                .create(argThat(spec -> spec.hostname().equals(hostname)), eq(acl));
    }

    /** Verifies that both node specs and ACLs were fetched for the host {@code count} times. */
    private void expectNodeRepoLookups(int count) {
        verify(nodeRepository, times(count)).getNodes(eq(host()));
        verify(nodeRepository, times(count)).getAcls(eq(host()));
    }
}