diff options
author | Valerij Fredriksen <valerijf@oath.com> | 2018-08-22 12:01:53 +0200 |
---|---|---|
committer | Valerij Fredriksen <valerijf@oath.com> | 2018-08-22 12:01:53 +0200 |
commit | 912c412bc62374561b78343607fb3d5bd20949d3 (patch) | |
tree | 45a35a42daf693d43c0660c20c4fbbce4d3fe27c /node-repository | |
parent | 08c599b4286ffe5c19c23b192c36a8b24f919352 (diff) |
Add test for failing with hardware failure
Diffstat (limited to 'node-repository')
3 files changed, 88 insertions, 23 deletions
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/testutils/OrchestratorMock.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/testutils/OrchestratorMock.java index 46d72974718..b38f8c91245 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/testutils/OrchestratorMock.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/testutils/OrchestratorMock.java @@ -3,17 +3,9 @@ package com.yahoo.vespa.hosted.provision.testutils; import com.yahoo.config.provision.ApplicationId; import com.yahoo.vespa.applicationmodel.HostName; -import com.yahoo.vespa.orchestrator.ApplicationIdNotFoundException; -import com.yahoo.vespa.orchestrator.ApplicationStateChangeDeniedException; -import com.yahoo.vespa.orchestrator.BatchHostNameNotFoundException; -import com.yahoo.vespa.orchestrator.BatchInternalErrorException; import com.yahoo.vespa.orchestrator.Host; -import com.yahoo.vespa.orchestrator.HostNameNotFoundException; -import com.yahoo.vespa.orchestrator.OrchestrationException; import com.yahoo.vespa.orchestrator.Orchestrator; import com.yahoo.vespa.orchestrator.model.NodeGroup; -import com.yahoo.vespa.orchestrator.policy.BatchHostStateChangeDeniedException; -import com.yahoo.vespa.orchestrator.policy.HostStateChangeDeniedException; import com.yahoo.vespa.orchestrator.status.ApplicationInstanceStatus; import com.yahoo.vespa.orchestrator.status.HostStatus; @@ -27,32 +19,39 @@ import java.util.Set; */ public class OrchestratorMock implements Orchestrator { - Set<ApplicationId> suspendedApplications = new HashSet<>(); + private final Set<HostName> suspendedHosts = new HashSet<>(); + private final Set<ApplicationId> suspendedApplications = new HashSet<>(); @Override - public Host getHost(HostName hostName) throws HostNameNotFoundException { + public Host getHost(HostName hostName) { return null; } @Override - public HostStatus getNodeStatus(HostName hostName) throws HostNameNotFoundException { - return null; + public HostStatus getNodeStatus(HostName hostName) { + return suspendedHosts.contains(hostName) ? HostStatus.ALLOWED_TO_BE_DOWN : HostStatus.NO_REMARKS; } @Override - public void setNodeStatus(HostName hostName, HostStatus state) throws OrchestrationException {} + public void setNodeStatus(HostName hostName, HostStatus state) {} @Override - public void resume(HostName hostName) throws HostStateChangeDeniedException, HostNameNotFoundException {} + public void resume(HostName hostName) { + suspendedHosts.remove(hostName); + } @Override - public void suspend(HostName hostName) throws HostStateChangeDeniedException, HostNameNotFoundException {} + public void suspend(HostName hostName) { + suspendedHosts.add(hostName); + } @Override - public void suspendGroup(NodeGroup nodeGroup) throws HostStateChangeDeniedException, HostNameNotFoundException {} + public void suspendGroup(NodeGroup nodeGroup) { + nodeGroup.getHostNames().forEach(this::suspend); + } @Override - public ApplicationInstanceStatus getApplicationInstanceStatus(ApplicationId appId) throws ApplicationIdNotFoundException { + public ApplicationInstanceStatus getApplicationInstanceStatus(ApplicationId appId) { return suspendedApplications.contains(appId) ? ApplicationInstanceStatus.ALLOWED_TO_BE_DOWN : ApplicationInstanceStatus.NO_REMARKS; } @@ -63,20 +62,20 @@ public class OrchestratorMock implements Orchestrator { } @Override - public void resume(ApplicationId appId) throws ApplicationStateChangeDeniedException, ApplicationIdNotFoundException { + public void resume(ApplicationId appId) { suspendedApplications.remove(appId); } @Override - public void suspend(ApplicationId appId) throws ApplicationStateChangeDeniedException, ApplicationIdNotFoundException { + public void suspend(ApplicationId appId) { suspendedApplications.add(appId); } @Override - public void acquirePermissionToRemove(HostName hostName) throws OrchestrationException {} + public void acquirePermissionToRemove(HostName hostName) {} @Override - public void suspendAll(HostName parentHostname, List<HostName> hostNames) throws BatchInternalErrorException, BatchHostStateChangeDeniedException, BatchHostNameNotFoundException { - throw new UnsupportedOperationException("Not implemented"); + public void suspendAll(HostName parentHostname, List<HostName> hostNames) { + hostNames.forEach(this::suspend); } } diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/NodeFailTester.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/NodeFailTester.java index ea28f7bafc8..c29f26ef1f6 100644 --- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/NodeFailTester.java +++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/NodeFailTester.java @@ -19,6 +19,7 @@ import com.yahoo.config.provision.TenantName; import com.yahoo.config.provision.Zone; import com.yahoo.test.ManualClock; import com.yahoo.transaction.NestedTransaction; +import com.yahoo.vespa.applicationmodel.HostName; import com.yahoo.vespa.curator.Curator; import com.yahoo.vespa.curator.mock.MockCurator; import com.yahoo.vespa.curator.transaction.CuratorTransaction; @@ -190,8 +191,15 @@ public class NodeFailTester { public void suspend(ApplicationId app) { try { orchestrator.suspend(app); + } catch (Exception e) { + throw new RuntimeException(e); } - catch (Exception e) { + } + + public void suspend(String hostName) { + try { + orchestrator.suspend(new HostName(hostName)); + } catch (Exception e) { throw new RuntimeException(e); } } diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/NodeFailerTest.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/NodeFailerTest.java index c483615203d..71b0b125e0f 100644 --- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/NodeFailerTest.java +++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/NodeFailerTest.java @@ -14,6 +14,7 @@ import java.time.Duration; import java.util.ArrayList; import java.util.Arrays; import java.util.Collections; +import java.util.HashMap; import java.util.HashSet; import java.util.List; import java.util.Map; @@ -37,6 +38,63 @@ import static org.mockito.Mockito.when; public class NodeFailerTest { @Test + public void fail_nodes_with_hardware_failure_if_allowed_to_be_down() { + NodeFailTester tester = NodeFailTester.withTwoApplicationsOnDocker(6); + String hostWithHwFailure = selectFirstParentHostWithNActiveNodesExcept(tester.nodeRepository, 2); + + // Set hardware failure to the parent and all its children + tester.nodeRepository.getNodes().stream() + .filter(node -> node.parentHostname().map(parent -> parent.equals(hostWithHwFailure)) + .orElse(node.hostname().equals(hostWithHwFailure))) + .forEach(node -> { + Node updatedNode = node.with(node.status().withHardwareFailureDescription(Optional.of("HW failure"))); + tester.nodeRepository.write(updatedNode); + }); + + // The host should have 2 nodes in active and 1 ready + Map<Node.State, List<String>> hostnamesByState = tester.nodeRepository.getChildNodes(hostWithHwFailure).stream() + .collect(Collectors.groupingBy(Node::state, Collectors.mapping(Node::hostname, Collectors.toList()))); + assertEquals(2, hostnamesByState.get(Node.State.active).size()); + assertEquals(1, hostnamesByState.get(Node.State.ready).size()); + + // Suspend the first of the active nodes + tester.suspend(hostnamesByState.get(Node.State.active).get(0)); + + tester.failer.run(); + tester.clock.advance(Duration.ofHours(25)); + tester.allNodesMakeAConfigRequestExcept(); + tester.failer.run(); + + // The first (and the only) ready node and the 1st active node that was allowed to fail should be failed + Map<Node.State, List<String>> expectedHostnamesByState1Iter = new HashMap<>(); + expectedHostnamesByState1Iter.put(Node.State.failed, + Arrays.asList(hostnamesByState.get(Node.State.active).get(0), hostnamesByState.get(Node.State.ready).get(0))); + expectedHostnamesByState1Iter.put(Node.State.active, hostnamesByState.get(Node.State.active).subList(1, 2)); + Map<Node.State, List<String>> hostnamesByState1Iter = tester.nodeRepository.getChildNodes(hostWithHwFailure).stream() + .collect(Collectors.groupingBy(Node::state, Collectors.mapping(Node::hostname, Collectors.toList()))); + assertEquals(expectedHostnamesByState1Iter, hostnamesByState1Iter); + + // Suspend the second of the active nodes + tester.suspend(hostnamesByState.get(Node.State.active).get(1)); + + tester.clock.advance(Duration.ofHours(25)); + tester.allNodesMakeAConfigRequestExcept(); + tester.failer.run(); + + // All of the children should be failed now + Set<Node.State> childStates2Iter = tester.nodeRepository.getChildNodes(hostWithHwFailure).stream() + .map(Node::state).collect(Collectors.toSet()); + assertEquals(Collections.singleton(Node.State.failed), childStates2Iter); + // The host itself is still active as it too must be allowed to suspend + assertEquals(Node.State.active, tester.nodeRepository.getNode(hostWithHwFailure).get().state()); + + tester.suspend(hostWithHwFailure); + tester.failer.run(); + assertEquals(Node.State.failed, tester.nodeRepository.getNode(hostWithHwFailure).get().state()); + assertEquals(4, tester.nodeRepository.getNodes(Node.State.failed).size()); + } + + @Test public void nodes_for_suspended_applications_are_not_failed() { NodeFailTester tester = NodeFailTester.withTwoApplications(); tester.suspend(NodeFailTester.app1); |