about | summary | refs | log | tree | commit | diff | stats
path: root/node-repository/src
diff options
context:
space:
mode:
author Valerij Fredriksen <valerijf@oath.com> 2018-08-22 12:01:53 +0200
committer Valerij Fredriksen <valerijf@oath.com> 2018-08-22 12:01:53 +0200
commit 912c412bc62374561b78343607fb3d5bd20949d3 (patch)
tree 45a35a42daf693d43c0660c20c4fbbce4d3fe27c /node-repository/src
parent 08c599b4286ffe5c19c23b192c36a8b24f919352 (diff)
Add test for failing with hardware failure
Diffstat (limited to 'node-repository/src')
-rw-r--r-- node-repository/src/main/java/com/yahoo/vespa/hosted/provision/testutils/OrchestratorMock.java 43
-rw-r--r-- node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/NodeFailTester.java 10
-rw-r--r-- node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/NodeFailerTest.java 58
3 files changed, 88 insertions, 23 deletions
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/testutils/OrchestratorMock.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/testutils/OrchestratorMock.java
index 46d72974718..b38f8c91245 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/testutils/OrchestratorMock.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/testutils/OrchestratorMock.java
@@ -3,17 +3,9 @@ package com.yahoo.vespa.hosted.provision.testutils;
import com.yahoo.config.provision.ApplicationId;
import com.yahoo.vespa.applicationmodel.HostName;
-import com.yahoo.vespa.orchestrator.ApplicationIdNotFoundException;
-import com.yahoo.vespa.orchestrator.ApplicationStateChangeDeniedException;
-import com.yahoo.vespa.orchestrator.BatchHostNameNotFoundException;
-import com.yahoo.vespa.orchestrator.BatchInternalErrorException;
import com.yahoo.vespa.orchestrator.Host;
-import com.yahoo.vespa.orchestrator.HostNameNotFoundException;
-import com.yahoo.vespa.orchestrator.OrchestrationException;
import com.yahoo.vespa.orchestrator.Orchestrator;
import com.yahoo.vespa.orchestrator.model.NodeGroup;
-import com.yahoo.vespa.orchestrator.policy.BatchHostStateChangeDeniedException;
-import com.yahoo.vespa.orchestrator.policy.HostStateChangeDeniedException;
import com.yahoo.vespa.orchestrator.status.ApplicationInstanceStatus;
import com.yahoo.vespa.orchestrator.status.HostStatus;
@@ -27,32 +19,39 @@ import java.util.Set;
*/
public class OrchestratorMock implements Orchestrator {
- Set<ApplicationId> suspendedApplications = new HashSet<>();
+ private final Set<HostName> suspendedHosts = new HashSet<>();
+ private final Set<ApplicationId> suspendedApplications = new HashSet<>();
@Override
- public Host getHost(HostName hostName) throws HostNameNotFoundException {
+ public Host getHost(HostName hostName) {
return null;
}
@Override
- public HostStatus getNodeStatus(HostName hostName) throws HostNameNotFoundException {
- return null;
+ public HostStatus getNodeStatus(HostName hostName) {
+ return suspendedHosts.contains(hostName) ? HostStatus.ALLOWED_TO_BE_DOWN : HostStatus.NO_REMARKS;
}
@Override
- public void setNodeStatus(HostName hostName, HostStatus state) throws OrchestrationException {}
+ public void setNodeStatus(HostName hostName, HostStatus state) {}
@Override
- public void resume(HostName hostName) throws HostStateChangeDeniedException, HostNameNotFoundException {}
+ public void resume(HostName hostName) {
+ suspendedHosts.remove(hostName);
+ }
@Override
- public void suspend(HostName hostName) throws HostStateChangeDeniedException, HostNameNotFoundException {}
+ public void suspend(HostName hostName) {
+ suspendedHosts.add(hostName);
+ }
@Override
- public void suspendGroup(NodeGroup nodeGroup) throws HostStateChangeDeniedException, HostNameNotFoundException {}
+ public void suspendGroup(NodeGroup nodeGroup) {
+ nodeGroup.getHostNames().forEach(this::suspend);
+ }
@Override
- public ApplicationInstanceStatus getApplicationInstanceStatus(ApplicationId appId) throws ApplicationIdNotFoundException {
+ public ApplicationInstanceStatus getApplicationInstanceStatus(ApplicationId appId) {
return suspendedApplications.contains(appId)
? ApplicationInstanceStatus.ALLOWED_TO_BE_DOWN : ApplicationInstanceStatus.NO_REMARKS;
}
@@ -63,20 +62,20 @@ public class OrchestratorMock implements Orchestrator {
}
@Override
- public void resume(ApplicationId appId) throws ApplicationStateChangeDeniedException, ApplicationIdNotFoundException {
+ public void resume(ApplicationId appId) {
suspendedApplications.remove(appId);
}
@Override
- public void suspend(ApplicationId appId) throws ApplicationStateChangeDeniedException, ApplicationIdNotFoundException {
+ public void suspend(ApplicationId appId) {
suspendedApplications.add(appId);
}
@Override
- public void acquirePermissionToRemove(HostName hostName) throws OrchestrationException {}
+ public void acquirePermissionToRemove(HostName hostName) {}
@Override
- public void suspendAll(HostName parentHostname, List<HostName> hostNames) throws BatchInternalErrorException, BatchHostStateChangeDeniedException, BatchHostNameNotFoundException {
- throw new UnsupportedOperationException("Not implemented");
+ public void suspendAll(HostName parentHostname, List<HostName> hostNames) {
+ hostNames.forEach(this::suspend);
}
}
diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/NodeFailTester.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/NodeFailTester.java
index ea28f7bafc8..c29f26ef1f6 100644
--- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/NodeFailTester.java
+++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/NodeFailTester.java
@@ -19,6 +19,7 @@ import com.yahoo.config.provision.TenantName;
import com.yahoo.config.provision.Zone;
import com.yahoo.test.ManualClock;
import com.yahoo.transaction.NestedTransaction;
+import com.yahoo.vespa.applicationmodel.HostName;
import com.yahoo.vespa.curator.Curator;
import com.yahoo.vespa.curator.mock.MockCurator;
import com.yahoo.vespa.curator.transaction.CuratorTransaction;
@@ -190,8 +191,15 @@ public class NodeFailTester {
public void suspend(ApplicationId app) {
try {
orchestrator.suspend(app);
+ } catch (Exception e) {
+ throw new RuntimeException(e);
}
- catch (Exception e) {
+ }
+
+ public void suspend(String hostName) {
+ try {
+ orchestrator.suspend(new HostName(hostName));
+ } catch (Exception e) {
throw new RuntimeException(e);
}
}
diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/NodeFailerTest.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/NodeFailerTest.java
index c483615203d..71b0b125e0f 100644
--- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/NodeFailerTest.java
+++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/NodeFailerTest.java
@@ -14,6 +14,7 @@ import java.time.Duration;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
+import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
@@ -37,6 +38,63 @@ import static org.mockito.Mockito.when;
public class NodeFailerTest {
@Test
+ public void fail_nodes_with_hardware_failure_if_allowed_to_be_down() {
+ NodeFailTester tester = NodeFailTester.withTwoApplicationsOnDocker(6);
+ String hostWithHwFailure = selectFirstParentHostWithNActiveNodesExcept(tester.nodeRepository, 2);
+
+ // Set hardware failure to the parent and all its children
+ tester.nodeRepository.getNodes().stream()
+ .filter(node -> node.parentHostname().map(parent -> parent.equals(hostWithHwFailure))
+ .orElse(node.hostname().equals(hostWithHwFailure)))
+ .forEach(node -> {
+ Node updatedNode = node.with(node.status().withHardwareFailureDescription(Optional.of("HW failure")));
+ tester.nodeRepository.write(updatedNode);
+ });
+
+ // The host should have 2 nodes in active and 1 ready
+ Map<Node.State, List<String>> hostnamesByState = tester.nodeRepository.getChildNodes(hostWithHwFailure).stream()
+ .collect(Collectors.groupingBy(Node::state, Collectors.mapping(Node::hostname, Collectors.toList())));
+ assertEquals(2, hostnamesByState.get(Node.State.active).size());
+ assertEquals(1, hostnamesByState.get(Node.State.ready).size());
+
+ // Suspend the first of the active nodes
+ tester.suspend(hostnamesByState.get(Node.State.active).get(0));
+
+ tester.failer.run();
+ tester.clock.advance(Duration.ofHours(25));
+ tester.allNodesMakeAConfigRequestExcept();
+ tester.failer.run();
+
+ // The first (and the only) ready node and the 1st active node that was allowed to fail should be failed
+ Map<Node.State, List<String>> expectedHostnamesByState1Iter = new HashMap<>();
+ expectedHostnamesByState1Iter.put(Node.State.failed,
+ Arrays.asList(hostnamesByState.get(Node.State.active).get(0), hostnamesByState.get(Node.State.ready).get(0)));
+ expectedHostnamesByState1Iter.put(Node.State.active, hostnamesByState.get(Node.State.active).subList(1, 2));
+ Map<Node.State, List<String>> hostnamesByState1Iter = tester.nodeRepository.getChildNodes(hostWithHwFailure).stream()
+ .collect(Collectors.groupingBy(Node::state, Collectors.mapping(Node::hostname, Collectors.toList())));
+ assertEquals(expectedHostnamesByState1Iter, hostnamesByState1Iter);
+
+ // Suspend the second of the active nodes
+ tester.suspend(hostnamesByState.get(Node.State.active).get(1));
+
+ tester.clock.advance(Duration.ofHours(25));
+ tester.allNodesMakeAConfigRequestExcept();
+ tester.failer.run();
+
+ // All of the children should be failed now
+ Set<Node.State> childStates2Iter = tester.nodeRepository.getChildNodes(hostWithHwFailure).stream()
+ .map(Node::state).collect(Collectors.toSet());
+ assertEquals(Collections.singleton(Node.State.failed), childStates2Iter);
+ // The host itself is still active as it too must be allowed to suspend
+ assertEquals(Node.State.active, tester.nodeRepository.getNode(hostWithHwFailure).get().state());
+
+ tester.suspend(hostWithHwFailure);
+ tester.failer.run();
+ assertEquals(Node.State.failed, tester.nodeRepository.getNode(hostWithHwFailure).get().state());
+ assertEquals(4, tester.nodeRepository.getNodes(Node.State.failed).size());
+ }
+
+ @Test
public void nodes_for_suspended_applications_are_not_failed() {
NodeFailTester tester = NodeFailTester.withTwoApplications();
tester.suspend(NodeFailTester.app1);