author | freva <valerijf@yahoo-inc.com> | 2017-02-24 13:51:27 +0100
---|---|---
committer | freva <valerijf@yahoo-inc.com> | 2017-02-24 13:51:27 +0100
commit | 20c0623a8f9056f761aad7aa38c88d8a46624e93 (patch) |
tree | 856b4ae6352c7c47b952986a13ca7548b728d315 /node-repository |
parent | 97d745c24c3bf9de0d415fe859a0810056fbce8d (diff) |
Node failer should fail children when failing host
Diffstat (limited to 'node-repository')
3 files changed, 230 insertions, 18 deletions
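
Before the full diff, here is a condensed sketch of the behavioral change in `NodeFailer.failActive` (simplified from the patch below; logging and javadoc are elided, so treat it as an illustration rather than the exact committed code):

```java
// Sketch, simplified from the patch below: failing a host now requires first
// failing every active child node running on it, each of which must redeploy.
private boolean failActive(Node node) {
    Optional<Deployment> deployment =
            deployer.deployFromLocalActive(node.allocation().get().owner(), Duration.ofMinutes(30));
    if ( ! deployment.isPresent()) return false; // another config server will handle it

    try (Mutex lock = nodeRepository().lock(node.allocation().get().owner())) {
        // New behavior: fail all children (tenant nodes on this host) before the host itself
        boolean allChildrenFailed = true;
        for (Node child : nodeRepository().getNodes(node)) {
            if (child.state() == Node.State.active)
                allChildrenFailed &= failActive(child);   // recurse into the child
            else
                nodeRepository().fail(child.hostname());
        }
        if ( ! allChildrenFailed) return false;           // keep the host active if any child could not move

        node = nodeRepository().fail(node.hostname());
        try {
            deployment.get().prepare();
            deployment.get().activate();
            return true;
        } catch (RuntimeException e) {
            // No capacity to redeploy without this node: roll back to active
            nodeRepository().reactivate(node.hostname());
            return false;
        }
    }
}
```

For a tenant node with no children the loop is a no-op, so the method behaves as before apart from the new boolean return value.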
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeFailer.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeFailer.java
index df373902eb6..b0cee37c5ec 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeFailer.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeFailer.java
@@ -42,7 +42,7 @@ public class NodeFailer extends Maintainer {
 
     /** Provides information about the status of ready hosts */
     private final HostLivenessTracker hostLivenessTracker;
-    
+
     /** Provides (more accurate) information about the status of active hosts */
     private final ServiceMonitor serviceMonitor;
 
@@ -50,11 +50,11 @@ public class NodeFailer extends Maintainer {
     private final Duration downTimeLimit;
     private final Clock clock;
     private final Orchestrator orchestrator;
-    
+
     private final Duration nodeRequestInterval = Duration.ofMinutes(10);
     private final Instant constructionTime;
 
-    public NodeFailer(Deployer deployer, HostLivenessTracker hostLivenessTracker, 
+    public NodeFailer(Deployer deployer, HostLivenessTracker hostLivenessTracker,
                       ServiceMonitor serviceMonitor, NodeRepository nodeRepository,
                       Duration downTimeLimit, Clock clock, Orchestrator orchestrator) {
         // check ping status every five minutes, but at least twice as often as the down time limit
@@ -87,7 +87,7 @@ public class NodeFailer extends Maintainer {
                 failActive(node);
         }
     }
-    
+
     private void updateNodeLivenessEventsForReadyNodes() {
         // Update node last request events through ZooKeeper to collect request to all config servers.
         // We do this here ("lazily") to avoid writing to zk for each config request.
@@ -106,15 +106,15 @@ public class NodeFailer extends Maintainer {
         }
     }
 
-    private List<Node> readyNodesWhichAreDead() { 
+    private List<Node> readyNodesWhichAreDead() {
         // Allow requests some time to be registered in case all config servers have been down
         if (constructionTime.isAfter(clock.instant().minus(nodeRequestInterval).minus(nodeRequestInterval) ))
             return Collections.emptyList();
-        
+
        // Nodes are taken as dead if they have not made a config request since this instant.
        // Add 10 minutes to the down time limit to allow nodes to make a request that infrequently.
        Instant oldestAcceptableRequestTime = clock.instant().minus(downTimeLimit).minus(nodeRequestInterval);
-        
+
        return nodeRepository().getNodes(Node.State.ready).stream()
                .filter(node -> wasMadeReadyBefore(oldestAcceptableRequestTime, node))
                .filter(node -> ! hasRecordedRequestAfter(oldestAcceptableRequestTime, node))
@@ -159,7 +159,7 @@ public class NodeFailer extends Maintainer {
         if (nodeType == NodeType.tenant) return true;
         return nodeRepository().getNodes(nodeType, Node.State.failed).size() == 0;
     }
-    
+
     /**
      * If the node is positively DOWN, and there is no "down" history record, we add it.
      * If the node is positively UP we remove any "down" history record.
@@ -210,19 +210,34 @@ public class NodeFailer extends Maintainer {
 
     /**
      * Called when a node should be moved to the failed state: Do that if it seems safe,
-     * which is when the node repo has available capacity to replace the node.
+     * which is when the node repo has available capacity to replace the node (and all its tenant nodes if host).
      * Otherwise not replacing the node ensures (by Orchestrator check) that no further action will be taken.
+     *
+     * @return whether node was successfully failed
      */
-    private void failActive(Node node) {
+    private boolean failActive(Node node) {
         Optional<Deployment> deployment = deployer.deployFromLocalActive(node.allocation().get().owner(), Duration.ofMinutes(30));
-        if ( ! deployment.isPresent()) return; // this will be done at another config server
+        if ( ! deployment.isPresent()) return false; // this will be done at another config server
 
         try (Mutex lock = nodeRepository().lock(node.allocation().get().owner())) {
+            // If the active node that we are trying to fail is of type host, we need to successfully fail all
+            // the children nodes running on it before we fail the host
+            boolean allTenantNodesFailedOutSuccessfully = true;
+            for (Node failingTenantNode : nodeRepository().getNodes(node)) {
+                if (failingTenantNode.state() == Node.State.active) {
+                    allTenantNodesFailedOutSuccessfully &= failActive(failingTenantNode);
+                } else {
+                    nodeRepository().fail(failingTenantNode.hostname());
+                }
+            }
+
+            if (! allTenantNodesFailedOutSuccessfully) return false;
             node = nodeRepository().fail(node.hostname());
             try {
                 deployment.get().prepare();
                 deployment.get().activate();
+                return true;
             } catch (RuntimeException e) {
                 // The expected reason for deployment to fail here is that there is no capacity available to redeploy.
@@ -230,6 +245,7 @@
                 nodeRepository().reactivate(node.hostname());
                 log.log(Level.WARNING, "Attempted to fail " + node + " for " + node.allocation().get().owner() +
                         ", but redeploying without the node failed", e);
+                return false;
             }
         }
     }
diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/NodeFailTester.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/NodeFailTester.java
index 23dd41daae6..a76bc0eaadb 100644
--- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/NodeFailTester.java
+++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/NodeFailTester.java
@@ -67,6 +67,7 @@ import static org.junit.Assert.assertEquals;
 public class NodeFailTester {
 
     // Immutable components
+    public static final ApplicationId nodeAdminApp = ApplicationId.from(TenantName.from("hosted-vespa"), ApplicationName.from("routing"), InstanceName.from("default"));
     public static final ApplicationId app1 = ApplicationId.from(TenantName.from("foo1"), ApplicationName.from("bar"), InstanceName.from("fuz"));
     public static final ApplicationId app2 = ApplicationId.from(TenantName.from("foo2"), ApplicationName.from("bar"), InstanceName.from("fuz"));
     public static final NodeFlavors nodeFlavors = FlavorConfigBuilder.createDummies("default", "docker");
@@ -118,6 +119,41 @@ public class NodeFailTester {
         return tester;
     }
 
+    public static NodeFailTester withTwoApplicationsOnDocker(int numberOfHosts) {
+        NodeFailTester tester = new NodeFailTester();
+
+        int nodesPerHost = 3;
+        List<Node> hosts = tester.createHostNodes(numberOfHosts);
+        for (int i = 0; i < hosts.size(); i++) {
+            tester.createReadyNodes(nodesPerHost, i * nodesPerHost, Optional.of("parent" + i),
+                    nodeFlavors.getFlavorOrThrow("docker"), NodeType.tenant);
+        }
+
+        // Create applications
+        ClusterSpec clusterNodeAdminApp = ClusterSpec.request(ClusterSpec.Type.container, ClusterSpec.Id.from("node-admin"), Optional.empty());
+        ClusterSpec clusterApp1 = ClusterSpec.request(ClusterSpec.Type.container, ClusterSpec.Id.from("test"), Optional.of("vespa:6.75.0"));
+        ClusterSpec clusterApp2 = ClusterSpec.request(ClusterSpec.Type.content, ClusterSpec.Id.from("test"), Optional.of("vespa:6.75.0"));
+        Capacity allHosts = Capacity.fromRequiredNodeType(NodeType.host);
+        Capacity capacity1 = Capacity.fromNodeCount(3, Optional.of("docker"));
+        Capacity capacity2 = Capacity.fromNodeCount(5, Optional.of("docker"));
+        tester.activate(nodeAdminApp, clusterNodeAdminApp, allHosts);
+        tester.activate(app1, clusterApp1, capacity1);
+        tester.activate(app2, clusterApp2, capacity2);
+        assertEquals(new HashSet<>(tester.nodeRepository.getNodes(NodeType.host)),
+                new HashSet<>(tester.nodeRepository.getNodes(nodeAdminApp, Node.State.active)));
+        assertEquals(capacity1.nodeCount(), tester.nodeRepository.getNodes(app1, Node.State.active).size());
+        assertEquals(capacity2.nodeCount(), tester.nodeRepository.getNodes(app2, Node.State.active).size());
+
+        Map<ApplicationId, MockDeployer.ApplicationContext> apps = new HashMap<>();
+        apps.put(nodeAdminApp, new MockDeployer.ApplicationContext(nodeAdminApp, clusterNodeAdminApp, allHosts, 1));
+        apps.put(app1, new MockDeployer.ApplicationContext(app1, clusterApp1, capacity1, 1));
+        apps.put(app2, new MockDeployer.ApplicationContext(app2, clusterApp2, capacity2, 1));
+        tester.deployer = new MockDeployer(tester.provisioner, apps);
+        tester.serviceMonitor = new ServiceMonitorStub(apps, tester.nodeRepository);
+        tester.failer = tester.createFailer();
+        return tester;
+    }
+
     public static NodeFailTester withProxyApplication() {
         NodeFailTester tester = new NodeFailTester();
 
@@ -163,7 +199,7 @@ public class NodeFailTester {
     }
 
     public void createReadyNodes(int count, NodeType nodeType) {
-        createReadyNodes(count, 0, nodeFlavors.getFlavorOrThrow("default"), nodeType);
+        createReadyNodes(count, 0, Optional.empty(), nodeFlavors.getFlavorOrThrow("default"), nodeType);
     }
 
     public void createReadyNodes(int count, int startIndex) {
@@ -171,25 +207,26 @@ public class NodeFailTester {
     }
 
     public void createReadyNodes(int count, int startIndex, String flavor) {
-        createReadyNodes(count, startIndex, nodeFlavors.getFlavorOrThrow(flavor), NodeType.tenant);
+        createReadyNodes(count, startIndex, Optional.empty(), nodeFlavors.getFlavorOrThrow(flavor), NodeType.tenant);
     }
 
-    private void createReadyNodes(int count, int startIndex, Flavor flavor, NodeType nodeType) {
+    private void createReadyNodes(int count, int startIndex, Optional<String> parentHostname, Flavor flavor, NodeType nodeType) {
         List<Node> nodes = new ArrayList<>(count);
         for (int i = startIndex; i < startIndex + count; i++)
-            nodes.add(nodeRepository.createNode("node" + i, "host" + i, Optional.empty(), flavor, nodeType));
+            nodes.add(nodeRepository.createNode("node" + i, "host" + i, parentHostname, flavor, nodeType));
+
         nodes = nodeRepository.addNodes(nodes);
         nodes = nodeRepository.setDirty(nodes);
         nodeRepository.setReady(nodes);
     }
 
-    private void createHostNodes(int count) {
+    private List<Node> createHostNodes(int count) {
         List<Node> nodes = new ArrayList<>(count);
         for (int i = 0; i < count; i++)
             nodes.add(nodeRepository.createNode("parent" + i, "parent" + i, Optional.empty(), nodeFlavors.getFlavorOrThrow("default"), NodeType.host));
         nodes = nodeRepository.addNodes(nodes);
         nodes = nodeRepository.setDirty(nodes);
-        nodeRepository.setReady(nodes);
+        return nodeRepository.setReady(nodes);
     }
 
     private void activate(ApplicationId applicationId, ClusterSpec cluster, int nodeCount) {
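
The node counts asserted in the tests that follow fall out of this fixture; a quick arithmetic sanity check (the class name is illustrative only, not part of the patch):

```java
// Expected counts for NodeFailTester.withTwoApplicationsOnDocker(numberOfHosts):
// each host gets 3 ready Docker nodes; app1 uses 3 of them and app2 uses 5.
public class DockerFixtureCounts {
    public static void main(String[] args) {
        int nodesPerHost = 3;
        int activeTenantNodes = 3 + 5;   // capacity1 (app1) + capacity2 (app2)

        // 7 hosts: 21 tenant nodes, 8 active, 13 ready (testFailingDockerHost)
        System.out.println(7 * nodesPerHost - activeTenantNodes);  // 13
        // 5 hosts: 15 tenant nodes, 8 active, 7 ready (testFailingDockerHostNoReplacement)
        System.out.println(5 * nodesPerHost - activeTenantNodes);  // 7
    }
}
```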
diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/NodeFailerTest.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/NodeFailerTest.java
index b5c2c216633..0251e60e70b 100644
--- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/NodeFailerTest.java
+++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/NodeFailerTest.java
@@ -11,6 +11,7 @@ import org.junit.Test;
 import java.time.Duration;
 import java.util.HashSet;
 import java.util.List;
+import java.util.Map;
 import java.util.Optional;
 import java.util.Set;
 import java.util.stream.Collectors;
@@ -197,6 +198,164 @@ public class NodeFailerTest {
     }
 
     @Test
+    public void testFailingDockerHost() {
+        NodeFailTester tester = NodeFailTester.withTwoApplicationsOnDocker(7);
+
+        // For a day all nodes work so nothing happens
+        for (int minutes = 0; minutes < 24 * 60; minutes += 5 ) {
+            tester.clock.advance(Duration.ofMinutes(5));
+            tester.allNodesMakeAConfigRequestExcept();
+            tester.failer.run();
+            assertEquals(8, tester.nodeRepository.getNodes(NodeType.tenant, Node.State.active).size());
+            assertEquals(7, tester.nodeRepository.getNodes(NodeType.host, Node.State.active).size());
+            assertEquals( 13, tester.nodeRepository.getNodes(NodeType.tenant, Node.State.ready).size());
+        }
+
+
+        // Select the first host that has two active nodes
+        String downHost1 = tester.nodeRepository.getNodes(NodeType.tenant, Node.State.active).stream()
+                .collect(Collectors.groupingBy(Node::parentHostname))
+                .entrySet().stream()
+                .filter(entry -> entry.getValue().size() == 2)
+                .map(Map.Entry::getKey)
+                .findFirst().get().get();
+        tester.serviceMonitor.setHostDown(downHost1);
+
+        // nothing happens the first 45 minutes
+        for (int minutes = 0; minutes < 45; minutes += 5 ) {
+            tester.failer.run();
+            tester.clock.advance(Duration.ofMinutes(5));
+            tester.allNodesMakeAConfigRequestExcept();
+            assertEquals( 0, tester.deployer.redeployments);
+            assertEquals(8, tester.nodeRepository.getNodes(NodeType.tenant, Node.State.active).size());
+            assertEquals(7, tester.nodeRepository.getNodes(NodeType.host, Node.State.active).size());
+            assertEquals( 13, tester.nodeRepository.getNodes(NodeType.tenant, Node.State.ready).size());
+        }
+
+        tester.clock.advance(Duration.ofMinutes(30));
+        tester.allNodesMakeAConfigRequestExcept();
+        tester.failer.run();
+
+        assertEquals( 3, tester.deployer.redeployments);
+        assertEquals(3, tester.nodeRepository.getNodes(NodeType.tenant, Node.State.failed).size());
+        assertEquals(8, tester.nodeRepository.getNodes(NodeType.tenant, Node.State.active).size());
+        assertEquals(6, tester.nodeRepository.getNodes(NodeType.host, Node.State.active).size());
+        assertEquals( 10, tester.nodeRepository.getNodes(NodeType.tenant, Node.State.ready).size());
+
+
+        // Now lets fail an active tenant node
+        String downTenant1 = tester.nodeRepository.getNodes(NodeType.tenant, Node.State.active).get(0).hostname();
+        tester.serviceMonitor.setHostDown(downTenant1);
+
+        // nothing happens the first 45 minutes
+        for (int minutes = 0; minutes < 45; minutes += 5 ) {
+            tester.failer.run();
+            tester.clock.advance(Duration.ofMinutes(5));
+            tester.allNodesMakeAConfigRequestExcept();
+            assertEquals(3, tester.nodeRepository.getNodes(NodeType.tenant, Node.State.failed).size());
+        }
+
+        tester.clock.advance(Duration.ofMinutes(30));
+        tester.allNodesMakeAConfigRequestExcept();
+        tester.failer.run();
+
+        assertEquals( 4, tester.deployer.redeployments);
+        assertEquals(4, tester.nodeRepository.getNodes(NodeType.tenant, Node.State.failed).size());
+        assertEquals(8, tester.nodeRepository.getNodes(NodeType.tenant, Node.State.active).size());
+        assertEquals(6, tester.nodeRepository.getNodes(NodeType.host, Node.State.active).size());
+        assertEquals( 9, tester.nodeRepository.getNodes(NodeType.tenant, Node.State.ready).size());
+
+
+        // Lets fail another host, but now nothing should happen as we have already failed a host
+        String downHost2 = tester.nodeRepository.getNodes(NodeType.host).get(0).hostname();
+        tester.serviceMonitor.setHostDown(downHost2);
+
+        // Nothing happens
+        for (int minutes = 0; minutes < 90; minutes += 5 ) {
+            tester.failer.run();
+            tester.clock.advance(Duration.ofMinutes(5));
+            tester.allNodesMakeAConfigRequestExcept();
+            assertEquals( 4, tester.deployer.redeployments);
+            assertEquals(4, tester.nodeRepository.getNodes(NodeType.tenant, Node.State.failed).size());
+            assertEquals(8, tester.nodeRepository.getNodes(NodeType.tenant, Node.State.active).size());
+            assertEquals(6, tester.nodeRepository.getNodes(NodeType.host, Node.State.active).size());
+            assertEquals( 9, tester.nodeRepository.getNodes(NodeType.tenant, Node.State.ready).size());
+        }
+    }
+
+    @Test
+    public void testFailingDockerHostNoReplacement() {
+        // app2 requires 5 nodes
+        NodeFailTester tester = NodeFailTester.withTwoApplicationsOnDocker(5);
+
+        // For a day all nodes work so nothing happens
+        for (int minutes = 0; minutes < 24 * 60; minutes += 5 ) {
+            tester.clock.advance(Duration.ofMinutes(5));
+            tester.allNodesMakeAConfigRequestExcept();
+            tester.failer.run();
+            assertEquals(8, tester.nodeRepository.getNodes(NodeType.tenant, Node.State.active).size());
+            assertEquals(5, tester.nodeRepository.getNodes(NodeType.host, Node.State.active).size());
+            assertEquals( 7, tester.nodeRepository.getNodes(NodeType.tenant, Node.State.ready).size());
+        }
+
+
+        // Select the first host that has two active nodes
+        String downHost1 = tester.nodeRepository.getNodes(NodeType.tenant, Node.State.active).stream()
+                .collect(Collectors.groupingBy(Node::parentHostname))
+                .entrySet().stream()
+                .filter(entry -> entry.getValue().size() == 2)
+                .map(Map.Entry::getKey)
+                .findFirst().get().get();
+        tester.serviceMonitor.setHostDown(downHost1);
+
+        // nothing happens the first 45 minutes
+        for (int minutes = 0; minutes < 45; minutes += 5 ) {
+            tester.failer.run();
+            tester.clock.advance(Duration.ofMinutes(5));
+            tester.allNodesMakeAConfigRequestExcept();
+            assertEquals( 0, tester.deployer.redeployments);
+            assertEquals(8, tester.nodeRepository.getNodes(NodeType.tenant, Node.State.active).size());
+            assertEquals(5, tester.nodeRepository.getNodes(NodeType.host, Node.State.active).size());
+            assertEquals( 7, tester.nodeRepository.getNodes(NodeType.tenant, Node.State.ready).size());
+        }
+
+        tester.clock.advance(Duration.ofMinutes(30));
+        tester.allNodesMakeAConfigRequestExcept();
+        tester.failer.run();
+
+        // The node used by app1 should've been redeployed, while the host and node used by app2 should stay
+        assertEquals( 1, tester.deployer.redeployments);
+        assertEquals(2, tester.nodeRepository.getNodes(NodeType.tenant, Node.State.failed).size());
+        assertEquals(8, tester.nodeRepository.getNodes(NodeType.tenant, Node.State.active).size());
+        assertEquals(5, tester.nodeRepository.getNodes(NodeType.host, Node.State.active).size());
+        assertEquals( 5, tester.nodeRepository.getNodes(NodeType.tenant, Node.State.ready).size());
+
+
+        // Now lets fail an active tenant node, this should work as normal
+        String downTenant1 = tester.nodeRepository.getNodes(NodeFailTester.app1).get(0).hostname();
+        tester.serviceMonitor.setHostDown(downTenant1);
+
+        // nothing happens the first 45 minutes
+        for (int minutes = 0; minutes < 45; minutes += 5 ) {
+            tester.failer.run();
+            tester.clock.advance(Duration.ofMinutes(5));
+            tester.allNodesMakeAConfigRequestExcept();
+            assertEquals(2, tester.nodeRepository.getNodes(NodeType.tenant, Node.State.failed).size());
+        }
+
+        tester.clock.advance(Duration.ofMinutes(30));
+        tester.allNodesMakeAConfigRequestExcept();
+        tester.failer.run();
+
+        assertEquals( 2, tester.deployer.redeployments);
+        assertEquals(3, tester.nodeRepository.getNodes(NodeType.tenant, Node.State.failed).size());
+        assertEquals(8, tester.nodeRepository.getNodes(NodeType.tenant, Node.State.active).size());
+        assertEquals(5, tester.nodeRepository.getNodes(NodeType.host, Node.State.active).size());
+        assertEquals( 4, tester.nodeRepository.getNodes(NodeType.tenant, Node.State.ready).size());
+    }
+
+
+    @Test
     public void testFailingProxyNodes() {
         NodeFailTester tester = NodeFailTester.withProxyApplication();
 
@@ -250,5 +409,5 @@ public class NodeFailerTest {
         assertFalse(failedHost1.equals(failedHost2));
         assertTrue(downHosts.contains(failedHost2));
     }
-    
+
 }
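
Taken together, the scenario exercised above can be condensed into the following usage sketch of the new fixture. It mirrors the loops in the tests and uses only helpers that appear in this patch, but the exact timings depend on the down time limit configured by NodeFailTester, so treat it as illustrative rather than a reproduction of the tests:

```java
// Illustrative only: condensed from the tests above.
NodeFailTester tester = NodeFailTester.withTwoApplicationsOnDocker(7);

// Mark one Docker host as down in the stubbed service monitor
String downHost = tester.nodeRepository.getNodes(NodeType.host).get(0).hostname();
tester.serviceMonitor.setHostDown(downHost);

// Advance past the down time limit while the remaining nodes keep making config requests
for (int minutes = 0; minutes < 90; minutes += 5) {
    tester.clock.advance(Duration.ofMinutes(5));
    tester.allNodesMakeAConfigRequestExcept();
    tester.failer.run();
}

// If the children's applications have spare capacity, the host's active children are
// failed and redeployed first, and only then is the host itself moved to failed.
```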