diff options
author | Håkon Hallingstad <hakon@verizonmedia.com> | 2021-03-16 15:49:36 +0100 |
---|---|---|
committer | Håkon Hallingstad <hakon@verizonmedia.com> | 2021-03-16 15:49:36 +0100 |
commit | bdbfa98a4dc8c20e889a47e37b710c0051676c77 (patch) | |
tree | 5dc6492ff75e340187a222417e50f88fc4ca28e5 | |
parent | e310e41508f166efac306e83c20cad45f1b13086 (diff) |
Retire all wanttoretire nodes with TypeNodeSpec
3 files changed, 41 insertions, 73 deletions
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/NodeSpec.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/NodeSpec.java index 3ff4765dd00..1cd6cf4a9f6 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/NodeSpec.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/NodeSpec.java @@ -207,10 +207,8 @@ public interface NodeSpec { @Override public int idealRetiredCount(int acceptedCount, int currentRetiredCount) { - // All nodes marked with wantToRetire get marked as retired just before this function is called, - // the job of this function is to throttle the retired count. If no nodes are marked as retired - // then continue this way, otherwise allow only 1 node to be retired - return Math.min(1, currentRetiredCount); + // All nodes marked with wantToRetire get marked as retired just before this function is called + return currentRetiredCount; } @Override diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/RetiredExpirerTest.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/RetiredExpirerTest.java index 718facd477c..f80f34dd5b3 100644 --- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/RetiredExpirerTest.java +++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/RetiredExpirerTest.java @@ -42,7 +42,6 @@ import java.util.stream.Collectors; import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertFalse; -import static org.junit.Assert.assertNotEquals; import static org.junit.Assert.assertTrue; import static org.mockito.ArgumentMatchers.any; import static org.mockito.ArgumentMatchers.eq; @@ -191,25 +190,24 @@ public class RetiredExpirerTest { // Redeploy to retire all 3 config servers infraDeployer.activateAllSupportedInfraApplications(true); - - // Only 1 config server is allowed to retire at any given point in time - List<Node> retiredNodes = tester.nodeRepository().nodes().list(() -> {}).stream() - .filter(node -> node.allocation().map(allocation -> allocation.membership().retired()).orElse(false)) - .collect(Collectors.toList()); - assertEquals(1, retiredNodes.size()); - Node retiredNode = retiredNodes.get(0); - String retiredNodeHostname = retiredNode.hostname(); - - // Allow retiredNodeHostname to be removed + List<Node> retiredNodes = tester.nodeRepository() + .nodes().list(() -> {}) + .stream() + .filter(node -> node.allocation().map(allocation -> allocation.membership().retired()).orElse(false)) + .collect(Collectors.toList()); + assertEquals(3, retiredNodes.size()); + + // The Orchestrator will allow only 1 to be removed, say cfg1 + Node retiredNode = tester.nodeRepository().nodes().node(cfg1.s()).orElseThrow(); doThrow(new OrchestrationException("denied")).when(orchestrator).acquirePermissionToRemove(any()); - doNothing().when(orchestrator).acquirePermissionToRemove(eq(new HostName(retiredNodeHostname))); + doNothing().when(orchestrator).acquirePermissionToRemove(eq(new HostName(retiredNode.hostname()))); // RetiredExpirer should remove cfg1 from application RetiredExpirer retiredExpirer = createRetiredExpirer(deployer); retiredExpirer.run(); var activeConfigServerHostnames = new HashSet<>(Set.of("cfg1", "cfg2", "cfg3")); - assertTrue(activeConfigServerHostnames.contains(retiredNodeHostname)); - activeConfigServerHostnames.remove(retiredNodeHostname); + assertTrue(activeConfigServerHostnames.contains(retiredNode.hostname())); + activeConfigServerHostnames.remove(retiredNode.hostname()); assertEquals(activeConfigServerHostnames, configServerHostnames(duperModel)); assertEquals(1, tester.nodeRepository().nodes().list(Node.State.inactive).nodeType(NodeType.config).size()); assertEquals(2, tester.nodeRepository().nodes().list(Node.State.active).nodeType(NodeType.config).size()); @@ -234,8 +232,8 @@ public class RetiredExpirerTest { // Provision and ready new config server MockNameResolver nameResolver = (MockNameResolver)tester.nodeRepository().nameResolver(); String ipv4 = "127.0.1.4"; - nameResolver.addRecord(retiredNodeHostname, ipv4); - Node node = Node.create(retiredNodeHostname, new IP.Config(Set.of(ipv4), Set.of()), retiredNodeHostname, + nameResolver.addRecord(retiredNode.hostname(), ipv4); + Node node = Node.create(retiredNode.hostname(), new IP.Config(Set.of(ipv4), Set.of()), retiredNode.hostname(), tester.asFlavor("default", NodeType.config), NodeType.config).build(); var nodes = List.of(node); nodes = nodeRepository.nodes().addNodes(nodes, Agent.system); @@ -252,14 +250,16 @@ public class RetiredExpirerTest { infraDeployer.activateAllSupportedInfraApplications(true); assertEquals(3, tester.nodeRepository().nodes().list(Node.State.active).nodeType(NodeType.config).size()); - // Another config server should now have retired + // There are now 2 retired config servers left retiredExpirer.run(); assertEquals(3, tester.nodeRepository().nodes().list(Node.State.active).nodeType(NodeType.config).size()); - var retiredNodes2 = tester.nodeRepository().nodes().list(() -> {}).stream() - .filter(n -> n.allocation().map(allocation -> allocation.membership().retired()).orElse(false)) - .collect(Collectors.toList()); - assertEquals(1, retiredNodes2.size()); - assertNotEquals(retiredNodeHostname, retiredNodes2.get(0)); + var retiredHostnames = tester.nodeRepository() + .nodes().list(() -> {}) + .stream() + .filter(n -> n.allocation().map(allocation -> allocation.membership().retired()).orElse(false)) + .map(Node::hostname) + .collect(Collectors.toSet()); + assertEquals(Set.of("cfg2", "cfg3"), retiredHostnames); } private Set<String> configServerHostnames(MockDuperModel duperModel) { diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/provisioning/NodeTypeProvisioningTest.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/provisioning/NodeTypeProvisioningTest.java index 6e50c934047..e94d1c1230e 100644 --- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/provisioning/NodeTypeProvisioningTest.java +++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/provisioning/NodeTypeProvisioningTest.java @@ -19,10 +19,10 @@ import java.time.Duration; import java.util.Collections; import java.util.HashSet; import java.util.List; +import java.util.Set; import java.util.stream.Collectors; import static org.junit.Assert.assertEquals; -import static org.junit.Assert.assertNotEquals; import static org.junit.Assert.assertTrue; /** @@ -183,7 +183,6 @@ public class NodeTypeProvisioningTest { List<Node> nodesToRetire = tester.nodeRepository().nodes().list(Node.State.active).nodeType(NodeType.proxy).asList() .subList(3, 3 + numNodesToRetire); - String currentyRetiringHostname; { nodesToRetire.forEach(nodeToRetire -> tester.nodeRepository().nodes().write(nodeToRetire.withWantToRetire(true, Agent.system, tester.clock().instant()), () -> {})); @@ -198,14 +197,13 @@ public class NodeTypeProvisioningTest { List<Node> nodesCurrentlyRetiring = nodes.stream() .filter(node -> node.allocation().get().membership().retired()) .collect(Collectors.toList()); - assertEquals(1, nodesCurrentlyRetiring.size()); + assertEquals(5, nodesCurrentlyRetiring.size()); - // The retiring node should be one of the nodes we marked for retirement - currentyRetiringHostname = nodesCurrentlyRetiring.get(0).hostname(); - assertEquals(1, nodesToRetire.stream().map(Node::hostname).filter(hostname -> hostname.equals(currentyRetiringHostname)).count()); + // The retiring nodes should be the nodes we marked for retirement + assertTrue(Set.copyOf(nodesToRetire).containsAll(nodesCurrentlyRetiring)); } - { // Redeploying while the node is still retiring has no effect + { // Redeploying while the nodes are still retiring has no effect List<HostSpec> hosts = deployProxies(application, tester); assertEquals(11, hosts.size()); tester.activate(application, new HashSet<>(hosts)); @@ -216,57 +214,29 @@ public class NodeTypeProvisioningTest { List<Node> nodesCurrentlyRetiring = nodes.stream() .filter(node -> node.allocation().get().membership().retired()) .collect(Collectors.toList()); - assertEquals(1, nodesCurrentlyRetiring.size()); - - // The node that started retiring is still the only one retiring - assertEquals(currentyRetiringHostname, nodesCurrentlyRetiring.get(0).hostname()); + assertEquals(5, nodesCurrentlyRetiring.size()); } { + // Let all retired nodes expire tester.advanceTime(Duration.ofMinutes(11)); retiredExpirer.run(); List<HostSpec> hosts = deployProxies(application, tester); - assertEquals(10, hosts.size()); + assertEquals(6, hosts.size()); tester.activate(application, new HashSet<>(hosts)); - NodeList nodes = tester.nodeRepository().nodes().list(Node.State.active).nodeType(NodeType.proxy); - assertEquals(10, nodes.size()); - // Verify the node we previously set to retire has finished retiring - assertEquals(Node.State.dirty, tester.nodeRepository().nodes().node(currentyRetiringHostname) - .orElseThrow(RuntimeException::new).state()); - - // Verify that a node is currently retiring - List<Node> nodesCurrentlyRetiring = nodes.stream() - .filter(node -> node.allocation().get().membership().retired()) - .collect(Collectors.toList()); - assertEquals(1, nodesCurrentlyRetiring.size()); + // All currently active proxy nodes are not marked with wantToRetire or as retired + long numRetiredActiveProxyNodes = tester.nodeRepository().nodes().list(Node.State.active).nodeType(NodeType.proxy).stream() + .filter(node -> !node.status().wantToRetire()) + .filter(node -> !node.allocation().get().membership().retired()) + .count(); + assertEquals(6, numRetiredActiveProxyNodes); - // This node is different from the one that was retiring previously - String newRetiringHostname = nodesCurrentlyRetiring.get(0).hostname(); - assertNotEquals(currentyRetiringHostname, newRetiringHostname); - // ... but is one of the nodes that were put to wantToRetire earlier - assertTrue(nodesToRetire.stream().map(Node::hostname).filter(hostname -> hostname.equals(newRetiringHostname)).count() == 1); + // All the nodes that were marked with wantToRetire earlier are now dirty + assertEquals(nodesToRetire.stream().map(Node::hostname).collect(Collectors.toSet()), + tester.nodeRepository().nodes().list(Node.State.dirty).stream().map(Node::hostname).collect(Collectors.toSet())); } - - - for (int i = 0; i < 10; i++){ - tester.advanceTime(Duration.ofMinutes(11)); - retiredExpirer.run(); - List<HostSpec> hosts = deployProxies(application, tester); - tester.activate(application, new HashSet<>(hosts)); - } - - // After a long time, all currently active proxy nodes are not marked with wantToRetire or as retired - long numRetiredActiveProxyNodes = tester.nodeRepository().nodes().list(Node.State.active).nodeType(NodeType.proxy).stream() - .filter(node -> !node.status().wantToRetire()) - .filter(node -> !node.allocation().get().membership().retired()) - .count(); - assertEquals(11 - numNodesToRetire, numRetiredActiveProxyNodes); - - // All the nodes that were marked with wantToRetire earlier are now dirty - assertEquals(nodesToRetire.stream().map(Node::hostname).collect(Collectors.toSet()), - tester.nodeRepository().nodes().list(Node.State.dirty).stream().map(Node::hostname).collect(Collectors.toSet())); } private List<HostSpec> deployProxies(ApplicationId application, ProvisioningTester tester) { |