diff options
author | HÃ¥kon Hallingstad <hakon.hallingstad@gmail.com> | 2022-07-12 12:34:06 +0200 |
---|---|---|
committer | GitHub <noreply@github.com> | 2022-07-12 12:34:06 +0200 |
commit | 4aa646e7fbec942c301e167bdff3da2b1acf8cc1 (patch) | |
tree | 159a15864d5da48d7f6d70aef8def77aa08f7cad | |
parent | d8a6aa364cf080b2acab0d61372028b0b91514e3 (diff) | |
parent | 4d9ce035350d8e726530d398665e0c6fcd76c02a (diff) |
Merge pull request #23462 from vespa-engine/mpolden/deprovision-with-allocation
Allow deprovisioning parked host where children have allocation
2 files changed, 63 insertions, 16 deletions
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/DynamicProvisioningMaintainer.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/DynamicProvisioningMaintainer.java index 368a8da0f90..a9e7ded66e6 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/DynamicProvisioningMaintainer.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/DynamicProvisioningMaintainer.java @@ -186,24 +186,38 @@ public class DynamicProvisioningMaintainer extends NodeRepositoryMaintainer { .collect(Collectors.toList()); } - private List<Node> candidatesForRemoval(List<Node> nodes) { - Map<String, Node> hostsByHostname = new HashMap<>(nodes.stream() - .filter(node -> switch (node.type()) { - case host -> - // TODO: Mark empty tenant hosts as wanttoretire & wanttodeprovision elsewhere, then handle as confighost here - node.state() != Node.State.parked || node.status().wantToDeprovision(); - case confighost, controllerhost -> node.state() == Node.State.parked && node.status().wantToDeprovision(); - default -> false; - }) - .collect(Collectors.toMap(Node::hostname, Function.identity()))); + private static List<Node> candidatesForRemoval(List<Node> nodes) { + Map<String, Node> removableHostsByHostname = new HashMap<>(); + for (var node : nodes) { + if (canRemoveHost(node)) { + removableHostsByHostname.put(node.hostname(), node); + } + } + for (var node : nodes) { + if (node.parentHostname().isPresent() && !canRemoveNode(node)) { + removableHostsByHostname.remove(node.parentHostname().get()); + } + } + return List.copyOf(removableHostsByHostname.values()); + } - nodes.stream() - .filter(node -> node.allocation().isPresent()) - .flatMap(node -> node.parentHostname().stream()) - .distinct() - .forEach(hostsByHostname::remove); + private static boolean canRemoveHost(Node host) { + return switch (host.type()) { + // TODO: Mark empty tenant hosts as wanttoretire & wanttodeprovision elsewhere, then handle as confighost here + case host -> host.state() != Node.State.parked || host.status().wantToDeprovision(); + case confighost, controllerhost -> canDeprovision(host); + default -> false; + }; + } + + private static boolean canRemoveNode(Node node) { + if (node.type().isHost()) throw new IllegalArgumentException("Node " + node + " is not a child"); + return node.allocation().isEmpty() || canDeprovision(node); + } - return List.copyOf(hostsByHostname.values()); + private static boolean canDeprovision(Node node) { + return node.status().wantToDeprovision() && (node.state() == Node.State.parked || + node.state() == Node.State.failed); } private Map<String, Node> findSharedHosts(NodeList nodeList) { diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/DynamicProvisioningMaintainerTest.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/DynamicProvisioningMaintainerTest.java index 905fdc57813..e5e361da379 100644 --- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/DynamicProvisioningMaintainerTest.java +++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/DynamicProvisioningMaintainerTest.java @@ -570,6 +570,39 @@ public class DynamicProvisioningMaintainerTest { assertEquals(2, provisioningTester.activate(applicationId, prepared).size()); } + @Test + public void deprovision_parked_node_with_allocation() { + var tester = new DynamicProvisioningTester(); + tester.hostProvisioner.with(Behaviour.failProvisioning); + Node host4 = tester.addNode("host4", Optional.empty(), NodeType.host, Node.State.parked); + Node host41 = tester.addNode("host4-1", Optional.of("host4"), NodeType.tenant, Node.State.parked, DynamicProvisioningTester.tenantApp); + Node host42 = tester.addNode("host4-2", Optional.of("host4"), NodeType.tenant, Node.State.active, DynamicProvisioningTester.tenantApp); + Node host43 = tester.addNode("host4-3", Optional.of("host4"), NodeType.tenant, Node.State.failed, DynamicProvisioningTester.tenantApp); + + // Host and children are marked for deprovisioning + tester.nodeRepository.nodes().deprovision("host4", Agent.operator, Instant.now()); + for (var node : List.of(host4, host41, host42, host43)) { + assertTrue(tester.nodeRepository.nodes().node(node.hostname()).map(n -> n.status().wantToDeprovision()).get()); + } + + // Host and children remain parked because one child is still active + tester.maintainer.maintain(); + for (var node : List.of(host4, host41)) { + assertEquals(Node.State.parked, tester.nodeRepository.nodes().node(node.hostname()).get().state()); + } + assertEquals(Node.State.active, tester.nodeRepository.nodes().node(host42.hostname()).get().state()); + assertEquals(Node.State.failed, tester.nodeRepository.nodes().node(host43.hostname()).get().state()); + + // Last child is parked + tester.nodeRepository.nodes().park(host42.hostname(), true, Agent.system, getClass().getSimpleName()); + + // Host and children can now be removed + tester.maintainer.maintain(); + for (var node : List.of(host4, host41, host42, host43)) { + assertTrue(node.hostname() + " removed", tester.nodeRepository.nodes().node(node.hostname()).isEmpty()); + } + } + private void assertCfghost3IsActive(DynamicProvisioningTester tester) { assertEquals(5, tester.nodeRepository.nodes().list(Node.State.active).size()); assertEquals(3, tester.nodeRepository.nodes().list(Node.State.active).nodeType(NodeType.confighost).size()); |