diff options
author | Martin Polden <mpolden@mpolden.no> | 2022-02-21 12:53:54 +0100 |
---|---|---|
committer | Martin Polden <mpolden@mpolden.no> | 2022-02-21 12:53:54 +0100 |
commit | 766198080984ca1a2af1a66613eb570d4e0fe11b (patch) | |
tree | 518dc6d304f1659991bf3cc70e0c4af245e99824 /node-repository | |
parent | c7d896f7484b629aef89ebd511e715ce85ba6a30 (diff) |
Retry deprovisioning if child is un-retired
Diffstat (limited to 'node-repository')
3 files changed, 40 insertions, 17 deletions
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/node/Nodes.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/node/Nodes.java index 57a3b436e37..c7c97701520 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/node/Nodes.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/node/Nodes.java @@ -244,8 +244,9 @@ public class Nodes { if ( ! zone.environment().isProduction() || zone.system().isCd()) return deallocate(nodes, Agent.application, "Deactivated by application", transaction.nested()); - var stateless = NodeList.copyOf(nodes).stateless(); - var stateful = NodeList.copyOf(nodes).stateful(); + NodeList nodeList = NodeList.copyOf(nodes); + NodeList stateless = nodeList.stateless(); + NodeList stateful = nodeList.stateful(); List<Node> written = new ArrayList<>(); written.addAll(deallocate(stateless.asList(), Agent.application, "Deactivated by application", transaction.nested())); written.addAll(db.writeTo(Node.State.inactive, stateful.asList(), Agent.application, Optional.empty(), transaction.nested())); diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/os/RetiringOsUpgrader.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/os/RetiringOsUpgrader.java index ba59ff8e425..5229fbb8f37 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/os/RetiringOsUpgrader.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/os/RetiringOsUpgrader.java @@ -37,10 +37,10 @@ public class RetiringOsUpgrader implements OsUpgrader { NodeList allNodes = nodeRepository.nodes().list(); Instant now = nodeRepository.clock().instant(); NodeList candidates = candidates(now, target, allNodes); - candidates.not().deprovisioning() + candidates.not().matching(host -> deprovisioning(host, allNodes)) .byIncreasingOsVersion() - .first(1) - .forEach(node -> deprovision(node, target.version(), now)); + .first() + .ifPresent(node -> deprovision(node, target.version(), now)); } @Override @@ -62,7 +62,7 @@ public class RetiringOsUpgrader implements OsUpgrader { /** Upgrade given host by retiring and deprovisioning it */ private void deprovision(Node host, Version target, Instant now) { - LOG.info("Retiring and deprovisioning " + host + ": On stale OS version " + + LOG.info("Retiring and deprovisioning " + host + " and its children: On stale OS version " + host.status().osVersion().current().map(Version::toFullString).orElse("<unset>") + ", want " + target); nodeRepository.nodes().deprovision(host.hostname(), Agent.RetiringUpgrader, now); @@ -70,4 +70,10 @@ public class RetiringOsUpgrader implements OsUpgrader { nodeRepository.osVersions().writeChange((change) -> change.withRetirementAt(now, host.type())); } + /** Returns whether host and all its children are deprovisioning */ + private static boolean deprovisioning(Node host, NodeList allNodes) { + return host.status().wantToRetire() && host.status().wantToDeprovision() && + allNodes.childrenOf(host).not().deprovisioning().isEmpty(); + } + } diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/os/OsVersionsTest.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/os/OsVersionsTest.java index 26be6d95336..5a64d99ea33 100644 --- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/os/OsVersionsTest.java +++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/os/OsVersionsTest.java @@ -227,25 +227,41 @@ public class OsVersionsTest { @Test public void upgrade_by_retiring_everything_at_once() { - var versions = new OsVersions(tester.nodeRepository(), true, Integer.MAX_VALUE); - int hostCount = 3; - provisionInfraApplication(hostCount, infraApplication, NodeType.confighost); - Supplier<NodeList> hostNodes = () -> tester.nodeRepository().nodes().list() - .nodeType(NodeType.confighost) - .not().state(Node.State.deprovisioned); + OsVersions versions = new OsVersions(tester.nodeRepository(), true, Integer.MAX_VALUE); + int nodeCount = 3; + ApplicationId configServerApp = ApplicationId.from("hosted-vespa", "zone-config-servers", "default"); + List<Node> configHosts = provisionInfraApplication(nodeCount, infraApplication, NodeType.confighost); + NodeResources resources = new NodeResources(2, 4, 8, 1); + for (int i = 0; i < nodeCount; i++) { + tester.makeReadyChildren(1, i, resources, NodeType.config, configHosts.get(i).hostname(), (index) -> "cfg" + index); + } + tester.prepareAndActivateInfraApplication(configServerApp, NodeType.config); // Target is set with zero budget and upgrade started var version1 = Version.fromString("7.1"); versions.setTarget(NodeType.confighost, version1, Duration.ZERO,false); - for (int i = 0; i < hostCount; i++) { + for (int i = 0; i < nodeCount; i++) { versions.resumeUpgradeOf(NodeType.confighost, true); } // All hosts are deprovisioning - assertEquals(hostCount, hostNodes.get().deprovisioning().size()); + NodeList nodes = tester.nodeRepository().nodes().list().not().state(Node.State.deprovisioned); + NodeList configNodes = nodes.nodeType(NodeType.config); + assertEquals(nodeCount, nodes.nodeType(NodeType.confighost).deprovisioning().size()); + assertEquals(nodeCount, configNodes.deprovisioning().size()); + + // One child is inadvertently unretired + tester.patchNode(configNodes.first().get(), (node) -> node.withWantToRetire(false, false, + Agent.system, tester.clock().instant())); + + // Resuming upgrade of host re-retires child + versions.resumeUpgradeOf(NodeType.confighost, true); + nodes = tester.nodeRepository().nodes().list().not().state(Node.State.deprovisioned); + assertEquals(nodeCount, nodes.nodeType(NodeType.config).deprovisioning().size()); + // Nodes complete their upgrade by being reprovisioned - completeReprovisionOf(hostNodes.get().deprovisioning().asList(), NodeType.confighost); - assertEquals(hostCount, hostNodes.get().onOsVersion(version1).size()); + completeReprovisionOf(nodes.nodeType(NodeType.confighost).deprovisioning().asList(), NodeType.confighost); + assertEquals(nodeCount, tester.nodeRepository().nodes().list().nodeType(NodeType.confighost).onOsVersion(version1).size()); } @Test @@ -528,7 +544,7 @@ public class OsVersionsTest { ApplicationId application = node.allocation().get().owner(); tester.nodeRepository().nodes().park(node.hostname(), false, Agent.system, getClass().getSimpleName()); - tester.nodeRepository().nodes().removeRecursively(node.hostname()); + tester.nodeRepository().nodes().removeRecursively(node, true); node = provisionInfraApplication(1, application, nodeType).get(0); } return node.with(node.status().withOsVersion(node.status().osVersion().withCurrent(wantedOsVersion))); |