summaryrefslogtreecommitdiffstats
path: root/node-repository
diff options
context:
space:
mode:
authorMartin Polden <mpolden@mpolden.no>2022-02-21 12:53:54 +0100
committerMartin Polden <mpolden@mpolden.no>2022-02-21 12:53:54 +0100
commit766198080984ca1a2af1a66613eb570d4e0fe11b (patch)
tree518dc6d304f1659991bf3cc70e0c4af245e99824 /node-repository
parentc7d896f7484b629aef89ebd511e715ce85ba6a30 (diff)
Retry deprovisioning if child is un-retired
Diffstat (limited to 'node-repository')
-rw-r--r--node-repository/src/main/java/com/yahoo/vespa/hosted/provision/node/Nodes.java5
-rw-r--r--node-repository/src/main/java/com/yahoo/vespa/hosted/provision/os/RetiringOsUpgrader.java14
-rw-r--r--node-repository/src/test/java/com/yahoo/vespa/hosted/provision/os/OsVersionsTest.java38
3 files changed, 40 insertions, 17 deletions
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/node/Nodes.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/node/Nodes.java
index 57a3b436e37..c7c97701520 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/node/Nodes.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/node/Nodes.java
@@ -244,8 +244,9 @@ public class Nodes {
if ( ! zone.environment().isProduction() || zone.system().isCd())
return deallocate(nodes, Agent.application, "Deactivated by application", transaction.nested());
- var stateless = NodeList.copyOf(nodes).stateless();
- var stateful = NodeList.copyOf(nodes).stateful();
+ NodeList nodeList = NodeList.copyOf(nodes);
+ NodeList stateless = nodeList.stateless();
+ NodeList stateful = nodeList.stateful();
List<Node> written = new ArrayList<>();
written.addAll(deallocate(stateless.asList(), Agent.application, "Deactivated by application", transaction.nested()));
written.addAll(db.writeTo(Node.State.inactive, stateful.asList(), Agent.application, Optional.empty(), transaction.nested()));
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/os/RetiringOsUpgrader.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/os/RetiringOsUpgrader.java
index ba59ff8e425..5229fbb8f37 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/os/RetiringOsUpgrader.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/os/RetiringOsUpgrader.java
@@ -37,10 +37,10 @@ public class RetiringOsUpgrader implements OsUpgrader {
NodeList allNodes = nodeRepository.nodes().list();
Instant now = nodeRepository.clock().instant();
NodeList candidates = candidates(now, target, allNodes);
- candidates.not().deprovisioning()
+ candidates.not().matching(host -> deprovisioning(host, allNodes))
.byIncreasingOsVersion()
- .first(1)
- .forEach(node -> deprovision(node, target.version(), now));
+ .first()
+ .ifPresent(node -> deprovision(node, target.version(), now));
}
@Override
@@ -62,7 +62,7 @@ public class RetiringOsUpgrader implements OsUpgrader {
/** Upgrade given host by retiring and deprovisioning it */
private void deprovision(Node host, Version target, Instant now) {
- LOG.info("Retiring and deprovisioning " + host + ": On stale OS version " +
+ LOG.info("Retiring and deprovisioning " + host + " and its children: On stale OS version " +
host.status().osVersion().current().map(Version::toFullString).orElse("<unset>") +
", want " + target);
nodeRepository.nodes().deprovision(host.hostname(), Agent.RetiringUpgrader, now);
@@ -70,4 +70,10 @@ public class RetiringOsUpgrader implements OsUpgrader {
nodeRepository.osVersions().writeChange((change) -> change.withRetirementAt(now, host.type()));
}
+ /** Returns whether host and all its children are deprovisioning */
+ private static boolean deprovisioning(Node host, NodeList allNodes) {
+ return host.status().wantToRetire() && host.status().wantToDeprovision() &&
+ allNodes.childrenOf(host).not().deprovisioning().isEmpty();
+ }
+
}
diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/os/OsVersionsTest.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/os/OsVersionsTest.java
index 26be6d95336..5a64d99ea33 100644
--- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/os/OsVersionsTest.java
+++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/os/OsVersionsTest.java
@@ -227,25 +227,41 @@ public class OsVersionsTest {
@Test
public void upgrade_by_retiring_everything_at_once() {
- var versions = new OsVersions(tester.nodeRepository(), true, Integer.MAX_VALUE);
- int hostCount = 3;
- provisionInfraApplication(hostCount, infraApplication, NodeType.confighost);
- Supplier<NodeList> hostNodes = () -> tester.nodeRepository().nodes().list()
- .nodeType(NodeType.confighost)
- .not().state(Node.State.deprovisioned);
+ OsVersions versions = new OsVersions(tester.nodeRepository(), true, Integer.MAX_VALUE);
+ int nodeCount = 3;
+ ApplicationId configServerApp = ApplicationId.from("hosted-vespa", "zone-config-servers", "default");
+ List<Node> configHosts = provisionInfraApplication(nodeCount, infraApplication, NodeType.confighost);
+ NodeResources resources = new NodeResources(2, 4, 8, 1);
+ for (int i = 0; i < nodeCount; i++) {
+ tester.makeReadyChildren(1, i, resources, NodeType.config, configHosts.get(i).hostname(), (index) -> "cfg" + index);
+ }
+ tester.prepareAndActivateInfraApplication(configServerApp, NodeType.config);
// Target is set with zero budget and upgrade started
var version1 = Version.fromString("7.1");
versions.setTarget(NodeType.confighost, version1, Duration.ZERO,false);
- for (int i = 0; i < hostCount; i++) {
+ for (int i = 0; i < nodeCount; i++) {
versions.resumeUpgradeOf(NodeType.confighost, true);
}
// All hosts are deprovisioning
- assertEquals(hostCount, hostNodes.get().deprovisioning().size());
+ NodeList nodes = tester.nodeRepository().nodes().list().not().state(Node.State.deprovisioned);
+ NodeList configNodes = nodes.nodeType(NodeType.config);
+ assertEquals(nodeCount, nodes.nodeType(NodeType.confighost).deprovisioning().size());
+ assertEquals(nodeCount, configNodes.deprovisioning().size());
+
+ // One child is inadvertently unretired
+ tester.patchNode(configNodes.first().get(), (node) -> node.withWantToRetire(false, false,
+ Agent.system, tester.clock().instant()));
+
+ // Resuming upgrade of host re-retires child
+ versions.resumeUpgradeOf(NodeType.confighost, true);
+ nodes = tester.nodeRepository().nodes().list().not().state(Node.State.deprovisioned);
+ assertEquals(nodeCount, nodes.nodeType(NodeType.config).deprovisioning().size());
+
// Nodes complete their upgrade by being reprovisioned
- completeReprovisionOf(hostNodes.get().deprovisioning().asList(), NodeType.confighost);
- assertEquals(hostCount, hostNodes.get().onOsVersion(version1).size());
+ completeReprovisionOf(nodes.nodeType(NodeType.confighost).deprovisioning().asList(), NodeType.confighost);
+ assertEquals(nodeCount, tester.nodeRepository().nodes().list().nodeType(NodeType.confighost).onOsVersion(version1).size());
}
@Test
@@ -528,7 +544,7 @@ public class OsVersionsTest {
ApplicationId application = node.allocation().get().owner();
tester.nodeRepository().nodes().park(node.hostname(), false, Agent.system,
getClass().getSimpleName());
- tester.nodeRepository().nodes().removeRecursively(node.hostname());
+ tester.nodeRepository().nodes().removeRecursively(node, true);
node = provisionInfraApplication(1, application, nodeType).get(0);
}
return node.with(node.status().withOsVersion(node.status().osVersion().withCurrent(wantedOsVersion)));