From 83324e0c2d7dccfc5c4bc07b89699a06769c7457 Mon Sep 17 00:00:00 2001 From: Martin Polden Date: Thu, 20 Jul 2023 10:47:49 +0200 Subject: Support downgrades where possible --- .../hosted/provision/os/DelegatingOsUpgrader.java | 6 ++++-- .../yahoo/vespa/hosted/provision/os/OsUpgrader.java | 2 +- .../hosted/provision/os/RebuildingOsUpgrader.java | 12 ++++++------ .../vespa/hosted/provision/os/RetiringOsUpgrader.java | 14 +++++++------- .../vespa/hosted/provision/os/OsVersionsTest.java | 19 ++++++++++++++++--- 5 files changed, 34 insertions(+), 19 deletions(-) diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/os/DelegatingOsUpgrader.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/os/DelegatingOsUpgrader.java index 23aa03a5315..4ee0774db8f 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/os/DelegatingOsUpgrader.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/os/DelegatingOsUpgrader.java @@ -12,8 +12,8 @@ import java.util.Optional; import java.util.logging.Logger; /** - * An upgrader that delegates the upgrade to the node itself, triggered by changing its wanted OS version. This - * implementation limits the number of parallel upgrades to avoid overloading the orchestrator with suspension requests. + * An upgrader that delegates the upgrade to the node itself, triggered by changing its wanted OS version. Downgrades + * are not supported. * * Used in clouds where nodes can upgrade themselves in-place, without data loss. * @@ -32,6 +32,8 @@ public class DelegatingOsUpgrader extends OsUpgrader { NodeList activeNodes = nodeRepository.nodes().list(Node.State.active).nodeType(target.nodeType()); Instant now = nodeRepository.clock().instant(); NodeList nodesToUpgrade = activeNodes.not().changingOsVersionTo(target.version()) + // This upgrader cannot downgrade nodes. We therefore consider only nodes + // on a lower version than the target .osVersionIsBefore(target.version()) .matching(node -> canUpgradeAt(now, node)) .byIncreasingOsVersion() diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/os/OsUpgrader.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/os/OsUpgrader.java index d54b40f17de..f8becd31792 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/os/OsUpgrader.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/os/OsUpgrader.java @@ -43,7 +43,7 @@ public abstract class OsUpgrader { return Math.max(0, max - upgrading); } - /** Returns whether node can upgrade at given instant */ + /** Returns whether node can change version at given instant */ final boolean canUpgradeAt(Instant instant, Node node) { return node.status().osVersion().downgrading() || // Fast-track downgrades node.history().age(instant).compareTo(gracePeriod()) > 0; diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/os/RebuildingOsUpgrader.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/os/RebuildingOsUpgrader.java index e0affaae666..805793b41a4 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/os/RebuildingOsUpgrader.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/os/RebuildingOsUpgrader.java @@ -66,13 +66,13 @@ public class RebuildingOsUpgrader extends OsUpgrader { .statefulClusters()); // Rebuild hosts not containing stateful clusters with retiring nodes, up to rebuild limit - NodeList activeHosts = hostsOfTargetType.state(Node.State.active); - int rebuildLimit = upgradeSlots(target, activeHosts.rebuilding(softRebuild)); + NodeList hosts = hostsOfTargetType.state(Node.State.active, Node.State.provisioned); + int rebuildLimit = upgradeSlots(target, hosts.rebuilding(softRebuild)); List hostsToRebuild = new ArrayList<>(rebuildLimit); - NodeList candidates = activeHosts.not().rebuilding(softRebuild) - .osVersionIsBefore(target.version()) - .matching(node -> canUpgradeAt(now, node)) - .byIncreasingOsVersion(); + NodeList candidates = hosts.not().rebuilding(softRebuild) + .not().onOsVersion(target.version()) + .matching(node -> canUpgradeAt(now, node)) + .byIncreasingOsVersion(); for (Node host : candidates) { if (hostsToRebuild.size() == rebuildLimit) break; Set clustersOnHost = activeNodes.childrenOf(host).statefulClusters(); diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/os/RetiringOsUpgrader.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/os/RetiringOsUpgrader.java index de4915d60aa..ccb7f40b0de 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/os/RetiringOsUpgrader.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/os/RetiringOsUpgrader.java @@ -47,16 +47,16 @@ public class RetiringOsUpgrader extends OsUpgrader { /** Returns nodes that are candidates for upgrade */ private NodeList candidates(Instant instant, OsVersionTarget target, NodeList allNodes) { - NodeList activeNodes = allNodes.state(Node.State.active).nodeType(target.nodeType()); + NodeList nodes = allNodes.state(Node.State.active, Node.State.provisioned).nodeType(target.nodeType()); if (softRebuild) { // Retire only hosts which do not have a replaceable root disk - activeNodes = activeNodes.not().replaceableRootDisk(); + nodes = nodes.not().replaceableRootDisk(); } - return activeNodes.not().deprovisioning() - .osVersionIsBefore(target.version()) - .matching(node -> canUpgradeAt(instant, node)) - .byIncreasingOsVersion() - .first(upgradeSlots(target, activeNodes.deprovisioning())); + return nodes.not().deprovisioning() + .not().onOsVersion(target.version()) + .matching(node -> canUpgradeAt(instant, node)) + .byIncreasingOsVersion() + .first(upgradeSlots(target, nodes.deprovisioning())); } /** Upgrade given host by retiring and deprovisioning it */ diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/os/OsVersionsTest.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/os/OsVersionsTest.java index 0fa9aa610a4..fda4c47b1ee 100644 --- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/os/OsVersionsTest.java +++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/os/OsVersionsTest.java @@ -38,7 +38,6 @@ import static org.junit.Assert.fail; public class OsVersionsTest { private final ProvisioningTester tester = new ProvisioningTester.Builder().build(); - private final ApplicationId infraApplication = ApplicationId.from("hosted-vespa", "infra", "default"); @Test public void upgrade() { @@ -159,7 +158,7 @@ public class OsVersionsTest { } @Test - public void upgrade_by_retiring() { + public void upgrade_and_downgrade_by_retiring() { int maxActiveUpgrades = 2; var versions = new OsVersions(tester.nodeRepository(), Cloud.builder().dynamicProvisioning(true).build()); setMaxActiveUpgrades(maxActiveUpgrades); @@ -196,7 +195,7 @@ public class OsVersionsTest { assertEquals(2, deprovisioningChildrenOf(nodesDeprovisioning.asList().get(0)).size()); completeReprovisionOf(nodesDeprovisioning.asList()); - // Remaining hosts complete upgrades one by one + // Remaining hosts upgrade, batch by batch for (int i = 0; i < hostCount - 2; i += maxActiveUpgrades) { versions.resumeUpgradeOf(NodeType.host, true); nodesDeprovisioning = hostNodes.get().deprovisioning(); @@ -212,6 +211,20 @@ public class OsVersionsTest { // Resuming after everything has upgraded does nothing versions.resumeUpgradeOf(NodeType.host, true); assertEquals(0, hostNodes.get().deprovisioning().size()); + + // Downgrade is triggered + var version0 = Version.fromString("7.0"); + versions.setTarget(NodeType.host, version0, true); + + // Hosts downgrade, batch by batch + for (int i = 0; i < hostCount; i += maxActiveUpgrades) { + versions.resumeUpgradeOf(NodeType.host, true); + nodesDeprovisioning = hostNodes.get().deprovisioning(); + assertEquals(maxActiveUpgrades, nodesDeprovisioning.size()); + completeReprovisionOf(nodesDeprovisioning.asList()); + } + assertEquals(hostCount, hostNodes.get().onOsVersion(version0).not().deprovisioning().size()); + assertEquals(hostCount*2, tester.nodeRepository().nodes().list(Node.State.deprovisioned).size()); } @Test -- cgit v1.2.3