about | summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author Martin Polden <mpolden@mpolden.no> 2023-07-20 10:47:49 +0200
committer Martin Polden <mpolden@mpolden.no> 2023-07-20 10:47:49 +0200
commit 83324e0c2d7dccfc5c4bc07b89699a06769c7457 (patch)
tree 13f6834552efee71dcdc1a2059ba1d1f09b82a8c
parent ec191d52a8529a96d83f9fee56febef6ab10af58 (diff)
Support downgrades where possible
-rw-r--r-- node-repository/src/main/java/com/yahoo/vespa/hosted/provision/os/DelegatingOsUpgrader.java 6
-rw-r--r-- node-repository/src/main/java/com/yahoo/vespa/hosted/provision/os/OsUpgrader.java 2
-rw-r--r-- node-repository/src/main/java/com/yahoo/vespa/hosted/provision/os/RebuildingOsUpgrader.java 12
-rw-r--r-- node-repository/src/main/java/com/yahoo/vespa/hosted/provision/os/RetiringOsUpgrader.java 14
-rw-r--r-- node-repository/src/test/java/com/yahoo/vespa/hosted/provision/os/OsVersionsTest.java 19
5 files changed, 34 insertions, 19 deletions
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/os/DelegatingOsUpgrader.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/os/DelegatingOsUpgrader.java
index 23aa03a5315..4ee0774db8f 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/os/DelegatingOsUpgrader.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/os/DelegatingOsUpgrader.java
@@ -12,8 +12,8 @@ import java.util.Optional;
import java.util.logging.Logger;
/**
- * An upgrader that delegates the upgrade to the node itself, triggered by changing its wanted OS version. This
- * implementation limits the number of parallel upgrades to avoid overloading the orchestrator with suspension requests.
+ * An upgrader that delegates the upgrade to the node itself, triggered by changing its wanted OS version. Downgrades
+ * are not supported.
*
* Used in clouds where nodes can upgrade themselves in-place, without data loss.
*
@@ -32,6 +32,8 @@ public class DelegatingOsUpgrader extends OsUpgrader {
NodeList activeNodes = nodeRepository.nodes().list(Node.State.active).nodeType(target.nodeType());
Instant now = nodeRepository.clock().instant();
NodeList nodesToUpgrade = activeNodes.not().changingOsVersionTo(target.version())
+ // This upgrader cannot downgrade nodes. We therefore consider only nodes
+ // on a lower version than the target
.osVersionIsBefore(target.version())
.matching(node -> canUpgradeAt(now, node))
.byIncreasingOsVersion()
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/os/OsUpgrader.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/os/OsUpgrader.java
index d54b40f17de..f8becd31792 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/os/OsUpgrader.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/os/OsUpgrader.java
@@ -43,7 +43,7 @@ public abstract class OsUpgrader {
return Math.max(0, max - upgrading);
}
- /** Returns whether node can upgrade at given instant */
+ /** Returns whether node can change version at given instant */
final boolean canUpgradeAt(Instant instant, Node node) {
return node.status().osVersion().downgrading() || // Fast-track downgrades
node.history().age(instant).compareTo(gracePeriod()) > 0;
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/os/RebuildingOsUpgrader.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/os/RebuildingOsUpgrader.java
index e0affaae666..805793b41a4 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/os/RebuildingOsUpgrader.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/os/RebuildingOsUpgrader.java
@@ -66,13 +66,13 @@ public class RebuildingOsUpgrader extends OsUpgrader {
.statefulClusters());
// Rebuild hosts not containing stateful clusters with retiring nodes, up to rebuild limit
- NodeList activeHosts = hostsOfTargetType.state(Node.State.active);
- int rebuildLimit = upgradeSlots(target, activeHosts.rebuilding(softRebuild));
+ NodeList hosts = hostsOfTargetType.state(Node.State.active, Node.State.provisioned);
+ int rebuildLimit = upgradeSlots(target, hosts.rebuilding(softRebuild));
List<Node> hostsToRebuild = new ArrayList<>(rebuildLimit);
- NodeList candidates = activeHosts.not().rebuilding(softRebuild)
- .osVersionIsBefore(target.version())
- .matching(node -> canUpgradeAt(now, node))
- .byIncreasingOsVersion();
+ NodeList candidates = hosts.not().rebuilding(softRebuild)
+ .not().onOsVersion(target.version())
+ .matching(node -> canUpgradeAt(now, node))
+ .byIncreasingOsVersion();
for (Node host : candidates) {
if (hostsToRebuild.size() == rebuildLimit) break;
Set<ClusterId> clustersOnHost = activeNodes.childrenOf(host).statefulClusters();
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/os/RetiringOsUpgrader.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/os/RetiringOsUpgrader.java
index de4915d60aa..ccb7f40b0de 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/os/RetiringOsUpgrader.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/os/RetiringOsUpgrader.java
@@ -47,16 +47,16 @@ public class RetiringOsUpgrader extends OsUpgrader {
/** Returns nodes that are candidates for upgrade */
private NodeList candidates(Instant instant, OsVersionTarget target, NodeList allNodes) {
- NodeList activeNodes = allNodes.state(Node.State.active).nodeType(target.nodeType());
+ NodeList nodes = allNodes.state(Node.State.active, Node.State.provisioned).nodeType(target.nodeType());
if (softRebuild) {
// Retire only hosts which do not have a replaceable root disk
- activeNodes = activeNodes.not().replaceableRootDisk();
+ nodes = nodes.not().replaceableRootDisk();
}
- return activeNodes.not().deprovisioning()
- .osVersionIsBefore(target.version())
- .matching(node -> canUpgradeAt(instant, node))
- .byIncreasingOsVersion()
- .first(upgradeSlots(target, activeNodes.deprovisioning()));
+ return nodes.not().deprovisioning()
+ .not().onOsVersion(target.version())
+ .matching(node -> canUpgradeAt(instant, node))
+ .byIncreasingOsVersion()
+ .first(upgradeSlots(target, nodes.deprovisioning()));
}
/** Upgrade given host by retiring and deprovisioning it */
diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/os/OsVersionsTest.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/os/OsVersionsTest.java
index 0fa9aa610a4..fda4c47b1ee 100644
--- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/os/OsVersionsTest.java
+++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/os/OsVersionsTest.java
@@ -38,7 +38,6 @@ import static org.junit.Assert.fail;
public class OsVersionsTest {
private final ProvisioningTester tester = new ProvisioningTester.Builder().build();
- private final ApplicationId infraApplication = ApplicationId.from("hosted-vespa", "infra", "default");
@Test
public void upgrade() {
@@ -159,7 +158,7 @@ public class OsVersionsTest {
}
@Test
- public void upgrade_by_retiring() {
+ public void upgrade_and_downgrade_by_retiring() {
int maxActiveUpgrades = 2;
var versions = new OsVersions(tester.nodeRepository(), Cloud.builder().dynamicProvisioning(true).build());
setMaxActiveUpgrades(maxActiveUpgrades);
@@ -196,7 +195,7 @@ public class OsVersionsTest {
assertEquals(2, deprovisioningChildrenOf(nodesDeprovisioning.asList().get(0)).size());
completeReprovisionOf(nodesDeprovisioning.asList());
- // Remaining hosts complete upgrades one by one
+ // Remaining hosts upgrade, batch by batch
for (int i = 0; i < hostCount - 2; i += maxActiveUpgrades) {
versions.resumeUpgradeOf(NodeType.host, true);
nodesDeprovisioning = hostNodes.get().deprovisioning();
@@ -212,6 +211,20 @@ public class OsVersionsTest {
// Resuming after everything has upgraded does nothing
versions.resumeUpgradeOf(NodeType.host, true);
assertEquals(0, hostNodes.get().deprovisioning().size());
+
+ // Downgrade is triggered
+ var version0 = Version.fromString("7.0");
+ versions.setTarget(NodeType.host, version0, true);
+
+ // Hosts downgrade, batch by batch
+ for (int i = 0; i < hostCount; i += maxActiveUpgrades) {
+ versions.resumeUpgradeOf(NodeType.host, true);
+ nodesDeprovisioning = hostNodes.get().deprovisioning();
+ assertEquals(maxActiveUpgrades, nodesDeprovisioning.size());
+ completeReprovisionOf(nodesDeprovisioning.asList());
+ }
+ assertEquals(hostCount, hostNodes.get().onOsVersion(version0).not().deprovisioning().size());
+ assertEquals(hostCount*2, tester.nodeRepository().nodes().list(Node.State.deprovisioned).size());
}
@Test