aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorMartin Polden <mpolden@mpolden.no>2023-07-20 12:08:49 +0200
committerMartin Polden <mpolden@mpolden.no>2023-07-20 12:59:59 +0200
commit052b83e68665f94c0bccbaf169e625d747b989fa (patch)
tree15d3681ef70a207a703f540d9b4008edf8e0fa73
parent83324e0c2d7dccfc5c4bc07b89699a06769c7457 (diff)
Require convergence of downgrades according to cloud support
-rw-r--r--controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/InfrastructureUpgrader.java18
-rw-r--r--controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/OsUpgrader.java20
-rw-r--r--controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/SystemUpgrader.java4
-rw-r--r--controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/OsUpgraderTest.java68
4 files changed, 92 insertions, 18 deletions
diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/InfrastructureUpgrader.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/InfrastructureUpgrader.java
index 68c79fdca7e..b1ea4584497 100644
--- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/InfrastructureUpgrader.java
+++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/InfrastructureUpgrader.java
@@ -128,8 +128,12 @@ public abstract class InfrastructureUpgrader<TARGET extends VersionTarget> exten
/** Returns whether the upgrader should expect given node to upgrade */
protected abstract boolean expectUpgradeOf(Node node, SystemApplication application, ZoneApi zone);
- /** Find the highest version used by nodes satisfying nodeSlice in zone. If no such slice exists, the lowest known version is returned */
- protected final Optional<Version> versionOf(NodeSlice nodeSlice, ZoneApi zone, SystemApplication application, Function<Node, Version> versionField) {
+ /**
+ * Find the version currently used by a slice of nodes, in given zone. If no such slice exists,
+ * the lowest (or highest, when downgrading) overall version is returned.
+ */
+ protected final Optional<Version> versionOf(NodeSlice nodeSlice, ZoneApi zone, SystemApplication application,
+ Function<Node, Version> versionField, boolean downgrading) {
try {
Map<Version, Long> nodeCountByVersion = controller().serviceRegistry().configServer()
.nodeRepository()
@@ -147,11 +151,13 @@ public abstract class InfrastructureUpgrader<TARGET extends VersionTarget> exten
}
}
if (!versionsOfMatchingSlices.isEmpty()) {
- // Choose the highest version in case we have several matching slices
- return versionsOfMatchingSlices.stream().max(Comparator.naturalOrder());
+ return downgrading
+ ? versionsOfMatchingSlices.stream().min(Comparator.naturalOrder())
+ : versionsOfMatchingSlices.stream().max(Comparator.naturalOrder());
}
- // No matching slices found, fall back to the lowest known version
- return nodeCountByVersion.keySet().stream().min(Comparator.naturalOrder());
+ return downgrading
+ ? nodeCountByVersion.keySet().stream().max(Comparator.naturalOrder())
+ : nodeCountByVersion.keySet().stream().min(Comparator.naturalOrder());
} catch (Exception e) {
throw new UnreachableNodeRepositoryException(Text.format("Failed to get version for %s in %s: %s",
application.id(), zone,
diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/OsUpgrader.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/OsUpgrader.java
index 4df40850cc9..44f0bcecf5f 100644
--- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/OsUpgrader.java
+++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/OsUpgrader.java
@@ -54,7 +54,7 @@ public class OsUpgrader extends InfrastructureUpgrader<OsVersionTarget> {
@Override
protected boolean convergedOn(OsVersionTarget target, SystemApplication application, ZoneApi zone, NodeSlice nodeSlice) {
- Version currentVersion = versionOf(nodeSlice, zone, application, Node::currentOsVersion).orElse(target.osVersion().version());
+ Version currentVersion = versionOf(nodeSlice, zone, application, Node::currentOsVersion, target.downgrade()).orElse(target.version());
return satisfiedBy(currentVersion, target);
}
@@ -67,9 +67,11 @@ public class OsUpgrader extends InfrastructureUpgrader<OsVersionTarget> {
@Override
protected Optional<OsVersionTarget> target() {
- // Return target if we have nodes in this cloud on the wrong version
+ // Return target if we have nodes in this cloud on the wrong version, or if we're downgrading a zone which does
+ // not support downgrading all nodes
return controller().os().target(cloud)
- .filter(target -> controller().os().status().nodesIn(cloud).stream()
+ .filter(target -> (target.downgrade() && !downgradingSupported()) ||
+ controller().os().status().nodesIn(cloud).stream()
.anyMatch(node -> !satisfiedBy(node.currentVersion(), target)));
}
@@ -79,19 +81,23 @@ public class OsUpgrader extends InfrastructureUpgrader<OsVersionTarget> {
return controller().serviceRegistry().configServer().nodeRepository()
.targetVersionsOf(zone.getVirtualId())
.osVersion(application.nodeType())
- .map(currentVersion -> !satisfiedBy(currentVersion, target))
+ .map(currentVersion -> !currentVersion.equals(target.version()))
.orElse(true);
}
- private static boolean satisfiedBy(Version version, OsVersionTarget target) {
- if (target.downgrade()) {
- // When downgrading we want an exact version
+ private boolean satisfiedBy(Version version, OsVersionTarget target) {
+ if (target.downgrade() && downgradingSupported()) {
+ // When downgrading we want an exact version if the cloud supports downgrades
return version.equals(target.osVersion().version());
}
// Otherwise, matching or later version is fine
return !version.isBefore(target.osVersion().version());
}
+ private boolean downgradingSupported() {
+ return !controller().zoneRegistry().zones().all().dynamicallyProvisioned().in(cloud).zones().isEmpty();
+ }
+
/** Returns whether node currently allows upgrades */
public static boolean canUpgrade(Node node, boolean includeDeferring) {
return (includeDeferring || !node.deferOsUpgrade()) && upgradableNodeStates.contains(node.state());
diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/SystemUpgrader.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/SystemUpgrader.java
index 000acd16155..effcc4dd4df 100644
--- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/SystemUpgrader.java
+++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/SystemUpgrader.java
@@ -40,7 +40,7 @@ public class SystemUpgrader extends InfrastructureUpgrader<VespaVersionTarget> {
@Override
protected boolean convergedOn(VespaVersionTarget target, SystemApplication application, ZoneApi zone, NodeSlice nodeSlice) {
- Optional<Version> currentVersion = versionOf(nodeSlice, zone, application, Node::currentVersion);
+ Optional<Version> currentVersion = versionOf(nodeSlice, zone, application, Node::currentVersion, target.downgrade());
// Skip application convergence check if there are no nodes belonging to the application in the zone
if (currentVersion.isEmpty()) return true;
@@ -78,7 +78,7 @@ public class SystemUpgrader extends InfrastructureUpgrader<VespaVersionTarget> {
// For applications with package we do not have a zone-wide version target. This means that we must check
// the wanted version of each node.
boolean zoneHasSharedRouting = controller().zoneRegistry().routingMethod(zone.getId()).isShared();
- return versionOf(NodeSlice.ALL, zone, application, Node::wantedVersion)
+ return versionOf(NodeSlice.ALL, zone, application, Node::wantedVersion, target.downgrade())
.map(wantedVersion -> !wantedVersion.equals(target.version()))
.orElse(zoneHasSharedRouting); // Always upgrade if zone uses shared routing, but has no nodes allocated yet
}
diff --git a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/OsUpgraderTest.java b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/OsUpgraderTest.java
index 056db4f119c..fad78edc58f 100644
--- a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/OsUpgraderTest.java
+++ b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/OsUpgraderTest.java
@@ -18,10 +18,14 @@ import com.yahoo.vespa.hosted.controller.versions.NodeVersion;
import org.junit.jupiter.api.Test;
import java.time.Duration;
+import java.util.ArrayList;
import java.util.Collection;
+import java.util.Collections;
import java.util.List;
+import java.util.Map;
import java.util.function.Function;
import java.util.function.UnaryOperator;
+import java.util.stream.Collectors;
import java.util.stream.Stream;
import static org.junit.jupiter.api.Assertions.assertEquals;
@@ -198,7 +202,8 @@ public class OsUpgraderTest {
statusUpdater.maintain();
// All zones upgrade
- for (var zone : List.of(zone1, zone2)) {
+ List<ZoneApi> zones = new ArrayList<>(List.of(zone1, zone2));
+ for (var zone : zones) {
osUpgrader.maintain();
completeUpgrade(version1, SystemApplication.tenantHost, zone);
statusUpdater.maintain();
@@ -208,9 +213,17 @@ public class OsUpgraderTest {
// Downgrade is triggered
tester.controller().os().upgradeTo(version0, cloud, true, false);
+ // Zone order is reversed
+ Collections.reverse(zones);
- // All zones downgrade, in reverse order
- for (var zone : List.of(zone2, zone1)) {
+ // One host in first zone downgrades. Wanted version is not changed for second zone yet
+ osUpgrader.maintain();
+ completeUpgrade(1, version0, SystemApplication.tenantHost, zones.get(0));
+ osUpgrader.maintain();
+ assertWanted(version1, SystemApplication.tenantHost, zones.get(1));
+
+ // All zones downgrade
+ for (var zone : zones) {
osUpgrader.maintain();
completeUpgrade(version0, SystemApplication.tenantHost, zone);
statusUpdater.maintain();
@@ -219,6 +232,55 @@ public class OsUpgraderTest {
.allMatch(node -> node.currentVersion().equals(version0)), "All nodes on target version");
}
+ @Test
+ public void downgrade_os_partially() {
+ CloudName cloud = CloudName.from("cloud");
+ ZoneApi zone1 = zone("dev.us-east-1", cloud);
+ ZoneApi zone2 = zone("prod.us-west-1", cloud);
+ UpgradePolicy upgradePolicy = UpgradePolicy.builder()
+ .upgrade(zone1)
+ .upgrade(zone2)
+ .build();
+ OsUpgrader osUpgrader = osUpgrader(upgradePolicy, cloud, false);
+
+ // Bootstrap system
+ tester.configServer().bootstrap(List.of(zone1.getId(), zone2.getId()),
+ List.of(SystemApplication.tenantHost));
+
+ // New OS version released
+ Version version0 = Version.fromString("1.0");
+ Version version1 = Version.fromString("2.0");
+ tester.controller().os().upgradeTo(version1, cloud, false, false);
+ statusUpdater.maintain();
+
+ // All zones upgrade
+ for (var zone : List.of(zone1, zone2)) {
+ osUpgrader.maintain();
+ completeUpgrade(version1, SystemApplication.tenantHost, zone);
+ statusUpdater.maintain();
+ }
+ assertTrue(tester.controller().os().status().nodesIn(cloud).stream()
+ .allMatch(node -> node.currentVersion().equals(version1)), "All nodes on target version");
+
+ // Downgrade is triggered
+ tester.controller().os().upgradeTo(version0, cloud, true, false);
+
+ // All zones downgrade, in reverse order
+ for (var zone : List.of(zone2, zone1)) {
+ osUpgrader.maintain();
+ // Partial downgrading happens, as this decision is left up to the zone. Downgrade target is still set in
+ // all zones as a best-effort, and to halt any further upgrades
+ completeUpgrade(1, version0, SystemApplication.tenantHost, zone);
+ statusUpdater.maintain();
+ }
+ int zoneCount = 2;
+ Map<Version, Long> currentVersions = tester.controller().os().status().nodesIn(cloud).stream()
+ .collect(Collectors.groupingBy(NodeVersion::currentVersion,
+ Collectors.counting()));
+ assertEquals(1 * zoneCount, currentVersions.get(version0));
+ assertEquals(2 * zoneCount, currentVersions.get(version1));
+ }
+
private List<NodeVersion> nodesOn(Version version) {
return tester.controller().os().status().versions().entrySet().stream()
.filter(entry -> entry.getKey().version().equals(version))