diff options
author | Martin Polden <mpolden@mpolden.no> | 2023-07-20 12:08:49 +0200 |
---|---|---|
committer | Martin Polden <mpolden@mpolden.no> | 2023-07-20 12:59:59 +0200 |
commit | 052b83e68665f94c0bccbaf169e625d747b989fa (patch) | |
tree | 15d3681ef70a207a703f540d9b4008edf8e0fa73 | |
parent | 83324e0c2d7dccfc5c4bc07b89699a06769c7457 (diff) |
Require convergence of downgrades according to cloud support
4 files changed, 92 insertions, 18 deletions
diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/InfrastructureUpgrader.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/InfrastructureUpgrader.java index 68c79fdca7e..b1ea4584497 100644 --- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/InfrastructureUpgrader.java +++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/InfrastructureUpgrader.java @@ -128,8 +128,12 @@ public abstract class InfrastructureUpgrader<TARGET extends VersionTarget> exten /** Returns whether the upgrader should expect given node to upgrade */ protected abstract boolean expectUpgradeOf(Node node, SystemApplication application, ZoneApi zone); - /** Find the highest version used by nodes satisfying nodeSlice in zone. If no such slice exists, the lowest known version is returned */ - protected final Optional<Version> versionOf(NodeSlice nodeSlice, ZoneApi zone, SystemApplication application, Function<Node, Version> versionField) { + /** + * Find the version currently used by a slice of nodes, in given zone. If no such slice exists, + * the lowest (or highest, when downgrading) overall version is returned. + */ + protected final Optional<Version> versionOf(NodeSlice nodeSlice, ZoneApi zone, SystemApplication application, + Function<Node, Version> versionField, boolean downgrading) { try { Map<Version, Long> nodeCountByVersion = controller().serviceRegistry().configServer() .nodeRepository() @@ -147,11 +151,13 @@ public abstract class InfrastructureUpgrader<TARGET extends VersionTarget> exten } } if (!versionsOfMatchingSlices.isEmpty()) { - // Choose the highest version in case we have several matching slices - return versionsOfMatchingSlices.stream().max(Comparator.naturalOrder()); + return downgrading + ? versionsOfMatchingSlices.stream().min(Comparator.naturalOrder()) + : versionsOfMatchingSlices.stream().max(Comparator.naturalOrder()); } - // No matching slices found, fall back to the lowest known version - return nodeCountByVersion.keySet().stream().min(Comparator.naturalOrder()); + return downgrading + ? nodeCountByVersion.keySet().stream().max(Comparator.naturalOrder()) + : nodeCountByVersion.keySet().stream().min(Comparator.naturalOrder()); } catch (Exception e) { throw new UnreachableNodeRepositoryException(Text.format("Failed to get version for %s in %s: %s", application.id(), zone, diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/OsUpgrader.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/OsUpgrader.java index 4df40850cc9..44f0bcecf5f 100644 --- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/OsUpgrader.java +++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/OsUpgrader.java @@ -54,7 +54,7 @@ public class OsUpgrader extends InfrastructureUpgrader<OsVersionTarget> { @Override protected boolean convergedOn(OsVersionTarget target, SystemApplication application, ZoneApi zone, NodeSlice nodeSlice) { - Version currentVersion = versionOf(nodeSlice, zone, application, Node::currentOsVersion).orElse(target.osVersion().version()); + Version currentVersion = versionOf(nodeSlice, zone, application, Node::currentOsVersion, target.downgrade()).orElse(target.version()); return satisfiedBy(currentVersion, target); } @@ -67,9 +67,11 @@ public class OsUpgrader extends InfrastructureUpgrader<OsVersionTarget> { @Override protected Optional<OsVersionTarget> target() { - // Return target if we have nodes in this cloud on the wrong version + // Return target if we have nodes in this cloud on the wrong version, or if we're downgrading a zone which does + // not support downgrading all nodes return controller().os().target(cloud) - .filter(target -> controller().os().status().nodesIn(cloud).stream() + .filter(target -> (target.downgrade() && !downgradingSupported()) || + controller().os().status().nodesIn(cloud).stream() .anyMatch(node -> !satisfiedBy(node.currentVersion(), target))); } @@ -79,19 +81,23 @@ public class OsUpgrader extends InfrastructureUpgrader<OsVersionTarget> { return controller().serviceRegistry().configServer().nodeRepository() .targetVersionsOf(zone.getVirtualId()) .osVersion(application.nodeType()) - .map(currentVersion -> !satisfiedBy(currentVersion, target)) + .map(currentVersion -> !currentVersion.equals(target.version())) .orElse(true); } - private static boolean satisfiedBy(Version version, OsVersionTarget target) { - if (target.downgrade()) { - // When downgrading we want an exact version + private boolean satisfiedBy(Version version, OsVersionTarget target) { + if (target.downgrade() && downgradingSupported()) { + // When downgrading we want an exact version if the cloud supports downgrades return version.equals(target.osVersion().version()); } // Otherwise, matching or later version is fine return !version.isBefore(target.osVersion().version()); } + private boolean downgradingSupported() { + return !controller().zoneRegistry().zones().all().dynamicallyProvisioned().in(cloud).zones().isEmpty(); + } + /** Returns whether node currently allows upgrades */ public static boolean canUpgrade(Node node, boolean includeDeferring) { return (includeDeferring || !node.deferOsUpgrade()) && upgradableNodeStates.contains(node.state()); diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/SystemUpgrader.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/SystemUpgrader.java index 000acd16155..effcc4dd4df 100644 --- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/SystemUpgrader.java +++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/SystemUpgrader.java @@ -40,7 +40,7 @@ public class SystemUpgrader extends InfrastructureUpgrader<VespaVersionTarget> { @Override protected boolean convergedOn(VespaVersionTarget target, SystemApplication application, ZoneApi zone, NodeSlice nodeSlice) { - Optional<Version> currentVersion = versionOf(nodeSlice, zone, application, Node::currentVersion); + Optional<Version> currentVersion = versionOf(nodeSlice, zone, application, Node::currentVersion, target.downgrade()); // Skip application convergence check if there are no nodes belonging to the application in the zone if (currentVersion.isEmpty()) return true; @@ -78,7 +78,7 @@ public class SystemUpgrader extends InfrastructureUpgrader<VespaVersionTarget> { // For applications with package we do not have a zone-wide version target. This means that we must check // the wanted version of each node. boolean zoneHasSharedRouting = controller().zoneRegistry().routingMethod(zone.getId()).isShared(); - return versionOf(NodeSlice.ALL, zone, application, Node::wantedVersion) + return versionOf(NodeSlice.ALL, zone, application, Node::wantedVersion, target.downgrade()) .map(wantedVersion -> !wantedVersion.equals(target.version())) .orElse(zoneHasSharedRouting); // Always upgrade if zone uses shared routing, but has no nodes allocated yet } diff --git a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/OsUpgraderTest.java b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/OsUpgraderTest.java index 056db4f119c..fad78edc58f 100644 --- a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/OsUpgraderTest.java +++ b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/OsUpgraderTest.java @@ -18,10 +18,14 @@ import com.yahoo.vespa.hosted.controller.versions.NodeVersion; import org.junit.jupiter.api.Test; import java.time.Duration; +import java.util.ArrayList; import java.util.Collection; +import java.util.Collections; import java.util.List; +import java.util.Map; import java.util.function.Function; import java.util.function.UnaryOperator; +import java.util.stream.Collectors; import java.util.stream.Stream; import static org.junit.jupiter.api.Assertions.assertEquals; @@ -198,7 +202,8 @@ public class OsUpgraderTest { statusUpdater.maintain(); // All zones upgrade - for (var zone : List.of(zone1, zone2)) { + List<ZoneApi> zones = new ArrayList<>(List.of(zone1, zone2)); + for (var zone : zones) { osUpgrader.maintain(); completeUpgrade(version1, SystemApplication.tenantHost, zone); statusUpdater.maintain(); @@ -208,9 +213,17 @@ public class OsUpgraderTest { // Downgrade is triggered tester.controller().os().upgradeTo(version0, cloud, true, false); + // Zone order is reversed + Collections.reverse(zones); - // All zones downgrade, in reverse order - for (var zone : List.of(zone2, zone1)) { + // One host in first zone downgrades. Wanted version is not changed for second zone yet + osUpgrader.maintain(); + completeUpgrade(1, version0, SystemApplication.tenantHost, zones.get(0)); + osUpgrader.maintain(); + assertWanted(version1, SystemApplication.tenantHost, zones.get(1)); + + // All zones downgrade + for (var zone : zones) { osUpgrader.maintain(); completeUpgrade(version0, SystemApplication.tenantHost, zone); statusUpdater.maintain(); @@ -219,6 +232,55 @@ public class OsUpgraderTest { .allMatch(node -> node.currentVersion().equals(version0)), "All nodes on target version"); } + @Test + public void downgrade_os_partially() { + CloudName cloud = CloudName.from("cloud"); + ZoneApi zone1 = zone("dev.us-east-1", cloud); + ZoneApi zone2 = zone("prod.us-west-1", cloud); + UpgradePolicy upgradePolicy = UpgradePolicy.builder() + .upgrade(zone1) + .upgrade(zone2) + .build(); + OsUpgrader osUpgrader = osUpgrader(upgradePolicy, cloud, false); + + // Bootstrap system + tester.configServer().bootstrap(List.of(zone1.getId(), zone2.getId()), + List.of(SystemApplication.tenantHost)); + + // New OS version released + Version version0 = Version.fromString("1.0"); + Version version1 = Version.fromString("2.0"); + tester.controller().os().upgradeTo(version1, cloud, false, false); + statusUpdater.maintain(); + + // All zones upgrade + for (var zone : List.of(zone1, zone2)) { + osUpgrader.maintain(); + completeUpgrade(version1, SystemApplication.tenantHost, zone); + statusUpdater.maintain(); + } + assertTrue(tester.controller().os().status().nodesIn(cloud).stream() + .allMatch(node -> node.currentVersion().equals(version1)), "All nodes on target version"); + + // Downgrade is triggered + tester.controller().os().upgradeTo(version0, cloud, true, false); + + // All zones downgrade, in reverse order + for (var zone : List.of(zone2, zone1)) { + osUpgrader.maintain(); + // Partial downgrading happens, as this decision is left up to the zone. Downgrade target is still set in + // all zones as a best-effort, and to halt any further upgrades + completeUpgrade(1, version0, SystemApplication.tenantHost, zone); + statusUpdater.maintain(); + } + int zoneCount = 2; + Map<Version, Long> currentVersions = tester.controller().os().status().nodesIn(cloud).stream() + .collect(Collectors.groupingBy(NodeVersion::currentVersion, + Collectors.counting())); + assertEquals(1 * zoneCount, currentVersions.get(version0)); + assertEquals(2 * zoneCount, currentVersions.get(version1)); + } + private List<NodeVersion> nodesOn(Version version) { return tester.controller().os().status().versions().entrySet().stream() .filter(entry -> entry.getKey().version().equals(version)) |