From 14295d7f9618f7a80adbb155b0303a08d008d9d7 Mon Sep 17 00:00:00 2001 From: Martin Polden Date: Wed, 19 Jul 2023 15:41:54 +0200 Subject: OS downgrades --- .../integration/configserver/NodeRepository.java | 2 +- .../yahoo/vespa/hosted/controller/Controller.java | 42 ++++++++++----- .../controller/maintenance/OsUpgradeScheduler.java | 18 ++++++- .../hosted/controller/maintenance/OsUpgrader.java | 26 ++++++--- .../hosted/controller/persistence/CuratorDb.java | 3 +- .../persistence/OsVersionTargetSerializer.java | 11 +++- .../hosted/controller/restapi/os/OsApiHandler.java | 7 +-- .../controller/versions/OsVersionStatus.java | 15 +++--- .../controller/versions/OsVersionTarget.java | 4 +- .../controller/integration/NodeRepositoryMock.java | 2 +- .../maintenance/MetricsReporterTest.java | 4 +- .../maintenance/OsUpgradeSchedulerTest.java | 25 ++++++--- .../controller/maintenance/OsUpgraderTest.java | 61 ++++++++++++++++++---- .../maintenance/OsVersionStatusUpdaterTest.java | 4 +- .../persistence/OsVersionTargetSerializerTest.java | 11 ++-- .../hosted/controller/restapi/os/OsApiTest.java | 24 ++++++--- .../os/responses/versions-all-upgraded.json | 2 + .../os/responses/versions-partially-upgraded.json | 2 + 18 files changed, 192 insertions(+), 71 deletions(-) diff --git a/controller-api/src/main/java/com/yahoo/vespa/hosted/controller/api/integration/configserver/NodeRepository.java b/controller-api/src/main/java/com/yahoo/vespa/hosted/controller/api/integration/configserver/NodeRepository.java index 485bf627c87..71003b9d86e 100644 --- a/controller-api/src/main/java/com/yahoo/vespa/hosted/controller/api/integration/configserver/NodeRepository.java +++ b/controller-api/src/main/java/com/yahoo/vespa/hosted/controller/api/integration/configserver/NodeRepository.java @@ -55,7 +55,7 @@ public interface NodeRepository { void upgrade(ZoneId zone, NodeType type, Version version, boolean allowDowngrade); /** Upgrade OS for all nodes of given type to a new version */ - void upgradeOs(ZoneId zone, NodeType type, Version version); + void upgradeOs(ZoneId zone, NodeType type, Version version, boolean allowDowngrade); /** Get target versions for upgrades in given zone */ TargetVersions targetVersionsOf(ZoneId zone); diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/Controller.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/Controller.java index f6bcbc9828b..7da67227095 100644 --- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/Controller.java +++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/Controller.java @@ -243,13 +243,20 @@ public class Controller extends AbstractComponent { return curator.readOsVersionTargets(); } - /** Set the target OS version for given cloud in this system */ - public void upgradeOsIn(CloudName cloudName, Version version, boolean force) { + /** + * Set the target OS version for given cloud in this system. + * + * @param cloud The cloud to upgrade + * @param version The target OS version + * @param force Allow downgrades, and override pinned target (if any) + * @param pin Pin this version. This prevents automatic scheduling of upgrades until version is unpinned + */ + public void upgradeOsIn(CloudName cloud, Version version, boolean force, boolean pin) { if (version.isEmpty()) { throw new IllegalArgumentException("Invalid version '" + version.toFullString() + "'"); } - if (!clouds().contains(cloudName)) { - throw new IllegalArgumentException("Cloud '" + cloudName + "' does not exist in this system"); + if (!clouds().contains(cloud)) { + throw new IllegalArgumentException("Cloud '" + cloud + "' does not exist in this system"); } Instant scheduledAt = clock.instant(); try (Mutex lock = curator.lockOsVersions()) { @@ -257,19 +264,28 @@ public class Controller extends AbstractComponent { .collect(Collectors.toMap(t -> t.osVersion().cloud(), Function.identity())); - OsVersionTarget currentTarget = targets.get(cloudName); - if (!force && currentTarget != null) { - if (currentTarget.osVersion().version().isAfter(version)) { - throw new IllegalArgumentException("Cannot downgrade cloud '" + cloudName.value() + "' to version " + - version.toFullString()); + OsVersionTarget currentTarget = targets.get(cloud); + boolean downgrade = false; + if (currentTarget != null) { + boolean versionChange = !currentTarget.osVersion().version().equals(version); + downgrade = version.isBefore(currentTarget.osVersion().version()); + if (versionChange && currentTarget.pinned() && !force) { + throw new IllegalArgumentException("Cannot " + (downgrade ? "downgrade" : "upgrade") + " cloud " + + cloud.value() + "' to version " + version.toFullString() + + ": Current target is pinned. Add 'force' parameter to override"); + } + if (downgrade && !force) { + throw new IllegalArgumentException("Cannot downgrade cloud '" + cloud.value() + "' to version " + + version.toFullString() + ": Missing 'force' parameter"); } - if (currentTarget.osVersion().version().equals(version)) return; // Version unchanged + if (!versionChange && currentTarget.pinned() == pin) return; // No change } - OsVersionTarget newTarget = new OsVersionTarget(new OsVersion(version, cloudName), scheduledAt); - targets.put(cloudName, newTarget); + OsVersionTarget newTarget = new OsVersionTarget(new OsVersion(version, cloud), scheduledAt, pin, downgrade); + targets.put(cloud, newTarget); curator.writeOsVersionTargets(new TreeSet<>(targets.values())); - log.info("Triggered OS upgrade to " + version.toFullString() + " in cloud " + cloudName.value()); + log.info("Triggered OS " + (downgrade ? "downgrade" : "upgrade") + " to " + version.toFullString() + + " in cloud " + cloud.value()); } } diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/OsUpgradeScheduler.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/OsUpgradeScheduler.java index f0d218ae6cf..18e5b9388b3 100644 --- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/OsUpgradeScheduler.java +++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/OsUpgradeScheduler.java @@ -8,6 +8,7 @@ import com.yahoo.vespa.hosted.controller.Controller; import com.yahoo.vespa.hosted.controller.api.integration.deployment.ArtifactRepository; import com.yahoo.vespa.hosted.controller.api.integration.deployment.OsRelease; import com.yahoo.vespa.hosted.controller.versions.OsVersionTarget; +import com.yahoo.yolean.Exceptions; import java.time.DayOfWeek; import java.time.Duration; @@ -19,6 +20,8 @@ import java.time.format.DateTimeFormatter; import java.time.temporal.ChronoUnit; import java.util.Objects; import java.util.Optional; +import java.util.logging.Level; +import java.util.logging.Logger; /** * Automatically schedule upgrades to the next OS version. @@ -27,6 +30,8 @@ import java.util.Optional; */ public class OsUpgradeScheduler extends ControllerMaintainer { + private static final Logger LOG = Logger.getLogger(OsUpgradeScheduler.class.getName()); + public OsUpgradeScheduler(Controller controller, Duration interval) { super(controller, interval); } @@ -34,13 +39,22 @@ public class OsUpgradeScheduler extends ControllerMaintainer { @Override protected double maintain() { Instant now = controller().clock().instant(); + int attempts = 0; + int failures = 0; for (var cloud : controller().clouds()) { Optional change = changeIn(cloud, now); if (change.isEmpty()) continue; if (!change.get().scheduleAt(now)) continue; - controller().upgradeOsIn(cloud, change.get().version(), false); + try { + attempts++; + controller().upgradeOsIn(cloud, change.get().version(), false, false); + } catch (IllegalArgumentException e) { + failures++; + LOG.log(Level.WARNING, "Failed to schedule OS upgrade: " + Exceptions.toMessageString(e) + + ". Retrying in " + interval()); + } } - return 0.0; + return asSuccessFactorDeviation(attempts, failures); } /** Returns the wanted change for cloud at given instant, if any */ diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/OsUpgrader.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/OsUpgrader.java index b44643b4405..a2b4d95775e 100644 --- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/OsUpgrader.java +++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/OsUpgrader.java @@ -44,17 +44,18 @@ public class OsUpgrader extends InfrastructureUpgrader { @Override protected void upgrade(OsVersionTarget target, SystemApplication application, ZoneApi zone) { - log.info(Text.format("Upgrading OS of %s to version %s in %s in cloud %s", application.id(), - target.osVersion().version().toFullString(), - zone.getVirtualId(), zone.getCloudName())); + log.info(Text.format((target.downgrade() ? "Downgrading" : "Upgrading") + " OS of %s to version %s in %s in cloud %s", application.id(), + target.osVersion().version().toFullString(), + zone.getVirtualId(), zone.getCloudName())); controller().serviceRegistry().configServer().nodeRepository().upgradeOs(zone.getVirtualId(), application.nodeType(), - target.osVersion().version()); + target.osVersion().version(), + target.downgrade()); } @Override protected boolean convergedOn(OsVersionTarget target, SystemApplication application, ZoneApi zone, NodeSlice nodeSlice) { Version currentVersion = versionOf(nodeSlice, zone, application, Node::currentOsVersion).orElse(target.osVersion().version()); - return !currentVersion.isBefore(target.osVersion().version()); + return satisfiedBy(currentVersion, target); } @Override @@ -66,10 +67,10 @@ public class OsUpgrader extends InfrastructureUpgrader { @Override protected Optional target() { - // Return target if we have nodes in this cloud on a lower version + // Return target if we have nodes in this cloud on the wrong version return controller().osVersionTarget(cloud) .filter(target -> controller().osVersionStatus().nodesIn(cloud).stream() - .anyMatch(node -> node.currentVersion().isBefore(target.osVersion().version()))); + .anyMatch(node -> !satisfiedBy(node.currentVersion(), target))); } @Override @@ -78,10 +79,19 @@ public class OsUpgrader extends InfrastructureUpgrader { return controller().serviceRegistry().configServer().nodeRepository() .targetVersionsOf(zone.getVirtualId()) .osVersion(application.nodeType()) - .map(currentTarget -> target.osVersion().version().isAfter(currentTarget)) + .map(currentVersion -> !satisfiedBy(currentVersion, target)) .orElse(true); } + private static boolean satisfiedBy(Version version, OsVersionTarget target) { + if (target.downgrade()) { + // When downgrading we want an exact version + return version.equals(target.osVersion().version()); + } + // Otherwise, matching or later version is fine + return !version.isBefore(target.osVersion().version()); + } + /** Returns whether node currently allows upgrades */ public static boolean canUpgrade(Node node, boolean includeDeferring) { return (includeDeferring || !node.deferOsUpgrade()) && upgradableNodeStates.contains(node.state()); diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/persistence/CuratorDb.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/persistence/CuratorDb.java index 89b4166e477..ae35306c783 100644 --- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/persistence/CuratorDb.java +++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/persistence/CuratorDb.java @@ -63,6 +63,7 @@ import java.util.Map; import java.util.NavigableMap; import java.util.Optional; import java.util.Set; +import java.util.SortedSet; import java.util.concurrent.ConcurrentHashMap; import java.util.concurrent.TimeoutException; import java.util.concurrent.atomic.AtomicReference; @@ -335,7 +336,7 @@ public class CuratorDb { // Infrastructure upgrades - public void writeOsVersionTargets(Set versions) { + public void writeOsVersionTargets(SortedSet versions) { curator.set(osVersionTargetsPath(), asJson(osVersionTargetSerializer.toSlime(versions))); } diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/persistence/OsVersionTargetSerializer.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/persistence/OsVersionTargetSerializer.java index c06a36d3a1d..a5e5d925865 100644 --- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/persistence/OsVersionTargetSerializer.java +++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/persistence/OsVersionTargetSerializer.java @@ -12,6 +12,7 @@ import com.yahoo.vespa.hosted.controller.versions.OsVersionTarget; import java.time.Instant; import java.util.Collections; import java.util.Set; +import java.util.SortedSet; import java.util.TreeSet; /** @@ -25,12 +26,14 @@ public class OsVersionTargetSerializer { private static final String versionsField = "versions"; private static final String scheduledAtField = "scheduledAt"; + private static final String pinnedField = "pinned"; + private static final String downgradeField = "downgrade"; public OsVersionTargetSerializer(OsVersionSerializer osVersionSerializer) { this.osVersionSerializer = osVersionSerializer; } - public Slime toSlime(Set osVersionTargets) { + public Slime toSlime(SortedSet osVersionTargets) { Slime slime = new Slime(); Cursor root = slime.setObject(); Cursor array = root.setArray(versionsField); @@ -44,7 +47,9 @@ public class OsVersionTargetSerializer { array.traverse((ArrayTraverser) (i, inspector) -> { OsVersion osVersion = osVersionSerializer.fromSlime(inspector); Instant scheduledAt = SlimeUtils.instant(inspector.field(scheduledAtField)); - osVersionTargets.add(new OsVersionTarget(osVersion, scheduledAt)); + boolean pinned = inspector.field(pinnedField).asBool(); + boolean downgrade = inspector.field(downgradeField).asBool(); + osVersionTargets.add(new OsVersionTarget(osVersion, scheduledAt, pinned, downgrade)); }); return Collections.unmodifiableSet(osVersionTargets); } @@ -52,6 +57,8 @@ public class OsVersionTargetSerializer { private void toSlime(OsVersionTarget target, Cursor object) { osVersionSerializer.toSlime(target.osVersion(), object); object.setLong(scheduledAtField, target.scheduledAt().toEpochMilli()); + object.setBool(pinnedField, target.pinned()); + object.setBool(downgradeField, target.downgrade()); } } diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/restapi/os/OsApiHandler.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/restapi/os/OsApiHandler.java index cfd6ea3d6d9..dace7b7be8f 100644 --- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/restapi/os/OsApiHandler.java +++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/restapi/os/OsApiHandler.java @@ -39,7 +39,6 @@ import java.util.Optional; import java.util.Set; import java.util.StringJoiner; import java.util.function.Function; -import java.util.stream.Collectors; /** * This implements the /os/v1 API which provides operators with information about, and scheduling of OS upgrades for @@ -147,9 +146,10 @@ public class OsApiHandler extends AuditLoggingRequestHandler { } Version target = parseStringField("version", root, Version::fromString); boolean force = root.field("force").asBool(); - controller.upgradeOsIn(cloud, target, force); + boolean pin = root.field("pin").asBool(); + controller.upgradeOsIn(cloud, target, force, pin); return new MessageResponse("Set target OS version for cloud '" + cloud.value() + "' to " + - target.toFullString()); + target.toFullString() + (pin ? " (pinned)" : "")); } private Slime osVersions() { @@ -167,6 +167,7 @@ public class OsApiHandler extends AuditLoggingRequestHandler { target.ifPresent(t -> { currentVersionObject.setString("upgradeBudget", Duration.ZERO.toString()); currentVersionObject.setLong("scheduledAt", t.scheduledAt().toEpochMilli()); + currentVersionObject.setBool("pinned", t.pinned()); Optional nextChange = osUpgradeScheduler.changeIn(t.osVersion().cloud(), now); nextChange.ifPresent(c -> { currentVersionObject.setString("nextVersion", c.version().toFullString()); diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/versions/OsVersionStatus.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/versions/OsVersionStatus.java index 6f9888b79e0..a06f26b1fdf 100644 --- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/versions/OsVersionStatus.java +++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/versions/OsVersionStatus.java @@ -14,6 +14,7 @@ import com.yahoo.vespa.hosted.controller.maintenance.OsUpgrader; import java.time.Instant; import java.util.ArrayList; import java.util.Collection; +import java.util.Collections; import java.util.HashMap; import java.util.List; import java.util.Map; @@ -36,13 +37,15 @@ public record OsVersionStatus(Map> versions) { this.versions = ImmutableMap.copyOf(Objects.requireNonNull(versions, "versions must be non-null")); } - /** Returns nodes eligible for OS upgrades that exist in given cloud */ + /** Returns all node versions that exist in given cloud */ public List nodesIn(CloudName cloud) { - return versions.entrySet().stream() - .filter(entry -> entry.getKey().cloud().equals(cloud)) - .map(Map.Entry::getValue) - .findFirst() - .orElseGet(List::of); + List nodeVersions = new ArrayList<>(); + versions.forEach((osVersion, nodesOnVersion) -> { + if (osVersion.cloud().equals(cloud)) { + nodeVersions.addAll(nodesOnVersion); + } + }); + return Collections.unmodifiableList(nodeVersions); } /** Returns versions that exist in given cloud */ diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/versions/OsVersionTarget.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/versions/OsVersionTarget.java index 471670ec399..e9785216376 100644 --- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/versions/OsVersionTarget.java +++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/versions/OsVersionTarget.java @@ -11,7 +11,7 @@ import java.util.Objects; * * @author mpolden */ -public record OsVersionTarget(OsVersion osVersion, Instant scheduledAt) implements VersionTarget, Comparable { +public record OsVersionTarget(OsVersion osVersion, Instant scheduledAt, boolean pinned, boolean downgrade) implements VersionTarget, Comparable { public OsVersionTarget { Objects.requireNonNull(osVersion); @@ -30,7 +30,7 @@ public record OsVersionTarget(OsVersion osVersion, Instant scheduledAt) implemen @Override public boolean downgrade() { - return false; // Not supported by this target type + return downgrade; } } diff --git a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/integration/NodeRepositoryMock.java b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/integration/NodeRepositoryMock.java index 7004028c072..5b70942bfd1 100644 --- a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/integration/NodeRepositoryMock.java +++ b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/integration/NodeRepositoryMock.java @@ -169,7 +169,7 @@ public class NodeRepositoryMock implements NodeRepository { } @Override - public void upgradeOs(ZoneId zone, NodeType type, Version version) { + public void upgradeOs(ZoneId zone, NodeType type, Version version, boolean downgrade) { this.targetVersions.compute(zone, (ignored, targetVersions) -> { if (targetVersions == null) { targetVersions = TargetVersions.EMPTY; diff --git a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/MetricsReporterTest.java b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/MetricsReporterTest.java index 869a94c0449..49e647904cd 100644 --- a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/MetricsReporterTest.java +++ b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/MetricsReporterTest.java @@ -365,7 +365,7 @@ public class MetricsReporterTest { // All nodes upgrade to initial OS version var version0 = Version.fromString("8.0"); - tester.controller().upgradeOsIn(cloud, version0, false); + tester.controller().upgradeOsIn(cloud, version0, false, false); osUpgrader.maintain(); tester.configServer().setOsVersion(version0, SystemApplication.tenantHost.id(), zone); tester.configServer().setOsVersion(version0, SystemApplication.configServerHost.id(), zone); @@ -379,7 +379,7 @@ public class MetricsReporterTest { var currentVersion = i == 0 ? version0 : targets.get(i - 1); var nextVersion = targets.get(i); // System starts upgrading to next OS version - tester.controller().upgradeOsIn(cloud, nextVersion, false); + tester.controller().upgradeOsIn(cloud, nextVersion, false, false); runAll(osUpgrader, statusUpdater, reporter); assertOsChangeDuration(Duration.ZERO, hosts); assertOsNodeCount(hosts.size(), currentVersion); diff --git a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/OsUpgradeSchedulerTest.java b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/OsUpgradeSchedulerTest.java index 899745c7a39..4cec38c456e 100644 --- a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/OsUpgradeSchedulerTest.java +++ b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/OsUpgradeSchedulerTest.java @@ -47,7 +47,7 @@ public class OsUpgradeSchedulerTest { // Target is set manually Version version0 = Version.fromString("7.0.0.20220101"); - tester.controller().upgradeOsIn(cloud, version0, false); + tester.controller().upgradeOsIn(cloud, version0, false, false); // Target remains unchanged as it hasn't expired yet for (var interval : List.of(Duration.ZERO, Duration.ofDays(30))) { @@ -56,7 +56,8 @@ public class OsUpgradeSchedulerTest { assertEquals(version0, tester.controller().osVersionTarget(cloud).get().osVersion().version()); } - // New release becomes available, but is not triggered until cool-down period has passed + // New release becomes available, but is not triggered until cool-down period has passed, and we're inside a + // trigger period Version version1 = Version.fromString("7.0.0.20220301"); tester.clock().advance(Duration.ofDays(16)); assertEquals("2022-03-03T09:05:00", formatInstant(tester.clock().instant())); @@ -65,8 +66,6 @@ public class OsUpgradeSchedulerTest { assertEquals(version0, tester.controller().osVersionTarget(cloud).get().osVersion().version(), "Target is unchanged because cooldown hasn't passed"); - - // ... and we're inside the trigger period tester.clock().advance(Duration.ofDays(3).plusHours(18)); assertEquals("2022-03-07T03:05:00", formatInstant(tester.clock().instant())); scheduler.maintain(); @@ -75,8 +74,18 @@ public class OsUpgradeSchedulerTest { "Target is unchanged because we're outside trigger period"); tester.clock().advance(Duration.ofHours(5)); assertEquals("2022-03-07T08:05:00", formatInstant(tester.clock().instant())); + + // Time constraints have now passed, but the current target has been pinned in the meantime + tester.controller().upgradeOsIn(cloud, version0, false, true); Optional change = scheduler.changeIn(cloud, tester.clock().instant()); assertTrue(change.isPresent()); + assertEquals(-1, scheduler.maintain()); + assertEquals(version0, + tester.controller().osVersionTarget(cloud).get().osVersion().version(), + "Target is unchanged because it's pinned"); + + // Target is unpinned and new version is allowed to be scheduled + tester.controller().upgradeOsIn(cloud, version0, false, false); scheduler.maintain(); assertEquals(version1, tester.controller().osVersionTarget(cloud).get().osVersion().version(), @@ -106,7 +115,7 @@ public class OsUpgradeSchedulerTest { // Set initial target Version version0 = Version.fromString("7.0.0.20220101"); - tester.controller().upgradeOsIn(cloud, version0, false); + tester.controller().upgradeOsIn(cloud, version0, false, false); // Next version is triggered Version version1 = Version.fromString("7.0.0.20220301"); @@ -137,7 +146,7 @@ public class OsUpgradeSchedulerTest { // Set initial target CloudName cloud = tester.controller().clouds().iterator().next(); Version version0 = Version.fromString("8.0"); - tester.controller().upgradeOsIn(cloud, version0, false); + tester.controller().upgradeOsIn(cloud, version0, false, false); // Stable release (tagged outside trigger period) is scheduled once trigger period opens Version version1 = Version.fromString("8.1"); @@ -151,7 +160,7 @@ public class OsUpgradeSchedulerTest { // A newer version is triggered manually Version version3 = Version.fromString("8.3"); - tester.controller().upgradeOsIn(cloud, version3, false); + tester.controller().upgradeOsIn(cloud, version3, false, false); // Nothing happens in next iteration as tagged release is older than manually triggered version scheduleUpgradeAfter(Duration.ofDays(7), version3, scheduler, tester); @@ -168,7 +177,7 @@ public class OsUpgradeSchedulerTest { // Set initial target CloudName cloud = tester.controller().clouds().iterator().next(); Version version0 = Version.fromString("8.0"); - tester.controller().upgradeOsIn(cloud, version0, false); + tester.controller().upgradeOsIn(cloud, version0, false, false); // Latest release is not scheduled immediately Version version1 = Version.fromString("8.1"); diff --git a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/OsUpgraderTest.java b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/OsUpgraderTest.java index adfb0286202..cc9df6b38e6 100644 --- a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/OsUpgraderTest.java +++ b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/OsUpgraderTest.java @@ -22,7 +22,6 @@ import java.util.Collection; import java.util.List; import java.util.function.Function; import java.util.function.UnaryOperator; -import java.util.stream.Collectors; import java.util.stream.Stream; import static org.junit.jupiter.api.Assertions.assertEquals; @@ -72,8 +71,8 @@ public class OsUpgraderTest { // New OS version released Version version1 = Version.fromString("7.1"); - tester.controller().upgradeOsIn(cloud1, Version.fromString("7.0"), false); - tester.controller().upgradeOsIn(cloud1, version1, false); + tester.controller().upgradeOsIn(cloud1, Version.fromString("7.0"), false, false); + tester.controller().upgradeOsIn(cloud1, version1, false, false); assertEquals(1, tester.controller().osVersionTargets().size()); // Only allows one version per cloud statusUpdater.maintain(); @@ -151,8 +150,8 @@ public class OsUpgraderTest { // New OS version released Version version = Version.fromString("7.1"); - tester.controller().upgradeOsIn(cloud, Version.fromString("7.0"), false); - tester.controller().upgradeOsIn(cloud, version, false); // Replaces existing target + tester.controller().upgradeOsIn(cloud, Version.fromString("7.0"), false, false); + tester.controller().upgradeOsIn(cloud, version, false, false); // Replaces existing target statusUpdater.maintain(); // zone 1 upgrades @@ -177,6 +176,49 @@ public class OsUpgraderTest { .noneMatch(node -> node.currentVersion().isBefore(version)), "All nodes on target version or newer"); } + @Test + public void downgrade_os() { + CloudName cloud = CloudName.from("cloud"); + ZoneApi zone1 = zone("dev.us-east-1", cloud); + ZoneApi zone2 = zone("prod.us-west-1", cloud); + UpgradePolicy upgradePolicy = UpgradePolicy.builder() + .upgrade(zone1) + .upgrade(zone2) + .build(); + OsUpgrader osUpgrader = osUpgrader(upgradePolicy, cloud, true); + + // Bootstrap system + tester.configServer().bootstrap(List.of(zone1.getId(), zone2.getId()), + List.of(SystemApplication.tenantHost)); + + // New OS version released + Version version0 = Version.fromString("1.0"); + Version version1 = Version.fromString("2.0"); + tester.controller().upgradeOsIn(cloud, version1, false, false); + statusUpdater.maintain(); + + // All zones upgrade + for (var zone : List.of(zone1, zone2)) { + osUpgrader.maintain(); + completeUpgrade(version1, SystemApplication.tenantHost, zone); + statusUpdater.maintain(); + } + assertTrue(tester.controller().osVersionStatus().nodesIn(cloud).stream() + .allMatch(node -> node.currentVersion().equals(version1)), "All nodes on target version"); + + // Downgrade is triggered + tester.controller().upgradeOsIn(cloud, version0, true, false); + + // All zones downgrade, in reverse order + for (var zone : List.of(zone2, zone1)) { + osUpgrader.maintain(); + completeUpgrade(version0, SystemApplication.tenantHost, zone); + statusUpdater.maintain(); + } + assertTrue(tester.controller().osVersionStatus().nodesIn(cloud).stream() + .allMatch(node -> node.currentVersion().equals(version0)), "All nodes on target version"); + } + private List nodesOn(Version version) { return tester.controller().osVersionStatus().versions().entrySet().stream() .filter(entry -> entry.getKey().version().equals(version)) @@ -241,22 +283,23 @@ public class OsUpgraderTest { completeUpgrade(nodeCount, version, version, application, zones); } - private void completeUpgrade(int nodeCount, Version wantedVersion, Version version, SystemApplication application, ZoneApi... zones) { + private void completeUpgrade(int nodeCount, Version wantedVersion, Version currentVersion, SystemApplication application, ZoneApi... zones) { assertWanted(wantedVersion, application, zones); for (ZoneApi zone : zones) { int nodesUpgraded = 0; List nodes = nodesRequiredToUpgrade(zone, application); for (Node node : nodes) { if (node.currentVersion().equals(wantedVersion)) continue; - nodeRepository().putNodes(zone.getVirtualId(), Node.builder(node).wantedOsVersion(version) - .currentOsVersion(version) + nodeRepository().putNodes(zone.getVirtualId(), Node.builder(node) + .wantedOsVersion(currentVersion) + .currentOsVersion(currentVersion) .build()); if (++nodesUpgraded == nodeCount) { break; } } if (nodesUpgraded == nodes.size()) { - assertCurrent(version, application, zone); + assertCurrent(currentVersion, application, zone); } } } diff --git a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/OsVersionStatusUpdaterTest.java b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/OsVersionStatusUpdaterTest.java index d4d146f0834..92be999e508 100644 --- a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/OsVersionStatusUpdaterTest.java +++ b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/OsVersionStatusUpdaterTest.java @@ -40,7 +40,7 @@ public class OsVersionStatusUpdaterTest { // Setting a new target adds it to current status Version version1 = Version.fromString("7.1"); CloudName cloud = CloudName.DEFAULT; - tester.controller().upgradeOsIn(cloud, version1, false); + tester.controller().upgradeOsIn(cloud, version1, false, false); statusUpdater.maintain(); var osVersions = tester.controller().osVersionStatus().versions(); @@ -49,7 +49,7 @@ public class OsVersionStatusUpdaterTest { assertTrue(osVersions.get(new OsVersion(version1, cloud)).isEmpty(), "No nodes on current target"); CloudName otherCloud = CloudName.AWS; - tester.controller().upgradeOsIn(otherCloud, version1, false); + tester.controller().upgradeOsIn(otherCloud, version1, false, false); statusUpdater.maintain(); osVersions = tester.controller().osVersionStatus().versions(); diff --git a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/persistence/OsVersionTargetSerializerTest.java b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/persistence/OsVersionTargetSerializerTest.java index 55d940ca6f9..7bec217c889 100644 --- a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/persistence/OsVersionTargetSerializerTest.java +++ b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/persistence/OsVersionTargetSerializerTest.java @@ -1,7 +1,6 @@ // Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. package com.yahoo.vespa.hosted.controller.persistence; -import com.google.common.collect.ImmutableSet; import com.yahoo.component.Version; import com.yahoo.config.provision.CloudName; import com.yahoo.vespa.hosted.controller.versions.OsVersion; @@ -10,6 +9,8 @@ import org.junit.jupiter.api.Test; import java.time.Instant; import java.util.Set; +import java.util.SortedSet; +import java.util.TreeSet; import static org.junit.jupiter.api.Assertions.assertEquals; @@ -21,10 +22,10 @@ public class OsVersionTargetSerializerTest { @Test void serialization() { OsVersionTargetSerializer serializer = new OsVersionTargetSerializer(new OsVersionSerializer()); - Set targets = ImmutableSet.of( - new OsVersionTarget(new OsVersion(Version.fromString("7.1"), CloudName.DEFAULT), Instant.ofEpochMilli(123)), - new OsVersionTarget(new OsVersion(Version.fromString("7.1"), CloudName.from("foo")), Instant.ofEpochMilli(456)) - ); + SortedSet targets = new TreeSet<>(); + targets.add(new OsVersionTarget(new OsVersion(Version.fromString("7.1"), CloudName.DEFAULT), Instant.ofEpochMilli(123), false, false)); + targets.add(new OsVersionTarget(new OsVersion(Version.fromString("7.1"), CloudName.from("foo")), Instant.ofEpochMilli(456), true, true)); + Set serialized = serializer.fromSlime(serializer.toSlime(targets)); assertEquals(targets, serialized); } diff --git a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/restapi/os/OsApiTest.java b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/restapi/os/OsApiTest.java index 1067db31473..d6edb90d149 100644 --- a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/restapi/os/OsApiTest.java +++ b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/restapi/os/OsApiTest.java @@ -96,12 +96,20 @@ public class OsApiTest extends ControllerContainerTest { assertFile(new Request("http://localhost:8080/os/v1/"), "versions-all-upgraded.json"); // Downgrade with force is permitted - assertResponse(new Request("http://localhost:8080/os/v1/", "{\"version\": \"7.5.1\", \"cloud\": \"cloud1\", \"force\": true}", Request.Method.PATCH), - "{\"message\":\"Set target OS version for cloud 'cloud1' to 7.5.1\"}", 200); + assertResponse(new Request("http://localhost:8080/os/v1/", "{\"version\": \"8.2.0\", \"cloud\": \"cloud2\", \"force\": true}", Request.Method.PATCH), + "{\"message\":\"Set target OS version for cloud 'cloud2' to 8.2.0\"}", 200); // Clear target for a given cloud - assertResponse(new Request("http://localhost:8080/os/v1/", "{\"version\": null, \"cloud\": \"cloud2\"}", Request.Method.PATCH), - "{\"message\":\"Cleared target OS version for cloud 'cloud2'\"}", 200); + assertResponse(new Request("http://localhost:8080/os/v1/", "{\"version\": null, \"cloud\": \"cloud1\"}", Request.Method.PATCH), + "{\"message\":\"Cleared target OS version for cloud 'cloud1'\"}", 200); + + // Pin/unpin a version + assertResponse(new Request("http://localhost:8080/os/v1/", "{\"version\": \"7.5.2\", \"cloud\": \"cloud1\", \"pin\": true}", Request.Method.PATCH), + "{\"message\":\"Set target OS version for cloud 'cloud1' to 7.5.2 (pinned)\"}", 200); + assertResponse(new Request("http://localhost:8080/os/v1/", "{\"version\": \"7.5.2\", \"cloud\": \"cloud1\", \"pin\": false}", Request.Method.PATCH), + "{\"message\":\"Set target OS version for cloud 'cloud1' to 7.5.2\"}", 200); + assertResponse(new Request("http://localhost:8080/os/v1/", "{\"version\": \"7.5.2\", \"cloud\": \"cloud1\", \"pin\": true}", Request.Method.PATCH), + "{\"message\":\"Set target OS version for cloud 'cloud1' to 7.5.2 (pinned)\"}", 200); // Error: Missing fields assertResponse(new Request("http://localhost:8080/os/v1/", "{\"version\": \"7.6\"}", Request.Method.PATCH), @@ -120,8 +128,12 @@ public class OsApiTest extends ControllerContainerTest { "{\"error-code\":\"BAD_REQUEST\",\"message\":\"Cloud 'foo' does not exist in this system\"}", 400); // Error: Downgrade OS - assertResponse(new Request("http://localhost:8080/os/v1/", "{\"version\": \"7.4.1\", \"cloud\": \"cloud1\"}", Request.Method.PATCH), - "{\"error-code\":\"BAD_REQUEST\",\"message\":\"Cannot downgrade cloud 'cloud1' to version 7.4.1\"}", 400); + assertResponse(new Request("http://localhost:8080/os/v1/", "{\"version\": \"7.4.1\", \"cloud\": \"cloud2\"}", Request.Method.PATCH), + "{\"error-code\":\"BAD_REQUEST\",\"message\":\"Cannot downgrade cloud 'cloud2' to version 7.4.1: Missing 'force' parameter\"}", 400); + + // Error: Change a pinned version + assertResponse(new Request("http://localhost:8080/os/v1/", "{\"version\": \"7.5.3\", \"cloud\": \"cloud1\"}", Request.Method.PATCH), + "{\"error-code\":\"BAD_REQUEST\",\"message\":\"Cannot upgrade cloud cloud1' to version 7.5.3: Current target is pinned. Add 'force' parameter to override\"}", 400); // Request firmware checks in all zones. assertResponse(new Request("http://localhost:8080/os/v1/firmware/", "", Request.Method.POST), diff --git a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/restapi/os/responses/versions-all-upgraded.json b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/restapi/os/responses/versions-all-upgraded.json index 92a2cad86f1..0f2e05986b6 100644 --- a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/restapi/os/responses/versions-all-upgraded.json +++ b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/restapi/os/responses/versions-all-upgraded.json @@ -5,6 +5,7 @@ "targetVersion": true, "upgradeBudget": "PT0S", "scheduledAt": 1234, + "pinned": false, "cloud": "cloud1", "nodes": [ { @@ -104,6 +105,7 @@ "targetVersion": true, "upgradeBudget": "PT0S", "scheduledAt": 1234, + "pinned": false, "nextVersion": "8.2.1.20211228", "nextScheduledAt": 1640743200000, "cloud": "cloud2", diff --git a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/restapi/os/responses/versions-partially-upgraded.json b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/restapi/os/responses/versions-partially-upgraded.json index 662ff4bb373..20d7147a258 100644 --- a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/restapi/os/responses/versions-partially-upgraded.json +++ b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/restapi/os/responses/versions-partially-upgraded.json @@ -57,6 +57,7 @@ "targetVersion": true, "upgradeBudget": "PT0S", "scheduledAt": 1234, + "pinned": false, "cloud": "cloud1", "nodes": [ { @@ -163,6 +164,7 @@ "targetVersion": true, "upgradeBudget": "PT0S", "scheduledAt": 1234, + "pinned": false, "nextVersion": "8.2.1.20211228", "nextScheduledAt": 1640743200000, "cloud": "cloud2", -- cgit v1.2.3