aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorMartin Polden <mpolden@mpolden.no>2023-07-20 10:44:49 +0200
committerGitHub <noreply@github.com>2023-07-20 10:44:49 +0200
commit6cf6de060d9852fbf7c8cd7e7e5291456a24ed46 (patch)
treef13032f7c14031e3c2afa7c556a19dc136e90ade
parentcc7d61d8bab95d7b7225712e94758861596946ae (diff)
parent05b89413eb8a47a24b8e16255d68b42f0f5549b3 (diff)
Merge pull request #27840 from vespa-engine/mpolden/os-downgrade
OS downgrades at system level
-rw-r--r--controller-api/src/main/java/com/yahoo/vespa/hosted/controller/api/integration/configserver/NodeRepository.java2
-rw-r--r--controller-server/src/main/java/com/yahoo/vespa/hosted/controller/ApplicationController.java2
-rw-r--r--controller-server/src/main/java/com/yahoo/vespa/hosted/controller/Controller.java87
-rw-r--r--controller-server/src/main/java/com/yahoo/vespa/hosted/controller/OsController.java129
-rw-r--r--controller-server/src/main/java/com/yahoo/vespa/hosted/controller/RoutingController.java4
-rw-r--r--controller-server/src/main/java/com/yahoo/vespa/hosted/controller/TenantController.java2
-rw-r--r--controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/MetricsReporter.java2
-rw-r--r--controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/OsUpgradeScheduler.java22
-rw-r--r--controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/OsUpgrader.java30
-rw-r--r--controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/OsVersionStatusUpdater.java2
-rw-r--r--controller-server/src/main/java/com/yahoo/vespa/hosted/controller/persistence/CuratorDb.java3
-rw-r--r--controller-server/src/main/java/com/yahoo/vespa/hosted/controller/persistence/OsVersionTargetSerializer.java11
-rw-r--r--controller-server/src/main/java/com/yahoo/vespa/hosted/controller/restapi/os/OsApiHandler.java13
-rw-r--r--controller-server/src/main/java/com/yahoo/vespa/hosted/controller/versions/OsVersionStatus.java17
-rw-r--r--controller-server/src/main/java/com/yahoo/vespa/hosted/controller/versions/OsVersionTarget.java4
-rw-r--r--controller-server/src/test/java/com/yahoo/vespa/hosted/controller/integration/NodeRepositoryMock.java2
-rw-r--r--controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/MetricsReporterTest.java4
-rw-r--r--controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/OsUpgradeSchedulerTest.java43
-rw-r--r--controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/OsUpgraderTest.java75
-rw-r--r--controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/OsVersionStatusUpdaterTest.java10
-rw-r--r--controller-server/src/test/java/com/yahoo/vespa/hosted/controller/persistence/OsVersionTargetSerializerTest.java11
-rw-r--r--controller-server/src/test/java/com/yahoo/vespa/hosted/controller/restapi/os/OsApiTest.java26
-rw-r--r--controller-server/src/test/java/com/yahoo/vespa/hosted/controller/restapi/os/responses/versions-all-upgraded.json2
-rw-r--r--controller-server/src/test/java/com/yahoo/vespa/hosted/controller/restapi/os/responses/versions-partially-upgraded.json2
24 files changed, 333 insertions, 172 deletions
diff --git a/controller-api/src/main/java/com/yahoo/vespa/hosted/controller/api/integration/configserver/NodeRepository.java b/controller-api/src/main/java/com/yahoo/vespa/hosted/controller/api/integration/configserver/NodeRepository.java
index 485bf627c87..71003b9d86e 100644
--- a/controller-api/src/main/java/com/yahoo/vespa/hosted/controller/api/integration/configserver/NodeRepository.java
+++ b/controller-api/src/main/java/com/yahoo/vespa/hosted/controller/api/integration/configserver/NodeRepository.java
@@ -55,7 +55,7 @@ public interface NodeRepository {
void upgrade(ZoneId zone, NodeType type, Version version, boolean allowDowngrade);
/** Upgrade OS for all nodes of given type to a new version */
- void upgradeOs(ZoneId zone, NodeType type, Version version);
+ void upgradeOs(ZoneId zone, NodeType type, Version version, boolean allowDowngrade);
/** Get target versions for upgrades in given zone */
TargetVersions targetVersionsOf(ZoneId zone);
diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/ApplicationController.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/ApplicationController.java
index 54dcfa46188..bac2c0ab9d7 100644
--- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/ApplicationController.java
+++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/ApplicationController.java
@@ -126,7 +126,7 @@ import static java.util.stream.Collectors.toMap;
import static java.util.stream.Collectors.toSet;
/**
- * A singleton owned by the Controller which contains the methods and state for controlling applications.
+ * A singleton owned by {@link Controller} which contains the methods and state for controlling applications.
*
* @author bratseth
*/
diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/Controller.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/Controller.java
index f6bcbc9828b..6cbcc64cf33 100644
--- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/Controller.java
+++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/Controller.java
@@ -30,9 +30,6 @@ import com.yahoo.vespa.hosted.controller.persistence.JobControlFlags;
import com.yahoo.vespa.hosted.controller.restapi.dataplanetoken.DataplaneTokenService;
import com.yahoo.vespa.hosted.controller.security.AccessControl;
import com.yahoo.vespa.hosted.controller.support.access.SupportAccessControl;
-import com.yahoo.vespa.hosted.controller.versions.OsVersion;
-import com.yahoo.vespa.hosted.controller.versions.OsVersionStatus;
-import com.yahoo.vespa.hosted.controller.versions.OsVersionTarget;
import com.yahoo.vespa.hosted.controller.versions.VersionStatus;
import com.yahoo.vespa.hosted.controller.versions.VespaVersion;
import com.yahoo.vespa.hosted.rotation.config.RotationsConfig;
@@ -40,15 +37,12 @@ import com.yahoo.yolean.concurrent.Sleeper;
import java.security.SecureRandom;
import java.time.Clock;
-import java.time.Instant;
import java.util.LinkedHashMap;
import java.util.Map;
import java.util.Objects;
import java.util.Optional;
import java.util.Random;
import java.util.Set;
-import java.util.TreeSet;
-import java.util.function.Function;
import java.util.function.Predicate;
import java.util.logging.Logger;
import java.util.stream.Collectors;
@@ -85,6 +79,7 @@ public class Controller extends AbstractComponent {
private final MavenRepository mavenRepository;
private final Metric metric;
private final RoutingController routingController;
+ private final OsController osController;
private final ControllerConfig controllerConfig;
private final SecretStore secretStore;
private final CuratorArchiveBucketDb archiveBucketDb;
@@ -132,6 +127,7 @@ public class Controller extends AbstractComponent {
applicationController = new ApplicationController(this, curator, accessControl, clock, flagSource, serviceRegistry.billingController());
tenantController = new TenantController(this, curator, accessControl);
routingController = new RoutingController(this, rotationsConfig);
+ osController = new OsController(this);
auditLogger = new AuditLogger(curator, clock);
jobControl = new JobControl(new JobControlFlags(curator, flagSource));
archiveBucketDb = new CuratorArchiveBucketDb(this);
@@ -161,6 +157,11 @@ public class Controller extends AbstractComponent {
return routingController;
}
+ /** Returns the instance controlling OS upgrades */
+ public OsController os() {
+ return osController;
+ }
+
/** Returns the service registry of this */
public ServiceRegistry serviceRegistry() {
return serviceRegistry;
@@ -232,80 +233,6 @@ public class Controller extends AbstractComponent {
.orElse(Vtag.currentVersion);
}
- /** Returns the target OS version for infrastructure in this system. The controller will drive infrastructure OS
- * upgrades to this version */
- public Optional<OsVersionTarget> osVersionTarget(CloudName cloud) {
- return osVersionTargets().stream().filter(target -> target.osVersion().cloud().equals(cloud)).findFirst();
- }
-
- /** Returns all target OS versions in this system */
- public Set<OsVersionTarget> osVersionTargets() {
- return curator.readOsVersionTargets();
- }
-
- /** Set the target OS version for given cloud in this system */
- public void upgradeOsIn(CloudName cloudName, Version version, boolean force) {
- if (version.isEmpty()) {
- throw new IllegalArgumentException("Invalid version '" + version.toFullString() + "'");
- }
- if (!clouds().contains(cloudName)) {
- throw new IllegalArgumentException("Cloud '" + cloudName + "' does not exist in this system");
- }
- Instant scheduledAt = clock.instant();
- try (Mutex lock = curator.lockOsVersions()) {
- Map<CloudName, OsVersionTarget> targets = curator.readOsVersionTargets().stream()
- .collect(Collectors.toMap(t -> t.osVersion().cloud(),
- Function.identity()));
-
- OsVersionTarget currentTarget = targets.get(cloudName);
- if (!force && currentTarget != null) {
- if (currentTarget.osVersion().version().isAfter(version)) {
- throw new IllegalArgumentException("Cannot downgrade cloud '" + cloudName.value() + "' to version " +
- version.toFullString());
- }
- if (currentTarget.osVersion().version().equals(version)) return; // Version unchanged
- }
-
- OsVersionTarget newTarget = new OsVersionTarget(new OsVersion(version, cloudName), scheduledAt);
- targets.put(cloudName, newTarget);
- curator.writeOsVersionTargets(new TreeSet<>(targets.values()));
- log.info("Triggered OS upgrade to " + version.toFullString() + " in cloud " + cloudName.value());
- }
- }
-
- /** Clear the target OS version for given cloud in this system */
- public void cancelOsUpgradeIn(CloudName cloudName) {
- try (Mutex lock = curator.lockOsVersions()) {
- Map<CloudName, OsVersionTarget> targets = curator.readOsVersionTargets().stream()
- .collect(Collectors.toMap(t -> t.osVersion().cloud(),
- Function.identity()));
- if (targets.remove(cloudName) == null) {
- throw new IllegalArgumentException("Cloud '" + cloudName.value() + " has no OS upgrade target");
- }
- curator.writeOsVersionTargets(new TreeSet<>(targets.values()));
- }
- }
-
- /** Returns the current OS version status */
- public OsVersionStatus osVersionStatus() {
- return curator.readOsVersionStatus();
- }
-
- /** Replace the current OS version status with a new one */
- public void updateOsVersionStatus(OsVersionStatus newStatus) {
- try (Mutex lock = curator.lockOsVersionStatus()) {
- OsVersionStatus currentStatus = curator.readOsVersionStatus();
- for (CloudName cloud : clouds()) {
- Set<Version> newVersions = newStatus.versionsIn(cloud);
- if (currentStatus.versionsIn(cloud).size() > 1 && newVersions.size() == 1) {
- log.info("All nodes in " + cloud + " cloud upgraded to OS version " +
- newVersions.iterator().next().toFullString());
- }
- }
- curator.writeOsVersionStatus(newStatus);
- }
- }
-
/** Returns the hostname of this controller */
public HostName hostname() {
return serviceRegistry.getHostname();
diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/OsController.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/OsController.java
new file mode 100644
index 00000000000..9d480c57c7a
--- /dev/null
+++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/OsController.java
@@ -0,0 +1,129 @@
+// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.vespa.hosted.controller;
+
+import com.yahoo.component.Version;
+import com.yahoo.config.provision.CloudName;
+import com.yahoo.transaction.Mutex;
+import com.yahoo.vespa.hosted.controller.persistence.CuratorDb;
+import com.yahoo.vespa.hosted.controller.versions.OsVersion;
+import com.yahoo.vespa.hosted.controller.versions.OsVersionStatus;
+import com.yahoo.vespa.hosted.controller.versions.OsVersionTarget;
+
+import java.time.Instant;
+import java.util.Map;
+import java.util.Objects;
+import java.util.Optional;
+import java.util.Set;
+import java.util.TreeSet;
+import java.util.function.Function;
+import java.util.logging.Logger;
+import java.util.stream.Collectors;
+
+/**
+ * A singleton owned by {@link Controller} which contains the methods and state for controlling OS upgrades.
+ *
+ * @author mpolden
+ */
+public record OsController(Controller controller) {
+
+ private static final Logger LOG = Logger.getLogger(OsController.class.getName());
+
+ public OsController {
+ Objects.requireNonNull(controller);
+ }
+
+ /** Returns the target OS version for infrastructure in this system. The controller will drive infrastructure OS
+ * upgrades to this version */
+ public Optional<OsVersionTarget> target(CloudName cloud) {
+ return targets().stream().filter(target -> target.osVersion().cloud().equals(cloud)).findFirst();
+ }
+
+ /** Returns all target OS versions in this system */
+ public Set<OsVersionTarget> targets() {
+ return curator().readOsVersionTargets();
+ }
+
+ /**
+ * Set the target OS version for given cloud in this system.
+ *
+ * @param version The target OS version
+ * @param cloud The cloud to upgrade
+ * @param force Allow downgrades, and override pinned target (if any)
+ * @param pin Pin this version. This prevents automatic scheduling of upgrades until version is unpinned
+ */
+ public void upgradeTo(Version version, CloudName cloud, boolean force, boolean pin) {
+ if (version.isEmpty()) {
+ throw new IllegalArgumentException("Invalid version '" + version.toFullString() + "'");
+ }
+ if (!controller.clouds().contains(cloud)) {
+ throw new IllegalArgumentException("Cloud '" + cloud + "' does not exist in this system");
+ }
+ Instant scheduledAt = controller.clock().instant();
+ try (Mutex lock = curator().lockOsVersions()) {
+ Map<CloudName, OsVersionTarget> targets = curator().readOsVersionTargets().stream()
+ .collect(Collectors.toMap(t -> t.osVersion().cloud(),
+ Function.identity()));
+
+ OsVersionTarget currentTarget = targets.get(cloud);
+ boolean downgrade = false;
+ if (currentTarget != null) {
+ boolean versionChange = !currentTarget.osVersion().version().equals(version);
+ downgrade = version.isBefore(currentTarget.osVersion().version());
+ if (versionChange && currentTarget.pinned() && !force) {
+ throw new IllegalArgumentException("Cannot " + (downgrade ? "downgrade" : "upgrade") + " cloud " +
+ cloud.value() + "' to version " + version.toFullString() +
+ ": Current target is pinned. Add 'force' parameter to override");
+ }
+ if (downgrade && !force) {
+ throw new IllegalArgumentException("Cannot downgrade cloud '" + cloud.value() + "' to version " +
+ version.toFullString() + ": Missing 'force' parameter");
+ }
+ if (!versionChange && currentTarget.pinned() == pin) return; // No change
+ }
+
+ OsVersionTarget newTarget = new OsVersionTarget(new OsVersion(version, cloud), scheduledAt, pin, downgrade);
+ targets.put(cloud, newTarget);
+ curator().writeOsVersionTargets(new TreeSet<>(targets.values()));
+ LOG.info("Triggered OS " + (downgrade ? "downgrade" : "upgrade") + " to " + version.toFullString() +
+ " in cloud " + cloud.value());
+ }
+ }
+
+ /** Clear the target OS version for given cloud in this system */
+ public void cancelUpgrade(CloudName cloudName) {
+ try (Mutex lock = curator().lockOsVersions()) {
+ Map<CloudName, OsVersionTarget> targets = curator().readOsVersionTargets().stream()
+ .collect(Collectors.toMap(t -> t.osVersion().cloud(),
+ Function.identity()));
+ if (targets.remove(cloudName) == null) {
+ throw new IllegalArgumentException("Cloud '" + cloudName.value() + " has no OS upgrade target");
+ }
+ curator().writeOsVersionTargets(new TreeSet<>(targets.values()));
+ }
+ }
+
+ /** Returns the current OS version status */
+ public OsVersionStatus status() {
+ return curator().readOsVersionStatus();
+ }
+
+ /** Replace the current OS version status with a new one */
+ public void updateStatus(OsVersionStatus newStatus) {
+ try (Mutex lock = curator().lockOsVersionStatus()) {
+ OsVersionStatus currentStatus = curator().readOsVersionStatus();
+ for (CloudName cloud : controller.clouds()) {
+ Set<Version> newVersions = newStatus.versionsIn(cloud);
+ if (currentStatus.versionsIn(cloud).size() > 1 && newVersions.size() == 1) {
+ LOG.info("All nodes in " + cloud + " cloud upgraded to OS version " +
+ newVersions.iterator().next().toFullString());
+ }
+ }
+ curator().writeOsVersionStatus(newStatus);
+ }
+ }
+
+ private CuratorDb curator() {
+ return controller.curator();
+ }
+
+}
diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/RoutingController.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/RoutingController.java
index a635d512054..3d550973f22 100644
--- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/RoutingController.java
+++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/RoutingController.java
@@ -66,8 +66,8 @@ import java.util.stream.Collectors;
import static java.util.stream.Collectors.toMap;
/**
- * The routing controller encapsulates state and methods for inspecting and manipulating deployment endpoints in a
- * hosted Vespa system.
+ * The routing controller is owned by {@link Controller} and encapsulates state and methods for inspecting and
+ * manipulating deployment endpoints in a hosted Vespa system.
*
* The one-stop shop for all your routing needs!
*
diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/TenantController.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/TenantController.java
index b6a645d96d2..bf2f2ab90eb 100644
--- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/TenantController.java
+++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/TenantController.java
@@ -27,7 +27,7 @@ import java.util.logging.Level;
import java.util.logging.Logger;
/**
- * A singleton owned by the Controller which contains the methods and state for controlling tenants.
+ * A singleton owned by the {@link Controller} which contains the methods and state for controlling tenants.
*
* @author bratseth
* @author mpolden
diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/MetricsReporter.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/MetricsReporter.java
index 67188eb5e3a..98de84216e0 100644
--- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/MetricsReporter.java
+++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/MetricsReporter.java
@@ -276,7 +276,7 @@ public class MetricsReporter extends ControllerMaintainer {
}
private Map<NodeVersion, Duration> osChangeDurations() {
- return changeDurations(controller().osVersionStatus().versions().values(), Function.identity());
+ return changeDurations(controller().os().status().versions().values(), Function.identity());
}
private <V> Map<NodeVersion, Duration> changeDurations(Collection<V> versions, Function<V, List<NodeVersion>> versionsGetter) {
diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/OsUpgradeScheduler.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/OsUpgradeScheduler.java
index f0d218ae6cf..ac76ecf4b2a 100644
--- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/OsUpgradeScheduler.java
+++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/OsUpgradeScheduler.java
@@ -8,6 +8,7 @@ import com.yahoo.vespa.hosted.controller.Controller;
import com.yahoo.vespa.hosted.controller.api.integration.deployment.ArtifactRepository;
import com.yahoo.vespa.hosted.controller.api.integration.deployment.OsRelease;
import com.yahoo.vespa.hosted.controller.versions.OsVersionTarget;
+import com.yahoo.yolean.Exceptions;
import java.time.DayOfWeek;
import java.time.Duration;
@@ -19,6 +20,8 @@ import java.time.format.DateTimeFormatter;
import java.time.temporal.ChronoUnit;
import java.util.Objects;
import java.util.Optional;
+import java.util.logging.Level;
+import java.util.logging.Logger;
/**
* Automatically schedule upgrades to the next OS version.
@@ -27,6 +30,8 @@ import java.util.Optional;
*/
public class OsUpgradeScheduler extends ControllerMaintainer {
+ private static final Logger LOG = Logger.getLogger(OsUpgradeScheduler.class.getName());
+
public OsUpgradeScheduler(Controller controller, Duration interval) {
super(controller, interval);
}
@@ -34,18 +39,27 @@ public class OsUpgradeScheduler extends ControllerMaintainer {
@Override
protected double maintain() {
Instant now = controller().clock().instant();
+ int attempts = 0;
+ int failures = 0;
for (var cloud : controller().clouds()) {
Optional<Change> change = changeIn(cloud, now);
if (change.isEmpty()) continue;
if (!change.get().scheduleAt(now)) continue;
- controller().upgradeOsIn(cloud, change.get().version(), false);
+ try {
+ attempts++;
+ controller().os().upgradeTo(change.get().version(), cloud, false, false);
+ } catch (IllegalArgumentException e) {
+ failures++;
+ LOG.log(Level.WARNING, "Failed to schedule OS upgrade: " + Exceptions.toMessageString(e) +
+ ". Retrying in " + interval());
+ }
}
- return 0.0;
+ return asSuccessFactorDeviation(attempts, failures);
}
/** Returns the wanted change for cloud at given instant, if any */
public Optional<Change> changeIn(CloudName cloud, Instant instant) {
- Optional<OsVersionTarget> currentTarget = controller().osVersionTarget(cloud);
+ Optional<OsVersionTarget> currentTarget = controller().os().target(cloud);
if (currentTarget.isEmpty()) return Optional.empty();
if (upgradingToNewMajor(cloud)) return Optional.empty(); // Skip further upgrades until major version upgrade is complete
@@ -54,7 +68,7 @@ public class OsUpgradeScheduler extends ControllerMaintainer {
}
private boolean upgradingToNewMajor(CloudName cloud) {
- return controller().osVersionStatus().versionsIn(cloud).stream()
+ return controller().os().status().versionsIn(cloud).stream()
.filter(version -> !version.isEmpty()) // Ignore empty/unknown versions
.map(Version::getMajor)
.distinct()
diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/OsUpgrader.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/OsUpgrader.java
index b44643b4405..4df40850cc9 100644
--- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/OsUpgrader.java
+++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/OsUpgrader.java
@@ -44,17 +44,18 @@ public class OsUpgrader extends InfrastructureUpgrader<OsVersionTarget> {
@Override
protected void upgrade(OsVersionTarget target, SystemApplication application, ZoneApi zone) {
- log.info(Text.format("Upgrading OS of %s to version %s in %s in cloud %s", application.id(),
- target.osVersion().version().toFullString(),
- zone.getVirtualId(), zone.getCloudName()));
+ log.info(Text.format((target.downgrade() ? "Downgrading" : "Upgrading") + " OS of %s to version %s in %s in cloud %s", application.id(),
+ target.osVersion().version().toFullString(),
+ zone.getVirtualId(), zone.getCloudName()));
controller().serviceRegistry().configServer().nodeRepository().upgradeOs(zone.getVirtualId(), application.nodeType(),
- target.osVersion().version());
+ target.osVersion().version(),
+ target.downgrade());
}
@Override
protected boolean convergedOn(OsVersionTarget target, SystemApplication application, ZoneApi zone, NodeSlice nodeSlice) {
Version currentVersion = versionOf(nodeSlice, zone, application, Node::currentOsVersion).orElse(target.osVersion().version());
- return !currentVersion.isBefore(target.osVersion().version());
+ return satisfiedBy(currentVersion, target);
}
@Override
@@ -66,10 +67,10 @@ public class OsUpgrader extends InfrastructureUpgrader<OsVersionTarget> {
@Override
protected Optional<OsVersionTarget> target() {
- // Return target if we have nodes in this cloud on a lower version
- return controller().osVersionTarget(cloud)
- .filter(target -> controller().osVersionStatus().nodesIn(cloud).stream()
- .anyMatch(node -> node.currentVersion().isBefore(target.osVersion().version())));
+ // Return target if we have nodes in this cloud on the wrong version
+ return controller().os().target(cloud)
+ .filter(target -> controller().os().status().nodesIn(cloud).stream()
+ .anyMatch(node -> !satisfiedBy(node.currentVersion(), target)));
}
@Override
@@ -78,10 +79,19 @@ public class OsUpgrader extends InfrastructureUpgrader<OsVersionTarget> {
return controller().serviceRegistry().configServer().nodeRepository()
.targetVersionsOf(zone.getVirtualId())
.osVersion(application.nodeType())
- .map(currentTarget -> target.osVersion().version().isAfter(currentTarget))
+ .map(currentVersion -> !satisfiedBy(currentVersion, target))
.orElse(true);
}
+ private static boolean satisfiedBy(Version version, OsVersionTarget target) {
+ if (target.downgrade()) {
+ // When downgrading we want an exact version
+ return version.equals(target.osVersion().version());
+ }
+ // Otherwise, matching or later version is fine
+ return !version.isBefore(target.osVersion().version());
+ }
+
/** Returns whether node currently allows upgrades */
public static boolean canUpgrade(Node node, boolean includeDeferring) {
return (includeDeferring || !node.deferOsUpgrade()) && upgradableNodeStates.contains(node.state());
diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/OsVersionStatusUpdater.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/OsVersionStatusUpdater.java
index c643df6af68..831b4275422 100644
--- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/OsVersionStatusUpdater.java
+++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/OsVersionStatusUpdater.java
@@ -21,7 +21,7 @@ public class OsVersionStatusUpdater extends ControllerMaintainer {
protected double maintain() {
try {
OsVersionStatus newStatus = OsVersionStatus.compute(controller());
- controller().updateOsVersionStatus(newStatus);
+ controller().os().updateStatus(newStatus);
return 0.0;
} catch (Exception e) {
log.log(Level.WARNING, "Failed to compute OS version status: " + Exceptions.toMessageString(e) +
diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/persistence/CuratorDb.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/persistence/CuratorDb.java
index 89b4166e477..ae35306c783 100644
--- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/persistence/CuratorDb.java
+++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/persistence/CuratorDb.java
@@ -63,6 +63,7 @@ import java.util.Map;
import java.util.NavigableMap;
import java.util.Optional;
import java.util.Set;
+import java.util.SortedSet;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.TimeoutException;
import java.util.concurrent.atomic.AtomicReference;
@@ -335,7 +336,7 @@ public class CuratorDb {
// Infrastructure upgrades
- public void writeOsVersionTargets(Set<OsVersionTarget> versions) {
+ public void writeOsVersionTargets(SortedSet<OsVersionTarget> versions) {
curator.set(osVersionTargetsPath(), asJson(osVersionTargetSerializer.toSlime(versions)));
}
diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/persistence/OsVersionTargetSerializer.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/persistence/OsVersionTargetSerializer.java
index c06a36d3a1d..a5e5d925865 100644
--- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/persistence/OsVersionTargetSerializer.java
+++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/persistence/OsVersionTargetSerializer.java
@@ -12,6 +12,7 @@ import com.yahoo.vespa.hosted.controller.versions.OsVersionTarget;
import java.time.Instant;
import java.util.Collections;
import java.util.Set;
+import java.util.SortedSet;
import java.util.TreeSet;
/**
@@ -25,12 +26,14 @@ public class OsVersionTargetSerializer {
private static final String versionsField = "versions";
private static final String scheduledAtField = "scheduledAt";
+ private static final String pinnedField = "pinned";
+ private static final String downgradeField = "downgrade";
public OsVersionTargetSerializer(OsVersionSerializer osVersionSerializer) {
this.osVersionSerializer = osVersionSerializer;
}
- public Slime toSlime(Set<OsVersionTarget> osVersionTargets) {
+ public Slime toSlime(SortedSet<OsVersionTarget> osVersionTargets) {
Slime slime = new Slime();
Cursor root = slime.setObject();
Cursor array = root.setArray(versionsField);
@@ -44,7 +47,9 @@ public class OsVersionTargetSerializer {
array.traverse((ArrayTraverser) (i, inspector) -> {
OsVersion osVersion = osVersionSerializer.fromSlime(inspector);
Instant scheduledAt = SlimeUtils.instant(inspector.field(scheduledAtField));
- osVersionTargets.add(new OsVersionTarget(osVersion, scheduledAt));
+ boolean pinned = inspector.field(pinnedField).asBool();
+ boolean downgrade = inspector.field(downgradeField).asBool();
+ osVersionTargets.add(new OsVersionTarget(osVersion, scheduledAt, pinned, downgrade));
});
return Collections.unmodifiableSet(osVersionTargets);
}
@@ -52,6 +57,8 @@ public class OsVersionTargetSerializer {
private void toSlime(OsVersionTarget target, Cursor object) {
osVersionSerializer.toSlime(target.osVersion(), object);
object.setLong(scheduledAtField, target.scheduledAt().toEpochMilli());
+ object.setBool(pinnedField, target.pinned());
+ object.setBool(downgradeField, target.downgrade());
}
}
diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/restapi/os/OsApiHandler.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/restapi/os/OsApiHandler.java
index cfd6ea3d6d9..0fa2dc492c2 100644
--- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/restapi/os/OsApiHandler.java
+++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/restapi/os/OsApiHandler.java
@@ -39,7 +39,6 @@ import java.util.Optional;
import java.util.Set;
import java.util.StringJoiner;
import java.util.function.Function;
-import java.util.stream.Collectors;
/**
* This implements the /os/v1 API which provides operators with information about, and scheduling of OS upgrades for
@@ -142,24 +141,25 @@ public class OsApiHandler extends AuditLoggingRequestHandler {
Inspector root = requestData.get();
CloudName cloud = parseStringField("cloud", root, CloudName::from);
if (requireField("version", root).type() == Type.NIX) {
- controller.cancelOsUpgradeIn(cloud);
+ controller.os().cancelUpgrade(cloud);
return new MessageResponse("Cleared target OS version for cloud '" + cloud.value() + "'");
}
Version target = parseStringField("version", root, Version::fromString);
boolean force = root.field("force").asBool();
- controller.upgradeOsIn(cloud, target, force);
+ boolean pin = root.field("pin").asBool();
+ controller.os().upgradeTo(target, cloud, force, pin);
return new MessageResponse("Set target OS version for cloud '" + cloud.value() + "' to " +
- target.toFullString());
+ target.toFullString() + (pin ? " (pinned)" : ""));
}
private Slime osVersions() {
Slime slime = new Slime();
Cursor root = slime.setObject();
- Set<OsVersionTarget> targets = controller.osVersionTargets();
+ Set<OsVersionTarget> targets = controller.os().targets();
Cursor versions = root.setArray("versions");
Instant now = controller.clock().instant();
- controller.osVersionStatus().versions().forEach((osVersion, nodeVersions) -> {
+ controller.os().status().versions().forEach((osVersion, nodeVersions) -> {
Cursor currentVersionObject = versions.addObject();
currentVersionObject.setString("version", osVersion.version().toFullString());
Optional<OsVersionTarget> target = targets.stream().filter(t -> t.osVersion().equals(osVersion)).findFirst();
@@ -167,6 +167,7 @@ public class OsApiHandler extends AuditLoggingRequestHandler {
target.ifPresent(t -> {
currentVersionObject.setString("upgradeBudget", Duration.ZERO.toString());
currentVersionObject.setLong("scheduledAt", t.scheduledAt().toEpochMilli());
+ currentVersionObject.setBool("pinned", t.pinned());
Optional<Change> nextChange = osUpgradeScheduler.changeIn(t.osVersion().cloud(), now);
nextChange.ifPresent(c -> {
currentVersionObject.setString("nextVersion", c.version().toFullString());
diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/versions/OsVersionStatus.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/versions/OsVersionStatus.java
index 6f9888b79e0..f90cee65058 100644
--- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/versions/OsVersionStatus.java
+++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/versions/OsVersionStatus.java
@@ -14,6 +14,7 @@ import com.yahoo.vespa.hosted.controller.maintenance.OsUpgrader;
import java.time.Instant;
import java.util.ArrayList;
import java.util.Collection;
+import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
@@ -36,13 +37,15 @@ public record OsVersionStatus(Map<OsVersion, List<NodeVersion>> versions) {
this.versions = ImmutableMap.copyOf(Objects.requireNonNull(versions, "versions must be non-null"));
}
- /** Returns nodes eligible for OS upgrades that exist in given cloud */
+ /** Returns all node versions that exist in given cloud */
public List<NodeVersion> nodesIn(CloudName cloud) {
- return versions.entrySet().stream()
- .filter(entry -> entry.getKey().cloud().equals(cloud))
- .map(Map.Entry::getValue)
- .findFirst()
- .orElseGet(List::of);
+ List<NodeVersion> nodeVersions = new ArrayList<>();
+ versions.forEach((osVersion, nodesOnVersion) -> {
+ if (osVersion.cloud().equals(cloud)) {
+ nodeVersions.addAll(nodesOnVersion);
+ }
+ });
+ return Collections.unmodifiableList(nodeVersions);
}
/** Returns versions that exist in given cloud */
@@ -56,7 +59,7 @@ public record OsVersionStatus(Map<OsVersion, List<NodeVersion>> versions) {
/** Compute the current OS versions in this system. This is expensive and should be called infrequently */
public static OsVersionStatus compute(Controller controller) {
Map<OsVersion, List<NodeVersion>> osVersions = new HashMap<>();
- controller.osVersionTargets().forEach(target -> osVersions.put(target.osVersion(), new ArrayList<>()));
+ controller.os().targets().forEach(target -> osVersions.put(target.osVersion(), new ArrayList<>()));
for (var application : SystemApplication.all()) {
for (var zone : zonesToUpgrade(controller)) {
diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/versions/OsVersionTarget.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/versions/OsVersionTarget.java
index 471670ec399..e9785216376 100644
--- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/versions/OsVersionTarget.java
+++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/versions/OsVersionTarget.java
@@ -11,7 +11,7 @@ import java.util.Objects;
*
* @author mpolden
*/
-public record OsVersionTarget(OsVersion osVersion, Instant scheduledAt) implements VersionTarget, Comparable<OsVersionTarget> {
+public record OsVersionTarget(OsVersion osVersion, Instant scheduledAt, boolean pinned, boolean downgrade) implements VersionTarget, Comparable<OsVersionTarget> {
public OsVersionTarget {
Objects.requireNonNull(osVersion);
@@ -30,7 +30,7 @@ public record OsVersionTarget(OsVersion osVersion, Instant scheduledAt) implemen
@Override
public boolean downgrade() {
- return false; // Not supported by this target type
+ return downgrade;
}
}
diff --git a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/integration/NodeRepositoryMock.java b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/integration/NodeRepositoryMock.java
index 7004028c072..5b70942bfd1 100644
--- a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/integration/NodeRepositoryMock.java
+++ b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/integration/NodeRepositoryMock.java
@@ -169,7 +169,7 @@ public class NodeRepositoryMock implements NodeRepository {
}
@Override
- public void upgradeOs(ZoneId zone, NodeType type, Version version) {
+ public void upgradeOs(ZoneId zone, NodeType type, Version version, boolean downgrade) {
this.targetVersions.compute(zone, (ignored, targetVersions) -> {
if (targetVersions == null) {
targetVersions = TargetVersions.EMPTY;
diff --git a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/MetricsReporterTest.java b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/MetricsReporterTest.java
index 869a94c0449..53f2e85ad31 100644
--- a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/MetricsReporterTest.java
+++ b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/MetricsReporterTest.java
@@ -365,7 +365,7 @@ public class MetricsReporterTest {
// All nodes upgrade to initial OS version
var version0 = Version.fromString("8.0");
- tester.controller().upgradeOsIn(cloud, version0, false);
+ tester.controller().os().upgradeTo(version0, cloud, false, false);
osUpgrader.maintain();
tester.configServer().setOsVersion(version0, SystemApplication.tenantHost.id(), zone);
tester.configServer().setOsVersion(version0, SystemApplication.configServerHost.id(), zone);
@@ -379,7 +379,7 @@ public class MetricsReporterTest {
var currentVersion = i == 0 ? version0 : targets.get(i - 1);
var nextVersion = targets.get(i);
// System starts upgrading to next OS version
- tester.controller().upgradeOsIn(cloud, nextVersion, false);
+ tester.controller().os().upgradeTo(nextVersion, cloud, false, false);
runAll(osUpgrader, statusUpdater, reporter);
assertOsChangeDuration(Duration.ZERO, hosts);
assertOsNodeCount(hosts.size(), currentVersion);
diff --git a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/OsUpgradeSchedulerTest.java b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/OsUpgradeSchedulerTest.java
index 899745c7a39..27cdd847fb9 100644
--- a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/OsUpgradeSchedulerTest.java
+++ b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/OsUpgradeSchedulerTest.java
@@ -43,49 +43,58 @@ public class OsUpgradeSchedulerTest {
// Initial run does nothing as the cloud does not have a target
scheduler.maintain();
- assertTrue(tester.controller().osVersionTarget(cloud).isEmpty(), "No target set");
+ assertTrue(tester.controller().os().target(cloud).isEmpty(), "No target set");
// Target is set manually
Version version0 = Version.fromString("7.0.0.20220101");
- tester.controller().upgradeOsIn(cloud, version0, false);
+ tester.controller().os().upgradeTo(version0, cloud, false, false);
// Target remains unchanged as it hasn't expired yet
for (var interval : List.of(Duration.ZERO, Duration.ofDays(30))) {
tester.clock().advance(interval);
scheduler.maintain();
- assertEquals(version0, tester.controller().osVersionTarget(cloud).get().osVersion().version());
+ assertEquals(version0, tester.controller().os().target(cloud).get().osVersion().version());
}
- // New release becomes available, but is not triggered until cool-down period has passed
+ // New release becomes available, but is not triggered until cool-down period has passed, and we're inside a
+ // trigger period
Version version1 = Version.fromString("7.0.0.20220301");
tester.clock().advance(Duration.ofDays(16));
assertEquals("2022-03-03T09:05:00", formatInstant(tester.clock().instant()));
assertEquals(version1, scheduler.changeIn(cloud, tester.clock().instant()).get().version());
scheduler.maintain();
assertEquals(version0,
- tester.controller().osVersionTarget(cloud).get().osVersion().version(),
+ tester.controller().os().target(cloud).get().osVersion().version(),
"Target is unchanged because cooldown hasn't passed");
-
- // ... and we're inside the trigger period
tester.clock().advance(Duration.ofDays(3).plusHours(18));
assertEquals("2022-03-07T03:05:00", formatInstant(tester.clock().instant()));
scheduler.maintain();
assertEquals(version0,
- tester.controller().osVersionTarget(cloud).get().osVersion().version(),
+ tester.controller().os().target(cloud).get().osVersion().version(),
"Target is unchanged because we're outside trigger period");
tester.clock().advance(Duration.ofHours(5));
assertEquals("2022-03-07T08:05:00", formatInstant(tester.clock().instant()));
+
+ // Time constraints have now passed, but the current target has been pinned in the meantime
+ tester.controller().os().upgradeTo(version0, cloud, false, true);
Optional<OsUpgradeScheduler.Change> change = scheduler.changeIn(cloud, tester.clock().instant());
assertTrue(change.isPresent());
+ assertEquals(-1, scheduler.maintain());
+ assertEquals(version0,
+ tester.controller().os().target(cloud).get().osVersion().version(),
+ "Target is unchanged because it's pinned");
+
+ // Target is unpinned and new version is allowed to be scheduled
+ tester.controller().os().upgradeTo(version0, cloud, false, false);
scheduler.maintain();
assertEquals(version1,
- tester.controller().osVersionTarget(cloud).get().osVersion().version(),
+ tester.controller().os().target(cloud).get().osVersion().version(),
"New target set");
// A few more days pass and target remains unchanged
tester.clock().advance(Duration.ofDays(2));
scheduler.maintain();
- assertEquals(version1, tester.controller().osVersionTarget(cloud).get().osVersion().version());
+ assertEquals(version1, tester.controller().os().target(cloud).get().osVersion().version());
// Estimate next change
Optional<OsUpgradeScheduler.Change> nextChange = scheduler.changeIn(cloud, tester.clock().instant());
@@ -106,19 +115,19 @@ public class OsUpgradeSchedulerTest {
// Set initial target
Version version0 = Version.fromString("7.0.0.20220101");
- tester.controller().upgradeOsIn(cloud, version0, false);
+ tester.controller().os().upgradeTo(version0, cloud, false, false);
// Next version is triggered
Version version1 = Version.fromString("7.0.0.20220301");
tester.clock().advance(Duration.ofDays(44));
assertEquals("2022-03-01T02:05:00", formatInstant(tester.clock().instant()));
scheduler.maintain();
- assertEquals(version0, tester.controller().osVersionTarget(cloud).get().osVersion().version());
+ assertEquals(version0, tester.controller().os().target(cloud).get().osVersion().version());
// Cool-down passes
tester.clock().advance(Duration.ofDays(1));
assertEquals(version1, scheduler.changeIn(cloud, tester.clock().instant()).get().version());
scheduler.maintain();
- assertEquals(version1, tester.controller().osVersionTarget(cloud).get().osVersion().version());
+ assertEquals(version1, tester.controller().os().target(cloud).get().osVersion().version());
// Estimate next change
Optional<OsUpgradeScheduler.Change> nextChange = scheduler.changeIn(cloud, tester.clock().instant());
@@ -137,7 +146,7 @@ public class OsUpgradeSchedulerTest {
// Set initial target
CloudName cloud = tester.controller().clouds().iterator().next();
Version version0 = Version.fromString("8.0");
- tester.controller().upgradeOsIn(cloud, version0, false);
+ tester.controller().os().upgradeTo(version0, cloud, false, false);
// Stable release (tagged outside trigger period) is scheduled once trigger period opens
Version version1 = Version.fromString("8.1");
@@ -151,7 +160,7 @@ public class OsUpgradeSchedulerTest {
// A newer version is triggered manually
Version version3 = Version.fromString("8.3");
- tester.controller().upgradeOsIn(cloud, version3, false);
+ tester.controller().os().upgradeTo(version3, cloud, false, false);
// Nothing happens in next iteration as tagged release is older than manually triggered version
scheduleUpgradeAfter(Duration.ofDays(7), version3, scheduler, tester);
@@ -168,7 +177,7 @@ public class OsUpgradeSchedulerTest {
// Set initial target
CloudName cloud = tester.controller().clouds().iterator().next();
Version version0 = Version.fromString("8.0");
- tester.controller().upgradeOsIn(cloud, version0, false);
+ tester.controller().os().upgradeTo(version0, cloud, false, false);
// Latest release is not scheduled immediately
Version version1 = Version.fromString("8.1");
@@ -205,7 +214,7 @@ public class OsUpgradeSchedulerTest {
tester.clock().advance(duration);
scheduler.maintain();
CloudName cloud = tester.controller().clouds().iterator().next();
- OsVersionTarget target = tester.controller().osVersionTarget(cloud).get();
+ OsVersionTarget target = tester.controller().os().target(cloud).get();
assertEquals(version, target.osVersion().version());
}
diff --git a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/OsUpgraderTest.java b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/OsUpgraderTest.java
index adfb0286202..056db4f119c 100644
--- a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/OsUpgraderTest.java
+++ b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/OsUpgraderTest.java
@@ -22,7 +22,6 @@ import java.util.Collection;
import java.util.List;
import java.util.function.Function;
import java.util.function.UnaryOperator;
-import java.util.stream.Collectors;
import java.util.stream.Stream;
import static org.junit.jupiter.api.Assertions.assertEquals;
@@ -72,9 +71,9 @@ public class OsUpgraderTest {
// New OS version released
Version version1 = Version.fromString("7.1");
- tester.controller().upgradeOsIn(cloud1, Version.fromString("7.0"), false);
- tester.controller().upgradeOsIn(cloud1, version1, false);
- assertEquals(1, tester.controller().osVersionTargets().size()); // Only allows one version per cloud
+ tester.controller().os().upgradeTo(Version.fromString("7.0"), cloud1, false, false);
+ tester.controller().os().upgradeTo(version1, cloud1, false, false);
+ assertEquals(1, tester.controller().os().targets().size()); // Only allows one version per cloud
statusUpdater.maintain();
// zone 0: controllers upgrade first
@@ -128,9 +127,9 @@ public class OsUpgraderTest {
osUpgrader.maintain();
assertWanted(version1, SystemApplication.tenantHost, zone1, zone2, zone3, zone4);
statusUpdater.maintain();
- assertTrue(tester.controller().osVersionStatus().nodesIn(cloud1).stream()
- .filter(node -> !node.hostname().equals(nodeDeferringOsUpgrade.hostname()))
- .allMatch(node -> node.currentVersion().equals(version1)),
+ assertTrue(tester.controller().os().status().nodesIn(cloud1).stream()
+ .filter(node -> !node.hostname().equals(nodeDeferringOsUpgrade.hostname()))
+ .allMatch(node -> node.currentVersion().equals(version1)),
"All non-deferring nodes are on target version");
}
@@ -151,8 +150,8 @@ public class OsUpgraderTest {
// New OS version released
Version version = Version.fromString("7.1");
- tester.controller().upgradeOsIn(cloud, Version.fromString("7.0"), false);
- tester.controller().upgradeOsIn(cloud, version, false); // Replaces existing target
+ tester.controller().os().upgradeTo(Version.fromString("7.0"), cloud, false, false);
+ tester.controller().os().upgradeTo(version, cloud, false, false); // Replaces existing target
statusUpdater.maintain();
// zone 1 upgrades
@@ -173,12 +172,55 @@ public class OsUpgraderTest {
// No more upgrades
osUpgrader.maintain();
assertWanted(version, SystemApplication.tenantHost, zone1, zone2);
- assertTrue(tester.controller().osVersionStatus().nodesIn(cloud).stream()
- .noneMatch(node -> node.currentVersion().isBefore(version)), "All nodes on target version or newer");
+ assertTrue(tester.controller().os().status().nodesIn(cloud).stream()
+ .noneMatch(node -> node.currentVersion().isBefore(version)), "All nodes on target version or newer");
+ }
+
+ @Test
+ public void downgrade_os() {
+ CloudName cloud = CloudName.from("cloud");
+ ZoneApi zone1 = zone("dev.us-east-1", cloud);
+ ZoneApi zone2 = zone("prod.us-west-1", cloud);
+ UpgradePolicy upgradePolicy = UpgradePolicy.builder()
+ .upgrade(zone1)
+ .upgrade(zone2)
+ .build();
+ OsUpgrader osUpgrader = osUpgrader(upgradePolicy, cloud, true);
+
+ // Bootstrap system
+ tester.configServer().bootstrap(List.of(zone1.getId(), zone2.getId()),
+ List.of(SystemApplication.tenantHost));
+
+ // New OS version released
+ Version version0 = Version.fromString("1.0");
+ Version version1 = Version.fromString("2.0");
+ tester.controller().os().upgradeTo(version1, cloud, false, false);
+ statusUpdater.maintain();
+
+ // All zones upgrade
+ for (var zone : List.of(zone1, zone2)) {
+ osUpgrader.maintain();
+ completeUpgrade(version1, SystemApplication.tenantHost, zone);
+ statusUpdater.maintain();
+ }
+ assertTrue(tester.controller().os().status().nodesIn(cloud).stream()
+ .allMatch(node -> node.currentVersion().equals(version1)), "All nodes on target version");
+
+ // Downgrade is triggered
+ tester.controller().os().upgradeTo(version0, cloud, true, false);
+
+ // All zones downgrade, in reverse order
+ for (var zone : List.of(zone2, zone1)) {
+ osUpgrader.maintain();
+ completeUpgrade(version0, SystemApplication.tenantHost, zone);
+ statusUpdater.maintain();
+ }
+ assertTrue(tester.controller().os().status().nodesIn(cloud).stream()
+ .allMatch(node -> node.currentVersion().equals(version0)), "All nodes on target version");
}
private List<NodeVersion> nodesOn(Version version) {
- return tester.controller().osVersionStatus().versions().entrySet().stream()
+ return tester.controller().os().status().versions().entrySet().stream()
.filter(entry -> entry.getKey().version().equals(version))
.flatMap(entry -> entry.getValue().stream())
.toList();
@@ -241,22 +283,23 @@ public class OsUpgraderTest {
completeUpgrade(nodeCount, version, version, application, zones);
}
- private void completeUpgrade(int nodeCount, Version wantedVersion, Version version, SystemApplication application, ZoneApi... zones) {
+ private void completeUpgrade(int nodeCount, Version wantedVersion, Version currentVersion, SystemApplication application, ZoneApi... zones) {
assertWanted(wantedVersion, application, zones);
for (ZoneApi zone : zones) {
int nodesUpgraded = 0;
List<Node> nodes = nodesRequiredToUpgrade(zone, application);
for (Node node : nodes) {
if (node.currentVersion().equals(wantedVersion)) continue;
- nodeRepository().putNodes(zone.getVirtualId(), Node.builder(node).wantedOsVersion(version)
- .currentOsVersion(version)
+ nodeRepository().putNodes(zone.getVirtualId(), Node.builder(node)
+ .wantedOsVersion(currentVersion)
+ .currentOsVersion(currentVersion)
.build());
if (++nodesUpgraded == nodeCount) {
break;
}
}
if (nodesUpgraded == nodes.size()) {
- assertCurrent(version, application, zone);
+ assertCurrent(currentVersion, application, zone);
}
}
}
diff --git a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/OsVersionStatusUpdaterTest.java b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/OsVersionStatusUpdaterTest.java
index d4d146f0834..f45c7bfcdfb 100644
--- a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/OsVersionStatusUpdaterTest.java
+++ b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/OsVersionStatusUpdaterTest.java
@@ -35,24 +35,24 @@ public class OsVersionStatusUpdaterTest {
tester.zoneRegistry().setOsUpgradePolicy(CloudName.DEFAULT, upgradePolicy.build());
// Initially empty
- assertSame(OsVersionStatus.empty, tester.controller().osVersionStatus());
+ assertSame(OsVersionStatus.empty, tester.controller().os().status());
// Setting a new target adds it to current status
Version version1 = Version.fromString("7.1");
CloudName cloud = CloudName.DEFAULT;
- tester.controller().upgradeOsIn(cloud, version1, false);
+ tester.controller().os().upgradeTo(version1, cloud, false, false);
statusUpdater.maintain();
- var osVersions = tester.controller().osVersionStatus().versions();
+ var osVersions = tester.controller().os().status().versions();
assertEquals(3, osVersions.size());
assertFalse(osVersions.get(new OsVersion(Version.emptyVersion, cloud)).isEmpty(), "All nodes on unknown version");
assertTrue(osVersions.get(new OsVersion(version1, cloud)).isEmpty(), "No nodes on current target");
CloudName otherCloud = CloudName.AWS;
- tester.controller().upgradeOsIn(otherCloud, version1, false);
+ tester.controller().os().upgradeTo(version1, otherCloud, false, false);
statusUpdater.maintain();
- osVersions = tester.controller().osVersionStatus().versions();
+ osVersions = tester.controller().os().status().versions();
assertEquals(4, osVersions.size()); // 2 in cloud, 2 in otherCloud.
assertFalse(osVersions.get(new OsVersion(Version.emptyVersion, cloud)).isEmpty(), "All nodes on unknown version");
assertTrue(osVersions.get(new OsVersion(version1, cloud)).isEmpty(), "No nodes on current target");
diff --git a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/persistence/OsVersionTargetSerializerTest.java b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/persistence/OsVersionTargetSerializerTest.java
index 55d940ca6f9..7bec217c889 100644
--- a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/persistence/OsVersionTargetSerializerTest.java
+++ b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/persistence/OsVersionTargetSerializerTest.java
@@ -1,7 +1,6 @@
// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
package com.yahoo.vespa.hosted.controller.persistence;
-import com.google.common.collect.ImmutableSet;
import com.yahoo.component.Version;
import com.yahoo.config.provision.CloudName;
import com.yahoo.vespa.hosted.controller.versions.OsVersion;
@@ -10,6 +9,8 @@ import org.junit.jupiter.api.Test;
import java.time.Instant;
import java.util.Set;
+import java.util.SortedSet;
+import java.util.TreeSet;
import static org.junit.jupiter.api.Assertions.assertEquals;
@@ -21,10 +22,10 @@ public class OsVersionTargetSerializerTest {
@Test
void serialization() {
OsVersionTargetSerializer serializer = new OsVersionTargetSerializer(new OsVersionSerializer());
- Set<OsVersionTarget> targets = ImmutableSet.of(
- new OsVersionTarget(new OsVersion(Version.fromString("7.1"), CloudName.DEFAULT), Instant.ofEpochMilli(123)),
- new OsVersionTarget(new OsVersion(Version.fromString("7.1"), CloudName.from("foo")), Instant.ofEpochMilli(456))
- );
+ SortedSet<OsVersionTarget> targets = new TreeSet<>();
+ targets.add(new OsVersionTarget(new OsVersion(Version.fromString("7.1"), CloudName.DEFAULT), Instant.ofEpochMilli(123), false, false));
+ targets.add(new OsVersionTarget(new OsVersion(Version.fromString("7.1"), CloudName.from("foo")), Instant.ofEpochMilli(456), true, true));
+
Set<OsVersionTarget> serialized = serializer.fromSlime(serializer.toSlime(targets));
assertEquals(targets, serialized);
}
diff --git a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/restapi/os/OsApiTest.java b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/restapi/os/OsApiTest.java
index 1067db31473..e569e0aca5b 100644
--- a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/restapi/os/OsApiTest.java
+++ b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/restapi/os/OsApiTest.java
@@ -96,12 +96,20 @@ public class OsApiTest extends ControllerContainerTest {
assertFile(new Request("http://localhost:8080/os/v1/"), "versions-all-upgraded.json");
// Downgrade with force is permitted
- assertResponse(new Request("http://localhost:8080/os/v1/", "{\"version\": \"7.5.1\", \"cloud\": \"cloud1\", \"force\": true}", Request.Method.PATCH),
- "{\"message\":\"Set target OS version for cloud 'cloud1' to 7.5.1\"}", 200);
+ assertResponse(new Request("http://localhost:8080/os/v1/", "{\"version\": \"8.2.0\", \"cloud\": \"cloud2\", \"force\": true}", Request.Method.PATCH),
+ "{\"message\":\"Set target OS version for cloud 'cloud2' to 8.2.0\"}", 200);
// Clear target for a given cloud
- assertResponse(new Request("http://localhost:8080/os/v1/", "{\"version\": null, \"cloud\": \"cloud2\"}", Request.Method.PATCH),
- "{\"message\":\"Cleared target OS version for cloud 'cloud2'\"}", 200);
+ assertResponse(new Request("http://localhost:8080/os/v1/", "{\"version\": null, \"cloud\": \"cloud1\"}", Request.Method.PATCH),
+ "{\"message\":\"Cleared target OS version for cloud 'cloud1'\"}", 200);
+
+ // Pin/unpin a version
+ assertResponse(new Request("http://localhost:8080/os/v1/", "{\"version\": \"7.5.2\", \"cloud\": \"cloud1\", \"pin\": true}", Request.Method.PATCH),
+ "{\"message\":\"Set target OS version for cloud 'cloud1' to 7.5.2 (pinned)\"}", 200);
+ assertResponse(new Request("http://localhost:8080/os/v1/", "{\"version\": \"7.5.2\", \"cloud\": \"cloud1\", \"pin\": false}", Request.Method.PATCH),
+ "{\"message\":\"Set target OS version for cloud 'cloud1' to 7.5.2\"}", 200);
+ assertResponse(new Request("http://localhost:8080/os/v1/", "{\"version\": \"7.5.2\", \"cloud\": \"cloud1\", \"pin\": true}", Request.Method.PATCH),
+ "{\"message\":\"Set target OS version for cloud 'cloud1' to 7.5.2 (pinned)\"}", 200);
// Error: Missing fields
assertResponse(new Request("http://localhost:8080/os/v1/", "{\"version\": \"7.6\"}", Request.Method.PATCH),
@@ -120,8 +128,12 @@ public class OsApiTest extends ControllerContainerTest {
"{\"error-code\":\"BAD_REQUEST\",\"message\":\"Cloud 'foo' does not exist in this system\"}", 400);
// Error: Downgrade OS
- assertResponse(new Request("http://localhost:8080/os/v1/", "{\"version\": \"7.4.1\", \"cloud\": \"cloud1\"}", Request.Method.PATCH),
- "{\"error-code\":\"BAD_REQUEST\",\"message\":\"Cannot downgrade cloud 'cloud1' to version 7.4.1\"}", 400);
+ assertResponse(new Request("http://localhost:8080/os/v1/", "{\"version\": \"7.4.1\", \"cloud\": \"cloud2\"}", Request.Method.PATCH),
+ "{\"error-code\":\"BAD_REQUEST\",\"message\":\"Cannot downgrade cloud 'cloud2' to version 7.4.1: Missing 'force' parameter\"}", 400);
+
+ // Error: Change a pinned version
+ assertResponse(new Request("http://localhost:8080/os/v1/", "{\"version\": \"7.5.3\", \"cloud\": \"cloud1\"}", Request.Method.PATCH),
+ "{\"error-code\":\"BAD_REQUEST\",\"message\":\"Cannot upgrade cloud cloud1' to version 7.5.3: Current target is pinned. Add 'force' parameter to override\"}", 400);
// Request firmware checks in all zones.
assertResponse(new Request("http://localhost:8080/os/v1/firmware/", "", Request.Method.POST),
@@ -148,7 +160,7 @@ public class OsApiTest extends ControllerContainerTest {
}
private void updateVersionStatus() {
- tester.controller().updateOsVersionStatus(OsVersionStatus.compute(tester.controller()));
+ tester.controller().os().updateStatus(OsVersionStatus.compute(tester.controller()));
}
private void completeUpgrade(ZoneId... zones) {
diff --git a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/restapi/os/responses/versions-all-upgraded.json b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/restapi/os/responses/versions-all-upgraded.json
index 92a2cad86f1..0f2e05986b6 100644
--- a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/restapi/os/responses/versions-all-upgraded.json
+++ b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/restapi/os/responses/versions-all-upgraded.json
@@ -5,6 +5,7 @@
"targetVersion": true,
"upgradeBudget": "PT0S",
"scheduledAt": 1234,
+ "pinned": false,
"cloud": "cloud1",
"nodes": [
{
@@ -104,6 +105,7 @@
"targetVersion": true,
"upgradeBudget": "PT0S",
"scheduledAt": 1234,
+ "pinned": false,
"nextVersion": "8.2.1.20211228",
"nextScheduledAt": 1640743200000,
"cloud": "cloud2",
diff --git a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/restapi/os/responses/versions-partially-upgraded.json b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/restapi/os/responses/versions-partially-upgraded.json
index 662ff4bb373..20d7147a258 100644
--- a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/restapi/os/responses/versions-partially-upgraded.json
+++ b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/restapi/os/responses/versions-partially-upgraded.json
@@ -57,6 +57,7 @@
"targetVersion": true,
"upgradeBudget": "PT0S",
"scheduledAt": 1234,
+ "pinned": false,
"cloud": "cloud1",
"nodes": [
{
@@ -163,6 +164,7 @@
"targetVersion": true,
"upgradeBudget": "PT0S",
"scheduledAt": 1234,
+ "pinned": false,
"nextVersion": "8.2.1.20211228",
"nextScheduledAt": 1640743200000,
"cloud": "cloud2",