summary | refs | log | tree | commit | diff | stats
path: root/controller-server/src
diff options
context:
space:
mode:
author Martin Polden <mpolden@mpolden.no> 2023-07-24 15:35:45 +0200
committer Martin Polden <mpolden@mpolden.no> 2023-07-25 11:12:37 +0200
commit cff56da7d0aa232fb73ba3685c349b87fe26a749 (patch)
tree c9c8a53021f4df3143c6781fc39b9f17b8902789 /controller-server/src
parent 3798bad2b4549620d9b49a5994bf73f723007a42 (diff)
Require certification of scheduled OS upgrades
Diffstat (limited to 'controller-server/src')
-rw-r--r-- controller-server/src/main/java/com/yahoo/vespa/hosted/controller/OsController.java 71
-rw-r--r-- controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/OsUpgradeScheduler.java 32
-rw-r--r-- controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/OsVersionStatusUpdater.java 1
-rw-r--r-- controller-server/src/main/java/com/yahoo/vespa/hosted/controller/persistence/CertifiedOsVersionSerializer.java 50
-rw-r--r-- controller-server/src/main/java/com/yahoo/vespa/hosted/controller/persistence/CuratorDb.java 20
-rw-r--r-- controller-server/src/main/java/com/yahoo/vespa/hosted/controller/restapi/os/OsApiHandler.java 35
-rw-r--r-- controller-server/src/main/java/com/yahoo/vespa/hosted/controller/versions/CertifiedOsVersion.java 19
-rw-r--r-- controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/OsUpgradeSchedulerTest.java 36
-rw-r--r-- controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/OsVersionStatusUpdaterTest.java 26
-rw-r--r-- controller-server/src/test/java/com/yahoo/vespa/hosted/controller/persistence/CertifiedOsVersionSerializerTest.java 30
-rw-r--r-- controller-server/src/test/java/com/yahoo/vespa/hosted/controller/restapi/os/OsApiTest.java 9
11 files changed, 304 insertions, 25 deletions
diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/OsController.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/OsController.java
index 9d480c57c7a..1c77efe095c 100644
--- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/OsController.java
+++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/OsController.java
@@ -5,11 +5,13 @@ import com.yahoo.component.Version;
import com.yahoo.config.provision.CloudName;
import com.yahoo.transaction.Mutex;
import com.yahoo.vespa.hosted.controller.persistence.CuratorDb;
+import com.yahoo.vespa.hosted.controller.versions.CertifiedOsVersion;
import com.yahoo.vespa.hosted.controller.versions.OsVersion;
import com.yahoo.vespa.hosted.controller.versions.OsVersionStatus;
import com.yahoo.vespa.hosted.controller.versions.OsVersionTarget;
import java.time.Instant;
+import java.util.HashSet;
import java.util.Map;
import java.util.Objects;
import java.util.Optional;
@@ -55,9 +57,7 @@ public record OsController(Controller controller) {
if (version.isEmpty()) {
throw new IllegalArgumentException("Invalid version '" + version.toFullString() + "'");
}
- if (!controller.clouds().contains(cloud)) {
- throw new IllegalArgumentException("Cloud '" + cloud + "' does not exist in this system");
- }
+ requireCloud(cloud);
Instant scheduledAt = controller.clock().instant();
try (Mutex lock = curator().lockOsVersions()) {
Map<CloudName, OsVersionTarget> targets = curator().readOsVersionTargets().stream()
@@ -122,6 +122,71 @@ public record OsController(Controller controller) {
}
}
+    /**
+     * Certifies the given OS version in the given cloud as compatible with the given Vespa version.
+     *
+     * If the OS version already has a certification in this cloud, that certification is returned as-is
+     * and nothing is written.
+     *
+     * @return the certification now in effect for the OS version
+     * @throws IllegalArgumentException if the cloud does not exist in this system
+     */
+    public CertifiedOsVersion certify(Version version, CloudName cloud, Version vespaVersion) {
+        requireCloud(cloud);
+        try (Mutex lock = curator().lockCertifiedOsVersions()) {
+            OsVersion target = new OsVersion(version, cloud);
+            Set<CertifiedOsVersion> current = curator().readCertifiedOsVersions();
+            for (CertifiedOsVersion candidate : current) {
+                if (candidate.osVersion().equals(target)) return candidate; // Existing certification wins
+            }
+            CertifiedOsVersion certification = new CertifiedOsVersion(target, vespaVersion);
+            Set<CertifiedOsVersion> updated = new HashSet<>(current); // Copy, as the set read from curator is unmodifiable
+            updated.add(certification);
+            curator().writeCertifiedOsVersions(updated);
+            return certification;
+        }
+    }
+
+    /**
+     * Revokes the certification of the given OS version in the given cloud.
+     *
+     * @throws IllegalArgumentException if the OS version has no certification in the given cloud
+     */
+    public void uncertify(Version version, CloudName cloud) {
+        try (Mutex lock = curator().lockCertifiedOsVersions()) {
+            OsVersion osVersion = new OsVersion(version, cloud);
+            Set<CertifiedOsVersion> certifiedVersions = new HashSet<>(curator().readCertifiedOsVersions());
+            // removeIf reports whether anything matched, so lookup and removal happen in one pass
+            if (!certifiedVersions.removeIf(cv -> cv.osVersion().equals(osVersion))) {
+                // Name the cloud too: the same version may well be certified in another cloud
+                throw new IllegalArgumentException("OS version " + version.toFullString() +
+                                                   " is not certified in cloud '" + cloud + "'");
+            }
+            curator().writeCertifiedOsVersions(certifiedVersions);
+        }
+    }
+
+ /** Remove certifications for non-existent OS versions */
+ public void removeStaleCertifications(OsVersionStatus currentStatus) {
+ try (Mutex lock = curator().lockCertifiedOsVersions()) {
+ Set<OsVersion> knownVersions = currentStatus.versions().keySet();
+ Set<CertifiedOsVersion> certifiedVersions = new HashSet<>(curator().readCertifiedOsVersions());
+ if (certifiedVersions.removeIf(cv -> !knownVersions.contains(cv.osVersion()))) {
+ curator().writeCertifiedOsVersions(certifiedVersions);
+ }
+ }
+ }
+
+    /**
+     * Returns whether the given OS version is certified as compatible with the current system version,
+     * i.e. a certification exists for exactly this OS version whose Vespa version is not after the
+     * current system version. Always true in CD systems.
+     */
+    public boolean certified(OsVersion osVersion) {
+        // Always certified (this is the system doing the certifying)
+        if (controller.system().isCd()) return true;
+
+        Version systemVersion = controller.readSystemVersion();
+        for (CertifiedOsVersion certification : curator().readCertifiedOsVersions()) {
+            if (!certification.osVersion().equals(osVersion)) continue;
+            // A later system version is fine, as we don't guarantee that
+            // an OS upgrade will always coincide with a Vespa release
+            if (!certification.vespaVersion().isAfter(systemVersion)) return true;
+        }
+        return false;
+    }
+
+    /** Throws {@link IllegalArgumentException} unless the given cloud is one of the clouds of this system */
+    private void requireCloud(CloudName cloud) {
+        if (controller.clouds().contains(cloud)) return;
+        throw new IllegalArgumentException("Cloud '" + cloud + "' does not exist in this system");
+    }
+
private CuratorDb curator() {
return controller.curator();
}
diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/OsUpgradeScheduler.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/OsUpgradeScheduler.java
index 65f9e8c6ec1..5701a495641 100644
--- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/OsUpgradeScheduler.java
+++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/OsUpgradeScheduler.java
@@ -7,6 +7,7 @@ import com.yahoo.config.provision.SystemName;
import com.yahoo.vespa.hosted.controller.Controller;
import com.yahoo.vespa.hosted.controller.api.integration.deployment.ArtifactRepository;
import com.yahoo.vespa.hosted.controller.api.integration.deployment.OsRelease;
+import com.yahoo.vespa.hosted.controller.versions.OsVersion;
import com.yahoo.vespa.hosted.controller.versions.OsVersionTarget;
import com.yahoo.yolean.Exceptions;
@@ -47,7 +48,7 @@ public class OsUpgradeScheduler extends ControllerMaintainer {
if (!change.get().scheduleAt(now)) continue;
try {
attempts++;
- controller().os().upgradeTo(change.get().version(), cloud, false, false);
+ controller().os().upgradeTo(change.get().osVersion().version(), cloud, false, false);
} catch (IllegalArgumentException e) {
failures++;
LOG.log(Level.WARNING, "Failed to schedule OS upgrade: " + Exceptions.toMessageString(e) +
@@ -64,7 +65,17 @@ public class OsUpgradeScheduler extends ControllerMaintainer {
if (upgradingToNewMajor(cloud)) return Optional.empty(); // Skip further upgrades until major version upgrade is complete
Release release = releaseIn(cloud);
- return release.change(currentTarget.get().version(), instant);
+ Optional<Change> change = release.change(currentTarget.get().version(), instant);
+ return change.filter(this::certified);
+ }
+
+    /** Returns whether the OS version of the given change is certified, logging a warning when it is not */
+    private boolean certified(Change change) {
+        if (controller().os().certified(change.osVersion())) return true;
+        LOG.log(Level.WARNING, "Want to schedule " + change + ", but this change is not certified for " +
+                               "the current system version");
+        return false;
    }
private boolean upgradingToNewMajor(CloudName cloud) {
@@ -79,9 +90,9 @@ public class OsUpgradeScheduler extends ControllerMaintainer {
boolean useTaggedRelease = controller().zoneRegistry().zones().all().dynamicallyProvisioned().in(cloud)
.zones().isEmpty();
if (useTaggedRelease) {
- return new TaggedRelease(controller().system(), controller().serviceRegistry().artifactRepository());
+ return new TaggedRelease(controller().system(), cloud, controller().serviceRegistry().artifactRepository());
}
- return new CalendarVersionedRelease(controller().system());
+ return new CalendarVersionedRelease(controller().system(), cloud);
}
private static boolean canTriggerAt(Instant instant, boolean isCd) {
@@ -116,10 +127,10 @@ public class OsUpgradeScheduler extends ControllerMaintainer {
}
/** OS version change and the earliest time it can be scheduled */
- public record Change(Version version, Instant scheduleAt) {
+ public record Change(OsVersion osVersion, Instant scheduleAt) {
public Change {
- Objects.requireNonNull(version);
+ Objects.requireNonNull(osVersion);
Objects.requireNonNull(scheduleAt);
}
@@ -131,10 +142,11 @@ public class OsUpgradeScheduler extends ControllerMaintainer {
}
/** OS release based on a tag */
- private record TaggedRelease(SystemName system, ArtifactRepository artifactRepository) implements Release {
+ private record TaggedRelease(SystemName system, CloudName cloud, ArtifactRepository artifactRepository) implements Release {
public TaggedRelease {
Objects.requireNonNull(system);
+ Objects.requireNonNull(cloud);
Objects.requireNonNull(artifactRepository);
}
@@ -144,7 +156,7 @@ public class OsUpgradeScheduler extends ControllerMaintainer {
if (!release.version().isAfter(currentVersion)) return Optional.empty();
Duration cooldown = remainingCooldownOf(cooldown(), release.age(instant));
Instant scheduleAt = schedulingInstant(instant.plus(cooldown), system);
- return Optional.of(new Change(release.version(), scheduleAt));
+ return Optional.of(new Change(new OsVersion(release.version(), cloud), scheduleAt));
}
/** Returns the release tag tracked by this system */
@@ -160,7 +172,7 @@ public class OsUpgradeScheduler extends ControllerMaintainer {
}
/** OS release based on calendar-versioning */
- record CalendarVersionedRelease(SystemName system) implements Release {
+ record CalendarVersionedRelease(SystemName system, CloudName cloud) implements Release {
/** A fixed point in time which the release schedule is calculated from */
private static final Instant START_OF_SCHEDULE = LocalDate.of(2022, 1, 1)
@@ -187,7 +199,7 @@ public class OsUpgradeScheduler extends ControllerMaintainer {
}
Duration cooldown = remainingCooldownOf(cooldown(), version.age(instant));
Instant schedulingInstant = schedulingInstant(instant.plus(cooldown), system);
- return Optional.of(new Change(version.version(), schedulingInstant));
+ return Optional.of(new Change(new OsVersion(version.version(), cloud), schedulingInstant));
}
private Duration cooldown() {
diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/OsVersionStatusUpdater.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/OsVersionStatusUpdater.java
index 831b4275422..a3a866d2036 100644
--- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/OsVersionStatusUpdater.java
+++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/OsVersionStatusUpdater.java
@@ -22,6 +22,7 @@ public class OsVersionStatusUpdater extends ControllerMaintainer {
try {
OsVersionStatus newStatus = OsVersionStatus.compute(controller());
controller().os().updateStatus(newStatus);
+ controller().os().removeStaleCertifications(newStatus);
return 0.0;
} catch (Exception e) {
log.log(Level.WARNING, "Failed to compute OS version status: " + Exceptions.toMessageString(e) +
diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/persistence/CertifiedOsVersionSerializer.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/persistence/CertifiedOsVersionSerializer.java
new file mode 100644
index 00000000000..8fd696cffc4
--- /dev/null
+++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/persistence/CertifiedOsVersionSerializer.java
@@ -0,0 +1,50 @@
+// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.vespa.hosted.controller.persistence;
+
+import com.yahoo.component.Version;
+import com.yahoo.config.provision.CloudName;
+import com.yahoo.slime.ArrayTraverser;
+import com.yahoo.slime.Cursor;
+import com.yahoo.slime.Slime;
+import com.yahoo.vespa.hosted.controller.versions.CertifiedOsVersion;
+import com.yahoo.vespa.hosted.controller.versions.OsVersion;
+
+import java.util.Collections;
+import java.util.HashSet;
+import java.util.Set;
+
+/**
+ * Converts sets of {@link com.yahoo.vespa.hosted.controller.versions.CertifiedOsVersion} to and from Slime.
+ *
+ * The serialized form is an array of objects, each holding the OS version, its cloud and the
+ * certified Vespa version as string fields.
+ *
+ * @author mpolden
+ */
+public class CertifiedOsVersionSerializer {
+
+    // These names appear verbatim in the serialized form
+    private static final String versionField = "version";
+    private static final String cloudField = "cloud";
+    private static final String vespaVersionField = "vespaVersion";
+
+    /** Returns a Slime array with one object per given certified version */
+    public Slime toSlime(Set<CertifiedOsVersion> versions) {
+        Slime slime = new Slime();
+        Cursor entries = slime.setArray();
+        for (CertifiedOsVersion certified : versions) {
+            OsVersion osVersion = certified.osVersion();
+            Cursor entry = entries.addObject();
+            entry.setString(versionField, osVersion.version().toFullString());
+            entry.setString(cloudField, osVersion.cloud().value());
+            entry.setString(vespaVersionField, certified.vespaVersion().toFullString());
+        }
+        return slime;
+    }
+
+    /** Returns an unmodifiable set of the certified versions found in the given Slime array */
+    public Set<CertifiedOsVersion> fromSlime(Slime slime) {
+        Set<CertifiedOsVersion> result = new HashSet<>();
+        slime.get().traverse((ArrayTraverser) (index, entry) -> {
+            Version version = Version.fromString(entry.field(versionField).asString());
+            CloudName cloud = CloudName.from(entry.field(cloudField).asString());
+            Version vespaVersion = Version.fromString(entry.field(vespaVersionField).asString());
+            result.add(new CertifiedOsVersion(new OsVersion(version, cloud), vespaVersion));
+        });
+        return Collections.unmodifiableSet(result);
+    }
+
+}
diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/persistence/CuratorDb.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/persistence/CuratorDb.java
index ae35306c783..a25aa9797ba 100644
--- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/persistence/CuratorDb.java
+++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/persistence/CuratorDb.java
@@ -46,6 +46,7 @@ import com.yahoo.vespa.hosted.controller.routing.ZoneRoutingPolicy;
import com.yahoo.vespa.hosted.controller.support.access.SupportAccess;
import com.yahoo.vespa.hosted.controller.tenant.PendingMailVerification;
import com.yahoo.vespa.hosted.controller.tenant.Tenant;
+import com.yahoo.vespa.hosted.controller.versions.CertifiedOsVersion;
import com.yahoo.vespa.hosted.controller.versions.OsVersionStatus;
import com.yahoo.vespa.hosted.controller.versions.OsVersionTarget;
import com.yahoo.vespa.hosted.controller.versions.VersionStatus;
@@ -119,6 +120,7 @@ public class CuratorDb {
private final OsVersionSerializer osVersionSerializer = new OsVersionSerializer();
private final OsVersionTargetSerializer osVersionTargetSerializer = new OsVersionTargetSerializer(osVersionSerializer);
private final OsVersionStatusSerializer osVersionStatusSerializer = new OsVersionStatusSerializer(osVersionSerializer, nodeVersionSerializer);
+ private final CertifiedOsVersionSerializer certifiedOsVersionSerializer = new CertifiedOsVersionSerializer();
private final RoutingPolicySerializer routingPolicySerializer = new RoutingPolicySerializer();
private final ZoneRoutingPolicySerializer zoneRoutingPolicySerializer = new ZoneRoutingPolicySerializer(routingPolicySerializer);
private final AuditLogSerializer auditLogSerializer = new AuditLogSerializer();
@@ -216,6 +218,10 @@ public class CuratorDb {
return curator.lock(lockRoot.append("osVersionStatus"), defaultLockTimeout);
}
+ /** Acquires the lock named "certifiedOsVersions" under the lock root, using the default lock timeout */
+ public Mutex lockCertifiedOsVersions() {
+ return curator.lock(lockRoot.append("certifiedOsVersions"), defaultLockTimeout);
+ }
+
public Mutex lockRoutingPolicies() {
return curator.lock(lockRoot.append("routingPolicies"), defaultLockTimeout);
}
@@ -334,7 +340,7 @@ public class CuratorDb {
.orElse(ControllerVersion.CURRENT);
}
- // Infrastructure upgrades
+ // OS upgrades
public void writeOsVersionTargets(SortedSet<OsVersionTarget> versions) {
curator.set(osVersionTargetsPath(), asJson(osVersionTargetSerializer.toSlime(versions)));
@@ -352,6 +358,14 @@ public class CuratorDb {
return readSlime(osVersionStatusPath()).map(osVersionStatusSerializer::fromSlime).orElse(OsVersionStatus.empty);
}
+ /** Serializes the given certified OS versions and stores them as JSON at the certified OS versions path */
+ public void writeCertifiedOsVersions(Set<CertifiedOsVersion> certifiedOsVersions) {
+ curator.set(certifiedOsVersionsPath(), asJson(certifiedOsVersionSerializer.toSlime(certifiedOsVersions)));
+ }
+
+ /** Returns the stored certified OS versions, or an empty (immutable) set if nothing has been stored */
+ public Set<CertifiedOsVersion> readCertifiedOsVersions() {
+ return readSlime(certifiedOsVersionsPath()).map(certifiedOsVersionSerializer::fromSlime).orElseGet(Set::of);
+ }
+
// -------------- Tenant --------------------------------------------------
public void writeTenant(Tenant tenant) {
@@ -812,6 +826,10 @@ public class CuratorDb {
return root.append("osUpgrader").append("targetVersion");
}
+ /** Path holding all certified OS versions. Note that the node itself is named "certifiedVersion" (singular) */
+ private static Path certifiedOsVersionsPath() {
+ return root.append("osUpgrader").append("certifiedVersion");
+ }
+
private static Path osVersionStatusPath() {
return root.append("osVersionStatus");
}
diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/restapi/os/OsApiHandler.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/restapi/os/OsApiHandler.java
index 0fa2dc492c2..fe1fb979abc 100644
--- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/restapi/os/OsApiHandler.java
+++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/restapi/os/OsApiHandler.java
@@ -26,6 +26,7 @@ import com.yahoo.vespa.hosted.controller.maintenance.ControllerMaintenance;
import com.yahoo.vespa.hosted.controller.maintenance.OsUpgradeScheduler;
import com.yahoo.vespa.hosted.controller.maintenance.OsUpgradeScheduler.Change;
import com.yahoo.vespa.hosted.controller.restapi.ErrorResponses;
+import com.yahoo.vespa.hosted.controller.versions.CertifiedOsVersion;
import com.yahoo.vespa.hosted.controller.versions.OsVersionTarget;
import com.yahoo.yolean.Exceptions;
@@ -36,6 +37,7 @@ import java.time.Duration;
import java.time.Instant;
import java.util.List;
import java.util.Optional;
+import java.util.Scanner;
import java.util.Set;
import java.util.StringJoiner;
import java.util.function.Function;
@@ -89,6 +91,7 @@ public class OsApiHandler extends AuditLoggingRequestHandler {
private HttpResponse post(HttpRequest request) {
Path path = new Path(request.getUri());
+ if (path.matches("/os/v1/certify/{cloud}/{version}")) return certifyVersion(request, path.get("version"), path.get("cloud"));
if (path.matches("/os/v1/firmware/")) return requestFirmwareCheckResponse(path);
if (path.matches("/os/v1/firmware/{environment}/")) return requestFirmwareCheckResponse(path);
if (path.matches("/os/v1/firmware/{environment}/{region}/")) return requestFirmwareCheckResponse(path);
@@ -97,12 +100,34 @@ public class OsApiHandler extends AuditLoggingRequestHandler {
private HttpResponse delete(HttpRequest request) {
Path path = new Path(request.getUri());
+ if (path.matches("/os/v1/certify/{cloud}/{version}")) return uncertifyVersion(request, path.get("version"), path.get("cloud"));
if (path.matches("/os/v1/firmware/")) return cancelFirmwareCheckResponse(path);
if (path.matches("/os/v1/firmware/{environment}/")) return cancelFirmwareCheckResponse(path);
if (path.matches("/os/v1/firmware/{environment}/{region}/")) return cancelFirmwareCheckResponse(path);
return ErrorResponse.notFoundError("Nothing at " + path);
}
+    /**
+     * Certifies the OS version and cloud given in the path as compatible with the Vespa version given
+     * in the request body. If the OS version is already certified for a different Vespa version, the
+     * existing certification is left unchanged and reported in the response.
+     *
+     * @throws IllegalArgumentException if the request body does not contain a Vespa version
+     */
+    private HttpResponse certifyVersion(HttpRequest request, String versionString, String cloudName) {
+        Version version = Version.fromString(versionString);
+        CloudName cloud = CloudName.from(cloudName);
+        // strip() so that a trailing newline in the body does not become part of the version string
+        Version vespaVersion = Version.fromString(asString(request.getData()).strip());
+        // NOTE(review): a blank body presumably parses to the empty version, which would never be after
+        // any system version and hence certify the OS version unconditionally - reject it explicitly
+        if (vespaVersion.isEmpty()) {
+            throw new IllegalArgumentException("Invalid Vespa version '" + vespaVersion.toFullString() + "'");
+        }
+        CertifiedOsVersion certified = controller.os().certify(version, cloud, vespaVersion);
+        if (certified.vespaVersion().equals(vespaVersion)) {
+            return new MessageResponse("Certified " + version.toFullString() + " in cloud " + cloud +
+                                       " as compatible with Vespa version " + vespaVersion.toFullString());
+        }
+        // certify() returned a pre-existing certification made for another Vespa version
+        return new MessageResponse(version.toFullString() + " is already certified in cloud " + cloud +
+                                   " as compatible with Vespa version " + certified.vespaVersion().toFullString() +
+                                   ". Leaving certification unchanged");
+    }
+
+    /** Revokes the certification of the OS version and cloud given in the path. The request itself is not read */
+    private HttpResponse uncertifyVersion(HttpRequest request, String versionString, String cloudName) {
+        Version osVersion = Version.fromString(versionString);
+        CloudName targetCloud = CloudName.from(cloudName);
+        controller.os().uncertify(osVersion, targetCloud);
+        String message = "Removed certification of " + osVersion.toFullString() + " in cloud " + targetCloud;
+        return new MessageResponse(message);
+    }
+
private HttpResponse requestFirmwareCheckResponse(Path path) {
List<ZoneId> zones = zonesAt(path);
if (zones.isEmpty())
@@ -170,7 +195,7 @@ public class OsApiHandler extends AuditLoggingRequestHandler {
currentVersionObject.setBool("pinned", t.pinned());
Optional<Change> nextChange = osUpgradeScheduler.changeIn(t.osVersion().cloud(), now);
nextChange.ifPresent(c -> {
- currentVersionObject.setString("nextVersion", c.version().toFullString());
+ currentVersionObject.setString("nextVersion", c.osVersion().version().toFullString());
currentVersionObject.setLong("nextScheduledAt", c.scheduleAt().toEpochMilli());
});
});
@@ -211,4 +236,12 @@ public class OsApiHandler extends AuditLoggingRequestHandler {
return field;
}
+    /** Reads the given stream to its end and returns the content decoded as UTF-8, or "" if the stream is empty */
+    private static String asString(InputStream in) {
+        // Decode explicitly as UTF-8 rather than with the platform default charset.
+        // "\\A" only matches at the beginning of input, so the first token is the entire stream
+        Scanner scanner = new Scanner(in, java.nio.charset.StandardCharsets.UTF_8).useDelimiter("\\A");
+        return scanner.hasNext() ? scanner.next() : "";
+    }
+
}
diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/versions/CertifiedOsVersion.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/versions/CertifiedOsVersion.java
new file mode 100644
index 00000000000..be99e170f03
--- /dev/null
+++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/versions/CertifiedOsVersion.java
@@ -0,0 +1,19 @@
+package com.yahoo.vespa.hosted.controller.versions;
+
+import com.yahoo.component.Version;
+
+import java.util.Objects;
+
+/**
+ * An OS version that has been certified to work on a specific Vespa version.
+ *
+ * @param osVersion    the certified OS version, never null
+ * @param vespaVersion the Vespa version the OS version was certified against, never null
+ * @author mpolden
+ */
+public record CertifiedOsVersion(OsVersion osVersion, Version vespaVersion) {
+
+ /** Rejects null components with a NullPointerException */
+ public CertifiedOsVersion {
+ Objects.requireNonNull(osVersion);
+ Objects.requireNonNull(vespaVersion);
+ }
+
+}
diff --git a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/OsUpgradeSchedulerTest.java b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/OsUpgradeSchedulerTest.java
index 1b1c4e3bf1a..84227b4fd9f 100644
--- a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/OsUpgradeSchedulerTest.java
+++ b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/OsUpgradeSchedulerTest.java
@@ -23,6 +23,7 @@ import java.util.Map;
import java.util.Optional;
import static org.junit.jupiter.api.Assertions.assertEquals;
+import static org.junit.jupiter.api.Assertions.assertFalse;
import static org.junit.jupiter.api.Assertions.assertTrue;
/**
@@ -61,7 +62,15 @@ public class OsUpgradeSchedulerTest {
Version version1 = Version.fromString("7.0.0.20220301");
tester.clock().advance(Duration.ofDays(14));
assertEquals("2022-03-01T09:05:00", formatInstant(tester.clock().instant()));
- assertEquals(version1, scheduler.changeIn(cloud, tester.clock().instant()).get().version());
+
+ // Change does not become available until certification
+ assertFalse(scheduler.changeIn(cloud, tester.clock().instant()).isPresent());
+ Version systemVersion = tester.controller().readSystemVersion();
+ Version olderThanSystemVersion = new Version(systemVersion.getMajor(), systemVersion.getMinor() - 1, systemVersion.getMicro());
+ tester.controller().os().certify(version1, cloud, olderThanSystemVersion);
+
+ // Change is now certified
+ assertEquals(version1, scheduler.changeIn(cloud, tester.clock().instant()).get().osVersion().version());
scheduler.maintain();
assertEquals(version0,
tester.controller().os().target(cloud).get().osVersion().version(),
@@ -97,9 +106,11 @@ public class OsUpgradeSchedulerTest {
assertEquals(version1, tester.controller().os().target(cloud).get().osVersion().version());
// Estimate next change
+ Version expected = Version.fromString("7.0.0.20220426");
+ tester.controller().os().certify(expected, cloud, systemVersion);
Optional<OsUpgradeScheduler.Change> nextChange = scheduler.changeIn(cloud, tester.clock().instant());
assertTrue(nextChange.isPresent());
- assertEquals("7.0.0.20220426", nextChange.get().version().toFullString());
+ assertEquals(expected, nextChange.get().osVersion().version());
assertEquals("2022-04-27T07:00:00", formatInstant(nextChange.get().scheduleAt()));
}
@@ -125,14 +136,14 @@ public class OsUpgradeSchedulerTest {
assertEquals(version0, tester.controller().os().target(cloud).get().osVersion().version());
// Cool-down passes
tester.clock().advance(Duration.ofDays(1));
- assertEquals(version1, scheduler.changeIn(cloud, tester.clock().instant()).get().version());
+ assertEquals(version1, scheduler.changeIn(cloud, tester.clock().instant()).get().osVersion().version());
scheduler.maintain();
assertEquals(version1, tester.controller().os().target(cloud).get().osVersion().version());
// Estimate next change
Optional<OsUpgradeScheduler.Change> nextChange = scheduler.changeIn(cloud, tester.clock().instant());
assertTrue(nextChange.isPresent());
- assertEquals("7.0.0.20220426", nextChange.get().version().toFullString());
+ assertEquals("7.0.0.20220426", nextChange.get().osVersion().version().toFullString());
assertEquals("2022-04-27T02:00:00", formatInstant(nextChange.get().scheduleAt()));
}
@@ -153,9 +164,18 @@ public class OsUpgradeSchedulerTest {
tester.serviceRegistry().artifactRepository().addRelease(new OsRelease(version1, OsRelease.Tag.stable,
Instant.parse("2021-06-21T23:59:00.00Z")));
scheduleUpgradeAfter(Duration.ZERO, version0, scheduler, tester);
- OsUpgradeScheduler.Change nextChange = scheduler.changeIn(cloud, tester.clock().instant()).get();
- assertEquals(version1, nextChange.version());
- assertEquals("2021-06-22T07:00:00", formatInstant(nextChange.scheduleAt()));
+
+ // No change yet because it hasn't been certified
+ Optional<OsUpgradeScheduler.Change> nextChange = scheduler.changeIn(cloud, tester.clock().instant());
+ assertFalse(nextChange.isPresent(), "No change");
+
+ // Change is certified and upgrade is scheduled
+ Version systemVersion = tester.controller().readSystemVersion();
+ tester.controller().os().certify(version1, cloud, systemVersion);
+ nextChange = scheduler.changeIn(cloud, tester.clock().instant());
+ assertTrue(nextChange.isPresent());
+ assertEquals(version1, nextChange.get().osVersion().version());
+ assertEquals("2021-06-22T07:00:00", formatInstant(nextChange.get().scheduleAt()));
scheduleUpgradeAfter(Duration.ofHours(7), version1, scheduler, tester); // Inside trigger period
// A newer version is triggered manually
@@ -183,7 +203,7 @@ public class OsUpgradeSchedulerTest {
Version version1 = Version.fromString("8.1");
tester.serviceRegistry().artifactRepository().addRelease(new OsRelease(version1, OsRelease.Tag.latest,
tester.clock().instant()));
- assertEquals(version1, scheduler.changeIn(cloud, tester.clock().instant()).get().version());
+ assertEquals(version1, scheduler.changeIn(cloud, tester.clock().instant()).get().osVersion().version());
assertEquals("2021-06-22T07:05:00", formatInstant(scheduler.changeIn(cloud, tester.clock().instant()).get().scheduleAt()),
"Not valid until cool-down period passes");
scheduleUpgradeAfter(Duration.ZERO, version0, scheduler, tester);
diff --git a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/OsVersionStatusUpdaterTest.java b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/OsVersionStatusUpdaterTest.java
index f45c7bfcdfb..6644c3013ff 100644
--- a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/OsVersionStatusUpdaterTest.java
+++ b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/OsVersionStatusUpdaterTest.java
@@ -6,11 +6,16 @@ import com.yahoo.config.provision.CloudName;
import com.yahoo.config.provision.zone.UpgradePolicy;
import com.yahoo.config.provision.zone.ZoneApi;
import com.yahoo.vespa.hosted.controller.ControllerTester;
+import com.yahoo.vespa.hosted.controller.versions.CertifiedOsVersion;
import com.yahoo.vespa.hosted.controller.versions.OsVersion;
import com.yahoo.vespa.hosted.controller.versions.OsVersionStatus;
import org.junit.jupiter.api.Test;
import java.time.Duration;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Set;
+import java.util.stream.Collectors;
import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertFalse;
@@ -25,8 +30,7 @@ public class OsVersionStatusUpdaterTest {
@Test
void test_update() {
ControllerTester tester = new ControllerTester();
- OsVersionStatusUpdater statusUpdater = new OsVersionStatusUpdater(tester.controller(), Duration.ofDays(1)
- );
+ OsVersionStatusUpdater statusUpdater = new OsVersionStatusUpdater(tester.controller(), Duration.ofDays(1));
// Add all zones to upgrade policy
UpgradePolicy.Builder upgradePolicy = UpgradePolicy.builder();
for (ZoneApi zone : tester.zoneRegistry().zones().controllerUpgraded().zones()) {
@@ -58,6 +62,24 @@ public class OsVersionStatusUpdaterTest {
assertTrue(osVersions.get(new OsVersion(version1, cloud)).isEmpty(), "No nodes on current target");
assertFalse(osVersions.get(new OsVersion(Version.emptyVersion, otherCloud)).isEmpty(), "All nodes on unknown version");
assertTrue(osVersions.get(new OsVersion(version1, otherCloud)).isEmpty(), "No nodes on current target");
+
+ // Updating status cleans up stale certifications
+ Set<OsVersion> knownVersions = osVersions.keySet();
+ List<OsVersion> versionsToCertify = new ArrayList<>(knownVersions);
+ versionsToCertify.addAll(List.of(new OsVersion(Version.fromString("95.0.1"), cloud),
+ new OsVersion(Version.fromString("98.0.2"), cloud)));
+ for (OsVersion version : versionsToCertify) {
+ tester.controller().os().certify(version.version(), version.cloud(), Version.fromString("1.2.3"));
+ }
+ assertEquals(knownVersions.size() + 2, certifiedOsVersions(tester).size());
+ statusUpdater.maintain();
+ assertEquals(knownVersions, certifiedOsVersions(tester));
+ }
+
+    /** Returns the OS versions of all certifications currently stored by the controller of given tester */
+    private static Set<OsVersion> certifiedOsVersions(ControllerTester tester) {
+        Set<OsVersion> osVersions = new java.util.HashSet<>();
+        for (CertifiedOsVersion certified : tester.controller().curator().readCertifiedOsVersions()) {
+            osVersions.add(certified.osVersion());
+        }
+        return osVersions;
    }
}
diff --git a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/persistence/CertifiedOsVersionSerializerTest.java b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/persistence/CertifiedOsVersionSerializerTest.java
new file mode 100644
index 00000000000..fcf5be08abf
--- /dev/null
+++ b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/persistence/CertifiedOsVersionSerializerTest.java
@@ -0,0 +1,30 @@
+package com.yahoo.vespa.hosted.controller.persistence;
+
+import com.yahoo.component.Version;
+import com.yahoo.config.provision.CloudName;
+import com.yahoo.vespa.hosted.controller.versions.CertifiedOsVersion;
+import com.yahoo.vespa.hosted.controller.versions.OsVersion;
+import org.junit.jupiter.api.Test;
+
+import java.util.Set;
+
+import static org.junit.jupiter.api.Assertions.assertEquals;
+
+/**
+ * @author mpolden
+ */
+class CertifiedOsVersionSerializerTest {
+
+    /** A set of certified OS versions must be unchanged after a round trip through Slime */
+    @Test
+    public void serialization() {
+        CertifiedOsVersion first = new CertifiedOsVersion(new OsVersion(Version.fromString("1.2.3"),
+                                                                        CloudName.from("cloud1")),
+                                                          Version.fromString("4.5.6"));
+        CertifiedOsVersion second = new CertifiedOsVersion(new OsVersion(Version.fromString("3.2.1"),
+                                                                         CloudName.from("cloud2")),
+                                                           Version.fromString("6.5.4"));
+        Set<CertifiedOsVersion> expected = Set.of(first, second);
+        CertifiedOsVersionSerializer serializer = new CertifiedOsVersionSerializer();
+        Set<CertifiedOsVersion> roundTripped = serializer.fromSlime(serializer.toSlime(expected));
+        assertEquals(expected, roundTripped);
+    }
+
+}
diff --git a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/restapi/os/OsApiTest.java b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/restapi/os/OsApiTest.java
index e569e0aca5b..15505dc3e95 100644
--- a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/restapi/os/OsApiTest.java
+++ b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/restapi/os/OsApiTest.java
@@ -79,6 +79,7 @@ public class OsApiTest extends ControllerContainerTest {
// All nodes are initially on empty version
upgradeAndUpdateStatus();
+
// Upgrade OS to a different version in each cloud
assertResponse(new Request("http://localhost:8080/os/v1/", "{\"version\": \"7.5.2\", \"cloud\": \"cloud1\"}", Request.Method.PATCH),
"{\"message\":\"Set target OS version for cloud 'cloud1' to 7.5.2\"}", 200);
@@ -111,6 +112,14 @@ public class OsApiTest extends ControllerContainerTest {
assertResponse(new Request("http://localhost:8080/os/v1/", "{\"version\": \"7.5.2\", \"cloud\": \"cloud1\", \"pin\": true}", Request.Method.PATCH),
"{\"message\":\"Set target OS version for cloud 'cloud1' to 7.5.2 (pinned)\"}", 200);
+ // Certify an OS and Vespa version pair
+ assertResponse(new Request("http://localhost:8080/os/v1/certify/cloud1/7.5.2", "8.200.37", Request.Method.POST),
+ "{\"message\":\"Certified 7.5.2 in cloud cloud1 as compatible with Vespa version 8.200.37\"}", 200);
+ assertResponse(new Request("http://localhost:8080/os/v1/certify/cloud1/7.5.2", "8.200.42", Request.Method.POST),
+ "{\"message\":\"7.5.2 is already certified in cloud cloud1 as compatible with Vespa version 8.200.37. Leaving certification unchanged\"}", 200);
+ assertResponse(new Request("http://localhost:8080/os/v1/certify/cloud1/7.5.2", "", Request.Method.DELETE),
+ "{\"message\":\"Removed certification of 7.5.2 in cloud cloud1\"}", 200);
+
// Error: Missing fields
assertResponse(new Request("http://localhost:8080/os/v1/", "{\"version\": \"7.6\"}", Request.Method.PATCH),
"{\"error-code\":\"BAD_REQUEST\",\"message\":\"Field 'cloud' is required\"}", 400);