aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Valerij Fredriksen <freva@users.noreply.github.com> 2023-07-26 22:12:00 +0200
committer GitHub <noreply@github.com> 2023-07-26 22:12:00 +0200
commit 704d2b6b87d6c5a2fbc932a681f0bc032a18ef2a (patch)
tree e122a11bfdf04ea097ff1d122c4d52639c1f1cc4
parent 886a16481d5023822629fd4f8f128157af9edce8 (diff)
parent 14660b601c602450850ca7916b1b6a371e38d45c (diff)
Merge pull request #27904 from vespa-engine/mpolden/check-schedule-time (tag: v8.202.11)
Check scheduling time before certification
-rw-r--r-- controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/OsUpgradeScheduler.java 41
-rw-r--r-- controller-server/src/main/java/com/yahoo/vespa/hosted/controller/restapi/os/OsApiHandler.java 2
-rw-r--r-- controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/OsUpgradeSchedulerTest.java 22
3 files changed, 36 insertions, 29 deletions
diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/OsUpgradeScheduler.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/OsUpgradeScheduler.java
index 9dd90163683..c751c0a130b 100644
--- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/OsUpgradeScheduler.java
+++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/OsUpgradeScheduler.java
@@ -43,9 +43,8 @@ public class OsUpgradeScheduler extends ControllerMaintainer {
int attempts = 0;
int failures = 0;
for (var cloud : controller().clouds()) {
- Optional<Change> change = changeIn(cloud, now);
+ Optional<Change> change = changeIn(cloud, now, false);
if (change.isEmpty()) continue;
- if (!change.get().scheduleAt(now)) continue;
try {
attempts++;
controller().os().upgradeTo(change.get().osVersion().version(), cloud, false, false);
@@ -58,15 +57,24 @@ public class OsUpgradeScheduler extends ControllerMaintainer {
return asSuccessFactorDeviation(attempts, failures);
}
- /** Returns the wanted change for cloud at given instant, if any */
- public Optional<Change> changeIn(CloudName cloud, Instant instant) {
+ /**
+ * Returns the next OS version change
+ *
+ * @param cloud The cloud where the change will be deployed
+ * @param now Current time
+ * @param future Whether to return a change that cannot be scheduled now
+ */
+ public Optional<Change> changeIn(CloudName cloud, Instant now, boolean future) {
Optional<OsVersionTarget> currentTarget = controller().os().target(cloud);
if (currentTarget.isEmpty()) return Optional.empty();
if (upgradingToNewMajor(cloud)) return Optional.empty(); // Skip further upgrades until major version upgrade is complete
- Release release = releaseIn(cloud);
- Optional<Change> change = release.change(currentTarget.get().version(), instant);
- return change.filter(this::certified);
+ Version currentVersion = currentTarget.get().version();
+ Change change = releaseIn(cloud).change(currentVersion, now);
+ if (!change.osVersion().version().isAfter(currentVersion)) return Optional.empty();
+ if (!future && !change.scheduleAt(now)) return Optional.empty();
+ if (!certified(change)) return Optional.empty();
+ return Optional.of(change);
}
private boolean certified(Change change) {
@@ -121,8 +129,8 @@ public class OsUpgradeScheduler extends ControllerMaintainer {
private interface Release {
- /** The pending change for this release at given instant, if any */
- Optional<Change> change(Version currentVersion, Instant instant);
+ /** The next available change of this release at given instant */
+ Change change(Version currentVersion, Instant instant);
}
@@ -151,12 +159,11 @@ public class OsUpgradeScheduler extends ControllerMaintainer {
}
@Override
- public Optional<Change> change(Version currentVersion, Instant instant) {
+ public Change change(Version currentVersion, Instant instant) {
OsRelease release = artifactRepository.osRelease(currentVersion.getMajor(), tag());
- if (!release.version().isAfter(currentVersion)) return Optional.empty();
Duration cooldown = remainingCooldownOf(cooldown(), release.age(instant));
Instant scheduleAt = schedulingInstant(instant.plus(cooldown), system);
- return Optional.of(new Change(new OsVersion(release.version(), cloud), scheduleAt));
+ return new Change(new OsVersion(release.version(), cloud), scheduleAt);
}
/** Returns the release tag tracked by this system */
@@ -193,16 +200,16 @@ public class OsUpgradeScheduler extends ControllerMaintainer {
}
@Override
- public Optional<Change> change(Version currentVersion, Instant instant) {
+ public Change change(Version currentVersion, Instant instant) {
CalendarVersion version = findVersion(instant, currentVersion);
- Instant predicatedInstant = instant;
+ Instant predicted = instant;
while (!version.version().isAfter(currentVersion)) {
- predicatedInstant = predicatedInstant.plus(Duration.ofDays(1));
- version = findVersion(predicatedInstant, currentVersion);
+ predicted = predicted.plus(Duration.ofDays(1));
+ version = findVersion(predicted, currentVersion);
}
Duration cooldown = remainingCooldownOf(COOLDOWN, version.age(instant));
Instant schedulingInstant = schedulingInstant(instant.plus(cooldown), system);
- return Optional.of(new Change(new OsVersion(version.version(), cloud), schedulingInstant));
+ return new Change(new OsVersion(version.version(), cloud), schedulingInstant);
}
/** Find the most recent version available according to the scheduling step, relative to now */
diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/restapi/os/OsApiHandler.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/restapi/os/OsApiHandler.java
index 167c30cb630..1639424b182 100644
--- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/restapi/os/OsApiHandler.java
+++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/restapi/os/OsApiHandler.java
@@ -210,7 +210,7 @@ public class OsApiHandler extends AuditLoggingRequestHandler {
currentVersionObject.setString("upgradeBudget", Duration.ZERO.toString());
currentVersionObject.setLong("scheduledAt", t.scheduledAt().toEpochMilli());
currentVersionObject.setBool("pinned", t.pinned());
- Optional<Change> nextChange = osUpgradeScheduler.changeIn(t.osVersion().cloud(), now);
+ Optional<Change> nextChange = osUpgradeScheduler.changeIn(t.osVersion().cloud(), now, true);
nextChange.ifPresent(c -> {
currentVersionObject.setString("nextVersion", c.osVersion().version().toFullString());
currentVersionObject.setLong("nextScheduledAt", c.scheduleAt().toEpochMilli());
diff --git a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/OsUpgradeSchedulerTest.java b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/OsUpgradeSchedulerTest.java
index 22893287979..178e8f18489 100644
--- a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/OsUpgradeSchedulerTest.java
+++ b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/OsUpgradeSchedulerTest.java
@@ -64,13 +64,13 @@ public class OsUpgradeSchedulerTest {
assertEquals("2022-03-01T00:05:00", formatInstant(tester.clock().instant()));
// Change does not become available until certification
- assertFalse(scheduler.changeIn(cloud, tester.clock().instant()).isPresent());
+ assertFalse(scheduler.changeIn(cloud, tester.clock().instant(), true).isPresent());
Version systemVersion = tester.controller().readSystemVersion();
Version olderThanSystemVersion = new Version(systemVersion.getMajor(), systemVersion.getMinor() - 1, systemVersion.getMicro());
tester.controller().os().certify(version1, cloud, olderThanSystemVersion);
// Change is now certified
- assertEquals(version1, scheduler.changeIn(cloud, tester.clock().instant()).get().osVersion().version());
+ assertEquals(version1, scheduler.changeIn(cloud, tester.clock().instant(), true).get().osVersion().version());
scheduler.maintain();
assertEquals(version0,
tester.controller().os().target(cloud).get().osVersion().version(),
@@ -86,7 +86,7 @@ public class OsUpgradeSchedulerTest {
// Time constraints have now passed, but the current target has been pinned in the meantime
tester.controller().os().upgradeTo(version0, cloud, false, true);
- Optional<OsUpgradeScheduler.Change> change = scheduler.changeIn(cloud, tester.clock().instant());
+ Optional<OsUpgradeScheduler.Change> change = scheduler.changeIn(cloud, tester.clock().instant(), true);
assertTrue(change.isPresent());
assertEquals(-1, scheduler.maintain());
assertEquals(version0,
@@ -108,7 +108,7 @@ public class OsUpgradeSchedulerTest {
// Estimate next change
Version expected = Version.fromString("7.0.0.20220426");
tester.controller().os().certify(expected, cloud, systemVersion);
- Optional<OsUpgradeScheduler.Change> nextChange = scheduler.changeIn(cloud, tester.clock().instant());
+ Optional<OsUpgradeScheduler.Change> nextChange = scheduler.changeIn(cloud, tester.clock().instant(), true);
assertTrue(nextChange.isPresent());
assertEquals(expected, nextChange.get().osVersion().version());
assertEquals("2022-04-26T07:00:00", formatInstant(nextChange.get().scheduleAt()));
@@ -136,12 +136,12 @@ public class OsUpgradeSchedulerTest {
assertEquals(version0, tester.controller().os().target(cloud).get().osVersion().version());
// Cool-down passes
tester.clock().advance(Duration.ofHours(4));
- assertEquals(version1, scheduler.changeIn(cloud, tester.clock().instant()).get().osVersion().version());
+ assertEquals(version1, scheduler.changeIn(cloud, tester.clock().instant(), false).get().osVersion().version());
scheduler.maintain();
assertEquals(version1, tester.controller().os().target(cloud).get().osVersion().version());
// Estimate next change
- Optional<OsUpgradeScheduler.Change> nextChange = scheduler.changeIn(cloud, tester.clock().instant());
+ Optional<OsUpgradeScheduler.Change> nextChange = scheduler.changeIn(cloud, tester.clock().instant(), true);
assertTrue(nextChange.isPresent());
assertEquals("7.0.0.20220426", nextChange.get().osVersion().version().toFullString());
assertEquals("2022-04-26T06:00:00", formatInstant(nextChange.get().scheduleAt()));
@@ -166,13 +166,13 @@ public class OsUpgradeSchedulerTest {
scheduleUpgradeAfter(Duration.ZERO, version0, scheduler, tester);
// No change yet because it hasn't been certified
- Optional<OsUpgradeScheduler.Change> nextChange = scheduler.changeIn(cloud, tester.clock().instant());
+ Optional<OsUpgradeScheduler.Change> nextChange = scheduler.changeIn(cloud, tester.clock().instant(), true);
assertFalse(nextChange.isPresent(), "No change");
// Change is certified and upgrade is scheduled
Version systemVersion = tester.controller().readSystemVersion();
tester.controller().os().certify(version1, cloud, systemVersion);
- nextChange = scheduler.changeIn(cloud, tester.clock().instant());
+ nextChange = scheduler.changeIn(cloud, tester.clock().instant(), true);
assertTrue(nextChange.isPresent());
assertEquals(version1, nextChange.get().osVersion().version());
assertEquals("2021-06-22T07:00:00", formatInstant(nextChange.get().scheduleAt()));
@@ -184,7 +184,7 @@ public class OsUpgradeSchedulerTest {
// Nothing happens in next iteration as tagged release is older than manually triggered version
scheduleUpgradeAfter(Duration.ofDays(7), version3, scheduler, tester);
- assertTrue(scheduler.changeIn(cloud, tester.clock().instant()).isEmpty());
+ assertTrue(scheduler.changeIn(cloud, tester.clock().instant(), true).isEmpty());
}
@Test
@@ -203,8 +203,8 @@ public class OsUpgradeSchedulerTest {
Version version1 = Version.fromString("8.1");
tester.serviceRegistry().artifactRepository().addRelease(new OsRelease(version1, OsRelease.Tag.latest,
tester.clock().instant()));
- assertEquals(version1, scheduler.changeIn(cloud, tester.clock().instant()).get().osVersion().version());
- assertEquals("2021-06-22T07:05:00", formatInstant(scheduler.changeIn(cloud, tester.clock().instant()).get().scheduleAt()),
+ assertEquals(version1, scheduler.changeIn(cloud, tester.clock().instant(), true).get().osVersion().version());
+ assertEquals("2021-06-22T07:05:00", formatInstant(scheduler.changeIn(cloud, tester.clock().instant(), true).get().scheduleAt()),
"Not valid until cool-down period passes");
scheduleUpgradeAfter(Duration.ZERO, version0, scheduler, tester);