diff options
author | Martin Polden <mpolden@mpolden.no> | 2022-08-10 13:47:25 +0200 |
---|---|---|
committer | GitHub <noreply@github.com> | 2022-08-10 13:47:25 +0200 |
commit | 29a38c1bba78f3f1680bf1cb4a5f85e7e305cd87 (patch) | |
tree | d659ba00d0bfaead2db05d4ec2df5599b3ba4972 | |
parent | 35b76f6daf216b3d6e158576b05e4e914d58ae13 (diff) | |
parent | 71d90780aee036c9969c8c5b14d452ebc771efa1 (diff) |
Merge pull request #23616 from vespa-engine/mpolden/estimate-next-change
Expose details of next change in /os/v1
6 files changed, 118 insertions, 49 deletions
diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/ControllerMaintenance.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/ControllerMaintenance.java index a259ed2fdef..ab2e0312b15 100644 --- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/ControllerMaintenance.java +++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/ControllerMaintenance.java @@ -33,6 +33,7 @@ import static java.time.temporal.ChronoUnit.SECONDS; public class ControllerMaintenance extends AbstractComponent { private final Upgrader upgrader; + private final OsUpgradeScheduler osUpgradeScheduler; private final List<Maintainer> maintainers = new CopyOnWriteArrayList<>(); @Inject @@ -40,7 +41,9 @@ public class ControllerMaintenance extends AbstractComponent { public ControllerMaintenance(Controller controller, Metric metric, UserManagement userManagement, AthenzClientFactory athenzClientFactory) { Intervals intervals = new Intervals(controller.system()); upgrader = new Upgrader(controller, intervals.defaultInterval); + osUpgradeScheduler = new OsUpgradeScheduler(controller, intervals.osUpgradeScheduler); maintainers.add(upgrader); + maintainers.add(osUpgradeScheduler); maintainers.addAll(osUpgraders(controller, intervals.osUpgrader)); maintainers.add(new DeploymentExpirer(controller, intervals.defaultInterval)); maintainers.add(new DeploymentUpgrader(controller, intervals.defaultInterval)); @@ -54,7 +57,6 @@ public class ControllerMaintenance extends AbstractComponent { maintainers.add(new SystemUpgrader(controller, intervals.systemUpgrader)); maintainers.add(new JobRunner(controller, intervals.jobRunner)); maintainers.add(new OsVersionStatusUpdater(controller, intervals.osVersionStatusUpdater)); - maintainers.add(new OsUpgradeScheduler(controller, intervals.osUpgradeScheduler)); maintainers.add(new ContactInformationMaintainer(controller, intervals.contactInformationMaintainer)); maintainers.add(new NameServiceDispatcher(controller, intervals.nameServiceDispatcher)); maintainers.add(new CostReportMaintainer(controller, intervals.costReportMaintainer, controller.serviceRegistry().costReportConsumer())); @@ -80,6 +82,8 @@ public class ControllerMaintenance extends AbstractComponent { public Upgrader upgrader() { return upgrader; } + public OsUpgradeScheduler osUpgradeScheduler() { return osUpgradeScheduler; } + @Override public void deconstruct() { maintainers.forEach(Maintainer::shutdown); diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/OsUpgradeScheduler.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/OsUpgradeScheduler.java index 111931b638b..644a8c6c1ed 100644 --- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/OsUpgradeScheduler.java +++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/OsUpgradeScheduler.java @@ -14,7 +14,9 @@ import java.time.Duration; import java.time.Instant; import java.time.LocalDate; import java.time.ZoneOffset; +import java.time.ZonedDateTime; import java.time.format.DateTimeFormatter; +import java.time.temporal.ChronoUnit; import java.util.Objects; import java.util.Optional; @@ -32,23 +34,39 @@ public class OsUpgradeScheduler extends ControllerMaintainer { @Override protected double maintain() { Instant now = controller().clock().instant(); - if (!canTriggerAt(now)) return 1.0; for (var cloud : controller().clouds()) { - Release release = releaseIn(cloud); - upgradeTo(release, cloud, now); + Optional<Change> change = changeIn(cloud); + if (change.isEmpty()) continue; + if (!change.get().scheduleAt(now)) continue; + controller().upgradeOsIn(cloud, change.get().version(), change.get().upgradeBudget(), false); } return 1.0; } - /** Upgrade to given release in cloud */ - private void upgradeTo(Release release, CloudName cloud, Instant now) { + /** Returns the wanted change for given cloud, if any */ + public Optional<Change> changeIn(CloudName cloud) { Optional<OsVersionTarget> currentTarget = controller().osVersionTarget(cloud); - if (currentTarget.isEmpty()) return; - if (upgradingToNewMajor(cloud)) return; // Skip further upgrades until major version upgrade is complete - - Version version = release.version(currentTarget.get(), now); - if (!version.isAfter(currentTarget.get().osVersion().version())) return; - controller().upgradeOsIn(cloud, version, release.upgradeBudget(), false); + if (currentTarget.isEmpty()) return Optional.empty(); + if (upgradingToNewMajor(cloud)) return Optional.empty(); // Skip further upgrades until major version upgrade is complete + + Release release = releaseIn(cloud); + Instant instant = controller().clock().instant(); + Version wantedVersion = release.version(currentTarget.get(), instant); + Version currentVersion = currentTarget.get().version(); + if (release instanceof CalendarVersionedRelease) { + // Estimate the next change + while (!wantedVersion.isAfter(currentVersion)) { + instant = instant.plus(Duration.ofDays(1)); + wantedVersion = release.version(currentTarget.get(), instant); + } + } else if (!wantedVersion.isAfter(currentVersion)) { + return Optional.empty(); // No change right now, and we cannot predict the next change for this kind of release + } + // Find trigger time + while (!canTriggerAt(instant)) { + instant = instant.truncatedTo(ChronoUnit.HOURS).plus(Duration.ofHours(1)); + } + return Optional.of(new Change(wantedVersion, release.upgradeBudget(), instant)); } private boolean upgradingToNewMajor(CloudName cloud) { @@ -58,23 +76,24 @@ public class OsUpgradeScheduler extends ControllerMaintainer { .count() > 1; } - private boolean canTriggerAt(Instant instant) { - int hourOfDay = instant.atZone(ZoneOffset.UTC).getHour(); - int dayOfWeek = instant.atZone(ZoneOffset.UTC).getDayOfWeek().getValue(); - // Upgrade can only be scheduled between 07:00 (02:00 in CD systems) and 12:59 UTC, Monday-Thursday - int startHour = controller().system().isCd() ? 2 : 7; - return hourOfDay >= startHour && hourOfDay <= 12 && dayOfWeek < 5; - } - private Release releaseIn(CloudName cloud) { boolean useTaggedRelease = controller().zoneRegistry().zones().all().reprovisionToUpgradeOs().in(cloud) - .zones().isEmpty(); + .zones().isEmpty(); if (useTaggedRelease) { return new TaggedRelease(controller().system(), controller().serviceRegistry().artifactRepository()); } return new CalendarVersionedRelease(controller().system()); } + private boolean canTriggerAt(Instant instant) { + ZonedDateTime dateTime = instant.atZone(ZoneOffset.UTC); + int hourOfDay = dateTime.getHour(); + int dayOfWeek = dateTime.getDayOfWeek().getValue(); + // Upgrade can only be scheduled between 07:00 (02:00 in CD systems) and 12:59 UTC, Monday-Thursday + int startHour = controller().system().isCd() ? 2 : 7; + return hourOfDay >= startHour && hourOfDay <= 12 && dayOfWeek < 5; + } + private interface Release { /** The version number of this */ @@ -85,6 +104,22 @@ public class OsUpgradeScheduler extends ControllerMaintainer { } + /** OS version change, its budget and the earliest time it can be scheduled */ + public record Change(Version version, Duration upgradeBudget, Instant scheduleAt) { + + public Change { + Objects.requireNonNull(version); + Objects.requireNonNull(upgradeBudget); + Objects.requireNonNull(scheduleAt); + } + + /** Returns whether this can be scheduled at given instant */ + public boolean scheduleAt(Instant instant) { + return !instant.isBefore(scheduleAt); + } + + } + /** OS release based on a tag */ private record TaggedRelease(SystemName system, ArtifactRepository artifactRepository) implements Release { diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/restapi/os/OsApiHandler.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/restapi/os/OsApiHandler.java index 853739ee9c3..0e764b98514 100644 --- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/restapi/os/OsApiHandler.java +++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/restapi/os/OsApiHandler.java @@ -22,6 +22,9 @@ import com.yahoo.slime.SlimeUtils; import com.yahoo.slime.Type; import com.yahoo.vespa.hosted.controller.Controller; import com.yahoo.vespa.hosted.controller.auditlog.AuditLoggingRequestHandler; +import com.yahoo.vespa.hosted.controller.maintenance.ControllerMaintenance; +import com.yahoo.vespa.hosted.controller.maintenance.OsUpgradeScheduler; +import com.yahoo.vespa.hosted.controller.maintenance.OsUpgradeScheduler.Change; import com.yahoo.vespa.hosted.controller.versions.OsVersionTarget; import com.yahoo.yolean.Exceptions; @@ -47,22 +50,24 @@ import java.util.stream.Collectors; public class OsApiHandler extends AuditLoggingRequestHandler { private final Controller controller; + private final OsUpgradeScheduler osUpgradeScheduler; - public OsApiHandler(Context ctx, Controller controller) { + public OsApiHandler(Context ctx, Controller controller, ControllerMaintenance controllerMaintenance) { super(ctx, controller.auditLogger()); this.controller = controller; + this.osUpgradeScheduler = controllerMaintenance.osUpgradeScheduler(); } @Override public HttpResponse auditAndHandle(HttpRequest request) { try { - switch (request.getMethod()) { - case GET: return get(request); - case POST: return post(request); - case DELETE: return delete(request); - case PATCH: return patch(request); - default: return ErrorResponse.methodNotAllowed("Method '" + request.getMethod() + "' is unsupported"); - } + return switch (request.getMethod()) { + case GET -> get(request); + case POST -> post(request); + case DELETE -> delete(request); + case PATCH -> patch(request); + default -> ErrorResponse.methodNotAllowed("Method '" + request.getMethod() + "' is unsupported"); + }; } catch (IllegalArgumentException e) { return ErrorResponse.badRequest(Exceptions.toMessageString(e)); } catch (RuntimeException e) { @@ -159,8 +164,16 @@ public class OsApiHandler extends AuditLoggingRequestHandler { currentVersionObject.setString("version", osVersion.version().toFullString()); Optional<OsVersionTarget> target = targets.stream().filter(t -> t.osVersion().equals(osVersion)).findFirst(); currentVersionObject.setBool("targetVersion", target.isPresent()); - target.ifPresent(t -> currentVersionObject.setString("upgradeBudget", t.upgradeBudget().toString())); - target.ifPresent(t -> currentVersionObject.setLong("scheduledAt", t.scheduledAt().toEpochMilli())); + target.ifPresent(t -> { + currentVersionObject.setString("upgradeBudget", t.upgradeBudget().toString()); + currentVersionObject.setLong("scheduledAt", t.scheduledAt().toEpochMilli()); + Optional<Change> nextChange = osUpgradeScheduler.changeIn(t.osVersion().cloud()); + nextChange.ifPresent(c -> { + currentVersionObject.setString("nextVersion", c.version().toFullString()); + currentVersionObject.setLong("nextScheduledAt", c.scheduleAt().toEpochMilli()); + }); + }); + currentVersionObject.setString("cloud", osVersion.cloud().value()); Cursor nodesArray = currentVersionObject.setArray("nodes"); nodeVersions.forEach(nodeVersion -> { diff --git a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/OsUpgradeSchedulerTest.java b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/OsUpgradeSchedulerTest.java index 9268ea5ca1c..fac15cd23c4 100644 --- a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/OsUpgradeSchedulerTest.java +++ b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/OsUpgradeSchedulerTest.java @@ -14,9 +14,12 @@ import org.junit.jupiter.api.Test; import java.time.Duration; import java.time.Instant; import java.time.LocalDate; +import java.time.LocalDateTime; import java.time.ZoneOffset; +import java.time.format.DateTimeFormatter; import java.util.List; import java.util.Map; +import java.util.Optional; import static org.junit.jupiter.api.Assertions.assertEquals; import static org.junit.jupiter.api.Assertions.assertTrue; @@ -57,9 +60,9 @@ public class OsUpgradeSchedulerTest { tester.clock().advance(Duration.ofDays(30)); scheduler.maintain(); assertEquals(version0, - tester.controller().osVersionTarget(cloud).get().osVersion().version(), - "Target is unchanged because we're outside trigger period"); - tester.clock().advance(Duration.ofHours(7)); // Put us inside the trigger period + tester.controller().osVersionTarget(cloud).get().osVersion().version(), + "Target is unchanged because we're outside trigger period"); + tester.clock().advance(Duration.ofHours(7).plusMinutes(5)); // Put us inside the trigger period scheduler.maintain(); assertEquals(version1, tester.controller().osVersionTarget(cloud).get().osVersion().version(), @@ -69,11 +72,19 @@ public class OsUpgradeSchedulerTest { tester.clock().advance(Duration.ofDays(2)); scheduler.maintain(); assertEquals(version1, tester.controller().osVersionTarget(cloud).get().osVersion().version()); + + // Estimate next change + Optional<OsUpgradeScheduler.Change> nextChange = scheduler.changeIn(cloud); + assertTrue(nextChange.isPresent()); + assertEquals("7.0.0.20220425", nextChange.get().version().toFullString()); + assertEquals("2022-05-02T07:00:00", LocalDateTime.ofInstant(nextChange.get().scheduleAt(), ZoneOffset.UTC) + .format(DateTimeFormatter.ISO_DATE_TIME)); } @Test void schedule_stable_release() { ControllerTester tester = new ControllerTester(); + OsUpgradeScheduler scheduler = new OsUpgradeScheduler(tester.controller(), Duration.ofDays(1)); Instant t0 = Instant.parse("2021-06-21T07:00:00.00Z"); // Inside trigger period tester.clock().setInstant(t0); @@ -86,19 +97,23 @@ public class OsUpgradeSchedulerTest { Version version1 = Version.fromString("8.1"); tester.serviceRegistry().artifactRepository().addRelease(new OsRelease(version1, OsRelease.Tag.stable, tester.clock().instant())); - scheduleUpgradeAfter(Duration.ZERO, version1, tester); + scheduleUpgradeAfter(Duration.ZERO, version1, scheduler, tester); // A newer version is triggered manually Version version3 = Version.fromString("8.3"); tester.controller().upgradeOsIn(cloud, version3, Duration.ZERO, false); // Nothing happens in next iteration as tagged release is older than manually triggered version - scheduleUpgradeAfter(Duration.ofDays(7), version3, tester); + scheduleUpgradeAfter(Duration.ofDays(7), version3, scheduler, tester); + + // Next change cannot be estimated for tagged releases + assertTrue(scheduler.changeIn(cloud).isEmpty(), "Next change is unknown"); } @Test void schedule_latest_release_in_cd() { ControllerTester tester = new ControllerTester(SystemName.cd); + OsUpgradeScheduler scheduler = new OsUpgradeScheduler(tester.controller(), Duration.ofDays(1)); Instant t0 = Instant.parse("2021-06-21T07:00:00.00Z"); // Inside trigger period tester.clock().setInstant(t0); @@ -111,10 +126,10 @@ public class OsUpgradeSchedulerTest { Version version1 = Version.fromString("8.1"); tester.serviceRegistry().artifactRepository().addRelease(new OsRelease(version1, OsRelease.Tag.latest, tester.clock().instant())); - scheduleUpgradeAfter(Duration.ZERO, version0, tester); + scheduleUpgradeAfter(Duration.ZERO, version0, scheduler, tester); // Cooldown period passes and latest release is scheduled - scheduleUpgradeAfter(Duration.ofDays(1), version1, tester); + scheduleUpgradeAfter(Duration.ofDays(1), version1, scheduler, tester); } @Test @@ -135,9 +150,9 @@ public class OsUpgradeSchedulerTest { }); } - private void scheduleUpgradeAfter(Duration duration, Version version, ControllerTester tester) { + private void scheduleUpgradeAfter(Duration duration, Version version, OsUpgradeScheduler scheduler, ControllerTester tester) { tester.clock().advance(duration); - new OsUpgradeScheduler(tester.controller(), Duration.ofDays(1)).maintain(); + scheduler.maintain(); CloudName cloud = tester.controller().clouds().iterator().next(); OsVersionTarget target = tester.controller().osVersionTarget(cloud).get(); assertEquals(version, target.osVersion().version()); diff --git a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/restapi/os/OsApiTest.java b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/restapi/os/OsApiTest.java index 6ddc58feaea..15f0100ade8 100644 --- a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/restapi/os/OsApiTest.java +++ b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/restapi/os/OsApiTest.java @@ -2,6 +2,7 @@ package com.yahoo.vespa.hosted.controller.restapi.os; import com.yahoo.application.container.handler.Request; +import com.yahoo.component.Version; import com.yahoo.config.provision.CloudName; import com.yahoo.config.provision.SystemName; import com.yahoo.config.provision.zone.UpgradePolicy; @@ -11,10 +12,10 @@ import com.yahoo.vespa.athenz.api.AthenzIdentity; import com.yahoo.vespa.athenz.api.AthenzUser; import com.yahoo.vespa.hosted.controller.api.integration.configserver.Node; import com.yahoo.vespa.hosted.controller.api.integration.configserver.NodeFilter; +import com.yahoo.vespa.hosted.controller.api.integration.deployment.OsRelease; import com.yahoo.vespa.hosted.controller.application.SystemApplication; import com.yahoo.vespa.hosted.controller.integration.NodeRepositoryMock; import com.yahoo.vespa.hosted.controller.integration.ZoneApiMock; -import com.yahoo.vespa.hosted.controller.integration.ZoneRegistryMock; import com.yahoo.vespa.hosted.controller.maintenance.ControllerMaintainer; import com.yahoo.vespa.hosted.controller.maintenance.OsUpgrader; import com.yahoo.vespa.hosted.controller.restapi.ContainerTester; @@ -57,10 +58,13 @@ public class OsApiTest extends ControllerContainerTest { tester = new ContainerTester(container, responses); tester.serviceRegistry().clock().setInstant(Instant.ofEpochMilli(1234)); addUserToHostedOperatorRole(operator); - zoneRegistryMock().setZones(zone1, zone2, zone3) - .reprovisionToUpgradeOsIn(zone3) - .setOsUpgradePolicy(cloud1, UpgradePolicy.builder().upgrade(zone1).upgrade(zone2).build()) - .setOsUpgradePolicy(cloud2, UpgradePolicy.builder().upgrade(zone3).build()); + tester.serviceRegistry().zoneRegistry().setZones(zone1, zone2, zone3) + .reprovisionToUpgradeOsIn(zone3) + .setOsUpgradePolicy(cloud1, UpgradePolicy.builder().upgrade(zone1).upgrade(zone2).build()) + .setOsUpgradePolicy(cloud2, UpgradePolicy.builder().upgrade(zone3).build()); + tester.serviceRegistry().artifactRepository().addRelease(new OsRelease(Version.fromString("7.0"), + OsRelease.Tag.latest, + Instant.EPOCH)); osUpgraders = List.of( new OsUpgrader(tester.controller(), Duration.ofDays(1), cloud1), @@ -160,10 +164,6 @@ public class OsApiTest extends ControllerContainerTest { updateVersionStatus(); } - private ZoneRegistryMock zoneRegistryMock() { - return tester.serviceRegistry().zoneRegistry(); - } - private NodeRepositoryMock nodeRepository() { return tester.serviceRegistry().configServerMock().nodeRepository(); } diff --git a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/restapi/os/responses/versions-all-upgraded.json b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/restapi/os/responses/versions-all-upgraded.json index a5af4f45370..be94b85f113 100644 --- a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/restapi/os/responses/versions-all-upgraded.json +++ b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/restapi/os/responses/versions-all-upgraded.json @@ -104,6 +104,8 @@ "targetVersion": true, "upgradeBudget": "PT24H", "scheduledAt": 1234, + "nextVersion": "8.2.1.20211227", + "nextScheduledAt": 7200000, "cloud": "cloud2", "nodes": [ { |