diff options
Diffstat (limited to 'controller-server')
16 files changed, 195 insertions, 74 deletions
diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentTrigger.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentTrigger.java index 7ceeda08d3a..d83f552ab25 100644 --- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentTrigger.java +++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentTrigger.java @@ -228,10 +228,9 @@ public class DeploymentTrigger { Instance instance = application.require(applicationId.instance()); JobId job = new JobId(instance.id(), jobType); JobStatus jobStatus = jobs.jobStatus(new JobId(applicationId, jobType)); - Versions versions = jobStatus.lastTriggered() - .orElseThrow(() -> new IllegalArgumentException(job + " has never been triggered")) - .versions(); - trigger(deploymentJob(instance, versions, jobType, jobStatus, clock.instant()), reason); + Run last = jobStatus.lastTriggered() + .orElseThrow(() -> new IllegalArgumentException(job + " has never been triggered")); + trigger(deploymentJob(instance, last.versions(), last.id().type(), jobStatus.isNodeAllocationFailure(), clock.instant()), reason); return job; } @@ -259,7 +258,12 @@ public class DeploymentTrigger { .collect(toMap(Map.Entry::getKey, Map.Entry::getValue)); jobs.forEach((jobId, versionsList) -> { - trigger(deploymentJob(application.require(job.application().instance()), versionsList.get(0).versions(), jobId.type(), status.jobs().get(jobId).get(), clock.instant()), reason); + trigger(deploymentJob(application.require(job.application().instance()), + versionsList.get(0).versions(), + jobId.type(), + status.jobs().get(jobId).get().isNodeAllocationFailure(), + clock.instant()), + reason); }); return List.copyOf(jobs.keySet()); } @@ -388,7 +392,7 @@ public class DeploymentTrigger { jobs.add(deploymentJob(status.application().require(jobId.application().instance()), job.versions(), job.type(), - status.instanceJobs(jobId.application().instance()).get(jobId.type()), + status.instanceJobs(jobId.application().instance()).get(jobId.type()).isNodeAllocationFailure(), job.readyAt().get())); }); return Collections.unmodifiableList(jobs); @@ -475,8 +479,8 @@ public class DeploymentTrigger { // ---------- Version and job helpers ---------- - private Job deploymentJob(Instance instance, Versions versions, JobType jobType, JobStatus jobStatus, Instant availableSince) { - return new Job(instance, versions, jobType, availableSince, jobStatus.isNodeAllocationFailure(), instance.change().revision().isPresent()); + private Job deploymentJob(Instance instance, Versions versions, JobType jobType, boolean isNodeAllocationFailure, Instant availableSince) { + return new Job(instance, versions, jobType, availableSince, isNodeAllocationFailure, instance.change().revision().isPresent()); } // ---------- Data containers ---------- diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/JobController.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/JobController.java index 19b2afb3af9..881107fa0f9 100644 --- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/JobController.java +++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/JobController.java @@ -158,7 +158,7 @@ public class JobController { /** Stores the given log entries for the given run and step. */ public void log(RunId id, Step step, List<LogEntry> entries) { locked(id, __ -> { - logs.append(id.application(), id.type(), step, entries); + logs.append(id.application(), id.type(), step, entries, true); return __; }); } @@ -211,7 +211,7 @@ public class JobController { if (log.isEmpty()) return run; - logs.append(id.application(), id.type(), Step.copyVespaLogs, log); + logs.append(id.application(), id.type(), Step.copyVespaLogs, log, false); return run.with(log.get(log.size() - 1).at()); }); } @@ -230,7 +230,7 @@ public class JobController { if (entries.isEmpty()) return run; - logs.append(id.application(), id.type(), step.get(), entries); + logs.append(id.application(), id.type(), step.get(), entries, false); return run.with(entries.stream().mapToLong(LogEntry::id).max().getAsLong()); }); } diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/ControllerMaintenance.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/ControllerMaintenance.java index 4aeecdcd4ff..9793cded918 100644 --- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/ControllerMaintenance.java +++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/ControllerMaintenance.java @@ -70,7 +70,7 @@ public class ControllerMaintenance extends AbstractComponent { maintainers.add(new ArchiveAccessMaintainer(controller, metric, intervals.archiveAccessMaintainer)); maintainers.add(new TenantRoleMaintainer(controller, intervals.tenantRoleMaintainer)); maintainers.add(new ChangeRequestMaintainer(controller, intervals.changeRequestMaintainer)); - maintainers.add(new VcmrMaintainer(controller, intervals.vcmrMaintainer)); + maintainers.add(new VcmrMaintainer(controller, intervals.vcmrMaintainer, metric)); maintainers.add(new CloudTrialExpirer(controller, intervals.defaultInterval)); maintainers.add(new RetriggerMaintainer(controller, intervals.retriggerMaintainer)); maintainers.add(new UserManagementMaintainer(controller, intervals.userManagementMaintainer, controller.serviceRegistry().roleMaintainer())); diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/OsUpgradeScheduler.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/OsUpgradeScheduler.java index 3bd1c7bb358..5e4d6d71ff6 100644 --- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/OsUpgradeScheduler.java +++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/OsUpgradeScheduler.java @@ -9,8 +9,10 @@ import com.yahoo.vespa.hosted.controller.api.integration.deployment.ArtifactRepo import com.yahoo.vespa.hosted.controller.api.integration.deployment.OsRelease; import com.yahoo.vespa.hosted.controller.versions.OsVersionTarget; +import java.time.DayOfWeek; import java.time.Duration; import java.time.Instant; +import java.time.LocalDate; import java.time.ZoneOffset; import java.time.format.DateTimeFormatter; import java.util.Objects; @@ -84,14 +86,11 @@ public class OsUpgradeScheduler extends ControllerMaintainer { } /** OS release based on a tag */ - private static class TaggedRelease implements Release { + private record TaggedRelease(SystemName system, ArtifactRepository artifactRepository) implements Release { - private final SystemName system; - private final ArtifactRepository artifactRepository; - - private TaggedRelease(SystemName system, ArtifactRepository artifactRepository) { - this.system = Objects.requireNonNull(system); - this.artifactRepository = Objects.requireNonNull(artifactRepository); + public TaggedRelease { + Objects.requireNonNull(system); + Objects.requireNonNull(artifactRepository); } @Override @@ -119,41 +118,30 @@ public class OsUpgradeScheduler extends ControllerMaintainer { } /** OS release based on calendar-versioning */ - private static class CalendarVersionedRelease implements Release { + record CalendarVersionedRelease(SystemName system) implements Release { - /** The time to wait before scheduling upgrade to next version */ - private static final Duration SCHEDULING_INTERVAL = Duration.ofDays(45); + /** A fixed point in time which the release schedule is calculated from */ + private static final Instant START_OF_SCHEDULE = LocalDate.of(2022, 1, 1) + .atStartOfDay() + .toInstant(ZoneOffset.UTC); - /** - * The interval at which new versions become available. We use this to avoid scheduling upgrades to a version - * that has not been released yet. Example: Version N is the latest one and target is set to N+1. If N+1 does - * not exist the zone will not converge until N+1 has been released and we may end up triggering multiple - * rounds of upgrades. - */ - private static final Duration AVAILABILITY_INTERVAL = Duration.ofDays(7); + /** The time that should elapse between versions */ + private static final Duration SCHEDULING_STEP = Duration.ofDays(45); - private static final DateTimeFormatter CALENDAR_VERSION_PATTERN = DateTimeFormatter.ofPattern("yyyyMMdd"); + /** The day of week new releases are published */ + private static final DayOfWeek RELEASE_DAY = DayOfWeek.MONDAY; - private final SystemName system; + private static final DateTimeFormatter CALENDAR_VERSION_PATTERN = DateTimeFormatter.ofPattern("yyyyMMdd"); - public CalendarVersionedRelease(SystemName system) { - this.system = Objects.requireNonNull(system); + public CalendarVersionedRelease { + Objects.requireNonNull(system); } @Override public Version version(OsVersionTarget currentTarget, Instant now) { - Instant scheduledAt = currentTarget.scheduledAt(); Version currentVersion = currentTarget.osVersion().version(); - if (scheduledAt.isBefore(now.minus(SCHEDULING_INTERVAL))) { - String calendarVersion = now.minus(AVAILABILITY_INTERVAL) - .atZone(ZoneOffset.UTC) - .format(CALENDAR_VERSION_PATTERN); - return new Version(currentVersion.getMajor(), - currentVersion.getMinor(), - currentVersion.getMicro(), - calendarVersion); - } - return currentVersion; // New version should not be scheduled yet + Version wantedVersion = asVersion(dateOfWantedVersion(now), currentVersion); + return wantedVersion.isAfter(currentVersion) ? wantedVersion : currentVersion; } @Override @@ -161,6 +149,32 @@ public class OsUpgradeScheduler extends ControllerMaintainer { return system.isCd() ? Duration.ZERO : Duration.ofDays(14); } + /** + * Calculate the date of the wanted version relative to now. A given zone will choose the oldest release + * available which is not older than this date. + */ + static LocalDate dateOfWantedVersion(Instant now) { + Instant candidate = START_OF_SCHEDULE; + while (!candidate.plus(SCHEDULING_STEP).isAfter(now)) { + candidate = candidate.plus(SCHEDULING_STEP); + } + LocalDate date = LocalDate.ofInstant(candidate, ZoneOffset.UTC); + return releaseDayOf(date); + } + + private static LocalDate releaseDayOf(LocalDate date) { + int releaseDayDelta = RELEASE_DAY.getValue() - date.getDayOfWeek().getValue(); + return date.plusDays(releaseDayDelta); + } + + private static Version asVersion(LocalDate dateOfVersion, Version currentVersion) { + String calendarVersion = dateOfVersion.format(CALENDAR_VERSION_PATTERN); + return new Version(currentVersion.getMajor(), + currentVersion.getMinor(), + currentVersion.getMicro(), + calendarVersion); + } + } } diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/OsUpgrader.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/OsUpgrader.java index 0c146179f34..8155476f139 100644 --- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/OsUpgrader.java +++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/OsUpgrader.java @@ -62,7 +62,7 @@ public class OsUpgrader extends InfrastructureUpgrader<OsVersionTarget> { protected boolean expectUpgradeOf(Node node, SystemApplication application, ZoneApi zone) { return cloud.equals(zone.getCloudName()) && // Cloud is managed by this upgrader application.shouldUpgradeOs() && // Application should upgrade in this cloud - canUpgrade(node); // Node is in an upgradable state + canUpgrade(node); } @Override @@ -101,9 +101,9 @@ public class OsUpgrader extends InfrastructureUpgrader<OsVersionTarget> { return !controller().zoneRegistry().systemZone().getVirtualId().equals(zone.getVirtualId()); // Do not spend budget on controller zone } - /** Returns whether node is in a state where it can be upgraded */ + /** Returns whether node currently allows upgrades */ public static boolean canUpgrade(Node node) { - return upgradableNodeStates.contains(node.state()); + return !node.deferOsUpgrade() && upgradableNodeStates.contains(node.state()); } private static String name(CloudName cloud) { diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/VcmrMaintainer.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/VcmrMaintainer.java index 551f803f368..daba7e74f34 100644 --- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/VcmrMaintainer.java +++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/VcmrMaintainer.java @@ -5,6 +5,7 @@ import com.yahoo.config.provision.Environment; import com.yahoo.config.provision.NodeType; import com.yahoo.config.provision.SystemName; import com.yahoo.config.provision.zone.ZoneId; +import com.yahoo.jdisc.Metric; import com.yahoo.text.Text; import com.yahoo.vespa.hosted.controller.Controller; import com.yahoo.vespa.hosted.controller.api.integration.configserver.Node; @@ -46,26 +47,28 @@ public class VcmrMaintainer extends ControllerMaintainer { private static final Logger LOG = Logger.getLogger(VcmrMaintainer.class.getName()); private static final int DAYS_TO_RETIRE = 2; private static final Duration ALLOWED_POSTPONEMENT_TIME = Duration.ofDays(7); + protected static final String TRACKED_CMRS_METRIC = "cmr.tracked"; private final CuratorDb curator; private final NodeRepository nodeRepository; private final ChangeRequestClient changeRequestClient; private final SystemName system; + private final Metric metric; - public VcmrMaintainer(Controller controller, Duration interval) { + public VcmrMaintainer(Controller controller, Duration interval, Metric metric) { super(controller, interval, null, SystemName.allOf(Predicate.not(SystemName::isPublic))); this.curator = controller.curator(); this.nodeRepository = controller.serviceRegistry().configServer().nodeRepository(); this.changeRequestClient = controller.serviceRegistry().changeRequestClient(); this.system = controller.system(); + this.metric = metric; } @Override protected double maintain() { var changeRequests = curator.readChangeRequests() .stream() - .filter(shouldUpdate()) - .collect(Collectors.toList()); + .filter(shouldUpdate()).toList(); var nodesByZone = nodesByZone(); @@ -86,6 +89,7 @@ public class VcmrMaintainer extends ControllerMaintainer { }); } }); + updateMetrics(); return 1.0; } @@ -357,4 +361,15 @@ public class VcmrMaintainer extends ControllerMaintainer { return time; } + private void updateMetrics() { + var cmrsByStatus = curator.readChangeRequests() + .stream() + .collect(Collectors.groupingBy(VespaChangeRequest::getStatus)); + + for (var status : Status.values()) { + var count = cmrsByStatus.getOrDefault(status, List.of()).size(); + metric.set(TRACKED_CMRS_METRIC, count, metric.createContext(Map.of("status", status.name()))); + } + } + } diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/persistence/BufferedLogStore.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/persistence/BufferedLogStore.java index 9721026c628..ecb9db8195f 100644 --- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/persistence/BufferedLogStore.java +++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/persistence/BufferedLogStore.java @@ -49,7 +49,7 @@ public class BufferedLogStore { } /** Appends to the log of the given, active run, reassigning IDs as counted here, and converting to Vespa log levels. */ - public void append(ApplicationId id, JobType type, Step step, List<LogEntry> entries) { + public void append(ApplicationId id, JobType type, Step step, List<LogEntry> entries, boolean forceLog) { if (entries.isEmpty()) return; @@ -58,7 +58,7 @@ public class BufferedLogStore { long lastEntryId = buffer.readLastLogEntryId(id, type).orElse(-1L); long lastChunkId = buffer.getLogChunkIds(id, type).max().orElse(0); long numberOfChunks = Math.max(1, buffer.getLogChunkIds(id, type).count()); - if (numberOfChunks > maxLogSize / chunkSize) + if (numberOfChunks > maxLogSize / chunkSize && ! forceLog) return; // Max size exceeded — store no more. byte[] emptyChunk = "[]".getBytes(); @@ -72,8 +72,12 @@ public class BufferedLogStore { buffer.writeLastLogEntryId(id, type, lastEntryId); buffer.writeLog(id, type, lastChunkId, logSerializer.toJson(log)); lastChunkId = lastEntryId + 1; - if (++numberOfChunks > maxLogSize / chunkSize) { - log = Map.of(step, List.of(new LogEntry(++lastEntryId, entry.at(), LogEntry.Type.warning, "Max log size of " + (maxLogSize >> 20) + "Mb exceeded; further entries are discarded."))); + if (++numberOfChunks > maxLogSize / chunkSize && ! forceLog) { + log = Map.of(step, List.of(new LogEntry(++lastEntryId, + entry.at(), + LogEntry.Type.warning, + "Max log size of " + (maxLogSize >> 20) + + "Mb exceeded; further user entries are discarded."))); break; } log = new HashMap<>(); diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/restapi/application/ApplicationApiHandler.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/restapi/application/ApplicationApiHandler.java index cfb00db7b63..56eaf2f3a2e 100644 --- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/restapi/application/ApplicationApiHandler.java +++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/restapi/application/ApplicationApiHandler.java @@ -75,6 +75,7 @@ import com.yahoo.vespa.hosted.controller.api.integration.deployment.RunId; import com.yahoo.vespa.hosted.controller.api.integration.deployment.SourceRevision; import com.yahoo.vespa.hosted.controller.api.integration.noderepository.RestartFilter; import com.yahoo.vespa.hosted.controller.api.integration.secrets.TenantSecretStore; +import com.yahoo.vespa.hosted.controller.api.integration.zone.ZoneRegistry; import com.yahoo.vespa.hosted.controller.api.role.Role; import com.yahoo.vespa.hosted.controller.api.role.RoleDefinition; import com.yahoo.vespa.hosted.controller.api.role.SecurityContext; @@ -1473,6 +1474,15 @@ public class ApplicationApiHandler extends AuditLoggingRequestHandler { private HttpResponse trigger(ApplicationId id, JobType type, HttpRequest request) { + // JobType.fromJobName doesn't properly initiate test jobs. Triggering these without context isn't _really_ + // necessary, but triggering a test in the default cloud is better than failing with a weird error. + ZoneRegistry zones = controller.zoneRegistry(); + type = switch (type.environment()) { + case test -> JobType.systemTest(zones, zones.systemZone().getCloudName()); + case staging -> JobType.stagingTest(zones, zones.systemZone().getCloudName()); + default -> type; + }; + Inspector requestObject = toSlime(request.getData()).get(); boolean requireTests = ! requestObject.field("skipTests").asBool(); boolean reTrigger = requestObject.field("reTrigger").asBool(); diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/restapi/application/JobControllerApiHandlerHelper.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/restapi/application/JobControllerApiHandlerHelper.java index 9c016eccd27..25953c16bf0 100644 --- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/restapi/application/JobControllerApiHandlerHelper.java +++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/restapi/application/JobControllerApiHandlerHelper.java @@ -115,6 +115,7 @@ class JobControllerApiHandlerHelper { Run run = jobController.run(runId); detailsObject.setBool("active", ! run.hasEnded()); detailsObject.setString("status", nameOf(run.status())); + run.reason().ifPresent(reason -> detailsObject.setString("reason", reason)); try { jobController.updateTestLog(runId); jobController.updateVespaLog(runId); @@ -421,6 +422,7 @@ class JobControllerApiHandlerHelper { runObject.setLong("start", run.start().toEpochMilli()); run.end().ifPresent(end -> runObject.setLong("end", end.toEpochMilli())); runObject.setString("status", run.status().name()); + run.reason().ifPresent(reason -> runObject.setString("reason", reason)); toSlime(runObject.setObject("versions"), run.versions(), application); Cursor runStepsArray = runObject.setArray("steps"); run.steps().forEach((step, info) -> { diff --git a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentTriggerTest.java b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentTriggerTest.java index 673cbf9708b..08fc6df37fb 100644 --- a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentTriggerTest.java +++ b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentTriggerTest.java @@ -2153,6 +2153,7 @@ public class DeploymentTriggerTest { Version version2 = new Version("8"); tester.controllerTester().flagSource().withListFlag(PermanentFlags.INCOMPATIBLE_VERSIONS.id(), List.of("8"), String.class); + // App deploys on version1. tester.controllerTester().upgradeSystem(version1); DeploymentContext app = tester.newDeploymentContext() .submit(new ApplicationPackageBuilder().region("us-east-3") @@ -2160,10 +2161,12 @@ public class DeploymentTriggerTest { .build()) .deploy(); + // System upgrades to version2, but the app is not upgraded. tester.controllerTester().upgradeSystem(version2); tester.upgrader().run(); assertEquals(Change.empty(), app.instance().change()); + // App compiles against version2, and upgrades. app.submit(new ApplicationPackageBuilder().region("us-east-3") .compileVersion(version2) .build()); @@ -2171,6 +2174,18 @@ public class DeploymentTriggerTest { assertEquals(version2, tester.jobs().last(app.instanceId(), productionUsEast3).get().versions().targetPlatform()); assertEquals(version2, app.application().revisions().get(tester.jobs().last(app.instanceId(), productionUsEast3).get().versions().targetRevision()).compileVersion().get()); + + // App specifies version1 in deployment spec, compiles against version1, pins to version1, and then downgrades. + app.submit(new ApplicationPackageBuilder().region("us-east-3") + .majorVersion(7) + .compileVersion(version1) + .build()); + tester.deploymentTrigger().forceChange(app.instanceId(), app.instance().change().withPin()); + app.deploy(); + assertEquals(version1, tester.jobs().last(app.instanceId(), productionUsEast3).get().versions().targetPlatform()); + assertEquals(version1, app.application().revisions().get(tester.jobs().last(app.instanceId(), productionUsEast3).get().versions().targetRevision()).compileVersion().get()); + + // A new app, compiled against version1, is deployed on version1. DeploymentContext newApp = tester.newDeploymentContext("new", "app", "default") .submit(new ApplicationPackageBuilder().region("us-east-3") .compileVersion(version1) diff --git a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/OsUpgradeSchedulerTest.java b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/OsUpgradeSchedulerTest.java index 300aa86b5ea..478bb943eba 100644 --- a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/OsUpgradeSchedulerTest.java +++ b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/OsUpgradeSchedulerTest.java @@ -13,7 +13,10 @@ import org.junit.Test; import java.time.Duration; import java.time.Instant; +import java.time.LocalDate; +import java.time.ZoneOffset; import java.util.List; +import java.util.Map; import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertTrue; @@ -27,7 +30,7 @@ public class OsUpgradeSchedulerTest { public void schedule_calendar_versioned_release() { ControllerTester tester = new ControllerTester(); OsUpgradeScheduler scheduler = new OsUpgradeScheduler(tester.controller(), Duration.ofDays(1)); - Instant t0 = Instant.parse("2021-01-23T00:00:00.00Z"); // Outside trigger period + Instant t0 = Instant.parse("2022-01-16T00:00:00.00Z"); // Outside trigger period tester.clock().setInstant(t0); CloudName cloud = CloudName.from("cloud"); @@ -38,8 +41,8 @@ public class OsUpgradeSchedulerTest { scheduler.maintain(); assertTrue("No target set", tester.controller().osVersionTarget(cloud).isEmpty()); - // Target is set - Version version0 = Version.fromString("7.0.0.20210123190005"); + // Target is set manually + Version version0 = Version.fromString("7.0.0.20220101"); tester.controller().upgradeOsIn(cloud, version0, Duration.ofDays(1), false); // Target remains unchanged as it hasn't expired yet @@ -49,8 +52,8 @@ public class OsUpgradeSchedulerTest { assertEquals(version0, tester.controller().osVersionTarget(cloud).get().osVersion().version()); } - // Just over 45 days pass, and a new target replaces the expired one - Version version1 = Version.fromString("7.0.0.20210302"); + // Enough days pass that the next release is triggered + Version version1 = Version.fromString("7.0.0.20220214"); tester.clock().advance(Duration.ofDays(15).plus(Duration.ofSeconds(1))); scheduler.maintain(); assertEquals("Target is unchanged because we're outside trigger period", version0, @@ -60,7 +63,7 @@ public class OsUpgradeSchedulerTest { assertEquals("New target set", version1, tester.controller().osVersionTarget(cloud).get().osVersion().version()); - // A few days pass and target remains unchanged + // A few more days pass and target remains unchanged tester.clock().advance(Duration.ofDays(2)); scheduler.maintain(); assertEquals(version1, tester.controller().osVersionTarget(cloud).get().osVersion().version()); @@ -112,6 +115,24 @@ public class OsUpgradeSchedulerTest { scheduleUpgradeAfter(Duration.ofDays(1), version1, tester); } + @Test + public void schedule_of_calender_versioned_releases() { + Map<String, String> tests = Map.of("2022-01-01", "2021-12-27", + "2022-02-14", "2021-12-27", + "2022-02-15", "2022-02-14", + "2022-03-31", "2022-02-14", + "2022-04-01", "2022-03-28", + "2022-05-15", "2022-03-28", + "2022-05-16", "2022-05-16", + "2022-06-29", "2022-05-16", + "2022-06-30", "2022-06-27"); + tests.forEach((now, expected) -> { + Instant instant = LocalDate.parse(now).atStartOfDay().toInstant(ZoneOffset.UTC); + LocalDate dateOfWantedVersion = OsUpgradeScheduler.CalendarVersionedRelease.dateOfWantedVersion(instant); + assertEquals("scheduled wanted version at " + now, LocalDate.parse(expected), dateOfWantedVersion); + }); + } + private void scheduleUpgradeAfter(Duration duration, Version version, ControllerTester tester) { tester.clock().advance(duration); new OsUpgradeScheduler(tester.controller(), Duration.ofDays(1)).maintain(); diff --git a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/OsUpgraderTest.java b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/OsUpgraderTest.java index 8adf55155a5..3a5b4a90baa 100644 --- a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/OsUpgraderTest.java +++ b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/OsUpgraderTest.java @@ -19,7 +19,9 @@ import java.time.Duration; import java.util.Collection; import java.util.List; import java.util.function.Function; +import java.util.function.UnaryOperator; import java.util.stream.Collectors; +import java.util.stream.Stream; import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertTrue; @@ -52,18 +54,19 @@ public class OsUpgraderTest { OsUpgrader osUpgrader = osUpgrader(upgradePolicy, cloud1, false); // Bootstrap system - List<ZoneId> nonControllerZones = List.of(zone1, zone2, zone3, zone4, zone5).stream() - .map(ZoneApi::getVirtualId) - .collect(Collectors.toList()); + List<ZoneId> nonControllerZones = Stream.of(zone1, zone2, zone3, zone4, zone5) + .map(ZoneApi::getVirtualId) + .collect(Collectors.toList()); tester.configServer().bootstrap(nonControllerZones, List.of(SystemApplication.tenantHost)); tester.configServer().addNodes(List.of(zone0.getVirtualId()), List.of(SystemApplication.controllerHost)); // Add system application that exists in a real system, but isn't eligible for OS upgrades tester.configServer().addNodes(nonControllerZones, List.of(SystemApplication.configServer)); - // Fail a few nodes. Failed nodes should not affect versions + // Change state of a few nodes. These should not affect convergence failNodeIn(zone1, SystemApplication.tenantHost); failNodeIn(zone3, SystemApplication.tenantHost); + Node nodeDeferringOsUpgrade = deferOsUpgradeIn(zone2, SystemApplication.tenantHost); // New OS version released Version version1 = Version.fromString("7.1"); @@ -91,7 +94,7 @@ public class OsUpgraderTest { completeUpgrade(version1, SystemApplication.tenantHost, zone1); statusUpdater.maintain(); assertEquals(5, nodesOn(version1).size()); - assertEquals(11, nodesOn(Version.emptyVersion).size()); + assertEquals(10, nodesOn(Version.emptyVersion).size()); // zone 2 and 3: begins upgrading osUpgrader.maintain(); @@ -102,6 +105,10 @@ public class OsUpgraderTest { // zone 2 and 3: completes upgrade completeUpgrade(version1, SystemApplication.tenantHost, zone2, zone3); + assertEquals("Current version is unchanged for node deferring OS upgrade", Version.emptyVersion, + nodeRepository().list(zone2.getVirtualId(), NodeFilter.all().hostnames(nodeDeferringOsUpgrade.hostname())) + .get(0) + .currentOsVersion()); // zone 4: begins upgrading osUpgrader.maintain(); @@ -271,13 +278,23 @@ public class OsUpgraderTest { .collect(Collectors.toList()); } - private void failNodeIn(ZoneApi zone, SystemApplication application) { + private Node failNodeIn(ZoneApi zone, SystemApplication application) { + return patchOneNodeIn(zone, application, (node) -> Node.builder(node).state(Node.State.failed).build()); + } + + private Node deferOsUpgradeIn(ZoneApi zone, SystemApplication application) { + return patchOneNodeIn(zone, application, (node) -> Node.builder(node).deferOsUpgrade(true).build()); + } + + private Node patchOneNodeIn(ZoneApi zone, SystemApplication application, UnaryOperator<Node> patcher) { List<Node> nodes = nodeRepository().list(zone.getVirtualId(), NodeFilter.all().applications(application.id())); if (nodes.isEmpty()) { throw new IllegalArgumentException("No nodes allocated to " + application.id()); } Node node = nodes.get(0); - nodeRepository().putNodes(zone.getVirtualId(), Node.builder(node).state(Node.State.failed).build()); + Node newNode = patcher.apply(node); + nodeRepository().putNodes(zone.getVirtualId(), newNode); + return newNode; } /** Simulate OS upgrade of nodes allocated to application. In a real system this is done by the node itself */ diff --git a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/VcmrMaintainerTest.java b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/VcmrMaintainerTest.java index bfbd3836ce7..321ec3ad8ea 100644 --- a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/VcmrMaintainerTest.java +++ b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/VcmrMaintainerTest.java @@ -14,6 +14,7 @@ import com.yahoo.vespa.hosted.controller.api.integration.vcmr.HostAction.State; import com.yahoo.vespa.hosted.controller.api.integration.vcmr.VcmrReport; import com.yahoo.vespa.hosted.controller.api.integration.vcmr.VespaChangeRequest; import com.yahoo.vespa.hosted.controller.api.integration.vcmr.VespaChangeRequest.Status; +import com.yahoo.vespa.hosted.controller.integration.MetricsMock; import com.yahoo.vespa.hosted.controller.integration.NodeRepositoryMock; import org.junit.Before; import org.junit.Test; @@ -25,6 +26,7 @@ import java.time.ZonedDateTime; import java.time.temporal.TemporalAdjusters; import java.util.List; +import static com.yahoo.vespa.hosted.controller.maintenance.VcmrMaintainer.TRACKED_CMRS_METRIC; import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertFalse; import static org.junit.Assert.assertTrue; @@ -37,6 +39,7 @@ public class VcmrMaintainerTest { private ControllerTester tester; private VcmrMaintainer maintainer; private NodeRepositoryMock nodeRepo; + private MetricsMock metrics; private final ZoneId zoneId = ZoneId.from("prod.us-east-3"); private final ZoneId zone2 = ZoneId.from("prod.us-west-1"); private final HostName host1 = HostName.of("host1"); @@ -47,7 +50,8 @@ public class VcmrMaintainerTest { @Before public void setup() { tester = new ControllerTester(); - maintainer = new VcmrMaintainer(tester.controller(), Duration.ofMinutes(1)); + metrics = new MetricsMock(); + maintainer = new VcmrMaintainer(tester.controller(), Duration.ofMinutes(1), metrics); nodeRepo = tester.serviceRegistry().configServer().nodeRepository().allowPatching(true); } @@ -244,6 +248,8 @@ public class VcmrMaintainerTest { assertEquals(State.OUT_OF_SYNC, action.getState()); assertEquals(Status.OUT_OF_SYNC, writtenChangeRequest.getStatus()); + assertEquals(1, metrics.getMetric(context -> "OUT_OF_SYNC".equals(context.get("status")), TRACKED_CMRS_METRIC).get()); + assertEquals(0, metrics.getMetric(context -> "REQUIRES_OPERATOR_ACTION".equals(context.get("status")), TRACKED_CMRS_METRIC).get()); } @Test diff --git a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/persistence/BufferedLogStoreTest.java b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/persistence/BufferedLogStoreTest.java index 0f7f97d333a..87280c0c1a3 100644 --- a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/persistence/BufferedLogStoreTest.java +++ b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/persistence/BufferedLogStoreTest.java @@ -50,19 +50,19 @@ public class BufferedLogStoreTest { assertEquals(Optional.empty(), logs.readFinished(id, -1)); assertEquals(RunLog.empty(), logs.readActive(id.application(), id.type(), -1)); - logs.append(id.application(), id.type(), Step.deployReal, List.of(entry)); + logs.append(id.application(), id.type(), Step.deployReal, List.of(entry), false); assertEquals(List.of(entry0), logs.readActive(id.application(), id.type(), -1).get(Step.deployReal)); assertEquals(RunLog.empty(), logs.readActive(id.application(), id.type(), 0)); - logs.append(id.application(), id.type(), Step.deployReal, List.of(entry)); + logs.append(id.application(), id.type(), Step.deployReal, List.of(entry), false); assertEquals(List.of(entry0, entry1), logs.readActive(id.application(), id.type(), -1).get(Step.deployReal)); assertEquals(List.of(entry1), logs.readActive(id.application(), id.type(), 0).get(Step.deployReal)); assertEquals(RunLog.empty(), logs.readActive(id.application(), id.type(), 1)); - logs.append(id.application(), id.type(), Step.deployReal, List.of(entry, entry, entry)); + logs.append(id.application(), id.type(), Step.deployReal, List.of(entry, entry, entry), false); assertEquals(List.of(entry0, entry1, entry2, entry3, entry4), logs.readActive(id.application(), id.type(), -1).get(Step.deployReal)); assertEquals(List.of(entry1, entry2, entry3, entry4), @@ -105,17 +105,28 @@ public class BufferedLogStoreTest { logged.remove(logged.size() - 1); logged.remove(logged.size() - 1); logged.remove(logged.size() - 1); - logged.add(new LogEntry(2 * maxChunks, entry.at(), LogEntry.Type.warning, "Max log size of " + ((chunkSize * maxChunks) >> 20) + "Mb exceeded; further entries are discarded.")); + logged.add(new LogEntry(2 * maxChunks, entry.at(), LogEntry.Type.warning, "Max log size of " + ((chunkSize * maxChunks) >> 20) + "Mb exceeded; further user entries are discarded.")); - logs.append(id.application(), id.type(), Step.deployReal, monsterLog); + logs.append(id.application(), id.type(), Step.deployReal, monsterLog, false); assertEquals(logged.size(), logs.readActive(id.application(), id.type(), -1).get(Step.deployReal).size()); assertEquals(logged, logs.readActive(id.application(), id.type(), -1).get(Step.deployReal)); + // An additional, forced entry is appended. + LogEntry forced = new LogEntry(logged.size(), entry.at(), entry.type(), entry.message()); + logs.append(id.application(), id.type(), Step.deployReal, List.of(forced), true); + logged.add(forced); + assertEquals(logged.size(), + logs.readActive(id.application(), id.type(), -1).get(Step.deployReal).size()); + assertEquals(logged, + logs.readActive(id.application(), id.type(), -1).get(Step.deployReal)); + logged.remove(logged.size() - 1); + + // Flushing the buffer clears it again, and makes it ready for reuse. logs.flush(id); for (int i = 0; i < 2 * maxChunks + 3; i++) - logs.append(id.application(), id.type(), Step.deployReal, List.of(entry)); + logs.append(id.application(), id.type(), Step.deployReal, List.of(entry), false); assertEquals(logged.size(), logs.readActive(id.application(), id.type(), -1).get(Step.deployReal).size()); assertEquals(logged, diff --git a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/restapi/application/responses/deployment-overview.json b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/restapi/application/responses/deployment-overview.json index 696f1ef0ba3..38e9d8c823e 100644 --- a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/restapi/application/responses/deployment-overview.json +++ b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/restapi/application/responses/deployment-overview.json @@ -428,6 +428,7 @@ "url": "http://localhost:8080/application/v4/tenant/tenant1/application/application1/instance/instance1/job/production-us-west-1/run/1", "start": 1600000000000, "status": "running", + "reason": "triggered by user.myuser", "versions": { "targetPlatform": "6.1.0", "targetApplication": { diff --git a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/restapi/application/responses/jobs.json b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/restapi/application/responses/jobs.json index 12430b67539..2477e8df56e 100644 --- a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/restapi/application/responses/jobs.json +++ b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/restapi/application/responses/jobs.json @@ -9,6 +9,7 @@ "start": 1600000000000, "end": 1600000000000, "status": "success", + "reason": "triggered by user.myuser", "versions": { "targetPlatform": "6.1.0", "targetApplication": { |