summaryrefslogtreecommitdiffstats
path: root/controller-server
diff options
context:
space:
mode:
Diffstat (limited to 'controller-server')
-rw-r--r--controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentTrigger.java20
-rw-r--r--controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/JobController.java6
-rw-r--r--controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/ControllerMaintenance.java2
-rw-r--r--controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/OsUpgradeScheduler.java78
-rw-r--r--controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/OsUpgrader.java6
-rw-r--r--controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/VcmrMaintainer.java21
-rw-r--r--controller-server/src/main/java/com/yahoo/vespa/hosted/controller/persistence/BufferedLogStore.java12
-rw-r--r--controller-server/src/main/java/com/yahoo/vespa/hosted/controller/restapi/application/ApplicationApiHandler.java10
-rw-r--r--controller-server/src/main/java/com/yahoo/vespa/hosted/controller/restapi/application/JobControllerApiHandlerHelper.java2
-rw-r--r--controller-server/src/test/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentTriggerTest.java15
-rw-r--r--controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/OsUpgradeSchedulerTest.java33
-rw-r--r--controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/OsUpgraderTest.java31
-rw-r--r--controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/VcmrMaintainerTest.java8
-rw-r--r--controller-server/src/test/java/com/yahoo/vespa/hosted/controller/persistence/BufferedLogStoreTest.java23
-rw-r--r--controller-server/src/test/java/com/yahoo/vespa/hosted/controller/restapi/application/responses/deployment-overview.json1
-rw-r--r--controller-server/src/test/java/com/yahoo/vespa/hosted/controller/restapi/application/responses/jobs.json1
16 files changed, 195 insertions, 74 deletions
diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentTrigger.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentTrigger.java
index 7ceeda08d3a..d83f552ab25 100644
--- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentTrigger.java
+++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentTrigger.java
@@ -228,10 +228,9 @@ public class DeploymentTrigger {
Instance instance = application.require(applicationId.instance());
JobId job = new JobId(instance.id(), jobType);
JobStatus jobStatus = jobs.jobStatus(new JobId(applicationId, jobType));
- Versions versions = jobStatus.lastTriggered()
- .orElseThrow(() -> new IllegalArgumentException(job + " has never been triggered"))
- .versions();
- trigger(deploymentJob(instance, versions, jobType, jobStatus, clock.instant()), reason);
+ Run last = jobStatus.lastTriggered()
+ .orElseThrow(() -> new IllegalArgumentException(job + " has never been triggered"));
+ trigger(deploymentJob(instance, last.versions(), last.id().type(), jobStatus.isNodeAllocationFailure(), clock.instant()), reason);
return job;
}
@@ -259,7 +258,12 @@ public class DeploymentTrigger {
.collect(toMap(Map.Entry::getKey, Map.Entry::getValue));
jobs.forEach((jobId, versionsList) -> {
- trigger(deploymentJob(application.require(job.application().instance()), versionsList.get(0).versions(), jobId.type(), status.jobs().get(jobId).get(), clock.instant()), reason);
+ trigger(deploymentJob(application.require(job.application().instance()),
+ versionsList.get(0).versions(),
+ jobId.type(),
+ status.jobs().get(jobId).get().isNodeAllocationFailure(),
+ clock.instant()),
+ reason);
});
return List.copyOf(jobs.keySet());
}
@@ -388,7 +392,7 @@ public class DeploymentTrigger {
jobs.add(deploymentJob(status.application().require(jobId.application().instance()),
job.versions(),
job.type(),
- status.instanceJobs(jobId.application().instance()).get(jobId.type()),
+ status.instanceJobs(jobId.application().instance()).get(jobId.type()).isNodeAllocationFailure(),
job.readyAt().get()));
});
return Collections.unmodifiableList(jobs);
@@ -475,8 +479,8 @@ public class DeploymentTrigger {
// ---------- Version and job helpers ----------
- private Job deploymentJob(Instance instance, Versions versions, JobType jobType, JobStatus jobStatus, Instant availableSince) {
- return new Job(instance, versions, jobType, availableSince, jobStatus.isNodeAllocationFailure(), instance.change().revision().isPresent());
+ private Job deploymentJob(Instance instance, Versions versions, JobType jobType, boolean isNodeAllocationFailure, Instant availableSince) {
+ return new Job(instance, versions, jobType, availableSince, isNodeAllocationFailure, instance.change().revision().isPresent());
}
// ---------- Data containers ----------
diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/JobController.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/JobController.java
index 19b2afb3af9..881107fa0f9 100644
--- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/JobController.java
+++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/JobController.java
@@ -158,7 +158,7 @@ public class JobController {
/** Stores the given log entries for the given run and step. */
public void log(RunId id, Step step, List<LogEntry> entries) {
locked(id, __ -> {
- logs.append(id.application(), id.type(), step, entries);
+ logs.append(id.application(), id.type(), step, entries, true);
return __;
});
}
@@ -211,7 +211,7 @@ public class JobController {
if (log.isEmpty())
return run;
- logs.append(id.application(), id.type(), Step.copyVespaLogs, log);
+ logs.append(id.application(), id.type(), Step.copyVespaLogs, log, false);
return run.with(log.get(log.size() - 1).at());
});
}
@@ -230,7 +230,7 @@ public class JobController {
if (entries.isEmpty())
return run;
- logs.append(id.application(), id.type(), step.get(), entries);
+ logs.append(id.application(), id.type(), step.get(), entries, false);
return run.with(entries.stream().mapToLong(LogEntry::id).max().getAsLong());
});
}
diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/ControllerMaintenance.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/ControllerMaintenance.java
index 4aeecdcd4ff..9793cded918 100644
--- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/ControllerMaintenance.java
+++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/ControllerMaintenance.java
@@ -70,7 +70,7 @@ public class ControllerMaintenance extends AbstractComponent {
maintainers.add(new ArchiveAccessMaintainer(controller, metric, intervals.archiveAccessMaintainer));
maintainers.add(new TenantRoleMaintainer(controller, intervals.tenantRoleMaintainer));
maintainers.add(new ChangeRequestMaintainer(controller, intervals.changeRequestMaintainer));
- maintainers.add(new VcmrMaintainer(controller, intervals.vcmrMaintainer));
+ maintainers.add(new VcmrMaintainer(controller, intervals.vcmrMaintainer, metric));
maintainers.add(new CloudTrialExpirer(controller, intervals.defaultInterval));
maintainers.add(new RetriggerMaintainer(controller, intervals.retriggerMaintainer));
maintainers.add(new UserManagementMaintainer(controller, intervals.userManagementMaintainer, controller.serviceRegistry().roleMaintainer()));
diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/OsUpgradeScheduler.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/OsUpgradeScheduler.java
index 3bd1c7bb358..5e4d6d71ff6 100644
--- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/OsUpgradeScheduler.java
+++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/OsUpgradeScheduler.java
@@ -9,8 +9,10 @@ import com.yahoo.vespa.hosted.controller.api.integration.deployment.ArtifactRepo
import com.yahoo.vespa.hosted.controller.api.integration.deployment.OsRelease;
import com.yahoo.vespa.hosted.controller.versions.OsVersionTarget;
+import java.time.DayOfWeek;
import java.time.Duration;
import java.time.Instant;
+import java.time.LocalDate;
import java.time.ZoneOffset;
import java.time.format.DateTimeFormatter;
import java.util.Objects;
@@ -84,14 +86,11 @@ public class OsUpgradeScheduler extends ControllerMaintainer {
}
/** OS release based on a tag */
- private static class TaggedRelease implements Release {
+ private record TaggedRelease(SystemName system, ArtifactRepository artifactRepository) implements Release {
- private final SystemName system;
- private final ArtifactRepository artifactRepository;
-
- private TaggedRelease(SystemName system, ArtifactRepository artifactRepository) {
- this.system = Objects.requireNonNull(system);
- this.artifactRepository = Objects.requireNonNull(artifactRepository);
+ public TaggedRelease {
+ Objects.requireNonNull(system);
+ Objects.requireNonNull(artifactRepository);
}
@Override
@@ -119,41 +118,30 @@ public class OsUpgradeScheduler extends ControllerMaintainer {
}
/** OS release based on calendar-versioning */
- private static class CalendarVersionedRelease implements Release {
+ record CalendarVersionedRelease(SystemName system) implements Release {
- /** The time to wait before scheduling upgrade to next version */
- private static final Duration SCHEDULING_INTERVAL = Duration.ofDays(45);
+ /** A fixed point in time which the release schedule is calculated from */
+ private static final Instant START_OF_SCHEDULE = LocalDate.of(2022, 1, 1)
+ .atStartOfDay()
+ .toInstant(ZoneOffset.UTC);
- /**
- * The interval at which new versions become available. We use this to avoid scheduling upgrades to a version
- * that has not been released yet. Example: Version N is the latest one and target is set to N+1. If N+1 does
- * not exist the zone will not converge until N+1 has been released and we may end up triggering multiple
- * rounds of upgrades.
- */
- private static final Duration AVAILABILITY_INTERVAL = Duration.ofDays(7);
+ /** The time that should elapse between versions */
+ private static final Duration SCHEDULING_STEP = Duration.ofDays(45);
- private static final DateTimeFormatter CALENDAR_VERSION_PATTERN = DateTimeFormatter.ofPattern("yyyyMMdd");
+ /** The day of week new releases are published */
+ private static final DayOfWeek RELEASE_DAY = DayOfWeek.MONDAY;
- private final SystemName system;
+ private static final DateTimeFormatter CALENDAR_VERSION_PATTERN = DateTimeFormatter.ofPattern("yyyyMMdd");
- public CalendarVersionedRelease(SystemName system) {
- this.system = Objects.requireNonNull(system);
+ public CalendarVersionedRelease {
+ Objects.requireNonNull(system);
}
@Override
public Version version(OsVersionTarget currentTarget, Instant now) {
- Instant scheduledAt = currentTarget.scheduledAt();
Version currentVersion = currentTarget.osVersion().version();
- if (scheduledAt.isBefore(now.minus(SCHEDULING_INTERVAL))) {
- String calendarVersion = now.minus(AVAILABILITY_INTERVAL)
- .atZone(ZoneOffset.UTC)
- .format(CALENDAR_VERSION_PATTERN);
- return new Version(currentVersion.getMajor(),
- currentVersion.getMinor(),
- currentVersion.getMicro(),
- calendarVersion);
- }
- return currentVersion; // New version should not be scheduled yet
+ Version wantedVersion = asVersion(dateOfWantedVersion(now), currentVersion);
+ return wantedVersion.isAfter(currentVersion) ? wantedVersion : currentVersion;
}
@Override
@@ -161,6 +149,32 @@ public class OsUpgradeScheduler extends ControllerMaintainer {
return system.isCd() ? Duration.ZERO : Duration.ofDays(14);
}
+ /**
+ * Calculate the date of the wanted version relative to now. A given zone will choose the oldest release
+ * available which is not older than this date.
+ */
+ static LocalDate dateOfWantedVersion(Instant now) {
+ Instant candidate = START_OF_SCHEDULE;
+ while (!candidate.plus(SCHEDULING_STEP).isAfter(now)) {
+ candidate = candidate.plus(SCHEDULING_STEP);
+ }
+ LocalDate date = LocalDate.ofInstant(candidate, ZoneOffset.UTC);
+ return releaseDayOf(date);
+ }
+
+ private static LocalDate releaseDayOf(LocalDate date) {
+ int releaseDayDelta = RELEASE_DAY.getValue() - date.getDayOfWeek().getValue();
+ return date.plusDays(releaseDayDelta);
+ }
+
+ private static Version asVersion(LocalDate dateOfVersion, Version currentVersion) {
+ String calendarVersion = dateOfVersion.format(CALENDAR_VERSION_PATTERN);
+ return new Version(currentVersion.getMajor(),
+ currentVersion.getMinor(),
+ currentVersion.getMicro(),
+ calendarVersion);
+ }
+
}
}
diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/OsUpgrader.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/OsUpgrader.java
index 0c146179f34..8155476f139 100644
--- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/OsUpgrader.java
+++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/OsUpgrader.java
@@ -62,7 +62,7 @@ public class OsUpgrader extends InfrastructureUpgrader<OsVersionTarget> {
protected boolean expectUpgradeOf(Node node, SystemApplication application, ZoneApi zone) {
return cloud.equals(zone.getCloudName()) && // Cloud is managed by this upgrader
application.shouldUpgradeOs() && // Application should upgrade in this cloud
- canUpgrade(node); // Node is in an upgradable state
+ canUpgrade(node);
}
@Override
@@ -101,9 +101,9 @@ public class OsUpgrader extends InfrastructureUpgrader<OsVersionTarget> {
return !controller().zoneRegistry().systemZone().getVirtualId().equals(zone.getVirtualId()); // Do not spend budget on controller zone
}
- /** Returns whether node is in a state where it can be upgraded */
+ /** Returns whether node currently allows upgrades */
public static boolean canUpgrade(Node node) {
- return upgradableNodeStates.contains(node.state());
+ return !node.deferOsUpgrade() && upgradableNodeStates.contains(node.state());
}
private static String name(CloudName cloud) {
diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/VcmrMaintainer.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/VcmrMaintainer.java
index 551f803f368..daba7e74f34 100644
--- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/VcmrMaintainer.java
+++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/VcmrMaintainer.java
@@ -5,6 +5,7 @@ import com.yahoo.config.provision.Environment;
import com.yahoo.config.provision.NodeType;
import com.yahoo.config.provision.SystemName;
import com.yahoo.config.provision.zone.ZoneId;
+import com.yahoo.jdisc.Metric;
import com.yahoo.text.Text;
import com.yahoo.vespa.hosted.controller.Controller;
import com.yahoo.vespa.hosted.controller.api.integration.configserver.Node;
@@ -46,26 +47,28 @@ public class VcmrMaintainer extends ControllerMaintainer {
private static final Logger LOG = Logger.getLogger(VcmrMaintainer.class.getName());
private static final int DAYS_TO_RETIRE = 2;
private static final Duration ALLOWED_POSTPONEMENT_TIME = Duration.ofDays(7);
+ protected static final String TRACKED_CMRS_METRIC = "cmr.tracked";
private final CuratorDb curator;
private final NodeRepository nodeRepository;
private final ChangeRequestClient changeRequestClient;
private final SystemName system;
+ private final Metric metric;
- public VcmrMaintainer(Controller controller, Duration interval) {
+ public VcmrMaintainer(Controller controller, Duration interval, Metric metric) {
super(controller, interval, null, SystemName.allOf(Predicate.not(SystemName::isPublic)));
this.curator = controller.curator();
this.nodeRepository = controller.serviceRegistry().configServer().nodeRepository();
this.changeRequestClient = controller.serviceRegistry().changeRequestClient();
this.system = controller.system();
+ this.metric = metric;
}
@Override
protected double maintain() {
var changeRequests = curator.readChangeRequests()
.stream()
- .filter(shouldUpdate())
- .collect(Collectors.toList());
+ .filter(shouldUpdate()).toList();
var nodesByZone = nodesByZone();
@@ -86,6 +89,7 @@ public class VcmrMaintainer extends ControllerMaintainer {
});
}
});
+ updateMetrics();
return 1.0;
}
@@ -357,4 +361,15 @@ public class VcmrMaintainer extends ControllerMaintainer {
return time;
}
+ private void updateMetrics() {
+ var cmrsByStatus = curator.readChangeRequests()
+ .stream()
+ .collect(Collectors.groupingBy(VespaChangeRequest::getStatus));
+
+ for (var status : Status.values()) {
+ var count = cmrsByStatus.getOrDefault(status, List.of()).size();
+ metric.set(TRACKED_CMRS_METRIC, count, metric.createContext(Map.of("status", status.name())));
+ }
+ }
+
}
diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/persistence/BufferedLogStore.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/persistence/BufferedLogStore.java
index 9721026c628..ecb9db8195f 100644
--- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/persistence/BufferedLogStore.java
+++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/persistence/BufferedLogStore.java
@@ -49,7 +49,7 @@ public class BufferedLogStore {
}
/** Appends to the log of the given, active run, reassigning IDs as counted here, and converting to Vespa log levels. */
- public void append(ApplicationId id, JobType type, Step step, List<LogEntry> entries) {
+ public void append(ApplicationId id, JobType type, Step step, List<LogEntry> entries, boolean forceLog) {
if (entries.isEmpty())
return;
@@ -58,7 +58,7 @@ public class BufferedLogStore {
long lastEntryId = buffer.readLastLogEntryId(id, type).orElse(-1L);
long lastChunkId = buffer.getLogChunkIds(id, type).max().orElse(0);
long numberOfChunks = Math.max(1, buffer.getLogChunkIds(id, type).count());
- if (numberOfChunks > maxLogSize / chunkSize)
+ if (numberOfChunks > maxLogSize / chunkSize && ! forceLog)
return; // Max size exceeded — store no more.
byte[] emptyChunk = "[]".getBytes();
@@ -72,8 +72,12 @@ public class BufferedLogStore {
buffer.writeLastLogEntryId(id, type, lastEntryId);
buffer.writeLog(id, type, lastChunkId, logSerializer.toJson(log));
lastChunkId = lastEntryId + 1;
- if (++numberOfChunks > maxLogSize / chunkSize) {
- log = Map.of(step, List.of(new LogEntry(++lastEntryId, entry.at(), LogEntry.Type.warning, "Max log size of " + (maxLogSize >> 20) + "Mb exceeded; further entries are discarded.")));
+ if (++numberOfChunks > maxLogSize / chunkSize && ! forceLog) {
+ log = Map.of(step, List.of(new LogEntry(++lastEntryId,
+ entry.at(),
+ LogEntry.Type.warning,
+ "Max log size of " + (maxLogSize >> 20) +
+ "Mb exceeded; further user entries are discarded.")));
break;
}
log = new HashMap<>();
diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/restapi/application/ApplicationApiHandler.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/restapi/application/ApplicationApiHandler.java
index cfb00db7b63..56eaf2f3a2e 100644
--- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/restapi/application/ApplicationApiHandler.java
+++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/restapi/application/ApplicationApiHandler.java
@@ -75,6 +75,7 @@ import com.yahoo.vespa.hosted.controller.api.integration.deployment.RunId;
import com.yahoo.vespa.hosted.controller.api.integration.deployment.SourceRevision;
import com.yahoo.vespa.hosted.controller.api.integration.noderepository.RestartFilter;
import com.yahoo.vespa.hosted.controller.api.integration.secrets.TenantSecretStore;
+import com.yahoo.vespa.hosted.controller.api.integration.zone.ZoneRegistry;
import com.yahoo.vespa.hosted.controller.api.role.Role;
import com.yahoo.vespa.hosted.controller.api.role.RoleDefinition;
import com.yahoo.vespa.hosted.controller.api.role.SecurityContext;
@@ -1473,6 +1474,15 @@ public class ApplicationApiHandler extends AuditLoggingRequestHandler {
private HttpResponse trigger(ApplicationId id, JobType type, HttpRequest request) {
+ // JobType.fromJobName doesn't properly initiate test jobs. Triggering these without context isn't _really_
+ // necessary, but triggering a test in the default cloud is better than failing with a weird error.
+ ZoneRegistry zones = controller.zoneRegistry();
+ type = switch (type.environment()) {
+ case test -> JobType.systemTest(zones, zones.systemZone().getCloudName());
+ case staging -> JobType.stagingTest(zones, zones.systemZone().getCloudName());
+ default -> type;
+ };
+
Inspector requestObject = toSlime(request.getData()).get();
boolean requireTests = ! requestObject.field("skipTests").asBool();
boolean reTrigger = requestObject.field("reTrigger").asBool();
diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/restapi/application/JobControllerApiHandlerHelper.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/restapi/application/JobControllerApiHandlerHelper.java
index 9c016eccd27..25953c16bf0 100644
--- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/restapi/application/JobControllerApiHandlerHelper.java
+++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/restapi/application/JobControllerApiHandlerHelper.java
@@ -115,6 +115,7 @@ class JobControllerApiHandlerHelper {
Run run = jobController.run(runId);
detailsObject.setBool("active", ! run.hasEnded());
detailsObject.setString("status", nameOf(run.status()));
+ run.reason().ifPresent(reason -> detailsObject.setString("reason", reason));
try {
jobController.updateTestLog(runId);
jobController.updateVespaLog(runId);
@@ -421,6 +422,7 @@ class JobControllerApiHandlerHelper {
runObject.setLong("start", run.start().toEpochMilli());
run.end().ifPresent(end -> runObject.setLong("end", end.toEpochMilli()));
runObject.setString("status", run.status().name());
+ run.reason().ifPresent(reason -> runObject.setString("reason", reason));
toSlime(runObject.setObject("versions"), run.versions(), application);
Cursor runStepsArray = runObject.setArray("steps");
run.steps().forEach((step, info) -> {
diff --git a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentTriggerTest.java b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentTriggerTest.java
index 673cbf9708b..08fc6df37fb 100644
--- a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentTriggerTest.java
+++ b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentTriggerTest.java
@@ -2153,6 +2153,7 @@ public class DeploymentTriggerTest {
Version version2 = new Version("8");
tester.controllerTester().flagSource().withListFlag(PermanentFlags.INCOMPATIBLE_VERSIONS.id(), List.of("8"), String.class);
+ // App deploys on version1.
tester.controllerTester().upgradeSystem(version1);
DeploymentContext app = tester.newDeploymentContext()
.submit(new ApplicationPackageBuilder().region("us-east-3")
@@ -2160,10 +2161,12 @@ public class DeploymentTriggerTest {
.build())
.deploy();
+ // System upgrades to version2, but the app is not upgraded.
tester.controllerTester().upgradeSystem(version2);
tester.upgrader().run();
assertEquals(Change.empty(), app.instance().change());
+ // App compiles against version2, and upgrades.
app.submit(new ApplicationPackageBuilder().region("us-east-3")
.compileVersion(version2)
.build());
@@ -2171,6 +2174,18 @@ public class DeploymentTriggerTest {
assertEquals(version2, tester.jobs().last(app.instanceId(), productionUsEast3).get().versions().targetPlatform());
assertEquals(version2, app.application().revisions().get(tester.jobs().last(app.instanceId(), productionUsEast3).get().versions().targetRevision()).compileVersion().get());
+
+ // App specifies version1 in deployment spec, compiles against version1, pins to version1, and then downgrades.
+ app.submit(new ApplicationPackageBuilder().region("us-east-3")
+ .majorVersion(7)
+ .compileVersion(version1)
+ .build());
+ tester.deploymentTrigger().forceChange(app.instanceId(), app.instance().change().withPin());
+ app.deploy();
+ assertEquals(version1, tester.jobs().last(app.instanceId(), productionUsEast3).get().versions().targetPlatform());
+ assertEquals(version1, app.application().revisions().get(tester.jobs().last(app.instanceId(), productionUsEast3).get().versions().targetRevision()).compileVersion().get());
+
+ // A new app, compiled against version1, is deployed on version1.
DeploymentContext newApp = tester.newDeploymentContext("new", "app", "default")
.submit(new ApplicationPackageBuilder().region("us-east-3")
.compileVersion(version1)
diff --git a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/OsUpgradeSchedulerTest.java b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/OsUpgradeSchedulerTest.java
index 300aa86b5ea..478bb943eba 100644
--- a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/OsUpgradeSchedulerTest.java
+++ b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/OsUpgradeSchedulerTest.java
@@ -13,7 +13,10 @@ import org.junit.Test;
import java.time.Duration;
import java.time.Instant;
+import java.time.LocalDate;
+import java.time.ZoneOffset;
import java.util.List;
+import java.util.Map;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertTrue;
@@ -27,7 +30,7 @@ public class OsUpgradeSchedulerTest {
public void schedule_calendar_versioned_release() {
ControllerTester tester = new ControllerTester();
OsUpgradeScheduler scheduler = new OsUpgradeScheduler(tester.controller(), Duration.ofDays(1));
- Instant t0 = Instant.parse("2021-01-23T00:00:00.00Z"); // Outside trigger period
+ Instant t0 = Instant.parse("2022-01-16T00:00:00.00Z"); // Outside trigger period
tester.clock().setInstant(t0);
CloudName cloud = CloudName.from("cloud");
@@ -38,8 +41,8 @@ public class OsUpgradeSchedulerTest {
scheduler.maintain();
assertTrue("No target set", tester.controller().osVersionTarget(cloud).isEmpty());
- // Target is set
- Version version0 = Version.fromString("7.0.0.20210123190005");
+ // Target is set manually
+ Version version0 = Version.fromString("7.0.0.20220101");
tester.controller().upgradeOsIn(cloud, version0, Duration.ofDays(1), false);
// Target remains unchanged as it hasn't expired yet
@@ -49,8 +52,8 @@ public class OsUpgradeSchedulerTest {
assertEquals(version0, tester.controller().osVersionTarget(cloud).get().osVersion().version());
}
- // Just over 45 days pass, and a new target replaces the expired one
- Version version1 = Version.fromString("7.0.0.20210302");
+ // Enough days pass that the next release is triggered
+ Version version1 = Version.fromString("7.0.0.20220214");
tester.clock().advance(Duration.ofDays(15).plus(Duration.ofSeconds(1)));
scheduler.maintain();
assertEquals("Target is unchanged because we're outside trigger period", version0,
@@ -60,7 +63,7 @@ public class OsUpgradeSchedulerTest {
assertEquals("New target set", version1,
tester.controller().osVersionTarget(cloud).get().osVersion().version());
- // A few days pass and target remains unchanged
+ // A few more days pass and target remains unchanged
tester.clock().advance(Duration.ofDays(2));
scheduler.maintain();
assertEquals(version1, tester.controller().osVersionTarget(cloud).get().osVersion().version());
@@ -112,6 +115,24 @@ public class OsUpgradeSchedulerTest {
scheduleUpgradeAfter(Duration.ofDays(1), version1, tester);
}
+ @Test
+ public void schedule_of_calender_versioned_releases() {
+ Map<String, String> tests = Map.of("2022-01-01", "2021-12-27",
+ "2022-02-14", "2021-12-27",
+ "2022-02-15", "2022-02-14",
+ "2022-03-31", "2022-02-14",
+ "2022-04-01", "2022-03-28",
+ "2022-05-15", "2022-03-28",
+ "2022-05-16", "2022-05-16",
+ "2022-06-29", "2022-05-16",
+ "2022-06-30", "2022-06-27");
+ tests.forEach((now, expected) -> {
+ Instant instant = LocalDate.parse(now).atStartOfDay().toInstant(ZoneOffset.UTC);
+ LocalDate dateOfWantedVersion = OsUpgradeScheduler.CalendarVersionedRelease.dateOfWantedVersion(instant);
+ assertEquals("scheduled wanted version at " + now, LocalDate.parse(expected), dateOfWantedVersion);
+ });
+ }
+
private void scheduleUpgradeAfter(Duration duration, Version version, ControllerTester tester) {
tester.clock().advance(duration);
new OsUpgradeScheduler(tester.controller(), Duration.ofDays(1)).maintain();
diff --git a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/OsUpgraderTest.java b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/OsUpgraderTest.java
index 8adf55155a5..3a5b4a90baa 100644
--- a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/OsUpgraderTest.java
+++ b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/OsUpgraderTest.java
@@ -19,7 +19,9 @@ import java.time.Duration;
import java.util.Collection;
import java.util.List;
import java.util.function.Function;
+import java.util.function.UnaryOperator;
import java.util.stream.Collectors;
+import java.util.stream.Stream;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertTrue;
@@ -52,18 +54,19 @@ public class OsUpgraderTest {
OsUpgrader osUpgrader = osUpgrader(upgradePolicy, cloud1, false);
// Bootstrap system
- List<ZoneId> nonControllerZones = List.of(zone1, zone2, zone3, zone4, zone5).stream()
- .map(ZoneApi::getVirtualId)
- .collect(Collectors.toList());
+ List<ZoneId> nonControllerZones = Stream.of(zone1, zone2, zone3, zone4, zone5)
+ .map(ZoneApi::getVirtualId)
+ .collect(Collectors.toList());
tester.configServer().bootstrap(nonControllerZones, List.of(SystemApplication.tenantHost));
tester.configServer().addNodes(List.of(zone0.getVirtualId()), List.of(SystemApplication.controllerHost));
// Add system application that exists in a real system, but isn't eligible for OS upgrades
tester.configServer().addNodes(nonControllerZones, List.of(SystemApplication.configServer));
- // Fail a few nodes. Failed nodes should not affect versions
+ // Change state of a few nodes. These should not affect convergence
failNodeIn(zone1, SystemApplication.tenantHost);
failNodeIn(zone3, SystemApplication.tenantHost);
+ Node nodeDeferringOsUpgrade = deferOsUpgradeIn(zone2, SystemApplication.tenantHost);
// New OS version released
Version version1 = Version.fromString("7.1");
@@ -91,7 +94,7 @@ public class OsUpgraderTest {
completeUpgrade(version1, SystemApplication.tenantHost, zone1);
statusUpdater.maintain();
assertEquals(5, nodesOn(version1).size());
- assertEquals(11, nodesOn(Version.emptyVersion).size());
+ assertEquals(10, nodesOn(Version.emptyVersion).size());
// zone 2 and 3: begins upgrading
osUpgrader.maintain();
@@ -102,6 +105,10 @@ public class OsUpgraderTest {
// zone 2 and 3: completes upgrade
completeUpgrade(version1, SystemApplication.tenantHost, zone2, zone3);
+ assertEquals("Current version is unchanged for node deferring OS upgrade", Version.emptyVersion,
+ nodeRepository().list(zone2.getVirtualId(), NodeFilter.all().hostnames(nodeDeferringOsUpgrade.hostname()))
+ .get(0)
+ .currentOsVersion());
// zone 4: begins upgrading
osUpgrader.maintain();
@@ -271,13 +278,23 @@ public class OsUpgraderTest {
.collect(Collectors.toList());
}
- private void failNodeIn(ZoneApi zone, SystemApplication application) {
+ private Node failNodeIn(ZoneApi zone, SystemApplication application) {
+ return patchOneNodeIn(zone, application, (node) -> Node.builder(node).state(Node.State.failed).build());
+ }
+
+ private Node deferOsUpgradeIn(ZoneApi zone, SystemApplication application) {
+ return patchOneNodeIn(zone, application, (node) -> Node.builder(node).deferOsUpgrade(true).build());
+ }
+
+ private Node patchOneNodeIn(ZoneApi zone, SystemApplication application, UnaryOperator<Node> patcher) {
List<Node> nodes = nodeRepository().list(zone.getVirtualId(), NodeFilter.all().applications(application.id()));
if (nodes.isEmpty()) {
throw new IllegalArgumentException("No nodes allocated to " + application.id());
}
Node node = nodes.get(0);
- nodeRepository().putNodes(zone.getVirtualId(), Node.builder(node).state(Node.State.failed).build());
+ Node newNode = patcher.apply(node);
+ nodeRepository().putNodes(zone.getVirtualId(), newNode);
+ return newNode;
}
/** Simulate OS upgrade of nodes allocated to application. In a real system this is done by the node itself */
diff --git a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/VcmrMaintainerTest.java b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/VcmrMaintainerTest.java
index bfbd3836ce7..321ec3ad8ea 100644
--- a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/VcmrMaintainerTest.java
+++ b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/VcmrMaintainerTest.java
@@ -14,6 +14,7 @@ import com.yahoo.vespa.hosted.controller.api.integration.vcmr.HostAction.State;
import com.yahoo.vespa.hosted.controller.api.integration.vcmr.VcmrReport;
import com.yahoo.vespa.hosted.controller.api.integration.vcmr.VespaChangeRequest;
import com.yahoo.vespa.hosted.controller.api.integration.vcmr.VespaChangeRequest.Status;
+import com.yahoo.vespa.hosted.controller.integration.MetricsMock;
import com.yahoo.vespa.hosted.controller.integration.NodeRepositoryMock;
import org.junit.Before;
import org.junit.Test;
@@ -25,6 +26,7 @@ import java.time.ZonedDateTime;
import java.time.temporal.TemporalAdjusters;
import java.util.List;
+import static com.yahoo.vespa.hosted.controller.maintenance.VcmrMaintainer.TRACKED_CMRS_METRIC;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertFalse;
import static org.junit.Assert.assertTrue;
@@ -37,6 +39,7 @@ public class VcmrMaintainerTest {
private ControllerTester tester;
private VcmrMaintainer maintainer;
private NodeRepositoryMock nodeRepo;
+ private MetricsMock metrics;
private final ZoneId zoneId = ZoneId.from("prod.us-east-3");
private final ZoneId zone2 = ZoneId.from("prod.us-west-1");
private final HostName host1 = HostName.of("host1");
@@ -47,7 +50,8 @@ public class VcmrMaintainerTest {
@Before
public void setup() {
tester = new ControllerTester();
- maintainer = new VcmrMaintainer(tester.controller(), Duration.ofMinutes(1));
+ metrics = new MetricsMock();
+ maintainer = new VcmrMaintainer(tester.controller(), Duration.ofMinutes(1), metrics);
nodeRepo = tester.serviceRegistry().configServer().nodeRepository().allowPatching(true);
}
@@ -244,6 +248,8 @@ public class VcmrMaintainerTest {
assertEquals(State.OUT_OF_SYNC, action.getState());
assertEquals(Status.OUT_OF_SYNC, writtenChangeRequest.getStatus());
+ assertEquals(1, metrics.getMetric(context -> "OUT_OF_SYNC".equals(context.get("status")), TRACKED_CMRS_METRIC).get());
+ assertEquals(0, metrics.getMetric(context -> "REQUIRES_OPERATOR_ACTION".equals(context.get("status")), TRACKED_CMRS_METRIC).get());
}
@Test
diff --git a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/persistence/BufferedLogStoreTest.java b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/persistence/BufferedLogStoreTest.java
index 0f7f97d333a..87280c0c1a3 100644
--- a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/persistence/BufferedLogStoreTest.java
+++ b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/persistence/BufferedLogStoreTest.java
@@ -50,19 +50,19 @@ public class BufferedLogStoreTest {
assertEquals(Optional.empty(), logs.readFinished(id, -1));
assertEquals(RunLog.empty(), logs.readActive(id.application(), id.type(), -1));
- logs.append(id.application(), id.type(), Step.deployReal, List.of(entry));
+ logs.append(id.application(), id.type(), Step.deployReal, List.of(entry), false);
assertEquals(List.of(entry0),
logs.readActive(id.application(), id.type(), -1).get(Step.deployReal));
assertEquals(RunLog.empty(), logs.readActive(id.application(), id.type(), 0));
- logs.append(id.application(), id.type(), Step.deployReal, List.of(entry));
+ logs.append(id.application(), id.type(), Step.deployReal, List.of(entry), false);
assertEquals(List.of(entry0, entry1),
logs.readActive(id.application(), id.type(), -1).get(Step.deployReal));
assertEquals(List.of(entry1),
logs.readActive(id.application(), id.type(), 0).get(Step.deployReal));
assertEquals(RunLog.empty(), logs.readActive(id.application(), id.type(), 1));
- logs.append(id.application(), id.type(), Step.deployReal, List.of(entry, entry, entry));
+ logs.append(id.application(), id.type(), Step.deployReal, List.of(entry, entry, entry), false);
assertEquals(List.of(entry0, entry1, entry2, entry3, entry4),
logs.readActive(id.application(), id.type(), -1).get(Step.deployReal));
assertEquals(List.of(entry1, entry2, entry3, entry4),
@@ -105,17 +105,28 @@ public class BufferedLogStoreTest {
logged.remove(logged.size() - 1);
logged.remove(logged.size() - 1);
logged.remove(logged.size() - 1);
- logged.add(new LogEntry(2 * maxChunks, entry.at(), LogEntry.Type.warning, "Max log size of " + ((chunkSize * maxChunks) >> 20) + "Mb exceeded; further entries are discarded."));
+ logged.add(new LogEntry(2 * maxChunks, entry.at(), LogEntry.Type.warning, "Max log size of " + ((chunkSize * maxChunks) >> 20) + "Mb exceeded; further user entries are discarded."));
- logs.append(id.application(), id.type(), Step.deployReal, monsterLog);
+ logs.append(id.application(), id.type(), Step.deployReal, monsterLog, false);
assertEquals(logged.size(),
logs.readActive(id.application(), id.type(), -1).get(Step.deployReal).size());
assertEquals(logged,
logs.readActive(id.application(), id.type(), -1).get(Step.deployReal));
+ // An additional, forced entry is appended.
+ LogEntry forced = new LogEntry(logged.size(), entry.at(), entry.type(), entry.message());
+ logs.append(id.application(), id.type(), Step.deployReal, List.of(forced), true);
+ logged.add(forced);
+ assertEquals(logged.size(),
+ logs.readActive(id.application(), id.type(), -1).get(Step.deployReal).size());
+ assertEquals(logged,
+ logs.readActive(id.application(), id.type(), -1).get(Step.deployReal));
+ logged.remove(logged.size() - 1);
+
+ // Flushing the buffer clears it again, and makes it ready for reuse.
logs.flush(id);
for (int i = 0; i < 2 * maxChunks + 3; i++)
- logs.append(id.application(), id.type(), Step.deployReal, List.of(entry));
+ logs.append(id.application(), id.type(), Step.deployReal, List.of(entry), false);
assertEquals(logged.size(),
logs.readActive(id.application(), id.type(), -1).get(Step.deployReal).size());
assertEquals(logged,
diff --git a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/restapi/application/responses/deployment-overview.json b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/restapi/application/responses/deployment-overview.json
index 696f1ef0ba3..38e9d8c823e 100644
--- a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/restapi/application/responses/deployment-overview.json
+++ b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/restapi/application/responses/deployment-overview.json
@@ -428,6 +428,7 @@
"url": "http://localhost:8080/application/v4/tenant/tenant1/application/application1/instance/instance1/job/production-us-west-1/run/1",
"start": 1600000000000,
"status": "running",
+ "reason": "triggered by user.myuser",
"versions": {
"targetPlatform": "6.1.0",
"targetApplication": {
diff --git a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/restapi/application/responses/jobs.json b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/restapi/application/responses/jobs.json
index 12430b67539..2477e8df56e 100644
--- a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/restapi/application/responses/jobs.json
+++ b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/restapi/application/responses/jobs.json
@@ -9,6 +9,7 @@
"start": 1600000000000,
"end": 1600000000000,
"status": "success",
+ "reason": "triggered by user.myuser",
"versions": {
"targetPlatform": "6.1.0",
"targetApplication": {