summaryrefslogtreecommitdiffstats
path: root/controller-server/src/main
diff options
context:
space:
mode:
Diffstat (limited to 'controller-server/src/main')
-rw-r--r--controller-server/src/main/java/com/yahoo/vespa/hosted/controller/Application.java30
-rw-r--r--controller-server/src/main/java/com/yahoo/vespa/hosted/controller/ApplicationController.java21
-rw-r--r--controller-server/src/main/java/com/yahoo/vespa/hosted/controller/LockedTenant.java26
-rw-r--r--controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentStatus.java34
-rw-r--r--controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentStatusList.java13
-rw-r--r--controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentTrigger.java23
-rw-r--r--controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/InternalStepRunner.java31
-rw-r--r--controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/JobController.java14
-rw-r--r--controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/JobList.java9
-rw-r--r--controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/Versions.java2
-rw-r--r--controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/ApplicationMetaDataGarbageCollector.java5
-rw-r--r--controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/ChangeRequestMaintainer.java1
-rw-r--r--controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/CloudTrialExpirer.java113
-rw-r--r--controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/ControllerMaintenance.java10
-rw-r--r--controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/EndpointCertificateMaintainer.java26
-rw-r--r--controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/InfrastructureUpgrader.java60
-rw-r--r--controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/JobRunner.java52
-rw-r--r--controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/MeteringMonitorMaintainer.java5
-rw-r--r--controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/OsUpgradeScheduler.java171
-rw-r--r--controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/OsUpgrader.java24
-rw-r--r--controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/ResourceMeterMaintainer.java2
-rw-r--r--controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/SystemUpgrader.java12
-rw-r--r--controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/TenantRoleMaintainer.java20
-rw-r--r--controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/Upgrader.java38
-rw-r--r--controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/VcmrMaintainer.java21
-rw-r--r--controller-server/src/main/java/com/yahoo/vespa/hosted/controller/notification/Notifier.java25
-rw-r--r--controller-server/src/main/java/com/yahoo/vespa/hosted/controller/persistence/BufferedLogStore.java12
-rw-r--r--controller-server/src/main/java/com/yahoo/vespa/hosted/controller/persistence/CuratorDb.java8
-rw-r--r--controller-server/src/main/java/com/yahoo/vespa/hosted/controller/persistence/TenantSerializer.java5
-rw-r--r--controller-server/src/main/java/com/yahoo/vespa/hosted/controller/restapi/application/ApplicationApiHandler.java58
-rw-r--r--controller-server/src/main/java/com/yahoo/vespa/hosted/controller/restapi/application/JobControllerApiHandlerHelper.java4
-rw-r--r--controller-server/src/main/java/com/yahoo/vespa/hosted/controller/restapi/billing/BillingApiHandlerV2.java9
-rw-r--r--controller-server/src/main/java/com/yahoo/vespa/hosted/controller/restapi/controller/ControllerApiHandler.java19
-rw-r--r--controller-server/src/main/java/com/yahoo/vespa/hosted/controller/restapi/controller/StatsResponse.java2
-rw-r--r--controller-server/src/main/java/com/yahoo/vespa/hosted/controller/restapi/os/OsApiHandler.java33
-rw-r--r--controller-server/src/main/java/com/yahoo/vespa/hosted/controller/restapi/user/UserApiHandler.java78
-rw-r--r--controller-server/src/main/java/com/yahoo/vespa/hosted/controller/security/CloudUserSessionManager.java50
-rw-r--r--controller-server/src/main/java/com/yahoo/vespa/hosted/controller/versions/OsVersionStatus.java4
-rw-r--r--controller-server/src/main/java/com/yahoo/vespa/hosted/controller/versions/VespaVersion.java8
39 files changed, 648 insertions, 430 deletions
diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/Application.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/Application.java
index af8965bdeff..bdb68f655ff 100644
--- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/Application.java
+++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/Application.java
@@ -33,6 +33,7 @@ import java.util.Set;
import java.util.TreeMap;
import java.util.function.Function;
import java.util.stream.Collectors;
+import java.util.stream.Stream;
/**
* An application. Belongs to a {@link Tenant}, and may have multiple {@link Instance}s.
@@ -161,7 +162,7 @@ public class Application {
public ApplicationActivity activity() {
return ApplicationActivity.from(instances.values().stream()
.flatMap(instance -> instance.deployments().values().stream())
- .collect(Collectors.toUnmodifiableList()));
+ .toList());
}
public Map<InstanceName, List<Deployment>> productionDeployments() {
@@ -183,33 +184,44 @@ public class Application {
.min(Comparator.naturalOrder());
}
- /**
- * Returns the oldest application version this has deployed in a permanent zone (not test or staging).
- */
+ /** Returns the oldest application version this has deployed in a permanent zone (not test or staging) */
public Optional<RevisionId> oldestDeployedRevision() {
+ return productionRevisions().min(Comparator.naturalOrder());
+ }
+
+ /** Returns the latest application version this has deployed in a permanent zone (not test or staging) */
+ public Optional<RevisionId> latestDeployedRevision() {
+ return productionRevisions().max(Comparator.naturalOrder());
+ }
+
+ private Stream<RevisionId> productionRevisions() {
return productionDeployments().values().stream().flatMap(List::stream)
.map(Deployment::revision)
- .filter(RevisionId::isProduction)
- .min(Comparator.naturalOrder());
+ .filter(RevisionId::isProduction);
}
/** Returns the total quota usage for this application, excluding temporary deployments */
public QuotaUsage quotaUsage() {
return instances().values().stream()
- .map(Instance::quotaUsage).reduce(QuotaUsage::add).orElse(QuotaUsage.none);
+ .map(Instance::quotaUsage)
+ .reduce(QuotaUsage::add)
+ .orElse(QuotaUsage.none);
}
/** Returns the total quota usage for manual deployments for this application */
public QuotaUsage manualQuotaUsage() {
return instances().values().stream()
- .map(Instance::manualQuotaUsage).reduce(QuotaUsage::add).orElse(QuotaUsage.none);
+ .map(Instance::manualQuotaUsage)
+ .reduce(QuotaUsage::add)
+ .orElse(QuotaUsage.none);
}
/** Returns the total quota usage for this application, excluding one specific deployment (and temporary deployments) */
public QuotaUsage quotaUsage(ApplicationId application, ZoneId zone) {
return instances().values().stream()
.map(instance -> instance.quotaUsageExcluding(application, zone))
- .reduce(QuotaUsage::add).orElse(QuotaUsage.none);
+ .reduce(QuotaUsage::add)
+ .orElse(QuotaUsage.none);
}
/** Returns the set of deploy keys for this application. */
diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/ApplicationController.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/ApplicationController.java
index e48ad7596ea..78063a383dc 100644
--- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/ApplicationController.java
+++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/ApplicationController.java
@@ -237,7 +237,7 @@ public class ApplicationController {
}
/** Sets the default target major version. Set to empty to determine target version normally (by confidence) */
- public void setTargetMajorVersion(Optional<Integer> targetMajorVersion) {
+ public void setTargetMajorVersion(OptionalInt targetMajorVersion) {
curator.writeTargetMajorVersion(targetMajorVersion);
}
@@ -340,7 +340,10 @@ public class ApplicationController {
Version oldestInstalledPlatform = oldestInstalledPlatform(id);
// Target platforms are all versions not older than the oldest installed platform, unless forcing a major version change.
- Predicate<Version> isTargetPlatform = targetMajor.isEmpty() || targetMajor.getAsInt() == oldestInstalledPlatform.getMajor()
+ // Only major version specified in deployment spec is enough to force a downgrade, while all sources may force an upgrade.
+ Predicate<Version> isTargetPlatform = targetMajor.isEmpty()
+ || targetMajor.getAsInt() == oldestInstalledPlatform.getMajor()
+ || wantedMajor.isEmpty() && targetMajor.getAsInt() <= oldestInstalledPlatform.getMajor()
? version -> ! version.isBefore(oldestInstalledPlatform)
: version -> targetMajor.getAsInt() == version.getMajor();
Set<Version> platformVersions = versionStatus.versions().stream()
@@ -446,7 +449,7 @@ public class ApplicationController {
}
/** Deploys an application package for an existing application instance. */
- public ActivateResult deploy(JobId job, boolean deploySourceVersions) {
+ public ActivateResult deploy(JobId job, boolean deploySourceVersions, Consumer<String> deployLogger) {
if (job.application().instance().isTester())
throw new IllegalArgumentException("'" + job.application() + "' is a tester application!");
@@ -479,6 +482,7 @@ public class ApplicationController {
applicationPackage = applicationPackage.withTrustedCertificate(run.testerCertificate().get());
endpointCertificateMetadata = endpointCertificates.getMetadata(instance, zone, applicationPackage.deploymentSpec());
+
containerEndpoints = controller.routing().of(deployment).prepare(application);
} // Release application lock while doing the deployment, which is a lengthy task.
@@ -487,6 +491,8 @@ public class ApplicationController {
ActivateResult result = deploy(job.application(), applicationPackage, zone, platform, containerEndpoints,
endpointCertificateMetadata, run.isDryRun());
+ endpointCertificateMetadata.ifPresent(e -> deployLogger.accept("Using CA signed certificate version %s".formatted(e.version())));
+
// Record the quota usage for this application
var quotaUsage = deploymentQuotaUsage(zone, job.application());
@@ -544,10 +550,9 @@ public class ApplicationController {
controller.jobController().deploymentStatus(application.get());
for (Notification notification : controller.notificationsDb().listNotifications(NotificationSource.from(application.get().id()), true)) {
- if ( ! notification.source().instance().map(declaredInstances::contains).orElse(true))
- controller.notificationsDb().removeNotifications(notification.source());
- if (notification.source().instance().isPresent() &&
- ! notification.source().zoneId().map(application.get().require(notification.source().instance().get()).deployments()::containsKey).orElse(false))
+ if ( notification.source().instance().isPresent()
+ && ( ! declaredInstances.contains(notification.source().instance().get())
+ || ! notification.source().zoneId().map(application.get().require(notification.source().instance().get()).deployments()::containsKey).orElse(false)))
controller.notificationsDb().removeNotifications(notification.source());
}
@@ -641,7 +646,7 @@ public class ApplicationController {
.filter(zone -> deploymentSpec.instance(instance).isEmpty()
|| ! deploymentSpec.requireInstance(instance).deploysTo(zone.environment(),
zone.region()))
- .collect(toList());
+ .toList();
if (deploymentsToRemove.isEmpty())
return application;
diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/LockedTenant.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/LockedTenant.java
index 4f58e87035b..ac7c6319c1b 100644
--- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/LockedTenant.java
+++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/LockedTenant.java
@@ -129,25 +129,27 @@ public abstract class LockedTenant {
private final TenantInfo info;
private final List<TenantSecretStore> tenantSecretStores;
private final ArchiveAccess archiveAccess;
+ private final Optional<Instant> invalidateUserSessionsBefore;
private Cloud(TenantName name, Instant createdAt, LastLoginInfo lastLoginInfo, Optional<Principal> creator,
BiMap<PublicKey, Principal> developerKeys, TenantInfo info,
- List<TenantSecretStore> tenantSecretStores, ArchiveAccess archiveAccess) {
+ List<TenantSecretStore> tenantSecretStores, ArchiveAccess archiveAccess, Optional<Instant> invalidateUserSessionsBefore) {
super(name, createdAt, lastLoginInfo);
this.developerKeys = ImmutableBiMap.copyOf(developerKeys);
this.creator = creator;
this.info = info;
this.tenantSecretStores = tenantSecretStores;
this.archiveAccess = archiveAccess;
+ this.invalidateUserSessionsBefore = invalidateUserSessionsBefore;
}
private Cloud(CloudTenant tenant) {
- this(tenant.name(), tenant.createdAt(), tenant.lastLoginInfo(), tenant.creator(), tenant.developerKeys(), tenant.info(), tenant.tenantSecretStores(), tenant.archiveAccess());
+ this(tenant.name(), tenant.createdAt(), tenant.lastLoginInfo(), tenant.creator(), tenant.developerKeys(), tenant.info(), tenant.tenantSecretStores(), tenant.archiveAccess(), tenant.invalidateUserSessionsBefore());
}
@Override
public CloudTenant get() {
- return new CloudTenant(name, createdAt, lastLoginInfo, creator, developerKeys, info, tenantSecretStores, archiveAccess);
+ return new CloudTenant(name, createdAt, lastLoginInfo, creator, developerKeys, info, tenantSecretStores, archiveAccess, invalidateUserSessionsBefore);
}
public Cloud withDeveloperKey(PublicKey key, Principal principal) {
@@ -155,38 +157,42 @@ public abstract class LockedTenant {
if (keys.containsKey(key))
throw new IllegalArgumentException("Key " + KeyUtils.toPem(key) + " is already owned by " + keys.get(key));
keys.put(key, principal);
- return new Cloud(name, createdAt, lastLoginInfo, creator, keys, info, tenantSecretStores, archiveAccess);
+ return new Cloud(name, createdAt, lastLoginInfo, creator, keys, info, tenantSecretStores, archiveAccess, invalidateUserSessionsBefore);
}
public Cloud withoutDeveloperKey(PublicKey key) {
BiMap<PublicKey, Principal> keys = HashBiMap.create(developerKeys);
keys.remove(key);
- return new Cloud(name, createdAt, lastLoginInfo, creator, keys, info, tenantSecretStores, archiveAccess);
+ return new Cloud(name, createdAt, lastLoginInfo, creator, keys, info, tenantSecretStores, archiveAccess, invalidateUserSessionsBefore);
}
public Cloud withInfo(TenantInfo newInfo) {
- return new Cloud(name, createdAt, lastLoginInfo, creator, developerKeys, newInfo, tenantSecretStores, archiveAccess);
+ return new Cloud(name, createdAt, lastLoginInfo, creator, developerKeys, newInfo, tenantSecretStores, archiveAccess, invalidateUserSessionsBefore);
}
@Override
public LockedTenant with(LastLoginInfo lastLoginInfo) {
- return new Cloud(name, createdAt, lastLoginInfo, creator, developerKeys, info, tenantSecretStores, archiveAccess);
+ return new Cloud(name, createdAt, lastLoginInfo, creator, developerKeys, info, tenantSecretStores, archiveAccess, invalidateUserSessionsBefore);
}
public Cloud withSecretStore(TenantSecretStore tenantSecretStore) {
ArrayList<TenantSecretStore> secretStores = new ArrayList<>(tenantSecretStores);
secretStores.add(tenantSecretStore);
- return new Cloud(name, createdAt, lastLoginInfo, creator, developerKeys, info, secretStores, archiveAccess);
+ return new Cloud(name, createdAt, lastLoginInfo, creator, developerKeys, info, secretStores, archiveAccess, invalidateUserSessionsBefore);
}
public Cloud withoutSecretStore(TenantSecretStore tenantSecretStore) {
ArrayList<TenantSecretStore> secretStores = new ArrayList<>(tenantSecretStores);
secretStores.remove(tenantSecretStore);
- return new Cloud(name, createdAt, lastLoginInfo, creator, developerKeys, info, secretStores, archiveAccess);
+ return new Cloud(name, createdAt, lastLoginInfo, creator, developerKeys, info, secretStores, archiveAccess, invalidateUserSessionsBefore);
}
public Cloud withArchiveAccess(ArchiveAccess archiveAccess) {
- return new Cloud(name, createdAt, lastLoginInfo, creator, developerKeys, info, tenantSecretStores, archiveAccess);
+ return new Cloud(name, createdAt, lastLoginInfo, creator, developerKeys, info, tenantSecretStores, archiveAccess, invalidateUserSessionsBefore);
+ }
+
+ public Cloud withInvalidateUserSessionsBefore(Instant invalidateUserSessionsBefore) {
+ return new Cloud(name, createdAt, lastLoginInfo, creator, developerKeys, info, tenantSecretStores, archiveAccess, Optional.of(invalidateUserSessionsBefore));
}
}
diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentStatus.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentStatus.java
index 9f93033a1a2..1d7d75d9193 100644
--- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentStatus.java
+++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentStatus.java
@@ -2,7 +2,6 @@
package com.yahoo.vespa.hosted.controller.deployment;
import com.google.common.collect.ImmutableMap;
-import com.yahoo.collections.Iterables;
import com.yahoo.component.Version;
import com.yahoo.component.VersionCompatibility;
import com.yahoo.config.application.api.DeploymentInstanceSpec;
@@ -231,9 +230,9 @@ public class DeploymentStatus {
firstProductionJobWithDeploymentInCloud.flatMap(this::deploymentFor),
fallbackPlatform(change, job));
if (step.completedAt(change, firstProductionJobWithDeploymentInCloud).isEmpty()) {
- JobType actualType = job.type().isSystemTest() ? systemTest(firstProductionJobWithDeploymentInCloud.map(JobId::type).orElse(null))
- : stagingTest(firstProductionJobWithDeploymentInCloud.map(JobId::type).orElse(null));
- jobs.merge(job, List.of(new Job(actualType, versions, step.readyAt(change), change)), DeploymentStatus::union);
+ CloudName cloud = firstProductionJobWithDeploymentInCloud.map(JobId::type).map(this::findCloud).orElse(zones.systemZone().getCloudName());
+ JobType typeWithZone = job.type().isSystemTest() ? JobType.systemTest(zones, cloud) : JobType.stagingTest(zones, cloud);
+ jobs.merge(job, List.of(new Job(typeWithZone, versions, step.readyAt(change), change)), DeploymentStatus::union);
}
}
});
@@ -291,19 +290,16 @@ public class DeploymentStatus {
}
private <T extends Comparable<T>> Optional<T> newestTested(InstanceName instance, Function<Run, T> runMapper) {
- Set<CloudName> clouds = jobSteps.keySet().stream()
- .filter(job -> job.type().isProduction())
- .map(job -> findCloud(job.type()))
- .collect(toSet());
+ Set<CloudName> clouds = Stream.concat(Stream.of(zones.systemZone().getCloudName()),
+ jobSteps.keySet().stream()
+ .filter(job -> job.type().isProduction())
+ .map(job -> findCloud(job.type())))
+ .collect(toSet());
List<ZoneId> testZones = new ArrayList<>();
- if (application.deploymentSpec().requireInstance(instance).concerns(test)) {
- if (clouds.isEmpty()) testZones.add(JobType.systemTest(zones, null).zone());
- else for (CloudName cloud: clouds) testZones.add(JobType.systemTest(zones, cloud).zone());
- }
- if (application.deploymentSpec().requireInstance(instance).concerns(staging)) {
- if (clouds.isEmpty()) testZones.add(JobType.stagingTest(zones, null).zone());
- else for (CloudName cloud: clouds) testZones.add(JobType.stagingTest(zones, cloud).zone());
- }
+ if (application.deploymentSpec().requireInstance(instance).concerns(test))
+ for (CloudName cloud: clouds) testZones.add(JobType.systemTest(zones, cloud).zone());
+ if (application.deploymentSpec().requireInstance(instance).concerns(staging))
+ for (CloudName cloud: clouds) testZones.add(JobType.stagingTest(zones, cloud).zone());
Map<ZoneId, Optional<T>> newestPerZone = instanceJobs().get(application.id().instance(instance))
.type(systemTest(null), stagingTest(null))
@@ -548,7 +544,9 @@ public class DeploymentStatus {
if (job.type().isProduction() && job.type().isDeployment()) {
declaredTest(job.application(), testType).ifPresent(testJob -> {
for (Job productionJob : versionsList)
- if (allJobs.successOn(testType, productionJob.versions()).asList().isEmpty())
+ if (allJobs.successOn(testType, productionJob.versions())
+ .instance(testJob.application().instance())
+ .asList().isEmpty())
testJobs.merge(testJob, List.of(new Job(testJob.type(),
productionJob.versions(),
jobSteps().get(testJob).readyAt(productionJob.change),
@@ -580,7 +578,7 @@ public class DeploymentStatus {
}
private CloudName findCloud(JobType job) {
- return zones.zones().all().get(job.zone()).map(ZoneApi::getCloudName).orElse(null);
+ return zones.zones().all().get(job.zone()).map(ZoneApi::getCloudName).orElse(zones.systemZone().getCloudName());
}
private JobId firstDeclaredOrElseImplicitTest(JobType testJob) {
diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentStatusList.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentStatusList.java
index 22df5ca559e..4a00a272c75 100644
--- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentStatusList.java
+++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentStatusList.java
@@ -3,6 +3,7 @@ package com.yahoo.vespa.hosted.controller.deployment;
import com.yahoo.collections.AbstractFilteringList;
import com.yahoo.component.Version;
+import com.yahoo.vespa.hosted.controller.application.Change;
import java.time.Instant;
import java.util.Collection;
@@ -36,8 +37,10 @@ public class DeploymentStatusList extends AbstractFilteringList<DeploymentStatus
/** Returns the subset of applications which have been failing an application change since the given instant */
public DeploymentStatusList failingApplicationChangeSince(Instant threshold) {
- return matching(status -> status.instanceJobs().values().stream()
- .anyMatch(jobs -> failingApplicationChangeSince(jobs, threshold)));
+ return matching(status -> status.instanceJobs().entrySet().stream()
+ .anyMatch(jobs -> failingApplicationChangeSince(jobs.getValue(),
+ status.application().require(jobs.getKey().instance()).change(),
+ threshold)));
}
private static boolean failingUpgradeToVersionSince(JobList jobs, Version version, Instant threshold) {
@@ -47,10 +50,8 @@ public class DeploymentStatusList extends AbstractFilteringList<DeploymentStatus
.isEmpty();
}
- private static boolean failingApplicationChangeSince(JobList jobs, Instant threshold) {
- return ! jobs.failingApplicationChange()
- .firstFailing().endedNoLaterThan(threshold)
- .isEmpty();
+ private static boolean failingApplicationChangeSince(JobList jobs, Change change, Instant threshold) {
+ return change.revision().map(revision -> ! jobs.failingWithBrokenRevisionSince(revision, threshold).isEmpty()).orElse(false);
}
}
diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentTrigger.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentTrigger.java
index c28f94bc4d7..d83f552ab25 100644
--- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentTrigger.java
+++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentTrigger.java
@@ -61,6 +61,7 @@ import static java.util.stream.Collectors.toMap;
public class DeploymentTrigger {
public static final Duration maxPause = Duration.ofDays(3);
+ public static final Duration maxFailingRevisionTime = Duration.ofDays(5);
private final static Logger log = Logger.getLogger(DeploymentTrigger.class.getName());
private final Controller controller;
@@ -227,10 +228,9 @@ public class DeploymentTrigger {
Instance instance = application.require(applicationId.instance());
JobId job = new JobId(instance.id(), jobType);
JobStatus jobStatus = jobs.jobStatus(new JobId(applicationId, jobType));
- Versions versions = jobStatus.lastTriggered()
- .orElseThrow(() -> new IllegalArgumentException(job + " has never been triggered"))
- .versions();
- trigger(deploymentJob(instance, versions, jobType, jobStatus, clock.instant()), reason);
+ Run last = jobStatus.lastTriggered()
+ .orElseThrow(() -> new IllegalArgumentException(job + " has never been triggered"));
+ trigger(deploymentJob(instance, last.versions(), last.id().type(), jobStatus.isNodeAllocationFailure(), clock.instant()), reason);
return job;
}
@@ -258,7 +258,12 @@ public class DeploymentTrigger {
.collect(toMap(Map.Entry::getKey, Map.Entry::getValue));
jobs.forEach((jobId, versionsList) -> {
- trigger(deploymentJob(application.require(job.application().instance()), versionsList.get(0).versions(), jobId.type(), status.jobs().get(jobId).get(), clock.instant()), reason);
+ trigger(deploymentJob(application.require(job.application().instance()),
+ versionsList.get(0).versions(),
+ jobId.type(),
+ status.jobs().get(jobId).get().isNodeAllocationFailure(),
+ clock.instant()),
+ reason);
});
return List.copyOf(jobs.keySet());
}
@@ -387,7 +392,7 @@ public class DeploymentTrigger {
jobs.add(deploymentJob(status.application().require(jobId.application().instance()),
job.versions(),
job.type(),
- status.instanceJobs(jobId.application().instance()).get(jobId.type()),
+ status.instanceJobs(jobId.application().instance()).get(jobId.type()).isNodeAllocationFailure(),
job.readyAt().get()));
});
return Collections.unmodifiableList(jobs);
@@ -448,6 +453,8 @@ public class DeploymentTrigger {
private boolean acceptNewRevision(DeploymentStatus status, InstanceName instance, RevisionId revision) {
if (status.application().deploymentSpec().instance(instance).isEmpty()) return false; // Unknown instance.
+ if ( ! status.jobs().failingWithBrokenRevisionSince(revision, clock.instant().minus(maxFailingRevisionTime))
+ .isEmpty()) return false; // Don't deploy a broken revision.
boolean isChangingRevision = status.application().require(instance).change().revision().isPresent();
DeploymentInstanceSpec spec = status.application().deploymentSpec().requireInstance(instance);
Predicate<RevisionId> revisionFilter = spec.revisionTarget() == DeploymentSpec.RevisionTarget.next
@@ -472,8 +479,8 @@ public class DeploymentTrigger {
// ---------- Version and job helpers ----------
- private Job deploymentJob(Instance instance, Versions versions, JobType jobType, JobStatus jobStatus, Instant availableSince) {
- return new Job(instance, versions, jobType, availableSince, jobStatus.isNodeAllocationFailure(), instance.change().revision().isPresent());
+ private Job deploymentJob(Instance instance, Versions versions, JobType jobType, boolean isNodeAllocationFailure, Instant availableSince) {
+ return new Job(instance, versions, jobType, availableSince, isNodeAllocationFailure, instance.change().revision().isPresent());
}
// ---------- Data containers ----------
diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/InternalStepRunner.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/InternalStepRunner.java
index 813e3454e80..ef3474e0c1e 100644
--- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/InternalStepRunner.java
+++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/InternalStepRunner.java
@@ -174,7 +174,7 @@ public class InternalStepRunner implements StepRunner {
private Optional<RunStatus> deployReal(RunId id, boolean setTheStage, DualLogger logger) {
Optional<X509Certificate> testerCertificate = controller.jobController().run(id).testerCertificate();
- return deploy(() -> controller.applications().deploy(id.job(), setTheStage),
+ return deploy(() -> controller.applications().deploy(id.job(), setTheStage, logger::log),
controller.jobController().run(id)
.stepInfo(setTheStage ? deployInitialReal : deployReal).get()
.startTime().get(),
@@ -224,6 +224,9 @@ public class InternalStepRunner implements StepRunner {
// Retry certain failures for up to one hour.
Optional<RunStatus> result = startTime.isBefore(controller.clock().instant().minus(Duration.ofHours(1)))
? Optional.of(deploymentFailed) : Optional.empty();
+ if (result.isPresent())
+ logger.log(WARNING, "Deployment failed for one hour; giving up now!");
+
switch (e.code()) {
case CERTIFICATE_NOT_READY:
logger.log("No valid CA signed certificate for app available to config server");
@@ -424,10 +427,14 @@ public class InternalStepRunner implements StepRunner {
Optional<ServiceConvergence> services = controller.serviceRegistry().configServer().serviceConvergence(new DeploymentId(testerId, zone),
Optional.of(platform));
if (services.isEmpty()) {
- logger.log("Config status not currently available -- will retry.");
- return run.stepInfo(installTester).get().startTime().get().isBefore(controller.clock().instant().minus(Duration.ofMinutes(5)))
- ? Optional.of(error)
- : Optional.empty();
+ if (run.stepInfo(installTester).get().startTime().get().isBefore(controller.clock().instant().minus(Duration.ofMinutes(30)))) {
+ logger.log(WARNING, "Config status not available after 30 minutes; giving up!");
+ return Optional.of(error);
+ }
+ else {
+ logger.log("Config status not currently available -- will retry.");
+ return Optional.empty();
+ }
}
List<Node> nodes = controller.serviceRegistry().configServer().nodeRepository().list(zone,
NodeFilter.all()
@@ -649,10 +656,13 @@ public class InternalStepRunner implements StepRunner {
controller.jobController().updateTestReport(id);
return Optional.of(testFailure);
case INCONCLUSIVE:
- long sleepMinutes = Math.max(15, Math.min(120, Duration.between(deployment.get().at(), controller.clock().instant()).toMinutes() / 20));
- logger.log("Tests were inconclusive, and will run again in " + sleepMinutes + " minutes.");
controller.jobController().updateTestReport(id);
- controller.jobController().locked(id, run -> run.sleepingUntil(controller.clock().instant().plusSeconds(60 * sleepMinutes)));
+ controller.jobController().locked(id, run -> {
+ Instant nextAttemptAt = run.start();
+ while ( ! nextAttemptAt.isAfter(controller.clock().instant())) nextAttemptAt = nextAttemptAt.plusSeconds(1800);
+ logger.log("Tests were inconclusive, and will run again at " + nextAttemptAt + ".");
+ return run.sleepingUntil(nextAttemptAt);
+ });
return Optional.of(reset);
case ERROR:
logger.log(INFO, "Tester failed running its tests!");
@@ -799,6 +809,7 @@ public class InternalStepRunner implements StepRunner {
Consumer<String> updater = msg -> controller.notificationsDb().setNotification(source, Notification.Type.deployment, Notification.Level.error, msg);
switch (run.status()) {
case aborted: return; // wait and see how the next run goes.
+ case noTests:
case running:
case success:
controller.notificationsDb().removeNotification(source, Notification.Type.deployment);
@@ -815,10 +826,6 @@ public class InternalStepRunner implements StepRunner {
case testFailure:
updater.accept("one or more verification tests against the deployment failed. Please review test output in the deployment job log.");
return;
- case noTests:
- controller.notificationsDb().setNotification(source, Notification.Type.deployment, Notification.Level.warning,
- "no tests were found for this job type. Please review test output in the deployment job log.");
- return;
case error:
case endpointCertificateTimeout:
break;
diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/JobController.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/JobController.java
index 5113d386b23..881107fa0f9 100644
--- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/JobController.java
+++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/JobController.java
@@ -158,7 +158,7 @@ public class JobController {
/** Stores the given log entries for the given run and step. */
public void log(RunId id, Step step, List<LogEntry> entries) {
locked(id, __ -> {
- logs.append(id.application(), id.type(), step, entries);
+ logs.append(id.application(), id.type(), step, entries, true);
return __;
});
}
@@ -211,7 +211,7 @@ public class JobController {
if (log.isEmpty())
return run;
- logs.append(id.application(), id.type(), Step.copyVespaLogs, log);
+ logs.append(id.application(), id.type(), Step.copyVespaLogs, log, false);
return run.with(log.get(log.size() - 1).at());
});
}
@@ -230,7 +230,7 @@ public class JobController {
if (entries.isEmpty())
return run;
- logs.append(id.application(), id.type(), step.get(), entries);
+ logs.append(id.application(), id.type(), step.get(), entries, false);
return run.with(entries.stream().mapToLong(LogEntry::id).max().getAsLong());
});
}
@@ -408,11 +408,6 @@ public class JobController {
locked(id, run -> run.with(status, step));
}
- /** Invoked when starting the step */
- public void setStartTimestamp(RunId id, Instant timestamp, LockedStep step) {
- locked(id, run -> run.with(timestamp, step));
- }
-
/**
* Changes the status of the given run to inactive, and stores it as a historic run.
* Throws TimeoutException if some step in this job is still being run.
@@ -774,7 +769,8 @@ public class JobController {
public void locked(RunId id, UnaryOperator<Run> modifications) {
try (Mutex __ = curator.lock(id.application(), id.type())) {
active(id).ifPresent(run -> {
- curator.writeLastRun(modifications.apply(run));
+ Run modified = modifications.apply(run);
+ if (modified != null) curator.writeLastRun(modified);
});
}
}
diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/JobList.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/JobList.java
index 551f841233e..3074c9ac3ba 100644
--- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/JobList.java
+++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/JobList.java
@@ -4,7 +4,6 @@ package com.yahoo.vespa.hosted.controller.deployment;
import com.yahoo.collections.AbstractFilteringList;
import com.yahoo.component.Version;
import com.yahoo.config.provision.InstanceName;
-import com.yahoo.vespa.hosted.controller.api.integration.deployment.ApplicationVersion;
import com.yahoo.vespa.hosted.controller.api.integration.deployment.JobId;
import com.yahoo.vespa.hosted.controller.api.integration.deployment.JobType;
import com.yahoo.vespa.hosted.controller.api.integration.deployment.RevisionId;
@@ -74,6 +73,14 @@ public class JobList extends AbstractFilteringList<JobStatus, JobList> {
return matching(JobList::failingApplicationChange);
}
+ /** Returns the subset of jobs which are failing because of an application change, and have been since the threshold, on the given revision. */
+ public JobList failingWithBrokenRevisionSince(RevisionId broken, Instant threshold) {
+ return failingApplicationChange().matching(job -> job.runs().values().stream()
+ .anyMatch(run -> run.versions().targetRevision().equals(broken)
+ && run.hasFailed()
+ && run.start().isBefore(threshold)));
+ }
+
/** Returns the subset of jobs which are failing with the given run status. */
public JobList withStatus(RunStatus status) {
return matching(job -> job.lastStatus().map(status::equals).orElse(false));
diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/Versions.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/Versions.java
index f4c4b8bebd4..d683f1cb5c7 100644
--- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/Versions.java
+++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/Versions.java
@@ -76,7 +76,7 @@ public class Versions {
targetRevision.equals(versions.targetRevision());
}
- /** Returns wheter this change could result in the given target versions. */
+ /** Returns whether this change could result in the given target versions. */
public boolean targetsMatch(Change change) {
return change.platform().map(targetPlatform::equals).orElse(true)
&& change.revision().map(targetRevision::equals).orElse(true);
diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/ApplicationMetaDataGarbageCollector.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/ApplicationMetaDataGarbageCollector.java
index 09e0fec41d1..c8c5a1834c7 100644
--- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/ApplicationMetaDataGarbageCollector.java
+++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/ApplicationMetaDataGarbageCollector.java
@@ -14,14 +14,17 @@ public class ApplicationMetaDataGarbageCollector extends ControllerMaintainer {
private static final Logger log = Logger.getLogger(ApplicationMetaDataGarbageCollector.class.getName());
+ private final Duration timeToLive;
+
public ApplicationMetaDataGarbageCollector(Controller controller, Duration interval) {
super(controller, interval);
+ this.timeToLive = controller.system().isCd() ? Duration.ofDays(7) : Duration.ofDays(365);
}
@Override
protected double maintain() {
try {
- controller().applications().applicationStore().pruneMeta(controller().clock().instant().minus(Duration.ofDays(365)));
+ controller().applications().applicationStore().pruneMeta(controller().clock().instant().minus(timeToLive));
return 1.0;
}
catch (Exception e) {
diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/ChangeRequestMaintainer.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/ChangeRequestMaintainer.java
index aa36d204c09..a279cf46415 100644
--- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/ChangeRequestMaintainer.java
+++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/ChangeRequestMaintainer.java
@@ -70,6 +70,7 @@ public class ChangeRequestMaintainer extends ControllerMaintainer {
var vcmr = existingChangeRequests
.getOrDefault(changeRequest.getId(), new VespaChangeRequest(changeRequest, zone))
.withSource(changeRequest.getChangeRequestSource())
+ .withImpact(changeRequest.getImpact())
.withApproval(changeRequest.getApproval());
logger.fine(() -> "Storing " + vcmr);
curator.writeChangeRequest(vcmr);
diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/CloudTrialExpirer.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/CloudTrialExpirer.java
index b1b7e80e9a0..18ef47759f4 100644
--- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/CloudTrialExpirer.java
+++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/CloudTrialExpirer.java
@@ -13,14 +13,13 @@ import com.yahoo.vespa.hosted.controller.tenant.Tenant;
import java.time.Duration;
import java.util.List;
import java.util.Optional;
-import java.util.function.Consumer;
import java.util.function.Predicate;
import java.util.logging.Logger;
import java.util.stream.Collectors;
/**
* Expires unused tenants from Vespa Cloud.
- *
+ * <p>
* TODO: Should support sending notifications some time before the various expiry events happen.
*
* @author ogronnesby
@@ -29,7 +28,7 @@ public class CloudTrialExpirer extends ControllerMaintainer {
private static final Logger log = Logger.getLogger(CloudTrialExpirer.class.getName());
private static final Duration nonePlanAfter = Duration.ofDays(14);
- private static final Duration tombstoneAfter = Duration.ofDays(365);
+ private static final Duration tombstoneAfter = Duration.ofDays(183);
private final ListFlag<String> extendedTrialTenants;
public CloudTrialExpirer(Controller controller, Duration interval) {
@@ -39,38 +38,43 @@ public class CloudTrialExpirer extends ControllerMaintainer {
@Override
protected double maintain() {
- tombstoneNonePlanTenants();
- moveInactiveTenantsToNonePlan();
- return 1.0;
+ var a = tombstoneNonePlanTenants();
+ var b = moveInactiveTenantsToNonePlan();
+ return (a ? 0.5 : 0.0) + (b ? 0.5 : 0.0);
}
- private void moveInactiveTenantsToNonePlan() {
- var predicate = tenantReadersNotLoggedIn(nonePlanAfter)
- .and(this::tenantHasTrialPlan);
-
- forTenant("'none' plan", predicate, this::setPlanNone);
- }
+ private boolean moveInactiveTenantsToNonePlan() {
+ var idleTrialTenants = controller().tenants().asList().stream()
+ .filter(this::tenantIsCloudTenant)
+ .filter(this::tenantIsNotExemptFromExpiry)
+ .filter(this::tenantHasNoDeployments)
+ .filter(this::tenantHasTrialPlan)
+ .filter(tenantReadersNotLoggedIn(nonePlanAfter))
+ .toList();
+
+ if (! idleTrialTenants.isEmpty()) {
+ var tenants = idleTrialTenants.stream().map(Tenant::name).map(TenantName::value).collect(Collectors.joining(", "));
+ log.info("Setting tenants to 'none' plan: " + tenants);
+ }
- private void tombstoneNonePlanTenants() {
- var predicate = tenantReadersNotLoggedIn(tombstoneAfter).and(this::tenantHasNonePlan);
- forTenant("tombstoned", predicate, this::tombstoneTenants);
+ return setPlanNone(idleTrialTenants);
}
- private void forTenant(String name, Predicate<Tenant> p, Consumer<List<Tenant>> c) {
- var predicate = p.and(this::tenantIsCloudTenant)
- .and(this::tenantIsNotExemptFromExpiry)
- .and(this::tenantHasNoDeployments);
-
- var tenants = controller().tenants().asList().stream()
- .filter(predicate)
- .collect(Collectors.toList());
-
- if (! tenants.isEmpty()) {
- var tenantNames = tenants.stream().map(Tenant::name).map(TenantName::value).collect(Collectors.joining(", "));
- log.info("Setting tenants as " + name + ": " + tenantNames);
+ private boolean tombstoneNonePlanTenants() {
+ var idleOldPlanTenants = controller().tenants().asList().stream()
+ .filter(this::tenantIsCloudTenant)
+ .filter(this::tenantIsNotExemptFromExpiry)
+ .filter(this::tenantHasNoDeployments)
+ .filter(this::tenantHasNonePlan)
+ .filter(tenantReadersNotLoggedIn(tombstoneAfter))
+ .toList();
+
+ if (! idleOldPlanTenants.isEmpty()) {
+ var tenants = idleOldPlanTenants.stream().map(Tenant::name).map(TenantName::value).collect(Collectors.joining(", "));
+ log.info("Setting tenants as tombstoned: " + tenants);
}
- c.accept(tenants);
+ return tombstoneTenants(idleOldPlanTenants);
}
private boolean tenantIsCloudTenant(Tenant tenant) {
@@ -98,7 +102,7 @@ public class CloudTrialExpirer extends ControllerMaintainer {
}
private boolean tenantIsNotExemptFromExpiry(Tenant tenant) {
- return ! extendedTrialTenants.value().contains(tenant.name().value());
+ return !extendedTrialTenants.value().contains(tenant.name().value());
}
private boolean tenantHasNoDeployments(Tenant tenant) {
@@ -108,23 +112,46 @@ public class CloudTrialExpirer extends ControllerMaintainer {
.sum() == 0;
}
- private void setPlanNone(List<Tenant> tenants) {
- tenants.forEach(tenant -> {
- controller().serviceRegistry().billingController().setPlan(tenant.name(), PlanId.from("none"), false, false);
- });
+ private boolean setPlanNone(List<Tenant> tenants) {
+ var success = true;
+ for (var tenant : tenants) {
+ try {
+ controller().serviceRegistry().billingController().setPlan(tenant.name(), PlanId.from("none"), false, false);
+ } catch (RuntimeException e) {
+ log.info("Could not change plan for " + tenant.name() + ": " + e.getMessage());
+ success = false;
+ }
+ }
+ return success;
}
- private void tombstoneTenants(List<Tenant> tenants) {
- tenants.forEach(tenant -> {
- deleteApplicationsWithNoDeployments(tenant);
- controller().tenants().delete(tenant.name(), Optional.empty(), false);
- });
+ private boolean tombstoneTenants(List<Tenant> tenants) {
+ var success = true;
+ for (var tenant : tenants) {
+ success &= deleteApplicationsWithNoDeployments(tenant);
+ log.fine("Tombstoning empty tenant: " + tenant.name());
+ try {
+ controller().tenants().delete(tenant.name(), Optional.empty(), false);
+ } catch (RuntimeException e) {
+ log.info("Could not tombstone tenant " + tenant.name() + ": " + e.getMessage());
+ success = false;
+ }
+ }
+ return success;
}
- private void deleteApplicationsWithNoDeployments(Tenant tenant) {
- controller().applications().asList(tenant.name()).forEach(application -> {
- // this only removes applications with no active deployments
- controller().applications().deleteApplication(application.id(), Optional.empty());
- });
+ /** Only removes applications with no active deployments in them; returns false if any removal threw a RuntimeException. */
+ private boolean deleteApplicationsWithNoDeployments(Tenant tenant) {
+ var success = true;
+ for (var application : controller().applications().asList(tenant.name())) {
+ try {
+ log.fine("Removing empty application: " + application.id());
+ controller().applications().deleteApplication(application.id(), Optional.empty());
+ } catch (RuntimeException e) {
+ log.info("Could not remove application " + application.id() + ": " + e.getMessage());
+ success = false;
+ }
+ }
+ return success;
+ }
}
diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/ControllerMaintenance.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/ControllerMaintenance.java
index 4aeecdcd4ff..ab2e0312b15 100644
--- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/ControllerMaintenance.java
+++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/ControllerMaintenance.java
@@ -33,6 +33,7 @@ import static java.time.temporal.ChronoUnit.SECONDS;
public class ControllerMaintenance extends AbstractComponent {
private final Upgrader upgrader;
+ private final OsUpgradeScheduler osUpgradeScheduler;
private final List<Maintainer> maintainers = new CopyOnWriteArrayList<>();
@Inject
@@ -40,7 +41,9 @@ public class ControllerMaintenance extends AbstractComponent {
public ControllerMaintenance(Controller controller, Metric metric, UserManagement userManagement, AthenzClientFactory athenzClientFactory) {
Intervals intervals = new Intervals(controller.system());
upgrader = new Upgrader(controller, intervals.defaultInterval);
+ osUpgradeScheduler = new OsUpgradeScheduler(controller, intervals.osUpgradeScheduler);
maintainers.add(upgrader);
+ maintainers.add(osUpgradeScheduler);
maintainers.addAll(osUpgraders(controller, intervals.osUpgrader));
maintainers.add(new DeploymentExpirer(controller, intervals.defaultInterval));
maintainers.add(new DeploymentUpgrader(controller, intervals.defaultInterval));
@@ -54,7 +57,6 @@ public class ControllerMaintenance extends AbstractComponent {
maintainers.add(new SystemUpgrader(controller, intervals.systemUpgrader));
maintainers.add(new JobRunner(controller, intervals.jobRunner));
maintainers.add(new OsVersionStatusUpdater(controller, intervals.osVersionStatusUpdater));
- maintainers.add(new OsUpgradeScheduler(controller, intervals.osUpgradeScheduler));
maintainers.add(new ContactInformationMaintainer(controller, intervals.contactInformationMaintainer));
maintainers.add(new NameServiceDispatcher(controller, intervals.nameServiceDispatcher));
maintainers.add(new CostReportMaintainer(controller, intervals.costReportMaintainer, controller.serviceRegistry().costReportConsumer()));
@@ -70,7 +72,7 @@ public class ControllerMaintenance extends AbstractComponent {
maintainers.add(new ArchiveAccessMaintainer(controller, metric, intervals.archiveAccessMaintainer));
maintainers.add(new TenantRoleMaintainer(controller, intervals.tenantRoleMaintainer));
maintainers.add(new ChangeRequestMaintainer(controller, intervals.changeRequestMaintainer));
- maintainers.add(new VcmrMaintainer(controller, intervals.vcmrMaintainer));
+ maintainers.add(new VcmrMaintainer(controller, intervals.vcmrMaintainer, metric));
maintainers.add(new CloudTrialExpirer(controller, intervals.defaultInterval));
maintainers.add(new RetriggerMaintainer(controller, intervals.retriggerMaintainer));
maintainers.add(new UserManagementMaintainer(controller, intervals.userManagementMaintainer, controller.serviceRegistry().roleMaintainer()));
@@ -80,6 +82,8 @@ public class ControllerMaintenance extends AbstractComponent {
public Upgrader upgrader() { return upgrader; }
+ public OsUpgradeScheduler osUpgradeScheduler() { return osUpgradeScheduler; }
+
@Override
public void deconstruct() {
maintainers.forEach(Maintainer::shutdown);
@@ -156,7 +160,7 @@ public class ControllerMaintenance extends AbstractComponent {
this.containerImageExpirer = duration(12, HOURS);
this.hostInfoUpdater = duration(12, HOURS);
this.reindexingTriggerer = duration(1, HOURS);
- this.endpointCertificateMaintainer = duration(12, HOURS);
+ this.endpointCertificateMaintainer = duration(1, HOURS);
this.trafficFractionUpdater = duration(5, MINUTES);
this.archiveUriUpdater = duration(5, MINUTES);
this.archiveAccessMaintainer = duration(10, MINUTES);
diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/EndpointCertificateMaintainer.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/EndpointCertificateMaintainer.java
index f3256237284..2e2680cd34a 100644
--- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/EndpointCertificateMaintainer.java
+++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/EndpointCertificateMaintainer.java
@@ -7,7 +7,6 @@ import com.yahoo.config.provision.ApplicationId;
import com.yahoo.container.jdisc.secretstore.SecretNotFoundException;
import com.yahoo.container.jdisc.secretstore.SecretStore;
import com.yahoo.transaction.Mutex;
-import com.yahoo.vespa.curator.Lock;
import com.yahoo.vespa.hosted.controller.Controller;
import com.yahoo.vespa.hosted.controller.Instance;
import com.yahoo.vespa.hosted.controller.api.integration.certificates.EndpointCertificateDetails;
@@ -15,6 +14,7 @@ import com.yahoo.vespa.hosted.controller.api.integration.certificates.EndpointCe
import com.yahoo.vespa.hosted.controller.api.integration.certificates.EndpointCertificateProvider;
import com.yahoo.vespa.hosted.controller.api.integration.certificates.EndpointCertificateRequestMetadata;
import com.yahoo.vespa.hosted.controller.api.integration.deployment.JobType;
+import com.yahoo.vespa.hosted.controller.application.Deployment;
import com.yahoo.vespa.hosted.controller.application.TenantAndApplicationId;
import com.yahoo.vespa.hosted.controller.deployment.DeploymentTrigger;
import com.yahoo.vespa.hosted.controller.persistence.CuratorDb;
@@ -23,6 +23,8 @@ import java.time.Clock;
import java.time.Duration;
import java.time.Instant;
import java.time.temporal.ChronoUnit;
+import java.util.ArrayList;
+import java.util.Comparator;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
@@ -48,6 +50,7 @@ public class EndpointCertificateMaintainer extends ControllerMaintainer {
private final CuratorDb curator;
private final SecretStore secretStore;
private final EndpointCertificateProvider endpointCertificateProvider;
+ final Comparator<EligibleJob> oldestFirst = Comparator.comparing(e -> e.deployment.at());
@Inject
public EndpointCertificateMaintainer(Controller controller, Duration interval) {
@@ -92,11 +95,14 @@ public class EndpointCertificateMaintainer extends ControllerMaintainer {
}));
}
+ record EligibleJob(Deployment deployment, ApplicationId applicationId, JobType job) {}
/**
- * If it's been four days since the cert has been refreshed, re-trigger all prod deployment jobs.
+ * If it's been four days since the cert has been refreshed, re-trigger prod deployment jobs (one at a time).
*/
private void deployRefreshedCertificates() {
var now = clock.instant();
+ var eligibleJobs = new ArrayList<EligibleJob>();
+
curator.readAllEndpointCertificateMetadata().forEach((applicationId, endpointCertificateMetadata) ->
endpointCertificateMetadata.lastRefreshed().ifPresent(lastRefreshTime -> {
Instant refreshTime = Instant.ofEpochSecond(lastRefreshTime);
@@ -105,13 +111,19 @@ public class EndpointCertificateMaintainer extends ControllerMaintainer {
.ifPresent(instance -> instance.productionDeployments().forEach((zone, deployment) -> {
if (deployment.at().isBefore(refreshTime)) {
JobType job = JobType.deploymentTo(zone);
- deploymentTrigger.reTrigger(applicationId, job, "re-triggered by EndpointCertificateMaintainer");
- log.info("Re-triggering deployment job " + job.jobName() + " for instance " +
- applicationId.serializedForm() + " to roll out refreshed endpoint certificate");
+ eligibleJobs.add(new EligibleJob(deployment, applicationId, job));
}
}));
}
}));
+
+ eligibleJobs.stream()
+ .min(oldestFirst)
+ .ifPresent(e -> {
+ deploymentTrigger.reTrigger(e.applicationId, e.job, "re-triggered by EndpointCertificateMaintainer");
+ log.info("Re-triggering deployment job " + e.job.jobName() + " for instance " +
+ e.applicationId.serializedForm() + " to roll out refreshed endpoint certificate");
+ });
}
private OptionalInt latestVersionInSecretStore(EndpointCertificateMetadata originalCertificateMetadata) {
@@ -156,8 +168,8 @@ public class EndpointCertificateMaintainer extends ControllerMaintainer {
List<EndpointCertificateRequestMetadata> endpointCertificateMetadata = endpointCertificateProvider.listCertificates();
Map<ApplicationId, EndpointCertificateMetadata> storedEndpointCertificateMetadata = curator.readAllEndpointCertificateMetadata();
- List<String> leafRequestIds = storedEndpointCertificateMetadata.values().stream().flatMap(m -> m.leafRequestId().stream()).collect(Collectors.toList());
- List<String> rootRequestIds = storedEndpointCertificateMetadata.values().stream().map(EndpointCertificateMetadata::rootRequestId).collect(Collectors.toList());
+ List<String> leafRequestIds = storedEndpointCertificateMetadata.values().stream().flatMap(m -> m.leafRequestId().stream()).toList();
+ List<String> rootRequestIds = storedEndpointCertificateMetadata.values().stream().map(EndpointCertificateMetadata::rootRequestId).toList();
for (var providerCertificateMetadata : endpointCertificateMetadata) {
if (!rootRequestIds.contains(providerCertificateMetadata.requestId()) && !leafRequestIds.contains(providerCertificateMetadata.requestId())) {
diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/InfrastructureUpgrader.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/InfrastructureUpgrader.java
index 82413f21222..b051590ac5a 100644
--- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/InfrastructureUpgrader.java
+++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/InfrastructureUpgrader.java
@@ -3,6 +3,7 @@ package com.yahoo.vespa.hosted.controller.maintenance;
import com.yahoo.component.Version;
import com.yahoo.config.provision.SystemName;
+import com.yahoo.config.provision.zone.NodeSlice;
import com.yahoo.config.provision.zone.UpgradePolicy;
import com.yahoo.config.provision.zone.ZoneApi;
import com.yahoo.text.Text;
@@ -25,6 +26,7 @@ import java.util.Optional;
import java.util.Set;
import java.util.function.Function;
import java.util.logging.Logger;
+import java.util.stream.Collectors;
/**
* Base class for maintainers that upgrade zone infrastructure.
@@ -57,22 +59,22 @@ public abstract class InfrastructureUpgrader<TARGET extends VersionTarget> exten
int failures = 0;
// Invert zone order if we're downgrading
UpgradePolicy policy = target.downgrade() ? upgradePolicy.inverted() : upgradePolicy;
- for (Set<ZoneApi> step : policy.steps()) {
+ for (UpgradePolicy.Step step : policy.steps()) {
boolean converged = true;
- for (ZoneApi zone : step) {
+ for (ZoneApi zone : step.zones()) {
try {
attempts++;
- converged &= upgradeAll(target, applications, zone);
+ converged &= upgradeAll(target, applications, zone, step.nodeSlice());
} catch (UnreachableNodeRepositoryException e) {
failures++;
converged = false;
log.warning(Text.format("%s: Failed to communicate with node repository in %s, continuing with next parallel zone: %s",
- this, zone, Exceptions.toMessageString(e)));
+ this, zone, Exceptions.toMessageString(e)));
} catch (Exception e) {
failures++;
converged = false;
log.warning(Text.format("%s: Failed to upgrade zone: %s, continuing with next parallel zone: %s",
- this, zone, Exceptions.toMessageString(e)));
+ this, zone, Exceptions.toMessageString(e)));
}
}
if (!converged) {
@@ -83,7 +85,7 @@ public abstract class InfrastructureUpgrader<TARGET extends VersionTarget> exten
}
/** Returns whether all applications have converged to the target version in zone */
- private boolean upgradeAll(TARGET target, List<SystemApplication> applications, ZoneApi zone) {
+ private boolean upgradeAll(TARGET target, List<SystemApplication> applications, ZoneApi zone, NodeSlice nodeSlice) {
Map<SystemApplication, Set<SystemApplication>> dependenciesByApplication = new HashMap<>();
if (target.downgrade()) { // Invert dependencies when we're downgrading
for (var application : applications) {
@@ -100,20 +102,17 @@ public abstract class InfrastructureUpgrader<TARGET extends VersionTarget> exten
for (var kv : dependenciesByApplication.entrySet()) {
SystemApplication application = kv.getKey();
Set<SystemApplication> dependencies = kv.getValue();
- if (convergedOn(target, dependencies, zone)) {
+ boolean allConverged = dependencies.stream().allMatch(app -> convergedOn(target, app, zone, nodeSlice));
+ if (allConverged) {
if (changeTargetTo(target, application, zone)) {
upgrade(target, application, zone);
}
- converged &= convergedOn(target, application, zone);
+ converged &= convergedOn(target, application, zone, nodeSlice);
}
}
return converged;
}
- private boolean convergedOn(TARGET target, Set<SystemApplication> applications, ZoneApi zone) {
- return applications.stream().allMatch(application -> convergedOn(target, application, zone));
- }
-
/** Returns whether target version for application in zone should be changed */
protected abstract boolean changeTargetTo(TARGET target, SystemApplication application, ZoneApi zone);
@@ -121,7 +120,7 @@ public abstract class InfrastructureUpgrader<TARGET extends VersionTarget> exten
protected abstract void upgrade(TARGET target, SystemApplication application, ZoneApi zone);
/** Returns whether application has converged to target version in zone */
- protected abstract boolean convergedOn(TARGET target, SystemApplication application, ZoneApi zone);
+ protected abstract boolean convergedOn(TARGET target, SystemApplication application, ZoneApi zone, NodeSlice nodeSlice);
/** Returns the version target for the component upgraded by this, if any */
protected abstract Optional<TARGET> target();
@@ -129,19 +128,34 @@ public abstract class InfrastructureUpgrader<TARGET extends VersionTarget> exten
/** Returns whether the upgrader should expect given node to upgrade */
protected abstract boolean expectUpgradeOf(Node node, SystemApplication application, ZoneApi zone);
- /** Find the minimum value of a version field in a zone by comparing all nodes */
- protected final Optional<Version> minVersion(ZoneApi zone, SystemApplication application, Function<Node, Version> versionField) {
+ /** Find the highest version used by nodes satisfying nodeSlice in zone. If no such slice exists, the lowest known version is returned */
+ protected final Optional<Version> versionOf(NodeSlice nodeSlice, ZoneApi zone, SystemApplication application, Function<Node, Version> versionField) {
try {
- return controller().serviceRegistry().configServer()
- .nodeRepository()
- .list(zone.getVirtualId(), NodeFilter.all().applications(application.id()))
- .stream()
- .filter(node -> expectUpgradeOf(node, application, zone))
- .map(versionField)
- .min(Comparator.naturalOrder());
+ Map<Version, Long> nodeCountByVersion = controller().serviceRegistry().configServer()
+ .nodeRepository()
+ .list(zone.getVirtualId(), NodeFilter.all().applications(application.id()))
+ .stream()
+ .filter(node -> expectUpgradeOf(node, application, zone))
+ .collect(Collectors.groupingBy(versionField,
+ Collectors.counting()));
+ long totalNodes = nodeCountByVersion.values().stream().reduce(Long::sum).orElse(0L);
+ Set<Version> versionsOfMatchingSlices = new HashSet<>();
+ for (var kv : nodeCountByVersion.entrySet()) {
+ long nodesOnVersion = kv.getValue();
+ if (nodeSlice.satisfiedBy(nodesOnVersion, totalNodes)) {
+ versionsOfMatchingSlices.add(kv.getKey());
+ }
+ }
+ if (!versionsOfMatchingSlices.isEmpty()) {
+ // Choose the highest version in case we have several matching slices
+ return versionsOfMatchingSlices.stream().max(Comparator.naturalOrder());
+ }
+ // No matching slices found, fall back to the lowest known version
+ return nodeCountByVersion.keySet().stream().min(Comparator.naturalOrder());
} catch (Exception e) {
throw new UnreachableNodeRepositoryException(Text.format("Failed to get version for %s in %s: %s",
- application.id(), zone, Exceptions.toMessageString(e)));
+ application.id(), zone,
+ Exceptions.toMessageString(e)));
}
}
diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/JobRunner.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/JobRunner.java
index 94ec4129744..cd48d6839f3 100644
--- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/JobRunner.java
+++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/JobRunner.java
@@ -8,7 +8,6 @@ import com.yahoo.vespa.hosted.controller.deployment.InternalStepRunner;
import com.yahoo.vespa.hosted.controller.deployment.JobController;
import com.yahoo.vespa.hosted.controller.deployment.Run;
import com.yahoo.vespa.hosted.controller.deployment.Step;
-import com.yahoo.vespa.hosted.controller.deployment.StepInfo;
import com.yahoo.vespa.hosted.controller.deployment.StepRunner;
import java.time.Duration;
@@ -75,18 +74,26 @@ public class JobRunner extends ControllerMaintainer {
}
}
- /** Advances each of the ready steps for the given run, or marks it as finished, and stashes it. Public for testing. */
public void advance(Run run) {
- if ( ! run.hasFailed()
- && controller().clock().instant().isAfter(run.sleepUntil().orElse(run.start()).plus(jobTimeout)))
- executors.execute(() -> {
- jobs.abort(run.id(), "job timeout of " + jobTimeout + " reached");
- advance(jobs.run(run.id()));
- });
- else if (run.readySteps().isEmpty())
- executors.execute(() -> finish(run.id()));
- else if (run.hasFailed() || run.sleepUntil().map(sleepUntil -> ! sleepUntil.isAfter(controller().clock().instant())).orElse(true))
- run.readySteps().forEach(step -> executors.execute(() -> advance(run.id(), step)));
+ advance(run.id());
+ }
+
+ /** Advances each of the ready steps for the given run, or marks it as finished, and stashes it. Public for testing. */
+ public void advance(RunId id) {
+ jobs.locked(id, run -> {
+ if ( ! run.hasFailed()
+ && controller().clock().instant().isAfter(run.sleepUntil().orElse(run.start()).plus(jobTimeout)))
+ executors.execute(() -> {
+ jobs.abort(run.id(), "job timeout of " + jobTimeout + " reached");
+ advance(run.id());
+ });
+ else if (run.readySteps().isEmpty())
+ executors.execute(() -> finish(run.id()));
+ else if (run.hasFailed() || run.sleepUntil().map(sleepUntil -> ! sleepUntil.isAfter(controller().clock().instant())).orElse(true))
+ run.readySteps().forEach(step -> executors.execute(() -> advance(run.id(), step)));
+
+ return null;
+ });
}
private void finish(RunId id) {
@@ -108,23 +115,24 @@ public class JobRunner extends ControllerMaintainer {
try {
AtomicBoolean changed = new AtomicBoolean(false);
jobs.locked(id.application(), id.type(), step, lockedStep -> {
- jobs.locked(id, run -> run); // Memory visibility.
- jobs.active(id).ifPresent(run -> { // The run may have become inactive, so we bail out.
+ jobs.locked(id, run -> {
if ( ! run.readySteps().contains(step)) {
changed.set(true);
- return; // Someone may have updated the run status, making this step obsolete, so we bail out.
+ return run; // Someone may have updated the run status, making this step obsolete, so we bail out.
}
- StepInfo stepInfo = run.stepInfo(lockedStep.get()).orElseThrow();
- if (stepInfo.startTime().isEmpty()) {
- jobs.setStartTimestamp(run.id(), controller().clock().instant(), lockedStep);
- }
+ if (run.stepInfo(lockedStep.get()).orElseThrow().startTime().isEmpty())
+ run = run.with(controller().clock().instant(), lockedStep);
- runner.run(lockedStep, run.id()).ifPresent(status -> {
- jobs.update(run.id(), status, lockedStep);
+ return run;
+ });
+
+ if ( ! changed.get()) {
+ runner.run(lockedStep, id).ifPresent(status -> {
+ jobs.update(id, status, lockedStep);
changed.set(true);
});
- });
+ }
});
if (changed.get())
jobs.active(id).ifPresent(this::advance);
diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/MeteringMonitorMaintainer.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/MeteringMonitorMaintainer.java
index eadbdf74c3c..519b1001be4 100644
--- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/MeteringMonitorMaintainer.java
+++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/MeteringMonitorMaintainer.java
@@ -51,14 +51,15 @@ public class MeteringMonitorMaintainer extends ControllerMaintainer {
return controller().applications().asList()
.stream()
.flatMap(app -> app.instances().values().stream())
- .flatMap(this::instancesToDeployments)
+ .flatMap(this::toProdDeployments)
.collect(Collectors.toSet());
}
- private Stream<DeploymentId> instancesToDeployments(Instance instance) {
+ private Stream<DeploymentId> toProdDeployments(Instance instance) {
return instance.deployments()
.keySet()
.stream()
+ .filter(deployment -> deployment.environment().isProduction())
.map(deployment -> new DeploymentId(instance.id(), deployment));
}
}
diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/OsUpgradeScheduler.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/OsUpgradeScheduler.java
index 3bd1c7bb358..ddcfef23d86 100644
--- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/OsUpgradeScheduler.java
+++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/OsUpgradeScheduler.java
@@ -9,10 +9,14 @@ import com.yahoo.vespa.hosted.controller.api.integration.deployment.ArtifactRepo
import com.yahoo.vespa.hosted.controller.api.integration.deployment.OsRelease;
import com.yahoo.vespa.hosted.controller.versions.OsVersionTarget;
+import java.time.DayOfWeek;
import java.time.Duration;
import java.time.Instant;
+import java.time.LocalDate;
import java.time.ZoneOffset;
+import java.time.ZonedDateTime;
import java.time.format.DateTimeFormatter;
+import java.time.temporal.ChronoUnit;
import java.util.Objects;
import java.util.Optional;
@@ -30,23 +34,24 @@ public class OsUpgradeScheduler extends ControllerMaintainer {
@Override
protected double maintain() {
Instant now = controller().clock().instant();
- if (!canTriggerAt(now)) return 1.0;
for (var cloud : controller().clouds()) {
- Release release = releaseIn(cloud);
- upgradeTo(release, cloud, now);
+ Optional<Change> change = changeIn(cloud);
+ if (change.isEmpty()) continue;
+ if (!change.get().scheduleAt(now)) continue;
+ controller().upgradeOsIn(cloud, change.get().version(), change.get().upgradeBudget(), false);
}
return 1.0;
}
- /** Upgrade to given release in cloud */
- private void upgradeTo(Release release, CloudName cloud, Instant now) {
+ /** Returns the wanted change for given cloud, if any */
+ public Optional<Change> changeIn(CloudName cloud) {
Optional<OsVersionTarget> currentTarget = controller().osVersionTarget(cloud);
- if (currentTarget.isEmpty()) return;
- if (upgradingToNewMajor(cloud)) return; // Skip further upgrades until major version upgrade is complete
+ if (currentTarget.isEmpty()) return Optional.empty();
+ if (upgradingToNewMajor(cloud)) return Optional.empty(); // Skip further upgrades until major version upgrade is complete
- Version version = release.version(currentTarget.get(), now);
- if (!version.isAfter(currentTarget.get().osVersion().version())) return;
- controller().upgradeOsIn(cloud, version, release.upgradeBudget(), false);
+ Release release = releaseIn(cloud);
+ Instant now = controller().clock().instant();
+ return release.change(currentTarget.get().version(), now);
}
private boolean upgradingToNewMajor(CloudName cloud) {
@@ -56,54 +61,69 @@ public class OsUpgradeScheduler extends ControllerMaintainer {
.count() > 1;
}
- private boolean canTriggerAt(Instant instant) {
- int hourOfDay = instant.atZone(ZoneOffset.UTC).getHour();
- int dayOfWeek = instant.atZone(ZoneOffset.UTC).getDayOfWeek().getValue();
- // Upgrade can only be scheduled between 07:00 (02:00 in CD systems) and 12:59 UTC, Monday-Thursday
- int startHour = controller().system().isCd() ? 2 : 7;
- return hourOfDay >= startHour && hourOfDay <= 12 && dayOfWeek < 5;
- }
-
private Release releaseIn(CloudName cloud) {
boolean useTaggedRelease = controller().zoneRegistry().zones().all().reprovisionToUpgradeOs().in(cloud)
- .zones().isEmpty();
+ .zones().isEmpty();
if (useTaggedRelease) {
return new TaggedRelease(controller().system(), controller().serviceRegistry().artifactRepository());
}
return new CalendarVersionedRelease(controller().system());
}
- private interface Release {
+ private static boolean canTriggerAt(Instant instant, boolean isCd) {
+ ZonedDateTime dateTime = instant.atZone(ZoneOffset.UTC);
+ int hourOfDay = dateTime.getHour();
+ int dayOfWeek = dateTime.getDayOfWeek().getValue();
+ // Upgrade can only be scheduled between 07:00 (02:00 in CD systems) and 12:59 UTC, Monday-Thursday
+ int startHour = isCd ? 2 : 7;
+ return hourOfDay >= startHour && hourOfDay <= 12 && dayOfWeek < 5;
+ }
- /** The version number of this */
- Version version(OsVersionTarget currentTarget, Instant now);
+ /** Returns the earliest time an upgrade can be scheduled on the day of instant, in given system */
+ private static Instant schedulingInstant(Instant instant, SystemName system) {
+ instant = instant.truncatedTo(ChronoUnit.DAYS);
+ while (!canTriggerAt(instant, system.isCd())) {
+ instant = instant.plus(Duration.ofHours(1));
+ }
+ return instant;
+ }
- /** The budget to use when upgrading to this */
- Duration upgradeBudget();
+ private interface Release {
+
+ /** The pending change for this release at given instant, if any */
+ Optional<Change> change(Version currentVersion, Instant instant);
}
- /** OS release based on a tag */
- private static class TaggedRelease implements Release {
+ /** OS version change, its budget and the earliest time it can be scheduled */
+ public record Change(Version version, Duration upgradeBudget, Instant scheduleAt) {
- private final SystemName system;
- private final ArtifactRepository artifactRepository;
+ public Change {
+ Objects.requireNonNull(version);
+ Objects.requireNonNull(upgradeBudget);
+ Objects.requireNonNull(scheduleAt);
+ }
- private TaggedRelease(SystemName system, ArtifactRepository artifactRepository) {
- this.system = Objects.requireNonNull(system);
- this.artifactRepository = Objects.requireNonNull(artifactRepository);
+ /** Returns whether this can be scheduled at given instant */
+ public boolean scheduleAt(Instant instant) {
+ return !instant.isBefore(scheduleAt);
}
- @Override
- public Version version(OsVersionTarget currentTarget, Instant now) {
- OsRelease release = artifactRepository.osRelease(currentTarget.osVersion().version().getMajor(), tag());
- boolean cooldownPassed = !release.taggedAt().plus(cooldown()).isAfter(now);
- return cooldownPassed ? release.version() : currentTarget.osVersion().version();
+ }
+
+ /** OS release based on a tag */
+ private record TaggedRelease(SystemName system, ArtifactRepository artifactRepository) implements Release {
+
+ public TaggedRelease {
+ Objects.requireNonNull(system);
+ Objects.requireNonNull(artifactRepository);
}
- @Override
- public Duration upgradeBudget() {
- return Duration.ZERO; // Upgrades to tagged releases happen in-place so no budget is required
+ public Optional<Change> change(Version currentVersion, Instant instant) {
+ OsRelease release = artifactRepository.osRelease(currentVersion.getMajor(), tag());
+ if (!release.version().isAfter(currentVersion)) return Optional.empty();
+ Instant scheduleAt = schedulingInstant(release.taggedAt().plus(cooldown()), system);
+ return Optional.of(new Change(release.version(), Duration.ZERO, scheduleAt));
}
/** Returns the release tag tracked by this system */
@@ -119,48 +139,65 @@ public class OsUpgradeScheduler extends ControllerMaintainer {
}
/** OS release based on calendar-versioning */
- private static class CalendarVersionedRelease implements Release {
+ record CalendarVersionedRelease(SystemName system) implements Release {
- /** The time to wait before scheduling upgrade to next version */
- private static final Duration SCHEDULING_INTERVAL = Duration.ofDays(45);
+ /** A fixed point in time which the release schedule is calculated from */
+ private static final Instant START_OF_SCHEDULE = LocalDate.of(2022, 1, 1)
+ .atStartOfDay()
+ .toInstant(ZoneOffset.UTC);
- /**
- * The interval at which new versions become available. We use this to avoid scheduling upgrades to a version
- * that has not been released yet. Example: Version N is the latest one and target is set to N+1. If N+1 does
- * not exist the zone will not converge until N+1 has been released and we may end up triggering multiple
- * rounds of upgrades.
- */
- private static final Duration AVAILABILITY_INTERVAL = Duration.ofDays(7);
+ /** The time that should elapse between versions */
+ private static final Duration SCHEDULING_STEP = Duration.ofDays(60);
- private static final DateTimeFormatter CALENDAR_VERSION_PATTERN = DateTimeFormatter.ofPattern("yyyyMMdd");
+ /** The day of week new releases are published */
+ private static final DayOfWeek RELEASE_DAY = DayOfWeek.MONDAY;
- private final SystemName system;
+ private static final DateTimeFormatter CALENDAR_VERSION_PATTERN = DateTimeFormatter.ofPattern("yyyyMMdd");
- public CalendarVersionedRelease(SystemName system) {
- this.system = Objects.requireNonNull(system);
+ public CalendarVersionedRelease {
+ Objects.requireNonNull(system);
}
@Override
- public Version version(OsVersionTarget currentTarget, Instant now) {
- Instant scheduledAt = currentTarget.scheduledAt();
- Version currentVersion = currentTarget.osVersion().version();
- if (scheduledAt.isBefore(now.minus(SCHEDULING_INTERVAL))) {
- String calendarVersion = now.minus(AVAILABILITY_INTERVAL)
- .atZone(ZoneOffset.UTC)
- .format(CALENDAR_VERSION_PATTERN);
- return new Version(currentVersion.getMajor(),
- currentVersion.getMinor(),
- currentVersion.getMicro(),
- calendarVersion);
+ public Optional<Change> change(Version currentVersion, Instant instant) {
+ Version wantedVersion = asVersion(dateOfWantedVersion(instant), currentVersion);
+ while (!wantedVersion.isAfter(currentVersion)) {
+ wantedVersion = asVersion(dateOfWantedVersion(instant), currentVersion);
+ instant = instant.plus(Duration.ofDays(1));
}
- return currentVersion; // New version should not be scheduled yet
+ return Optional.of(new Change(wantedVersion, upgradeBudget(), schedulingInstant(instant, system)));
}
- @Override
- public Duration upgradeBudget() {
+ private Duration upgradeBudget() {
return system.isCd() ? Duration.ZERO : Duration.ofDays(14);
}
+ /**
+ * Calculate the date of the wanted version relative to now. A given zone will choose the oldest release
+ * available which is not older than this date.
+ */
+ static LocalDate dateOfWantedVersion(Instant now) {
+ Instant candidate = START_OF_SCHEDULE;
+ while (!candidate.plus(SCHEDULING_STEP).isAfter(now)) {
+ candidate = candidate.plus(SCHEDULING_STEP);
+ }
+ LocalDate date = LocalDate.ofInstant(candidate, ZoneOffset.UTC);
+ return releaseDayOf(date);
+ }
+
+ private static LocalDate releaseDayOf(LocalDate date) {
+ int releaseDayDelta = RELEASE_DAY.getValue() - date.getDayOfWeek().getValue();
+ return date.plusDays(releaseDayDelta);
+ }
+
+ private static Version asVersion(LocalDate dateOfVersion, Version currentVersion) {
+ String calendarVersion = dateOfVersion.format(CALENDAR_VERSION_PATTERN);
+ return new Version(currentVersion.getMajor(),
+ currentVersion.getMinor(),
+ currentVersion.getMicro(),
+ calendarVersion);
+ }
+
}
}
diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/OsUpgrader.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/OsUpgrader.java
index fa64a2677f4..f4dcf7f6088 100644
--- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/OsUpgrader.java
+++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/OsUpgrader.java
@@ -3,6 +3,7 @@ package com.yahoo.vespa.hosted.controller.maintenance;
import com.yahoo.component.Version;
import com.yahoo.config.provision.CloudName;
+import com.yahoo.config.provision.zone.NodeSlice;
import com.yahoo.config.provision.zone.ZoneApi;
import com.yahoo.text.Text;
import com.yahoo.vespa.hosted.controller.Controller;
@@ -54,15 +55,16 @@ public class OsUpgrader extends InfrastructureUpgrader<OsVersionTarget> {
}
@Override
- protected boolean convergedOn(OsVersionTarget target, SystemApplication application, ZoneApi zone) {
- return !currentVersion(zone, application, target.osVersion().version()).isBefore(target.osVersion().version());
+ protected boolean convergedOn(OsVersionTarget target, SystemApplication application, ZoneApi zone, NodeSlice nodeSlice) {
+ Version currentVersion = versionOf(nodeSlice, zone, application, Node::currentOsVersion).orElse(target.osVersion().version());
+ return !currentVersion.isBefore(target.osVersion().version());
}
@Override
protected boolean expectUpgradeOf(Node node, SystemApplication application, ZoneApi zone) {
return cloud.equals(zone.getCloudName()) && // Cloud is managed by this upgrader
application.shouldUpgradeOs() && // Application should upgrade in this cloud
- canUpgrade(node); // Node is in an upgradable state
+ canUpgrade(node, false);
}
@Override
@@ -83,29 +85,23 @@ public class OsUpgrader extends InfrastructureUpgrader<OsVersionTarget> {
.orElse(true);
}
- private Version currentVersion(ZoneApi zone, SystemApplication application, Version defaultVersion) {
- return minVersion(zone, application, Node::currentOsVersion).orElse(defaultVersion);
- }
-
/** Returns the available upgrade budget for given zone */
private Duration zoneBudgetOf(Duration totalBudget, ZoneApi zone) {
if (!spendBudgetOn(zone)) return Duration.ZERO;
long consecutiveZones = upgradePolicy.steps().stream()
- .filter(parallelZones -> parallelZones.stream().anyMatch(this::spendBudgetOn))
+ .filter(step -> step.zones().stream().anyMatch(this::spendBudgetOn))
.count();
return totalBudget.dividedBy(consecutiveZones);
}
/** Returns whether to spend upgrade budget on given zone */
private boolean spendBudgetOn(ZoneApi zone) {
- if (!zone.getEnvironment().isProduction()) return false;
- if (controller().zoneRegistry().systemZone().getVirtualId().equals(zone.getVirtualId())) return false; // Controller zone
- return true;
+ return !controller().zoneRegistry().systemZone().getVirtualId().equals(zone.getVirtualId()); // Do not spend budget on controller zone
}
- /** Returns whether node is in a state where it can be upgraded */
- public static boolean canUpgrade(Node node) {
- return upgradableNodeStates.contains(node.state());
+ /** Returns whether node currently allows upgrades */
+ public static boolean canUpgrade(Node node, boolean includeDeferring) {
+ return (includeDeferring || !node.deferOsUpgrade()) && upgradableNodeStates.contains(node.state());
}
private static String name(CloudName cloud) {
diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/ResourceMeterMaintainer.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/ResourceMeterMaintainer.java
index 892ad669e4b..205fb7e0e79 100644
--- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/ResourceMeterMaintainer.java
+++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/ResourceMeterMaintainer.java
@@ -100,7 +100,7 @@ public class ResourceMeterMaintainer extends ControllerMaintainer {
}
if (systemName.isPublic()) reportResourceSnapshots(resourceSnapshots);
- if (systemName.isPublic() && systemName.isCd()) reportAllScalingEvents();
+ if (systemName.isPublic()) reportAllScalingEvents();
updateDeploymentCost(resourceSnapshots);
return 1.0;
}
diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/SystemUpgrader.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/SystemUpgrader.java
index 8d5851be62f..8e74ef9a983 100644
--- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/SystemUpgrader.java
+++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/SystemUpgrader.java
@@ -2,6 +2,7 @@
package com.yahoo.vespa.hosted.controller.maintenance;
import com.yahoo.component.Version;
+import com.yahoo.config.provision.zone.NodeSlice;
import com.yahoo.config.provision.zone.RoutingMethod;
import com.yahoo.config.provision.zone.ZoneApi;
import com.yahoo.text.Text;
@@ -39,12 +40,12 @@ public class SystemUpgrader extends InfrastructureUpgrader<VespaVersionTarget> {
}
@Override
- protected boolean convergedOn(VespaVersionTarget target, SystemApplication application, ZoneApi zone) {
- Optional<Version> minVersion = minVersion(zone, application, Node::currentVersion);
+ protected boolean convergedOn(VespaVersionTarget target, SystemApplication application, ZoneApi zone, NodeSlice nodeSlice) {
+ Optional<Version> currentVersion = versionOf(nodeSlice, zone, application, Node::currentVersion);
// Skip application convergence check if there are no nodes belonging to the application in the zone
- if (minVersion.isEmpty()) return true;
+ if (currentVersion.isEmpty()) return true;
- return minVersion.get().equals(target.version()) &&
+ return currentVersion.get().equals(target.version()) &&
application.configConvergedIn(zone.getId(), controller(), Optional.of(target.version()));
}
@@ -79,10 +80,9 @@ public class SystemUpgrader extends InfrastructureUpgrader<VespaVersionTarget> {
// the wanted version of each node.
boolean zoneHasSharedRouting = controller().zoneRegistry().routingMethods(zone.getId()).stream()
.anyMatch(RoutingMethod::isShared);
- return minVersion(zone, application, Node::wantedVersion)
+ return versionOf(NodeSlice.ALL, zone, application, Node::wantedVersion)
.map(wantedVersion -> !wantedVersion.equals(target.version()))
.orElse(zoneHasSharedRouting); // Always upgrade if zone uses shared routing, but has no nodes allocated yet
-
}
return controller().serviceRegistry().configServer().nodeRepository()
.targetVersionsOf(zone.getId())
diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/TenantRoleMaintainer.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/TenantRoleMaintainer.java
index dad836ca2de..820c67f2d44 100644
--- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/TenantRoleMaintainer.java
+++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/TenantRoleMaintainer.java
@@ -33,21 +33,15 @@ public class TenantRoleMaintainer extends ControllerMaintainer {
.map(Tenant::name)
.collect(Collectors.toList());
roleService.maintainRoles(tenantsWithRoles);
+
+ var deletedTenants = controller().tenants().asList(true).stream()
+ .filter(tenant -> tenant.type() == Tenant.Type.deleted)
+ .map(Tenant::name)
+ .toList();
+ roleService.cleanupRoles(deletedTenants);
+
return 1.0;
}
- private boolean hasProductionDeployment(TenantName tenant) {
- return controller().applications().asList(tenant).stream()
- .map(Application::productionInstances)
- .anyMatch(Predicate.not(Map::isEmpty));
- }
- private boolean hasPerfDeployment(TenantName tenant) {
- List<ZoneId> perfZones = controller().zoneRegistry().zones().controllerUpgraded().in(Environment.perf).ids();
- return controller().applications().asList(tenant).stream()
- .map(Application::instances)
- .flatMap(instances -> instances.values().stream())
- .flatMap(instance -> instance.deployments().values().stream())
- .anyMatch(x -> perfZones.contains(x.zone()));
- }
}
diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/Upgrader.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/Upgrader.java
index d654f63fff2..1932dc65657 100644
--- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/Upgrader.java
+++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/Upgrader.java
@@ -5,24 +5,27 @@ import com.yahoo.component.Version;
import com.yahoo.config.application.api.DeploymentSpec.UpgradePolicy;
import com.yahoo.config.provision.ApplicationId;
import com.yahoo.transaction.Mutex;
-import com.yahoo.vespa.curator.Lock;
import com.yahoo.vespa.hosted.controller.Controller;
import com.yahoo.vespa.hosted.controller.application.ApplicationList;
import com.yahoo.vespa.hosted.controller.application.Change;
import com.yahoo.vespa.hosted.controller.application.InstanceList;
+import com.yahoo.vespa.hosted.controller.deployment.DeploymentStatusList;
+import com.yahoo.vespa.hosted.controller.deployment.DeploymentTrigger;
+import com.yahoo.vespa.hosted.controller.deployment.DeploymentTrigger.ChangesToCancel;
import com.yahoo.vespa.hosted.controller.persistence.CuratorDb;
import com.yahoo.vespa.hosted.controller.versions.VersionStatus;
import com.yahoo.vespa.hosted.controller.versions.VespaVersion;
import com.yahoo.vespa.hosted.controller.versions.VespaVersion.Confidence;
import java.time.Duration;
+import java.time.Instant;
import java.util.ArrayList;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
-import java.util.Optional;
import java.util.OptionalInt;
import java.util.Random;
+import java.util.Set;
import java.util.function.UnaryOperator;
import java.util.logging.Level;
import java.util.logging.Logger;
@@ -58,18 +61,22 @@ public class Upgrader extends ControllerMaintainer {
cancelBrokenUpgrades(versionStatus);
OptionalInt targetMajorVersion = targetMajorVersion();
- InstanceList instances = instances(versionStatus);
+ DeploymentStatusList deploymentStatuses = deploymentStatuses(versionStatus);
for (UpgradePolicy policy : UpgradePolicy.values())
- updateTargets(versionStatus, instances, policy, targetMajorVersion);
+ updateTargets(versionStatus, deploymentStatuses, policy, targetMajorVersion);
return 1.0;
}
+ private DeploymentStatusList deploymentStatuses(VersionStatus versionStatus) {
+ return controller().jobController().deploymentStatuses(ApplicationList.from(controller().applications().readable())
+ .withProjectId(),
+ versionStatus);
+ }
+
/** Returns a list of all production application instances, except those which are pinned, which we should not manipulate here. */
- private InstanceList instances(VersionStatus versionStatus) {
- return InstanceList.from(controller().jobController().deploymentStatuses(ApplicationList.from(controller().applications().readable())
- .withProjectId(),
- versionStatus))
+ private InstanceList instances(DeploymentStatusList deploymentStatuses) {
+ return InstanceList.from(deploymentStatuses)
.withDeclaredJobs()
.shuffle(random)
.byIncreasingDeployedVersion()
@@ -78,7 +85,7 @@ public class Upgrader extends ControllerMaintainer {
private void cancelBrokenUpgrades(VersionStatus versionStatus) {
// Cancel upgrades to broken targets (let other ongoing upgrades complete to avoid starvation)
- InstanceList instances = instances(controller().readVersionStatus());
+ InstanceList instances = instances(deploymentStatuses(controller().readVersionStatus()));
for (VespaVersion version : versionStatus.versions()) {
if (version.confidence() == Confidence.broken)
cancelUpgradesOf(instances.upgradingTo(version.versionNumber()).not().with(UpgradePolicy.canary),
@@ -86,8 +93,12 @@ public class Upgrader extends ControllerMaintainer {
}
}
- private void updateTargets(VersionStatus versionStatus, InstanceList instances, UpgradePolicy policy, OptionalInt targetMajorVersion) {
+ private void updateTargets(VersionStatus versionStatus, DeploymentStatusList deploymentStatuses, UpgradePolicy policy, OptionalInt targetMajorVersion) {
+ InstanceList instances = instances(deploymentStatuses);
InstanceList remaining = instances.with(policy);
+ Instant failureThreshold = controller().clock().instant().minus(DeploymentTrigger.maxFailingRevisionTime);
+ Set<ApplicationId> failingRevision = InstanceList.from(deploymentStatuses.failingApplicationChangeSince(failureThreshold)).asSet();
+
List<Version> targetAndNewer = new ArrayList<>();
UnaryOperator<InstanceList> cancellationCriterion = policy == UpgradePolicy.canary ? i -> i.not().upgradingTo(targetAndNewer)
: i -> i.failing()
@@ -103,13 +114,16 @@ public class Upgrader extends ControllerMaintainer {
// Prefer the newest target for each instance.
remaining = remaining.not().matching(eligible.asList()::contains)
.not().hasCompleted(Change.of(version));
- for (ApplicationId id : outdated.and(eligible.not().upgrading()).not().changingRevision())
+ for (ApplicationId id : outdated.and(eligible.not().upgrading()))
targets.put(id, version);
}
int numberToUpgrade = policy == UpgradePolicy.canary ? instances.size() : numberOfApplicationsToUpgrade();
for (ApplicationId id : instances.matching(targets.keySet()::contains).first(numberToUpgrade)) {
log.log(Level.INFO, "Triggering upgrade to " + targets.get(id) + " for " + id);
+ if (failingRevision.contains(id))
+ controller().applications().deploymentTrigger().cancelChange(id, ChangesToCancel.APPLICATION);
+
controller().applications().deploymentTrigger().triggerChange(id, Change.of(targets.get(id)));
}
}
@@ -167,7 +181,7 @@ public class Upgrader extends ControllerMaintainer {
}
/** Sets the default target major version. Set to empty to determine target version normally (by confidence) */
- public void setTargetMajorVersion(Optional<Integer> targetMajorVersion) {
+ public void setTargetMajorVersion(OptionalInt targetMajorVersion) {
controller().applications().setTargetMajorVersion(targetMajorVersion);
}
diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/VcmrMaintainer.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/VcmrMaintainer.java
index 551f803f368..daba7e74f34 100644
--- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/VcmrMaintainer.java
+++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/VcmrMaintainer.java
@@ -5,6 +5,7 @@ import com.yahoo.config.provision.Environment;
import com.yahoo.config.provision.NodeType;
import com.yahoo.config.provision.SystemName;
import com.yahoo.config.provision.zone.ZoneId;
+import com.yahoo.jdisc.Metric;
import com.yahoo.text.Text;
import com.yahoo.vespa.hosted.controller.Controller;
import com.yahoo.vespa.hosted.controller.api.integration.configserver.Node;
@@ -46,26 +47,28 @@ public class VcmrMaintainer extends ControllerMaintainer {
private static final Logger LOG = Logger.getLogger(VcmrMaintainer.class.getName());
private static final int DAYS_TO_RETIRE = 2;
private static final Duration ALLOWED_POSTPONEMENT_TIME = Duration.ofDays(7);
+ protected static final String TRACKED_CMRS_METRIC = "cmr.tracked";
private final CuratorDb curator;
private final NodeRepository nodeRepository;
private final ChangeRequestClient changeRequestClient;
private final SystemName system;
+ private final Metric metric;
- public VcmrMaintainer(Controller controller, Duration interval) {
+ public VcmrMaintainer(Controller controller, Duration interval, Metric metric) {
super(controller, interval, null, SystemName.allOf(Predicate.not(SystemName::isPublic)));
this.curator = controller.curator();
this.nodeRepository = controller.serviceRegistry().configServer().nodeRepository();
this.changeRequestClient = controller.serviceRegistry().changeRequestClient();
this.system = controller.system();
+ this.metric = metric;
}
@Override
protected double maintain() {
var changeRequests = curator.readChangeRequests()
.stream()
- .filter(shouldUpdate())
- .collect(Collectors.toList());
+ .filter(shouldUpdate()).toList();
var nodesByZone = nodesByZone();
@@ -86,6 +89,7 @@ public class VcmrMaintainer extends ControllerMaintainer {
});
}
});
+ updateMetrics();
return 1.0;
}
@@ -357,4 +361,15 @@ public class VcmrMaintainer extends ControllerMaintainer {
return time;
}
+ private void updateMetrics() {
+ var cmrsByStatus = curator.readChangeRequests()
+ .stream()
+ .collect(Collectors.groupingBy(VespaChangeRequest::getStatus));
+
+ for (var status : Status.values()) {
+ var count = cmrsByStatus.getOrDefault(status, List.of()).size();
+ metric.set(TRACKED_CMRS_METRIC, count, metric.createContext(Map.of("status", status.name())));
+ }
+ }
+
}
diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/notification/Notifier.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/notification/Notifier.java
index 49c819548fe..f2c9d55b2a2 100644
--- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/notification/Notifier.java
+++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/notification/Notifier.java
@@ -29,6 +29,16 @@ import java.util.stream.Collectors;
* @author enygaard
*/
public class Notifier {
+ private static final String header = """
+ <div style="background: #00598c; height: 55px; width: 100%">
+ <img
+ src="https://vespa.ai/assets/vespa-logo.png"
+ style="width: auto; height: 34px; margin: 10px"
+ />
+ </div>
+ <br>
+ """;
+
private final CuratorDb curatorDb;
private final Mailer mailer;
private final FlagSource flagSource;
@@ -111,14 +121,15 @@ public class Notifier {
public Mail mailOf(FormattedNotification content, Collection<String> recipients) {
var notification = content.notification();
var subject = Text.format("[%s] %s Vespa Notification for %s", notification.level().toString().toUpperCase(), content.prettyType(), applicationIdSource(notification.source()));
- var body = new StringBuilder();
- body.append(content.messagePrefix()).append("\n")
+ String body = new StringBuilder()
+ .append(content.messagePrefix()).append("\n")
.append(notification.messages().stream().map(m -> " * " + m).collect(Collectors.joining("\n"))).append("\n")
.append("\n")
.append("Vespa Console link:\n")
- .append(content.uri().toString());
- var html = new StringBuilder();
- html.append(content.messagePrefix()).append("<br>\n")
+ .append(content.uri().toString()).toString();
+ String html = new StringBuilder()
+ .append(header)
+ .append(content.messagePrefix()).append("<br>\n")
.append("<ul>\n")
.append(notification.messages().stream()
.map(Notifier::linkify)
@@ -126,8 +137,8 @@ public class Notifier {
.collect(Collectors.joining("<br>\n")))
.append("</ul>\n")
.append("<br>\n")
- .append("<a href=\"" + content.uri() + "\">Vespa Console</a>");
- return new Mail(recipients, subject, body.toString(), html.toString());
+ .append("<a href=\"" + content.uri() + "\">Vespa Console</a>").toString();
+ return new Mail(recipients, subject, body, html);
}
@VisibleForTesting
diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/persistence/BufferedLogStore.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/persistence/BufferedLogStore.java
index 9721026c628..ecb9db8195f 100644
--- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/persistence/BufferedLogStore.java
+++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/persistence/BufferedLogStore.java
@@ -49,7 +49,7 @@ public class BufferedLogStore {
}
/** Appends to the log of the given, active run, reassigning IDs as counted here, and converting to Vespa log levels. */
- public void append(ApplicationId id, JobType type, Step step, List<LogEntry> entries) {
+ public void append(ApplicationId id, JobType type, Step step, List<LogEntry> entries, boolean forceLog) {
if (entries.isEmpty())
return;
@@ -58,7 +58,7 @@ public class BufferedLogStore {
long lastEntryId = buffer.readLastLogEntryId(id, type).orElse(-1L);
long lastChunkId = buffer.getLogChunkIds(id, type).max().orElse(0);
long numberOfChunks = Math.max(1, buffer.getLogChunkIds(id, type).count());
- if (numberOfChunks > maxLogSize / chunkSize)
+ if (numberOfChunks > maxLogSize / chunkSize && ! forceLog)
return; // Max size exceeded — store no more.
byte[] emptyChunk = "[]".getBytes();
@@ -72,8 +72,12 @@ public class BufferedLogStore {
buffer.writeLastLogEntryId(id, type, lastEntryId);
buffer.writeLog(id, type, lastChunkId, logSerializer.toJson(log));
lastChunkId = lastEntryId + 1;
- if (++numberOfChunks > maxLogSize / chunkSize) {
- log = Map.of(step, List.of(new LogEntry(++lastEntryId, entry.at(), LogEntry.Type.warning, "Max log size of " + (maxLogSize >> 20) + "Mb exceeded; further entries are discarded.")));
+ if (++numberOfChunks > maxLogSize / chunkSize && ! forceLog) {
+ log = Map.of(step, List.of(new LogEntry(++lastEntryId,
+ entry.at(),
+ LogEntry.Type.warning,
+ "Max log size of " + (maxLogSize >> 20) +
+ "Mb exceeded; further user entries are discarded.")));
break;
}
log = new HashMap<>();
diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/persistence/CuratorDb.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/persistence/CuratorDb.java
index f02f49e7114..54e98877ba3 100644
--- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/persistence/CuratorDb.java
+++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/persistence/CuratorDb.java
@@ -1,9 +1,9 @@
// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
package com.yahoo.vespa.hosted.controller.persistence;
-import com.yahoo.component.annotation.Inject;
import com.yahoo.collections.Pair;
import com.yahoo.component.Version;
+import com.yahoo.component.annotation.Inject;
import com.yahoo.concurrent.UncheckedTimeoutException;
import com.yahoo.config.provision.ApplicationId;
import com.yahoo.config.provision.HostName;
@@ -41,6 +41,7 @@ import com.yahoo.vespa.hosted.controller.versions.OsVersionStatus;
import com.yahoo.vespa.hosted.controller.versions.OsVersionTarget;
import com.yahoo.vespa.hosted.controller.versions.VersionStatus;
import com.yahoo.vespa.hosted.controller.versions.VespaVersion;
+
import java.io.IOException;
import java.io.UncheckedIOException;
import java.nio.ByteBuffer;
@@ -53,6 +54,7 @@ import java.util.List;
import java.util.Map;
import java.util.NavigableMap;
import java.util.Optional;
+import java.util.OptionalInt;
import java.util.Set;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.TimeoutException;
@@ -271,9 +273,9 @@ public class CuratorDb {
return read(targetMajorVersionPath(), ByteBuffer::wrap).map(ByteBuffer::getInt);
}
- public void writeTargetMajorVersion(Optional<Integer> targetMajorVersion) {
+ public void writeTargetMajorVersion(OptionalInt targetMajorVersion) {
if (targetMajorVersion.isPresent())
- curator.set(targetMajorVersionPath(), ByteBuffer.allocate(Integer.BYTES).putInt(targetMajorVersion.get()).array());
+ curator.set(targetMajorVersionPath(), ByteBuffer.allocate(Integer.BYTES).putInt(targetMajorVersion.getAsInt()).array());
else
curator.delete(targetMajorVersionPath());
}
diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/persistence/TenantSerializer.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/persistence/TenantSerializer.java
index e7cf0c34511..e91fbe8b1b7 100644
--- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/persistence/TenantSerializer.java
+++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/persistence/TenantSerializer.java
@@ -81,6 +81,7 @@ public class TenantSerializer {
private static final String archiveAccessField = "archiveAccess";
private static final String awsArchiveAccessRoleField = "awsArchiveAccessRole";
private static final String gcpArchiveAccessMemberField = "gcpArchiveAccessMember";
+ private static final String invalidateUserSessionsBeforeField = "invalidateUserSessionsBefore";
private static final String awsIdField = "awsId";
private static final String roleField = "role";
@@ -123,6 +124,7 @@ public class TenantSerializer {
toSlime(tenant.info(), root);
toSlime(tenant.tenantSecretStores(), root);
toSlime(tenant.archiveAccess(), root);
+ tenant.invalidateUserSessionsBefore().ifPresent(instant -> root.setLong(invalidateUserSessionsBeforeField, instant.toEpochMilli()));
}
private void toSlime(ArchiveAccess archiveAccess, Cursor root) {
@@ -187,7 +189,8 @@ public class TenantSerializer {
TenantInfo info = tenantInfoFromSlime(tenantObject.field(tenantInfoField));
List<TenantSecretStore> tenantSecretStores = secretStoresFromSlime(tenantObject.field(secretStoresField));
ArchiveAccess archiveAccess = archiveAccessFromSlime(tenantObject);
- return new CloudTenant(name, createdAt, lastLoginInfo, creator, developerKeys, info, tenantSecretStores, archiveAccess);
+ Optional<Instant> invalidateUserSessionsBefore = SlimeUtils.optionalInstant(tenantObject.field(invalidateUserSessionsBeforeField));
+ return new CloudTenant(name, createdAt, lastLoginInfo, creator, developerKeys, info, tenantSecretStores, archiveAccess, invalidateUserSessionsBefore);
}
private DeletedTenant deletedTenantFrom(Inspector tenantObject) {
diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/restapi/application/ApplicationApiHandler.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/restapi/application/ApplicationApiHandler.java
index 670cb775c69..8d2fac84bc0 100644
--- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/restapi/application/ApplicationApiHandler.java
+++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/restapi/application/ApplicationApiHandler.java
@@ -10,8 +10,8 @@ import com.fasterxml.jackson.core.JsonProcessingException;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.google.common.base.Joiner;
import com.google.common.collect.ImmutableSet;
-import com.yahoo.component.annotation.Inject;
import com.yahoo.component.Version;
+import com.yahoo.component.annotation.Inject;
import com.yahoo.config.application.api.DeploymentInstanceSpec;
import com.yahoo.config.application.api.DeploymentSpec;
import com.yahoo.config.provision.ApplicationId;
@@ -75,6 +75,7 @@ import com.yahoo.vespa.hosted.controller.api.integration.deployment.RunId;
import com.yahoo.vespa.hosted.controller.api.integration.deployment.SourceRevision;
import com.yahoo.vespa.hosted.controller.api.integration.noderepository.RestartFilter;
import com.yahoo.vespa.hosted.controller.api.integration.secrets.TenantSecretStore;
+import com.yahoo.vespa.hosted.controller.api.integration.zone.ZoneRegistry;
import com.yahoo.vespa.hosted.controller.api.role.Role;
import com.yahoo.vespa.hosted.controller.api.role.RoleDefinition;
import com.yahoo.vespa.hosted.controller.api.role.SecurityContext;
@@ -137,8 +138,6 @@ import java.security.PublicKey;
import java.time.DayOfWeek;
import java.time.Duration;
import java.time.Instant;
-import java.time.LocalDateTime;
-import java.time.ZoneOffset;
import java.time.temporal.ChronoUnit;
import java.util.Arrays;
import java.util.Base64;
@@ -955,19 +954,24 @@ public class ApplicationApiHandler extends AuditLoggingRequestHandler {
private HttpResponse applicationPackage(String tenantName, String applicationName, HttpRequest request) {
TenantAndApplicationId tenantAndApplication = TenantAndApplicationId.from(tenantName, applicationName);
- long build;
- String parameter = request.getProperty("build");
- if (parameter != null)
- try {
- build = Validation.requireAtLeast(Long.parseLong(request.getProperty("build")), "build number", 1L);
- }
- catch (NumberFormatException e) {
- throw new IllegalArgumentException("invalid value for request parameter 'build'", e);
- }
- else {
+ final long build;
+ String requestedBuild = request.getProperty("build");
+ if (requestedBuild != null) {
+ if (requestedBuild.equals("latestDeployed")) {
+ build = controller.applications().requireApplication(tenantAndApplication).latestDeployedRevision()
+ .map(RevisionId::number)
+ .orElseThrow(() -> new NotExistsException("no application package has been deployed in production for " + tenantAndApplication));
+ } else {
+ try {
+ build = Validation.requireAtLeast(Long.parseLong(request.getProperty("build")), "build number", 1L);
+ } catch (NumberFormatException e) {
+ throw new IllegalArgumentException("invalid value for request parameter 'build'", e);
+ }
+ }
+ } else {
build = controller.applications().requireApplication(tenantAndApplication).revisions().last()
- .map(version -> version.id().number())
- .orElseThrow(() -> new NotExistsException("no application package has been submitted for " + tenantAndApplication));
+ .map(version -> version.id().number())
+ .orElseThrow(() -> new NotExistsException("no application package has been submitted for " + tenantAndApplication));
}
RevisionId revision = RevisionId.forProduction(build);
boolean tests = request.getBooleanProperty("tests");
@@ -1470,6 +1474,15 @@ public class ApplicationApiHandler extends AuditLoggingRequestHandler {
private HttpResponse trigger(ApplicationId id, JobType type, HttpRequest request) {
+ // JobType.fromJobName doesn't properly initiate test jobs. Triggering these without context isn't _really_
+ // necessary, but triggering a test in the default cloud is better than failing with a weird error.
+ ZoneRegistry zones = controller.zoneRegistry();
+ type = switch (type.environment()) {
+ case test -> JobType.systemTest(zones, zones.systemZone().getCloudName());
+ case staging -> JobType.stagingTest(zones, zones.systemZone().getCloudName());
+ default -> type;
+ };
+
Inspector requestObject = toSlime(request.getData()).get();
boolean requireTests = ! requestObject.field("skipTests").asBool();
boolean reTrigger = requestObject.field("reTrigger").asBool();
@@ -1831,7 +1844,8 @@ public class ApplicationApiHandler extends AuditLoggingRequestHandler {
response.setString("status", "complete");
else if (stepStatus.readyAt(instance.change()).map(controller.clock().instant()::isBefore).orElse(true))
response.setString("status", "pending");
- else response.setString("status", "running");
+ else
+ response.setString("status", "running");
});
} else {
var deploymentRun = controller.jobController().last(deploymentId.applicationId(), JobType.deploymentTo(deploymentId.zoneId()));
@@ -2544,12 +2558,11 @@ public class ApplicationApiHandler extends AuditLoggingRequestHandler {
cloudTenant.tenantSecretStores());
try {
- var tenantQuota = controller.serviceRegistry().billingController().getQuota(tenant.name());
var usedQuota = applications.stream()
.map(Application::quotaUsage)
.reduce(QuotaUsage.none, QuotaUsage::add);
- toSlime(tenantQuota, usedQuota, object.setObject("quota"));
+ toSlime(object.setObject("quota"), usedQuota);
} catch (Exception e) {
log.warning(String.format("Failed to get quota for tenant %s: %s", tenant.name(), Exceptions.toMessageString(e)));
}
@@ -2592,15 +2605,8 @@ public class ApplicationApiHandler extends AuditLoggingRequestHandler {
archiveAccess.gcpMember().ifPresent(member -> object.setString("gcpMember", member));
}
- private void toSlime(Quota quota, QuotaUsage usage, Cursor object) {
- quota.budget().ifPresentOrElse(
- budget -> object.setDouble("budget", budget.doubleValue()),
- () -> object.setNix("budget")
- );
+ private void toSlime(Cursor object, QuotaUsage usage) {
object.setDouble("budgetUsed", usage.rate());
-
- // TODO: Retire when we no longer use maxClusterSize as a meaningful limit
- quota.maxClusterSize().ifPresent(maxClusterSize -> object.setLong("clusterSize", maxClusterSize));
}
private void toSlime(ClusterResources resources, Cursor object) {
diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/restapi/application/JobControllerApiHandlerHelper.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/restapi/application/JobControllerApiHandlerHelper.java
index e28bf89e734..25953c16bf0 100644
--- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/restapi/application/JobControllerApiHandlerHelper.java
+++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/restapi/application/JobControllerApiHandlerHelper.java
@@ -115,6 +115,7 @@ class JobControllerApiHandlerHelper {
Run run = jobController.run(runId);
detailsObject.setBool("active", ! run.hasEnded());
detailsObject.setString("status", nameOf(run.status()));
+ run.reason().ifPresent(reason -> detailsObject.setString("reason", reason));
try {
jobController.updateTestLog(runId);
jobController.updateVespaLog(runId);
@@ -325,7 +326,7 @@ class JobControllerApiHandlerHelper {
"/job/" + job.type().jobName()).normalize();
stepObject.setString("url", baseUriForJob.toString());
stepObject.setString("environment", job.type().environment().value());
- stepObject.setString("region", job.type().zone().value());
+ if ( ! job.type().environment().isTest()) stepObject.setString("region", job.type().zone().value());
if (job.type().isProduction() && job.type().isDeployment()) {
status.deploymentFor(job).ifPresent(deployment -> {
@@ -421,6 +422,7 @@ class JobControllerApiHandlerHelper {
runObject.setLong("start", run.start().toEpochMilli());
run.end().ifPresent(end -> runObject.setLong("end", end.toEpochMilli()));
runObject.setString("status", run.status().name());
+ run.reason().ifPresent(reason -> runObject.setString("reason", reason));
toSlime(runObject.setObject("versions"), run.versions(), application);
Cursor runStepsArray = runObject.setArray("steps");
run.steps().forEach((step, info) -> {
diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/restapi/billing/BillingApiHandlerV2.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/restapi/billing/BillingApiHandlerV2.java
index 44a8b636ae0..4532e0c2c18 100644
--- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/restapi/billing/BillingApiHandlerV2.java
+++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/restapi/billing/BillingApiHandlerV2.java
@@ -14,6 +14,7 @@ import com.yahoo.slime.Cursor;
import com.yahoo.slime.Inspector;
import com.yahoo.slime.Slime;
import com.yahoo.slime.Type;
+import com.yahoo.vespa.hosted.controller.Application;
import com.yahoo.vespa.hosted.controller.ApplicationController;
import com.yahoo.vespa.hosted.controller.Controller;
import com.yahoo.vespa.hosted.controller.TenantController;
@@ -23,8 +24,10 @@ import com.yahoo.vespa.hosted.controller.api.integration.billing.CollectionMetho
import com.yahoo.vespa.hosted.controller.api.integration.billing.Plan;
import com.yahoo.vespa.hosted.controller.api.integration.billing.PlanId;
import com.yahoo.vespa.hosted.controller.api.integration.billing.PlanRegistry;
+import com.yahoo.vespa.hosted.controller.api.integration.billing.Quota;
import com.yahoo.vespa.hosted.controller.api.role.Role;
import com.yahoo.vespa.hosted.controller.api.role.SecurityContext;
+import com.yahoo.vespa.hosted.controller.application.QuotaUsage;
import com.yahoo.vespa.hosted.controller.tenant.CloudTenant;
import com.yahoo.vespa.hosted.controller.tenant.Tenant;
@@ -200,11 +203,13 @@ public class BillingApiHandlerV2 extends RestApiRequestHandler<BillingApiHandler
var response = new Slime();
var tenantsResponse = response.setObject().setArray("tenants");
+
tenants.asList().stream().sorted(Comparator.comparing(Tenant::name)).forEach(tenant -> {
var usage = Optional.ofNullable(usagePerTenant.get(tenant.name()));
var tenantResponse = tenantsResponse.addObject();
tenantResponse.setString("tenant", tenant.name().value());
toSlime(tenantResponse.setObject("plan"), planFor(tenant.name()));
+ toSlime(tenantResponse.setObject("quota"), billing.getQuota(tenant.name()));
tenantResponse.setString("collection", billing.getCollectionMethod(tenant.name()).name());
tenantResponse.setString("lastBill", usage.map(Bill::getStartDate).map(DateTimeFormatter.ISO_DATE::format).orElse(null));
tenantResponse.setString("unbilled", usage.map(Bill::sum).map(BigDecimal::toPlainString).orElse("0.00"));
@@ -357,6 +362,10 @@ public class BillingApiHandlerV2 extends RestApiRequestHandler<BillingApiHandler
cursor.setString("name", plan.displayName());
}
+ private void toSlime(Cursor cursor, Quota quota) {
+ cursor.setDouble("budget", quota.budget().map(BigDecimal::doubleValue).orElse(-1.0)); // -1.0 is written when the quota has no budget
+ }
+
private Plan planFor(TenantName tenant) {
var planId = billing.getPlan(tenant);
return planRegistry.plan(planId)
diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/restapi/controller/ControllerApiHandler.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/restapi/controller/ControllerApiHandler.java
index 25ac90ac0ea..776fcbfd03b 100644
--- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/restapi/controller/ControllerApiHandler.java
+++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/restapi/controller/ControllerApiHandler.java
@@ -35,6 +35,7 @@ import java.security.Principal;
import java.security.cert.X509Certificate;
import java.time.Instant;
import java.util.Optional;
+import java.util.OptionalInt;
import java.util.Scanner;
import java.util.function.Function;
import java.util.logging.Level;
@@ -60,13 +61,13 @@ public class ControllerApiHandler extends AuditLoggingRequestHandler {
@Override
public HttpResponse auditAndHandle(HttpRequest request) {
try {
- switch (request.getMethod()) {
- case GET: return get(request);
- case POST: return post(request);
- case DELETE: return delete(request);
- case PATCH: return patch(request);
- default: return ErrorResponse.methodNotAllowed("Method '" + request.getMethod() + "' is not supported");
- }
+ return switch (request.getMethod()) {
+ case GET -> get(request);
+ case POST -> post(request);
+ case DELETE -> delete(request);
+ case PATCH -> patch(request);
+ default -> ErrorResponse.methodNotAllowed("Method '" + request.getMethod() + "' is not supported");
+ };
}
catch (IllegalArgumentException e) {
return ErrorResponse.badRequest(Exceptions.toMessageString(e));
@@ -165,8 +166,8 @@ public class ControllerApiHandler extends AuditLoggingRequestHandler {
if (inspect.field(upgradesPerMinuteField).valid()) {
upgrader.setUpgradesPerMinute(inspect.field(upgradesPerMinuteField).asDouble());
} else if (inspect.field(targetMajorVersionField).valid()) {
- int target = (int)inspect.field(targetMajorVersionField).asLong();
- upgrader.setTargetMajorVersion(Optional.ofNullable(target == 0 ? null : target)); // 0 is the default value
+ int target = (int) inspect.field(targetMajorVersionField).asLong();
+ upgrader.setTargetMajorVersion(target == 0 ? OptionalInt.empty() : OptionalInt.of(target)); // 0 is the default value
} else {
return ErrorResponse.badRequest("No such modifiable field(s)");
}
diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/restapi/controller/StatsResponse.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/restapi/controller/StatsResponse.java
index 19f1ac5449f..96a3c9f177d 100644
--- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/restapi/controller/StatsResponse.java
+++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/restapi/controller/StatsResponse.java
@@ -30,6 +30,8 @@ public class StatsResponse extends SlimeJsonResponse {
if (stats.applicationStats().isEmpty()) continue; // skip empty zones
Cursor zoneObject = zonesArray.addObject();
zoneObject.setString("id", zone.toString());
+ zoneObject.setDouble("totalCost", stats.totalCost());
+ zoneObject.setDouble("totalAllocatedCost", stats.totalAllocatedCost());
toSlime(stats.load(), zoneObject.setObject("load"));
toSlime(stats.activeLoad(), zoneObject.setObject("activeLoad"));
Cursor applicationsArray = zoneObject.setArray("applications");
diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/restapi/os/OsApiHandler.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/restapi/os/OsApiHandler.java
index 853739ee9c3..0e764b98514 100644
--- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/restapi/os/OsApiHandler.java
+++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/restapi/os/OsApiHandler.java
@@ -22,6 +22,9 @@ import com.yahoo.slime.SlimeUtils;
import com.yahoo.slime.Type;
import com.yahoo.vespa.hosted.controller.Controller;
import com.yahoo.vespa.hosted.controller.auditlog.AuditLoggingRequestHandler;
+import com.yahoo.vespa.hosted.controller.maintenance.ControllerMaintenance;
+import com.yahoo.vespa.hosted.controller.maintenance.OsUpgradeScheduler;
+import com.yahoo.vespa.hosted.controller.maintenance.OsUpgradeScheduler.Change;
import com.yahoo.vespa.hosted.controller.versions.OsVersionTarget;
import com.yahoo.yolean.Exceptions;
@@ -47,22 +50,24 @@ import java.util.stream.Collectors;
public class OsApiHandler extends AuditLoggingRequestHandler {
private final Controller controller;
+ private final OsUpgradeScheduler osUpgradeScheduler;
- public OsApiHandler(Context ctx, Controller controller) {
+ public OsApiHandler(Context ctx, Controller controller, ControllerMaintenance controllerMaintenance) {
super(ctx, controller.auditLogger());
this.controller = controller;
+ this.osUpgradeScheduler = controllerMaintenance.osUpgradeScheduler();
}
@Override
public HttpResponse auditAndHandle(HttpRequest request) {
try {
- switch (request.getMethod()) {
- case GET: return get(request);
- case POST: return post(request);
- case DELETE: return delete(request);
- case PATCH: return patch(request);
- default: return ErrorResponse.methodNotAllowed("Method '" + request.getMethod() + "' is unsupported");
- }
+ return switch (request.getMethod()) {
+ case GET -> get(request);
+ case POST -> post(request);
+ case DELETE -> delete(request);
+ case PATCH -> patch(request);
+ default -> ErrorResponse.methodNotAllowed("Method '" + request.getMethod() + "' is unsupported");
+ };
} catch (IllegalArgumentException e) {
return ErrorResponse.badRequest(Exceptions.toMessageString(e));
} catch (RuntimeException e) {
@@ -159,8 +164,16 @@ public class OsApiHandler extends AuditLoggingRequestHandler {
currentVersionObject.setString("version", osVersion.version().toFullString());
Optional<OsVersionTarget> target = targets.stream().filter(t -> t.osVersion().equals(osVersion)).findFirst();
currentVersionObject.setBool("targetVersion", target.isPresent());
- target.ifPresent(t -> currentVersionObject.setString("upgradeBudget", t.upgradeBudget().toString()));
- target.ifPresent(t -> currentVersionObject.setLong("scheduledAt", t.scheduledAt().toEpochMilli()));
+ target.ifPresent(t -> {
+ currentVersionObject.setString("upgradeBudget", t.upgradeBudget().toString());
+ currentVersionObject.setLong("scheduledAt", t.scheduledAt().toEpochMilli());
+ Optional<Change> nextChange = osUpgradeScheduler.changeIn(t.osVersion().cloud());
+ nextChange.ifPresent(c -> {
+ currentVersionObject.setString("nextVersion", c.version().toFullString());
+ currentVersionObject.setLong("nextScheduledAt", c.scheduleAt().toEpochMilli());
+ });
+ });
+
currentVersionObject.setString("cloud", osVersion.cloud().value());
Cursor nodesArray = currentVersionObject.setArray("nodes");
nodeVersions.forEach(nodeVersion -> {
diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/restapi/user/UserApiHandler.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/restapi/user/UserApiHandler.java
index fce2d283da2..a407e5aa211 100644
--- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/restapi/user/UserApiHandler.java
+++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/restapi/user/UserApiHandler.java
@@ -111,7 +111,6 @@ public class UserApiHandler extends ThreadedHttpRequestHandler {
private HttpResponse handlePOST(Path path, HttpRequest request) {
if (path.matches("/user/v1/tenant/{tenant}")) return addTenantRoleMember(path.get("tenant"), request);
- if (path.matches("/user/v1/tenant/{tenant}/application/{application}")) return addApplicationRoleMember(path.get("tenant"), path.get("application"), request);
return ErrorResponse.notFoundError(Text.format("No '%s' handler at '%s'", request.getMethod(),
request.getUri().getPath()));
@@ -119,7 +118,6 @@ public class UserApiHandler extends ThreadedHttpRequestHandler {
private HttpResponse handleDELETE(Path path, HttpRequest request) {
if (path.matches("/user/v1/tenant/{tenant}")) return removeTenantRoleMember(path.get("tenant"), request);
- if (path.matches("/user/v1/tenant/{tenant}/application/{application}")) return removeApplicationRoleMember(path.get("tenant"), path.get("application"), request);
return ErrorResponse.notFoundError(Text.format("No '%s' handler at '%s'", request.getMethod(),
request.getUri().getPath()));
@@ -255,21 +253,6 @@ public class UserApiHandler extends ThreadedHttpRequestHandler {
private HttpResponse addTenantRoleMember(String tenantName, HttpRequest request) {
Inspector requestObject = bodyInspector(request);
- if (requestObject.field("roles").valid()) {
- return addMultipleTenantRoleMembers(tenantName, requestObject);
- }
- return addTenantRoleMember(tenantName, requestObject);
- }
-
- private HttpResponse addTenantRoleMember(String tenantName, Inspector requestObject) {
- String roleName = require("roleName", Inspector::asString, requestObject);
- UserId user = new UserId(require("user", Inspector::asString, requestObject));
- Role role = Roles.toRole(TenantName.from(tenantName), roleName);
- users.addUsers(role, List.of(user));
- return new MessageResponse(user + " is now a member of " + role);
- }
-
- private HttpResponse addMultipleTenantRoleMembers(String tenantName, Inspector requestObject) {
var tenant = TenantName.from(tenantName);
var user = new UserId(require("user", Inspector::asString, requestObject));
var roles = SlimeStream.fromArray(requestObject.field("roles"), Inspector::asString)
@@ -280,37 +263,8 @@ public class UserApiHandler extends ThreadedHttpRequestHandler {
return new MessageResponse(user + " is now a member of " + roles.stream().map(Role::toString).collect(Collectors.joining(", ")));
}
- private HttpResponse addApplicationRoleMember(String tenantName, String applicationName, HttpRequest request) {
- Inspector requestObject = bodyInspector(request);
- String roleName = require("roleName", Inspector::asString, requestObject);
- UserId user = new UserId(require("user", Inspector::asString, requestObject));
- Role role = Roles.toRole(TenantName.from(tenantName), ApplicationName.from(applicationName), roleName);
- users.addUsers(role, List.of(user));
- return new MessageResponse(user + " is now a member of " + role);
- }
-
private HttpResponse removeTenantRoleMember(String tenantName, HttpRequest request) {
Inspector requestObject = bodyInspector(request);
- if (requestObject.field("roles").valid()) {
- return removeMultipleTenantRoleMembers(tenantName, requestObject);
- }
- return removeTenantRoleMember(tenantName, requestObject);
- }
-
- private HttpResponse removeTenantRoleMember(String tenantName, Inspector requestObject) {
- TenantName tenant = TenantName.from(tenantName);
- String roleName = require("roleName", Inspector::asString, requestObject);
- UserId user = new UserId(require("user", Inspector::asString, requestObject));
- List<Role> roles = Collections.singletonList(Roles.toRole(tenant, roleName));
-
- enforceLastAdminOfTenant(tenant, user, roles);
- removeDeveloperKey(tenant, user, roles);
- users.removeFromRoles(user, roles);
-
- return new MessageResponse(user + " is no longer a member of " + roles.stream().map(Role::toString).collect(Collectors.joining(", ")));
- }
-
- private HttpResponse removeMultipleTenantRoleMembers(String tenantName, Inspector requestObject) {
var tenant = TenantName.from(tenantName);
var user = new UserId(require("user", Inspector::asString, requestObject));
var roles = SlimeStream.fromArray(requestObject.field("roles"), Inspector::asString)
@@ -321,6 +275,11 @@ public class UserApiHandler extends ThreadedHttpRequestHandler {
removeDeveloperKey(tenant, user, roles);
users.removeFromRoles(user, roles);
+ controller.tenants().lockIfPresent(tenant, LockedTenant.class, lockedTenant -> {
+ if (lockedTenant instanceof LockedTenant.Cloud cloudTenant)
+ controller.tenants().store(cloudTenant.withInvalidateUserSessionsBefore(controller.clock().instant()));
+ });
+
return new MessageResponse(user + " is no longer a member of " + roles.stream().map(Role::toString).collect(Collectors.joining(", ")));
}
@@ -348,15 +307,6 @@ public class UserApiHandler extends ThreadedHttpRequestHandler {
}
}
- private HttpResponse removeApplicationRoleMember(String tenantName, String applicationName, HttpRequest request) {
- Inspector requestObject = bodyInspector(request);
- String roleName = require("roleName", Inspector::asString, requestObject);
- UserId user = new UserId(require("user", Inspector::asString, requestObject));
- Role role = Roles.toRole(TenantName.from(tenantName), ApplicationName.from(applicationName), roleName);
- users.removeUsers(role, List.of(user));
- return new MessageResponse(user + " is no longer a member of " + role);
- }
-
private boolean hasTrialCapacity() {
if (! controller.system().isPublic()) return true;
var existing = controller.tenants().asList().stream().map(Tenant::name).collect(Collectors.toList());
@@ -384,18 +334,12 @@ public class UserApiHandler extends ThreadedHttpRequestHandler {
}
private static Collection<TenantRole> filterTenantRoles(Role role) {
- if (!(role instanceof TenantRole))
- return Set.of();
-
- TenantRole tenantRole = (TenantRole) role;
- if (tenantRole.definition() == RoleDefinition.administrator
- || tenantRole.definition() == RoleDefinition.developer
- || tenantRole.definition() == RoleDefinition.reader)
- return Set.of(tenantRole);
-
- if (tenantRole.definition() == RoleDefinition.athenzTenantAdmin)
- return Roles.tenantRoles(tenantRole.tenant());
-
+ if (role instanceof TenantRole tenantRole) {
+ switch (tenantRole.definition()) {
+ case administrator, developer, reader, hostedDeveloper: return Set.of(tenantRole);
+ case athenzTenantAdmin: return Roles.tenantRoles(tenantRole.tenant());
+ }
+ }
return Set.of();
}
diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/security/CloudUserSessionManager.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/security/CloudUserSessionManager.java
new file mode 100644
index 00000000000..e2b5083abae
--- /dev/null
+++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/security/CloudUserSessionManager.java
@@ -0,0 +1,50 @@
+// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.vespa.hosted.controller.security;
+
+import com.yahoo.config.provision.TenantName;
+import com.yahoo.vespa.flags.LongFlag;
+import com.yahoo.vespa.flags.PermanentFlags;
+import com.yahoo.vespa.hosted.controller.Controller;
+import com.yahoo.vespa.hosted.controller.TenantController;
+import com.yahoo.vespa.hosted.controller.api.integration.user.UserSessionManager;
+import com.yahoo.vespa.hosted.controller.api.role.SecurityContext;
+import com.yahoo.vespa.hosted.controller.api.role.TenantRole;
+import com.yahoo.vespa.hosted.controller.tenant.CloudTenant;
+
+import java.time.Instant;
+
+/** Decides whether a user session should be expired, based on a flag-provided cutoff and per-tenant cutoffs.
+ * @author freva
+ */
+public class CloudUserSessionManager implements UserSessionManager {
+
+ private final TenantController tenantController; // used to look up tenants by name
+ private final LongFlag invalidateConsoleSessions; // its value is read as epoch seconds in shouldExpireSessionFor
+
+ public CloudUserSessionManager(Controller controller) { // takes the tenant controller and binds the flag to the controller's flag source
+ this.tenantController = controller.tenants();
+ this.invalidateConsoleSessions = PermanentFlags.INVALIDATE_CONSOLE_SESSIONS.bindTo(controller.flagSource());
+ }
+
+ @Override
+ public boolean shouldExpireSessionFor(SecurityContext context) { // true if the context predates the flag cutoff or any relevant tenant cutoff
+ if (context.issuedAt().isBefore(Instant.ofEpochSecond(invalidateConsoleSessions.value()))) // flag cutoff, checked before any tenant lookup
+ return true;
+
+ return context.roles().stream()
+ .filter(TenantRole.class::isInstance) // only roles that are TenantRole instances are considered
+ .map(TenantRole.class::cast)
+ .map(TenantRole::tenant)
+ .distinct() // check each tenant once even if the context holds several roles for it
+ .anyMatch(tenantName -> shouldExpireSessionFor(tenantName, context.issuedAt()));
+ }
+
+ private boolean shouldExpireSessionFor(TenantName tenantName, Instant contextIssuedAt) { // per-tenant check used by the public overload
+ return tenantController.get(tenantName)
+ .filter(CloudTenant.class::isInstance) // tenants that are not CloudTenant never expire a session here
+ .map(CloudTenant.class::cast)
+ .flatMap(CloudTenant::invalidateUserSessionsBefore) // empty when the tenant has no cutoff set
+ .map(contextIssuedAt::isBefore) // true when the context was issued before the tenant's cutoff
+ .orElse(false); // missing tenant, non-cloud tenant, or no cutoff: do not expire
+ }
+}
diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/versions/OsVersionStatus.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/versions/OsVersionStatus.java
index 8ee891ae8a6..6f9888b79e0 100644
--- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/versions/OsVersionStatus.java
+++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/versions/OsVersionStatus.java
@@ -4,6 +4,7 @@ package com.yahoo.vespa.hosted.controller.versions;
import com.google.common.collect.ImmutableMap;
import com.yahoo.component.Version;
import com.yahoo.config.provision.CloudName;
+import com.yahoo.config.provision.zone.UpgradePolicy;
import com.yahoo.config.provision.zone.ZoneApi;
import com.yahoo.vespa.hosted.controller.Controller;
import com.yahoo.vespa.hosted.controller.api.integration.configserver.NodeFilter;
@@ -66,7 +67,7 @@ public record OsVersionStatus(Map<OsVersion, List<NodeVersion>> versions) {
.orElse(Version.emptyVersion);
for (var node : controller.serviceRegistry().configServer().nodeRepository().list(zone.getVirtualId(), NodeFilter.all().applications(application.id()))) {
- if (!OsUpgrader.canUpgrade(node)) continue;
+ if (!OsUpgrader.canUpgrade(node, true)) continue;
Optional<Instant> suspendedAt = node.suspendedSince();
NodeVersion nodeVersion = new NodeVersion(node.hostname(), zone.getVirtualId(), node.currentOsVersion(),
targetOsVersion, suspendedAt);
@@ -83,6 +84,7 @@ public record OsVersionStatus(Map<OsVersion, List<NodeVersion>> versions) {
private static List<ZoneApi> zonesToUpgrade(Controller controller) {
return controller.zoneRegistry().osUpgradePolicies().stream()
.flatMap(upgradePolicy -> upgradePolicy.steps().stream())
+ .map(UpgradePolicy.Step::zones) // project each policy step to its zone collection before flattening
.flatMap(Collection::stream)
.toList();
}
diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/versions/VespaVersion.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/versions/VespaVersion.java
index 7f33f612cd0..e078df0267f 100644
--- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/versions/VespaVersion.java
+++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/versions/VespaVersion.java
@@ -15,7 +15,8 @@ import static com.yahoo.config.application.api.DeploymentSpec.UpgradePolicy;
/**
* Information about a particular Vespa version.
- * VespaVersions are identified by their version number and ordered by increasing version numbers.
+ *
+ * Vespa versions are identified by their version number and ordered by increasing version numbers.
*
* @author bratseth
*/
@@ -29,8 +30,11 @@ public record VespaVersion(Version version,
Confidence confidence) implements Comparable<VespaVersion> {
public static Confidence confidenceFrom(DeploymentStatistics statistics, Controller controller) {
+ int thisMajorVersion = statistics.version().getMajor();
+ int defaultMajorVersion = controller.applications().targetMajorVersion().orElse(thisMajorVersion);
InstanceList all = InstanceList.from(controller.jobController().deploymentStatuses(ApplicationList.from(controller.applications().asList())
- .withProductionDeployment()));
+ .withProductionDeployment()))
+ .allowingMajorVersion(thisMajorVersion, defaultMajorVersion);
// 'production on this': All production deployment jobs upgrading to this version have completed without failure
InstanceList productionOnThis = all.matching(instance -> statistics.productionSuccesses().stream().anyMatch(run -> run.id().application().equals(instance)))
.not().failingUpgrade()