diff options
Diffstat (limited to 'controller-server/src/main')
39 files changed, 648 insertions, 430 deletions
diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/Application.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/Application.java index af8965bdeff..bdb68f655ff 100644 --- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/Application.java +++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/Application.java @@ -33,6 +33,7 @@ import java.util.Set; import java.util.TreeMap; import java.util.function.Function; import java.util.stream.Collectors; +import java.util.stream.Stream; /** * An application. Belongs to a {@link Tenant}, and may have multiple {@link Instance}s. @@ -161,7 +162,7 @@ public class Application { public ApplicationActivity activity() { return ApplicationActivity.from(instances.values().stream() .flatMap(instance -> instance.deployments().values().stream()) - .collect(Collectors.toUnmodifiableList())); + .toList()); } public Map<InstanceName, List<Deployment>> productionDeployments() { @@ -183,33 +184,44 @@ public class Application { .min(Comparator.naturalOrder()); } - /** - * Returns the oldest application version this has deployed in a permanent zone (not test or staging). - */ + /** Returns the oldest application version this has deployed in a permanent zone (not test or staging) */ public Optional<RevisionId> oldestDeployedRevision() { + return productionRevisions().min(Comparator.naturalOrder()); + } + + /** Returns the latest application version this has deployed in a permanent zone (not test or staging) */ + public Optional<RevisionId> latestDeployedRevision() { + return productionRevisions().max(Comparator.naturalOrder()); + } + + private Stream<RevisionId> productionRevisions() { return productionDeployments().values().stream().flatMap(List::stream) .map(Deployment::revision) - .filter(RevisionId::isProduction) - .min(Comparator.naturalOrder()); + .filter(RevisionId::isProduction); } /** Returns the total quota usage for this application, excluding temporary deployments */ public QuotaUsage quotaUsage() { return instances().values().stream() - .map(Instance::quotaUsage).reduce(QuotaUsage::add).orElse(QuotaUsage.none); + .map(Instance::quotaUsage) + .reduce(QuotaUsage::add) + .orElse(QuotaUsage.none); } /** Returns the total quota usage for manual deployments for this application */ public QuotaUsage manualQuotaUsage() { return instances().values().stream() - .map(Instance::manualQuotaUsage).reduce(QuotaUsage::add).orElse(QuotaUsage.none); + .map(Instance::manualQuotaUsage) + .reduce(QuotaUsage::add) + .orElse(QuotaUsage.none); } /** Returns the total quota usage for this application, excluding one specific deployment (and temporary deployments) */ public QuotaUsage quotaUsage(ApplicationId application, ZoneId zone) { return instances().values().stream() .map(instance -> instance.quotaUsageExcluding(application, zone)) - .reduce(QuotaUsage::add).orElse(QuotaUsage.none); + .reduce(QuotaUsage::add) + .orElse(QuotaUsage.none); } /** Returns the set of deploy keys for this application. */ diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/ApplicationController.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/ApplicationController.java index e48ad7596ea..78063a383dc 100644 --- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/ApplicationController.java +++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/ApplicationController.java @@ -237,7 +237,7 @@ public class ApplicationController { } /** Sets the default target major version. Set to empty to determine target version normally (by confidence) */ - public void setTargetMajorVersion(Optional<Integer> targetMajorVersion) { + public void setTargetMajorVersion(OptionalInt targetMajorVersion) { curator.writeTargetMajorVersion(targetMajorVersion); } @@ -340,7 +340,10 @@ public class ApplicationController { Version oldestInstalledPlatform = oldestInstalledPlatform(id); // Target platforms are all versions not older than the oldest installed platform, unless forcing a major version change. - Predicate<Version> isTargetPlatform = targetMajor.isEmpty() || targetMajor.getAsInt() == oldestInstalledPlatform.getMajor() + // Only major version specified in deployment spec is enough to force a downgrade, while all sources may force an upgrade. + Predicate<Version> isTargetPlatform = targetMajor.isEmpty() + || targetMajor.getAsInt() == oldestInstalledPlatform.getMajor() + || wantedMajor.isEmpty() && targetMajor.getAsInt() <= oldestInstalledPlatform.getMajor() ? version -> ! version.isBefore(oldestInstalledPlatform) : version -> targetMajor.getAsInt() == version.getMajor(); Set<Version> platformVersions = versionStatus.versions().stream() @@ -446,7 +449,7 @@ public class ApplicationController { } /** Deploys an application package for an existing application instance. */ - public ActivateResult deploy(JobId job, boolean deploySourceVersions) { + public ActivateResult deploy(JobId job, boolean deploySourceVersions, Consumer<String> deployLogger) { if (job.application().instance().isTester()) throw new IllegalArgumentException("'" + job.application() + "' is a tester application!"); @@ -479,6 +482,7 @@ public class ApplicationController { applicationPackage = applicationPackage.withTrustedCertificate(run.testerCertificate().get()); endpointCertificateMetadata = endpointCertificates.getMetadata(instance, zone, applicationPackage.deploymentSpec()); + containerEndpoints = controller.routing().of(deployment).prepare(application); } // Release application lock while doing the deployment, which is a lengthy task. @@ -487,6 +491,8 @@ public class ApplicationController { ActivateResult result = deploy(job.application(), applicationPackage, zone, platform, containerEndpoints, endpointCertificateMetadata, run.isDryRun()); + endpointCertificateMetadata.ifPresent(e -> deployLogger.accept("Using CA signed certificate version %s".formatted(e.version()))); + // Record the quota usage for this application var quotaUsage = deploymentQuotaUsage(zone, job.application()); @@ -544,10 +550,9 @@ public class ApplicationController { controller.jobController().deploymentStatus(application.get()); for (Notification notification : controller.notificationsDb().listNotifications(NotificationSource.from(application.get().id()), true)) { - if ( ! notification.source().instance().map(declaredInstances::contains).orElse(true)) - controller.notificationsDb().removeNotifications(notification.source()); - if (notification.source().instance().isPresent() && - ! notification.source().zoneId().map(application.get().require(notification.source().instance().get()).deployments()::containsKey).orElse(false)) + if ( notification.source().instance().isPresent() + && ( ! declaredInstances.contains(notification.source().instance().get()) + || ! notification.source().zoneId().map(application.get().require(notification.source().instance().get()).deployments()::containsKey).orElse(false))) controller.notificationsDb().removeNotifications(notification.source()); } @@ -641,7 +646,7 @@ public class ApplicationController { .filter(zone -> deploymentSpec.instance(instance).isEmpty() || ! deploymentSpec.requireInstance(instance).deploysTo(zone.environment(), zone.region())) - .collect(toList()); + .toList(); if (deploymentsToRemove.isEmpty()) return application; diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/LockedTenant.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/LockedTenant.java index 4f58e87035b..ac7c6319c1b 100644 --- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/LockedTenant.java +++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/LockedTenant.java @@ -129,25 +129,27 @@ public abstract class LockedTenant { private final TenantInfo info; private final List<TenantSecretStore> tenantSecretStores; private final ArchiveAccess archiveAccess; + private final Optional<Instant> invalidateUserSessionsBefore; private Cloud(TenantName name, Instant createdAt, LastLoginInfo lastLoginInfo, Optional<Principal> creator, BiMap<PublicKey, Principal> developerKeys, TenantInfo info, - List<TenantSecretStore> tenantSecretStores, ArchiveAccess archiveAccess) { + List<TenantSecretStore> tenantSecretStores, ArchiveAccess archiveAccess, Optional<Instant> invalidateUserSessionsBefore) { super(name, createdAt, lastLoginInfo); this.developerKeys = ImmutableBiMap.copyOf(developerKeys); this.creator = creator; this.info = info; this.tenantSecretStores = tenantSecretStores; this.archiveAccess = archiveAccess; + this.invalidateUserSessionsBefore = invalidateUserSessionsBefore; } private Cloud(CloudTenant tenant) { - this(tenant.name(), tenant.createdAt(), tenant.lastLoginInfo(), tenant.creator(), tenant.developerKeys(), tenant.info(), tenant.tenantSecretStores(), tenant.archiveAccess()); + this(tenant.name(), tenant.createdAt(), tenant.lastLoginInfo(), tenant.creator(), tenant.developerKeys(), tenant.info(), tenant.tenantSecretStores(), tenant.archiveAccess(), tenant.invalidateUserSessionsBefore()); } @Override public CloudTenant get() { - return new CloudTenant(name, createdAt, lastLoginInfo, creator, developerKeys, info, tenantSecretStores, archiveAccess); + return new CloudTenant(name, createdAt, lastLoginInfo, creator, developerKeys, info, tenantSecretStores, archiveAccess, invalidateUserSessionsBefore); } public Cloud withDeveloperKey(PublicKey key, Principal principal) { @@ -155,38 +157,42 @@ public abstract class LockedTenant { if (keys.containsKey(key)) throw new IllegalArgumentException("Key " + KeyUtils.toPem(key) + " is already owned by " + keys.get(key)); keys.put(key, principal); - return new Cloud(name, createdAt, lastLoginInfo, creator, keys, info, tenantSecretStores, archiveAccess); + return new Cloud(name, createdAt, lastLoginInfo, creator, keys, info, tenantSecretStores, archiveAccess, invalidateUserSessionsBefore); } public Cloud withoutDeveloperKey(PublicKey key) { BiMap<PublicKey, Principal> keys = HashBiMap.create(developerKeys); keys.remove(key); - return new Cloud(name, createdAt, lastLoginInfo, creator, keys, info, tenantSecretStores, archiveAccess); + return new Cloud(name, createdAt, lastLoginInfo, creator, keys, info, tenantSecretStores, archiveAccess, invalidateUserSessionsBefore); } public Cloud withInfo(TenantInfo newInfo) { - return new Cloud(name, createdAt, lastLoginInfo, creator, developerKeys, newInfo, tenantSecretStores, archiveAccess); + return new Cloud(name, createdAt, lastLoginInfo, creator, developerKeys, newInfo, tenantSecretStores, archiveAccess, invalidateUserSessionsBefore); } @Override public LockedTenant with(LastLoginInfo lastLoginInfo) { - return new Cloud(name, createdAt, lastLoginInfo, creator, developerKeys, info, tenantSecretStores, archiveAccess); + return new Cloud(name, createdAt, lastLoginInfo, creator, developerKeys, info, tenantSecretStores, archiveAccess, invalidateUserSessionsBefore); } public Cloud withSecretStore(TenantSecretStore tenantSecretStore) { ArrayList<TenantSecretStore> secretStores = new ArrayList<>(tenantSecretStores); secretStores.add(tenantSecretStore); - return new Cloud(name, createdAt, lastLoginInfo, creator, developerKeys, info, secretStores, archiveAccess); + return new Cloud(name, createdAt, lastLoginInfo, creator, developerKeys, info, secretStores, archiveAccess, invalidateUserSessionsBefore); } public Cloud withoutSecretStore(TenantSecretStore tenantSecretStore) { ArrayList<TenantSecretStore> secretStores = new ArrayList<>(tenantSecretStores); secretStores.remove(tenantSecretStore); - return new Cloud(name, createdAt, lastLoginInfo, creator, developerKeys, info, secretStores, archiveAccess); + return new Cloud(name, createdAt, lastLoginInfo, creator, developerKeys, info, secretStores, archiveAccess, invalidateUserSessionsBefore); } public Cloud withArchiveAccess(ArchiveAccess archiveAccess) { - return new Cloud(name, createdAt, lastLoginInfo, creator, developerKeys, info, tenantSecretStores, archiveAccess); + return new Cloud(name, createdAt, lastLoginInfo, creator, developerKeys, info, tenantSecretStores, archiveAccess, invalidateUserSessionsBefore); + } + + public Cloud withInvalidateUserSessionsBefore(Instant invalidateUserSessionsBefore) { + return new Cloud(name, createdAt, lastLoginInfo, creator, developerKeys, info, tenantSecretStores, archiveAccess, Optional.of(invalidateUserSessionsBefore)); } } diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentStatus.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentStatus.java index 9f93033a1a2..1d7d75d9193 100644 --- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentStatus.java +++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentStatus.java @@ -2,7 +2,6 @@ package com.yahoo.vespa.hosted.controller.deployment; import com.google.common.collect.ImmutableMap; -import com.yahoo.collections.Iterables; import com.yahoo.component.Version; import com.yahoo.component.VersionCompatibility; import com.yahoo.config.application.api.DeploymentInstanceSpec; @@ -231,9 +230,9 @@ public class DeploymentStatus { firstProductionJobWithDeploymentInCloud.flatMap(this::deploymentFor), fallbackPlatform(change, job)); if (step.completedAt(change, firstProductionJobWithDeploymentInCloud).isEmpty()) { - JobType actualType = job.type().isSystemTest() ? systemTest(firstProductionJobWithDeploymentInCloud.map(JobId::type).orElse(null)) - : stagingTest(firstProductionJobWithDeploymentInCloud.map(JobId::type).orElse(null)); - jobs.merge(job, List.of(new Job(actualType, versions, step.readyAt(change), change)), DeploymentStatus::union); + CloudName cloud = firstProductionJobWithDeploymentInCloud.map(JobId::type).map(this::findCloud).orElse(zones.systemZone().getCloudName()); + JobType typeWithZone = job.type().isSystemTest() ? JobType.systemTest(zones, cloud) : JobType.stagingTest(zones, cloud); + jobs.merge(job, List.of(new Job(typeWithZone, versions, step.readyAt(change), change)), DeploymentStatus::union); } } }); @@ -291,19 +290,16 @@ public class DeploymentStatus { } private <T extends Comparable<T>> Optional<T> newestTested(InstanceName instance, Function<Run, T> runMapper) { - Set<CloudName> clouds = jobSteps.keySet().stream() - .filter(job -> job.type().isProduction()) - .map(job -> findCloud(job.type())) - .collect(toSet()); + Set<CloudName> clouds = Stream.concat(Stream.of(zones.systemZone().getCloudName()), + jobSteps.keySet().stream() + .filter(job -> job.type().isProduction()) + .map(job -> findCloud(job.type()))) + .collect(toSet()); List<ZoneId> testZones = new ArrayList<>(); - if (application.deploymentSpec().requireInstance(instance).concerns(test)) { - if (clouds.isEmpty()) testZones.add(JobType.systemTest(zones, null).zone()); - else for (CloudName cloud: clouds) testZones.add(JobType.systemTest(zones, cloud).zone()); - } - if (application.deploymentSpec().requireInstance(instance).concerns(staging)) { - if (clouds.isEmpty()) testZones.add(JobType.stagingTest(zones, null).zone()); - else for (CloudName cloud: clouds) testZones.add(JobType.stagingTest(zones, cloud).zone()); - } + if (application.deploymentSpec().requireInstance(instance).concerns(test)) + for (CloudName cloud: clouds) testZones.add(JobType.systemTest(zones, cloud).zone()); + if (application.deploymentSpec().requireInstance(instance).concerns(staging)) + for (CloudName cloud: clouds) testZones.add(JobType.stagingTest(zones, cloud).zone()); Map<ZoneId, Optional<T>> newestPerZone = instanceJobs().get(application.id().instance(instance)) .type(systemTest(null), stagingTest(null)) @@ -548,7 +544,9 @@ public class DeploymentStatus { if (job.type().isProduction() && job.type().isDeployment()) { declaredTest(job.application(), testType).ifPresent(testJob -> { for (Job productionJob : versionsList) - if (allJobs.successOn(testType, productionJob.versions()).asList().isEmpty()) + if (allJobs.successOn(testType, productionJob.versions()) + .instance(testJob.application().instance()) + .asList().isEmpty()) testJobs.merge(testJob, List.of(new Job(testJob.type(), productionJob.versions(), jobSteps().get(testJob).readyAt(productionJob.change), @@ -580,7 +578,7 @@ public class DeploymentStatus { } private CloudName findCloud(JobType job) { - return zones.zones().all().get(job.zone()).map(ZoneApi::getCloudName).orElse(null); + return zones.zones().all().get(job.zone()).map(ZoneApi::getCloudName).orElse(zones.systemZone().getCloudName()); } private JobId firstDeclaredOrElseImplicitTest(JobType testJob) { diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentStatusList.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentStatusList.java index 22df5ca559e..4a00a272c75 100644 --- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentStatusList.java +++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentStatusList.java @@ -3,6 +3,7 @@ package com.yahoo.vespa.hosted.controller.deployment; import com.yahoo.collections.AbstractFilteringList; import com.yahoo.component.Version; +import com.yahoo.vespa.hosted.controller.application.Change; import java.time.Instant; import java.util.Collection; @@ -36,8 +37,10 @@ public class DeploymentStatusList extends AbstractFilteringList<DeploymentStatus /** Returns the subset of applications which have been failing an application change since the given instant */ public DeploymentStatusList failingApplicationChangeSince(Instant threshold) { - return matching(status -> status.instanceJobs().values().stream() - .anyMatch(jobs -> failingApplicationChangeSince(jobs, threshold))); + return matching(status -> status.instanceJobs().entrySet().stream() + .anyMatch(jobs -> failingApplicationChangeSince(jobs.getValue(), + status.application().require(jobs.getKey().instance()).change(), + threshold))); } private static boolean failingUpgradeToVersionSince(JobList jobs, Version version, Instant threshold) { @@ -47,10 +50,8 @@ public class DeploymentStatusList extends AbstractFilteringList<DeploymentStatus .isEmpty(); } - private static boolean failingApplicationChangeSince(JobList jobs, Instant threshold) { - return ! jobs.failingApplicationChange() - .firstFailing().endedNoLaterThan(threshold) - .isEmpty(); + private static boolean failingApplicationChangeSince(JobList jobs, Change change, Instant threshold) { + return change.revision().map(revision -> ! jobs.failingWithBrokenRevisionSince(revision, threshold).isEmpty()).orElse(false); } } diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentTrigger.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentTrigger.java index c28f94bc4d7..d83f552ab25 100644 --- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentTrigger.java +++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentTrigger.java @@ -61,6 +61,7 @@ import static java.util.stream.Collectors.toMap; public class DeploymentTrigger { public static final Duration maxPause = Duration.ofDays(3); + public static final Duration maxFailingRevisionTime = Duration.ofDays(5); private final static Logger log = Logger.getLogger(DeploymentTrigger.class.getName()); private final Controller controller; @@ -227,10 +228,9 @@ public class DeploymentTrigger { Instance instance = application.require(applicationId.instance()); JobId job = new JobId(instance.id(), jobType); JobStatus jobStatus = jobs.jobStatus(new JobId(applicationId, jobType)); - Versions versions = jobStatus.lastTriggered() - .orElseThrow(() -> new IllegalArgumentException(job + " has never been triggered")) - .versions(); - trigger(deploymentJob(instance, versions, jobType, jobStatus, clock.instant()), reason); + Run last = jobStatus.lastTriggered() + .orElseThrow(() -> new IllegalArgumentException(job + " has never been triggered")); + trigger(deploymentJob(instance, last.versions(), last.id().type(), jobStatus.isNodeAllocationFailure(), clock.instant()), reason); return job; } @@ -258,7 +258,12 @@ public class DeploymentTrigger { .collect(toMap(Map.Entry::getKey, Map.Entry::getValue)); jobs.forEach((jobId, versionsList) -> { - trigger(deploymentJob(application.require(job.application().instance()), versionsList.get(0).versions(), jobId.type(), status.jobs().get(jobId).get(), clock.instant()), reason); + trigger(deploymentJob(application.require(job.application().instance()), + versionsList.get(0).versions(), + jobId.type(), + status.jobs().get(jobId).get().isNodeAllocationFailure(), + clock.instant()), + reason); }); return List.copyOf(jobs.keySet()); } @@ -387,7 +392,7 @@ public class DeploymentTrigger { jobs.add(deploymentJob(status.application().require(jobId.application().instance()), job.versions(), job.type(), - status.instanceJobs(jobId.application().instance()).get(jobId.type()), + status.instanceJobs(jobId.application().instance()).get(jobId.type()).isNodeAllocationFailure(), job.readyAt().get())); }); return Collections.unmodifiableList(jobs); @@ -448,6 +453,8 @@ public class DeploymentTrigger { private boolean acceptNewRevision(DeploymentStatus status, InstanceName instance, RevisionId revision) { if (status.application().deploymentSpec().instance(instance).isEmpty()) return false; // Unknown instance. + if ( ! status.jobs().failingWithBrokenRevisionSince(revision, clock.instant().minus(maxFailingRevisionTime)) + .isEmpty()) return false; // Don't deploy a broken revision. boolean isChangingRevision = status.application().require(instance).change().revision().isPresent(); DeploymentInstanceSpec spec = status.application().deploymentSpec().requireInstance(instance); Predicate<RevisionId> revisionFilter = spec.revisionTarget() == DeploymentSpec.RevisionTarget.next @@ -472,8 +479,8 @@ public class DeploymentTrigger { // ---------- Version and job helpers ---------- - private Job deploymentJob(Instance instance, Versions versions, JobType jobType, JobStatus jobStatus, Instant availableSince) { - return new Job(instance, versions, jobType, availableSince, jobStatus.isNodeAllocationFailure(), instance.change().revision().isPresent()); + private Job deploymentJob(Instance instance, Versions versions, JobType jobType, boolean isNodeAllocationFailure, Instant availableSince) { + return new Job(instance, versions, jobType, availableSince, isNodeAllocationFailure, instance.change().revision().isPresent()); } // ---------- Data containers ---------- diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/InternalStepRunner.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/InternalStepRunner.java index 813e3454e80..ef3474e0c1e 100644 --- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/InternalStepRunner.java +++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/InternalStepRunner.java @@ -174,7 +174,7 @@ public class InternalStepRunner implements StepRunner { private Optional<RunStatus> deployReal(RunId id, boolean setTheStage, DualLogger logger) { Optional<X509Certificate> testerCertificate = controller.jobController().run(id).testerCertificate(); - return deploy(() -> controller.applications().deploy(id.job(), setTheStage), + return deploy(() -> controller.applications().deploy(id.job(), setTheStage, logger::log), controller.jobController().run(id) .stepInfo(setTheStage ? deployInitialReal : deployReal).get() .startTime().get(), @@ -224,6 +224,9 @@ public class InternalStepRunner implements StepRunner { // Retry certain failures for up to one hour. Optional<RunStatus> result = startTime.isBefore(controller.clock().instant().minus(Duration.ofHours(1))) ? Optional.of(deploymentFailed) : Optional.empty(); + if (result.isPresent()) + logger.log(WARNING, "Deployment failed for one hour; giving up now!"); + switch (e.code()) { case CERTIFICATE_NOT_READY: logger.log("No valid CA signed certificate for app available to config server"); @@ -424,10 +427,14 @@ public class InternalStepRunner implements StepRunner { Optional<ServiceConvergence> services = controller.serviceRegistry().configServer().serviceConvergence(new DeploymentId(testerId, zone), Optional.of(platform)); if (services.isEmpty()) { - logger.log("Config status not currently available -- will retry."); - return run.stepInfo(installTester).get().startTime().get().isBefore(controller.clock().instant().minus(Duration.ofMinutes(5))) - ? Optional.of(error) - : Optional.empty(); + if (run.stepInfo(installTester).get().startTime().get().isBefore(controller.clock().instant().minus(Duration.ofMinutes(30)))) { + logger.log(WARNING, "Config status not available after 30 minutes; giving up!"); + return Optional.of(error); + } + else { + logger.log("Config status not currently available -- will retry."); + return Optional.empty(); + } } List<Node> nodes = controller.serviceRegistry().configServer().nodeRepository().list(zone, NodeFilter.all() @@ -649,10 +656,13 @@ public class InternalStepRunner implements StepRunner { controller.jobController().updateTestReport(id); return Optional.of(testFailure); case INCONCLUSIVE: - long sleepMinutes = Math.max(15, Math.min(120, Duration.between(deployment.get().at(), controller.clock().instant()).toMinutes() / 20)); - logger.log("Tests were inconclusive, and will run again in " + sleepMinutes + " minutes."); controller.jobController().updateTestReport(id); - controller.jobController().locked(id, run -> run.sleepingUntil(controller.clock().instant().plusSeconds(60 * sleepMinutes))); + controller.jobController().locked(id, run -> { + Instant nextAttemptAt = run.start(); + while ( ! nextAttemptAt.isAfter(controller.clock().instant())) nextAttemptAt = nextAttemptAt.plusSeconds(1800); + logger.log("Tests were inconclusive, and will run again at " + nextAttemptAt + "."); + return run.sleepingUntil(nextAttemptAt); + }); return Optional.of(reset); case ERROR: logger.log(INFO, "Tester failed running its tests!"); @@ -799,6 +809,7 @@ public class InternalStepRunner implements StepRunner { Consumer<String> updater = msg -> controller.notificationsDb().setNotification(source, Notification.Type.deployment, Notification.Level.error, msg); switch (run.status()) { case aborted: return; // wait and see how the next run goes. + case noTests: case running: case success: controller.notificationsDb().removeNotification(source, Notification.Type.deployment); @@ -815,10 +826,6 @@ public class InternalStepRunner implements StepRunner { case testFailure: updater.accept("one or more verification tests against the deployment failed. Please review test output in the deployment job log."); return; - case noTests: - controller.notificationsDb().setNotification(source, Notification.Type.deployment, Notification.Level.warning, - "no tests were found for this job type. Please review test output in the deployment job log."); - return; case error: case endpointCertificateTimeout: break; diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/JobController.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/JobController.java index 5113d386b23..881107fa0f9 100644 --- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/JobController.java +++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/JobController.java @@ -158,7 +158,7 @@ public class JobController { /** Stores the given log entries for the given run and step. */ public void log(RunId id, Step step, List<LogEntry> entries) { locked(id, __ -> { - logs.append(id.application(), id.type(), step, entries); + logs.append(id.application(), id.type(), step, entries, true); return __; }); } @@ -211,7 +211,7 @@ public class JobController { if (log.isEmpty()) return run; - logs.append(id.application(), id.type(), Step.copyVespaLogs, log); + logs.append(id.application(), id.type(), Step.copyVespaLogs, log, false); return run.with(log.get(log.size() - 1).at()); }); } @@ -230,7 +230,7 @@ public class JobController { if (entries.isEmpty()) return run; - logs.append(id.application(), id.type(), step.get(), entries); + logs.append(id.application(), id.type(), step.get(), entries, false); return run.with(entries.stream().mapToLong(LogEntry::id).max().getAsLong()); }); } @@ -408,11 +408,6 @@ public class JobController { locked(id, run -> run.with(status, step)); } - /** Invoked when starting the step */ - public void setStartTimestamp(RunId id, Instant timestamp, LockedStep step) { - locked(id, run -> run.with(timestamp, step)); - } - /** * Changes the status of the given run to inactive, and stores it as a historic run. * Throws TimeoutException if some step in this job is still being run. @@ -774,7 +769,8 @@ public class JobController { public void locked(RunId id, UnaryOperator<Run> modifications) { try (Mutex __ = curator.lock(id.application(), id.type())) { active(id).ifPresent(run -> { - curator.writeLastRun(modifications.apply(run)); + Run modified = modifications.apply(run); + if (modified != null) curator.writeLastRun(modified); }); } } diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/JobList.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/JobList.java index 551f841233e..3074c9ac3ba 100644 --- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/JobList.java +++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/JobList.java @@ -4,7 +4,6 @@ package com.yahoo.vespa.hosted.controller.deployment; import com.yahoo.collections.AbstractFilteringList; import com.yahoo.component.Version; import com.yahoo.config.provision.InstanceName; -import com.yahoo.vespa.hosted.controller.api.integration.deployment.ApplicationVersion; import com.yahoo.vespa.hosted.controller.api.integration.deployment.JobId; import com.yahoo.vespa.hosted.controller.api.integration.deployment.JobType; import com.yahoo.vespa.hosted.controller.api.integration.deployment.RevisionId; @@ -74,6 +73,14 @@ public class JobList extends AbstractFilteringList<JobStatus, JobList> { return matching(JobList::failingApplicationChange); } + /** Returns the subset of jobs which are failing because of an application change, and have been since the threshold, on the given revision. */ + public JobList failingWithBrokenRevisionSince(RevisionId broken, Instant threshold) { + return failingApplicationChange().matching(job -> job.runs().values().stream() + .anyMatch(run -> run.versions().targetRevision().equals(broken) + && run.hasFailed() + && run.start().isBefore(threshold))); + } + /** Returns the subset of jobs which are failing with the given run status. */ public JobList withStatus(RunStatus status) { return matching(job -> job.lastStatus().map(status::equals).orElse(false)); diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/Versions.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/Versions.java index f4c4b8bebd4..d683f1cb5c7 100644 --- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/Versions.java +++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/Versions.java @@ -76,7 +76,7 @@ public class Versions { targetRevision.equals(versions.targetRevision()); } - /** Returns wheter this change could result in the given target versions. */ + /** Returns whether this change could result in the given target versions. */ public boolean targetsMatch(Change change) { return change.platform().map(targetPlatform::equals).orElse(true) && change.revision().map(targetRevision::equals).orElse(true); diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/ApplicationMetaDataGarbageCollector.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/ApplicationMetaDataGarbageCollector.java index 09e0fec41d1..c8c5a1834c7 100644 --- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/ApplicationMetaDataGarbageCollector.java +++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/ApplicationMetaDataGarbageCollector.java @@ -14,14 +14,17 @@ public class ApplicationMetaDataGarbageCollector extends ControllerMaintainer { private static final Logger log = Logger.getLogger(ApplicationMetaDataGarbageCollector.class.getName()); + private final Duration timeToLive; + public ApplicationMetaDataGarbageCollector(Controller controller, Duration interval) { super(controller, interval); + this.timeToLive = controller.system().isCd() ? Duration.ofDays(7) : Duration.ofDays(365); } @Override protected double maintain() { try { - controller().applications().applicationStore().pruneMeta(controller().clock().instant().minus(Duration.ofDays(365))); + controller().applications().applicationStore().pruneMeta(controller().clock().instant().minus(timeToLive)); return 1.0; } catch (Exception e) { diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/ChangeRequestMaintainer.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/ChangeRequestMaintainer.java index aa36d204c09..a279cf46415 100644 --- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/ChangeRequestMaintainer.java +++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/ChangeRequestMaintainer.java @@ -70,6 +70,7 @@ public class ChangeRequestMaintainer extends ControllerMaintainer { var vcmr = existingChangeRequests .getOrDefault(changeRequest.getId(), new VespaChangeRequest(changeRequest, zone)) .withSource(changeRequest.getChangeRequestSource()) + .withImpact(changeRequest.getImpact()) .withApproval(changeRequest.getApproval()); logger.fine(() -> "Storing " + vcmr); curator.writeChangeRequest(vcmr); diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/CloudTrialExpirer.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/CloudTrialExpirer.java index b1b7e80e9a0..18ef47759f4 100644 --- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/CloudTrialExpirer.java +++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/CloudTrialExpirer.java @@ -13,14 +13,13 @@ import com.yahoo.vespa.hosted.controller.tenant.Tenant; import java.time.Duration; import java.util.List; import java.util.Optional; -import java.util.function.Consumer; import java.util.function.Predicate; import java.util.logging.Logger; import java.util.stream.Collectors; /** * Expires unused tenants from Vespa Cloud. - * + * <p> * TODO: Should support sending notifications some time before the various expiry events happen. * * @author ogronnesby @@ -29,7 +28,7 @@ public class CloudTrialExpirer extends ControllerMaintainer { private static final Logger log = Logger.getLogger(CloudTrialExpirer.class.getName()); private static final Duration nonePlanAfter = Duration.ofDays(14); - private static final Duration tombstoneAfter = Duration.ofDays(365); + private static final Duration tombstoneAfter = Duration.ofDays(183); private final ListFlag<String> extendedTrialTenants; public CloudTrialExpirer(Controller controller, Duration interval) { @@ -39,38 +38,43 @@ public class CloudTrialExpirer extends ControllerMaintainer { @Override protected double maintain() { - tombstoneNonePlanTenants(); - moveInactiveTenantsToNonePlan(); - return 1.0; + var a = tombstoneNonePlanTenants(); + var b = moveInactiveTenantsToNonePlan(); + return (a ? 0.5 : 0.0) + (b ? 0.5 : 0.0); } - private void moveInactiveTenantsToNonePlan() { - var predicate = tenantReadersNotLoggedIn(nonePlanAfter) - .and(this::tenantHasTrialPlan); - - forTenant("'none' plan", predicate, this::setPlanNone); - } + private boolean moveInactiveTenantsToNonePlan() { + var idleTrialTenants = controller().tenants().asList().stream() + .filter(this::tenantIsCloudTenant) + .filter(this::tenantIsNotExemptFromExpiry) + .filter(this::tenantHasNoDeployments) + .filter(this::tenantHasTrialPlan) + .filter(tenantReadersNotLoggedIn(nonePlanAfter)) + .toList(); + + if (! idleTrialTenants.isEmpty()) { + var tenants = idleTrialTenants.stream().map(Tenant::name).map(TenantName::value).collect(Collectors.joining(", ")); + log.info("Setting tenants to 'none' plan: " + tenants); + } - private void tombstoneNonePlanTenants() { - var predicate = tenantReadersNotLoggedIn(tombstoneAfter).and(this::tenantHasNonePlan); - forTenant("tombstoned", predicate, this::tombstoneTenants); + return setPlanNone(idleTrialTenants); } - private void forTenant(String name, Predicate<Tenant> p, Consumer<List<Tenant>> c) { - var predicate = p.and(this::tenantIsCloudTenant) - .and(this::tenantIsNotExemptFromExpiry) - .and(this::tenantHasNoDeployments); - - var tenants = controller().tenants().asList().stream() - .filter(predicate) - .collect(Collectors.toList()); - - if (! tenants.isEmpty()) { - var tenantNames = tenants.stream().map(Tenant::name).map(TenantName::value).collect(Collectors.joining(", ")); - log.info("Setting tenants as " + name + ": " + tenantNames); + private boolean tombstoneNonePlanTenants() { + var idleOldPlanTenants = controller().tenants().asList().stream() + .filter(this::tenantIsCloudTenant) + .filter(this::tenantIsNotExemptFromExpiry) + .filter(this::tenantHasNoDeployments) + .filter(this::tenantHasNonePlan) + .filter(tenantReadersNotLoggedIn(tombstoneAfter)) + .toList(); + + if (! idleOldPlanTenants.isEmpty()) { + var tenants = idleOldPlanTenants.stream().map(Tenant::name).map(TenantName::value).collect(Collectors.joining(", ")); + log.info("Setting tenants as tombstoned: " + tenants); } - c.accept(tenants); + return tombstoneTenants(idleOldPlanTenants); } private boolean tenantIsCloudTenant(Tenant tenant) { @@ -98,7 +102,7 @@ public class CloudTrialExpirer extends ControllerMaintainer { } private boolean tenantIsNotExemptFromExpiry(Tenant tenant) { - return ! extendedTrialTenants.value().contains(tenant.name().value()); + return !extendedTrialTenants.value().contains(tenant.name().value()); } private boolean tenantHasNoDeployments(Tenant tenant) { @@ -108,23 +112,46 @@ public class CloudTrialExpirer extends ControllerMaintainer { .sum() == 0; } - private void setPlanNone(List<Tenant> tenants) { - tenants.forEach(tenant -> { - controller().serviceRegistry().billingController().setPlan(tenant.name(), PlanId.from("none"), false, false); - }); + private boolean setPlanNone(List<Tenant> tenants) { + var success = true; + for (var tenant : tenants) { + try { + controller().serviceRegistry().billingController().setPlan(tenant.name(), PlanId.from("none"), false, false); + } catch (RuntimeException e) { + log.info("Could not change plan for " + tenant.name() + ": " + e.getMessage()); + success = false; + } + } + return success; } - private void tombstoneTenants(List<Tenant> tenants) { - tenants.forEach(tenant -> { - deleteApplicationsWithNoDeployments(tenant); - controller().tenants().delete(tenant.name(), Optional.empty(), false); - }); + private boolean tombstoneTenants(List<Tenant> tenants) { + var success = true; + for (var tenant : tenants) { + success &= deleteApplicationsWithNoDeployments(tenant); + log.fine("Tombstoning empty tenant: " + tenant.name()); + try { + controller().tenants().delete(tenant.name(), Optional.empty(), false); + } catch (RuntimeException e) { + log.info("Could not tombstone tenant " + tenant.name() + ": " + e.getMessage()); + success = false; + } + } + return success; } - private void deleteApplicationsWithNoDeployments(Tenant tenant) { - controller().applications().asList(tenant.name()).forEach(application -> { - // this only removes applications with no active deployments - controller().applications().deleteApplication(application.id(), Optional.empty()); - }); + private boolean deleteApplicationsWithNoDeployments(Tenant tenant) { + // this method only removes applications with no active deployments in them + var success = true; + for (var application : controller().applications().asList(tenant.name())) { + try { + log.fine("Removing empty application: " + application.id()); + controller().applications().deleteApplication(application.id(), Optional.empty()); + } catch (RuntimeException e) { + log.info("Could not removing application " + application.id() + ": " + e.getMessage()); + success = false; + } + } + return success; } } diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/ControllerMaintenance.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/ControllerMaintenance.java index 4aeecdcd4ff..ab2e0312b15 100644 --- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/ControllerMaintenance.java +++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/ControllerMaintenance.java @@ -33,6 +33,7 @@ import static java.time.temporal.ChronoUnit.SECONDS; public class ControllerMaintenance extends AbstractComponent { private final Upgrader upgrader; + private final OsUpgradeScheduler osUpgradeScheduler; private final List<Maintainer> maintainers = new CopyOnWriteArrayList<>(); @Inject @@ -40,7 +41,9 @@ public class ControllerMaintenance extends AbstractComponent { public ControllerMaintenance(Controller controller, Metric metric, UserManagement userManagement, AthenzClientFactory athenzClientFactory) { Intervals intervals = new Intervals(controller.system()); upgrader = new Upgrader(controller, intervals.defaultInterval); + osUpgradeScheduler = new OsUpgradeScheduler(controller, intervals.osUpgradeScheduler); maintainers.add(upgrader); + maintainers.add(osUpgradeScheduler); maintainers.addAll(osUpgraders(controller, intervals.osUpgrader)); maintainers.add(new DeploymentExpirer(controller, intervals.defaultInterval)); maintainers.add(new DeploymentUpgrader(controller, intervals.defaultInterval)); @@ -54,7 +57,6 @@ public class ControllerMaintenance extends AbstractComponent { maintainers.add(new SystemUpgrader(controller, intervals.systemUpgrader)); maintainers.add(new JobRunner(controller, intervals.jobRunner)); maintainers.add(new OsVersionStatusUpdater(controller, intervals.osVersionStatusUpdater)); - maintainers.add(new OsUpgradeScheduler(controller, intervals.osUpgradeScheduler)); maintainers.add(new ContactInformationMaintainer(controller, intervals.contactInformationMaintainer)); maintainers.add(new NameServiceDispatcher(controller, intervals.nameServiceDispatcher)); maintainers.add(new CostReportMaintainer(controller, intervals.costReportMaintainer, controller.serviceRegistry().costReportConsumer())); @@ -70,7 +72,7 @@ public class ControllerMaintenance extends AbstractComponent { maintainers.add(new ArchiveAccessMaintainer(controller, metric, intervals.archiveAccessMaintainer)); maintainers.add(new TenantRoleMaintainer(controller, intervals.tenantRoleMaintainer)); maintainers.add(new ChangeRequestMaintainer(controller, intervals.changeRequestMaintainer)); - maintainers.add(new VcmrMaintainer(controller, intervals.vcmrMaintainer)); + maintainers.add(new VcmrMaintainer(controller, intervals.vcmrMaintainer, metric)); maintainers.add(new CloudTrialExpirer(controller, intervals.defaultInterval)); maintainers.add(new RetriggerMaintainer(controller, intervals.retriggerMaintainer)); maintainers.add(new UserManagementMaintainer(controller, intervals.userManagementMaintainer, controller.serviceRegistry().roleMaintainer())); @@ -80,6 +82,8 @@ public class ControllerMaintenance extends AbstractComponent { public Upgrader upgrader() { return upgrader; } + public OsUpgradeScheduler osUpgradeScheduler() { return osUpgradeScheduler; } + @Override public void deconstruct() { maintainers.forEach(Maintainer::shutdown); @@ -156,7 +160,7 @@ public class ControllerMaintenance extends AbstractComponent { this.containerImageExpirer = duration(12, HOURS); this.hostInfoUpdater = duration(12, HOURS); this.reindexingTriggerer = duration(1, HOURS); - this.endpointCertificateMaintainer = duration(12, HOURS); + this.endpointCertificateMaintainer = duration(1, HOURS); this.trafficFractionUpdater = duration(5, MINUTES); this.archiveUriUpdater = duration(5, MINUTES); this.archiveAccessMaintainer = duration(10, MINUTES); diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/EndpointCertificateMaintainer.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/EndpointCertificateMaintainer.java index f3256237284..2e2680cd34a 100644 --- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/EndpointCertificateMaintainer.java +++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/EndpointCertificateMaintainer.java @@ -7,7 +7,6 @@ import com.yahoo.config.provision.ApplicationId; import com.yahoo.container.jdisc.secretstore.SecretNotFoundException; import com.yahoo.container.jdisc.secretstore.SecretStore; import com.yahoo.transaction.Mutex; -import com.yahoo.vespa.curator.Lock; import com.yahoo.vespa.hosted.controller.Controller; import com.yahoo.vespa.hosted.controller.Instance; import com.yahoo.vespa.hosted.controller.api.integration.certificates.EndpointCertificateDetails; @@ -15,6 +14,7 @@ import com.yahoo.vespa.hosted.controller.api.integration.certificates.EndpointCe import com.yahoo.vespa.hosted.controller.api.integration.certificates.EndpointCertificateProvider; import com.yahoo.vespa.hosted.controller.api.integration.certificates.EndpointCertificateRequestMetadata; import com.yahoo.vespa.hosted.controller.api.integration.deployment.JobType; +import com.yahoo.vespa.hosted.controller.application.Deployment; import com.yahoo.vespa.hosted.controller.application.TenantAndApplicationId; import com.yahoo.vespa.hosted.controller.deployment.DeploymentTrigger; import com.yahoo.vespa.hosted.controller.persistence.CuratorDb; @@ -23,6 +23,8 @@ import java.time.Clock; import java.time.Duration; import java.time.Instant; import java.time.temporal.ChronoUnit; +import java.util.ArrayList; +import java.util.Comparator; import java.util.HashSet; import java.util.List; import java.util.Map; @@ -48,6 +50,7 @@ public class EndpointCertificateMaintainer extends ControllerMaintainer { private final CuratorDb curator; private final SecretStore secretStore; private final EndpointCertificateProvider endpointCertificateProvider; + final Comparator<EligibleJob> oldestFirst = Comparator.comparing(e -> e.deployment.at()); @Inject public EndpointCertificateMaintainer(Controller controller, Duration interval) { @@ -92,11 +95,14 @@ public class EndpointCertificateMaintainer extends ControllerMaintainer { })); } + record EligibleJob(Deployment deployment, ApplicationId applicationId, JobType job) {} /** - * If it's been four days since the cert has been refreshed, re-trigger all prod deployment jobs. + * If it's been four days since the cert has been refreshed, re-trigger prod deployment jobs (one at a time). */ private void deployRefreshedCertificates() { var now = clock.instant(); + var eligibleJobs = new ArrayList<EligibleJob>(); + curator.readAllEndpointCertificateMetadata().forEach((applicationId, endpointCertificateMetadata) -> endpointCertificateMetadata.lastRefreshed().ifPresent(lastRefreshTime -> { Instant refreshTime = Instant.ofEpochSecond(lastRefreshTime); @@ -105,13 +111,19 @@ public class EndpointCertificateMaintainer extends ControllerMaintainer { .ifPresent(instance -> instance.productionDeployments().forEach((zone, deployment) -> { if (deployment.at().isBefore(refreshTime)) { JobType job = JobType.deploymentTo(zone); - deploymentTrigger.reTrigger(applicationId, job, "re-triggered by EndpointCertificateMaintainer"); - log.info("Re-triggering deployment job " + job.jobName() + " for instance " + - applicationId.serializedForm() + " to roll out refreshed endpoint certificate"); + eligibleJobs.add(new EligibleJob(deployment, applicationId, job)); } })); } })); + + eligibleJobs.stream() + .min(oldestFirst) + .ifPresent(e -> { + deploymentTrigger.reTrigger(e.applicationId, e.job, "re-triggered by EndpointCertificateMaintainer"); + log.info("Re-triggering deployment job " + e.job.jobName() + " for instance " + + e.applicationId.serializedForm() + " to roll out refreshed endpoint certificate"); + }); } private OptionalInt latestVersionInSecretStore(EndpointCertificateMetadata originalCertificateMetadata) { @@ -156,8 +168,8 @@ public class EndpointCertificateMaintainer extends ControllerMaintainer { List<EndpointCertificateRequestMetadata> endpointCertificateMetadata = endpointCertificateProvider.listCertificates(); Map<ApplicationId, EndpointCertificateMetadata> storedEndpointCertificateMetadata = curator.readAllEndpointCertificateMetadata(); - List<String> leafRequestIds = storedEndpointCertificateMetadata.values().stream().flatMap(m -> m.leafRequestId().stream()).collect(Collectors.toList()); - List<String> rootRequestIds = storedEndpointCertificateMetadata.values().stream().map(EndpointCertificateMetadata::rootRequestId).collect(Collectors.toList()); + List<String> leafRequestIds = storedEndpointCertificateMetadata.values().stream().flatMap(m -> m.leafRequestId().stream()).toList(); + List<String> rootRequestIds = storedEndpointCertificateMetadata.values().stream().map(EndpointCertificateMetadata::rootRequestId).toList(); for (var providerCertificateMetadata : endpointCertificateMetadata) { if (!rootRequestIds.contains(providerCertificateMetadata.requestId()) && !leafRequestIds.contains(providerCertificateMetadata.requestId())) { diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/InfrastructureUpgrader.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/InfrastructureUpgrader.java index 82413f21222..b051590ac5a 100644 --- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/InfrastructureUpgrader.java +++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/InfrastructureUpgrader.java @@ -3,6 +3,7 @@ package com.yahoo.vespa.hosted.controller.maintenance; import com.yahoo.component.Version; import com.yahoo.config.provision.SystemName; +import com.yahoo.config.provision.zone.NodeSlice; import com.yahoo.config.provision.zone.UpgradePolicy; import com.yahoo.config.provision.zone.ZoneApi; import com.yahoo.text.Text; @@ -25,6 +26,7 @@ import java.util.Optional; import java.util.Set; import java.util.function.Function; import java.util.logging.Logger; +import java.util.stream.Collectors; /** * Base class for maintainers that upgrade zone infrastructure. @@ -57,22 +59,22 @@ public abstract class InfrastructureUpgrader<TARGET extends VersionTarget> exten int failures = 0; // Invert zone order if we're downgrading UpgradePolicy policy = target.downgrade() ? upgradePolicy.inverted() : upgradePolicy; - for (Set<ZoneApi> step : policy.steps()) { + for (UpgradePolicy.Step step : policy.steps()) { boolean converged = true; - for (ZoneApi zone : step) { + for (ZoneApi zone : step.zones()) { try { attempts++; - converged &= upgradeAll(target, applications, zone); + converged &= upgradeAll(target, applications, zone, step.nodeSlice()); } catch (UnreachableNodeRepositoryException e) { failures++; converged = false; log.warning(Text.format("%s: Failed to communicate with node repository in %s, continuing with next parallel zone: %s", - this, zone, Exceptions.toMessageString(e))); + this, zone, Exceptions.toMessageString(e))); } catch (Exception e) { failures++; converged = false; log.warning(Text.format("%s: Failed to upgrade zone: %s, continuing with next parallel zone: %s", - this, zone, Exceptions.toMessageString(e))); + this, zone, Exceptions.toMessageString(e))); } } if (!converged) { @@ -83,7 +85,7 @@ public abstract class InfrastructureUpgrader<TARGET extends VersionTarget> exten } /** Returns whether all applications have converged to the target version in zone */ - private boolean upgradeAll(TARGET target, List<SystemApplication> applications, ZoneApi zone) { + private boolean upgradeAll(TARGET target, List<SystemApplication> applications, ZoneApi zone, NodeSlice nodeSlice) { Map<SystemApplication, Set<SystemApplication>> dependenciesByApplication = new HashMap<>(); if (target.downgrade()) { // Invert dependencies when we're downgrading for (var application : applications) { @@ -100,20 +102,17 @@ public abstract class InfrastructureUpgrader<TARGET extends VersionTarget> exten for (var kv : dependenciesByApplication.entrySet()) { SystemApplication application = kv.getKey(); Set<SystemApplication> dependencies = kv.getValue(); - if (convergedOn(target, dependencies, zone)) { + boolean allConverged = dependencies.stream().allMatch(app -> convergedOn(target, app, zone, nodeSlice)); + if (allConverged) { if (changeTargetTo(target, application, zone)) { upgrade(target, application, zone); } - converged &= convergedOn(target, application, zone); + converged &= convergedOn(target, application, zone, nodeSlice); } } return converged; } - private boolean convergedOn(TARGET target, Set<SystemApplication> applications, ZoneApi zone) { - return applications.stream().allMatch(application -> convergedOn(target, application, zone)); - } - /** Returns whether target version for application in zone should be changed */ protected abstract boolean changeTargetTo(TARGET target, SystemApplication application, ZoneApi zone); @@ -121,7 +120,7 @@ public abstract class InfrastructureUpgrader<TARGET extends VersionTarget> exten protected abstract void upgrade(TARGET target, SystemApplication application, ZoneApi zone); /** Returns whether application has converged to target version in zone */ - protected abstract boolean convergedOn(TARGET target, SystemApplication application, ZoneApi zone); + protected abstract boolean convergedOn(TARGET target, SystemApplication application, ZoneApi zone, NodeSlice nodeSlice); /** Returns the version target for the component upgraded by this, if any */ protected abstract Optional<TARGET> target(); @@ -129,19 +128,34 @@ public abstract class InfrastructureUpgrader<TARGET extends VersionTarget> exten /** Returns whether the upgrader should expect given node to upgrade */ protected abstract boolean expectUpgradeOf(Node node, SystemApplication application, ZoneApi zone); - /** Find the minimum value of a version field in a zone by comparing all nodes */ - protected final Optional<Version> minVersion(ZoneApi zone, SystemApplication application, Function<Node, Version> versionField) { + /** Find the highest version used by nodes satisfying nodeSlice in zone. If no such slice exists, the lowest known version is returned */ + protected final Optional<Version> versionOf(NodeSlice nodeSlice, ZoneApi zone, SystemApplication application, Function<Node, Version> versionField) { try { - return controller().serviceRegistry().configServer() - .nodeRepository() - .list(zone.getVirtualId(), NodeFilter.all().applications(application.id())) - .stream() - .filter(node -> expectUpgradeOf(node, application, zone)) - .map(versionField) - .min(Comparator.naturalOrder()); + Map<Version, Long> nodeCountByVersion = controller().serviceRegistry().configServer() + .nodeRepository() + .list(zone.getVirtualId(), NodeFilter.all().applications(application.id())) + .stream() + .filter(node -> expectUpgradeOf(node, application, zone)) + .collect(Collectors.groupingBy(versionField, + Collectors.counting())); + long totalNodes = nodeCountByVersion.values().stream().reduce(Long::sum).orElse(0L); + Set<Version> versionsOfMatchingSlices = new HashSet<>(); + for (var kv : nodeCountByVersion.entrySet()) { + long nodesOnVersion = kv.getValue(); + if (nodeSlice.satisfiedBy(nodesOnVersion, totalNodes)) { + versionsOfMatchingSlices.add(kv.getKey()); + } + } + if (!versionsOfMatchingSlices.isEmpty()) { + // Choose the highest version in case we have several matching slices + return versionsOfMatchingSlices.stream().max(Comparator.naturalOrder()); + } + // No matching slices found, fall back to the lowest known version + return nodeCountByVersion.keySet().stream().min(Comparator.naturalOrder()); } catch (Exception e) { throw new UnreachableNodeRepositoryException(Text.format("Failed to get version for %s in %s: %s", - application.id(), zone, Exceptions.toMessageString(e))); + application.id(), zone, + Exceptions.toMessageString(e))); } } diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/JobRunner.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/JobRunner.java index 94ec4129744..cd48d6839f3 100644 --- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/JobRunner.java +++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/JobRunner.java @@ -8,7 +8,6 @@ import com.yahoo.vespa.hosted.controller.deployment.InternalStepRunner; import com.yahoo.vespa.hosted.controller.deployment.JobController; import com.yahoo.vespa.hosted.controller.deployment.Run; import com.yahoo.vespa.hosted.controller.deployment.Step; -import com.yahoo.vespa.hosted.controller.deployment.StepInfo; import com.yahoo.vespa.hosted.controller.deployment.StepRunner; import java.time.Duration; @@ -75,18 +74,26 @@ public class JobRunner extends ControllerMaintainer { } } - /** Advances each of the ready steps for the given run, or marks it as finished, and stashes it. Public for testing. */ public void advance(Run run) { - if ( ! run.hasFailed() - && controller().clock().instant().isAfter(run.sleepUntil().orElse(run.start()).plus(jobTimeout))) - executors.execute(() -> { - jobs.abort(run.id(), "job timeout of " + jobTimeout + " reached"); - advance(jobs.run(run.id())); - }); - else if (run.readySteps().isEmpty()) - executors.execute(() -> finish(run.id())); - else if (run.hasFailed() || run.sleepUntil().map(sleepUntil -> ! sleepUntil.isAfter(controller().clock().instant())).orElse(true)) - run.readySteps().forEach(step -> executors.execute(() -> advance(run.id(), step))); + advance(run.id()); + } + + /** Advances each of the ready steps for the given run, or marks it as finished, and stashes it. Public for testing. */ + public void advance(RunId id) { + jobs.locked(id, run -> { + if ( ! run.hasFailed() + && controller().clock().instant().isAfter(run.sleepUntil().orElse(run.start()).plus(jobTimeout))) + executors.execute(() -> { + jobs.abort(run.id(), "job timeout of " + jobTimeout + " reached"); + advance(run.id()); + }); + else if (run.readySteps().isEmpty()) + executors.execute(() -> finish(run.id())); + else if (run.hasFailed() || run.sleepUntil().map(sleepUntil -> ! sleepUntil.isAfter(controller().clock().instant())).orElse(true)) + run.readySteps().forEach(step -> executors.execute(() -> advance(run.id(), step))); + + return null; + }); } private void finish(RunId id) { @@ -108,23 +115,24 @@ public class JobRunner extends ControllerMaintainer { try { AtomicBoolean changed = new AtomicBoolean(false); jobs.locked(id.application(), id.type(), step, lockedStep -> { - jobs.locked(id, run -> run); // Memory visibility. - jobs.active(id).ifPresent(run -> { // The run may have become inactive, so we bail out. + jobs.locked(id, run -> { if ( ! run.readySteps().contains(step)) { changed.set(true); - return; // Someone may have updated the run status, making this step obsolete, so we bail out. + return run; // Someone may have updated the run status, making this step obsolete, so we bail out. } - StepInfo stepInfo = run.stepInfo(lockedStep.get()).orElseThrow(); - if (stepInfo.startTime().isEmpty()) { - jobs.setStartTimestamp(run.id(), controller().clock().instant(), lockedStep); - } + if (run.stepInfo(lockedStep.get()).orElseThrow().startTime().isEmpty()) + run = run.with(controller().clock().instant(), lockedStep); - runner.run(lockedStep, run.id()).ifPresent(status -> { - jobs.update(run.id(), status, lockedStep); + return run; + }); + + if ( ! changed.get()) { + runner.run(lockedStep, id).ifPresent(status -> { + jobs.update(id, status, lockedStep); changed.set(true); }); - }); + } }); if (changed.get()) jobs.active(id).ifPresent(this::advance); diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/MeteringMonitorMaintainer.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/MeteringMonitorMaintainer.java index eadbdf74c3c..519b1001be4 100644 --- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/MeteringMonitorMaintainer.java +++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/MeteringMonitorMaintainer.java @@ -51,14 +51,15 @@ public class MeteringMonitorMaintainer extends ControllerMaintainer { return controller().applications().asList() .stream() .flatMap(app -> app.instances().values().stream()) - .flatMap(this::instancesToDeployments) + .flatMap(this::toProdDeployments) .collect(Collectors.toSet()); } - private Stream<DeploymentId> instancesToDeployments(Instance instance) { + private Stream<DeploymentId> toProdDeployments(Instance instance) { return instance.deployments() .keySet() .stream() + .filter(deployment -> deployment.environment().isProduction()) .map(deployment -> new DeploymentId(instance.id(), deployment)); } } diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/OsUpgradeScheduler.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/OsUpgradeScheduler.java index 3bd1c7bb358..ddcfef23d86 100644 --- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/OsUpgradeScheduler.java +++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/OsUpgradeScheduler.java @@ -9,10 +9,14 @@ import com.yahoo.vespa.hosted.controller.api.integration.deployment.ArtifactRepo import com.yahoo.vespa.hosted.controller.api.integration.deployment.OsRelease; import com.yahoo.vespa.hosted.controller.versions.OsVersionTarget; +import java.time.DayOfWeek; import java.time.Duration; import java.time.Instant; +import java.time.LocalDate; import java.time.ZoneOffset; +import java.time.ZonedDateTime; import java.time.format.DateTimeFormatter; +import java.time.temporal.ChronoUnit; import java.util.Objects; import java.util.Optional; @@ -30,23 +34,24 @@ public class OsUpgradeScheduler extends ControllerMaintainer { @Override protected double maintain() { Instant now = controller().clock().instant(); - if (!canTriggerAt(now)) return 1.0; for (var cloud : controller().clouds()) { - Release release = releaseIn(cloud); - upgradeTo(release, cloud, now); + Optional<Change> change = changeIn(cloud); + if (change.isEmpty()) continue; + if (!change.get().scheduleAt(now)) continue; + controller().upgradeOsIn(cloud, change.get().version(), change.get().upgradeBudget(), false); } return 1.0; } - /** Upgrade to given release in cloud */ - private void upgradeTo(Release release, CloudName cloud, Instant now) { + /** Returns the wanted change for given cloud, if any */ + public Optional<Change> changeIn(CloudName cloud) { Optional<OsVersionTarget> currentTarget = controller().osVersionTarget(cloud); - if (currentTarget.isEmpty()) return; - if (upgradingToNewMajor(cloud)) return; // Skip further upgrades until major version upgrade is complete + if (currentTarget.isEmpty()) return Optional.empty(); + if (upgradingToNewMajor(cloud)) return Optional.empty(); // Skip further upgrades until major version upgrade is complete - Version version = release.version(currentTarget.get(), now); - if (!version.isAfter(currentTarget.get().osVersion().version())) return; - controller().upgradeOsIn(cloud, version, release.upgradeBudget(), false); + Release release = releaseIn(cloud); + Instant now = controller().clock().instant(); + return release.change(currentTarget.get().version(), now); } private boolean upgradingToNewMajor(CloudName cloud) { @@ -56,54 +61,69 @@ public class OsUpgradeScheduler extends ControllerMaintainer { .count() > 1; } - private boolean canTriggerAt(Instant instant) { - int hourOfDay = instant.atZone(ZoneOffset.UTC).getHour(); - int dayOfWeek = instant.atZone(ZoneOffset.UTC).getDayOfWeek().getValue(); - // Upgrade can only be scheduled between 07:00 (02:00 in CD systems) and 12:59 UTC, Monday-Thursday - int startHour = controller().system().isCd() ? 2 : 7; - return hourOfDay >= startHour && hourOfDay <= 12 && dayOfWeek < 5; - } - private Release releaseIn(CloudName cloud) { boolean useTaggedRelease = controller().zoneRegistry().zones().all().reprovisionToUpgradeOs().in(cloud) - .zones().isEmpty(); + .zones().isEmpty(); if (useTaggedRelease) { return new TaggedRelease(controller().system(), controller().serviceRegistry().artifactRepository()); } return new CalendarVersionedRelease(controller().system()); } - private interface Release { + private static boolean canTriggerAt(Instant instant, boolean isCd) { + ZonedDateTime dateTime = instant.atZone(ZoneOffset.UTC); + int hourOfDay = dateTime.getHour(); + int dayOfWeek = dateTime.getDayOfWeek().getValue(); + // Upgrade can only be scheduled between 07:00 (02:00 in CD systems) and 12:59 UTC, Monday-Thursday + int startHour = isCd ? 2 : 7; + return hourOfDay >= startHour && hourOfDay <= 12 && dayOfWeek < 5; + } - /** The version number of this */ - Version version(OsVersionTarget currentTarget, Instant now); + /** Returns the earliest time an upgrade can be scheduled on the day of instant, in given system */ + private static Instant schedulingInstant(Instant instant, SystemName system) { + instant = instant.truncatedTo(ChronoUnit.DAYS); + while (!canTriggerAt(instant, system.isCd())) { + instant = instant.plus(Duration.ofHours(1)); + } + return instant; + } - /** The budget to use when upgrading to this */ - Duration upgradeBudget(); + private interface Release { + + /** The pending change for this release at given instant, if any */ + Optional<Change> change(Version currentVersion, Instant instant); } - /** OS release based on a tag */ - private static class TaggedRelease implements Release { + /** OS version change, its budget and the earliest time it can be scheduled */ + public record Change(Version version, Duration upgradeBudget, Instant scheduleAt) { - private final SystemName system; - private final ArtifactRepository artifactRepository; + public Change { + Objects.requireNonNull(version); + Objects.requireNonNull(upgradeBudget); + Objects.requireNonNull(scheduleAt); + } - private TaggedRelease(SystemName system, ArtifactRepository artifactRepository) { - this.system = Objects.requireNonNull(system); - this.artifactRepository = Objects.requireNonNull(artifactRepository); + /** Returns whether this can be scheduled at given instant */ + public boolean scheduleAt(Instant instant) { + return !instant.isBefore(scheduleAt); } - @Override - public Version version(OsVersionTarget currentTarget, Instant now) { - OsRelease release = artifactRepository.osRelease(currentTarget.osVersion().version().getMajor(), tag()); - boolean cooldownPassed = !release.taggedAt().plus(cooldown()).isAfter(now); - return cooldownPassed ? release.version() : currentTarget.osVersion().version(); + } + + /** OS release based on a tag */ + private record TaggedRelease(SystemName system, ArtifactRepository artifactRepository) implements Release { + + public TaggedRelease { + Objects.requireNonNull(system); + Objects.requireNonNull(artifactRepository); } - @Override - public Duration upgradeBudget() { - return Duration.ZERO; // Upgrades to tagged releases happen in-place so no budget is required + public Optional<Change> change(Version currentVersion, Instant instant) { + OsRelease release = artifactRepository.osRelease(currentVersion.getMajor(), tag()); + if (!release.version().isAfter(currentVersion)) return Optional.empty(); + Instant scheduleAt = schedulingInstant(release.taggedAt().plus(cooldown()), system); + return Optional.of(new Change(release.version(), Duration.ZERO, scheduleAt)); } /** Returns the release tag tracked by this system */ @@ -119,48 +139,65 @@ public class OsUpgradeScheduler extends ControllerMaintainer { } /** OS release based on calendar-versioning */ - private static class CalendarVersionedRelease implements Release { + record CalendarVersionedRelease(SystemName system) implements Release { - /** The time to wait before scheduling upgrade to next version */ - private static final Duration SCHEDULING_INTERVAL = Duration.ofDays(45); + /** A fixed point in time which the release schedule is calculated from */ + private static final Instant START_OF_SCHEDULE = LocalDate.of(2022, 1, 1) + .atStartOfDay() + .toInstant(ZoneOffset.UTC); - /** - * The interval at which new versions become available. We use this to avoid scheduling upgrades to a version - * that has not been released yet. Example: Version N is the latest one and target is set to N+1. If N+1 does - * not exist the zone will not converge until N+1 has been released and we may end up triggering multiple - * rounds of upgrades. - */ - private static final Duration AVAILABILITY_INTERVAL = Duration.ofDays(7); + /** The time that should elapse between versions */ + private static final Duration SCHEDULING_STEP = Duration.ofDays(60); - private static final DateTimeFormatter CALENDAR_VERSION_PATTERN = DateTimeFormatter.ofPattern("yyyyMMdd"); + /** The day of week new releases are published */ + private static final DayOfWeek RELEASE_DAY = DayOfWeek.MONDAY; - private final SystemName system; + private static final DateTimeFormatter CALENDAR_VERSION_PATTERN = DateTimeFormatter.ofPattern("yyyyMMdd"); - public CalendarVersionedRelease(SystemName system) { - this.system = Objects.requireNonNull(system); + public CalendarVersionedRelease { + Objects.requireNonNull(system); } @Override - public Version version(OsVersionTarget currentTarget, Instant now) { - Instant scheduledAt = currentTarget.scheduledAt(); - Version currentVersion = currentTarget.osVersion().version(); - if (scheduledAt.isBefore(now.minus(SCHEDULING_INTERVAL))) { - String calendarVersion = now.minus(AVAILABILITY_INTERVAL) - .atZone(ZoneOffset.UTC) - .format(CALENDAR_VERSION_PATTERN); - return new Version(currentVersion.getMajor(), - currentVersion.getMinor(), - currentVersion.getMicro(), - calendarVersion); + public Optional<Change> change(Version currentVersion, Instant instant) { + Version wantedVersion = asVersion(dateOfWantedVersion(instant), currentVersion); + while (!wantedVersion.isAfter(currentVersion)) { + wantedVersion = asVersion(dateOfWantedVersion(instant), currentVersion); + instant = instant.plus(Duration.ofDays(1)); } - return currentVersion; // New version should not be scheduled yet + return Optional.of(new Change(wantedVersion, upgradeBudget(), schedulingInstant(instant, system))); } - @Override - public Duration upgradeBudget() { + private Duration upgradeBudget() { return system.isCd() ? Duration.ZERO : Duration.ofDays(14); } + /** + * Calculate the date of the wanted version relative to now. A given zone will choose the oldest release + * available which is not older than this date. + */ + static LocalDate dateOfWantedVersion(Instant now) { + Instant candidate = START_OF_SCHEDULE; + while (!candidate.plus(SCHEDULING_STEP).isAfter(now)) { + candidate = candidate.plus(SCHEDULING_STEP); + } + LocalDate date = LocalDate.ofInstant(candidate, ZoneOffset.UTC); + return releaseDayOf(date); + } + + private static LocalDate releaseDayOf(LocalDate date) { + int releaseDayDelta = RELEASE_DAY.getValue() - date.getDayOfWeek().getValue(); + return date.plusDays(releaseDayDelta); + } + + private static Version asVersion(LocalDate dateOfVersion, Version currentVersion) { + String calendarVersion = dateOfVersion.format(CALENDAR_VERSION_PATTERN); + return new Version(currentVersion.getMajor(), + currentVersion.getMinor(), + currentVersion.getMicro(), + calendarVersion); + } + } } diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/OsUpgrader.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/OsUpgrader.java index fa64a2677f4..f4dcf7f6088 100644 --- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/OsUpgrader.java +++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/OsUpgrader.java @@ -3,6 +3,7 @@ package com.yahoo.vespa.hosted.controller.maintenance; import com.yahoo.component.Version; import com.yahoo.config.provision.CloudName; +import com.yahoo.config.provision.zone.NodeSlice; import com.yahoo.config.provision.zone.ZoneApi; import com.yahoo.text.Text; import com.yahoo.vespa.hosted.controller.Controller; @@ -54,15 +55,16 @@ public class OsUpgrader extends InfrastructureUpgrader<OsVersionTarget> { } @Override - protected boolean convergedOn(OsVersionTarget target, SystemApplication application, ZoneApi zone) { - return !currentVersion(zone, application, target.osVersion().version()).isBefore(target.osVersion().version()); + protected boolean convergedOn(OsVersionTarget target, SystemApplication application, ZoneApi zone, NodeSlice nodeSlice) { + Version currentVersion = versionOf(nodeSlice, zone, application, Node::currentOsVersion).orElse(target.osVersion().version()); + return !currentVersion.isBefore(target.osVersion().version()); } @Override protected boolean expectUpgradeOf(Node node, SystemApplication application, ZoneApi zone) { return cloud.equals(zone.getCloudName()) && // Cloud is managed by this upgrader application.shouldUpgradeOs() && // Application should upgrade in this cloud - canUpgrade(node); // Node is in an upgradable state + canUpgrade(node, false); } @Override @@ -83,29 +85,23 @@ public class OsUpgrader extends InfrastructureUpgrader<OsVersionTarget> { .orElse(true); } - private Version currentVersion(ZoneApi zone, SystemApplication application, Version defaultVersion) { - return minVersion(zone, application, Node::currentOsVersion).orElse(defaultVersion); - } - /** Returns the available upgrade budget for given zone */ private Duration zoneBudgetOf(Duration totalBudget, ZoneApi zone) { if (!spendBudgetOn(zone)) return Duration.ZERO; long consecutiveZones = upgradePolicy.steps().stream() - .filter(parallelZones -> parallelZones.stream().anyMatch(this::spendBudgetOn)) + .filter(step -> step.zones().stream().anyMatch(this::spendBudgetOn)) .count(); return totalBudget.dividedBy(consecutiveZones); } /** Returns whether to spend upgrade budget on given zone */ private boolean spendBudgetOn(ZoneApi zone) { - if (!zone.getEnvironment().isProduction()) return false; - if (controller().zoneRegistry().systemZone().getVirtualId().equals(zone.getVirtualId())) return false; // Controller zone - return true; + return !controller().zoneRegistry().systemZone().getVirtualId().equals(zone.getVirtualId()); // Do not spend budget on controller zone } - /** Returns whether node is in a state where it can be upgraded */ - public static boolean canUpgrade(Node node) { - return upgradableNodeStates.contains(node.state()); + /** Returns whether node currently allows upgrades */ + public static boolean canUpgrade(Node node, boolean includeDeferring) { + return (includeDeferring || !node.deferOsUpgrade()) && upgradableNodeStates.contains(node.state()); } private static String name(CloudName cloud) { diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/ResourceMeterMaintainer.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/ResourceMeterMaintainer.java index 892ad669e4b..205fb7e0e79 100644 --- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/ResourceMeterMaintainer.java +++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/ResourceMeterMaintainer.java @@ -100,7 +100,7 @@ public class ResourceMeterMaintainer extends ControllerMaintainer { } if (systemName.isPublic()) reportResourceSnapshots(resourceSnapshots); - if (systemName.isPublic() && systemName.isCd()) reportAllScalingEvents(); + if (systemName.isPublic()) reportAllScalingEvents(); updateDeploymentCost(resourceSnapshots); return 1.0; } diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/SystemUpgrader.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/SystemUpgrader.java index 8d5851be62f..8e74ef9a983 100644 --- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/SystemUpgrader.java +++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/SystemUpgrader.java @@ -2,6 +2,7 @@ package com.yahoo.vespa.hosted.controller.maintenance; import com.yahoo.component.Version; +import com.yahoo.config.provision.zone.NodeSlice; import com.yahoo.config.provision.zone.RoutingMethod; import com.yahoo.config.provision.zone.ZoneApi; import com.yahoo.text.Text; @@ -39,12 +40,12 @@ public class SystemUpgrader extends InfrastructureUpgrader<VespaVersionTarget> { } @Override - protected boolean convergedOn(VespaVersionTarget target, SystemApplication application, ZoneApi zone) { - Optional<Version> minVersion = minVersion(zone, application, Node::currentVersion); + protected boolean convergedOn(VespaVersionTarget target, SystemApplication application, ZoneApi zone, NodeSlice nodeSlice) { + Optional<Version> currentVersion = versionOf(nodeSlice, zone, application, Node::currentVersion); // Skip application convergence check if there are no nodes belonging to the application in the zone - if (minVersion.isEmpty()) return true; + if (currentVersion.isEmpty()) return true; - return minVersion.get().equals(target.version()) && + return currentVersion.get().equals(target.version()) && application.configConvergedIn(zone.getId(), controller(), Optional.of(target.version())); } @@ -79,10 +80,9 @@ public class SystemUpgrader extends InfrastructureUpgrader<VespaVersionTarget> { // the wanted version of each node. boolean zoneHasSharedRouting = controller().zoneRegistry().routingMethods(zone.getId()).stream() .anyMatch(RoutingMethod::isShared); - return minVersion(zone, application, Node::wantedVersion) + return versionOf(NodeSlice.ALL, zone, application, Node::wantedVersion) .map(wantedVersion -> !wantedVersion.equals(target.version())) .orElse(zoneHasSharedRouting); // Always upgrade if zone uses shared routing, but has no nodes allocated yet - } return controller().serviceRegistry().configServer().nodeRepository() .targetVersionsOf(zone.getId()) diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/TenantRoleMaintainer.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/TenantRoleMaintainer.java index dad836ca2de..820c67f2d44 100644 --- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/TenantRoleMaintainer.java +++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/TenantRoleMaintainer.java @@ -33,21 +33,15 @@ public class TenantRoleMaintainer extends ControllerMaintainer { .map(Tenant::name) .collect(Collectors.toList()); roleService.maintainRoles(tenantsWithRoles); + + var deletedTenants = controller().tenants().asList(true).stream() + .filter(tenant -> tenant.type() == Tenant.Type.deleted) + .map(Tenant::name) + .toList(); + roleService.cleanupRoles(deletedTenants); + return 1.0; } - private boolean hasProductionDeployment(TenantName tenant) { - return controller().applications().asList(tenant).stream() - .map(Application::productionInstances) - .anyMatch(Predicate.not(Map::isEmpty)); - } - private boolean hasPerfDeployment(TenantName tenant) { - List<ZoneId> perfZones = controller().zoneRegistry().zones().controllerUpgraded().in(Environment.perf).ids(); - return controller().applications().asList(tenant).stream() - .map(Application::instances) - .flatMap(instances -> instances.values().stream()) - .flatMap(instance -> instance.deployments().values().stream()) - .anyMatch(x -> perfZones.contains(x.zone())); - } } diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/Upgrader.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/Upgrader.java index d654f63fff2..1932dc65657 100644 --- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/Upgrader.java +++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/Upgrader.java @@ -5,24 +5,27 @@ import com.yahoo.component.Version; import com.yahoo.config.application.api.DeploymentSpec.UpgradePolicy; import com.yahoo.config.provision.ApplicationId; import com.yahoo.transaction.Mutex; -import com.yahoo.vespa.curator.Lock; import com.yahoo.vespa.hosted.controller.Controller; import com.yahoo.vespa.hosted.controller.application.ApplicationList; import com.yahoo.vespa.hosted.controller.application.Change; import com.yahoo.vespa.hosted.controller.application.InstanceList; +import com.yahoo.vespa.hosted.controller.deployment.DeploymentStatusList; +import com.yahoo.vespa.hosted.controller.deployment.DeploymentTrigger; +import com.yahoo.vespa.hosted.controller.deployment.DeploymentTrigger.ChangesToCancel; import com.yahoo.vespa.hosted.controller.persistence.CuratorDb; import com.yahoo.vespa.hosted.controller.versions.VersionStatus; import com.yahoo.vespa.hosted.controller.versions.VespaVersion; import com.yahoo.vespa.hosted.controller.versions.VespaVersion.Confidence; import java.time.Duration; +import java.time.Instant; import java.util.ArrayList; import java.util.LinkedHashMap; import java.util.List; import java.util.Map; -import java.util.Optional; import java.util.OptionalInt; import java.util.Random; +import java.util.Set; import java.util.function.UnaryOperator; import java.util.logging.Level; import java.util.logging.Logger; @@ -58,18 +61,22 @@ public class Upgrader extends ControllerMaintainer { cancelBrokenUpgrades(versionStatus); OptionalInt targetMajorVersion = targetMajorVersion(); - InstanceList instances = instances(versionStatus); + DeploymentStatusList deploymentStatuses = deploymentStatuses(versionStatus); for (UpgradePolicy policy : UpgradePolicy.values()) - updateTargets(versionStatus, instances, policy, targetMajorVersion); + updateTargets(versionStatus, deploymentStatuses, policy, targetMajorVersion); return 1.0; } + private DeploymentStatusList deploymentStatuses(VersionStatus versionStatus) { + return controller().jobController().deploymentStatuses(ApplicationList.from(controller().applications().readable()) + .withProjectId(), + versionStatus); + } + /** Returns a list of all production application instances, except those which are pinned, which we should not manipulate here. */ - private InstanceList instances(VersionStatus versionStatus) { - return InstanceList.from(controller().jobController().deploymentStatuses(ApplicationList.from(controller().applications().readable()) - .withProjectId(), - versionStatus)) + private InstanceList instances(DeploymentStatusList deploymentStatuses) { + return InstanceList.from(deploymentStatuses) .withDeclaredJobs() .shuffle(random) .byIncreasingDeployedVersion() @@ -78,7 +85,7 @@ public class Upgrader extends ControllerMaintainer { private void cancelBrokenUpgrades(VersionStatus versionStatus) { // Cancel upgrades to broken targets (let other ongoing upgrades complete to avoid starvation) - InstanceList instances = instances(controller().readVersionStatus()); + InstanceList instances = instances(deploymentStatuses(controller().readVersionStatus())); for (VespaVersion version : versionStatus.versions()) { if (version.confidence() == Confidence.broken) cancelUpgradesOf(instances.upgradingTo(version.versionNumber()).not().with(UpgradePolicy.canary), @@ -86,8 +93,12 @@ public class Upgrader extends ControllerMaintainer { } } - private void updateTargets(VersionStatus versionStatus, InstanceList instances, UpgradePolicy policy, OptionalInt targetMajorVersion) { + private void updateTargets(VersionStatus versionStatus, DeploymentStatusList deploymentStatuses, UpgradePolicy policy, OptionalInt targetMajorVersion) { + InstanceList instances = instances(deploymentStatuses); InstanceList remaining = instances.with(policy); + Instant failureThreshold = controller().clock().instant().minus(DeploymentTrigger.maxFailingRevisionTime); + Set<ApplicationId> failingRevision = InstanceList.from(deploymentStatuses.failingApplicationChangeSince(failureThreshold)).asSet(); + List<Version> targetAndNewer = new ArrayList<>(); UnaryOperator<InstanceList> cancellationCriterion = policy == UpgradePolicy.canary ? i -> i.not().upgradingTo(targetAndNewer) : i -> i.failing() @@ -103,13 +114,16 @@ public class Upgrader extends ControllerMaintainer { // Prefer the newest target for each instance. remaining = remaining.not().matching(eligible.asList()::contains) .not().hasCompleted(Change.of(version)); - for (ApplicationId id : outdated.and(eligible.not().upgrading()).not().changingRevision()) + for (ApplicationId id : outdated.and(eligible.not().upgrading())) targets.put(id, version); } int numberToUpgrade = policy == UpgradePolicy.canary ? instances.size() : numberOfApplicationsToUpgrade(); for (ApplicationId id : instances.matching(targets.keySet()::contains).first(numberToUpgrade)) { log.log(Level.INFO, "Triggering upgrade to " + targets.get(id) + " for " + id); + if (failingRevision.contains(id)) + controller().applications().deploymentTrigger().cancelChange(id, ChangesToCancel.APPLICATION); + controller().applications().deploymentTrigger().triggerChange(id, Change.of(targets.get(id))); } } @@ -167,7 +181,7 @@ public class Upgrader extends ControllerMaintainer { } /** Sets the default target major version. Set to empty to determine target version normally (by confidence) */ - public void setTargetMajorVersion(Optional<Integer> targetMajorVersion) { + public void setTargetMajorVersion(OptionalInt targetMajorVersion) { controller().applications().setTargetMajorVersion(targetMajorVersion); } diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/VcmrMaintainer.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/VcmrMaintainer.java index 551f803f368..daba7e74f34 100644 --- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/VcmrMaintainer.java +++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/VcmrMaintainer.java @@ -5,6 +5,7 @@ import com.yahoo.config.provision.Environment; import com.yahoo.config.provision.NodeType; import com.yahoo.config.provision.SystemName; import com.yahoo.config.provision.zone.ZoneId; +import com.yahoo.jdisc.Metric; import com.yahoo.text.Text; import com.yahoo.vespa.hosted.controller.Controller; import com.yahoo.vespa.hosted.controller.api.integration.configserver.Node; @@ -46,26 +47,28 @@ public class VcmrMaintainer extends ControllerMaintainer { private static final Logger LOG = Logger.getLogger(VcmrMaintainer.class.getName()); private static final int DAYS_TO_RETIRE = 2; private static final Duration ALLOWED_POSTPONEMENT_TIME = Duration.ofDays(7); + protected static final String TRACKED_CMRS_METRIC = "cmr.tracked"; private final CuratorDb curator; private final NodeRepository nodeRepository; private final ChangeRequestClient changeRequestClient; private final SystemName system; + private final Metric metric; - public VcmrMaintainer(Controller controller, Duration interval) { + public VcmrMaintainer(Controller controller, Duration interval, Metric metric) { super(controller, interval, null, SystemName.allOf(Predicate.not(SystemName::isPublic))); this.curator = controller.curator(); this.nodeRepository = controller.serviceRegistry().configServer().nodeRepository(); this.changeRequestClient = controller.serviceRegistry().changeRequestClient(); this.system = controller.system(); + this.metric = metric; } @Override protected double maintain() { var changeRequests = curator.readChangeRequests() .stream() - .filter(shouldUpdate()) - .collect(Collectors.toList()); + .filter(shouldUpdate()).toList(); var nodesByZone = nodesByZone(); @@ -86,6 +89,7 @@ public class VcmrMaintainer extends ControllerMaintainer { }); } }); + updateMetrics(); return 1.0; } @@ -357,4 +361,15 @@ public class VcmrMaintainer extends ControllerMaintainer { return time; } + private void updateMetrics() { + var cmrsByStatus = curator.readChangeRequests() + .stream() + .collect(Collectors.groupingBy(VespaChangeRequest::getStatus)); + + for (var status : Status.values()) { + var count = cmrsByStatus.getOrDefault(status, List.of()).size(); + metric.set(TRACKED_CMRS_METRIC, count, metric.createContext(Map.of("status", status.name()))); + } + } + } diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/notification/Notifier.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/notification/Notifier.java index 49c819548fe..f2c9d55b2a2 100644 --- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/notification/Notifier.java +++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/notification/Notifier.java @@ -29,6 +29,16 @@ import java.util.stream.Collectors; * @author enygaard */ public class Notifier { + private static final String header = """ + <div style="background: #00598c; height: 55px; width: 100%"> + <img + src="https://vespa.ai/assets/vespa-logo.png" + style="width: auto; height: 34px; margin: 10px" + /> + </div> + <br> + """; + private final CuratorDb curatorDb; private final Mailer mailer; private final FlagSource flagSource; @@ -111,14 +121,15 @@ public class Notifier { public Mail mailOf(FormattedNotification content, Collection<String> recipients) { var notification = content.notification(); var subject = Text.format("[%s] %s Vespa Notification for %s", notification.level().toString().toUpperCase(), content.prettyType(), applicationIdSource(notification.source())); - var body = new StringBuilder(); - body.append(content.messagePrefix()).append("\n") + String body = new StringBuilder() + .append(content.messagePrefix()).append("\n") .append(notification.messages().stream().map(m -> " * " + m).collect(Collectors.joining("\n"))).append("\n") .append("\n") .append("Vespa Console link:\n") - .append(content.uri().toString()); - var html = new StringBuilder(); - html.append(content.messagePrefix()).append("<br>\n") + .append(content.uri().toString()).toString(); + String html = new StringBuilder() + .append(header) + .append(content.messagePrefix()).append("<br>\n") .append("<ul>\n") .append(notification.messages().stream() .map(Notifier::linkify) @@ -126,8 +137,8 @@ public class Notifier { .collect(Collectors.joining("<br>\n"))) .append("</ul>\n") .append("<br>\n") - .append("<a href=\"" + content.uri() + "\">Vespa Console</a>"); - return new Mail(recipients, subject, body.toString(), html.toString()); + .append("<a href=\"" + content.uri() + "\">Vespa Console</a>").toString(); + return new Mail(recipients, subject, body, html); } @VisibleForTesting diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/persistence/BufferedLogStore.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/persistence/BufferedLogStore.java index 9721026c628..ecb9db8195f 100644 --- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/persistence/BufferedLogStore.java +++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/persistence/BufferedLogStore.java @@ -49,7 +49,7 @@ public class BufferedLogStore { } /** Appends to the log of the given, active run, reassigning IDs as counted here, and converting to Vespa log levels. */ - public void append(ApplicationId id, JobType type, Step step, List<LogEntry> entries) { + public void append(ApplicationId id, JobType type, Step step, List<LogEntry> entries, boolean forceLog) { if (entries.isEmpty()) return; @@ -58,7 +58,7 @@ public class BufferedLogStore { long lastEntryId = buffer.readLastLogEntryId(id, type).orElse(-1L); long lastChunkId = buffer.getLogChunkIds(id, type).max().orElse(0); long numberOfChunks = Math.max(1, buffer.getLogChunkIds(id, type).count()); - if (numberOfChunks > maxLogSize / chunkSize) + if (numberOfChunks > maxLogSize / chunkSize && ! forceLog) return; // Max size exceeded — store no more. byte[] emptyChunk = "[]".getBytes(); @@ -72,8 +72,12 @@ public class BufferedLogStore { buffer.writeLastLogEntryId(id, type, lastEntryId); buffer.writeLog(id, type, lastChunkId, logSerializer.toJson(log)); lastChunkId = lastEntryId + 1; - if (++numberOfChunks > maxLogSize / chunkSize) { - log = Map.of(step, List.of(new LogEntry(++lastEntryId, entry.at(), LogEntry.Type.warning, "Max log size of " + (maxLogSize >> 20) + "Mb exceeded; further entries are discarded."))); + if (++numberOfChunks > maxLogSize / chunkSize && ! forceLog) { + log = Map.of(step, List.of(new LogEntry(++lastEntryId, + entry.at(), + LogEntry.Type.warning, + "Max log size of " + (maxLogSize >> 20) + + "Mb exceeded; further user entries are discarded."))); break; } log = new HashMap<>(); diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/persistence/CuratorDb.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/persistence/CuratorDb.java index f02f49e7114..54e98877ba3 100644 --- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/persistence/CuratorDb.java +++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/persistence/CuratorDb.java @@ -1,9 +1,9 @@ // Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. package com.yahoo.vespa.hosted.controller.persistence; -import com.yahoo.component.annotation.Inject; import com.yahoo.collections.Pair; import com.yahoo.component.Version; +import com.yahoo.component.annotation.Inject; import com.yahoo.concurrent.UncheckedTimeoutException; import com.yahoo.config.provision.ApplicationId; import com.yahoo.config.provision.HostName; @@ -41,6 +41,7 @@ import com.yahoo.vespa.hosted.controller.versions.OsVersionStatus; import com.yahoo.vespa.hosted.controller.versions.OsVersionTarget; import com.yahoo.vespa.hosted.controller.versions.VersionStatus; import com.yahoo.vespa.hosted.controller.versions.VespaVersion; + import java.io.IOException; import java.io.UncheckedIOException; import java.nio.ByteBuffer; @@ -53,6 +54,7 @@ import java.util.List; import java.util.Map; import java.util.NavigableMap; import java.util.Optional; +import java.util.OptionalInt; import java.util.Set; import java.util.concurrent.ConcurrentHashMap; import java.util.concurrent.TimeoutException; @@ -271,9 +273,9 @@ public class CuratorDb { return read(targetMajorVersionPath(), ByteBuffer::wrap).map(ByteBuffer::getInt); } - public void writeTargetMajorVersion(Optional<Integer> targetMajorVersion) { + public void writeTargetMajorVersion(OptionalInt targetMajorVersion) { if (targetMajorVersion.isPresent()) - curator.set(targetMajorVersionPath(), ByteBuffer.allocate(Integer.BYTES).putInt(targetMajorVersion.get()).array()); + curator.set(targetMajorVersionPath(), ByteBuffer.allocate(Integer.BYTES).putInt(targetMajorVersion.getAsInt()).array()); else curator.delete(targetMajorVersionPath()); } diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/persistence/TenantSerializer.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/persistence/TenantSerializer.java index e7cf0c34511..e91fbe8b1b7 100644 --- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/persistence/TenantSerializer.java +++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/persistence/TenantSerializer.java @@ -81,6 +81,7 @@ public class TenantSerializer { private static final String archiveAccessField = "archiveAccess"; private static final String awsArchiveAccessRoleField = "awsArchiveAccessRole"; private static final String gcpArchiveAccessMemberField = "gcpArchiveAccessMember"; + private static final String invalidateUserSessionsBeforeField = "invalidateUserSessionsBefore"; private static final String awsIdField = "awsId"; private static final String roleField = "role"; @@ -123,6 +124,7 @@ public class TenantSerializer { toSlime(tenant.info(), root); toSlime(tenant.tenantSecretStores(), root); toSlime(tenant.archiveAccess(), root); + tenant.invalidateUserSessionsBefore().ifPresent(instant -> root.setLong(invalidateUserSessionsBeforeField, instant.toEpochMilli())); } private void toSlime(ArchiveAccess archiveAccess, Cursor root) { @@ -187,7 +189,8 @@ public class TenantSerializer { TenantInfo info = tenantInfoFromSlime(tenantObject.field(tenantInfoField)); List<TenantSecretStore> tenantSecretStores = secretStoresFromSlime(tenantObject.field(secretStoresField)); ArchiveAccess archiveAccess = archiveAccessFromSlime(tenantObject); - return new CloudTenant(name, createdAt, lastLoginInfo, creator, developerKeys, info, tenantSecretStores, archiveAccess); + Optional<Instant> invalidateUserSessionsBefore = SlimeUtils.optionalInstant(tenantObject.field(invalidateUserSessionsBeforeField)); + return new CloudTenant(name, createdAt, lastLoginInfo, creator, developerKeys, info, tenantSecretStores, archiveAccess, invalidateUserSessionsBefore); } private DeletedTenant deletedTenantFrom(Inspector tenantObject) { diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/restapi/application/ApplicationApiHandler.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/restapi/application/ApplicationApiHandler.java index 670cb775c69..8d2fac84bc0 100644 --- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/restapi/application/ApplicationApiHandler.java +++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/restapi/application/ApplicationApiHandler.java @@ -10,8 +10,8 @@ import com.fasterxml.jackson.core.JsonProcessingException; import com.fasterxml.jackson.databind.ObjectMapper; import com.google.common.base.Joiner; import com.google.common.collect.ImmutableSet; -import com.yahoo.component.annotation.Inject; import com.yahoo.component.Version; +import com.yahoo.component.annotation.Inject; import com.yahoo.config.application.api.DeploymentInstanceSpec; import com.yahoo.config.application.api.DeploymentSpec; import com.yahoo.config.provision.ApplicationId; @@ -75,6 +75,7 @@ import com.yahoo.vespa.hosted.controller.api.integration.deployment.RunId; import com.yahoo.vespa.hosted.controller.api.integration.deployment.SourceRevision; import com.yahoo.vespa.hosted.controller.api.integration.noderepository.RestartFilter; import com.yahoo.vespa.hosted.controller.api.integration.secrets.TenantSecretStore; +import com.yahoo.vespa.hosted.controller.api.integration.zone.ZoneRegistry; import com.yahoo.vespa.hosted.controller.api.role.Role; import com.yahoo.vespa.hosted.controller.api.role.RoleDefinition; import com.yahoo.vespa.hosted.controller.api.role.SecurityContext; @@ -137,8 +138,6 @@ import java.security.PublicKey; import java.time.DayOfWeek; import java.time.Duration; import java.time.Instant; -import java.time.LocalDateTime; -import java.time.ZoneOffset; import java.time.temporal.ChronoUnit; import java.util.Arrays; import java.util.Base64; @@ -955,19 +954,24 @@ public class ApplicationApiHandler extends AuditLoggingRequestHandler { private HttpResponse applicationPackage(String tenantName, String applicationName, HttpRequest request) { TenantAndApplicationId tenantAndApplication = TenantAndApplicationId.from(tenantName, applicationName); - long build; - String parameter = request.getProperty("build"); - if (parameter != null) - try { - build = Validation.requireAtLeast(Long.parseLong(request.getProperty("build")), "build number", 1L); - } - catch (NumberFormatException e) { - throw new IllegalArgumentException("invalid value for request parameter 'build'", e); - } - else { + final long build; + String requestedBuild = request.getProperty("build"); + if (requestedBuild != null) { + if (requestedBuild.equals("latestDeployed")) { + build = controller.applications().requireApplication(tenantAndApplication).latestDeployedRevision() + .map(RevisionId::number) + .orElseThrow(() -> new NotExistsException("no application package has been deployed in production for " + tenantAndApplication)); + } else { + try { + build = Validation.requireAtLeast(Long.parseLong(request.getProperty("build")), "build number", 1L); + } catch (NumberFormatException e) { + throw new IllegalArgumentException("invalid value for request parameter 'build'", e); + } + } + } else { build = controller.applications().requireApplication(tenantAndApplication).revisions().last() - .map(version -> version.id().number()) - .orElseThrow(() -> new NotExistsException("no application package has been submitted for " + tenantAndApplication)); + .map(version -> version.id().number()) + .orElseThrow(() -> new NotExistsException("no application package has been submitted for " + tenantAndApplication)); } RevisionId revision = RevisionId.forProduction(build); boolean tests = request.getBooleanProperty("tests"); @@ -1470,6 +1474,15 @@ public class ApplicationApiHandler extends AuditLoggingRequestHandler { private HttpResponse trigger(ApplicationId id, JobType type, HttpRequest request) { + // JobType.fromJobName doesn't properly initiate test jobs. Triggering these without context isn't _really_ + // necessary, but triggering a test in the default cloud is better than failing with a weird error. + ZoneRegistry zones = controller.zoneRegistry(); + type = switch (type.environment()) { + case test -> JobType.systemTest(zones, zones.systemZone().getCloudName()); + case staging -> JobType.stagingTest(zones, zones.systemZone().getCloudName()); + default -> type; + }; + Inspector requestObject = toSlime(request.getData()).get(); boolean requireTests = ! requestObject.field("skipTests").asBool(); boolean reTrigger = requestObject.field("reTrigger").asBool(); @@ -1831,7 +1844,8 @@ public class ApplicationApiHandler extends AuditLoggingRequestHandler { response.setString("status", "complete"); else if (stepStatus.readyAt(instance.change()).map(controller.clock().instant()::isBefore).orElse(true)) response.setString("status", "pending"); - else response.setString("status", "running"); + else + response.setString("status", "running"); }); } else { var deploymentRun = controller.jobController().last(deploymentId.applicationId(), JobType.deploymentTo(deploymentId.zoneId())); @@ -2544,12 +2558,11 @@ public class ApplicationApiHandler extends AuditLoggingRequestHandler { cloudTenant.tenantSecretStores()); try { - var tenantQuota = controller.serviceRegistry().billingController().getQuota(tenant.name()); var usedQuota = applications.stream() .map(Application::quotaUsage) .reduce(QuotaUsage.none, QuotaUsage::add); - toSlime(tenantQuota, usedQuota, object.setObject("quota")); + toSlime(object.setObject("quota"), usedQuota); } catch (Exception e) { log.warning(String.format("Failed to get quota for tenant %s: %s", tenant.name(), Exceptions.toMessageString(e))); } @@ -2592,15 +2605,8 @@ public class ApplicationApiHandler extends AuditLoggingRequestHandler { archiveAccess.gcpMember().ifPresent(member -> object.setString("gcpMember", member)); } - private void toSlime(Quota quota, QuotaUsage usage, Cursor object) { - quota.budget().ifPresentOrElse( - budget -> object.setDouble("budget", budget.doubleValue()), - () -> object.setNix("budget") - ); + private void toSlime(Cursor object, QuotaUsage usage) { object.setDouble("budgetUsed", usage.rate()); - - // TODO: Retire when we no longer use maxClusterSize as a meaningful limit - quota.maxClusterSize().ifPresent(maxClusterSize -> object.setLong("clusterSize", maxClusterSize)); } private void toSlime(ClusterResources resources, Cursor object) { diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/restapi/application/JobControllerApiHandlerHelper.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/restapi/application/JobControllerApiHandlerHelper.java index e28bf89e734..25953c16bf0 100644 --- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/restapi/application/JobControllerApiHandlerHelper.java +++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/restapi/application/JobControllerApiHandlerHelper.java @@ -115,6 +115,7 @@ class JobControllerApiHandlerHelper { Run run = jobController.run(runId); detailsObject.setBool("active", ! run.hasEnded()); detailsObject.setString("status", nameOf(run.status())); + run.reason().ifPresent(reason -> detailsObject.setString("reason", reason)); try { jobController.updateTestLog(runId); jobController.updateVespaLog(runId); @@ -325,7 +326,7 @@ class JobControllerApiHandlerHelper { "/job/" + job.type().jobName()).normalize(); stepObject.setString("url", baseUriForJob.toString()); stepObject.setString("environment", job.type().environment().value()); - stepObject.setString("region", job.type().zone().value()); + if ( ! job.type().environment().isTest()) stepObject.setString("region", job.type().zone().value()); if (job.type().isProduction() && job.type().isDeployment()) { status.deploymentFor(job).ifPresent(deployment -> { @@ -421,6 +422,7 @@ class JobControllerApiHandlerHelper { runObject.setLong("start", run.start().toEpochMilli()); run.end().ifPresent(end -> runObject.setLong("end", end.toEpochMilli())); runObject.setString("status", run.status().name()); + run.reason().ifPresent(reason -> runObject.setString("reason", reason)); toSlime(runObject.setObject("versions"), run.versions(), application); Cursor runStepsArray = runObject.setArray("steps"); run.steps().forEach((step, info) -> { diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/restapi/billing/BillingApiHandlerV2.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/restapi/billing/BillingApiHandlerV2.java index 44a8b636ae0..4532e0c2c18 100644 --- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/restapi/billing/BillingApiHandlerV2.java +++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/restapi/billing/BillingApiHandlerV2.java @@ -14,6 +14,7 @@ import com.yahoo.slime.Cursor; import com.yahoo.slime.Inspector; import com.yahoo.slime.Slime; import com.yahoo.slime.Type; +import com.yahoo.vespa.hosted.controller.Application; import com.yahoo.vespa.hosted.controller.ApplicationController; import com.yahoo.vespa.hosted.controller.Controller; import com.yahoo.vespa.hosted.controller.TenantController; @@ -23,8 +24,10 @@ import com.yahoo.vespa.hosted.controller.api.integration.billing.CollectionMetho import com.yahoo.vespa.hosted.controller.api.integration.billing.Plan; import com.yahoo.vespa.hosted.controller.api.integration.billing.PlanId; import com.yahoo.vespa.hosted.controller.api.integration.billing.PlanRegistry; +import com.yahoo.vespa.hosted.controller.api.integration.billing.Quota; import com.yahoo.vespa.hosted.controller.api.role.Role; import com.yahoo.vespa.hosted.controller.api.role.SecurityContext; +import com.yahoo.vespa.hosted.controller.application.QuotaUsage; import com.yahoo.vespa.hosted.controller.tenant.CloudTenant; import com.yahoo.vespa.hosted.controller.tenant.Tenant; @@ -200,11 +203,13 @@ public class BillingApiHandlerV2 extends RestApiRequestHandler<BillingApiHandler var response = new Slime(); var tenantsResponse = response.setObject().setArray("tenants"); + tenants.asList().stream().sorted(Comparator.comparing(Tenant::name)).forEach(tenant -> { var usage = Optional.ofNullable(usagePerTenant.get(tenant.name())); var tenantResponse = tenantsResponse.addObject(); tenantResponse.setString("tenant", tenant.name().value()); toSlime(tenantResponse.setObject("plan"), planFor(tenant.name())); + toSlime(tenantResponse.setObject("quota"), billing.getQuota(tenant.name())); tenantResponse.setString("collection", billing.getCollectionMethod(tenant.name()).name()); tenantResponse.setString("lastBill", usage.map(Bill::getStartDate).map(DateTimeFormatter.ISO_DATE::format).orElse(null)); tenantResponse.setString("unbilled", usage.map(Bill::sum).map(BigDecimal::toPlainString).orElse("0.00")); @@ -357,6 +362,10 @@ public class BillingApiHandlerV2 extends RestApiRequestHandler<BillingApiHandler cursor.setString("name", plan.displayName()); } + private void toSlime(Cursor cursor, Quota quota) { + cursor.setDouble("budget", quota.budget().map(BigDecimal::doubleValue).orElse(-1.0)); + } + private Plan planFor(TenantName tenant) { var planId = billing.getPlan(tenant); return planRegistry.plan(planId) diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/restapi/controller/ControllerApiHandler.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/restapi/controller/ControllerApiHandler.java index 25ac90ac0ea..776fcbfd03b 100644 --- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/restapi/controller/ControllerApiHandler.java +++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/restapi/controller/ControllerApiHandler.java @@ -35,6 +35,7 @@ import java.security.Principal; import java.security.cert.X509Certificate; import java.time.Instant; import java.util.Optional; +import java.util.OptionalInt; import java.util.Scanner; import java.util.function.Function; import java.util.logging.Level; @@ -60,13 +61,13 @@ public class ControllerApiHandler extends AuditLoggingRequestHandler { @Override public HttpResponse auditAndHandle(HttpRequest request) { try { - switch (request.getMethod()) { - case GET: return get(request); - case POST: return post(request); - case DELETE: return delete(request); - case PATCH: return patch(request); - default: return ErrorResponse.methodNotAllowed("Method '" + request.getMethod() + "' is not supported"); - } + return switch (request.getMethod()) { + case GET -> get(request); + case POST -> post(request); + case DELETE -> delete(request); + case PATCH -> patch(request); + default -> ErrorResponse.methodNotAllowed("Method '" + request.getMethod() + "' is not supported"); + }; } catch (IllegalArgumentException e) { return ErrorResponse.badRequest(Exceptions.toMessageString(e)); @@ -165,8 +166,8 @@ public class ControllerApiHandler extends AuditLoggingRequestHandler { if (inspect.field(upgradesPerMinuteField).valid()) { upgrader.setUpgradesPerMinute(inspect.field(upgradesPerMinuteField).asDouble()); } else if (inspect.field(targetMajorVersionField).valid()) { - int target = (int)inspect.field(targetMajorVersionField).asLong(); - upgrader.setTargetMajorVersion(Optional.ofNullable(target == 0 ? null : target)); // 0 is the default value + int target = (int) inspect.field(targetMajorVersionField).asLong(); + upgrader.setTargetMajorVersion(target == 0 ? OptionalInt.empty() : OptionalInt.of(target)); // 0 is the default value } else { return ErrorResponse.badRequest("No such modifiable field(s)"); } diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/restapi/controller/StatsResponse.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/restapi/controller/StatsResponse.java index 19f1ac5449f..96a3c9f177d 100644 --- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/restapi/controller/StatsResponse.java +++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/restapi/controller/StatsResponse.java @@ -30,6 +30,8 @@ public class StatsResponse extends SlimeJsonResponse { if (stats.applicationStats().isEmpty()) continue; // skip empty zones Cursor zoneObject = zonesArray.addObject(); zoneObject.setString("id", zone.toString()); + zoneObject.setDouble("totalCost", stats.totalCost()); + zoneObject.setDouble("totalAllocatedCost", stats.totalAllocatedCost()); toSlime(stats.load(), zoneObject.setObject("load")); toSlime(stats.activeLoad(), zoneObject.setObject("activeLoad")); Cursor applicationsArray = zoneObject.setArray("applications"); diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/restapi/os/OsApiHandler.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/restapi/os/OsApiHandler.java index 853739ee9c3..0e764b98514 100644 --- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/restapi/os/OsApiHandler.java +++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/restapi/os/OsApiHandler.java @@ -22,6 +22,9 @@ import com.yahoo.slime.SlimeUtils; import com.yahoo.slime.Type; import com.yahoo.vespa.hosted.controller.Controller; import com.yahoo.vespa.hosted.controller.auditlog.AuditLoggingRequestHandler; +import com.yahoo.vespa.hosted.controller.maintenance.ControllerMaintenance; +import com.yahoo.vespa.hosted.controller.maintenance.OsUpgradeScheduler; +import com.yahoo.vespa.hosted.controller.maintenance.OsUpgradeScheduler.Change; import com.yahoo.vespa.hosted.controller.versions.OsVersionTarget; import com.yahoo.yolean.Exceptions; @@ -47,22 +50,24 @@ import java.util.stream.Collectors; public class OsApiHandler extends AuditLoggingRequestHandler { private final Controller controller; + private final OsUpgradeScheduler osUpgradeScheduler; - public OsApiHandler(Context ctx, Controller controller) { + public OsApiHandler(Context ctx, Controller controller, ControllerMaintenance controllerMaintenance) { super(ctx, controller.auditLogger()); this.controller = controller; + this.osUpgradeScheduler = controllerMaintenance.osUpgradeScheduler(); } @Override public HttpResponse auditAndHandle(HttpRequest request) { try { - switch (request.getMethod()) { - case GET: return get(request); - case POST: return post(request); - case DELETE: return delete(request); - case PATCH: return patch(request); - default: return ErrorResponse.methodNotAllowed("Method '" + request.getMethod() + "' is unsupported"); - } + return switch (request.getMethod()) { + case GET -> get(request); + case POST -> post(request); + case DELETE -> delete(request); + case PATCH -> patch(request); + default -> ErrorResponse.methodNotAllowed("Method '" + request.getMethod() + "' is unsupported"); + }; } catch (IllegalArgumentException e) { return ErrorResponse.badRequest(Exceptions.toMessageString(e)); } catch (RuntimeException e) { @@ -159,8 +164,16 @@ public class OsApiHandler extends AuditLoggingRequestHandler { currentVersionObject.setString("version", osVersion.version().toFullString()); Optional<OsVersionTarget> target = targets.stream().filter(t -> t.osVersion().equals(osVersion)).findFirst(); currentVersionObject.setBool("targetVersion", target.isPresent()); - target.ifPresent(t -> currentVersionObject.setString("upgradeBudget", t.upgradeBudget().toString())); - target.ifPresent(t -> currentVersionObject.setLong("scheduledAt", t.scheduledAt().toEpochMilli())); + target.ifPresent(t -> { + currentVersionObject.setString("upgradeBudget", t.upgradeBudget().toString()); + currentVersionObject.setLong("scheduledAt", t.scheduledAt().toEpochMilli()); + Optional<Change> nextChange = osUpgradeScheduler.changeIn(t.osVersion().cloud()); + nextChange.ifPresent(c -> { + currentVersionObject.setString("nextVersion", c.version().toFullString()); + currentVersionObject.setLong("nextScheduledAt", c.scheduleAt().toEpochMilli()); + }); + }); + currentVersionObject.setString("cloud", osVersion.cloud().value()); Cursor nodesArray = currentVersionObject.setArray("nodes"); nodeVersions.forEach(nodeVersion -> { diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/restapi/user/UserApiHandler.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/restapi/user/UserApiHandler.java index fce2d283da2..a407e5aa211 100644 --- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/restapi/user/UserApiHandler.java +++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/restapi/user/UserApiHandler.java @@ -111,7 +111,6 @@ public class UserApiHandler extends ThreadedHttpRequestHandler { private HttpResponse handlePOST(Path path, HttpRequest request) { if (path.matches("/user/v1/tenant/{tenant}")) return addTenantRoleMember(path.get("tenant"), request); - if (path.matches("/user/v1/tenant/{tenant}/application/{application}")) return addApplicationRoleMember(path.get("tenant"), path.get("application"), request); return ErrorResponse.notFoundError(Text.format("No '%s' handler at '%s'", request.getMethod(), request.getUri().getPath())); @@ -119,7 +118,6 @@ public class UserApiHandler extends ThreadedHttpRequestHandler { private HttpResponse handleDELETE(Path path, HttpRequest request) { if (path.matches("/user/v1/tenant/{tenant}")) return removeTenantRoleMember(path.get("tenant"), request); - if (path.matches("/user/v1/tenant/{tenant}/application/{application}")) return removeApplicationRoleMember(path.get("tenant"), path.get("application"), request); return ErrorResponse.notFoundError(Text.format("No '%s' handler at '%s'", request.getMethod(), request.getUri().getPath())); @@ -255,21 +253,6 @@ public class UserApiHandler extends ThreadedHttpRequestHandler { private HttpResponse addTenantRoleMember(String tenantName, HttpRequest request) { Inspector requestObject = bodyInspector(request); - if (requestObject.field("roles").valid()) { - return addMultipleTenantRoleMembers(tenantName, requestObject); - } - return addTenantRoleMember(tenantName, requestObject); - } - - private HttpResponse addTenantRoleMember(String tenantName, Inspector requestObject) { - String roleName = require("roleName", Inspector::asString, requestObject); - UserId user = new UserId(require("user", Inspector::asString, requestObject)); - Role role = Roles.toRole(TenantName.from(tenantName), roleName); - users.addUsers(role, List.of(user)); - return new MessageResponse(user + " is now a member of " + role); - } - - private HttpResponse addMultipleTenantRoleMembers(String tenantName, Inspector requestObject) { var tenant = TenantName.from(tenantName); var user = new UserId(require("user", Inspector::asString, requestObject)); var roles = SlimeStream.fromArray(requestObject.field("roles"), Inspector::asString) @@ -280,37 +263,8 @@ public class UserApiHandler extends ThreadedHttpRequestHandler { return new MessageResponse(user + " is now a member of " + roles.stream().map(Role::toString).collect(Collectors.joining(", "))); } - private HttpResponse addApplicationRoleMember(String tenantName, String applicationName, HttpRequest request) { - Inspector requestObject = bodyInspector(request); - String roleName = require("roleName", Inspector::asString, requestObject); - UserId user = new UserId(require("user", Inspector::asString, requestObject)); - Role role = Roles.toRole(TenantName.from(tenantName), ApplicationName.from(applicationName), roleName); - users.addUsers(role, List.of(user)); - return new MessageResponse(user + " is now a member of " + role); - } - private HttpResponse removeTenantRoleMember(String tenantName, HttpRequest request) { Inspector requestObject = bodyInspector(request); - if (requestObject.field("roles").valid()) { - return removeMultipleTenantRoleMembers(tenantName, requestObject); - } - return removeTenantRoleMember(tenantName, requestObject); - } - - private HttpResponse removeTenantRoleMember(String tenantName, Inspector requestObject) { - TenantName tenant = TenantName.from(tenantName); - String roleName = require("roleName", Inspector::asString, requestObject); - UserId user = new UserId(require("user", Inspector::asString, requestObject)); - List<Role> roles = Collections.singletonList(Roles.toRole(tenant, roleName)); - - enforceLastAdminOfTenant(tenant, user, roles); - removeDeveloperKey(tenant, user, roles); - users.removeFromRoles(user, roles); - - return new MessageResponse(user + " is no longer a member of " + roles.stream().map(Role::toString).collect(Collectors.joining(", "))); - } - - private HttpResponse removeMultipleTenantRoleMembers(String tenantName, Inspector requestObject) { var tenant = TenantName.from(tenantName); var user = new UserId(require("user", Inspector::asString, requestObject)); var roles = SlimeStream.fromArray(requestObject.field("roles"), Inspector::asString) @@ -321,6 +275,11 @@ public class UserApiHandler extends ThreadedHttpRequestHandler { removeDeveloperKey(tenant, user, roles); users.removeFromRoles(user, roles); + controller.tenants().lockIfPresent(tenant, LockedTenant.class, lockedTenant -> { + if (lockedTenant instanceof LockedTenant.Cloud cloudTenant) + controller.tenants().store(cloudTenant.withInvalidateUserSessionsBefore(controller.clock().instant())); + }); + return new MessageResponse(user + " is no longer a member of " + roles.stream().map(Role::toString).collect(Collectors.joining(", "))); } @@ -348,15 +307,6 @@ public class UserApiHandler extends ThreadedHttpRequestHandler { } } - private HttpResponse removeApplicationRoleMember(String tenantName, String applicationName, HttpRequest request) { - Inspector requestObject = bodyInspector(request); - String roleName = require("roleName", Inspector::asString, requestObject); - UserId user = new UserId(require("user", Inspector::asString, requestObject)); - Role role = Roles.toRole(TenantName.from(tenantName), ApplicationName.from(applicationName), roleName); - users.removeUsers(role, List.of(user)); - return new MessageResponse(user + " is no longer a member of " + role); - } - private boolean hasTrialCapacity() { if (! controller.system().isPublic()) return true; var existing = controller.tenants().asList().stream().map(Tenant::name).collect(Collectors.toList()); @@ -384,18 +334,12 @@ public class UserApiHandler extends ThreadedHttpRequestHandler { } private static Collection<TenantRole> filterTenantRoles(Role role) { - if (!(role instanceof TenantRole)) - return Set.of(); - - TenantRole tenantRole = (TenantRole) role; - if (tenantRole.definition() == RoleDefinition.administrator - || tenantRole.definition() == RoleDefinition.developer - || tenantRole.definition() == RoleDefinition.reader) - return Set.of(tenantRole); - - if (tenantRole.definition() == RoleDefinition.athenzTenantAdmin) - return Roles.tenantRoles(tenantRole.tenant()); - + if (role instanceof TenantRole tenantRole) { + switch (tenantRole.definition()) { + case administrator, developer, reader, hostedDeveloper: return Set.of(tenantRole); + case athenzTenantAdmin: return Roles.tenantRoles(tenantRole.tenant()); + } + } return Set.of(); } diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/security/CloudUserSessionManager.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/security/CloudUserSessionManager.java new file mode 100644 index 00000000000..e2b5083abae --- /dev/null +++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/security/CloudUserSessionManager.java @@ -0,0 +1,50 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.vespa.hosted.controller.security; + +import com.yahoo.config.provision.TenantName; +import com.yahoo.vespa.flags.LongFlag; +import com.yahoo.vespa.flags.PermanentFlags; +import com.yahoo.vespa.hosted.controller.Controller; +import com.yahoo.vespa.hosted.controller.TenantController; +import com.yahoo.vespa.hosted.controller.api.integration.user.UserSessionManager; +import com.yahoo.vespa.hosted.controller.api.role.SecurityContext; +import com.yahoo.vespa.hosted.controller.api.role.TenantRole; +import com.yahoo.vespa.hosted.controller.tenant.CloudTenant; + +import java.time.Instant; + +/** + * @author freva + */ +public class CloudUserSessionManager implements UserSessionManager { + + private final TenantController tenantController; + private final LongFlag invalidateConsoleSessions; + + public CloudUserSessionManager(Controller controller) { + this.tenantController = controller.tenants(); + this.invalidateConsoleSessions = PermanentFlags.INVALIDATE_CONSOLE_SESSIONS.bindTo(controller.flagSource()); + } + + @Override + public boolean shouldExpireSessionFor(SecurityContext context) { + if (context.issuedAt().isBefore(Instant.ofEpochSecond(invalidateConsoleSessions.value()))) + return true; + + return context.roles().stream() + .filter(TenantRole.class::isInstance) + .map(TenantRole.class::cast) + .map(TenantRole::tenant) + .distinct() + .anyMatch(tenantName -> shouldExpireSessionFor(tenantName, context.issuedAt())); + } + + private boolean shouldExpireSessionFor(TenantName tenantName, Instant contextIssuedAt) { + return tenantController.get(tenantName) + .filter(CloudTenant.class::isInstance) + .map(CloudTenant.class::cast) + .flatMap(CloudTenant::invalidateUserSessionsBefore) + .map(contextIssuedAt::isBefore) + .orElse(false); + } +} diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/versions/OsVersionStatus.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/versions/OsVersionStatus.java index 8ee891ae8a6..6f9888b79e0 100644 --- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/versions/OsVersionStatus.java +++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/versions/OsVersionStatus.java @@ -4,6 +4,7 @@ package com.yahoo.vespa.hosted.controller.versions; import com.google.common.collect.ImmutableMap; import com.yahoo.component.Version; import com.yahoo.config.provision.CloudName; +import com.yahoo.config.provision.zone.UpgradePolicy; import com.yahoo.config.provision.zone.ZoneApi; import com.yahoo.vespa.hosted.controller.Controller; import com.yahoo.vespa.hosted.controller.api.integration.configserver.NodeFilter; @@ -66,7 +67,7 @@ public record OsVersionStatus(Map<OsVersion, List<NodeVersion>> versions) { .orElse(Version.emptyVersion); for (var node : controller.serviceRegistry().configServer().nodeRepository().list(zone.getVirtualId(), NodeFilter.all().applications(application.id()))) { - if (!OsUpgrader.canUpgrade(node)) continue; + if (!OsUpgrader.canUpgrade(node, true)) continue; Optional<Instant> suspendedAt = node.suspendedSince(); NodeVersion nodeVersion = new NodeVersion(node.hostname(), zone.getVirtualId(), node.currentOsVersion(), targetOsVersion, suspendedAt); @@ -83,6 +84,7 @@ public record OsVersionStatus(Map<OsVersion, List<NodeVersion>> versions) { private static List<ZoneApi> zonesToUpgrade(Controller controller) { return controller.zoneRegistry().osUpgradePolicies().stream() .flatMap(upgradePolicy -> upgradePolicy.steps().stream()) + .map(UpgradePolicy.Step::zones) .flatMap(Collection::stream) .toList(); } diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/versions/VespaVersion.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/versions/VespaVersion.java index 7f33f612cd0..e078df0267f 100644 --- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/versions/VespaVersion.java +++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/versions/VespaVersion.java @@ -15,7 +15,8 @@ import static com.yahoo.config.application.api.DeploymentSpec.UpgradePolicy; /** * Information about a particular Vespa version. - * VespaVersions are identified by their version number and ordered by increasing version numbers. + * + * Vespa versions are identified by their version number and ordered by increasing version numbers. * * @author bratseth */ @@ -29,8 +30,11 @@ public record VespaVersion(Version version, Confidence confidence) implements Comparable<VespaVersion> { public static Confidence confidenceFrom(DeploymentStatistics statistics, Controller controller) { + int thisMajorVersion = statistics.version().getMajor(); + int defaultMajorVersion = controller.applications().targetMajorVersion().orElse(thisMajorVersion); InstanceList all = InstanceList.from(controller.jobController().deploymentStatuses(ApplicationList.from(controller.applications().asList()) - .withProductionDeployment())); + .withProductionDeployment())) + .allowingMajorVersion(thisMajorVersion, defaultMajorVersion); // 'production on this': All production deployment jobs upgrading to this version have completed without failure InstanceList productionOnThis = all.matching(instance -> statistics.productionSuccesses().stream().anyMatch(run -> run.id().application().equals(instance))) .not().failingUpgrade() |