// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. package com.yahoo.vespa.hosted.controller.maintenance; import com.google.common.collect.Sets; import com.yahoo.component.annotation.Inject; import com.yahoo.config.application.api.DeploymentSpec; import com.yahoo.config.provision.ApplicationId; import com.yahoo.config.provision.InstanceName; import com.yahoo.container.jdisc.secretstore.SecretNotFoundException; import com.yahoo.container.jdisc.secretstore.SecretStore; import com.yahoo.transaction.Mutex; import com.yahoo.transaction.NestedTransaction; import com.yahoo.vespa.flags.BooleanFlag; import com.yahoo.vespa.flags.FetchVector; import com.yahoo.vespa.flags.Flags; import com.yahoo.vespa.flags.IntFlag; import com.yahoo.vespa.flags.PermanentFlags; import com.yahoo.vespa.flags.StringFlag; import com.yahoo.vespa.hosted.controller.Application; import com.yahoo.vespa.hosted.controller.Controller; import com.yahoo.vespa.hosted.controller.api.integration.certificates.EndpointCertificate; import com.yahoo.vespa.hosted.controller.api.integration.certificates.EndpointCertificateDetails; import com.yahoo.vespa.hosted.controller.api.integration.certificates.EndpointCertificateProvider; import com.yahoo.vespa.hosted.controller.api.integration.certificates.EndpointCertificateRequest; import com.yahoo.vespa.hosted.controller.application.Endpoint; import com.yahoo.vespa.hosted.controller.application.GeneratedEndpoint; import com.yahoo.vespa.hosted.controller.certificate.UnassignedCertificate; import com.yahoo.vespa.hosted.controller.api.integration.deployment.JobType; import com.yahoo.vespa.hosted.controller.api.integration.secrets.EndpointSecretManager; import com.yahoo.vespa.hosted.controller.application.Deployment; import com.yahoo.vespa.hosted.controller.application.TenantAndApplicationId; import com.yahoo.vespa.hosted.controller.certificate.AssignedCertificate; import com.yahoo.vespa.hosted.controller.deployment.DeploymentTrigger; import com.yahoo.vespa.hosted.controller.persistence.CuratorDb; import java.time.Clock; import java.time.Duration; import java.time.Instant; import java.time.temporal.ChronoUnit; import java.util.ArrayList; import java.util.Comparator; import java.util.HashSet; import java.util.List; import java.util.Optional; import java.util.OptionalInt; import java.util.Set; import java.util.logging.Level; import java.util.logging.Logger; import java.util.stream.Collectors; import java.util.stream.Stream; /** * Updates refreshed endpoint certificates and triggers redeployment, and deletes unused certificates. *

* See also class EndpointCertificates, which provisions, reprovisions and validates certificates on deploy * * @author andreer */ public class EndpointCertificateMaintainer extends ControllerMaintainer { private static final Logger log = Logger.getLogger(EndpointCertificateMaintainer.class.getName()); private final DeploymentTrigger deploymentTrigger; private final Clock clock; private final CuratorDb curator; private final SecretStore secretStore; private final EndpointSecretManager endpointSecretManager; private final EndpointCertificateProvider endpointCertificateProvider; final Comparator oldestFirst = Comparator.comparing(e -> e.deployment.at()); private final StringFlag endpointCertificateAlgo; private final BooleanFlag useAlternateCertProvider; private final IntFlag assignRandomizedIdRate; @Inject public EndpointCertificateMaintainer(Controller controller, Duration interval) { super(controller, interval); this.deploymentTrigger = controller.applications().deploymentTrigger(); this.clock = controller.clock(); this.secretStore = controller.secretStore(); this.endpointSecretManager = controller.serviceRegistry().secretManager(); this.curator = controller().curator(); this.endpointCertificateProvider = controller.serviceRegistry().endpointCertificateProvider(); this.useAlternateCertProvider = PermanentFlags.USE_ALTERNATIVE_ENDPOINT_CERTIFICATE_PROVIDER.bindTo(controller.flagSource()); this.endpointCertificateAlgo = PermanentFlags.ENDPOINT_CERTIFICATE_ALGORITHM.bindTo(controller.flagSource()); this.assignRandomizedIdRate = Flags.ASSIGNED_RANDOMIZED_ID_RATE.bindTo(controller.flagSource()); } @Override protected double maintain() { try { // In order of importance deployRefreshedCertificates(); updateRefreshedCertificates(); deleteUnusedCertificates(); deleteOrReportUnmanagedCertificates(); assignRandomizedIds(); } catch (Exception e) { log.log(Level.SEVERE, "Exception caught while maintaining endpoint certificates", e); return 1.0; } return 0.0; } private void updateRefreshedCertificates() { curator.readAssignedCertificates().forEach(assignedCertificate -> { // Look for and use refreshed certificate var latestAvailableVersion = latestVersionInSecretStore(assignedCertificate.certificate()); if (latestAvailableVersion.isPresent() && latestAvailableVersion.getAsInt() > assignedCertificate.certificate().version()) { var refreshedCertificateMetadata = assignedCertificate.certificate() .withVersion(latestAvailableVersion.getAsInt()) .withLastRefreshed(clock.instant().getEpochSecond()); try (Mutex lock = lock(assignedCertificate.application())) { if (unchanged(assignedCertificate, lock)) { try (NestedTransaction transaction = new NestedTransaction()) { curator.writeAssignedCertificate(assignedCertificate.with(refreshedCertificateMetadata), transaction); // Certificate not validated here, but on deploy. transaction.commit(); } } } } }); } private boolean unchanged(AssignedCertificate assignedCertificate, @SuppressWarnings("unused") Mutex lock) { return Optional.of(assignedCertificate).equals(curator.readAssignedCertificate(assignedCertificate.application(), assignedCertificate.instance())); } record EligibleJob(Deployment deployment, ApplicationId applicationId, JobType job) {} /** * If it's been four days since the cert has been refreshed, re-trigger prod deployment jobs (one at a time). */ private void deployRefreshedCertificates() { var now = clock.instant(); var eligibleJobs = new ArrayList(); curator.readAssignedCertificates().forEach(assignedCertificate -> assignedCertificate.certificate().lastRefreshed().ifPresent(lastRefreshTime -> { Instant refreshTime = Instant.ofEpochSecond(lastRefreshTime); if (now.isAfter(refreshTime.plus(4, ChronoUnit.DAYS))) { if (assignedCertificate.instance().isPresent()) { ApplicationId applicationId = assignedCertificate.application().instance(assignedCertificate.instance().get()); controller().applications().getInstance(applicationId) .ifPresent(instance -> instance.productionDeployments().forEach((zone, deployment) -> { if (deployment.at().isBefore(refreshTime)) { JobType job = JobType.deploymentTo(zone); eligibleJobs.add(new EligibleJob(deployment, applicationId, job)); } })); } else { // This is an application-wide certificate. Trigger all instances controller().applications().getApplication(assignedCertificate.application()).ifPresent(application -> { application.instances().forEach((ignored, i) -> { i.productionDeployments().forEach((zone, deployment) -> { if (deployment.at().isBefore(refreshTime)) { JobType job = JobType.deploymentTo(zone); eligibleJobs.add(new EligibleJob(deployment, i.id(), job)); } }); }); }); } } })); eligibleJobs.stream() .min(oldestFirst) .ifPresent(e -> { deploymentTrigger.reTrigger(e.applicationId, e.job, "re-triggered by EndpointCertificateMaintainer"); log.info("Re-triggering deployment job " + e.job.jobName() + " for instance " + e.applicationId.serializedForm() + " to roll out refreshed endpoint certificate"); }); } private OptionalInt latestVersionInSecretStore(EndpointCertificate originalCertificateMetadata) { try { var certVersions = new HashSet<>(secretStore.listSecretVersions(originalCertificateMetadata.certName())); var keyVersions = new HashSet<>(secretStore.listSecretVersions(originalCertificateMetadata.keyName())); return Sets.intersection(certVersions, keyVersions).stream().mapToInt(Integer::intValue).max(); } catch (SecretNotFoundException s) { return OptionalInt.empty(); // Likely because the certificate is very recently provisioned - keep current version } } private void deleteUnusedCertificates() { var oneMonthAgo = clock.instant().minus(30, ChronoUnit.DAYS); curator.readAssignedCertificates().forEach(assignedCertificate -> { EndpointCertificate certificate = assignedCertificate.certificate(); var lastRequested = Instant.ofEpochSecond(certificate.lastRequested()); if (lastRequested.isBefore(oneMonthAgo) && hasNoDeployments(assignedCertificate.application())) { try (Mutex lock = lock(assignedCertificate.application())) { if (unchanged(assignedCertificate, lock)) { log.log(Level.INFO, "Cert for app " + asString(assignedCertificate.application(), assignedCertificate.instance()) + " has not been requested in a month and app has no deployments, deleting from provider, ZK and secret store"); endpointCertificateProvider.deleteCertificate(certificate.rootRequestId()); curator.removeAssignedCertificate(assignedCertificate.application(), assignedCertificate.instance()); endpointSecretManager.deleteSecret(certificate.certName()); endpointSecretManager.deleteSecret(certificate.keyName()); } } } }); } private Mutex lock(TenantAndApplicationId application) { return curator.lock(application); } private boolean hasNoDeployments(TenantAndApplicationId application) { Optional app = controller().applications().getApplication(application); if (app.isEmpty()) return true; for (var instance : app.get().instances().values()) { if (!instance.deployments().isEmpty()) return false; } return true; } private void deleteOrReportUnmanagedCertificates() { List requests = endpointCertificateProvider.listCertificates(); List assignedCertificates = curator.readAssignedCertificates(); List leafRequestIds = assignedCertificates.stream().map(AssignedCertificate::certificate).flatMap(m -> m.leafRequestId().stream()).toList(); List rootRequestIds = assignedCertificates.stream().map(AssignedCertificate::certificate).map(EndpointCertificate::rootRequestId).toList(); List unassignedCertificates = curator.readUnassignedCertificates(); List certPoolRootIds = unassignedCertificates.stream().map(p -> p.certificate().leafRequestId()).flatMap(Optional::stream).toList(); List certPoolLeafIds = unassignedCertificates.stream().map(p -> p.certificate().rootRequestId()).toList(); var managedIds = new HashSet(); managedIds.addAll(leafRequestIds); managedIds.addAll(rootRequestIds); managedIds.addAll(certPoolRootIds); managedIds.addAll(certPoolLeafIds); for (var request : requests) { if (!managedIds.contains(request.requestId())) { // It could just be a refresh we're not aware of yet. See if it matches the cert/keyname of any known cert EndpointCertificateDetails unknownCertDetails = endpointCertificateProvider.certificateDetails(request.requestId()); boolean matchFound = false; for (AssignedCertificate assignedCertificate : assignedCertificates) { if (assignedCertificate.certificate().certName().equals(unknownCertDetails.certKeyKeyname())) { matchFound = true; try (Mutex lock = lock(assignedCertificate.application())) { if (unchanged(assignedCertificate, lock)) { log.log(Level.INFO, "Cert for app " + asString(assignedCertificate.application(), assignedCertificate.instance()) + " has a new leafRequestId " + unknownCertDetails.requestId() + ", updating in ZK"); try (NestedTransaction transaction = new NestedTransaction()) { EndpointCertificate updated = assignedCertificate.certificate().withLeafRequestId(Optional.of(unknownCertDetails.requestId())); curator.writeAssignedCertificate(assignedCertificate.with(updated), transaction); transaction.commit(); } } break; } } } if (!matchFound) { // The certificate is not known - however it could be in the process of being requested by us or another controller. // So we only delete if it was requested more than 7 days ago. if (Instant.parse(request.createTime()).isBefore(Instant.now().minus(7, ChronoUnit.DAYS))) { log.log(Level.INFO, String.format("Deleting unmaintained certificate with request_id %s and SANs %s", request.requestId(), request.dnsNames().stream().map(EndpointCertificateRequest.DnsNameStatus::dnsName).collect(Collectors.joining(", ")))); endpointCertificateProvider.deleteCertificate(request.requestId()); } } } } } private void assignRandomizedIds() { List assignedCertificates = curator.readAssignedCertificates(); /* only assign randomized id if: * instance is present * randomized id is not already assigned * feature flag is enabled */ assignedCertificates.stream() .filter(c -> c.instance().isPresent()) .filter(c -> c.certificate().randomizedId().isEmpty()) .filter(c -> controller().applications().getApplication(c.application()).isPresent()) // In case application has been deleted, but certificate is pending deletion .limit(assignRandomizedIdRate.value()) .forEach(c -> assignRandomizedId(c.application(), c.instance().get())); } /* Assign randomized id according to these rules: * Instance is not mentioned in the deployment spec for this application -> assume this is a manual deployment. Assign a randomized id to the certificate, save using instance only * Instance is mentioned in deployment spec: -> If there is a random endpoint assigned to tenant:application -> use this also for the "instance" certificate -> Otherwise assign a random endpoint and write to the application and the instance. */ private void assignRandomizedId(TenantAndApplicationId tenantAndApplicationId, InstanceName instanceName) { Optional assignedCertificate = curator.readAssignedCertificate(tenantAndApplicationId, Optional.of(instanceName)); if (assignedCertificate.isEmpty()) { log.log(Level.INFO, "Assigned certificate missing for " + tenantAndApplicationId.instance(instanceName).toFullString() + " when assigning randomized id"); } // Verify that the assigned certificate still does not have randomized id assigned if (assignedCertificate.get().certificate().randomizedId().isPresent()) return; controller().applications().lockApplicationOrThrow(tenantAndApplicationId, application -> { DeploymentSpec deploymentSpec = application.get().deploymentSpec(); if (deploymentSpec.instance(instanceName).isPresent()) { Optional applicationLevelAssignedCertificate = curator.readAssignedCertificate(tenantAndApplicationId, Optional.empty()); assignApplicationRandomId(assignedCertificate.get(), applicationLevelAssignedCertificate); } else { assignInstanceRandomId(assignedCertificate.get()); } }); } private void assignApplicationRandomId(AssignedCertificate instanceLevelAssignedCertificate, Optional applicationLevelAssignedCertificate) { TenantAndApplicationId tenantAndApplicationId = instanceLevelAssignedCertificate.application(); if (applicationLevelAssignedCertificate.isPresent()) { // Application level assigned certificate with randomized id already exists. Copy randomized id to instance level certificate and request with random names. EndpointCertificate withRandomNames = requestRandomNames( tenantAndApplicationId, instanceLevelAssignedCertificate.instance(), applicationLevelAssignedCertificate.get().certificate().randomizedId() .orElseThrow(() -> new IllegalArgumentException("Application certificate already assigned to " + tenantAndApplicationId.toString() + ", but random id is missing")), Optional.of(instanceLevelAssignedCertificate.certificate())); AssignedCertificate assignedCertWithRandomNames = instanceLevelAssignedCertificate.with(withRandomNames); curator.writeAssignedCertificate(assignedCertWithRandomNames); } else { // No application level certificate exists, generate new assigned certificate with the randomized id based names only, then request same names also for instance level cert String randomId = generateRandomId(); EndpointCertificate applicationLevelEndpointCert = requestRandomNames(tenantAndApplicationId, Optional.empty(), randomId, Optional.empty()); AssignedCertificate applicationLevelCert = new AssignedCertificate(tenantAndApplicationId, Optional.empty(), applicationLevelEndpointCert); EndpointCertificate instanceLevelEndpointCert = requestRandomNames(tenantAndApplicationId, instanceLevelAssignedCertificate.instance(), randomId, Optional.of(instanceLevelAssignedCertificate.certificate())); instanceLevelAssignedCertificate = instanceLevelAssignedCertificate.with(instanceLevelEndpointCert); // Save both in transaction try (NestedTransaction transaction = new NestedTransaction()) { curator.writeAssignedCertificate(instanceLevelAssignedCertificate, transaction); curator.writeAssignedCertificate(applicationLevelCert, transaction); transaction.commit(); } } } private void assignInstanceRandomId(AssignedCertificate assignedCertificate) { String randomId = generateRandomId(); EndpointCertificate withRandomNames = requestRandomNames(assignedCertificate.application(), assignedCertificate.instance(), randomId, Optional.of(assignedCertificate.certificate())); AssignedCertificate assignedCertWithRandomNames = assignedCertificate.with(withRandomNames); curator.writeAssignedCertificate(assignedCertWithRandomNames); } private EndpointCertificate requestRandomNames(TenantAndApplicationId tenantAndApplicationId, Optional instanceName, String randomId, Optional previousRequest) { String dnsSuffix = Endpoint.dnsSuffix(controller().system()); List newSanDnsEntries = List.of( "*.%s.z%s".formatted(randomId, dnsSuffix), "*.%s.g%s".formatted(randomId, dnsSuffix), "*.%s.a%s".formatted(randomId, dnsSuffix)); List existingSanDnsEntries = previousRequest.map(EndpointCertificate::requestedDnsSans).orElse(List.of()); List requestNames = Stream.concat(existingSanDnsEntries.stream(), newSanDnsEntries.stream()).toList(); String key = instanceName.map(tenantAndApplicationId::instance).map(ApplicationId::toFullString).orElseGet(tenantAndApplicationId::toString); return endpointCertificateProvider.requestCaSignedCertificate( key, requestNames, previousRequest, endpointCertificateAlgo.value(), useAlternateCertProvider.value()) .withRandomizedId(randomId); } private String generateRandomId() { List unassignedIds = curator.readUnassignedCertificates().stream().map(UnassignedCertificate::id).toList(); List assignedIds = curator.readAssignedCertificates().stream().map(AssignedCertificate::certificate).map(EndpointCertificate::randomizedId).filter(Optional::isPresent).map(Optional::get).toList(); Set allIds = Stream.concat(unassignedIds.stream(), assignedIds.stream()).collect(Collectors.toSet()); String randomId; do { randomId = GeneratedEndpoint.createPart(controller().random(true)); } while (allIds.contains(randomId)); return randomId; } private static String asString(TenantAndApplicationId application, Optional instanceName) { return application.toString() + instanceName.map(name -> "." + name.value()).orElse(""); } }