// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
package com.yahoo.vespa.hosted.controller.maintenance;
import com.google.common.collect.Sets;
import com.yahoo.component.annotation.Inject;
import com.yahoo.config.provision.ApplicationId;
import com.yahoo.container.jdisc.secretstore.SecretNotFoundException;
import com.yahoo.container.jdisc.secretstore.SecretStore;
import com.yahoo.transaction.Mutex;
import com.yahoo.vespa.hosted.controller.Controller;
import com.yahoo.vespa.hosted.controller.Instance;
import com.yahoo.vespa.hosted.controller.api.integration.certificates.EndpointCertificateDetails;
import com.yahoo.vespa.hosted.controller.api.integration.certificates.EndpointCertificateMetadata;
import com.yahoo.vespa.hosted.controller.api.integration.certificates.EndpointCertificateProvider;
import com.yahoo.vespa.hosted.controller.api.integration.certificates.EndpointCertificateRequestMetadata;
import com.yahoo.vespa.hosted.controller.api.integration.deployment.JobType;
import com.yahoo.vespa.hosted.controller.application.Deployment;
import com.yahoo.vespa.hosted.controller.application.TenantAndApplicationId;
import com.yahoo.vespa.hosted.controller.deployment.DeploymentTrigger;
import com.yahoo.vespa.hosted.controller.persistence.CuratorDb;
import java.time.Clock;
import java.time.Duration;
import java.time.Instant;
import java.time.temporal.ChronoUnit;
import java.util.ArrayList;
import java.util.Comparator;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Optional;
import java.util.OptionalInt;
import java.util.logging.Level;
import java.util.logging.Logger;
import java.util.stream.Collectors;
/**
* Updates refreshed endpoint certificates and triggers redeployment, and deletes unused certificates.
*
* See also class EndpointCertificates, which provisions, reprovisions and validates certificates on deploy
*
* @author andreer
*/
public class EndpointCertificateMaintainer extends ControllerMaintainer {
private static final Logger log = Logger.getLogger(EndpointCertificateMaintainer.class.getName());
private final DeploymentTrigger deploymentTrigger;
private final Clock clock;
private final CuratorDb curator;
private final SecretStore secretStore;
private final EndpointCertificateProvider endpointCertificateProvider;
final Comparator oldestFirst = Comparator.comparing(e -> e.deployment.at());
@Inject
public EndpointCertificateMaintainer(Controller controller, Duration interval) {
super(controller, interval);
this.deploymentTrigger = controller.applications().deploymentTrigger();
this.clock = controller.clock();
this.secretStore = controller.secretStore();
this.curator = controller().curator();
this.endpointCertificateProvider = controller.serviceRegistry().endpointCertificateProvider();
}
@Override
protected double maintain() {
try {
// In order of importance
deployRefreshedCertificates();
updateRefreshedCertificates();
deleteUnusedCertificates();
deleteOrReportUnmanagedCertificates();
} catch (Exception e) {
log.log(Level.SEVERE, "Exception caught while maintaining endpoint certificates", e);
return 0.0;
}
return 1.0;
}
private void updateRefreshedCertificates() {
curator.readAllEndpointCertificateMetadata().forEach(((applicationId, endpointCertificateMetadata) -> {
// Look for and use refreshed certificate
var latestAvailableVersion = latestVersionInSecretStore(endpointCertificateMetadata);
if (latestAvailableVersion.isPresent() && latestAvailableVersion.getAsInt() > endpointCertificateMetadata.version()) {
var refreshedCertificateMetadata = endpointCertificateMetadata
.withVersion(latestAvailableVersion.getAsInt())
.withLastRefreshed(clock.instant().getEpochSecond());
try (Mutex lock = lock(applicationId)) {
if (Optional.of(endpointCertificateMetadata).equals(curator.readEndpointCertificateMetadata(applicationId))) {
curator.writeEndpointCertificateMetadata(applicationId, refreshedCertificateMetadata); // Certificate not validated here, but on deploy.
}
}
}
}));
}
record EligibleJob(Deployment deployment, ApplicationId applicationId, JobType job) {}
/**
* If it's been four days since the cert has been refreshed, re-trigger prod deployment jobs (one at a time).
*/
private void deployRefreshedCertificates() {
var now = clock.instant();
var eligibleJobs = new ArrayList();
curator.readAllEndpointCertificateMetadata().forEach((applicationId, endpointCertificateMetadata) ->
endpointCertificateMetadata.lastRefreshed().ifPresent(lastRefreshTime -> {
Instant refreshTime = Instant.ofEpochSecond(lastRefreshTime);
if (now.isAfter(refreshTime.plus(4, ChronoUnit.DAYS))) {
controller().applications().getInstance(applicationId)
.ifPresent(instance -> instance.productionDeployments().forEach((zone, deployment) -> {
if (deployment.at().isBefore(refreshTime)) {
JobType job = JobType.deploymentTo(zone);
eligibleJobs.add(new EligibleJob(deployment, applicationId, job));
}
}));
}
}));
eligibleJobs.stream()
.min(oldestFirst)
.ifPresent(e -> {
deploymentTrigger.reTrigger(e.applicationId, e.job, "re-triggered by EndpointCertificateMaintainer");
log.info("Re-triggering deployment job " + e.job.jobName() + " for instance " +
e.applicationId.serializedForm() + " to roll out refreshed endpoint certificate");
});
}
private OptionalInt latestVersionInSecretStore(EndpointCertificateMetadata originalCertificateMetadata) {
try {
var certVersions = new HashSet<>(secretStore.listSecretVersions(originalCertificateMetadata.certName()));
var keyVersions = new HashSet<>(secretStore.listSecretVersions(originalCertificateMetadata.keyName()));
return Sets.intersection(certVersions, keyVersions).stream().mapToInt(Integer::intValue).max();
} catch (SecretNotFoundException s) {
return OptionalInt.empty(); // Likely because the certificate is very recently provisioned - keep current version
}
}
private void deleteUnusedCertificates() {
var oneMonthAgo = clock.instant().minus(30, ChronoUnit.DAYS);
curator.readAllEndpointCertificateMetadata().forEach((applicationId, storedMetaData) -> {
var lastRequested = Instant.ofEpochSecond(storedMetaData.lastRequested());
if (lastRequested.isBefore(oneMonthAgo) && hasNoDeployments(applicationId)) {
try (Mutex lock = lock(applicationId)) {
if (Optional.of(storedMetaData).equals(curator.readEndpointCertificateMetadata(applicationId))) {
log.log(Level.INFO, "Cert for app " + applicationId.serializedForm()
+ " has not been requested in a month and app has no deployments, deleting from provider and ZK");
endpointCertificateProvider.deleteCertificate(applicationId, storedMetaData.rootRequestId());
curator.deleteEndpointCertificateMetadata(applicationId);
}
}
}
});
}
private Mutex lock(ApplicationId applicationId) {
return curator.lock(TenantAndApplicationId.from(applicationId));
}
private boolean hasNoDeployments(ApplicationId applicationId) {
return controller().applications().getInstance(applicationId)
.map(Instance::deployments)
.orElseGet(Map::of)
.isEmpty();
}
private void deleteOrReportUnmanagedCertificates() {
List endpointCertificateMetadata = endpointCertificateProvider.listCertificates();
Map storedEndpointCertificateMetadata = curator.readAllEndpointCertificateMetadata();
List leafRequestIds = storedEndpointCertificateMetadata.values().stream().flatMap(m -> m.leafRequestId().stream()).toList();
List rootRequestIds = storedEndpointCertificateMetadata.values().stream().map(EndpointCertificateMetadata::rootRequestId).toList();
for (var providerCertificateMetadata : endpointCertificateMetadata) {
if (!rootRequestIds.contains(providerCertificateMetadata.requestId()) && !leafRequestIds.contains(providerCertificateMetadata.requestId())) {
// It could just be a refresh we're not aware of yet. See if it matches the cert/keyname of any known cert
EndpointCertificateDetails unknownCertDetails = endpointCertificateProvider.certificateDetails(providerCertificateMetadata.requestId());
boolean matchFound = false;
for (Map.Entry storedAppEntry : storedEndpointCertificateMetadata.entrySet()) {
ApplicationId storedApp = storedAppEntry.getKey();
EndpointCertificateMetadata storedAppMetadata = storedAppEntry.getValue();
if (storedAppMetadata.certName().equals(unknownCertDetails.cert_key_keyname())) {
matchFound = true;
try (Mutex lock = lock(storedApp)) {
if (Optional.of(storedAppMetadata).equals(curator.readEndpointCertificateMetadata(storedApp))) {
log.log(Level.INFO, "Cert for app " + storedApp.serializedForm()
+ " has a new leafRequestId " + unknownCertDetails.request_id() + ", updating in ZK");
curator.writeEndpointCertificateMetadata(storedApp, storedAppMetadata.withLeafRequestId(Optional.of(unknownCertDetails.request_id())));
}
break;
}
}
}
if (!matchFound) {
// The certificate is not known - however it could be in the process of being requested by us or another controller.
// So we only delete if it was requested more than 7 days ago.
if (Instant.parse(providerCertificateMetadata.createTime()).isBefore(Instant.now().minus(7, ChronoUnit.DAYS))) {
log.log(Level.INFO, String.format("Deleting unmaintained certificate with request_id %s and SANs %s",
providerCertificateMetadata.requestId(),
providerCertificateMetadata.dnsNames().stream().map(d -> d.dnsName).collect(Collectors.joining(", "))));
endpointCertificateProvider.deleteCertificate(ApplicationId.fromSerializedForm("applicationid:is:unknown"), providerCertificateMetadata.requestId());
}
}
}
}
}
}