diff options
23 files changed, 460 insertions, 203 deletions
diff --git a/config-proxy/pom.xml b/config-proxy/pom.xml index bd1f907f87e..476f5f99b86 100644 --- a/config-proxy/pom.xml +++ b/config-proxy/pom.xml @@ -74,12 +74,6 @@ <version>${project.version}</version> <scope>compile</scope> </dependency> - <dependency> - <groupId>com.yahoo.vespa</groupId> - <artifactId>testutil</artifactId> - <version>${project.version}</version> - <scope>test</scope> - </dependency> </dependencies> <build> <plugins> diff --git a/config-proxy/src/main/java/com/yahoo/vespa/config/proxy/filedistribution/CachedFilesMaintainer.java b/config-proxy/src/main/java/com/yahoo/vespa/config/proxy/filedistribution/CachedFilesMaintainer.java index a3dd8d27c0f..4ca5eb4ee90 100644 --- a/config-proxy/src/main/java/com/yahoo/vespa/config/proxy/filedistribution/CachedFilesMaintainer.java +++ b/config-proxy/src/main/java/com/yahoo/vespa/config/proxy/filedistribution/CachedFilesMaintainer.java @@ -1,17 +1,25 @@ // Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. package com.yahoo.vespa.config.proxy.filedistribution; +import com.yahoo.io.IOUtils; import com.yahoo.vespa.filedistribution.FileDownloader; -import com.yahoo.vespa.filedistribution.maintenance.FileDistributionCleanup; import java.io.File; -import java.time.Clock; +import java.io.IOException; +import java.io.UncheckedIOException; +import java.nio.file.attribute.BasicFileAttributes; import java.time.Duration; +import java.time.Instant; +import java.util.Arrays; +import java.util.HashSet; import java.util.Set; import java.util.logging.Level; import java.util.logging.Logger; +import java.util.stream.Collectors; + +import static java.nio.file.Files.readAttributes; /** - * Deletes file references and url downloads on disk that have not been used for some time + * Deletes cached file references and url downloads that have not been used for some time * * @author hmusum */ @@ -21,29 +29,20 @@ class CachedFilesMaintainer implements Runnable { private static final File defaultUrlDownloadDir = UrlDownloadRpcServer.downloadDir; private static final File defaultFileReferencesDownloadDir = FileDownloader.defaultDownloadDirectory; - private static final Duration defaultDurationToKeepFiles = Duration.ofDays(20); - private static final int defaultKeepCount = 20; + private static final Duration defaultDurationToKeepFiles = Duration.ofDays(14); private final File urlDownloadDir; private final File fileReferencesDownloadDir; private final Duration durationToKeepFiles; - private final FileDistributionCleanup cleanup; - private final int keepCount; // keep this many files no matter how old they are or when they were last accessed CachedFilesMaintainer() { - this(defaultFileReferencesDownloadDir, defaultUrlDownloadDir, defaultDurationToKeepFiles, Clock.systemUTC(), defaultKeepCount); + this(defaultFileReferencesDownloadDir, defaultUrlDownloadDir, defaultDurationToKeepFiles); } - CachedFilesMaintainer(File fileReferencesDownloadDir, - File urlDownloadDir, - Duration durationToKeepFiles, - Clock clock, - int keepCount) { + CachedFilesMaintainer(File fileReferencesDownloadDir, File urlDownloadDir, Duration durationToKeepFiles) { this.fileReferencesDownloadDir = fileReferencesDownloadDir; this.urlDownloadDir = urlDownloadDir; this.durationToKeepFiles = durationToKeepFiles; - this.cleanup = new FileDistributionCleanup(clock); - this.keepCount = keepCount; } @Override @@ -57,7 +56,35 @@ class CachedFilesMaintainer implements Runnable { } private void deleteUnusedFiles(File directory) { - cleanup.deleteUnusedFileReferences(directory, durationToKeepFiles, keepCount, Set.of()); + Instant deleteNotUsedSinceInstant = Instant.now().minus(durationToKeepFiles); + Set<String> filesOnDisk = new HashSet<>(); + File[] files = directory.listFiles(); + if (files != null) + filesOnDisk.addAll(Arrays.stream(files).map(File::getName).collect(Collectors.toSet())); + log.log(Level.FINE, () -> "Files on disk (in " + directory + "): " + filesOnDisk); + + Set<String> filesToDelete = filesOnDisk + .stream() + .filter(fileReference -> isFileLastModifiedBefore(new File(directory, fileReference), deleteNotUsedSinceInstant)) + .collect(Collectors.toSet()); + if (filesToDelete.size() > 0) { + log.log(Level.INFO, "Files that can be deleted in " + directory + " (not used since " + deleteNotUsedSinceInstant + "): " + filesToDelete); + filesToDelete.forEach(fileReference -> { + File file = new File(directory, fileReference); + if (!IOUtils.recursiveDeleteDir(file)) + log.log(Level.WARNING, "Could not delete " + file.getAbsolutePath()); + }); + } + } + + private boolean isFileLastModifiedBefore(File fileReference, Instant instant) { + BasicFileAttributes fileAttributes; + try { + fileAttributes = readAttributes(fileReference.toPath(), BasicFileAttributes.class); + return fileAttributes.lastModifiedTime().toInstant().isBefore(instant); + } catch (IOException e) { + throw new UncheckedIOException(e); + } } } diff --git a/config-proxy/src/test/java/com/yahoo/vespa/config/proxy/filedistribution/CachedFilesMaintainerTest.java b/config-proxy/src/test/java/com/yahoo/vespa/config/proxy/filedistribution/CachedFilesMaintainerTest.java index 835982be44c..a491a7b4fc4 100644 --- a/config-proxy/src/test/java/com/yahoo/vespa/config/proxy/filedistribution/CachedFilesMaintainerTest.java +++ b/config-proxy/src/test/java/com/yahoo/vespa/config/proxy/filedistribution/CachedFilesMaintainerTest.java @@ -2,17 +2,14 @@ package com.yahoo.vespa.config.proxy.filedistribution; import com.yahoo.io.IOUtils; -import com.yahoo.test.ManualClock; import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.Test; import org.junit.jupiter.api.io.TempDir; + import java.io.File; import java.io.IOException; -import java.io.UncheckedIOException; -import java.nio.file.Files; -import java.nio.file.attribute.FileTime; import java.time.Duration; -import java.util.stream.IntStream; +import java.time.Instant; import static org.junit.jupiter.api.Assertions.assertEquals; import static org.junit.jupiter.api.Assertions.assertNotNull; @@ -22,12 +19,9 @@ import static org.junit.jupiter.api.Assertions.assertNotNull; */ public class CachedFilesMaintainerTest { - private static final int numberToAlwaysKeep = 2; - private File cachedFileReferences; private File cachedDownloads; private CachedFilesMaintainer cachedFilesMaintainer; - private final ManualClock clock = new ManualClock(); @TempDir public File tempFolder; @@ -36,28 +30,28 @@ public class CachedFilesMaintainerTest { public void setup() throws IOException { cachedFileReferences = newFolder(tempFolder, "cachedFileReferences"); cachedDownloads = newFolder(tempFolder, "cachedDownloads"); - cachedFilesMaintainer = new CachedFilesMaintainer(cachedFileReferences, - cachedDownloads, - Duration.ofMinutes(2), - clock, - numberToAlwaysKeep); + cachedFilesMaintainer = new CachedFilesMaintainer(cachedFileReferences, cachedDownloads, Duration.ofMinutes(1)); } @Test - void require_old_files_to_be_deleted() { + void require_old_files_to_be_deleted() throws IOException { runMaintainerAndAssertFiles(0, 0); - clock.advance(Duration.ofSeconds(55)); - // Create file references and downloads - createFiles(); + File fileReference = writeFile(cachedFileReferences, "fileReference"); + File download = writeFile(cachedDownloads, "download"); + runMaintainerAndAssertFiles(1, 1); - runMaintainerAndAssertFiles(4, 4); + updateLastModifiedTimeStamp(fileReference, Instant.now().minus(Duration.ofMinutes(10))); + runMaintainerAndAssertFiles(0, 1); - clock.advance(Duration.ofMinutes(1)); - runMaintainerAndAssertFiles(3, 3); + updateLastModifiedTimeStamp(download, Instant.now().minus(Duration.ofMinutes(10))); + runMaintainerAndAssertFiles(0, 0); + } - clock.advance(Duration.ofMinutes(100)); - runMaintainerAndAssertFiles(numberToAlwaysKeep, numberToAlwaysKeep); + private void updateLastModifiedTimeStamp(File file, Instant instant) { + if (!file.setLastModified(instant.toEpochMilli())) { + throw new RuntimeException("Could not set last modified timestamp for '" + file.getAbsolutePath() + "'"); + } } private void runMaintainerAndAssertFiles(int fileReferenceCount, int downloadCount) { @@ -71,10 +65,10 @@ public class CachedFilesMaintainerTest { assertEquals(downloadCount, downloads.length); } - private void writeFileAndSetLastAccessedTime(File directory, String filename) throws IOException { + private File writeFile(File directory, String filename) throws IOException { File file = new File(directory, filename); IOUtils.writeFile(file, filename, false); - Files.setAttribute(file.toPath(), "lastAccessTime", FileTime.from(clock.instant())); + return file; } private static File newFolder(File root, String... subDirs) throws IOException { @@ -86,16 +80,4 @@ public class CachedFilesMaintainerTest { return result; } - private void createFiles() { - IntStream.of(0,1,2,3).forEach(i -> { - try { - writeFileAndSetLastAccessedTime(cachedFileReferences, "fileReference" + i); - writeFileAndSetLastAccessedTime(cachedDownloads, "download" + i); - clock.advance(Duration.ofMinutes(1)); - } catch (IOException e) { - throw new UncheckedIOException(e); - } - }); - } - } diff --git a/configserver/src/main/java/com/yahoo/vespa/config/server/ApplicationRepository.java b/configserver/src/main/java/com/yahoo/vespa/config/server/ApplicationRepository.java index f832c504526..2a15f724b29 100644 --- a/configserver/src/main/java/com/yahoo/vespa/config/server/ApplicationRepository.java +++ b/configserver/src/main/java/com/yahoo/vespa/config/server/ApplicationRepository.java @@ -4,9 +4,9 @@ package com.yahoo.vespa.config.server; import ai.vespa.http.DomainName; import ai.vespa.http.HttpURL; import ai.vespa.http.HttpURL.Query; +import com.yahoo.component.annotation.Inject; import com.yahoo.cloud.config.ConfigserverConfig; import com.yahoo.component.Version; -import com.yahoo.component.annotation.Inject; import com.yahoo.config.FileReference; import com.yahoo.config.application.api.ApplicationFile; import com.yahoo.config.application.api.ApplicationMetaData; @@ -83,18 +83,20 @@ import com.yahoo.vespa.curator.Curator; import com.yahoo.vespa.curator.stats.LockStats; import com.yahoo.vespa.curator.stats.ThreadLockStats; import com.yahoo.vespa.defaults.Defaults; -import com.yahoo.vespa.filedistribution.maintenance.FileDistributionCleanup; import com.yahoo.vespa.flags.FlagSource; import com.yahoo.vespa.flags.InMemoryFlagSource; import com.yahoo.vespa.orchestrator.Orchestrator; import java.io.File; import java.io.IOException; import java.io.InputStream; +import java.io.UncheckedIOException; import java.nio.file.Files; +import java.nio.file.attribute.BasicFileAttributes; import java.time.Clock; import java.time.Duration; import java.time.Instant; import java.util.Collection; +import java.util.Comparator; import java.util.HashMap; import java.util.HashSet; import java.util.List; @@ -114,9 +116,11 @@ import static com.yahoo.config.model.api.container.ContainerServiceType.LOGSERVE import static com.yahoo.vespa.config.server.application.ConfigConvergenceChecker.ServiceListResponse; import static com.yahoo.vespa.config.server.application.ConfigConvergenceChecker.ServiceResponse; import static com.yahoo.vespa.config.server.filedistribution.FileDistributionUtil.fileReferenceExistsOnDisk; +import static com.yahoo.vespa.config.server.filedistribution.FileDistributionUtil.getFileReferencesOnDisk; import static com.yahoo.vespa.config.server.tenant.TenantRepository.HOSTED_VESPA_TENANT; import static com.yahoo.vespa.curator.Curator.CompletionWaiter; import static com.yahoo.yolean.Exceptions.uncheck; +import static java.nio.file.Files.readAttributes; /** * The API for managing applications. @@ -586,11 +590,30 @@ public class ApplicationRepository implements com.yahoo.config.provision.Deploye return fileDistributionStatus.status(getApplication(applicationId), timeout); } - public void deleteUnusedFileDistributionReferences(File fileReferencesPath, Duration keepFileReferencesDuration) { - new FileDistributionCleanup(clock).deleteUnusedFileReferences(fileReferencesPath, keepFileReferencesDuration, getFileReferencesInUse()); + public List<String> deleteUnusedFileDistributionReferences(File fileReferencesPath, + Duration keepFileReferencesDuration, + int numberToAlwaysKeep) { + log.log(Level.FINE, () -> "Keep unused file references for " + keepFileReferencesDuration); + if (!fileReferencesPath.isDirectory()) throw new RuntimeException(fileReferencesPath + " is not a directory"); + + Set<String> fileReferencesInUse = getFileReferencesInUse(); + log.log(Level.FINE, () -> "File references in use : " + fileReferencesInUse); + + List<String> candidates = sortedUnusedFileReferences(fileReferencesPath, fileReferencesInUse, keepFileReferencesDuration); + // Do not delete the newest ones + List<String> fileReferencesToDelete = candidates.subList(0, Math.max(0, candidates.size() - numberToAlwaysKeep)); + if (fileReferencesToDelete.size() > 0) { + log.log(Level.FINE, () -> "Will delete file references not in use: " + fileReferencesToDelete); + fileReferencesToDelete.forEach(fileReference -> { + File file = new File(fileReferencesPath, fileReference); + if ( ! IOUtils.recursiveDeleteDir(file)) + log.log(Level.WARNING, "Could not delete " + file.getAbsolutePath()); + }); + } + return fileReferencesToDelete; } - Set<String> getFileReferencesInUse() { + private Set<String> getFileReferencesInUse() { Set<String> fileReferencesInUse = new HashSet<>(); for (var applicationId : listApplications()) { Application app = getApplication(applicationId); @@ -601,6 +624,18 @@ public class ApplicationRepository implements com.yahoo.config.provision.Deploye return fileReferencesInUse; } + private List<String> sortedUnusedFileReferences(File fileReferencesPath, Set<String> fileReferencesInUse, Duration keepFileReferences) { + Set<String> fileReferencesOnDisk = getFileReferencesOnDisk(fileReferencesPath); + log.log(Level.FINE, () -> "File references on disk (in " + fileReferencesPath + "): " + fileReferencesOnDisk); + Instant instant = clock.instant().minus(keepFileReferences); + return fileReferencesOnDisk + .stream() + .filter(fileReference -> ! fileReferencesInUse.contains(fileReference)) + .filter(fileReference -> isLastFileAccessBefore(new File(fileReferencesPath, fileReference), instant)) + .sorted(Comparator.comparing(a -> lastAccessed(new File(fileReferencesPath, a)))) + .collect(Collectors.toList()); + } + public Set<FileReference> getFileReferences(ApplicationId applicationId) { return getOptionalApplication(applicationId).map(app -> app.getModel().fileReferences()).orElse(Set.of()); } @@ -651,6 +686,20 @@ public class ApplicationRepository implements com.yahoo.config.provision.Deploye .collect(Collectors.toList()); } + private boolean isLastFileAccessBefore(File fileReference, Instant instant) { + return lastAccessed(fileReference).isBefore(instant); + } + + private Instant lastAccessed(File fileReference) { + BasicFileAttributes fileAttributes; + try { + fileAttributes = readAttributes(fileReference.toPath(), BasicFileAttributes.class); + return fileAttributes.lastAccessTime().toInstant(); + } catch (IOException e) { + throw new UncheckedIOException(e); + } + } + public Optional<String> getApplicationPackageReference(ApplicationId applicationId) { Optional<String> applicationPackage = Optional.empty(); Optional<Session> session = getActiveSession(applicationId); diff --git a/configserver/src/main/java/com/yahoo/vespa/config/server/filedistribution/FileDistributionUtil.java b/configserver/src/main/java/com/yahoo/vespa/config/server/filedistribution/FileDistributionUtil.java index c1de9b4e5f6..a1ddad7bfd4 100644 --- a/configserver/src/main/java/com/yahoo/vespa/config/server/filedistribution/FileDistributionUtil.java +++ b/configserver/src/main/java/com/yahoo/vespa/config/server/filedistribution/FileDistributionUtil.java @@ -5,9 +5,12 @@ import com.yahoo.cloud.config.ConfigserverConfig; import com.yahoo.config.FileReference; import com.yahoo.net.HostName; import com.yahoo.vespa.config.server.ConfigServerSpec; -import com.yahoo.vespa.filedistribution.maintenance.FileDistributionCleanup; + import java.io.File; +import java.util.Arrays; +import java.util.HashSet; import java.util.List; +import java.util.Set; import java.util.stream.Collectors; /** @@ -18,6 +21,17 @@ import java.util.stream.Collectors; */ public class FileDistributionUtil { + /** + * Returns all files in the given directory, non-recursive. + */ + public static Set<String> getFileReferencesOnDisk(File directory) { + Set<String> fileReferencesOnDisk = new HashSet<>(); + File[] filesOnDisk = directory.listFiles(); + if (filesOnDisk != null) + fileReferencesOnDisk.addAll(Arrays.stream(filesOnDisk).map(File::getName).collect(Collectors.toSet())); + return fileReferencesOnDisk; + } + public static List<String> getOtherConfigServersInCluster(ConfigserverConfig configserverConfig) { return ConfigServerSpec.fromConfig(configserverConfig) .stream() @@ -27,8 +41,7 @@ public class FileDistributionUtil { } public static boolean fileReferenceExistsOnDisk(File downloadDirectory, FileReference applicationPackageReference) { - return FileDistributionCleanup.getFileReferencesOnDisk(downloadDirectory.toPath()) - .anyMatch(fileReference -> fileReference.equals(applicationPackageReference.value())); + return getFileReferencesOnDisk(downloadDirectory).contains(applicationPackageReference.value()); } } diff --git a/configserver/src/main/java/com/yahoo/vespa/config/server/maintenance/FileDistributionMaintainer.java b/configserver/src/main/java/com/yahoo/vespa/config/server/maintenance/FileDistributionMaintainer.java index 5542d24253b..f6aee416c9c 100644 --- a/configserver/src/main/java/com/yahoo/vespa/config/server/maintenance/FileDistributionMaintainer.java +++ b/configserver/src/main/java/com/yahoo/vespa/config/server/maintenance/FileDistributionMaintainer.java @@ -20,6 +20,8 @@ import java.time.Duration; */ public class FileDistributionMaintainer extends ConfigServerMaintainer { + private static final int numberToAlwaysKeep = 20; + private final ApplicationRepository applicationRepository; private final File fileReferencesDir; private final Duration maxUnusedFileReferenceAge; @@ -37,7 +39,7 @@ public class FileDistributionMaintainer extends ConfigServerMaintainer { @Override protected double maintain() { - applicationRepository.deleteUnusedFileDistributionReferences(fileReferencesDir, maxUnusedFileReferenceAge); + applicationRepository.deleteUnusedFileDistributionReferences(fileReferencesDir, maxUnusedFileReferenceAge, numberToAlwaysKeep); return 1.0; } diff --git a/configserver/src/test/java/com/yahoo/vespa/config/server/ApplicationRepositoryTest.java b/configserver/src/test/java/com/yahoo/vespa/config/server/ApplicationRepositoryTest.java index 4f7be104b9c..99487230c5d 100644 --- a/configserver/src/test/java/com/yahoo/vespa/config/server/ApplicationRepositoryTest.java +++ b/configserver/src/test/java/com/yahoo/vespa/config/server/ApplicationRepositoryTest.java @@ -49,7 +49,6 @@ import com.yahoo.vespa.config.server.tenant.TestTenantRepository; import com.yahoo.vespa.config.util.ConfigUtils; import com.yahoo.vespa.curator.Curator; import com.yahoo.vespa.curator.mock.MockCurator; -import com.yahoo.vespa.filedistribution.maintenance.FileDistributionCleanup; import com.yahoo.vespa.flags.InMemoryFlagSource; import com.yahoo.vespa.model.VespaModelFactory; import org.junit.Before; @@ -294,10 +293,9 @@ public class ApplicationRepositoryTest { PrepareParams prepareParams = new PrepareParams.Builder().applicationId(applicationId()).ignoreValidationErrors(true).build(); deployApp(new File("src/test/apps/app"), prepareParams); - List<String> toBeDeleted = new FileDistributionCleanup(clock).deleteUnusedFileReferences(fileReferencesDir, - keepFileReferencesDuration, - 2, - applicationRepository.getFileReferencesInUse()); + List<String> toBeDeleted = applicationRepository.deleteUnusedFileDistributionReferences(fileReferencesDir, + keepFileReferencesDuration, + 2); Collections.sort(toBeDeleted); assertEquals(List.of("bar0", "foo"), toBeDeleted); // bar0 and foo are the only ones that will be deleted (keeps 2 newest no matter how old they are) diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentTrigger.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentTrigger.java index cf09afa7181..d5a31a07408 100644 --- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentTrigger.java +++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentTrigger.java @@ -91,10 +91,31 @@ public class DeploymentTrigger { status, false)); } + + // If app has been broken since it was first submitted, and not fixed for a long time, we stop managing it until a new submission comes in. + if (applicationWasAlwaysBroken(status)) + application = application.withProjectId(OptionalLong.empty()); + applications().store(application); }); } + private boolean applicationWasAlwaysBroken(DeploymentStatus status) { + // If application has a production deployment, we cannot forget it. + if (status.application().instances().values().stream().anyMatch(instance -> ! instance.productionDeployments().isEmpty())) + return false; + + // Then, we need a job that always failed, and failed on the last revision for at least 30 days. + RevisionId last = status.application().revisions().last().get().id(); + Instant threshold = clock.instant().minus(Duration.ofDays(30)); + for (JobStatus job : status.jobs().asList()) + for (Run run : job.runs().descendingMap().values()) + if (run.hasEnded() && ! run.hasFailed() || ! run.versions().targetRevision().equals(last)) break; + else if (run.start().isBefore(threshold)) return true; + + return false; + } + /** * Records information when a job completes (successfully or not). This information is used when deciding what to * trigger next. @@ -339,8 +360,8 @@ public class DeploymentTrigger { /** Returns the set of all jobs which have changes to propagate from the upstream steps. */ private List<Job> computeReadyJobs() { return jobs.deploymentStatuses(ApplicationList.from(applications().readable()) - .withProjectId() // Need to keep this, as we have applications with deployment spec that shouldn't be orchestrated. // Maybe not any longer? - .withDeploymentSpec()) + .withProjectId() // Need to keep this, as we have applications with deployment spec that shouldn't be orchestrated. + .withJobs()) .withChanges() .asList().stream() .filter(status -> ! hasExceededQuota(status.application().id().tenant())) diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/InternalStepRunner.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/InternalStepRunner.java index 46c93c019ee..e685e5d167e 100644 --- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/InternalStepRunner.java +++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/InternalStepRunner.java @@ -77,6 +77,7 @@ import static com.yahoo.vespa.hosted.controller.deployment.RunStatus.noTests; import static com.yahoo.vespa.hosted.controller.deployment.RunStatus.nodeAllocationFailure; import static com.yahoo.vespa.hosted.controller.deployment.RunStatus.reset; import static com.yahoo.vespa.hosted.controller.deployment.RunStatus.running; +import static com.yahoo.vespa.hosted.controller.deployment.RunStatus.success; import static com.yahoo.vespa.hosted.controller.deployment.RunStatus.testFailure; import static com.yahoo.vespa.hosted.controller.deployment.Step.Status.succeeded; import static com.yahoo.vespa.hosted.controller.deployment.Step.copyVespaLogs; @@ -763,14 +764,18 @@ public class InternalStepRunner implements StepRunner { private Optional<RunStatus> report(RunId id, DualLogger logger) { try { + boolean isRemoved = ! id.type().environment().isManuallyDeployed() + && ! controller.jobController().deploymentStatus(controller.applications().requireApplication(TenantAndApplicationId.from(id.application()))) + .jobSteps().containsKey(id.job()); + controller.jobController().active(id).ifPresent(run -> { if (run.status() == reset) return; - if (run.hasFailed()) + if (run.hasFailed() && ! isRemoved) sendEmailNotification(run, logger); - updateConsoleNotification(run); + updateConsoleNotification(run, isRemoved); }); } catch (IllegalStateException e) { @@ -820,10 +825,10 @@ public class InternalStepRunner implements StepRunner { .orElse(true); } - private void updateConsoleNotification(Run run) { + private void updateConsoleNotification(Run run, boolean isRemoved) { NotificationSource source = NotificationSource.from(run.id()); Consumer<String> updater = msg -> controller.notificationsDb().setNotification(source, Notification.Type.deployment, Notification.Level.error, msg); - switch (run.status()) { + switch (isRemoved ? success : run.status()) { case aborted: return; // wait and see how the next run goes. case noTests: case running: diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/ArchiveUriUpdater.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/ArchiveUriUpdater.java index 42821ea8fe2..0c8a50fa821 100644 --- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/ArchiveUriUpdater.java +++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/ArchiveUriUpdater.java @@ -8,6 +8,7 @@ import com.yahoo.vespa.hosted.controller.Controller; import com.yahoo.vespa.hosted.controller.api.integration.configserver.NodeRepository; import com.yahoo.vespa.hosted.controller.application.SystemApplication; import com.yahoo.vespa.hosted.controller.archive.CuratorArchiveBucketDb; +import com.yahoo.yolean.Exceptions; import java.net.URI; import java.time.Duration; @@ -15,6 +16,7 @@ import java.util.HashMap; import java.util.HashSet; import java.util.Map; import java.util.Set; +import java.util.logging.Level; /** * Updates archive URIs for tenants in all zones. @@ -51,20 +53,28 @@ public class ArchiveUriUpdater extends ControllerMaintainer { } } - tenantsByZone.forEach((zone, tenants) -> { - Map<TenantName, URI> zoneArchiveUris = nodeRepository.getArchiveUris(zone); - for (TenantName tenant : tenants) { - archiveBucketDb.archiveUriFor(zone, tenant, true) - .filter(uri -> !uri.equals(zoneArchiveUris.get(tenant))) - .ifPresent(uri -> nodeRepository.setArchiveUri(zone, tenant, uri)); - } + int failures = 0; + for (ZoneId zone : tenantsByZone.keySet()) { + try { + Map<TenantName, URI> zoneArchiveUris = nodeRepository.getArchiveUris(zone); + + for (TenantName tenant : tenantsByZone.get(zone)) { + archiveBucketDb.archiveUriFor(zone, tenant, true) + .filter(uri -> !uri.equals(zoneArchiveUris.get(tenant))) + .ifPresent(uri -> nodeRepository.setArchiveUri(zone, tenant, uri)); + } - zoneArchiveUris.keySet().stream() - .filter(tenant -> !tenants.contains(tenant)) - .forEach(tenant -> nodeRepository.removeArchiveUri(zone, tenant)); - }); + zoneArchiveUris.keySet().stream() + .filter(tenant -> !tenantsByZone.get(zone).contains(tenant)) + .forEach(tenant -> nodeRepository.removeArchiveUri(zone, tenant)); + } catch (Exception e) { + log.log(Level.WARNING, "Failed to update archive URI in " + zone + ". Retrying in " + interval() + ". Error: " + + Exceptions.toMessageString(e)); + failures++; + } + } - return 1.0; + return asSuccessFactor(tenantsByZone.size(), failures); } } diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/OutstandingChangeDeployer.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/OutstandingChangeDeployer.java index b33a43a2031..37b06fea066 100644 --- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/OutstandingChangeDeployer.java +++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/OutstandingChangeDeployer.java @@ -26,9 +26,8 @@ public class OutstandingChangeDeployer extends ControllerMaintainer { protected double maintain() { double ok = 0, total = 0; for (Application application : ApplicationList.from(controller().applications().readable()) - .withProductionDeployment() .withProjectId() - .withDeploymentSpec() + .withJobs() .asList()) try { ++total; diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/Upgrader.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/Upgrader.java index 037dacfcac9..d49cb244e47 100644 --- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/Upgrader.java +++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/Upgrader.java @@ -70,7 +70,8 @@ public class Upgrader extends ControllerMaintainer { private DeploymentStatusList deploymentStatuses(VersionStatus versionStatus) { return controller().jobController().deploymentStatuses(ApplicationList.from(controller().applications().readable()) - .withProjectId(), + .withProjectId() + .withJobs(), versionStatus); } diff --git a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentTriggerTest.java b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentTriggerTest.java index d8cef45f124..537090c6d68 100644 --- a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentTriggerTest.java +++ b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentTriggerTest.java @@ -2700,6 +2700,54 @@ public class DeploymentTriggerTest { } @Test + void testBrokenApplication() { + DeploymentContext app = tester.newDeploymentContext(); + app.submit().runJob(systemTest).failDeployment(stagingTest).failDeployment(stagingTest); + tester.clock().advance(Duration.ofDays(31)); + tester.outstandingChangeDeployer().run(); + assertEquals(OptionalLong.empty(), app.application().projectId()); + + app.assertNotRunning(stagingTest); + tester.triggerJobs(); + app.assertNotRunning(stagingTest); + assertEquals(4, app.deploymentStatus().jobsToRun().size()); + + app.submit().runJob(systemTest).failDeployment(stagingTest); + tester.clock().advance(Duration.ofDays(20)); + app.submit().runJob(systemTest).failDeployment(stagingTest); + tester.clock().advance(Duration.ofDays(20)); + tester.outstandingChangeDeployer().run(); + assertEquals(OptionalLong.of(1000), app.application().projectId()); + tester.clock().advance(Duration.ofDays(20)); + tester.outstandingChangeDeployer().run(); + assertEquals(OptionalLong.empty(), app.application().projectId()); + + app.assertNotRunning(stagingTest); + tester.triggerJobs(); + app.assertNotRunning(stagingTest); + assertEquals(4, app.deploymentStatus().jobsToRun().size()); + + app.submit().runJob(systemTest).runJob(stagingTest).failDeployment(productionUsCentral1); + tester.clock().advance(Duration.ofDays(31)); + tester.outstandingChangeDeployer().run(); + assertEquals(OptionalLong.empty(), app.application().projectId()); + + app.assertNotRunning(productionUsCentral1); + tester.triggerJobs(); + app.assertNotRunning(productionUsCentral1); + assertEquals(3, app.deploymentStatus().jobsToRun().size()); + + app.submit().runJob(systemTest).runJob(stagingTest).timeOutConvergence(productionUsCentral1); + tester.clock().advance(Duration.ofDays(31)); + tester.outstandingChangeDeployer().run(); + assertEquals(OptionalLong.of(1000), app.application().projectId()); + + app.assertNotRunning(productionUsCentral1); + tester.triggerJobs(); + app.assertRunning(productionUsCentral1); + } + + @Test void testJobNames() { ZoneRegistryMock zones = new ZoneRegistryMock(SystemName.main); List<ZoneApi> existing = new ArrayList<>(zones.zones().all().zones()); diff --git a/filedistribution/src/main/java/com/yahoo/vespa/filedistribution/maintenance/FileDistributionCleanup.java b/filedistribution/src/main/java/com/yahoo/vespa/filedistribution/maintenance/FileDistributionCleanup.java deleted file mode 100644 index 9c04e7253bb..00000000000 --- a/filedistribution/src/main/java/com/yahoo/vespa/filedistribution/maintenance/FileDistributionCleanup.java +++ /dev/null @@ -1,107 +0,0 @@ -// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. -package com.yahoo.vespa.filedistribution.maintenance; - -import com.yahoo.io.IOUtils; -import java.io.File; -import java.io.IOException; -import java.io.UncheckedIOException; -import java.nio.file.Files; -import java.nio.file.Path; -import java.nio.file.attribute.BasicFileAttributes; -import java.time.Clock; -import java.time.Duration; -import java.time.Instant; -import java.util.ArrayList; -import java.util.Comparator; -import java.util.List; -import java.util.Set; -import java.util.concurrent.atomic.AtomicInteger; -import java.util.logging.Level; -import java.util.logging.Logger; -import java.util.stream.Stream; - -import static java.nio.file.Files.readAttributes; - -/** - * Removes file references not used since a configured time, but always keeps a certain number of file references - * even when they are unused (unused is based on last access time for the file). - * - * @author hmusum - */ -public class FileDistributionCleanup { - - private static final Logger log = Logger.getLogger(FileDistributionCleanup.class.getName()); - private static final int numberToAlwaysKeep = 20; - - private final Clock clock; - - public FileDistributionCleanup(Clock clock) { - this.clock = clock; - } - - public List<String> deleteUnusedFileReferences(File fileReferencesPath, - Duration keepFileReferencesDuration, - Set<String> fileReferencesInUse) { - return deleteUnusedFileReferences(fileReferencesPath, - keepFileReferencesDuration, - numberToAlwaysKeep, - fileReferencesInUse); - } - - public List<String> deleteUnusedFileReferences(File fileReferencesDir, - Duration keepFileReferencesDuration, - int numberToAlwaysKeep, - Set<String> fileReferencesInUse) { - if (!fileReferencesDir.isDirectory()) throw new RuntimeException(fileReferencesDir + " is not a directory"); - - log.log(Level.FINE, () -> "Keep unused file references for " + keepFileReferencesDuration + - ", file references in use : " + fileReferencesInUse); - List<String> fileReferencesDeleted = new ArrayList<>(); - Path fileReferencesPath = fileReferencesDir.toPath(); - try (Stream<String> candidates = sortedUnusedFileReferences(fileReferencesPath, fileReferencesInUse, keepFileReferencesDuration)) { - final AtomicInteger i = new AtomicInteger(0); - candidates.forEach(fileReference -> { - // Do not delete the newest ones - if (i.incrementAndGet() > numberToAlwaysKeep) { - fileReferencesDeleted.add(fileReference); - File file = new File(fileReferencesDir, fileReference); - if (!IOUtils.recursiveDeleteDir(file)) - log.log(Level.WARNING, "Could not delete " + file.getAbsolutePath()); - } - }); - } - return fileReferencesDeleted; - } - - // Sorted, newest first - private Stream<String> sortedUnusedFileReferences(Path fileReferencesPath, Set<String> fileReferencesInUse, Duration keepFileReferences) { - Instant instant = clock.instant().minus(keepFileReferences); - return getFileReferencesOnDisk(fileReferencesPath) - .filter(fileReference -> !fileReferencesInUse.contains(fileReference)) - .filter(fileReference -> isLastFileAccessBefore(new File(fileReferencesPath.toFile(), fileReference), instant)) - .sorted(Comparator.comparing(a -> lastAccessed(new File(fileReferencesPath.toFile(), (String) a))).reversed()); - } - - private boolean isLastFileAccessBefore(File fileReference, Instant instant) { - return lastAccessed(fileReference).isBefore(instant); - } - - private Instant lastAccessed(File fileReference) { - BasicFileAttributes fileAttributes; - try { - fileAttributes = readAttributes(fileReference.toPath(), BasicFileAttributes.class); - return fileAttributes.lastAccessTime().toInstant(); - } catch (IOException e) { - throw new UncheckedIOException(e); - } - } - - public static Stream<String> getFileReferencesOnDisk(Path directory) { - try { - return Files.list(directory).map(path -> path.toFile().getName()); - } catch (IOException e) { - throw new UncheckedIOException(e); - } - } - -} diff --git a/vespalib/CMakeLists.txt b/vespalib/CMakeLists.txt index 8edd7fb2c5d..6ecac23d5fa 100644 --- a/vespalib/CMakeLists.txt +++ b/vespalib/CMakeLists.txt @@ -42,6 +42,8 @@ vespa_define_module( src/tests/component src/tests/compress src/tests/compression + src/tests/coro/detached + src/tests/coro/lazy src/tests/cpu_usage src/tests/crc src/tests/crypto @@ -202,9 +204,13 @@ vespa_define_module( src/tests/fastlib/text LIBS + src/vespa/fastlib/io + src/vespa/fastlib/text + src/vespa/fastlib/text/apps src/vespa/vespalib src/vespa/vespalib/btree src/vespa/vespalib/component + src/vespa/vespalib/coro src/vespa/vespalib/crypto src/vespa/vespalib/data src/vespa/vespalib/data/slime @@ -231,7 +237,4 @@ vespa_define_module( src/vespa/vespalib/time src/vespa/vespalib/trace src/vespa/vespalib/util - src/vespa/fastlib/io - src/vespa/fastlib/text - src/vespa/fastlib/text/apps ) diff --git a/vespalib/src/tests/coro/detached/CMakeLists.txt b/vespalib/src/tests/coro/detached/CMakeLists.txt new file mode 100644 index 00000000000..237b8615fec --- /dev/null +++ b/vespalib/src/tests/coro/detached/CMakeLists.txt @@ -0,0 +1,9 @@ +# Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +vespa_add_executable(vespalib_detached_test_app TEST + SOURCES + detached_test.cpp + DEPENDS + vespalib + GTest::GTest +) +vespa_add_test(NAME vespalib_detached_test_app COMMAND vespalib_detached_test_app) diff --git a/vespalib/src/tests/coro/detached/detached_test.cpp b/vespalib/src/tests/coro/detached/detached_test.cpp new file mode 100644 index 00000000000..f23d16cc75c --- /dev/null +++ b/vespalib/src/tests/coro/detached/detached_test.cpp @@ -0,0 +1,19 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#include <vespa/vespalib/coro/detached.h> +#include <vespa/vespalib/gtest/gtest.h> + +using vespalib::coro::Detached; + +Detached set_result(int &res, int value) { + res = value; + co_return; +} + +TEST(DetachedTest, call_detached_coroutine) { + int result = 0; + set_result(result, 42); + EXPECT_EQ(result, 42); +} + +GTEST_MAIN_RUN_ALL_TESTS() diff --git a/vespalib/src/tests/coro/lazy/CMakeLists.txt b/vespalib/src/tests/coro/lazy/CMakeLists.txt new file mode 100644 index 00000000000..daa11eb3576 --- /dev/null +++ b/vespalib/src/tests/coro/lazy/CMakeLists.txt @@ -0,0 +1,9 @@ +# Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +vespa_add_executable(vespalib_lazy_test_app TEST + SOURCES + lazy_test.cpp + DEPENDS + vespalib + GTest::GTest +) +vespa_add_test(NAME vespalib_lazy_test_app COMMAND vespalib_lazy_test_app) diff --git a/vespalib/src/tests/coro/lazy/lazy_test.cpp b/vespalib/src/tests/coro/lazy/lazy_test.cpp new file mode 100644 index 00000000000..a715e473aaf --- /dev/null +++ b/vespalib/src/tests/coro/lazy/lazy_test.cpp @@ -0,0 +1,44 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#include <vespa/vespalib/coro/lazy.h> +#include <vespa/vespalib/coro/sync_wait.h> +#include <vespa/vespalib/gtest/gtest.h> + +using vespalib::coro::Lazy; +using vespalib::coro::sync_wait; + +Lazy<int> make_lazy(int value) { + co_return value; +} + +Lazy<int> async_add_values(int a, int b) { + auto lazy_a = make_lazy(a); + auto lazy_b = make_lazy(b); + co_return (co_await lazy_a + co_await lazy_b); +} + +Lazy<int> async_sum(Lazy<int> a, Lazy<int> b) { + co_return (co_await a + co_await b); +} + +TEST(LazyTest, simple_lazy_value) { + auto lazy = make_lazy(42); + auto result = sync_wait(lazy); + EXPECT_EQ(result, 42); +} + +TEST(LazyTest, async_sum_of_async_values) { + auto lazy = async_add_values(10, 20); + auto result = sync_wait(lazy); + EXPECT_EQ(result, 30); +} + +TEST(LazyTest, async_sum_of_external_async_values) { + auto a = make_lazy(100); + auto b = make_lazy(200); + auto lazy = async_sum(std::move(a), std::move(b)); + auto result = sync_wait(lazy); + EXPECT_EQ(result, 300); +} + +GTEST_MAIN_RUN_ALL_TESTS() diff --git a/vespalib/src/vespa/vespalib/coro/CMakeLists.txt b/vespalib/src/vespa/vespalib/coro/CMakeLists.txt new file mode 100644 index 00000000000..d190c2e8ddc --- /dev/null +++ b/vespalib/src/vespa/vespalib/coro/CMakeLists.txt @@ -0,0 +1,5 @@ +# Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +vespa_add_library(vespalib_vespalib_coro OBJECT + SOURCES + DEPENDS +) diff --git a/vespalib/src/vespa/vespalib/coro/detached.h b/vespalib/src/vespa/vespalib/coro/detached.h new file mode 100644 index 00000000000..6d051e53121 --- /dev/null +++ b/vespalib/src/vespa/vespalib/coro/detached.h @@ -0,0 +1,20 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#pragma once + +#include <coroutine> +#include <exception> + +namespace vespalib::coro { + +struct Detached { + struct promise_type { + Detached get_return_object() { return {}; } + static std::suspend_never initial_suspend() noexcept { return {}; } + static std::suspend_never final_suspend() noexcept { return {}; } + static void unhandled_exception() { std::terminate(); } + void return_void() noexcept {}; + }; +}; + +} diff --git a/vespalib/src/vespa/vespalib/coro/lazy.h b/vespalib/src/vespa/vespalib/coro/lazy.h new file mode 100644 index 00000000000..b007f565c93 --- /dev/null +++ b/vespalib/src/vespa/vespalib/coro/lazy.h @@ -0,0 +1,72 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#pragma once + +#include <concepts> +#include <coroutine> +#include <optional> + +namespace vespalib::coro { + +template <std::movable T> +class [[nodiscard]] Lazy { +public: + struct promise_type { + Lazy<T> get_return_object() { return Lazy(Handle::from_promise(*this)); } + static std::suspend_always initial_suspend() noexcept { return {}; } + static auto final_suspend() noexcept { + struct awaiter { + bool await_ready() const noexcept { return false; } + std::coroutine_handle<> await_suspend(Handle handle) const noexcept { + auto waiter = handle.promise().waiter; + return waiter ? waiter : std::noop_coroutine(); + } + void await_resume() const noexcept {} + }; + return awaiter(); + } + void return_value(T ret_value) noexcept { + value = std::move(ret_value); + } + static void unhandled_exception() { std::terminate(); } + std::optional<T> value; + std::coroutine_handle<> waiter; + promise_type(promise_type &&) = delete; + promise_type(const promise_type &) = delete; + promise_type() : value(std::nullopt), waiter(nullptr) {} + }; + using Handle = std::coroutine_handle<promise_type>; + +private: + Handle _handle; + +public: + Lazy(const Lazy &) = delete; + Lazy &operator=(const Lazy &) = delete; + explicit Lazy(Handle handle_in) noexcept : _handle(handle_in) {} + Lazy(Lazy &&rhs) noexcept : _handle(std::exchange(rhs._handle, nullptr)) {} + auto operator co_await() { + struct awaiter { + Handle handle; + bool await_ready() const noexcept { + return handle.done(); + } + Handle await_suspend(std::coroutine_handle<> waiter) const noexcept { + handle.promise().waiter = waiter; + return handle; + } + T &await_resume() const noexcept { + return *handle.promise().value; + } + awaiter(Handle handle_in) : handle(handle_in) {} + }; + return awaiter(_handle); + } + ~Lazy() { + if (_handle) { + _handle.destroy(); + } + } +}; + +} diff --git a/vespalib/src/vespa/vespalib/coro/sync_wait.h b/vespalib/src/vespa/vespalib/coro/sync_wait.h new file mode 100644 index 00000000000..e6a8fdc43f6 --- /dev/null +++ b/vespalib/src/vespa/vespalib/coro/sync_wait.h @@ -0,0 +1,34 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#pragma once + +#include "detached.h" +#include "lazy.h" +#include <coroutine> +#include <vespa/vespalib/util/gate.h> + +namespace vespalib::coro { + +template <typename T, typename S> +Detached signal_when_done(Lazy<T> &value, S &sink) { + sink(co_await value); +} + +template <typename T> +T &sync_wait(Lazy<T> &value) { + struct MySink { + Gate gate; + T *result; + void operator()(T &result_in) { + result = &result_in; + gate.countDown(); + } + MySink() : gate(), result(nullptr) {} + }; + MySink sink; + signal_when_done(value, sink); + sink.gate.await(); + return *sink.result; +} + +} |