From e5b29fd34ba349fce07d13f8ad06b70173b34343 Mon Sep 17 00:00:00 2001 From: Harald Musum Date: Sat, 22 Jul 2023 16:32:39 +0200 Subject: Keep some file references and download even if they have not been used for some time --- .../FileReferencesAndDownloadsMaintainer.java | 79 +++++++++++++++------- .../FileReferencesAndDownloadsMaintainerTest.java | 61 ++++++++++++++--- 2 files changed, 106 insertions(+), 34 deletions(-) diff --git a/config-proxy/src/main/java/com/yahoo/vespa/config/proxy/filedistribution/FileReferencesAndDownloadsMaintainer.java b/config-proxy/src/main/java/com/yahoo/vespa/config/proxy/filedistribution/FileReferencesAndDownloadsMaintainer.java index 627a15aab65..091f3eebc21 100644 --- a/config-proxy/src/main/java/com/yahoo/vespa/config/proxy/filedistribution/FileReferencesAndDownloadsMaintainer.java +++ b/config-proxy/src/main/java/com/yahoo/vespa/config/proxy/filedistribution/FileReferencesAndDownloadsMaintainer.java @@ -4,21 +4,23 @@ package com.yahoo.vespa.config.proxy.filedistribution; import com.yahoo.concurrent.DaemonThreadFactory; import com.yahoo.io.IOUtils; import com.yahoo.vespa.filedistribution.FileDownloader; + import java.io.File; import java.io.IOException; import java.io.UncheckedIOException; +import java.nio.file.Path; import java.nio.file.attribute.BasicFileAttributes; import java.time.Duration; import java.time.Instant; -import java.util.Arrays; +import java.util.Comparator; import java.util.HashSet; +import java.util.List; import java.util.Set; import java.util.concurrent.ScheduledExecutorService; import java.util.concurrent.ScheduledThreadPoolExecutor; import java.util.concurrent.TimeUnit; import java.util.logging.Level; import java.util.logging.Logger; -import java.util.stream.Collectors; import static java.nio.file.Files.readAttributes; @@ -35,6 +37,7 @@ class FileReferencesAndDownloadsMaintainer implements Runnable { private static final File defaultUrlDownloadDir = UrlDownloadRpcServer.downloadDir; private static final File defaultFileReferencesDownloadDir = FileDownloader.defaultDownloadDirectory; private static final Duration defaultDurationToKeepFiles = Duration.ofDays(30); + private static final int defaultOutdatedFilesToKeep = 20; private static final Duration interval = Duration.ofMinutes(1); private final ScheduledExecutorService executor = @@ -42,15 +45,20 @@ class FileReferencesAndDownloadsMaintainer implements Runnable { private final File urlDownloadDir; private final File fileReferencesDownloadDir; private final Duration durationToKeepFiles; + private final int outDatedFilesToKeep; FileReferencesAndDownloadsMaintainer() { - this(defaultFileReferencesDownloadDir, defaultUrlDownloadDir, keepFileReferencesDuration()); + this(defaultFileReferencesDownloadDir, defaultUrlDownloadDir, keepFileReferencesDuration(), outDatedFilesToKeep()); } - FileReferencesAndDownloadsMaintainer(File fileReferencesDownloadDir, File urlDownloadDir, Duration durationToKeepFiles) { + FileReferencesAndDownloadsMaintainer(File fileReferencesDownloadDir, + File urlDownloadDir, + Duration durationToKeepFiles, + int outdatedFilesToKeep) { this.fileReferencesDownloadDir = fileReferencesDownloadDir; this.urlDownloadDir = urlDownloadDir; this.durationToKeepFiles = durationToKeepFiles; + this.outDatedFilesToKeep = outdatedFilesToKeep; executor.scheduleAtFixedRate(this, interval.toSeconds(), interval.toSeconds(), TimeUnit.SECONDS); } @@ -75,32 +83,49 @@ class FileReferencesAndDownloadsMaintainer implements Runnable { } private void deleteUnusedFiles(File directory) { - Instant deleteNotUsedSinceInstant = Instant.now().minus(durationToKeepFiles); - Set filesOnDisk = new HashSet<>(); + File[] files = directory.listFiles(); - if (files != null) - filesOnDisk.addAll(Arrays.stream(files).map(File::getName).collect(Collectors.toSet())); - log.log(Level.FINE, () -> "Files on disk (in " + directory + "): " + filesOnDisk); + if (files == null) return; + + List filesToDelete = filesThatCanBeDeleted(files); + filesToDelete.forEach(fileReference -> { + if (IOUtils.recursiveDeleteDir(fileReference)) + log.log(Level.FINE, "Deleted " + fileReference.getAbsolutePath()); + else + log.log(Level.WARNING, "Could not delete " + fileReference.getAbsolutePath()); + }); + } + + private List filesThatCanBeDeleted(File[] files) { + Instant deleteNotUsedSinceInstant = Instant.now().minus(durationToKeepFiles); - Set filesToDelete = filesOnDisk + Set filesOnDisk = new HashSet<>(List.of(files)); + log.log(Level.FINE, () -> "Files on disk: " + filesOnDisk); + int deleteCount = Math.max(0, filesOnDisk.size() - outDatedFilesToKeep); + var canBeDeleted = filesOnDisk .stream() - .filter(fileReference -> isFileLastModifiedBefore(new File(directory, fileReference), deleteNotUsedSinceInstant)) - .collect(Collectors.toSet()); - if (filesToDelete.size() > 0) { - log.log(Level.INFO, "Files that can be deleted in " + directory + " (not used since " + deleteNotUsedSinceInstant + "): " + filesToDelete); - filesToDelete.forEach(fileReference -> { - File file = new File(directory, fileReference); - if (!IOUtils.recursiveDeleteDir(file)) - log.log(Level.WARNING, "Could not delete " + file.getAbsolutePath()); - }); - } + .peek(file -> log.log(Level.FINE, () -> file + ":" + fileLastModifiedTime(file.toPath()))) + .filter(fileReference -> isFileLastModifiedBefore(fileReference, deleteNotUsedSinceInstant)) + .sorted(Comparator.comparing(fileReference -> fileLastModifiedTime(fileReference.toPath()))) + .toList(); + + // Make sure we keep some files + canBeDeleted = canBeDeleted.subList(0, Math.min(canBeDeleted.size(), deleteCount)); + log.log(Level.INFO, "Files that can be deleted (not accessed since " + deleteNotUsedSinceInstant + + ", will also keep " + outDatedFilesToKeep + + " no matter when last accessed): " + canBeDeleted); + + return canBeDeleted; } private boolean isFileLastModifiedBefore(File fileReference, Instant instant) { - BasicFileAttributes fileAttributes; + return fileLastModifiedTime(fileReference.toPath()).isBefore(instant); + } + + private static Instant fileLastModifiedTime(Path fileReference) { try { - fileAttributes = readAttributes(fileReference.toPath(), BasicFileAttributes.class); - return fileAttributes.lastModifiedTime().toInstant().isBefore(instant); + BasicFileAttributes fileAttributes = readAttributes(fileReference, BasicFileAttributes.class); + return fileAttributes.lastModifiedTime().toInstant(); } catch (IOException e) { throw new UncheckedIOException(e); } @@ -114,4 +139,12 @@ class FileReferencesAndDownloadsMaintainer implements Runnable { return defaultDurationToKeepFiles; } + private static int outDatedFilesToKeep() { + String env = System.getenv("VESPA_KEEP_OUTDATED_FILE_REFERENCES_COUNT"); + if (env != null && !env.isEmpty()) + return Integer.parseInt(env); + else + return defaultOutdatedFilesToKeep; + } + } diff --git a/config-proxy/src/test/java/com/yahoo/vespa/config/proxy/filedistribution/FileReferencesAndDownloadsMaintainerTest.java b/config-proxy/src/test/java/com/yahoo/vespa/config/proxy/filedistribution/FileReferencesAndDownloadsMaintainerTest.java index fad021c0119..19e2d99c3ae 100644 --- a/config-proxy/src/test/java/com/yahoo/vespa/config/proxy/filedistribution/FileReferencesAndDownloadsMaintainerTest.java +++ b/config-proxy/src/test/java/com/yahoo/vespa/config/proxy/filedistribution/FileReferencesAndDownloadsMaintainerTest.java @@ -10,6 +10,9 @@ import java.io.File; import java.io.IOException; import java.time.Duration; import java.time.Instant; +import java.util.ArrayList; +import java.util.List; +import java.util.stream.IntStream; import static org.junit.jupiter.api.Assertions.assertEquals; import static org.junit.jupiter.api.Assertions.assertNotNull; @@ -19,9 +22,12 @@ import static org.junit.jupiter.api.Assertions.assertNotNull; */ public class FileReferencesAndDownloadsMaintainerTest { + private static final Duration keepDuration = Duration.ofMinutes(1); + private static final int outDatedFilesToKeep = 9; + private File cachedFileReferences; private File cachedDownloads; - private FileReferencesAndDownloadsMaintainer cachedFilesMaintainer; + private FileReferencesAndDownloadsMaintainer maintainer; @TempDir public File tempFolder; @@ -30,22 +36,55 @@ public class FileReferencesAndDownloadsMaintainerTest { public void setup() throws IOException { cachedFileReferences = newFolder(tempFolder, "cachedFileReferences"); cachedDownloads = newFolder(tempFolder, "cachedDownloads"); - cachedFilesMaintainer = new FileReferencesAndDownloadsMaintainer(cachedFileReferences, cachedDownloads, Duration.ofMinutes(1)); + maintainer = new FileReferencesAndDownloadsMaintainer(cachedFileReferences, cachedDownloads, keepDuration, outDatedFilesToKeep); } @Test - void require_old_files_to_be_deleted() throws IOException { + void require_old_files_to_be_deleted() { runMaintainerAndAssertFiles(0, 0); - File fileReference = writeFile(cachedFileReferences, "fileReference"); - File download = writeFile(cachedDownloads, "download"); - runMaintainerAndAssertFiles(1, 1); + var fileReferences = writeFiles(20); + var downloads = writeDownloads(21); + runMaintainerAndAssertFiles(20, 21); - updateLastModifiedTimeStamp(fileReference, Instant.now().minus(Duration.ofMinutes(10))); - runMaintainerAndAssertFiles(0, 1); + updateLastModifiedTimestamp(0, 5, fileReferences, downloads); + runMaintainerAndAssertFiles(15, 16); - updateLastModifiedTimeStamp(download, Instant.now().minus(Duration.ofMinutes(10))); - runMaintainerAndAssertFiles(0, 0); + updateLastModifiedTimestamp(6, 20, fileReferences, downloads); + // Should keep at least outDatedFilesToKeep file references and downloads even if there are more that are old + runMaintainerAndAssertFiles(outDatedFilesToKeep, outDatedFilesToKeep); + } + + private void updateLastModifiedTimestamp(int startInclusive, int endExclusive, List fileReferences, List downloads) { + IntStream.range(startInclusive, endExclusive).forEach(i -> { + Instant instant = Instant.now().minus(keepDuration.plus(Duration.ofMinutes(1)).minus(Duration.ofSeconds(i))); + updateLastModifiedTimeStamp(fileReferences.get(i), instant); + updateLastModifiedTimeStamp(downloads.get(i), instant); + }); + } + + private List writeFiles(int count) { + List files = new ArrayList<>(); + IntStream.range(0, count).forEach(i -> { + try { + files.add(writeFile(cachedFileReferences, "fileReference" + i)); + } catch (IOException e) { + throw new RuntimeException(e); + } + }); + return files; + } + + private List writeDownloads(int count) { + List files = new ArrayList<>(); + IntStream.range(0, count).forEach(i -> { + try { + files.add(writeFile(cachedDownloads, "download" + i)); + } catch (IOException e) { + throw new RuntimeException(e); + } + }); + return files; } private void updateLastModifiedTimeStamp(File file, Instant instant) { @@ -55,7 +94,7 @@ public class FileReferencesAndDownloadsMaintainerTest { } private void runMaintainerAndAssertFiles(int fileReferenceCount, int downloadCount) { - cachedFilesMaintainer.run(); + maintainer.run(); File[] fileReferences = cachedFileReferences.listFiles(); assertNotNull(fileReferences); assertEquals(fileReferenceCount, fileReferences.length); -- cgit v1.2.3 From 06074fd0835fc94bd93f6b2b530a954f0e8980b6 Mon Sep 17 00:00:00 2001 From: Harald Musum Date: Tue, 25 Jul 2023 10:49:11 +0200 Subject: Do not run maintainer on config server hosts Config server has its own maintainer, running this one might remove files that are still in use (belonging to an active application), so don't run it if this host is one in VESPA_CONFIGSERVERS or it's empty/null --- .../FileReferencesAndDownloadsMaintainer.java | 57 ++++++++++++++++------ .../FileReferencesAndDownloadsMaintainerTest.java | 18 ++++++- 2 files changed, 58 insertions(+), 17 deletions(-) diff --git a/config-proxy/src/main/java/com/yahoo/vespa/config/proxy/filedistribution/FileReferencesAndDownloadsMaintainer.java b/config-proxy/src/main/java/com/yahoo/vespa/config/proxy/filedistribution/FileReferencesAndDownloadsMaintainer.java index 091f3eebc21..eab1368a2a1 100644 --- a/config-proxy/src/main/java/com/yahoo/vespa/config/proxy/filedistribution/FileReferencesAndDownloadsMaintainer.java +++ b/config-proxy/src/main/java/com/yahoo/vespa/config/proxy/filedistribution/FileReferencesAndDownloadsMaintainer.java @@ -3,6 +3,7 @@ package com.yahoo.vespa.config.proxy.filedistribution; import com.yahoo.concurrent.DaemonThreadFactory; import com.yahoo.io.IOUtils; +import com.yahoo.vespa.config.util.ConfigUtils; import com.yahoo.vespa.filedistribution.FileDownloader; import java.io.File; @@ -15,6 +16,7 @@ import java.time.Instant; import java.util.Comparator; import java.util.HashSet; import java.util.List; +import java.util.Optional; import java.util.Set; import java.util.concurrent.ScheduledExecutorService; import java.util.concurrent.ScheduledThreadPoolExecutor; @@ -23,6 +25,7 @@ import java.util.logging.Level; import java.util.logging.Logger; import static java.nio.file.Files.readAttributes; +import static java.util.logging.Level.INFO; /** * Deletes file references and url downloads that have not been used for some time. @@ -40,30 +43,40 @@ class FileReferencesAndDownloadsMaintainer implements Runnable { private static final int defaultOutdatedFilesToKeep = 20; private static final Duration interval = Duration.ofMinutes(1); - private final ScheduledExecutorService executor = - new ScheduledThreadPoolExecutor(1, new DaemonThreadFactory("file references and downloads cleanup")); + private final Optional executor; private final File urlDownloadDir; private final File fileReferencesDownloadDir; private final Duration durationToKeepFiles; private final int outDatedFilesToKeep; FileReferencesAndDownloadsMaintainer() { - this(defaultFileReferencesDownloadDir, defaultUrlDownloadDir, keepFileReferencesDuration(), outDatedFilesToKeep()); + this(defaultFileReferencesDownloadDir, defaultUrlDownloadDir, keepFileReferencesDuration(), + outDatedFilesToKeep(), configServers()); } FileReferencesAndDownloadsMaintainer(File fileReferencesDownloadDir, File urlDownloadDir, Duration durationToKeepFiles, - int outdatedFilesToKeep) { + int outdatedFilesToKeep, + List configServers) { this.fileReferencesDownloadDir = fileReferencesDownloadDir; this.urlDownloadDir = urlDownloadDir; this.durationToKeepFiles = durationToKeepFiles; this.outDatedFilesToKeep = outdatedFilesToKeep; - executor.scheduleAtFixedRate(this, interval.toSeconds(), interval.toSeconds(), TimeUnit.SECONDS); + // Do not run on config servers + if (configServers.contains(ConfigUtils.getCanonicalHostName())) { + log.log(INFO, "Not running maintainer, since this is on a config server host"); + executor = Optional.empty(); + } else { + executor = Optional.of(new ScheduledThreadPoolExecutor(1, new DaemonThreadFactory("file references and downloads cleanup"))); + executor.get().scheduleAtFixedRate(this, interval.toSeconds(), interval.toSeconds(), TimeUnit.SECONDS); + } } @Override public void run() { + if (executor.isEmpty()) return; + try { deleteUnusedFiles(fileReferencesDownloadDir); deleteUnusedFiles(urlDownloadDir); @@ -73,13 +86,15 @@ class FileReferencesAndDownloadsMaintainer implements Runnable { } public void close() { - executor.shutdownNow(); - try { - if ( ! executor.awaitTermination(10, TimeUnit.SECONDS)) - throw new RuntimeException("Unable to shutdown " + executor + " before timeout"); - } catch (InterruptedException e) { - throw new RuntimeException(e); - } + executor.ifPresent(ex -> { + ex.shutdownNow(); + try { + if (! ex.awaitTermination(10, TimeUnit.SECONDS)) + throw new RuntimeException("Unable to shutdown " + executor + " before timeout"); + } catch (InterruptedException e) { + throw new RuntimeException(e); + } + }); } private void deleteUnusedFiles(File directory) { @@ -111,9 +126,10 @@ class FileReferencesAndDownloadsMaintainer implements Runnable { // Make sure we keep some files canBeDeleted = canBeDeleted.subList(0, Math.min(canBeDeleted.size(), deleteCount)); - log.log(Level.INFO, "Files that can be deleted (not accessed since " + deleteNotUsedSinceInstant + - ", will also keep " + outDatedFilesToKeep + - " no matter when last accessed): " + canBeDeleted); + if (canBeDeleted.size() > 0) + log.log(INFO, "Files that can be deleted (not accessed since " + deleteNotUsedSinceInstant + + ", will also keep " + outDatedFilesToKeep + + " no matter when last accessed): " + canBeDeleted); return canBeDeleted; } @@ -140,11 +156,20 @@ class FileReferencesAndDownloadsMaintainer implements Runnable { } private static int outDatedFilesToKeep() { - String env = System.getenv("VESPA_KEEP_OUTDATED_FILE_REFERENCES_COUNT"); + String env = System.getenv("VESPA_KEEP_FILE_REFERENCES_COUNT"); if (env != null && !env.isEmpty()) return Integer.parseInt(env); else return defaultOutdatedFilesToKeep; } + private static List configServers() { + String env = System.getenv("VESPA_CONFIGSERVERS"); + if (env == null || env.isEmpty()) + return List.of(ConfigUtils.getCanonicalHostName()); + else { + return List.of(env.split(",")); + } + } + } diff --git a/config-proxy/src/test/java/com/yahoo/vespa/config/proxy/filedistribution/FileReferencesAndDownloadsMaintainerTest.java b/config-proxy/src/test/java/com/yahoo/vespa/config/proxy/filedistribution/FileReferencesAndDownloadsMaintainerTest.java index 19e2d99c3ae..c41305b4dc8 100644 --- a/config-proxy/src/test/java/com/yahoo/vespa/config/proxy/filedistribution/FileReferencesAndDownloadsMaintainerTest.java +++ b/config-proxy/src/test/java/com/yahoo/vespa/config/proxy/filedistribution/FileReferencesAndDownloadsMaintainerTest.java @@ -2,6 +2,7 @@ package com.yahoo.vespa.config.proxy.filedistribution; import com.yahoo.io.IOUtils; +import com.yahoo.vespa.config.util.ConfigUtils; import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.Test; import org.junit.jupiter.api.io.TempDir; @@ -36,11 +37,12 @@ public class FileReferencesAndDownloadsMaintainerTest { public void setup() throws IOException { cachedFileReferences = newFolder(tempFolder, "cachedFileReferences"); cachedDownloads = newFolder(tempFolder, "cachedDownloads"); - maintainer = new FileReferencesAndDownloadsMaintainer(cachedFileReferences, cachedDownloads, keepDuration, outDatedFilesToKeep); } @Test void require_old_files_to_be_deleted() { + maintainer = new FileReferencesAndDownloadsMaintainer(cachedFileReferences, cachedDownloads, keepDuration, outDatedFilesToKeep, + List.of("host1")); runMaintainerAndAssertFiles(0, 0); var fileReferences = writeFiles(20); @@ -55,6 +57,20 @@ public class FileReferencesAndDownloadsMaintainerTest { runMaintainerAndAssertFiles(outDatedFilesToKeep, outDatedFilesToKeep); } + @Test + void require_no_files_deleted_when_running_on_config_server_host() { + maintainer = new FileReferencesAndDownloadsMaintainer(cachedFileReferences, cachedDownloads, keepDuration, + outDatedFilesToKeep, List.of(ConfigUtils.getCanonicalHostName())); + runMaintainerAndAssertFiles(0, 0); + + var fileReferences = writeFiles(10); + var downloads = writeDownloads(10); + runMaintainerAndAssertFiles(10, 10); + + updateLastModifiedTimestamp(0, 10, fileReferences, downloads); + runMaintainerAndAssertFiles(10, 10); + } + private void updateLastModifiedTimestamp(int startInclusive, int endExclusive, List fileReferences, List downloads) { IntStream.range(startInclusive, endExclusive).forEach(i -> { Instant instant = Instant.now().minus(keepDuration.plus(Duration.ofMinutes(1)).minus(Duration.ofSeconds(i))); -- cgit v1.2.3