summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorHarald Musum <musum@verizonmedia.com>2023-08-14 13:49:24 +0200
committerGitHub <noreply@github.com>2023-08-14 13:49:24 +0200
commit8111830bfae793d4d2fa8183e2ef8781c8524eaf (patch)
tree7a993a7f95cf4a1271d8f0ef0ab035c67d270669
parenta0b301a8f30da95265282dd90c8307fc1e826161 (diff)
parent06074fd0835fc94bd93f6b2b530a954f0e8980b6 (diff)
Merge pull request #27871 from vespa-engine/hmusum/keep-a-minimum-number-of-files-and-downloads
Keep some file references and downloads even if they have not been useā€¦
-rw-r--r--config-proxy/src/main/java/com/yahoo/vespa/config/proxy/filedistribution/FileReferencesAndDownloadsMaintainer.java124
-rw-r--r--config-proxy/src/test/java/com/yahoo/vespa/config/proxy/filedistribution/FileReferencesAndDownloadsMaintainerTest.java75
2 files changed, 156 insertions, 43 deletions
diff --git a/config-proxy/src/main/java/com/yahoo/vespa/config/proxy/filedistribution/FileReferencesAndDownloadsMaintainer.java b/config-proxy/src/main/java/com/yahoo/vespa/config/proxy/filedistribution/FileReferencesAndDownloadsMaintainer.java
index 627a15aab65..eab1368a2a1 100644
--- a/config-proxy/src/main/java/com/yahoo/vespa/config/proxy/filedistribution/FileReferencesAndDownloadsMaintainer.java
+++ b/config-proxy/src/main/java/com/yahoo/vespa/config/proxy/filedistribution/FileReferencesAndDownloadsMaintainer.java
@@ -3,24 +3,29 @@ package com.yahoo.vespa.config.proxy.filedistribution;
import com.yahoo.concurrent.DaemonThreadFactory;
import com.yahoo.io.IOUtils;
+import com.yahoo.vespa.config.util.ConfigUtils;
import com.yahoo.vespa.filedistribution.FileDownloader;
+
import java.io.File;
import java.io.IOException;
import java.io.UncheckedIOException;
+import java.nio.file.Path;
import java.nio.file.attribute.BasicFileAttributes;
import java.time.Duration;
import java.time.Instant;
-import java.util.Arrays;
+import java.util.Comparator;
import java.util.HashSet;
+import java.util.List;
+import java.util.Optional;
import java.util.Set;
import java.util.concurrent.ScheduledExecutorService;
import java.util.concurrent.ScheduledThreadPoolExecutor;
import java.util.concurrent.TimeUnit;
import java.util.logging.Level;
import java.util.logging.Logger;
-import java.util.stream.Collectors;
import static java.nio.file.Files.readAttributes;
+import static java.util.logging.Level.INFO;
/**
* Deletes file references and url downloads that have not been used for some time.
@@ -35,27 +40,43 @@ class FileReferencesAndDownloadsMaintainer implements Runnable {
private static final File defaultUrlDownloadDir = UrlDownloadRpcServer.downloadDir;
private static final File defaultFileReferencesDownloadDir = FileDownloader.defaultDownloadDirectory;
private static final Duration defaultDurationToKeepFiles = Duration.ofDays(30);
+ private static final int defaultOutdatedFilesToKeep = 20;
private static final Duration interval = Duration.ofMinutes(1);
- private final ScheduledExecutorService executor =
- new ScheduledThreadPoolExecutor(1, new DaemonThreadFactory("file references and downloads cleanup"));
+ private final Optional<ScheduledExecutorService> executor;
private final File urlDownloadDir;
private final File fileReferencesDownloadDir;
private final Duration durationToKeepFiles;
+ private final int outDatedFilesToKeep;
FileReferencesAndDownloadsMaintainer() {
- this(defaultFileReferencesDownloadDir, defaultUrlDownloadDir, keepFileReferencesDuration());
+ this(defaultFileReferencesDownloadDir, defaultUrlDownloadDir, keepFileReferencesDuration(),
+ outDatedFilesToKeep(), configServers());
}
- FileReferencesAndDownloadsMaintainer(File fileReferencesDownloadDir, File urlDownloadDir, Duration durationToKeepFiles) {
+ FileReferencesAndDownloadsMaintainer(File fileReferencesDownloadDir,
+ File urlDownloadDir,
+ Duration durationToKeepFiles,
+ int outdatedFilesToKeep,
+ List<String> configServers) {
this.fileReferencesDownloadDir = fileReferencesDownloadDir;
this.urlDownloadDir = urlDownloadDir;
this.durationToKeepFiles = durationToKeepFiles;
- executor.scheduleAtFixedRate(this, interval.toSeconds(), interval.toSeconds(), TimeUnit.SECONDS);
+ this.outDatedFilesToKeep = outdatedFilesToKeep;
+ // Do not run on config servers
+ if (configServers.contains(ConfigUtils.getCanonicalHostName())) {
+ log.log(INFO, "Not running maintainer, since this is on a config server host");
+ executor = Optional.empty();
+ } else {
+ executor = Optional.of(new ScheduledThreadPoolExecutor(1, new DaemonThreadFactory("file references and downloads cleanup")));
+ executor.get().scheduleAtFixedRate(this, interval.toSeconds(), interval.toSeconds(), TimeUnit.SECONDS);
+ }
}
@Override
public void run() {
+ if (executor.isEmpty()) return;
+
try {
deleteUnusedFiles(fileReferencesDownloadDir);
deleteUnusedFiles(urlDownloadDir);
@@ -65,42 +86,62 @@ class FileReferencesAndDownloadsMaintainer implements Runnable {
}
public void close() {
- executor.shutdownNow();
- try {
- if ( ! executor.awaitTermination(10, TimeUnit.SECONDS))
- throw new RuntimeException("Unable to shutdown " + executor + " before timeout");
- } catch (InterruptedException e) {
- throw new RuntimeException(e);
- }
+ executor.ifPresent(ex -> {
+ ex.shutdownNow();
+ try {
+ if (! ex.awaitTermination(10, TimeUnit.SECONDS))
+ throw new RuntimeException("Unable to shutdown " + executor + " before timeout");
+ } catch (InterruptedException e) {
+ throw new RuntimeException(e);
+ }
+ });
}
private void deleteUnusedFiles(File directory) {
- Instant deleteNotUsedSinceInstant = Instant.now().minus(durationToKeepFiles);
- Set<String> filesOnDisk = new HashSet<>();
+
File[] files = directory.listFiles();
- if (files != null)
- filesOnDisk.addAll(Arrays.stream(files).map(File::getName).collect(Collectors.toSet()));
- log.log(Level.FINE, () -> "Files on disk (in " + directory + "): " + filesOnDisk);
+ if (files == null) return;
+
+ List<File> filesToDelete = filesThatCanBeDeleted(files);
+ filesToDelete.forEach(fileReference -> {
+ if (IOUtils.recursiveDeleteDir(fileReference))
+ log.log(Level.FINE, "Deleted " + fileReference.getAbsolutePath());
+ else
+ log.log(Level.WARNING, "Could not delete " + fileReference.getAbsolutePath());
+ });
+ }
- Set<String> filesToDelete = filesOnDisk
+ private List<File> filesThatCanBeDeleted(File[] files) {
+ Instant deleteNotUsedSinceInstant = Instant.now().minus(durationToKeepFiles);
+
+ Set<File> filesOnDisk = new HashSet<>(List.of(files));
+ log.log(Level.FINE, () -> "Files on disk: " + filesOnDisk);
+ int deleteCount = Math.max(0, filesOnDisk.size() - outDatedFilesToKeep);
+ var canBeDeleted = filesOnDisk
.stream()
- .filter(fileReference -> isFileLastModifiedBefore(new File(directory, fileReference), deleteNotUsedSinceInstant))
- .collect(Collectors.toSet());
- if (filesToDelete.size() > 0) {
- log.log(Level.INFO, "Files that can be deleted in " + directory + " (not used since " + deleteNotUsedSinceInstant + "): " + filesToDelete);
- filesToDelete.forEach(fileReference -> {
- File file = new File(directory, fileReference);
- if (!IOUtils.recursiveDeleteDir(file))
- log.log(Level.WARNING, "Could not delete " + file.getAbsolutePath());
- });
- }
+ .peek(file -> log.log(Level.FINE, () -> file + ":" + fileLastModifiedTime(file.toPath())))
+ .filter(fileReference -> isFileLastModifiedBefore(fileReference, deleteNotUsedSinceInstant))
+ .sorted(Comparator.comparing(fileReference -> fileLastModifiedTime(fileReference.toPath())))
+ .toList();
+
+ // Make sure we keep some files
+ canBeDeleted = canBeDeleted.subList(0, Math.min(canBeDeleted.size(), deleteCount));
+ if (canBeDeleted.size() > 0)
+ log.log(INFO, "Files that can be deleted (not accessed since " + deleteNotUsedSinceInstant +
+ ", will also keep " + outDatedFilesToKeep +
+ " no matter when last accessed): " + canBeDeleted);
+
+ return canBeDeleted;
}
private boolean isFileLastModifiedBefore(File fileReference, Instant instant) {
- BasicFileAttributes fileAttributes;
+ return fileLastModifiedTime(fileReference.toPath()).isBefore(instant);
+ }
+
+ private static Instant fileLastModifiedTime(Path fileReference) {
try {
- fileAttributes = readAttributes(fileReference.toPath(), BasicFileAttributes.class);
- return fileAttributes.lastModifiedTime().toInstant().isBefore(instant);
+ BasicFileAttributes fileAttributes = readAttributes(fileReference, BasicFileAttributes.class);
+ return fileAttributes.lastModifiedTime().toInstant();
} catch (IOException e) {
throw new UncheckedIOException(e);
}
@@ -114,4 +155,21 @@ class FileReferencesAndDownloadsMaintainer implements Runnable {
return defaultDurationToKeepFiles;
}
+ private static int outDatedFilesToKeep() {
+ String env = System.getenv("VESPA_KEEP_FILE_REFERENCES_COUNT");
+ if (env != null && !env.isEmpty())
+ return Integer.parseInt(env);
+ else
+ return defaultOutdatedFilesToKeep;
+ }
+
+ private static List<String> configServers() {
+ String env = System.getenv("VESPA_CONFIGSERVERS");
+ if (env == null || env.isEmpty())
+ return List.of(ConfigUtils.getCanonicalHostName());
+ else {
+ return List.of(env.split(","));
+ }
+ }
+
}
diff --git a/config-proxy/src/test/java/com/yahoo/vespa/config/proxy/filedistribution/FileReferencesAndDownloadsMaintainerTest.java b/config-proxy/src/test/java/com/yahoo/vespa/config/proxy/filedistribution/FileReferencesAndDownloadsMaintainerTest.java
index fad021c0119..c41305b4dc8 100644
--- a/config-proxy/src/test/java/com/yahoo/vespa/config/proxy/filedistribution/FileReferencesAndDownloadsMaintainerTest.java
+++ b/config-proxy/src/test/java/com/yahoo/vespa/config/proxy/filedistribution/FileReferencesAndDownloadsMaintainerTest.java
@@ -2,6 +2,7 @@
package com.yahoo.vespa.config.proxy.filedistribution;
import com.yahoo.io.IOUtils;
+import com.yahoo.vespa.config.util.ConfigUtils;
import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Test;
import org.junit.jupiter.api.io.TempDir;
@@ -10,6 +11,9 @@ import java.io.File;
import java.io.IOException;
import java.time.Duration;
import java.time.Instant;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.stream.IntStream;
import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertNotNull;
@@ -19,9 +23,12 @@ import static org.junit.jupiter.api.Assertions.assertNotNull;
*/
public class FileReferencesAndDownloadsMaintainerTest {
+ private static final Duration keepDuration = Duration.ofMinutes(1);
+ private static final int outDatedFilesToKeep = 9;
+
private File cachedFileReferences;
private File cachedDownloads;
- private FileReferencesAndDownloadsMaintainer cachedFilesMaintainer;
+ private FileReferencesAndDownloadsMaintainer maintainer;
@TempDir
public File tempFolder;
@@ -30,22 +37,70 @@ public class FileReferencesAndDownloadsMaintainerTest {
public void setup() throws IOException {
cachedFileReferences = newFolder(tempFolder, "cachedFileReferences");
cachedDownloads = newFolder(tempFolder, "cachedDownloads");
- cachedFilesMaintainer = new FileReferencesAndDownloadsMaintainer(cachedFileReferences, cachedDownloads, Duration.ofMinutes(1));
}
@Test
- void require_old_files_to_be_deleted() throws IOException {
+ void require_old_files_to_be_deleted() {
+ maintainer = new FileReferencesAndDownloadsMaintainer(cachedFileReferences, cachedDownloads, keepDuration, outDatedFilesToKeep,
+ List.of("host1"));
runMaintainerAndAssertFiles(0, 0);
- File fileReference = writeFile(cachedFileReferences, "fileReference");
- File download = writeFile(cachedDownloads, "download");
- runMaintainerAndAssertFiles(1, 1);
+ var fileReferences = writeFiles(20);
+ var downloads = writeDownloads(21);
+ runMaintainerAndAssertFiles(20, 21);
+
+ updateLastModifiedTimestamp(0, 5, fileReferences, downloads);
+ runMaintainerAndAssertFiles(15, 16);
- updateLastModifiedTimeStamp(fileReference, Instant.now().minus(Duration.ofMinutes(10)));
- runMaintainerAndAssertFiles(0, 1);
+ updateLastModifiedTimestamp(6, 20, fileReferences, downloads);
+ // Should keep at least outDatedFilesToKeep file references and downloads even if there are more that are old
+ runMaintainerAndAssertFiles(outDatedFilesToKeep, outDatedFilesToKeep);
+ }
- updateLastModifiedTimeStamp(download, Instant.now().minus(Duration.ofMinutes(10)));
+ @Test
+ void require_no_files_deleted_when_running_on_config_server_host() {
+ maintainer = new FileReferencesAndDownloadsMaintainer(cachedFileReferences, cachedDownloads, keepDuration,
+ outDatedFilesToKeep, List.of(ConfigUtils.getCanonicalHostName()));
runMaintainerAndAssertFiles(0, 0);
+
+ var fileReferences = writeFiles(10);
+ var downloads = writeDownloads(10);
+ runMaintainerAndAssertFiles(10, 10);
+
+ updateLastModifiedTimestamp(0, 10, fileReferences, downloads);
+ runMaintainerAndAssertFiles(10, 10);
+ }
+
+ private void updateLastModifiedTimestamp(int startInclusive, int endExclusive, List<File> fileReferences, List<File> downloads) {
+ IntStream.range(startInclusive, endExclusive).forEach(i -> {
+ Instant instant = Instant.now().minus(keepDuration.plus(Duration.ofMinutes(1)).minus(Duration.ofSeconds(i)));
+ updateLastModifiedTimeStamp(fileReferences.get(i), instant);
+ updateLastModifiedTimeStamp(downloads.get(i), instant);
+ });
+ }
+
+ private List<File> writeFiles(int count) {
+ List<File> files = new ArrayList<>();
+ IntStream.range(0, count).forEach(i -> {
+ try {
+ files.add(writeFile(cachedFileReferences, "fileReference" + i));
+ } catch (IOException e) {
+ throw new RuntimeException(e);
+ }
+ });
+ return files;
+ }
+
+ private List<File> writeDownloads(int count) {
+ List<File> files = new ArrayList<>();
+ IntStream.range(0, count).forEach(i -> {
+ try {
+ files.add(writeFile(cachedDownloads, "download" + i));
+ } catch (IOException e) {
+ throw new RuntimeException(e);
+ }
+ });
+ return files;
}
private void updateLastModifiedTimeStamp(File file, Instant instant) {
@@ -55,7 +110,7 @@ public class FileReferencesAndDownloadsMaintainerTest {
}
private void runMaintainerAndAssertFiles(int fileReferenceCount, int downloadCount) {
- cachedFilesMaintainer.run();
+ maintainer.run();
File[] fileReferences = cachedFileReferences.listFiles();
assertNotNull(fileReferences);
assertEquals(fileReferenceCount, fileReferences.length);