diff options
author | Harald Musum <musum@verizonmedia.com> | 2019-07-10 15:18:48 +0200 |
---|---|---|
committer | GitHub <noreply@github.com> | 2019-07-10 15:18:48 +0200 |
commit | 6a7d07037ce40a637441078afe6951c00d189e7b (patch) | |
tree | 34b39c17fb3d59f829affda32280290574ba286f | |
parent | 9eadbd143c75a5f10b9429deb1ed0d0abc55ff13 (diff) | |
parent | 305addd80363cc103a230e5db96c5a7598d198a6 (diff) |
Merge pull request #10010 from vespa-engine/hmusum/add-maintainer-for-cleanup-of-files-and-downloads
Add maintainer for deleting unused file references and downloads
5 files changed, 188 insertions, 8 deletions
diff --git a/config-proxy/src/main/java/com/yahoo/vespa/config/proxy/filedistribution/CachedFilesMaintainer.java b/config-proxy/src/main/java/com/yahoo/vespa/config/proxy/filedistribution/CachedFilesMaintainer.java new file mode 100644 index 00000000000..eec045cdb0a --- /dev/null +++ b/config-proxy/src/main/java/com/yahoo/vespa/config/proxy/filedistribution/CachedFilesMaintainer.java @@ -0,0 +1,92 @@ +// Copyright 2019 Oath Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.vespa.config.proxy.filedistribution; + +import com.yahoo.io.IOUtils; +import com.yahoo.log.LogLevel; +import com.yahoo.vespa.filedistribution.FileDownloader; + +import java.io.File; +import java.io.IOException; +import java.io.UncheckedIOException; +import java.nio.file.attribute.BasicFileAttributes; +import java.time.Duration; +import java.time.Instant; +import java.util.Arrays; +import java.util.HashSet; +import java.util.Set; +import java.util.logging.Level; +import java.util.logging.Logger; +import java.util.stream.Collectors; + +import static java.nio.file.Files.readAttributes; + +/** + * Deletes cached file references and url downloads that have not been used for some time + * + * @author hmusum + */ +class CachedFilesMaintainer implements Runnable { + + private final static Logger log = Logger.getLogger(CachedFilesMaintainer.class.getName()); + + private static final File defaultUrlDownloadDir = UrlDownloadRpcServer.downloadDir; + private static final File defaultFileReferencesDownloadDir = FileDownloader.defaultDownloadDirectory; + private static final Duration defaultDurationToKeepFiles = Duration.ofDays(30); + + private final File urlDownloadDir; + private final File fileReferencesDownloadDir; + private final Duration durationToKeepFiles; + + CachedFilesMaintainer() { + this(defaultFileReferencesDownloadDir, defaultUrlDownloadDir, defaultDurationToKeepFiles); + } + + CachedFilesMaintainer(File fileReferencesDownloadDir, File urlDownloadDir, Duration durationToKeepFiles) { + this.fileReferencesDownloadDir = fileReferencesDownloadDir; + this.urlDownloadDir = urlDownloadDir; + this.durationToKeepFiles = durationToKeepFiles; + } + + @Override + public void run() { + try { + deleteUnusedFiles(fileReferencesDownloadDir); + deleteUnusedFiles(urlDownloadDir); + } catch (Throwable t) { + log.log(Level.WARNING, "Deleting unused files failed. ", t); + } + } + + private void deleteUnusedFiles(File directory) { + Instant deleteNotUsedSinceInstant = Instant.now().minus(durationToKeepFiles); + Set<String> filesOnDisk = new HashSet<>(); + File[] files = directory.listFiles(); + if (files != null) + filesOnDisk.addAll(Arrays.stream(files).map(File::getName).collect(Collectors.toSet())); + log.log(LogLevel.DEBUG, "Files on disk (in " + directory + "): " + filesOnDisk); + + Set<String> filesToDelete = filesOnDisk + .stream() + .filter(fileReference -> isFileLastModifiedBefore(new File(directory, fileReference), deleteNotUsedSinceInstant)) + .collect(Collectors.toSet()); + if (filesToDelete.size() > 0) { + log.log(LogLevel.INFO, "Files that can be deleted in " + directory + " (not used since " + deleteNotUsedSinceInstant + "): " + filesToDelete); + filesToDelete.forEach(fileReference -> { + File file = new File(directory, fileReference); + if (!IOUtils.recursiveDeleteDir(file)) + log.log(LogLevel.WARNING, "Could not delete " + file.getAbsolutePath()); + }); + } + } + + private boolean isFileLastModifiedBefore(File fileReference, Instant instant) { + BasicFileAttributes fileAttributes; + try { + fileAttributes = readAttributes(fileReference.toPath(), BasicFileAttributes.class); + return fileAttributes.lastModifiedTime().toInstant().isBefore(instant); + } catch (IOException e) { + throw new UncheckedIOException(e); + } + } + +} diff --git a/config-proxy/src/main/java/com/yahoo/vespa/config/proxy/filedistribution/FileDistributionAndUrlDownload.java b/config-proxy/src/main/java/com/yahoo/vespa/config/proxy/filedistribution/FileDistributionAndUrlDownload.java index 0b7de6ed562..2767d2c8027 100644 --- a/config-proxy/src/main/java/com/yahoo/vespa/config/proxy/filedistribution/FileDistributionAndUrlDownload.java +++ b/config-proxy/src/main/java/com/yahoo/vespa/config/proxy/filedistribution/FileDistributionAndUrlDownload.java @@ -1,11 +1,17 @@ // Copyright 2019 Oath Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. package com.yahoo.vespa.config.proxy.filedistribution; +import com.yahoo.concurrent.DaemonThreadFactory; import com.yahoo.config.subscription.ConfigSourceSet; import com.yahoo.jrt.Supervisor; import com.yahoo.vespa.config.JRTConnectionPool; import com.yahoo.vespa.filedistribution.FileDownloader; +import java.time.Duration; +import java.util.concurrent.ScheduledExecutorService; +import java.util.concurrent.ScheduledThreadPoolExecutor; +import java.util.concurrent.TimeUnit; + /** * Keeps track of file distribution and url download rpc servers. * @@ -13,17 +19,28 @@ import com.yahoo.vespa.filedistribution.FileDownloader; */ public class FileDistributionAndUrlDownload { + private static final Duration delay = Duration.ofMinutes(1); private final FileDistributionRpcServer fileDistributionRpcServer; private final UrlDownloadRpcServer urlDownloadRpcServer; + private final ScheduledExecutorService cleanupExecutor = + new ScheduledThreadPoolExecutor(1, new DaemonThreadFactory("file references and downloads cleanup")); public FileDistributionAndUrlDownload(Supervisor supervisor, ConfigSourceSet source) { fileDistributionRpcServer = new FileDistributionRpcServer(supervisor, new FileDownloader(new JRTConnectionPool(source))); urlDownloadRpcServer = new UrlDownloadRpcServer(supervisor); + cleanupExecutor.scheduleAtFixedRate(new CachedFilesMaintainer(), delay.toSeconds(), delay.toSeconds(), TimeUnit.SECONDS); } public void close() { fileDistributionRpcServer.close(); urlDownloadRpcServer.close(); + cleanupExecutor.shutdownNow(); + try { + if ( ! cleanupExecutor.awaitTermination(10, TimeUnit.SECONDS)) + throw new RuntimeException("Unable to shutdown " + cleanupExecutor + " before timeout"); + } catch (InterruptedException e) { + throw new RuntimeException(e); + } } } diff --git a/config-proxy/src/main/java/com/yahoo/vespa/config/proxy/filedistribution/UrlDownloadRpcServer.java b/config-proxy/src/main/java/com/yahoo/vespa/config/proxy/filedistribution/UrlDownloadRpcServer.java index cdf079631fe..592f5211eed 100644 --- a/config-proxy/src/main/java/com/yahoo/vespa/config/proxy/filedistribution/UrlDownloadRpcServer.java +++ b/config-proxy/src/main/java/com/yahoo/vespa/config/proxy/filedistribution/UrlDownloadRpcServer.java @@ -44,7 +44,7 @@ class UrlDownloadRpcServer { private static final String CONTENTS_FILE_NAME = "contents"; private static final String LAST_MODIFIED_FILE_NAME = "lastmodified"; - private final File downloadBaseDir; + static final File downloadDir = new File(Defaults.getDefaults().underVespaHome("var/db/vespa/download")); private final ExecutorService rpcDownloadExecutor = Executors.newFixedThreadPool(Math.max(8, Runtime.getRuntime().availableProcessors()), new DaemonThreadFactory("Rpc URL download executor")); @@ -53,7 +53,6 @@ class UrlDownloadRpcServer { .methodDesc("get path to url download") .paramDesc(0, "url", "url") .returnDesc(0, "path", "path to file")); - downloadBaseDir = new File(Defaults.getDefaults().underVespaHome("var/db/vespa/download")); } void close() { @@ -72,7 +71,7 @@ class UrlDownloadRpcServer { private void downloadFile(Request req) { String url = req.parameters().get(0).asString(); - File downloadDir = new File(this.downloadBaseDir, urlToDirName(url)); + File downloadDir = new File(UrlDownloadRpcServer.downloadDir, urlToDirName(url)); try { URL website = new URL(url); diff --git a/config-proxy/src/test/java/com/yahoo/vespa/config/proxy/filedistribution/CachedFilesMaintainerTest.java b/config-proxy/src/test/java/com/yahoo/vespa/config/proxy/filedistribution/CachedFilesMaintainerTest.java new file mode 100644 index 00000000000..4ac48d23e18 --- /dev/null +++ b/config-proxy/src/test/java/com/yahoo/vespa/config/proxy/filedistribution/CachedFilesMaintainerTest.java @@ -0,0 +1,75 @@ +// Copyright 2019 Oath Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.vespa.config.proxy.filedistribution; + +import com.yahoo.io.IOUtils; +import org.junit.Before; +import org.junit.Rule; +import org.junit.Test; +import org.junit.rules.TemporaryFolder; + +import java.io.File; +import java.io.IOException; +import java.time.Duration; +import java.time.Instant; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertNotNull; + +/** + * @author hmusum + */ +public class CachedFilesMaintainerTest { + + private File cachedFileReferences; + private File cachedDownloads; + private CachedFilesMaintainer cachedFilesMaintainer; + + @Rule + public TemporaryFolder tempFolder = new TemporaryFolder(); + + @Before + public void setup() throws IOException { + cachedFileReferences = tempFolder.newFolder(); + cachedDownloads = tempFolder.newFolder(); + cachedFilesMaintainer = new CachedFilesMaintainer(cachedFileReferences, cachedDownloads, Duration.ofMinutes(1)); + } + + @Test + public void require_old_files_to_be_deleted() throws IOException { + runMaintainerAndAssertFiles(0, 0); + + File fileReference = writeFile(cachedFileReferences, "fileReference"); + File download = writeFile(cachedDownloads, "download"); + runMaintainerAndAssertFiles(1, 1); + + updateLastModifiedTimeStamp(fileReference, Instant.now().minus(Duration.ofMinutes(10))); + runMaintainerAndAssertFiles(0, 1); + + updateLastModifiedTimeStamp(download, Instant.now().minus(Duration.ofMinutes(10))); + runMaintainerAndAssertFiles(0, 0); + } + + private void updateLastModifiedTimeStamp(File file, Instant instant) { + if (!file.setLastModified(instant.toEpochMilli())) { + throw new RuntimeException("Could not set last modified timestamp for '" + file.getAbsolutePath() + "'"); + } + } + + private void runMaintainerAndAssertFiles(int fileReferenceCount, int downloadCount) { + cachedFilesMaintainer.run(); + File[] fileReferences = cachedFileReferences.listFiles(); + assertNotNull(fileReferences); + assertEquals(fileReferenceCount, fileReferences.length); + + File[] downloads = cachedDownloads.listFiles(); + assertNotNull(downloads); + assertEquals(downloadCount, downloads.length); + } + + private File writeFile(File directory, String filename) throws IOException { + File file = new File(directory, filename); + IOUtils.writeFile(file, filename, false); + return file; + } + +} diff --git a/filedistribution/src/main/java/com/yahoo/vespa/filedistribution/FileDownloader.java b/filedistribution/src/main/java/com/yahoo/vespa/filedistribution/FileDownloader.java index 462dc1d4700..1a84e4895e8 100644 --- a/filedistribution/src/main/java/com/yahoo/vespa/filedistribution/FileDownloader.java +++ b/filedistribution/src/main/java/com/yahoo/vespa/filedistribution/FileDownloader.java @@ -27,17 +27,14 @@ import java.util.logging.Logger; public class FileDownloader { private final static Logger log = Logger.getLogger(FileDownloader.class.getName()); + public static File defaultDownloadDirectory = new File(Defaults.getDefaults().underVespaHome("var/db/vespa/filedistribution")); private final File downloadDirectory; private final Duration timeout; private final FileReferenceDownloader fileReferenceDownloader; public FileDownloader(ConnectionPool connectionPool) { - this(connectionPool, - new File(Defaults.getDefaults().underVespaHome("var/db/vespa/filedistribution")), - new File(Defaults.getDefaults().underVespaHome("var/db/vespa/filedistribution")), - Duration.ofMinutes(15), - Duration.ofSeconds(10)); + this(connectionPool, defaultDownloadDirectory , defaultDownloadDirectory , Duration.ofMinutes(15), Duration.ofSeconds(10)); } FileDownloader(ConnectionPool connectionPool, File downloadDirectory, File tmpDirectory, Duration timeout, Duration sleepBetweenRetries) { |