summaryrefslogtreecommitdiffstats
path: root/config-proxy
diff options
context:
space:
mode:
authorHarald Musum <musum@yahooinc.com>2023-08-24 15:22:56 +0200
committerHarald Musum <musum@yahooinc.com>2023-08-24 15:22:56 +0200
commit89245892826f9cd7872240116c58760e03e6e12b (patch)
tree6c01c1fc9c43a92ef9d2e1df199a0d11f0835e71 /config-proxy
parent297e0f449ab7bbfd5bc221f1571baf924cbc93cb (diff)
Use cached file if a URL has already been downloaded
Stop using the If-Modified-Since header when downloading, as most cloud provider solutions don't support it. Instead, always use the cached download if it exists, which, in the case where the If-Modified-Since header is not supported, avoids downloading the URL for every deployment. This also means that there will be no changes to the content used for a URL unless the URL configured in the application package is explicitly changed to use another URL.
Diffstat (limited to 'config-proxy')
-rw-r--r--config-proxy/src/main/java/com/yahoo/vespa/config/proxy/filedistribution/FileReferencesAndDownloadsMaintainer.java2
-rw-r--r--config-proxy/src/main/java/com/yahoo/vespa/config/proxy/filedistribution/UrlDownloadRpcServer.java72
2 files changed, 23 insertions, 51 deletions
diff --git a/config-proxy/src/main/java/com/yahoo/vespa/config/proxy/filedistribution/FileReferencesAndDownloadsMaintainer.java b/config-proxy/src/main/java/com/yahoo/vespa/config/proxy/filedistribution/FileReferencesAndDownloadsMaintainer.java
index eab1368a2a1..c4926fd0250 100644
--- a/config-proxy/src/main/java/com/yahoo/vespa/config/proxy/filedistribution/FileReferencesAndDownloadsMaintainer.java
+++ b/config-proxy/src/main/java/com/yahoo/vespa/config/proxy/filedistribution/FileReferencesAndDownloadsMaintainer.java
@@ -37,7 +37,7 @@ import static java.util.logging.Level.INFO;
class FileReferencesAndDownloadsMaintainer implements Runnable {
private static final Logger log = Logger.getLogger(FileReferencesAndDownloadsMaintainer.class.getName());
- private static final File defaultUrlDownloadDir = UrlDownloadRpcServer.downloadDir;
+ private static final File defaultUrlDownloadDir = UrlDownloadRpcServer.defaultDownloadDirectory;
private static final File defaultFileReferencesDownloadDir = FileDownloader.defaultDownloadDirectory;
private static final Duration defaultDurationToKeepFiles = Duration.ofDays(30);
private static final int defaultOutdatedFilesToKeep = 20;
diff --git a/config-proxy/src/main/java/com/yahoo/vespa/config/proxy/filedistribution/UrlDownloadRpcServer.java b/config-proxy/src/main/java/com/yahoo/vespa/config/proxy/filedistribution/UrlDownloadRpcServer.java
index 9ba3663f883..35c5bb14755 100644
--- a/config-proxy/src/main/java/com/yahoo/vespa/config/proxy/filedistribution/UrlDownloadRpcServer.java
+++ b/config-proxy/src/main/java/com/yahoo/vespa/config/proxy/filedistribution/UrlDownloadRpcServer.java
@@ -11,12 +11,8 @@ import com.yahoo.text.Utf8;
import com.yahoo.vespa.defaults.Defaults;
import net.jpountz.xxhash.XXHashFactory;
-import java.io.BufferedReader;
-import java.io.BufferedWriter;
import java.io.File;
import java.io.FileOutputStream;
-import java.io.FileReader;
-import java.io.FileWriter;
import java.io.IOException;
import java.net.HttpURLConnection;
import java.net.URL;
@@ -25,7 +21,6 @@ import java.nio.channels.Channels;
import java.nio.channels.ReadableByteChannel;
import java.nio.file.Files;
import java.util.concurrent.ExecutorService;
-import java.util.concurrent.Executors;
import java.util.concurrent.TimeUnit;
import java.util.logging.Level;
import java.util.logging.Logger;
@@ -33,6 +28,8 @@ import java.util.logging.Logger;
import static com.yahoo.vespa.config.UrlDownloader.DOES_NOT_EXIST;
import static com.yahoo.vespa.config.UrlDownloader.HTTP_ERROR;
import static com.yahoo.vespa.config.UrlDownloader.INTERNAL_ERROR;
+import static java.lang.Runtime.getRuntime;
+import static java.util.concurrent.Executors.newFixedThreadPool;
/**
* An RPC server that handles URL download requests.
@@ -40,16 +37,17 @@ import static com.yahoo.vespa.config.UrlDownloader.INTERNAL_ERROR;
* @author lesters
*/
class UrlDownloadRpcServer {
- private final static Logger log = Logger.getLogger(UrlDownloadRpcServer.class.getName());
+ private static final Logger log = Logger.getLogger(UrlDownloadRpcServer.class.getName());
private static final String CONTENTS_FILE_NAME = "contents";
- private static final String LAST_MODIFIED_FILE_NAME = "lastmodified";
+ static final File defaultDownloadDirectory = new File(Defaults.getDefaults().underVespaHome("var/db/vespa/download"));
- static final File downloadDir = new File(Defaults.getDefaults().underVespaHome("var/db/vespa/download"));
- private final ExecutorService rpcDownloadExecutor = Executors.newFixedThreadPool(Math.max(8, Runtime.getRuntime().availableProcessors()),
- new DaemonThreadFactory("Rpc URL download executor"));
+ private final File rootDownloadDir;
+ private final ExecutorService executor = newFixedThreadPool(Math.max(8, getRuntime().availableProcessors()),
+ new DaemonThreadFactory("Rpc URL download executor"));
UrlDownloadRpcServer(Supervisor supervisor) {
+ this.rootDownloadDir = defaultDownloadDirectory;
supervisor.addMethod(new Method("url.waitFor", "s", "s", this::download)
.requireCapabilities(Capability.CONFIGPROXY__FILEDISTRIBUTION_API)
.methodDesc("get path to url download")
@@ -58,9 +56,9 @@ class UrlDownloadRpcServer {
}
void close() {
- rpcDownloadExecutor.shutdownNow();
+ executor.shutdownNow();
try {
- rpcDownloadExecutor.awaitTermination(10, TimeUnit.SECONDS);
+ executor.awaitTermination(10, TimeUnit.SECONDS);
} catch (InterruptedException e) {
throw new RuntimeException(e);
}
@@ -68,33 +66,31 @@ class UrlDownloadRpcServer {
private void download(Request req) {
req.detach();
- rpcDownloadExecutor.execute(() -> downloadFile(req));
+ executor.execute(() -> downloadFile(req));
}
private void downloadFile(Request req) {
String url = req.parameters().get(0).asString();
- File downloadDir = new File(UrlDownloadRpcServer.downloadDir, urlToDirName(url));
+ File downloadDir = new File(rootDownloadDir, urlToDirName(url));
+ if (alreadyDownloaded(downloadDir)) {
+ log.log(Level.INFO, "URL '" + url + "' already downloaded");
+ req.returnValues().add(new StringValue(new File(downloadDir, CONTENTS_FILE_NAME).getAbsolutePath()));
+ req.returnRequest();
+ return;
+ }
try {
URL website = new URL(url);
HttpURLConnection connection = (HttpURLConnection) website.openConnection();
- setIfModifiedSince(connection, downloadDir); // don't download if we already have the file
-
if (connection.getResponseCode() == 200) {
log.log(Level.INFO, "Downloading URL '" + url + "'");
downloadFile(req, connection, downloadDir);
-
- } else if (connection.getResponseCode() == 304) {
- log.log(Level.INFO, "URL '" + url + "' already downloaded (server response: 304)");
- req.returnValues().add(new StringValue(new File(downloadDir, CONTENTS_FILE_NAME).getAbsolutePath()));
-
} else {
log.log(Level.SEVERE, "Download of URL '" + url + "' got server response: " + connection.getResponseCode());
req.setError(HTTP_ERROR, String.valueOf(connection.getResponseCode()));
}
-
} catch (Throwable e) {
- log.log(Level.SEVERE, "Download of URL '" + url + "' got exception: " + e.getMessage());
+ log.log(Level.SEVERE, "Download of URL '" + url + "' failed, got exception: " + e.getMessage());
req.setError(INTERNAL_ERROR, "Download of URL '" + url + "' internal error: " + e.getMessage());
}
req.returnRequest();
@@ -110,12 +106,11 @@ class UrlDownloadRpcServer {
fos.getChannel().transferFrom(rbc, 0, Long.MAX_VALUE);
if (contentsPath.exists() && contentsPath.length() > 0) {
- writeLastModifiedTimestamp(downloadDir, connection.getLastModified());
new RequestTracker().trackRequest(downloadDir);
req.returnValues().add(new StringValue(contentsPath.getAbsolutePath()));
log.log(Level.FINE, () -> "URL '" + url + "' available at " + contentsPath);
log.log(Level.INFO, String.format("Download of URL '%s' done in %.3f seconds",
- url, (System.currentTimeMillis() -start) / 1000.0));
+ url, (System.currentTimeMillis() - start) / 1000.0));
} else {
log.log(Level.SEVERE, "Downloaded URL '" + url + "' not found, returning error");
req.setError(DOES_NOT_EXIST, "Downloaded '" + url + "' not found");
@@ -128,32 +123,9 @@ class UrlDownloadRpcServer {
return String.valueOf(XXHashFactory.fastestJavaInstance().hash64().hash(ByteBuffer.wrap(Utf8.toBytes(uri)), 0));
}
- private static void setIfModifiedSince(HttpURLConnection connection, File downloadDir) throws IOException {
+ private static boolean alreadyDownloaded(File downloadDir) {
File contents = new File(downloadDir, CONTENTS_FILE_NAME);
- if (contents.exists() && contents.length() > 0) {
- long lastModified = readLastModifiedTimestamp(downloadDir);
- if (lastModified > 0) {
- connection.setIfModifiedSince(lastModified);
- }
- }
- }
-
- private static long readLastModifiedTimestamp(File downloadDir) throws IOException {
- File lastModified = new File(downloadDir, LAST_MODIFIED_FILE_NAME);
- if (lastModified.exists() && lastModified.length() > 0) {
- try (BufferedReader br = new BufferedReader(new FileReader(lastModified))) {
- String timestamp = br.readLine();
- return Long.parseLong(timestamp);
- }
- }
- return 0;
- }
-
- private static void writeLastModifiedTimestamp(File downloadDir, long timestamp) throws IOException {
- File lastModified = new File(downloadDir, LAST_MODIFIED_FILE_NAME);
- try (BufferedWriter lastModifiedWriter = new BufferedWriter(new FileWriter(lastModified.getAbsolutePath()))) {
- lastModifiedWriter.write(Long.toString(timestamp));
- }
+ return contents.exists() && contents.length() > 0;
}
}