aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorHarald Musum <musum@verizonmedia.com>2023-09-18 14:57:52 +0200
committerGitHub <noreply@github.com>2023-09-18 14:57:52 +0200
commitebbb044d0830c38d9ceb98425c5b666c7627d42b (patch)
tree35682d85e74d380fc05d1733a84ebaf7ed518311
parent5e5137cf0ab9cc67452f36b488394458f827b2b0 (diff)
parentdb99754f95ae0b5d400a04d84c2eb16152e80e71 (diff)
Merge pull request #28555 from vespa-engine/hmusum/support-download-of-files-from-s3
Hmusum/support download of files from s3
-rw-r--r--config-proxy/pom.xml5
-rw-r--r--config-proxy/src/main/java/com/yahoo/vespa/config/proxy/filedistribution/Downloader.java31
-rw-r--r--config-proxy/src/main/java/com/yahoo/vespa/config/proxy/filedistribution/S3Downloader.java75
-rw-r--r--config-proxy/src/main/java/com/yahoo/vespa/config/proxy/filedistribution/UrlDownloadRpcServer.java76
-rw-r--r--config-proxy/src/main/java/com/yahoo/vespa/config/proxy/filedistribution/UrlDownloader.java53
-rw-r--r--vespa-dependencies-enforcer/allowed-maven-dependencies.txt2
6 files changed, 196 insertions, 46 deletions
diff --git a/config-proxy/pom.xml b/config-proxy/pom.xml
index 2984b4b1b65..466515388d4 100644
--- a/config-proxy/pom.xml
+++ b/config-proxy/pom.xml
@@ -14,6 +14,11 @@
<version>8-SNAPSHOT</version>
<dependencies>
<dependency>
+ <groupId>com.amazonaws</groupId>
+ <artifactId>aws-java-sdk-s3</artifactId>
+ <version>${aws-sdk.vespa.version}</version>
+ </dependency>
+ <dependency>
<groupId>com.yahoo.vespa</groupId>
<artifactId>config-lib</artifactId>
<version>${project.version}</version>
diff --git a/config-proxy/src/main/java/com/yahoo/vespa/config/proxy/filedistribution/Downloader.java b/config-proxy/src/main/java/com/yahoo/vespa/config/proxy/filedistribution/Downloader.java
new file mode 100644
index 00000000000..0692d3ee499
--- /dev/null
+++ b/config-proxy/src/main/java/com/yahoo/vespa/config/proxy/filedistribution/Downloader.java
@@ -0,0 +1,31 @@
+// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.vespa.config.proxy.filedistribution;
+
+import java.io.File;
+import java.io.FileOutputStream;
+import java.io.IOException;
+import java.net.HttpURLConnection;
+import java.net.URL;
+import java.nio.channels.Channels;
+import java.nio.channels.ReadableByteChannel;
+import java.nio.file.Files;
+import java.util.Optional;
+import java.util.logging.Level;
+import java.util.logging.Logger;
+
+/**
+ *
+ * @author hmusum
+ */
+interface Downloader {
+
+ Optional<File> downloadFile(String url, File downloadDir) throws IOException;
+
+ default String fileName() { return "contents"; }
+
+ default boolean alreadyDownloaded(Downloader downloader, File downloadDir) {
+ File contents = new File(downloadDir, downloader.fileName());
+ return contents.exists() && contents.length() > 0;
+ }
+
+}
diff --git a/config-proxy/src/main/java/com/yahoo/vespa/config/proxy/filedistribution/S3Downloader.java b/config-proxy/src/main/java/com/yahoo/vespa/config/proxy/filedistribution/S3Downloader.java
new file mode 100644
index 00000000000..ce79cfd5b7e
--- /dev/null
+++ b/config-proxy/src/main/java/com/yahoo/vespa/config/proxy/filedistribution/S3Downloader.java
@@ -0,0 +1,75 @@
+package com.yahoo.vespa.config.proxy.filedistribution;
+
+import com.amazonaws.auth.AWSCredentials;
+import com.amazonaws.auth.AWSCredentialsProvider;
+import com.amazonaws.auth.AWSSessionCredentials;
+import com.amazonaws.auth.BasicSessionCredentials;
+import com.amazonaws.services.s3.AmazonS3;
+import com.amazonaws.services.s3.AmazonS3ClientBuilder;
+import com.amazonaws.services.s3.AmazonS3URI;
+import com.amazonaws.services.s3.model.S3Object;
+import com.yahoo.slime.Cursor;
+import com.yahoo.slime.Slime;
+import com.yahoo.slime.SlimeUtils;
+import com.yahoo.vespa.defaults.Defaults;
+
+import java.io.File;
+import java.io.IOException;
+import java.io.UncheckedIOException;
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.util.Optional;
+
+public class S3Downloader implements Downloader {
+
+ private final AmazonS3 s3Client;
+
+ S3Downloader() {
+ this.s3Client = AmazonS3ClientBuilder.standard()
+ .withRegion(System.getenv("VESPA_CLOUD_NATIVE_REGION"))
+ .withCredentials(new CredentialsProvider())
+ .build();
+ }
+
+ @Override
+ public Optional<File> downloadFile(String url, File targetDir) throws IOException {
+ AmazonS3URI s3URI = new AmazonS3URI(url);
+ S3Object s3Object = s3Client.getObject(s3URI.getBucket(), s3URI.getKey());
+ File file = new File(targetDir, fileName());
+ Files.copy(s3Object.getObjectContent(), file.toPath());
+ return Optional.of(file);
+ }
+
+ private static class CredentialsProvider implements AWSCredentialsProvider {
+
+ private static final String DEFAULT_CREDENTIALS_PATH = Defaults.getDefaults()
+ .underVespaHome("var/vespa/aws/credentials.json");
+
+ private final Path credentialsPath;
+
+ public CredentialsProvider() {
+ this.credentialsPath = Path.of(DEFAULT_CREDENTIALS_PATH);
+ }
+
+ @Override
+ public AWSCredentials getCredentials() { return readCredentials(); }
+
+ @Override
+ public void refresh() { readCredentials(); }
+
+ private AWSSessionCredentials readCredentials() {
+ try {
+ Slime slime = SlimeUtils.jsonToSlime(Files.readAllBytes(credentialsPath));
+ Cursor cursor = slime.get();
+ String accessKey = cursor.field("awsAccessKey").asString();
+ String secretKey = cursor.field("awsSecretKey").asString();
+ String sessionToken = cursor.field("sessionToken").asString();
+ return new BasicSessionCredentials(accessKey, secretKey, sessionToken);
+ } catch (IOException e) {
+ throw new UncheckedIOException(e);
+ }
+ }
+
+ }
+
+}
diff --git a/config-proxy/src/main/java/com/yahoo/vespa/config/proxy/filedistribution/UrlDownloadRpcServer.java b/config-proxy/src/main/java/com/yahoo/vespa/config/proxy/filedistribution/UrlDownloadRpcServer.java
index 35c5bb14755..d6bc506e51c 100644
--- a/config-proxy/src/main/java/com/yahoo/vespa/config/proxy/filedistribution/UrlDownloadRpcServer.java
+++ b/config-proxy/src/main/java/com/yahoo/vespa/config/proxy/filedistribution/UrlDownloadRpcServer.java
@@ -6,20 +6,17 @@ import com.yahoo.jrt.Method;
import com.yahoo.jrt.Request;
import com.yahoo.jrt.StringValue;
import com.yahoo.jrt.Supervisor;
+import com.yahoo.net.URI;
import com.yahoo.security.tls.Capability;
import com.yahoo.text.Utf8;
import com.yahoo.vespa.defaults.Defaults;
+import com.yahoo.yolean.Exceptions;
import net.jpountz.xxhash.XXHashFactory;
import java.io.File;
-import java.io.FileOutputStream;
-import java.io.IOException;
-import java.net.HttpURLConnection;
-import java.net.URL;
import java.nio.ByteBuffer;
-import java.nio.channels.Channels;
-import java.nio.channels.ReadableByteChannel;
import java.nio.file.Files;
+import java.util.Optional;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.TimeUnit;
import java.util.logging.Level;
@@ -30,6 +27,7 @@ import static com.yahoo.vespa.config.UrlDownloader.HTTP_ERROR;
import static com.yahoo.vespa.config.UrlDownloader.INTERNAL_ERROR;
import static java.lang.Runtime.getRuntime;
import static java.util.concurrent.Executors.newFixedThreadPool;
+import static java.util.logging.Level.WARNING;
/**
* An RPC server that handles URL download requests.
@@ -39,7 +37,6 @@ import static java.util.concurrent.Executors.newFixedThreadPool;
class UrlDownloadRpcServer {
private static final Logger log = Logger.getLogger(UrlDownloadRpcServer.class.getName());
- private static final String CONTENTS_FILE_NAME = "contents";
static final File defaultDownloadDirectory = new File(Defaults.getDefaults().underVespaHome("var/db/vespa/download"));
private final File rootDownloadDir;
@@ -58,7 +55,8 @@ class UrlDownloadRpcServer {
void close() {
executor.shutdownNow();
try {
- executor.awaitTermination(10, TimeUnit.SECONDS);
+ if ( ! executor.awaitTermination(10, TimeUnit.SECONDS))
+ log.log(WARNING, "Failed to shut down url download rpc server within timeout");
} catch (InterruptedException e) {
throw new RuntimeException(e);
}
@@ -72,60 +70,46 @@ class UrlDownloadRpcServer {
private void downloadFile(Request req) {
String url = req.parameters().get(0).asString();
File downloadDir = new File(rootDownloadDir, urlToDirName(url));
- if (alreadyDownloaded(downloadDir)) {
+ Downloader downloader = downloader(url);
+ if (downloader.alreadyDownloaded(downloader, downloadDir)) {
log.log(Level.INFO, "URL '" + url + "' already downloaded");
- req.returnValues().add(new StringValue(new File(downloadDir, CONTENTS_FILE_NAME).getAbsolutePath()));
+ req.returnValues().add(new StringValue(new File(downloadDir, downloader.fileName()).getAbsolutePath()));
req.returnRequest();
return;
}
try {
- URL website = new URL(url);
- HttpURLConnection connection = (HttpURLConnection) website.openConnection();
- if (connection.getResponseCode() == 200) {
- log.log(Level.INFO, "Downloading URL '" + url + "'");
- downloadFile(req, connection, downloadDir);
- } else {
- log.log(Level.SEVERE, "Download of URL '" + url + "' got server response: " + connection.getResponseCode());
- req.setError(HTTP_ERROR, String.valueOf(connection.getResponseCode()));
- }
+ Files.createDirectories(downloadDir.toPath());
+ Optional<File> file = downloader.downloadFile(url, downloadDir);
+ if (file.isPresent())
+ req.returnValues().add(new StringValue(file.get().getAbsolutePath()));
+ else
+ req.setError(DOES_NOT_EXIST, "URL '" + url + "' not found");
+ } catch (RuntimeException e) {
+ logAndSetRpcError(req, url, e, HTTP_ERROR);
} catch (Throwable e) {
- log.log(Level.SEVERE, "Download of URL '" + url + "' failed, got exception: " + e.getMessage());
- req.setError(INTERNAL_ERROR, "Download of URL '" + url + "' internal error: " + e.getMessage());
+ logAndSetRpcError(req, url, e, INTERNAL_ERROR);
}
req.returnRequest();
}
- private static void downloadFile(Request req, HttpURLConnection connection, File downloadDir) throws IOException {
- long start = System.currentTimeMillis();
- String url = connection.getURL().toString();
- Files.createDirectories(downloadDir.toPath());
- File contentsPath = new File(downloadDir, CONTENTS_FILE_NAME);
- try (ReadableByteChannel rbc = Channels.newChannel(connection.getInputStream())) {
- try (FileOutputStream fos = new FileOutputStream((contentsPath.getAbsolutePath()))) {
- fos.getChannel().transferFrom(rbc, 0, Long.MAX_VALUE);
+ private static Downloader downloader(String url) {
+ URI uri = new URI(url);
+ return switch (uri.getScheme()) {
+ case "http", "https" -> new UrlDownloader();
+ case "s3" -> new S3Downloader();
+ default -> throw new IllegalArgumentException("Unsupported scheme '" + uri.getScheme() + "'");
+ };
+ }
- if (contentsPath.exists() && contentsPath.length() > 0) {
- new RequestTracker().trackRequest(downloadDir);
- req.returnValues().add(new StringValue(contentsPath.getAbsolutePath()));
- log.log(Level.FINE, () -> "URL '" + url + "' available at " + contentsPath);
- log.log(Level.INFO, String.format("Download of URL '%s' done in %.3f seconds",
- url, (System.currentTimeMillis() - start) / 1000.0));
- } else {
- log.log(Level.SEVERE, "Downloaded URL '" + url + "' not found, returning error");
- req.setError(DOES_NOT_EXIST, "Downloaded '" + url + "' not found");
- }
- }
- }
+ private static void logAndSetRpcError(Request req, String url, Throwable e, int rpcErrorCode) {
+ String message = "Download of '" + url + "' failed: " + Exceptions.toMessageString(e);
+ log.log(Level.SEVERE, message);
+ req.setError(rpcErrorCode, e.getMessage());
}
private static String urlToDirName(String uri) {
return String.valueOf(XXHashFactory.fastestJavaInstance().hash64().hash(ByteBuffer.wrap(Utf8.toBytes(uri)), 0));
}
- private static boolean alreadyDownloaded(File downloadDir) {
- File contents = new File(downloadDir, CONTENTS_FILE_NAME);
- return contents.exists() && contents.length() > 0;
- }
-
}
diff --git a/config-proxy/src/main/java/com/yahoo/vespa/config/proxy/filedistribution/UrlDownloader.java b/config-proxy/src/main/java/com/yahoo/vespa/config/proxy/filedistribution/UrlDownloader.java
new file mode 100644
index 00000000000..2f52222a337
--- /dev/null
+++ b/config-proxy/src/main/java/com/yahoo/vespa/config/proxy/filedistribution/UrlDownloader.java
@@ -0,0 +1,53 @@
+// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.vespa.config.proxy.filedistribution;
+
+import java.io.File;
+import java.io.FileOutputStream;
+import java.io.IOException;
+import java.net.HttpURLConnection;
+import java.net.URL;
+import java.nio.channels.Channels;
+import java.nio.channels.ReadableByteChannel;
+import java.nio.file.Files;
+import java.util.Optional;
+import java.util.logging.Level;
+import java.util.logging.Logger;
+
+/**
+ * Download of urls
+ *
+ * @author hmusum
+ */
+class UrlDownloader implements Downloader {
+
+ private static final Logger log = Logger.getLogger(UrlDownloader.class.getName());
+ private static final String CONTENTS_FILE_NAME = "contents";
+
+ @Override
+ public Optional<File> downloadFile(String url, File downloadDir) throws IOException {
+ long start = System.currentTimeMillis();
+ HttpURLConnection connection = (HttpURLConnection) new URL(url).openConnection();
+ if (connection.getResponseCode() != 200)
+ throw new RuntimeException("Download of URL '" + url + "' failed, got response code " + connection.getResponseCode());
+
+ log.log(Level.INFO, "Downloading URL '" + url + "'");
+ File contentsPath = new File(downloadDir, CONTENTS_FILE_NAME);
+ try (ReadableByteChannel rbc = Channels.newChannel(connection.getInputStream())) {
+ try (FileOutputStream fos = new FileOutputStream((contentsPath.getAbsolutePath()))) {
+ fos.getChannel().transferFrom(rbc, 0, Long.MAX_VALUE);
+
+ if (contentsPath.exists() && contentsPath.length() > 0) {
+ new RequestTracker().trackRequest(downloadDir);
+ log.log(Level.FINE, () -> "URL '" + url + "' available at " + contentsPath);
+ log.log(Level.INFO, String.format("Download of URL '%s' done in %.3f seconds",
+ url, (System.currentTimeMillis() - start) / 1000.0));
+ return Optional.of(contentsPath);
+ } else {
+ log.log(Level.SEVERE, "Downloaded URL '" + url + "' not found, returning error");
+ return Optional.empty();
+ }
+ }
+ }
+ }
+
+}
diff --git a/vespa-dependencies-enforcer/allowed-maven-dependencies.txt b/vespa-dependencies-enforcer/allowed-maven-dependencies.txt
index 6d25c6112c1..4da30f95a54 100644
--- a/vespa-dependencies-enforcer/allowed-maven-dependencies.txt
+++ b/vespa-dependencies-enforcer/allowed-maven-dependencies.txt
@@ -10,6 +10,8 @@ ch.qos.logback:logback-classic:1.2.10
ch.qos.logback:logback-core:1.2.10
classworlds:classworlds:1.1-alpha-2
com.amazonaws:aws-java-sdk-core:1.12.540
+com.amazonaws:aws-java-sdk-kms:1.12.540
+com.amazonaws:aws-java-sdk-s3:1.12.540
com.amazonaws:aws-java-sdk-ssm:1.12.540
com.amazonaws:aws-java-sdk-sts:1.12.540
com.amazonaws:jmespath-java:1.12.540