diff options
author | Harald Musum <musum@verizonmedia.com> | 2023-09-18 14:57:52 +0200 |
---|---|---|
committer | GitHub <noreply@github.com> | 2023-09-18 14:57:52 +0200 |
commit | ebbb044d0830c38d9ceb98425c5b666c7627d42b (patch) | |
tree | 35682d85e74d380fc05d1733a84ebaf7ed518311 | |
parent | 5e5137cf0ab9cc67452f36b488394458f827b2b0 (diff) | |
parent | db99754f95ae0b5d400a04d84c2eb16152e80e71 (diff) |
Merge pull request #28555 from vespa-engine/hmusum/support-download-of-files-from-s3
Hmusum/support download of files from s3
6 files changed, 196 insertions, 46 deletions
diff --git a/config-proxy/pom.xml b/config-proxy/pom.xml index 2984b4b1b65..466515388d4 100644 --- a/config-proxy/pom.xml +++ b/config-proxy/pom.xml @@ -14,6 +14,11 @@ <version>8-SNAPSHOT</version> <dependencies> <dependency> + <groupId>com.amazonaws</groupId> + <artifactId>aws-java-sdk-s3</artifactId> + <version>${aws-sdk.vespa.version}</version> + </dependency> + <dependency> <groupId>com.yahoo.vespa</groupId> <artifactId>config-lib</artifactId> <version>${project.version}</version> diff --git a/config-proxy/src/main/java/com/yahoo/vespa/config/proxy/filedistribution/Downloader.java b/config-proxy/src/main/java/com/yahoo/vespa/config/proxy/filedistribution/Downloader.java new file mode 100644 index 00000000000..0692d3ee499 --- /dev/null +++ b/config-proxy/src/main/java/com/yahoo/vespa/config/proxy/filedistribution/Downloader.java @@ -0,0 +1,31 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.vespa.config.proxy.filedistribution; + +import java.io.File; +import java.io.FileOutputStream; +import java.io.IOException; +import java.net.HttpURLConnection; +import java.net.URL; +import java.nio.channels.Channels; +import java.nio.channels.ReadableByteChannel; +import java.nio.file.Files; +import java.util.Optional; +import java.util.logging.Level; +import java.util.logging.Logger; + +/** + * + * @author hmusum + */ +interface Downloader { + + Optional<File> downloadFile(String url, File downloadDir) throws IOException; + + default String fileName() { return "contents"; } + + default boolean alreadyDownloaded(Downloader downloader, File downloadDir) { + File contents = new File(downloadDir, downloader.fileName()); + return contents.exists() && contents.length() > 0; + } + +} diff --git a/config-proxy/src/main/java/com/yahoo/vespa/config/proxy/filedistribution/S3Downloader.java b/config-proxy/src/main/java/com/yahoo/vespa/config/proxy/filedistribution/S3Downloader.java new file mode 100644 index 00000000000..ce79cfd5b7e --- /dev/null +++ b/config-proxy/src/main/java/com/yahoo/vespa/config/proxy/filedistribution/S3Downloader.java @@ -0,0 +1,75 @@ +package com.yahoo.vespa.config.proxy.filedistribution; + +import com.amazonaws.auth.AWSCredentials; +import com.amazonaws.auth.AWSCredentialsProvider; +import com.amazonaws.auth.AWSSessionCredentials; +import com.amazonaws.auth.BasicSessionCredentials; +import com.amazonaws.services.s3.AmazonS3; +import com.amazonaws.services.s3.AmazonS3ClientBuilder; +import com.amazonaws.services.s3.AmazonS3URI; +import com.amazonaws.services.s3.model.S3Object; +import com.yahoo.slime.Cursor; +import com.yahoo.slime.Slime; +import com.yahoo.slime.SlimeUtils; +import com.yahoo.vespa.defaults.Defaults; + +import java.io.File; +import java.io.IOException; +import java.io.UncheckedIOException; +import java.nio.file.Files; +import java.nio.file.Path; +import java.util.Optional; + +public class S3Downloader implements Downloader { + + private final AmazonS3 s3Client; + + S3Downloader() { + this.s3Client = AmazonS3ClientBuilder.standard() + .withRegion(System.getenv("VESPA_CLOUD_NATIVE_REGION")) + .withCredentials(new CredentialsProvider()) + .build(); + } + + @Override + public Optional<File> downloadFile(String url, File targetDir) throws IOException { + AmazonS3URI s3URI = new AmazonS3URI(url); + S3Object s3Object = s3Client.getObject(s3URI.getBucket(), s3URI.getKey()); + File file = new File(targetDir, fileName()); + Files.copy(s3Object.getObjectContent(), file.toPath()); + return Optional.of(file); + } + + private static class CredentialsProvider implements AWSCredentialsProvider { + + private static final String DEFAULT_CREDENTIALS_PATH = Defaults.getDefaults() + .underVespaHome("var/vespa/aws/credentials.json"); + + private final Path credentialsPath; + + public CredentialsProvider() { + this.credentialsPath = Path.of(DEFAULT_CREDENTIALS_PATH); + } + + @Override + public AWSCredentials getCredentials() { return readCredentials(); } + + @Override + public void refresh() { readCredentials(); } + + private AWSSessionCredentials readCredentials() { + try { + Slime slime = SlimeUtils.jsonToSlime(Files.readAllBytes(credentialsPath)); + Cursor cursor = slime.get(); + String accessKey = cursor.field("awsAccessKey").asString(); + String secretKey = cursor.field("awsSecretKey").asString(); + String sessionToken = cursor.field("sessionToken").asString(); + return new BasicSessionCredentials(accessKey, secretKey, sessionToken); + } catch (IOException e) { + throw new UncheckedIOException(e); + } + } + + } + +} diff --git a/config-proxy/src/main/java/com/yahoo/vespa/config/proxy/filedistribution/UrlDownloadRpcServer.java b/config-proxy/src/main/java/com/yahoo/vespa/config/proxy/filedistribution/UrlDownloadRpcServer.java index 35c5bb14755..d6bc506e51c 100644 --- a/config-proxy/src/main/java/com/yahoo/vespa/config/proxy/filedistribution/UrlDownloadRpcServer.java +++ b/config-proxy/src/main/java/com/yahoo/vespa/config/proxy/filedistribution/UrlDownloadRpcServer.java @@ -6,20 +6,17 @@ import com.yahoo.jrt.Method; import com.yahoo.jrt.Request; import com.yahoo.jrt.StringValue; import com.yahoo.jrt.Supervisor; +import com.yahoo.net.URI; import com.yahoo.security.tls.Capability; import com.yahoo.text.Utf8; import com.yahoo.vespa.defaults.Defaults; +import com.yahoo.yolean.Exceptions; import net.jpountz.xxhash.XXHashFactory; import java.io.File; -import java.io.FileOutputStream; -import java.io.IOException; -import java.net.HttpURLConnection; -import java.net.URL; import java.nio.ByteBuffer; -import java.nio.channels.Channels; -import java.nio.channels.ReadableByteChannel; import java.nio.file.Files; +import java.util.Optional; import java.util.concurrent.ExecutorService; import java.util.concurrent.TimeUnit; import java.util.logging.Level; @@ -30,6 +27,7 @@ import static com.yahoo.vespa.config.UrlDownloader.HTTP_ERROR; import static com.yahoo.vespa.config.UrlDownloader.INTERNAL_ERROR; import static java.lang.Runtime.getRuntime; import static java.util.concurrent.Executors.newFixedThreadPool; +import static java.util.logging.Level.WARNING; /** * An RPC server that handles URL download requests. @@ -39,7 +37,6 @@ import static java.util.concurrent.Executors.newFixedThreadPool; class UrlDownloadRpcServer { private static final Logger log = Logger.getLogger(UrlDownloadRpcServer.class.getName()); - private static final String CONTENTS_FILE_NAME = "contents"; static final File defaultDownloadDirectory = new File(Defaults.getDefaults().underVespaHome("var/db/vespa/download")); private final File rootDownloadDir; @@ -58,7 +55,8 @@ class UrlDownloadRpcServer { void close() { executor.shutdownNow(); try { - executor.awaitTermination(10, TimeUnit.SECONDS); + if ( ! executor.awaitTermination(10, TimeUnit.SECONDS)) + log.log(WARNING, "Failed to shut down url download rpc server within timeout"); } catch (InterruptedException e) { throw new RuntimeException(e); } @@ -72,60 +70,46 @@ class UrlDownloadRpcServer { private void downloadFile(Request req) { String url = req.parameters().get(0).asString(); File downloadDir = new File(rootDownloadDir, urlToDirName(url)); - if (alreadyDownloaded(downloadDir)) { + Downloader downloader = downloader(url); + if (downloader.alreadyDownloaded(downloader, downloadDir)) { log.log(Level.INFO, "URL '" + url + "' already downloaded"); - req.returnValues().add(new StringValue(new File(downloadDir, CONTENTS_FILE_NAME).getAbsolutePath())); + req.returnValues().add(new StringValue(new File(downloadDir, downloader.fileName()).getAbsolutePath())); req.returnRequest(); return; } try { - URL website = new URL(url); - HttpURLConnection connection = (HttpURLConnection) website.openConnection(); - if (connection.getResponseCode() == 200) { - log.log(Level.INFO, "Downloading URL '" + url + "'"); - downloadFile(req, connection, downloadDir); - } else { - log.log(Level.SEVERE, "Download of URL '" + url + "' got server response: " + connection.getResponseCode()); - req.setError(HTTP_ERROR, String.valueOf(connection.getResponseCode())); - } + Files.createDirectories(downloadDir.toPath()); + Optional<File> file = downloader.downloadFile(url, downloadDir); + if (file.isPresent()) + req.returnValues().add(new StringValue(file.get().getAbsolutePath())); + else + req.setError(DOES_NOT_EXIST, "URL '" + url + "' not found"); + } catch (RuntimeException e) { + logAndSetRpcError(req, url, e, HTTP_ERROR); } catch (Throwable e) { - log.log(Level.SEVERE, "Download of URL '" + url + "' failed, got exception: " + e.getMessage()); - req.setError(INTERNAL_ERROR, "Download of URL '" + url + "' internal error: " + e.getMessage()); + logAndSetRpcError(req, url, e, INTERNAL_ERROR); } req.returnRequest(); } - private static void downloadFile(Request req, HttpURLConnection connection, File downloadDir) throws IOException { - long start = System.currentTimeMillis(); - String url = connection.getURL().toString(); - Files.createDirectories(downloadDir.toPath()); - File contentsPath = new File(downloadDir, CONTENTS_FILE_NAME); - try (ReadableByteChannel rbc = Channels.newChannel(connection.getInputStream())) { - try (FileOutputStream fos = new FileOutputStream((contentsPath.getAbsolutePath()))) { - fos.getChannel().transferFrom(rbc, 0, Long.MAX_VALUE); + private static Downloader downloader(String url) { + URI uri = new URI(url); + return switch (uri.getScheme()) { + case "http", "https" -> new UrlDownloader(); + case "s3" -> new S3Downloader(); + default -> throw new IllegalArgumentException("Unsupported scheme '" + uri.getScheme() + "'"); + }; + } - if (contentsPath.exists() && contentsPath.length() > 0) { - new RequestTracker().trackRequest(downloadDir); - req.returnValues().add(new StringValue(contentsPath.getAbsolutePath())); - log.log(Level.FINE, () -> "URL '" + url + "' available at " + contentsPath); - log.log(Level.INFO, String.format("Download of URL '%s' done in %.3f seconds", - url, (System.currentTimeMillis() - start) / 1000.0)); - } else { - log.log(Level.SEVERE, "Downloaded URL '" + url + "' not found, returning error"); - req.setError(DOES_NOT_EXIST, "Downloaded '" + url + "' not found"); - } - } - } + private static void logAndSetRpcError(Request req, String url, Throwable e, int rpcErrorCode) { + String message = "Download of '" + url + "' failed: " + Exceptions.toMessageString(e); + log.log(Level.SEVERE, message); + req.setError(rpcErrorCode, e.getMessage()); } private static String urlToDirName(String uri) { return String.valueOf(XXHashFactory.fastestJavaInstance().hash64().hash(ByteBuffer.wrap(Utf8.toBytes(uri)), 0)); } - private static boolean alreadyDownloaded(File downloadDir) { - File contents = new File(downloadDir, CONTENTS_FILE_NAME); - return contents.exists() && contents.length() > 0; - } - } diff --git a/config-proxy/src/main/java/com/yahoo/vespa/config/proxy/filedistribution/UrlDownloader.java b/config-proxy/src/main/java/com/yahoo/vespa/config/proxy/filedistribution/UrlDownloader.java new file mode 100644 index 00000000000..2f52222a337 --- /dev/null +++ b/config-proxy/src/main/java/com/yahoo/vespa/config/proxy/filedistribution/UrlDownloader.java @@ -0,0 +1,53 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.vespa.config.proxy.filedistribution; + +import java.io.File; +import java.io.FileOutputStream; +import java.io.IOException; +import java.net.HttpURLConnection; +import java.net.URL; +import java.nio.channels.Channels; +import java.nio.channels.ReadableByteChannel; +import java.nio.file.Files; +import java.util.Optional; +import java.util.logging.Level; +import java.util.logging.Logger; + +/** + * Download of urls + * + * @author hmusum + */ +class UrlDownloader implements Downloader { + + private static final Logger log = Logger.getLogger(UrlDownloader.class.getName()); + private static final String CONTENTS_FILE_NAME = "contents"; + + @Override + public Optional<File> downloadFile(String url, File downloadDir) throws IOException { + long start = System.currentTimeMillis(); + HttpURLConnection connection = (HttpURLConnection) new URL(url).openConnection(); + if (connection.getResponseCode() != 200) + throw new RuntimeException("Download of URL '" + url + "' failed, got response code " + connection.getResponseCode()); + + log.log(Level.INFO, "Downloading URL '" + url + "'"); + File contentsPath = new File(downloadDir, CONTENTS_FILE_NAME); + try (ReadableByteChannel rbc = Channels.newChannel(connection.getInputStream())) { + try (FileOutputStream fos = new FileOutputStream((contentsPath.getAbsolutePath()))) { + fos.getChannel().transferFrom(rbc, 0, Long.MAX_VALUE); + + if (contentsPath.exists() && contentsPath.length() > 0) { + new RequestTracker().trackRequest(downloadDir); + log.log(Level.FINE, () -> "URL '" + url + "' available at " + contentsPath); + log.log(Level.INFO, String.format("Download of URL '%s' done in %.3f seconds", + url, (System.currentTimeMillis() - start) / 1000.0)); + return Optional.of(contentsPath); + } else { + log.log(Level.SEVERE, "Downloaded URL '" + url + "' not found, returning error"); + return Optional.empty(); + } + } + } + } + +} diff --git a/vespa-dependencies-enforcer/allowed-maven-dependencies.txt b/vespa-dependencies-enforcer/allowed-maven-dependencies.txt index 6d25c6112c1..4da30f95a54 100644 --- a/vespa-dependencies-enforcer/allowed-maven-dependencies.txt +++ b/vespa-dependencies-enforcer/allowed-maven-dependencies.txt @@ -10,6 +10,8 @@ ch.qos.logback:logback-classic:1.2.10 ch.qos.logback:logback-core:1.2.10 classworlds:classworlds:1.1-alpha-2 com.amazonaws:aws-java-sdk-core:1.12.540 +com.amazonaws:aws-java-sdk-kms:1.12.540 +com.amazonaws:aws-java-sdk-s3:1.12.540 com.amazonaws:aws-java-sdk-ssm:1.12.540 com.amazonaws:aws-java-sdk-sts:1.12.540 com.amazonaws:jmespath-java:1.12.540 |