aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorMartin Polden <mpolden@mpolden.no>2022-04-04 09:25:52 +0200
committerMartin Polden <mpolden@mpolden.no>2022-04-04 15:26:43 +0200
commita72efb9ee5f82b57ceae028fa00841f70755a205 (patch)
treebe707fc34a4e4871698bcbe51a48677b9438a32a
parent02151a078dd5f45defaa42b19bf127bc2c999944 (diff)
Implement ArchiveStreamReader
-rw-r--r--cloud-tenant-base-dependencies-enforcer/pom.xml1
-rw-r--r--container-dev/pom.xml4
-rw-r--r--container-test/pom.xml5
-rw-r--r--vespajlib/pom.xml5
-rw-r--r--vespajlib/src/main/java/com/yahoo/compress/ArchiveStreamReader.java194
-rw-r--r--vespajlib/src/test/java/com/yahoo/compress/ArchiveStreamReaderTest.java62
6 files changed, 271 insertions, 0 deletions
diff --git a/cloud-tenant-base-dependencies-enforcer/pom.xml b/cloud-tenant-base-dependencies-enforcer/pom.xml
index 21e5de31534..b5281050459 100644
--- a/cloud-tenant-base-dependencies-enforcer/pom.xml
+++ b/cloud-tenant-base-dependencies-enforcer/pom.xml
@@ -236,6 +236,7 @@
<include>org.antlr:antlr-runtime:3.5.2:jar:test</include>
<include>org.antlr:antlr4-runtime:4.9.3:jar:test</include>
<include>org.apache.commons:commons-exec:1.3:jar:test</include>
+ <include>org.apache.commons:commons-compress:1.21:jar:test</include>
<include>org.apache.commons:commons-math3:3.6.1:jar:test</include>
<include>org.apache.httpcomponents.client5:httpclient5:${httpclient5.version}:jar:test</include>
<include>org.apache.httpcomponents.core5:httpcore5:${httpclient5.version}:jar:test</include>
diff --git a/container-dev/pom.xml b/container-dev/pom.xml
index 034081f4620..6268e1e6fb4 100644
--- a/container-dev/pom.xml
+++ b/container-dev/pom.xml
@@ -121,6 +121,10 @@
<groupId>io.airlift</groupId>
<artifactId>aircompressor</artifactId>
</exclusion>
+ <exclusion>
+ <groupId>org.apache.commons</groupId>
+ <artifactId>commons-compress</artifactId>
+ </exclusion>
</exclusions>
</dependency>
<dependency>
diff --git a/container-test/pom.xml b/container-test/pom.xml
index 7c739faad26..d4ea39fa3c9 100644
--- a/container-test/pom.xml
+++ b/container-test/pom.xml
@@ -92,5 +92,10 @@
<artifactId>aircompressor</artifactId>
<scope>compile</scope>
</dependency>
+ <dependency>
+ <groupId>org.apache.commons</groupId>
+ <artifactId>commons-compress</artifactId>
+ <scope>compile</scope>
+ </dependency>
</dependencies>
</project>
diff --git a/vespajlib/pom.xml b/vespajlib/pom.xml
index b4898b14b86..ed6ae3678f4 100644
--- a/vespajlib/pom.xml
+++ b/vespajlib/pom.xml
@@ -36,6 +36,11 @@
<artifactId>aircompressor</artifactId>
<scope>compile</scope>
</dependency>
+ <dependency>
+ <groupId>org.apache.commons</groupId>
+ <artifactId>commons-compress</artifactId>
+ <scope>compile</scope>
+ </dependency>
<!-- provided scope -->
<dependency>
diff --git a/vespajlib/src/main/java/com/yahoo/compress/ArchiveStreamReader.java b/vespajlib/src/main/java/com/yahoo/compress/ArchiveStreamReader.java
new file mode 100644
index 00000000000..9f6bd5b1bb8
--- /dev/null
+++ b/vespajlib/src/main/java/com/yahoo/compress/ArchiveStreamReader.java
@@ -0,0 +1,194 @@
+// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.compress;
+
+import com.yahoo.path.Path;
+import com.yahoo.yolean.Exceptions;
+import org.apache.commons.compress.archivers.ArchiveEntry;
+import org.apache.commons.compress.archivers.ArchiveInputStream;
+import org.apache.commons.compress.archivers.tar.TarArchiveEntry;
+import org.apache.commons.compress.archivers.tar.TarArchiveInputStream;
+import org.apache.commons.compress.archivers.zip.ZipArchiveEntry;
+import org.apache.commons.compress.archivers.zip.ZipArchiveInputStream;
+
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.OutputStream;
+import java.io.UncheckedIOException;
+import java.util.Objects;
+import java.util.OptionalLong;
+import java.util.function.Predicate;
+import java.util.zip.GZIPInputStream;
+
+/**
+ * Helper class for safely reading files from a compressed archive.
+ *
+ * @author mpolden
+ */
+public class ArchiveStreamReader implements AutoCloseable {
+
+ private final ArchiveInputStream archiveInputStream;
+ private final Options options;
+
+ private long totalRead = 0;
+
+ private ArchiveStreamReader(ArchiveInputStream archiveInputStream, Options options) {
+ this.archiveInputStream = Objects.requireNonNull(archiveInputStream);
+ this.options = Objects.requireNonNull(options);
+ }
+
+ /** Create reader for an inputStream containing a tar.gz file */
+ public static ArchiveStreamReader ofTarGzip(InputStream inputStream, Options options) {
+ return new ArchiveStreamReader(new TarArchiveInputStream(Exceptions.uncheck(() -> new GZIPInputStream(inputStream))), options);
+ }
+
+ /** Create reader for an inputStream containing a ZIP file */
+ public static ArchiveStreamReader ofZip(InputStream inputStream, Options options) {
+ return new ArchiveStreamReader(new ZipArchiveInputStream(inputStream), options);
+ }
+
+ /**
+ * Read the next file in this archive and write it to given outputStream. Returns information about the read archive
+ * file, or null if there are no more files to read.
+ */
+ public ArchiveFile readNextTo(OutputStream outputStream) {
+ ArchiveEntry entry;
+ try {
+ while ((entry = archiveInputStream.getNextEntry()) != null) {
+ Path path = Path.fromString(requireNormalized(entry.getName()));
+ if (isSymlink(entry)) throw new IllegalArgumentException("Archive entry " + path + " is a symbolic link, which is disallowed");
+ if (!options.pathPredicate.test(path.toString())) continue;
+
+ long size = 0;
+ byte[] buffer = new byte[2048];
+ int read;
+ while ((read = archiveInputStream.read(buffer)) != -1) {
+ totalRead += read;
+ size += read;
+ if (totalRead > options.sizeLimit) throw new IllegalArgumentException("Total size of archive exceeds size limit");
+ if (read > options.entrySizeLimit) {
+ if (!options.truncateEntry) throw new IllegalArgumentException("Size of entry " + path + " exceeded entry size limit");
+ } else {
+ outputStream.write(buffer, 0, read);
+ }
+ }
+ return new ArchiveFile(path, crc32(entry), size);
+ }
+ } catch (IOException e) {
+ throw new UncheckedIOException(e);
+ }
+ return null;
+ }
+
+ @Override
+ public void close() {
+ Exceptions.uncheck(archiveInputStream::close);
+ }
+
+ /** Information about a file extracted from a compressed archive */
+ public static class ArchiveFile {
+
+ private final Path path;
+ private final OptionalLong crc32;
+ private final long size;
+
+ public ArchiveFile(Path name, OptionalLong crc32, long size) {
+ this.path = Objects.requireNonNull(name);
+ this.crc32 = Objects.requireNonNull(crc32);
+ if (crc32.isPresent()) {
+ requireNonNegative("crc32", crc32.getAsLong());
+ }
+ this.size = requireNonNegative("size", size);
+ }
+
+ /** The path of this file inside its containing archive */
+ public Path path() {
+ return path;
+ }
+
+ /** The CRC-32 checksum of this file, if any */
+ public OptionalLong crc32() {
+ return crc32;
+ }
+
+ /** The decompressed size of this file */
+ public long size() {
+ return size;
+ }
+
+ private static long requireNonNegative(String field, long n) {
+ if (n < 0) throw new IllegalArgumentException(field + " cannot be negative, got " + n);
+ return n;
+ }
+
+ }
+
+ /** Get the CRC-32 checksum of given archive entry, if any */
+ private static OptionalLong crc32(ArchiveEntry entry) {
+ long crc32 = -1;
+ if (entry instanceof ZipArchiveEntry) {
+ crc32 = ((ZipArchiveEntry) entry).getCrc();
+ }
+ return crc32 > -1 ? OptionalLong.of(crc32) : OptionalLong.empty();
+ }
+
+ private static boolean isSymlink(ArchiveEntry entry) {
+ if (entry instanceof ZipArchiveEntry) return ((ZipArchiveEntry) entry).isUnixSymlink();
+ if (entry instanceof TarArchiveEntry) return ((TarArchiveEntry) entry).isSymbolicLink();
+ // TODO: Add workaround for the poor symlink handling in compress library
+ throw new IllegalArgumentException("Unsupported archive entry " + entry.getClass().getSimpleName() + ", cannot check for symbolic link");
+ }
+
+ private static String requireNormalized(String name) {
+ for (var part : name.split("/")) {
+ if (part.equals(".") || part.equals("..") || part.isEmpty()) {
+ throw new IllegalArgumentException("Unexpected non-normalized path found in zip content: '" + name + "'");
+ }
+ }
+ return name;
+ }
+
+ /** Options for reading entries of an archive */
+ public static class Options {
+
+ private long sizeLimit = 8 * (long) Math.pow(1024, 3); // 8 GB
+ private long entrySizeLimit = Long.MAX_VALUE;
+ private boolean truncateEntry = false;
+ private Predicate<String> pathPredicate = (path) -> true;
+
+ private Options() {}
+
+ /** Returns the standard set of read options */
+ public static Options standard() {
+ return new Options();
+ }
+
+ /** Set the total size limit when decompressing entries. Default is 8 GB */
+ public Options sizeLimit(long limit) {
+ this.sizeLimit = limit;
+ return this;
+ }
+
+ /** Set the size limit of a decompressed entry. Default is no limit */
+ public Options entrySizeLimit(long limit) {
+ this.entrySizeLimit = limit;
+ return this;
+ }
+
+ /**
+ * Set whether to truncate the content of an entry exceeding the configured size limit, instead of throwing.
+ * Default is to throw.
+ */
+ public Options truncateEntry(boolean truncate) {
+ this.truncateEntry = truncate;
+ return this;
+ }
+
+ /** Set a predicate that an archive file path must match in order to be extracted. Default is to extract all files */
+ public Options pathPredicate(Predicate<String> predicate) {
+ this.pathPredicate = predicate;
+ return this;
+ }
+
+ }
+
+}
diff --git a/vespajlib/src/test/java/com/yahoo/compress/ArchiveStreamReaderTest.java b/vespajlib/src/test/java/com/yahoo/compress/ArchiveStreamReaderTest.java
new file mode 100644
index 00000000000..9a292e8b3ea
--- /dev/null
+++ b/vespajlib/src/test/java/com/yahoo/compress/ArchiveStreamReaderTest.java
@@ -0,0 +1,62 @@
+package com.yahoo.compress;
+
+import com.yahoo.compress.ArchiveStreamReader.Options;
+import com.yahoo.yolean.Exceptions;
+import org.apache.commons.compress.archivers.zip.ZipArchiveEntry;
+import org.apache.commons.compress.archivers.zip.ZipArchiveOutputStream;
+import org.junit.jupiter.api.Test;
+
+import java.io.ByteArrayInputStream;
+import java.io.ByteArrayOutputStream;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.UncheckedIOException;
+import java.nio.charset.StandardCharsets;
+import java.util.HashMap;
+import java.util.Map;
+
+import static org.junit.jupiter.api.Assertions.assertEquals;
+
+/**
+ * @author mpolden
+ */
+class ArchiveStreamReaderTest {
+
+ @Test
+ void reading() {
+ Map<String, String> zipContents = Map.of("foo", "contents of foo",
+ "bar", "contents of bar",
+ "baz", "0".repeat(2049));
+ ArchiveStreamReader reader = ArchiveStreamReader.ofZip(zip(zipContents), Options.standard());
+ ArchiveStreamReader.ArchiveFile file;
+ Map<String, String> extracted = new HashMap<>();
+ ByteArrayOutputStream baos = new ByteArrayOutputStream();
+ while ((file = reader.readNextTo(baos)) != null) {
+ extracted.put(file.path().toString(), baos.toString(StandardCharsets.UTF_8));
+ baos.reset();
+ }
+ assertEquals(zipContents, extracted);
+ }
+
+ private static InputStream zip(Map<String, String> entries) {
+ ByteArrayOutputStream baos = new ByteArrayOutputStream();
+ ZipArchiveOutputStream archiveOutputStream = null;
+ try {
+ archiveOutputStream = new ZipArchiveOutputStream(baos);
+ for (var kv : entries.entrySet()) {
+ String entryName = kv.getKey();
+ String contents = kv.getValue();
+ ZipArchiveEntry entry = new ZipArchiveEntry(entryName);
+ archiveOutputStream.putArchiveEntry(entry);
+ archiveOutputStream.write(contents.getBytes(StandardCharsets.UTF_8));
+ archiveOutputStream.closeArchiveEntry();
+ }
+ } catch (IOException e) {
+ throw new UncheckedIOException(e);
+ } finally {
+ if (archiveOutputStream != null) Exceptions.uncheck(archiveOutputStream::close);
+ }
+ return new ByteArrayInputStream(baos.toByteArray());
+ }
+
+}