summaryrefslogtreecommitdiffstats
path: root/application-model
diff options
context:
space:
mode:
authorHenning Baldersheim <balder@yahoo-inc.com>2022-11-26 16:04:20 +0100
committerHenning Baldersheim <balder@yahoo-inc.com>2022-11-26 17:26:52 +0100
commit9b11ce7f3b9080c6c3e640e694b253a1122e8bfa (patch)
treeff388da577a44e69966fd9c0d855d7acccc9bbba /application-model
parent46a1ed45abb3f7635069ff07d9e046406fe1062f (diff)
Move ArchiveStreamReader and its large dependency from vespajlib to application-model.
This avoids it being pulled into config-model-fat, which made the install larger than necessary.
Diffstat (limited to 'application-model')
-rw-r--r--application-model/pom.xml10
-rw-r--r--application-model/src/main/java/com/yahoo/vespa/archive/ArchiveStreamReader.java216
-rw-r--r--application-model/src/test/java/com/yahoo/vespa/archive/ArchiveStreamReaderTest.java131
3 files changed, 357 insertions, 0 deletions
diff --git a/application-model/pom.xml b/application-model/pom.xml
index 2143f3a5ffd..f81c4ea4b62 100644
--- a/application-model/pom.xml
+++ b/application-model/pom.xml
@@ -22,6 +22,11 @@
<scope>provided</scope>
</dependency>
<dependency>
+ <groupId>org.apache.commons</groupId>
+ <artifactId>commons-compress</artifactId>
+ <scope>provided</scope>
+ </dependency>
+ <dependency>
<groupId>com.yahoo.vespa</groupId>
<artifactId>vespajlib</artifactId>
<version>${project.version}</version>
@@ -39,6 +44,11 @@
<version>${project.version}</version>
<scope>provided</scope>
</dependency>
+ <dependency>
+ <groupId>org.junit.jupiter</groupId>
+ <artifactId>junit-jupiter</artifactId>
+ <scope>test</scope>
+ </dependency>
</dependencies>
<build>
<plugins>
diff --git a/application-model/src/main/java/com/yahoo/vespa/archive/ArchiveStreamReader.java b/application-model/src/main/java/com/yahoo/vespa/archive/ArchiveStreamReader.java
new file mode 100644
index 00000000000..87665efc1ef
--- /dev/null
+++ b/application-model/src/main/java/com/yahoo/vespa/archive/ArchiveStreamReader.java
@@ -0,0 +1,216 @@
+// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.vespa.archive;
+
+import com.yahoo.path.Path;
+import com.yahoo.yolean.Exceptions;
+import org.apache.commons.compress.archivers.ArchiveEntry;
+import org.apache.commons.compress.archivers.ArchiveInputStream;
+import org.apache.commons.compress.archivers.tar.TarArchiveEntry;
+import org.apache.commons.compress.archivers.tar.TarArchiveInputStream;
+import org.apache.commons.compress.archivers.zip.ZipArchiveEntry;
+import org.apache.commons.compress.archivers.zip.ZipArchiveInputStream;
+
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.OutputStream;
+import java.io.UncheckedIOException;
+import java.util.Objects;
+import java.util.OptionalLong;
+import java.util.function.Predicate;
+import java.util.zip.GZIPInputStream;
+
+/**
+ * Helper class for safely reading files from a compressed archive.
+ *
+ * @author mpolden
+ */
+public class ArchiveStreamReader implements AutoCloseable {
+
+ private final ArchiveInputStream archiveInputStream;
+ private final Options options;
+
+ private long totalRead = 0;
+ private long entriesRead = 0;
+
+ private ArchiveStreamReader(ArchiveInputStream archiveInputStream, Options options) {
+ this.archiveInputStream = Objects.requireNonNull(archiveInputStream);
+ this.options = Objects.requireNonNull(options);
+ }
+
+ /** Create reader for an inputStream containing a tar.gz file */
+ public static ArchiveStreamReader ofTarGzip(InputStream inputStream, Options options) {
+ return new ArchiveStreamReader(new TarArchiveInputStream(Exceptions.uncheck(() -> new GZIPInputStream(inputStream))), options);
+ }
+
+ /** Create reader for an inputStream containing a ZIP file */
+ public static ArchiveStreamReader ofZip(InputStream inputStream, Options options) {
+ return new ArchiveStreamReader(new ZipArchiveInputStream(inputStream), options);
+ }
+
+ /**
+ * Read the next file in this archive and write it to given outputStream. Returns information about the read archive
+ * file, or null if there are no more files to read.
+ */
+ public ArchiveFile readNextTo(OutputStream outputStream) {
+ ArchiveEntry entry;
+ try {
+ while ((entry = archiveInputStream.getNextEntry()) != null) {
+ Path path = Path.fromString(requireNormalized(entry.getName(), options.allowDotSegment));
+ if (isSymlink(entry)) throw new IllegalArgumentException("Archive entry " + path + " is a symbolic link, which is unsupported");
+ if (entry.isDirectory()) continue;
+ if (!options.pathPredicate.test(path.toString())) continue;
+ if (++entriesRead > options.maxEntries) throw new IllegalArgumentException("Attempted to read more entries than entry limit of " + options.maxEntries);
+
+ long size = 0;
+ byte[] buffer = new byte[2048];
+ int read;
+ while ((read = archiveInputStream.read(buffer)) != -1) {
+ totalRead += read;
+ size += read;
+ if (totalRead > options.maxSize) throw new IllegalArgumentException("Total size of archive exceeds size limit of " + options.maxSize + " bytes");
+ if (read > options.maxEntrySize) {
+ if (!options.truncateEntry) throw new IllegalArgumentException("Size of entry " + path + " exceeded entry size limit of " + options.maxEntrySize + " bytes");
+ } else {
+ outputStream.write(buffer, 0, read);
+ }
+ }
+ return new ArchiveFile(path, crc32(entry), size);
+ }
+ } catch (IOException e) {
+ throw new UncheckedIOException(e);
+ }
+ return null;
+ }
+
+ @Override
+ public void close() {
+ Exceptions.uncheck(archiveInputStream::close);
+ }
+
+ /** Information about a file extracted from a compressed archive */
+ public static class ArchiveFile {
+
+ private final Path path;
+ private final OptionalLong crc32;
+ private final long size;
+
+ public ArchiveFile(Path name, OptionalLong crc32, long size) {
+ this.path = Objects.requireNonNull(name);
+ this.crc32 = Objects.requireNonNull(crc32);
+ if (crc32.isPresent()) {
+ requireNonNegative("crc32", crc32.getAsLong());
+ }
+ this.size = requireNonNegative("size", size);
+ }
+
+ /** The path of this file inside its containing archive */
+ public Path path() {
+ return path;
+ }
+
+ /** The CRC-32 checksum of this file, if any */
+ public OptionalLong crc32() {
+ return crc32;
+ }
+
+ /** The decompressed size of this file */
+ public long size() {
+ return size;
+ }
+
+ }
+
+ /** Get the CRC-32 checksum of given archive entry, if any */
+ private static OptionalLong crc32(ArchiveEntry entry) {
+ long crc32 = -1;
+ if (entry instanceof ZipArchiveEntry) {
+ crc32 = ((ZipArchiveEntry) entry).getCrc();
+ }
+ return crc32 > -1 ? OptionalLong.of(crc32) : OptionalLong.empty();
+ }
+
+ private static boolean isSymlink(ArchiveEntry entry) {
+ // Symlinks inside ZIP files are not part of the ZIP spec and are only supported by some implementations, such
+ // as Info-ZIP.
+ //
+ // Commons Compress only has limited support for symlinks as they are only detected when the ZIP file is read
+ // through org.apache.commons.compress.archivers.zip.ZipFile. This is not the case in this class, because it must
+ // support reading ZIP files from generic input streams. The check below thus always returns false.
+ if (entry instanceof ZipArchiveEntry zipEntry) return zipEntry.isUnixSymlink();
+ if (entry instanceof TarArchiveEntry tarEntry) return tarEntry.isSymbolicLink();
+ throw new IllegalArgumentException("Unsupported archive entry " + entry.getClass().getSimpleName() + ", cannot check for symbolic link");
+ }
+
+ private static String requireNormalized(String name, boolean allowDotSegment) {
+ for (var part : name.split("/")) {
+ if (part.isEmpty() || (!allowDotSegment && part.equals(".")) || part.equals("..")) {
+ throw new IllegalArgumentException("Unexpected non-normalized path found in zip content: '" + name + "'");
+ }
+ }
+ return name;
+ }
+
+ private static long requireNonNegative(String field, long n) {
+ if (n < 0) throw new IllegalArgumentException(field + " cannot be negative, got " + n);
+ return n;
+ }
+
+ /** Options for reading entries of an archive */
+ public static class Options {
+
+ private long maxSize = 8 * (long) Math.pow(1024, 3); // 8 GB
+ private long maxEntrySize = Long.MAX_VALUE;
+ private long maxEntries = Long.MAX_VALUE;
+ private boolean truncateEntry = false;
+ private boolean allowDotSegment = false;
+ private Predicate<String> pathPredicate = (path) -> true;
+
+ private Options() {}
+
+ /** Returns the standard set of read options */
+ public static Options standard() {
+ return new Options();
+ }
+
+ /** Set the maximum total size of decompressed entries. Default is 8 GB */
+ public Options maxSize(long size) {
+ this.maxSize = requireNonNegative("size", size);
+ return this;
+ }
+
+ /** Set the maximum size a decompressed entry. Default is no limit */
+ public Options maxEntrySize(long size) {
+ this.maxEntrySize = requireNonNegative("size", size);
+ return this;
+ }
+
+ /** Set the maximum number of entries to decompress. Default is no limit */
+ public Options maxEntries(long count) {
+ this.maxEntries = requireNonNegative("count", count);
+ return this;
+ }
+
+ /**
+ * Set whether to truncate the content of an entry exceeding the configured size limit, instead of throwing.
+ * Default is to throw.
+ */
+ public Options truncateEntry(boolean truncate) {
+ this.truncateEntry = truncate;
+ return this;
+ }
+
+ /** Set a predicate that an entry path must match in order to be extracted. Default is to extract all entries */
+ public Options pathPredicate(Predicate<String> predicate) {
+ this.pathPredicate = predicate;
+ return this;
+ }
+
+ /** Set whether to allow single-dot segments in entry paths. Default is false */
+ public Options allowDotSegment(boolean allow) {
+ this.allowDotSegment = allow;
+ return this;
+ }
+
+ }
+
+}
diff --git a/application-model/src/test/java/com/yahoo/vespa/archive/ArchiveStreamReaderTest.java b/application-model/src/test/java/com/yahoo/vespa/archive/ArchiveStreamReaderTest.java
new file mode 100644
index 00000000000..78ff2a805e5
--- /dev/null
+++ b/application-model/src/test/java/com/yahoo/vespa/archive/ArchiveStreamReaderTest.java
@@ -0,0 +1,131 @@
+package com.yahoo.vespa.archive;
+
+import com.yahoo.vespa.archive.ArchiveStreamReader.Options;
+import com.yahoo.yolean.Exceptions;
+import org.apache.commons.compress.archivers.zip.ZipArchiveEntry;
+import org.apache.commons.compress.archivers.zip.ZipArchiveOutputStream;
+import org.junit.jupiter.api.Test;
+
+import java.io.ByteArrayInputStream;
+import java.io.ByteArrayOutputStream;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.UncheckedIOException;
+import java.nio.charset.StandardCharsets;
+import java.util.HashMap;
+import java.util.Map;
+
+import static org.junit.jupiter.api.Assertions.assertEquals;
+import static org.junit.jupiter.api.Assertions.assertFalse;
+import static org.junit.jupiter.api.Assertions.assertTrue;
+import static org.junit.jupiter.api.Assertions.fail;
+
+/**
+ * @author mpolden
+ */
+class ArchiveStreamReaderTest {
+
+ @Test
+ void reading() {
+ Map<String, String> zipFiles = Map.of("foo", "contents of foo",
+ "bar", "contents of bar",
+ "baz", "0".repeat(2049));
+ Map<String, String> zipContents = new HashMap<>(zipFiles);
+ zipContents.put("dir/", ""); // Directories are always ignored
+ Map<String, String> extracted = readAll(zip(zipContents), Options.standard());
+ assertEquals(zipFiles, extracted);
+ }
+
+ @Test
+ void entry_size_limit() {
+ Map<String, String> entries = Map.of("foo.xml", "foobar");
+ Options options = Options.standard().pathPredicate("foo.xml"::equals).maxEntrySize(1);
+ try {
+ readAll(zip(entries), options);
+ fail("Expected exception");
+ } catch (IllegalArgumentException ignored) {}
+
+ entries = Map.of("foo.xml", "foobar",
+ "foo.jar", "0".repeat(100) // File not extracted and thus not subject to size limit
+ );
+ Map<String, String> extracted = readAll(zip(entries), options.maxEntrySize(10));
+ assertEquals(Map.of("foo.xml", "foobar"), extracted);
+ }
+
+ @Test
+ void size_limit() {
+ Map<String, String> entries = Map.of("foo.xml", "foo", "bar.xml", "bar");
+ try {
+ readAll(zip(entries), Options.standard().maxSize(4));
+ fail("Expected exception");
+ } catch (IllegalArgumentException ignored) {}
+ }
+
+ @Test
+ void entry_limit() {
+ Map<String, String> entries = Map.of("foo.xml", "foo", "bar.xml", "bar");
+ try {
+ readAll(zip(entries), Options.standard().maxEntries(1));
+ fail("Expected exception");
+ } catch (IllegalArgumentException ignored) {}
+ }
+
+ @Test
+ void paths() {
+ Map<String, Boolean> tests = Map.of(
+ "../../services.xml", true,
+ "/../.././services.xml", true,
+ "./application/././services.xml", true,
+ "application//services.xml", true,
+ "artifacts/", false, // empty dir
+ "services..xml", false,
+ "application/services.xml", false,
+ "components/foo-bar-deploy.jar", false,
+ "services.xml", false
+ );
+
+ Options options = Options.standard().maxEntrySize(1024);
+ tests.forEach((name, expectException) -> {
+ try {
+ readAll(zip(Map.of(name, "foo")), options.pathPredicate(name::equals));
+ assertFalse(expectException, "Expected exception for '" + name + "'");
+ } catch (IllegalArgumentException ignored) {
+ assertTrue(expectException, "Unexpected exception for '" + name + "'");
+ }
+ });
+ }
+
+ private static Map<String, String> readAll(InputStream inputStream, Options options) {
+ ArchiveStreamReader reader = ArchiveStreamReader.ofZip(inputStream, options);
+ ArchiveStreamReader.ArchiveFile file;
+ Map<String, String> entries = new HashMap<>();
+ ByteArrayOutputStream baos = new ByteArrayOutputStream();
+ while ((file = reader.readNextTo(baos)) != null) {
+ entries.put(file.path().toString(), baos.toString(StandardCharsets.UTF_8));
+ baos.reset();
+ }
+ return entries;
+ }
+
+ private static InputStream zip(Map<String, String> entries) {
+ ByteArrayOutputStream baos = new ByteArrayOutputStream();
+ ZipArchiveOutputStream archiveOutputStream = null;
+ try {
+ archiveOutputStream = new ZipArchiveOutputStream(baos);
+ for (var kv : entries.entrySet()) {
+ String entryName = kv.getKey();
+ String contents = kv.getValue();
+ ZipArchiveEntry entry = new ZipArchiveEntry(entryName);
+ archiveOutputStream.putArchiveEntry(entry);
+ archiveOutputStream.write(contents.getBytes(StandardCharsets.UTF_8));
+ archiveOutputStream.closeArchiveEntry();
+ }
+ } catch (IOException e) {
+ throw new UncheckedIOException(e);
+ } finally {
+ if (archiveOutputStream != null) Exceptions.uncheck(archiveOutputStream::close);
+ }
+ return new ByteArrayInputStream(baos.toByteArray());
+ }
+
+}