diff options
author | Henning Baldersheim <balder@yahoo-inc.com> | 2022-11-26 16:04:20 +0100 |
---|---|---|
committer | Henning Baldersheim <balder@yahoo-inc.com> | 2022-11-26 17:26:52 +0100 |
commit | 9b11ce7f3b9080c6c3e640e694b253a1122e8bfa (patch) | |
tree | ff388da577a44e69966fd9c0d855d7acccc9bbba /application-model | |
parent | 46a1ed45abb3f7635069ff07d9e046406fe1062f (diff) |
Move ArchiveStreamReader and its large dependency from vespajlib to application-model.
This avoids it being pulled into config-model-fat, which makes the install larger than necessary.
Diffstat (limited to 'application-model')
3 files changed, 357 insertions, 0 deletions
diff --git a/application-model/pom.xml b/application-model/pom.xml index 2143f3a5ffd..f81c4ea4b62 100644 --- a/application-model/pom.xml +++ b/application-model/pom.xml @@ -22,6 +22,11 @@ <scope>provided</scope> </dependency> <dependency> + <groupId>org.apache.commons</groupId> + <artifactId>commons-compress</artifactId> + <scope>provided</scope> + </dependency> + <dependency> <groupId>com.yahoo.vespa</groupId> <artifactId>vespajlib</artifactId> <version>${project.version}</version> @@ -39,6 +44,11 @@ <version>${project.version}</version> <scope>provided</scope> </dependency> + <dependency> + <groupId>org.junit.jupiter</groupId> + <artifactId>junit-jupiter</artifactId> + <scope>test</scope> + </dependency> </dependencies> <build> <plugins> diff --git a/application-model/src/main/java/com/yahoo/vespa/archive/ArchiveStreamReader.java b/application-model/src/main/java/com/yahoo/vespa/archive/ArchiveStreamReader.java new file mode 100644 index 00000000000..87665efc1ef --- /dev/null +++ b/application-model/src/main/java/com/yahoo/vespa/archive/ArchiveStreamReader.java @@ -0,0 +1,216 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+package com.yahoo.vespa.archive; + +import com.yahoo.path.Path; +import com.yahoo.yolean.Exceptions; +import org.apache.commons.compress.archivers.ArchiveEntry; +import org.apache.commons.compress.archivers.ArchiveInputStream; +import org.apache.commons.compress.archivers.tar.TarArchiveEntry; +import org.apache.commons.compress.archivers.tar.TarArchiveInputStream; +import org.apache.commons.compress.archivers.zip.ZipArchiveEntry; +import org.apache.commons.compress.archivers.zip.ZipArchiveInputStream; + +import java.io.IOException; +import java.io.InputStream; +import java.io.OutputStream; +import java.io.UncheckedIOException; +import java.util.Objects; +import java.util.OptionalLong; +import java.util.function.Predicate; +import java.util.zip.GZIPInputStream; + +/** + * Helper class for safely reading files from a compressed archive. + * + * @author mpolden + */ +public class ArchiveStreamReader implements AutoCloseable { + + private final ArchiveInputStream archiveInputStream; + private final Options options; + + private long totalRead = 0; + private long entriesRead = 0; + + private ArchiveStreamReader(ArchiveInputStream archiveInputStream, Options options) { + this.archiveInputStream = Objects.requireNonNull(archiveInputStream); + this.options = Objects.requireNonNull(options); + } + + /** Create reader for an inputStream containing a tar.gz file */ + public static ArchiveStreamReader ofTarGzip(InputStream inputStream, Options options) { + return new ArchiveStreamReader(new TarArchiveInputStream(Exceptions.uncheck(() -> new GZIPInputStream(inputStream))), options); + } + + /** Create reader for an inputStream containing a ZIP file */ + public static ArchiveStreamReader ofZip(InputStream inputStream, Options options) { + return new ArchiveStreamReader(new ZipArchiveInputStream(inputStream), options); + } + + /** + * Read the next file in this archive and write it to given outputStream. 
Returns information about the read archive + * file, or null if there are no more files to read. + */ + public ArchiveFile readNextTo(OutputStream outputStream) { + ArchiveEntry entry; + try { + while ((entry = archiveInputStream.getNextEntry()) != null) { + Path path = Path.fromString(requireNormalized(entry.getName(), options.allowDotSegment)); + if (isSymlink(entry)) throw new IllegalArgumentException("Archive entry " + path + " is a symbolic link, which is unsupported"); + if (entry.isDirectory()) continue; + if (!options.pathPredicate.test(path.toString())) continue; + if (++entriesRead > options.maxEntries) throw new IllegalArgumentException("Attempted to read more entries than entry limit of " + options.maxEntries); + + long size = 0; + byte[] buffer = new byte[2048]; + int read; + while ((read = archiveInputStream.read(buffer)) != -1) { + totalRead += read; + size += read; + if (totalRead > options.maxSize) throw new IllegalArgumentException("Total size of archive exceeds size limit of " + options.maxSize + " bytes"); + if (read > options.maxEntrySize) { + if (!options.truncateEntry) throw new IllegalArgumentException("Size of entry " + path + " exceeded entry size limit of " + options.maxEntrySize + " bytes"); + } else { + outputStream.write(buffer, 0, read); + } + } + return new ArchiveFile(path, crc32(entry), size); + } + } catch (IOException e) { + throw new UncheckedIOException(e); + } + return null; + } + + @Override + public void close() { + Exceptions.uncheck(archiveInputStream::close); + } + + /** Information about a file extracted from a compressed archive */ + public static class ArchiveFile { + + private final Path path; + private final OptionalLong crc32; + private final long size; + + public ArchiveFile(Path name, OptionalLong crc32, long size) { + this.path = Objects.requireNonNull(name); + this.crc32 = Objects.requireNonNull(crc32); + if (crc32.isPresent()) { + requireNonNegative("crc32", crc32.getAsLong()); + } + this.size = 
requireNonNegative("size", size); + } + + /** The path of this file inside its containing archive */ + public Path path() { + return path; + } + + /** The CRC-32 checksum of this file, if any */ + public OptionalLong crc32() { + return crc32; + } + + /** The decompressed size of this file */ + public long size() { + return size; + } + + } + + /** Get the CRC-32 checksum of given archive entry, if any */ + private static OptionalLong crc32(ArchiveEntry entry) { + long crc32 = -1; + if (entry instanceof ZipArchiveEntry) { + crc32 = ((ZipArchiveEntry) entry).getCrc(); + } + return crc32 > -1 ? OptionalLong.of(crc32) : OptionalLong.empty(); + } + + private static boolean isSymlink(ArchiveEntry entry) { + // Symlinks inside ZIP files are not part of the ZIP spec and are only supported by some implementations, such + // as Info-ZIP. + // + // Commons Compress only has limited support for symlinks as they are only detected when the ZIP file is read + // through org.apache.commons.compress.archivers.zip.ZipFile. This is not the case in this class, because it must + // support reading ZIP files from generic input streams. The check below thus always returns false. 
+ if (entry instanceof ZipArchiveEntry zipEntry) return zipEntry.isUnixSymlink(); + if (entry instanceof TarArchiveEntry tarEntry) return tarEntry.isSymbolicLink(); + throw new IllegalArgumentException("Unsupported archive entry " + entry.getClass().getSimpleName() + ", cannot check for symbolic link"); + } + + private static String requireNormalized(String name, boolean allowDotSegment) { + for (var part : name.split("/")) { + if (part.isEmpty() || (!allowDotSegment && part.equals(".")) || part.equals("..")) { + throw new IllegalArgumentException("Unexpected non-normalized path found in zip content: '" + name + "'"); + } + } + return name; + } + + private static long requireNonNegative(String field, long n) { + if (n < 0) throw new IllegalArgumentException(field + " cannot be negative, got " + n); + return n; + } + + /** Options for reading entries of an archive */ + public static class Options { + + private long maxSize = 8 * (long) Math.pow(1024, 3); // 8 GB + private long maxEntrySize = Long.MAX_VALUE; + private long maxEntries = Long.MAX_VALUE; + private boolean truncateEntry = false; + private boolean allowDotSegment = false; + private Predicate<String> pathPredicate = (path) -> true; + + private Options() {} + + /** Returns the standard set of read options */ + public static Options standard() { + return new Options(); + } + + /** Set the maximum total size of decompressed entries. Default is 8 GB */ + public Options maxSize(long size) { + this.maxSize = requireNonNegative("size", size); + return this; + } + + /** Set the maximum size a decompressed entry. Default is no limit */ + public Options maxEntrySize(long size) { + this.maxEntrySize = requireNonNegative("size", size); + return this; + } + + /** Set the maximum number of entries to decompress. 
Default is no limit */ + public Options maxEntries(long count) { + this.maxEntries = requireNonNegative("count", count); + return this; + } + + /** + * Set whether to truncate the content of an entry exceeding the configured size limit, instead of throwing. + * Default is to throw. + */ + public Options truncateEntry(boolean truncate) { + this.truncateEntry = truncate; + return this; + } + + /** Set a predicate that an entry path must match in order to be extracted. Default is to extract all entries */ + public Options pathPredicate(Predicate<String> predicate) { + this.pathPredicate = predicate; + return this; + } + + /** Set whether to allow single-dot segments in entry paths. Default is false */ + public Options allowDotSegment(boolean allow) { + this.allowDotSegment = allow; + return this; + } + + } + +} diff --git a/application-model/src/test/java/com/yahoo/vespa/archive/ArchiveStreamReaderTest.java b/application-model/src/test/java/com/yahoo/vespa/archive/ArchiveStreamReaderTest.java new file mode 100644 index 00000000000..78ff2a805e5 --- /dev/null +++ b/application-model/src/test/java/com/yahoo/vespa/archive/ArchiveStreamReaderTest.java @@ -0,0 +1,131 @@ +package com.yahoo.vespa.archive; + +import com.yahoo.vespa.archive.ArchiveStreamReader.Options; +import com.yahoo.yolean.Exceptions; +import org.apache.commons.compress.archivers.zip.ZipArchiveEntry; +import org.apache.commons.compress.archivers.zip.ZipArchiveOutputStream; +import org.junit.jupiter.api.Test; + +import java.io.ByteArrayInputStream; +import java.io.ByteArrayOutputStream; +import java.io.IOException; +import java.io.InputStream; +import java.io.UncheckedIOException; +import java.nio.charset.StandardCharsets; +import java.util.HashMap; +import java.util.Map; + +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertFalse; +import static org.junit.jupiter.api.Assertions.assertTrue; +import static org.junit.jupiter.api.Assertions.fail; + 
+/** + * @author mpolden + */ +class ArchiveStreamReaderTest { + + @Test + void reading() { + Map<String, String> zipFiles = Map.of("foo", "contents of foo", + "bar", "contents of bar", + "baz", "0".repeat(2049)); + Map<String, String> zipContents = new HashMap<>(zipFiles); + zipContents.put("dir/", ""); // Directories are always ignored + Map<String, String> extracted = readAll(zip(zipContents), Options.standard()); + assertEquals(zipFiles, extracted); + } + + @Test + void entry_size_limit() { + Map<String, String> entries = Map.of("foo.xml", "foobar"); + Options options = Options.standard().pathPredicate("foo.xml"::equals).maxEntrySize(1); + try { + readAll(zip(entries), options); + fail("Expected exception"); + } catch (IllegalArgumentException ignored) {} + + entries = Map.of("foo.xml", "foobar", + "foo.jar", "0".repeat(100) // File not extracted and thus not subject to size limit + ); + Map<String, String> extracted = readAll(zip(entries), options.maxEntrySize(10)); + assertEquals(Map.of("foo.xml", "foobar"), extracted); + } + + @Test + void size_limit() { + Map<String, String> entries = Map.of("foo.xml", "foo", "bar.xml", "bar"); + try { + readAll(zip(entries), Options.standard().maxSize(4)); + fail("Expected exception"); + } catch (IllegalArgumentException ignored) {} + } + + @Test + void entry_limit() { + Map<String, String> entries = Map.of("foo.xml", "foo", "bar.xml", "bar"); + try { + readAll(zip(entries), Options.standard().maxEntries(1)); + fail("Expected exception"); + } catch (IllegalArgumentException ignored) {} + } + + @Test + void paths() { + Map<String, Boolean> tests = Map.of( + "../../services.xml", true, + "/../.././services.xml", true, + "./application/././services.xml", true, + "application//services.xml", true, + "artifacts/", false, // empty dir + "services..xml", false, + "application/services.xml", false, + "components/foo-bar-deploy.jar", false, + "services.xml", false + ); + + Options options = Options.standard().maxEntrySize(1024); + 
tests.forEach((name, expectException) -> { + try { + readAll(zip(Map.of(name, "foo")), options.pathPredicate(name::equals)); + assertFalse(expectException, "Expected exception for '" + name + "'"); + } catch (IllegalArgumentException ignored) { + assertTrue(expectException, "Unexpected exception for '" + name + "'"); + } + }); + } + + private static Map<String, String> readAll(InputStream inputStream, Options options) { + ArchiveStreamReader reader = ArchiveStreamReader.ofZip(inputStream, options); + ArchiveStreamReader.ArchiveFile file; + Map<String, String> entries = new HashMap<>(); + ByteArrayOutputStream baos = new ByteArrayOutputStream(); + while ((file = reader.readNextTo(baos)) != null) { + entries.put(file.path().toString(), baos.toString(StandardCharsets.UTF_8)); + baos.reset(); + } + return entries; + } + + private static InputStream zip(Map<String, String> entries) { + ByteArrayOutputStream baos = new ByteArrayOutputStream(); + ZipArchiveOutputStream archiveOutputStream = null; + try { + archiveOutputStream = new ZipArchiveOutputStream(baos); + for (var kv : entries.entrySet()) { + String entryName = kv.getKey(); + String contents = kv.getValue(); + ZipArchiveEntry entry = new ZipArchiveEntry(entryName); + archiveOutputStream.putArchiveEntry(entry); + archiveOutputStream.write(contents.getBytes(StandardCharsets.UTF_8)); + archiveOutputStream.closeArchiveEntry(); + } + } catch (IOException e) { + throw new UncheckedIOException(e); + } finally { + if (archiveOutputStream != null) Exceptions.uncheck(archiveOutputStream::close); + } + return new ByteArrayInputStream(baos.toByteArray()); + } + +} |