From 9b11ce7f3b9080c6c3e640e694b253a1122e8bfa Mon Sep 17 00:00:00 2001 From: Henning Baldersheim Date: Sat, 26 Nov 2022 16:04:20 +0100 Subject: Move ArchiveStreamReader and its large dependency from vespajlib to application-model. This avoids it being pulled into config-model-fat, which makes the install larger than necessary. --- vespajlib/pom.xml | 4 - .../com/yahoo/compress/ArchiveStreamReader.java | 216 --------------------- .../yahoo/compress/ArchiveStreamReaderTest.java | 131 ------------- 3 files changed, 351 deletions(-) delete mode 100644 vespajlib/src/main/java/com/yahoo/compress/ArchiveStreamReader.java delete mode 100644 vespajlib/src/test/java/com/yahoo/compress/ArchiveStreamReaderTest.java (limited to 'vespajlib') diff --git a/vespajlib/pom.xml b/vespajlib/pom.xml index 4c57b615c16..d903fb5ec0d 100644 --- a/vespajlib/pom.xml +++ b/vespajlib/pom.xml @@ -35,10 +35,6 @@ io.airlift aircompressor - - org.apache.commons - commons-compress - net.openhft zero-allocation-hashing diff --git a/vespajlib/src/main/java/com/yahoo/compress/ArchiveStreamReader.java b/vespajlib/src/main/java/com/yahoo/compress/ArchiveStreamReader.java deleted file mode 100644 index f8faf655415..00000000000 --- a/vespajlib/src/main/java/com/yahoo/compress/ArchiveStreamReader.java +++ /dev/null @@ -1,216 +0,0 @@ -// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
-package com.yahoo.compress; - -import com.yahoo.path.Path; -import com.yahoo.yolean.Exceptions; -import org.apache.commons.compress.archivers.ArchiveEntry; -import org.apache.commons.compress.archivers.ArchiveInputStream; -import org.apache.commons.compress.archivers.tar.TarArchiveEntry; -import org.apache.commons.compress.archivers.tar.TarArchiveInputStream; -import org.apache.commons.compress.archivers.zip.ZipArchiveEntry; -import org.apache.commons.compress.archivers.zip.ZipArchiveInputStream; - -import java.io.IOException; -import java.io.InputStream; -import java.io.OutputStream; -import java.io.UncheckedIOException; -import java.util.Objects; -import java.util.OptionalLong; -import java.util.function.Predicate; -import java.util.zip.GZIPInputStream; - -/** - * Helper class for safely reading files from a compressed archive. - * - * @author mpolden - */ -public class ArchiveStreamReader implements AutoCloseable { - - private final ArchiveInputStream archiveInputStream; - private final Options options; - - private long totalRead = 0; - private long entriesRead = 0; - - private ArchiveStreamReader(ArchiveInputStream archiveInputStream, Options options) { - this.archiveInputStream = Objects.requireNonNull(archiveInputStream); - this.options = Objects.requireNonNull(options); - } - - /** Create reader for an inputStream containing a tar.gz file */ - public static ArchiveStreamReader ofTarGzip(InputStream inputStream, Options options) { - return new ArchiveStreamReader(new TarArchiveInputStream(Exceptions.uncheck(() -> new GZIPInputStream(inputStream))), options); - } - - /** Create reader for an inputStream containing a ZIP file */ - public static ArchiveStreamReader ofZip(InputStream inputStream, Options options) { - return new ArchiveStreamReader(new ZipArchiveInputStream(inputStream), options); - } - - /** - * Read the next file in this archive and write it to given outputStream. 
Returns information about the read archive - * file, or null if there are no more files to read. - */ - public ArchiveFile readNextTo(OutputStream outputStream) { - ArchiveEntry entry; - try { - while ((entry = archiveInputStream.getNextEntry()) != null) { - Path path = Path.fromString(requireNormalized(entry.getName(), options.allowDotSegment)); - if (isSymlink(entry)) throw new IllegalArgumentException("Archive entry " + path + " is a symbolic link, which is unsupported"); - if (entry.isDirectory()) continue; - if (!options.pathPredicate.test(path.toString())) continue; - if (++entriesRead > options.maxEntries) throw new IllegalArgumentException("Attempted to read more entries than entry limit of " + options.maxEntries); - - long size = 0; - byte[] buffer = new byte[2048]; - int read; - while ((read = archiveInputStream.read(buffer)) != -1) { - totalRead += read; - size += read; - if (totalRead > options.maxSize) throw new IllegalArgumentException("Total size of archive exceeds size limit of " + options.maxSize + " bytes"); - if (read > options.maxEntrySize) { - if (!options.truncateEntry) throw new IllegalArgumentException("Size of entry " + path + " exceeded entry size limit of " + options.maxEntrySize + " bytes"); - } else { - outputStream.write(buffer, 0, read); - } - } - return new ArchiveFile(path, crc32(entry), size); - } - } catch (IOException e) { - throw new UncheckedIOException(e); - } - return null; - } - - @Override - public void close() { - Exceptions.uncheck(archiveInputStream::close); - } - - /** Information about a file extracted from a compressed archive */ - public static class ArchiveFile { - - private final Path path; - private final OptionalLong crc32; - private final long size; - - public ArchiveFile(Path name, OptionalLong crc32, long size) { - this.path = Objects.requireNonNull(name); - this.crc32 = Objects.requireNonNull(crc32); - if (crc32.isPresent()) { - requireNonNegative("crc32", crc32.getAsLong()); - } - this.size = 
requireNonNegative("size", size); - } - - /** The path of this file inside its containing archive */ - public Path path() { - return path; - } - - /** The CRC-32 checksum of this file, if any */ - public OptionalLong crc32() { - return crc32; - } - - /** The decompressed size of this file */ - public long size() { - return size; - } - - } - - /** Get the CRC-32 checksum of given archive entry, if any */ - private static OptionalLong crc32(ArchiveEntry entry) { - long crc32 = -1; - if (entry instanceof ZipArchiveEntry) { - crc32 = ((ZipArchiveEntry) entry).getCrc(); - } - return crc32 > -1 ? OptionalLong.of(crc32) : OptionalLong.empty(); - } - - private static boolean isSymlink(ArchiveEntry entry) { - // Symlinks inside ZIP files are not part of the ZIP spec and are only supported by some implementations, such - // as Info-ZIP. - // - // Commons Compress only has limited support for symlinks as they are only detected when the ZIP file is read - // through org.apache.commons.compress.archivers.zip.ZipFile. This is not the case in this class, because it must - // support reading ZIP files from generic input streams. The check below thus always returns false. 
- if (entry instanceof ZipArchiveEntry zipEntry) return zipEntry.isUnixSymlink(); - if (entry instanceof TarArchiveEntry tarEntry) return tarEntry.isSymbolicLink(); - throw new IllegalArgumentException("Unsupported archive entry " + entry.getClass().getSimpleName() + ", cannot check for symbolic link"); - } - - private static String requireNormalized(String name, boolean allowDotSegment) { - for (var part : name.split("/")) { - if (part.isEmpty() || (!allowDotSegment && part.equals(".")) || part.equals("..")) { - throw new IllegalArgumentException("Unexpected non-normalized path found in zip content: '" + name + "'"); - } - } - return name; - } - - private static long requireNonNegative(String field, long n) { - if (n < 0) throw new IllegalArgumentException(field + " cannot be negative, got " + n); - return n; - } - - /** Options for reading entries of an archive */ - public static class Options { - - private long maxSize = 8 * (long) Math.pow(1024, 3); // 8 GB - private long maxEntrySize = Long.MAX_VALUE; - private long maxEntries = Long.MAX_VALUE; - private boolean truncateEntry = false; - private boolean allowDotSegment = false; - private Predicate pathPredicate = (path) -> true; - - private Options() {} - - /** Returns the standard set of read options */ - public static Options standard() { - return new Options(); - } - - /** Set the maximum total size of decompressed entries. Default is 8 GB */ - public Options maxSize(long size) { - this.maxSize = requireNonNegative("size", size); - return this; - } - - /** Set the maximum size a decompressed entry. Default is no limit */ - public Options maxEntrySize(long size) { - this.maxEntrySize = requireNonNegative("size", size); - return this; - } - - /** Set the maximum number of entries to decompress. 
Default is no limit */ - public Options maxEntries(long count) { - this.maxEntries = requireNonNegative("count", count); - return this; - } - - /** - * Set whether to truncate the content of an entry exceeding the configured size limit, instead of throwing. - * Default is to throw. - */ - public Options truncateEntry(boolean truncate) { - this.truncateEntry = truncate; - return this; - } - - /** Set a predicate that an entry path must match in order to be extracted. Default is to extract all entries */ - public Options pathPredicate(Predicate predicate) { - this.pathPredicate = predicate; - return this; - } - - /** Set whether to allow single-dot segments in entry paths. Default is false */ - public Options allowDotSegment(boolean allow) { - this.allowDotSegment = allow; - return this; - } - - } - -} diff --git a/vespajlib/src/test/java/com/yahoo/compress/ArchiveStreamReaderTest.java b/vespajlib/src/test/java/com/yahoo/compress/ArchiveStreamReaderTest.java deleted file mode 100644 index b7f019282b7..00000000000 --- a/vespajlib/src/test/java/com/yahoo/compress/ArchiveStreamReaderTest.java +++ /dev/null @@ -1,131 +0,0 @@ -package com.yahoo.compress; - -import com.yahoo.compress.ArchiveStreamReader.Options; -import com.yahoo.yolean.Exceptions; -import org.apache.commons.compress.archivers.zip.ZipArchiveEntry; -import org.apache.commons.compress.archivers.zip.ZipArchiveOutputStream; -import org.junit.jupiter.api.Test; - -import java.io.ByteArrayInputStream; -import java.io.ByteArrayOutputStream; -import java.io.IOException; -import java.io.InputStream; -import java.io.UncheckedIOException; -import java.nio.charset.StandardCharsets; -import java.util.HashMap; -import java.util.Map; - -import static org.junit.jupiter.api.Assertions.assertEquals; -import static org.junit.jupiter.api.Assertions.assertFalse; -import static org.junit.jupiter.api.Assertions.assertTrue; -import static org.junit.jupiter.api.Assertions.fail; - -/** - * @author mpolden - */ -class 
ArchiveStreamReaderTest { - - @Test - void reading() { - Map zipFiles = Map.of("foo", "contents of foo", - "bar", "contents of bar", - "baz", "0".repeat(2049)); - Map zipContents = new HashMap<>(zipFiles); - zipContents.put("dir/", ""); // Directories are always ignored - Map extracted = readAll(zip(zipContents), Options.standard()); - assertEquals(zipFiles, extracted); - } - - @Test - void entry_size_limit() { - Map entries = Map.of("foo.xml", "foobar"); - Options options = Options.standard().pathPredicate("foo.xml"::equals).maxEntrySize(1); - try { - readAll(zip(entries), options); - fail("Expected exception"); - } catch (IllegalArgumentException ignored) {} - - entries = Map.of("foo.xml", "foobar", - "foo.jar", "0".repeat(100) // File not extracted and thus not subject to size limit - ); - Map extracted = readAll(zip(entries), options.maxEntrySize(10)); - assertEquals(Map.of("foo.xml", "foobar"), extracted); - } - - @Test - void size_limit() { - Map entries = Map.of("foo.xml", "foo", "bar.xml", "bar"); - try { - readAll(zip(entries), Options.standard().maxSize(4)); - fail("Expected exception"); - } catch (IllegalArgumentException ignored) {} - } - - @Test - void entry_limit() { - Map entries = Map.of("foo.xml", "foo", "bar.xml", "bar"); - try { - readAll(zip(entries), Options.standard().maxEntries(1)); - fail("Expected exception"); - } catch (IllegalArgumentException ignored) {} - } - - @Test - void paths() { - Map tests = Map.of( - "../../services.xml", true, - "/../.././services.xml", true, - "./application/././services.xml", true, - "application//services.xml", true, - "artifacts/", false, // empty dir - "services..xml", false, - "application/services.xml", false, - "components/foo-bar-deploy.jar", false, - "services.xml", false - ); - - Options options = Options.standard().maxEntrySize(1024); - tests.forEach((name, expectException) -> { - try { - readAll(zip(Map.of(name, "foo")), options.pathPredicate(name::equals)); - assertFalse(expectException, "Expected 
exception for '" + name + "'"); - } catch (IllegalArgumentException ignored) { - assertTrue(expectException, "Unexpected exception for '" + name + "'"); - } - }); - } - - private static Map readAll(InputStream inputStream, Options options) { - ArchiveStreamReader reader = ArchiveStreamReader.ofZip(inputStream, options); - ArchiveStreamReader.ArchiveFile file; - Map entries = new HashMap<>(); - ByteArrayOutputStream baos = new ByteArrayOutputStream(); - while ((file = reader.readNextTo(baos)) != null) { - entries.put(file.path().toString(), baos.toString(StandardCharsets.UTF_8)); - baos.reset(); - } - return entries; - } - - private static InputStream zip(Map entries) { - ByteArrayOutputStream baos = new ByteArrayOutputStream(); - ZipArchiveOutputStream archiveOutputStream = null; - try { - archiveOutputStream = new ZipArchiveOutputStream(baos); - for (var kv : entries.entrySet()) { - String entryName = kv.getKey(); - String contents = kv.getValue(); - ZipArchiveEntry entry = new ZipArchiveEntry(entryName); - archiveOutputStream.putArchiveEntry(entry); - archiveOutputStream.write(contents.getBytes(StandardCharsets.UTF_8)); - archiveOutputStream.closeArchiveEntry(); - } - } catch (IOException e) { - throw new UncheckedIOException(e); - } finally { - if (archiveOutputStream != null) Exceptions.uncheck(archiveOutputStream::close); - } - return new ByteArrayInputStream(baos.toByteArray()); - } - -} -- cgit v1.2.3