summaryrefslogtreecommitdiffstats
path: root/vespajlib
diff options
context:
space:
mode:
authorHenning Baldersheim <balder@yahoo-inc.com>2022-11-26 16:04:20 +0100
committerHenning Baldersheim <balder@yahoo-inc.com>2022-11-26 17:26:52 +0100
commit9b11ce7f3b9080c6c3e640e694b253a1122e8bfa (patch)
treeff388da577a44e69966fd9c0d855d7acccc9bbba /vespajlib
parent46a1ed45abb3f7635069ff07d9e046406fe1062f (diff)
Move ArchiveStreamReader and its large dependency from vespajlib to application-model.
This avoids it being pulled into config-model-fat, which would make the install larger than necessary.
Diffstat (limited to 'vespajlib')
-rw-r--r--vespajlib/pom.xml4
-rw-r--r--vespajlib/src/main/java/com/yahoo/compress/ArchiveStreamReader.java216
-rw-r--r--vespajlib/src/test/java/com/yahoo/compress/ArchiveStreamReaderTest.java131
3 files changed, 0 insertions, 351 deletions
diff --git a/vespajlib/pom.xml b/vespajlib/pom.xml
index 4c57b615c16..d903fb5ec0d 100644
--- a/vespajlib/pom.xml
+++ b/vespajlib/pom.xml
@@ -36,10 +36,6 @@
<artifactId>aircompressor</artifactId>
</dependency>
<dependency>
- <groupId>org.apache.commons</groupId>
- <artifactId>commons-compress</artifactId>
- </dependency>
- <dependency>
<groupId>net.openhft</groupId>
<artifactId>zero-allocation-hashing</artifactId>
<exclusions>
diff --git a/vespajlib/src/main/java/com/yahoo/compress/ArchiveStreamReader.java b/vespajlib/src/main/java/com/yahoo/compress/ArchiveStreamReader.java
deleted file mode 100644
index f8faf655415..00000000000
--- a/vespajlib/src/main/java/com/yahoo/compress/ArchiveStreamReader.java
+++ /dev/null
@@ -1,216 +0,0 @@
-// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
-package com.yahoo.compress;
-
-import com.yahoo.path.Path;
-import com.yahoo.yolean.Exceptions;
-import org.apache.commons.compress.archivers.ArchiveEntry;
-import org.apache.commons.compress.archivers.ArchiveInputStream;
-import org.apache.commons.compress.archivers.tar.TarArchiveEntry;
-import org.apache.commons.compress.archivers.tar.TarArchiveInputStream;
-import org.apache.commons.compress.archivers.zip.ZipArchiveEntry;
-import org.apache.commons.compress.archivers.zip.ZipArchiveInputStream;
-
-import java.io.IOException;
-import java.io.InputStream;
-import java.io.OutputStream;
-import java.io.UncheckedIOException;
-import java.util.Objects;
-import java.util.OptionalLong;
-import java.util.function.Predicate;
-import java.util.zip.GZIPInputStream;
-
-/**
- * Helper class for safely reading files from a compressed archive.
- *
- * @author mpolden
- */
-public class ArchiveStreamReader implements AutoCloseable {
-
- private final ArchiveInputStream archiveInputStream;
- private final Options options;
-
- private long totalRead = 0;
- private long entriesRead = 0;
-
- private ArchiveStreamReader(ArchiveInputStream archiveInputStream, Options options) {
- this.archiveInputStream = Objects.requireNonNull(archiveInputStream);
- this.options = Objects.requireNonNull(options);
- }
-
- /** Create reader for an inputStream containing a tar.gz file */
- public static ArchiveStreamReader ofTarGzip(InputStream inputStream, Options options) {
- return new ArchiveStreamReader(new TarArchiveInputStream(Exceptions.uncheck(() -> new GZIPInputStream(inputStream))), options);
- }
-
- /** Create reader for an inputStream containing a ZIP file */
- public static ArchiveStreamReader ofZip(InputStream inputStream, Options options) {
- return new ArchiveStreamReader(new ZipArchiveInputStream(inputStream), options);
- }
-
- /**
- * Read the next file in this archive and write it to given outputStream. Returns information about the read archive
- * file, or null if there are no more files to read.
- */
- public ArchiveFile readNextTo(OutputStream outputStream) {
- ArchiveEntry entry;
- try {
- while ((entry = archiveInputStream.getNextEntry()) != null) {
- Path path = Path.fromString(requireNormalized(entry.getName(), options.allowDotSegment));
- if (isSymlink(entry)) throw new IllegalArgumentException("Archive entry " + path + " is a symbolic link, which is unsupported");
- if (entry.isDirectory()) continue;
- if (!options.pathPredicate.test(path.toString())) continue;
- if (++entriesRead > options.maxEntries) throw new IllegalArgumentException("Attempted to read more entries than entry limit of " + options.maxEntries);
-
- long size = 0;
- byte[] buffer = new byte[2048];
- int read;
- while ((read = archiveInputStream.read(buffer)) != -1) {
- totalRead += read;
- size += read;
- if (totalRead > options.maxSize) throw new IllegalArgumentException("Total size of archive exceeds size limit of " + options.maxSize + " bytes");
- if (read > options.maxEntrySize) {
- if (!options.truncateEntry) throw new IllegalArgumentException("Size of entry " + path + " exceeded entry size limit of " + options.maxEntrySize + " bytes");
- } else {
- outputStream.write(buffer, 0, read);
- }
- }
- return new ArchiveFile(path, crc32(entry), size);
- }
- } catch (IOException e) {
- throw new UncheckedIOException(e);
- }
- return null;
- }
-
- @Override
- public void close() {
- Exceptions.uncheck(archiveInputStream::close);
- }
-
- /** Information about a file extracted from a compressed archive */
- public static class ArchiveFile {
-
- private final Path path;
- private final OptionalLong crc32;
- private final long size;
-
- public ArchiveFile(Path name, OptionalLong crc32, long size) {
- this.path = Objects.requireNonNull(name);
- this.crc32 = Objects.requireNonNull(crc32);
- if (crc32.isPresent()) {
- requireNonNegative("crc32", crc32.getAsLong());
- }
- this.size = requireNonNegative("size", size);
- }
-
- /** The path of this file inside its containing archive */
- public Path path() {
- return path;
- }
-
- /** The CRC-32 checksum of this file, if any */
- public OptionalLong crc32() {
- return crc32;
- }
-
- /** The decompressed size of this file */
- public long size() {
- return size;
- }
-
- }
-
- /** Get the CRC-32 checksum of given archive entry, if any */
- private static OptionalLong crc32(ArchiveEntry entry) {
- long crc32 = -1;
- if (entry instanceof ZipArchiveEntry) {
- crc32 = ((ZipArchiveEntry) entry).getCrc();
- }
- return crc32 > -1 ? OptionalLong.of(crc32) : OptionalLong.empty();
- }
-
- private static boolean isSymlink(ArchiveEntry entry) {
- // Symlinks inside ZIP files are not part of the ZIP spec and are only supported by some implementations, such
- // as Info-ZIP.
- //
- // Commons Compress only has limited support for symlinks as they are only detected when the ZIP file is read
- // through org.apache.commons.compress.archivers.zip.ZipFile. This is not the case in this class, because it must
- // support reading ZIP files from generic input streams. The check below thus always returns false.
- if (entry instanceof ZipArchiveEntry zipEntry) return zipEntry.isUnixSymlink();
- if (entry instanceof TarArchiveEntry tarEntry) return tarEntry.isSymbolicLink();
- throw new IllegalArgumentException("Unsupported archive entry " + entry.getClass().getSimpleName() + ", cannot check for symbolic link");
- }
-
- private static String requireNormalized(String name, boolean allowDotSegment) {
- for (var part : name.split("/")) {
- if (part.isEmpty() || (!allowDotSegment && part.equals(".")) || part.equals("..")) {
- throw new IllegalArgumentException("Unexpected non-normalized path found in zip content: '" + name + "'");
- }
- }
- return name;
- }
-
- private static long requireNonNegative(String field, long n) {
- if (n < 0) throw new IllegalArgumentException(field + " cannot be negative, got " + n);
- return n;
- }
-
- /** Options for reading entries of an archive */
- public static class Options {
-
- private long maxSize = 8 * (long) Math.pow(1024, 3); // 8 GB
- private long maxEntrySize = Long.MAX_VALUE;
- private long maxEntries = Long.MAX_VALUE;
- private boolean truncateEntry = false;
- private boolean allowDotSegment = false;
- private Predicate<String> pathPredicate = (path) -> true;
-
- private Options() {}
-
- /** Returns the standard set of read options */
- public static Options standard() {
- return new Options();
- }
-
- /** Set the maximum total size of decompressed entries. Default is 8 GB */
- public Options maxSize(long size) {
- this.maxSize = requireNonNegative("size", size);
- return this;
- }
-
- /** Set the maximum size a decompressed entry. Default is no limit */
- public Options maxEntrySize(long size) {
- this.maxEntrySize = requireNonNegative("size", size);
- return this;
- }
-
- /** Set the maximum number of entries to decompress. Default is no limit */
- public Options maxEntries(long count) {
- this.maxEntries = requireNonNegative("count", count);
- return this;
- }
-
- /**
- * Set whether to truncate the content of an entry exceeding the configured size limit, instead of throwing.
- * Default is to throw.
- */
- public Options truncateEntry(boolean truncate) {
- this.truncateEntry = truncate;
- return this;
- }
-
- /** Set a predicate that an entry path must match in order to be extracted. Default is to extract all entries */
- public Options pathPredicate(Predicate<String> predicate) {
- this.pathPredicate = predicate;
- return this;
- }
-
- /** Set whether to allow single-dot segments in entry paths. Default is false */
- public Options allowDotSegment(boolean allow) {
- this.allowDotSegment = allow;
- return this;
- }
-
- }
-
-}
diff --git a/vespajlib/src/test/java/com/yahoo/compress/ArchiveStreamReaderTest.java b/vespajlib/src/test/java/com/yahoo/compress/ArchiveStreamReaderTest.java
deleted file mode 100644
index b7f019282b7..00000000000
--- a/vespajlib/src/test/java/com/yahoo/compress/ArchiveStreamReaderTest.java
+++ /dev/null
@@ -1,131 +0,0 @@
-package com.yahoo.compress;
-
-import com.yahoo.compress.ArchiveStreamReader.Options;
-import com.yahoo.yolean.Exceptions;
-import org.apache.commons.compress.archivers.zip.ZipArchiveEntry;
-import org.apache.commons.compress.archivers.zip.ZipArchiveOutputStream;
-import org.junit.jupiter.api.Test;
-
-import java.io.ByteArrayInputStream;
-import java.io.ByteArrayOutputStream;
-import java.io.IOException;
-import java.io.InputStream;
-import java.io.UncheckedIOException;
-import java.nio.charset.StandardCharsets;
-import java.util.HashMap;
-import java.util.Map;
-
-import static org.junit.jupiter.api.Assertions.assertEquals;
-import static org.junit.jupiter.api.Assertions.assertFalse;
-import static org.junit.jupiter.api.Assertions.assertTrue;
-import static org.junit.jupiter.api.Assertions.fail;
-
-/**
- * @author mpolden
- */
-class ArchiveStreamReaderTest {
-
- @Test
- void reading() {
- Map<String, String> zipFiles = Map.of("foo", "contents of foo",
- "bar", "contents of bar",
- "baz", "0".repeat(2049));
- Map<String, String> zipContents = new HashMap<>(zipFiles);
- zipContents.put("dir/", ""); // Directories are always ignored
- Map<String, String> extracted = readAll(zip(zipContents), Options.standard());
- assertEquals(zipFiles, extracted);
- }
-
- @Test
- void entry_size_limit() {
- Map<String, String> entries = Map.of("foo.xml", "foobar");
- Options options = Options.standard().pathPredicate("foo.xml"::equals).maxEntrySize(1);
- try {
- readAll(zip(entries), options);
- fail("Expected exception");
- } catch (IllegalArgumentException ignored) {}
-
- entries = Map.of("foo.xml", "foobar",
- "foo.jar", "0".repeat(100) // File not extracted and thus not subject to size limit
- );
- Map<String, String> extracted = readAll(zip(entries), options.maxEntrySize(10));
- assertEquals(Map.of("foo.xml", "foobar"), extracted);
- }
-
- @Test
- void size_limit() {
- Map<String, String> entries = Map.of("foo.xml", "foo", "bar.xml", "bar");
- try {
- readAll(zip(entries), Options.standard().maxSize(4));
- fail("Expected exception");
- } catch (IllegalArgumentException ignored) {}
- }
-
- @Test
- void entry_limit() {
- Map<String, String> entries = Map.of("foo.xml", "foo", "bar.xml", "bar");
- try {
- readAll(zip(entries), Options.standard().maxEntries(1));
- fail("Expected exception");
- } catch (IllegalArgumentException ignored) {}
- }
-
- @Test
- void paths() {
- Map<String, Boolean> tests = Map.of(
- "../../services.xml", true,
- "/../.././services.xml", true,
- "./application/././services.xml", true,
- "application//services.xml", true,
- "artifacts/", false, // empty dir
- "services..xml", false,
- "application/services.xml", false,
- "components/foo-bar-deploy.jar", false,
- "services.xml", false
- );
-
- Options options = Options.standard().maxEntrySize(1024);
- tests.forEach((name, expectException) -> {
- try {
- readAll(zip(Map.of(name, "foo")), options.pathPredicate(name::equals));
- assertFalse(expectException, "Expected exception for '" + name + "'");
- } catch (IllegalArgumentException ignored) {
- assertTrue(expectException, "Unexpected exception for '" + name + "'");
- }
- });
- }
-
- private static Map<String, String> readAll(InputStream inputStream, Options options) {
- ArchiveStreamReader reader = ArchiveStreamReader.ofZip(inputStream, options);
- ArchiveStreamReader.ArchiveFile file;
- Map<String, String> entries = new HashMap<>();
- ByteArrayOutputStream baos = new ByteArrayOutputStream();
- while ((file = reader.readNextTo(baos)) != null) {
- entries.put(file.path().toString(), baos.toString(StandardCharsets.UTF_8));
- baos.reset();
- }
- return entries;
- }
-
- private static InputStream zip(Map<String, String> entries) {
- ByteArrayOutputStream baos = new ByteArrayOutputStream();
- ZipArchiveOutputStream archiveOutputStream = null;
- try {
- archiveOutputStream = new ZipArchiveOutputStream(baos);
- for (var kv : entries.entrySet()) {
- String entryName = kv.getKey();
- String contents = kv.getValue();
- ZipArchiveEntry entry = new ZipArchiveEntry(entryName);
- archiveOutputStream.putArchiveEntry(entry);
- archiveOutputStream.write(contents.getBytes(StandardCharsets.UTF_8));
- archiveOutputStream.closeArchiveEntry();
- }
- } catch (IOException e) {
- throw new UncheckedIOException(e);
- } finally {
- if (archiveOutputStream != null) Exceptions.uncheck(archiveOutputStream::close);
- }
- return new ByteArrayInputStream(baos.toByteArray());
- }
-
-}