summary | refs | log | tree | commit | diff | stats
path: root/airlift-zstd/src
diff options
context:
space:
mode:
author: Arne Juul <arnej@yahooinc.com> 2023-01-04 11:24:33 +0000
committer: Arne Juul <arnej@yahooinc.com> 2023-01-04 11:38:01 +0000
commit: f3054672426ebe077e56c750f1a55bb02e91db5f (patch)
tree: fb3e30d80e968cb77e9011343479906fa4a6ef54 /airlift-zstd/src
parent: 5267d4415bdc912abc550d50384578122e8598f3 (diff)
copy (zstd only) unit tests from airlift repository
Diffstat (limited to 'airlift-zstd/src')
-rw-r--r-- airlift-zstd/src/test/java/ai/vespa/airlift/compress/AbstractTestCompression.java | 626
-rw-r--r-- airlift-zstd/src/test/java/ai/vespa/airlift/compress/ByteArrayOutputStream.java | 65
-rw-r--r-- airlift-zstd/src/test/java/ai/vespa/airlift/compress/HadoopCodecCompressor.java | 65
-rw-r--r-- airlift-zstd/src/test/java/ai/vespa/airlift/compress/HadoopCodecDecompressor.java | 65
-rw-r--r-- airlift-zstd/src/test/java/ai/vespa/airlift/compress/TestingModule.java | 55
-rw-r--r-- airlift-zstd/src/test/java/ai/vespa/airlift/compress/Util.java | 41
-rw-r--r-- airlift-zstd/src/test/java/ai/vespa/airlift/compress/benchmark/DataSet.java | 129
-rw-r--r-- airlift-zstd/src/test/java/ai/vespa/airlift/compress/thirdparty/ZstdJniCompressor.java | 48
-rw-r--r-- airlift-zstd/src/test/java/ai/vespa/airlift/compress/thirdparty/ZstdJniDecompressor.java | 38
-rw-r--r-- airlift-zstd/src/test/java/ai/vespa/airlift/zstd/TestCompressor.java | 97
-rw-r--r-- airlift-zstd/src/test/java/ai/vespa/airlift/zstd/TestXxHash64.java | 83
-rw-r--r-- airlift-zstd/src/test/java/ai/vespa/airlift/zstd/TestZstd.java | 201
-rw-r--r-- airlift-zstd/src/test/java/ai/vespa/airlift/zstd/TestZstdInputStream.java | 242
-rw-r--r-- airlift-zstd/src/test/java/ai/vespa/airlift/zstd/ZstdCat.java | 44
-rw-r--r-- airlift-zstd/src/test/resources/data/zstd/bad-second-frame.zst | bin 0 -> 8152 bytes
-rw-r--r-- airlift-zstd/src/test/resources/data/zstd/incompressible | bin 0 -> 400 bytes
-rw-r--r-- airlift-zstd/src/test/resources/data/zstd/large-rle | 1
-rw-r--r-- airlift-zstd/src/test/resources/data/zstd/multiple-frames | 406
-rw-r--r-- airlift-zstd/src/test/resources/data/zstd/multiple-frames.zst | bin 0 -> 8152 bytes
-rw-r--r-- airlift-zstd/src/test/resources/data/zstd/offset-before-start.zst | bin 0 -> 1559 bytes
-rw-r--r-- airlift-zstd/src/test/resources/data/zstd/small-literals-after-incompressible-literals | bin 0 -> 761973 bytes
-rw-r--r-- airlift-zstd/src/test/resources/data/zstd/with-checksum | 203
-rw-r--r-- airlift-zstd/src/test/resources/data/zstd/with-checksum.zst | bin 0 -> 4076 bytes
23 files changed, 2409 insertions, 0 deletions
diff --git a/airlift-zstd/src/test/java/ai/vespa/airlift/compress/AbstractTestCompression.java b/airlift-zstd/src/test/java/ai/vespa/airlift/compress/AbstractTestCompression.java
new file mode 100644
index 00000000000..c8d530bc238
--- /dev/null
+++ b/airlift-zstd/src/test/java/ai/vespa/airlift/compress/AbstractTestCompression.java
@@ -0,0 +1,626 @@
+/*
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package ai.vespa.airlift.compress;
+
+import com.google.common.primitives.Bytes;
+import ai.vespa.airlift.compress.benchmark.DataSet;
+import org.testng.SkipException;
+import org.testng.annotations.DataProvider;
+import org.testng.annotations.Guice;
+import org.testng.annotations.Test;
+
+import javax.inject.Inject;
+
+import java.io.IOException;
+import java.io.UncheckedIOException;
+import java.nio.Buffer;
+import java.nio.ByteBuffer;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.List;
+import java.util.Random;
+import java.util.concurrent.ThreadLocalRandom;
+import java.util.function.Function;
+import java.util.function.IntFunction;
+
+import static com.google.common.base.Preconditions.checkPositionIndexes;
+import static java.lang.System.arraycopy;
+import static java.nio.charset.StandardCharsets.UTF_8;
+import static org.assertj.core.api.Assertions.assertThat;
+import static org.assertj.core.api.Assertions.assertThatThrownBy;
+import static org.assertj.core.api.Assertions.catchThrowable;
+import static org.testng.Assert.assertEquals;
+import static org.testng.Assert.fail;
+
+@Guice(modules = TestingModule.class)
+public abstract class AbstractTestCompression
+{
+ // Data sets served by the "data" provider; filled in by setup(List).
+ private List<DataSet> testCases;
+
+ // Compressor exercised by the compression tests in this class.
+ protected abstract Compressor getCompressor();
+
+ // Decompressor exercised by the decompression tests; also used to read back the
+ // output of the ByteBuffer compression tests and the round-trip test.
+ protected abstract Decompressor getDecompressor();
+
+ // Compressor used by prepareCompressedData to produce input for the decompression tests.
+ protected abstract Compressor getVerifyCompressor();
+
+ // Decompressor used by verifyCompressedData to check the output of testCompress.
+ protected abstract Decompressor getVerifyDecompressor();
+
+ // When this returns false, the ByteBuffer based tests throw SkipException instead of running.
+ // The default is true.
+ protected boolean isByteBufferSupported()
+ {
+ return true;
+ }
+
+    /**
+     * Assembles the data sets handed out by the {@code "data"} provider: five small
+     * built-in inputs followed by the injected data sets, in that order.
+     *
+     * @param dataSets injected data sets, appended after the built-in ones
+     */
+    @Inject
+    public void setup(List<DataSet> dataSets)
+    {
+        // 256 bytes containing every byte value once, in ascending order
+        byte[] everyByteValue = new byte[256];
+        int position = 0;
+        while (position < everyByteValue.length) {
+            everyByteValue[position] = (byte) position;
+            position++;
+        }
+
+        testCases = new ArrayList<>();
+        testCases.add(new DataSet("nothing", new byte[0]));
+        testCases.add(new DataSet("short literal", "hello world!".getBytes(UTF_8)));
+        testCases.add(new DataSet("small copy", "XXXXabcdabcdABCDABCDwxyzwzyz123".getBytes(UTF_8)));
+        testCases.add(new DataSet("long copy", "XXXXabcdefgh abcdefgh abcdefgh abcdefgh abcdefgh abcdefgh ABC".getBytes(UTF_8)));
+        testCases.add(new DataSet("long literal", everyByteValue));
+        testCases.addAll(dataSets);
+    }
+
+    /**
+     * Decompresses data produced by the verify compressor into an exactly sized array
+     * and checks that the result equals the original bytes.
+     */
+    @Test(dataProvider = "data")
+    public void testDecompress(DataSet dataSet)
+            throws Exception
+    {
+        byte[] expected = dataSet.getUncompressed();
+        byte[] input = prepareCompressedData(expected);
+        byte[] output = new byte[expected.length];
+
+        int written = getDecompressor().decompress(input, 0, input.length, output, 0, output.length);
+
+        assertByteArraysEqual(output, 0, written, expected, 0, expected.length);
+    }
+
+    /**
+     * Tests that decompression works correctly when the decompressed data does not span
+     * the entire output buffer: the output array has unused bytes before and after the
+     * region the data is written to.
+     */
+    @Test(dataProvider = "data")
+    public void testDecompressWithOutputPadding(DataSet dataSet)
+    {
+        final int padding = 1021;
+
+        byte[] expected = dataSet.getUncompressed();
+        byte[] input = prepareCompressedData(expected);
+
+        // padding bytes in front of the data plus padding bytes behind it
+        byte[] output = new byte[padding + expected.length + padding];
+
+        int written = getDecompressor().decompress(input, 0, input.length, output, padding, expected.length + padding);
+
+        assertByteArraysEqual(output, padding, written, expected, 0, expected.length);
+    }
+
+    /**
+     * Checks that the decompressor does not write past the declared output length:
+     * random guard bytes placed directly behind the output region must survive unchanged.
+     */
+    @Test(dataProvider = "data")
+    public void testDecompressionBufferOverrun(DataSet dataSet)
+    {
+        byte[] expected = dataSet.getUncompressed();
+        byte[] input = prepareCompressedData(expected);
+
+        // random guard bytes that are compared against the output array afterwards
+        byte[] guard = new byte[100];
+        ThreadLocalRandom.current().nextBytes(guard);
+
+        byte[] output = Bytes.concat(new byte[expected.length], guard);
+
+        int written = getDecompressor().decompress(input, 0, input.length, output, 0, expected.length);
+
+        assertByteArraysEqual(output, 0, written, expected, 0, expected.length);
+
+        // the guard bytes start right after the expected.length output bytes
+        assertByteArraysEqual(guard, 0, guard.length, output, expected.length, guard.length);
+    }
+
+ @Test
+ public void testDecompressInputBoundsChecks()
+ {
+ byte[] data = new byte[1024];
+ new Random(1234).nextBytes(data);
+ Compressor compressor = getCompressor();
+ byte[] compressed = new byte[compressor.maxCompressedLength(data.length)];
+ int compressedLength = compressor.compress(data, 0, data.length, compressed, 0, compressed.length);
+
+ Decompressor decompressor = getDecompressor();
+ Throwable throwable;
+
+ // null input buffer
+ assertThatThrownBy(() -> decompressor.decompress(null, 0, compressedLength, data, 0, data.length))
+ .isInstanceOf(NullPointerException.class);
+
+ // mis-declared buffer size
+ byte[] compressedChoppedOff = Arrays.copyOf(compressed, compressedLength - 1);
+ throwable = catchThrowable(() -> decompressor.decompress(compressedChoppedOff, 0, compressedLength, data, 0, data.length));
+ if (throwable instanceof UncheckedIOException) {
+ // OK
+ }
+ else {
+ assertThat(throwable)
+ .hasMessageMatching(".*must not be greater than size.*|Invalid offset or length.*");
+ }
+
+ // overrun because of offset
+ byte[] compressedWithPadding = new byte[10 + compressedLength - 1];
+ arraycopy(compressed, 0, compressedWithPadding, 10, compressedLength - 1);
+
+ throwable = catchThrowable(() -> decompressor.decompress(compressedWithPadding, 10, compressedLength, data, 0, data.length));
+ if (throwable instanceof UncheckedIOException) {
+ // OK
+ }
+ else {
+ assertThat(throwable)
+ .hasMessageMatching(".*must not be greater than size.*|Invalid offset or length.*");
+ }
+ }
+
+ @Test
+ public void testDecompressOutputBoundsChecks()
+ {
+ byte[] data = new byte[1024];
+ new Random(1234).nextBytes(data);
+ Compressor compressor = getCompressor();
+ byte[] compressed = new byte[compressor.maxCompressedLength(data.length)];
+ int compressedLength = compressor.compress(data, 0, data.length, compressed, 0, compressed.length);
+ byte[] input = Arrays.copyOf(compressed, compressedLength);
+
+ Decompressor decompressor = getDecompressor();
+ Throwable throwable;
+
+ // null output buffer
+ assertThatThrownBy(() -> decompressor.decompress(input, 0, input.length, null, 0, data.length))
+ .isInstanceOf(NullPointerException.class);
+
+ // small buffer
+ assertThatThrownBy(() -> decompressor.decompress(input, 0, input.length, new byte[1], 0, 1))
+ .hasMessageMatching("All input was not consumed|attempt to write.* outside of destination buffer.*|Malformed input.*|Uncompressed length 1024 must be less than 1|Output buffer too small.*");
+
+ // mis-declared buffer size
+ throwable = catchThrowable(() -> decompressor.decompress(input, 0, input.length, new byte[1], 0, data.length));
+ if (throwable instanceof IndexOutOfBoundsException) {
+ // OK
+ }
+ else {
+ assertThat(throwable)
+ .hasMessageMatching(".*must not be greater than size.*|Invalid offset or length.*");
+ }
+
+ // mis-declared buffer size with greater buffer
+ throwable = catchThrowable(() -> decompressor.decompress(input, 0, input.length, new byte[data.length - 1], 0, data.length));
+ if (throwable instanceof IndexOutOfBoundsException) {
+ // OK
+ }
+ else {
+ assertThat(throwable)
+ .hasMessageMatching(".*must not be greater than size.*|Invalid offset or length.*");
+ }
+ }
+
+    /** Decompresses from a heap buffer into a heap buffer. */
+    @Test(dataProvider = "data")
+    public void testDecompressByteBufferHeapToHeap(DataSet dataSet)
+            throws Exception
+    {
+        verifyDecompressByteBuffer(dataSet, ByteBuffer::wrap, ByteBuffer::allocate);
+    }
+
+    /** Decompresses from a heap buffer into a direct buffer. */
+    @Test(dataProvider = "data")
+    public void testDecompressByteBufferHeapToDirect(DataSet dataSet)
+            throws Exception
+    {
+        verifyDecompressByteBuffer(dataSet, ByteBuffer::wrap, ByteBuffer::allocateDirect);
+    }
+
+    /** Decompresses from a direct buffer into a heap buffer. */
+    @Test(dataProvider = "data")
+    public void testDecompressByteBufferDirectToHeap(DataSet dataSet)
+            throws Exception
+    {
+        verifyDecompressByteBuffer(dataSet, AbstractTestCompression::toDirectBuffer, ByteBuffer::allocate);
+    }
+
+    /** Decompresses from a direct buffer into a direct buffer. */
+    @Test(dataProvider = "data")
+    public void testDecompressByteBufferDirectToDirect(DataSet dataSet)
+            throws Exception
+    {
+        verifyDecompressByteBuffer(dataSet, AbstractTestCompression::toDirectBuffer, ByteBuffer::allocateDirect);
+    }
+
+    /**
+     * Shared body of the ByteBuffer decompression tests, which differ only in the kind
+     * of buffer used on each side.
+     *
+     * @param dataSet data set whose uncompressed bytes are the expected result
+     * @param inputFactory turns the compressed bytes into the input buffer
+     * @param outputFactory allocates the output buffer with the given capacity
+     * @throws SkipException if {@link #isByteBufferSupported()} returns {@code false}
+     */
+    private void verifyDecompressByteBuffer(
+            DataSet dataSet,
+            Function<byte[], ByteBuffer> inputFactory,
+            IntFunction<ByteBuffer> outputFactory)
+    {
+        // skip before doing any work, exactly as each test did on its own
+        if (!isByteBufferSupported()) {
+            throw new SkipException("ByteBuffer not supported");
+        }
+
+        byte[] uncompressedOriginal = dataSet.getUncompressed();
+
+        ByteBuffer compressed = inputFactory.apply(prepareCompressedData(uncompressedOriginal));
+        ByteBuffer uncompressed = outputFactory.apply(uncompressedOriginal.length);
+
+        getDecompressor().decompress(compressed, uncompressed);
+        // call flip() through the Buffer type, as the rest of this class does
+        ((Buffer) uncompressed).flip();
+
+        assertByteBufferEqual(ByteBuffer.wrap(uncompressedOriginal), uncompressed);
+    }
+
+    /**
+     * Compresses a data set with the compressor under test and checks the result with
+     * {@code verifyCompressedData}. A throwaway compression of the same data minus its
+     * first byte runs first, so that state leaking between calls would show up.
+     */
+    @Test(dataProvider = "data")
+    public void testCompress(DataSet testCase)
+            throws Exception
+    {
+        Compressor compressor = getCompressor();
+
+        byte[] input = testCase.getUncompressed();
+        byte[] compressed = new byte[compressor.maxCompressedLength(input.length)];
+
+        // attempt to compress slightly different data to ensure the compressor doesn't keep state
+        // between calls that may affect results
+        if (input.length > 1) {
+            int shiftedLength = input.length - 1;
+            byte[] scratch = new byte[compressor.maxCompressedLength(shiftedLength)];
+            compressor.compress(input, 1, shiftedLength, scratch, 0, scratch.length);
+        }
+
+        int compressedLength = compressor.compress(input, 0, input.length, compressed, 0, compressed.length);
+
+        verifyCompressedData(input, compressed, compressedLength);
+    }
+
+ @Test
+ public void testCompressInputBoundsChecks()
+ {
+ Compressor compressor = getCompressor();
+ int declaredInputLength = 1024;
+ int maxCompressedLength = compressor.maxCompressedLength(1024);
+ byte[] output = new byte[maxCompressedLength];
+ Throwable throwable;
+
+ // null input buffer
+ assertThatThrownBy(() -> compressor.compress(null, 0, declaredInputLength, output, 0, output.length))
+ .isInstanceOf(NullPointerException.class);
+
+ // mis-declared buffer size
+ throwable = catchThrowable(() -> compressor.compress(new byte[1], 0, declaredInputLength, output, 0, output.length));
+ if (throwable instanceof IndexOutOfBoundsException) {
+ // OK
+ }
+ else {
+ assertThat(throwable)
+ .hasMessageMatching(".*must not be greater than size.*|Invalid offset or length.*");
+ }
+
+ // max too small
+ throwable = catchThrowable(() -> compressor.compress(new byte[declaredInputLength - 1], 0, declaredInputLength, output, 0, output.length));
+ if (throwable instanceof IndexOutOfBoundsException) {
+ // OK
+ }
+ else {
+ assertThat(throwable)
+ .hasMessageMatching(".*must not be greater than size.*|Invalid offset or length.*");
+ }
+
+ // overrun because of offset
+ throwable = catchThrowable(() -> compressor.compress(new byte[declaredInputLength + 10], 11, declaredInputLength, output, 0, output.length));
+ if (throwable instanceof IndexOutOfBoundsException) {
+ // OK
+ }
+ else {
+ assertThat(throwable)
+ .hasMessageMatching(".*must not be greater than size.*|Invalid offset or length.*");
+ }
+ }
+
+ @Test
+ public void testCompressOutputBoundsChecks()
+ {
+ Compressor compressor = getCompressor();
+ int minCompressionOverhead = compressor.maxCompressedLength(0);
+ byte[] input = new byte[minCompressionOverhead * 4 + 1024];
+ new Random(1234).nextBytes(input);
+ int maxCompressedLength = compressor.maxCompressedLength(input.length);
+ Throwable throwable;
+
+ // null output buffer
+ assertThatThrownBy(() -> compressor.compress(input, 0, input.length, null, 0, maxCompressedLength))
+ .isInstanceOf(NullPointerException.class);
+
+ // small buffer
+ assertThatThrownBy(() -> compressor.compress(input, 0, input.length, new byte[1], 0, 1))
+ .hasMessageMatching(".*must not be greater than size.*|Invalid offset or length.*|Max output length must be larger than .*|Output buffer must be at least.*|Output buffer too small");
+
+ // mis-declared buffer size
+ throwable = catchThrowable(() -> compressor.compress(input, 0, input.length, new byte[1], 0, maxCompressedLength));
+ if (throwable instanceof ArrayIndexOutOfBoundsException) {
+ // OK
+ }
+ else {
+ assertThat(throwable)
+ .hasMessageMatching(".*must not be greater than size.*|Invalid offset or length.*");
+ }
+
+ // mis-declared buffer size with buffer large enough to hold compression frame header (if any)
+ throwable = catchThrowable(() -> compressor.compress(input, 0, input.length, new byte[minCompressionOverhead * 2], 0, maxCompressedLength));
+ if (throwable instanceof ArrayIndexOutOfBoundsException) {
+ // OK
+ }
+ else {
+ assertThat(throwable)
+ .hasMessageMatching(".*must not be greater than size.*|Invalid offset or length.*");
+ }
+ }
+
+    /** Compresses from a heap buffer into a heap buffer. */
+    @Test(dataProvider = "data")
+    public void testCompressByteBufferHeapToHeap(DataSet dataSet)
+            throws Exception
+    {
+        compressAndVerifyByteBuffer(dataSet, ByteBuffer::wrap, ByteBuffer::allocate);
+    }
+
+    /** Compresses from a heap buffer into a direct buffer. */
+    @Test(dataProvider = "data")
+    public void testCompressByteBufferHeapToDirect(DataSet dataSet)
+            throws Exception
+    {
+        compressAndVerifyByteBuffer(dataSet, ByteBuffer::wrap, ByteBuffer::allocateDirect);
+    }
+
+    /** Compresses from a direct buffer into a heap buffer. */
+    @Test(dataProvider = "data")
+    public void testCompressByteBufferDirectToHeap(DataSet dataSet)
+            throws Exception
+    {
+        compressAndVerifyByteBuffer(dataSet, AbstractTestCompression::toDirectBuffer, ByteBuffer::allocate);
+    }
+
+    /** Compresses from a direct buffer into a direct buffer. */
+    @Test(dataProvider = "data")
+    public void testCompressByteBufferDirectToDirect(DataSet dataSet)
+            throws Exception
+    {
+        compressAndVerifyByteBuffer(dataSet, AbstractTestCompression::toDirectBuffer, ByteBuffer::allocateDirect);
+    }
+
+    /**
+     * Shared body of the ByteBuffer compression tests, which differ only in the kind
+     * of buffer used on each side.
+     *
+     * @param dataSet data set whose uncompressed bytes are compressed
+     * @param inputFactory turns the uncompressed bytes into the input buffer
+     * @param outputFactory allocates the output buffer with the given capacity
+     * @throws SkipException if {@link #isByteBufferSupported()} returns {@code false}
+     */
+    private void compressAndVerifyByteBuffer(
+            DataSet dataSet,
+            Function<byte[], ByteBuffer> inputFactory,
+            IntFunction<ByteBuffer> outputFactory)
+    {
+        // skip before doing any work, exactly as each test did on its own
+        if (!isByteBufferSupported()) {
+            throw new SkipException("ByteBuffer not supported");
+        }
+
+        byte[] uncompressedOriginal = dataSet.getUncompressed();
+
+        Compressor compressor = getCompressor();
+
+        verifyCompressByteBuffer(
+                compressor,
+                inputFactory.apply(uncompressedOriginal),
+                outputFactory.apply(compressor.maxCompressedLength(uncompressedOriginal.length)));
+    }
+
+    /**
+     * Compresses {@code expected} into {@code compressed}, decompresses the result with
+     * {@link #getDecompressor()} and asserts that the original bytes come back.
+     *
+     * @param compressor compressor to exercise
+     * @param expected uncompressed input; only duplicates of it are consumed
+     * @param compressed output buffer for the compressed data; flipped by this method
+     */
+    private void verifyCompressByteBuffer(Compressor compressor, ByteBuffer expected, ByteBuffer compressed)
+    {
+        // attempt to compress slightly different data to ensure the compressor doesn't keep state
+        // between calls that may affect results
+        boolean hasMoreThanOneByte = expected.remaining() > 1;
+        if (hasMoreThanOneByte) {
+            ByteBuffer shifted = expected.duplicate();
+            shifted.get(); // skip one byte
+            ByteBuffer scratch = ByteBuffer.allocate(((Buffer) compressed).remaining());
+            compressor.compress(shifted, scratch);
+        }
+
+        ByteBuffer source = expected.duplicate();
+        compressor.compress(source, compressed);
+        ((Buffer) compressed).flip();
+
+        int expectedLength = ((Buffer) expected).remaining();
+        ByteBuffer roundTripped = ByteBuffer.allocate(expectedLength);
+
+        // TODO: validate with "control" decompressor
+        getDecompressor().decompress(compressed, roundTripped);
+        ((Buffer) roundTripped).flip();
+
+        assertByteBufferEqual(expected.duplicate(), roundTripped);
+    }
+
+    /**
+     * Decompresses the first {@code compressedLength} bytes of {@code compressed} with the
+     * verify decompressor and asserts that the result equals {@code originalUncompressed}.
+     */
+    private void verifyCompressedData(byte[] originalUncompressed, byte[] compressed, int compressedLength)
+    {
+        byte[] roundTripped = new byte[originalUncompressed.length];
+        Decompressor verifier = getVerifyDecompressor();
+        int roundTrippedLength = verifier.decompress(compressed, 0, compressedLength, roundTripped, 0, roundTripped.length);
+
+        assertByteArraysEqual(roundTripped, 0, roundTrippedLength, originalUncompressed, 0, originalUncompressed.length);
+    }
+
+    /**
+     * Round-trips every prefix of length 1 to 255 of a 256-byte array holding each byte
+     * value once, and checks that each prefix is restored exactly.
+     */
+    @Test
+    public void testRoundTripSmallLiteral()
+            throws Exception
+    {
+        byte[] data = new byte[256];
+        for (int value = 0; value < data.length; value++) {
+            data[value] = (byte) value;
+        }
+
+        Compressor compressor = getCompressor();
+        byte[] compressed = new byte[compressor.maxCompressedLength(data.length)];
+        byte[] uncompressed = new byte[data.length];
+
+        for (int prefixLength = 1; prefixLength < data.length; prefixLength++) {
+            try {
+                int written = compressor.compress(data, 0, prefixLength, compressed, 0, compressed.length);
+
+                int decompressedSize = getDecompressor().decompress(compressed, 0, written, uncompressed, 0, uncompressed.length);
+
+                assertByteArraysEqual(data, 0, prefixLength, uncompressed, 0, decompressedSize);
+                assertEquals(decompressedSize, prefixLength);
+            }
+            catch (MalformedInputException e) {
+                // report which prefix length failed; the original exception stays attached as the cause
+                throw new RuntimeException("Failed with " + prefixLength + " bytes of input", e);
+            }
+        }
+    }
+
+    /**
+     * Data provider named {@code "data"}: one row per registered data set, each row
+     * holding that data set as its only argument.
+     */
+    @DataProvider(name = "data")
+    public Object[][] getTestCases()
+            throws IOException
+    {
+        return testCases.stream()
+                .map(testCase -> new Object[] {testCase})
+                .toArray(Object[][]::new);
+    }
+
+ public static void assertByteArraysEqual(byte[] left, int leftOffset, int leftLength, byte[] right, int rightOffset, int rightLength)
+ {
+ checkPositionIndexes(leftOffset, leftOffset + leftLength, left.length);
+ checkPositionIndexes(rightOffset, rightOffset + rightLength, right.length);
+
+ for (int i = 0; i < Math.min(leftLength, rightLength); i++) {
+ if (left[leftOffset + i] != right[rightOffset + i]) {
+ fail(String.format("Byte arrays differ at position %s: 0x%02X vs 0x%02X", i, left[leftOffset + i], right[rightOffset + i]));
+ }
+ }
+
+ assertEquals(leftLength, rightLength, String.format("Array lengths differ: %s vs %s", leftLength, rightLength));
+ }
+
+    /**
+     * Asserts that the remaining content of two buffers is identical. Bytes are read
+     * with absolute gets, so neither buffer's position is changed. The bytes of the
+     * common prefix are compared before the lengths.
+     *
+     * @param left first buffer, compared from its current position
+     * @param right second buffer, compared from its current position
+     */
+    private static void assertByteBufferEqual(ByteBuffer left, ByteBuffer right)
+    {
+        Buffer leftBuffer = left;
+        Buffer rightBuffer = right;
+
+        int leftPosition = leftBuffer.position();
+        int rightPosition = rightBuffer.position();
+        int leftRemaining = leftBuffer.remaining();
+        int rightRemaining = rightBuffer.remaining();
+
+        int commonLength = Math.min(leftRemaining, rightRemaining);
+        for (int i = 0; i < commonLength; i++) {
+            if (left.get(leftPosition + i) != right.get(rightPosition + i)) {
+                fail(String.format("Byte buffers differ at position %s: 0x%02X vs 0x%02X", i, left.get(leftPosition + i), right.get(rightPosition + i)));
+            }
+        }
+
+        // report both lengths; the message used to print the left length twice
+        assertEquals(leftRemaining, rightRemaining, String.format("Buffer lengths differ: %s vs %s", leftRemaining, rightRemaining));
+    }
+
+    /**
+     * Copies {@code data} into a newly allocated direct buffer and flips it, so the
+     * returned buffer is positioned at 0 with its limit at {@code data.length}.
+     */
+    private static ByteBuffer toDirectBuffer(byte[] data)
+    {
+        ByteBuffer filled = ByteBuffer.allocateDirect(data.length).put(data);
+        ((Buffer) filled).flip();
+        return filled;
+    }
+
+    /**
+     * Compresses {@code uncompressed} with the verify compressor and returns the
+     * compressed bytes in an array of exactly the compressed length.
+     */
+    private byte[] prepareCompressedData(byte[] uncompressed)
+    {
+        Compressor verifyCompressor = getVerifyCompressor();
+
+        // worst-case sized scratch array; trimmed to the actual size below
+        byte[] scratch = new byte[verifyCompressor.maxCompressedLength(uncompressed.length)];
+        int written = verifyCompressor.compress(uncompressed, 0, uncompressed.length, scratch, 0, scratch.length);
+
+        return Arrays.copyOf(scratch, written);
+    }
+}
diff --git a/airlift-zstd/src/test/java/ai/vespa/airlift/compress/ByteArrayOutputStream.java b/airlift-zstd/src/test/java/ai/vespa/airlift/compress/ByteArrayOutputStream.java
new file mode 100644
index 00000000000..feb34b03869
--- /dev/null
+++ b/airlift-zstd/src/test/java/ai/vespa/airlift/compress/ByteArrayOutputStream.java
@@ -0,0 +1,65 @@
+/*
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package ai.vespa.airlift.compress;
+
+import java.io.OutputStream;
+
+import static com.google.common.base.Preconditions.checkPositionIndex;
+
+public final class ByteArrayOutputStream
+ extends OutputStream
+{
+ private final byte[] buffer;
+ private final int initialOffset;
+ private final int bufferLimit;
+ private int offset;
+
+ public ByteArrayOutputStream(byte[] buffer)
+ {
+ this(buffer, 0, buffer.length);
+ }
+
+ public ByteArrayOutputStream(byte[] buffer, int offset, int length)
+ {
+ this.buffer = buffer;
+ this.initialOffset = offset;
+ this.bufferLimit = offset + length;
+ this.offset = offset;
+ }
+
+ @Override
+ public void write(int value)
+ {
+ checkPositionIndex(offset + 1, bufferLimit);
+ buffer[offset++] = (byte) value;
+ }
+
+ @Override
+ public void write(byte[] buffer, int offset, int length)
+ {
+ checkPositionIndex(this.offset + length, bufferLimit);
+ System.arraycopy(buffer, offset, this.buffer, this.offset, length);
+ this.offset += length;
+ }
+
+ public int size()
+ {
+ return offset - initialOffset;
+ }
+
+ public byte[] getBuffer()
+ {
+ return buffer;
+ }
+}
diff --git a/airlift-zstd/src/test/java/ai/vespa/airlift/compress/HadoopCodecCompressor.java b/airlift-zstd/src/test/java/ai/vespa/airlift/compress/HadoopCodecCompressor.java
new file mode 100644
index 00000000000..3511c9972d8
--- /dev/null
+++ b/airlift-zstd/src/test/java/ai/vespa/airlift/compress/HadoopCodecCompressor.java
@@ -0,0 +1,65 @@
+/*
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package ai.vespa.airlift.compress;
+
+import org.apache.hadoop.io.compress.CompressionCodec;
+
+import java.io.IOException;
+import java.io.OutputStream;
+import java.io.UncheckedIOException;
+import java.nio.ByteBuffer;
+
+/**
+ * Adapts a Hadoop {@link CompressionCodec} to the {@link Compressor} interface by
+ * streaming the input through the codec's output stream into the target array.
+ * Only the array-based {@code compress} method is implemented.
+ */
+public class HadoopCodecCompressor
+        implements Compressor
+{
+    private final CompressionCodec codec;
+    private final Compressor blockCompressorForSizeCalculation;
+
+    /**
+     * @param codec codec that does the actual compression
+     * @param blockCompressorForSizeCalculation compressor whose {@code maxCompressedLength}
+     *         is the basis for this class's size estimate
+     */
+    public HadoopCodecCompressor(CompressionCodec codec, Compressor blockCompressorForSizeCalculation)
+    {
+        this.codec = codec;
+        this.blockCompressorForSizeCalculation = blockCompressorForSizeCalculation;
+    }
+
+    @Override
+    public int maxCompressedLength(int uncompressedSize)
+    {
+        // assume hadoop stream encoder won't increase size by more than 10% over the block encoder
+        return (int) ((blockCompressorForSizeCalculation.maxCompressedLength(uncompressedSize) * 1.1) + 8);
+    }
+
+    /**
+     * Compresses the input region into the output region.
+     *
+     * @return number of bytes written to {@code output}
+     * @throws UncheckedIOException if the codec stream reports an {@link IOException}
+     */
+    @Override
+    public int compress(byte[] input, int inputOffset, int inputLength, byte[] output, int outputOffset, int maxOutputLength)
+    {
+        ByteArrayOutputStream byteArrayOutputStream = new ByteArrayOutputStream(output, outputOffset, maxOutputLength);
+
+        // try-with-resources closes the codec stream even when write() fails;
+        // size() is still read only after the stream has been closed
+        try (OutputStream out = codec.createOutputStream(byteArrayOutputStream)) {
+            // write in a single shot to cause multiple chunks per block
+            out.write(input, inputOffset, inputLength);
+        }
+        catch (IOException e) {
+            throw new UncheckedIOException(e);
+        }
+
+        return byteArrayOutputStream.size();
+    }
+
+    @Override
+    public void compress(ByteBuffer input, ByteBuffer output)
+    {
+        throw new UnsupportedOperationException("not yet implemented");
+    }
+}
diff --git a/airlift-zstd/src/test/java/ai/vespa/airlift/compress/HadoopCodecDecompressor.java b/airlift-zstd/src/test/java/ai/vespa/airlift/compress/HadoopCodecDecompressor.java
new file mode 100644
index 00000000000..5425fd8d4ff
--- /dev/null
+++ b/airlift-zstd/src/test/java/ai/vespa/airlift/compress/HadoopCodecDecompressor.java
@@ -0,0 +1,65 @@
+/*
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package ai.vespa.airlift.compress;
+
+import org.apache.hadoop.io.compress.CompressionCodec;
+
+import java.io.ByteArrayInputStream;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.UncheckedIOException;
+import java.nio.ByteBuffer;
+
+/**
+ * Adapts a Hadoop {@link CompressionCodec} to the {@link Decompressor} interface by
+ * reading the codec's input stream into the target array. Only the array-based
+ * {@code decompress} method is implemented.
+ */
+public class HadoopCodecDecompressor
+        implements Decompressor
+{
+    private final CompressionCodec codec;
+
+    public HadoopCodecDecompressor(CompressionCodec codec)
+    {
+        this.codec = codec;
+    }
+
+    /**
+     * Reads decompressed bytes until the output region is full or the stream ends, then
+     * checks that the stream has nothing left.
+     *
+     * @return number of bytes written to {@code output}
+     * @throws RuntimeException if decompressed data remains after {@code maxOutputLength} bytes
+     * @throws UncheckedIOException if the codec stream reports an {@link IOException}
+     */
+    @Override
+    public int decompress(byte[] input, int inputOffset, int inputLength, byte[] output, int outputOffset, int maxOutputLength)
+            throws MalformedInputException
+    {
+        ByteArrayInputStream source = new ByteArrayInputStream(input, inputOffset, inputLength);
+        try (InputStream in = codec.createInputStream(source)) {
+            int total = 0;
+            for (int count; total < maxOutputLength; total += count) {
+                count = in.read(output, outputOffset + total, maxOutputLength - total);
+                if (count < 0) {
+                    // end of stream before the output region was filled
+                    break;
+                }
+            }
+
+            // one more byte available means the data did not fit into the output region
+            boolean hasLeftover = in.read() >= 0;
+            if (hasLeftover) {
+                throw new RuntimeException("All input was not consumed");
+            }
+
+            return total;
+        }
+        catch (IOException e) {
+            throw new UncheckedIOException(e);
+        }
+    }
+
+    @Override
+    public void decompress(ByteBuffer input, ByteBuffer output)
+            throws MalformedInputException
+    {
+        throw new UnsupportedOperationException("not yet implemented");
+    }
+}
diff --git a/airlift-zstd/src/test/java/ai/vespa/airlift/compress/TestingModule.java b/airlift-zstd/src/test/java/ai/vespa/airlift/compress/TestingModule.java
new file mode 100644
index 00000000000..d66c2672c3f
--- /dev/null
+++ b/airlift-zstd/src/test/java/ai/vespa/airlift/compress/TestingModule.java
@@ -0,0 +1,55 @@
+/*
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package ai.vespa.airlift.compress;
+
+import com.google.inject.Binder;
+import com.google.inject.Module;
+import com.google.inject.Provides;
+import ai.vespa.airlift.compress.benchmark.DataSet;
+import org.openjdk.jmh.annotations.Param;
+
+import javax.inject.Singleton;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.List;
+
+/**
+ * Guice module that provides the list of benchmark {@link DataSet}s.
+ */
+public class TestingModule
+        implements Module
+{
+    @Override
+    public void configure(Binder binder)
+    {
+        // nothing to bind explicitly; the module only contributes the @Provides method below
+    }
+
+    /**
+     * Creates and loads one {@link DataSet} for every value of the {@link Param}
+     * annotation found on the {@code name} field of {@link DataSet}.
+     *
+     * @throws NoSuchFieldException if {@link DataSet} has no declared field called {@code name}
+     * @throws IOException if loading a data set file fails
+     */
+    @Provides
+    @Singleton
+    public List<DataSet> dataSets()
+            throws NoSuchFieldException, IOException
+    {
+        Param declaredNames = DataSet.class.getDeclaredField("name").getAnnotation(Param.class);
+        String[] names = declaredNames.value();
+
+        List<DataSet> loaded = new ArrayList<>();
+        for (int i = 0; i < names.length; i++) {
+            DataSet dataSet = new DataSet(names[i]);
+            dataSet.loadFile();
+            loaded.add(dataSet);
+        }
+        return loaded;
+    }
+}
diff --git a/airlift-zstd/src/test/java/ai/vespa/airlift/compress/Util.java b/airlift-zstd/src/test/java/ai/vespa/airlift/compress/Util.java
new file mode 100644
index 00000000000..b4bc80557ee
--- /dev/null
+++ b/airlift-zstd/src/test/java/ai/vespa/airlift/compress/Util.java
@@ -0,0 +1,41 @@
+/*
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package ai.vespa.airlift.compress;
+
+import static java.lang.String.format;
+
+/**
+ * Static formatting helpers.
+ */
+public final class Util
+{
+    private Util()
+    {
+    }
+
+    /**
+     * Formats a throughput as a short string using binary (1024-based) units.
+     * The unit switches to the next larger one once the value reaches ten of it:
+     * below 10 kB the exact byte count is printed, above that one decimal is shown.
+     *
+     * @param bytesPerSecond throughput in bytes per second
+     * @return for example {@code "512B/s"}, {@code "10.0kB/s"}, {@code "10.0MB/s"} or {@code "10.0GB/s"}
+     */
+    public static String toHumanReadableSpeed(long bytesPerSecond)
+    {
+        if (bytesPerSecond < 1024 * 10L) {
+            return format("%dB/s", bytesPerSecond);
+        }
+        if (bytesPerSecond < 1024 * 1024 * 10L) {
+            return format("%.1fkB/s", bytesPerSecond / 1024.0f);
+        }
+        if (bytesPerSecond < 1024 * 1024 * 1024 * 10L) {
+            return format("%.1fMB/s", bytesPerSecond / (1024.0f * 1024.0f));
+        }
+        return format("%.1fGB/s", bytesPerSecond / (1024.0f * 1024.0f * 1024.0f));
+    }
+}
diff --git a/airlift-zstd/src/test/java/ai/vespa/airlift/compress/benchmark/DataSet.java b/airlift-zstd/src/test/java/ai/vespa/airlift/compress/benchmark/DataSet.java
new file mode 100644
index 00000000000..5db909eaef8
--- /dev/null
+++ b/airlift-zstd/src/test/java/ai/vespa/airlift/compress/benchmark/DataSet.java
@@ -0,0 +1,129 @@
+/*
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package ai.vespa.airlift.compress.benchmark;
+
+import com.google.common.io.Files;
+import org.openjdk.jmh.annotations.Param;
+import org.openjdk.jmh.annotations.Scope;
+import org.openjdk.jmh.annotations.Setup;
+import org.openjdk.jmh.annotations.State;
+
+import java.io.File;
+import java.io.IOException;
+
+/**
+ * JMH state object holding one named test file and its uncompressed bytes.
+ * The {@link Param} list on {@link #name} enumerates the available data set names.
+ */
+@State(Scope.Thread)
+public class DataSet
+{
+    @Param({
+            "canterbury/alice29.txt",
+            "canterbury/asyoulik.txt",
+            "canterbury/cp.html",
+            "canterbury/fields.c",
+            "canterbury/grammar.lsp",
+            "canterbury/kennedy.xls",
+            "canterbury/lcet10.txt",
+            "canterbury/plrabn12.txt",
+            "canterbury/ptt5",
+            "canterbury/sum",
+            "canterbury/xargs.1",
+
+            "silesia/dickens",
+            "silesia/mozilla",
+            "silesia/mr",
+            "silesia/nci",
+            "silesia/ooffice",
+            "silesia/osdb",
+            "silesia/reymont",
+            "silesia/samba",
+            "silesia/sao",
+            "silesia/webster",
+            "silesia/x-ray",
+            "silesia/xml",
+
+            "calgary/bib",
+            "calgary/book1",
+            "calgary/book2",
+            "calgary/geo",
+            "calgary/news",
+            "calgary/obj1",
+            "calgary/obj2",
+            "calgary/paper1",
+            "calgary/paper2",
+            "calgary/paper3",
+            "calgary/paper4",
+            "calgary/paper5",
+            "calgary/paper6",
+            "calgary/pic",
+            "calgary/progc",
+            "calgary/progl",
+            "calgary/progp",
+            "calgary/trans",
+
+            "artificial/a.txt",
+            "artificial/aaa.txt",
+            "artificial/alphabet.txt",
+            "artificial/random.txt",
+            "artificial/uniform_ascii.bin", // random ASCII with uniform probabilities per symbol
+
+            "large/bible.txt",
+            "large/E.coli",
+            "large/world192.txt",
+
+            "geo.protodata",
+            "house.jpg",
+            "html",
+            "kppkn.gtb",
+            "mapreduce-osdi-1.pdf",
+            "urls.10K",
+    })
+    private String name;
+    private byte[] uncompressed;
+
+    /** Creates an empty data set; {@link #name} is left unset by this constructor. */
+    public DataSet()
+    {
+    }
+
+    /** Creates a data set for {@code name}; call {@link #loadFile()} to read its content. */
+    public DataSet(String name)
+    {
+        this.name = name;
+    }
+
+    /** Creates a data set with the given name and already available content. */
+    public DataSet(String name, byte[] uncompressed)
+    {
+        this.name = name;
+        this.uncompressed = uncompressed;
+    }
+
+    /**
+     * Reads the file {@code testdata/<name>} (a relative path) into memory.
+     *
+     * @throws IOException if the file cannot be read
+     */
+    @Setup
+    public void loadFile()
+            throws IOException
+    {
+        uncompressed = Files.toByteArray(new File("testdata", name));
+    }
+
+    /** Returns the internal content array itself (no copy is made). */
+    public byte[] getUncompressed()
+    {
+        return uncompressed;
+    }
+
+    public String getName()
+    {
+        return name;
+    }
+
+    /** Returns the data set name. */
+    @Override
+    public String toString()
+    {
+        return name;
+    }
+}
diff --git a/airlift-zstd/src/test/java/ai/vespa/airlift/compress/thirdparty/ZstdJniCompressor.java b/airlift-zstd/src/test/java/ai/vespa/airlift/compress/thirdparty/ZstdJniCompressor.java
new file mode 100644
index 00000000000..5874167150a
--- /dev/null
+++ b/airlift-zstd/src/test/java/ai/vespa/airlift/compress/thirdparty/ZstdJniCompressor.java
@@ -0,0 +1,48 @@
+/*
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package ai.vespa.airlift.compress.thirdparty;
+
+import com.github.luben.zstd.Zstd;
+import ai.vespa.airlift.compress.Compressor;
+
+import java.nio.ByteBuffer;
+
+/**
+ * {@link Compressor} backed by the zstd-jni bindings ({@link Zstd}), compressing at a fixed level.
+ */
+public class ZstdJniCompressor
+        implements Compressor
+{
+    private final int level;
+
+    /**
+     * @param level compression level handed to every zstd-jni compress call
+     */
+    public ZstdJniCompressor(int level)
+    {
+        this.level = level;
+    }
+
+    /** Returns {@link Zstd#compressBound(long)} for the given size, narrowed to {@code int}. */
+    @Override
+    public int maxCompressedLength(int uncompressedSize)
+    {
+        return (int) Zstd.compressBound(uncompressedSize);
+    }
+
+    /**
+     * Compresses a range of {@code input} into a range of {@code output}.
+     *
+     * @return the value returned by zstd-jni, narrowed to {@code int}
+     */
+    @Override
+    public int compress(byte[] input, int inputOffset, int inputLength, byte[] output, int outputOffset, int maxOutputLength)
+    {
+        // zstd-jni takes the destination arguments first, then the source arguments
+        return (int) Zstd.compressByteArray(output, outputOffset, maxOutputLength, input, inputOffset, inputLength, level);
+    }
+
+    /**
+     * Compresses {@code input} into {@code output}.
+     */
+    @Override
+    public void compress(ByteBuffer input, ByteBuffer output)
+    {
+        // Destination first, source second - the same convention as compressByteArray above.
+        // The previous argument order (input, output) asked zstd-jni to compress the output buffer.
+        Zstd.compress(output, input, level);
+    }
+}
diff --git a/airlift-zstd/src/test/java/ai/vespa/airlift/compress/thirdparty/ZstdJniDecompressor.java b/airlift-zstd/src/test/java/ai/vespa/airlift/compress/thirdparty/ZstdJniDecompressor.java
new file mode 100644
index 00000000000..63e9de99f86
--- /dev/null
+++ b/airlift-zstd/src/test/java/ai/vespa/airlift/compress/thirdparty/ZstdJniDecompressor.java
@@ -0,0 +1,38 @@
+/*
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package ai.vespa.airlift.compress.thirdparty;
+
+import com.github.luben.zstd.Zstd;
+import ai.vespa.airlift.compress.Decompressor;
+import ai.vespa.airlift.compress.MalformedInputException;
+
+import java.nio.ByteBuffer;
+
+/**
+ * {@link Decompressor} backed by the zstd-jni bindings ({@link Zstd}).
+ */
+public class ZstdJniDecompressor
+        implements Decompressor
+{
+    /**
+     * Decompresses a range of {@code input} into a range of {@code output}.
+     *
+     * @return the value returned by zstd-jni, narrowed to {@code int}
+     */
+    @Override
+    public int decompress(byte[] input, int inputOffset, int inputLength, byte[] output, int outputOffset, int maxOutputLength)
+            throws MalformedInputException
+    {
+        // zstd-jni takes the destination arguments first, then the source arguments
+        long produced = Zstd.decompressByteArray(output, outputOffset, maxOutputLength, input, inputOffset, inputLength);
+        return (int) produced;
+    }
+
+    /**
+     * Decompresses {@code input} into {@code output}.
+     */
+    @Override
+    public void decompress(ByteBuffer input, ByteBuffer output)
+            throws MalformedInputException
+    {
+        // destination buffer first, source buffer second
+        Zstd.decompress(output, input);
+    }
+}
diff --git a/airlift-zstd/src/test/java/ai/vespa/airlift/zstd/TestCompressor.java b/airlift-zstd/src/test/java/ai/vespa/airlift/zstd/TestCompressor.java
new file mode 100644
index 00000000000..884ab8f2577
--- /dev/null
+++ b/airlift-zstd/src/test/java/ai/vespa/airlift/zstd/TestCompressor.java
@@ -0,0 +1,97 @@
+/*
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package ai.vespa.airlift.zstd;
+
+import org.testng.annotations.Test;
+
+import static org.testng.Assert.assertEquals;
+import static sun.misc.Unsafe.ARRAY_BYTE_BASE_OFFSET;
+
+/**
+ * Tests for the magic-number and frame-header writers of {@link ZstdFrameCompressor};
+ * what is written is read back with {@link ZstdFrameDecompressor}.
+ */
+public class TestCompressor
+{
+    @Test
+    public void testMagic()
+    {
+        byte[] bytes = new byte[4];
+        int start = ARRAY_BYTE_BASE_OFFSET;
+        int limit = start + bytes.length;
+
+        ZstdFrameCompressor.writeMagic(bytes, start, limit);
+        ZstdFrameDecompressor.verifyMagic(bytes, start, limit);
+    }
+
+    @Test(expectedExceptions = IllegalArgumentException.class, expectedExceptionsMessageRegExp = ".*buffer too small.*")
+    public void testMagicFailsWithSmallBuffer()
+    {
+        // one byte short of the 4-byte buffer used by testMagic
+        byte[] bytes = new byte[3];
+        int start = ARRAY_BYTE_BASE_OFFSET;
+        ZstdFrameCompressor.writeMagic(bytes, start, start + bytes.length);
+    }
+
+    @Test(expectedExceptions = IllegalArgumentException.class, expectedExceptionsMessageRegExp = ".*buffer too small.*")
+    public void testFrameHeaderFailsWithSmallBuffer()
+    {
+        // one byte short of the maximum header size
+        byte[] bytes = new byte[ZstdFrameCompressor.MAX_FRAME_HEADER_SIZE - 1];
+        int start = ARRAY_BYTE_BASE_OFFSET;
+        ZstdFrameCompressor.writeFrameHeader(bytes, start, start + bytes.length, 1000, 1024);
+    }
+
+    @Test
+    public void testFrameHeader()
+    {
+        verifyFrameHeader(1, 1024, new FrameHeader(2, -1, 1, -1, true));
+        verifyFrameHeader(256, 1024, new FrameHeader(3, -1, 256, -1, true));
+
+        int inputSize = 65536 + 256;
+
+        // window sizes 1024 + 128 * k for k = 1..8
+        for (int step = 1; step <= 8; step++) {
+            int windowSize = 1024 + 128 * step;
+            verifyFrameHeader(inputSize, windowSize, new FrameHeader(6, windowSize, inputSize, -1, true));
+        }
+
+        verifyFrameHeader(inputSize, 2048, new FrameHeader(6, 2048, inputSize, -1, true));
+
+        verifyFrameHeader(Integer.MAX_VALUE, 1024, new FrameHeader(6, 1024, Integer.MAX_VALUE, -1, true));
+    }
+
+    @Test(expectedExceptions = IllegalArgumentException.class, expectedExceptionsMessageRegExp = "Minimum window size is 1024")
+    public void testMinimumWindowSize()
+    {
+        byte[] bytes = new byte[ZstdFrameCompressor.MAX_FRAME_HEADER_SIZE];
+        int start = ARRAY_BYTE_BASE_OFFSET;
+        int limit = start + bytes.length;
+
+        ZstdFrameCompressor.writeFrameHeader(bytes, start, limit, 2000, 1023);
+    }
+
+    @Test(expectedExceptions = IllegalArgumentException.class, expectedExceptionsMessageRegExp = "\\QWindow size of magnitude 2^10 must be multiple of 128\\E")
+    public void testWindowSizePrecision()
+    {
+        byte[] bytes = new byte[ZstdFrameCompressor.MAX_FRAME_HEADER_SIZE];
+        int start = ARRAY_BYTE_BASE_OFFSET;
+        int limit = start + bytes.length;
+
+        ZstdFrameCompressor.writeFrameHeader(bytes, start, limit, 2000, 1025);
+    }
+
+    /**
+     * Writes a frame header for the given sizes, checks the reported header size,
+     * then reads the header back and compares it with {@code expected}.
+     */
+    private void verifyFrameHeader(int inputSize, int windowSize, FrameHeader expected)
+    {
+        byte[] bytes = new byte[ZstdFrameCompressor.MAX_FRAME_HEADER_SIZE];
+        int start = ARRAY_BYTE_BASE_OFFSET;
+        int limit = start + bytes.length;
+
+        int written = ZstdFrameCompressor.writeFrameHeader(bytes, start, limit, inputSize, windowSize);
+        assertEquals(written, expected.headerSize);
+
+        FrameHeader parsed = ZstdFrameDecompressor.readFrameHeader(bytes, start, limit);
+        assertEquals(parsed, expected);
+    }
+}
diff --git a/airlift-zstd/src/test/java/ai/vespa/airlift/zstd/TestXxHash64.java b/airlift-zstd/src/test/java/ai/vespa/airlift/zstd/TestXxHash64.java
new file mode 100644
index 00000000000..b78888ca66e
--- /dev/null
+++ b/airlift-zstd/src/test/java/ai/vespa/airlift/zstd/TestXxHash64.java
@@ -0,0 +1,83 @@
+/*
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package ai.vespa.airlift.zstd;
+
+import net.jpountz.xxhash.XXHash64;
+import net.jpountz.xxhash.XXHashFactory;
+import org.testng.annotations.Test;
+
+import static org.testng.Assert.assertEquals;
+import static sun.misc.Unsafe.ARRAY_BYTE_BASE_OFFSET;
+
+// forked from https://github.com/airlift/slice
+/**
+ * Checks {@link XxHash64} against fixed expected values and against the jpountz implementation.
+ */
+public class TestXxHash64
+{
+    private static final long PRIME = 2654435761L;
+
+    private final byte[] buffer = sanityBuffer();
+
+    public TestXxHash64()
+    {
+    }
+
+    /**
+     * Builds the 101-byte input used by {@link #testSanity()}: starting from {@link #PRIME},
+     * each byte is bits 24..31 of the running value, which is then squared.
+     */
+    private static byte[] sanityBuffer()
+    {
+        byte[] bytes = new byte[101];
+        long state = PRIME;
+        for (int index = 0; index < bytes.length; index++, state *= state) {
+            bytes[index] = (byte) (state >> 24);
+        }
+        return bytes;
+    }
+
+    @Test
+    public void testSanity()
+    {
+        assertHash(0, buffer, 0, 0xEF46DB3751D8E999L);
+
+        assertHash(0, buffer, 1, 0x4FCE394CC88952D8L);
+        assertHash(PRIME, buffer, 1, 0x739840CB819FA723L);
+
+        assertHash(0, buffer, 4, 0x9256E58AA397AEF1L);
+        assertHash(PRIME, buffer, 4, 0x9D5FFDFB928AB4BL);
+
+        assertHash(0, buffer, 8, 0xF74CB1451B32B8CFL);
+        assertHash(PRIME, buffer, 8, 0x9C44B77FBCC302C5L);
+
+        assertHash(0, buffer, 14, 0xCFFA8DB881BC3A3DL);
+        assertHash(PRIME, buffer, 14, 0x5B9611585EFCC9CBL);
+
+        assertHash(0, buffer, 32, 0xAF5753D39159EDEEL);
+        assertHash(PRIME, buffer, 32, 0xDCAB9233B8CA7B0FL);
+
+        assertHash(0, buffer, buffer.length, 0x0EAB543384F878ADL);
+        assertHash(PRIME, buffer, buffer.length, 0xCAA65939306F1E21L);
+    }
+
+    @Test
+    public void testMultipleLengths()
+    {
+        XXHash64 reference = XXHashFactory.fastestInstance().hash64();
+        // all-zero inputs of every length from 0 to 19,999, seed 0
+        for (int length = 0; length < 20_000; length++) {
+            byte[] zeros = new byte[length];
+            long expected = reference.hash(zeros, 0, length, 0);
+            assertHash(0, zeros, length, expected);
+        }
+    }
+
+    /**
+     * Asserts that hashing the first {@code length} bytes of {@code data} with {@code seed} yields {@code expected}.
+     */
+    private static void assertHash(long seed, byte[] data, int length, long expected)
+    {
+        long actual = XxHash64.hash(seed, data, ARRAY_BYTE_BASE_OFFSET, length);
+        assertEquals(actual, expected);
+    }
+}
diff --git a/airlift-zstd/src/test/java/ai/vespa/airlift/zstd/TestZstd.java b/airlift-zstd/src/test/java/ai/vespa/airlift/zstd/TestZstd.java
new file mode 100644
index 00000000000..f947122ce6a
--- /dev/null
+++ b/airlift-zstd/src/test/java/ai/vespa/airlift/zstd/TestZstd.java
@@ -0,0 +1,201 @@
+/*
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package ai.vespa.airlift.zstd;
+
+import com.google.common.io.Resources;
+import ai.vespa.airlift.compress.AbstractTestCompression;
+import ai.vespa.airlift.compress.Compressor;
+import ai.vespa.airlift.compress.Decompressor;
+import ai.vespa.airlift.compress.MalformedInputException;
+import ai.vespa.airlift.compress.benchmark.DataSet;
+import ai.vespa.airlift.compress.thirdparty.ZstdJniCompressor;
+import ai.vespa.airlift.compress.thirdparty.ZstdJniDecompressor;
+import org.testng.annotations.Test;
+
+import java.io.IOException;
+import java.util.Arrays;
+
+import static org.assertj.core.api.Assertions.assertThatThrownBy;
+import static org.testng.Assert.assertEquals;
+
+/**
+ * Tests for {@link ZstdCompressor} and {@link ZstdDecompressor}; the zstd-jni based
+ * wrappers are supplied as the verification codecs.
+ */
+public class TestZstd
+        extends AbstractTestCompression
+{
+    @Override
+    protected Compressor getCompressor()
+    {
+        return new ZstdCompressor();
+    }
+
+    @Override
+    protected Decompressor getDecompressor()
+    {
+        return new ZstdDecompressor();
+    }
+
+    @Override
+    protected Compressor getVerifyCompressor()
+    {
+        return new ZstdJniCompressor(3);
+    }
+
+    @Override
+    protected Decompressor getVerifyDecompressor()
+    {
+        return new ZstdJniDecompressor();
+    }
+
+    // Ideally, this should be covered by super.testDecompressWithOutputPadding(...), but the data written by the native
+    // compressor doesn't include checksums, so it's not a comprehensive test. The dataset for this test has a checksum.
+    @Test
+    public void testDecompressWithOutputPaddingAndChecksum()
+            throws IOException
+    {
+        int padding = 1021;
+
+        byte[] compressed = loadZstdResource("data/zstd/with-checksum.zst");
+        byte[] uncompressed = loadZstdResource("data/zstd/with-checksum");
+
+        byte[] output = new byte[uncompressed.length + padding * 2]; // pre + post padding
+        int decompressedSize = getDecompressor().decompress(compressed, 0, compressed.length, output, padding, output.length - padding);
+
+        assertByteArraysEqual(uncompressed, 0, uncompressed.length, output, padding, decompressedSize);
+    }
+
+    @Test
+    public void testConcatenatedFrames()
+            throws IOException
+    {
+        byte[] compressed = loadZstdResource("data/zstd/multiple-frames.zst");
+        byte[] uncompressed = loadZstdResource("data/zstd/multiple-frames");
+
+        byte[] output = new byte[uncompressed.length];
+        getDecompressor().decompress(compressed, 0, compressed.length, output, 0, output.length);
+
+        assertByteArraysEqual(uncompressed, 0, uncompressed.length, output, 0, output.length);
+    }
+
+    @Test
+    public void testInvalidSequenceOffset()
+            throws IOException
+    {
+        byte[] compressed = loadZstdResource("data/zstd/offset-before-start.zst");
+        byte[] output = new byte[compressed.length * 10];
+
+        assertThatThrownBy(() -> getDecompressor().decompress(compressed, 0, compressed.length, output, 0, output.length))
+                .isInstanceOf(MalformedInputException.class)
+                .hasMessageStartingWith("Input is corrupted: offset=894");
+    }
+
+    @Test
+    public void testSmallLiteralsAfterIncompressibleLiterals()
+            throws IOException
+    {
+        // Ensure the compressor doesn't try to reuse a huffman table that was created speculatively for a previous block
+        // which ended up emitting raw literals due to insufficient gain
+        verifyZstdRoundTrip("data/zstd/small-literals-after-incompressible-literals");
+    }
+
+    @Test
+    public void testLargeRle()
+            throws IOException
+    {
+        // Dataset that produces an RLE block with 3-byte header
+        verifyZstdRoundTrip("data/zstd/large-rle");
+    }
+
+    @Test
+    public void testIncompressibleData()
+            throws IOException
+    {
+        // Incompressible data that would require more than maxCompressedLength(...) to store
+        verifyZstdRoundTrip("data/zstd/incompressible");
+    }
+
+    @Test
+    public void testMaxCompressedSize()
+    {
+        assertEquals(new ZstdCompressor().maxCompressedLength(0), 64);
+        assertEquals(new ZstdCompressor().maxCompressedLength(64 * 1024), 65_824);
+        assertEquals(new ZstdCompressor().maxCompressedLength(128 * 1024), 131_584);
+        assertEquals(new ZstdCompressor().maxCompressedLength(128 * 1024 + 1), 131_585);
+    }
+
+    // Runs over all data sets, in case the result depends on the input size or its compressibility
+    @Test(dataProvider = "data")
+    public void testGetDecompressedSize(DataSet dataSet)
+    {
+        Compressor compressor = getCompressor();
+        byte[] originalUncompressed = dataSet.getUncompressed();
+        byte[] compressed = new byte[compressor.maxCompressedLength(originalUncompressed.length)];
+
+        int compressedLength = compressor.compress(originalUncompressed, 0, originalUncompressed.length, compressed, 0, compressed.length);
+
+        assertEquals(ZstdDecompressor.getDecompressedSize(compressed, 0, compressedLength), originalUncompressed.length);
+
+        // Same frame again, but placed at a non-zero offset behind filler bytes
+        int padding = 10;
+        byte[] compressedWithPadding = new byte[compressedLength + padding];
+        Arrays.fill(compressedWithPadding, (byte) 42);
+        System.arraycopy(compressed, 0, compressedWithPadding, padding, compressedLength);
+        assertEquals(ZstdDecompressor.getDecompressedSize(compressedWithPadding, padding, compressedLength), originalUncompressed.length);
+    }
+
+    @Test
+    public void testVerifyMagicInAllFrames()
+            throws IOException
+    {
+        byte[] compressed = loadZstdResource("data/zstd/bad-second-frame.zst");
+        byte[] uncompressed = loadZstdResource("data/zstd/multiple-frames");
+        byte[] output = new byte[uncompressed.length];
+        assertThatThrownBy(() -> getDecompressor().decompress(compressed, 0, compressed.length, output, 0, output.length))
+                .isInstanceOf(MalformedInputException.class)
+                .hasMessageStartingWith("Invalid magic prefix");
+    }
+
+    /**
+     * Reads a classpath resource fully into a byte array, using this test's class loader.
+     */
+    private byte[] loadZstdResource(String resourceName)
+            throws IOException
+    {
+        return Resources.toByteArray(getClass().getClassLoader().getResource(resourceName));
+    }
+
+    /**
+     * Compresses the named resource with {@link #getCompressor()}, decompresses the result with
+     * {@link #getDecompressor()} and asserts that the original bytes come back.
+     */
+    private void verifyZstdRoundTrip(String resourceName)
+            throws IOException
+    {
+        Compressor compressor = getCompressor();
+
+        byte[] original = loadZstdResource(resourceName);
+        int maxCompressLength = compressor.maxCompressedLength(original.length);
+
+        byte[] compressed = new byte[maxCompressLength];
+        int compressedSize = compressor.compress(original, 0, original.length, compressed, 0, compressed.length);
+
+        byte[] decompressed = new byte[original.length];
+        int decompressedSize = getDecompressor().decompress(compressed, 0, compressedSize, decompressed, 0, decompressed.length);
+
+        assertByteArraysEqual(original, 0, original.length, decompressed, 0, decompressedSize);
+    }
+}
diff --git a/airlift-zstd/src/test/java/ai/vespa/airlift/zstd/TestZstdInputStream.java b/airlift-zstd/src/test/java/ai/vespa/airlift/zstd/TestZstdInputStream.java
new file mode 100644
index 00000000000..b983389f2ef
--- /dev/null
+++ b/airlift-zstd/src/test/java/ai/vespa/airlift/zstd/TestZstdInputStream.java
@@ -0,0 +1,242 @@
+/*
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package ai.vespa.airlift.zstd;
+
+import com.google.common.io.Resources;
+import ai.vespa.airlift.compress.AbstractTestCompression;
+import ai.vespa.airlift.compress.Compressor;
+import ai.vespa.airlift.compress.Decompressor;
+import ai.vespa.airlift.compress.MalformedInputException;
+import ai.vespa.airlift.compress.thirdparty.ZstdJniCompressor;
+import ai.vespa.airlift.compress.thirdparty.ZstdJniDecompressor;
+import org.testng.annotations.Test;
+
+import java.io.ByteArrayInputStream;
+import java.io.IOException;
+import java.io.InputStream;
+import java.nio.ByteBuffer;
+
+import static org.assertj.core.api.Assertions.assertThatThrownBy;
+
+/**
+ * Runs the compression tests with decompression going through {@link ZstdInputStream}
+ * (wrapped in {@link WrapDecompressor}) instead of calling a decompressor directly.
+ */
+public class TestZstdInputStream
+        extends AbstractTestCompression
+{
+    /**
+     * {@link InputStream} view of a {@link ByteBuffer}; reading advances the buffer's position.
+     */
+    static class ByteBufferBackedInputStream
+            extends InputStream
+    {
+        final ByteBuffer buf;
+
+        public ByteBufferBackedInputStream(ByteBuffer buf)
+        {
+            this.buf = buf;
+        }
+
+        @Override
+        public int read()
+        {
+            if (!buf.hasRemaining()) {
+                return -1;
+            }
+            return buf.get() & 0xFF;
+        }
+
+        @Override
+        public int read(byte[] bytes, int off, int len)
+        {
+            if (len == 0) {
+                // InputStream contract: a zero-length request reads nothing and returns 0.
+                // Without this guard the single-byte rule below would write past the requested range.
+                return 0;
+            }
+            if (!buf.hasRemaining()) {
+                return -1;
+            }
+            len = Math.min(len, buf.remaining());
+            if (buf.position() < 1) {
+                // While the buffer is still at position 0, hand out a single byte only.
+                // NOTE(review): presumably meant to exercise short reads in ZstdInputStream - confirm.
+                len = 1;
+            }
+            buf.get(bytes, off, len);
+            return len;
+        }
+    }
+
+    /**
+     * {@link Decompressor} that decompresses by reading from a {@link ZstdInputStream}.
+     */
+    static class WrapDecompressor
+            implements Decompressor
+    {
+        /**
+         * Decompresses a range of {@code input} into a range of {@code output}.
+         *
+         * @return the number of bytes written to {@code output}
+         * @throws IllegalArgumentException if either range lies outside its array
+         * @throws RuntimeException if data remains after {@code maxOutputLength} bytes, or on an {@link IOException}
+         */
+        @Override
+        public int decompress(byte[] input, int inputOffset, int inputLength, byte[] output, int outputOffset, int maxOutputLength)
+                throws MalformedInputException
+        {
+            verifyRange(input, inputOffset, inputLength);
+            verifyRange(output, outputOffset, maxOutputLength);
+            try (InputStream zin = new ZstdInputStream(new ByteArrayInputStream(input, inputOffset, inputLength))) {
+                int res = 0;
+                while (res < maxOutputLength) {
+                    int len = zin.read(output, outputOffset + res, maxOutputLength - res);
+                    if (len == -1) {
+                        return res;
+                    }
+                    res += len;
+                }
+                // The output range is full; any further byte means the data did not fit.
+                if (zin.read() != -1) {
+                    throw new RuntimeException("All input was not consumed");
+                }
+                return res;
+            }
+            catch (IOException e) {
+                throw new RuntimeException("bad io", e);
+            }
+        }
+
+        /**
+         * Decompresses {@code input} into {@code output} until the stream ends or {@code output} is full.
+         *
+         * @throws RuntimeException if reading fails with an {@link IOException}
+         */
+        @Override
+        public void decompress(ByteBuffer input, ByteBuffer output)
+                throws MalformedInputException
+        {
+            byte[] tmp = new byte[output.remaining()];
+            try (InputStream zin = new ZstdInputStream(new ByteBufferBackedInputStream(input))) {
+                while (output.hasRemaining()) {
+                    int len = zin.read(tmp);
+                    if (len == -1) {
+                        return;
+                    }
+                    output.put(tmp, 0, len);
+                }
+            }
+            catch (IOException e) {
+                // Report I/O failures the same way as the byte[] overload instead of hiding them
+                throw new RuntimeException("bad io", e);
+            }
+        }
+
+        private static void verifyRange(byte[] data, int offset, int length)
+        {
+            if (offset < 0 || length < 0 || offset + length > data.length) {
+                throw new IllegalArgumentException("Invalid offset or length");
+            }
+        }
+    }
+
+    @Override
+    protected Compressor getCompressor()
+    {
+        return new ZstdCompressor();
+    }
+
+    @Override
+    protected Decompressor getDecompressor()
+    {
+        return new WrapDecompressor();
+    }
+
+    @Override
+    protected Compressor getVerifyCompressor()
+    {
+        return new ZstdJniCompressor(3);
+    }
+
+    @Override
+    protected Decompressor getVerifyDecompressor()
+    {
+        return new ZstdJniDecompressor();
+    }
+
+    // Ideally, this should be covered by super.testDecompressWithOutputPadding(...), but the data written by the native
+    // compressor doesn't include checksums, so it's not a comprehensive test. The dataset for this test has a checksum.
+    @Test
+    public void testDecompressWithOutputPaddingAndChecksum()
+            throws IOException
+    {
+        int padding = 1021;
+
+        byte[] compressed = loadZstdResource("data/zstd/with-checksum.zst");
+        byte[] uncompressed = loadZstdResource("data/zstd/with-checksum");
+
+        byte[] output = new byte[uncompressed.length + padding * 2]; // pre + post padding
+        int decompressedSize = getDecompressor().decompress(compressed, 0, compressed.length, output, padding, output.length - padding);
+
+        assertByteArraysEqual(uncompressed, 0, uncompressed.length, output, padding, decompressedSize);
+    }
+
+    @Test
+    public void testConcatenatedFrames()
+            throws IOException
+    {
+        byte[] compressed = loadZstdResource("data/zstd/multiple-frames.zst");
+        byte[] uncompressed = loadZstdResource("data/zstd/multiple-frames");
+
+        byte[] output = new byte[uncompressed.length];
+        getDecompressor().decompress(compressed, 0, compressed.length, output, 0, output.length);
+
+        assertByteArraysEqual(uncompressed, 0, uncompressed.length, output, 0, output.length);
+    }
+
+    @Test
+    public void testInvalidSequenceOffset()
+            throws IOException
+    {
+        byte[] compressed = loadZstdResource("data/zstd/offset-before-start.zst");
+        byte[] output = new byte[compressed.length * 10];
+
+        assertThatThrownBy(() -> getDecompressor().decompress(compressed, 0, compressed.length, output, 0, output.length))
+                .isInstanceOf(MalformedInputException.class)
+                .hasMessageStartingWith("Input is corrupted: offset=894");
+    }
+
+    @Test
+    public void testLargeRle()
+            throws IOException
+    {
+        // Dataset that produces an RLE block with 3-byte header
+        verifyZstdRoundTrip("data/zstd/large-rle");
+    }
+
+    @Test
+    public void testIncompressibleData()
+            throws IOException
+    {
+        // Incompressible data that would require more than maxCompressedLength(...) to store
+        verifyZstdRoundTrip("data/zstd/incompressible");
+    }
+
+    @Test
+    public void testVerifyMagicInAllFrames()
+            throws IOException
+    {
+        byte[] compressed = loadZstdResource("data/zstd/bad-second-frame.zst");
+        byte[] uncompressed = loadZstdResource("data/zstd/multiple-frames");
+        byte[] output = new byte[uncompressed.length];
+        assertThatThrownBy(() -> getDecompressor().decompress(compressed, 0, compressed.length, output, 0, output.length))
+                .isInstanceOf(MalformedInputException.class)
+                .hasMessageStartingWith("Invalid magic prefix");
+    }
+
+    /**
+     * Reads a classpath resource fully into a byte array, using this test's class loader.
+     */
+    private byte[] loadZstdResource(String resourceName)
+            throws IOException
+    {
+        return Resources.toByteArray(getClass().getClassLoader().getResource(resourceName));
+    }
+
+    /**
+     * Compresses the named resource with {@link #getCompressor()}, decompresses the result with
+     * {@link #getDecompressor()} and asserts that the original bytes come back.
+     */
+    private void verifyZstdRoundTrip(String resourceName)
+            throws IOException
+    {
+        Compressor compressor = getCompressor();
+
+        byte[] original = loadZstdResource(resourceName);
+        int maxCompressLength = compressor.maxCompressedLength(original.length);
+
+        byte[] compressed = new byte[maxCompressLength];
+        int compressedSize = compressor.compress(original, 0, original.length, compressed, 0, compressed.length);
+
+        byte[] decompressed = new byte[original.length];
+        int decompressedSize = getDecompressor().decompress(compressed, 0, compressedSize, decompressed, 0, decompressed.length);
+
+        assertByteArraysEqual(original, 0, original.length, decompressed, 0, decompressedSize);
+    }
+}
diff --git a/airlift-zstd/src/test/java/ai/vespa/airlift/zstd/ZstdCat.java b/airlift-zstd/src/test/java/ai/vespa/airlift/zstd/ZstdCat.java
new file mode 100644
index 00000000000..6fc1a223a6e
--- /dev/null
+++ b/airlift-zstd/src/test/java/ai/vespa/airlift/zstd/ZstdCat.java
@@ -0,0 +1,44 @@
+/*
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package ai.vespa.airlift.zstd;
+
+import java.io.IOException;
+import java.io.InputStream;
+
+/**
+ * Simple test implementation of "zstdcat": copies stdin through a {@link ZstdInputStream} to stdout.
+ * @author arnej27959
+ */
+public final class ZstdCat
+{
+    private ZstdCat() {}
+
+    /** Streams stdin through {@link ZstdInputStream} to stdout; on I/O failure prints to stderr and exits with status 1. */
+    public static void main(String[] args)
+    {
+        // try-with-resources closes the wrapping stream on both the success and the failure path
+        try (InputStream in = new ZstdInputStream(System.in)) {
+            byte[] buf = new byte[100 * 1024];
+            int rl;
+            // read() reports end of stream as -1; any non-positive count ends the copy
+            while ((rl = in.read(buf)) > 0) {
+                System.out.write(buf, 0, rl);
+            }
+            System.out.flush();
+        }
+        catch (IOException e) {
+            System.err.println("IO failed: " + e);
+            System.exit(1);
+        }
+    }
+}
diff --git a/airlift-zstd/src/test/resources/data/zstd/bad-second-frame.zst b/airlift-zstd/src/test/resources/data/zstd/bad-second-frame.zst
new file mode 100644
index 00000000000..a98549e580d
--- /dev/null
+++ b/airlift-zstd/src/test/resources/data/zstd/bad-second-frame.zst
Binary files differ
diff --git a/airlift-zstd/src/test/resources/data/zstd/incompressible b/airlift-zstd/src/test/resources/data/zstd/incompressible
new file mode 100644
index 00000000000..f1567c4067e
--- /dev/null
+++ b/airlift-zstd/src/test/resources/data/zstd/incompressible
Binary files differ
diff --git a/airlift-zstd/src/test/resources/data/zstd/large-rle b/airlift-zstd/src/test/resources/data/zstd/large-rle
new file mode 100644
index 00000000000..8591058e5f4
--- /dev/null
+++ b/airlift-zstd/src/test/resources/data/zstd/large-rle
@@ -0,0 +1 @@
\ No newline at end of file
diff --git a/airlift-zstd/src/test/resources/data/zstd/multiple-frames b/airlift-zstd/src/test/resources/data/zstd/multiple-frames
new file mode 100644
index 00000000000..5e018aa9551
--- /dev/null
+++ b/airlift-zstd/src/test/resources/data/zstd/multiple-frames
@@ -0,0 +1,406 @@
+
+ Apache License
+ Version 2.0, January 2004
+ http://www.apache.org/licenses/
+
+ TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
+
+ 1. Definitions.
+
+ "License" shall mean the terms and conditions for use, reproduction,
+ and distribution as defined by Sections 1 through 9 of this document.
+
+ "Licensor" shall mean the copyright owner or entity authorized by
+ the copyright owner that is granting the License.
+
+ "Legal Entity" shall mean the union of the acting entity and all
+ other entities that control, are controlled by, or are under common
+ control with that entity. For the purposes of this definition,
+ "control" means (i) the power, direct or indirect, to cause the
+ direction or management of such entity, whether by contract or
+ otherwise, or (ii) ownership of fifty percent (50%) or more of the
+ outstanding shares, or (iii) beneficial ownership of such entity.
+
+ "You" (or "Your") shall mean an individual or Legal Entity
+ exercising permissions granted by this License.
+
+ "Source" form shall mean the preferred form for making modifications,
+ including but not limited to software source code, documentation
+ source, and configuration files.
+
+ "Object" form shall mean any form resulting from mechanical
+ transformation or translation of a Source form, including but
+ not limited to compiled object code, generated documentation,
+ and conversions to other media types.
+
+ "Work" shall mean the work of authorship, whether in Source or
+ Object form, made available under the License, as indicated by a
+ copyright notice that is included in or attached to the work
+ (an example is provided in the Appendix below).
+
+ "Derivative Works" shall mean any work, whether in Source or Object
+ form, that is based on (or derived from) the Work and for which the
+ editorial revisions, annotations, elaborations, or other modifications
+ represent, as a whole, an original work of authorship. For the purposes
+ of this License, Derivative Works shall not include works that remain
+ separable from, or merely link (or bind by name) to the interfaces of,
+ the Work and Derivative Works thereof.
+
+ "Contribution" shall mean any work of authorship, including
+ the original version of the Work and any modifications or additions
+ to that Work or Derivative Works thereof, that is intentionally
+ submitted to Licensor for inclusion in the Work by the copyright owner
+ or by an individual or Legal Entity authorized to submit on behalf of
+ the copyright owner. For the purposes of this definition, "submitted"
+ means any form of electronic, verbal, or written communication sent
+ to the Licensor or its representatives, including but not limited to
+ communication on electronic mailing lists, source code control systems,
+ and issue tracking systems that are managed by, or on behalf of, the
+ Licensor for the purpose of discussing and improving the Work, but
+ excluding communication that is conspicuously marked or otherwise
+ designated in writing by the copyright owner as "Not a Contribution."
+
+ "Contributor" shall mean Licensor and any individual or Legal Entity
+ on behalf of whom a Contribution has been received by Licensor and
+ subsequently incorporated within the Work.
+
+ 2. Grant of Copyright License. Subject to the terms and conditions of
+ this License, each Contributor hereby grants to You a perpetual,
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+ copyright license to reproduce, prepare Derivative Works of,
+ publicly display, publicly perform, sublicense, and distribute the
+ Work and such Derivative Works in Source or Object form.
+
+ 3. Grant of Patent License. Subject to the terms and conditions of
+ this License, each Contributor hereby grants to You a perpetual,
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+ (except as stated in this section) patent license to make, have made,
+ use, offer to sell, sell, import, and otherwise transfer the Work,
+ where such license applies only to those patent claims licensable
+ by such Contributor that are necessarily infringed by their
+ Contribution(s) alone or by combination of their Contribution(s)
+ with the Work to which such Contribution(s) was submitted. If You
+ institute patent litigation against any entity (including a
+ cross-claim or counterclaim in a lawsuit) alleging that the Work
+ or a Contribution incorporated within the Work constitutes direct
+ or contributory patent infringement, then any patent licenses
+ granted to You under this License for that Work shall terminate
+ as of the date such litigation is filed.
+
+ 4. Redistribution. You may reproduce and distribute copies of the
+ Work or Derivative Works thereof in any medium, with or without
+ modifications, and in Source or Object form, provided that You
+ meet the following conditions:
+
+ (a) You must give any other recipients of the Work or
+ Derivative Works a copy of this License; and
+
+ (b) You must cause any modified files to carry prominent notices
+ stating that You changed the files; and
+
+ (c) You must retain, in the Source form of any Derivative Works
+ that You distribute, all copyright, patent, trademark, and
+ attribution notices from the Source form of the Work,
+ excluding those notices that do not pertain to any part of
+ the Derivative Works; and
+
+ (d) If the Work includes a "NOTICE" text file as part of its
+ distribution, then any Derivative Works that You distribute must
+ include a readable copy of the attribution notices contained
+ within such NOTICE file, excluding those notices that do not
+ pertain to any part of the Derivative Works, in at least one
+ of the following places: within a NOTICE text file distributed
+ as part of the Derivative Works; within the Source form or
+ documentation, if provided along with the Derivative Works; or,
+ within a display generated by the Derivative Works, if and
+ wherever such third-party notices normally appear. The contents
+ of the NOTICE file are for informational purposes only and
+ do not modify the License. You may add Your own attribution
+ notices within Derivative Works that You distribute, alongside
+ or as an addendum to the NOTICE text from the Work, provided
+ that such additional attribution notices cannot be construed
+ as modifying the License.
+
+ You may add Your own copyright statement to Your modifications and
+ may provide additional or different license terms and conditions
+ for use, reproduction, or distribution of Your modifications, or
+ for any such Derivative Works as a whole, provided Your use,
+ reproduction, and distribution of the Work otherwise complies with
+ the conditions stated in this License.
+
+ 5. Submission of Contributions. Unless You explicitly state otherwise,
+ any Contribution intentionally submitted for inclusion in the Work
+ by You to the Licensor shall be under the terms and conditions of
+ this License, without any additional terms or conditions.
+ Notwithstanding the above, nothing herein shall supersede or modify
+ the terms of any separate license agreement you may have executed
+ with Licensor regarding such Contributions.
+
+ 6. Trademarks. This License does not grant permission to use the trade
+ names, trademarks, service marks, or product names of the Licensor,
+ except as required for reasonable and customary use in describing the
+ origin of the Work and reproducing the content of the NOTICE file.
+
+ 7. Disclaimer of Warranty. Unless required by applicable law or
+ agreed to in writing, Licensor provides the Work (and each
+ Contributor provides its Contributions) on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+ implied, including, without limitation, any warranties or conditions
+ of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
+ PARTICULAR PURPOSE. You are solely responsible for determining the
+ appropriateness of using or redistributing the Work and assume any
+ risks associated with Your exercise of permissions under this License.
+
+ 8. Limitation of Liability. In no event and under no legal theory,
+ whether in tort (including negligence), contract, or otherwise,
+ unless required by applicable law (such as deliberate and grossly
+ negligent acts) or agreed to in writing, shall any Contributor be
+ liable to You for damages, including any direct, indirect, special,
+ incidental, or consequential damages of any character arising as a
+ result of this License or out of the use or inability to use the
+ Work (including but not limited to damages for loss of goodwill,
+ work stoppage, computer failure or malfunction, or any and all
+ other commercial damages or losses), even if such Contributor
+ has been advised of the possibility of such damages.
+
+ 9. Accepting Warranty or Additional Liability. While redistributing
+ the Work or Derivative Works thereof, You may choose to offer,
+ and charge a fee for, acceptance of support, warranty, indemnity,
+ or other liability obligations and/or rights consistent with this
+ License. However, in accepting such obligations, You may act only
+ on Your own behalf and on Your sole responsibility, not on behalf
+ of any other Contributor, and only if You agree to indemnify,
+ defend, and hold each Contributor harmless for any liability
+ incurred by, or claims asserted against, such Contributor by reason
+ of your accepting any such warranty or additional liability.
+
+ END OF TERMS AND CONDITIONS
+
+ APPENDIX: How to apply the Apache License to your work.
+
+ To apply the Apache License to your work, attach the following
+ boilerplate notice, with the fields enclosed by brackets "[]"
+ replaced with your own identifying information. (Don't include
+ the brackets!) The text should be enclosed in the appropriate
+ comment syntax for the file format. We also recommend that a
+ file or class name and description of purpose be included on the
+ same "printed page" as the copyright notice for easier
+ identification within third-party archives.
+
+ Copyright [yyyy] [name of copyright owner]
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+
+
+ Apache License
+ Version 2.0, January 2004
+ http://www.apache.org/licenses/
+
+ TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
+
+ 1. Definitions.
+
+ "License" shall mean the terms and conditions for use, reproduction,
+ and distribution as defined by Sections 1 through 9 of this document.
+
+ "Licensor" shall mean the copyright owner or entity authorized by
+ the copyright owner that is granting the License.
+
+ "Legal Entity" shall mean the union of the acting entity and all
+ other entities that control, are controlled by, or are under common
+ control with that entity. For the purposes of this definition,
+ "control" means (i) the power, direct or indirect, to cause the
+ direction or management of such entity, whether by contract or
+ otherwise, or (ii) ownership of fifty percent (50%) or more of the
+ outstanding shares, or (iii) beneficial ownership of such entity.
+
+ "You" (or "Your") shall mean an individual or Legal Entity
+ exercising permissions granted by this License.
+
+ "Source" form shall mean the preferred form for making modifications,
+ including but not limited to software source code, documentation
+ source, and configuration files.
+
+ "Object" form shall mean any form resulting from mechanical
+ transformation or translation of a Source form, including but
+ not limited to compiled object code, generated documentation,
+ and conversions to other media types.
+
+ "Work" shall mean the work of authorship, whether in Source or
+ Object form, made available under the License, as indicated by a
+ copyright notice that is included in or attached to the work
+ (an example is provided in the Appendix below).
+
+ "Derivative Works" shall mean any work, whether in Source or Object
+ form, that is based on (or derived from) the Work and for which the
+ editorial revisions, annotations, elaborations, or other modifications
+ represent, as a whole, an original work of authorship. For the purposes
+ of this License, Derivative Works shall not include works that remain
+ separable from, or merely link (or bind by name) to the interfaces of,
+ the Work and Derivative Works thereof.
+
+ "Contribution" shall mean any work of authorship, including
+ the original version of the Work and any modifications or additions
+ to that Work or Derivative Works thereof, that is intentionally
+ submitted to Licensor for inclusion in the Work by the copyright owner
+ or by an individual or Legal Entity authorized to submit on behalf of
+ the copyright owner. For the purposes of this definition, "submitted"
+ means any form of electronic, verbal, or written communication sent
+ to the Licensor or its representatives, including but not limited to
+ communication on electronic mailing lists, source code control systems,
+ and issue tracking systems that are managed by, or on behalf of, the
+ Licensor for the purpose of discussing and improving the Work, but
+ excluding communication that is conspicuously marked or otherwise
+ designated in writing by the copyright owner as "Not a Contribution."
+
+ "Contributor" shall mean Licensor and any individual or Legal Entity
+ on behalf of whom a Contribution has been received by Licensor and
+ subsequently incorporated within the Work.
+
+ 2. Grant of Copyright License. Subject to the terms and conditions of
+ this License, each Contributor hereby grants to You a perpetual,
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+ copyright license to reproduce, prepare Derivative Works of,
+ publicly display, publicly perform, sublicense, and distribute the
+ Work and such Derivative Works in Source or Object form.
+
+ 3. Grant of Patent License. Subject to the terms and conditions of
+ this License, each Contributor hereby grants to You a perpetual,
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+ (except as stated in this section) patent license to make, have made,
+ use, offer to sell, sell, import, and otherwise transfer the Work,
+ where such license applies only to those patent claims licensable
+ by such Contributor that are necessarily infringed by their
+ Contribution(s) alone or by combination of their Contribution(s)
+ with the Work to which such Contribution(s) was submitted. If You
+ institute patent litigation against any entity (including a
+ cross-claim or counterclaim in a lawsuit) alleging that the Work
+ or a Contribution incorporated within the Work constitutes direct
+ or contributory patent infringement, then any patent licenses
+ granted to You under this License for that Work shall terminate
+ as of the date such litigation is filed.
+
+ 4. Redistribution. You may reproduce and distribute copies of the
+ Work or Derivative Works thereof in any medium, with or without
+ modifications, and in Source or Object form, provided that You
+ meet the following conditions:
+
+ (a) You must give any other recipients of the Work or
+ Derivative Works a copy of this License; and
+
+ (b) You must cause any modified files to carry prominent notices
+ stating that You changed the files; and
+
+ (c) You must retain, in the Source form of any Derivative Works
+ that You distribute, all copyright, patent, trademark, and
+ attribution notices from the Source form of the Work,
+ excluding those notices that do not pertain to any part of
+ the Derivative Works; and
+
+ (d) If the Work includes a "NOTICE" text file as part of its
+ distribution, then any Derivative Works that You distribute must
+ include a readable copy of the attribution notices contained
+ within such NOTICE file, excluding those notices that do not
+ pertain to any part of the Derivative Works, in at least one
+ of the following places: within a NOTICE text file distributed
+ as part of the Derivative Works; within the Source form or
+ documentation, if provided along with the Derivative Works; or,
+ within a display generated by the Derivative Works, if and
+ wherever such third-party notices normally appear. The contents
+ of the NOTICE file are for informational purposes only and
+ do not modify the License. You may add Your own attribution
+ notices within Derivative Works that You distribute, alongside
+ or as an addendum to the NOTICE text from the Work, provided
+ that such additional attribution notices cannot be construed
+ as modifying the License.
+
+ You may add Your own copyright statement to Your modifications and
+ may provide additional or different license terms and conditions
+ for use, reproduction, or distribution of Your modifications, or
+ for any such Derivative Works as a whole, provided Your use,
+ reproduction, and distribution of the Work otherwise complies with
+ the conditions stated in this License.
+
+ 5. Submission of Contributions. Unless You explicitly state otherwise,
+ any Contribution intentionally submitted for inclusion in the Work
+ by You to the Licensor shall be under the terms and conditions of
+ this License, without any additional terms or conditions.
+ Notwithstanding the above, nothing herein shall supersede or modify
+ the terms of any separate license agreement you may have executed
+ with Licensor regarding such Contributions.
+
+ 6. Trademarks. This License does not grant permission to use the trade
+ names, trademarks, service marks, or product names of the Licensor,
+ except as required for reasonable and customary use in describing the
+ origin of the Work and reproducing the content of the NOTICE file.
+
+ 7. Disclaimer of Warranty. Unless required by applicable law or
+ agreed to in writing, Licensor provides the Work (and each
+ Contributor provides its Contributions) on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+ implied, including, without limitation, any warranties or conditions
+ of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
+ PARTICULAR PURPOSE. You are solely responsible for determining the
+ appropriateness of using or redistributing the Work and assume any
+ risks associated with Your exercise of permissions under this License.
+
+ 8. Limitation of Liability. In no event and under no legal theory,
+ whether in tort (including negligence), contract, or otherwise,
+ unless required by applicable law (such as deliberate and grossly
+ negligent acts) or agreed to in writing, shall any Contributor be
+ liable to You for damages, including any direct, indirect, special,
+ incidental, or consequential damages of any character arising as a
+ result of this License or out of the use or inability to use the
+ Work (including but not limited to damages for loss of goodwill,
+ work stoppage, computer failure or malfunction, or any and all
+ other commercial damages or losses), even if such Contributor
+ has been advised of the possibility of such damages.
+
+ 9. Accepting Warranty or Additional Liability. While redistributing
+ the Work or Derivative Works thereof, You may choose to offer,
+ and charge a fee for, acceptance of support, warranty, indemnity,
+ or other liability obligations and/or rights consistent with this
+ License. However, in accepting such obligations, You may act only
+ on Your own behalf and on Your sole responsibility, not on behalf
+ of any other Contributor, and only if You agree to indemnify,
+ defend, and hold each Contributor harmless for any liability
+ incurred by, or claims asserted against, such Contributor by reason
+ of your accepting any such warranty or additional liability.
+
+ END OF TERMS AND CONDITIONS
+
+ APPENDIX: How to apply the Apache License to your work.
+
+ To apply the Apache License to your work, attach the following
+ boilerplate notice, with the fields enclosed by brackets "[]"
+ replaced with your own identifying information. (Don't include
+ the brackets!) The text should be enclosed in the appropriate
+ comment syntax for the file format. We also recommend that a
+ file or class name and description of purpose be included on the
+ same "printed page" as the copyright notice for easier
+ identification within third-party archives.
+
+ Copyright [yyyy] [name of copyright owner]
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+
diff --git a/airlift-zstd/src/test/resources/data/zstd/multiple-frames.zst b/airlift-zstd/src/test/resources/data/zstd/multiple-frames.zst
new file mode 100644
index 00000000000..4a9e6582a36
--- /dev/null
+++ b/airlift-zstd/src/test/resources/data/zstd/multiple-frames.zst
Binary files differ
diff --git a/airlift-zstd/src/test/resources/data/zstd/offset-before-start.zst b/airlift-zstd/src/test/resources/data/zstd/offset-before-start.zst
new file mode 100644
index 00000000000..0928f5f35f1
--- /dev/null
+++ b/airlift-zstd/src/test/resources/data/zstd/offset-before-start.zst
Binary files differ
diff --git a/airlift-zstd/src/test/resources/data/zstd/small-literals-after-incompressible-literals b/airlift-zstd/src/test/resources/data/zstd/small-literals-after-incompressible-literals
new file mode 100644
index 00000000000..51eef21f9d3
--- /dev/null
+++ b/airlift-zstd/src/test/resources/data/zstd/small-literals-after-incompressible-literals
Binary files differ
diff --git a/airlift-zstd/src/test/resources/data/zstd/with-checksum b/airlift-zstd/src/test/resources/data/zstd/with-checksum
new file mode 100644
index 00000000000..6b0b1270ff0
--- /dev/null
+++ b/airlift-zstd/src/test/resources/data/zstd/with-checksum
@@ -0,0 +1,203 @@
+
+ Apache License
+ Version 2.0, January 2004
+ http://www.apache.org/licenses/
+
+ TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
+
+ 1. Definitions.
+
+ "License" shall mean the terms and conditions for use, reproduction,
+ and distribution as defined by Sections 1 through 9 of this document.
+
+ "Licensor" shall mean the copyright owner or entity authorized by
+ the copyright owner that is granting the License.
+
+ "Legal Entity" shall mean the union of the acting entity and all
+ other entities that control, are controlled by, or are under common
+ control with that entity. For the purposes of this definition,
+ "control" means (i) the power, direct or indirect, to cause the
+ direction or management of such entity, whether by contract or
+ otherwise, or (ii) ownership of fifty percent (50%) or more of the
+ outstanding shares, or (iii) beneficial ownership of such entity.
+
+ "You" (or "Your") shall mean an individual or Legal Entity
+ exercising permissions granted by this License.
+
+ "Source" form shall mean the preferred form for making modifications,
+ including but not limited to software source code, documentation
+ source, and configuration files.
+
+ "Object" form shall mean any form resulting from mechanical
+ transformation or translation of a Source form, including but
+ not limited to compiled object code, generated documentation,
+ and conversions to other media types.
+
+ "Work" shall mean the work of authorship, whether in Source or
+ Object form, made available under the License, as indicated by a
+ copyright notice that is included in or attached to the work
+ (an example is provided in the Appendix below).
+
+ "Derivative Works" shall mean any work, whether in Source or Object
+ form, that is based on (or derived from) the Work and for which the
+ editorial revisions, annotations, elaborations, or other modifications
+ represent, as a whole, an original work of authorship. For the purposes
+ of this License, Derivative Works shall not include works that remain
+ separable from, or merely link (or bind by name) to the interfaces of,
+ the Work and Derivative Works thereof.
+
+ "Contribution" shall mean any work of authorship, including
+ the original version of the Work and any modifications or additions
+ to that Work or Derivative Works thereof, that is intentionally
+ submitted to Licensor for inclusion in the Work by the copyright owner
+ or by an individual or Legal Entity authorized to submit on behalf of
+ the copyright owner. For the purposes of this definition, "submitted"
+ means any form of electronic, verbal, or written communication sent
+ to the Licensor or its representatives, including but not limited to
+ communication on electronic mailing lists, source code control systems,
+ and issue tracking systems that are managed by, or on behalf of, the
+ Licensor for the purpose of discussing and improving the Work, but
+ excluding communication that is conspicuously marked or otherwise
+ designated in writing by the copyright owner as "Not a Contribution."
+
+ "Contributor" shall mean Licensor and any individual or Legal Entity
+ on behalf of whom a Contribution has been received by Licensor and
+ subsequently incorporated within the Work.
+
+ 2. Grant of Copyright License. Subject to the terms and conditions of
+ this License, each Contributor hereby grants to You a perpetual,
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+ copyright license to reproduce, prepare Derivative Works of,
+ publicly display, publicly perform, sublicense, and distribute the
+ Work and such Derivative Works in Source or Object form.
+
+ 3. Grant of Patent License. Subject to the terms and conditions of
+ this License, each Contributor hereby grants to You a perpetual,
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+ (except as stated in this section) patent license to make, have made,
+ use, offer to sell, sell, import, and otherwise transfer the Work,
+ where such license applies only to those patent claims licensable
+ by such Contributor that are necessarily infringed by their
+ Contribution(s) alone or by combination of their Contribution(s)
+ with the Work to which such Contribution(s) was submitted. If You
+ institute patent litigation against any entity (including a
+ cross-claim or counterclaim in a lawsuit) alleging that the Work
+ or a Contribution incorporated within the Work constitutes direct
+ or contributory patent infringement, then any patent licenses
+ granted to You under this License for that Work shall terminate
+ as of the date such litigation is filed.
+
+ 4. Redistribution. You may reproduce and distribute copies of the
+ Work or Derivative Works thereof in any medium, with or without
+ modifications, and in Source or Object form, provided that You
+ meet the following conditions:
+
+ (a) You must give any other recipients of the Work or
+ Derivative Works a copy of this License; and
+
+ (b) You must cause any modified files to carry prominent notices
+ stating that You changed the files; and
+
+ (c) You must retain, in the Source form of any Derivative Works
+ that You distribute, all copyright, patent, trademark, and
+ attribution notices from the Source form of the Work,
+ excluding those notices that do not pertain to any part of
+ the Derivative Works; and
+
+ (d) If the Work includes a "NOTICE" text file as part of its
+ distribution, then any Derivative Works that You distribute must
+ include a readable copy of the attribution notices contained
+ within such NOTICE file, excluding those notices that do not
+ pertain to any part of the Derivative Works, in at least one
+ of the following places: within a NOTICE text file distributed
+ as part of the Derivative Works; within the Source form or
+ documentation, if provided along with the Derivative Works; or,
+ within a display generated by the Derivative Works, if and
+ wherever such third-party notices normally appear. The contents
+ of the NOTICE file are for informational purposes only and
+ do not modify the License. You may add Your own attribution
+ notices within Derivative Works that You distribute, alongside
+ or as an addendum to the NOTICE text from the Work, provided
+ that such additional attribution notices cannot be construed
+ as modifying the License.
+
+ You may add Your own copyright statement to Your modifications and
+ may provide additional or different license terms and conditions
+ for use, reproduction, or distribution of Your modifications, or
+ for any such Derivative Works as a whole, provided Your use,
+ reproduction, and distribution of the Work otherwise complies with
+ the conditions stated in this License.
+
+ 5. Submission of Contributions. Unless You explicitly state otherwise,
+ any Contribution intentionally submitted for inclusion in the Work
+ by You to the Licensor shall be under the terms and conditions of
+ this License, without any additional terms or conditions.
+ Notwithstanding the above, nothing herein shall supersede or modify
+ the terms of any separate license agreement you may have executed
+ with Licensor regarding such Contributions.
+
+ 6. Trademarks. This License does not grant permission to use the trade
+ names, trademarks, service marks, or product names of the Licensor,
+ except as required for reasonable and customary use in describing the
+ origin of the Work and reproducing the content of the NOTICE file.
+
+ 7. Disclaimer of Warranty. Unless required by applicable law or
+ agreed to in writing, Licensor provides the Work (and each
+ Contributor provides its Contributions) on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+ implied, including, without limitation, any warranties or conditions
+ of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
+ PARTICULAR PURPOSE. You are solely responsible for determining the
+ appropriateness of using or redistributing the Work and assume any
+ risks associated with Your exercise of permissions under this License.
+
+ 8. Limitation of Liability. In no event and under no legal theory,
+ whether in tort (including negligence), contract, or otherwise,
+ unless required by applicable law (such as deliberate and grossly
+ negligent acts) or agreed to in writing, shall any Contributor be
+ liable to You for damages, including any direct, indirect, special,
+ incidental, or consequential damages of any character arising as a
+ result of this License or out of the use or inability to use the
+ Work (including but not limited to damages for loss of goodwill,
+ work stoppage, computer failure or malfunction, or any and all
+ other commercial damages or losses), even if such Contributor
+ has been advised of the possibility of such damages.
+
+ 9. Accepting Warranty or Additional Liability. While redistributing
+ the Work or Derivative Works thereof, You may choose to offer,
+ and charge a fee for, acceptance of support, warranty, indemnity,
+ or other liability obligations and/or rights consistent with this
+ License. However, in accepting such obligations, You may act only
+ on Your own behalf and on Your sole responsibility, not on behalf
+ of any other Contributor, and only if You agree to indemnify,
+ defend, and hold each Contributor harmless for any liability
+ incurred by, or claims asserted against, such Contributor by reason
+ of your accepting any such warranty or additional liability.
+
+ END OF TERMS AND CONDITIONS
+
+ APPENDIX: How to apply the Apache License to your work.
+
+ To apply the Apache License to your work, attach the following
+ boilerplate notice, with the fields enclosed by brackets "[]"
+ replaced with your own identifying information. (Don't include
+ the brackets!) The text should be enclosed in the appropriate
+ comment syntax for the file format. We also recommend that a
+ file or class name and description of purpose be included on the
+ same "printed page" as the copyright notice for easier
+ identification within third-party archives.
+
+ Copyright [yyyy] [name of copyright owner]
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+
diff --git a/airlift-zstd/src/test/resources/data/zstd/with-checksum.zst b/airlift-zstd/src/test/resources/data/zstd/with-checksum.zst
new file mode 100644
index 00000000000..885ffb8855e
--- /dev/null
+++ b/airlift-zstd/src/test/resources/data/zstd/with-checksum.zst
Binary files differ