From 7a02cf495881c214bab9713249c2e62334eb3179 Mon Sep 17 00:00:00 2001 From: Harald Musum Date: Wed, 4 Nov 2020 10:51:33 +0100 Subject: Write content for files in directory to file Instead of storing data from file reference that is a directory in a byte array (which might be huge and use a lot of memory), store in a temporary file and delete file when calling close() --- .../filedistribution/CompressedFileReference.java | 9 +--- .../filedistribution/EmptyFileReferenceData.java | 55 ++++++++++++++++++++ .../filedistribution/FileReferenceDataBlob.java | 59 ---------------------- .../filedistribution/LazyFileReferenceData.java | 6 ++- .../LazyTemporaryStorageFileReferenceData.java | 28 ++++++++++ .../vespa/filedistribution/FileReceiverTest.java | 5 +- .../filedistribution/FileReferenceDataTest.java | 32 ++++++++++-- 7 files changed, 119 insertions(+), 75 deletions(-) create mode 100644 filedistribution/src/main/java/com/yahoo/vespa/filedistribution/EmptyFileReferenceData.java delete mode 100644 filedistribution/src/main/java/com/yahoo/vespa/filedistribution/FileReferenceDataBlob.java create mode 100644 filedistribution/src/main/java/com/yahoo/vespa/filedistribution/LazyTemporaryStorageFileReferenceData.java (limited to 'filedistribution/src') diff --git a/filedistribution/src/main/java/com/yahoo/vespa/filedistribution/CompressedFileReference.java b/filedistribution/src/main/java/com/yahoo/vespa/filedistribution/CompressedFileReference.java index 0ed72076549..c160233dd72 100644 --- a/filedistribution/src/main/java/com/yahoo/vespa/filedistribution/CompressedFileReference.java +++ b/filedistribution/src/main/java/com/yahoo/vespa/filedistribution/CompressedFileReference.java @@ -1,4 +1,4 @@ -// Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
package com.yahoo.vespa.filedistribution; import com.google.common.io.ByteStreams; @@ -47,13 +47,6 @@ public class CompressedFileReference { .map(Path::toFile).collect(Collectors.toList()), outputFile); } - public static byte[] compress(File directory) throws IOException { - return compress(directory, Files.find(Paths.get(directory.getAbsolutePath()), - recurseDepth, - (p, basicFileAttributes) -> basicFileAttributes.isRegularFile()) - .map(Path::toFile).collect(Collectors.toList())); - } - public static byte[] compress(File baseDir, List inputFiles) throws IOException { ByteArrayOutputStream out = new ByteArrayOutputStream(); TarArchiveOutputStream archiveOutputStream = new TarArchiveOutputStream(new GZIPOutputStream(out)); diff --git a/filedistribution/src/main/java/com/yahoo/vespa/filedistribution/EmptyFileReferenceData.java b/filedistribution/src/main/java/com/yahoo/vespa/filedistribution/EmptyFileReferenceData.java new file mode 100644 index 00000000000..e6e4fdb8598 --- /dev/null +++ b/filedistribution/src/main/java/com/yahoo/vespa/filedistribution/EmptyFileReferenceData.java @@ -0,0 +1,55 @@ +// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+package com.yahoo.vespa.filedistribution; + +import com.yahoo.config.FileReference; + +import java.nio.ByteBuffer; + +public class EmptyFileReferenceData extends FileReferenceData { + + private final byte[] content; + private final long xxhash; + private int contentRead = 0; + + private EmptyFileReferenceData(FileReference fileReference, String filename, Type type, byte[] content, long xxhash) { + super(fileReference, filename, type); + this.content = content; + this.xxhash = xxhash; + } + + public static FileReferenceData empty(FileReference fileReference, String filename) { + return new EmptyFileReferenceData(fileReference, filename, FileReferenceData.Type.file, new byte[0], 0); + } + + public ByteBuffer content() { + return ByteBuffer.wrap(content); + } + + @Override + public int nextContent(ByteBuffer bb) { + if (contentRead >= content.length) { + return -1; + } else { + int left = content.length - contentRead; + int size = Math.min(bb.remaining(), left); + bb.put(content, contentRead, size); + contentRead += size; + return size; + } + } + + @Override + public long xxhash() { + return xxhash; + } + + @Override + public long size() { + return content.length; + } + + @Override + public void close() { + // no-op + } +} diff --git a/filedistribution/src/main/java/com/yahoo/vespa/filedistribution/FileReferenceDataBlob.java b/filedistribution/src/main/java/com/yahoo/vespa/filedistribution/FileReferenceDataBlob.java deleted file mode 100644 index 1b9b7032698..00000000000 --- a/filedistribution/src/main/java/com/yahoo/vespa/filedistribution/FileReferenceDataBlob.java +++ /dev/null @@ -1,59 +0,0 @@ -// Copyright 2018 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
-package com.yahoo.vespa.filedistribution; - -import com.yahoo.config.FileReference; -import net.jpountz.xxhash.XXHashFactory; - -import java.nio.ByteBuffer; - -public class FileReferenceDataBlob extends FileReferenceData { - private final byte[] content; - private final long xxhash; - private int contentRead = 0; - - public FileReferenceDataBlob(FileReference fileReference, String filename, Type type, byte[] content) { - this(fileReference, filename, type, content, XXHashFactory.fastestInstance().hash64().hash(ByteBuffer.wrap(content), 0)); - } - - public FileReferenceDataBlob(FileReference fileReference, String filename, Type type, byte[] content, long xxhash) { - super(fileReference, filename, type); - this.content = content; - this.xxhash = xxhash; - } - - public static FileReferenceData empty(FileReference fileReference, String filename) { - return new FileReferenceDataBlob(fileReference, filename, FileReferenceData.Type.file, new byte[0], 0); - } - - public ByteBuffer content() { - return ByteBuffer.wrap(content); - } - - @Override - public int nextContent(ByteBuffer bb) { - if (contentRead >= content.length) { - return -1; - } else { - int left = content.length - contentRead; - int size = Math.min(bb.remaining(), left); - bb.put(content, contentRead, size); - contentRead += size; - return size; - } - } - - @Override - public long xxhash() { - return xxhash; - } - - @Override - public long size() { - return content.length; - } - - @Override - public void close() { - // no-op - } -} diff --git a/filedistribution/src/main/java/com/yahoo/vespa/filedistribution/LazyFileReferenceData.java b/filedistribution/src/main/java/com/yahoo/vespa/filedistribution/LazyFileReferenceData.java index 0bc8f3b162a..193111c42fb 100644 --- a/filedistribution/src/main/java/com/yahoo/vespa/filedistribution/LazyFileReferenceData.java +++ b/filedistribution/src/main/java/com/yahoo/vespa/filedistribution/LazyFileReferenceData.java @@ -1,4 +1,4 @@ -// Copyright 2018 Yahoo Holdings. 
Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. package com.yahoo.vespa.filedistribution; import com.yahoo.config.FileReference; @@ -12,9 +12,11 @@ import java.nio.channels.ReadableByteChannel; import java.nio.file.Files; public class LazyFileReferenceData extends FileReferenceData { - private final File file; + + protected final File file; private final ReadableByteChannel channel; private final StreamingXXHash64 hasher; + public LazyFileReferenceData(FileReference fileReference, String filename, Type type, File file) throws IOException { super(fileReference, filename, type); this.file = file; diff --git a/filedistribution/src/main/java/com/yahoo/vespa/filedistribution/LazyTemporaryStorageFileReferenceData.java b/filedistribution/src/main/java/com/yahoo/vespa/filedistribution/LazyTemporaryStorageFileReferenceData.java new file mode 100644 index 00000000000..3fb33182c41 --- /dev/null +++ b/filedistribution/src/main/java/com/yahoo/vespa/filedistribution/LazyTemporaryStorageFileReferenceData.java @@ -0,0 +1,28 @@ +// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.vespa.filedistribution; + +import com.yahoo.config.FileReference; + +import java.io.File; +import java.io.IOException; +import java.nio.file.Files; + +/** + * File reference data stored in a temporary file that will be deleted when {@link #close()} is called. 
+ */ +public class LazyTemporaryStorageFileReferenceData extends LazyFileReferenceData { + + public LazyTemporaryStorageFileReferenceData(FileReference fileReference, String filename, Type type, File file) throws IOException { + super(fileReference, filename, type, file); + } + + public void close() { + try { + super.close(); + Files.delete(file.toPath()); + } catch (IOException e) { + throw new RuntimeException(e); + } + } + +} diff --git a/filedistribution/src/test/java/com/yahoo/vespa/filedistribution/FileReceiverTest.java b/filedistribution/src/test/java/com/yahoo/vespa/filedistribution/FileReceiverTest.java index 8dda0bcce66..a9ddff655e3 100644 --- a/filedistribution/src/test/java/com/yahoo/vespa/filedistribution/FileReceiverTest.java +++ b/filedistribution/src/test/java/com/yahoo/vespa/filedistribution/FileReceiverTest.java @@ -59,8 +59,9 @@ public class FileReceiverTest { writerB.write("2"); writerB.close(); - byte[] data = CompressedFileReference.compress(dirWithFiles); - transferCompressedData(new FileReference("ref"), "a", data); + File tempFile = temporaryFolder.newFile(); + File file = CompressedFileReference.compress(dirWithFiles, tempFile); + transferCompressedData(new FileReference("ref"), "a", IOUtils.readFileBytes(file)); File downloadDir = new File(root, "ref"); assertEquals("1", IOUtils.readFile(new File(downloadDir, "a"))); assertEquals("2", IOUtils.readFile(new File(downloadDir, "b"))); diff --git a/filedistribution/src/test/java/com/yahoo/vespa/filedistribution/FileReferenceDataTest.java b/filedistribution/src/test/java/com/yahoo/vespa/filedistribution/FileReferenceDataTest.java index f12935609b9..d1dac1131e2 100644 --- a/filedistribution/src/test/java/com/yahoo/vespa/filedistribution/FileReferenceDataTest.java +++ b/filedistribution/src/test/java/com/yahoo/vespa/filedistribution/FileReferenceDataTest.java @@ -2,34 +2,49 @@ package com.yahoo.vespa.filedistribution; import com.yahoo.config.FileReference; +import com.yahoo.io.IOUtils; import 
com.yahoo.text.Utf8; +import org.junit.Rule; import org.junit.Test; +import org.junit.rules.TemporaryFolder; +import java.io.File; +import java.io.IOException; import java.nio.ByteBuffer; import java.util.Arrays; import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertTrue; public class FileReferenceDataTest { + @Rule + public TemporaryFolder temporaryFolder = new TemporaryFolder(); + @Test - public void testDataBlob() { + public void testFileReferenceDataWithTempFile() throws IOException { String content = "blob"; + File tempFile = writeTempFile(content); FileReferenceData fileReferenceData = - new FileReferenceDataBlob(new FileReference("ref"), "foo", FileReferenceData.Type.compressed, Utf8.toBytes(content)); + new LazyTemporaryStorageFileReferenceData(new FileReference("ref"), "foo", FileReferenceData.Type.compressed, tempFile); ByteBuffer byteBuffer = ByteBuffer.allocate(100); assertEquals(4, fileReferenceData.nextContent(byteBuffer)); assertEquals(content, Utf8.toString(Arrays.copyOfRange(byteBuffer.array(), 0, 4))); // nextContent() will always return everything for FileReferenceDataBlob, so nothing more should be read assertEquals(-1, fileReferenceData.nextContent(byteBuffer)); + assertTrue(tempFile.exists()); + fileReferenceData.close(); + assertFalse(tempFile.exists()); // temp file should be removed when closing LazyTemporaryStorageFileReferenceData } @Test - public void testLargerDataBlob() { + public void testFileReferenceData() throws IOException { String content = "blobbblubbblabb"; + File file = writeTempFile(content); FileReferenceData fileReferenceData = - new FileReferenceDataBlob(new FileReference("ref"), "foo", FileReferenceData.Type.compressed, Utf8.toBytes(content)); + new LazyFileReferenceData(new FileReference("ref"), "foo", FileReferenceData.Type.compressed, file); ByteBuffer byteBuffer = ByteBuffer.allocate(10); assertEquals(10, 
fileReferenceData.nextContent(byteBuffer)); assertEquals(content.substring(0,10), Utf8.toString(Arrays.copyOfRange(byteBuffer.array(), 0, 10))); @@ -39,6 +54,15 @@ public class FileReferenceDataTest { // nextContent() will always return everything for FileReferenceDataBlob, so nothing more should be read assertEquals(-1, fileReferenceData.nextContent(byteBuffer)); + assertTrue(file.exists()); + fileReferenceData.close(); + assertTrue(file.exists()); // file should not be removed + } + + private File writeTempFile(String content) throws IOException { + File file = temporaryFolder.newFile(); + IOUtils.writeFile(file, Utf8.toBytes(content)); + return file; } } -- cgit v1.2.3