diff options
Diffstat (limited to 'document')
-rw-r--r-- | document/CMakeLists.txt | 1 | ||||
-rw-r--r-- | document/src/tests/serialization/compression_test.cpp | 23 | ||||
-rw-r--r-- | document/src/vespa/document/fieldvalue/serializablearray.cpp | 11 | ||||
-rw-r--r-- | document/src/vespa/document/serialization/vespadocumentserializer.cpp | 1 | ||||
-rw-r--r-- | document/src/vespa/document/util/CMakeLists.txt | 6 | ||||
-rw-r--r-- | document/src/vespa/document/util/compressionconfig.h | 17 | ||||
-rw-r--r-- | document/src/vespa/document/util/compressor.cpp | 72 | ||||
-rw-r--r-- | document/src/vespa/document/util/compressor.h | 22 | ||||
-rw-r--r-- | document/src/vespa/document/util/lz4compressor.cpp | 46 | ||||
-rw-r--r-- | document/src/vespa/document/util/lz4compressor.h | 17 | ||||
-rw-r--r-- | document/src/vespa/document/util/zstdcompressor.cpp | 34 | ||||
-rw-r--r-- | document/src/vespa/document/util/zstdcompressor.h | 17 |
12 files changed, 192 insertions, 75 deletions
diff --git a/document/CMakeLists.txt b/document/CMakeLists.txt index 4e8e6a0aab2..e9694390b4b 100644 --- a/document/CMakeLists.txt +++ b/document/CMakeLists.txt @@ -10,6 +10,7 @@ vespa_define_module( EXTERNAL_DEPENDS lz4 + zstd LIBS src/vespa/document diff --git a/document/src/tests/serialization/compression_test.cpp b/document/src/tests/serialization/compression_test.cpp index 6b95f5e933f..6574b4ac34c 100644 --- a/document/src/tests/serialization/compression_test.cpp +++ b/document/src/tests/serialization/compression_test.cpp @@ -1,27 +1,44 @@ // Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. -// Unit tests for annotation serialization. -#include <vespa/log/log.h> #include <vespa/vespalib/testkit/test_kit.h> #include <vespa/vespalib/stllike/string.h> #include <vespa/document/util/compressor.h> +#include <vespa/vespalib/data/databuffer.h> +#include <vespa/log/log.h> LOG_SETUP("compression_test"); using namespace document; +using namespace document::compression; using namespace vespalib; static vespalib::string _G_compressableText("AAAAAAAAAAAAAAABBBBBBBBBBBBBBBBBCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCDDDDDDDDDDDDDDDDDDDDDDDDDEEEEEEEEEEEEEEE" "AAAAAAAAAAAAAAABBBBBBBBBBBBBBBBBCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCDDDDDDDDDDDDDDDDDDDDDDDDDEEEEEEEEEEEEEEE" "AAAAAAAAAAAAAAABBBBBBBBBBBBBBBBBCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCDDDDDDDDDDDDDDDDDDDDDDDDDEEEEEEEEEEEEEEE" "AAAAAAAAAAAAAAABBBBBBBBBBBBBBBBBCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCDDDDDDDDDDDDDDDDDDDDDDDDDEEEEEEEEEEEEEEE" - "AAAAAAAAAAAAAAABBBBBBBBBBBBBBBBBCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCDDDDDDDDDDDDDDDDDDDDDDDDDEEEEEEEEEEEEEEE"); + "AAAAAAAAAAAAAAABBBBBBBBBBBBBBBBBCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCDDDDDDDDDDDDDDDDDDDDDDDDDEEEEEEEEEEEEEEE" + "XYZABCDEFGHIJGJMNOPQRSTUVW" + "AAAAAAAAAAAAAAABBBBBBBBBBBBBBBBBCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCDDDDDDDDDDDDDDDDDDDDDDDDDEEEEEEEEEEEEEEE" + "AAAAAAAAAAAAAAABBBBBBBBBBBBBBBBBCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCDDDDDDDDDDDDDDDDDDDDDDDDDEEEEEEEEEEEEEEE" + "AAAAAAAAAAAAAAABBBBBBBBBBBBBBBBBCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCDDDDDDDDDDDDDDDDDDDDDDDDDEEEEEEEEEEEEEEE" + "AAAAAAAAAAAAAAABBBBBBBBBBBBBBBBBCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCDDDDDDDDDDDDDDDDDDDDDDDDDEEEEEEEEEEEEEEE" + "AAAAAAAAAAAAAAABBBBBBBBBBBBBBBBBCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCDDDDDDDDDDDDDDDDDDDDDDDDDEEEEEEEEEEEEEEE" + "XYZABCDEFGHIJGJMNOPQRSTUVW"); TEST("requireThatLZ4CompressFine") { CompressionConfig cfg(CompressionConfig::Type::LZ4); ConstBufferRef ref(_G_compressableText.c_str(), _G_compressableText.size()); DataBuffer compressed; EXPECT_EQUAL(CompressionConfig::Type::LZ4, compress(cfg, ref, compressed, false)); + EXPECT_EQUAL(66u, compressed.getDataLen()); +} + +TEST("requireThatZStdCompressFine") { + CompressionConfig cfg(CompressionConfig::Type::ZSTD); + ConstBufferRef ref(_G_compressableText.c_str(), _G_compressableText.size()); + DataBuffer compressed; + EXPECT_EQUAL(CompressionConfig::Type::ZSTD, compress(cfg, ref, compressed, false)); + EXPECT_EQUAL(64u, compressed.getDataLen()); } TEST_MAIN() { diff --git a/document/src/vespa/document/fieldvalue/serializablearray.cpp b/document/src/vespa/document/fieldvalue/serializablearray.cpp index 6e83e42752f..ac1cabc82c0 100644 --- a/document/src/vespa/document/fieldvalue/serializablearray.cpp +++ b/document/src/vespa/document/fieldvalue/serializablearray.cpp @@ -2,14 +2,15 @@ #include "serializablearray.h" #include <vespa/document/util/serializableexceptions.h> #include <vespa/document/util/bytebuffer.h> +#include <vespa/document/util/compressor.h> #include <vespa/vespalib/stllike/hash_map.hpp> #include <vespa/vespalib/data/databuffer.h> -#include <vespa/document/util/compressor.h> #include <vespa/log/log.h> LOG_SETUP(".document.serializable-array"); using std::vector; +using vespalib::make_string; namespace document { @@ -179,6 +180,7 @@ SerializableArray::clear(int id) void SerializableArray::deCompress() // throw (DeserializeException) { + using document::compression::decompress; // will only do this once LOG_ASSERT(_compSerData); @@ -201,15 +203,14 @@ SerializableArray::deCompress() // throw (DeserializeException) false); } catch (const std::runtime_error & e) { throw DeserializeException( - vespalib::make_string( "Document was compressed with code unknown code %d", _serializedCompression), + make_string( "Document was compressed with code unknown code %d", _serializedCompression), VESPA_STRLOC); } if (unCompressed.getDataLen() != (size_t)_uncompressedLength) { throw DeserializeException( - vespalib::make_string( - "Did not decompress to the expected length: had %" PRIu64 ", wanted %d, got %" PRIu64, - _compSerData->getRemaining(), _uncompressedLength, unCompressed.getDataLen()), + make_string("Did not decompress to the expected length: had %zu, wanted %d, got %zu", + _compSerData->getRemaining(), _uncompressedLength, unCompressed.getDataLen()), VESPA_STRLOC); } assert(newSerialization->getBuffer() == unCompressed.getData()); diff --git a/document/src/vespa/document/serialization/vespadocumentserializer.cpp b/document/src/vespa/document/serialization/vespadocumentserializer.cpp index ed4ea31a5f2..1810b9fb631 100644 --- a/document/src/vespa/document/serialization/vespadocumentserializer.cpp +++ b/document/src/vespa/document/serialization/vespadocumentserializer.cpp @@ -263,6 +263,7 @@ vespalib::ConstBufferRef compressStream(const CompressionConfig &config, nbostream &stream, vespalib::DataBuffer & compressed_data) { + using compression::compress; vespalib::ConstBufferRef buf(stream.c_str(), stream.size()); if (config.useCompression() && bigEnough(stream.size(), config)) { CompressionConfig::Type compressedType = compress(config, vespalib::ConstBufferRef(stream.c_str(), stream.size()), compressed_data, false); diff --git a/document/src/vespa/document/util/CMakeLists.txt b/document/src/vespa/document/util/CMakeLists.txt index dbb67d6a913..2179b1307d3 100644 --- a/document/src/vespa/document/util/CMakeLists.txt +++ b/document/src/vespa/document/util/CMakeLists.txt @@ -1,11 +1,13 @@ # Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. vespa_add_library(document_util OBJECT SOURCES - stringutil.cpp bytebuffer.cpp - serializable.cpp compressor.cpp + lz4compressor.cpp + zstdcompressor.cpp printable.cpp + serializable.cpp + stringutil.cpp DEPENDS AFTER document_documentconfig diff --git a/document/src/vespa/document/util/compressionconfig.h b/document/src/vespa/document/util/compressionconfig.h index fa20bc38578..413bdc1fb3b 100644 --- a/document/src/vespa/document/util/compressionconfig.h +++ b/document/src/vespa/document/util/compressionconfig.h @@ -7,16 +7,16 @@ namespace document { - struct CompressionConfig { enum Type { - NONE = 0, - HISTORIC_1 = 1, - HISTORIC_2 = 2, - HISTORIC_3 = 3, - HISTORIC_4 = 4, - UNCOMPRESSABLE = 5, - LZ4 = 6 + NONE = 0, + HISTORIC_1 = 1, + HISTORIC_2 = 2, + HISTORIC_3 = 3, + HISTORIC_4 = 4, + UNCOMPRESSABLE = 5, + LZ4 = 6, + ZSTD = 7 }; CompressionConfig() @@ -47,6 +47,7 @@ struct CompressionConfig { case 4: return HISTORIC_4; case 5: return UNCOMPRESSABLE; case 6: return LZ4; + case 7: return ZSTD; default: return NONE; } } diff --git a/document/src/vespa/document/util/compressor.cpp b/document/src/vespa/document/util/compressor.cpp index 02a2029b3eb..cd45017dd69 100644 --- a/document/src/vespa/document/util/compressor.cpp +++ b/document/src/vespa/document/util/compressor.cpp @@ -1,56 +1,17 @@ // Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. -#include "compressor.h" + +#include "lz4compressor.h" +#include "zstdcompressor.h" #include <vespa/vespalib/util/memory.h> #include <vespa/vespalib/util/stringfmt.h> -#include <stdexcept> -#include <lz4.h> -#include <lz4hc.h> +#include <vespa/vespalib/data/databuffer.h> using vespalib::alloc::Alloc; using vespalib::ConstBufferRef; using vespalib::DataBuffer; using vespalib::make_string; -namespace document -{ - -size_t LZ4Compressor::adjustProcessLen(uint16_t, size_t len) const { return LZ4_compressBound(len); } -size_t LZ4Compressor::adjustUnProcessLen(uint16_t, size_t len) const { return len; } - -bool -LZ4Compressor::process(const CompressionConfig& config, const void * inputV, size_t inputLen, void * outputV, size_t & outputLenV) -{ - const char * input(static_cast<const char *>(inputV)); - char * output(static_cast<char *>(outputV)); - int sz(-1); - int maxOutputLen = LZ4_compressBound(inputLen); - if (config.compressionLevel > 6) { - Alloc state = Alloc::alloc(LZ4_sizeofStateHC()); - sz = LZ4_compress_HC_extStateHC(state.get(), input, output, inputLen, maxOutputLen, config.compressionLevel); - } else { - Alloc state = Alloc::alloc(LZ4_sizeofState()); - sz = LZ4_compress_fast_extState(state.get(), input, output, inputLen, maxOutputLen, 1); - } - if (sz != 0) { - outputLenV = sz; - } - assert(sz != 0); - return (sz != 0); - -} - -bool -LZ4Compressor::unprocess(const void * inputV, size_t inputLen, void * outputV, size_t & outputLenV) -{ - const char * input(static_cast<const char *>(inputV)); - char * output(static_cast<char *>(outputV)); - int sz = LZ4_decompress_safe(input, output, inputLen, outputLenV); - if (sz > 0) { - outputLenV = sz; - } - assert(sz > 0); - return (sz > 0); -} +namespace document::compression { CompressionConfig::Type compress(ICompressor & compressor, const CompressionConfig & compression, const ConstBufferRef & org, DataBuffer & dest) @@ -78,6 +39,12 @@ docompress(const CompressionConfig & compression, const ConstBufferRef & org, Da type = compress(lz4, compression, org, dest); } break; + case CompressionConfig::ZSTD: + { + ZStdCompressor zstd; + type = compress(zstd, compression, org, dest); + } + break; case CompressionConfig::NONE: default: break; @@ -138,6 +105,12 @@ decompress(const CompressionConfig::Type & type, size_t uncompressedLen, const C decompress(lz4, uncompressedLen, org, dest, allowSwap); } break; + case CompressionConfig::ZSTD: + { + ZStdCompressor zstd; + decompress(zstd, uncompressedLen, org, dest, allowSwap); + } + break; case CompressionConfig::NONE: case CompressionConfig::UNCOMPRESSABLE: if (allowSwap) { @@ -154,4 +127,15 @@ decompress(const CompressionConfig::Type & type, size_t uncompressedLen, const C } } +size_t computeMaxCompressedsize(CompressionConfig::Type type, size_t payloadSize) { + if (type == CompressionConfig::LZ4) { + document::LZ4Compressor lz4; + return lz4.adjustProcessLen(0, payloadSize); + } else if (type == CompressionConfig::ZSTD) { + document::ZStdCompressor zstd; + return zstd.adjustProcessLen(0, payloadSize); + } + return payloadSize; +} + } diff --git a/document/src/vespa/document/util/compressor.h b/document/src/vespa/document/util/compressor.h index 1b857d050ac..a8d4803e038 100644 --- a/document/src/vespa/document/util/compressor.h +++ b/document/src/vespa/document/util/compressor.h @@ -1,12 +1,12 @@ // Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. #pragma once -#include <vespa/document/util/compressionconfig.h> -#include <vespa/vespalib/data/databuffer.h> +#include "compressionconfig.h" #include <vespa/vespalib/util/buffer.h> -namespace document -{ +namespace vespalib { class DataBuffer; } + +namespace document { class ICompressor { @@ -15,17 +15,9 @@ public: virtual bool process(const CompressionConfig& config, const void * input, size_t inputLen, void * output, size_t & outputLen) = 0; virtual bool unprocess(const void * input, size_t inputLen, void * output, size_t & outputLen) = 0; virtual size_t adjustProcessLen(uint16_t options, size_t len) const = 0; - virtual size_t adjustUnProcessLen(uint16_t options, size_t len) const = 0; }; -class LZ4Compressor : public ICompressor -{ -public: - bool process(const CompressionConfig& config, const void * input, size_t inputLen, void * output, size_t & outputLen) override; - bool unprocess(const void * input, size_t inputLen, void * output, size_t & outputLen) override; - size_t adjustProcessLen(uint16_t options, size_t len) const override; - size_t adjustUnProcessLen(uint16_t options, size_t len) const override; -}; +namespace compression { /** * Will try to compress a buffer according to the config. If the criteria can not @@ -51,5 +43,9 @@ CompressionConfig::Type compress(const CompressionConfig & compression, const ve */ void decompress(const CompressionConfig::Type & compression, size_t uncompressedLen, const vespalib::ConstBufferRef & org, vespalib::DataBuffer & dest, bool allowSwap); + +size_t computeMaxCompressedsize(CompressionConfig::Type type, size_t uncompressedSize); + } +} diff --git a/document/src/vespa/document/util/lz4compressor.cpp b/document/src/vespa/document/util/lz4compressor.cpp new file mode 100644 index 00000000000..9b41df239a9 --- /dev/null +++ b/document/src/vespa/document/util/lz4compressor.cpp @@ -0,0 +1,46 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#include "lz4compressor.h" +#include <vespa/vespalib/util/alloc.h> +#include <lz4.h> +#include <lz4hc.h> +#include <cassert> + +using vespalib::alloc::Alloc; + +namespace document { + +size_t LZ4Compressor::adjustProcessLen(uint16_t, size_t len) const { return LZ4_compressBound(len); } + +bool +LZ4Compressor::process(const CompressionConfig& config, const void * inputV, size_t inputLen, void * outputV, size_t & outputLenV) +{ + const char * input(static_cast<const char *>(inputV)); + char * output(static_cast<char *>(outputV)); + int sz(-1); + int maxOutputLen = LZ4_compressBound(inputLen); + if (config.compressionLevel > 6) { + Alloc state = Alloc::alloc(LZ4_sizeofStateHC()); + sz = LZ4_compress_HC_extStateHC(state.get(), input, output, inputLen, maxOutputLen, config.compressionLevel); + } else { + Alloc state = Alloc::alloc(LZ4_sizeofState()); + sz = LZ4_compress_fast_extState(state.get(), input, output, inputLen, maxOutputLen, 1); + } + assert(sz != 0); + outputLenV = sz; + return (sz != 0); + +} + +bool +LZ4Compressor::unprocess(const void * inputV, size_t inputLen, void * outputV, size_t & outputLenV) +{ + const char * input(static_cast<const char *>(inputV)); + char * output(static_cast<char *>(outputV)); + int sz = LZ4_decompress_safe(input, output, inputLen, outputLenV); + assert(sz > 0); + outputLenV = sz; + return (sz > 0); +} + +} diff --git a/document/src/vespa/document/util/lz4compressor.h b/document/src/vespa/document/util/lz4compressor.h new file mode 100644 index 00000000000..aa1726cd639 --- /dev/null +++ b/document/src/vespa/document/util/lz4compressor.h @@ -0,0 +1,17 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +#pragma once + +#include "compressor.h" + +namespace document { + +class LZ4Compressor : public ICompressor +{ +public: + bool process(const CompressionConfig& config, const void * input, size_t inputLen, void * output, size_t & outputLen) override; + bool unprocess(const void * input, size_t inputLen, void * output, size_t & outputLen) override; + size_t adjustProcessLen(uint16_t options, size_t len) const override; +}; + +} + diff --git a/document/src/vespa/document/util/zstdcompressor.cpp b/document/src/vespa/document/util/zstdcompressor.cpp new file mode 100644 index 00000000000..bb13d8009c1 --- /dev/null +++ b/document/src/vespa/document/util/zstdcompressor.cpp @@ -0,0 +1,34 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#include "zstdcompressor.h" +#include <vespa/vespalib/util/alloc.h> +#include <zstd.h> +#include <cassert> + +using vespalib::alloc::Alloc; + +namespace document { + +size_t ZStdCompressor::adjustProcessLen(uint16_t, size_t len) const { return ZSTD_compressBound(len); } + +bool +ZStdCompressor::process(const CompressionConfig& config, const void * inputV, size_t inputLen, void * outputV, size_t & outputLenV) +{ + size_t maxOutputLen = ZSTD_compressBound(inputLen); + size_t sz = ZSTD_compress(outputV, maxOutputLen, inputV, inputLen, config.compressionLevel); + assert( ! ZSTD_isError(sz) ); + outputLenV = sz; + return ! ZSTD_isError(sz); + +} + +bool +ZStdCompressor::unprocess(const void * inputV, size_t inputLen, void * outputV, size_t & outputLenV) +{ + size_t sz = ZSTD_decompress(outputV, outputLenV, inputV, inputLen); + assert( ! ZSTD_isError(sz) ); + outputLenV = sz; + return ! ZSTD_isError(sz); +} + +} diff --git a/document/src/vespa/document/util/zstdcompressor.h b/document/src/vespa/document/util/zstdcompressor.h new file mode 100644 index 00000000000..5c52c144651 --- /dev/null +++ b/document/src/vespa/document/util/zstdcompressor.h @@ -0,0 +1,17 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +#pragma once + +#include "compressor.h" + +namespace document { + +class ZStdCompressor : public ICompressor +{ +public: + bool process(const CompressionConfig& config, const void * input, size_t inputLen, void * output, size_t & outputLen) override; + bool unprocess(const void * input, size_t inputLen, void * output, size_t & outputLen) override; + size_t adjustProcessLen(uint16_t options, size_t len) const override; +}; + +} + |