diff options
author | Henning Baldersheim <balder@yahoo-inc.com> | 2019-04-01 17:30:38 +0300 |
---|---|---|
committer | GitHub <noreply@github.com> | 2019-04-01 17:30:38 +0300 |
commit | abc86be7c729451f1e624e7e13d1d20fb765430a (patch) | |
tree | 12f6806dc0106a56d0cea9f2de15752d768c67de /vespalib/src | |
parent | 0aeeb8324605b10f736af77fa32b5e947d08a816 (diff) | |
parent | 43a0ba72f8ea8dbd4b7e4517ca496a8da4632ae1 (diff) |
Merge pull request #8980 from vespa-engine/havardpe/improved-compression-api
added simplified api for compression/decompression
Diffstat (limited to 'vespalib/src')
-rw-r--r-- | vespalib/src/tests/compression/compression_test.cpp | 27 | ||||
-rw-r--r-- | vespalib/src/vespa/vespalib/util/compressor.cpp | 82 | ||||
-rw-r--r-- | vespalib/src/vespa/vespalib/util/compressor.h | 39 |
3 files changed, 148 insertions, 0 deletions
diff --git a/vespalib/src/tests/compression/compression_test.cpp b/vespalib/src/tests/compression/compression_test.cpp
index 01cfe0af223..ebd02d13875 100644
--- a/vespalib/src/tests/compression/compression_test.cpp
+++ b/vespalib/src/tests/compression/compression_test.cpp
@@ -40,6 +40,33 @@ TEST("requireThatZStdCompressFine") {
     EXPECT_EQUAL(64u, compressed.getDataLen());
 }
 
+TEST("require that no compression/decompression works") {
+    CompressionConfig cfg(CompressionConfig::Type::NONE);
+    Compress compress(cfg, _G_compressableText.c_str(), _G_compressableText.size());
+    EXPECT_EQUAL(CompressionConfig::Type::NONE, compress.type());
+    EXPECT_EQUAL(1072u, compress.size());
+    Decompress decompress(compress.type(), _G_compressableText.size(), compress.data(), compress.size());
+    EXPECT_EQUAL(_G_compressableText, vespalib::string(decompress.data(), decompress.size()));
+}
+
+TEST("require that lz4 compression/decompression works") {
+    CompressionConfig cfg(CompressionConfig::Type::LZ4);
+    Compress compress(cfg, _G_compressableText.c_str(), _G_compressableText.size());
+    EXPECT_EQUAL(CompressionConfig::Type::LZ4, compress.type());
+    EXPECT_EQUAL(66u, compress.size());
+    Decompress decompress(compress.type(), _G_compressableText.size(), compress.data(), compress.size());
+    EXPECT_EQUAL(_G_compressableText, vespalib::string(decompress.data(), decompress.size()));
+}
+
+TEST("requiret that zstd compression/decompression works") {
+    CompressionConfig cfg(CompressionConfig::Type::ZSTD);
+    Compress compress(cfg, _G_compressableText.c_str(), _G_compressableText.size());
+    EXPECT_EQUAL(CompressionConfig::Type::ZSTD, compress.type());
+    EXPECT_EQUAL(64u, compress.size());
+    Decompress decompress(compress.type(), _G_compressableText.size(), compress.data(), compress.size());
+    EXPECT_EQUAL(_G_compressableText, vespalib::string(decompress.data(), decompress.size()));
+}
+
 TEST_MAIN() {
     TEST_RUN_ALL();
 }
diff --git a/vespalib/src/vespa/vespalib/util/compressor.cpp b/vespalib/src/vespa/vespalib/util/compressor.cpp
index efa61197739..8dfdac5ecc7 100644
--- a/vespalib/src/vespa/vespalib/util/compressor.cpp
+++ b/vespalib/src/vespa/vespalib/util/compressor.cpp
@@ -10,6 +10,35 @@ using vespalib::alloc::Alloc;
 
 namespace vespalib::compression {
 
+//-----------------------------------------------------------------------------
+
+namespace {
+
+template <typename F>
+void with_compressor(const CompressionConfig::Type &type, F &&f) {
+    switch (type) {
+    case CompressionConfig::LZ4:
+    {
+        LZ4Compressor lz4;
+        f(lz4);
+    }
+    break;
+    case CompressionConfig::ZSTD:
+    {
+        ZStdCompressor zstd;
+        f(zstd);
+    }
+    break;
+    default:
+        throw std::runtime_error(make_string("No implementation for compression type '%d'", type));
+        break;
+    }
+}
+
+}
+
+//-----------------------------------------------------------------------------
+
 CompressionConfig::Type
 compress(ICompressor & compressor, const CompressionConfig & compression, const ConstBufferRef & org, DataBuffer & dest)
 {
@@ -135,4 +164,57 @@ size_t computeMaxCompressedsize(CompressionConfig::Type type, size_t payloadSize
     return payloadSize;
 }
 
+//-----------------------------------------------------------------------------
+
+Compress::Compress(const CompressionConfig &config,
+                   const char *uncompressed_data, size_t uncompressed_size)
+    : _space(),
+      _type(CompressionConfig::NONE),
+      _data(uncompressed_data),
+      _size(uncompressed_size)
+{
+    if (config.useCompression() && (uncompressed_size >= config.minSize)) {
+        with_compressor(config.type, [this, &config, uncompressed_data, uncompressed_size](ICompressor &compressor)
+                        {
+                            size_t compressed_size = compressor.adjustProcessLen(0, uncompressed_size);
+                            _space = alloc::Alloc::allocHeap(compressed_size);
+                            if (compressor.process(config, uncompressed_data, uncompressed_size, _space.get(), compressed_size) &&
+                                (compressed_size < ((uncompressed_size * config.threshold)/100)))
+                            {
+                                _type = config.type;
+                                _data = static_cast<char *>(_space.get());
+                                _size = compressed_size;
+                            } else {
+                                _space = alloc::Alloc::allocHeap();
+                            }
+                        });
+    }
+}
+
+Decompress::Decompress(const CompressionConfig::Type &type, size_t uncompressed_size,
+                       const char *compressed_data, size_t compressed_size)
+    : _space(),
+      _data(compressed_data),
+      _size(compressed_size)
+{
+    if (CompressionConfig::isCompressed(type)) {
+        with_compressor(type, [this, uncompressed_size, compressed_data, compressed_size](ICompressor &compressor)
+                        {
+                            _space = alloc::Alloc::allocHeap(uncompressed_size);
+                            size_t produced_size = uncompressed_size;
+                            if (compressor.unprocess(compressed_data, compressed_size, _space.get(), produced_size) &&
+                                (uncompressed_size == produced_size))
+                            {
+                                _data = static_cast<char *>(_space.get());
+                                _size = uncompressed_size;
+                            } else {
+                                throw std::runtime_error(make_string("unprocess failed; had %zu, wanted %zu, got %zu",
+                                                                     compressed_size, uncompressed_size, produced_size));
+                            }
+                        });
+    }
+}
+
+//-----------------------------------------------------------------------------
+
 }
diff --git a/vespalib/src/vespa/vespalib/util/compressor.h b/vespalib/src/vespa/vespalib/util/compressor.h
index 8f319a6735d..106b87c4c90 100644
--- a/vespalib/src/vespa/vespalib/util/compressor.h
+++ b/vespalib/src/vespa/vespalib/util/compressor.h
@@ -3,6 +3,7 @@
 
 #include "compressionconfig.h"
 #include "buffer.h"
+#include "alloc.h"
 
 namespace vespalib { class DataBuffer; }
 
@@ -43,4 +44,42 @@ void decompress(const CompressionConfig::Type & compression, size_t uncompressed
 
 size_t computeMaxCompressedsize(CompressionConfig::Type type, size_t uncompressedSize);
 
+//-----------------------------------------------------------------------------
+
+/**
+ * Simple utility used to compress data according to a compression
+ * configuration.
+ **/
+class Compress {
+private:
+    alloc::Alloc _space;
+    CompressionConfig::Type _type;
+    const char *_data;
+    size_t _size;
+public:
+    Compress(const CompressionConfig &config,
+             const char *uncompressed_data, size_t uncompressed_size);
+    const CompressionConfig::Type &type() const { return _type; }
+    const char *data() const { return _data; }
+    size_t size() const { return _size; }
+};
+
+/**
+ * Simple utility used to decompress data using additional information
+ * about compression type and uncompressed size.
+ **/
+class Decompress {
+private:
+    alloc::Alloc _space;
+    const char *_data;
+    size_t _size;
+public:
+    Decompress(const CompressionConfig::Type &type, size_t uncompressed_size,
+               const char *compressed_data, size_t compressed_size);
+    const char *data() const { return _data; }
+    size_t size() const { return _size; }
+};
+
+//-----------------------------------------------------------------------------
+
 }