From b37ec2bf0340fc6ef42b4a9a1fd7cb9d70a46398 Mon Sep 17 00:00:00 2001 From: Henning Baldersheim Date: Mon, 12 Jun 2017 12:17:40 +0200 Subject: Split out the explicit compressors. --- .../src/tests/serialization/compression_test.cpp | 1 + document/src/vespa/document/util/CMakeLists.txt | 5 ++- document/src/vespa/document/util/compressor.cpp | 47 ++----------------- document/src/vespa/document/util/compressor.h | 18 ++------ document/src/vespa/document/util/lz4compressor.cpp | 52 ++++++++++++++++++++++ document/src/vespa/document/util/lz4compressor.h | 18 ++++++++ 6 files changed, 82 insertions(+), 59 deletions(-) create mode 100644 document/src/vespa/document/util/lz4compressor.cpp create mode 100644 document/src/vespa/document/util/lz4compressor.h (limited to 'document') diff --git a/document/src/tests/serialization/compression_test.cpp b/document/src/tests/serialization/compression_test.cpp index b41dfad7b36..e5464100673 100644 --- a/document/src/tests/serialization/compression_test.cpp +++ b/document/src/tests/serialization/compression_test.cpp @@ -5,6 +5,7 @@ #include #include #include +#include LOG_SETUP("compression_test"); diff --git a/document/src/vespa/document/util/CMakeLists.txt b/document/src/vespa/document/util/CMakeLists.txt index d8fe0d833ff..8d1275d0207 100644 --- a/document/src/vespa/document/util/CMakeLists.txt +++ b/document/src/vespa/document/util/CMakeLists.txt @@ -1,11 +1,12 @@ # Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. vespa_add_library(document_util OBJECT SOURCES - stringutil.cpp bytebuffer.cpp - serializable.cpp compressor.cpp + lz4compressor.cpp printable.cpp + serializable.cpp + stringutil.cpp DEPENDS AFTER document_documentconfig diff --git a/document/src/vespa/document/util/compressor.cpp b/document/src/vespa/document/util/compressor.cpp index 0783bb28a60..d29f1db71b2 100644 --- a/document/src/vespa/document/util/compressor.cpp +++ b/document/src/vespa/document/util/compressor.cpp @@ -1,56 +1,17 @@ // Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. -#include "compressor.h" + +#include "lz4compressor.h" #include #include +#include #include -#include -#include using vespalib::alloc::Alloc; using vespalib::ConstBufferRef; using vespalib::DataBuffer; using vespalib::make_string; -namespace document -{ - -size_t LZ4Compressor::adjustProcessLen(uint16_t, size_t len) const { return LZ4_compressBound(len); } -size_t LZ4Compressor::adjustUnProcessLen(uint16_t, size_t len) const { return len; } - -bool -LZ4Compressor::process(const CompressionConfig& config, const void * inputV, size_t inputLen, void * outputV, size_t & outputLenV) -{ - const char * input(static_cast(inputV)); - char * output(static_cast(outputV)); - int sz(-1); - int maxOutputLen = LZ4_compressBound(inputLen); - if (config.compressionLevel > 6) { - Alloc state = Alloc::alloc(LZ4_sizeofStateHC()); - sz = LZ4_compress_HC_extStateHC(state.get(), input, output, inputLen, maxOutputLen, config.compressionLevel); - } else { - Alloc state = Alloc::alloc(LZ4_sizeofState()); - sz = LZ4_compress_fast_extState(state.get(), input, output, inputLen, maxOutputLen, 1); - } - if (sz != 0) { - outputLenV = sz; - } - assert(sz != 0); - return (sz != 0); - -} - -bool -LZ4Compressor::unprocess(const void * inputV, size_t inputLen, void * outputV, size_t & outputLenV) -{ - const char * input(static_cast(inputV)); - char * output(static_cast(outputV)); - int sz = LZ4_decompress_safe(input, output, inputLen, outputLenV); - if (sz > 0) { - outputLenV = sz; - } - assert(sz > 0); - return (sz > 0); -} +namespace document { CompressionConfig::Type compress(ICompressor & compressor, const CompressionConfig & compression, const ConstBufferRef & org, DataBuffer & dest) diff --git a/document/src/vespa/document/util/compressor.h b/document/src/vespa/document/util/compressor.h index 599d9424889..31e265415b7 100644 --- a/document/src/vespa/document/util/compressor.h +++ b/document/src/vespa/document/util/compressor.h @@ -1,12 +1,12 @@ // Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. #pragma once -#include -#include +#include "compressionconfig.h" #include -namespace document -{ +namespace vespalib { class DataBuffer; } + +namespace document { class ICompressor { @@ -18,15 +18,6 @@ public: virtual size_t adjustUnProcessLen(uint16_t options, size_t len) const = 0; }; -class LZ4Compressor : public ICompressor -{ -public: - bool process(const CompressionConfig& config, const void * input, size_t inputLen, void * output, size_t & outputLen) override; - bool unprocess(const void * input, size_t inputLen, void * output, size_t & outputLen) override; - size_t adjustProcessLen(uint16_t options, size_t len) const override; - size_t adjustUnProcessLen(uint16_t options, size_t len) const override; -}; - /** * Will try to compress a buffer according to the config. If the criteria can not * be met it will return NONE and dest will get the input buffer. @@ -52,4 +43,3 @@ CompressionConfig::Type compress(const CompressionConfig & compression, const ve void decompress(const CompressionConfig::Type & compression, size_t uncompressedLen, const vespalib::ConstBufferRef & org, vespalib::DataBuffer & dest, bool allowSwap); } - diff --git a/document/src/vespa/document/util/lz4compressor.cpp b/document/src/vespa/document/util/lz4compressor.cpp new file mode 100644 index 00000000000..cb2d41269b0 --- /dev/null +++ b/document/src/vespa/document/util/lz4compressor.cpp @@ -0,0 +1,52 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#include "lz4compressor.h" +#include +#include +#include +#include + +using vespalib::alloc::Alloc; + + +namespace document { + +size_t LZ4Compressor::adjustProcessLen(uint16_t, size_t len) const { return LZ4_compressBound(len); } +size_t LZ4Compressor::adjustUnProcessLen(uint16_t, size_t len) const { return len; } + +bool +LZ4Compressor::process(const CompressionConfig& config, const void * inputV, size_t inputLen, void * outputV, size_t & outputLenV) +{ + const char * input(static_cast(inputV)); + char * output(static_cast(outputV)); + int sz(-1); + int maxOutputLen = LZ4_compressBound(inputLen); + if (config.compressionLevel > 6) { + Alloc state = Alloc::alloc(LZ4_sizeofStateHC()); + sz = LZ4_compress_HC_extStateHC(state.get(), input, output, inputLen, maxOutputLen, config.compressionLevel); + } else { + Alloc state = Alloc::alloc(LZ4_sizeofState()); + sz = LZ4_compress_fast_extState(state.get(), input, output, inputLen, maxOutputLen, 1); + } + if (sz != 0) { + outputLenV = sz; + } + assert(sz != 0); + return (sz != 0); + +} + +bool +LZ4Compressor::unprocess(const void * inputV, size_t inputLen, void * outputV, size_t & outputLenV) +{ + const char * input(static_cast(inputV)); + char * output(static_cast(outputV)); + int sz = LZ4_decompress_safe(input, output, inputLen, outputLenV); + if (sz > 0) { + outputLenV = sz; + } + assert(sz > 0); + return (sz > 0); +} + +} diff --git a/document/src/vespa/document/util/lz4compressor.h b/document/src/vespa/document/util/lz4compressor.h new file mode 100644 index 00000000000..0c3eee82402 --- /dev/null +++ b/document/src/vespa/document/util/lz4compressor.h @@ -0,0 +1,18 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +#pragma once + +#include "compressor.h" + +namespace document { + +class LZ4Compressor : public ICompressor +{ +public: + bool process(const CompressionConfig& config, const void * input, size_t inputLen, void * output, size_t & outputLen) override; + bool unprocess(const void * input, size_t inputLen, void * output, size_t & outputLen) override; + size_t adjustProcessLen(uint16_t options, size_t len) const override; + size_t adjustUnProcessLen(uint16_t options, size_t len) const override; +}; + +} + -- cgit v1.2.3 From 62fbb0e42a634da08bb330fcb52aa87a6af9a17d Mon Sep 17 00:00:00 2001 From: Henning Baldersheim Date: Mon, 12 Jun 2017 12:50:51 +0200 Subject: Add the std compressor. --- document/CMakeLists.txt | 1 + document/src/vespa/document/util/CMakeLists.txt | 1 + .../src/vespa/document/util/zstdcompressor.cpp | 35 ++++++++++++++++++++++ document/src/vespa/document/util/zstdcompressor.h | 18 +++++++++++ 4 files changed, 55 insertions(+) create mode 100644 document/src/vespa/document/util/zstdcompressor.cpp create mode 100644 document/src/vespa/document/util/zstdcompressor.h (limited to 'document') diff --git a/document/CMakeLists.txt b/document/CMakeLists.txt index c0089c2bed8..cc4784c1ee2 100644 --- a/document/CMakeLists.txt +++ b/document/CMakeLists.txt @@ -10,6 +10,7 @@ vespa_define_module( EXTERNAL_DEPENDS lz4 + zstd LIBS src/vespa/document diff --git a/document/src/vespa/document/util/CMakeLists.txt b/document/src/vespa/document/util/CMakeLists.txt index 8d1275d0207..ae1f7441062 100644 --- a/document/src/vespa/document/util/CMakeLists.txt +++ b/document/src/vespa/document/util/CMakeLists.txt @@ -4,6 +4,7 @@ vespa_add_library(document_util OBJECT bytebuffer.cpp compressor.cpp lz4compressor.cpp + zstdcompressor.cpp printable.cpp serializable.cpp stringutil.cpp diff --git a/document/src/vespa/document/util/zstdcompressor.cpp b/document/src/vespa/document/util/zstdcompressor.cpp new file mode 100644 index 00000000000..78ba15884f5 --- /dev/null +++ b/document/src/vespa/document/util/zstdcompressor.cpp @@ -0,0 +1,35 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#include "zstdcompressor.h" +#include +#include +#include + +using vespalib::alloc::Alloc; + +namespace document { + +size_t ZStdCompressor::adjustProcessLen(uint16_t, size_t len) const { return ZSTD_compressBound(len); } +size_t ZStdCompressor::adjustUnProcessLen(uint16_t, size_t len) const { return len; } + +bool +ZStdCompressor::process(const CompressionConfig& config, const void * inputV, size_t inputLen, void * outputV, size_t & outputLenV) +{ + size_t maxOutputLen = ZSTD_compressBound(inputLen); + size_t sz = ZSTD_compress(outputV, maxOutputLen, inputV, inputLen, config.compressionLevel); + assert( ! ZSTD_isError(sz) ); + outputLenV = sz; + return ! ZSTD_isError(sz); + +} + +bool +ZStdCompressor::unprocess(const void * inputV, size_t inputLen, void * outputV, size_t & outputLenV) +{ + size_t sz = ZSTD_decompress(outputV, outputLenV, inputV, inputLen); + assert( ! ZSTD_isError(sz) ); + outputLenV = sz; + return ! ZSTD_isError(sz); +} + +} diff --git a/document/src/vespa/document/util/zstdcompressor.h b/document/src/vespa/document/util/zstdcompressor.h new file mode 100644 index 00000000000..61b238e0bfb --- /dev/null +++ b/document/src/vespa/document/util/zstdcompressor.h @@ -0,0 +1,18 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +#pragma once + +#include "compressor.h" + +namespace document { + +class ZStdCompressor : public ICompressor +{ +public: + bool process(const CompressionConfig& config, const void * input, size_t inputLen, void * output, size_t & outputLen) override; + bool unprocess(const void * input, size_t inputLen, void * output, size_t & outputLen) override; + size_t adjustProcessLen(uint16_t options, size_t len) const override; + size_t adjustUnProcessLen(uint16_t options, size_t len) const override; +}; + +} + -- cgit v1.2.3 From 629e5365f224ca6e90cffd9461dc7b83ed5dc51d Mon Sep 17 00:00:00 2001 From: Henning Baldersheim Date: Mon, 12 Jun 2017 21:38:51 +0200 Subject: Add support for Zstandard compression in the document store. --- .../src/vespa/document/util/compressionconfig.h | 17 ++++++------ document/src/vespa/document/util/compressor.cpp | 14 +++++++++- searchcore/src/vespa/searchcore/config/proton.def | 6 ++--- .../proton/docsummary/summarymanager.cpp | 1 + searchlib/src/tests/docstore/chunk/chunk_test.cpp | 30 ++++++++++++++++++++++ .../src/vespa/searchlib/docstore/chunkformat.cpp | 5 ++++ .../src/vespa/vespalib/data/databuffer.h | 6 ++--- vespalib/src/vespa/vespalib/util/optimized.h | 2 +- 8 files changed, 64 insertions(+), 17 deletions(-) (limited to 'document') diff --git a/document/src/vespa/document/util/compressionconfig.h b/document/src/vespa/document/util/compressionconfig.h index 1ed6ec882ec..ff02e35977a 100644 --- a/document/src/vespa/document/util/compressionconfig.h +++ b/document/src/vespa/document/util/compressionconfig.h @@ -7,16 +7,16 @@ namespace document { - struct CompressionConfig { enum Type { - NONE = 0, - HISTORIC_1 = 1, - HISTORIC_2 = 2, - HISTORIC_3 = 3, - HISTORIC_4 = 4, - UNCOMPRESSABLE = 5, - LZ4 = 6 + NONE = 0, + HISTORIC_1 = 1, + HISTORIC_2 = 2, + HISTORIC_3 = 3, + HISTORIC_4 = 4, + UNCOMPRESSABLE = 5, + LZ4 = 6, + ZSTD = 7 }; CompressionConfig() @@ -47,6 +47,7 @@ struct CompressionConfig { case 4: return HISTORIC_4; case 5: return UNCOMPRESSABLE; case 6: return LZ4; + case 7: return ZSTD; default: return NONE; } } diff --git a/document/src/vespa/document/util/compressor.cpp b/document/src/vespa/document/util/compressor.cpp index d29f1db71b2..c9fac4cf4b0 100644 --- a/document/src/vespa/document/util/compressor.cpp +++ b/document/src/vespa/document/util/compressor.cpp @@ -1,10 +1,10 @@ // Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. #include "lz4compressor.h" +#include "zstdcompressor.h" #include #include #include -#include using vespalib::alloc::Alloc; using vespalib::ConstBufferRef; @@ -39,6 +39,12 @@ docompress(const CompressionConfig & compression, const ConstBufferRef & org, Da type = compress(lz4, compression, org, dest); } break; + case CompressionConfig::ZSTD: + { + ZStdCompressor zstd; + type = compress(zstd, compression, org, dest); + } + break; case CompressionConfig::NONE: default: break; @@ -99,6 +105,12 @@ decompress(const CompressionConfig::Type & type, size_t uncompressedLen, const C decompress(lz4, uncompressedLen, org, dest, allowSwap); } break; + case CompressionConfig::ZSTD: + { + ZStdCompressor zstd; + decompress(zstd, uncompressedLen, org, dest, allowSwap); + } + break; case CompressionConfig::NONE: case CompressionConfig::UNCOMPRESSABLE: if (allowSwap) { diff --git a/searchcore/src/vespa/searchcore/config/proton.def b/searchcore/src/vespa/searchcore/config/proton.def index c851d124970..8c4e7b4571a 100644 --- a/searchcore/src/vespa/searchcore/config/proton.def +++ b/searchcore/src/vespa/searchcore/config/proton.def @@ -210,21 +210,21 @@ summary.cache.allowvisitcaching bool default=false restart summary.cache.initialentries long default=0 restart ## Control compression type of the summary while in the cache. -summary.cache.compression.type enum {NONE, LZ4} default=LZ4 restart +summary.cache.compression.type enum {NONE, LZ4, ZSTD} default=LZ4 restart ## Control compression level of the summary while in cache. summary.cache.compression.level int default=9 restart ## Control compression type of the summary while in memory during compaction ## NB So far only stragey=LOG honours it. -summary.log.compact.compression.type enum {NONE, LZ4} default=LZ4 restart +summary.log.compact.compression.type enum {NONE, LZ4, ZSTD} default=LZ4 restart ## Control compression level of the summary while in memory during compaction summary.log.compact.compression.level int default=9 restart ## Control compression type of the summary ## NB So far only stragey=LOG honours it. -summary.log.chunk.compression.type enum {NONE, LZ4} default=LZ4 restart +summary.log.chunk.compression.type enum {NONE, LZ4, ZSTD} default=LZ4 restart ## Control compression level of the summary summary.log.chunk.compression.level int default=9 restart diff --git a/searchcore/src/vespa/searchcore/proton/docsummary/summarymanager.cpp b/searchcore/src/vespa/searchcore/proton/docsummary/summarymanager.cpp index 0253e943883..ae4b52b56fb 100644 --- a/searchcore/src/vespa/searchcore/proton/docsummary/summarymanager.cpp +++ b/searchcore/src/vespa/searchcore/proton/docsummary/summarymanager.cpp @@ -117,6 +117,7 @@ deriveCompression(const T & config) { document::CompressionConfig compression; if (config.type == T::LZ4) { compression.type = document::CompressionConfig::LZ4; + } else if (config.type == T::ZSTD) { } compression.compressionLevel = config.level; return compression; diff --git a/searchlib/src/tests/docstore/chunk/chunk_test.cpp b/searchlib/src/tests/docstore/chunk/chunk_test.cpp index 8b08fb278e3..9be11dba84b 100644 --- a/searchlib/src/tests/docstore/chunk/chunk_test.cpp +++ b/searchlib/src/tests/docstore/chunk/chunk_test.cpp @@ -68,4 +68,34 @@ TEST("require that Chunk formats does not change between releases") testChunkFormat(v2, 34, "34 015BA32DE7000000220000000010ABCDEF987654321000000000000000074D000694"); } +constexpr const char * MY_LONG_STRING = "This is medium long string that hopefully will compress to something where lz4, zstandard and none" +" will make a difference. The intentions is to verify that we trigger all compresssions possible and are able to decompress them too." +" I guess that we need a considerable length in order to get the rather inefficient lz4 compression triger. ZStandard compression" +" should trigger a lot earlier"; + +void verifyChunkCompression(CompressionConfig::Type cfgType, const void * buf, size_t sz, size_t expectedLen) { + uint64_t MAGIC_CONTENT(0xabcdef9876543210); + ChunkFormatV2 chunk(10); + chunk.getBuffer() << MAGIC_CONTENT; + chunk.getBuffer().write(buf, sz); + vespalib::DataBuffer buffer; + CompressionConfig cfg(cfgType); + chunk.pack(7, buffer, cfg); + EXPECT_EQUAL(expectedLen, buffer.getDataLen()); + vespalib::nbostream is(buffer.getData(), buffer.getDataLen()); + ChunkFormat::UP deserialized = ChunkFormat::deserialize(buffer.getData(), buffer.getDataLen(), false); + uint64_t magic(0); + deserialized->getBuffer() >> magic; + EXPECT_EQUAL(MAGIC_CONTENT, magic); + std::vector v(sz); + deserialized->getBuffer().read(&v[0], sz); + EXPECT_EQUAL(0, memcmp(buf, &v[0], sz)); +} + +TEST("require that V2 can create and handle lz4, zstd, and none") { + verifyChunkCompression(CompressionConfig::NONE, MY_LONG_STRING, strlen(MY_LONG_STRING), 421); + verifyChunkCompression(CompressionConfig::LZ4, MY_LONG_STRING, strlen(MY_LONG_STRING), 360); + verifyChunkCompression(CompressionConfig::ZSTD, MY_LONG_STRING, strlen(MY_LONG_STRING), 282); +} + TEST_MAIN() { TEST_RUN_ALL(); } diff --git a/searchlib/src/vespa/searchlib/docstore/chunkformat.cpp b/searchlib/src/vespa/searchlib/docstore/chunkformat.cpp index 0a77da01785..c690e662136 100644 --- a/searchlib/src/vespa/searchlib/docstore/chunkformat.cpp +++ b/searchlib/src/vespa/searchlib/docstore/chunkformat.cpp @@ -2,6 +2,7 @@ #include "chunkformats.h" #include +#include #include namespace search { @@ -53,6 +54,9 @@ ChunkFormat::getMaxPackSize(const document::CompressionConfig & compression) con if (compression.type == document::CompressionConfig::LZ4) { document::LZ4Compressor lz4; rawSize += lz4.adjustProcessLen(0, payloadSize); + } else if (compression.type == document::CompressionConfig::ZSTD) { + document::ZStdCompressor zstd; + rawSize += zstd.adjustProcessLen(0, payloadSize); } else { rawSize += payloadSize; } @@ -63,6 +67,7 @@ void ChunkFormat::verifyCompression(uint8_t type) { if ((type != document::CompressionConfig::LZ4) && + (type != document::CompressionConfig::ZSTD) && (type != document::CompressionConfig::NONE)) { throw ChunkException(make_string("Unknown compressiontype %d", type), VESPA_STRLOC); } diff --git a/staging_vespalib/src/vespa/vespalib/data/databuffer.h b/staging_vespalib/src/vespa/vespalib/data/databuffer.h index a9ed53e2f84..875aa2f0074 100644 --- a/staging_vespalib/src/vespa/vespalib/data/databuffer.h +++ b/staging_vespalib/src/vespa/vespalib/data/databuffer.h @@ -1,10 +1,8 @@ // Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. #pragma once -#include -#include -#include -#include +#include +#include #include namespace vespalib { diff --git a/vespalib/src/vespa/vespalib/util/optimized.h b/vespalib/src/vespa/vespalib/util/optimized.h index ec11bbb686a..c5d0404e310 100644 --- a/vespalib/src/vespa/vespalib/util/optimized.h +++ b/vespalib/src/vespa/vespalib/util/optimized.h @@ -4,7 +4,7 @@ #pragma once -#include +#include namespace vespalib { -- cgit v1.2.3 From 024f86caf8031731f24054688a4c783b493eb116 Mon Sep 17 00:00:00 2001 From: Henning Baldersheim Date: Tue, 13 Jun 2017 13:46:02 +0200 Subject: Move to separate compression namespace to avoid pollution. --- .../document/fieldvalue/serializablearray.cpp | 11 +-- .../serialization/vespadocumentserializer.cpp | 1 + document/src/vespa/document/util/compressor.cpp | 13 +++- document/src/vespa/document/util/compressor.h | 8 ++- document/src/vespa/document/util/lz4compressor.cpp | 2 - document/src/vespa/document/util/lz4compressor.h | 1 - .../src/vespa/document/util/zstdcompressor.cpp | 1 - document/src/vespa/document/util/zstdcompressor.h | 1 - searchlib/src/vespa/searchlib/common/packets.cpp | 6 +- .../src/vespa/searchlib/docstore/chunkformat.cpp | 33 ++++----- .../src/vespa/searchlib/docstore/documentstore.cpp | 80 ++++++++++------------ .../src/vespa/searchlib/docstore/visitcache.cpp | 6 +- searchlib/src/vespa/searchlib/grouping/sketch.h | 4 +- 13 files changed, 86 insertions(+), 81 deletions(-) (limited to 'document') diff --git a/document/src/vespa/document/fieldvalue/serializablearray.cpp b/document/src/vespa/document/fieldvalue/serializablearray.cpp index c8beea5e79c..0fb85a2aaf2 100644 --- a/document/src/vespa/document/fieldvalue/serializablearray.cpp +++ b/document/src/vespa/document/fieldvalue/serializablearray.cpp @@ -2,14 +2,15 @@ #include "serializablearray.h" #include #include +#include #include #include -#include #include LOG_SETUP(".document.serializable-array"); using std::vector; +using vespalib::make_string; namespace document { @@ -179,6 +180,7 @@ SerializableArray::clear(int id) void SerializableArray::deCompress() // throw (DeserializeException) { + using document::compression::decompress; // will only do this once LOG_ASSERT(_compSerData); @@ -201,15 +203,14 @@ SerializableArray::deCompress() // throw (DeserializeException) false); } catch (const std::runtime_error & e) { throw DeserializeException( - vespalib::make_string( "Document was compressed with code unknown code %d", _serializedCompression), + make_string( "Document was compressed with code unknown code %d", _serializedCompression), VESPA_STRLOC); } if (unCompressed.getDataLen() != (size_t)_uncompressedLength) { throw DeserializeException( - vespalib::make_string( - "Did not decompress to the expected length: had %" PRIu64 ", wanted %d, got %" PRIu64, - _compSerData->getRemaining(), _uncompressedLength, unCompressed.getDataLen()), + make_string("Did not decompress to the expected length: had %zu, wanted %d, got %zu", + _compSerData->getRemaining(), _uncompressedLength, unCompressed.getDataLen()), VESPA_STRLOC); } assert(newSerialization->getBuffer() == unCompressed.getData()); diff --git a/document/src/vespa/document/serialization/vespadocumentserializer.cpp b/document/src/vespa/document/serialization/vespadocumentserializer.cpp index 1f6a4a4f0b0..a95dd092f20 100644 --- a/document/src/vespa/document/serialization/vespadocumentserializer.cpp +++ b/document/src/vespa/document/serialization/vespadocumentserializer.cpp @@ -263,6 +263,7 @@ vespalib::ConstBufferRef compressStream(const CompressionConfig &config, nbostream &stream, vespalib::DataBuffer & compressed_data) { + using compression::compress; vespalib::ConstBufferRef buf(stream.c_str(), stream.size()); if (config.useCompression() && bigEnough(stream.size(), config)) { CompressionConfig::Type compressedType = compress(config, vespalib::ConstBufferRef(stream.c_str(), stream.size()), compressed_data, false); diff --git a/document/src/vespa/document/util/compressor.cpp b/document/src/vespa/document/util/compressor.cpp index c9fac4cf4b0..6ada9bca3db 100644 --- a/document/src/vespa/document/util/compressor.cpp +++ b/document/src/vespa/document/util/compressor.cpp @@ -11,7 +11,7 @@ using vespalib::ConstBufferRef; using vespalib::DataBuffer; using vespalib::make_string; -namespace document { +namespace document::compression { CompressionConfig::Type compress(ICompressor & compressor, const CompressionConfig & compression, const ConstBufferRef & org, DataBuffer & dest) @@ -127,4 +127,15 @@ decompress(const CompressionConfig::Type & type, size_t uncompressedLen, const C } } +size_t computeMaxCompressedsize(CompressionConfig::Type type, size_t payloadSize) { + if (type == CompressionConfig::LZ4) { + document::LZ4Compressor lz4; + return lz4.adjustProcessLen(0, payloadSize); + } else if (type == CompressionConfig::ZSTD) { + document::ZStdCompressor zstd; + return zstd.adjustProcessLen(0, payloadSize); + } + return payloadSize; +} + } diff --git a/document/src/vespa/document/util/compressor.h b/document/src/vespa/document/util/compressor.h index 31e265415b7..11103beb93c 100644 --- a/document/src/vespa/document/util/compressor.h +++ b/document/src/vespa/document/util/compressor.h @@ -15,9 +15,10 @@ public: virtual bool process(const CompressionConfig& config, const void * input, size_t inputLen, void * output, size_t & outputLen) = 0; virtual bool unprocess(const void * input, size_t inputLen, void * output, size_t & outputLen) = 0; virtual size_t adjustProcessLen(uint16_t options, size_t len) const = 0; - virtual size_t adjustUnProcessLen(uint16_t options, size_t len) const = 0; }; +namespace compression { + /** * Will try to compress a buffer according to the config. If the criteria can not * be met it will return NONE and dest will get the input buffer. @@ -42,4 +43,9 @@ CompressionConfig::Type compress(const CompressionConfig & compression, const ve */ void decompress(const CompressionConfig::Type & compression, size_t uncompressedLen, const vespalib::ConstBufferRef & org, vespalib::DataBuffer & dest, bool allowSwap); + +size_t computeMaxCompressedsize(CompressionConfig::Type type, size_t uncompressedSize); + +} + } diff --git a/document/src/vespa/document/util/lz4compressor.cpp b/document/src/vespa/document/util/lz4compressor.cpp index cb2d41269b0..f7e9a443dcc 100644 --- a/document/src/vespa/document/util/lz4compressor.cpp +++ b/document/src/vespa/document/util/lz4compressor.cpp @@ -8,11 +8,9 @@ using vespalib::alloc::Alloc; - namespace document { size_t LZ4Compressor::adjustProcessLen(uint16_t, size_t len) const { return LZ4_compressBound(len); } -size_t LZ4Compressor::adjustUnProcessLen(uint16_t, size_t len) const { return len; } bool LZ4Compressor::process(const CompressionConfig& config, const void * inputV, size_t inputLen, void * outputV, size_t & outputLenV) diff --git a/document/src/vespa/document/util/lz4compressor.h b/document/src/vespa/document/util/lz4compressor.h index 0c3eee82402..aa1726cd639 100644 --- a/document/src/vespa/document/util/lz4compressor.h +++ b/document/src/vespa/document/util/lz4compressor.h @@ -11,7 +11,6 @@ public: bool process(const CompressionConfig& config, const void * input, size_t inputLen, void * output, size_t & outputLen) override; bool unprocess(const void * input, size_t inputLen, void * output, size_t & outputLen) override; size_t adjustProcessLen(uint16_t options, size_t len) const override; - size_t adjustUnProcessLen(uint16_t options, size_t len) const override; }; } diff --git a/document/src/vespa/document/util/zstdcompressor.cpp b/document/src/vespa/document/util/zstdcompressor.cpp index 78ba15884f5..bb13d8009c1 100644 --- a/document/src/vespa/document/util/zstdcompressor.cpp +++ b/document/src/vespa/document/util/zstdcompressor.cpp @@ -10,7 +10,6 @@ using vespalib::alloc::Alloc; namespace document { size_t ZStdCompressor::adjustProcessLen(uint16_t, size_t len) const { return ZSTD_compressBound(len); } -size_t ZStdCompressor::adjustUnProcessLen(uint16_t, size_t len) const { return len; } bool ZStdCompressor::process(const CompressionConfig& config, const void * inputV, size_t inputLen, void * outputV, size_t & outputLenV) diff --git a/document/src/vespa/document/util/zstdcompressor.h b/document/src/vespa/document/util/zstdcompressor.h index 61b238e0bfb..5c52c144651 100644 --- a/document/src/vespa/document/util/zstdcompressor.h +++ b/document/src/vespa/document/util/zstdcompressor.h @@ -11,7 +11,6 @@ public: bool process(const CompressionConfig& config, const void * input, size_t inputLen, void * output, size_t & outputLen) override; bool unprocess(const void * input, size_t inputLen, void * output, size_t & outputLen) override; size_t adjustProcessLen(uint16_t options, size_t len) const override; - size_t adjustUnProcessLen(uint16_t options, size_t len) const override; }; } diff --git a/searchlib/src/vespa/searchlib/common/packets.cpp b/searchlib/src/vespa/searchlib/common/packets.cpp index 50a02995a40..d885442ec03 100644 --- a/searchlib/src/vespa/searchlib/common/packets.cpp +++ b/searchlib/src/vespa/searchlib/common/packets.cpp @@ -151,7 +151,7 @@ FS4PersistentPacketStreamer::Decode(FNET_DataBuffer *src, uint32_t plen, uint32_ uint32_t uncompressed_size = src->ReadInt32(); ConstBufferRef org(src->GetData(), plen - sizeof(uint32_t)); vespalib::DataBuffer uncompressed(uncompressed_size); - document::decompress(compressionType, uncompressed_size, org, uncompressed, false); + document::compression::decompress(compressionType, uncompressed_size, org, uncompressed, false); FNET_DataBuffer buf(uncompressed.getData(), uncompressed.getDataLen()); decodePacket(packet, buf, uncompressed_size, pcode); src->DataToDead(plen - sizeof(uint32_t)); @@ -192,7 +192,7 @@ FS4PersistentPacketStreamer::Encode(FNET_Packet *packet, uint32_t chid, FNET_Dat CompressionConfig config(_compressionType, _compressionLevel, 90); ConstBufferRef org(dst->GetData() + packet_start + header_len, body_len); vespalib::DataBuffer compressed(org.size()); - CompressionConfig::Type r = document::compress(config, org, compressed, false); + CompressionConfig::Type r = document::compression::compress(config, org, compressed, false); if (r != CompressionConfig::NONE) { dst->DataToFree(body_len + header_len); // sizeof(data + header + uncompressed_size) - sizeof(uint32_t) @@ -455,7 +455,7 @@ FS4Packet_PreSerialized::FS4Packet_PreSerialized(FNET_Packet & packet) 90); ConstBufferRef org(tmp.GetData(), tmp.GetDataLen()); vespalib::DataBuffer compressed(org.size()); - _compressionType = document::compress(config, org, compressed, false); + _compressionType = document::compression::compress(config, org, compressed, false); if (_compressionType != CompressionConfig::NONE) { _data.WriteInt32Fast(body_len); _data.WriteBytes(compressed.getData(), compressed.getDataLen()); diff --git a/searchlib/src/vespa/searchlib/docstore/chunkformat.cpp b/searchlib/src/vespa/searchlib/docstore/chunkformat.cpp index c690e662136..4d4d4c48130 100644 --- a/searchlib/src/vespa/searchlib/docstore/chunkformat.cpp +++ b/searchlib/src/vespa/searchlib/docstore/chunkformat.cpp @@ -1,14 +1,17 @@ // Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. #include "chunkformats.h" -#include -#include +#include #include namespace search { using vespalib::make_string; using vespalib::Exception; +using document::compression::compress; +using document::compression::decompress; +using document::compression::computeMaxCompressedsize; +using document::CompressionConfig; ChunkException::ChunkException(const vespalib::stringref & msg, const vespalib::stringref & location) : Exception(make_string("Illegal chunk: %s", msg.c_str()), location) @@ -16,7 +19,7 @@ ChunkException::ChunkException(const vespalib::stringref & msg, const vespalib:: } void -ChunkFormat::pack(uint64_t lastSerial, vespalib::DataBuffer & compressed, const document::CompressionConfig & compression) +ChunkFormat::pack(uint64_t lastSerial, vespalib::DataBuffer & compressed, const CompressionConfig & compression) { vespalib::nbostream & os = _dataBuf; os << lastSerial; @@ -30,7 +33,7 @@ ChunkFormat::pack(uint64_t lastSerial, vespalib::DataBuffer & compressed, const const size_t oldPos(compressed.getDataLen()); compressed.writeInt8(compression.type); compressed.writeInt32(os.size()); - document::CompressionConfig::Type type(document::compress(compression, vespalib::ConstBufferRef(os.c_str(), os.size()), compressed, false)); + CompressionConfig::Type type(compress(compression, vespalib::ConstBufferRef(os.c_str(), os.size()), compressed, false)); if (compression.type != type) { compressed.getData()[oldPos] = type; } @@ -43,32 +46,22 @@ ChunkFormat::pack(uint64_t lastSerial, vespalib::DataBuffer & compressed, const } size_t -ChunkFormat::getMaxPackSize(const document::CompressionConfig & compression) const +ChunkFormat::getMaxPackSize(const CompressionConfig & compression) const { const size_t OVERHEAD(0); const size_t MINSIZE(1 + 1 + 4 + 4 + includeSerializedSize() ? 4 : 0); // version + type + real length + crc + lastserial const size_t formatSpecificSize(getHeaderSize()); size_t rawSize(MINSIZE + formatSpecificSize + OVERHEAD); const size_t payloadSize(_dataBuf.size() + 8); - // This is a little dirty -> need interface. - if (compression.type == document::CompressionConfig::LZ4) { - document::LZ4Compressor lz4; - rawSize += lz4.adjustProcessLen(0, payloadSize); - } else if (compression.type == document::CompressionConfig::ZSTD) { - document::ZStdCompressor zstd; - rawSize += zstd.adjustProcessLen(0, payloadSize); - } else { - rawSize += payloadSize; - } - return rawSize; + return rawSize + computeMaxCompressedsize(compression.type, payloadSize); } void ChunkFormat::verifyCompression(uint8_t type) { - if ((type != document::CompressionConfig::LZ4) && - (type != document::CompressionConfig::ZSTD) && - (type != document::CompressionConfig::NONE)) { + if ((type != CompressionConfig::LZ4) && + (type != CompressionConfig::ZSTD) && + (type != CompressionConfig::NONE)) { throw ChunkException(make_string("Unknown compressiontype %d", type), VESPA_STRLOC); } } @@ -150,7 +143,7 @@ ChunkFormat::deserializeBody(vespalib::nbostream & is) // This is a dirty trick to fool some odd sanity checking in DataBuffer::swap vespalib::DataBuffer uncompressed(const_cast(is.peek()), (size_t)0); vespalib::ConstBufferRef data(is.peek(), is.size() - sizeof(uint32_t)); - document::decompress(document::CompressionConfig::Type(type), uncompressedLen, data, uncompressed, true); + decompress(CompressionConfig::Type(type), uncompressedLen, data, uncompressed, true); assert(uncompressed.getData() == uncompressed.getDead()); if (uncompressed.getData() != data.c_str()) { const size_t sz(uncompressed.getDataLen()); diff --git a/searchlib/src/vespa/searchlib/docstore/documentstore.cpp b/searchlib/src/vespa/searchlib/docstore/documentstore.cpp index fd7df29cd49..ffb56407198 100644 --- a/searchlib/src/vespa/searchlib/docstore/documentstore.cpp +++ b/searchlib/src/vespa/searchlib/docstore/documentstore.cpp @@ -8,6 +8,11 @@ #include #include +using document::DocumentTypeRepo; +using document::CompressionConfig; +using document::compression::compress; +using document::compression::decompress; + namespace search { namespace { @@ -15,13 +20,13 @@ namespace { class DocumentVisitorAdapter : public IBufferVisitor { public: - DocumentVisitorAdapter(const document::DocumentTypeRepo & repo, IDocumentVisitor & visitor) : + DocumentVisitorAdapter(const DocumentTypeRepo & repo, IDocumentVisitor & visitor) : _repo(repo), _visitor(visitor) { } void visit(uint32_t lid, vespalib::ConstBufferRef buf) override; private: - const document::DocumentTypeRepo & _repo; + const DocumentTypeRepo & _repo; IDocumentVisitor & _visitor; }; @@ -45,7 +50,7 @@ public: using Alloc = vespalib::alloc::Alloc; typedef std::unique_ptr UP; - Value() : _compressedSize(0), _uncompressedSize(0), _compression(document::CompressionConfig::NONE) {} + Value() : _compressedSize(0), _uncompressedSize(0), _compression(CompressionConfig::NONE) {} Value(Value &&rhs) : _compressedSize(rhs._compressedSize), @@ -69,12 +74,12 @@ public: return *this; } - void setCompression(document::CompressionConfig::Type comp, size_t uncompressedSize) { + void setCompression(CompressionConfig::Type comp, size_t uncompressedSize) { _compression = comp; _uncompressedSize = uncompressedSize; } - document::CompressionConfig::Type getCompression() const { return _compression; } + CompressionConfig::Type getCompression() const { return _compression; } size_t getUncompressedSize() const { return _uncompressedSize; } @@ -82,13 +87,13 @@ public: * Compress buffer into temporary buffer and copy temporary buffer to * value along with compression config. */ - void set(vespalib::DataBuffer &&buf, ssize_t len, const document::CompressionConfig &compression); + void set(vespalib::DataBuffer &&buf, ssize_t len, const CompressionConfig &compression); /** * Decompress value into temporary buffer and deserialize document from * the temporary buffer. */ - document::Document::UP deserializeDocument(const document::DocumentTypeRepo &repo); + document::Document::UP deserializeDocument(const DocumentTypeRepo &repo); size_t size() const { return _compressedSize; } bool empty() const { return size() == 0; } @@ -98,36 +103,32 @@ public: private: size_t _compressedSize; size_t _uncompressedSize; - document::CompressionConfig::Type _compression; + CompressionConfig::Type _compression; Alloc _buf; }; class BackingStore { public: - BackingStore(IDataStore &store, const document::CompressionConfig &compression) : + BackingStore(IDataStore &store, const CompressionConfig &compression) : _backingStore(store), _compression(compression) { } bool read(DocumentIdT key, Value &value) const; - void visit(const IDocumentStore::LidVector &lids, const document::DocumentTypeRepo &repo, IDocumentVisitor &visitor) const; + void visit(const IDocumentStore::LidVector &lids, const DocumentTypeRepo &repo, IDocumentVisitor &visitor) const; void write(DocumentIdT, const Value &) {} void erase(DocumentIdT) {} - const document::CompressionConfig &getCompression(void) const { return _compression; } + const CompressionConfig &getCompression(void) const { return _compression; } private: IDataStore &_backingStore; - const document::CompressionConfig _compression; + const CompressionConfig _compression; }; void -Value::set(vespalib::DataBuffer &&buf, - ssize_t len, - const document::CompressionConfig &compression) { +Value::set(vespalib::DataBuffer &&buf, ssize_t len, const CompressionConfig &compression) { //Underlying buffer must be identical to allow swap. vespalib::DataBuffer compressed(buf.getData(), 0u); - document::CompressionConfig::Type type = - document::compress(compression, - vespalib::ConstBufferRef(buf.getData(), len), - compressed, true); + CompressionConfig::Type type = compress(compression, vespalib::ConstBufferRef(buf.getData(), len), + compressed, true); _compressedSize = compressed.getDataLen(); if (buf.getData() == compressed.getData()) { // Uncompressed so we can just steal the underlying buffer. @@ -135,28 +136,25 @@ Value::set(vespalib::DataBuffer &&buf, } else { compressed.stealBuffer().swap(_buf); } - assert(((type == document::CompressionConfig::NONE) && + assert(((type == CompressionConfig::NONE) && (len == ssize_t(_compressedSize))) || - ((type != document::CompressionConfig::NONE) && + ((type != CompressionConfig::NONE) && (len > ssize_t(_compressedSize)))); setCompression(type, len); } document::Document::UP -Value::deserializeDocument(const document::DocumentTypeRepo &repo) { +Value::deserializeDocument(const DocumentTypeRepo &repo) { vespalib::DataBuffer uncompressed((char *) _buf.get(), (size_t) 0); - document::decompress(getCompression(), - getUncompressedSize(), - vespalib::ConstBufferRef(*this, size()), - uncompressed, true); + decompress(getCompression(), getUncompressedSize(), vespalib::ConstBufferRef(*this, size()), uncompressed, true); vespalib::nbostream is(uncompressed.getData(), uncompressed.getDataLen()); return document::Document::UP(new document::Document(repo, is)); } void -BackingStore::visit(const IDocumentStore::LidVector &lids, const document::DocumentTypeRepo &repo, +BackingStore::visit(const IDocumentStore::LidVector &lids, const DocumentTypeRepo &repo, IDocumentVisitor &visitor) const { DocumentVisitorAdapter adapter(repo, visitor); _backingStore.read(lids, adapter); @@ -213,7 +211,7 @@ DocumentStore::useCache() const { } void -DocumentStore::visit(const LidVector & lids, const document::DocumentTypeRepo &repo, IDocumentVisitor & visitor) const +DocumentStore::visit(const LidVector & lids, const DocumentTypeRepo &repo, IDocumentVisitor & visitor) const { if (useCache() && _config.allowVisitCaching() && visitor.allowVisitCaching()) { docstore::BlobSet blobSet = _visitCache->read(lids).getBlobSet(); @@ -227,7 +225,7 @@ DocumentStore::visit(const LidVector & lids, const document::DocumentTypeRepo &r } document::Document::UP -DocumentStore::read(DocumentIdT lid, const document::DocumentTypeRepo &repo) const +DocumentStore::read(DocumentIdT lid, const DocumentTypeRepo &repo) const { document::Document::UP retval; Value value; @@ -305,25 +303,23 @@ DocumentStore::getLastFlushTime() const template class DocumentStore::WrapVisitor : public IDataStoreVisitor { - Visitor &_visitor; - const document::DocumentTypeRepo &_repo; - const document::CompressionConfig &_compression; - IDocumentStore &_ds; - uint64_t _syncToken; + Visitor &_visitor; + const DocumentTypeRepo &_repo; + const CompressionConfig &_compression; + IDocumentStore &_ds; + uint64_t _syncToken; public: void visit(uint32_t lid, const void *buffer, size_t sz) override; WrapVisitor(Visitor &visitor, - const document::DocumentTypeRepo &repo, - const document::CompressionConfig &compresion, + const DocumentTypeRepo &repo, + const CompressionConfig &compresion, IDocumentStore &ds, uint64_t syncToken); inline void rewrite(uint32_t lid, const document::Document &doc); - inline void rewrite(uint32_t lid); - inline void visitRemove(uint32_t lid); }; @@ -429,8 +425,8 @@ DocumentStore::WrapVisitor::visit(uint32_t lid, template DocumentStore::WrapVisitor:: WrapVisitor(Visitor &visitor, - const document::DocumentTypeRepo &repo, - const document::CompressionConfig &compression, + const DocumentTypeRepo &repo, + const CompressionConfig &compression, IDocumentStore &ds, uint64_t syncToken) : _visitor(visitor), @@ -445,7 +441,7 @@ WrapVisitor(Visitor &visitor, void DocumentStore::accept(IDocumentStoreReadVisitor &visitor, IDocumentStoreVisitorProgress &visitorProgress, - const document::DocumentTypeRepo &repo) + const DocumentTypeRepo &repo) { WrapVisitor wrap(visitor, repo, _store->getCompression(), @@ -460,7 +456,7 @@ DocumentStore::accept(IDocumentStoreReadVisitor &visitor, void DocumentStore::accept(IDocumentStoreRewriteVisitor &visitor, IDocumentStoreVisitorProgress &visitorProgress, - const document::DocumentTypeRepo &repo) + const DocumentTypeRepo &repo) { WrapVisitor wrap(visitor, repo, diff --git a/searchlib/src/vespa/searchlib/docstore/visitcache.cpp b/searchlib/src/vespa/searchlib/docstore/visitcache.cpp index 8fac288a23a..6771fb3bec6 100644 --- a/searchlib/src/vespa/searchlib/docstore/visitcache.cpp +++ b/searchlib/src/vespa/searchlib/docstore/visitcache.cpp @@ -90,7 +90,7 @@ CompressedBlobSet::CompressedBlobSet(const document::CompressionConfig &compress if ( ! _positions.empty() ) { DataBuffer compressed; ConstBufferRef org = uncompressed.getBuffer(); - _compression = document::compress(compression, org, compressed, false); + _compression = document::compression::compress(compression, org, compressed, false); _buffer.resize(compressed.getDataLen()); memcpy(_buffer, compressed.getData(), compressed.getDataLen()); } @@ -99,10 +99,12 @@ CompressedBlobSet::CompressedBlobSet(const document::CompressionConfig &compress BlobSet CompressedBlobSet::getBlobSet() const { + using document::compression::decompress; // These are frequent lage allocations that are to expensive to mmap. DataBuffer uncompressed(0, 1, Alloc::alloc(0, 16 * MemoryAllocator::HUGEPAGE_SIZE)); if ( ! _positions.empty() ) { - document::decompress(_compression, getBufferSize(_positions), ConstBufferRef(_buffer.c_str(), _buffer.size()), uncompressed, false); + decompress(_compression, getBufferSize(_positions), + ConstBufferRef(_buffer.c_str(), _buffer.size()), uncompressed, false); } return BlobSet(_positions, uncompressed.stealBuffer()); } diff --git a/searchlib/src/vespa/searchlib/grouping/sketch.h b/searchlib/src/vespa/searchlib/grouping/sketch.h index 50209b19bbd..a036d23ef36 100644 --- a/searchlib/src/vespa/searchlib/grouping/sketch.h +++ b/searchlib/src/vespa/searchlib/grouping/sketch.h @@ -209,7 +209,7 @@ compress_buckets_into(char *buffer, uint32_t size) const { vespalib::ConstBufferRef org(&bucket[0], BUCKET_COUNT); vespalib::DataBuffer compress_buffer(buffer, size); document::CompressionConfig::Type r = - document::compress(config, org, compress_buffer, false); + document::compression::compress(config, org, compress_buffer, false); assert(compress_buffer.getDead() == buffer); if (r == document::CompressionConfig::LZ4) { assert(compress_buffer.getDataLen() < BUCKET_COUNT); @@ -228,7 +228,7 @@ decompress_buckets_from(char *buffer, uint32_t size) { } else { vespalib::ConstBufferRef compressed(buffer, size); vespalib::DataBuffer uncompressed(reinterpret_cast(&bucket[0]), BUCKET_COUNT); - document::decompress(document::CompressionConfig::LZ4, BUCKET_COUNT, compressed, uncompressed, false); + document::compression::decompress(document::CompressionConfig::LZ4, BUCKET_COUNT, compressed, uncompressed, false); } } template -- cgit v1.2.3 From 01ee344cf72c6f9e10f33515d49f3d461004f855 Mon Sep 17 00:00:00 2001 From: Henning Baldersheim Date: Tue, 13 Jun 2017 14:01:48 +0200 Subject: Move to separate compression namespace to avoid pollution. --- .../src/tests/serialization/compression_test.cpp | 22 +++++++++++++++++++--- .../tests/proton/summaryengine/summaryengine.cpp | 4 ++-- .../proton/summaryengine/docsum_by_slime.cpp | 6 ++++-- 3 files changed, 25 insertions(+), 7 deletions(-) (limited to 'document') diff --git a/document/src/tests/serialization/compression_test.cpp b/document/src/tests/serialization/compression_test.cpp index e5464100673..240a309d180 100644 --- a/document/src/tests/serialization/compression_test.cpp +++ b/document/src/tests/serialization/compression_test.cpp @@ -1,28 +1,44 @@ // Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. -// Unit tests for annotation serialization. -#include #include #include #include #include +#include LOG_SETUP("compression_test"); using namespace document; +using namespace document::compression; using namespace vespalib; static vespalib::string _G_compressableText("AAAAAAAAAAAAAAABBBBBBBBBBBBBBBBBCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCDDDDDDDDDDDDDDDDDDDDDDDDDEEEEEEEEEEEEEEE" "AAAAAAAAAAAAAAABBBBBBBBBBBBBBBBBCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCDDDDDDDDDDDDDDDDDDDDDDDDDEEEEEEEEEEEEEEE" "AAAAAAAAAAAAAAABBBBBBBBBBBBBBBBBCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCDDDDDDDDDDDDDDDDDDDDDDDDDEEEEEEEEEEEEEEE" "AAAAAAAAAAAAAAABBBBBBBBBBBBBBBBBCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCDDDDDDDDDDDDDDDDDDDDDDDDDEEEEEEEEEEEEEEE" - "AAAAAAAAAAAAAAABBBBBBBBBBBBBBBBBCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCDDDDDDDDDDDDDDDDDDDDDDDDDEEEEEEEEEEEEEEE"); + "AAAAAAAAAAAAAAABBBBBBBBBBBBBBBBBCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCDDDDDDDDDDDDDDDDDDDDDDDDDEEEEEEEEEEEEEEE" + "XYZABCDEFGHIJGJMNOPQRSTUVW" + "AAAAAAAAAAAAAAABBBBBBBBBBBBBBBBBCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCDDDDDDDDDDDDDDDDDDDDDDDDDEEEEEEEEEEEEEEE" + "AAAAAAAAAAAAAAABBBBBBBBBBBBBBBBBCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCDDDDDDDDDDDDDDDDDDDDDDDDDEEEEEEEEEEEEEEE" + "AAAAAAAAAAAAAAABBBBBBBBBBBBBBBBBCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCDDDDDDDDDDDDDDDDDDDDDDDDDEEEEEEEEEEEEEEE" + "AAAAAAAAAAAAAAABBBBBBBBBBBBBBBBBCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCDDDDDDDDDDDDDDDDDDDDDDDDDEEEEEEEEEEEEEEE" + "AAAAAAAAAAAAAAABBBBBBBBBBBBBBBBBCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCDDDDDDDDDDDDDDDDDDDDDDDDDEEEEEEEEEEEEEEE" + "XYZABCDEFGHIJGJMNOPQRSTUVW"); TEST("requireThatLZ4CompressFine") { CompressionConfig cfg(CompressionConfig::Type::LZ4); ConstBufferRef ref(_G_compressableText.c_str(), _G_compressableText.size()); DataBuffer compressed; EXPECT_EQUAL(CompressionConfig::Type::LZ4, compress(cfg, ref, compressed, false)); + EXPECT_EQUAL(66u, compressed.getDataLen()); +} + +TEST("requireThatZStdCompressFine") { + CompressionConfig cfg(CompressionConfig::Type::ZSTD); + ConstBufferRef ref(_G_compressableText.c_str(), _G_compressableText.size()); + DataBuffer compressed; + EXPECT_EQUAL(CompressionConfig::Type::ZSTD, compress(cfg, ref, compressed, false)); + EXPECT_EQUAL(64u, compressed.getDataLen()); } TEST_MAIN() { diff --git a/searchcore/src/tests/proton/summaryengine/summaryengine.cpp b/searchcore/src/tests/proton/summaryengine/summaryengine.cpp index 80b78d420ec..dab4dcf1dff 100644 --- a/searchcore/src/tests/proton/summaryengine/summaryengine.cpp +++ b/searchcore/src/tests/proton/summaryengine/summaryengine.cpp @@ -381,7 +381,7 @@ verifyReply(size_t count, document::CompressionConfig::Type encoding, size_t org DataBuffer uncompressed; ConstBufferRef blob(ret[2]._data._buf, ret[2]._data._len); - document::decompress(CompressionConfig::toType(ret[0]._intval8), ret[1]._intval32, blob, uncompressed, false); + compression::decompress(CompressionConfig::toType(ret[0]._intval8), ret[1]._intval32, blob, uncompressed, false); EXPECT_EQUAL(orgSize, uncompressed.getDataLen()); vespalib::Slime summaries; @@ -402,7 +402,7 @@ verifyRPC(size_t count, CompressionConfig config(requestCompression, 9, 100); DataBuffer compressed(const_cast(buf.get().data), buf.get().size); - CompressionConfig::Type type = document::compress(config, ConstBufferRef(buf.get().data, buf.get().size), compressed, true); + CompressionConfig::Type type = compression::compress(config, ConstBufferRef(buf.get().data, buf.get().size), compressed, true); EXPECT_EQUAL(type, requestCompression); FRT_RPCRequest * request = new FRT_RPCRequest(); diff --git a/searchcore/src/vespa/searchcore/proton/summaryengine/docsum_by_slime.cpp b/searchcore/src/vespa/searchcore/proton/summaryengine/docsum_by_slime.cpp index 8fffc4bb831..465c6fa9d03 100644 --- a/searchcore/src/vespa/searchcore/proton/summaryengine/docsum_by_slime.cpp +++ b/searchcore/src/vespa/searchcore/proton/summaryengine/docsum_by_slime.cpp @@ -90,12 +90,14 @@ DocsumByRPC::DocsumByRPC(DocsumBySlime & slimeDocsumServer) : void DocsumByRPC::getDocsums(FRT_RPCRequest & req) { + using document::compression::decompress; + using document::compression::compress; FRT_Values &arg = *req.GetParams(); uint8_t encoding = arg[0]._intval8; uint32_t uncompressedSize = arg[1]._intval32; DataBuffer uncompressed(arg[2]._data._buf, arg[2]._data._len); ConstBufferRef blob(arg[2]._data._buf, arg[2]._data._len); - document::decompress(CompressionConfig::toType(encoding), uncompressedSize, blob, uncompressed, true); + decompress(CompressionConfig::toType(encoding), uncompressedSize, blob, uncompressed, true); assert(uncompressedSize == uncompressed.getDataLen()); vespalib::Slime summariesToGet; BinaryFormat::decode(Memory(uncompressed.getData(), uncompressed.getDataLen()), summariesToGet); @@ -108,7 +110,7 @@ DocsumByRPC::getDocsums(FRT_RPCRequest & req) BinaryFormat::encode(*summaries, output); ConstBufferRef buf(rbuf.GetDrainPos(), rbuf.GetUsedLen()); DataBuffer compressed(rbuf.GetWritableDrainPos(0), rbuf.GetUsedLen()); - CompressionConfig::Type type = document::compress(getCompressionConfig(), buf, compressed, true); + CompressionConfig::Type type = compress(getCompressionConfig(), buf, compressed, true); FRT_Values &ret = *req.GetReturn(); ret.AddInt8(type); -- cgit v1.2.3 From 50d50dacdcc864f5ed6683e8f9887634846bbaae Mon Sep 17 00:00:00 2001 From: Henning Baldersheim Date: Tue, 13 Jun 2017 14:16:23 +0200 Subject: Followup on @vekterli's comments. Actually propagate ZSTD. --- document/src/vespa/document/util/lz4compressor.cpp | 8 ++------ .../src/vespa/searchcore/proton/docsummary/summarymanager.cpp | 3 +++ .../src/vespa/searchcore/proton/docsummary/summarymanager.h | 1 + 3 files changed, 6 insertions(+), 6 deletions(-) (limited to 'document') diff --git a/document/src/vespa/document/util/lz4compressor.cpp b/document/src/vespa/document/util/lz4compressor.cpp index f7e9a443dcc..9b41df239a9 100644 --- a/document/src/vespa/document/util/lz4compressor.cpp +++ b/document/src/vespa/document/util/lz4compressor.cpp @@ -26,10 +26,8 @@ LZ4Compressor::process(const CompressionConfig& config, const void * inputV, siz Alloc state = Alloc::alloc(LZ4_sizeofState()); sz = LZ4_compress_fast_extState(state.get(), input, output, inputLen, maxOutputLen, 1); } - if (sz != 0) { - outputLenV = sz; - } assert(sz != 0); + outputLenV = sz; return (sz != 0); } @@ -40,10 +38,8 @@ LZ4Compressor::unprocess(const void * inputV, size_t inputLen, void * outputV, s const char * input(static_cast(inputV)); char * output(static_cast(outputV)); int sz = LZ4_decompress_safe(input, output, inputLen, outputLenV); - if (sz > 0) { - outputLenV = sz; - } assert(sz > 0); + outputLenV = sz; return (sz > 0); } diff --git a/searchcore/src/vespa/searchcore/proton/docsummary/summarymanager.cpp b/searchcore/src/vespa/searchcore/proton/docsummary/summarymanager.cpp index ae4b52b56fb..e5a6d6aed22 100644 --- a/searchcore/src/vespa/searchcore/proton/docsummary/summarymanager.cpp +++ b/searchcore/src/vespa/searchcore/proton/docsummary/summarymanager.cpp @@ -118,6 +118,7 @@ deriveCompression(const T & config) { if (config.type == T::LZ4) { compression.type = document::CompressionConfig::LZ4; } else if (config.type == T::ZSTD) { + compression.type = document::CompressionConfig::ZSTD; } compression.compressionLevel = config.level; return compression; @@ -161,6 +162,8 @@ SummaryManager::SummaryManager(vespalib::ThreadExecutor & executor, summary.compact2buckets ? bucketizer : search::IBucketizer::SP())); } +SummaryManager::~SummaryManager() {} + void SummaryManager::putDocument(uint64_t syncToken, const Document & doc, search::DocumentIdT lid) { diff --git a/searchcore/src/vespa/searchcore/proton/docsummary/summarymanager.h b/searchcore/src/vespa/searchcore/proton/docsummary/summarymanager.h index b55345ae470..a1bcd34fd0f 100644 --- a/searchcore/src/vespa/searchcore/proton/docsummary/summarymanager.h +++ b/searchcore/src/vespa/searchcore/proton/docsummary/summarymanager.h @@ -82,6 +82,7 @@ public: const search::common::FileHeaderContext &fileHeaderContext, search::transactionlog::SyncProxy &tlSyncer, const std::shared_ptr & bucketizer); + ~SummaryManager(); void putDocument(uint64_t syncToken, const document::Document & doc, search::DocumentIdT lid); -- cgit v1.2.3