summaryrefslogtreecommitdiffstats
path: root/document
diff options
context:
space:
mode:
authorJon Bratseth <bratseth@yahoo-inc.com>2017-06-13 18:52:46 +0200
committerJon Bratseth <bratseth@yahoo-inc.com>2017-06-13 18:52:46 +0200
commit0155137e7d56730665d406c37aa75649e5a3418a (patch)
tree05100870215c7852cc27b6970b60c9ff5f77e763 /document
parent4e5d87a9baa4176a1089f7afd1f433a8da275725 (diff)
parent6a64952133fc05136a0e445851b49bd0fc71c00e (diff)
Merge with master
Diffstat (limited to 'document')
-rw-r--r--document/CMakeLists.txt1
-rw-r--r--document/src/tests/serialization/compression_test.cpp23
-rw-r--r--document/src/vespa/document/fieldvalue/serializablearray.cpp11
-rw-r--r--document/src/vespa/document/serialization/vespadocumentserializer.cpp1
-rw-r--r--document/src/vespa/document/util/CMakeLists.txt6
-rw-r--r--document/src/vespa/document/util/compressionconfig.h17
-rw-r--r--document/src/vespa/document/util/compressor.cpp72
-rw-r--r--document/src/vespa/document/util/compressor.h22
-rw-r--r--document/src/vespa/document/util/lz4compressor.cpp46
-rw-r--r--document/src/vespa/document/util/lz4compressor.h17
-rw-r--r--document/src/vespa/document/util/zstdcompressor.cpp34
-rw-r--r--document/src/vespa/document/util/zstdcompressor.h17
12 files changed, 192 insertions, 75 deletions
diff --git a/document/CMakeLists.txt b/document/CMakeLists.txt
index 4e8e6a0aab2..e9694390b4b 100644
--- a/document/CMakeLists.txt
+++ b/document/CMakeLists.txt
@@ -10,6 +10,7 @@ vespa_define_module(
EXTERNAL_DEPENDS
lz4
+ zstd
LIBS
src/vespa/document
diff --git a/document/src/tests/serialization/compression_test.cpp b/document/src/tests/serialization/compression_test.cpp
index 6b95f5e933f..6574b4ac34c 100644
--- a/document/src/tests/serialization/compression_test.cpp
+++ b/document/src/tests/serialization/compression_test.cpp
@@ -1,27 +1,44 @@
// Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
-// Unit tests for annotation serialization.
-#include <vespa/log/log.h>
#include <vespa/vespalib/testkit/test_kit.h>
#include <vespa/vespalib/stllike/string.h>
#include <vespa/document/util/compressor.h>
+#include <vespa/vespalib/data/databuffer.h>
+#include <vespa/log/log.h>
LOG_SETUP("compression_test");
using namespace document;
+using namespace document::compression;
using namespace vespalib;
static vespalib::string _G_compressableText("AAAAAAAAAAAAAAABBBBBBBBBBBBBBBBBCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCDDDDDDDDDDDDDDDDDDDDDDDDDEEEEEEEEEEEEEEE"
"AAAAAAAAAAAAAAABBBBBBBBBBBBBBBBBCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCDDDDDDDDDDDDDDDDDDDDDDDDDEEEEEEEEEEEEEEE"
"AAAAAAAAAAAAAAABBBBBBBBBBBBBBBBBCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCDDDDDDDDDDDDDDDDDDDDDDDDDEEEEEEEEEEEEEEE"
"AAAAAAAAAAAAAAABBBBBBBBBBBBBBBBBCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCDDDDDDDDDDDDDDDDDDDDDDDDDEEEEEEEEEEEEEEE"
- "AAAAAAAAAAAAAAABBBBBBBBBBBBBBBBBCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCDDDDDDDDDDDDDDDDDDDDDDDDDEEEEEEEEEEEEEEE");
+ "AAAAAAAAAAAAAAABBBBBBBBBBBBBBBBBCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCDDDDDDDDDDDDDDDDDDDDDDDDDEEEEEEEEEEEEEEE"
+ "XYZABCDEFGHIJGJMNOPQRSTUVW"
+ "AAAAAAAAAAAAAAABBBBBBBBBBBBBBBBBCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCDDDDDDDDDDDDDDDDDDDDDDDDDEEEEEEEEEEEEEEE"
+ "AAAAAAAAAAAAAAABBBBBBBBBBBBBBBBBCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCDDDDDDDDDDDDDDDDDDDDDDDDDEEEEEEEEEEEEEEE"
+ "AAAAAAAAAAAAAAABBBBBBBBBBBBBBBBBCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCDDDDDDDDDDDDDDDDDDDDDDDDDEEEEEEEEEEEEEEE"
+ "AAAAAAAAAAAAAAABBBBBBBBBBBBBBBBBCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCDDDDDDDDDDDDDDDDDDDDDDDDDEEEEEEEEEEEEEEE"
+ "AAAAAAAAAAAAAAABBBBBBBBBBBBBBBBBCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCDDDDDDDDDDDDDDDDDDDDDDDDDEEEEEEEEEEEEEEE"
+ "XYZABCDEFGHIJGJMNOPQRSTUVW");
+// Compresses _G_compressableText with an LZ4 configuration and checks both the
+// compression type reported by compress() and the size of the produced data.
TEST("requireThatLZ4CompressFine") {
CompressionConfig cfg(CompressionConfig::Type::LZ4);
ConstBufferRef ref(_G_compressableText.c_str(), _G_compressableText.size());
DataBuffer compressed;
EXPECT_EQUAL(CompressionConfig::Type::LZ4, compress(cfg, ref, compressed, false));
+ // NOTE(review): the exact byte count presumably depends on the lz4 library
+ // version in use - confirm this stays stable across upgrades.
+ EXPECT_EQUAL(66u, compressed.getDataLen());
+}
+
+// Compresses _G_compressableText with a ZSTD configuration and checks both the
+// compression type reported by compress() and the size of the produced data.
+TEST("requireThatZStdCompressFine") {
+ CompressionConfig cfg(CompressionConfig::Type::ZSTD);
+ ConstBufferRef ref(_G_compressableText.c_str(), _G_compressableText.size());
+ DataBuffer compressed;
+ EXPECT_EQUAL(CompressionConfig::Type::ZSTD, compress(cfg, ref, compressed, false));
+ // NOTE(review): the exact byte count presumably depends on the zstd library
+ // version in use - confirm this stays stable across upgrades.
+ EXPECT_EQUAL(64u, compressed.getDataLen());
}
TEST_MAIN() {
diff --git a/document/src/vespa/document/fieldvalue/serializablearray.cpp b/document/src/vespa/document/fieldvalue/serializablearray.cpp
index 6e83e42752f..ac1cabc82c0 100644
--- a/document/src/vespa/document/fieldvalue/serializablearray.cpp
+++ b/document/src/vespa/document/fieldvalue/serializablearray.cpp
@@ -2,14 +2,15 @@
#include "serializablearray.h"
#include <vespa/document/util/serializableexceptions.h>
#include <vespa/document/util/bytebuffer.h>
+#include <vespa/document/util/compressor.h>
#include <vespa/vespalib/stllike/hash_map.hpp>
#include <vespa/vespalib/data/databuffer.h>
-#include <vespa/document/util/compressor.h>
#include <vespa/log/log.h>
LOG_SETUP(".document.serializable-array");
using std::vector;
+using vespalib::make_string;
namespace document {
@@ -179,6 +180,7 @@ SerializableArray::clear(int id)
void
SerializableArray::deCompress() // throw (DeserializeException)
{
+ using document::compression::decompress;
// will only do this once
LOG_ASSERT(_compSerData);
@@ -201,15 +203,14 @@ SerializableArray::deCompress() // throw (DeserializeException)
false);
} catch (const std::runtime_error & e) {
throw DeserializeException(
- vespalib::make_string( "Document was compressed with code unknown code %d", _serializedCompression),
+ make_string( "Document was compressed with code unknown code %d", _serializedCompression),
VESPA_STRLOC);
}
if (unCompressed.getDataLen() != (size_t)_uncompressedLength) {
throw DeserializeException(
- vespalib::make_string(
- "Did not decompress to the expected length: had %" PRIu64 ", wanted %d, got %" PRIu64,
- _compSerData->getRemaining(), _uncompressedLength, unCompressed.getDataLen()),
+ make_string("Did not decompress to the expected length: had %zu, wanted %d, got %zu",
+ _compSerData->getRemaining(), _uncompressedLength, unCompressed.getDataLen()),
VESPA_STRLOC);
}
assert(newSerialization->getBuffer() == unCompressed.getData());
diff --git a/document/src/vespa/document/serialization/vespadocumentserializer.cpp b/document/src/vespa/document/serialization/vespadocumentserializer.cpp
index ed4ea31a5f2..1810b9fb631 100644
--- a/document/src/vespa/document/serialization/vespadocumentserializer.cpp
+++ b/document/src/vespa/document/serialization/vespadocumentserializer.cpp
@@ -263,6 +263,7 @@ vespalib::ConstBufferRef
compressStream(const CompressionConfig &config, nbostream &stream,
vespalib::DataBuffer & compressed_data)
{
+ using compression::compress;
vespalib::ConstBufferRef buf(stream.c_str(), stream.size());
if (config.useCompression() && bigEnough(stream.size(), config)) {
CompressionConfig::Type compressedType = compress(config, vespalib::ConstBufferRef(stream.c_str(), stream.size()), compressed_data, false);
diff --git a/document/src/vespa/document/util/CMakeLists.txt b/document/src/vespa/document/util/CMakeLists.txt
index dbb67d6a913..2179b1307d3 100644
--- a/document/src/vespa/document/util/CMakeLists.txt
+++ b/document/src/vespa/document/util/CMakeLists.txt
@@ -1,11 +1,13 @@
# Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
vespa_add_library(document_util OBJECT
SOURCES
- stringutil.cpp
bytebuffer.cpp
- serializable.cpp
compressor.cpp
+ lz4compressor.cpp
+ zstdcompressor.cpp
printable.cpp
+ serializable.cpp
+ stringutil.cpp
DEPENDS
AFTER
document_documentconfig
diff --git a/document/src/vespa/document/util/compressionconfig.h b/document/src/vespa/document/util/compressionconfig.h
index fa20bc38578..413bdc1fb3b 100644
--- a/document/src/vespa/document/util/compressionconfig.h
+++ b/document/src/vespa/document/util/compressionconfig.h
@@ -7,16 +7,16 @@
namespace document {
-
struct CompressionConfig {
enum Type {
- NONE = 0,
- HISTORIC_1 = 1,
- HISTORIC_2 = 2,
- HISTORIC_3 = 3,
- HISTORIC_4 = 4,
- UNCOMPRESSABLE = 5,
- LZ4 = 6
+ NONE = 0,
+ HISTORIC_1 = 1,
+ HISTORIC_2 = 2,
+ HISTORIC_3 = 3,
+ HISTORIC_4 = 4,
+ UNCOMPRESSABLE = 5,
+ LZ4 = 6,
+ ZSTD = 7
};
CompressionConfig()
@@ -47,6 +47,7 @@ struct CompressionConfig {
case 4: return HISTORIC_4;
case 5: return UNCOMPRESSABLE;
case 6: return LZ4;
+ case 7: return ZSTD;
default: return NONE;
}
}
diff --git a/document/src/vespa/document/util/compressor.cpp b/document/src/vespa/document/util/compressor.cpp
index 02a2029b3eb..cd45017dd69 100644
--- a/document/src/vespa/document/util/compressor.cpp
+++ b/document/src/vespa/document/util/compressor.cpp
@@ -1,56 +1,17 @@
// Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
-#include "compressor.h"
+
+#include "lz4compressor.h"
+#include "zstdcompressor.h"
#include <vespa/vespalib/util/memory.h>
#include <vespa/vespalib/util/stringfmt.h>
-#include <stdexcept>
-#include <lz4.h>
-#include <lz4hc.h>
+#include <vespa/vespalib/data/databuffer.h>
using vespalib::alloc::Alloc;
using vespalib::ConstBufferRef;
using vespalib::DataBuffer;
using vespalib::make_string;
-namespace document
-{
-
-size_t LZ4Compressor::adjustProcessLen(uint16_t, size_t len) const { return LZ4_compressBound(len); }
-size_t LZ4Compressor::adjustUnProcessLen(uint16_t, size_t len) const { return len; }
-
-bool
-LZ4Compressor::process(const CompressionConfig& config, const void * inputV, size_t inputLen, void * outputV, size_t & outputLenV)
-{
- const char * input(static_cast<const char *>(inputV));
- char * output(static_cast<char *>(outputV));
- int sz(-1);
- int maxOutputLen = LZ4_compressBound(inputLen);
- if (config.compressionLevel > 6) {
- Alloc state = Alloc::alloc(LZ4_sizeofStateHC());
- sz = LZ4_compress_HC_extStateHC(state.get(), input, output, inputLen, maxOutputLen, config.compressionLevel);
- } else {
- Alloc state = Alloc::alloc(LZ4_sizeofState());
- sz = LZ4_compress_fast_extState(state.get(), input, output, inputLen, maxOutputLen, 1);
- }
- if (sz != 0) {
- outputLenV = sz;
- }
- assert(sz != 0);
- return (sz != 0);
-
-}
-
-bool
-LZ4Compressor::unprocess(const void * inputV, size_t inputLen, void * outputV, size_t & outputLenV)
-{
- const char * input(static_cast<const char *>(inputV));
- char * output(static_cast<char *>(outputV));
- int sz = LZ4_decompress_safe(input, output, inputLen, outputLenV);
- if (sz > 0) {
- outputLenV = sz;
- }
- assert(sz > 0);
- return (sz > 0);
-}
+namespace document::compression {
CompressionConfig::Type
compress(ICompressor & compressor, const CompressionConfig & compression, const ConstBufferRef & org, DataBuffer & dest)
@@ -78,6 +39,12 @@ docompress(const CompressionConfig & compression, const ConstBufferRef & org, Da
type = compress(lz4, compression, org, dest);
}
break;
+ case CompressionConfig::ZSTD:
+ {
+ ZStdCompressor zstd;
+ type = compress(zstd, compression, org, dest);
+ }
+ break;
case CompressionConfig::NONE:
default:
break;
@@ -138,6 +105,12 @@ decompress(const CompressionConfig::Type & type, size_t uncompressedLen, const C
decompress(lz4, uncompressedLen, org, dest, allowSwap);
}
break;
+ case CompressionConfig::ZSTD:
+ {
+ ZStdCompressor zstd;
+ decompress(zstd, uncompressedLen, org, dest, allowSwap);
+ }
+ break;
case CompressionConfig::NONE:
case CompressionConfig::UNCOMPRESSABLE:
if (allowSwap) {
@@ -154,4 +127,15 @@ decompress(const CompressionConfig::Type & type, size_t uncompressedLen, const C
}
}
+/**
+ * Returns the largest size a payload of payloadSize bytes may occupy after
+ * being processed with the given compression type.
+ *
+ * LZ4 and ZSTD delegate to the adjustProcessLen() of their compressor (with
+ * options 0); every other type yields payloadSize unchanged.
+ */
+size_t computeMaxCompressedsize(CompressionConfig::Type type, size_t payloadSize) {
+    switch (type) {
+    case CompressionConfig::LZ4:
+        return document::LZ4Compressor().adjustProcessLen(0, payloadSize);
+    case CompressionConfig::ZSTD:
+        return document::ZStdCompressor().adjustProcessLen(0, payloadSize);
+    default:
+        return payloadSize;
+    }
+}
+
}
diff --git a/document/src/vespa/document/util/compressor.h b/document/src/vespa/document/util/compressor.h
index 1b857d050ac..a8d4803e038 100644
--- a/document/src/vespa/document/util/compressor.h
+++ b/document/src/vespa/document/util/compressor.h
@@ -1,12 +1,12 @@
// Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
#pragma once
-#include <vespa/document/util/compressionconfig.h>
-#include <vespa/vespalib/data/databuffer.h>
+#include "compressionconfig.h"
#include <vespa/vespalib/util/buffer.h>
-namespace document
-{
+namespace vespalib { class DataBuffer; }
+
+namespace document {
class ICompressor
{
@@ -15,17 +15,9 @@ public:
virtual bool process(const CompressionConfig& config, const void * input, size_t inputLen, void * output, size_t & outputLen) = 0;
virtual bool unprocess(const void * input, size_t inputLen, void * output, size_t & outputLen) = 0;
virtual size_t adjustProcessLen(uint16_t options, size_t len) const = 0;
- virtual size_t adjustUnProcessLen(uint16_t options, size_t len) const = 0;
};
-class LZ4Compressor : public ICompressor
-{
-public:
- bool process(const CompressionConfig& config, const void * input, size_t inputLen, void * output, size_t & outputLen) override;
- bool unprocess(const void * input, size_t inputLen, void * output, size_t & outputLen) override;
- size_t adjustProcessLen(uint16_t options, size_t len) const override;
- size_t adjustUnProcessLen(uint16_t options, size_t len) const override;
-};
+namespace compression {
/**
* Will try to compress a buffer according to the config. If the criteria can not
@@ -51,5 +43,9 @@ CompressionConfig::Type compress(const CompressionConfig & compression, const ve
*/
void decompress(const CompressionConfig::Type & compression, size_t uncompressedLen, const vespalib::ConstBufferRef & org, vespalib::DataBuffer & dest, bool allowSwap);
+
+size_t computeMaxCompressedsize(CompressionConfig::Type type, size_t uncompressedSize);
+
}
+}
diff --git a/document/src/vespa/document/util/lz4compressor.cpp b/document/src/vespa/document/util/lz4compressor.cpp
new file mode 100644
index 00000000000..9b41df239a9
--- /dev/null
+++ b/document/src/vespa/document/util/lz4compressor.cpp
@@ -0,0 +1,46 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include "lz4compressor.h"
+#include <vespa/vespalib/util/alloc.h>
+#include <lz4.h>
+#include <lz4hc.h>
+#include <cassert>
+
+using vespalib::alloc::Alloc;
+
+namespace document {
+
+/**
+ * Reports the value of LZ4_compressBound() for an input of len bytes.
+ * The options argument is ignored.
+ */
+size_t
+LZ4Compressor::adjustProcessLen(uint16_t, size_t len) const
+{
+    return LZ4_compressBound(len);
+}
+
+/**
+ * Compresses inputLen bytes from inputV into outputV using LZ4.
+ *
+ * A compression level above 6 selects the high-compression (HC) codec and
+ * forwards the level to it; any other level uses the fast codec with
+ * acceleration 1. The output capacity handed to LZ4 is
+ * LZ4_compressBound(inputLen).
+ * NOTE(review): this presumes the caller sized outputV via adjustProcessLen()
+ * - confirm against callers.
+ *
+ * @param config     supplies compressionLevel.
+ * @param inputV     data to compress.
+ * @param inputLen   number of bytes available at inputV.
+ * @param outputV    destination buffer.
+ * @param outputLenV receives the compressed size on success; left untouched
+ *                   on failure.
+ * @return true if LZ4 produced output, false otherwise.
+ */
+bool
+LZ4Compressor::process(const CompressionConfig& config, const void * inputV, size_t inputLen, void * outputV, size_t & outputLenV)
+{
+    const char * input(static_cast<const char *>(inputV));
+    char * output(static_cast<char *>(outputV));
+    int sz(-1);
+    int maxOutputLen = LZ4_compressBound(inputLen);
+    if (config.compressionLevel > 6) {
+        Alloc state = Alloc::alloc(LZ4_sizeofStateHC());
+        sz = LZ4_compress_HC_extStateHC(state.get(), input, output, inputLen, maxOutputLen, config.compressionLevel);
+    } else {
+        Alloc state = Alloc::alloc(LZ4_sizeofState());
+        sz = LZ4_compress_fast_extState(state.get(), input, output, inputLen, maxOutputLen, 1);
+    }
+    assert(sz > 0);
+    // When asserts are compiled out a failed call must not publish a length:
+    // only a positive result is a valid compressed size.
+    const bool ok = (sz > 0);
+    if (ok) {
+        outputLenV = sz;
+    }
+    return ok;
+}
+
+/**
+ * Decompresses inputLen bytes of LZ4 data from inputV into outputV.
+ *
+ * @param inputV     compressed data.
+ * @param inputLen   number of compressed bytes at inputV.
+ * @param outputV    destination buffer.
+ * @param outputLenV on entry the capacity of outputV; on success the number
+ *                   of bytes decompressed. Left untouched on failure.
+ * @return true if LZ4_decompress_safe() returned a positive size.
+ */
+bool
+LZ4Compressor::unprocess(const void * inputV, size_t inputLen, void * outputV, size_t & outputLenV)
+{
+    const char * input(static_cast<const char *>(inputV));
+    char * output(static_cast<char *>(outputV));
+    int sz = LZ4_decompress_safe(input, output, inputLen, outputLenV);
+    assert(sz > 0);
+    // LZ4_decompress_safe() signals failure with a negative value. Storing
+    // that into a size_t would yield a huge bogus length when asserts are
+    // compiled out, so the length is only published on success.
+    const bool ok = (sz > 0);
+    if (ok) {
+        outputLenV = sz;
+    }
+    return ok;
+}
+
+}
diff --git a/document/src/vespa/document/util/lz4compressor.h b/document/src/vespa/document/util/lz4compressor.h
new file mode 100644
index 00000000000..aa1726cd639
--- /dev/null
+++ b/document/src/vespa/document/util/lz4compressor.h
@@ -0,0 +1,17 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#pragma once
+
+#include "compressor.h"
+
+namespace document {
+
+/**
+ * ICompressor implementation backed by the LZ4 library; the member
+ * definitions live in lz4compressor.cpp.
+ */
+class LZ4Compressor : public ICompressor {
+public:
+    size_t adjustProcessLen(uint16_t options, size_t len) const override;
+    bool process(const CompressionConfig& config, const void * input, size_t inputLen,
+                 void * output, size_t & outputLen) override;
+    bool unprocess(const void * input, size_t inputLen,
+                   void * output, size_t & outputLen) override;
+};
+
+}
+
diff --git a/document/src/vespa/document/util/zstdcompressor.cpp b/document/src/vespa/document/util/zstdcompressor.cpp
new file mode 100644
index 00000000000..bb13d8009c1
--- /dev/null
+++ b/document/src/vespa/document/util/zstdcompressor.cpp
@@ -0,0 +1,34 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include "zstdcompressor.h"
+#include <vespa/vespalib/util/alloc.h>
+#include <zstd.h>
+#include <cassert>
+
+using vespalib::alloc::Alloc;
+
+namespace document {
+
+/**
+ * Reports the value of ZSTD_compressBound() for an input of len bytes.
+ * The options argument is ignored.
+ */
+size_t
+ZStdCompressor::adjustProcessLen(uint16_t, size_t len) const
+{
+    return ZSTD_compressBound(len);
+}
+
+/**
+ * Compresses inputLen bytes from inputV into outputV using Zstandard at
+ * config.compressionLevel.
+ *
+ * The output capacity handed to zstd is ZSTD_compressBound(inputLen).
+ * NOTE(review): this presumes the caller sized outputV via adjustProcessLen()
+ * - confirm against callers.
+ *
+ * @param config     supplies compressionLevel.
+ * @param inputV     data to compress.
+ * @param inputLen   number of bytes available at inputV.
+ * @param outputV    destination buffer.
+ * @param outputLenV receives the compressed size on success; left untouched
+ *                   on failure.
+ * @return true unless ZSTD_isError() flags the result.
+ */
+bool
+ZStdCompressor::process(const CompressionConfig& config, const void * inputV, size_t inputLen, void * outputV, size_t & outputLenV)
+{
+    const size_t maxOutputLen = ZSTD_compressBound(inputLen);
+    const size_t sz = ZSTD_compress(outputV, maxOutputLen, inputV, inputLen, config.compressionLevel);
+    const bool ok = ! ZSTD_isError(sz);
+    assert(ok);
+    // On failure sz holds an error code, not a length; never hand it to the
+    // caller as a size when asserts are compiled out.
+    if (ok) {
+        outputLenV = sz;
+    }
+    return ok;
+}
+
+/**
+ * Decompresses inputLen bytes of Zstandard data from inputV into outputV.
+ *
+ * @param inputV     compressed data.
+ * @param inputLen   number of compressed bytes at inputV.
+ * @param outputV    destination buffer.
+ * @param outputLenV on entry the capacity of outputV; on success the number
+ *                   of bytes decompressed. Left untouched on failure.
+ * @return true unless ZSTD_isError() flags the result.
+ */
+bool
+ZStdCompressor::unprocess(const void * inputV, size_t inputLen, void * outputV, size_t & outputLenV)
+{
+    const size_t sz = ZSTD_decompress(outputV, outputLenV, inputV, inputLen);
+    const bool ok = ! ZSTD_isError(sz);
+    assert(ok);
+    // On failure sz holds an error code, not a length; publishing it would
+    // report a huge bogus size when asserts are compiled out.
+    if (ok) {
+        outputLenV = sz;
+    }
+    return ok;
+}
+
+}
diff --git a/document/src/vespa/document/util/zstdcompressor.h b/document/src/vespa/document/util/zstdcompressor.h
new file mode 100644
index 00000000000..5c52c144651
--- /dev/null
+++ b/document/src/vespa/document/util/zstdcompressor.h
@@ -0,0 +1,17 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#pragma once
+
+#include "compressor.h"
+
+namespace document {
+
+/**
+ * ICompressor implementation backed by the Zstandard library; the member
+ * definitions live in zstdcompressor.cpp.
+ */
+class ZStdCompressor : public ICompressor {
+public:
+    size_t adjustProcessLen(uint16_t options, size_t len) const override;
+    bool process(const CompressionConfig& config, const void * input, size_t inputLen,
+                 void * output, size_t & outputLen) override;
+    bool unprocess(const void * input, size_t inputLen,
+                   void * output, size_t & outputLen) override;
+};
+
+}
+