summaryrefslogtreecommitdiffstats
path: root/vespalib
diff options
context:
space:
mode:
author Haavard <havardpe@yahoo-inc.com> 2017-02-08 13:43:07 +0000
committer Haavard <havardpe@yahoo-inc.com> 2017-02-09 10:01:06 +0000
commit e5d2f5aa1c8502ea614137c5bf2cb5f131e781da (patch)
tree e66286f1db2f95edf933b7b429b5599c0b977dd7 /vespalib
parent 559a76e491e28ca1ec4874d403866fd508064a54 (diff)
added lz4 compress/decompress filters with common test
Diffstat (limited to 'vespalib')
-rw-r--r-- vespalib/CMakeLists.txt 1
-rw-r--r-- vespalib/src/tests/data/lz4_encode_decode/CMakeLists.txt 8
-rw-r--r-- vespalib/src/tests/data/lz4_encode_decode/lz4_encode_decode_test.cpp 61
-rw-r--r-- vespalib/src/vespa/vespalib/data/CMakeLists.txt 2
-rw-r--r-- vespalib/src/vespa/vespalib/data/lz4_input_decoder.cpp 87
-rw-r--r-- vespalib/src/vespa/vespalib/data/lz4_input_decoder.h 37
-rw-r--r-- vespalib/src/vespa/vespalib/data/lz4_output_encoder.cpp 54
-rw-r--r-- vespalib/src/vespa/vespalib/data/lz4_output_encoder.h 30
8 files changed, 280 insertions, 0 deletions
diff --git a/vespalib/CMakeLists.txt b/vespalib/CMakeLists.txt
index 842dbb82ccc..b775cce68f2 100644
--- a/vespalib/CMakeLists.txt
+++ b/vespalib/CMakeLists.txt
@@ -24,6 +24,7 @@ vespa_define_module(
src/tests/component
src/tests/compress
src/tests/data/input_reader
+ src/tests/data/lz4_encode_decode
src/tests/data/memory_input
src/tests/data/output_writer
src/tests/data/simple_buffer
diff --git a/vespalib/src/tests/data/lz4_encode_decode/CMakeLists.txt b/vespalib/src/tests/data/lz4_encode_decode/CMakeLists.txt
new file mode 100644
index 00000000000..98e50a9a1c4
--- /dev/null
+++ b/vespalib/src/tests/data/lz4_encode_decode/CMakeLists.txt
@@ -0,0 +1,8 @@
+# Copyright 2017 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+vespa_add_executable(vespalib_lz4_encode_decode_test_app TEST
+    SOURCES lz4_encode_decode_test.cpp
+    DEPENDS vespalib
+)
+vespa_add_test(
+    NAME vespalib_lz4_encode_decode_test_app
+    COMMAND vespalib_lz4_encode_decode_test_app
+)
diff --git a/vespalib/src/tests/data/lz4_encode_decode/lz4_encode_decode_test.cpp b/vespalib/src/tests/data/lz4_encode_decode/lz4_encode_decode_test.cpp
new file mode 100644
index 00000000000..75a9aa6e89d
--- /dev/null
+++ b/vespalib/src/tests/data/lz4_encode_decode/lz4_encode_decode_test.cpp
@@ -0,0 +1,61 @@
+// Copyright 2017 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#include <vespa/vespalib/testkit/test_kit.h>
+#include <vespa/vespalib/data/lz4_output_encoder.h>
+#include <vespa/vespalib/data/lz4_input_decoder.h>
+#include <vespa/vespalib/data/simple_buffer.h>
+#include <vespa/vespalib/data/memory_input.h>
+
+using namespace vespalib;
+
+// Input wrapper that hands out the wrapped input in chunks of at
+// most 3 bytes, making sure consumers see the data split up.
+struct ChunkedInput : Input {
+    Input &input;
+    ChunkedInput(Input &input_in) : input(input_in) {}
+    Memory obtain() override {
+        Memory chunk = input.obtain();
+        if (chunk.size > size_t(3)) {
+            chunk.size = size_t(3);
+        }
+        return chunk;
+    }
+    Input &evict(size_t bytes) override {
+        // obtain() never exposes more than 3 bytes at a time
+        EXPECT_LESS_EQUAL(bytes, 3u);
+        input.evict(bytes);
+        return *this;
+    }
+};
+
+// Copy everything available from 'input' into 'output', one obtained
+// chunk at a time, stopping when the input yields an empty chunk.
+void transfer(Input &input, Output &output) {
+    Memory chunk = input.obtain();
+    while (chunk.size > 0) {
+        auto target = output.reserve(chunk.size);
+        ASSERT_GREATER_EQUAL(target.size, chunk.size);
+        memcpy(target.data, chunk.data, chunk.size);
+        output.commit(chunk.size);
+        input.evict(chunk.size);
+        chunk = input.obtain();
+    }
+}
+
+TEST("require that lz4 encode-decode works") {
+    // 100 bytes of generated source data
+    SimpleBuffer original;
+    for (size_t n = 0; n < 100; ++n) {
+        original.add((n % 7) + (n * 5) + (n >> 3));
+    }
+    // encode the source data, feeding it to the encoder in small chunks
+    SimpleBuffer compressed;
+    {
+        MemoryInput raw_input(original.get());
+        ChunkedInput small_chunks(raw_input);
+        Lz4OutputEncoder encoder(compressed, 10);
+        transfer(small_chunks, encoder);
+    }
+    // decode the encoded data again, also reading it in small chunks
+    SimpleBuffer restored;
+    {
+        MemoryInput raw_input(compressed.get());
+        ChunkedInput small_chunks(raw_input);
+        Lz4InputDecoder decoder(small_chunks, 10);
+        transfer(decoder, restored);
+        EXPECT_TRUE(!decoder.failed());
+        EXPECT_EQUAL(decoder.reason(), vespalib::string());
+    }
+    // the encoded form must differ from the source; the decoded form must match it
+    EXPECT_NOT_EQUAL(original.get(), compressed.get());
+    EXPECT_EQUAL(original.get(), restored.get());
+}
+
+// test program entry point: run every test defined in this file
+TEST_MAIN() {
+    TEST_RUN_ALL();
+}
diff --git a/vespalib/src/vespa/vespalib/data/CMakeLists.txt b/vespalib/src/vespa/vespalib/data/CMakeLists.txt
index 9d574953016..fd7afd71101 100644
--- a/vespalib/src/vespa/vespalib/data/CMakeLists.txt
+++ b/vespalib/src/vespa/vespalib/data/CMakeLists.txt
@@ -3,6 +3,8 @@ vespa_add_library(vespalib_vespalib_data OBJECT
SOURCES
input.cpp
input_reader.cpp
+ lz4_input_decoder.cpp
+ lz4_output_encoder.cpp
memory.cpp
memory_input.cpp
memorydatastore.cpp
diff --git a/vespalib/src/vespa/vespalib/data/lz4_input_decoder.cpp b/vespalib/src/vespa/vespalib/data/lz4_input_decoder.cpp
new file mode 100644
index 00000000000..c7f0fce664f
--- /dev/null
+++ b/vespalib/src/vespa/vespalib/data/lz4_input_decoder.cpp
@@ -0,0 +1,87 @@
+// Copyright 2017 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include <vespa/fastos/fastos.h>
+#include "lz4_input_decoder.h"
+#include <lz4frame.h>
+
+namespace vespalib {
+
+/**
+ * Enter the failed state: remember the given reason and flag
+ * end-of-data as well.
+ **/
+void
+Lz4InputDecoder::fail(const char *reason)
+{
+    _reason = reason;
+    _failed = true;
+    _eof = true;
+}
+
+/**
+ * Decode the next piece of data into the internal buffer. May only
+ * be called when all previously decoded data has been consumed and
+ * end-of-data has not been flagged.
+ *
+ * When the underlying input is exhausted, the decompression context
+ * is released and end-of-data is flagged. Otherwise the obtained
+ * input is handed to lz4; the decoder fails if lz4 reports an error
+ * or makes no progress at all (neither consuming input nor producing
+ * output).
+ **/
+void
+Lz4InputDecoder::decode_more()
+{
+    assert((_pos == _used) && !_eof);
+    Memory memory = _input.obtain();
+    if (memory.size == 0) {
+        auto res = LZ4F_freeDecompressionContext(_ctx);
+        _ctx = nullptr; // the destructor frees _ctx again; hand it a null pointer
+        _eof = true;
+        if (LZ4F_isError(res)) {
+            fail(LZ4F_getErrorName(res));
+        }
+        return;
+    }
+    size_t input_size = memory.size;
+    size_t output_size = _buffer.size();
+    auto res = LZ4F_decompress(_ctx,
+                               _buffer.data(), &output_size,
+                               memory.data, &input_size,
+                               nullptr);
+    if (LZ4F_isError(res)) {
+        fail(LZ4F_getErrorName(res));
+    } else if ((input_size == 0) && (output_size == 0)) {
+        // no input consumed and no output produced; retrying would loop forever
+        fail("lz4 refusing to eat input");
+    } else {
+        // A call that only produces output (lz4 handing out data it
+        // has already decoded, because our buffer was too small to
+        // take it all at once) consumes no input but is still
+        // progress, so it must not be treated as a failure.
+        assert(input_size <= memory.size);
+        assert(output_size <= _buffer.size());
+        _input.evict(input_size);
+        _used = output_size;
+        _pos = 0;
+    }
+}
+
+/**
+ * Set up a decoder reading compressed data from 'input' and decoding
+ * it into an internal buffer of 'buffer_size' bytes. The decoder
+ * enters the failed state right away if the lz4 decompression
+ * context cannot be created.
+ **/
+Lz4InputDecoder::Lz4InputDecoder(Input &input, size_t buffer_size)
+    : _input(input),
+      _buffer(buffer_size, 0),
+      _used(0),
+      _pos(0),
+      _eof(false),
+      _failed(false),
+      _reason(),
+      _ctx(nullptr)
+{
+    const auto status = LZ4F_createDecompressionContext(&_ctx, LZ4F_VERSION);
+    if (!LZ4F_isError(status)) {
+        return;
+    }
+    fail(LZ4F_getErrorName(status));
+}
+
+/**
+ * Release the decompression context. The context pointer may be
+ * null at this point (presumably lz4 treats freeing a null context
+ * as a no-op; confirm against the lz4 frame API). The status
+ * returned by lz4 is not inspected.
+ **/
+Lz4InputDecoder::~Lz4InputDecoder()
+{
+    static_cast<void>(LZ4F_freeDecompressionContext(_ctx));
+}
+
+/**
+ * Obtain decoded data that has not yet been evicted, decoding more
+ * as needed. Returns an empty memory region when end-of-data has
+ * been flagged and all decoded data has been consumed.
+ **/
+Memory
+Lz4InputDecoder::obtain()
+{
+    while ((_pos == _used) && !_eof) {
+        decode_more();
+    }
+    // Use pointer arithmetic rather than operator[]: _pos may equal
+    // _buffer.size() (buffer fully consumed, or zero-sized buffer),
+    // where indexing the vector would be out of range.
+    return Memory(_buffer.data() + _pos, (_used - _pos));
+}
+
+/**
+ * Mark 'bytes' bytes of the decoded data as consumed by advancing
+ * the read position.
+ **/
+Input &
+Lz4InputDecoder::evict(size_t bytes)
+{
+    _pos = _pos + bytes;
+    return *this;
+}
+
+} // namespace vespalib
diff --git a/vespalib/src/vespa/vespalib/data/lz4_input_decoder.h b/vespalib/src/vespa/vespalib/data/lz4_input_decoder.h
new file mode 100644
index 00000000000..f711aef30b5
--- /dev/null
+++ b/vespalib/src/vespa/vespalib/data/lz4_input_decoder.h
@@ -0,0 +1,37 @@
+// Copyright 2017 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include "input.h"
+
+struct LZ4F_dctx_s;
+
+namespace vespalib {
+
+/**
+ * Input filter decompressing data stored in framed lz4 format.
+ * Compressed data is pulled from the wrapped input and decoded into
+ * an internal buffer that is handed out through obtain().
+ *
+ * The object owns an lz4 decompression context that is released by
+ * the destructor. Copying is disabled, since two objects sharing
+ * one context would both try to release it.
+ **/
+class Lz4InputDecoder : public Input
+{
+private:
+    Input &_input;             // source of compressed data
+    std::vector<char> _buffer; // holds decoded data
+    size_t _used;              // number of decoded bytes in _buffer
+    size_t _pos;               // number of decoded bytes already evicted
+    bool _eof;                 // no more data will be decoded
+    bool _failed;              // decoding has failed, see _reason
+    vespalib::string _reason;  // failure description, empty if not failed
+    LZ4F_dctx_s *_ctx;         // lz4 decompression context
+
+    void fail(const char *reason);
+    void decode_more();
+public:
+    Lz4InputDecoder(Input &input, size_t buffer_size);
+    Lz4InputDecoder(const Lz4InputDecoder &) = delete;
+    Lz4InputDecoder &operator=(const Lz4InputDecoder &) = delete;
+    ~Lz4InputDecoder();
+    Memory obtain() override;
+    Input &evict(size_t bytes) override;
+    bool failed() const { return _failed; }
+    const vespalib::string &reason() const { return _reason; }
+};
+
+} // namespace vespalib
diff --git a/vespalib/src/vespa/vespalib/data/lz4_output_encoder.cpp b/vespalib/src/vespa/vespalib/data/lz4_output_encoder.cpp
new file mode 100644
index 00000000000..60f8546bde1
--- /dev/null
+++ b/vespalib/src/vespa/vespalib/data/lz4_output_encoder.cpp
@@ -0,0 +1,54 @@
+// Copyright 2017 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include <vespa/fastos/fastos.h>
+#include "lz4_output_encoder.h"
+#include <lz4frame.h>
+
+namespace vespalib {
+
+/**
+ * Compress the buffered bytes into a single lz4 frame, written
+ * directly into space reserved from the underlying output, and mark
+ * the internal buffer as empty.
+ **/
+void
+Lz4OutputEncoder::encode_frame()
+{
+    const size_t max_frame_size = LZ4F_compressFrameBound(_used, nullptr);
+    auto target = _output.reserve(max_frame_size);
+    const size_t frame_size = LZ4F_compressFrame(target.data, target.size, &_buffer[0], _used, nullptr);
+    assert(!LZ4F_isError(frame_size));
+    assert(frame_size <= target.size);
+    _output.commit(frame_size);
+    _used = 0;
+}
+
+/**
+ * Set up an encoder writing compressed frames to 'output'. The
+ * internal buffer starts out with 'buffer_size' bytes, and the same
+ * value is used as the limit that triggers frame encoding on commit.
+ **/
+Lz4OutputEncoder::Lz4OutputEncoder(Output &output, size_t buffer_size)
+    : _output(output),
+      _buffer(buffer_size, 0),
+      _used(0),
+      _limit(buffer_size)
+{
+}
+
+/**
+ * Encode any bytes still pending in the internal buffer as a final
+ * frame.
+ **/
+Lz4OutputEncoder::~Lz4OutputEncoder()
+{
+    if (_used == 0) {
+        return;
+    }
+    encode_frame();
+}
+
+/**
+ * Make room for at least 'bytes' more bytes after the bytes already
+ * buffered, growing the internal buffer if needed, and return all
+ * writable space following the buffered bytes.
+ **/
+WritableMemory
+Lz4OutputEncoder::reserve(size_t bytes)
+{
+    const size_t needed = _used + bytes;
+    if (_buffer.size() < needed) {
+        _buffer.resize(needed, 0);
+    }
+    const size_t available = _buffer.size() - _used;
+    return WritableMemory(&_buffer[_used], available);
+}
+
+/**
+ * Account for 'bytes' newly written bytes and encode the buffered
+ * data as a frame once the configured limit has been reached.
+ **/
+Output &
+Lz4OutputEncoder::commit(size_t bytes)
+{
+    _used += bytes;
+    const bool limit_reached = !(_used < _limit);
+    if (limit_reached) {
+        encode_frame();
+    }
+    return *this;
+}
+
+} // namespace vespalib
diff --git a/vespalib/src/vespa/vespalib/data/lz4_output_encoder.h b/vespalib/src/vespa/vespalib/data/lz4_output_encoder.h
new file mode 100644
index 00000000000..6936a9a0169
--- /dev/null
+++ b/vespalib/src/vespa/vespalib/data/lz4_output_encoder.h
@@ -0,0 +1,30 @@
+// Copyright 2017 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include "output.h"
+
+namespace vespalib {
+
+/**
+ * Output filter compressing data into framed lz4 format. This class
+ * will use the simple LZ4 compression API to encode complete frames
+ * at a time, trading performance for simplicity.
+ *
+ * Bytes still pending when the object is destructed are encoded as a
+ * final frame by the destructor. Copying is disabled, since a copy
+ * would carry the same pending bytes and write them to the output a
+ * second time.
+ **/
+class Lz4OutputEncoder : public Output
+{
+private:
+    Output &_output;           // destination for encoded frames
+    std::vector<char> _buffer; // holds bytes not yet encoded
+    size_t _used;              // number of pending bytes in _buffer
+    size_t _limit;             // pending byte count that triggers encoding
+
+    void encode_frame();
+public:
+    Lz4OutputEncoder(Output &output, size_t buffer_size);
+    Lz4OutputEncoder(const Lz4OutputEncoder &) = delete;
+    Lz4OutputEncoder &operator=(const Lz4OutputEncoder &) = delete;
+    ~Lz4OutputEncoder();
+    WritableMemory reserve(size_t bytes) override;
+    Output &commit(size_t bytes) override;
+};
+
+} // namespace vespalib