diff options
author | Haavard <havardpe@yahoo-inc.com> | 2017-02-08 13:43:07 +0000 |
---|---|---|
committer | Haavard <havardpe@yahoo-inc.com> | 2017-02-09 10:01:06 +0000 |
commit | e5d2f5aa1c8502ea614137c5bf2cb5f131e781da (patch) | |
tree | e66286f1db2f95edf933b7b429b5599c0b977dd7 /vespalib | |
parent | 559a76e491e28ca1ec4874d403866fd508064a54 (diff) | |
added lz4 compress/decompress filters with common test
Diffstat (limited to 'vespalib')
8 files changed, 280 insertions, 0 deletions
diff --git a/vespalib/CMakeLists.txt b/vespalib/CMakeLists.txt
index 842dbb82ccc..b775cce68f2 100644
--- a/vespalib/CMakeLists.txt
+++ b/vespalib/CMakeLists.txt
@@ -24,6 +24,7 @@ vespa_define_module(
     src/tests/component
     src/tests/compress
     src/tests/data/input_reader
+    src/tests/data/lz4_encode_decode
     src/tests/data/memory_input
     src/tests/data/output_writer
     src/tests/data/simple_buffer
diff --git a/vespalib/src/tests/data/lz4_encode_decode/CMakeLists.txt b/vespalib/src/tests/data/lz4_encode_decode/CMakeLists.txt
new file mode 100644
index 00000000000..98e50a9a1c4
--- /dev/null
+++ b/vespalib/src/tests/data/lz4_encode_decode/CMakeLists.txt
@@ -0,0 +1,8 @@
+# Copyright 2017 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+vespa_add_executable(vespalib_lz4_encode_decode_test_app TEST
+    SOURCES
+    lz4_encode_decode_test.cpp
+    DEPENDS
+    vespalib
+)
+vespa_add_test(NAME vespalib_lz4_encode_decode_test_app COMMAND vespalib_lz4_encode_decode_test_app)
diff --git a/vespalib/src/tests/data/lz4_encode_decode/lz4_encode_decode_test.cpp b/vespalib/src/tests/data/lz4_encode_decode/lz4_encode_decode_test.cpp
new file mode 100644
index 00000000000..75a9aa6e89d
--- /dev/null
+++ b/vespalib/src/tests/data/lz4_encode_decode/lz4_encode_decode_test.cpp
@@ -0,0 +1,61 @@
+// Copyright 2017 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#include <vespa/vespalib/testkit/test_kit.h>
+#include <vespa/vespalib/data/lz4_output_encoder.h>
+#include <vespa/vespalib/data/lz4_input_decoder.h>
+#include <vespa/vespalib/data/simple_buffer.h>
+#include <vespa/vespalib/data/memory_input.h>
+
+using namespace vespalib;
+
+// make sure input is split into chunks
+struct ChunkedInput : Input {
+    Input &input;
+    ChunkedInput(Input &input_in) : input(input_in) {}
+    Memory obtain() override {
+        Memory memory = input.obtain();
+        memory.size = std::min(memory.size, size_t(3));
+        return memory;
+    }
+    Input &evict(size_t bytes) override {
+        EXPECT_LESS_EQUAL(bytes, 3u);
+        input.evict(bytes);
+        return *this;
+    }
+};
+
+void transfer(Input &input, Output &output) {
+    for (Memory src = input.obtain(); src.size > 0; src = input.obtain()) {
+        auto dst = output.reserve(src.size);
+        ASSERT_GREATER_EQUAL(dst.size, src.size);
+        memcpy(dst.data, src.data, src.size);
+        output.commit(src.size);
+        input.evict(src.size);
+    }
+}
+
+TEST("require that lz4 encode-decode works") {
+    SimpleBuffer data;
+    for (size_t i = 0; i < 100; ++i) {
+        data.add((i % 7) + (i * 5) + (i >> 3));
+    }
+    SimpleBuffer encoded;
+    {
+        MemoryInput memory_input(data.get());
+        ChunkedInput chunked_input(memory_input);
+        Lz4OutputEncoder lz4_encoder(encoded, 10);
+        transfer(chunked_input, lz4_encoder);
+    }
+    SimpleBuffer decoded;
+    {
+        MemoryInput memory_input(encoded.get());
+        ChunkedInput chunked_input(memory_input);
+        Lz4InputDecoder input_decoder(chunked_input, 10);
+        transfer(input_decoder, decoded);
+        EXPECT_TRUE(!input_decoder.failed());
+        EXPECT_EQUAL(input_decoder.reason(), vespalib::string());
+    }
+    EXPECT_NOT_EQUAL(data.get(), encoded.get());
+    EXPECT_EQUAL(data.get(), decoded.get());
+}
+
+TEST_MAIN() { TEST_RUN_ALL(); }
diff --git a/vespalib/src/vespa/vespalib/data/CMakeLists.txt b/vespalib/src/vespa/vespalib/data/CMakeLists.txt
index 9d574953016..fd7afd71101 100644
--- a/vespalib/src/vespa/vespalib/data/CMakeLists.txt
+++ b/vespalib/src/vespa/vespalib/data/CMakeLists.txt
@@ -3,6 +3,8 @@ vespa_add_library(vespalib_vespalib_data OBJECT
     SOURCES
     input.cpp
     input_reader.cpp
+    lz4_input_decoder.cpp
+    lz4_output_encoder.cpp
     memory.cpp
     memory_input.cpp
     memorydatastore.cpp
diff --git a/vespalib/src/vespa/vespalib/data/lz4_input_decoder.cpp b/vespalib/src/vespa/vespalib/data/lz4_input_decoder.cpp
new file mode 100644
index 00000000000..c7f0fce664f
--- /dev/null
+++ b/vespalib/src/vespa/vespalib/data/lz4_input_decoder.cpp
@@ -0,0 +1,87 @@
+// Copyright 2017 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include <vespa/fastos/fastos.h>
+#include "lz4_input_decoder.h"
+#include <lz4frame.h>
+
+namespace vespalib {
+
+void
+Lz4InputDecoder::fail(const char *reason)
+{
+    _failed = true;
+    _reason = reason;
+    _eof = true;
+}
+
+void
+Lz4InputDecoder::decode_more()
+{
+    assert((_pos == _used) && !_eof);
+    Memory memory = _input.obtain();
+    if (memory.size == 0) {
+        auto res = LZ4F_freeDecompressionContext(_ctx);
+        _ctx = nullptr;
+        _eof = true;
+        if (LZ4F_isError(res)) {
+            fail(LZ4F_getErrorName(res));
+        }
+    } else {
+        size_t input_size = memory.size;
+        size_t output_size = _buffer.size();
+        auto res = LZ4F_decompress(_ctx,
+                                   &_buffer[0], &output_size,
+                                   memory.data, &input_size,
+                                   nullptr);
+        if (LZ4F_isError(res)) {
+            fail(LZ4F_getErrorName(res));
+        } else if (input_size == 0) {
+            fail("lz4 refusing to eat input");
+        } else {
+            assert(input_size <= memory.size);
+            assert(output_size <= _buffer.size());
+            _input.evict(input_size);
+            _used = output_size;
+            _pos = 0;
+        }
+    }
+}
+
+Lz4InputDecoder::Lz4InputDecoder(Input &input, size_t buffer_size)
+    : _input(input),
+      _buffer(buffer_size, 0),
+      _used(0),
+      _pos(0),
+      _eof(false),
+      _failed(false),
+      _reason(),
+      _ctx(nullptr)
+{
+    auto res = LZ4F_createDecompressionContext(&_ctx, LZ4F_VERSION);
+    if (LZ4F_isError(res)) {
+        fail(LZ4F_getErrorName(res));
+    }
+}
+
+Lz4InputDecoder::~Lz4InputDecoder()
+{
+    LZ4F_freeDecompressionContext(_ctx);
+}
+
+Memory
+Lz4InputDecoder::obtain()
+{
+    while ((_pos == _used) && !_eof) {
+        decode_more();
+    }
+    return Memory(&_buffer[_pos], (_used - _pos));
+}
+
+Input &
+Lz4InputDecoder::evict(size_t bytes)
+{
+    _pos += bytes;
+    return *this;
+}
+
+} // namespace vespalib
diff --git a/vespalib/src/vespa/vespalib/data/lz4_input_decoder.h b/vespalib/src/vespa/vespalib/data/lz4_input_decoder.h
new file mode 100644
index 00000000000..f711aef30b5
--- /dev/null
+++ b/vespalib/src/vespa/vespalib/data/lz4_input_decoder.h
@@ -0,0 +1,37 @@
+// Copyright 2017 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include "input.h"
+
+struct LZ4F_dctx_s;
+
+namespace vespalib {
+
+/**
+ * Input filter decompressing data stored in framed lz4 format.
+ **/
+class Lz4InputDecoder : public Input
+{
+private:
+    Input &_input;
+    std::vector<char> _buffer;
+    size_t _used;
+    size_t _pos;
+    bool _eof;
+    bool _failed;
+    vespalib::string _reason;
+    LZ4F_dctx_s *_ctx;
+
+    void fail(const char *reason);
+    void decode_more();
+public:
+    Lz4InputDecoder(Input &input, size_t buffer_size);
+    ~Lz4InputDecoder();
+    Memory obtain() override;
+    Input &evict(size_t bytes) override;
+    bool failed() const { return _failed; }
+    const vespalib::string &reason() const { return _reason; }
+};
+
+} // namespace vespalib
diff --git a/vespalib/src/vespa/vespalib/data/lz4_output_encoder.cpp b/vespalib/src/vespa/vespalib/data/lz4_output_encoder.cpp
new file mode 100644
index 00000000000..60f8546bde1
--- /dev/null
+++ b/vespalib/src/vespa/vespalib/data/lz4_output_encoder.cpp
@@ -0,0 +1,54 @@
+// Copyright 2017 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include <vespa/fastos/fastos.h>
+#include "lz4_output_encoder.h"
+#include <lz4frame.h>
+
+namespace vespalib {
+
+void
+Lz4OutputEncoder::encode_frame()
+{
+    auto dst = _output.reserve(LZ4F_compressFrameBound(_used, nullptr));
+    size_t written = LZ4F_compressFrame(dst.data, dst.size, &_buffer[0], _used, nullptr);
+    assert(!LZ4F_isError(written));
+    assert(written <= dst.size);
+    _output.commit(written);
+    _used = 0;
+}
+
+Lz4OutputEncoder::Lz4OutputEncoder(Output &output, size_t buffer_size)
+    : _output(output),
+      _buffer(buffer_size, 0),
+      _used(0),
+      _limit(buffer_size)
+{
+}
+
+Lz4OutputEncoder::~Lz4OutputEncoder()
+{
+    if (_used > 0) {
+        encode_frame();
+    }
+}
+
+WritableMemory
+Lz4OutputEncoder::reserve(size_t bytes)
+{
+    if ((_used + bytes) > _buffer.size()) {
+        _buffer.resize(_used + bytes, 0);
+    }
+    return WritableMemory(&_buffer[_used], (_buffer.size() - _used));
+}
+
+Output &
+Lz4OutputEncoder::commit(size_t bytes)
+{
+    _used += bytes;
+    if (_used >= _limit) {
+        encode_frame();
+    }
+    return *this;
+}
+
+} // namespace vespalib
diff --git a/vespalib/src/vespa/vespalib/data/lz4_output_encoder.h b/vespalib/src/vespa/vespalib/data/lz4_output_encoder.h
new file mode 100644
index 00000000000..6936a9a0169
--- /dev/null
+++ b/vespalib/src/vespa/vespalib/data/lz4_output_encoder.h
@@ -0,0 +1,30 @@
+// Copyright 2017 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include "output.h"
+
+namespace vespalib {
+
+/**
+ * Output filter compressing data into framed lz4 format. This class
+ * will use the simple LZ4 compression API to encode complete frames
+ * at a time, trading performance for simplicity.
+ **/
+class Lz4OutputEncoder : public Output
+{
+private:
+    Output &_output;
+    std::vector<char> _buffer;
+    size_t _used;
+    size_t _limit;
+
+    void encode_frame();
+public:
+    Lz4OutputEncoder(Output &output, size_t buffer_size);
+    ~Lz4OutputEncoder();
+    WritableMemory reserve(size_t bytes) override;
+    Output &commit(size_t bytes) override;
+};
+
+} // namespace vespalib