From c9b42e84a3d26ad360f83c981697926cacebf0ee Mon Sep 17 00:00:00 2001 From: Henning Baldersheim Date: Tue, 9 Nov 2021 20:31:31 +0000 Subject: Compression is not supported for a very long time. So drop lazy decompression support in favor of thread safe access to field values. --- .../serialization/vespadocumentserializer_test.cpp | 4 +- .../document/fieldvalue/serializablearray.cpp | 99 +++++----------------- .../vespa/document/fieldvalue/serializablearray.h | 45 +--------- .../serialization/vespadocumentserializer.cpp | 9 +- 4 files changed, 27 insertions(+), 130 deletions(-) (limited to 'document') diff --git a/document/src/tests/serialization/vespadocumentserializer_test.cpp b/document/src/tests/serialization/vespadocumentserializer_test.cpp index 3d71371e155..fc5dd2c110a 100644 --- a/document/src/tests/serialization/vespadocumentserializer_test.cpp +++ b/document/src/tests/serialization/vespadocumentserializer_test.cpp @@ -496,7 +496,7 @@ TEST("requireThatCompressedStructFieldValueCanBeSerialized") { checkStructSerialization(value, CompressionConfig::LZ4); } -TEST("requireThatReserializationPreservesCompressionIfUnmodified") { +TEST("requireThatReserializationIsUnompressedIfUnmodified") { StructDataType structType(getStructDataType()); StructFieldValue value = getStructFieldValue(structType); const_cast(static_cast(value.getDataType())) @@ -514,7 +514,7 @@ TEST("requireThatReserializationPreservesCompressionIfUnmodified") { deserializer.read(value2); TEST_DO(checkStructSerialization(value, CompressionConfig::LZ4)); // Lazy serialization of structs.... - TEST_DO(checkStructSerialization(value2, CompressionConfig::LZ4)); + TEST_DO(checkStructSerialization(value2, CompressionConfig::NONE)); EXPECT_EQUAL(value, value2); } diff --git a/document/src/vespa/document/fieldvalue/serializablearray.cpp b/document/src/vespa/document/fieldvalue/serializablearray.cpp index afb89ba0acf..7fdb7005fa8 100644 --- a/document/src/vespa/document/fieldvalue/serializablearray.cpp +++ b/document/src/vespa/document/fieldvalue/serializablearray.cpp @@ -32,17 +32,13 @@ SerializableArray::set(EntryMap entries, ByteBuffer buffer, { _entries = std::move(entries); if (CompressionConfig::isCompressed(comp_type)) { - _unlikely = std::make_unique(); - _unlikely->_compSerData = std::move(buffer); - _unlikely->_serializedCompression = comp_type; - _unlikely->_uncompressedLength = uncompressed_length; - _uncompSerData = ByteBuffer(); + _uncompSerData = deCompress(comp_type, uncompressed_length, std::move(buffer)); } else { _uncompSerData = std::move(buffer); - _unlikely.reset(); } } +SerializableArray::SerializableArray() = default; SerializableArray::SerializableArray(SerializableArray &&) noexcept = default; SerializableArray& SerializableArray::operator=(SerializableArray &&) noexcept = default; SerializableArray::~SerializableArray() = default; @@ -60,25 +56,9 @@ ensure(std::unique_ptr &owned) { } -SerializableArray::RarelyUsedBuffers::RarelyUsedBuffers() - : _owned(), - _compSerData(nullptr, 0), - _serializedCompression(CompressionConfig::NONE), - _uncompressedLength(0) -{ } -SerializableArray::RarelyUsedBuffers::~RarelyUsedBuffers() = default; - -SerializableArray::RarelyUsedBuffers::RarelyUsedBuffers(const RarelyUsedBuffers & rhs) - : _owned(), - _compSerData(rhs._compSerData), - _serializedCompression(rhs._serializedCompression), - _uncompressedLength(rhs._uncompressedLength) -{ } - SerializableArray::SerializableArray(const SerializableArray& rhs) : _entries(rhs._entries), - _uncompSerData(rhs._uncompSerData), - _unlikely(rhs._unlikely ? new RarelyUsedBuffers(*rhs._unlikely) : nullptr) + _uncompSerData(rhs._uncompSerData) { for (size_t i(0); i < _entries.size(); i++) { Entry & e(_entries[i]); @@ -86,7 +66,7 @@ SerializableArray::SerializableArray(const SerializableArray& rhs) // Pointing to a buffer in the _owned structure. ByteBuffer buf(ByteBuffer::copyBuffer(e.getBuffer(&_uncompSerData), e.size())); e.setBuffer(buf.getBuffer()); - ensure(_unlikely->_owned)[e.id()] = std::move(buf); + ensure(_owned)[e.id()] = std::move(buf); } else { // If not it is relative to the buffer _uncompSerData, and hence it is valid as is. } @@ -106,31 +86,20 @@ void SerializableArray::clear() { _entries.clear(); _uncompSerData = ByteBuffer(nullptr, 0); - _unlikely.reset(); -} - -void -SerializableArray::invalidate() -{ - if (_unlikely) { - _unlikely->_compSerData = ByteBuffer(nullptr, 0);; - } } void SerializableArray::set(int id, ByteBuffer buffer) { - maybeDecompress(); Entry e(id, buffer.getRemaining(), buffer.getBuffer()); assert(buffer.getRemaining() < 0x80000000ul); - ensure(ensure(_unlikely)._owned)[id] = std::move(buffer); + ensure(_owned)[id] = std::move(buffer); auto it = find(id); if (it == _entries.end()) { _entries.push_back(e); } else { *it = e; } - invalidate(); } void SerializableArray::set(int id, const char* value, int len) @@ -160,88 +129,58 @@ vespalib::ConstBufferRef SerializableArray::get(int id) const { vespalib::ConstBufferRef buf; - if ( !maybeDecompressAndCatch() ) { - auto found = find(id); + auto found = find(id); - if (found != _entries.end()) { - const Entry& entry = *found; - buf = vespalib::ConstBufferRef(entry.getBuffer(&_uncompSerData), entry.size()); - } - } else { - // should we clear all or what? + if (found != _entries.end()) { + const Entry& entry = *found; + buf = vespalib::ConstBufferRef(entry.getBuffer(&_uncompSerData), entry.size()); } return buf; } -bool -SerializableArray::deCompressAndCatch() const -{ - try { - const_cast(this)->deCompress(); - return false; - } catch (const std::exception & e) { - LOG(warning, "Deserializing compressed content failed: %s", e.what()); - return true; - } -} - void SerializableArray::clear(int id) { - maybeDecompress(); auto it = find(id); if (it != _entries.end()) { _entries.erase(it); - if (_unlikely && _unlikely->_owned) { - _unlikely->_owned->erase(id); - } - invalidate(); } } -void -SerializableArray::deCompress() // throw (DeserializeException) +ByteBuffer +SerializableArray::deCompress(CompressionConfig::Type compression, uint32_t uncompressedLength, ByteBuffer compressed) { using vespalib::compression::decompress; // will only do this once - assert(_unlikely && (_unlikely->_compSerData.getRemaining() != 0)); - assert(_uncompSerData.getRemaining() == 0); - assert(CompressionConfig::isCompressed(_unlikely->_serializedCompression)); - uint32_t uncompressedLength = _unlikely->_uncompressedLength; + assert(compressed.getRemaining() != 0); + assert(CompressionConfig::isCompressed(compression)); ByteBuffer newSerialization(vespalib::alloc::Alloc::alloc(uncompressedLength), uncompressedLength); vespalib::DataBuffer unCompressed(newSerialization.getBuffer(), newSerialization.getLength()); unCompressed.clear(); try { - decompress(_unlikely->_serializedCompression, + decompress(compression, uncompressedLength, - vespalib::ConstBufferRef(_unlikely->_compSerData.getBufferAtPos(), _unlikely->_compSerData.getRemaining()), + vespalib::ConstBufferRef(compressed.getBufferAtPos(), compressed.getRemaining()), unCompressed, false); } catch (const std::runtime_error & e) { throw DeserializeException( - make_string( "Document was compressed with code unknown code %d", _unlikely->_serializedCompression), + make_string( "Document was compressed with code unknown code %d", compression), VESPA_STRLOC); } if (unCompressed.getDataLen() != (size_t)uncompressedLength) { throw DeserializeException( make_string("Did not decompress to the expected length: had %u, wanted %d, got %zu", - _unlikely->_compSerData.getRemaining(), uncompressedLength, unCompressed.getDataLen()), + compressed.getRemaining(), uncompressedLength, unCompressed.getDataLen()), VESPA_STRLOC); } assert(newSerialization.getBuffer() == unCompressed.getData()); - _uncompSerData = std::move(newSerialization); - LOG_ASSERT(uncompressedLength == _uncompSerData.getRemaining()); -} - -vespalib::compression::CompressionInfo -SerializableArray::getCompressionInfo() const { - return _unlikely - ? CompressionInfo(_unlikely->_uncompressedLength, _unlikely->_compSerData.getRemaining()) - : CompressionInfo(_uncompSerData.getRemaining(), CompressionConfig::NONE); + LOG_ASSERT(uncompressedLength == newSerialization.getRemaining()); + return newSerialization; } const char * diff --git a/document/src/vespa/document/fieldvalue/serializablearray.h b/document/src/vespa/document/fieldvalue/serializablearray.h index 126f7072d6a..a396fd01a39 100644 --- a/document/src/vespa/document/fieldvalue/serializablearray.h +++ b/document/src/vespa/document/fieldvalue/serializablearray.h @@ -80,11 +80,9 @@ public: using CP = vespalib::CloneablePtr; using UP = std::unique_ptr; - using ByteBufferUP = std::unique_ptr; using CompressionConfig = vespalib::compression::CompressionConfig; - using CompressionInfo = vespalib::compression::CompressionInfo; - SerializableArray() = default; + SerializableArray(); SerializableArray(const SerializableArray&); SerializableArray& operator=(const SerializableArray&); SerializableArray(SerializableArray &&) noexcept; @@ -129,57 +127,22 @@ public: /** Deletes all stored attributes. */ void clear(); - CompressionConfig::Type getCompression() const { - return _unlikely ? _unlikely->_serializedCompression : CompressionConfig::NONE; - } - CompressionInfo getCompressionInfo() const; - bool empty() const { return _entries.empty(); } const ByteBuffer* getSerializedBuffer() const { - return CompressionConfig::isCompressed(getCompression()) - ? &_unlikely->_compSerData - : &_uncompSerData; + return &_uncompSerData; } const EntryMap & getEntries() const { return _entries; } private: - bool shouldDecompress() const { - return _unlikely && (_unlikely->_compSerData.getRemaining() != 0) && (_uncompSerData.getBuffer() == 0); - } - bool maybeDecompressAndCatch() const { - if ( shouldDecompress() ) { - return deCompressAndCatch(); - } - return false; - } - - bool deCompressAndCatch() const; - void maybeDecompress() const { - if ( shouldDecompress() ) { - const_cast(this)->deCompress(); - } - } - void deCompress(); // throw (DeserializeException); - - struct RarelyUsedBuffers { - /** The buffers we own. */ - RarelyUsedBuffers(); - RarelyUsedBuffers(const RarelyUsedBuffers &); - ~RarelyUsedBuffers(); - std::unique_ptr _owned; - ByteBuffer _compSerData; - CompressionConfig::Type _serializedCompression; - uint32_t _uncompressedLength; - }; /** Contains the stored attributes, with reference to the real data.. */ EntryMap _entries; /** Data we deserialized from, if applicable. */ ByteBuffer _uncompSerData; - std::unique_ptr _unlikely; + std::unique_ptr _owned; - VESPA_DLL_LOCAL void invalidate(); + static ByteBuffer deCompress(CompressionConfig::Type compression, uint32_t uncompressedLength, ByteBuffer compressed); // throw (DeserializeException); VESPA_DLL_LOCAL EntryMap::const_iterator find(int id) const; VESPA_DLL_LOCAL EntryMap::iterator find(int id); }; diff --git a/document/src/vespa/document/serialization/vespadocumentserializer.cpp b/document/src/vespa/document/serialization/vespadocumentserializer.cpp index f68a2202788..45a4375a19d 100644 --- a/document/src/vespa/document/serialization/vespadocumentserializer.cpp +++ b/document/src/vespa/document/serialization/vespadocumentserializer.cpp @@ -26,7 +26,6 @@ #include #include #include -#include #include #include #include @@ -298,8 +297,7 @@ VespaDocumentSerializer::structNeedsReserialization(const StructFieldValue &valu return false; } - return (value.getFields().getCompression() != value.getCompressionConfig().type && - value.getFields().getCompression() != CompressionConfig::UNCOMPRESSABLE); + return true; } void VespaDocumentSerializer::writeUnchanged(const SerializableArray &value) { @@ -316,10 +314,7 @@ void VespaDocumentSerializer::writeUnchanged(const SerializableArray &value) { size_t estimatedRequiredSpace = sz + 4 + 1 + 8 + 4 + field_info.size()*12; _stream.reserve(_stream.size() + estimatedRequiredSpace); _stream << sz; - _stream << static_cast(value.getCompression()); - if (CompressionConfig::isCompressed(value.getCompression())) { - putInt2_4_8Bytes(_stream, value.getCompressionInfo().getUncompressedSize()); - } + _stream << static_cast(CompressionConfig::NONE); putFieldInfo(_stream, field_info); if (sz) { _stream.write(buffer->getBuffer(), buffer->getLength()); -- cgit v1.2.3