diff options
author | Henning Baldersheim <balder@yahoo-inc.com> | 2021-11-10 11:09:04 +0000 |
---|---|---|
committer | Henning Baldersheim <balder@yahoo-inc.com> | 2021-11-10 11:37:54 +0000 |
commit | 518a7e7e470f1c0f792dd9e9fd36b23359b31048 (patch) | |
tree | c253ccfc4cab67d389bf6c64fbb27301c8d6e10d | |
parent | 8efde630ff73357e0f31a34f131a120d327bf224 (diff) |
remove support for compression of document content in backend.
12 files changed, 66 insertions, 304 deletions
diff --git a/document/src/tests/documenttestcase.cpp b/document/src/tests/documenttestcase.cpp index 9fa72b7c1e7..aae4b5c6f40 100644 --- a/document/src/tests/documenttestcase.cpp +++ b/document/src/tests/documenttestcase.cpp @@ -19,7 +19,6 @@ #include <gmock/gmock.h> using vespalib::nbostream; -using vespalib::compression::CompressionConfig; using namespace ::testing; using namespace document::config_builder; @@ -699,7 +698,6 @@ TEST(DocumentTest,testReadSerializedAllVersions) doc.setValue("rawfield", RawFieldValue("RAW DATA", 8)); Document docInDoc(*docInDocType, DocumentId("id:ns:docindoc::http://doc.in.doc/")); docInDoc.set("stringindocfield", "Elvis is dead"); - //docInDoc.setCompression(CompressionConfig(CompressionConfig::NONE, 0, 0)); doc.setValue("docfield", docInDoc); ArrayFieldValue floatArray(*arrayOfFloatDataType); floatArray.add(1.0); @@ -713,7 +711,6 @@ TEST(DocumentTest,testReadSerializedAllVersions) // Write document to disk, (when you bump version and alter stuff, // you can copy this current to new test for new version) { - //doc.setCompression(CompressionConfig(CompressionConfig::NONE, 0, 0)); nbostream buf = doc.serialize(); int fd = open(TEST_PATH("data/document-cpp-currentversion-uncompressed.dat").c_str(), O_WRONLY | O_CREAT | O_TRUNC, 0644); @@ -722,20 +719,6 @@ TEST(DocumentTest,testReadSerializedAllVersions) EXPECT_EQ(buf.size(), len); close(fd); } - { - CompressionConfig oldCfg(doc.getType().getFieldsType().getCompressionConfig()); - CompressionConfig newCfg(CompressionConfig::LZ4, 9, 95); - const_cast<StructDataType &>(doc.getType().getFieldsType()).setCompressionConfig(newCfg); - nbostream buf = doc.serialize(); - EXPECT_TRUE(buf.size() <= buf.capacity()); - int fd = open(TEST_PATH("data/document-cpp-currentversion-lz4-9.dat").c_str(), - O_WRONLY | O_CREAT | O_TRUNC, 0644); - EXPECT_TRUE(fd > 0); - size_t len = write(fd, buf.peek(), buf.size()); - EXPECT_EQ(buf.size(), len); - close(fd); - const_cast<StructDataType &>(doc.getType().getFieldsType()).setCompressionConfig(oldCfg); - } } std::string jpath = TEST_PATH("../test/serializeddocuments/"); @@ -877,18 +860,6 @@ TEST(DocumentTest, testGenerateSerializedFile) throw vespalib::Exception("write failed"); } close(fd); - - CompressionConfig newCfg(CompressionConfig::LZ4, 9, 95); - const_cast<StructDataType &>(doc.getType().getFieldsType()).setCompressionConfig(newCfg); - - nbostream lz4buf = doc.serialize(); - - fd = open((serializedDir + "/serializecpp-lz4-level9.dat").c_str(), - O_WRONLY | O_TRUNC | O_CREAT, 0644); - if (write(fd, lz4buf.data(), lz4buf.size()) != (ssize_t)lz4buf.size()) { - throw vespalib::Exception("write failed"); - } - close(fd); } TEST(DocumentTest, testBogusserialize) @@ -1013,84 +984,6 @@ TEST(DocumentTest, testSliceSerialize) EXPECT_EQ(*doc2, doc4); } -TEST(DocumentTest, testCompression) -{ - TestDocMan testDocMan; - Document::UP doc = testDocMan.createDocument(); - - std::string bigString("compress me"); - for (int i = 0; i < 8; ++i) { bigString += bigString; } - - doc->setValue("hstringval", StringFieldValue(bigString)); - - nbostream buf_uncompressed = doc->serialize(); - - CompressionConfig oldCfg(doc->getType().getFieldsType().getCompressionConfig()); - - CompressionConfig newCfg(CompressionConfig::LZ4, 9, 95); - const_cast<StructDataType &>(doc->getType().getFieldsType()).setCompressionConfig(newCfg); - nbostream buf_lz4 = doc->serialize(); - - const_cast<StructDataType &>(doc->getType().getFieldsType()).setCompressionConfig(oldCfg); - - EXPECT_TRUE(buf_lz4.size() < buf_uncompressed.size()); - - Document doc_lz4(testDocMan.getTypeRepo(), buf_lz4); - - EXPECT_EQ(*doc, doc_lz4); -} - -TEST(DocumentTest, testCompressionConfigured) -{ - DocumenttypesConfigBuilderHelper builder; - builder.document(43, "serializetest", - Struct("serializetest.header").setId(44), - Struct("serializetest.body").setId(45) - .addField("stringfield", DataType::T_STRING)); - DocumentTypeRepo repo(builder.config()); - Document doc_uncompressed(*repo.getDocumentType("serializetest"), - DocumentId("id:ns:serializetest::1")); - - std::string bigString("compress me"); - for (int i = 0; i < 8; ++i) { bigString += bigString; } - - doc_uncompressed.setValue("stringfield", StringFieldValue(bigString)); - nbostream buf_uncompressed; - doc_uncompressed.serialize(buf_uncompressed); - - size_t uncompressedSize = buf_uncompressed.size(); - - DocumenttypesConfigBuilderHelper builder2; - builder2.document(43, "serializetest", - Struct("serializetest.header").setId(44), - Struct("serializetest.body").setId(45) - .addField("stringfield", DataType::T_STRING) - .setCompression(DocumenttypesConfig::Documenttype:: - Datatype::Sstruct::Compression::Type::LZ4, - 9, 99, 0)); - DocumentTypeRepo repo2(builder2.config()); - - Document doc(repo2, buf_uncompressed); - - nbostream buf_compressed; - doc.serialize(buf_compressed); - size_t compressedSize = buf_compressed.size(); - - EXPECT_TRUE(compressedSize < uncompressedSize); - - Document doc2(repo2, buf_compressed); - - nbostream buf_compressed2; - doc2.serialize(buf_compressed2); - - EXPECT_EQ(compressedSize, buf_compressed2.size()); - - Document doc3(repo2, buf_compressed2); - - EXPECT_EQ(doc2, doc_uncompressed); - EXPECT_EQ(doc2, doc3); -} - TEST(DocumentTest, testUnknownEntries) { // We should be able to deserialize a document with unknown values in it. diff --git a/document/src/tests/repo/documenttyperepo_test.cpp b/document/src/tests/repo/documenttyperepo_test.cpp index d1456fb40c2..6619d53e609 100644 --- a/document/src/tests/repo/documenttyperepo_test.cpp +++ b/document/src/tests/repo/documenttyperepo_test.cpp @@ -17,7 +17,6 @@ #include <vespa/vespalib/testkit/testapp.h> #include <vespa/vespalib/util/exceptions.h> #include <set> -#include <vespa/config/helper/configgetter.h> using config::AsciiConfigWriter; @@ -26,7 +25,6 @@ using std::vector; using vespalib::Identifiable; using vespalib::IllegalArgumentException; using vespalib::string; -using vespalib::compression::CompressionConfig; using namespace document::config_builder; using namespace document; @@ -42,9 +40,6 @@ const int32_t body_id = 31; const string type_name_2 = "test_2"; const string header_name_2 = type_name_2 + ".header"; const string body_name_2 = type_name_2 + ".body"; -const int32_t comp_level = 10; -const int32_t comp_minres = 80; -const size_t comp_minsize = 120; const string field_name = "field_name"; const string derived_name = "derived"; @@ -80,25 +75,6 @@ TEST("requireThatDocumentTypeCanBeLookedUpWhenIdIsNotAHash") { ASSERT_TRUE(type); } -TEST("requireThatStructsCanConfigureCompression") { - DocumenttypesConfigBuilderHelper builder; - typedef DocumenttypesConfig::Documenttype::Datatype::Sstruct Sstruct; - builder.document(doc_type_id, type_name, - Struct(header_name), - Struct(body_name).setCompression( - Sstruct::Compression::Type::LZ4, - comp_level, comp_minres, comp_minsize)); - DocumentTypeRepo repo(builder.config()); - - const CompressionConfig &comp_config = - repo.getDocumentType(type_name)->getFieldsType() - .getCompressionConfig(); - EXPECT_EQUAL(CompressionConfig::LZ4, comp_config.type); - EXPECT_EQUAL(comp_level, comp_config.compressionLevel); - EXPECT_EQUAL(comp_minres, comp_config.threshold); - EXPECT_EQUAL(comp_minsize, comp_config.minSize); -} - TEST("requireThatStructsCanHaveFields") { DocumenttypesConfigBuilderHelper builder; builder.document(doc_type_id, type_name, diff --git a/document/src/tests/serialization/vespadocumentserializer_test.cpp b/document/src/tests/serialization/vespadocumentserializer_test.cpp index fc5dd2c110a..443c7d1885a 100644 --- a/document/src/tests/serialization/vespadocumentserializer_test.cpp +++ b/document/src/tests/serialization/vespadocumentserializer_test.cpp @@ -44,6 +44,7 @@ #include <vespa/vespalib/objects/nbostream.h> #include <vespa/vespalib/testkit/testapp.h> #include <vespa/document/base/exceptions.h> +#include <vespa/vespalib/util/compressionconfig.h> using document::DocumenttypesConfig; using vespalib::File; @@ -488,21 +489,11 @@ TEST("requireThatUncompressedStructFieldValueCanBeSerialized") { checkStructSerialization(value, CompressionConfig::NONE); } -TEST("requireThatCompressedStructFieldValueCanBeSerialized") { - StructDataType structType(getStructDataType()); - StructFieldValue value = getStructFieldValue(structType); - const_cast<StructDataType *>(static_cast<const StructDataType *>(value.getDataType())) - ->setCompressionConfig(CompressionConfig(CompressionConfig::LZ4, 0, 95)); - checkStructSerialization(value, CompressionConfig::LZ4); -} - TEST("requireThatReserializationIsUnompressedIfUnmodified") { StructDataType structType(getStructDataType()); StructFieldValue value = getStructFieldValue(structType); - const_cast<StructDataType *>(static_cast<const StructDataType *>(value.getDataType())) - ->setCompressionConfig(CompressionConfig(CompressionConfig::LZ4, 0, 95)); - TEST_DO(checkStructSerialization(value, CompressionConfig::LZ4)); + TEST_DO(checkStructSerialization(value, CompressionConfig::NONE)); nbostream os; VespaDocumentSerializer serializer(os); @@ -512,7 +503,7 @@ TEST("requireThatReserializationIsUnompressedIfUnmodified") { StructFieldValue value2(struct_type); VespaDocumentDeserializer deserializer(repo, os, serialization_version); deserializer.read(value2); - TEST_DO(checkStructSerialization(value, CompressionConfig::LZ4)); + TEST_DO(checkStructSerialization(value, CompressionConfig::NONE)); // Lazy serialization of structs.... TEST_DO(checkStructSerialization(value2, CompressionConfig::NONE)); EXPECT_EQUAL(value, value2); diff --git a/document/src/vespa/document/datatype/structdatatype.cpp b/document/src/vespa/document/datatype/structdatatype.cpp index bb927e6e872..e4b39099d08 100644 --- a/document/src/vespa/document/datatype/structdatatype.cpp +++ b/document/src/vespa/document/datatype/structdatatype.cpp @@ -22,22 +22,19 @@ IMPLEMENT_IDENTIFIABLE(StructDataType, StructuredDataType); StructDataType::StructDataType() : StructuredDataType(), _nameFieldMap(), - _idFieldMap(), - _compressionConfig() + _idFieldMap() { } StructDataType::StructDataType(vespalib::stringref name) : StructuredDataType(name), _nameFieldMap(), - _idFieldMap(), - _compressionConfig() + _idFieldMap() { } StructDataType::StructDataType(vespalib::stringref name, int32_t dataTypeId) : StructuredDataType(name, dataTypeId), _nameFieldMap(), - _idFieldMap(), - _compressionConfig() + _idFieldMap() { } StructDataType::~StructDataType() = default; @@ -54,11 +51,6 @@ StructDataType::print(std::ostream& out, bool verbose, out << "StructDataType(" << getName(); if (verbose) { out << ", id " << getId(); - if (_compressionConfig.type != CompressionConfig::NONE) { - out << ", Compression(" << _compressionConfig.type << "," - << int(_compressionConfig.compressionLevel) << "," - << int(_compressionConfig.threshold) << ")"; - } } out << ")"; if (verbose) { diff --git a/document/src/vespa/document/datatype/structdatatype.h b/document/src/vespa/document/datatype/structdatatype.h index 624ce3011ff..ace9edfb0ab 100644 --- a/document/src/vespa/document/datatype/structdatatype.h +++ b/document/src/vespa/document/datatype/structdatatype.h @@ -11,7 +11,6 @@ #include <vespa/document/datatype/structureddatatype.h> #include <vespa/vespalib/stllike/hash_map.h> -#include <vespa/vespalib/util/compressionconfig.h> #include <memory> namespace document { @@ -20,7 +19,6 @@ class StructDataType final : public StructuredDataType { public: using UP = std::unique_ptr<StructDataType>; using SP = std::shared_ptr<StructDataType>; - using CompressionConfig = vespalib::compression::CompressionConfig; StructDataType(); StructDataType(vespalib::stringref name); @@ -65,9 +63,6 @@ public: Field::Set getFieldSet() const override; StructDataType* clone() const override; - void setCompressionConfig(const CompressionConfig& cfg) { _compressionConfig = cfg; }; - const CompressionConfig& getCompressionConfig() const { return _compressionConfig; } - DECLARE_IDENTIFIABLE(StructDataType); private: @@ -75,7 +70,6 @@ private: using IntFieldMap = vespalib::hash_map<int32_t, Field::SP>; StringFieldMap _nameFieldMap; IntFieldMap _idFieldMap; - CompressionConfig _compressionConfig; /** @return "" if not conflicting. Error message otherwise. */ vespalib::string containsConflictingField(const Field& field) const; diff --git a/document/src/vespa/document/fieldvalue/serializablearray.cpp b/document/src/vespa/document/fieldvalue/serializablearray.cpp index 7fdb7005fa8..605e4a698df 100644 --- a/document/src/vespa/document/fieldvalue/serializablearray.cpp +++ b/document/src/vespa/document/fieldvalue/serializablearray.cpp @@ -2,11 +2,9 @@ #include "serializablearray.h" #include <vespa/document/util/serializableexceptions.h> #include <vespa/document/util/bytebuffer.h> -#include <vespa/vespalib/util/compressor.h> #include <vespa/vespalib/stllike/hash_map.hpp> -#include <vespa/vespalib/data/databuffer.h> #include <algorithm> - +#include <cassert> #include <vespa/log/log.h> LOG_SETUP(".document.serializable-array"); @@ -27,15 +25,10 @@ public: } void -SerializableArray::set(EntryMap entries, ByteBuffer buffer, - CompressionConfig::Type comp_type, uint32_t uncompressed_length) +SerializableArray::set(EntryMap entries, ByteBuffer buffer) { _entries = std::move(entries); - if (CompressionConfig::isCompressed(comp_type)) { - _uncompSerData = deCompress(comp_type, uncompressed_length, std::move(buffer)); - } else { - _uncompSerData = std::move(buffer); - } + _uncompSerData = std::move(buffer); } SerializableArray::SerializableArray() = default; @@ -148,41 +141,6 @@ SerializableArray::clear(int id) } } -ByteBuffer -SerializableArray::deCompress(CompressionConfig::Type compression, uint32_t uncompressedLength, ByteBuffer compressed) -{ - using vespalib::compression::decompress; - // will only do this once - - assert(compressed.getRemaining() != 0); - assert(CompressionConfig::isCompressed(compression)); - - ByteBuffer newSerialization(vespalib::alloc::Alloc::alloc(uncompressedLength), uncompressedLength); - vespalib::DataBuffer unCompressed(newSerialization.getBuffer(), newSerialization.getLength()); - unCompressed.clear(); - try { - decompress(compression, - uncompressedLength, - vespalib::ConstBufferRef(compressed.getBufferAtPos(), compressed.getRemaining()), - unCompressed, - false); - } catch (const std::runtime_error & e) { - throw DeserializeException( - make_string( "Document was compressed with code unknown code %d", compression), - VESPA_STRLOC); - } - - if (unCompressed.getDataLen() != (size_t)uncompressedLength) { - throw DeserializeException( - make_string("Did not decompress to the expected length: had %u, wanted %d, got %zu", - compressed.getRemaining(), uncompressedLength, unCompressed.getDataLen()), - VESPA_STRLOC); - } - assert(newSerialization.getBuffer() == unCompressed.getData()); - LOG_ASSERT(uncompressedLength == newSerialization.getRemaining()); - return newSerialization; -} - const char * SerializableArray::Entry::getBuffer(const ByteBuffer * readOnlyBuffer) const { return hasBuffer() ? _data._buffer : readOnlyBuffer->getBuffer() + getOffset(); diff --git a/document/src/vespa/document/fieldvalue/serializablearray.h b/document/src/vespa/document/fieldvalue/serializablearray.h index a396fd01a39..3fef1d21c3c 100644 --- a/document/src/vespa/document/fieldvalue/serializablearray.h +++ b/document/src/vespa/document/fieldvalue/serializablearray.h @@ -16,7 +16,6 @@ #pragma once -#include <vespa/vespalib/util/compressionconfig.h> #include <vespa/vespalib/util/buffer.h> #include <vespa/vespalib/util/memory.h> #include <vespa/document/util/bytebuffer.h> @@ -80,7 +79,6 @@ public: using CP = vespalib::CloneablePtr<SerializableArray>; using UP = std::unique_ptr<SerializableArray>; - using CompressionConfig = vespalib::compression::CompressionConfig; SerializableArray(); SerializableArray(const SerializableArray&); @@ -89,8 +87,7 @@ public: SerializableArray& operator=(SerializableArray &&) noexcept; ~SerializableArray(); - void set(EntryMap entries, ByteBuffer buffer, - CompressionConfig::Type comp_type, uint32_t uncompressed_length); + void set(EntryMap entries, ByteBuffer buffer); /** * Stores a value in the array. * @@ -141,8 +138,6 @@ private: ByteBuffer _uncompSerData; std::unique_ptr<serializablearray::BufferMap> _owned; - - static ByteBuffer deCompress(CompressionConfig::Type compression, uint32_t uncompressedLength, ByteBuffer compressed); // throw (DeserializeException); VESPA_DLL_LOCAL EntryMap::const_iterator find(int id) const; VESPA_DLL_LOCAL EntryMap::iterator find(int id); }; diff --git a/document/src/vespa/document/fieldvalue/structfieldvalue.cpp b/document/src/vespa/document/fieldvalue/structfieldvalue.cpp index fc87fbe3a59..555964d8b34 100644 --- a/document/src/vespa/document/fieldvalue/structfieldvalue.cpp +++ b/document/src/vespa/document/fieldvalue/structfieldvalue.cpp @@ -23,7 +23,6 @@ using std::vector; using vespalib::nbostream; using vespalib::nbostream_longlivedbuf; using vespalib::make_string; -using vespalib::compression::CompressionConfig; using namespace vespalib::xml; namespace document { @@ -50,24 +49,14 @@ StructFieldValue::getStructType() const { return static_cast<const StructDataType &>(getType()); } -const CompressionConfig & -StructFieldValue::getCompressionConfig() const { - return getStructType().getCompressionConfig(); -} - void -StructFieldValue::lazyDeserialize(const FixedTypeRepo &repo, - uint16_t version, - SerializableArray::EntryMap && fm, - ByteBuffer buffer, - CompressionConfig::Type comp_type, - int32_t uncompressed_length) +StructFieldValue::lazyDeserialize(const FixedTypeRepo &repo, uint16_t version, SerializableArray::EntryMap && fm, ByteBuffer buffer) { _repo = &repo.getDocumentTypeRepo(); _doc_type = &repo.getDocumentType(); _version = version; - _fields.set(std::move(fm), std::move(buffer), comp_type, uncompressed_length); + _fields.set(std::move(fm), std::move(buffer)); _hasChanged = false; } diff --git a/document/src/vespa/document/fieldvalue/structfieldvalue.h b/document/src/vespa/document/fieldvalue/structfieldvalue.h index cd8bd0fea0f..ab35dc04421 100644 --- a/document/src/vespa/document/fieldvalue/structfieldvalue.h +++ b/document/src/vespa/document/fieldvalue/structfieldvalue.h @@ -34,7 +34,6 @@ private: public: using UP = std::unique_ptr<StructFieldValue>; - using CompressionConfig = vespalib::compression::CompressionConfig; StructFieldValue(const DataType &type); StructFieldValue(const StructFieldValue & rhs); @@ -48,12 +47,7 @@ public: void setDocumentType(const DocumentType & docType) { _doc_type = & docType; } const SerializableArray & getFields() const { return _fields; } - void lazyDeserialize(const FixedTypeRepo &repo, - uint16_t version, - SerializableArray::EntryMap && fields, - ByteBuffer buffer, - CompressionConfig::Type comp_type, - int32_t uncompressed_length); + void lazyDeserialize(const FixedTypeRepo &repo, uint16_t version, SerializableArray::EntryMap && fields, ByteBuffer buffer); // returns false if the field could not be serialized. bool serializeField(int raw_field_id, uint16_t version, FieldValueWriter &writer) const; @@ -70,8 +64,6 @@ public: const Field& getField(vespalib::stringref name) const override; void clear() override; - const CompressionConfig &getCompressionConfig() const; - // FieldValue implementation. FieldValue& assign(const FieldValue&) override; int compare(const FieldValue& other) const override; diff --git a/document/src/vespa/document/repo/documenttyperepo.cpp b/document/src/vespa/document/repo/documenttyperepo.cpp index b993f50d7b6..15730d14a86 100644 --- a/document/src/vespa/document/repo/documenttyperepo.cpp +++ b/document/src/vespa/document/repo/documenttyperepo.cpp @@ -31,7 +31,6 @@ using vespalib::hash_map; using vespalib::make_string; using vespalib::string; using vespalib::stringref; -using vespalib::compression::CompressionConfig; namespace document { @@ -304,14 +303,6 @@ void addStruct(int32_t id, const Datatype::Sstruct &s, Repo &repo) { } } - CompressionConfig::Type type = CompressionConfig::NONE; - if (s.compression.type == Datatype::Sstruct::Compression::Type::LZ4) { - type = CompressionConfig::LZ4; - } - - struct_type->setCompressionConfig( - CompressionConfig(type, s.compression.level, s.compression.threshold, s.compression.minsize)); - for (size_t i = 0; i < s.field.size(); ++i) { addField(s.field[i], repo, *struct_type); } diff --git a/document/src/vespa/document/serialization/vespadocumentdeserializer.cpp b/document/src/vespa/document/serialization/vespadocumentdeserializer.cpp index 6dd6a4c21bd..d15ebf311bb 100644 --- a/document/src/vespa/document/serialization/vespadocumentdeserializer.cpp +++ b/document/src/vespa/document/serialization/vespadocumentdeserializer.cpp @@ -22,6 +22,8 @@ #include <vespa/vespalib/data/slime/slime.h> #include <vespa/vespalib/stllike/asciistream.h> #include <vespa/vespalib/util/backtrace.h> +#include <vespa/vespalib/util/compressor.h> +#include <vespa/vespalib/data/databuffer.h> #include <vespa/eval/eval/fast_value.h> #include <vespa/eval/eval/value_codec.h> #include <vespa/eval/eval/value.h> @@ -42,6 +44,8 @@ using vespalib::nbostream; using vespalib::Memory; using vespalib::stringref; using vespalib::compression::CompressionConfig; +using vespalib::ConstBufferRef; +using vespalib::make_string_short::fmt; using vespalib::eval::FastValueBuilderFactory; namespace document { @@ -274,7 +278,8 @@ typedef SerializableArray::EntryMap FieldInfo; void readFieldInfo(nbostream& input, SerializableArray::EntryMap & field_info) __attribute__((noinline)); -void readFieldInfo(nbostream& input, SerializableArray::EntryMap & field_info) { +void +readFieldInfo(nbostream& input, SerializableArray::EntryMap & field_info) { size_t field_count = getInt1_4Bytes(input); field_info.reserve(field_count); uint32_t offset = 0; @@ -285,6 +290,36 @@ void readFieldInfo(nbostream& input, SerializableArray::EntryMap & field_info) { offset += size; } } + +ByteBuffer +deCompress(CompressionConfig::Type compression, uint32_t uncompressedLength, vespalib::ConstBufferRef compressed) __attribute__((noinline)); + +ByteBuffer +deCompress(CompressionConfig::Type compression, uint32_t uncompressedLength, vespalib::ConstBufferRef compressed) +{ + using vespalib::compression::decompress; + + assert(compressed.size() != 0); + + ByteBuffer newSerialization(vespalib::alloc::Alloc::alloc(uncompressedLength), uncompressedLength); + vespalib::DataBuffer unCompressed(newSerialization.getBuffer(), newSerialization.getLength()); + unCompressed.clear(); + try { + decompress(compression, uncompressedLength, compressed,unCompressed,false); + } catch (const std::runtime_error & e) { + throw DeserializeException(fmt( "Document was compressed with code unknown code %d", compression), VESPA_STRLOC); + } + + if (unCompressed.getDataLen() != (size_t)uncompressedLength) { + throw DeserializeException(fmt("Did not decompress to the expected length: had %lu, wanted %d, got %zu", + compressed.size(), uncompressedLength, unCompressed.getDataLen()), + VESPA_STRLOC); + } + assert(newSerialization.getBuffer() == unCompressed.getData()); + LOG_ASSERT(uncompressedLength == newSerialization.getRemaining()); + return newSerialization; +} + } // namespace void VespaDocumentDeserializer::readStructNoReset(StructFieldValue &value) { @@ -308,19 +343,18 @@ void VespaDocumentDeserializer::readStructNoReset(StructFieldValue &value) { } if (data_size > 0) { - ByteBuffer buffer(_stream.isLongLivedBuffer() - ? ByteBuffer(_stream.peek(), data_size) - : ByteBuffer::copyBuffer(_stream.peek(), data_size)); + ByteBuffer buffer = CompressionConfig::isCompressed(compression_type) + ? deCompress(compression_type, uncompressed_size, ConstBufferRef(_stream.peek(), data_size)) + : _stream.isLongLivedBuffer() + ? ByteBuffer(_stream.peek(), data_size) + : ByteBuffer::copyBuffer(_stream.peek(), data_size); if (value.getFields().empty()) { - LOG(spam, "Lazy deserializing into %s with _version %u", - value.getDataType()->getName().c_str(), _version); - value.lazyDeserialize(_repo, _version, std::move(field_info), - std::move(buffer), compression_type, uncompressed_size); + LOG(spam, "Lazy deserializing into %s with _version %u", value.getDataType()->getName().c_str(), _version); + value.lazyDeserialize(_repo, _version, std::move(field_info), std::move(buffer)); } else { LOG(debug, "Legacy dual header/body format. -> Merging."); StructFieldValue tmp(*value.getDataType()); - tmp.lazyDeserialize(_repo, _version, std::move(field_info), - std::move(buffer), compression_type, uncompressed_size); + tmp.lazyDeserialize(_repo, _version, std::move(field_info), std::move(buffer)); for (const auto & entry : tmp) { try { FieldValue::UP decoded = tmp.getValue(entry); @@ -367,7 +401,7 @@ VespaDocumentDeserializer::readTensor() { size_t length = _stream.getInt1_4Bytes(); if (length > _stream.size()) { - throw DeserializeException(vespalib::make_string("Stream failed size(%zu), needed(%zu) to deserialize tensor field value", _stream.size(), length), + throw DeserializeException(fmt("Stream failed size(%zu), needed(%zu) to deserialize tensor field value", _stream.size(), length), VESPA_STRLOC); } std::unique_ptr<vespalib::eval::Value> tensor; diff --git a/document/src/vespa/document/serialization/vespadocumentserializer.cpp b/document/src/vespa/document/serialization/vespadocumentserializer.cpp index 45a4375a19d..dc37e036775 100644 --- a/document/src/vespa/document/serialization/vespadocumentserializer.cpp +++ b/document/src/vespa/document/serialization/vespadocumentserializer.cpp @@ -235,36 +235,6 @@ serializeFields(const StructFieldValue &value, nbostream &stream, } } -bool compressionSufficient(const CompressionConfig &config, uint64_t old_size, size_t new_size) -{ - return (new_size + 8) < (old_size * config.threshold / 100); -} - -bool bigEnough(size_t size, const CompressionConfig &config) -{ - return (size >= config.minSize); -} - -vespalib::ConstBufferRef -compressStream(const CompressionConfig &config, nbostream &stream, vespalib::DataBuffer & compressed_data) -{ - using vespalib::compression::compress; - vespalib::ConstBufferRef buf(stream.data(), stream.size()); - if (config.useCompression() && bigEnough(stream.size(), config)) { - CompressionConfig::Type compressedType = compress(config, - vespalib::ConstBufferRef(stream.data(), stream.size()), - compressed_data, false); - if (compressedType != config.type || - ! compressionSufficient(config, stream.size(), compressed_data.getDataLen())) - { - compressed_data.clear(); - } else { - buf = vespalib::ConstBufferRef(compressed_data.getData(), compressed_data.getDataLen()); - } - } - return buf; -} - void putFieldInfo(nbostream &output, const vector<pair<uint32_t, uint32_t> > &field_info) { putInt1_4Bytes(output, field_info.size()); @@ -293,14 +263,11 @@ VespaDocumentSerializer::structNeedsReserialization(const StructFieldValue &valu return true; } - if (value.getCompressionConfig().type == CompressionConfig::NONE) { - return false; - } - - return true; + return false; } -void VespaDocumentSerializer::writeUnchanged(const SerializableArray &value) { +void +VespaDocumentSerializer::writeUnchanged(const SerializableArray &value) { vector<pair<uint32_t, uint32_t> > field_info; const std::vector<SerializableArray::Entry>& entries = value.getEntries(); @@ -321,28 +288,18 @@ void VespaDocumentSerializer::writeUnchanged(const SerializableArray &value) { } } -void VespaDocumentSerializer::write(const StructFieldValue &value, const FieldSet& fieldSet) +void +VespaDocumentSerializer::write(const StructFieldValue &value, const FieldSet& fieldSet) { nbostream value_stream; vector<pair<uint32_t, uint32_t> > field_info; serializeFields(value, value_stream, field_info, fieldSet); - const CompressionConfig &comp_config = value.getCompressionConfig(); - vespalib::DataBuffer compressed_data; - vespalib::ConstBufferRef toSerialize = compressStream(comp_config, value_stream, compressed_data); - - uint8_t comp_type = (compressed_data.getDataLen() == 0) - ? (comp_config.type == CompressionConfig::NONE - ? CompressionConfig::NONE - : CompressionConfig::UNCOMPRESSABLE) - : comp_config.type; - _stream << static_cast<uint32_t>(toSerialize.size()); + uint8_t comp_type = CompressionConfig::NONE; + _stream << static_cast<uint32_t>(value_stream.size()); _stream << comp_type; - if (compressed_data.getDataLen() != 0) { - putInt2_4_8Bytes(_stream, value_stream.size()); - } putFieldInfo(_stream, field_info); - _stream.write(toSerialize.c_str(), toSerialize.size()); + _stream.write(value_stream.data(), value_stream.size()); } void |