diff options
author | Henning Baldersheim <balder@yahoo-inc.com> | 2020-01-22 12:22:16 +0000 |
---|---|---|
committer | Henning Baldersheim <balder@yahoo-inc.com> | 2020-01-23 05:56:45 +0000 |
commit | 694e53f832e65010f246ba9ef4ab02796700c873 (patch) | |
tree | f3009fb4130bd175c2fc67605e0b4e0509143ad6 | |
parent | c430519892bc41b296c7b8bf8885014b734940fd (diff) |
Use a single chunk
36 files changed, 155 insertions, 257 deletions
diff --git a/document/src/test/document/serializecpp-lz4-level9.dat b/document/src/test/document/serializecpp-lz4-level9.dat Binary files differindex db759871107..1dffaa2d7a7 100644 --- a/document/src/test/document/serializecpp-lz4-level9.dat +++ b/document/src/test/document/serializecpp-lz4-level9.dat diff --git a/document/src/test/document/serializecpp.dat b/document/src/test/document/serializecpp.dat Binary files differindex 6435dc088ac..d004d11e47b 100644 --- a/document/src/test/document/serializecpp.dat +++ b/document/src/test/document/serializecpp.dat diff --git a/document/src/test/document/serializecppsplit_header.dat b/document/src/test/document/serializecppsplit_header.dat Binary files differindex 08176e4e737..d004d11e47b 100755 --- a/document/src/test/document/serializecppsplit_header.dat +++ b/document/src/test/document/serializecppsplit_header.dat diff --git a/document/src/test/resources/predicates/false__cpp b/document/src/test/resources/predicates/false__cpp Binary files differindex 00a71d5fe73..c9f426b3aaf 100644 --- a/document/src/test/resources/predicates/false__cpp +++ b/document/src/test/resources/predicates/false__cpp diff --git a/document/src/test/resources/predicates/foo_in_6_9__cpp b/document/src/test/resources/predicates/foo_in_6_9__cpp Binary files differindex f9f7249e65b..e063a3aad6a 100644 --- a/document/src/test/resources/predicates/foo_in_6_9__cpp +++ b/document/src/test/resources/predicates/foo_in_6_9__cpp diff --git a/document/src/test/resources/predicates/foo_in_6_x__cpp b/document/src/test/resources/predicates/foo_in_6_x__cpp Binary files differindex 0ccf8e9794b..e2b261668ed 100644 --- a/document/src/test/resources/predicates/foo_in_6_x__cpp +++ b/document/src/test/resources/predicates/foo_in_6_x__cpp diff --git a/document/src/test/resources/predicates/foo_in_bar__cpp b/document/src/test/resources/predicates/foo_in_bar__cpp Binary files differindex a2761427c6a..8a19ba3dc58 100644 --- a/document/src/test/resources/predicates/foo_in_bar__cpp +++ b/document/src/test/resources/predicates/foo_in_bar__cpp diff --git a/document/src/test/resources/predicates/foo_in_bar_and_baz_in_cox__cpp b/document/src/test/resources/predicates/foo_in_bar_and_baz_in_cox__cpp Binary files differindex 8a2705e6f62..8583f37da57 100644 --- a/document/src/test/resources/predicates/foo_in_bar_and_baz_in_cox__cpp +++ b/document/src/test/resources/predicates/foo_in_bar_and_baz_in_cox__cpp diff --git a/document/src/test/resources/predicates/foo_in_bar_baz__cpp b/document/src/test/resources/predicates/foo_in_bar_baz__cpp Binary files differindex b4cf08bbf57..ed5bc82e2eb 100644 --- a/document/src/test/resources/predicates/foo_in_bar_baz__cpp +++ b/document/src/test/resources/predicates/foo_in_bar_baz__cpp diff --git a/document/src/test/resources/predicates/foo_in_bar_or_baz_in_cox__cpp b/document/src/test/resources/predicates/foo_in_bar_or_baz_in_cox__cpp Binary files differindex 4d5474c24e4..3b1dbd541fa 100644 --- a/document/src/test/resources/predicates/foo_in_bar_or_baz_in_cox__cpp +++ b/document/src/test/resources/predicates/foo_in_bar_or_baz_in_cox__cpp diff --git a/document/src/test/resources/predicates/foo_in_x_9__cpp b/document/src/test/resources/predicates/foo_in_x_9__cpp Binary files differindex 29218b9e944..9490703c980 100644 --- a/document/src/test/resources/predicates/foo_in_x_9__cpp +++ b/document/src/test/resources/predicates/foo_in_x_9__cpp diff --git a/document/src/test/resources/predicates/foo_in_x__cpp b/document/src/test/resources/predicates/foo_in_x__cpp Binary files differindex bd2916835c6..636fdffb856 100644 --- a/document/src/test/resources/predicates/foo_in_x__cpp +++ b/document/src/test/resources/predicates/foo_in_x__cpp diff --git a/document/src/test/resources/predicates/foo_in_x_x__cpp b/document/src/test/resources/predicates/foo_in_x_x__cpp Binary files differindex e0d8282c46d..57e88bc4e80 100644 --- a/document/src/test/resources/predicates/foo_in_x_x__cpp +++ b/document/src/test/resources/predicates/foo_in_x_x__cpp diff --git a/document/src/test/resources/predicates/not_foo_in_bar__cpp b/document/src/test/resources/predicates/not_foo_in_bar__cpp Binary files differindex b654de6d53e..a7cbacbaf35 100644 --- a/document/src/test/resources/predicates/not_foo_in_bar__cpp +++ b/document/src/test/resources/predicates/not_foo_in_bar__cpp diff --git a/document/src/test/resources/predicates/true__cpp b/document/src/test/resources/predicates/true__cpp Binary files differindex 2b5da7409d5..87356a5ed44 100644 --- a/document/src/test/resources/predicates/true__cpp +++ b/document/src/test/resources/predicates/true__cpp diff --git a/document/src/test/resources/tensor/empty_tensor__cpp b/document/src/test/resources/tensor/empty_tensor__cpp Binary files differindex 2c15c152558..cf878f0e689 100644 --- a/document/src/test/resources/tensor/empty_tensor__cpp +++ b/document/src/test/resources/tensor/empty_tensor__cpp diff --git a/document/src/test/resources/tensor/multi_cell_tensor__cpp b/document/src/test/resources/tensor/multi_cell_tensor__cpp Binary files differindex d4c7c5fbbe5..9adda236a4a 100644 --- a/document/src/test/resources/tensor/multi_cell_tensor__cpp +++ b/document/src/test/resources/tensor/multi_cell_tensor__cpp diff --git a/document/src/test/resources/tensor/non_existing_tensor__cpp b/document/src/test/resources/tensor/non_existing_tensor__cpp Binary files differindex 08cbcac6dd3..7a1d95ff132 100644 --- a/document/src/test/resources/tensor/non_existing_tensor__cpp +++ b/document/src/test/resources/tensor/non_existing_tensor__cpp diff --git a/document/src/tests/data/document-cpp-currentversion-lz4-9.dat b/document/src/tests/data/document-cpp-currentversion-lz4-9.dat Binary files differindex 7b0650996db..3383d97f253 100644 --- a/document/src/tests/data/document-cpp-currentversion-lz4-9.dat +++ b/document/src/tests/data/document-cpp-currentversion-lz4-9.dat diff --git a/document/src/tests/data/document-cpp-currentversion-uncompressed.dat b/document/src/tests/data/document-cpp-currentversion-uncompressed.dat Binary files differindex 2b39c42f8a4..07d63edf576 100644 --- a/document/src/tests/data/document-cpp-currentversion-uncompressed.dat +++ b/document/src/tests/data/document-cpp-currentversion-uncompressed.dat diff --git a/document/src/tests/documenttestcase.cpp b/document/src/tests/documenttestcase.cpp index 7735ccdbb1d..99aebe26720 100644 --- a/document/src/tests/documenttestcase.cpp +++ b/document/src/tests/documenttestcase.cpp @@ -36,8 +36,8 @@ TEST(DocumentTest, testSizeOf) EXPECT_EQ(32u, sizeof(vespalib::GrowableByteBuffer)); EXPECT_EQ(88ul, sizeof(IdString)); EXPECT_EQ(104ul, sizeof(DocumentId)); - EXPECT_EQ(200ul, sizeof(Document)); - EXPECT_EQ(64ul, sizeof(StructFieldValue)); + EXPECT_EQ(248ul, sizeof(Document)); + EXPECT_EQ(112ul, sizeof(StructFieldValue)); EXPECT_EQ(24ul, sizeof(StructuredFieldValue)); EXPECT_EQ(64ul, sizeof(SerializableArray)); } @@ -585,8 +585,7 @@ TEST(DocumentTest, testReadSerializedFile) EXPECT_TRUE(buf2.empty()); buf2.rp(0); - EXPECT_EQ(len, buf2.size()); - EXPECT_TRUE(memcmp(buf2.peek(), buf.get(), buf2.size()) == 0); + EXPECT_EQ(len - 13, buf2.size()); // Size is smaller as we are merging to one chunk. doc2.setValue("stringfield", StringFieldValue("hei")); diff --git a/document/src/tests/serialization/vespadocumentserializer_test.cpp b/document/src/tests/serialization/vespadocumentserializer_test.cpp index f680b3e6cff..2ad06dc93de 100644 --- a/document/src/tests/serialization/vespadocumentserializer_test.cpp +++ b/document/src/tests/serialization/vespadocumentserializer_test.cpp @@ -535,12 +535,12 @@ TEST("requireThatDocumentCanBeSerialized") { uint32_t size; stream >> read_version >> size; EXPECT_EQUAL(serialization_version, read_version); - EXPECT_EQUAL(70u, size); + EXPECT_EQUAL(64u, size); EXPECT_EQUAL(doc_id.getScheme().toString(), stream.peek()); stream.adjustReadPos(doc_id.getScheme().toString().size() + 1); uint8_t content_code; stream >> content_code; - EXPECT_EQUAL(0x07, content_code); + EXPECT_EQUAL(0x03, content_code); EXPECT_EQUAL(type.getName(), stream.peek()); stream.adjustReadPos(type.getName().size() + 1); stream >> read_version; diff --git a/document/src/vespa/document/datatype/documenttype.cpp b/document/src/vespa/document/datatype/documenttype.cpp index 8198abfc7b1..ffc250eacba 100644 --- a/document/src/vespa/document/datatype/documenttype.cpp +++ b/document/src/vespa/document/datatype/documenttype.cpp @@ -18,9 +18,7 @@ namespace document { IMPLEMENT_IDENTIFIABLE(DocumentType, StructuredDataType); -DocumentType::DocumentType() -{ -} +DocumentType::DocumentType() = default; DocumentType::DocumentType(stringref name, int32_t id) : StructuredDataType(name, id), @@ -68,9 +66,7 @@ DocumentType::DocumentType(stringref name, const StructDataType& fields) } } -DocumentType::~DocumentType() -{ -} +DocumentType::~DocumentType() = default; DocumentType & DocumentType::addFieldSet(const vespalib::string & name, const FieldSet::Fields & fields) @@ -150,10 +146,8 @@ DocumentType::inherit(const DocumentType &docType) { bool DocumentType::isA(const DataType& other) const { - for (std::vector<const DocumentType *>::const_iterator - it = _inheritedTypes.begin(); it != _inheritedTypes.end(); ++it) - { - if ((*it)->isA(other)) return true; + for (const DocumentType * docType : _inheritedTypes) { + if (docType->isA(other)) return true; } return (*this == other); } @@ -161,12 +155,11 @@ DocumentType::isA(const DataType& other) const FieldValue::UP DocumentType::createFieldValue() const { - return FieldValue::UP(new Document(*this, DocumentId("doc::"))); + return std::make_unique<Document>(*this, DocumentId("id::" + getName() + "::")); } void -DocumentType::print(std::ostream& out, bool verbose, - const std::string& indent) const +DocumentType::print(std::ostream& out, bool verbose, const std::string& indent) const { out << "DocumentType(" << getName(); if (verbose) { diff --git a/document/src/vespa/document/fieldset/fieldsets.cpp b/document/src/vespa/document/fieldset/fieldsets.cpp index cb162017777..1f9ea32273b 100644 --- a/document/src/vespa/document/fieldset/fieldsets.cpp +++ b/document/src/vespa/document/fieldset/fieldsets.cpp @@ -149,7 +149,7 @@ Document::UP FieldSet::createDocumentSubsetCopy(const Document& src, const FieldSet& fields) { - Document::UP ret(new Document(src.getType(), src.getId())); + auto ret = std::make_unique<Document>(src.getType(), src.getId()); copyFields(*ret, src, fields); return ret; } diff --git a/document/src/vespa/document/fieldvalue/document.cpp b/document/src/vespa/document/fieldvalue/document.cpp index 969684f104b..744409ac206 100644 --- a/document/src/vespa/document/fieldvalue/document.cpp +++ b/document/src/vespa/document/fieldvalue/document.cpp @@ -210,7 +210,7 @@ Document::calculateChecksum() const void Document::serializeHeader(nbostream& stream) const { VespaDocumentSerializer serializer(stream); - serializer.write(*this, WITHOUT_BODY); + serializer.write(*this); } void Document::deserialize(const DocumentTypeRepo& repo, vespalib::nbostream & os) { diff --git a/document/src/vespa/document/fieldvalue/serializablearray.cpp b/document/src/vespa/document/fieldvalue/serializablearray.cpp index 722904659ef..cb638b2a0b7 100644 --- a/document/src/vespa/document/fieldvalue/serializablearray.cpp +++ b/document/src/vespa/document/fieldvalue/serializablearray.cpp @@ -81,6 +81,13 @@ SerializableArray::SerializableArray(const SerializableArray& other) } } +SerializableArray & +SerializableArray::operator=(const SerializableArray &rhs) +{ + *this = SerializableArray(rhs); + return *this; +} + void SerializableArray::clear() { _entries.clear(); @@ -90,6 +97,8 @@ void SerializableArray::clear() _uncompressedLength = 0; } +SerializableArray::SerializableArray(SerializableArray &&) noexcept = default; +SerializableArray& SerializableArray::operator=(SerializableArray &&) noexcept = default; SerializableArray::~SerializableArray() = default; void diff --git a/document/src/vespa/document/fieldvalue/serializablearray.h b/document/src/vespa/document/fieldvalue/serializablearray.h index 1e766599dff..12f3f28cdf7 100644 --- a/document/src/vespa/document/fieldvalue/serializablearray.h +++ b/document/src/vespa/document/fieldvalue/serializablearray.h @@ -137,8 +137,10 @@ public: } SerializableArray* clone() const override { return new SerializableArray(*this); } - SerializableArray(const SerializableArray&); // Public only for test - SerializableArray& operator=(const SerializableArray&) = delete; + SerializableArray(const SerializableArray&); + SerializableArray& operator=(const SerializableArray&); + SerializableArray(SerializableArray &&) noexcept; + SerializableArray& operator=(SerializableArray &&) noexcept; const EntryMap & getEntries() const { return _entries; } private: bool shouldDecompress() const { diff --git a/document/src/vespa/document/fieldvalue/structfieldvalue.cpp b/document/src/vespa/document/fieldvalue/structfieldvalue.cpp index b861537f6f7..55c389c503e 100644 --- a/document/src/vespa/document/fieldvalue/structfieldvalue.cpp +++ b/document/src/vespa/document/fieldvalue/structfieldvalue.cpp @@ -32,6 +32,7 @@ IMPLEMENT_IDENTIFIABLE_ABSTRACT(StructFieldValue, StructuredFieldValue); StructFieldValue::StructFieldValue(const DataType &type) : StructuredFieldValue(type), + _fields(), _repo(nullptr), _doc_type(nullptr), _version(Document::getNewestSerializationVersion()), @@ -44,20 +45,6 @@ StructFieldValue & StructFieldValue::operator = (const StructFieldValue & rhs) = StructFieldValue::~StructFieldValue() = default; -StructFieldValue::Chunks::~Chunks() = default; - -void -StructFieldValue::Chunks::push_back(SerializableArray::UP item) { - assert(size() < 2); - _chunks[size()] = std::move(item); -} - -void -StructFieldValue::Chunks::clear() { - _chunks[0].reset(); - _chunks[1].reset(); -} - const StructDataType & StructFieldValue::getStructType() const { return static_cast<const StructDataType &>(getType()); @@ -80,18 +67,15 @@ StructFieldValue::lazyDeserialize(const FixedTypeRepo &repo, _doc_type = &repo.getDocumentType(); _version = version; - _chunks.push_back(std::make_unique<SerializableArray>(std::move(fm), std::move(buffer), comp_type, uncompressed_length)); + _fields = SerializableArray(std::move(fm), std::move(buffer), comp_type, uncompressed_length); _hasChanged = false; } bool StructFieldValue::serializeField(int field_id, uint16_t version, FieldValueWriter &writer) const { if (version == _version) { - for (int i = _chunks.size() - 1; i >= 0; --i) { - vespalib::ConstBufferRef buf = _chunks[i].get(field_id); - if ( buf.size() != 0) { - writer.writeSerializedData(buf.data(), buf.size()); - break; - } + vespalib::ConstBufferRef buf = _fields.get(field_id); + if ( buf.size() != 0) { + writer.writeSerializedData(buf.data(), buf.size()); } return true; @@ -108,33 +92,21 @@ bool StructFieldValue::serializeField(int field_id, uint16_t version, FieldValue void StructFieldValue::getRawFieldIds(vector<int> &raw_ids) const { raw_ids.clear(); - - size_t count(0); - for (uint32_t i = 0; i < _chunks.size(); ++i) { - count += _chunks[i].getEntries().size(); - } - raw_ids.reserve(count); - for (uint32_t i = 0; i < _chunks.size(); ++i) { - const SerializableArray::EntryMap & entries = _chunks[i].getEntries(); - for (const SerializableArray::Entry & entry : entries) { - raw_ids.emplace_back(entry.id()); - } + raw_ids.reserve(_fields.getEntries().size()); + for (const SerializableArray::Entry & entry : _fields.getEntries()) { + raw_ids.emplace_back(entry.id()); } sort(raw_ids.begin(), raw_ids.end()); raw_ids.erase(unique(raw_ids.begin(), raw_ids.end()), raw_ids.end()); } void -StructFieldValue::getRawFieldIds(vector<int> &raw_ids, - const FieldSet& fieldSet) const { +StructFieldValue::getRawFieldIds(vector<int> &raw_ids,const FieldSet& fieldSet) const { raw_ids.clear(); - for (uint32_t i = 0; i < _chunks.size(); ++i) { - const SerializableArray::EntryMap & entries = _chunks[i].getEntries(); - for (const SerializableArray::Entry & entry : entries) { - if (fieldSet.contains(getStructType().getField(entry.id()))) { - raw_ids.push_back(entry.id()); - } + for (const SerializableArray::Entry & entry : _fields.getEntries()) { + if (fieldSet.contains(getStructType().getField(entry.id()))) { + raw_ids.emplace_back(entry.id()); } } sort(raw_ids.begin(), raw_ids.end()); @@ -175,19 +147,17 @@ StructFieldValue::getFieldValue(const Field& field) const { int fieldId = field.getId(); - for (int i = _chunks.size() - 1; i >= 0; --i) { - vespalib::ConstBufferRef buf = _chunks[i].get(fieldId); - if (buf.size() != 0) { - nbostream stream(buf.c_str(), buf.size()); - FieldValue::UP value(field.getDataType().createFieldValue()); - if ((_repo == nullptr) && (_doc_type != nullptr)) { - DocumentTypeRepo tmpRepo(*_doc_type); - createFV(*value, tmpRepo, stream, *_doc_type, _version); - } else { - createFV(*value, *_repo, stream, *_doc_type, _version); - } - return value; + vespalib::ConstBufferRef buf = _fields.get(fieldId); + if (buf.size() != 0) { + nbostream stream(buf.c_str(), buf.size()); + FieldValue::UP value(field.getDataType().createFieldValue()); + if ((_repo == nullptr) && (_doc_type != nullptr)) { + DocumentTypeRepo tmpRepo(*_doc_type); + createFV(*value, tmpRepo, stream, *_doc_type, _version); + } else { + createFV(*value, *_repo, stream, *_doc_type, _version); } + return value; } return FieldValue::UP(); } @@ -195,11 +165,9 @@ StructFieldValue::getFieldValue(const Field& field) const vespalib::ConstBufferRef StructFieldValue::getRawField(uint32_t id) const { - for (int i = _chunks.size() - 1; i >= 0; --i) { - vespalib::ConstBufferRef buf = _chunks[i].get(id); - if (buf.size() > 0) { - return buf; - } + vespalib::ConstBufferRef buf = _fields.get(id); + if (buf.size() > 0) { + return buf; } return vespalib::ConstBufferRef(); @@ -227,13 +195,7 @@ StructFieldValue::getFieldValue(const Field& field, FieldValue& value) const bool StructFieldValue::hasFieldValue(const Field& field) const { - for (int i = _chunks.size() - 1; i >= 0; --i) { - if (_chunks[i].has(field.getId())) { - return true; - } - } - - return false; + return _fields.has(field.getId()); } namespace { @@ -254,11 +216,8 @@ StructFieldValue::setFieldValue(const Field& field, FieldValue::UP value) int fieldId = field.getId(); std::unique_ptr<ByteBuffer> serialized = serializeDoc(*value); - if (_chunks.empty()) { - _chunks.push_back(std::make_unique<SerializableArray>()); - } - _chunks[0].set(fieldId, std::move(serialized)); + _fields.set(fieldId, std::move(serialized)); _hasChanged = true; } @@ -266,16 +225,14 @@ StructFieldValue::setFieldValue(const Field& field, FieldValue::UP value) void StructFieldValue::removeFieldValue(const Field& field) { - for (uint32_t i = 0; i < _chunks.size(); ++i) { - _chunks[i].clear(field.getId()); - } + _fields.clear(field.getId()); _hasChanged = true; } void StructFieldValue::clear() { - _chunks.clear(); + _fields.clear(); _hasChanged = true; } @@ -374,13 +331,7 @@ StructFieldValue::print(std::ostream& out, bool verbose, bool StructFieldValue::empty() const { - for (uint32_t i = 0; i < _chunks.size(); ++i) { - if (!_chunks[i].empty()) { - return false; - } - } - - return true; + return _fields.empty(); } void @@ -428,15 +379,12 @@ struct StructFieldValue::FieldIterator : public StructuredIterator { StructuredFieldValue::StructuredIterator::UP StructFieldValue::getIterator(const Field* toFind) const { - StructuredIterator::UP ret; - - auto *fi = new FieldIterator(*this); - ret.reset(fi); + auto fi = std::make_unique<FieldIterator>(*this); if (toFind != nullptr) { fi->skipTo(toFind->getId()); } - return ret; + return fi; } void diff --git a/document/src/vespa/document/fieldvalue/structfieldvalue.h b/document/src/vespa/document/fieldvalue/structfieldvalue.h index 30500229813..85912fe199a 100644 --- a/document/src/vespa/document/fieldvalue/structfieldvalue.h +++ b/document/src/vespa/document/fieldvalue/structfieldvalue.h @@ -24,23 +24,8 @@ class StructDataType; class StructFieldValue : public StructuredFieldValue { -public: - class Chunks { - public: - Chunks() { } - ~Chunks(); - SerializableArray & operator [] (size_t i) { return *_chunks[i]; } - const SerializableArray & operator [] (size_t i) const { return *_chunks[i]; } - VESPA_DLL_LOCAL void push_back(SerializableArray::UP item); - size_t size() const { return _chunks[1] ? 2 : _chunks[0] ? 1 : 0; } - bool empty() const { return !_chunks[0]; } - VESPA_DLL_LOCAL void clear(); - private: - SerializableArray::CP _chunks[2]; - }; private: - Chunks _chunks; - + SerializableArray _fields; // As we do lazy deserialization, we need these saved const DocumentTypeRepo *_repo; const DocumentType *_doc_type; @@ -54,11 +39,14 @@ public: StructFieldValue(const DataType &type); StructFieldValue(const StructFieldValue & rhs); StructFieldValue & operator = (const StructFieldValue & rhs); + StructFieldValue(StructFieldValue && rhs) = default; + StructFieldValue & operator = (StructFieldValue && rhs) = default; ~StructFieldValue() override; void setRepo(const DocumentTypeRepo & repo) { _repo = & repo; } const DocumentTypeRepo * getRepo() const { return _repo; } void setDocumentType(const DocumentType & docType) { _doc_type = & docType; } + const SerializableArray & getFields() const { return _fields; } void lazyDeserialize(const FixedTypeRepo &repo, uint16_t version, @@ -71,8 +59,6 @@ public: bool serializeField(int raw_field_id, uint16_t version, FieldValueWriter &writer) const; uint16_t getVersion() const { return _version; } - const Chunks & getChunks() const { return _chunks; } - // raw_ids may contain ids for elements not in the struct's datatype. void getRawFieldIds(std::vector<int> &raw_ids) const; void getRawFieldIds(std::vector<int> &raw_ids, const FieldSet& fieldSet) const; @@ -119,7 +105,6 @@ private: VESPA_DLL_LOCAL vespalib::ConstBufferRef getRawField(uint32_t id) const; VESPA_DLL_LOCAL const StructDataType & getStructType() const; - // Iterator implementation struct FieldIterator; friend struct FieldIterator; diff --git a/document/src/vespa/document/fieldvalue/structuredfieldvalue.h b/document/src/vespa/document/fieldvalue/structuredfieldvalue.h index b81dc8e3936..a96e9a95af1 100644 --- a/document/src/vespa/document/fieldvalue/structuredfieldvalue.h +++ b/document/src/vespa/document/fieldvalue/structuredfieldvalue.h @@ -147,8 +147,12 @@ public: * * @throws vespalib::IllegalArgumentException If value given has wrong type */ - inline void setValue(const Field& field, const FieldValue& value) - { setFieldValue(field, value); } + void setValue(const Field& field, const FieldValue& value) { + setFieldValue(field, value); + } + void setValue(const Field& field, FieldValue::UP value) { + setFieldValue(field, std::move(value)); + } /** Remove the value of given field if it is set. */ //These are affected by the begin/commitTanasaction @@ -157,12 +161,15 @@ public: virtual void clear() = 0; // Utility functions for easy but less efficient access - bool hasValue(vespalib::stringref fieldName) const - { return hasFieldValue(getField(fieldName)); } - void remove(vespalib::stringref fieldName) - { removeFieldValue(getField(fieldName)); } - void setValue(vespalib::stringref fieldName, const FieldValue& value) - { setFieldValue(getField(fieldName), value); } + bool hasValue(vespalib::stringref fieldName) const { + return hasFieldValue(getField(fieldName)); + } + void remove(vespalib::stringref fieldName) { + removeFieldValue(getField(fieldName)); + } + void setValue(vespalib::stringref fieldName, const FieldValue& value) { + setFieldValue(getField(fieldName), value); + } template<typename PrimitiveType> void set(const Field& field, PrimitiveType value); template<typename PrimitiveType> @@ -176,7 +183,7 @@ public: virtual bool empty() const = 0; typedef Iterator const_iterator; - const_iterator begin() const { return const_iterator(*this, NULL); } + const_iterator begin() const { return const_iterator(*this, nullptr); } const_iterator end() const { return const_iterator(); } /** diff --git a/document/src/vespa/document/serialization/vespadocumentdeserializer.cpp b/document/src/vespa/document/serialization/vespadocumentdeserializer.cpp index 4fe7cfc6d29..9fa2fb5c005 100644 --- a/document/src/vespa/document/serialization/vespadocumentdeserializer.cpp +++ b/document/src/vespa/document/serialization/vespadocumentdeserializer.cpp @@ -2,7 +2,6 @@ #include "vespadocumentdeserializer.h" #include "annotationdeserializer.h" -#include <vespa/document/annotation/spantree.h> #include <vespa/document/fieldvalue/annotationreferencefieldvalue.h> #include <vespa/document/fieldvalue/arrayfieldvalue.h> #include <vespa/document/fieldvalue/boolfieldvalue.h> @@ -19,7 +18,6 @@ #include <vespa/document/fieldvalue/weightedsetfieldvalue.h> #include <vespa/document/fieldvalue/tensorfieldvalue.h> #include <vespa/document/fieldvalue/referencefieldvalue.h> -#include <vespa/document/repo/documenttyperepo.h> #include <vespa/vespalib/data/slime/binary_format.h> #include <vespa/vespalib/data/slime/slime.h> #include <vespa/vespalib/stllike/asciistream.h> @@ -30,6 +28,8 @@ #include <vespa/document/base/exceptions.h> #include <vespa/vespalib/objects/nbostream.h> #include <vespa/document/util/bytebuffer.h> +#include <vespa/document/base/idstringexception.h> + #include <vespa/log/log.h> LOG_SETUP(".vespadocumentdeserializer"); @@ -309,10 +309,26 @@ void VespaDocumentDeserializer::readStructNoReset(StructFieldValue &value) { ByteBuffer::UP buffer(_stream.isLongLivedBuffer() ? new ByteBuffer(_stream.peek(), data_size) : ByteBuffer::copyBuffer(_stream.peek(), data_size)); - LOG(spam, "Lazy deserializing into %s with _version %u", - value.getDataType()->getName().c_str(), _version); - value.lazyDeserialize(_repo, _version, std::move(field_info), - std::move(buffer), compression_type, uncompressed_size); + if (value.getFields().empty()) { + LOG(spam, "Lazy deserializing into %s with _version %u", + value.getDataType()->getName().c_str(), _version); + value.lazyDeserialize(_repo, _version, std::move(field_info), + std::move(buffer), compression_type, uncompressed_size); + } else { + LOG(debug, "Legacy dual header/body format. -> Merging."); + StructFieldValue tmp(*value.getDataType()); + tmp.lazyDeserialize(_repo, _version, std::move(field_info), + std::move(buffer), compression_type, uncompressed_size); + for (const auto & entry : tmp) { + try { + FieldValue::UP decoded = tmp.getValue(entry); + value.setValue(entry, std::move(decoded)); + } catch (const vespalib::Exception & e) { + LOG(warning, "Failed decoding field '%s' in legacy bodyfield -> Skipping it: %s", + entry.getName().data(), e.what()); + } + } + } _stream.adjustReadPos(data_size); } } @@ -352,9 +368,7 @@ VespaDocumentDeserializer::read(TensorFieldValue &value) nbostream wrapStream(_stream.peek(), length); tensor = vespalib::tensor::TypedBinaryFormat::deserialize(wrapStream); if (wrapStream.size() != 0) { - throw DeserializeException("Leftover bytes deserializing " - "tensor field value.", - VESPA_STRLOC); + throw DeserializeException("Leftover bytes deserializing tensor field value.", VESPA_STRLOC); } } value.assignDeserialized(std::move(tensor)); diff --git a/document/src/vespa/document/serialization/vespadocumentserializer.cpp b/document/src/vespa/document/serialization/vespadocumentserializer.cpp index 4d60befba52..eadbd4b5a8a 100644 --- a/document/src/vespa/document/serialization/vespadocumentserializer.cpp +++ b/document/src/vespa/document/serialization/vespadocumentserializer.cpp @@ -86,89 +86,45 @@ void VespaDocumentSerializer::write(const DocumentType &value) { << static_cast<uint16_t>(0); // version } +namespace { + uint8_t -VespaDocumentSerializer::getContentCode(bool hasHeader, bool hasBody) const +getContentCode(bool hasContent) { - uint8_t content = 0x01; // Document type is always present. - if (hasHeader) { - content |= 0x02; // Header is present. - } - if (hasBody) { - content |= 0x04; // Body is present. - } - return content; + return 0x01u | // Document type is always present + (hasContent ? 0x02u : 0x00u); // Payload ? } -static inline size_t wantChunks(bool hasHeader, bool hasBody) { - size_t res = 0; - if (hasHeader) ++res; - if (hasBody) ++res; - return res; } void -VespaDocumentSerializer::write(const Document &value, DocSerializationMode mode) { +VespaDocumentSerializer::write(const Document &value) { nbostream doc_stream; VespaDocumentSerializer doc_serializer(doc_stream); doc_serializer.write(value.getId()); - bool hasHeader = false; - bool hasBody = false; - - const StructFieldValue::Chunks & chunks = value.getFields().getChunks(); - - for (const Field & field : value.getFields()) { - if (field.isHeaderField()) { - hasHeader = true; - } else { - hasBody = true; - } - if (hasHeader && hasBody) { - break; - } - } - if (mode != COMPLETE) { - hasBody = false; - } - doc_stream << getContentCode(hasHeader, hasBody); + bool hasContent = ! value.getFields().empty(); + doc_stream << getContentCode(hasContent); doc_serializer.write(value.getType()); - if (chunks.size() == wantChunks(hasHeader, hasBody) && - !structNeedsReserialization(value.getFields())) - { - // here we assume the receiver can handle whatever serialization the - // chunks contain, so we just send them as-is, even if some fields - // may have moved from header to body or vice versa. - if (hasHeader || hasBody) { - assert( ! chunks.empty()); - doc_serializer.writeUnchanged(chunks[0]); - } - if (hasHeader && hasBody) { - assert(chunks.size() == 2); - doc_serializer.writeUnchanged(chunks[1]); - } - } else { - if (hasHeader) { - doc_serializer.write(value.getFields(), HeaderFields()); - } - if (hasBody) { - doc_serializer.write(value.getFields(), BodyFields()); + if ( hasContent ) { + if (!structNeedsReserialization(value.getFields())) { + doc_serializer.writeUnchanged(value.getFields().getFields()); + } else { + doc_serializer.write(value.getFields(), AllFields()); } } const uint16_t version = serialize_version; - _stream << version - << static_cast<uint32_t>(doc_stream.size()); + _stream << version << static_cast<uint32_t>(doc_stream.size()); _stream.write(doc_stream.peek(), doc_stream.size()); } void VespaDocumentSerializer::visit(const StructFieldValue &value) { - const StructFieldValue::Chunks & chunks = value.getChunks(); - if (!structNeedsReserialization(value) && ! chunks.empty()) { - assert(chunks.size() == 1); - writeUnchanged(chunks[0]); + if (!structNeedsReserialization(value)) { + writeUnchanged(value.getFields()); } else { write(value, AllFields()); } @@ -247,7 +203,7 @@ VespaDocumentSerializer::write(const ShortFieldValue &value) { void VespaDocumentSerializer::write(const StringFieldValue &value) { - uint8_t coding = (value.hasSpanTrees() << 6); + uint8_t coding = (value.hasSpanTrees() << 6u); _stream << coding; putInt1_4Bytes(_stream, value.getValueRef().size() + 1); _stream.write(value.getValueRef().data(), value.getValueRef().size()); @@ -341,17 +297,8 @@ VespaDocumentSerializer::structNeedsReserialization(const StructFieldValue &valu return false; } - const StructFieldValue::Chunks & chunks = value.getChunks(); - - for (uint32_t i = 0; i < chunks.size(); ++i) { - if (chunks[i].getCompression() != value.getCompressionConfig().type && - chunks[i].getCompression() != CompressionConfig::UNCOMPRESSABLE) - { - return true; - } - } - - return false; + return (value.getFields().getCompression() != value.getCompressionConfig().type && + value.getFields().getCompression() != CompressionConfig::UNCOMPRESSABLE); } void VespaDocumentSerializer::writeUnchanged(const SerializableArray &value) { @@ -378,8 +325,7 @@ void VespaDocumentSerializer::writeUnchanged(const SerializableArray &value) { } } -void VespaDocumentSerializer::write(const StructFieldValue &value, - const FieldSet& fieldSet) +void VespaDocumentSerializer::write(const StructFieldValue &value, const FieldSet& fieldSet) { nbostream value_stream; vector<pair<uint32_t, uint32_t> > field_info; diff --git a/document/src/vespa/document/serialization/vespadocumentserializer.h b/document/src/vespa/document/serialization/vespadocumentserializer.h index d83532771e8..f664a32d893 100644 --- a/document/src/vespa/document/serialization/vespadocumentserializer.h +++ b/document/src/vespa/document/serialization/vespadocumentserializer.h @@ -17,13 +17,11 @@ class SerializableArray; class ValueUpdate; class FieldPathUpdate; -enum DocSerializationMode { COMPLETE, WITHOUT_BODY }; - class VespaDocumentSerializer : private ConstFieldValueVisitor, private UpdateVisitor, public FieldValueWriter { public: - VespaDocumentSerializer(vespalib::nbostream &stream); + explicit VespaDocumentSerializer(vespalib::nbostream &stream); static bool structNeedsReserialization(const StructFieldValue &value); @@ -34,7 +32,7 @@ public: void write(const DocumentId &value); void write(const DocumentType &value); - void write(const Document &value, DocSerializationMode mode); + void write(const Document &value); void write(const AnnotationReferenceFieldValue &value); void write(const ArrayFieldValue &value); void write(const MapFieldValue &map); @@ -60,8 +58,6 @@ public: private: static constexpr int serialize_version = 8; void writeUnchanged(const SerializableArray &val); - uint8_t getContentCode(bool hasHeader, bool hasBody) const; - void write(const FieldPathUpdate &value); void write(const RemoveValueUpdate &value); @@ -96,7 +92,7 @@ private: void visit(const ArrayFieldValue &value) override { write(value); } void visit(const BoolFieldValue &value) override { write(value); } void visit(const ByteFieldValue &value) override { write(value); } - void visit(const Document &value) override { write(value, COMPLETE); } + void visit(const Document &value) override { write(value); } void visit(const DoubleFieldValue &value) override { write(value); } void visit(const FloatFieldValue &value) override { write(value); } void visit(const IntFieldValue &value) override { write(value); } diff --git a/document/src/vespa/document/update/assignfieldpathupdate.h b/document/src/vespa/document/update/assignfieldpathupdate.h index a1349bab96a..718d001e060 100644 --- a/document/src/vespa/document/update/assignfieldpathupdate.h +++ b/document/src/vespa/document/update/assignfieldpathupdate.h @@ -11,9 +11,9 @@ class AssignFieldPathUpdate : public FieldPathUpdate public: enum SerializationFlag { - ARITHMETIC_EXPRESSION = 1, - REMOVE_IF_ZERO = 2, - CREATE_MISSING_PATH = 4 + ARITHMETIC_EXPRESSION = 1u, + REMOVE_IF_ZERO = 2u, + CREATE_MISSING_PATH = 4u }; /** For deserialization */ diff --git a/documentapi/src/tests/messages/messages60test.cpp b/documentapi/src/tests/messages/messages60test.cpp index 8d76cf5974e..c7bb1015e02 100644 --- a/documentapi/src/tests/messages/messages60test.cpp +++ b/documentapi/src/tests/messages/messages60test.cpp @@ -218,7 +218,7 @@ Messages60Test::testDocumentListMessage() DocumentListMessage tmp(document::BucketId(16, 1234)); tmp.getDocuments().push_back(entry); - EXPECT_EQUAL(MESSAGE_BASE_LENGTH + (size_t)69, serialize("DocumentListMessage", tmp)); + EXPECT_EQUAL(MESSAGE_BASE_LENGTH + 69ul, serialize("DocumentListMessage", tmp)); for (uint32_t lang = 0; lang < NUM_LANGUAGES; ++lang) { mbus::Routable::UP obj = deserialize("DocumentListMessage", DocumentProtocol::MESSAGE_DOCUMENTLIST, lang); @@ -407,7 +407,7 @@ Messages60Test::testPutDocumentMessage() for (uint32_t lang = 0; lang < NUM_LANGUAGES; ++lang) { auto routableUp = deserialize("PutDocumentMessage", DocumentProtocol::MESSAGE_PUTDOCUMENT, lang); - if (EXPECT_TRUE(routableUp.get() != nullptr)) { + if (EXPECT_TRUE(routableUp)) { auto & deserializedMsg = static_cast<PutDocumentMessage &>(*routableUp); EXPECT_EQUAL(msg.getDocument().getType().getName(), deserializedMsg.getDocument().getType().getName()); diff --git a/storage/src/tests/persistence/processalltest.cpp b/storage/src/tests/persistence/processalltest.cpp index 2bf7f7c3855..46a5aaf6488 100644 --- a/storage/src/tests/persistence/processalltest.cpp +++ b/storage/src/tests/persistence/processalltest.cpp @@ -110,12 +110,11 @@ TEST_F(ProcessAllHandlerTest, bucket_stat_request_returns_document_metadata_matc vespalib::string expected = "Persistence bucket BucketId(0x4000000000000004), partition 0\n" - " Timestamp: 100, Doc(id:mail:testdoctype1:n=4:3619.html), gid(0x0400000092bb8d298934253a), size: 169\n" - " Timestamp: 102, Doc(id:mail:testdoctype1:n=4:62608.html), gid(0x04000000ce878d2488413bc4), size: 147\n" - " Timestamp: 104, Doc(id:mail:testdoctype1:n=4:56061.html), gid(0x040000002b8f80f0160f6c5c), size: 124\n" - " Timestamp: 106, Doc(id:mail:testdoctype1:n=4:49514.html), gid(0x04000000d45ca9abb47567f0), size: 101\n" - " Timestamp: 108, Doc(id:mail:testdoctype1:n=4:42967.html), gid(0x04000000f19ece1668e6de48), size: 206\n"; - + " Timestamp: 100, Doc(id:mail:testdoctype1:n=4:3619.html), gid(0x0400000092bb8d298934253a), size: 163\n" + " Timestamp: 102, Doc(id:mail:testdoctype1:n=4:62608.html), gid(0x04000000ce878d2488413bc4), size: 141\n" + " Timestamp: 104, Doc(id:mail:testdoctype1:n=4:56061.html), gid(0x040000002b8f80f0160f6c5c), size: 118\n" + " Timestamp: 106, Doc(id:mail:testdoctype1:n=4:49514.html), gid(0x04000000d45ca9abb47567f0), size: 95\n" + " Timestamp: 108, Doc(id:mail:testdoctype1:n=4:42967.html), gid(0x04000000f19ece1668e6de48), size: 200\n"; EXPECT_EQ(expected, reply.getResults()); } @@ -145,16 +144,16 @@ TEST_F(ProcessAllHandlerTest, stat_bucket_request_can_returned_removed_entries) vespalib::string expected = "Persistence bucket BucketId(0x4000000000000004), partition 0\n" - " Timestamp: 100, Doc(id:mail:testdoctype1:n=4:3619.html), gid(0x0400000092bb8d298934253a), size: 169\n" - " Timestamp: 101, Doc(id:mail:testdoctype1:n=4:33113.html), gid(0x04000000b121a632741db368), size: 95\n" - " Timestamp: 102, Doc(id:mail:testdoctype1:n=4:62608.html), gid(0x04000000ce878d2488413bc4), size: 147\n" - " Timestamp: 103, Doc(id:mail:testdoctype1:n=4:26566.html), gid(0x04000000177f8240bdd2bef0), size: 200\n" - " Timestamp: 104, Doc(id:mail:testdoctype1:n=4:56061.html), gid(0x040000002b8f80f0160f6c5c), size: 124\n" - " Timestamp: 105, Doc(id:mail:testdoctype1:n=4:20019.html), gid(0x040000001550c67f28ea7b03), size: 177\n" - " Timestamp: 106, Doc(id:mail:testdoctype1:n=4:49514.html), gid(0x04000000d45ca9abb47567f0), size: 101\n" - " Timestamp: 107, Doc(id:mail:testdoctype1:n=4:13472.html), gid(0x040000005d01f3fd960f8098), size: 154\n" - " Timestamp: 108, Doc(id:mail:testdoctype1:n=4:42967.html), gid(0x04000000f19ece1668e6de48), size: 206\n" - " Timestamp: 109, Doc(id:mail:testdoctype1:n=4:6925.html), gid(0x04000000667c0b3cada830be), size: 130\n" + " Timestamp: 100, Doc(id:mail:testdoctype1:n=4:3619.html), gid(0x0400000092bb8d298934253a), size: 163\n" + " Timestamp: 101, Doc(id:mail:testdoctype1:n=4:33113.html), gid(0x04000000b121a632741db368), size: 89\n" + " Timestamp: 102, Doc(id:mail:testdoctype1:n=4:62608.html), gid(0x04000000ce878d2488413bc4), size: 141\n" + " Timestamp: 103, Doc(id:mail:testdoctype1:n=4:26566.html), gid(0x04000000177f8240bdd2bef0), size: 194\n" + " Timestamp: 104, Doc(id:mail:testdoctype1:n=4:56061.html), gid(0x040000002b8f80f0160f6c5c), size: 118\n" + " Timestamp: 105, Doc(id:mail:testdoctype1:n=4:20019.html), gid(0x040000001550c67f28ea7b03), size: 171\n" + " Timestamp: 106, Doc(id:mail:testdoctype1:n=4:49514.html), gid(0x04000000d45ca9abb47567f0), size: 95\n" + " Timestamp: 107, Doc(id:mail:testdoctype1:n=4:13472.html), gid(0x040000005d01f3fd960f8098), size: 148\n" + " Timestamp: 108, Doc(id:mail:testdoctype1:n=4:42967.html), gid(0x04000000f19ece1668e6de48), size: 200\n" + " Timestamp: 109, Doc(id:mail:testdoctype1:n=4:6925.html), gid(0x04000000667c0b3cada830be), size: 124\n" " Timestamp: 200, id:mail:testdoctype1:n=4:3619.html, gid(0x0400000092bb8d298934253a) (remove)\n" " Timestamp: 201, id:mail:testdoctype1:n=4:33113.html, gid(0x04000000b121a632741db368) (remove)\n" " Timestamp: 202, id:mail:testdoctype1:n=4:62608.html, gid(0x04000000ce878d2488413bc4) (remove)\n" @@ -166,7 +165,7 @@ TEST_F(ProcessAllHandlerTest, stat_bucket_request_can_returned_removed_entries) " Timestamp: 208, id:mail:testdoctype1:n=4:42967.html, gid(0x04000000f19ece1668e6de48) (remove)\n" " Timestamp: 209, id:mail:testdoctype1:n=4:6925.html, gid(0x04000000667c0b3cada830be) (remove)\n"; - EXPECT_EQ(expected, reply.getResults()); + EXPECT_EQ(expected, reply.getResults()); } // TODO is this test neccessary? Seems to not test anything more than the above tests @@ -191,16 +190,16 @@ TEST_F(ProcessAllHandlerTest, bucket_stat_request_can_return_all_put_entries_in_ vespalib::string expected = "Persistence bucket BucketId(0x4000000000000004), partition 0\n" - " Timestamp: 100, Doc(id:mail:testdoctype1:n=4:3619.html), gid(0x0400000092bb8d298934253a), size: 169\n" - " Timestamp: 101, Doc(id:mail:testdoctype1:n=4:33113.html), gid(0x04000000b121a632741db368), size: 95\n" - " Timestamp: 102, Doc(id:mail:testdoctype1:n=4:62608.html), gid(0x04000000ce878d2488413bc4), size: 147\n" - " Timestamp: 103, Doc(id:mail:testdoctype1:n=4:26566.html), gid(0x04000000177f8240bdd2bef0), size: 200\n" - " Timestamp: 104, Doc(id:mail:testdoctype1:n=4:56061.html), gid(0x040000002b8f80f0160f6c5c), size: 124\n" - " Timestamp: 105, Doc(id:mail:testdoctype1:n=4:20019.html), gid(0x040000001550c67f28ea7b03), size: 177\n" - " Timestamp: 106, Doc(id:mail:testdoctype1:n=4:49514.html), gid(0x04000000d45ca9abb47567f0), size: 101\n" - " Timestamp: 107, Doc(id:mail:testdoctype1:n=4:13472.html), gid(0x040000005d01f3fd960f8098), size: 154\n" - " Timestamp: 108, Doc(id:mail:testdoctype1:n=4:42967.html), gid(0x04000000f19ece1668e6de48), size: 206\n" - " Timestamp: 109, Doc(id:mail:testdoctype1:n=4:6925.html), gid(0x04000000667c0b3cada830be), size: 130\n"; + " Timestamp: 100, Doc(id:mail:testdoctype1:n=4:3619.html), gid(0x0400000092bb8d298934253a), size: 163\n" + " Timestamp: 101, Doc(id:mail:testdoctype1:n=4:33113.html), gid(0x04000000b121a632741db368), size: 89\n" + " Timestamp: 102, Doc(id:mail:testdoctype1:n=4:62608.html), gid(0x04000000ce878d2488413bc4), size: 141\n" + " Timestamp: 103, Doc(id:mail:testdoctype1:n=4:26566.html), gid(0x04000000177f8240bdd2bef0), size: 194\n" + " Timestamp: 104, Doc(id:mail:testdoctype1:n=4:56061.html), gid(0x040000002b8f80f0160f6c5c), size: 118\n" + " Timestamp: 105, Doc(id:mail:testdoctype1:n=4:20019.html), gid(0x040000001550c67f28ea7b03), size: 171\n" + " Timestamp: 106, Doc(id:mail:testdoctype1:n=4:49514.html), gid(0x04000000d45ca9abb47567f0), size: 95\n" + " Timestamp: 107, Doc(id:mail:testdoctype1:n=4:13472.html), gid(0x040000005d01f3fd960f8098), size: 148\n" + " Timestamp: 108, Doc(id:mail:testdoctype1:n=4:42967.html), gid(0x04000000f19ece1668e6de48), size: 200\n" + " Timestamp: 109, Doc(id:mail:testdoctype1:n=4:6925.html), gid(0x04000000667c0b3cada830be), size: 124\n"; EXPECT_EQ(expected, reply.getResults()); } |