diff options
author | Henning Baldersheim <balder@yahoo-inc.com> | 2022-01-07 06:15:12 +0100 |
---|---|---|
committer | GitHub <noreply@github.com> | 2022-01-07 06:15:12 +0100 |
commit | 776a02c236bb78b64d6bb7643f4a5acf2ab0f1e1 (patch) | |
tree | 3077f322cc345a30ab801c25da6fbdb845d951b2 /persistence | |
parent | 499d884cefeac2a4f5dc071814d1a64ddc249875 (diff) |
Revert "Revert "Balder/refactor docentry""
Diffstat (limited to 'persistence')
11 files changed, 320 insertions, 230 deletions
diff --git a/persistence/src/tests/dummyimpl/dummypersistence_test.cpp b/persistence/src/tests/dummyimpl/dummypersistence_test.cpp index 21cf7cb9ae3..3fa1a8a9b8d 100644 --- a/persistence/src/tests/dummyimpl/dummypersistence_test.cpp +++ b/persistence/src/tests/dummyimpl/dummypersistence_test.cpp @@ -9,7 +9,6 @@ #include <vespa/vdslib/state/clusterstate.h> #include <vespa/config-stor-distribution.h> - using namespace storage::spi; using namespace storage; using document::test::makeBucketSpace; @@ -20,14 +19,14 @@ namespace { struct Fixture { BucketContent content; - void insert(DocumentId id, Timestamp timestamp, int meta_flags) { - content.insert(DocEntry::UP(new DocEntry(timestamp, meta_flags, id))); + void insert(DocumentId id, Timestamp timestamp, DocumentMetaEnum meta_flags) { + content.insert(DocEntry::create(timestamp, meta_flags, id)); } Fixture() { - insert(DocumentId("id:ns:type::test:3"), Timestamp(3), NONE); - insert(DocumentId("id:ns:type::test:1"), Timestamp(1), NONE); - insert(DocumentId("id:ns:type::test:2"), Timestamp(2), NONE); + insert(DocumentId("id:ns:type::test:3"), Timestamp(3), DocumentMetaEnum::NONE); + insert(DocumentId("id:ns:type::test:1"), Timestamp(1), DocumentMetaEnum::NONE); + insert(DocumentId("id:ns:type::test:2"), Timestamp(2), DocumentMetaEnum::NONE); } }; @@ -64,13 +63,13 @@ TEST_F("require that BucketContent can provide bucket info", Fixture) { uint32_t lastChecksum = 0; EXPECT_NOT_EQUAL(lastChecksum, f.content.getBucketInfo().getChecksum()); lastChecksum = f.content.getBucketInfo().getChecksum(); - f.insert(DocumentId("id:ns:type::test:3"), Timestamp(4), NONE); + f.insert(DocumentId("id:ns:type::test:3"), Timestamp(4), DocumentMetaEnum::NONE); EXPECT_NOT_EQUAL(lastChecksum, f.content.getBucketInfo().getChecksum()); lastChecksum = f.content.getBucketInfo().getChecksum(); - f.insert(DocumentId("id:ns:type::test:2"), Timestamp(5), REMOVE_ENTRY); + f.insert(DocumentId("id:ns:type::test:2"), Timestamp(5), DocumentMetaEnum::REMOVE_ENTRY); EXPECT_NOT_EQUAL(lastChecksum, f.content.getBucketInfo().getChecksum()); - f.insert(DocumentId("id:ns:type::test:1"), Timestamp(6), REMOVE_ENTRY); - f.insert(DocumentId("id:ns:type::test:3"), Timestamp(7), REMOVE_ENTRY); + f.insert(DocumentId("id:ns:type::test:1"), Timestamp(6), DocumentMetaEnum::REMOVE_ENTRY); + f.insert(DocumentId("id:ns:type::test:3"), Timestamp(7), DocumentMetaEnum::REMOVE_ENTRY); EXPECT_EQUAL(0u, f.content.getBucketInfo().getChecksum()); } diff --git a/persistence/src/tests/spi/clusterstatetest.cpp b/persistence/src/tests/spi/clusterstatetest.cpp index ac67903244f..2186d408791 100644 --- a/persistence/src/tests/spi/clusterstatetest.cpp +++ b/persistence/src/tests/spi/clusterstatetest.cpp @@ -5,10 +5,12 @@ #include <vespa/vdslib/distribution/distribution.h> #include <vespa/vdslib/state/clusterstate.h> #include <vespa/config-stor-distribution.h> +#include <vespa/document/base/testdocman.h> #include <gtest/gtest.h> using storage::spi::test::makeSpiBucket; using vespalib::Trinary; +using document::GlobalId; namespace storage::spi { @@ -260,4 +262,57 @@ TEST(ClusterStateTest, node_maintenance_state_is_set_independent_of_bucket_space EXPECT_FALSE(node_marked_as_maintenance_in_state("distributor:3 storage:3 .0.s:m", d, 0, false)); } +TEST(DocEntryTest, test_basics) { + EXPECT_EQ(24, sizeof(DocEntry)); +} + +TEST(DocEntryTest, test_meta_only) { + DocEntry::UP e = DocEntry::create(Timestamp(9), DocumentMetaEnum::NONE); + EXPECT_EQ(9, e->getTimestamp()); + EXPECT_FALSE(e->isRemove()); + EXPECT_EQ(24, e->getSize()); + EXPECT_EQ(nullptr, e->getDocument()); + EXPECT_EQ(nullptr, e->getDocumentId()); + EXPECT_EQ("", e->getDocumentType()); + EXPECT_EQ(GlobalId(), e->getGid()); + + DocEntry::UP r = DocEntry::create(Timestamp(666), DocumentMetaEnum::REMOVE_ENTRY); + EXPECT_EQ(666, r->getTimestamp()); + EXPECT_TRUE(r->isRemove()); +} + +TEST(DocEntryTest, test_docid_only) { + DocEntry::UP e = DocEntry::create(Timestamp(9), DocumentMetaEnum::NONE, DocumentId("id:test:test::1")); + EXPECT_EQ(9, e->getTimestamp()); + EXPECT_FALSE(e->isRemove()); + EXPECT_EQ(16, e->getSize()); + EXPECT_EQ(nullptr, e->getDocument()); + EXPECT_NE(nullptr, e->getDocumentId()); + EXPECT_EQ("test", e->getDocumentType()); + EXPECT_EQ(GlobalId::parse("gid(0xc4ca4238f9f9649222750be2)"), e->getGid()); +} + +TEST(DocEntryTest, test_doctype_and_gid) { + DocEntry::UP e = DocEntry::create(Timestamp(9), DocumentMetaEnum::NONE, "doc_type", GlobalId::parse("gid(0xc4cef118f9f9649222750be2)")); + EXPECT_EQ(9, e->getTimestamp()); + EXPECT_FALSE(e->isRemove()); + EXPECT_EQ(20, e->getSize()); + EXPECT_EQ(nullptr, e->getDocument()); + EXPECT_EQ(nullptr, e->getDocumentId()); + EXPECT_EQ("doc_type", e->getDocumentType()); + EXPECT_EQ(GlobalId::parse("gid(0xc4cef118f9f9649222750be2)"), e->getGid()); +} + +TEST(DocEntryTest, test_document_only) { + document::TestDocMan testDocMan; + DocEntry::UP e = DocEntry::create(Timestamp(9), testDocMan.createRandomDocument(0, 1000)); + EXPECT_EQ(9, e->getTimestamp()); + EXPECT_FALSE(e->isRemove()); + EXPECT_EQ(632, e->getSize()); + EXPECT_NE(nullptr, e->getDocument()); + EXPECT_NE(nullptr, e->getDocumentId()); + EXPECT_EQ("testdoctype1", e->getDocumentType()); + EXPECT_EQ(GlobalId::parse("gid(0x4bc7000087365609f22f1f4b)"), e->getGid()); +} + } diff --git a/persistence/src/vespa/persistence/conformancetest/conformancetest.cpp b/persistence/src/vespa/persistence/conformancetest/conformancetest.cpp index 810f2ad2356..6afdd142457 100644 --- a/persistence/src/vespa/persistence/conformancetest/conformancetest.cpp +++ b/persistence/src/vespa/persistence/conformancetest/conformancetest.cpp @@ -5,6 +5,7 @@ #include <vespa/persistence/spi/test.h> #include <vespa/persistence/spi/catchresult.h> #include <vespa/persistence/spi/resource_usage_listener.h> +#include <vespa/persistence/spi/docentry.h> #include <vespa/document/fieldset/fieldsets.h> #include <vespa/document/update/documentupdate.h> #include <vespa/document/update/assignvalueupdate.h> @@ -25,15 +26,17 @@ using document::BucketId; using document::BucketSpace; using document::test::makeBucketSpace; using storage::spi::test::makeSpiBucket; +using storage::spi::test::cloneDocEntry; namespace storage::spi { using PersistenceProviderUP = std::unique_ptr<PersistenceProvider>; +using DocEntryList = std::vector<DocEntry::UP>; namespace { -std::unique_ptr<PersistenceProvider> getSpi(ConformanceTest::PersistenceFactory &factory, - const document::TestDocMan &testDocMan) { +std::unique_ptr<PersistenceProvider> +getSpi(ConformanceTest::PersistenceFactory &factory, const document::TestDocMan &testDocMan) { PersistenceProviderUP result(factory.getPersistenceImplementation( testDocMan.getTypeRepoSP(), *testDocMan.getTypeConfig())); EXPECT_TRUE(!result->initialize().hasError()); @@ -123,7 +126,7 @@ struct DocAndTimestamp */ struct Chunk { - std::vector<DocEntry::UP> _entries; + DocEntryList _entries; }; struct DocEntryIndirectTimestampComparator @@ -166,7 +169,7 @@ doIterate(PersistenceProvider& spi, } size_t -getRemoveEntryCount(const std::vector<spi::DocEntry::UP>& entries) +getRemoveEntryCount(const DocEntryList& entries) { size_t ret = 0; for (size_t i = 0; i < entries.size(); ++i) { @@ -177,13 +180,13 @@ getRemoveEntryCount(const std::vector<spi::DocEntry::UP>& entries) return ret; } -std::vector<DocEntry::UP> +DocEntryList getEntriesFromChunks(const std::vector<Chunk>& chunks) { - std::vector<spi::DocEntry::UP> ret; + DocEntryList ret; for (size_t chunk = 0; chunk < chunks.size(); ++chunk) { for (size_t i = 0; i < chunks[chunk]._entries.size(); ++i) { - ret.push_back(DocEntry::UP(chunks[chunk]._entries[i]->clone())); + ret.push_back(cloneDocEntry(*chunks[chunk]._entries[i])); } } std::sort(ret.begin(), @@ -193,12 +196,12 @@ getEntriesFromChunks(const std::vector<Chunk>& chunks) } -std::vector<DocEntry::UP> +DocEntryList iterateBucket(PersistenceProvider& spi, const Bucket& bucket, IncludedVersions versions) { - std::vector<DocEntry::UP> ret; + DocEntryList ret; DocumentSelection docSel(""); Selection sel(docSel); @@ -217,7 +220,7 @@ iterateBucket(PersistenceProvider& spi, spi.iterate(iter.getIteratorId(), std::numeric_limits<int64_t>().max(), context); if (result.getErrorCode() != Result::ErrorType::NONE) { - return std::vector<DocEntry::UP>(); + return DocEntryList(); } auto list = result.steal_entries(); std::move(list.begin(), list.end(), std::back_inserter(ret)); @@ -238,8 +241,7 @@ verifyDocs(const std::vector<DocAndTimestamp>& wanted, const std::vector<Chunk>& chunks, const std::set<string>& removes = std::set<string>()) { - std::vector<DocEntry::UP> retrieved( - getEntriesFromChunks(chunks)); + DocEntryList retrieved = getEntriesFromChunks(chunks); size_t removeCount = getRemoveEntryCount(retrieved); // Ensure that we've got the correct number of puts and removes EXPECT_EQ(removes.size(), removeCount); @@ -257,15 +259,13 @@ verifyDocs(const std::vector<DocAndTimestamp>& wanted, } EXPECT_EQ(wanted[wantedIdx].timestamp, entry.getTimestamp()); size_t serSize = wanted[wantedIdx].doc->serialize().size(); - EXPECT_EQ(serSize + sizeof(DocEntry), size_t(entry.getSize())); - EXPECT_EQ(serSize, size_t(entry.getDocumentSize())); + EXPECT_EQ(serSize, size_t(entry.getSize())); ++wantedIdx; } else { // Remove-entry EXPECT_TRUE(entry.getDocumentId() != 0); size_t serSize = entry.getDocumentId()->getSerializedSize(); - EXPECT_EQ(serSize + sizeof(DocEntry), size_t(entry.getSize())); - EXPECT_EQ(serSize, size_t(entry.getDocumentSize())); + EXPECT_EQ(serSize, size_t(entry.getSize())); if (removes.find(entry.getDocumentId()->toString()) == removes.end()) { FAIL() << "Got unexpected remove entry for document id " << *entry.getDocumentId(); @@ -697,8 +697,7 @@ TEST_F(ConformanceTest, testPutDuplicate) EXPECT_EQ(1, (int)info.getDocumentCount()); EXPECT_EQ(checksum, info.getChecksum()); } - std::vector<DocEntry::UP> entries( - iterateBucket(*spi, bucket, ALL_VERSIONS)); + DocEntryList entries = iterateBucket(*spi, bucket, ALL_VERSIONS); EXPECT_EQ(size_t(1), entries.size()); } @@ -722,8 +721,7 @@ TEST_F(ConformanceTest, testRemove) EXPECT_EQ(1, (int)info.getDocumentCount()); EXPECT_TRUE(info.getChecksum() != 0); - std::vector<DocEntry::UP> entries( - iterateBucket(*spi, bucket, NEWEST_DOCUMENT_ONLY)); + DocEntryList entries = iterateBucket(*spi, bucket, NEWEST_DOCUMENT_ONLY); EXPECT_EQ(size_t(1), entries.size()); } @@ -741,15 +739,11 @@ TEST_F(ConformanceTest, testRemove) EXPECT_EQ(true, result2.wasFound()); } { - std::vector<DocEntry::UP> entries(iterateBucket(*spi, - bucket, - NEWEST_DOCUMENT_ONLY)); + DocEntryList entries = iterateBucket(*spi, bucket,NEWEST_DOCUMENT_ONLY); EXPECT_EQ(size_t(0), entries.size()); } { - std::vector<DocEntry::UP> entries(iterateBucket(*spi, - bucket, - NEWEST_DOCUMENT_OR_REMOVE)); + DocEntryList entries = iterateBucket(*spi, bucket,NEWEST_DOCUMENT_OR_REMOVE); EXPECT_EQ(size_t(1), entries.size()); } @@ -862,8 +856,7 @@ TEST_F(ConformanceTest, testRemoveMerge) // Remove entry should exist afterwards { - std::vector<DocEntry::UP> entries(iterateBucket( - *spi, bucket, ALL_VERSIONS)); + DocEntryList entries = iterateBucket(*spi, bucket, ALL_VERSIONS); EXPECT_EQ(size_t(2), entries.size()); // Timestamp-sorted by iterateBucket EXPECT_EQ(removeId, *entries.back()->getDocumentId()); @@ -889,7 +882,7 @@ TEST_F(ConformanceTest, testRemoveMerge) } // Must have new remove. We don't check for the presence of the old remove. { - std::vector<DocEntry::UP> entries(iterateBucket(*spi, bucket, ALL_VERSIONS)); + DocEntryList entries = iterateBucket(*spi, bucket, ALL_VERSIONS); EXPECT_TRUE(entries.size() >= 2); EXPECT_EQ(removeId, *entries.back()->getDocumentId()); EXPECT_EQ(Timestamp(11), entries.back()->getTimestamp()); @@ -915,7 +908,7 @@ TEST_F(ConformanceTest, testRemoveMerge) } // Must have newest remove. We don't check for the presence of the old remove. { - std::vector<DocEntry::UP> entries(iterateBucket(*spi, bucket, ALL_VERSIONS)); + DocEntryList entries = iterateBucket(*spi, bucket, ALL_VERSIONS); EXPECT_TRUE(entries.size() >= 2); EXPECT_EQ(removeId, *entries.back()->getDocumentId()); EXPECT_EQ(Timestamp(11), entries.back()->getTimestamp()); @@ -1351,7 +1344,7 @@ TEST_F(ConformanceTest, testIterateRemoves) CreateIteratorResult iter(createIterator(*spi, b, sel, NEWEST_DOCUMENT_OR_REMOVE)); std::vector<Chunk> chunks = doIterate(*spi, iter.getIteratorId(), 4_Ki); - std::vector<DocEntry::UP> entries = getEntriesFromChunks(chunks); + DocEntryList entries = getEntriesFromChunks(chunks); EXPECT_EQ(docs.size(), entries.size()); verifyDocs(nonRemovedDocs, chunks, removedDocs); diff --git a/persistence/src/vespa/persistence/dummyimpl/dummypersistence.cpp b/persistence/src/vespa/persistence/dummyimpl/dummypersistence.cpp index 9d9f31b63a3..d947ca51f49 100644 --- a/persistence/src/vespa/persistence/dummyimpl/dummypersistence.cpp +++ b/persistence/src/vespa/persistence/dummyimpl/dummypersistence.cpp @@ -9,6 +9,7 @@ #include <vespa/persistence/spi/i_resource_usage_listener.h> #include <vespa/persistence/spi/resource_usage.h> #include <vespa/persistence/spi/bucketexecutor.h> +#include <vespa/persistence/spi/test.h> #include <vespa/vespalib/util/crc.h> #include <vespa/document/fieldset/fieldsetrepo.h> #include <vespa/vespalib/stllike/asciistream.h> @@ -24,6 +25,9 @@ using vespalib::make_string; using std::binary_search; using std::lower_bound; using document::FixedBucketSpaces; +using document::FieldSet; +using storage::spi::test::cloneDocEntry; +using storage::spi::test::equal; namespace storage::spi::dummy { @@ -162,7 +166,7 @@ BucketContent::insert(DocEntry::SP e) { auto it = lower_bound(_entries.begin(), _entries.end(), e->getTimestamp(), TimestampLess()); if (it != _entries.end()) { if (it->entry->getTimestamp() == e->getTimestamp()) { - if (*it->entry.get() == *e) { + if (equal(*it->entry, *e)) { LOG(debug, "Ignoring duplicate put entry %s", e->toString().c_str()); return; } else { @@ -431,7 +435,7 @@ DummyPersistence::putAsync(const Bucket& b, Timestamp t, Document::SP doc, Conte } } else { LOG(spam, "Inserting document %s", doc->toString(true).c_str()); - auto entry = std::make_unique<DocEntry>(t, NONE, Document::UP(doc->clone())); + auto entry = DocEntry::create(t, Document::UP(doc->clone())); (*bc)->insert(std::move(entry)); bc.reset(); onComplete->onComplete(std::make_unique<Result>()); @@ -489,7 +493,7 @@ DummyPersistence::removeAsync(const Bucket& b, std::vector<TimeStampAndDocumentI } DocEntry::SP entry((*bc)->getEntry(id)); numRemoves += (entry.get() && !entry->isRemove()) ? 1 : 0; - auto remEntry = std::make_unique<DocEntry>(t, REMOVE_ENTRY, id); + auto remEntry = DocEntry::create(t, DocumentMetaEnum::REMOVE_ENTRY, id); if ((*bc)->hasTimestamp(t)) { (*bc)->eraseEntry(t); @@ -501,7 +505,7 @@ DummyPersistence::removeAsync(const Bucket& b, std::vector<TimeStampAndDocumentI } GetResult -DummyPersistence::get(const Bucket& b, const document::FieldSet& fieldSet, const DocumentId& did, Context&) const +DummyPersistence::get(const Bucket& b, const FieldSet& fieldSet, const DocumentId& did, Context&) const { DUMMYPERSISTENCE_VERIFY_INITIALIZED; LOG(debug, "get(%s, %s)", @@ -518,8 +522,8 @@ DummyPersistence::get(const Bucket& b, const document::FieldSet& fieldSet, const return GetResult::make_for_tombstone(entry->getTimestamp()); } else { Document::UP doc(entry->getDocument()->clone()); - if (fieldSet.getType() != document::FieldSet::Type::ALL) { - document::FieldSet::stripFields(*doc, fieldSet); + if (fieldSet.getType() != FieldSet::Type::ALL) { + FieldSet::stripFields(*doc, fieldSet); } return GetResult(std::move(doc), entry->getTimestamp()); } @@ -649,22 +653,16 @@ DummyPersistence::iterate(IteratorId id, uint64_t maxByteSize, Context& ctx) con if (currentSize != 0 && currentSize + size > maxByteSize) break; currentSize += size; if (!entry->isRemove() - && it->_fieldSet->getType() != document::FieldSet::Type::ALL) + && it->_fieldSet->getType() != FieldSet::Type::ALL) { assert(entry->getDocument()); // Create new document with only wanted fields. - Document::UP filtered( - document::FieldSet::createDocumentSubsetCopy( - *entry->getDocument(), - *it->_fieldSet)); - DocEntry::UP ret(new DocEntry(entry->getTimestamp(), - entry->getFlags(), - std::move(filtered), - entry->getPersistedDocumentSize())); + Document::UP filtered(FieldSet::createDocumentSubsetCopy(*entry->getDocument(), *it->_fieldSet)); + auto ret = DocEntry::create(entry->getTimestamp(), std::move(filtered), entry->getSize()); entries.push_back(std::move(ret)); } else { // Use entry as-is. - entries.push_back(DocEntry::UP(entry->clone())); + entries.push_back(cloneDocEntry(*entry)); ++fastPath; } } diff --git a/persistence/src/vespa/persistence/dummyimpl/dummypersistence.h b/persistence/src/vespa/persistence/dummyimpl/dummypersistence.h index a7a784d0479..3b6fc9f1449 100644 --- a/persistence/src/vespa/persistence/dummyimpl/dummypersistence.h +++ b/persistence/src/vespa/persistence/dummyimpl/dummypersistence.h @@ -9,6 +9,7 @@ #pragma once #include <vespa/persistence/spi/abstractpersistenceprovider.h> +#include <vespa/persistence/spi/docentry.h> #include <vespa/document/base/globalid.h> #include <vespa/document/fieldset/fieldsets.h> #include <vespa/vespalib/stllike/hash_map.h> diff --git a/persistence/src/vespa/persistence/spi/docentry.cpp b/persistence/src/vespa/persistence/spi/docentry.cpp index 8669bbf666b..f0329e8cc5e 100644 --- a/persistence/src/vespa/persistence/spi/docentry.cpp +++ b/persistence/src/vespa/persistence/spi/docentry.cpp @@ -4,90 +4,104 @@ #include <vespa/document/fieldvalue/document.h> #include <vespa/vespalib/objects/nbostream.h> #include <sstream> -#include <cassert> namespace storage::spi { -DocEntry::DocEntry(Timestamp t, int metaFlags, DocumentUP doc) - : _timestamp(t), - _metaFlags(metaFlags), - _persistedDocumentSize(doc->serialize().size()), - _size(_persistedDocumentSize + sizeof(DocEntry)), - _documentId(), +namespace { + +class DocEntryWithId final : public DocEntry { +public: + DocEntryWithId(Timestamp t, DocumentMetaEnum metaEnum, const DocumentId &docId); + ~DocEntryWithId(); + vespalib::string toString() const override; + const DocumentId* getDocumentId() const override { return & _documentId; } + vespalib::stringref getDocumentType() const override { return _documentId.getDocType(); } + GlobalId getGid() const override { return _documentId.getGlobalId(); } +private: + DocumentId _documentId; +}; + +class DocEntryWithTypeAndGid final : public DocEntry { +public: + DocEntryWithTypeAndGid(Timestamp t, DocumentMetaEnum metaEnum, vespalib::stringref docType, GlobalId gid); + ~DocEntryWithTypeAndGid(); + vespalib::string toString() const override; + vespalib::stringref getDocumentType() const override { return _type; } + GlobalId getGid() const override { return _gid; } +private: + vespalib::string _type; + GlobalId _gid; +}; + +class DocEntryWithDoc final : public DocEntry { +public: + DocEntryWithDoc(Timestamp t, DocumentUP doc); + + /** + * Constructor that can be used by providers that already know + * the serialized size of the document, so the potentially expensive + * call to getSerializedSize can be avoided. This value shall be the size of the document _before_ + * any field filtering is performed. + */ + DocEntryWithDoc(Timestamp t, DocumentUP doc, size_t serializedDocumentSize); + ~DocEntryWithDoc(); + vespalib::string toString() const override; + const Document* getDocument() const override { return _document.get(); } + const DocumentId* getDocumentId() const override { return &_document->getId(); } + DocumentUP releaseDocument() override { return std::move(_document); } + vespalib::stringref getDocumentType() const override { return _document->getId().getDocType(); } + GlobalId getGid() const override { return _document->getId().getGlobalId(); } +private: + DocumentUP _document; +}; + +DocEntryWithDoc::DocEntryWithDoc(Timestamp t, DocumentUP doc) + : DocEntry(t, DocumentMetaEnum::NONE, doc->serialize().size()), _document(std::move(doc)) { } -DocEntry::DocEntry(Timestamp t, int metaFlags, DocumentUP doc, size_t serializedDocumentSize) - : _timestamp(t), - _metaFlags(metaFlags), - _persistedDocumentSize(serializedDocumentSize), - _size(_persistedDocumentSize + sizeof(DocEntry)), - _documentId(), +DocEntryWithDoc::DocEntryWithDoc(Timestamp t, DocumentUP doc, size_t serializedDocumentSize) + : DocEntry(t, DocumentMetaEnum::NONE, serializedDocumentSize), _document(std::move(doc)) { } -DocEntry::DocEntry(Timestamp t, int metaFlags, const DocumentId& docId) - : _timestamp(t), - _metaFlags(metaFlags), - _persistedDocumentSize(docId.getSerializedSize()), - _size(_persistedDocumentSize + sizeof(DocEntry)), - _documentId(new DocumentId(docId)), - _document() +DocEntryWithId::DocEntryWithId(Timestamp t, DocumentMetaEnum metaEnum, const DocumentId& docId) + : DocEntry(t, metaEnum, docId.getSerializedSize()), + _documentId(docId) { } -DocEntry::DocEntry(Timestamp t, int metaFlags) - : _timestamp(t), - _metaFlags(metaFlags), - _persistedDocumentSize(0), - _size(sizeof(DocEntry)), - _documentId(), - _document() +DocEntryWithTypeAndGid::DocEntryWithTypeAndGid(Timestamp t, DocumentMetaEnum metaEnum, vespalib::stringref docType, GlobalId gid) + : DocEntry(t, metaEnum, docType.size() + sizeof(gid)), + _type(docType), + _gid(gid) { } -DocEntry::~DocEntry() = default; - -DocEntry* -DocEntry::clone() const { - DocEntry* ret; - if (_documentId) { - ret = new DocEntry(_timestamp, _metaFlags, *_documentId); - ret->setPersistedDocumentSize(_persistedDocumentSize); - } else if (_document) { - ret = new DocEntry(_timestamp, _metaFlags, - std::make_unique<Document>(*_document), - _persistedDocumentSize); - } else { - ret = new DocEntry(_timestamp, _metaFlags); - ret->setPersistedDocumentSize(_persistedDocumentSize); - } - return ret; -} - -const DocumentId* -DocEntry::getDocumentId() const { - return (_document ? &_document->getId() : _documentId.get()); -} +DocEntryWithTypeAndGid::~DocEntryWithTypeAndGid() = default; +DocEntryWithId::~DocEntryWithId() = default; +DocEntryWithDoc::~DocEntryWithDoc() = default; -DocumentUP -DocEntry::releaseDocument() { - return std::move(_document); +vespalib::string +DocEntryWithId::toString() const +{ + std::ostringstream out; + out << "DocEntry(" << getTimestamp() << ", " << int(getMetaEnum()) << ", " << _documentId << ")"; + return out.str(); } -DocEntry::SizeType -DocEntry::getDocumentSize() const +vespalib::string +DocEntryWithTypeAndGid::toString() const { - assert(_size >= sizeof(DocEntry)); - return _size - sizeof(DocEntry); + std::ostringstream out; + out << "DocEntry(" << getTimestamp() << ", " << int(getMetaEnum()) << ", " << _type << ", " << _gid << ")"; + return out.str(); } vespalib::string -DocEntry::toString() const +DocEntryWithDoc::toString() const { std::ostringstream out; - out << "DocEntry(" << _timestamp << ", " << _metaFlags << ", "; - if (_documentId) { - out << *_documentId; - } else if (_document.get()) { + out << "DocEntry(" << getTimestamp() << ", " << int(getMetaEnum()) << ", "; + if (_document.get()) { out << "Doc(" << _document->getId() << ")"; } else { out << "metadata only"; @@ -96,53 +110,47 @@ DocEntry::toString() const return out.str(); } -std::ostream & -operator << (std::ostream & os, const DocEntry & r) { - return os << r.toString(); } -bool -DocEntry::operator==(const DocEntry& entry) const { - if (_timestamp != entry._timestamp) { - return false; - } - - if (_metaFlags != entry._metaFlags) { - return false; - } - - if (_documentId) { - if (!entry._documentId) { - return false; - } +DocEntry::UP +DocEntry::create(Timestamp t, DocumentMetaEnum metaEnum) { + return UP(new DocEntry(t, metaEnum)); +} +DocEntry::UP +DocEntry::create(Timestamp t, DocumentMetaEnum metaEnum, const DocumentId &docId) { + return std::make_unique<DocEntryWithId>(t, metaEnum, docId); +} +DocEntry::UP +DocEntry::create(Timestamp t, DocumentMetaEnum metaEnum, vespalib::stringref docType, GlobalId gid) { + return std::make_unique<DocEntryWithTypeAndGid>(t, metaEnum, docType, gid); +} +DocEntry::UP +DocEntry::create(Timestamp t, DocumentUP doc) { + return std::make_unique<DocEntryWithDoc>(t, std::move(doc)); +} +DocEntry::UP +DocEntry::create(Timestamp t, DocumentUP doc, SizeType serializedDocumentSize) { + return std::make_unique<DocEntryWithDoc>(t, std::move(doc), serializedDocumentSize); +} - if (*_documentId != *entry._documentId) { - return false; - } - } else { - if (entry._documentId) { - return false; - } - } +DocEntry::~DocEntry() = default; - if (_document) { - if (!entry._document) { - return false; - } +DocumentUP +DocEntry::releaseDocument() { + return {}; +} - if (*_document != *entry._document) { - return false; - } - } else { - if (entry._document) { - return false; - } - } - if (_persistedDocumentSize != entry._persistedDocumentSize) { - return false; - } +vespalib::string +DocEntry::toString() const +{ + std::ostringstream out; + out << "DocEntry(" << _timestamp << ", " << int(_metaEnum) << ", metadata only)"; + return out.str(); +} - return true; +std::ostream & +operator << (std::ostream & os, const DocEntry & r) { + return os << r.toString(); } } diff --git a/persistence/src/vespa/persistence/spi/docentry.h b/persistence/src/vespa/persistence/spi/docentry.h index 3374ef6c02d..9ad06b41e90 100644 --- a/persistence/src/vespa/persistence/spi/docentry.h +++ b/persistence/src/vespa/persistence/spi/docentry.h @@ -14,80 +14,59 @@ #pragma once #include <persistence/spi/types.h> +#include <vespa/document/base/globalid.h> namespace storage::spi { -enum DocumentMetaFlags { +enum class DocumentMetaEnum { NONE = 0x0, REMOVE_ENTRY = 0x1 }; class DocEntry { public: - typedef uint32_t SizeType; -private: - Timestamp _timestamp; - int _metaFlags; - SizeType _persistedDocumentSize; - SizeType _size; - DocumentIdUP _documentId; - DocumentUP _document; -public: + using SizeType = uint32_t; using UP = std::unique_ptr<DocEntry>; using SP = std::shared_ptr<DocEntry>; - DocEntry(Timestamp t, int metaFlags, DocumentUP doc); - - /** - * Constructor that can be used by providers that already know - * the serialized size of the document, so the potentially expensive - * call to getSerializedSize can be avoided. - */ - DocEntry(Timestamp t, int metaFlags, DocumentUP doc, size_t serializedDocumentSize); - DocEntry(Timestamp t, int metaFlags, const DocumentId& docId); - - DocEntry(Timestamp t, int metaFlags); - ~DocEntry(); - DocEntry* clone() const; - const Document* getDocument() const { return _document.get(); } - const DocumentId* getDocumentId() const; - DocumentUP releaseDocument(); - bool isRemove() const { return (_metaFlags & REMOVE_ENTRY); } + DocEntry(const DocEntry &) = delete; + DocEntry & operator=(const DocEntry &) = delete; + DocEntry(DocEntry &&) = delete; + DocEntry & operator=(DocEntry &&) = delete; + virtual ~DocEntry(); + bool isRemove() const { return (_metaEnum == DocumentMetaEnum::REMOVE_ENTRY); } Timestamp getTimestamp() const { return _timestamp; } - int getFlags() const { return _metaFlags; } - void setFlags(int flags) { _metaFlags = flags; } - /** - * @return In-memory size of this doc entry, including document instance. - * In essence: serialized size of document + sizeof(DocEntry). - */ - SizeType getSize() const { return _size; } + DocumentMetaEnum getMetaEnum() const { return _metaEnum; } /** * If entry contains a document, returns its serialized size. * If entry contains a document id, returns the serialized size of * the id alone. - * Otherwise (i.e. metadata only), returns zero. - */ - SizeType getDocumentSize() const; - /** - * Return size of document as it exists in persisted form. By default - * this will return the serialized size of the entry's document instance, - * but for persistence providers that are able to provide this information - * efficiently, this value can be set explicitly to provide better statistical - * tracking for e.g. visiting operations in the service layer. - * If explicitly set, this value shall be the size of the document _before_ - * any field filtering is performed. + * Otherwise (i.e. metadata only), returns sizeof(DocEntry). */ - SizeType getPersistedDocumentSize() const { return _persistedDocumentSize; } - /** - * Set persisted size of document. Optional. - * @see getPersistedDocumentSize - */ - void setPersistedDocumentSize(SizeType persistedDocumentSize) { - _persistedDocumentSize = persistedDocumentSize; - } + SizeType getSize() const { return _size; } - vespalib::string toString() const; - bool operator==(const DocEntry& entry) const; + virtual vespalib::string toString() const; + virtual const Document* getDocument() const { return nullptr; } + virtual const DocumentId* getDocumentId() const { return nullptr; } + virtual vespalib::stringref getDocumentType() const { return vespalib::stringref(); } + virtual GlobalId getGid() const { return GlobalId(); } + virtual DocumentUP releaseDocument(); + static UP create(Timestamp t, DocumentMetaEnum metaEnum); + static UP create(Timestamp t, DocumentMetaEnum metaEnum, const DocumentId &docId); + static UP create(Timestamp t, DocumentMetaEnum metaEnum, vespalib::stringref docType, GlobalId gid); + static UP create(Timestamp t, DocumentUP doc); + static UP create(Timestamp t, DocumentUP doc, SizeType serializedDocumentSize); +protected: + DocEntry(Timestamp t, DocumentMetaEnum metaEnum, SizeType size) + : _timestamp(t), + _metaEnum(metaEnum), + _size(size) + {} +private: + DocEntry(Timestamp t, DocumentMetaEnum metaEnum) : DocEntry(t, metaEnum, sizeof(DocEntry)) { } + Timestamp _timestamp; + DocumentMetaEnum _metaEnum; + SizeType _size; }; std::ostream & operator << (std::ostream & os, const DocEntry & r); diff --git a/persistence/src/vespa/persistence/spi/result.cpp b/persistence/src/vespa/persistence/spi/result.cpp index e458d58fe69..a728f93e60a 100644 --- a/persistence/src/vespa/persistence/spi/result.cpp +++ b/persistence/src/vespa/persistence/spi/result.cpp @@ -1,6 +1,7 @@ // Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. #include "result.h" +#include "docentry.h" #include <vespa/document/fieldvalue/document.h> #include <vespa/vespalib/stllike/asciistream.h> #include <ostream> @@ -48,6 +49,24 @@ GetResult::~GetResult() = default; BucketIdListResult::~BucketIdListResult() = default; IterateResult::~IterateResult() = default; +IterateResult::IterateResult(IterateResult &&) noexcept = default; +IterateResult & IterateResult::operator=(IterateResult &&) noexcept = default; + +IterateResult::IterateResult(ErrorType error, const vespalib::string& errorMessage) + : Result(error, errorMessage), + _completed(false) +{ } + + +IterateResult::IterateResult(List entries, bool completed) + : _completed(completed), + _entries(std::move(entries)) +{ } + +IterateResult::List +IterateResult::steal_entries() { + return std::move(_entries); +} } diff --git a/persistence/src/vespa/persistence/spi/result.h b/persistence/src/vespa/persistence/spi/result.h index c734a885b12..10c589307ba 100644 --- a/persistence/src/vespa/persistence/spi/result.h +++ b/persistence/src/vespa/persistence/spi/result.h @@ -3,11 +3,12 @@ #include "bucketinfo.h" #include "bucket.h" -#include "docentry.h" #include <vespa/document/bucket/bucketidlist.h> namespace storage::spi { +class DocEntry; + class Result { public: typedef std::unique_ptr<Result> UP; @@ -279,15 +280,12 @@ private: class IterateResult : public Result { public: - typedef std::vector<DocEntry::UP> List; + using List = std::vector<std::unique_ptr<DocEntry>>; /** * Constructor used when there was an error creating the iterator. */ - IterateResult(ErrorType error, const vespalib::string& errorMessage) - : Result(error, errorMessage), - _completed(false) - { } + IterateResult(ErrorType error, const vespalib::string& errorMessage); /** * Constructor used when the iteration was successful. @@ -296,24 +294,21 @@ public: * * @param completed Set to true if iteration has been completed. */ - IterateResult(List entries, bool completed) - : _completed(completed), - _entries(std::move(entries)) - { } + IterateResult(List entries, bool completed); IterateResult(const IterateResult &) = delete; - IterateResult(IterateResult &&rhs) noexcept = default; - IterateResult &operator=(IterateResult &&rhs) noexcept = default; + IterateResult(IterateResult &&rhs) noexcept; + IterateResult &operator=(IterateResult &&rhs) noexcept; ~IterateResult(); const List& getEntries() const { return _entries; } - List steal_entries() { return std::move(_entries); } + List steal_entries(); bool isCompleted() const { return _completed; } private: bool _completed; - std::vector<DocEntry::UP> _entries; + List _entries; }; } diff --git a/persistence/src/vespa/persistence/spi/test.cpp b/persistence/src/vespa/persistence/spi/test.cpp index 32381110630..58a8ce3fe52 100644 --- a/persistence/src/vespa/persistence/spi/test.cpp +++ b/persistence/src/vespa/persistence/spi/test.cpp @@ -1,7 +1,9 @@ // Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. #include "test.h" +#include "docentry.h" #include <vespa/document/test/make_bucket_space.h> +#include <vespa/document/fieldvalue/document.h> using document::BucketId; using document::BucketSpace; @@ -9,9 +11,45 @@ using document::test::makeBucketSpace; namespace storage::spi::test { -Bucket makeSpiBucket(BucketId bucketId) +Bucket +makeSpiBucket(BucketId bucketId) { return Bucket(document::Bucket(makeBucketSpace(), bucketId)); } +std::unique_ptr<DocEntry> +cloneDocEntry(const DocEntry & e) { + std::unique_ptr<DocEntry> ret; + if (e.getDocument()) { + ret = DocEntry::create(e.getTimestamp(), std::make_unique<Document>(*e.getDocument()), e.getSize()); + } else if (e.getDocumentId()) { + ret = DocEntry::create(e.getTimestamp(), e.getMetaEnum(), *e.getDocumentId()); + } else { + ret = DocEntry::create(e.getTimestamp(), e.getMetaEnum()); + } + return ret; +} + +bool +equal(const DocEntry & a, const DocEntry & b) { + if (a.getTimestamp() != b.getTimestamp()) return false; + if (a.getMetaEnum() != b.getMetaEnum()) return false; + if (a.getSize() != b.getSize()) return false; + + if (a.getDocument()) { + if (!b.getDocument()) return false; + if (*a.getDocument() != *b.getDocument()) return false; + } else { + if (b.getDocument()) return false; + } + if (a.getDocumentId()) { + if (!b.getDocumentId()) return false; + if (*a.getDocumentId() != *b.getDocumentId()) return false; + } else { + if (b.getDocumentId()) return false; + } + + return true; +} + } diff --git a/persistence/src/vespa/persistence/spi/test.h b/persistence/src/vespa/persistence/spi/test.h index af7109ec80c..1660e5f14fd 100644 --- a/persistence/src/vespa/persistence/spi/test.h +++ b/persistence/src/vespa/persistence/spi/test.h @@ -3,11 +3,16 @@ #pragma once #include "bucket.h" +#include <memory> + +namespace storage::spi { class DocEntry; } namespace storage::spi::test { // Helper functions used by unit tests Bucket makeSpiBucket(document::BucketId bucketId); +std::unique_ptr<DocEntry> cloneDocEntry(const DocEntry & entry); +bool equal(const DocEntry & a, const DocEntry & b); } |