diff options
author | Arnstein Ressem <aressem@gmail.com> | 2022-01-07 00:30:48 +0100 |
---|---|---|
committer | GitHub <noreply@github.com> | 2022-01-07 00:30:48 +0100 |
commit | 7152fe6fd7782fc453ce0a6cf87ea0cc4ed89c3b (patch) | |
tree | 7c804eea5aa92beaee1b1af8089baec5a80fe7eb /persistence | |
parent | 62b74775db404dd2f7e1c37efae427b0c5d352d9 (diff) |
Revert "Balder/refactor docentry"
Diffstat (limited to 'persistence')
11 files changed, 230 insertions, 320 deletions
diff --git a/persistence/src/tests/dummyimpl/dummypersistence_test.cpp b/persistence/src/tests/dummyimpl/dummypersistence_test.cpp index 3fa1a8a9b8d..21cf7cb9ae3 100644 --- a/persistence/src/tests/dummyimpl/dummypersistence_test.cpp +++ b/persistence/src/tests/dummyimpl/dummypersistence_test.cpp @@ -9,6 +9,7 @@ #include <vespa/vdslib/state/clusterstate.h> #include <vespa/config-stor-distribution.h> + using namespace storage::spi; using namespace storage; using document::test::makeBucketSpace; @@ -19,14 +20,14 @@ namespace { struct Fixture { BucketContent content; - void insert(DocumentId id, Timestamp timestamp, DocumentMetaEnum meta_flags) { - content.insert(DocEntry::create(timestamp, meta_flags, id)); + void insert(DocumentId id, Timestamp timestamp, int meta_flags) { + content.insert(DocEntry::UP(new DocEntry(timestamp, meta_flags, id))); } Fixture() { - insert(DocumentId("id:ns:type::test:3"), Timestamp(3), DocumentMetaEnum::NONE); - insert(DocumentId("id:ns:type::test:1"), Timestamp(1), DocumentMetaEnum::NONE); - insert(DocumentId("id:ns:type::test:2"), Timestamp(2), DocumentMetaEnum::NONE); + insert(DocumentId("id:ns:type::test:3"), Timestamp(3), NONE); + insert(DocumentId("id:ns:type::test:1"), Timestamp(1), NONE); + insert(DocumentId("id:ns:type::test:2"), Timestamp(2), NONE); } }; @@ -63,13 +64,13 @@ TEST_F("require that BucketContent can provide bucket info", Fixture) { uint32_t lastChecksum = 0; EXPECT_NOT_EQUAL(lastChecksum, f.content.getBucketInfo().getChecksum()); lastChecksum = f.content.getBucketInfo().getChecksum(); - f.insert(DocumentId("id:ns:type::test:3"), Timestamp(4), DocumentMetaEnum::NONE); + f.insert(DocumentId("id:ns:type::test:3"), Timestamp(4), NONE); EXPECT_NOT_EQUAL(lastChecksum, f.content.getBucketInfo().getChecksum()); lastChecksum = f.content.getBucketInfo().getChecksum(); - f.insert(DocumentId("id:ns:type::test:2"), Timestamp(5), DocumentMetaEnum::REMOVE_ENTRY); + f.insert(DocumentId("id:ns:type::test:2"), Timestamp(5), REMOVE_ENTRY); EXPECT_NOT_EQUAL(lastChecksum, f.content.getBucketInfo().getChecksum()); - f.insert(DocumentId("id:ns:type::test:1"), Timestamp(6), DocumentMetaEnum::REMOVE_ENTRY); - f.insert(DocumentId("id:ns:type::test:3"), Timestamp(7), DocumentMetaEnum::REMOVE_ENTRY); + f.insert(DocumentId("id:ns:type::test:1"), Timestamp(6), REMOVE_ENTRY); + f.insert(DocumentId("id:ns:type::test:3"), Timestamp(7), REMOVE_ENTRY); EXPECT_EQUAL(0u, f.content.getBucketInfo().getChecksum()); } diff --git a/persistence/src/tests/spi/clusterstatetest.cpp b/persistence/src/tests/spi/clusterstatetest.cpp index 2186d408791..ac67903244f 100644 --- a/persistence/src/tests/spi/clusterstatetest.cpp +++ b/persistence/src/tests/spi/clusterstatetest.cpp @@ -5,12 +5,10 @@ #include <vespa/vdslib/distribution/distribution.h> #include <vespa/vdslib/state/clusterstate.h> #include <vespa/config-stor-distribution.h> -#include <vespa/document/base/testdocman.h> #include <gtest/gtest.h> using storage::spi::test::makeSpiBucket; using vespalib::Trinary; -using document::GlobalId; namespace storage::spi { @@ -262,57 +260,4 @@ TEST(ClusterStateTest, node_maintenance_state_is_set_independent_of_bucket_space EXPECT_FALSE(node_marked_as_maintenance_in_state("distributor:3 storage:3 .0.s:m", d, 0, false)); } -TEST(DocEntryTest, test_basics) { - EXPECT_EQ(24, sizeof(DocEntry)); -} - -TEST(DocEntryTest, test_meta_only) { - DocEntry::UP e = DocEntry::create(Timestamp(9), DocumentMetaEnum::NONE); - EXPECT_EQ(9, e->getTimestamp()); - EXPECT_FALSE(e->isRemove()); - EXPECT_EQ(24, e->getSize()); - EXPECT_EQ(nullptr, e->getDocument()); - EXPECT_EQ(nullptr, e->getDocumentId()); - EXPECT_EQ("", e->getDocumentType()); - EXPECT_EQ(GlobalId(), e->getGid()); - - DocEntry::UP r = DocEntry::create(Timestamp(666), DocumentMetaEnum::REMOVE_ENTRY); - EXPECT_EQ(666, r->getTimestamp()); - EXPECT_TRUE(r->isRemove()); -} - -TEST(DocEntryTest, test_docid_only) { - DocEntry::UP e = DocEntry::create(Timestamp(9), DocumentMetaEnum::NONE, DocumentId("id:test:test::1")); - EXPECT_EQ(9, e->getTimestamp()); - EXPECT_FALSE(e->isRemove()); - EXPECT_EQ(16, e->getSize()); - EXPECT_EQ(nullptr, e->getDocument()); - EXPECT_NE(nullptr, e->getDocumentId()); - EXPECT_EQ("test", e->getDocumentType()); - EXPECT_EQ(GlobalId::parse("gid(0xc4ca4238f9f9649222750be2)"), e->getGid()); -} - -TEST(DocEntryTest, test_doctype_and_gid) { - DocEntry::UP e = DocEntry::create(Timestamp(9), DocumentMetaEnum::NONE, "doc_type", GlobalId::parse("gid(0xc4cef118f9f9649222750be2)")); - EXPECT_EQ(9, e->getTimestamp()); - EXPECT_FALSE(e->isRemove()); - EXPECT_EQ(20, e->getSize()); - EXPECT_EQ(nullptr, e->getDocument()); - EXPECT_EQ(nullptr, e->getDocumentId()); - EXPECT_EQ("doc_type", e->getDocumentType()); - EXPECT_EQ(GlobalId::parse("gid(0xc4cef118f9f9649222750be2)"), e->getGid()); -} - -TEST(DocEntryTest, test_document_only) { - document::TestDocMan testDocMan; - DocEntry::UP e = DocEntry::create(Timestamp(9), testDocMan.createRandomDocument(0, 1000)); - EXPECT_EQ(9, e->getTimestamp()); - EXPECT_FALSE(e->isRemove()); - EXPECT_EQ(632, e->getSize()); - EXPECT_NE(nullptr, e->getDocument()); - EXPECT_NE(nullptr, e->getDocumentId()); - EXPECT_EQ("testdoctype1", e->getDocumentType()); - EXPECT_EQ(GlobalId::parse("gid(0x4bc7000087365609f22f1f4b)"), e->getGid()); -} - } diff --git a/persistence/src/vespa/persistence/conformancetest/conformancetest.cpp b/persistence/src/vespa/persistence/conformancetest/conformancetest.cpp index 6afdd142457..810f2ad2356 100644 --- a/persistence/src/vespa/persistence/conformancetest/conformancetest.cpp +++ b/persistence/src/vespa/persistence/conformancetest/conformancetest.cpp @@ -5,7 +5,6 @@ #include <vespa/persistence/spi/test.h> #include <vespa/persistence/spi/catchresult.h> #include <vespa/persistence/spi/resource_usage_listener.h> -#include <vespa/persistence/spi/docentry.h> #include <vespa/document/fieldset/fieldsets.h> #include <vespa/document/update/documentupdate.h> #include <vespa/document/update/assignvalueupdate.h> @@ -26,17 +25,15 @@ using document::BucketId; using document::BucketSpace; using document::test::makeBucketSpace; using storage::spi::test::makeSpiBucket; -using storage::spi::test::cloneDocEntry; namespace storage::spi { using PersistenceProviderUP = std::unique_ptr<PersistenceProvider>; -using DocEntryList = std::vector<DocEntry::UP>; namespace { -std::unique_ptr<PersistenceProvider> -getSpi(ConformanceTest::PersistenceFactory &factory, const document::TestDocMan &testDocMan) { +std::unique_ptr<PersistenceProvider> getSpi(ConformanceTest::PersistenceFactory &factory, + const document::TestDocMan &testDocMan) { PersistenceProviderUP result(factory.getPersistenceImplementation( testDocMan.getTypeRepoSP(), *testDocMan.getTypeConfig())); EXPECT_TRUE(!result->initialize().hasError()); @@ -126,7 +123,7 @@ struct DocAndTimestamp */ struct Chunk { - DocEntryList _entries; + std::vector<DocEntry::UP> _entries; }; struct DocEntryIndirectTimestampComparator @@ -169,7 +166,7 @@ doIterate(PersistenceProvider& spi, } size_t -getRemoveEntryCount(const DocEntryList& entries) +getRemoveEntryCount(const std::vector<spi::DocEntry::UP>& entries) { size_t ret = 0; for (size_t i = 0; i < entries.size(); ++i) { @@ -180,13 +177,13 @@ getRemoveEntryCount(const DocEntryList& entries) return ret; } -DocEntryList +std::vector<DocEntry::UP> getEntriesFromChunks(const std::vector<Chunk>& chunks) { - DocEntryList ret; + std::vector<spi::DocEntry::UP> ret; for (size_t chunk = 0; chunk < chunks.size(); ++chunk) { for (size_t i = 0; i < chunks[chunk]._entries.size(); ++i) { - ret.push_back(cloneDocEntry(*chunks[chunk]._entries[i])); + ret.push_back(DocEntry::UP(chunks[chunk]._entries[i]->clone())); } } std::sort(ret.begin(), @@ -196,12 +193,12 @@ getEntriesFromChunks(const std::vector<Chunk>& chunks) } -DocEntryList +std::vector<DocEntry::UP> iterateBucket(PersistenceProvider& spi, const Bucket& bucket, IncludedVersions versions) { - DocEntryList ret; + std::vector<DocEntry::UP> ret; DocumentSelection docSel(""); Selection sel(docSel); @@ -220,7 +217,7 @@ iterateBucket(PersistenceProvider& spi, spi.iterate(iter.getIteratorId(), std::numeric_limits<int64_t>().max(), context); if (result.getErrorCode() != Result::ErrorType::NONE) { - return DocEntryList(); + return std::vector<DocEntry::UP>(); } auto list = result.steal_entries(); std::move(list.begin(), list.end(), std::back_inserter(ret)); @@ -241,7 +238,8 @@ verifyDocs(const std::vector<DocAndTimestamp>& wanted, const std::vector<Chunk>& chunks, const std::set<string>& removes = std::set<string>()) { - DocEntryList retrieved = getEntriesFromChunks(chunks); + std::vector<DocEntry::UP> retrieved( + getEntriesFromChunks(chunks)); size_t removeCount = getRemoveEntryCount(retrieved); // Ensure that we've got the correct number of puts and removes EXPECT_EQ(removes.size(), removeCount); @@ -259,13 +257,15 @@ verifyDocs(const std::vector<DocAndTimestamp>& wanted, } EXPECT_EQ(wanted[wantedIdx].timestamp, entry.getTimestamp()); size_t serSize = wanted[wantedIdx].doc->serialize().size(); - EXPECT_EQ(serSize, size_t(entry.getSize())); + EXPECT_EQ(serSize + sizeof(DocEntry), size_t(entry.getSize())); + EXPECT_EQ(serSize, size_t(entry.getDocumentSize())); ++wantedIdx; } else { // Remove-entry EXPECT_TRUE(entry.getDocumentId() != 0); size_t serSize = entry.getDocumentId()->getSerializedSize(); - EXPECT_EQ(serSize, size_t(entry.getSize())); + EXPECT_EQ(serSize + sizeof(DocEntry), size_t(entry.getSize())); + EXPECT_EQ(serSize, size_t(entry.getDocumentSize())); if (removes.find(entry.getDocumentId()->toString()) == removes.end()) { FAIL() << "Got unexpected remove entry for document id " << *entry.getDocumentId(); @@ -697,7 +697,8 @@ TEST_F(ConformanceTest, testPutDuplicate) EXPECT_EQ(1, (int)info.getDocumentCount()); EXPECT_EQ(checksum, info.getChecksum()); } - DocEntryList entries = iterateBucket(*spi, bucket, ALL_VERSIONS); + std::vector<DocEntry::UP> entries( + iterateBucket(*spi, bucket, ALL_VERSIONS)); EXPECT_EQ(size_t(1), entries.size()); } @@ -721,7 +722,8 @@ TEST_F(ConformanceTest, testRemove) EXPECT_EQ(1, (int)info.getDocumentCount()); EXPECT_TRUE(info.getChecksum() != 0); - DocEntryList entries = iterateBucket(*spi, bucket, NEWEST_DOCUMENT_ONLY); + std::vector<DocEntry::UP> entries( + iterateBucket(*spi, bucket, NEWEST_DOCUMENT_ONLY)); EXPECT_EQ(size_t(1), entries.size()); } @@ -739,11 +741,15 @@ TEST_F(ConformanceTest, testRemove) EXPECT_EQ(true, result2.wasFound()); } { - DocEntryList entries = iterateBucket(*spi, bucket,NEWEST_DOCUMENT_ONLY); + std::vector<DocEntry::UP> entries(iterateBucket(*spi, + bucket, + NEWEST_DOCUMENT_ONLY)); EXPECT_EQ(size_t(0), entries.size()); } { - DocEntryList entries = iterateBucket(*spi, bucket,NEWEST_DOCUMENT_OR_REMOVE); + std::vector<DocEntry::UP> entries(iterateBucket(*spi, + bucket, + NEWEST_DOCUMENT_OR_REMOVE)); EXPECT_EQ(size_t(1), entries.size()); } @@ -856,7 +862,8 @@ TEST_F(ConformanceTest, testRemoveMerge) // Remove entry should exist afterwards { - DocEntryList entries = iterateBucket(*spi, bucket, ALL_VERSIONS); + std::vector<DocEntry::UP> entries(iterateBucket( + *spi, bucket, ALL_VERSIONS)); EXPECT_EQ(size_t(2), entries.size()); // Timestamp-sorted by iterateBucket EXPECT_EQ(removeId, *entries.back()->getDocumentId()); @@ -882,7 +889,7 @@ TEST_F(ConformanceTest, testRemoveMerge) } // Must have new remove. We don't check for the presence of the old remove. { - DocEntryList entries = iterateBucket(*spi, bucket, ALL_VERSIONS); + std::vector<DocEntry::UP> entries(iterateBucket(*spi, bucket, ALL_VERSIONS)); EXPECT_TRUE(entries.size() >= 2); EXPECT_EQ(removeId, *entries.back()->getDocumentId()); EXPECT_EQ(Timestamp(11), entries.back()->getTimestamp()); @@ -908,7 +915,7 @@ TEST_F(ConformanceTest, testRemoveMerge) } // Must have newest remove. We don't check for the presence of the old remove. { - DocEntryList entries = iterateBucket(*spi, bucket, ALL_VERSIONS); + std::vector<DocEntry::UP> entries(iterateBucket(*spi, bucket, ALL_VERSIONS)); EXPECT_TRUE(entries.size() >= 2); EXPECT_EQ(removeId, *entries.back()->getDocumentId()); EXPECT_EQ(Timestamp(11), entries.back()->getTimestamp()); @@ -1344,7 +1351,7 @@ TEST_F(ConformanceTest, testIterateRemoves) CreateIteratorResult iter(createIterator(*spi, b, sel, NEWEST_DOCUMENT_OR_REMOVE)); std::vector<Chunk> chunks = doIterate(*spi, iter.getIteratorId(), 4_Ki); - DocEntryList entries = getEntriesFromChunks(chunks); + std::vector<DocEntry::UP> entries = getEntriesFromChunks(chunks); EXPECT_EQ(docs.size(), entries.size()); verifyDocs(nonRemovedDocs, chunks, removedDocs); diff --git a/persistence/src/vespa/persistence/dummyimpl/dummypersistence.cpp b/persistence/src/vespa/persistence/dummyimpl/dummypersistence.cpp index d947ca51f49..9d9f31b63a3 100644 --- a/persistence/src/vespa/persistence/dummyimpl/dummypersistence.cpp +++ b/persistence/src/vespa/persistence/dummyimpl/dummypersistence.cpp @@ -9,7 +9,6 @@ #include <vespa/persistence/spi/i_resource_usage_listener.h> #include <vespa/persistence/spi/resource_usage.h> #include <vespa/persistence/spi/bucketexecutor.h> -#include <vespa/persistence/spi/test.h> #include <vespa/vespalib/util/crc.h> #include <vespa/document/fieldset/fieldsetrepo.h> #include <vespa/vespalib/stllike/asciistream.h> @@ -25,9 +24,6 @@ using vespalib::make_string; using std::binary_search; using std::lower_bound; using document::FixedBucketSpaces; -using document::FieldSet; -using storage::spi::test::cloneDocEntry; -using storage::spi::test::equal; namespace storage::spi::dummy { @@ -166,7 +162,7 @@ BucketContent::insert(DocEntry::SP e) { auto it = lower_bound(_entries.begin(), _entries.end(), e->getTimestamp(), TimestampLess()); if (it != _entries.end()) { if (it->entry->getTimestamp() == e->getTimestamp()) { - if (equal(*it->entry, *e)) { + if (*it->entry.get() == *e) { LOG(debug, "Ignoring duplicate put entry %s", e->toString().c_str()); return; } else { @@ -435,7 +431,7 @@ DummyPersistence::putAsync(const Bucket& b, Timestamp t, Document::SP doc, Conte } } else { LOG(spam, "Inserting document %s", doc->toString(true).c_str()); - auto entry = DocEntry::create(t, Document::UP(doc->clone())); + auto entry = std::make_unique<DocEntry>(t, NONE, Document::UP(doc->clone())); (*bc)->insert(std::move(entry)); bc.reset(); onComplete->onComplete(std::make_unique<Result>()); @@ -493,7 +489,7 @@ DummyPersistence::removeAsync(const Bucket& b, std::vector<TimeStampAndDocumentI } DocEntry::SP entry((*bc)->getEntry(id)); numRemoves += (entry.get() && !entry->isRemove()) ? 1 : 0; - auto remEntry = DocEntry::create(t, DocumentMetaEnum::REMOVE_ENTRY, id); + auto remEntry = std::make_unique<DocEntry>(t, REMOVE_ENTRY, id); if ((*bc)->hasTimestamp(t)) { (*bc)->eraseEntry(t); @@ -505,7 +501,7 @@ DummyPersistence::removeAsync(const Bucket& b, std::vector<TimeStampAndDocumentI } GetResult -DummyPersistence::get(const Bucket& b, const FieldSet& fieldSet, const DocumentId& did, Context&) const +DummyPersistence::get(const Bucket& b, const document::FieldSet& fieldSet, const DocumentId& did, Context&) const { DUMMYPERSISTENCE_VERIFY_INITIALIZED; LOG(debug, "get(%s, %s)", @@ -522,8 +518,8 @@ DummyPersistence::get(const Bucket& b, const FieldSet& fieldSet, const DocumentI return GetResult::make_for_tombstone(entry->getTimestamp()); } else { Document::UP doc(entry->getDocument()->clone()); - if (fieldSet.getType() != FieldSet::Type::ALL) { - FieldSet::stripFields(*doc, fieldSet); + if (fieldSet.getType() != document::FieldSet::Type::ALL) { + document::FieldSet::stripFields(*doc, fieldSet); } return GetResult(std::move(doc), entry->getTimestamp()); } @@ -653,16 +649,22 @@ DummyPersistence::iterate(IteratorId id, uint64_t maxByteSize, Context& ctx) con if (currentSize != 0 && currentSize + size > maxByteSize) break; currentSize += size; if (!entry->isRemove() - && it->_fieldSet->getType() != FieldSet::Type::ALL) + && it->_fieldSet->getType() != document::FieldSet::Type::ALL) { assert(entry->getDocument()); // Create new document with only wanted fields. - Document::UP filtered(FieldSet::createDocumentSubsetCopy(*entry->getDocument(), *it->_fieldSet)); - auto ret = DocEntry::create(entry->getTimestamp(), std::move(filtered), entry->getSize()); + Document::UP filtered( + document::FieldSet::createDocumentSubsetCopy( + *entry->getDocument(), + *it->_fieldSet)); + DocEntry::UP ret(new DocEntry(entry->getTimestamp(), + entry->getFlags(), + std::move(filtered), + entry->getPersistedDocumentSize())); entries.push_back(std::move(ret)); } else { // Use entry as-is. - entries.push_back(cloneDocEntry(*entry)); + entries.push_back(DocEntry::UP(entry->clone())); ++fastPath; } } diff --git a/persistence/src/vespa/persistence/dummyimpl/dummypersistence.h b/persistence/src/vespa/persistence/dummyimpl/dummypersistence.h index 3b6fc9f1449..a7a784d0479 100644 --- a/persistence/src/vespa/persistence/dummyimpl/dummypersistence.h +++ b/persistence/src/vespa/persistence/dummyimpl/dummypersistence.h @@ -9,7 +9,6 @@ #pragma once #include <vespa/persistence/spi/abstractpersistenceprovider.h> -#include <vespa/persistence/spi/docentry.h> #include <vespa/document/base/globalid.h> #include <vespa/document/fieldset/fieldsets.h> #include <vespa/vespalib/stllike/hash_map.h> diff --git a/persistence/src/vespa/persistence/spi/docentry.cpp b/persistence/src/vespa/persistence/spi/docentry.cpp index f0329e8cc5e..8669bbf666b 100644 --- a/persistence/src/vespa/persistence/spi/docentry.cpp +++ b/persistence/src/vespa/persistence/spi/docentry.cpp @@ -4,104 +4,90 @@ #include <vespa/document/fieldvalue/document.h> #include <vespa/vespalib/objects/nbostream.h> #include <sstream> +#include <cassert> namespace storage::spi { -namespace { - -class DocEntryWithId final : public DocEntry { -public: - DocEntryWithId(Timestamp t, DocumentMetaEnum metaEnum, const DocumentId &docId); - ~DocEntryWithId(); - vespalib::string toString() const override; - const DocumentId* getDocumentId() const override { return & _documentId; } - vespalib::stringref getDocumentType() const override { return _documentId.getDocType(); } - GlobalId getGid() const override { return _documentId.getGlobalId(); } -private: - DocumentId _documentId; -}; - -class DocEntryWithTypeAndGid final : public DocEntry { -public: - DocEntryWithTypeAndGid(Timestamp t, DocumentMetaEnum metaEnum, vespalib::stringref docType, GlobalId gid); - ~DocEntryWithTypeAndGid(); - vespalib::string toString() const override; - vespalib::stringref getDocumentType() const override { return _type; } - GlobalId getGid() const override { return _gid; } -private: - vespalib::string _type; - GlobalId _gid; -}; - -class DocEntryWithDoc final : public DocEntry { -public: - DocEntryWithDoc(Timestamp t, DocumentUP doc); - - /** - * Constructor that can be used by providers that already know - * the serialized size of the document, so the potentially expensive - * call to getSerializedSize can be avoided. This value shall be the size of the document _before_ - * any field filtering is performed. - */ - DocEntryWithDoc(Timestamp t, DocumentUP doc, size_t serializedDocumentSize); - ~DocEntryWithDoc(); - vespalib::string toString() const override; - const Document* getDocument() const override { return _document.get(); } - const DocumentId* getDocumentId() const override { return &_document->getId(); } - DocumentUP releaseDocument() override { return std::move(_document); } - vespalib::stringref getDocumentType() const override { return _document->getId().getDocType(); } - GlobalId getGid() const override { return _document->getId().getGlobalId(); } -private: - DocumentUP _document; -}; - -DocEntryWithDoc::DocEntryWithDoc(Timestamp t, DocumentUP doc) - : DocEntry(t, DocumentMetaEnum::NONE, doc->serialize().size()), +DocEntry::DocEntry(Timestamp t, int metaFlags, DocumentUP doc) + : _timestamp(t), + _metaFlags(metaFlags), + _persistedDocumentSize(doc->serialize().size()), + _size(_persistedDocumentSize + sizeof(DocEntry)), + _documentId(), _document(std::move(doc)) { } -DocEntryWithDoc::DocEntryWithDoc(Timestamp t, DocumentUP doc, size_t serializedDocumentSize) - : DocEntry(t, DocumentMetaEnum::NONE, serializedDocumentSize), +DocEntry::DocEntry(Timestamp t, int metaFlags, DocumentUP doc, size_t serializedDocumentSize) + : _timestamp(t), + _metaFlags(metaFlags), + _persistedDocumentSize(serializedDocumentSize), + _size(_persistedDocumentSize + sizeof(DocEntry)), + _documentId(), _document(std::move(doc)) { } -DocEntryWithId::DocEntryWithId(Timestamp t, DocumentMetaEnum metaEnum, const DocumentId& docId) - : DocEntry(t, metaEnum, docId.getSerializedSize()), - _documentId(docId) +DocEntry::DocEntry(Timestamp t, int metaFlags, const DocumentId& docId) + : _timestamp(t), + _metaFlags(metaFlags), + _persistedDocumentSize(docId.getSerializedSize()), + _size(_persistedDocumentSize + sizeof(DocEntry)), + _documentId(new DocumentId(docId)), + _document() { } -DocEntryWithTypeAndGid::DocEntryWithTypeAndGid(Timestamp t, DocumentMetaEnum metaEnum, vespalib::stringref docType, GlobalId gid) - : DocEntry(t, metaEnum, docType.size() + sizeof(gid)), - _type(docType), - _gid(gid) +DocEntry::DocEntry(Timestamp t, int metaFlags) + : _timestamp(t), + _metaFlags(metaFlags), + _persistedDocumentSize(0), + _size(sizeof(DocEntry)), + _documentId(), + _document() { } -DocEntryWithTypeAndGid::~DocEntryWithTypeAndGid() = default; -DocEntryWithId::~DocEntryWithId() = default; -DocEntryWithDoc::~DocEntryWithDoc() = default; +DocEntry::~DocEntry() = default; -vespalib::string -DocEntryWithId::toString() const -{ - std::ostringstream out; - out << "DocEntry(" << getTimestamp() << ", " << int(getMetaEnum()) << ", " << _documentId << ")"; - return out.str(); +DocEntry* +DocEntry::clone() const { + DocEntry* ret; + if (_documentId) { + ret = new DocEntry(_timestamp, _metaFlags, *_documentId); + ret->setPersistedDocumentSize(_persistedDocumentSize); + } else if (_document) { + ret = new DocEntry(_timestamp, _metaFlags, + std::make_unique<Document>(*_document), + _persistedDocumentSize); + } else { + ret = new DocEntry(_timestamp, _metaFlags); + ret->setPersistedDocumentSize(_persistedDocumentSize); + } + return ret; } -vespalib::string -DocEntryWithTypeAndGid::toString() const +const DocumentId* +DocEntry::getDocumentId() const { + return (_document ? &_document->getId() : _documentId.get()); +} + +DocumentUP +DocEntry::releaseDocument() { + return std::move(_document); +} + +DocEntry::SizeType +DocEntry::getDocumentSize() const { - std::ostringstream out; - out << "DocEntry(" << getTimestamp() << ", " << int(getMetaEnum()) << ", " << _type << ", " << _gid << ")"; - return out.str(); + assert(_size >= sizeof(DocEntry)); + return _size - sizeof(DocEntry); } vespalib::string -DocEntryWithDoc::toString() const +DocEntry::toString() const { std::ostringstream out; - out << "DocEntry(" << getTimestamp() << ", " << int(getMetaEnum()) << ", "; - if (_document.get()) { + out << "DocEntry(" << _timestamp << ", " << _metaFlags << ", "; + if (_documentId) { + out << *_documentId; + } else if (_document.get()) { out << "Doc(" << _document->getId() << ")"; } else { out << "metadata only"; @@ -110,47 +96,53 @@ DocEntryWithDoc::toString() const return out.str(); } +std::ostream & +operator << (std::ostream & os, const DocEntry & r) { + return os << r.toString(); } -DocEntry::UP -DocEntry::create(Timestamp t, DocumentMetaEnum metaEnum) { - return UP(new DocEntry(t, metaEnum)); -} -DocEntry::UP -DocEntry::create(Timestamp t, DocumentMetaEnum metaEnum, const DocumentId &docId) { - return std::make_unique<DocEntryWithId>(t, metaEnum, docId); -} -DocEntry::UP -DocEntry::create(Timestamp t, DocumentMetaEnum metaEnum, vespalib::stringref docType, GlobalId gid) { - return std::make_unique<DocEntryWithTypeAndGid>(t, metaEnum, docType, gid); -} -DocEntry::UP -DocEntry::create(Timestamp t, DocumentUP doc) { - return std::make_unique<DocEntryWithDoc>(t, std::move(doc)); -} -DocEntry::UP -DocEntry::create(Timestamp t, DocumentUP doc, SizeType serializedDocumentSize) { - return std::make_unique<DocEntryWithDoc>(t, std::move(doc), serializedDocumentSize); -} +bool +DocEntry::operator==(const DocEntry& entry) const { + if (_timestamp != entry._timestamp) { + return false; + } -DocEntry::~DocEntry() = default; + if (_metaFlags != entry._metaFlags) { + return false; + } -DocumentUP -DocEntry::releaseDocument() { - return {}; -} + if (_documentId) { + if (!entry._documentId) { + return false; + } -vespalib::string -DocEntry::toString() const -{ - std::ostringstream out; - out << "DocEntry(" << _timestamp << ", " << int(_metaEnum) << ", metadata only)"; - return out.str(); -} + if (*_documentId != *entry._documentId) { + return false; + } + } else { + if (entry._documentId) { + return false; + } + } -std::ostream & -operator << (std::ostream & os, const DocEntry & r) { - return os << r.toString(); + if (_document) { + if (!entry._document) { + return false; + } + + if (*_document != *entry._document) { + return false; + } + } else { + if (entry._document) { + return false; + } + } + if (_persistedDocumentSize != entry._persistedDocumentSize) { + return false; + } + + return true; } } diff --git a/persistence/src/vespa/persistence/spi/docentry.h b/persistence/src/vespa/persistence/spi/docentry.h index 9ad06b41e90..3374ef6c02d 100644 --- a/persistence/src/vespa/persistence/spi/docentry.h +++ b/persistence/src/vespa/persistence/spi/docentry.h @@ -14,59 +14,80 @@ #pragma once #include <persistence/spi/types.h> -#include <vespa/document/base/globalid.h> namespace storage::spi { -enum class DocumentMetaEnum { +enum DocumentMetaFlags { NONE = 0x0, REMOVE_ENTRY = 0x1 }; class DocEntry { public: - using SizeType = uint32_t; + typedef uint32_t SizeType; +private: + Timestamp _timestamp; + int _metaFlags; + SizeType _persistedDocumentSize; + SizeType _size; + DocumentIdUP _documentId; + DocumentUP _document; +public: using UP = std::unique_ptr<DocEntry>; using SP = std::shared_ptr<DocEntry>; - DocEntry(const DocEntry &) = delete; - DocEntry & operator=(const DocEntry &) = delete; - DocEntry(DocEntry &&) = delete; - DocEntry & operator=(DocEntry &&) = delete; - virtual ~DocEntry(); - bool isRemove() const { return (_metaEnum == DocumentMetaEnum::REMOVE_ENTRY); } + DocEntry(Timestamp t, int metaFlags, DocumentUP doc); + + /** + * Constructor that can be used by providers that already know + * the serialized size of the document, so the potentially expensive + * call to getSerializedSize can be avoided. + */ + DocEntry(Timestamp t, int metaFlags, DocumentUP doc, size_t serializedDocumentSize); + DocEntry(Timestamp t, int metaFlags, const DocumentId& docId); + + DocEntry(Timestamp t, int metaFlags); + ~DocEntry(); + DocEntry* clone() const; + const Document* getDocument() const { return _document.get(); } + const DocumentId* getDocumentId() const; + DocumentUP releaseDocument(); + bool isRemove() const { return (_metaFlags & REMOVE_ENTRY); } Timestamp getTimestamp() const { return _timestamp; } - DocumentMetaEnum getMetaEnum() const { return _metaEnum; } + int getFlags() const { return _metaFlags; } + void setFlags(int flags) { _metaFlags = flags; } + /** + * @return In-memory size of this doc entry, including document instance. + * In essence: serialized size of document + sizeof(DocEntry). + */ + SizeType getSize() const { return _size; } /** * If entry contains a document, returns its serialized size. * If entry contains a document id, returns the serialized size of * the id alone. - * Otherwise (i.e. metadata only), returns sizeof(DocEntry). + * Otherwise (i.e. metadata only), returns zero. */ - SizeType getSize() const { return _size; } + SizeType getDocumentSize() const; + /** + * Return size of document as it exists in persisted form. By default + * this will return the serialized size of the entry's document instance, + * but for persistence providers that are able to provide this information + * efficiently, this value can be set explicitly to provide better statistical + * tracking for e.g. visiting operations in the service layer. + * If explicitly set, this value shall be the size of the document _before_ + * any field filtering is performed. + */ + SizeType getPersistedDocumentSize() const { return _persistedDocumentSize; } + /** + * Set persisted size of document. Optional. + * @see getPersistedDocumentSize + */ + void setPersistedDocumentSize(SizeType persistedDocumentSize) { + _persistedDocumentSize = persistedDocumentSize; + } - virtual vespalib::string toString() const; - virtual const Document* getDocument() const { return nullptr; } - virtual const DocumentId* getDocumentId() const { return nullptr; } - virtual vespalib::stringref getDocumentType() const { return vespalib::stringref(); } - virtual GlobalId getGid() const { return GlobalId(); } - virtual DocumentUP releaseDocument(); - static UP create(Timestamp t, DocumentMetaEnum metaEnum); - static UP create(Timestamp t, DocumentMetaEnum metaEnum, const DocumentId &docId); - static UP create(Timestamp t, DocumentMetaEnum metaEnum, vespalib::stringref docType, GlobalId gid); - static UP create(Timestamp t, DocumentUP doc); - static UP create(Timestamp t, DocumentUP doc, SizeType serializedDocumentSize); -protected: - DocEntry(Timestamp t, DocumentMetaEnum metaEnum, SizeType size) - : _timestamp(t), - _metaEnum(metaEnum), - _size(size) - {} -private: - DocEntry(Timestamp t, DocumentMetaEnum metaEnum) : DocEntry(t, metaEnum, sizeof(DocEntry)) { } - Timestamp _timestamp; - DocumentMetaEnum _metaEnum; - SizeType _size; + vespalib::string toString() const; + bool operator==(const DocEntry& entry) const; }; std::ostream & operator << (std::ostream & os, const DocEntry & r); diff --git a/persistence/src/vespa/persistence/spi/result.cpp b/persistence/src/vespa/persistence/spi/result.cpp index a728f93e60a..e458d58fe69 100644 --- a/persistence/src/vespa/persistence/spi/result.cpp +++ b/persistence/src/vespa/persistence/spi/result.cpp @@ -1,7 +1,6 @@ // Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. #include "result.h" -#include "docentry.h" #include <vespa/document/fieldvalue/document.h> #include <vespa/vespalib/stllike/asciistream.h> #include <ostream> @@ -49,24 +48,6 @@ GetResult::~GetResult() = default; BucketIdListResult::~BucketIdListResult() = default; IterateResult::~IterateResult() = default; -IterateResult::IterateResult(IterateResult &&) noexcept = default; -IterateResult & IterateResult::operator=(IterateResult &&) noexcept = default; - -IterateResult::IterateResult(ErrorType error, const vespalib::string& errorMessage) - : Result(error, errorMessage), - _completed(false) -{ } - - -IterateResult::IterateResult(List entries, bool completed) - : _completed(completed), - _entries(std::move(entries)) -{ } - -IterateResult::List -IterateResult::steal_entries() { - return std::move(_entries); -} } diff --git a/persistence/src/vespa/persistence/spi/result.h b/persistence/src/vespa/persistence/spi/result.h index 10c589307ba..c734a885b12 100644 --- a/persistence/src/vespa/persistence/spi/result.h +++ b/persistence/src/vespa/persistence/spi/result.h @@ -3,12 +3,11 @@ #include "bucketinfo.h" #include "bucket.h" +#include "docentry.h" #include <vespa/document/bucket/bucketidlist.h> namespace storage::spi { -class DocEntry; - class Result { public: typedef std::unique_ptr<Result> UP; @@ -280,12 +279,15 @@ private: class IterateResult : public Result { public: - using List = std::vector<std::unique_ptr<DocEntry>>; + typedef std::vector<DocEntry::UP> List; /** * Constructor used when there was an error creating the iterator. */ - IterateResult(ErrorType error, const vespalib::string& errorMessage); + IterateResult(ErrorType error, const vespalib::string& errorMessage) + : Result(error, errorMessage), + _completed(false) + { } /** * Constructor used when the iteration was successful. @@ -294,21 +296,24 @@ public: * * @param completed Set to true if iteration has been completed. */ - IterateResult(List entries, bool completed); + IterateResult(List entries, bool completed) + : _completed(completed), + _entries(std::move(entries)) + { } IterateResult(const IterateResult &) = delete; - IterateResult(IterateResult &&rhs) noexcept; - IterateResult &operator=(IterateResult &&rhs) noexcept; + IterateResult(IterateResult &&rhs) noexcept = default; + IterateResult &operator=(IterateResult &&rhs) noexcept = default; ~IterateResult(); const List& getEntries() const { return _entries; } - List steal_entries(); + List steal_entries() { return std::move(_entries); } bool isCompleted() const { return _completed; } private: bool _completed; - List _entries; + std::vector<DocEntry::UP> _entries; }; } diff --git a/persistence/src/vespa/persistence/spi/test.cpp b/persistence/src/vespa/persistence/spi/test.cpp index 58a8ce3fe52..32381110630 100644 --- a/persistence/src/vespa/persistence/spi/test.cpp +++ b/persistence/src/vespa/persistence/spi/test.cpp @@ -1,9 +1,7 @@ // Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. #include "test.h" -#include "docentry.h" #include <vespa/document/test/make_bucket_space.h> -#include <vespa/document/fieldvalue/document.h> using document::BucketId; using document::BucketSpace; @@ -11,45 +9,9 @@ using document::test::makeBucketSpace; namespace storage::spi::test { -Bucket -makeSpiBucket(BucketId bucketId) +Bucket makeSpiBucket(BucketId bucketId) { return Bucket(document::Bucket(makeBucketSpace(), bucketId)); } -std::unique_ptr<DocEntry> -cloneDocEntry(const DocEntry & e) { - std::unique_ptr<DocEntry> ret; - if (e.getDocument()) { - ret = DocEntry::create(e.getTimestamp(), std::make_unique<Document>(*e.getDocument()), e.getSize()); - } else if (e.getDocumentId()) { - ret = DocEntry::create(e.getTimestamp(), e.getMetaEnum(), *e.getDocumentId()); - } else { - ret = DocEntry::create(e.getTimestamp(), e.getMetaEnum()); - } - return ret; -} - -bool -equal(const DocEntry & a, const DocEntry & b) { - if (a.getTimestamp() != b.getTimestamp()) return false; - if (a.getMetaEnum() != b.getMetaEnum()) return false; - if (a.getSize() != b.getSize()) return false; - - if (a.getDocument()) { - if (!b.getDocument()) return false; - if (*a.getDocument() != *b.getDocument()) return false; - } else { - if (b.getDocument()) return false; - } - if (a.getDocumentId()) { - if (!b.getDocumentId()) return false; - if (*a.getDocumentId() != *b.getDocumentId()) return false; - } else { - if (b.getDocumentId()) return false; - } - - return true; -} - } diff --git a/persistence/src/vespa/persistence/spi/test.h b/persistence/src/vespa/persistence/spi/test.h index 1660e5f14fd..af7109ec80c 100644 --- a/persistence/src/vespa/persistence/spi/test.h +++ b/persistence/src/vespa/persistence/spi/test.h @@ -3,16 +3,11 @@ #pragma once #include "bucket.h" -#include <memory> - -namespace storage::spi { class DocEntry; } namespace storage::spi::test { // Helper functions used by unit tests Bucket makeSpiBucket(document::BucketId bucketId); -std::unique_ptr<DocEntry> cloneDocEntry(const DocEntry & entry); -bool equal(const DocEntry & a, const DocEntry & b); } |