diff options
6 files changed, 102 insertions, 22 deletions
diff --git a/persistence/src/tests/spi/clusterstatetest.cpp b/persistence/src/tests/spi/clusterstatetest.cpp index 863bde5c225..bbe563274ec 100644 --- a/persistence/src/tests/spi/clusterstatetest.cpp +++ b/persistence/src/tests/spi/clusterstatetest.cpp @@ -297,7 +297,7 @@ TEST(DocEntryTest, test_doctype_and_gid) { DocEntry::UP e = DocEntry::create(Timestamp(9), DocumentMetaEnum::NONE, "doc_type", GlobalId::parse("gid(0xc4cef118f9f9649222750be2)")); EXPECT_EQ(9, e->getTimestamp()); EXPECT_FALSE(e->isRemove()); - EXPECT_EQ(20, e->getSize()); + EXPECT_EQ(44, e->getSize()); EXPECT_EQ(nullptr, e->getDocument()); EXPECT_EQ(nullptr, e->getDocumentId()); EXPECT_EQ("doc_type", e->getDocumentType()); diff --git a/persistence/src/vespa/persistence/spi/docentry.cpp b/persistence/src/vespa/persistence/spi/docentry.cpp index 5077af568ac..6e92223cb6e 100644 --- a/persistence/src/vespa/persistence/spi/docentry.cpp +++ b/persistence/src/vespa/persistence/spi/docentry.cpp @@ -71,7 +71,7 @@ DocEntryWithId::DocEntryWithId(Timestamp t, DocumentMetaEnum metaEnum, const Doc { } DocEntryWithTypeAndGid::DocEntryWithTypeAndGid(Timestamp t, DocumentMetaEnum metaEnum, vespalib::stringref docType, GlobalId gid) - : DocEntry(t, metaEnum, docType.size() + sizeof(gid)), + : DocEntry(t, metaEnum, sizeof(DocEntry) + docType.size() + sizeof(gid)), _type(docType), _gid(gid) { } diff --git a/searchcore/src/tests/proton/document_iterator/document_iterator_test.cpp b/searchcore/src/tests/proton/document_iterator/document_iterator_test.cpp index 48ce2015420..5f4f6d61c38 100644 --- a/searchcore/src/tests/proton/document_iterator/document_iterator_test.cpp +++ b/searchcore/src/tests/proton/document_iterator/document_iterator_test.cpp @@ -1,9 +1,11 @@ // Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. #include <vespa/searchcore/proton/common/attribute_updater.h> +#include <vespa/searchcore/proton/common/doctypename.h> #include <vespa/searchcore/proton/common/pendinglidtracker.h> #include <vespa/searchcore/proton/persistenceengine/document_iterator.h> #include <vespa/searchcore/proton/persistenceengine/commit_and_wait_document_retriever.h> +#include <vespa/searchcore/proton/persistenceengine/ipersistencehandler.h> #include <vespa/searchlib/attribute/attributecontext.h> #include <vespa/searchlib/attribute/attributefactory.h> #include <vespa/searchlib/test/mock_attribute_manager.h> @@ -30,6 +32,7 @@ using document::DocumentId; using document::DocumentType; using document::DoubleFieldValue; using document::Field; +using document::GlobalId; using document::IntFieldValue; using document::StringFieldValue; using search::AttributeContext; @@ -174,6 +177,40 @@ UnitDR::UnitDR(const document::DocumentType &dt, document::Document::UP d, Times {} UnitDR::~UnitDR() = default; +struct MockPersistenceHandler : IPersistenceHandler { + DocTypeName _doc_type_name; + + explicit MockPersistenceHandler(vespalib::stringref type_name) + : _doc_type_name(type_name) + { + } + ~MockPersistenceHandler() override = default; + + void initialize() override { abort(); } + void handlePut(FeedToken, const storage::spi::Bucket&, storage::spi::Timestamp, DocumentSP ) override { abort(); } + void handleUpdate(FeedToken, const storage::spi::Bucket&, + storage::spi::Timestamp, DocumentUpdateSP) override { abort(); } + void handleRemove(FeedToken, const storage::spi::Bucket&, + storage::spi::Timestamp, const document::DocumentId&) override { abort(); } + void handleRemoveByGid(FeedToken, const storage::spi::Bucket&, storage::spi::Timestamp, + vespalib::stringref, const document::GlobalId&) override { abort(); } + void handleListBuckets(IBucketIdListResultHandler&) override { abort(); } + void handleSetClusterState(const storage::spi::ClusterState&, IGenericResultHandler&) override { abort(); } + void handleSetActiveState(const storage::spi::Bucket&, storage::spi::BucketInfo::ActiveState, + std::shared_ptr<IGenericResultHandler>) override { abort(); } + void handleGetBucketInfo(const storage::spi::Bucket&, IBucketInfoResultHandler&) override { abort(); } + void handleCreateBucket(FeedToken, const storage::spi::Bucket&) override { abort(); }; + void handleDeleteBucket(FeedToken, const storage::spi::Bucket&) override { abort(); } + void handleGetModifiedBuckets(IBucketIdListResultHandler&) override { abort(); } + void handleSplit(FeedToken, const storage::spi::Bucket&, + const storage::spi::Bucket&, const storage::spi::Bucket&) override { abort(); } + void handleJoin(FeedToken, const storage::spi::Bucket&, const storage::spi::Bucket&, + const storage::spi::Bucket&) override { abort(); } + RetrieversSP getDocumentRetrievers(storage::spi::ReadConsistency) override { abort(); } + void handleListActiveBuckets(IBucketIdListResultHandler&) override { abort(); } + void handlePopulateActiveBuckets(document::BucketId::List, IGenericResultHandler&) override { abort(); } + const DocTypeName &doc_type_name() const noexcept override { return _doc_type_name; } +}; struct VisitRecordingUnitDR : UnitDR { using VisitedLIDs = std::unordered_set<DocumentIdT>; @@ -397,6 +434,15 @@ void checkEntry(const IterateResult &res, size_t idx, const Timestamp ×tamp EXPECT_EQUAL(sizeof(DocEntry), res.getEntries()[idx]->getSize()); } +void checkEntry(const IterateResult &res, size_t idx, const Timestamp ×tamp, DocumentMetaEnum flags, + const GlobalId &gid, vespalib::stringref doc_type_name) +{ + ASSERT_LESS(idx, res.getEntries().size()); + auto expect = DocEntry::create(timestamp, flags, doc_type_name, gid); + EXPECT_TRUE(equal(*expect, *res.getEntries()[idx])); + EXPECT_EQUAL(sizeof(DocEntry) + sizeof(GlobalId) + doc_type_name.size(), res.getEntries()[idx]->getSize()); +} + void checkEntry(const IterateResult &res, size_t idx, const DocumentId &id, const Timestamp ×tamp) { ASSERT_LESS(idx, res.getEntries().size()); @@ -415,6 +461,10 @@ void checkEntry(const IterateResult &res, size_t idx, const Document &doc, const EXPECT_GREATER(getSize(doc), 0u); } +GlobalId gid_of(vespalib::stringref id_str) { + return DocumentId(id_str).getGlobalId(); +} + TEST("require that custom retrievers work as expected") { DocumentId id1("id:ns:document::1"); DocumentId id2("id:ns:document::2"); @@ -605,15 +655,17 @@ TEST("require that iterating all versions returns both documents and removes") { TEST("require that using an empty field set returns meta-data only") { DocumentIterator itr(bucket(5), std::make_shared<document::NoFields>(), selectAll(), newestV(), -1, false); - itr.add(doc("id:ns:document::1", Timestamp(2), bucket(5))); - itr.add(cat(doc("id:ns:document::2", Timestamp(3), bucket(5)), - rem("id:ns:document::3", Timestamp(4), bucket(5)))); + MockPersistenceHandler foo_handler("foo"); + MockPersistenceHandler doc_handler("document"); + itr.add(&foo_handler, doc_with_fields("id:ns:foo::1", Timestamp(2), bucket(5))); + itr.add(&doc_handler, cat(doc("id:ns:document::2", Timestamp(3), bucket(5)), + rem("id:ns:document::3", Timestamp(4), bucket(5)))); IterateResult res = itr.iterate(largeNum); EXPECT_TRUE(res.isCompleted()); EXPECT_EQUAL(3u, res.getEntries().size()); - TEST_DO(checkEntry(res, 0, Timestamp(2), DocumentMetaEnum::NONE)); - TEST_DO(checkEntry(res, 1, Timestamp(3), DocumentMetaEnum::NONE)); - TEST_DO(checkEntry(res, 2, Timestamp(4), DocumentMetaEnum::REMOVE_ENTRY)); + TEST_DO(checkEntry(res, 0, Timestamp(2), DocumentMetaEnum::NONE, gid_of("id:ns:foo::1"), "foo")); + TEST_DO(checkEntry(res, 1, Timestamp(3), DocumentMetaEnum::NONE, gid_of("id:ns:document::2"), "document")); + TEST_DO(checkEntry(res, 2, Timestamp(4), DocumentMetaEnum::REMOVE_ENTRY, gid_of("id:ns:document::3"), "document")); } TEST("require that entries in other buckets are skipped") { @@ -656,12 +708,13 @@ TEST("require that maxBytes splits iteration results for meta-data only iteratio IterateResult res1 = itr.iterate(2 * sizeof(DocEntry)); EXPECT_TRUE(!res1.isCompleted()); EXPECT_EQUAL(2u, res1.getEntries().size()); - TEST_DO(checkEntry(res1, 0, Timestamp(2), DocumentMetaEnum::NONE)); - TEST_DO(checkEntry(res1, 1, Timestamp(3), DocumentMetaEnum::REMOVE_ENTRY)); + // Note: empty doc types since we did not pass in a handler alongside the retrievers + TEST_DO(checkEntry(res1, 0, Timestamp(2), DocumentMetaEnum::NONE, gid_of("id:ns:document::1"), "")); + TEST_DO(checkEntry(res1, 1, Timestamp(3), DocumentMetaEnum::REMOVE_ENTRY, gid_of("id:ns:document::2"), "")); IterateResult res2 = itr.iterate(largeNum); EXPECT_TRUE(res2.isCompleted()); - TEST_DO(checkEntry(res2, 0, Timestamp(4), DocumentMetaEnum::NONE)); + TEST_DO(checkEntry(res2, 0, Timestamp(4), DocumentMetaEnum::NONE, gid_of("id:ns:document::3"), "")); IterateResult res3 = itr.iterate(largeNum); EXPECT_TRUE(res3.isCompleted()); diff --git a/searchcore/src/vespa/searchcore/proton/persistenceengine/document_iterator.cpp b/searchcore/src/vespa/searchcore/proton/persistenceengine/document_iterator.cpp index 52ae32634a5..bfcb2724592 100644 --- a/searchcore/src/vespa/searchcore/proton/persistenceengine/document_iterator.cpp +++ b/searchcore/src/vespa/searchcore/proton/persistenceengine/document_iterator.cpp @@ -1,8 +1,10 @@ // Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. #include "document_iterator.h" +#include "ipersistencehandler.h" #include <vespa/persistence/spi/docentry.h> #include <vespa/searchcore/proton/common/cachedselect.h> +#include <vespa/searchcore/proton/common/doctypename.h> #include <vespa/searchcore/proton/common/selectcontext.h> #include <vespa/document/select/gid_filter.h> #include <vespa/document/select/node.h> @@ -18,7 +20,9 @@ using storage::spi::DocEntry; using storage::spi::Timestamp; using document::Document; using document::DocumentId; +using document::GlobalId; using storage::spi::DocumentMetaEnum; +using vespalib::stringref; namespace proton { @@ -30,6 +34,11 @@ createDocEntry(Timestamp timestamp, bool removed) { } std::unique_ptr<DocEntry> +createDocEntry(Timestamp timestamp, bool removed, stringref doc_type, const GlobalId &gid) { + return DocEntry::create(timestamp, (removed ? DocumentMetaEnum::REMOVE_ENTRY : DocumentMetaEnum::NONE), doc_type, gid); +} + +std::unique_ptr<DocEntry> createDocEntry(Timestamp timestamp, bool removed, Document::UP doc, ssize_t defaultSerializedSize) { if (doc) { if (removed) { @@ -92,17 +101,23 @@ DocumentIterator::DocumentIterator(const storage::spi::Bucket &bucket, DocumentIterator::~DocumentIterator() = default; void +DocumentIterator::add(const IPersistenceHandler *handler, IDocumentRetriever::SP retriever) +{ + _sources.emplace_back(handler, std::move(retriever)); +} + +void DocumentIterator::add(IDocumentRetriever::SP retriever) { - _sources.push_back(std::move(retriever)); + add(nullptr, std::move(retriever)); } IterateResult DocumentIterator::iterate(size_t maxBytes) { if ( ! _fetchedData ) { - for (const IDocumentRetriever::SP & source : _sources) { - fetchCompleteSource(*source, _list); + for (const auto & source : _sources) { + fetchCompleteSource(source.first, *source.second, _list); } _fetchedData = true; } @@ -235,7 +250,9 @@ private: } void -DocumentIterator::fetchCompleteSource(const IDocumentRetriever & source, IterateResult::List & list) +DocumentIterator::fetchCompleteSource(const IPersistenceHandler * handler, + const IDocumentRetriever & source, + IterateResult::List & list) { IDocumentRetriever::ReadGuard sourceReadGuard(source.getReadGuard()); search::DocumentMetaData::Vector metaData; @@ -266,10 +283,11 @@ DocumentIterator::fetchCompleteSource(const IDocumentRetriever & source, Iterate list.reserve(lidsToFetch.size()); if ( _metaOnly ) { + stringref doc_type = (handler ? stringref(handler->doc_type_name().getName()) : stringref()); for (uint32_t lid : lidsToFetch) { const search::DocumentMetaData & meta = metaData[lidIndexMap[lid]]; assert(lid == meta.lid); - list.push_back(createDocEntry(storage::spi::Timestamp(meta.timestamp), meta.removed)); + list.push_back(createDocEntry(storage::spi::Timestamp(meta.timestamp), meta.removed, doc_type, meta.gid)); } } else { MatchVisitor visitor(matcher, metaData, lidIndexMap, _fields.get(), list, _defaultSerializedSize); diff --git a/searchcore/src/vespa/searchcore/proton/persistenceengine/document_iterator.h b/searchcore/src/vespa/searchcore/proton/persistenceengine/document_iterator.h index e307c249dc0..dd4891def45 100644 --- a/searchcore/src/vespa/searchcore/proton/persistenceengine/document_iterator.h +++ b/searchcore/src/vespa/searchcore/proton/persistenceengine/document_iterator.h @@ -12,10 +12,14 @@ namespace proton { +class IPersistenceHandler; + class DocumentIterator { private: using ReadConsistency = storage::spi::ReadConsistency; + using HandlerWithRetriever = std::pair<const IPersistenceHandler*, IDocumentRetriever::SP>; + const storage::spi::Bucket _bucket;; const storage::spi::Selection _selection; const storage::spi::IncludedVersions _versions; @@ -25,14 +29,16 @@ private: const bool _metaOnly; const bool _ignoreMaxBytes; bool _fetchedData; - std::vector<IDocumentRetriever::SP> _sources; + std::vector<HandlerWithRetriever> _sources; size_t _nextItem; storage::spi::IterateResult::List _list; - bool checkMeta(const search::DocumentMetaData &meta) const; - void fetchCompleteSource(const IDocumentRetriever & source, storage::spi::IterateResult::List & list); - bool isWeakRead() const { return _readConsistency == ReadConsistency::WEAK; } + [[nodiscard]] bool checkMeta(const search::DocumentMetaData &meta) const; + void fetchCompleteSource(const IPersistenceHandler * handler, + const IDocumentRetriever & source, + storage::spi::IterateResult::List & list); + [[nodiscard]] bool isWeakRead() const { return _readConsistency == ReadConsistency::WEAK; } public: DocumentIterator(const storage::spi::Bucket &bucket, document::FieldSet::SP fields, @@ -40,6 +46,7 @@ public: ssize_t defaultSerializedSize, bool ignoreMaxBytes, ReadConsistency readConsistency=ReadConsistency::STRONG); ~DocumentIterator(); + void add(const IPersistenceHandler *handler, IDocumentRetriever::SP retriever); void add(IDocumentRetriever::SP retriever); storage::spi::IterateResult iterate(size_t maxBytes); }; diff --git a/searchcore/src/vespa/searchcore/proton/persistenceengine/persistenceengine.cpp b/searchcore/src/vespa/searchcore/proton/persistenceengine/persistenceengine.cpp index bf8915b2505..0bae2e70785 100644 --- a/searchcore/src/vespa/searchcore/proton/persistenceengine/persistenceengine.cpp +++ b/searchcore/src/vespa/searchcore/proton/persistenceengine/persistenceengine.cpp @@ -544,9 +544,11 @@ PersistenceEngine::createIterator(const Bucket &bucket, FieldSetSP fields, const auto entry = std::make_unique<IteratorEntry>(context.getReadConsistency(), bucket, std::move(fields), selection, versions, _defaultSerializedSize, _ignoreMaxBytes); for (; snap.handlers().valid(); snap.handlers().next()) { - IPersistenceHandler::RetrieversSP retrievers = snap.handlers().get()->getDocumentRetrievers(context.getReadConsistency()); + auto *handler = snap.handlers().get(); + IPersistenceHandler::RetrieversSP retrievers = handler->getDocumentRetrievers(context.getReadConsistency()); for (const auto & retriever : *retrievers) { - entry->it.add(retriever); + // Handler ptr validity and lifetime is maintained by handler snapshot owned by iterator + entry->it.add(handler, retriever); } } entry->handler_sequence = HandlerSnapshot::release(std::move(snap)); |