diff options
author | Tor Brede Vekterli <vekterli@vespa.ai> | 2023-11-07 16:17:01 +0000 |
---|---|---|
committer | Tor Brede Vekterli <vekterli@vespa.ai> | 2023-11-08 15:05:46 +0000 |
commit | 7881facd0f0e312e9bafefccdf00301f1b74a0ff (patch) | |
tree | e4e088a0c30e32fc68805e82e2114e8ade9a2dd1 /searchcore | |
parent | e7b517e5705538cd90a72920c3edb0d36abb274e (diff) |
Include doc type name and GID in metadata iteration results
Document type is fetched from the associated `IPersistenceHandler`
on-demand; it is assumed the lifetime of the pointer must be valid
for the entire lifetime of the iterator itself, as the latter holds
a valid handler snapshot.
For simplicity, it's possible to _not_ pass in a handler, in which
case the doc type name will be implicitly empty.
Some expected `DocEntry` sizes have been adjusted, as we now
report the size of the document type and GID alongside the
base type size.
Diffstat (limited to 'searchcore')
4 files changed, 100 insertions, 20 deletions
diff --git a/searchcore/src/tests/proton/document_iterator/document_iterator_test.cpp b/searchcore/src/tests/proton/document_iterator/document_iterator_test.cpp index 48ce2015420..5f4f6d61c38 100644 --- a/searchcore/src/tests/proton/document_iterator/document_iterator_test.cpp +++ b/searchcore/src/tests/proton/document_iterator/document_iterator_test.cpp @@ -1,9 +1,11 @@ // Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. #include <vespa/searchcore/proton/common/attribute_updater.h> +#include <vespa/searchcore/proton/common/doctypename.h> #include <vespa/searchcore/proton/common/pendinglidtracker.h> #include <vespa/searchcore/proton/persistenceengine/document_iterator.h> #include <vespa/searchcore/proton/persistenceengine/commit_and_wait_document_retriever.h> +#include <vespa/searchcore/proton/persistenceengine/ipersistencehandler.h> #include <vespa/searchlib/attribute/attributecontext.h> #include <vespa/searchlib/attribute/attributefactory.h> #include <vespa/searchlib/test/mock_attribute_manager.h> @@ -30,6 +32,7 @@ using document::DocumentId; using document::DocumentType; using document::DoubleFieldValue; using document::Field; +using document::GlobalId; using document::IntFieldValue; using document::StringFieldValue; using search::AttributeContext; @@ -174,6 +177,40 @@ UnitDR::UnitDR(const document::DocumentType &dt, document::Document::UP d, Times {} UnitDR::~UnitDR() = default; +struct MockPersistenceHandler : IPersistenceHandler { + DocTypeName _doc_type_name; + + explicit MockPersistenceHandler(vespalib::stringref type_name) + : _doc_type_name(type_name) + { + } + ~MockPersistenceHandler() override = default; + + void initialize() override { abort(); } + void handlePut(FeedToken, const storage::spi::Bucket&, storage::spi::Timestamp, DocumentSP ) override { abort(); } + void handleUpdate(FeedToken, const storage::spi::Bucket&, + storage::spi::Timestamp, DocumentUpdateSP) override { abort(); } + void handleRemove(FeedToken, const storage::spi::Bucket&, + storage::spi::Timestamp, const document::DocumentId&) override { abort(); } + void handleRemoveByGid(FeedToken, const storage::spi::Bucket&, storage::spi::Timestamp, + vespalib::stringref, const document::GlobalId&) override { abort(); } + void handleListBuckets(IBucketIdListResultHandler&) override { abort(); } + void handleSetClusterState(const storage::spi::ClusterState&, IGenericResultHandler&) override { abort(); } + void handleSetActiveState(const storage::spi::Bucket&, storage::spi::BucketInfo::ActiveState, + std::shared_ptr<IGenericResultHandler>) override { abort(); } + void handleGetBucketInfo(const storage::spi::Bucket&, IBucketInfoResultHandler&) override { abort(); } + void handleCreateBucket(FeedToken, const storage::spi::Bucket&) override { abort(); }; + void handleDeleteBucket(FeedToken, const storage::spi::Bucket&) override { abort(); } + void handleGetModifiedBuckets(IBucketIdListResultHandler&) override { abort(); } + void handleSplit(FeedToken, const storage::spi::Bucket&, + const storage::spi::Bucket&, const storage::spi::Bucket&) override { abort(); } + void handleJoin(FeedToken, const storage::spi::Bucket&, const storage::spi::Bucket&, + const storage::spi::Bucket&) override { abort(); } + RetrieversSP getDocumentRetrievers(storage::spi::ReadConsistency) override { abort(); } + void handleListActiveBuckets(IBucketIdListResultHandler&) override { abort(); } + void handlePopulateActiveBuckets(document::BucketId::List, IGenericResultHandler&) override { abort(); } + const DocTypeName &doc_type_name() const noexcept override { return _doc_type_name; } +}; struct VisitRecordingUnitDR : UnitDR { using VisitedLIDs = std::unordered_set<DocumentIdT>; @@ -397,6 +434,15 @@ void checkEntry(const IterateResult &res, size_t idx, const Timestamp ×tamp EXPECT_EQUAL(sizeof(DocEntry), res.getEntries()[idx]->getSize()); } +void checkEntry(const IterateResult &res, size_t idx, const Timestamp ×tamp, DocumentMetaEnum flags, + const GlobalId &gid, vespalib::stringref doc_type_name) +{ + ASSERT_LESS(idx, res.getEntries().size()); + auto expect = DocEntry::create(timestamp, flags, doc_type_name, gid); + EXPECT_TRUE(equal(*expect, *res.getEntries()[idx])); + EXPECT_EQUAL(sizeof(DocEntry) + sizeof(GlobalId) + doc_type_name.size(), res.getEntries()[idx]->getSize()); +} + void checkEntry(const IterateResult &res, size_t idx, const DocumentId &id, const Timestamp ×tamp) { ASSERT_LESS(idx, res.getEntries().size()); @@ -415,6 +461,10 @@ void checkEntry(const IterateResult &res, size_t idx, const Document &doc, const EXPECT_GREATER(getSize(doc), 0u); } +GlobalId gid_of(vespalib::stringref id_str) { + return DocumentId(id_str).getGlobalId(); +} + TEST("require that custom retrievers work as expected") { DocumentId id1("id:ns:document::1"); DocumentId id2("id:ns:document::2"); @@ -605,15 +655,17 @@ TEST("require that iterating all versions returns both documents and removes") { TEST("require that using an empty field set returns meta-data only") { DocumentIterator itr(bucket(5), std::make_shared<document::NoFields>(), selectAll(), newestV(), -1, false); - itr.add(doc("id:ns:document::1", Timestamp(2), bucket(5))); - itr.add(cat(doc("id:ns:document::2", Timestamp(3), bucket(5)), - rem("id:ns:document::3", Timestamp(4), bucket(5)))); + MockPersistenceHandler foo_handler("foo"); + MockPersistenceHandler doc_handler("document"); + itr.add(&foo_handler, doc_with_fields("id:ns:foo::1", Timestamp(2), bucket(5))); + itr.add(&doc_handler, cat(doc("id:ns:document::2", Timestamp(3), bucket(5)), + rem("id:ns:document::3", Timestamp(4), bucket(5)))); IterateResult res = itr.iterate(largeNum); EXPECT_TRUE(res.isCompleted()); EXPECT_EQUAL(3u, res.getEntries().size()); - TEST_DO(checkEntry(res, 0, Timestamp(2), DocumentMetaEnum::NONE)); - TEST_DO(checkEntry(res, 1, Timestamp(3), DocumentMetaEnum::NONE)); - TEST_DO(checkEntry(res, 2, Timestamp(4), DocumentMetaEnum::REMOVE_ENTRY)); + TEST_DO(checkEntry(res, 0, Timestamp(2), DocumentMetaEnum::NONE, gid_of("id:ns:foo::1"), "foo")); + TEST_DO(checkEntry(res, 1, Timestamp(3), DocumentMetaEnum::NONE, gid_of("id:ns:document::2"), "document")); + TEST_DO(checkEntry(res, 2, Timestamp(4), DocumentMetaEnum::REMOVE_ENTRY, gid_of("id:ns:document::3"), "document")); } TEST("require that entries in other buckets are skipped") { @@ -656,12 +708,13 @@ TEST("require that maxBytes splits iteration results for meta-data only iteratio IterateResult res1 = itr.iterate(2 * sizeof(DocEntry)); EXPECT_TRUE(!res1.isCompleted()); EXPECT_EQUAL(2u, res1.getEntries().size()); - TEST_DO(checkEntry(res1, 0, Timestamp(2), DocumentMetaEnum::NONE)); - TEST_DO(checkEntry(res1, 1, Timestamp(3), DocumentMetaEnum::REMOVE_ENTRY)); + // Note: empty doc types since we did not pass in a handler alongside the retrievers + TEST_DO(checkEntry(res1, 0, Timestamp(2), DocumentMetaEnum::NONE, gid_of("id:ns:document::1"), "")); + TEST_DO(checkEntry(res1, 1, Timestamp(3), DocumentMetaEnum::REMOVE_ENTRY, gid_of("id:ns:document::2"), "")); IterateResult res2 = itr.iterate(largeNum); EXPECT_TRUE(res2.isCompleted()); - TEST_DO(checkEntry(res2, 0, Timestamp(4), DocumentMetaEnum::NONE)); + TEST_DO(checkEntry(res2, 0, Timestamp(4), DocumentMetaEnum::NONE, gid_of("id:ns:document::3"), "")); IterateResult res3 = itr.iterate(largeNum); EXPECT_TRUE(res3.isCompleted()); diff --git a/searchcore/src/vespa/searchcore/proton/persistenceengine/document_iterator.cpp b/searchcore/src/vespa/searchcore/proton/persistenceengine/document_iterator.cpp index 52ae32634a5..bfcb2724592 100644 --- a/searchcore/src/vespa/searchcore/proton/persistenceengine/document_iterator.cpp +++ b/searchcore/src/vespa/searchcore/proton/persistenceengine/document_iterator.cpp @@ -1,8 +1,10 @@ // Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. #include "document_iterator.h" +#include "ipersistencehandler.h" #include <vespa/persistence/spi/docentry.h> #include <vespa/searchcore/proton/common/cachedselect.h> +#include <vespa/searchcore/proton/common/doctypename.h> #include <vespa/searchcore/proton/common/selectcontext.h> #include <vespa/document/select/gid_filter.h> #include <vespa/document/select/node.h> @@ -18,7 +20,9 @@ using storage::spi::DocEntry; using storage::spi::Timestamp; using document::Document; using document::DocumentId; +using document::GlobalId; using storage::spi::DocumentMetaEnum; +using vespalib::stringref; namespace proton { @@ -30,6 +34,11 @@ createDocEntry(Timestamp timestamp, bool removed) { } std::unique_ptr<DocEntry> +createDocEntry(Timestamp timestamp, bool removed, stringref doc_type, const GlobalId &gid) { + return DocEntry::create(timestamp, (removed ? DocumentMetaEnum::REMOVE_ENTRY : DocumentMetaEnum::NONE), doc_type, gid); +} + +std::unique_ptr<DocEntry> createDocEntry(Timestamp timestamp, bool removed, Document::UP doc, ssize_t defaultSerializedSize) { if (doc) { if (removed) { @@ -92,17 +101,23 @@ DocumentIterator::DocumentIterator(const storage::spi::Bucket &bucket, DocumentIterator::~DocumentIterator() = default; void +DocumentIterator::add(const IPersistenceHandler *handler, IDocumentRetriever::SP retriever) +{ + _sources.emplace_back(handler, std::move(retriever)); +} + +void DocumentIterator::add(IDocumentRetriever::SP retriever) { - _sources.push_back(std::move(retriever)); + add(nullptr, std::move(retriever)); } IterateResult DocumentIterator::iterate(size_t maxBytes) { if ( ! _fetchedData ) { - for (const IDocumentRetriever::SP & source : _sources) { - fetchCompleteSource(*source, _list); + for (const auto & source : _sources) { + fetchCompleteSource(source.first, *source.second, _list); } _fetchedData = true; } @@ -235,7 +250,9 @@ private: } void -DocumentIterator::fetchCompleteSource(const IDocumentRetriever & source, IterateResult::List & list) +DocumentIterator::fetchCompleteSource(const IPersistenceHandler * handler, + const IDocumentRetriever & source, + IterateResult::List & list) { IDocumentRetriever::ReadGuard sourceReadGuard(source.getReadGuard()); search::DocumentMetaData::Vector metaData; @@ -266,10 +283,11 @@ DocumentIterator::fetchCompleteSource(const IDocumentRetriever & source, Iterate list.reserve(lidsToFetch.size()); if ( _metaOnly ) { + stringref doc_type = (handler ? stringref(handler->doc_type_name().getName()) : stringref()); for (uint32_t lid : lidsToFetch) { const search::DocumentMetaData & meta = metaData[lidIndexMap[lid]]; assert(lid == meta.lid); - list.push_back(createDocEntry(storage::spi::Timestamp(meta.timestamp), meta.removed)); + list.push_back(createDocEntry(storage::spi::Timestamp(meta.timestamp), meta.removed, doc_type, meta.gid)); } } else { MatchVisitor visitor(matcher, metaData, lidIndexMap, _fields.get(), list, _defaultSerializedSize); diff --git a/searchcore/src/vespa/searchcore/proton/persistenceengine/document_iterator.h b/searchcore/src/vespa/searchcore/proton/persistenceengine/document_iterator.h index e307c249dc0..dd4891def45 100644 --- a/searchcore/src/vespa/searchcore/proton/persistenceengine/document_iterator.h +++ b/searchcore/src/vespa/searchcore/proton/persistenceengine/document_iterator.h @@ -12,10 +12,14 @@ namespace proton { +class IPersistenceHandler; + class DocumentIterator { private: using ReadConsistency = storage::spi::ReadConsistency; + using HandlerWithRetriever = std::pair<const IPersistenceHandler*, IDocumentRetriever::SP>; + const storage::spi::Bucket _bucket;; const storage::spi::Selection _selection; const storage::spi::IncludedVersions _versions; @@ -25,14 +29,16 @@ private: const bool _metaOnly; const bool _ignoreMaxBytes; bool _fetchedData; - std::vector<IDocumentRetriever::SP> _sources; + std::vector<HandlerWithRetriever> _sources; size_t _nextItem; storage::spi::IterateResult::List _list; - bool checkMeta(const search::DocumentMetaData &meta) const; - void fetchCompleteSource(const IDocumentRetriever & source, storage::spi::IterateResult::List & list); - bool isWeakRead() const { return _readConsistency == ReadConsistency::WEAK; } + [[nodiscard]] bool checkMeta(const search::DocumentMetaData &meta) const; + void fetchCompleteSource(const IPersistenceHandler * handler, + const IDocumentRetriever & source, + storage::spi::IterateResult::List & list); + [[nodiscard]] bool isWeakRead() const { return _readConsistency == ReadConsistency::WEAK; } public: DocumentIterator(const storage::spi::Bucket &bucket, document::FieldSet::SP fields, @@ -40,6 +46,7 @@ public: ssize_t defaultSerializedSize, bool ignoreMaxBytes, ReadConsistency readConsistency=ReadConsistency::STRONG); ~DocumentIterator(); + void add(const IPersistenceHandler *handler, IDocumentRetriever::SP retriever); void add(IDocumentRetriever::SP retriever); storage::spi::IterateResult iterate(size_t maxBytes); }; diff --git a/searchcore/src/vespa/searchcore/proton/persistenceengine/persistenceengine.cpp b/searchcore/src/vespa/searchcore/proton/persistenceengine/persistenceengine.cpp index bf8915b2505..0bae2e70785 100644 --- a/searchcore/src/vespa/searchcore/proton/persistenceengine/persistenceengine.cpp +++ b/searchcore/src/vespa/searchcore/proton/persistenceengine/persistenceengine.cpp @@ -544,9 +544,11 @@ PersistenceEngine::createIterator(const Bucket &bucket, FieldSetSP fields, const auto entry = std::make_unique<IteratorEntry>(context.getReadConsistency(), bucket, std::move(fields), selection, versions, _defaultSerializedSize, _ignoreMaxBytes); for (; snap.handlers().valid(); snap.handlers().next()) { - IPersistenceHandler::RetrieversSP retrievers = snap.handlers().get()->getDocumentRetrievers(context.getReadConsistency()); + auto *handler = snap.handlers().get(); + IPersistenceHandler::RetrieversSP retrievers = handler->getDocumentRetrievers(context.getReadConsistency()); for (const auto & retriever : *retrievers) { - entry->it.add(retriever); + // Handler ptr validity and lifetime is maintained by handler snapshot owned by iterator + entry->it.add(handler, retriever); } } entry->handler_sequence = HandlerSnapshot::release(std::move(snap)); |