diff options
author | Tor Brede Vekterli <vekterli@vespa.ai> | 2023-11-09 10:24:04 +0100 |
---|---|---|
committer | GitHub <noreply@github.com> | 2023-11-09 10:24:04 +0100 |
commit | d1a69ad4cf19eae5efb7ff5ba3854d33551221bc (patch) | |
tree | 38b3c12d7d4ebeb403e70b04f4d610d7c4f7342f /searchcore | |
parent | e2c2ddf4547aac8c42d3a8684b7d26b5517401db (diff) | |
parent | c37f2cec2392622d818922d3d80d00cfb30cc309 (diff) |
Merge pull request #29284 from vespa-engine/vekterli/include-doctype-and-gid-with-metadata-doc-entries
Include doc type name and GID in metadata iteration results
Diffstat (limited to 'searchcore')
4 files changed, 62 insertions, 21 deletions
diff --git a/searchcore/src/tests/proton/document_iterator/document_iterator_test.cpp b/searchcore/src/tests/proton/document_iterator/document_iterator_test.cpp index 48ce2015420..85883324080 100644 --- a/searchcore/src/tests/proton/document_iterator/document_iterator_test.cpp +++ b/searchcore/src/tests/proton/document_iterator/document_iterator_test.cpp @@ -1,6 +1,7 @@ // Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. #include <vespa/searchcore/proton/common/attribute_updater.h> +#include <vespa/searchcore/proton/common/doctypename.h> #include <vespa/searchcore/proton/common/pendinglidtracker.h> #include <vespa/searchcore/proton/persistenceengine/document_iterator.h> #include <vespa/searchcore/proton/persistenceengine/commit_and_wait_document_retriever.h> @@ -30,6 +31,7 @@ using document::DocumentId; using document::DocumentType; using document::DoubleFieldValue; using document::Field; +using document::GlobalId; using document::IntFieldValue; using document::StringFieldValue; using search::AttributeContext; @@ -174,7 +176,6 @@ UnitDR::UnitDR(const document::DocumentType &dt, document::Document::UP d, Times {} UnitDR::~UnitDR() = default; - struct VisitRecordingUnitDR : UnitDR { using VisitedLIDs = std::unordered_set<DocumentIdT>; VisitedLIDs& visited_lids; @@ -397,6 +398,15 @@ void checkEntry(const IterateResult &res, size_t idx, const Timestamp &timestamp EXPECT_EQUAL(sizeof(DocEntry), res.getEntries()[idx]->getSize()); } +void checkEntry(const IterateResult &res, size_t idx, const Timestamp &timestamp, DocumentMetaEnum flags, + const GlobalId &gid, vespalib::stringref doc_type_name) +{ + ASSERT_LESS(idx, res.getEntries().size()); + auto expect = DocEntry::create(timestamp, flags, doc_type_name, gid); + EXPECT_TRUE(equal(*expect, *res.getEntries()[idx])); + EXPECT_EQUAL(sizeof(DocEntry) + sizeof(GlobalId) + doc_type_name.size(), res.getEntries()[idx]->getSize()); +} + void checkEntry(const IterateResult &res, size_t 
idx, const DocumentId &id, const Timestamp &timestamp) { ASSERT_LESS(idx, res.getEntries().size()); @@ -415,6 +425,10 @@ void checkEntry(const IterateResult &res, size_t idx, const Document &doc, const EXPECT_GREATER(getSize(doc), 0u); } +GlobalId gid_of(vespalib::stringref id_str) { + return DocumentId(id_str).getGlobalId(); +} + TEST("require that custom retrievers work as expected") { DocumentId id1("id:ns:document::1"); DocumentId id2("id:ns:document::2"); @@ -605,15 +619,15 @@ TEST("require that iterating all versions returns both documents and removes") { TEST("require that using an empty field set returns meta-data only") { DocumentIterator itr(bucket(5), std::make_shared<document::NoFields>(), selectAll(), newestV(), -1, false); - itr.add(doc("id:ns:document::1", Timestamp(2), bucket(5))); - itr.add(cat(doc("id:ns:document::2", Timestamp(3), bucket(5)), - rem("id:ns:document::3", Timestamp(4), bucket(5)))); + itr.add(DocTypeName("foo"), doc_with_fields("id:ns:foo::1", Timestamp(2), bucket(5))); + itr.add(DocTypeName("document"), cat(doc("id:ns:document::2", Timestamp(3), bucket(5)), + rem("id:ns:document::3", Timestamp(4), bucket(5)))); IterateResult res = itr.iterate(largeNum); EXPECT_TRUE(res.isCompleted()); EXPECT_EQUAL(3u, res.getEntries().size()); - TEST_DO(checkEntry(res, 0, Timestamp(2), DocumentMetaEnum::NONE)); - TEST_DO(checkEntry(res, 1, Timestamp(3), DocumentMetaEnum::NONE)); - TEST_DO(checkEntry(res, 2, Timestamp(4), DocumentMetaEnum::REMOVE_ENTRY)); + TEST_DO(checkEntry(res, 0, Timestamp(2), DocumentMetaEnum::NONE, gid_of("id:ns:foo::1"), "foo")); + TEST_DO(checkEntry(res, 1, Timestamp(3), DocumentMetaEnum::NONE, gid_of("id:ns:document::2"), "document")); + TEST_DO(checkEntry(res, 2, Timestamp(4), DocumentMetaEnum::REMOVE_ENTRY, gid_of("id:ns:document::3"), "document")); } TEST("require that entries in other buckets are skipped") { @@ -656,12 +670,13 @@ TEST("require that maxBytes splits iteration results for meta-data only iteratio IterateResult 
res1 = itr.iterate(2 * sizeof(DocEntry)); EXPECT_TRUE(!res1.isCompleted()); EXPECT_EQUAL(2u, res1.getEntries().size()); - TEST_DO(checkEntry(res1, 0, Timestamp(2), DocumentMetaEnum::NONE)); - TEST_DO(checkEntry(res1, 1, Timestamp(3), DocumentMetaEnum::REMOVE_ENTRY)); + // Note: empty doc types since we did not pass in an explicit doc type alongside the retrievers + TEST_DO(checkEntry(res1, 0, Timestamp(2), DocumentMetaEnum::NONE, gid_of("id:ns:document::1"), "")); + TEST_DO(checkEntry(res1, 1, Timestamp(3), DocumentMetaEnum::REMOVE_ENTRY, gid_of("id:ns:document::2"), "")); IterateResult res2 = itr.iterate(largeNum); EXPECT_TRUE(res2.isCompleted()); - TEST_DO(checkEntry(res2, 0, Timestamp(4), DocumentMetaEnum::NONE)); + TEST_DO(checkEntry(res2, 0, Timestamp(4), DocumentMetaEnum::NONE, gid_of("id:ns:document::3"), "")); IterateResult res3 = itr.iterate(largeNum); EXPECT_TRUE(res3.isCompleted()); diff --git a/searchcore/src/vespa/searchcore/proton/persistenceengine/document_iterator.cpp b/searchcore/src/vespa/searchcore/proton/persistenceengine/document_iterator.cpp index 52ae32634a5..e9d233ef6ec 100644 --- a/searchcore/src/vespa/searchcore/proton/persistenceengine/document_iterator.cpp +++ b/searchcore/src/vespa/searchcore/proton/persistenceengine/document_iterator.cpp @@ -1,8 +1,10 @@ // Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
#include "document_iterator.h" +#include "ipersistencehandler.h" #include <vespa/persistence/spi/docentry.h> #include <vespa/searchcore/proton/common/cachedselect.h> +#include <vespa/searchcore/proton/common/doctypename.h> #include <vespa/searchcore/proton/common/selectcontext.h> #include <vespa/document/select/gid_filter.h> #include <vespa/document/select/node.h> @@ -18,7 +20,9 @@ using storage::spi::DocEntry; using storage::spi::Timestamp; using document::Document; using document::DocumentId; +using document::GlobalId; using storage::spi::DocumentMetaEnum; +using vespalib::stringref; namespace proton { @@ -30,6 +34,11 @@ createDocEntry(Timestamp timestamp, bool removed) { } std::unique_ptr<DocEntry> +createDocEntry(Timestamp timestamp, bool removed, stringref doc_type, const GlobalId &gid) { + return DocEntry::create(timestamp, (removed ? DocumentMetaEnum::REMOVE_ENTRY : DocumentMetaEnum::NONE), doc_type, gid); +} + +std::unique_ptr<DocEntry> createDocEntry(Timestamp timestamp, bool removed, Document::UP doc, ssize_t defaultSerializedSize) { if (doc) { if (removed) { @@ -92,17 +101,23 @@ DocumentIterator::DocumentIterator(const storage::spi::Bucket &bucket, DocumentIterator::~DocumentIterator() = default; void +DocumentIterator::add(const DocTypeName &doc_type_name, IDocumentRetriever::SP retriever) +{ + _sources.emplace_back(doc_type_name, std::move(retriever)); +} + +void DocumentIterator::add(IDocumentRetriever::SP retriever) { - _sources.push_back(std::move(retriever)); + add(DocTypeName(""), std::move(retriever)); } IterateResult DocumentIterator::iterate(size_t maxBytes) { if ( ! 
_fetchedData ) { - for (const IDocumentRetriever::SP & source : _sources) { - fetchCompleteSource(*source, _list); + for (const auto & source : _sources) { + fetchCompleteSource(source.first, *source.second, _list); } _fetchedData = true; } @@ -235,7 +250,9 @@ private: } void -DocumentIterator::fetchCompleteSource(const IDocumentRetriever & source, IterateResult::List & list) +DocumentIterator::fetchCompleteSource(const DocTypeName & doc_type_name, + const IDocumentRetriever & source, + IterateResult::List & list) { IDocumentRetriever::ReadGuard sourceReadGuard(source.getReadGuard()); search::DocumentMetaData::Vector metaData; @@ -269,7 +286,7 @@ DocumentIterator::fetchCompleteSource(const IDocumentRetriever & source, Iterate for (uint32_t lid : lidsToFetch) { const search::DocumentMetaData & meta = metaData[lidIndexMap[lid]]; assert(lid == meta.lid); - list.push_back(createDocEntry(storage::spi::Timestamp(meta.timestamp), meta.removed)); + list.push_back(createDocEntry(storage::spi::Timestamp(meta.timestamp), meta.removed, doc_type_name.getName(), meta.gid)); } } else { MatchVisitor visitor(matcher, metaData, lidIndexMap, _fields.get(), list, _defaultSerializedSize); diff --git a/searchcore/src/vespa/searchcore/proton/persistenceengine/document_iterator.h b/searchcore/src/vespa/searchcore/proton/persistenceengine/document_iterator.h index e307c249dc0..5d2b2af24b9 100644 --- a/searchcore/src/vespa/searchcore/proton/persistenceengine/document_iterator.h +++ b/searchcore/src/vespa/searchcore/proton/persistenceengine/document_iterator.h @@ -4,6 +4,7 @@ #include "i_document_retriever.h" #include <vespa/searchlib/common/idocumentmetastore.h> +#include <vespa/searchcore/proton/common/doctypename.h> #include <vespa/persistence/spi/bucket.h> #include <vespa/persistence/spi/selection.h> #include <vespa/persistence/spi/result.h> @@ -12,10 +13,14 @@ namespace proton { +class IPersistenceHandler; + class DocumentIterator { private: using ReadConsistency = 
storage::spi::ReadConsistency; + using DocTypeNameAndRetriever = std::pair<DocTypeName, IDocumentRetriever::SP>; + const storage::spi::Bucket _bucket;; const storage::spi::Selection _selection; const storage::spi::IncludedVersions _versions; @@ -25,14 +30,16 @@ private: const bool _metaOnly; const bool _ignoreMaxBytes; bool _fetchedData; - std::vector<IDocumentRetriever::SP> _sources; + std::vector<DocTypeNameAndRetriever> _sources; size_t _nextItem; storage::spi::IterateResult::List _list; - bool checkMeta(const search::DocumentMetaData &meta) const; - void fetchCompleteSource(const IDocumentRetriever & source, storage::spi::IterateResult::List & list); - bool isWeakRead() const { return _readConsistency == ReadConsistency::WEAK; } + [[nodiscard]] bool checkMeta(const search::DocumentMetaData &meta) const; + void fetchCompleteSource(const DocTypeName & doc_type_name, + const IDocumentRetriever & source, + storage::spi::IterateResult::List & list); + [[nodiscard]] bool isWeakRead() const { return _readConsistency == ReadConsistency::WEAK; } public: DocumentIterator(const storage::spi::Bucket &bucket, document::FieldSet::SP fields, @@ -40,6 +47,7 @@ public: ssize_t defaultSerializedSize, bool ignoreMaxBytes, ReadConsistency readConsistency=ReadConsistency::STRONG); ~DocumentIterator(); + void add(const DocTypeName & doc_type_name, IDocumentRetriever::SP retriever); void add(IDocumentRetriever::SP retriever); storage::spi::IterateResult iterate(size_t maxBytes); }; diff --git a/searchcore/src/vespa/searchcore/proton/persistenceengine/persistenceengine.cpp b/searchcore/src/vespa/searchcore/proton/persistenceengine/persistenceengine.cpp index bf8915b2505..4208e696a08 100644 --- a/searchcore/src/vespa/searchcore/proton/persistenceengine/persistenceengine.cpp +++ b/searchcore/src/vespa/searchcore/proton/persistenceengine/persistenceengine.cpp @@ -544,9 +544,10 @@ PersistenceEngine::createIterator(const Bucket &bucket, FieldSetSP fields, const auto entry = 
std::make_unique<IteratorEntry>(context.getReadConsistency(), bucket, std::move(fields), selection, versions, _defaultSerializedSize, _ignoreMaxBytes); for (; snap.handlers().valid(); snap.handlers().next()) { - IPersistenceHandler::RetrieversSP retrievers = snap.handlers().get()->getDocumentRetrievers(context.getReadConsistency()); + auto *handler = snap.handlers().get(); + IPersistenceHandler::RetrieversSP retrievers = handler->getDocumentRetrievers(context.getReadConsistency()); for (const auto & retriever : *retrievers) { - entry->it.add(retriever); + entry->it.add(handler->doc_type_name(), retriever); } } entry->handler_sequence = HandlerSnapshot::release(std::move(snap)); |