diff options
7 files changed, 108 insertions, 37 deletions
diff --git a/persistence/src/tests/spi/clusterstatetest.cpp b/persistence/src/tests/spi/clusterstatetest.cpp index 863bde5c225..bbe563274ec 100644 --- a/persistence/src/tests/spi/clusterstatetest.cpp +++ b/persistence/src/tests/spi/clusterstatetest.cpp @@ -297,7 +297,7 @@ TEST(DocEntryTest, test_doctype_and_gid) { DocEntry::UP e = DocEntry::create(Timestamp(9), DocumentMetaEnum::NONE, "doc_type", GlobalId::parse("gid(0xc4cef118f9f9649222750be2)")); EXPECT_EQ(9, e->getTimestamp()); EXPECT_FALSE(e->isRemove()); - EXPECT_EQ(20, e->getSize()); + EXPECT_EQ(44, e->getSize()); EXPECT_EQ(nullptr, e->getDocument()); EXPECT_EQ(nullptr, e->getDocumentId()); EXPECT_EQ("doc_type", e->getDocumentType()); diff --git a/persistence/src/vespa/persistence/conformancetest/conformancetest.cpp b/persistence/src/vespa/persistence/conformancetest/conformancetest.cpp index f08d8ef2344..a40d6b01e14 100644 --- a/persistence/src/vespa/persistence/conformancetest/conformancetest.cpp +++ b/persistence/src/vespa/persistence/conformancetest/conformancetest.cpp @@ -26,6 +26,7 @@ #include <vespa/config-stor-distribution.h> #include <limits> #include <gtest/gtest.h> +#include <gmock/gmock.h> using document::BucketId; using document::BucketSpace; @@ -36,6 +37,8 @@ using document::IntFieldValue; using storage::spi::test::makeSpiBucket; using storage::spi::test::cloneDocEntry; +using namespace ::testing; + namespace storage::spi { using PersistenceProviderUP = std::unique_ptr<PersistenceProvider>; @@ -58,8 +61,7 @@ getSpi(ConformanceTest::PersistenceFactory &factory, const document::TestDocMan return result; } -enum SELECTION_FIELDS -{ +enum class SelectionFields { METADATA_ONLY = 0, ALL_FIELDS = 1 }; @@ -68,18 +70,10 @@ CreateIteratorResult createIterator(PersistenceProvider& spi, const Bucket& b, const Selection& sel, - IncludedVersions versions = NEWEST_DOCUMENT_ONLY, - int fields = ALL_FIELDS) + IncludedVersions versions = NEWEST_DOCUMENT_ONLY) { - document::FieldSet::SP fieldSet; - if (fields & ALL_FIELDS) { - fieldSet = std::make_shared<document::AllFields>(); - } else { - fieldSet = std::make_shared<document::DocIdOnly>(); - } - Context context(Priority(0), Trace::TraceLevel(0)); - return spi.createIterator(b, std::move(fieldSet), sel, versions, context); + return spi.createIterator(b, std::make_shared<document::AllFields>(), sel, versions, context); } Selection @@ -206,14 +200,21 @@ getEntriesFromChunks(const std::vector<Chunk>& chunks) DocEntryList iterateBucket(PersistenceProvider& spi, const Bucket& bucket, - IncludedVersions versions) + IncludedVersions versions, + SelectionFields fields = SelectionFields::ALL_FIELDS) { DocEntryList ret; DocumentSelection docSel(""); Selection sel(docSel); + document::FieldSet::SP field_set; + if (fields == SelectionFields::ALL_FIELDS) { + field_set = std::make_shared<document::AllFields>(); + } else { + field_set = std::make_shared<document::NoFields>(); + } Context context(Priority(0), Trace::TraceLevel(0)); - CreateIteratorResult iter = spi.createIterator(bucket, std::make_shared<document::AllFields>(), sel, versions, context); + CreateIteratorResult iter = spi.createIterator(bucket, std::move(field_set), sel, versions, context); EXPECT_EQ(Result::ErrorType::NONE, iter.getErrorCode()); @@ -1487,6 +1488,35 @@ TEST_F(ConformanceTest, test_iterate_missing_bucket) test_iterate_empty_or_missing_bucket(false); } +TEST_F(ConformanceTest, metadata_iteration_includes_doctype_and_gid) +{ + document::TestDocMan testDocMan; + _factory->clear(); + PersistenceProviderUP spi(getSpi(*_factory, testDocMan)); + + Context context(Priority(0), Trace::TraceLevel(0)); + Bucket bucket(makeSpiBucket(BucketId(8, 0x01))); + Document::SP doc1 = testDocMan.createRandomDocumentAtLocation(0x01, 1); + Document::SP doc2 = testDocMan.createRandomDocumentAtLocation(0x01, 2); + spi->createBucket(bucket); + EXPECT_EQ(Result(), Result(spi->put(bucket, Timestamp(1), doc1))); + EXPECT_EQ(Result(), Result(spi->put(bucket, Timestamp(2), doc2))); + EXPECT_EQ(Result(), Result(spi->remove(bucket, Timestamp(3), doc1->getId()))); + + auto entries = iterateBucket(*spi, bucket, NEWEST_DOCUMENT_OR_REMOVE, SelectionFields::METADATA_ONLY); + ASSERT_THAT(entries, SizeIs(2)); + + EXPECT_EQ(entries[0]->getMetaEnum(), DocumentMetaEnum::NONE); + EXPECT_EQ(entries[0]->getGid(), doc2->getId().getGlobalId()); + EXPECT_EQ(entries[0]->getTimestamp(), Timestamp(2)); + EXPECT_EQ(entries[0]->getDocumentType(), "testdoctype1"); + + EXPECT_EQ(entries[1]->getMetaEnum(), DocumentMetaEnum::REMOVE_ENTRY); + EXPECT_EQ(entries[1]->getGid(), doc1->getId().getGlobalId()); + EXPECT_EQ(entries[1]->getTimestamp(), Timestamp(3)); + EXPECT_EQ(entries[1]->getDocumentType(), "testdoctype1"); +} + TEST_F(ConformanceTest, testDeleteBucket) { document::TestDocMan testDocMan; diff --git a/persistence/src/vespa/persistence/spi/docentry.cpp b/persistence/src/vespa/persistence/spi/docentry.cpp index 5077af568ac..6e92223cb6e 100644 --- a/persistence/src/vespa/persistence/spi/docentry.cpp +++ b/persistence/src/vespa/persistence/spi/docentry.cpp @@ -71,7 +71,7 @@ DocEntryWithId::DocEntryWithId(Timestamp t, DocumentMetaEnum metaEnum, const Doc { } DocEntryWithTypeAndGid::DocEntryWithTypeAndGid(Timestamp t, DocumentMetaEnum metaEnum, vespalib::stringref docType, GlobalId gid) - : DocEntry(t, metaEnum, docType.size() + sizeof(gid)), + : DocEntry(t, metaEnum, sizeof(DocEntry) + docType.size() + sizeof(gid)), _type(docType), _gid(gid) { } diff --git a/searchcore/src/tests/proton/document_iterator/document_iterator_test.cpp b/searchcore/src/tests/proton/document_iterator/document_iterator_test.cpp index 48ce2015420..85883324080 100644 --- a/searchcore/src/tests/proton/document_iterator/document_iterator_test.cpp +++ b/searchcore/src/tests/proton/document_iterator/document_iterator_test.cpp @@ -1,6 +1,7 @@ // Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. #include <vespa/searchcore/proton/common/attribute_updater.h> +#include <vespa/searchcore/proton/common/doctypename.h> #include <vespa/searchcore/proton/common/pendinglidtracker.h> #include <vespa/searchcore/proton/persistenceengine/document_iterator.h> #include <vespa/searchcore/proton/persistenceengine/commit_and_wait_document_retriever.h> @@ -30,6 +31,7 @@ using document::DocumentId; using document::DocumentType; using document::DoubleFieldValue; using document::Field; +using document::GlobalId; using document::IntFieldValue; using document::StringFieldValue; using search::AttributeContext; @@ -174,7 +176,6 @@ UnitDR::UnitDR(const document::DocumentType &dt, document::Document::UP d, Times {} UnitDR::~UnitDR() = default; - struct VisitRecordingUnitDR : UnitDR { using VisitedLIDs = std::unordered_set<DocumentIdT>; VisitedLIDs& visited_lids; @@ -397,6 +398,15 @@ void checkEntry(const IterateResult &res, size_t idx, const Timestamp ×tamp EXPECT_EQUAL(sizeof(DocEntry), res.getEntries()[idx]->getSize()); } +void checkEntry(const IterateResult &res, size_t idx, const Timestamp ×tamp, DocumentMetaEnum flags, + const GlobalId &gid, vespalib::stringref doc_type_name) +{ + ASSERT_LESS(idx, res.getEntries().size()); + auto expect = DocEntry::create(timestamp, flags, doc_type_name, gid); + EXPECT_TRUE(equal(*expect, *res.getEntries()[idx])); + EXPECT_EQUAL(sizeof(DocEntry) + sizeof(GlobalId) + doc_type_name.size(), res.getEntries()[idx]->getSize()); +} + void checkEntry(const IterateResult &res, size_t idx, const DocumentId &id, const Timestamp ×tamp) { ASSERT_LESS(idx, res.getEntries().size()); @@ -415,6 +425,10 @@ void checkEntry(const IterateResult &res, size_t idx, const Document &doc, const EXPECT_GREATER(getSize(doc), 0u); } +GlobalId gid_of(vespalib::stringref id_str) { + return DocumentId(id_str).getGlobalId(); +} + TEST("require that custom retrievers work as expected") { DocumentId id1("id:ns:document::1"); DocumentId id2("id:ns:document::2"); @@ -605,15 +619,15 @@ TEST("require that iterating all versions returns both documents and removes") { TEST("require that using an empty field set returns meta-data only") { DocumentIterator itr(bucket(5), std::make_shared<document::NoFields>(), selectAll(), newestV(), -1, false); - itr.add(doc("id:ns:document::1", Timestamp(2), bucket(5))); - itr.add(cat(doc("id:ns:document::2", Timestamp(3), bucket(5)), - rem("id:ns:document::3", Timestamp(4), bucket(5)))); + itr.add(DocTypeName("foo"), doc_with_fields("id:ns:foo::1", Timestamp(2), bucket(5))); + itr.add(DocTypeName("document"), cat(doc("id:ns:document::2", Timestamp(3), bucket(5)), + rem("id:ns:document::3", Timestamp(4), bucket(5)))); IterateResult res = itr.iterate(largeNum); EXPECT_TRUE(res.isCompleted()); EXPECT_EQUAL(3u, res.getEntries().size()); - TEST_DO(checkEntry(res, 0, Timestamp(2), DocumentMetaEnum::NONE)); - TEST_DO(checkEntry(res, 1, Timestamp(3), DocumentMetaEnum::NONE)); - TEST_DO(checkEntry(res, 2, Timestamp(4), DocumentMetaEnum::REMOVE_ENTRY)); + TEST_DO(checkEntry(res, 0, Timestamp(2), DocumentMetaEnum::NONE, gid_of("id:ns:foo::1"), "foo")); + TEST_DO(checkEntry(res, 1, Timestamp(3), DocumentMetaEnum::NONE, gid_of("id:ns:document::2"), "document")); + TEST_DO(checkEntry(res, 2, Timestamp(4), DocumentMetaEnum::REMOVE_ENTRY, gid_of("id:ns:document::3"), "document")); } TEST("require that entries in other buckets are skipped") { @@ -656,12 +670,13 @@ TEST("require that maxBytes splits iteration results for meta-data only iteratio IterateResult res1 = itr.iterate(2 * sizeof(DocEntry)); EXPECT_TRUE(!res1.isCompleted()); EXPECT_EQUAL(2u, res1.getEntries().size()); - TEST_DO(checkEntry(res1, 0, Timestamp(2), DocumentMetaEnum::NONE)); - TEST_DO(checkEntry(res1, 1, Timestamp(3), DocumentMetaEnum::REMOVE_ENTRY)); + // Note: empty doc types since we did not pass in an explicit doc type alongside the retrievers + TEST_DO(checkEntry(res1, 0, Timestamp(2), DocumentMetaEnum::NONE, gid_of("id:ns:document::1"), "")); + TEST_DO(checkEntry(res1, 1, Timestamp(3), DocumentMetaEnum::REMOVE_ENTRY, gid_of("id:ns:document::2"), "")); IterateResult res2 = itr.iterate(largeNum); EXPECT_TRUE(res2.isCompleted()); - TEST_DO(checkEntry(res2, 0, Timestamp(4), DocumentMetaEnum::NONE)); + TEST_DO(checkEntry(res2, 0, Timestamp(4), DocumentMetaEnum::NONE, gid_of("id:ns:document::3"), "")); IterateResult res3 = itr.iterate(largeNum); EXPECT_TRUE(res3.isCompleted()); diff --git a/searchcore/src/vespa/searchcore/proton/persistenceengine/document_iterator.cpp b/searchcore/src/vespa/searchcore/proton/persistenceengine/document_iterator.cpp index 52ae32634a5..e9d233ef6ec 100644 --- a/searchcore/src/vespa/searchcore/proton/persistenceengine/document_iterator.cpp +++ b/searchcore/src/vespa/searchcore/proton/persistenceengine/document_iterator.cpp @@ -1,8 +1,10 @@ // Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. #include "document_iterator.h" +#include "ipersistencehandler.h" #include <vespa/persistence/spi/docentry.h> #include <vespa/searchcore/proton/common/cachedselect.h> +#include <vespa/searchcore/proton/common/doctypename.h> #include <vespa/searchcore/proton/common/selectcontext.h> #include <vespa/document/select/gid_filter.h> #include <vespa/document/select/node.h> @@ -18,7 +20,9 @@ using storage::spi::DocEntry; using storage::spi::Timestamp; using document::Document; using document::DocumentId; +using document::GlobalId; using storage::spi::DocumentMetaEnum; +using vespalib::stringref; namespace proton { @@ -30,6 +34,11 @@ createDocEntry(Timestamp timestamp, bool removed) { } std::unique_ptr<DocEntry> +createDocEntry(Timestamp timestamp, bool removed, stringref doc_type, const GlobalId &gid) { + return DocEntry::create(timestamp, (removed ? DocumentMetaEnum::REMOVE_ENTRY : DocumentMetaEnum::NONE), doc_type, gid); +} + +std::unique_ptr<DocEntry> createDocEntry(Timestamp timestamp, bool removed, Document::UP doc, ssize_t defaultSerializedSize) { if (doc) { if (removed) { @@ -92,17 +101,23 @@ DocumentIterator::DocumentIterator(const storage::spi::Bucket &bucket, DocumentIterator::~DocumentIterator() = default; void +DocumentIterator::add(const DocTypeName &doc_type_name, IDocumentRetriever::SP retriever) +{ + _sources.emplace_back(doc_type_name, std::move(retriever)); +} + +void DocumentIterator::add(IDocumentRetriever::SP retriever) { - _sources.push_back(std::move(retriever)); + add(DocTypeName(""), std::move(retriever)); } IterateResult DocumentIterator::iterate(size_t maxBytes) { if ( ! _fetchedData ) { - for (const IDocumentRetriever::SP & source : _sources) { - fetchCompleteSource(*source, _list); + for (const auto & source : _sources) { + fetchCompleteSource(source.first, *source.second, _list); } _fetchedData = true; } @@ -235,7 +250,9 @@ private: } void -DocumentIterator::fetchCompleteSource(const IDocumentRetriever & source, IterateResult::List & list) +DocumentIterator::fetchCompleteSource(const DocTypeName & doc_type_name, + const IDocumentRetriever & source, + IterateResult::List & list) { IDocumentRetriever::ReadGuard sourceReadGuard(source.getReadGuard()); search::DocumentMetaData::Vector metaData; @@ -269,7 +286,7 @@ DocumentIterator::fetchCompleteSource(const IDocumentRetriever & source, Iterate for (uint32_t lid : lidsToFetch) { const search::DocumentMetaData & meta = metaData[lidIndexMap[lid]]; assert(lid == meta.lid); - list.push_back(createDocEntry(storage::spi::Timestamp(meta.timestamp), meta.removed)); + list.push_back(createDocEntry(storage::spi::Timestamp(meta.timestamp), meta.removed, doc_type_name.getName(), meta.gid)); } } else { MatchVisitor visitor(matcher, metaData, lidIndexMap, _fields.get(), list, _defaultSerializedSize); diff --git a/searchcore/src/vespa/searchcore/proton/persistenceengine/document_iterator.h b/searchcore/src/vespa/searchcore/proton/persistenceengine/document_iterator.h index e307c249dc0..5d2b2af24b9 100644 --- a/searchcore/src/vespa/searchcore/proton/persistenceengine/document_iterator.h +++ b/searchcore/src/vespa/searchcore/proton/persistenceengine/document_iterator.h @@ -4,6 +4,7 @@ #include "i_document_retriever.h" #include <vespa/searchlib/common/idocumentmetastore.h> +#include <vespa/searchcore/proton/common/doctypename.h> #include <vespa/persistence/spi/bucket.h> #include <vespa/persistence/spi/selection.h> #include <vespa/persistence/spi/result.h> @@ -12,10 +13,14 @@ namespace proton { +class IPersistenceHandler; + class DocumentIterator { private: using ReadConsistency = storage::spi::ReadConsistency; + using DocTypeNameAndRetriever = std::pair<DocTypeName, IDocumentRetriever::SP>; + const storage::spi::Bucket _bucket;; const storage::spi::Selection _selection; const storage::spi::IncludedVersions _versions; @@ -25,14 +30,16 @@ private: const bool _metaOnly; const bool _ignoreMaxBytes; bool _fetchedData; - std::vector<IDocumentRetriever::SP> _sources; + std::vector<DocTypeNameAndRetriever> _sources; size_t _nextItem; storage::spi::IterateResult::List _list; - bool checkMeta(const search::DocumentMetaData &meta) const; - void fetchCompleteSource(const IDocumentRetriever & source, storage::spi::IterateResult::List & list); - bool isWeakRead() const { return _readConsistency == ReadConsistency::WEAK; } + [[nodiscard]] bool checkMeta(const search::DocumentMetaData &meta) const; + void fetchCompleteSource(const DocTypeName & doc_type_name, + const IDocumentRetriever & source, + storage::spi::IterateResult::List & list); + [[nodiscard]] bool isWeakRead() const { return _readConsistency == ReadConsistency::WEAK; } public: DocumentIterator(const storage::spi::Bucket &bucket, document::FieldSet::SP fields, @@ -40,6 +47,7 @@ public: ssize_t defaultSerializedSize, bool ignoreMaxBytes, ReadConsistency readConsistency=ReadConsistency::STRONG); ~DocumentIterator(); + void add(const DocTypeName & doc_type_name, IDocumentRetriever::SP retriever); void add(IDocumentRetriever::SP retriever); storage::spi::IterateResult iterate(size_t maxBytes); }; diff --git a/searchcore/src/vespa/searchcore/proton/persistenceengine/persistenceengine.cpp b/searchcore/src/vespa/searchcore/proton/persistenceengine/persistenceengine.cpp index bf8915b2505..4208e696a08 100644 --- a/searchcore/src/vespa/searchcore/proton/persistenceengine/persistenceengine.cpp +++ b/searchcore/src/vespa/searchcore/proton/persistenceengine/persistenceengine.cpp @@ -544,9 +544,10 @@ PersistenceEngine::createIterator(const Bucket &bucket, FieldSetSP fields, const auto entry = std::make_unique<IteratorEntry>(context.getReadConsistency(), bucket, std::move(fields), selection, versions, _defaultSerializedSize, _ignoreMaxBytes); for (; snap.handlers().valid(); snap.handlers().next()) { - IPersistenceHandler::RetrieversSP retrievers = snap.handlers().get()->getDocumentRetrievers(context.getReadConsistency()); + auto *handler = snap.handlers().get(); + IPersistenceHandler::RetrieversSP retrievers = handler->getDocumentRetrievers(context.getReadConsistency()); for (const auto & retriever : *retrievers) { - entry->it.add(retriever); + entry->it.add(handler->doc_type_name(), retriever); } } entry->handler_sequence = HandlerSnapshot::release(std::move(snap)); |