summaryrefslogtreecommitdiffstats
path: root/searchcore
diff options
context:
space:
mode:
authorTor Brede Vekterli <vekterli@vespa.ai>2023-11-07 16:17:01 +0000
committerTor Brede Vekterli <vekterli@vespa.ai>2023-11-08 15:05:46 +0000
commit7881facd0f0e312e9bafefccdf00301f1b74a0ff (patch)
treee4e088a0c30e32fc68805e82e2114e8ade9a2dd1 /searchcore
parente7b517e5705538cd90a72920c3edb0d36abb274e (diff)
Include doc type name and GID in metadata iteration results
Document type is fetched from the associated `IPersistenceHandler` on-demand; it is assumed the lifetime of the pointer must be valid for the entire lifetime of the iterator itself, as the latter holds a valid handler snapshot. For simplicity, it's possible to _not_ pass in a handler, in which case the doc type name will be implicitly empty. Some expected `DocEntry` sizes have been adjusted, as we now report the size of the document type and GID alongside the base type size.
Diffstat (limited to 'searchcore')
-rw-r--r--searchcore/src/tests/proton/document_iterator/document_iterator_test.cpp71
-rw-r--r--searchcore/src/vespa/searchcore/proton/persistenceengine/document_iterator.cpp28
-rw-r--r--searchcore/src/vespa/searchcore/proton/persistenceengine/document_iterator.h15
-rw-r--r--searchcore/src/vespa/searchcore/proton/persistenceengine/persistenceengine.cpp6
4 files changed, 100 insertions, 20 deletions
diff --git a/searchcore/src/tests/proton/document_iterator/document_iterator_test.cpp b/searchcore/src/tests/proton/document_iterator/document_iterator_test.cpp
index 48ce2015420..5f4f6d61c38 100644
--- a/searchcore/src/tests/proton/document_iterator/document_iterator_test.cpp
+++ b/searchcore/src/tests/proton/document_iterator/document_iterator_test.cpp
@@ -1,9 +1,11 @@
// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
#include <vespa/searchcore/proton/common/attribute_updater.h>
+#include <vespa/searchcore/proton/common/doctypename.h>
#include <vespa/searchcore/proton/common/pendinglidtracker.h>
#include <vespa/searchcore/proton/persistenceengine/document_iterator.h>
#include <vespa/searchcore/proton/persistenceengine/commit_and_wait_document_retriever.h>
+#include <vespa/searchcore/proton/persistenceengine/ipersistencehandler.h>
#include <vespa/searchlib/attribute/attributecontext.h>
#include <vespa/searchlib/attribute/attributefactory.h>
#include <vespa/searchlib/test/mock_attribute_manager.h>
@@ -30,6 +32,7 @@ using document::DocumentId;
using document::DocumentType;
using document::DoubleFieldValue;
using document::Field;
+using document::GlobalId;
using document::IntFieldValue;
using document::StringFieldValue;
using search::AttributeContext;
@@ -174,6 +177,40 @@ UnitDR::UnitDR(const document::DocumentType &dt, document::Document::UP d, Times
{}
UnitDR::~UnitDR() = default;
+struct MockPersistenceHandler : IPersistenceHandler {
+ DocTypeName _doc_type_name;
+
+ explicit MockPersistenceHandler(vespalib::stringref type_name)
+ : _doc_type_name(type_name)
+ {
+ }
+ ~MockPersistenceHandler() override = default;
+
+ void initialize() override { abort(); }
+ void handlePut(FeedToken, const storage::spi::Bucket&, storage::spi::Timestamp, DocumentSP ) override { abort(); }
+ void handleUpdate(FeedToken, const storage::spi::Bucket&,
+ storage::spi::Timestamp, DocumentUpdateSP) override { abort(); }
+ void handleRemove(FeedToken, const storage::spi::Bucket&,
+ storage::spi::Timestamp, const document::DocumentId&) override { abort(); }
+ void handleRemoveByGid(FeedToken, const storage::spi::Bucket&, storage::spi::Timestamp,
+ vespalib::stringref, const document::GlobalId&) override { abort(); }
+ void handleListBuckets(IBucketIdListResultHandler&) override { abort(); }
+ void handleSetClusterState(const storage::spi::ClusterState&, IGenericResultHandler&) override { abort(); }
+ void handleSetActiveState(const storage::spi::Bucket&, storage::spi::BucketInfo::ActiveState,
+ std::shared_ptr<IGenericResultHandler>) override { abort(); }
+ void handleGetBucketInfo(const storage::spi::Bucket&, IBucketInfoResultHandler&) override { abort(); }
+ void handleCreateBucket(FeedToken, const storage::spi::Bucket&) override { abort(); };
+ void handleDeleteBucket(FeedToken, const storage::spi::Bucket&) override { abort(); }
+ void handleGetModifiedBuckets(IBucketIdListResultHandler&) override { abort(); }
+ void handleSplit(FeedToken, const storage::spi::Bucket&,
+ const storage::spi::Bucket&, const storage::spi::Bucket&) override { abort(); }
+ void handleJoin(FeedToken, const storage::spi::Bucket&, const storage::spi::Bucket&,
+ const storage::spi::Bucket&) override { abort(); }
+ RetrieversSP getDocumentRetrievers(storage::spi::ReadConsistency) override { abort(); }
+ void handleListActiveBuckets(IBucketIdListResultHandler&) override { abort(); }
+ void handlePopulateActiveBuckets(document::BucketId::List, IGenericResultHandler&) override { abort(); }
+ const DocTypeName &doc_type_name() const noexcept override { return _doc_type_name; }
+};
struct VisitRecordingUnitDR : UnitDR {
using VisitedLIDs = std::unordered_set<DocumentIdT>;
@@ -397,6 +434,15 @@ void checkEntry(const IterateResult &res, size_t idx, const Timestamp &timestamp
EXPECT_EQUAL(sizeof(DocEntry), res.getEntries()[idx]->getSize());
}
+void checkEntry(const IterateResult &res, size_t idx, const Timestamp &timestamp, DocumentMetaEnum flags,
+ const GlobalId &gid, vespalib::stringref doc_type_name)
+{
+ ASSERT_LESS(idx, res.getEntries().size());
+ auto expect = DocEntry::create(timestamp, flags, doc_type_name, gid);
+ EXPECT_TRUE(equal(*expect, *res.getEntries()[idx]));
+ EXPECT_EQUAL(sizeof(DocEntry) + sizeof(GlobalId) + doc_type_name.size(), res.getEntries()[idx]->getSize());
+}
+
void checkEntry(const IterateResult &res, size_t idx, const DocumentId &id, const Timestamp &timestamp)
{
ASSERT_LESS(idx, res.getEntries().size());
@@ -415,6 +461,10 @@ void checkEntry(const IterateResult &res, size_t idx, const Document &doc, const
EXPECT_GREATER(getSize(doc), 0u);
}
+GlobalId gid_of(vespalib::stringref id_str) {
+ return DocumentId(id_str).getGlobalId();
+}
+
TEST("require that custom retrievers work as expected") {
DocumentId id1("id:ns:document::1");
DocumentId id2("id:ns:document::2");
@@ -605,15 +655,17 @@ TEST("require that iterating all versions returns both documents and removes") {
TEST("require that using an empty field set returns meta-data only") {
DocumentIterator itr(bucket(5), std::make_shared<document::NoFields>(), selectAll(), newestV(), -1, false);
- itr.add(doc("id:ns:document::1", Timestamp(2), bucket(5)));
- itr.add(cat(doc("id:ns:document::2", Timestamp(3), bucket(5)),
- rem("id:ns:document::3", Timestamp(4), bucket(5))));
+ MockPersistenceHandler foo_handler("foo");
+ MockPersistenceHandler doc_handler("document");
+ itr.add(&foo_handler, doc_with_fields("id:ns:foo::1", Timestamp(2), bucket(5)));
+ itr.add(&doc_handler, cat(doc("id:ns:document::2", Timestamp(3), bucket(5)),
+ rem("id:ns:document::3", Timestamp(4), bucket(5))));
IterateResult res = itr.iterate(largeNum);
EXPECT_TRUE(res.isCompleted());
EXPECT_EQUAL(3u, res.getEntries().size());
- TEST_DO(checkEntry(res, 0, Timestamp(2), DocumentMetaEnum::NONE));
- TEST_DO(checkEntry(res, 1, Timestamp(3), DocumentMetaEnum::NONE));
- TEST_DO(checkEntry(res, 2, Timestamp(4), DocumentMetaEnum::REMOVE_ENTRY));
+ TEST_DO(checkEntry(res, 0, Timestamp(2), DocumentMetaEnum::NONE, gid_of("id:ns:foo::1"), "foo"));
+ TEST_DO(checkEntry(res, 1, Timestamp(3), DocumentMetaEnum::NONE, gid_of("id:ns:document::2"), "document"));
+ TEST_DO(checkEntry(res, 2, Timestamp(4), DocumentMetaEnum::REMOVE_ENTRY, gid_of("id:ns:document::3"), "document"));
}
TEST("require that entries in other buckets are skipped") {
@@ -656,12 +708,13 @@ TEST("require that maxBytes splits iteration results for meta-data only iteratio
IterateResult res1 = itr.iterate(2 * sizeof(DocEntry));
EXPECT_TRUE(!res1.isCompleted());
EXPECT_EQUAL(2u, res1.getEntries().size());
- TEST_DO(checkEntry(res1, 0, Timestamp(2), DocumentMetaEnum::NONE));
- TEST_DO(checkEntry(res1, 1, Timestamp(3), DocumentMetaEnum::REMOVE_ENTRY));
+ // Note: empty doc types since we did not pass in a handler alongside the retrievers
+ TEST_DO(checkEntry(res1, 0, Timestamp(2), DocumentMetaEnum::NONE, gid_of("id:ns:document::1"), ""));
+ TEST_DO(checkEntry(res1, 1, Timestamp(3), DocumentMetaEnum::REMOVE_ENTRY, gid_of("id:ns:document::2"), ""));
IterateResult res2 = itr.iterate(largeNum);
EXPECT_TRUE(res2.isCompleted());
- TEST_DO(checkEntry(res2, 0, Timestamp(4), DocumentMetaEnum::NONE));
+ TEST_DO(checkEntry(res2, 0, Timestamp(4), DocumentMetaEnum::NONE, gid_of("id:ns:document::3"), ""));
IterateResult res3 = itr.iterate(largeNum);
EXPECT_TRUE(res3.isCompleted());
diff --git a/searchcore/src/vespa/searchcore/proton/persistenceengine/document_iterator.cpp b/searchcore/src/vespa/searchcore/proton/persistenceengine/document_iterator.cpp
index 52ae32634a5..bfcb2724592 100644
--- a/searchcore/src/vespa/searchcore/proton/persistenceengine/document_iterator.cpp
+++ b/searchcore/src/vespa/searchcore/proton/persistenceengine/document_iterator.cpp
@@ -1,8 +1,10 @@
// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
#include "document_iterator.h"
+#include "ipersistencehandler.h"
#include <vespa/persistence/spi/docentry.h>
#include <vespa/searchcore/proton/common/cachedselect.h>
+#include <vespa/searchcore/proton/common/doctypename.h>
#include <vespa/searchcore/proton/common/selectcontext.h>
#include <vespa/document/select/gid_filter.h>
#include <vespa/document/select/node.h>
@@ -18,7 +20,9 @@ using storage::spi::DocEntry;
using storage::spi::Timestamp;
using document::Document;
using document::DocumentId;
+using document::GlobalId;
using storage::spi::DocumentMetaEnum;
+using vespalib::stringref;
namespace proton {
@@ -30,6 +34,11 @@ createDocEntry(Timestamp timestamp, bool removed) {
}
std::unique_ptr<DocEntry>
+createDocEntry(Timestamp timestamp, bool removed, stringref doc_type, const GlobalId &gid) {
+ return DocEntry::create(timestamp, (removed ? DocumentMetaEnum::REMOVE_ENTRY : DocumentMetaEnum::NONE), doc_type, gid);
+}
+
+std::unique_ptr<DocEntry>
createDocEntry(Timestamp timestamp, bool removed, Document::UP doc, ssize_t defaultSerializedSize) {
if (doc) {
if (removed) {
@@ -92,17 +101,23 @@ DocumentIterator::DocumentIterator(const storage::spi::Bucket &bucket,
DocumentIterator::~DocumentIterator() = default;
void
+DocumentIterator::add(const IPersistenceHandler *handler, IDocumentRetriever::SP retriever)
+{
+ _sources.emplace_back(handler, std::move(retriever));
+}
+
+void
DocumentIterator::add(IDocumentRetriever::SP retriever)
{
- _sources.push_back(std::move(retriever));
+ add(nullptr, std::move(retriever));
}
IterateResult
DocumentIterator::iterate(size_t maxBytes)
{
if ( ! _fetchedData ) {
- for (const IDocumentRetriever::SP & source : _sources) {
- fetchCompleteSource(*source, _list);
+ for (const auto & source : _sources) {
+ fetchCompleteSource(source.first, *source.second, _list);
}
_fetchedData = true;
}
@@ -235,7 +250,9 @@ private:
}
void
-DocumentIterator::fetchCompleteSource(const IDocumentRetriever & source, IterateResult::List & list)
+DocumentIterator::fetchCompleteSource(const IPersistenceHandler * handler,
+ const IDocumentRetriever & source,
+ IterateResult::List & list)
{
IDocumentRetriever::ReadGuard sourceReadGuard(source.getReadGuard());
search::DocumentMetaData::Vector metaData;
@@ -266,10 +283,11 @@ DocumentIterator::fetchCompleteSource(const IDocumentRetriever & source, Iterate
list.reserve(lidsToFetch.size());
if ( _metaOnly ) {
+ stringref doc_type = (handler ? stringref(handler->doc_type_name().getName()) : stringref());
for (uint32_t lid : lidsToFetch) {
const search::DocumentMetaData & meta = metaData[lidIndexMap[lid]];
assert(lid == meta.lid);
- list.push_back(createDocEntry(storage::spi::Timestamp(meta.timestamp), meta.removed));
+ list.push_back(createDocEntry(storage::spi::Timestamp(meta.timestamp), meta.removed, doc_type, meta.gid));
}
} else {
MatchVisitor visitor(matcher, metaData, lidIndexMap, _fields.get(), list, _defaultSerializedSize);
diff --git a/searchcore/src/vespa/searchcore/proton/persistenceengine/document_iterator.h b/searchcore/src/vespa/searchcore/proton/persistenceengine/document_iterator.h
index e307c249dc0..dd4891def45 100644
--- a/searchcore/src/vespa/searchcore/proton/persistenceengine/document_iterator.h
+++ b/searchcore/src/vespa/searchcore/proton/persistenceengine/document_iterator.h
@@ -12,10 +12,14 @@
namespace proton {
+class IPersistenceHandler;
+
class DocumentIterator
{
private:
using ReadConsistency = storage::spi::ReadConsistency;
+ using HandlerWithRetriever = std::pair<const IPersistenceHandler*, IDocumentRetriever::SP>;
+
const storage::spi::Bucket _bucket;;
const storage::spi::Selection _selection;
const storage::spi::IncludedVersions _versions;
@@ -25,14 +29,16 @@ private:
const bool _metaOnly;
const bool _ignoreMaxBytes;
bool _fetchedData;
- std::vector<IDocumentRetriever::SP> _sources;
+ std::vector<HandlerWithRetriever> _sources;
size_t _nextItem;
storage::spi::IterateResult::List _list;
- bool checkMeta(const search::DocumentMetaData &meta) const;
- void fetchCompleteSource(const IDocumentRetriever & source, storage::spi::IterateResult::List & list);
- bool isWeakRead() const { return _readConsistency == ReadConsistency::WEAK; }
+ [[nodiscard]] bool checkMeta(const search::DocumentMetaData &meta) const;
+ void fetchCompleteSource(const IPersistenceHandler * handler,
+ const IDocumentRetriever & source,
+ storage::spi::IterateResult::List & list);
+ [[nodiscard]] bool isWeakRead() const { return _readConsistency == ReadConsistency::WEAK; }
public:
DocumentIterator(const storage::spi::Bucket &bucket, document::FieldSet::SP fields,
@@ -40,6 +46,7 @@ public:
ssize_t defaultSerializedSize, bool ignoreMaxBytes,
ReadConsistency readConsistency=ReadConsistency::STRONG);
~DocumentIterator();
+ void add(const IPersistenceHandler *handler, IDocumentRetriever::SP retriever);
void add(IDocumentRetriever::SP retriever);
storage::spi::IterateResult iterate(size_t maxBytes);
};
diff --git a/searchcore/src/vespa/searchcore/proton/persistenceengine/persistenceengine.cpp b/searchcore/src/vespa/searchcore/proton/persistenceengine/persistenceengine.cpp
index bf8915b2505..0bae2e70785 100644
--- a/searchcore/src/vespa/searchcore/proton/persistenceengine/persistenceengine.cpp
+++ b/searchcore/src/vespa/searchcore/proton/persistenceengine/persistenceengine.cpp
@@ -544,9 +544,11 @@ PersistenceEngine::createIterator(const Bucket &bucket, FieldSetSP fields, const
auto entry = std::make_unique<IteratorEntry>(context.getReadConsistency(), bucket, std::move(fields), selection,
versions, _defaultSerializedSize, _ignoreMaxBytes);
for (; snap.handlers().valid(); snap.handlers().next()) {
- IPersistenceHandler::RetrieversSP retrievers = snap.handlers().get()->getDocumentRetrievers(context.getReadConsistency());
+ auto *handler = snap.handlers().get();
+ IPersistenceHandler::RetrieversSP retrievers = handler->getDocumentRetrievers(context.getReadConsistency());
for (const auto & retriever : *retrievers) {
- entry->it.add(retriever);
+ // Handler ptr validity and lifetime is maintained by handler snapshot owned by iterator
+ entry->it.add(handler, retriever);
}
}
entry->handler_sequence = HandlerSnapshot::release(std::move(snap));