summaryrefslogtreecommitdiffstats
path: root/streamingvisitors
diff options
context:
space:
mode:
authorHenning Baldersheim <balder@yahoo-inc.com>2024-02-12 14:27:18 +0000
committerHenning Baldersheim <balder@yahoo-inc.com>2024-02-13 12:47:35 +0000
commit21b88682408cceb19a04a29e5e3a2404e722a46d (patch)
treefe661f0ac6103c6d5f5c906f5b736858b8294538 /streamingvisitors
parent2e26884433d90011cadcb5b07e92ed0d7a6de8f4 (diff)
Take owenship for the stuff you provide. Do not rely on the caller.
Diffstat (limited to 'streamingvisitors')
-rw-r--r--streamingvisitors/src/tests/hitcollector/hitcollector_test.cpp8
-rw-r--r--streamingvisitors/src/tests/matching_elements_filler/matching_elements_filler_test.cpp10
-rw-r--r--streamingvisitors/src/vespa/searchvisitor/hitcollector.cpp20
-rw-r--r--streamingvisitors/src/vespa/searchvisitor/hitcollector.h18
-rw-r--r--streamingvisitors/src/vespa/searchvisitor/searchvisitor.cpp42
-rw-r--r--streamingvisitors/src/vespa/searchvisitor/searchvisitor.h10
-rw-r--r--streamingvisitors/src/vespa/vsm/common/storagedocument.h4
7 files changed, 48 insertions, 64 deletions
diff --git a/streamingvisitors/src/tests/hitcollector/hitcollector_test.cpp b/streamingvisitors/src/tests/hitcollector/hitcollector_test.cpp
index adab7d57028..b0373ae635a 100644
--- a/streamingvisitors/src/tests/hitcollector/hitcollector_test.cpp
+++ b/streamingvisitors/src/tests/hitcollector/hitcollector_test.cpp
@@ -74,7 +74,6 @@ protected:
void testFeatureSet();
DocumentType _docType;
- std::vector<vsm::StorageDocument::UP> _backedHits;
HitCollectorTest();
~HitCollectorTest() override;
@@ -85,7 +84,7 @@ HitCollectorTest::HitCollectorTest()
{
}
-HitCollectorTest::~HitCollectorTest() {}
+HitCollectorTest::~HitCollectorTest() = default;
void
HitCollectorTest::assertHit(SearchResult::RankType expRank, uint32_t hitNo, SearchResult & rs)
@@ -109,11 +108,10 @@ void
HitCollectorTest::addHit(HitCollector &hc, uint32_t docId, double score, const char *sortData, size_t sortDataSize)
{
auto doc = document::Document::make_without_repo(_docType, DocumentId("id:ns:testdoc::"));
- auto sdoc = std::make_unique<StorageDocument>(std::move(doc), SharedFieldPathMap(), 0);
+ auto sdoc = std::make_shared<StorageDocument>(std::move(doc), SharedFieldPathMap(), 0);
ASSERT_TRUE(sdoc->valid());
MatchData md(MatchData::params());
- hc.addHit(sdoc.get(), docId, md, score, sortData, sortDataSize);
- _backedHits.push_back(std::move(sdoc));
+ hc.addHit(std::move(sdoc), docId, md, score, sortData, sortDataSize);
}
TEST_F(HitCollectorTest, simple)
diff --git a/streamingvisitors/src/tests/matching_elements_filler/matching_elements_filler_test.cpp b/streamingvisitors/src/tests/matching_elements_filler/matching_elements_filler_test.cpp
index 4f57b5a0b92..28adb9669de 100644
--- a/streamingvisitors/src/tests/matching_elements_filler/matching_elements_filler_test.cpp
+++ b/streamingvisitors/src/tests/matching_elements_filler/matching_elements_filler_test.cpp
@@ -282,7 +282,6 @@ class MatchingElementsFillerTest : public ::testing::Test {
Query _query;
std::unique_ptr<MatchingElementsFiller> _matching_elements_filler;
std::unique_ptr<MatchingElements> _matching_elements;
- std::unique_ptr<StorageDocument> _sdoc;
public:
MatchingElementsFillerTest();
~MatchingElementsFillerTest();
@@ -303,15 +302,14 @@ MatchingElementsFillerTest::MatchingElementsFillerTest()
_search_result(),
_query(),
_matching_elements_filler(),
- _matching_elements(),
- _sdoc()
+ _matching_elements()
{
_env.field_paths = make_field_path_map(_doc_type);
_search_result.addHit(1, "id::test::1", 0.0, nullptr, 0);
- _sdoc = std::make_unique<StorageDocument>(_doc_type.make_test_doc(), _env.field_paths, _env.field_paths->size());
- EXPECT_TRUE(_sdoc->valid());
+ auto sdoc = std::make_shared<StorageDocument>(_doc_type.make_test_doc(), _env.field_paths, _env.field_paths->size());
+ EXPECT_TRUE(sdoc->valid());
MatchData md(MatchData::params());
- _hit_collector.addHit(_sdoc.get(), 1, md, 0.0, nullptr, 0);
+ _hit_collector.addHit(std::move(sdoc), 1, md, 0.0, nullptr, 0);
}
MatchingElementsFillerTest::~MatchingElementsFillerTest() = default;
diff --git a/streamingvisitors/src/vespa/searchvisitor/hitcollector.cpp b/streamingvisitors/src/vespa/searchvisitor/hitcollector.cpp
index 305e76477ca..75eccaef83c 100644
--- a/streamingvisitors/src/vespa/searchvisitor/hitcollector.cpp
+++ b/streamingvisitors/src/vespa/searchvisitor/hitcollector.cpp
@@ -17,11 +17,11 @@ using FefUtils = search::fef::Utils;
namespace streaming {
-HitCollector::Hit::Hit(const vsm::StorageDocument * doc, uint32_t docId, const MatchData & matchData,
+HitCollector::Hit::Hit(vsm::StorageDocument::SP doc, uint32_t docId, const MatchData & matchData,
double score, const void * sortData, size_t sortDataLen)
: _docid(docId),
_score(score),
- _document(doc),
+ _document(std::move(doc)),
_matchData(),
_sortBlob(sortData, sortDataLen)
{
@@ -33,10 +33,10 @@ HitCollector::Hit::Hit(const vsm::StorageDocument * doc, uint32_t docId, const
HitCollector::Hit::~Hit() = default;
-HitCollector::HitCollector(size_t wantedHits, bool use_sort_blob) :
- _hits(),
- _heap(),
- _use_sort_blob(use_sort_blob)
+HitCollector::HitCollector(size_t wantedHits, bool use_sort_blob)
+ : _hits(),
+ _heap(),
+ _use_sort_blob(use_sort_blob)
{
_hits.reserve(16);
_heap.reserve(wantedHits);
@@ -56,16 +56,16 @@ HitCollector::getDocSum(const search::DocumentIdT & docId) const
}
bool
-HitCollector::addHit(const vsm::StorageDocument * doc, uint32_t docId, const MatchData & data, double score)
+HitCollector::addHit(vsm::StorageDocument::SP doc, uint32_t docId, const MatchData & data, double score)
{
- return addHit(Hit(doc, docId, data, score));
+ return addHit(Hit(std::move(doc), docId, data, score));
}
bool
-HitCollector::addHit(const vsm::StorageDocument * doc, uint32_t docId, const MatchData & data,
+HitCollector::addHit(vsm::StorageDocument::SP doc, uint32_t docId, const MatchData & data,
double score, const void * sortData, size_t sortDataLen)
{
- return addHit(Hit(doc, docId, data, score, sortData, sortDataLen));
+ return addHit(Hit(std::move(doc), docId, data, score, sortData, sortDataLen));
}
bool
diff --git a/streamingvisitors/src/vespa/searchvisitor/hitcollector.h b/streamingvisitors/src/vespa/searchvisitor/hitcollector.h
index 0a3a3ce3062..3a05f9dca86 100644
--- a/streamingvisitors/src/vespa/searchvisitor/hitcollector.h
+++ b/streamingvisitors/src/vespa/searchvisitor/hitcollector.h
@@ -26,10 +26,10 @@ private:
class Hit
{
public:
- Hit(const vsm::StorageDocument * doc, uint32_t docId, const MatchData & matchData,
+ Hit(vsm::StorageDocument::SP doc, uint32_t docId, const MatchData & matchData,
double score, const void * sortData, size_t sortDataLen);
- Hit(const vsm::StorageDocument * doc, uint32_t docId, const MatchData & matchData, double score)
- : Hit(doc, docId, matchData, score, nullptr, 0)
+ Hit(vsm::StorageDocument::SP doc, uint32_t docId, const MatchData & matchData, double score)
+ : Hit(std::move(doc), docId, matchData, score, nullptr, 0)
{ }
~Hit();
Hit(const Hit &) = delete;
@@ -52,14 +52,14 @@ private:
return (diff == 0) ? cmpDocId(b) : diff;
}
- void dropDocument() noexcept { _document = nullptr; }
+ void dropDocument() noexcept { _document.reset(); }
private:
uint32_t _docid;
double _score;
- const vsm::StorageDocument * _document;
+ vsm::StorageDocument::SP _document;
std::vector<TermFieldMatchData> _matchData;
- vespalib::string _sortBlob;
+ vespalib::string _sortBlob;
};
using HitVector = std::vector<Hit>;
using Lids = std::vector<uint32_t>;
@@ -118,7 +118,7 @@ public:
* @param data The match data for the hit.
* @return true if the document was added to the heap
**/
- bool addHit(const vsm::StorageDocument * doc, uint32_t docId, const MatchData & data, double score);
+ bool addHit(vsm::StorageDocument::SP doc, uint32_t docId, const MatchData & data, double score);
/**
* Adds a hit to this hit collector.
@@ -126,13 +126,13 @@ public:
* If you add a nullptr document you should not use getDocSum() or fillSearchResult(),
* as these functions expect valid documents.
*
- * @param doc The document that is a hit. Must be kept alive on the outside.
+ * @param doc The document that is a hit.
* @param data The match data for the hit.
* @param sortData The buffer of the sortdata.
* @param sortDataLen The length of the sortdata.
* @return true if the document was added to the heap
**/
- bool addHit(const vsm::StorageDocument * doc, uint32_t docId, const MatchData & data,
+ bool addHit(vsm::StorageDocument::SP doc, uint32_t docId, const MatchData & data,
double score, const void * sortData, size_t sortDataLen);
/**
diff --git a/streamingvisitors/src/vespa/searchvisitor/searchvisitor.cpp b/streamingvisitors/src/vespa/searchvisitor/searchvisitor.cpp
index 7bd637ae5f4..9a0b720f054 100644
--- a/streamingvisitors/src/vespa/searchvisitor/searchvisitor.cpp
+++ b/streamingvisitors/src/vespa/searchvisitor/searchvisitor.cpp
@@ -725,31 +725,32 @@ SearchVisitor::RankController::keepMatchedDocument()
return (!(_rankProcessor->getRankScore() <= _rankSetup->getRankScoreDropLimit()));
}
-bool
+void
SearchVisitor::RankController::collectMatchedDocument(bool hasSorting,
SearchVisitor & visitor,
const std::vector<char> & tmpSortBuffer,
- const StorageDocument * document)
+ StorageDocument::SP document)
{
- bool amongTheBest(false);
uint32_t docId = _rankProcessor->getDocId();
if (!hasSorting) {
- amongTheBest = _rankProcessor->getHitCollector().addHit(document, docId, _rankProcessor->getMatchData(),
- _rankProcessor->getRankScore());
+ bool amongTheBest = _rankProcessor->getHitCollector().addHit(std::move(document), docId,
+ _rankProcessor->getMatchData(),
+ _rankProcessor->getRankScore());
if (amongTheBest && _dumpFeatures) {
- _dumpProcessor->getHitCollector().addHit(nullptr, docId, _dumpProcessor->getMatchData(), _dumpProcessor->getRankScore());
+ _dumpProcessor->getHitCollector().addHit({}, docId, _dumpProcessor->getMatchData(), _dumpProcessor->getRankScore());
}
} else {
size_t pos = visitor.fillSortBuffer();
LOG(spam, "SortBlob is %ld bytes", pos);
- amongTheBest = _rankProcessor->getHitCollector().addHit(document, docId, _rankProcessor->getMatchData(),
- _rankProcessor->getRankScore(), &tmpSortBuffer[0], pos);
+ bool amongTheBest = _rankProcessor->getHitCollector().addHit(std::move(document), docId,
+ _rankProcessor->getMatchData(),
+ _rankProcessor->getRankScore(),
+ &tmpSortBuffer[0], pos);
if (amongTheBest && _dumpFeatures) {
- _dumpProcessor->getHitCollector().addHit(nullptr, docId, _dumpProcessor->getMatchData(),
+ _dumpProcessor->getHitCollector().addHit({}, docId, _dumpProcessor->getMatchData(),
_dumpProcessor->getRankScore(), &tmpSortBuffer[0], pos);
}
}
- return amongTheBest;
}
void
@@ -1042,7 +1043,7 @@ SearchVisitor::handleDocuments(const document::BucketId&, DocEntryList & entries
const document::DocumentType* defaultDocType = _docTypeMapping.getDefaultDocumentType();
assert(defaultDocType);
for (const auto & entry : entries) {
- auto document = std::make_unique<StorageDocument>(entry->releaseDocument(), _fieldPathMap, highestFieldNo);
+ auto document = std::make_shared<StorageDocument>(entry->releaseDocument(), _fieldPathMap, highestFieldNo);
try {
if (defaultDocType != nullptr
@@ -1051,9 +1052,7 @@ SearchVisitor::handleDocuments(const document::BucketId&, DocEntryList & entries
LOG(debug, "Skipping document of type '%s' when handling only documents of type '%s'",
document->docDoc().getType().getName().c_str(), defaultDocType->getName().c_str());
} else {
- if (handleDocument(*document)) {
- _backingDocuments.push_back(std::move(document));
- }
+ handleDocument(document);
}
} catch (const std::exception & e) {
LOG(warning, "Caught exception handling document '%s'. Exception='%s'",
@@ -1062,10 +1061,10 @@ SearchVisitor::handleDocuments(const document::BucketId&, DocEntryList & entries
}
}
-bool
-SearchVisitor::handleDocument(StorageDocument & document)
+void
+SearchVisitor::handleDocument(StorageDocument::SP documentSP)
{
- bool needToKeepDocument(false);
+ StorageDocument & document = *documentSP;
_syntheticFieldsController.onDocument(document);
group(document.docDoc(), 0, true);
if (match(document)) {
@@ -1079,14 +1078,11 @@ SearchVisitor::handleDocument(StorageDocument & document)
_rankAttribute.add(rp.getRankScore());
}
if (_rankController.keepMatchedDocument()) {
- bool amongTheBest = _rankController.collectMatchedDocument(!_sortList.empty(), *this, _tmpSortBuffer, &document);
+ _rankController.collectMatchedDocument(!_sortList.empty(), *this, _tmpSortBuffer, std::move(documentSP));
_syntheticFieldsController.onDocumentMatch(document, documentId);
SingleDocumentStore single(document);
_summaryGenerator.setDocsumCache(single);
group(document.docDoc(), rp.getRankScore(), false);
- if (amongTheBest) {
- needToKeepDocument = true;
- }
} else {
_hitsRejectedCount++;
LOG(debug, "Do not keep document with id '%s' because rank score (%f) <= rank score drop limit (%f)",
@@ -1095,7 +1091,6 @@ SearchVisitor::handleDocument(StorageDocument & document)
} else {
LOG(debug, "Did not match document with id '%s'", document.docDoc().getId().getScheme().toString().c_str());
}
- return needToKeepDocument;
}
void
@@ -1228,10 +1223,7 @@ SearchVisitor::completedVisitingInternal(HitCounter& hitCounter)
}
generateGroupingResults();
-
generateDocumentSummaries();
- _backingDocuments.clear();
-
documentSummary.sort();
LOG(debug, "Docsum count: %lu", documentSummary.getSummaryCount());
}
diff --git a/streamingvisitors/src/vespa/searchvisitor/searchvisitor.h b/streamingvisitors/src/vespa/searchvisitor/searchvisitor.h
index bb16655193f..d8d97830244 100644
--- a/streamingvisitors/src/vespa/searchvisitor/searchvisitor.h
+++ b/streamingvisitors/src/vespa/searchvisitor/searchvisitor.h
@@ -198,12 +198,11 @@ private:
* @param visitor the search visitor.
* @param tmpSortBuffer the sort buffer containing the sort data.
* @param document the document to collect. Must be kept alive on the outside.
- * @return true if the document was added to the heap
**/
- bool collectMatchedDocument(bool hasSorting,
+ void collectMatchedDocument(bool hasSorting,
SearchVisitor & visitor,
const std::vector<char> & tmpSortBuffer,
- const vsm::StorageDocument * document);
+ vsm::StorageDocument::SP document);
/**
* Callback function that is called when visiting is completed.
* Perform second phase ranking and calculate summary features / rank features if asked for.
@@ -323,9 +322,8 @@ private:
/**
* Process one document
* @param document Document to process.
- * @return true if the underlying buffer is needed later on, then it must be kept.
*/
- bool handleDocument(vsm::StorageDocument & document);
+ void handleDocument(vsm::StorageDocument::SP document);
/**
* Collect the given document for grouping.
@@ -397,7 +395,6 @@ private:
size_t _limit;
};
using GroupingList = std::vector< GroupingEntry >;
- using DocumentVector = std::vector<vsm::StorageDocument::UP>;
class StreamingDocsumsState {
using ResolveClassInfo = search::docsummary::IDocsumWriter::ResolveClassInfo;
@@ -485,7 +482,6 @@ private:
bool _shouldFillRankAttribute;
SyntheticFieldsController _syntheticFieldsController;
RankController _rankController;
- DocumentVector _backingDocuments;
vsm::StringFieldIdTMapT _fieldsUnion;
void setupAttributeVector(const vsm::FieldPath &fieldPath);
diff --git a/streamingvisitors/src/vespa/vsm/common/storagedocument.h b/streamingvisitors/src/vespa/vsm/common/storagedocument.h
index fb75373200b..4812ab20c79 100644
--- a/streamingvisitors/src/vespa/vsm/common/storagedocument.h
+++ b/streamingvisitors/src/vespa/vsm/common/storagedocument.h
@@ -13,7 +13,7 @@ using SharedFieldPathMap = std::shared_ptr<FieldPathMapT>;
class StorageDocument : public Document {
public:
- using UP = std::unique_ptr<StorageDocument>;
+ using SP = std::shared_ptr<StorageDocument>;
class SubDocument {
public:
@@ -40,7 +40,7 @@ public:
StorageDocument(document::Document::UP doc, const SharedFieldPathMap &fim, size_t fieldNoLimit);
StorageDocument(const StorageDocument &) = delete;
StorageDocument & operator = (const StorageDocument &) = delete;
- ~StorageDocument();
+ ~StorageDocument() override;
const document::Document &docDoc() const { return *_doc; }
bool valid() const { return _doc.get() != nullptr; }