diff options
author | Henning Baldersheim <balder@yahoo-inc.com> | 2023-07-04 17:35:37 +0200 |
---|---|---|
committer | GitHub <noreply@github.com> | 2023-07-04 17:35:37 +0200 |
commit | aa6eb29bf11b4479e1c46ad7c337daf76f82e3c5 (patch) | |
tree | 1da79831721f2ecf056d52e63738ef8af2e9204f | |
parent | ad62ec51457fb8b826b7e379430fadac1a46c71d (diff) | |
parent | 2aedae160b680b395ac4ec3f9bb4512e35d1c36a (diff) |
Merge pull request #27630 from vespa-engine/balder/double-check-removed-docs-when-generating-result
Also check if document has been removed before returning result.
7 files changed, 66 insertions, 47 deletions
diff --git a/searchcore/src/vespa/searchcore/grouping/groupingcontext.h b/searchcore/src/vespa/searchcore/grouping/groupingcontext.h index 6f23fd13378..f37046a8b3b 100644 --- a/searchcore/src/vespa/searchcore/grouping/groupingcontext.h +++ b/searchcore/src/vespa/searchcore/grouping/groupingcontext.h @@ -107,6 +107,7 @@ public: */ bool needRanking() const; bool enableNestedMultivalueGrouping() const noexcept { return _enableNestedMultivalueGrouping; } + const search::BitVector & getValidLids() const { return _validLids; } void groupUnordered(const RankedHit *searchResults, uint32_t binSize, const search::BitVector * overflow); void groupInRelevanceOrder(const RankedHit *searchResults, uint32_t binSize); diff --git a/searchcore/src/vespa/searchcore/proton/matching/match_master.cpp b/searchcore/src/vespa/searchcore/proton/matching/match_master.cpp index 0bb183d1dc0..26555a0b9f0 100644 --- a/searchcore/src/vespa/searchcore/proton/matching/match_master.cpp +++ b/searchcore/src/vespa/searchcore/proton/matching/match_master.cpp @@ -6,12 +6,12 @@ #include "match_thread.h" #include "match_tools.h" #include "extract_features.h" +#include "partial_result.h" #include <vespa/searchlib/engine/trace.h> #include <vespa/searchlib/engine/searchreply.h> #include <vespa/vespalib/util/thread_bundle.h> #include <vespa/vespalib/util/issue.h> #include <vespa/vespalib/data/slime/inserter.h> -#include <vespa/vespalib/data/slime/inject.h> #include <vespa/vespalib/data/slime/cursor.h> namespace proton::matching { diff --git a/searchcore/src/vespa/searchcore/proton/matching/match_thread.cpp b/searchcore/src/vespa/searchcore/proton/matching/match_thread.cpp index 9ce8fcd9269..52d7d906d4c 100644 --- a/searchcore/src/vespa/searchcore/proton/matching/match_thread.cpp +++ b/searchcore/src/vespa/searchcore/proton/matching/match_thread.cpp @@ -3,6 +3,7 @@ #include "match_thread.h" #include "document_scorer.h" #include "match_tools.h" +#include "partial_result.h" #include <vespa/searchcore/grouping/groupingmanager.h> #include <vespa/searchcore/grouping/groupingcontext.h> #include <vespa/searchlib/engine/trace.h> @@ -57,6 +58,43 @@ LazyValue get_score_feature(const RankProgram &rankProgram) { return resolver.resolve(0); } +void +fillPartialResult(ResultProcessor::Context & context, size_t totalHits, size_t numHits, + const search::RankedHit *hits, const search::BitVector * bits) __attribute__((noinline)); + +void +fillPartialResult(ResultProcessor::Context & context, size_t totalHits, size_t numHits, + const search::RankedHit *hits, const search::BitVector * bits) { + PartialResult &pr = *context.result; + pr.totalHits(totalHits); + size_t maxHits = std::min(numHits, pr.maxSize()); + const search::BitVector & validLids = context._validLids; + if (pr.hasSortData()) { + FastS_SortSpec &spec = context.sort->sortSpec; + for (size_t i = 0; i < maxHits; ++i) { + if (validLids.testBit(hits[i].getDocId())) { + pr.add(hits[i], spec.getSortRef(i)); + } + } + } else { + for (size_t i = 0; i < maxHits; ++i) { + if (validLids.testBit(hits[i].getDocId())) { + pr.add(hits[i]); + } + } + if ((bits != nullptr) && (pr.size() < pr.maxSize())) { + for (unsigned int bitId = bits->getFirstTrueBit(); + (bitId < bits->size()) && (pr.size() < pr.maxSize()); + bitId = bits->getNextTrueBit(bitId + 1)) + { + if (validLids.testBit(bitId)) { + pr.add(search::RankedHit(bitId)); + } + } + } + } +} + } // namespace proton::matching::<unnamed> //----------------------------------------------------------------------------- @@ -336,7 +374,6 @@ MatchThread::processResult(const Doom & doom, search::ResultSet::UP result, Resu result->mergeWithBitOverflow(fallback_rank_value()); } if (doom.hard_doom()) return; - size_t totalHits = result->getNumHits(); const search::RankedHit *hits = result->getArray(); size_t numHits = result->getArrayUsed(); search::BitVector *bits = result->getBitOverflow(); @@ -357,27 +394,7 @@ MatchThread::processResult(const Doom & doom, search::ResultSet::UP result, Resu man.groupInRelevanceOrder(hits, numHits); } if (doom.hard_doom()) return; - PartialResult &pr = *context.result; - pr.totalHits(totalHits); - size_t maxHits = std::min(numHits, pr.maxSize()); - if (pr.hasSortData()) { - FastS_SortSpec &spec = context.sort->sortSpec; - for (size_t i = 0; i < maxHits; ++i) { - pr.add(hits[i], spec.getSortRef(i)); - } - } else { - for (size_t i = 0; i < maxHits; ++i) { - pr.add(hits[i]); - } - if ((bits != nullptr) && (pr.size() < pr.maxSize())) { - for (unsigned int bitId = bits->getFirstTrueBit(); - (bitId < bits->size()) && (pr.size() < pr.maxSize()); - bitId = bits->getNextTrueBit(bitId + 1)) - { - pr.add(search::RankedHit(bitId)); - } - } - } + fillPartialResult(context, result->getNumHits(), numHits, hits, bits); if (auto task = matchToolsFactory.createOnMatchTask()) { task->run(result->copyResult()); @@ -479,4 +496,9 @@ MatchThread::run() } } +std::unique_ptr<PartialResult> +MatchThread::extract_result() { + return std::move(resultContext->result); +} + } diff --git a/searchcore/src/vespa/searchcore/proton/matching/match_thread.h b/searchcore/src/vespa/searchcore/proton/matching/match_thread.h index 757caae0e75..03ba34eca1f 100644 --- a/searchcore/src/vespa/searchcore/proton/matching/match_thread.h +++ b/searchcore/src/vespa/searchcore/proton/matching/match_thread.h @@ -5,7 +5,6 @@ #include "i_match_loop_communicator.h" #include "match_params.h" #include "matching_stats.h" -#include "partial_result.h" #include "result_processor.h" #include "docid_range_scheduler.h" #include <vespa/vespalib/util/runnable.h> @@ -60,7 +59,7 @@ private: uint32_t _distributionKey; ResultProcessor &resultProcessor; vespalib::DualMergeDirector &mergeDirector; - ResultProcessor::Context::UP resultContext; + std::unique_ptr<ResultProcessor::Context> resultContext; MatchingStats::Partition thread_stats; double total_time_s; double match_time_s; @@ -135,7 +134,7 @@ public: void run() override; const MatchingStats::Partition &get_thread_stats() const { return thread_stats; } double get_match_time() const { return match_time_s; } - PartialResult::UP extract_result() { return std::move(resultContext->result); } + std::unique_ptr<PartialResult> extract_result(); const Trace & getTrace() const { return *trace; } const UniqueIssues &get_issues() const { return my_issues; } }; diff --git a/searchcore/src/vespa/searchcore/proton/matching/partial_result.h b/searchcore/src/vespa/searchcore/proton/matching/partial_result.h index 314fefa3cc0..d031e1893ce 100644 --- a/searchcore/src/vespa/searchcore/proton/matching/partial_result.h +++ b/searchcore/src/vespa/searchcore/proton/matching/partial_result.h @@ -5,7 +5,6 @@ #include <vespa/vespalib/util/dual_merge_director.h> #include <vespa/searchlib/common/rankedhit.h> #include <vector> -#include <cassert> namespace proton::matching { @@ -16,19 +15,10 @@ namespace proton::matching { class PartialResult : public vespalib::DualMergeDirector::Source { public: - using UP = std::unique_ptr<PartialResult>; using SortRef = std::pair<const char *, size_t>; - -private: - std::vector<search::RankedHit> _hits; - std::vector<SortRef> _sortData; - size_t _maxSize; - size_t _totalHits; - bool _hasSortData; - size_t _sortDataSize; - -public: PartialResult(size_t maxSize_in, bool hasSortData_in); + PartialResult(const PartialResult &) = delete; + PartialResult & operator =(const PartialResult &) = delete; ~PartialResult() override; size_t size() const { return _hits.size(); } size_t maxSize() const { return _maxSize; } @@ -39,16 +29,21 @@ public: const SortRef &sortData(size_t i) const { return _sortData[i]; } void totalHits(size_t th) { _totalHits = th; } void add(const search::RankedHit &h) { - assert(!_hasSortData); _hits.push_back(h); } void add(const search::RankedHit &h, const SortRef &sd) { - assert(_hasSortData); _hits.push_back(h); _sortData.push_back(sd); _sortDataSize += sd.second; } void merge(Source &rhs) override; +private: + std::vector<search::RankedHit> _hits; + std::vector<SortRef> _sortData; + size_t _maxSize; + size_t _totalHits; + bool _hasSortData; + size_t _sortDataSize; }; } diff --git a/searchcore/src/vespa/searchcore/proton/matching/result_processor.cpp b/searchcore/src/vespa/searchcore/proton/matching/result_processor.cpp index 1608c633124..a973e264269 100644 --- a/searchcore/src/vespa/searchcore/proton/matching/result_processor.cpp +++ b/searchcore/src/vespa/searchcore/proton/matching/result_processor.cpp @@ -36,8 +36,9 @@ ResultProcessor::Sort::Sort(uint32_t partitionId, const vespalib::Doom & doom, I } } -ResultProcessor::Context::Context(Sort::UP s, PartialResult::UP r, GroupingContext::UP g) - : sort(std::move(s)), +ResultProcessor::Context::Context(const search::BitVector & validLids, Sort::UP s, PartialResultUP r, GroupingContext::UP g) + : _validLids(validLids), + sort(std::move(s)), result(std::move(r)), grouping(std::move(g)), groupingSource(grouping.get()) @@ -90,7 +91,7 @@ ResultProcessor::prepareThreadContextCreation(size_t num_threads) } } -ResultProcessor::Context::UP +std::unique_ptr<ResultProcessor::Context> ResultProcessor::createThreadContext(const vespalib::Doom & hardDoom, size_t thread_id, uint32_t distributionKey) { auto sort = std::make_unique<Sort>(distributionKey, hardDoom, _attrContext, _sortSpec); @@ -99,7 +100,7 @@ ResultProcessor::createThreadContext(const vespalib::Doom & hardDoom, size_t thr if (_groupingSession) { groupingContext = _groupingSession->createThreadContext(thread_id, _attrContext); } - return std::make_unique<Context>(std::move(sort), std::move(result), std::move(groupingContext)); + return std::make_unique<Context>(_metaStore.getValidLids(), std::move(sort), std::move(result), std::move(groupingContext)); } std::vector<std::pair<uint32_t,uint32_t>> diff --git a/searchcore/src/vespa/searchcore/proton/matching/result_processor.h b/searchcore/src/vespa/searchcore/proton/matching/result_processor.h index 54b9adc4723..49fd9f37063 100644 --- a/searchcore/src/vespa/searchcore/proton/matching/result_processor.h +++ b/searchcore/src/vespa/searchcore/proton/matching/result_processor.h @@ -14,6 +14,7 @@ namespace search { class GroupingSession; } struct IDocumentMetaStore; + class BitVector; } namespace proton::matching { @@ -58,15 +59,15 @@ public: * Context per thread used for result processing. **/ struct Context { - using UP = std::unique_ptr<Context>; using GroupingContextUP = std::unique_ptr<GroupingContext>; + const search::BitVector & _validLids; Sort::UP sort; PartialResultUP result; GroupingContextUP grouping; GroupingSource groupingSource; - Context(Sort::UP s, PartialResultUP r, GroupingContextUP g); + Context(const search::BitVector & validLids, Sort::UP s, PartialResultUP r, GroupingContextUP g); ~Context(); }; @@ -101,7 +102,7 @@ public: ~ResultProcessor(); void prepareThreadContextCreation(size_t num_threads); - Context::UP createThreadContext(const vespalib::Doom & hardDoom, size_t thread_id, uint32_t distributionKey); + std::unique_ptr<Context> createThreadContext(const vespalib::Doom & hardDoom, size_t thread_id, uint32_t distributionKey); std::vector<std::pair<uint32_t,uint32_t>> extract_docid_ordering(const PartialResult &result) const; std::unique_ptr<Result> makeReply(PartialResultUP full_result); }; |