Also check whether each document has been removed (i.e. its docid is no longer set in the valid-lids bit vector) before adding it to the returned partial result.

author: Henning Baldersheim <balder@yahoo-inc.com> 2023-07-04 14:32:07 +0000
committer: Henning Baldersheim <balder@yahoo-inc.com> 2023-07-04 14:32:07 +0000
commit: 2aedae160b680b395ac4ec3f9bb4512e35d1c36a (patch)
tree: 862a924c3975fab46a0341cde97c842c55fda934
parent: f2157bd7004552ba64090139423f536283a630d8 (diff)
7 files changed, 66 insertions, 47 deletions
diff --git a/searchcore/src/vespa/searchcore/grouping/groupingcontext.h b/searchcore/src/vespa/searchcore/grouping/groupingcontext.h
index 6f23fd13378..f37046a8b3b 100644
--- a/searchcore/src/vespa/searchcore/grouping/groupingcontext.h
+++ b/searchcore/src/vespa/searchcore/grouping/groupingcontext.h
@@ -107,6 +107,7 @@ public:
      */
     bool needRanking() const;
     bool enableNestedMultivalueGrouping() const noexcept { return _enableNestedMultivalueGrouping; }
+    const search::BitVector & getValidLids() const { return _validLids; }
 
     void groupUnordered(const RankedHit *searchResults, uint32_t binSize, const search::BitVector * overflow);
     void groupInRelevanceOrder(const RankedHit *searchResults, uint32_t binSize);
diff --git a/searchcore/src/vespa/searchcore/proton/matching/match_master.cpp b/searchcore/src/vespa/searchcore/proton/matching/match_master.cpp
index 0bb183d1dc0..26555a0b9f0 100644
--- a/searchcore/src/vespa/searchcore/proton/matching/match_master.cpp
+++ b/searchcore/src/vespa/searchcore/proton/matching/match_master.cpp
@@ -6,12 +6,12 @@
 #include "match_thread.h"
 #include "match_tools.h"
 #include "extract_features.h"
+#include "partial_result.h"
 #include <vespa/searchlib/engine/trace.h>
 #include <vespa/searchlib/engine/searchreply.h>
 #include <vespa/vespalib/util/thread_bundle.h>
 #include <vespa/vespalib/util/issue.h>
 #include <vespa/vespalib/data/slime/inserter.h>
-#include <vespa/vespalib/data/slime/inject.h>
 #include <vespa/vespalib/data/slime/cursor.h>
 
 namespace proton::matching {
diff --git a/searchcore/src/vespa/searchcore/proton/matching/match_thread.cpp b/searchcore/src/vespa/searchcore/proton/matching/match_thread.cpp
index 9ce8fcd9269..52d7d906d4c 100644
--- a/searchcore/src/vespa/searchcore/proton/matching/match_thread.cpp
+++ b/searchcore/src/vespa/searchcore/proton/matching/match_thread.cpp
@@ -3,6 +3,7 @@
 #include "match_thread.h"
 #include "document_scorer.h"
 #include "match_tools.h"
+#include "partial_result.h"
 #include <vespa/searchcore/grouping/groupingmanager.h>
 #include <vespa/searchcore/grouping/groupingcontext.h>
 #include <vespa/searchlib/engine/trace.h>
@@ -57,6 +58,43 @@ LazyValue get_score_feature(const RankProgram &rankProgram) {
     return resolver.resolve(0);
 }
 
+void
+fillPartialResult(ResultProcessor::Context & context, size_t totalHits, size_t numHits,
+                  const search::RankedHit *hits, const search::BitVector * bits) __attribute__((noinline));
+
+void
+fillPartialResult(ResultProcessor::Context & context, size_t totalHits, size_t numHits,
+                  const search::RankedHit *hits, const search::BitVector * bits) {
+    PartialResult &pr = *context.result;
+    pr.totalHits(totalHits);
+    size_t maxHits = std::min(numHits, pr.maxSize());
+    const search::BitVector & validLids = context._validLids;
+    if (pr.hasSortData()) {
+        FastS_SortSpec &spec = context.sort->sortSpec;
+        for (size_t i = 0; i < maxHits; ++i) {
+            if (validLids.testBit(hits[i].getDocId())) {
+                pr.add(hits[i], spec.getSortRef(i));
+            }
+        }
+    } else {
+        for (size_t i = 0; i < maxHits; ++i) {
+            if (validLids.testBit(hits[i].getDocId())) {
+                pr.add(hits[i]);
+            }
+        }
+        if ((bits != nullptr) && (pr.size() < pr.maxSize())) {
+            for (unsigned int bitId = bits->getFirstTrueBit();
+                 (bitId < bits->size()) && (pr.size() < pr.maxSize());
+                 bitId = bits->getNextTrueBit(bitId + 1))
+            {
+                if (validLids.testBit(bitId)) {
+                    pr.add(search::RankedHit(bitId));
+                }
+            }
+        }
+    }
+}
+
 } // namespace proton::matching::<unnamed>
 
 //-----------------------------------------------------------------------------
@@ -336,7 +374,6 @@ MatchThread::processResult(const Doom & doom, search::ResultSet::UP result, Resu
         result->mergeWithBitOverflow(fallback_rank_value());
     }
     if (doom.hard_doom()) return;
-    size_t             totalHits = result->getNumHits();
     const search::RankedHit *hits = result->getArray();
     size_t             numHits   = result->getArrayUsed();
     search::BitVector *bits  = result->getBitOverflow();
@@ -357,27 +394,7 @@ MatchThread::processResult(const Doom & doom, search::ResultSet::UP result, Resu
         man.groupInRelevanceOrder(hits, numHits);
     }
     if (doom.hard_doom()) return;
-    PartialResult &pr = *context.result;
-    pr.totalHits(totalHits);
-    size_t maxHits = std::min(numHits, pr.maxSize());
-    if (pr.hasSortData()) {
-        FastS_SortSpec &spec = context.sort->sortSpec;
-        for (size_t i = 0; i < maxHits; ++i) {
-            pr.add(hits[i], spec.getSortRef(i));
-        }
-    } else {
-        for (size_t i = 0; i < maxHits; ++i) {
-            pr.add(hits[i]);
-        }
-        if ((bits != nullptr) && (pr.size() < pr.maxSize())) {
-            for (unsigned int bitId = bits->getFirstTrueBit();
-                 (bitId < bits->size()) && (pr.size() < pr.maxSize());
-                 bitId = bits->getNextTrueBit(bitId + 1))
-            {
-                pr.add(search::RankedHit(bitId));
-            }
-        }
-    }
+    fillPartialResult(context, result->getNumHits(), numHits, hits, bits);
 
     if (auto task = matchToolsFactory.createOnMatchTask()) {
         task->run(result->copyResult());
@@ -479,4 +496,9 @@ MatchThread::run()
     }
 }
 
+std::unique_ptr<PartialResult>
+MatchThread::extract_result() {
+    return std::move(resultContext->result);
+}
+
 }
diff --git a/searchcore/src/vespa/searchcore/proton/matching/match_thread.h b/searchcore/src/vespa/searchcore/proton/matching/match_thread.h
index 757caae0e75..03ba34eca1f 100644
--- a/searchcore/src/vespa/searchcore/proton/matching/match_thread.h
+++ b/searchcore/src/vespa/searchcore/proton/matching/match_thread.h
@@ -5,7 +5,6 @@
 #include "i_match_loop_communicator.h"
 #include "match_params.h"
 #include "matching_stats.h"
-#include "partial_result.h"
 #include "result_processor.h"
 #include "docid_range_scheduler.h"
 #include <vespa/vespalib/util/runnable.h>
@@ -60,7 +59,7 @@ private:
     uint32_t                      _distributionKey;
     ResultProcessor              &resultProcessor;
     vespalib::DualMergeDirector  &mergeDirector;
-    ResultProcessor::Context::UP  resultContext;
+    std::unique_ptr<ResultProcessor::Context>  resultContext;
     MatchingStats::Partition      thread_stats;
     double                        total_time_s;
     double                        match_time_s;
@@ -135,7 +134,7 @@ public:
     void run() override;
     const MatchingStats::Partition &get_thread_stats() const { return thread_stats; }
     double get_match_time() const { return match_time_s; }
-    PartialResult::UP extract_result() { return std::move(resultContext->result); }
+    std::unique_ptr<PartialResult> extract_result();
     const Trace & getTrace() const { return *trace; }
     const UniqueIssues &get_issues() const { return my_issues; }
 };
diff --git a/searchcore/src/vespa/searchcore/proton/matching/partial_result.h b/searchcore/src/vespa/searchcore/proton/matching/partial_result.h
index 314fefa3cc0..d031e1893ce 100644
--- a/searchcore/src/vespa/searchcore/proton/matching/partial_result.h
+++ b/searchcore/src/vespa/searchcore/proton/matching/partial_result.h
@@ -5,7 +5,6 @@
 #include <vespa/vespalib/util/dual_merge_director.h>
 #include <vespa/searchlib/common/rankedhit.h>
 #include <vector>
-#include <cassert>
 
 namespace proton::matching {
 
@@ -16,19 +15,10 @@ namespace proton::matching {
 class PartialResult : public vespalib::DualMergeDirector::Source
 {
 public:
-    using UP = std::unique_ptr<PartialResult>;
     using SortRef = std::pair<const char *, size_t>;
-
-private:
-    std::vector<search::RankedHit> _hits;
-    std::vector<SortRef>           _sortData;
-    size_t                         _maxSize;
-    size_t                         _totalHits;
-    bool                           _hasSortData;
-    size_t                         _sortDataSize;
-
-public:
     PartialResult(size_t maxSize_in, bool hasSortData_in);
+    PartialResult(const PartialResult &) = delete;
+    PartialResult & operator =(const PartialResult &) = delete;
     ~PartialResult() override;
     size_t size() const { return _hits.size(); }
     size_t maxSize() const { return _maxSize; }
@@ -39,16 +29,21 @@ public:
     const SortRef &sortData(size_t i) const { return _sortData[i]; }
     void totalHits(size_t th) { _totalHits = th; }
     void add(const search::RankedHit &h) {
-        assert(!_hasSortData);
         _hits.push_back(h);
     }
     void add(const search::RankedHit &h, const SortRef &sd) {
-        assert(_hasSortData);
         _hits.push_back(h);
         _sortData.push_back(sd);
         _sortDataSize += sd.second;
     }
     void merge(Source &rhs) override;
+private:
+    std::vector<search::RankedHit> _hits;
+    std::vector<SortRef>           _sortData;
+    size_t                         _maxSize;
+    size_t                         _totalHits;
+    bool                           _hasSortData;
+    size_t                         _sortDataSize;
 };
 
 }
diff --git a/searchcore/src/vespa/searchcore/proton/matching/result_processor.cpp b/searchcore/src/vespa/searchcore/proton/matching/result_processor.cpp
index 1608c633124..a973e264269 100644
--- a/searchcore/src/vespa/searchcore/proton/matching/result_processor.cpp
+++ b/searchcore/src/vespa/searchcore/proton/matching/result_processor.cpp
@@ -36,8 +36,9 @@ ResultProcessor::Sort::Sort(uint32_t partitionId, const vespalib::Doom & doom, I
     }
 }
 
-ResultProcessor::Context::Context(Sort::UP s, PartialResult::UP r, GroupingContext::UP g)
-    : sort(std::move(s)),
+ResultProcessor::Context::Context(const search::BitVector & validLids, Sort::UP s, PartialResultUP r, GroupingContext::UP g)
+    : _validLids(validLids),
+      sort(std::move(s)),
       result(std::move(r)),
       grouping(std::move(g)),
       groupingSource(grouping.get())
@@ -90,7 +91,7 @@ ResultProcessor::prepareThreadContextCreation(size_t num_threads)
     }
 }
 
-ResultProcessor::Context::UP
+std::unique_ptr<ResultProcessor::Context>
 ResultProcessor::createThreadContext(const vespalib::Doom & hardDoom, size_t thread_id, uint32_t distributionKey)
 {
     auto sort = std::make_unique<Sort>(distributionKey, hardDoom, _attrContext, _sortSpec);
@@ -99,7 +100,7 @@ ResultProcessor::createThreadContext(const vespalib::Doom & hardDoom, size_t thr
     if (_groupingSession) {
         groupingContext = _groupingSession->createThreadContext(thread_id, _attrContext);
     }
-    return std::make_unique<Context>(std::move(sort), std::move(result), std::move(groupingContext));
+    return std::make_unique<Context>(_metaStore.getValidLids(), std::move(sort), std::move(result), std::move(groupingContext));
 }
 
 std::vector<std::pair<uint32_t,uint32_t>>
diff --git a/searchcore/src/vespa/searchcore/proton/matching/result_processor.h b/searchcore/src/vespa/searchcore/proton/matching/result_processor.h
index 54b9adc4723..49fd9f37063 100644
--- a/searchcore/src/vespa/searchcore/proton/matching/result_processor.h
+++ b/searchcore/src/vespa/searchcore/proton/matching/result_processor.h
@@ -14,6 +14,7 @@ namespace search {
         class GroupingSession;
     }
     struct IDocumentMetaStore;
+    class BitVector;
 }
 
 namespace proton::matching {
@@ -58,15 +59,15 @@ public:
      * Context per thread used for result processing.
      **/
     struct Context {
-        using UP = std::unique_ptr<Context>;
         using GroupingContextUP = std::unique_ptr<GroupingContext>;
 
+        const search::BitVector & _validLids;
         Sort::UP          sort;
         PartialResultUP   result;
         GroupingContextUP grouping;
         GroupingSource    groupingSource;
 
-        Context(Sort::UP s, PartialResultUP r, GroupingContextUP g);
+        Context(const search::BitVector & validLids, Sort::UP s, PartialResultUP r, GroupingContextUP g);
         ~Context();
     };
 
@@ -101,7 +102,7 @@ public:
     ~ResultProcessor();
 
     void prepareThreadContextCreation(size_t num_threads);
-    Context::UP createThreadContext(const vespalib::Doom & hardDoom, size_t thread_id, uint32_t distributionKey);
+    std::unique_ptr<Context> createThreadContext(const vespalib::Doom & hardDoom, size_t thread_id, uint32_t distributionKey);
     std::vector<std::pair<uint32_t,uint32_t>> extract_docid_ordering(const PartialResult &result) const;
     std::unique_ptr<Result> makeReply(PartialResultUP full_result);
 };
author	Henning Baldersheim <balder@yahoo-inc.com>	2023-07-04 14:32:07 +0000
committer	Henning Baldersheim <balder@yahoo-inc.com>	2023-07-04 14:32:07 +0000
commit	2aedae160b680b395ac4ec3f9bb4512e35d1c36a (patch)
tree	862a924c3975fab46a0341cde97c842c55fda934
parent	f2157bd7004552ba64090139423f536283a630d8 (diff)