summary refs log tree commit diff stats
diff options
context:
space:
mode:
-rw-r--r-- searchcore/src/vespa/searchcore/fdispatch/search/fnet_search.h 4
-rw-r--r-- searchcore/src/vespa/searchcore/fdispatch/search/mergehits.cpp 41
-rw-r--r-- searchcore/src/vespa/searchcore/fdispatch/search/mergehits.h 1
3 files changed, 35 insertions, 11 deletions
diff --git a/searchcore/src/vespa/searchcore/fdispatch/search/fnet_search.h b/searchcore/src/vespa/searchcore/fdispatch/search/fnet_search.h
index a19dcff025d..ca0053e0261 100644
--- a/searchcore/src/vespa/searchcore/fdispatch/search/fnet_search.h
+++ b/searchcore/src/vespa/searchcore/fdispatch/search/fnet_search.h
@@ -329,6 +329,10 @@ public:
_util.CalcHitCount();
_util.AllocAlignedHitBuf();
}
+ void ST_AdjustNumHits(uint32_t numHits) { // Overrides the aligned hit count with numHits, then re-runs CalcHitCount(); MergeHits() calls this with the number of hits the merge actually produced.
+ _util.SetAlignedHitCount(numHits);
+ _util.CalcHitCount(); // must follow SetAlignedHitCount(); presumably re-derives the hit count from the aligned count - confirm in the _util class
+ }
uint32_t ST_GetAlignedSearchOffset() const { return _util.GetAlignedSearchOffset(); }
uint32_t ST_GetAlignedMaxHits() const { return _util.GetAlignedMaxHits(); }
uint32_t ST_GetAlignedHitCount() const { return _util.GetAlignedHitCount(); }
diff --git a/searchcore/src/vespa/searchcore/fdispatch/search/mergehits.cpp b/searchcore/src/vespa/searchcore/fdispatch/search/mergehits.cpp
index 846d95bd722..8c4a08a3bbb 100644
--- a/searchcore/src/vespa/searchcore/fdispatch/search/mergehits.cpp
+++ b/searchcore/src/vespa/searchcore/fdispatch/search/mergehits.cpp
@@ -5,6 +5,8 @@
#include "fnet_dataset.h"
#include "fnet_search.h"
#include <vespa/searchcore/util/stlishheap.h>
+#include <vespa/vespalib/stllike/hash_set.h>
+#include <vespa/vespalib/stllike/hash_set.hpp>
#include <vespa/log/log.h>
LOG_SETUP(".fdispatch.mergehits");
@@ -65,10 +67,17 @@ FastS_MergeCopyHit(typename T::HitType *src,
dst->setDistributionKey(src->getDistributionKey());
}
+// Remembers the global ids already emitted by the merge so that a document
+// returned by more than one node is only copied to the result once.
+struct GlobalIdHasher {
+ vespalib::hash_set<document::GlobalId, document::GlobalId::hash> seenSet;
+ // Records g_id in the set and returns the `.second` of the insert result;
+ // the merge loop treats true as "first time seen, use this hit".
+ bool insert(const document::GlobalId & g_id) {
+ return seenSet.insert(g_id).second;
+ }
+ // expected_size: number of hits the merge is expected to produce. The set is
+ // constructed with three times that value (presumably to keep the load
+ // factor low - confirm against vespalib::hash_set).
+ // explicit: a bare size must never convert silently into a GlobalIdHasher.
+ explicit GlobalIdHasher(size_t expected_size) : seenSet(expected_size * 3) {}
+};
template <typename T, typename F>
-void
+size_t
FastS_InternalMergeHits(FastS_HitMerger<T> *merger)
{
typename T::SearchType *search = merger->GetSearch();
@@ -89,18 +98,22 @@ FastS_InternalMergeHits(FastS_HitMerger<T> *merger)
sortItr = sortRef;
}
+ GlobalIdHasher seenGids(end - beg);
+
FastS_make_heap(heap, heapSize, FastS_MergeCompare<T, F>);
- while (pt < end) {
+ while ((pt < end) && (heapSize > 0)) {
node = *heap;
- FastS_assert(heapSize > 0);
+ bool useHit = seenGids.insert(node->NT_GetHit()->HT_GetGlobalID());
if (F::UseSortData()) {
- if (!F::DropSortData()) {
+ if (!F::DropSortData() && useHit) {
FastS_MergeCopySortData<T>(node, sortItr++, sortDataLen);
}
node->NT_GetSortDataIterator()->Next();
}
- FastS_MergeCopyHit<T>(node->NT_GetHit(), pt++);
+ if (useHit) {
+ FastS_MergeCopyHit<T>(node->NT_GetHit(), pt++);
+ }
node->NT_NextHit();
if (node->NT_GetNumHitsLeft() > 0) {
FastS_pop_push_heap(heap, heapSize, node, FastS_MergeCompare<T, F>);
@@ -108,9 +121,13 @@ FastS_InternalMergeHits(FastS_HitMerger<T> *merger)
FastS_pop_heap(heap, heapSize--, FastS_MergeCompare<T, F>);
}
}
+ if (pt != end) {
+ LOG(warning, "Duplicate removal lead to %zd missing hits (wanted %zd, got %zd)",
+ end - pt, end - beg, pt - beg);
+ }
merger->SetLastNode(node); // source of last hit
if (F::UseSortData()) {
- FastS_assert(F::DropSortData() || sortItr == sortRef + (end - beg));
+ FastS_assert(F::DropSortData() || sortItr == sortRef + (pt - beg));
}
// generate merged sort data
@@ -124,16 +141,17 @@ FastS_InternalMergeHits(FastS_HitMerger<T> *merger)
char *sortData = search->ST_GetSortData();
sortItr = sortRef;
- for (uint32_t residue = (end - beg); residue > 0; residue--) {
+ for (uint32_t residue = (pt - beg); residue > 0; residue--) {
*sortIdx++ = offset;
memcpy(sortData + offset, sortItr->_buf, sortItr->_len);
offset += sortItr->_len;
sortItr++;
}
*sortIdx = offset;
- FastS_assert(sortItr == sortRef + (end - beg));
+ FastS_assert(sortItr == sortRef + (pt - beg));
FastS_assert(offset == sortDataLen);
}
+ return (pt - beg);
}
//-----------------------------------------------------------------------------
@@ -219,16 +237,17 @@ FastS_HitMerger<T>::MergeHits()
// do actual merging by invoking templated function
if (useSortData) {
if (dropSortData) {
- FastS_InternalMergeHits
+ numDocs = FastS_InternalMergeHits
<T, FastS_MergeFeatures<true, true> >(this);
} else {
- FastS_InternalMergeHits
+ numDocs = FastS_InternalMergeHits
<T, FastS_MergeFeatures<true, false> >(this);
}
} else {
- FastS_InternalMergeHits
+ numDocs = FastS_InternalMergeHits
<T, FastS_MergeFeatures<false, false> >(this);
}
+ _search->ST_AdjustNumHits(numDocs);
// detect incomplete/fuzzy results
if (_search->ST_ShouldLimitHitsPerNode()) {
diff --git a/searchcore/src/vespa/searchcore/fdispatch/search/mergehits.h b/searchcore/src/vespa/searchcore/fdispatch/search/mergehits.h
index 79157acb175..306229b4730 100644
--- a/searchcore/src/vespa/searchcore/fdispatch/search/mergehits.h
+++ b/searchcore/src/vespa/searchcore/fdispatch/search/mergehits.h
@@ -64,6 +64,7 @@ struct FastS_MergeHits_DummySearch
bool ST_ShouldDropSortData() { return false; }
bool ST_ShouldLimitHitsPerNode() { return false; }
void ST_SetNumHits(uint32_t numHits) { (void) numHits; }
+ void ST_AdjustNumHits(uint32_t nH) { (void) nH; } // no-op counterpart for the dummy search type; the (void) cast only discards the unused argument
uint32_t ST_GetAlignedSearchOffset() { return 0; }
uint32_t ST_GetAlignedMaxHits() { return 0; }
uint32_t ST_GetAlignedHitCount() { return 0; }