summary | refs | log | tree | commit | diff | stats
path: root/searchlib
diff options
context:
space:
mode:
author: Tor Egge <Tor.Egge@broadpark.no> 2020-06-08 21:25:55 +0200
committer: Tor Egge <Tor.Egge@broadpark.no> 2020-06-09 11:15:56 +0200
commit: 4cc712585ad755850f6e9af5930581bb45f5d749 (patch)
tree: dd635c772ffdfb5bff6e1e0932745321de1b5603 /searchlib
parent: 66c66aa167c2ba431943ad7287da3f20b11a05ab (diff)
Handle fetchPostings being called multiple times.
Diffstat (limited to 'searchlib')
-rw-r--r-- searchlib/src/vespa/searchlib/attribute/imported_search_context.cpp | 11
-rw-r--r-- searchlib/src/vespa/searchlib/attribute/imported_search_context.h | 1
-rw-r--r-- searchlib/src/vespa/searchlib/attribute/posting_list_merger.h | 2
-rw-r--r-- searchlib/src/vespa/searchlib/attribute/postinglistsearchcontext.h | 1
-rw-r--r-- searchlib/src/vespa/searchlib/attribute/postinglistsearchcontext.hpp | 42
-rw-r--r-- searchlib/src/vespa/searchlib/diskindex/disktermblueprint.cpp | 10
-rw-r--r-- searchlib/src/vespa/searchlib/queryeval/predicate_blueprint.cpp | 60
-rw-r--r-- searchlib/src/vespa/searchlib/queryeval/predicate_blueprint.h | 1
8 files changed, 62 insertions, 66 deletions
diff --git a/searchlib/src/vespa/searchlib/attribute/imported_search_context.cpp b/searchlib/src/vespa/searchlib/attribute/imported_search_context.cpp
index 5f49111f77b..bc0d965bcc1 100644
--- a/searchlib/src/vespa/searchlib/attribute/imported_search_context.cpp
+++ b/searchlib/src/vespa/searchlib/attribute/imported_search_context.cpp
@@ -44,7 +44,6 @@ ImportedSearchContext::ImportedSearchContext(
_target_search_context(_target_attribute.createSearchContext(std::move(term), params)),
_targetLids(_reference_attribute.getTargetLids()),
_merger(_reference_attribute.getCommittedDocIdLimit()),
- _fetchPostingsDone(false),
_params(params)
{
}
@@ -239,15 +238,11 @@ ImportedSearchContext::considerAddSearchCacheEntry()
}
void ImportedSearchContext::fetchPostings(const queryeval::ExecuteInfo &execInfo) {
- assert(!_fetchPostingsDone);
- _fetchPostingsDone = true;
if (!_searchCacheLookup) {
_target_search_context->fetchPostings(execInfo);
- if (execInfo.isStrict()
- || (_target_attribute.getIsFastSearch() && execInfo.hitRate() > 0.01))
- {
- makeMergedPostings(_target_attribute.getIsFilter());
- considerAddSearchCacheEntry();
+ if (!_merger.merge_done() && (execInfo.isStrict() || (_target_attribute.getIsFastSearch() && execInfo.hitRate() > 0.01))) {
+ makeMergedPostings(_target_attribute.getIsFilter());
+ considerAddSearchCacheEntry();
}
}
}
diff --git a/searchlib/src/vespa/searchlib/attribute/imported_search_context.h b/searchlib/src/vespa/searchlib/attribute/imported_search_context.h
index 1c73ac6c8c2..4c3b6a89a14 100644
--- a/searchlib/src/vespa/searchlib/attribute/imported_search_context.h
+++ b/searchlib/src/vespa/searchlib/attribute/imported_search_context.h
@@ -37,7 +37,6 @@ class ImportedSearchContext : public ISearchContext {
std::unique_ptr<ISearchContext> _target_search_context;
TargetLids _targetLids;
PostingListMerger<int32_t> _merger;
- bool _fetchPostingsDone;
SearchContextParams _params;
uint32_t getTargetLid(uint32_t lid) const {
diff --git a/searchlib/src/vespa/searchlib/attribute/posting_list_merger.h b/searchlib/src/vespa/searchlib/attribute/posting_list_merger.h
index 6a10ba73951..1c2e6583ad9 100644
--- a/searchlib/src/vespa/searchlib/attribute/posting_list_merger.h
+++ b/searchlib/src/vespa/searchlib/attribute/posting_list_merger.h
@@ -62,6 +62,8 @@ public:
{ if (__builtin_expect(key < limit, true)) { bv.setBit(key); } });
}
+ bool merge_done() const { return hasArray() || hasBitVector(); }
+
// Until diversity handling has been rewritten
PostingVector &getWritableArray() { return _array; }
StartVector &getWritableStartPos() { return _startPos; }
diff --git a/searchlib/src/vespa/searchlib/attribute/postinglistsearchcontext.h b/searchlib/src/vespa/searchlib/attribute/postinglistsearchcontext.h
index 69450acd98d..8cd7a7064f6 100644
--- a/searchlib/src/vespa/searchlib/attribute/postinglistsearchcontext.h
+++ b/searchlib/src/vespa/searchlib/attribute/postinglistsearchcontext.h
@@ -107,7 +107,6 @@ protected:
* Synthetic posting lists for range search, in array or bitvector form
*/
PostingListMerger<DataT> _merger;
- bool _fetchPostingsDone;
static const long MIN_UNIQUE_VALUES_BEFORE_APPROXIMATION = 100;
static const long MIN_UNIQUE_VALUES_TO_NUMDOCS_RATIO_BEFORE_APPROXIMATION = 20;
diff --git a/searchlib/src/vespa/searchlib/attribute/postinglistsearchcontext.hpp b/searchlib/src/vespa/searchlib/attribute/postinglistsearchcontext.hpp
index 4cd8db9010a..09e5a9da5bc 100644
--- a/searchlib/src/vespa/searchlib/attribute/postinglistsearchcontext.hpp
+++ b/searchlib/src/vespa/searchlib/attribute/postinglistsearchcontext.hpp
@@ -25,8 +25,7 @@ PostingListSearchContextT(const Dictionary &dictionary, uint32_t docIdLimit, uin
uint32_t minBvDocFreq, bool useBitVector, const ISearchContext &searchContext)
: PostingListSearchContext(dictionary, docIdLimit, numValues, hasWeight, esb, minBvDocFreq, useBitVector, searchContext),
_postingList(postingList),
- _merger(docIdLimit),
- _fetchPostingsDone(false)
+ _merger(docIdLimit)
{
}
@@ -116,22 +115,18 @@ template <typename DataT>
void
PostingListSearchContextT<DataT>::fetchPostings(const queryeval::ExecuteInfo & execInfo)
{
- if (_fetchPostingsDone) return;
-
- _fetchPostingsDone = true;
-
- if (_uniqueValues < 2u) return;
-
- if (execInfo.isStrict() && !fallbackToFiltering()) {
- size_t sum(countHits());
- if (sum < _docIdLimit / 64) {
- _merger.reserveArray(_uniqueValues, sum);
- fillArray();
- } else {
- _merger.allocBitVector();
- fillBitVector();
+ if (!_merger.merge_done() && _uniqueValues >= 2u) {
+ if (execInfo.isStrict() && !fallbackToFiltering()) {
+ size_t sum(countHits());
+ if (sum < _docIdLimit / 64) {
+ _merger.reserveArray(_uniqueValues, sum);
+ fillArray();
+ } else {
+ _merger.allocBitVector();
+ fillBitVector();
+ }
+ _merger.merge();
}
- _merger.merge();
}
}
@@ -141,12 +136,12 @@ void
PostingListSearchContextT<DataT>::diversify(bool forward, size_t wanted_hits, const IAttributeVector &diversity_attr,
size_t max_per_group, size_t cutoff_groups, bool cutoff_strict)
{
- assert(!_fetchPostingsDone);
- _fetchPostingsDone = true;
- _merger.reserveArray(128, wanted_hits);
- diversity::diversify(forward, _lowerDictItr, _upperDictItr, _postingList, wanted_hits, diversity_attr,
- max_per_group, cutoff_groups, cutoff_strict, _merger.getWritableArray(), _merger.getWritableStartPos());
- _merger.merge();
+ if (!_merger.merge_done()) {
+ _merger.reserveArray(128, wanted_hits);
+ diversity::diversify(forward, _lowerDictItr, _upperDictItr, _postingList, wanted_hits, diversity_attr,
+ max_per_group, cutoff_groups, cutoff_strict, _merger.getWritableArray(), _merger.getWritableStartPos());
+ _merger.merge();
+ }
}
@@ -155,7 +150,6 @@ SearchIterator::UP
PostingListSearchContextT<DataT>::
createPostingIterator(fef::TermFieldMatchData *matchData, bool strict)
{
- assert(_fetchPostingsDone);
if (_uniqueValues == 0u) {
return std::make_unique<EmptySearch>();
}
diff --git a/searchlib/src/vespa/searchlib/diskindex/disktermblueprint.cpp b/searchlib/src/vespa/searchlib/diskindex/disktermblueprint.cpp
index 09024505450..8231d0b4cd7 100644
--- a/searchlib/src/vespa/searchlib/diskindex/disktermblueprint.cpp
+++ b/searchlib/src/vespa/searchlib/diskindex/disktermblueprint.cpp
@@ -70,10 +70,12 @@ void
DiskTermBlueprint::fetchPostings(const queryeval::ExecuteInfo &execInfo)
{
(void) execInfo;
- _hasEquivParent = areAnyParentsEquiv(getParent());
- _bitVector = _diskIndex.readBitVector(*_lookupRes);
- if (!_useBitVector || !_bitVector) {
- _postingHandle = _diskIndex.readPostingList(*_lookupRes);
+ if (!_fetchPostingsDone) {
+ _hasEquivParent = areAnyParentsEquiv(getParent());
+ _bitVector = _diskIndex.readBitVector(*_lookupRes);
+ if (!_useBitVector || !_bitVector) {
+ _postingHandle = _diskIndex.readPostingList(*_lookupRes);
+ }
}
_fetchPostingsDone = true;
}
diff --git a/searchlib/src/vespa/searchlib/queryeval/predicate_blueprint.cpp b/searchlib/src/vespa/searchlib/queryeval/predicate_blueprint.cpp
index 034f57bbb36..97e7044ec0f 100644
--- a/searchlib/src/vespa/searchlib/queryeval/predicate_blueprint.cpp
+++ b/searchlib/src/vespa/searchlib/queryeval/predicate_blueprint.cpp
@@ -163,7 +163,8 @@ PredicateBlueprint::PredicateBlueprint(const FieldSpecBase &field,
_bounds_btree_iterators(),
_bounds_vector_iterators(),
_zstar_btree_iterator(),
- _zstar_vector_iterator()
+ _zstar_vector_iterator(),
+ _fetch_postings_done(false)
{
const auto &interval_index = _index.getIntervalIndex();
const auto zero_constraints_docs = _index.getZeroConstraintDocs();
@@ -234,36 +235,39 @@ namespace {
}
void PredicateBlueprint::fetchPostings(const ExecuteInfo &) {
- const auto &interval_index = _index.getIntervalIndex();
- const auto &bounds_index = _index.getBoundsIndex();
- lookupPostingLists(_interval_dict_entries, _interval_vector_iterators,
- _interval_btree_iterators, interval_index);
- lookupPostingLists(_bounds_dict_entries, _bounds_vector_iterators,
- _bounds_btree_iterators, bounds_index);
-
- // Lookup zstar interval iterator
- if (_zstar_dict_entry.valid()) {
- auto vector_iterator = interval_index.getVectorPostingList(Constants::z_star_compressed_hash);
- if (vector_iterator) {
- _zstar_vector_iterator.emplace(std::move(*vector_iterator));
- } else {
- _zstar_btree_iterator.emplace(interval_index.getBTreePostingList(_zstar_dict_entry));
+ if (!_fetch_postings_done) {
+ const auto &interval_index = _index.getIntervalIndex();
+ const auto &bounds_index = _index.getBoundsIndex();
+ lookupPostingLists(_interval_dict_entries, _interval_vector_iterators,
+ _interval_btree_iterators, interval_index);
+ lookupPostingLists(_bounds_dict_entries, _bounds_vector_iterators,
+ _bounds_btree_iterators, bounds_index);
+
+ // Lookup zstar interval iterator
+ if (_zstar_dict_entry.valid()) {
+ auto vector_iterator = interval_index.getVectorPostingList(Constants::z_star_compressed_hash);
+ if (vector_iterator) {
+ _zstar_vector_iterator.emplace(std::move(*vector_iterator));
+ } else {
+ _zstar_btree_iterator.emplace(interval_index.getBTreePostingList(_zstar_dict_entry));
+ }
}
- }
- PredicateAttribute::MinFeatureHandle mfh = predicate_attribute().getMinFeatureVector();
- Alloc kv(Alloc::alloc(mfh.second, vespalib::alloc::MemoryAllocator::HUGEPAGE_SIZE*4));
- _kVBacking.swap(kv);
- _kV = BitVectorCache::CountVector(static_cast<uint8_t *>(_kVBacking.get()), mfh.second);
- _index.computeCountVector(_cachedFeatures, _kV);
- for (const auto & entry : _bounds_dict_entries) {
- addBoundsPostingToK(entry.feature);
- }
- for (const auto & entry : _interval_dict_entries) {
- addPostingToK(entry.feature);
+ PredicateAttribute::MinFeatureHandle mfh = predicate_attribute().getMinFeatureVector();
+ Alloc kv(Alloc::alloc(mfh.second, vespalib::alloc::MemoryAllocator::HUGEPAGE_SIZE*4));
+ _kVBacking.swap(kv);
+ _kV = BitVectorCache::CountVector(static_cast<uint8_t *>(_kVBacking.get()), mfh.second);
+ _index.computeCountVector(_cachedFeatures, _kV);
+ for (const auto & entry : _bounds_dict_entries) {
+ addBoundsPostingToK(entry.feature);
+ }
+ for (const auto & entry : _interval_dict_entries) {
+ addPostingToK(entry.feature);
+ }
+ addPostingToK(Constants::z_star_compressed_hash);
+ addZeroConstraintToK();
+ _fetch_postings_done = true;
}
- addPostingToK(Constants::z_star_compressed_hash);
- addZeroConstraintToK();
}
SearchIterator::UP
diff --git a/searchlib/src/vespa/searchlib/queryeval/predicate_blueprint.h b/searchlib/src/vespa/searchlib/queryeval/predicate_blueprint.h
index c9a19a0f5bb..9609cd4f6c9 100644
--- a/searchlib/src/vespa/searchlib/queryeval/predicate_blueprint.h
+++ b/searchlib/src/vespa/searchlib/queryeval/predicate_blueprint.h
@@ -87,6 +87,7 @@ private:
// The zstar iterator is either a vector or a btree iterator.
optional<BTreeIterator> _zstar_btree_iterator;
optional<VectorIterator> _zstar_vector_iterator;
+ bool _fetch_postings_done;
};
}