diff options
author | Tor Egge <Tor.Egge@broadpark.no> | 2020-06-08 21:25:55 +0200 |
---|---|---|
committer | Tor Egge <Tor.Egge@broadpark.no> | 2020-06-09 11:15:56 +0200 |
commit | 4cc712585ad755850f6e9af5930581bb45f5d749 (patch) | |
tree | dd635c772ffdfb5bff6e1e0932745321de1b5603 /searchlib | |
parent | 66c66aa167c2ba431943ad7287da3f20b11a05ab (diff) |
Handle fetchPostings being called multiple times.
Diffstat (limited to 'searchlib')
8 files changed, 62 insertions, 66 deletions
diff --git a/searchlib/src/vespa/searchlib/attribute/imported_search_context.cpp b/searchlib/src/vespa/searchlib/attribute/imported_search_context.cpp index 5f49111f77b..bc0d965bcc1 100644 --- a/searchlib/src/vespa/searchlib/attribute/imported_search_context.cpp +++ b/searchlib/src/vespa/searchlib/attribute/imported_search_context.cpp @@ -44,7 +44,6 @@ ImportedSearchContext::ImportedSearchContext( _target_search_context(_target_attribute.createSearchContext(std::move(term), params)), _targetLids(_reference_attribute.getTargetLids()), _merger(_reference_attribute.getCommittedDocIdLimit()), - _fetchPostingsDone(false), _params(params) { } @@ -239,15 +238,11 @@ ImportedSearchContext::considerAddSearchCacheEntry() } void ImportedSearchContext::fetchPostings(const queryeval::ExecuteInfo &execInfo) { - assert(!_fetchPostingsDone); - _fetchPostingsDone = true; if (!_searchCacheLookup) { _target_search_context->fetchPostings(execInfo); - if (execInfo.isStrict() - || (_target_attribute.getIsFastSearch() && execInfo.hitRate() > 0.01)) - { - makeMergedPostings(_target_attribute.getIsFilter()); - considerAddSearchCacheEntry(); + if (!_merger.merge_done() && (execInfo.isStrict() || (_target_attribute.getIsFastSearch() && execInfo.hitRate() > 0.01))) { + makeMergedPostings(_target_attribute.getIsFilter()); + considerAddSearchCacheEntry(); } } } diff --git a/searchlib/src/vespa/searchlib/attribute/imported_search_context.h b/searchlib/src/vespa/searchlib/attribute/imported_search_context.h index 1c73ac6c8c2..4c3b6a89a14 100644 --- a/searchlib/src/vespa/searchlib/attribute/imported_search_context.h +++ b/searchlib/src/vespa/searchlib/attribute/imported_search_context.h @@ -37,7 +37,6 @@ class ImportedSearchContext : public ISearchContext { std::unique_ptr<ISearchContext> _target_search_context; TargetLids _targetLids; PostingListMerger<int32_t> _merger; - bool _fetchPostingsDone; SearchContextParams _params; uint32_t getTargetLid(uint32_t lid) const { diff --git a/searchlib/src/vespa/searchlib/attribute/posting_list_merger.h b/searchlib/src/vespa/searchlib/attribute/posting_list_merger.h index 6a10ba73951..1c2e6583ad9 100644 --- a/searchlib/src/vespa/searchlib/attribute/posting_list_merger.h +++ b/searchlib/src/vespa/searchlib/attribute/posting_list_merger.h @@ -62,6 +62,8 @@ public: { if (__builtin_expect(key < limit, true)) { bv.setBit(key); } }); } + bool merge_done() const { return hasArray() || hasBitVector(); } + // Until diversity handling has been rewritten PostingVector &getWritableArray() { return _array; } StartVector &getWritableStartPos() { return _startPos; } diff --git a/searchlib/src/vespa/searchlib/attribute/postinglistsearchcontext.h b/searchlib/src/vespa/searchlib/attribute/postinglistsearchcontext.h index 69450acd98d..8cd7a7064f6 100644 --- a/searchlib/src/vespa/searchlib/attribute/postinglistsearchcontext.h +++ b/searchlib/src/vespa/searchlib/attribute/postinglistsearchcontext.h @@ -107,7 +107,6 @@ protected: * Synthetic posting lists for range search, in array or bitvector form */ PostingListMerger<DataT> _merger; - bool _fetchPostingsDone; static const long MIN_UNIQUE_VALUES_BEFORE_APPROXIMATION = 100; static const long MIN_UNIQUE_VALUES_TO_NUMDOCS_RATIO_BEFORE_APPROXIMATION = 20; diff --git a/searchlib/src/vespa/searchlib/attribute/postinglistsearchcontext.hpp b/searchlib/src/vespa/searchlib/attribute/postinglistsearchcontext.hpp index 4cd8db9010a..09e5a9da5bc 100644 --- a/searchlib/src/vespa/searchlib/attribute/postinglistsearchcontext.hpp +++ b/searchlib/src/vespa/searchlib/attribute/postinglistsearchcontext.hpp @@ -25,8 +25,7 @@ PostingListSearchContextT(const Dictionary &dictionary, uint32_t docIdLimit, uin uint32_t minBvDocFreq, bool useBitVector, const ISearchContext &searchContext) : PostingListSearchContext(dictionary, docIdLimit, numValues, hasWeight, esb, minBvDocFreq, useBitVector, searchContext), _postingList(postingList), - _merger(docIdLimit), - _fetchPostingsDone(false) + _merger(docIdLimit) { } @@ -116,22 +115,18 @@ template <typename DataT> void PostingListSearchContextT<DataT>::fetchPostings(const queryeval::ExecuteInfo & execInfo) { - if (_fetchPostingsDone) return; - - _fetchPostingsDone = true; - - if (_uniqueValues < 2u) return; - - if (execInfo.isStrict() && !fallbackToFiltering()) { - size_t sum(countHits()); - if (sum < _docIdLimit / 64) { - _merger.reserveArray(_uniqueValues, sum); - fillArray(); - } else { - _merger.allocBitVector(); - fillBitVector(); + if (!_merger.merge_done() && _uniqueValues >= 2u) { + if (execInfo.isStrict() && !fallbackToFiltering()) { + size_t sum(countHits()); + if (sum < _docIdLimit / 64) { + _merger.reserveArray(_uniqueValues, sum); + fillArray(); + } else { + _merger.allocBitVector(); + fillBitVector(); + } + _merger.merge(); } - _merger.merge(); } } @@ -141,12 +136,12 @@ void PostingListSearchContextT<DataT>::diversify(bool forward, size_t wanted_hits, const IAttributeVector &diversity_attr, size_t max_per_group, size_t cutoff_groups, bool cutoff_strict) { - assert(!_fetchPostingsDone); - _fetchPostingsDone = true; - _merger.reserveArray(128, wanted_hits); - diversity::diversify(forward, _lowerDictItr, _upperDictItr, _postingList, wanted_hits, diversity_attr, - max_per_group, cutoff_groups, cutoff_strict, _merger.getWritableArray(), _merger.getWritableStartPos()); - _merger.merge(); + if (!_merger.merge_done()) { + _merger.reserveArray(128, wanted_hits); + diversity::diversify(forward, _lowerDictItr, _upperDictItr, _postingList, wanted_hits, diversity_attr, + max_per_group, cutoff_groups, cutoff_strict, _merger.getWritableArray(), _merger.getWritableStartPos()); + _merger.merge(); + } } @@ -155,7 +150,6 @@ SearchIterator::UP PostingListSearchContextT<DataT>:: createPostingIterator(fef::TermFieldMatchData *matchData, bool strict) { - assert(_fetchPostingsDone); if (_uniqueValues == 0u) { return std::make_unique<EmptySearch>(); } diff --git a/searchlib/src/vespa/searchlib/diskindex/disktermblueprint.cpp b/searchlib/src/vespa/searchlib/diskindex/disktermblueprint.cpp index 09024505450..8231d0b4cd7 100644 --- a/searchlib/src/vespa/searchlib/diskindex/disktermblueprint.cpp +++ b/searchlib/src/vespa/searchlib/diskindex/disktermblueprint.cpp @@ -70,10 +70,12 @@ void DiskTermBlueprint::fetchPostings(const queryeval::ExecuteInfo &execInfo) { (void) execInfo; - _hasEquivParent = areAnyParentsEquiv(getParent()); - _bitVector = _diskIndex.readBitVector(*_lookupRes); - if (!_useBitVector || !_bitVector) { - _postingHandle = _diskIndex.readPostingList(*_lookupRes); + if (!_fetchPostingsDone) { + _hasEquivParent = areAnyParentsEquiv(getParent()); + _bitVector = _diskIndex.readBitVector(*_lookupRes); + if (!_useBitVector || !_bitVector) { + _postingHandle = _diskIndex.readPostingList(*_lookupRes); + } } _fetchPostingsDone = true; } diff --git a/searchlib/src/vespa/searchlib/queryeval/predicate_blueprint.cpp b/searchlib/src/vespa/searchlib/queryeval/predicate_blueprint.cpp index 034f57bbb36..97e7044ec0f 100644 --- a/searchlib/src/vespa/searchlib/queryeval/predicate_blueprint.cpp +++ b/searchlib/src/vespa/searchlib/queryeval/predicate_blueprint.cpp @@ -163,7 +163,8 @@ PredicateBlueprint::PredicateBlueprint(const FieldSpecBase &field, _bounds_btree_iterators(), _bounds_vector_iterators(), _zstar_btree_iterator(), - _zstar_vector_iterator() + _zstar_vector_iterator(), + _fetch_postings_done(false) { const auto &interval_index = _index.getIntervalIndex(); const auto zero_constraints_docs = _index.getZeroConstraintDocs(); @@ -234,36 +235,39 @@ namespace { } void PredicateBlueprint::fetchPostings(const ExecuteInfo &) { - const auto &interval_index = _index.getIntervalIndex(); - const auto &bounds_index = _index.getBoundsIndex(); - lookupPostingLists(_interval_dict_entries, _interval_vector_iterators, - _interval_btree_iterators, interval_index); - lookupPostingLists(_bounds_dict_entries, _bounds_vector_iterators, - _bounds_btree_iterators, bounds_index); - - // Lookup zstar interval iterator - if (_zstar_dict_entry.valid()) { - auto vector_iterator = interval_index.getVectorPostingList(Constants::z_star_compressed_hash); - if (vector_iterator) { - _zstar_vector_iterator.emplace(std::move(*vector_iterator)); - } else { - _zstar_btree_iterator.emplace(interval_index.getBTreePostingList(_zstar_dict_entry)); + if (!_fetch_postings_done) { + const auto &interval_index = _index.getIntervalIndex(); + const auto &bounds_index = _index.getBoundsIndex(); + lookupPostingLists(_interval_dict_entries, _interval_vector_iterators, + _interval_btree_iterators, interval_index); + lookupPostingLists(_bounds_dict_entries, _bounds_vector_iterators, + _bounds_btree_iterators, bounds_index); + + // Lookup zstar interval iterator + if (_zstar_dict_entry.valid()) { + auto vector_iterator = interval_index.getVectorPostingList(Constants::z_star_compressed_hash); + if (vector_iterator) { + _zstar_vector_iterator.emplace(std::move(*vector_iterator)); + } else { + _zstar_btree_iterator.emplace(interval_index.getBTreePostingList(_zstar_dict_entry)); + } } - } - PredicateAttribute::MinFeatureHandle mfh = predicate_attribute().getMinFeatureVector(); - Alloc kv(Alloc::alloc(mfh.second, vespalib::alloc::MemoryAllocator::HUGEPAGE_SIZE*4)); - _kVBacking.swap(kv); - _kV = BitVectorCache::CountVector(static_cast<uint8_t *>(_kVBacking.get()), mfh.second); - _index.computeCountVector(_cachedFeatures, _kV); - for (const auto & entry : _bounds_dict_entries) { - addBoundsPostingToK(entry.feature); - } - for (const auto & entry : _interval_dict_entries) { - addPostingToK(entry.feature); + PredicateAttribute::MinFeatureHandle mfh = predicate_attribute().getMinFeatureVector(); + Alloc kv(Alloc::alloc(mfh.second, vespalib::alloc::MemoryAllocator::HUGEPAGE_SIZE*4)); + _kVBacking.swap(kv); + _kV = BitVectorCache::CountVector(static_cast<uint8_t *>(_kVBacking.get()), mfh.second); + _index.computeCountVector(_cachedFeatures, _kV); + for (const auto & entry : _bounds_dict_entries) { + addBoundsPostingToK(entry.feature); + } + for (const auto & entry : _interval_dict_entries) { + addPostingToK(entry.feature); + } + addPostingToK(Constants::z_star_compressed_hash); + addZeroConstraintToK(); + _fetch_postings_done = true; } - addPostingToK(Constants::z_star_compressed_hash); - addZeroConstraintToK(); } SearchIterator::UP diff --git a/searchlib/src/vespa/searchlib/queryeval/predicate_blueprint.h b/searchlib/src/vespa/searchlib/queryeval/predicate_blueprint.h index c9a19a0f5bb..9609cd4f6c9 100644 --- a/searchlib/src/vespa/searchlib/queryeval/predicate_blueprint.h +++ b/searchlib/src/vespa/searchlib/queryeval/predicate_blueprint.h @@ -87,6 +87,7 @@ private: // The zstar iterator is either a vector or a btree iterator. optional<BTreeIterator> _zstar_btree_iterator; optional<VectorIterator> _zstar_vector_iterator; + bool _fetch_postings_done; }; } |