diff options
author | Henning Baldersheim <balder@oath.com> | 2018-05-07 15:56:58 +0200 |
---|---|---|
committer | Henning Baldersheim <balder@oath.com> | 2018-05-08 10:40:06 +0200 |
commit | 166a0dc167b0fa87cd62d74b5b77ece472e68bce (patch) | |
tree | 850844ed8fa002032b6df1967c096d3427b4ae8f | |
parent | 0196b41f4e5c11188f020a17a4c48ecfcf638293 (diff) |
Use the prefilter if present.
7 files changed, 60 insertions, 45 deletions
diff --git a/searchlib/src/vespa/searchlib/attribute/diversity.h b/searchlib/src/vespa/searchlib/attribute/diversity.h index fe2874a65a1..3425b17cb6e 100644 --- a/searchlib/src/vespa/searchlib/attribute/diversity.h +++ b/searchlib/src/vespa/searchlib/attribute/diversity.h @@ -4,6 +4,7 @@ #include "singleenumattribute.h" #include "singlenumericattribute.h" +#include <vespa/searchlib/common/prefilter.h> #include <vespa/vespalib/stllike/hash_map.h> /** @@ -157,17 +158,25 @@ template <typename DictRange, typename PostingStore, typename Fetcher, typename void diversify_3(const DictRange &range_in, const PostingStore &posting, size_t wanted_hits, const Fetcher &diversity, size_t max_per_group, size_t cutoff_max_groups, bool cutoff_strict, - Result &result, std::vector<size_t> &fragments) + Result &result, std::vector<size_t> &fragments, const PreFilter * preFilter) { + (void) preFilter; DictRange range(range_in); using DataType = typename PostingStore::DataType; using KeyDataType = typename PostingStore::KeyDataType; DiversityFilter<Fetcher, Result> filter(diversity, max_per_group, cutoff_max_groups, cutoff_strict, result, wanted_hits); while (range.has_next() && (result.size() < wanted_hits)) { typename DictRange::Next dict_entry(range); - posting.foreach_frozen(dict_entry.get().getData(), - [&](uint32_t key, const DataType &data) - { filter.push_back(KeyDataType(key, data)); }); + if (preFilter) { + posting.foreach_frozen(dict_entry.get().getData(), + [&](uint32_t key, const DataType &data) + { if (preFilter->keep(key)) { filter.push_back(KeyDataType(key, data)); }}); + } else { + posting.foreach_frozen(dict_entry.get().getData(), + [&](uint32_t key, const DataType &data) + { filter.push_back(KeyDataType(key, data)); }); + } + if (fragments.back() < result.size()) { fragments.push_back(result.size()); } @@ -178,34 +187,34 @@ template <typename DictRange, typename PostingStore, typename Result> void diversify_2(const DictRange &range_in, const PostingStore &posting, size_t wanted_hits, const IAttributeVector &diversity_attr, size_t max_per_group, size_t cutoff_max_groups, bool cutoff_strict, - Result &result, std::vector<size_t> &fragments) + Result &result, std::vector<size_t> &fragments, const PreFilter * filter) { if (diversity_attr.hasEnum()) { // must handle enum first FetchEnumFast fastEnum(diversity_attr); if (fastEnum.valid()) { - diversify_3(range_in, posting, wanted_hits, fastEnum, max_per_group, cutoff_max_groups, cutoff_strict, result, fragments); + diversify_3(range_in, posting, wanted_hits, fastEnum, max_per_group, cutoff_max_groups, cutoff_strict, result, fragments, filter); } else { - diversify_3(range_in, posting, wanted_hits, FetchEnum(diversity_attr), max_per_group, cutoff_max_groups, cutoff_strict, result, fragments); + diversify_3(range_in, posting, wanted_hits, FetchEnum(diversity_attr), max_per_group, cutoff_max_groups, cutoff_strict, result, fragments, filter); } } else if (diversity_attr.isIntegerType()) { FetchNumberFast<SingleValueNumericAttribute<IntegerAttributeTemplate<int32_t> > > fastInt32(diversity_attr); FetchNumberFast<SingleValueNumericAttribute<IntegerAttributeTemplate<int64_t> > > fastInt64(diversity_attr); if (fastInt32.valid()) { - diversify_3(range_in, posting, wanted_hits, fastInt32, max_per_group, cutoff_max_groups, cutoff_strict, result, fragments); + diversify_3(range_in, posting, wanted_hits, fastInt32, max_per_group, cutoff_max_groups, cutoff_strict, result, fragments, filter); } else if (fastInt64.valid()) { - diversify_3(range_in, posting, wanted_hits, fastInt64, max_per_group, cutoff_max_groups, cutoff_strict, result, fragments); + diversify_3(range_in, posting, wanted_hits, fastInt64, max_per_group, cutoff_max_groups, cutoff_strict, result, fragments, filter); } else { - diversify_3(range_in, posting, wanted_hits, FetchInteger(diversity_attr), max_per_group, cutoff_max_groups, cutoff_strict, result, fragments); + diversify_3(range_in, posting, wanted_hits, FetchInteger(diversity_attr), max_per_group, cutoff_max_groups, cutoff_strict, result, fragments, filter); } } else if (diversity_attr.isFloatingPointType()) { FetchNumberFast<SingleValueNumericAttribute<FloatingPointAttributeTemplate<float> > > fastFloat(diversity_attr); FetchNumberFast<SingleValueNumericAttribute<FloatingPointAttributeTemplate<double> > > fastDouble(diversity_attr); if (fastFloat.valid()) { - diversify_3(range_in, posting, wanted_hits, fastFloat, max_per_group, cutoff_max_groups, cutoff_strict, result, fragments); + diversify_3(range_in, posting, wanted_hits, fastFloat, max_per_group, cutoff_max_groups, cutoff_strict, result, fragments, filter); } else if (fastDouble.valid()) { - diversify_3(range_in, posting, wanted_hits, fastDouble, max_per_group, cutoff_max_groups, cutoff_strict, result, fragments); + diversify_3(range_in, posting, wanted_hits, fastDouble, max_per_group, cutoff_max_groups, cutoff_strict, result, fragments, filter); } else { - diversify_3(range_in, posting, wanted_hits, FetchFloat(diversity_attr), max_per_group, cutoff_max_groups, cutoff_strict, result, fragments); + diversify_3(range_in, posting, wanted_hits, FetchFloat(diversity_attr), max_per_group, cutoff_max_groups, cutoff_strict, result, fragments, filter); } } } @@ -214,14 +223,14 @@ template <typename DictItr, typename PostingStore, typename Result> void diversify(bool forward, const DictItr &lower, const DictItr &upper, const PostingStore &posting, size_t wanted_hits, const IAttributeVector &diversity_attr, size_t max_per_group, size_t cutoff_max_groups, bool cutoff_strict, - Result &array, std::vector<size_t> &fragments) + Result &array, std::vector<size_t> &fragments, const PreFilter * filter) { if (forward) { diversify_2(ForwardRange<DictItr>(lower, upper), posting, wanted_hits, - diversity_attr, max_per_group, cutoff_max_groups, cutoff_strict, array, fragments); + diversity_attr, max_per_group, cutoff_max_groups, cutoff_strict, array, fragments, filter); } else { diversify_2(ReverseRange<DictItr>(lower, upper), posting, wanted_hits, - diversity_attr, max_per_group, cutoff_max_groups, cutoff_strict, array, fragments); + diversity_attr, max_per_group, cutoff_max_groups, cutoff_strict, array, fragments, filter); } } diff --git a/searchlib/src/vespa/searchlib/attribute/posting_list_merger.cpp b/searchlib/src/vespa/searchlib/attribute/posting_list_merger.cpp index d022e806b91..855ea77684e 100644 --- a/searchlib/src/vespa/searchlib/attribute/posting_list_merger.cpp +++ b/searchlib/src/vespa/searchlib/attribute/posting_list_merger.cpp @@ -9,15 +9,14 @@ PostingListMerger<DataT>::PostingListMerger(uint32_t docIdLimit) : _array(), _startPos(), _bitVector(), + _preFilter(nullptr), _docIdLimit(docIdLimit), _arrayValid(false) { } template <typename DataT> -PostingListMerger<DataT>::~PostingListMerger() -{ -} +PostingListMerger<DataT>::~PostingListMerger() = default; template <typename DataT> void diff --git a/searchlib/src/vespa/searchlib/attribute/posting_list_merger.h b/searchlib/src/vespa/searchlib/attribute/posting_list_merger.h index 8568661dfdd..b2fb1126b32 100644 --- a/searchlib/src/vespa/searchlib/attribute/posting_list_merger.h +++ b/searchlib/src/vespa/searchlib/attribute/posting_list_merger.h @@ -4,6 +4,7 @@ #include <vespa/searchlib/btree/btree_key_data.h> #include <vespa/searchlib/common/bitvector.h> +#include <vespa/searchlib/common/prefilter.h> #include <vespa/vespalib/util/arrayref.h> namespace search::attribute { @@ -19,11 +20,12 @@ class PostingListMerger using PostingVector = std::vector<Posting>; using StartVector = std::vector<size_t>; - PostingVector _array; - StartVector _startPos; + PostingVector _array; + StartVector _startPos; std::shared_ptr<BitVector> _bitVector; - uint32_t _docIdLimit; - bool _arrayValid; + const search::PreFilter *_preFilter; + uint32_t _docIdLimit; + bool _arrayValid; PostingVector &merge(PostingVector &v, PostingVector &temp, const StartVector &startPos) __attribute__((noinline)); public: @@ -31,6 +33,7 @@ public: ~PostingListMerger(); + void setPreFilter(const search::PreFilter *filter) { _preFilter = filter; } void reserveArray(uint32_t postingsCount, size_t postingsSize); void allocBitVector(); void merge(); @@ -46,8 +49,13 @@ public: void addToArray(const PostingListType & postingList) { PostingVector &array = _array; - postingList.foreach([&array](uint32_t key, const DataT &data) - { array.emplace_back(key, data); }); + if (_preFilter) { + postingList.foreach([&array, filter=_preFilter](uint32_t key, const DataT &data) + { if (filter->keep(key)) { array.emplace_back(key, data); }} ); + } else { + postingList.foreach([&array](uint32_t key, const DataT &data) + { array.emplace_back(key, data); }); + } if (_startPos.back() < array.size()) { _startPos.push_back(array.size()); } @@ -58,8 +66,13 @@ public: { BitVector &bv = *_bitVector; uint32_t limit = _docIdLimit; - postingList.foreach_key([&bv, limit](uint32_t key) - { if (__builtin_expect(key < limit, true)) { bv.setBit(key); } }); + if (_preFilter) { + postingList.foreach_key([&bv, limit, filter=_preFilter](uint32_t key) + { if (filter->keep(key) && __builtin_expect(key < limit, true)) { bv.setBit(key); } }); + } else { + postingList.foreach_key([&bv, limit](uint32_t key) + { if (__builtin_expect(key < limit, true)) { bv.setBit(key); } }); + } } // Until diversity handling has been rewritten diff --git a/searchlib/src/vespa/searchlib/attribute/postinglistsearchcontext.cpp b/searchlib/src/vespa/searchlib/attribute/postinglistsearchcontext.cpp index f9a638a6090..c71470fc98c 100644 --- a/searchlib/src/vespa/searchlib/attribute/postinglistsearchcontext.cpp +++ b/searchlib/src/vespa/searchlib/attribute/postinglistsearchcontext.cpp @@ -6,11 +6,7 @@ #include "diversity.hpp" #include <vespa/searchlib/btree/btreeiterator.hpp> - - -namespace search { - -namespace attribute { +namespace search::attribute { using btree::BTreeNode; @@ -85,7 +81,4 @@ template class PostingListSearchContextT<int32_t>; template class PostingListFoldedSearchContextT<btree::BTreeNoLeafData>; template class PostingListFoldedSearchContextT<int32_t>; - -} // namespace attribute - -} // namespace search +} diff --git a/searchlib/src/vespa/searchlib/attribute/postinglistsearchcontext.h b/searchlib/src/vespa/searchlib/attribute/postinglistsearchcontext.h index f8349b6ad36..f47e3217743 100644 --- a/searchlib/src/vespa/searchlib/attribute/postinglistsearchcontext.h +++ b/searchlib/src/vespa/searchlib/attribute/postinglistsearchcontext.h @@ -121,7 +121,8 @@ protected: void fetchPostings(bool strict, const PreFilter * filter) override; // this will be called instead of the fetchPostings function in some cases - void diversify(bool forward, size_t wanted_hits, const IAttributeVector &diversity_attr, + void diversify(bool forward, size_t wanted_hits, const search::PreFilter * filter, + const IAttributeVector &diversity_attr, size_t max_per_group, size_t cutoff_groups, bool cutoff_strict); std::unique_ptr<queryeval::SearchIterator> @@ -228,7 +229,7 @@ private: if (params().diversityAttribute() != nullptr) { bool forward = (this->getRangeLimit() > 0); size_t wanted_hits = std::abs(this->getRangeLimit()); - PostingListSearchContextT<DataT>::diversify(forward, wanted_hits, + PostingListSearchContextT<DataT>::diversify(forward, wanted_hits, filter, *(params().diversityAttribute()), this->getMaxPerGroup(), params().diversityCutoffGroups(), params().diversityCutoffStrict()); } else { diff --git a/searchlib/src/vespa/searchlib/attribute/postinglistsearchcontext.hpp b/searchlib/src/vespa/searchlib/attribute/postinglistsearchcontext.hpp index 92dda8174fc..3d7ece0211b 100644 --- a/searchlib/src/vespa/searchlib/attribute/postinglistsearchcontext.hpp +++ b/searchlib/src/vespa/searchlib/attribute/postinglistsearchcontext.hpp @@ -121,6 +121,7 @@ PostingListSearchContextT<DataT>::fetchPostings(bool strict, const PreFilter * f } if (strict && !fallbackToFiltering()) { size_t sum(countHits()); + _merger.setPreFilter(filter); if (sum < _docIdLimit / 64) { _merger.reserveArray(_uniqueValues, sum); fillArray(); @@ -135,14 +136,16 @@ PostingListSearchContextT<DataT>::fetchPostings(bool strict, const PreFilter * f template <typename DataT> void -PostingListSearchContextT<DataT>::diversify(bool forward, size_t wanted_hits, const IAttributeVector &diversity_attr, +PostingListSearchContextT<DataT>::diversify(bool forward, size_t wanted_hits, const search::PreFilter * filter, + const IAttributeVector &diversity_attr, size_t max_per_group, size_t cutoff_groups, bool cutoff_strict) { assert(!_fetchPostingsDone); _fetchPostingsDone = true; _merger.reserveArray(128, wanted_hits); diversity::diversify(forward, _lowerDictItr, _upperDictItr, _postingList, wanted_hits, diversity_attr, - max_per_group, cutoff_groups, cutoff_strict, _merger.getWritableArray(), _merger.getWritableStartPos()); + max_per_group, cutoff_groups, cutoff_strict, + _merger.getWritableArray(), _merger.getWritableStartPos(), filter); _merger.merge(); } diff --git a/searchlib/src/vespa/searchlib/btree/btreeiterator.hpp b/searchlib/src/vespa/searchlib/btree/btreeiterator.hpp index 741121aebab..849359dece9 100644 --- a/searchlib/src/vespa/searchlib/btree/btreeiterator.hpp +++ b/searchlib/src/vespa/searchlib/btree/btreeiterator.hpp @@ -7,8 +7,7 @@ #include "btreenode.hpp" #include <vespa/vespalib/stllike/asciistream.h> -namespace search { -namespace btree { +namespace search::btree { #define STRICT_BTREE_ITERATOR_SEEK @@ -1375,6 +1374,4 @@ BTreeIterator<KeyT, DataT, AggrT, CompareT, TraitsT>::adjustGivenEntriesToRightL } } -} // namespace search::btree -} // namespace search - +} |