diff options
author | Tor Egge <Tor.Egge@online.no> | 2023-09-27 00:13:49 +0200 |
---|---|---|
committer | Tor Egge <Tor.Egge@online.no> | 2023-09-27 00:13:49 +0200 |
commit | 218491b5586e52e92c274c01a0f7d7c51dbdc39f (patch) | |
tree | 0a958f26273f33d91d238523885f2be1fc015086 /searchlib | |
parent | 886a6df86cedbc0138f0112921620bb4df5be8bc (diff) |
Factor out fallback_to_approx_num_hits() member function in
posting list search contexts.
Diffstat (limited to 'searchlib')
-rw-r--r-- | searchlib/src/vespa/searchlib/attribute/postinglistsearchcontext.h | 16 | ||||
-rw-r--r-- | searchlib/src/vespa/searchlib/attribute/postinglistsearchcontext.hpp | 32 |
2 files changed, 16 insertions, 32 deletions
```diff
diff --git a/searchlib/src/vespa/searchlib/attribute/postinglistsearchcontext.h b/searchlib/src/vespa/searchlib/attribute/postinglistsearchcontext.h
index 6f148d1d5ba..1baf75d1bdf 100644
--- a/searchlib/src/vespa/searchlib/attribute/postinglistsearchcontext.h
+++ b/searchlib/src/vespa/searchlib/attribute/postinglistsearchcontext.h
@@ -32,6 +32,10 @@ protected:
     using FrozenDictionary = Dictionary::FrozenView;
     using EnumIndex = IEnumStore::Index;
 
+    static constexpr long MIN_UNIQUE_VALUES_BEFORE_APPROXIMATION = 100;
+    static constexpr long MIN_UNIQUE_VALUES_TO_NUMDOCS_RATIO_BEFORE_APPROXIMATION = 20;
+    static constexpr long MIN_APPROXHITS_TO_NUMDOCS_RATIO_BEFORE_APPROXIMATION = 10;
+
     const IEnumStoreDictionary & _dictionary;
     const ISearchContext &_baseSearchCtx;
     const BitVector *_bv; // bitvector if _useBitVector has been set
@@ -89,6 +93,12 @@ protected:
         return (numHits > 1000) &&
             (calculateFilteringCost() < calculatePostingListCost(numHits));
     }
+    virtual bool fallback_to_approx_num_hits() const {
+        return ((_uniqueValues > MIN_UNIQUE_VALUES_BEFORE_APPROXIMATION) &&
+                ((_uniqueValues * MIN_UNIQUE_VALUES_TO_NUMDOCS_RATIO_BEFORE_APPROXIMATION > static_cast<int>(_docIdLimit)) ||
+                 (calculateApproxNumHits() * MIN_APPROXHITS_TO_NUMDOCS_RATIO_BEFORE_APPROXIMATION > _docIdLimit) ||
+                 (_uniqueValues > MIN_UNIQUE_VALUES_BEFORE_APPROXIMATION*10)));
+    }
 };
 
 
@@ -110,10 +120,6 @@ protected:
      */
     PostingListMerger<DataT> _merger;
 
-    static const long MIN_UNIQUE_VALUES_BEFORE_APPROXIMATION = 100;
-    static const long MIN_UNIQUE_VALUES_TO_NUMDOCS_RATIO_BEFORE_APPROXIMATION = 20;
-    static const long MIN_APPROXHITS_TO_NUMDOCS_RATIO_BEFORE_APPROXIMATION = 10;
-
     PostingListSearchContextT(const IEnumStoreDictionary& dictionary, uint32_t docIdLimit, uint64_t numValues,
                               bool hasWeight, const PostingList &postingList,
                               bool useBitVector, const ISearchContext &baseSearchCtx);
@@ -156,7 +162,7 @@ protected:
                                     bool hasWeight, const PostingList &postingList,
                                     bool useBitVector, const ISearchContext &baseSearchCtx);
 
-    unsigned int approximateHits() const override;
+    bool fallback_to_approx_num_hits() const override;
 };
 
 
diff --git a/searchlib/src/vespa/searchlib/attribute/postinglistsearchcontext.hpp b/searchlib/src/vespa/searchlib/attribute/postinglistsearchcontext.hpp
index b466f8189f1..91054b02d1e 100644
--- a/searchlib/src/vespa/searchlib/attribute/postinglistsearchcontext.hpp
+++ b/searchlib/src/vespa/searchlib/attribute/postinglistsearchcontext.hpp
@@ -234,18 +234,9 @@ PostingListSearchContextT<DataT>::approximateHits() const
     } else {
         if (this->fallbackToFiltering()) {
             numHits = _docIdLimit;
-        } else if (_uniqueValues > MIN_UNIQUE_VALUES_BEFORE_APPROXIMATION) {
-            if ((_uniqueValues * MIN_UNIQUE_VALUES_TO_NUMDOCS_RATIO_BEFORE_APPROXIMATION > static_cast<int>(_docIdLimit)) ||
-                (this->calculateApproxNumHits() * MIN_APPROXHITS_TO_NUMDOCS_RATIO_BEFORE_APPROXIMATION > _docIdLimit) ||
-                (_uniqueValues > MIN_UNIQUE_VALUES_BEFORE_APPROXIMATION*10))
-            {
-                numHits = this->calculateApproxNumHits();
-            } else {
-                // XXX: Unsafe
-                numHits = countHits();
-            }
+        } else if (this->fallback_to_approx_num_hits()) {
+            numHits = this->calculateApproxNumHits();
         } else {
-            // XXX: Unsafe
             numHits = countHits();
         }
     }
@@ -286,24 +277,11 @@ PostingListFoldedSearchContextT(const IEnumStoreDictionary& dictionary, uint32_t
 {
 }
 
-
 template <typename DataT>
-unsigned int
-PostingListFoldedSearchContextT<DataT>::approximateHits() const
+bool
+PostingListFoldedSearchContextT<DataT>::fallback_to_approx_num_hits() const
 {
-    unsigned int numHits = 0;
-    if (_uniqueValues == 0u) {
-    } else if (_uniqueValues == 1u) {
-        numHits = singleHits();
-    } else {
-        if (this->fallbackToFiltering()) {
-            numHits = _docIdLimit;
-        } else {
-            // XXX: Unsafe
-            numHits = countHits();
-        }
-    }
-    return numHits;
+    return false;
 }
 
 }
```