about | summary | refs | log | tree | commit | diff | stats
path: root/searchlib
diff options
context:
space:
mode:
author: Tor Egge <Tor.Egge@online.no> 2023-09-27 12:52:31 +0200
committer: Tor Egge <Tor.Egge@online.no> 2023-09-27 12:52:31 +0200
commit: 03574a1f04b8f8b54401135ee077a86008123573 (patch)
tree: 807e82f7a538f18262a20e987fb3d7fa2c71d9a6 /searchlib
parent: 23301859bd735adfd293248031e0959485a29874 (diff)
Avoid counting hits in range multiple times.
Diffstat (limited to 'searchlib')
-rw-r--r-- searchlib/src/vespa/searchlib/attribute/postinglistsearchcontext.h 56
-rw-r--r-- searchlib/src/vespa/searchlib/attribute/postinglistsearchcontext.hpp 78
2 files changed, 91 insertions, 43 deletions
diff --git a/searchlib/src/vespa/searchlib/attribute/postinglistsearchcontext.h b/searchlib/src/vespa/searchlib/attribute/postinglistsearchcontext.h
index 1baf75d1bdf..7e2002f95fd 100644
--- a/searchlib/src/vespa/searchlib/attribute/postinglistsearchcontext.h
+++ b/searchlib/src/vespa/searchlib/attribute/postinglistsearchcontext.h
@@ -13,6 +13,7 @@
#include <vespa/vespalib/util/regexp.h>
#include <vespa/vespalib/fuzzy/fuzzy_matcher.h>
#include <regex>
+#include <optional>
namespace search::attribute {
@@ -30,28 +31,30 @@ protected:
using Dictionary = EnumPostingTree;
using DictionaryConstIterator = Dictionary::ConstIterator;
using FrozenDictionary = Dictionary::FrozenView;
+ using EntryRef = vespalib::datastore::EntryRef;
using EnumIndex = IEnumStore::Index;
static constexpr long MIN_UNIQUE_VALUES_BEFORE_APPROXIMATION = 100;
static constexpr long MIN_UNIQUE_VALUES_TO_NUMDOCS_RATIO_BEFORE_APPROXIMATION = 20;
static constexpr long MIN_APPROXHITS_TO_NUMDOCS_RATIO_BEFORE_APPROXIMATION = 10;
- const IEnumStoreDictionary & _dictionary;
- const ISearchContext &_baseSearchCtx;
- const BitVector *_bv; // bitvector if _useBitVector has been set
- const FrozenDictionary _frozenDictionary;
- DictionaryConstIterator _lowerDictItr;
- DictionaryConstIterator _upperDictItr;
- uint64_t _numValues; // attr.getStatus().getNumValues();
- uint32_t _uniqueValues;
- uint32_t _docIdLimit;
- uint32_t _dictSize;
- vespalib::datastore::EntryRef _pidx;
- vespalib::datastore::EntryRef _frozenRoot; // Posting list in tree form
- float _FSTC; // Filtering Search Time Constant
- float _PLSTC; // Posting List Search Time Constant
- bool _hasWeight;
- bool _useBitVector;
+ const IEnumStoreDictionary& _dictionary;
+ const ISearchContext& _baseSearchCtx;
+ const BitVector* _bv; // bitvector if _useBitVector has been set
+ const FrozenDictionary _frozenDictionary;
+ DictionaryConstIterator _lowerDictItr;
+ DictionaryConstIterator _upperDictItr;
+ uint64_t _numValues; // attr.getStatus().getNumValues();
+ uint32_t _uniqueValues;
+ uint32_t _docIdLimit;
+ uint32_t _dictSize;
+ EntryRef _pidx;
+ EntryRef _frozenRoot; // Posting list in tree form
+ float _FSTC; // Filtering Search Time Constant
+ float _PLSTC; // Posting List Search Time Constant
+ bool _hasWeight;
+ bool _useBitVector;
+ mutable std::optional<size_t> _counted_hits; // Snapshot of size of posting lists in range
PostingListSearchContext(const IEnumStoreDictionary& dictionary, bool has_btree_dictionary, uint32_t docIdLimit,
uint64_t numValues, bool hasWeight, bool useBitVector, const ISearchContext &baseSearchCtx);
@@ -99,6 +102,9 @@ protected:
(calculateApproxNumHits() * MIN_APPROXHITS_TO_NUMDOCS_RATIO_BEFORE_APPROXIMATION > _docIdLimit) ||
(_uniqueValues > MIN_UNIQUE_VALUES_BEFORE_APPROXIMATION*10)));
}
+ virtual size_t countHits() const = 0;
+ virtual void fillArray() = 0;
+ virtual void fillBitVector() = 0;
};
@@ -126,9 +132,9 @@ protected:
~PostingListSearchContextT() override;
void lookupSingle();
- size_t countHits() const;
- void fillArray();
- void fillBitVector();
+ size_t countHits() const override;
+ void fillArray() override;
+ void fillBitVector() override;
void fetchPostings(const queryeval::ExecuteInfo & strict) override;
// this will be called instead of the fetchPostings function in some cases
@@ -151,18 +157,24 @@ protected:
using Parent = PostingListSearchContextT<DataT>;
using Dictionary = typename Parent::Dictionary;
using PostingList = typename Parent::PostingList;
+ using Parent::_counted_hits;
+ using Parent::_docIdLimit;
using Parent::_lowerDictItr;
- using Parent::_uniqueValues;
+ using Parent::_merger;
using Parent::_postingList;
- using Parent::_docIdLimit;
- using Parent::countHits;
+ using Parent::_uniqueValues;
+ using Parent::_upperDictItr;
using Parent::singleHits;
+ using Parent::use_dictionary_entry;
PostingListFoldedSearchContextT(const IEnumStoreDictionary& dictionary, uint32_t docIdLimit, uint64_t numValues,
bool hasWeight, const PostingList &postingList,
bool useBitVector, const ISearchContext &baseSearchCtx);
bool fallback_to_approx_num_hits() const override;
+ size_t countHits() const override;
+ void fillArray() override;
+ void fillBitVector() override;
};
diff --git a/searchlib/src/vespa/searchlib/attribute/postinglistsearchcontext.hpp b/searchlib/src/vespa/searchlib/attribute/postinglistsearchcontext.hpp
index 91054b02d1e..714da9997bb 100644
--- a/searchlib/src/vespa/searchlib/attribute/postinglistsearchcontext.hpp
+++ b/searchlib/src/vespa/searchlib/attribute/postinglistsearchcontext.hpp
@@ -58,51 +58,42 @@ PostingListSearchContextT<DataT>::lookupSingle()
}
}
-
template <typename DataT>
size_t
PostingListSearchContextT<DataT>::countHits() const
{
+ if (_counted_hits.has_value()) {
+ return _counted_hits.value();
+ }
size_t sum(0);
- for (auto it(_lowerDictItr); it != _upperDictItr;) {
- if (use_dictionary_entry(it)) {
- sum += _postingList.frozenSize(it.getData().load_acquire());
- ++it;
- }
+ for (auto it(_lowerDictItr); it != _upperDictItr; ++it) {
+ sum += _postingList.frozenSize(it.getData().load_acquire());
}
+ _counted_hits = sum;
return sum;
}
-
template <typename DataT>
void
PostingListSearchContextT<DataT>::fillArray()
{
- for (auto it(_lowerDictItr); it != _upperDictItr;) {
- if (use_dictionary_entry(it)) {
- _merger.addToArray(PostingListTraverser<PostingList>(_postingList,
- it.getData().load_acquire()));
- ++it;
- }
+ for (auto it(_lowerDictItr); it != _upperDictItr; ++it) {
+ _merger.addToArray(PostingListTraverser<PostingList>(_postingList,
+ it.getData().load_acquire()));
}
_merger.merge();
}
-
template <typename DataT>
void
PostingListSearchContextT<DataT>::fillBitVector()
{
- for (auto it(_lowerDictItr); it != _upperDictItr;) {
- if (use_dictionary_entry(it)) {
- _merger.addToBitVector(PostingListTraverser<PostingList>(_postingList,
- it.getData().load_acquire()));
- ++it;
- }
+ for (auto it(_lowerDictItr); it != _upperDictItr; ++it) {
+ _merger.addToBitVector(PostingListTraverser<PostingList>(_postingList,
+ it.getData().load_acquire()));
}
}
-
template <typename DataT>
void
PostingListSearchContextT<DataT>::fetchPostings(const queryeval::ExecuteInfo & execInfo)
@@ -284,4 +275,49 @@ PostingListFoldedSearchContextT<DataT>::fallback_to_approx_num_hits() const
return false;
}
+template <typename DataT>
+size_t
+PostingListFoldedSearchContextT<DataT>::countHits() const
+{
+ if (_counted_hits.has_value()) {
+ return _counted_hits.value();
+ }
+ size_t sum(0);
+ for (auto it(_lowerDictItr); it != this->_upperDictItr;) {
+ if (use_dictionary_entry(it)) {
+ sum += _postingList.frozenSize(it.getData().load_acquire());
+ ++it;
+ }
+ }
+ _counted_hits = sum;
+ return sum;
+}
+
+template <typename DataT>
+void
+PostingListFoldedSearchContextT<DataT>::fillArray()
+{
+ for (auto it(_lowerDictItr); it != _upperDictItr;) {
+ if (use_dictionary_entry(it)) {
+ _merger.addToArray(PostingListTraverser<PostingList>(_postingList,
+ it.getData().load_acquire()));
+ ++it;
+ }
+ }
+ _merger.merge();
+}
+
+template <typename DataT>
+void
+PostingListFoldedSearchContextT<DataT>::fillBitVector()
+{
+ for (auto it(_lowerDictItr); it != _upperDictItr;) {
+ if (use_dictionary_entry(it)) {
+ _merger.addToBitVector(PostingListTraverser<PostingList>(_postingList,
+ it.getData().load_acquire()));
+ ++it;
+ }
+ }
+}
+
}