diff options
author | Henning Baldersheim <balder@yahoo-inc.com> | 2023-12-12 14:49:54 +0000 |
---|---|---|
committer | Henning Baldersheim <balder@yahoo-inc.com> | 2023-12-12 15:37:14 +0000 |
commit | 790d5c8961f7bc8037d9de31dcd80e03922bc1da (patch) | |
tree | ee0ff8e4e7d4ec2dfa278d0edc8791bfcc2d8832 | |
parent | 2c1c8038e9a701ba115c365e56f5d19a458df263 (diff) |
Use provided thread bundle to compute effective range in parallel.
5 files changed, 85 insertions, 18 deletions
diff --git a/searchlib/src/tests/attribute/searchcontext/searchcontext_test.cpp b/searchlib/src/tests/attribute/searchcontext/searchcontext_test.cpp index c701d5ac19f..6e334fffa75 100644 --- a/searchlib/src/tests/attribute/searchcontext/searchcontext_test.cpp +++ b/searchlib/src/tests/attribute/searchcontext/searchcontext_test.cpp @@ -22,6 +22,7 @@ #include <vespa/vespalib/stllike/asciistream.h> #include <vespa/vespalib/testkit/testapp.h> #include <vespa/vespalib/util/compress.h> +#include <vespa/vespalib/util/simple_thread_bundle.h> #include <vespa/vespalib/util/stringfmt.h> #include <initializer_list> #include <set> @@ -150,9 +151,13 @@ private: ResultSetPtr performSearch(SearchIterator & sb, uint32_t numDocs); template <typename V, typename T> - ResultSetPtr performSearch(const V & vec, const T & term, TermType termType=TermType::WORD); + ResultSetPtr performSearch(const V & vec, const T & term); + template <typename V, typename T> + ResultSetPtr performSearch(const queryeval::ExecuteInfo & executeInfo, const V & vec, const T & term, TermType termType); + template <typename V> + void performSearch(const V & vec, const vespalib::string & term, const DocSet & expected, TermType termType); template <typename V> - void performSearch(const V & vec, const vespalib::string & term, + void performSearch(const queryeval::ExecuteInfo & executeInfo, const V & vec, const vespalib::string & term, const DocSet & expected, TermType termType); void checkResultSet(const ResultSet & rs, const DocSet & exp, bool bitVector); @@ -461,11 +466,18 @@ SearchContextTest::performSearch(SearchIterator & sb, uint32_t numDocs) template <typename V, typename T> ResultSetPtr -SearchContextTest::performSearch(const V & vec, const T & term, TermType termType) +SearchContextTest::performSearch(const V & vec, const T & term) +{ + return performSearch(search::queryeval::ExecuteInfo::TRUE, vec, term, TermType::WORD); +} + +template <typename V, typename T> +ResultSetPtr 
+SearchContextTest::performSearch(const queryeval::ExecuteInfo & executeInfo, const V & vec, const T & term, TermType termType) { TermFieldMatchData dummy; SearchContextPtr sc = getSearch(vec, term, termType); - sc->fetchPostings(queryeval::ExecuteInfo::TRUE); + sc->fetchPostings(executeInfo); SearchBasePtr sb = sc->createIterator(&dummy, true); ResultSetPtr rs = performSearch(*sb, vec.getNumDocs()); return rs; @@ -473,7 +485,7 @@ SearchContextTest::performSearch(const V & vec, const T & term, TermType termTyp template <typename V> void -SearchContextTest::performSearch(const V & vec, const vespalib::string & term, +SearchContextTest::performSearch(const queryeval::ExecuteInfo & executeInfo, const V & vec, const vespalib::string & term, const DocSet & expected, TermType termType) { #if 0 @@ -482,10 +494,17 @@ SearchContextTest::performSearch(const V & vec, const vespalib::string & term, std::cout << "}, prefix(" << (prefix ? "true" : "false") << ")" << std::endl; #endif { // strict search iterator - ResultSetPtr rs = performSearch(vec, term, termType); + ResultSetPtr rs = performSearch(executeInfo, vec, term, termType); checkResultSet(*rs, expected, false); } } +template <typename V> +void +SearchContextTest::performSearch(const V & vec, const vespalib::string & term, + const DocSet & expected, TermType termType) +{ + performSearch(search::queryeval::ExecuteInfo::TRUE, vec, term, expected, termType); +} void SearchContextTest::checkResultSet(const ResultSet & rs, const DocSet & expected, bool bitVector) @@ -1090,10 +1109,13 @@ SearchContextTest::testSearchIteratorUnpacking() template <typename VectorType> void -SearchContextTest::performRangeSearch(const VectorType & vec, const vespalib::string & term, - const DocSet & expected) +SearchContextTest::performRangeSearch(const VectorType & vec, const vespalib::string & term, const DocSet & expected) { - performSearch(vec, term, expected, TermType::WORD); + for (size_t num_threads : {1,3}) { + 
vespalib::SimpleThreadBundle thread_bundle(num_threads); + auto executeInfo = search::queryeval::ExecuteInfo::create(true, 1.0, nullptr, thread_bundle, true, true); + performSearch(executeInfo, vec, term, expected, TermType::WORD); + } } template <typename VectorType, typename ValueType> diff --git a/searchlib/src/vespa/searchlib/attribute/posting_list_merger.h b/searchlib/src/vespa/searchlib/attribute/posting_list_merger.h index 8e2bbb46cb2..2943fdaecc6 100644 --- a/searchlib/src/vespa/searchlib/attribute/posting_list_merger.h +++ b/searchlib/src/vespa/searchlib/attribute/posting_list_merger.h @@ -39,6 +39,7 @@ public: bool emptyArray() const noexcept { return _array.empty(); } vespalib::ConstArrayRef<Posting> getArray() const noexcept { return _array; } const BitVector *getBitVector() const noexcept { return _bitVector.get(); } + BitVector *getBitVector() noexcept { return _bitVector.get(); } const std::shared_ptr<BitVector> &getBitVectorSP() const noexcept { return _bitVector; } uint32_t getDocIdLimit() const noexcept { return _docIdLimit; } diff --git a/searchlib/src/vespa/searchlib/attribute/posting_list_traverser.h b/searchlib/src/vespa/searchlib/attribute/posting_list_traverser.h index bc792b68a88..18b80e10432 100644 --- a/searchlib/src/vespa/searchlib/attribute/posting_list_traverser.h +++ b/searchlib/src/vespa/searchlib/attribute/posting_list_traverser.h @@ -19,7 +19,7 @@ public: _pidx(pidx) { } - ~PostingListTraverser() { } + ~PostingListTraverser() = default; template <typename Func> void diff --git a/searchlib/src/vespa/searchlib/attribute/postinglistsearchcontext.h b/searchlib/src/vespa/searchlib/attribute/postinglistsearchcontext.h index a291318e837..04181f6f479 100644 --- a/searchlib/src/vespa/searchlib/attribute/postinglistsearchcontext.h +++ b/searchlib/src/vespa/searchlib/attribute/postinglistsearchcontext.h @@ -73,7 +73,7 @@ protected: */ virtual size_t calc_estimated_hits_in_range() const = 0; virtual void fillArray() = 0; - virtual void 
fillBitVector() = 0; + virtual void fillBitVector(vespalib::ThreadBundle & thread_bundle) = 0; }; @@ -102,7 +102,7 @@ protected: void lookupSingle(); void fillArray() override; - void fillBitVector() override; + void fillBitVector(vespalib::ThreadBundle & thread_bundle) override; void fetchPostings(const queryeval::ExecuteInfo & strict) override; // this will be called instead of the fetchPostings function in some cases @@ -115,6 +115,7 @@ protected: unsigned int singleHits() const; unsigned int approximateHits() const override; void applyRangeLimit(long rangeLimit); + struct FillPart; }; @@ -153,7 +154,7 @@ protected: template <bool fill_array> void fill_array_or_bitvector(); void fillArray() override; - void fillBitVector() override; + void fillBitVector(vespalib::ThreadBundle & thread_bundle) override; }; diff --git a/searchlib/src/vespa/searchlib/attribute/postinglistsearchcontext.hpp b/searchlib/src/vespa/searchlib/attribute/postinglistsearchcontext.hpp index ecdfad433ee..4f24c2788f3 100644 --- a/searchlib/src/vespa/searchlib/attribute/postinglistsearchcontext.hpp +++ b/searchlib/src/vespa/searchlib/attribute/postinglistsearchcontext.hpp @@ -68,11 +68,53 @@ PostingListSearchContextT<DataT>::fillArray() } template <typename DataT> +struct PostingListSearchContextT<DataT>::FillPart : public vespalib::Runnable { + FillPart(const PostingStore& posting_store, const DictionaryConstIterator & from, size_t count, + BitVector & bv, uint32_t limit) + : _posting_store(posting_store), _bv(bv), + _docIdLimit(limit), + _from(from), + _to(from) + { + _to += count; + } + void run() override { + for (;_from != _to;++_from) { + addToBitVector(PostingListTraverser<PostingStore>(_posting_store, _from.getData().load_acquire())); + } + } + void addToBitVector(const PostingListTraverser<PostingStore> & postingList) + { + postingList.foreach_key([this](uint32_t key) { + if (__builtin_expect(key < _docIdLimit, true)) { _bv.setBit(key); } + }); + } + const PostingStore &_posting_store; 
+ BitVector &_bv; + uint32_t _docIdLimit; + DictionaryConstIterator _from; + DictionaryConstIterator _to; +}; + +template <typename DataT> void -PostingListSearchContextT<DataT>::fillBitVector() +PostingListSearchContextT<DataT>::fillBitVector(vespalib::ThreadBundle & thread_bundle) { - for (auto it(_lowerDictItr); it != _upperDictItr; ++it) { - _merger.addToBitVector(PostingListTraverser<PostingStore>(_posting_store, it.getData().load_acquire())); + size_t num_iter = _upperDictItr - _lowerDictItr; + size_t num_per_thread = (num_iter + (thread_bundle.size() - 1))/ thread_bundle.size(); /* ceil(num_iter / thread_bundle.size()) dictionary entries per part */ + std::vector<FillPart> parts; + parts.reserve(thread_bundle.size()); + BitVector & master = *_merger.getBitVector(); + std::vector<std::unique_ptr<BitVector>> scratch_bvs; + parts.emplace_back(_posting_store, _lowerDictItr, num_per_thread, master, _merger.getDocIdLimit()); + for (size_t i(1); i < thread_bundle.size(); i++) { + scratch_bvs.push_back(BitVector::create(master.size())); + size_t num_this_thread = std::min(num_per_thread, num_iter - std::min(num_iter, num_per_thread*i)); /* clamp the consumed count: num_per_thread*i can exceed num_iter (e.g. 5 entries on 4 threads), and the unsigned subtraction would otherwise wrap and hand this part entries past _upperDictItr */ + parts.emplace_back(_posting_store, parts[i-1]._to, num_this_thread, *scratch_bvs.back(), _merger.getDocIdLimit()); + } + thread_bundle.run(parts); + for (const auto & bv : scratch_bvs) { + master.orWith(*bv); /* fold each scratch bit vector into the merger's bit vector */ } } @@ -116,7 +158,7 @@ PostingListSearchContextT<DataT>::fetchPostings(const queryeval::ExecuteInfo & e fillArray(); } else { _merger.allocBitVector(); - fillBitVector(); + fillBitVector(execInfo.thread_bundle()); } _merger.merge(); } @@ -367,8 +409,9 @@ PostingListFoldedSearchContextT<DataT>::fillArray() template <typename DataT> void -PostingListFoldedSearchContextT<DataT>::fillBitVector() +PostingListFoldedSearchContextT<DataT>::fillBitVector(vespalib::ThreadBundle & thread_bundle) { + (void) thread_bundle; fill_array_or_bitvector<false>(); } |