diff options
author | Henning Baldersheim <balder@yahoo-inc.com> | 2023-12-13 20:58:11 +0000 |
---|---|---|
committer | Henning Baldersheim <balder@yahoo-inc.com> | 2023-12-14 07:36:17 +0000 |
commit | ff23cdd1d87eae676f04d04ded9a566e59f71bf8 (patch) | |
tree | 19a87ba5412807caf672dd3b04ca22a0d9a4dbee /searchlib | |
parent | 1ad5ec5fa814a92fdbf98db14121197023f434f0 (diff) |
Amdahl's law needs due respect. By allocating the bitvector and zero-initializing it in the producing thread, we achieve:
- Shift work from the sequential path to the parallel path.
- Avoid filling the master thread's cache during bitvector creation.
- Pull directly into the correct worker's cache.
- And increase the chance that the memory is allocated in a NUMA region close to the worker thread.
Diffstat (limited to 'searchlib')
-rw-r--r-- | searchlib/src/vespa/searchlib/attribute/postinglistsearchcontext.hpp | 40 |
1 files changed, 22 insertions, 18 deletions
diff --git a/searchlib/src/vespa/searchlib/attribute/postinglistsearchcontext.hpp b/searchlib/src/vespa/searchlib/attribute/postinglistsearchcontext.hpp index bb6608d7544..9dad6653c61 100644 --- a/searchlib/src/vespa/searchlib/attribute/postinglistsearchcontext.hpp +++ b/searchlib/src/vespa/searchlib/attribute/postinglistsearchcontext.hpp @@ -68,9 +68,12 @@ PostingListSearchContextT<DataT>::fillArray() template <typename DataT> struct PostingListSearchContextT<DataT>::FillPart : public vespalib::Runnable { - FillPart(const PostingStore& posting_store, const DictionaryConstIterator & from, size_t count, - BitVector & bv, uint32_t limit) - : _posting_store(posting_store), _bv(bv), + FillPart(const PostingStore& posting_store, const DictionaryConstIterator & from, size_t count, uint32_t limit) + : FillPart(posting_store, from, count, nullptr, limit) + { } + FillPart(const PostingStore& posting_store, const DictionaryConstIterator & from, size_t count, BitVector * bv, uint32_t limit) + : _posting_store(posting_store), + _bv(bv), _docIdLimit(limit), _from(from), _to(from) @@ -78,21 +81,25 @@ struct PostingListSearchContextT<DataT>::FillPart : public vespalib::Runnable { _to += count; } void run() override { + if (_bv == nullptr) { + _owned_bv = BitVector::create(_docIdLimit); + _bv = _owned_bv.get(); + } for (;_from != _to;++_from) { addToBitVector(PostingListTraverser<PostingStore>(_posting_store, _from.getData().load_acquire())); } } - void addToBitVector(const PostingListTraverser<PostingStore> & postingList) - { + void addToBitVector(const PostingListTraverser<PostingStore> & postingList) { postingList.foreach_key([this](uint32_t key) { - if (__builtin_expect(key < _docIdLimit, true)) { _bv.setBit(key); } + if (__builtin_expect(key < _docIdLimit, true)) { _bv->setBit(key); } }); } - const PostingStore &_posting_store; - BitVector &_bv; - uint32_t _docIdLimit; - DictionaryConstIterator _from; - DictionaryConstIterator _to; + const PostingStore &_posting_store; + 
BitVector *_bv; + uint32_t _docIdLimit; + DictionaryConstIterator _from; + DictionaryConstIterator _to; + std::unique_ptr<BitVector> _owned_bv; }; template <typename DataT> @@ -106,18 +113,15 @@ PostingListSearchContextT<DataT>::fillBitVector(vespalib::ThreadBundle & thread_ uint32_t rest_docs = num_iter % num_threads; std::vector<FillPart> parts; parts.reserve(num_threads); - BitVector & master = *_merger.getBitVector(); - std::vector<std::unique_ptr<BitVector>> scratch_bvs; - scratch_bvs.reserve(num_threads - 1); + BitVector * master = _merger.getBitVector(); parts.emplace_back(_posting_store, _lowerDictItr, per_thread + (rest_docs > 0), master, _merger.getDocIdLimit()); for (size_t i(1); i < num_threads; i++) { - scratch_bvs.push_back(BitVector::create(master.size())); size_t num_this_thread = per_thread + (i < rest_docs); - parts.emplace_back(_posting_store, parts[i-1]._to, num_this_thread, *scratch_bvs.back(), _merger.getDocIdLimit()); + parts.emplace_back(_posting_store, parts[i-1]._to, num_this_thread, _merger.getDocIdLimit()); } thread_bundle.run(parts); - for (const auto & bv : scratch_bvs) { - master.orWith(*bv); + for (size_t i(1); i < parts.size(); i++) { + master->orWith(*parts[i]._bv); } } |