author    Henning Baldersheim <balder@yahoo-inc.com>    2023-12-13 20:58:11 +0000
committer Henning Baldersheim <balder@yahoo-inc.com>    2023-12-14 07:36:17 +0000
commit    ff23cdd1d87eae676f04d04ded9a566e59f71bf8 (patch)
tree      19a87ba5412807caf672dd3b04ca22a0d9a4dbee /searchlib
parent    1ad5ec5fa814a92fdbf98db14121197023f434f0 (diff)
Amdahl's law needs due respect. By allocating the bitvector and zero-initializing it in the producing thread we achieve:
- Shift work from the sequential path to the parallel path.
- Avoid filling the master thread's cache during bitvector creation.
- Pull the memory directly into the correct worker's cache.
- And increase the chance the memory is allocated in a NUMA region close to the thread using it.
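A minimal sketch of the idea outside Vespa (the Bits/Worker names are illustrative stand-ins, not the real vespalib API): deferring the allocation and the zeroing into the worker's run() means the pages are first touched on the worker's CPU, so the OS places them on that worker's NUMA node and the zeroing warms that core's cache instead of the master's.

#include <cstdint>
#include <memory>
#include <vector>

using Bits = std::vector<uint64_t>;   // stand-in for search::BitVector

struct Worker {
    uint32_t limit = 0;               // docid limit, i.e. number of bits
    Bits* bv = nullptr;               // borrowed master vector, or nullptr
    std::unique_ptr<Bits> owned;      // scratch vector, allocated lazily

    void run() {
        if (bv == nullptr) {
            // Allocate and zero-initialize here, in the worker thread:
            // first-touch places the pages on this thread's NUMA node,
            // and the zeroing fills this core's cache, not the master's.
            owned = std::make_unique<Bits>((limit + 63) / 64, 0);
            bv = owned.get();
        }
        // ... walk this worker's slice of the posting lists and set bits in *bv ...
    }
};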
Diffstat (limited to 'searchlib')
-rw-r--r--    searchlib/src/vespa/searchlib/attribute/postinglistsearchcontext.hpp    40
1 file changed, 22 insertions(+), 18 deletions(-)
diff --git a/searchlib/src/vespa/searchlib/attribute/postinglistsearchcontext.hpp b/searchlib/src/vespa/searchlib/attribute/postinglistsearchcontext.hpp
index bb6608d7544..9dad6653c61 100644
--- a/searchlib/src/vespa/searchlib/attribute/postinglistsearchcontext.hpp
+++ b/searchlib/src/vespa/searchlib/attribute/postinglistsearchcontext.hpp
@@ -68,9 +68,12 @@ PostingListSearchContextT<DataT>::fillArray()
template <typename DataT>
struct PostingListSearchContextT<DataT>::FillPart : public vespalib::Runnable {
- FillPart(const PostingStore& posting_store, const DictionaryConstIterator & from, size_t count,
- BitVector & bv, uint32_t limit)
- : _posting_store(posting_store), _bv(bv),
+ FillPart(const PostingStore& posting_store, const DictionaryConstIterator & from, size_t count, uint32_t limit)
+ : FillPart(posting_store, from, count, nullptr, limit)
+ { }
+ FillPart(const PostingStore& posting_store, const DictionaryConstIterator & from, size_t count, BitVector * bv, uint32_t limit)
+ : _posting_store(posting_store),
+ _bv(bv),
_docIdLimit(limit),
_from(from),
_to(from)
@@ -78,21 +81,25 @@ struct PostingListSearchContextT<DataT>::FillPart : public vespalib::Runnable {
_to += count;
}
void run() override {
+ if (_bv == nullptr) {
+ _owned_bv = BitVector::create(_docIdLimit);
+ _bv = _owned_bv.get();
+ }
for (;_from != _to;++_from) {
addToBitVector(PostingListTraverser<PostingStore>(_posting_store, _from.getData().load_acquire()));
}
}
- void addToBitVector(const PostingListTraverser<PostingStore> & postingList)
- {
+ void addToBitVector(const PostingListTraverser<PostingStore> & postingList) {
postingList.foreach_key([this](uint32_t key) {
- if (__builtin_expect(key < _docIdLimit, true)) { _bv.setBit(key); }
+ if (__builtin_expect(key < _docIdLimit, true)) { _bv->setBit(key); }
});
}
- const PostingStore &_posting_store;
- BitVector &_bv;
- uint32_t _docIdLimit;
- DictionaryConstIterator _from;
- DictionaryConstIterator _to;
+ const PostingStore &_posting_store;
+ BitVector *_bv;
+ uint32_t _docIdLimit;
+ DictionaryConstIterator _from;
+ DictionaryConstIterator _to;
+ std::unique_ptr<BitVector> _owned_bv;
};
template <typename DataT>
@@ -106,18 +113,15 @@ PostingListSearchContextT<DataT>::fillBitVector(vespalib::ThreadBundle & thread_
uint32_t rest_docs = num_iter % num_threads;
std::vector<FillPart> parts;
parts.reserve(num_threads);
- BitVector & master = *_merger.getBitVector();
- std::vector<std::unique_ptr<BitVector>> scratch_bvs;
- scratch_bvs.reserve(num_threads - 1);
+ BitVector * master = _merger.getBitVector();
parts.emplace_back(_posting_store, _lowerDictItr, per_thread + (rest_docs > 0), master, _merger.getDocIdLimit());
for (size_t i(1); i < num_threads; i++) {
- scratch_bvs.push_back(BitVector::create(master.size()));
size_t num_this_thread = per_thread + (i < rest_docs);
- parts.emplace_back(_posting_store, parts[i-1]._to, num_this_thread, *scratch_bvs.back(), _merger.getDocIdLimit());
+ parts.emplace_back(_posting_store, parts[i-1]._to, num_this_thread, _merger.getDocIdLimit());
}
thread_bundle.run(parts);
- for (const auto & bv : scratch_bvs) {
- master.orWith(*bv);
+ for (size_t i(1); i < parts.size(); i++) {
+ master->orWith(*parts[i]._bv);
}
}
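For context, a self-contained model of the fill-then-merge flow in fillBitVector above, with std::thread standing in for vespalib::ThreadBundle (all names here are illustrative, not the Vespa API): part 0 writes straight into the master vector, every other part allocates its scratch vector inside run(), and the master thread ORs the scratch vectors in afterwards, as the orWith() loop does in the patch.

#include <cstdint>
#include <memory>
#include <thread>
#include <vector>

using Bits = std::vector<uint64_t>;

struct Part {
    uint32_t limit = 0;
    std::vector<uint32_t> keys;   // this part's share of the docids
    Bits* bv = nullptr;           // master vector for part 0, else nullptr
    std::unique_ptr<Bits> owned;  // scratch vector, allocated in run()

    void run() {
        if (bv == nullptr) {      // scratch parts allocate NUMA-locally
            owned = std::make_unique<Bits>(limit / 64 + 1, 0);
            bv = owned.get();
        }
        for (uint32_t key : keys) {
            if (key < limit) (*bv)[key / 64] |= uint64_t(1) << (key % 64);
        }
    }
};

int main() {
    uint32_t limit = 1000;
    Bits master(limit / 64 + 1, 0);
    std::vector<Part> parts(3);
    parts[0] = {limit, {1, 5, 9}, &master, nullptr};  // master part
    parts[1] = {limit, {2, 6}, nullptr, nullptr};     // scratch parts
    parts[2] = {limit, {3, 7}, nullptr, nullptr};

    std::vector<std::thread> threads;
    for (auto& p : parts) threads.emplace_back([&p] { p.run(); });
    for (auto& t : threads) t.join();

    // Sequential tail: only the OR-merge remains on the master thread.
    for (size_t i = 1; i < parts.size(); ++i) {
        for (size_t w = 0; w < master.size(); ++w) master[w] |= (*parts[i].bv)[w];
    }
}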