diff options
author | Henning Baldersheim <balder@yahoo-inc.com> | 2023-09-26 20:13:37 +0000 |
---|---|---|
committer | Henning Baldersheim <balder@yahoo-inc.com> | 2023-09-27 13:19:53 +0000 |
commit | 9797c472ace1eb81496bfad66a6d04a9d8973af9 (patch) | |
tree | 3d42d94d56461364c1766cba534a5ee18e1cb604 /searchlib | |
parent | 15d233bafef8452ffb2148c922037577ff6170ed (diff) |
Split MultiBitVectorIterator into implementation and Iterator interface for reuse.
Diffstat (limited to 'searchlib')
-rw-r--r-- | searchlib/src/vespa/searchlib/queryeval/multibitvectoriterator.cpp | 209 | ||||
-rw-r--r-- | searchlib/src/vespa/searchlib/queryeval/multibitvectoriterator.h | 48 |
2 files changed, 158 insertions, 99 deletions
diff --git a/searchlib/src/vespa/searchlib/queryeval/multibitvectoriterator.cpp b/searchlib/src/vespa/searchlib/queryeval/multibitvectoriterator.cpp index 8b8db0f293a..4eb57eda911 100644 --- a/searchlib/src/vespa/searchlib/queryeval/multibitvectoriterator.cpp +++ b/searchlib/src/vespa/searchlib/queryeval/multibitvectoriterator.cpp @@ -11,51 +11,13 @@ namespace search::queryeval { using vespalib::Trinary; using vespalib::hwaccelrated::IAccelrated; +using Meta = MultiBitVectorBase::Meta; namespace { -template<typename Update> -class MultiBitVectorIterator : public MultiBitVectorIteratorBase -{ -public: - explicit MultiBitVectorIterator(Children children) - : MultiBitVectorIteratorBase(std::move(children)), - _update(), - _accel(IAccelrated::getAccelerator()), - _lastWords() - { - static_assert(sizeof(_lastWords) == 64, "Lastwords should have 64 byte size"); - static_assert(NumWordsInBatch == 8, "Batch size should be 8 words."); - memset(_lastWords, 0, sizeof(_lastWords)); - } -protected: - void updateLastValue(uint32_t docId) noexcept; - void strictSeek(uint32_t docId) noexcept; -private: - void doSeek(uint32_t docId) override; - Trinary is_strict() const override { return Trinary::False; } - bool acceptExtraFilter() const noexcept final { return Update::isAnd(); } - Update _update; - const IAccelrated & _accel; - alignas(64) Word _lastWords[8]; - static constexpr size_t NumWordsInBatch = sizeof(_lastWords) / sizeof(Word); -}; - -template<typename Update> -class MultiBitVectorIteratorStrict final : public MultiBitVectorIterator<Update> -{ -public: - explicit MultiBitVectorIteratorStrict(MultiSearch::Children children) - : MultiBitVectorIterator<Update>(std::move(children)) - { } -private: - void doSeek(uint32_t docId) override { this->strictSeek(docId); } - Trinary is_strict() const override { return Trinary::True; } -}; - struct And { using Word = BitWord::Word; - void operator () (const IAccelrated & accel, size_t offset, const std::vector<std::pair<const void *, bool>> & src, void *dest) noexcept { + void operator () (const IAccelrated & accel, size_t offset, const std::vector<Meta> & src, void *dest) noexcept { accel.and64(offset, src, dest); } static bool isAnd() noexcept { return true; } @@ -63,19 +25,49 @@ struct And { struct Or { using Word = BitWord::Word; - void operator () (const IAccelrated & accel, size_t offset, const std::vector<std::pair<const void *, bool>> & src, void *dest) noexcept { + void operator () (const IAccelrated & accel, size_t offset, const std::vector<Meta> & src, void *dest) noexcept { accel.or64(offset, src, dest); } static bool isAnd() noexcept { return false; } }; +} + +MultiBitVectorBase::MultiBitVectorBase(size_t reserved) + : _numDocs(std::numeric_limits<uint32_t>::max()), + _lastMaxDocIdLimit(0), + _lastMaxDocIdLimitRequireFetch(0), + _lastValue(0), + _bvs() +{ + _bvs.reserve(reserved); +} + +void +MultiBitVectorBase::addBitVector(Meta bv, uint32_t docIdLimit) { + _numDocs = std::min(_numDocs, docIdLimit); + _bvs.push_back(bv); +} + +template <typename Update> +MultiBitVector<Update>::MultiBitVector(size_t reserved) + : MultiBitVectorBase(reserved), + _update(), + _accel(IAccelrated::getAccelerator()), + _lastWords() +{ + static_assert(sizeof(_lastWords) == 64, "Lastwords should have 64 byte size"); + static_assert(NumWordsInBatch == 8, "Batch size should be 8 words."); + memset(_lastWords, 0, sizeof(_lastWords)); +} + template<typename Update> -void MultiBitVectorIterator<Update>::updateLastValue(uint32_t docId) noexcept +bool +MultiBitVector<Update>::updateLastValue(uint32_t docId) noexcept { if (docId >= _lastMaxDocIdLimit) { if (__builtin_expect(docId >= _numDocs, false)) { - setAtEnd(); - return; + return true; } const uint32_t index(BitWord::wordNum(docId)); if (docId >= _lastMaxDocIdLimitRequireFetch) { @@ -86,37 +78,104 @@ void MultiBitVectorIterator<Update>::updateLastValue(uint32_t docId) noexcept _lastValue = _lastWords[index % NumWordsInBatch]; _lastMaxDocIdLimit = (index + 1) * BitWord::WordLen; } + return false; } template<typename Update> -void -MultiBitVectorIterator<Update>::doSeek(uint32_t docId) +uint32_t +MultiBitVector<Update>::strictSeek(uint32_t docId) noexcept +{ + bool atEnd; + for (atEnd = updateLastValue(docId), _lastValue = _lastValue & BitWord::checkTab(docId); + (_lastValue == 0) && __builtin_expect(! atEnd, true); + atEnd = updateLastValue(_lastMaxDocIdLimit)); + if (__builtin_expect(!atEnd, true)) { + return _lastMaxDocIdLimit - BitWord::WordLen + vespalib::Optimized::lsbIdx(_lastValue); + } + return _numDocs; +} + +template<typename Update> +bool +MultiBitVector<Update>::seek(uint32_t docId) noexcept { - updateLastValue(docId); - if (__builtin_expect( ! isAtEnd(), true)) { + bool atEnd = updateLastValue(docId); + if (__builtin_expect( ! atEnd, true)) { if (_lastValue & BitWord::mask(docId)) { - setDocId(docId); + return true; } } + return false; } +namespace { + template<typename Update> -void -MultiBitVectorIterator<Update>::strictSeek(uint32_t docId) noexcept +class MultiBitVectorIterator : public MultiBitVectorIteratorBase { - for (updateLastValue(docId), _lastValue = _lastValue & BitWord::checkTab(docId); - (_lastValue == 0) && __builtin_expect(! isAtEnd(), true); - updateLastValue(_lastMaxDocIdLimit)); - if (__builtin_expect(!isAtEnd(), true)) { - docId = _lastMaxDocIdLimit - BitWord::WordLen + vespalib::Optimized::lsbIdx(_lastValue); - if (__builtin_expect(docId >= _numDocs, false)) { - setAtEnd(); +public: + explicit MultiBitVectorIterator(Children children) + : MultiBitVectorIteratorBase(std::move(children)), + _mbv(getChildren().size() + 1) + { + for (const auto & child : getChildren()) { + const auto * bv = static_cast<const BitVectorIterator *>(child.get()); + _mbv.addBitVector(Meta(bv->getBitValues(), bv->isInverted()), bv->getDocIdLimit()); + } + } + void initRange(uint32_t beginId, uint32_t endId) override { + MultiBitVectorIteratorBase::initRange(beginId, endId); + _mbv.reset(); + } + UP andWith(UP filter, uint32_t estimate) override; +protected: + void doSeek(uint32_t docId) override; + Trinary is_strict() const override { return Trinary::False; } + bool acceptExtraFilter() const noexcept final { return _mbv.acceptExtraFilter(); } + MultiBitVector<Update> _mbv; +}; + +template<typename Update> +class MultiBitVectorIteratorStrict final : public MultiBitVectorIterator<Update> +{ +public: + explicit MultiBitVectorIteratorStrict(MultiSearch::Children children) + : MultiBitVectorIterator<Update>(std::move(children)) + { } +private: + void doSeek(uint32_t docId) override { + docId = this->_mbv.strictSeek(docId); + if (__builtin_expect(docId >= this->getEndId(), false)) { + this->setAtEnd(); } else { - setDocId(docId); + this->setDocId(docId); } } + Trinary is_strict() const override { return Trinary::True; } +}; + +template<typename Update> +void +MultiBitVectorIterator<Update>::doSeek(uint32_t docId) +{ + if (_mbv.seek(docId)) { + setDocId(docId); + } } +template <typename Update> +SearchIterator::UP +MultiBitVectorIterator<Update>::andWith(UP filter, uint32_t estimate) +{ + (void) estimate; + if (filter->isBitVector() && acceptExtraFilter()) { + const auto & bv = static_cast<const BitVectorIterator &>(*filter); + _mbv.addBitVector(Meta(bv.getBitValues(), bv.isInverted()), bv.getDocIdLimit()); + insert(getChildren().size(), std::move(filter)); + _mbv.reset(); + } + return filter; +} using AndBVIterator = MultiBitVectorIterator<And>; using AndBVIteratorStrict = MultiBitVectorIteratorStrict<And>; @@ -148,20 +207,8 @@ bool canOptimize(const MultiSearch & s) { } MultiBitVectorIteratorBase::MultiBitVectorIteratorBase(Children children) : - MultiSearch(std::move(children)), - _numDocs(std::numeric_limits<unsigned int>::max()), - _lastMaxDocIdLimit(0), - _lastMaxDocIdLimitRequireFetch(0), - _lastValue(0), - _bvs() -{ - _bvs.reserve(getChildren().size()); - for (const auto & child : getChildren()) { - const auto * bv = static_cast<const BitVectorIterator *>(child.get()); - _bvs.emplace_back(bv->getBitValues(), bv->isInverted()); - _numDocs = std::min(_numDocs, bv->getDocIdLimit()); - } -} + MultiSearch(std::move(children)) +{ } MultiBitVectorIteratorBase::~MultiBitVectorIteratorBase() = default; @@ -169,22 +216,6 @@ void MultiBitVectorIteratorBase::initRange(uint32_t beginId, uint32_t endId) { MultiSearch::initRange(beginId, endId); - _lastMaxDocIdLimit = 0; - _lastMaxDocIdLimitRequireFetch = 0; -} - -SearchIterator::UP -MultiBitVectorIteratorBase::andWith(UP filter, uint32_t estimate) -{ - (void) estimate; - if (filter->isBitVector() && acceptExtraFilter()) { - const auto & bv = static_cast<const BitVectorIterator &>(*filter); - _bvs.emplace_back(bv.getBitValues(), bv.isInverted()); - insert(getChildren().size(), std::move(filter)); - _lastMaxDocIdLimit = 0; // force reload - _lastMaxDocIdLimitRequireFetch = 0; - } - return filter; } void diff --git a/searchlib/src/vespa/searchlib/queryeval/multibitvectoriterator.h b/searchlib/src/vespa/searchlib/queryeval/multibitvectoriterator.h index d99e439af0b..5c3d17c6786 100644 --- a/searchlib/src/vespa/searchlib/queryeval/multibitvectoriterator.h +++ b/searchlib/src/vespa/searchlib/queryeval/multibitvectoriterator.h @@ -6,8 +6,45 @@ #include "unpackinfo.h" #include <vespa/searchlib/common/bitword.h> +namespace vespalib::hwaccelrated { class IAccelrated; } + namespace search::queryeval { +class MultiBitVectorBase { +public: + using Meta = std::pair<const void *, bool>; + MultiBitVectorBase(size_t reserved); + using Word = BitWord::Word; + void reset() { + _lastMaxDocIdLimit = 0; + _lastMaxDocIdLimitRequireFetch = 0; + } + void addBitVector(Meta bv, uint32_t docIdLimit); +protected: + uint32_t _numDocs; + uint32_t _lastMaxDocIdLimit; // next documentid requiring recomputation. + uint32_t _lastMaxDocIdLimitRequireFetch; + Word _lastValue; // Last value computed + std::vector<Meta> _bvs; +}; + +template <typename Update> +class MultiBitVector : public MultiBitVectorBase { +public: + explicit MultiBitVector(size_t reserved); + uint32_t strictSeek(uint32_t docId) noexcept; + bool seek(uint32_t docId) noexcept; + bool acceptExtraFilter() const noexcept { return Update::isAnd(); } +private: + bool updateLastValue(uint32_t docId) noexcept; + using IAccelrated = vespalib::hwaccelrated::IAccelrated; + + Update _update; + const IAccelrated & _accel; + alignas(64) Word _lastWords[8]; + static constexpr size_t NumWordsInBatch = sizeof(_lastWords) / sizeof(Word); +}; + class MultiBitVectorIteratorBase : public MultiSearch { public: @@ -20,18 +57,9 @@ public: */ static SearchIterator::UP optimize(SearchIterator::UP parent); protected: - using Word = BitWord::Word; - MultiBitVectorIteratorBase(Children children); - using MetaWord = std::pair<const void *, bool>; - - uint32_t _numDocs; - uint32_t _lastMaxDocIdLimit; // next documentid requiring recomputation. - uint32_t _lastMaxDocIdLimitRequireFetch; - Word _lastValue; // Last value computed - std::vector<MetaWord> _bvs; + explicit MultiBitVectorIteratorBase(Children children); private: virtual bool acceptExtraFilter() const noexcept = 0; - UP andWith(UP filter, uint32_t estimate) override; void doUnpack(uint32_t docid) override; static SearchIterator::UP optimizeMultiSearch(SearchIterator::UP parent); |