diff options
author | Henning Baldersheim <balder@yahoo-inc.com> | 2018-12-18 20:07:16 +0000 |
---|---|---|
committer | Henning Baldersheim <balder@yahoo-inc.com> | 2018-12-18 20:07:16 +0000 |
commit | f3c9e8af1ef5086057f929342528ef4f793ecf31 (patch) | |
tree | 01ef37f7eeebe23e3ac0e144207df4d7a98da4ca /searchlib/src | |
parent | 90c861d4818ba4e8cf91f624656a24064e4995eb (diff) |
Add inverted BitvectorIterator
Diffstat (limited to 'searchlib/src')
5 files changed, 148 insertions, 83 deletions
diff --git a/searchlib/src/tests/queryeval/blueprint/intermediate_blueprints_test.cpp b/searchlib/src/tests/queryeval/blueprint/intermediate_blueprints_test.cpp index a97e817b28f..10bf5ab7dc2 100644 --- a/searchlib/src/tests/queryeval/blueprint/intermediate_blueprints_test.cpp +++ b/searchlib/src/tests/queryeval/blueprint/intermediate_blueprints_test.cpp @@ -1218,7 +1218,7 @@ TEST("require that children does not optimize when parents refuse them to") { EXPECT_EQUAL("search::queryeval::EquivImpl<true>", search->getClassName()); { const MultiSearch & e = dynamic_cast<const MultiSearch &>(*search); - EXPECT_EQUAL("search::BitVectorIteratorStrict", e.getChildren()[0]->getClassName()); + EXPECT_EQUAL("search::BitVectorIteratorStrictT<false>", e.getChildren()[0]->getClassName()); EXPECT_EQUAL("search::diskindex::Zc4RareWordPosOccIterator<true>", e.getChildren()[1]->getClassName()); EXPECT_EQUAL("search::diskindex::Zc4RareWordPosOccIterator<true>", e.getChildren()[2]->getClassName()); } @@ -1228,7 +1228,7 @@ TEST("require that children does not optimize when parents refuse them to") { EXPECT_EQUAL("search::queryeval::EquivImpl<true>", search->getClassName()); { const MultiSearch & e = dynamic_cast<const MultiSearch &>(*search); - EXPECT_EQUAL("search::BitVectorIteratorStrict", e.getChildren()[0]->getClassName()); + EXPECT_EQUAL("search::BitVectorIteratorStrictT<false>", e.getChildren()[0]->getClassName()); EXPECT_EQUAL("search::diskindex::Zc4RareWordPosOccIterator<true>", e.getChildren()[1]->getClassName()); EXPECT_EQUAL("search::diskindex::Zc4RareWordPosOccIterator<true>", e.getChildren()[2]->getClassName()); } diff --git a/searchlib/src/vespa/searchlib/common/bitvectoriterator.cpp b/searchlib/src/vespa/searchlib/common/bitvectoriterator.cpp index 07f9a245617..6471b0f5c33 100644 --- a/searchlib/src/vespa/searchlib/common/bitvectoriterator.cpp +++ b/searchlib/src/vespa/searchlib/common/bitvectoriterator.cpp @@ -10,6 +10,7 @@ namespace search { using fef::TermFieldMatchDataArray; using fef::TermFieldMatchData; +using vespalib::Trinary; BitVectorIterator::BitVectorIterator(const BitVector & bv, uint32_t docIdLimit, TermFieldMatchData & matchData) : _docIdLimit(std::min(docIdLimit, bv.size())), @@ -30,16 +31,6 @@ BitVectorIterator::initRange(uint32_t begin, uint32_t end) } void -BitVectorIterator::doSeek(uint32_t docId) -{ - if (__builtin_expect(docId >= _docIdLimit, false)) { - setAtEnd(); - } else if (_bv.testBit(docId)) { - setDocId(docId); - } -} - -void BitVectorIterator::visitMembers(vespalib::ObjectVisitor &visitor) const { SearchIterator::visitMembers(visitor); @@ -54,83 +45,157 @@ BitVectorIterator::doUnpack(uint32_t docId) _tfmd.resetOnlyDocId(docId); } -class BitVectorIteratorStrict : public BitVectorIterator +template<bool inverse> +class BitVectorIteratorT : public BitVectorIterator { +public: + BitVectorIteratorT(const BitVector &other, uint32_t docIdLimit, fef::TermFieldMatchData &matchData); + + void doSeek(uint32_t docId) override; + BitVector::UP get_hits(uint32_t begin_id) override; + void or_hits_into(BitVector &result, uint32_t begin_id) override; + void and_hits_into(BitVector &result, uint32_t begin_id) override; + bool isInverted() const override { return inverse; } +private: + bool isSet(uint32_t docId) const { return inverse == ! _bv.testBit(docId); } +}; + +template<bool inverse> +BitVectorIteratorT<inverse>::BitVectorIteratorT(const BitVector & bv, uint32_t docIdLimit, TermFieldMatchData & matchData) : + BitVectorIterator(bv, docIdLimit, matchData) +{ +} + +template<bool inverse> +void +BitVectorIteratorT<inverse>::doSeek(uint32_t docId) +{ + if (__builtin_expect(docId >= _docIdLimit, false)) { + setAtEnd(); + } else if (isSet(docId)) { + setDocId(docId); + } +} + +template<bool inverse> +class BitVectorIteratorStrictT : public BitVectorIteratorT<inverse> { public: - BitVectorIteratorStrict(const BitVector & bv, uint32_t docIdLimit, TermFieldMatchData & matchData); + BitVectorIteratorStrictT(const BitVector & bv, uint32_t docIdLimit, TermFieldMatchData & matchData); private: void initRange(uint32_t begin, uint32_t end) override; void doSeek(uint32_t docId) override; Trinary is_strict() const override { return Trinary::True; } + uint32_t getNextBit(uint32_t docId) const { + return inverse ? ! this->_bv.getNextFalseBit(docId) : this->_bv.getNextTrueBit(docId); + } + uint32_t getFirstBit(uint32_t docId) const { + return inverse ? ! this->_bv.getFirstFalseBit(docId) : this->_bv.getFirstTrueBit(docId); + } }; -BitVectorIteratorStrict::BitVectorIteratorStrict(const BitVector & bv, uint32_t docIdLimit, TermFieldMatchData & matchData) : - BitVectorIterator(bv, docIdLimit, matchData) +template<bool inverse> +BitVectorIteratorStrictT<inverse>::BitVectorIteratorStrictT(const BitVector & bv, uint32_t docIdLimit, TermFieldMatchData & matchData) : + BitVectorIteratorT<inverse>(bv, docIdLimit, matchData) { } +template<bool inverse> void -BitVectorIteratorStrict::doSeek(uint32_t docId) +BitVectorIteratorStrictT<inverse>::doSeek(uint32_t docId) { - if (__builtin_expect(docId >= _docIdLimit, false)) { - setAtEnd(); + if (__builtin_expect(docId >= this->_docIdLimit, false)) { + this->setAtEnd(); return; } - docId = _bv.getNextTrueBit(docId); - if (__builtin_expect(docId >= _docIdLimit, false)) { - setAtEnd(); + docId = getNextBit(docId); + if (__builtin_expect(docId >= this->_docIdLimit, false)) { + this->setAtEnd(); } else { - setDocId(docId); + this->setDocId(docId); } } +template<bool inverse> void -BitVectorIteratorStrict::initRange(uint32_t begin, uint32_t end) +BitVectorIteratorStrictT<inverse>::initRange(uint32_t begin, uint32_t end) { BitVectorIterator::initRange(begin, end); - if (!isAtEnd()) { - uint32_t docId = _bv.getFirstTrueBit(begin); - if (docId >= _docIdLimit) { - setAtEnd(); + if (!this->isAtEnd()) { + uint32_t docId = getFirstBit(begin); + if (docId >= this->_docIdLimit) { + this->setAtEnd(); } else { - setDocId(docId); + this->setDocId(docId); } } } -queryeval::SearchIterator::UP BitVectorIterator::create(const BitVector *const bv, const TermFieldMatchDataArray &matchData, bool strict) +queryeval::SearchIterator::UP +BitVectorIterator::create(const BitVector *const bv, const TermFieldMatchDataArray &matchData, bool strict) { assert(matchData.size() == 1); return create(bv, bv->size(), *matchData[0], strict); } -queryeval::SearchIterator::UP BitVectorIterator::create(const BitVector *const bv, uint32_t docIdLimit, TermFieldMatchData &matchData, bool strict) + +queryeval::SearchIterator::UP +BitVectorIterator::create(const BitVector *const bv, uint32_t docIdLimit, TermFieldMatchData &matchData, bool strict) { - if (bv == NULL) { - return UP(new queryeval::EmptySearch()); + if (bv == nullptr) { + return std::make_unique<queryeval::EmptySearch>(); } else if (strict) { - return UP(new BitVectorIteratorStrict(*bv, docIdLimit, matchData)); + return std::make_unique<BitVectorIteratorStrictT<false>>(*bv, docIdLimit, matchData); } else { - return UP(new BitVectorIterator(*bv, docIdLimit, matchData)); + return std::make_unique<BitVectorIteratorT<false>>(*bv, docIdLimit, matchData); } } -BitVector::UP BitVectorIterator::get_hits(uint32_t begin_id) { +queryeval::SearchIterator::UP +BitVectorIterator::createInverse(const BitVector *const bv, uint32_t docIdLimit, TermFieldMatchData &matchData, bool strict) +{ + if (bv == nullptr) { + return std::make_unique<queryeval::EmptySearch>(); + } else if (strict) { + return std::make_unique<BitVectorIteratorStrictT<true>>(*bv, docIdLimit, matchData); + } else { + return std::make_unique<BitVectorIteratorT<true>>(*bv, docIdLimit, matchData); + } +} + + +template<bool inverse> +BitVector::UP +BitVectorIteratorT<inverse>::get_hits(uint32_t begin_id) { BitVector::UP result = BitVector::create(_bv, begin_id, getEndId()); + if (inverse) { + result->notSelf(); + } if (begin_id < getDocId()) { result->clearInterval(begin_id, getDocId()); } return result; } -void BitVectorIterator::or_hits_into(BitVector &result, uint32_t begin_id) { - (void) begin_id; - result.orWith(_bv); +template<bool inverse> +void +BitVectorIteratorT<inverse>::or_hits_into(BitVector &result, uint32_t) { + if (inverse) { + result.notSelf(); + result.andWith(_bv); + result.notSelf(); + } else { + result.orWith(_bv); + } } -void BitVectorIterator::and_hits_into(BitVector &result, uint32_t begin_id) { - (void) begin_id; - result.andWith(_bv); +template<bool inverse> +void +BitVectorIteratorT<inverse>::and_hits_into(BitVector &result, uint32_t) { + if (inverse) { + result.andNotWith(_bv); + } else { + result.andWith(_bv); + } } } // namespace search diff --git a/searchlib/src/vespa/searchlib/common/bitvectoriterator.h b/searchlib/src/vespa/searchlib/common/bitvectoriterator.h index f21a983b3a3..ee899d708f6 100644 --- a/searchlib/src/vespa/searchlib/common/bitvectoriterator.h +++ b/searchlib/src/vespa/searchlib/common/bitvectoriterator.h @@ -13,28 +13,28 @@ namespace fef { class TermFieldMatchData; } class BitVectorIterator : public queryeval::SearchIterator { protected: - BitVectorIterator(const BitVector & other, uint32_t docIdLimit, fef::TermFieldMatchData &matchData); + BitVectorIterator(const BitVector & bv, uint32_t docIdLimit, fef::TermFieldMatchData &matchData); void initRange(uint32_t begin, uint32_t end) override; uint32_t _docIdLimit; const BitVector & _bv; private: void visitMembers(vespalib::ObjectVisitor &visitor) const override; - void doSeek(uint32_t docId) override; void doUnpack(uint32_t docId) override; - BitVector::UP get_hits(uint32_t begin_id) override; - void or_hits_into(BitVector &result, uint32_t begin_id) override; - void and_hits_into(BitVector &result, uint32_t begin_id) override; bool isBitVector() const override { return true; } fef::TermFieldMatchData &_tfmd; public: - const void * getBitValues() const { return _bv.getStart(); } + virtual bool isInverted() const = 0; + const void *getBitValues() const { return _bv.getStart(); } Trinary is_strict() const override { return Trinary::False; } virtual bool isStrict() const { return (is_strict() == Trinary::True); } uint32_t getDocIdLimit() const { return _docIdLimit; } static UP create(const BitVector *const other, const fef::TermFieldMatchDataArray &matchData, bool strict); - static UP create(const BitVector *const other, uint32_t docIdLimit, fef::TermFieldMatchData &matchData, bool strict); + static UP create(const BitVector *const other, uint32_t docIdLimit, + fef::TermFieldMatchData &matchData, bool strict); + static UP createInverse(const BitVector *const other, uint32_t docIdLimit, + fef::TermFieldMatchData &matchData, bool strict); }; } // namespace search diff --git a/searchlib/src/vespa/searchlib/queryeval/multibitvectoriterator.cpp b/searchlib/src/vespa/searchlib/queryeval/multibitvectoriterator.cpp index 1495b446bd7..4254ee70b05 100644 --- a/searchlib/src/vespa/searchlib/queryeval/multibitvectoriterator.cpp +++ b/searchlib/src/vespa/searchlib/queryeval/multibitvectoriterator.cpp @@ -11,8 +11,7 @@ #include <vespa/searchlib/fef/termfieldmatchdataarray.h> #include <vespa/vespalib/util/optimized.h> -namespace search { -namespace queryeval { +namespace search::queryeval { namespace { @@ -111,8 +110,8 @@ typedef MultiBitVectorIteratorStrict<Or> OrBVIteratorStrict; bool hasAtLeast2Bitvectors(const MultiSearch::Children & children) { size_t count(0); - for (auto it(children.begin()); it != children.end(); it++) { - if ((*it)->isBitVector()) { + for (const SearchIterator * search : children) { + if (search->isBitVector()) { count++; } } @@ -137,26 +136,25 @@ MultiBitVectorIteratorBase::MultiBitVectorIteratorBase(const Children & children _numDocs(std::numeric_limits<unsigned int>::max()), _lastValue(0), _lastMaxDocIdLimit(0), - _bvs(children.size()) + _bvs() { + _bvs.reserve(children.size()); for (size_t i(0); i < children.size(); i++) { - const BitVectorIterator * bv = static_cast<const BitVectorIterator *>(children[i]); - _bvs[i] = reinterpret_cast<const Word *>(bv->getBitValues()); + const auto * bv = static_cast<const BitVectorIterator *>(children[i]); + _bvs.emplace_back(reinterpret_cast<const Word *>(bv->getBitValues()), bv->isInverted()); _numDocs = std::min(_numDocs, bv->getDocIdLimit()); } } -MultiBitVectorIteratorBase::~MultiBitVectorIteratorBase() -{ -} +MultiBitVectorIteratorBase::~MultiBitVectorIteratorBase() = default; SearchIterator::UP MultiBitVectorIteratorBase::andWith(UP filter, uint32_t estimate) { (void) estimate; if (filter->isBitVector() && acceptExtraFilter()) { - const BitVectorIterator & bv = static_cast<const BitVectorIterator &>(*filter); - _bvs.push_back(reinterpret_cast<const Word *>(bv.getBitValues())); + const auto & bv = static_cast<const BitVectorIterator &>(*filter); + _bvs.emplace_back(reinterpret_cast<const Word *>(bv.getBitValues()), bv.isInverted()); insert(getChildren().size(), std::move(filter)); _lastMaxDocIdLimit = 0; // force reload } @@ -170,8 +168,7 @@ MultiBitVectorIteratorBase::doUnpack(uint32_t docid) MultiSearch::doUnpack(docid); } else { auto &children = getChildren(); - _unpackInfo.each([&children,docid](size_t i){children[i]->doUnpack(docid);}, - children.size()); + _unpackInfo.each([&children,docid](size_t i){children[i]->doUnpack(docid);}, children.size()); } } @@ -179,7 +176,7 @@ SearchIterator::UP MultiBitVectorIteratorBase::optimize(SearchIterator::UP parentIt) { if (parentIt->isSourceBlender()) { - SourceBlenderSearch & parent(static_cast<SourceBlenderSearch &>(*parentIt)); + auto & parent(static_cast<SourceBlenderSearch &>(*parentIt)); for (size_t i(0); i < parent.getNumChildren(); i++) { parent.setChild(i, optimize(parent.steal(i))); } @@ -192,7 +189,7 @@ MultiBitVectorIteratorBase::optimize(SearchIterator::UP parentIt) SearchIterator::UP MultiBitVectorIteratorBase::optimizeMultiSearch(SearchIterator::UP parentIt) { - MultiSearch & parent(static_cast<MultiSearch &>(*parentIt)); + auto & parent(static_cast<MultiSearch &>(*parentIt)); if (canOptimize(parent)) { MultiSearch::Children stolen; std::vector<size_t> _unpackIndex; @@ -218,24 +215,24 @@ MultiBitVectorIteratorBase::optimizeMultiSearch(SearchIterator::UP parentIt) SearchIterator::UP next; if (parent.isAnd()) { if (strict) { - next.reset(new AndBVIteratorStrict(stolen)); + next = std::make_unique<AndBVIteratorStrict>(stolen); } else { - next.reset(new AndBVIterator(stolen)); + next = std::make_unique<AndBVIterator>(stolen); } } else if (parent.isOr()) { if (strict) { - next.reset(new OrBVIteratorStrict(stolen)); + next = std::make_unique<OrBVIteratorStrict>(stolen); } else { - next.reset(new OrBVIterator(stolen)); + next = std::make_unique<OrBVIterator>(stolen); } } else if (parent.isAndNot()) { if (strict) { - next.reset(new OrBVIteratorStrict(stolen)); + next = std::make_unique<OrBVIteratorStrict>(stolen); } else { - next.reset(new OrBVIterator(stolen)); + next = std::make_unique<OrBVIterator>(stolen); } } - MultiBitVectorIteratorBase & nextM(static_cast<MultiBitVectorIteratorBase &>(*next)); + auto & nextM(static_cast<MultiBitVectorIteratorBase &>(*next)); for (size_t index : _unpackIndex) { nextM.addUnpackIndex(index); } @@ -245,14 +242,12 @@ MultiBitVectorIteratorBase::optimizeMultiSearch(SearchIterator::UP parentIt) parent.insert(insertPosition, std::move(next)); } } - MultiSearch::Children & toOptimize(const_cast<MultiSearch::Children &>(parent.getChildren())); - for (size_t i(0); i < toOptimize.size(); i++) { - toOptimize[i] = optimize(MultiSearch::UP(toOptimize[i])).release(); + auto & toOptimize(const_cast<MultiSearch::Children &>(parent.getChildren())); + for (SearchIterator * & search : toOptimize) { + search = optimize(MultiSearch::UP(search)).release(); } return parentIt; } } - -} // namespace search diff --git a/searchlib/src/vespa/searchlib/queryeval/multibitvectoriterator.h b/searchlib/src/vespa/searchlib/queryeval/multibitvectoriterator.h index 15fa044474c..d21026993d7 100644 --- a/searchlib/src/vespa/searchlib/queryeval/multibitvectoriterator.h +++ b/searchlib/src/vespa/searchlib/queryeval/multibitvectoriterator.h @@ -6,8 +6,7 @@ #include "unpackinfo.h" #include <vespa/searchlib/common/bitword.h> -namespace search { -namespace queryeval { +namespace search::queryeval { class MultiBitVectorIteratorBase : public MultiSearch, protected BitWord { @@ -22,11 +21,19 @@ public: static SearchIterator::UP optimize(SearchIterator::UP parent); protected: MultiBitVectorIteratorBase(const Children & children); + class MetaWord { + public: + MetaWord(const Word * words, bool inverted) : _words(words), _inverted(inverted) { } + Word operator [] (uint32_t index) const { return _inverted ? ~_words[index] : _words[index]; } + private: + const Word * _words; + bool _inverted; + }; uint32_t _numDocs; Word _lastValue; // Last value computed uint32_t _lastMaxDocIdLimit; // next documentid requiring recomputation. - std::vector<const Word *> _bvs; + std::vector<MetaWord> _bvs; private: virtual bool acceptExtraFilter() const = 0; UP andWith(UP filter, uint32_t estimate) override; @@ -35,6 +42,4 @@ private: static SearchIterator::UP optimizeMultiSearch(SearchIterator::UP parent); }; -} // namespace queryeval -} // namespace search - +} |