diff options
author | Henning Baldersheim <balder@yahoo-inc.com> | 2017-03-03 15:28:04 +0100 |
---|---|---|
committer | GitHub <noreply@github.com> | 2017-03-03 15:28:04 +0100 |
commit | 4aba3f28bba5b92697ddc8a5dd59bca4f40e63e8 (patch) | |
tree | 86a016dc23d19edc2c3a8cd6601e925205240bbd | |
parent | 6ea31a214f05c61112035820fd395246bb082998 (diff) | |
parent | 1fc29f374e9b5be80183392eeba9493b4b61fab3 (diff) |
Merge pull request #1849 from yahoo/balder/or_hits_into
Balder/or hits into
7 files changed, 87 insertions, 14 deletions
diff --git a/searchlib/src/vespa/searchlib/attribute/attributeiterators.h b/searchlib/src/vespa/searchlib/attribute/attributeiterators.h index 613c666b673..332bd9e79a0 100644 --- a/searchlib/src/vespa/searchlib/attribute/attributeiterators.h +++ b/searchlib/src/vespa/searchlib/attribute/attributeiterators.h @@ -214,6 +214,7 @@ private: void initRange(uint32_t begin, uint32_t end) override; std::unique_ptr<BitVector> get_hits(uint32_t begin_id) override; + void or_hits_into(BitVector &result, uint32_t begin_id) override; public: // Note: iterator constructor argument is destroyed @@ -229,6 +230,7 @@ private: PL _iterator; public: std::unique_ptr<BitVector> get_hits(uint32_t begin_id) override; + void or_hits_into(BitVector &result, uint32_t begin_id) override; private: queryeval::MinMaxPostingInfo _postingInfo; diff --git a/searchlib/src/vespa/searchlib/attribute/attributeiterators.hpp b/searchlib/src/vespa/searchlib/attribute/attributeiterators.hpp index 8f80a128bea..489d22fcfe0 100644 --- a/searchlib/src/vespa/searchlib/attribute/attributeiterators.hpp +++ b/searchlib/src/vespa/searchlib/attribute/attributeiterators.hpp @@ -108,6 +108,17 @@ AttributePostingListIteratorT<PL>::get_hits(uint32_t begin_id) { } template <typename PL> +void +AttributePostingListIteratorT<PL>::or_hits_into(BitVector & result, uint32_t begin_id) { + (void) begin_id; + for (; _iterator.valid() && _iterator.getKey() < getEndId(); ++_iterator) { + if ( ! result.testBit(_iterator.getKey()) ) { + result.setBit(_iterator.getKey()); + } + } +} + +template <typename PL> std::unique_ptr<BitVector> FilterAttributePostingListIteratorT<PL>::get_hits(uint32_t begin_id) { BitVector::UP result(BitVector::create(begin_id, getEndId())); @@ -119,6 +130,17 @@ FilterAttributePostingListIteratorT<PL>::get_hits(uint32_t begin_id) { template <typename PL> void +FilterAttributePostingListIteratorT<PL>::or_hits_into(BitVector & result, uint32_t begin_id) { + (void) begin_id; + for (; _iterator.valid() && _iterator.getKey() < getEndId(); ++_iterator) { + if ( ! result.testBit(_iterator.getKey()) ) { + result.setBit(_iterator.getKey()); + } + } +} + +template <typename PL> +void FilterAttributePostingListIteratorT<PL>::doSeek(uint32_t docId) { _iterator.linearSeek(docId); diff --git a/searchlib/src/vespa/searchlib/queryeval/andsearch.cpp b/searchlib/src/vespa/searchlib/queryeval/andsearch.cpp index 9ce9680f90b..a9c9c55d5d3 100644 --- a/searchlib/src/vespa/searchlib/queryeval/andsearch.cpp +++ b/searchlib/src/vespa/searchlib/queryeval/andsearch.cpp @@ -10,10 +10,7 @@ namespace queryeval { BitVector::UP AndSearch::get_hits(uint32_t begin_id) { const Children &children = getChildren(); - BitVector::UP result = children.front()->get_hits(begin_id); - for (size_t i = 1; i < children.size(); ++i) { - children[i]->and_hits_into(*result, begin_id); - } + BitVector::UP result = andChildren(children, begin_id); return result; } diff --git a/searchlib/src/vespa/searchlib/queryeval/iterator_pack.cpp b/searchlib/src/vespa/searchlib/queryeval/iterator_pack.cpp index c1fba54c825..8553a1acbbe 100644 --- a/searchlib/src/vespa/searchlib/queryeval/iterator_pack.cpp +++ b/searchlib/src/vespa/searchlib/queryeval/iterator_pack.cpp @@ -8,13 +8,10 @@ namespace queryeval { std::unique_ptr<BitVector> SearchIteratorPack::get_hits(uint32_t begin_id, uint32_t end_id) { - if (_children.empty()) { - return BitVector::create(begin_id, end_id); - } - BitVector::UP result = _children[0]->get_hits(begin_id); - for (size_t i = 1; i < size(); ++i) { - _children[i]->or_hits_into(*result, begin_id); + BitVector::UP result = SearchIterator::orChildren(_children, begin_id); + if (! result ) { + result = BitVector::create(begin_id, end_id); } return result; } diff --git a/searchlib/src/vespa/searchlib/queryeval/orsearch.cpp b/searchlib/src/vespa/searchlib/queryeval/orsearch.cpp index c5db2c21b50..1831a07c9d2 100644 --- a/searchlib/src/vespa/searchlib/queryeval/orsearch.cpp +++ b/searchlib/src/vespa/searchlib/queryeval/orsearch.cpp @@ -79,10 +79,7 @@ private: BitVector::UP OrSearch::get_hits(uint32_t begin_id) { const Children &children = getChildren(); - BitVector::UP result = children.front()->get_hits(begin_id); - for (size_t i = 1; i < children.size(); ++i) { - children[i]->or_hits_into(*result, begin_id); - } + BitVector::UP result = orChildren(children, begin_id); return result; } diff --git a/searchlib/src/vespa/searchlib/queryeval/searchiterator.cpp b/searchlib/src/vespa/searchlib/queryeval/searchiterator.cpp index 486f45bfef4..4df201af6be 100644 --- a/searchlib/src/vespa/searchlib/queryeval/searchiterator.cpp +++ b/searchlib/src/vespa/searchlib/queryeval/searchiterator.cpp @@ -80,6 +80,56 @@ SearchIterator::visitMembers(vespalib::ObjectVisitor &visitor) const visit(visitor, "endid", _endid); } +namespace { + +using Children = SearchIterator::Children; + +BitVector::UP +andIterators(BitVector::UP result, const Children &children, uint32_t begin_id, bool select_bitvector) { + for (SearchIterator *child : children) { + if (child->isBitVector() == select_bitvector) { + if (!result) { + result = child->get_hits(begin_id); + } else { + child->and_hits_into(*result, begin_id); + } + } + } + return result; +} + +template<typename Children> +BitVector::UP +orIterators(BitVector::UP result, const Children &children, uint32_t begin_id, bool select_bitvector) { + for (auto & child : children) { + if (child->isBitVector() == select_bitvector) { + if (!result) { + result = child->get_hits(begin_id); + } else { + child->or_hits_into(*result, begin_id); + } + } + } + return result; +} + +} + +BitVector::UP +SearchIterator::andChildren(const Children &children, uint32_t begin_id) { + return andIterators(andIterators(BitVector::UP(), children, begin_id, true), children, begin_id, false); +} + +BitVector::UP +SearchIterator::orChildren(const Children &children, uint32_t begin_id) { + return orIterators(orIterators(BitVector::UP(), children, begin_id, true), children, begin_id, false); +} + +BitVector::UP +SearchIterator::orChildren(const OwnedChildren &children, uint32_t begin_id) { + return orIterators(orIterators(BitVector::UP(), children, begin_id, true), children, begin_id, false); +} + } // namespace queryeval } // namespace search diff --git a/searchlib/src/vespa/searchlib/queryeval/searchiterator.h b/searchlib/src/vespa/searchlib/queryeval/searchiterator.h index d4246defc18..2b7b4200041 100644 --- a/searchlib/src/vespa/searchlib/queryeval/searchiterator.h +++ b/searchlib/src/vespa/searchlib/queryeval/searchiterator.h @@ -7,6 +7,7 @@ #include <vespa/vespalib/stllike/string.h> #include <vespa/vespalib/util/trinary.h> #include <memory> +#include <vector> namespace vespalib { class ObjectVisitor; }; @@ -326,6 +327,13 @@ public: virtual UP andWith(UP filter, uint32_t estimate); virtual Trinary is_strict() const { return Trinary::Undefined; } + + using Children = std::vector<SearchIterator *>; + using OwnedChildren = std::vector<std::unique_ptr<SearchIterator>>; + + static std::unique_ptr<BitVector> andChildren(const Children & children, uint32_t begin_id); + static std::unique_ptr<BitVector> orChildren(const Children & children, uint32_t begin_id); + static std::unique_ptr<BitVector> orChildren(const OwnedChildren & children, uint32_t begin_id); }; } // namespace queryeval |