diff options
author | Henning Baldersheim <balder@yahoo-inc.com> | 2023-09-26 12:24:29 +0200 |
---|---|---|
committer | GitHub <noreply@github.com> | 2023-09-26 12:24:29 +0200 |
commit | 589f8faf81c9ed1ace32ffb67653d2bc9b95cc51 (patch) | |
tree | 2b20aee35dda739c8921a7b4c412fff96897c999 | |
parent | d186922bea9bb26d9ab59182a9bf12340c024579 (diff) | |
parent | 40940483b8f551d2284f582bbb4af07a1c18ac87 (diff) |
Merge pull request #28654 from vespa-engine/balder/return-early-on-match
- Return early in doSeek if docId found.
9 files changed, 60 insertions, 48 deletions
diff --git a/searchlib/src/tests/attribute/document_weight_or_filter_search/document_weight_or_filter_search_test.cpp b/searchlib/src/tests/attribute/document_weight_or_filter_search/document_weight_or_filter_search_test.cpp index b9c70d76934..1fd9dde09c7 100644 --- a/searchlib/src/tests/attribute/document_weight_or_filter_search/document_weight_or_filter_search_test.cpp +++ b/searchlib/src/tests/attribute/document_weight_or_filter_search/document_weight_or_filter_search_test.cpp @@ -24,14 +24,14 @@ class DocumentWeightOrFilterSearchTest : public ::testing::Test { uint32_t _range_end; public: DocumentWeightOrFilterSearchTest(); - ~DocumentWeightOrFilterSearchTest(); + ~DocumentWeightOrFilterSearchTest() override; void inc_generation(); size_t num_trees() const { return _trees.size(); } Iterator get_tree(size_t idx) const { if (idx < _trees.size()) { return _postings.beginFrozen(_trees[idx]); } else { - return Iterator(); + return {}; } } void ensure_tree(size_t idx) { @@ -39,13 +39,13 @@ public: _trees.resize(idx + 1); } } - void add_tree(size_t idx, std::vector<uint32_t> keys) { + void add_tree(size_t idx, const std::vector<uint32_t>& keys) { ensure_tree(idx); std::vector<KeyData> adds; std::vector<uint32_t> removes; adds.reserve(keys.size()); for (auto& key : keys) { - adds.emplace_back(KeyData(key, 1)); + adds.emplace_back(key, 1); } _postings.apply(_trees[idx], adds.data(), adds.data() + adds.size(), removes.data(), removes.data() + removes.size()); } @@ -67,7 +67,7 @@ public: return result; }; - std::vector<uint32_t> eval_daat(SearchIterator &iterator) { + std::vector<uint32_t> eval_daat(SearchIterator &iterator) const { std::vector<uint32_t> result; uint32_t doc_id = _range_start; while (doc_id < _range_end) { @@ -81,7 +81,7 @@ public: return result; } - std::vector<uint32_t> frombv(const BitVector &bv) { + std::vector<uint32_t> frombv(const BitVector &bv) const { std::vector<uint32_t> result; uint32_t doc_id = _range_start; doc_id = bv.getNextTrueBit(doc_id); @@ -93,7 +93,7 @@ public: return result; } - std::unique_ptr<BitVector> tobv(std::vector<uint32_t> values) { + std::unique_ptr<BitVector> tobv(const std::vector<uint32_t> & values) const { auto bv = BitVector::create(_range_start, _range_end); for (auto value : values) { bv->setBit(value); @@ -102,7 +102,7 @@ public: return bv; } - void expect_result(std::vector<uint32_t> exp, std::vector<uint32_t> act) + static void expect_result(const std::vector<uint32_t> & exp, const std::vector<uint32_t> & act) { EXPECT_EQ(exp, act); } @@ -227,7 +227,7 @@ public: } _test.inc_generation(); } - ~Verifier() { + ~Verifier() override { for (uint32_t tree_id = 0; tree_id < _test.num_trees(); ++tree_id) { _test.clear_tree(tree_id); } diff --git a/searchlib/src/vespa/searchlib/attribute/attribute_blueprint_factory.cpp b/searchlib/src/vespa/searchlib/attribute/attribute_blueprint_factory.cpp index b4cdd621b71..71ea2a67299 100644 --- a/searchlib/src/vespa/searchlib/attribute/attribute_blueprint_factory.cpp +++ b/searchlib/src/vespa/searchlib/attribute/attribute_blueprint_factory.cpp @@ -482,6 +482,9 @@ DirectWeightedSetBlueprint<SearchType>::createLeafSearch(const TermFieldMatchDat _attr.create(r.posting_idx, iterators); } bool field_is_filter = getState().fields()[0].isFilter(); + if (field_is_filter && tfmda[0]->isNotNeeded()) { + return attribute::DocumentWeightOrFilterSearch::create(std::move(iterators)); + } return SearchType::create(*tfmda[0], field_is_filter, _weights, std::move(iterators)); } diff --git a/searchlib/src/vespa/searchlib/attribute/document_weight_or_filter_search.cpp b/searchlib/src/vespa/searchlib/attribute/document_weight_or_filter_search.cpp index e2566c94f1c..c840c5cbc91 100644 --- a/searchlib/src/vespa/searchlib/attribute/document_weight_or_filter_search.cpp +++ b/searchlib/src/vespa/searchlib/attribute/document_weight_or_filter_search.cpp @@ -10,6 +10,7 @@ namespace search::attribute { class DocumentWeightOrFilterSearchImpl : public DocumentWeightOrFilterSearch { AttributeIteratorPack _children; + void seek_all(uint32_t docId); public: explicit DocumentWeightOrFilterSearchImpl(AttributeIteratorPack&& children); ~DocumentWeightOrFilterSearchImpl() override; @@ -32,6 +33,7 @@ public: } std::unique_ptr<BitVector> get_hits(uint32_t begin_id) override { + seek_all(getDocId()); return _children.get_hits(begin_id, getEndId()); } @@ -47,17 +49,29 @@ DocumentWeightOrFilterSearchImpl::DocumentWeightOrFilterSearchImpl(AttributeIter DocumentWeightOrFilterSearchImpl::~DocumentWeightOrFilterSearchImpl() = default; void +DocumentWeightOrFilterSearchImpl::seek_all(uint32_t docId) { + for (uint16_t i = 0; i < _children.size(); ++i) { + uint32_t next = _children.get_docid(i); + if (next < docId) { + _children.seek(i, docId); + } + } +} + +void DocumentWeightOrFilterSearchImpl::doSeek(uint32_t docId) { - if (_children.get_docid(0) < docId) { - _children.seek(0, docId); - } - uint32_t min_doc_id = _children.get_docid(0); - for (uint16_t i = 1; i < _children.size(); ++i) { - if (_children.get_docid(i) < docId) { - _children.seek(i, docId); + uint32_t min_doc_id = endDocId; + for (uint16_t i = 0; i < _children.size(); ++i) { + uint32_t next = _children.get_docid(i); + if (next < docId) { + next = _children.seek(i, docId); + } + if (next == docId) { + setDocId(next); + return; } - min_doc_id = std::min(min_doc_id, _children.get_docid(i)); + min_doc_id = std::min(min_doc_id, next); } setDocId(min_doc_id); } @@ -73,6 +87,8 @@ DocumentWeightOrFilterSearch::create(std::vector<DocumentWeightIterator>&& child if (children.empty()) { return std::make_unique<queryeval::EmptySearch>(); } else { + std::sort(children.begin(), children.end(), + [](const auto & a, const auto & b) { return a.size() > b.size(); }); return std::make_unique<DocumentWeightOrFilterSearchImpl>(AttributeIteratorPack(std::move(children))); } } diff --git a/searchlib/src/vespa/searchlib/attribute/iterator_pack.cpp b/searchlib/src/vespa/searchlib/attribute/iterator_pack.cpp index 147f56d6d47..ab06fc270bd 100644 --- a/searchlib/src/vespa/searchlib/attribute/iterator_pack.cpp +++ b/searchlib/src/vespa/searchlib/attribute/iterator_pack.cpp @@ -17,9 +17,9 @@ AttributeIteratorPack::or_hits_into(BitVector &result, uint32_t begin_id) { for (size_t i = 0; i < size(); ++i) { uint32_t docId = get_docid(i); if (begin_id > docId) { - seek(i, begin_id); + docId = seek(i, begin_id); } - for (docId = get_docid(i); docId < result.size(); docId = next(i)) { + for (uint32_t limit = result.size(); docId < limit; docId = next(i)) { result.setBit(docId); } } diff --git a/searchlib/src/vespa/searchlib/attribute/iterator_pack.h b/searchlib/src/vespa/searchlib/attribute/iterator_pack.h index e042aab5eae..1753a3d0c2d 100644 --- a/searchlib/src/vespa/searchlib/attribute/iterator_pack.h +++ b/searchlib/src/vespa/searchlib/attribute/iterator_pack.h @@ -41,7 +41,7 @@ public: std::unique_ptr<BitVector> get_hits(uint32_t begin_id, uint32_t end_id); void or_hits_into(BitVector &result, uint32_t begin_id); - size_t size() const { return _children.size(); } + size_t size() const noexcept { return _children.size(); } void initRange(uint32_t begin, uint32_t end) { (void) end; for (auto &child: _children) { diff --git a/searchlib/src/vespa/searchlib/queryeval/weighted_set_term_search.h b/searchlib/src/vespa/searchlib/queryeval/weighted_set_term_search.h index e3e12c27f28..b30d3bc3301 100644 --- a/searchlib/src/vespa/searchlib/queryeval/weighted_set_term_search.h +++ b/searchlib/src/vespa/searchlib/queryeval/weighted_set_term_search.h @@ -10,12 +10,9 @@ #include <memory> #include <vector> -namespace search { -namespace fef { -class TermFieldMatchData; -} // namespace fef +namespace search::fef { class TermFieldMatchData; } -namespace queryeval { +namespace search::queryeval { class Blueprint; @@ -26,7 +23,7 @@ class Blueprint; class WeightedSetTermSearch : public SearchIterator { protected: - WeightedSetTermSearch() {} + WeightedSetTermSearch() = default; public: // TODO: pass ownership with unique_ptr @@ -47,6 +44,4 @@ public: virtual void find_matching_elements(uint32_t docid, const std::vector<std::unique_ptr<Blueprint>> &child_blueprints, std::vector<uint32_t> &dst) = 0; }; -} // namespace search::queryeval -} // namespace search - +} diff --git a/vespalib/src/vespa/vespalib/btree/btreeiterator.h b/vespalib/src/vespa/vespalib/btree/btreeiterator.h index 2418da18c23..9480e5880e0 100644 --- a/vespalib/src/vespa/vespalib/btree/btreeiterator.h +++ b/vespalib/src/vespa/vespalib/btree/btreeiterator.h @@ -302,22 +302,22 @@ public: /** * Get key at current iterator location. */ - const KeyType & getKey() const { return _leaf.getKey(); } + const KeyType & getKey() const noexcept { return _leaf.getKey(); } /** * Get data at current iterator location. */ - const DataType & getData() const { return _leaf.getData(); } + const DataType & getData() const noexcept { return _leaf.getData(); } /** * Check if iterator is at a valid element, i.e. not at end. */ - bool valid() const { return _leaf.valid(); } + bool valid() const noexcept{ return _leaf.valid(); } /** * Return the number of elements in the tree. */ - size_t size() const; + size_t size() const noexcept; /** @@ -333,7 +333,7 @@ public: /** * Return if the tree has data or not (e.g. keys and data or only keys). */ - static bool hasData() { return LeafNodeType::hasData(); } + static bool hasData() noexcept { return LeafNodeType::hasData(); } /** * Move the iterator directly to end. Used by findHelper method in BTree. diff --git a/vespalib/src/vespa/vespalib/btree/btreeiterator.hpp b/vespalib/src/vespa/vespalib/btree/btreeiterator.hpp index b7927feaa1a..d6dda0047ce 100644 --- a/vespalib/src/vespa/vespalib/btree/btreeiterator.hpp +++ b/vespalib/src/vespa/vespalib/btree/btreeiterator.hpp @@ -387,15 +387,13 @@ position(uint32_t levels) const res += inode->validLeaves(); for (uint32_t c = elem.getIdx(); c < slots; ++c) { BTreeNode::Ref node = inode->getChild(c); - const InternalNodeType *jnode = - _allocator->mapInternalRef(node); + const InternalNodeType *jnode = _allocator->mapInternalRef(node); res -= jnode->validLeaves(); } } else { for (uint32_t c = 0; c < elem.getIdx(); ++c) { BTreeNode::Ref node = inode->getChild(c); - const InternalNodeType *jnode = - _allocator->mapInternalRef(node); + const InternalNodeType *jnode = _allocator->mapInternalRef(node); res += jnode->validLeaves(); } } @@ -484,7 +482,7 @@ template <typename KeyT, typename DataT, typename AggrT, uint32_t INTERNAL_SLOTS, uint32_t LEAF_SLOTS, uint32_t PATH_SIZE> size_t BTreeIteratorBase<KeyT, DataT, AggrT, INTERNAL_SLOTS, LEAF_SLOTS, PATH_SIZE>:: -size() const +size() const noexcept { if (_pathSize > 0) { return _path[_pathSize - 1].getNode()->validLeaves(); diff --git a/vespalib/src/vespa/vespalib/btree/btreenode.h b/vespalib/src/vespa/vespalib/btree/btreenode.h index 0a77a0b4685..4931021d771 100644 --- a/vespalib/src/vespa/vespalib/btree/btreenode.h +++ b/vespalib/src/vespa/vespalib/btree/btreenode.h @@ -67,14 +67,14 @@ public: using Ref = datastore::EntryRef; using ChildRef = datastore::AtomicEntryRef; - bool isLeaf() const { return _level == 0u; } - bool getFrozen() const { return _isFrozen; } - void freeze() { _isFrozen = true; } - void unFreeze() { _isFrozen = false; } - void setLevel(uint8_t level) { _level = level; } - uint32_t getLevel() const { return _level; } - uint32_t validSlots() const { return _validSlots; } - void setValidSlots(uint16_t validSlots_) { _validSlots = validSlots_; } + bool isLeaf() const noexcept { return _level == 0u; } + bool getFrozen() const noexcept { return _isFrozen; } + void freeze() noexcept { _isFrozen = true; } + void unFreeze() noexcept { _isFrozen = false; } + void setLevel(uint8_t level) noexcept { _level = level; } + uint32_t getLevel() const noexcept { return _level; } + uint32_t validSlots() const noexcept { return _validSlots; } + void setValidSlots(uint16_t validSlots_) noexcept { _validSlots = validSlots_; } }; @@ -358,7 +358,7 @@ public: void insert(uint32_t idx, const KeyT & key, BTreeNode::Ref child) { insert(idx, key, BTreeNode::ChildRef(child)); } - uint32_t validLeaves() const { return _validLeaves; } + uint32_t validLeaves() const noexcept { return _validLeaves; } void setValidLeaves(uint32_t newValidLeaves) { _validLeaves = newValidLeaves; } void incValidLeaves(uint32_t delta) { _validLeaves += delta; } void decValidLeaves(uint32_t delta) { _validLeaves -= delta; } |