diff options
author | Henning Baldersheim <balder@yahoo-inc.com> | 2023-09-26 12:24:29 +0200 |
---|---|---|
committer | GitHub <noreply@github.com> | 2023-09-26 12:24:29 +0200 |
commit | 589f8faf81c9ed1ace32ffb67653d2bc9b95cc51 (patch) | |
tree | 2b20aee35dda739c8921a7b4c412fff96897c999 /searchlib | |
parent | d186922bea9bb26d9ab59182a9bf12340c024579 (diff) | |
parent | 40940483b8f551d2284f582bbb4af07a1c18ac87 (diff) |
Merge pull request #28654 from vespa-engine/balder/return-early-on-match
- Return early in doSeek if docId found.
Diffstat (limited to 'searchlib')
6 files changed, 43 insertions, 29 deletions
diff --git a/searchlib/src/tests/attribute/document_weight_or_filter_search/document_weight_or_filter_search_test.cpp b/searchlib/src/tests/attribute/document_weight_or_filter_search/document_weight_or_filter_search_test.cpp index b9c70d76934..1fd9dde09c7 100644 --- a/searchlib/src/tests/attribute/document_weight_or_filter_search/document_weight_or_filter_search_test.cpp +++ b/searchlib/src/tests/attribute/document_weight_or_filter_search/document_weight_or_filter_search_test.cpp @@ -24,14 +24,14 @@ class DocumentWeightOrFilterSearchTest : public ::testing::Test { uint32_t _range_end; public: DocumentWeightOrFilterSearchTest(); - ~DocumentWeightOrFilterSearchTest(); + ~DocumentWeightOrFilterSearchTest() override; void inc_generation(); size_t num_trees() const { return _trees.size(); } Iterator get_tree(size_t idx) const { if (idx < _trees.size()) { return _postings.beginFrozen(_trees[idx]); } else { - return Iterator(); + return {}; } } void ensure_tree(size_t idx) { @@ -39,13 +39,13 @@ public: _trees.resize(idx + 1); } } - void add_tree(size_t idx, std::vector<uint32_t> keys) { + void add_tree(size_t idx, const std::vector<uint32_t>& keys) { ensure_tree(idx); std::vector<KeyData> adds; std::vector<uint32_t> removes; adds.reserve(keys.size()); for (auto& key : keys) { - adds.emplace_back(KeyData(key, 1)); + adds.emplace_back(key, 1); } _postings.apply(_trees[idx], adds.data(), adds.data() + adds.size(), removes.data(), removes.data() + removes.size()); } @@ -67,7 +67,7 @@ public: return result; }; - std::vector<uint32_t> eval_daat(SearchIterator &iterator) { + std::vector<uint32_t> eval_daat(SearchIterator &iterator) const { std::vector<uint32_t> result; uint32_t doc_id = _range_start; while (doc_id < _range_end) { @@ -81,7 +81,7 @@ public: return result; } - std::vector<uint32_t> frombv(const BitVector &bv) { + std::vector<uint32_t> frombv(const BitVector &bv) const { std::vector<uint32_t> result; uint32_t doc_id = _range_start; doc_id = bv.getNextTrueBit(doc_id); @@ -93,7 +93,7 @@ public: return result; } - std::unique_ptr<BitVector> tobv(std::vector<uint32_t> values) { + std::unique_ptr<BitVector> tobv(const std::vector<uint32_t> & values) const { auto bv = BitVector::create(_range_start, _range_end); for (auto value : values) { bv->setBit(value); @@ -102,7 +102,7 @@ public: return bv; } - void expect_result(std::vector<uint32_t> exp, std::vector<uint32_t> act) + static void expect_result(const std::vector<uint32_t> & exp, const std::vector<uint32_t> & act) { EXPECT_EQ(exp, act); } @@ -227,7 +227,7 @@ public: } _test.inc_generation(); } - ~Verifier() { + ~Verifier() override { for (uint32_t tree_id = 0; tree_id < _test.num_trees(); ++tree_id) { _test.clear_tree(tree_id); } diff --git a/searchlib/src/vespa/searchlib/attribute/attribute_blueprint_factory.cpp b/searchlib/src/vespa/searchlib/attribute/attribute_blueprint_factory.cpp index b4cdd621b71..71ea2a67299 100644 --- a/searchlib/src/vespa/searchlib/attribute/attribute_blueprint_factory.cpp +++ b/searchlib/src/vespa/searchlib/attribute/attribute_blueprint_factory.cpp @@ -482,6 +482,9 @@ DirectWeightedSetBlueprint<SearchType>::createLeafSearch(const TermFieldMatchDat _attr.create(r.posting_idx, iterators); } bool field_is_filter = getState().fields()[0].isFilter(); + if (field_is_filter && tfmda[0]->isNotNeeded()) { + return attribute::DocumentWeightOrFilterSearch::create(std::move(iterators)); + } return SearchType::create(*tfmda[0], field_is_filter, _weights, std::move(iterators)); } diff --git a/searchlib/src/vespa/searchlib/attribute/document_weight_or_filter_search.cpp b/searchlib/src/vespa/searchlib/attribute/document_weight_or_filter_search.cpp index e2566c94f1c..c840c5cbc91 100644 --- a/searchlib/src/vespa/searchlib/attribute/document_weight_or_filter_search.cpp +++ b/searchlib/src/vespa/searchlib/attribute/document_weight_or_filter_search.cpp @@ -10,6 +10,7 @@ namespace search::attribute { class DocumentWeightOrFilterSearchImpl : public DocumentWeightOrFilterSearch { AttributeIteratorPack _children; + void seek_all(uint32_t docId); public: explicit DocumentWeightOrFilterSearchImpl(AttributeIteratorPack&& children); ~DocumentWeightOrFilterSearchImpl() override; @@ -32,6 +33,7 @@ public: } std::unique_ptr<BitVector> get_hits(uint32_t begin_id) override { + seek_all(getDocId()); return _children.get_hits(begin_id, getEndId()); } @@ -47,17 +49,29 @@ DocumentWeightOrFilterSearchImpl::DocumentWeightOrFilterSearchImpl(AttributeIter DocumentWeightOrFilterSearchImpl::~DocumentWeightOrFilterSearchImpl() = default; void +DocumentWeightOrFilterSearchImpl::seek_all(uint32_t docId) { + for (uint16_t i = 0; i < _children.size(); ++i) { + uint32_t next = _children.get_docid(i); + if (next < docId) { + _children.seek(i, docId); + } + } +} + +void DocumentWeightOrFilterSearchImpl::doSeek(uint32_t docId) { - if (_children.get_docid(0) < docId) { - _children.seek(0, docId); - } - uint32_t min_doc_id = _children.get_docid(0); - for (uint16_t i = 1; i < _children.size(); ++i) { - if (_children.get_docid(i) < docId) { - _children.seek(i, docId); + uint32_t min_doc_id = endDocId; + for (uint16_t i = 0; i < _children.size(); ++i) { + uint32_t next = _children.get_docid(i); + if (next < docId) { + next = _children.seek(i, docId); + } + if (next == docId) { + setDocId(next); + return; } - min_doc_id = std::min(min_doc_id, _children.get_docid(i)); + min_doc_id = std::min(min_doc_id, next); } setDocId(min_doc_id); } @@ -73,6 +87,8 @@ DocumentWeightOrFilterSearch::create(std::vector<DocumentWeightIterator>&& child if (children.empty()) { return std::make_unique<queryeval::EmptySearch>(); } else { + std::sort(children.begin(), children.end(), + [](const auto & a, const auto & b) { return a.size() > b.size(); }); return std::make_unique<DocumentWeightOrFilterSearchImpl>(AttributeIteratorPack(std::move(children))); } } diff --git a/searchlib/src/vespa/searchlib/attribute/iterator_pack.cpp b/searchlib/src/vespa/searchlib/attribute/iterator_pack.cpp index 147f56d6d47..ab06fc270bd 100644 --- a/searchlib/src/vespa/searchlib/attribute/iterator_pack.cpp +++ b/searchlib/src/vespa/searchlib/attribute/iterator_pack.cpp @@ -17,9 +17,9 @@ AttributeIteratorPack::or_hits_into(BitVector &result, uint32_t begin_id) { for (size_t i = 0; i < size(); ++i) { uint32_t docId = get_docid(i); if (begin_id > docId) { - seek(i, begin_id); + docId = seek(i, begin_id); } - for (docId = get_docid(i); docId < result.size(); docId = next(i)) { + for (uint32_t limit = result.size(); docId < limit; docId = next(i)) { result.setBit(docId); } } diff --git a/searchlib/src/vespa/searchlib/attribute/iterator_pack.h b/searchlib/src/vespa/searchlib/attribute/iterator_pack.h index e042aab5eae..1753a3d0c2d 100644 --- a/searchlib/src/vespa/searchlib/attribute/iterator_pack.h +++ b/searchlib/src/vespa/searchlib/attribute/iterator_pack.h @@ -41,7 +41,7 @@ public: std::unique_ptr<BitVector> get_hits(uint32_t begin_id, uint32_t end_id); void or_hits_into(BitVector &result, uint32_t begin_id); - size_t size() const { return _children.size(); } + size_t size() const noexcept { return _children.size(); } void initRange(uint32_t begin, uint32_t end) { (void) end; for (auto &child: _children) { diff --git a/searchlib/src/vespa/searchlib/queryeval/weighted_set_term_search.h b/searchlib/src/vespa/searchlib/queryeval/weighted_set_term_search.h index e3e12c27f28..b30d3bc3301 100644 --- a/searchlib/src/vespa/searchlib/queryeval/weighted_set_term_search.h +++ b/searchlib/src/vespa/searchlib/queryeval/weighted_set_term_search.h @@ -10,12 +10,9 @@ #include <memory> #include <vector> -namespace search { -namespace fef { -class TermFieldMatchData; -} // namespace fef +namespace search::fef { class TermFieldMatchData; } -namespace queryeval { +namespace search::queryeval { class Blueprint; @@ -26,7 +23,7 @@ class Blueprint; class WeightedSetTermSearch : public SearchIterator { protected: - WeightedSetTermSearch() {} + WeightedSetTermSearch() = default; public: // TODO: pass ownership with unique_ptr @@ -47,6 +44,4 @@ public: virtual void find_matching_elements(uint32_t docid, const std::vector<std::unique_ptr<Blueprint>> &child_blueprints, std::vector<uint32_t> &dst) = 0; }; -} // namespace search::queryeval -} // namespace search - +} |