From c9f6710f6ea2e63b4f8b86ab63e549793799b9ae Mon Sep 17 00:00:00 2001 From: Geir Storli Date: Fri, 5 Apr 2024 13:58:55 +0000 Subject: Support more than 64k child iterators for IN and weightedSet. This adds an iterator pack that uses a uint32_t to address the child iterators instead of the default uint16_t. --- .../direct_multi_term_blueprint_test.cpp | 9 +++++ .../searchlib/attribute/posting_iterator_pack.cpp | 38 ++++++++++++++------ .../searchlib/attribute/posting_iterator_pack.h | 12 ++++--- .../vespa/searchlib/queryeval/iterator_pack.cpp | 41 +++++++++++++++------- .../src/vespa/searchlib/queryeval/iterator_pack.h | 26 ++++++++------ .../queryeval/weighted_set_term_search.cpp | 30 ++++++++++++---- 6 files changed, 113 insertions(+), 43 deletions(-) (limited to 'searchlib') diff --git a/searchlib/src/tests/attribute/direct_multi_term_blueprint/direct_multi_term_blueprint_test.cpp b/searchlib/src/tests/attribute/direct_multi_term_blueprint/direct_multi_term_blueprint_test.cpp index 1d66c59d2c9..196b1cbb475 100644 --- a/searchlib/src/tests/attribute/direct_multi_term_blueprint/direct_multi_term_blueprint_test.cpp +++ b/searchlib/src/tests/attribute/direct_multi_term_blueprint/direct_multi_term_blueprint_test.cpp @@ -435,4 +435,13 @@ TEST_P(DirectMultiTermBlueprintTest, hash_filter_with_string_folding_used_for_no expect_hits({30, 31, 40, 41}, *itr); } +TEST_P(DirectMultiTermBlueprintTest, supports_more_than_64k_btree_iterators) { + setup(false, true); + std::vector term_values(std::numeric_limits::max() + 1, 3); + add_terms(term_values); + auto itr = create_leaf_search(); + EXPECT_THAT(itr->asString(), StartsWith(resolve_iterator_with_unpack())); + expect_hits({30, 31}, *itr); +} + GTEST_MAIN_RUN_ALL_TESTS() diff --git a/searchlib/src/vespa/searchlib/attribute/posting_iterator_pack.cpp b/searchlib/src/vespa/searchlib/attribute/posting_iterator_pack.cpp index 662d77dd5d7..93bc4735916 100644 --- a/searchlib/src/vespa/searchlib/attribute/posting_iterator_pack.cpp +++ b/searchlib/src/vespa/searchlib/attribute/posting_iterator_pack.cpp @@ -6,27 +6,34 @@ namespace search { -template -PostingIteratorPack::~PostingIteratorPack() = default; +template +PostingIteratorPack::~PostingIteratorPack() = default; -template -PostingIteratorPack::PostingIteratorPack(std::vector &&children) +template +PostingIteratorPack::PostingIteratorPack(std::vector &&children) : _children(std::move(children)) { assert(_children.size() <= std::numeric_limits::max()); } -template +template +bool +PostingIteratorPack::can_handle_iterators(size_t num_iterators) +{ + return num_iterators <= std::numeric_limits::max(); +} + +template std::unique_ptr -PostingIteratorPack::get_hits(uint32_t begin_id, uint32_t end_id) { +PostingIteratorPack::get_hits(uint32_t begin_id, uint32_t end_id) { BitVector::UP result(BitVector::create(begin_id, end_id)); or_hits_into(*result, begin_id); return result; } -template +template void -PostingIteratorPack::or_hits_into(BitVector &result, uint32_t begin_id) { +PostingIteratorPack::or_hits_into(BitVector &result, uint32_t begin_id) { for (size_t i = 0; i < size(); ++i) { uint32_t docId = get_docid(i); if (begin_id > docId) { @@ -41,12 +48,21 @@ PostingIteratorPack::or_hits_into(BitVector &result, uint32_t begi template <> int32_t -PostingIteratorPack::get_weight(ref_t, uint32_t) +PostingIteratorPack::get_weight(ref_t, uint32_t) +{ + return 1; +} + +template <> +int32_t +PostingIteratorPack::get_weight(ref_t, uint32_t) { return 1; } -template class PostingIteratorPack; -template class PostingIteratorPack; +template class PostingIteratorPack; +template class PostingIteratorPack; +template class PostingIteratorPack; +template class PostingIteratorPack; } diff --git a/searchlib/src/vespa/searchlib/attribute/posting_iterator_pack.h b/searchlib/src/vespa/searchlib/attribute/posting_iterator_pack.h index 28150730bad..7c6b85dd3ec 100644 --- a/searchlib/src/vespa/searchlib/attribute/posting_iterator_pack.h +++ b/searchlib/src/vespa/searchlib/attribute/posting_iterator_pack.h @@ -12,13 +12,13 @@ class BitVector; /** * Class that wraps a set of underlying low-level posting lists and provides an API to search in them. */ -template +template class PostingIteratorPack { private: std::vector _children; public: - using ref_t = uint16_t; + using ref_t = RefType; PostingIteratorPack() noexcept : _children() {} PostingIteratorPack(PostingIteratorPack &&rhs) noexcept = default; PostingIteratorPack &operator=(PostingIteratorPack &&rhs) noexcept = default; @@ -26,6 +26,8 @@ public: explicit PostingIteratorPack(std::vector &&children); ~PostingIteratorPack(); + static bool can_handle_iterators(size_t num_iterators); + uint32_t get_docid(ref_t ref) const { return _children[ref].valid() ? _children[ref].getKey() : endDocId; } @@ -59,8 +61,10 @@ private: } }; -using DocidIteratorPack = PostingIteratorPack; -using DocidWithWeightIteratorPack = PostingIteratorPack; +using DocidIteratorPack = PostingIteratorPack; +using DocidIteratorPackUint32 = PostingIteratorPack; +using DocidWithWeightIteratorPack = PostingIteratorPack; +using DocidWithWeightIteratorPackUint32 = PostingIteratorPack; } diff --git a/searchlib/src/vespa/searchlib/queryeval/iterator_pack.cpp b/searchlib/src/vespa/searchlib/queryeval/iterator_pack.cpp index fdea54424de..5b3a67d7b5e 100644 --- a/searchlib/src/vespa/searchlib/queryeval/iterator_pack.cpp +++ b/searchlib/src/vespa/searchlib/queryeval/iterator_pack.cpp @@ -8,18 +8,23 @@ namespace search::queryeval { -SearchIteratorPack::~SearchIteratorPack() = default; +template +SearchIteratorPackT::~SearchIteratorPackT() = default; -SearchIteratorPack::SearchIteratorPack() = default; +template +SearchIteratorPackT::SearchIteratorPackT() = default; -SearchIteratorPack::SearchIteratorPack(SearchIteratorPack &&rhs) noexcept = default; +template +SearchIteratorPackT::SearchIteratorPackT(SearchIteratorPackT &&rhs) noexcept = default; -SearchIteratorPack & -SearchIteratorPack::operator=(SearchIteratorPack &&rhs) noexcept = default; +template +SearchIteratorPackT & +SearchIteratorPackT::operator=(SearchIteratorPackT &&rhs) noexcept = default; -SearchIteratorPack::SearchIteratorPack(const std::vector &children, - const std::vector &childMatch, - MatchDataUP md) +template +SearchIteratorPackT::SearchIteratorPackT(const std::vector &children, + const std::vector &childMatch, + MatchDataUP md) : _children(), _childMatch(childMatch), _md(std::move(md)) @@ -32,12 +37,21 @@ SearchIteratorPack::SearchIteratorPack(const std::vector &child assert(_children.size() <= std::numeric_limits::max()); } -SearchIteratorPack::SearchIteratorPack(const std::vector &children, MatchDataUP md) - : SearchIteratorPack(children, std::vector(), MatchDataUP(std::move(md))) +template +SearchIteratorPackT::SearchIteratorPackT(const std::vector &children, MatchDataUP md) + : SearchIteratorPackT(children, std::vector(), MatchDataUP(std::move(md))) { } +template +bool +SearchIteratorPackT::can_handle_iterators(size_t num_iterators) +{ + return num_iterators <= std::numeric_limits::max(); +} + +template std::unique_ptr -SearchIteratorPack::get_hits(uint32_t begin_id, uint32_t end_id) const { +SearchIteratorPackT::get_hits(uint32_t begin_id, uint32_t end_id) const { BitVector::UP result = TermwiseHelper::orChildren(_children.begin(), _children.end(), begin_id); if (! result ) { result = BitVector::create(begin_id, end_id); @@ -45,10 +59,13 @@ SearchIteratorPack::get_hits(uint32_t begin_id, uint32_t end_id) const { return result; } +template void -SearchIteratorPack::or_hits_into(BitVector &result, uint32_t begin_id) const { +SearchIteratorPackT::or_hits_into(BitVector &result, uint32_t begin_id) const { TermwiseHelper::orChildren(result, _children.begin(), _children.end(), begin_id); } +template class SearchIteratorPackT; +template class SearchIteratorPackT; } diff --git a/searchlib/src/vespa/searchlib/queryeval/iterator_pack.h b/searchlib/src/vespa/searchlib/queryeval/iterator_pack.h index 0a1b140f28a..f05bf8e1adc 100644 --- a/searchlib/src/vespa/searchlib/queryeval/iterator_pack.h +++ b/searchlib/src/vespa/searchlib/queryeval/iterator_pack.h @@ -9,7 +9,8 @@ namespace search::fef { class MatchData; } namespace search::queryeval { -class SearchIteratorPack +template +class SearchIteratorPackT { private: using MatchDataUP = std::unique_ptr; @@ -18,19 +19,21 @@ private: MatchDataUP _md; public: - using ref_t = uint16_t; - SearchIteratorPack(); - ~SearchIteratorPack(); - SearchIteratorPack(SearchIteratorPack &&rhs) noexcept; - SearchIteratorPack &operator=(SearchIteratorPack &&rhs) noexcept; + using ref_t = RefType; + SearchIteratorPackT(); + ~SearchIteratorPackT(); + SearchIteratorPackT(SearchIteratorPackT &&rhs) noexcept; + SearchIteratorPackT &operator=(SearchIteratorPackT &&rhs) noexcept; // TODO: use MultiSearch::Children to pass ownership - SearchIteratorPack(const std::vector &children, - const std::vector &childMatch, - MatchDataUP md); + SearchIteratorPackT(const std::vector &children, + const std::vector &childMatch, + MatchDataUP md); // TODO: use MultiSearch::Children to pass ownership - SearchIteratorPack(const std::vector &children, MatchDataUP md); + SearchIteratorPackT(const std::vector &children, MatchDataUP md); + + static bool can_handle_iterators(size_t num_iterators); uint32_t get_docid(ref_t ref) const { return _children[ref]->getDocId(); @@ -60,5 +63,8 @@ public: void or_hits_into(BitVector &result, uint32_t begin_id) const; }; +using SearchIteratorPack = SearchIteratorPackT; +using SearchIteratorPackUint32 = SearchIteratorPackT; + } diff --git a/searchlib/src/vespa/searchlib/queryeval/weighted_set_term_search.cpp b/searchlib/src/vespa/searchlib/queryeval/weighted_set_term_search.cpp index 0be014474d0..6a93c2fe322 100644 --- a/searchlib/src/vespa/searchlib/queryeval/weighted_set_term_search.cpp +++ b/searchlib/src/vespa/searchlib/queryeval/weighted_set_term_search.cpp @@ -196,11 +196,21 @@ WeightedSetTermSearch::create(const std::vector &children, fef::MatchData::UP match_data) { if (children.size() < 128) { - return create_helper(tmd, is_filter_search, std::cref(weights), - SearchIteratorPack(children, std::move(match_data))); + if (SearchIteratorPack::can_handle_iterators(children.size())) { + return create_helper(tmd, is_filter_search, std::cref(weights), + SearchIteratorPack(children, std::move(match_data))); + } else { + return create_helper(tmd, is_filter_search, std::cref(weights), + SearchIteratorPackUint32(children, std::move(match_data))); + } + } + if (SearchIteratorPack::can_handle_iterators(children.size())) { + return create_helper(tmd, is_filter_search, std::cref(weights), + SearchIteratorPack(children, std::move(match_data))); + } else { + return create_helper(tmd, is_filter_search, std::cref(weights), + SearchIteratorPackUint32(children, std::move(match_data))); } - return create_helper(tmd, is_filter_search, std::cref(weights), - SearchIteratorPack(children, std::move(match_data))); } namespace { @@ -228,7 +238,11 @@ WeightedSetTermSearch::create(fef::TermFieldMatchData& tmd, std::variant>, std::vector> weights, std::vector&& iterators) { - return create_helper_resolve_pack(tmd, is_filter_search, std::move(weights), std::move(iterators)); + if (DocidIteratorPack::can_handle_iterators(iterators.size())) { + return create_helper_resolve_pack(tmd, is_filter_search, std::move(weights), std::move(iterators)); + } else { + return create_helper_resolve_pack(tmd, is_filter_search, std::move(weights), std::move(iterators)); + } } SearchIterator::UP @@ -237,7 +251,11 @@ WeightedSetTermSearch::create(fef::TermFieldMatchData &tmd, std::variant>, std::vector> weights, std::vector &&iterators) { - return create_helper_resolve_pack(tmd, is_filter_search, std::move(weights), std::move(iterators)); + if (DocidWithWeightIteratorPack::can_handle_iterators(iterators.size())) { + return create_helper_resolve_pack(tmd, is_filter_search, std::move(weights), std::move(iterators)); + } else { + return create_helper_resolve_pack(tmd, is_filter_search, std::move(weights), std::move(iterators)); + } } namespace { -- cgit v1.2.3