summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorHenning Baldersheim <balder@yahoo-inc.com>2024-04-05 16:24:21 +0200
committerGitHub <noreply@github.com>2024-04-05 16:24:21 +0200
commitce57de993161044ce2580140c5299d35400b3522 (patch)
tree755dcdaaaf0b4ad8917eeb41c55559f99fb6d998
parentcdb93e44b185d322ce8b01049072175e77d66b62 (diff)
parentc9f6710f6ea2e63b4f8b86ab63e549793799b9ae (diff)
Merge pull request #30835 from vespa-engine/geirst/more-than-64k-child-iterators-for-in-and-weightedset
Support more than 64k child iterators for IN and weightedSet.
-rw-r--r--searchlib/src/tests/attribute/direct_multi_term_blueprint/direct_multi_term_blueprint_test.cpp9
-rw-r--r--searchlib/src/vespa/searchlib/attribute/posting_iterator_pack.cpp38
-rw-r--r--searchlib/src/vespa/searchlib/attribute/posting_iterator_pack.h12
-rw-r--r--searchlib/src/vespa/searchlib/queryeval/iterator_pack.cpp41
-rw-r--r--searchlib/src/vespa/searchlib/queryeval/iterator_pack.h26
-rw-r--r--searchlib/src/vespa/searchlib/queryeval/weighted_set_term_search.cpp30
6 files changed, 113 insertions, 43 deletions
diff --git a/searchlib/src/tests/attribute/direct_multi_term_blueprint/direct_multi_term_blueprint_test.cpp b/searchlib/src/tests/attribute/direct_multi_term_blueprint/direct_multi_term_blueprint_test.cpp
index 1d66c59d2c9..196b1cbb475 100644
--- a/searchlib/src/tests/attribute/direct_multi_term_blueprint/direct_multi_term_blueprint_test.cpp
+++ b/searchlib/src/tests/attribute/direct_multi_term_blueprint/direct_multi_term_blueprint_test.cpp
@@ -435,4 +435,13 @@ TEST_P(DirectMultiTermBlueprintTest, hash_filter_with_string_folding_used_for_no
expect_hits({30, 31, 40, 41}, *itr);
}
+TEST_P(DirectMultiTermBlueprintTest, supports_more_than_64k_btree_iterators) {
+ setup(false, true);
+ std::vector<int64_t> term_values(std::numeric_limits<uint16_t>::max() + 1, 3);
+ add_terms(term_values);
+ auto itr = create_leaf_search();
+ EXPECT_THAT(itr->asString(), StartsWith(resolve_iterator_with_unpack()));
+ expect_hits({30, 31}, *itr);
+}
+
GTEST_MAIN_RUN_ALL_TESTS()
diff --git a/searchlib/src/vespa/searchlib/attribute/posting_iterator_pack.cpp b/searchlib/src/vespa/searchlib/attribute/posting_iterator_pack.cpp
index 662d77dd5d7..93bc4735916 100644
--- a/searchlib/src/vespa/searchlib/attribute/posting_iterator_pack.cpp
+++ b/searchlib/src/vespa/searchlib/attribute/posting_iterator_pack.cpp
@@ -6,27 +6,34 @@
namespace search {
-template <typename IteratorType>
-PostingIteratorPack<IteratorType>::~PostingIteratorPack() = default;
+template <typename IteratorType, typename RefType>
+PostingIteratorPack<IteratorType, RefType>::~PostingIteratorPack() = default;
-template <typename IteratorType>
-PostingIteratorPack<IteratorType>::PostingIteratorPack(std::vector<IteratorType> &&children)
+template <typename IteratorType, typename RefType>
+PostingIteratorPack<IteratorType, RefType>::PostingIteratorPack(std::vector<IteratorType> &&children)
: _children(std::move(children))
{
assert(_children.size() <= std::numeric_limits<ref_t>::max());
}
-template <typename IteratorType>
+template <typename IteratorType, typename RefType>
+bool
+PostingIteratorPack<IteratorType, RefType>::can_handle_iterators(size_t num_iterators)
+{
+ return num_iterators <= std::numeric_limits<ref_t>::max();
+}
+
+template <typename IteratorType, typename RefType>
std::unique_ptr<BitVector>
-PostingIteratorPack<IteratorType>::get_hits(uint32_t begin_id, uint32_t end_id) {
+PostingIteratorPack<IteratorType, RefType>::get_hits(uint32_t begin_id, uint32_t end_id) {
BitVector::UP result(BitVector::create(begin_id, end_id));
or_hits_into(*result, begin_id);
return result;
}
-template <typename IteratorType>
+template <typename IteratorType, typename RefType>
void
-PostingIteratorPack<IteratorType>::or_hits_into(BitVector &result, uint32_t begin_id) {
+PostingIteratorPack<IteratorType, RefType>::or_hits_into(BitVector &result, uint32_t begin_id) {
for (size_t i = 0; i < size(); ++i) {
uint32_t docId = get_docid(i);
if (begin_id > docId) {
@@ -41,12 +48,21 @@ PostingIteratorPack<IteratorType>::or_hits_into(BitVector &result, uint32_t begi
template <>
int32_t
-PostingIteratorPack<DocidIterator>::get_weight(ref_t, uint32_t)
+PostingIteratorPack<DocidIterator, uint16_t>::get_weight(ref_t, uint32_t)
+{
+ return 1;
+}
+
+template <>
+int32_t
+PostingIteratorPack<DocidIterator, uint32_t>::get_weight(ref_t, uint32_t)
{
return 1;
}
-template class PostingIteratorPack<DocidIterator>;
-template class PostingIteratorPack<DocidWithWeightIterator>;
+template class PostingIteratorPack<DocidIterator, uint16_t>;
+template class PostingIteratorPack<DocidIterator, uint32_t>;
+template class PostingIteratorPack<DocidWithWeightIterator, uint16_t>;
+template class PostingIteratorPack<DocidWithWeightIterator, uint32_t>;
}
diff --git a/searchlib/src/vespa/searchlib/attribute/posting_iterator_pack.h b/searchlib/src/vespa/searchlib/attribute/posting_iterator_pack.h
index 28150730bad..7c6b85dd3ec 100644
--- a/searchlib/src/vespa/searchlib/attribute/posting_iterator_pack.h
+++ b/searchlib/src/vespa/searchlib/attribute/posting_iterator_pack.h
@@ -12,13 +12,13 @@ class BitVector;
/**
* Class that wraps a set of underlying low-level posting lists and provides an API to search in them.
*/
-template <typename IteratorType>
+template <typename IteratorType, typename RefType>
class PostingIteratorPack {
private:
std::vector<IteratorType> _children;
public:
- using ref_t = uint16_t;
+ using ref_t = RefType;
PostingIteratorPack() noexcept : _children() {}
PostingIteratorPack(PostingIteratorPack &&rhs) noexcept = default;
PostingIteratorPack &operator=(PostingIteratorPack &&rhs) noexcept = default;
@@ -26,6 +26,8 @@ public:
explicit PostingIteratorPack(std::vector<IteratorType> &&children);
~PostingIteratorPack();
+ static bool can_handle_iterators(size_t num_iterators);
+
uint32_t get_docid(ref_t ref) const {
return _children[ref].valid() ? _children[ref].getKey() : endDocId;
}
@@ -59,8 +61,10 @@ private:
}
};
-using DocidIteratorPack = PostingIteratorPack<DocidIterator>;
-using DocidWithWeightIteratorPack = PostingIteratorPack<DocidWithWeightIterator>;
+using DocidIteratorPack = PostingIteratorPack<DocidIterator, uint16_t>;
+using DocidIteratorPackUint32 = PostingIteratorPack<DocidIterator, uint32_t>;
+using DocidWithWeightIteratorPack = PostingIteratorPack<DocidWithWeightIterator, uint16_t>;
+using DocidWithWeightIteratorPackUint32 = PostingIteratorPack<DocidWithWeightIterator, uint32_t>;
}
diff --git a/searchlib/src/vespa/searchlib/queryeval/iterator_pack.cpp b/searchlib/src/vespa/searchlib/queryeval/iterator_pack.cpp
index fdea54424de..5b3a67d7b5e 100644
--- a/searchlib/src/vespa/searchlib/queryeval/iterator_pack.cpp
+++ b/searchlib/src/vespa/searchlib/queryeval/iterator_pack.cpp
@@ -8,18 +8,23 @@
namespace search::queryeval {
-SearchIteratorPack::~SearchIteratorPack() = default;
+template <typename RefType>
+SearchIteratorPackT<RefType>::~SearchIteratorPackT() = default;
-SearchIteratorPack::SearchIteratorPack() = default;
+template <typename RefType>
+SearchIteratorPackT<RefType>::SearchIteratorPackT() = default;
-SearchIteratorPack::SearchIteratorPack(SearchIteratorPack &&rhs) noexcept = default;
+template <typename RefType>
+SearchIteratorPackT<RefType>::SearchIteratorPackT(SearchIteratorPackT<RefType> &&rhs) noexcept = default;
-SearchIteratorPack &
-SearchIteratorPack::operator=(SearchIteratorPack &&rhs) noexcept = default;
+template <typename RefType>
+SearchIteratorPackT<RefType> &
+SearchIteratorPackT<RefType>::operator=(SearchIteratorPackT<RefType> &&rhs) noexcept = default;
-SearchIteratorPack::SearchIteratorPack(const std::vector<SearchIterator*> &children,
- const std::vector<fef::TermFieldMatchData*> &childMatch,
- MatchDataUP md)
+template <typename RefType>
+SearchIteratorPackT<RefType>::SearchIteratorPackT(const std::vector<SearchIterator*> &children,
+ const std::vector<fef::TermFieldMatchData*> &childMatch,
+ MatchDataUP md)
: _children(),
_childMatch(childMatch),
_md(std::move(md))
@@ -32,12 +37,21 @@ SearchIteratorPack::SearchIteratorPack(const std::vector<SearchIterator*> &child
assert(_children.size() <= std::numeric_limits<ref_t>::max());
}
-SearchIteratorPack::SearchIteratorPack(const std::vector<SearchIterator*> &children, MatchDataUP md)
- : SearchIteratorPack(children, std::vector<fef::TermFieldMatchData*>(), MatchDataUP(std::move(md)))
+template <typename RefType>
+SearchIteratorPackT<RefType>::SearchIteratorPackT(const std::vector<SearchIterator*> &children, MatchDataUP md)
+ : SearchIteratorPackT(children, std::vector<fef::TermFieldMatchData*>(), MatchDataUP(std::move(md)))
{ }
+template <typename RefType>
+bool
+SearchIteratorPackT<RefType>::can_handle_iterators(size_t num_iterators)
+{
+ return num_iterators <= std::numeric_limits<ref_t>::max();
+}
+
+template <typename RefType>
std::unique_ptr<BitVector>
-SearchIteratorPack::get_hits(uint32_t begin_id, uint32_t end_id) const {
+SearchIteratorPackT<RefType>::get_hits(uint32_t begin_id, uint32_t end_id) const {
BitVector::UP result = TermwiseHelper::orChildren(_children.begin(), _children.end(), begin_id);
if (! result ) {
result = BitVector::create(begin_id, end_id);
@@ -45,10 +59,13 @@ SearchIteratorPack::get_hits(uint32_t begin_id, uint32_t end_id) const {
return result;
}
+template <typename RefType>
void
-SearchIteratorPack::or_hits_into(BitVector &result, uint32_t begin_id) const {
+SearchIteratorPackT<RefType>::or_hits_into(BitVector &result, uint32_t begin_id) const {
TermwiseHelper::orChildren(result, _children.begin(), _children.end(), begin_id);
}
+template class SearchIteratorPackT<uint16_t>;
+template class SearchIteratorPackT<uint32_t>;
}
diff --git a/searchlib/src/vespa/searchlib/queryeval/iterator_pack.h b/searchlib/src/vespa/searchlib/queryeval/iterator_pack.h
index 0a1b140f28a..f05bf8e1adc 100644
--- a/searchlib/src/vespa/searchlib/queryeval/iterator_pack.h
+++ b/searchlib/src/vespa/searchlib/queryeval/iterator_pack.h
@@ -9,7 +9,8 @@ namespace search::fef { class MatchData; }
namespace search::queryeval {
-class SearchIteratorPack
+template <typename RefType>
+class SearchIteratorPackT
{
private:
using MatchDataUP = std::unique_ptr<fef::MatchData>;
@@ -18,19 +19,21 @@ private:
MatchDataUP _md;
public:
- using ref_t = uint16_t;
- SearchIteratorPack();
- ~SearchIteratorPack();
- SearchIteratorPack(SearchIteratorPack &&rhs) noexcept;
- SearchIteratorPack &operator=(SearchIteratorPack &&rhs) noexcept;
+ using ref_t = RefType;
+ SearchIteratorPackT();
+ ~SearchIteratorPackT();
+ SearchIteratorPackT(SearchIteratorPackT<RefType> &&rhs) noexcept;
+ SearchIteratorPackT<RefType> &operator=(SearchIteratorPackT<RefType> &&rhs) noexcept;
// TODO: use MultiSearch::Children to pass ownership
- SearchIteratorPack(const std::vector<SearchIterator*> &children,
- const std::vector<fef::TermFieldMatchData*> &childMatch,
- MatchDataUP md);
+ SearchIteratorPackT(const std::vector<SearchIterator*> &children,
+ const std::vector<fef::TermFieldMatchData*> &childMatch,
+ MatchDataUP md);
// TODO: use MultiSearch::Children to pass ownership
- SearchIteratorPack(const std::vector<SearchIterator*> &children, MatchDataUP md);
+ SearchIteratorPackT(const std::vector<SearchIterator*> &children, MatchDataUP md);
+
+ static bool can_handle_iterators(size_t num_iterators);
uint32_t get_docid(ref_t ref) const {
return _children[ref]->getDocId();
@@ -60,5 +63,8 @@ public:
void or_hits_into(BitVector &result, uint32_t begin_id) const;
};
+using SearchIteratorPack = SearchIteratorPackT<uint16_t>;
+using SearchIteratorPackUint32 = SearchIteratorPackT<uint32_t>;
+
}
diff --git a/searchlib/src/vespa/searchlib/queryeval/weighted_set_term_search.cpp b/searchlib/src/vespa/searchlib/queryeval/weighted_set_term_search.cpp
index 0be014474d0..6a93c2fe322 100644
--- a/searchlib/src/vespa/searchlib/queryeval/weighted_set_term_search.cpp
+++ b/searchlib/src/vespa/searchlib/queryeval/weighted_set_term_search.cpp
@@ -196,11 +196,21 @@ WeightedSetTermSearch::create(const std::vector<SearchIterator *> &children,
fef::MatchData::UP match_data)
{
if (children.size() < 128) {
- return create_helper<vespalib::LeftArrayHeap, SearchIteratorPack>(tmd, is_filter_search, std::cref(weights),
- SearchIteratorPack(children, std::move(match_data)));
+ if (SearchIteratorPack::can_handle_iterators(children.size())) {
+ return create_helper<vespalib::LeftArrayHeap, SearchIteratorPack>(tmd, is_filter_search, std::cref(weights),
+ SearchIteratorPack(children, std::move(match_data)));
+ } else {
+ return create_helper<vespalib::LeftArrayHeap, SearchIteratorPackUint32>(tmd, is_filter_search, std::cref(weights),
+ SearchIteratorPackUint32(children, std::move(match_data)));
+ }
+ }
+ if (SearchIteratorPack::can_handle_iterators(children.size())) {
+ return create_helper<vespalib::LeftHeap, SearchIteratorPack>(tmd, is_filter_search, std::cref(weights),
+ SearchIteratorPack(children, std::move(match_data)));
+ } else {
+ return create_helper<vespalib::LeftHeap, SearchIteratorPackUint32>(tmd, is_filter_search, std::cref(weights),
+ SearchIteratorPackUint32(children, std::move(match_data)));
}
- return create_helper<vespalib::LeftHeap, SearchIteratorPack>(tmd, is_filter_search, std::cref(weights),
- SearchIteratorPack(children, std::move(match_data)));
}
namespace {
@@ -228,7 +238,11 @@ WeightedSetTermSearch::create(fef::TermFieldMatchData& tmd,
std::variant<std::reference_wrapper<const std::vector<int32_t>>, std::vector<int32_t>> weights,
std::vector<DocidIterator>&& iterators)
{
- return create_helper_resolve_pack<DocidIterator, DocidIteratorPack>(tmd, is_filter_search, std::move(weights), std::move(iterators));
+ if (DocidIteratorPack::can_handle_iterators(iterators.size())) {
+ return create_helper_resolve_pack<DocidIterator, DocidIteratorPack>(tmd, is_filter_search, std::move(weights), std::move(iterators));
+ } else {
+ return create_helper_resolve_pack<DocidIterator, DocidIteratorPackUint32>(tmd, is_filter_search, std::move(weights), std::move(iterators));
+ }
}
SearchIterator::UP
@@ -237,7 +251,11 @@ WeightedSetTermSearch::create(fef::TermFieldMatchData &tmd,
std::variant<std::reference_wrapper<const std::vector<int32_t>>, std::vector<int32_t>> weights,
std::vector<DocidWithWeightIterator> &&iterators)
{
- return create_helper_resolve_pack<DocidWithWeightIterator, DocidWithWeightIteratorPack>(tmd, is_filter_search, std::move(weights), std::move(iterators));
+ if (DocidWithWeightIteratorPack::can_handle_iterators(iterators.size())) {
+ return create_helper_resolve_pack<DocidWithWeightIterator, DocidWithWeightIteratorPack>(tmd, is_filter_search, std::move(weights), std::move(iterators));
+ } else {
+ return create_helper_resolve_pack<DocidWithWeightIterator, DocidWithWeightIteratorPackUint32>(tmd, is_filter_search, std::move(weights), std::move(iterators));
+ }
}
namespace {