diff options
author | Geir Storli <geirst@yahooinc.com> | 2023-11-27 11:03:54 +0000 |
---|---|---|
committer | Geir Storli <geirst@yahooinc.com> | 2023-11-27 11:09:46 +0000 |
commit | ca88f4d0402fc97480d609522e7965e6a3dbc63e (patch) | |
tree | ae55e417eb2d81df3b48e22b706a0a6f1e1493b7 /searchlib | |
parent | 6a2c1a65086386b37862596ecf73d161e17b56e1 (diff) |
Prepare for direct btree posting list iterators with only docids.
Diffstat (limited to 'searchlib')
12 files changed, 58 insertions, 33 deletions
diff --git a/searchlib/src/tests/attribute/searchable/attribute_searchable_adapter_test.cpp b/searchlib/src/tests/attribute/searchable/attribute_searchable_adapter_test.cpp index 029c3130609..9ed28a7714c 100644 --- a/searchlib/src/tests/attribute/searchable/attribute_searchable_adapter_test.cpp +++ b/searchlib/src/tests/attribute/searchable/attribute_searchable_adapter_test.cpp @@ -572,7 +572,7 @@ TEST("require that attribute weighted set term works") { ASSERT_EQUAL(5u, result.hits.size()); if (fast_search && result.iterator_dump.find("MonitoringDumpIterator") == vespalib::string::npos) { fprintf(stderr, "DUMP: %s\n", result.iterator_dump.c_str()); - EXPECT_TRUE(result.iterator_dump.find("AttributeIteratorPack") != vespalib::string::npos); + EXPECT_TRUE(result.iterator_dump.find("PostingIteratorPack") != vespalib::string::npos); } EXPECT_EQUAL(10u, result.hits[0].docid); EXPECT_EQUAL(20, result.hits[0].match_weight); diff --git a/searchlib/src/vespa/searchlib/attribute/CMakeLists.txt b/searchlib/src/vespa/searchlib/attribute/CMakeLists.txt index 76389311eaa..f80e8dbe7be 100644 --- a/searchlib/src/vespa/searchlib/attribute/CMakeLists.txt +++ b/searchlib/src/vespa/searchlib/attribute/CMakeLists.txt @@ -74,7 +74,6 @@ vespa_add_library(searchlib_attribute OBJECT imported_search_context.cpp integerbase.cpp ipostinglistsearchcontext.cpp - iterator_pack.cpp load_utils.cpp loadedenumvalue.cpp loadednumericvalue.cpp @@ -104,6 +103,7 @@ vespa_add_library(searchlib_attribute OBJECT numeric_search_context.cpp numeric_sort_blob_writer.cpp numericbase.cpp + posting_iterator_pack.cpp posting_list_merger.cpp postingchange.cpp postinglistattribute.cpp diff --git a/searchlib/src/vespa/searchlib/attribute/attribute_blueprint_factory.cpp b/searchlib/src/vespa/searchlib/attribute/attribute_blueprint_factory.cpp index 8c56fc081fc..6ccfa472b42 100644 --- a/searchlib/src/vespa/searchlib/attribute/attribute_blueprint_factory.cpp +++ b/searchlib/src/vespa/searchlib/attribute/attribute_blueprint_factory.cpp @@ -7,7 +7,7 @@ #include "direct_multi_term_blueprint.h" #include "document_weight_or_filter_search.h" #include "i_direct_posting_store.h" -#include "iterator_pack.h" +#include "posting_iterator_pack.h" #include "predicate_attribute.h" #include <vespa/eval/eval/value.h> #include <vespa/searchlib/common/location.h> diff --git a/searchlib/src/vespa/searchlib/attribute/document_weight_or_filter_search.cpp b/searchlib/src/vespa/searchlib/attribute/document_weight_or_filter_search.cpp index 7ae7a24b80f..5159c2be06c 100644 --- a/searchlib/src/vespa/searchlib/attribute/document_weight_or_filter_search.cpp +++ b/searchlib/src/vespa/searchlib/attribute/document_weight_or_filter_search.cpp @@ -1,7 +1,7 @@ // Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. #include "document_weight_or_filter_search.h" -#include "iterator_pack.h" +#include "posting_iterator_pack.h" #include <vespa/searchlib/fef/matchdata.h> #include <vespa/searchlib/queryeval/iterator_pack.h> #include <vespa/searchlib/common/bitvector.h> @@ -93,8 +93,8 @@ DocumentWeightOrFilterSearch::create(std::vector<DocidWithWeightIterator>&& chil } else { std::sort(children.begin(), children.end(), [](const auto & a, const auto & b) { return a.size() > b.size(); }); - using OrFilter = DocumentWeightOrFilterSearchImpl<AttributeIteratorPack>; - return std::make_unique<OrFilter>(AttributeIteratorPack(std::move(children))); + using OrFilter = DocumentWeightOrFilterSearchImpl<DocidWithWeightIteratorPack>; + return std::make_unique<OrFilter>(DocidWithWeightIteratorPack(std::move(children))); } } diff --git a/searchlib/src/vespa/searchlib/attribute/i_direct_posting_store.h b/searchlib/src/vespa/searchlib/attribute/i_direct_posting_store.h index 1d26f3ef202..559a365923a 100644 --- a/searchlib/src/vespa/searchlib/attribute/i_direct_posting_store.h +++ b/searchlib/src/vespa/searchlib/attribute/i_direct_posting_store.h @@ -10,6 +10,14 @@ namespace search::queryeval { class SearchIterator; } namespace search { +/** + * Direct (low-level) iterator over a btree posting list that contains only docids. + */ +using DocidIterator = attribute::PostingListTraits<vespalib::btree::BTreeNoLeafData>::const_iterator; + +/** + * Direct (low-level) iterator over a btree posting list that contains {docid, weight} tuples. + */ using DocidWithWeightIterator = attribute::PostingListTraits<int32_t>::const_iterator; /** diff --git a/searchlib/src/vespa/searchlib/attribute/iterator_pack.cpp b/searchlib/src/vespa/searchlib/attribute/posting_iterator_pack.cpp index 7f40a38fc7f..662d77dd5d7 100644 --- a/searchlib/src/vespa/searchlib/attribute/iterator_pack.cpp +++ b/searchlib/src/vespa/searchlib/attribute/posting_iterator_pack.cpp @@ -1,28 +1,32 @@ // Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. -#include "iterator_pack.h" +#include "posting_iterator_pack.h" #include <vespa/searchlib/common/bitvector.h> #include <limits> namespace search { -AttributeIteratorPack::~AttributeIteratorPack() = default; +template <typename IteratorType> +PostingIteratorPack<IteratorType>::~PostingIteratorPack() = default; -AttributeIteratorPack::AttributeIteratorPack(std::vector<DocidWithWeightIterator> &&children) +template <typename IteratorType> +PostingIteratorPack<IteratorType>::PostingIteratorPack(std::vector<IteratorType> &&children) : _children(std::move(children)) { assert(_children.size() <= std::numeric_limits<ref_t>::max()); } +template <typename IteratorType> std::unique_ptr<BitVector> -AttributeIteratorPack::get_hits(uint32_t begin_id, uint32_t end_id) { +PostingIteratorPack<IteratorType>::get_hits(uint32_t begin_id, uint32_t end_id) { BitVector::UP result(BitVector::create(begin_id, end_id)); or_hits_into(*result, begin_id); return result; } +template <typename IteratorType> void -AttributeIteratorPack::or_hits_into(BitVector &result, uint32_t begin_id) { +PostingIteratorPack<IteratorType>::or_hits_into(BitVector &result, uint32_t begin_id) { for (size_t i = 0; i < size(); ++i) { uint32_t docId = get_docid(i); if (begin_id > docId) { @@ -35,6 +39,14 @@ AttributeIteratorPack::or_hits_into(BitVector &result, uint32_t begin_id) { result.invalidateCachedCount(); } +template <> +int32_t +PostingIteratorPack<DocidIterator>::get_weight(ref_t, uint32_t) +{ + return 1; +} +template class PostingIteratorPack<DocidIterator>; +template class PostingIteratorPack<DocidWithWeightIterator>; } diff --git a/searchlib/src/vespa/searchlib/attribute/iterator_pack.h b/searchlib/src/vespa/searchlib/attribute/posting_iterator_pack.h index 3d2bedc7b3a..28150730bad 100644 --- a/searchlib/src/vespa/searchlib/attribute/iterator_pack.h +++ b/searchlib/src/vespa/searchlib/attribute/posting_iterator_pack.h @@ -9,19 +9,22 @@ namespace search { class BitVector; -class AttributeIteratorPack -{ +/** + * Class that wraps a set of underlying low-level posting lists and provides an API to search in them. + */ +template <typename IteratorType> +class PostingIteratorPack { private: - std::vector<DocidWithWeightIterator> _children; + std::vector<IteratorType> _children; public: using ref_t = uint16_t; - AttributeIteratorPack() noexcept : _children() {} - AttributeIteratorPack(AttributeIteratorPack &&rhs) noexcept = default; - AttributeIteratorPack &operator=(AttributeIteratorPack &&rhs) noexcept = default; + PostingIteratorPack() noexcept : _children() {} + PostingIteratorPack(PostingIteratorPack &&rhs) noexcept = default; + PostingIteratorPack &operator=(PostingIteratorPack &&rhs) noexcept = default; - explicit AttributeIteratorPack(std::vector<DocidWithWeightIterator> &&children); - ~AttributeIteratorPack(); + explicit PostingIteratorPack(std::vector<IteratorType> &&children); + ~PostingIteratorPack(); uint32_t get_docid(ref_t ref) const { return _children[ref].valid() ? _children[ref].getKey() : endDocId; @@ -56,6 +59,8 @@ private: } }; +using DocidIteratorPack = PostingIteratorPack<DocidIterator>; +using DocidWithWeightIteratorPack = PostingIteratorPack<DocidWithWeightIterator>; -} // namespace search +} diff --git a/searchlib/src/vespa/searchlib/queryeval/dot_product_search.cpp b/searchlib/src/vespa/searchlib/queryeval/dot_product_search.cpp index 8f4df612023..3f8069a2a26 100644 --- a/searchlib/src/vespa/searchlib/queryeval/dot_product_search.cpp +++ b/searchlib/src/vespa/searchlib/queryeval/dot_product_search.cpp @@ -181,13 +181,13 @@ DotProductSearch::create(TermFieldMatchData &tmd, const std::vector<int32_t> &weights, std::vector<DocidWithWeightIterator> &&iterators) { - using ArrayHeapImpl = DotProductSearchImpl<vespalib::LeftArrayHeap, AttributeIteratorPack>; - using HeapImpl = DotProductSearchImpl<vespalib::LeftHeap, AttributeIteratorPack>; + using ArrayHeapImpl = DotProductSearchImpl<vespalib::LeftArrayHeap, DocidWithWeightIteratorPack>; + using HeapImpl = DotProductSearchImpl<vespalib::LeftHeap, DocidWithWeightIteratorPack>; if (iterators.size() < 128) { - return std::make_unique<ArrayHeapImpl>(tmd, field_is_filter, weights, AttributeIteratorPack(std::move(iterators))); + return std::make_unique<ArrayHeapImpl>(tmd, field_is_filter, weights, DocidWithWeightIteratorPack(std::move(iterators))); } - return std::make_unique<HeapImpl>(tmd, field_is_filter, weights, AttributeIteratorPack(std::move(iterators))); + return std::make_unique<HeapImpl>(tmd, field_is_filter, weights, DocidWithWeightIteratorPack(std::move(iterators))); } //----------------------------------------------------------------------------- diff --git a/searchlib/src/vespa/searchlib/queryeval/dot_product_search.h b/searchlib/src/vespa/searchlib/queryeval/dot_product_search.h index d06ea9439d2..2f1a4386a95 100644 --- a/searchlib/src/vespa/searchlib/queryeval/dot_product_search.h +++ b/searchlib/src/vespa/searchlib/queryeval/dot_product_search.h @@ -6,7 +6,7 @@ #include <vespa/vespalib/util/priority_queue.h> #include <vespa/searchlib/fef/matchdata.h> #include <vespa/searchlib/fef/termfieldmatchdataarray.h> -#include <vespa/searchlib/attribute/iterator_pack.h> +#include <vespa/searchlib/attribute/posting_iterator_pack.h> namespace search::fef { class TermFieldMatchData; } diff --git a/searchlib/src/vespa/searchlib/queryeval/wand/wand_parts.h b/searchlib/src/vespa/searchlib/queryeval/wand/wand_parts.h index 84fd2eb0d9e..9431b527510 100644 --- a/searchlib/src/vespa/searchlib/queryeval/wand/wand_parts.h +++ b/searchlib/src/vespa/searchlib/queryeval/wand/wand_parts.h @@ -8,7 +8,7 @@ #include <vespa/searchlib/fef/termfieldmatchdata.h> #include <vespa/searchlib/queryeval/searchiterator.h> #include <vespa/searchlib/queryeval/iterator_pack.h> -#include <vespa/searchlib/attribute/iterator_pack.h> +#include <vespa/searchlib/attribute/posting_iterator_pack.h> #include <vespa/vespalib/objects/objectvisitor.h> #include <vespa/vespalib/util/priority_queue.h> #include <vespa/searchlib/attribute/i_docid_with_weight_posting_store.h> @@ -264,7 +264,7 @@ VectorizedIteratorTerms::VectorizedIteratorTerms(const Terms &t, const Scorer &, //----------------------------------------------------------------------------- -struct VectorizedAttributeTerms : VectorizedState<AttributeIteratorPack> { +struct VectorizedAttributeTerms : VectorizedState<DocidWithWeightIteratorPack> { template <typename Scorer> VectorizedAttributeTerms(const std::vector<int32_t> &weights, const std::vector<IDirectPostingStore::LookupResult> &dict_entries, @@ -279,7 +279,7 @@ struct VectorizedAttributeTerms : VectorizedState<AttributeIteratorPack> { attr.create(dict_entries[order[i]].posting_idx, iterators); docId(i) = (iterators.back().valid()) ? iterators.back().getKey() : search::endDocId; } - iteratorPack() = AttributeIteratorPack(std::move(iterators)); + iteratorPack() = DocidWithWeightIteratorPack(std::move(iterators)); } void visit_members(vespalib::ObjectVisitor &) const {} }; diff --git a/searchlib/src/vespa/searchlib/queryeval/weighted_set_term_search.cpp b/searchlib/src/vespa/searchlib/queryeval/weighted_set_term_search.cpp index 9e58cd489ac..0ffff30cee2 100644 --- a/searchlib/src/vespa/searchlib/queryeval/weighted_set_term_search.cpp +++ b/searchlib/src/vespa/searchlib/queryeval/weighted_set_term_search.cpp @@ -190,13 +190,13 @@ WeightedSetTermSearch::create(fef::TermFieldMatchData &tmd, const std::vector<int32_t> &weights, std::vector<DocidWithWeightIterator> &&iterators) { - using ArrayHeapImpl = WeightedSetTermSearchImpl<vespalib::LeftArrayHeap, AttributeIteratorPack>; - using HeapImpl = WeightedSetTermSearchImpl<vespalib::LeftHeap, AttributeIteratorPack>; + using ArrayHeapImpl = WeightedSetTermSearchImpl<vespalib::LeftArrayHeap, DocidWithWeightIteratorPack>; + using HeapImpl = WeightedSetTermSearchImpl<vespalib::LeftHeap, DocidWithWeightIteratorPack>; if (iterators.size() < 128) { - return SearchIterator::UP(new ArrayHeapImpl(tmd, field_is_filter, weights, AttributeIteratorPack(std::move(iterators)))); + return SearchIterator::UP(new ArrayHeapImpl(tmd, field_is_filter, weights, DocidWithWeightIteratorPack(std::move(iterators)))); } - return SearchIterator::UP(new HeapImpl(tmd, field_is_filter, weights, AttributeIteratorPack(std::move(iterators)))); + return SearchIterator::UP(new HeapImpl(tmd, field_is_filter, weights, DocidWithWeightIteratorPack(std::move(iterators)))); } //----------------------------------------------------------------------------- diff --git a/searchlib/src/vespa/searchlib/queryeval/weighted_set_term_search.h b/searchlib/src/vespa/searchlib/queryeval/weighted_set_term_search.h index 17239391df8..e6391124da0 100644 --- a/searchlib/src/vespa/searchlib/queryeval/weighted_set_term_search.h +++ b/searchlib/src/vespa/searchlib/queryeval/weighted_set_term_search.h @@ -6,7 +6,7 @@ #include <vespa/vespalib/util/priority_queue.h> #include <vespa/searchlib/fef/matchdata.h> #include <vespa/searchlib/fef/termfieldmatchdataarray.h> -#include <vespa/searchlib/attribute/iterator_pack.h> +#include <vespa/searchlib/attribute/posting_iterator_pack.h> #include <memory> #include <vector> |