about | summary | refs | log | tree | commit | diff | stats
path: root/searchlib
diff options
context:
space:
mode:
author Henning Baldersheim <balder@yahoo-inc.com> 2023-09-26 12:24:29 +0200
committer GitHub <noreply@github.com> 2023-09-26 12:24:29 +0200
commit 589f8faf81c9ed1ace32ffb67653d2bc9b95cc51 (patch)
tree 2b20aee35dda739c8921a7b4c412fff96897c999 /searchlib
parent d186922bea9bb26d9ab59182a9bf12340c024579 (diff)
parent 40940483b8f551d2284f582bbb4af07a1c18ac87 (diff)
Merge pull request #28654 from vespa-engine/balder/return-early-on-match
- Return early in doSeek if docId found.
Diffstat (limited to 'searchlib')
-rw-r--r-- searchlib/src/tests/attribute/document_weight_or_filter_search/document_weight_or_filter_search_test.cpp 18
-rw-r--r-- searchlib/src/vespa/searchlib/attribute/attribute_blueprint_factory.cpp 3
-rw-r--r-- searchlib/src/vespa/searchlib/attribute/document_weight_or_filter_search.cpp 32
-rw-r--r-- searchlib/src/vespa/searchlib/attribute/iterator_pack.cpp 4
-rw-r--r-- searchlib/src/vespa/searchlib/attribute/iterator_pack.h 2
-rw-r--r-- searchlib/src/vespa/searchlib/queryeval/weighted_set_term_search.h 13
6 files changed, 43 insertions, 29 deletions
diff --git a/searchlib/src/tests/attribute/document_weight_or_filter_search/document_weight_or_filter_search_test.cpp b/searchlib/src/tests/attribute/document_weight_or_filter_search/document_weight_or_filter_search_test.cpp
index b9c70d76934..1fd9dde09c7 100644
--- a/searchlib/src/tests/attribute/document_weight_or_filter_search/document_weight_or_filter_search_test.cpp
+++ b/searchlib/src/tests/attribute/document_weight_or_filter_search/document_weight_or_filter_search_test.cpp
@@ -24,14 +24,14 @@ class DocumentWeightOrFilterSearchTest : public ::testing::Test {
uint32_t _range_end;
public:
DocumentWeightOrFilterSearchTest();
- ~DocumentWeightOrFilterSearchTest();
+ ~DocumentWeightOrFilterSearchTest() override;
void inc_generation();
size_t num_trees() const { return _trees.size(); }
Iterator get_tree(size_t idx) const {
if (idx < _trees.size()) {
return _postings.beginFrozen(_trees[idx]);
} else {
- return Iterator();
+ return {};
}
}
void ensure_tree(size_t idx) {
@@ -39,13 +39,13 @@ public:
_trees.resize(idx + 1);
}
}
- void add_tree(size_t idx, std::vector<uint32_t> keys) {
+ void add_tree(size_t idx, const std::vector<uint32_t>& keys) {
ensure_tree(idx);
std::vector<KeyData> adds;
std::vector<uint32_t> removes;
adds.reserve(keys.size());
for (auto& key : keys) {
- adds.emplace_back(KeyData(key, 1));
+ adds.emplace_back(key, 1);
}
_postings.apply(_trees[idx], adds.data(), adds.data() + adds.size(), removes.data(), removes.data() + removes.size());
}
@@ -67,7 +67,7 @@ public:
return result;
};
- std::vector<uint32_t> eval_daat(SearchIterator &iterator) {
+ std::vector<uint32_t> eval_daat(SearchIterator &iterator) const {
std::vector<uint32_t> result;
uint32_t doc_id = _range_start;
while (doc_id < _range_end) {
@@ -81,7 +81,7 @@ public:
return result;
}
- std::vector<uint32_t> frombv(const BitVector &bv) {
+ std::vector<uint32_t> frombv(const BitVector &bv) const {
std::vector<uint32_t> result;
uint32_t doc_id = _range_start;
doc_id = bv.getNextTrueBit(doc_id);
@@ -93,7 +93,7 @@ public:
return result;
}
- std::unique_ptr<BitVector> tobv(std::vector<uint32_t> values) {
+ std::unique_ptr<BitVector> tobv(const std::vector<uint32_t> & values) const {
auto bv = BitVector::create(_range_start, _range_end);
for (auto value : values) {
bv->setBit(value);
@@ -102,7 +102,7 @@ public:
return bv;
}
- void expect_result(std::vector<uint32_t> exp, std::vector<uint32_t> act)
+ static void expect_result(const std::vector<uint32_t> & exp, const std::vector<uint32_t> & act)
{
EXPECT_EQ(exp, act);
}
@@ -227,7 +227,7 @@ public:
}
_test.inc_generation();
}
- ~Verifier() {
+ ~Verifier() override {
for (uint32_t tree_id = 0; tree_id < _test.num_trees(); ++tree_id) {
_test.clear_tree(tree_id);
}
diff --git a/searchlib/src/vespa/searchlib/attribute/attribute_blueprint_factory.cpp b/searchlib/src/vespa/searchlib/attribute/attribute_blueprint_factory.cpp
index b4cdd621b71..71ea2a67299 100644
--- a/searchlib/src/vespa/searchlib/attribute/attribute_blueprint_factory.cpp
+++ b/searchlib/src/vespa/searchlib/attribute/attribute_blueprint_factory.cpp
@@ -482,6 +482,9 @@ DirectWeightedSetBlueprint<SearchType>::createLeafSearch(const TermFieldMatchDat
_attr.create(r.posting_idx, iterators);
}
bool field_is_filter = getState().fields()[0].isFilter();
+ if (field_is_filter && tfmda[0]->isNotNeeded()) {
+ return attribute::DocumentWeightOrFilterSearch::create(std::move(iterators));
+ }
return SearchType::create(*tfmda[0], field_is_filter, _weights, std::move(iterators));
}
diff --git a/searchlib/src/vespa/searchlib/attribute/document_weight_or_filter_search.cpp b/searchlib/src/vespa/searchlib/attribute/document_weight_or_filter_search.cpp
index e2566c94f1c..c840c5cbc91 100644
--- a/searchlib/src/vespa/searchlib/attribute/document_weight_or_filter_search.cpp
+++ b/searchlib/src/vespa/searchlib/attribute/document_weight_or_filter_search.cpp
@@ -10,6 +10,7 @@ namespace search::attribute {
class DocumentWeightOrFilterSearchImpl : public DocumentWeightOrFilterSearch
{
AttributeIteratorPack _children;
+ void seek_all(uint32_t docId);
public:
explicit DocumentWeightOrFilterSearchImpl(AttributeIteratorPack&& children);
~DocumentWeightOrFilterSearchImpl() override;
@@ -32,6 +33,7 @@ public:
}
std::unique_ptr<BitVector> get_hits(uint32_t begin_id) override {
+ seek_all(getDocId());
return _children.get_hits(begin_id, getEndId());
}
@@ -47,17 +49,29 @@ DocumentWeightOrFilterSearchImpl::DocumentWeightOrFilterSearchImpl(AttributeIter
DocumentWeightOrFilterSearchImpl::~DocumentWeightOrFilterSearchImpl() = default;
void
+DocumentWeightOrFilterSearchImpl::seek_all(uint32_t docId) {
+ for (uint16_t i = 0; i < _children.size(); ++i) {
+ uint32_t next = _children.get_docid(i);
+ if (next < docId) {
+ _children.seek(i, docId);
+ }
+ }
+}
+
+void
DocumentWeightOrFilterSearchImpl::doSeek(uint32_t docId)
{
- if (_children.get_docid(0) < docId) {
- _children.seek(0, docId);
- }
- uint32_t min_doc_id = _children.get_docid(0);
- for (uint16_t i = 1; i < _children.size(); ++i) {
- if (_children.get_docid(i) < docId) {
- _children.seek(i, docId);
+ uint32_t min_doc_id = endDocId;
+ for (uint16_t i = 0; i < _children.size(); ++i) {
+ uint32_t next = _children.get_docid(i);
+ if (next < docId) {
+ next = _children.seek(i, docId);
+ }
+ if (next == docId) {
+ setDocId(next);
+ return;
}
- min_doc_id = std::min(min_doc_id, _children.get_docid(i));
+ min_doc_id = std::min(min_doc_id, next);
}
setDocId(min_doc_id);
}
@@ -73,6 +87,8 @@ DocumentWeightOrFilterSearch::create(std::vector<DocumentWeightIterator>&& child
if (children.empty()) {
return std::make_unique<queryeval::EmptySearch>();
} else {
+ std::sort(children.begin(), children.end(),
+ [](const auto & a, const auto & b) { return a.size() > b.size(); });
return std::make_unique<DocumentWeightOrFilterSearchImpl>(AttributeIteratorPack(std::move(children)));
}
}
diff --git a/searchlib/src/vespa/searchlib/attribute/iterator_pack.cpp b/searchlib/src/vespa/searchlib/attribute/iterator_pack.cpp
index 147f56d6d47..ab06fc270bd 100644
--- a/searchlib/src/vespa/searchlib/attribute/iterator_pack.cpp
+++ b/searchlib/src/vespa/searchlib/attribute/iterator_pack.cpp
@@ -17,9 +17,9 @@ AttributeIteratorPack::or_hits_into(BitVector &result, uint32_t begin_id) {
for (size_t i = 0; i < size(); ++i) {
uint32_t docId = get_docid(i);
if (begin_id > docId) {
- seek(i, begin_id);
+ docId = seek(i, begin_id);
}
- for (docId = get_docid(i); docId < result.size(); docId = next(i)) {
+ for (uint32_t limit = result.size(); docId < limit; docId = next(i)) {
result.setBit(docId);
}
}
diff --git a/searchlib/src/vespa/searchlib/attribute/iterator_pack.h b/searchlib/src/vespa/searchlib/attribute/iterator_pack.h
index e042aab5eae..1753a3d0c2d 100644
--- a/searchlib/src/vespa/searchlib/attribute/iterator_pack.h
+++ b/searchlib/src/vespa/searchlib/attribute/iterator_pack.h
@@ -41,7 +41,7 @@ public:
std::unique_ptr<BitVector> get_hits(uint32_t begin_id, uint32_t end_id);
void or_hits_into(BitVector &result, uint32_t begin_id);
- size_t size() const { return _children.size(); }
+ size_t size() const noexcept { return _children.size(); }
void initRange(uint32_t begin, uint32_t end) {
(void) end;
for (auto &child: _children) {
diff --git a/searchlib/src/vespa/searchlib/queryeval/weighted_set_term_search.h b/searchlib/src/vespa/searchlib/queryeval/weighted_set_term_search.h
index e3e12c27f28..b30d3bc3301 100644
--- a/searchlib/src/vespa/searchlib/queryeval/weighted_set_term_search.h
+++ b/searchlib/src/vespa/searchlib/queryeval/weighted_set_term_search.h
@@ -10,12 +10,9 @@
#include <memory>
#include <vector>
-namespace search {
-namespace fef {
-class TermFieldMatchData;
-} // namespace fef
+namespace search::fef { class TermFieldMatchData; }
-namespace queryeval {
+namespace search::queryeval {
class Blueprint;
@@ -26,7 +23,7 @@ class Blueprint;
class WeightedSetTermSearch : public SearchIterator
{
protected:
- WeightedSetTermSearch() {}
+ WeightedSetTermSearch() = default;
public:
// TODO: pass ownership with unique_ptr
@@ -47,6 +44,4 @@ public:
virtual void find_matching_elements(uint32_t docid, const std::vector<std::unique_ptr<Blueprint>> &child_blueprints, std::vector<uint32_t> &dst) = 0;
};
-} // namespace search::queryeval
-} // namespace search
-
+}