summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorHenning Baldersheim <balder@yahoo-inc.com>2023-11-17 13:55:23 +0100
committerGitHub <noreply@github.com>2023-11-17 13:55:23 +0100
commitb0ff93e170cdd38f1bc1b38b756a428c4e0726ca (patch)
tree19d95bb31a133426f9b89e6ecc0191d8996acbbe
parent7bc9bc1707c81c24b9fa031609d537e39e49bae2 (diff)
parent38c682f3a630bb5274027bb8086e1bff27f7725a (diff)
Merge pull request #29355 from vespa-engine/balder/avoid-limiting-not-necessary
Balder/avoid limiting not necessary
-rw-r--r--searchcore/src/tests/proton/matching/match_phase_limiter/match_phase_limiter_test.cpp1
-rw-r--r--searchcore/src/vespa/searchcore/proton/matching/match_phase_limiter.cpp13
-rw-r--r--searchlib/src/vespa/searchlib/attribute/postinglistsearchcontext.hpp2
-rw-r--r--vespalib/src/vespa/vespalib/btree/btreeiterator.h15
-rw-r--r--vespalib/src/vespa/vespalib/btree/btreeiterator.hpp19
5 files changed, 26 insertions, 24 deletions
diff --git a/searchcore/src/tests/proton/matching/match_phase_limiter/match_phase_limiter_test.cpp b/searchcore/src/tests/proton/matching/match_phase_limiter/match_phase_limiter_test.cpp
index 21c572995d3..b26ed1d4765 100644
--- a/searchcore/src/tests/proton/matching/match_phase_limiter/match_phase_limiter_test.cpp
+++ b/searchcore/src/tests/proton/matching/match_phase_limiter/match_phase_limiter_test.cpp
@@ -352,6 +352,7 @@ TEST("require that the match phase limiter is able to pre-limit the query") {
" hit_rate: 0.1,"
" num_docs: 100000,"
" max_filter_docs: 100000,"
+ " upper_limited_corpus_size: 100000,"
" wanted_docs: 5000,"
" action: 'Will limit with prefix filter',"
" max_group_size: 5000,"
diff --git a/searchcore/src/vespa/searchcore/proton/matching/match_phase_limiter.cpp b/searchcore/src/vespa/searchcore/proton/matching/match_phase_limiter.cpp
index b64d5ba4c05..784ce649c5f 100644
--- a/searchcore/src/vespa/searchcore/proton/matching/match_phase_limiter.cpp
+++ b/searchcore/src/vespa/searchcore/proton/matching/match_phase_limiter.cpp
@@ -102,6 +102,10 @@ do_limit(AttributeLimiter &limiter_factory, SearchIterator::UP search, double ma
return search;
}
+// When hitrate is below 1% limiting the query is often far more expensive than not.
+// TODO This limit should probably be a lot higher.
+constexpr double MIN_HIT_RATE_LIMIT = 0.01;
+
} // namespace proton::matching::<unnamed>
SearchIterator::UP
@@ -114,11 +118,16 @@ MatchPhaseLimiter::maybe_limit(SearchIterator::UP search, double match_freq, siz
trace->setDouble("hit_rate", match_freq);
trace->setLong("num_docs", num_docs);
trace->setLong("max_filter_docs", max_filter_docs);
+ trace->setLong("upper_limited_corpus_size", upper_limited_corpus_size);
trace->setLong("wanted_docs", wanted_num_docs);
}
- if (upper_limited_corpus_size <= wanted_num_docs) {
+ if ((upper_limited_corpus_size <= wanted_num_docs) || (match_freq < MIN_HIT_RATE_LIMIT)) {
if (trace) {
- trace->setString("action", "Will not limit !");
+ if (upper_limited_corpus_size <= wanted_num_docs) {
+ trace->setString("action", "Will not limit due to upper_limited_corpus_size <= wanted_num_docs");
+ } else if (match_freq < MIN_HIT_RATE_LIMIT) {
+ trace->setString("action", "Will not limit due to match_freq < MIN_HIT_RATE_LIMIT(1%)");
+ }
}
LOG(debug, "Will not limit ! maybe_limit(hit_rate=%g, num_docs=%ld, max_filter_docs=%ld) = wanted_num_docs=%ld",
match_freq, num_docs, max_filter_docs, wanted_num_docs);
diff --git a/searchlib/src/vespa/searchlib/attribute/postinglistsearchcontext.hpp b/searchlib/src/vespa/searchlib/attribute/postinglistsearchcontext.hpp
index e4047b57341..cc12b1f7825 100644
--- a/searchlib/src/vespa/searchlib/attribute/postinglistsearchcontext.hpp
+++ b/searchlib/src/vespa/searchlib/attribute/postinglistsearchcontext.hpp
@@ -204,7 +204,7 @@ createPostingIterator(fef::TermFieldMatchData *matchData, bool strict)
}
}
// returning nullptr will trigger fallback to filter iterator
- return SearchIterator::UP();
+ return {};
}
diff --git a/vespalib/src/vespa/vespalib/btree/btreeiterator.h b/vespalib/src/vespa/vespalib/btree/btreeiterator.h
index 153fb005f00..53e4d004981 100644
--- a/vespalib/src/vespa/vespalib/btree/btreeiterator.h
+++ b/vespalib/src/vespa/vespalib/btree/btreeiterator.h
@@ -166,7 +166,7 @@ private:
/*
* Find the previous leaf node, called by operator--() as needed.
*/
- VESPA_DLL_LOCAL void findPrevLeafNode();
+ void findPrevLeafNode();
protected:
/*
@@ -219,7 +219,18 @@ protected:
* Step iterator backwards. If at end then place it at last valid
* position in tree (cf. rbegin())
*/
- BTreeIteratorBase & operator--();
+ BTreeIteratorBase & operator--() {
+ if (_leaf.getNode() == nullptr) {
+ rbegin();
+ return *this;
+ }
+ if (_leaf.getIdx() > 0u) {
+ _leaf.decIdx();
+ return *this;
+ }
+ findPrevLeafNode();
+ return *this;
+ }
~BTreeIteratorBase();
BTreeIteratorBase(const BTreeIteratorBase &other);
diff --git a/vespalib/src/vespa/vespalib/btree/btreeiterator.hpp b/vespalib/src/vespa/vespalib/btree/btreeiterator.hpp
index 5f15ebc8b61..fdab3a94a5b 100644
--- a/vespalib/src/vespa/vespalib/btree/btreeiterator.hpp
+++ b/vespalib/src/vespa/vespalib/btree/btreeiterator.hpp
@@ -461,25 +461,6 @@ BTreeIteratorBase() noexcept
template <typename KeyT, typename DataT, typename AggrT,
uint32_t INTERNAL_SLOTS, uint32_t LEAF_SLOTS, uint32_t PATH_SIZE>
-BTreeIteratorBase<KeyT, DataT, AggrT, INTERNAL_SLOTS, LEAF_SLOTS, PATH_SIZE> &
-BTreeIteratorBase<KeyT, DataT, AggrT, INTERNAL_SLOTS, LEAF_SLOTS, PATH_SIZE>::
-operator--()
-{
- if (_leaf.getNode() == nullptr) {
- rbegin();
- return *this;
- }
- if (_leaf.getIdx() > 0u) {
- _leaf.decIdx();
- return *this;
- }
- findPrevLeafNode();
- return *this;
-}
-
-
-template <typename KeyT, typename DataT, typename AggrT,
- uint32_t INTERNAL_SLOTS, uint32_t LEAF_SLOTS, uint32_t PATH_SIZE>
size_t
BTreeIteratorBase<KeyT, DataT, AggrT, INTERNAL_SLOTS, LEAF_SLOTS, PATH_SIZE>::
size() const noexcept