summaryrefslogtreecommitdiffstats
path: root/searchcore
diff options
context:
space:
mode:
authorHenning Baldersheim <balder@yahoo-inc.com>2023-11-16 20:05:10 +0000
committerHenning Baldersheim <balder@yahoo-inc.com>2023-11-16 21:16:37 +0000
commit38c682f3a630bb5274027bb8086e1bff27f7725a (patch)
tree756d056dd7e808c0588dfa42611e7d20058c2c6e /searchcore
parent739d3d94bdc50d3a3abb4bc9a0355331acb1917e (diff)
If hit_rate is below 1%, drop match phase limiting. It has too high a fixed cost and will likely make things worse.
Diffstat (limited to 'searchcore')
-rw-r--r--searchcore/src/tests/proton/matching/match_phase_limiter/match_phase_limiter_test.cpp1
-rw-r--r--searchcore/src/vespa/searchcore/proton/matching/match_phase_limiter.cpp13
2 files changed, 12 insertions, 2 deletions
diff --git a/searchcore/src/tests/proton/matching/match_phase_limiter/match_phase_limiter_test.cpp b/searchcore/src/tests/proton/matching/match_phase_limiter/match_phase_limiter_test.cpp
index 21c572995d3..b26ed1d4765 100644
--- a/searchcore/src/tests/proton/matching/match_phase_limiter/match_phase_limiter_test.cpp
+++ b/searchcore/src/tests/proton/matching/match_phase_limiter/match_phase_limiter_test.cpp
@@ -352,6 +352,7 @@ TEST("require that the match phase limiter is able to pre-limit the query") {
" hit_rate: 0.1,"
" num_docs: 100000,"
" max_filter_docs: 100000,"
+ " upper_limited_corpus_size: 100000,"
" wanted_docs: 5000,"
" action: 'Will limit with prefix filter',"
" max_group_size: 5000,"
diff --git a/searchcore/src/vespa/searchcore/proton/matching/match_phase_limiter.cpp b/searchcore/src/vespa/searchcore/proton/matching/match_phase_limiter.cpp
index b64d5ba4c05..784ce649c5f 100644
--- a/searchcore/src/vespa/searchcore/proton/matching/match_phase_limiter.cpp
+++ b/searchcore/src/vespa/searchcore/proton/matching/match_phase_limiter.cpp
@@ -102,6 +102,10 @@ do_limit(AttributeLimiter &limiter_factory, SearchIterator::UP search, double ma
return search;
}
+// When the hit rate is below 1%, limiting the query is often far more expensive than not limiting it.
+// TODO This limit should probably be a lot higher.
+constexpr double MIN_HIT_RATE_LIMIT = 0.01;
+
} // namespace proton::matching::<unnamed>
SearchIterator::UP
@@ -114,11 +118,16 @@ MatchPhaseLimiter::maybe_limit(SearchIterator::UP search, double match_freq, siz
trace->setDouble("hit_rate", match_freq);
trace->setLong("num_docs", num_docs);
trace->setLong("max_filter_docs", max_filter_docs);
+ trace->setLong("upper_limited_corpus_size", upper_limited_corpus_size);
trace->setLong("wanted_docs", wanted_num_docs);
}
- if (upper_limited_corpus_size <= wanted_num_docs) {
+ if ((upper_limited_corpus_size <= wanted_num_docs) || (match_freq < MIN_HIT_RATE_LIMIT)) {
if (trace) {
- trace->setString("action", "Will not limit !");
+ if (upper_limited_corpus_size <= wanted_num_docs) {
+ trace->setString("action", "Will not limit due to upper_limited_corpus_size <= wanted_num_docs");
+ } else if (match_freq < MIN_HIT_RATE_LIMIT) {
+ trace->setString("action", "Will not limit due to match_freq < MIN_HIT_RATE_LIMIT(1%)");
+ }
}
LOG(debug, "Will not limit ! maybe_limit(hit_rate=%g, num_docs=%ld, max_filter_docs=%ld) = wanted_num_docs=%ld",
match_freq, num_docs, max_filter_docs, wanted_num_docs);