diff options
author | Henning Baldersheim <balder@yahoo-inc.com> | 2021-08-04 11:00:19 +0000 |
---|---|---|
committer | Henning Baldersheim <balder@yahoo-inc.com> | 2021-08-04 11:17:24 +0000 |
commit | 57795a688b13f25317275b74aae07ebb036dffe9 (patch) | |
tree | 43c0e98eac21666166a0f9a18f617a12164b62df /searchcore | |
parent | dd6e573fa7fbee8605578204f4fe0c01bb15be27 (diff) |
Split current global_filter_limit into global_filter.lower_limit/upper_limit.
If estimated_hits < lower_limit, no filter is set, which causes a fallback to brute force.
If estimated_hits in [lower_limit, upper_limit] apply global filter.
If estimated_hits > upper_limit, an empty filter is set. This avoids the filter setup cost.
So if the filter has a huge setup cost, you can reduce upper_limit to a number below 1.0 and instead increase target_num_hits similarly.
Scaling target_num_hits by a factor of 1.0/upper_limit * 1.2 should give similar recall. This adds a 20% safety margin to handle correlation between the filter
and the NearestNeighbor calculation.
Diffstat (limited to 'searchcore')
3 files changed, 18 insertions, 12 deletions
diff --git a/searchcore/src/vespa/searchcore/proton/matching/match_tools.cpp b/searchcore/src/vespa/searchcore/proton/matching/match_tools.cpp index 55194e51048..1c04313f057 100644 --- a/searchcore/src/vespa/searchcore/proton/matching/match_tools.cpp +++ b/searchcore/src/vespa/searchcore/proton/matching/match_tools.cpp @@ -196,8 +196,9 @@ MatchToolsFactory(QueryLimiter & queryLimiter, _query.fetchPostings(); if (is_search) { trace.addEvent(5, "MTF: Handle Global Filters"); - double global_filter_limit = GlobalFilterLimit::lookup(rankProperties, rankSetup.get_global_filter_limit()); - _query.handle_global_filters(searchContext.getDocIdLimit(), global_filter_limit); + double lower_limit = GlobalFilterLowerLimit::lookup(rankProperties, rankSetup.get_global_filter_lower_limit()); + double upper_limit = GlobalFilterUpperLimit::lookup(rankProperties, rankSetup.get_global_filter_upper_limit()); + _query.handle_global_filters(searchContext.getDocIdLimit(), lower_limit, upper_limit); } _query.freeze(); trace.addEvent(5, "MTF: prepareSharedState"); diff --git a/searchcore/src/vespa/searchcore/proton/matching/query.cpp b/searchcore/src/vespa/searchcore/proton/matching/query.cpp index 1d8496c8d7e..418907fb54c 100644 --- a/searchcore/src/vespa/searchcore/proton/matching/query.cpp +++ b/searchcore/src/vespa/searchcore/proton/matching/query.cpp @@ -242,11 +242,17 @@ Query::fetchPostings() } void -Query::handle_global_filters(uint32_t docid_limit, double global_filter_limit) +Query::handle_global_filters(uint32_t docid_limit, double global_filter_lower_limit, double global_filter_upper_limit) { using search::queryeval::GlobalFilter; double estimated_hit_ratio = _blueprint->getState().hit_ratio(docid_limit); - if (_blueprint->getState().want_global_filter() && estimated_hit_ratio >= global_filter_limit) { + if ( ! 
_blueprint->getState().want_global_filter()) return; + + LOG(debug, "docid_limit=%d, estimated_hit_ratio=%1.2f, global_filter_lower_limit=%1.2f, global_filter_upper_limit=%1.2f", + docid_limit, estimated_hit_ratio, global_filter_lower_limit, global_filter_upper_limit); + if (estimated_hit_ratio < global_filter_lower_limit) return; + + if (estimated_hit_ratio <= global_filter_upper_limit) { auto constraint = Blueprint::FilterConstraint::UPPER_BOUND; bool strict = true; auto filter_iterator = _blueprint->createFilterSearch(strict, constraint); @@ -254,16 +260,15 @@ Query::handle_global_filters(uint32_t docid_limit, double global_filter_limit) auto white_list = filter_iterator->get_hits(1); auto global_filter = GlobalFilter::create(std::move(white_list)); _blueprint->set_global_filter(*global_filter); - // optimized order may change after accounting for global filter: - _blueprint = Blueprint::optimize(std::move(_blueprint)); - LOG(debug, "blueprint after handle_global_filters:\n%s\n", _blueprint->asString().c_str()); - // strictness may change if optimized order changed: - fetchPostings(); } else { - auto no_filter = GlobalFilter::create(search::BitVector::UP()); + auto no_filter = GlobalFilter::create(); _blueprint->set_global_filter(*no_filter); - fetchPostings(); } + // optimized order may change after accounting for global filter: + _blueprint = Blueprint::optimize(std::move(_blueprint)); + LOG(debug, "blueprint after handle_global_filters:\n%s\n", _blueprint->asString().c_str()); + // strictness may change if optimized order changed: + fetchPostings(); } void diff --git a/searchcore/src/vespa/searchcore/proton/matching/query.h b/searchcore/src/vespa/searchcore/proton/matching/query.h index 952b6260da1..945ce6b38ff 100644 --- a/searchcore/src/vespa/searchcore/proton/matching/query.h +++ b/searchcore/src/vespa/searchcore/proton/matching/query.h @@ -92,7 +92,7 @@ public: **/ void optimize(); void fetchPostings(); - void handle_global_filters(uint32_t docidLimit, 
double global_filter_limit); + void handle_global_filters(uint32_t docidLimit, double global_filter_lower_limit, double global_filter_upper_limit); void freeze(); /** |