summaryrefslogtreecommitdiffstats
path: root/searchcore
diff options
context:
space:
mode:
author: Henning Baldersheim <balder@yahoo-inc.com> 2021-08-04 11:00:19 +0000
committer: Henning Baldersheim <balder@yahoo-inc.com> 2021-08-04 11:17:24 +0000
commit: 57795a688b13f25317275b74aae07ebb036dffe9 (patch)
tree: 43c0e98eac21666166a0f9a18f617a12164b62df /searchcore
parent: dd6e573fa7fbee8605578204f4fe0c01bb15be27 (diff)
Split current global_filter_limit into global_filter.lower_limit/upper_limit.
If estimated_hits (the estimated hit ratio) < lower_limit, no filter is set, which causes a fallback to brute force. If estimated_hits is in [lower_limit, upper_limit], the global filter is applied. If estimated_hits > upper_limit, an empty filter is set, which avoids the filter setup cost. So if the filter has a huge setup cost, you can reduce upper_limit to a number below 1.0 and increase target_num_hits correspondingly. Multiplying target_num_hits by 1.0/upper_limit * 1.2 should give similar recall; the factor 1.2 adds a 20% safety margin to handle correlation between the filter and the NearestNeighbor calculation.
Diffstat (limited to 'searchcore')
-rw-r--r--searchcore/src/vespa/searchcore/proton/matching/match_tools.cpp5
-rw-r--r--searchcore/src/vespa/searchcore/proton/matching/query.cpp23
-rw-r--r--searchcore/src/vespa/searchcore/proton/matching/query.h2
3 files changed, 18 insertions, 12 deletions
diff --git a/searchcore/src/vespa/searchcore/proton/matching/match_tools.cpp b/searchcore/src/vespa/searchcore/proton/matching/match_tools.cpp
index 55194e51048..1c04313f057 100644
--- a/searchcore/src/vespa/searchcore/proton/matching/match_tools.cpp
+++ b/searchcore/src/vespa/searchcore/proton/matching/match_tools.cpp
@@ -196,8 +196,9 @@ MatchToolsFactory(QueryLimiter & queryLimiter,
_query.fetchPostings();
if (is_search) {
trace.addEvent(5, "MTF: Handle Global Filters");
- double global_filter_limit = GlobalFilterLimit::lookup(rankProperties, rankSetup.get_global_filter_limit());
- _query.handle_global_filters(searchContext.getDocIdLimit(), global_filter_limit);
+ double lower_limit = GlobalFilterLowerLimit::lookup(rankProperties, rankSetup.get_global_filter_lower_limit());
+ double upper_limit = GlobalFilterUpperLimit::lookup(rankProperties, rankSetup.get_global_filter_upper_limit());
+ _query.handle_global_filters(searchContext.getDocIdLimit(), lower_limit, upper_limit);
}
_query.freeze();
trace.addEvent(5, "MTF: prepareSharedState");
diff --git a/searchcore/src/vespa/searchcore/proton/matching/query.cpp b/searchcore/src/vespa/searchcore/proton/matching/query.cpp
index 1d8496c8d7e..418907fb54c 100644
--- a/searchcore/src/vespa/searchcore/proton/matching/query.cpp
+++ b/searchcore/src/vespa/searchcore/proton/matching/query.cpp
@@ -242,11 +242,17 @@ Query::fetchPostings()
}
void
-Query::handle_global_filters(uint32_t docid_limit, double global_filter_limit)
+Query::handle_global_filters(uint32_t docid_limit, double global_filter_lower_limit, double global_filter_upper_limit)
{
using search::queryeval::GlobalFilter;
double estimated_hit_ratio = _blueprint->getState().hit_ratio(docid_limit);
- if (_blueprint->getState().want_global_filter() && estimated_hit_ratio >= global_filter_limit) {
+ if ( ! _blueprint->getState().want_global_filter()) return;
+
+ LOG(debug, "docid_limit=%d, estimated_hit_ratio=%1.2f, global_filter_lower_limit=%1.2f, global_filter_upper_limit=%1.2f",
+ docid_limit, estimated_hit_ratio, global_filter_lower_limit, global_filter_upper_limit);
+ if (estimated_hit_ratio < global_filter_lower_limit) return;
+
+ if (estimated_hit_ratio <= global_filter_upper_limit) {
auto constraint = Blueprint::FilterConstraint::UPPER_BOUND;
bool strict = true;
auto filter_iterator = _blueprint->createFilterSearch(strict, constraint);
@@ -254,16 +260,15 @@ Query::handle_global_filters(uint32_t docid_limit, double global_filter_limit)
auto white_list = filter_iterator->get_hits(1);
auto global_filter = GlobalFilter::create(std::move(white_list));
_blueprint->set_global_filter(*global_filter);
- // optimized order may change after accounting for global filter:
- _blueprint = Blueprint::optimize(std::move(_blueprint));
- LOG(debug, "blueprint after handle_global_filters:\n%s\n", _blueprint->asString().c_str());
- // strictness may change if optimized order changed:
- fetchPostings();
} else {
- auto no_filter = GlobalFilter::create(search::BitVector::UP());
+ auto no_filter = GlobalFilter::create();
_blueprint->set_global_filter(*no_filter);
- fetchPostings();
}
+ // optimized order may change after accounting for global filter:
+ _blueprint = Blueprint::optimize(std::move(_blueprint));
+ LOG(debug, "blueprint after handle_global_filters:\n%s\n", _blueprint->asString().c_str());
+ // strictness may change if optimized order changed:
+ fetchPostings();
}
void
diff --git a/searchcore/src/vespa/searchcore/proton/matching/query.h b/searchcore/src/vespa/searchcore/proton/matching/query.h
index 952b6260da1..945ce6b38ff 100644
--- a/searchcore/src/vespa/searchcore/proton/matching/query.h
+++ b/searchcore/src/vespa/searchcore/proton/matching/query.h
@@ -92,7 +92,7 @@ public:
**/
void optimize();
void fetchPostings();
- void handle_global_filters(uint32_t docidLimit, double global_filter_limit);
+ void handle_global_filters(uint32_t docidLimit, double global_filter_lower_limit, double global_filter_upper_limit);
void freeze();
/**