diff options
author | Henning Baldersheim <balder@yahoo-inc.com> | 2021-08-05 10:39:44 +0200 |
---|---|---|
committer | GitHub <noreply@github.com> | 2021-08-05 10:39:44 +0200 |
commit | c4bf5a9ae94164619ba89b0b3c78ac2c68eb429a (patch) | |
tree | eda4c0bc50a2f9be18a424279e5c16991d85a3cb | |
parent | 16277f3f494c1bcbe54e3f6d175bc536cf98f99e (diff) | |
parent | 57795a688b13f25317275b74aae07ebb036dffe9 (diff) |
Merge pull request #18681 from vespa-engine/balder/also-perform-topk-without-global-filter
Balder/also perform topk without global filter
7 files changed, 67 insertions, 21 deletions
diff --git a/searchcore/src/vespa/searchcore/proton/matching/match_tools.cpp b/searchcore/src/vespa/searchcore/proton/matching/match_tools.cpp index 55194e51048..1c04313f057 100644 --- a/searchcore/src/vespa/searchcore/proton/matching/match_tools.cpp +++ b/searchcore/src/vespa/searchcore/proton/matching/match_tools.cpp @@ -196,8 +196,9 @@ MatchToolsFactory(QueryLimiter & queryLimiter, _query.fetchPostings(); if (is_search) { trace.addEvent(5, "MTF: Handle Global Filters"); - double global_filter_limit = GlobalFilterLimit::lookup(rankProperties, rankSetup.get_global_filter_limit()); - _query.handle_global_filters(searchContext.getDocIdLimit(), global_filter_limit); + double lower_limit = GlobalFilterLowerLimit::lookup(rankProperties, rankSetup.get_global_filter_lower_limit()); + double upper_limit = GlobalFilterUpperLimit::lookup(rankProperties, rankSetup.get_global_filter_upper_limit()); + _query.handle_global_filters(searchContext.getDocIdLimit(), lower_limit, upper_limit); } _query.freeze(); trace.addEvent(5, "MTF: prepareSharedState"); diff --git a/searchcore/src/vespa/searchcore/proton/matching/query.cpp b/searchcore/src/vespa/searchcore/proton/matching/query.cpp index 07024950779..418907fb54c 100644 --- a/searchcore/src/vespa/searchcore/proton/matching/query.cpp +++ b/searchcore/src/vespa/searchcore/proton/matching/query.cpp @@ -242,11 +242,17 @@ Query::fetchPostings() } void -Query::handle_global_filters(uint32_t docid_limit, double global_filter_limit) +Query::handle_global_filters(uint32_t docid_limit, double global_filter_lower_limit, double global_filter_upper_limit) { using search::queryeval::GlobalFilter; double estimated_hit_ratio = _blueprint->getState().hit_ratio(docid_limit); - if (_blueprint->getState().want_global_filter() && estimated_hit_ratio >= global_filter_limit) { + if ( ! _blueprint->getState().want_global_filter()) return; + + LOG(debug, "docid_limit=%d, estimated_hit_ratio=%1.2f, global_filter_lower_limit=%1.2f, global_filter_upper_limit=%1.2f", + docid_limit, estimated_hit_ratio, global_filter_lower_limit, global_filter_upper_limit); + if (estimated_hit_ratio < global_filter_lower_limit) return; + + if (estimated_hit_ratio <= global_filter_upper_limit) { auto constraint = Blueprint::FilterConstraint::UPPER_BOUND; bool strict = true; auto filter_iterator = _blueprint->createFilterSearch(strict, constraint); @@ -254,12 +260,15 @@ Query::handle_global_filters(uint32_t docid_limit, double global_filter_limit) auto white_list = filter_iterator->get_hits(1); auto global_filter = GlobalFilter::create(std::move(white_list)); _blueprint->set_global_filter(*global_filter); - // optimized order may change after accounting for global filter: - _blueprint = Blueprint::optimize(std::move(_blueprint)); - LOG(debug, "blueprint after handle_global_filters:\n%s\n", _blueprint->asString().c_str()); - // strictness may change if optimized order changed: - fetchPostings(); + } else { + auto no_filter = GlobalFilter::create(); + _blueprint->set_global_filter(*no_filter); } + // optimized order may change after accounting for global filter: + _blueprint = Blueprint::optimize(std::move(_blueprint)); + LOG(debug, "blueprint after handle_global_filters:\n%s\n", _blueprint->asString().c_str()); + // strictness may change if optimized order changed: + fetchPostings(); } void diff --git a/searchcore/src/vespa/searchcore/proton/matching/query.h b/searchcore/src/vespa/searchcore/proton/matching/query.h index 952b6260da1..945ce6b38ff 100644 --- a/searchcore/src/vespa/searchcore/proton/matching/query.h +++ b/searchcore/src/vespa/searchcore/proton/matching/query.h @@ -92,7 +92,7 @@ public: **/ void optimize(); void fetchPostings(); - void handle_global_filters(uint32_t docidLimit, double global_filter_limit); + void handle_global_filters(uint32_t docidLimit, double global_filter_lower_limit, double global_filter_upper_limit); void freeze(); /** diff --git a/searchlib/src/vespa/searchlib/fef/indexproperties.cpp b/searchlib/src/vespa/searchlib/fef/indexproperties.cpp index 622e437692a..df4d46ecb73 100644 --- a/searchlib/src/vespa/searchlib/fef/indexproperties.cpp +++ b/searchlib/src/vespa/searchlib/fef/indexproperties.cpp @@ -290,18 +290,34 @@ NearestNeighborBruteForceLimit::lookup(const Properties &props, double defaultVa return lookupDouble(props, NAME, defaultValue); } -const vespalib::string GlobalFilterLimit::NAME("vespa.matching.global_filter_limit"); +const vespalib::string GlobalFilterLowerLimit::NAME("vespa.matching.global_filter.lower_limit"); -const double GlobalFilterLimit::DEFAULT_VALUE(0.0); +const double GlobalFilterLowerLimit::DEFAULT_VALUE(0.0); double -GlobalFilterLimit::lookup(const Properties &props) +GlobalFilterLowerLimit::lookup(const Properties &props) { return lookup(props, DEFAULT_VALUE); } double -GlobalFilterLimit::lookup(const Properties &props, double defaultValue) +GlobalFilterLowerLimit::lookup(const Properties &props, double defaultValue) +{ + return lookupDouble(props, NAME, defaultValue); +} + +const vespalib::string GlobalFilterUpperLimit::NAME("vespa.matching.global_filter.upper_limit"); + +const double GlobalFilterUpperLimit::DEFAULT_VALUE(2.0); + +double +GlobalFilterUpperLimit::lookup(const Properties &props) +{ + return lookup(props, DEFAULT_VALUE); +} + +double +GlobalFilterUpperLimit::lookup(const Properties &props, double defaultValue) { return lookupDouble(props, NAME, defaultValue); } diff --git a/searchlib/src/vespa/searchlib/fef/indexproperties.h b/searchlib/src/vespa/searchlib/fef/indexproperties.h index 1b4c2e92d8d..135a8254cd8 100644 --- a/searchlib/src/vespa/searchlib/fef/indexproperties.h +++ b/searchlib/src/vespa/searchlib/fef/indexproperties.h @@ -223,9 +223,24 @@ namespace matching { * Property to control fallback to not building a global filter * for a query with a blueprint that wants a global filter. If the * estimated ratio of matching documents is less than this limit - * then don't build a global filter. + * then don't build a global filter. The effect will be falling back to bruteforce instead of approximation. **/ - struct GlobalFilterLimit { + struct GlobalFilterLowerLimit { + static const vespalib::string NAME; + static const double DEFAULT_VALUE; + static double lookup(const Properties &props); + static double lookup(const Properties &props, double defaultValue); + }; + + /** + * Property to control not building a global filter + * for a query with a blueprint that wants a global filter. If the + * estimated ratio of matching documents is larger than this limit + * then don't build a global filter, but assumes that the expected filter ratio has been + * taken care of increasing recall. Increasing recall by 1/upper_limit * 1.2 is probably a sane solution + * adding 20% margin to handle some correlation between filter and rest of query. + **/ + struct GlobalFilterUpperLimit { static const vespalib::string NAME; static const double DEFAULT_VALUE; static double lookup(const Properties &props); diff --git a/searchlib/src/vespa/searchlib/fef/ranksetup.cpp b/searchlib/src/vespa/searchlib/fef/ranksetup.cpp index 249351a4fe5..a37bb98068d 100644 --- a/searchlib/src/vespa/searchlib/fef/ranksetup.cpp +++ b/searchlib/src/vespa/searchlib/fef/ranksetup.cpp @@ -63,7 +63,8 @@ RankSetup::RankSetup(const BlueprintFactory &factory, const IIndexEnvironment &i _softTimeoutTailCost(0.1), _softTimeoutFactor(0.5), _nearest_neighbor_brute_force_limit(0.05), - _global_filter_limit(0.0) + _global_filter_lower_limit(0.0), + _global_filter_upper_limit(1.0) { } RankSetup::~RankSetup() = default; @@ -107,7 +108,8 @@ RankSetup::configure() setSoftTimeoutTailCost(softtimeout::TailCost::lookup(_indexEnv.getProperties())); setSoftTimeoutFactor(softtimeout::Factor::lookup(_indexEnv.getProperties())); set_nearest_neighbor_brute_force_limit(matching::NearestNeighborBruteForceLimit::lookup(_indexEnv.getProperties())); - set_global_filter_limit(matching::GlobalFilterLimit::lookup(_indexEnv.getProperties())); + set_global_filter_lower_limit(matching::GlobalFilterLowerLimit::lookup(_indexEnv.getProperties())); + set_global_filter_upper_limit(matching::GlobalFilterUpperLimit::lookup(_indexEnv.getProperties())); } void diff --git a/searchlib/src/vespa/searchlib/fef/ranksetup.h b/searchlib/src/vespa/searchlib/fef/ranksetup.h index 3e127a1e8b5..6fea33b9e12 100644 --- a/searchlib/src/vespa/searchlib/fef/ranksetup.h +++ b/searchlib/src/vespa/searchlib/fef/ranksetup.h @@ -60,7 +60,8 @@ private: double _softTimeoutTailCost; double _softTimeoutFactor; double _nearest_neighbor_brute_force_limit; - double _global_filter_limit; + double _global_filter_lower_limit; + double _global_filter_upper_limit; public: @@ -370,8 +371,10 @@ public: void set_nearest_neighbor_brute_force_limit(double v) { _nearest_neighbor_brute_force_limit = v; } double get_nearest_neighbor_brute_force_limit() const { return _nearest_neighbor_brute_force_limit; } - void set_global_filter_limit(double v) { _global_filter_limit = v; } - double get_global_filter_limit() const { return _global_filter_limit; } + void set_global_filter_lower_limit(double v) { _global_filter_lower_limit = v; } + double get_global_filter_lower_limit() const { return _global_filter_lower_limit; } + void set_global_filter_upper_limit(double v) { _global_filter_upper_limit = v; } + double get_global_filter_upper_limit() const { return _global_filter_upper_limit; } /** * This method may be used to indicate that certain features |