diff options
author | Geir Storli <geirst@verizonmedia.com> | 2020-06-30 10:52:05 +0200 |
---|---|---|
committer | GitHub <noreply@github.com> | 2020-06-30 10:52:05 +0200 |
commit | f4b107073f2b8352a63f981c290909fc10ba6c8e (patch) | |
tree | 5ed4a491100b416afc9740eadc47ac9139cbb8eb | |
parent | 5aea1d09520da60523f5f972b50890ce930d6d54 (diff) | |
parent | 1885eff6b6ab74e8a2ce402fc849a7e31ef50fbf (diff) |
Merge pull request #13673 from vespa-engine/toregge/add-global-filter-limit
If the estimated rate of matching documents is less than global filte…
7 files changed, 42 insertions, 5 deletions
diff --git a/searchcore/src/vespa/searchcore/proton/matching/match_tools.cpp b/searchcore/src/vespa/searchcore/proton/matching/match_tools.cpp index fadea4b7962..19bece5ae9c 100644 --- a/searchcore/src/vespa/searchcore/proton/matching/match_tools.cpp +++ b/searchcore/src/vespa/searchcore/proton/matching/match_tools.cpp @@ -194,7 +194,8 @@ MatchToolsFactory(QueryLimiter & queryLimiter, trace.addEvent(4, "MTF: Fetch Postings"); _query.fetchPostings(); trace.addEvent(5, "MTF: Handle Global Filters"); - _query.handle_global_filters(searchContext.getDocIdLimit()); + double global_filter_limit = GlobalFilterLimit::lookup(rankProperties, rankSetup.get_global_filter_limit()); + _query.handle_global_filters(searchContext.getDocIdLimit(), global_filter_limit); _query.freeze(); trace.addEvent(5, "MTF: prepareSharedState"); _rankSetup.prepareSharedState(_queryEnv, _queryEnv.getObjectStore()); diff --git a/searchcore/src/vespa/searchcore/proton/matching/query.cpp b/searchcore/src/vespa/searchcore/proton/matching/query.cpp index 5213a2b9230..62a59ab7680 100644 --- a/searchcore/src/vespa/searchcore/proton/matching/query.cpp +++ b/searchcore/src/vespa/searchcore/proton/matching/query.cpp @@ -199,10 +199,11 @@ Query::fetchPostings() } void -Query::handle_global_filters(uint32_t docid_limit) +Query::handle_global_filters(uint32_t docid_limit, double global_filter_limit) { using search::queryeval::GlobalFilter; - if (_blueprint->getState().want_global_filter()) { + double estimated_hit_ratio = _blueprint->getState().hit_ratio(docid_limit); + if (_blueprint->getState().want_global_filter() && estimated_hit_ratio >= global_filter_limit) { auto constraint = Blueprint::FilterConstraint::UPPER_BOUND; bool strict = true; auto filter_iterator = _blueprint->createFilterSearch(strict, constraint); diff --git a/searchcore/src/vespa/searchcore/proton/matching/query.h b/searchcore/src/vespa/searchcore/proton/matching/query.h index 3ed6229830d..60f40e24d1e 100644 --- 
a/searchcore/src/vespa/searchcore/proton/matching/query.h +++ b/searchcore/src/vespa/searchcore/proton/matching/query.h @@ -89,7 +89,7 @@ public: **/ void optimize(); void fetchPostings(); - void handle_global_filters(uint32_t docidLimit); + void handle_global_filters(uint32_t docidLimit, double global_filter_limit); void freeze(); /** diff --git a/searchlib/src/vespa/searchlib/fef/indexproperties.cpp b/searchlib/src/vespa/searchlib/fef/indexproperties.cpp index fb44b986301..622e437692a 100644 --- a/searchlib/src/vespa/searchlib/fef/indexproperties.cpp +++ b/searchlib/src/vespa/searchlib/fef/indexproperties.cpp @@ -290,6 +290,22 @@ NearestNeighborBruteForceLimit::lookup(const Properties &props, double defaultVa return lookupDouble(props, NAME, defaultValue); } +const vespalib::string GlobalFilterLimit::NAME("vespa.matching.global_filter_limit"); + +const double GlobalFilterLimit::DEFAULT_VALUE(0.0); + +double +GlobalFilterLimit::lookup(const Properties &props) +{ + return lookup(props, DEFAULT_VALUE); +} + +double +GlobalFilterLimit::lookup(const Properties &props, double defaultValue) +{ + return lookupDouble(props, NAME, defaultValue); +} + } // namespace matching namespace softtimeout { diff --git a/searchlib/src/vespa/searchlib/fef/indexproperties.h b/searchlib/src/vespa/searchlib/fef/indexproperties.h index 30c726caeba..1b4c2e92d8d 100644 --- a/searchlib/src/vespa/searchlib/fef/indexproperties.h +++ b/searchlib/src/vespa/searchlib/fef/indexproperties.h @@ -218,6 +218,19 @@ namespace matching { static double lookup(const Properties &props); static double lookup(const Properties &props, double defaultValue); }; + + /** + * Property to control fallback to not building a global filter + * for a query with a blueprint that wants a global filter. If the + * estimated ratio of matching documents is less than this limit + * then don't build a global filter. 
+ **/ + struct GlobalFilterLimit { + static const vespalib::string NAME; + static const double DEFAULT_VALUE; + static double lookup(const Properties &props); + static double lookup(const Properties &props, double defaultValue); + }; } namespace softtimeout { diff --git a/searchlib/src/vespa/searchlib/fef/ranksetup.cpp b/searchlib/src/vespa/searchlib/fef/ranksetup.cpp index e197f095852..249351a4fe5 100644 --- a/searchlib/src/vespa/searchlib/fef/ranksetup.cpp +++ b/searchlib/src/vespa/searchlib/fef/ranksetup.cpp @@ -62,7 +62,8 @@ RankSetup::RankSetup(const BlueprintFactory &factory, const IIndexEnvironment &i _softTimeoutEnabled(false), _softTimeoutTailCost(0.1), _softTimeoutFactor(0.5), - _nearest_neighbor_brute_force_limit(0.05) + _nearest_neighbor_brute_force_limit(0.05), + _global_filter_limit(0.0) { } RankSetup::~RankSetup() = default; @@ -106,6 +107,7 @@ RankSetup::configure() setSoftTimeoutTailCost(softtimeout::TailCost::lookup(_indexEnv.getProperties())); setSoftTimeoutFactor(softtimeout::Factor::lookup(_indexEnv.getProperties())); set_nearest_neighbor_brute_force_limit(matching::NearestNeighborBruteForceLimit::lookup(_indexEnv.getProperties())); + set_global_filter_limit(matching::GlobalFilterLimit::lookup(_indexEnv.getProperties())); } void diff --git a/searchlib/src/vespa/searchlib/fef/ranksetup.h b/searchlib/src/vespa/searchlib/fef/ranksetup.h index ad793eeaceb..3e127a1e8b5 100644 --- a/searchlib/src/vespa/searchlib/fef/ranksetup.h +++ b/searchlib/src/vespa/searchlib/fef/ranksetup.h @@ -60,6 +60,7 @@ private: double _softTimeoutTailCost; double _softTimeoutFactor; double _nearest_neighbor_brute_force_limit; + double _global_filter_limit; public: @@ -369,6 +370,9 @@ public: void set_nearest_neighbor_brute_force_limit(double v) { _nearest_neighbor_brute_force_limit = v; } double get_nearest_neighbor_brute_force_limit() const { return _nearest_neighbor_brute_force_limit; } + void set_global_filter_limit(double v) { _global_filter_limit = v; } + double 
get_global_filter_limit() const { return _global_filter_limit; } + /** * This method may be used to indicate that certain features * should be dumped during a full feature dump. |