diff options
author | Henning Baldersheim <balder@yahoo-inc.com> | 2021-08-04 11:00:19 +0000 |
---|---|---|
committer | Henning Baldersheim <balder@yahoo-inc.com> | 2021-08-04 11:17:24 +0000 |
commit | 57795a688b13f25317275b74aae07ebb036dffe9 (patch) | |
tree | 43c0e98eac21666166a0f9a18f617a12164b62df /searchlib | |
parent | dd6e573fa7fbee8605578204f4fe0c01bb15be27 (diff) |
Split current global_filter_limit into global_filter.lower_limit/upper_limit.
If estimated_hits < lower_limit, no filter is set, which causes a fallback to brute force.
If estimated_hits is in [lower_limit, upper_limit], the global filter is applied.
If estimated_hits > upper_limit, an empty filter is set. This avoids the filter setup cost.
So if the filter has a huge setup cost, you can reduce upper_limit to a number below 1.0 and instead increase target_num_hits correspondingly.
Scaling target_num_hits by a factor of 1.0/upper_limit * 1.2 should give similar recall. This adds a 20% safety margin to handle correlation between the filter
and the NearestNeighbor calculation.
Diffstat (limited to 'searchlib')
-rw-r--r-- | searchlib/src/vespa/searchlib/fef/indexproperties.cpp | 24 | ||||
-rw-r--r-- | searchlib/src/vespa/searchlib/fef/indexproperties.h | 19 | ||||
-rw-r--r-- | searchlib/src/vespa/searchlib/fef/ranksetup.cpp | 6 | ||||
-rw-r--r-- | searchlib/src/vespa/searchlib/fef/ranksetup.h | 9 |
4 files changed, 47 insertions, 11 deletions
diff --git a/searchlib/src/vespa/searchlib/fef/indexproperties.cpp b/searchlib/src/vespa/searchlib/fef/indexproperties.cpp index 622e437692a..df4d46ecb73 100644 --- a/searchlib/src/vespa/searchlib/fef/indexproperties.cpp +++ b/searchlib/src/vespa/searchlib/fef/indexproperties.cpp @@ -290,18 +290,34 @@ NearestNeighborBruteForceLimit::lookup(const Properties &props, double defaultVa return lookupDouble(props, NAME, defaultValue); } -const vespalib::string GlobalFilterLimit::NAME("vespa.matching.global_filter_limit"); +const vespalib::string GlobalFilterLowerLimit::NAME("vespa.matching.global_filter.lower_limit"); -const double GlobalFilterLimit::DEFAULT_VALUE(0.0); +const double GlobalFilterLowerLimit::DEFAULT_VALUE(0.0); double -GlobalFilterLimit::lookup(const Properties &props) +GlobalFilterLowerLimit::lookup(const Properties &props) { return lookup(props, DEFAULT_VALUE); } double -GlobalFilterLimit::lookup(const Properties &props, double defaultValue) +GlobalFilterLowerLimit::lookup(const Properties &props, double defaultValue) +{ + return lookupDouble(props, NAME, defaultValue); +} + +const vespalib::string GlobalFilterUpperLimit::NAME("vespa.matching.global_filter.upper_limit"); + +const double GlobalFilterUpperLimit::DEFAULT_VALUE(2.0); + +double +GlobalFilterUpperLimit::lookup(const Properties &props) +{ + return lookup(props, DEFAULT_VALUE); +} + +double +GlobalFilterUpperLimit::lookup(const Properties &props, double defaultValue) { return lookupDouble(props, NAME, defaultValue); } diff --git a/searchlib/src/vespa/searchlib/fef/indexproperties.h b/searchlib/src/vespa/searchlib/fef/indexproperties.h index 1b4c2e92d8d..135a8254cd8 100644 --- a/searchlib/src/vespa/searchlib/fef/indexproperties.h +++ b/searchlib/src/vespa/searchlib/fef/indexproperties.h @@ -223,9 +223,24 @@ namespace matching { * Property to control fallback to not building a global filter * for a query with a blueprint that wants a global filter. 
If the * estimated ratio of matching documents is less than this limit - * then don't build a global filter. + * then don't build a global filter. The effect will be falling back to bruteforce instead of approximation. **/ - struct GlobalFilterLimit { + struct GlobalFilterLowerLimit { + static const vespalib::string NAME; + static const double DEFAULT_VALUE; + static double lookup(const Properties &props); + static double lookup(const Properties &props, double defaultValue); + }; + + /** + * Property to control not building a global filter + * for a query with a blueprint that wants a global filter. If the + * estimated ratio of matching documents is larger than this limit + * then don't build a global filter, but assumes that the expected filter ratio has been + * taken care of increasing recall. Increasing recall by 1/upper_limit * 1.2 is probably a sane solution + * adding 20% margin to handle some correlation between filter and rest of query. + **/ + struct GlobalFilterUpperLimit { static const vespalib::string NAME; static const double DEFAULT_VALUE; static double lookup(const Properties &props); diff --git a/searchlib/src/vespa/searchlib/fef/ranksetup.cpp b/searchlib/src/vespa/searchlib/fef/ranksetup.cpp index 249351a4fe5..a37bb98068d 100644 --- a/searchlib/src/vespa/searchlib/fef/ranksetup.cpp +++ b/searchlib/src/vespa/searchlib/fef/ranksetup.cpp @@ -63,7 +63,8 @@ RankSetup::RankSetup(const BlueprintFactory &factory, const IIndexEnvironment &i _softTimeoutTailCost(0.1), _softTimeoutFactor(0.5), _nearest_neighbor_brute_force_limit(0.05), - _global_filter_limit(0.0) + _global_filter_lower_limit(0.0), + _global_filter_upper_limit(1.0) { } RankSetup::~RankSetup() = default; @@ -107,7 +108,8 @@ RankSetup::configure() setSoftTimeoutTailCost(softtimeout::TailCost::lookup(_indexEnv.getProperties())); setSoftTimeoutFactor(softtimeout::Factor::lookup(_indexEnv.getProperties())); 
set_nearest_neighbor_brute_force_limit(matching::NearestNeighborBruteForceLimit::lookup(_indexEnv.getProperties())); - set_global_filter_limit(matching::GlobalFilterLimit::lookup(_indexEnv.getProperties())); + set_global_filter_lower_limit(matching::GlobalFilterLowerLimit::lookup(_indexEnv.getProperties())); + set_global_filter_upper_limit(matching::GlobalFilterUpperLimit::lookup(_indexEnv.getProperties())); } void diff --git a/searchlib/src/vespa/searchlib/fef/ranksetup.h b/searchlib/src/vespa/searchlib/fef/ranksetup.h index 3e127a1e8b5..6fea33b9e12 100644 --- a/searchlib/src/vespa/searchlib/fef/ranksetup.h +++ b/searchlib/src/vespa/searchlib/fef/ranksetup.h @@ -60,7 +60,8 @@ private: double _softTimeoutTailCost; double _softTimeoutFactor; double _nearest_neighbor_brute_force_limit; - double _global_filter_limit; + double _global_filter_lower_limit; + double _global_filter_upper_limit; public: @@ -370,8 +371,10 @@ public: void set_nearest_neighbor_brute_force_limit(double v) { _nearest_neighbor_brute_force_limit = v; } double get_nearest_neighbor_brute_force_limit() const { return _nearest_neighbor_brute_force_limit; } - void set_global_filter_limit(double v) { _global_filter_limit = v; } - double get_global_filter_limit() const { return _global_filter_limit; } + void set_global_filter_lower_limit(double v) { _global_filter_lower_limit = v; } + double get_global_filter_lower_limit() const { return _global_filter_lower_limit; } + void set_global_filter_upper_limit(double v) { _global_filter_upper_limit = v; } + double get_global_filter_upper_limit() const { return _global_filter_upper_limit; } /** * This method may be used to indicate that certain features |