aboutsummaryrefslogtreecommitdiffstats
path: root/searchlib
diff options
context:
space:
mode:
authorHenning Baldersheim <balder@yahoo-inc.com>2021-08-04 11:00:19 +0000
committerHenning Baldersheim <balder@yahoo-inc.com>2021-08-04 11:17:24 +0000
commit57795a688b13f25317275b74aae07ebb036dffe9 (patch)
tree43c0e98eac21666166a0f9a18f617a12164b62df /searchlib
parentdd6e573fa7fbee8605578204f4fe0c01bb15be27 (diff)
Split current global_filter_limit into global_filter.lower_limit/upper_limit.
If estimated_hits < lower_limit, no filter is set, which will cause a fallback to brute force. If estimated_hits is in [lower_limit, upper_limit], the global filter is applied. If estimated_hits > upper_limit, an empty filter is set, which avoids the filter setup cost. So if the filter has a huge setup cost, you can reduce upper_limit to a number below 1.0 and increase target_num_hits correspondingly. Scaling target_num_hits by a factor of 1.0/upper_limit * 1.2 should give similar recall; the extra 20% is a safety margin to handle correlation between the filter and the NearestNeighbor calculation.
Diffstat (limited to 'searchlib')
-rw-r--r--searchlib/src/vespa/searchlib/fef/indexproperties.cpp24
-rw-r--r--searchlib/src/vespa/searchlib/fef/indexproperties.h19
-rw-r--r--searchlib/src/vespa/searchlib/fef/ranksetup.cpp6
-rw-r--r--searchlib/src/vespa/searchlib/fef/ranksetup.h9
4 files changed, 47 insertions, 11 deletions
diff --git a/searchlib/src/vespa/searchlib/fef/indexproperties.cpp b/searchlib/src/vespa/searchlib/fef/indexproperties.cpp
index 622e437692a..df4d46ecb73 100644
--- a/searchlib/src/vespa/searchlib/fef/indexproperties.cpp
+++ b/searchlib/src/vespa/searchlib/fef/indexproperties.cpp
@@ -290,18 +290,34 @@ NearestNeighborBruteForceLimit::lookup(const Properties &props, double defaultVa
return lookupDouble(props, NAME, defaultValue);
}
-const vespalib::string GlobalFilterLimit::NAME("vespa.matching.global_filter_limit");
+const vespalib::string GlobalFilterLowerLimit::NAME("vespa.matching.global_filter.lower_limit");
-const double GlobalFilterLimit::DEFAULT_VALUE(0.0);
+const double GlobalFilterLowerLimit::DEFAULT_VALUE(0.0);
double
-GlobalFilterLimit::lookup(const Properties &props)
+GlobalFilterLowerLimit::lookup(const Properties &props)
{
return lookup(props, DEFAULT_VALUE);
}
double
-GlobalFilterLimit::lookup(const Properties &props, double defaultValue)
+GlobalFilterLowerLimit::lookup(const Properties &props, double defaultValue)
+{
+ return lookupDouble(props, NAME, defaultValue);
+}
+
+const vespalib::string GlobalFilterUpperLimit::NAME("vespa.matching.global_filter.upper_limit");
+
+const double GlobalFilterUpperLimit::DEFAULT_VALUE(2.0);
+
+double
+GlobalFilterUpperLimit::lookup(const Properties &props)
+{
+ return lookup(props, DEFAULT_VALUE);
+}
+
+double
+GlobalFilterUpperLimit::lookup(const Properties &props, double defaultValue)
{
return lookupDouble(props, NAME, defaultValue);
}
diff --git a/searchlib/src/vespa/searchlib/fef/indexproperties.h b/searchlib/src/vespa/searchlib/fef/indexproperties.h
index 1b4c2e92d8d..135a8254cd8 100644
--- a/searchlib/src/vespa/searchlib/fef/indexproperties.h
+++ b/searchlib/src/vespa/searchlib/fef/indexproperties.h
@@ -223,9 +223,24 @@ namespace matching {
* Property to control fallback to not building a global filter
* for a query with a blueprint that wants a global filter. If the
* estimated ratio of matching documents is less than this limit
- * then don't build a global filter.
+ * then don't build a global filter. The effect will be falling back to bruteforce instead of approximation.
**/
- struct GlobalFilterLimit {
+ struct GlobalFilterLowerLimit {
+ static const vespalib::string NAME;
+ static const double DEFAULT_VALUE;
+ static double lookup(const Properties &props);
+ static double lookup(const Properties &props, double defaultValue);
+ };
+
+ /**
+ * Property to control not building a global filter
+ * for a query with a blueprint that wants a global filter. If the
+ * estimated ratio of matching documents is larger than this limit
+ * then don't build a global filter, but assume that the expected filter ratio has been
+ * compensated for by increasing recall. Increasing recall by 1/upper_limit * 1.2 is probably a sane solution,
+ * adding a 20% margin to handle some correlation between filter and rest of query.
+ **/
+ struct GlobalFilterUpperLimit {
static const vespalib::string NAME;
static const double DEFAULT_VALUE;
static double lookup(const Properties &props);
diff --git a/searchlib/src/vespa/searchlib/fef/ranksetup.cpp b/searchlib/src/vespa/searchlib/fef/ranksetup.cpp
index 249351a4fe5..a37bb98068d 100644
--- a/searchlib/src/vespa/searchlib/fef/ranksetup.cpp
+++ b/searchlib/src/vespa/searchlib/fef/ranksetup.cpp
@@ -63,7 +63,8 @@ RankSetup::RankSetup(const BlueprintFactory &factory, const IIndexEnvironment &i
_softTimeoutTailCost(0.1),
_softTimeoutFactor(0.5),
_nearest_neighbor_brute_force_limit(0.05),
- _global_filter_limit(0.0)
+ _global_filter_lower_limit(0.0),
+ _global_filter_upper_limit(1.0)
{ }
RankSetup::~RankSetup() = default;
@@ -107,7 +108,8 @@ RankSetup::configure()
setSoftTimeoutTailCost(softtimeout::TailCost::lookup(_indexEnv.getProperties()));
setSoftTimeoutFactor(softtimeout::Factor::lookup(_indexEnv.getProperties()));
set_nearest_neighbor_brute_force_limit(matching::NearestNeighborBruteForceLimit::lookup(_indexEnv.getProperties()));
- set_global_filter_limit(matching::GlobalFilterLimit::lookup(_indexEnv.getProperties()));
+ set_global_filter_lower_limit(matching::GlobalFilterLowerLimit::lookup(_indexEnv.getProperties()));
+ set_global_filter_upper_limit(matching::GlobalFilterUpperLimit::lookup(_indexEnv.getProperties()));
}
void
diff --git a/searchlib/src/vespa/searchlib/fef/ranksetup.h b/searchlib/src/vespa/searchlib/fef/ranksetup.h
index 3e127a1e8b5..6fea33b9e12 100644
--- a/searchlib/src/vespa/searchlib/fef/ranksetup.h
+++ b/searchlib/src/vespa/searchlib/fef/ranksetup.h
@@ -60,7 +60,8 @@ private:
double _softTimeoutTailCost;
double _softTimeoutFactor;
double _nearest_neighbor_brute_force_limit;
- double _global_filter_limit;
+ double _global_filter_lower_limit;
+ double _global_filter_upper_limit;
public:
@@ -370,8 +371,10 @@ public:
void set_nearest_neighbor_brute_force_limit(double v) { _nearest_neighbor_brute_force_limit = v; }
double get_nearest_neighbor_brute_force_limit() const { return _nearest_neighbor_brute_force_limit; }
- void set_global_filter_limit(double v) { _global_filter_limit = v; }
- double get_global_filter_limit() const { return _global_filter_limit; }
+ void set_global_filter_lower_limit(double v) { _global_filter_lower_limit = v; }
+ double get_global_filter_lower_limit() const { return _global_filter_lower_limit; }
+ void set_global_filter_upper_limit(double v) { _global_filter_upper_limit = v; }
+ double get_global_filter_upper_limit() const { return _global_filter_upper_limit; }
/**
* This method may be used to indicate that certain features