summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorHenning Baldersheim <balder@yahoo-inc.com>2021-08-05 10:39:44 +0200
committerGitHub <noreply@github.com>2021-08-05 10:39:44 +0200
commitc4bf5a9ae94164619ba89b0b3c78ac2c68eb429a (patch)
treeeda4c0bc50a2f9be18a424279e5c16991d85a3cb
parent16277f3f494c1bcbe54e3f6d175bc536cf98f99e (diff)
parent57795a688b13f25317275b74aae07ebb036dffe9 (diff)
Merge pull request #18681 from vespa-engine/balder/also-perform-topk-without-global-filter
Balder/also perform topk without global filter
-rw-r--r--searchcore/src/vespa/searchcore/proton/matching/match_tools.cpp5
-rw-r--r--searchcore/src/vespa/searchcore/proton/matching/query.cpp23
-rw-r--r--searchcore/src/vespa/searchcore/proton/matching/query.h2
-rw-r--r--searchlib/src/vespa/searchlib/fef/indexproperties.cpp24
-rw-r--r--searchlib/src/vespa/searchlib/fef/indexproperties.h19
-rw-r--r--searchlib/src/vespa/searchlib/fef/ranksetup.cpp6
-rw-r--r--searchlib/src/vespa/searchlib/fef/ranksetup.h9
7 files changed, 67 insertions, 21 deletions
diff --git a/searchcore/src/vespa/searchcore/proton/matching/match_tools.cpp b/searchcore/src/vespa/searchcore/proton/matching/match_tools.cpp
index 55194e51048..1c04313f057 100644
--- a/searchcore/src/vespa/searchcore/proton/matching/match_tools.cpp
+++ b/searchcore/src/vespa/searchcore/proton/matching/match_tools.cpp
@@ -196,8 +196,9 @@ MatchToolsFactory(QueryLimiter & queryLimiter,
_query.fetchPostings();
if (is_search) {
trace.addEvent(5, "MTF: Handle Global Filters");
- double global_filter_limit = GlobalFilterLimit::lookup(rankProperties, rankSetup.get_global_filter_limit());
- _query.handle_global_filters(searchContext.getDocIdLimit(), global_filter_limit);
+ double lower_limit = GlobalFilterLowerLimit::lookup(rankProperties, rankSetup.get_global_filter_lower_limit());
+ double upper_limit = GlobalFilterUpperLimit::lookup(rankProperties, rankSetup.get_global_filter_upper_limit());
+ _query.handle_global_filters(searchContext.getDocIdLimit(), lower_limit, upper_limit);
}
_query.freeze();
trace.addEvent(5, "MTF: prepareSharedState");
diff --git a/searchcore/src/vespa/searchcore/proton/matching/query.cpp b/searchcore/src/vespa/searchcore/proton/matching/query.cpp
index 07024950779..418907fb54c 100644
--- a/searchcore/src/vespa/searchcore/proton/matching/query.cpp
+++ b/searchcore/src/vespa/searchcore/proton/matching/query.cpp
@@ -242,11 +242,17 @@ Query::fetchPostings()
}
void
-Query::handle_global_filters(uint32_t docid_limit, double global_filter_limit)
+Query::handle_global_filters(uint32_t docid_limit, double global_filter_lower_limit, double global_filter_upper_limit)
{
using search::queryeval::GlobalFilter;
double estimated_hit_ratio = _blueprint->getState().hit_ratio(docid_limit);
- if (_blueprint->getState().want_global_filter() && estimated_hit_ratio >= global_filter_limit) {
+ if ( ! _blueprint->getState().want_global_filter()) return;
+
+ LOG(debug, "docid_limit=%d, estimated_hit_ratio=%1.2f, global_filter_lower_limit=%1.2f, global_filter_upper_limit=%1.2f",
+ docid_limit, estimated_hit_ratio, global_filter_lower_limit, global_filter_upper_limit);
+ if (estimated_hit_ratio < global_filter_lower_limit) return;
+
+ if (estimated_hit_ratio <= global_filter_upper_limit) {
auto constraint = Blueprint::FilterConstraint::UPPER_BOUND;
bool strict = true;
auto filter_iterator = _blueprint->createFilterSearch(strict, constraint);
@@ -254,12 +260,15 @@ Query::handle_global_filters(uint32_t docid_limit, double global_filter_limit)
auto white_list = filter_iterator->get_hits(1);
auto global_filter = GlobalFilter::create(std::move(white_list));
_blueprint->set_global_filter(*global_filter);
- // optimized order may change after accounting for global filter:
- _blueprint = Blueprint::optimize(std::move(_blueprint));
- LOG(debug, "blueprint after handle_global_filters:\n%s\n", _blueprint->asString().c_str());
- // strictness may change if optimized order changed:
- fetchPostings();
+ } else {
+ auto no_filter = GlobalFilter::create();
+ _blueprint->set_global_filter(*no_filter);
}
+ // optimized order may change after accounting for global filter:
+ _blueprint = Blueprint::optimize(std::move(_blueprint));
+ LOG(debug, "blueprint after handle_global_filters:\n%s\n", _blueprint->asString().c_str());
+ // strictness may change if optimized order changed:
+ fetchPostings();
}
void
diff --git a/searchcore/src/vespa/searchcore/proton/matching/query.h b/searchcore/src/vespa/searchcore/proton/matching/query.h
index 952b6260da1..945ce6b38ff 100644
--- a/searchcore/src/vespa/searchcore/proton/matching/query.h
+++ b/searchcore/src/vespa/searchcore/proton/matching/query.h
@@ -92,7 +92,7 @@ public:
**/
void optimize();
void fetchPostings();
- void handle_global_filters(uint32_t docidLimit, double global_filter_limit);
+ void handle_global_filters(uint32_t docidLimit, double global_filter_lower_limit, double global_filter_upper_limit);
void freeze();
/**
diff --git a/searchlib/src/vespa/searchlib/fef/indexproperties.cpp b/searchlib/src/vespa/searchlib/fef/indexproperties.cpp
index 622e437692a..df4d46ecb73 100644
--- a/searchlib/src/vespa/searchlib/fef/indexproperties.cpp
+++ b/searchlib/src/vespa/searchlib/fef/indexproperties.cpp
@@ -290,18 +290,34 @@ NearestNeighborBruteForceLimit::lookup(const Properties &props, double defaultVa
return lookupDouble(props, NAME, defaultValue);
}
-const vespalib::string GlobalFilterLimit::NAME("vespa.matching.global_filter_limit");
+const vespalib::string GlobalFilterLowerLimit::NAME("vespa.matching.global_filter.lower_limit");
-const double GlobalFilterLimit::DEFAULT_VALUE(0.0);
+const double GlobalFilterLowerLimit::DEFAULT_VALUE(0.0);
double
-GlobalFilterLimit::lookup(const Properties &props)
+GlobalFilterLowerLimit::lookup(const Properties &props)
{
return lookup(props, DEFAULT_VALUE);
}
double
-GlobalFilterLimit::lookup(const Properties &props, double defaultValue)
+GlobalFilterLowerLimit::lookup(const Properties &props, double defaultValue)
+{
+ return lookupDouble(props, NAME, defaultValue);
+}
+
+const vespalib::string GlobalFilterUpperLimit::NAME("vespa.matching.global_filter.upper_limit");
+
+const double GlobalFilterUpperLimit::DEFAULT_VALUE(2.0);
+
+double
+GlobalFilterUpperLimit::lookup(const Properties &props)
+{
+ return lookup(props, DEFAULT_VALUE);
+}
+
+double
+GlobalFilterUpperLimit::lookup(const Properties &props, double defaultValue)
{
return lookupDouble(props, NAME, defaultValue);
}
diff --git a/searchlib/src/vespa/searchlib/fef/indexproperties.h b/searchlib/src/vespa/searchlib/fef/indexproperties.h
index 1b4c2e92d8d..135a8254cd8 100644
--- a/searchlib/src/vespa/searchlib/fef/indexproperties.h
+++ b/searchlib/src/vespa/searchlib/fef/indexproperties.h
@@ -223,9 +223,24 @@ namespace matching {
* Property to control fallback to not building a global filter
* for a query with a blueprint that wants a global filter. If the
* estimated ratio of matching documents is less than this limit
- * then don't build a global filter.
+ * then don't build a global filter. The effect is a fallback to brute force instead of approximation.
**/
- struct GlobalFilterLimit {
+ struct GlobalFilterLowerLimit {
+ static const vespalib::string NAME;
+ static const double DEFAULT_VALUE;
+ static double lookup(const Properties &props);
+ static double lookup(const Properties &props, double defaultValue);
+ };
+
+ /**
+ * Property to control skipping the building of a global filter
+ * for a query with a blueprint that wants a global filter. If the
+ * estimated ratio of matching documents is larger than this limit
+ * then no global filter is built; instead it is assumed that the expected filter ratio has been
+ * compensated for by increasing recall. Increasing recall by 1/upper_limit * 1.2 is probably a sane solution,
+ * adding a 20% margin to handle some correlation between the filter and the rest of the query.
+ **/
+ struct GlobalFilterUpperLimit {
static const vespalib::string NAME;
static const double DEFAULT_VALUE;
static double lookup(const Properties &props);
diff --git a/searchlib/src/vespa/searchlib/fef/ranksetup.cpp b/searchlib/src/vespa/searchlib/fef/ranksetup.cpp
index 249351a4fe5..a37bb98068d 100644
--- a/searchlib/src/vespa/searchlib/fef/ranksetup.cpp
+++ b/searchlib/src/vespa/searchlib/fef/ranksetup.cpp
@@ -63,7 +63,8 @@ RankSetup::RankSetup(const BlueprintFactory &factory, const IIndexEnvironment &i
_softTimeoutTailCost(0.1),
_softTimeoutFactor(0.5),
_nearest_neighbor_brute_force_limit(0.05),
- _global_filter_limit(0.0)
+ _global_filter_lower_limit(0.0),
+ _global_filter_upper_limit(1.0)
{ }
RankSetup::~RankSetup() = default;
@@ -107,7 +108,8 @@ RankSetup::configure()
setSoftTimeoutTailCost(softtimeout::TailCost::lookup(_indexEnv.getProperties()));
setSoftTimeoutFactor(softtimeout::Factor::lookup(_indexEnv.getProperties()));
set_nearest_neighbor_brute_force_limit(matching::NearestNeighborBruteForceLimit::lookup(_indexEnv.getProperties()));
- set_global_filter_limit(matching::GlobalFilterLimit::lookup(_indexEnv.getProperties()));
+ set_global_filter_lower_limit(matching::GlobalFilterLowerLimit::lookup(_indexEnv.getProperties()));
+ set_global_filter_upper_limit(matching::GlobalFilterUpperLimit::lookup(_indexEnv.getProperties()));
}
void
diff --git a/searchlib/src/vespa/searchlib/fef/ranksetup.h b/searchlib/src/vespa/searchlib/fef/ranksetup.h
index 3e127a1e8b5..6fea33b9e12 100644
--- a/searchlib/src/vespa/searchlib/fef/ranksetup.h
+++ b/searchlib/src/vespa/searchlib/fef/ranksetup.h
@@ -60,7 +60,8 @@ private:
double _softTimeoutTailCost;
double _softTimeoutFactor;
double _nearest_neighbor_brute_force_limit;
- double _global_filter_limit;
+ double _global_filter_lower_limit;
+ double _global_filter_upper_limit;
public:
@@ -370,8 +371,10 @@ public:
void set_nearest_neighbor_brute_force_limit(double v) { _nearest_neighbor_brute_force_limit = v; }
double get_nearest_neighbor_brute_force_limit() const { return _nearest_neighbor_brute_force_limit; }
- void set_global_filter_limit(double v) { _global_filter_limit = v; }
- double get_global_filter_limit() const { return _global_filter_limit; }
+ void set_global_filter_lower_limit(double v) { _global_filter_lower_limit = v; }
+ double get_global_filter_lower_limit() const { return _global_filter_lower_limit; }
+ void set_global_filter_upper_limit(double v) { _global_filter_upper_limit = v; }
+ double get_global_filter_upper_limit() const { return _global_filter_upper_limit; }
/**
* This method may be used to indicate that certain features