summary refs log tree commit diff stats
diff options
context:
space:
mode:
author Geir Storli <geirst@verizonmedia.com> 2020-06-30 10:52:05 +0200
committer GitHub <noreply@github.com> 2020-06-30 10:52:05 +0200
commit f4b107073f2b8352a63f981c290909fc10ba6c8e (patch)
tree 5ed4a491100b416afc9740eadc47ac9139cbb8eb
parent 5aea1d09520da60523f5f972b50890ce930d6d54 (diff)
parent 1885eff6b6ab74e8a2ce402fc849a7e31ef50fbf (diff)
Merge pull request #13673 from vespa-engine/toregge/add-global-filter-limit
If the estimated rate of matching documents is less than global filte…
-rw-r--r-- searchcore/src/vespa/searchcore/proton/matching/match_tools.cpp 3
-rw-r--r-- searchcore/src/vespa/searchcore/proton/matching/query.cpp 5
-rw-r--r-- searchcore/src/vespa/searchcore/proton/matching/query.h 2
-rw-r--r-- searchlib/src/vespa/searchlib/fef/indexproperties.cpp 16
-rw-r--r-- searchlib/src/vespa/searchlib/fef/indexproperties.h 13
-rw-r--r-- searchlib/src/vespa/searchlib/fef/ranksetup.cpp 4
-rw-r--r-- searchlib/src/vespa/searchlib/fef/ranksetup.h 4
7 files changed, 42 insertions, 5 deletions
diff --git a/searchcore/src/vespa/searchcore/proton/matching/match_tools.cpp b/searchcore/src/vespa/searchcore/proton/matching/match_tools.cpp
index fadea4b7962..19bece5ae9c 100644
--- a/searchcore/src/vespa/searchcore/proton/matching/match_tools.cpp
+++ b/searchcore/src/vespa/searchcore/proton/matching/match_tools.cpp
@@ -194,7 +194,8 @@ MatchToolsFactory(QueryLimiter & queryLimiter,
trace.addEvent(4, "MTF: Fetch Postings");
_query.fetchPostings();
trace.addEvent(5, "MTF: Handle Global Filters");
- _query.handle_global_filters(searchContext.getDocIdLimit());
+ double global_filter_limit = GlobalFilterLimit::lookup(rankProperties, rankSetup.get_global_filter_limit());
+ _query.handle_global_filters(searchContext.getDocIdLimit(), global_filter_limit);
_query.freeze();
trace.addEvent(5, "MTF: prepareSharedState");
_rankSetup.prepareSharedState(_queryEnv, _queryEnv.getObjectStore());
diff --git a/searchcore/src/vespa/searchcore/proton/matching/query.cpp b/searchcore/src/vespa/searchcore/proton/matching/query.cpp
index 5213a2b9230..62a59ab7680 100644
--- a/searchcore/src/vespa/searchcore/proton/matching/query.cpp
+++ b/searchcore/src/vespa/searchcore/proton/matching/query.cpp
@@ -199,10 +199,11 @@ Query::fetchPostings()
}
void
-Query::handle_global_filters(uint32_t docid_limit)
+Query::handle_global_filters(uint32_t docid_limit, double global_filter_limit)
{
using search::queryeval::GlobalFilter;
- if (_blueprint->getState().want_global_filter()) {
+ double estimated_hit_ratio = _blueprint->getState().hit_ratio(docid_limit);
+ if (_blueprint->getState().want_global_filter() && estimated_hit_ratio >= global_filter_limit) {
auto constraint = Blueprint::FilterConstraint::UPPER_BOUND;
bool strict = true;
auto filter_iterator = _blueprint->createFilterSearch(strict, constraint);
diff --git a/searchcore/src/vespa/searchcore/proton/matching/query.h b/searchcore/src/vespa/searchcore/proton/matching/query.h
index 3ed6229830d..60f40e24d1e 100644
--- a/searchcore/src/vespa/searchcore/proton/matching/query.h
+++ b/searchcore/src/vespa/searchcore/proton/matching/query.h
@@ -89,7 +89,7 @@ public:
**/
void optimize();
void fetchPostings();
- void handle_global_filters(uint32_t docidLimit);
+ void handle_global_filters(uint32_t docidLimit, double global_filter_limit);
void freeze();
/**
diff --git a/searchlib/src/vespa/searchlib/fef/indexproperties.cpp b/searchlib/src/vespa/searchlib/fef/indexproperties.cpp
index fb44b986301..622e437692a 100644
--- a/searchlib/src/vespa/searchlib/fef/indexproperties.cpp
+++ b/searchlib/src/vespa/searchlib/fef/indexproperties.cpp
@@ -290,6 +290,22 @@ NearestNeighborBruteForceLimit::lookup(const Properties &props, double defaultVa
return lookupDouble(props, NAME, defaultValue);
}
+const vespalib::string GlobalFilterLimit::NAME("vespa.matching.global_filter_limit");
+
+const double GlobalFilterLimit::DEFAULT_VALUE(0.0);
+
+double
+GlobalFilterLimit::lookup(const Properties &props)
+{
+ return lookup(props, DEFAULT_VALUE);
+}
+
+double
+GlobalFilterLimit::lookup(const Properties &props, double defaultValue)
+{
+ return lookupDouble(props, NAME, defaultValue);
+}
+
} // namespace matching
namespace softtimeout {
diff --git a/searchlib/src/vespa/searchlib/fef/indexproperties.h b/searchlib/src/vespa/searchlib/fef/indexproperties.h
index 30c726caeba..1b4c2e92d8d 100644
--- a/searchlib/src/vespa/searchlib/fef/indexproperties.h
+++ b/searchlib/src/vespa/searchlib/fef/indexproperties.h
@@ -218,6 +218,19 @@ namespace matching {
static double lookup(const Properties &props);
static double lookup(const Properties &props, double defaultValue);
};
+
+ /**
+ * Property to control fallback to not building a global filter
+ * for a query with a blueprint that wants a global filter. If the
+ * estimated ratio of matching documents is less than this limit
+ * then don't build a global filter.
+ **/
+ struct GlobalFilterLimit {
+ static const vespalib::string NAME;
+ static const double DEFAULT_VALUE;
+ static double lookup(const Properties &props);
+ static double lookup(const Properties &props, double defaultValue);
+ };
}
namespace softtimeout {
diff --git a/searchlib/src/vespa/searchlib/fef/ranksetup.cpp b/searchlib/src/vespa/searchlib/fef/ranksetup.cpp
index e197f095852..249351a4fe5 100644
--- a/searchlib/src/vespa/searchlib/fef/ranksetup.cpp
+++ b/searchlib/src/vespa/searchlib/fef/ranksetup.cpp
@@ -62,7 +62,8 @@ RankSetup::RankSetup(const BlueprintFactory &factory, const IIndexEnvironment &i
_softTimeoutEnabled(false),
_softTimeoutTailCost(0.1),
_softTimeoutFactor(0.5),
- _nearest_neighbor_brute_force_limit(0.05)
+ _nearest_neighbor_brute_force_limit(0.05),
+ _global_filter_limit(0.0)
{ }
RankSetup::~RankSetup() = default;
@@ -106,6 +107,7 @@ RankSetup::configure()
setSoftTimeoutTailCost(softtimeout::TailCost::lookup(_indexEnv.getProperties()));
setSoftTimeoutFactor(softtimeout::Factor::lookup(_indexEnv.getProperties()));
set_nearest_neighbor_brute_force_limit(matching::NearestNeighborBruteForceLimit::lookup(_indexEnv.getProperties()));
+ set_global_filter_limit(matching::GlobalFilterLimit::lookup(_indexEnv.getProperties()));
}
void
diff --git a/searchlib/src/vespa/searchlib/fef/ranksetup.h b/searchlib/src/vespa/searchlib/fef/ranksetup.h
index ad793eeaceb..3e127a1e8b5 100644
--- a/searchlib/src/vespa/searchlib/fef/ranksetup.h
+++ b/searchlib/src/vespa/searchlib/fef/ranksetup.h
@@ -60,6 +60,7 @@ private:
double _softTimeoutTailCost;
double _softTimeoutFactor;
double _nearest_neighbor_brute_force_limit;
+ double _global_filter_limit;
public:
@@ -369,6 +370,9 @@ public:
void set_nearest_neighbor_brute_force_limit(double v) { _nearest_neighbor_brute_force_limit = v; }
double get_nearest_neighbor_brute_force_limit() const { return _nearest_neighbor_brute_force_limit; }
+ void set_global_filter_limit(double v) { _global_filter_limit = v; }
+ double get_global_filter_limit() const { return _global_filter_limit; }
+
/**
* This method may be used to indicate that certain features
* should be dumped during a full feature dump.