summary refs log tree commit diff stats
path: root/searchcore
diff options
context:
space:
mode:
author Geir Storli <geirst@yahooinc.com> 2022-04-08 15:14:07 +0000
committer Geir Storli <geirst@yahooinc.com> 2022-04-11 09:20:24 +0000
commit 53420d91425d7b456ac25cb74a68fdff94db2cde (patch)
tree 8838f8cfffaf7e0388bef9f6c4c7bea7cc0e880a /searchcore
parent 23841f2517967c1a59cf9826f1de953c5caa7199 (diff)
Write unit test for how global filter is calculated and handled.
Diffstat (limited to 'searchcore')
-rw-r--r-- searchcore/src/tests/proton/matching/query_test.cpp 82
-rw-r--r-- searchcore/src/vespa/searchcore/proton/matching/match_tools.cpp 2
-rw-r--r-- searchcore/src/vespa/searchcore/proton/matching/query.cpp 37
-rw-r--r-- searchcore/src/vespa/searchcore/proton/matching/query.h 20
4 files changed, 115 insertions, 26 deletions
diff --git a/searchcore/src/tests/proton/matching/query_test.cpp b/searchcore/src/tests/proton/matching/query_test.cpp
index 11d4b1a1687..7f0232120e7 100644
--- a/searchcore/src/tests/proton/matching/query_test.cpp
+++ b/searchcore/src/tests/proton/matching/query_test.cpp
@@ -43,34 +43,34 @@ using search::fef::ITermFieldData;
using search::fef::IllegalHandle;
using search::fef::MatchData;
using search::fef::MatchDataLayout;
-using search::fef::TermFieldMatchData;
using search::fef::TermFieldHandle;
+using search::fef::TermFieldMatchData;
using search::query::CustomTypeTermVisitor;
using search::query::Node;
using search::query::QueryBuilder;
using search::query::Range;
using search::query::StackDumpCreator;
using search::query::Weight;
-using search::queryeval::termAsString;
+using search::queryeval::AndBlueprint;
+using search::queryeval::AndNotBlueprint;
using search::queryeval::Blueprint;
+using search::queryeval::ExecuteInfo;
+using search::queryeval::FakeBlueprint;
+using search::queryeval::FakeRequestContext;
using search::queryeval::FakeResult;
using search::queryeval::FakeSearchable;
-using search::queryeval::FakeRequestContext;
-using search::queryeval::FakeBlueprint;
using search::queryeval::FieldSpec;
using search::queryeval::FieldSpecList;
-using search::queryeval::Searchable;
+using search::queryeval::GlobalFilter;
+using search::queryeval::IntermediateBlueprint;
+using search::queryeval::ParallelWeakAndBlueprint;
+using search::queryeval::RankBlueprint;
using search::queryeval::SearchIterator;
+using search::queryeval::Searchable;
using search::queryeval::SimpleBlueprint;
using search::queryeval::SimpleResult;
-using search::queryeval::ParallelWeakAndBlueprint;
-using search::queryeval::RankBlueprint;
-using search::queryeval::AndBlueprint;
-using search::queryeval::IntermediateBlueprint;
-using search::queryeval::AndNotBlueprint;
using search::queryeval::SourceBlenderBlueprint;
-using search::queryeval::ExecuteInfo;
-
+using search::queryeval::termAsString;
using std::string;
using std::vector;
namespace fef_test = search::fef::test;
@@ -121,6 +121,7 @@ class Test : public vespalib::TestApp {
void requireThatSameElementDoesNotAllocateMatchData();
void requireThatSameElementIteratorsCanBeBuilt();
void requireThatConstBoolBlueprintsAreCreatedCorrectly();
+ void global_filter_is_calculated_and_handled();
public:
~Test() override;
@@ -1113,6 +1114,62 @@ void Test::requireThatConstBoolBlueprintsAreCreatedCorrectly() {
EXPECT_TRUE(fbp != nullptr);
}
+class GlobalFilterBlueprint : public SimpleBlueprint {
+public:
+ std::shared_ptr<const GlobalFilter> filter;
+ GlobalFilterBlueprint(const SimpleResult& result,
+ bool want_global_filter)
+ : search::queryeval::SimpleBlueprint(result),
+ filter()
+ {
+ set_want_global_filter(want_global_filter);
+ }
+ ~GlobalFilterBlueprint() {}
+ void set_global_filter(const GlobalFilter& filter_) override {
+ filter = filter_.shared_from_this();
+ }
+};
+
+void
+Test::global_filter_is_calculated_and_handled()
+{
+ // estimated hits = 3, estimated hit ratio = 0.3
+ auto result = SimpleResult().addHit(3).addHit(5).addHit(7);
+ uint32_t docid_limit = 10;
+ { // global filter is not wanted
+ GlobalFilterBlueprint bp(result, false);
+ auto res = Query::handle_global_filter(bp, docid_limit, 0, 1);
+ EXPECT_FALSE(res);
+ EXPECT_FALSE(bp.filter);
+ }
+ { // estimated_hit_ratio < global_filter_lower_limit
+ GlobalFilterBlueprint bp(result, true);
+ auto res = Query::handle_global_filter(bp, docid_limit, 0.31, 1);
+ EXPECT_FALSE(res);
+ EXPECT_FALSE(bp.filter);
+ }
+ { // estimated_hit_ratio <= global_filter_upper_limit
+ GlobalFilterBlueprint bp(result, true);
+ auto res = Query::handle_global_filter(bp, docid_limit, 0, 0.3);
+ EXPECT_TRUE(res);
+ EXPECT_TRUE(bp.filter);
+ EXPECT_TRUE(bp.filter->has_filter());
+
+ auto* bv = bp.filter->filter();
+ EXPECT_EQUAL(3u, bv->countTrueBits());
+ EXPECT_TRUE(bv->testBit(3));
+ EXPECT_TRUE(bv->testBit(5));
+ EXPECT_TRUE(bv->testBit(7));
+ }
+ { // estimated_hit_ratio > global_filter_upper_limit
+ GlobalFilterBlueprint bp(result, true);
+ auto res = Query::handle_global_filter(bp, docid_limit, 0, 0.29);
+ EXPECT_TRUE(res);
+ EXPECT_TRUE(bp.filter);
+ EXPECT_FALSE(bp.filter->has_filter());
+ }
+}
+
Test::~Test() = default;
int
@@ -1152,6 +1209,7 @@ Test::Main()
TEST_CALL(requireThatSameElementDoesNotAllocateMatchData);
TEST_CALL(requireThatSameElementIteratorsCanBeBuilt);
TEST_CALL(requireThatConstBoolBlueprintsAreCreatedCorrectly);
+ TEST_CALL(global_filter_is_calculated_and_handled);
TEST_DONE();
}
diff --git a/searchcore/src/vespa/searchcore/proton/matching/match_tools.cpp b/searchcore/src/vespa/searchcore/proton/matching/match_tools.cpp
index 13639cb55b7..acee34220a6 100644
--- a/searchcore/src/vespa/searchcore/proton/matching/match_tools.cpp
+++ b/searchcore/src/vespa/searchcore/proton/matching/match_tools.cpp
@@ -205,7 +205,7 @@ MatchToolsFactory(QueryLimiter & queryLimiter,
trace.addEvent(5, "MTF: Handle Global Filters");
double lower_limit = GlobalFilterLowerLimit::lookup(rankProperties, rankSetup.get_global_filter_lower_limit());
double upper_limit = GlobalFilterUpperLimit::lookup(rankProperties, rankSetup.get_global_filter_upper_limit());
- _query.handle_global_filters(searchContext.getDocIdLimit(), lower_limit, upper_limit);
+ _query.handle_global_filter(searchContext.getDocIdLimit(), lower_limit, upper_limit);
}
_query.freeze();
trace.addEvent(5, "MTF: prepareSharedState");
diff --git a/searchcore/src/vespa/searchcore/proton/matching/query.cpp b/searchcore/src/vespa/searchcore/proton/matching/query.cpp
index 8842f426a3b..95fe846a088 100644
--- a/searchcore/src/vespa/searchcore/proton/matching/query.cpp
+++ b/searchcore/src/vespa/searchcore/proton/matching/query.cpp
@@ -244,33 +244,46 @@ Query::fetchPostings()
}
void
-Query::handle_global_filters(uint32_t docid_limit, double global_filter_lower_limit, double global_filter_upper_limit)
+Query::handle_global_filter(uint32_t docid_limit, double global_filter_lower_limit, double global_filter_upper_limit)
+{
+ if (!handle_global_filter(*_blueprint, docid_limit, global_filter_lower_limit, global_filter_upper_limit)) {
+ return;
+ }
+ // optimized order may change after accounting for global filter:
+ _blueprint = Blueprint::optimize(std::move(_blueprint));
+ LOG(debug, "blueprint after handle_global_filter:\n%s\n", _blueprint->asString().c_str());
+ // strictness may change if optimized order changed:
+ fetchPostings();
+}
+
+bool
+Query::handle_global_filter(Blueprint& blueprint, uint32_t docid_limit, double global_filter_lower_limit, double global_filter_upper_limit)
{
using search::queryeval::GlobalFilter;
- double estimated_hit_ratio = _blueprint->getState().hit_ratio(docid_limit);
- if ( ! _blueprint->getState().want_global_filter()) return;
+ double estimated_hit_ratio = blueprint.getState().hit_ratio(docid_limit);
+ if (!blueprint.getState().want_global_filter()) {
+ return false;
+ }
LOG(debug, "docid_limit=%d, estimated_hit_ratio=%1.2f, global_filter_lower_limit=%1.2f, global_filter_upper_limit=%1.2f",
docid_limit, estimated_hit_ratio, global_filter_lower_limit, global_filter_upper_limit);
- if (estimated_hit_ratio < global_filter_lower_limit) return;
+ if (estimated_hit_ratio < global_filter_lower_limit) {
+ return false;
+ }
if (estimated_hit_ratio <= global_filter_upper_limit) {
auto constraint = Blueprint::FilterConstraint::UPPER_BOUND;
bool strict = true;
- auto filter_iterator = _blueprint->createFilterSearch(strict, constraint);
+ auto filter_iterator = blueprint.createFilterSearch(strict, constraint);
filter_iterator->initRange(1, docid_limit);
auto white_list = filter_iterator->get_hits(1);
auto global_filter = GlobalFilter::create(std::move(white_list));
- _blueprint->set_global_filter(*global_filter);
+ blueprint.set_global_filter(*global_filter);
} else {
auto no_filter = GlobalFilter::create();
- _blueprint->set_global_filter(*no_filter);
+ blueprint.set_global_filter(*no_filter);
}
- // optimized order may change after accounting for global filter:
- _blueprint = Blueprint::optimize(std::move(_blueprint));
- LOG(debug, "blueprint after handle_global_filters:\n%s\n", _blueprint->asString().c_str());
- // strictness may change if optimized order changed:
- fetchPostings();
+ return true;
}
void
diff --git a/searchcore/src/vespa/searchcore/proton/matching/query.h b/searchcore/src/vespa/searchcore/proton/matching/query.h
index eff4371c4bc..29bca310502 100644
--- a/searchcore/src/vespa/searchcore/proton/matching/query.h
+++ b/searchcore/src/vespa/searchcore/proton/matching/query.h
@@ -92,7 +92,25 @@ public:
**/
void optimize();
void fetchPostings();
- void handle_global_filters(uint32_t docidLimit, double global_filter_lower_limit, double global_filter_upper_limit);
+
+ void handle_global_filter(uint32_t docid_limit, double global_filter_lower_limit, double global_filter_upper_limit);
+
+ /**
+ * Calculates and handles the global filter if needed by the blueprint tree.
+ *
+ * The estimated hit ratio from the blueprint tree is used to select strategy:
+ * 1) estimated_hit_ratio < global_filter_lower_limit:
+ * Nothing is done.
+ * 2) estimated_hit_ratio <= global_filter_upper_limit:
+ * Calculate the global filter and set it on the blueprint.
+ * 3) estimated_hit_ratio > global_filter_upper_limit:
+ * Set a "match all filter" on the blueprint.
+ *
+ * @return whether the global filter was set on the blueprint.
+ */
+ static bool handle_global_filter(Blueprint& blueprint, uint32_t docid_limit,
+ double global_filter_lower_limit, double global_filter_upper_limit);
+
void freeze();
/**