diff options
Diffstat (limited to 'searchcore')
4 files changed, 115 insertions, 26 deletions
diff --git a/searchcore/src/tests/proton/matching/query_test.cpp b/searchcore/src/tests/proton/matching/query_test.cpp index 11d4b1a1687..7f0232120e7 100644 --- a/searchcore/src/tests/proton/matching/query_test.cpp +++ b/searchcore/src/tests/proton/matching/query_test.cpp @@ -43,34 +43,34 @@ using search::fef::ITermFieldData; using search::fef::IllegalHandle; using search::fef::MatchData; using search::fef::MatchDataLayout; -using search::fef::TermFieldMatchData; using search::fef::TermFieldHandle; +using search::fef::TermFieldMatchData; using search::query::CustomTypeTermVisitor; using search::query::Node; using search::query::QueryBuilder; using search::query::Range; using search::query::StackDumpCreator; using search::query::Weight; -using search::queryeval::termAsString; +using search::queryeval::AndBlueprint; +using search::queryeval::AndNotBlueprint; using search::queryeval::Blueprint; +using search::queryeval::ExecuteInfo; +using search::queryeval::FakeBlueprint; +using search::queryeval::FakeRequestContext; using search::queryeval::FakeResult; using search::queryeval::FakeSearchable; -using search::queryeval::FakeRequestContext; -using search::queryeval::FakeBlueprint; using search::queryeval::FieldSpec; using search::queryeval::FieldSpecList; -using search::queryeval::Searchable; +using search::queryeval::GlobalFilter; +using search::queryeval::IntermediateBlueprint; +using search::queryeval::ParallelWeakAndBlueprint; +using search::queryeval::RankBlueprint; using search::queryeval::SearchIterator; +using search::queryeval::Searchable; using search::queryeval::SimpleBlueprint; using search::queryeval::SimpleResult; -using search::queryeval::ParallelWeakAndBlueprint; -using search::queryeval::RankBlueprint; -using search::queryeval::AndBlueprint; -using search::queryeval::IntermediateBlueprint; -using search::queryeval::AndNotBlueprint; using search::queryeval::SourceBlenderBlueprint; -using search::queryeval::ExecuteInfo; - +using search::queryeval::termAsString; using std::string; using std::vector; namespace fef_test = search::fef::test; @@ -121,6 +121,7 @@ class Test : public vespalib::TestApp { void requireThatSameElementDoesNotAllocateMatchData(); void requireThatSameElementIteratorsCanBeBuilt(); void requireThatConstBoolBlueprintsAreCreatedCorrectly(); + void global_filter_is_calculated_and_handled(); public: ~Test() override; @@ -1113,6 +1114,62 @@ void Test::requireThatConstBoolBlueprintsAreCreatedCorrectly() { EXPECT_TRUE(fbp != nullptr); } +class GlobalFilterBlueprint : public SimpleBlueprint { +public: + std::shared_ptr<const GlobalFilter> filter; + GlobalFilterBlueprint(const SimpleResult& result, + bool want_global_filter) + : search::queryeval::SimpleBlueprint(result), + filter() + { + set_want_global_filter(want_global_filter); + } + ~GlobalFilterBlueprint() {} + void set_global_filter(const GlobalFilter& filter_) override { + filter = filter_.shared_from_this(); + } +}; + +void +Test::global_filter_is_calculated_and_handled() +{ + // estimated hits = 3, estimated hit ratio = 0.3 + auto result = SimpleResult().addHit(3).addHit(5).addHit(7); + uint32_t docid_limit = 10; + { // global filter is not wanted + GlobalFilterBlueprint bp(result, false); + auto res = Query::handle_global_filter(bp, docid_limit, 0, 1); + EXPECT_FALSE(res); + EXPECT_FALSE(bp.filter); + } + { // estimated_hit_ratio < global_filter_lower_limit + GlobalFilterBlueprint bp(result, true); + auto res = Query::handle_global_filter(bp, docid_limit, 0.31, 1); + EXPECT_FALSE(res); + EXPECT_FALSE(bp.filter); + } + { // estimated_hit_ratio <= global_filter_upper_limit + GlobalFilterBlueprint bp(result, true); + auto res = Query::handle_global_filter(bp, docid_limit, 0, 0.3); + EXPECT_TRUE(res); + EXPECT_TRUE(bp.filter); + EXPECT_TRUE(bp.filter->has_filter()); + + auto* bv = bp.filter->filter(); + EXPECT_EQUAL(3u, bv->countTrueBits()); + EXPECT_TRUE(bv->testBit(3)); + EXPECT_TRUE(bv->testBit(5)); + EXPECT_TRUE(bv->testBit(7)); + } + { // estimated_hit_ratio > global_filter_upper_limit + GlobalFilterBlueprint bp(result, true); + auto res = Query::handle_global_filter(bp, docid_limit, 0, 0.29); + EXPECT_TRUE(res); + EXPECT_TRUE(bp.filter); + EXPECT_FALSE(bp.filter->has_filter()); + } +} + Test::~Test() = default; int @@ -1152,6 +1209,7 @@ Test::Main() TEST_CALL(requireThatSameElementDoesNotAllocateMatchData); TEST_CALL(requireThatSameElementIteratorsCanBeBuilt); TEST_CALL(requireThatConstBoolBlueprintsAreCreatedCorrectly); + TEST_CALL(global_filter_is_calculated_and_handled); TEST_DONE(); } diff --git a/searchcore/src/vespa/searchcore/proton/matching/match_tools.cpp b/searchcore/src/vespa/searchcore/proton/matching/match_tools.cpp index 13639cb55b7..acee34220a6 100644 --- a/searchcore/src/vespa/searchcore/proton/matching/match_tools.cpp +++ b/searchcore/src/vespa/searchcore/proton/matching/match_tools.cpp @@ -205,7 +205,7 @@ MatchToolsFactory(QueryLimiter & queryLimiter, trace.addEvent(5, "MTF: Handle Global Filters"); double lower_limit = GlobalFilterLowerLimit::lookup(rankProperties, rankSetup.get_global_filter_lower_limit()); double upper_limit = GlobalFilterUpperLimit::lookup(rankProperties, rankSetup.get_global_filter_upper_limit()); - _query.handle_global_filters(searchContext.getDocIdLimit(), lower_limit, upper_limit); + _query.handle_global_filter(searchContext.getDocIdLimit(), lower_limit, upper_limit); } _query.freeze(); trace.addEvent(5, "MTF: prepareSharedState"); diff --git a/searchcore/src/vespa/searchcore/proton/matching/query.cpp b/searchcore/src/vespa/searchcore/proton/matching/query.cpp index 8842f426a3b..95fe846a088 100644 --- a/searchcore/src/vespa/searchcore/proton/matching/query.cpp +++ b/searchcore/src/vespa/searchcore/proton/matching/query.cpp @@ -244,33 +244,46 @@ Query::fetchPostings() } void -Query::handle_global_filters(uint32_t docid_limit, double global_filter_lower_limit, double global_filter_upper_limit) +Query::handle_global_filter(uint32_t docid_limit, double global_filter_lower_limit, double global_filter_upper_limit) +{ + if (!handle_global_filter(*_blueprint, docid_limit, global_filter_lower_limit, global_filter_upper_limit)) { + return; + } + // optimized order may change after accounting for global filter: + _blueprint = Blueprint::optimize(std::move(_blueprint)); + LOG(debug, "blueprint after handle_global_filter:\n%s\n", _blueprint->asString().c_str()); + // strictness may change if optimized order changed: + fetchPostings(); +} + +bool +Query::handle_global_filter(Blueprint& blueprint, uint32_t docid_limit, double global_filter_lower_limit, double global_filter_upper_limit) { using search::queryeval::GlobalFilter; - double estimated_hit_ratio = _blueprint->getState().hit_ratio(docid_limit); - if ( ! _blueprint->getState().want_global_filter()) return; + double estimated_hit_ratio = blueprint.getState().hit_ratio(docid_limit); + if (!blueprint.getState().want_global_filter()) { + return false; + } LOG(debug, "docid_limit=%d, estimated_hit_ratio=%1.2f, global_filter_lower_limit=%1.2f, global_filter_upper_limit=%1.2f", docid_limit, estimated_hit_ratio, global_filter_lower_limit, global_filter_upper_limit); - if (estimated_hit_ratio < global_filter_lower_limit) return; + if (estimated_hit_ratio < global_filter_lower_limit) { + return false; + } if (estimated_hit_ratio <= global_filter_upper_limit) { auto constraint = Blueprint::FilterConstraint::UPPER_BOUND; bool strict = true; - auto filter_iterator = _blueprint->createFilterSearch(strict, constraint); + auto filter_iterator = blueprint.createFilterSearch(strict, constraint); filter_iterator->initRange(1, docid_limit); auto white_list = filter_iterator->get_hits(1); auto global_filter = GlobalFilter::create(std::move(white_list)); - _blueprint->set_global_filter(*global_filter); + blueprint.set_global_filter(*global_filter); } else { auto no_filter = GlobalFilter::create(); - _blueprint->set_global_filter(*no_filter); + blueprint.set_global_filter(*no_filter); } - // optimized order may change after accounting for global filter: - _blueprint = Blueprint::optimize(std::move(_blueprint)); - LOG(debug, "blueprint after handle_global_filters:\n%s\n", _blueprint->asString().c_str()); - // strictness may change if optimized order changed: - fetchPostings(); + return true; } void diff --git a/searchcore/src/vespa/searchcore/proton/matching/query.h b/searchcore/src/vespa/searchcore/proton/matching/query.h index eff4371c4bc..29bca310502 100644 --- a/searchcore/src/vespa/searchcore/proton/matching/query.h +++ b/searchcore/src/vespa/searchcore/proton/matching/query.h @@ -92,7 +92,25 @@ public: **/ void optimize(); void fetchPostings(); - void handle_global_filters(uint32_t docidLimit, double global_filter_lower_limit, double global_filter_upper_limit); + + void handle_global_filter(uint32_t docid_limit, double global_filter_lower_limit, double global_filter_upper_limit); + + /** + * Calculates and handles the global filter if needed by the blueprint tree. + * + * The estimated hit ratio from the blueprint tree is used to select strategy: + * 1) estimated_hit_ratio < global_filter_lower_limit: + * Nothing is done. + * 2) estimated_hit_ratio <= global_filter_upper_limit: + * Calculate the global filter and set it on the blueprint. + * 3) estimated_hit_ratio > global_filter_upper_limit: + * Set a "match all filter" on the blueprint. + * + * @return whether the global filter was set on the blueprint. + */ + static bool handle_global_filter(Blueprint& blueprint, uint32_t docid_limit, + double global_filter_lower_limit, double global_filter_upper_limit); + void freeze(); /** |