diff options
author | Henning Baldersheim <balder@yahoo-inc.com> | 2022-09-12 15:28:14 +0200 |
---|---|---|
committer | GitHub <noreply@github.com> | 2022-09-12 15:28:14 +0200 |
commit | a10d6e4d37359a40e7543a5c0c9ad6eb3bd36e1c (patch) | |
tree | 3f41aa2acf26aa4e4d116ead8bb095af3970ae80 | |
parent | b444a4c3c56be61bd93c968e6891340a3feedeb3 (diff) | |
parent | 19a095712f39cc68477321ba705010f1235bd601 (diff) |
Merge pull request #24017 from vespa-engine/havardpe/re-wire-create-global-filter-with-thread-bundle
thread bundle now available when calculating the global filter
11 files changed, 81 insertions, 24 deletions
diff --git a/searchcore/src/tests/proton/matching/matching_test.cpp b/searchcore/src/tests/proton/matching/matching_test.cpp index b404330df9b..df8f2a9476b 100644 --- a/searchcore/src/tests/proton/matching/matching_test.cpp +++ b/searchcore/src/tests/proton/matching/matching_test.cpp @@ -66,6 +66,8 @@ using vespalib::eval::SimpleValue; using vespalib::eval::TensorSpec; using vespalib::nbostream; +vespalib::ThreadBundle &ttb() { return vespalib::ThreadBundle::trivial(); } + void inject_match_phase_limiting(Properties &setup, const vespalib::string &attribute, size_t max_hits, bool descending) { Properties cfg; @@ -374,7 +376,7 @@ struct MyWorld { void verify_diversity_filter(const SearchRequest & req, bool expectDiverse) { Matcher::SP matcher = createMatcher(); search::fef::Properties overrides; - auto mtf = matcher->create_match_tools_factory(req, searchContext, attributeContext, metaStore, overrides, true); + auto mtf = matcher->create_match_tools_factory(req, searchContext, attributeContext, metaStore, overrides, ttb(), true); auto diversity = mtf->createDiversifier(HeapSize::lookup(config)); EXPECT_EQUAL(expectDiverse, static_cast<bool>(diversity)); } @@ -384,7 +386,7 @@ struct MyWorld { SearchRequest::SP request = createSimpleRequest("f1", "spread"); search::fef::Properties overrides; MatchToolsFactory::UP match_tools_factory = matcher->create_match_tools_factory( - *request, searchContext, attributeContext, metaStore, overrides, true); + *request, searchContext, attributeContext, metaStore, overrides, ttb(), true); MatchTools::UP match_tools = match_tools_factory->createMatchTools(); match_tools->setup_first_phase(nullptr); return match_tools->match_data().get_termwise_limit(); diff --git a/searchcore/src/tests/proton/matching/query_test.cpp b/searchcore/src/tests/proton/matching/query_test.cpp index 78bb679a7dc..4c81aa7e347 100644 --- a/searchcore/src/tests/proton/matching/query_test.cpp +++ b/searchcore/src/tests/proton/matching/query_test.cpp @@ -29,6 +29,7 @@ #include <vespa/searchlib/parsequery/stackdumpiterator.h> #include <vespa/document/datatype/positiondatatype.h> #include <vespa/vespalib/stllike/asciistream.h> +#include <vespa/vespalib/util/thread_bundle.h> #include <vespa/vespalib/testkit/testapp.h> #include <vespa/log/log.h> @@ -166,6 +167,8 @@ fef_test::IndexEnvironment plain_index_env; fef_test::IndexEnvironment resolved_index_env; fef_test::IndexEnvironment attribute_index_env; +vespalib::ThreadBundle &ttb() { return vespalib::ThreadBundle::trivial(); } + vespalib::string termAsString(const search::query::Range &term) { vespalib::asciistream os; @@ -1141,21 +1144,21 @@ Test::global_filter_is_calculated_and_handled() uint32_t docid_limit = 10; { // global filter is not wanted GlobalFilterBlueprint bp(result, false); - auto res = Query::handle_global_filter(bp, docid_limit, 0, 1, nullptr); + auto res = Query::handle_global_filter(bp, docid_limit, 0, 1, ttb(), nullptr); EXPECT_FALSE(res); EXPECT_FALSE(bp.filter); EXPECT_EQUAL(-1.0, bp.estimated_hit_ratio); } { // estimated_hit_ratio < global_filter_lower_limit GlobalFilterBlueprint bp(result, true); - auto res = Query::handle_global_filter(bp, docid_limit, 0.31, 1, nullptr); + auto res = Query::handle_global_filter(bp, docid_limit, 0.31, 1, ttb(), nullptr); EXPECT_FALSE(res); EXPECT_FALSE(bp.filter); EXPECT_EQUAL(-1.0, bp.estimated_hit_ratio); } { // estimated_hit_ratio <= global_filter_upper_limit GlobalFilterBlueprint bp(result, true); - auto res = Query::handle_global_filter(bp, docid_limit, 0, 0.3, nullptr); + auto res = Query::handle_global_filter(bp, docid_limit, 0, 0.3, ttb(), nullptr); EXPECT_TRUE(res); EXPECT_TRUE(bp.filter); EXPECT_TRUE(bp.filter->is_active()); @@ -1168,7 +1171,7 @@ Test::global_filter_is_calculated_and_handled() } { // estimated_hit_ratio > global_filter_upper_limit GlobalFilterBlueprint bp(result, true); - auto res = Query::handle_global_filter(bp, docid_limit, 0, 0.29, nullptr); + auto res = Query::handle_global_filter(bp, docid_limit, 0, 0.29, ttb(), nullptr); EXPECT_TRUE(res); EXPECT_TRUE(bp.filter); EXPECT_FALSE(bp.filter->is_active()); diff --git a/searchcore/src/vespa/searchcore/proton/matching/match_tools.cpp b/searchcore/src/vespa/searchcore/proton/matching/match_tools.cpp index 0dfb6953cc9..e5a0ac3c3fe 100644 --- a/searchcore/src/vespa/searchcore/proton/matching/match_tools.cpp +++ b/searchcore/src/vespa/searchcore/proton/matching/match_tools.cpp @@ -14,6 +14,7 @@ #include <vespa/vespalib/data/slime/inject.h> #include <vespa/vespalib/data/slime/inserter.h> #include <vespa/vespalib/util/issue.h> +#include <vespa/vespalib/util/thread_bundle.h> using search::queryeval::IDiversifier; using search::attribute::diversity::DiversityFilter; @@ -171,6 +172,7 @@ MatchToolsFactory(QueryLimiter & queryLimiter, const RankSetup & rankSetup, const Properties & rankProperties, const Properties & featureOverrides, + vespalib::ThreadBundle & thread_bundle, bool is_search) : _queryLimiter(queryLimiter), _global_filter_params(extract_global_filter_params(rankSetup, rankProperties, metaStore.getNumActiveLids(), searchContext.getDocIdLimit())), @@ -203,7 +205,7 @@ MatchToolsFactory(QueryLimiter & queryLimiter, _query.handle_global_filter(searchContext.getDocIdLimit(), _global_filter_params.global_filter_lower_limit, _global_filter_params.global_filter_upper_limit, - trace); + thread_bundle, trace); } _query.freeze(); trace.addEvent(5, "Prepare shared state for multi-threaded rank executors"); diff --git a/searchcore/src/vespa/searchcore/proton/matching/match_tools.h b/searchcore/src/vespa/searchcore/proton/matching/match_tools.h index 1d50b44e839..8012d7b1ec5 100644 --- a/searchcore/src/vespa/searchcore/proton/matching/match_tools.h +++ b/searchcore/src/vespa/searchcore/proton/matching/match_tools.h @@ -19,6 +19,7 @@ #include <vespa/vespalib/util/clock.h> namespace vespalib { class ExecutionProfiler; } +namespace vespalib { class ThreadBundle; } namespace search::engine { class Trace; } @@ -143,6 +144,7 @@ public: const RankSetup &rankSetup, const Properties &rankProperties, const Properties &featureOverrides, + vespalib::ThreadBundle &thread_bundle, bool is_search); ~MatchToolsFactory(); bool valid() const { return _valid; } diff --git a/searchcore/src/vespa/searchcore/proton/matching/matcher.cpp b/searchcore/src/vespa/searchcore/proton/matching/matcher.cpp index e8c7f2c5e44..95fe6fd6c3e 100644 --- a/searchcore/src/vespa/searchcore/proton/matching/matcher.cpp +++ b/searchcore/src/vespa/searchcore/proton/matching/matcher.cpp @@ -141,7 +141,8 @@ using search::fef::indexproperties::softtimeout::Factor; std::unique_ptr<MatchToolsFactory> Matcher::create_match_tools_factory(const search::engine::Request &request, ISearchContext &searchContext, IAttributeContext &attrContext, const search::IDocumentMetaStore &metaStore, - const Properties &feature_overrides, bool is_search) const + const Properties &feature_overrides, vespalib::ThreadBundle &thread_bundle, + bool is_search) const { const Properties & rankProperties = request.propertiesMap.rankProperties(); bool softTimeoutEnabled = Enabled::lookup(rankProperties, _rankSetup->getSoftTimeoutEnabled()); @@ -161,7 +162,7 @@ Matcher::create_match_tools_factory(const search::engine::Request &request, ISea return std::make_unique<MatchToolsFactory>(_queryLimiter, doom, searchContext, attrContext, request.trace(), request.getStackRef(), request.location, _viewResolver, metaStore, _indexEnv, *_rankSetup, - rankProperties, feature_overrides, is_search); + rankProperties, feature_overrides, thread_bundle, is_search); } size_t @@ -221,8 +222,8 @@ Matcher::match(const SearchRequest &request, vespalib::ThreadBundle &threadBundl feature_overrides = owned_objects.feature_overrides.get(); } - MatchToolsFactory::UP mtf = create_match_tools_factory(request, searchContext, attrContext, - metaStore, *feature_overrides, true); + MatchToolsFactory::UP mtf = create_match_tools_factory(request, searchContext, attrContext, metaStore, + *feature_overrides, threadBundle, true); isDoomExplicit = mtf->getRequestContext().getDoom().isExplicitSoftDoom(); traceQuery(6, request.trace(), mtf->query()); if (!mtf->valid()) { @@ -372,7 +373,8 @@ Matcher::create_docsum_matcher(const DocsumRequest &req, ISearchContext &search_ } StupidMetaStore meta; MatchToolsFactory::UP mtf = create_match_tools_factory(req, search_ctx, attr_ctx, meta, - req.propertiesMap.featureOverrides(), false); + req.propertiesMap.featureOverrides(), + vespalib::ThreadBundle::trivial(), false); if (!mtf->valid()) { LOG(warning, "could not initialize docsum matching: %s", (expectedSessionCached) ? "session has expired" : "invalid query"); diff --git a/searchcore/src/vespa/searchcore/proton/matching/matcher.h b/searchcore/src/vespa/searchcore/proton/matching/matcher.h index 409f0ddeae5..4071f95b0c8 100644 --- a/searchcore/src/vespa/searchcore/proton/matching/matcher.h +++ b/searchcore/src/vespa/searchcore/proton/matching/matcher.h @@ -104,7 +104,8 @@ public: std::unique_ptr<MatchToolsFactory> create_match_tools_factory(const search::engine::Request &request, ISearchContext &searchContext, IAttributeContext &attrContext, const search::IDocumentMetaStore &metaStore, - const Properties &feature_overrides, bool is_search) const; + const Properties &feature_overrides, vespalib::ThreadBundle &thread_bundle, + bool is_search) const; /** * Perform a search against this matcher. diff --git a/searchcore/src/vespa/searchcore/proton/matching/query.cpp b/searchcore/src/vespa/searchcore/proton/matching/query.cpp index ec289c06122..d8951eb1084 100644 --- a/searchcore/src/vespa/searchcore/proton/matching/query.cpp +++ b/searchcore/src/vespa/searchcore/proton/matching/query.cpp @@ -14,6 +14,7 @@ #include <vespa/searchlib/parsequery/stackdumpiterator.h> #include <vespa/searchlib/queryeval/intermediate_blueprints.h> #include <vespa/vespalib/util/issue.h> +#include <vespa/vespalib/util/thread_bundle.h> #include <vespa/log/log.h> LOG_SETUP(".proton.matching.query"); @@ -35,6 +36,7 @@ using search::query::Weight; using search::queryeval::AndBlueprint; using search::queryeval::AndNotBlueprint; using search::queryeval::Blueprint; +using search::queryeval::GlobalFilter; using search::queryeval::IRequestContext; using search::queryeval::IntermediateBlueprint; using search::queryeval::RankBlueprint; @@ -246,9 +248,9 @@ Query::fetchPostings() void Query::handle_global_filter(uint32_t docid_limit, double global_filter_lower_limit, double global_filter_upper_limit, - search::engine::Trace& trace) + vespalib::ThreadBundle &thread_bundle, search::engine::Trace& trace) { - if (!handle_global_filter(*_blueprint, docid_limit, global_filter_lower_limit, global_filter_upper_limit, &trace)) { + if (!handle_global_filter(*_blueprint, docid_limit, global_filter_lower_limit, global_filter_upper_limit, thread_bundle, &trace)) { return; } // optimized order may change after accounting for global filter: @@ -259,10 +261,21 @@ Query::handle_global_filter(uint32_t docid_limit, double global_filter_lower_lim fetchPostings(); } +std::shared_ptr<GlobalFilter> +Query::create_global_filter(Blueprint& blueprint, uint32_t docid_limit, vespalib::ThreadBundle &thread_bundle) +{ + (void) thread_bundle; // multi-threaded filter generation coming soon + bool strict = true; + auto constraint = Blueprint::FilterConstraint::UPPER_BOUND; + auto filter_iterator = blueprint.createFilterSearch(strict, constraint); + filter_iterator->initRange(1, docid_limit); + return GlobalFilter::create(filter_iterator->get_hits(1)); +} + bool Query::handle_global_filter(Blueprint& blueprint, uint32_t docid_limit, double global_filter_lower_limit, double global_filter_upper_limit, - search::engine::Trace* trace) + vespalib::ThreadBundle &thread_bundle, search::engine::Trace* trace) { using search::queryeval::GlobalFilter; double estimated_hit_ratio = blueprint.getState().hit_ratio(docid_limit); @@ -284,12 +297,7 @@ Query::handle_global_filter(Blueprint& blueprint, uint32_t docid_limit, trace->addEvent(5, vespalib::make_string("Calculate global filter (estimated_hit_ratio (%f) <= upper_limit (%f))", estimated_hit_ratio, global_filter_upper_limit)); } - auto constraint = Blueprint::FilterConstraint::UPPER_BOUND; - bool strict = true; - auto filter_iterator = blueprint.createFilterSearch(strict, constraint); - filter_iterator->initRange(1, docid_limit); - auto white_list = filter_iterator->get_hits(1); - global_filter = GlobalFilter::create(std::move(white_list)); + global_filter = create_global_filter(blueprint, docid_limit, thread_bundle); } else { if (trace && trace->shouldTrace(5)) { trace->addEvent(5, vespalib::make_string("Create match all global filter (estimated_hit_ratio (%f) > upper_limit (%f))", diff --git a/searchcore/src/vespa/searchcore/proton/matching/query.h b/searchcore/src/vespa/searchcore/proton/matching/query.h index 78bd6a0bacb..8517ec2153f 100644 --- a/searchcore/src/vespa/searchcore/proton/matching/query.h +++ b/searchcore/src/vespa/searchcore/proton/matching/query.h @@ -10,6 +10,7 @@ #include <vespa/searchlib/queryeval/blueprint.h> #include <vespa/searchlib/queryeval/irequestcontext.h> +namespace vespalib { class ThreadBundle; } namespace search::engine { class Trace; } namespace proton::matching { @@ -21,6 +22,7 @@ class Query { private: using Blueprint = search::queryeval::Blueprint; + using GlobalFilter = search::queryeval::GlobalFilter; search::query::Node::UP _query_tree; Blueprint::UP _blueprint; Blueprint::UP _whiteListBlueprint; @@ -95,7 +97,11 @@ public: void fetchPostings(); void handle_global_filter(uint32_t docid_limit, double global_filter_lower_limit, double global_filter_upper_limit, - search::engine::Trace& trace); + vespalib::ThreadBundle &thread_bundle, search::engine::Trace& trace); + + // Create a global filter. Called by handle_global_filter if needed. + static std::shared_ptr<GlobalFilter> create_global_filter(Blueprint& blueprint, uint32_t docid_limit, + vespalib::ThreadBundle &thread_bundle); /** * Calculates and handles the global filter if needed by the blueprint tree. @@ -112,7 +118,7 @@ public: */ static bool handle_global_filter(Blueprint& blueprint, uint32_t docid_limit, double global_filter_lower_limit, double global_filter_upper_limit, - search::engine::Trace* trace); + vespalib::ThreadBundle &thread_bundle, search::engine::Trace* trace); void freeze(); diff --git a/vespalib/src/tests/simple_thread_bundle/simple_thread_bundle_test.cpp b/vespalib/src/tests/simple_thread_bundle/simple_thread_bundle_test.cpp index 5d69a18112a..debb34724f6 100644 --- a/vespalib/src/tests/simple_thread_bundle/simple_thread_bundle_test.cpp +++ b/vespalib/src/tests/simple_thread_bundle/simple_thread_bundle_test.cpp @@ -89,6 +89,17 @@ TEST_FF("require that having too many targets fails", SimpleThreadBundle(1), Sta f2.check(Box<size_t>().add(0).add(0)); } +TEST_F("require that ThreadBundle::trivial works the same as SimpleThreadBundle(1)", State(2)) { + ThreadBundle &bundle = ThreadBundle::trivial(); + EXPECT_EQUAL(bundle.size(), 1u); + bundle.run(f.getTargets(0)); + f.check({0,0}); + bundle.run(f.getTargets(1)); + f.check({1,0}); + EXPECT_EXCEPTION(bundle.run(f.getTargets(2)), IllegalArgumentException, ""); + f.check({1,0}); +} + TEST_FF("require that bundles with multiple internal threads work", SimpleThreadBundle(3), State(3)) { f1.run(f2.getTargets(3)); f2.check(Box<size_t>().add(1).add(1).add(1)); diff --git a/vespalib/src/vespa/vespalib/util/thread_bundle.cpp b/vespalib/src/vespa/vespalib/util/thread_bundle.cpp index 7068224229e..9f953abfce7 100644 --- a/vespalib/src/vespa/vespalib/util/thread_bundle.cpp +++ b/vespalib/src/vespa/vespalib/util/thread_bundle.cpp @@ -1,7 +1,24 @@ // Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. #include "thread_bundle.h" +#include "exceptions.h" namespace vespalib { +ThreadBundle & +ThreadBundle::trivial() { + struct TrivialThreadBundle : ThreadBundle { + size_t size() const override { return 1; } + void run(const std::vector<Runnable*> &targets) override { + if (targets.size() == 1) { + targets[0]->run(); + } else if (targets.size() > 1) { + throw IllegalArgumentException("too many targets"); + } + }; + }; + static TrivialThreadBundle trivial_thread_bundle; + return trivial_thread_bundle; +} + } // namespace vespalib diff --git a/vespalib/src/vespa/vespalib/util/thread_bundle.h b/vespalib/src/vespa/vespalib/util/thread_bundle.h index 32b7d32b2a7..699fd8e27a0 100644 --- a/vespalib/src/vespa/vespalib/util/thread_bundle.h +++ b/vespalib/src/vespa/vespalib/util/thread_bundle.h @@ -33,6 +33,9 @@ struct ThreadBundle { * Empty virtual destructor to enable subclassing. **/ virtual ~ThreadBundle() {} + + // a thread bundle that can only run things in the current thread. + static ThreadBundle &trivial(); }; } // namespace vespalib |