diff options
author | Henning Baldersheim <balder@yahoo-inc.com> | 2023-11-03 20:25:37 +0000 |
---|---|---|
committer | Henning Baldersheim <balder@yahoo-inc.com> | 2023-11-03 20:29:50 +0000 |
commit | 1d83798c32557577f692b8186f59f8692e06c7fc (patch) | |
tree | 3ba27ec3212201516afe43745950c978c115999a /searchcore | |
parent | 10cfb0c78fd1bf8ce12818849378c6205d6980d9 (diff) |
- Add maxHits param that can be used to compute a realistic (low) hit rate when fetching summaries (summary features).
Diffstat (limited to 'searchcore')
8 files changed, 25 insertions, 20 deletions
diff --git a/searchcore/src/tests/proton/matching/matching_test.cpp b/searchcore/src/tests/proton/matching/matching_test.cpp index 9ac930cdbab..5f4f26c6c6b 100644 --- a/searchcore/src/tests/proton/matching/matching_test.cpp +++ b/searchcore/src/tests/proton/matching/matching_test.cpp @@ -3,7 +3,6 @@ #include <vespa/searchcore/proton/bucketdb/bucket_db_owner.h> #include <vespa/searchcore/proton/documentmetastore/documentmetastore.h> #include <vespa/searchcore/proton/matching/fakesearchcontext.h> -#include <vespa/searchcore/proton/matching/match_context.h> #include <vespa/searchcore/proton/matching/match_params.h> #include <vespa/searchcore/proton/matching/match_tools.h> #include <vespa/searchcore/proton/matching/matcher.h> @@ -28,7 +27,6 @@ #include <vespa/searchlib/query/tree/stackdumpcreator.h> #include <vespa/searchlib/queryeval/isourceselector.h> #include <vespa/searchlib/test/mock_attribute_context.h> -#include <vespa/searchcommon/attribute/iattributecontext.h> #include <vespa/document/base/globalid.h> #include <vespa/eval/eval/simple_value.h> #include <vespa/eval/eval/tensor_spec.h> @@ -377,7 +375,8 @@ struct MyWorld { void verify_diversity_filter(const SearchRequest & req, bool expectDiverse) { Matcher::SP matcher = createMatcher(); search::fef::Properties overrides; - auto mtf = matcher->create_match_tools_factory(req, searchContext, attributeContext, metaStore, overrides, ttb(), nullptr, true); + auto mtf = matcher->create_match_tools_factory(req, searchContext, attributeContext, metaStore, overrides, + ttb(), nullptr, searchContext.getDocIdLimit(), true); auto diversity = mtf->createDiversifier(HeapSize::lookup(config)); EXPECT_EQUAL(expectDiverse, static_cast<bool>(diversity)); } @@ -386,7 +385,8 @@ struct MyWorld { Matcher::SP matcher = createMatcher(); SearchRequest::SP request = createSimpleRequest("f1", "spread"); search::fef::Properties overrides; - auto mtf = matcher->create_match_tools_factory(*request, searchContext, attributeContext, metaStore, 
overrides, ttb(), nullptr, true); + auto mtf = matcher->create_match_tools_factory(*request, searchContext, attributeContext, metaStore, overrides, + ttb(), nullptr, searchContext.getDocIdLimit(), true); MatchTools::UP match_tools = mtf->createMatchTools(); match_tools->setup_first_phase(nullptr); return match_tools->match_data().get_termwise_limit(); @@ -1156,7 +1156,7 @@ struct AttributeBlueprintParamsFixture { } void set_query_properties(vespalib::stringref lower_limit, vespalib::stringref upper_limit, vespalib::stringref target_hits_max_adjustment_factor, - const vespalib::string fuzzy_matching_algorithm) { + const vespalib::string & fuzzy_matching_algorithm) { rank_properties.add(GlobalFilterLowerLimit::NAME, lower_limit); rank_properties.add(GlobalFilterUpperLimit::NAME, upper_limit); rank_properties.add(TargetHitsMaxAdjustmentFactor::NAME, target_hits_max_adjustment_factor); diff --git a/searchcore/src/tests/proton/matching/query_test.cpp b/searchcore/src/tests/proton/matching/query_test.cpp index 34c50084c35..928e13a9fae 100644 --- a/searchcore/src/tests/proton/matching/query_test.cpp +++ b/searchcore/src/tests/proton/matching/query_test.cpp @@ -711,7 +711,7 @@ void Test::requireThatQueryGluesEverythingTogether() { EXPECT_EQUAL(1u, md->getNumTermFields()); query.optimize(); - query.fetchPostings(requestContext.getDoom()); + query.fetchPostings(search::queryeval::ExecuteInfo::create(true, &requestContext.getDoom())); SearchIterator::UP search = query.createSearch(*md); ASSERT_TRUE(search.get()); } @@ -744,7 +744,7 @@ void checkQueryAddsLocation(const string &loc_in, const string &loc_out) { MatchData::UP md = mdl.createMatchData(); EXPECT_EQUAL(2u, md->getNumTermFields()); - query.fetchPostings(requestContext.getDoom()); + query.fetchPostings(search::queryeval::ExecuteInfo::create(true, &requestContext.getDoom())); SearchIterator::UP search = query.createSearch(*md); ASSERT_TRUE(search.get()); if (!EXPECT_NOT_EQUAL(string::npos, 
search->asString().find(loc_out))) { @@ -966,7 +966,7 @@ Test::requireThatWhiteListBlueprintCanBeUsed() MatchData::UP md = mdl.createMatchData(); query.optimize(); - query.fetchPostings(requestContext.getDoom()); + query.fetchPostings(search::queryeval::ExecuteInfo::create(true, &requestContext.getDoom())); SearchIterator::UP search = query.createSearch(*md); SimpleResult exp = SimpleResult().addHit(1).addHit(5).addHit(7).addHit(11); SimpleResult act; @@ -1129,13 +1129,15 @@ public: { set_want_global_filter(want_global_filter); } - ~GlobalFilterBlueprint() {} + ~GlobalFilterBlueprint(); void set_global_filter(const GlobalFilter& filter_, double estimated_hit_ratio_) override { filter = filter_.shared_from_this(); estimated_hit_ratio = estimated_hit_ratio_; } }; +GlobalFilterBlueprint::~GlobalFilterBlueprint() = default; + void Test::global_filter_is_calculated_and_handled() { diff --git a/searchcore/src/vespa/searchcore/proton/matching/match_tools.cpp b/searchcore/src/vespa/searchcore/proton/matching/match_tools.cpp index 2937429be13..c6f3e215329 100644 --- a/searchcore/src/vespa/searchcore/proton/matching/match_tools.cpp +++ b/searchcore/src/vespa/searchcore/proton/matching/match_tools.cpp @@ -174,6 +174,7 @@ MatchToolsFactory(QueryLimiter & queryLimiter, const Properties & featureOverrides, vespalib::ThreadBundle & thread_bundle, const search::IDocumentMetaStoreContext::IReadGuard::SP * metaStoreReadGuard, + uint32_t maxNumHits, bool is_search) : _queryLimiter(queryLimiter), _attribute_blueprint_params(extract_attribute_blueprint_params(rankSetup, rankProperties, metaStore.getNumActiveLids(), searchContext.getDocIdLimit())), @@ -201,7 +202,8 @@ MatchToolsFactory(QueryLimiter & queryLimiter, trace.addEvent(5, "Optimize query execution plan"); _query.optimize(); trace.addEvent(4, "Perform dictionary lookups and posting lists initialization"); - _query.fetchPostings(is_search, _requestContext.getDoom()); + float hitRate = std::min(1.0F, 
float(maxNumHits)/float(searchContext.getDocIdLimit())); + _query.fetchPostings(search::queryeval::ExecuteInfo::create(is_search, hitRate, &_requestContext.getDoom())); if (is_search) { _query.handle_global_filter(_requestContext.getDoom(), searchContext.getDocIdLimit(), _attribute_blueprint_params.global_filter_lower_limit, diff --git a/searchcore/src/vespa/searchcore/proton/matching/match_tools.h b/searchcore/src/vespa/searchcore/proton/matching/match_tools.h index 9c6a6391504..ce1a9ff0b0a 100644 --- a/searchcore/src/vespa/searchcore/proton/matching/match_tools.h +++ b/searchcore/src/vespa/searchcore/proton/matching/match_tools.h @@ -155,6 +155,7 @@ public: const Properties &featureOverrides, vespalib::ThreadBundle &thread_bundle, const search::IDocumentMetaStoreContext::IReadGuard::SP * metaStoreReadGuard, + uint32_t maxNumHits, bool is_search); ~MatchToolsFactory(); bool valid() const { return _valid; } diff --git a/searchcore/src/vespa/searchcore/proton/matching/matcher.cpp b/searchcore/src/vespa/searchcore/proton/matching/matcher.cpp index 1e6773e1d31..3c8c90a229d 100644 --- a/searchcore/src/vespa/searchcore/proton/matching/matcher.cpp +++ b/searchcore/src/vespa/searchcore/proton/matching/matcher.cpp @@ -155,7 +155,7 @@ Matcher::create_match_tools_factory(const search::engine::Request &request, ISea IAttributeContext &attrContext, const search::IDocumentMetaStore &metaStore, const Properties &feature_overrides, vespalib::ThreadBundle &thread_bundle, const IDocumentMetaStoreContext::IReadGuard::SP * metaStoreReadGuard, - bool is_search) const + uint32_t maxHits, bool is_search) const { const Properties & rankProperties = request.propertiesMap.rankProperties(); bool softTimeoutEnabled = softtimeout::Enabled::lookup(rankProperties, _rankSetup->getSoftTimeoutEnabled()); @@ -176,7 +176,7 @@ Matcher::create_match_tools_factory(const search::engine::Request &request, ISea request.trace(), request.getStackRef(), request.location, _viewResolver, metaStore, _indexEnv, 
*_rankSetup, rankProperties, feature_overrides, thread_bundle, - metaStoreReadGuard, is_search); + metaStoreReadGuard, maxHits, is_search); } size_t @@ -272,7 +272,8 @@ Matcher::match(const SearchRequest &request, vespalib::ThreadBundle &threadBundl } MatchToolsFactory::UP mtf = create_match_tools_factory(request, searchContext, attrContext, metaStore, - *feature_overrides, threadBundle, &owned_objects.readGuard, true); + *feature_overrides, threadBundle, &owned_objects.readGuard, + searchContext.getDocIdLimit(), true); isDoomExplicit = mtf->get_request_context().getDoom().isExplicitSoftDoom(); traceQuery(6, request.trace(), mtf->query()); if (!mtf->valid()) { @@ -411,7 +412,7 @@ Matcher::create_docsum_matcher(const DocsumRequest &req, ISearchContext &search_ StupidMetaStore meta; MatchToolsFactory::UP mtf = create_match_tools_factory(req, search_ctx, attr_ctx, meta, req.propertiesMap.featureOverrides(), - vespalib::ThreadBundle::trivial(), nullptr, false); + vespalib::ThreadBundle::trivial(), nullptr, docs.size(), false); if (!mtf->valid()) { LOG(warning, "could not initialize docsum matching: %s", (expectedSessionCached) ? "session has expired" : "invalid query"); diff --git a/searchcore/src/vespa/searchcore/proton/matching/matcher.h b/searchcore/src/vespa/searchcore/proton/matching/matcher.h index 75339f2a207..ab9b8ed88f3 100644 --- a/searchcore/src/vespa/searchcore/proton/matching/matcher.h +++ b/searchcore/src/vespa/searchcore/proton/matching/matcher.h @@ -112,7 +112,7 @@ public: IAttributeContext &attrContext, const search::IDocumentMetaStore &metaStore, const Properties &feature_overrides, vespalib::ThreadBundle &thread_bundle, const IDocumentMetaStoreContext::IReadGuard::SP * metaStoreReadGuard, - bool is_search) const; + uint32_t maxHits, bool is_search) const; /** * Perform a search against this matcher. 
diff --git a/searchcore/src/vespa/searchcore/proton/matching/query.cpp b/searchcore/src/vespa/searchcore/proton/matching/query.cpp index b3c7eae0ccf..955cd30714f 100644 --- a/searchcore/src/vespa/searchcore/proton/matching/query.cpp +++ b/searchcore/src/vespa/searchcore/proton/matching/query.cpp @@ -247,9 +247,9 @@ Query::optimize() } void -Query::fetchPostings(bool strict, const vespalib::Doom & doom) +Query::fetchPostings(const search::queryeval::ExecuteInfo & executeInfo) { - _blueprint->fetchPostings(search::queryeval::ExecuteInfo::create(strict, &doom)); + _blueprint->fetchPostings(executeInfo); } void @@ -265,7 +265,7 @@ Query::handle_global_filter(const vespalib::Doom & doom, uint32_t docid_limit, _blueprint = Blueprint::optimize(std::move(_blueprint)); LOG(debug, "blueprint after handle_global_filter:\n%s\n", _blueprint->asString().c_str()); // strictness may change if optimized order changed: - fetchPostings(doom); + fetchPostings(search::queryeval::ExecuteInfo::create(true, &doom)); } bool diff --git a/searchcore/src/vespa/searchcore/proton/matching/query.h b/searchcore/src/vespa/searchcore/proton/matching/query.h index 2bdea528b57..c2d2d389d2c 100644 --- a/searchcore/src/vespa/searchcore/proton/matching/query.h +++ b/searchcore/src/vespa/searchcore/proton/matching/query.h @@ -98,8 +98,7 @@ public: * test to verify the original query without optimization. **/ void optimize(); - void fetchPostings(const vespalib::Doom & doom) { fetchPostings(true, doom); } - void fetchPostings(bool strict, const vespalib::Doom & doom); + void fetchPostings(const search::queryeval::ExecuteInfo & executeInfo) ; void handle_global_filter(const vespalib::Doom & doom, uint32_t docid_limit, double global_filter_lower_limit, double global_filter_upper_limit, |