diff options
author | Geir Storli <geirst@yahooinc.com> | 2022-05-12 12:56:11 +0000 |
---|---|---|
committer | Geir Storli <geirst@yahooinc.com> | 2022-05-12 12:56:11 +0000 |
commit | 5ecdd4265fc51cc51c8fc488580afb4754430cd0 (patch) | |
tree | 3877d3b5d5297009b5bb2607c1414cdc6149b087 /searchcore | |
parent | 3ae64956b57ee364807e5258da331bc156771a9e (diff) |
Auto-adjust global filter tuning parameters to handle searchable-copies > 1.
Diffstat (limited to 'searchcore')
3 files changed, 102 insertions, 29 deletions
diff --git a/searchcore/src/tests/proton/matching/matching_test.cpp b/searchcore/src/tests/proton/matching/matching_test.cpp index 187c0463da3..e3d0b37c0d6 100644 --- a/searchcore/src/tests/proton/matching/matching_test.cpp +++ b/searchcore/src/tests/proton/matching/matching_test.cpp @@ -1,42 +1,44 @@ // Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. -#include <vespa/vespalib/testkit/testapp.h> #include <vespa/document/base/globalid.h> -#include <initializer_list> +#include <vespa/eval/eval/simple_value.h> +#include <vespa/eval/eval/tensor_spec.h> +#include <vespa/eval/eval/value_codec.h> #include <vespa/searchcommon/attribute/iattributecontext.h> -#include <vespa/searchcore/proton/test/bucketfactory.h> +#include <vespa/searchcore/proton/bucketdb/bucket_db_owner.h> #include <vespa/searchcore/proton/documentmetastore/documentmetastore.h> #include <vespa/searchcore/proton/matching/fakesearchcontext.h> #include <vespa/searchcore/proton/matching/i_constant_value_repo.h> +#include <vespa/searchcore/proton/matching/match_context.h> +#include <vespa/searchcore/proton/matching/match_params.h> +#include <vespa/searchcore/proton/matching/match_tools.h> #include <vespa/searchcore/proton/matching/matcher.h> #include <vespa/searchcore/proton/matching/querynodes.h> #include <vespa/searchcore/proton/matching/sessionmanager.h> #include <vespa/searchcore/proton/matching/viewresolver.h> -#include <vespa/searchcore/proton/bucketdb/bucket_db_owner.h> +#include <vespa/searchcore/proton/test/bucketfactory.h> #include <vespa/searchlib/aggregation/aggregation.h> #include <vespa/searchlib/aggregation/grouping.h> #include <vespa/searchlib/aggregation/perdocexpression.h> #include <vespa/searchlib/attribute/extendableattributes.h> #include <vespa/searchlib/common/featureset.h> -#include <vespa/searchlib/engine/docsumrequest.h> -#include <vespa/searchlib/engine/searchrequest.h> #include <vespa/searchlib/engine/docsumreply.h> +#include <vespa/searchlib/engine/docsumrequest.h> #include <vespa/searchlib/engine/searchreply.h> -#include <vespa/searchlib/test/mock_attribute_context.h> -#include <vespa/searchlib/fef/properties.h> +#include <vespa/searchlib/engine/searchrequest.h> #include <vespa/searchlib/fef/indexproperties.h> +#include <vespa/searchlib/fef/properties.h> +#include <vespa/searchlib/fef/ranksetup.h> +#include <vespa/searchlib/fef/test/indexenvironment.h> #include <vespa/searchlib/query/tree/querybuilder.h> #include <vespa/searchlib/query/tree/stackdumpcreator.h> #include <vespa/searchlib/queryeval/isourceselector.h> -#include <vespa/vespalib/util/simple_thread_bundle.h> -#include <vespa/searchcore/proton/matching/match_params.h> -#include <vespa/searchcore/proton/matching/match_tools.h> -#include <vespa/searchcore/proton/matching/match_context.h> -#include <vespa/eval/eval/simple_value.h> -#include <vespa/eval/eval/tensor_spec.h> -#include <vespa/eval/eval/value_codec.h> +#include <vespa/searchlib/test/mock_attribute_context.h> #include <vespa/vespalib/objects/nbostream.h> +#include <vespa/vespalib/testkit/testapp.h> +#include <vespa/vespalib/util/simple_thread_bundle.h> #include <vespa/vespalib/util/testclock.h> +#include <initializer_list> #include <vespa/log/log.h> LOG_SETUP("matching_test"); @@ -47,6 +49,7 @@ using namespace search::aggregation; using namespace search::attribute; using namespace search::engine; using namespace search::expression; +using namespace search::fef::indexproperties::matching; using namespace search::fef; using namespace search::grouping; using namespace search::index; @@ -55,13 +58,12 @@ using namespace search::queryeval; using namespace search; using search::attribute::test::MockAttributeContext; +using search::fef::indexproperties::hitcollector::HeapSize; using search::index::schema::DataType; using storage::spi::Timestamp; -using search::fef::indexproperties::hitcollector::HeapSize; - -using vespalib::nbostream; using vespalib::eval::SimpleValue; using vespalib::eval::TensorSpec; +using vespalib::nbostream; void inject_match_phase_limiting(Properties &setup, const vespalib::string &attribute, size_t max_hits, bool descending) { @@ -1102,4 +1104,49 @@ TEST("require that docsum matcher can extract matching elements from single attr EXPECT_EQUAL(list[1], 3u); } +struct GlobalFilterParamsFixture { + BlueprintFactory factory; + search::fef::test::IndexEnvironment index_env; + RankSetup rank_setup; + Properties rank_properties; + GlobalFilterParamsFixture(double lower_limit, double upper_limit) + : factory(), + index_env(), + rank_setup(factory, index_env), + rank_properties() + { + rank_setup.set_global_filter_lower_limit(lower_limit); + rank_setup.set_global_filter_upper_limit(upper_limit); + } + void set_query_properties(vespalib::stringref lower_limit, vespalib::stringref upper_limit) { + rank_properties.add(GlobalFilterLowerLimit::NAME, lower_limit); + rank_properties.add(GlobalFilterUpperLimit::NAME, upper_limit); + } + AttributeBlueprintParams extract(uint32_t active_docids = 9, uint32_t docid_limit = 10) { + return MatchToolsFactory::extract_global_filter_params(rank_setup, rank_properties, active_docids, docid_limit); + } +}; + +TEST_F("global filter params are extracted from rank profile", GlobalFilterParamsFixture(0.2, 0.8)) +{ + auto params = f.extract(); + EXPECT_EQUAL(0.2, params.global_filter_lower_limit); + EXPECT_EQUAL(0.8, params.global_filter_upper_limit); +} + +TEST_F("global filter params are extracted from query", GlobalFilterParamsFixture(0.2, 0.8)) +{ + f.set_query_properties("0.15", "0.75"); + auto params = f.extract(); + EXPECT_EQUAL(0.15, params.global_filter_lower_limit); + EXPECT_EQUAL(0.75, params.global_filter_upper_limit); +} + +TEST_F("global filter params are scaled with active hit ratio", GlobalFilterParamsFixture(0.2, 0.8)) +{ + auto params = f.extract(5, 10); + EXPECT_EQUAL(0.12, params.global_filter_lower_limit); + EXPECT_EQUAL(0.48, params.global_filter_upper_limit); +} + TEST_MAIN() { TEST_RUN_ALL(); } diff --git a/searchcore/src/vespa/searchcore/proton/matching/match_tools.cpp b/searchcore/src/vespa/searchcore/proton/matching/match_tools.cpp index 3d8d56f0150..7bf62f678ed 100644 --- a/searchcore/src/vespa/searchcore/proton/matching/match_tools.cpp +++ b/searchcore/src/vespa/searchcore/proton/matching/match_tools.cpp @@ -70,13 +70,6 @@ extractDiversityParams(const RankSetup &rankSetup, const Properties &rankPropert AttributeLimiter::toDiversityCutoffStrategy(DiversityCutoffStrategy::lookup(rankProperties, rankSetup.getDiversityCutoffStrategy()))); } -AttributeBlueprintParams -extractAttributeBlueprintParams(const RankSetup& rank_setup, const Properties &rankProperties) -{ - return AttributeBlueprintParams(GlobalFilterLowerLimit::lookup(rankProperties, rank_setup.get_global_filter_lower_limit()), - GlobalFilterUpperLimit::lookup(rankProperties, rank_setup.get_global_filter_upper_limit())); -} - } // namespace proton::matching::<unnamed> void @@ -181,7 +174,8 @@ MatchToolsFactory(QueryLimiter & queryLimiter, const Properties & featureOverrides, bool is_search) : _queryLimiter(queryLimiter), - _requestContext(doom, attributeContext, rankProperties, extractAttributeBlueprintParams(rankSetup, rankProperties)), + _global_filter_params(extract_global_filter_params(rankSetup, rankProperties, metaStore.getNumActiveLids(), searchContext.getDocIdLimit())), + _requestContext(doom, attributeContext, rankProperties, _global_filter_params), _query(), _match_limiter(), _queryEnv(indexEnv, attributeContext, rankProperties, searchContext.getIndexes()), @@ -208,9 +202,10 @@ MatchToolsFactory(QueryLimiter & queryLimiter, trace.addEvent(4, "Perform dictionary lookups and posting lists initialization"); _query.fetchPostings(); if (is_search) { - double lower_limit = GlobalFilterLowerLimit::lookup(rankProperties, rankSetup.get_global_filter_lower_limit()); - double upper_limit = GlobalFilterUpperLimit::lookup(rankProperties, rankSetup.get_global_filter_upper_limit()); - _query.handle_global_filter(searchContext.getDocIdLimit(), lower_limit, upper_limit, trace); + _query.handle_global_filter(searchContext.getDocIdLimit(), + _global_filter_params.global_filter_lower_limit, + _global_filter_params.global_filter_upper_limit, + trace); } _query.freeze(); trace.addEvent(5, "Prepare shared state for multi-threaded rank executors"); @@ -309,6 +304,23 @@ MatchToolsFactory::get_feature_rename_map() const return _rankSetup.get_feature_rename_map(); } +AttributeBlueprintParams +MatchToolsFactory::extract_global_filter_params(const search::fef::RankSetup& rank_setup, + const search::fef::Properties& rank_properties, + uint32_t active_docids, + uint32_t docid_limit) +{ + double lower_limit = GlobalFilterLowerLimit::lookup(rank_properties, rank_setup.get_global_filter_lower_limit()); + double upper_limit = GlobalFilterUpperLimit::lookup(rank_properties, rank_setup.get_global_filter_upper_limit()); + + // Note that we count the reserved docid 0 as active. + // This ensures that when searchable-copies=1, the ratio is 1.0. + double active_hit_ratio = std::min(active_docids + 1, docid_limit) / static_cast<double>(docid_limit); + + return {lower_limit * active_hit_ratio, + upper_limit * active_hit_ratio}; +} + AttributeOperationTask::AttributeOperationTask(const RequestContext & requestContext, vespalib::stringref attribute, vespalib::stringref operation) : _requestContext(requestContext), diff --git a/searchcore/src/vespa/searchcore/proton/matching/match_tools.h b/searchcore/src/vespa/searchcore/proton/matching/match_tools.h index a7d39a0c3e8..d01ea05f3f7 100644 --- a/searchcore/src/vespa/searchcore/proton/matching/match_tools.h +++ b/searchcore/src/vespa/searchcore/proton/matching/match_tools.h @@ -93,6 +93,7 @@ class MatchToolsFactory private: using IAttributeFunctor = search::attribute::IAttributeFunctor; QueryLimiter & _queryLimiter; + search::attribute::AttributeBlueprintParams _global_filter_params; RequestContext _requestContext; Query _query; MaybeMatchPhaseLimiter::UP _match_limiter; @@ -142,6 +143,19 @@ public: const RequestContext & getRequestContext() const { return _requestContext; } const StringStringMap & get_feature_rename_map() const; + + /** + * Extracts global filter parameters from the rank-profile and query. + * + * These parameters are expected to be in the range [0.0, 1.0], which matches the range of the estimated hit ratio of the query. + * When searchable-copies > 1, we must scale the parameters to match the effective range of the estimated hit ratio. + * This is done by multiplying with the active hit ratio (active docids / docid limit). + */ + static search::attribute::AttributeBlueprintParams + extract_global_filter_params(const search::fef::RankSetup& rank_setup, + const search::fef::Properties& rank_properties, + uint32_t active_docids, + uint32_t docid_limit); }; } |