summaryrefslogtreecommitdiffstats
path: root/searchcore
diff options
context:
space:
mode:
author Geir Storli <geirst@yahooinc.com> 2022-05-12 12:56:11 +0000
committer Geir Storli <geirst@yahooinc.com> 2022-05-12 12:56:11 +0000
commit 5ecdd4265fc51cc51c8fc488580afb4754430cd0 (patch)
tree 3877d3b5d5297009b5bb2607c1414cdc6149b087 /searchcore
parent 3ae64956b57ee364807e5258da331bc156771a9e (diff)
Auto-adjust global filter tuning parameters to handle searchable-copies > 1.
Diffstat (limited to 'searchcore')
-rw-r--r-- searchcore/src/tests/proton/matching/matching_test.cpp 83
-rw-r--r-- searchcore/src/vespa/searchcore/proton/matching/match_tools.cpp 34
-rw-r--r-- searchcore/src/vespa/searchcore/proton/matching/match_tools.h 14
3 files changed, 102 insertions, 29 deletions
diff --git a/searchcore/src/tests/proton/matching/matching_test.cpp b/searchcore/src/tests/proton/matching/matching_test.cpp
index 187c0463da3..e3d0b37c0d6 100644
--- a/searchcore/src/tests/proton/matching/matching_test.cpp
+++ b/searchcore/src/tests/proton/matching/matching_test.cpp
@@ -1,42 +1,44 @@
// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
-#include <vespa/vespalib/testkit/testapp.h>
#include <vespa/document/base/globalid.h>
-#include <initializer_list>
+#include <vespa/eval/eval/simple_value.h>
+#include <vespa/eval/eval/tensor_spec.h>
+#include <vespa/eval/eval/value_codec.h>
#include <vespa/searchcommon/attribute/iattributecontext.h>
-#include <vespa/searchcore/proton/test/bucketfactory.h>
+#include <vespa/searchcore/proton/bucketdb/bucket_db_owner.h>
#include <vespa/searchcore/proton/documentmetastore/documentmetastore.h>
#include <vespa/searchcore/proton/matching/fakesearchcontext.h>
#include <vespa/searchcore/proton/matching/i_constant_value_repo.h>
+#include <vespa/searchcore/proton/matching/match_context.h>
+#include <vespa/searchcore/proton/matching/match_params.h>
+#include <vespa/searchcore/proton/matching/match_tools.h>
#include <vespa/searchcore/proton/matching/matcher.h>
#include <vespa/searchcore/proton/matching/querynodes.h>
#include <vespa/searchcore/proton/matching/sessionmanager.h>
#include <vespa/searchcore/proton/matching/viewresolver.h>
-#include <vespa/searchcore/proton/bucketdb/bucket_db_owner.h>
+#include <vespa/searchcore/proton/test/bucketfactory.h>
#include <vespa/searchlib/aggregation/aggregation.h>
#include <vespa/searchlib/aggregation/grouping.h>
#include <vespa/searchlib/aggregation/perdocexpression.h>
#include <vespa/searchlib/attribute/extendableattributes.h>
#include <vespa/searchlib/common/featureset.h>
-#include <vespa/searchlib/engine/docsumrequest.h>
-#include <vespa/searchlib/engine/searchrequest.h>
#include <vespa/searchlib/engine/docsumreply.h>
+#include <vespa/searchlib/engine/docsumrequest.h>
#include <vespa/searchlib/engine/searchreply.h>
-#include <vespa/searchlib/test/mock_attribute_context.h>
-#include <vespa/searchlib/fef/properties.h>
+#include <vespa/searchlib/engine/searchrequest.h>
#include <vespa/searchlib/fef/indexproperties.h>
+#include <vespa/searchlib/fef/properties.h>
+#include <vespa/searchlib/fef/ranksetup.h>
+#include <vespa/searchlib/fef/test/indexenvironment.h>
#include <vespa/searchlib/query/tree/querybuilder.h>
#include <vespa/searchlib/query/tree/stackdumpcreator.h>
#include <vespa/searchlib/queryeval/isourceselector.h>
-#include <vespa/vespalib/util/simple_thread_bundle.h>
-#include <vespa/searchcore/proton/matching/match_params.h>
-#include <vespa/searchcore/proton/matching/match_tools.h>
-#include <vespa/searchcore/proton/matching/match_context.h>
-#include <vespa/eval/eval/simple_value.h>
-#include <vespa/eval/eval/tensor_spec.h>
-#include <vespa/eval/eval/value_codec.h>
+#include <vespa/searchlib/test/mock_attribute_context.h>
#include <vespa/vespalib/objects/nbostream.h>
+#include <vespa/vespalib/testkit/testapp.h>
+#include <vespa/vespalib/util/simple_thread_bundle.h>
#include <vespa/vespalib/util/testclock.h>
+#include <initializer_list>
#include <vespa/log/log.h>
LOG_SETUP("matching_test");
@@ -47,6 +49,7 @@ using namespace search::aggregation;
using namespace search::attribute;
using namespace search::engine;
using namespace search::expression;
+using namespace search::fef::indexproperties::matching;
using namespace search::fef;
using namespace search::grouping;
using namespace search::index;
@@ -55,13 +58,12 @@ using namespace search::queryeval;
using namespace search;
using search::attribute::test::MockAttributeContext;
+using search::fef::indexproperties::hitcollector::HeapSize;
using search::index::schema::DataType;
using storage::spi::Timestamp;
-using search::fef::indexproperties::hitcollector::HeapSize;
-
-using vespalib::nbostream;
using vespalib::eval::SimpleValue;
using vespalib::eval::TensorSpec;
+using vespalib::nbostream;
void inject_match_phase_limiting(Properties &setup, const vespalib::string &attribute, size_t max_hits, bool descending)
{
@@ -1102,4 +1104,49 @@ TEST("require that docsum matcher can extract matching elements from single attr
EXPECT_EQUAL(list[1], 3u);
}
+struct GlobalFilterParamsFixture { // Test fixture: holds a RankSetup plus query rank properties and runs MatchToolsFactory::extract_global_filter_params() on them.
+ BlueprintFactory factory;
+ search::fef::test::IndexEnvironment index_env;
+ RankSetup rank_setup; // initialized from factory and index_env, which are declared (and therefore constructed) before it
+ Properties rank_properties; // query-level properties; stays empty unless set_query_properties() is called
+ GlobalFilterParamsFixture(double lower_limit, double upper_limit) // the given limits are stored on rank_setup
+ : factory(),
+ index_env(),
+ rank_setup(factory, index_env),
+ rank_properties()
+ {
+ rank_setup.set_global_filter_lower_limit(lower_limit);
+ rank_setup.set_global_filter_upper_limit(upper_limit);
+ }
+ void set_query_properties(vespalib::stringref lower_limit, vespalib::stringref upper_limit) { // adds both limits, given as strings, to rank_properties
+ rank_properties.add(GlobalFilterLowerLimit::NAME, lower_limit);
+ rank_properties.add(GlobalFilterUpperLimit::NAME, upper_limit);
+ }
+ AttributeBlueprintParams extract(uint32_t active_docids = 9, uint32_t docid_limit = 10) { // defaults give min(9 + 1, 10) / 10 = 1.0 as active hit ratio, i.e. unscaled limits
+ return MatchToolsFactory::extract_global_filter_params(rank_setup, rank_properties, active_docids, docid_limit);
+ }
+};
+
+TEST_F("global filter params are extracted from rank profile", GlobalFilterParamsFixture(0.2, 0.8))
+{
+    // No query properties are added, so the limits set on the fixture's rank setup are expected back.
+    const auto extracted = f.extract();
+    EXPECT_EQUAL(0.2, extracted.global_filter_lower_limit);
+    EXPECT_EQUAL(0.8, extracted.global_filter_upper_limit);
+}
+
+TEST_F("global filter params are extracted from query", GlobalFilterParamsFixture(0.2, 0.8))
+{
+    // Limits added as query properties are expected to win over the rank setup values (0.2, 0.8).
+    f.set_query_properties("0.15", "0.75");
+    const auto extracted = f.extract();
+    EXPECT_EQUAL(0.15, extracted.global_filter_lower_limit);
+    EXPECT_EQUAL(0.75, extracted.global_filter_upper_limit);
+}
+
+TEST_F("global filter params are scaled with active hit ratio", GlobalFilterParamsFixture(0.2, 0.8))
+{
+    // 5 active docids with docid limit 10: the expected values are the configured limits times 0.6.
+    const auto scaled = f.extract(5, 10);
+    EXPECT_EQUAL(0.12, scaled.global_filter_lower_limit);
+    EXPECT_EQUAL(0.48, scaled.global_filter_upper_limit);
+}
+
TEST_MAIN() { TEST_RUN_ALL(); }
diff --git a/searchcore/src/vespa/searchcore/proton/matching/match_tools.cpp b/searchcore/src/vespa/searchcore/proton/matching/match_tools.cpp
index 3d8d56f0150..7bf62f678ed 100644
--- a/searchcore/src/vespa/searchcore/proton/matching/match_tools.cpp
+++ b/searchcore/src/vespa/searchcore/proton/matching/match_tools.cpp
@@ -70,13 +70,6 @@ extractDiversityParams(const RankSetup &rankSetup, const Properties &rankPropert
AttributeLimiter::toDiversityCutoffStrategy(DiversityCutoffStrategy::lookup(rankProperties, rankSetup.getDiversityCutoffStrategy())));
}
-AttributeBlueprintParams
-extractAttributeBlueprintParams(const RankSetup& rank_setup, const Properties &rankProperties)
-{
- return AttributeBlueprintParams(GlobalFilterLowerLimit::lookup(rankProperties, rank_setup.get_global_filter_lower_limit()),
- GlobalFilterUpperLimit::lookup(rankProperties, rank_setup.get_global_filter_upper_limit()));
-}
-
} // namespace proton::matching::<unnamed>
void
@@ -181,7 +174,8 @@ MatchToolsFactory(QueryLimiter & queryLimiter,
const Properties & featureOverrides,
bool is_search)
: _queryLimiter(queryLimiter),
- _requestContext(doom, attributeContext, rankProperties, extractAttributeBlueprintParams(rankSetup, rankProperties)),
+ _global_filter_params(extract_global_filter_params(rankSetup, rankProperties, metaStore.getNumActiveLids(), searchContext.getDocIdLimit())),
+ _requestContext(doom, attributeContext, rankProperties, _global_filter_params),
_query(),
_match_limiter(),
_queryEnv(indexEnv, attributeContext, rankProperties, searchContext.getIndexes()),
@@ -208,9 +202,10 @@ MatchToolsFactory(QueryLimiter & queryLimiter,
trace.addEvent(4, "Perform dictionary lookups and posting lists initialization");
_query.fetchPostings();
if (is_search) {
- double lower_limit = GlobalFilterLowerLimit::lookup(rankProperties, rankSetup.get_global_filter_lower_limit());
- double upper_limit = GlobalFilterUpperLimit::lookup(rankProperties, rankSetup.get_global_filter_upper_limit());
- _query.handle_global_filter(searchContext.getDocIdLimit(), lower_limit, upper_limit, trace);
+ _query.handle_global_filter(searchContext.getDocIdLimit(),
+ _global_filter_params.global_filter_lower_limit,
+ _global_filter_params.global_filter_upper_limit,
+ trace);
}
_query.freeze();
trace.addEvent(5, "Prepare shared state for multi-threaded rank executors");
@@ -309,6 +304,23 @@ MatchToolsFactory::get_feature_rename_map() const
return _rankSetup.get_feature_rename_map();
}
+/**
+ * Extracts the global filter lower/upper limits and scales them with the active hit ratio.
+ *
+ * Each limit is looked up in the rank properties of the query, with the value from the
+ * rank setup as fallback. Both limits are then multiplied with
+ * min(active_docids + 1, docid_limit) / docid_limit.
+ *
+ * @param rank_setup      supplies the fallback limits.
+ * @param rank_properties rank properties that are searched first for the limits.
+ * @param active_docids   number of active docids, not counting the reserved docid 0.
+ * @param docid_limit     the docid limit; 0 is treated as "no scaling" (ratio 1.0).
+ * @return the scaled lower and upper limits.
+ */
+AttributeBlueprintParams
+MatchToolsFactory::extract_global_filter_params(const search::fef::RankSetup& rank_setup,
+                                                const search::fef::Properties& rank_properties,
+                                                uint32_t active_docids,
+                                                uint32_t docid_limit)
+{
+    double lower_limit = GlobalFilterLowerLimit::lookup(rank_properties, rank_setup.get_global_filter_lower_limit());
+    double upper_limit = GlobalFilterUpperLimit::lookup(rank_properties, rank_setup.get_global_filter_upper_limit());
+
+    // Note that we count the reserved docid 0 as active.
+    // This ensures that when searchable-copies=1, the ratio is 1.0.
+    //
+    // The ratio is capped at 1.0. The cap is tested without computing 'active_docids + 1',
+    // so the check itself cannot wrap around for active_docids == UINT32_MAX, and
+    // docid_limit == 0 never reaches the division (0 / 0.0 would give NaN limits).
+    double active_hit_ratio = 1.0;
+    if ((docid_limit > 0) && (active_docids < (docid_limit - 1))) {
+        active_hit_ratio = (active_docids + 1) / static_cast<double>(docid_limit);
+    }
+
+    return {lower_limit * active_hit_ratio,
+            upper_limit * active_hit_ratio};
+}
+
AttributeOperationTask::AttributeOperationTask(const RequestContext & requestContext,
vespalib::stringref attribute, vespalib::stringref operation)
: _requestContext(requestContext),
diff --git a/searchcore/src/vespa/searchcore/proton/matching/match_tools.h b/searchcore/src/vespa/searchcore/proton/matching/match_tools.h
index a7d39a0c3e8..d01ea05f3f7 100644
--- a/searchcore/src/vespa/searchcore/proton/matching/match_tools.h
+++ b/searchcore/src/vespa/searchcore/proton/matching/match_tools.h
@@ -93,6 +93,7 @@ class MatchToolsFactory
private:
using IAttributeFunctor = search::attribute::IAttributeFunctor;
QueryLimiter & _queryLimiter;
+ search::attribute::AttributeBlueprintParams _global_filter_params;
RequestContext _requestContext;
Query _query;
MaybeMatchPhaseLimiter::UP _match_limiter;
@@ -142,6 +143,19 @@ public:
const RequestContext & getRequestContext() const { return _requestContext; }
const StringStringMap & get_feature_rename_map() const;
+
+ /**
+ * Extracts global filter parameters from the rank-profile and query.
+ *
+ * These parameters are expected to be in the range [0.0, 1.0], which matches the range of the estimated hit ratio of the query.
+ * When searchable-copies > 1, we must scale the parameters to match the effective range of the estimated hit ratio.
+ * This is done by multiplying with the active hit ratio (active docids / docid limit).
+ * The reserved docid 0 is counted as active when computing the ratio, and the numerator is capped at the docid limit.
+ *
+ * @param rank_setup      rank setup holding the rank-profile values of the lower and upper limit.
+ * @param rank_properties rank properties of the query; limits found here are used instead of the rank-profile values.
+ * @param active_docids   number of active docids (the reserved docid 0 is added by this function).
+ * @param docid_limit     docid limit, used as the denominator of the active hit ratio.
+ * @return the lower and upper limit, both multiplied with the active hit ratio.
+ */
+ static search::attribute::AttributeBlueprintParams
+ extract_global_filter_params(const search::fef::RankSetup& rank_setup,
+ const search::fef::Properties& rank_properties,
+ uint32_t active_docids,
+ uint32_t docid_limit);
};
}