diff options
author | Geir Storli <geirst@yahooinc.com> | 2023-09-21 13:32:47 +0000 |
---|---|---|
committer | Geir Storli <geirst@yahooinc.com> | 2023-09-21 13:38:52 +0000 |
commit | 9edf3caed8ecad63d1f1bb5b07510934690cc6d2 (patch) | |
tree | 209187088bfa38a085798d605ddf7bd2c68b2bd5 /searchcore | |
parent | 92d656cb14e33c4aea1677241aa687bdc70d5bc1 (diff) |
Add query property to control fuzzy matching algorithm.
Diffstat (limited to 'searchcore')
-rw-r--r-- | searchcore/src/tests/proton/matching/matching_test.cpp | 20 |
-rw-r--r-- | searchcore/src/vespa/searchcore/proton/matching/match_tools.cpp | 4 |
2 files changed, 17 insertions, 7 deletions
diff --git a/searchcore/src/tests/proton/matching/matching_test.cpp b/searchcore/src/tests/proton/matching/matching_test.cpp index 6ef462f80c4..ec549ee6f71 100644 --- a/searchcore/src/tests/proton/matching/matching_test.cpp +++ b/searchcore/src/tests/proton/matching/matching_test.cpp @@ -1135,12 +1135,15 @@ TEST("require that docsum matcher can extract matching elements from single attr EXPECT_EQUAL(list[1], 3u); } +using FMA = vespalib::FuzzyMatchingAlgorithm; + struct AttributeBlueprintParamsFixture { BlueprintFactory factory; search::fef::test::IndexEnvironment index_env; RankSetup rank_setup; Properties rank_properties; - AttributeBlueprintParamsFixture(double lower_limit, double upper_limit, double target_hits_max_adjustment_factor) + AttributeBlueprintParamsFixture(double lower_limit, double upper_limit, double target_hits_max_adjustment_factor, + FMA fuzzy_matching_algorithm) : factory(), index_env(), rank_setup(factory, index_env), @@ -1149,36 +1152,41 @@ struct AttributeBlueprintParamsFixture { rank_setup.set_global_filter_lower_limit(lower_limit); rank_setup.set_global_filter_upper_limit(upper_limit); rank_setup.set_target_hits_max_adjustment_factor(target_hits_max_adjustment_factor); + rank_setup.set_fuzzy_matching_algorithm(fuzzy_matching_algorithm); } void set_query_properties(vespalib::stringref lower_limit, vespalib::stringref upper_limit, - vespalib::stringref target_hits_max_adjustment_factor) { + vespalib::stringref target_hits_max_adjustment_factor, + const vespalib::string fuzzy_matching_algorithm) { rank_properties.add(GlobalFilterLowerLimit::NAME, lower_limit); rank_properties.add(GlobalFilterUpperLimit::NAME, upper_limit); rank_properties.add(TargetHitsMaxAdjustmentFactor::NAME, target_hits_max_adjustment_factor); + rank_properties.add(FuzzyAlgorithm::NAME, fuzzy_matching_algorithm); } AttributeBlueprintParams extract(uint32_t active_docids = 9, uint32_t docid_limit = 10) const { return 
MatchToolsFactory::extract_attribute_blueprint_params(rank_setup, rank_properties, active_docids, docid_limit); } }; -TEST_F("attribute blueprint params are extracted from rank profile", AttributeBlueprintParamsFixture(0.2, 0.8, 5.0)) +TEST_F("attribute blueprint params are extracted from rank profile", AttributeBlueprintParamsFixture(0.2, 0.8, 5.0, FMA::BruteForce)) { auto params = f.extract(); EXPECT_EQUAL(0.2, params.global_filter_lower_limit); EXPECT_EQUAL(0.8, params.global_filter_upper_limit); EXPECT_EQUAL(5.0, params.target_hits_max_adjustment_factor); + EXPECT_EQUAL(FMA::BruteForce, params.fuzzy_matching_algorithm); } -TEST_F("attribute blueprint params are extracted from query", AttributeBlueprintParamsFixture(0.2, 0.8, 5.0)) +TEST_F("attribute blueprint params are extracted from query", AttributeBlueprintParamsFixture(0.2, 0.8, 5.0, FMA::BruteForce)) { - f.set_query_properties("0.15", "0.75", "3.0"); + f.set_query_properties("0.15", "0.75", "3.0", "dfa_explicit"); auto params = f.extract(); EXPECT_EQUAL(0.15, params.global_filter_lower_limit); EXPECT_EQUAL(0.75, params.global_filter_upper_limit); EXPECT_EQUAL(3.0, params.target_hits_max_adjustment_factor); + EXPECT_EQUAL(FMA::DfaExplicit, params.fuzzy_matching_algorithm); } -TEST_F("global filter params are scaled with active hit ratio", AttributeBlueprintParamsFixture(0.2, 0.8, 5.0)) +TEST_F("global filter params are scaled with active hit ratio", AttributeBlueprintParamsFixture(0.2, 0.8, 5.0, FMA::BruteForce)) { auto params = f.extract(5, 10); EXPECT_EQUAL(0.12, params.global_filter_lower_limit); diff --git a/searchcore/src/vespa/searchcore/proton/matching/match_tools.cpp b/searchcore/src/vespa/searchcore/proton/matching/match_tools.cpp index f62f4c60a6c..5ae671b88cb 100644 --- a/searchcore/src/vespa/searchcore/proton/matching/match_tools.cpp +++ b/searchcore/src/vespa/searchcore/proton/matching/match_tools.cpp @@ -331,6 +331,7 @@ MatchToolsFactory::extract_attribute_blueprint_params(const RankSetup& 
rank_setu double lower_limit = GlobalFilterLowerLimit::lookup(rank_properties, rank_setup.get_global_filter_lower_limit()); double upper_limit = GlobalFilterUpperLimit::lookup(rank_properties, rank_setup.get_global_filter_upper_limit()); double target_hits_max_adjustment_factor = TargetHitsMaxAdjustmentFactor::lookup(rank_properties, rank_setup.get_target_hits_max_adjustment_factor()); + auto fuzzy_matching_algorithm = FuzzyAlgorithm::lookup(rank_properties, rank_setup.get_fuzzy_matching_algorithm()); // Note that we count the reserved docid 0 as active. // This ensures that when searchable-copies=1, the ratio is 1.0. @@ -338,7 +339,8 @@ MatchToolsFactory::extract_attribute_blueprint_params(const RankSetup& rank_setu return {lower_limit * active_hit_ratio, upper_limit * active_hit_ratio, - target_hits_max_adjustment_factor}; + target_hits_max_adjustment_factor, + fuzzy_matching_algorithm}; } AttributeOperationTask::AttributeOperationTask(const RequestContext & requestContext, |