summary | refs | log | tree | commit | diff | stats
path: root/searchcore
diff options
context:
space:
mode:
author: Geir Storli <geirst@yahooinc.com> 2023-09-21 13:32:47 +0000
committer: Geir Storli <geirst@yahooinc.com> 2023-09-21 13:38:52 +0000
commit: 9edf3caed8ecad63d1f1bb5b07510934690cc6d2 (patch)
tree: 209187088bfa38a085798d605ddf7bd2c68b2bd5 /searchcore
parent: 92d656cb14e33c4aea1677241aa687bdc70d5bc1 (diff)
Add query property to control fuzzy matching algorithm.
Diffstat (limited to 'searchcore')
-rw-r--r-- searchcore/src/tests/proton/matching/matching_test.cpp | 20
-rw-r--r-- searchcore/src/vespa/searchcore/proton/matching/match_tools.cpp | 4
2 files changed, 17 insertions, 7 deletions
diff --git a/searchcore/src/tests/proton/matching/matching_test.cpp b/searchcore/src/tests/proton/matching/matching_test.cpp
index 6ef462f80c4..ec549ee6f71 100644
--- a/searchcore/src/tests/proton/matching/matching_test.cpp
+++ b/searchcore/src/tests/proton/matching/matching_test.cpp
@@ -1135,12 +1135,15 @@ TEST("require that docsum matcher can extract matching elements from single attr
EXPECT_EQUAL(list[1], 3u);
}
+using FMA = vespalib::FuzzyMatchingAlgorithm;
+
struct AttributeBlueprintParamsFixture {
BlueprintFactory factory;
search::fef::test::IndexEnvironment index_env;
RankSetup rank_setup;
Properties rank_properties;
- AttributeBlueprintParamsFixture(double lower_limit, double upper_limit, double target_hits_max_adjustment_factor)
+ AttributeBlueprintParamsFixture(double lower_limit, double upper_limit, double target_hits_max_adjustment_factor,
+ FMA fuzzy_matching_algorithm)
: factory(),
index_env(),
rank_setup(factory, index_env),
@@ -1149,36 +1152,41 @@ struct AttributeBlueprintParamsFixture {
rank_setup.set_global_filter_lower_limit(lower_limit);
rank_setup.set_global_filter_upper_limit(upper_limit);
rank_setup.set_target_hits_max_adjustment_factor(target_hits_max_adjustment_factor);
+ rank_setup.set_fuzzy_matching_algorithm(fuzzy_matching_algorithm);
}
void set_query_properties(vespalib::stringref lower_limit, vespalib::stringref upper_limit,
- vespalib::stringref target_hits_max_adjustment_factor) {
+ vespalib::stringref target_hits_max_adjustment_factor,
+ const vespalib::string fuzzy_matching_algorithm) {
rank_properties.add(GlobalFilterLowerLimit::NAME, lower_limit);
rank_properties.add(GlobalFilterUpperLimit::NAME, upper_limit);
rank_properties.add(TargetHitsMaxAdjustmentFactor::NAME, target_hits_max_adjustment_factor);
+ rank_properties.add(FuzzyAlgorithm::NAME, fuzzy_matching_algorithm);
}
AttributeBlueprintParams extract(uint32_t active_docids = 9, uint32_t docid_limit = 10) const {
return MatchToolsFactory::extract_attribute_blueprint_params(rank_setup, rank_properties, active_docids, docid_limit);
}
};
-TEST_F("attribute blueprint params are extracted from rank profile", AttributeBlueprintParamsFixture(0.2, 0.8, 5.0))
+TEST_F("attribute blueprint params are extracted from rank profile", AttributeBlueprintParamsFixture(0.2, 0.8, 5.0, FMA::BruteForce))
{
auto params = f.extract();
EXPECT_EQUAL(0.2, params.global_filter_lower_limit);
EXPECT_EQUAL(0.8, params.global_filter_upper_limit);
EXPECT_EQUAL(5.0, params.target_hits_max_adjustment_factor);
+ EXPECT_EQUAL(FMA::BruteForce, params.fuzzy_matching_algorithm);
}
-TEST_F("attribute blueprint params are extracted from query", AttributeBlueprintParamsFixture(0.2, 0.8, 5.0))
+TEST_F("attribute blueprint params are extracted from query", AttributeBlueprintParamsFixture(0.2, 0.8, 5.0, FMA::BruteForce))
{
- f.set_query_properties("0.15", "0.75", "3.0");
+ f.set_query_properties("0.15", "0.75", "3.0", "dfa_explicit");
auto params = f.extract();
EXPECT_EQUAL(0.15, params.global_filter_lower_limit);
EXPECT_EQUAL(0.75, params.global_filter_upper_limit);
EXPECT_EQUAL(3.0, params.target_hits_max_adjustment_factor);
+ EXPECT_EQUAL(FMA::DfaExplicit, params.fuzzy_matching_algorithm);
}
-TEST_F("global filter params are scaled with active hit ratio", AttributeBlueprintParamsFixture(0.2, 0.8, 5.0))
+TEST_F("global filter params are scaled with active hit ratio", AttributeBlueprintParamsFixture(0.2, 0.8, 5.0, FMA::BruteForce))
{
auto params = f.extract(5, 10);
EXPECT_EQUAL(0.12, params.global_filter_lower_limit);
diff --git a/searchcore/src/vespa/searchcore/proton/matching/match_tools.cpp b/searchcore/src/vespa/searchcore/proton/matching/match_tools.cpp
index f62f4c60a6c..5ae671b88cb 100644
--- a/searchcore/src/vespa/searchcore/proton/matching/match_tools.cpp
+++ b/searchcore/src/vespa/searchcore/proton/matching/match_tools.cpp
@@ -331,6 +331,7 @@ MatchToolsFactory::extract_attribute_blueprint_params(const RankSetup& rank_setu
double lower_limit = GlobalFilterLowerLimit::lookup(rank_properties, rank_setup.get_global_filter_lower_limit());
double upper_limit = GlobalFilterUpperLimit::lookup(rank_properties, rank_setup.get_global_filter_upper_limit());
double target_hits_max_adjustment_factor = TargetHitsMaxAdjustmentFactor::lookup(rank_properties, rank_setup.get_target_hits_max_adjustment_factor());
+ auto fuzzy_matching_algorithm = FuzzyAlgorithm::lookup(rank_properties, rank_setup.get_fuzzy_matching_algorithm());
// Note that we count the reserved docid 0 as active.
// This ensures that when searchable-copies=1, the ratio is 1.0.
@@ -338,7 +339,8 @@ MatchToolsFactory::extract_attribute_blueprint_params(const RankSetup& rank_setu
return {lower_limit * active_hit_ratio,
upper_limit * active_hit_ratio,
- target_hits_max_adjustment_factor};
+ target_hits_max_adjustment_factor,
+ fuzzy_matching_algorithm};
}
AttributeOperationTask::AttributeOperationTask(const RequestContext & requestContext,