diff options
| field | value | date |
|---|---|---|
| author | Henning Baldersheim <balder@yahoo-inc.com> | 2023-10-03 17:11:41 +0200 |
| committer | GitHub <noreply@github.com> | 2023-10-03 17:11:41 +0200 |
| commit | a2146ec1453f23b2f6a623dacb9eff9b11d20c83 (patch) | |
| tree | 4c18aa176a436cea3040d2ba7c3326e08550a641 /searchlib | |
| parent | 92ddda4cb9c89cf122c2b6ffbbb3803bba0c4fc0 (diff) | |
| parent | 3ed10034df48c7145ceeaaf8b4574a8412dbad2a (diff) | |
Merge pull request #28773 from vespa-engine/geirst/dfa-table-as-default-fuzzy-matching-algorithm
Use DfaTable as default fuzzy matching algorithm for maxEditDistance 1 and 2.
Diffstat (limited to 'searchlib')
-rw-r--r-- | searchlib/src/vespa/searchlib/attribute/string_search_helper.cpp | 7 | ||||
-rw-r--r-- | searchlib/src/vespa/searchlib/fef/indexproperties.cpp | 2 | ||||
-rw-r--r-- | searchlib/src/vespa/searchlib/fef/ranksetup.cpp | 2 |
3 files changed, 6 insertions, 5 deletions
```diff
diff --git a/searchlib/src/vespa/searchlib/attribute/string_search_helper.cpp b/searchlib/src/vespa/searchlib/attribute/string_search_helper.cpp
index 82709997228..57f879c1431 100644
--- a/searchlib/src/vespa/searchlib/attribute/string_search_helper.cpp
+++ b/searchlib/src/vespa/searchlib/attribute/string_search_helper.cpp
@@ -49,14 +49,15 @@ StringSearchHelper::StringSearchHelper(QueryTermUCS4 & term, bool cased, vespali
                  ? vespalib::Regex::from_pattern(term.getTerm(), vespalib::Regex::Options::None)
                  : vespalib::Regex::from_pattern(term.getTerm(), vespalib::Regex::Options::IgnoreCase);
     } else if (isFuzzy()) {
+        auto max_edit_dist = term.getFuzzyMaxEditDistance();
         _fuzzyMatcher = std::make_unique<vespalib::FuzzyMatcher>(term.getTerm(),
-                                                                 term.getFuzzyMaxEditDistance(),
+                                                                 max_edit_dist,
                                                                  term.getFuzzyPrefixLength(),
                                                                  isCased());
         if ((fuzzy_matching_algorithm != FMA::BruteForce) &&
-            (term.getFuzzyMaxEditDistance() <= 2)) {
+            (max_edit_dist > 0 && max_edit_dist <= 2)) {
             _dfa_fuzzy_matcher = std::make_unique<DfaFuzzyMatcher>(term.getTerm(),
-                                                                   term.getFuzzyMaxEditDistance(),
+                                                                   max_edit_dist,
                                                                    term.getFuzzyPrefixLength(),
                                                                    isCased(),
                                                                    to_dfa_type(fuzzy_matching_algorithm));
diff --git a/searchlib/src/vespa/searchlib/fef/indexproperties.cpp b/searchlib/src/vespa/searchlib/fef/indexproperties.cpp
index b006aebbcdb..cc5a7fb9b15 100644
--- a/searchlib/src/vespa/searchlib/fef/indexproperties.cpp
+++ b/searchlib/src/vespa/searchlib/fef/indexproperties.cpp
@@ -439,7 +439,7 @@ TargetHitsMaxAdjustmentFactor::lookup(const Properties& props, double defaultVal
 }
 
 const vespalib::string FuzzyAlgorithm::NAME("vespa.matching.fuzzy.algorithm");
-const vespalib::FuzzyMatchingAlgorithm FuzzyAlgorithm::DEFAULT_VALUE(vespalib::FuzzyMatchingAlgorithm::BruteForce);
+const vespalib::FuzzyMatchingAlgorithm FuzzyAlgorithm::DEFAULT_VALUE(vespalib::FuzzyMatchingAlgorithm::DfaTable);
 
 vespalib::FuzzyMatchingAlgorithm
 FuzzyAlgorithm::lookup(const Properties& props)
diff --git a/searchlib/src/vespa/searchlib/fef/ranksetup.cpp b/searchlib/src/vespa/searchlib/fef/ranksetup.cpp
index 02b56701cdb..0f7bd07f92f 100644
--- a/searchlib/src/vespa/searchlib/fef/ranksetup.cpp
+++ b/searchlib/src/vespa/searchlib/fef/ranksetup.cpp
@@ -69,7 +69,7 @@ RankSetup::RankSetup(const BlueprintFactory &factory, const IIndexEnvironment &i
       _global_filter_lower_limit(0.0),
       _global_filter_upper_limit(1.0),
       _target_hits_max_adjustment_factor(20.0),
-      _fuzzy_matching_algorithm(vespalib::FuzzyMatchingAlgorithm::BruteForce),
+      _fuzzy_matching_algorithm(vespalib::FuzzyMatchingAlgorithm::DfaTable),
       _mutateOnMatch(),
       _mutateOnFirstPhase(),
       _mutateOnSecondPhase(),
```