diff options
Diffstat (limited to 'searchlib/src/vespa/searchlib/attribute/string_search_helper.cpp')
-rw-r--r-- | searchlib/src/vespa/searchlib/attribute/string_search_helper.cpp | 40 |
1 files changed, 37 insertions, 3 deletions
diff --git a/searchlib/src/vespa/searchlib/attribute/string_search_helper.cpp b/searchlib/src/vespa/searchlib/attribute/string_search_helper.cpp
index 1efe39667b8..aec317926f1 100644
--- a/searchlib/src/vespa/searchlib/attribute/string_search_helper.cpp
+++ b/searchlib/src/vespa/searchlib/attribute/string_search_helper.cpp
@@ -1,6 +1,8 @@
 // Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
 
 #include "string_search_helper.h"
+#include "dfa_fuzzy_matcher.h"
+#include "i_enum_store_dictionary.h"
 #include <vespa/searchlib/query/query_term_ucs4.h>
 #include <vespa/vespalib/text/lowercase.h>
 #include <vespa/vespalib/text/utf8.h>
@@ -12,6 +14,7 @@ namespace search::attribute {
 StringSearchHelper::StringSearchHelper(QueryTermUCS4 & term, bool cased, vespalib::FuzzyMatchingAlgorithm fuzzy_matching_algorithm)
     : _regex(),
       _fuzzyMatcher(),
+      _dfa_fuzzy_matcher(),
       _term(),
       _termLen(),
       _isPrefix(term.isPrefix()),
@@ -24,12 +27,20 @@ StringSearchHelper::StringSearchHelper(QueryTermUCS4 & term, bool cased, vespali
                 ? vespalib::Regex::from_pattern(term.getTerm(), vespalib::Regex::Options::None)
                 : vespalib::Regex::from_pattern(term.getTerm(), vespalib::Regex::Options::IgnoreCase);
     } else if (isFuzzy()) {
-        (void) fuzzy_matching_algorithm;
-        // TODO: Select implementation based on algorithm.
         _fuzzyMatcher = std::make_unique<vespalib::FuzzyMatcher>(term.getTerm(),
                                                                  term.getFuzzyMaxEditDistance(),
                                                                  term.getFuzzyPrefixLength(),
                                                                  isCased());
+        using FMA = vespalib::FuzzyMatchingAlgorithm;
+        using LDT = vespalib::fuzzy::LevenshteinDfa::DfaType;
+        if ((fuzzy_matching_algorithm != FMA::BruteForce) &&
+            (term.getFuzzyMaxEditDistance() <= 2)) {
+            _dfa_fuzzy_matcher = std::make_unique<DfaFuzzyMatcher>(term.getTerm(),
+                                                                   term.getFuzzyMaxEditDistance(),
+                                                                   term.getFuzzyPrefixLength(),
+                                                                   isCased(),
+                                                                   (fuzzy_matching_algorithm == FMA::DfaImplicit) ? LDT::Implicit : LDT::Explicit);
+        }
     } else if (isCased()) {
         _term = term.getTerm();
         _termLen = strlen(_term);
@@ -48,7 +59,7 @@ StringSearchHelper::isMatch(const char *src) const noexcept {
         return getRegex().valid() && getRegex().partial_match(std::string_view(src));
     }
     if (__builtin_expect(isFuzzy(), false)) {
-        return getFuzzyMatcher().isMatch(src);
+        return _dfa_fuzzy_matcher ? _dfa_fuzzy_matcher->is_match(src) : getFuzzyMatcher().isMatch(src);
     }
     if (__builtin_expect(isCased(), false)) {
         int res = strncmp(_term, src, _termLen);
@@ -67,4 +78,27 @@ StringSearchHelper::isMatch(const char *src) const noexcept {
     return (_ucs4[j] == 0 && (val == 0 || isPrefix()));
 }
 
+template <typename DictionaryConstIteratorType>
+bool
+StringSearchHelper::is_fuzzy_match(const char* word, DictionaryConstIteratorType& itr, const DfaStringComparator::DataStoreType& data_store) const
+{
+    if (_dfa_fuzzy_matcher) {
+        return _dfa_fuzzy_matcher->is_match(word, itr, data_store);
+    } else {
+        if (_fuzzyMatcher->isMatch(word)) {
+            return true;
+        }
+        ++itr;
+        return false;
+    }
+}
+
+template
+bool
+StringSearchHelper::is_fuzzy_match(const char*, EnumPostingTree::ConstIterator&, const DfaStringComparator::DataStoreType&) const;
+
+template
+bool
+StringSearchHelper::is_fuzzy_match(const char*, EnumTree::ConstIterator&, const DfaStringComparator::DataStoreType&) const;
+
 }