summary | refs | log | tree | commit | diff | stats
path: root/searchlib/src/vespa/searchlib/attribute/string_search_helper.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'searchlib/src/vespa/searchlib/attribute/string_search_helper.cpp')
-rw-r--r-- searchlib/src/vespa/searchlib/attribute/string_search_helper.cpp 40
1 files changed, 37 insertions, 3 deletions
diff --git a/searchlib/src/vespa/searchlib/attribute/string_search_helper.cpp b/searchlib/src/vespa/searchlib/attribute/string_search_helper.cpp
index 1efe39667b8..aec317926f1 100644
--- a/searchlib/src/vespa/searchlib/attribute/string_search_helper.cpp
+++ b/searchlib/src/vespa/searchlib/attribute/string_search_helper.cpp
@@ -1,6 +1,8 @@
// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
#include "string_search_helper.h"
+#include "dfa_fuzzy_matcher.h"
+#include "i_enum_store_dictionary.h"
#include <vespa/searchlib/query/query_term_ucs4.h>
#include <vespa/vespalib/text/lowercase.h>
#include <vespa/vespalib/text/utf8.h>
@@ -12,6 +14,7 @@ namespace search::attribute {
StringSearchHelper::StringSearchHelper(QueryTermUCS4 & term, bool cased, vespalib::FuzzyMatchingAlgorithm fuzzy_matching_algorithm)
: _regex(),
_fuzzyMatcher(),
+ _dfa_fuzzy_matcher(),
_term(),
_termLen(),
_isPrefix(term.isPrefix()),
@@ -24,12 +27,20 @@ StringSearchHelper::StringSearchHelper(QueryTermUCS4 & term, bool cased, vespali
? vespalib::Regex::from_pattern(term.getTerm(), vespalib::Regex::Options::None)
: vespalib::Regex::from_pattern(term.getTerm(), vespalib::Regex::Options::IgnoreCase);
} else if (isFuzzy()) {
- (void) fuzzy_matching_algorithm;
- // TODO: Select implementation based on algorithm.
_fuzzyMatcher = std::make_unique<vespalib::FuzzyMatcher>(term.getTerm(),
term.getFuzzyMaxEditDistance(),
term.getFuzzyPrefixLength(),
isCased());
+ using FMA = vespalib::FuzzyMatchingAlgorithm;
+ using LDT = vespalib::fuzzy::LevenshteinDfa::DfaType;
+ if ((fuzzy_matching_algorithm != FMA::BruteForce) &&
+ (term.getFuzzyMaxEditDistance() <= 2)) {
+ _dfa_fuzzy_matcher = std::make_unique<DfaFuzzyMatcher>(term.getTerm(),
+ term.getFuzzyMaxEditDistance(),
+ term.getFuzzyPrefixLength(),
+ isCased(),
+ (fuzzy_matching_algorithm == FMA::DfaImplicit) ? LDT::Implicit : LDT::Explicit);
+ }
} else if (isCased()) {
_term = term.getTerm();
_termLen = strlen(_term);
@@ -48,7 +59,7 @@ StringSearchHelper::isMatch(const char *src) const noexcept {
return getRegex().valid() && getRegex().partial_match(std::string_view(src));
}
if (__builtin_expect(isFuzzy(), false)) {
- return getFuzzyMatcher().isMatch(src);
+ return _dfa_fuzzy_matcher ? _dfa_fuzzy_matcher->is_match(src) : getFuzzyMatcher().isMatch(src);
}
if (__builtin_expect(isCased(), false)) {
int res = strncmp(_term, src, _termLen);
@@ -67,4 +78,27 @@ StringSearchHelper::isMatch(const char *src) const noexcept {
return (_ucs4[j] == 0 && (val == 0 || isPrefix()));
}
+/**
+ * Decides whether a dictionary word matches the fuzzy query term.
+ *
+ * If a DFA fuzzy matcher has been set up, the word, the dictionary iterator and
+ * the data store are handed to it unchanged and its verdict is returned.
+ * Otherwise the plain fuzzy matcher checks the word: on a match the iterator is
+ * left where it is, on a mismatch it is stepped forward by one entry.
+ *
+ * NOTE(review): the fallback dereferences _fuzzyMatcher without a null check;
+ * presumably this is only called for fuzzy terms - confirm against callers.
+ */
+template <typename DictionaryConstIteratorType>
+bool
+StringSearchHelper::is_fuzzy_match(const char* word, DictionaryConstIteratorType& itr, const DfaStringComparator::DataStoreType& data_store) const
+{
+    // Preferred path: the DFA matcher receives the iterator and data store as well.
+    if (_dfa_fuzzy_matcher) {
+        return _dfa_fuzzy_matcher->is_match(word, itr, data_store);
+    }
+    // Fallback path: test this word only, and move on to the next entry when it fails.
+    const bool matched = _fuzzyMatcher->isMatch(word);
+    if (!matched) {
+        ++itr;
+    }
+    return matched;
+}
+// Explicit instantiation of is_fuzzy_match for EnumPostingTree::ConstIterator (the template body is defined in this .cpp file).
+template
+bool
+StringSearchHelper::is_fuzzy_match(const char*, EnumPostingTree::ConstIterator&, const DfaStringComparator::DataStoreType&) const;
+// Explicit instantiation of is_fuzzy_match for EnumTree::ConstIterator.
+template
+bool
+StringSearchHelper::is_fuzzy_match(const char*, EnumTree::ConstIterator&, const DfaStringComparator::DataStoreType&) const;
+
}