diff options
Diffstat (limited to 'searchlib/src/vespa/searchlib/attribute/dfa_fuzzy_matcher.h')
-rw-r--r-- | searchlib/src/vespa/searchlib/attribute/dfa_fuzzy_matcher.h | 48 |
1 files changed, 40 insertions, 8 deletions
diff --git a/searchlib/src/vespa/searchlib/attribute/dfa_fuzzy_matcher.h b/searchlib/src/vespa/searchlib/attribute/dfa_fuzzy_matcher.h index fcba13f85a4..8e5b3ce0ccd 100644 --- a/searchlib/src/vespa/searchlib/attribute/dfa_fuzzy_matcher.h +++ b/searchlib/src/vespa/searchlib/attribute/dfa_fuzzy_matcher.h @@ -5,6 +5,7 @@ #include "dfa_string_comparator.h" #include <vespa/vespalib/datastore/atomic_entry_ref.h> #include <vespa/vespalib/fuzzy/levenshtein_dfa.h> +#include <iostream> namespace search::attribute { @@ -17,22 +18,53 @@ namespace search::attribute { class DfaFuzzyMatcher { private: vespalib::fuzzy::LevenshteinDfa _dfa; - std::vector<uint32_t> _successor; + std::vector<uint32_t> _successor; + std::vector<uint32_t> _prefix; + uint32_t _prefix_size; + bool _cased; + const char* skip_prefix(const char* word) const; public: - DfaFuzzyMatcher(std::string_view target, uint8_t max_edits, bool cased, vespalib::fuzzy::LevenshteinDfa::DfaType dfa_type); + DfaFuzzyMatcher(std::string_view target, uint8_t max_edits, uint32_t prefix_size, bool cased, vespalib::fuzzy::LevenshteinDfa::DfaType dfa_type); ~DfaFuzzyMatcher(); + bool is_match(const char *word) const; + + /* + * If prefix size is nonzero then this variant of is_match() + * should only be called with words that start with the extracted + * prefix of the target word. + * + * The caller must position the iterator at the right location using the + * lower bound functionality in the dictionary. + * + * Returns true when the word is accepted. Otherwise a DfaStringComparator + * is built over the current successor, itr.seek() is called with it, + * and false is returned. 
+ */ template <typename DictionaryConstIteratorType> bool is_match(const char* word, DictionaryConstIteratorType& itr, const DfaStringComparator::DataStoreType& data_store) { - auto match = _dfa.match(word, _successor); - if (match.matches()) { - return true; + if (_prefix_size > 0) { + word = skip_prefix(word); + if (_prefix.size() < _prefix_size) { + if (*word == '\0') { + return true; + } + _successor.resize(_prefix.size()); + _successor.emplace_back(1); + } else { + _successor.resize(_prefix.size()); + auto match = _dfa.match(word, _successor); + if (match.matches()) { + return true; + } + } } else { - DfaStringComparator cmp(data_store, _successor); - itr.seek(vespalib::datastore::AtomicEntryRef(), cmp); - return false; + _successor.clear(); + auto match = _dfa.match(word, _successor); + if (match.matches()) { + return true; + } } + DfaStringComparator cmp(data_store, _successor); + itr.seek(vespalib::datastore::AtomicEntryRef(), cmp); + return false; } }; |