// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. #pragma once #include "dfa_string_comparator.h" #include #include namespace search::attribute { /** * Class that uses a LevenshteinDfa to fuzzy match a target word against words in a dictionary. * * The dictionary iterator is advanced based on the successor string from the DFA * each time the candidate word is _not_ a match. */ class DfaFuzzyMatcher { private: vespalib::fuzzy::LevenshteinDfa _dfa; std::vector _successor; std::vector _prefix; uint32_t _prefix_size; bool _cased; static constexpr uint32_t beyond_unicode = 0x110000; const char* skip_prefix(const char* word) const; public: DfaFuzzyMatcher(std::string_view target, uint8_t max_edits, uint32_t prefix_size, bool cased, bool prefix_match, vespalib::fuzzy::LevenshteinDfa::DfaType dfa_type); // Defaults to table-based DFA: DfaFuzzyMatcher(std::string_view target, uint8_t max_edits, uint32_t prefix_size, bool cased, bool prefix_match); ~DfaFuzzyMatcher(); [[nodiscard]] static constexpr bool supports_max_edits(uint8_t edits) noexcept { return (edits == 1 || edits == 2); } [[nodiscard]] bool is_match(std::string_view word) const; /* * If prefix size is nonzero then this variant of is_match() * should only be called with words that starts with the extracted * prefix of the target word. * * Caller must position iterator at right location using lower bound * functionality in the dictionary. */ template [[nodiscard]] bool is_match(const char* word, DictionaryConstIteratorType& itr, const DfaStringComparator::DataStoreType& data_store); }; }