diff options
4 files changed, 18 insertions, 7 deletions
diff --git a/searchlib/src/tests/attribute/dfa_fuzzy_matcher/dfa_fuzzy_matcher_test.cpp b/searchlib/src/tests/attribute/dfa_fuzzy_matcher/dfa_fuzzy_matcher_test.cpp index 433ad9e7671..8ba8c62c5ff 100644 --- a/searchlib/src/tests/attribute/dfa_fuzzy_matcher/dfa_fuzzy_matcher_test.cpp +++ b/searchlib/src/tests/attribute/dfa_fuzzy_matcher/dfa_fuzzy_matcher_test.cpp @@ -197,7 +197,7 @@ dfa_fuzzy_match_in_dictionary_no_skip(std::string_view target, const StringEnumS size_t seeks = 0; for (;itr.valid(); ++itr) { auto word = store.get_value(itr.getKey().load_relaxed()); - if (matcher.is_match(word)) { + if (matcher.is_match(std::string_view(word))) { ++matches; if (collect_matches) { matched_words.push_back(word); diff --git a/searchlib/src/vespa/searchlib/attribute/dfa_fuzzy_matcher.cpp b/searchlib/src/vespa/searchlib/attribute/dfa_fuzzy_matcher.cpp index 9d6e2e9815d..5f3ab9cd3d8 100644 --- a/searchlib/src/vespa/searchlib/attribute/dfa_fuzzy_matcher.cpp +++ b/searchlib/src/vespa/searchlib/attribute/dfa_fuzzy_matcher.cpp @@ -54,6 +54,11 @@ DfaFuzzyMatcher::DfaFuzzyMatcher(std::string_view target, uint8_t max_edits, uin _successor = _prefix; } +DfaFuzzyMatcher::DfaFuzzyMatcher(std::string_view target, uint8_t max_edits, uint32_t prefix_size, bool cased) + : DfaFuzzyMatcher(target, max_edits, prefix_size, cased, LevenshteinDfa::DfaType::Table) +{ +} + DfaFuzzyMatcher::~DfaFuzzyMatcher() = default; const char* @@ -69,10 +74,10 @@ DfaFuzzyMatcher::skip_prefix(const char* word) const } bool -DfaFuzzyMatcher::is_match(const char* word) const +DfaFuzzyMatcher::is_match(std::string_view word) const { if (_prefix_size > 0) { - Utf8ReaderForZTS reader(word); + Utf8Reader reader(word.data(), word.size()); size_t pos = 0; for (; pos < _prefix.size() && reader.hasMore(); ++pos) { uint32_t code_point = reader.getChar(); @@ -89,7 +94,7 @@ DfaFuzzyMatcher::is_match(const char* word) const if (pos != _prefix_size) { return false; } - word = reader.get_current_ptr(); + word = word.substr(reader.getPos()); } auto match = _dfa.match(word); return match.matches(); diff --git a/searchlib/src/vespa/searchlib/attribute/dfa_fuzzy_matcher.h b/searchlib/src/vespa/searchlib/attribute/dfa_fuzzy_matcher.h index 51457129637..653af602c0d 100644 --- a/searchlib/src/vespa/searchlib/attribute/dfa_fuzzy_matcher.h +++ b/searchlib/src/vespa/searchlib/attribute/dfa_fuzzy_matcher.h @@ -27,9 +27,14 @@ private: const char* skip_prefix(const char* word) const; public: DfaFuzzyMatcher(std::string_view target, uint8_t max_edits, uint32_t prefix_size, bool cased, vespalib::fuzzy::LevenshteinDfa::DfaType dfa_type); + DfaFuzzyMatcher(std::string_view target, uint8_t max_edits, uint32_t prefix_size, bool cased); // Defaults to table-based DFA ~DfaFuzzyMatcher(); - bool is_match(const char *word) const; + [[nodiscard]] static constexpr bool supports_max_edits(uint8_t edits) noexcept { + return (edits == 1 || edits == 2); + } + + [[nodiscard]] bool is_match(std::string_view word) const; /* * If prefix size is nonzero then this variant of is_match() @@ -40,7 +45,7 @@ public: * functionality in the dictionary. */ template <typename DictionaryConstIteratorType> - bool is_match(const char* word, DictionaryConstIteratorType& itr, const DfaStringComparator::DataStoreType& data_store); + [[nodiscard]] bool is_match(const char* word, DictionaryConstIteratorType& itr, const DfaStringComparator::DataStoreType& data_store); }; } diff --git a/searchlib/src/vespa/searchlib/attribute/string_search_helper.cpp b/searchlib/src/vespa/searchlib/attribute/string_search_helper.cpp index 75885aa0402..f1a643dc376 100644 --- a/searchlib/src/vespa/searchlib/attribute/string_search_helper.cpp +++ b/searchlib/src/vespa/searchlib/attribute/string_search_helper.cpp @@ -80,7 +80,8 @@ StringSearchHelper::isMatch(const char *src) const noexcept { return getRegex().valid() && getRegex().partial_match(std::string_view(src)); } if (__builtin_expect(isFuzzy(), false)) { - return _dfa_fuzzy_matcher ? _dfa_fuzzy_matcher->is_match(src) : getFuzzyMatcher().isMatch(src); + return _dfa_fuzzy_matcher ? _dfa_fuzzy_matcher->is_match(std::string_view(src)) + : getFuzzyMatcher().isMatch(std::string_view(src)); } if (__builtin_expect(isCased(), false)) { int res = strncmp(_term, src, _termLen); |