diff options
author | Geir Storli <geirst@yahooinc.com> | 2023-11-23 10:42:17 +0100 |
---|---|---|
committer | GitHub <noreply@github.com> | 2023-11-23 10:42:17 +0100 |
commit | afde670ae04cd7e4645a180816a61c697d29698c (patch) | |
tree | e6c662f25a00338c6ec271736790faff4a121d55 /searchlib | |
parent | 44cbb7e7012614d09597cd30f78e561250f3ef92 (diff) | |
parent | 66eaacb62cf323321b336dcb3619f990f9a89863 (diff) |
Merge pull request #29425 from vespa-engine/toregge/check-for-valid-successor
Check for valid successor.
Diffstat (limited to 'searchlib')
-rw-r--r-- | searchlib/src/vespa/searchlib/attribute/dfa_fuzzy_matcher.cpp | 40 | ||||
-rw-r--r-- | searchlib/src/vespa/searchlib/attribute/dfa_fuzzy_matcher.h | 31 |
2 files changed, 43 insertions, 28 deletions
```diff
diff --git a/searchlib/src/vespa/searchlib/attribute/dfa_fuzzy_matcher.cpp b/searchlib/src/vespa/searchlib/attribute/dfa_fuzzy_matcher.cpp
index 040bd9ccc98..9d6e2e9815d 100644
--- a/searchlib/src/vespa/searchlib/attribute/dfa_fuzzy_matcher.cpp
+++ b/searchlib/src/vespa/searchlib/attribute/dfa_fuzzy_matcher.cpp
@@ -1,6 +1,7 @@
 // Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
 
 #include "dfa_fuzzy_matcher.h"
+#include "i_enum_store_dictionary.h"
 #include <vespa/vespalib/text/utf8.h>
 #include <vespa/vespalib/text/lowercase.h>
 
@@ -94,4 +95,43 @@ DfaFuzzyMatcher::is_match(const char* word) const
     return match.matches();
 }
 
+template <typename DictionaryConstIteratorType>
+bool
+DfaFuzzyMatcher::is_match(const char* word, DictionaryConstIteratorType& itr, const DfaStringComparator::DataStoreType& data_store) {
+    if (_prefix_size > 0) {
+        word = skip_prefix(word);
+        if (_prefix.size() < _prefix_size) {
+            if (*word == '\0') {
+                return true;
+            }
+            _successor.resize(_prefix.size());
+            _successor.emplace_back(beyond_unicode);
+        } else {
+            _successor.resize(_prefix.size());
+            auto match = _dfa.match(word, _successor);
+            if (match.matches()) {
+                return true;
+            }
+        }
+    } else {
+        _successor.clear();
+        auto match = _dfa.match(word, _successor);
+        if (match.matches()) {
+            return true;
+        }
+    }
+    DfaStringComparator cmp(data_store, _successor, _cased);
+    assert(cmp.less(itr.getKey().load_acquire(), vespalib::datastore::EntryRef()));
+    itr.seek(vespalib::datastore::AtomicEntryRef(), cmp);
+    return false;
+}
+
+template
+bool
+DfaFuzzyMatcher::is_match(const char* word, EnumPostingTree::ConstIterator& itr, const DfaStringComparator::DataStoreType& data_store);
+
+template
+bool
+DfaFuzzyMatcher::is_match(const char* word, EnumTree::ConstIterator& itr, const DfaStringComparator::DataStoreType& data_store);
+
 }
diff --git a/searchlib/src/vespa/searchlib/attribute/dfa_fuzzy_matcher.h b/searchlib/src/vespa/searchlib/attribute/dfa_fuzzy_matcher.h
index 9de77085fae..51457129637 100644
--- a/searchlib/src/vespa/searchlib/attribute/dfa_fuzzy_matcher.h
+++ b/searchlib/src/vespa/searchlib/attribute/dfa_fuzzy_matcher.h
@@ -5,7 +5,6 @@
 #include "dfa_string_comparator.h"
 #include <vespa/vespalib/datastore/atomic_entry_ref.h>
 #include <vespa/vespalib/fuzzy/levenshtein_dfa.h>
-#include <iostream>
 
 namespace search::attribute {
 
@@ -23,6 +22,8 @@ private:
     uint32_t _prefix_size;
     bool _cased;
 
+    static constexpr uint32_t beyond_unicode = 0x110000;
+
     const char* skip_prefix(const char* word) const;
 public:
     DfaFuzzyMatcher(std::string_view target, uint8_t max_edits, uint32_t prefix_size, bool cased, vespalib::fuzzy::LevenshteinDfa::DfaType dfa_type);
@@ -39,33 +40,7 @@ public:
      * functionality in the dictionary.
      */
     template <typename DictionaryConstIteratorType>
-    bool is_match(const char* word, DictionaryConstIteratorType& itr, const DfaStringComparator::DataStoreType& data_store) {
-        if (_prefix_size > 0) {
-            word = skip_prefix(word);
-            if (_prefix.size() < _prefix_size) {
-                if (*word == '\0') {
-                    return true;
-                }
-                _successor.resize(_prefix.size());
-                _successor.emplace_back(1);
-            } else {
-                _successor.resize(_prefix.size());
-                auto match = _dfa.match(word, _successor);
-                if (match.matches()) {
-                    return true;
-                }
-            }
-        } else {
-            _successor.clear();
-            auto match = _dfa.match(word, _successor);
-            if (match.matches()) {
-                return true;
-            }
-        }
-        DfaStringComparator cmp(data_store, _successor, _cased);
-        itr.seek(vespalib::datastore::AtomicEntryRef(), cmp);
-        return false;
-    }
+    bool is_match(const char* word, DictionaryConstIteratorType& itr, const DfaStringComparator::DataStoreType& data_store);
 };
 
 }
```