aboutsummaryrefslogtreecommitdiffstats
path: root/searchlib/src/vespa/searchlib/attribute/dfa_fuzzy_matcher.h
diff options
context:
space:
mode:
authorbjormel <bjormel@yahooinc.com>2023-10-01 12:23:12 +0000
committerbjormel <bjormel@yahooinc.com>2023-10-01 12:23:12 +0000
commite9058b555d4dfea2f6c872d9a677e8678b569569 (patch)
treefa1b67c6e39712c1e0d9f308b0dd55573b43f913 /searchlib/src/vespa/searchlib/attribute/dfa_fuzzy_matcher.h
parent0ad931fa86658904fe9212b014d810236b0e00e4 (diff)
parent16030193ec04ee41e98779a3d7ee6a6c1d0d0d6f (diff)
Merge branch 'master' into bjormel/aws-main-controller
Diffstat (limited to 'searchlib/src/vespa/searchlib/attribute/dfa_fuzzy_matcher.h')
-rw-r--r--searchlib/src/vespa/searchlib/attribute/dfa_fuzzy_matcher.h48
1 files changed, 40 insertions, 8 deletions
diff --git a/searchlib/src/vespa/searchlib/attribute/dfa_fuzzy_matcher.h b/searchlib/src/vespa/searchlib/attribute/dfa_fuzzy_matcher.h
index fcba13f85a4..8e5b3ce0ccd 100644
--- a/searchlib/src/vespa/searchlib/attribute/dfa_fuzzy_matcher.h
+++ b/searchlib/src/vespa/searchlib/attribute/dfa_fuzzy_matcher.h
@@ -5,6 +5,7 @@
#include "dfa_string_comparator.h"
#include <vespa/vespalib/datastore/atomic_entry_ref.h>
#include <vespa/vespalib/fuzzy/levenshtein_dfa.h>
+#include <iostream>
namespace search::attribute {
@@ -17,22 +18,53 @@ namespace search::attribute {
class DfaFuzzyMatcher {
private:
vespalib::fuzzy::LevenshteinDfa _dfa;
- std::vector<uint32_t> _successor;
+ std::vector<uint32_t> _successor;
+ std::vector<uint32_t> _prefix;
+ uint32_t _prefix_size;
+ bool _cased;
+ const char* skip_prefix(const char* word) const;
public:
- DfaFuzzyMatcher(std::string_view target, uint8_t max_edits, bool cased, vespalib::fuzzy::LevenshteinDfa::DfaType dfa_type);
+ DfaFuzzyMatcher(std::string_view target, uint8_t max_edits, uint32_t prefix_size, bool cased, vespalib::fuzzy::LevenshteinDfa::DfaType dfa_type);
~DfaFuzzyMatcher();
+ bool is_match(const char *word) const;
+
+ /*
+ * If prefix size is nonzero then this variant of is_match()
+ * should only be called with words that start with the extracted
+ * prefix of the target word.
+ *
+ * The caller must position the iterator at the right location using the
+ * lower bound functionality of the dictionary.
+ */
template <typename DictionaryConstIteratorType>
bool is_match(const char* word, DictionaryConstIteratorType& itr, const DfaStringComparator::DataStoreType& data_store) {
- auto match = _dfa.match(word, _successor);
- if (match.matches()) {
- return true;
+ if (_prefix_size > 0) {
+ word = skip_prefix(word);
+ if (_prefix.size() < _prefix_size) {
+ if (*word == '\0') {
+ return true;
+ }
+ _successor.resize(_prefix.size());
+ _successor.emplace_back(1);
+ } else {
+ _successor.resize(_prefix.size());
+ auto match = _dfa.match(word, _successor);
+ if (match.matches()) {
+ return true;
+ }
+ }
} else {
- DfaStringComparator cmp(data_store, _successor);
- itr.seek(vespalib::datastore::AtomicEntryRef(), cmp);
- return false;
+ _successor.clear();
+ auto match = _dfa.match(word, _successor);
+ if (match.matches()) {
+ return true;
+ }
}
+ DfaStringComparator cmp(data_store, _successor);
+ itr.seek(vespalib::datastore::AtomicEntryRef(), cmp);
+ return false;
}
};