aboutsummaryrefslogtreecommitdiffstats
path: root/searchlib/src/vespa/searchlib/attribute/dfa_fuzzy_matcher.h
diff options
context:
space:
mode:
Diffstat (limited to 'searchlib/src/vespa/searchlib/attribute/dfa_fuzzy_matcher.h')
-rw-r--r--searchlib/src/vespa/searchlib/attribute/dfa_fuzzy_matcher.h48
1 files changed, 40 insertions, 8 deletions
diff --git a/searchlib/src/vespa/searchlib/attribute/dfa_fuzzy_matcher.h b/searchlib/src/vespa/searchlib/attribute/dfa_fuzzy_matcher.h
index fcba13f85a4..8e5b3ce0ccd 100644
--- a/searchlib/src/vespa/searchlib/attribute/dfa_fuzzy_matcher.h
+++ b/searchlib/src/vespa/searchlib/attribute/dfa_fuzzy_matcher.h
@@ -5,6 +5,7 @@
#include "dfa_string_comparator.h"
#include <vespa/vespalib/datastore/atomic_entry_ref.h>
#include <vespa/vespalib/fuzzy/levenshtein_dfa.h>
+#include <iostream>
namespace search::attribute {
@@ -17,22 +18,53 @@ namespace search::attribute {
class DfaFuzzyMatcher {
private:
vespalib::fuzzy::LevenshteinDfa _dfa;
- std::vector<uint32_t> _successor;
+ std::vector<uint32_t> _successor;
+ std::vector<uint32_t> _prefix;
+ uint32_t _prefix_size;
+ bool _cased;
+ const char* skip_prefix(const char* word) const;
public:
- DfaFuzzyMatcher(std::string_view target, uint8_t max_edits, bool cased, vespalib::fuzzy::LevenshteinDfa::DfaType dfa_type);
+ DfaFuzzyMatcher(std::string_view target, uint8_t max_edits, uint32_t prefix_size, bool cased, vespalib::fuzzy::LevenshteinDfa::DfaType dfa_type);
~DfaFuzzyMatcher();
+ bool is_match(const char *word) const;
+
+ /*
+ * If prefix size is nonzero then this variant of is_match()
+ * should only be called with words that starts with the extracted
+ * prefix of the target word.
+ *
+ * Caller must position iterator at right location using lower bound
+ * functionality in the dictionary.
+ */
template <typename DictionaryConstIteratorType>
bool is_match(const char* word, DictionaryConstIteratorType& itr, const DfaStringComparator::DataStoreType& data_store) {
- auto match = _dfa.match(word, _successor);
- if (match.matches()) {
- return true;
+ if (_prefix_size > 0) {
+ word = skip_prefix(word);
+ if (_prefix.size() < _prefix_size) {
+ if (*word == '\0') {
+ return true;
+ }
+ _successor.resize(_prefix.size());
+ _successor.emplace_back(1);
+ } else {
+ _successor.resize(_prefix.size());
+ auto match = _dfa.match(word, _successor);
+ if (match.matches()) {
+ return true;
+ }
+ }
} else {
- DfaStringComparator cmp(data_store, _successor);
- itr.seek(vespalib::datastore::AtomicEntryRef(), cmp);
- return false;
+ _successor.clear();
+ auto match = _dfa.match(word, _successor);
+ if (match.matches()) {
+ return true;
+ }
}
+ DfaStringComparator cmp(data_store, _successor);
+ itr.seek(vespalib::datastore::AtomicEntryRef(), cmp);
+ return false;
}
};