diff options
author | Tor Brede Vekterli <vekterli@yahooinc.com> | 2023-09-27 11:58:57 +0000 |
---|---|---|
committer | Tor Brede Vekterli <vekterli@yahooinc.com> | 2023-09-27 12:07:04 +0000 |
commit | be742a8039bb89378cb91e3a6c2b4535c0b36a1d (patch) | |
tree | aaa27be1378cf323bae3edd32633a752c2c55400 /searchlib | |
parent | 77c10925f7994e96e71f1762accdbfbc43473b99 (diff) |
Preserve prefix of input DFA successor string
If a non-empty string is passed as a successor to the DFA,
the contents of the string will be preserved, i.e. the generated
successor will always be _appended_ to any existing data. This allows
for less manual fiddling when implementing prefix locking by the
caller (no need to concatenate a prefix with the generated successor
string).
Note: this adds some cognitive cost, as the caller is now solely
responsible for resetting the successor between calls.
The existing fuzzy matcher has been updated to no longer require
a separation between successor prefix and suffix; it can now
safely reuse the successor prefix between calls.
Diffstat (limited to 'searchlib')
-rw-r--r-- | searchlib/src/vespa/searchlib/attribute/dfa_fuzzy_matcher.cpp | 1 | ||||
-rw-r--r-- | searchlib/src/vespa/searchlib/attribute/dfa_fuzzy_matcher.h | 7 |
2 files changed, 3 insertions, 5 deletions
diff --git a/searchlib/src/vespa/searchlib/attribute/dfa_fuzzy_matcher.cpp b/searchlib/src/vespa/searchlib/attribute/dfa_fuzzy_matcher.cpp index b16fdc12a9a..18f480eebcd 100644 --- a/searchlib/src/vespa/searchlib/attribute/dfa_fuzzy_matcher.cpp +++ b/searchlib/src/vespa/searchlib/attribute/dfa_fuzzy_matcher.cpp @@ -47,7 +47,6 @@ DfaFuzzyMatcher::DfaFuzzyMatcher(std::string_view target, uint8_t max_edits, uin : _dfa(vespalib::fuzzy::LevenshteinDfa::build(extract_suffix(target, prefix_size), max_edits, (cased ? LevenshteinDfa::Casing::Cased : LevenshteinDfa::Casing::Uncased), dfa_type)), _successor(), _prefix(extract_prefix(target, prefix_size, cased)), - _successor_suffix(), _prefix_size(prefix_size), _cased(cased) { diff --git a/searchlib/src/vespa/searchlib/attribute/dfa_fuzzy_matcher.h b/searchlib/src/vespa/searchlib/attribute/dfa_fuzzy_matcher.h index 7116b4d8662..8e5b3ce0ccd 100644 --- a/searchlib/src/vespa/searchlib/attribute/dfa_fuzzy_matcher.h +++ b/searchlib/src/vespa/searchlib/attribute/dfa_fuzzy_matcher.h @@ -20,7 +20,6 @@ private: vespalib::fuzzy::LevenshteinDfa _dfa; std::vector<uint32_t> _successor; std::vector<uint32_t> _prefix; - std::vector<uint32_t> _successor_suffix; uint32_t _prefix_size; bool _cased; @@ -50,14 +49,14 @@ public: _successor.resize(_prefix.size()); _successor.emplace_back(1); } else { - auto match = _dfa.match(word, _successor_suffix); + _successor.resize(_prefix.size()); + auto match = _dfa.match(word, _successor); if (match.matches()) { return true; } - _successor.resize(_prefix.size()); - _successor.insert(_successor.end(), _successor_suffix.begin(), _successor_suffix.end()); } } else { + _successor.clear(); auto match = _dfa.match(word, _successor); if (match.matches()) { return true; |