summary | refs | log | tree | commit | diff | stats
path: root/searchlib
diff options
context:
space:
mode:
author Tor Brede Vekterli <vekterli@yahooinc.com> 2023-09-27 11:58:57 +0000
committer Tor Brede Vekterli <vekterli@yahooinc.com> 2023-09-27 12:07:04 +0000
commit be742a8039bb89378cb91e3a6c2b4535c0b36a1d (patch)
tree aaa27be1378cf323bae3edd32633a752c2c55400 /searchlib
parent 77c10925f7994e96e71f1762accdbfbc43473b99 (diff)
Preserve prefix of input DFA successor string
If a non-empty string is passed as a successor to the DFA, the contents of the string will be preserved, i.e. the successor will always be _appended_ to any existing data. This allows for less manual fiddling when implementing prefix locking by the caller (no need to concatenate a prefix with the generated successor string). Note: this adds some cognitive cost, since the caller is now entirely responsible for resetting the successor between calls. The existing fuzzy matcher has been updated to no longer require a separation between successor prefix and suffix; it can now safely reuse the successor prefix between calls.
Diffstat (limited to 'searchlib')
-rw-r--r-- searchlib/src/vespa/searchlib/attribute/dfa_fuzzy_matcher.cpp 1
-rw-r--r-- searchlib/src/vespa/searchlib/attribute/dfa_fuzzy_matcher.h 7
2 files changed, 3 insertions, 5 deletions
diff --git a/searchlib/src/vespa/searchlib/attribute/dfa_fuzzy_matcher.cpp b/searchlib/src/vespa/searchlib/attribute/dfa_fuzzy_matcher.cpp
index b16fdc12a9a..18f480eebcd 100644
--- a/searchlib/src/vespa/searchlib/attribute/dfa_fuzzy_matcher.cpp
+++ b/searchlib/src/vespa/searchlib/attribute/dfa_fuzzy_matcher.cpp
@@ -47,7 +47,6 @@ DfaFuzzyMatcher::DfaFuzzyMatcher(std::string_view target, uint8_t max_edits, uin
: _dfa(vespalib::fuzzy::LevenshteinDfa::build(extract_suffix(target, prefix_size), max_edits, (cased ? LevenshteinDfa::Casing::Cased : LevenshteinDfa::Casing::Uncased), dfa_type)),
_successor(),
_prefix(extract_prefix(target, prefix_size, cased)),
- _successor_suffix(),
_prefix_size(prefix_size),
_cased(cased)
{
diff --git a/searchlib/src/vespa/searchlib/attribute/dfa_fuzzy_matcher.h b/searchlib/src/vespa/searchlib/attribute/dfa_fuzzy_matcher.h
index 7116b4d8662..8e5b3ce0ccd 100644
--- a/searchlib/src/vespa/searchlib/attribute/dfa_fuzzy_matcher.h
+++ b/searchlib/src/vespa/searchlib/attribute/dfa_fuzzy_matcher.h
@@ -20,7 +20,6 @@ private:
vespalib::fuzzy::LevenshteinDfa _dfa;
std::vector<uint32_t> _successor;
std::vector<uint32_t> _prefix;
- std::vector<uint32_t> _successor_suffix;
uint32_t _prefix_size;
bool _cased;
@@ -50,14 +49,14 @@ public:
_successor.resize(_prefix.size());
_successor.emplace_back(1);
} else {
- auto match = _dfa.match(word, _successor_suffix);
+ _successor.resize(_prefix.size());
+ auto match = _dfa.match(word, _successor);
if (match.matches()) {
return true;
}
- _successor.resize(_prefix.size());
- _successor.insert(_successor.end(), _successor_suffix.begin(), _successor_suffix.end());
}
} else {
+ _successor.clear();
auto match = _dfa.match(word, _successor);
if (match.matches()) {
return true;