summary | refs | log | tree | commit | diff | stats
path: root/searchlib
diff options
context:
space:
mode:
author Tor Egge <tegge@vespa.ai> 2024-01-18 12:57:17 +0100
committer GitHub <noreply@github.com> 2024-01-18 12:57:17 +0100
commit 1f65f322a8d0f67e83129888b873a241c243a851 (patch)
tree e0fb32aee4627d44823a756123caf63c70db03d0 /searchlib
parent 5f5066055c2fbdf57f3126f2c39a62ac02f01f10 (diff)
parent a8476292a061bf609e24e2731202b634bba46b95 (diff)
Merge pull request #29964 from vespa-engine/vekterli/use-string-view-for-non-dict-match
Use `string_view` for standalone DFA fuzzy match function
Diffstat (limited to 'searchlib')
-rw-r--r-- searchlib/src/tests/attribute/dfa_fuzzy_matcher/dfa_fuzzy_matcher_test.cpp | 2
-rw-r--r-- searchlib/src/vespa/searchlib/attribute/dfa_fuzzy_matcher.cpp | 11
-rw-r--r-- searchlib/src/vespa/searchlib/attribute/dfa_fuzzy_matcher.h | 9
-rw-r--r-- searchlib/src/vespa/searchlib/attribute/string_search_helper.cpp | 3
4 files changed, 18 insertions, 7 deletions
diff --git a/searchlib/src/tests/attribute/dfa_fuzzy_matcher/dfa_fuzzy_matcher_test.cpp b/searchlib/src/tests/attribute/dfa_fuzzy_matcher/dfa_fuzzy_matcher_test.cpp
index 433ad9e7671..8ba8c62c5ff 100644
--- a/searchlib/src/tests/attribute/dfa_fuzzy_matcher/dfa_fuzzy_matcher_test.cpp
+++ b/searchlib/src/tests/attribute/dfa_fuzzy_matcher/dfa_fuzzy_matcher_test.cpp
@@ -197,7 +197,7 @@ dfa_fuzzy_match_in_dictionary_no_skip(std::string_view target, const StringEnumS
size_t seeks = 0;
for (;itr.valid(); ++itr) {
auto word = store.get_value(itr.getKey().load_relaxed());
- if (matcher.is_match(word)) {
+ if (matcher.is_match(std::string_view(word))) {
++matches;
if (collect_matches) {
matched_words.push_back(word);
diff --git a/searchlib/src/vespa/searchlib/attribute/dfa_fuzzy_matcher.cpp b/searchlib/src/vespa/searchlib/attribute/dfa_fuzzy_matcher.cpp
index 9d6e2e9815d..5f3ab9cd3d8 100644
--- a/searchlib/src/vespa/searchlib/attribute/dfa_fuzzy_matcher.cpp
+++ b/searchlib/src/vespa/searchlib/attribute/dfa_fuzzy_matcher.cpp
@@ -54,6 +54,11 @@ DfaFuzzyMatcher::DfaFuzzyMatcher(std::string_view target, uint8_t max_edits, uin
_successor = _prefix;
}
+DfaFuzzyMatcher::DfaFuzzyMatcher(std::string_view target, uint8_t max_edits, uint32_t prefix_size, bool cased)
+ : DfaFuzzyMatcher(target, max_edits, prefix_size, cased, LevenshteinDfa::DfaType::Table)
+{
+}
+
DfaFuzzyMatcher::~DfaFuzzyMatcher() = default;
const char*
@@ -69,10 +74,10 @@ DfaFuzzyMatcher::skip_prefix(const char* word) const
}
bool
-DfaFuzzyMatcher::is_match(const char* word) const
+DfaFuzzyMatcher::is_match(std::string_view word) const
{
if (_prefix_size > 0) {
- Utf8ReaderForZTS reader(word);
+ Utf8Reader reader(word.data(), word.size());
size_t pos = 0;
for (; pos < _prefix.size() && reader.hasMore(); ++pos) {
uint32_t code_point = reader.getChar();
@@ -89,7 +94,7 @@ DfaFuzzyMatcher::is_match(const char* word) const
if (pos != _prefix_size) {
return false;
}
- word = reader.get_current_ptr();
+ word = word.substr(reader.getPos());
}
auto match = _dfa.match(word);
return match.matches();
diff --git a/searchlib/src/vespa/searchlib/attribute/dfa_fuzzy_matcher.h b/searchlib/src/vespa/searchlib/attribute/dfa_fuzzy_matcher.h
index 51457129637..653af602c0d 100644
--- a/searchlib/src/vespa/searchlib/attribute/dfa_fuzzy_matcher.h
+++ b/searchlib/src/vespa/searchlib/attribute/dfa_fuzzy_matcher.h
@@ -27,9 +27,14 @@ private:
const char* skip_prefix(const char* word) const;
public:
DfaFuzzyMatcher(std::string_view target, uint8_t max_edits, uint32_t prefix_size, bool cased, vespalib::fuzzy::LevenshteinDfa::DfaType dfa_type);
+ DfaFuzzyMatcher(std::string_view target, uint8_t max_edits, uint32_t prefix_size, bool cased); // Defaults to table-based DFA
~DfaFuzzyMatcher();
- bool is_match(const char *word) const;
+ [[nodiscard]] static constexpr bool supports_max_edits(uint8_t edits) noexcept {
+ return (edits == 1 || edits == 2);
+ }
+
+ [[nodiscard]] bool is_match(std::string_view word) const;
/*
* If prefix size is nonzero then this variant of is_match()
@@ -40,7 +45,7 @@ public:
* functionality in the dictionary.
*/
template <typename DictionaryConstIteratorType>
- bool is_match(const char* word, DictionaryConstIteratorType& itr, const DfaStringComparator::DataStoreType& data_store);
+ [[nodiscard]] bool is_match(const char* word, DictionaryConstIteratorType& itr, const DfaStringComparator::DataStoreType& data_store);
};
}
diff --git a/searchlib/src/vespa/searchlib/attribute/string_search_helper.cpp b/searchlib/src/vespa/searchlib/attribute/string_search_helper.cpp
index 75885aa0402..f1a643dc376 100644
--- a/searchlib/src/vespa/searchlib/attribute/string_search_helper.cpp
+++ b/searchlib/src/vespa/searchlib/attribute/string_search_helper.cpp
@@ -80,7 +80,8 @@ StringSearchHelper::isMatch(const char *src) const noexcept {
return getRegex().valid() && getRegex().partial_match(std::string_view(src));
}
if (__builtin_expect(isFuzzy(), false)) {
- return _dfa_fuzzy_matcher ? _dfa_fuzzy_matcher->is_match(src) : getFuzzyMatcher().isMatch(src);
+ return _dfa_fuzzy_matcher ? _dfa_fuzzy_matcher->is_match(std::string_view(src))
+ : getFuzzyMatcher().isMatch(std::string_view(src));
}
if (__builtin_expect(isCased(), false)) {
int res = strncmp(_term, src, _termLen);