summary | refs | log | tree | commit | diff | stats
path: root/searchlib
diff options
context:
space:
mode:
author Geir Storli <geirst@yahooinc.com> 2023-11-23 10:42:17 +0100
committer GitHub <noreply@github.com> 2023-11-23 10:42:17 +0100
commit afde670ae04cd7e4645a180816a61c697d29698c (patch)
tree e6c662f25a00338c6ec271736790faff4a121d55 /searchlib
parent 44cbb7e7012614d09597cd30f78e561250f3ef92 (diff)
parent 66eaacb62cf323321b336dcb3619f990f9a89863 (diff)
Merge pull request #29425 from vespa-engine/toregge/check-for-valid-successor
Check for valid successor.
Diffstat (limited to 'searchlib')
-rw-r--r-- searchlib/src/vespa/searchlib/attribute/dfa_fuzzy_matcher.cpp 40
-rw-r--r-- searchlib/src/vespa/searchlib/attribute/dfa_fuzzy_matcher.h 31
2 files changed, 43 insertions, 28 deletions
diff --git a/searchlib/src/vespa/searchlib/attribute/dfa_fuzzy_matcher.cpp b/searchlib/src/vespa/searchlib/attribute/dfa_fuzzy_matcher.cpp
index 040bd9ccc98..9d6e2e9815d 100644
--- a/searchlib/src/vespa/searchlib/attribute/dfa_fuzzy_matcher.cpp
+++ b/searchlib/src/vespa/searchlib/attribute/dfa_fuzzy_matcher.cpp
@@ -1,6 +1,7 @@
// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
#include "dfa_fuzzy_matcher.h"
+#include "i_enum_store_dictionary.h"
#include <vespa/vespalib/text/utf8.h>
#include <vespa/vespalib/text/lowercase.h>
@@ -94,4 +95,43 @@ DfaFuzzyMatcher::is_match(const char* word) const
return match.matches();
}
+template <typename DictionaryConstIteratorType>
+bool
+DfaFuzzyMatcher::is_match(const char* word, DictionaryConstIteratorType& itr, const DfaStringComparator::DataStoreType& data_store) {
+ if (_prefix_size > 0) {
+ word = skip_prefix(word);
+ if (_prefix.size() < _prefix_size) {
+ if (*word == '\0') {
+ return true;
+ }
+ _successor.resize(_prefix.size());
+ _successor.emplace_back(beyond_unicode);
+ } else {
+ _successor.resize(_prefix.size());
+ auto match = _dfa.match(word, _successor);
+ if (match.matches()) {
+ return true;
+ }
+ }
+ } else {
+ _successor.clear();
+ auto match = _dfa.match(word, _successor);
+ if (match.matches()) {
+ return true;
+ }
+ }
+ DfaStringComparator cmp(data_store, _successor, _cased);
+ assert(cmp.less(itr.getKey().load_acquire(), vespalib::datastore::EntryRef()));
+ itr.seek(vespalib::datastore::AtomicEntryRef(), cmp);
+ return false;
+}
+
+template
+bool
+DfaFuzzyMatcher::is_match(const char* word, EnumPostingTree::ConstIterator& itr, const DfaStringComparator::DataStoreType& data_store);
+
+template
+bool
+DfaFuzzyMatcher::is_match(const char* word, EnumTree::ConstIterator& itr, const DfaStringComparator::DataStoreType& data_store);
+
}
diff --git a/searchlib/src/vespa/searchlib/attribute/dfa_fuzzy_matcher.h b/searchlib/src/vespa/searchlib/attribute/dfa_fuzzy_matcher.h
index 9de77085fae..51457129637 100644
--- a/searchlib/src/vespa/searchlib/attribute/dfa_fuzzy_matcher.h
+++ b/searchlib/src/vespa/searchlib/attribute/dfa_fuzzy_matcher.h
@@ -5,7 +5,6 @@
#include "dfa_string_comparator.h"
#include <vespa/vespalib/datastore/atomic_entry_ref.h>
#include <vespa/vespalib/fuzzy/levenshtein_dfa.h>
-#include <iostream>
namespace search::attribute {
@@ -23,6 +22,8 @@ private:
uint32_t _prefix_size;
bool _cased;
+ static constexpr uint32_t beyond_unicode = 0x110000;
+
const char* skip_prefix(const char* word) const;
public:
DfaFuzzyMatcher(std::string_view target, uint8_t max_edits, uint32_t prefix_size, bool cased, vespalib::fuzzy::LevenshteinDfa::DfaType dfa_type);
@@ -39,33 +40,7 @@ public:
* functionality in the dictionary.
*/
template <typename DictionaryConstIteratorType>
- bool is_match(const char* word, DictionaryConstIteratorType& itr, const DfaStringComparator::DataStoreType& data_store) {
- if (_prefix_size > 0) {
- word = skip_prefix(word);
- if (_prefix.size() < _prefix_size) {
- if (*word == '\0') {
- return true;
- }
- _successor.resize(_prefix.size());
- _successor.emplace_back(1);
- } else {
- _successor.resize(_prefix.size());
- auto match = _dfa.match(word, _successor);
- if (match.matches()) {
- return true;
- }
- }
- } else {
- _successor.clear();
- auto match = _dfa.match(word, _successor);
- if (match.matches()) {
- return true;
- }
- }
- DfaStringComparator cmp(data_store, _successor, _cased);
- itr.seek(vespalib::datastore::AtomicEntryRef(), cmp);
- return false;
- }
+ bool is_match(const char* word, DictionaryConstIteratorType& itr, const DfaStringComparator::DataStoreType& data_store);
};
}