summaryrefslogtreecommitdiffstats
path: root/vespalib
diff options
context:
space:
mode:
Diffstat (limited to 'vespalib')
-rw-r--r--vespalib/src/tests/fuzzy/fuzzy_matcher_test.cpp39
-rw-r--r--vespalib/src/tests/fuzzy/levenshtein_distance_test.cpp6
-rw-r--r--vespalib/src/vespa/vespalib/fuzzy/fuzzy_matcher.cpp6
-rw-r--r--vespalib/src/vespa/vespalib/fuzzy/fuzzy_matcher.h3
4 files changed, 41 insertions, 13 deletions
diff --git a/vespalib/src/tests/fuzzy/fuzzy_matcher_test.cpp b/vespalib/src/tests/fuzzy/fuzzy_matcher_test.cpp
index d94120e5bcf..9e4550db073 100644
--- a/vespalib/src/tests/fuzzy/fuzzy_matcher_test.cpp
+++ b/vespalib/src/tests/fuzzy/fuzzy_matcher_test.cpp
@@ -33,7 +33,7 @@ TEST(FuzzyMatcherTest, get_suffix_edge_cases) {
}
TEST(FuzzyMatcherTest, fuzzy_match_empty_prefix) {
- FuzzyMatcher fuzzy("abc", 2, 0, false);
+ FuzzyMatcher fuzzy("abc", 2, 0, false, false);
EXPECT_TRUE(fuzzy.isMatch("abc"));
EXPECT_TRUE(fuzzy.isMatch("ABC"));
EXPECT_TRUE(fuzzy.isMatch("ab1"));
@@ -42,15 +42,15 @@ TEST(FuzzyMatcherTest, fuzzy_match_empty_prefix) {
}
TEST(FuzzyMatcherTest, fuzzy_match_cased) {
- FuzzyMatcher fuzzy("abc", 2, 0, true);
+ FuzzyMatcher fuzzy("abc", 2, 0, true, false);
EXPECT_TRUE(fuzzy.isMatch("abc"));
EXPECT_TRUE(fuzzy.isMatch("abC"));
EXPECT_TRUE(fuzzy.isMatch("aBC"));
EXPECT_FALSE(fuzzy.isMatch("ABC"));
}
-TEST(FuzzyMatcherTest, fuzzy_match_with_prefix) {
- FuzzyMatcher fuzzy("abcdef", 2, 2, false);
+TEST(FuzzyMatcherTest, fuzzy_match_with_prefix_locking) {
+ FuzzyMatcher fuzzy("abcdef", 2, 2, false, false);
EXPECT_TRUE(fuzzy.isMatch("abcdef"));
EXPECT_TRUE(fuzzy.isMatch("ABCDEF"));
EXPECT_TRUE(fuzzy.isMatch("abcde1"));
@@ -59,22 +59,43 @@ TEST(FuzzyMatcherTest, fuzzy_match_with_prefix) {
EXPECT_FALSE(fuzzy.isMatch("12cdef"));
}
-TEST(FuzzyMatcherTest, get_prefix_is_empty) {
- FuzzyMatcher fuzzy("whatever", 2, 0, false);
+TEST(FuzzyMatcherTest, get_prefix_lock_length_is_zero) {
+ FuzzyMatcher fuzzy("whatever", 2, 0, false, false);
EXPECT_EQ(fuzzy.getPrefix(), "");
}
TEST(FuzzyMatcherTest, term_is_empty) {
- FuzzyMatcher fuzzy("", 2, 0, false);
+ FuzzyMatcher fuzzy("", 2, 0, false, false);
EXPECT_TRUE(fuzzy.isMatch(""));
EXPECT_TRUE(fuzzy.isMatch("a"));
EXPECT_TRUE(fuzzy.isMatch("aa"));
EXPECT_FALSE(fuzzy.isMatch("aaa"));
}
-TEST(FuzzyMatcherTest, get_prefix_non_empty) {
- FuzzyMatcher fuzzy("abcd", 2, 2, false);
+TEST(FuzzyMatcherTest, get_prefix_lock_length_non_zero) {
+ FuzzyMatcher fuzzy("abcd", 2, 2, false, false);
EXPECT_EQ(fuzzy.getPrefix(), "ab");
}
+TEST(FuzzyMatcherTest, fuzzy_prefix_matching_without_prefix_lock_length) {
+ FuzzyMatcher fuzzy("abc", 1, 0, false, true);
+ EXPECT_EQ(fuzzy.getPrefix(), "");
+ EXPECT_TRUE(fuzzy.isMatch("abc"));
+ EXPECT_TRUE(fuzzy.isMatch("abcdefgh"));
+ EXPECT_TRUE(fuzzy.isMatch("ab"));
+ EXPECT_TRUE(fuzzy.isMatch("abd"));
+ EXPECT_TRUE(fuzzy.isMatch("xabc"));
+ EXPECT_FALSE(fuzzy.isMatch("xy"));
+}
+
+TEST(FuzzyMatcherTest, fuzzy_prefix_matching_with_prefix_lock_length) {
+ FuzzyMatcher fuzzy("zoid", 1, 2, false, true);
+ EXPECT_EQ(fuzzy.getPrefix(), "zo");
+ EXPECT_TRUE(fuzzy.isMatch("zoidberg"));
+ EXPECT_TRUE(fuzzy.isMatch("zold"));
+ EXPECT_TRUE(fuzzy.isMatch("zoldberg"));
+ EXPECT_FALSE(fuzzy.isMatch("zoxx"));
+ EXPECT_FALSE(fuzzy.isMatch("loid"));
+}
+
GTEST_MAIN_RUN_ALL_TESTS()
diff --git a/vespalib/src/tests/fuzzy/levenshtein_distance_test.cpp b/vespalib/src/tests/fuzzy/levenshtein_distance_test.cpp
index 918cbc624db..61824fa4abf 100644
--- a/vespalib/src/tests/fuzzy/levenshtein_distance_test.cpp
+++ b/vespalib/src/tests/fuzzy/levenshtein_distance_test.cpp
@@ -74,7 +74,11 @@ TEST(LevenshteinDistance, prefix_match_edge_cases) {
EXPECT_EQ(prefix_calculate("xy", "", 2), std::optional{2});
EXPECT_EQ(prefix_calculate("xyz", "", 2), std::nullopt);
- // Max edits > 2 cases; not supported by DFA implementation.
+ // Max edits not in {1, 2} cases; not supported by DFA implementation.
+ EXPECT_EQ(prefix_calculate("", "", 0), std::optional{0});
+ EXPECT_EQ(prefix_calculate("abc", "abc", 0), std::optional{0});
+ EXPECT_EQ(prefix_calculate("abc", "abcde", 0), std::optional{0});
+ EXPECT_EQ(prefix_calculate("abc", "dbc", 0), std::nullopt);
EXPECT_EQ(prefix_calculate("abc", "", 3), std::optional{3});
EXPECT_EQ(prefix_calculate("abc", "xy", 3), std::optional{3});
EXPECT_EQ(prefix_calculate("abc", "xyz", 3), std::optional{3});
diff --git a/vespalib/src/vespa/vespalib/fuzzy/fuzzy_matcher.cpp b/vespalib/src/vespa/vespalib/fuzzy/fuzzy_matcher.cpp
index 71388f43b89..fbfa50aa57f 100644
--- a/vespalib/src/vespa/vespalib/fuzzy/fuzzy_matcher.cpp
+++ b/vespalib/src/vespa/vespalib/fuzzy/fuzzy_matcher.cpp
@@ -29,10 +29,12 @@ vespalib::FuzzyMatcher::FuzzyMatcher():
_folded_term_codepoints_suffix()
{}
-vespalib::FuzzyMatcher::FuzzyMatcher(std::string_view term, uint32_t max_edit_distance, uint32_t prefix_size, bool is_cased):
+vespalib::FuzzyMatcher::FuzzyMatcher(std::string_view term, uint32_t max_edit_distance, uint32_t prefix_size,
+ bool is_cased, bool is_prefix):
_max_edit_distance(max_edit_distance),
_prefix_size(prefix_size),
_is_cased(is_cased),
+ _is_prefix(is_prefix),
_folded_term_codepoints(_is_cased ? cased_convert_to_ucs4(term) : LowerCase::convert_to_ucs4(term)),
_folded_term_codepoints_prefix(get_prefix(_folded_term_codepoints, _prefix_size)),
_folded_term_codepoints_suffix(get_suffix(_folded_term_codepoints, _prefix_size))
@@ -73,7 +75,7 @@ bool vespalib::FuzzyMatcher::isMatch(std::string_view target) const {
return LevenshteinDistance::calculate(
_folded_term_codepoints_suffix,
get_suffix(targetCodepoints, _prefix_size),
- _max_edit_distance).has_value();
+ _max_edit_distance, _is_prefix).has_value();
}
vespalib::string vespalib::FuzzyMatcher::getPrefix() const {
diff --git a/vespalib/src/vespa/vespalib/fuzzy/fuzzy_matcher.h b/vespalib/src/vespa/vespalib/fuzzy/fuzzy_matcher.h
index aae6ca1c6e5..2c17283d20a 100644
--- a/vespalib/src/vespa/vespalib/fuzzy/fuzzy_matcher.h
+++ b/vespalib/src/vespa/vespalib/fuzzy/fuzzy_matcher.h
@@ -24,6 +24,7 @@ private:
uint32_t _max_edit_distance; // max edit distance
uint32_t _prefix_size; // prefix of a term that is considered frozen, i.e. non-fuzzy
bool _is_cased;
+ bool _is_prefix;
std::vector<uint32_t> _folded_term_codepoints;
@@ -34,7 +35,7 @@ public:
FuzzyMatcher();
FuzzyMatcher(const FuzzyMatcher &) = delete;
FuzzyMatcher & operator = (const FuzzyMatcher &) = delete;
- FuzzyMatcher(std::string_view term, uint32_t max_edit_distance, uint32_t prefix_size, bool is_cased);
+ FuzzyMatcher(std::string_view term, uint32_t max_edit_distance, uint32_t prefix_size, bool is_cased, bool is_prefix);
~FuzzyMatcher();
[[nodiscard]] bool isMatch(std::string_view target) const;