diff options
Diffstat (limited to 'vespalib/src')
4 files changed, 41 insertions, 13 deletions
diff --git a/vespalib/src/tests/fuzzy/fuzzy_matcher_test.cpp b/vespalib/src/tests/fuzzy/fuzzy_matcher_test.cpp index d94120e5bcf..9e4550db073 100644 --- a/vespalib/src/tests/fuzzy/fuzzy_matcher_test.cpp +++ b/vespalib/src/tests/fuzzy/fuzzy_matcher_test.cpp @@ -33,7 +33,7 @@ TEST(FuzzyMatcherTest, get_suffix_edge_cases) { } TEST(FuzzyMatcherTest, fuzzy_match_empty_prefix) { - FuzzyMatcher fuzzy("abc", 2, 0, false); + FuzzyMatcher fuzzy("abc", 2, 0, false, false); EXPECT_TRUE(fuzzy.isMatch("abc")); EXPECT_TRUE(fuzzy.isMatch("ABC")); EXPECT_TRUE(fuzzy.isMatch("ab1")); @@ -42,15 +42,15 @@ TEST(FuzzyMatcherTest, fuzzy_match_empty_prefix) { } TEST(FuzzyMatcherTest, fuzzy_match_cased) { - FuzzyMatcher fuzzy("abc", 2, 0, true); + FuzzyMatcher fuzzy("abc", 2, 0, true, false); EXPECT_TRUE(fuzzy.isMatch("abc")); EXPECT_TRUE(fuzzy.isMatch("abC")); EXPECT_TRUE(fuzzy.isMatch("aBC")); EXPECT_FALSE(fuzzy.isMatch("ABC")); } -TEST(FuzzyMatcherTest, fuzzy_match_with_prefix) { - FuzzyMatcher fuzzy("abcdef", 2, 2, false); +TEST(FuzzyMatcherTest, fuzzy_match_with_prefix_locking) { + FuzzyMatcher fuzzy("abcdef", 2, 2, false, false); EXPECT_TRUE(fuzzy.isMatch("abcdef")); EXPECT_TRUE(fuzzy.isMatch("ABCDEF")); EXPECT_TRUE(fuzzy.isMatch("abcde1")); @@ -59,22 +59,43 @@ TEST(FuzzyMatcherTest, fuzzy_match_with_prefix) { EXPECT_FALSE(fuzzy.isMatch("12cdef")); } -TEST(FuzzyMatcherTest, get_prefix_is_empty) { - FuzzyMatcher fuzzy("whatever", 2, 0, false); +TEST(FuzzyMatcherTest, get_prefix_lock_length_is_zero) { + FuzzyMatcher fuzzy("whatever", 2, 0, false, false); EXPECT_EQ(fuzzy.getPrefix(), ""); } TEST(FuzzyMatcherTest, term_is_empty) { - FuzzyMatcher fuzzy("", 2, 0, false); + FuzzyMatcher fuzzy("", 2, 0, false, false); EXPECT_TRUE(fuzzy.isMatch("")); EXPECT_TRUE(fuzzy.isMatch("a")); EXPECT_TRUE(fuzzy.isMatch("aa")); EXPECT_FALSE(fuzzy.isMatch("aaa")); } -TEST(FuzzyMatcherTest, get_prefix_non_empty) { - FuzzyMatcher fuzzy("abcd", 2, 2, false); +TEST(FuzzyMatcherTest, get_prefix_lock_length_non_zero) { + FuzzyMatcher fuzzy("abcd", 2, 2, false, false); EXPECT_EQ(fuzzy.getPrefix(), "ab"); } +TEST(FuzzyMatcherTest, fuzzy_prefix_matching_without_prefix_lock_length) { + FuzzyMatcher fuzzy("abc", 1, 0, false, true); + EXPECT_EQ(fuzzy.getPrefix(), ""); + EXPECT_TRUE(fuzzy.isMatch("abc")); + EXPECT_TRUE(fuzzy.isMatch("abcdefgh")); + EXPECT_TRUE(fuzzy.isMatch("ab")); + EXPECT_TRUE(fuzzy.isMatch("abd")); + EXPECT_TRUE(fuzzy.isMatch("xabc")); + EXPECT_FALSE(fuzzy.isMatch("xy")); +} + +TEST(FuzzyMatcherTest, fuzzy_prefix_matching_with_prefix_lock_length) { + FuzzyMatcher fuzzy("zoid", 1, 2, false, true); + EXPECT_EQ(fuzzy.getPrefix(), "zo"); + EXPECT_TRUE(fuzzy.isMatch("zoidberg")); + EXPECT_TRUE(fuzzy.isMatch("zold")); + EXPECT_TRUE(fuzzy.isMatch("zoldberg")); + EXPECT_FALSE(fuzzy.isMatch("zoxx")); + EXPECT_FALSE(fuzzy.isMatch("loid")); +} + GTEST_MAIN_RUN_ALL_TESTS() diff --git a/vespalib/src/tests/fuzzy/levenshtein_distance_test.cpp b/vespalib/src/tests/fuzzy/levenshtein_distance_test.cpp index 918cbc624db..61824fa4abf 100644 --- a/vespalib/src/tests/fuzzy/levenshtein_distance_test.cpp +++ b/vespalib/src/tests/fuzzy/levenshtein_distance_test.cpp @@ -74,7 +74,11 @@ TEST(LevenshteinDistance, prefix_match_edge_cases) { EXPECT_EQ(prefix_calculate("xy", "", 2), std::optional{2}); EXPECT_EQ(prefix_calculate("xyz", "", 2), std::nullopt); - // Max edits > 2 cases; not supported by DFA implementation. + // Max edits not in {1, 2} cases; not supported by DFA implementation. + EXPECT_EQ(prefix_calculate("", "", 0), std::optional{0}); + EXPECT_EQ(prefix_calculate("abc", "abc", 0), std::optional{0}); + EXPECT_EQ(prefix_calculate("abc", "abcde", 0), std::optional{0}); + EXPECT_EQ(prefix_calculate("abc", "dbc", 0), std::nullopt); EXPECT_EQ(prefix_calculate("abc", "", 3), std::optional{3}); EXPECT_EQ(prefix_calculate("abc", "xy", 3), std::optional{3}); EXPECT_EQ(prefix_calculate("abc", "xyz", 3), std::optional{3}); diff --git a/vespalib/src/vespa/vespalib/fuzzy/fuzzy_matcher.cpp b/vespalib/src/vespa/vespalib/fuzzy/fuzzy_matcher.cpp index 71388f43b89..fbfa50aa57f 100644 --- a/vespalib/src/vespa/vespalib/fuzzy/fuzzy_matcher.cpp +++ b/vespalib/src/vespa/vespalib/fuzzy/fuzzy_matcher.cpp @@ -29,10 +29,12 @@ vespalib::FuzzyMatcher::FuzzyMatcher(): _folded_term_codepoints_suffix() {} -vespalib::FuzzyMatcher::FuzzyMatcher(std::string_view term, uint32_t max_edit_distance, uint32_t prefix_size, bool is_cased): +vespalib::FuzzyMatcher::FuzzyMatcher(std::string_view term, uint32_t max_edit_distance, uint32_t prefix_size, + bool is_cased, bool is_prefix): _max_edit_distance(max_edit_distance), _prefix_size(prefix_size), _is_cased(is_cased), + _is_prefix(is_prefix), _folded_term_codepoints(_is_cased ? cased_convert_to_ucs4(term) : LowerCase::convert_to_ucs4(term)), _folded_term_codepoints_prefix(get_prefix(_folded_term_codepoints, _prefix_size)), _folded_term_codepoints_suffix(get_suffix(_folded_term_codepoints, _prefix_size)) @@ -73,7 +75,7 @@ bool vespalib::FuzzyMatcher::isMatch(std::string_view target) const { return LevenshteinDistance::calculate( _folded_term_codepoints_suffix, get_suffix(targetCodepoints, _prefix_size), - _max_edit_distance).has_value(); + _max_edit_distance, _is_prefix).has_value(); } vespalib::string vespalib::FuzzyMatcher::getPrefix() const { diff --git a/vespalib/src/vespa/vespalib/fuzzy/fuzzy_matcher.h b/vespalib/src/vespa/vespalib/fuzzy/fuzzy_matcher.h index aae6ca1c6e5..2c17283d20a 100644 --- a/vespalib/src/vespa/vespalib/fuzzy/fuzzy_matcher.h +++ b/vespalib/src/vespa/vespalib/fuzzy/fuzzy_matcher.h @@ -24,6 +24,7 @@ private: uint32_t _max_edit_distance; // max edit distance uint32_t _prefix_size; // prefix of a term that is considered frozen, i.e. non-fuzzy bool _is_cased; + bool _is_prefix; std::vector<uint32_t> _folded_term_codepoints; @@ -34,7 +35,7 @@ public: FuzzyMatcher(); FuzzyMatcher(const FuzzyMatcher &) = delete; FuzzyMatcher & operator = (const FuzzyMatcher &) = delete; - FuzzyMatcher(std::string_view term, uint32_t max_edit_distance, uint32_t prefix_size, bool is_cased); + FuzzyMatcher(std::string_view term, uint32_t max_edit_distance, uint32_t prefix_size, bool is_cased, bool is_prefix); ~FuzzyMatcher(); [[nodiscard]] bool isMatch(std::string_view target) const; |