diff options
author | Alexey Chernyshev <aleksei@spotify.com> | 2022-03-22 16:47:14 +0100 |
---|---|---|
committer | Alexey Chernyshev <aleksei@spotify.com> | 2022-03-23 16:21:05 +0100 |
commit | 6bcdc1ac1c1c3ce8b30472926098df989b9f7019 (patch) | |
tree | 1bec251b1711a093a196c25be896c680282abd24 /vespalib/src/tests | |
parent | 32358e1689cded19a3c5d0213b0ef0c5329c1e33 (diff) |
Addressing more comments
Diffstat (limited to 'vespalib/src/tests')
-rw-r--r-- | vespalib/src/tests/fuzzy/CMakeLists.txt | 16 | ||||
-rw-r--r-- | vespalib/src/tests/fuzzy/fuzzy.cpp | 33 | ||||
-rw-r--r-- | vespalib/src/tests/fuzzy/fuzzy_matcher_test.cpp | 42 | ||||
-rw-r--r-- | vespalib/src/tests/fuzzy/levenstein_distance_test.cpp | 39 |
4 files changed, 94 insertions, 36 deletions
diff --git a/vespalib/src/tests/fuzzy/CMakeLists.txt b/vespalib/src/tests/fuzzy/CMakeLists.txt
index f58602296a5..2a415a9ad62 100644
--- a/vespalib/src/tests/fuzzy/CMakeLists.txt
+++ b/vespalib/src/tests/fuzzy/CMakeLists.txt
@@ -1,8 +1,18 @@
 # Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
-vespa_add_executable(vespalib_fuzzy_test_app TEST
+vespa_add_executable(vespalib_fuzzy_matcher_test_app TEST
     SOURCES
-    fuzzy.cpp
+    fuzzy_matcher_test.cpp
     DEPENDS
     vespalib
+    GTest::GTest
     )
-vespa_add_test(NAME vespalib_fuzzy_test_app COMMAND vespalib_fuzzy_test_app)
+vespa_add_test(NAME vespalib_fuzzy_matcher_test_app COMMAND vespalib_fuzzy_matcher_test_app)
+
+vespa_add_executable(vespalib_levenstein_distance_test_app TEST
+    SOURCES
+    levenstein_distance_test.cpp
+    DEPENDS
+    vespalib
+    GTest::GTest
+    )
+vespa_add_test(NAME vespalib_levenstein_distance_test_app COMMAND vespalib_levenstein_distance_test_app)
\ No newline at end of file
diff --git a/vespalib/src/tests/fuzzy/fuzzy.cpp b/vespalib/src/tests/fuzzy/fuzzy.cpp
deleted file mode 100644
index 9ffb77b3742..00000000000
--- a/vespalib/src/tests/fuzzy/fuzzy.cpp
+++ /dev/null
@@ -1,33 +0,0 @@
-// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
-#include <vespa/vespalib/testkit/test_kit.h>
-#include <vespa/vespalib/fuzzy/fuzzy.h>
-
-using namespace vespalib;
-
-
-TEST("require that levenstein distance works") {
-    EXPECT_EQUAL(0u, Fuzzy::levenstein_distance("abc", "abc", 2).value());
-    EXPECT_EQUAL(0u, Fuzzy::levenstein_distance("abc", "ABC", 2).value());
-    EXPECT_EQUAL(1u, Fuzzy::levenstein_distance("abc", "abd", 2).value());
-    EXPECT_EQUAL(1u, Fuzzy::levenstein_distance("ABC", "abd", 2).value());
-    EXPECT_EQUAL(2u, Fuzzy::levenstein_distance("ABC", "add", 2).value());
-    EXPECT_FALSE(Fuzzy::levenstein_distance("ABC", "ddd", 2).has_value());
-}
-
-TEST("require that extracting of a prefix works") {
-    Fuzzy fuzzy(Fuzzy::folded_codepoints("prefix"), 2, 2);
-    EXPECT_EQUAL("pr", fuzzy.getPrefix());
-}
-
-TEST("require that empty prefix works") {
-    Fuzzy fuzzy(Fuzzy::folded_codepoints("prefix"), 0, 2);
-    EXPECT_EQUAL("", fuzzy.getPrefix());
-}
-
-TEST("require that longer prefix size works") {
-    Fuzzy fuzzy(Fuzzy::folded_codepoints("prefix"), 100, 2);
-    EXPECT_EQUAL("prefix", fuzzy.getPrefix());
-}
-
-
-TEST_MAIN() { TEST_RUN_ALL(); }
diff --git a/vespalib/src/tests/fuzzy/fuzzy_matcher_test.cpp b/vespalib/src/tests/fuzzy/fuzzy_matcher_test.cpp
new file mode 100644
index 00000000000..60a4eab3f57
--- /dev/null
+++ b/vespalib/src/tests/fuzzy/fuzzy_matcher_test.cpp
@@ -0,0 +1,42 @@
+// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include <vespa/vespalib/fuzzy/fuzzy_matcher.h>
+#include <vespa/vespalib/text/lowercase.h>
+#include <vespa/vespalib/gtest/gtest.h>
+
+using namespace vespalib;
+
+FuzzyMatcher from_term(std::string_view term, uint8_t threshold, uint8_t prefix_size) {
+    return {LowerCase::convert_to_ucs4(term), threshold, prefix_size};
+}
+
+TEST(FuzzyMatcherTest, fuzzy_match_empty_prefix) {
+    FuzzyMatcher fuzzy = from_term("abc", 2, 0);
+    EXPECT_TRUE(fuzzy.isMatch("abc"));
+    EXPECT_TRUE(fuzzy.isMatch("ABC"));
+    EXPECT_TRUE(fuzzy.isMatch("ab1"));
+    EXPECT_TRUE(fuzzy.isMatch("a12"));
+    EXPECT_FALSE(fuzzy.isMatch("123"));
+}
+
+TEST(FuzzyMatcherTest, fuzzy_match_with_prefix) {
+    FuzzyMatcher fuzzy = from_term("abcdef", 2, 2);
+    EXPECT_TRUE(fuzzy.isMatch("abcdef"));
+    EXPECT_TRUE(fuzzy.isMatch("ABCDEF"));
+    EXPECT_TRUE(fuzzy.isMatch("abcde1"));
+    EXPECT_TRUE(fuzzy.isMatch("abcd12"));
+    EXPECT_FALSE(fuzzy.isMatch("abc123"));
+    EXPECT_TRUE(fuzzy.isMatch("12cdef")); // prefix match is not enforced
+}
+
+TEST(FuzzyMatcherTest, get_prefix_is_empty) {
+    FuzzyMatcher fuzzy = from_term("whatever", 2, 0);
+    EXPECT_EQ(fuzzy.getPrefix(), "");
+}
+
+TEST(FuzzyMatcherTest, get_prefix_non_empty) {
+    FuzzyMatcher fuzzy = from_term("abcd", 2, 2);
+    EXPECT_EQ(fuzzy.getPrefix(), "ab");
+}
+
+GTEST_MAIN_RUN_ALL_TESTS()
diff --git a/vespalib/src/tests/fuzzy/levenstein_distance_test.cpp b/vespalib/src/tests/fuzzy/levenstein_distance_test.cpp
new file mode 100644
index 00000000000..efdcc82fce1
--- /dev/null
+++ b/vespalib/src/tests/fuzzy/levenstein_distance_test.cpp
@@ -0,0 +1,39 @@
+// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include <vespa/vespalib/fuzzy/levenstein_distance.h>
+#include <vespa/vespalib/text/lowercase.h>
+#include <vespa/vespalib/gtest/gtest.h>
+
+std::optional<uint32_t> calculate(std::string_view left, std::string_view right, uint32_t threshold) {
+    std::vector<uint32_t> leftCodepoints = vespalib::LowerCase::convert_to_ucs4(left);
+    std::vector<uint32_t> rightCodepoints = vespalib::LowerCase::convert_to_ucs4(right);
+
+    std::optional<uint32_t> leftRight = vespalib::LevensteinDistance::calculate(leftCodepoints,rightCodepoints, threshold);
+    std::optional<uint32_t> rightLeft = vespalib::LevensteinDistance::calculate(rightCodepoints,leftCodepoints, threshold);
+
+    EXPECT_EQ(leftRight, rightLeft); // should be independent whether left or right strings are swapped
+
+    return leftRight;
+}
+
+TEST(LevensteinDistance, calculate_edgecases) {
+    EXPECT_EQ(calculate("abc", "abc", 2), std::optional{0});
+    EXPECT_EQ(calculate("abc", "ab1", 2), std::optional{1});
+    EXPECT_EQ(calculate("abc", "1bc", 2), std::optional{1});
+    EXPECT_EQ(calculate("abc", "a1c", 2), std::optional{1});
+    EXPECT_EQ(calculate("abc", "ab", 2), std::optional{1});
+    EXPECT_EQ(calculate("abc", "abcd", 2), std::optional{1});
+    EXPECT_EQ(calculate("bc", "abcd", 2), std::optional{2});
+    EXPECT_EQ(calculate("ab", "abcd", 2), std::optional{2});
+    EXPECT_EQ(calculate("cd", "abcd", 2), std::optional{2});
+    EXPECT_EQ(calculate("ad", "abcd", 2), std::optional{2});
+    EXPECT_EQ(calculate("abc", "a12", 2), std::optional{2});
+    EXPECT_EQ(calculate("abc", "123", 2), std::nullopt);
+    EXPECT_EQ(calculate("a", "", 2), std::optional{1});
+    EXPECT_EQ(calculate("ab", "", 2), std::optional{2});
+    EXPECT_EQ(calculate("abc", "", 2), std::nullopt);
+    EXPECT_EQ(calculate("abc", "123", 2), std::nullopt);
+}
+
+GTEST_MAIN_RUN_ALL_TESTS()
+
|