aboutsummaryrefslogtreecommitdiffstats
path: root/vespalib/src/tests
diff options
context:
space:
mode:
author Alexey Chernyshev <aleksei@spotify.com> 2022-03-22 16:47:14 +0100
committer Alexey Chernyshev <aleksei@spotify.com> 2022-03-23 16:21:05 +0100
commit 6bcdc1ac1c1c3ce8b30472926098df989b9f7019 (patch)
tree 1bec251b1711a093a196c25be896c680282abd24 /vespalib/src/tests
parent 32358e1689cded19a3c5d0213b0ef0c5329c1e33 (diff)
Addressing more comments
Diffstat (limited to 'vespalib/src/tests')
-rw-r--r-- vespalib/src/tests/fuzzy/CMakeLists.txt 16
-rw-r--r-- vespalib/src/tests/fuzzy/fuzzy.cpp 33
-rw-r--r-- vespalib/src/tests/fuzzy/fuzzy_matcher_test.cpp 42
-rw-r--r-- vespalib/src/tests/fuzzy/levenstein_distance_test.cpp 39
4 files changed, 94 insertions, 36 deletions
diff --git a/vespalib/src/tests/fuzzy/CMakeLists.txt b/vespalib/src/tests/fuzzy/CMakeLists.txt
index f58602296a5..2a415a9ad62 100644
--- a/vespalib/src/tests/fuzzy/CMakeLists.txt
+++ b/vespalib/src/tests/fuzzy/CMakeLists.txt
@@ -1,8 +1,18 @@
# Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
-vespa_add_executable(vespalib_fuzzy_test_app TEST
+vespa_add_executable(vespalib_fuzzy_matcher_test_app TEST
SOURCES
- fuzzy.cpp
+ fuzzy_matcher_test.cpp
DEPENDS
vespalib
+ GTest::GTest
)
-vespa_add_test(NAME vespalib_fuzzy_test_app COMMAND vespalib_fuzzy_test_app)
+vespa_add_test(NAME vespalib_fuzzy_matcher_test_app COMMAND vespalib_fuzzy_matcher_test_app)
+
+vespa_add_executable(vespalib_levenstein_distance_test_app TEST
+ SOURCES
+ levenstein_distance_test.cpp
+ DEPENDS
+ vespalib
+ GTest::GTest
+ )
+vespa_add_test(NAME vespalib_levenstein_distance_test_app COMMAND vespalib_levenstein_distance_test_app)
\ No newline at end of file
diff --git a/vespalib/src/tests/fuzzy/fuzzy.cpp b/vespalib/src/tests/fuzzy/fuzzy.cpp
deleted file mode 100644
index 9ffb77b3742..00000000000
--- a/vespalib/src/tests/fuzzy/fuzzy.cpp
+++ /dev/null
@@ -1,33 +0,0 @@
-// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
-#include <vespa/vespalib/testkit/test_kit.h>
-#include <vespa/vespalib/fuzzy/fuzzy.h>
-
-using namespace vespalib;
-
-
-TEST("require that levenstein distance works") {
- EXPECT_EQUAL(0u, Fuzzy::levenstein_distance("abc", "abc", 2).value());
- EXPECT_EQUAL(0u, Fuzzy::levenstein_distance("abc", "ABC", 2).value());
- EXPECT_EQUAL(1u, Fuzzy::levenstein_distance("abc", "abd", 2).value());
- EXPECT_EQUAL(1u, Fuzzy::levenstein_distance("ABC", "abd", 2).value());
- EXPECT_EQUAL(2u, Fuzzy::levenstein_distance("ABC", "add", 2).value());
- EXPECT_FALSE(Fuzzy::levenstein_distance("ABC", "ddd", 2).has_value());
-}
-
-TEST("require that extracting of a prefix works") {
- Fuzzy fuzzy(Fuzzy::folded_codepoints("prefix"), 2, 2);
- EXPECT_EQUAL("pr", fuzzy.getPrefix());
-}
-
-TEST("require that empty prefix works") {
- Fuzzy fuzzy(Fuzzy::folded_codepoints("prefix"), 0, 2);
- EXPECT_EQUAL("", fuzzy.getPrefix());
-}
-
-TEST("require that longer prefix size works") {
- Fuzzy fuzzy(Fuzzy::folded_codepoints("prefix"), 100, 2);
- EXPECT_EQUAL("prefix", fuzzy.getPrefix());
-}
-
-
-TEST_MAIN() { TEST_RUN_ALL(); }
diff --git a/vespalib/src/tests/fuzzy/fuzzy_matcher_test.cpp b/vespalib/src/tests/fuzzy/fuzzy_matcher_test.cpp
new file mode 100644
index 00000000000..60a4eab3f57
--- /dev/null
+++ b/vespalib/src/tests/fuzzy/fuzzy_matcher_test.cpp
@@ -0,0 +1,42 @@
+// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include <vespa/vespalib/fuzzy/fuzzy_matcher.h>
+#include <vespa/vespalib/text/lowercase.h>
+#include <vespa/vespalib/gtest/gtest.h>
+
+using namespace vespalib;
+
+// Test helper: builds a FuzzyMatcher from a plain string by first converting the term to
+// lowercased UCS-4 codepoints, then forwarding the threshold and prefix size unchanged.
+FuzzyMatcher from_term(std::string_view term, uint8_t threshold, uint8_t prefix_size) {
+    return FuzzyMatcher{LowerCase::convert_to_ucs4(term), threshold, prefix_size};
+}
+
+// Term "abc", threshold 2, prefix size 0.
+TEST(FuzzyMatcherTest, fuzzy_match_empty_prefix) {
+    auto matcher = from_term("abc", 2, 0);
+
+    // identical term, also when the candidate differs only in case
+    EXPECT_TRUE(matcher.isMatch("abc"));
+    EXPECT_TRUE(matcher.isMatch("ABC"));
+
+    // one and two differing characters are still expected to match
+    EXPECT_TRUE(matcher.isMatch("ab1"));
+    EXPECT_TRUE(matcher.isMatch("a12"));
+
+    // three differing characters must be rejected
+    EXPECT_FALSE(matcher.isMatch("123"));
+}
+
+// Term "abcdef", threshold 2, prefix size 2.
+TEST(FuzzyMatcherTest, fuzzy_match_with_prefix) {
+    auto matcher = from_term("abcdef", 2, 2);
+
+    // identical term, also when the candidate differs only in case
+    EXPECT_TRUE(matcher.isMatch("abcdef"));
+    EXPECT_TRUE(matcher.isMatch("ABCDEF"));
+
+    // up to two differing characters after the prefix are accepted, three are not
+    EXPECT_TRUE(matcher.isMatch("abcde1"));
+    EXPECT_TRUE(matcher.isMatch("abcd12"));
+    EXPECT_FALSE(matcher.isMatch("abc123"));
+
+    // a candidate whose first two characters differ still matches:
+    // prefix match is not enforced
+    EXPECT_TRUE(matcher.isMatch("12cdef"));
+}
+
+// A prefix size of 0 is expected to give an empty prefix string.
+TEST(FuzzyMatcherTest, get_prefix_is_empty) {
+    auto matcher = from_term("whatever", 2, 0);
+    EXPECT_EQ(matcher.getPrefix(), "");
+}
+
+// A prefix size of 2 is expected to give the first two characters of the term.
+TEST(FuzzyMatcherTest, get_prefix_non_empty) {
+    auto matcher = from_term("abcd", 2, 2);
+    EXPECT_EQ(matcher.getPrefix(), "ab");
+}
+
+GTEST_MAIN_RUN_ALL_TESTS()
diff --git a/vespalib/src/tests/fuzzy/levenstein_distance_test.cpp b/vespalib/src/tests/fuzzy/levenstein_distance_test.cpp
new file mode 100644
index 00000000000..efdcc82fce1
--- /dev/null
+++ b/vespalib/src/tests/fuzzy/levenstein_distance_test.cpp
@@ -0,0 +1,39 @@
+// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include <vespa/vespalib/fuzzy/levenstein_distance.h>
+#include <vespa/vespalib/text/lowercase.h>
+#include <vespa/vespalib/gtest/gtest.h>
+
+// Test helper: converts both strings to lowercased UCS-4 codepoints and runs
+// LevensteinDistance::calculate in both argument orders. The two results are expected to be
+// equal (a mismatch is recorded as a non-fatal failure); the left-to-right result is returned.
+std::optional<uint32_t> calculate(std::string_view left, std::string_view right, uint32_t threshold) {
+    std::vector<uint32_t> lhs = vespalib::LowerCase::convert_to_ucs4(left);
+    std::vector<uint32_t> rhs = vespalib::LowerCase::convert_to_ucs4(right);
+
+    std::optional<uint32_t> forward = vespalib::LevensteinDistance::calculate(lhs, rhs, threshold);
+    std::optional<uint32_t> backward = vespalib::LevensteinDistance::calculate(rhs, lhs, threshold);
+
+    // the distance must not depend on the order in which the two strings are passed
+    EXPECT_EQ(forward, backward);
+
+    return forward;
+}
+
+// Edge cases for the symmetric calculate() helper, all with threshold 2: a distance of at
+// most 2 is expected to be reported, a larger one is expected to come back as std::nullopt.
+TEST(LevensteinDistance, calculate_edgecases) {
+    // Expected values use the helper's own return type, so the comparison does not mix
+    // std::optional<int> with std::optional<uint32_t>.
+    using Distance = std::optional<uint32_t>;
+
+    // equal strings and single substitutions
+    EXPECT_EQ(calculate("abc", "abc", 2), Distance{0});
+    EXPECT_EQ(calculate("abc", "ab1", 2), Distance{1});
+    EXPECT_EQ(calculate("abc", "1bc", 2), Distance{1});
+    EXPECT_EQ(calculate("abc", "a1c", 2), Distance{1});
+
+    // strings of different length
+    EXPECT_EQ(calculate("abc", "ab", 2), Distance{1});
+    EXPECT_EQ(calculate("abc", "abcd", 2), Distance{1});
+    EXPECT_EQ(calculate("bc", "abcd", 2), Distance{2});
+    EXPECT_EQ(calculate("ab", "abcd", 2), Distance{2});
+    EXPECT_EQ(calculate("cd", "abcd", 2), Distance{2});
+    EXPECT_EQ(calculate("ad", "abcd", 2), Distance{2});
+
+    // at the threshold, and just above it
+    EXPECT_EQ(calculate("abc", "a12", 2), Distance{2});
+    EXPECT_EQ(calculate("abc", "123", 2), std::nullopt);
+
+    // one side empty
+    EXPECT_EQ(calculate("a", "", 2), Distance{1});
+    EXPECT_EQ(calculate("ab", "", 2), Distance{2});
+    EXPECT_EQ(calculate("abc", "", 2), std::nullopt);
+}
+
+GTEST_MAIN_RUN_ALL_TESTS()
+