Fold fastlib into vespalib and gc some unused code.

Also move some code only used by juniper up into juniper test module.
author: Henning Baldersheim <balder@yahoo-inc.com> 2022-05-21 13:31:10 +0000
committer: Henning Baldersheim <balder@yahoo-inc.com> 2022-05-21 14:29:19 +0000
commit: 58a7afd1bd0cd358a8d19bfefd3e0c2c32daecc1 (patch)
tree: 4ced08d5ed7c7020e3cfb516f135f885334ff27d /vespalib/src/tests/fastlib/text/wordfolderstest.cpp
parent: 2c34544abef32f7da1c05a83a3648532afb53186 (diff)
1 files changed, 46 insertions, 0 deletions
diff --git a/vespalib/src/tests/fastlib/text/wordfolderstest.cpp b/vespalib/src/tests/fastlib/text/wordfolderstest.cpp
new file mode 100644
index 00000000000..b2e05250951
--- /dev/null
+++ b/vespalib/src/tests/fastlib/text/wordfolderstest.cpp
@@ -0,0 +1,46 @@
+// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include <vespa/fastlib/text/normwordfolder.h>
+#include <vespa/vespalib/testkit/test_kit.h>
+
+TEST("NormalizeWordFolderConstruction") {
+    Fast_NormalizeWordFolder::Setup(
+            Fast_NormalizeWordFolder::DO_ACCENT_REMOVAL
+            | Fast_NormalizeWordFolder::DO_KATAKANA_TO_HIRAGANA
+            | Fast_NormalizeWordFolder::DO_SMALL_TO_NORMAL_KANA
+            | Fast_NormalizeWordFolder::DO_SHARP_S_SUBSTITUTION
+            | Fast_NormalizeWordFolder::DO_LIGATURE_SUBSTITUTION
+            | Fast_NormalizeWordFolder::DO_MULTICHAR_EXPANSION);
+}
+
+TEST("TokenizeAnnotatedUCS4Buffer") {
+    auto nwf = std::make_unique<Fast_NormalizeWordFolder>();
+    const char *testinput = "This is a "
+                            "\xEF\xBF\xB9" "café" "\xEF\xBF\xBA" "cafe" "\xEF\xBF\xBB"
+                            " superduperextrafeaturecoolandlongplainword fun "
+                            "\xEF\xBF\xB9" "www" "\xEF\xBF\xBA"
+                            "world wide web extra long annotation block" "\xEF\xBF\xBB"
+                            " test\nIt is cool.\n";
+    const char *correct[] = {
+            "this", "is", "a",
+            "\xEF\xBF\xB9" "café" "\xEF\xBF\xBA" "cafe" "\xEF\xBF\xBB",
+            "superduperextrafeaturecooland", "fun",
+            "\xEF\xBF\xB9" "www" "\xEF\xBF\xBA" "world wide web extra lon",
+            "test", "it", "is", "cool" };
+
+    const char *teststart = testinput;
+    const char *testend = testinput + strlen(testinput);
+    ucs4_t destbuf[32];
+    ucs4_t *destbufend = destbuf + 32;
+
+    const char *origstart = testinput;
+    size_t tokenlen = 0;
+
+    int tokencounter = 0;
+    while ((teststart = nwf->UCS4Tokenize(teststart, testend, destbuf, destbufend, origstart, tokenlen)) < testend) {
+        EXPECT_EQUAL(0, Fast_UnicodeUtil::utf8cmp(correct[tokencounter++], destbuf));
+    }
+
+}
+
+TEST_MAIN() { TEST_RUN_ALL(); }
+\ No newline at end of file
author	Henning Baldersheim <balder@yahoo-inc.com>	2022-05-21 13:31:10 +0000
committer	Henning Baldersheim <balder@yahoo-inc.com>	2022-05-21 14:29:19 +0000
commit	58a7afd1bd0cd358a8d19bfefd3e0c2c32daecc1 (patch)
tree	4ced08d5ed7c7020e3cfb516f135f885334ff27d /vespalib/src/tests/fastlib/text/wordfolderstest.cpp
parent	2c34544abef32f7da1c05a83a3648532afb53186 (diff)