From dacf557add1c6a3ffab036cdf2f7dfdf9750b22e Mon Sep 17 00:00:00 2001 From: Henning Baldersheim Date: Sun, 15 May 2022 00:40:43 +0200 Subject: Revert "Collapse vsm into streamingvisitors" --- streamingvisitors/src/tests/textutil/.gitignore | 4 - .../src/tests/textutil/CMakeLists.txt | 8 - streamingvisitors/src/tests/textutil/textutil.cpp | 285 --------------------- 3 files changed, 297 deletions(-) delete mode 100644 streamingvisitors/src/tests/textutil/.gitignore delete mode 100644 streamingvisitors/src/tests/textutil/CMakeLists.txt delete mode 100644 streamingvisitors/src/tests/textutil/textutil.cpp (limited to 'streamingvisitors/src/tests/textutil') diff --git a/streamingvisitors/src/tests/textutil/.gitignore b/streamingvisitors/src/tests/textutil/.gitignore deleted file mode 100644 index 1103f79800a..00000000000 --- a/streamingvisitors/src/tests/textutil/.gitignore +++ /dev/null @@ -1,4 +0,0 @@ -.depend -Makefile -textutil_test -vsm_textutil_test_app diff --git a/streamingvisitors/src/tests/textutil/CMakeLists.txt b/streamingvisitors/src/tests/textutil/CMakeLists.txt deleted file mode 100644 index 59817d01137..00000000000 --- a/streamingvisitors/src/tests/textutil/CMakeLists.txt +++ /dev/null @@ -1,8 +0,0 @@ -# Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. -vespa_add_executable(vsm_textutil_test_app TEST - SOURCES - textutil.cpp - DEPENDS - streamingvisitors -) -vespa_add_test(NAME vsm_textutil_test_app COMMAND vsm_textutil_test_app) diff --git a/streamingvisitors/src/tests/textutil/textutil.cpp b/streamingvisitors/src/tests/textutil/textutil.cpp deleted file mode 100644 index 2a1390eaa01..00000000000 --- a/streamingvisitors/src/tests/textutil/textutil.cpp +++ /dev/null @@ -1,285 +0,0 @@ -// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. -#include - -#include -#include -#include -#include -#include - -using search::byte; // unsigned char - -namespace vsm { - -template -class Vector : public std::vector -{ -public: - Vector() : std::vector() {} - Vector & a(T v) { this->push_back(v); return *this; } -}; - -typedef Vector UCS4V; -typedef Vector SizeV; -typedef UTF8StringFieldSearcherBase SFSB; -typedef FUTF8StrChrFieldSearcher FSFS; - -class TextUtilTest : public vespalib::TestApp -{ -private: - ucs4_t getUTF8Char(const char * src); - template - void assertSkipSeparators(const char * input, size_t len, const UCS4V & expdstbuf, const SizeV & expoffsets); - void assertAnsiFold(const std::string & toFold, const std::string & exp); - void assertAnsiFold(char c, char exp); -#ifdef __x86_64__ - void assert_sse2_foldua(const std::string & toFold, size_t charFolded, const std::string & exp); - void assert_sse2_foldua(unsigned char c, unsigned char exp, size_t charFolded = 16); -#endif - - template - void testSkipSeparators(); - void testSkipSeparators(); - void testSeparatorCharacter(); - void testAnsiFold(); - void test_lfoldua(); -#ifdef __x86_64__ - void test_sse2_foldua(); -#endif - -public: - int Main() override; -}; - -ucs4_t -TextUtilTest::getUTF8Char(const char * src) -{ - ucs4_t retval = Fast_UnicodeUtil::GetUTF8Char(src); - ASSERT_TRUE(retval != Fast_UnicodeUtil::_BadUTF8Char); - return retval; -} - -template -void -TextUtilTest::assertSkipSeparators(const char * input, size_t len, const UCS4V & expdstbuf, const SizeV & expoffsets) -{ - const byte * srcbuf = reinterpret_cast(input); - auto dstbuf = std::make_unique(len + 1); - auto offsets = std::make_unique(len + 1); - UTF8StrChrFieldSearcher fs; - BW bw(dstbuf.get(), offsets.get()); - size_t dstlen = fs.skipSeparators(srcbuf, len, bw); - EXPECT_EQUAL(dstlen, expdstbuf.size()); - ASSERT_TRUE(dstlen == expdstbuf.size()); - for (size_t i = 0; i < dstlen; ++i) { - EXPECT_EQUAL(dstbuf[i], expdstbuf[i]); - if (OFF) { - EXPECT_EQUAL(offsets[i], expoffsets[i]); - } - } -} - -void -TextUtilTest::assertAnsiFold(const std::string & toFold, const std::string & exp) -{ - char folded[256]; - EXPECT_TRUE(FSFS::ansiFold(toFold.c_str(), toFold.size(), folded)); - EXPECT_EQUAL(std::string(folded, toFold.size()), exp); -} - -void -TextUtilTest::assertAnsiFold(char c, char exp) -{ - char folded; - EXPECT_TRUE(FSFS::ansiFold(&c, 1, &folded)); - EXPECT_EQUAL((int32_t)folded, (int32_t)exp); -} - -#ifdef __x86_64__ -void -TextUtilTest::assert_sse2_foldua(const std::string & toFold, size_t charFolded, const std::string & exp) -{ - char folded[256]; - size_t alignedStart = 0xF - (size_t(folded + 0xF) % 0x10); - const unsigned char * toFoldOrg = reinterpret_cast(toFold.c_str()); - const unsigned char * retval = - sse2_foldua(toFoldOrg, toFold.size(), reinterpret_cast(folded + alignedStart)); - EXPECT_EQUAL((size_t)(retval - toFoldOrg), charFolded); - EXPECT_EQUAL(std::string(folded + alignedStart, charFolded), exp); -} - -void -TextUtilTest::assert_sse2_foldua(unsigned char c, unsigned char exp, size_t charFolded) -{ - unsigned char toFold[16]; - memset(toFold, c, 16); - unsigned char folded[32]; - size_t alignedStart = 0xF - (size_t(folded + 0xF) % 0x10); - const unsigned char * retval = sse2_foldua(toFold, 16, folded + alignedStart); - EXPECT_EQUAL((size_t)(retval - toFold), charFolded); - for (size_t i = 0; i < charFolded; ++i) { - EXPECT_EQUAL((int32_t)folded[i + alignedStart], (int32_t)exp); - } -} -#endif - -template -void -TextUtilTest::testSkipSeparators() -{ - // ascii characters - assertSkipSeparators("foo", 3, UCS4V().a('f').a('o').a('o'), SizeV().a(0).a(1).a(2)); - assertSkipSeparators("f\x1Fo", 3, UCS4V().a('f').a('o'), SizeV().a(0).a(2)); - assertSkipSeparators("f\no", 3, UCS4V().a('f').a('\n').a('o'), SizeV().a(0).a(1).a(2)); - assertSkipSeparators("f\to", 3, UCS4V().a('f').a('\t').a('o'), SizeV().a(0).a(1).a(2)); - - // utf8 char - assertSkipSeparators("\xC2\x80\x66", 3, UCS4V().a(getUTF8Char("\xC2\x80")).a('f'), - SizeV().a(0).a(2)); - assertSkipSeparators("\xE0\xA0\x80\x66", 4, UCS4V().a(getUTF8Char("\xE0\xA0\x80")).a('f'), - SizeV().a(0).a(3)); - assertSkipSeparators("\xF0\x90\x80\x80\x66", 5, UCS4V().a(getUTF8Char("\xF0\x90\x80\x80")).a('f'), - SizeV().a(0).a(4)); - - // replacement string (sharp s -> ss) - assertSkipSeparators("\xC3\x9F\x66\xC3\x9F", 5, UCS4V().a('s').a('s').a('f').a('s').a('s'), - SizeV().a(0).a(0).a(2).a(3).a(3)); -} - -void -TextUtilTest::testSkipSeparators() -{ - Fast_NormalizeWordFolder::Setup(Fast_NormalizeWordFolder::DO_SHARP_S_SUBSTITUTION); - - testSkipSeparators(); - testSkipSeparators(); -} - -void -TextUtilTest::testSeparatorCharacter() -{ - EXPECT_TRUE(SFSB::isSeparatorCharacter('\x00')); - EXPECT_TRUE(SFSB::isSeparatorCharacter('\x01')); - EXPECT_TRUE(SFSB::isSeparatorCharacter('\x02')); - EXPECT_TRUE(SFSB::isSeparatorCharacter('\x03')); - EXPECT_TRUE(SFSB::isSeparatorCharacter('\x04')); - EXPECT_TRUE(SFSB::isSeparatorCharacter('\x05')); - EXPECT_TRUE(SFSB::isSeparatorCharacter('\x06')); - EXPECT_TRUE(SFSB::isSeparatorCharacter('\x07')); - EXPECT_TRUE(SFSB::isSeparatorCharacter('\x08')); - EXPECT_TRUE(! SFSB::isSeparatorCharacter('\x09')); // '\t' - EXPECT_TRUE(! SFSB::isSeparatorCharacter('\x0a')); // '\n' - EXPECT_TRUE(SFSB::isSeparatorCharacter('\x0b')); - EXPECT_TRUE(SFSB::isSeparatorCharacter('\x0c')); - EXPECT_TRUE(SFSB::isSeparatorCharacter('\x0d')); - EXPECT_TRUE(SFSB::isSeparatorCharacter('\x0e')); - EXPECT_TRUE(SFSB::isSeparatorCharacter('\x0f')); - - EXPECT_TRUE(SFSB::isSeparatorCharacter('\x10')); - EXPECT_TRUE(SFSB::isSeparatorCharacter('\x11')); - EXPECT_TRUE(SFSB::isSeparatorCharacter('\x12')); - EXPECT_TRUE(SFSB::isSeparatorCharacter('\x13')); - EXPECT_TRUE(SFSB::isSeparatorCharacter('\x14')); - EXPECT_TRUE(SFSB::isSeparatorCharacter('\x15')); - EXPECT_TRUE(SFSB::isSeparatorCharacter('\x16')); - EXPECT_TRUE(SFSB::isSeparatorCharacter('\x17')); - EXPECT_TRUE(SFSB::isSeparatorCharacter('\x18')); - EXPECT_TRUE(SFSB::isSeparatorCharacter('\x19')); - EXPECT_TRUE(SFSB::isSeparatorCharacter('\x1a')); - EXPECT_TRUE(SFSB::isSeparatorCharacter('\x1b')); - EXPECT_TRUE(SFSB::isSeparatorCharacter('\x1c')); - EXPECT_TRUE(SFSB::isSeparatorCharacter('\x1d')); - EXPECT_TRUE(SFSB::isSeparatorCharacter('\x1e')); - EXPECT_TRUE(SFSB::isSeparatorCharacter('\x1f')); - - EXPECT_TRUE(! SFSB::isSeparatorCharacter('\x20')); // space -} - -void -TextUtilTest::testAnsiFold() -{ - FieldSearcher::init(); - assertAnsiFold("", ""); - assertAnsiFold("ABCDEFGHIJKLMNOPQRSTUVWXYZ", "abcdefghijklmnopqrstuvwxyz"); - assertAnsiFold("abcdefghijklmnopqrstuvwxyz", "abcdefghijklmnopqrstuvwxyz"); - assertAnsiFold("0123456789", "0123456789"); - for (int i = 0; i < 128; ++i) { - if ((i >= 'a' && i <= 'z') || (i >= '0' && i <= '9')) { - assertAnsiFold(i, i); - } else if (i >= 'A' && i <= 'Z') { - assertAnsiFold(i, i + 32); - } else { - assertAnsiFold(i, 0); - } - } - - // non-ascii is ignored - for (int i = 128; i < 256; ++i) { - char toFold = i; - char folded; - EXPECT_TRUE(!FSFS::ansiFold(&toFold, 1, &folded)); - } -} - -void -TextUtilTest::test_lfoldua() -{ - FieldSearcher::init(); - char folded[256]; - size_t alignedStart = 0; - const char * toFold = "ABCDEFGHIJKLMNOPQRSTUVWXYZ"; - size_t len = strlen(toFold); - EXPECT_TRUE(FSFS::lfoldua(toFold, len, folded, alignedStart)); - EXPECT_EQUAL(std::string(folded + alignedStart, len), "abcdefghijklmnopqrstuvwxyz"); -} - -#ifdef __x86_64__ -void -TextUtilTest::test_sse2_foldua() -{ - assert_sse2_foldua("", 0, ""); - assert_sse2_foldua("ABCD", 0, ""); - assert_sse2_foldua("ABCDEFGHIJKLMNO", 0, ""); - assert_sse2_foldua("ABCDEFGHIJKLMNOP", 16, "abcdefghijklmnop"); - assert_sse2_foldua("ABCDEFGHIJKLMNOPQ", 16, "abcdefghijklmnop"); - assert_sse2_foldua("KLMNOPQRSTUVWXYZ", 16, "klmnopqrstuvwxyz"); - assert_sse2_foldua("abcdefghijklmnop", 16, "abcdefghijklmnop"); - assert_sse2_foldua("klmnopqrstuvwxyz", 16, "klmnopqrstuvwxyz"); - assert_sse2_foldua("0123456789abcdef", 16, "0123456789abcdef"); - - for (int i = 0; i < 128; ++i) { - if ((i >= 'a' && i <= 'z') || (i >= '0' && i <= '9')) { - assert_sse2_foldua(i, i); - } else if (i >= 'A' && i <= 'Z') { - assert_sse2_foldua(i, i + 32); - } else { - assert_sse2_foldua(i, 0); - } - } - - // non-ascii is ignored - for (int i = 128; i < 256; ++i) { - assert_sse2_foldua(i, '?', 0); - } -} -#endif - -int -TextUtilTest::Main() -{ - TEST_INIT("textutil_test"); - - testSkipSeparators(); - testSeparatorCharacter(); - testAnsiFold(); - test_lfoldua(); -#ifdef __x86_64__ - test_sse2_foldua(); -#endif - - TEST_DONE(); -} - -} - -TEST_APPHOOK(vsm::TextUtilTest); -- cgit v1.2.3