diff options
author | Jon Bratseth <bratseth@yahoo-inc.com> | 2017-06-14 13:51:04 +0200 |
---|---|---|
committer | Jon Bratseth <bratseth@yahoo-inc.com> | 2017-06-14 13:51:04 +0200 |
commit | 0abf1a0f21802b2f448a5280c20e2646c82afff0 (patch) | |
tree | abd7053f28a4d9026de57685a66ad7157bc8d681 /fastlib/src | |
parent | a13b8bdff99358b18967040a8dfc0d35a95d0b6e (diff) |
Encode boundary characters as octets
Diffstat (limited to 'fastlib/src')
-rw-r--r-- | fastlib/src/vespa/fastlib/text/tests/wordfolderstest.h | 4 |
1 files changed, 3 insertions, 1 deletions
diff --git a/fastlib/src/vespa/fastlib/text/tests/wordfolderstest.h b/fastlib/src/vespa/fastlib/text/tests/wordfolderstest.h index 4a176120bc5..08b3e60a538 100644 --- a/fastlib/src/vespa/fastlib/text/tests/wordfolderstest.h +++ b/fastlib/src/vespa/fastlib/text/tests/wordfolderstest.h @@ -98,10 +98,12 @@ class WordFoldersTest : public Test } bool AccentRemovalTest() { + // Note last encoded characters encoded as octets to avoid interpreting following letters after xNN as part of the encoding of the character + // See http://en.cppreference.com/w/cpp/language/escape auto freefunction = [] (char * ptr) { free(ptr); }; auto input = std::unique_ptr<char, decltype(freefunction)>(Fast_UnicodeUtil::strdupLAT1("\xA1\xA2\xA3\xA4\xA5\xA6\xA7\xA8\xA9\xAA\xAB\xAC\xAD\xAE\xAF\xB0\xB1\xB2\xB3\xB4\xB5\xB6\xB7\xB8\xB9\xBA\xBB\xBC\xBD\xBE\xBF\xC0\xC1\xC2\xC3\xC4\xC5\xC6\xC7\xC8\xC9\xCA\xCB\xCC\xCD\xCE\xCF\xD0\xD1\xD2\xD3\xD4\xD5\xD6\xD7\xD8\xD9\xDA\xDB\xDC\xDD\xDE\xDF\xE0\xE1\xE2\xE3\xE4\xE5\xE6\xE7\xE8\xE9\xEA\xEB\xEC\xED\xEE\xEF\xF0\xF1\xF2\xF3\xF4\xF5\xF6\xF7\xF8\xF9\xFA\xFB\xFC\xFD\xFE\x70\xFE\x21"), freefunction); - auto yelloutput = std::unique_ptr<char, decltype(freefunction)>(Fast_UnicodeUtil::strdupLAT1("\xA1\xA2\xA3\xA4\xA5\xA6\xA7\xA8\xA9\xAA\xAB\xAC\xAD\xAE\xAF\xB0\xB1\xB2\xB3\xB4\xB5\xB6\xB7\xB8\xB9\xBA\xBB\xBC\xBD\xBE\xBFAAAAAEAAAECEEEEIIIIDNOOOOOE\xD7OEUUUUEYTHssaaaaaeaaaeceeeeiiiidnoooooe\xF7oeuuuueythpth!"), + auto yelloutput = std::unique_ptr<char, decltype(freefunction)>(Fast_UnicodeUtil::strdupLAT1("\xA1\xA2\xA3\xA4\xA5\xA6\xA7\xA8\xA9\xAA\xAB\xAC\xAD\xAE\xAF\xB0\xB1\xB2\xB3\xB4\xB5\xB6\xB7\xB8\xB9\xBA\xBB\xBC\xBD\xBE\277AAAAAEAAAECEEEEIIIIDNOOOOOE\327OEUUUUEYTHssaaaaaeaaaeceeeeiiiidnoooooe\367oeuuuueythpth!"), freefunction); Fast_NormalizeWordFolder wordfolder; int len = wordfolder.FoldedSizeAsUTF8(input.get()); |