aboutsummaryrefslogtreecommitdiffstats
path: root/fastlib
diff options
context:
space:
mode:
authorJon Bratseth <bratseth@yahoo-inc.com>2017-06-14 13:51:04 +0200
committerJon Bratseth <bratseth@yahoo-inc.com>2017-06-14 13:51:04 +0200
commit0abf1a0f21802b2f448a5280c20e2646c82afff0 (patch)
treeabd7053f28a4d9026de57685a66ad7157bc8d681 /fastlib
parenta13b8bdff99358b18967040a8dfc0d35a95d0b6e (diff)
Encode boundary characters as octets
Diffstat (limited to 'fastlib')
-rw-r--r--fastlib/src/vespa/fastlib/text/tests/wordfolderstest.h4
1 files changed, 3 insertions, 1 deletions
diff --git a/fastlib/src/vespa/fastlib/text/tests/wordfolderstest.h b/fastlib/src/vespa/fastlib/text/tests/wordfolderstest.h
index 4a176120bc5..08b3e60a538 100644
--- a/fastlib/src/vespa/fastlib/text/tests/wordfolderstest.h
+++ b/fastlib/src/vespa/fastlib/text/tests/wordfolderstest.h
@@ -98,10 +98,12 @@ class WordFoldersTest : public Test
}
bool AccentRemovalTest() {
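+ // Note: the last escaped character before each run of plain letters is written as an octal escape (\NNN) instead of a hex escape (\xNN). A hex escape has no length limit, so any following letters that are valid hex digits (e.g. 'A', 'E') would be parsed as part of the character code; an octal escape ends after at most three digits.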
+ // See http://en.cppreference.com/w/cpp/language/escape
auto freefunction = [] (char * ptr) { free(ptr); };
auto input = std::unique_ptr<char, decltype(freefunction)>(Fast_UnicodeUtil::strdupLAT1("\xA1\xA2\xA3\xA4\xA5\xA6\xA7\xA8\xA9\xAA\xAB\xAC\xAD\xAE\xAF\xB0\xB1\xB2\xB3\xB4\xB5\xB6\xB7\xB8\xB9\xBA\xBB\xBC\xBD\xBE\xBF\xC0\xC1\xC2\xC3\xC4\xC5\xC6\xC7\xC8\xC9\xCA\xCB\xCC\xCD\xCE\xCF\xD0\xD1\xD2\xD3\xD4\xD5\xD6\xD7\xD8\xD9\xDA\xDB\xDC\xDD\xDE\xDF\xE0\xE1\xE2\xE3\xE4\xE5\xE6\xE7\xE8\xE9\xEA\xEB\xEC\xED\xEE\xEF\xF0\xF1\xF2\xF3\xF4\xF5\xF6\xF7\xF8\xF9\xFA\xFB\xFC\xFD\xFE\x70\xFE\x21"),
freefunction);
- auto yelloutput = std::unique_ptr<char, decltype(freefunction)>(Fast_UnicodeUtil::strdupLAT1("\xA1\xA2\xA3\xA4\xA5\xA6\xA7\xA8\xA9\xAA\xAB\xAC\xAD\xAE\xAF\xB0\xB1\xB2\xB3\xB4\xB5\xB6\xB7\xB8\xB9\xBA\xBB\xBC\xBD\xBE\xBFAAAAAEAAAECEEEEIIIIDNOOOOOE\xD7OEUUUUEYTHssaaaaaeaaaeceeeeiiiidnoooooe\xF7oeuuuueythpth!"),
+ auto yelloutput = std::unique_ptr<char, decltype(freefunction)>(Fast_UnicodeUtil::strdupLAT1("\xA1\xA2\xA3\xA4\xA5\xA6\xA7\xA8\xA9\xAA\xAB\xAC\xAD\xAE\xAF\xB0\xB1\xB2\xB3\xB4\xB5\xB6\xB7\xB8\xB9\xBA\xBB\xBC\xBD\xBE\277AAAAAEAAAECEEEEIIIIDNOOOOOE\327OEUUUUEYTHssaaaaaeaaaeceeeeiiiidnoooooe\367oeuuuueythpth!"),
freefunction);
Fast_NormalizeWordFolder wordfolder;
int len = wordfolder.FoldedSizeAsUTF8(input.get());