diff options
author | Jon Bratseth <jonbratseth@yahoo.com> | 2017-06-14 13:55:39 +0200 |
---|---|---|
committer | GitHub <noreply@github.com> | 2017-06-14 13:55:39 +0200 |
commit | 6fc31227f8e7ddfddf3ad69aac8f97f03c8fb1d2 (patch) | |
tree | 33f9ea7e5dc2002473c718a7f03f6a730c00fbc9 | |
parent | b1b583aee28746429b8ed9f6d2714a99c0fd2d4a (diff) | |
parent | 0abf1a0f21802b2f448a5280c20e2646c82afff0 (diff) |
Merge pull request #2765 from yahoo/revert-2763-revert-2761-bratseth/correct-encoding
Revert "Revert "Encode as UTF-8""
6 files changed, 9 insertions, 7 deletions
diff --git a/container-search/src/test/java/com/yahoo/prelude/query/parser/test/specialtokens.cfg b/container-search/src/test/java/com/yahoo/prelude/query/parser/test/specialtokens.cfg index 5f54d47353f..306e590a7ca 100644 --- a/container-search/src/test/java/com/yahoo/prelude/query/parser/test/specialtokens.cfg +++ b/container-search/src/test/java/com/yahoo/prelude/query/parser/test/specialtokens.cfg @@ -6,7 +6,7 @@ tokenlist[0].tokens[1].token c++ tokenlist[0].tokens[2].token b.s.d. tokenlist[0].tokens[3].token with space tokenlist[0].tokens[4].token c# -tokenlist[0].tokens[5].token dvd±r +tokenlist[0].tokens[5].token dvd\xB1r tokenlist[1].name other tokenlist[1].tokens[4] tokenlist[1].tokens[0].token [huh] diff --git a/fastlib/src/vespa/fastlib/testsuite/cpptest.el b/fastlib/src/vespa/fastlib/testsuite/cpptest.el index 6a1e1da09b0..d0a4f5c839a 100644 --- a/fastlib/src/vespa/fastlib/testsuite/cpptest.el +++ b/fastlib/src/vespa/fastlib/testsuite/cpptest.el @@ -2,7 +2,7 @@ ;; $Revision: 1.179 $ $Date: 2004-02-17 17:01:15 $ -;; Author: Nils Sandøy <nils.sandoy@fast.no> +;; Author: Nils Sandøy <nils.sandoy@fast.no> ;; Keywords: C++, tools ;; diff --git a/fastlib/src/vespa/fastlib/testsuite/testproject.el b/fastlib/src/vespa/fastlib/testsuite/testproject.el index 6cdfe9929ec..0a7ed652378 100644 --- a/fastlib/src/vespa/fastlib/testsuite/testproject.el +++ b/fastlib/src/vespa/fastlib/testsuite/testproject.el @@ -6,7 +6,7 @@ ;; the class(es) you want to test. ;; $Revision: 1.6 $ $Date: 2003-09-11 09:14:01 $ -;; Author: Nils Sandøy <nils.sandoy@fast.no> +;; Author: Nils Sandøy <nils.sandoy@fast.no> ;; Just a message to show that this file is beeing read. Look for this ;; in the *Messages* buffer. diff --git a/fastlib/src/vespa/fastlib/text/tests/wordfolderstest.h b/fastlib/src/vespa/fastlib/text/tests/wordfolderstest.h index eb717ad489d..08b3e60a538 100644 --- a/fastlib/src/vespa/fastlib/text/tests/wordfolderstest.h +++ b/fastlib/src/vespa/fastlib/text/tests/wordfolderstest.h @@ -98,10 +98,12 @@ class WordFoldersTest : public Test } bool AccentRemovalTest() { + // Note last encoded characters encoded as octets to avoid interpreting following letters after xNN as part of the encoding of the character + // See http://en.cppreference.com/w/cpp/language/escape auto freefunction = [] (char * ptr) { free(ptr); }; - auto input = std::unique_ptr<char, decltype(freefunction)>(Fast_UnicodeUtil::strdupLAT1("¡¢£¤¥¦§¨©ª«¬®¯°±²³´µ¶·¸¹º»¼½¾¿ÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖ×ØÙÚÛÜÝÞßàáâãäåæçèéêëìíîïðñòóôõö÷øùúûüýþpþ!"), + auto input = std::unique_ptr<char, decltype(freefunction)>(Fast_UnicodeUtil::strdupLAT1("\xA1\xA2\xA3\xA4\xA5\xA6\xA7\xA8\xA9\xAA\xAB\xAC\xAD\xAE\xAF\xB0\xB1\xB2\xB3\xB4\xB5\xB6\xB7\xB8\xB9\xBA\xBB\xBC\xBD\xBE\xBF\xC0\xC1\xC2\xC3\xC4\xC5\xC6\xC7\xC8\xC9\xCA\xCB\xCC\xCD\xCE\xCF\xD0\xD1\xD2\xD3\xD4\xD5\xD6\xD7\xD8\xD9\xDA\xDB\xDC\xDD\xDE\xDF\xE0\xE1\xE2\xE3\xE4\xE5\xE6\xE7\xE8\xE9\xEA\xEB\xEC\xED\xEE\xEF\xF0\xF1\xF2\xF3\xF4\xF5\xF6\xF7\xF8\xF9\xFA\xFB\xFC\xFD\xFE\x70\xFE\x21"), freefunction); - auto yelloutput = std::unique_ptr<char, decltype(freefunction)>(Fast_UnicodeUtil::strdupLAT1("¡¢£¤¥¦§¨©ª«¬®¯°±²³´µ¶·¸¹º»¼½¾¿AAAAAEAAAECEEEEIIIIDNOOOOOE×OEUUUUEYTHssaaaaaeaaaeceeeeiiiidnoooooe÷oeuuuueythpth!"), + auto yelloutput = std::unique_ptr<char, decltype(freefunction)>(Fast_UnicodeUtil::strdupLAT1("\xA1\xA2\xA3\xA4\xA5\xA6\xA7\xA8\xA9\xAA\xAB\xAC\xAD\xAE\xAF\xB0\xB1\xB2\xB3\xB4\xB5\xB6\xB7\xB8\xB9\xBA\xBB\xBC\xBD\xBE\277AAAAAEAAAECEEEEIIIIDNOOOOOE\327OEUUUUEYTHssaaaaaeaaaeceeeeiiiidnoooooe\367oeuuuueythpth!"), freefunction); Fast_NormalizeWordFolder wordfolder; int len = wordfolder.FoldedSizeAsUTF8(input.get()); diff --git a/fastlib/src/vespa/fastlib/util/testproject.el b/fastlib/src/vespa/fastlib/util/testproject.el index f10d9ebc108..3221370d475 100644 --- a/fastlib/src/vespa/fastlib/util/testproject.el +++ b/fastlib/src/vespa/fastlib/util/testproject.el @@ -2,7 +2,7 @@ ;; Local configurations for the cpptest Emacs unit-test framework -;; Author: Nils Sandøy <nils.sandoy@fast.no> +;; Author: Nils Sandøy <nils.sandoy@fast.no> (message "Setting local test configuration") diff --git a/juniper/src/testproject.el b/juniper/src/testproject.el index 56cc68fe61d..8b52d77d651 100644 --- a/juniper/src/testproject.el +++ b/juniper/src/testproject.el @@ -6,7 +6,7 @@ ;; the class(es) you want to test. ;; $Revision: 1.2 $ $Date: 2003-02-27 12:32:24 $ -;; Author: Nils Sandøy <nils.sandoy@fast.no> +;; Author: Nils Sandøy <nils.sandoy@fast.no> ;; Just a message to show that this file is beeing read. Look for this ;; in the *Messages* buffer. |