diff options
author | Jon Bratseth <bratseth@yahoo-inc.com> | 2016-06-15 23:09:44 +0200 |
---|---|---|
committer | Jon Bratseth <bratseth@yahoo-inc.com> | 2016-06-15 23:09:44 +0200 |
commit | 72231250ed81e10d66bfe70701e64fa5fe50f712 (patch) | |
tree | 2728bba1131a6f6e5bdf95afec7d7ff9358dac50 /lowercasing_test/src/tests/lowercasing/casingvariants_fastlib.cpp |
Publish
Diffstat (limited to 'lowercasing_test/src/tests/lowercasing/casingvariants_fastlib.cpp')
-rw-r--r-- | lowercasing_test/src/tests/lowercasing/casingvariants_fastlib.cpp | 41 |
1 files changed, 41 insertions, 0 deletions
diff --git a/lowercasing_test/src/tests/lowercasing/casingvariants_fastlib.cpp b/lowercasing_test/src/tests/lowercasing/casingvariants_fastlib.cpp new file mode 100644 index 00000000000..d7478c4755a --- /dev/null +++ b/lowercasing_test/src/tests/lowercasing/casingvariants_fastlib.cpp @@ -0,0 +1,41 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +#include <vespa/fastos/fastos.h> +#include <vespa/fastlib/text/normwordfolder.h> +#include <fstream> +#include <iostream> + +ucs4_t +getUCS4Char(const char *src) +{ + return Fast_UnicodeUtil::GetUTF8Char(src); +} + +int +main(int argc, char ** argv) +{ + assert(argc == 3); + (void) argc; + std::ifstream input(argv[1]); + std::ifstream ref(argv[2]); + Fast_NormalizeWordFolder wordFolder; + char inputBuf[128]; + char refBuf[128]; + char lowerBuf[128]; + while (input.good()) { + input.getline(inputBuf, 128); + ref.getline(refBuf, 128); + ucs4_t inputChar = getUCS4Char(inputBuf); + ucs4_t refChar = getUCS4Char(refBuf); + ucs4_t lowerChar = wordFolder.ToFold(inputChar); + Fast_UnicodeUtil::utf8ncopy(lowerBuf, &lowerChar, 128, 1); + if (refChar != lowerChar) { + printf("input(%s,%u,0x%X), lower(%s,%u,0x%X), ref(%s,%u,0x%X) \n", + inputBuf, inputChar, inputChar, + lowerBuf, lowerChar, lowerChar, + refBuf, refChar, refChar); + } + } + input.close(); + return 0; +} + |