diff options
Diffstat (limited to 'lowercasing_test/src/tests/lowercasing/fetchletters.py')
-rw-r--r-- | lowercasing_test/src/tests/lowercasing/fetchletters.py | 28 |
1 files changed, 28 insertions, 0 deletions
#! /usr/bin/env python
# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.

# This program reads a Unicode database and emits all letters in lower
# and upper case.

# Refer to http://www.unicode.org/ucd/ to download new files.

import sys

# Python 2 spells "code point -> text character" as unichr(); Python 3 only
# has chr(). Resolve the right one once so the script runs on both.
try:
    _code_point_to_char = unichr
except NameError:
    _code_point_to_char = chr

# Values of the third UnicodeData.txt field selected by this script:
# "Lu" (uppercase letter) and "Ll" (lowercase letter).
_LETTER_CATEGORIES = ("Lu", "Ll")


def add_character(unicodespec, characterstore):
    """Unfinished placeholder; nothing in this script calls it.

    The original body was a bare reference to a misspelled name and raised
    NameError as soon as it was invoked. The intended behaviour cannot be
    recovered from the script, so the failure is made explicit instead.

    Raises:
        NotImplementedError: always.
    """
    raise NotImplementedError("add_character was never implemented")


def main(raw, out):
    """Write every upper and lower case letter found in raw to out.

    Args:
        raw: iterable of records, each a list of the ';'-separated fields of
            one UnicodeData.txt line (field 0 is the hexadecimal code point,
            field 2 the category). Records with fewer than three fields,
            such as a trailing blank line, are skipped.
        out: stream receiving the UTF-8 encoded result, one character per
            line, terminated by a newline. Either a binary stream or a text
            stream exposing its binary layer as ``buffer`` (for example
            Python 3's sys.stdout).
    """
    letters = [
        _code_point_to_char(int(record[0], 16))
        for record in raw
        if len(record) > 2 and record[2] in _LETTER_CATEGORIES
    ]
    # Encoded bytes must go to the binary layer: Python 3 text streams
    # reject bytes, while Python 2 file objects have no "buffer" attribute
    # and accept byte strings directly.
    sink = getattr(out, "buffer", out)
    sink.write(u"\n".join(letters).encode("UTF-8"))
    sink.write(u"\n".encode("UTF-8"))


if __name__ == '__main__':
    try:
        # The with-block closes the database file even if reading fails.
        with open("./UnicodeData.txt", "r") as database:
            raw = [line.split(";") for line in database]
    except (IOError, OSError):
        sys.stderr.write("Problems reading ./UnicodeData.txt.\n")
        sys.exit(1)
    main(raw, sys.stdout)