summaryrefslogtreecommitdiffstats
path: root/lowercasing_test/src/tests/lowercasing/fetchletters.py
diff options
context:
space:
mode:
Diffstat (limited to 'lowercasing_test/src/tests/lowercasing/fetchletters.py')
-rw-r--r--lowercasing_test/src/tests/lowercasing/fetchletters.py28
1 files changed, 28 insertions, 0 deletions
diff --git a/lowercasing_test/src/tests/lowercasing/fetchletters.py b/lowercasing_test/src/tests/lowercasing/fetchletters.py
new file mode 100644
index 00000000000..350dfa050da
--- /dev/null
+++ b/lowercasing_test/src/tests/lowercasing/fetchletters.py
@@ -0,0 +1,28 @@
+#! /usr/bin/env python
+# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+# This program reads a Unicode database and emits all letters in lower
+# and upper case.
+
+# Refer to http://www.unicode.org/ucd/ to download new files.
+
+import sys
+
def add_character(unicodespec, characterstore):
    """Unimplemented placeholder; leaves both arguments untouched.

    Args:
        unicodespec: accepted but not inspected.
        characterstore: accepted but not modified.

    Returns:
        None.
    """
    # NOTE(review): the body used to be a bare reference to an undefined,
    # misspelled name, so every call raised NameError. What this function is
    # supposed to store is not defined anywhere visible -- confirm the intent
    # before implementing it, or remove the function.
    return None
+
def main(raw, out):
    """Write every upper- and lower-case letter listed in *raw* to *out*.

    Args:
        raw: iterable of records, each a list holding the
            semicolon-separated fields of one UnicodeData.txt line.
            Field 0 is the code point in hexadecimal; field 2 is the
            general category.  Only records whose category is 'Lu' or
            'Ll' are emitted.
        out: writable stream.  If it exposes a binary ``buffer``
            attribute (as Python 3 text streams such as sys.stdout do),
            the bytes are written there; otherwise they are written to
            *out* itself.

    The output is one character per line, UTF-8 encoded, always ending
    with a newline (a lone newline when no record matches).

    Raises:
        ValueError: if field 0 of a selected record is not valid hex.
    """
    # unichr exists only on Python 2; on Python 3 chr covers all code points.
    try:
        to_char = unichr
    except NameError:
        to_char = chr

    # Records with fewer than three fields (for example a blank line) carry
    # no category, so they are skipped instead of raising IndexError.
    letters = [
        to_char(int(fields[0], 16))
        for fields in raw
        if len(fields) > 2 and fields[2] in ('Lu', 'Ll')
    ]

    payload = (u"\n".join(letters) + u"\n").encode("UTF-8")

    # Encoded bytes must go to the binary layer of a text stream.
    sink = getattr(out, "buffer", out)
    sink.write(payload)
+
if __name__ == '__main__':
    # Load ./UnicodeData.txt from the current working directory and split
    # each line into its semicolon-separated fields.
    try:
        # The with-block closes the file even if reading fails part-way.
        with open("./UnicodeData.txt", "r") as database:
            raw = [line.split(";") for line in database]
    except (IOError, OSError, UnicodeError):
        # Only read-related failures are reported here; anything else
        # (such as KeyboardInterrupt) propagates instead of being hidden.
        sys.stderr.write("Problems reading ./UnicodeData.txt.\n")
        sys.exit(1)
    main(raw, sys.stdout)