about summary refs log tree commit diff stats
path: root/lowercasing_test/src/tests/lowercasing/fetchletters.py
blob: b90efadf2d482f0b37aa5332c7090060455356ac (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
#!/usr/bin/env python3
# Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.

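# This program reads the Unicode database file ./UnicodeData.txt and writes
# every uppercase (Lu) and lowercase (Ll) letter it lists to standard output,
# one character per line, encoded as UTF-8.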

# Refer to http://www.unicode.org/ucd/ to download new files.

import io
import sys

def add_character(unicodespec, characterstore):
    """Append the character described by *unicodespec* to *characterstore*.

    The previous body was a bare reference to a misspelled, undefined name,
    so every call raised NameError.  NOTE(review): the intended behaviour
    was never written down; this implementation decodes the record the same
    way main() does -- confirm that this is what was meant.

    Args:
        unicodespec: One database record split on ";"; field 0 is read as
            the hexadecimal code point.
        characterstore: Mutable sequence that receives the decoded
            character via ``append``.
    """
    characterstore.append(chr(int(unicodespec[0], 16)))

def main(raw, out):
    """Write every upper- and lowercase letter listed in *raw* to *out*.

    The output is one character per line, UTF-8 encoded, and always ends
    with a newline (so an input without letters yields a single newline).

    Args:
        raw: Iterable of database records already split on ";".  Field 0 is
            read as the hexadecimal code point and field 2 is compared
            against the categories "Lu" and "Ll".
        out: Destination stream.  A text stream that wraps a binary buffer
            (such as ``sys.stdout``) receives the UTF-8 bytes through that
            buffer, a pure text stream receives the text itself, and any
            other object is treated as a binary stream and receives bytes.
    """
    # Records with fewer than three fields (for example a blank line) carry
    # no category; skip them instead of failing with IndexError.
    letters = [
        chr(int(fields[0], 16))
        for fields in raw
        if len(fields) > 2 and fields[2] in ("Lu", "Ll")
    ]
    text = "\n".join(letters) + "\n"

    binary = getattr(out, "buffer", None)
    if binary is not None:
        # Writing bytes to a text stream raises TypeError on Python 3, so go
        # through the underlying buffer.  Flush first so that text already
        # queued on the wrapper keeps its position ahead of these bytes.
        out.flush()
        binary.write(text.encode("UTF-8"))
    elif isinstance(out, io.TextIOBase):
        out.write(text)
    else:
        out.write(text.encode("UTF-8"))

if __name__ == '__main__':
    # Load the database from the current directory, one ";"-separated record
    # per line, then hand the records to main() for output on stdout.
    try:
        # The context manager closes the file even if reading fails, and the
        # explicit encoding keeps decoding independent of the locale.
        with open("./UnicodeData.txt", "r", encoding="utf-8") as database:
            raw = [line.split(";") for line in database]
    except (OSError, UnicodeError):
        # Only genuine read/decode failures are reported here; interrupts
        # and SystemExit are no longer swallowed by a bare except.
        sys.stderr.write("Problems reading ./UnicodeData.txt.\n")
        sys.exit(1)
    main(raw, sys.stdout)