aboutsummaryrefslogtreecommitdiffstats
path: root/lowercasing_test/src/tests/lowercasing/fetchletters.py
blob: 350dfa050da88ed465a018fb99dfcc50a429d903 (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
#! /usr/bin/env python
# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.

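# This program reads a Unicode database (./UnicodeData.txt) and writes every
# upper-case (Lu) and lower-case (Ll) letter it lists to standard output,
# one character per line, encoded as UTF-8.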

# Refer to http://www.unicode.org/ucd/ to download new files.

import sys

def add_character(unicodespec, characterstore):
    """Unfinished helper; nothing in the visible part of this file calls it.

    NOTE(review): the body is a bare reference to ``characterstora``, which
    is neither a parameter (the parameter is spelled ``characterstore``) nor
    defined anywhere in the visible source, so calling this function raises
    NameError. Presumably it was meant to add the character described by
    ``unicodespec`` to ``characterstore`` -- confirm the intent, then either
    implement it or delete it.
    """
    characterstora

def main(raw, out):
    """Write every upper- and lower-case letter listed in raw to out.

    raw is an iterable of records, each a sequence of strings in which
    item 0 is a hexadecimal code point and item 2 is a category code.
    Records whose category is 'Lu' or 'Ll' are turned into characters and
    written as UTF-8, one per line, followed by a final newline.  Records
    with fewer than three items (for example a blank input line) are
    skipped instead of raising IndexError.

    out is a file-like object.  If it has a ``buffer`` attribute (as a
    Python 3 text stream such as sys.stdout does), the encoded bytes are
    written to that buffer; otherwise they are passed to out.write().
    """
    # unichr only exists on Python 2; Python 3's chr covers the same range.
    try:
        to_char = unichr
    except NameError:
        to_char = chr

    letters = [to_char(int(fields[0], 16))
               for fields in raw
               if len(fields) > 2 and fields[2] in ("Lu", "Ll")]
    payload = (u"\n".join(letters) + u"\n").encode("UTF-8")

    # Text streams on Python 3 reject bytes, so target the underlying
    # binary buffer when there is one.
    getattr(out, "buffer", out).write(payload)

if __name__ == '__main__':
    # Load ./UnicodeData.txt as a list of records, splitting each line on
    # ';', then hand the records to main() for output on stdout.
    try:
        # The with-block closes the file even if reading fails part-way.
        with open("./UnicodeData.txt", "r") as database:
            raw = [line.split(";") for line in database]
    except (IOError, OSError, UnicodeError):
        # Only I/O and decoding failures are reported here; a bare except
        # would also swallow KeyboardInterrupt and SystemExit.
        sys.stderr.write("Problems reading ./UnicodeData.txt.\n")
        sys.exit(1)
    main(raw, sys.stdout)