blob: 6c48e980aff31491dfe2f5389dc17df71db100be (
plain) (
blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
|
// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
package com.yahoo.prelude.query.parser;
import java.io.PrintStream;
/**
* Dump properties of unicode characters in a format compatible
* with fastlib/text/unicode_propertydump
*
* <p>Arguments:</p>
*
* <ol>
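* <li>start-char-number (decimal, inclusive; default 0)</li>
* <li>end-char-number (decimal, exclusive; default 65535)</li>
* <li>debug true|false (default false; when true the java.lang.Character type code is appended to each line)</li>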
* </ol>
*
* @author <a href="mailto:vlarsen@yahoo-inc.com">Vidar Larsen</a>
*/
class UnicodePropertyDump {

    /** Property bit 0: white space. */
    private static final int WHITE_SPACE = 0x01;

    /** Property bit 1: word character. */
    private static final int WORD_CHAR = 0x02;

    /** Property bit 2: ideographic. */
    private static final int IDEOGRAPHIC = 0x04;

    /** Decimal digits set bit 3 and the word-character bit (0x0A in total). */
    private static final int DECIMAL_DIGIT = 0x08 | WORD_CHAR;

    /** Property bit 4: ignorable control. */
    private static final int IGNORABLE_CONTROL = 0x10;

    /**
     * Command line entry point; writes the dump to {@code System.out}.
     *
     * @param arg optional arguments: {@code [start [end [debug]]]}. {@code start} and
     *            {@code end} are decimal integers (defaults 0 and 0xffff), {@code debug}
     *            is parsed as a boolean (default false)
     * @throws NumberFormatException if {@code start} or {@code end} is not a decimal integer
     */
    public static void main(String[] arg) {
        int start = 0;
        int end = 0xffff;
        boolean debug = false;

        if (arg.length > 0) {
            start = Integer.parseInt(arg[0]);
        }
        if (arg.length > 1) {
            end = Integer.parseInt(arg[1]);
        }
        if (arg.length > 2) {
            debug = Boolean.parseBoolean(arg[2]);
        }
        dumpProperties(start, end, debug, System.out);
    }

    /**
     * Prints one line per code point in the range {@code [start, end)}.
     *
     * <p>Each line holds the code point as 8 zero-padded hex digits, a space, and the
     * property bitmap as 4 zero-padded hex digits. When {@code debug} is set, a space and
     * the decimal {@link Character#getType(int)} value are appended.</p>
     *
     * <p>Values are treated as full Unicode code points, so supplementary characters
     * (above 0xFFFF) are classified by their own properties rather than being truncated
     * to 16 bits.</p>
     *
     * @param start first code point to dump (inclusive)
     * @param end   code point to stop at (exclusive)
     * @param debug whether to append the general category type code to each line
     * @param out   stream receiving the dump
     */
    static void dumpProperties(int start, int end, boolean debug, PrintStream out) {
        for (int codePoint = start; codePoint < end; codePoint++) {
            int charType = Character.getType(codePoint);

            StringBuilder line = new StringBuilder();
            line.append(paddedHex(codePoint, 8));
            line.append(' ');
            line.append(paddedHex(propertyMap(codePoint, charType), 4));
            if (debug) {
                line.append(' ').append(charType);
            }
            out.println(line);
        }
    }

    /**
     * Computes the fastlib-style property bitmap for one code point.
     *
     * <pre>
     * bit 0 = white space
     * bit 1 = word char
     * bit 2 = ideographic
     * bit 3 = decimal digit
     * bit 4 = ignorable control
     * </pre>
     *
     * The fastlib original derives these from the Unicode data files:
     *
     * <pre>
     * White_Space                  = 0x01
     * Alphabetic                   = 0x02
     * Diacritic                    = 0x02
     * Extender                     = 0x02
     * Custom_word_char             = 0x02
     * Ideographic                  = 0x04
     * Nd                           = 0x0A (both digit and alphabetic)
     * Default_Ignorable_Code_Point = 0x10
     * Custom_Non_Word_Char         = ~0x02
     * </pre>
     *
     * Here the same bits are derived from what {@link Character} exposes instead:
     * {@code isWhitespace}, {@code isLetter}, the {@code OTHER_LETTER} category (standing in
     * for ideographic), {@code isDigit}, and the CONTROL/FORMAT/SURROGATE/UNASSIGNED
     * categories (standing in for ignorable control, unless the character is white space).
     *
     * @param codePoint the code point to classify
     * @param charType  the value of {@code Character.getType(codePoint)}
     * @return the property bitmap
     */
    private static int propertyMap(int codePoint, int charType) {
        boolean whitespace = Character.isWhitespace(codePoint);
        int map = 0;

        if (whitespace) {
            map |= WHITE_SPACE;
        }
        if (Character.isLetter(codePoint)) {
            map |= WORD_CHAR;
        }
        if (charType == Character.OTHER_LETTER) {
            map |= IDEOGRAPHIC;
        }
        if (Character.isDigit(codePoint)) {
            map |= DECIMAL_DIGIT;
        }
        if (!whitespace && isIgnorableType(charType)) {
            map |= IGNORABLE_CONTROL;
        }
        return map;
    }

    /** Returns whether the given general category is one of those reported as ignorable control. */
    private static boolean isIgnorableType(int charType) {
        return charType == Character.CONTROL
                || charType == Character.FORMAT
                || charType == Character.SURROGATE
                || charType == Character.UNASSIGNED;
    }

    /** Formats {@code value} as lower-case hex, left-padded with zeros to at least {@code width} digits. */
    private static String paddedHex(int value, int width) {
        String hex = Integer.toHexString(value);
        StringBuilder padded = new StringBuilder(Math.max(width, hex.length()));
        for (int i = hex.length(); i < width; i++) {
            padded.append('0');
        }
        return padded.append(hex).toString();
    }

}
|