summaryrefslogtreecommitdiffstats
path: root/linguistics/src/main/java/com/yahoo/language/simple/SimpleTokenScript.java
blob: a695d2e7f8dd341a02005228fef4c72a289f1c1b (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
package com.yahoo.language.simple;

import com.yahoo.language.process.TokenScript;

/**
 * Translates the JDK's Unicode script classification of a code point into the
 * matching {@link TokenScript} constant.
 *
 * @author mariusarhaug
 */

class SimpleTokenScript {

    /**
     * Lookup table from JDK script to token script. Any script without an entry
     * is reported as {@link TokenScript#UNKNOWN} by {@link #valueOf(int)}.
     *
     * NOTE(review): several JDK scripts (for example BENGALI) have no entry and
     * therefore resolve to UNKNOWN - confirm that this is intended.
     */
    private static final java.util.Map<Character.UnicodeScript, TokenScript> BY_UNICODE_SCRIPT = createTable();

    /**
     * Returns the token script for the given code point.
     *
     * @param codePoint the Unicode code point to classify
     * @return the token script mapped from {@link Character.UnicodeScript#of(int)},
     *         or {@link TokenScript#UNKNOWN} when that script has no mapping
     * @throws IllegalArgumentException if {@code codePoint} is not a valid Unicode
     *         code point (propagated from {@link Character.UnicodeScript#of(int)})
     */
    static TokenScript valueOf(int codePoint) {
        Character.UnicodeScript script = Character.UnicodeScript.of(codePoint);
        return BY_UNICODE_SCRIPT.getOrDefault(script, TokenScript.UNKNOWN);
    }

    /** Builds the script lookup table; called once to initialise {@link #BY_UNICODE_SCRIPT}. */
    private static java.util.Map<Character.UnicodeScript, TokenScript> createTable() {
        var table = new java.util.EnumMap<Character.UnicodeScript, TokenScript>(Character.UnicodeScript.class);
        table.put(Character.UnicodeScript.COMMON, TokenScript.COMMON);
        table.put(Character.UnicodeScript.LATIN, TokenScript.LATIN);
        table.put(Character.UnicodeScript.GREEK, TokenScript.GREEK);
        table.put(Character.UnicodeScript.CYRILLIC, TokenScript.CYRILLIC);
        table.put(Character.UnicodeScript.ARMENIAN, TokenScript.ARMENIAN);
        table.put(Character.UnicodeScript.HEBREW, TokenScript.HEBREW);
        table.put(Character.UnicodeScript.ARABIC, TokenScript.ARABIC);
        table.put(Character.UnicodeScript.SYRIAC, TokenScript.SYRIAC);
        table.put(Character.UnicodeScript.THAANA, TokenScript.THAANA);
        table.put(Character.UnicodeScript.DEVANAGARI, TokenScript.DEVANAGARI);
        table.put(Character.UnicodeScript.GURMUKHI, TokenScript.GURMUKHI);
        table.put(Character.UnicodeScript.GUJARATI, TokenScript.GUJARATI);
        table.put(Character.UnicodeScript.ORIYA, TokenScript.ORIYA);
        table.put(Character.UnicodeScript.TAMIL, TokenScript.TAMIL);
        table.put(Character.UnicodeScript.TELUGU, TokenScript.TELUGU);
        table.put(Character.UnicodeScript.KANNADA, TokenScript.KANNADA);
        table.put(Character.UnicodeScript.MALAYALAM, TokenScript.MALAYALAM);
        table.put(Character.UnicodeScript.SINHALA, TokenScript.SINHALA);
        table.put(Character.UnicodeScript.THAI, TokenScript.THAI);
        table.put(Character.UnicodeScript.LAO, TokenScript.LAO);
        table.put(Character.UnicodeScript.TIBETAN, TokenScript.TIBETAN);
        table.put(Character.UnicodeScript.MYANMAR, TokenScript.MYANMAR);
        table.put(Character.UnicodeScript.GEORGIAN, TokenScript.GEORGIAN);
        table.put(Character.UnicodeScript.HANGUL, TokenScript.HANGUL);
        table.put(Character.UnicodeScript.ETHIOPIC, TokenScript.ETHIOPIC);
        table.put(Character.UnicodeScript.CHEROKEE, TokenScript.CHEROKEE);
        table.put(Character.UnicodeScript.OGHAM, TokenScript.OGHAM);
        table.put(Character.UnicodeScript.RUNIC, TokenScript.RUNIC);
        table.put(Character.UnicodeScript.KHMER, TokenScript.KHMER);
        table.put(Character.UnicodeScript.MONGOLIAN, TokenScript.MONGOLIAN);
        table.put(Character.UnicodeScript.HIRAGANA, TokenScript.HIRAGANA);
        table.put(Character.UnicodeScript.KATAKANA, TokenScript.KATAKANA);
        table.put(Character.UnicodeScript.HAN, TokenScript.HAN);
        table.put(Character.UnicodeScript.YI, TokenScript.YI);
        table.put(Character.UnicodeScript.GOTHIC, TokenScript.GOTHIC);
        table.put(Character.UnicodeScript.DESERET, TokenScript.DESERET);
        table.put(Character.UnicodeScript.INHERITED, TokenScript.INHERITED);
        table.put(Character.UnicodeScript.TAGALOG, TokenScript.TAGALOG);
        table.put(Character.UnicodeScript.HANUNOO, TokenScript.HANUNOO);
        table.put(Character.UnicodeScript.BUHID, TokenScript.BUHID);
        table.put(Character.UnicodeScript.TAGBANWA, TokenScript.TAGBANWA);
        table.put(Character.UnicodeScript.LIMBU, TokenScript.LIMBU);
        table.put(Character.UnicodeScript.UGARITIC, TokenScript.UGARITIC);
        table.put(Character.UnicodeScript.SHAVIAN, TokenScript.SHAVIAN);
        table.put(Character.UnicodeScript.OSMANYA, TokenScript.OSMANYA);
        table.put(Character.UnicodeScript.CYPRIOT, TokenScript.CYPRIOT);
        table.put(Character.UnicodeScript.BRAILLE, TokenScript.BRAILLE);
        table.put(Character.UnicodeScript.BUGINESE, TokenScript.BUGINESE);
        table.put(Character.UnicodeScript.COPTIC, TokenScript.COPTIC);
        table.put(Character.UnicodeScript.GLAGOLITIC, TokenScript.GLAGOLITIC);
        table.put(Character.UnicodeScript.KHAROSHTHI, TokenScript.KHAROSHTHI);
        table.put(Character.UnicodeScript.TIFINAGH, TokenScript.TIFINAGH);
        return table;
    }
}