summary | refs | log | tree | commit | diff | stats
path: root/linguistics/src/main/java/com/yahoo/language/simple/SimpleTokenType.java
diff options
context:
space:
mode:
Diffstat (limited to 'linguistics/src/main/java/com/yahoo/language/simple/SimpleTokenType.java')
-rw-r--r-- linguistics/src/main/java/com/yahoo/language/simple/SimpleTokenType.java 94
1 files changed, 47 insertions, 47 deletions
diff --git a/linguistics/src/main/java/com/yahoo/language/simple/SimpleTokenType.java b/linguistics/src/main/java/com/yahoo/language/simple/SimpleTokenType.java
index ace2fd3246e..5c321e4da9b 100644
--- a/linguistics/src/main/java/com/yahoo/language/simple/SimpleTokenType.java
+++ b/linguistics/src/main/java/com/yahoo/language/simple/SimpleTokenType.java
@@ -10,58 +10,58 @@ public class SimpleTokenType {
public static TokenType valueOf(int codePoint) {
switch (Character.getType(codePoint)) {
- case Character.NON_SPACING_MARK:
- // "combining grave accent"
- // and "DEVANAGARI VOWEL SIGN SHORT E" etc
- // (letter-like)
- case Character.COMBINING_SPACING_MARK:
- // "DEVANAGARI VOWEL SIGN SHORT O"
- // and similar (letter-like)
- case Character.LETTER_NUMBER:
- // "SMALL ROMAN NUMERAL SIX" etc (letter-like)
- case Character.UPPERCASE_LETTER:
- case Character.LOWERCASE_LETTER:
- case Character.TITLECASE_LETTER:
- case Character.MODIFIER_LETTER:
- case Character.OTHER_LETTER:
- return TokenType.ALPHABETIC;
+ case Character.NON_SPACING_MARK:
+ // "combining grave accent"
+ // and "DEVANAGARI VOWEL SIGN SHORT E" etc
+ // (letter-like)
+ case Character.COMBINING_SPACING_MARK:
+ // "DEVANAGARI VOWEL SIGN SHORT O"
+ // and similar (letter-like)
+ case Character.LETTER_NUMBER:
+ // "SMALL ROMAN NUMERAL SIX" etc (letter-like)
+ case Character.UPPERCASE_LETTER:
+ case Character.LOWERCASE_LETTER:
+ case Character.TITLECASE_LETTER:
+ case Character.MODIFIER_LETTER:
+ case Character.OTHER_LETTER:
+ return TokenType.ALPHABETIC;
- case Character.ENCLOSING_MARK:
- // "enclosing circle" etc is symbol-like
- case Character.MATH_SYMBOL:
- case Character.CURRENCY_SYMBOL:
- case Character.MODIFIER_SYMBOL:
- case Character.OTHER_SYMBOL:
- return TokenType.SYMBOL;
+ case Character.ENCLOSING_MARK:
+ // "enclosing circle" etc is symbol-like
+ case Character.MATH_SYMBOL:
+ case Character.CURRENCY_SYMBOL:
+ case Character.MODIFIER_SYMBOL:
+ case Character.OTHER_SYMBOL:
+ return TokenType.SYMBOL;
- case Character.OTHER_NUMBER:
- // "SUPERSCRIPT TWO",
- // "DINGBAT CIRCLED SANS-SERIF DIGIT THREE"
- // and more numbers that should mostly normalize
- // to digits
- case Character.DECIMAL_DIGIT_NUMBER:
- return TokenType.NUMERIC;
+ case Character.OTHER_NUMBER:
+ // "SUPERSCRIPT TWO",
+ // "DINGBAT CIRCLED SANS-SERIF DIGIT THREE"
+ // and more numbers that should mostly normalize
+ // to digits
+ case Character.DECIMAL_DIGIT_NUMBER:
+ return TokenType.NUMERIC;
- case Character.SPACE_SEPARATOR:
- case Character.LINE_SEPARATOR:
- case Character.PARAGRAPH_SEPARATOR:
- return TokenType.SPACE;
+ case Character.SPACE_SEPARATOR:
+ case Character.LINE_SEPARATOR:
+ case Character.PARAGRAPH_SEPARATOR:
+ return TokenType.SPACE;
- case Character.DASH_PUNCTUATION:
- case Character.START_PUNCTUATION:
- case Character.END_PUNCTUATION:
- case Character.CONNECTOR_PUNCTUATION:
- case Character.OTHER_PUNCTUATION:
- case Character.INITIAL_QUOTE_PUNCTUATION:
- case Character.FINAL_QUOTE_PUNCTUATION:
- return TokenType.PUNCTUATION;
+ case Character.DASH_PUNCTUATION:
+ case Character.START_PUNCTUATION:
+ case Character.END_PUNCTUATION:
+ case Character.CONNECTOR_PUNCTUATION:
+ case Character.OTHER_PUNCTUATION:
+ case Character.INITIAL_QUOTE_PUNCTUATION:
+ case Character.FINAL_QUOTE_PUNCTUATION:
+ return TokenType.PUNCTUATION;
- case Character.CONTROL:
- case Character.FORMAT:
- case Character.SURROGATE:
- case Character.PRIVATE_USE:
- case Character.UNASSIGNED:
- return TokenType.UNKNOWN;
+ case Character.CONTROL:
+ case Character.FORMAT:
+ case Character.SURROGATE:
+ case Character.PRIVATE_USE:
+ case Character.UNASSIGNED:
+ return TokenType.UNKNOWN;
}
throw new UnsupportedOperationException(String.valueOf(Character.getType(codePoint)));
}