diff options
Diffstat (limited to 'linguistics/src/main/java/com/yahoo/language/process/CharacterClasses.java')
-rw-r--r-- | linguistics/src/main/java/com/yahoo/language/process/CharacterClasses.java | 8 |
1 files changed, 7 insertions, 1 deletions
diff --git a/linguistics/src/main/java/com/yahoo/language/process/CharacterClasses.java b/linguistics/src/main/java/com/yahoo/language/process/CharacterClasses.java index 5946a00b8bf..f6177262bf9 100644 --- a/linguistics/src/main/java/com/yahoo/language/process/CharacterClasses.java +++ b/linguistics/src/main/java/com/yahoo/language/process/CharacterClasses.java @@ -15,7 +15,6 @@ public class CharacterClasses { public boolean isLetter(int c) { if (Character.isLetter(c)) return true; if (Character.isDigit(c) && ! isLatin(c)) return true; // Not considering these digits, so treat them as letters - if (Character.getType(c) == Character.OTHER_SYMBOL) return true; // emojis searchable // Some CJK punctuation defined as word characters if (c == '\u3008' || c == '\u3009' || c == '\u300a' || c == '\u300b' || @@ -30,6 +29,13 @@ public class CharacterClasses { } /** + * Returns true if the character is in the class "other symbol" - emojis etc. + */ + public boolean isSymbol(int c) { + return Character.getType(c) == Character.OTHER_SYMBOL; + } + + /** * Returns true for code points which should be considered digits - same as java.lang.Character.isDigit */ public boolean isDigit(int c) { |