aboutsummaryrefslogtreecommitdiffstats
path: root/linguistics/src/main/java/com/yahoo/language/process/CharacterClasses.java
diff options
context:
space:
mode:
Diffstat (limited to 'linguistics/src/main/java/com/yahoo/language/process/CharacterClasses.java')
-rw-r--r-- linguistics/src/main/java/com/yahoo/language/process/CharacterClasses.java 8
1 files changed, 7 insertions, 1 deletions
diff --git a/linguistics/src/main/java/com/yahoo/language/process/CharacterClasses.java b/linguistics/src/main/java/com/yahoo/language/process/CharacterClasses.java
index 5946a00b8bf..f6177262bf9 100644
--- a/linguistics/src/main/java/com/yahoo/language/process/CharacterClasses.java
+++ b/linguistics/src/main/java/com/yahoo/language/process/CharacterClasses.java
@@ -15,7 +15,6 @@ public class CharacterClasses {
public boolean isLetter(int c) {
if (Character.isLetter(c)) return true;
if (Character.isDigit(c) && ! isLatin(c)) return true; // Not considering these digits, so treat them as letters
- if (Character.getType(c) == Character.OTHER_SYMBOL) return true; // emojis searchable
// Some CJK punctuation defined as word characters
if (c == '\u3008' || c == '\u3009' || c == '\u300a' || c == '\u300b' ||
@@ -30,6 +29,13 @@ public class CharacterClasses {
}
/**
+ * Returns true if the general category of code point c is "other symbol" ({@link Character#OTHER_SYMBOL}) - emojis etc.
+ */
+ public boolean isSymbol(int c) {
+ return Character.getType(c) == Character.OTHER_SYMBOL;
+ }
+
+ /**
* Returns true for code points which should be considered digits - same as java.lang.Character.isDigit
*/
public boolean isDigit(int c) {