diff options
Diffstat (limited to 'linguistics')
-rw-r--r-- | linguistics/src/main/java/com/yahoo/language/simple/SimpleTokenizer.java | 1 |
1 files changed, 1 insertions, 0 deletions
diff --git a/linguistics/src/main/java/com/yahoo/language/simple/SimpleTokenizer.java b/linguistics/src/main/java/com/yahoo/language/simple/SimpleTokenizer.java index fb876f5f066..75360e93729 100644 --- a/linguistics/src/main/java/com/yahoo/language/simple/SimpleTokenizer.java +++ b/linguistics/src/main/java/com/yahoo/language/simple/SimpleTokenizer.java @@ -104,6 +104,7 @@ public class SimpleTokenizer implements Tokenizer { } private TokenScript determineScript(TokenScript tokenScript, TokenScript characterScript) { + // if any character is LATIN, use that as token script; otherwise use script of first character seen. if (characterScript == TokenScript.LATIN) return TokenScript.LATIN; return tokenScript; } |