aboutsummaryrefslogtreecommitdiffstats
path: root/linguistics
diff options
context:
space:
mode:
Diffstat (limited to 'linguistics')
-rw-r--r--linguistics/src/main/java/com/yahoo/language/simple/SimpleTokenizer.java1
1 files changed, 1 insertions, 0 deletions
diff --git a/linguistics/src/main/java/com/yahoo/language/simple/SimpleTokenizer.java b/linguistics/src/main/java/com/yahoo/language/simple/SimpleTokenizer.java
index fb876f5f066..75360e93729 100644
--- a/linguistics/src/main/java/com/yahoo/language/simple/SimpleTokenizer.java
+++ b/linguistics/src/main/java/com/yahoo/language/simple/SimpleTokenizer.java
@@ -104,6 +104,7 @@ public class SimpleTokenizer implements Tokenizer {
}
private TokenScript determineScript(TokenScript tokenScript, TokenScript characterScript) {
+ // A LATIN character makes the whole token LATIN; otherwise keep the script carried so far in tokenScript (presumably that of the first character seen - set by the caller, confirm there).
if (characterScript == TokenScript.LATIN) return TokenScript.LATIN;
return tokenScript;
}