aboutsummaryrefslogtreecommitdiffstats
path: root/linguistics
diff options
context:
space:
mode:
author MariusArhaug <mariusarhaug@hotmail.com> 2024-04-04 13:49:27 +0200
committer MariusArhaug <mariusarhaug@hotmail.com> 2024-04-04 13:49:27 +0200
commit a1e50d27853b14c6a52028e60e32b8e14e28d1ad (patch)
tree e0eb84d45a49690a3f6355af446ebb789fb1cacb /linguistics
parent 80744246aff5cb9294496842ea27bf703e430c99 (diff)
add comment for intention in determineScript function
Diffstat (limited to 'linguistics')
-rw-r--r-- linguistics/src/main/java/com/yahoo/language/simple/SimpleTokenizer.java 1
1 files changed, 1 insertions, 0 deletions
diff --git a/linguistics/src/main/java/com/yahoo/language/simple/SimpleTokenizer.java b/linguistics/src/main/java/com/yahoo/language/simple/SimpleTokenizer.java
index fb876f5f066..75360e93729 100644
--- a/linguistics/src/main/java/com/yahoo/language/simple/SimpleTokenizer.java
+++ b/linguistics/src/main/java/com/yahoo/language/simple/SimpleTokenizer.java
@@ -104,6 +104,7 @@ public class SimpleTokenizer implements Tokenizer {
}
private TokenScript determineScript(TokenScript tokenScript, TokenScript characterScript) {
+ // LATIN takes precedence: if the current character is LATIN the token script becomes LATIN; otherwise the token script passed in is kept (presumably the script of the first character seen - confirm against the caller).
if (characterScript == TokenScript.LATIN) return TokenScript.LATIN;
return tokenScript;
}