summaryrefslogtreecommitdiffstats
path: root/linguistics/src/test/java/com/yahoo/language/simple/SimpleTokenizerTestCase.java
diff options
context:
space:
mode:
Diffstat (limited to 'linguistics/src/test/java/com/yahoo/language/simple/SimpleTokenizerTestCase.java')
-rw-r--r--linguistics/src/test/java/com/yahoo/language/simple/SimpleTokenizerTestCase.java18
1 files changed, 18 insertions, 0 deletions
diff --git a/linguistics/src/test/java/com/yahoo/language/simple/SimpleTokenizerTestCase.java b/linguistics/src/test/java/com/yahoo/language/simple/SimpleTokenizerTestCase.java
index 05a2e35f09f..ad8f990ef83 100644
--- a/linguistics/src/test/java/com/yahoo/language/simple/SimpleTokenizerTestCase.java
+++ b/linguistics/src/test/java/com/yahoo/language/simple/SimpleTokenizerTestCase.java
@@ -5,6 +5,7 @@ import com.yahoo.language.Language;
import com.yahoo.language.process.AbstractTokenizerTestCase;
import com.yahoo.language.process.StemMode;
import com.yahoo.language.process.Token;
+import com.yahoo.language.process.TokenScript;
import org.junit.Test;
import java.util.Iterator;
@@ -52,4 +53,21 @@ public class SimpleTokenizerTestCase extends AbstractTokenizerTestCase {
tester.assertTokens(emoji1 + emoji2, emoji1, emoji2);
}
+ @Test public void testTokenizeScripts() { // passes a mixed-script sentence and eleven expected TokenScript values to assertTokenScripts
+ TokenizerTester tester = new TokenizerTester().setStemMode(StemMode.NONE); // tester configured with StemMode.NONE
+
+ tester.assertTokenScripts("anyone is արևելահայերեն by ancient कार्य", // 6 words + 5 spaces; labels below assume one expected value per word and per space, in order - TODO confirm against assertTokenScripts
+ TokenScript.LATIN, // "anyone"
+ TokenScript.COMMON, // presumably the separating space
+ TokenScript.LATIN, // "is"
+ TokenScript.COMMON, // presumably the separating space
+ TokenScript.ARMENIAN, // "արևելահայերեն"
+ TokenScript.COMMON, // presumably the separating space
+ TokenScript.LATIN, // "by"
+ TokenScript.COMMON, // presumably the separating space
+ TokenScript.LATIN, // "ancient"
+ TokenScript.COMMON, // presumably the separating space
+ TokenScript.DEVANAGARI); // "कार्य"
+ }
}
+