diff options
Diffstat (limited to 'opennlp-linguistics/src/test/java/com/yahoo/language/opennlp/OpenNlpTokenizationTestCase.java')
-rw-r--r-- | opennlp-linguistics/src/test/java/com/yahoo/language/opennlp/OpenNlpTokenizationTestCase.java | 12 |
1 files changed, 12 insertions, 0 deletions
diff --git a/opennlp-linguistics/src/test/java/com/yahoo/language/opennlp/OpenNlpTokenizationTestCase.java b/opennlp-linguistics/src/test/java/com/yahoo/language/opennlp/OpenNlpTokenizationTestCase.java index 78412f94fd4..33e820fbb9a 100644 --- a/opennlp-linguistics/src/test/java/com/yahoo/language/opennlp/OpenNlpTokenizationTestCase.java +++ b/opennlp-linguistics/src/test/java/com/yahoo/language/opennlp/OpenNlpTokenizationTestCase.java @@ -2,6 +2,7 @@ package com.yahoo.language.opennlp; import com.yahoo.language.Language; +import com.yahoo.language.process.StemList; import com.yahoo.language.process.StemMode; import com.yahoo.language.process.Token; import com.yahoo.language.process.TokenType; @@ -180,6 +181,17 @@ public class OpenNlpTokenizationTestCase { } @Test + public void testStemEmojis() { + var stemmer = new OpenNlpLinguistics().getStemmer(); + String emoji = "\uD83D\uDD2A"; // 🔪 + List<StemList> stems = stemmer.stem(emoji, StemMode.ALL, Language.ENGLISH); + assertEquals(1, stems.size()); + var stemList = stems.get(0); + assertEquals(1, stemList.size()); + assertEquals(emoji, stemList.get(0)); + } + + @Test public void testTokenTypes() { testTokenTypes(Language.ENGLISH); testTokenTypes(Language.SPANISH); |