about | summary | refs | log | tree | commit | diff | stats
path: root/opennlp-linguistics
diff options
context:
space:
mode:
Diffstat (limited to 'opennlp-linguistics')
-rw-r--r-- opennlp-linguistics/src/test/java/com/yahoo/language/opennlp/OpenNlpTokenizationTestCase.java 12
1 file changed, 12 insertions, 0 deletions
diff --git a/opennlp-linguistics/src/test/java/com/yahoo/language/opennlp/OpenNlpTokenizationTestCase.java b/opennlp-linguistics/src/test/java/com/yahoo/language/opennlp/OpenNlpTokenizationTestCase.java
index 78412f94fd4..33e820fbb9a 100644
--- a/opennlp-linguistics/src/test/java/com/yahoo/language/opennlp/OpenNlpTokenizationTestCase.java
+++ b/opennlp-linguistics/src/test/java/com/yahoo/language/opennlp/OpenNlpTokenizationTestCase.java
@@ -2,6 +2,7 @@
package com.yahoo.language.opennlp;
import com.yahoo.language.Language;
+import com.yahoo.language.process.StemList;
import com.yahoo.language.process.StemMode;
import com.yahoo.language.process.Token;
import com.yahoo.language.process.TokenType;
@@ -180,6 +181,17 @@ public class OpenNlpTokenizationTestCase {
}
@Test
+ public void testStemEmojis() { // Checks that stemming a lone emoji yields exactly that emoji as its only stem.
+ var stemmer = new OpenNlpLinguistics().getStemmer();
+ String emoji = "\uD83D\uDD2A"; // 🔪 (U+1F52A, written as a UTF-16 surrogate pair)
+ List<StemList> stems = stemmer.stem(emoji, StemMode.ALL, Language.ENGLISH);
+ assertEquals(1, stems.size()); // the whole input must produce exactly one stem list
+ var stemList = stems.get(0);
+ assertEquals(1, stemList.size()); // ...and that list must hold exactly one stem
+ assertEquals(emoji, stemList.get(0)); // the single stem must equal the input emoji, unchanged
+ }
+
+ @Test
public void testTokenTypes() {
testTokenTypes(Language.ENGLISH);
testTokenTypes(Language.SPANISH);