summary refs log tree commit diff stats
path: root/linguistics/src/test/java/com/yahoo/language/process/GramSplitterTestCase.java
diff options
context:
space:
mode:
Diffstat (limited to 'linguistics/src/test/java/com/yahoo/language/process/GramSplitterTestCase.java')
-rw-r--r-- linguistics/src/test/java/com/yahoo/language/process/GramSplitterTestCase.java 11
1 files changed, 11 insertions, 0 deletions
diff --git a/linguistics/src/test/java/com/yahoo/language/process/GramSplitterTestCase.java b/linguistics/src/test/java/com/yahoo/language/process/GramSplitterTestCase.java
index 6cefcfbf67a..a219efce3cd 100644
--- a/linguistics/src/test/java/com/yahoo/language/process/GramSplitterTestCase.java
+++ b/linguistics/src/test/java/com/yahoo/language/process/GramSplitterTestCase.java
@@ -49,6 +49,17 @@ public class GramSplitterTestCase {
}
@Test
+ public void testEmojis() { // Exercises assertGramSplit on inputs containing emojis; the second argument (2) appears to be the gram size - confirm against the helper
+ String emoji1 = "\uD83D\uDD2A"; // 🔪 U+1F52A, written as a UTF-16 surrogate pair (two Java chars, one code point)
+ String emoji2 = "\uD83D\uDE00"; // 😀 U+1F600, likewise a surrogate pair
+ assertGramSplit(emoji1, 2, "[" + emoji1+ "]"); // lone emoji: expected as a single gram
+ assertGramSplit(emoji1 + emoji2, 2, "[" + emoji1 + ", " + emoji2 + "]"); // adjacent emojis: expected as two separate grams, not one combined gram
+ assertGramSplit(emoji1 + "." + emoji2, 2, "[" + emoji1 + ", " + emoji2 + "]"); // '.' between the emojis does not appear in the expected grams
+ assertGramSplit("." + emoji1 + "." + emoji2 + ".", 2, "[" + emoji1 + ", " + emoji2 + "]"); // leading and trailing '.' likewise absent from the expected grams
+ assertGramSplit("foo" + emoji1 + "bar" + emoji2 + "baz", 2, "[fo, oo, " + emoji1 + ", ba, ar, " + emoji2 + ", ba, az]"); // words expected as 2-grams, each emoji as its own gram between them
+ }
+
+ @Test
public void testSpaceCornerCases() {
// space corner cases
assertGramSplit("e en e", 1, "[e, e, n, e]");