diff options
Diffstat (limited to 'linguistics/src')
-rw-r--r-- | linguistics/src/main/java/com/yahoo/language/process/GramSplitter.java | 2 | ||||
-rw-r--r-- | linguistics/src/test/java/com/yahoo/language/process/GramSplitterTestCase.java | 19 |
2 files changed, 20 insertions, 1 deletions
diff --git a/linguistics/src/main/java/com/yahoo/language/process/GramSplitter.java b/linguistics/src/main/java/com/yahoo/language/process/GramSplitter.java
index 8a255dd5370..c5ce3278d72 100644
--- a/linguistics/src/main/java/com/yahoo/language/process/GramSplitter.java
+++ b/linguistics/src/main/java/com/yahoo/language/process/GramSplitter.java
@@ -153,7 +153,7 @@ public class GramSplitter {
      */
     public static final class Gram {
 
-        private int start, codePointCount;
+        private final int start, codePointCount;
 
         public Gram(int start, int codePointCount) {
             this.start = start;
diff --git a/linguistics/src/test/java/com/yahoo/language/process/GramSplitterTestCase.java b/linguistics/src/test/java/com/yahoo/language/process/GramSplitterTestCase.java
index 8fa23626193..a37ef6bfde5 100644
--- a/linguistics/src/test/java/com/yahoo/language/process/GramSplitterTestCase.java
+++ b/linguistics/src/test/java/com/yahoo/language/process/GramSplitterTestCase.java
@@ -167,6 +167,25 @@ public class GramSplitterTestCase {
         assertFalse(grams.hasNext());
     }
 
+    @Test
+    public void testChineseComma() {
+        String text = "我喜欢红色、蓝色和紫色";
+        Iterator<GramSplitter.Gram> grams = gramSplitter.split(text, 2);
+        for (; grams.hasNext(); ) {
+            System.out.println(grams.next().extractFrom(text));
+        }
+    }
+
+    @Test
+    public void testEnglishComma() {
+        String text = "我喜欢红色,蓝色和紫色";
+        Iterator<GramSplitter.Gram> grams = gramSplitter.split(text, 2);
+        for (; grams.hasNext(); ) {
+            System.out.println(grams.next().extractFrom(text));
+        }
+    }
+
+
     private void assertGramSplits(String input, int gramSize, String ... expected) {
         assertEquals(Arrays.asList(expected), gramSplitter.split(input, gramSize).toExtractedList());
     }
|