summary | refs | log | tree | commit | diff | stats
path: root/linguistics
diff options
context:
space:
mode:
Diffstat (limited to 'linguistics')
-rw-r--r-- linguistics/src/test/java/com/yahoo/language/sentencepiece/SentencePieceTest.java 21
1 file changed, 20 insertions, 1 deletion
diff --git a/linguistics/src/test/java/com/yahoo/language/sentencepiece/SentencePieceTest.java b/linguistics/src/test/java/com/yahoo/language/sentencepiece/SentencePieceTest.java
index 70361f55750..7d0c1c5c78e 100644
--- a/linguistics/src/test/java/com/yahoo/language/sentencepiece/SentencePieceTest.java
+++ b/linguistics/src/test/java/com/yahoo/language/sentencepiece/SentencePieceTest.java
@@ -17,7 +17,7 @@ import static org.junit.Assert.assertArrayEquals;
public class SentencePieceTest {
@Test
- public void testEnglishTokenization() throws IOException {
+ public void testEnglishTokenization() {
var tester = new SentencePieceTester(new File("src/test/models/sentencepiece/en.wiki.bpe.vs10000.model").toPath());
tester.assertSegmented("h", "▁h");
tester.assertSegmented("he", "▁he");
@@ -42,6 +42,25 @@ public class SentencePieceTest {
}
@Test
+ public void testNoCollapse() {
+ var tester = new SentencePieceTester(new SentencePieceEncoder.Builder()
+ .addDefaultModel(new File("src/test/models/sentencepiece/en.wiki.bpe.vs10000.model").toPath())
+ .setCollapseUnknowns(false));
+ tester.assertSegmented("KHJ hello", "▁", "K", "H", "J", "▁hel", "lo");
+ }
+
+ @Test
+ public void testHighestScore() {
+ var tester = new SentencePieceTester(new SentencePieceEncoder.Builder()
+ .addDefaultModel(new File("src/test/models/sentencepiece/en.wiki.bpe.vs10000.model").toPath())
+ .setScoring(SentencePieceEncoder.Scoring.highestScore));
+ tester.assertSegmented("h", "▁h");
+ tester.assertSegmented("he", "▁he");
+ tester.assertSegmented("hel", "▁h", "el");
+ tester.assertSegmented("hello", "▁h", "el", "lo");
+ }
+
+ @Test
public void testJapaneseTokenization() throws IOException {
SentencePieceEncoder.Builder builder = new SentencePieceEncoder.Builder();
builder.addModel(Language.JAPANESE, new File("src/test/models/sentencepiece/ja.wiki.bpe.vs5000.model").toPath());