summary | refs | log | tree | commit | diff | stats
path: root/linguistics-components/src/test/java/com/yahoo/language/wordpiece/WordPieceEmbedderTest.java
diff options
context:
space:
mode:
author: Jon Bratseth <bratseth@gmail.com> 2021-12-17 12:49:32 +0100
committer: Jon Bratseth <bratseth@gmail.com> 2021-12-17 12:49:32 +0100
commit: 468ebc9a5527eaff02207443f7240e6da21fa7ac (patch)
tree: 2bd1d2844512765918c56caf86e7d534e62b3024 /linguistics-components/src/test/java/com/yahoo/language/wordpiece/WordPieceEmbedderTest.java
parent: 601b117281b74a578126a0f3effead55bc79c680 (diff)
Test segmentation with subwords
Diffstat (limited to 'linguistics-components/src/test/java/com/yahoo/language/wordpiece/WordPieceEmbedderTest.java')
-rw-r--r-- linguistics-components/src/test/java/com/yahoo/language/wordpiece/WordPieceEmbedderTest.java 12
1 files changed, 9 insertions, 3 deletions
diff --git a/linguistics-components/src/test/java/com/yahoo/language/wordpiece/WordPieceEmbedderTest.java b/linguistics-components/src/test/java/com/yahoo/language/wordpiece/WordPieceEmbedderTest.java
index 4cbfe541327..13e0cbce10d 100644
--- a/linguistics-components/src/test/java/com/yahoo/language/wordpiece/WordPieceEmbedderTest.java
+++ b/linguistics-components/src/test/java/com/yahoo/language/wordpiece/WordPieceEmbedderTest.java
@@ -15,13 +15,19 @@ public class WordPieceEmbedderTest {
private static final String vocabulary = "src/test/models/wordpiece/bert-base-uncased-vocab.txt";
@Test
- public void testWordPieceEmbedder() {
+ public void testWordPieceSegmentation() {
+ var tester = new EmbedderTester(new WordPieceEmbedder.Builder(vocabulary).build());
+ tester.assertSegmented("what was the impact of the manhattan project",
+ "what", "was", "the", "impact", "of", "the", "manhattan", "project");
+ tester.assertSegmented("overcommunication", "over", "##com", "##mun", "##ication");
+ }
+
+ @Test
+ public void testWordPieceEmbedding() {
var tester = new EmbedderTester(new WordPieceEmbedder.Builder(vocabulary).build());
tester.assertEmbedded("what was the impact of the manhattan project",
"tensor(x[8])",
2054, 2001, 1996, 4254, 1997, 1996, 7128, 2622);
- tester.assertSegmented("what was the impact of the manhattan project",
- "what", "was", "the", "impact", "of", "the", "manhattan", "project");
}
@Test