diff options
author | Jon Bratseth <bratseth@gmail.com> | 2021-12-17 12:49:32 +0100 |
---|---|---|
committer | Jon Bratseth <bratseth@gmail.com> | 2021-12-17 12:49:32 +0100 |
commit | 468ebc9a5527eaff02207443f7240e6da21fa7ac (patch) | |
tree | 2bd1d2844512765918c56caf86e7d534e62b3024 /linguistics-components/src/test/java/com/yahoo/language/wordpiece/WordPieceEmbedderTest.java | |
parent | 601b117281b74a578126a0f3effead55bc79c680 (diff) |
Test segmentation with subwords
Diffstat (limited to 'linguistics-components/src/test/java/com/yahoo/language/wordpiece/WordPieceEmbedderTest.java')
-rw-r--r-- | linguistics-components/src/test/java/com/yahoo/language/wordpiece/WordPieceEmbedderTest.java | 12 |
1 files changed, 9 insertions, 3 deletions
```diff
diff --git a/linguistics-components/src/test/java/com/yahoo/language/wordpiece/WordPieceEmbedderTest.java b/linguistics-components/src/test/java/com/yahoo/language/wordpiece/WordPieceEmbedderTest.java
index 4cbfe541327..13e0cbce10d 100644
--- a/linguistics-components/src/test/java/com/yahoo/language/wordpiece/WordPieceEmbedderTest.java
+++ b/linguistics-components/src/test/java/com/yahoo/language/wordpiece/WordPieceEmbedderTest.java
@@ -15,13 +15,19 @@ public class WordPieceEmbedderTest {
     private static final String vocabulary = "src/test/models/wordpiece/bert-base-uncased-vocab.txt";
 
     @Test
-    public void testWordPieceEmbedder() {
+    public void testWordPieceSegmentation() {
+        var tester = new EmbedderTester(new WordPieceEmbedder.Builder(vocabulary).build());
+        tester.assertSegmented("what was the impact of the manhattan project",
+                               "what", "was", "the", "impact", "of", "the", "manhattan", "project");
+        tester.assertSegmented("overcommunication", "over", "##com", "##mun", "##ication");
+    }
+
+    @Test
+    public void testWordPieceEmbedding() {
         var tester = new EmbedderTester(new WordPieceEmbedder.Builder(vocabulary).build());
         tester.assertEmbedded("what was the impact of the manhattan project",
                               "tensor(x[8])",
                               2054, 2001, 1996, 4254, 1997, 1996, 7128, 2622);
-        tester.assertSegmented("what was the impact of the manhattan project",
-                               "what", "was", "the", "impact", "of", "the", "manhattan", "project");
     }
 
     @Test
```