diff options
author | Bjørn Christian Seime <bjorncs@yahooinc.com> | 2023-06-08 14:41:30 +0200 |
---|---|---|
committer | Bjørn Christian Seime <bjorncs@yahooinc.com> | 2023-06-08 14:41:30 +0200 |
commit | 53e8203706e41e07875f37d0343b9e97a33b12f6 (patch) | |
tree | cb97b5fc65e18f75ad1369f9e8ddeb103ee9781e /linguistics-components | |
parent | 794b62b71cc64e1ad2cb3a40865ff65653d4240f (diff) |
Verify presence of special token
Diffstat (limited to 'linguistics-components')
-rw-r--r-- | linguistics-components/src/test/java/com/yahoo/language/huggingface/HuggingFaceTokenizerTest.java | 9 |
1 file changed, 7 insertions, 2 deletions
diff --git a/linguistics-components/src/test/java/com/yahoo/language/huggingface/HuggingFaceTokenizerTest.java b/linguistics-components/src/test/java/com/yahoo/language/huggingface/HuggingFaceTokenizerTest.java index 8b34e1487be..67f94800c39 100644 --- a/linguistics-components/src/test/java/com/yahoo/language/huggingface/HuggingFaceTokenizerTest.java +++ b/linguistics-components/src/test/java/com/yahoo/language/huggingface/HuggingFaceTokenizerTest.java @@ -16,6 +16,7 @@ import java.nio.file.StandardOpenOption; import java.util.zip.GZIPInputStream; import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertNotEquals; /** @@ -88,8 +89,12 @@ class HuggingFaceTokenizerTest { String input = "what was the impact of the manhattan project"; try (var tokenizerWithoutSpecialTokens = builder.addSpecialTokens(false).build(); var tokenizerWithSpecialTokens = builder.addSpecialTokens(true).build()) { - assertMaxLengthRespected(maxLength, tokenizerWithoutSpecialTokens.encode(input)); - assertMaxLengthRespected(maxLength, tokenizerWithSpecialTokens.encode(input)); + var encodingWithoutSpecialTokens = tokenizerWithoutSpecialTokens.encode(input); + assertMaxLengthRespected(maxLength, encodingWithoutSpecialTokens); + assertNotEquals(101, encodingWithoutSpecialTokens.ids().get(0)); + var encodingWithSpecialTokens = tokenizerWithSpecialTokens.encode(input); + assertMaxLengthRespected(maxLength, encodingWithSpecialTokens); + assertEquals(101, encodingWithSpecialTokens.ids().get(0)); } } |