diff options
Diffstat (limited to 'linguistics-components')
-rw-r--r-- | linguistics-components/src/test/java/com/yahoo/language/huggingface/HuggingFaceTokenizerTest.java | 9 |
1 files changed, 7 insertions, 2 deletions
diff --git a/linguistics-components/src/test/java/com/yahoo/language/huggingface/HuggingFaceTokenizerTest.java b/linguistics-components/src/test/java/com/yahoo/language/huggingface/HuggingFaceTokenizerTest.java index 8b34e1487be..67f94800c39 100644 --- a/linguistics-components/src/test/java/com/yahoo/language/huggingface/HuggingFaceTokenizerTest.java +++ b/linguistics-components/src/test/java/com/yahoo/language/huggingface/HuggingFaceTokenizerTest.java @@ -16,6 +16,7 @@ import java.nio.file.StandardOpenOption; import java.util.zip.GZIPInputStream; import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertNotEquals; /** @@ -88,8 +89,12 @@ class HuggingFaceTokenizerTest { String input = "what was the impact of the manhattan project"; try (var tokenizerWithoutSpecialTokens = builder.addSpecialTokens(false).build(); var tokenizerWithSpecialTokens = builder.addSpecialTokens(true).build()) { - assertMaxLengthRespected(maxLength, tokenizerWithoutSpecialTokens.encode(input)); - assertMaxLengthRespected(maxLength, tokenizerWithSpecialTokens.encode(input)); + var encodingWithoutSpecialTokens = tokenizerWithoutSpecialTokens.encode(input); + assertMaxLengthRespected(maxLength, encodingWithoutSpecialTokens); + assertNotEquals(101, encodingWithoutSpecialTokens.ids().get(0)); + var encodingWithSpecialTokens = tokenizerWithSpecialTokens.encode(input); + assertMaxLengthRespected(maxLength, encodingWithSpecialTokens); + assertEquals(101, encodingWithSpecialTokens.ids().get(0)); } } |