diff options
Diffstat (limited to 'linguistics-components/src/test/java/com/yahoo/language/huggingface/HuggingFaceTokenizerTest.java')
-rw-r--r-- | linguistics-components/src/test/java/com/yahoo/language/huggingface/HuggingFaceTokenizerTest.java | 9 |
1 file changed, 8 insertions, 1 deletion
diff --git a/linguistics-components/src/test/java/com/yahoo/language/huggingface/HuggingFaceTokenizerTest.java b/linguistics-components/src/test/java/com/yahoo/language/huggingface/HuggingFaceTokenizerTest.java
index bf2e0f82829..f727252cccb 100644
--- a/linguistics-components/src/test/java/com/yahoo/language/huggingface/HuggingFaceTokenizerTest.java
+++ b/linguistics-components/src/test/java/com/yahoo/language/huggingface/HuggingFaceTokenizerTest.java
@@ -99,7 +99,7 @@ class HuggingFaceTokenizerTest {
     }
 
     @Test
-    void disables_padding_by_default() throws IOException {
+    void pads_to_max_length() throws IOException {
         var builder = new HuggingFaceTokenizer.Builder()
                 .setTruncation(true)
                 .addDefaultModel(decompressModelFile(tmp, "bert-base-uncased"))
@@ -114,6 +114,13 @@ class HuggingFaceTokenizerTest {
         }
     }
 
+    @Test
+    void provides_model_info() throws IOException {
+        var expected = new ModelInfo(ModelInfo.TruncationStrategy.LONGEST_FIRST, ModelInfo.PaddingStrategy.LONGEST, 128, 0, 0);
+        var actual = HuggingFaceTokenizer.getModelInfo(decompressModelFile(tmp, "paraphrase-multilingual-mpnet-base-v2"));
+        assertEquals(expected, actual);
+    }
+
     private static void assertMaxLengthRespected(int maxLength, Encoding encoding) {
         assertEquals(maxLength, encoding.ids().size());
         assertEquals(maxLength, encoding.tokens().size());