summaryrefslogtreecommitdiffstats
path: root/linguistics-components
diff options
context:
space:
mode:
author Bjørn Christian Seime <bjorncs@yahooinc.com> 2023-06-08 14:41:30 +0200
committer Bjørn Christian Seime <bjorncs@yahooinc.com> 2023-06-08 14:41:30 +0200
commit 53e8203706e41e07875f37d0343b9e97a33b12f6 (patch)
tree cb97b5fc65e18f75ad1369f9e8ddeb103ee9781e /linguistics-components
parent 794b62b71cc64e1ad2cb3a40865ff65653d4240f (diff)
Verify presence of special token
Diffstat (limited to 'linguistics-components')
-rw-r--r-- linguistics-components/src/test/java/com/yahoo/language/huggingface/HuggingFaceTokenizerTest.java 9
1 file changed, 7 insertions, 2 deletions
diff --git a/linguistics-components/src/test/java/com/yahoo/language/huggingface/HuggingFaceTokenizerTest.java b/linguistics-components/src/test/java/com/yahoo/language/huggingface/HuggingFaceTokenizerTest.java
index 8b34e1487be..67f94800c39 100644
--- a/linguistics-components/src/test/java/com/yahoo/language/huggingface/HuggingFaceTokenizerTest.java
+++ b/linguistics-components/src/test/java/com/yahoo/language/huggingface/HuggingFaceTokenizerTest.java
@@ -16,6 +16,7 @@ import java.nio.file.StandardOpenOption;
import java.util.zip.GZIPInputStream;
import static org.junit.jupiter.api.Assertions.assertEquals;
+import static org.junit.jupiter.api.Assertions.assertNotEquals;
/**
@@ -88,8 +89,12 @@ class HuggingFaceTokenizerTest {
String input = "what was the impact of the manhattan project";
try (var tokenizerWithoutSpecialTokens = builder.addSpecialTokens(false).build();
var tokenizerWithSpecialTokens = builder.addSpecialTokens(true).build()) {
- assertMaxLengthRespected(maxLength, tokenizerWithoutSpecialTokens.encode(input));
- assertMaxLengthRespected(maxLength, tokenizerWithSpecialTokens.encode(input));
+ var encodingWithoutSpecialTokens = tokenizerWithoutSpecialTokens.encode(input);
+ assertMaxLengthRespected(maxLength, encodingWithoutSpecialTokens);
+ assertNotEquals(101, encodingWithoutSpecialTokens.ids().get(0));
+ var encodingWithSpecialTokens = tokenizerWithSpecialTokens.encode(input);
+ assertMaxLengthRespected(maxLength, encodingWithSpecialTokens);
+ assertEquals(101, encodingWithSpecialTokens.ids().get(0));
}
}