Diffstat (limited to 'model-integration/src/test/models/onnx/transformer/tokenizer.json')
-rw-r--r--  model-integration/src/test/models/onnx/transformer/tokenizer.json  175
1 file changed, 175 insertions, 0 deletions
diff --git a/model-integration/src/test/models/onnx/transformer/tokenizer.json b/model-integration/src/test/models/onnx/transformer/tokenizer.json
new file mode 100644
index 00000000000..28340f289bb
--- /dev/null
+++ b/model-integration/src/test/models/onnx/transformer/tokenizer.json
@@ -0,0 +1,175 @@
+{
+ "version": "1.0",
+ "truncation": null,
+ "padding": null,
+ "added_tokens": [
+ {
+ "id": 0,
+ "content": "[PAD]",
+ "single_word": false,
+ "lstrip": false,
+ "rstrip": false,
+ "normalized": false,
+ "special": true
+ },
+ {
+ "id": 100,
+ "content": "[UNK]",
+ "single_word": false,
+ "lstrip": false,
+ "rstrip": false,
+ "normalized": false,
+ "special": true
+ },
+ {
+ "id": 101,
+ "content": "[CLS]",
+ "single_word": false,
+ "lstrip": false,
+ "rstrip": false,
+ "normalized": false,
+ "special": true
+ },
+ {
+ "id": 102,
+ "content": "[SEP]",
+ "single_word": false,
+ "lstrip": false,
+ "rstrip": false,
+ "normalized": false,
+ "special": true
+ },
+ {
+ "id": 103,
+ "content": "[MASK]",
+ "single_word": false,
+ "lstrip": false,
+ "rstrip": false,
+ "normalized": false,
+ "special": true
+ }
+ ],
+ "normalizer": {
+ "type": "BertNormalizer",
+ "clean_text": true,
+ "handle_chinese_chars": true,
+ "strip_accents": null,
+ "lowercase": true
+ },
+ "pre_tokenizer": {
+ "type": "BertPreTokenizer"
+ },
+ "post_processor": {
+ "type": "TemplateProcessing",
+ "single": [
+ {
+ "SpecialToken": {
+ "id": "[CLS]",
+ "type_id": 0
+ }
+ },
+ {
+ "Sequence": {
+ "id": "A",
+ "type_id": 0
+ }
+ },
+ {
+ "SpecialToken": {
+ "id": "[SEP]",
+ "type_id": 0
+ }
+ }
+ ],
+ "pair": [
+ {
+ "SpecialToken": {
+ "id": "[CLS]",
+ "type_id": 0
+ }
+ },
+ {
+ "Sequence": {
+ "id": "A",
+ "type_id": 0
+ }
+ },
+ {
+ "SpecialToken": {
+ "id": "[SEP]",
+ "type_id": 0
+ }
+ },
+ {
+ "Sequence": {
+ "id": "B",
+ "type_id": 1
+ }
+ },
+ {
+ "SpecialToken": {
+ "id": "[SEP]",
+ "type_id": 1
+ }
+ }
+ ],
+ "special_tokens": {
+ "[CLS]": {
+ "id": "[CLS]",
+ "ids": [101],
+ "tokens": ["[CLS]"]
+ },
+ "[SEP]": {
+ "id": "[SEP]",
+ "ids": [102],
+ "tokens": ["[SEP]"]
+ }
+ }
+ },
+ "decoder": {
+ "type": "WordPiece",
+ "prefix": "##",
+ "cleanup": true
+ },
+ "model": {
+ "type": "WordPiece",
+ "unk_token": "[UNK]",
+ "continuing_subword_prefix": "##",
+ "max_input_chars_per_word": 100,
+ "vocab": {
+ "[PAD]": 0,
+ "[unused0]": 1,
+ "[unused1]": 2,
+ "[UNK]": 100,
+ "[CLS]": 101,
+ "[SEP]": 102,
+ "[MASK]": 103,
+ "a": 1037,
+ "b": 1038,
+ "c": 1039,
+ "d": 1040,
+ "e": 1041,
+ "f": 1042,
+ "g": 1043,
+ "h": 1044,
+ "i": 1045,
+ "j": 1046,
+ "k": 1047,
+ "l": 1048,
+ "m": 1049,
+ "n": 1050,
+ "o": 1051,
+ "p": 1052,
+ "q": 1053,
+ "r": 1054,
+ "s": 1055,
+ "t": 1056,
+ "u": 1057,
+ "v": 1058,
+ "w": 1059,
+ "x": 1060,
+ "y": 1061,
+ "z": 1062
+ }
+ }
+}
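
The added file is a Hugging Face tokenizers configuration for a minimal BERT-style WordPiece tokenizer: a lowercasing BertNormalizer, a BertPreTokenizer, a TemplateProcessing post-processor that wraps input in [CLS]/[SEP], a WordPiece decoder, and a WordPiece model whose vocabulary contains only the special tokens and the single letters a-z. As a rough illustration of what this fixture does, the following sketch loads it with the Python `tokenizers` package; this is an assumption for illustration only, not how the test code in this module necessarily consumes the file.

    # Sketch: load the test tokenizer.json with the Hugging Face `tokenizers`
    # Python package (an assumed binding, not necessarily what the tests use)
    # and encode a string the tiny a-z vocabulary can cover.
    from tokenizers import Tokenizer

    tok = Tokenizer.from_file(
        "model-integration/src/test/models/onnx/transformer/tokenizer.json"
    )

    enc = tok.encode("a b c")
    # With the vocabulary above, this should produce
    # tokens ["[CLS]", "a", "b", "c", "[SEP]"] and ids [101, 1037, 1038, 1039, 102],
    # since the TemplateProcessing post-processor adds [CLS]/[SEP] around the sequence.
    print(enc.tokens)
    print(enc.ids)

    # Whole words outside the tiny vocabulary fall back to [UNK] (id 100),
    # because no "##"-prefixed continuation pieces are defined.
    print(tok.encode("hello").ids)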