diff options
Diffstat (limited to 'model-integration/src/test/models/onnx/transformer/tokenizer.json')
-rw-r--r-- | model-integration/src/test/models/onnx/transformer/tokenizer.json | 175 |
1 files changed, 175 insertions, 0 deletions
diff --git a/model-integration/src/test/models/onnx/transformer/tokenizer.json b/model-integration/src/test/models/onnx/transformer/tokenizer.json new file mode 100644 index 00000000000..28340f289bb --- /dev/null +++ b/model-integration/src/test/models/onnx/transformer/tokenizer.json @@ -0,0 +1,175 @@ +{ + "version": "1.0", + "truncation": null, + "padding": null, + "added_tokens": [ + { + "id": 0, + "content": "[PAD]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 100, + "content": "[UNK]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 101, + "content": "[CLS]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 102, + "content": "[SEP]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 103, + "content": "[MASK]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + } + ], + "normalizer": { + "type": "BertNormalizer", + "clean_text": true, + "handle_chinese_chars": true, + "strip_accents": null, + "lowercase": true + }, + "pre_tokenizer": { + "type": "BertPreTokenizer" + }, + "post_processor": { + "type": "TemplateProcessing", + "single": [ + { + "SpecialToken": { + "id": "[CLS]", + "type_id": 0 + } + }, + { + "Sequence": { + "id": "A", + "type_id": 0 + } + }, + { + "SpecialToken": { + "id": "[SEP]", + "type_id": 0 + } + } + ], + "pair": [ + { + "SpecialToken": { + "id": "[CLS]", + "type_id": 0 + } + }, + { + "Sequence": { + "id": "A", + "type_id": 0 + } + }, + { + "SpecialToken": { + "id": "[SEP]", + "type_id": 0 + } + }, + { + "Sequence": { + "id": "B", + "type_id": 1 + } + }, + { + "SpecialToken": { + "id": "[SEP]", + "type_id": 1 + } + } + ], + "special_tokens": { + "[CLS]": { + "id": "[CLS]", + "ids": [101], + "tokens": ["[CLS]"] + }, + "[SEP]": { + "id": "[SEP]", + "ids": [102], + "tokens": ["[SEP]"] + } + } + }, + "decoder": { + "type": "WordPiece", + "prefix": "##", + "cleanup": true + }, + "model": { + "type": "WordPiece", + "unk_token": "[UNK]", + "continuing_subword_prefix": "##", + "max_input_chars_per_word": 100, + "vocab": { + "[PAD]": 0, + "[unused0]": 1, + "[unused1]": 2, + "[UNK]": 100, + "[CLS]": 101, + "[SEP]": 102, + "[MASK]": 103, + "a": 1037, + "b": 1038, + "c": 1039, + "d": 1040, + "e": 1041, + "f": 1042, + "g": 1043, + "h": 1044, + "i": 1045, + "j": 1046, + "k": 1047, + "l": 1048, + "m": 1049, + "n": 1050, + "o": 1051, + "p": 1052, + "q": 1053, + "r": 1054, + "s": 1055, + "t": 1056, + "u": 1057, + "v": 1058, + "w": 1059, + "x": 1060, + "y": 1061, + "z": 1062 + } + } +} |