{
  "tokenizer_name": "AutoTokenizer",
  "pretrained_model_name": "AutoModel",
  "vocab": {
    "vocab_size": 30522,
    "model_max_length": 512,
    "padding_side": "right",
    "truncation_side": "right",
    "special_tokens": {
      "pad_token": "[PAD]",
      "unk_token": "[UNK]",
      "cls_token": "[CLS]",
      "sep_token": "[SEP]",
      "mask_token": "[MASK]"
    },
    "tokenizer_type": "WordPiece",
    "lowercase": true,
    "pad_token_id": 0,
    "unk_token_id": 100,
    "cls_token_id": 101,
    "sep_token_id": 102,
    "mask_token_id": 103
  },
  "normalization": {
    "lowercase": true,
    "strip_accents": true
  },
  "preprocessing": {
    "do_lower_case": true,
    "handle_chinese_chars": true
  }
}