"""Decode a sample sequence of token ids with the THUDM/glm-10b tokenizer.

Default tokenizer in use: GLMGPT2Tokenizer.
"""
from transformers import AutoTokenizer

# trust_remote_code=True lets transformers execute the tokenizer code that is
# shipped inside the model repository, so only use it with a trusted repo.
tokenizer = AutoTokenizer.from_pretrained("THUDM/glm-10b", trust_remote_code=True)

# Alternative sample inputs from the original script, kept for reference:
#   [3856, 11030]
#   [2484, 272, 20380]
tokens_id = [
    50259, 51, 12215, 33061, 2059, 318, 5140, 287, 50260, 13,
    50256, 50256, 50256, 50256, 50256, 50257, 3856, 50257, 2484, 272,
]

# Decode the whole id sequence back into text with a single call.
print(tokenizer.decode(tokens_id))
# NOTE(review): the original had a bare `''` comment here, presumably a
# recorded output from an earlier run -- confirm.
# A disabled earlier variant called tokenizer.DecodeIds(tokens_id) instead.

# Print each id next to the token string it maps to.
for idx in tokens_id:
    print(idx, tokenizer.convert_ids_to_tokens(idx))