| { | |
| "additional_special_tokens": [ | |
| "hin_Deva", | |
| "ben_Beng", | |
| "mar_Deva", | |
| "tel_Telu", | |
| "tam_Taml", | |
| "guj_Gujr", | |
| "kan_Knda", | |
| "mal_Mlym", | |
| "asm_Beng", | |
| "urd_Arab", | |
| "ory_Orya", | |
| "eng_Latn", | |
| "fra_Latn", | |
| "ita_Latn", | |
| "spa_Latn", | |
| "deu_Latn", | |
| "por_Latn", | |
| "kor_Hang", | |
| "tha_Thai", | |
| "arb_Arab", | |
| "vie_Latn", | |
| "rus_Cyrl", | |
| "jpn_Jpan", | |
| "tur_Latn", | |
| "ukr_Cyrl", | |
| "pes_Arab", | |
| "nld_Latn", | |
| "npi_Deva", | |
| "pol_Latn", | |
| "ind_Latn", | |
| "afr_Latn", | |
| "amh_Ethi", | |
| "hye_Armn", | |
| "bam_Latn", | |
| "bel_Cyrl", | |
| "bul_Cyrl", | |
| "zho_Hans", | |
| "ces_Latn", | |
| "dan_Latn", | |
| "fin_Latn", | |
| "ell_Grek", | |
| "heb_Hebr", | |
| "mri_Latn", | |
| "ron_Latn", | |
| "slv_Latn", | |
| "swe_Latn", | |
| "xho_Latn", | |
| "uig_Arab", | |
| "som_Latn", | |
| "pan_Guru", | |
| "hin_Latn", | |
| "ben_Latn", | |
| "tel_Latn", | |
| "tam_Latn", | |
| "guj_Latn", | |
| "kan_Latn", | |
| "arb_Latn", | |
| "rus_Latn", | |
| "jpn_Latn" | |
| ], | |
| "bos_token": { | |
| "content": "<s>", | |
| "lstrip": false, | |
| "normalized": false, | |
| "rstrip": false, | |
| "single_word": false | |
| }, | |
| "cls_token": { | |
| "content": "<s>", | |
| "lstrip": false, | |
| "normalized": false, | |
| "rstrip": false, | |
| "single_word": false | |
| }, | |
| "eos_token": { | |
| "content": "</s>", | |
| "lstrip": false, | |
| "normalized": false, | |
| "rstrip": false, | |
| "single_word": false | |
| }, | |
| "mask_token": { | |
| "content": "<mask>", | |
| "lstrip": false, | |
| "normalized": true, | |
| "rstrip": false, | |
| "single_word": false | |
| }, | |
| "pad_token": { | |
| "content": "<pad>", | |
| "lstrip": false, | |
| "normalized": false, | |
| "rstrip": false, | |
| "single_word": false | |
| }, | |
| "sep_token": { | |
| "content": "</s>", | |
| "lstrip": false, | |
| "normalized": false, | |
| "rstrip": false, | |
| "single_word": false | |
| }, | |
| "unk_token": { | |
| "content": "<unk>", | |
| "lstrip": false, | |
| "normalized": false, | |
| "rstrip": false, | |
| "single_word": false | |
| } | |
| } | |