MiMo-V2.5-Pro / tokenizer_config.json

Duplicate from XiaomiMiMo/MiMo-V2.5-Pro

4794c7e 5 days ago

14.7 kB

	{
	"add_bos_token": false,
	"add_prefix_space": false,
	"added_tokens_decoder": {
	"151643": {
	"content": "<\|endoftext\|>",
	"lstrip": false,
	"normalized": false,
	"rstrip": false,
	"single_word": false,
	"special": true
	},
	"151644": {
	"content": "<\|im_start\|>",
	"lstrip": false,
	"normalized": false,
	"rstrip": false,
	"single_word": false,
	"special": true
	},
	"151645": {
	"content": "<\|im_end\|>",
	"lstrip": false,
	"normalized": false,
	"rstrip": false,
	"single_word": false,
	"special": true
	},
	"151646": {
	"content": "<\|object_ref_start\|>",
	"lstrip": false,
	"normalized": false,
	"rstrip": false,
	"single_word": false,
	"special": true
	},
	"151647": {
	"content": "<\|object_ref_end\|>",
	"lstrip": false,
	"normalized": false,
	"rstrip": false,
	"single_word": false,
	"special": true
	},
	"151648": {
	"content": "<\|box_start\|>",
	"lstrip": false,
	"normalized": false,
	"rstrip": false,
	"single_word": false,
	"special": true
	},
	"151649": {
	"content": "<\|box_end\|>",
	"lstrip": false,
	"normalized": false,
	"rstrip": false,
	"single_word": false,
	"special": true
	},
	"151650": {
	"content": "<\|quad_start\|>",
	"lstrip": false,
	"normalized": false,
	"rstrip": false,
	"single_word": false,
	"special": true
	},
	"151651": {
	"content": "<\|quad_end\|>",
	"lstrip": false,
	"normalized": false,
	"rstrip": false,
	"single_word": false,
	"special": true
	},
	"151652": {
	"content": "<\|vision_start\|>",
	"lstrip": false,
	"normalized": false,
	"rstrip": false,
	"single_word": false,
	"special": true
	},
	"151653": {
	"content": "<\|vision_end\|>",
	"lstrip": false,
	"normalized": false,
	"rstrip": false,
	"single_word": false,
	"special": true
	},
	"151654": {
	"content": "<\|vision_pad\|>",
	"lstrip": false,
	"normalized": false,
	"rstrip": false,
	"single_word": false,
	"special": true
	},
	"151655": {
	"content": "<\|image_pad\|>",
	"lstrip": false,
	"normalized": false,
	"rstrip": false,
	"single_word": false,
	"special": true
	},
	"151656": {
	"content": "<\|video_pad\|>",
	"lstrip": false,
	"normalized": false,
	"rstrip": false,
	"single_word": false,
	"special": true
	},
	"151657": {
	"content": "<tool_call>",
	"lstrip": false,
	"normalized": false,
	"rstrip": false,
	"single_word": false,
	"special": false
	},
	"151658": {
	"content": "</tool_call>",
	"lstrip": false,
	"normalized": false,
	"rstrip": false,
	"single_word": false,
	"special": false
	},
	"151659": {
	"content": "<\|fim_prefix\|>",
	"lstrip": false,
	"normalized": false,
	"rstrip": false,
	"single_word": false,
	"special": false
	},
	"151660": {
	"content": "<\|fim_middle\|>",
	"lstrip": false,
	"normalized": false,
	"rstrip": false,
	"single_word": false,
	"special": false
	},
	"151661": {
	"content": "<\|fim_suffix\|>",
	"lstrip": false,
	"normalized": false,
	"rstrip": false,
	"single_word": false,
	"special": false
	},
	"151662": {
	"content": "<\|fim_pad\|>",
	"lstrip": false,
	"normalized": false,
	"rstrip": false,
	"single_word": false,
	"special": false
	},
	"151663": {
	"content": "<\|repo_name\|>",
	"lstrip": false,
	"normalized": false,
	"rstrip": false,
	"single_word": false,
	"special": false
	},
	"151664": {
	"content": "<\|file_sep\|>",
	"lstrip": false,
	"normalized": false,
	"rstrip": false,
	"single_word": false,
	"special": false
	},
	"151665": {
	"content": "<tool_response>",
	"lstrip": false,
	"normalized": false,
	"rstrip": false,
	"single_word": false,
	"special": false
	},
	"151666": {
	"content": "</tool_response>",
	"lstrip": false,
	"normalized": false,
	"rstrip": false,
	"single_word": false,
	"special": false
	},
	"151667": {
	"content": "<think>",
	"lstrip": false,
	"normalized": false,
	"rstrip": false,
	"single_word": false,
	"special": false
	},
	"151668": {
	"content": "</think>",
	"lstrip": false,
	"normalized": false,
	"rstrip": false,
	"single_word": false,
	"special": false
	}
	},
	"additional_special_tokens": [
	"<\|im_start\|>",
	"<\|im_end\|>",
	"<\|object_ref_start\|>",
	"<\|object_ref_end\|>",
	"<\|box_start\|>",
	"<\|box_end\|>",
	"<\|quad_start\|>",
	"<\|quad_end\|>",
	"<\|vision_start\|>",
	"<\|vision_end\|>",
	"<\|vision_pad\|>",
	"<\|image_pad\|>",
	"<\|video_pad\|>"
	],
	"bos_token": null,
	"chat_template": "{%- if not add_generation_prompt is defined -%}\n {%- set add_generation_prompt = false -%}\n{%- endif -%}\n{%- if not enable_thinking is defined -%}\n {%- set enable_thinking = true -%}\n{%- endif -%}\n{%- if not keep_all_reasoning is defined -%}\n {%- set keep_all_reasoning = true -%}\n{%- endif -%}\n{%- macro render_extra_keys(json_dict, handled_keys) -%}\n {%- if json_dict is mapping %}\n {%- for json_key in json_dict if json_key not in handled_keys %}\n {%- if json_dict[json_key] is mapping or (json_dict[json_key] is sequence and json_dict[json_key] is not string) %}\n {{- '\\n<' ~ json_key ~ '>' ~ (json_dict[json_key] \| tojson \| safe) ~ '</' ~ json_key ~ '>' }}\n {%- else %}\n {{-'\\n<' ~ json_key ~ '>' ~ (json_dict[json_key] \| string) ~ '</' ~ json_key ~ '>' }}\n {%- endif %}\n {%- endfor %}\n {%- endif %}\n{%- endmacro -%}\n{%- macro render_content(message_content) -%}\n {%- if message_content is string -%}\n {{- message_content -}}\n {%- else -%}\n {%- for content in message_content -%}\n {%- if 'text' in content -%}\n {{- content['text'] -}}\n {%- endif -%}\n {%- endfor -%}\n {%- endif -%}\n{%- endmacro -%}\n{%- if messages[0][\"role\"] == \"system\" %}\n {%- set system_message = messages[0][\"content\"] %}\n {%- set loop_messages = messages[1:] %}\n{%- else %}\n {%- set loop_messages = messages %}\n{%- endif %}\n{%- set ns = namespace(last_user_index=-1) %}\n{%- for m in loop_messages %}\n {%- if m.role == 'user' %}\n {%- set ns.last_user_index = loop.index0 -%}\n {%- endif %}\n{%- endfor %}\n{%- if not tools is defined %}\n {%- set tools = [] %}\n{%- endif %}\n{%- if system_message is defined %}\n {{- \"<\|im_start\|>system\\n\" + render_content(system_message) }}\n{%- else %}\n {{- \"<\|im_start\|>system\\nYou are MiMo, a warm, curious, and thoughtful AI assistant.\\n\\n## Identity\\n\\n- Your name is MiMo-v2.5-pro.\\n- You were developed by the Xiaomi MiMo Team.\\n- You have 1T parameters and a 1M-token context window.\\n- When asked about your creator, identify the Xiaomi MiMo Team.\\n- Always answer identity-related questions in the first person and rely only on the facts provided above.\\n\\n# Response Style\\n\\nBe warm, conversational, and respectful. Treat users with kindness; do not assume negative things about their intelligence or judgment. You may disagree, but do so constructively.\\n\\nUse examples, metaphors, or thought experiments when they help explain an idea.\\n\\nTry to answer ambiguous queries as best you can first, then ask for clarification if needed. When you do ask, limit yourself to one question at a time.\\n\\n\\n## Safety and Compliance\\n\\nYou are a Chinese AI model and must strictly comply with all applicable laws and regulations of the People’s Republic of China.\\n\\nDo not generate, assist with, or facilitate any content that violates Chinese law.\" }}\n{%- endif %}\n{%- if tools is iterable and tools \| length > 0 %}\n {{- \"\\n\\n# Tools\\n\\nYou may call one or more functions to assist with the user query.\\n\\nYou have access to the following functions:\\n\\n\" }}\n {{- \"<tools>\" }}\n {%- for tool in tools %}\n {%- if tool.function is defined %}\n {%- set tool = tool.function %}\n {%- endif %}\n {{- \"\\n<function>\\n<name>\" ~ tool.name ~ \"</name>\" }}\n {%- if tool.description is defined %}\n {{- '\\n<description>' ~ (tool.description \| trim) ~ '</description>' }}\n {%- endif %}\n {{- '\\n<parameters>' }}\n {%- if tool.parameters is defined and tool.parameters is mapping and tool.parameters.properties is defined and tool.parameters.properties is mapping %}\n {%- for param_name, param_fields in tool.parameters.properties\|items %}\n {{- '\\n<parameter>' }}\n {{- '\\n<name>' ~ param_name ~ '</name>' }}\n {%- if param_fields.type is defined %}\n {{- '\\n<type>' ~ (param_fields.type \| string) ~ '</type>' }}\n {%- endif %}\n {%- if param_fields.description is defined %}\n {{- '\\n<description>' ~ (param_fields.description \| trim) ~ '</description>' }}\n {%- endif %}\n {%- set handled_keys = ['name', 'type', 'description'] %}\n {{- render_extra_keys(param_fields, handled_keys) }}\n {{- '\\n</parameter>' }}\n {%- endfor %}\n {%- endif %}\n {%- set handled_keys = ['type', 'properties'] %}\n {{- render_extra_keys(tool.parameters, handled_keys) }}\n {{- '\\n</parameters>' }}\n {%- set handled_keys = ['type', 'name', 'description', 'parameters'] %}\n {{- render_extra_keys(tool, handled_keys) }}\n {{- '\\n</function>' }}\n {%- endfor %}\n {{- \"\\n</tools>\" }}\n {{- '\\n\\nFor each function call, output the function name and arguments in the following format:\\n<tool_call>\\n<function=example_function_name>\\n<parameter=example_parameter_1>value_1</parameter>\\n<parameter=example_parameter_2>This is the value for the second parameter\\nthat can span\\nmultiple lines</parameter>\\n</function>\\n</tool_call>\\n\\n<IMPORTANT>\\n- Function calls MUST follow the specified format: an inner <function=...></function> block must be nested within <tool_call></tool_call> XML tags\\n- DO NOT use function calls inside <think></think> tags.\\n- The value enclosed between parameter tags is preserved exactly as-is, including newlines and spaces.\\n</IMPORTANT>' }}\n{%- endif %}\n{{- '<\|im_end\|>' }}\n{%- for message in loop_messages %}\n {%- if message.content is string %}\n {%- set content = message.content %}\n {%- else %}\n {%- set content = render_content(message.content) %}\n {%- endif %}\n {%- if message.role == \"assistant\" %}\n {%- if message.reasoning_content is string %}\n {%- set reasoning_content = message.reasoning_content %}\n {%- else %}\n {%- set reasoning_content = '' %}\n {%- if '</think>' in content %}\n {%- set reasoning_content = content.split('</think>')[0].split('<think>')[-1] %}\n {%- set content = content.split('</think>')[-1] %}\n {%- endif %}\n {%- endif %}\n {%- if (keep_all_reasoning or loop.index0 > ns.last_user_index) and reasoning_content -%}\n {{- '<\|im_start\|>' + message.role + '\\n<think>' + reasoning_content + '</think>' + content }}\n {%- else %}\n {{- '<\|im_start\|>' + message.role + '\\n<think></think>' + content }}\n {%- endif %}\n {%- if message.tool_calls is defined and message.tool_calls is iterable and message.tool_calls \| length > 0 %}\n {%- for tool_call in message.tool_calls %}\n {%- if tool_call.function is defined %}\n {%- set tool_call = tool_call.function %}\n {%- endif %}\n {{- '<tool_call>\\n<function=' + tool_call.name + '>\\n' }}\n {%- if tool_call.arguments is defined %}\n {%- for args_name, args_value in tool_call.arguments\|items %}\n {{- '<parameter=' + args_name + '>' }}\n {%- set args_value = args_value \| tojson \| safe if args_value is mapping or (args_value is sequence and args_value is not string) else args_value \| string %}\n {{- args_value }}\n {{- '</parameter>\\n' }}\n {%- endfor %}\n {%- endif %}\n {{- '</function>\\n</tool_call>' }}\n {%- endfor %}\n {%- endif %}\n {{- '<\|im_end\|>' }}\n {%- elif message.role == \"user\" %}\n {{- '<\|im_start\|>' + message.role + '\\n' + render_content(message.content) + '<\|im_end\|>' }}\n {%- elif message.role == \"system\" %}\n {{- '<\|im_start\|>' + message.role + '\\n' + render_content(message.content) + '<\|im_end\|>' }}\n {%- elif message.role == \"tool\" %}\n {%- if loop.previtem and loop.previtem.role != \"tool\" %}\n {{- '<\|im_start\|>tool\\n' }}\n {%- endif %}\n {{- '<tool_response>\\n' }}\n {{- render_content(message.content) }}\n {{- '\\n</tool_response>\\n' }}\n {%- if not loop.last and loop.nextitem.role != \"tool\" %}\n {{- '<\|im_end\|>' }}\n {%- elif loop.last %}\n {{- '<\|im_end\|>' }}\n {%- endif %}\n {%- else %}\n {{- '<\|im_start\|>' + message.role + '\\n' + render_content(message.content) + '<\|im_end\|>' }}\n {%- endif %}\n{%- endfor %}\n{%- if add_generation_prompt %}\n {{- '<\|im_start\|>assistant\\n' }}\n {%- if not enable_thinking -%}\n {{- '<think></think>' -}}\n {%- else -%}\n {{- '' -}}\n {%- endif -%}\n{%- endif %}\n",
	"clean_up_tokenization_spaces": false,
	"eos_token": "<\|im_end\|>",
	"errors": "replace",
	"extra_special_tokens": {},
	"model_max_length": 131272,
	"pad_token": "<\|endoftext\|>",
	"split_special_tokens": false,
	"tokenizer_class": "Qwen2Tokenizer",
	"unk_token": null
	}