RepublicOfKorokke committed on
Commit
387641c
·
verified ·
1 Parent(s): 0f9cafa

Upload folder using huggingface_hub

Browse files
.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ tokenizer.json filter=lfs diff=lfs merge=lfs -text
README.md ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ language: en
3
+ tags:
4
+ - mlx
5
+ pipeline_tag: text-generation
6
+ library_name: mlx
7
+ base_model: lovedheart/Qwen3-Coder-Next-REAP-48B-A3B-GGUF
8
+ ---
9
+
10
+ This model was converted to MLX format from [lovedheart/Qwen3-Coder-Next-REAP-48B-A3B-GGUF](https://huggingface.co/lovedheart/Qwen3-Coder-Next-REAP-48B-A3B-GGUF) using mlx-lm version **0.30.5**.
11
+
12
+ The original safetensors model is available at: https://www.modelscope.cn/models/lovedheart/Qwen3-Coder-Next-REAP-48B-A3B/summary
13
+
14
+ **Conversion Command**
15
+
16
+ `$ uv run mlx_lm.convert --model ./model/ --mlx-path Qwen3-Coder-Next-REAP-48B-A3B-mlx-nvfp4 -q --q-mode nvfp4 --q-group-size 16`
chat_template.jinja ADDED
@@ -0,0 +1,117 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {% macro render_extra_keys(json_dict, handled_keys) %}
2
+ {%- if json_dict is mapping %}
3
+ {%- for json_key in json_dict if json_key not in handled_keys %}
4
+ {%- if json_dict[json_key] is string %}
5
+ {{-'\n<' ~ json_key ~ '>' ~ (json_dict[json_key] | string) ~ '</' ~ json_key ~ '>' }}
6
+ {%- else %}
7
+ {{- '\n<' ~ json_key ~ '>' ~ (json_dict[json_key] | tojson | safe) ~ '</' ~ json_key ~ '>' }}
8
+ {%- endif %}
9
+ {%- endfor %}
10
+ {%- endif %}
11
+ {%- endmacro %}
12
+
13
+ {%- if messages[0]["role"] == "system" %}
14
+ {%- set system_message = messages[0]["content"] %}
15
+ {%- set loop_messages = messages[1:] %}
16
+ {%- else %}
17
+ {%- set loop_messages = messages %}
18
+ {%- endif %}
19
+
20
+ {%- if not tools is defined %}
21
+ {%- set tools = [] %}
22
+ {%- endif %}
23
+
24
+ {%- if system_message is defined %}
25
+ {{- "<|im_start|>system\n" + system_message }}
26
+ {%- else %}
27
+ {%- if tools is iterable and tools | length > 0 %}
28
+ {{- "<|im_start|>system\nYou are Qwen, a helpful AI assistant that can interact with a computer to solve tasks." }}
29
+ {%- endif %}
30
+ {%- endif %}
31
+ {%- if tools is iterable and tools | length > 0 %}
32
+ {{- "\n\n# Tools\n\nYou have access to the following functions:\n\n" }}
33
+ {{- "<tools>" }}
34
+ {%- for tool in tools %}
35
+ {%- if tool.function is defined %}
36
+ {%- set tool = tool.function %}
37
+ {%- endif %}
38
+ {{- "\n<function>\n<name>" ~ tool.name ~ "</name>" }}
39
+ {%- if tool.description is defined %}
40
+ {{- '\n<description>' ~ (tool.description | trim) ~ '</description>' }}
41
+ {%- endif %}
42
+ {{- '\n<parameters>' }}
43
+ {%- if tool.parameters is defined and tool.parameters is mapping and tool.parameters.properties is defined and tool.parameters.properties is mapping %}
44
+ {%- for param_name, param_fields in tool.parameters.properties|items %}
45
+ {{- '\n<parameter>' }}
46
+ {{- '\n<name>' ~ param_name ~ '</name>' }}
47
+ {%- if param_fields.type is defined %}
48
+ {{- '\n<type>' ~ (param_fields.type | string) ~ '</type>' }}
49
+ {%- endif %}
50
+ {%- if param_fields.description is defined %}
51
+ {{- '\n<description>' ~ (param_fields.description | trim) ~ '</description>' }}
52
+ {%- endif %}
53
+ {%- set handled_keys = ['name', 'type', 'description'] %}
54
+ {{- render_extra_keys(param_fields, handled_keys) }}
55
+ {{- '\n</parameter>' }}
56
+ {%- endfor %}
57
+ {%- endif %}
58
+ {%- set handled_keys = ['type', 'properties'] %}
59
+ {{- render_extra_keys(tool.parameters, handled_keys) }}
60
+ {{- '\n</parameters>' }}
61
+ {%- set handled_keys = ['type', 'name', 'description', 'parameters'] %}
62
+ {{- render_extra_keys(tool, handled_keys) }}
63
+ {{- '\n</function>' }}
64
+ {%- endfor %}
65
+ {{- "\n</tools>" }}
66
+ {{- '\n\nIf you choose to call a function ONLY reply in the following format with NO suffix:\n\n<tool_call>\n<function=example_function_name>\n<parameter=example_parameter_1>\nvalue_1\n</parameter>\n<parameter=example_parameter_2>\nThis is the value for the second parameter\nthat can span\nmultiple lines\n</parameter>\n</function>\n</tool_call>\n\n<IMPORTANT>\nReminder:\n- Function calls MUST follow the specified format: an inner <function=...></function> block must be nested within <tool_call></tool_call> XML tags\n- Required parameters MUST be specified\n- You may provide optional reasoning for your function call in natural language BEFORE the function call, but NOT after\n- If there is no function call available, answer the question like normal with your current knowledge and do not tell the user about function calls\n</IMPORTANT>' }}
67
+ {%- endif %}
68
+ {%- if system_message is defined %}
69
+ {{- '<|im_end|>\n' }}
70
+ {%- else %}
71
+ {%- if tools is iterable and tools | length > 0 %}
72
+ {{- '<|im_end|>\n' }}
73
+ {%- endif %}
74
+ {%- endif %}
75
+ {%- for message in loop_messages %}
76
+ {%- if message.role == "assistant" and message.tool_calls is defined and message.tool_calls is iterable and message.tool_calls | length > 0 %}
77
+ {{- '<|im_start|>' + message.role }}
78
+ {%- if message.content is defined and message.content is string and message.content | trim | length > 0 %}
79
+ {{- '\n' + message.content | trim + '\n' }}
80
+ {%- endif %}
81
+ {%- for tool_call in message.tool_calls %}
82
+ {%- if tool_call.function is defined %}
83
+ {%- set tool_call = tool_call.function %}
84
+ {%- endif %}
85
+ {{- '\n<tool_call>\n<function=' + tool_call.name + '>\n' }}
86
+ {%- if tool_call.arguments is defined %}
87
+ {%- for args_name, args_value in tool_call.arguments|items %}
88
+ {{- '<parameter=' + args_name + '>\n' }}
89
+ {%- set args_value = args_value if args_value is string else args_value | tojson | safe %}
90
+ {{- args_value }}
91
+ {{- '\n</parameter>\n' }}
92
+ {%- endfor %}
93
+ {%- endif %}
94
+ {{- '</function>\n</tool_call>' }}
95
+ {%- endfor %}
96
+ {{- '<|im_end|>\n' }}
97
+ {%- elif message.role == "user" or message.role == "system" or message.role == "assistant" %}
98
+ {{- '<|im_start|>' + message.role + '\n' + message.content + '<|im_end|>' + '\n' }}
99
+ {%- elif message.role == "tool" %}
100
+ {%- if loop.previtem and loop.previtem.role != "tool" %}
101
+ {{- '<|im_start|>user' }}
102
+ {%- endif %}
103
+ {{- '\n<tool_response>\n' }}
104
+ {{- message.content }}
105
+ {{- '\n</tool_response>' }}
106
+ {%- if not loop.last and loop.nextitem.role != "tool" %}
107
+ {{- '<|im_end|>\n' }}
108
+ {%- elif loop.last %}
109
+ {{- '<|im_end|>\n' }}
110
+ {%- endif %}
111
+ {%- else %}
112
+ {{- '<|im_start|>' + message.role + '\n' + message.content + '<|im_end|>\n' }}
113
+ {%- endif %}
114
+ {%- endfor %}
115
+ {%- if add_generation_prompt %}
116
+ {{- '<|im_start|>assistant\n' }}
117
+ {%- endif %}
config.json ADDED
@@ -0,0 +1,875 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "architectures": [
3
+ "Qwen3NextForCausalLM"
4
+ ],
5
+ "attention_bias": false,
6
+ "attention_dropout": 0,
7
+ "bos_token_id": 151643,
8
+ "decoder_sparse_step": 1,
9
+ "dtype": "bfloat16",
10
+ "eos_token_id": [
11
+ 151645,
12
+ 151643
13
+ ],
14
+ "full_attention_interval": 4,
15
+ "head_dim": 256,
16
+ "hidden_act": "silu",
17
+ "hidden_size": 2048,
18
+ "initializer_range": 0.02,
19
+ "intermediate_size": 5120,
20
+ "layer_types": [
21
+ "linear_attention",
22
+ "linear_attention",
23
+ "linear_attention",
24
+ "full_attention",
25
+ "linear_attention",
26
+ "linear_attention",
27
+ "linear_attention",
28
+ "full_attention",
29
+ "linear_attention",
30
+ "linear_attention",
31
+ "linear_attention",
32
+ "full_attention",
33
+ "linear_attention",
34
+ "linear_attention",
35
+ "linear_attention",
36
+ "full_attention",
37
+ "linear_attention",
38
+ "linear_attention",
39
+ "linear_attention",
40
+ "full_attention",
41
+ "linear_attention",
42
+ "linear_attention",
43
+ "linear_attention",
44
+ "full_attention",
45
+ "linear_attention",
46
+ "linear_attention",
47
+ "linear_attention",
48
+ "full_attention",
49
+ "linear_attention",
50
+ "linear_attention",
51
+ "linear_attention",
52
+ "full_attention",
53
+ "linear_attention",
54
+ "linear_attention",
55
+ "linear_attention",
56
+ "full_attention",
57
+ "linear_attention",
58
+ "linear_attention",
59
+ "linear_attention",
60
+ "full_attention",
61
+ "linear_attention",
62
+ "linear_attention",
63
+ "linear_attention",
64
+ "full_attention",
65
+ "linear_attention",
66
+ "linear_attention",
67
+ "linear_attention",
68
+ "full_attention"
69
+ ],
70
+ "linear_conv_kernel_dim": 4,
71
+ "linear_key_head_dim": 128,
72
+ "linear_num_key_heads": 16,
73
+ "linear_num_value_heads": 32,
74
+ "linear_value_head_dim": 128,
75
+ "max_position_embeddings": 262144,
76
+ "mlp_only_layers": [],
77
+ "model_type": "qwen3_next",
78
+ "moe_intermediate_size": 512,
79
+ "norm_topk_prob": true,
80
+ "num_attention_heads": 16,
81
+ "num_experts": 308,
82
+ "num_experts_per_tok": 10,
83
+ "num_hidden_layers": 48,
84
+ "num_key_value_heads": 2,
85
+ "output_router_logits": false,
86
+ "partial_rotary_factor": 0.25,
87
+ "quantization": {
88
+ "group_size": 16,
89
+ "bits": 4,
90
+ "mode": "nvfp4",
91
+ "model.layers.0.mlp.gate": {
92
+ "group_size": 64,
93
+ "bits": 8
94
+ },
95
+ "model.layers.0.mlp.shared_expert_gate": {
96
+ "group_size": 64,
97
+ "bits": 8
98
+ },
99
+ "model.layers.1.mlp.gate": {
100
+ "group_size": 64,
101
+ "bits": 8
102
+ },
103
+ "model.layers.1.mlp.shared_expert_gate": {
104
+ "group_size": 64,
105
+ "bits": 8
106
+ },
107
+ "model.layers.2.mlp.gate": {
108
+ "group_size": 64,
109
+ "bits": 8
110
+ },
111
+ "model.layers.2.mlp.shared_expert_gate": {
112
+ "group_size": 64,
113
+ "bits": 8
114
+ },
115
+ "model.layers.3.mlp.gate": {
116
+ "group_size": 64,
117
+ "bits": 8
118
+ },
119
+ "model.layers.3.mlp.shared_expert_gate": {
120
+ "group_size": 64,
121
+ "bits": 8
122
+ },
123
+ "model.layers.4.mlp.gate": {
124
+ "group_size": 64,
125
+ "bits": 8
126
+ },
127
+ "model.layers.4.mlp.shared_expert_gate": {
128
+ "group_size": 64,
129
+ "bits": 8
130
+ },
131
+ "model.layers.5.mlp.gate": {
132
+ "group_size": 64,
133
+ "bits": 8
134
+ },
135
+ "model.layers.5.mlp.shared_expert_gate": {
136
+ "group_size": 64,
137
+ "bits": 8
138
+ },
139
+ "model.layers.6.mlp.gate": {
140
+ "group_size": 64,
141
+ "bits": 8
142
+ },
143
+ "model.layers.6.mlp.shared_expert_gate": {
144
+ "group_size": 64,
145
+ "bits": 8
146
+ },
147
+ "model.layers.7.mlp.gate": {
148
+ "group_size": 64,
149
+ "bits": 8
150
+ },
151
+ "model.layers.7.mlp.shared_expert_gate": {
152
+ "group_size": 64,
153
+ "bits": 8
154
+ },
155
+ "model.layers.8.mlp.gate": {
156
+ "group_size": 64,
157
+ "bits": 8
158
+ },
159
+ "model.layers.8.mlp.shared_expert_gate": {
160
+ "group_size": 64,
161
+ "bits": 8
162
+ },
163
+ "model.layers.9.mlp.gate": {
164
+ "group_size": 64,
165
+ "bits": 8
166
+ },
167
+ "model.layers.9.mlp.shared_expert_gate": {
168
+ "group_size": 64,
169
+ "bits": 8
170
+ },
171
+ "model.layers.10.mlp.gate": {
172
+ "group_size": 64,
173
+ "bits": 8
174
+ },
175
+ "model.layers.10.mlp.shared_expert_gate": {
176
+ "group_size": 64,
177
+ "bits": 8
178
+ },
179
+ "model.layers.11.mlp.gate": {
180
+ "group_size": 64,
181
+ "bits": 8
182
+ },
183
+ "model.layers.11.mlp.shared_expert_gate": {
184
+ "group_size": 64,
185
+ "bits": 8
186
+ },
187
+ "model.layers.12.mlp.gate": {
188
+ "group_size": 64,
189
+ "bits": 8
190
+ },
191
+ "model.layers.12.mlp.shared_expert_gate": {
192
+ "group_size": 64,
193
+ "bits": 8
194
+ },
195
+ "model.layers.13.mlp.gate": {
196
+ "group_size": 64,
197
+ "bits": 8
198
+ },
199
+ "model.layers.13.mlp.shared_expert_gate": {
200
+ "group_size": 64,
201
+ "bits": 8
202
+ },
203
+ "model.layers.14.mlp.gate": {
204
+ "group_size": 64,
205
+ "bits": 8
206
+ },
207
+ "model.layers.14.mlp.shared_expert_gate": {
208
+ "group_size": 64,
209
+ "bits": 8
210
+ },
211
+ "model.layers.15.mlp.gate": {
212
+ "group_size": 64,
213
+ "bits": 8
214
+ },
215
+ "model.layers.15.mlp.shared_expert_gate": {
216
+ "group_size": 64,
217
+ "bits": 8
218
+ },
219
+ "model.layers.16.mlp.gate": {
220
+ "group_size": 64,
221
+ "bits": 8
222
+ },
223
+ "model.layers.16.mlp.shared_expert_gate": {
224
+ "group_size": 64,
225
+ "bits": 8
226
+ },
227
+ "model.layers.17.mlp.gate": {
228
+ "group_size": 64,
229
+ "bits": 8
230
+ },
231
+ "model.layers.17.mlp.shared_expert_gate": {
232
+ "group_size": 64,
233
+ "bits": 8
234
+ },
235
+ "model.layers.18.mlp.gate": {
236
+ "group_size": 64,
237
+ "bits": 8
238
+ },
239
+ "model.layers.18.mlp.shared_expert_gate": {
240
+ "group_size": 64,
241
+ "bits": 8
242
+ },
243
+ "model.layers.19.mlp.gate": {
244
+ "group_size": 64,
245
+ "bits": 8
246
+ },
247
+ "model.layers.19.mlp.shared_expert_gate": {
248
+ "group_size": 64,
249
+ "bits": 8
250
+ },
251
+ "model.layers.20.mlp.gate": {
252
+ "group_size": 64,
253
+ "bits": 8
254
+ },
255
+ "model.layers.20.mlp.shared_expert_gate": {
256
+ "group_size": 64,
257
+ "bits": 8
258
+ },
259
+ "model.layers.21.mlp.gate": {
260
+ "group_size": 64,
261
+ "bits": 8
262
+ },
263
+ "model.layers.21.mlp.shared_expert_gate": {
264
+ "group_size": 64,
265
+ "bits": 8
266
+ },
267
+ "model.layers.22.mlp.gate": {
268
+ "group_size": 64,
269
+ "bits": 8
270
+ },
271
+ "model.layers.22.mlp.shared_expert_gate": {
272
+ "group_size": 64,
273
+ "bits": 8
274
+ },
275
+ "model.layers.23.mlp.gate": {
276
+ "group_size": 64,
277
+ "bits": 8
278
+ },
279
+ "model.layers.23.mlp.shared_expert_gate": {
280
+ "group_size": 64,
281
+ "bits": 8
282
+ },
283
+ "model.layers.24.mlp.gate": {
284
+ "group_size": 64,
285
+ "bits": 8
286
+ },
287
+ "model.layers.24.mlp.shared_expert_gate": {
288
+ "group_size": 64,
289
+ "bits": 8
290
+ },
291
+ "model.layers.25.mlp.gate": {
292
+ "group_size": 64,
293
+ "bits": 8
294
+ },
295
+ "model.layers.25.mlp.shared_expert_gate": {
296
+ "group_size": 64,
297
+ "bits": 8
298
+ },
299
+ "model.layers.26.mlp.gate": {
300
+ "group_size": 64,
301
+ "bits": 8
302
+ },
303
+ "model.layers.26.mlp.shared_expert_gate": {
304
+ "group_size": 64,
305
+ "bits": 8
306
+ },
307
+ "model.layers.27.mlp.gate": {
308
+ "group_size": 64,
309
+ "bits": 8
310
+ },
311
+ "model.layers.27.mlp.shared_expert_gate": {
312
+ "group_size": 64,
313
+ "bits": 8
314
+ },
315
+ "model.layers.28.mlp.gate": {
316
+ "group_size": 64,
317
+ "bits": 8
318
+ },
319
+ "model.layers.28.mlp.shared_expert_gate": {
320
+ "group_size": 64,
321
+ "bits": 8
322
+ },
323
+ "model.layers.29.mlp.gate": {
324
+ "group_size": 64,
325
+ "bits": 8
326
+ },
327
+ "model.layers.29.mlp.shared_expert_gate": {
328
+ "group_size": 64,
329
+ "bits": 8
330
+ },
331
+ "model.layers.30.mlp.gate": {
332
+ "group_size": 64,
333
+ "bits": 8
334
+ },
335
+ "model.layers.30.mlp.shared_expert_gate": {
336
+ "group_size": 64,
337
+ "bits": 8
338
+ },
339
+ "model.layers.31.mlp.gate": {
340
+ "group_size": 64,
341
+ "bits": 8
342
+ },
343
+ "model.layers.31.mlp.shared_expert_gate": {
344
+ "group_size": 64,
345
+ "bits": 8
346
+ },
347
+ "model.layers.32.mlp.gate": {
348
+ "group_size": 64,
349
+ "bits": 8
350
+ },
351
+ "model.layers.32.mlp.shared_expert_gate": {
352
+ "group_size": 64,
353
+ "bits": 8
354
+ },
355
+ "model.layers.33.mlp.gate": {
356
+ "group_size": 64,
357
+ "bits": 8
358
+ },
359
+ "model.layers.33.mlp.shared_expert_gate": {
360
+ "group_size": 64,
361
+ "bits": 8
362
+ },
363
+ "model.layers.34.mlp.gate": {
364
+ "group_size": 64,
365
+ "bits": 8
366
+ },
367
+ "model.layers.34.mlp.shared_expert_gate": {
368
+ "group_size": 64,
369
+ "bits": 8
370
+ },
371
+ "model.layers.35.mlp.gate": {
372
+ "group_size": 64,
373
+ "bits": 8
374
+ },
375
+ "model.layers.35.mlp.shared_expert_gate": {
376
+ "group_size": 64,
377
+ "bits": 8
378
+ },
379
+ "model.layers.36.mlp.gate": {
380
+ "group_size": 64,
381
+ "bits": 8
382
+ },
383
+ "model.layers.36.mlp.shared_expert_gate": {
384
+ "group_size": 64,
385
+ "bits": 8
386
+ },
387
+ "model.layers.37.mlp.gate": {
388
+ "group_size": 64,
389
+ "bits": 8
390
+ },
391
+ "model.layers.37.mlp.shared_expert_gate": {
392
+ "group_size": 64,
393
+ "bits": 8
394
+ },
395
+ "model.layers.38.mlp.gate": {
396
+ "group_size": 64,
397
+ "bits": 8
398
+ },
399
+ "model.layers.38.mlp.shared_expert_gate": {
400
+ "group_size": 64,
401
+ "bits": 8
402
+ },
403
+ "model.layers.39.mlp.gate": {
404
+ "group_size": 64,
405
+ "bits": 8
406
+ },
407
+ "model.layers.39.mlp.shared_expert_gate": {
408
+ "group_size": 64,
409
+ "bits": 8
410
+ },
411
+ "model.layers.40.mlp.gate": {
412
+ "group_size": 64,
413
+ "bits": 8
414
+ },
415
+ "model.layers.40.mlp.shared_expert_gate": {
416
+ "group_size": 64,
417
+ "bits": 8
418
+ },
419
+ "model.layers.41.mlp.gate": {
420
+ "group_size": 64,
421
+ "bits": 8
422
+ },
423
+ "model.layers.41.mlp.shared_expert_gate": {
424
+ "group_size": 64,
425
+ "bits": 8
426
+ },
427
+ "model.layers.42.mlp.gate": {
428
+ "group_size": 64,
429
+ "bits": 8
430
+ },
431
+ "model.layers.42.mlp.shared_expert_gate": {
432
+ "group_size": 64,
433
+ "bits": 8
434
+ },
435
+ "model.layers.43.mlp.gate": {
436
+ "group_size": 64,
437
+ "bits": 8
438
+ },
439
+ "model.layers.43.mlp.shared_expert_gate": {
440
+ "group_size": 64,
441
+ "bits": 8
442
+ },
443
+ "model.layers.44.mlp.gate": {
444
+ "group_size": 64,
445
+ "bits": 8
446
+ },
447
+ "model.layers.44.mlp.shared_expert_gate": {
448
+ "group_size": 64,
449
+ "bits": 8
450
+ },
451
+ "model.layers.45.mlp.gate": {
452
+ "group_size": 64,
453
+ "bits": 8
454
+ },
455
+ "model.layers.45.mlp.shared_expert_gate": {
456
+ "group_size": 64,
457
+ "bits": 8
458
+ },
459
+ "model.layers.46.mlp.gate": {
460
+ "group_size": 64,
461
+ "bits": 8
462
+ },
463
+ "model.layers.46.mlp.shared_expert_gate": {
464
+ "group_size": 64,
465
+ "bits": 8
466
+ },
467
+ "model.layers.47.mlp.gate": {
468
+ "group_size": 64,
469
+ "bits": 8
470
+ },
471
+ "model.layers.47.mlp.shared_expert_gate": {
472
+ "group_size": 64,
473
+ "bits": 8
474
+ }
475
+ },
476
+ "quantization_config": {
477
+ "group_size": 16,
478
+ "bits": 4,
479
+ "mode": "nvfp4",
480
+ "model.layers.0.mlp.gate": {
481
+ "group_size": 64,
482
+ "bits": 8
483
+ },
484
+ "model.layers.0.mlp.shared_expert_gate": {
485
+ "group_size": 64,
486
+ "bits": 8
487
+ },
488
+ "model.layers.1.mlp.gate": {
489
+ "group_size": 64,
490
+ "bits": 8
491
+ },
492
+ "model.layers.1.mlp.shared_expert_gate": {
493
+ "group_size": 64,
494
+ "bits": 8
495
+ },
496
+ "model.layers.2.mlp.gate": {
497
+ "group_size": 64,
498
+ "bits": 8
499
+ },
500
+ "model.layers.2.mlp.shared_expert_gate": {
501
+ "group_size": 64,
502
+ "bits": 8
503
+ },
504
+ "model.layers.3.mlp.gate": {
505
+ "group_size": 64,
506
+ "bits": 8
507
+ },
508
+ "model.layers.3.mlp.shared_expert_gate": {
509
+ "group_size": 64,
510
+ "bits": 8
511
+ },
512
+ "model.layers.4.mlp.gate": {
513
+ "group_size": 64,
514
+ "bits": 8
515
+ },
516
+ "model.layers.4.mlp.shared_expert_gate": {
517
+ "group_size": 64,
518
+ "bits": 8
519
+ },
520
+ "model.layers.5.mlp.gate": {
521
+ "group_size": 64,
522
+ "bits": 8
523
+ },
524
+ "model.layers.5.mlp.shared_expert_gate": {
525
+ "group_size": 64,
526
+ "bits": 8
527
+ },
528
+ "model.layers.6.mlp.gate": {
529
+ "group_size": 64,
530
+ "bits": 8
531
+ },
532
+ "model.layers.6.mlp.shared_expert_gate": {
533
+ "group_size": 64,
534
+ "bits": 8
535
+ },
536
+ "model.layers.7.mlp.gate": {
537
+ "group_size": 64,
538
+ "bits": 8
539
+ },
540
+ "model.layers.7.mlp.shared_expert_gate": {
541
+ "group_size": 64,
542
+ "bits": 8
543
+ },
544
+ "model.layers.8.mlp.gate": {
545
+ "group_size": 64,
546
+ "bits": 8
547
+ },
548
+ "model.layers.8.mlp.shared_expert_gate": {
549
+ "group_size": 64,
550
+ "bits": 8
551
+ },
552
+ "model.layers.9.mlp.gate": {
553
+ "group_size": 64,
554
+ "bits": 8
555
+ },
556
+ "model.layers.9.mlp.shared_expert_gate": {
557
+ "group_size": 64,
558
+ "bits": 8
559
+ },
560
+ "model.layers.10.mlp.gate": {
561
+ "group_size": 64,
562
+ "bits": 8
563
+ },
564
+ "model.layers.10.mlp.shared_expert_gate": {
565
+ "group_size": 64,
566
+ "bits": 8
567
+ },
568
+ "model.layers.11.mlp.gate": {
569
+ "group_size": 64,
570
+ "bits": 8
571
+ },
572
+ "model.layers.11.mlp.shared_expert_gate": {
573
+ "group_size": 64,
574
+ "bits": 8
575
+ },
576
+ "model.layers.12.mlp.gate": {
577
+ "group_size": 64,
578
+ "bits": 8
579
+ },
580
+ "model.layers.12.mlp.shared_expert_gate": {
581
+ "group_size": 64,
582
+ "bits": 8
583
+ },
584
+ "model.layers.13.mlp.gate": {
585
+ "group_size": 64,
586
+ "bits": 8
587
+ },
588
+ "model.layers.13.mlp.shared_expert_gate": {
589
+ "group_size": 64,
590
+ "bits": 8
591
+ },
592
+ "model.layers.14.mlp.gate": {
593
+ "group_size": 64,
594
+ "bits": 8
595
+ },
596
+ "model.layers.14.mlp.shared_expert_gate": {
597
+ "group_size": 64,
598
+ "bits": 8
599
+ },
600
+ "model.layers.15.mlp.gate": {
601
+ "group_size": 64,
602
+ "bits": 8
603
+ },
604
+ "model.layers.15.mlp.shared_expert_gate": {
605
+ "group_size": 64,
606
+ "bits": 8
607
+ },
608
+ "model.layers.16.mlp.gate": {
609
+ "group_size": 64,
610
+ "bits": 8
611
+ },
612
+ "model.layers.16.mlp.shared_expert_gate": {
613
+ "group_size": 64,
614
+ "bits": 8
615
+ },
616
+ "model.layers.17.mlp.gate": {
617
+ "group_size": 64,
618
+ "bits": 8
619
+ },
620
+ "model.layers.17.mlp.shared_expert_gate": {
621
+ "group_size": 64,
622
+ "bits": 8
623
+ },
624
+ "model.layers.18.mlp.gate": {
625
+ "group_size": 64,
626
+ "bits": 8
627
+ },
628
+ "model.layers.18.mlp.shared_expert_gate": {
629
+ "group_size": 64,
630
+ "bits": 8
631
+ },
632
+ "model.layers.19.mlp.gate": {
633
+ "group_size": 64,
634
+ "bits": 8
635
+ },
636
+ "model.layers.19.mlp.shared_expert_gate": {
637
+ "group_size": 64,
638
+ "bits": 8
639
+ },
640
+ "model.layers.20.mlp.gate": {
641
+ "group_size": 64,
642
+ "bits": 8
643
+ },
644
+ "model.layers.20.mlp.shared_expert_gate": {
645
+ "group_size": 64,
646
+ "bits": 8
647
+ },
648
+ "model.layers.21.mlp.gate": {
649
+ "group_size": 64,
650
+ "bits": 8
651
+ },
652
+ "model.layers.21.mlp.shared_expert_gate": {
653
+ "group_size": 64,
654
+ "bits": 8
655
+ },
656
+ "model.layers.22.mlp.gate": {
657
+ "group_size": 64,
658
+ "bits": 8
659
+ },
660
+ "model.layers.22.mlp.shared_expert_gate": {
661
+ "group_size": 64,
662
+ "bits": 8
663
+ },
664
+ "model.layers.23.mlp.gate": {
665
+ "group_size": 64,
666
+ "bits": 8
667
+ },
668
+ "model.layers.23.mlp.shared_expert_gate": {
669
+ "group_size": 64,
670
+ "bits": 8
671
+ },
672
+ "model.layers.24.mlp.gate": {
673
+ "group_size": 64,
674
+ "bits": 8
675
+ },
676
+ "model.layers.24.mlp.shared_expert_gate": {
677
+ "group_size": 64,
678
+ "bits": 8
679
+ },
680
+ "model.layers.25.mlp.gate": {
681
+ "group_size": 64,
682
+ "bits": 8
683
+ },
684
+ "model.layers.25.mlp.shared_expert_gate": {
685
+ "group_size": 64,
686
+ "bits": 8
687
+ },
688
+ "model.layers.26.mlp.gate": {
689
+ "group_size": 64,
690
+ "bits": 8
691
+ },
692
+ "model.layers.26.mlp.shared_expert_gate": {
693
+ "group_size": 64,
694
+ "bits": 8
695
+ },
696
+ "model.layers.27.mlp.gate": {
697
+ "group_size": 64,
698
+ "bits": 8
699
+ },
700
+ "model.layers.27.mlp.shared_expert_gate": {
701
+ "group_size": 64,
702
+ "bits": 8
703
+ },
704
+ "model.layers.28.mlp.gate": {
705
+ "group_size": 64,
706
+ "bits": 8
707
+ },
708
+ "model.layers.28.mlp.shared_expert_gate": {
709
+ "group_size": 64,
710
+ "bits": 8
711
+ },
712
+ "model.layers.29.mlp.gate": {
713
+ "group_size": 64,
714
+ "bits": 8
715
+ },
716
+ "model.layers.29.mlp.shared_expert_gate": {
717
+ "group_size": 64,
718
+ "bits": 8
719
+ },
720
+ "model.layers.30.mlp.gate": {
721
+ "group_size": 64,
722
+ "bits": 8
723
+ },
724
+ "model.layers.30.mlp.shared_expert_gate": {
725
+ "group_size": 64,
726
+ "bits": 8
727
+ },
728
+ "model.layers.31.mlp.gate": {
729
+ "group_size": 64,
730
+ "bits": 8
731
+ },
732
+ "model.layers.31.mlp.shared_expert_gate": {
733
+ "group_size": 64,
734
+ "bits": 8
735
+ },
736
+ "model.layers.32.mlp.gate": {
737
+ "group_size": 64,
738
+ "bits": 8
739
+ },
740
+ "model.layers.32.mlp.shared_expert_gate": {
741
+ "group_size": 64,
742
+ "bits": 8
743
+ },
744
+ "model.layers.33.mlp.gate": {
745
+ "group_size": 64,
746
+ "bits": 8
747
+ },
748
+ "model.layers.33.mlp.shared_expert_gate": {
749
+ "group_size": 64,
750
+ "bits": 8
751
+ },
752
+ "model.layers.34.mlp.gate": {
753
+ "group_size": 64,
754
+ "bits": 8
755
+ },
756
+ "model.layers.34.mlp.shared_expert_gate": {
757
+ "group_size": 64,
758
+ "bits": 8
759
+ },
760
+ "model.layers.35.mlp.gate": {
761
+ "group_size": 64,
762
+ "bits": 8
763
+ },
764
+ "model.layers.35.mlp.shared_expert_gate": {
765
+ "group_size": 64,
766
+ "bits": 8
767
+ },
768
+ "model.layers.36.mlp.gate": {
769
+ "group_size": 64,
770
+ "bits": 8
771
+ },
772
+ "model.layers.36.mlp.shared_expert_gate": {
773
+ "group_size": 64,
774
+ "bits": 8
775
+ },
776
+ "model.layers.37.mlp.gate": {
777
+ "group_size": 64,
778
+ "bits": 8
779
+ },
780
+ "model.layers.37.mlp.shared_expert_gate": {
781
+ "group_size": 64,
782
+ "bits": 8
783
+ },
784
+ "model.layers.38.mlp.gate": {
785
+ "group_size": 64,
786
+ "bits": 8
787
+ },
788
+ "model.layers.38.mlp.shared_expert_gate": {
789
+ "group_size": 64,
790
+ "bits": 8
791
+ },
792
+ "model.layers.39.mlp.gate": {
793
+ "group_size": 64,
794
+ "bits": 8
795
+ },
796
+ "model.layers.39.mlp.shared_expert_gate": {
797
+ "group_size": 64,
798
+ "bits": 8
799
+ },
800
+ "model.layers.40.mlp.gate": {
801
+ "group_size": 64,
802
+ "bits": 8
803
+ },
804
+ "model.layers.40.mlp.shared_expert_gate": {
805
+ "group_size": 64,
806
+ "bits": 8
807
+ },
808
+ "model.layers.41.mlp.gate": {
809
+ "group_size": 64,
810
+ "bits": 8
811
+ },
812
+ "model.layers.41.mlp.shared_expert_gate": {
813
+ "group_size": 64,
814
+ "bits": 8
815
+ },
816
+ "model.layers.42.mlp.gate": {
817
+ "group_size": 64,
818
+ "bits": 8
819
+ },
820
+ "model.layers.42.mlp.shared_expert_gate": {
821
+ "group_size": 64,
822
+ "bits": 8
823
+ },
824
+ "model.layers.43.mlp.gate": {
825
+ "group_size": 64,
826
+ "bits": 8
827
+ },
828
+ "model.layers.43.mlp.shared_expert_gate": {
829
+ "group_size": 64,
830
+ "bits": 8
831
+ },
832
+ "model.layers.44.mlp.gate": {
833
+ "group_size": 64,
834
+ "bits": 8
835
+ },
836
+ "model.layers.44.mlp.shared_expert_gate": {
837
+ "group_size": 64,
838
+ "bits": 8
839
+ },
840
+ "model.layers.45.mlp.gate": {
841
+ "group_size": 64,
842
+ "bits": 8
843
+ },
844
+ "model.layers.45.mlp.shared_expert_gate": {
845
+ "group_size": 64,
846
+ "bits": 8
847
+ },
848
+ "model.layers.46.mlp.gate": {
849
+ "group_size": 64,
850
+ "bits": 8
851
+ },
852
+ "model.layers.46.mlp.shared_expert_gate": {
853
+ "group_size": 64,
854
+ "bits": 8
855
+ },
856
+ "model.layers.47.mlp.gate": {
857
+ "group_size": 64,
858
+ "bits": 8
859
+ },
860
+ "model.layers.47.mlp.shared_expert_gate": {
861
+ "group_size": 64,
862
+ "bits": 8
863
+ }
864
+ },
865
+ "rms_norm_eps": 1e-06,
866
+ "rope_scaling": null,
867
+ "rope_theta": 5000000,
868
+ "router_aux_loss_coef": 0.001,
869
+ "shared_expert_intermediate_size": 512,
870
+ "tie_word_embeddings": false,
871
+ "transformers_version": "4.57.6",
872
+ "use_cache": true,
873
+ "use_sliding_window": false,
874
+ "vocab_size": 151936
875
+ }
generation_config.json ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token_id": 151643,
3
+ "do_sample": true,
4
+ "eos_token_id": [
5
+ 151645,
6
+ 151643
7
+ ],
8
+ "pad_token_id": 151643,
9
+ "top_k": 40,
10
+ "top_p": 0.95,
11
+ "transformers_version": "4.57.6"
12
+ }
model-00001-of-00006.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:542d5115ffc81ca6ca0986759752a39d67e986eaa8c76bfb6f3e28c13a6ee124
3
+ size 5285554207
model-00002-of-00006.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bee421d094464611989ea73a47865f7d3f6cd24df5bdb8d66be82c353fcd00de
3
+ size 5272495969
model-00003-of-00006.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:20c6535f74615eb2f3e3e6dd679539c59b9145f1c3addeeee0a848e34c6ac270
3
+ size 5268818414
model-00004-of-00006.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3d171d946dc359fc3dc3f02c9f31a9ade91787227c7495162a0203769902ca9c
3
+ size 5293962838
model-00005-of-00006.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0da4012c7e10e8294e65a4e8bb3ffc73363c75744705be9d2c99aa6e95e3d6e6
3
+ size 5272496044
model-00006-of-00006.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b6a1674b3453a03a55ce482932cdbbf1d8162e1c4c27ff484d9bdd8e4b418b0f
3
+ size 1102926881
model.safetensors.index.json ADDED
The diff for this file is too large to render. See raw diff
 
tokenizer.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:be75606093db2094d7cd20f3c2f385c212750648bd6ea4fb2bf507a6a4c55506
3
+ size 11422650
tokenizer_config.json ADDED
@@ -0,0 +1,31 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "add_prefix_space": false,
3
+ "backend": "tokenizers",
4
+ "bos_token": null,
5
+ "clean_up_tokenization_spaces": false,
6
+ "eos_token": "<|im_end|>",
7
+ "errors": "replace",
8
+ "extra_special_tokens": [
9
+ "<|im_start|>",
10
+ "<|im_end|>",
11
+ "<|object_ref_start|>",
12
+ "<|object_ref_end|>",
13
+ "<|box_start|>",
14
+ "<|box_end|>",
15
+ "<|quad_start|>",
16
+ "<|quad_end|>",
17
+ "<|vision_start|>",
18
+ "<|vision_end|>",
19
+ "<|vision_pad|>",
20
+ "<|image_pad|>",
21
+ "<|video_pad|>"
22
+ ],
23
+ "is_local": true,
24
+ "model_max_length": 1048576,
25
+ "model_specific_special_tokens": {},
26
+ "pad_token": "<|endoftext|>",
27
+ "split_special_tokens": false,
28
+ "tokenizer_class": "Qwen2Tokenizer",
29
+ "tool_parser_type": "json_tools",
30
+ "unk_token": null
31
+ }