doublemathew committed on
Commit
e99d96f
·
verified ·
1 Parent(s): 066a3e0

Upload model trained with Unsloth

Browse files

Upload model trained with Unsloth 2x faster

Files changed (4) hide show
  1. README.md +1 -0
  2. config.json +153 -0
  3. generation_config.json +13 -0
  4. pytorch_model.bin +3 -0
README.md CHANGED
@@ -6,6 +6,7 @@ tags:
6
  - unsloth
7
  - qwen3
8
  - trl
 
9
  license: apache-2.0
10
  language:
11
  - en
 
6
  - unsloth
7
  - qwen3
8
  - trl
9
+ - sft
10
  license: apache-2.0
11
  language:
12
  - en
config.json ADDED
@@ -0,0 +1,153 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "architectures": [
3
+ "Qwen3ForCausalLM"
4
+ ],
5
+ "attention_bias": false,
6
+ "attention_dropout": 0.0,
7
+ "dtype": "bfloat16",
8
+ "eos_token_id": 151645,
9
+ "head_dim": 128,
10
+ "hidden_act": "silu",
11
+ "hidden_size": 2048,
12
+ "initializer_range": 0.02,
13
+ "intermediate_size": 6144,
14
+ "layer_types": [
15
+ "full_attention",
16
+ "full_attention",
17
+ "full_attention",
18
+ "full_attention",
19
+ "full_attention",
20
+ "full_attention",
21
+ "full_attention",
22
+ "full_attention",
23
+ "full_attention",
24
+ "full_attention",
25
+ "full_attention",
26
+ "full_attention",
27
+ "full_attention",
28
+ "full_attention",
29
+ "full_attention",
30
+ "full_attention",
31
+ "full_attention",
32
+ "full_attention",
33
+ "full_attention",
34
+ "full_attention",
35
+ "full_attention",
36
+ "full_attention",
37
+ "full_attention",
38
+ "full_attention",
39
+ "full_attention",
40
+ "full_attention",
41
+ "full_attention",
42
+ "full_attention"
43
+ ],
44
+ "max_position_embeddings": 40960,
45
+ "max_window_layers": 28,
46
+ "model_type": "qwen3",
47
+ "num_attention_heads": 16,
48
+ "num_hidden_layers": 28,
49
+ "num_key_value_heads": 8,
50
+ "pad_token_id": 151654,
51
+ "quantization_config": {
52
+ "include_input_output_embeddings": true,
53
+ "modules_to_not_convert": [],
54
+ "quant_method": "torchao",
55
+ "quant_type": {
56
+ "default": {
57
+ "_data": {
58
+ "module_fqn_to_config": {
59
+ "_default": {
60
+ "_data": {
61
+ "act_mapping_type": {
62
+ "_data": "ASYMMETRIC",
63
+ "_type": "MappingType"
64
+ },
65
+ "intx_choose_qparams_algorithm": {
66
+ "_data": "AFFINE",
67
+ "_type": "IntxChooseQParamsAlgorithm"
68
+ },
69
+ "intx_packing_format": {
70
+ "_data": "UNPACKED_TO_INT8",
71
+ "_type": "IntxPackingFormat"
72
+ },
73
+ "layout": {
74
+ "_data": {},
75
+ "_type": "QDQLayout",
76
+ "_version": 1
77
+ },
78
+ "weight_dtype": {
79
+ "_data": "int4",
80
+ "_type": "torch.dtype"
81
+ },
82
+ "weight_granularity": {
83
+ "_data": {
84
+ "group_size": 32
85
+ },
86
+ "_type": "PerGroup",
87
+ "_version": 1
88
+ },
89
+ "weight_mapping_type": {
90
+ "_data": "SYMMETRIC",
91
+ "_type": "MappingType"
92
+ },
93
+ "weight_scale_dtype": null
94
+ },
95
+ "_type": "Int8DynamicActivationIntxWeightConfig",
96
+ "_version": 2
97
+ },
98
+ "model.embed_tokens": {
99
+ "_data": {
100
+ "granularity": {
101
+ "_data": {
102
+ "axis": 0
103
+ },
104
+ "_type": "PerAxis",
105
+ "_version": 1
106
+ },
107
+ "intx_choose_qparams_algorithm": {
108
+ "_data": "AFFINE",
109
+ "_type": "IntxChooseQParamsAlgorithm"
110
+ },
111
+ "intx_packing_format": {
112
+ "_data": "UNPACKED_TO_INT8",
113
+ "_type": "IntxPackingFormat"
114
+ },
115
+ "layout": {
116
+ "_data": {},
117
+ "_type": "QDQLayout",
118
+ "_version": 1
119
+ },
120
+ "mapping_type": {
121
+ "_data": "SYMMETRIC",
122
+ "_type": "MappingType"
123
+ },
124
+ "scale_dtype": null,
125
+ "weight_dtype": {
126
+ "_data": "int8",
127
+ "_type": "torch.dtype"
128
+ }
129
+ },
130
+ "_type": "IntxWeightOnlyConfig",
131
+ "_version": 2
132
+ }
133
+ }
134
+ },
135
+ "_type": "ModuleFqnToConfig",
136
+ "_version": 1
137
+ }
138
+ },
139
+ "quant_type_kwargs": {},
140
+ "untie_embedding_weights": false
141
+ },
142
+ "rms_norm_eps": 1e-06,
143
+ "rope_scaling": null,
144
+ "rope_theta": 1000000,
145
+ "sliding_window": null,
146
+ "tie_word_embeddings": false,
147
+ "transformers_version": "4.57.3",
148
+ "unsloth_fixed": true,
149
+ "unsloth_version": "2025.12.4",
150
+ "use_cache": true,
151
+ "use_sliding_window": false,
152
+ "vocab_size": 151936
153
+ }
generation_config.json ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "do_sample": true,
3
+ "eos_token_id": [
4
+ 151645,
5
+ 151643
6
+ ],
7
+ "max_length": 40960,
8
+ "pad_token_id": 151654,
9
+ "temperature": 0.6,
10
+ "top_k": 20,
11
+ "top_p": 0.95,
12
+ "transformers_version": "4.57.3"
13
+ }
pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:60bf0c9ccf18d5311621b9aabb902866d828f82b6c03a8f3b1c7c73f9838c8a1
3
+ size 2193856495