ZennyKenny committed on
Commit
6c3d604
·
verified ·
1 Parent(s): e2eeef7

Update config.json

Browse files
Files changed (1) hide show
  1. config.json +10 -16
config.json CHANGED
@@ -1,7 +1,7 @@
1
  {
2
  "architectures": ["GptOssForCausalLM"],
3
  "model_type": "gpt_oss",
4
-
5
  "hidden_size": 2880,
6
  "intermediate_size": 2880,
7
  "num_hidden_layers": 24,
@@ -12,17 +12,17 @@
12
  "head_dim": 64,
13
  "hidden_act": "silu",
14
  "initializer_range": 0.02,
15
-
16
  "experts_per_token": 4,
17
  "num_experts_per_tok": 4,
18
  "num_local_experts": 32,
19
  "output_router_logits": false,
20
  "router_aux_loss_coef": 0.9,
21
-
22
  "max_position_embeddings": 131072,
23
  "initial_context_length": 4096,
24
  "sliding_window": 128,
25
-
26
  "rope_scaling": {
27
  "beta_fast": 32.0,
28
  "beta_slow": 1.0,
@@ -32,7 +32,7 @@
32
  "truncate": false
33
  },
34
  "rope_theta": 150000,
35
-
36
  "layer_types": [
37
  "sliding_attention", "full_attention",
38
  "sliding_attention", "full_attention",
@@ -47,19 +47,19 @@
47
  "sliding_attention", "full_attention",
48
  "sliding_attention", "full_attention"
49
  ],
50
-
51
  "vocab_size": 201088,
52
  "eos_token_id": 200002,
53
  "pad_token_id": 199999,
54
-
55
  "rms_norm_eps": 1e-05,
56
  "swiglu_limit": 7.0,
57
  "tie_word_embeddings": false,
58
  "use_cache": true,
59
-
60
  "torch_dtype": "float16",
61
  "transformers_version": "4.55.0",
62
-
63
  "quantization_config": {
64
  "_load_in_4bit": true,
65
  "_load_in_8bit": false,
@@ -74,11 +74,5 @@
74
  "llm_int8_threshold": 6.0,
75
  "llm_int8_skip_modules": ["router", "lm_head", "embed_tokens"],
76
  "quant_method": "bitsandbytes"
77
- },
78
-
79
- "auto_map": {
80
- "AutoConfig": "configuration_gpt_oss.GptOssConfig",
81
- "AutoModelForCausalLM": "modeling_gpt_oss.GptOssForCausalLM",
82
- "AutoTokenizer": "tokenization_gpt_oss.GptOssTokenizer"
83
  }
84
- }
 
1
  {
2
  "architectures": ["GptOssForCausalLM"],
3
  "model_type": "gpt_oss",
4
+
5
  "hidden_size": 2880,
6
  "intermediate_size": 2880,
7
  "num_hidden_layers": 24,
 
12
  "head_dim": 64,
13
  "hidden_act": "silu",
14
  "initializer_range": 0.02,
15
+
16
  "experts_per_token": 4,
17
  "num_experts_per_tok": 4,
18
  "num_local_experts": 32,
19
  "output_router_logits": false,
20
  "router_aux_loss_coef": 0.9,
21
+
22
  "max_position_embeddings": 131072,
23
  "initial_context_length": 4096,
24
  "sliding_window": 128,
25
+
26
  "rope_scaling": {
27
  "beta_fast": 32.0,
28
  "beta_slow": 1.0,
 
32
  "truncate": false
33
  },
34
  "rope_theta": 150000,
35
+
36
  "layer_types": [
37
  "sliding_attention", "full_attention",
38
  "sliding_attention", "full_attention",
 
47
  "sliding_attention", "full_attention",
48
  "sliding_attention", "full_attention"
49
  ],
50
+
51
  "vocab_size": 201088,
52
  "eos_token_id": 200002,
53
  "pad_token_id": 199999,
54
+
55
  "rms_norm_eps": 1e-05,
56
  "swiglu_limit": 7.0,
57
  "tie_word_embeddings": false,
58
  "use_cache": true,
59
+
60
  "torch_dtype": "float16",
61
  "transformers_version": "4.55.0",
62
+
63
  "quantization_config": {
64
  "_load_in_4bit": true,
65
  "_load_in_8bit": false,
 
74
  "llm_int8_threshold": 6.0,
75
  "llm_int8_skip_modules": ["router", "lm_head", "embed_tokens"],
76
  "quant_method": "bitsandbytes"
 
 
 
 
 
 
77
  }
78
+ }