{
  "att_dropout": 0.0,
  "att_heads": 16,
  "embed_dim": 1024,
  "granular_per_slot_gate": false,
  "head_dim": 128,
  "interlayer_att_dropout": 0.0,
  "interlayer_att_heads": 16,
  "interlayer_kv_heads": 8,
  "interlayer_qk_norm": true,
  "kv_heads": 8,
  "kv_input_dim": 1024,
  "num_groups": 2,
  "num_layers": 14,
  "residual_gate_init": 3.0,
  "residual_gate_type": "elementwise",
  "rope_base": 1000000,
  "seq_len": 8192,
  "stm_size": 4096,
  "use_flash_attention": false,
  "use_gqa": true,
  "use_granular_residual_gate": false,
  "use_interlayer_gqa": true,
  "use_linear_slot_status": false,
  "use_qk_norm": true,
  "variant": "grouped-self-interlayer"
}