AdamF92's picture
In progress training - batch: 88
7047281 verified
raw
history blame contribute delete
668 Bytes
{
"att_dropout": 0.0,
"att_heads": 16,
"embed_dim": 1024,
"granular_per_slot_gate": false,
"head_dim": 128,
"interlayer_att_dropout": 0.0,
"interlayer_att_heads": 16,
"interlayer_kv_heads": 8,
"interlayer_qk_norm": true,
"kv_heads": 8,
"kv_input_dim": 1024,
"num_groups": 2,
"num_layers": 14,
"residual_gate_init": 3.0,
"residual_gate_type": "elementwise",
"rope_base": 1000000,
"seq_len": 8192,
"stm_size": 4096,
"use_flash_attention": false,
"use_gqa": true,
"use_granular_residual_gate": false,
"use_interlayer_gqa": true,
"use_linear_slot_status": false,
"use_qk_norm": true,
"variant": "grouped-self-interlayer"
}