{ "att_dropout": 0.0, "att_heads": 16, "embed_dim": 1024, "granular_per_slot_gate": false, "head_dim": 128, "interlayer_att_dropout": 0.0, "interlayer_att_heads": 16, "interlayer_kv_heads": 8, "interlayer_qk_norm": true, "kv_heads": 8, "kv_input_dim": 1024, "num_groups": 2, "num_layers": 14, "residual_gate_init": 3.0, "residual_gate_type": "elementwise", "rope_base": 1000000, "seq_len": 8192, "stm_size": 4096, "use_flash_attention": false, "use_gqa": true, "use_granular_residual_gate": false, "use_interlayer_gqa": true, "use_linear_slot_status": false, "use_qk_norm": true, "variant": "grouped-self-interlayer" }