cnxup
/

LLaVA-NeXT-8B-MLA-stage2-rope32-d_kv_128

Model card · Files and versions

cnxup committed on Jan 31

Commit

fa8b46b

·

verified ·

1 Parent(s): 7dfebfb

Upload config.json

Files changed (1) hide show

config.json +105 -0

config.json ADDED Viewed

	@@ -0,0 +1,105 @@

+{
+  "architectures": [
+    "LlavaNextForConditionalGeneration"
+  ],
+  "hidden_size": 4096,
+  "ignore_index": -100,
+  "image_grid_pinpoints": [
+    [
+      336,
+      672
+    ],
+    [
+      672,
+      336
+    ],
+    [
+      672,
+      672
+    ],
+    [
+      1008,
+      336
+    ],
+    [
+      336,
+      1008
+    ]
+  ],
+  "image_seq_length": 576,
+  "image_token_index": 128256,
+  "keys_to_ignore_at_inference": [
+    "past_key_values"
+  ],
+  "mha2mla": {
+    "is_baseline": false,
+    "is_gqa2mha2mla": false,
+    "low_rank": 128,
+    "multimodal_rope_section_for_mla": null,
+    "partial_rope_version": "mkl",
+    "peft_train": "v2",
+    "qk_tensor_path": "mkl/llava_next/llava_next-ranks.pth",
+    "rope_dim_for_mla": 32,
+    "stage1_path": "cnxup/LLaVA-NeXT-8B-MLA-stage1-rope32",
+    "svd_init_method": "joint",
+    "svd_init_weight_path": "LLaVA-NeXT-8B-rope32-d_kv_128.pt",
+    "svd_split_modal": true,
+    "uniform_start_point": 0
+  },
+  "model_type": "llava_next",
+  "multimodal_projector_bias": true,
+  "pad_token_id": 128257,
+  "projector_hidden_act": "gelu",
+  "text_config": {
+    "_name_or_path": "meta-llama/Meta-Llama-3-8B-Instruct",
+    "architectures": [
+      "LlamaForCausalLM"
+    ],
+    "attention_bias": false,
+    "attention_dropout": 0.0,
+    "bos_token_id": 128000,
+    "eos_token_id": 128009,
+    "head_dim": 128,
+    "hidden_act": "silu",
+    "hidden_size": 4096,
+    "initializer_range": 0.02,
+    "intermediate_size": 14336,
+    "max_position_embeddings": 8192,
+    "mlp_bias": false,
+    "model_type": "llama",
+    "num_attention_heads": 32,
+    "num_hidden_layers": 32,
+    "num_key_value_heads": 8,
+    "pretraining_tp": 1,
+    "rms_norm_eps": 1e-05,
+    "rope_scaling": null,
+    "rope_theta": 500000.0,
+    "torch_dtype": "bfloat16",
+    "use_cache": true,
+    "vocab_size": 128320
+  },
+  "tie_word_embeddings": false,
+  "torch_dtype": "bfloat16",
+  "transformers_version": "4.51.0",
+  "use_image_newline_parameter": true,
+  "vision_config": {
+    "attention_dropout": 0.0,
+    "hidden_act": "quick_gelu",
+    "hidden_size": 1024,
+    "image_size": 336,
+    "initializer_factor": 1.0,
+    "initializer_range": 0.02,
+    "intermediate_size": 4096,
+    "layer_norm_eps": 1e-05,
+    "model_type": "clip_vision_model",
+    "num_attention_heads": 16,
+    "num_channels": 3,
+    "num_hidden_layers": 24,
+    "patch_size": 14,
+    "projection_dim": 768,
+    "torch_dtype": "bfloat16",
+    "vocab_size": 32000
+  },
+  "vision_feature_layer": -2,
+  "vision_feature_select_strategy": "default"
+}