MuXodious commited on
Commit
1a949dd
·
verified ·
1 Parent(s): 2c03fdf
added_tokens.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ {
2
+ "<image_soft_token>": 262144
3
+ }
config.json CHANGED
@@ -3,7 +3,6 @@
3
  "Gemma3ForConditionalGeneration"
4
  ],
5
  "boi_token_index": 255999,
6
- "dtype": "bfloat16",
7
  "eoi_token_index": 256000,
8
  "eos_token_id": [
9
  1,
@@ -14,109 +13,28 @@
14
  "mm_tokens_per_image": 256,
15
  "model_type": "gemma3",
16
  "text_config": {
17
- "_sliding_window_pattern": 6,
18
- "attention_bias": false,
19
- "attention_dropout": 0.0,
20
- "attn_logit_softcapping": null,
21
- "final_logit_softcapping": null,
22
  "head_dim": 128,
23
- "hidden_activation": "gelu_pytorch_tanh",
24
  "hidden_size": 5376,
25
- "initializer_range": 0.02,
26
  "intermediate_size": 21504,
27
- "layer_types": [
28
- "sliding_attention",
29
- "sliding_attention",
30
- "sliding_attention",
31
- "sliding_attention",
32
- "sliding_attention",
33
- "full_attention",
34
- "sliding_attention",
35
- "sliding_attention",
36
- "sliding_attention",
37
- "sliding_attention",
38
- "sliding_attention",
39
- "full_attention",
40
- "sliding_attention",
41
- "sliding_attention",
42
- "sliding_attention",
43
- "sliding_attention",
44
- "sliding_attention",
45
- "full_attention",
46
- "sliding_attention",
47
- "sliding_attention",
48
- "sliding_attention",
49
- "sliding_attention",
50
- "sliding_attention",
51
- "full_attention",
52
- "sliding_attention",
53
- "sliding_attention",
54
- "sliding_attention",
55
- "sliding_attention",
56
- "sliding_attention",
57
- "full_attention",
58
- "sliding_attention",
59
- "sliding_attention",
60
- "sliding_attention",
61
- "sliding_attention",
62
- "sliding_attention",
63
- "full_attention",
64
- "sliding_attention",
65
- "sliding_attention",
66
- "sliding_attention",
67
- "sliding_attention",
68
- "sliding_attention",
69
- "full_attention",
70
- "sliding_attention",
71
- "sliding_attention",
72
- "sliding_attention",
73
- "sliding_attention",
74
- "sliding_attention",
75
- "full_attention",
76
- "sliding_attention",
77
- "sliding_attention",
78
- "sliding_attention",
79
- "sliding_attention",
80
- "sliding_attention",
81
- "full_attention",
82
- "sliding_attention",
83
- "sliding_attention",
84
- "sliding_attention",
85
- "sliding_attention",
86
- "sliding_attention",
87
- "full_attention",
88
- "sliding_attention",
89
- "sliding_attention"
90
- ],
91
- "max_position_embeddings": 131072,
92
  "model_type": "gemma3_text",
93
  "num_attention_heads": 32,
94
  "num_hidden_layers": 62,
95
  "num_key_value_heads": 16,
96
  "query_pre_attn_scalar": 168,
97
- "rms_norm_eps": 1e-06,
98
- "rope_local_base_freq": 10000.0,
99
  "rope_scaling": {
100
  "factor": 8.0,
101
  "rope_type": "linear"
102
  },
103
- "rope_theta": 1000000.0,
104
- "sliding_window": 1024,
105
- "use_bidirectional_attention": false,
106
- "use_cache": true,
107
- "vocab_size": 262208
108
  },
 
109
  "transformers_version": "4.57.6",
110
  "vision_config": {
111
- "attention_dropout": 0.0,
112
- "hidden_act": "gelu_pytorch_tanh",
113
  "hidden_size": 1152,
114
  "image_size": 896,
115
  "intermediate_size": 4304,
116
- "layer_norm_eps": 1e-06,
117
  "model_type": "siglip_vision_model",
118
  "num_attention_heads": 16,
119
- "num_channels": 3,
120
  "num_hidden_layers": 27,
121
  "patch_size": 14,
122
  "vision_use_head": false
 
3
  "Gemma3ForConditionalGeneration"
4
  ],
5
  "boi_token_index": 255999,
 
6
  "eoi_token_index": 256000,
7
  "eos_token_id": [
8
  1,
 
13
  "mm_tokens_per_image": 256,
14
  "model_type": "gemma3",
15
  "text_config": {
 
 
 
 
 
16
  "head_dim": 128,
 
17
  "hidden_size": 5376,
 
18
  "intermediate_size": 21504,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
19
  "model_type": "gemma3_text",
20
  "num_attention_heads": 32,
21
  "num_hidden_layers": 62,
22
  "num_key_value_heads": 16,
23
  "query_pre_attn_scalar": 168,
 
 
24
  "rope_scaling": {
25
  "factor": 8.0,
26
  "rope_type": "linear"
27
  },
28
+ "sliding_window": 1024
 
 
 
 
29
  },
30
+ "torch_dtype": "bfloat16",
31
  "transformers_version": "4.57.6",
32
  "vision_config": {
 
 
33
  "hidden_size": 1152,
34
  "image_size": 896,
35
  "intermediate_size": 4304,
 
36
  "model_type": "siglip_vision_model",
37
  "num_attention_heads": 16,
 
38
  "num_hidden_layers": 27,
39
  "patch_size": 14,
40
  "vision_use_head": false
generation_config.json CHANGED
@@ -1,5 +1,6 @@
1
  {
2
  "bos_token_id": 2,
 
3
  "do_sample": true,
4
  "eos_token_id": [
5
  1,
 
1
  {
2
  "bos_token_id": 2,
3
+ "cache_implementation": "hybrid",
4
  "do_sample": true,
5
  "eos_token_id": [
6
  1,
preprocessor_config.json ADDED
@@ -0,0 +1,29 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "do_convert_rgb": null,
3
+ "do_normalize": true,
4
+ "do_pan_and_scan": null,
5
+ "do_rescale": true,
6
+ "do_resize": true,
7
+ "image_mean": [
8
+ 0.5,
9
+ 0.5,
10
+ 0.5
11
+ ],
12
+ "image_processor_type": "Gemma3ImageProcessor",
13
+ "image_seq_length": 256,
14
+ "image_std": [
15
+ 0.5,
16
+ 0.5,
17
+ 0.5
18
+ ],
19
+ "pan_and_scan_max_num_crops": null,
20
+ "pan_and_scan_min_crop_size": null,
21
+ "pan_and_scan_min_ratio_to_activate": null,
22
+ "processor_class": "Gemma3Processor",
23
+ "resample": 2,
24
+ "rescale_factor": 0.00392156862745098,
25
+ "size": {
26
+ "height": 896,
27
+ "width": 896
28
+ }
29
+ }
processor_config.json ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ {
2
+ "image_seq_length": 256,
3
+ "processor_class": "Gemma3Processor"
4
+ }
tokenizer.json CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d786405177734910d7a3db625c2826640964a0b4e5cdbbd70620ae3313a01bef
3
- size 33384722
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4667f2089529e8e7657cfb6d1c19910ae71ff5f28aa7ab2ff2763330affad795
3
+ size 33384568
tokenizer.model ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1299c11d7cf632ef3b4e11937501358ada021bbdf7c47638d13c0ee982f2e79c
3
+ size 4689074
tokenizer_config.json CHANGED
@@ -51325,6 +51325,7 @@
51325
  },
51326
  "boi_token": "<start_of_image>",
51327
  "bos_token": "<bos>",
 
51328
  "clean_up_tokenization_spaces": false,
51329
  "eoi_token": "<end_of_image>",
51330
  "eos_token": "<eos>",
 
51325
  },
51326
  "boi_token": "<start_of_image>",
51327
  "bos_token": "<bos>",
51328
+ "chat_template": "{{ bos_token }}\n{%- if messages[0]['role'] == 'system' -%}\n {%- if messages[0]['content'] is string -%}\n {%- set first_user_prefix = messages[0]['content'] + '\n\n' -%}\n {%- else -%}\n {%- set first_user_prefix = messages[0]['content'][0]['text'] + '\n\n' -%}\n {%- endif -%}\n {%- set loop_messages = messages[1:] -%}\n{%- else -%}\n {%- set first_user_prefix = \"\" -%}\n {%- set loop_messages = messages -%}\n{%- endif -%}\n{%- for message in loop_messages -%}\n {%- if (message['role'] == 'user') != (loop.index0 % 2 == 0) -%}\n {{ raise_exception(\"Conversation roles must alternate user/assistant/user/assistant/...\") }}\n {%- endif -%}\n {%- if (message['role'] == 'assistant') -%}\n {%- set role = \"model\" -%}\n {%- else -%}\n {%- set role = message['role'] -%}\n {%- endif -%}\n {{ '<start_of_turn>' + role + '\n' + (first_user_prefix if loop.first else \"\") }}\n {%- if message['content'] is string -%}\n {{ message['content'] | trim }}\n {%- elif message['content'] is iterable -%}\n {%- for item in message['content'] -%}\n {%- if item['type'] == 'image' -%}\n {{ '<start_of_image>' }}\n {%- elif item['type'] == 'text' -%}\n {{ item['text'] | trim }}\n {%- endif -%}\n {%- endfor -%}\n {%- else -%}\n {{ raise_exception(\"Invalid content type\") }}\n {%- endif -%}\n {{ '<end_of_turn>\n' }}\n{%- endfor -%}\n{%- if add_generation_prompt -%}\n {{'<start_of_turn>model\n'}}\n{%- endif -%}\n",
51329
  "clean_up_tokenization_spaces": false,
51330
  "eoi_token": "<end_of_image>",
51331
  "eos_token": "<eos>",