Text Generation
Transformers
Safetensors
English
gemma4
image-text-to-text
gemma
google
Mixture of Experts
mixture-of-experts
sparse-moe
transformer
26b
4b-active
nvfp4
fp4
4-bit precision
quantized
modelopt
weight-quantization
uncensored
abliterated
unfiltered
refusal-removed
vision
multimodal
tool-calling
function-calling
reasoning
thinking
chat
instruct
agentic
coding
creative-writing
dgx-spark
blackwell
gb10
grace-blackwell
nvidia
gpu
vllm
openai-api
openai-compatible
fp8-kv-cache
flashinfer-cutlass
marlin-moe
native-fp4
prefix-caching
chunked-prefill
sliding-window-attention
english
production-ready
conversational
Add files using upload-large-folder tool
Browse files
- config.json +56 -3
- hf_quant_config.json +0 -0
- model.safetensors +2 -2
- video_preprocessor_config.json +30 -0
config.json
CHANGED
|
@@ -145,11 +145,64 @@
|
|
| 145 |
},
|
| 146 |
"vision_soft_tokens_per_image": 280,
|
| 147 |
"quantization_config": {
|
| 148 |
-
"
|
| 149 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 150 |
"producer": {
|
| 151 |
"name": "modelopt",
|
| 152 |
-
"version": "0.
|
| 153 |
},
|
| 154 |
"quant_method": "modelopt"
|
| 155 |
}
|
|
|
|
| 145 |
},
|
| 146 |
"vision_soft_tokens_per_image": 280,
|
| 147 |
"quantization_config": {
|
| 148 |
+
"config_groups": {
|
| 149 |
+
"group_0": {
|
| 150 |
+
"input_activations": {
|
| 151 |
+
"dynamic": false,
|
| 152 |
+
"num_bits": 4,
|
| 153 |
+
"type": "float",
|
| 154 |
+
"group_size": 16
|
| 155 |
+
},
|
| 156 |
+
"weights": {
|
| 157 |
+
"dynamic": false,
|
| 158 |
+
"num_bits": 4,
|
| 159 |
+
"type": "float",
|
| 160 |
+
"group_size": 16
|
| 161 |
+
},
|
| 162 |
+
"targets": [
|
| 163 |
+
"Linear"
|
| 164 |
+
]
|
| 165 |
+
}
|
| 166 |
+
},
|
| 167 |
+
"ignore": [
|
| 168 |
+
"lm_head",
|
| 169 |
+
"model.embed_vision*",
|
| 170 |
+
"model.language_model.layers.0.router*",
|
| 171 |
+
"model.language_model.layers.1.router*",
|
| 172 |
+
"model.language_model.layers.10.router*",
|
| 173 |
+
"model.language_model.layers.11.router*",
|
| 174 |
+
"model.language_model.layers.12.router*",
|
| 175 |
+
"model.language_model.layers.13.router*",
|
| 176 |
+
"model.language_model.layers.14.router*",
|
| 177 |
+
"model.language_model.layers.15.router*",
|
| 178 |
+
"model.language_model.layers.16.router*",
|
| 179 |
+
"model.language_model.layers.17.router*",
|
| 180 |
+
"model.language_model.layers.18.router*",
|
| 181 |
+
"model.language_model.layers.19.router*",
|
| 182 |
+
"model.language_model.layers.2.router*",
|
| 183 |
+
"model.language_model.layers.20.router*",
|
| 184 |
+
"model.language_model.layers.21.router*",
|
| 185 |
+
"model.language_model.layers.22.router*",
|
| 186 |
+
"model.language_model.layers.23.router*",
|
| 187 |
+
"model.language_model.layers.24.router*",
|
| 188 |
+
"model.language_model.layers.25.router*",
|
| 189 |
+
"model.language_model.layers.26.router*",
|
| 190 |
+
"model.language_model.layers.27.router*",
|
| 191 |
+
"model.language_model.layers.28.router*",
|
| 192 |
+
"model.language_model.layers.29.router*",
|
| 193 |
+
"model.language_model.layers.3.router*",
|
| 194 |
+
"model.language_model.layers.4.router*",
|
| 195 |
+
"model.language_model.layers.5.router*",
|
| 196 |
+
"model.language_model.layers.6.router*",
|
| 197 |
+
"model.language_model.layers.7.router*",
|
| 198 |
+
"model.language_model.layers.8.router*",
|
| 199 |
+
"model.language_model.layers.9.router*",
|
| 200 |
+
"model.vision_tower*"
|
| 201 |
+
],
|
| 202 |
+
"quant_algo": "NVFP4",
|
| 203 |
"producer": {
|
| 204 |
"name": "modelopt",
|
| 205 |
+
"version": "0.43.0rc2.dev114+g952a62bf6"
|
| 206 |
},
|
| 207 |
"quant_method": "modelopt"
|
| 208 |
}
|
hf_quant_config.json
CHANGED
|
The diff for this file is too large to render.
See raw diff
|
|
|
model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:8b826e3ff3849901be01ed8eb18e2ccb321332c717d97c4cdfbfa87c8effff7b
|
| 3 |
+
size 16423438668
|
video_preprocessor_config.json
ADDED
|
@@ -0,0 +1,30 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"do_convert_rgb": true,
|
| 3 |
+
"do_normalize": true,
|
| 4 |
+
"do_rescale": true,
|
| 5 |
+
"do_resize": true,
|
| 6 |
+
"do_sample_frames": true,
|
| 7 |
+
"image_mean": [
|
| 8 |
+
0.0,
|
| 9 |
+
0.0,
|
| 10 |
+
0.0
|
| 11 |
+
],
|
| 12 |
+
"image_std": [
|
| 13 |
+
1.0,
|
| 14 |
+
1.0,
|
| 15 |
+
1.0
|
| 16 |
+
],
|
| 17 |
+
"num_frames": 32,
|
| 18 |
+
"patch_size": 16,
|
| 19 |
+
"max_soft_tokens": 280,
|
| 20 |
+
"pooling_kernel_size": 3,
|
| 21 |
+
"resample": 3,
|
| 22 |
+
"rescale_factor": 0.00392156862745098,
|
| 23 |
+
"size": {
|
| 24 |
+
"height": 224,
|
| 25 |
+
"width": 224
|
| 26 |
+
},
|
| 27 |
+
"default_to_square": true,
|
| 28 |
+
"processor_class": "Gemma4Processor",
|
| 29 |
+
"video_processor_type": "Gemma4VideoProcessor"
|
| 30 |
+
}
|