Image-Text-to-Text
Transformers
Safetensors
English
gemma4
gemma
google
Mixture of Experts
mixture-of-experts
transformer
26b
nvfp4
fp4
4-bit precision
quantized
modelopt
weight-quantization
uncensored
abliterated
unfiltered
refusal-removed
vision
multimodal
text-generation
tool-calling
function-calling
reasoning
thinking
chat
instruct
agentic
coding
creative-writing
dgx-spark
blackwell
gb10
grace-blackwell
nvidia
gpu
vllm
openai-api
openai-compatible
fp8-kv-cache
prefix-caching
chunked-prefill
sliding-window-attention
english
production-ready
conversational
| { | |
| "audio_seq_length": 750, | |
| "image_processor": { | |
| "do_convert_rgb": true, | |
| "do_normalize": false, | |
| "do_rescale": true, | |
| "do_resize": true, | |
| "image_mean": [ | |
| 0.0, | |
| 0.0, | |
| 0.0 | |
| ], | |
| "image_processor_type": "Gemma4ImageProcessor", | |
| "image_seq_length": 280, | |
| "image_std": [ | |
| 1.0, | |
| 1.0, | |
| 1.0 | |
| ], | |
| "max_soft_tokens": 280, | |
| "patch_size": 16, | |
| "pooling_kernel_size": 3, | |
| "resample": 3, | |
| "rescale_factor": 0.00392156862745098, | |
| "size": { | |
| "height": 224, | |
| "width": 224 | |
| } | |
| }, | |
| "image_seq_length": 280, | |
| "processor_class": "Gemma4Processor", | |
| "feature_extractor": { | |
| "feature_extractor_type": "Gemma4AudioFeatureExtractor", | |
| "sampling_rate": 16000, | |
| "num_mel_filters": 128, | |
| "fft_length": 512, | |
| "hop_length": 160, | |
| "chunk_duration": 8.0, | |
| "overlap_duration": 1.0 | |
| }, | |
| "audio_ms_per_token": 40 | |
| } |