default_stage:
  default_modifiers:
    QuantizationModifier:
      targets: [Linear]
      ignore: ['re:.*lm_head', 're:.*embed_tokens', 're:.*layers[.]0[.].*', 're:.*input_layernorm$',
        're:.*norm.*', 're:.*shared_experts.*', 're:.*block_sparse_moe[.]gate$', 're:.*router.*',
        're:.*post_attention_layernorm$', 're:.*self_attn.*']
      scheme: FP8_BLOCK