NVFP4 Mixed Quantized model of FLUX.2-klein-4B
Generation speed
- Tested on
- RTX 5090 (400 W), ComfyUI with torch 2.10.0+cu130
- 1216x1856, cfg 1.0, 8steps
| Quantization | it/s | Times (s) | Speed vs BF16 (%) |
|---|---|---|---|
| bf16 | 2.84 | 4.36 | 0% |
| fp8 | 3.98 | 3.35 | 40% |
| nvfp4mixed | 4.38 | 2.77 | 57% |
| nvfp4 | 5.37 | 2.45 | 84% |
Sample
How to reproduce
Copy single_blocks.10 through single_blocks.18 from the NVFP4 model into the FP8 model using the script below.
import torch
import json
import os
from safetensors.torch import load_file, save_file
def parse_args():
    """Parse command-line arguments for the model-merge script.

    Returns:
        argparse.Namespace with attributes:
            src1: path to the base safetensors model (kept except for the
                  selected blocks; its layers are tagged float8_e4m3fn).
            src2: path to the donor safetensors model (source of the
                  selected blocks; its layers are tagged nvfp4).
            dst:  output path for the merged safetensors file.
    """
    # Local import keeps argparse out of scope when main() is imported.
    import argparse

    parser = argparse.ArgumentParser(
        description="Merge selected transformer blocks from one "
        "safetensors checkpoint into another."
    )
    parser.add_argument("src1", help="base model (FP8)")
    parser.add_argument("src2", help="donor model (NVFP4)")
    parser.add_argument("dst", help="output safetensors path")
    return parser.parse_args()
def main():
    """Merge two safetensors checkpoints into one mixed-precision model.

    Every tensor from ``src1`` is kept except those belonging to the
    selected single_blocks, which are taken from ``src2`` instead.
    Per-layer quantization formats (src1 layers as ``float8_e4m3fn``,
    src2 layers as ``nvfp4``) are recorded in the
    ``_quantization_metadata`` header, and the result is written to
    ``dst``.
    """
    args = parse_args()

    # Layers are identified by their ".weight_scale" tensor; stripping the
    # suffix yields the layer name used in the quantization metadata.
    scale_suffix = ".weight_scale"

    # NOTE: substring match — "single_blocks.1" also matches
    # single_blocks.10..19. The exceptions put block 1 itself ("1.") and
    # block 19 back in the src1 set, so blocks 10-18 come from src2.
    block_names = ["single_blocks.1"]
    exception_names = ["single_blocks.1.", "single_blocks.19"]

    def _from_donor(key):
        # True when this tensor should be taken from src2 (blocks 10-18).
        return any(b in key for b in block_names) and not any(
            e in key for e in exception_names
        )

    new_state_dict = {}
    quantization_layers = {}

    # Base model: copy everything except the donor blocks.
    for key, tensor in load_file(args.src1).items():
        if _from_donor(key):
            continue
        new_state_dict[key] = tensor
        if key.endswith(scale_suffix):
            layer_name = key[: -len(scale_suffix)]
            quantization_layers[layer_name] = {"format": "float8_e4m3fn"}

    # Donor model: copy only the donor blocks.
    for key, tensor in load_file(args.src2).items():
        if _from_donor(key):
            new_state_dict[key] = tensor
            if key.endswith(scale_suffix):
                layer_name = key[: -len(scale_suffix)]
                quantization_layers[layer_name] = {"format": "nvfp4"}

    metadata = {
        "_quantization_metadata": json.dumps({
            "format_version": "1.0",
            "layers": quantization_layers,
        })
    }
    save_file(new_state_dict, args.dst, metadata=metadata)

    total_bytes = os.path.getsize(args.dst)
    print(f"Output: {args.dst} ({round(total_bytes / (1024**3), 2)}GB)")
# Run the merge only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
- Downloads last month
- 62
Inference Providers NEW
This model isn't deployed by any Inference Provider. 🙋 Ask for provider support
Model tree for Bedovyy/FLUX.2-klein-4b-nvfp4mixed
Merge model
this model
