NVFP4 Mixed Quantized model of FLUX.2-klein-4B

Generation speed

  • Tested on
    • RTX5090 (400W), ComfyUI with torch 2.10.0+cu130
    • 1216x1856, cfg 1.0, 8steps
| Quantization | it/s | Time (s) | Speed vs BF16 (%) |
|--------------|------|----------|-------------------|
| bf16         | 2.84 | 4.36     | 0%                |
| fp8          | 3.98 | 3.35     | 40%               |
| nvfp4mixed   | 4.38 | 2.77     | 57%               |
| nvfp4        | 5.37 | 2.45     | 84%               |

Sample

Klein-4B-Sample

How to reproduce

Replace single_blocks.10 through single_blocks.18 in the FP8 model with the corresponding tensors from the NVFP4 model:

import torch
import json
import os
from safetensors.torch import load_file, save_file

def parse_args():
    """Parse command-line arguments: two source checkpoints and an output path.

    Returns:
        argparse.Namespace with ``src1`` (FP8 checkpoint), ``src2`` (NVFP4
        checkpoint), and ``dst`` (output file) attributes.
    """
    import argparse
    parser = argparse.ArgumentParser(
        description="Merge an FP8 and an NVFP4 FLUX.2 checkpoint into a "
                    "single mixed-precision safetensors file."
    )
    parser.add_argument("src1", help="path to the base (FP8) safetensors checkpoint")
    parser.add_argument("src2", help="path to the donor (NVFP4) safetensors checkpoint")
    parser.add_argument("dst", help="path to write the merged checkpoint to")
    return parser.parse_args()

def main():
    """Merge two FLUX.2 quantized checkpoints into one mixed-precision file.

    Keeps every tensor from ``src1`` (tagged ``float8_e4m3fn``) except the
    layers in ``single_blocks.10`` .. ``single_blocks.18``, which are taken
    from ``src2`` (tagged ``nvfp4``) instead.  The merged state dict plus a
    per-layer quantization metadata table is written to ``dst``.
    """
    args = parse_args()

    # Suffix marking a quantized layer's scale tensor; stripping it yields the
    # layer name used as the key in the metadata table.  (Derived with len()
    # instead of a hard-coded -13 so the two stay in sync.)
    scale_suffix = ".weight_scale"

    def is_swapped_block(key):
        # Substring match on purpose: "single_blocks.1" hits blocks 1 and
        # 10-19; the exceptions then carve out block 1 itself
        # ("single_blocks.1.") and block 19, leaving exactly
        # single_blocks.10 .. single_blocks.18.
        block_names = ["single_blocks.1"]
        exception_names = ["single_blocks.1.", "single_blocks.19"]
        return any(b in key for b in block_names) and not any(
            e in key for e in exception_names
        )

    new_state_dict = {}
    quantization_layers = {}

    # Base model: keep everything except the blocks to be swapped in from src2.
    state_dict1 = load_file(args.src1)
    for key, tensor in state_dict1.items():
        if is_swapped_block(key):
            continue
        new_state_dict[key] = tensor
        if key.endswith(scale_suffix):
            layer_name = key[:-len(scale_suffix)]
            quantization_layers[layer_name] = {"format": "float8_e4m3fn"}

    # Donor model: contribute only the swapped blocks, tagged as nvfp4.
    state_dict2 = load_file(args.src2)
    for key, tensor in state_dict2.items():
        if is_swapped_block(key):
            new_state_dict[key] = tensor
            if key.endswith(scale_suffix):
                layer_name = key[:-len(scale_suffix)]
                quantization_layers[layer_name] = {"format": "nvfp4"}

    metadata = {
        "_quantization_metadata": json.dumps({
            "format_version": "1.0",
            "layers": quantization_layers,
        })
    }
    save_file(new_state_dict, args.dst, metadata=metadata)
    total_bytes = os.path.getsize(args.dst)
    print(f"Output: {args.dst} ({round(total_bytes / (1024**3), 2)}GB)")

# Run the merge only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
Downloads last month
62
Inference Providers NEW
This model isn't deployed by any Inference Provider. 🙋 Ask for provider support

Model tree for Bedovyy/FLUX.2-klein-4b-nvfp4mixed