SDNQ
Collection
Models quantized with SDNQ • 31 items • Updated • 34
How to use Disty0/Ideogram-4-SDNQ-4bit-dynamic-hadamard with Diffusers:
pip install -U diffusers transformers accelerate
import torch
from diffusers import DiffusionPipeline
# switch to "mps" for apple devices
pipe = DiffusionPipeline.from_pretrained("Disty0/Ideogram-4-SDNQ-4bit-dynamic-hadamard", dtype=torch.bfloat16, device_map="cuda")
prompt = "Astronaut in a jungle, cold color palette, muted colors, detailed, 8k"
image = pipe(prompt).images[0]
import torch
from diffusers import DiffusionPipeline
# switch to "mps" for apple devices
pipe = DiffusionPipeline.from_pretrained("Disty0/Ideogram-4-SDNQ-4bit-dynamic-hadamard", dtype=torch.bfloat16, device_map="cuda")
prompt = "Astronaut in a jungle, cold color palette, muted colors, detailed, 8k"
image = pipe(prompt).images[0]
Dynamic 4-bit quantization of Ideogram-4 using SDNQ.
This model uses per-layer fine-grained quantization.
The dtype used for each layer is selected dynamically by trial and error until the std-normalized MSE loss is lower than the selected threshold.
The minimum allowed dtype is set to uint4 and the std-normalized MSE loss threshold is set to 1e-2.
This produced a mixed-precision model with uint4, int5 and float5_e3m1fn dtypes.
Hadamard rotations are enabled with a Hadamard group size of 128.
Usage:
pip install "sdnq>=0.1.9"
Note: The minimum SDNQ version required for this model is v0.1.9! Older versions will produce garbled output.
import os
import json
import requests
import torch
import diffusers
from sdnq import SDNQConfig # import sdnq to register it into diffusers and transformers
from sdnq.common import use_torch_compile as triton_is_available
from sdnq.loader import apply_sdnq_options_to_model
# Load the SDNQ-quantized pipeline in bfloat16.
# NOTE(review): the original snippet carried the remark
# `or: token="hf_xxxxxxxxx", token is needed as the repo is gated` on the
# caption line below; it appears to belong to this call -- confirm, and pass
# `token=...` here if the repository is gated for your account.
pipe = diffusers.Ideogram4Pipeline.from_pretrained("Disty0/Ideogram-4-SDNQ-4bit-dynamic-hadamard", torch_dtype=torch.bfloat16)

# Enable INT8 and FP8 MatMul for AMD, Intel ARC and Nvidia GPUs:
if triton_is_available and (torch.cuda.is_available() or torch.xpu.is_available()):
    pipe.transformer = apply_sdnq_options_to_model(pipe.transformer, use_quantized_matmul=True)
    pipe.unconditional_transformer = apply_sdnq_options_to_model(pipe.unconditional_transformer, use_quantized_matmul=True)
    pipe.text_encoder = apply_sdnq_options_to_model(pipe.text_encoder, use_quantized_matmul=True)
    # pipe.transformer = torch.compile(pipe.transformer) # optional for faster speeds
    # pipe.unconditional_transformer = torch.compile(pipe.unconditional_transformer) # optional for faster speeds

pipe.enable_model_cpu_offload()

# Expand the prompt into a structured JSON caption with Ideogram's free hosted magic-prompt API.
# Get a key at https://developer.ideogram.ai/ (set IDEOGRAM_API_KEY).
# The key is read from the environment; the literal placeholder is kept as a
# fallback so that editing it in place still works.
ideogram_api_key = os.environ.get("IDEOGRAM_API_KEY", "your_ideogram_api_key")
response = requests.post(
    "https://api.ideogram.ai/v1/ideogram-v4/magic-prompt",
    headers={"Api-Key": ideogram_api_key},
    json={"text_prompt": "a ginger cat wearing a tiny wizard hat reading a spellbook", "aspect_ratio": "1x1"},
    timeout=60,  # do not hang forever if the API is unreachable
)
# Fail with a clear HTTP error (e.g. bad API key) instead of a KeyError below.
response.raise_for_status()
resp = response.json()
caption = json.dumps(resp["json_prompt"])

# Pass the caption straight to the pipeline (no prompt_upsampling — it's already upsampled).
image = pipe(
    caption,
    height=1024,  # model supports up to 2048
    width=1024,  # model supports up to 2048
    generator=torch.manual_seed(0),
).images[0]
image.save("ideogram4-sdnq-4bit-dynamic-hadamard.png")
Base model
ideogram-ai/ideogram-4-fp8