#!/usr/bin/env bash
# Quantize the ConicCat Qwen3.5-27B Writer model from bf16 to a mixed
# ~5.45 bpw GGUF, using an importance matrix and per-tensor quantization
# overrides. Requires llama.cpp's `llama-quantize` on PATH.
set -euo pipefail

# Input importance matrix, source model, and output path.
IMATRIX=~/imatrices/ConicCat-Qwen3.5-27B-Writer-ddh0_v2-imatrix.gguf
SRC_GGUF=~/gguf/ConicCat-Qwen3.5-27B-Writer-bf16.gguf
DST_GGUF=~/gguf/ConicCat-Qwen3.5-27B-Writer-5.45bpw.gguf

# Fail early with a clear message rather than mid-run inside llama-quantize.
[[ -f "$IMATRIX" ]]  || { printf 'missing imatrix: %s\n'   "$IMATRIX"  >&2; exit 1; }
[[ -f "$SRC_GGUF" ]] || { printf 'missing source gguf: %s\n' "$SRC_GGUF" >&2; exit 1; }
command -v llama-quantize >/dev/null || { printf 'llama-quantize not found on PATH\n' >&2; exit 1; }

# Per-tensor overrides: FFN and attn_q kept small (Q4_K), down-projection and
# attention output slightly larger (Q5_K), K/V and gates near-lossless (Q8_0),
# SSM scalars in full f32. Trailing Q8_0 is the fallback type for any tensor
# not matched above; final arg is the thread count.
time llama-quantize \
  --imatrix "$IMATRIX" \
  --token-embedding-type Q4_K \
  --tensor-type ffn_up=Q4_K \
  --tensor-type ffn_gate=Q4_K \
  --tensor-type ffn_down=Q5_K \
  --tensor-type attn_qkv=Q6_K \
  --tensor-type attn_k=Q8_0 \
  --tensor-type attn_v=Q8_0 \
  --tensor-type attn_output=Q5_K \
  --tensor-type attn_q=Q4_K \
  --tensor-type ssm_alpha=f32 \
  --tensor-type ssm_beta=f32 \
  --tensor-type ssm_out=Q5_K \
  --tensor-type attn_gate=Q8_0 \
  --output-tensor-type Q8_0 \
  "$SRC_GGUF" "$DST_GGUF" Q8_0 "$(nproc)"
- Downloads last month: 67
Hardware compatibility
Log In to add your hardware
We're not able to determine the quantization variants.
Inference Providers NEW
This model isn't deployed by any Inference Provider. Ask for provider support.