z-lab
/

Llama-3.1-8B-Instruct-PARO

Text Generation

text-generation-inference

4-bit precision

Model card Files Files and versions

liang2kl committed 13 days ago

Commit

f7abebb

·

verified ·

1 Parent(s): 9b3b9a9

Upload README.md with huggingface_hub

Files changed (1) hide show

README.md +4 -5

README.md CHANGED Viewed

@@ -2,9 +2,8 @@
 library_name: transformers
 license: llama3
 pipeline_tag: text-generation
-base_model: meta-llama/Llama-3.1-8B-Instruct
-tags:
-  - mlx
 ---
 # z-lab/Llama-3.1-8B-Instruct-PARO
@@ -32,8 +31,8 @@ z-lab/Llama-3.1-8B-Instruct-PARO is a 4-bit [meta-llama/Llama-3.1-8B-Instruct](h
 pip install "paroquant[vllm]"
 # NVIDIA GPU (CUDA 13.0)
-pip install "paroquant[vllm]" "vllm==0.17.1" \
-  --extra-index-url https://wheels.vllm.ai/0.17.1/cu130 \
   --extra-index-url https://download.pytorch.org/whl/cu130
 # Apple Silicon

 library_name: transformers
 license: llama3
 pipeline_tag: text-generation
+base_model:
+- meta-llama/Llama-3.1-8B-Instruct
 ---
 # z-lab/Llama-3.1-8B-Instruct-PARO
 pip install "paroquant[vllm]"
 # NVIDIA GPU (CUDA 13.0)
+pip install "paroquant[vllm]" "vllm==0.19.0" \
+  --extra-index-url https://wheels.vllm.ai/2a69949bdadf0e8942b7a1619b229cb475beef20/cu130 \
   --extra-index-url https://download.pytorch.org/whl/cu130
 # Apple Silicon