Update factuality-detection/granite-4.0-micro/README.md
Browse files
factuality-detection/granite-4.0-micro/README.md
CHANGED
|
@@ -78,7 +78,14 @@ def make_prompt(query: str, response: str, contexts: list, tokenizer):
|
|
| 78 |
|
| 79 |
# Load the model
|
| 80 |
BASE_PATH = "ibm-granite/granite-4.0-micro"
|
| 81 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 82 |
sampling_params = SamplingParams(max_tokens=30, temperature=0.0, seed=42)
|
| 83 |
lora_request = LoRARequest("adapter1", 1, LORA_PATH)
|
| 84 |
model = LLM(model=BASE_PATH, tensor_parallel_size=1, gpu_memory_utilization=0.95, dtype="bfloat16", enable_lora=True, max_lora_rank=128)
|
|
|
|
| 78 |
|
| 79 |
# Load the model
|
| 80 |
BASE_PATH = "ibm-granite/granite-4.0-micro"
|
| 81 |
+
adapter_repo = "ibm-granite/granitelib-guardian-r1.0"
|
| 82 |
+
adapter_subfolder = "factuality-detection/granite-4.0-micro/lora"
|
| 83 |
+
|
| 84 |
+
# Download only the adapter subfolder to the local cache and get its path
|
| 85 |
+
local_repo = snapshot_download(adapter_repo, allow_patterns=f"{adapter_subfolder}/*")
|
| 86 |
+
adapter_path = f"{local_repo}/{adapter_subfolder}"
|
| 87 |
+
LORA_PATH = adapter_path
|
| 88 |
+
|
| 89 |
sampling_params = SamplingParams(max_tokens=30, temperature=0.0, seed=42)
|
| 90 |
lora_request = LoRARequest("adapter1", 1, LORA_PATH)
|
| 91 |
model = LLM(model=BASE_PATH, tensor_parallel_size=1, gpu_memory_utilization=0.95, dtype="bfloat16", enable_lora=True, max_lora_rank=128)
|