| from __future__ import annotations |
|
|
| from dataclasses import dataclass |
| from pathlib import Path |
|
|
|
|
@dataclass(frozen=True)
class InstalledModel:
    """Immutable record describing a model directory on disk.

    Instances are frozen: fields cannot be reassigned after construction.
    """

    # Identifier of the model this record refers to.
    model_id: str
    # Filesystem location associated with the model.
    path: Path
    # Whether the model files were reported as already on disk.
    already_present: bool
|
|
|
|
class ModelDependencyError(RuntimeError):
    """Signal that an optional machine-learning dependency is not installed."""
|
|
|
|
def has_local_model_files(path: Path) -> bool:
    """Return true when a Hugging Face model snapshot already exists locally.

    A snapshot counts as present when ``config.json`` exists in *path* and
    the weights are available in one of two layouts:

    * a single weight file (``pytorch_model.bin``, ``model.safetensors`` or
      ``tf_model.h5``), or
    * a sharded checkpoint: an index file (``model.safetensors.index.json``
      or ``pytorch_model.bin.index.json``) whose ``weight_map`` names shard
      files that all exist in *path*.

    Args:
        path: Directory expected to contain the model snapshot.

    Returns:
        ``True`` if the directory holds a config plus a complete set of
        weights, ``False`` otherwise (including when *path* does not exist
        or a shard index is unreadable, malformed, or incomplete).
    """
    # Local import keeps this function self-contained; only the sharded
    # branch needs it.
    import json

    if not (path / 'config.json').exists():
        return False

    single_file_weights = ('pytorch_model.bin', 'model.safetensors', 'tf_model.h5')
    if any((path / filename).exists() for filename in single_file_weights):
        return True

    # Sharded checkpoints have no single weight file; the index lists which
    # shard holds each tensor. Require every listed shard so a partially
    # downloaded snapshot is not mistaken for a complete one.
    shard_indexes = ('model.safetensors.index.json', 'pytorch_model.bin.index.json')
    for index_name in shard_indexes:
        try:
            index = json.loads((path / index_name).read_text(encoding='utf-8'))
        except (OSError, ValueError):
            # Missing, unreadable, or not valid JSON/UTF-8: try the next one.
            continue

        weight_map = index.get('weight_map') if isinstance(index, dict) else None
        if not isinstance(weight_map, dict) or not weight_map:
            continue

        shard_names = list(weight_map.values())
        if not all(isinstance(name, str) for name in shard_names):
            continue
        if all((path / name).exists() for name in set(shard_names)):
            return True

    return False
|
|
|
|
def ensure_huggingface_model(
    *,
    model_id: str,
    target_dir: str | Path,
    revision: str | None = None,
) -> InstalledModel:
    """Make sure a Hugging Face model snapshot is available in *target_dir*.

    If ``has_local_model_files`` already reports a snapshot in the target
    directory, nothing is downloaded and *revision* is not consulted.
    Otherwise the directory is created if needed and the snapshot is fetched
    with ``huggingface_hub.snapshot_download``.

    Args:
        model_id: Repository id passed to ``snapshot_download`` as ``repo_id``.
        target_dir: Directory that holds (or will hold) the model files.
        revision: Optional revision forwarded to ``snapshot_download``.

    Returns:
        An ``InstalledModel`` whose ``already_present`` flag is ``True`` when
        the download was skipped and ``False`` when it was performed.

    Raises:
        ModelDependencyError: If ``huggingface_hub`` cannot be imported.
    """
    destination = Path(target_dir)

    # Fast path: a usable snapshot is already on disk.
    if has_local_model_files(destination):
        return InstalledModel(
            model_id=model_id,
            path=destination,
            already_present=True,
        )

    # Imported lazily so the module can be loaded without the optional
    # ML dependencies installed.
    try:
        from huggingface_hub import snapshot_download
    except ImportError as exc:
        raise ModelDependencyError(
            'huggingface_hub is required. Install optional ML dependencies from requirements-ml.txt.'
        ) from exc

    # Only these filename patterns are requested from the repository.
    wanted_files = [
        '*.json',
        '*.model',
        '*.txt',
        '*.safetensors',
        'pytorch_model*.bin',
        'spiece.model',
        'tokenizer*',
        'special_tokens_map.json',
        'sentencepiece.bpe.model',
    ]

    destination.mkdir(parents=True, exist_ok=True)
    snapshot_download(
        repo_id=model_id,
        revision=revision,
        local_dir=str(destination),
        # NOTE(review): presumably requests real files instead of symlinks;
        # confirm against the installed huggingface_hub version.
        local_dir_use_symlinks=False,
        allow_patterns=wanted_files,
    )

    return InstalledModel(
        model_id=model_id,
        path=destination,
        already_present=False,
    )