from __future__ import annotations

import json
from dataclasses import dataclass
from pathlib import Path

# Weight files that hold a complete checkpoint in a single file.
_SINGLE_FILE_WEIGHTS = ('pytorch_model.bin', 'model.safetensors', 'tf_model.h5')

# Index files that accompany a checkpoint split into several shard files.
# Each is expected to be JSON with a "weight_map" object mapping parameter
# names to shard filenames (the layout used for sharded Hugging Face
# checkpoints).
_SHARD_INDEX_FILES = ('model.safetensors.index.json', 'pytorch_model.bin.index.json')

# Files fetched by ``ensure_huggingface_model``.
# NOTE(review): 'tf_model.h5' is accepted by the local presence check but is
# not matched by any pattern here, so TensorFlow-only repositories would be
# downloaded without weights -- confirm whether that is intentional.
_DOWNLOAD_PATTERNS = (
    '*.json',
    '*.model',
    '*.txt',
    '*.safetensors',
    'pytorch_model*.bin',
    'spiece.model',
    'tokenizer*',
    'special_tokens_map.json',
    'sentencepiece.bpe.model',
)


@dataclass(frozen=True)
class InstalledModel:
    """Result of ``ensure_huggingface_model``."""

    # Repository id that was requested.
    model_id: str
    # Directory that holds (or now holds) the model files.
    path: Path
    # True when the files were found locally and no download was attempted.
    already_present: bool


class ModelDependencyError(RuntimeError):
    """Raised when optional ML dependencies are missing."""


def _sharded_weights_complete(path: Path, index_name: str) -> bool:
    """Return true when *index_name* exists in *path* and every shard it lists is present."""
    try:
        index = json.loads((path / index_name).read_text(encoding='utf-8'))
    except (OSError, ValueError):
        # Missing, unreadable or malformed index: treat as "not installed" so
        # the caller falls through to a fresh download.
        return False

    weight_map = index.get('weight_map') if isinstance(index, dict) else None
    if not isinstance(weight_map, dict) or not weight_map:
        return False

    shard_names = list(weight_map.values())
    if not all(isinstance(name, str) for name in shard_names):
        return False
    # Many parameters share one shard; de-duplicate before touching the disk.
    return all((path / name).exists() for name in set(shard_names))


def has_local_model_files(path: Path) -> bool:
    """Return true when a Hugging Face model snapshot already exists locally.

    A snapshot counts as present when ``config.json`` exists in *path* and
    either

    * one of the single-file weights (``pytorch_model.bin``,
      ``model.safetensors``, ``tf_model.h5``) exists, or
    * a shard index (``model.safetensors.index.json`` or
      ``pytorch_model.bin.index.json``) exists and every shard file named in
      its ``weight_map`` exists.

    The second case matters because the download step also fetches sharded
    checkpoints; without it such models would never be recognised as
    installed. An incomplete set of shards is reported as not present.
    """
    if not (path / 'config.json').exists():
        return False
    if any((path / filename).exists() for filename in _SINGLE_FILE_WEIGHTS):
        return True
    return any(_sharded_weights_complete(path, index_name) for index_name in _SHARD_INDEX_FILES)


def ensure_huggingface_model(
    *,
    model_id: str,
    target_dir: str | Path,
    revision: str | None = None,
) -> InstalledModel:
    """Download a Hugging Face model snapshot if it is not already installed.

    Args:
        model_id: Repository id passed to ``snapshot_download`` as ``repo_id``.
        target_dir: Directory that should contain the model files. It is
            created (including parents) when a download is needed.
        revision: Optional revision forwarded to ``snapshot_download``.

    Returns:
        An ``InstalledModel`` whose ``already_present`` flag is true when
        ``has_local_model_files`` accepted *target_dir* and no download ran.

    Raises:
        ModelDependencyError: If a download is needed and ``huggingface_hub``
            cannot be imported.
    """
    target_path = Path(target_dir)
    if has_local_model_files(target_path):
        return InstalledModel(model_id=model_id, path=target_path, already_present=True)

    # Imported lazily so the already-installed path works without the optional
    # ML dependencies.
    try:
        from huggingface_hub import snapshot_download
    except ImportError as exc:
        raise ModelDependencyError(
            'huggingface_hub is required. Install optional ML dependencies from requirements-ml.txt.'
        ) from exc

    target_path.mkdir(parents=True, exist_ok=True)
    snapshot_download(
        repo_id=model_id,
        revision=revision,
        local_dir=str(target_path),
        # NOTE(review): newer huggingface_hub releases appear to deprecate this
        # argument -- confirm against the version pinned in requirements-ml.txt.
        local_dir_use_symlinks=False,
        allow_patterns=list(_DOWNLOAD_PATTERNS),
    )
    return InstalledModel(model_id=model_id, path=target_path, already_present=False)