#!/usr/bin/env bash
#
# ARM64 + CUDA setup for NVIDIA DGX Spark.
#
# Installs a pinned PyTorch / BitsAndBytes / Transformers / vLLM stack and
# persists the vLLM attention settings to ~/.bashrc.
#
# Usage:
#   ./setup.sh                 # uses `python` from PATH
#   PYTHON=python3 ./setup.sh  # pick a specific interpreter
#
# Environment:
#   PYTHON  Interpreter used for every `pip` call and for the final
#           verification (default: python). Running pip as a module keeps
#           the packages in the same environment that is verified below.
#
# Requires: sudo rights for apt-get, network access to PyPI and
# download.pytorch.org.

set -euo pipefail

PYTHON="${PYTHON:-python}"
bashrc_file=~/.bashrc
readonly PYTHON bashrc_file

#######################################
# Append a line to a file unless an identical line is already present,
# so that re-running this script does not pile up duplicate exports.
# Arguments: $1 - exact line to persist
#            $2 - file to append to (created if missing)
# Returns:   0 on success, non-zero if the file cannot be written
#######################################
persist_line() {
  local line=$1
  local target=$2
  # -x: whole-line match, -F: literal string; a missing file counts as
  # "not present" and is created by the append below.
  if ! grep -qxF -- "${line}" "${target}" 2>/dev/null; then
    printf '%s\n' "${line}" >> "${target}"
  fi
}

# Fail early with a clear message instead of half-way through the install.
if ! command -v "${PYTHON}" > /dev/null; then
  printf 'error: interpreter "%s" not found; set PYTHON=...\n' "${PYTHON}" >&2
  exit 1
fi

echo "Setting up ARM64 environment..."

# 1. Install ARM64-compatible PyTorch with CUDA 12.1
echo "[1/4] Installing PyTorch..."
"${PYTHON}" -m pip install \
  torch==2.3.0+cu121 torchvision==0.18.0+cu121 torchaudio==2.3.0 \
  --index-url https://download.pytorch.org/whl/cu121

# 2. Build BitsAndBytes from source (no ARM64 wheels)
echo "[2/4] Building BitsAndBytes..."
sudo apt-get update
sudo apt-get install -y build-essential cmake libopenblas-dev
# --no-binary makes pip ignore any wheel and build from the sdist.
"${PYTHON}" -m pip install bitsandbytes==0.43.0 --no-binary bitsandbytes

# 3. Install Transformers stack
echo "[3/4] Installing dependencies..."
"${PYTHON}" -m pip install \
  transformers==4.40.0 \
  datasets==2.18.0 \
  peft==0.10.0 \
  accelerate==0.28.0 \
  sentencepiece==0.2.0 \
  scikit-learn==1.4.1

# 4. Configure vLLM for ARM64
echo "[4/4] Configuring vLLM..."
# Exported for this process (and its children) and persisted for new shells.
export VLLM_USE_TRITON_FLASH_ATTN=0
export VLLM_ATTENTION_BACKEND=TORCH_SDPA
persist_line 'export VLLM_USE_TRITON_FLASH_ATTN=0' "${bashrc_file}"
persist_line 'export VLLM_ATTENTION_BACKEND=TORCH_SDPA' "${bashrc_file}"
# NOTE(review): vllm pins its own torch requirement; pip may replace the
# torch build installed in step 1. Confirm the version printed below is the
# one you expect.
"${PYTHON}" -m pip install vllm==0.4.0.post1

# Verify
"${PYTHON}" -c "import torch; print(f'PyTorch: {torch.__version__}'); print(f'CUDA: {torch.cuda.is_available()}')"
"${PYTHON}" -c "import bitsandbytes; print(f'BitsAndBytes: {bitsandbytes.__version__}')"

echo ""
echo "✓ ARM64 setup complete!"