# SkyPilot Multi-GPU Configuration for Fast Fine-tuning
# Uses 8x GPUs for parallel training and dataset annotation
#
# Task layout:
#   resources - requested hardware (spot instance, 8 accelerators, RAM, disk)
#   setup     - one-time environment provisioning (packages + repo clone)
#   run       - synthetic data generation, dataset preparation, multi-GPU
#               fine-tuning via `accelerate launch`, then a quick benchmark

name: ensemble-multi-gpu

resources:
  use_spot: true
  accelerators: A100:8  # 8x A100 GPUs
  # Alternative cheaper options:
  # accelerators: V100:8  # 8x V100
  # accelerators: L4:8  # 8x L4 (cheaper)
  memory: 128+  # 128GB+ RAM for multi-GPU
  disk_size: 500  # 500GB for datasets

setup: |
  # Abort setup on the first failing command.
  set -e

  echo "🔧 Setting up multi-GPU environment..."

  # Install dependencies
  sudo apt-get update -qq
  pip install --quiet torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu118
  pip install --quiet transformers datasets librosa soundfile accelerate
  pip install --quiet huggingface_hub pandas numpy tqdm scikit-learn

  # Clone repo (skipped when the checkout already exists, so setup can be re-run)
  if [ ! -d "ensemble-tts-annotation" ]; then
    git clone https://huggingface.co/marcosremar2/ensemble-tts-annotation
  fi

  cd ensemble-tts-annotation

  echo "✅ Setup complete!"
  echo "GPUs available:"
  nvidia-smi --query-gpu=index,name,memory.total --format=csv,noheader

run: |
  # Fail fast: without this a failed data-prep or training step would still
  # print the success banner below and continue on to the benchmark.
  # pipefail makes a failing nvidia-smi abort instead of yielding a bogus count.
  set -eo pipefail

  cd ensemble-tts-annotation

  # Check GPU count (one output line per visible GPU)
  GPU_COUNT=$(nvidia-smi --query-gpu=name --format=csv,noheader | wc -l)
  if [ "$GPU_COUNT" -lt 1 ]; then
    echo "No GPUs detected by nvidia-smi; aborting." >&2
    exit 1
  fi

  echo "🚀 Multi-GPU Training with $GPU_COUNT GPUs"
  echo "================================================"

  # Create synthetic data
  echo "📊 Creating synthetic dataset (larger for multi-GPU)..."
  python scripts/data/create_synthetic_test_data.py \
    --output data/raw/synthetic_large/ \
    --samples 200

  # Prepare dataset
  echo "📦 Preparing dataset..."
  python scripts/data/download_ptbr_datasets.py \
    --prepare-local data/raw/synthetic_large/

  # Fine-tune with multi-GPU (using accelerate): one process per detected GPU
  echo "🔥 Fine-tuning with $GPU_COUNT GPUs..."
  accelerate launch --multi_gpu --num_processes="$GPU_COUNT" \
    scripts/training/finetune_emotion2vec.py \
    --dataset data/prepared/synthetic_large_prepared \
    --epochs 20 \
    --batch-size 64 \
    --device cuda \
    --augment \
    --output models/emotion/emotion2vec_finetuned_multigpu/

  echo "✅ Fine-tuning complete!"

  # Benchmark
  echo "📊 Performance benchmark:"
  python scripts/test/test_quick.py --mode balanced

  echo "================================================"
  echo "💡 Upload results with:"
  # NOTE(review): confirm that `sky storage upload` exists in the SkyPilot
  # version in use; the hint text is kept unchanged here.
  echo "sky storage upload models/emotion/emotion2vec_finetuned_multigpu/ s3://my-bucket/"

num_nodes: 1