ensemble-tts-annotation / scripts /cloud /skypilot_multi_gpu.yaml
marcosremar
🚀 SkyPilot Multi-Cloud GPU Support + Synthetic Data Generation
13e402e
# SkyPilot task: fast fine-tuning on a multi-GPU machine.
# The eight GPUs requested below are used for parallel training and dataset annotation.
name: ensemble-multi-gpu

resources:
  # Preferred accelerator: eight A100 GPUs.
  accelerators: A100:8
  # Cheaper alternatives (swap in one of these for the line above):
  #   accelerators: V100:8  # 8x V100
  #   accelerators: L4:8    # 8x L4 (cheaper)
  use_spot: true
  memory: 128+    # at least 128 GB of RAM for the multi-GPU job
  disk_size: 500  # 500 GB of disk for datasets
setup: |
  # Provisioning step: install the Python stack, fetch the project
  # repository, and print the GPUs visible on the node.
  # Abort on the first failing command, unset variable, or broken pipeline.
  set -euo pipefail

  echo "🔧 Setting up multi-GPU environment..."

  # Refresh the apt package index.
  # NOTE(review): no apt package is installed afterwards — confirm whether a
  # system library (e.g. for audio decoding) was meant to be installed here.
  sudo apt-get update -qq

  # PyTorch wheels come from the CUDA 11.8 wheel index; the rest from the
  # default index.
  pip install --quiet torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu118
  pip install --quiet transformers datasets librosa soundfile accelerate
  pip install --quiet huggingface_hub pandas numpy tqdm scikit-learn

  # Clone only when the checkout is absent, so re-running setup does not fail
  # on an already-present directory.
  if [[ ! -d ensemble-tts-annotation ]]; then
    git clone https://huggingface.co/marcosremar2/ensemble-tts-annotation
  fi
  # Under `set -e` this fails setup early if the checkout is missing.
  cd ensemble-tts-annotation

  echo "✅ Setup complete!"
  echo "GPUs available:"
  nvidia-smi --query-gpu=index,name,memory.total --format=csv,noheader
run: |
  # Job step: generate synthetic data, prepare it, fine-tune across every
  # visible GPU, then run the quick benchmark.
  # Strict mode: without it a failed data step would still fall through to
  # training and report success.
  set -euo pipefail

  cd ensemble-tts-annotation

  # nvidia-smi prints one line per GPU; refuse to continue when none is found
  # rather than handing 0 to --num_processes.
  GPU_COUNT=$(nvidia-smi --query-gpu=name --format=csv,noheader | wc -l)
  if (( GPU_COUNT < 1 )); then
    echo "No GPUs detected by nvidia-smi; aborting." >&2
    exit 1
  fi

  echo "🚀 Multi-GPU Training with $GPU_COUNT GPUs"
  echo "================================================"

  # Create synthetic data
  # NOTE(review): 200 samples is small next to --batch-size 64 on several
  # GPUs — confirm whether the batch size is per device or global.
  echo "📊 Creating synthetic dataset (larger for multi-GPU)..."
  python scripts/data/create_synthetic_test_data.py \
    --output data/raw/synthetic_large/ \
    --samples 200

  # Prepare dataset
  echo "📦 Preparing dataset..."
  python scripts/data/download_ptbr_datasets.py \
    --prepare-local data/raw/synthetic_large/

  # Fine-tune with accelerate. --multi_gpu is added only when more than one
  # GPU is present, because accelerate rejects that flag with fewer than two
  # processes.
  launch_args=(--num_processes="$GPU_COUNT")
  if (( GPU_COUNT > 1 )); then
    launch_args=(--multi_gpu "${launch_args[@]}")
  fi

  echo "🔥 Fine-tuning with $GPU_COUNT GPUs..."
  accelerate launch "${launch_args[@]}" \
    scripts/training/finetune_emotion2vec.py \
    --dataset data/prepared/synthetic_large_prepared \
    --epochs 20 \
    --batch-size 64 \
    --device cuda \
    --augment \
    --output models/emotion/emotion2vec_finetuned_multigpu/
  echo "✅ Fine-tuning complete!"

  # Benchmark
  echo "📊 Performance benchmark:"
  python scripts/test/test_quick.py --mode balanced
  echo "================================================"

  # The previous hint named `sky storage upload`, which is not a SkyPilot CLI
  # command; point at a command that exists instead.
  echo "💡 Upload results with:"
  echo "aws s3 sync models/emotion/emotion2vec_finetuned_multigpu/ s3://my-bucket/"

num_nodes: 1