# SkyPilot task: multi-GPU fine-tuning of emotion2vec.
#
# Provisions one node with 8x A100, installs the Python dependencies, clones
# the ensemble-tts-annotation repository, generates a synthetic dataset,
# prepares it, and fine-tunes with `accelerate launch` across every GPU that
# nvidia-smi reports on the node.
#
# Usage:
#   sky launch -c <cluster-name> <this-file>

name: ensemble-multi-gpu

resources:
  # NOTE(review): spot instances can be preempted mid-run. Nothing in this
  # file shows checkpoint/resume handling for the 20-epoch job below; confirm
  # that the training script can resume, or run it as a managed job.
  use_spot: true

  accelerators: A100:8

  # A trailing '+' means "at least this much".
  memory: 128+

  disk_size: 500

setup: |
  # Abort on the first failing command, on unset variables, and on a failure
  # anywhere inside a pipeline.
  set -euo pipefail

  echo "🔧 Setting up multi-GPU environment..."

  sudo apt-get update -qq

  # PyTorch wheels come from the CUDA 11.8 index.
  pip install --quiet torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu118
  pip install --quiet transformers datasets librosa soundfile accelerate
  pip install --quiet huggingface_hub pandas numpy tqdm scikit-learn

  # Clone only once so that re-running setup on an existing cluster is cheap.
  if [[ ! -d "ensemble-tts-annotation" ]]; then
    git clone https://huggingface.co/marcosremar2/ensemble-tts-annotation
  fi

  cd ensemble-tts-annotation

  echo "✅ Setup complete!"
  echo "GPUs available:"
  nvidia-smi --query-gpu=index,name,memory.total --format=csv,noheader

run: |
  # Without these options a failed data-preparation or training step would
  # not stop the job, and the exit status would be that of the final `echo`,
  # so a broken run would still be reported as successful.
  set -euo pipefail

  cd ensemble-tts-annotation

  # One output line per visible GPU. With pipefail a failing nvidia-smi aborts
  # here instead of silently producing a count of 0.
  GPU_COUNT=$(nvidia-smi --query-gpu=name --format=csv,noheader | wc -l)
  if ! [[ "$GPU_COUNT" =~ ^[0-9]+$ ]] || (( GPU_COUNT < 1 )); then
    echo "error: no GPUs detected by nvidia-smi (count: '${GPU_COUNT}')" >&2
    exit 1
  fi

  echo "🚀 Multi-GPU Training with $GPU_COUNT GPUs"
  echo "================================================"

  echo "📊 Creating synthetic dataset (larger for multi-GPU)..."
  python scripts/data/create_synthetic_test_data.py \
    --output data/raw/synthetic_large/ \
    --samples 200

  echo "📦 Preparing dataset..."
  python scripts/data/download_ptbr_datasets.py \
    --prepare-local data/raw/synthetic_large/

  echo "🔥 Fine-tuning with $GPU_COUNT GPUs..."
  # One training process per detected GPU.
  accelerate launch --multi_gpu --num_processes="$GPU_COUNT" \
    scripts/training/finetune_emotion2vec.py \
    --dataset data/prepared/synthetic_large_prepared \
    --epochs 20 \
    --batch-size 64 \
    --device cuda \
    --augment \
    --output models/emotion/emotion2vec_finetuned_multigpu/

  echo "✅ Fine-tuning complete!"

  echo "📊 Performance benchmark:"
  python scripts/test/test_quick.py --mode balanced

  echo "================================================"
  echo "💡 Upload results with:"
  # NOTE(review): the hint below is printed verbatim for the user. Confirm
  # that `sky storage upload` is a valid subcommand in the SkyPilot version in
  # use; if it is not, replace the hint with a working upload command.
  echo "sky storage upload models/emotion/emotion2vec_finetuned_multigpu/ s3://my-bucket/"

num_nodes: 1