#!/usr/bin/env bash
#SBATCH --job-name=lfm2vl        # optional
#SBATCH --nodes=1
#SBATCH --gres=gpu:8             # <-- change to your GPU count
#SBATCH --ntasks-per-node=8
#SBATCH --time=08:00:00

export CUDA_DEVICE_MAX_CONNECTIONS=1   # helps NCCL overlap communication and compute
export TOKENIZERS_PARALLELISM=false

NUM_GPU=8            # must match --gres above
MASTER_PORT=29500

# export these so the launched Python processes can see them
export MLFLOW_TRACKING_URI="./mlruns/finetune_lfm"
export MLFLOW_OFFLINE_MODE=true
export MLFLOW_EXPERIMENT_NAME="lfm_further_hf"

torchrun \
  --nproc_per_node=$NUM_GPU \
  --master_port=$MASTER_PORT \
  ./finetune_custom_trainer.py
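For context, here is a minimal sketch of what the startup of `finetune_custom_trainer.py` might look like under this launch configuration. The actual script is not shown here, so the structure below is an assumption; it only illustrates how a trainer would typically consume the environment variables exported above and those set by torchrun (RANK, LOCAL_RANK, WORLD_SIZE, MASTER_ADDR, MASTER_PORT).

#!/usr/bin/env python
# Hypothetical sketch of finetune_custom_trainer.py startup (the real
# script is not shown); shows typical consumption of the env vars set
# by the batch script and by torchrun.
import os

import mlflow
import torch
import torch.distributed as dist


def main():
    # torchrun exports RANK, LOCAL_RANK, and WORLD_SIZE for each of the
    # --nproc_per_node worker processes it spawns.
    local_rank = int(os.environ["LOCAL_RANK"])
    torch.cuda.set_device(local_rank)

    # init_process_group reads MASTER_ADDR/MASTER_PORT/RANK/WORLD_SIZE
    # from the environment when no init_method is given.
    dist.init_process_group(backend="nccl")

    # Only rank 0 talks to MLflow; the tracking URI points at the local
    # ./mlruns directory exported in the batch script.
    if dist.get_rank() == 0:
        mlflow.set_tracking_uri(os.environ.get("MLFLOW_TRACKING_URI", "./mlruns"))
        mlflow.set_experiment(os.environ.get("MLFLOW_EXPERIMENT_NAME", "default"))

    # ... build the model and dataloaders, run the training loop here ...

    dist.destroy_process_group()


if __name__ == "__main__":
    main()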