#!/bin/bash
#
# Launch ./peptide/rectify_train.py on the v3 rectified peptide datasets.
#
# Usage: run from the directory that contains ./peptide (all paths below are
# relative to the current working directory).
#
# The NCCL/UCX settings are specific to the cluster fabric this was written
# for (InfiniBand HCAs mlx5_*, bonded interface bond0) -- TODO confirm they
# match the target nodes before reusing this script elsewhere.

set -euo pipefail

# CPU threads available to OpenMP-backed libraries in the training process.
export OMP_NUM_THREADS=64

# --- NCCL / UCX communication settings -------------------------------------
export NCCL_NVLS_ENABLE=1             # NVLink SHARP (NVLS) support on
export NCCL_IB_ADAPTIVE_ROUTING=1     # adaptive routing on the IB fabric
export NCCL_IB_SL=1                   # InfiniBand service level
export NCCL_IB_QPS_PER_CONNECTION=2   # queue pairs per IB connection
export NCCL_IB_SPLIT_DATA_ON_QPS=0    # do not split a message across QPs
# HCAs NCCL may use; the list is host-specific.
export NCCL_IB_HCA=mlx5_15,mlx5_10,mlx5_14,mlx5_13,mlx5_8,mlx5_7,mlx5_9,mlx5_4
export NCCL_SOCKET_IFNAME=bond0       # interface for NCCL socket traffic
export NCCL_ALGO=RING                 # restrict collectives to the ring algorithm
export UCX_TLS=rc                     # restrict UCX to the RC transport

# --- Training arguments -----------------------------------------------------
train_args=(
  # Data. Train and validation both live under ./peptide/rectified_datasets/v3
  # (the train path previously read "ectified_datasets", missing the "r").
  --train_dataset_path ./peptide/rectified_datasets/v3/train
  --val_dataset_path ./peptide/rectified_datasets/v3/validation
  --version 3

  # Model shape.
  --model_dim 512
  --n_heads 8
  --n_layers 6
  --vocab_size 24
  --seq_len 100

  # Optimisation.
  --epochs 50
  --learning_rate 1e-4
  --weight_decay 2e-5
  --label_smoothing 0.0

  # Checkpointing and the tc_* options (semantics defined by rectify_train.py).
  --checkpoint_dir ./peptide/ckpt
  --tc_batches 20
  --tc_k_samples 50

  # NOTE(review): a v2 checkpoint is used to start a --version 3 run;
  # presumably an intentional warm start -- confirm.
  --resume_from_checkpoint ./peptide/ckpt/PepReDi_v2.pt
)

python ./peptide/rectify_train.py "${train_args[@]}"