secondme-api / lpm_kernel/L2/train_for_user.sh
#!/bin/bash
# Initialize variables
LEARNING_RATE="2e-4"
NUM_TRAIN_EPOCHS="3"
CONCURRENCY_THREADS="2"
DATA_SYNTHESIS_MODE="low"
HALF=False
USE_CUDA=False # Default to False, will be overridden by parameter
IS_COT=False
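# The following environment variables are expected to be provided by the caller (they are
# referenced below but never set in this script):
#   MODEL_BASE_PATH     - base model path passed to --model_name_or_path
#   USER_NAME           - user identifier passed to --user_name
#   MODEL_PERSONAL_DIR  - output directory for the fine-tuned model
#   PLATFORM            - platform hint; BF16 is only enabled when this is not "apple"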
# Process parameters
while [[ "$#" -gt 0 ]]; do
    case $1 in
        --lr) LEARNING_RATE="$2"; shift ;;
        --epochs) NUM_TRAIN_EPOCHS="$2"; shift ;;
        --threads) CONCURRENCY_THREADS="$2"; shift ;;
        --mode) DATA_SYNTHESIS_MODE="$2"; shift ;;
        --cuda)
            # Convert string to lowercase for consistent comparison
            cuda_value=$(echo "$2" | tr '[:upper:]' '[:lower:]')
            if [[ "$cuda_value" == "true" || "$cuda_value" == "1" || "$cuda_value" == "yes" ]]; then
                USE_CUDA=True
                echo "CUDA enabled by user configuration."
            else
                USE_CUDA=False
                echo "CUDA disabled by user configuration."
            fi
            shift ;;
        --is_cot) IS_COT="$2"; shift ;;
        *) echo "Unknown parameter: $1"; exit 1 ;;
    esac
    shift
done
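# Example invocation (illustrative values only; adjust to your environment):
#   bash lpm_kernel/L2/train_for_user.sh --lr 2e-4 --epochs 3 --threads 2 --mode low --cuda true --is_cot false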
# Log the CUDA setting resolved from the command-line parameter
echo "CUDA setting after parsing: $USE_CUDA"
# Configure CUDA environment variables if enabled
if [[ "$USE_CUDA" == "True" ]]; then
    # Set CUDA environment variables so PyTorch detects the GPU
    export CUDA_VISIBLE_DEVICES=0
    echo "CUDA_VISIBLE_DEVICES set to 0"
    # Set CUDA_LAUNCH_BLOCKING to 0 for asynchronous kernel launches (better performance)
    export CUDA_LAUNCH_BLOCKING=0
    echo "CUDA_LAUNCH_BLOCKING set to 0 for better performance"
else
    # Explicitly disable CUDA
    export CUDA_VISIBLE_DEVICES=""
    echo "CUDA_VISIBLE_DEVICES explicitly disabled"
fi
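# Optional sanity check (assumes PyTorch is installed in this environment); uncomment to
# confirm whether torch actually sees a GPU with the settings above:
#   python -c "import torch; print('CUDA available:', torch.cuda.is_available())"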
# Log the parameters being used
echo "Using training parameters:"
echo " Learning rate: $LEARNING_RATE"
echo " Number of epochs: $NUM_TRAIN_EPOCHS"
echo " Concurrency threads: $CONCURRENCY_THREADS"
echo " Data synthesis mode: $DATA_SYNTHESIS_MODE"
echo " Use CUDA: $USE_CUDA"
echo " Is chain of thought: $IS_COT"
# If the thread count is not 1, configure related threading environment variables
if [ "$CONCURRENCY_THREADS" != "1" ]; then
    # Limit the number of parallel threads to avoid memory issues
    export OMP_NUM_THREADS="$CONCURRENCY_THREADS"
    export MKL_NUM_THREADS="$CONCURRENCY_THREADS"
    export NUMEXPR_NUM_THREADS="$CONCURRENCY_THREADS"
    # Cap the CUDA caching allocator's split size to reduce memory fragmentation
    export PYTORCH_CUDA_ALLOC_CONF=max_split_size_mb:128
    echo "Set thread environment variables to $CONCURRENCY_THREADS"
fi
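# Optional: verify that the intra-op thread limit is picked up (assumes PyTorch is installed):
#   python -c "import torch; print('torch threads:', torch.get_num_threads())"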
# Add BF16 option based on the platform and CUDA availability
if [ "$PLATFORM" != "apple" ] && [ "$USE_CUDA" == "True" ]; then
    HALF=True
    echo "Enabling BF16 half precision for non-Apple platform with CUDA"
else
    echo "Using standard precision (not using BF16)"
fi
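# Note: BF16 generally requires an Ampere-or-newer GPU (compute capability >= 8.0);
# on older GPUs the run may fail or fall back, depending on the framework's own checks.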
# Print environment for debugging
echo "Environment configuration:"
echo " CUDA_VISIBLE_DEVICES: ${CUDA_VISIBLE_DEVICES}"
echo " PYTORCH_CUDA_ALLOC_CONF: ${PYTORCH_CUDA_ALLOC_CONF}"
echo " Using half precision: ${HALF}"
# Launch the training script with the parsed options and caller-provided environment variables
python lpm_kernel/L2/train.py \
    --seed 42 \
    --model_name_or_path "${MODEL_BASE_PATH}" \
    --user_name "${USER_NAME}" \
    --dataset_name "resources/L2/data/merged.json" \
    --chat_template_format "chatml" \
    --add_special_tokens False \
    --append_concat_token False \
    --max_seq_length 2048 \
    --num_train_epochs "$NUM_TRAIN_EPOCHS" \
    --save_total_limit 2 \
    --logging_steps 20 \
    --log_level "info" \
    --logging_strategy "steps" \
    --save_strategy "steps" \
    --save_steps 5 \
    --push_to_hub False \
    --bf16 "$HALF" \
    --packing False \
    --learning_rate "$LEARNING_RATE" \
    --lr_scheduler_type "cosine" \
    --weight_decay 1e-4 \
    --max_grad_norm 0.3 \
    --output_dir "${MODEL_PERSONAL_DIR}" \
    --per_device_train_batch_size 2 \
    --gradient_accumulation_steps "$CONCURRENCY_THREADS" \
    --gradient_checkpointing True \
    --use_reentrant False \
    --use_peft_lora True \
    --lora_r 8 \
    --lora_alpha 16 \
    --lora_dropout 0.1 \
    --lora_target_modules "all-linear" \
    --use_4bit_quantization False \
    --use_nested_quant False \
    --bnb_4bit_compute_dtype "bfloat16" \
    --is_cot "$IS_COT" \
    --use_cuda "$USE_CUDA"