# Training Configuration for Code Comment Quality Classifier

model:
  name: "distilbert-base-uncased"
  num_labels: 4
  max_length: 512
  dropout: 0.1  # Dropout probability for regularization

training:
  output_dir: "./results"
  num_train_epochs: 3
  per_device_train_batch_size: 16
  per_device_eval_batch_size: 32
  gradient_accumulation_steps: 1  # Effective batch size = per_device_train_batch_size * gradient_accumulation_steps * num_gpus
  learning_rate: 2.0e-5  # Decimal point and signed exponent are required for PyYAML to parse this as a float (bare "2e-5" loads as a string)
  lr_scheduler_type: "cosine"  # Options: linear, cosine, cosine_with_restarts, polynomial, constant, constant_with_warmup
  weight_decay: 0.01
  warmup_steps: 500
  warmup_ratio: null  # Alternative to warmup_steps (fraction of total training steps); leave null to use warmup_steps
  logging_steps: 100
  eval_steps: 500
  save_steps: 1000  # Should be a multiple of eval_steps when load_best_model_at_end is true
  save_total_limit: 3  # Maximum number of checkpoints to keep
  evaluation_strategy: "steps"
  save_strategy: "steps"
  load_best_model_at_end: true
  metric_for_best_model: "f1"
  greater_is_better: true
  early_stopping_patience: 3  # Number of evaluations without improvement before stopping
  early_stopping_threshold: 0.001  # Minimum improvement to reset the patience counter
  seed: 42
  fp16: false  # Mixed-precision training (set to true on a GPU with Tensor Cores)
  dataloader_num_workers: 4  # Number of workers for data loading
  dataloader_pin_memory: true  # Pin memory for faster host-to-GPU transfer
  remove_unused_columns: true
  report_to: ["none"]  # Options: "wandb", "tensorboard", "none", or a list

  # Class weights for handling imbalanced data (null = equal weights).
  # Order follows data.labels, e.g. [1.0, 1.0, 1.2, 1.0] upweights the "unclear" class.
  class_weights: null

data:
  train_size: 0.8  # Split fractions must sum to 1.0
  val_size: 0.1
  test_size: 0.1
  data_path: "./data/comments.csv"
  shuffle: true
  stratify: true  # Maintain class distribution across splits
  labels:
    - "excellent"
    - "helpful"
    - "unclear"
    - "outdated"

# Logging configuration
logging:
  level: "INFO"  # Options: DEBUG, INFO, WARNING, ERROR
  log_file: "./results/training.log"
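
# ---------------------------------------------------------------------------
# Usage sketch, kept as comments so this file stays valid YAML. It shows one
# plausible way to wire this config into Hugging Face transformers; the file
# name ("config.yaml") and the dataset variables (train_ds, val_ds) are
# assumptions, not project code. Note that the early-stopping keys belong to
# EarlyStoppingCallback rather than TrainingArguments, class_weights would
# need a Trainer subclass overriding compute_loss (e.g. with
# torch.nn.CrossEntropyLoss(weight=...)), and metric_for_best_model: "f1"
# assumes a compute_metrics function that returns an "f1" key (not shown).
#
#   import yaml
#   from transformers import (AutoModelForSequenceClassification, AutoTokenizer,
#                             EarlyStoppingCallback, Trainer, TrainingArguments)
#
#   with open("config.yaml") as f:
#       cfg = yaml.safe_load(f)
#   m, t = cfg["model"], cfg["training"]
#
#   tokenizer = AutoTokenizer.from_pretrained(m["name"])
#   model = AutoModelForSequenceClassification.from_pretrained(
#       m["name"], num_labels=m["num_labels"])
#
#   args = TrainingArguments(
#       output_dir=t["output_dir"],
#       num_train_epochs=t["num_train_epochs"],
#       per_device_train_batch_size=t["per_device_train_batch_size"],
#       per_device_eval_batch_size=t["per_device_eval_batch_size"],
#       gradient_accumulation_steps=t["gradient_accumulation_steps"],
#       learning_rate=t["learning_rate"],
#       lr_scheduler_type=t["lr_scheduler_type"],
#       weight_decay=t["weight_decay"],
#       warmup_steps=t["warmup_steps"],
#       logging_steps=t["logging_steps"],
#       evaluation_strategy=t["evaluation_strategy"],
#       eval_steps=t["eval_steps"],
#       save_strategy=t["save_strategy"],
#       save_steps=t["save_steps"],
#       save_total_limit=t["save_total_limit"],
#       load_best_model_at_end=t["load_best_model_at_end"],
#       metric_for_best_model=t["metric_for_best_model"],
#       greater_is_better=t["greater_is_better"],
#       seed=t["seed"],
#       fp16=t["fp16"],
#       dataloader_num_workers=t["dataloader_num_workers"],
#       dataloader_pin_memory=t["dataloader_pin_memory"],
#       remove_unused_columns=t["remove_unused_columns"],
#       report_to=t["report_to"],
#   )
#
#   trainer = Trainer(
#       model=model,
#       args=args,
#       train_dataset=train_ds,  # tokenized splits built from the data: section
#       eval_dataset=val_ds,     # (construction not shown here)
#       callbacks=[EarlyStoppingCallback(
#           early_stopping_patience=t["early_stopping_patience"],
#           early_stopping_threshold=t["early_stopping_threshold"])],
#   )
#   trainer.train()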