# Training Configuration for Code Comment Quality Classifier

model:
  name: "distilbert-base-uncased"
  num_labels: 4
  max_length: 512
  dropout: 0.1  # Dropout probability for regularization

training:
  output_dir: "./results"
  num_train_epochs: 3
  per_device_train_batch_size: 16
  per_device_eval_batch_size: 32
  gradient_accumulation_steps: 1  # Effective batch size = per_device_train_batch_size * gradient_accumulation_steps * num_gpus
  learning_rate: 2.0e-5  # Decimal point and signed exponent are required for PyYAML to parse this as a float (bare "2e-5" loads as a string)
  lr_scheduler_type: "cosine"  # Options: linear, cosine, cosine_with_restarts, polynomial, constant, constant_with_warmup
  weight_decay: 0.01
  warmup_steps: 500
  warmup_ratio: null  # Alternative to warmup_steps (fraction of total training steps); leave null to use warmup_steps
  logging_steps: 100
  eval_steps: 500
  save_steps: 1000  # Should be a multiple of eval_steps when load_best_model_at_end is true
  save_total_limit: 3  # Maximum number of checkpoints to keep
  evaluation_strategy: "steps"
  save_strategy: "steps"
  load_best_model_at_end: true
  metric_for_best_model: "f1"
  greater_is_better: true
  early_stopping_patience: 3  # Number of evaluations without improvement before stopping
  early_stopping_threshold: 0.001  # Minimum improvement to reset the patience counter
  seed: 42
  fp16: false  # Mixed-precision training (set to true on a GPU with Tensor Cores)
  dataloader_num_workers: 4  # Number of workers for data loading
  dataloader_pin_memory: true  # Pin memory for faster host-to-GPU transfer
  remove_unused_columns: true
  report_to: ["none"]  # Options: "wandb", "tensorboard", "none", or a list

  # Class weights for handling imbalanced data (null = equal weights).
  # Order follows data.labels, e.g. [1.0, 1.0, 1.2, 1.0] upweights the "unclear" class.
  class_weights: null

data:
  train_size: 0.8  # Split fractions must sum to 1.0
  val_size: 0.1
  test_size: 0.1
  data_path: "./data/comments.csv"
  shuffle: true
  stratify: true  # Maintain class distribution across splits
  labels:
    - "excellent"
    - "helpful"
    - "unclear"
    - "outdated"

# Logging configuration
logging:
  level: "INFO"  # Options: DEBUG, INFO, WARNING, ERROR
  log_file: "./results/training.log"
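
# ---------------------------------------------------------------------------
# Usage sketch, kept as comments so this file stays valid YAML. It shows one
# plausible way to wire this config into Hugging Face transformers; the file
# name ("config.yaml") and the dataset variables (train_ds, val_ds) are
# assumptions, not project code. Note that the early-stopping keys belong to
# EarlyStoppingCallback rather than TrainingArguments, class_weights would
# need a Trainer subclass overriding compute_loss (e.g. with
# torch.nn.CrossEntropyLoss(weight=...)), and metric_for_best_model: "f1"
# assumes a compute_metrics function that returns an "f1" key (not shown).
#
#   import yaml
#   from transformers import (AutoModelForSequenceClassification, AutoTokenizer,
#                             EarlyStoppingCallback, Trainer, TrainingArguments)
#
#   with open("config.yaml") as f:
#       cfg = yaml.safe_load(f)
#   m, t = cfg["model"], cfg["training"]
#
#   tokenizer = AutoTokenizer.from_pretrained(m["name"])
#   model = AutoModelForSequenceClassification.from_pretrained(
#       m["name"], num_labels=m["num_labels"])
#
#   args = TrainingArguments(
#       output_dir=t["output_dir"],
#       num_train_epochs=t["num_train_epochs"],
#       per_device_train_batch_size=t["per_device_train_batch_size"],
#       per_device_eval_batch_size=t["per_device_eval_batch_size"],
#       gradient_accumulation_steps=t["gradient_accumulation_steps"],
#       learning_rate=t["learning_rate"],
#       lr_scheduler_type=t["lr_scheduler_type"],
#       weight_decay=t["weight_decay"],
#       warmup_steps=t["warmup_steps"],
#       logging_steps=t["logging_steps"],
#       evaluation_strategy=t["evaluation_strategy"],
#       eval_steps=t["eval_steps"],
#       save_strategy=t["save_strategy"],
#       save_steps=t["save_steps"],
#       save_total_limit=t["save_total_limit"],
#       load_best_model_at_end=t["load_best_model_at_end"],
#       metric_for_best_model=t["metric_for_best_model"],
#       greater_is_better=t["greater_is_better"],
#       seed=t["seed"],
#       fp16=t["fp16"],
#       dataloader_num_workers=t["dataloader_num_workers"],
#       dataloader_pin_memory=t["dataloader_pin_memory"],
#       remove_unused_columns=t["remove_unused_columns"],
#       report_to=t["report_to"],
#   )
#
#   trainer = Trainer(
#       model=model,
#       args=args,
#       train_dataset=train_ds,  # tokenized splits built from the data: section
#       eval_dataset=val_ds,     # (construction not shown here)
#       callbacks=[EarlyStoppingCallback(
#           early_stopping_patience=t["early_stopping_patience"],
#           early_stopping_threshold=t["early_stopping_threshold"])],
#   )
#   trainer.train()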