Snaseem2026 committed
Commit 132a144 · verified · 1 Parent(s): aa71938

Upload config.yaml with huggingface_hub

Files changed (1)
  1. config.yaml +58 -0
config.yaml ADDED
@@ -0,0 +1,58 @@
+ # Training Configuration for Code Comment Quality Classifier
+
+ model:
+   name: "distilbert-base-uncased"
+   num_labels: 4
+   max_length: 512
+   dropout: 0.1 # Dropout probability for regularization
+
+ training:
+   output_dir: "./results"
+   num_train_epochs: 3
+   per_device_train_batch_size: 16
+   per_device_eval_batch_size: 32
+   gradient_accumulation_steps: 1 # Effective batch size = per_device_batch_size * gradient_accumulation_steps * num_gpus
+   learning_rate: 0.00002
+   lr_scheduler_type: "cosine" # Options: linear, cosine, cosine_with_restarts, polynomial, constant, constant_with_warmup
+   weight_decay: 0.01
+   warmup_steps: 500
+   warmup_ratio: null # Alternative to warmup_steps (ratio of total training steps)
+   logging_steps: 100
+   eval_steps: 500
+   save_steps: 1000
+   save_total_limit: 3 # Maximum number of checkpoints to keep
+   evaluation_strategy: "steps"
+   save_strategy: "steps"
+   load_best_model_at_end: true
+   metric_for_best_model: "f1"
+   greater_is_better: true
+   early_stopping_patience: 3 # Number of evaluations without improvement before stopping
+   early_stopping_threshold: 0.001 # Minimum improvement to reset patience counter
+   seed: 42
+   fp16: false # Mixed precision training (set to true if using GPU with Tensor Cores)
+   dataloader_num_workers: 4 # Number of workers for data loading
+   dataloader_pin_memory: true # Pin memory for faster GPU transfer
+   remove_unused_columns: true
+   report_to: ["none"] # Options: "wandb", "tensorboard", "none", or list
+
+   # Class weights for handling imbalanced data (null = equal weights)
+   class_weights: null # Example: [1.0, 1.0, 1.2, 1.0] if unclear class needs more weight
+
+ data:
+   train_size: 0.8
+   val_size: 0.1
+   test_size: 0.1
+   data_path: "./data/comments.csv"
+   shuffle: true
+   stratify: true # Maintain class distribution in splits
+
+ labels:
+   - "excellent"
+   - "helpful"
+   - "unclear"
+   - "outdated"
+
+ # Logging configuration
+ logging:
+   level: "INFO" # DEBUG, INFO, WARNING, ERROR
+   log_file: "./results/training.log"