stmasson committed
Commit 202ab61 · verified · 1 parent: e6a6333

Upload train_qwen3_codeforces.py with huggingface_hub
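The commit message indicates the script was pushed programmatically rather than through the web editor. Below is a minimal sketch of such an upload using huggingface_hub's HfApi.upload_file, assuming an access token is already configured via huggingface-cli login or HF_TOKEN; the repo id is a placeholder, since the target repository is not shown on this page.

# Hypothetical upload sketch; only the file name and commit message come from this commit.
from huggingface_hub import HfApi

api = HfApi()  # picks up the token from `huggingface-cli login` / HF_TOKEN
api.upload_file(
    path_or_fileobj="train_qwen3_codeforces.py",
    path_in_repo="train_qwen3_codeforces.py",
    repo_id="<user>/<repo>",  # placeholder: the destination repo is not listed here
    commit_message="Upload train_qwen3_codeforces.py with huggingface_hub",
)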

Files changed (1):
  train_qwen3_codeforces.py  +15 -6
train_qwen3_codeforces.py CHANGED
@@ -33,17 +33,24 @@ dataset = load_dataset(
 )
 print(f"Dataset loaded: {len(dataset)} examples")
 
-# Formatting function to convert messages to text using chat template
-def formatting_func(example):
+# Preprocess dataset to create 'text' column with chat template applied
+def preprocess_function(example):
     """Apply chat template to convert messages to text format."""
     messages = example["messages"]
-    # Apply the tokenizer's chat template
     text = tokenizer.apply_chat_template(
         messages,
         tokenize=False,
         add_generation_prompt=False
     )
-    return text
+    return {"text": text}
+
+print("Preprocessing dataset with chat template...")
+dataset = dataset.map(
+    preprocess_function,
+    remove_columns=dataset.column_names,
+    desc="Applying chat template"
+)
+print(f"Preprocessed dataset: {len(dataset)} examples")
 
 # Create train/eval split
 print("Creating train/eval split...")
@@ -88,6 +95,9 @@ config = SFTConfig(
     report_to="trackio",
     project="codeforces-finetuning",
     run_name="qwen3-0.6b-codeforces-sft",
+
+    # Dataset field
+    dataset_text_field="text",
 )
 
 # LoRA configuration for efficient training
@@ -100,7 +110,7 @@ peft_config = LoraConfig(
     target_modules=["q_proj", "k_proj", "v_proj", "o_proj", "gate_proj", "up_proj", "down_proj"],
 )
 
-# Initialize trainer with formatting function
+# Initialize trainer
 print("Initializing trainer with Qwen/Qwen3-0.6B...")
 trainer = SFTTrainer(
     model="Qwen/Qwen3-0.6B",
@@ -108,7 +118,6 @@ trainer = SFTTrainer(
     eval_dataset=eval_dataset,
     args=config,
     peft_config=peft_config,
-    formatting_func=formatting_func,
 )
 
 print("Starting training...")