Upload train_qwen3_codeforces.py with huggingface_hub
train_qwen3_codeforces.py (CHANGED, +15 -6)
```diff
@@ -33,17 +33,24 @@ dataset = load_dataset(
 )
 print(f"Dataset loaded: {len(dataset)} examples")
 
-# …
-def formatting_func(example):
+# Preprocess dataset to create 'text' column with chat template applied
+def preprocess_function(example):
     """Apply chat template to convert messages to text format."""
     messages = example["messages"]
-    # Apply the tokenizer's chat template
     text = tokenizer.apply_chat_template(
         messages,
         tokenize=False,
         add_generation_prompt=False
     )
-    return text
+    return {"text": text}
+
+print("Preprocessing dataset with chat template...")
+dataset = dataset.map(
+    preprocess_function,
+    remove_columns=dataset.column_names,
+    desc="Applying chat template"
+)
+print(f"Preprocessed dataset: {len(dataset)} examples")
 
 # Create train/eval split
 print("Creating train/eval split...")
```
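The rewritten function returns a dict instead of a bare string, so `dataset.map` materializes a `text` column; `remove_columns=dataset.column_names` then drops the original `messages` column so only `text` survives. A minimal sketch of what the chat-template rendering produces, assuming the script's `tokenizer` belongs to the base model the trainer loads (Qwen/Qwen3-0.6B) and using a made-up record:

```python
# Minimal sketch, assuming the script's tokenizer is the one for the
# base model named in the trainer below (Qwen/Qwen3-0.6B).
from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("Qwen/Qwen3-0.6B")

# Hypothetical record in the dataset's "messages" format.
example = {
    "messages": [
        {"role": "user", "content": "Read two integers and print their sum."},
        {"role": "assistant",
         "content": "a, b = map(int, input().split())\nprint(a + b)"},
    ]
}

# Render the full conversation to one training string. No generation
# prompt is appended because the assistant turn is already present.
text = tokenizer.apply_chat_template(
    example["messages"],
    tokenize=False,
    add_generation_prompt=False,
)
print(text)  # ChatML-style output, e.g. "<|im_start|>user\n...<|im_end|>..."
```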
```diff
@@ -88,6 +95,9 @@ config = SFTConfig(
     report_to="trackio",
     project="codeforces-finetuning",
     run_name="qwen3-0.6b-codeforces-sft",
+
+    # Dataset field
+    dataset_text_field="text",
 )
 
 # LoRA configuration for efficient training
```
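This is the matching config change: `dataset_text_field="text"` points TRL's `SFTTrainer` at the precomputed column. A sketch of the surrounding config; only the fields visible in the diff are taken from the script, `output_dir` is a placeholder assumption:

```python
# Sketch of the relevant SFTConfig fields. report_to, project, run_name
# and dataset_text_field are copied from the diff; output_dir is an
# assumed placeholder, not the script's actual setting.
from trl import SFTConfig

config = SFTConfig(
    output_dir="qwen3-0.6b-codeforces-sft",  # assumed, not in the diff
    report_to="trackio",
    project="codeforces-finetuning",
    run_name="qwen3-0.6b-codeforces-sft",
    dataset_text_field="text",  # must match the column created by map()
)
```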
```diff
@@ -100,7 +110,7 @@ peft_config = LoraConfig(
     target_modules=["q_proj", "k_proj", "v_proj", "o_proj", "gate_proj", "up_proj", "down_proj"],
 )
 
-# Initialize trainer
+# Initialize trainer
 print("Initializing trainer with Qwen/Qwen3-0.6B...")
 trainer = SFTTrainer(
     model="Qwen/Qwen3-0.6B",
```
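The hunk shows only the `target_modules` line, which targets every attention and MLP projection in the Qwen3 blocks. For context, a sketch of a full `LoraConfig` around it; the rank, alpha, and dropout values are assumptions, not the script's settings:

```python
# Sketch of a complete LoRA config around the target_modules line from
# the diff; r, lora_alpha and lora_dropout are assumed values.
from peft import LoraConfig

peft_config = LoraConfig(
    r=16,                 # assumed rank
    lora_alpha=32,        # assumed scaling factor
    lora_dropout=0.05,    # assumed dropout
    task_type="CAUSAL_LM",
    target_modules=["q_proj", "k_proj", "v_proj", "o_proj",
                    "gate_proj", "up_proj", "down_proj"],
)
```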
```diff
@@ -108,7 +118,6 @@ trainer = SFTTrainer(
     eval_dataset=eval_dataset,
     args=config,
     peft_config=peft_config,
-    formatting_func=formatting_func,
 )
 
 print("Starting training...")
```
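Dropping `formatting_func` is the counterpart of the preprocessing change: each row now carries a ready-made `text` column and the config names it via `dataset_text_field`, so the trainer consumes the dataset directly. A sketch of the resulting wiring, with the train/eval split variables assumed from the surrounding script:

```python
# Sketch of the updated trainer wiring; train_dataset and eval_dataset
# are assumed to come from the script's train/eval split.
from trl import SFTTrainer

trainer = SFTTrainer(
    model="Qwen/Qwen3-0.6B",      # TRL accepts a model id string here
    train_dataset=train_dataset,
    eval_dataset=eval_dataset,
    args=config,
    peft_config=peft_config,      # no formatting_func needed anymore
)
trainer.train()
```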