stmasson committed
Commit 816727c · verified · 1 Parent(s): 7dbc984

Upload train_alizee_coder.py with huggingface_hub
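For context, a minimal sketch of how such an upload is typically done with the huggingface_hub client; the repo_id and repo_type below are placeholders and are not taken from this commit:

```python
# Hypothetical upload sketch; repo_id/repo_type are placeholders, not from this commit.
from huggingface_hub import HfApi

api = HfApi()  # picks up the token from the HF_TOKEN environment variable or a cached login
api.upload_file(
    path_or_fileobj="train_alizee_coder.py",
    path_in_repo="train_alizee_coder.py",
    repo_id="stmasson/<target-repo>",  # placeholder: the actual target repo is not shown here
    repo_type="model",                 # assumption: could also be "space" or "dataset"
    commit_message="Upload train_alizee_coder.py with huggingface_hub",
)
```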

Files changed (1)
  1. train_alizee_coder.py +131 -0
train_alizee_coder.py ADDED
@@ -0,0 +1,131 @@
+ # /// script
+ # dependencies = ["trl>=0.12.0", "peft>=0.7.0", "trackio", "datasets", "transformers>=4.45.0", "accelerate", "bitsandbytes", "torch"]
+ # ///
+
+ import os
+ import torch
+ from datasets import load_dataset
+ from peft import LoraConfig, TaskType
+ from trl import SFTTrainer, SFTConfig
+ from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
+ import trackio
+
+ print("="*50)
+ print("Starting Alizee Coder Devstral Training")
+ print("="*50)
+
+ # Configuration
+ MODEL_NAME = "mistralai/Devstral-Small-2505"
+ OUTPUT_REPO = "stmasson/alizee-coder-devstral-1-small"
+ DATASET_SIZE = 10000
+
+ # Verify HF_TOKEN
+ if not os.environ.get("HF_TOKEN"):
+     raise ValueError("HF_TOKEN not set!")
+ print("HF_TOKEN verified")
+
+ print("Loading dataset nvidia/OpenCodeReasoning...")
+ try:
+     dataset = load_dataset("nvidia/OpenCodeReasoning", split="split_0")
+     dataset = dataset.shuffle(seed=42).select(range(min(DATASET_SIZE, len(dataset))))
+     print(f"Dataset loaded: {len(dataset)} examples")
+ except Exception as e:
+     print(f"Error loading dataset: {e}")
+     raise
+
+ # Split train/eval
+ dataset_split = dataset.train_test_split(test_size=0.05, seed=42)
+ train_dataset = dataset_split["train"]
+ eval_dataset = dataset_split["test"]
+ print(f"Train: {len(train_dataset)}, Eval: {len(eval_dataset)}")
+
+ # Format for code reasoning
+ def format_example(example):
+     solution = example.get('solution', '') or ''
+     output = example.get('output', '') or ''
+     text = f"<s>[INST] Solve this programming problem with detailed reasoning:\n\n{example['input']}\n[/INST]\n\n**Reasoning:**\n{output}\n\n**Solution:**\n```python\n{solution}\n```</s>"
+     return {"text": text}
+
+ print("Formatting dataset...")
+ train_dataset = train_dataset.map(format_example, remove_columns=train_dataset.column_names)
+ eval_dataset = eval_dataset.map(format_example, remove_columns=eval_dataset.column_names)
+ print("Dataset formatted")
+
+ # Load tokenizer
+ print("Loading tokenizer...")
+ tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, trust_remote_code=True)
+ if tokenizer.pad_token is None:
+     tokenizer.pad_token = tokenizer.eos_token
+ print("Tokenizer loaded")
+
+ # 4-bit quantization
+ bnb_config = BitsAndBytesConfig(
+     load_in_4bit=True,
+     bnb_4bit_quant_type="nf4",
+     bnb_4bit_compute_dtype=torch.bfloat16,
+     bnb_4bit_use_double_quant=True,
+ )
+
+ print(f"Loading model {MODEL_NAME}...")
+ model = AutoModelForCausalLM.from_pretrained(
+     MODEL_NAME,
+     quantization_config=bnb_config,
+     device_map="auto",
+     trust_remote_code=True,
+     torch_dtype=torch.bfloat16,
+ )
+ print("Model loaded")
+
+ # LoRA configuration
+ lora_config = LoraConfig(
+     r=32,
+     lora_alpha=64,
+     target_modules=["q_proj", "k_proj", "v_proj", "o_proj", "gate_proj", "up_proj", "down_proj"],
+     lora_dropout=0.05,
+     bias="none",
+     task_type=TaskType.CAUSAL_LM,
+ )
+
+ # Training config
+ training_config = SFTConfig(
+     output_dir="./alizee-coder-devstral-1-small",
+     num_train_epochs=1,
+     per_device_train_batch_size=1,
+     per_device_eval_batch_size=1,
+     gradient_accumulation_steps=16,
+     gradient_checkpointing=True,
+     learning_rate=2e-4,
+     lr_scheduler_type="cosine",
+     warmup_ratio=0.1,
+     max_length=4096,
+     logging_steps=10,
+     save_strategy="steps",
+     save_steps=200,
+     eval_strategy="steps",
+     eval_steps=200,
+     bf16=True,
+     push_to_hub=True,
+     hub_model_id=OUTPUT_REPO,
+     hub_strategy="every_save",
+     report_to="trackio",
+     run_name="alizee-coder-devstral-1-small",
+ )
+
+ print("Initializing trainer...")
+ trainer = SFTTrainer(
+     model=model,
+     args=training_config,
+     train_dataset=train_dataset,
+     eval_dataset=eval_dataset,
+     peft_config=lora_config,
+     processing_class=tokenizer,
+ )
+
+ print("="*50)
+ print("STARTING TRAINING")
+ print("="*50)
+ trainer.train()
+
+ print("Pushing to Hub...")
+ trainer.push_to_hub()
+ print(f"Done! Model: https://huggingface.co/{OUTPUT_REPO}")