Update train_lora_mistral.py
Browse files- train_lora_mistral.py +7 -1
train_lora_mistral.py
CHANGED
|
@@ -36,6 +36,7 @@ def run_health_server():
|
|
| 36 |
threading.Thread(target=run_health_server, daemon=True).start()
|
| 37 |
|
| 38 |
# === Log
|
|
|
|
| 39 |
def log(message):
    """Print ``message`` to stdout, prefixed with the current local time.

    The prefix has the form ``[HH:MM:SS]`` and is taken from
    ``datetime.now()`` at the moment of the call.
    """
    clock = datetime.now()
    stamp = clock.strftime("%H:%M:%S")
    print(f"[{stamp}] {message}")
|
|
@@ -43,7 +44,7 @@ def log(message):
|
|
| 43 |
|
| 44 |
# === Eğitim Başlıyor
|
| 45 |
log("🛠️ Ortam hazırlanıyor...")
|
| 46 |
-
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
|
| 47 |
if tokenizer.pad_token is None:
|
| 48 |
tokenizer.pad_token = tokenizer.eos_token
|
| 49 |
|
|
@@ -95,6 +96,11 @@ for file in selected_files:
|
|
| 95 |
log(f"🔍 {len(dataset)} örnek")
|
| 96 |
if len(dataset) == 0:
|
| 97 |
continue
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 98 |
trainer = Trainer(model=model, args=training_args, train_dataset=dataset)
|
| 99 |
log("🚀 Eğitim başlıyor...")
|
| 100 |
trainer.train()
|
|
|
|
| 36 |
threading.Thread(target=run_health_server, daemon=True).start()
|
| 37 |
|
| 38 |
# === Log
|
| 39 |
+
|
| 40 |
def log(message):
    """Print ``message`` to stdout, prefixed with the current local time.

    The prefix has the form ``[HH:MM:SS]`` and is taken from
    ``datetime.now()`` at the moment of the call.
    """
    clock = datetime.now()
    stamp = clock.strftime("%H:%M:%S")
    print(f"[{stamp}] {message}")
|
|
|
|
| 44 |
|
| 45 |
# === Eğitim Başlıyor
|
| 46 |
log("🛠️ Ortam hazırlanıyor...")
|
| 47 |
+
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, use_fast=False)
|
| 48 |
if tokenizer.pad_token is None:
|
| 49 |
tokenizer.pad_token = tokenizer.eos_token
|
| 50 |
|
|
|
|
| 96 |
log(f"🔍 {len(dataset)} örnek")
|
| 97 |
if len(dataset) == 0:
|
| 98 |
continue
|
| 99 |
+
|
| 100 |
+
# Eğitim öncesi örnek prompt kontrolü
|
| 101 |
+
first_row = dataset[0]
|
| 102 |
+
log(f"📌 Örnek prompt: {tokenizer.decode(first_row['input_ids'], skip_special_tokens=True)}")
|
| 103 |
+
|
| 104 |
trainer = Trainer(model=model, args=training_args, train_dataset=dataset)
|
| 105 |
log("🚀 Eğitim başlıyor...")
|
| 106 |
trainer.train()
|