Upload scripts/train_n8n_sft.py with huggingface_hub
scripts/train_n8n_sft.py (+3 -3)
```diff
@@ -68,8 +68,8 @@ LORA_R = int(os.environ.get("LORA_R", "64"))
 LORA_ALPHA = int(os.environ.get("LORA_ALPHA", "128"))
 LORA_DROPOUT = float(os.environ.get("LORA_DROPOUT", "0.05"))
 
-# Quantization (to save VRAM)
-USE_4BIT = os.environ.get("USE_4BIT", "false").lower() == "true"
+# Quantization (to save VRAM) - 4-bit by default for H100
+USE_4BIT = os.environ.get("USE_4BIT", "true").lower() == "true"
 
 # ============================================================================
 # AUTHENTICATION
```
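For context on how these constants are usually consumed: the rest of train_n8n_sft.py is not shown in this diff, but a `USE_4BIT` flag like this typically gates a bitsandbytes quantization config, while the `LORA_*` values feed a PEFT `LoraConfig`. A minimal sketch under that assumption (the `target_modules` list is illustrative, not taken from the script):

```python
import os

import torch
from peft import LoraConfig
from transformers import BitsAndBytesConfig

# Same environment-driven defaults as in the diffed section of the script.
LORA_R = int(os.environ.get("LORA_R", "64"))
LORA_ALPHA = int(os.environ.get("LORA_ALPHA", "128"))
LORA_DROPOUT = float(os.environ.get("LORA_DROPOUT", "0.05"))
USE_4BIT = os.environ.get("USE_4BIT", "true").lower() == "true"

# 4-bit NF4 quantization keeps the frozen base weights small, leaving VRAM
# for activations and the LoRA adapters (None disables quantization).
bnb_config = (
    BitsAndBytesConfig(
        load_in_4bit=True,
        bnb_4bit_quant_type="nf4",
        bnb_4bit_compute_dtype=torch.bfloat16,
        bnb_4bit_use_double_quant=True,
    )
    if USE_4BIT
    else None
)

# LoRA adapter config driven by the same environment variables.
# target_modules is a common choice for Llama-style models and is an
# assumption, not something visible in this diff.
peft_config = LoraConfig(
    r=LORA_R,
    lora_alpha=LORA_ALPHA,
    lora_dropout=LORA_DROPOUT,
    bias="none",
    task_type="CAUSAL_LM",
    target_modules=["q_proj", "k_proj", "v_proj", "o_proj"],
)
```

Flipping the default to 4-bit mainly trades a little dequantization overhead for a much smaller frozen-weight footprint; only the LoRA adapters carry gradients and optimizer state, which is why this fits comfortably on a single H100.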
```diff
@@ -234,7 +234,7 @@ training_args = SFTConfig(
     eval_strategy="steps",
     eval_steps=500,
     max_length=MAX_SEQ_LENGTH,  # renamed from max_seq_length in TRL 0.12+
-    packing=True,
+    packing=False,  # Disabled: packing requires flash attention for proper cross-attention masking
     gradient_checkpointing=True,
     gradient_checkpointing_kwargs={"use_reentrant": False},
     dataset_text_field="text",
```
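This hunk shows only a slice of the `SFTConfig` call. As a rough sketch of how these arguments sit in a full TRL training setup (the model ID, dataset files, and output_dir are placeholders, and it assumes a TRL version recent enough that `max_length` and `processing_class` are the current parameter names):

```python
import os

from datasets import load_dataset
from transformers import AutoModelForCausalLM, AutoTokenizer
from trl import SFTConfig, SFTTrainer

MAX_SEQ_LENGTH = int(os.environ.get("MAX_SEQ_LENGTH", "4096"))  # assumed default
MODEL_ID = "meta-llama/Llama-3.1-8B-Instruct"  # placeholder, not from the diff

tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
model = AutoModelForCausalLM.from_pretrained(MODEL_ID)

# Placeholder JSONL files with a "text" column, matching dataset_text_field.
dataset = load_dataset(
    "json", data_files={"train": "train.jsonl", "eval": "eval.jsonl"}
)

training_args = SFTConfig(
    output_dir="outputs/n8n-sft",
    eval_strategy="steps",
    eval_steps=500,
    max_length=MAX_SEQ_LENGTH,
    packing=False,  # each example is padded/truncated on its own
    gradient_checkpointing=True,
    gradient_checkpointing_kwargs={"use_reentrant": False},
    dataset_text_field="text",
)

trainer = SFTTrainer(
    model=model,
    args=training_args,
    train_dataset=dataset["train"],
    eval_dataset=dataset["eval"],
    processing_class=tokenizer,
)
trainer.train()
```

With packing disabled, samples are never concatenated into one sequence, so tokens cannot attend across example boundaries; the cost is lower token throughput when many examples are much shorter than MAX_SEQ_LENGTH.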
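Following the rationale in the new comment, packing is only safe when the attention implementation masks attention across packed-sample boundaries. If packing were re-enabled later, the usual pattern (an assumption, not part of this commit) is to load the model with Flash Attention 2 and then turn packing back on:

```python
import torch
from transformers import AutoModelForCausalLM
from trl import SFTConfig

MODEL_ID = "meta-llama/Llama-3.1-8B-Instruct"  # placeholder

# Requires the flash-attn package; with variable-length flash attention,
# packed examples are masked per sequence instead of attending across
# packing boundaries.
model = AutoModelForCausalLM.from_pretrained(
    MODEL_ID,
    torch_dtype=torch.bfloat16,
    attn_implementation="flash_attention_2",
)

training_args = SFTConfig(
    output_dir="outputs/n8n-sft-packed",
    packing=True,  # safe only with an attention backend that masks packed samples
    max_length=4096,
)
```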