Upload scripts/train_n8n_sft.py with huggingface_hub
scripts/train_n8n_sft.py (+3 -3)
```diff
@@ -68,8 +68,8 @@ LORA_R = int(os.environ.get("LORA_R", "64"))
 LORA_ALPHA = int(os.environ.get("LORA_ALPHA", "128"))
 LORA_DROPOUT = float(os.environ.get("LORA_DROPOUT", "0.05"))
 
-# Quantization (to save VRAM)
-USE_4BIT = os.environ.get("USE_4BIT", "false").lower() == "true"
+# Quantization (to save VRAM) - 4-bit by default for H100
+USE_4BIT = os.environ.get("USE_4BIT", "true").lower() == "true"
 
 # ============================================================================
 # AUTHENTICATION
```
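For context on how these constants are usually consumed: the rest of train_n8n_sft.py is not shown in this diff, but a `USE_4BIT` flag like this typically gates a bitsandbytes quantization config, while the `LORA_*` values feed a PEFT `LoraConfig`. A minimal sketch under that assumption (the `target_modules` list is illustrative, not taken from the script):

```python
import os

import torch
from peft import LoraConfig
from transformers import BitsAndBytesConfig

# Same environment-driven defaults as in the diffed section of the script.
LORA_R = int(os.environ.get("LORA_R", "64"))
LORA_ALPHA = int(os.environ.get("LORA_ALPHA", "128"))
LORA_DROPOUT = float(os.environ.get("LORA_DROPOUT", "0.05"))
USE_4BIT = os.environ.get("USE_4BIT", "true").lower() == "true"

# 4-bit NF4 quantization keeps the frozen base weights small, leaving VRAM
# for activations and the LoRA adapters (None disables quantization).
bnb_config = (
    BitsAndBytesConfig(
        load_in_4bit=True,
        bnb_4bit_quant_type="nf4",
        bnb_4bit_compute_dtype=torch.bfloat16,
        bnb_4bit_use_double_quant=True,
    )
    if USE_4BIT
    else None
)

# LoRA adapter config driven by the same environment variables.
# target_modules is a common choice for Llama-style models and is an
# assumption, not something visible in this diff.
peft_config = LoraConfig(
    r=LORA_R,
    lora_alpha=LORA_ALPHA,
    lora_dropout=LORA_DROPOUT,
    bias="none",
    task_type="CAUSAL_LM",
    target_modules=["q_proj", "k_proj", "v_proj", "o_proj"],
)
```

Flipping the default to 4-bit mainly trades a little dequantization overhead for a much smaller frozen-weight footprint; only the LoRA adapters carry gradients and optimizer state, which is why this fits comfortably on a single H100.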
```diff
@@ -234,7 +234,7 @@ training_args = SFTConfig(
     eval_strategy="steps",
     eval_steps=500,
     max_length=MAX_SEQ_LENGTH,  # renamed from max_seq_length in TRL 0.12+
-    packing=True,
+    packing=False,  # Disabled: packing requires flash attention for proper cross-attention masking
     gradient_checkpointing=True,
     gradient_checkpointing_kwargs={"use_reentrant": False},
     dataset_text_field="text",
```
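This hunk shows only a slice of the `SFTConfig` call. As a rough sketch of how these arguments sit in a full TRL training setup (the model ID, dataset files, and output_dir are placeholders, and it assumes a TRL version recent enough that `max_length` and `processing_class` are the current parameter names):

```python
import os

from datasets import load_dataset
from transformers import AutoModelForCausalLM, AutoTokenizer
from trl import SFTConfig, SFTTrainer

MAX_SEQ_LENGTH = int(os.environ.get("MAX_SEQ_LENGTH", "4096"))  # assumed default
MODEL_ID = "meta-llama/Llama-3.1-8B-Instruct"  # placeholder, not from the diff

tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
model = AutoModelForCausalLM.from_pretrained(MODEL_ID)

# Placeholder JSONL files with a "text" column, matching dataset_text_field.
dataset = load_dataset(
    "json", data_files={"train": "train.jsonl", "eval": "eval.jsonl"}
)

training_args = SFTConfig(
    output_dir="outputs/n8n-sft",
    eval_strategy="steps",
    eval_steps=500,
    max_length=MAX_SEQ_LENGTH,
    packing=False,  # each example is padded/truncated on its own
    gradient_checkpointing=True,
    gradient_checkpointing_kwargs={"use_reentrant": False},
    dataset_text_field="text",
)

trainer = SFTTrainer(
    model=model,
    args=training_args,
    train_dataset=dataset["train"],
    eval_dataset=dataset["eval"],
    processing_class=tokenizer,
)
trainer.train()
```

With packing disabled, samples are never concatenated into one sequence, so tokens cannot attend across example boundaries; the cost is lower token throughput when many examples are much shorter than MAX_SEQ_LENGTH.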
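Following the rationale in the new comment, packing is only safe when the attention implementation masks attention across packed-sample boundaries. If packing were re-enabled later, the usual pattern (an assumption, not part of this commit) is to load the model with Flash Attention 2 and then turn packing back on:

```python
import torch
from transformers import AutoModelForCausalLM
from trl import SFTConfig

MODEL_ID = "meta-llama/Llama-3.1-8B-Instruct"  # placeholder

# Requires the flash-attn package; with variable-length flash attention,
# packed examples are masked per sequence instead of attending across
# packing boundaries.
model = AutoModelForCausalLM.from_pretrained(
    MODEL_ID,
    torch_dtype=torch.bfloat16,
    attn_implementation="flash_attention_2",
)

training_args = SFTConfig(
    output_dir="outputs/n8n-sft-packed",
    packing=True,  # safe only with an attention backend that masks packed samples
    max_length=4096,
)
```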