stmasson committed
Commit f5c4f4a · verified · 1 parent: e388b5a

Upload scripts/train_n8n_sft.py with huggingface_hub

Files changed (1)
  scripts/train_n8n_sft.py  +3 -3
scripts/train_n8n_sft.py CHANGED
@@ -68,8 +68,8 @@ LORA_R = int(os.environ.get("LORA_R", "64"))
 LORA_ALPHA = int(os.environ.get("LORA_ALPHA", "128"))
 LORA_DROPOUT = float(os.environ.get("LORA_DROPOUT", "0.05"))
 
-# Quantization (to save VRAM)
-USE_4BIT = os.environ.get("USE_4BIT", "false").lower() == "true"
+# Quantization (to save VRAM) - 4-bit by default for H100
+USE_4BIT = os.environ.get("USE_4BIT", "true").lower() == "true"
 
 # ============================================================================
 # AUTHENTICATION
@@ -234,7 +234,7 @@ training_args = SFTConfig(
     eval_strategy="steps",
     eval_steps=500,
     max_length=MAX_SEQ_LENGTH,  # renamed from max_seq_length in TRL 0.12+
-    packing=True,
+    packing=False,  # Disabled: packing requires flash attention for proper cross-attention masking
     gradient_checkpointing=True,
     gradient_checkpointing_kwargs={"use_reentrant": False},
     dataset_text_field="text",
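
For context on the USE_4BIT flag changed above, a minimal sketch of how such an env flag is typically wired into 4-bit model loading with bitsandbytes; the model name and the exact quantization settings below are illustrative assumptions, not values taken from this script.

# Minimal sketch (not from the committed script): typical wiring of a USE_4BIT
# env flag into bitsandbytes 4-bit loading for QLoRA-style SFT.
import os

import torch
from transformers import AutoModelForCausalLM, BitsAndBytesConfig

# Assumed, illustrative model name; the real script defines its own.
MODEL_NAME = os.environ.get("MODEL_NAME", "meta-llama/Llama-3.1-8B-Instruct")
# Same default as introduced by this commit: 4-bit unless explicitly disabled.
USE_4BIT = os.environ.get("USE_4BIT", "true").lower() == "true"

bnb_config = None
if USE_4BIT:
    # NF4 with double quantization and bf16 compute is a common QLoRA setup;
    # these concrete choices are assumptions, not the script's values.
    bnb_config = BitsAndBytesConfig(
        load_in_4bit=True,
        bnb_4bit_quant_type="nf4",
        bnb_4bit_use_double_quant=True,
        bnb_4bit_compute_dtype=torch.bfloat16,
    )

model = AutoModelForCausalLM.from_pretrained(
    MODEL_NAME,
    quantization_config=bnb_config,  # None -> load unquantized weights
    device_map="auto",
)

On the packing side, the inline comment in the diff states the rationale: with packing=True, TRL concatenates multiple samples into one sequence, and without Flash Attention's per-sample masking, tokens from different samples can attend to each other; packing=False avoids that at the cost of more padding.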