# ---- Composed default configuration ----
# The block below, nested under empty-string mapping keys, records the
# default value for every section; values marked ??? are required and have
# no default. The resolved per-run values follow at top level after it.
? ''
: ? ''
  : ? ''
    : hydra:
        run:
          dir: .
        output_subdir: null
        job:
          chdir: false
      _target_: null
      job:
        name: ???
        mode: slurm
        work_dir: null
        dry_run: false
      slurm:
        time_limit: ???
        constraint:
        - h200
        memory: 200
        cpus_per_task: 16
        partition: null
        mail_user: user@example.com
      execution:
        nodes: null
        gpus_per_node: null
        num_gpus: null
        hostfile: null
        secrets_file: null
      model:
        name_or_path: ???
        finetuning_type: lora
      dataset:
        name: ???
        dir: null
        info_json: null
        template: default
        cutoff_len: 1024
        val_size: 0.1
        hf_hub_url: null
        formatting: alpaca
        ranking: false
        subset: null
        split: train
        folder: null
        num_samples: null
        columns:
          prompt: null
          query: null
          response: null
          history: null
          messages: null
          system: null
          tools: null
          images: null
          videos: null
          audios: null
          chosen: null
          rejected: null
          kto_tag: null
        tags:
          role: null
          content: null
          user: null
          assistant: null
          observation: null
          function: null
          system: null
      training:
        stage: sft
        do_train: true
        model_name_or_path: null
        finetuning_type: lora
        trust_remote_code: true
        dataset: null
        dataset_dir: null
        template: default
        cutoff_len: 1024
        val_size: 0.1
        preprocessing_num_workers: 1
        dataset_num_proc: 1
        dataloader_num_workers: 0
        streaming: false
        learning_rate: 5.0e-05
        num_train_epochs: 3.0
        per_device_train_batch_size: 1
        per_device_eval_batch_size: 1
        gradient_accumulation_steps: 8
        lr_scheduler_type: cosine
        warmup_ratio: 0.1
        warmup_steps: 0
        lora_rank: 8
        lora_alpha: 16
        lora_dropout: 0.05
        lora_target: all
        optim: adamw_torch
        bf16: true
        fp16: false
        output_dir: null
        save_strategy: epoch
        save_steps: 500
        save_total_limit: 3
        save_only_model: false
        eval_strategy: steps
        eval_steps: 500
        do_eval: true
        logging_steps: 10
        plot_loss: true
        report_to: none
        gradient_checkpointing: true
        ddp_timeout: 180000000
        include_num_input_tokens_seen: true
        overwrite_output_dir: true
        overwrite_cache: false
        seed: 42
      lora:
        rank: 8
        alpha: 16
        dropout: 0.05
        target: all
      output:
        experiment_dir: ./experiments
      merge:
        stage: export
        model_name_or_path: null
        adapter_name_or_path: null
        template: default
        export_dir: null
        export_size: 2
        export_device: auto
        export_legacy_format: false
        finetuning_type: lora
      wandb:
        project: null
        run_name: null
        entity: null
      hf:
        repo_id: null
        private: false
        upload_artifacts: true
      cleanup:
        checkpoints: false
        merged: false
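# ---- Resolved run configuration ----
# The top-level sections below carry the values actually used for this run,
# overriding the defaults recorded above.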
job:
  name: lf_torch_test__interactive
  mode: local
  work_dir: null
  dry_run: false
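# SLURM fields are left null here; with job.mode set to local they are
# presumably not consulted.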
slurm:
  time_limit: null
  constraint: null
  memory: null
  partition: null
  mail_user: null
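# Local multi-node layout: 2 nodes x 2 GPUs each, with worker hosts taken
# from the auto-generated hostfile.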
execution:
  nodes: 2
  gpus_per_node: 2
  num_gpus: null
  hostfile: /scratch/zrs2020/LlamaFactoryHelper/experiments/lf_torch_test__interactive/hostfile_auto_generated.txt
  secrets_file: ./secrets.env
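# Base model and adapter method: Qwen2.5-0.5B fine-tuned with LoRA.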
model:
  name_or_path: Qwen/Qwen2.5-0.5B
  finetuning_type: lora
lora:
  rank: 8
  alpha: 16
  dropout: 0.05
  target: all
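# SFT data pulled from the Hugging Face Hub in ShareGPT format; tokenized
# output is cached on the shared filesystem.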
dataset:
  name: my_custom_sft12
  dir: null
  info_json: null
  template: default
  cutoff_len: 8096
  val_size: 0.1
  hf_hub_url: TAUR-dev/D-SFT_C-BASELINE_r1_distillation-sft-data
  formatting: sharegpt
  ranking: false
  subset: null
  split: train
  folder: null
  num_samples: null
  columns:
    messages: conversations
  tags:
    role: role
    content: content
    user: user
    assistant: assistant
  tokenized_path: /scratch/zrs2020/LlamaFactoryHelper/experiments/lf_torch_test__interactive/tokenized/my_custom_sft12
  data_shared_file_system: true
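# Experiment outputs, logging, and Hub upload targets.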
output:
  experiment_dir: ./experiments
wandb:
  project: null
  run_name: interactive_test
  entity: null
hf:
  repo_id: TAUR-dev/testing_llamafactory_helper_quick_test__interactive
  private: false
cleanup:
  checkpoints: false
  merged: false
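# Short smoke-test training run: 100 optimizer steps, fp16, a checkpoint
# every 50 steps, evaluation disabled.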
training:
  stage: sft
  do_train: true
  max_steps: 100
  do_eval: false
  save_strategy: steps
  save_steps: 50
  logging_steps: 10
  fp16: true
  bf16: false
  overwrite_output_dir: true
  per_device_train_batch_size: 1
  gradient_accumulation_steps: 1
  gradient_checkpointing: true
  preprocessing_num_workers: 16
  overwrite_cache: true
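# No adapter merge/export is configured for this run.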
merge: {}
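
# A minimal sketch of reading this config programmatically, kept as comments
# so the file stays valid YAML. It assumes the file is saved as config.yaml
# (hypothetical filename) and that PyYAML is installed; it illustrates the
# layout rather than any official loader:
#
#   import yaml
#
#   with open("config.yaml") as f:
#       cfg = yaml.safe_load(f)
#
#   # Resolved run values live at the top level.
#   print(cfg["job"]["name"])            # lf_torch_test__interactive
#   print(cfg["training"]["max_steps"])  # 100
#
#   # The composed defaults sit under the nested empty-string keys.
#   defaults = cfg[""][""][""]
#   print(defaults["training"]["cutoff_len"])  # 1024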