Siqi-Hu committed (verified) · Commit 26001d3 · 1 Parent(s): a62ae3b

Siqi-Hu/Llama2-7B-lora-r-32-generic-epoch-5-labels_40.0

README.md ADDED
@@ -0,0 +1,69 @@
+ ---
+ library_name: peft
+ license: llama2
+ base_model: meta-llama/Llama-2-7b-hf
+ tags:
+ - generated_from_trainer
+ model-index:
+ - name: Llama2-7B-lora-r-32-generic-epoch-5-labels_40.0
+   results: []
+ ---
+
+ <!-- This model card has been generated automatically according to the information the Trainer had access to. You
+ should probably proofread and complete it, then remove this comment. -->
+
+ # Llama2-7B-lora-r-32-generic-epoch-5-labels_40.0
+
+ This model is a fine-tuned version of [meta-llama/Llama-2-7b-hf](https://huggingface.co/meta-llama/Llama-2-7b-hf) on an unknown dataset.
+ It achieves the following results on the evaluation set:
+ - Loss: 3.9360
+
+ ## Model description
+
+ More information needed
+
+ ## Intended uses & limitations
+
+ More information needed
+
+ ## Training and evaluation data
+
+ More information needed
+
+ ## Training procedure
+
+ ### Training hyperparameters
+
+ The following hyperparameters were used during training:
+ - learning_rate: 1e-05
+ - train_batch_size: 2
+ - eval_batch_size: 2
+ - seed: 42
+ - gradient_accumulation_steps: 4
+ - total_train_batch_size: 8
+ - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
+ - lr_scheduler_type: cosine
+ - lr_scheduler_warmup_steps: 100
+ - training_steps: 150
+ - mixed_precision_training: Native AMP
+
+ ### Training results
+
+ | Training Loss | Epoch | Step | Validation Loss |
+ |:-------------:|:------:|:----:|:---------------:|
+ | 5.1569 | 0.0457 | 20 | 5.3968 |
+ | 5.3987 | 0.0914 | 40 | 5.2553 |
+ | 5.133 | 0.1371 | 60 | 5.0065 |
+ | 4.6634 | 0.1829 | 80 | 4.6042 |
+ | 4.3315 | 0.2286 | 100 | 4.2429 |
+ | 4.0887 | 0.2743 | 120 | 4.0269 |
+ | 3.9483 | 0.32 | 140 | 3.9360 |
+
+
+ ### Framework versions
+
+ - PEFT 0.15.2
+ - Transformers 4.45.2
+ - Pytorch 2.5.0+cu121
+ - Datasets 3.2.0
+ - Tokenizers 0.20.3
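
For readers mapping the hyperparameters above onto code, here is a minimal sketch of the equivalent `transformers` `TrainingArguments`. The `output_dir` and any argument not listed in the card are assumptions, not the author's actual script:

```python
# Hedged sketch: TrainingArguments matching the hyperparameters in the card.
# output_dir is hypothetical; the Adam betas/epsilon shown are also the
# transformers defaults, so they are spelled out here only for clarity.
from transformers import TrainingArguments

args = TrainingArguments(
    output_dir="llama2-7b-lora-r32",   # hypothetical
    learning_rate=1e-5,
    per_device_train_batch_size=2,
    per_device_eval_batch_size=2,
    gradient_accumulation_steps=4,     # 2 * 4 = total_train_batch_size 8
    seed=42,
    lr_scheduler_type="cosine",
    warmup_steps=100,
    max_steps=150,                     # training_steps: 150
    fp16=True,                         # "Native AMP" mixed precision
    adam_beta1=0.9,
    adam_beta2=0.999,
    adam_epsilon=1e-8,
)
```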
logs/events.out.tfevents.1753121141.ly-w220.74316.0 CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:a658f95486d9657dc42b557f526909281c53434f3cb2ba05229cdf5d6a48fd89
- size 12454
+ oid sha256:5561b7dd6a1c22d2f41360e71f4dae43f1e0615f0bbf92d35114e40d279e7514
+ size 14341
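
Both versions of this log file are Git LFS pointers (spec v1); only the oid and size change, while the TensorBoard blob itself lives in LFS storage. A small sketch, assuming the blob has already been downloaded locally, of checking it against the new pointer:

```python
# Hedged sketch: verify a downloaded blob against its Git LFS pointer
# (https://git-lfs.github.com/spec/v1). The local path is hypothetical.
import hashlib
import os

blob = "events.out.tfevents.1753121141.ly-w220.74316.0"  # hypothetical path

with open(blob, "rb") as f:
    digest = hashlib.sha256(f.read()).hexdigest()

# Expected values from the updated pointer in this commit.
assert digest == "5561b7dd6a1c22d2f41360e71f4dae43f1e0615f0bbf92d35114e40d279e7514"
assert os.path.getsize(blob) == 14341
```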
lora_1/adapter_config.json ADDED
@@ -0,0 +1,39 @@
+ {
+   "alpha_pattern": {},
+   "auto_mapping": null,
+   "base_model_name_or_path": "meta-llama/Llama-2-7b-hf",
+   "bias": "none",
+   "corda_config": null,
+   "eva_config": null,
+   "exclude_modules": null,
+   "fan_in_fan_out": false,
+   "inference_mode": true,
+   "init_lora_weights": true,
+   "layer_replication": null,
+   "layers_pattern": null,
+   "layers_to_transform": null,
+   "loftq_config": {},
+   "lora_alpha": 64,
+   "lora_bias": false,
+   "lora_dropout": 0.1,
+   "megatron_config": null,
+   "megatron_core": "megatron.core",
+   "modules_to_save": null,
+   "peft_type": "LORA",
+   "r": 32,
+   "rank_pattern": {},
+   "revision": null,
+   "target_modules": [
+     "gate_proj",
+     "q_proj",
+     "k_proj",
+     "down_proj",
+     "o_proj",
+     "up_proj",
+     "v_proj"
+   ],
+   "task_type": "CAUSAL_LM",
+   "trainable_token_indices": null,
+   "use_dora": false,
+   "use_rslora": false
+ }
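
For reference, the same configuration expressed as a PEFT `LoraConfig`; values are read straight from the JSON above, and fields left at their defaults are omitted. This is an illustrative reconstruction, not the author's training code:

```python
# Hedged sketch: a LoraConfig equivalent to the committed adapter_config.json.
from peft import LoraConfig

lora_config = LoraConfig(
    r=32,              # LoRA rank ("r": 32)
    lora_alpha=64,     # scaling factor ("lora_alpha": 64)
    lora_dropout=0.1,
    bias="none",
    task_type="CAUSAL_LM",
    target_modules=[   # all attention and MLP projections
        "q_proj", "k_proj", "v_proj", "o_proj",
        "gate_proj", "up_proj", "down_proj",
    ],
)
```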
lora_1/adapter_model.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:86be3827dd4e677979c549f0e0a94fe67e91af5c0cb628d7c09b5b976935e812
+ size 319876032
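
The safetensors file holds the LoRA weights themselves. A minimal sketch of loading them on top of the base model with PEFT; the `subfolder="lora_1"` argument is an assumption based on the file layout added in this commit:

```python
# Hedged sketch: apply the lora_1 adapter to the Llama-2 base model.
import torch
from peft import PeftModel
from transformers import AutoModelForCausalLM, AutoTokenizer

base = AutoModelForCausalLM.from_pretrained(
    "meta-llama/Llama-2-7b-hf",
    torch_dtype=torch.float16,
    device_map="auto",
)
tokenizer = AutoTokenizer.from_pretrained("meta-llama/Llama-2-7b-hf")

model = PeftModel.from_pretrained(
    base,
    "Siqi-Hu/Llama2-7B-lora-r-32-generic-epoch-5-labels_40.0",
    subfolder="lora_1",  # assumption: adapter lives under lora_1/
)
model.eval()
```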
training_metrics.json CHANGED
@@ -23,7 +23,13 @@
    4.7347,
    4.0313,
    4.0846,
-   4.0887
+   4.0887,
+   4.0388,
+   3.896,
+   3.936,
+   3.9483,
+   3.925,
+   3.99
  ],
  "train_steps": [
    5,
@@ -49,7 +55,13 @@
    105,
    110,
    115,
-   120
+   120,
+   125,
+   130,
+   135,
+   140,
+   145,
+   150
  ],
  "train_epochs": [
    0.011428571428571429,
@@ -75,7 +87,13 @@
    0.24,
    0.25142857142857145,
    0.26285714285714284,
-   0.2742857142857143
+   0.2742857142857143,
+   0.2857142857142857,
+   0.29714285714285715,
+   0.30857142857142855,
+   0.32,
+   0.3314285714285714,
+   0.34285714285714286
  ],
  "eval_loss": [
    5.396810531616211,
@@ -83,7 +101,8 @@
    5.006458282470703,
    4.604181289672852,
    4.242877006530762,
-   4.0268988609313965
+   4.0268988609313965,
+   3.935999870300293
  ],
  "eval_steps": [
    20,
@@ -91,7 +110,8 @@
    60,
    80,
    100,
-   120
+   120,
+   140
  ],
  "eval_epochs": [
    0.045714285714285714,
@@ -99,6 +119,7 @@
    0.13714285714285715,
    0.18285714285714286,
    0.22857142857142856,
-   0.2742857142857143
+   0.2742857142857143,
+   0.32
  ]
 }
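
The keys visible in this diff (`train_steps`, `train_epochs`, `eval_loss`, `eval_steps`, `eval_epochs`) suggest a flat JSON object of parallel arrays. A sketch of plotting the curves; the key for the first array (training losses) is not visible in the hunks, so `train_loss` below is an assumption:

```python
# Hedged sketch: plot the loss curves stored in training_metrics.json.
import json
import matplotlib.pyplot as plt

with open("training_metrics.json") as f:
    metrics = json.load(f)

# "train_loss" is an assumed key; only the other five keys appear in the diff.
plt.plot(metrics["train_steps"], metrics["train_loss"], label="train loss")
plt.plot(metrics["eval_steps"], metrics["eval_loss"], label="eval loss")
plt.xlabel("step")
plt.ylabel("loss")
plt.legend()
plt.savefig("loss_curves.png")
```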