Siqi-Hu/Llama2-7B-lora-r-32-generic-epoch-5-labels_40.0
- README.md +69 -0
- logs/events.out.tfevents.1753121141.ly-w220.74316.0 +2 -2
- lora_1/adapter_config.json +39 -0
- lora_1/adapter_model.safetensors +3 -0
- training_metrics.json +27 -6
README.md
ADDED
@@ -0,0 +1,69 @@
+---
+library_name: peft
+license: llama2
+base_model: meta-llama/Llama-2-7b-hf
+tags:
+- generated_from_trainer
+model-index:
+- name: Llama2-7B-lora-r-32-generic-epoch-5-labels_40.0
+  results: []
+---
+
+<!-- This model card has been generated automatically according to the information the Trainer had access to. You
+should probably proofread and complete it, then remove this comment. -->
+
+# Llama2-7B-lora-r-32-generic-epoch-5-labels_40.0
+
+This model is a fine-tuned version of [meta-llama/Llama-2-7b-hf](https://huggingface.co/meta-llama/Llama-2-7b-hf) on an unknown dataset.
+It achieves the following results on the evaluation set:
+- Loss: 3.9360
+
+## Model description
+
+More information needed
+
+## Intended uses & limitations
+
+More information needed
+
+## Training and evaluation data
+
+More information needed
+
+## Training procedure
+
+### Training hyperparameters
+
+The following hyperparameters were used during training:
+- learning_rate: 1e-05
+- train_batch_size: 2
+- eval_batch_size: 2
+- seed: 42
+- gradient_accumulation_steps: 4
+- total_train_batch_size: 8
+- optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
+- lr_scheduler_type: cosine
+- lr_scheduler_warmup_steps: 100
+- training_steps: 150
+- mixed_precision_training: Native AMP
+
+### Training results
+
+| Training Loss | Epoch | Step | Validation Loss |
+|:-------------:|:------:|:----:|:---------------:|
+| 5.1569 | 0.0457 | 20 | 5.3968 |
+| 5.3987 | 0.0914 | 40 | 5.2553 |
+| 5.133 | 0.1371 | 60 | 5.0065 |
+| 4.6634 | 0.1829 | 80 | 4.6042 |
+| 4.3315 | 0.2286 | 100 | 4.2429 |
+| 4.0887 | 0.2743 | 120 | 4.0269 |
+| 3.9483 | 0.32 | 140 | 3.9360 |
+
+
+### Framework versions
+
+- PEFT 0.15.2
+- Transformers 4.45.2
+- Pytorch 2.5.0+cu121
+- Datasets 3.2.0
+- Tokenizers 0.20.3
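For reference, here is a minimal usage sketch (not part of the generated card): it loads the Llama-2 base model and attaches this repo's adapter. The `lora_1` subfolder comes from the file layout in this commit, and passing `subfolder` through `PeftModel.from_pretrained` is an assumption about how the adapter is meant to be resolved.

```python
# Minimal sketch: load the base model, then attach the LoRA adapter from
# this repo. Repo id and the "lora_1" subfolder are taken from this commit;
# adjust if the layout differs. Requires access to the gated Llama-2 weights.
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from peft import PeftModel

base_id = "meta-llama/Llama-2-7b-hf"
adapter_id = "Siqi-Hu/Llama2-7B-lora-r-32-generic-epoch-5-labels_40.0"

tokenizer = AutoTokenizer.from_pretrained(base_id)
base = AutoModelForCausalLM.from_pretrained(base_id, torch_dtype=torch.float16)

# The adapter weights live under lora_1/ in this repo (see the files below);
# "subfolder" is assumed to be forwarded to the hub download call.
model = PeftModel.from_pretrained(base, adapter_id, subfolder="lora_1")
model.eval()

inputs = tokenizer("Hello, world!", return_tensors="pt")
with torch.no_grad():
    out = model.generate(**inputs, max_new_tokens=20)
print(tokenizer.decode(out[0], skip_special_tokens=True))
```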
logs/events.out.tfevents.1753121141.ly-w220.74316.0
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
-size
+oid sha256:5561b7dd6a1c22d2f41360e71f4dae43f1e0615f0bbf92d35114e40d279e7514
+size 14341
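The event file above is a TensorBoard log (stored via Git LFS, hence the pointer). Once downloaded, it can be read offline; a sketch using TensorBoard's `EventAccumulator`, with the caveat that the scalar tag names this run logged are not visible here, so the sketch discovers them first.

```python
# Sketch: read scalars from the downloaded tfevents file. The tag names
# logged by this training run are not documented in the commit, so list
# them before iterating.
from tensorboard.backend.event_processing.event_accumulator import EventAccumulator

acc = EventAccumulator("logs/events.out.tfevents.1753121141.ly-w220.74316.0")
acc.Reload()
print(acc.Tags()["scalars"])  # discover the actual scalar tag names

for tag in acc.Tags()["scalars"]:
    for event in acc.Scalars(tag):
        print(tag, event.step, event.value)
```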
lora_1/adapter_config.json
ADDED
@@ -0,0 +1,39 @@
+{
+  "alpha_pattern": {},
+  "auto_mapping": null,
+  "base_model_name_or_path": "meta-llama/Llama-2-7b-hf",
+  "bias": "none",
+  "corda_config": null,
+  "eva_config": null,
+  "exclude_modules": null,
+  "fan_in_fan_out": false,
+  "inference_mode": true,
+  "init_lora_weights": true,
+  "layer_replication": null,
+  "layers_pattern": null,
+  "layers_to_transform": null,
+  "loftq_config": {},
+  "lora_alpha": 64,
+  "lora_bias": false,
+  "lora_dropout": 0.1,
+  "megatron_config": null,
+  "megatron_core": "megatron.core",
+  "modules_to_save": null,
+  "peft_type": "LORA",
+  "r": 32,
+  "rank_pattern": {},
+  "revision": null,
+  "target_modules": [
+    "gate_proj",
+    "q_proj",
+    "k_proj",
+    "down_proj",
+    "o_proj",
+    "up_proj",
+    "v_proj"
+  ],
+  "task_type": "CAUSAL_LM",
+  "trainable_token_indices": null,
+  "use_dora": false,
+  "use_rslora": false
+}
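This JSON is the serialized form of a PEFT `LoraConfig`: rank-32 adapters with alpha 64 (an effective scaling of alpha/r = 2) on all seven attention and MLP projections of Llama-2. A sketch of the equivalent configuration in code, setting only the substantive fields:

```python
# Sketch: the LoraConfig corresponding to the adapter_config.json above.
from peft import LoraConfig

config = LoraConfig(
    r=32,            # LoRA rank ("r" in the JSON)
    lora_alpha=64,   # scaling numerator; effective scale = alpha / r = 2
    lora_dropout=0.1,
    bias="none",
    task_type="CAUSAL_LM",
    target_modules=[
        "q_proj", "k_proj", "v_proj", "o_proj",  # attention projections
        "gate_proj", "up_proj", "down_proj",     # MLP projections
    ],
)
```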
lora_1/adapter_model.safetensors
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:86be3827dd4e677979c549f0e0a94fe67e91af5c0cb628d7c09b5b976935e812
+size 319876032
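The `version`/`oid`/`size` lines above are a Git LFS pointer, not the weights themselves; the ~320 MB safetensors file is fetched on checkout or download. A sketch that pulls the file with `huggingface_hub` and lists the adapter tensors without loading them fully (repo id taken from this commit):

```python
# Sketch: download the LFS-backed adapter weights, then inspect tensor names
# and shapes. safetensors can read the header without loading all weights.
from huggingface_hub import hf_hub_download
from safetensors import safe_open

path = hf_hub_download(
    repo_id="Siqi-Hu/Llama2-7B-lora-r-32-generic-epoch-5-labels_40.0",
    filename="lora_1/adapter_model.safetensors",
)

with safe_open(path, framework="pt") as f:
    for name in f.keys():
        print(name, f.get_slice(name).get_shape())
```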
training_metrics.json
CHANGED
@@ -23,7 +23,13 @@
     4.7347,
     4.0313,
     4.0846,
-    4.0887
+    4.0887,
+    4.0388,
+    3.896,
+    3.936,
+    3.9483,
+    3.925,
+    3.99
   ],
   "train_steps": [
     5,
@@ -49,7 +55,13 @@
     105,
     110,
     115,
-    120
+    120,
+    125,
+    130,
+    135,
+    140,
+    145,
+    150
   ],
   "train_epochs": [
     0.011428571428571429,
@@ -75,7 +87,13 @@
     0.24,
     0.25142857142857145,
     0.26285714285714284,
-    0.2742857142857143
+    0.2742857142857143,
+    0.2857142857142857,
+    0.29714285714285715,
+    0.30857142857142855,
+    0.32,
+    0.3314285714285714,
+    0.34285714285714286
   ],
   "eval_loss": [
     5.396810531616211,
@@ -83,7 +101,8 @@
     5.006458282470703,
     4.604181289672852,
     4.242877006530762,
-    4.0268988609313965
+    4.0268988609313965,
+    3.935999870300293
   ],
   "eval_steps": [
     20,
@@ -91,7 +110,8 @@
     60,
     80,
     100,
-    120
+    120,
+    140
   ],
   "eval_epochs": [
     0.045714285714285714,
@@ -99,6 +119,7 @@
     0.13714285714285715,
     0.18285714285714286,
     0.22857142857142856,
-    0.2742857142857143
+    0.2742857142857143,
+    0.32
   ]
 }
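With this commit, `training_metrics.json` extends the logged run from step 120 to step 150, and the eval series gains points at steps 120 and 140 (the step-140 value matches the 3.9360 reported in the README). A sketch for plotting the curves; the key for the train-loss array is cut off in this diff, so `"train_losses"` below is an assumed name.

```python
# Sketch: plot train/eval loss from training_metrics.json. Only the keys
# visible in the diff ("train_steps", "eval_loss", "eval_steps") are
# confirmed; "train_losses" is an assumption about the truncated key.
import json
import matplotlib.pyplot as plt

with open("training_metrics.json") as f:
    m = json.load(f)

plt.plot(m["train_steps"], m["train_losses"], label="train loss")
plt.plot(m["eval_steps"], m["eval_loss"], label="eval loss")
plt.xlabel("step")
plt.ylabel("loss")
plt.legend()
plt.savefig("loss_curves.png")
```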