Siqi-Hu/Llama2-7B-lora-r-32-generic-epoch-5-labels_40.0
- README.md +69 -0
- logs/events.out.tfevents.1753121141.ly-w220.74316.0 +2 -2
- lora_1/adapter_config.json +39 -0
- lora_1/adapter_model.safetensors +3 -0
- training_metrics.json +27 -6
README.md
ADDED
@@ -0,0 +1,69 @@
+---
+library_name: peft
+license: llama2
+base_model: meta-llama/Llama-2-7b-hf
+tags:
+- generated_from_trainer
+model-index:
+- name: Llama2-7B-lora-r-32-generic-epoch-5-labels_40.0
+  results: []
+---
+
+<!-- This model card has been generated automatically according to the information the Trainer had access to. You
+should probably proofread and complete it, then remove this comment. -->
+
+# Llama2-7B-lora-r-32-generic-epoch-5-labels_40.0
+
+This model is a fine-tuned version of [meta-llama/Llama-2-7b-hf](https://huggingface.co/meta-llama/Llama-2-7b-hf) on an unknown dataset.
+It achieves the following results on the evaluation set:
+- Loss: 3.9360
+
+## Model description
+
+More information needed
+
+## Intended uses & limitations
+
+More information needed
+
+## Training and evaluation data
+
+More information needed
+
+## Training procedure
+
+### Training hyperparameters
+
+The following hyperparameters were used during training:
+- learning_rate: 1e-05
+- train_batch_size: 2
+- eval_batch_size: 2
+- seed: 42
+- gradient_accumulation_steps: 4
+- total_train_batch_size: 8
+- optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
+- lr_scheduler_type: cosine
+- lr_scheduler_warmup_steps: 100
+- training_steps: 150
+- mixed_precision_training: Native AMP
+
+### Training results
+
+| Training Loss | Epoch | Step | Validation Loss |
+|:-------------:|:------:|:----:|:---------------:|
+| 5.1569 | 0.0457 | 20 | 5.3968 |
+| 5.3987 | 0.0914 | 40 | 5.2553 |
+| 5.133 | 0.1371 | 60 | 5.0065 |
+| 4.6634 | 0.1829 | 80 | 4.6042 |
+| 4.3315 | 0.2286 | 100 | 4.2429 |
+| 4.0887 | 0.2743 | 120 | 4.0269 |
+| 3.9483 | 0.32 | 140 | 3.9360 |
+
+
+### Framework versions
+
+- PEFT 0.15.2
+- Transformers 4.45.2
+- Pytorch 2.5.0+cu121
+- Datasets 3.2.0
+- Tokenizers 0.20.3
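For reference, here is a minimal usage sketch (not part of the generated card): it loads the Llama-2 base model and attaches this repo's adapter. The `lora_1` subfolder comes from the file layout in this commit, and passing `subfolder` through `PeftModel.from_pretrained` is an assumption about how the adapter is meant to be resolved.

```python
# Minimal sketch: load the base model, then attach the LoRA adapter from
# this repo. Repo id and the "lora_1" subfolder are taken from this commit;
# adjust if the layout differs. Requires access to the gated Llama-2 weights.
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from peft import PeftModel

base_id = "meta-llama/Llama-2-7b-hf"
adapter_id = "Siqi-Hu/Llama2-7B-lora-r-32-generic-epoch-5-labels_40.0"

tokenizer = AutoTokenizer.from_pretrained(base_id)
base = AutoModelForCausalLM.from_pretrained(base_id, torch_dtype=torch.float16)

# The adapter weights live under lora_1/ in this repo (see the files below);
# "subfolder" is assumed to be forwarded to the hub download call.
model = PeftModel.from_pretrained(base, adapter_id, subfolder="lora_1")
model.eval()

inputs = tokenizer("Hello, world!", return_tensors="pt")
with torch.no_grad():
    out = model.generate(**inputs, max_new_tokens=20)
print(tokenizer.decode(out[0], skip_special_tokens=True))
```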
logs/events.out.tfevents.1753121141.ly-w220.74316.0
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
-size
+oid sha256:5561b7dd6a1c22d2f41360e71f4dae43f1e0615f0bbf92d35114e40d279e7514
+size 14341
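The event file above is a TensorBoard log (stored via Git LFS, hence the pointer). Once downloaded, it can be read offline; a sketch using TensorBoard's `EventAccumulator`, with the caveat that the scalar tag names this run logged are not visible here, so the sketch discovers them first.

```python
# Sketch: read scalars from the downloaded tfevents file. The tag names
# logged by this training run are not documented in the commit, so list
# them before iterating.
from tensorboard.backend.event_processing.event_accumulator import EventAccumulator

acc = EventAccumulator("logs/events.out.tfevents.1753121141.ly-w220.74316.0")
acc.Reload()
print(acc.Tags()["scalars"])  # discover the actual scalar tag names

for tag in acc.Tags()["scalars"]:
    for event in acc.Scalars(tag):
        print(tag, event.step, event.value)
```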
lora_1/adapter_config.json
ADDED
@@ -0,0 +1,39 @@
+{
+  "alpha_pattern": {},
+  "auto_mapping": null,
+  "base_model_name_or_path": "meta-llama/Llama-2-7b-hf",
+  "bias": "none",
+  "corda_config": null,
+  "eva_config": null,
+  "exclude_modules": null,
+  "fan_in_fan_out": false,
+  "inference_mode": true,
+  "init_lora_weights": true,
+  "layer_replication": null,
+  "layers_pattern": null,
+  "layers_to_transform": null,
+  "loftq_config": {},
+  "lora_alpha": 64,
+  "lora_bias": false,
+  "lora_dropout": 0.1,
+  "megatron_config": null,
+  "megatron_core": "megatron.core",
+  "modules_to_save": null,
+  "peft_type": "LORA",
+  "r": 32,
+  "rank_pattern": {},
+  "revision": null,
+  "target_modules": [
+    "gate_proj",
+    "q_proj",
+    "k_proj",
+    "down_proj",
+    "o_proj",
+    "up_proj",
+    "v_proj"
+  ],
+  "task_type": "CAUSAL_LM",
+  "trainable_token_indices": null,
+  "use_dora": false,
+  "use_rslora": false
+}
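This JSON is the serialized form of a PEFT `LoraConfig`: rank-32 adapters with alpha 64 (an effective scaling of alpha/r = 2) on all seven attention and MLP projections of Llama-2. A sketch of the equivalent configuration in code, setting only the substantive fields:

```python
# Sketch: the LoraConfig corresponding to the adapter_config.json above.
from peft import LoraConfig

config = LoraConfig(
    r=32,            # LoRA rank ("r" in the JSON)
    lora_alpha=64,   # scaling numerator; effective scale = alpha / r = 2
    lora_dropout=0.1,
    bias="none",
    task_type="CAUSAL_LM",
    target_modules=[
        "q_proj", "k_proj", "v_proj", "o_proj",  # attention projections
        "gate_proj", "up_proj", "down_proj",     # MLP projections
    ],
)
```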
lora_1/adapter_model.safetensors
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:86be3827dd4e677979c549f0e0a94fe67e91af5c0cb628d7c09b5b976935e812
+size 319876032
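The `version`/`oid`/`size` lines above are a Git LFS pointer, not the weights themselves; the ~320 MB safetensors file is fetched on checkout or download. A sketch that pulls the file with `huggingface_hub` and lists the adapter tensors without loading them fully (repo id taken from this commit):

```python
# Sketch: download the LFS-backed adapter weights, then inspect tensor names
# and shapes. safetensors can read the header without loading all weights.
from huggingface_hub import hf_hub_download
from safetensors import safe_open

path = hf_hub_download(
    repo_id="Siqi-Hu/Llama2-7B-lora-r-32-generic-epoch-5-labels_40.0",
    filename="lora_1/adapter_model.safetensors",
)

with safe_open(path, framework="pt") as f:
    for name in f.keys():
        print(name, f.get_slice(name).get_shape())
```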
training_metrics.json
CHANGED
@@ -23,7 +23,13 @@
     4.7347,
     4.0313,
     4.0846,
-    4.0887
+    4.0887,
+    4.0388,
+    3.896,
+    3.936,
+    3.9483,
+    3.925,
+    3.99
   ],
   "train_steps": [
     5,
@@ -49,7 +55,13 @@
     105,
     110,
     115,
-    120
+    120,
+    125,
+    130,
+    135,
+    140,
+    145,
+    150
   ],
   "train_epochs": [
     0.011428571428571429,
@@ -75,7 +87,13 @@
     0.24,
     0.25142857142857145,
     0.26285714285714284,
-    0.2742857142857143
+    0.2742857142857143,
+    0.2857142857142857,
+    0.29714285714285715,
+    0.30857142857142855,
+    0.32,
+    0.3314285714285714,
+    0.34285714285714286
   ],
   "eval_loss": [
     5.396810531616211,
@@ -83,7 +101,8 @@
     5.006458282470703,
     4.604181289672852,
     4.242877006530762,
-    4.0268988609313965
+    4.0268988609313965,
+    3.935999870300293
   ],
   "eval_steps": [
     20,
@@ -91,7 +110,8 @@
     60,
     80,
     100,
-    120
+    120,
+    140
   ],
   "eval_epochs": [
     0.045714285714285714,
@@ -99,6 +119,7 @@
     0.13714285714285715,
     0.18285714285714286,
     0.22857142857142856,
-    0.2742857142857143
+    0.2742857142857143,
+    0.32
   ]
 }
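With this commit, `training_metrics.json` extends the logged run from step 120 to step 150, and the eval series gains points at steps 120 and 140 (the step-140 value matches the 3.9360 reported in the README). A sketch for plotting the curves; the key for the train-loss array is cut off in this diff, so `"train_losses"` below is an assumed name.

```python
# Sketch: plot train/eval loss from training_metrics.json. Only the keys
# visible in the diff ("train_steps", "eval_loss", "eval_steps") are
# confirmed; "train_losses" is an assumption about the truncated key.
import json
import matplotlib.pyplot as plt

with open("training_metrics.json") as f:
    m = json.load(f)

plt.plot(m["train_steps"], m["train_losses"], label="train loss")
plt.plot(m["eval_steps"], m["eval_loss"], label="eval loss")
plt.xlabel("step")
plt.ylabel("loss")
plt.legend()
plt.savefig("loss_curves.png")
```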