Alperitoo committed on
Commit
63f94f6
·
verified ·
1 Parent(s): 70df61d

Model save

Browse files
README.md ADDED
@@ -0,0 +1,74 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ library_name: peft
3
+ license: mit
4
+ base_model: openai/whisper-large-v3-turbo
5
+ tags:
6
+ - generated_from_trainer
7
+ metrics:
8
+ - wer
9
+ model-index:
10
+ - name: v3-turbo-cv17-telephonic-lora
11
+ results: []
12
+ ---
13
+
14
+ <!-- This model card has been generated automatically according to the information the Trainer had access to. You
15
+ should probably proofread and complete it, then remove this comment. -->
16
+
17
+ # v3-turbo-cv17-telephonic-lora
18
+
19
+ This model is a fine-tuned version of [openai/whisper-large-v3-turbo](https://huggingface.co/openai/whisper-large-v3-turbo) on an unknown dataset.
20
+ It achieves the following results on the evaluation set:
21
+ - Loss: 0.1411
22
+ - Wer: 14.2090
23
+
24
+ ## Model description
25
+
26
+ More information needed
27
+
28
+ ## Intended uses & limitations
29
+
30
+ More information needed
31
+
32
+ ## Training and evaluation data
33
+
34
+ More information needed
35
+
36
+ ## Training procedure
37
+
38
+ ### Training hyperparameters
39
+
40
+ The following hyperparameters were used during training:
41
+ - learning_rate: 1e-05
42
+ - train_batch_size: 4
43
+ - eval_batch_size: 8
44
+ - seed: 42
45
+ - gradient_accumulation_steps: 2
46
+ - total_train_batch_size: 8
47
+ - optimizer: Use OptimizerNames.ADAMW_TORCH with betas=(0.9,0.999) and epsilon=1e-08 and optimizer_args=No additional optimizer arguments
48
+ - lr_scheduler_type: cosine
49
+ - training_steps: 5000
50
+ - mixed_precision_training: Native AMP
51
+
52
+ ### Training results
53
+
54
+ | Training Loss | Epoch | Step | Validation Loss | Wer |
55
+ |:-------------:|:------:|:----:|:---------------:|:-------:|
56
+ | 0.454 | 0.1138 | 500 | 0.1633 | 15.3845 |
57
+ | 0.1463 | 0.2276 | 1000 | 0.1525 | 14.9965 |
58
+ | 0.1393 | 0.3414 | 1500 | 0.1482 | 14.7002 |
59
+ | 0.1344 | 0.4552 | 2000 | 0.1466 | 14.4383 |
60
+ | 0.1305 | 0.5690 | 2500 | 0.1442 | 14.3084 |
61
+ | 0.1235 | 0.6828 | 3000 | 0.1427 | 14.2510 |
62
+ | 0.129 | 0.7966 | 3500 | 0.1418 | 14.2434 |
63
+ | 0.1259 | 0.9104 | 4000 | 0.1416 | 14.1765 |
64
+ | 0.1169 | 1.0241 | 4500 | 0.1412 | 14.2185 |
65
+ | 0.1103 | 1.1379 | 5000 | 0.1411 | 14.2090 |
66
+
67
+
68
+ ### Framework versions
69
+
70
+ - PEFT 0.14.0
71
+ - Transformers 4.48.3
72
+ - Pytorch 2.4.1+cu121
73
+ - Datasets 3.1.0
74
+ - Tokenizers 0.21.0
adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:afc092bd82bf74726cf958a43b3bfc8ac61fa887985e67b56ad49fac7404f85b
3
  size 111475752
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cb17d2835145e2dd53e0457281192b0d0b2cc793b78afdc87b3739679426d27f
3
  size 111475752
all_results.json ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 1.1379310344827587,
3
+ "total_flos": 7.07561145335808e+19,
4
+ "train_loss": 0.1610086441040039,
5
+ "train_runtime": 59312.8298,
6
+ "train_samples_per_second": 0.674,
7
+ "train_steps_per_second": 0.084
8
+ }
train_results.json ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 1.1379310344827587,
3
+ "total_flos": 7.07561145335808e+19,
4
+ "train_loss": 0.1610086441040039,
5
+ "train_runtime": 59312.8298,
6
+ "train_samples_per_second": 0.674,
7
+ "train_steps_per_second": 0.084
8
+ }
trainer_state.json ADDED
@@ -0,0 +1,202 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": 14.176494199048149,
3
+ "best_model_checkpoint": "./v3-turbo-cv17-telephonic-lora/v3-turbo-cv17-telephonic-lora/checkpoint-4000",
4
+ "epoch": 1.1379310344827587,
5
+ "eval_steps": 500,
6
+ "global_step": 5000,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.11380448389666553,
13
+ "grad_norm": 1.8799397945404053,
14
+ "learning_rate": 9.757220437383345e-06,
15
+ "loss": 0.454,
16
+ "step": 500
17
+ },
18
+ {
19
+ "epoch": 0.11380448389666553,
20
+ "eval_loss": 0.16326197981834412,
21
+ "eval_runtime": 4750.4498,
22
+ "eval_samples_per_second": 2.37,
23
+ "eval_steps_per_second": 0.296,
24
+ "eval_wer": 15.384468357575642,
25
+ "step": 500
26
+ },
27
+ {
28
+ "epoch": 0.22760896779333106,
29
+ "grad_norm": 2.3445775508880615,
30
+ "learning_rate": 9.050617527877911e-06,
31
+ "loss": 0.1463,
32
+ "step": 1000
33
+ },
34
+ {
35
+ "epoch": 0.22760896779333106,
36
+ "eval_loss": 0.1524566262960434,
37
+ "eval_runtime": 4747.2698,
38
+ "eval_samples_per_second": 2.371,
39
+ "eval_steps_per_second": 0.297,
40
+ "eval_wer": 14.996463999694184,
41
+ "step": 1000
42
+ },
43
+ {
44
+ "epoch": 0.3414134516899966,
45
+ "grad_norm": 3.6666531562805176,
46
+ "learning_rate": 7.94908337621646e-06,
47
+ "loss": 0.1393,
48
+ "step": 1500
49
+ },
50
+ {
51
+ "epoch": 0.3414134516899966,
52
+ "eval_loss": 0.148152157664299,
53
+ "eval_runtime": 4738.8967,
54
+ "eval_samples_per_second": 2.376,
55
+ "eval_steps_per_second": 0.297,
56
+ "eval_wer": 14.700204514612283,
57
+ "step": 1500
58
+ },
59
+ {
60
+ "epoch": 0.4552179355866621,
61
+ "grad_norm": 2.111747980117798,
62
+ "learning_rate": 6.557031408153642e-06,
63
+ "loss": 0.1344,
64
+ "step": 2000
65
+ },
66
+ {
67
+ "epoch": 0.4552179355866621,
68
+ "eval_loss": 0.14661043882369995,
69
+ "eval_runtime": 4746.3362,
70
+ "eval_samples_per_second": 2.372,
71
+ "eval_steps_per_second": 0.297,
72
+ "eval_wer": 14.438349356830216,
73
+ "step": 2000
74
+ },
75
+ {
76
+ "epoch": 0.5690224194833277,
77
+ "grad_norm": 1.8398709297180176,
78
+ "learning_rate": 5.0125663573850204e-06,
79
+ "loss": 0.1305,
80
+ "step": 2500
81
+ },
82
+ {
83
+ "epoch": 0.5690224194833277,
84
+ "eval_loss": 0.14422065019607544,
85
+ "eval_runtime": 4749.083,
86
+ "eval_samples_per_second": 2.371,
87
+ "eval_steps_per_second": 0.296,
88
+ "eval_wer": 14.30837745369751,
89
+ "step": 2500
90
+ },
91
+ {
92
+ "epoch": 0.6828269033799932,
93
+ "grad_norm": 0.9027289152145386,
94
+ "learning_rate": 3.4668712240005912e-06,
95
+ "loss": 0.1235,
96
+ "step": 3000
97
+ },
98
+ {
99
+ "epoch": 0.6828269033799932,
100
+ "eval_loss": 0.14272235333919525,
101
+ "eval_runtime": 4749.4242,
102
+ "eval_samples_per_second": 2.37,
103
+ "eval_steps_per_second": 0.296,
104
+ "eval_wer": 14.251036908197786,
105
+ "step": 3000
106
+ },
107
+ {
108
+ "epoch": 0.7966313872766587,
109
+ "grad_norm": 1.789233684539795,
110
+ "learning_rate": 2.0737962298724513e-06,
111
+ "loss": 0.129,
112
+ "step": 3500
113
+ },
114
+ {
115
+ "epoch": 0.7966313872766587,
116
+ "eval_loss": 0.14178909361362457,
117
+ "eval_runtime": 4763.1612,
118
+ "eval_samples_per_second": 2.364,
119
+ "eval_steps_per_second": 0.296,
120
+ "eval_wer": 14.243391502131155,
121
+ "step": 3500
122
+ },
123
+ {
124
+ "epoch": 0.9104358711733243,
125
+ "grad_norm": 2.0484585762023926,
126
+ "learning_rate": 9.641678837679985e-07,
127
+ "loss": 0.1259,
128
+ "step": 4000
129
+ },
130
+ {
131
+ "epoch": 0.9104358711733243,
132
+ "eval_loss": 0.14161205291748047,
133
+ "eval_runtime": 4761.4623,
134
+ "eval_samples_per_second": 2.364,
135
+ "eval_steps_per_second": 0.296,
136
+ "eval_wer": 14.176494199048149,
137
+ "step": 4000
138
+ },
139
+ {
140
+ "epoch": 1.024126550586093,
141
+ "grad_norm": 2.5858395099639893,
142
+ "learning_rate": 2.49594904496141e-07,
143
+ "loss": 0.1169,
144
+ "step": 4500
145
+ },
146
+ {
147
+ "epoch": 1.024126550586093,
148
+ "eval_loss": 0.14117974042892456,
149
+ "eval_runtime": 4761.2429,
150
+ "eval_samples_per_second": 2.365,
151
+ "eval_steps_per_second": 0.296,
152
+ "eval_wer": 14.21854393241461,
153
+ "step": 4500
154
+ },
155
+ {
156
+ "epoch": 1.1379310344827587,
157
+ "grad_norm": 2.0550997257232666,
158
+ "learning_rate": 2.467399070893439e-11,
159
+ "loss": 0.1103,
160
+ "step": 5000
161
+ },
162
+ {
163
+ "epoch": 1.1379310344827587,
164
+ "eval_loss": 0.1410815715789795,
165
+ "eval_runtime": 4761.6981,
166
+ "eval_samples_per_second": 2.364,
167
+ "eval_steps_per_second": 0.296,
168
+ "eval_wer": 14.208987174831321,
169
+ "step": 5000
170
+ },
171
+ {
172
+ "epoch": 1.1379310344827587,
173
+ "step": 5000,
174
+ "total_flos": 7.07561145335808e+19,
175
+ "train_loss": 0.1610086441040039,
176
+ "train_runtime": 59312.8298,
177
+ "train_samples_per_second": 0.674,
178
+ "train_steps_per_second": 0.084
179
+ }
180
+ ],
181
+ "logging_steps": 500,
182
+ "max_steps": 5000,
183
+ "num_input_tokens_seen": 0,
184
+ "num_train_epochs": 2,
185
+ "save_steps": 500,
186
+ "stateful_callbacks": {
187
+ "TrainerControl": {
188
+ "args": {
189
+ "should_epoch_stop": false,
190
+ "should_evaluate": false,
191
+ "should_log": false,
192
+ "should_save": true,
193
+ "should_training_stop": true
194
+ },
195
+ "attributes": {}
196
+ }
197
+ },
198
+ "total_flos": 7.07561145335808e+19,
199
+ "train_batch_size": 4,
200
+ "trial_name": null,
201
+ "trial_params": null
202
+ }