KaiquanMah commited on
Commit
cf48f55
·
verified ·
1 Parent(s): a3ca25f

Upload 3 files

Browse files
DeepLearning/w9-qna/results/round1/training_logs.json ADDED
@@ -0,0 +1 @@
 
 
1
+ [{"eval_loss": 1.0543116331100464, "eval_exact_match": 71.36294027565084, "eval_f1": 80.3990198651532, "eval_runtime": 5.1848, "eval_samples_per_second": 125.946, "eval_steps_per_second": 7.908, "epoch": 1.0, "step": 401}, {"loss": 0.5247, "grad_norm": 18.916826248168945, "learning_rate": 1.1704073150457192e-05, "epoch": 1.2468827930174564, "step": 500}, {"eval_loss": 1.2098065614700317, "eval_exact_match": 71.51607963246555, "eval_f1": 80.66102990087539, "eval_runtime": 5.1859, "eval_samples_per_second": 125.92, "eval_steps_per_second": 7.906, "epoch": 2.0, "step": 802}, {"loss": 0.3121, "grad_norm": 15.454927444458008, "learning_rate": 3.391521197007482e-06, "epoch": 2.493765586034913, "step": 1000}, {"eval_loss": 1.3002876043319702, "eval_exact_match": 70.75038284839204, "eval_f1": 80.26239787753836, "eval_runtime": 5.137, "eval_samples_per_second": 127.116, "eval_steps_per_second": 7.981, "epoch": 3.0, "step": 1203}, {"train_runtime": 464.5896, "train_samples_per_second": 41.385, "train_steps_per_second": 2.589, "total_flos": 1884050365736448.0, "train_loss": 0.39259073284399676, "epoch": 3.0, "step": 1203}]
DeepLearning/w9-qna/results/round2/training_logs.json ADDED
@@ -0,0 +1 @@
 
 
1
+ [{"loss": 0.5603, "grad_norm": 17.91541862487793, "learning_rate": 1.918536990856193e-05, "epoch": 0.12468827930174564, "step": 50}, {"loss": 0.5049, "grad_norm": 11.399495124816895, "learning_rate": 1.835411471321696e-05, "epoch": 0.24937655860349128, "step": 100}, {"loss": 0.5972, "grad_norm": 6.771671295166016, "learning_rate": 1.752285951787199e-05, "epoch": 0.3740648379052369, "step": 150}, {"loss": 0.6039, "grad_norm": 11.424762725830078, "learning_rate": 1.6691604322527018e-05, "epoch": 0.49875311720698257, "step": 200}, {"loss": 0.544, "grad_norm": 28.906415939331055, "learning_rate": 1.5860349127182046e-05, "epoch": 0.6234413965087282, "step": 250}, {"loss": 0.5218, "grad_norm": 11.106880187988281, "learning_rate": 1.5029093931837075e-05, "epoch": 0.7481296758104738, "step": 300}, {"loss": 0.5945, "grad_norm": 11.069750785827637, "learning_rate": 1.4197838736492104e-05, "epoch": 0.8728179551122195, "step": 350}, {"loss": 0.5439, "grad_norm": 10.31427001953125, "learning_rate": 1.3366583541147134e-05, "epoch": 0.9975062344139651, "step": 400}, {"eval_loss": 1.0543116331100464, "eval_exact_match": 71.36294027565084, "eval_f1": 80.3990198651532, "eval_runtime": 4.972, "eval_samples_per_second": 131.335, "eval_steps_per_second": 8.246, "epoch": 1.0, "step": 401}, {"loss": 0.4052, "grad_norm": 16.57781410217285, "learning_rate": 1.2535328345802163e-05, "epoch": 1.1221945137157108, "step": 450}, {"loss": 0.3709, "grad_norm": 18.916826248168945, "learning_rate": 1.1704073150457192e-05, "epoch": 1.2468827930174564, "step": 500}, {"loss": 0.396, "grad_norm": 19.589569091796875, "learning_rate": 1.087281795511222e-05, "epoch": 1.371571072319202, "step": 550}, {"loss": 0.329, "grad_norm": 14.458925247192383, "learning_rate": 1.0041562759767249e-05, "epoch": 1.4962593516209477, "step": 600}, {"loss": 0.3673, "grad_norm": 6.033336639404297, "learning_rate": 9.210307564422278e-06, "epoch": 1.6209476309226933, "step": 650}, {"loss": 0.332, "grad_norm": 7.279592990875244, "learning_rate": 8.379052369077308e-06, "epoch": 1.745635910224439, "step": 700}, {"loss": 0.3572, "grad_norm": 27.507848739624023, "learning_rate": 7.547797173732336e-06, "epoch": 1.8703241895261846, "step": 750}, {"loss": 0.3501, "grad_norm": 13.733630180358887, "learning_rate": 6.7165419783873655e-06, "epoch": 1.9950124688279303, "step": 800}, {"eval_loss": 1.2098065614700317, "eval_exact_match": 71.51607963246555, "eval_f1": 80.66102990087539, "eval_runtime": 4.9288, "eval_samples_per_second": 132.486, "eval_steps_per_second": 8.318, "epoch": 2.0, "step": 802}, {"loss": 0.2351, "grad_norm": 7.376136779785156, "learning_rate": 5.885286783042394e-06, "epoch": 2.119700748129676, "step": 850}, {"loss": 0.2496, "grad_norm": 4.796431064605713, "learning_rate": 5.054031587697423e-06, "epoch": 2.2443890274314215, "step": 900}, {"loss": 0.2435, "grad_norm": 9.484352111816406, "learning_rate": 4.2227763923524525e-06, "epoch": 2.369077306733167, "step": 950}, {"loss": 0.2607, "grad_norm": 15.454927444458008, "learning_rate": 3.391521197007482e-06, "epoch": 2.493765586034913, "step": 1000}, {"loss": 0.2871, "grad_norm": 20.637434005737305, "learning_rate": 2.5602660016625107e-06, "epoch": 2.6184538653366585, "step": 1050}, {"loss": 0.2504, "grad_norm": 5.464017391204834, "learning_rate": 1.7290108063175396e-06, "epoch": 2.743142144638404, "step": 1100}, {"loss": 0.2567, "grad_norm": 13.079675674438477, "learning_rate": 8.977556109725687e-07, "epoch": 2.8678304239401498, "step": 1150}, {"loss": 0.2709, "grad_norm": 22.563940048217773, "learning_rate": 6.650041562759768e-08, "epoch": 2.9925187032418954, "step": 1200}, {"eval_loss": 1.3002876043319702, "eval_exact_match": 70.75038284839204, "eval_f1": 80.26239787753836, "eval_runtime": 4.9186, "eval_samples_per_second": 132.761, "eval_steps_per_second": 8.336, "epoch": 3.0, "step": 1203}, {"train_runtime": 452.7389, "train_samples_per_second": 42.468, "train_steps_per_second": 2.657, "total_flos": 1884050365736448.0, "train_loss": 0.39259084239168557, "epoch": 3.0, "step": 1203}]
DeepLearning/w9-qna/results/training_logs.json CHANGED
@@ -1 +1 @@
1
- [{"eval_loss": 1.0543116331100464, "eval_exact_match": 71.36294027565084, "eval_f1": 80.3990198651532, "eval_runtime": 5.1848, "eval_samples_per_second": 125.946, "eval_steps_per_second": 7.908, "epoch": 1.0, "step": 401}, {"loss": 0.5247, "grad_norm": 18.916826248168945, "learning_rate": 1.1704073150457192e-05, "epoch": 1.2468827930174564, "step": 500}, {"eval_loss": 1.2098065614700317, "eval_exact_match": 71.51607963246555, "eval_f1": 80.66102990087539, "eval_runtime": 5.1859, "eval_samples_per_second": 125.92, "eval_steps_per_second": 7.906, "epoch": 2.0, "step": 802}, {"loss": 0.3121, "grad_norm": 15.454927444458008, "learning_rate": 3.391521197007482e-06, "epoch": 2.493765586034913, "step": 1000}, {"eval_loss": 1.3002876043319702, "eval_exact_match": 70.75038284839204, "eval_f1": 80.26239787753836, "eval_runtime": 5.137, "eval_samples_per_second": 127.116, "eval_steps_per_second": 7.981, "epoch": 3.0, "step": 1203}, {"train_runtime": 464.5896, "train_samples_per_second": 41.385, "train_steps_per_second": 2.589, "total_flos": 1884050365736448.0, "train_loss": 0.39259073284399676, "epoch": 3.0, "step": 1203}]
 
1
+ [{"loss": 0.5603, "grad_norm": 17.91541862487793, "learning_rate": 1.918536990856193e-05, "epoch": 0.12468827930174564, "step": 50}, {"loss": 0.5049, "grad_norm": 11.399495124816895, "learning_rate": 1.835411471321696e-05, "epoch": 0.24937655860349128, "step": 100}, {"loss": 0.5972, "grad_norm": 6.771671295166016, "learning_rate": 1.752285951787199e-05, "epoch": 0.3740648379052369, "step": 150}, {"loss": 0.6039, "grad_norm": 11.424762725830078, "learning_rate": 1.6691604322527018e-05, "epoch": 0.49875311720698257, "step": 200}, {"loss": 0.544, "grad_norm": 28.906415939331055, "learning_rate": 1.5860349127182046e-05, "epoch": 0.6234413965087282, "step": 250}, {"loss": 0.5218, "grad_norm": 11.106880187988281, "learning_rate": 1.5029093931837075e-05, "epoch": 0.7481296758104738, "step": 300}, {"loss": 0.5945, "grad_norm": 11.069750785827637, "learning_rate": 1.4197838736492104e-05, "epoch": 0.8728179551122195, "step": 350}, {"loss": 0.5439, "grad_norm": 10.31427001953125, "learning_rate": 1.3366583541147134e-05, "epoch": 0.9975062344139651, "step": 400}, {"eval_loss": 1.0543116331100464, "eval_exact_match": 71.36294027565084, "eval_f1": 80.3990198651532, "eval_runtime": 4.972, "eval_samples_per_second": 131.335, "eval_steps_per_second": 8.246, "epoch": 1.0, "step": 401}, {"loss": 0.4052, "grad_norm": 16.57781410217285, "learning_rate": 1.2535328345802163e-05, "epoch": 1.1221945137157108, "step": 450}, {"loss": 0.3709, "grad_norm": 18.916826248168945, "learning_rate": 1.1704073150457192e-05, "epoch": 1.2468827930174564, "step": 500}, {"loss": 0.396, "grad_norm": 19.589569091796875, "learning_rate": 1.087281795511222e-05, "epoch": 1.371571072319202, "step": 550}, {"loss": 0.329, "grad_norm": 14.458925247192383, "learning_rate": 1.0041562759767249e-05, "epoch": 1.4962593516209477, "step": 600}, {"loss": 0.3673, "grad_norm": 6.033336639404297, "learning_rate": 9.210307564422278e-06, "epoch": 1.6209476309226933, "step": 650}, {"loss": 0.332, "grad_norm": 7.279592990875244, "learning_rate": 8.379052369077308e-06, "epoch": 1.745635910224439, "step": 700}, {"loss": 0.3572, "grad_norm": 27.507848739624023, "learning_rate": 7.547797173732336e-06, "epoch": 1.8703241895261846, "step": 750}, {"loss": 0.3501, "grad_norm": 13.733630180358887, "learning_rate": 6.7165419783873655e-06, "epoch": 1.9950124688279303, "step": 800}, {"eval_loss": 1.2098065614700317, "eval_exact_match": 71.51607963246555, "eval_f1": 80.66102990087539, "eval_runtime": 4.9288, "eval_samples_per_second": 132.486, "eval_steps_per_second": 8.318, "epoch": 2.0, "step": 802}, {"loss": 0.2351, "grad_norm": 7.376136779785156, "learning_rate": 5.885286783042394e-06, "epoch": 2.119700748129676, "step": 850}, {"loss": 0.2496, "grad_norm": 4.796431064605713, "learning_rate": 5.054031587697423e-06, "epoch": 2.2443890274314215, "step": 900}, {"loss": 0.2435, "grad_norm": 9.484352111816406, "learning_rate": 4.2227763923524525e-06, "epoch": 2.369077306733167, "step": 950}, {"loss": 0.2607, "grad_norm": 15.454927444458008, "learning_rate": 3.391521197007482e-06, "epoch": 2.493765586034913, "step": 1000}, {"loss": 0.2871, "grad_norm": 20.637434005737305, "learning_rate": 2.5602660016625107e-06, "epoch": 2.6184538653366585, "step": 1050}, {"loss": 0.2504, "grad_norm": 5.464017391204834, "learning_rate": 1.7290108063175396e-06, "epoch": 2.743142144638404, "step": 1100}, {"loss": 0.2567, "grad_norm": 13.079675674438477, "learning_rate": 8.977556109725687e-07, "epoch": 2.8678304239401498, "step": 1150}, {"loss": 0.2709, "grad_norm": 22.563940048217773, "learning_rate": 6.650041562759768e-08, "epoch": 2.9925187032418954, "step": 1200}, {"eval_loss": 1.3002876043319702, "eval_exact_match": 70.75038284839204, "eval_f1": 80.26239787753836, "eval_runtime": 4.9186, "eval_samples_per_second": 132.761, "eval_steps_per_second": 8.336, "epoch": 3.0, "step": 1203}, {"train_runtime": 452.7389, "train_samples_per_second": 42.468, "train_steps_per_second": 2.657, "total_flos": 1884050365736448.0, "train_loss": 0.39259084239168557, "epoch": 3.0, "step": 1203}]