prosecalign
/

phi3m0128-cds-0.8-kendall-onof-decrease-corr-max-2-simpo-max1500-default

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:57d57704881e7fcbb9e56f84efbea3c6cc8423f70250963dbc46dd1392f117ca
 size 25200088

 version https://git-lfs.github.com/spec/v1
+oid sha256:cc2428fff866abe2b75f565b751a960bc3488f5cdb9ba40313ead8da3472a835
 size 25200088

trainer_log.jsonl CHANGED Viewed

@@ -166,3 +166,9 @@
 {"current_steps": 1390, "total_steps": 1500, "loss": 0.2021, "accuracy": 0.9125000238418579, "learning_rate": 6.605274281709929e-08, "epoch": 1.1942415126772667, "percentage": 92.67, "elapsed_time": "3:03:15", "remaining_time": "0:14:30", "throughput": "0.00", "total_tokens": 0}
 {"current_steps": 1400, "total_steps": 1500, "loss": 0.1899, "accuracy": 0.9375, "learning_rate": 5.463099816548578e-08, "epoch": 1.202836269875376, "percentage": 93.33, "elapsed_time": "3:04:25", "remaining_time": "0:13:10", "throughput": "0.00", "total_tokens": 0}
 {"current_steps": 1400, "total_steps": 1500, "eval_loss": 0.21005278825759888, "epoch": 1.202836269875376, "percentage": 93.33, "elapsed_time": "3:04:51", "remaining_time": "0:13:12", "throughput": "0.00", "total_tokens": 0}

 {"current_steps": 1390, "total_steps": 1500, "loss": 0.2021, "accuracy": 0.9125000238418579, "learning_rate": 6.605274281709929e-08, "epoch": 1.1942415126772667, "percentage": 92.67, "elapsed_time": "3:03:15", "remaining_time": "0:14:30", "throughput": "0.00", "total_tokens": 0}
 {"current_steps": 1400, "total_steps": 1500, "loss": 0.1899, "accuracy": 0.9375, "learning_rate": 5.463099816548578e-08, "epoch": 1.202836269875376, "percentage": 93.33, "elapsed_time": "3:04:25", "remaining_time": "0:13:10", "throughput": "0.00", "total_tokens": 0}
 {"current_steps": 1400, "total_steps": 1500, "eval_loss": 0.21005278825759888, "epoch": 1.202836269875376, "percentage": 93.33, "elapsed_time": "3:04:51", "remaining_time": "0:13:12", "throughput": "0.00", "total_tokens": 0}
+{"current_steps": 1410, "total_steps": 1500, "loss": 0.2233, "accuracy": 0.9125000238418579, "learning_rate": 4.428187317827848e-08, "epoch": 1.2114310270734852, "percentage": 94.0, "elapsed_time": "3:06:12", "remaining_time": "0:11:53", "throughput": "0.00", "total_tokens": 0}
+{"current_steps": 1420, "total_steps": 1500, "loss": 0.1814, "accuracy": 0.9624999761581421, "learning_rate": 3.5009907323737826e-08, "epoch": 1.2200257842715942, "percentage": 94.67, "elapsed_time": "3:07:23", "remaining_time": "0:10:33", "throughput": "0.00", "total_tokens": 0}
+{"current_steps": 1430, "total_steps": 1500, "loss": 0.2183, "accuracy": 0.949999988079071, "learning_rate": 2.681916759252917e-08, "epoch": 1.2286205414697036, "percentage": 95.33, "elapsed_time": "3:08:41", "remaining_time": "0:09:14", "throughput": "0.00", "total_tokens": 0}
+{"current_steps": 1440, "total_steps": 1500, "loss": 0.1853, "accuracy": 0.9125000238418579, "learning_rate": 1.9713246713805588e-08, "epoch": 1.2372152986678127, "percentage": 96.0, "elapsed_time": "3:09:51", "remaining_time": "0:07:54", "throughput": "0.00", "total_tokens": 0}
+{"current_steps": 1450, "total_steps": 1500, "loss": 0.2072, "accuracy": 0.9375, "learning_rate": 1.3695261579316776e-08, "epoch": 1.2458100558659218, "percentage": 96.67, "elapsed_time": "3:11:02", "remaining_time": "0:06:35", "throughput": "0.00", "total_tokens": 0}
+{"current_steps": 1450, "total_steps": 1500, "eval_loss": 0.21267712116241455, "epoch": 1.2458100558659218, "percentage": 96.67, "elapsed_time": "3:11:27", "remaining_time": "0:06:36", "throughput": "0.00", "total_tokens": 0}