waleko's picture
Model save
5ebf305 verified
{"current_steps": 1, "total_steps": 190, "loss": 0.8488, "lr": 0.0, "epoch": 0.026845637583892617, "percentage": 0.53, "elapsed_time": "0:00:34", "remaining_time": "1:48:41", "throughput": 14038.82, "total_tokens": 484416}
{"current_steps": 2, "total_steps": 190, "loss": 0.9521, "lr": 2.631578947368421e-06, "epoch": 0.053691275167785234, "percentage": 1.05, "elapsed_time": "0:00:59", "remaining_time": "1:33:26", "throughput": 16636.13, "total_tokens": 992192}
{"current_steps": 3, "total_steps": 190, "loss": 0.8108, "lr": 5.263157894736842e-06, "epoch": 0.08053691275167785, "percentage": 1.58, "elapsed_time": "0:01:24", "remaining_time": "1:28:13", "throughput": 17589.68, "total_tokens": 1493664}
{"current_steps": 4, "total_steps": 190, "loss": 0.5474, "lr": 7.894736842105263e-06, "epoch": 0.10738255033557047, "percentage": 2.11, "elapsed_time": "0:01:50", "remaining_time": "1:25:24", "throughput": 18094.72, "total_tokens": 1994016}
{"current_steps": 5, "total_steps": 190, "loss": 0.5955, "lr": 1.0526315789473684e-05, "epoch": 0.1342281879194631, "percentage": 2.63, "elapsed_time": "0:02:15", "remaining_time": "1:23:41", "throughput": 18396.72, "total_tokens": 2496576}
{"current_steps": 6, "total_steps": 190, "loss": 0.8797, "lr": 1.3157894736842106e-05, "epoch": 0.1610738255033557, "percentage": 3.16, "elapsed_time": "0:02:40", "remaining_time": "1:22:13", "throughput": 18681.89, "total_tokens": 3005312}
{"current_steps": 7, "total_steps": 190, "loss": 0.8087, "lr": 1.5789473684210526e-05, "epoch": 0.18791946308724833, "percentage": 3.68, "elapsed_time": "0:03:05", "remaining_time": "1:20:55", "throughput": 18909.65, "total_tokens": 3512192}
{"current_steps": 8, "total_steps": 190, "loss": 0.7354, "lr": 1.8421052631578947e-05, "epoch": 0.21476510067114093, "percentage": 4.21, "elapsed_time": "0:03:31", "remaining_time": "1:20:11", "throughput": 18952.13, "total_tokens": 4007904}
{"current_steps": 9, "total_steps": 190, "loss": 1.0215, "lr": 2.105263157894737e-05, "epoch": 0.24161073825503357, "percentage": 4.74, "elapsed_time": "0:03:56", "remaining_time": "1:19:08", "throughput": 19067.68, "total_tokens": 4502016}
{"current_steps": 10, "total_steps": 190, "loss": 0.5994, "lr": 2.368421052631579e-05, "epoch": 0.2684563758389262, "percentage": 5.26, "elapsed_time": "0:04:21", "remaining_time": "1:18:28", "throughput": 19100.09, "total_tokens": 4995776}
{"current_steps": 11, "total_steps": 190, "loss": 0.579, "lr": 2.6315789473684212e-05, "epoch": 0.2953020134228188, "percentage": 5.79, "elapsed_time": "0:04:47", "remaining_time": "1:17:51", "throughput": 19215.55, "total_tokens": 5515744}
{"current_steps": 12, "total_steps": 190, "loss": 0.5383, "lr": 2.8947368421052634e-05, "epoch": 0.3221476510067114, "percentage": 6.32, "elapsed_time": "0:05:12", "remaining_time": "1:17:11", "throughput": 19252.6, "total_tokens": 6011616}
{"current_steps": 13, "total_steps": 190, "loss": 0.7063, "lr": 3.157894736842105e-05, "epoch": 0.348993288590604, "percentage": 6.84, "elapsed_time": "0:05:37", "remaining_time": "1:16:29", "throughput": 19224.66, "total_tokens": 6480256}
{"current_steps": 14, "total_steps": 190, "loss": 0.5852, "lr": 3.421052631578947e-05, "epoch": 0.37583892617449666, "percentage": 7.37, "elapsed_time": "0:06:02", "remaining_time": "1:15:59", "throughput": 19266.79, "total_tokens": 6987744}
{"current_steps": 15, "total_steps": 190, "loss": 0.5008, "lr": 3.6842105263157895e-05, "epoch": 0.40268456375838924, "percentage": 7.89, "elapsed_time": "0:06:27", "remaining_time": "1:15:25", "throughput": 19312.71, "total_tokens": 7491264}
{"current_steps": 16, "total_steps": 190, "loss": 0.5233, "lr": 3.9473684210526316e-05, "epoch": 0.42953020134228187, "percentage": 8.42, "elapsed_time": "0:06:53", "remaining_time": "1:14:54", "throughput": 19312.51, "total_tokens": 7981344}
{"current_steps": 17, "total_steps": 190, "loss": 0.5967, "lr": 4.210526315789474e-05, "epoch": 0.4563758389261745, "percentage": 8.95, "elapsed_time": "0:07:18", "remaining_time": "1:14:24", "throughput": 19349.69, "total_tokens": 8488736}
{"current_steps": 18, "total_steps": 190, "loss": 0.4981, "lr": 4.473684210526316e-05, "epoch": 0.48322147651006714, "percentage": 9.47, "elapsed_time": "0:07:44", "remaining_time": "1:13:53", "throughput": 19394.98, "total_tokens": 8999616}
{"current_steps": 19, "total_steps": 190, "loss": 0.5569, "lr": 4.736842105263158e-05, "epoch": 0.5100671140939598, "percentage": 10.0, "elapsed_time": "0:08:09", "remaining_time": "1:13:22", "throughput": 19454.66, "total_tokens": 9516224}
{"current_steps": 20, "total_steps": 190, "loss": 0.6289, "lr": 5e-05, "epoch": 0.5369127516778524, "percentage": 10.53, "elapsed_time": "0:08:34", "remaining_time": "1:12:54", "throughput": 19468.28, "total_tokens": 10020320}
{"current_steps": 21, "total_steps": 190, "loss": 0.6167, "lr": 4.999578104083307e-05, "epoch": 0.5637583892617449, "percentage": 11.05, "elapsed_time": "0:08:59", "remaining_time": "1:12:24", "throughput": 19474.88, "total_tokens": 10514592}
{"current_steps": 22, "total_steps": 190, "loss": 0.4801, "lr": 4.998312558730159e-05, "epoch": 0.5906040268456376, "percentage": 11.58, "elapsed_time": "0:09:25", "remaining_time": "1:11:58", "throughput": 19445.63, "total_tokens": 10996672}
{"current_steps": 23, "total_steps": 190, "loss": 0.6074, "lr": 4.996203791083291e-05, "epoch": 0.6174496644295302, "percentage": 12.11, "elapsed_time": "0:09:50", "remaining_time": "1:11:30", "throughput": 19430.66, "total_tokens": 11481888}
{"current_steps": 24, "total_steps": 190, "loss": 0.5378, "lr": 4.993252512887069e-05, "epoch": 0.6442953020134228, "percentage": 12.63, "elapsed_time": "0:10:16", "remaining_time": "1:11:04", "throughput": 19421.77, "total_tokens": 11974208}
{"current_steps": 25, "total_steps": 190, "loss": 0.731, "lr": 4.9894597202472696e-05, "epoch": 0.6711409395973155, "percentage": 13.16, "elapsed_time": "0:10:41", "remaining_time": "1:10:34", "throughput": 19453.06, "total_tokens": 12480192}
{"current_steps": 25, "total_steps": 190, "eval_loss": 0.5557246208190918, "epoch": 0.6711409395973155, "percentage": 13.16, "elapsed_time": "0:10:51", "remaining_time": "1:11:38", "throughput": 19162.02, "total_tokens": 12480192}
{"current_steps": 26, "total_steps": 190, "loss": 0.7527, "lr": 4.984826693294874e-05, "epoch": 0.697986577181208, "percentage": 13.68, "elapsed_time": "0:12:18", "remaining_time": "1:17:39", "throughput": 17582.77, "total_tokens": 12987040}
{"current_steps": 27, "total_steps": 190, "loss": 0.6606, "lr": 4.979354995754006e-05, "epoch": 0.7248322147651006, "percentage": 14.21, "elapsed_time": "0:12:43", "remaining_time": "1:16:51", "throughput": 17658.97, "total_tokens": 13488000}
{"current_steps": 28, "total_steps": 190, "loss": 1.3911, "lr": 4.9730464744141445e-05, "epoch": 0.7516778523489933, "percentage": 14.74, "elapsed_time": "0:13:09", "remaining_time": "1:16:06", "throughput": 17732.46, "total_tokens": 13996640}
{"current_steps": 29, "total_steps": 190, "loss": 0.6006, "lr": 4.965903258506806e-05, "epoch": 0.7785234899328859, "percentage": 15.26, "elapsed_time": "0:13:34", "remaining_time": "1:15:21", "throughput": 17761.23, "total_tokens": 14466432}
{"current_steps": 30, "total_steps": 190, "loss": 0.7419, "lr": 4.957927758986888e-05, "epoch": 0.8053691275167785, "percentage": 15.79, "elapsed_time": "0:13:59", "remaining_time": "1:14:39", "throughput": 17800.86, "total_tokens": 14952000}
{"current_steps": 31, "total_steps": 190, "loss": 0.4359, "lr": 4.949122667718935e-05, "epoch": 0.8322147651006712, "percentage": 16.32, "elapsed_time": "0:14:25", "remaining_time": "1:13:57", "throughput": 17873.02, "total_tokens": 15464320}
{"current_steps": 32, "total_steps": 190, "loss": 0.4554, "lr": 4.9394909565685894e-05, "epoch": 0.8590604026845637, "percentage": 16.84, "elapsed_time": "0:14:50", "remaining_time": "1:13:15", "throughput": 17936.26, "total_tokens": 15968224}
{"current_steps": 33, "total_steps": 190, "loss": 0.4697, "lr": 4.929035876399535e-05, "epoch": 0.8859060402684564, "percentage": 17.37, "elapsed_time": "0:15:15", "remaining_time": "1:12:34", "throughput": 18005.96, "total_tokens": 16482208}
{"current_steps": 34, "total_steps": 190, "loss": 0.446, "lr": 4.917760955976277e-05, "epoch": 0.912751677852349, "percentage": 17.89, "elapsed_time": "0:15:40", "remaining_time": "1:11:56", "throughput": 18062.85, "total_tokens": 16992768}
{"current_steps": 35, "total_steps": 190, "loss": 1.5644, "lr": 4.905670000773126e-05, "epoch": 0.9395973154362416, "percentage": 18.42, "elapsed_time": "0:16:06", "remaining_time": "1:11:18", "throughput": 18088.52, "total_tokens": 17475648}
{"current_steps": 36, "total_steps": 190, "loss": 0.6847, "lr": 4.892767091689786e-05, "epoch": 0.9664429530201343, "percentage": 18.95, "elapsed_time": "0:16:31", "remaining_time": "1:10:41", "throughput": 18142.84, "total_tokens": 17987904}
{"current_steps": 37, "total_steps": 190, "loss": 0.597, "lr": 4.87905658367398e-05, "epoch": 0.9932885906040269, "percentage": 19.47, "elapsed_time": "0:16:56", "remaining_time": "1:10:04", "throughput": 18194.18, "total_tokens": 18501216}
{"current_steps": 38, "total_steps": 190, "loss": 0.3041, "lr": 4.864543104251587e-05, "epoch": 1.0, "percentage": 20.0, "elapsed_time": "0:17:01", "remaining_time": "1:08:05", "throughput": 18218.82, "total_tokens": 18609536}
{"current_steps": 39, "total_steps": 190, "loss": 0.4575, "lr": 4.849231551964771e-05, "epoch": 1.0268456375838926, "percentage": 20.53, "elapsed_time": "0:17:26", "remaining_time": "1:07:33", "throughput": 18241.02, "total_tokens": 19095808}
{"current_steps": 40, "total_steps": 190, "loss": 0.6856, "lr": 4.833127094718643e-05, "epoch": 1.0536912751677852, "percentage": 21.05, "elapsed_time": "0:17:51", "remaining_time": "1:06:58", "throughput": 18278.57, "total_tokens": 19589408}
{"current_steps": 41, "total_steps": 190, "loss": 0.4628, "lr": 4.8162351680370044e-05, "epoch": 1.0805369127516777, "percentage": 21.58, "elapsed_time": "0:18:17", "remaining_time": "1:06:27", "throughput": 18291.33, "total_tokens": 20068800}
{"current_steps": 42, "total_steps": 190, "loss": 0.4927, "lr": 4.79856147322777e-05, "epoch": 1.1073825503355705, "percentage": 22.11, "elapsed_time": "0:18:41", "remaining_time": "1:05:52", "throughput": 18329.84, "total_tokens": 20559136}
{"current_steps": 43, "total_steps": 190, "loss": 0.437, "lr": 4.7801119754586766e-05, "epoch": 1.1342281879194631, "percentage": 22.63, "elapsed_time": "0:19:06", "remaining_time": "1:05:20", "throughput": 18359.39, "total_tokens": 21053856}
{"current_steps": 44, "total_steps": 190, "loss": 0.8056, "lr": 4.760892901743944e-05, "epoch": 1.1610738255033557, "percentage": 23.16, "elapsed_time": "0:19:32", "remaining_time": "1:04:49", "throughput": 18380.99, "total_tokens": 21545152}
{"current_steps": 45, "total_steps": 190, "loss": 1.4207, "lr": 4.7409107388425504e-05, "epoch": 1.1879194630872483, "percentage": 23.68, "elapsed_time": "0:19:57", "remaining_time": "1:04:18", "throughput": 18410.95, "total_tokens": 22047744}
{"current_steps": 46, "total_steps": 190, "loss": 0.5632, "lr": 4.7201722310688445e-05, "epoch": 1.2147651006711409, "percentage": 24.21, "elapsed_time": "0:20:23", "remaining_time": "1:03:49", "throughput": 18437.69, "total_tokens": 22557728}
{"current_steps": 47, "total_steps": 190, "loss": 0.7564, "lr": 4.698684378016222e-05, "epoch": 1.2416107382550337, "percentage": 24.74, "elapsed_time": "0:20:48", "remaining_time": "1:03:18", "throughput": 18468.99, "total_tokens": 23058048}
{"current_steps": 48, "total_steps": 190, "loss": 0.7619, "lr": 4.676454432194656e-05, "epoch": 1.2684563758389262, "percentage": 25.26, "elapsed_time": "0:21:14", "remaining_time": "1:02:49", "throughput": 18487.48, "total_tokens": 23557312}
{"current_steps": 49, "total_steps": 190, "loss": 0.4021, "lr": 4.6534898965828405e-05, "epoch": 1.2953020134228188, "percentage": 25.79, "elapsed_time": "0:21:39", "remaining_time": "1:02:19", "throughput": 18507.88, "total_tokens": 24049312}
{"current_steps": 50, "total_steps": 190, "loss": 0.5164, "lr": 4.629798522095818e-05, "epoch": 1.3221476510067114, "percentage": 26.32, "elapsed_time": "0:22:04", "remaining_time": "1:01:48", "throughput": 18516.09, "total_tokens": 24526400}
{"current_steps": 50, "total_steps": 190, "eval_loss": 0.5057776570320129, "epoch": 1.3221476510067114, "percentage": 26.32, "elapsed_time": "0:22:14", "remaining_time": "1:02:16", "throughput": 18380.88, "total_tokens": 24526400}
{"current_steps": 51, "total_steps": 190, "loss": 1.1693, "lr": 4.6053883049689145e-05, "epoch": 1.348993288590604, "percentage": 26.84, "elapsed_time": "0:23:41", "remaining_time": "1:04:34", "throughput": 17607.92, "total_tokens": 25028608}
{"current_steps": 52, "total_steps": 190, "loss": 0.4814, "lr": 4.580267484058876e-05, "epoch": 1.3758389261744965, "percentage": 27.37, "elapsed_time": "0:24:17", "remaining_time": "1:04:28", "throughput": 17518.81, "total_tokens": 25535296}
{"current_steps": 53, "total_steps": 190, "loss": 0.4973, "lr": 4.554444538063113e-05, "epoch": 1.4026845637583891, "percentage": 27.89, "elapsed_time": "0:24:43", "remaining_time": "1:03:55", "throughput": 17549.05, "total_tokens": 26037664}
{"current_steps": 54, "total_steps": 190, "loss": 0.3931, "lr": 4.5279281826580056e-05, "epoch": 1.429530201342282, "percentage": 28.42, "elapsed_time": "0:25:09", "remaining_time": "1:03:22", "throughput": 17568.88, "total_tokens": 26528256}
{"current_steps": 55, "total_steps": 190, "loss": 0.3824, "lr": 4.5007273675572104e-05, "epoch": 1.4563758389261745, "percentage": 28.95, "elapsed_time": "0:25:36", "remaining_time": "1:02:50", "throughput": 17605.8, "total_tokens": 27044672}
{"current_steps": 56, "total_steps": 190, "loss": 0.4324, "lr": 4.4728512734909844e-05, "epoch": 1.483221476510067, "percentage": 29.47, "elapsed_time": "0:26:02", "remaining_time": "1:02:18", "throughput": 17623.37, "total_tokens": 27531232}
{"current_steps": 57, "total_steps": 190, "loss": 0.6634, "lr": 4.444309309107535e-05, "epoch": 1.5100671140939599, "percentage": 30.0, "elapsed_time": "0:26:28", "remaining_time": "1:01:45", "throughput": 17646.54, "total_tokens": 28025024}
{"current_steps": 58, "total_steps": 190, "loss": 0.4739, "lr": 4.415111107797445e-05, "epoch": 1.5369127516778525, "percentage": 30.53, "elapsed_time": "0:26:53", "remaining_time": "1:01:12", "throughput": 17675.29, "total_tokens": 28524576}
{"current_steps": 59, "total_steps": 190, "loss": 0.4483, "lr": 4.385266524442241e-05, "epoch": 1.563758389261745, "percentage": 31.05, "elapsed_time": "0:27:18", "remaining_time": "1:00:38", "throughput": 17700.95, "total_tokens": 29008288}
{"current_steps": 60, "total_steps": 190, "loss": 0.3853, "lr": 4.3547856320882044e-05, "epoch": 1.5906040268456376, "percentage": 31.58, "elapsed_time": "0:27:44", "remaining_time": "1:00:07", "throughput": 17724.53, "total_tokens": 29509952}
{"current_steps": 61, "total_steps": 190, "loss": 0.4383, "lr": 4.3236787185465525e-05, "epoch": 1.6174496644295302, "percentage": 32.11, "elapsed_time": "0:28:11", "remaining_time": "0:59:36", "throughput": 17747.01, "total_tokens": 30011680}
{"current_steps": 62, "total_steps": 190, "loss": 0.4384, "lr": 4.2919562829211283e-05, "epoch": 1.6442953020134228, "percentage": 32.63, "elapsed_time": "0:28:37", "remaining_time": "0:59:05", "throughput": 17764.15, "total_tokens": 30504320}
{"current_steps": 63, "total_steps": 190, "loss": 0.3599, "lr": 4.259629032064779e-05, "epoch": 1.6711409395973154, "percentage": 33.16, "elapsed_time": "0:29:02", "remaining_time": "0:58:33", "throughput": 17789.09, "total_tokens": 31000928}
{"current_steps": 64, "total_steps": 190, "loss": 1.7474, "lr": 4.226707876965611e-05, "epoch": 1.697986577181208, "percentage": 33.68, "elapsed_time": "0:29:28", "remaining_time": "0:58:01", "throughput": 17820.44, "total_tokens": 31514784}
{"current_steps": 65, "total_steps": 190, "loss": 1.2061, "lr": 4.193203929064353e-05, "epoch": 1.7248322147651005, "percentage": 34.21, "elapsed_time": "0:29:54", "remaining_time": "0:57:30", "throughput": 17851.94, "total_tokens": 32034560}
{"current_steps": 66, "total_steps": 190, "loss": 0.7208, "lr": 4.159128496504053e-05, "epoch": 1.7516778523489933, "percentage": 34.74, "elapsed_time": "0:30:20", "remaining_time": "0:57:00", "throughput": 17862.39, "total_tokens": 32521760}
{"current_steps": 67, "total_steps": 190, "loss": 0.5481, "lr": 4.1244930803134e-05, "epoch": 1.778523489932886, "percentage": 35.26, "elapsed_time": "0:30:46", "remaining_time": "0:56:30", "throughput": 17875.76, "total_tokens": 33014464}
{"current_steps": 68, "total_steps": 190, "loss": 0.54, "lr": 4.089309370524921e-05, "epoch": 1.8053691275167785, "percentage": 35.79, "elapsed_time": "0:31:12", "remaining_time": "0:56:00", "throughput": 17893.04, "total_tokens": 33510144}
{"current_steps": 69, "total_steps": 190, "loss": 0.4705, "lr": 4.053589242229412e-05, "epoch": 1.8322147651006713, "percentage": 36.32, "elapsed_time": "0:31:38", "remaining_time": "0:55:29", "throughput": 17924.9, "total_tokens": 34028288}
{"current_steps": 70, "total_steps": 190, "loss": 0.38, "lr": 4.0173447515678916e-05, "epoch": 1.8590604026845639, "percentage": 36.84, "elapsed_time": "0:32:03", "remaining_time": "0:54:57", "throughput": 17945.05, "total_tokens": 34522752}
{"current_steps": 71, "total_steps": 190, "loss": 0.3894, "lr": 3.9805881316624506e-05, "epoch": 1.8859060402684564, "percentage": 37.37, "elapsed_time": "0:32:29", "remaining_time": "0:54:28", "throughput": 17968.31, "total_tokens": 35036768}
{"current_steps": 72, "total_steps": 190, "loss": 0.3796, "lr": 3.9433317884873664e-05, "epoch": 1.912751677852349, "percentage": 37.89, "elapsed_time": "0:32:56", "remaining_time": "0:53:59", "throughput": 17987.73, "total_tokens": 35551360}
{"current_steps": 73, "total_steps": 190, "loss": 0.3344, "lr": 3.905588296681886e-05, "epoch": 1.9395973154362416, "percentage": 38.42, "elapsed_time": "0:33:22", "remaining_time": "0:53:29", "throughput": 18002.35, "total_tokens": 36048672}
{"current_steps": 74, "total_steps": 190, "loss": 0.3641, "lr": 3.867370395306068e-05, "epoch": 1.9664429530201342, "percentage": 38.95, "elapsed_time": "0:33:47", "remaining_time": "0:52:58", "throughput": 18027.0, "total_tokens": 36557472}
{"current_steps": 75, "total_steps": 190, "loss": 0.4785, "lr": 3.82869098354114e-05, "epoch": 1.9932885906040267, "percentage": 39.47, "elapsed_time": "0:34:14", "remaining_time": "0:52:29", "throughput": 18046.73, "total_tokens": 37072800}
{"current_steps": 75, "total_steps": 190, "eval_loss": 0.47579553723335266, "epoch": 1.9932885906040267, "percentage": 39.47, "elapsed_time": "0:34:24", "remaining_time": "0:52:44", "throughput": 17961.53, "total_tokens": 37072800}
{"current_steps": 76, "total_steps": 190, "loss": 0.4172, "lr": 3.7895631163358105e-05, "epoch": 2.0, "percentage": 40.0, "elapsed_time": "0:35:31", "remaining_time": "0:53:16", "throughput": 17456.93, "total_tokens": 37203168}
{"current_steps": 77, "total_steps": 190, "loss": 0.3778, "lr": 3.7500000000000003e-05, "epoch": 2.0268456375838926, "percentage": 40.53, "elapsed_time": "0:35:57", "remaining_time": "0:52:45", "throughput": 17483.53, "total_tokens": 37717408}
{"current_steps": 78, "total_steps": 190, "loss": 0.6253, "lr": 3.7100149877474974e-05, "epoch": 2.053691275167785, "percentage": 41.05, "elapsed_time": "0:36:23", "remaining_time": "0:52:15", "throughput": 17506.85, "total_tokens": 38229760}
{"current_steps": 79, "total_steps": 190, "loss": 0.3002, "lr": 3.66962157518902e-05, "epoch": 2.0805369127516777, "percentage": 41.58, "elapsed_time": "0:36:50", "remaining_time": "0:51:45", "throughput": 17530.75, "total_tokens": 38744800}
{"current_steps": 80, "total_steps": 190, "loss": 0.3007, "lr": 3.628833395777224e-05, "epoch": 2.1073825503355703, "percentage": 42.11, "elapsed_time": "0:37:15", "remaining_time": "0:51:13", "throughput": 17551.79, "total_tokens": 39239328}
{"current_steps": 81, "total_steps": 190, "loss": 0.3573, "lr": 3.587664216205183e-05, "epoch": 2.134228187919463, "percentage": 42.63, "elapsed_time": "0:37:41", "remaining_time": "0:50:42", "throughput": 17583.11, "total_tokens": 39757728}
{"current_steps": 82, "total_steps": 190, "loss": 0.3136, "lr": 3.546127931759903e-05, "epoch": 2.1610738255033555, "percentage": 43.16, "elapsed_time": "0:38:06", "remaining_time": "0:50:11", "throughput": 17605.94, "total_tokens": 40260064}
{"current_steps": 83, "total_steps": 190, "loss": 1.4533, "lr": 3.504238561632424e-05, "epoch": 2.1879194630872485, "percentage": 43.68, "elapsed_time": "0:38:32", "remaining_time": "0:49:41", "throughput": 17623.54, "total_tokens": 40758112}
{"current_steps": 84, "total_steps": 190, "loss": 0.3279, "lr": 3.4620102441861143e-05, "epoch": 2.214765100671141, "percentage": 44.21, "elapsed_time": "0:38:58", "remaining_time": "0:49:10", "throughput": 17636.48, "total_tokens": 41240640}
{"current_steps": 85, "total_steps": 190, "loss": 1.0606, "lr": 3.4194572321847336e-05, "epoch": 2.2416107382550337, "percentage": 44.74, "elapsed_time": "0:39:24", "remaining_time": "0:48:40", "throughput": 17654.99, "total_tokens": 41737376}
{"current_steps": 86, "total_steps": 190, "loss": 3.5933, "lr": 3.376593887981887e-05, "epoch": 2.2684563758389262, "percentage": 45.26, "elapsed_time": "0:39:49", "remaining_time": "0:48:10", "throughput": 17671.07, "total_tokens": 42230752}
{"current_steps": 87, "total_steps": 190, "loss": 0.6932, "lr": 3.333434678673489e-05, "epoch": 2.295302013422819, "percentage": 45.79, "elapsed_time": "0:40:15", "remaining_time": "0:47:39", "throughput": 17692.93, "total_tokens": 42737152}
{"current_steps": 88, "total_steps": 190, "loss": 0.4915, "lr": 3.289994171214882e-05, "epoch": 2.3221476510067114, "percentage": 46.32, "elapsed_time": "0:40:41", "remaining_time": "0:47:10", "throughput": 17704.69, "total_tokens": 43229952}
{"current_steps": 89, "total_steps": 190, "loss": 0.379, "lr": 3.246287027504237e-05, "epoch": 2.348993288590604, "percentage": 46.84, "elapsed_time": "0:41:07", "remaining_time": "0:46:40", "throughput": 17718.65, "total_tokens": 43719232}
{"current_steps": 90, "total_steps": 190, "loss": 0.3202, "lr": 3.202327999433924e-05, "epoch": 2.3758389261744965, "percentage": 47.37, "elapsed_time": "0:41:33", "remaining_time": "0:46:10", "throughput": 17739.38, "total_tokens": 44225120}
{"current_steps": 91, "total_steps": 190, "loss": 0.3343, "lr": 3.158131923911498e-05, "epoch": 2.402684563758389, "percentage": 47.89, "elapsed_time": "0:41:58", "remaining_time": "0:45:40", "throughput": 17760.04, "total_tokens": 44734272}
{"current_steps": 92, "total_steps": 190, "loss": 0.8698, "lr": 3.1137137178519985e-05, "epoch": 2.4295302013422817, "percentage": 48.42, "elapsed_time": "0:42:24", "remaining_time": "0:45:10", "throughput": 17779.88, "total_tokens": 45239808}
{"current_steps": 93, "total_steps": 190, "loss": 0.2996, "lr": 3.069088373143234e-05, "epoch": 2.4563758389261743, "percentage": 48.95, "elapsed_time": "0:42:50", "remaining_time": "0:44:40", "throughput": 17796.5, "total_tokens": 45744640}
{"current_steps": 94, "total_steps": 190, "loss": 4.0478, "lr": 3.0242709515857758e-05, "epoch": 2.4832214765100673, "percentage": 49.47, "elapsed_time": "0:43:16", "remaining_time": "0:44:11", "throughput": 17810.56, "total_tokens": 46247648}
{"current_steps": 95, "total_steps": 190, "loss": 0.3086, "lr": 2.9792765798093465e-05, "epoch": 2.51006711409396, "percentage": 50.0, "elapsed_time": "0:43:42", "remaining_time": "0:43:42", "throughput": 17829.17, "total_tokens": 46751680}
{"current_steps": 96, "total_steps": 190, "loss": 2.5373, "lr": 2.9341204441673266e-05, "epoch": 2.5369127516778525, "percentage": 50.53, "elapsed_time": "0:44:08", "remaining_time": "0:43:12", "throughput": 17844.4, "total_tokens": 47252352}
{"current_steps": 97, "total_steps": 190, "loss": 0.3775, "lr": 2.8888177856111083e-05, "epoch": 2.563758389261745, "percentage": 51.05, "elapsed_time": "0:44:33", "remaining_time": "0:42:43", "throughput": 17859.79, "total_tokens": 47748512}
{"current_steps": 98, "total_steps": 190, "loss": 0.4474, "lr": 2.8433838945460205e-05, "epoch": 2.5906040268456376, "percentage": 51.58, "elapsed_time": "0:44:59", "remaining_time": "0:42:14", "throughput": 17877.56, "total_tokens": 48258400}
{"current_steps": 99, "total_steps": 190, "loss": 0.3873, "lr": 2.797834105670559e-05, "epoch": 2.61744966442953, "percentage": 52.11, "elapsed_time": "0:45:25", "remaining_time": "0:41:45", "throughput": 17882.18, "total_tokens": 48737888}
{"current_steps": 100, "total_steps": 190, "loss": 0.3249, "lr": 2.752183792800671e-05, "epoch": 2.6442953020134228, "percentage": 52.63, "elapsed_time": "0:45:51", "remaining_time": "0:41:16", "throughput": 17900.1, "total_tokens": 49251648}
{"current_steps": 100, "total_steps": 190, "eval_loss": 0.4449075758457184, "epoch": 2.6442953020134228, "percentage": 52.63, "elapsed_time": "0:46:01", "remaining_time": "0:41:25", "throughput": 17836.92, "total_tokens": 49251648}
{"current_steps": 101, "total_steps": 190, "loss": 0.7803, "lr": 2.7064483636808313e-05, "epoch": 2.6711409395973154, "percentage": 53.16, "elapsed_time": "0:47:28", "remaining_time": "0:41:49", "throughput": 17468.72, "total_tokens": 49752000}
{"current_steps": 102, "total_steps": 190, "loss": 0.6524, "lr": 2.6606432547836757e-05, "epoch": 2.697986577181208, "percentage": 53.68, "elapsed_time": "0:47:53", "remaining_time": "0:41:19", "throughput": 17487.59, "total_tokens": 50259200}
{"current_steps": 103, "total_steps": 190, "loss": 3.3874, "lr": 2.6147839260999295e-05, "epoch": 2.7248322147651005, "percentage": 54.21, "elapsed_time": "0:48:18", "remaining_time": "0:40:48", "throughput": 17498.61, "total_tokens": 50722880}
{"current_steps": 104, "total_steps": 190, "loss": 0.3131, "lr": 2.5688858559204053e-05, "epoch": 2.751677852348993, "percentage": 54.74, "elapsed_time": "0:48:45", "remaining_time": "0:40:18", "throughput": 17513.56, "total_tokens": 51231200}
{"current_steps": 105, "total_steps": 190, "loss": 0.363, "lr": 2.5229645356118163e-05, "epoch": 2.778523489932886, "percentage": 55.26, "elapsed_time": "0:49:11", "remaining_time": "0:39:49", "throughput": 17528.62, "total_tokens": 51737856}
{"current_steps": 106, "total_steps": 190, "loss": 0.3095, "lr": 2.4770354643881843e-05, "epoch": 2.8053691275167782, "percentage": 55.79, "elapsed_time": "0:49:37", "remaining_time": "0:39:19", "throughput": 17543.43, "total_tokens": 52237408}
{"current_steps": 107, "total_steps": 190, "loss": 0.5962, "lr": 2.4311141440795953e-05, "epoch": 2.8322147651006713, "percentage": 56.32, "elapsed_time": "0:50:03", "remaining_time": "0:38:49", "throughput": 17561.59, "total_tokens": 52739936}
{"current_steps": 108, "total_steps": 190, "loss": 1.8934, "lr": 2.3852160739000707e-05, "epoch": 2.859060402684564, "percentage": 56.84, "elapsed_time": "0:50:28", "remaining_time": "0:38:19", "throughput": 17579.52, "total_tokens": 53241504}
{"current_steps": 109, "total_steps": 190, "loss": 0.2885, "lr": 2.3393567452163252e-05, "epoch": 2.8859060402684564, "percentage": 57.37, "elapsed_time": "0:50:54", "remaining_time": "0:37:50", "throughput": 17584.19, "total_tokens": 53715488}
{"current_steps": 110, "total_steps": 190, "loss": 0.356, "lr": 2.2935516363191693e-05, "epoch": 2.912751677852349, "percentage": 57.89, "elapsed_time": "0:51:20", "remaining_time": "0:37:20", "throughput": 17592.06, "total_tokens": 54199424}
{"current_steps": 111, "total_steps": 190, "loss": 0.3343, "lr": 2.2478162071993298e-05, "epoch": 2.9395973154362416, "percentage": 58.42, "elapsed_time": "0:51:46", "remaining_time": "0:36:51", "throughput": 17600.42, "total_tokens": 54682048}
{"current_steps": 112, "total_steps": 190, "loss": 0.2983, "lr": 2.202165894329441e-05, "epoch": 2.966442953020134, "percentage": 58.95, "elapsed_time": "0:52:12", "remaining_time": "0:36:21", "throughput": 17618.09, "total_tokens": 55197472}
{"current_steps": 113, "total_steps": 190, "loss": 0.3724, "lr": 2.1566161054539798e-05, "epoch": 2.9932885906040267, "percentage": 59.47, "elapsed_time": "0:52:39", "remaining_time": "0:35:52", "throughput": 17633.35, "total_tokens": 55707552}
{"current_steps": 114, "total_steps": 190, "loss": 0.3716, "lr": 2.111182214388893e-05, "epoch": 3.0, "percentage": 60.0, "elapsed_time": "0:52:44", "remaining_time": "0:35:09", "throughput": 17646.84, "total_tokens": 55836384}
{"current_steps": 115, "total_steps": 190, "loss": 1.4446, "lr": 2.0658795558326743e-05, "epoch": 3.0268456375838926, "percentage": 60.53, "elapsed_time": "0:53:10", "remaining_time": "0:34:40", "throughput": 17656.3, "total_tokens": 56327904}
{"current_steps": 116, "total_steps": 190, "loss": 0.788, "lr": 2.0207234201906547e-05, "epoch": 3.053691275167785, "percentage": 61.05, "elapsed_time": "0:53:35", "remaining_time": "0:34:11", "throughput": 17665.41, "total_tokens": 56808032}
{"current_steps": 117, "total_steps": 190, "loss": 0.2385, "lr": 1.9757290484142244e-05, "epoch": 3.0805369127516777, "percentage": 61.58, "elapsed_time": "0:54:01", "remaining_time": "0:33:42", "throughput": 17684.17, "total_tokens": 57316256}
{"current_steps": 118, "total_steps": 190, "loss": 0.4483, "lr": 1.9309116268567674e-05, "epoch": 3.1073825503355703, "percentage": 62.11, "elapsed_time": "0:54:26", "remaining_time": "0:33:13", "throughput": 17693.35, "total_tokens": 57801920}
{"current_steps": 119, "total_steps": 190, "loss": 0.2359, "lr": 1.8862862821480025e-05, "epoch": 3.134228187919463, "percentage": 62.63, "elapsed_time": "0:54:52", "remaining_time": "0:32:44", "throughput": 17705.84, "total_tokens": 58303008}
{"current_steps": 120, "total_steps": 190, "loss": 0.2234, "lr": 1.8418680760885027e-05, "epoch": 3.1610738255033555, "percentage": 63.16, "elapsed_time": "0:55:18", "remaining_time": "0:32:15", "throughput": 17719.94, "total_tokens": 58809248}
{"current_steps": 121, "total_steps": 190, "loss": 0.2944, "lr": 1.797672000566077e-05, "epoch": 3.1879194630872485, "percentage": 63.68, "elapsed_time": "0:55:44", "remaining_time": "0:31:46", "throughput": 17735.03, "total_tokens": 59306304}
{"current_steps": 122, "total_steps": 190, "loss": 0.2507, "lr": 1.7537129724957642e-05, "epoch": 3.214765100671141, "percentage": 64.21, "elapsed_time": "0:56:09", "remaining_time": "0:31:18", "throughput": 17750.34, "total_tokens": 59810208}
{"current_steps": 123, "total_steps": 190, "loss": 0.2786, "lr": 1.710005828785119e-05, "epoch": 3.2416107382550337, "percentage": 64.74, "elapsed_time": "0:56:35", "remaining_time": "0:30:49", "throughput": 17765.5, "total_tokens": 60328096}
{"current_steps": 124, "total_steps": 190, "loss": 0.8919, "lr": 1.666565321326512e-05, "epoch": 3.2684563758389262, "percentage": 65.26, "elapsed_time": "0:57:01", "remaining_time": "0:30:21", "throughput": 17777.03, "total_tokens": 60832896}
{"current_steps": 125, "total_steps": 190, "loss": 0.2713, "lr": 1.6234061120181142e-05, "epoch": 3.295302013422819, "percentage": 65.79, "elapsed_time": "0:57:28", "remaining_time": "0:29:52", "throughput": 17789.29, "total_tokens": 61338784}
{"current_steps": 125, "total_steps": 190, "eval_loss": 0.41680020093917847, "epoch": 3.295302013422819, "percentage": 65.79, "elapsed_time": "0:57:37", "remaining_time": "0:29:58", "throughput": 17739.19, "total_tokens": 61338784}
{"current_steps": 126, "total_steps": 190, "loss": 0.6785, "lr": 1.5805427678152677e-05, "epoch": 3.3221476510067114, "percentage": 66.32, "elapsed_time": "0:59:08", "remaining_time": "0:30:02", "throughput": 17426.6, "total_tokens": 61841152}
{"current_steps": 127, "total_steps": 190, "loss": 0.2253, "lr": 1.5379897558138862e-05, "epoch": 3.348993288590604, "percentage": 66.84, "elapsed_time": "0:59:35", "remaining_time": "0:29:33", "throughput": 17430.3, "total_tokens": 62330464}
{"current_steps": 128, "total_steps": 190, "loss": 0.5729, "lr": 1.495761438367577e-05, "epoch": 3.3758389261744965, "percentage": 67.37, "elapsed_time": "1:00:02", "remaining_time": "0:29:04", "throughput": 17438.26, "total_tokens": 62816512}
{"current_steps": 129, "total_steps": 190, "loss": 0.2369, "lr": 1.4538720682400969e-05, "epoch": 3.402684563758389, "percentage": 67.89, "elapsed_time": "1:00:28", "remaining_time": "0:28:35", "throughput": 17444.42, "total_tokens": 63301760}
{"current_steps": 130, "total_steps": 190, "loss": 0.2656, "lr": 1.4123357837948175e-05, "epoch": 3.4295302013422817, "percentage": 68.42, "elapsed_time": "1:00:55", "remaining_time": "0:28:07", "throughput": 17450.77, "total_tokens": 63788736}
{"current_steps": 131, "total_steps": 190, "loss": 0.6595, "lr": 1.3711666042227772e-05, "epoch": 3.4563758389261743, "percentage": 68.95, "elapsed_time": "1:01:21", "remaining_time": "0:27:38", "throughput": 17466.75, "total_tokens": 64302464}
{"current_steps": 132, "total_steps": 190, "loss": 0.3676, "lr": 1.330378424810981e-05, "epoch": 3.4832214765100673, "percentage": 69.47, "elapsed_time": "1:01:57", "remaining_time": "0:27:13", "throughput": 17429.6, "total_tokens": 64802208}
{"current_steps": 133, "total_steps": 190, "loss": 0.2294, "lr": 1.2899850122525037e-05, "epoch": 3.51006711409396, "percentage": 70.0, "elapsed_time": "1:02:23", "remaining_time": "0:26:44", "throughput": 17444.56, "total_tokens": 65304832}
{"current_steps": 134, "total_steps": 190, "loss": 0.2305, "lr": 1.2500000000000006e-05, "epoch": 3.5369127516778525, "percentage": 70.53, "elapsed_time": "1:02:49", "remaining_time": "0:26:15", "throughput": 17457.74, "total_tokens": 65811296}
{"current_steps": 135, "total_steps": 190, "loss": 0.2474, "lr": 1.2104368836641908e-05, "epoch": 3.563758389261745, "percentage": 71.05, "elapsed_time": "1:03:15", "remaining_time": "0:25:46", "throughput": 17469.6, "total_tokens": 66312960}
{"current_steps": 136, "total_steps": 190, "loss": 1.6081, "lr": 1.1713090164588607e-05, "epoch": 3.5906040268456376, "percentage": 71.58, "elapsed_time": "1:03:42", "remaining_time": "0:25:17", "throughput": 17480.73, "total_tokens": 66814048}
{"current_steps": 137, "total_steps": 190, "loss": 0.2101, "lr": 1.1326296046939333e-05, "epoch": 3.61744966442953, "percentage": 72.11, "elapsed_time": "1:04:07", "remaining_time": "0:24:48", "throughput": 17494.82, "total_tokens": 67318080}
{"current_steps": 138, "total_steps": 190, "loss": 0.3559, "lr": 1.0944117033181151e-05, "epoch": 3.6442953020134228, "percentage": 72.63, "elapsed_time": "1:04:33", "remaining_time": "0:24:19", "throughput": 17510.56, "total_tokens": 67829760}
{"current_steps": 139, "total_steps": 190, "loss": 0.2799, "lr": 1.0566682115126344e-05, "epoch": 3.6711409395973154, "percentage": 73.16, "elapsed_time": "1:04:59", "remaining_time": "0:23:50", "throughput": 17520.19, "total_tokens": 68327200}
{"current_steps": 140, "total_steps": 190, "loss": 0.2712, "lr": 1.0194118683375503e-05, "epoch": 3.697986577181208, "percentage": 73.68, "elapsed_time": "1:05:25", "remaining_time": "0:23:21", "throughput": 17530.95, "total_tokens": 68815616}
{"current_steps": 141, "total_steps": 190, "loss": 0.4591, "lr": 9.826552484321087e-06, "epoch": 3.7248322147651005, "percentage": 74.21, "elapsed_time": "1:05:51", "remaining_time": "0:22:53", "throughput": 17544.19, "total_tokens": 69326240}
{"current_steps": 142, "total_steps": 190, "loss": 0.3586, "lr": 9.464107577705886e-06, "epoch": 3.751677852348993, "percentage": 74.74, "elapsed_time": "1:06:17", "remaining_time": "0:22:24", "throughput": 17556.11, "total_tokens": 69827296}
{"current_steps": 143, "total_steps": 190, "loss": 0.1854, "lr": 9.106906294750805e-06, "epoch": 3.778523489932886, "percentage": 75.26, "elapsed_time": "1:06:43", "remaining_time": "0:21:55", "throughput": 17566.95, "total_tokens": 70330368}
{"current_steps": 144, "total_steps": 190, "loss": 0.3039, "lr": 8.755069196866014e-06, "epoch": 3.8053691275167782, "percentage": 75.79, "elapsed_time": "1:07:09", "remaining_time": "0:21:27", "throughput": 17580.29, "total_tokens": 70844640}
{"current_steps": 145, "total_steps": 190, "loss": 0.2444, "lr": 8.40871503495947e-06, "epoch": 3.8322147651006713, "percentage": 76.32, "elapsed_time": "1:07:35", "remaining_time": "0:20:58", "throughput": 17590.41, "total_tokens": 71346368}
{"current_steps": 146, "total_steps": 190, "loss": 0.1907, "lr": 8.067960709356478e-06, "epoch": 3.859060402684564, "percentage": 76.84, "elapsed_time": "1:08:02", "remaining_time": "0:20:30", "throughput": 17602.16, "total_tokens": 71858432}
{"current_steps": 147, "total_steps": 190, "loss": 0.9888, "lr": 7.732921230343892e-06, "epoch": 3.8859060402684564, "percentage": 77.37, "elapsed_time": "1:08:28", "remaining_time": "0:20:01", "throughput": 17612.39, "total_tokens": 72365984}
{"current_steps": 148, "total_steps": 190, "loss": 0.3004, "lr": 7.403709679352217e-06, "epoch": 3.912751677852349, "percentage": 77.89, "elapsed_time": "1:08:54", "remaining_time": "0:19:33", "throughput": 17621.02, "total_tokens": 72855456}
{"current_steps": 149, "total_steps": 190, "loss": 0.2575, "lr": 7.080437170788723e-06, "epoch": 3.9395973154362416, "percentage": 78.42, "elapsed_time": "1:09:20", "remaining_time": "0:19:04", "throughput": 17629.93, "total_tokens": 73352832}
{"current_steps": 150, "total_steps": 190, "loss": 0.2785, "lr": 6.763212814534484e-06, "epoch": 3.966442953020134, "percentage": 78.95, "elapsed_time": "1:09:46", "remaining_time": "0:18:36", "throughput": 17640.45, "total_tokens": 73844000}
{"current_steps": 150, "total_steps": 190, "eval_loss": 0.39368969202041626, "epoch": 3.966442953020134, "percentage": 78.95, "elapsed_time": "1:09:55", "remaining_time": "0:18:38", "throughput": 17599.6, "total_tokens": 73844000}
{"current_steps": 151, "total_steps": 190, "loss": 0.3369, "lr": 6.452143679117964e-06, "epoch": 3.9932885906040267, "percentage": 79.47, "elapsed_time": "1:11:21", "remaining_time": "0:18:25", "throughput": 17361.73, "total_tokens": 74341056}
{"current_steps": 152, "total_steps": 190, "loss": 0.1998, "lr": 6.147334755577596e-06, "epoch": 4.0, "percentage": 80.0, "elapsed_time": "1:11:33", "remaining_time": "0:17:53", "throughput": 17344.08, "total_tokens": 74466592}
{"current_steps": 153, "total_steps": 190, "loss": 0.2124, "lr": 5.848888922025553e-06, "epoch": 4.026845637583893, "percentage": 80.53, "elapsed_time": "1:12:00", "remaining_time": "0:17:24", "throughput": 17350.5, "total_tokens": 74968096}
{"current_steps": 154, "total_steps": 190, "loss": 0.1877, "lr": 5.556906908924655e-06, "epoch": 4.053691275167785, "percentage": 81.05, "elapsed_time": "1:12:26", "remaining_time": "0:16:56", "throughput": 17364.95, "total_tokens": 75482144}
{"current_steps": 155, "total_steps": 190, "loss": 0.1934, "lr": 5.271487265090163e-06, "epoch": 4.080536912751678, "percentage": 81.58, "elapsed_time": "1:12:53", "remaining_time": "0:16:27", "throughput": 17374.03, "total_tokens": 75981600}
{"current_steps": 156, "total_steps": 190, "loss": 0.2432, "lr": 4.992726324427901e-06, "epoch": 4.10738255033557, "percentage": 82.11, "elapsed_time": "1:13:19", "remaining_time": "0:15:58", "throughput": 17388.14, "total_tokens": 76494816}
{"current_steps": 157, "total_steps": 190, "loss": 0.1627, "lr": 4.720718173419947e-06, "epoch": 4.134228187919463, "percentage": 82.63, "elapsed_time": "1:13:45", "remaining_time": "0:15:30", "throughput": 17397.16, "total_tokens": 76989536}
{"current_steps": 158, "total_steps": 190, "loss": 0.2083, "lr": 4.4555546193688735e-06, "epoch": 4.1610738255033555, "percentage": 83.16, "elapsed_time": "1:14:11", "remaining_time": "0:15:01", "throughput": 17406.71, "total_tokens": 77490912}
{"current_steps": 159, "total_steps": 190, "loss": 0.3569, "lr": 4.19732515941125e-06, "epoch": 4.1879194630872485, "percentage": 83.68, "elapsed_time": "1:14:37", "remaining_time": "0:14:33", "throughput": 17417.44, "total_tokens": 77993536}
{"current_steps": 160, "total_steps": 190, "loss": 0.2129, "lr": 3.94611695031086e-06, "epoch": 4.214765100671141, "percentage": 84.21, "elapsed_time": "1:15:04", "remaining_time": "0:14:04", "throughput": 17425.82, "total_tokens": 78492672}
{"current_steps": 161, "total_steps": 190, "loss": 0.1689, "lr": 3.7020147790418263e-06, "epoch": 4.241610738255034, "percentage": 84.74, "elapsed_time": "1:15:29", "remaining_time": "0:13:35", "throughput": 17434.29, "total_tokens": 78977248}
{"current_steps": 162, "total_steps": 190, "loss": 0.1889, "lr": 3.4651010341716028e-06, "epoch": 4.268456375838926, "percentage": 85.26, "elapsed_time": "1:15:55", "remaining_time": "0:13:07", "throughput": 17447.62, "total_tokens": 79486816}
{"current_steps": 163, "total_steps": 190, "loss": 0.2081, "lr": 3.2354556780534424e-06, "epoch": 4.295302013422819, "percentage": 85.79, "elapsed_time": "1:16:21", "remaining_time": "0:12:38", "throughput": 17459.39, "total_tokens": 79985920}
{"current_steps": 164, "total_steps": 190, "loss": 0.3593, "lr": 3.013156219837776e-06, "epoch": 4.322147651006711, "percentage": 86.32, "elapsed_time": "1:16:47", "remaining_time": "0:12:10", "throughput": 17468.36, "total_tokens": 80483456}
{"current_steps": 165, "total_steps": 190, "loss": 0.2434, "lr": 2.7982776893115627e-06, "epoch": 4.348993288590604, "percentage": 86.84, "elapsed_time": "1:17:13", "remaining_time": "0:11:41", "throughput": 17478.04, "total_tokens": 80976384}
{"current_steps": 166, "total_steps": 190, "loss": 0.1939, "lr": 2.5908926115744997e-06, "epoch": 4.375838926174497, "percentage": 87.37, "elapsed_time": "1:17:39", "remaining_time": "0:11:13", "throughput": 17487.71, "total_tokens": 81477120}
{"current_steps": 167, "total_steps": 190, "loss": 0.2118, "lr": 2.391070982560564e-06, "epoch": 4.402684563758389, "percentage": 87.89, "elapsed_time": "1:18:05", "remaining_time": "0:10:45", "throughput": 17491.22, "total_tokens": 81950496}
{"current_steps": 168, "total_steps": 190, "loss": 0.1738, "lr": 2.1988802454132403e-06, "epoch": 4.429530201342282, "percentage": 88.42, "elapsed_time": "1:18:31", "remaining_time": "0:10:16", "throughput": 17500.7, "total_tokens": 82452832}
{"current_steps": 169, "total_steps": 190, "loss": 0.1575, "lr": 2.0143852677223075e-06, "epoch": 4.456375838926174, "percentage": 88.95, "elapsed_time": "1:18:56", "remaining_time": "0:09:48", "throughput": 17510.83, "total_tokens": 82948672}
{"current_steps": 170, "total_steps": 190, "loss": 0.6905, "lr": 1.837648319629956e-06, "epoch": 4.483221476510067, "percentage": 89.47, "elapsed_time": "1:19:22", "remaining_time": "0:09:20", "throughput": 17523.79, "total_tokens": 83455616}
{"current_steps": 171, "total_steps": 190, "loss": 0.5674, "lr": 1.6687290528135723e-06, "epoch": 4.510067114093959, "percentage": 90.0, "elapsed_time": "1:19:48", "remaining_time": "0:08:52", "throughput": 17534.9, "total_tokens": 83964800}
{"current_steps": 172, "total_steps": 190, "loss": 0.2391, "lr": 1.5076844803522922e-06, "epoch": 4.5369127516778525, "percentage": 90.53, "elapsed_time": "1:20:14", "remaining_time": "0:08:23", "throughput": 17545.62, "total_tokens": 84478432}
{"current_steps": 173, "total_steps": 190, "loss": 0.1941, "lr": 1.3545689574841342e-06, "epoch": 4.563758389261745, "percentage": 91.05, "elapsed_time": "1:20:41", "remaining_time": "0:07:55", "throughput": 17553.71, "total_tokens": 84981664}
{"current_steps": 174, "total_steps": 190, "loss": 0.1917, "lr": 1.2094341632602064e-06, "epoch": 4.590604026845638, "percentage": 91.58, "elapsed_time": "1:21:07", "remaining_time": "0:07:27", "throughput": 17561.44, "total_tokens": 85477632}
{"current_steps": 175, "total_steps": 190, "loss": 0.9737, "lr": 1.0723290831021471e-06, "epoch": 4.617449664429531, "percentage": 92.11, "elapsed_time": "1:21:33", "remaining_time": "0:06:59", "throughput": 17573.28, "total_tokens": 85997184}
{"current_steps": 175, "total_steps": 190, "eval_loss": 0.387528657913208, "epoch": 4.617449664429531, "percentage": 92.11, "elapsed_time": "1:21:43", "remaining_time": "0:07:00", "throughput": 17538.45, "total_tokens": 85997184}
{"current_steps": 176, "total_steps": 190, "loss": 0.3249, "lr": 9.432999922687396e-07, "epoch": 4.644295302013423, "percentage": 92.63, "elapsed_time": "1:23:09", "remaining_time": "0:06:36", "throughput": 17339.27, "total_tokens": 86516704}
{"current_steps": 177, "total_steps": 190, "loss": 0.2329, "lr": 8.223904402372334e-07, "epoch": 4.671140939597316, "percentage": 93.16, "elapsed_time": "1:23:36", "remaining_time": "0:06:08", "throughput": 17351.22, "total_tokens": 87033760}
{"current_steps": 178, "total_steps": 190, "loss": 0.638, "lr": 7.096412360046545e-07, "epoch": 4.697986577181208, "percentage": 93.68, "elapsed_time": "1:24:02", "remaining_time": "0:05:39", "throughput": 17360.71, "total_tokens": 87535840}
{"current_steps": 179, "total_steps": 190, "loss": 0.6854, "lr": 6.050904343141095e-07, "epoch": 4.724832214765101, "percentage": 94.21, "elapsed_time": "1:24:27", "remaining_time": "0:05:11", "throughput": 17371.53, "total_tokens": 88031648}
{"current_steps": 180, "total_steps": 190, "loss": 0.2507, "lr": 5.087733228106517e-07, "epoch": 4.751677852348993, "percentage": 94.74, "elapsed_time": "1:24:53", "remaining_time": "0:04:42", "throughput": 17385.01, "total_tokens": 88549984}
{"current_steps": 181, "total_steps": 190, "loss": 0.2274, "lr": 4.207224101311247e-07, "epoch": 4.778523489932886, "percentage": 95.26, "elapsed_time": "1:25:19", "remaining_time": "0:04:14", "throughput": 17396.38, "total_tokens": 89061824}
{"current_steps": 182, "total_steps": 190, "loss": 0.1786, "lr": 3.4096741493194197e-07, "epoch": 4.805369127516778, "percentage": 95.79, "elapsed_time": "1:25:45", "remaining_time": "0:03:46", "throughput": 17401.03, "total_tokens": 89531904}
{"current_steps": 183, "total_steps": 190, "loss": 0.2258, "lr": 2.6953525585855234e-07, "epoch": 4.832214765100671, "percentage": 96.32, "elapsed_time": "1:26:11", "remaining_time": "0:03:17", "throughput": 17412.83, "total_tokens": 90043488}
{"current_steps": 184, "total_steps": 190, "loss": 0.2191, "lr": 2.064500424599436e-07, "epoch": 4.859060402684563, "percentage": 96.84, "elapsed_time": "1:26:37", "remaining_time": "0:02:49", "throughput": 17421.2, "total_tokens": 90540736}
{"current_steps": 185, "total_steps": 190, "loss": 0.2243, "lr": 1.517330670512629e-07, "epoch": 4.885906040268456, "percentage": 97.37, "elapsed_time": "1:27:03", "remaining_time": "0:02:21", "throughput": 17430.7, "total_tokens": 91045824}
{"current_steps": 186, "total_steps": 190, "loss": 0.1879, "lr": 1.0540279752731253e-07, "epoch": 4.912751677852349, "percentage": 97.89, "elapsed_time": "1:27:29", "remaining_time": "0:01:52", "throughput": 17438.21, "total_tokens": 91533568}
{"current_steps": 187, "total_steps": 190, "loss": 0.6272, "lr": 6.747487112931661e-08, "epoch": 4.939597315436242, "percentage": 98.42, "elapsed_time": "1:27:55", "remaining_time": "0:01:24", "throughput": 17447.73, "total_tokens": 92045280}
{"current_steps": 188, "total_steps": 190, "loss": 0.1572, "lr": 3.796208916709565e-08, "epoch": 4.966442953020135, "percentage": 98.95, "elapsed_time": "1:28:21", "remaining_time": "0:00:56", "throughput": 17458.0, "total_tokens": 92560096}
{"current_steps": 189, "total_steps": 190, "loss": 0.1958, "lr": 1.6874412698408836e-08, "epoch": 4.993288590604027, "percentage": 99.47, "elapsed_time": "1:28:48", "remaining_time": "0:00:28", "throughput": 17465.42, "total_tokens": 93057056}
{"current_steps": 190, "total_steps": 190, "loss": 0.2471, "lr": 4.218959166932268e-09, "epoch": 5.0, "percentage": 100.0, "elapsed_time": "1:28:52", "remaining_time": "0:00:00", "throughput": 17473.95, "total_tokens": 93186816}
{"current_steps": 190, "total_steps": 190, "epoch": 5.0, "percentage": 100.0, "elapsed_time": "1:29:51", "remaining_time": "0:00:00", "throughput": 17283.94, "total_tokens": 93186816}