{ "best_metric": 0.42429444193840027, "best_model_checkpoint": "t5/checkpoint-3921268", "epoch": 73.0, "global_step": 3921268, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0, "learning_rate": 0.00099, "loss": 0.6596, "step": 53716 }, { "epoch": 1.0, "eval_bleu": 7.691952957568906, "eval_loss": 0.5863233804702759, "eval_runtime": 6528.3322, "eval_samples_per_second": 16.456, "eval_steps_per_second": 1.029, "step": 53716 }, { "epoch": 2.0, "learning_rate": 0.00098, "loss": 0.5807, "step": 107432 }, { "epoch": 2.0, "eval_bleu": 7.596172987179689, "eval_loss": 0.5534030199050903, "eval_runtime": 6601.0537, "eval_samples_per_second": 16.275, "eval_steps_per_second": 1.017, "step": 107432 }, { "epoch": 3.0, "learning_rate": 0.0009699999999999999, "loss": 0.5569, "step": 161148 }, { "epoch": 3.0, "eval_bleu": 7.149818476141028, "eval_loss": 0.538519024848938, "eval_runtime": 6606.0069, "eval_samples_per_second": 16.263, "eval_steps_per_second": 1.016, "step": 161148 }, { "epoch": 4.0, "learning_rate": 0.00096, "loss": 0.5441, "step": 214864 }, { "epoch": 4.0, "eval_bleu": 7.493924944402864, "eval_loss": 0.5301510691642761, "eval_runtime": 6228.7775, "eval_samples_per_second": 17.248, "eval_steps_per_second": 1.078, "step": 214864 }, { "epoch": 5.0, "learning_rate": 0.00095, "loss": 0.5349, "step": 268580 }, { "epoch": 5.0, "eval_bleu": 7.031863868448649, "eval_loss": 0.5224108695983887, "eval_runtime": 6611.0145, "eval_samples_per_second": 16.25, "eval_steps_per_second": 1.016, "step": 268580 }, { "epoch": 6.0, "learning_rate": 0.00094, "loss": 0.5281, "step": 322296 }, { "epoch": 6.0, "eval_bleu": 7.644179348361122, "eval_loss": 0.5192911028862, "eval_runtime": 6621.1034, "eval_samples_per_second": 16.226, "eval_steps_per_second": 1.014, "step": 322296 }, { "epoch": 7.0, "learning_rate": 0.00093, "loss": 0.5222, "step": 376012 }, { "epoch": 7.0, "eval_bleu": 7.607672700840728, "eval_loss": 0.5128632187843323, "eval_runtime": 6621.733, "eval_samples_per_second": 16.224, "eval_steps_per_second": 1.014, "step": 376012 }, { "epoch": 8.0, "learning_rate": 0.00092, "loss": 0.5181, "step": 429728 }, { "epoch": 8.0, "eval_bleu": 6.071314840861525, "eval_loss": 0.5084598064422607, "eval_runtime": 6242.5238, "eval_samples_per_second": 17.21, "eval_steps_per_second": 1.076, "step": 429728 }, { "epoch": 9.0, "learning_rate": 0.00091, "loss": 0.5137, "step": 483444 }, { "epoch": 9.0, "eval_bleu": 7.175821994303286, "eval_loss": 0.5051391124725342, "eval_runtime": 6238.6565, "eval_samples_per_second": 17.22, "eval_steps_per_second": 1.076, "step": 483444 }, { "epoch": 10.0, "learning_rate": 0.0009000000000000001, "loss": 0.5093, "step": 537160 }, { "epoch": 10.0, "eval_bleu": 7.716511125290912, "eval_loss": 0.5000638961791992, "eval_runtime": 6244.5271, "eval_samples_per_second": 17.204, "eval_steps_per_second": 1.075, "step": 537160 }, { "epoch": 11.0, "learning_rate": 0.0008900000000000001, "loss": 0.5037, "step": 590876 }, { "epoch": 11.0, "eval_bleu": 7.072832342346184, "eval_loss": 0.4958619177341461, "eval_runtime": 6248.1767, "eval_samples_per_second": 17.194, "eval_steps_per_second": 1.075, "step": 590876 }, { "epoch": 12.0, "learning_rate": 0.00088, "loss": 0.4992, "step": 644592 }, { "epoch": 12.0, "eval_bleu": 7.23951068440794, "eval_loss": 0.4918939471244812, "eval_runtime": 6240.6707, "eval_samples_per_second": 17.215, "eval_steps_per_second": 1.076, "step": 644592 }, { "epoch": 13.0, "learning_rate": 0.00087, "loss": 0.4954, "step": 698308 }, { "epoch": 13.0, "eval_bleu": 7.381643836163121, "eval_loss": 0.4886699914932251, "eval_runtime": 6245.5311, "eval_samples_per_second": 17.201, "eval_steps_per_second": 1.075, "step": 698308 }, { "epoch": 14.0, "learning_rate": 0.00086, "loss": 0.4915, "step": 752024 }, { "epoch": 14.0, "eval_bleu": 5.9857507598052075, "eval_loss": 0.4870322346687317, "eval_runtime": 6239.2065, "eval_samples_per_second": 17.219, "eval_steps_per_second": 1.076, "step": 752024 }, { "epoch": 15.0, "learning_rate": 0.00085, "loss": 0.488, "step": 805740 }, { "epoch": 15.0, "eval_bleu": 7.5727246325090976, "eval_loss": 0.4828002154827118, "eval_runtime": 6235.1501, "eval_samples_per_second": 17.23, "eval_steps_per_second": 1.077, "step": 805740 }, { "epoch": 16.0, "learning_rate": 0.00084, "loss": 0.4862, "step": 859456 }, { "epoch": 16.0, "eval_bleu": 7.59567809120864, "eval_loss": 0.4813084900379181, "eval_runtime": 6235.7161, "eval_samples_per_second": 17.228, "eval_steps_per_second": 1.077, "step": 859456 }, { "epoch": 17.0, "learning_rate": 0.00083, "loss": 0.4827, "step": 913172 }, { "epoch": 17.0, "eval_bleu": 7.1431546130798385, "eval_loss": 0.4796863794326782, "eval_runtime": 6236.1656, "eval_samples_per_second": 17.227, "eval_steps_per_second": 1.077, "step": 913172 }, { "epoch": 18.0, "learning_rate": 0.00082, "loss": 0.4798, "step": 966888 }, { "epoch": 18.0, "eval_bleu": 7.563058401551067, "eval_loss": 0.476810485124588, "eval_runtime": 6252.4371, "eval_samples_per_second": 17.182, "eval_steps_per_second": 1.074, "step": 966888 }, { "epoch": 19.0, "learning_rate": 0.0008100000000000001, "loss": 0.4767, "step": 1020604 }, { "epoch": 19.0, "eval_bleu": 7.242193570088235, "eval_loss": 0.47421401739120483, "eval_runtime": 6234.3401, "eval_samples_per_second": 17.232, "eval_steps_per_second": 1.077, "step": 1020604 }, { "epoch": 20.0, "learning_rate": 0.0008, "loss": 0.4748, "step": 1074320 }, { "epoch": 20.0, "eval_bleu": 6.120830355327935, "eval_loss": 0.47452765703201294, "eval_runtime": 6255.864, "eval_samples_per_second": 17.173, "eval_steps_per_second": 1.073, "step": 1074320 }, { "epoch": 21.0, "learning_rate": 0.00079, "loss": 0.4735, "step": 1128036 }, { "epoch": 21.0, "eval_bleu": 7.641822854665483, "eval_loss": 0.4723513424396515, "eval_runtime": 6304.5815, "eval_samples_per_second": 17.04, "eval_steps_per_second": 1.065, "step": 1128036 }, { "epoch": 22.0, "learning_rate": 0.0007800000000000001, "loss": 0.4716, "step": 1181752 }, { "epoch": 22.0, "eval_bleu": 7.3059537693760594, "eval_loss": 0.4718638062477112, "eval_runtime": 6317.7024, "eval_samples_per_second": 17.005, "eval_steps_per_second": 1.063, "step": 1181752 }, { "epoch": 23.0, "learning_rate": 0.0007700000000000001, "loss": 0.469, "step": 1235468 }, { "epoch": 23.0, "eval_bleu": 7.598346638071266, "eval_loss": 0.46901023387908936, "eval_runtime": 6308.4041, "eval_samples_per_second": 17.03, "eval_steps_per_second": 1.064, "step": 1235468 }, { "epoch": 24.0, "learning_rate": 0.00076, "loss": 0.4669, "step": 1289184 }, { "epoch": 24.0, "eval_bleu": 6.799435285671091, "eval_loss": 0.46680623292922974, "eval_runtime": 6315.2629, "eval_samples_per_second": 17.011, "eval_steps_per_second": 1.063, "step": 1289184 }, { "epoch": 25.0, "learning_rate": 0.00075, "loss": 0.4641, "step": 1342900 }, { "epoch": 25.0, "eval_bleu": 7.62775725124654, "eval_loss": 0.4666709899902344, "eval_runtime": 6262.5243, "eval_samples_per_second": 17.155, "eval_steps_per_second": 1.072, "step": 1342900 }, { "epoch": 26.0, "learning_rate": 0.00074, "loss": 0.4618, "step": 1396616 }, { "epoch": 26.0, "eval_bleu": 6.969418527447973, "eval_loss": 0.4641306698322296, "eval_runtime": 6239.5276, "eval_samples_per_second": 17.218, "eval_steps_per_second": 1.076, "step": 1396616 }, { "epoch": 27.0, "learning_rate": 0.00073, "loss": 0.4606, "step": 1450332 }, { "epoch": 27.0, "eval_bleu": 7.458516781341554, "eval_loss": 0.4627404510974884, "eval_runtime": 6245.0374, "eval_samples_per_second": 17.203, "eval_steps_per_second": 1.075, "step": 1450332 }, { "epoch": 28.0, "learning_rate": 0.0007199999999999999, "loss": 0.4582, "step": 1504048 }, { "epoch": 28.0, "eval_bleu": 6.694355343847021, "eval_loss": 0.46318283677101135, "eval_runtime": 6248.0853, "eval_samples_per_second": 17.194, "eval_steps_per_second": 1.075, "step": 1504048 }, { "epoch": 29.0, "learning_rate": 0.00071, "loss": 0.4569, "step": 1557764 }, { "epoch": 29.0, "eval_bleu": 7.538791367466209, "eval_loss": 0.46087339520454407, "eval_runtime": 6241.0141, "eval_samples_per_second": 17.214, "eval_steps_per_second": 1.076, "step": 1557764 }, { "epoch": 30.0, "learning_rate": 0.0007, "loss": 0.4548, "step": 1611480 }, { "epoch": 30.0, "eval_bleu": 7.528860869957395, "eval_loss": 0.4588477909564972, "eval_runtime": 6242.3068, "eval_samples_per_second": 17.21, "eval_steps_per_second": 1.076, "step": 1611480 }, { "epoch": 31.0, "learning_rate": 0.00069, "loss": 0.4537, "step": 1665196 }, { "epoch": 31.0, "eval_bleu": 7.362216478280285, "eval_loss": 0.4597391188144684, "eval_runtime": 6244.4866, "eval_samples_per_second": 17.204, "eval_steps_per_second": 1.075, "step": 1665196 }, { "epoch": 32.0, "learning_rate": 0.00068, "loss": 0.4513, "step": 1718912 }, { "epoch": 32.0, "eval_bleu": 7.137390175844847, "eval_loss": 0.4572164714336395, "eval_runtime": 6244.2148, "eval_samples_per_second": 17.205, "eval_steps_per_second": 1.075, "step": 1718912 }, { "epoch": 33.0, "learning_rate": 0.00067, "loss": 0.4485, "step": 1772628 }, { "epoch": 33.0, "eval_bleu": 7.081305145228205, "eval_loss": 0.45658349990844727, "eval_runtime": 6241.0726, "eval_samples_per_second": 17.214, "eval_steps_per_second": 1.076, "step": 1772628 }, { "epoch": 34.0, "learning_rate": 0.00066, "loss": 0.4469, "step": 1826344 }, { "epoch": 34.0, "eval_bleu": 7.065210289724078, "eval_loss": 0.4544486701488495, "eval_runtime": 6253.2099, "eval_samples_per_second": 17.18, "eval_steps_per_second": 1.074, "step": 1826344 }, { "epoch": 35.0, "learning_rate": 0.0006500000000000001, "loss": 0.4449, "step": 1880060 }, { "epoch": 35.0, "eval_bleu": 7.378548531953654, "eval_loss": 0.4559008777141571, "eval_runtime": 6234.4769, "eval_samples_per_second": 17.232, "eval_steps_per_second": 1.077, "step": 1880060 }, { "epoch": 36.0, "learning_rate": 0.00064, "loss": 0.4442, "step": 1933776 }, { "epoch": 36.0, "eval_bleu": 7.356901577029033, "eval_loss": 0.4534740746021271, "eval_runtime": 6249.755, "eval_samples_per_second": 17.19, "eval_steps_per_second": 1.074, "step": 1933776 }, { "epoch": 37.0, "learning_rate": 0.00063, "loss": 0.4431, "step": 1987492 }, { "epoch": 37.0, "eval_bleu": 7.175291475992041, "eval_loss": 0.45327481627464294, "eval_runtime": 6249.6092, "eval_samples_per_second": 17.19, "eval_steps_per_second": 1.074, "step": 1987492 }, { "epoch": 38.0, "learning_rate": 0.00062, "loss": 0.441, "step": 2041208 }, { "epoch": 38.0, "eval_bleu": 7.359022144163392, "eval_loss": 0.4524107277393341, "eval_runtime": 6240.8973, "eval_samples_per_second": 17.214, "eval_steps_per_second": 1.076, "step": 2041208 }, { "epoch": 39.0, "learning_rate": 0.00061, "loss": 0.4387, "step": 2094924 }, { "epoch": 39.0, "eval_bleu": 7.5283460331563745, "eval_loss": 0.4496091306209564, "eval_runtime": 6237.5918, "eval_samples_per_second": 17.223, "eval_steps_per_second": 1.077, "step": 2094924 }, { "epoch": 40.0, "learning_rate": 0.0006, "loss": 0.4359, "step": 2148640 }, { "epoch": 40.0, "eval_bleu": 7.5346208014087495, "eval_loss": 0.44786250591278076, "eval_runtime": 6243.0368, "eval_samples_per_second": 17.208, "eval_steps_per_second": 1.076, "step": 2148640 }, { "epoch": 41.0, "learning_rate": 0.00059, "loss": 0.4338, "step": 2202356 }, { "epoch": 41.0, "eval_bleu": 7.406528761971476, "eval_loss": 0.44740021228790283, "eval_runtime": 6247.6804, "eval_samples_per_second": 17.195, "eval_steps_per_second": 1.075, "step": 2202356 }, { "epoch": 42.0, "learning_rate": 0.00058, "loss": 0.4319, "step": 2256072 }, { "epoch": 42.0, "eval_bleu": 7.192159097527976, "eval_loss": 0.447433739900589, "eval_runtime": 6248.8648, "eval_samples_per_second": 17.192, "eval_steps_per_second": 1.075, "step": 2256072 }, { "epoch": 43.0, "learning_rate": 0.00057, "loss": 0.43, "step": 2309788 }, { "epoch": 43.0, "eval_bleu": 7.325069602605064, "eval_loss": 0.4456492066383362, "eval_runtime": 6263.0673, "eval_samples_per_second": 17.153, "eval_steps_per_second": 1.072, "step": 2309788 }, { "epoch": 44.0, "learning_rate": 0.0005600000000000001, "loss": 0.4279, "step": 2363504 }, { "epoch": 44.0, "eval_bleu": 7.532048814014251, "eval_loss": 0.4445250332355499, "eval_runtime": 6241.8298, "eval_samples_per_second": 17.211, "eval_steps_per_second": 1.076, "step": 2363504 }, { "epoch": 45.0, "learning_rate": 0.00055, "loss": 0.426, "step": 2417220 }, { "epoch": 45.0, "eval_bleu": 7.174420155924515, "eval_loss": 0.44330111145973206, "eval_runtime": 6231.5445, "eval_samples_per_second": 17.24, "eval_steps_per_second": 1.078, "step": 2417220 }, { "epoch": 46.0, "learning_rate": 0.00054, "loss": 0.4239, "step": 2470936 }, { "epoch": 46.0, "eval_bleu": 7.653281782827262, "eval_loss": 0.44130608439445496, "eval_runtime": 6244.5962, "eval_samples_per_second": 17.204, "eval_steps_per_second": 1.075, "step": 2470936 }, { "epoch": 47.0, "learning_rate": 0.0005300000000000001, "loss": 0.422, "step": 2524652 }, { "epoch": 47.0, "eval_bleu": 7.358951072022719, "eval_loss": 0.4416486620903015, "eval_runtime": 6246.4215, "eval_samples_per_second": 17.199, "eval_steps_per_second": 1.075, "step": 2524652 }, { "epoch": 48.0, "learning_rate": 0.0005200000000000001, "loss": 0.4206, "step": 2578368 }, { "epoch": 48.0, "eval_bleu": 6.700995294592222, "eval_loss": 0.441184937953949, "eval_runtime": 6248.9024, "eval_samples_per_second": 17.192, "eval_steps_per_second": 1.075, "step": 2578368 }, { "epoch": 49.0, "learning_rate": 0.00051, "loss": 0.4186, "step": 2632084 }, { "epoch": 49.0, "eval_bleu": 7.428626778422992, "eval_loss": 0.44076189398765564, "eval_runtime": 6244.5819, "eval_samples_per_second": 17.204, "eval_steps_per_second": 1.075, "step": 2632084 }, { "epoch": 50.0, "learning_rate": 0.0005, "loss": 0.416, "step": 2685800 }, { "epoch": 50.0, "eval_bleu": 7.538386131386865, "eval_loss": 0.43902388215065, "eval_runtime": 6236.1942, "eval_samples_per_second": 17.227, "eval_steps_per_second": 1.077, "step": 2685800 }, { "epoch": 51.0, "learning_rate": 0.00049, "loss": 0.4145, "step": 2739516 }, { "epoch": 51.0, "eval_bleu": 7.177849858240658, "eval_loss": 0.4388711452484131, "eval_runtime": 6244.4397, "eval_samples_per_second": 17.204, "eval_steps_per_second": 1.075, "step": 2739516 }, { "epoch": 52.0, "learning_rate": 0.00048, "loss": 0.4115, "step": 2793232 }, { "epoch": 52.0, "eval_bleu": 7.3825806146338895, "eval_loss": 0.43703773617744446, "eval_runtime": 6246.8394, "eval_samples_per_second": 17.198, "eval_steps_per_second": 1.075, "step": 2793232 }, { "epoch": 53.0, "learning_rate": 0.00047, "loss": 0.4091, "step": 2846948 }, { "epoch": 53.0, "eval_bleu": 7.354983260965792, "eval_loss": 0.4351899325847626, "eval_runtime": 6249.7526, "eval_samples_per_second": 17.19, "eval_steps_per_second": 1.074, "step": 2846948 }, { "epoch": 54.0, "learning_rate": 0.00046, "loss": 0.4062, "step": 2900664 }, { "epoch": 54.0, "eval_bleu": 7.070865789057057, "eval_loss": 0.4349888265132904, "eval_runtime": 6255.1992, "eval_samples_per_second": 17.175, "eval_steps_per_second": 1.074, "step": 2900664 }, { "epoch": 55.0, "learning_rate": 0.00045000000000000004, "loss": 0.4038, "step": 2954380 }, { "epoch": 55.0, "eval_bleu": 7.724805289860729, "eval_loss": 0.4359044134616852, "eval_runtime": 6250.2651, "eval_samples_per_second": 17.188, "eval_steps_per_second": 1.074, "step": 2954380 }, { "epoch": 56.0, "learning_rate": 0.00044, "loss": 0.402, "step": 3008096 }, { "epoch": 56.0, "eval_bleu": 7.154898411407466, "eval_loss": 0.4326974153518677, "eval_runtime": 6264.3019, "eval_samples_per_second": 17.15, "eval_steps_per_second": 1.072, "step": 3008096 }, { "epoch": 57.0, "learning_rate": 0.00043, "loss": 0.3995, "step": 3061812 }, { "epoch": 57.0, "eval_bleu": 7.508317247767554, "eval_loss": 0.4333774149417877, "eval_runtime": 6245.5823, "eval_samples_per_second": 17.201, "eval_steps_per_second": 1.075, "step": 3061812 }, { "epoch": 58.0, "learning_rate": 0.00042, "loss": 0.3972, "step": 3115528 }, { "epoch": 58.0, "eval_bleu": 7.127695021274113, "eval_loss": 0.43104425072669983, "eval_runtime": 6238.945, "eval_samples_per_second": 17.219, "eval_steps_per_second": 1.076, "step": 3115528 }, { "epoch": 59.0, "learning_rate": 0.00041, "loss": 0.3942, "step": 3169244 }, { "epoch": 59.0, "eval_bleu": 6.749919689906369, "eval_loss": 0.4318625032901764, "eval_runtime": 6250.8033, "eval_samples_per_second": 17.187, "eval_steps_per_second": 1.074, "step": 3169244 }, { "epoch": 60.0, "learning_rate": 0.0004, "loss": 0.3921, "step": 3222960 }, { "epoch": 60.0, "eval_bleu": 7.099626120333918, "eval_loss": 0.4313414394855499, "eval_runtime": 6233.5496, "eval_samples_per_second": 17.234, "eval_steps_per_second": 1.077, "step": 3222960 }, { "epoch": 61.0, "learning_rate": 0.00039000000000000005, "loss": 0.3897, "step": 3276676 }, { "epoch": 61.0, "eval_bleu": 7.280842993868327, "eval_loss": 0.4297857880592346, "eval_runtime": 6255.1519, "eval_samples_per_second": 17.175, "eval_steps_per_second": 1.074, "step": 3276676 }, { "epoch": 62.0, "learning_rate": 0.00038, "loss": 0.3867, "step": 3330392 }, { "epoch": 62.0, "eval_bleu": 7.328384730172046, "eval_loss": 0.42802175879478455, "eval_runtime": 6245.3286, "eval_samples_per_second": 17.202, "eval_steps_per_second": 1.075, "step": 3330392 }, { "epoch": 63.0, "learning_rate": 0.00037, "loss": 0.3832, "step": 3384108 }, { "epoch": 63.0, "eval_bleu": 7.230903636346123, "eval_loss": 0.42855262756347656, "eval_runtime": 6240.2263, "eval_samples_per_second": 17.216, "eval_steps_per_second": 1.076, "step": 3384108 }, { "epoch": 64.0, "learning_rate": 0.00035999999999999997, "loss": 0.3807, "step": 3437824 }, { "epoch": 64.0, "eval_bleu": 7.557291062260919, "eval_loss": 0.4279802143573761, "eval_runtime": 6249.6778, "eval_samples_per_second": 17.19, "eval_steps_per_second": 1.074, "step": 3437824 }, { "epoch": 65.0, "learning_rate": 0.00035, "loss": 0.3779, "step": 3491540 }, { "epoch": 65.0, "eval_bleu": 7.456221414498501, "eval_loss": 0.42722874879837036, "eval_runtime": 6251.0806, "eval_samples_per_second": 17.186, "eval_steps_per_second": 1.074, "step": 3491540 }, { "epoch": 66.0, "learning_rate": 0.00034, "loss": 0.3746, "step": 3545256 }, { "epoch": 66.0, "eval_bleu": 7.38246671281172, "eval_loss": 0.4264260232448578, "eval_runtime": 6254.848, "eval_samples_per_second": 17.176, "eval_steps_per_second": 1.074, "step": 3545256 }, { "epoch": 67.0, "learning_rate": 0.00033, "loss": 0.3713, "step": 3598972 }, { "epoch": 67.0, "eval_bleu": 7.250094489059249, "eval_loss": 0.42612648010253906, "eval_runtime": 6253.4328, "eval_samples_per_second": 17.18, "eval_steps_per_second": 1.074, "step": 3598972 }, { "epoch": 68.0, "learning_rate": 0.00032, "loss": 0.3679, "step": 3652688 }, { "epoch": 68.0, "eval_bleu": 7.24260795309734, "eval_loss": 0.42605340480804443, "eval_runtime": 6249.7217, "eval_samples_per_second": 17.19, "eval_steps_per_second": 1.074, "step": 3652688 }, { "epoch": 69.0, "learning_rate": 0.00031, "loss": 0.3646, "step": 3706404 }, { "epoch": 69.0, "eval_bleu": 7.705633206021796, "eval_loss": 0.42531710863113403, "eval_runtime": 6255.4922, "eval_samples_per_second": 17.174, "eval_steps_per_second": 1.073, "step": 3706404 }, { "epoch": 70.0, "learning_rate": 0.0003, "loss": 0.3617, "step": 3760120 }, { "epoch": 70.0, "eval_bleu": 7.066333513511338, "eval_loss": 0.4245583415031433, "eval_runtime": 6242.6393, "eval_samples_per_second": 17.209, "eval_steps_per_second": 1.076, "step": 3760120 }, { "epoch": 71.0, "learning_rate": 0.00029, "loss": 0.3576, "step": 3813836 }, { "epoch": 71.0, "eval_bleu": 7.453191107022425, "eval_loss": 0.4248814582824707, "eval_runtime": 6236.6745, "eval_samples_per_second": 17.226, "eval_steps_per_second": 1.077, "step": 3813836 }, { "epoch": 72.0, "learning_rate": 0.00028000000000000003, "loss": 0.3538, "step": 3867552 }, { "epoch": 72.0, "eval_bleu": 7.399721264841341, "eval_loss": 0.42560645937919617, "eval_runtime": 6247.7185, "eval_samples_per_second": 17.195, "eval_steps_per_second": 1.075, "step": 3867552 }, { "epoch": 73.0, "learning_rate": 0.00027, "loss": 0.3498, "step": 3921268 }, { "epoch": 73.0, "eval_bleu": 7.23131596568943, "eval_loss": 0.42429444193840027, "eval_runtime": 6248.7182, "eval_samples_per_second": 17.192, "eval_steps_per_second": 1.075, "step": 3921268 } ], "max_steps": 5371600, "num_train_epochs": 100, "total_flos": 8.225066520360465e+18, "trial_name": null, "trial_params": null }