ReactionT5v1-retrosynthesis / trainer_state.json
sagawa's picture
Upload 10 files
7153af1
{
"best_metric": 0.42429444193840027,
"best_model_checkpoint": "t5/checkpoint-3921268",
"epoch": 73.0,
"global_step": 3921268,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 1.0,
"learning_rate": 0.00099,
"loss": 0.6596,
"step": 53716
},
{
"epoch": 1.0,
"eval_bleu": 7.691952957568906,
"eval_loss": 0.5863233804702759,
"eval_runtime": 6528.3322,
"eval_samples_per_second": 16.456,
"eval_steps_per_second": 1.029,
"step": 53716
},
{
"epoch": 2.0,
"learning_rate": 0.00098,
"loss": 0.5807,
"step": 107432
},
{
"epoch": 2.0,
"eval_bleu": 7.596172987179689,
"eval_loss": 0.5534030199050903,
"eval_runtime": 6601.0537,
"eval_samples_per_second": 16.275,
"eval_steps_per_second": 1.017,
"step": 107432
},
{
"epoch": 3.0,
"learning_rate": 0.0009699999999999999,
"loss": 0.5569,
"step": 161148
},
{
"epoch": 3.0,
"eval_bleu": 7.149818476141028,
"eval_loss": 0.538519024848938,
"eval_runtime": 6606.0069,
"eval_samples_per_second": 16.263,
"eval_steps_per_second": 1.016,
"step": 161148
},
{
"epoch": 4.0,
"learning_rate": 0.00096,
"loss": 0.5441,
"step": 214864
},
{
"epoch": 4.0,
"eval_bleu": 7.493924944402864,
"eval_loss": 0.5301510691642761,
"eval_runtime": 6228.7775,
"eval_samples_per_second": 17.248,
"eval_steps_per_second": 1.078,
"step": 214864
},
{
"epoch": 5.0,
"learning_rate": 0.00095,
"loss": 0.5349,
"step": 268580
},
{
"epoch": 5.0,
"eval_bleu": 7.031863868448649,
"eval_loss": 0.5224108695983887,
"eval_runtime": 6611.0145,
"eval_samples_per_second": 16.25,
"eval_steps_per_second": 1.016,
"step": 268580
},
{
"epoch": 6.0,
"learning_rate": 0.00094,
"loss": 0.5281,
"step": 322296
},
{
"epoch": 6.0,
"eval_bleu": 7.644179348361122,
"eval_loss": 0.5192911028862,
"eval_runtime": 6621.1034,
"eval_samples_per_second": 16.226,
"eval_steps_per_second": 1.014,
"step": 322296
},
{
"epoch": 7.0,
"learning_rate": 0.00093,
"loss": 0.5222,
"step": 376012
},
{
"epoch": 7.0,
"eval_bleu": 7.607672700840728,
"eval_loss": 0.5128632187843323,
"eval_runtime": 6621.733,
"eval_samples_per_second": 16.224,
"eval_steps_per_second": 1.014,
"step": 376012
},
{
"epoch": 8.0,
"learning_rate": 0.00092,
"loss": 0.5181,
"step": 429728
},
{
"epoch": 8.0,
"eval_bleu": 6.071314840861525,
"eval_loss": 0.5084598064422607,
"eval_runtime": 6242.5238,
"eval_samples_per_second": 17.21,
"eval_steps_per_second": 1.076,
"step": 429728
},
{
"epoch": 9.0,
"learning_rate": 0.00091,
"loss": 0.5137,
"step": 483444
},
{
"epoch": 9.0,
"eval_bleu": 7.175821994303286,
"eval_loss": 0.5051391124725342,
"eval_runtime": 6238.6565,
"eval_samples_per_second": 17.22,
"eval_steps_per_second": 1.076,
"step": 483444
},
{
"epoch": 10.0,
"learning_rate": 0.0009000000000000001,
"loss": 0.5093,
"step": 537160
},
{
"epoch": 10.0,
"eval_bleu": 7.716511125290912,
"eval_loss": 0.5000638961791992,
"eval_runtime": 6244.5271,
"eval_samples_per_second": 17.204,
"eval_steps_per_second": 1.075,
"step": 537160
},
{
"epoch": 11.0,
"learning_rate": 0.0008900000000000001,
"loss": 0.5037,
"step": 590876
},
{
"epoch": 11.0,
"eval_bleu": 7.072832342346184,
"eval_loss": 0.4958619177341461,
"eval_runtime": 6248.1767,
"eval_samples_per_second": 17.194,
"eval_steps_per_second": 1.075,
"step": 590876
},
{
"epoch": 12.0,
"learning_rate": 0.00088,
"loss": 0.4992,
"step": 644592
},
{
"epoch": 12.0,
"eval_bleu": 7.23951068440794,
"eval_loss": 0.4918939471244812,
"eval_runtime": 6240.6707,
"eval_samples_per_second": 17.215,
"eval_steps_per_second": 1.076,
"step": 644592
},
{
"epoch": 13.0,
"learning_rate": 0.00087,
"loss": 0.4954,
"step": 698308
},
{
"epoch": 13.0,
"eval_bleu": 7.381643836163121,
"eval_loss": 0.4886699914932251,
"eval_runtime": 6245.5311,
"eval_samples_per_second": 17.201,
"eval_steps_per_second": 1.075,
"step": 698308
},
{
"epoch": 14.0,
"learning_rate": 0.00086,
"loss": 0.4915,
"step": 752024
},
{
"epoch": 14.0,
"eval_bleu": 5.9857507598052075,
"eval_loss": 0.4870322346687317,
"eval_runtime": 6239.2065,
"eval_samples_per_second": 17.219,
"eval_steps_per_second": 1.076,
"step": 752024
},
{
"epoch": 15.0,
"learning_rate": 0.00085,
"loss": 0.488,
"step": 805740
},
{
"epoch": 15.0,
"eval_bleu": 7.5727246325090976,
"eval_loss": 0.4828002154827118,
"eval_runtime": 6235.1501,
"eval_samples_per_second": 17.23,
"eval_steps_per_second": 1.077,
"step": 805740
},
{
"epoch": 16.0,
"learning_rate": 0.00084,
"loss": 0.4862,
"step": 859456
},
{
"epoch": 16.0,
"eval_bleu": 7.59567809120864,
"eval_loss": 0.4813084900379181,
"eval_runtime": 6235.7161,
"eval_samples_per_second": 17.228,
"eval_steps_per_second": 1.077,
"step": 859456
},
{
"epoch": 17.0,
"learning_rate": 0.00083,
"loss": 0.4827,
"step": 913172
},
{
"epoch": 17.0,
"eval_bleu": 7.1431546130798385,
"eval_loss": 0.4796863794326782,
"eval_runtime": 6236.1656,
"eval_samples_per_second": 17.227,
"eval_steps_per_second": 1.077,
"step": 913172
},
{
"epoch": 18.0,
"learning_rate": 0.00082,
"loss": 0.4798,
"step": 966888
},
{
"epoch": 18.0,
"eval_bleu": 7.563058401551067,
"eval_loss": 0.476810485124588,
"eval_runtime": 6252.4371,
"eval_samples_per_second": 17.182,
"eval_steps_per_second": 1.074,
"step": 966888
},
{
"epoch": 19.0,
"learning_rate": 0.0008100000000000001,
"loss": 0.4767,
"step": 1020604
},
{
"epoch": 19.0,
"eval_bleu": 7.242193570088235,
"eval_loss": 0.47421401739120483,
"eval_runtime": 6234.3401,
"eval_samples_per_second": 17.232,
"eval_steps_per_second": 1.077,
"step": 1020604
},
{
"epoch": 20.0,
"learning_rate": 0.0008,
"loss": 0.4748,
"step": 1074320
},
{
"epoch": 20.0,
"eval_bleu": 6.120830355327935,
"eval_loss": 0.47452765703201294,
"eval_runtime": 6255.864,
"eval_samples_per_second": 17.173,
"eval_steps_per_second": 1.073,
"step": 1074320
},
{
"epoch": 21.0,
"learning_rate": 0.00079,
"loss": 0.4735,
"step": 1128036
},
{
"epoch": 21.0,
"eval_bleu": 7.641822854665483,
"eval_loss": 0.4723513424396515,
"eval_runtime": 6304.5815,
"eval_samples_per_second": 17.04,
"eval_steps_per_second": 1.065,
"step": 1128036
},
{
"epoch": 22.0,
"learning_rate": 0.0007800000000000001,
"loss": 0.4716,
"step": 1181752
},
{
"epoch": 22.0,
"eval_bleu": 7.3059537693760594,
"eval_loss": 0.4718638062477112,
"eval_runtime": 6317.7024,
"eval_samples_per_second": 17.005,
"eval_steps_per_second": 1.063,
"step": 1181752
},
{
"epoch": 23.0,
"learning_rate": 0.0007700000000000001,
"loss": 0.469,
"step": 1235468
},
{
"epoch": 23.0,
"eval_bleu": 7.598346638071266,
"eval_loss": 0.46901023387908936,
"eval_runtime": 6308.4041,
"eval_samples_per_second": 17.03,
"eval_steps_per_second": 1.064,
"step": 1235468
},
{
"epoch": 24.0,
"learning_rate": 0.00076,
"loss": 0.4669,
"step": 1289184
},
{
"epoch": 24.0,
"eval_bleu": 6.799435285671091,
"eval_loss": 0.46680623292922974,
"eval_runtime": 6315.2629,
"eval_samples_per_second": 17.011,
"eval_steps_per_second": 1.063,
"step": 1289184
},
{
"epoch": 25.0,
"learning_rate": 0.00075,
"loss": 0.4641,
"step": 1342900
},
{
"epoch": 25.0,
"eval_bleu": 7.62775725124654,
"eval_loss": 0.4666709899902344,
"eval_runtime": 6262.5243,
"eval_samples_per_second": 17.155,
"eval_steps_per_second": 1.072,
"step": 1342900
},
{
"epoch": 26.0,
"learning_rate": 0.00074,
"loss": 0.4618,
"step": 1396616
},
{
"epoch": 26.0,
"eval_bleu": 6.969418527447973,
"eval_loss": 0.4641306698322296,
"eval_runtime": 6239.5276,
"eval_samples_per_second": 17.218,
"eval_steps_per_second": 1.076,
"step": 1396616
},
{
"epoch": 27.0,
"learning_rate": 0.00073,
"loss": 0.4606,
"step": 1450332
},
{
"epoch": 27.0,
"eval_bleu": 7.458516781341554,
"eval_loss": 0.4627404510974884,
"eval_runtime": 6245.0374,
"eval_samples_per_second": 17.203,
"eval_steps_per_second": 1.075,
"step": 1450332
},
{
"epoch": 28.0,
"learning_rate": 0.0007199999999999999,
"loss": 0.4582,
"step": 1504048
},
{
"epoch": 28.0,
"eval_bleu": 6.694355343847021,
"eval_loss": 0.46318283677101135,
"eval_runtime": 6248.0853,
"eval_samples_per_second": 17.194,
"eval_steps_per_second": 1.075,
"step": 1504048
},
{
"epoch": 29.0,
"learning_rate": 0.00071,
"loss": 0.4569,
"step": 1557764
},
{
"epoch": 29.0,
"eval_bleu": 7.538791367466209,
"eval_loss": 0.46087339520454407,
"eval_runtime": 6241.0141,
"eval_samples_per_second": 17.214,
"eval_steps_per_second": 1.076,
"step": 1557764
},
{
"epoch": 30.0,
"learning_rate": 0.0007,
"loss": 0.4548,
"step": 1611480
},
{
"epoch": 30.0,
"eval_bleu": 7.528860869957395,
"eval_loss": 0.4588477909564972,
"eval_runtime": 6242.3068,
"eval_samples_per_second": 17.21,
"eval_steps_per_second": 1.076,
"step": 1611480
},
{
"epoch": 31.0,
"learning_rate": 0.00069,
"loss": 0.4537,
"step": 1665196
},
{
"epoch": 31.0,
"eval_bleu": 7.362216478280285,
"eval_loss": 0.4597391188144684,
"eval_runtime": 6244.4866,
"eval_samples_per_second": 17.204,
"eval_steps_per_second": 1.075,
"step": 1665196
},
{
"epoch": 32.0,
"learning_rate": 0.00068,
"loss": 0.4513,
"step": 1718912
},
{
"epoch": 32.0,
"eval_bleu": 7.137390175844847,
"eval_loss": 0.4572164714336395,
"eval_runtime": 6244.2148,
"eval_samples_per_second": 17.205,
"eval_steps_per_second": 1.075,
"step": 1718912
},
{
"epoch": 33.0,
"learning_rate": 0.00067,
"loss": 0.4485,
"step": 1772628
},
{
"epoch": 33.0,
"eval_bleu": 7.081305145228205,
"eval_loss": 0.45658349990844727,
"eval_runtime": 6241.0726,
"eval_samples_per_second": 17.214,
"eval_steps_per_second": 1.076,
"step": 1772628
},
{
"epoch": 34.0,
"learning_rate": 0.00066,
"loss": 0.4469,
"step": 1826344
},
{
"epoch": 34.0,
"eval_bleu": 7.065210289724078,
"eval_loss": 0.4544486701488495,
"eval_runtime": 6253.2099,
"eval_samples_per_second": 17.18,
"eval_steps_per_second": 1.074,
"step": 1826344
},
{
"epoch": 35.0,
"learning_rate": 0.0006500000000000001,
"loss": 0.4449,
"step": 1880060
},
{
"epoch": 35.0,
"eval_bleu": 7.378548531953654,
"eval_loss": 0.4559008777141571,
"eval_runtime": 6234.4769,
"eval_samples_per_second": 17.232,
"eval_steps_per_second": 1.077,
"step": 1880060
},
{
"epoch": 36.0,
"learning_rate": 0.00064,
"loss": 0.4442,
"step": 1933776
},
{
"epoch": 36.0,
"eval_bleu": 7.356901577029033,
"eval_loss": 0.4534740746021271,
"eval_runtime": 6249.755,
"eval_samples_per_second": 17.19,
"eval_steps_per_second": 1.074,
"step": 1933776
},
{
"epoch": 37.0,
"learning_rate": 0.00063,
"loss": 0.4431,
"step": 1987492
},
{
"epoch": 37.0,
"eval_bleu": 7.175291475992041,
"eval_loss": 0.45327481627464294,
"eval_runtime": 6249.6092,
"eval_samples_per_second": 17.19,
"eval_steps_per_second": 1.074,
"step": 1987492
},
{
"epoch": 38.0,
"learning_rate": 0.00062,
"loss": 0.441,
"step": 2041208
},
{
"epoch": 38.0,
"eval_bleu": 7.359022144163392,
"eval_loss": 0.4524107277393341,
"eval_runtime": 6240.8973,
"eval_samples_per_second": 17.214,
"eval_steps_per_second": 1.076,
"step": 2041208
},
{
"epoch": 39.0,
"learning_rate": 0.00061,
"loss": 0.4387,
"step": 2094924
},
{
"epoch": 39.0,
"eval_bleu": 7.5283460331563745,
"eval_loss": 0.4496091306209564,
"eval_runtime": 6237.5918,
"eval_samples_per_second": 17.223,
"eval_steps_per_second": 1.077,
"step": 2094924
},
{
"epoch": 40.0,
"learning_rate": 0.0006,
"loss": 0.4359,
"step": 2148640
},
{
"epoch": 40.0,
"eval_bleu": 7.5346208014087495,
"eval_loss": 0.44786250591278076,
"eval_runtime": 6243.0368,
"eval_samples_per_second": 17.208,
"eval_steps_per_second": 1.076,
"step": 2148640
},
{
"epoch": 41.0,
"learning_rate": 0.00059,
"loss": 0.4338,
"step": 2202356
},
{
"epoch": 41.0,
"eval_bleu": 7.406528761971476,
"eval_loss": 0.44740021228790283,
"eval_runtime": 6247.6804,
"eval_samples_per_second": 17.195,
"eval_steps_per_second": 1.075,
"step": 2202356
},
{
"epoch": 42.0,
"learning_rate": 0.00058,
"loss": 0.4319,
"step": 2256072
},
{
"epoch": 42.0,
"eval_bleu": 7.192159097527976,
"eval_loss": 0.447433739900589,
"eval_runtime": 6248.8648,
"eval_samples_per_second": 17.192,
"eval_steps_per_second": 1.075,
"step": 2256072
},
{
"epoch": 43.0,
"learning_rate": 0.00057,
"loss": 0.43,
"step": 2309788
},
{
"epoch": 43.0,
"eval_bleu": 7.325069602605064,
"eval_loss": 0.4456492066383362,
"eval_runtime": 6263.0673,
"eval_samples_per_second": 17.153,
"eval_steps_per_second": 1.072,
"step": 2309788
},
{
"epoch": 44.0,
"learning_rate": 0.0005600000000000001,
"loss": 0.4279,
"step": 2363504
},
{
"epoch": 44.0,
"eval_bleu": 7.532048814014251,
"eval_loss": 0.4445250332355499,
"eval_runtime": 6241.8298,
"eval_samples_per_second": 17.211,
"eval_steps_per_second": 1.076,
"step": 2363504
},
{
"epoch": 45.0,
"learning_rate": 0.00055,
"loss": 0.426,
"step": 2417220
},
{
"epoch": 45.0,
"eval_bleu": 7.174420155924515,
"eval_loss": 0.44330111145973206,
"eval_runtime": 6231.5445,
"eval_samples_per_second": 17.24,
"eval_steps_per_second": 1.078,
"step": 2417220
},
{
"epoch": 46.0,
"learning_rate": 0.00054,
"loss": 0.4239,
"step": 2470936
},
{
"epoch": 46.0,
"eval_bleu": 7.653281782827262,
"eval_loss": 0.44130608439445496,
"eval_runtime": 6244.5962,
"eval_samples_per_second": 17.204,
"eval_steps_per_second": 1.075,
"step": 2470936
},
{
"epoch": 47.0,
"learning_rate": 0.0005300000000000001,
"loss": 0.422,
"step": 2524652
},
{
"epoch": 47.0,
"eval_bleu": 7.358951072022719,
"eval_loss": 0.4416486620903015,
"eval_runtime": 6246.4215,
"eval_samples_per_second": 17.199,
"eval_steps_per_second": 1.075,
"step": 2524652
},
{
"epoch": 48.0,
"learning_rate": 0.0005200000000000001,
"loss": 0.4206,
"step": 2578368
},
{
"epoch": 48.0,
"eval_bleu": 6.700995294592222,
"eval_loss": 0.441184937953949,
"eval_runtime": 6248.9024,
"eval_samples_per_second": 17.192,
"eval_steps_per_second": 1.075,
"step": 2578368
},
{
"epoch": 49.0,
"learning_rate": 0.00051,
"loss": 0.4186,
"step": 2632084
},
{
"epoch": 49.0,
"eval_bleu": 7.428626778422992,
"eval_loss": 0.44076189398765564,
"eval_runtime": 6244.5819,
"eval_samples_per_second": 17.204,
"eval_steps_per_second": 1.075,
"step": 2632084
},
{
"epoch": 50.0,
"learning_rate": 0.0005,
"loss": 0.416,
"step": 2685800
},
{
"epoch": 50.0,
"eval_bleu": 7.538386131386865,
"eval_loss": 0.43902388215065,
"eval_runtime": 6236.1942,
"eval_samples_per_second": 17.227,
"eval_steps_per_second": 1.077,
"step": 2685800
},
{
"epoch": 51.0,
"learning_rate": 0.00049,
"loss": 0.4145,
"step": 2739516
},
{
"epoch": 51.0,
"eval_bleu": 7.177849858240658,
"eval_loss": 0.4388711452484131,
"eval_runtime": 6244.4397,
"eval_samples_per_second": 17.204,
"eval_steps_per_second": 1.075,
"step": 2739516
},
{
"epoch": 52.0,
"learning_rate": 0.00048,
"loss": 0.4115,
"step": 2793232
},
{
"epoch": 52.0,
"eval_bleu": 7.3825806146338895,
"eval_loss": 0.43703773617744446,
"eval_runtime": 6246.8394,
"eval_samples_per_second": 17.198,
"eval_steps_per_second": 1.075,
"step": 2793232
},
{
"epoch": 53.0,
"learning_rate": 0.00047,
"loss": 0.4091,
"step": 2846948
},
{
"epoch": 53.0,
"eval_bleu": 7.354983260965792,
"eval_loss": 0.4351899325847626,
"eval_runtime": 6249.7526,
"eval_samples_per_second": 17.19,
"eval_steps_per_second": 1.074,
"step": 2846948
},
{
"epoch": 54.0,
"learning_rate": 0.00046,
"loss": 0.4062,
"step": 2900664
},
{
"epoch": 54.0,
"eval_bleu": 7.070865789057057,
"eval_loss": 0.4349888265132904,
"eval_runtime": 6255.1992,
"eval_samples_per_second": 17.175,
"eval_steps_per_second": 1.074,
"step": 2900664
},
{
"epoch": 55.0,
"learning_rate": 0.00045000000000000004,
"loss": 0.4038,
"step": 2954380
},
{
"epoch": 55.0,
"eval_bleu": 7.724805289860729,
"eval_loss": 0.4359044134616852,
"eval_runtime": 6250.2651,
"eval_samples_per_second": 17.188,
"eval_steps_per_second": 1.074,
"step": 2954380
},
{
"epoch": 56.0,
"learning_rate": 0.00044,
"loss": 0.402,
"step": 3008096
},
{
"epoch": 56.0,
"eval_bleu": 7.154898411407466,
"eval_loss": 0.4326974153518677,
"eval_runtime": 6264.3019,
"eval_samples_per_second": 17.15,
"eval_steps_per_second": 1.072,
"step": 3008096
},
{
"epoch": 57.0,
"learning_rate": 0.00043,
"loss": 0.3995,
"step": 3061812
},
{
"epoch": 57.0,
"eval_bleu": 7.508317247767554,
"eval_loss": 0.4333774149417877,
"eval_runtime": 6245.5823,
"eval_samples_per_second": 17.201,
"eval_steps_per_second": 1.075,
"step": 3061812
},
{
"epoch": 58.0,
"learning_rate": 0.00042,
"loss": 0.3972,
"step": 3115528
},
{
"epoch": 58.0,
"eval_bleu": 7.127695021274113,
"eval_loss": 0.43104425072669983,
"eval_runtime": 6238.945,
"eval_samples_per_second": 17.219,
"eval_steps_per_second": 1.076,
"step": 3115528
},
{
"epoch": 59.0,
"learning_rate": 0.00041,
"loss": 0.3942,
"step": 3169244
},
{
"epoch": 59.0,
"eval_bleu": 6.749919689906369,
"eval_loss": 0.4318625032901764,
"eval_runtime": 6250.8033,
"eval_samples_per_second": 17.187,
"eval_steps_per_second": 1.074,
"step": 3169244
},
{
"epoch": 60.0,
"learning_rate": 0.0004,
"loss": 0.3921,
"step": 3222960
},
{
"epoch": 60.0,
"eval_bleu": 7.099626120333918,
"eval_loss": 0.4313414394855499,
"eval_runtime": 6233.5496,
"eval_samples_per_second": 17.234,
"eval_steps_per_second": 1.077,
"step": 3222960
},
{
"epoch": 61.0,
"learning_rate": 0.00039000000000000005,
"loss": 0.3897,
"step": 3276676
},
{
"epoch": 61.0,
"eval_bleu": 7.280842993868327,
"eval_loss": 0.4297857880592346,
"eval_runtime": 6255.1519,
"eval_samples_per_second": 17.175,
"eval_steps_per_second": 1.074,
"step": 3276676
},
{
"epoch": 62.0,
"learning_rate": 0.00038,
"loss": 0.3867,
"step": 3330392
},
{
"epoch": 62.0,
"eval_bleu": 7.328384730172046,
"eval_loss": 0.42802175879478455,
"eval_runtime": 6245.3286,
"eval_samples_per_second": 17.202,
"eval_steps_per_second": 1.075,
"step": 3330392
},
{
"epoch": 63.0,
"learning_rate": 0.00037,
"loss": 0.3832,
"step": 3384108
},
{
"epoch": 63.0,
"eval_bleu": 7.230903636346123,
"eval_loss": 0.42855262756347656,
"eval_runtime": 6240.2263,
"eval_samples_per_second": 17.216,
"eval_steps_per_second": 1.076,
"step": 3384108
},
{
"epoch": 64.0,
"learning_rate": 0.00035999999999999997,
"loss": 0.3807,
"step": 3437824
},
{
"epoch": 64.0,
"eval_bleu": 7.557291062260919,
"eval_loss": 0.4279802143573761,
"eval_runtime": 6249.6778,
"eval_samples_per_second": 17.19,
"eval_steps_per_second": 1.074,
"step": 3437824
},
{
"epoch": 65.0,
"learning_rate": 0.00035,
"loss": 0.3779,
"step": 3491540
},
{
"epoch": 65.0,
"eval_bleu": 7.456221414498501,
"eval_loss": 0.42722874879837036,
"eval_runtime": 6251.0806,
"eval_samples_per_second": 17.186,
"eval_steps_per_second": 1.074,
"step": 3491540
},
{
"epoch": 66.0,
"learning_rate": 0.00034,
"loss": 0.3746,
"step": 3545256
},
{
"epoch": 66.0,
"eval_bleu": 7.38246671281172,
"eval_loss": 0.4264260232448578,
"eval_runtime": 6254.848,
"eval_samples_per_second": 17.176,
"eval_steps_per_second": 1.074,
"step": 3545256
},
{
"epoch": 67.0,
"learning_rate": 0.00033,
"loss": 0.3713,
"step": 3598972
},
{
"epoch": 67.0,
"eval_bleu": 7.250094489059249,
"eval_loss": 0.42612648010253906,
"eval_runtime": 6253.4328,
"eval_samples_per_second": 17.18,
"eval_steps_per_second": 1.074,
"step": 3598972
},
{
"epoch": 68.0,
"learning_rate": 0.00032,
"loss": 0.3679,
"step": 3652688
},
{
"epoch": 68.0,
"eval_bleu": 7.24260795309734,
"eval_loss": 0.42605340480804443,
"eval_runtime": 6249.7217,
"eval_samples_per_second": 17.19,
"eval_steps_per_second": 1.074,
"step": 3652688
},
{
"epoch": 69.0,
"learning_rate": 0.00031,
"loss": 0.3646,
"step": 3706404
},
{
"epoch": 69.0,
"eval_bleu": 7.705633206021796,
"eval_loss": 0.42531710863113403,
"eval_runtime": 6255.4922,
"eval_samples_per_second": 17.174,
"eval_steps_per_second": 1.073,
"step": 3706404
},
{
"epoch": 70.0,
"learning_rate": 0.0003,
"loss": 0.3617,
"step": 3760120
},
{
"epoch": 70.0,
"eval_bleu": 7.066333513511338,
"eval_loss": 0.4245583415031433,
"eval_runtime": 6242.6393,
"eval_samples_per_second": 17.209,
"eval_steps_per_second": 1.076,
"step": 3760120
},
{
"epoch": 71.0,
"learning_rate": 0.00029,
"loss": 0.3576,
"step": 3813836
},
{
"epoch": 71.0,
"eval_bleu": 7.453191107022425,
"eval_loss": 0.4248814582824707,
"eval_runtime": 6236.6745,
"eval_samples_per_second": 17.226,
"eval_steps_per_second": 1.077,
"step": 3813836
},
{
"epoch": 72.0,
"learning_rate": 0.00028000000000000003,
"loss": 0.3538,
"step": 3867552
},
{
"epoch": 72.0,
"eval_bleu": 7.399721264841341,
"eval_loss": 0.42560645937919617,
"eval_runtime": 6247.7185,
"eval_samples_per_second": 17.195,
"eval_steps_per_second": 1.075,
"step": 3867552
},
{
"epoch": 73.0,
"learning_rate": 0.00027,
"loss": 0.3498,
"step": 3921268
},
{
"epoch": 73.0,
"eval_bleu": 7.23131596568943,
"eval_loss": 0.42429444193840027,
"eval_runtime": 6248.7182,
"eval_samples_per_second": 17.192,
"eval_steps_per_second": 1.075,
"step": 3921268
}
],
"max_steps": 5371600,
"num_train_epochs": 100,
"total_flos": 8.225066520360465e+18,
"trial_name": null,
"trial_params": null
}