|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 1.0, |
|
"eval_steps": 500, |
|
"global_step": 3077, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.032499187520311994, |
|
"grad_norm": 0.38231489062309265, |
|
"learning_rate": 4.8375040623984405e-05, |
|
"loss": 0.1722, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.06499837504062399, |
|
"grad_norm": 0.29991772770881653, |
|
"learning_rate": 4.67500812479688e-05, |
|
"loss": 0.1983, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.09749756256093597, |
|
"grad_norm": 0.3033580780029297, |
|
"learning_rate": 4.51251218719532e-05, |
|
"loss": 0.2034, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.12999675008124797, |
|
"grad_norm": 0.31061500310897827, |
|
"learning_rate": 4.3500162495937604e-05, |
|
"loss": 0.2022, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.16249593760155995, |
|
"grad_norm": 0.27624836564064026, |
|
"learning_rate": 4.1875203119922e-05, |
|
"loss": 0.1975, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.16249593760155995, |
|
"eval_gen_len": 19.964290067032298, |
|
"eval_loss": 0.18788783252239227, |
|
"eval_rouge1": 0.0068, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0068, |
|
"eval_rougeLsum": 0.0068, |
|
"eval_runtime": 2174.226, |
|
"eval_samples_per_second": 11.321, |
|
"eval_steps_per_second": 0.354, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.19499512512187195, |
|
"grad_norm": 0.2493065893650055, |
|
"learning_rate": 4.02502437439064e-05, |
|
"loss": 0.1943, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.22749431264218395, |
|
"grad_norm": 0.2768040597438812, |
|
"learning_rate": 3.8625284367890804e-05, |
|
"loss": 0.1919, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.25999350016249595, |
|
"grad_norm": 0.2641097605228424, |
|
"learning_rate": 3.7000324991875206e-05, |
|
"loss": 0.1883, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.29249268768280795, |
|
"grad_norm": 0.266985148191452, |
|
"learning_rate": 3.537536561585961e-05, |
|
"loss": 0.1866, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.3249918752031199, |
|
"grad_norm": 0.2505531311035156, |
|
"learning_rate": 3.3750406239844004e-05, |
|
"loss": 0.186, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.3249918752031199, |
|
"eval_gen_len": 19.9981718464351, |
|
"eval_loss": 0.1798761785030365, |
|
"eval_rouge1": 0.0108, |
|
"eval_rouge2": 0.0041, |
|
"eval_rougeL": 0.0108, |
|
"eval_rougeLsum": 0.0108, |
|
"eval_runtime": 2176.3057, |
|
"eval_samples_per_second": 11.31, |
|
"eval_steps_per_second": 0.354, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.3574910627234319, |
|
"grad_norm": 0.2498556226491928, |
|
"learning_rate": 3.2125446863828406e-05, |
|
"loss": 0.1839, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.3899902502437439, |
|
"grad_norm": 0.2563565969467163, |
|
"learning_rate": 3.0500487487812808e-05, |
|
"loss": 0.1835, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.4224894377640559, |
|
"grad_norm": 0.25993165373802185, |
|
"learning_rate": 2.8875528111797207e-05, |
|
"loss": 0.1837, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 0.4549886252843679, |
|
"grad_norm": 0.2639683485031128, |
|
"learning_rate": 2.7250568735781606e-05, |
|
"loss": 0.1814, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 0.4874878128046799, |
|
"grad_norm": 0.25668901205062866, |
|
"learning_rate": 2.5625609359766008e-05, |
|
"loss": 0.1762, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.4874878128046799, |
|
"eval_gen_len": 19.983384115376804, |
|
"eval_loss": 0.17473776638507843, |
|
"eval_rouge1": 0.1836, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.1828, |
|
"eval_rougeLsum": 0.1851, |
|
"eval_runtime": 2167.8807, |
|
"eval_samples_per_second": 11.354, |
|
"eval_steps_per_second": 0.355, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.5199870003249919, |
|
"grad_norm": 0.2645615041255951, |
|
"learning_rate": 2.4000649983750407e-05, |
|
"loss": 0.1794, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 0.5524861878453039, |
|
"grad_norm": 0.24295924603939056, |
|
"learning_rate": 2.237569060773481e-05, |
|
"loss": 0.1769, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 0.5849853753656159, |
|
"grad_norm": 0.25981849431991577, |
|
"learning_rate": 2.0750731231719208e-05, |
|
"loss": 0.1785, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 0.6174845628859279, |
|
"grad_norm": 0.24553097784519196, |
|
"learning_rate": 1.912577185570361e-05, |
|
"loss": 0.173, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 0.6499837504062398, |
|
"grad_norm": 0.24243266880512238, |
|
"learning_rate": 1.750081247968801e-05, |
|
"loss": 0.1764, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.6499837504062398, |
|
"eval_gen_len": 19.99471866747918, |
|
"eval_loss": 0.17154192924499512, |
|
"eval_rouge1": 0.1151, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.1151, |
|
"eval_rougeLsum": 0.1165, |
|
"eval_runtime": 2157.4938, |
|
"eval_samples_per_second": 11.409, |
|
"eval_steps_per_second": 0.357, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.6824829379265518, |
|
"grad_norm": 0.26906952261924744, |
|
"learning_rate": 1.5875853103672408e-05, |
|
"loss": 0.1789, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 0.7149821254468638, |
|
"grad_norm": 0.2654452919960022, |
|
"learning_rate": 1.4250893727656808e-05, |
|
"loss": 0.1727, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 0.7474813129671758, |
|
"grad_norm": 0.24443137645721436, |
|
"learning_rate": 1.2625934351641208e-05, |
|
"loss": 0.1767, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 0.7799805004874878, |
|
"grad_norm": 0.24741144478321075, |
|
"learning_rate": 1.1000974975625609e-05, |
|
"loss": 0.1745, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 0.8124796880077998, |
|
"grad_norm": 0.2507327198982239, |
|
"learning_rate": 9.376015599610011e-06, |
|
"loss": 0.1756, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.8124796880077998, |
|
"eval_gen_len": 19.99869997968718, |
|
"eval_loss": 0.1691381335258484, |
|
"eval_rouge1": 0.0027, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0027, |
|
"eval_rougeLsum": 0.0027, |
|
"eval_runtime": 2163.6252, |
|
"eval_samples_per_second": 11.377, |
|
"eval_steps_per_second": 0.356, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.8449788755281118, |
|
"grad_norm": 0.23773913085460663, |
|
"learning_rate": 7.75105622359441e-06, |
|
"loss": 0.1733, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 0.8774780630484238, |
|
"grad_norm": 0.24695108830928802, |
|
"learning_rate": 6.1260968475788104e-06, |
|
"loss": 0.1744, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 0.9099772505687358, |
|
"grad_norm": 0.23871463537216187, |
|
"learning_rate": 4.501137471563211e-06, |
|
"loss": 0.1741, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 0.9424764380890478, |
|
"grad_norm": 0.24402374029159546, |
|
"learning_rate": 2.8761780955476114e-06, |
|
"loss": 0.1703, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 0.9749756256093598, |
|
"grad_norm": 0.24215468764305115, |
|
"learning_rate": 1.2512187195320117e-06, |
|
"loss": 0.1724, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.9749756256093598, |
|
"eval_gen_len": 19.99163111923624, |
|
"eval_loss": 0.1676524430513382, |
|
"eval_rouge1": 0.0257, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0244, |
|
"eval_rougeLsum": 0.0257, |
|
"eval_runtime": 2148.352, |
|
"eval_samples_per_second": 11.458, |
|
"eval_steps_per_second": 0.358, |
|
"step": 3000 |
|
} |
|
], |
|
"logging_steps": 100, |
|
"max_steps": 3077, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 1.7819450187495506e+19, |
|
"train_batch_size": 32, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|