{
  "best_metric": 0.6102265119552612,
  "best_model_checkpoint": "mobilebert_add_GLUE_Experiment_cola_256/checkpoint-737",
  "epoch": 16.0,
  "global_step": 1072,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 1.0,
      "learning_rate": 4.9e-05,
      "loss": 0.6129,
      "step": 67
    },
    {
      "epoch": 1.0,
      "eval_loss": 0.6179760098457336,
      "eval_matthews_correlation": 0.0,
      "eval_runtime": 1.9044,
      "eval_samples_per_second": 547.668,
      "eval_steps_per_second": 4.726,
      "step": 67
    },
    {
      "epoch": 2.0,
      "learning_rate": 4.8e-05,
      "loss": 0.6078,
      "step": 134
    },
    {
      "epoch": 2.0,
      "eval_loss": 0.6178193688392639,
      "eval_matthews_correlation": 0.0,
      "eval_runtime": 1.9385,
      "eval_samples_per_second": 538.041,
      "eval_steps_per_second": 4.643,
      "step": 134
    },
    {
      "epoch": 3.0,
      "learning_rate": 4.7e-05,
      "loss": 0.6073,
      "step": 201
    },
    {
      "epoch": 3.0,
      "eval_loss": 0.6178669333457947,
      "eval_matthews_correlation": 0.0,
      "eval_runtime": 1.9618,
      "eval_samples_per_second": 531.659,
      "eval_steps_per_second": 4.588,
      "step": 201
    },
    {
      "epoch": 4.0,
      "learning_rate": 4.600000000000001e-05,
      "loss": 0.6067,
      "step": 268
    },
    {
      "epoch": 4.0,
      "eval_loss": 0.6166986227035522,
      "eval_matthews_correlation": 0.0,
      "eval_runtime": 1.9261,
      "eval_samples_per_second": 541.5,
      "eval_steps_per_second": 4.673,
      "step": 268
    },
    {
      "epoch": 5.0,
      "learning_rate": 4.5e-05,
      "loss": 0.6059,
      "step": 335
    },
    {
      "epoch": 5.0,
      "eval_loss": 0.6167794466018677,
      "eval_matthews_correlation": 0.0,
      "eval_runtime": 1.882,
      "eval_samples_per_second": 554.203,
      "eval_steps_per_second": 4.782,
      "step": 335
    },
    {
      "epoch": 6.0,
      "learning_rate": 4.4000000000000006e-05,
      "loss": 0.5998,
      "step": 402
    },
    {
      "epoch": 6.0,
      "eval_loss": 0.6115455627441406,
      "eval_matthews_correlation": 0.0,
      "eval_runtime": 1.8963,
      "eval_samples_per_second": 550.027,
      "eval_steps_per_second": 4.746,
      "step": 402
    },
    {
      "epoch": 7.0,
      "learning_rate": 4.3e-05,
      "loss": 0.5917,
      "step": 469
    },
    {
      "epoch": 7.0,
      "eval_loss": 0.6122425198554993,
      "eval_matthews_correlation": 0.0,
      "eval_runtime": 1.9219,
      "eval_samples_per_second": 542.687,
      "eval_steps_per_second": 4.683,
      "step": 469
    },
    {
      "epoch": 8.0,
      "learning_rate": 4.2e-05,
      "loss": 0.5849,
      "step": 536
    },
    {
      "epoch": 8.0,
      "eval_loss": 0.6126018166542053,
      "eval_matthews_correlation": 0.0,
      "eval_runtime": 1.9382,
      "eval_samples_per_second": 538.131,
      "eval_steps_per_second": 4.644,
      "step": 536
    },
    {
      "epoch": 9.0,
      "learning_rate": 4.1e-05,
      "loss": 0.5796,
      "step": 603
    },
    {
      "epoch": 9.0,
      "eval_loss": 0.6276524066925049,
      "eval_matthews_correlation": 0.0,
      "eval_runtime": 1.8651,
      "eval_samples_per_second": 559.208,
      "eval_steps_per_second": 4.825,
      "step": 603
    },
    {
      "epoch": 10.0,
      "learning_rate": 4e-05,
      "loss": 0.5759,
      "step": 670
    },
    {
      "epoch": 10.0,
      "eval_loss": 0.6138085722923279,
      "eval_matthews_correlation": 0.00286100001416597,
      "eval_runtime": 1.8774,
      "eval_samples_per_second": 555.557,
      "eval_steps_per_second": 4.794,
      "step": 670
    },
    {
      "epoch": 11.0,
      "learning_rate": 3.9000000000000006e-05,
      "loss": 0.5733,
      "step": 737
    },
    {
      "epoch": 11.0,
      "eval_loss": 0.6102265119552612,
      "eval_matthews_correlation": 0.01845565733408863,
      "eval_runtime": 1.925,
      "eval_samples_per_second": 541.806,
      "eval_steps_per_second": 4.675,
      "step": 737
    },
    {
      "epoch": 12.0,
      "learning_rate": 3.8e-05,
      "loss": 0.5716,
      "step": 804
    },
    {
      "epoch": 12.0,
      "eval_loss": 0.6143413782119751,
      "eval_matthews_correlation": 0.025208083291660098,
      "eval_runtime": 1.8542,
      "eval_samples_per_second": 562.494,
      "eval_steps_per_second": 4.854,
      "step": 804
    },
    {
      "epoch": 13.0,
      "learning_rate": 3.7e-05,
      "loss": 0.5667,
      "step": 871
    },
    {
      "epoch": 13.0,
      "eval_loss": 0.6347153782844543,
      "eval_matthews_correlation": 0.03482284441916008,
      "eval_runtime": 1.8966,
      "eval_samples_per_second": 549.934,
      "eval_steps_per_second": 4.745,
      "step": 871
    },
    {
      "epoch": 14.0,
      "learning_rate": 3.6e-05,
      "loss": 0.5662,
      "step": 938
    },
    {
      "epoch": 14.0,
      "eval_loss": 0.6314128637313843,
      "eval_matthews_correlation": 0.03846275142815186,
      "eval_runtime": 1.858,
      "eval_samples_per_second": 561.368,
      "eval_steps_per_second": 4.844,
      "step": 938
    },
    {
      "epoch": 15.0,
      "learning_rate": 3.5e-05,
      "loss": 0.5631,
      "step": 1005
    },
    {
      "epoch": 15.0,
      "eval_loss": 0.6130307912826538,
      "eval_matthews_correlation": 0.017448205413933698,
      "eval_runtime": 1.8802,
      "eval_samples_per_second": 554.731,
      "eval_steps_per_second": 4.787,
      "step": 1005
    },
    {
      "epoch": 16.0,
      "learning_rate": 3.4000000000000007e-05,
      "loss": 0.5628,
      "step": 1072
    },
    {
      "epoch": 16.0,
      "eval_loss": 0.6218040585517883,
      "eval_matthews_correlation": 0.03482284441916008,
      "eval_runtime": 1.8684,
      "eval_samples_per_second": 558.219,
      "eval_steps_per_second": 4.817,
      "step": 1072
    },
    {
      "epoch": 16.0,
      "step": 1072,
      "total_flos": 3526461549969408.0,
      "train_loss": 0.5860174342767516,
      "train_runtime": 908.8925,
      "train_samples_per_second": 470.408,
      "train_steps_per_second": 3.686
    }
  ],
  "max_steps": 3350,
  "num_train_epochs": 50,
  "total_flos": 3526461549969408.0,
  "trial_name": null,
  "trial_params": null
}