|
{ |
|
"best_metric": 0.22905202209949493, |
|
"best_model_checkpoint": "mobilebert_sa_GLUE_Experiment_logit_kd_pretrain_mrpc/checkpoint-435", |
|
"epoch": 20.0, |
|
"global_step": 580, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 4.9e-05, |
|
"loss": 0.536, |
|
"step": 29 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_accuracy": 0.7279411764705882, |
|
"eval_combined_score": 0.7781900627329756, |
|
"eval_f1": 0.8284389489953632, |
|
"eval_loss": 0.413361519575119, |
|
"eval_runtime": 0.8016, |
|
"eval_samples_per_second": 508.993, |
|
"eval_steps_per_second": 4.99, |
|
"step": 29 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"learning_rate": 4.8e-05, |
|
"loss": 0.3419, |
|
"step": 58 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_accuracy": 0.8284313725490197, |
|
"eval_combined_score": 0.8542841794251947, |
|
"eval_f1": 0.8801369863013697, |
|
"eval_loss": 0.30050206184387207, |
|
"eval_runtime": 0.7827, |
|
"eval_samples_per_second": 521.241, |
|
"eval_steps_per_second": 5.11, |
|
"step": 58 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"learning_rate": 4.7e-05, |
|
"loss": 0.2413, |
|
"step": 87 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_accuracy": 0.8235294117647058, |
|
"eval_combined_score": 0.8507477567298105, |
|
"eval_f1": 0.8779661016949152, |
|
"eval_loss": 0.2707367241382599, |
|
"eval_runtime": 0.8474, |
|
"eval_samples_per_second": 481.468, |
|
"eval_steps_per_second": 4.72, |
|
"step": 87 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"learning_rate": 4.600000000000001e-05, |
|
"loss": 0.1852, |
|
"step": 116 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_accuracy": 0.8284313725490197, |
|
"eval_combined_score": 0.8560761513907889, |
|
"eval_f1": 0.883720930232558, |
|
"eval_loss": 0.3246958255767822, |
|
"eval_runtime": 0.7815, |
|
"eval_samples_per_second": 522.064, |
|
"eval_steps_per_second": 5.118, |
|
"step": 116 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"learning_rate": 4.5e-05, |
|
"loss": 0.1524, |
|
"step": 145 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_accuracy": 0.8431372549019608, |
|
"eval_combined_score": 0.8665858095815646, |
|
"eval_f1": 0.8900343642611684, |
|
"eval_loss": 0.2855992019176483, |
|
"eval_runtime": 0.7813, |
|
"eval_samples_per_second": 522.186, |
|
"eval_steps_per_second": 5.119, |
|
"step": 145 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"learning_rate": 4.4000000000000006e-05, |
|
"loss": 0.1297, |
|
"step": 174 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_accuracy": 0.8455882352941176, |
|
"eval_combined_score": 0.8702064715702642, |
|
"eval_f1": 0.8948247078464107, |
|
"eval_loss": 0.29985857009887695, |
|
"eval_runtime": 0.7783, |
|
"eval_samples_per_second": 524.229, |
|
"eval_steps_per_second": 5.139, |
|
"step": 174 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"learning_rate": 4.3e-05, |
|
"loss": 0.1219, |
|
"step": 203 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_accuracy": 0.8529411764705882, |
|
"eval_combined_score": 0.8757949125596185, |
|
"eval_f1": 0.8986486486486487, |
|
"eval_loss": 0.27967169880867004, |
|
"eval_runtime": 0.7807, |
|
"eval_samples_per_second": 522.641, |
|
"eval_steps_per_second": 5.124, |
|
"step": 203 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"learning_rate": 4.2e-05, |
|
"loss": 0.1141, |
|
"step": 232 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_accuracy": 0.8602941176470589, |
|
"eval_combined_score": 0.8804088389282414, |
|
"eval_f1": 0.900523560209424, |
|
"eval_loss": 0.2462492138147354, |
|
"eval_runtime": 0.7782, |
|
"eval_samples_per_second": 524.315, |
|
"eval_steps_per_second": 5.14, |
|
"step": 232 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"learning_rate": 4.1e-05, |
|
"loss": 0.1127, |
|
"step": 261 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_accuracy": 0.8578431372549019, |
|
"eval_combined_score": 0.8780443756449948, |
|
"eval_f1": 0.8982456140350877, |
|
"eval_loss": 0.25573617219924927, |
|
"eval_runtime": 0.7773, |
|
"eval_samples_per_second": 524.89, |
|
"eval_steps_per_second": 5.146, |
|
"step": 261 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"learning_rate": 4e-05, |
|
"loss": 0.1091, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_accuracy": 0.8480392156862745, |
|
"eval_combined_score": 0.8723529411764706, |
|
"eval_f1": 0.8966666666666667, |
|
"eval_loss": 0.2853253185749054, |
|
"eval_runtime": 0.7789, |
|
"eval_samples_per_second": 523.847, |
|
"eval_steps_per_second": 5.136, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"learning_rate": 3.9000000000000006e-05, |
|
"loss": 0.1007, |
|
"step": 319 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"eval_accuracy": 0.8553921568627451, |
|
"eval_combined_score": 0.8767461647871584, |
|
"eval_f1": 0.8981001727115717, |
|
"eval_loss": 0.2471570074558258, |
|
"eval_runtime": 0.7834, |
|
"eval_samples_per_second": 520.839, |
|
"eval_steps_per_second": 5.106, |
|
"step": 319 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"learning_rate": 3.8e-05, |
|
"loss": 0.0979, |
|
"step": 348 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"eval_accuracy": 0.8504901960784313, |
|
"eval_combined_score": 0.8727494009652054, |
|
"eval_f1": 0.8950086058519794, |
|
"eval_loss": 0.24307508766651154, |
|
"eval_runtime": 0.7814, |
|
"eval_samples_per_second": 522.12, |
|
"eval_steps_per_second": 5.119, |
|
"step": 348 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"learning_rate": 3.7e-05, |
|
"loss": 0.0954, |
|
"step": 377 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"eval_accuracy": 0.8578431372549019, |
|
"eval_combined_score": 0.8792640343808756, |
|
"eval_f1": 0.9006849315068494, |
|
"eval_loss": 0.24562659859657288, |
|
"eval_runtime": 0.7831, |
|
"eval_samples_per_second": 521.003, |
|
"eval_steps_per_second": 5.108, |
|
"step": 377 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"learning_rate": 3.6e-05, |
|
"loss": 0.0946, |
|
"step": 406 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"eval_accuracy": 0.8578431372549019, |
|
"eval_combined_score": 0.8797690262545697, |
|
"eval_f1": 0.9016949152542373, |
|
"eval_loss": 0.25261467695236206, |
|
"eval_runtime": 0.7806, |
|
"eval_samples_per_second": 522.682, |
|
"eval_steps_per_second": 5.124, |
|
"step": 406 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"learning_rate": 3.5e-05, |
|
"loss": 0.0946, |
|
"step": 435 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"eval_accuracy": 0.8578431372549019, |
|
"eval_combined_score": 0.8785743464052287, |
|
"eval_f1": 0.8993055555555555, |
|
"eval_loss": 0.22905202209949493, |
|
"eval_runtime": 0.78, |
|
"eval_samples_per_second": 523.081, |
|
"eval_steps_per_second": 5.128, |
|
"step": 435 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"learning_rate": 3.4000000000000007e-05, |
|
"loss": 0.0938, |
|
"step": 464 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"eval_accuracy": 0.8602941176470589, |
|
"eval_combined_score": 0.8815950997093898, |
|
"eval_f1": 0.9028960817717206, |
|
"eval_loss": 0.2451842576265335, |
|
"eval_runtime": 0.7798, |
|
"eval_samples_per_second": 523.204, |
|
"eval_steps_per_second": 5.129, |
|
"step": 464 |
|
}, |
|
{ |
|
"epoch": 17.0, |
|
"learning_rate": 3.3e-05, |
|
"loss": 0.0919, |
|
"step": 493 |
|
}, |
|
{ |
|
"epoch": 17.0, |
|
"eval_accuracy": 0.8651960784313726, |
|
"eval_combined_score": 0.8851023570049782, |
|
"eval_f1": 0.9050086355785838, |
|
"eval_loss": 0.23652492463588715, |
|
"eval_runtime": 0.7816, |
|
"eval_samples_per_second": 522.014, |
|
"eval_steps_per_second": 5.118, |
|
"step": 493 |
|
}, |
|
{ |
|
"epoch": 18.0, |
|
"learning_rate": 3.2000000000000005e-05, |
|
"loss": 0.0916, |
|
"step": 522 |
|
}, |
|
{ |
|
"epoch": 18.0, |
|
"eval_accuracy": 0.8651960784313726, |
|
"eval_combined_score": 0.8855894922071392, |
|
"eval_f1": 0.905982905982906, |
|
"eval_loss": 0.23631499707698822, |
|
"eval_runtime": 0.781, |
|
"eval_samples_per_second": 522.414, |
|
"eval_steps_per_second": 5.122, |
|
"step": 522 |
|
}, |
|
{ |
|
"epoch": 19.0, |
|
"learning_rate": 3.1e-05, |
|
"loss": 0.0915, |
|
"step": 551 |
|
}, |
|
{ |
|
"epoch": 19.0, |
|
"eval_accuracy": 0.8651960784313726, |
|
"eval_combined_score": 0.8857496576143234, |
|
"eval_f1": 0.9063032367972743, |
|
"eval_loss": 0.24315035343170166, |
|
"eval_runtime": 0.78, |
|
"eval_samples_per_second": 523.058, |
|
"eval_steps_per_second": 5.128, |
|
"step": 551 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"learning_rate": 3e-05, |
|
"loss": 0.0905, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"eval_accuracy": 0.8651960784313726, |
|
"eval_combined_score": 0.8854282278949316, |
|
"eval_f1": 0.9056603773584906, |
|
"eval_loss": 0.2296983003616333, |
|
"eval_runtime": 0.7873, |
|
"eval_samples_per_second": 518.229, |
|
"eval_steps_per_second": 5.081, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"step": 580, |
|
"total_flos": 2329744947281920.0, |
|
"train_loss": 0.14934097281817732, |
|
"train_runtime": 450.6297, |
|
"train_samples_per_second": 406.986, |
|
"train_steps_per_second": 3.218 |
|
} |
|
], |
|
"max_steps": 1450, |
|
"num_train_epochs": 50, |
|
"total_flos": 2329744947281920.0, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|