|
{ |
|
"best_metric": 0.4542507827281952, |
|
"best_model_checkpoint": "mobilebert_add_GLUE_Experiment_sst2_128/checkpoint-3689", |
|
"epoch": 12.0, |
|
"global_step": 6324, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 4.9e-05, |
|
"loss": 0.6677, |
|
"step": 527 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_accuracy": 0.5756880733944955, |
|
"eval_loss": 0.677055299282074, |
|
"eval_runtime": 1.7061, |
|
"eval_samples_per_second": 511.104, |
|
"eval_steps_per_second": 4.103, |
|
"step": 527 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"learning_rate": 4.8e-05, |
|
"loss": 0.5966, |
|
"step": 1054 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_accuracy": 0.5424311926605505, |
|
"eval_loss": 0.7135030627250671, |
|
"eval_runtime": 1.5325, |
|
"eval_samples_per_second": 569.001, |
|
"eval_steps_per_second": 4.568, |
|
"step": 1054 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"learning_rate": 4.7e-05, |
|
"loss": 0.5714, |
|
"step": 1581 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_accuracy": 0.555045871559633, |
|
"eval_loss": 0.7271105647087097, |
|
"eval_runtime": 1.5765, |
|
"eval_samples_per_second": 553.131, |
|
"eval_steps_per_second": 4.44, |
|
"step": 1581 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"learning_rate": 4.600000000000001e-05, |
|
"loss": 0.5573, |
|
"step": 2108 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_accuracy": 0.5619266055045872, |
|
"eval_loss": 0.6891608238220215, |
|
"eval_runtime": 1.5223, |
|
"eval_samples_per_second": 572.828, |
|
"eval_steps_per_second": 4.598, |
|
"step": 2108 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"learning_rate": 4.5e-05, |
|
"loss": 0.501, |
|
"step": 2635 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_accuracy": 0.7798165137614679, |
|
"eval_loss": 0.45462021231651306, |
|
"eval_runtime": 1.5903, |
|
"eval_samples_per_second": 548.34, |
|
"eval_steps_per_second": 4.402, |
|
"step": 2635 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"learning_rate": 4.4000000000000006e-05, |
|
"loss": 0.2856, |
|
"step": 3162 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_accuracy": 0.805045871559633, |
|
"eval_loss": 0.46127501130104065, |
|
"eval_runtime": 1.5321, |
|
"eval_samples_per_second": 569.164, |
|
"eval_steps_per_second": 4.569, |
|
"step": 3162 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"learning_rate": 4.3e-05, |
|
"loss": 0.2288, |
|
"step": 3689 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_accuracy": 0.7981651376146789, |
|
"eval_loss": 0.4542507827281952, |
|
"eval_runtime": 1.5229, |
|
"eval_samples_per_second": 572.609, |
|
"eval_steps_per_second": 4.597, |
|
"step": 3689 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"learning_rate": 4.2e-05, |
|
"loss": 0.2027, |
|
"step": 4216 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_accuracy": 0.7993119266055045, |
|
"eval_loss": 0.4661611020565033, |
|
"eval_runtime": 1.5648, |
|
"eval_samples_per_second": 557.264, |
|
"eval_steps_per_second": 4.473, |
|
"step": 4216 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"learning_rate": 4.1e-05, |
|
"loss": 0.1883, |
|
"step": 4743 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_accuracy": 0.8038990825688074, |
|
"eval_loss": 0.516830563545227, |
|
"eval_runtime": 1.5219, |
|
"eval_samples_per_second": 572.954, |
|
"eval_steps_per_second": 4.599, |
|
"step": 4743 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"learning_rate": 4e-05, |
|
"loss": 0.1779, |
|
"step": 5270 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_accuracy": 0.7855504587155964, |
|
"eval_loss": 0.5748016834259033, |
|
"eval_runtime": 1.5164, |
|
"eval_samples_per_second": 575.045, |
|
"eval_steps_per_second": 4.616, |
|
"step": 5270 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"learning_rate": 3.9000000000000006e-05, |
|
"loss": 0.1691, |
|
"step": 5797 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"eval_accuracy": 0.8027522935779816, |
|
"eval_loss": 0.5195825099945068, |
|
"eval_runtime": 1.5266, |
|
"eval_samples_per_second": 571.211, |
|
"eval_steps_per_second": 4.585, |
|
"step": 5797 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"learning_rate": 3.8e-05, |
|
"loss": 0.1596, |
|
"step": 6324 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"eval_accuracy": 0.7947247706422018, |
|
"eval_loss": 0.5943012237548828, |
|
"eval_runtime": 1.5592, |
|
"eval_samples_per_second": 559.25, |
|
"eval_steps_per_second": 4.489, |
|
"step": 6324 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"step": 6324, |
|
"total_flos": 1.9293579893538816e+16, |
|
"train_loss": 0.3588200110110356, |
|
"train_runtime": 4204.4171, |
|
"train_samples_per_second": 800.931, |
|
"train_steps_per_second": 6.267 |
|
} |
|
], |
|
"max_steps": 26350, |
|
"num_train_epochs": 50, |
|
"total_flos": 1.9293579893538816e+16, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|