|
{ |
|
"best_metric": 0.5008143186569214, |
|
"best_model_checkpoint": "mobilebert_add_GLUE_Experiment_qqp/checkpoint-19901", |
|
"epoch": 12.0, |
|
"global_step": 34116, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 4.9e-05, |
|
"loss": 0.6505, |
|
"step": 2843 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_accuracy": 0.6320554044026713, |
|
"eval_combined_score": 0.3166319723772454, |
|
"eval_f1": 0.0012085403518195246, |
|
"eval_loss": 0.6498478651046753, |
|
"eval_runtime": 84.4187, |
|
"eval_samples_per_second": 478.923, |
|
"eval_steps_per_second": 3.743, |
|
"step": 2843 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"learning_rate": 4.8e-05, |
|
"loss": 0.6474, |
|
"step": 5686 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_accuracy": 0.6320554044026713, |
|
"eval_combined_score": 0.3166319723772454, |
|
"eval_f1": 0.0012085403518195246, |
|
"eval_loss": 0.6484191417694092, |
|
"eval_runtime": 84.4998, |
|
"eval_samples_per_second": 478.463, |
|
"eval_steps_per_second": 3.74, |
|
"step": 5686 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"learning_rate": 4.7e-05, |
|
"loss": 0.646, |
|
"step": 8529 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_accuracy": 0.6322038090526837, |
|
"eval_combined_score": 0.3173094719488563, |
|
"eval_f1": 0.0024151348450288474, |
|
"eval_loss": 0.6478633880615234, |
|
"eval_runtime": 84.5145, |
|
"eval_samples_per_second": 478.38, |
|
"eval_steps_per_second": 3.739, |
|
"step": 8529 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"learning_rate": 4.600000000000001e-05, |
|
"loss": 0.5481, |
|
"step": 11372 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_accuracy": 0.7486272569873856, |
|
"eval_combined_score": 0.6866867038546203, |
|
"eval_f1": 0.624746150721855, |
|
"eval_loss": 0.5139716267585754, |
|
"eval_runtime": 85.5613, |
|
"eval_samples_per_second": 472.527, |
|
"eval_steps_per_second": 3.693, |
|
"step": 11372 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"learning_rate": 4.5e-05, |
|
"loss": 0.4934, |
|
"step": 14215 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_accuracy": 0.7529062577294089, |
|
"eval_combined_score": 0.7038665764495293, |
|
"eval_f1": 0.6548268951696496, |
|
"eval_loss": 0.508634090423584, |
|
"eval_runtime": 85.77, |
|
"eval_samples_per_second": 471.377, |
|
"eval_steps_per_second": 3.684, |
|
"step": 14215 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"learning_rate": 4.4000000000000006e-05, |
|
"loss": 0.4794, |
|
"step": 17058 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_accuracy": 0.7575315359881276, |
|
"eval_combined_score": 0.7051322705269201, |
|
"eval_f1": 0.6527330050657125, |
|
"eval_loss": 0.5043991208076477, |
|
"eval_runtime": 85.6759, |
|
"eval_samples_per_second": 471.895, |
|
"eval_steps_per_second": 3.688, |
|
"step": 17058 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"learning_rate": 4.3e-05, |
|
"loss": 0.4708, |
|
"step": 19901 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_accuracy": 0.7599802127133317, |
|
"eval_combined_score": 0.7000865097678635, |
|
"eval_f1": 0.6401928068223952, |
|
"eval_loss": 0.5008143186569214, |
|
"eval_runtime": 85.4807, |
|
"eval_samples_per_second": 472.972, |
|
"eval_steps_per_second": 3.697, |
|
"step": 19901 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"learning_rate": 4.2e-05, |
|
"loss": 0.4652, |
|
"step": 22744 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_accuracy": 0.7618600049468217, |
|
"eval_combined_score": 0.7001156995887954, |
|
"eval_f1": 0.6383713942307692, |
|
"eval_loss": 0.5009844303131104, |
|
"eval_runtime": 85.8727, |
|
"eval_samples_per_second": 470.813, |
|
"eval_steps_per_second": 3.68, |
|
"step": 22744 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"learning_rate": 4.1e-05, |
|
"loss": 0.4604, |
|
"step": 25587 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_accuracy": 0.7614147909967846, |
|
"eval_combined_score": 0.7051723831239565, |
|
"eval_f1": 0.6489299752511283, |
|
"eval_loss": 0.5014066100120544, |
|
"eval_runtime": 86.3999, |
|
"eval_samples_per_second": 467.94, |
|
"eval_steps_per_second": 3.657, |
|
"step": 25587 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"learning_rate": 4e-05, |
|
"loss": 0.4562, |
|
"step": 28430 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_accuracy": 0.7599802127133317, |
|
"eval_combined_score": 0.7108365712848488, |
|
"eval_f1": 0.661692929856366, |
|
"eval_loss": 0.5057373642921448, |
|
"eval_runtime": 86.4384, |
|
"eval_samples_per_second": 467.732, |
|
"eval_steps_per_second": 3.656, |
|
"step": 28430 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"learning_rate": 3.9000000000000006e-05, |
|
"loss": 0.452, |
|
"step": 31273 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"eval_accuracy": 0.7619836754884987, |
|
"eval_combined_score": 0.6991856786662217, |
|
"eval_f1": 0.6363876818439448, |
|
"eval_loss": 0.5101634860038757, |
|
"eval_runtime": 86.4692, |
|
"eval_samples_per_second": 467.565, |
|
"eval_steps_per_second": 3.654, |
|
"step": 31273 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"learning_rate": 3.8e-05, |
|
"loss": 0.4476, |
|
"step": 34116 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"eval_accuracy": 0.7621815483551818, |
|
"eval_combined_score": 0.712056633324597, |
|
"eval_f1": 0.6619317182940121, |
|
"eval_loss": 0.5301799774169922, |
|
"eval_runtime": 86.4861, |
|
"eval_samples_per_second": 467.474, |
|
"eval_steps_per_second": 3.654, |
|
"step": 34116 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"step": 34116, |
|
"total_flos": 1.2915236812514918e+17, |
|
"train_loss": 0.5180756985645023, |
|
"train_runtime": 29413.6297, |
|
"train_samples_per_second": 618.499, |
|
"train_steps_per_second": 4.833 |
|
} |
|
], |
|
"max_steps": 142150, |
|
"num_train_epochs": 50, |
|
"total_flos": 1.2915236812514918e+17, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|