distilbert-finetuned-ner / trainer_state.json
Amit234's picture
Upload model checkpoint
e3b4db6 verified
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 9.0,
"eval_steps": 500,
"global_step": 6120,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.7352941176470589,
"grad_norm": 1.4399138689041138,
"learning_rate": 1.8366013071895427e-05,
"loss": 0.1305,
"step": 500
},
{
"epoch": 1.0,
"eval_accuracy": 0.9775613095979087,
"eval_f1": 0.740566037735849,
"eval_loss": 0.06862938404083252,
"eval_precision": 0.6908690869086909,
"eval_recall": 0.7979669631512071,
"eval_runtime": 2.5207,
"eval_samples_per_second": 366.567,
"eval_steps_per_second": 46.019,
"step": 680
},
{
"epoch": 1.4705882352941178,
"grad_norm": 3.059645652770996,
"learning_rate": 1.6732026143790852e-05,
"loss": 0.0475,
"step": 1000
},
{
"epoch": 2.0,
"eval_accuracy": 0.9815448773808042,
"eval_f1": 0.8041237113402061,
"eval_loss": 0.067368283867836,
"eval_precision": 0.7691415313225058,
"eval_recall": 0.8424396442185514,
"eval_runtime": 3.126,
"eval_samples_per_second": 295.582,
"eval_steps_per_second": 37.108,
"step": 1360
},
{
"epoch": 2.2058823529411766,
"grad_norm": 2.6966614723205566,
"learning_rate": 1.5098039215686276e-05,
"loss": 0.0354,
"step": 1500
},
{
"epoch": 2.9411764705882355,
"grad_norm": 3.640780210494995,
"learning_rate": 1.3464052287581701e-05,
"loss": 0.0222,
"step": 2000
},
{
"epoch": 3.0,
"eval_accuracy": 0.982322917963401,
"eval_f1": 0.8089053803339517,
"eval_loss": 0.07790510356426239,
"eval_precision": 0.7879518072289157,
"eval_recall": 0.8310038119440915,
"eval_runtime": 4.1074,
"eval_samples_per_second": 224.961,
"eval_steps_per_second": 28.242,
"step": 2040
},
{
"epoch": 3.6764705882352944,
"grad_norm": 0.008734635077416897,
"learning_rate": 1.1830065359477125e-05,
"loss": 0.012,
"step": 2500
},
{
"epoch": 4.0,
"eval_accuracy": 0.982789742312959,
"eval_f1": 0.8168498168498168,
"eval_loss": 0.0823572650551796,
"eval_precision": 0.7861339600470035,
"eval_recall": 0.8500635324015248,
"eval_runtime": 2.5823,
"eval_samples_per_second": 357.827,
"eval_steps_per_second": 44.922,
"step": 2720
},
{
"epoch": 4.411764705882353,
"grad_norm": 0.3800855576992035,
"learning_rate": 1.0196078431372549e-05,
"loss": 0.0081,
"step": 3000
},
{
"epoch": 5.0,
"eval_accuracy": 0.9815137557575003,
"eval_f1": 0.8112745098039216,
"eval_loss": 0.10165887326002121,
"eval_precision": 0.7834319526627219,
"eval_recall": 0.841168996188056,
"eval_runtime": 2.596,
"eval_samples_per_second": 355.931,
"eval_steps_per_second": 44.684,
"step": 3400
},
{
"epoch": 5.147058823529412,
"grad_norm": 1.0274019241333008,
"learning_rate": 8.562091503267974e-06,
"loss": 0.0054,
"step": 3500
},
{
"epoch": 5.882352941176471,
"grad_norm": 0.9368045926094055,
"learning_rate": 6.928104575163399e-06,
"loss": 0.0028,
"step": 4000
},
{
"epoch": 6.0,
"eval_accuracy": 0.981607120627412,
"eval_f1": 0.8175092478421702,
"eval_loss": 0.11204753071069717,
"eval_precision": 0.7940119760479042,
"eval_recall": 0.8424396442185514,
"eval_runtime": 2.6214,
"eval_samples_per_second": 352.478,
"eval_steps_per_second": 44.25,
"step": 4080
},
{
"epoch": 6.617647058823529,
"grad_norm": 0.0018368919845670462,
"learning_rate": 5.294117647058824e-06,
"loss": 0.0021,
"step": 4500
},
{
"epoch": 7.0,
"eval_accuracy": 0.9808290800448152,
"eval_f1": 0.8186215235792019,
"eval_loss": 0.12193118035793304,
"eval_precision": 0.7808535178777394,
"eval_recall": 0.8602287166454892,
"eval_runtime": 2.6902,
"eval_samples_per_second": 343.468,
"eval_steps_per_second": 43.119,
"step": 4760
},
{
"epoch": 7.352941176470588,
"grad_norm": 0.004133788403123617,
"learning_rate": 3.6601307189542484e-06,
"loss": 0.0011,
"step": 5000
},
{
"epoch": 8.0,
"eval_accuracy": 0.9814826341341965,
"eval_f1": 0.8152173913043478,
"eval_loss": 0.12062280625104904,
"eval_precision": 0.7767548906789413,
"eval_recall": 0.8576874205844981,
"eval_runtime": 2.6543,
"eval_samples_per_second": 348.116,
"eval_steps_per_second": 43.703,
"step": 5440
},
{
"epoch": 8.088235294117647,
"grad_norm": 0.00526324100792408,
"learning_rate": 2.0261437908496734e-06,
"loss": 0.0018,
"step": 5500
},
{
"epoch": 8.823529411764707,
"grad_norm": 0.01854279637336731,
"learning_rate": 3.921568627450981e-07,
"loss": 0.0005,
"step": 6000
},
{
"epoch": 9.0,
"eval_accuracy": 0.9821050666002739,
"eval_f1": 0.8220858895705522,
"eval_loss": 0.1177670955657959,
"eval_precision": 0.7947805456702254,
"eval_recall": 0.8513341804320204,
"eval_runtime": 2.7389,
"eval_samples_per_second": 337.365,
"eval_steps_per_second": 42.353,
"step": 6120
}
],
"logging_steps": 500,
"max_steps": 6120,
"num_input_tokens_seen": 0,
"num_train_epochs": 9,
"save_steps": 500,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 791914496183100.0,
"train_batch_size": 8,
"trial_name": null,
"trial_params": null
}