{ "best_metric": 0.002982336562126875, "best_model_checkpoint": "t5/checkpoint-25566", "epoch": 8.0, "eval_steps": 500, "global_step": 102264, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0, "grad_norm": 0.09756029397249222, "learning_rate": 1.9600000000000002e-05, "loss": 0.0059, "step": 12783 }, { "epoch": 1.0, "eval_accuracy": 0.9804666666666667, "eval_loss": 0.0031315775122493505, "eval_runtime": 472.802, "eval_samples_per_second": 63.452, "eval_steps_per_second": 0.497, "step": 12783 }, { "epoch": 2.0, "grad_norm": 0.0030228213872760534, "learning_rate": 1.9200000000000003e-05, "loss": 0.0022, "step": 25566 }, { "epoch": 2.0, "eval_accuracy": 0.9818666666666667, "eval_loss": 0.002982336562126875, "eval_runtime": 474.0664, "eval_samples_per_second": 63.282, "eval_steps_per_second": 0.496, "step": 25566 }, { "epoch": 3.0, "grad_norm": 0.00019585879635997117, "learning_rate": 1.88e-05, "loss": 0.0012, "step": 38349 }, { "epoch": 3.0, "eval_accuracy": 0.9825, "eval_loss": 0.0032824024092406034, "eval_runtime": 478.2854, "eval_samples_per_second": 62.724, "eval_steps_per_second": 0.491, "step": 38349 }, { "epoch": 4.0, "grad_norm": 0.004981159698218107, "learning_rate": 1.8400000000000003e-05, "loss": 0.0007, "step": 51132 }, { "epoch": 4.0, "eval_accuracy": 0.9826, "eval_loss": 0.003532745875418186, "eval_runtime": 469.3109, "eval_samples_per_second": 63.924, "eval_steps_per_second": 0.501, "step": 51132 }, { "epoch": 5.0, "grad_norm": 0.002326933667063713, "learning_rate": 1.8e-05, "loss": 0.0005, "step": 63915 }, { "epoch": 5.0, "eval_accuracy": 0.9821666666666666, "eval_loss": 0.003947438672184944, "eval_runtime": 471.3239, "eval_samples_per_second": 63.65, "eval_steps_per_second": 0.499, "step": 63915 }, { "epoch": 6.0, "grad_norm": 0.034791357815265656, "learning_rate": 1.76e-05, "loss": 0.0004, "step": 76698 }, { "epoch": 6.0, "eval_accuracy": 0.9815666666666667, "eval_loss": 0.0041623483411967754, "eval_runtime": 470.7737, "eval_samples_per_second": 63.725, "eval_steps_per_second": 0.499, "step": 76698 }, { "epoch": 7.0, "grad_norm": 9.868334018392488e-05, "learning_rate": 1.72e-05, "loss": 0.0003, "step": 89481 }, { "epoch": 7.0, "eval_accuracy": 0.9822666666666666, "eval_loss": 0.004203655291348696, "eval_runtime": 471.0374, "eval_samples_per_second": 63.689, "eval_steps_per_second": 0.499, "step": 89481 }, { "epoch": 8.0, "grad_norm": 0.002833570586517453, "learning_rate": 1.6800000000000002e-05, "loss": 0.0003, "step": 102264 }, { "epoch": 8.0, "eval_accuracy": 0.9826, "eval_loss": 0.0045397114008665085, "eval_runtime": 469.6081, "eval_samples_per_second": 63.883, "eval_steps_per_second": 0.5, "step": 102264 } ], "logging_steps": 500, "max_steps": 639150, "num_input_tokens_seen": 0, "num_train_epochs": 50, "save_steps": 500, "total_flos": 5.937159320662856e+17, "train_batch_size": 32, "trial_name": null, "trial_params": null }