{ "best_metric": 0.8, "best_model_checkpoint": "vit-base-patch16-224/checkpoint-8", "epoch": 10.0, "eval_steps": 500, "global_step": 80, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0, "eval_accuracy": 0.8, "eval_loss": 0.5545626282691956, "eval_runtime": 0.0989, "eval_samples_per_second": 151.64, "eval_steps_per_second": 40.437, "step": 8 }, { "epoch": 1.25, "grad_norm": 4.057127952575684, "learning_rate": 4.861111111111111e-06, "loss": 0.5945, "step": 10 }, { "epoch": 2.0, "eval_accuracy": 0.8, "eval_loss": 0.5408901572227478, "eval_runtime": 0.1082, "eval_samples_per_second": 138.616, "eval_steps_per_second": 36.964, "step": 16 }, { "epoch": 2.5, "grad_norm": 4.53317403793335, "learning_rate": 4.166666666666667e-06, "loss": 0.5832, "step": 20 }, { "epoch": 3.0, "eval_accuracy": 0.8, "eval_loss": 0.5466590523719788, "eval_runtime": 0.1022, "eval_samples_per_second": 146.703, "eval_steps_per_second": 39.121, "step": 24 }, { "epoch": 3.75, "grad_norm": 3.683661937713623, "learning_rate": 3.4722222222222224e-06, "loss": 0.5338, "step": 30 }, { "epoch": 4.0, "eval_accuracy": 0.8, "eval_loss": 0.551804780960083, "eval_runtime": 0.0956, "eval_samples_per_second": 156.964, "eval_steps_per_second": 41.857, "step": 32 }, { "epoch": 5.0, "grad_norm": 4.659008979797363, "learning_rate": 2.7777777777777783e-06, "loss": 0.5513, "step": 40 }, { "epoch": 5.0, "eval_accuracy": 0.8, "eval_loss": 0.5602456331253052, "eval_runtime": 0.0943, "eval_samples_per_second": 159.134, "eval_steps_per_second": 42.436, "step": 40 }, { "epoch": 6.0, "eval_accuracy": 0.7333333333333333, "eval_loss": 0.5607239007949829, "eval_runtime": 0.1057, "eval_samples_per_second": 141.965, "eval_steps_per_second": 37.857, "step": 48 }, { "epoch": 6.25, "grad_norm": 6.287199974060059, "learning_rate": 2.0833333333333334e-06, "loss": 0.5417, "step": 50 }, { "epoch": 7.0, "eval_accuracy": 0.7333333333333333, "eval_loss": 0.5707207322120667, "eval_runtime": 0.1098, "eval_samples_per_second": 136.606, "eval_steps_per_second": 36.428, "step": 56 }, { "epoch": 7.5, "grad_norm": 3.7512075901031494, "learning_rate": 1.3888888888888892e-06, "loss": 0.5343, "step": 60 }, { "epoch": 8.0, "eval_accuracy": 0.7333333333333333, "eval_loss": 0.5747508406639099, "eval_runtime": 0.0961, "eval_samples_per_second": 156.038, "eval_steps_per_second": 41.61, "step": 64 }, { "epoch": 8.75, "grad_norm": 4.6297926902771, "learning_rate": 6.944444444444446e-07, "loss": 0.5379, "step": 70 }, { "epoch": 9.0, "eval_accuracy": 0.7333333333333333, "eval_loss": 0.5735542178153992, "eval_runtime": 0.0985, "eval_samples_per_second": 152.245, "eval_steps_per_second": 40.599, "step": 72 }, { "epoch": 10.0, "grad_norm": 2.6508984565734863, "learning_rate": 0.0, "loss": 0.5137, "step": 80 }, { "epoch": 10.0, "eval_accuracy": 0.7333333333333333, "eval_loss": 0.5730186700820923, "eval_runtime": 0.344, "eval_samples_per_second": 43.601, "eval_steps_per_second": 11.627, "step": 80 }, { "epoch": 10.0, "step": 80, "total_flos": 9.841482681053184e+16, "train_loss": 0.5487962663173676, "train_runtime": 36.3019, "train_samples_per_second": 34.984, "train_steps_per_second": 2.204 } ], "logging_steps": 10, "max_steps": 80, "num_input_tokens_seen": 0, "num_train_epochs": 10, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 9.841482681053184e+16, "train_batch_size": 4, "trial_name": null, "trial_params": null }