{ "best_metric": 1.1438716650009155, "best_model_checkpoint": "/data1/attanasiog/safetune/models/checkpoint-750", "epoch": 0.9996955859969558, "global_step": 821, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.03, "learning_rate": 2e-05, "loss": 3.1406, "step": 25 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 2.1055, "step": 50 }, { "epoch": 0.06, "eval_loss": 1.762616515159607, "eval_mse": 1.7626165039521147, "eval_runtime": 183.7464, "eval_samples_per_second": 15.891, "eval_steps_per_second": 3.973, "step": 50 }, { "epoch": 0.09, "learning_rate": 2e-05, "loss": 1.7607, "step": 75 }, { "epoch": 0.12, "learning_rate": 2e-05, "loss": 1.6818, "step": 100 }, { "epoch": 0.12, "eval_loss": 1.4659080505371094, "eval_mse": 1.4659080196895005, "eval_runtime": 183.1621, "eval_samples_per_second": 15.942, "eval_steps_per_second": 3.986, "step": 100 }, { "epoch": 0.15, "learning_rate": 2e-05, "loss": 1.5873, "step": 125 }, { "epoch": 0.18, "learning_rate": 2e-05, "loss": 1.4062, "step": 150 }, { "epoch": 0.18, "eval_loss": 1.6322834491729736, "eval_mse": 1.632283383560956, "eval_runtime": 183.308, "eval_samples_per_second": 15.929, "eval_steps_per_second": 3.982, "step": 150 }, { "epoch": 0.21, "learning_rate": 2e-05, "loss": 1.5638, "step": 175 }, { "epoch": 0.24, "learning_rate": 2e-05, "loss": 1.4777, "step": 200 }, { "epoch": 0.24, "eval_loss": 1.3775594234466553, "eval_mse": 1.3775594118336603, "eval_runtime": 183.7408, "eval_samples_per_second": 15.892, "eval_steps_per_second": 3.973, "step": 200 }, { "epoch": 0.27, "learning_rate": 2e-05, "loss": 1.4016, "step": 225 }, { "epoch": 0.3, "learning_rate": 2e-05, "loss": 1.4106, "step": 250 }, { "epoch": 0.3, "eval_loss": 1.304379940032959, "eval_mse": 1.3043799442360644, "eval_runtime": 183.2767, "eval_samples_per_second": 15.932, "eval_steps_per_second": 3.983, "step": 250 }, { "epoch": 0.33, "learning_rate": 2e-05, "loss": 1.3077, "step": 275 }, { "epoch": 0.37, "learning_rate": 2e-05, "loss": 1.2702, "step": 300 }, { "epoch": 0.37, "eval_loss": 1.3791648149490356, "eval_mse": 1.3791648080316543, "eval_runtime": 183.7482, "eval_samples_per_second": 15.891, "eval_steps_per_second": 3.973, "step": 300 }, { "epoch": 0.4, "learning_rate": 2e-05, "loss": 1.3216, "step": 325 }, { "epoch": 0.43, "learning_rate": 2e-05, "loss": 1.4448, "step": 350 }, { "epoch": 0.43, "eval_loss": 1.3047657012939453, "eval_mse": 1.3047656766562488, "eval_runtime": 183.5141, "eval_samples_per_second": 15.912, "eval_steps_per_second": 3.978, "step": 350 }, { "epoch": 0.46, "learning_rate": 2e-05, "loss": 1.3344, "step": 375 }, { "epoch": 0.49, "learning_rate": 2e-05, "loss": 1.3582, "step": 400 }, { "epoch": 0.49, "eval_loss": 1.248475432395935, "eval_mse": 1.248475439073647, "eval_runtime": 183.4404, "eval_samples_per_second": 15.918, "eval_steps_per_second": 3.979, "step": 400 }, { "epoch": 0.52, "learning_rate": 2e-05, "loss": 1.1767, "step": 425 }, { "epoch": 0.55, "learning_rate": 2e-05, "loss": 1.2357, "step": 450 }, { "epoch": 0.55, "eval_loss": 1.3708600997924805, "eval_mse": 1.3708599920592506, "eval_runtime": 183.3578, "eval_samples_per_second": 15.925, "eval_steps_per_second": 3.981, "step": 450 }, { "epoch": 0.58, "learning_rate": 2e-05, "loss": 1.3204, "step": 475 }, { "epoch": 0.61, "learning_rate": 2e-05, "loss": 1.1075, "step": 500 }, { "epoch": 0.61, "eval_loss": 1.2537685632705688, "eval_mse": 1.2537685594483436, "eval_runtime": 183.4476, "eval_samples_per_second": 15.917, "eval_steps_per_second": 3.979, "step": 500 }, { "epoch": 0.64, "learning_rate": 2e-05, "loss": 1.4399, "step": 525 }, { "epoch": 0.67, "learning_rate": 2e-05, "loss": 1.2406, "step": 550 }, { "epoch": 0.67, "eval_loss": 1.19956636428833, "eval_mse": 1.1995664079472905, "eval_runtime": 183.4122, "eval_samples_per_second": 15.92, "eval_steps_per_second": 3.98, "step": 550 }, { "epoch": 0.7, "learning_rate": 2e-05, "loss": 1.1667, "step": 575 }, { "epoch": 0.73, "learning_rate": 2e-05, "loss": 1.2309, "step": 600 }, { "epoch": 0.73, "eval_loss": 1.2671302556991577, "eval_mse": 1.2671302648346725, "eval_runtime": 183.2517, "eval_samples_per_second": 15.934, "eval_steps_per_second": 3.984, "step": 600 }, { "epoch": 0.76, "learning_rate": 2e-05, "loss": 1.1541, "step": 625 }, { "epoch": 0.79, "learning_rate": 2e-05, "loss": 1.194, "step": 650 }, { "epoch": 0.79, "eval_loss": 1.4325885772705078, "eval_mse": 1.4325886574488846, "eval_runtime": 183.2552, "eval_samples_per_second": 15.934, "eval_steps_per_second": 3.984, "step": 650 }, { "epoch": 0.82, "learning_rate": 2e-05, "loss": 1.1926, "step": 675 }, { "epoch": 0.85, "learning_rate": 2e-05, "loss": 1.2135, "step": 700 }, { "epoch": 0.85, "eval_loss": 1.176251769065857, "eval_mse": 1.1762518139201403, "eval_runtime": 183.5032, "eval_samples_per_second": 15.913, "eval_steps_per_second": 3.978, "step": 700 }, { "epoch": 0.88, "learning_rate": 2e-05, "loss": 1.111, "step": 725 }, { "epoch": 0.91, "learning_rate": 2e-05, "loss": 1.2196, "step": 750 }, { "epoch": 0.91, "eval_loss": 1.1438716650009155, "eval_mse": 1.1438715909390142, "eval_runtime": 183.3716, "eval_samples_per_second": 15.924, "eval_steps_per_second": 3.981, "step": 750 }, { "epoch": 0.94, "learning_rate": 2e-05, "loss": 1.1891, "step": 775 }, { "epoch": 0.97, "learning_rate": 2e-05, "loss": 1.1983, "step": 800 }, { "epoch": 0.97, "eval_loss": 1.1710615158081055, "eval_mse": 1.1710615362599224, "eval_runtime": 183.5541, "eval_samples_per_second": 15.908, "eval_steps_per_second": 3.977, "step": 800 }, { "epoch": 1.0, "step": 821, "total_flos": 2.448378334121165e+16, "train_loss": 1.4025647422428107, "train_runtime": 6674.4502, "train_samples_per_second": 3.937, "train_steps_per_second": 0.123 } ], "max_steps": 821, "num_train_epochs": 1, "total_flos": 2.448378334121165e+16, "trial_name": null, "trial_params": null }