{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.5482456140350878, "eval_steps": 500, "global_step": 500, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.03289473684210526, "grad_norm": 0.6724075675010681, "learning_rate": 1.9874388886763944e-05, "loss": 3.3606, "step": 30 }, { "epoch": 0.06578947368421052, "grad_norm": 0.5784845352172852, "learning_rate": 1.9396926207859085e-05, "loss": 2.93, "step": 60 }, { "epoch": 0.09868421052631579, "grad_norm": 0.43988049030303955, "learning_rate": 1.8579834132349773e-05, "loss": 2.4914, "step": 90 }, { "epoch": 0.13157894736842105, "grad_norm": 0.48597127199172974, "learning_rate": 1.745264449675755e-05, "loss": 2.1611, "step": 120 }, { "epoch": 0.16447368421052633, "grad_norm": 0.48318102955818176, "learning_rate": 1.6056096871376667e-05, "loss": 1.8257, "step": 150 }, { "epoch": 0.19736842105263158, "grad_norm": 0.5479584336280823, "learning_rate": 1.4440666126057743e-05, "loss": 1.5039, "step": 180 }, { "epoch": 0.23026315789473684, "grad_norm": 0.445963054895401, "learning_rate": 1.266473813690035e-05, "loss": 1.2459, "step": 210 }, { "epoch": 0.2631578947368421, "grad_norm": 0.5397000908851624, "learning_rate": 1.0792499568567885e-05, "loss": 1.0866, "step": 240 }, { "epoch": 0.29605263157894735, "grad_norm": 0.32380956411361694, "learning_rate": 8.89161800098989e-06, "loss": 0.9383, "step": 270 }, { "epoch": 0.32894736842105265, "grad_norm": 0.2861121892929077, "learning_rate": 7.0307962467172555e-06, "loss": 0.8876, "step": 300 }, { "epoch": 0.3618421052631579, "grad_norm": 0.33690324425697327, "learning_rate": 5.277289252273175e-06, "loss": 0.8535, "step": 330 }, { "epoch": 0.39473684210526316, "grad_norm": 0.3120814859867096, "learning_rate": 3.6944733291547784e-06, "loss": 0.8281, "step": 360 }, { "epoch": 0.4276315789473684, "grad_norm": 0.2872855067253113, "learning_rate": 2.339555568810221e-06, "loss": 0.8277, "step": 390 }, { "epoch": 0.4605263157894737, "grad_norm": 0.39285048842430115, "learning_rate": 1.2615062293021508e-06, "loss": 0.8036, "step": 420 }, { "epoch": 0.4934210526315789, "grad_norm": 0.3163042962551117, "learning_rate": 4.992888225905467e-07, "loss": 0.8061, "step": 450 }, { "epoch": 0.5263157894736842, "grad_norm": 0.29761916399002075, "learning_rate": 8.04518716920466e-08, "loss": 0.7961, "step": 480 } ], "logging_steps": 30, "max_steps": 500, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 25, "total_flos": 2.3224000778521805e+17, "train_batch_size": 16, "trial_name": null, "trial_params": null }