Wenboz's picture
End of training
e325d79 verified
{
"epoch": 0.9991111111111111,
"eval_logits/chosen": 14.29055118560791,
"eval_logits/rejected": 12.499335289001465,
"eval_logps/chosen": -409.5496520996094,
"eval_logps/rejected": -385.20703125,
"eval_loss": 0.6913684010505676,
"eval_rewards/accuracies": 0.591269850730896,
"eval_rewards/chosen": -0.009797219187021255,
"eval_rewards/margins": 0.004927510395646095,
"eval_rewards/rejected": -0.01472472958266735,
"eval_runtime": 89.7641,
"eval_samples": 1000,
"eval_samples_per_second": 11.14,
"eval_steps_per_second": 0.702,
"total_flos": 0.0,
"train_loss": 0.6931305729197438,
"train_runtime": 7518.6256,
"train_samples": 36000,
"train_samples_per_second": 4.788,
"train_steps_per_second": 0.075
}