{ "epoch": 2.998430141287284, "eval_logits/chosen": -0.58203125, "eval_logits/rejected": 1.25, "eval_logps/chosen": -113.0, "eval_logps/rejected": -171.0, "eval_loss": 0.849609375, "eval_nll_loss": 0.0, "eval_rewards/accuracies": 0.671875, "eval_rewards/chosen": -1.1328125, "eval_rewards/margins": 0.578125, "eval_rewards/rejected": -1.7109375, "eval_runtime": 23.4202, "eval_samples": 2000, "eval_samples_per_second": 85.396, "eval_steps_per_second": 1.366, "total_flos": 0.0, "train_loss": 0.0, "train_runtime": 0.0608, "train_samples": 61134, "train_samples_per_second": 3016192.654, "train_steps_per_second": 47117.218 }