deepseek-8b-orpo-lora / all_results.json
zfz1's picture
End of training
dd36c6b verified
raw
history blame
922 Bytes
{
"epoch": 1.9936102236421727,
"eval_log_odds_chosen": 1.0634132623672485,
"eval_log_odds_ratio": -0.421150267124176,
"eval_logits/chosen": 35.52544021606445,
"eval_logits/rejected": 34.42332077026367,
"eval_logps/chosen": -0.3376733958721161,
"eval_logps/rejected": -0.8398498296737671,
"eval_loss": 0.6817505359649658,
"eval_nll_loss": 0.6413611173629761,
"eval_rewards/accuracies": 0.8088235259056091,
"eval_rewards/chosen": -0.033767346292734146,
"eval_rewards/margins": 0.0502176471054554,
"eval_rewards/rejected": -0.08398497849702835,
"eval_runtime": 252.7054,
"eval_samples": 5398,
"eval_samples_per_second": 21.361,
"eval_steps_per_second": 0.336,
"total_flos": 0.0,
"train_loss": 0.7846963420892373,
"train_runtime": 5577.3844,
"train_samples": 20000,
"train_samples_per_second": 7.172,
"train_steps_per_second": 0.056
}