|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 1.0, |
|
"eval_steps": 10000, |
|
"global_step": 2500, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0004, |
|
"grad_norm": 28.533812348552967, |
|
"learning_rate": 2e-09, |
|
"logits/chosen": 64.40786743164062, |
|
"logits/rejected": 45.84376525878906, |
|
"logps/chosen": -100.49942779541016, |
|
"logps/rejected": -65.1117172241211, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.004, |
|
"grad_norm": 30.859731079131965, |
|
"learning_rate": 2e-08, |
|
"logits/chosen": 57.91438674926758, |
|
"logits/rejected": 56.28672790527344, |
|
"logps/chosen": -55.96101760864258, |
|
"logps/rejected": -68.71676635742188, |
|
"loss": 0.6919, |
|
"rewards/accuracies": 0.3888888955116272, |
|
"rewards/chosen": -0.007306650280952454, |
|
"rewards/margins": -0.014283686876296997, |
|
"rewards/rejected": 0.006977038457989693, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.008, |
|
"grad_norm": 27.85742234963813, |
|
"learning_rate": 4e-08, |
|
"logits/chosen": 58.18536376953125, |
|
"logits/rejected": 55.3450927734375, |
|
"logps/chosen": -54.76381301879883, |
|
"logps/rejected": -60.58549880981445, |
|
"loss": 0.6953, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": 0.011082855053246021, |
|
"rewards/margins": 0.0007482476648874581, |
|
"rewards/rejected": 0.010334606282413006, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.012, |
|
"grad_norm": 23.673728380473765, |
|
"learning_rate": 6e-08, |
|
"logits/chosen": 58.1522102355957, |
|
"logits/rejected": 57.0158576965332, |
|
"logps/chosen": -57.9251823425293, |
|
"logps/rejected": -69.07908630371094, |
|
"loss": 0.6949, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": 0.008253919892013073, |
|
"rewards/margins": 0.016202187165617943, |
|
"rewards/rejected": -0.007948270067572594, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.016, |
|
"grad_norm": 28.67344451856464, |
|
"learning_rate": 8e-08, |
|
"logits/chosen": 58.062538146972656, |
|
"logits/rejected": 57.1534309387207, |
|
"logps/chosen": -61.0693473815918, |
|
"logps/rejected": -73.38080596923828, |
|
"loss": 0.6963, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": 0.019384900107979774, |
|
"rewards/margins": 0.0025020535103976727, |
|
"rewards/rejected": 0.016882847994565964, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 31.647118654717907, |
|
"learning_rate": 1e-07, |
|
"logits/chosen": 59.191986083984375, |
|
"logits/rejected": 53.941322326660156, |
|
"logps/chosen": -56.7202033996582, |
|
"logps/rejected": -66.81363677978516, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": 0.009077167138457298, |
|
"rewards/margins": 0.010364162735641003, |
|
"rewards/rejected": -0.0012869939673691988, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.024, |
|
"grad_norm": 24.669256574908005, |
|
"learning_rate": 1.2e-07, |
|
"logits/chosen": 56.26934051513672, |
|
"logits/rejected": 56.80645751953125, |
|
"logps/chosen": -58.25422286987305, |
|
"logps/rejected": -62.55634689331055, |
|
"loss": 0.6951, |
|
"rewards/accuracies": 0.44999998807907104, |
|
"rewards/chosen": -0.00721455505117774, |
|
"rewards/margins": -0.0019339373102411628, |
|
"rewards/rejected": -0.005280619021505117, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.028, |
|
"grad_norm": 26.74384143919655, |
|
"learning_rate": 1.4e-07, |
|
"logits/chosen": 58.523521423339844, |
|
"logits/rejected": 55.622764587402344, |
|
"logps/chosen": -54.381553649902344, |
|
"logps/rejected": -60.544677734375, |
|
"loss": 0.6959, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": -0.024661114439368248, |
|
"rewards/margins": -0.0074427248910069466, |
|
"rewards/rejected": -0.017218390479683876, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.032, |
|
"grad_norm": 31.840440623804586, |
|
"learning_rate": 1.6e-07, |
|
"logits/chosen": 54.36933135986328, |
|
"logits/rejected": 59.20940017700195, |
|
"logps/chosen": -58.11821365356445, |
|
"logps/rejected": -70.71514892578125, |
|
"loss": 0.7036, |
|
"rewards/accuracies": 0.4749999940395355, |
|
"rewards/chosen": -0.01636091247200966, |
|
"rewards/margins": -0.008592168800532818, |
|
"rewards/rejected": -0.007768744137138128, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.036, |
|
"grad_norm": 34.438677762408695, |
|
"learning_rate": 1.8e-07, |
|
"logits/chosen": 57.025726318359375, |
|
"logits/rejected": 58.043739318847656, |
|
"logps/chosen": -54.304481506347656, |
|
"logps/rejected": -68.1335678100586, |
|
"loss": 0.6923, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.009257683530449867, |
|
"rewards/margins": -0.007113204337656498, |
|
"rewards/rejected": -0.002144479425624013, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"grad_norm": 27.55087060204859, |
|
"learning_rate": 2e-07, |
|
"logits/chosen": 58.55786895751953, |
|
"logits/rejected": 55.395263671875, |
|
"logps/chosen": -63.9180793762207, |
|
"logps/rejected": -65.21315002441406, |
|
"loss": 0.6993, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.010401496663689613, |
|
"rewards/margins": 0.014750251546502113, |
|
"rewards/rejected": -0.025151750072836876, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.044, |
|
"grad_norm": 34.47770001073974, |
|
"learning_rate": 2.1999999999999998e-07, |
|
"logits/chosen": 56.813232421875, |
|
"logits/rejected": 56.03766632080078, |
|
"logps/chosen": -58.6301155090332, |
|
"logps/rejected": -74.81730651855469, |
|
"loss": 0.6944, |
|
"rewards/accuracies": 0.4749999940395355, |
|
"rewards/chosen": -0.02511642314493656, |
|
"rewards/margins": 0.00451306626200676, |
|
"rewards/rejected": -0.02962948940694332, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.048, |
|
"grad_norm": 30.953399050687445, |
|
"learning_rate": 2.4e-07, |
|
"logits/chosen": 57.8872184753418, |
|
"logits/rejected": 57.22182083129883, |
|
"logps/chosen": -59.11281204223633, |
|
"logps/rejected": -70.58454895019531, |
|
"loss": 0.6964, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": -0.04789043590426445, |
|
"rewards/margins": 0.009716681204736233, |
|
"rewards/rejected": -0.057607125490903854, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.052, |
|
"grad_norm": 28.466689861082365, |
|
"learning_rate": 2.6e-07, |
|
"logits/chosen": 57.37153244018555, |
|
"logits/rejected": 56.422645568847656, |
|
"logps/chosen": -62.57783889770508, |
|
"logps/rejected": -73.04331970214844, |
|
"loss": 0.6871, |
|
"rewards/accuracies": 0.4749999940395355, |
|
"rewards/chosen": -0.023358307778835297, |
|
"rewards/margins": 0.020040828734636307, |
|
"rewards/rejected": -0.043399132788181305, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.056, |
|
"grad_norm": 28.47666983148757, |
|
"learning_rate": 2.8e-07, |
|
"logits/chosen": 56.3998908996582, |
|
"logits/rejected": 56.23912811279297, |
|
"logps/chosen": -54.915016174316406, |
|
"logps/rejected": -70.63322448730469, |
|
"loss": 0.6795, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": -0.04837559536099434, |
|
"rewards/margins": 0.03748052567243576, |
|
"rewards/rejected": -0.0858561173081398, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"grad_norm": 31.41163403514937, |
|
"learning_rate": 3e-07, |
|
"logits/chosen": 58.12103271484375, |
|
"logits/rejected": 56.27421188354492, |
|
"logps/chosen": -63.10313034057617, |
|
"logps/rejected": -66.35491943359375, |
|
"loss": 0.6908, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.08965396881103516, |
|
"rewards/margins": -0.004495841450989246, |
|
"rewards/rejected": -0.08515812456607819, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.064, |
|
"grad_norm": 26.840900506278157, |
|
"learning_rate": 3.2e-07, |
|
"logits/chosen": 55.839454650878906, |
|
"logits/rejected": 58.34368133544922, |
|
"logps/chosen": -50.59211349487305, |
|
"logps/rejected": -73.50721740722656, |
|
"loss": 0.6767, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.06941857188940048, |
|
"rewards/margins": 0.04276493936777115, |
|
"rewards/rejected": -0.11218351125717163, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.068, |
|
"grad_norm": 27.292091726963864, |
|
"learning_rate": 3.4000000000000003e-07, |
|
"logits/chosen": 57.70301055908203, |
|
"logits/rejected": 56.25288009643555, |
|
"logps/chosen": -63.054412841796875, |
|
"logps/rejected": -68.32513427734375, |
|
"loss": 0.6661, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.07662694901227951, |
|
"rewards/margins": 0.05604536086320877, |
|
"rewards/rejected": -0.13267230987548828, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.072, |
|
"grad_norm": 30.73299858141547, |
|
"learning_rate": 3.6e-07, |
|
"logits/chosen": 56.193214416503906, |
|
"logits/rejected": 58.65944290161133, |
|
"logps/chosen": -59.230201721191406, |
|
"logps/rejected": -72.97864532470703, |
|
"loss": 0.6541, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.11543898284435272, |
|
"rewards/margins": 0.10617627948522568, |
|
"rewards/rejected": -0.221615269780159, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.076, |
|
"grad_norm": 27.159221184778563, |
|
"learning_rate": 3.7999999999999996e-07, |
|
"logits/chosen": 57.91032791137695, |
|
"logits/rejected": 55.99603271484375, |
|
"logps/chosen": -59.06760787963867, |
|
"logps/rejected": -74.1017837524414, |
|
"loss": 0.642, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.12510164082050323, |
|
"rewards/margins": 0.09499747306108475, |
|
"rewards/rejected": -0.22009912133216858, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"grad_norm": 27.221778714068638, |
|
"learning_rate": 4e-07, |
|
"logits/chosen": 58.486793518066406, |
|
"logits/rejected": 55.91951370239258, |
|
"logps/chosen": -58.47349166870117, |
|
"logps/rejected": -62.717193603515625, |
|
"loss": 0.6393, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.12836897373199463, |
|
"rewards/margins": 0.1229625716805458, |
|
"rewards/rejected": -0.25133153796195984, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.084, |
|
"grad_norm": 29.03653153648935, |
|
"learning_rate": 4.1999999999999995e-07, |
|
"logits/chosen": 57.81336212158203, |
|
"logits/rejected": 58.15123748779297, |
|
"logps/chosen": -61.33382034301758, |
|
"logps/rejected": -74.703125, |
|
"loss": 0.6376, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.23162539303302765, |
|
"rewards/margins": 0.11328538507223129, |
|
"rewards/rejected": -0.34491077065467834, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.088, |
|
"grad_norm": 26.95404534183593, |
|
"learning_rate": 4.3999999999999997e-07, |
|
"logits/chosen": 57.90010452270508, |
|
"logits/rejected": 54.545555114746094, |
|
"logps/chosen": -63.3861198425293, |
|
"logps/rejected": -69.0276107788086, |
|
"loss": 0.6285, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.22368836402893066, |
|
"rewards/margins": 0.12568101286888123, |
|
"rewards/rejected": -0.3493694067001343, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.092, |
|
"grad_norm": 26.883373858805115, |
|
"learning_rate": 4.6e-07, |
|
"logits/chosen": 56.64574432373047, |
|
"logits/rejected": 55.44426345825195, |
|
"logps/chosen": -54.02278518676758, |
|
"logps/rejected": -70.68146514892578, |
|
"loss": 0.608, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.3271670937538147, |
|
"rewards/margins": 0.13781145215034485, |
|
"rewards/rejected": -0.46497854590415955, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.096, |
|
"grad_norm": 26.103399727840255, |
|
"learning_rate": 4.8e-07, |
|
"logits/chosen": 56.06645965576172, |
|
"logits/rejected": 53.25556182861328, |
|
"logps/chosen": -59.9603271484375, |
|
"logps/rejected": -73.2823257446289, |
|
"loss": 0.6089, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.3069990277290344, |
|
"rewards/margins": 0.23678632080554962, |
|
"rewards/rejected": -0.5437853336334229, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"grad_norm": 23.722260965256044, |
|
"learning_rate": 5e-07, |
|
"logits/chosen": 56.92375946044922, |
|
"logits/rejected": 55.26447677612305, |
|
"logps/chosen": -69.1079330444336, |
|
"logps/rejected": -73.78802490234375, |
|
"loss": 0.6297, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.4947397708892822, |
|
"rewards/margins": 0.14389055967330933, |
|
"rewards/rejected": -0.6386303305625916, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.104, |
|
"grad_norm": 25.422708939657227, |
|
"learning_rate": 4.99975631002326e-07, |
|
"logits/chosen": 54.239952087402344, |
|
"logits/rejected": 54.537017822265625, |
|
"logps/chosen": -58.73674392700195, |
|
"logps/rejected": -69.70175170898438, |
|
"loss": 0.5727, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.4754973351955414, |
|
"rewards/margins": 0.28132364153862, |
|
"rewards/rejected": -0.7568209767341614, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.108, |
|
"grad_norm": 26.17681951761786, |
|
"learning_rate": 4.999025287600885e-07, |
|
"logits/chosen": 56.4760627746582, |
|
"logits/rejected": 54.777679443359375, |
|
"logps/chosen": -68.2262954711914, |
|
"logps/rejected": -77.97975158691406, |
|
"loss": 0.558, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.5877800583839417, |
|
"rewards/margins": 0.398207426071167, |
|
"rewards/rejected": -0.9859874844551086, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.112, |
|
"grad_norm": 20.299221772759086, |
|
"learning_rate": 4.997807075247145e-07, |
|
"logits/chosen": 55.838172912597656, |
|
"logits/rejected": 55.217002868652344, |
|
"logps/chosen": -58.478309631347656, |
|
"logps/rejected": -74.65936279296875, |
|
"loss": 0.4985, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.5783756971359253, |
|
"rewards/margins": 0.5172783136367798, |
|
"rewards/rejected": -1.095654010772705, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.116, |
|
"grad_norm": 28.36770834985096, |
|
"learning_rate": 4.996101910454953e-07, |
|
"logits/chosen": 56.4325065612793, |
|
"logits/rejected": 54.16033172607422, |
|
"logps/chosen": -69.69315338134766, |
|
"logps/rejected": -79.90055847167969, |
|
"loss": 0.5267, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.7261027097702026, |
|
"rewards/margins": 0.5981040596961975, |
|
"rewards/rejected": -1.324206829071045, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"grad_norm": 25.02377762418046, |
|
"learning_rate": 4.99391012564956e-07, |
|
"logits/chosen": 53.800933837890625, |
|
"logits/rejected": 55.656532287597656, |
|
"logps/chosen": -66.37216186523438, |
|
"logps/rejected": -82.07975769042969, |
|
"loss": 0.4772, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.7666959762573242, |
|
"rewards/margins": 0.5859344005584717, |
|
"rewards/rejected": -1.352630376815796, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.124, |
|
"grad_norm": 29.446782373035884, |
|
"learning_rate": 4.991232148123761e-07, |
|
"logits/chosen": 57.31342697143555, |
|
"logits/rejected": 54.00205612182617, |
|
"logps/chosen": -72.55195617675781, |
|
"logps/rejected": -85.0060806274414, |
|
"loss": 0.4842, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -1.089627742767334, |
|
"rewards/margins": 0.5522868633270264, |
|
"rewards/rejected": -1.64191472530365, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.128, |
|
"grad_norm": 31.47681515659833, |
|
"learning_rate": 4.988068499954577e-07, |
|
"logits/chosen": 55.69438934326172, |
|
"logits/rejected": 54.8391227722168, |
|
"logps/chosen": -70.90913391113281, |
|
"logps/rejected": -82.49961853027344, |
|
"loss": 0.5177, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.8803223371505737, |
|
"rewards/margins": 0.687156081199646, |
|
"rewards/rejected": -1.5674786567687988, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.132, |
|
"grad_norm": 29.597337732185277, |
|
"learning_rate": 4.984419797901491e-07, |
|
"logits/chosen": 53.4282112121582, |
|
"logits/rejected": 54.79206466674805, |
|
"logps/chosen": -69.86516571044922, |
|
"logps/rejected": -90.27011108398438, |
|
"loss": 0.4469, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.9048943519592285, |
|
"rewards/margins": 1.1756442785263062, |
|
"rewards/rejected": -2.080538511276245, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.136, |
|
"grad_norm": 17.009359229526957, |
|
"learning_rate": 4.980286753286194e-07, |
|
"logits/chosen": 52.42332077026367, |
|
"logits/rejected": 57.04686737060547, |
|
"logps/chosen": -64.6177749633789, |
|
"logps/rejected": -89.59310150146484, |
|
"loss": 0.4411, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -1.1452090740203857, |
|
"rewards/margins": 0.9245441555976868, |
|
"rewards/rejected": -2.0697531700134277, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"grad_norm": 25.909018906834667, |
|
"learning_rate": 4.975670171853925e-07, |
|
"logits/chosen": 56.42598342895508, |
|
"logits/rejected": 53.42926025390625, |
|
"logps/chosen": -69.07212829589844, |
|
"logps/rejected": -84.19108581542969, |
|
"loss": 0.4175, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -1.2340478897094727, |
|
"rewards/margins": 1.0201036930084229, |
|
"rewards/rejected": -2.2541518211364746, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.144, |
|
"grad_norm": 27.75848608177815, |
|
"learning_rate": 4.970570953616382e-07, |
|
"logits/chosen": 53.7008056640625, |
|
"logits/rejected": 55.94395065307617, |
|
"logps/chosen": -66.99665832519531, |
|
"logps/rejected": -87.76789855957031, |
|
"loss": 0.4435, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -1.3015058040618896, |
|
"rewards/margins": 0.9406806826591492, |
|
"rewards/rejected": -2.2421865463256836, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.148, |
|
"grad_norm": 24.273863705867576, |
|
"learning_rate": 4.964990092676262e-07, |
|
"logits/chosen": 49.97655487060547, |
|
"logits/rejected": 58.69891357421875, |
|
"logps/chosen": -68.9350814819336, |
|
"logps/rejected": -98.9703369140625, |
|
"loss": 0.3817, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -1.4265937805175781, |
|
"rewards/margins": 1.1566370725631714, |
|
"rewards/rejected": -2.583230495452881, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.152, |
|
"grad_norm": 37.00653796376565, |
|
"learning_rate": 4.958928677033465e-07, |
|
"logits/chosen": 55.870933532714844, |
|
"logits/rejected": 54.31379318237305, |
|
"logps/chosen": -77.0854263305664, |
|
"logps/rejected": -90.93013000488281, |
|
"loss": 0.4856, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -1.7595773935317993, |
|
"rewards/margins": 0.8833327293395996, |
|
"rewards/rejected": -2.6429102420806885, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.156, |
|
"grad_norm": 28.256248306216513, |
|
"learning_rate": 4.952387888372978e-07, |
|
"logits/chosen": 53.375587463378906, |
|
"logits/rejected": 52.61153030395508, |
|
"logps/chosen": -78.0414810180664, |
|
"logps/rejected": -98.44496154785156, |
|
"loss": 0.38, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -1.7100744247436523, |
|
"rewards/margins": 1.1639991998672485, |
|
"rewards/rejected": -2.8740735054016113, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"grad_norm": 29.2072173920723, |
|
"learning_rate": 4.945369001834514e-07, |
|
"logits/chosen": 51.90997314453125, |
|
"logits/rejected": 53.769920349121094, |
|
"logps/chosen": -66.5736083984375, |
|
"logps/rejected": -94.8015365600586, |
|
"loss": 0.4308, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -1.6438815593719482, |
|
"rewards/margins": 1.1793160438537598, |
|
"rewards/rejected": -2.823197841644287, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.164, |
|
"grad_norm": 20.246428107882906, |
|
"learning_rate": 4.937873385763907e-07, |
|
"logits/chosen": 51.662147521972656, |
|
"logits/rejected": 52.028175354003906, |
|
"logps/chosen": -79.66545867919922, |
|
"logps/rejected": -107.46966552734375, |
|
"loss": 0.394, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -1.8286956548690796, |
|
"rewards/margins": 1.288888931274414, |
|
"rewards/rejected": -3.117584705352783, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.168, |
|
"grad_norm": 27.307865717439135, |
|
"learning_rate": 4.929902501446366e-07, |
|
"logits/chosen": 50.78424835205078, |
|
"logits/rejected": 50.39312744140625, |
|
"logps/chosen": -75.19126892089844, |
|
"logps/rejected": -95.62612915039062, |
|
"loss": 0.4315, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -2.0071020126342773, |
|
"rewards/margins": 1.3031747341156006, |
|
"rewards/rejected": -3.310276746749878, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.172, |
|
"grad_norm": 20.03898118642927, |
|
"learning_rate": 4.921457902821578e-07, |
|
"logits/chosen": 53.468177795410156, |
|
"logits/rejected": 53.30914306640625, |
|
"logps/chosen": -82.42778015136719, |
|
"logps/rejected": -109.83222961425781, |
|
"loss": 0.3376, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -1.6454699039459229, |
|
"rewards/margins": 1.615443468093872, |
|
"rewards/rejected": -3.260913133621216, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.176, |
|
"grad_norm": 30.325957397734495, |
|
"learning_rate": 4.912541236180778e-07, |
|
"logits/chosen": 51.10878372192383, |
|
"logits/rejected": 53.10695266723633, |
|
"logps/chosen": -74.79537200927734, |
|
"logps/rejected": -100.06138610839844, |
|
"loss": 0.3929, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -1.8177331686019897, |
|
"rewards/margins": 1.257211446762085, |
|
"rewards/rejected": -3.0749447345733643, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"grad_norm": 14.262220601317786, |
|
"learning_rate": 4.903154239845797e-07, |
|
"logits/chosen": 51.69508743286133, |
|
"logits/rejected": 52.68989181518555, |
|
"logps/chosen": -68.33168029785156, |
|
"logps/rejected": -107.64765930175781, |
|
"loss": 0.3691, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -1.747357726097107, |
|
"rewards/margins": 1.74077570438385, |
|
"rewards/rejected": -3.488133192062378, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.184, |
|
"grad_norm": 22.979886521568336, |
|
"learning_rate": 4.893298743830167e-07, |
|
"logits/chosen": 52.4492301940918, |
|
"logits/rejected": 53.06267547607422, |
|
"logps/chosen": -81.17327117919922, |
|
"logps/rejected": -111.0452651977539, |
|
"loss": 0.3741, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -2.073838710784912, |
|
"rewards/margins": 1.7042049169540405, |
|
"rewards/rejected": -3.778043031692505, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.188, |
|
"grad_norm": 14.722797570982818, |
|
"learning_rate": 4.882976669482367e-07, |
|
"logits/chosen": 50.58995819091797, |
|
"logits/rejected": 51.509681701660156, |
|
"logps/chosen": -79.2942123413086, |
|
"logps/rejected": -102.22583770751953, |
|
"loss": 0.3359, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -1.9251492023468018, |
|
"rewards/margins": 1.7899284362792969, |
|
"rewards/rejected": -3.7150776386260986, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 0.192, |
|
"grad_norm": 14.245338009793953, |
|
"learning_rate": 4.872190029111241e-07, |
|
"logits/chosen": 50.895538330078125, |
|
"logits/rejected": 54.43767166137695, |
|
"logps/chosen": -79.74618530273438, |
|
"logps/rejected": -120.28243255615234, |
|
"loss": 0.2751, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -2.2277004718780518, |
|
"rewards/margins": 1.9976272583007812, |
|
"rewards/rejected": -4.225327491760254, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 0.196, |
|
"grad_norm": 21.050016087172274, |
|
"learning_rate": 4.860940925593702e-07, |
|
"logits/chosen": 48.723018646240234, |
|
"logits/rejected": 50.939937591552734, |
|
"logps/chosen": -77.64906311035156, |
|
"logps/rejected": -111.71051025390625, |
|
"loss": 0.2957, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -2.465684652328491, |
|
"rewards/margins": 2.0183236598968506, |
|
"rewards/rejected": -4.484008312225342, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"grad_norm": 50.320855504854826, |
|
"learning_rate": 4.849231551964771e-07, |
|
"logits/chosen": 51.236595153808594, |
|
"logits/rejected": 50.26428985595703, |
|
"logps/chosen": -92.86045837402344, |
|
"logps/rejected": -112.11564636230469, |
|
"loss": 0.3985, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -2.6628010272979736, |
|
"rewards/margins": 1.7543842792510986, |
|
"rewards/rejected": -4.417185306549072, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.204, |
|
"grad_norm": 14.73313528703832, |
|
"learning_rate": 4.837064190990036e-07, |
|
"logits/chosen": 55.05743408203125, |
|
"logits/rejected": 50.69791793823242, |
|
"logps/chosen": -98.44610595703125, |
|
"logps/rejected": -122.69828796386719, |
|
"loss": 0.3647, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -2.5547454357147217, |
|
"rewards/margins": 2.1788697242736816, |
|
"rewards/rejected": -4.733614921569824, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 0.208, |
|
"grad_norm": 27.57905277549734, |
|
"learning_rate": 4.824441214720628e-07, |
|
"logits/chosen": 50.13386535644531, |
|
"logits/rejected": 50.777809143066406, |
|
"logps/chosen": -86.73146057128906, |
|
"logps/rejected": -112.5364990234375, |
|
"loss": 0.3732, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -2.320972442626953, |
|
"rewards/margins": 2.0467121601104736, |
|
"rewards/rejected": -4.367684364318848, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 0.212, |
|
"grad_norm": 25.097188193390338, |
|
"learning_rate": 4.811365084030783e-07, |
|
"logits/chosen": 49.33405685424805, |
|
"logits/rejected": 50.48732376098633, |
|
"logps/chosen": -75.11846160888672, |
|
"logps/rejected": -104.57139587402344, |
|
"loss": 0.3868, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -2.4332354068756104, |
|
"rewards/margins": 1.9999496936798096, |
|
"rewards/rejected": -4.433184623718262, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 0.216, |
|
"grad_norm": 32.7903194698391, |
|
"learning_rate": 4.797838348138086e-07, |
|
"logits/chosen": 50.36452102661133, |
|
"logits/rejected": 50.02265167236328, |
|
"logps/chosen": -90.46897888183594, |
|
"logps/rejected": -112.5868911743164, |
|
"loss": 0.3413, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -2.6965606212615967, |
|
"rewards/margins": 1.6849288940429688, |
|
"rewards/rejected": -4.381489276885986, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"grad_norm": 24.213068673102388, |
|
"learning_rate": 4.783863644106502e-07, |
|
"logits/chosen": 52.17128372192383, |
|
"logits/rejected": 50.315284729003906, |
|
"logps/chosen": -93.4994125366211, |
|
"logps/rejected": -119.5452880859375, |
|
"loss": 0.3351, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -2.50398325920105, |
|
"rewards/margins": 2.50455904006958, |
|
"rewards/rejected": -5.008542537689209, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.224, |
|
"grad_norm": 29.49589281477644, |
|
"learning_rate": 4.769443696332272e-07, |
|
"logits/chosen": 47.702674865722656, |
|
"logits/rejected": 50.343238830566406, |
|
"logps/chosen": -83.9803695678711, |
|
"logps/rejected": -123.3960189819336, |
|
"loss": 0.3181, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -2.564993381500244, |
|
"rewards/margins": 2.2127933502197266, |
|
"rewards/rejected": -4.7777862548828125, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 0.228, |
|
"grad_norm": 19.69678290974152, |
|
"learning_rate": 4.7545813160127845e-07, |
|
"logits/chosen": 50.214874267578125, |
|
"logits/rejected": 48.206787109375, |
|
"logps/chosen": -90.08515930175781, |
|
"logps/rejected": -118.11824798583984, |
|
"loss": 0.4009, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -2.8223087787628174, |
|
"rewards/margins": 2.011645555496216, |
|
"rewards/rejected": -4.833954811096191, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 0.232, |
|
"grad_norm": 13.246157822107891, |
|
"learning_rate": 4.739279400598532e-07, |
|
"logits/chosen": 51.175697326660156, |
|
"logits/rejected": 48.75572967529297, |
|
"logps/chosen": -82.0949935913086, |
|
"logps/rejected": -111.55613708496094, |
|
"loss": 0.3156, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -2.699707269668579, |
|
"rewards/margins": 1.895646333694458, |
|
"rewards/rejected": -4.595353126525879, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 0.236, |
|
"grad_norm": 22.333620027716158, |
|
"learning_rate": 4.7235409332282436e-07, |
|
"logits/chosen": 49.49880599975586, |
|
"logits/rejected": 51.62006759643555, |
|
"logps/chosen": -73.26454162597656, |
|
"logps/rejected": -111.3040771484375, |
|
"loss": 0.3451, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -2.179260730743408, |
|
"rewards/margins": 2.1387388706207275, |
|
"rewards/rejected": -4.317999839782715, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"grad_norm": 26.7757211390623, |
|
"learning_rate": 4.707368982147317e-07, |
|
"logits/chosen": 47.9814567565918, |
|
"logits/rejected": 51.40210723876953, |
|
"logps/chosen": -91.81172180175781, |
|
"logps/rejected": -128.8383331298828, |
|
"loss": 0.2713, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -2.9874844551086426, |
|
"rewards/margins": 2.1201720237731934, |
|
"rewards/rejected": -5.107656955718994, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.244, |
|
"grad_norm": 40.79432104607236, |
|
"learning_rate": 4.6907667001096585e-07, |
|
"logits/chosen": 48.35408401489258, |
|
"logits/rejected": 47.354530334472656, |
|
"logps/chosen": -90.21234893798828, |
|
"logps/rejected": -118.80155944824219, |
|
"loss": 0.2689, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -3.0216469764709473, |
|
"rewards/margins": 2.228628635406494, |
|
"rewards/rejected": -5.250275611877441, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 0.248, |
|
"grad_norm": 31.8455689759253, |
|
"learning_rate": 4.6737373237630473e-07, |
|
"logits/chosen": 48.34640121459961, |
|
"logits/rejected": 51.8663444519043, |
|
"logps/chosen": -85.09153747558594, |
|
"logps/rejected": -127.3193130493164, |
|
"loss": 0.3664, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -2.7675275802612305, |
|
"rewards/margins": 2.4404561519622803, |
|
"rewards/rejected": -5.207983493804932, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 0.252, |
|
"grad_norm": 19.397624289021156, |
|
"learning_rate": 4.6562841730181435e-07, |
|
"logits/chosen": 48.62126159667969, |
|
"logits/rejected": 48.2672119140625, |
|
"logps/chosen": -90.38846588134766, |
|
"logps/rejected": -122.53373718261719, |
|
"loss": 0.3227, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -3.1435444355010986, |
|
"rewards/margins": 2.2223925590515137, |
|
"rewards/rejected": -5.365937232971191, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 0.256, |
|
"grad_norm": 34.367583617826874, |
|
"learning_rate": 4.6384106504012665e-07, |
|
"logits/chosen": 48.072669982910156, |
|
"logits/rejected": 49.07495880126953, |
|
"logps/chosen": -93.60935974121094, |
|
"logps/rejected": -128.94163513183594, |
|
"loss": 0.2825, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -3.1240580081939697, |
|
"rewards/margins": 2.519449472427368, |
|
"rewards/rejected": -5.64350700378418, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"grad_norm": 27.938891622888274, |
|
"learning_rate": 4.6201202403910643e-07, |
|
"logits/chosen": 45.11116027832031, |
|
"logits/rejected": 51.54563522338867, |
|
"logps/chosen": -76.74453735351562, |
|
"logps/rejected": -129.6508331298828, |
|
"loss": 0.2858, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -2.9022960662841797, |
|
"rewards/margins": 2.7343924045562744, |
|
"rewards/rejected": -5.636688709259033, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 0.264, |
|
"grad_norm": 33.20013982791981, |
|
"learning_rate": 4.6014165087392105e-07, |
|
"logits/chosen": 47.67237854003906, |
|
"logits/rejected": 46.87964630126953, |
|
"logps/chosen": -87.0808334350586, |
|
"logps/rejected": -122.21199035644531, |
|
"loss": 0.2727, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -2.771348476409912, |
|
"rewards/margins": 2.724174976348877, |
|
"rewards/rejected": -5.495523452758789, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 0.268, |
|
"grad_norm": 29.256933473551793, |
|
"learning_rate": 4.582303101775248e-07, |
|
"logits/chosen": 49.69163513183594, |
|
"logits/rejected": 45.95726013183594, |
|
"logps/chosen": -93.39743041992188, |
|
"logps/rejected": -120.0958023071289, |
|
"loss": 0.2879, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -2.911816120147705, |
|
"rewards/margins": 2.617412805557251, |
|
"rewards/rejected": -5.529229164123535, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 0.272, |
|
"grad_norm": 43.80379099330064, |
|
"learning_rate": 4.5627837456957374e-07, |
|
"logits/chosen": 48.20374298095703, |
|
"logits/rejected": 47.810935974121094, |
|
"logps/chosen": -94.90364074707031, |
|
"logps/rejected": -125.94508361816406, |
|
"loss": 0.3458, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -3.2113685607910156, |
|
"rewards/margins": 2.2549712657928467, |
|
"rewards/rejected": -5.466340065002441, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 0.276, |
|
"grad_norm": 38.68916019817628, |
|
"learning_rate": 4.542862245837821e-07, |
|
"logits/chosen": 48.5870246887207, |
|
"logits/rejected": 47.81504440307617, |
|
"logps/chosen": -102.28499603271484, |
|
"logps/rejected": -129.62196350097656, |
|
"loss": 0.2492, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -3.5993125438690186, |
|
"rewards/margins": 2.266263484954834, |
|
"rewards/rejected": -5.865576267242432, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"grad_norm": 21.578470273867236, |
|
"learning_rate": 4.5225424859373684e-07, |
|
"logits/chosen": 49.09315490722656, |
|
"logits/rejected": 48.342491149902344, |
|
"logps/chosen": -98.66539001464844, |
|
"logps/rejected": -139.3084259033203, |
|
"loss": 0.215, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -3.2259106636047363, |
|
"rewards/margins": 3.0345213413238525, |
|
"rewards/rejected": -6.260432243347168, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.284, |
|
"grad_norm": 16.808420798200792, |
|
"learning_rate": 4.501828427371833e-07, |
|
"logits/chosen": 46.62156295776367, |
|
"logits/rejected": 48.56031036376953, |
|
"logps/chosen": -92.45174407958984, |
|
"logps/rejected": -137.70553588867188, |
|
"loss": 0.3082, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -3.2984604835510254, |
|
"rewards/margins": 2.9785618782043457, |
|
"rewards/rejected": -6.277022361755371, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 0.288, |
|
"grad_norm": 25.497403458788426, |
|
"learning_rate": 4.4807241083879764e-07, |
|
"logits/chosen": 44.841453552246094, |
|
"logits/rejected": 48.752197265625, |
|
"logps/chosen": -88.73442077636719, |
|
"logps/rejected": -139.46563720703125, |
|
"loss": 0.2662, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -3.5202784538269043, |
|
"rewards/margins": 2.9266552925109863, |
|
"rewards/rejected": -6.446933746337891, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 0.292, |
|
"grad_norm": 26.661142376929647, |
|
"learning_rate": 4.459233643314599e-07, |
|
"logits/chosen": 48.446815490722656, |
|
"logits/rejected": 48.538719177246094, |
|
"logps/chosen": -84.2086410522461, |
|
"logps/rejected": -123.36454772949219, |
|
"loss": 0.2538, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -2.9666292667388916, |
|
"rewards/margins": 2.7437281608581543, |
|
"rewards/rejected": -5.710357189178467, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 0.296, |
|
"grad_norm": 8.139706838501583, |
|
"learning_rate": 4.437361221760449e-07, |
|
"logits/chosen": 48.00127410888672, |
|
"logits/rejected": 48.78575897216797, |
|
"logps/chosen": -96.60485076904297, |
|
"logps/rejected": -124.87052917480469, |
|
"loss": 0.2151, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -3.223308563232422, |
|
"rewards/margins": 2.8362784385681152, |
|
"rewards/rejected": -6.059587478637695, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"grad_norm": 34.66768947372861, |
|
"learning_rate": 4.415111107797445e-07, |
|
"logits/chosen": 46.998199462890625, |
|
"logits/rejected": 46.860145568847656, |
|
"logps/chosen": -99.70954895019531, |
|
"logps/rejected": -132.5911407470703, |
|
"loss": 0.3284, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -3.904909133911133, |
|
"rewards/margins": 2.286958932876587, |
|
"rewards/rejected": -6.191867828369141, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.304, |
|
"grad_norm": 27.280956863429665, |
|
"learning_rate": 4.392487639129391e-07, |
|
"logits/chosen": 44.16685485839844, |
|
"logits/rejected": 46.52399826049805, |
|
"logps/chosen": -89.0407485961914, |
|
"logps/rejected": -125.99409484863281, |
|
"loss": 0.2755, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -3.4610557556152344, |
|
"rewards/margins": 2.747231960296631, |
|
"rewards/rejected": -6.208288669586182, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 0.308, |
|
"grad_norm": 45.42173052837384, |
|
"learning_rate": 4.36949522624633e-07, |
|
"logits/chosen": 46.8876838684082, |
|
"logits/rejected": 45.308528900146484, |
|
"logps/chosen": -98.10558319091797, |
|
"logps/rejected": -131.274658203125, |
|
"loss": 0.2579, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -3.8448243141174316, |
|
"rewards/margins": 2.463934898376465, |
|
"rewards/rejected": -6.3087592124938965, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 0.312, |
|
"grad_norm": 44.27869354906801, |
|
"learning_rate": 4.34613835156471e-07, |
|
"logits/chosen": 46.87074279785156, |
|
"logits/rejected": 45.099308013916016, |
|
"logps/chosen": -111.92747497558594, |
|
"logps/rejected": -131.96014404296875, |
|
"loss": 0.3277, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -4.581133842468262, |
|
"rewards/margins": 2.0176618099212646, |
|
"rewards/rejected": -6.5987958908081055, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 0.316, |
|
"grad_norm": 22.111566033012554, |
|
"learning_rate": 4.3224215685535287e-07, |
|
"logits/chosen": 45.65962219238281, |
|
"logits/rejected": 45.8907585144043, |
|
"logps/chosen": -99.42485809326172, |
|
"logps/rejected": -141.40390014648438, |
|
"loss": 0.2526, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -4.320735454559326, |
|
"rewards/margins": 2.8679370880126953, |
|
"rewards/rejected": -7.188672065734863, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"grad_norm": 52.35572333396793, |
|
"learning_rate": 4.2983495008466273e-07, |
|
"logits/chosen": 44.55757141113281, |
|
"logits/rejected": 47.1688232421875, |
|
"logps/chosen": -100.08128356933594, |
|
"logps/rejected": -150.44851684570312, |
|
"loss": 0.256, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -4.098430633544922, |
|
"rewards/margins": 3.171543598175049, |
|
"rewards/rejected": -7.269974708557129, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.324, |
|
"grad_norm": 33.21188816300218, |
|
"learning_rate": 4.273926841341302e-07, |
|
"logits/chosen": 46.28192138671875, |
|
"logits/rejected": 43.64000701904297, |
|
"logps/chosen": -99.21965789794922, |
|
"logps/rejected": -129.105712890625, |
|
"loss": 0.2544, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -3.7107014656066895, |
|
"rewards/margins": 3.0634143352508545, |
|
"rewards/rejected": -6.774115085601807, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 0.328, |
|
"grad_norm": 12.094382688192468, |
|
"learning_rate": 4.249158351283413e-07, |
|
"logits/chosen": 44.60408020019531, |
|
"logits/rejected": 46.49042510986328, |
|
"logps/chosen": -90.05680847167969, |
|
"logps/rejected": -142.6536102294922, |
|
"loss": 0.1868, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -3.59674072265625, |
|
"rewards/margins": 3.3266303539276123, |
|
"rewards/rejected": -6.923371315002441, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 0.332, |
|
"grad_norm": 4.00271990880729, |
|
"learning_rate": 4.224048859339174e-07, |
|
"logits/chosen": 45.333580017089844, |
|
"logits/rejected": 45.82084274291992, |
|
"logps/chosen": -102.02961730957031, |
|
"logps/rejected": -137.76678466796875, |
|
"loss": 0.2172, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -4.266281604766846, |
|
"rewards/margins": 2.6827759742736816, |
|
"rewards/rejected": -6.949057102203369, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 0.336, |
|
"grad_norm": 20.780824485200213, |
|
"learning_rate": 4.1986032606537916e-07, |
|
"logits/chosen": 46.636512756347656, |
|
"logits/rejected": 44.504981994628906, |
|
"logps/chosen": -97.64836120605469, |
|
"logps/rejected": -135.35731506347656, |
|
"loss": 0.2624, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -3.672236680984497, |
|
"rewards/margins": 3.0585567951202393, |
|
"rewards/rejected": -6.730792999267578, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"grad_norm": 42.59198624354184, |
|
"learning_rate": 4.172826515897145e-07, |
|
"logits/chosen": 43.75339126586914, |
|
"logits/rejected": 47.46000289916992, |
|
"logps/chosen": -104.68599700927734, |
|
"logps/rejected": -141.9625701904297, |
|
"loss": 0.2583, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -4.268365859985352, |
|
"rewards/margins": 2.5187840461730957, |
|
"rewards/rejected": -6.787149906158447, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 0.344, |
|
"grad_norm": 20.008127683146643, |
|
"learning_rate": 4.146723650296701e-07, |
|
"logits/chosen": 43.556148529052734, |
|
"logits/rejected": 43.975608825683594, |
|
"logps/chosen": -95.18064880371094, |
|
"logps/rejected": -139.5054931640625, |
|
"loss": 0.3563, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -3.966346025466919, |
|
"rewards/margins": 2.8288350105285645, |
|
"rewards/rejected": -6.795180320739746, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 0.348, |
|
"grad_norm": 20.334985858976758, |
|
"learning_rate": 4.120299752657827e-07, |
|
"logits/chosen": 43.38778305053711, |
|
"logits/rejected": 45.5791015625, |
|
"logps/chosen": -98.27373504638672, |
|
"logps/rejected": -140.9309539794922, |
|
"loss": 0.1811, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -4.264477252960205, |
|
"rewards/margins": 3.044534206390381, |
|
"rewards/rejected": -7.309010982513428, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 0.352, |
|
"grad_norm": 28.757062776427272, |
|
"learning_rate": 4.0935599743717244e-07, |
|
"logits/chosen": 44.6265983581543, |
|
"logits/rejected": 44.702178955078125, |
|
"logps/chosen": -97.5745620727539, |
|
"logps/rejected": -126.3858413696289, |
|
"loss": 0.2602, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -4.221047401428223, |
|
"rewards/margins": 2.2703094482421875, |
|
"rewards/rejected": -6.49135684967041, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 0.356, |
|
"grad_norm": 14.773459336240398, |
|
"learning_rate": 4.066509528411151e-07, |
|
"logits/chosen": 45.27838897705078, |
|
"logits/rejected": 43.685638427734375, |
|
"logps/chosen": -95.4419937133789, |
|
"logps/rejected": -131.39169311523438, |
|
"loss": 0.198, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -3.907078981399536, |
|
"rewards/margins": 3.0583863258361816, |
|
"rewards/rejected": -6.9654645919799805, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"grad_norm": 52.381278515402784, |
|
"learning_rate": 4.039153688314145e-07, |
|
"logits/chosen": 41.99245834350586, |
|
"logits/rejected": 46.48981857299805, |
|
"logps/chosen": -99.83820343017578, |
|
"logps/rejected": -151.70034790039062, |
|
"loss": 0.2602, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -4.080926418304443, |
|
"rewards/margins": 3.463571071624756, |
|
"rewards/rejected": -7.544497489929199, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.364, |
|
"grad_norm": 18.192241188575945, |
|
"learning_rate": 4.0114977871559377e-07, |
|
"logits/chosen": 42.960872650146484, |
|
"logits/rejected": 43.536293029785156, |
|
"logps/chosen": -89.50424194335938, |
|
"logps/rejected": -141.3973388671875, |
|
"loss": 0.1604, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -3.7081432342529297, |
|
"rewards/margins": 3.6759707927703857, |
|
"rewards/rejected": -7.3841142654418945, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 0.368, |
|
"grad_norm": 40.31098563989808, |
|
"learning_rate": 3.983547216509254e-07, |
|
"logits/chosen": 42.86689758300781, |
|
"logits/rejected": 45.748714447021484, |
|
"logps/chosen": -103.40921783447266, |
|
"logps/rejected": -158.37911987304688, |
|
"loss": 0.2078, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": -4.184783458709717, |
|
"rewards/margins": 3.848146438598633, |
|
"rewards/rejected": -8.032930374145508, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 0.372, |
|
"grad_norm": 10.07444543448383, |
|
"learning_rate": 3.9553074253932233e-07, |
|
"logits/chosen": 42.905662536621094, |
|
"logits/rejected": 41.272789001464844, |
|
"logps/chosen": -100.79463195800781, |
|
"logps/rejected": -128.82476806640625, |
|
"loss": 0.2664, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -4.691293239593506, |
|
"rewards/margins": 2.189171314239502, |
|
"rewards/rejected": -6.880465507507324, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 0.376, |
|
"grad_norm": 20.93691989678441, |
|
"learning_rate": 3.9267839192110797e-07, |
|
"logits/chosen": 43.836544036865234, |
|
"logits/rejected": 42.157772064208984, |
|
"logps/chosen": -106.8996810913086, |
|
"logps/rejected": -141.3354034423828, |
|
"loss": 0.2145, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -4.653143405914307, |
|
"rewards/margins": 3.2738826274871826, |
|
"rewards/rejected": -7.927026271820068, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"grad_norm": 31.706598510789135, |
|
"learning_rate": 3.8979822586768666e-07, |
|
"logits/chosen": 41.932212829589844, |
|
"logits/rejected": 42.14612579345703, |
|
"logps/chosen": -99.3321304321289, |
|
"logps/rejected": -132.33554077148438, |
|
"loss": 0.2512, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -4.518923282623291, |
|
"rewards/margins": 2.864084243774414, |
|
"rewards/rejected": -7.383008003234863, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 0.384, |
|
"grad_norm": 37.02742534141979, |
|
"learning_rate": 3.8689080587313755e-07, |
|
"logits/chosen": 40.756717681884766, |
|
"logits/rejected": 43.150856018066406, |
|
"logps/chosen": -94.36783599853516, |
|
"logps/rejected": -147.3682403564453, |
|
"loss": 0.2317, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -4.52658748626709, |
|
"rewards/margins": 3.43182373046875, |
|
"rewards/rejected": -7.95841121673584, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 0.388, |
|
"grad_norm": 46.22429207605269, |
|
"learning_rate": 3.839566987447491e-07, |
|
"logits/chosen": 40.86772918701172, |
|
"logits/rejected": 43.2618408203125, |
|
"logps/chosen": -96.85578918457031, |
|
"logps/rejected": -158.2089080810547, |
|
"loss": 0.2296, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -4.181971073150635, |
|
"rewards/margins": 3.6908957958221436, |
|
"rewards/rejected": -7.872866630554199, |
|
"step": 970 |
|
}, |
|
{ |
|
"epoch": 0.392, |
|
"grad_norm": 53.76947098726966, |
|
"learning_rate": 3.809964764925198e-07, |
|
"logits/chosen": 43.136131286621094, |
|
"logits/rejected": 43.18577194213867, |
|
"logps/chosen": -109.56819915771484, |
|
"logps/rejected": -163.19302368164062, |
|
"loss": 0.197, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -4.513330459594727, |
|
"rewards/margins": 4.060441493988037, |
|
"rewards/rejected": -8.573771476745605, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 0.396, |
|
"grad_norm": 32.036494164799386, |
|
"learning_rate": 3.780107162176429e-07, |
|
"logits/chosen": 42.74140167236328, |
|
"logits/rejected": 44.60755157470703, |
|
"logps/chosen": -110.55721282958984, |
|
"logps/rejected": -153.0295867919922, |
|
"loss": 0.2728, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -4.982661247253418, |
|
"rewards/margins": 2.932508945465088, |
|
"rewards/rejected": -7.915169715881348, |
|
"step": 990 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"grad_norm": 52.99187462304637, |
|
"learning_rate": 3.75e-07, |
|
"logits/chosen": 40.694297790527344, |
|
"logits/rejected": 43.3404426574707, |
|
"logps/chosen": -113.76619720458984, |
|
"logps/rejected": -155.80531311035156, |
|
"loss": 0.2724, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -5.276650428771973, |
|
"rewards/margins": 3.189429998397827, |
|
"rewards/rejected": -8.466080665588379, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.404, |
|
"grad_norm": 25.664982214116712, |
|
"learning_rate": 3.7196491478468316e-07, |
|
"logits/chosen": 41.09137725830078, |
|
"logits/rejected": 44.35279083251953, |
|
"logps/chosen": -101.16764068603516, |
|
"logps/rejected": -150.22509765625, |
|
"loss": 0.1726, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -4.6303510665893555, |
|
"rewards/margins": 3.4471046924591064, |
|
"rewards/rejected": -8.077455520629883, |
|
"step": 1010 |
|
}, |
|
{ |
|
"epoch": 0.408, |
|
"grad_norm": 4.601273961866918, |
|
"learning_rate": 3.689060522675688e-07, |
|
"logits/chosen": 42.17076873779297, |
|
"logits/rejected": 43.80558776855469, |
|
"logps/chosen": -114.15092468261719, |
|
"logps/rejected": -155.05160522460938, |
|
"loss": 0.1537, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -4.794250965118408, |
|
"rewards/margins": 4.128665447235107, |
|
"rewards/rejected": -8.922918319702148, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 0.412, |
|
"grad_norm": 23.199840684684883, |
|
"learning_rate": 3.658240087799654e-07, |
|
"logits/chosen": 39.78450393676758, |
|
"logits/rejected": 42.42991638183594, |
|
"logps/chosen": -101.93121337890625, |
|
"logps/rejected": -154.7505340576172, |
|
"loss": 0.1553, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": -4.455100059509277, |
|
"rewards/margins": 4.042738914489746, |
|
"rewards/rejected": -8.497838973999023, |
|
"step": 1030 |
|
}, |
|
{ |
|
"epoch": 0.416, |
|
"grad_norm": 31.277129565852135, |
|
"learning_rate": 3.6271938517235765e-07, |
|
"logits/chosen": 40.167823791503906, |
|
"logits/rejected": 42.2595100402832, |
|
"logps/chosen": -107.13337707519531, |
|
"logps/rejected": -156.47689819335938, |
|
"loss": 0.2185, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -5.04506778717041, |
|
"rewards/margins": 3.721726655960083, |
|
"rewards/rejected": -8.76679515838623, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"grad_norm": 27.680206880133927, |
|
"learning_rate": 3.595927866972693e-07, |
|
"logits/chosen": 38.985374450683594, |
|
"logits/rejected": 39.77815628051758, |
|
"logps/chosen": -106.81770324707031, |
|
"logps/rejected": -149.25894165039062, |
|
"loss": 0.2087, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -4.879289150238037, |
|
"rewards/margins": 3.535437822341919, |
|
"rewards/rejected": -8.414728164672852, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 0.424, |
|
"grad_norm": 1.249926854778577, |
|
"learning_rate": 3.5644482289126813e-07, |
|
"logits/chosen": 39.94206237792969, |
|
"logits/rejected": 43.77482604980469, |
|
"logps/chosen": -106.6061782836914, |
|
"logps/rejected": -151.3231658935547, |
|
"loss": 0.2439, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -4.947367191314697, |
|
"rewards/margins": 3.605046033859253, |
|
"rewards/rejected": -8.552412986755371, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 0.428, |
|
"grad_norm": 6.362016874170334, |
|
"learning_rate": 3.5327610745613546e-07, |
|
"logits/chosen": 40.91377258300781, |
|
"logits/rejected": 41.30303192138672, |
|
"logps/chosen": -115.51036071777344, |
|
"logps/rejected": -159.98422241210938, |
|
"loss": 0.1804, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -5.147997856140137, |
|
"rewards/margins": 3.556520462036133, |
|
"rewards/rejected": -8.70451831817627, |
|
"step": 1070 |
|
}, |
|
{ |
|
"epoch": 0.432, |
|
"grad_norm": 17.200244197099025, |
|
"learning_rate": 3.500872581392238e-07, |
|
"logits/chosen": 39.87400436401367, |
|
"logits/rejected": 40.99711990356445, |
|
"logps/chosen": -107.35628509521484, |
|
"logps/rejected": -152.93698120117188, |
|
"loss": 0.1926, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -5.61740255355835, |
|
"rewards/margins": 3.3743271827697754, |
|
"rewards/rejected": -8.991729736328125, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 0.436, |
|
"grad_norm": 6.686230161730707, |
|
"learning_rate": 3.468788966130257e-07, |
|
"logits/chosen": 39.11838150024414, |
|
"logits/rejected": 40.328468322753906, |
|
"logps/chosen": -111.20975494384766, |
|
"logps/rejected": -164.8342742919922, |
|
"loss": 0.2234, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -5.404995918273926, |
|
"rewards/margins": 4.121036529541016, |
|
"rewards/rejected": -9.526032447814941, |
|
"step": 1090 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"grad_norm": 43.68985759612088, |
|
"learning_rate": 3.43651648353978e-07, |
|
"logits/chosen": 38.73166275024414, |
|
"logits/rejected": 42.4930534362793, |
|
"logps/chosen": -101.05915069580078, |
|
"logps/rejected": -165.46095275878906, |
|
"loss": 0.1615, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -5.2063822746276855, |
|
"rewards/margins": 4.015776634216309, |
|
"rewards/rejected": -9.222158432006836, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.444, |
|
"grad_norm": 26.738862667956763, |
|
"learning_rate": 3.40406142520523e-07, |
|
"logits/chosen": 42.12168502807617, |
|
"logits/rejected": 41.788394927978516, |
|
"logps/chosen": -123.79948425292969, |
|
"logps/rejected": -153.36776733398438, |
|
"loss": 0.2421, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -5.871365547180176, |
|
"rewards/margins": 2.8958840370178223, |
|
"rewards/rejected": -8.76724910736084, |
|
"step": 1110 |
|
}, |
|
{ |
|
"epoch": 0.448, |
|
"grad_norm": 17.03770002319556, |
|
"learning_rate": 3.371430118304538e-07, |
|
"logits/chosen": 36.466461181640625, |
|
"logits/rejected": 40.315452575683594, |
|
"logps/chosen": -112.66862487792969, |
|
"logps/rejected": -174.78466796875, |
|
"loss": 0.2123, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": -5.390850067138672, |
|
"rewards/margins": 4.580835342407227, |
|
"rewards/rejected": -9.971685409545898, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 0.452, |
|
"grad_norm": 24.01278859618808, |
|
"learning_rate": 3.338628924375638e-07, |
|
"logits/chosen": 40.4681282043457, |
|
"logits/rejected": 41.152469635009766, |
|
"logps/chosen": -113.4747543334961, |
|
"logps/rejected": -158.70449829101562, |
|
"loss": 0.2367, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -5.428784370422363, |
|
"rewards/margins": 3.6359362602233887, |
|
"rewards/rejected": -9.06472110748291, |
|
"step": 1130 |
|
}, |
|
{ |
|
"epoch": 0.456, |
|
"grad_norm": 29.87651257208437, |
|
"learning_rate": 3.305664238076278e-07, |
|
"logits/chosen": 39.47797775268555, |
|
"logits/rejected": 40.79694366455078, |
|
"logps/chosen": -100.62152099609375, |
|
"logps/rejected": -157.24269104003906, |
|
"loss": 0.1456, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": -4.463057994842529, |
|
"rewards/margins": 4.45237922668457, |
|
"rewards/rejected": -8.915437698364258, |
|
"step": 1140 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"grad_norm": 20.100774899172244, |
|
"learning_rate": 3.272542485937368e-07, |
|
"logits/chosen": 38.019832611083984, |
|
"logits/rejected": 39.65892791748047, |
|
"logps/chosen": -99.71014404296875, |
|
"logps/rejected": -152.48388671875, |
|
"loss": 0.2683, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -4.522518157958984, |
|
"rewards/margins": 4.272935390472412, |
|
"rewards/rejected": -8.795454025268555, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 0.464, |
|
"grad_norm": 17.271502510574486, |
|
"learning_rate": 3.2392701251101167e-07, |
|
"logits/chosen": 38.93779754638672, |
|
"logits/rejected": 43.86870574951172, |
|
"logps/chosen": -106.25166320800781, |
|
"logps/rejected": -169.70816040039062, |
|
"loss": 0.1857, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -4.724178314208984, |
|
"rewards/margins": 4.432940483093262, |
|
"rewards/rejected": -9.157119750976562, |
|
"step": 1160 |
|
}, |
|
{ |
|
"epoch": 0.468, |
|
"grad_norm": 14.85667949892512, |
|
"learning_rate": 3.2058536421071914e-07, |
|
"logits/chosen": 41.40235900878906, |
|
"logits/rejected": 40.91047286987305, |
|
"logps/chosen": -121.07177734375, |
|
"logps/rejected": -163.6028289794922, |
|
"loss": 0.2094, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -5.4994378089904785, |
|
"rewards/margins": 3.685161590576172, |
|
"rewards/rejected": -9.184598922729492, |
|
"step": 1170 |
|
}, |
|
{ |
|
"epoch": 0.472, |
|
"grad_norm": 27.172740058479278, |
|
"learning_rate": 3.172299551538164e-07, |
|
"logits/chosen": 38.644805908203125, |
|
"logits/rejected": 41.06559371948242, |
|
"logps/chosen": -116.37313079833984, |
|
"logps/rejected": -170.63470458984375, |
|
"loss": 0.2396, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": -5.4368085861206055, |
|
"rewards/margins": 4.352160453796387, |
|
"rewards/rejected": -9.788969039916992, |
|
"step": 1180 |
|
}, |
|
{ |
|
"epoch": 0.476, |
|
"grad_norm": 68.26652671242996, |
|
"learning_rate": 3.1386143948394763e-07, |
|
"logits/chosen": 39.66558074951172, |
|
"logits/rejected": 39.35725784301758, |
|
"logps/chosen": -110.7066421508789, |
|
"logps/rejected": -148.9991455078125, |
|
"loss": 0.2147, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -5.205443382263184, |
|
"rewards/margins": 3.4201292991638184, |
|
"rewards/rejected": -8.62557315826416, |
|
"step": 1190 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"grad_norm": 29.844573464953335, |
|
"learning_rate": 3.104804738999169e-07, |
|
"logits/chosen": 38.45096969604492, |
|
"logits/rejected": 39.93912887573242, |
|
"logps/chosen": -118.87516021728516, |
|
"logps/rejected": -177.38160705566406, |
|
"loss": 0.1226, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": -5.623927116394043, |
|
"rewards/margins": 4.835057735443115, |
|
"rewards/rejected": -10.458983421325684, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.484, |
|
"grad_norm": 33.168807520397415, |
|
"learning_rate": 3.0708771752766395e-07, |
|
"logits/chosen": 37.66096496582031, |
|
"logits/rejected": 41.729278564453125, |
|
"logps/chosen": -113.61344909667969, |
|
"logps/rejected": -181.90975952148438, |
|
"loss": 0.2327, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -5.367053985595703, |
|
"rewards/margins": 5.373785495758057, |
|
"rewards/rejected": -10.740839958190918, |
|
"step": 1210 |
|
}, |
|
{ |
|
"epoch": 0.488, |
|
"grad_norm": 35.949428687976074, |
|
"learning_rate": 3.036838317917658e-07, |
|
"logits/chosen": 39.2284049987793, |
|
"logits/rejected": 39.56584167480469, |
|
"logps/chosen": -116.41062927246094, |
|
"logps/rejected": -161.64004516601562, |
|
"loss": 0.2307, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -5.617162704467773, |
|
"rewards/margins": 3.87888765335083, |
|
"rewards/rejected": -9.496048927307129, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 0.492, |
|
"grad_norm": 21.071566027005225, |
|
"learning_rate": 3.002694802864912e-07, |
|
"logits/chosen": 38.54627227783203, |
|
"logits/rejected": 41.567474365234375, |
|
"logps/chosen": -126.9264144897461, |
|
"logps/rejected": -171.91610717773438, |
|
"loss": 0.1884, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -6.11997127532959, |
|
"rewards/margins": 3.630235195159912, |
|
"rewards/rejected": -9.75020694732666, |
|
"step": 1230 |
|
}, |
|
{ |
|
"epoch": 0.496, |
|
"grad_norm": 19.472597346579764, |
|
"learning_rate": 2.968453286464312e-07, |
|
"logits/chosen": 38.088741302490234, |
|
"logits/rejected": 40.73246383666992, |
|
"logps/chosen": -110.57450103759766, |
|
"logps/rejected": -164.86895751953125, |
|
"loss": 0.1301, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -5.830416679382324, |
|
"rewards/margins": 3.7488045692443848, |
|
"rewards/rejected": -9.579221725463867, |
|
"step": 1240 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"grad_norm": 9.297784140084044, |
|
"learning_rate": 2.934120444167326e-07, |
|
"logits/chosen": 39.20268249511719, |
|
"logits/rejected": 39.98442077636719, |
|
"logps/chosen": -110.739013671875, |
|
"logps/rejected": -155.21205139160156, |
|
"loss": 0.1205, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": -5.348870277404785, |
|
"rewards/margins": 4.167140007019043, |
|
"rewards/rejected": -9.516010284423828, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 0.504, |
|
"grad_norm": 15.15594138373471, |
|
"learning_rate": 2.899702969229587e-07, |
|
"logits/chosen": 39.848670959472656, |
|
"logits/rejected": 38.895225524902344, |
|
"logps/chosen": -113.55269622802734, |
|
"logps/rejected": -164.986083984375, |
|
"loss": 0.1671, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -5.258578777313232, |
|
"rewards/margins": 4.728585243225098, |
|
"rewards/rejected": -9.987164497375488, |
|
"step": 1260 |
|
}, |
|
{ |
|
"epoch": 0.508, |
|
"grad_norm": 58.68168492653791, |
|
"learning_rate": 2.865207571406029e-07, |
|
"logits/chosen": 39.46464920043945, |
|
"logits/rejected": 37.720924377441406, |
|
"logps/chosen": -124.67518615722656, |
|
"logps/rejected": -169.3942413330078, |
|
"loss": 0.2329, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -5.639501571655273, |
|
"rewards/margins": 4.678294658660889, |
|
"rewards/rejected": -10.31779670715332, |
|
"step": 1270 |
|
}, |
|
{ |
|
"epoch": 0.512, |
|
"grad_norm": 26.11850298779012, |
|
"learning_rate": 2.830640975642806e-07, |
|
"logits/chosen": 36.690589904785156, |
|
"logits/rejected": 37.93729782104492, |
|
"logps/chosen": -115.35652160644531, |
|
"logps/rejected": -169.6790008544922, |
|
"loss": 0.1717, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -5.6271867752075195, |
|
"rewards/margins": 4.886000633239746, |
|
"rewards/rejected": -10.513187408447266, |
|
"step": 1280 |
|
}, |
|
{ |
|
"epoch": 0.516, |
|
"grad_norm": 18.8486793071701, |
|
"learning_rate": 2.796009920766253e-07, |
|
"logits/chosen": 34.7783203125, |
|
"logits/rejected": 40.29841232299805, |
|
"logps/chosen": -109.31401062011719, |
|
"logps/rejected": -170.9484100341797, |
|
"loss": 0.1634, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -5.725811958312988, |
|
"rewards/margins": 4.456699848175049, |
|
"rewards/rejected": -10.182512283325195, |
|
"step": 1290 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"grad_norm": 47.45375394157672, |
|
"learning_rate": 2.761321158169134e-07, |
|
"logits/chosen": 37.883079528808594, |
|
"logits/rejected": 40.114280700683594, |
|
"logps/chosen": -124.6867904663086, |
|
"logps/rejected": -172.8629150390625, |
|
"loss": 0.2061, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -6.434460639953613, |
|
"rewards/margins": 3.7778351306915283, |
|
"rewards/rejected": -10.212295532226562, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 0.524, |
|
"grad_norm": 16.432297078281568, |
|
"learning_rate": 2.726581450494451e-07, |
|
"logits/chosen": 35.259639739990234, |
|
"logits/rejected": 40.99456024169922, |
|
"logps/chosen": -111.4013900756836, |
|
"logps/rejected": -174.25094604492188, |
|
"loss": 0.154, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -6.062236785888672, |
|
"rewards/margins": 4.306510925292969, |
|
"rewards/rejected": -10.368746757507324, |
|
"step": 1310 |
|
}, |
|
{ |
|
"epoch": 0.528, |
|
"grad_norm": 30.21091242436278, |
|
"learning_rate": 2.6917975703170465e-07, |
|
"logits/chosen": 35.50202178955078, |
|
"logits/rejected": 39.13561248779297, |
|
"logps/chosen": -113.72837829589844, |
|
"logps/rejected": -176.2806396484375, |
|
"loss": 0.176, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -5.105679035186768, |
|
"rewards/margins": 4.945074081420898, |
|
"rewards/rejected": -10.050752639770508, |
|
"step": 1320 |
|
}, |
|
{ |
|
"epoch": 0.532, |
|
"grad_norm": 31.809974002259562, |
|
"learning_rate": 2.6569762988232837e-07, |
|
"logits/chosen": 35.528358459472656, |
|
"logits/rejected": 39.22612380981445, |
|
"logps/chosen": -108.7910385131836, |
|
"logps/rejected": -159.57582092285156, |
|
"loss": 0.2619, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -6.001819133758545, |
|
"rewards/margins": 3.755993366241455, |
|
"rewards/rejected": -9.757813453674316, |
|
"step": 1330 |
|
}, |
|
{ |
|
"epoch": 0.536, |
|
"grad_norm": 18.493232174978285, |
|
"learning_rate": 2.6221244244890336e-07, |
|
"logits/chosen": 34.30480194091797, |
|
"logits/rejected": 38.207496643066406, |
|
"logps/chosen": -104.4682388305664, |
|
"logps/rejected": -172.13088989257812, |
|
"loss": 0.1043, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -5.1313066482543945, |
|
"rewards/margins": 5.272080421447754, |
|
"rewards/rejected": -10.403387069702148, |
|
"step": 1340 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"grad_norm": 24.170314835898346, |
|
"learning_rate": 2.5872487417562527e-07, |
|
"logits/chosen": 36.6849479675293, |
|
"logits/rejected": 37.138465881347656, |
|
"logps/chosen": -123.2945785522461, |
|
"logps/rejected": -178.46588134765625, |
|
"loss": 0.1769, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -6.805965423583984, |
|
"rewards/margins": 4.122119426727295, |
|
"rewards/rejected": -10.928085327148438, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 0.544, |
|
"grad_norm": 23.047663073832965, |
|
"learning_rate": 2.5523560497083924e-07, |
|
"logits/chosen": 35.815589904785156, |
|
"logits/rejected": 37.165496826171875, |
|
"logps/chosen": -118.9620361328125, |
|
"logps/rejected": -173.7703094482422, |
|
"loss": 0.1205, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -6.1362409591674805, |
|
"rewards/margins": 4.513824462890625, |
|
"rewards/rejected": -10.650065422058105, |
|
"step": 1360 |
|
}, |
|
{ |
|
"epoch": 0.548, |
|
"grad_norm": 48.732887198712326, |
|
"learning_rate": 2.5174531507449037e-07, |
|
"logits/chosen": 36.993038177490234, |
|
"logits/rejected": 36.96677780151367, |
|
"logps/chosen": -132.6764678955078, |
|
"logps/rejected": -172.9881591796875, |
|
"loss": 0.2913, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -6.736947536468506, |
|
"rewards/margins": 4.004305362701416, |
|
"rewards/rejected": -10.741253852844238, |
|
"step": 1370 |
|
}, |
|
{ |
|
"epoch": 0.552, |
|
"grad_norm": 18.181102544014543, |
|
"learning_rate": 2.482546849255096e-07, |
|
"logits/chosen": 34.073097229003906, |
|
"logits/rejected": 36.943172454833984, |
|
"logps/chosen": -117.58740234375, |
|
"logps/rejected": -197.7882537841797, |
|
"loss": 0.1062, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -5.832341194152832, |
|
"rewards/margins": 6.429389953613281, |
|
"rewards/rejected": -12.26173210144043, |
|
"step": 1380 |
|
}, |
|
{ |
|
"epoch": 0.556, |
|
"grad_norm": 15.544623320366716, |
|
"learning_rate": 2.447643950291608e-07, |
|
"logits/chosen": 33.73606491088867, |
|
"logits/rejected": 36.898773193359375, |
|
"logps/chosen": -119.47406005859375, |
|
"logps/rejected": -167.00711059570312, |
|
"loss": 0.2418, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -6.214328289031982, |
|
"rewards/margins": 4.625095844268799, |
|
"rewards/rejected": -10.839425086975098, |
|
"step": 1390 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"grad_norm": 77.37565224093566, |
|
"learning_rate": 2.412751258243748e-07, |
|
"logits/chosen": 33.67988204956055, |
|
"logits/rejected": 34.09700393676758, |
|
"logps/chosen": -128.5489501953125, |
|
"logps/rejected": -176.24871826171875, |
|
"loss": 0.1963, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -7.089766502380371, |
|
"rewards/margins": 3.989410400390625, |
|
"rewards/rejected": -11.07917594909668, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 0.564, |
|
"grad_norm": 30.43460528340991, |
|
"learning_rate": 2.3778755755109667e-07, |
|
"logits/chosen": 34.34267044067383, |
|
"logits/rejected": 37.77847671508789, |
|
"logps/chosen": -105.62583923339844, |
|
"logps/rejected": -190.62722778320312, |
|
"loss": 0.1466, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -5.149474620819092, |
|
"rewards/margins": 6.530264854431152, |
|
"rewards/rejected": -11.679739952087402, |
|
"step": 1410 |
|
}, |
|
{ |
|
"epoch": 0.568, |
|
"grad_norm": 7.007072586126817, |
|
"learning_rate": 2.3430237011767164e-07, |
|
"logits/chosen": 33.648799896240234, |
|
"logits/rejected": 36.46876525878906, |
|
"logps/chosen": -113.22758483886719, |
|
"logps/rejected": -178.4688262939453, |
|
"loss": 0.1731, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -6.234652042388916, |
|
"rewards/margins": 4.812324047088623, |
|
"rewards/rejected": -11.046976089477539, |
|
"step": 1420 |
|
}, |
|
{ |
|
"epoch": 0.572, |
|
"grad_norm": 19.130152675478577, |
|
"learning_rate": 2.3082024296829532e-07, |
|
"logits/chosen": 35.6358757019043, |
|
"logits/rejected": 36.253257751464844, |
|
"logps/chosen": -113.02217864990234, |
|
"logps/rejected": -185.16107177734375, |
|
"loss": 0.1124, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -5.546517372131348, |
|
"rewards/margins": 5.918461799621582, |
|
"rewards/rejected": -11.464980125427246, |
|
"step": 1430 |
|
}, |
|
{ |
|
"epoch": 0.576, |
|
"grad_norm": 18.125883555971487, |
|
"learning_rate": 2.2734185495055498e-07, |
|
"logits/chosen": 35.95376968383789, |
|
"logits/rejected": 37.0533332824707, |
|
"logps/chosen": -121.62260437011719, |
|
"logps/rejected": -175.02882385253906, |
|
"loss": 0.1505, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -6.334244251251221, |
|
"rewards/margins": 4.729000568389893, |
|
"rewards/rejected": -11.06324577331543, |
|
"step": 1440 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"grad_norm": 56.564917397952655, |
|
"learning_rate": 2.2386788418308665e-07, |
|
"logits/chosen": 33.461631774902344, |
|
"logits/rejected": 37.12461853027344, |
|
"logps/chosen": -112.4884262084961, |
|
"logps/rejected": -202.30970764160156, |
|
"loss": 0.0664, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": -5.148140907287598, |
|
"rewards/margins": 7.338738441467285, |
|
"rewards/rejected": -12.486879348754883, |
|
"step": 1450 |
|
}, |
|
{ |
|
"epoch": 0.584, |
|
"grad_norm": 43.54182209696087, |
|
"learning_rate": 2.2039900792337474e-07, |
|
"logits/chosen": 32.17646026611328, |
|
"logits/rejected": 35.69232940673828, |
|
"logps/chosen": -121.44427490234375, |
|
"logps/rejected": -192.20877075195312, |
|
"loss": 0.1323, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": -6.396025657653809, |
|
"rewards/margins": 5.983513832092285, |
|
"rewards/rejected": -12.379539489746094, |
|
"step": 1460 |
|
}, |
|
{ |
|
"epoch": 0.588, |
|
"grad_norm": 75.32127336294906, |
|
"learning_rate": 2.1693590243571935e-07, |
|
"logits/chosen": 34.178611755371094, |
|
"logits/rejected": 36.59508514404297, |
|
"logps/chosen": -123.4473648071289, |
|
"logps/rejected": -192.17855834960938, |
|
"loss": 0.2353, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -6.135283470153809, |
|
"rewards/margins": 5.6739821434021, |
|
"rewards/rejected": -11.80926513671875, |
|
"step": 1470 |
|
}, |
|
{ |
|
"epoch": 0.592, |
|
"grad_norm": 45.58891579183569, |
|
"learning_rate": 2.1347924285939712e-07, |
|
"logits/chosen": 34.34710693359375, |
|
"logits/rejected": 34.652095794677734, |
|
"logps/chosen": -125.2497329711914, |
|
"logps/rejected": -172.30828857421875, |
|
"loss": 0.2612, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -7.1709747314453125, |
|
"rewards/margins": 4.1582560539245605, |
|
"rewards/rejected": -11.329230308532715, |
|
"step": 1480 |
|
}, |
|
{ |
|
"epoch": 0.596, |
|
"grad_norm": 15.251049246391148, |
|
"learning_rate": 2.100297030770413e-07, |
|
"logits/chosen": 34.02775573730469, |
|
"logits/rejected": 35.586952209472656, |
|
"logps/chosen": -118.30632019042969, |
|
"logps/rejected": -179.9751434326172, |
|
"loss": 0.0947, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -6.386832237243652, |
|
"rewards/margins": 5.212705135345459, |
|
"rewards/rejected": -11.599536895751953, |
|
"step": 1490 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"grad_norm": 25.472331773437826, |
|
"learning_rate": 2.065879555832674e-07, |
|
"logits/chosen": 31.809621810913086, |
|
"logits/rejected": 35.93482208251953, |
|
"logps/chosen": -122.5445785522461, |
|
"logps/rejected": -203.33628845214844, |
|
"loss": 0.1295, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -6.351222991943359, |
|
"rewards/margins": 6.3509392738342285, |
|
"rewards/rejected": -12.702162742614746, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.604, |
|
"grad_norm": 36.99215746879388, |
|
"learning_rate": 2.0315467135356878e-07, |
|
"logits/chosen": 34.279014587402344, |
|
"logits/rejected": 36.6539421081543, |
|
"logps/chosen": -118.03910827636719, |
|
"logps/rejected": -185.2469482421875, |
|
"loss": 0.1324, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -6.5454583168029785, |
|
"rewards/margins": 5.202276229858398, |
|
"rewards/rejected": -11.747734069824219, |
|
"step": 1510 |
|
}, |
|
{ |
|
"epoch": 0.608, |
|
"grad_norm": 5.72817076275817, |
|
"learning_rate": 1.9973051971350888e-07, |
|
"logits/chosen": 32.36498260498047, |
|
"logits/rejected": 36.872802734375, |
|
"logps/chosen": -119.40788269042969, |
|
"logps/rejected": -182.93051147460938, |
|
"loss": 0.1548, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -6.524139404296875, |
|
"rewards/margins": 4.851975440979004, |
|
"rewards/rejected": -11.376115798950195, |
|
"step": 1520 |
|
}, |
|
{ |
|
"epoch": 0.612, |
|
"grad_norm": 62.7485215727146, |
|
"learning_rate": 1.9631616820823418e-07, |
|
"logits/chosen": 33.554473876953125, |
|
"logits/rejected": 34.441917419433594, |
|
"logps/chosen": -128.63380432128906, |
|
"logps/rejected": -187.55038452148438, |
|
"loss": 0.2279, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": -6.242335319519043, |
|
"rewards/margins": 5.341699600219727, |
|
"rewards/rejected": -11.58403491973877, |
|
"step": 1530 |
|
}, |
|
{ |
|
"epoch": 0.616, |
|
"grad_norm": 50.55388655519452, |
|
"learning_rate": 1.9291228247233603e-07, |
|
"logits/chosen": 34.57413101196289, |
|
"logits/rejected": 38.980873107910156, |
|
"logps/chosen": -124.8611068725586, |
|
"logps/rejected": -188.2270050048828, |
|
"loss": 0.1368, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -6.550134181976318, |
|
"rewards/margins": 4.793581008911133, |
|
"rewards/rejected": -11.34371566772461, |
|
"step": 1540 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"grad_norm": 20.98136994366918, |
|
"learning_rate": 1.895195261000831e-07, |
|
"logits/chosen": 32.666053771972656, |
|
"logits/rejected": 36.28156661987305, |
|
"logps/chosen": -123.55598449707031, |
|
"logps/rejected": -190.2432098388672, |
|
"loss": 0.2085, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -6.651708126068115, |
|
"rewards/margins": 5.466277122497559, |
|
"rewards/rejected": -12.117984771728516, |
|
"step": 1550 |
|
}, |
|
{ |
|
"epoch": 0.624, |
|
"grad_norm": 3.695214530883218, |
|
"learning_rate": 1.861385605160524e-07, |
|
"logits/chosen": 34.80469512939453, |
|
"logits/rejected": 37.0577507019043, |
|
"logps/chosen": -127.52436828613281, |
|
"logps/rejected": -200.47572326660156, |
|
"loss": 0.1962, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -6.4474639892578125, |
|
"rewards/margins": 6.185749530792236, |
|
"rewards/rejected": -12.633213996887207, |
|
"step": 1560 |
|
}, |
|
{ |
|
"epoch": 0.628, |
|
"grad_norm": 6.204487863528272, |
|
"learning_rate": 1.8277004484618357e-07, |
|
"logits/chosen": 35.11884689331055, |
|
"logits/rejected": 34.14295959472656, |
|
"logps/chosen": -123.8480224609375, |
|
"logps/rejected": -175.71351623535156, |
|
"loss": 0.1239, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": -5.91684103012085, |
|
"rewards/margins": 5.305278778076172, |
|
"rewards/rejected": -11.22212028503418, |
|
"step": 1570 |
|
}, |
|
{ |
|
"epoch": 0.632, |
|
"grad_norm": 27.777656870634978, |
|
"learning_rate": 1.7941463578928083e-07, |
|
"logits/chosen": 31.193029403686523, |
|
"logits/rejected": 34.23871612548828, |
|
"logps/chosen": -110.49909973144531, |
|
"logps/rejected": -183.0133514404297, |
|
"loss": 0.1618, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": -5.92338752746582, |
|
"rewards/margins": 5.685771942138672, |
|
"rewards/rejected": -11.609160423278809, |
|
"step": 1580 |
|
}, |
|
{ |
|
"epoch": 0.636, |
|
"grad_norm": 25.59557039318974, |
|
"learning_rate": 1.760729874889884e-07, |
|
"logits/chosen": 34.273590087890625, |
|
"logits/rejected": 36.67070388793945, |
|
"logps/chosen": -123.4420394897461, |
|
"logps/rejected": -188.76451110839844, |
|
"loss": 0.1413, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -5.854828834533691, |
|
"rewards/margins": 5.536397457122803, |
|
"rewards/rejected": -11.391225814819336, |
|
"step": 1590 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"grad_norm": 54.20969538897634, |
|
"learning_rate": 1.7274575140626315e-07, |
|
"logits/chosen": 35.63384246826172, |
|
"logits/rejected": 36.39768600463867, |
|
"logps/chosen": -142.24166870117188, |
|
"logps/rejected": -204.695556640625, |
|
"loss": 0.2189, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -7.065789699554443, |
|
"rewards/margins": 5.59266471862793, |
|
"rewards/rejected": -12.658454895019531, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 0.644, |
|
"grad_norm": 23.952776600344606, |
|
"learning_rate": 1.6943357619237225e-07, |
|
"logits/chosen": 30.349929809570312, |
|
"logits/rejected": 35.475894927978516, |
|
"logps/chosen": -136.0992431640625, |
|
"logps/rejected": -200.9610595703125, |
|
"loss": 0.1836, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -7.412652492523193, |
|
"rewards/margins": 4.916618347167969, |
|
"rewards/rejected": -12.32927131652832, |
|
"step": 1610 |
|
}, |
|
{ |
|
"epoch": 0.648, |
|
"grad_norm": 20.457675381739918, |
|
"learning_rate": 1.6613710756243627e-07, |
|
"logits/chosen": 34.76255416870117, |
|
"logits/rejected": 34.33202362060547, |
|
"logps/chosen": -123.5507583618164, |
|
"logps/rejected": -174.77584838867188, |
|
"loss": 0.1588, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": -7.010063171386719, |
|
"rewards/margins": 4.99750280380249, |
|
"rewards/rejected": -12.007566452026367, |
|
"step": 1620 |
|
}, |
|
{ |
|
"epoch": 0.652, |
|
"grad_norm": 8.740451188853967, |
|
"learning_rate": 1.6285698816954624e-07, |
|
"logits/chosen": 32.92230987548828, |
|
"logits/rejected": 37.1563720703125, |
|
"logps/chosen": -125.0332260131836, |
|
"logps/rejected": -198.4005126953125, |
|
"loss": 0.146, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -6.896471977233887, |
|
"rewards/margins": 5.607396602630615, |
|
"rewards/rejected": -12.503868103027344, |
|
"step": 1630 |
|
}, |
|
{ |
|
"epoch": 0.656, |
|
"grad_norm": 12.82247945171976, |
|
"learning_rate": 1.5959385747947695e-07, |
|
"logits/chosen": 32.56360626220703, |
|
"logits/rejected": 36.79113006591797, |
|
"logps/chosen": -115.17347717285156, |
|
"logps/rejected": -194.03790283203125, |
|
"loss": 0.1129, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -5.800166130065918, |
|
"rewards/margins": 5.930178165435791, |
|
"rewards/rejected": -11.73034381866455, |
|
"step": 1640 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"grad_norm": 54.05169324828869, |
|
"learning_rate": 1.5634835164602196e-07, |
|
"logits/chosen": 30.58074378967285, |
|
"logits/rejected": 34.36854934692383, |
|
"logps/chosen": -113.1893539428711, |
|
"logps/rejected": -182.39627075195312, |
|
"loss": 0.1431, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -6.311237812042236, |
|
"rewards/margins": 5.61757755279541, |
|
"rewards/rejected": -11.928815841674805, |
|
"step": 1650 |
|
}, |
|
{ |
|
"epoch": 0.664, |
|
"grad_norm": 24.825767693321374, |
|
"learning_rate": 1.5312110338697427e-07, |
|
"logits/chosen": 35.91250228881836, |
|
"logits/rejected": 36.06043243408203, |
|
"logps/chosen": -124.50358581542969, |
|
"logps/rejected": -201.7527313232422, |
|
"loss": 0.1154, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -6.220883369445801, |
|
"rewards/margins": 6.432749271392822, |
|
"rewards/rejected": -12.653631210327148, |
|
"step": 1660 |
|
}, |
|
{ |
|
"epoch": 0.668, |
|
"grad_norm": 21.917052429686564, |
|
"learning_rate": 1.4991274186077628e-07, |
|
"logits/chosen": 32.15296936035156, |
|
"logits/rejected": 34.598384857177734, |
|
"logps/chosen": -128.67645263671875, |
|
"logps/rejected": -194.93014526367188, |
|
"loss": 0.2175, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -6.7162299156188965, |
|
"rewards/margins": 5.552061557769775, |
|
"rewards/rejected": -12.268292427062988, |
|
"step": 1670 |
|
}, |
|
{ |
|
"epoch": 0.672, |
|
"grad_norm": 73.93391750261412, |
|
"learning_rate": 1.4672389254386457e-07, |
|
"logits/chosen": 33.413238525390625, |
|
"logits/rejected": 34.18181610107422, |
|
"logps/chosen": -124.91861724853516, |
|
"logps/rejected": -205.4623260498047, |
|
"loss": 0.2138, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -6.610821723937988, |
|
"rewards/margins": 6.807159423828125, |
|
"rewards/rejected": -13.41797924041748, |
|
"step": 1680 |
|
}, |
|
{ |
|
"epoch": 0.676, |
|
"grad_norm": 14.047455580384543, |
|
"learning_rate": 1.4355517710873182e-07, |
|
"logits/chosen": 34.12297821044922, |
|
"logits/rejected": 35.99665451049805, |
|
"logps/chosen": -121.33824157714844, |
|
"logps/rejected": -176.9901123046875, |
|
"loss": 0.1439, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -6.118622779846191, |
|
"rewards/margins": 5.124892234802246, |
|
"rewards/rejected": -11.243515014648438, |
|
"step": 1690 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"grad_norm": 0.7246303761815563, |
|
"learning_rate": 1.404072133027306e-07, |
|
"logits/chosen": 33.210655212402344, |
|
"logits/rejected": 36.71358108520508, |
|
"logps/chosen": -116.57881927490234, |
|
"logps/rejected": -182.92950439453125, |
|
"loss": 0.0992, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -6.269998550415039, |
|
"rewards/margins": 5.008700370788574, |
|
"rewards/rejected": -11.27869987487793, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 0.684, |
|
"grad_norm": 34.85736611285892, |
|
"learning_rate": 1.3728061482764235e-07, |
|
"logits/chosen": 32.43885040283203, |
|
"logits/rejected": 34.203834533691406, |
|
"logps/chosen": -125.60906982421875, |
|
"logps/rejected": -184.39035034179688, |
|
"loss": 0.1539, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -6.697512149810791, |
|
"rewards/margins": 5.102963924407959, |
|
"rewards/rejected": -11.800477027893066, |
|
"step": 1710 |
|
}, |
|
{ |
|
"epoch": 0.688, |
|
"grad_norm": 9.586401996642, |
|
"learning_rate": 1.341759912200346e-07, |
|
"logits/chosen": 32.35176467895508, |
|
"logits/rejected": 34.566925048828125, |
|
"logps/chosen": -126.4211196899414, |
|
"logps/rejected": -186.43441772460938, |
|
"loss": 0.1744, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -6.952803611755371, |
|
"rewards/margins": 4.971192836761475, |
|
"rewards/rejected": -11.923995971679688, |
|
"step": 1720 |
|
}, |
|
{ |
|
"epoch": 0.692, |
|
"grad_norm": 51.225200538375525, |
|
"learning_rate": 1.3109394773243115e-07, |
|
"logits/chosen": 32.60570526123047, |
|
"logits/rejected": 34.30889892578125, |
|
"logps/chosen": -112.5150375366211, |
|
"logps/rejected": -185.24020385742188, |
|
"loss": 0.1232, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -5.684107780456543, |
|
"rewards/margins": 5.926398277282715, |
|
"rewards/rejected": -11.610506057739258, |
|
"step": 1730 |
|
}, |
|
{ |
|
"epoch": 0.696, |
|
"grad_norm": 7.892076438359368, |
|
"learning_rate": 1.2803508521531677e-07, |
|
"logits/chosen": 32.87091827392578, |
|
"logits/rejected": 35.00414276123047, |
|
"logps/chosen": -115.19721984863281, |
|
"logps/rejected": -186.33670043945312, |
|
"loss": 0.1689, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -6.1829729080200195, |
|
"rewards/margins": 5.768080711364746, |
|
"rewards/rejected": -11.951054573059082, |
|
"step": 1740 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"grad_norm": 71.60841944470864, |
|
"learning_rate": 1.2500000000000005e-07, |
|
"logits/chosen": 31.9818115234375, |
|
"logits/rejected": 34.28449630737305, |
|
"logps/chosen": -131.71206665039062, |
|
"logps/rejected": -179.4838104248047, |
|
"loss": 0.2651, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -7.21808385848999, |
|
"rewards/margins": 4.244929313659668, |
|
"rewards/rejected": -11.463014602661133, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 0.704, |
|
"grad_norm": 11.403139100682084, |
|
"learning_rate": 1.2198928378235715e-07, |
|
"logits/chosen": 35.4173583984375, |
|
"logits/rejected": 32.19343185424805, |
|
"logps/chosen": -141.1168212890625, |
|
"logps/rejected": -190.25442504882812, |
|
"loss": 0.1845, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -7.428712368011475, |
|
"rewards/margins": 4.997444152832031, |
|
"rewards/rejected": -12.426156997680664, |
|
"step": 1760 |
|
}, |
|
{ |
|
"epoch": 0.708, |
|
"grad_norm": 15.614574564320913, |
|
"learning_rate": 1.1900352350748024e-07, |
|
"logits/chosen": 32.91938018798828, |
|
"logits/rejected": 37.30883026123047, |
|
"logps/chosen": -128.07351684570312, |
|
"logps/rejected": -202.3236083984375, |
|
"loss": 0.1459, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -6.7811689376831055, |
|
"rewards/margins": 5.805022239685059, |
|
"rewards/rejected": -12.586191177368164, |
|
"step": 1770 |
|
}, |
|
{ |
|
"epoch": 0.712, |
|
"grad_norm": 36.93773962705704, |
|
"learning_rate": 1.1604330125525078e-07, |
|
"logits/chosen": 33.32939910888672, |
|
"logits/rejected": 34.7056770324707, |
|
"logps/chosen": -126.90995025634766, |
|
"logps/rejected": -206.4558563232422, |
|
"loss": 0.109, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -6.557587623596191, |
|
"rewards/margins": 6.45965576171875, |
|
"rewards/rejected": -13.017242431640625, |
|
"step": 1780 |
|
}, |
|
{ |
|
"epoch": 0.716, |
|
"grad_norm": 19.64090712772826, |
|
"learning_rate": 1.1310919412686245e-07, |
|
"logits/chosen": 31.58943748474121, |
|
"logits/rejected": 32.97970962524414, |
|
"logps/chosen": -120.78947448730469, |
|
"logps/rejected": -185.22781372070312, |
|
"loss": 0.1554, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": -6.556436061859131, |
|
"rewards/margins": 5.089656829833984, |
|
"rewards/rejected": -11.646093368530273, |
|
"step": 1790 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"grad_norm": 1.8073260877539248, |
|
"learning_rate": 1.1020177413231332e-07, |
|
"logits/chosen": 32.85557556152344, |
|
"logits/rejected": 36.344078063964844, |
|
"logps/chosen": -119.52928161621094, |
|
"logps/rejected": -186.12684631347656, |
|
"loss": 0.1001, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -6.19125509262085, |
|
"rewards/margins": 5.620010852813721, |
|
"rewards/rejected": -11.81126594543457, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 0.724, |
|
"grad_norm": 1.233410177278683, |
|
"learning_rate": 1.073216080788921e-07, |
|
"logits/chosen": 31.43521499633789, |
|
"logits/rejected": 34.373809814453125, |
|
"logps/chosen": -117.0105972290039, |
|
"logps/rejected": -202.0782928466797, |
|
"loss": 0.0878, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -6.203524112701416, |
|
"rewards/margins": 6.944014072418213, |
|
"rewards/rejected": -13.147537231445312, |
|
"step": 1810 |
|
}, |
|
{ |
|
"epoch": 0.728, |
|
"grad_norm": 76.66139035605511, |
|
"learning_rate": 1.0446925746067766e-07, |
|
"logits/chosen": 34.948951721191406, |
|
"logits/rejected": 38.018917083740234, |
|
"logps/chosen": -128.2957305908203, |
|
"logps/rejected": -192.70611572265625, |
|
"loss": 0.1567, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -7.262060642242432, |
|
"rewards/margins": 5.220892906188965, |
|
"rewards/rejected": -12.482953071594238, |
|
"step": 1820 |
|
}, |
|
{ |
|
"epoch": 0.732, |
|
"grad_norm": 3.2686353366997087, |
|
"learning_rate": 1.0164527834907466e-07, |
|
"logits/chosen": 33.62763977050781, |
|
"logits/rejected": 32.20967483520508, |
|
"logps/chosen": -115.2950668334961, |
|
"logps/rejected": -181.83990478515625, |
|
"loss": 0.1879, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -6.048405170440674, |
|
"rewards/margins": 6.285736083984375, |
|
"rewards/rejected": -12.334141731262207, |
|
"step": 1830 |
|
}, |
|
{ |
|
"epoch": 0.736, |
|
"grad_norm": 19.72682359281921, |
|
"learning_rate": 9.885022128440629e-08, |
|
"logits/chosen": 30.57301902770996, |
|
"logits/rejected": 33.91904067993164, |
|
"logps/chosen": -123.07301330566406, |
|
"logps/rejected": -196.01779174804688, |
|
"loss": 0.1789, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -6.5850043296813965, |
|
"rewards/margins": 5.699263572692871, |
|
"rewards/rejected": -12.28426742553711, |
|
"step": 1840 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"grad_norm": 27.782784282950555, |
|
"learning_rate": 9.608463116858542e-08, |
|
"logits/chosen": 32.8736457824707, |
|
"logits/rejected": 33.712486267089844, |
|
"logps/chosen": -124.52708435058594, |
|
"logps/rejected": -185.42349243164062, |
|
"loss": 0.2559, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -6.897887229919434, |
|
"rewards/margins": 4.95641565322876, |
|
"rewards/rejected": -11.854303359985352, |
|
"step": 1850 |
|
}, |
|
{ |
|
"epoch": 0.744, |
|
"grad_norm": 11.434937847700061, |
|
"learning_rate": 9.334904715888494e-08, |
|
"logits/chosen": 32.95331954956055, |
|
"logits/rejected": 34.37748336791992, |
|
"logps/chosen": -135.6911163330078, |
|
"logps/rejected": -199.61776733398438, |
|
"loss": 0.1256, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -7.312950134277344, |
|
"rewards/margins": 5.76673698425293, |
|
"rewards/rejected": -13.079686164855957, |
|
"step": 1860 |
|
}, |
|
{ |
|
"epoch": 0.748, |
|
"grad_norm": 21.711629131923438, |
|
"learning_rate": 9.064400256282755e-08, |
|
"logits/chosen": 32.11374282836914, |
|
"logits/rejected": 32.81106185913086, |
|
"logps/chosen": -126.60643005371094, |
|
"logps/rejected": -190.9490966796875, |
|
"loss": 0.1124, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": -6.639864444732666, |
|
"rewards/margins": 5.9442009925842285, |
|
"rewards/rejected": -12.584066390991211, |
|
"step": 1870 |
|
}, |
|
{ |
|
"epoch": 0.752, |
|
"grad_norm": 4.882787255482076, |
|
"learning_rate": 8.797002473421727e-08, |
|
"logits/chosen": 32.480838775634766, |
|
"logits/rejected": 34.38404846191406, |
|
"logps/chosen": -120.6583023071289, |
|
"logps/rejected": -187.3675079345703, |
|
"loss": 0.0805, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -6.4553680419921875, |
|
"rewards/margins": 5.736647605895996, |
|
"rewards/rejected": -12.192015647888184, |
|
"step": 1880 |
|
}, |
|
{ |
|
"epoch": 0.756, |
|
"grad_norm": 73.20268420406155, |
|
"learning_rate": 8.532763497032986e-08, |
|
"logits/chosen": 30.1946964263916, |
|
"logits/rejected": 35.3504638671875, |
|
"logps/chosen": -130.51853942871094, |
|
"logps/rejected": -197.19638061523438, |
|
"loss": 0.1867, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -7.48050594329834, |
|
"rewards/margins": 5.310255527496338, |
|
"rewards/rejected": -12.79076099395752, |
|
"step": 1890 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"grad_norm": 62.43015390970909, |
|
"learning_rate": 8.271734841028552e-08, |
|
"logits/chosen": 30.940185546875, |
|
"logits/rejected": 34.006492614746094, |
|
"logps/chosen": -125.14324951171875, |
|
"logps/rejected": -189.832763671875, |
|
"loss": 0.2225, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -7.153977870941162, |
|
"rewards/margins": 5.524373531341553, |
|
"rewards/rejected": -12.678351402282715, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 0.764, |
|
"grad_norm": 58.76354716466513, |
|
"learning_rate": 8.013967393462093e-08, |
|
"logits/chosen": 32.89315414428711, |
|
"logits/rejected": 35.409217834472656, |
|
"logps/chosen": -114.31298828125, |
|
"logps/rejected": -197.35025024414062, |
|
"loss": 0.089, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": -5.788350582122803, |
|
"rewards/margins": 6.997166633605957, |
|
"rewards/rejected": -12.785517692565918, |
|
"step": 1910 |
|
}, |
|
{ |
|
"epoch": 0.768, |
|
"grad_norm": 40.55885429267448, |
|
"learning_rate": 7.759511406608255e-08, |
|
"logits/chosen": 30.79556655883789, |
|
"logits/rejected": 33.01952362060547, |
|
"logps/chosen": -136.01585388183594, |
|
"logps/rejected": -189.73330688476562, |
|
"loss": 0.134, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -7.579942226409912, |
|
"rewards/margins": 4.825619220733643, |
|
"rewards/rejected": -12.405561447143555, |
|
"step": 1920 |
|
}, |
|
{ |
|
"epoch": 0.772, |
|
"grad_norm": 11.57424881967435, |
|
"learning_rate": 7.508416487165862e-08, |
|
"logits/chosen": 28.126171112060547, |
|
"logits/rejected": 33.28355026245117, |
|
"logps/chosen": -123.10121154785156, |
|
"logps/rejected": -203.12960815429688, |
|
"loss": 0.093, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -7.236498832702637, |
|
"rewards/margins": 5.934015274047852, |
|
"rewards/rejected": -13.170514106750488, |
|
"step": 1930 |
|
}, |
|
{ |
|
"epoch": 0.776, |
|
"grad_norm": 21.840133243019153, |
|
"learning_rate": 7.260731586586982e-08, |
|
"logits/chosen": 31.944263458251953, |
|
"logits/rejected": 30.55642318725586, |
|
"logps/chosen": -123.80488586425781, |
|
"logps/rejected": -195.6708526611328, |
|
"loss": 0.162, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -6.678342342376709, |
|
"rewards/margins": 6.705534934997559, |
|
"rewards/rejected": -13.383877754211426, |
|
"step": 1940 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"grad_norm": 13.922142563059072, |
|
"learning_rate": 7.016504991533726e-08, |
|
"logits/chosen": 32.323158264160156, |
|
"logits/rejected": 34.14168167114258, |
|
"logps/chosen": -122.30216979980469, |
|
"logps/rejected": -184.92527770996094, |
|
"loss": 0.178, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -6.948062896728516, |
|
"rewards/margins": 4.983668327331543, |
|
"rewards/rejected": -11.931732177734375, |
|
"step": 1950 |
|
}, |
|
{ |
|
"epoch": 0.784, |
|
"grad_norm": 43.52548154571825, |
|
"learning_rate": 6.775784314464716e-08, |
|
"logits/chosen": 31.536640167236328, |
|
"logits/rejected": 33.37910461425781, |
|
"logps/chosen": -122.21321868896484, |
|
"logps/rejected": -192.3970184326172, |
|
"loss": 0.055, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -6.536123752593994, |
|
"rewards/margins": 6.116715908050537, |
|
"rewards/rejected": -12.652839660644531, |
|
"step": 1960 |
|
}, |
|
{ |
|
"epoch": 0.788, |
|
"grad_norm": 14.960056288014227, |
|
"learning_rate": 6.538616484352902e-08, |
|
"logits/chosen": 30.65342140197754, |
|
"logits/rejected": 31.002685546875, |
|
"logps/chosen": -119.79139709472656, |
|
"logps/rejected": -194.21560668945312, |
|
"loss": 0.1468, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": -6.551787376403809, |
|
"rewards/margins": 6.742495536804199, |
|
"rewards/rejected": -13.294283866882324, |
|
"step": 1970 |
|
}, |
|
{ |
|
"epoch": 0.792, |
|
"grad_norm": 63.96276766967657, |
|
"learning_rate": 6.305047737536707e-08, |
|
"logits/chosen": 32.422218322753906, |
|
"logits/rejected": 34.42628479003906, |
|
"logps/chosen": -141.8745574951172, |
|
"logps/rejected": -196.88424682617188, |
|
"loss": 0.2212, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -8.096589088439941, |
|
"rewards/margins": 4.696638584136963, |
|
"rewards/rejected": -12.79322624206543, |
|
"step": 1980 |
|
}, |
|
{ |
|
"epoch": 0.796, |
|
"grad_norm": 6.994169229009863, |
|
"learning_rate": 6.075123608706093e-08, |
|
"logits/chosen": 31.055633544921875, |
|
"logits/rejected": 33.48008346557617, |
|
"logps/chosen": -122.30110931396484, |
|
"logps/rejected": -194.860595703125, |
|
"loss": 0.1579, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -6.244012355804443, |
|
"rewards/margins": 6.198871612548828, |
|
"rewards/rejected": -12.44288444519043, |
|
"step": 1990 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"grad_norm": 21.842683348401124, |
|
"learning_rate": 5.848888922025552e-08, |
|
"logits/chosen": 31.886560440063477, |
|
"logits/rejected": 35.48186492919922, |
|
"logps/chosen": -135.24679565429688, |
|
"logps/rejected": -206.4519500732422, |
|
"loss": 0.0904, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -7.111126899719238, |
|
"rewards/margins": 5.913634300231934, |
|
"rewards/rejected": -13.024760246276855, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.804, |
|
"grad_norm": 10.8862310531102, |
|
"learning_rate": 5.6263877823955115e-08, |
|
"logits/chosen": 33.398433685302734, |
|
"logits/rejected": 33.78891372680664, |
|
"logps/chosen": -123.2912368774414, |
|
"logps/rejected": -192.1443634033203, |
|
"loss": 0.1133, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": -7.0102667808532715, |
|
"rewards/margins": 5.909992218017578, |
|
"rewards/rejected": -12.920259475708008, |
|
"step": 2010 |
|
}, |
|
{ |
|
"epoch": 0.808, |
|
"grad_norm": 42.03601830195942, |
|
"learning_rate": 5.4076635668540065e-08, |
|
"logits/chosen": 29.69512939453125, |
|
"logits/rejected": 33.41075134277344, |
|
"logps/chosen": -120.9056625366211, |
|
"logps/rejected": -192.8750457763672, |
|
"loss": 0.1104, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -6.7075395584106445, |
|
"rewards/margins": 5.716131687164307, |
|
"rewards/rejected": -12.423670768737793, |
|
"step": 2020 |
|
}, |
|
{ |
|
"epoch": 0.812, |
|
"grad_norm": 32.933513594571174, |
|
"learning_rate": 5.192758916120235e-08, |
|
"logits/chosen": 32.2730712890625, |
|
"logits/rejected": 35.09003448486328, |
|
"logps/chosen": -126.17935943603516, |
|
"logps/rejected": -197.20889282226562, |
|
"loss": 0.1222, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -6.885321140289307, |
|
"rewards/margins": 5.830145835876465, |
|
"rewards/rejected": -12.71546745300293, |
|
"step": 2030 |
|
}, |
|
{ |
|
"epoch": 0.816, |
|
"grad_norm": 42.07730167231684, |
|
"learning_rate": 4.981715726281666e-08, |
|
"logits/chosen": 32.14569091796875, |
|
"logits/rejected": 33.87199401855469, |
|
"logps/chosen": -137.12149047851562, |
|
"logps/rejected": -203.73658752441406, |
|
"loss": 0.2057, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -7.4929962158203125, |
|
"rewards/margins": 5.718442916870117, |
|
"rewards/rejected": -13.211441040039062, |
|
"step": 2040 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"grad_norm": 15.321840192563384, |
|
"learning_rate": 4.774575140626316e-08, |
|
"logits/chosen": 30.928787231445312, |
|
"logits/rejected": 33.557373046875, |
|
"logps/chosen": -122.8592529296875, |
|
"logps/rejected": -194.76742553710938, |
|
"loss": 0.1891, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -6.503848075866699, |
|
"rewards/margins": 6.216989040374756, |
|
"rewards/rejected": -12.720837593078613, |
|
"step": 2050 |
|
}, |
|
{ |
|
"epoch": 0.824, |
|
"grad_norm": 71.00238324419556, |
|
"learning_rate": 4.5713775416217875e-08, |
|
"logits/chosen": 29.931808471679688, |
|
"logits/rejected": 32.696876525878906, |
|
"logps/chosen": -133.57138061523438, |
|
"logps/rejected": -205.98837280273438, |
|
"loss": 0.1357, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -7.4561920166015625, |
|
"rewards/margins": 5.7885637283325195, |
|
"rewards/rejected": -13.244755744934082, |
|
"step": 2060 |
|
}, |
|
{ |
|
"epoch": 0.828, |
|
"grad_norm": 9.35367075528383, |
|
"learning_rate": 4.372162543042623e-08, |
|
"logits/chosen": 31.20676612854004, |
|
"logits/rejected": 31.955196380615234, |
|
"logps/chosen": -135.50155639648438, |
|
"logps/rejected": -201.00784301757812, |
|
"loss": 0.0994, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -7.0842413902282715, |
|
"rewards/margins": 6.034939289093018, |
|
"rewards/rejected": -13.119178771972656, |
|
"step": 2070 |
|
}, |
|
{ |
|
"epoch": 0.832, |
|
"grad_norm": 24.304073923638086, |
|
"learning_rate": 4.176968982247514e-08, |
|
"logits/chosen": 29.5570068359375, |
|
"logits/rejected": 33.91077423095703, |
|
"logps/chosen": -126.53129577636719, |
|
"logps/rejected": -205.4754638671875, |
|
"loss": 0.1221, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": -7.051833152770996, |
|
"rewards/margins": 6.18133020401001, |
|
"rewards/rejected": -13.233163833618164, |
|
"step": 2080 |
|
}, |
|
{ |
|
"epoch": 0.836, |
|
"grad_norm": 21.40116937016571, |
|
"learning_rate": 3.9858349126078936e-08, |
|
"logits/chosen": 30.234338760375977, |
|
"logits/rejected": 35.29171371459961, |
|
"logps/chosen": -118.93162536621094, |
|
"logps/rejected": -211.0615692138672, |
|
"loss": 0.0803, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -6.521851539611816, |
|
"rewards/margins": 6.673744201660156, |
|
"rewards/rejected": -13.195594787597656, |
|
"step": 2090 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"grad_norm": 63.74043787739687, |
|
"learning_rate": 3.798797596089351e-08, |
|
"logits/chosen": 30.151935577392578, |
|
"logits/rejected": 33.42070770263672, |
|
"logps/chosen": -126.11729431152344, |
|
"logps/rejected": -205.4882354736328, |
|
"loss": 0.1006, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": -7.028811454772949, |
|
"rewards/margins": 6.618474006652832, |
|
"rewards/rejected": -13.647287368774414, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 0.844, |
|
"grad_norm": 12.990695199065254, |
|
"learning_rate": 3.615893495987335e-08, |
|
"logits/chosen": 30.569061279296875, |
|
"logits/rejected": 33.5135383605957, |
|
"logps/chosen": -118.1374282836914, |
|
"logps/rejected": -183.0607147216797, |
|
"loss": 0.2439, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -6.712668418884277, |
|
"rewards/margins": 5.418641090393066, |
|
"rewards/rejected": -12.131309509277344, |
|
"step": 2110 |
|
}, |
|
{ |
|
"epoch": 0.848, |
|
"grad_norm": 19.041922220697664, |
|
"learning_rate": 3.437158269818563e-08, |
|
"logits/chosen": 30.906530380249023, |
|
"logits/rejected": 33.76823425292969, |
|
"logps/chosen": -128.64764404296875, |
|
"logps/rejected": -212.09213256835938, |
|
"loss": 0.0389, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -6.712831020355225, |
|
"rewards/margins": 7.552107810974121, |
|
"rewards/rejected": -14.264938354492188, |
|
"step": 2120 |
|
}, |
|
{ |
|
"epoch": 0.852, |
|
"grad_norm": 73.85387160932243, |
|
"learning_rate": 3.262626762369525e-08, |
|
"logits/chosen": 30.180126190185547, |
|
"logits/rejected": 34.350563049316406, |
|
"logps/chosen": -122.08283996582031, |
|
"logps/rejected": -191.43997192382812, |
|
"loss": 0.1989, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -6.999587059020996, |
|
"rewards/margins": 5.256472587585449, |
|
"rewards/rejected": -12.256059646606445, |
|
"step": 2130 |
|
}, |
|
{ |
|
"epoch": 0.856, |
|
"grad_norm": 23.568790751855996, |
|
"learning_rate": 3.092332998903416e-08, |
|
"logits/chosen": 29.4277286529541, |
|
"logits/rejected": 31.689163208007812, |
|
"logps/chosen": -136.5303192138672, |
|
"logps/rejected": -214.65731811523438, |
|
"loss": 0.108, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -8.013432502746582, |
|
"rewards/margins": 6.365548133850098, |
|
"rewards/rejected": -14.378979682922363, |
|
"step": 2140 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"grad_norm": 16.09088977053807, |
|
"learning_rate": 2.9263101785268252e-08, |
|
"logits/chosen": 29.914642333984375, |
|
"logits/rejected": 30.405963897705078, |
|
"logps/chosen": -134.8611297607422, |
|
"logps/rejected": -205.1629180908203, |
|
"loss": 0.0879, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": -7.248504638671875, |
|
"rewards/margins": 6.40295934677124, |
|
"rewards/rejected": -13.651464462280273, |
|
"step": 2150 |
|
}, |
|
{ |
|
"epoch": 0.864, |
|
"grad_norm": 97.3126034768412, |
|
"learning_rate": 2.764590667717562e-08, |
|
"logits/chosen": 31.892868041992188, |
|
"logits/rejected": 33.220489501953125, |
|
"logps/chosen": -138.26364135742188, |
|
"logps/rejected": -187.60067749023438, |
|
"loss": 0.2452, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -7.446439266204834, |
|
"rewards/margins": 4.706901550292969, |
|
"rewards/rejected": -12.153340339660645, |
|
"step": 2160 |
|
}, |
|
{ |
|
"epoch": 0.868, |
|
"grad_norm": 34.25127629439165, |
|
"learning_rate": 2.6072059940146772e-08, |
|
"logits/chosen": 30.961380004882812, |
|
"logits/rejected": 32.999900817871094, |
|
"logps/chosen": -123.99442291259766, |
|
"logps/rejected": -199.34922790527344, |
|
"loss": 0.1935, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -6.214547634124756, |
|
"rewards/margins": 6.843503475189209, |
|
"rewards/rejected": -13.058052062988281, |
|
"step": 2170 |
|
}, |
|
{ |
|
"epoch": 0.872, |
|
"grad_norm": 24.468484869974596, |
|
"learning_rate": 2.4541868398721576e-08, |
|
"logits/chosen": 31.01216697692871, |
|
"logits/rejected": 32.20166778564453, |
|
"logps/chosen": -128.03175354003906, |
|
"logps/rejected": -194.68743896484375, |
|
"loss": 0.0858, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -7.191014766693115, |
|
"rewards/margins": 5.623617649078369, |
|
"rewards/rejected": -12.8146333694458, |
|
"step": 2180 |
|
}, |
|
{ |
|
"epoch": 0.876, |
|
"grad_norm": 2.66007251335378, |
|
"learning_rate": 2.3055630366772856e-08, |
|
"logits/chosen": 29.776905059814453, |
|
"logits/rejected": 32.563453674316406, |
|
"logps/chosen": -130.4777069091797, |
|
"logps/rejected": -196.01138305664062, |
|
"loss": 0.1665, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -6.950491428375244, |
|
"rewards/margins": 6.007107734680176, |
|
"rewards/rejected": -12.957598686218262, |
|
"step": 2190 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"grad_norm": 28.407188913509714, |
|
"learning_rate": 2.1613635589349756e-08, |
|
"logits/chosen": 31.004446029663086, |
|
"logits/rejected": 31.923709869384766, |
|
"logps/chosen": -129.14511108398438, |
|
"logps/rejected": -189.40194702148438, |
|
"loss": 0.152, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -6.862164497375488, |
|
"rewards/margins": 5.448739051818848, |
|
"rewards/rejected": -12.310903549194336, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 0.884, |
|
"grad_norm": 27.05189743480488, |
|
"learning_rate": 2.0216165186191404e-08, |
|
"logits/chosen": 29.659343719482422, |
|
"logits/rejected": 32.05767059326172, |
|
"logps/chosen": -132.9698028564453, |
|
"logps/rejected": -204.43704223632812, |
|
"loss": 0.054, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": -7.252483367919922, |
|
"rewards/margins": 5.930403709411621, |
|
"rewards/rejected": -13.182887077331543, |
|
"step": 2210 |
|
}, |
|
{ |
|
"epoch": 0.888, |
|
"grad_norm": 55.43405815780065, |
|
"learning_rate": 1.8863491596921743e-08, |
|
"logits/chosen": 32.782066345214844, |
|
"logits/rejected": 32.522891998291016, |
|
"logps/chosen": -136.2987060546875, |
|
"logps/rejected": -198.53738403320312, |
|
"loss": 0.2253, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -7.071662902832031, |
|
"rewards/margins": 6.142470359802246, |
|
"rewards/rejected": -13.214132308959961, |
|
"step": 2220 |
|
}, |
|
{ |
|
"epoch": 0.892, |
|
"grad_norm": 51.20992346555615, |
|
"learning_rate": 1.7555878527937163e-08, |
|
"logits/chosen": 31.446645736694336, |
|
"logits/rejected": 35.24169921875, |
|
"logps/chosen": -118.80924224853516, |
|
"logps/rejected": -208.82534790039062, |
|
"loss": 0.0807, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -6.682692527770996, |
|
"rewards/margins": 6.966187477111816, |
|
"rewards/rejected": -13.648880004882812, |
|
"step": 2230 |
|
}, |
|
{ |
|
"epoch": 0.896, |
|
"grad_norm": 2.6516558135690262, |
|
"learning_rate": 1.629358090099639e-08, |
|
"logits/chosen": 29.057031631469727, |
|
"logits/rejected": 32.2264404296875, |
|
"logps/chosen": -120.19584655761719, |
|
"logps/rejected": -200.8718719482422, |
|
"loss": 0.0942, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -6.863171577453613, |
|
"rewards/margins": 6.121987342834473, |
|
"rewards/rejected": -12.98515796661377, |
|
"step": 2240 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"grad_norm": 19.604673890955695, |
|
"learning_rate": 1.507684480352292e-08, |
|
"logits/chosen": 31.421955108642578, |
|
"logits/rejected": 33.76438522338867, |
|
"logps/chosen": -122.68287658691406, |
|
"logps/rejected": -201.4803009033203, |
|
"loss": 0.1018, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -6.61349630355835, |
|
"rewards/margins": 7.108196258544922, |
|
"rewards/rejected": -13.721692085266113, |
|
"step": 2250 |
|
}, |
|
{ |
|
"epoch": 0.904, |
|
"grad_norm": 18.823513111687696, |
|
"learning_rate": 1.390590744062975e-08, |
|
"logits/chosen": 29.491558074951172, |
|
"logits/rejected": 31.476119995117188, |
|
"logps/chosen": -130.34339904785156, |
|
"logps/rejected": -202.07351684570312, |
|
"loss": 0.1108, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": -7.389586448669434, |
|
"rewards/margins": 6.204103469848633, |
|
"rewards/rejected": -13.593690872192383, |
|
"step": 2260 |
|
}, |
|
{ |
|
"epoch": 0.908, |
|
"grad_norm": 41.18098938197995, |
|
"learning_rate": 1.2780997088875866e-08, |
|
"logits/chosen": 32.09846878051758, |
|
"logits/rejected": 34.34507751464844, |
|
"logps/chosen": -122.9142074584961, |
|
"logps/rejected": -186.44857788085938, |
|
"loss": 0.1196, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -6.446104526519775, |
|
"rewards/margins": 5.684818267822266, |
|
"rewards/rejected": -12.130921363830566, |
|
"step": 2270 |
|
}, |
|
{ |
|
"epoch": 0.912, |
|
"grad_norm": 26.449033993731902, |
|
"learning_rate": 1.1702333051763268e-08, |
|
"logits/chosen": 30.299280166625977, |
|
"logits/rejected": 33.13639831542969, |
|
"logps/chosen": -125.92582702636719, |
|
"logps/rejected": -201.4826202392578, |
|
"loss": 0.0914, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -6.8485307693481445, |
|
"rewards/margins": 6.146922588348389, |
|
"rewards/rejected": -12.995452880859375, |
|
"step": 2280 |
|
}, |
|
{ |
|
"epoch": 0.916, |
|
"grad_norm": 3.868894147937223, |
|
"learning_rate": 1.0670125616983189e-08, |
|
"logits/chosen": 31.07822036743164, |
|
"logits/rejected": 33.824302673339844, |
|
"logps/chosen": -133.55230712890625, |
|
"logps/rejected": -204.15899658203125, |
|
"loss": 0.2123, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -7.182616233825684, |
|
"rewards/margins": 5.8420729637146, |
|
"rewards/rejected": -13.024690628051758, |
|
"step": 2290 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"grad_norm": 3.3807387885526454, |
|
"learning_rate": 9.684576015420275e-09, |
|
"logits/chosen": 29.672496795654297, |
|
"logits/rejected": 37.18809509277344, |
|
"logps/chosen": -126.36643981933594, |
|
"logps/rejected": -208.7083740234375, |
|
"loss": 0.158, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": -7.423619270324707, |
|
"rewards/margins": 5.624678611755371, |
|
"rewards/rejected": -13.048298835754395, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 0.924, |
|
"grad_norm": 66.31275321980026, |
|
"learning_rate": 8.745876381922146e-09, |
|
"logits/chosen": 30.262380599975586, |
|
"logits/rejected": 33.276100158691406, |
|
"logps/chosen": -124.06925964355469, |
|
"logps/rejected": -195.91195678710938, |
|
"loss": 0.1534, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": -6.720442295074463, |
|
"rewards/margins": 6.354052543640137, |
|
"rewards/rejected": -13.074495315551758, |
|
"step": 2310 |
|
}, |
|
{ |
|
"epoch": 0.928, |
|
"grad_norm": 7.064746922185591, |
|
"learning_rate": 7.85420971784223e-09, |
|
"logits/chosen": 31.4277400970459, |
|
"logits/rejected": 30.434642791748047, |
|
"logps/chosen": -133.32081604003906, |
|
"logps/rejected": -188.7599639892578, |
|
"loss": 0.2425, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -7.45855712890625, |
|
"rewards/margins": 5.378756999969482, |
|
"rewards/rejected": -12.837315559387207, |
|
"step": 2320 |
|
}, |
|
{ |
|
"epoch": 0.932, |
|
"grad_norm": 4.788662000330739, |
|
"learning_rate": 7.009749855363456e-09, |
|
"logits/chosen": 30.91952896118164, |
|
"logits/rejected": 32.42715072631836, |
|
"logps/chosen": -133.62342834472656, |
|
"logps/rejected": -188.62930297851562, |
|
"loss": 0.3079, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -7.070608615875244, |
|
"rewards/margins": 5.031331539154053, |
|
"rewards/rejected": -12.101941108703613, |
|
"step": 2330 |
|
}, |
|
{ |
|
"epoch": 0.936, |
|
"grad_norm": 10.233821689329831, |
|
"learning_rate": 6.2126614236091834e-09, |
|
"logits/chosen": 28.668411254882812, |
|
"logits/rejected": 32.377296447753906, |
|
"logps/chosen": -117.2125244140625, |
|
"logps/rejected": -195.70059204101562, |
|
"loss": 0.0607, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -6.799081325531006, |
|
"rewards/margins": 6.002202987670898, |
|
"rewards/rejected": -12.801284790039062, |
|
"step": 2340 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"grad_norm": 12.346515996086536, |
|
"learning_rate": 5.463099816548577e-09, |
|
"logits/chosen": 31.012216567993164, |
|
"logits/rejected": 35.09211349487305, |
|
"logps/chosen": -144.66915893554688, |
|
"logps/rejected": -219.4803009033203, |
|
"loss": 0.0968, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -7.998269557952881, |
|
"rewards/margins": 5.874829292297363, |
|
"rewards/rejected": -13.873098373413086, |
|
"step": 2350 |
|
}, |
|
{ |
|
"epoch": 0.944, |
|
"grad_norm": 33.1665678427252, |
|
"learning_rate": 4.761211162702117e-09, |
|
"logits/chosen": 29.088903427124023, |
|
"logits/rejected": 34.32123947143555, |
|
"logps/chosen": -127.30374908447266, |
|
"logps/rejected": -211.43215942382812, |
|
"loss": 0.1457, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -7.1538214683532715, |
|
"rewards/margins": 6.435636043548584, |
|
"rewards/rejected": -13.589457511901855, |
|
"step": 2360 |
|
}, |
|
{ |
|
"epoch": 0.948, |
|
"grad_norm": 23.425984241535982, |
|
"learning_rate": 4.107132296653548e-09, |
|
"logits/chosen": 28.269973754882812, |
|
"logits/rejected": 31.716510772705078, |
|
"logps/chosen": -125.16642761230469, |
|
"logps/rejected": -197.10952758789062, |
|
"loss": 0.1364, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -7.0634284019470215, |
|
"rewards/margins": 5.970315933227539, |
|
"rewards/rejected": -13.033744812011719, |
|
"step": 2370 |
|
}, |
|
{ |
|
"epoch": 0.952, |
|
"grad_norm": 10.233047916013119, |
|
"learning_rate": 3.5009907323737818e-09, |
|
"logits/chosen": 32.33917999267578, |
|
"logits/rejected": 31.942947387695312, |
|
"logps/chosen": -150.05201721191406, |
|
"logps/rejected": -198.63037109375, |
|
"loss": 0.1165, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -7.604508876800537, |
|
"rewards/margins": 5.146544933319092, |
|
"rewards/rejected": -12.751053810119629, |
|
"step": 2380 |
|
}, |
|
{ |
|
"epoch": 0.956, |
|
"grad_norm": 43.68344404125229, |
|
"learning_rate": 2.9429046383618038e-09, |
|
"logits/chosen": 32.400047302246094, |
|
"logits/rejected": 32.68498992919922, |
|
"logps/chosen": -131.60537719726562, |
|
"logps/rejected": -197.2787628173828, |
|
"loss": 0.1184, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -7.099400520324707, |
|
"rewards/margins": 6.27431058883667, |
|
"rewards/rejected": -13.373710632324219, |
|
"step": 2390 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"grad_norm": 59.46460968091823, |
|
"learning_rate": 2.4329828146074096e-09, |
|
"logits/chosen": 30.623287200927734, |
|
"logits/rejected": 32.035682678222656, |
|
"logps/chosen": -131.70294189453125, |
|
"logps/rejected": -198.9145050048828, |
|
"loss": 0.099, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -7.071308135986328, |
|
"rewards/margins": 6.146972179412842, |
|
"rewards/rejected": -13.218279838562012, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 0.964, |
|
"grad_norm": 56.79097277412103, |
|
"learning_rate": 1.9713246713805587e-09, |
|
"logits/chosen": 31.465078353881836, |
|
"logits/rejected": 34.087913513183594, |
|
"logps/chosen": -137.09597778320312, |
|
"logps/rejected": -203.9413604736328, |
|
"loss": 0.1021, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -7.499648094177246, |
|
"rewards/margins": 5.972261905670166, |
|
"rewards/rejected": -13.47191047668457, |
|
"step": 2410 |
|
}, |
|
{ |
|
"epoch": 0.968, |
|
"grad_norm": 0.8585461059925868, |
|
"learning_rate": 1.5580202098509076e-09, |
|
"logits/chosen": 29.60024070739746, |
|
"logits/rejected": 32.122642517089844, |
|
"logps/chosen": -138.75808715820312, |
|
"logps/rejected": -205.38681030273438, |
|
"loss": 0.1414, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -7.920256614685059, |
|
"rewards/margins": 5.451854228973389, |
|
"rewards/rejected": -13.372111320495605, |
|
"step": 2420 |
|
}, |
|
{ |
|
"epoch": 0.972, |
|
"grad_norm": 6.596142925147252, |
|
"learning_rate": 1.1931500045422038e-09, |
|
"logits/chosen": 27.959314346313477, |
|
"logits/rejected": 32.65019226074219, |
|
"logps/chosen": -126.5806655883789, |
|
"logps/rejected": -199.76040649414062, |
|
"loss": 0.2261, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -6.968267917633057, |
|
"rewards/margins": 5.849569797515869, |
|
"rewards/rejected": -12.817837715148926, |
|
"step": 2430 |
|
}, |
|
{ |
|
"epoch": 0.976, |
|
"grad_norm": 35.62534557996734, |
|
"learning_rate": 8.767851876239074e-10, |
|
"logits/chosen": 32.697261810302734, |
|
"logits/rejected": 35.91864013671875, |
|
"logps/chosen": -123.58503723144531, |
|
"logps/rejected": -206.4244842529297, |
|
"loss": 0.1628, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -6.5748610496521, |
|
"rewards/margins": 6.952831268310547, |
|
"rewards/rejected": -13.527691841125488, |
|
"step": 2440 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"grad_norm": 27.658485623502656, |
|
"learning_rate": 6.089874350439505e-10, |
|
"logits/chosen": 31.490856170654297, |
|
"logits/rejected": 32.93403625488281, |
|
"logps/chosen": -138.20703125, |
|
"logps/rejected": -193.5495147705078, |
|
"loss": 0.216, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -7.787785530090332, |
|
"rewards/margins": 4.913016319274902, |
|
"rewards/rejected": -12.700800895690918, |
|
"step": 2450 |
|
}, |
|
{ |
|
"epoch": 0.984, |
|
"grad_norm": 0.08797343919690617, |
|
"learning_rate": 3.898089545047445e-10, |
|
"logits/chosen": 31.071331024169922, |
|
"logits/rejected": 32.911354064941406, |
|
"logps/chosen": -130.91525268554688, |
|
"logps/rejected": -205.3039093017578, |
|
"loss": 0.0549, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -7.0166826248168945, |
|
"rewards/margins": 6.3437819480896, |
|
"rewards/rejected": -13.360466003417969, |
|
"step": 2460 |
|
}, |
|
{ |
|
"epoch": 0.988, |
|
"grad_norm": 1.2212789051560087, |
|
"learning_rate": 2.1929247528540418e-10, |
|
"logits/chosen": 28.965045928955078, |
|
"logits/rejected": 32.625064849853516, |
|
"logps/chosen": -127.5809326171875, |
|
"logps/rejected": -214.167724609375, |
|
"loss": 0.0704, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": -6.630097389221191, |
|
"rewards/margins": 7.368367671966553, |
|
"rewards/rejected": -13.998464584350586, |
|
"step": 2470 |
|
}, |
|
{ |
|
"epoch": 0.992, |
|
"grad_norm": 91.06898861755354, |
|
"learning_rate": 9.747123991141193e-11, |
|
"logits/chosen": 30.026744842529297, |
|
"logits/rejected": 32.432098388671875, |
|
"logps/chosen": -131.2405548095703, |
|
"logps/rejected": -194.84970092773438, |
|
"loss": 0.2616, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -7.285294532775879, |
|
"rewards/margins": 5.499347686767578, |
|
"rewards/rejected": -12.78464126586914, |
|
"step": 2480 |
|
}, |
|
{ |
|
"epoch": 0.996, |
|
"grad_norm": 89.82748805014613, |
|
"learning_rate": 2.4368997673940294e-11, |
|
"logits/chosen": 29.70314598083496, |
|
"logits/rejected": 32.16979217529297, |
|
"logps/chosen": -126.3776626586914, |
|
"logps/rejected": -199.11517333984375, |
|
"loss": 0.174, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -7.552847385406494, |
|
"rewards/margins": 6.022560119628906, |
|
"rewards/rejected": -13.575407028198242, |
|
"step": 2490 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"grad_norm": 18.11358569727584, |
|
"learning_rate": 0.0, |
|
"logits/chosen": 30.229150772094727, |
|
"logits/rejected": 34.69512176513672, |
|
"logps/chosen": -133.08348083496094, |
|
"logps/rejected": -207.73001098632812, |
|
"loss": 0.1378, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": -7.222729682922363, |
|
"rewards/margins": 5.665229320526123, |
|
"rewards/rejected": -12.887959480285645, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"step": 2500, |
|
"total_flos": 0.0, |
|
"train_loss": 0.26106402876377105, |
|
"train_runtime": 17697.4864, |
|
"train_samples_per_second": 1.13, |
|
"train_steps_per_second": 0.141 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 2500, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 0.0, |
|
"train_batch_size": 1, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|