{ "best_metric": null, "best_model_checkpoint": null, "epoch": 1.0, "eval_steps": 100, "global_step": 3821, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "learning_rate": 2.6109660574412532e-09, "logits/chosen": -1.3665199279785156, "logits/rejected": -1.22934889793396, "logps/chosen": -4618.75, "logps/rejected": -2311.76708984375, "loss": 0.6931, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 1 }, { "epoch": 0.0, "learning_rate": 2.610966057441253e-08, "logits/chosen": -1.2836003303527832, "logits/rejected": -1.2451469898223877, "logps/chosen": -2991.450439453125, "logps/rejected": -2506.193603515625, "loss": 0.7002, "rewards/accuracies": 0.3611111044883728, "rewards/chosen": -0.003100518137216568, "rewards/margins": -0.0017652130918577313, "rewards/rejected": -0.0013353050453588367, "step": 10 }, { "epoch": 0.01, "learning_rate": 5.221932114882506e-08, "logits/chosen": -1.289717435836792, "logits/rejected": -1.2764991521835327, "logps/chosen": -2514.9619140625, "logps/rejected": -2609.55224609375, "loss": 0.6925, "rewards/accuracies": 0.550000011920929, "rewards/chosen": 0.004010486416518688, "rewards/margins": 0.0017830505967140198, "rewards/rejected": 0.0022274362854659557, "step": 20 }, { "epoch": 0.01, "learning_rate": 7.83289817232376e-08, "logits/chosen": -1.2992498874664307, "logits/rejected": -1.2743134498596191, "logps/chosen": -2825.876220703125, "logps/rejected": -2155.24853515625, "loss": 0.6922, "rewards/accuracies": 0.375, "rewards/chosen": 0.008475597016513348, "rewards/margins": -0.002866474213078618, "rewards/rejected": 0.011342070996761322, "step": 30 }, { "epoch": 0.01, "learning_rate": 1.0443864229765012e-07, "logits/chosen": -1.2443135976791382, "logits/rejected": -1.2479488849639893, "logps/chosen": -2863.024169921875, "logps/rejected": -2658.76806640625, "loss": 0.688, "rewards/accuracies": 0.4749999940395355, "rewards/chosen": 0.04540400952100754, "rewards/margins": 0.002538739936426282, "rewards/rejected": 0.042865268886089325, "step": 40 }, { "epoch": 0.01, "learning_rate": 1.3054830287206266e-07, "logits/chosen": -1.2135803699493408, "logits/rejected": -1.2339200973510742, "logps/chosen": -2587.803466796875, "logps/rejected": -2387.272705078125, "loss": 0.7147, "rewards/accuracies": 0.5, "rewards/chosen": 0.11740453541278839, "rewards/margins": 0.009207578375935555, "rewards/rejected": 0.10819695144891739, "step": 50 }, { "epoch": 0.02, "learning_rate": 1.566579634464752e-07, "logits/chosen": -1.1599149703979492, "logits/rejected": -1.2010291814804077, "logps/chosen": -2478.61767578125, "logps/rejected": -2504.965087890625, "loss": 0.687, "rewards/accuracies": 0.5, "rewards/chosen": 0.17490772902965546, "rewards/margins": 0.014275921508669853, "rewards/rejected": 0.16063180565834045, "step": 60 }, { "epoch": 0.02, "learning_rate": 1.8276762402088773e-07, "logits/chosen": -1.2249014377593994, "logits/rejected": -1.1395671367645264, "logps/chosen": -1990.302734375, "logps/rejected": -1826.503662109375, "loss": 0.6887, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": 0.20387442409992218, "rewards/margins": 0.01711631938815117, "rewards/rejected": 0.18675807118415833, "step": 70 }, { "epoch": 0.02, "learning_rate": 2.0887728459530023e-07, "logits/chosen": -1.1765159368515015, "logits/rejected": -1.061958909034729, "logps/chosen": -2488.86083984375, "logps/rejected": -2211.66748046875, "loss": 0.6906, "rewards/accuracies": 0.675000011920929, "rewards/chosen": 0.27337607741355896, "rewards/margins": 0.05351179838180542, "rewards/rejected": 0.21986432373523712, "step": 80 }, { "epoch": 0.02, "learning_rate": 2.349869451697128e-07, "logits/chosen": -1.2903188467025757, "logits/rejected": -1.1929465532302856, "logps/chosen": -2553.20556640625, "logps/rejected": -1782.565185546875, "loss": 0.6618, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": 0.3111152946949005, "rewards/margins": 0.10488128662109375, "rewards/rejected": 0.20623397827148438, "step": 90 }, { "epoch": 0.03, "learning_rate": 2.610966057441253e-07, "logits/chosen": -1.2507898807525635, "logits/rejected": -1.149072289466858, "logps/chosen": -2592.64892578125, "logps/rejected": -2194.07470703125, "loss": 0.6694, "rewards/accuracies": 0.625, "rewards/chosen": 0.3974040448665619, "rewards/margins": 0.13847002387046814, "rewards/rejected": 0.25893402099609375, "step": 100 }, { "epoch": 0.03, "eval_logits/chosen": -1.246272087097168, "eval_logits/rejected": -1.2047406435012817, "eval_logps/chosen": -2626.098388671875, "eval_logps/rejected": -2193.6435546875, "eval_loss": 0.6733300685882568, "eval_rewards/accuracies": 0.550000011920929, "eval_rewards/chosen": 0.4667675495147705, "eval_rewards/margins": 0.09803615510463715, "eval_rewards/rejected": 0.36873137950897217, "eval_runtime": 271.9101, "eval_samples_per_second": 7.355, "eval_steps_per_second": 0.46, "step": 100 }, { "epoch": 0.03, "learning_rate": 2.8720626631853785e-07, "logits/chosen": -1.2401742935180664, "logits/rejected": -1.270200490951538, "logps/chosen": -2110.267822265625, "logps/rejected": -2461.110595703125, "loss": 0.7004, "rewards/accuracies": 0.550000011920929, "rewards/chosen": 0.35654813051223755, "rewards/margins": -0.08482155203819275, "rewards/rejected": 0.4413697123527527, "step": 110 }, { "epoch": 0.03, "learning_rate": 3.133159268929504e-07, "logits/chosen": -1.2979562282562256, "logits/rejected": -1.2625576257705688, "logps/chosen": -2472.685302734375, "logps/rejected": -2142.302490234375, "loss": 0.6585, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": 0.37470149993896484, "rewards/margins": 0.10966293513774872, "rewards/rejected": 0.26503854990005493, "step": 120 }, { "epoch": 0.03, "learning_rate": 3.3942558746736286e-07, "logits/chosen": -1.2801696062088013, "logits/rejected": -1.18109130859375, "logps/chosen": -2593.06103515625, "logps/rejected": -2058.24267578125, "loss": 0.6809, "rewards/accuracies": 0.550000011920929, "rewards/chosen": 0.453033447265625, "rewards/margins": 0.13208410143852234, "rewards/rejected": 0.3209493160247803, "step": 130 }, { "epoch": 0.04, "learning_rate": 3.6553524804177545e-07, "logits/chosen": -1.230254054069519, "logits/rejected": -1.23989737033844, "logps/chosen": -2341.89990234375, "logps/rejected": -2458.319580078125, "loss": 0.6742, "rewards/accuracies": 0.42500001192092896, "rewards/chosen": 0.476696252822876, "rewards/margins": 0.016019124537706375, "rewards/rejected": 0.4606771469116211, "step": 140 }, { "epoch": 0.04, "learning_rate": 3.91644908616188e-07, "logits/chosen": -1.2208526134490967, "logits/rejected": -1.2657601833343506, "logps/chosen": -2545.86474609375, "logps/rejected": -2762.395751953125, "loss": 0.6924, "rewards/accuracies": 0.44999998807907104, "rewards/chosen": 0.4996720850467682, "rewards/margins": -0.10669504106044769, "rewards/rejected": 0.6063671112060547, "step": 150 }, { "epoch": 0.04, "learning_rate": 4.1775456919060046e-07, "logits/chosen": -1.2546348571777344, "logits/rejected": -1.2102447748184204, "logps/chosen": -2498.013671875, "logps/rejected": -1981.0, "loss": 0.6611, "rewards/accuracies": 0.625, "rewards/chosen": 0.5435835123062134, "rewards/margins": 0.1602788269519806, "rewards/rejected": 0.3833047151565552, "step": 160 }, { "epoch": 0.04, "learning_rate": 4.4386422976501305e-07, "logits/chosen": -1.1585718393325806, "logits/rejected": -1.1300784349441528, "logps/chosen": -2143.332763671875, "logps/rejected": -1890.7896728515625, "loss": 0.6694, "rewards/accuracies": 0.44999998807907104, "rewards/chosen": 0.3146580457687378, "rewards/margins": 0.0533723309636116, "rewards/rejected": 0.2612857222557068, "step": 170 }, { "epoch": 0.05, "learning_rate": 4.699738903394256e-07, "logits/chosen": -1.2575573921203613, "logits/rejected": -1.2878539562225342, "logps/chosen": -2350.91015625, "logps/rejected": -2002.0416259765625, "loss": 0.6549, "rewards/accuracies": 0.5, "rewards/chosen": 0.4910794794559479, "rewards/margins": 0.04307179898023605, "rewards/rejected": 0.4480076730251312, "step": 180 }, { "epoch": 0.05, "learning_rate": 4.960835509138381e-07, "logits/chosen": -1.255906105041504, "logits/rejected": -1.2742435932159424, "logps/chosen": -1905.1875, "logps/rejected": -1874.7750244140625, "loss": 0.6564, "rewards/accuracies": 0.4749999940395355, "rewards/chosen": 0.6333888173103333, "rewards/margins": 0.05971439927816391, "rewards/rejected": 0.5736743211746216, "step": 190 }, { "epoch": 0.05, "learning_rate": 5.221932114882506e-07, "logits/chosen": -1.2323532104492188, "logits/rejected": -1.2203179597854614, "logps/chosen": -2919.02880859375, "logps/rejected": -2549.182373046875, "loss": 0.6496, "rewards/accuracies": 0.5, "rewards/chosen": 0.9707019925117493, "rewards/margins": 0.10560061037540436, "rewards/rejected": 0.8651013374328613, "step": 200 }, { "epoch": 0.05, "eval_logits/chosen": -1.2029674053192139, "eval_logits/rejected": -1.162118911743164, "eval_logps/chosen": -2583.427001953125, "eval_logps/rejected": -2164.738525390625, "eval_loss": 0.6496742367744446, "eval_rewards/accuracies": 0.6039999723434448, "eval_rewards/chosen": 0.893484890460968, "eval_rewards/margins": 0.23570162057876587, "eval_rewards/rejected": 0.6577833294868469, "eval_runtime": 276.0725, "eval_samples_per_second": 7.244, "eval_steps_per_second": 0.453, "step": 200 }, { "epoch": 0.05, "learning_rate": 5.483028720626631e-07, "logits/chosen": -1.2817461490631104, "logits/rejected": -1.21957266330719, "logps/chosen": -2260.506591796875, "logps/rejected": -2005.905029296875, "loss": 0.7114, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": 0.6090816259384155, "rewards/margins": 0.037415795028209686, "rewards/rejected": 0.5716658234596252, "step": 210 }, { "epoch": 0.06, "learning_rate": 5.744125326370757e-07, "logits/chosen": -1.24350106716156, "logits/rejected": -1.1972558498382568, "logps/chosen": -2426.041748046875, "logps/rejected": -2175.470458984375, "loss": 0.6441, "rewards/accuracies": 0.574999988079071, "rewards/chosen": 0.5010141134262085, "rewards/margins": 0.11551054567098618, "rewards/rejected": 0.3855035901069641, "step": 220 }, { "epoch": 0.06, "learning_rate": 6.005221932114882e-07, "logits/chosen": -1.1981332302093506, "logits/rejected": -1.1106714010238647, "logps/chosen": -2660.1396484375, "logps/rejected": -2036.3939208984375, "loss": 0.6577, "rewards/accuracies": 0.675000011920929, "rewards/chosen": 0.5493336915969849, "rewards/margins": 0.2223406583070755, "rewards/rejected": 0.3269929885864258, "step": 230 }, { "epoch": 0.06, "learning_rate": 6.266318537859008e-07, "logits/chosen": -1.2315361499786377, "logits/rejected": -1.2117688655853271, "logps/chosen": -2486.411865234375, "logps/rejected": -2081.799072265625, "loss": 0.6941, "rewards/accuracies": 0.574999988079071, "rewards/chosen": 0.795777440071106, "rewards/margins": 0.1994515359401703, "rewards/rejected": 0.5963259339332581, "step": 240 }, { "epoch": 0.07, "learning_rate": 6.527415143603134e-07, "logits/chosen": -1.2543888092041016, "logits/rejected": -1.2065962553024292, "logps/chosen": -2412.635986328125, "logps/rejected": -2150.365234375, "loss": 0.6646, "rewards/accuracies": 0.574999988079071, "rewards/chosen": 0.5733842849731445, "rewards/margins": 0.214861199259758, "rewards/rejected": 0.3585231602191925, "step": 250 }, { "epoch": 0.07, "learning_rate": 6.788511749347257e-07, "logits/chosen": -1.3239953517913818, "logits/rejected": -1.237247109413147, "logps/chosen": -3097.52001953125, "logps/rejected": -2403.084716796875, "loss": 0.6408, "rewards/accuracies": 0.675000011920929, "rewards/chosen": 0.7425528168678284, "rewards/margins": 0.21805015206336975, "rewards/rejected": 0.5245026350021362, "step": 260 }, { "epoch": 0.07, "learning_rate": 7.049608355091383e-07, "logits/chosen": -1.3785080909729004, "logits/rejected": -1.310682773590088, "logps/chosen": -2970.88916015625, "logps/rejected": -2465.4521484375, "loss": 0.6973, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": 0.8264948725700378, "rewards/margins": 0.1802043616771698, "rewards/rejected": 0.6462904214859009, "step": 270 }, { "epoch": 0.07, "learning_rate": 7.310704960835509e-07, "logits/chosen": -1.3364454507827759, "logits/rejected": -1.2810570001602173, "logps/chosen": -1845.6207275390625, "logps/rejected": -1552.9630126953125, "loss": 0.648, "rewards/accuracies": 0.675000011920929, "rewards/chosen": 0.5993987321853638, "rewards/margins": 0.21035853028297424, "rewards/rejected": 0.38904014229774475, "step": 280 }, { "epoch": 0.08, "learning_rate": 7.571801566579634e-07, "logits/chosen": -1.350436806678772, "logits/rejected": -1.248679757118225, "logps/chosen": -2682.272705078125, "logps/rejected": -2016.766845703125, "loss": 0.6367, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": 0.896868109703064, "rewards/margins": 0.3260273337364197, "rewards/rejected": 0.5708408951759338, "step": 290 }, { "epoch": 0.08, "learning_rate": 7.83289817232376e-07, "logits/chosen": -1.371492862701416, "logits/rejected": -1.3663508892059326, "logps/chosen": -2637.647705078125, "logps/rejected": -2172.9091796875, "loss": 0.6358, "rewards/accuracies": 0.625, "rewards/chosen": 0.6898142099380493, "rewards/margins": 0.30571404099464417, "rewards/rejected": 0.38410019874572754, "step": 300 }, { "epoch": 0.08, "eval_logits/chosen": -1.261673092842102, "eval_logits/rejected": -1.220168948173523, "eval_logps/chosen": -2605.7470703125, "eval_logps/rejected": -2186.15283203125, "eval_loss": 0.6671658158302307, "eval_rewards/accuracies": 0.5899999737739563, "eval_rewards/chosen": 0.6702810525894165, "eval_rewards/margins": 0.22664184868335724, "eval_rewards/rejected": 0.44363921880722046, "eval_runtime": 276.9466, "eval_samples_per_second": 7.222, "eval_steps_per_second": 0.451, "step": 300 }, { "epoch": 0.08, "learning_rate": 8.093994778067885e-07, "logits/chosen": -1.2656062841415405, "logits/rejected": -1.1738948822021484, "logps/chosen": -2773.645751953125, "logps/rejected": -2556.039306640625, "loss": 0.6351, "rewards/accuracies": 0.625, "rewards/chosen": 0.5250165462493896, "rewards/margins": 0.3903385102748871, "rewards/rejected": 0.13467800617218018, "step": 310 }, { "epoch": 0.08, "learning_rate": 8.355091383812009e-07, "logits/chosen": -1.2407519817352295, "logits/rejected": -1.159961462020874, "logps/chosen": -2324.83154296875, "logps/rejected": -2033.7972412109375, "loss": 0.6704, "rewards/accuracies": 0.5, "rewards/chosen": 0.46215081214904785, "rewards/margins": 0.13348433375358582, "rewards/rejected": 0.32866644859313965, "step": 320 }, { "epoch": 0.09, "learning_rate": 8.616187989556135e-07, "logits/chosen": -1.1832590103149414, "logits/rejected": -1.2228165864944458, "logps/chosen": -2444.755859375, "logps/rejected": -2273.108154296875, "loss": 0.7001, "rewards/accuracies": 0.5, "rewards/chosen": 0.41592496633529663, "rewards/margins": -0.03532214090228081, "rewards/rejected": 0.45124712586402893, "step": 330 }, { "epoch": 0.09, "learning_rate": 8.877284595300261e-07, "logits/chosen": -1.3150399923324585, "logits/rejected": -1.2690991163253784, "logps/chosen": -2488.77099609375, "logps/rejected": -2145.29248046875, "loss": 0.6355, "rewards/accuracies": 0.675000011920929, "rewards/chosen": 0.3536873757839203, "rewards/margins": 0.19547489285469055, "rewards/rejected": 0.15821249783039093, "step": 340 }, { "epoch": 0.09, "learning_rate": 9.138381201044386e-07, "logits/chosen": -1.3865059614181519, "logits/rejected": -1.3378633260726929, "logps/chosen": -2283.524169921875, "logps/rejected": -2091.947265625, "loss": 0.6695, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": 0.8051117658615112, "rewards/margins": 0.2420201301574707, "rewards/rejected": 0.5630916357040405, "step": 350 }, { "epoch": 0.09, "learning_rate": 9.399477806788512e-07, "logits/chosen": -1.414058804512024, "logits/rejected": -1.3964297771453857, "logps/chosen": -2432.849365234375, "logps/rejected": -2076.60107421875, "loss": 0.6743, "rewards/accuracies": 0.625, "rewards/chosen": 1.3647016286849976, "rewards/margins": 0.44374021887779236, "rewards/rejected": 0.9209613800048828, "step": 360 }, { "epoch": 0.1, "learning_rate": 9.660574412532637e-07, "logits/chosen": -1.3959238529205322, "logits/rejected": -1.378598928451538, "logps/chosen": -2425.37548828125, "logps/rejected": -1807.110107421875, "loss": 0.6755, "rewards/accuracies": 0.625, "rewards/chosen": 0.5852380990982056, "rewards/margins": 0.19205673038959503, "rewards/rejected": 0.39318135380744934, "step": 370 }, { "epoch": 0.1, "learning_rate": 9.921671018276761e-07, "logits/chosen": -1.3709585666656494, "logits/rejected": -1.2900068759918213, "logps/chosen": -2094.26220703125, "logps/rejected": -2028.6328125, "loss": 0.6926, "rewards/accuracies": 0.625, "rewards/chosen": 0.4213894009590149, "rewards/margins": 0.14318980276584625, "rewards/rejected": 0.27819958329200745, "step": 380 }, { "epoch": 0.1, "learning_rate": 9.999897712489534e-07, "logits/chosen": -1.5162689685821533, "logits/rejected": -1.4461922645568848, "logps/chosen": -2573.37890625, "logps/rejected": -2211.701171875, "loss": 0.656, "rewards/accuracies": 0.574999988079071, "rewards/chosen": 0.8437172174453735, "rewards/margins": 0.25329241156578064, "rewards/rejected": 0.5904248356819153, "step": 390 }, { "epoch": 0.1, "learning_rate": 9.999396722513154e-07, "logits/chosen": -1.3767322301864624, "logits/rejected": -1.356715440750122, "logps/chosen": -2870.7294921875, "logps/rejected": -2530.47119140625, "loss": 0.6783, "rewards/accuracies": 0.699999988079071, "rewards/chosen": 0.4322354197502136, "rewards/margins": 0.2268580198287964, "rewards/rejected": 0.20537741482257843, "step": 400 }, { "epoch": 0.1, "eval_logits/chosen": -1.4016751050949097, "eval_logits/rejected": -1.3598042726516724, "eval_logps/chosen": -2644.43896484375, "eval_logps/rejected": -2221.267578125, "eval_loss": 0.7143814563751221, "eval_rewards/accuracies": 0.5680000185966492, "eval_rewards/chosen": 0.2833646237850189, "eval_rewards/margins": 0.190872922539711, "eval_rewards/rejected": 0.09249173104763031, "eval_runtime": 272.9743, "eval_samples_per_second": 7.327, "eval_steps_per_second": 0.458, "step": 400 }, { "epoch": 0.11, "learning_rate": 9.99847828434916e-07, "logits/chosen": -1.4666458368301392, "logits/rejected": -1.4085910320281982, "logps/chosen": -2439.627197265625, "logps/rejected": -1939.951904296875, "loss": 0.6728, "rewards/accuracies": 0.625, "rewards/chosen": 0.6307787895202637, "rewards/margins": 0.3261147141456604, "rewards/rejected": 0.3046640455722809, "step": 410 }, { "epoch": 0.11, "learning_rate": 9.99714247468688e-07, "logits/chosen": -1.6231820583343506, "logits/rejected": -1.5343637466430664, "logps/chosen": -2322.893798828125, "logps/rejected": -1743.1722412109375, "loss": 0.6625, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": 1.083807110786438, "rewards/margins": 0.2774105966091156, "rewards/rejected": 0.8063966035842896, "step": 420 }, { "epoch": 0.11, "learning_rate": 9.995389405066031e-07, "logits/chosen": -1.5276827812194824, "logits/rejected": -1.5160127878189087, "logps/chosen": -2009.1494140625, "logps/rejected": -1984.708251953125, "loss": 0.6479, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": 0.9781614542007446, "rewards/margins": 0.3814542889595032, "rewards/rejected": 0.5967071056365967, "step": 430 }, { "epoch": 0.12, "learning_rate": 9.993219221867424e-07, "logits/chosen": -1.6460065841674805, "logits/rejected": -1.618194818496704, "logps/chosen": -3029.74072265625, "logps/rejected": -2566.05126953125, "loss": 0.6847, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": 0.7843648195266724, "rewards/margins": 0.4065024256706238, "rewards/rejected": 0.3778623044490814, "step": 440 }, { "epoch": 0.12, "learning_rate": 9.990632106300731e-07, "logits/chosen": -1.6142499446868896, "logits/rejected": -1.526350975036621, "logps/chosen": -2914.735107421875, "logps/rejected": -2487.213623046875, "loss": 0.7076, "rewards/accuracies": 0.5, "rewards/chosen": 0.6956815123558044, "rewards/margins": 0.29298099875450134, "rewards/rejected": 0.4027004837989807, "step": 450 }, { "epoch": 0.12, "learning_rate": 9.98762827438936e-07, "logits/chosen": -1.636639952659607, "logits/rejected": -1.5618512630462646, "logps/chosen": -2808.490478515625, "logps/rejected": -2267.61962890625, "loss": 0.755, "rewards/accuracies": 0.5, "rewards/chosen": 0.7195907831192017, "rewards/margins": 0.28500932455062866, "rewards/rejected": 0.434581458568573, "step": 460 }, { "epoch": 0.12, "learning_rate": 9.98420797695241e-07, "logits/chosen": -1.7694892883300781, "logits/rejected": -1.6265687942504883, "logps/chosen": -2040.9447021484375, "logps/rejected": -1787.8951416015625, "loss": 0.6847, "rewards/accuracies": 0.625, "rewards/chosen": 0.03115728497505188, "rewards/margins": 0.17865832149982452, "rewards/rejected": -0.14750102162361145, "step": 470 }, { "epoch": 0.13, "learning_rate": 9.980371499583729e-07, "logits/chosen": -1.6505523920059204, "logits/rejected": -1.5521347522735596, "logps/chosen": -2404.302490234375, "logps/rejected": -1731.2249755859375, "loss": 0.7161, "rewards/accuracies": 0.675000011920929, "rewards/chosen": 0.9065849184989929, "rewards/margins": 0.35126957297325134, "rewards/rejected": 0.555315375328064, "step": 480 }, { "epoch": 0.13, "learning_rate": 9.976119162628079e-07, "logits/chosen": -1.6144253015518188, "logits/rejected": -1.5651835203170776, "logps/chosen": -2368.7080078125, "logps/rejected": -2117.01611328125, "loss": 0.6833, "rewards/accuracies": 0.574999988079071, "rewards/chosen": 0.7247565388679504, "rewards/margins": 0.07497567683458328, "rewards/rejected": 0.6497808694839478, "step": 490 }, { "epoch": 0.13, "learning_rate": 9.971451321154368e-07, "logits/chosen": -1.65665602684021, "logits/rejected": -1.6473827362060547, "logps/chosen": -2541.382568359375, "logps/rejected": -2192.03369140625, "loss": 0.751, "rewards/accuracies": 0.550000011920929, "rewards/chosen": 1.047736406326294, "rewards/margins": 0.14227107167243958, "rewards/rejected": 0.9054654240608215, "step": 500 }, { "epoch": 0.13, "eval_logits/chosen": -1.5418598651885986, "eval_logits/rejected": -1.4749985933303833, "eval_logps/chosen": -2538.240478515625, "eval_logps/rejected": -2132.940185546875, "eval_loss": 0.6888701319694519, "eval_rewards/accuracies": 0.6019999980926514, "eval_rewards/chosen": 1.3453459739685059, "eval_rewards/margins": 0.3695811927318573, "eval_rewards/rejected": 0.9757645726203918, "eval_runtime": 276.6017, "eval_samples_per_second": 7.231, "eval_steps_per_second": 0.452, "step": 500 }, { "epoch": 0.13, "learning_rate": 9.966368364926017e-07, "logits/chosen": -1.6798099279403687, "logits/rejected": -1.6286773681640625, "logps/chosen": -2536.083984375, "logps/rejected": -2251.199951171875, "loss": 0.6877, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": 0.9014034271240234, "rewards/margins": 0.2098127156496048, "rewards/rejected": 0.6915906667709351, "step": 510 }, { "epoch": 0.14, "learning_rate": 9.960870718368407e-07, "logits/chosen": -1.5539346933364868, "logits/rejected": -1.4379912614822388, "logps/chosen": -3408.21044921875, "logps/rejected": -2854.2998046875, "loss": 0.7279, "rewards/accuracies": 0.44999998807907104, "rewards/chosen": 0.7003393769264221, "rewards/margins": 0.01242439728230238, "rewards/rejected": 0.6879148483276367, "step": 520 }, { "epoch": 0.14, "learning_rate": 9.954958840533446e-07, "logits/chosen": -1.5695605278015137, "logits/rejected": -1.5862843990325928, "logps/chosen": -2218.409423828125, "logps/rejected": -2106.035888671875, "loss": 0.6549, "rewards/accuracies": 0.550000011920929, "rewards/chosen": 0.5118662118911743, "rewards/margins": 0.12286017835140228, "rewards/rejected": 0.3890060782432556, "step": 530 }, { "epoch": 0.14, "learning_rate": 9.948633225061229e-07, "logits/chosen": -1.6302436590194702, "logits/rejected": -1.5741255283355713, "logps/chosen": -3048.370361328125, "logps/rejected": -2719.57666015625, "loss": 0.6908, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": 0.4593297839164734, "rewards/margins": 0.29948073625564575, "rewards/rejected": 0.15984904766082764, "step": 540 }, { "epoch": 0.14, "learning_rate": 9.94189440013883e-07, "logits/chosen": -1.4814682006835938, "logits/rejected": -1.423117995262146, "logps/chosen": -2182.054931640625, "logps/rejected": -2035.183837890625, "loss": 0.6641, "rewards/accuracies": 0.625, "rewards/chosen": 0.7267901301383972, "rewards/margins": 0.27085989713668823, "rewards/rejected": 0.45593029260635376, "step": 550 }, { "epoch": 0.15, "learning_rate": 9.93474292845619e-07, "logits/chosen": -1.4905208349227905, "logits/rejected": -1.429957628250122, "logps/chosen": -2842.08740234375, "logps/rejected": -2393.443115234375, "loss": 0.6643, "rewards/accuracies": 0.625, "rewards/chosen": 1.1947301626205444, "rewards/margins": 0.3226833939552307, "rewards/rejected": 0.8720466494560242, "step": 560 }, { "epoch": 0.15, "learning_rate": 9.927179407159138e-07, "logits/chosen": -1.5065213441848755, "logits/rejected": -1.4873453378677368, "logps/chosen": -2420.403076171875, "logps/rejected": -2188.166015625, "loss": 0.6903, "rewards/accuracies": 0.550000011920929, "rewards/chosen": 0.7575017213821411, "rewards/margins": 0.20866632461547852, "rewards/rejected": 0.5488353967666626, "step": 570 }, { "epoch": 0.15, "learning_rate": 9.919204467799522e-07, "logits/chosen": -1.4751510620117188, "logits/rejected": -1.531491994857788, "logps/chosen": -1874.2708740234375, "logps/rejected": -2106.25830078125, "loss": 0.7035, "rewards/accuracies": 0.550000011920929, "rewards/chosen": 0.7245445251464844, "rewards/margins": 0.1843484789133072, "rewards/rejected": 0.5401960611343384, "step": 580 }, { "epoch": 0.15, "learning_rate": 9.910818776282485e-07, "logits/chosen": -1.467524528503418, "logits/rejected": -1.3987579345703125, "logps/chosen": -2417.3818359375, "logps/rejected": -1983.3590087890625, "loss": 0.6849, "rewards/accuracies": 0.625, "rewards/chosen": 0.8371850848197937, "rewards/margins": 0.24946501851081848, "rewards/rejected": 0.5877200365066528, "step": 590 }, { "epoch": 0.16, "learning_rate": 9.902023032810858e-07, "logits/chosen": -1.2535181045532227, "logits/rejected": -1.306420087814331, "logps/chosen": -2412.548095703125, "logps/rejected": -2069.903564453125, "loss": 0.6921, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": 0.7285448312759399, "rewards/margins": 0.16802072525024414, "rewards/rejected": 0.5605241060256958, "step": 600 }, { "epoch": 0.16, "eval_logits/chosen": -1.3380622863769531, "eval_logits/rejected": -1.2841229438781738, "eval_logps/chosen": -2588.1318359375, "eval_logps/rejected": -2176.009033203125, "eval_loss": 0.6643623113632202, "eval_rewards/accuracies": 0.621999979019165, "eval_rewards/chosen": 0.8464368581771851, "eval_rewards/margins": 0.30136004090309143, "eval_rewards/rejected": 0.5450767874717712, "eval_runtime": 271.1854, "eval_samples_per_second": 7.375, "eval_steps_per_second": 0.461, "step": 600 }, { "epoch": 0.16, "learning_rate": 9.892817971826687e-07, "logits/chosen": -1.4630873203277588, "logits/rejected": -1.3637323379516602, "logps/chosen": -3049.18017578125, "logps/rejected": -2472.287353515625, "loss": 0.6826, "rewards/accuracies": 0.625, "rewards/chosen": 0.8837827444076538, "rewards/margins": 0.23291194438934326, "rewards/rejected": 0.6508709192276001, "step": 610 }, { "epoch": 0.16, "learning_rate": 9.883204361949916e-07, "logits/chosen": -1.4127538204193115, "logits/rejected": -1.3411035537719727, "logps/chosen": -2637.56494140625, "logps/rejected": -2205.36328125, "loss": 0.7038, "rewards/accuracies": 0.574999988079071, "rewards/chosen": 0.8256186246871948, "rewards/margins": 0.11079633235931396, "rewards/rejected": 0.7148222923278809, "step": 620 }, { "epoch": 0.16, "learning_rate": 9.873183005914202e-07, "logits/chosen": -1.4629735946655273, "logits/rejected": -1.4857399463653564, "logps/chosen": -2502.25732421875, "logps/rejected": -2328.2666015625, "loss": 0.6947, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": 0.7244993448257446, "rewards/margins": 0.07214044034481049, "rewards/rejected": 0.652358889579773, "step": 630 }, { "epoch": 0.17, "learning_rate": 9.86275474049989e-07, "logits/chosen": -1.4567070007324219, "logits/rejected": -1.3931400775909424, "logps/chosen": -2890.63671875, "logps/rejected": -2213.41455078125, "loss": 0.7202, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": 1.1594771146774292, "rewards/margins": 0.5113081932067871, "rewards/rejected": 0.6481689214706421, "step": 640 }, { "epoch": 0.17, "learning_rate": 9.851920436464144e-07, "logits/chosen": -1.3868637084960938, "logits/rejected": -1.364241361618042, "logps/chosen": -2136.292236328125, "logps/rejected": -1974.369384765625, "loss": 0.729, "rewards/accuracies": 0.5, "rewards/chosen": 0.8055019378662109, "rewards/margins": 0.1073065996170044, "rewards/rejected": 0.6981953978538513, "step": 650 }, { "epoch": 0.17, "learning_rate": 9.840680998468231e-07, "logits/chosen": -1.3056769371032715, "logits/rejected": -1.2759480476379395, "logps/chosen": -2620.52880859375, "logps/rejected": -2473.78369140625, "loss": 0.777, "rewards/accuracies": 0.574999988079071, "rewards/chosen": 0.027517640963196754, "rewards/margins": 0.15573057532310486, "rewards/rejected": -0.12821291387081146, "step": 660 }, { "epoch": 0.18, "learning_rate": 9.82903736500199e-07, "logits/chosen": -1.4665504693984985, "logits/rejected": -1.394852638244629, "logps/chosen": -2662.813720703125, "logps/rejected": -2131.2236328125, "loss": 0.6739, "rewards/accuracies": 0.5, "rewards/chosen": 0.3333565294742584, "rewards/margins": 0.1078319326043129, "rewards/rejected": 0.22552458941936493, "step": 670 }, { "epoch": 0.18, "learning_rate": 9.81699050830546e-07, "logits/chosen": -1.7163680791854858, "logits/rejected": -1.6032377481460571, "logps/chosen": -2856.134033203125, "logps/rejected": -2047.651611328125, "loss": 0.6547, "rewards/accuracies": 0.699999988079071, "rewards/chosen": 1.5594689846038818, "rewards/margins": 0.5581027865409851, "rewards/rejected": 1.001366138458252, "step": 680 }, { "epoch": 0.18, "learning_rate": 9.804541434287716e-07, "logits/chosen": -1.684930443763733, "logits/rejected": -1.6162173748016357, "logps/chosen": -2177.42919921875, "logps/rejected": -2031.7994384765625, "loss": 0.6615, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": 0.99462890625, "rewards/margins": 0.3446377217769623, "rewards/rejected": 0.6499910950660706, "step": 690 }, { "epoch": 0.18, "learning_rate": 9.791691182442852e-07, "logits/chosen": -1.5711638927459717, "logits/rejected": -1.599442958831787, "logps/chosen": -2674.912353515625, "logps/rejected": -2773.044189453125, "loss": 0.6437, "rewards/accuracies": 0.550000011920929, "rewards/chosen": 0.8802645802497864, "rewards/margins": 0.1743774712085724, "rewards/rejected": 0.7058870792388916, "step": 700 }, { "epoch": 0.18, "eval_logits/chosen": -1.4816925525665283, "eval_logits/rejected": -1.4526340961456299, "eval_logps/chosen": -2590.2763671875, "eval_logps/rejected": -2182.556640625, "eval_loss": 0.6724444627761841, "eval_rewards/accuracies": 0.6420000195503235, "eval_rewards/chosen": 0.8249886631965637, "eval_rewards/margins": 0.34538939595222473, "eval_rewards/rejected": 0.4795991778373718, "eval_runtime": 276.6062, "eval_samples_per_second": 7.23, "eval_steps_per_second": 0.452, "step": 700 }, { "epoch": 0.19, "learning_rate": 9.7784408257632e-07, "logits/chosen": -1.5237572193145752, "logits/rejected": -1.545637845993042, "logps/chosen": -1991.960693359375, "logps/rejected": -2176.53271484375, "loss": 0.6927, "rewards/accuracies": 0.574999988079071, "rewards/chosen": 0.6580592393875122, "rewards/margins": 0.20243236422538757, "rewards/rejected": 0.455626904964447, "step": 710 }, { "epoch": 0.19, "learning_rate": 9.764791470649727e-07, "logits/chosen": -1.5924136638641357, "logits/rejected": -1.5551161766052246, "logps/chosen": -1903.432373046875, "logps/rejected": -1853.7086181640625, "loss": 0.6915, "rewards/accuracies": 0.4749999940395355, "rewards/chosen": 0.7805501222610474, "rewards/margins": 0.07067215442657471, "rewards/rejected": 0.7098779678344727, "step": 720 }, { "epoch": 0.19, "learning_rate": 9.750744256819658e-07, "logits/chosen": -1.620234727859497, "logits/rejected": -1.572274088859558, "logps/chosen": -2286.07470703125, "logps/rejected": -2140.6611328125, "loss": 0.7207, "rewards/accuracies": 0.5, "rewards/chosen": 0.7918757200241089, "rewards/margins": -0.18175740540027618, "rewards/rejected": 0.9736331701278687, "step": 730 }, { "epoch": 0.19, "learning_rate": 9.736300357211307e-07, "logits/chosen": -1.7339751720428467, "logits/rejected": -1.6665589809417725, "logps/chosen": -2919.00537109375, "logps/rejected": -2452.6064453125, "loss": 0.6461, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": 0.843177318572998, "rewards/margins": 0.30998367071151733, "rewards/rejected": 0.5331936478614807, "step": 740 }, { "epoch": 0.2, "learning_rate": 9.721460977886135e-07, "logits/chosen": -1.6689131259918213, "logits/rejected": -1.630812644958496, "logps/chosen": -2635.65673828125, "logps/rejected": -2379.21826171875, "loss": 0.6245, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": 0.748379647731781, "rewards/margins": 0.33323729038238525, "rewards/rejected": 0.4151424467563629, "step": 750 }, { "epoch": 0.2, "learning_rate": 9.706227357928043e-07, "logits/chosen": -1.6714589595794678, "logits/rejected": -1.5527595281600952, "logps/chosen": -2750.80908203125, "logps/rejected": -2120.307373046875, "loss": 0.6817, "rewards/accuracies": 0.675000011920929, "rewards/chosen": 0.7476462125778198, "rewards/margins": 0.1452573537826538, "rewards/rejected": 0.6023889183998108, "step": 760 }, { "epoch": 0.2, "learning_rate": 9.690600769339914e-07, "logits/chosen": -1.5319817066192627, "logits/rejected": -1.5909537076950073, "logps/chosen": -2279.741943359375, "logps/rejected": -2419.86572265625, "loss": 0.7147, "rewards/accuracies": 0.550000011920929, "rewards/chosen": 1.177549958229065, "rewards/margins": 0.08217627555131912, "rewards/rejected": 1.0953737497329712, "step": 770 }, { "epoch": 0.2, "learning_rate": 9.6745825169374e-07, "logits/chosen": -1.6878665685653687, "logits/rejected": -1.612125039100647, "logps/chosen": -2866.05712890625, "logps/rejected": -2377.9013671875, "loss": 0.6833, "rewards/accuracies": 0.625, "rewards/chosen": 0.9150659441947937, "rewards/margins": 0.4293249547481537, "rewards/rejected": 0.4857410788536072, "step": 780 }, { "epoch": 0.21, "learning_rate": 9.658173938239966e-07, "logits/chosen": -1.6150919198989868, "logits/rejected": -1.576047420501709, "logps/chosen": -2437.158935546875, "logps/rejected": -1969.7994384765625, "loss": 0.6852, "rewards/accuracies": 0.550000011920929, "rewards/chosen": 0.29879194498062134, "rewards/margins": 0.09543965756893158, "rewards/rejected": 0.20335224270820618, "step": 790 }, { "epoch": 0.21, "learning_rate": 9.64137640335921e-07, "logits/chosen": -1.7476589679718018, "logits/rejected": -1.7255117893218994, "logps/chosen": -2327.221923828125, "logps/rejected": -2246.793701171875, "loss": 0.8109, "rewards/accuracies": 0.625, "rewards/chosen": 0.8766447901725769, "rewards/margins": 0.11921733617782593, "rewards/rejected": 0.7574275732040405, "step": 800 }, { "epoch": 0.21, "eval_logits/chosen": -1.576106309890747, "eval_logits/rejected": -1.5267276763916016, "eval_logps/chosen": -2557.870849609375, "eval_logps/rejected": -2155.783203125, "eval_loss": 0.6654534339904785, "eval_rewards/accuracies": 0.6380000114440918, "eval_rewards/chosen": 1.1490436792373657, "eval_rewards/margins": 0.40170982480049133, "eval_rewards/rejected": 0.7473338842391968, "eval_runtime": 276.556, "eval_samples_per_second": 7.232, "eval_steps_per_second": 0.452, "step": 800 }, { "epoch": 0.21, "learning_rate": 9.624191314884461e-07, "logits/chosen": -1.592280626296997, "logits/rejected": -1.648185133934021, "logps/chosen": -2357.55517578125, "logps/rejected": -2246.37744140625, "loss": 0.8166, "rewards/accuracies": 0.4749999940395355, "rewards/chosen": 0.5510924458503723, "rewards/margins": -0.10927625000476837, "rewards/rejected": 0.6603686809539795, "step": 810 }, { "epoch": 0.21, "learning_rate": 9.606620107765662e-07, "logits/chosen": -1.6358171701431274, "logits/rejected": -1.5666896104812622, "logps/chosen": -2290.347900390625, "logps/rejected": -2105.61474609375, "loss": 0.73, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": 0.5445422530174255, "rewards/margins": 0.03851698711514473, "rewards/rejected": 0.5060251951217651, "step": 820 }, { "epoch": 0.22, "learning_rate": 9.58866424919355e-07, "logits/chosen": -1.5668513774871826, "logits/rejected": -1.510426640510559, "logps/chosen": -2239.77783203125, "logps/rejected": -1951.4498291015625, "loss": 0.5974, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": 0.8219590187072754, "rewards/margins": 0.4823435842990875, "rewards/rejected": 0.3396154046058655, "step": 830 }, { "epoch": 0.22, "learning_rate": 9.570325238477148e-07, "logits/chosen": -1.5893141031265259, "logits/rejected": -1.4693152904510498, "logps/chosen": -2884.8115234375, "logps/rejected": -2526.07763671875, "loss": 0.6583, "rewards/accuracies": 0.625, "rewards/chosen": 0.786440372467041, "rewards/margins": 0.3468918800354004, "rewards/rejected": 0.4395485520362854, "step": 840 }, { "epoch": 0.22, "learning_rate": 9.551604606918575e-07, "logits/chosen": -1.6071548461914062, "logits/rejected": -1.5999706983566284, "logps/chosen": -2922.397705078125, "logps/rejected": -2410.974365234375, "loss": 0.6552, "rewards/accuracies": 0.5, "rewards/chosen": 0.24767303466796875, "rewards/margins": 0.08863957971334457, "rewards/rejected": 0.1590333878993988, "step": 850 }, { "epoch": 0.23, "learning_rate": 9.532503917685178e-07, "logits/chosen": -1.5351760387420654, "logits/rejected": -1.479089379310608, "logps/chosen": -2803.19140625, "logps/rejected": -2390.5615234375, "loss": 0.6987, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": 1.1132092475891113, "rewards/margins": 0.5485633611679077, "rewards/rejected": 0.5646459460258484, "step": 860 }, { "epoch": 0.23, "learning_rate": 9.513024765679012e-07, "logits/chosen": -1.6146430969238281, "logits/rejected": -1.5676028728485107, "logps/chosen": -2616.34716796875, "logps/rejected": -2277.76611328125, "loss": 0.644, "rewards/accuracies": 0.574999988079071, "rewards/chosen": 1.0030187368392944, "rewards/margins": 0.23817987740039825, "rewards/rejected": 0.7648389339447021, "step": 870 }, { "epoch": 0.23, "learning_rate": 9.493168777403662e-07, "logits/chosen": -1.543965220451355, "logits/rejected": -1.5248345136642456, "logps/chosen": -2174.06982421875, "logps/rejected": -2065.94140625, "loss": 0.663, "rewards/accuracies": 0.699999988079071, "rewards/chosen": 0.7886601686477661, "rewards/margins": 0.3570699691772461, "rewards/rejected": 0.4315902590751648, "step": 880 }, { "epoch": 0.23, "learning_rate": 9.472937610828436e-07, "logits/chosen": -1.489497423171997, "logits/rejected": -1.5204612016677856, "logps/chosen": -2214.174560546875, "logps/rejected": -2092.203125, "loss": 0.6128, "rewards/accuracies": 0.699999988079071, "rewards/chosen": 0.5309593081474304, "rewards/margins": 0.28473734855651855, "rewards/rejected": 0.24622204899787903, "step": 890 }, { "epoch": 0.24, "learning_rate": 9.452332955249919e-07, "logits/chosen": -1.4669028520584106, "logits/rejected": -1.3905723094940186, "logps/chosen": -2331.98046875, "logps/rejected": -2148.8232421875, "loss": 0.6725, "rewards/accuracies": 0.550000011920929, "rewards/chosen": 0.7474310994148254, "rewards/margins": 0.1214209794998169, "rewards/rejected": 0.6260100603103638, "step": 900 }, { "epoch": 0.24, "eval_logits/chosen": -1.4909840822219849, "eval_logits/rejected": -1.4486072063446045, "eval_logps/chosen": -2530.19140625, "eval_logps/rejected": -2130.6240234375, "eval_loss": 0.6835622787475586, "eval_rewards/accuracies": 0.6159999966621399, "eval_rewards/chosen": 1.4258359670639038, "eval_rewards/margins": 0.4269082844257355, "eval_rewards/rejected": 0.998927652835846, "eval_runtime": 274.4345, "eval_samples_per_second": 7.288, "eval_steps_per_second": 0.455, "step": 900 }, { "epoch": 0.24, "learning_rate": 9.431356531150925e-07, "logits/chosen": -1.6749414205551147, "logits/rejected": -1.6013925075531006, "logps/chosen": -2510.5146484375, "logps/rejected": -2046.6390380859375, "loss": 0.6202, "rewards/accuracies": 0.625, "rewards/chosen": 1.2002885341644287, "rewards/margins": 0.3314119875431061, "rewards/rejected": 0.8688764572143555, "step": 910 }, { "epoch": 0.24, "learning_rate": 9.410010090056828e-07, "logits/chosen": -1.4817672967910767, "logits/rejected": -1.4551513195037842, "logps/chosen": -2566.467041015625, "logps/rejected": -2493.96630859375, "loss": 0.8313, "rewards/accuracies": 0.675000011920929, "rewards/chosen": 0.86517333984375, "rewards/margins": 0.3244161009788513, "rewards/rejected": 0.5407571792602539, "step": 920 }, { "epoch": 0.24, "learning_rate": 9.388295414389318e-07, "logits/chosen": -1.3311041593551636, "logits/rejected": -1.3033561706542969, "logps/chosen": -2344.6435546875, "logps/rejected": -1979.4986572265625, "loss": 0.63, "rewards/accuracies": 0.699999988079071, "rewards/chosen": 0.541279673576355, "rewards/margins": 0.37267619371414185, "rewards/rejected": 0.16860340535640717, "step": 930 }, { "epoch": 0.25, "learning_rate": 9.366214317317562e-07, "logits/chosen": -1.4732763767242432, "logits/rejected": -1.4141404628753662, "logps/chosen": -2585.295166015625, "logps/rejected": -2203.73681640625, "loss": 0.6414, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": 0.7276701927185059, "rewards/margins": 0.4391177296638489, "rewards/rejected": 0.28855252265930176, "step": 940 }, { "epoch": 0.25, "learning_rate": 9.343768642606813e-07, "logits/chosen": -1.6381938457489014, "logits/rejected": -1.5806336402893066, "logps/chosen": -2596.53662109375, "logps/rejected": -2025.1402587890625, "loss": 0.6646, "rewards/accuracies": 0.574999988079071, "rewards/chosen": 1.1860891580581665, "rewards/margins": 0.20563539862632751, "rewards/rejected": 0.9804538488388062, "step": 950 }, { "epoch": 0.25, "learning_rate": 9.320960264464448e-07, "logits/chosen": -1.6365054845809937, "logits/rejected": -1.6070477962493896, "logps/chosen": -1896.517822265625, "logps/rejected": -1744.9036865234375, "loss": 0.6713, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": 0.994465172290802, "rewards/margins": 0.32086846232414246, "rewards/rejected": 0.6735965013504028, "step": 960 }, { "epoch": 0.25, "learning_rate": 9.29779108738348e-07, "logits/chosen": -1.7710649967193604, "logits/rejected": -1.715428113937378, "logps/chosen": -2846.1181640625, "logps/rejected": -2703.55029296875, "loss": 0.6904, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": 0.7387205362319946, "rewards/margins": 0.45319658517837524, "rewards/rejected": 0.2855239510536194, "step": 970 }, { "epoch": 0.26, "learning_rate": 9.274263045983528e-07, "logits/chosen": -1.702950119972229, "logits/rejected": -1.728872299194336, "logps/chosen": -2756.061767578125, "logps/rejected": -2911.74658203125, "loss": 0.6874, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": 0.11702828109264374, "rewards/margins": 0.2605035901069641, "rewards/rejected": -0.14347527921199799, "step": 980 }, { "epoch": 0.26, "learning_rate": 9.250378104849275e-07, "logits/chosen": -1.6368192434310913, "logits/rejected": -1.5715930461883545, "logps/chosen": -2486.092041015625, "logps/rejected": -2241.58984375, "loss": 0.6305, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": 1.089656114578247, "rewards/margins": 0.5296434164047241, "rewards/rejected": 0.560012698173523, "step": 990 }, { "epoch": 0.26, "learning_rate": 9.226138258366436e-07, "logits/chosen": -1.5675978660583496, "logits/rejected": -1.5653481483459473, "logps/chosen": -2241.11865234375, "logps/rejected": -2163.8515625, "loss": 0.7027, "rewards/accuracies": 0.625, "rewards/chosen": 0.998638927936554, "rewards/margins": 0.19615396857261658, "rewards/rejected": 0.8024848699569702, "step": 1000 }, { "epoch": 0.26, "eval_logits/chosen": -1.5565454959869385, "eval_logits/rejected": -1.5094586610794067, "eval_logps/chosen": -2591.25048828125, "eval_logps/rejected": -2183.227783203125, "eval_loss": 0.6689639091491699, "eval_rewards/accuracies": 0.6259999871253967, "eval_rewards/chosen": 0.8152462244033813, "eval_rewards/margins": 0.34235623478889465, "eval_rewards/rejected": 0.47289004921913147, "eval_runtime": 273.7847, "eval_samples_per_second": 7.305, "eval_steps_per_second": 0.457, "step": 1000 }, { "epoch": 0.26, "learning_rate": 9.201545530555213e-07, "logits/chosen": -1.6780484914779663, "logits/rejected": -1.7151432037353516, "logps/chosen": -2156.362060546875, "logps/rejected": -2084.3037109375, "loss": 0.6435, "rewards/accuracies": 0.75, "rewards/chosen": 0.6366477012634277, "rewards/margins": 0.6162145137786865, "rewards/rejected": 0.020433183759450912, "step": 1010 }, { "epoch": 0.27, "learning_rate": 9.176601974901304e-07, "logits/chosen": -1.624211072921753, "logits/rejected": -1.633967638015747, "logps/chosen": -2648.594970703125, "logps/rejected": -2590.938720703125, "loss": 0.6784, "rewards/accuracies": 0.625, "rewards/chosen": 0.47067689895629883, "rewards/margins": 0.21159331500530243, "rewards/rejected": 0.2590835690498352, "step": 1020 }, { "epoch": 0.27, "learning_rate": 9.151309674184427e-07, "logits/chosen": -1.6630769968032837, "logits/rejected": -1.5527112483978271, "logps/chosen": -2740.18212890625, "logps/rejected": -1866.758544921875, "loss": 0.6681, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": 0.5968760848045349, "rewards/margins": 0.35547563433647156, "rewards/rejected": 0.24140043556690216, "step": 1030 }, { "epoch": 0.27, "learning_rate": 9.125670740304409e-07, "logits/chosen": -1.5219511985778809, "logits/rejected": -1.4647961854934692, "logps/chosen": -2756.6201171875, "logps/rejected": -2311.59423828125, "loss": 0.6871, "rewards/accuracies": 0.625, "rewards/chosen": 1.1456257104873657, "rewards/margins": 0.25537100434303284, "rewards/rejected": 0.8902546763420105, "step": 1040 }, { "epoch": 0.27, "learning_rate": 9.099687314104858e-07, "logits/chosen": -1.6184484958648682, "logits/rejected": -1.5983049869537354, "logps/chosen": -2442.0712890625, "logps/rejected": -2581.9248046875, "loss": 0.6509, "rewards/accuracies": 0.550000011920929, "rewards/chosen": 0.6723107099533081, "rewards/margins": 0.27203884720802307, "rewards/rejected": 0.40027180314064026, "step": 1050 }, { "epoch": 0.28, "learning_rate": 9.073361565194381e-07, "logits/chosen": -1.5782119035720825, "logits/rejected": -1.6148672103881836, "logps/chosen": -2074.51611328125, "logps/rejected": -2046.3336181640625, "loss": 0.6968, "rewards/accuracies": 0.574999988079071, "rewards/chosen": 0.2032502144575119, "rewards/margins": 0.15900087356567383, "rewards/rejected": 0.04424933344125748, "step": 1060 }, { "epoch": 0.28, "learning_rate": 9.046695691765435e-07, "logits/chosen": -1.7396224737167358, "logits/rejected": -1.7009761333465576, "logps/chosen": -2369.361328125, "logps/rejected": -2038.233642578125, "loss": 0.633, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": 0.712352991104126, "rewards/margins": 0.39164695143699646, "rewards/rejected": 0.32070595026016235, "step": 1070 }, { "epoch": 0.28, "learning_rate": 9.019691920410778e-07, "logits/chosen": -1.8201916217803955, "logits/rejected": -1.8124058246612549, "logps/chosen": -2379.390869140625, "logps/rejected": -2190.876708984375, "loss": 0.7336, "rewards/accuracies": 0.550000011920929, "rewards/chosen": 1.1395015716552734, "rewards/margins": 0.2840576171875, "rewards/rejected": 0.855444073677063, "step": 1080 }, { "epoch": 0.29, "learning_rate": 8.992352505937547e-07, "logits/chosen": -1.7162584066390991, "logits/rejected": -1.6304266452789307, "logps/chosen": -2344.68701171875, "logps/rejected": -1602.481201171875, "loss": 0.7, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": 0.6249530911445618, "rewards/margins": 0.44733700156211853, "rewards/rejected": 0.17761602997779846, "step": 1090 }, { "epoch": 0.29, "learning_rate": 8.964679731178984e-07, "logits/chosen": -1.625732183456421, "logits/rejected": -1.5899415016174316, "logps/chosen": -2034.600341796875, "logps/rejected": -1786.969482421875, "loss": 0.6421, "rewards/accuracies": 0.625, "rewards/chosen": 0.20644518733024597, "rewards/margins": 0.36025696992874146, "rewards/rejected": -0.15381178259849548, "step": 1100 }, { "epoch": 0.29, "eval_logits/chosen": -1.5785434246063232, "eval_logits/rejected": -1.5381590127944946, "eval_logps/chosen": -2619.966064453125, "eval_logps/rejected": -2211.10400390625, "eval_loss": 0.6512665748596191, "eval_rewards/accuracies": 0.6639999747276306, "eval_rewards/chosen": 0.5280923843383789, "eval_rewards/margins": 0.33396461606025696, "eval_rewards/rejected": 0.19412773847579956, "eval_runtime": 273.9824, "eval_samples_per_second": 7.3, "eval_steps_per_second": 0.456, "step": 1100 }, { "epoch": 0.29, "learning_rate": 8.936675906803815e-07, "logits/chosen": -1.623098373413086, "logits/rejected": -1.5492380857467651, "logps/chosen": -3145.0009765625, "logps/rejected": -2464.873046875, "loss": 0.6753, "rewards/accuracies": 0.625, "rewards/chosen": 0.9549194574356079, "rewards/margins": 0.5628315210342407, "rewards/rejected": 0.3920879065990448, "step": 1110 }, { "epoch": 0.29, "learning_rate": 8.908343371123319e-07, "logits/chosen": -1.7886734008789062, "logits/rejected": -1.7371702194213867, "logps/chosen": -2213.9833984375, "logps/rejected": -2165.156982421875, "loss": 0.6319, "rewards/accuracies": 0.4749999940395355, "rewards/chosen": 0.4289758801460266, "rewards/margins": -0.01336582936346531, "rewards/rejected": 0.4423416554927826, "step": 1120 }, { "epoch": 0.3, "learning_rate": 8.879684489896071e-07, "logits/chosen": -1.9691905975341797, "logits/rejected": -1.9757953882217407, "logps/chosen": -2335.632568359375, "logps/rejected": -2261.8828125, "loss": 0.7045, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": 0.7513018846511841, "rewards/margins": 0.29577815532684326, "rewards/rejected": 0.4555237889289856, "step": 1130 }, { "epoch": 0.3, "learning_rate": 8.850701656130407e-07, "logits/chosen": -1.9100147485733032, "logits/rejected": -1.8400824069976807, "logps/chosen": -2170.811279296875, "logps/rejected": -1717.5797119140625, "loss": 0.6164, "rewards/accuracies": 0.699999988079071, "rewards/chosen": 0.8315251469612122, "rewards/margins": 0.4699254631996155, "rewards/rejected": 0.3615996241569519, "step": 1140 }, { "epoch": 0.3, "learning_rate": 8.821397289884605e-07, "logits/chosen": -1.7558555603027344, "logits/rejected": -1.709307074546814, "logps/chosen": -2761.39501953125, "logps/rejected": -2207.11669921875, "loss": 0.6138, "rewards/accuracies": 0.675000011920929, "rewards/chosen": 1.443568468093872, "rewards/margins": 0.5788078308105469, "rewards/rejected": 0.8647607564926147, "step": 1150 }, { "epoch": 0.3, "learning_rate": 8.791773838064811e-07, "logits/chosen": -1.928342580795288, "logits/rejected": -1.8991355895996094, "logps/chosen": -2210.54248046875, "logps/rejected": -2129.330078125, "loss": 0.6609, "rewards/accuracies": 0.550000011920929, "rewards/chosen": 0.5884748697280884, "rewards/margins": 0.13398051261901855, "rewards/rejected": 0.45449432730674744, "step": 1160 }, { "epoch": 0.31, "learning_rate": 8.76183377422073e-07, "logits/chosen": -1.8995704650878906, "logits/rejected": -1.920475721359253, "logps/chosen": -2605.925048828125, "logps/rejected": -2576.642578125, "loss": 0.6536, "rewards/accuracies": 0.550000011920929, "rewards/chosen": 0.7022437453269958, "rewards/margins": 0.12484090030193329, "rewards/rejected": 0.5774028897285461, "step": 1170 }, { "epoch": 0.31, "learning_rate": 8.731579598339079e-07, "logits/chosen": -1.8158420324325562, "logits/rejected": -1.8012596368789673, "logps/chosen": -2187.33447265625, "logps/rejected": -1711.1923828125, "loss": 0.6688, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": 0.6456224918365479, "rewards/margins": 0.084513820707798, "rewards/rejected": 0.5611085891723633, "step": 1180 }, { "epoch": 0.31, "learning_rate": 8.701013836634832e-07, "logits/chosen": -1.7981412410736084, "logits/rejected": -1.6974513530731201, "logps/chosen": -2500.97412109375, "logps/rejected": -2135.02880859375, "loss": 0.6633, "rewards/accuracies": 0.625, "rewards/chosen": 1.2788394689559937, "rewards/margins": 0.4392651915550232, "rewards/rejected": 0.8395741581916809, "step": 1190 }, { "epoch": 0.31, "learning_rate": 8.670139041340298e-07, "logits/chosen": -1.8434431552886963, "logits/rejected": -1.789004921913147, "logps/chosen": -2898.79150390625, "logps/rejected": -2346.39208984375, "loss": 0.6217, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": 1.0067036151885986, "rewards/margins": 0.2750965654850006, "rewards/rejected": 0.7316070795059204, "step": 1200 }, { "epoch": 0.31, "eval_logits/chosen": -1.6764802932739258, "eval_logits/rejected": -1.6345183849334717, "eval_logps/chosen": -2599.054443359375, "eval_logps/rejected": -2196.55810546875, "eval_loss": 0.6436493396759033, "eval_rewards/accuracies": 0.6460000276565552, "eval_rewards/chosen": 0.7372069954872131, "eval_rewards/margins": 0.3976210653781891, "eval_rewards/rejected": 0.33958590030670166, "eval_runtime": 274.6861, "eval_samples_per_second": 7.281, "eval_steps_per_second": 0.455, "step": 1200 }, { "epoch": 0.32, "learning_rate": 8.638957790491998e-07, "logits/chosen": -1.8298311233520508, "logits/rejected": -1.7556016445159912, "logps/chosen": -2440.884765625, "logps/rejected": -2195.47705078125, "loss": 0.6617, "rewards/accuracies": 0.550000011920929, "rewards/chosen": 0.4565182626247406, "rewards/margins": 0.3322807252407074, "rewards/rejected": 0.1242375373840332, "step": 1210 }, { "epoch": 0.32, "learning_rate": 8.607472687715407e-07, "logits/chosen": -1.7025740146636963, "logits/rejected": -1.6857258081436157, "logps/chosen": -2362.98388671875, "logps/rejected": -2594.92822265625, "loss": 0.6842, "rewards/accuracies": 0.625, "rewards/chosen": 0.03539573401212692, "rewards/margins": 0.22561678290367126, "rewards/rejected": -0.19022107124328613, "step": 1220 }, { "epoch": 0.32, "learning_rate": 8.575686362007543e-07, "logits/chosen": -1.9242517948150635, "logits/rejected": -1.82456374168396, "logps/chosen": -2176.520751953125, "logps/rejected": -1710.506591796875, "loss": 0.6089, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": 0.9000527262687683, "rewards/margins": 0.34358882904052734, "rewards/rejected": 0.5564638376235962, "step": 1230 }, { "epoch": 0.32, "learning_rate": 8.543601467517459e-07, "logits/chosen": -1.850541114807129, "logits/rejected": -1.7285171747207642, "logps/chosen": -2746.185302734375, "logps/rejected": -2296.318359375, "loss": 0.6566, "rewards/accuracies": 0.75, "rewards/chosen": 1.3377187252044678, "rewards/margins": 0.5021657347679138, "rewards/rejected": 0.835552990436554, "step": 1240 }, { "epoch": 0.33, "learning_rate": 8.511220683324607e-07, "logits/chosen": -1.6543235778808594, "logits/rejected": -1.5612728595733643, "logps/chosen": -2371.774658203125, "logps/rejected": -1993.567626953125, "loss": 0.6201, "rewards/accuracies": 0.574999988079071, "rewards/chosen": 1.1099433898925781, "rewards/margins": 0.45349279046058655, "rewards/rejected": 0.656450629234314, "step": 1250 }, { "epoch": 0.33, "learning_rate": 8.478546713215151e-07, "logits/chosen": -1.630977988243103, "logits/rejected": -1.6222995519638062, "logps/chosen": -2651.537353515625, "logps/rejected": -2243.670166015625, "loss": 0.6546, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": 0.9021153450012207, "rewards/margins": 0.5950330495834351, "rewards/rejected": 0.3070824146270752, "step": 1260 }, { "epoch": 0.33, "learning_rate": 8.445582285456195e-07, "logits/chosen": -1.564117193222046, "logits/rejected": -1.4608399868011475, "logps/chosen": -2718.778076171875, "logps/rejected": -2309.786376953125, "loss": 0.6459, "rewards/accuracies": 0.699999988079071, "rewards/chosen": 0.9259850382804871, "rewards/margins": 0.6184819340705872, "rewards/rejected": 0.3075031638145447, "step": 1270 }, { "epoch": 0.33, "learning_rate": 8.412330152567964e-07, "logits/chosen": -1.4873427152633667, "logits/rejected": -1.4943621158599854, "logps/chosen": -2755.173828125, "logps/rejected": -2068.41455078125, "loss": 0.6562, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": 0.9072205424308777, "rewards/margins": 0.3468925952911377, "rewards/rejected": 0.56032794713974, "step": 1280 }, { "epoch": 0.34, "learning_rate": 8.378793091093989e-07, "logits/chosen": -1.7745788097381592, "logits/rejected": -1.6726099252700806, "logps/chosen": -2075.09521484375, "logps/rejected": -1817.7054443359375, "loss": 0.7009, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": 0.997990608215332, "rewards/margins": 0.39851805567741394, "rewards/rejected": 0.5994727611541748, "step": 1290 }, { "epoch": 0.34, "learning_rate": 8.344973901369252e-07, "logits/chosen": -1.7127208709716797, "logits/rejected": -1.6906402111053467, "logps/chosen": -1995.653564453125, "logps/rejected": -1904.506103515625, "loss": 0.7365, "rewards/accuracies": 0.4749999940395355, "rewards/chosen": 0.5294217467308044, "rewards/margins": -0.03213152289390564, "rewards/rejected": 0.5615532398223877, "step": 1300 }, { "epoch": 0.34, "eval_logits/chosen": -1.6008652448654175, "eval_logits/rejected": -1.5596896409988403, "eval_logps/chosen": -2580.944580078125, "eval_logps/rejected": -2178.24365234375, "eval_loss": 0.6399799585342407, "eval_rewards/accuracies": 0.6240000128746033, "eval_rewards/chosen": 0.9183096885681152, "eval_rewards/margins": 0.395578533411026, "eval_rewards/rejected": 0.5227311849594116, "eval_runtime": 279.2878, "eval_samples_per_second": 7.161, "eval_steps_per_second": 0.448, "step": 1300 }, { "epoch": 0.34, "learning_rate": 8.310875407286363e-07, "logits/chosen": -1.5962960720062256, "logits/rejected": -1.6336969137191772, "logps/chosen": -2335.583984375, "logps/rejected": -2098.15625, "loss": 0.646, "rewards/accuracies": 0.625, "rewards/chosen": 0.8231102228164673, "rewards/margins": 0.4199337363243103, "rewards/rejected": 0.4031763970851898, "step": 1310 }, { "epoch": 0.35, "learning_rate": 8.276500456059762e-07, "logits/chosen": -1.6020572185516357, "logits/rejected": -1.524186611175537, "logps/chosen": -2809.671142578125, "logps/rejected": -1945.8929443359375, "loss": 0.6434, "rewards/accuracies": 0.824999988079071, "rewards/chosen": 0.7115261554718018, "rewards/margins": 0.7415082454681396, "rewards/rejected": -0.02998208999633789, "step": 1320 }, { "epoch": 0.35, "learning_rate": 8.241851917987987e-07, "logits/chosen": -1.5716346502304077, "logits/rejected": -1.5255610942840576, "logps/chosen": -2458.597900390625, "logps/rejected": -2043.8544921875, "loss": 0.698, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": 0.652920663356781, "rewards/margins": 0.3171829283237457, "rewards/rejected": 0.3357377350330353, "step": 1330 }, { "epoch": 0.35, "learning_rate": 8.206932686213996e-07, "logits/chosen": -1.6627728939056396, "logits/rejected": -1.5985891819000244, "logps/chosen": -2899.52587890625, "logps/rejected": -1978.255859375, "loss": 0.6587, "rewards/accuracies": 0.574999988079071, "rewards/chosen": 0.4774368703365326, "rewards/margins": 0.4220924973487854, "rewards/rejected": 0.055344413965940475, "step": 1340 }, { "epoch": 0.35, "learning_rate": 8.171745676483592e-07, "logits/chosen": -1.6537669897079468, "logits/rejected": -1.6453883647918701, "logps/chosen": -2317.5439453125, "logps/rejected": -2154.13525390625, "loss": 0.734, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": 0.5170091986656189, "rewards/margins": 0.14945654571056366, "rewards/rejected": 0.3675526976585388, "step": 1350 }, { "epoch": 0.36, "learning_rate": 8.13629382690196e-07, "logits/chosen": -1.796841025352478, "logits/rejected": -1.765019416809082, "logps/chosen": -2391.85205078125, "logps/rejected": -2199.541015625, "loss": 0.6489, "rewards/accuracies": 0.5, "rewards/chosen": 0.6095671057701111, "rewards/margins": 0.15716035664081573, "rewards/rejected": 0.45240673422813416, "step": 1360 }, { "epoch": 0.36, "learning_rate": 8.100580097688341e-07, "logits/chosen": -1.688180923461914, "logits/rejected": -1.69215989112854, "logps/chosen": -3190.982177734375, "logps/rejected": -2641.860595703125, "loss": 0.6892, "rewards/accuracies": 0.675000011920929, "rewards/chosen": 1.130110502243042, "rewards/margins": 0.5017646551132202, "rewards/rejected": 0.6283458471298218, "step": 1370 }, { "epoch": 0.36, "learning_rate": 8.064607470928844e-07, "logits/chosen": -1.8125782012939453, "logits/rejected": -1.709834337234497, "logps/chosen": -2791.87841796875, "logps/rejected": -1950.5830078125, "loss": 0.6634, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": 0.7538470029830933, "rewards/margins": 0.3855181336402893, "rewards/rejected": 0.36832886934280396, "step": 1380 }, { "epoch": 0.36, "learning_rate": 8.028378950327452e-07, "logits/chosen": -1.8029365539550781, "logits/rejected": -1.725874900817871, "logps/chosen": -2617.264892578125, "logps/rejected": -2055.29296875, "loss": 0.6525, "rewards/accuracies": 0.5, "rewards/chosen": 0.6537911295890808, "rewards/margins": 0.3229553699493408, "rewards/rejected": 0.33083575963974, "step": 1390 }, { "epoch": 0.37, "learning_rate": 7.99189756095521e-07, "logits/chosen": -1.794398546218872, "logits/rejected": -1.8133939504623413, "logps/chosen": -2457.388671875, "logps/rejected": -2300.2109375, "loss": 0.7057, "rewards/accuracies": 0.4749999940395355, "rewards/chosen": 0.7108521461486816, "rewards/margins": 0.028808236122131348, "rewards/rejected": 0.6820439100265503, "step": 1400 }, { "epoch": 0.37, "eval_logits/chosen": -1.7116947174072266, "eval_logits/rejected": -1.671615719795227, "eval_logps/chosen": -2577.6376953125, "eval_logps/rejected": -2174.325439453125, "eval_loss": 0.6468178033828735, "eval_rewards/accuracies": 0.6140000224113464, "eval_rewards/chosen": 0.9513765573501587, "eval_rewards/margins": 0.38946446776390076, "eval_rewards/rejected": 0.5619121193885803, "eval_runtime": 276.3765, "eval_samples_per_second": 7.237, "eval_steps_per_second": 0.452, "step": 1400 }, { "epoch": 0.37, "learning_rate": 7.955166348997632e-07, "logits/chosen": -1.8387196063995361, "logits/rejected": -1.8379266262054443, "logps/chosen": -2603.5302734375, "logps/rejected": -2606.268798828125, "loss": 0.7209, "rewards/accuracies": 0.44999998807907104, "rewards/chosen": 0.6424747109413147, "rewards/margins": -0.08023136854171753, "rewards/rejected": 0.7227060198783875, "step": 1410 }, { "epoch": 0.37, "learning_rate": 7.918188381500343e-07, "logits/chosen": -1.763655424118042, "logits/rejected": -1.834721326828003, "logps/chosen": -2435.61669921875, "logps/rejected": -2525.558349609375, "loss": 0.6479, "rewards/accuracies": 0.550000011920929, "rewards/chosen": 0.5239884853363037, "rewards/margins": 0.1298888623714447, "rewards/rejected": 0.3940996527671814, "step": 1420 }, { "epoch": 0.37, "learning_rate": 7.880966746112995e-07, "logits/chosen": -1.7425800561904907, "logits/rejected": -1.7341902256011963, "logps/chosen": -2153.57861328125, "logps/rejected": -2187.4521484375, "loss": 0.6584, "rewards/accuracies": 0.44999998807907104, "rewards/chosen": 0.2758369743824005, "rewards/margins": 0.08724844455718994, "rewards/rejected": 0.18858852982521057, "step": 1430 }, { "epoch": 0.38, "learning_rate": 7.843504550831423e-07, "logits/chosen": -1.8196338415145874, "logits/rejected": -1.8200260400772095, "logps/chosen": -2192.01513671875, "logps/rejected": -2054.28369140625, "loss": 0.7045, "rewards/accuracies": 0.699999988079071, "rewards/chosen": 0.20235542953014374, "rewards/margins": 0.17298252880573273, "rewards/rejected": 0.029372822493314743, "step": 1440 }, { "epoch": 0.38, "learning_rate": 7.805804923738157e-07, "logits/chosen": -1.7764650583267212, "logits/rejected": -1.7625634670257568, "logps/chosen": -2062.028564453125, "logps/rejected": -2072.051025390625, "loss": 0.6557, "rewards/accuracies": 0.550000011920929, "rewards/chosen": 0.46689948439598083, "rewards/margins": 0.057696618139743805, "rewards/rejected": 0.4092028737068176, "step": 1450 }, { "epoch": 0.38, "learning_rate": 7.76787101274121e-07, "logits/chosen": -1.5717532634735107, "logits/rejected": -1.531313180923462, "logps/chosen": -2799.623779296875, "logps/rejected": -2392.739501953125, "loss": 0.6483, "rewards/accuracies": 0.574999988079071, "rewards/chosen": 0.6438823938369751, "rewards/margins": 0.3187286853790283, "rewards/rejected": 0.32515376806259155, "step": 1460 }, { "epoch": 0.38, "learning_rate": 7.729705985311232e-07, "logits/chosen": -1.8854055404663086, "logits/rejected": -1.782091736793518, "logps/chosen": -2639.52099609375, "logps/rejected": -2122.951416015625, "loss": 0.642, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": 0.5428428053855896, "rewards/margins": 0.35903045535087585, "rewards/rejected": 0.18381235003471375, "step": 1470 }, { "epoch": 0.39, "learning_rate": 7.69131302821703e-07, "logits/chosen": -1.6489614248275757, "logits/rejected": -1.6407349109649658, "logps/chosen": -2218.22607421875, "logps/rejected": -2045.851318359375, "loss": 0.6138, "rewards/accuracies": 0.625, "rewards/chosen": 0.8161298036575317, "rewards/margins": 0.3229297399520874, "rewards/rejected": 0.49320006370544434, "step": 1480 }, { "epoch": 0.39, "learning_rate": 7.652695347259475e-07, "logits/chosen": -1.7384567260742188, "logits/rejected": -1.7179895639419556, "logps/chosen": -2210.828369140625, "logps/rejected": -1787.0751953125, "loss": 0.714, "rewards/accuracies": 0.5, "rewards/chosen": 0.6207581758499146, "rewards/margins": 0.25422900915145874, "rewards/rejected": 0.36652907729148865, "step": 1490 }, { "epoch": 0.39, "learning_rate": 7.613856167003811e-07, "logits/chosen": -1.7387768030166626, "logits/rejected": -1.7393105030059814, "logps/chosen": -2618.2705078125, "logps/rejected": -2462.711669921875, "loss": 0.6396, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": 0.7113048434257507, "rewards/margins": 0.039934493601322174, "rewards/rejected": 0.6713703870773315, "step": 1500 }, { "epoch": 0.39, "eval_logits/chosen": -1.6599823236465454, "eval_logits/rejected": -1.6244251728057861, "eval_logps/chosen": -2577.3193359375, "eval_logps/rejected": -2176.467529296875, "eval_loss": 0.6498265862464905, "eval_rewards/accuracies": 0.6399999856948853, "eval_rewards/chosen": 0.9545619487762451, "eval_rewards/margins": 0.41406965255737305, "eval_rewards/rejected": 0.5404923558235168, "eval_runtime": 277.7488, "eval_samples_per_second": 7.201, "eval_steps_per_second": 0.45, "step": 1500 }, { "epoch": 0.4, "learning_rate": 7.574798730510415e-07, "logits/chosen": -1.7479991912841797, "logits/rejected": -1.766013503074646, "logps/chosen": -2737.697265625, "logps/rejected": -2326.7646484375, "loss": 0.6725, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": 0.8921098709106445, "rewards/margins": 0.41540461778640747, "rewards/rejected": 0.47670525312423706, "step": 1510 }, { "epoch": 0.4, "learning_rate": 7.53552629906399e-07, "logits/chosen": -1.7563549280166626, "logits/rejected": -1.6970514059066772, "logps/chosen": -2282.693603515625, "logps/rejected": -1962.1783447265625, "loss": 0.6439, "rewards/accuracies": 0.675000011920929, "rewards/chosen": 0.7120363712310791, "rewards/margins": 0.4519389271736145, "rewards/rejected": 0.260097473859787, "step": 1520 }, { "epoch": 0.4, "learning_rate": 7.496042151901265e-07, "logits/chosen": -1.545775055885315, "logits/rejected": -1.5133075714111328, "logps/chosen": -2213.603271484375, "logps/rejected": -1864.095458984375, "loss": 0.6068, "rewards/accuracies": 0.675000011920929, "rewards/chosen": 0.40503087639808655, "rewards/margins": 0.2904754877090454, "rewards/rejected": 0.11455540359020233, "step": 1530 }, { "epoch": 0.4, "learning_rate": 7.456349585937164e-07, "logits/chosen": -1.5178546905517578, "logits/rejected": -1.4629215002059937, "logps/chosen": -2501.837890625, "logps/rejected": -2465.253173828125, "loss": 0.6604, "rewards/accuracies": 0.574999988079071, "rewards/chosen": 0.42861613631248474, "rewards/margins": 0.19511394202709198, "rewards/rejected": 0.23350219428539276, "step": 1540 }, { "epoch": 0.41, "learning_rate": 7.41645191548952e-07, "logits/chosen": -1.482337474822998, "logits/rejected": -1.4365692138671875, "logps/chosen": -2804.274169921875, "logps/rejected": -2157.90234375, "loss": 0.6541, "rewards/accuracies": 0.574999988079071, "rewards/chosen": 0.6451601982116699, "rewards/margins": 0.35519418120384216, "rewards/rejected": 0.28996604681015015, "step": 1550 }, { "epoch": 0.41, "learning_rate": 7.376352472002336e-07, "logits/chosen": -1.6978435516357422, "logits/rejected": -1.6573905944824219, "logps/chosen": -2645.374267578125, "logps/rejected": -2618.08837890625, "loss": 0.6607, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": 0.7978582382202148, "rewards/margins": 0.40063905715942383, "rewards/rejected": 0.397219181060791, "step": 1560 }, { "epoch": 0.41, "learning_rate": 7.336054603767603e-07, "logits/chosen": -1.7187271118164062, "logits/rejected": -1.630066156387329, "logps/chosen": -2555.4833984375, "logps/rejected": -2423.76904296875, "loss": 0.6165, "rewards/accuracies": 0.625, "rewards/chosen": 0.568882405757904, "rewards/margins": 0.4214504361152649, "rewards/rejected": 0.14743201434612274, "step": 1570 }, { "epoch": 0.41, "learning_rate": 7.295561675645719e-07, "logits/chosen": -1.852007269859314, "logits/rejected": -1.7471548318862915, "logps/chosen": -3065.30908203125, "logps/rejected": -2331.70068359375, "loss": 0.5974, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": 0.9147247076034546, "rewards/margins": 0.5064901113510132, "rewards/rejected": 0.40823444724082947, "step": 1580 }, { "epoch": 0.42, "learning_rate": 7.254877068784535e-07, "logits/chosen": -1.764828085899353, "logits/rejected": -1.7764110565185547, "logps/chosen": -2410.434326171875, "logps/rejected": -2364.50927734375, "loss": 0.6653, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": 0.9937675595283508, "rewards/margins": 0.3099968731403351, "rewards/rejected": 0.6837707161903381, "step": 1590 }, { "epoch": 0.42, "learning_rate": 7.214004180337011e-07, "logits/chosen": -1.7532641887664795, "logits/rejected": -1.7427523136138916, "logps/chosen": -2716.545654296875, "logps/rejected": -2454.630859375, "loss": 0.5835, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": 0.9549525380134583, "rewards/margins": 0.41002315282821655, "rewards/rejected": 0.5449293851852417, "step": 1600 }, { "epoch": 0.42, "eval_logits/chosen": -1.6706230640411377, "eval_logits/rejected": -1.6255484819412231, "eval_logps/chosen": -2577.740234375, "eval_logps/rejected": -2176.956787109375, "eval_loss": 0.6487711668014526, "eval_rewards/accuracies": 0.6480000019073486, "eval_rewards/chosen": 0.9503532648086548, "eval_rewards/margins": 0.41475310921669006, "eval_rewards/rejected": 0.5356001853942871, "eval_runtime": 272.6994, "eval_samples_per_second": 7.334, "eval_steps_per_second": 0.458, "step": 1600 }, { "epoch": 0.42, "learning_rate": 7.172946423177573e-07, "logits/chosen": -1.7609493732452393, "logits/rejected": -1.705553412437439, "logps/chosen": -2359.901611328125, "logps/rejected": -1943.8248291015625, "loss": 0.6541, "rewards/accuracies": 0.574999988079071, "rewards/chosen": 0.7265006303787231, "rewards/margins": 0.3340596854686737, "rewards/rejected": 0.3924410045146942, "step": 1610 }, { "epoch": 0.42, "learning_rate": 7.131707225617124e-07, "logits/chosen": -1.7774537801742554, "logits/rejected": -1.7147903442382812, "logps/chosen": -2557.030029296875, "logps/rejected": -1920.765380859375, "loss": 0.6345, "rewards/accuracies": 0.824999988079071, "rewards/chosen": 0.7465502619743347, "rewards/margins": 0.5866314172744751, "rewards/rejected": 0.15991875529289246, "step": 1620 }, { "epoch": 0.43, "learning_rate": 7.090290031116797e-07, "logits/chosen": -1.598940134048462, "logits/rejected": -1.5846506357192993, "logps/chosen": -2371.542236328125, "logps/rejected": -2360.19091796875, "loss": 0.6786, "rewards/accuracies": 0.625, "rewards/chosen": 0.7226258516311646, "rewards/margins": 0.45876359939575195, "rewards/rejected": 0.2638623118400574, "step": 1630 }, { "epoch": 0.43, "learning_rate": 7.048698298000411e-07, "logits/chosen": -1.5689928531646729, "logits/rejected": -1.5398110151290894, "logps/chosen": -2748.099609375, "logps/rejected": -2357.985107421875, "loss": 0.683, "rewards/accuracies": 0.574999988079071, "rewards/chosen": 0.8013742566108704, "rewards/margins": 0.36650413274765015, "rewards/rejected": 0.4348701536655426, "step": 1640 }, { "epoch": 0.43, "learning_rate": 7.006935499165714e-07, "logits/chosen": -1.4957153797149658, "logits/rejected": -1.4456464052200317, "logps/chosen": -2492.004638671875, "logps/rejected": -2111.281982421875, "loss": 0.628, "rewards/accuracies": 0.675000011920929, "rewards/chosen": 0.48487788438796997, "rewards/margins": 0.27980294823646545, "rewards/rejected": 0.20507490634918213, "step": 1650 }, { "epoch": 0.43, "learning_rate": 6.965005121794388e-07, "logits/chosen": -1.7797536849975586, "logits/rejected": -1.652152419090271, "logps/chosen": -2582.0791015625, "logps/rejected": -1727.4976806640625, "loss": 0.6113, "rewards/accuracies": 0.800000011920929, "rewards/chosen": 1.2586898803710938, "rewards/margins": 0.8147012591362, "rewards/rejected": 0.4439886212348938, "step": 1660 }, { "epoch": 0.44, "learning_rate": 6.922910667060881e-07, "logits/chosen": -1.6934763193130493, "logits/rejected": -1.7006620168685913, "logps/chosen": -2171.2431640625, "logps/rejected": -2124.131103515625, "loss": 0.6424, "rewards/accuracies": 0.550000011920929, "rewards/chosen": 1.1348068714141846, "rewards/margins": 0.2678903043270111, "rewards/rejected": 0.8669164776802063, "step": 1670 }, { "epoch": 0.44, "learning_rate": 6.880655649840044e-07, "logits/chosen": -1.7043119668960571, "logits/rejected": -1.6158339977264404, "logps/chosen": -2277.177001953125, "logps/rejected": -1852.617919921875, "loss": 0.7029, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": 0.9791079759597778, "rewards/margins": 0.4544892907142639, "rewards/rejected": 0.5246187448501587, "step": 1680 }, { "epoch": 0.44, "learning_rate": 6.838243598413657e-07, "logits/chosen": -1.6774076223373413, "logits/rejected": -1.5916404724121094, "logps/chosen": -2854.013671875, "logps/rejected": -2340.158203125, "loss": 0.6618, "rewards/accuracies": 0.699999988079071, "rewards/chosen": 0.8015478253364563, "rewards/margins": 0.3010616898536682, "rewards/rejected": 0.5004860162734985, "step": 1690 }, { "epoch": 0.44, "learning_rate": 6.795678054175811e-07, "logits/chosen": -1.676679253578186, "logits/rejected": -1.6284263134002686, "logps/chosen": -2829.0634765625, "logps/rejected": -2389.222900390625, "loss": 0.629, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": 1.1989120244979858, "rewards/margins": 0.28385791182518005, "rewards/rejected": 0.9150541424751282, "step": 1700 }, { "epoch": 0.44, "eval_logits/chosen": -1.6191504001617432, "eval_logits/rejected": -1.5736533403396606, "eval_logps/chosen": -2547.931640625, "eval_logps/rejected": -2149.956787109375, "eval_loss": 0.6500846147537231, "eval_rewards/accuracies": 0.6100000143051147, "eval_rewards/chosen": 1.248434066772461, "eval_rewards/margins": 0.4428330659866333, "eval_rewards/rejected": 0.8056011199951172, "eval_runtime": 273.8345, "eval_samples_per_second": 7.304, "eval_steps_per_second": 0.456, "step": 1700 }, { "epoch": 0.45, "learning_rate": 6.752962571337198e-07, "logits/chosen": -1.6186796426773071, "logits/rejected": -1.561805009841919, "logps/chosen": -2682.119384765625, "logps/rejected": -2285.04345703125, "loss": 0.6952, "rewards/accuracies": 0.675000011920929, "rewards/chosen": 1.003474235534668, "rewards/margins": 0.2986948490142822, "rewards/rejected": 0.7047793865203857, "step": 1710 }, { "epoch": 0.45, "learning_rate": 6.710100716628344e-07, "logits/chosen": -1.6599292755126953, "logits/rejected": -1.6332380771636963, "logps/chosen": -2627.91357421875, "logps/rejected": -2327.94580078125, "loss": 0.6704, "rewards/accuracies": 0.625, "rewards/chosen": 0.7611712217330933, "rewards/margins": 0.35671621561050415, "rewards/rejected": 0.4044550061225891, "step": 1720 }, { "epoch": 0.45, "learning_rate": 6.66709606900178e-07, "logits/chosen": -1.7462646961212158, "logits/rejected": -1.6820160150527954, "logps/chosen": -2839.33154296875, "logps/rejected": -2203.18603515625, "loss": 0.6479, "rewards/accuracies": 0.699999988079071, "rewards/chosen": 0.9174792170524597, "rewards/margins": 0.4399174749851227, "rewards/rejected": 0.4775618612766266, "step": 1730 }, { "epoch": 0.46, "learning_rate": 6.62395221933321e-07, "logits/chosen": -1.5487401485443115, "logits/rejected": -1.5394811630249023, "logps/chosen": -2549.23876953125, "logps/rejected": -2237.34228515625, "loss": 0.7309, "rewards/accuracies": 0.550000011920929, "rewards/chosen": 0.9733200073242188, "rewards/margins": 0.22530770301818848, "rewards/rejected": 0.7480123043060303, "step": 1740 }, { "epoch": 0.46, "learning_rate": 6.580672770121663e-07, "logits/chosen": -1.641426682472229, "logits/rejected": -1.553511381149292, "logps/chosen": -2529.59033203125, "logps/rejected": -2177.71142578125, "loss": 0.6188, "rewards/accuracies": 0.550000011920929, "rewards/chosen": 1.0179182291030884, "rewards/margins": 0.17651347815990448, "rewards/rejected": 0.8414047956466675, "step": 1750 }, { "epoch": 0.46, "learning_rate": 6.537261335188695e-07, "logits/chosen": -1.5916087627410889, "logits/rejected": -1.5262328386306763, "logps/chosen": -2461.32666015625, "logps/rejected": -1872.6234130859375, "loss": 0.6819, "rewards/accuracies": 0.574999988079071, "rewards/chosen": 0.8468859791755676, "rewards/margins": 0.3362599015235901, "rewards/rejected": 0.5106260776519775, "step": 1760 }, { "epoch": 0.46, "learning_rate": 6.493721539376629e-07, "logits/chosen": -1.460197925567627, "logits/rejected": -1.4017575979232788, "logps/chosen": -2258.1845703125, "logps/rejected": -2031.2353515625, "loss": 0.6579, "rewards/accuracies": 0.550000011920929, "rewards/chosen": 0.39380067586898804, "rewards/margins": 0.16991008818149567, "rewards/rejected": 0.22389057278633118, "step": 1770 }, { "epoch": 0.47, "learning_rate": 6.450057018245887e-07, "logits/chosen": -1.478092908859253, "logits/rejected": -1.413187026977539, "logps/chosen": -2277.25927734375, "logps/rejected": -2329.82421875, "loss": 0.665, "rewards/accuracies": 0.574999988079071, "rewards/chosen": 0.27560552954673767, "rewards/margins": 0.3579794764518738, "rewards/rejected": -0.0823739543557167, "step": 1780 }, { "epoch": 0.47, "learning_rate": 6.406271417771417e-07, "logits/chosen": -1.4325132369995117, "logits/rejected": -1.4178228378295898, "logps/chosen": -2349.21337890625, "logps/rejected": -2154.297119140625, "loss": 0.6451, "rewards/accuracies": 0.550000011920929, "rewards/chosen": 0.3952658474445343, "rewards/margins": 0.16848233342170715, "rewards/rejected": 0.22678343951702118, "step": 1790 }, { "epoch": 0.47, "learning_rate": 6.362368394038253e-07, "logits/chosen": -1.5897849798202515, "logits/rejected": -1.5390173196792603, "logps/chosen": -1875.8529052734375, "logps/rejected": -1862.1568603515625, "loss": 0.6495, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": 0.7089827656745911, "rewards/margins": 0.22261781990528107, "rewards/rejected": 0.4863649904727936, "step": 1800 }, { "epoch": 0.47, "eval_logits/chosen": -1.4973212480545044, "eval_logits/rejected": -1.4589377641677856, "eval_logps/chosen": -2552.484619140625, "eval_logps/rejected": -2154.230712890625, "eval_loss": 0.6439757347106934, "eval_rewards/accuracies": 0.628000020980835, "eval_rewards/chosen": 1.2029086351394653, "eval_rewards/margins": 0.44004881381988525, "eval_rewards/rejected": 0.7628598809242249, "eval_runtime": 274.3283, "eval_samples_per_second": 7.291, "eval_steps_per_second": 0.456, "step": 1800 }, { "epoch": 0.47, "learning_rate": 6.318351612936251e-07, "logits/chosen": -1.540020227432251, "logits/rejected": -1.5377864837646484, "logps/chosen": -2579.018798828125, "logps/rejected": -2040.8626708984375, "loss": 0.5342, "rewards/accuracies": 0.699999988079071, "rewards/chosen": 1.3657995462417603, "rewards/margins": 0.5092751383781433, "rewards/rejected": 0.8565242886543274, "step": 1810 }, { "epoch": 0.48, "learning_rate": 6.27422474985396e-07, "logits/chosen": -1.6203733682632446, "logits/rejected": -1.5685895681381226, "logps/chosen": -2994.5986328125, "logps/rejected": -2297.98779296875, "loss": 0.6322, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": 1.1853954792022705, "rewards/margins": 0.3969642221927643, "rewards/rejected": 0.7884311079978943, "step": 1820 }, { "epoch": 0.48, "learning_rate": 6.229991489371753e-07, "logits/chosen": -1.4962559938430786, "logits/rejected": -1.5039407014846802, "logps/chosen": -2847.88330078125, "logps/rejected": -2582.642333984375, "loss": 0.6686, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": 1.1997880935668945, "rewards/margins": 0.3008013367652893, "rewards/rejected": 0.8989866375923157, "step": 1830 }, { "epoch": 0.48, "learning_rate": 6.185655524954147e-07, "logits/chosen": -1.4752075672149658, "logits/rejected": -1.4018795490264893, "logps/chosen": -2723.45263671875, "logps/rejected": -2207.67626953125, "loss": 0.6545, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": 0.8859116435050964, "rewards/margins": 0.42819732427597046, "rewards/rejected": 0.4577142596244812, "step": 1840 }, { "epoch": 0.48, "learning_rate": 6.141220558641415e-07, "logits/chosen": -1.6355358362197876, "logits/rejected": -1.5703387260437012, "logps/chosen": -2515.22509765625, "logps/rejected": -2019.4664306640625, "loss": 0.6205, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": 0.9024883508682251, "rewards/margins": 0.5706073045730591, "rewards/rejected": 0.33188116550445557, "step": 1850 }, { "epoch": 0.49, "learning_rate": 6.096690300740452e-07, "logits/chosen": -1.5263564586639404, "logits/rejected": -1.5084933042526245, "logps/chosen": -2439.4453125, "logps/rejected": -2368.387939453125, "loss": 0.6958, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": 0.8908926248550415, "rewards/margins": 0.32425767183303833, "rewards/rejected": 0.5666350722312927, "step": 1860 }, { "epoch": 0.49, "learning_rate": 6.052068469514983e-07, "logits/chosen": -1.4977385997772217, "logits/rejected": -1.4453041553497314, "logps/chosen": -2486.1298828125, "logps/rejected": -2206.44384765625, "loss": 0.7091, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": 0.45466309785842896, "rewards/margins": 0.2726563811302185, "rewards/rejected": 0.18200668692588806, "step": 1870 }, { "epoch": 0.49, "learning_rate": 6.007358790875071e-07, "logits/chosen": -1.523725986480713, "logits/rejected": -1.3718494176864624, "logps/chosen": -3135.372314453125, "logps/rejected": -2062.346435546875, "loss": 0.5787, "rewards/accuracies": 0.699999988079071, "rewards/chosen": 0.8700541257858276, "rewards/margins": 0.534584105014801, "rewards/rejected": 0.335470050573349, "step": 1880 }, { "epoch": 0.49, "learning_rate": 5.962564998066017e-07, "logits/chosen": -1.5558342933654785, "logits/rejected": -1.530552625656128, "logps/chosen": -2413.99951171875, "logps/rejected": -2231.44580078125, "loss": 0.7787, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": 0.8536021113395691, "rewards/margins": 0.16090592741966248, "rewards/rejected": 0.692696213722229, "step": 1890 }, { "epoch": 0.5, "learning_rate": 5.917690831356632e-07, "logits/chosen": -1.5734992027282715, "logits/rejected": -1.5821874141693115, "logps/chosen": -1971.658447265625, "logps/rejected": -2253.21533203125, "loss": 0.6465, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": 0.37103039026260376, "rewards/margins": 0.36184656620025635, "rewards/rejected": 0.00918380357325077, "step": 1900 }, { "epoch": 0.5, "eval_logits/chosen": -1.5323480367660522, "eval_logits/rejected": -1.4961434602737427, "eval_logps/chosen": -2651.6640625, "eval_logps/rejected": -2239.925537109375, "eval_loss": 0.6641379594802856, "eval_rewards/accuracies": 0.628000020980835, "eval_rewards/chosen": 0.21111172437667847, "eval_rewards/margins": 0.3052009046077728, "eval_rewards/rejected": -0.09408915787935257, "eval_runtime": 272.1389, "eval_samples_per_second": 7.349, "eval_steps_per_second": 0.459, "step": 1900 }, { "epoch": 0.5, "learning_rate": 5.872740037726918e-07, "logits/chosen": -1.6534467935562134, "logits/rejected": -1.586725115776062, "logps/chosen": -2391.630615234375, "logps/rejected": -2272.478271484375, "loss": 0.6567, "rewards/accuracies": 0.574999988079071, "rewards/chosen": 0.07434698194265366, "rewards/margins": 0.33248382806777954, "rewards/rejected": -0.2581368684768677, "step": 1910 }, { "epoch": 0.5, "learning_rate": 5.82771637055521e-07, "logits/chosen": -1.5501420497894287, "logits/rejected": -1.5288193225860596, "logps/chosen": -2284.060302734375, "logps/rejected": -1815.4986572265625, "loss": 0.6759, "rewards/accuracies": 0.675000011920929, "rewards/chosen": 0.5421526432037354, "rewards/margins": 0.34266138076782227, "rewards/rejected": 0.1994912326335907, "step": 1920 }, { "epoch": 0.51, "learning_rate": 5.78262358930476e-07, "logits/chosen": -1.6008961200714111, "logits/rejected": -1.6260267496109009, "logps/chosen": -2463.80615234375, "logps/rejected": -2245.742919921875, "loss": 0.6011, "rewards/accuracies": 0.625, "rewards/chosen": 1.3033373355865479, "rewards/margins": 0.4004366993904114, "rewards/rejected": 0.9029006958007812, "step": 1930 }, { "epoch": 0.51, "learning_rate": 5.737465459209825e-07, "logits/chosen": -1.4986151456832886, "logits/rejected": -1.5256952047348022, "logps/chosen": -2494.918701171875, "logps/rejected": -2140.651123046875, "loss": 0.6964, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": 0.9287387728691101, "rewards/margins": 0.3198220729827881, "rewards/rejected": 0.6089166402816772, "step": 1940 }, { "epoch": 0.51, "learning_rate": 5.692245750961274e-07, "logits/chosen": -1.6182562112808228, "logits/rejected": -1.5028297901153564, "logps/chosen": -3051.378173828125, "logps/rejected": -2346.170654296875, "loss": 0.6231, "rewards/accuracies": 0.699999988079071, "rewards/chosen": 1.0541870594024658, "rewards/margins": 0.3164740204811096, "rewards/rejected": 0.7377129793167114, "step": 1950 }, { "epoch": 0.51, "learning_rate": 5.646968240391729e-07, "logits/chosen": -1.649444580078125, "logits/rejected": -1.6214931011199951, "logps/chosen": -2746.20849609375, "logps/rejected": -2309.657958984375, "loss": 0.6202, "rewards/accuracies": 0.800000011920929, "rewards/chosen": 1.2585784196853638, "rewards/margins": 0.8424956202507019, "rewards/rejected": 0.41608279943466187, "step": 1960 }, { "epoch": 0.52, "learning_rate": 5.601636708160296e-07, "logits/chosen": -1.547055959701538, "logits/rejected": -1.4317224025726318, "logps/chosen": -2590.408447265625, "logps/rejected": -2097.73779296875, "loss": 0.6244, "rewards/accuracies": 0.675000011920929, "rewards/chosen": 0.8106128573417664, "rewards/margins": 0.47321099042892456, "rewards/rejected": 0.3374018669128418, "step": 1970 }, { "epoch": 0.52, "learning_rate": 5.55625493943687e-07, "logits/chosen": -1.6556923389434814, "logits/rejected": -1.5846012830734253, "logps/chosen": -2086.85400390625, "logps/rejected": -1811.0687255859375, "loss": 0.6727, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": 0.6724368929862976, "rewards/margins": 0.2351864129304886, "rewards/rejected": 0.437250554561615, "step": 1980 }, { "epoch": 0.52, "learning_rate": 5.510826723586078e-07, "logits/chosen": -1.568554401397705, "logits/rejected": -1.5848562717437744, "logps/chosen": -2322.924560546875, "logps/rejected": -2143.58740234375, "loss": 0.6265, "rewards/accuracies": 0.625, "rewards/chosen": 0.8154586553573608, "rewards/margins": 0.22640132904052734, "rewards/rejected": 0.5890573263168335, "step": 1990 }, { "epoch": 0.52, "learning_rate": 5.465355853850871e-07, "logits/chosen": -1.5256285667419434, "logits/rejected": -1.4730937480926514, "logps/chosen": -2586.426025390625, "logps/rejected": -2263.304443359375, "loss": 0.6866, "rewards/accuracies": 0.675000011920929, "rewards/chosen": 0.8933171033859253, "rewards/margins": 0.33347776532173157, "rewards/rejected": 0.5598393678665161, "step": 2000 }, { "epoch": 0.52, "eval_logits/chosen": -1.4934203624725342, "eval_logits/rejected": -1.450918436050415, "eval_logps/chosen": -2615.305419921875, "eval_logps/rejected": -2210.75, "eval_loss": 0.648023784160614, "eval_rewards/accuracies": 0.6600000262260437, "eval_rewards/chosen": 0.5746970772743225, "eval_rewards/margins": 0.3770293593406677, "eval_rewards/rejected": 0.1976676881313324, "eval_runtime": 273.6196, "eval_samples_per_second": 7.309, "eval_steps_per_second": 0.457, "step": 2000 }, { "epoch": 0.53, "learning_rate": 5.41984612703579e-07, "logits/chosen": -1.4832929372787476, "logits/rejected": -1.4308890104293823, "logps/chosen": -2649.85986328125, "logps/rejected": -2068.37451171875, "loss": 0.6655, "rewards/accuracies": 0.574999988079071, "rewards/chosen": 0.21885940432548523, "rewards/margins": 0.2792533338069916, "rewards/rejected": -0.06039392948150635, "step": 2010 }, { "epoch": 0.53, "learning_rate": 5.37430134318992e-07, "logits/chosen": -1.62113356590271, "logits/rejected": -1.5398887395858765, "logps/chosen": -2633.84912109375, "logps/rejected": -2066.56005859375, "loss": 0.6041, "rewards/accuracies": 0.699999988079071, "rewards/chosen": 0.4892839789390564, "rewards/margins": 0.35047203302383423, "rewards/rejected": 0.13881191611289978, "step": 2020 }, { "epoch": 0.53, "learning_rate": 5.328725305289612e-07, "logits/chosen": -1.4779340028762817, "logits/rejected": -1.5425903797149658, "logps/chosen": -2540.42724609375, "logps/rejected": -2326.0771484375, "loss": 0.6508, "rewards/accuracies": 0.625, "rewards/chosen": 0.8963820338249207, "rewards/margins": 0.08198239654302597, "rewards/rejected": 0.8143996000289917, "step": 2030 }, { "epoch": 0.53, "learning_rate": 5.283121818920911e-07, "logits/chosen": -1.6571362018585205, "logits/rejected": -1.5648419857025146, "logps/chosen": -2485.504150390625, "logps/rejected": -1707.0404052734375, "loss": 0.7148, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": 1.0816528797149658, "rewards/margins": 0.5686277151107788, "rewards/rejected": 0.5130252838134766, "step": 2040 }, { "epoch": 0.54, "learning_rate": 5.237494691961808e-07, "logits/chosen": -1.6447166204452515, "logits/rejected": -1.5862451791763306, "logps/chosen": -2401.858642578125, "logps/rejected": -1855.913818359375, "loss": 0.6581, "rewards/accuracies": 0.574999988079071, "rewards/chosen": 0.7710812091827393, "rewards/margins": 0.4302866458892822, "rewards/rejected": 0.3407946228981018, "step": 2050 }, { "epoch": 0.54, "learning_rate": 5.191847734264272e-07, "logits/chosen": -1.4784064292907715, "logits/rejected": -1.3932642936706543, "logps/chosen": -2951.359619140625, "logps/rejected": -1941.2406005859375, "loss": 0.6879, "rewards/accuracies": 0.5, "rewards/chosen": 0.6071017384529114, "rewards/margins": 0.42749419808387756, "rewards/rejected": 0.17960752546787262, "step": 2060 }, { "epoch": 0.54, "learning_rate": 5.146184757336133e-07, "logits/chosen": -1.5501275062561035, "logits/rejected": -1.4930084943771362, "logps/chosen": -2447.392578125, "logps/rejected": -2330.06298828125, "loss": 0.6315, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": 0.750685453414917, "rewards/margins": 0.24931129813194275, "rewards/rejected": 0.5013741254806519, "step": 2070 }, { "epoch": 0.54, "learning_rate": 5.100509574022827e-07, "logits/chosen": -1.5865153074264526, "logits/rejected": -1.5072886943817139, "logps/chosen": -2638.115966796875, "logps/rejected": -2270.362060546875, "loss": 0.6252, "rewards/accuracies": 0.625, "rewards/chosen": 0.5812515020370483, "rewards/margins": 0.3027082085609436, "rewards/rejected": 0.27854329347610474, "step": 2080 }, { "epoch": 0.55, "learning_rate": 5.054825998189012e-07, "logits/chosen": -1.5698521137237549, "logits/rejected": -1.5683820247650146, "logps/chosen": -3126.69384765625, "logps/rejected": -2817.0693359375, "loss": 0.6094, "rewards/accuracies": 0.699999988079071, "rewards/chosen": 1.212766170501709, "rewards/margins": 0.6158873438835144, "rewards/rejected": 0.5968788266181946, "step": 2090 }, { "epoch": 0.55, "learning_rate": 5.009137844400127e-07, "logits/chosen": -1.5645443201065063, "logits/rejected": -1.5424262285232544, "logps/chosen": -2719.923583984375, "logps/rejected": -2182.44140625, "loss": 0.6441, "rewards/accuracies": 0.550000011920929, "rewards/chosen": 1.0640016794204712, "rewards/margins": 0.3684343695640564, "rewards/rejected": 0.69556725025177, "step": 2100 }, { "epoch": 0.55, "eval_logits/chosen": -1.484187364578247, "eval_logits/rejected": -1.441811203956604, "eval_logps/chosen": -2584.68408203125, "eval_logps/rejected": -2185.49853515625, "eval_loss": 0.6357956528663635, "eval_rewards/accuracies": 0.6480000019073486, "eval_rewards/chosen": 0.8809126019477844, "eval_rewards/margins": 0.4307316839694977, "eval_rewards/rejected": 0.45018094778060913, "eval_runtime": 264.5545, "eval_samples_per_second": 7.56, "eval_steps_per_second": 0.472, "step": 2100 }, { "epoch": 0.55, "learning_rate": 4.963448927603866e-07, "logits/chosen": -1.485181212425232, "logits/rejected": -1.448392629623413, "logps/chosen": -2547.41015625, "logps/rejected": -2374.857421875, "loss": 0.6224, "rewards/accuracies": 0.675000011920929, "rewards/chosen": 0.6609773635864258, "rewards/margins": 0.6512421369552612, "rewards/rejected": 0.00973515771329403, "step": 2110 }, { "epoch": 0.55, "learning_rate": 4.917763062811631e-07, "logits/chosen": -1.5620416402816772, "logits/rejected": -1.5052043199539185, "logps/chosen": -2927.76953125, "logps/rejected": -2448.58154296875, "loss": 0.6657, "rewards/accuracies": 0.625, "rewards/chosen": 1.1013530492782593, "rewards/margins": 0.3367983400821686, "rewards/rejected": 0.7645547389984131, "step": 2120 }, { "epoch": 0.56, "learning_rate": 4.872084064779983e-07, "logits/chosen": -1.538593053817749, "logits/rejected": -1.4424632787704468, "logps/chosen": -2763.087646484375, "logps/rejected": -2043.810546875, "loss": 0.6184, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": 1.2892661094665527, "rewards/margins": 0.6896950006484985, "rewards/rejected": 0.5995711088180542, "step": 2130 }, { "epoch": 0.56, "learning_rate": 4.826415747692117e-07, "logits/chosen": -1.4685488939285278, "logits/rejected": -1.3683886528015137, "logps/chosen": -2700.881591796875, "logps/rejected": -1875.2572021484375, "loss": 0.6501, "rewards/accuracies": 0.675000011920929, "rewards/chosen": 0.738589882850647, "rewards/margins": 0.4977818429470062, "rewards/rejected": 0.24080801010131836, "step": 2140 }, { "epoch": 0.56, "learning_rate": 4.780761924839365e-07, "logits/chosen": -1.310004472732544, "logits/rejected": -1.1984180212020874, "logps/chosen": -2606.06982421875, "logps/rejected": -2154.67529296875, "loss": 0.6671, "rewards/accuracies": 0.550000011920929, "rewards/chosen": 0.3540424108505249, "rewards/margins": 0.13220936059951782, "rewards/rejected": 0.22183306515216827, "step": 2150 }, { "epoch": 0.57, "learning_rate": 4.7351264083027954e-07, "logits/chosen": -1.4272174835205078, "logits/rejected": -1.3643287420272827, "logps/chosen": -2741.249755859375, "logps/rejected": -2210.83203125, "loss": 0.5928, "rewards/accuracies": 0.675000011920929, "rewards/chosen": 1.0889028310775757, "rewards/margins": 0.5112255215644836, "rewards/rejected": 0.5776773691177368, "step": 2160 }, { "epoch": 0.57, "learning_rate": 4.689513008634906e-07, "logits/chosen": -1.4570413827896118, "logits/rejected": -1.4573755264282227, "logps/chosen": -2330.010986328125, "logps/rejected": -2262.497802734375, "loss": 0.6381, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": 0.9347332119941711, "rewards/margins": 0.4426857829093933, "rewards/rejected": 0.4920472204685211, "step": 2170 }, { "epoch": 0.57, "learning_rate": 4.6439255345414475e-07, "logits/chosen": -1.3844455480575562, "logits/rejected": -1.3740614652633667, "logps/chosen": -2633.23876953125, "logps/rejected": -2337.79296875, "loss": 0.6539, "rewards/accuracies": 0.800000011920929, "rewards/chosen": 0.6188936829566956, "rewards/margins": 0.3281751871109009, "rewards/rejected": 0.29071852564811707, "step": 2180 }, { "epoch": 0.57, "learning_rate": 4.5983677925633836e-07, "logits/chosen": -1.4414739608764648, "logits/rejected": -1.4345848560333252, "logps/chosen": -2682.71337890625, "logps/rejected": -2551.15185546875, "loss": 0.6541, "rewards/accuracies": 0.5, "rewards/chosen": 0.5205819010734558, "rewards/margins": 0.21859097480773926, "rewards/rejected": 0.30199089646339417, "step": 2190 }, { "epoch": 0.58, "learning_rate": 4.5528435867590595e-07, "logits/chosen": -1.4516639709472656, "logits/rejected": -1.437811255455017, "logps/chosen": -2479.385498046875, "logps/rejected": -2412.54052734375, "loss": 0.6752, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": 0.7011703252792358, "rewards/margins": 0.2219802588224411, "rewards/rejected": 0.4791901111602783, "step": 2200 }, { "epoch": 0.58, "eval_logits/chosen": -1.3656065464019775, "eval_logits/rejected": -1.3192769289016724, "eval_logps/chosen": -2579.66357421875, "eval_logps/rejected": -2179.766845703125, "eval_loss": 0.6346109509468079, "eval_rewards/accuracies": 0.656000018119812, "eval_rewards/chosen": 0.9311181306838989, "eval_rewards/margins": 0.42361852526664734, "eval_rewards/rejected": 0.5074995160102844, "eval_runtime": 268.8417, "eval_samples_per_second": 7.439, "eval_steps_per_second": 0.465, "step": 2200 }, { "epoch": 0.58, "learning_rate": 4.507356718386556e-07, "logits/chosen": -1.4297059774398804, "logits/rejected": -1.306510329246521, "logps/chosen": -2648.14892578125, "logps/rejected": -1472.99169921875, "loss": 0.6332, "rewards/accuracies": 0.699999988079071, "rewards/chosen": 0.7275451421737671, "rewards/margins": 0.39894041419029236, "rewards/rejected": 0.32860463857650757, "step": 2210 }, { "epoch": 0.58, "learning_rate": 4.461910985586298e-07, "logits/chosen": -1.5129501819610596, "logits/rejected": -1.4305498600006104, "logps/chosen": -2532.091796875, "logps/rejected": -1929.642333984375, "loss": 0.6767, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": 1.124761939048767, "rewards/margins": 0.4813266694545746, "rewards/rejected": 0.6434352993965149, "step": 2220 }, { "epoch": 0.58, "learning_rate": 4.4165101830638937e-07, "logits/chosen": -1.3523738384246826, "logits/rejected": -1.2613658905029297, "logps/chosen": -2724.252197265625, "logps/rejected": -2269.352783203125, "loss": 0.6623, "rewards/accuracies": 0.699999988079071, "rewards/chosen": 1.0175944566726685, "rewards/margins": 0.4952741265296936, "rewards/rejected": 0.5223202705383301, "step": 2230 }, { "epoch": 0.59, "learning_rate": 4.3711581017732866e-07, "logits/chosen": -1.3893333673477173, "logits/rejected": -1.390366554260254, "logps/chosen": -2303.6611328125, "logps/rejected": -2041.0625, "loss": 0.5847, "rewards/accuracies": 0.550000011920929, "rewards/chosen": 0.8912409543991089, "rewards/margins": 0.2850377857685089, "rewards/rejected": 0.6062031388282776, "step": 2240 }, { "epoch": 0.59, "learning_rate": 4.325858528600214e-07, "logits/chosen": -1.3108584880828857, "logits/rejected": -1.2193920612335205, "logps/chosen": -2475.795654296875, "logps/rejected": -2092.73583984375, "loss": 0.595, "rewards/accuracies": 0.699999988079071, "rewards/chosen": 1.1036341190338135, "rewards/margins": 0.3697579503059387, "rewards/rejected": 0.7338761687278748, "step": 2250 }, { "epoch": 0.59, "learning_rate": 4.280615246046001e-07, "logits/chosen": -1.4235751628875732, "logits/rejected": -1.3617385625839233, "logps/chosen": -2633.86083984375, "logps/rejected": -2408.10009765625, "loss": 0.6118, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": 1.153663992881775, "rewards/margins": 0.3248019814491272, "rewards/rejected": 0.8288620710372925, "step": 2260 }, { "epoch": 0.59, "learning_rate": 4.235432031911719e-07, "logits/chosen": -1.4218213558197021, "logits/rejected": -1.4208636283874512, "logps/chosen": -2631.169189453125, "logps/rejected": -2223.19970703125, "loss": 0.639, "rewards/accuracies": 0.675000011920929, "rewards/chosen": 0.8666805028915405, "rewards/margins": 0.2339131385087967, "rewards/rejected": 0.632767379283905, "step": 2270 }, { "epoch": 0.6, "learning_rate": 4.190312658982747e-07, "logits/chosen": -1.4493274688720703, "logits/rejected": -1.3931918144226074, "logps/chosen": -2808.74853515625, "logps/rejected": -2093.283203125, "loss": 0.6821, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": 1.029400110244751, "rewards/margins": 0.31582584977149963, "rewards/rejected": 0.7135743498802185, "step": 2280 }, { "epoch": 0.6, "learning_rate": 4.145260894713738e-07, "logits/chosen": -1.3703809976577759, "logits/rejected": -1.3452428579330444, "logps/chosen": -2364.276123046875, "logps/rejected": -2126.041015625, "loss": 0.6331, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": 0.6234472990036011, "rewards/margins": 0.22590875625610352, "rewards/rejected": 0.39753851294517517, "step": 2290 }, { "epoch": 0.6, "learning_rate": 4.1002805009140464e-07, "logits/chosen": -1.2794725894927979, "logits/rejected": -1.265462875366211, "logps/chosen": -3121.4658203125, "logps/rejected": -2392.01123046875, "loss": 0.5646, "rewards/accuracies": 0.699999988079071, "rewards/chosen": 1.061316728591919, "rewards/margins": 0.7338230013847351, "rewards/rejected": 0.32749372720718384, "step": 2300 }, { "epoch": 0.6, "eval_logits/chosen": -1.3116086721420288, "eval_logits/rejected": -1.2831621170043945, "eval_logps/chosen": -2606.788330078125, "eval_logps/rejected": -2201.394775390625, "eval_loss": 0.6396492719650269, "eval_rewards/accuracies": 0.6480000019073486, "eval_rewards/chosen": 0.6598689556121826, "eval_rewards/margins": 0.368648886680603, "eval_rewards/rejected": 0.2912200391292572, "eval_runtime": 275.4174, "eval_samples_per_second": 7.262, "eval_steps_per_second": 0.454, "step": 2300 }, { "epoch": 0.6, "learning_rate": 4.055375233433608e-07, "logits/chosen": -1.4271332025527954, "logits/rejected": -1.423020601272583, "logps/chosen": -2071.07080078125, "logps/rejected": -1836.254638671875, "loss": 0.6856, "rewards/accuracies": 0.625, "rewards/chosen": 0.4686586856842041, "rewards/margins": 0.19411948323249817, "rewards/rejected": 0.27453920245170593, "step": 2310 }, { "epoch": 0.61, "learning_rate": 4.010548841849336e-07, "logits/chosen": -1.3506792783737183, "logits/rejected": -1.3817119598388672, "logps/chosen": -2158.24267578125, "logps/rejected": -2183.30029296875, "loss": 0.6816, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": 0.4484243392944336, "rewards/margins": 0.16151314973831177, "rewards/rejected": 0.2869111895561218, "step": 2320 }, { "epoch": 0.61, "learning_rate": 3.9658050691520243e-07, "logits/chosen": -1.2983678579330444, "logits/rejected": -1.291818380355835, "logps/chosen": -1904.927978515625, "logps/rejected": -2080.394287109375, "loss": 0.626, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": 0.5332204699516296, "rewards/margins": 0.13143374025821686, "rewards/rejected": 0.401786744594574, "step": 2330 }, { "epoch": 0.61, "learning_rate": 3.921147651433822e-07, "logits/chosen": -1.2738254070281982, "logits/rejected": -1.3125003576278687, "logps/chosen": -2600.62841796875, "logps/rejected": -2347.55517578125, "loss": 0.6109, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": 0.9114497900009155, "rewards/margins": 0.5179422497749329, "rewards/rejected": 0.3935074806213379, "step": 2340 }, { "epoch": 0.62, "learning_rate": 3.8765803175762547e-07, "logits/chosen": -1.4664332866668701, "logits/rejected": -1.4184716939926147, "logps/chosen": -3116.819580078125, "logps/rejected": -2370.01806640625, "loss": 0.6761, "rewards/accuracies": 0.550000011920929, "rewards/chosen": 0.5260448455810547, "rewards/margins": 0.2121487557888031, "rewards/rejected": 0.3138960897922516, "step": 2350 }, { "epoch": 0.62, "learning_rate": 3.832106788938873e-07, "logits/chosen": -1.3046257495880127, "logits/rejected": -1.2335705757141113, "logps/chosen": -2104.3447265625, "logps/rejected": -1580.002685546875, "loss": 0.6046, "rewards/accuracies": 0.574999988079071, "rewards/chosen": 0.5160833597183228, "rewards/margins": 0.3431095778942108, "rewards/rejected": 0.17297373712062836, "step": 2360 }, { "epoch": 0.62, "learning_rate": 3.7877307790485204e-07, "logits/chosen": -1.4190560579299927, "logits/rejected": -1.4400821924209595, "logps/chosen": -2375.158935546875, "logps/rejected": -2192.509765625, "loss": 0.6432, "rewards/accuracies": 0.699999988079071, "rewards/chosen": 1.026429295539856, "rewards/margins": 0.4570732116699219, "rewards/rejected": 0.5693560838699341, "step": 2370 }, { "epoch": 0.62, "learning_rate": 3.7434559932892527e-07, "logits/chosen": -1.4493039846420288, "logits/rejected": -1.4444842338562012, "logps/chosen": -2466.854248046875, "logps/rejected": -2544.655517578125, "loss": 0.6927, "rewards/accuracies": 0.699999988079071, "rewards/chosen": 1.071681022644043, "rewards/margins": 0.44979220628738403, "rewards/rejected": 0.6218888163566589, "step": 2380 }, { "epoch": 0.63, "learning_rate": 3.699286128592939e-07, "logits/chosen": -1.5252349376678467, "logits/rejected": -1.5242667198181152, "logps/chosen": -2524.96923828125, "logps/rejected": -2338.88916015625, "loss": 0.6446, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": 0.6778838634490967, "rewards/margins": 0.30297914147377014, "rewards/rejected": 0.37490472197532654, "step": 2390 }, { "epoch": 0.63, "learning_rate": 3.655224873130571e-07, "logits/chosen": -1.5355488061904907, "logits/rejected": -1.5058531761169434, "logps/chosen": -2609.71728515625, "logps/rejected": -2428.035888671875, "loss": 0.6519, "rewards/accuracies": 0.675000011920929, "rewards/chosen": 0.44195812940597534, "rewards/margins": 0.31578582525253296, "rewards/rejected": 0.12617230415344238, "step": 2400 }, { "epoch": 0.63, "eval_logits/chosen": -1.477705478668213, "eval_logits/rejected": -1.4460340738296509, "eval_logps/chosen": -2630.405029296875, "eval_logps/rejected": -2221.14599609375, "eval_loss": 0.6450788378715515, "eval_rewards/accuracies": 0.6399999856948853, "eval_rewards/chosen": 0.4237046539783478, "eval_rewards/margins": 0.3299960792064667, "eval_rewards/rejected": 0.09370850026607513, "eval_runtime": 267.0847, "eval_samples_per_second": 7.488, "eval_steps_per_second": 0.468, "step": 2400 }, { "epoch": 0.63, "learning_rate": 3.611275906004298e-07, "logits/chosen": -1.3755953311920166, "logits/rejected": -1.3369966745376587, "logps/chosen": -2507.03515625, "logps/rejected": -2109.78173828125, "loss": 0.618, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": 0.5401030778884888, "rewards/margins": 0.40320855379104614, "rewards/rejected": 0.13689449429512024, "step": 2410 }, { "epoch": 0.63, "learning_rate": 3.5674428969402306e-07, "logits/chosen": -1.5944547653198242, "logits/rejected": -1.5777031183242798, "logps/chosen": -2441.38525390625, "logps/rejected": -2047.470947265625, "loss": 0.6396, "rewards/accuracies": 0.574999988079071, "rewards/chosen": 0.8870500326156616, "rewards/margins": 0.2639123797416687, "rewards/rejected": 0.6231377720832825, "step": 2420 }, { "epoch": 0.64, "learning_rate": 3.523729505982008e-07, "logits/chosen": -1.5361446142196655, "logits/rejected": -1.5061471462249756, "logps/chosen": -2847.749267578125, "logps/rejected": -2523.47705078125, "loss": 0.668, "rewards/accuracies": 0.675000011920929, "rewards/chosen": 1.2106680870056152, "rewards/margins": 0.3830196261405945, "rewards/rejected": 0.8276484608650208, "step": 2430 }, { "epoch": 0.64, "learning_rate": 3.480139383185199e-07, "logits/chosen": -1.4447945356369019, "logits/rejected": -1.446575403213501, "logps/chosen": -2404.881591796875, "logps/rejected": -1928.1968994140625, "loss": 0.6213, "rewards/accuracies": 0.625, "rewards/chosen": 0.8293660879135132, "rewards/margins": 0.44402575492858887, "rewards/rejected": 0.38534030318260193, "step": 2440 }, { "epoch": 0.64, "learning_rate": 3.436676168312508e-07, "logits/chosen": -1.5186269283294678, "logits/rejected": -1.4943647384643555, "logps/chosen": -2593.47021484375, "logps/rejected": -2183.07861328125, "loss": 0.6634, "rewards/accuracies": 0.699999988079071, "rewards/chosen": 0.6045433878898621, "rewards/margins": 0.43318018317222595, "rewards/rejected": 0.17136314511299133, "step": 2450 }, { "epoch": 0.64, "learning_rate": 3.393343490529874e-07, "logits/chosen": -1.4893147945404053, "logits/rejected": -1.487684965133667, "logps/chosen": -2017.423583984375, "logps/rejected": -1761.2838134765625, "loss": 0.6279, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": 0.5850464105606079, "rewards/margins": 0.3719884753227234, "rewards/rejected": 0.21305795013904572, "step": 2460 }, { "epoch": 0.65, "learning_rate": 3.35014496810342e-07, "logits/chosen": -1.4742892980575562, "logits/rejected": -1.4468326568603516, "logps/chosen": -2771.84423828125, "logps/rejected": -2527.495849609375, "loss": 0.6262, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": 0.9822020530700684, "rewards/margins": 0.38336580991744995, "rewards/rejected": 0.5988362431526184, "step": 2470 }, { "epoch": 0.65, "learning_rate": 3.3070842080973365e-07, "logits/chosen": -1.3868352174758911, "logits/rejected": -1.2980562448501587, "logps/chosen": -2637.708984375, "logps/rejected": -2321.34423828125, "loss": 0.5936, "rewards/accuracies": 0.625, "rewards/chosen": 0.7674530744552612, "rewards/margins": 0.3672861158847809, "rewards/rejected": 0.40016698837280273, "step": 2480 }, { "epoch": 0.65, "learning_rate": 3.264164806072691e-07, "logits/chosen": -1.4247747659683228, "logits/rejected": -1.4318523406982422, "logps/chosen": -1995.621337890625, "logps/rejected": -1889.0433349609375, "loss": 0.5861, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": 0.4019368290901184, "rewards/margins": 0.3562806248664856, "rewards/rejected": 0.04565621167421341, "step": 2490 }, { "epoch": 0.65, "learning_rate": 3.221390345787205e-07, "logits/chosen": -1.4784921407699585, "logits/rejected": -1.4326039552688599, "logps/chosen": -2378.757080078125, "logps/rejected": -2247.66650390625, "loss": 0.6292, "rewards/accuracies": 0.625, "rewards/chosen": 0.8328452110290527, "rewards/margins": 0.35527855157852173, "rewards/rejected": 0.47756657004356384, "step": 2500 }, { "epoch": 0.65, "eval_logits/chosen": -1.4396635293960571, "eval_logits/rejected": -1.4040294885635376, "eval_logps/chosen": -2585.951171875, "eval_logps/rejected": -2188.20947265625, "eval_loss": 0.6313052177429199, "eval_rewards/accuracies": 0.6460000276565552, "eval_rewards/chosen": 0.8682412505149841, "eval_rewards/margins": 0.44516855478286743, "eval_rewards/rejected": 0.4230727553367615, "eval_runtime": 273.6779, "eval_samples_per_second": 7.308, "eval_steps_per_second": 0.457, "step": 2500 }, { "epoch": 0.66, "learning_rate": 3.178764398895999e-07, "logits/chosen": -1.5333797931671143, "logits/rejected": -1.4616153240203857, "logps/chosen": -2828.387451171875, "logps/rejected": -2186.489501953125, "loss": 0.6816, "rewards/accuracies": 0.75, "rewards/chosen": 1.0755081176757812, "rewards/margins": 0.3677830100059509, "rewards/rejected": 0.7077249884605408, "step": 2510 }, { "epoch": 0.66, "learning_rate": 3.1362905246533733e-07, "logits/chosen": -1.5495270490646362, "logits/rejected": -1.5117230415344238, "logps/chosen": -2374.715087890625, "logps/rejected": -2174.619384765625, "loss": 0.6468, "rewards/accuracies": 0.550000011920929, "rewards/chosen": 0.9045315980911255, "rewards/margins": 0.290399968624115, "rewards/rejected": 0.6141316890716553, "step": 2520 }, { "epoch": 0.66, "learning_rate": 3.093972269615602e-07, "logits/chosen": -1.5428906679153442, "logits/rejected": -1.4694817066192627, "logps/chosen": -2695.100341796875, "logps/rejected": -2482.61328125, "loss": 0.6343, "rewards/accuracies": 0.675000011920929, "rewards/chosen": 0.9704828262329102, "rewards/margins": 0.4584183096885681, "rewards/rejected": 0.5120643377304077, "step": 2530 }, { "epoch": 0.66, "learning_rate": 3.051813167344807e-07, "logits/chosen": -1.493492841720581, "logits/rejected": -1.4127018451690674, "logps/chosen": -2684.31298828125, "logps/rejected": -2033.2591552734375, "loss": 0.6365, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": 0.5565832853317261, "rewards/margins": 0.16704833507537842, "rewards/rejected": 0.38953498005867004, "step": 2540 }, { "epoch": 0.67, "learning_rate": 3.009816738113891e-07, "logits/chosen": -1.4053213596343994, "logits/rejected": -1.3655506372451782, "logps/chosen": -2278.53955078125, "logps/rejected": -2113.007568359375, "loss": 0.6118, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": 0.7199645042419434, "rewards/margins": 0.3212481141090393, "rewards/rejected": 0.39871641993522644, "step": 2550 }, { "epoch": 0.67, "learning_rate": 2.967986488612611e-07, "logits/chosen": -1.4535365104675293, "logits/rejected": -1.4404280185699463, "logps/chosen": -1694.073486328125, "logps/rejected": -1605.5189208984375, "loss": 0.5899, "rewards/accuracies": 0.75, "rewards/chosen": 0.42730236053466797, "rewards/margins": 0.42983976006507874, "rewards/rejected": -0.0025373927783221006, "step": 2560 }, { "epoch": 0.67, "learning_rate": 2.92632591165476e-07, "logits/chosen": -1.4190573692321777, "logits/rejected": -1.398150086402893, "logps/chosen": -2259.364013671875, "logps/rejected": -2268.209228515625, "loss": 0.6192, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": 0.7744486927986145, "rewards/margins": 0.38980168104171753, "rewards/rejected": 0.38464704155921936, "step": 2570 }, { "epoch": 0.68, "learning_rate": 2.884838485886531e-07, "logits/chosen": -1.0902016162872314, "logits/rejected": -1.1198498010635376, "logps/chosen": -2453.63427734375, "logps/rejected": -2333.11572265625, "loss": 0.6449, "rewards/accuracies": 0.675000011920929, "rewards/chosen": 0.7615150809288025, "rewards/margins": 0.3345804810523987, "rewards/rejected": 0.4269346594810486, "step": 2580 }, { "epoch": 0.68, "learning_rate": 2.8435276754960316e-07, "logits/chosen": -1.4796117544174194, "logits/rejected": -1.3743705749511719, "logps/chosen": -2669.708984375, "logps/rejected": -2046.904296875, "loss": 0.6064, "rewards/accuracies": 0.800000011920929, "rewards/chosen": 1.0122426748275757, "rewards/margins": 0.7491260766983032, "rewards/rejected": 0.2631165385246277, "step": 2590 }, { "epoch": 0.68, "learning_rate": 2.802396929924042e-07, "logits/chosen": -1.5238767862319946, "logits/rejected": -1.4813053607940674, "logps/chosen": -2612.95654296875, "logps/rejected": -2195.318115234375, "loss": 0.5985, "rewards/accuracies": 0.625, "rewards/chosen": 0.7694698572158813, "rewards/margins": 0.3584301471710205, "rewards/rejected": 0.41103968024253845, "step": 2600 }, { "epoch": 0.68, "eval_logits/chosen": -1.3860489130020142, "eval_logits/rejected": -1.357996940612793, "eval_logps/chosen": -2588.8173828125, "eval_logps/rejected": -2194.014404296875, "eval_loss": 0.6273570656776428, "eval_rewards/accuracies": 0.6639999747276306, "eval_rewards/chosen": 0.8395788669586182, "eval_rewards/margins": 0.47455480694770813, "eval_rewards/rejected": 0.3650241196155548, "eval_runtime": 275.8616, "eval_samples_per_second": 7.25, "eval_steps_per_second": 0.453, "step": 2600 }, { "epoch": 0.68, "learning_rate": 2.761449683575979e-07, "logits/chosen": -1.394689917564392, "logits/rejected": -1.3897894620895386, "logps/chosen": -2253.80322265625, "logps/rejected": -1823.135986328125, "loss": 0.6579, "rewards/accuracies": 0.550000011920929, "rewards/chosen": 0.6918545961380005, "rewards/margins": 0.3240147531032562, "rewards/rejected": 0.3678398132324219, "step": 2610 }, { "epoch": 0.69, "learning_rate": 2.720689355535133e-07, "logits/chosen": -1.5367738008499146, "logits/rejected": -1.4291003942489624, "logps/chosen": -3060.17431640625, "logps/rejected": -2661.6337890625, "loss": 0.67, "rewards/accuracies": 0.625, "rewards/chosen": 0.8580164909362793, "rewards/margins": 0.25758737325668335, "rewards/rejected": 0.6004289984703064, "step": 2620 }, { "epoch": 0.69, "learning_rate": 2.680119349277163e-07, "logits/chosen": -1.4364140033721924, "logits/rejected": -1.3742132186889648, "logps/chosen": -2562.680908203125, "logps/rejected": -2340.421875, "loss": 0.5949, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": 0.6592432260513306, "rewards/margins": 0.4979625642299652, "rewards/rejected": 0.16128072142601013, "step": 2630 }, { "epoch": 0.69, "learning_rate": 2.639743052385917e-07, "logits/chosen": -1.4134931564331055, "logits/rejected": -1.4067234992980957, "logps/chosen": -2398.591796875, "logps/rejected": -2171.51611328125, "loss": 0.5719, "rewards/accuracies": 0.625, "rewards/chosen": 0.5373933911323547, "rewards/margins": 0.3850085139274597, "rewards/rejected": 0.15238483250141144, "step": 2640 }, { "epoch": 0.69, "learning_rate": 2.599563836270564e-07, "logits/chosen": -1.3736032247543335, "logits/rejected": -1.3715035915374756, "logps/chosen": -2091.04345703125, "logps/rejected": -1912.631591796875, "loss": 0.5962, "rewards/accuracies": 0.550000011920929, "rewards/chosen": 0.41586917638778687, "rewards/margins": 0.2734389901161194, "rewards/rejected": 0.14243023097515106, "step": 2650 }, { "epoch": 0.7, "learning_rate": 2.55958505588409e-07, "logits/chosen": -1.2896828651428223, "logits/rejected": -1.27872896194458, "logps/chosen": -2430.079345703125, "logps/rejected": -1943.730224609375, "loss": 0.6424, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": 0.5126298069953918, "rewards/margins": 0.3756958842277527, "rewards/rejected": 0.13693387806415558, "step": 2660 }, { "epoch": 0.7, "learning_rate": 2.519810049443152e-07, "logits/chosen": -1.4509341716766357, "logits/rejected": -1.3884532451629639, "logps/chosen": -3016.3662109375, "logps/rejected": -2212.178466796875, "loss": 0.657, "rewards/accuracies": 0.675000011920929, "rewards/chosen": 0.7909504175186157, "rewards/margins": 0.5453279614448547, "rewards/rejected": 0.24562236666679382, "step": 2670 }, { "epoch": 0.7, "learning_rate": 2.4802421381493405e-07, "logits/chosen": -1.3750033378601074, "logits/rejected": -1.3092578649520874, "logps/chosen": -2432.66455078125, "logps/rejected": -1898.987548828125, "loss": 0.6082, "rewards/accuracies": 0.699999988079071, "rewards/chosen": 0.6120018362998962, "rewards/margins": 0.7680531144142151, "rewards/rejected": -0.15605124831199646, "step": 2680 }, { "epoch": 0.7, "learning_rate": 2.440884625911861e-07, "logits/chosen": -1.446340560913086, "logits/rejected": -1.4364707469940186, "logps/chosen": -2399.11572265625, "logps/rejected": -2216.21240234375, "loss": 0.5942, "rewards/accuracies": 0.75, "rewards/chosen": 0.7894426584243774, "rewards/margins": 0.5115488767623901, "rewards/rejected": 0.2778938412666321, "step": 2690 }, { "epoch": 0.71, "learning_rate": 2.4017407990716597e-07, "logits/chosen": -1.3791276216506958, "logits/rejected": -1.3035521507263184, "logps/chosen": -2603.680908203125, "logps/rejected": -2026.604248046875, "loss": 0.6323, "rewards/accuracies": 0.675000011920929, "rewards/chosen": 0.9257089495658875, "rewards/margins": 0.778711199760437, "rewards/rejected": 0.14699774980545044, "step": 2700 }, { "epoch": 0.71, "eval_logits/chosen": -1.2938450574874878, "eval_logits/rejected": -1.2622296810150146, "eval_logps/chosen": -2606.926025390625, "eval_logps/rejected": -2210.395751953125, "eval_loss": 0.6327735781669617, "eval_rewards/accuracies": 0.6639999747276306, "eval_rewards/chosen": 0.6584945917129517, "eval_rewards/margins": 0.45728600025177, "eval_rewards/rejected": 0.20120853185653687, "eval_runtime": 277.1566, "eval_samples_per_second": 7.216, "eval_steps_per_second": 0.451, "step": 2700 }, { "epoch": 0.71, "learning_rate": 2.3628139261270135e-07, "logits/chosen": -1.3448095321655273, "logits/rejected": -1.2422099113464355, "logps/chosen": -3200.04736328125, "logps/rejected": -2413.40673828125, "loss": 0.6214, "rewards/accuracies": 0.675000011920929, "rewards/chosen": 0.6920865774154663, "rewards/margins": 0.627841055393219, "rewards/rejected": 0.06424557417631149, "step": 2710 }, { "epoch": 0.71, "learning_rate": 2.3241072574606102e-07, "logits/chosen": -1.4279682636260986, "logits/rejected": -1.3650823831558228, "logps/chosen": -2858.18310546875, "logps/rejected": -2436.86474609375, "loss": 0.5988, "rewards/accuracies": 0.675000011920929, "rewards/chosen": 0.9761995077133179, "rewards/margins": 0.4037070870399475, "rewards/rejected": 0.5724924802780151, "step": 2720 }, { "epoch": 0.71, "learning_rate": 2.285624025068143e-07, "logits/chosen": -1.4990990161895752, "logits/rejected": -1.4075102806091309, "logps/chosen": -2588.497802734375, "logps/rejected": -1992.470703125, "loss": 0.609, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": 0.8248246312141418, "rewards/margins": 0.41641107201576233, "rewards/rejected": 0.40841349959373474, "step": 2730 }, { "epoch": 0.72, "learning_rate": 2.247367442288446e-07, "logits/chosen": -1.4395039081573486, "logits/rejected": -1.3425724506378174, "logps/chosen": -2676.036376953125, "logps/rejected": -1750.607177734375, "loss": 0.5769, "rewards/accuracies": 0.925000011920929, "rewards/chosen": 1.0520992279052734, "rewards/margins": 0.9028045535087585, "rewards/rejected": 0.14929473400115967, "step": 2740 }, { "epoch": 0.72, "learning_rate": 2.209340703535169e-07, "logits/chosen": -1.4604949951171875, "logits/rejected": -1.349486231803894, "logps/chosen": -2208.95947265625, "logps/rejected": -1485.8690185546875, "loss": 0.681, "rewards/accuracies": 0.675000011920929, "rewards/chosen": 0.6709630489349365, "rewards/margins": 0.3589937090873718, "rewards/rejected": 0.3119693398475647, "step": 2750 }, { "epoch": 0.72, "learning_rate": 2.171546984030056e-07, "logits/chosen": -1.4167903661727905, "logits/rejected": -1.3515651226043701, "logps/chosen": -2376.03857421875, "logps/rejected": -1771.710205078125, "loss": 0.6252, "rewards/accuracies": 0.699999988079071, "rewards/chosen": 0.8238846063613892, "rewards/margins": 0.5126960277557373, "rewards/rejected": 0.31118854880332947, "step": 2760 }, { "epoch": 0.72, "learning_rate": 2.1339894395378067e-07, "logits/chosen": -1.4049344062805176, "logits/rejected": -1.3172833919525146, "logps/chosen": -2733.88330078125, "logps/rejected": -2381.346923828125, "loss": 0.6818, "rewards/accuracies": 0.699999988079071, "rewards/chosen": 0.9343398213386536, "rewards/margins": 0.8638205528259277, "rewards/rejected": 0.0705193430185318, "step": 2770 }, { "epoch": 0.73, "learning_rate": 2.096671206102582e-07, "logits/chosen": -1.4337115287780762, "logits/rejected": -1.318379521369934, "logps/chosen": -2679.900390625, "logps/rejected": -1911.437255859375, "loss": 0.6114, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": 0.6781307458877563, "rewards/margins": 0.644757866859436, "rewards/rejected": 0.03337289020419121, "step": 2780 }, { "epoch": 0.73, "learning_rate": 2.0595953997861326e-07, "logits/chosen": -1.4050424098968506, "logits/rejected": -1.3058971166610718, "logps/chosen": -2156.65185546875, "logps/rejected": -1914.407958984375, "loss": 0.5951, "rewards/accuracies": 0.550000011920929, "rewards/chosen": 0.6217959523200989, "rewards/margins": 0.3280433416366577, "rewards/rejected": 0.2937525808811188, "step": 2790 }, { "epoch": 0.73, "learning_rate": 2.0227651164076153e-07, "logits/chosen": -1.4374796152114868, "logits/rejected": -1.432049036026001, "logps/chosen": -2434.140380859375, "logps/rejected": -1971.261474609375, "loss": 0.6174, "rewards/accuracies": 0.75, "rewards/chosen": 0.6336767673492432, "rewards/margins": 0.5388184785842896, "rewards/rejected": 0.09485818445682526, "step": 2800 }, { "epoch": 0.73, "eval_logits/chosen": -1.3635029792785645, "eval_logits/rejected": -1.331161618232727, "eval_logps/chosen": -2587.720947265625, "eval_logps/rejected": -2192.89892578125, "eval_loss": 0.6305412650108337, "eval_rewards/accuracies": 0.6579999923706055, "eval_rewards/chosen": 0.8505436778068542, "eval_rewards/margins": 0.474366158246994, "eval_rewards/rejected": 0.376177579164505, "eval_runtime": 278.5163, "eval_samples_per_second": 7.181, "eval_steps_per_second": 0.449, "step": 2800 }, { "epoch": 0.74, "learning_rate": 1.986183431285095e-07, "logits/chosen": -1.4380724430084229, "logits/rejected": -1.3568377494812012, "logps/chosen": -2706.828857421875, "logps/rejected": -2156.448486328125, "loss": 0.6185, "rewards/accuracies": 0.699999988079071, "rewards/chosen": 1.1412960290908813, "rewards/margins": 0.6656811833381653, "rewards/rejected": 0.4756149351596832, "step": 2810 }, { "epoch": 0.74, "learning_rate": 1.9498533989787508e-07, "logits/chosen": -1.4406335353851318, "logits/rejected": -1.4548825025558472, "logps/chosen": -2395.5869140625, "logps/rejected": -2094.87353515625, "loss": 0.6198, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": 0.9250233769416809, "rewards/margins": 0.521003782749176, "rewards/rejected": 0.4040195345878601, "step": 2820 }, { "epoch": 0.74, "learning_rate": 1.9137780530358255e-07, "logits/chosen": -1.4243742227554321, "logits/rejected": -1.3397035598754883, "logps/chosen": -2837.8330078125, "logps/rejected": -2148.09423828125, "loss": 0.6453, "rewards/accuracies": 0.75, "rewards/chosen": 1.0236155986785889, "rewards/margins": 0.5235460996627808, "rewards/rejected": 0.5000696182250977, "step": 2830 }, { "epoch": 0.74, "learning_rate": 1.8779604057373232e-07, "logits/chosen": -1.4576950073242188, "logits/rejected": -1.3922784328460693, "logps/chosen": -2905.541259765625, "logps/rejected": -2111.591064453125, "loss": 0.5378, "rewards/accuracies": 0.625, "rewards/chosen": 1.0767219066619873, "rewards/margins": 0.7249119281768799, "rewards/rejected": 0.35180991888046265, "step": 2840 }, { "epoch": 0.75, "learning_rate": 1.842403447846485e-07, "logits/chosen": -1.3431110382080078, "logits/rejected": -1.3422303199768066, "logps/chosen": -2476.67431640625, "logps/rejected": -1822.1331787109375, "loss": 0.6316, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": 0.6926653385162354, "rewards/margins": 0.35034242272377014, "rewards/rejected": 0.34232297539711, "step": 2850 }, { "epoch": 0.75, "learning_rate": 1.8071101483590657e-07, "logits/chosen": -1.4735561609268188, "logits/rejected": -1.4546959400177002, "logps/chosen": -2374.36376953125, "logps/rejected": -1975.539794921875, "loss": 0.6615, "rewards/accuracies": 0.75, "rewards/chosen": 0.792915940284729, "rewards/margins": 0.5957690477371216, "rewards/rejected": 0.19714678823947906, "step": 2860 }, { "epoch": 0.75, "learning_rate": 1.772083454255413e-07, "logits/chosen": -1.4286420345306396, "logits/rejected": -1.3648045063018799, "logps/chosen": -3039.596435546875, "logps/rejected": -2663.343017578125, "loss": 0.6159, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": 0.8077454566955566, "rewards/margins": 0.41170400381088257, "rewards/rejected": 0.3960413932800293, "step": 2870 }, { "epoch": 0.75, "learning_rate": 1.7373262902544057e-07, "logits/chosen": -1.3764533996582031, "logits/rejected": -1.3593838214874268, "logps/chosen": -1988.8544921875, "logps/rejected": -1976.740966796875, "loss": 0.6497, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": 0.4973524212837219, "rewards/margins": 0.3927370011806488, "rewards/rejected": 0.10461540520191193, "step": 2880 }, { "epoch": 0.76, "learning_rate": 1.7028415585692335e-07, "logits/chosen": -1.4325544834136963, "logits/rejected": -1.3712177276611328, "logps/chosen": -2467.70849609375, "logps/rejected": -1956.5791015625, "loss": 0.6218, "rewards/accuracies": 0.550000011920929, "rewards/chosen": 0.5883597135543823, "rewards/margins": 0.17649570107460022, "rewards/rejected": 0.41186395287513733, "step": 2890 }, { "epoch": 0.76, "learning_rate": 1.668632138665071e-07, "logits/chosen": -1.4749171733856201, "logits/rejected": -1.4637329578399658, "logps/chosen": -2227.57861328125, "logps/rejected": -2143.876220703125, "loss": 0.5972, "rewards/accuracies": 0.625, "rewards/chosen": 0.6317578554153442, "rewards/margins": 0.4265865683555603, "rewards/rejected": 0.20517130196094513, "step": 2900 }, { "epoch": 0.76, "eval_logits/chosen": -1.3840327262878418, "eval_logits/rejected": -1.3491802215576172, "eval_logps/chosen": -2607.56591796875, "eval_logps/rejected": -2207.613037109375, "eval_loss": 0.6310118436813354, "eval_rewards/accuracies": 0.6600000262260437, "eval_rewards/chosen": 0.6520929932594299, "eval_rewards/margins": 0.423054039478302, "eval_rewards/rejected": 0.22903895378112793, "eval_runtime": 272.7578, "eval_samples_per_second": 7.333, "eval_steps_per_second": 0.458, "step": 2900 }, { "epoch": 0.76, "learning_rate": 1.6347008870186346e-07, "logits/chosen": -1.5404746532440186, "logits/rejected": -1.529778242111206, "logps/chosen": -1933.6298828125, "logps/rejected": -1536.895263671875, "loss": 0.5808, "rewards/accuracies": 0.675000011920929, "rewards/chosen": 0.5152641534805298, "rewards/margins": 0.40403586626052856, "rewards/rejected": 0.11122828722000122, "step": 2910 }, { "epoch": 0.76, "learning_rate": 1.6010506368796718e-07, "logits/chosen": -1.4943128824234009, "logits/rejected": -1.456924319267273, "logps/chosen": -2417.9599609375, "logps/rejected": -2162.760009765625, "loss": 0.6541, "rewards/accuracies": 0.5, "rewards/chosen": 0.7175670862197876, "rewards/margins": 0.277101993560791, "rewards/rejected": 0.4404650628566742, "step": 2920 }, { "epoch": 0.77, "learning_rate": 1.5676841980343852e-07, "logits/chosen": -1.3981895446777344, "logits/rejected": -1.4527111053466797, "logps/chosen": -2597.65966796875, "logps/rejected": -2179.39794921875, "loss": 0.5982, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": 0.7483694553375244, "rewards/margins": 0.6122652292251587, "rewards/rejected": 0.13610415160655975, "step": 2930 }, { "epoch": 0.77, "learning_rate": 1.5346043565708167e-07, "logits/chosen": -1.3840751647949219, "logits/rejected": -1.3206273317337036, "logps/chosen": -3326.64208984375, "logps/rejected": -2419.403564453125, "loss": 0.6279, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": 0.9665316343307495, "rewards/margins": 0.39615345001220703, "rewards/rejected": 0.570378303527832, "step": 2940 }, { "epoch": 0.77, "learning_rate": 1.5018138746462077e-07, "logits/chosen": -1.3687031269073486, "logits/rejected": -1.3737823963165283, "logps/chosen": -2446.22119140625, "logps/rejected": -2089.50146484375, "loss": 0.5952, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": 0.6719570755958557, "rewards/margins": 0.43817195296287537, "rewards/rejected": 0.23378506302833557, "step": 2950 }, { "epoch": 0.77, "learning_rate": 1.4693154902563642e-07, "logits/chosen": -1.4316002130508423, "logits/rejected": -1.4517178535461426, "logps/chosen": -2900.650146484375, "logps/rejected": -2507.39111328125, "loss": 0.5741, "rewards/accuracies": 0.75, "rewards/chosen": 1.0101929903030396, "rewards/margins": 0.6262551546096802, "rewards/rejected": 0.3839378356933594, "step": 2960 }, { "epoch": 0.78, "learning_rate": 1.4371119170070273e-07, "logits/chosen": -1.3841904401779175, "logits/rejected": -1.363883137702942, "logps/chosen": -1948.8519287109375, "logps/rejected": -1932.33984375, "loss": 0.6363, "rewards/accuracies": 0.5, "rewards/chosen": 0.4831606447696686, "rewards/margins": 0.17371401190757751, "rewards/rejected": 0.3094465136528015, "step": 2970 }, { "epoch": 0.78, "learning_rate": 1.4052058438873004e-07, "logits/chosen": -1.416628122329712, "logits/rejected": -1.4233081340789795, "logps/chosen": -2243.0478515625, "logps/rejected": -1969.6051025390625, "loss": 0.6821, "rewards/accuracies": 0.675000011920929, "rewards/chosen": 0.6370882987976074, "rewards/margins": 0.44900625944137573, "rewards/rejected": 0.18808197975158691, "step": 2980 }, { "epoch": 0.78, "learning_rate": 1.3735999350451043e-07, "logits/chosen": -1.4408037662506104, "logits/rejected": -1.404234528541565, "logps/chosen": -2907.764892578125, "logps/rejected": -2352.541259765625, "loss": 0.5851, "rewards/accuracies": 0.675000011920929, "rewards/chosen": 0.8572597503662109, "rewards/margins": 0.5889304280281067, "rewards/rejected": 0.26832932233810425, "step": 2990 }, { "epoch": 0.79, "learning_rate": 1.3422968295647325e-07, "logits/chosen": -1.3623632192611694, "logits/rejected": -1.3807958364486694, "logps/chosen": -2706.734375, "logps/rejected": -2360.01220703125, "loss": 0.6645, "rewards/accuracies": 0.675000011920929, "rewards/chosen": 0.5788125395774841, "rewards/margins": 0.5309603810310364, "rewards/rejected": 0.04785219207406044, "step": 3000 }, { "epoch": 0.79, "eval_logits/chosen": -1.3678345680236816, "eval_logits/rejected": -1.3329540491104126, "eval_logps/chosen": -2602.423828125, "eval_logps/rejected": -2204.72509765625, "eval_loss": 0.6291071176528931, "eval_rewards/accuracies": 0.6520000100135803, "eval_rewards/chosen": 0.7035152912139893, "eval_rewards/margins": 0.4455997347831726, "eval_rewards/rejected": 0.2579156160354614, "eval_runtime": 270.6778, "eval_samples_per_second": 7.389, "eval_steps_per_second": 0.462, "step": 3000 }, { "epoch": 0.79, "learning_rate": 1.3112991412464825e-07, "logits/chosen": -1.4341932535171509, "logits/rejected": -1.3131691217422485, "logps/chosen": -2790.000244140625, "logps/rejected": -2364.25634765625, "loss": 0.592, "rewards/accuracies": 0.675000011920929, "rewards/chosen": 0.8394748568534851, "rewards/margins": 0.5635267496109009, "rewards/rejected": 0.2759481370449066, "step": 3010 }, { "epoch": 0.79, "learning_rate": 1.2806094583884114e-07, "logits/chosen": -1.4048922061920166, "logits/rejected": -1.338818907737732, "logps/chosen": -2550.039306640625, "logps/rejected": -2193.238037109375, "loss": 0.593, "rewards/accuracies": 0.800000011920929, "rewards/chosen": 0.9174189567565918, "rewards/margins": 0.5785583257675171, "rewards/rejected": 0.3388606011867523, "step": 3020 }, { "epoch": 0.79, "learning_rate": 1.2502303435702043e-07, "logits/chosen": -1.365466594696045, "logits/rejected": -1.372934103012085, "logps/chosen": -2571.452880859375, "logps/rejected": -2407.875244140625, "loss": 0.6395, "rewards/accuracies": 0.5, "rewards/chosen": 0.7644615173339844, "rewards/margins": 0.3260182738304138, "rewards/rejected": 0.43844324350357056, "step": 3030 }, { "epoch": 0.8, "learning_rate": 1.2201643334392082e-07, "logits/chosen": -1.4473758935928345, "logits/rejected": -1.4218379259109497, "logps/chosen": -2538.86669921875, "logps/rejected": -2070.79150390625, "loss": 0.6042, "rewards/accuracies": 0.574999988079071, "rewards/chosen": 0.5745121836662292, "rewards/margins": 0.16391856968402863, "rewards/rejected": 0.41059359908103943, "step": 3040 }, { "epoch": 0.8, "learning_rate": 1.1904139384986123e-07, "logits/chosen": -1.490330696105957, "logits/rejected": -1.4432008266448975, "logps/chosen": -3188.41455078125, "logps/rejected": -2706.665283203125, "loss": 0.5808, "rewards/accuracies": 0.800000011920929, "rewards/chosen": 1.0437430143356323, "rewards/margins": 0.7445145845413208, "rewards/rejected": 0.2992284893989563, "step": 3050 }, { "epoch": 0.8, "learning_rate": 1.1609816428978359e-07, "logits/chosen": -1.4559834003448486, "logits/rejected": -1.420280933380127, "logps/chosen": -2551.58740234375, "logps/rejected": -2584.430419921875, "loss": 0.6416, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": 0.8050218820571899, "rewards/margins": 0.5264039635658264, "rewards/rejected": 0.2786179184913635, "step": 3060 }, { "epoch": 0.8, "learning_rate": 1.1318699042250918e-07, "logits/chosen": -1.3969361782073975, "logits/rejected": -1.3813354969024658, "logps/chosen": -2994.219970703125, "logps/rejected": -3177.122802734375, "loss": 0.7049, "rewards/accuracies": 0.574999988079071, "rewards/chosen": 0.7557341456413269, "rewards/margins": 0.26294729113578796, "rewards/rejected": 0.4927869439125061, "step": 3070 }, { "epoch": 0.81, "learning_rate": 1.10308115330218e-07, "logits/chosen": -1.43484365940094, "logits/rejected": -1.3891115188598633, "logps/chosen": -3088.582275390625, "logps/rejected": -2204.107666015625, "loss": 0.6057, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": 0.45172542333602905, "rewards/margins": 0.5553884506225586, "rewards/rejected": -0.10366306453943253, "step": 3080 }, { "epoch": 0.81, "learning_rate": 1.0746177939815171e-07, "logits/chosen": -1.4643625020980835, "logits/rejected": -1.4422532320022583, "logps/chosen": -2440.419921875, "logps/rejected": -1975.225830078125, "loss": 0.582, "rewards/accuracies": 0.800000011920929, "rewards/chosen": 0.4540809094905853, "rewards/margins": 0.652599573135376, "rewards/rejected": -0.19851863384246826, "step": 3090 }, { "epoch": 0.81, "learning_rate": 1.0464822029454179e-07, "logits/chosen": -1.4160230159759521, "logits/rejected": -1.3704195022583008, "logps/chosen": -2510.39892578125, "logps/rejected": -2070.017333984375, "loss": 0.5786, "rewards/accuracies": 0.5, "rewards/chosen": 0.3841037154197693, "rewards/margins": 0.3324592113494873, "rewards/rejected": 0.05164450407028198, "step": 3100 }, { "epoch": 0.81, "eval_logits/chosen": -1.3498371839523315, "eval_logits/rejected": -1.3173154592514038, "eval_logps/chosen": -2618.25341796875, "eval_logps/rejected": -2218.29443359375, "eval_loss": 0.6310141086578369, "eval_rewards/accuracies": 0.6579999923706055, "eval_rewards/chosen": 0.5452163815498352, "eval_rewards/margins": 0.42299649119377136, "eval_rewards/rejected": 0.12221993505954742, "eval_runtime": 280.1277, "eval_samples_per_second": 7.14, "eval_steps_per_second": 0.446, "step": 3100 }, { "epoch": 0.81, "learning_rate": 1.0186767295076359e-07, "logits/chosen": -1.2851413488388062, "logits/rejected": -1.2941486835479736, "logps/chosen": -2658.156982421875, "logps/rejected": -2545.443359375, "loss": 0.6753, "rewards/accuracies": 0.550000011920929, "rewards/chosen": 0.700161337852478, "rewards/margins": 0.09443531185388565, "rewards/rejected": 0.6057260632514954, "step": 3110 }, { "epoch": 0.82, "learning_rate": 9.91203695417201e-08, "logits/chosen": -1.4478733539581299, "logits/rejected": -1.4418970346450806, "logps/chosen": -2017.9547119140625, "logps/rejected": -1859.371826171875, "loss": 0.6449, "rewards/accuracies": 0.574999988079071, "rewards/chosen": 0.4665658473968506, "rewards/margins": 0.1884872317314148, "rewards/rejected": 0.2780786454677582, "step": 3120 }, { "epoch": 0.82, "learning_rate": 9.640653946645527e-08, "logits/chosen": -1.4594916105270386, "logits/rejected": -1.3838837146759033, "logps/chosen": -2871.414794921875, "logps/rejected": -2458.6142578125, "loss": 0.6278, "rewards/accuracies": 0.75, "rewards/chosen": 0.8108320236206055, "rewards/margins": 0.5832007527351379, "rewards/rejected": 0.22763130068778992, "step": 3130 }, { "epoch": 0.82, "learning_rate": 9.372640932899962e-08, "logits/chosen": -1.389960527420044, "logits/rejected": -1.3612440824508667, "logps/chosen": -1948.0992431640625, "logps/rejected": -1892.2113037109375, "loss": 0.7257, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": 0.27821439504623413, "rewards/margins": 0.045331284403800964, "rewards/rejected": 0.23288312554359436, "step": 3140 }, { "epoch": 0.82, "learning_rate": 9.108020291944835e-08, "logits/chosen": -1.418404221534729, "logits/rejected": -1.3830190896987915, "logps/chosen": -3158.57080078125, "logps/rejected": -2620.3515625, "loss": 0.6405, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": 0.5857509970664978, "rewards/margins": 0.29750341176986694, "rewards/rejected": 0.28824761509895325, "step": 3150 }, { "epoch": 0.83, "learning_rate": 8.84681411952749e-08, "logits/chosen": -1.4332993030548096, "logits/rejected": -1.414900302886963, "logps/chosen": -2372.030517578125, "logps/rejected": -2132.096435546875, "loss": 0.6393, "rewards/accuracies": 0.574999988079071, "rewards/chosen": 0.41337814927101135, "rewards/margins": 0.3938220739364624, "rewards/rejected": 0.019556106999516487, "step": 3160 }, { "epoch": 0.83, "learning_rate": 8.589044226288156e-08, "logits/chosen": -1.4317436218261719, "logits/rejected": -1.347076177597046, "logps/chosen": -2397.37548828125, "logps/rejected": -2127.24951171875, "loss": 0.6105, "rewards/accuracies": 0.675000011920929, "rewards/chosen": 0.4847371578216553, "rewards/margins": 0.4483153820037842, "rewards/rejected": 0.03642178699374199, "step": 3170 }, { "epoch": 0.83, "learning_rate": 8.334732135938761e-08, "logits/chosen": -1.2719449996948242, "logits/rejected": -1.2198649644851685, "logps/chosen": -2358.66259765625, "logps/rejected": -2044.496826171875, "loss": 0.6566, "rewards/accuracies": 0.675000011920929, "rewards/chosen": 0.3985385596752167, "rewards/margins": 0.4965389370918274, "rewards/rejected": -0.09800038486719131, "step": 3180 }, { "epoch": 0.83, "learning_rate": 8.08389908346565e-08, "logits/chosen": -1.5145864486694336, "logits/rejected": -1.4807628393173218, "logps/chosen": -2540.138916015625, "logps/rejected": -2373.93701171875, "loss": 0.6552, "rewards/accuracies": 0.625, "rewards/chosen": 0.4005827009677887, "rewards/margins": 0.17770811915397644, "rewards/rejected": 0.22287459671497345, "step": 3190 }, { "epoch": 0.84, "learning_rate": 7.836566013356521e-08, "logits/chosen": -1.2830257415771484, "logits/rejected": -1.309693694114685, "logps/chosen": -2139.562255859375, "logps/rejected": -1838.9498291015625, "loss": 0.604, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": 0.2404891550540924, "rewards/margins": 0.255484402179718, "rewards/rejected": -0.014995294623076916, "step": 3200 }, { "epoch": 0.84, "eval_logits/chosen": -1.3759506940841675, "eval_logits/rejected": -1.34440279006958, "eval_logps/chosen": -2639.503173828125, "eval_logps/rejected": -2235.78515625, "eval_loss": 0.637482225894928, "eval_rewards/accuracies": 0.6539999842643738, "eval_rewards/chosen": 0.3327209949493408, "eval_rewards/margins": 0.38540521264076233, "eval_rewards/rejected": -0.05268419533967972, "eval_runtime": 278.5127, "eval_samples_per_second": 7.181, "eval_steps_per_second": 0.449, "step": 3200 }, { "epoch": 0.84, "learning_rate": 7.59275357785154e-08, "logits/chosen": -1.4814903736114502, "logits/rejected": -1.3584920167922974, "logps/chosen": -2739.283935546875, "logps/rejected": -1941.9361572265625, "loss": 0.6488, "rewards/accuracies": 0.699999988079071, "rewards/chosen": 0.32887548208236694, "rewards/margins": 0.31491416692733765, "rewards/rejected": 0.01396133191883564, "step": 3210 }, { "epoch": 0.84, "learning_rate": 7.352482135218929e-08, "logits/chosen": -1.497166633605957, "logits/rejected": -1.419870138168335, "logps/chosen": -2452.299072265625, "logps/rejected": -1719.0814208984375, "loss": 0.6135, "rewards/accuracies": 0.550000011920929, "rewards/chosen": 0.4226195812225342, "rewards/margins": 0.5035466551780701, "rewards/rejected": -0.08092708140611649, "step": 3220 }, { "epoch": 0.85, "learning_rate": 7.115771748054994e-08, "logits/chosen": -1.4832055568695068, "logits/rejected": -1.4479320049285889, "logps/chosen": -2702.10791015625, "logps/rejected": -2256.0849609375, "loss": 0.6603, "rewards/accuracies": 0.550000011920929, "rewards/chosen": 0.7271806001663208, "rewards/margins": 0.25195473432540894, "rewards/rejected": 0.47522586584091187, "step": 3230 }, { "epoch": 0.85, "learning_rate": 6.882642181608938e-08, "logits/chosen": -1.3810951709747314, "logits/rejected": -1.2905547618865967, "logps/chosen": -2894.007080078125, "logps/rejected": -2063.866943359375, "loss": 0.6606, "rewards/accuracies": 0.75, "rewards/chosen": 0.7991534471511841, "rewards/margins": 0.7913631200790405, "rewards/rejected": 0.007790341041982174, "step": 3240 }, { "epoch": 0.85, "learning_rate": 6.653112902132468e-08, "logits/chosen": -1.3729079961776733, "logits/rejected": -1.3389288187026978, "logps/chosen": -2688.36376953125, "logps/rejected": -2399.32763671875, "loss": 0.6659, "rewards/accuracies": 0.574999988079071, "rewards/chosen": 0.43051600456237793, "rewards/margins": 0.23889867961406708, "rewards/rejected": 0.19161732494831085, "step": 3250 }, { "epoch": 0.85, "learning_rate": 6.427203075254389e-08, "logits/chosen": -1.4705748558044434, "logits/rejected": -1.4179273843765259, "logps/chosen": -3436.260986328125, "logps/rejected": -2933.96923828125, "loss": 0.6552, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": 0.7826542854309082, "rewards/margins": 1.2829375267028809, "rewards/rejected": -0.5002831220626831, "step": 3260 }, { "epoch": 0.86, "learning_rate": 6.204931564380212e-08, "logits/chosen": -1.477654218673706, "logits/rejected": -1.4244531393051147, "logps/chosen": -2670.315673828125, "logps/rejected": -2377.605712890625, "loss": 0.6777, "rewards/accuracies": 0.550000011920929, "rewards/chosen": 0.8277036547660828, "rewards/margins": 0.34663817286491394, "rewards/rejected": 0.48106545209884644, "step": 3270 }, { "epoch": 0.86, "learning_rate": 5.98631692911713e-08, "logits/chosen": -1.3127539157867432, "logits/rejected": -1.3314340114593506, "logps/chosen": -2312.9111328125, "logps/rejected": -2017.139404296875, "loss": 0.6084, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": 0.5287872552871704, "rewards/margins": 0.13690263032913208, "rewards/rejected": 0.39188462495803833, "step": 3280 }, { "epoch": 0.86, "learning_rate": 5.7713774237242716e-08, "logits/chosen": -1.291377305984497, "logits/rejected": -1.405045747756958, "logps/chosen": -1881.165283203125, "logps/rejected": -1862.961181640625, "loss": 0.582, "rewards/accuracies": 0.699999988079071, "rewards/chosen": 0.5566512942314148, "rewards/margins": 0.4035774767398834, "rewards/rejected": 0.1530737578868866, "step": 3290 }, { "epoch": 0.86, "learning_rate": 5.5601309955884965e-08, "logits/chosen": -1.4422776699066162, "logits/rejected": -1.4091880321502686, "logps/chosen": -2665.621337890625, "logps/rejected": -2591.151123046875, "loss": 0.6704, "rewards/accuracies": 0.574999988079071, "rewards/chosen": 0.6587532758712769, "rewards/margins": 0.12101428210735321, "rewards/rejected": 0.5377389788627625, "step": 3300 }, { "epoch": 0.86, "eval_logits/chosen": -1.3585261106491089, "eval_logits/rejected": -1.3240634202957153, "eval_logps/chosen": -2599.5048828125, "eval_logps/rejected": -2201.557861328125, "eval_loss": 0.6269048452377319, "eval_rewards/accuracies": 0.6539999842643738, "eval_rewards/chosen": 0.7327041625976562, "eval_rewards/margins": 0.4431154131889343, "eval_rewards/rejected": 0.2895888090133667, "eval_runtime": 266.5396, "eval_samples_per_second": 7.504, "eval_steps_per_second": 0.469, "step": 3300 }, { "epoch": 0.87, "learning_rate": 5.352595283725758e-08, "logits/chosen": -1.3561054468154907, "logits/rejected": -1.364051103591919, "logps/chosen": -2751.002197265625, "logps/rejected": -2776.696044921875, "loss": 0.5839, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": 0.9600070714950562, "rewards/margins": 0.5443626642227173, "rewards/rejected": 0.41564440727233887, "step": 3310 }, { "epoch": 0.87, "learning_rate": 5.1487876173082704e-08, "logits/chosen": -1.3982620239257812, "logits/rejected": -1.3981740474700928, "logps/chosen": -2878.243408203125, "logps/rejected": -2694.689208984375, "loss": 0.6258, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": 0.723610520362854, "rewards/margins": 0.2514679431915283, "rewards/rejected": 0.4721425473690033, "step": 3320 }, { "epoch": 0.87, "learning_rate": 4.948725014217514e-08, "logits/chosen": -1.425713300704956, "logits/rejected": -1.3950655460357666, "logps/chosen": -3086.80078125, "logps/rejected": -2376.532958984375, "loss": 0.644, "rewards/accuracies": 0.550000011920929, "rewards/chosen": 0.8816632032394409, "rewards/margins": 0.4414462447166443, "rewards/rejected": 0.44021695852279663, "step": 3330 }, { "epoch": 0.87, "learning_rate": 4.752424179623299e-08, "logits/chosen": -1.4097102880477905, "logits/rejected": -1.3888671398162842, "logps/chosen": -2709.783935546875, "logps/rejected": -2628.78759765625, "loss": 0.6948, "rewards/accuracies": 0.625, "rewards/chosen": 0.5886452794075012, "rewards/margins": 0.1681179255247116, "rewards/rejected": 0.4205273687839508, "step": 3340 }, { "epoch": 0.88, "learning_rate": 4.559901504588809e-08, "logits/chosen": -1.210430383682251, "logits/rejected": -1.2701146602630615, "logps/chosen": -2500.774169921875, "logps/rejected": -2351.170166015625, "loss": 0.6319, "rewards/accuracies": 0.675000011920929, "rewards/chosen": 0.28235870599746704, "rewards/margins": 0.1418120115995407, "rewards/rejected": 0.14054664969444275, "step": 3350 }, { "epoch": 0.88, "learning_rate": 4.371173064702011e-08, "logits/chosen": -1.3905829191207886, "logits/rejected": -1.3594163656234741, "logps/chosen": -1871.867919921875, "logps/rejected": -2297.736328125, "loss": 0.6397, "rewards/accuracies": 0.550000011920929, "rewards/chosen": 0.32014450430870056, "rewards/margins": 0.10169104486703873, "rewards/rejected": 0.21845343708992004, "step": 3360 }, { "epoch": 0.88, "learning_rate": 4.1862546187333145e-08, "logits/chosen": -1.4513777494430542, "logits/rejected": -1.4358501434326172, "logps/chosen": -2513.241455078125, "logps/rejected": -2141.718994140625, "loss": 0.6454, "rewards/accuracies": 0.550000011920929, "rewards/chosen": 0.5060147047042847, "rewards/margins": 0.21100831031799316, "rewards/rejected": 0.2950064539909363, "step": 3370 }, { "epoch": 0.88, "learning_rate": 4.005161607319746e-08, "logits/chosen": -1.4373111724853516, "logits/rejected": -1.399320363998413, "logps/chosen": -2743.34814453125, "logps/rejected": -2631.894775390625, "loss": 0.6441, "rewards/accuracies": 0.625, "rewards/chosen": 0.6500831246376038, "rewards/margins": 0.30057162046432495, "rewards/rejected": 0.3495115339756012, "step": 3380 }, { "epoch": 0.89, "learning_rate": 3.827909151675651e-08, "logits/chosen": -1.281432867050171, "logits/rejected": -1.2471529245376587, "logps/chosen": -2523.722412109375, "logps/rejected": -2145.090087890625, "loss": 0.6569, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": 0.4543123245239258, "rewards/margins": 0.2866598069667816, "rewards/rejected": 0.16765250265598297, "step": 3390 }, { "epoch": 0.89, "learning_rate": 3.6545120523300554e-08, "logits/chosen": -1.2581170797348022, "logits/rejected": -1.2861872911453247, "logps/chosen": -2385.53515625, "logps/rejected": -2062.32470703125, "loss": 0.6365, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": 0.6952205300331116, "rewards/margins": 0.453753799200058, "rewards/rejected": 0.24146680533885956, "step": 3400 }, { "epoch": 0.89, "eval_logits/chosen": -1.3370972871780396, "eval_logits/rejected": -1.3038172721862793, "eval_logps/chosen": -2603.77392578125, "eval_logps/rejected": -2204.74365234375, "eval_loss": 0.6270672678947449, "eval_rewards/accuracies": 0.656000018119812, "eval_rewards/chosen": 0.6900160312652588, "eval_rewards/margins": 0.4322858154773712, "eval_rewards/rejected": 0.2577301859855652, "eval_runtime": 274.6396, "eval_samples_per_second": 7.282, "eval_steps_per_second": 0.455, "step": 3400 }, { "epoch": 0.89, "learning_rate": 3.484984787890854e-08, "logits/chosen": -1.440553903579712, "logits/rejected": -1.4239243268966675, "logps/chosen": -2027.2261962890625, "logps/rejected": -2211.42333984375, "loss": 0.6302, "rewards/accuracies": 0.550000011920929, "rewards/chosen": 0.5019555687904358, "rewards/margins": 0.24998077750205994, "rewards/rejected": 0.25197476148605347, "step": 3410 }, { "epoch": 0.9, "learning_rate": 3.3193415138358605e-08, "logits/chosen": -1.3577989339828491, "logits/rejected": -1.3291637897491455, "logps/chosen": -2733.109375, "logps/rejected": -2187.62939453125, "loss": 0.6422, "rewards/accuracies": 0.574999988079071, "rewards/chosen": 0.5953333377838135, "rewards/margins": 0.41831302642822266, "rewards/rejected": 0.1770203560590744, "step": 3420 }, { "epoch": 0.9, "learning_rate": 3.1575960613307697e-08, "logits/chosen": -1.3546245098114014, "logits/rejected": -1.3422510623931885, "logps/chosen": -3364.149169921875, "logps/rejected": -2881.01806640625, "loss": 0.6371, "rewards/accuracies": 0.699999988079071, "rewards/chosen": 0.7074936628341675, "rewards/margins": 0.3585987389087677, "rewards/rejected": 0.348894864320755, "step": 3430 }, { "epoch": 0.9, "learning_rate": 2.99976193607433e-08, "logits/chosen": -1.4342143535614014, "logits/rejected": -1.3803553581237793, "logps/chosen": -2840.54345703125, "logps/rejected": -2618.3125, "loss": 0.604, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": 0.7095770835876465, "rewards/margins": 0.4237367510795593, "rewards/rejected": 0.28584036231040955, "step": 3440 }, { "epoch": 0.9, "learning_rate": 2.8458523171705606e-08, "logits/chosen": -1.3679982423782349, "logits/rejected": -1.3952059745788574, "logps/chosen": -2528.96484375, "logps/rejected": -1968.1148681640625, "loss": 0.6092, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": 0.5186874270439148, "rewards/margins": 0.28269147872924805, "rewards/rejected": 0.23599597811698914, "step": 3450 }, { "epoch": 0.91, "learning_rate": 2.6958800560283766e-08, "logits/chosen": -1.2991187572479248, "logits/rejected": -1.2987558841705322, "logps/chosen": -1767.0989990234375, "logps/rejected": -1917.181396484375, "loss": 0.6245, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": 0.22372308373451233, "rewards/margins": 0.1914294809103012, "rewards/rejected": 0.032293595373630524, "step": 3460 }, { "epoch": 0.91, "learning_rate": 2.5498576752884083e-08, "logits/chosen": -1.2810406684875488, "logits/rejected": -1.2834056615829468, "logps/chosen": -2079.9150390625, "logps/rejected": -1688.689208984375, "loss": 0.5937, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": 0.3866010308265686, "rewards/margins": 0.4304170608520508, "rewards/rejected": -0.04381602257490158, "step": 3470 }, { "epoch": 0.91, "learning_rate": 2.4077973677774255e-08, "logits/chosen": -1.3817551136016846, "logits/rejected": -1.3114452362060547, "logps/chosen": -2581.08251953125, "logps/rejected": -2052.685302734375, "loss": 0.6545, "rewards/accuracies": 0.699999988079071, "rewards/chosen": 0.6486982107162476, "rewards/margins": 0.4967314600944519, "rewards/rejected": 0.15196672081947327, "step": 3480 }, { "epoch": 0.91, "learning_rate": 2.2697109954902262e-08, "logits/chosen": -1.3522804975509644, "logits/rejected": -1.3641847372055054, "logps/chosen": -2173.435302734375, "logps/rejected": -2246.341064453125, "loss": 0.6613, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": 0.4131731390953064, "rewards/margins": 0.2463291585445404, "rewards/rejected": 0.1668439656496048, "step": 3490 }, { "epoch": 0.92, "learning_rate": 2.13561008859916e-08, "logits/chosen": -1.3085618019104004, "logits/rejected": -1.340914249420166, "logps/chosen": -2336.463134765625, "logps/rejected": -1888.4931640625, "loss": 0.6621, "rewards/accuracies": 0.574999988079071, "rewards/chosen": 0.40662437677383423, "rewards/margins": 0.2938459515571594, "rewards/rejected": 0.11277846246957779, "step": 3500 }, { "epoch": 0.92, "eval_logits/chosen": -1.3320984840393066, "eval_logits/rejected": -1.299089789390564, "eval_logps/chosen": -2609.7431640625, "eval_logps/rejected": -2209.78271484375, "eval_loss": 0.6278749704360962, "eval_rewards/accuracies": 0.6579999923706055, "eval_rewards/chosen": 0.630323052406311, "eval_rewards/margins": 0.4229816198348999, "eval_rewards/rejected": 0.20734144747257233, "eval_runtime": 272.2354, "eval_samples_per_second": 7.347, "eval_steps_per_second": 0.459, "step": 3500 }, { "epoch": 0.92, "learning_rate": 2.0055058444913507e-08, "logits/chosen": -1.448785424232483, "logits/rejected": -1.4095611572265625, "logps/chosen": -2245.84423828125, "logps/rejected": -2347.550537109375, "loss": 0.6379, "rewards/accuracies": 0.574999988079071, "rewards/chosen": 0.6017327904701233, "rewards/margins": 0.3268904685974121, "rewards/rejected": 0.2748422920703888, "step": 3510 }, { "epoch": 0.92, "learning_rate": 1.879409126833753e-08, "logits/chosen": -1.4214824438095093, "logits/rejected": -1.410632610321045, "logps/chosen": -2336.942626953125, "logps/rejected": -2101.439697265625, "loss": 0.6053, "rewards/accuracies": 0.625, "rewards/chosen": 0.49574050307273865, "rewards/margins": 0.32954609394073486, "rewards/rejected": 0.16619448363780975, "step": 3520 }, { "epoch": 0.92, "learning_rate": 1.757330464665996e-08, "logits/chosen": -1.3709776401519775, "logits/rejected": -1.3891581296920776, "logps/chosen": -2405.004638671875, "logps/rejected": -2451.75537109375, "loss": 0.6052, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": 0.5401250123977661, "rewards/margins": 0.3884989619255066, "rewards/rejected": 0.15162606537342072, "step": 3530 }, { "epoch": 0.93, "learning_rate": 1.639280051521241e-08, "logits/chosen": -1.5023291110992432, "logits/rejected": -1.4649862051010132, "logps/chosen": -2664.8759765625, "logps/rejected": -2538.817626953125, "loss": 0.6109, "rewards/accuracies": 0.550000011920929, "rewards/chosen": 0.4001461863517761, "rewards/margins": 0.20141109824180603, "rewards/rejected": 0.19873513281345367, "step": 3540 }, { "epoch": 0.93, "learning_rate": 1.525267744575015e-08, "logits/chosen": -1.3901126384735107, "logits/rejected": -1.394431710243225, "logps/chosen": -2564.427490234375, "logps/rejected": -2594.514404296875, "loss": 0.6037, "rewards/accuracies": 0.699999988079071, "rewards/chosen": 0.7964712381362915, "rewards/margins": 0.5120159387588501, "rewards/rejected": 0.28445538878440857, "step": 3550 }, { "epoch": 0.93, "learning_rate": 1.4153030638221375e-08, "logits/chosen": -1.4207435846328735, "logits/rejected": -1.4046471118927002, "logps/chosen": -2954.137939453125, "logps/rejected": -2511.393798828125, "loss": 0.61, "rewards/accuracies": 0.699999988079071, "rewards/chosen": 0.6611908674240112, "rewards/margins": 0.4354327321052551, "rewards/rejected": 0.2257581651210785, "step": 3560 }, { "epoch": 0.93, "learning_rate": 1.309395191281798e-08, "logits/chosen": -1.330440878868103, "logits/rejected": -1.348436713218689, "logps/chosen": -2192.97412109375, "logps/rejected": -2151.583740234375, "loss": 0.6413, "rewards/accuracies": 0.44999998807907104, "rewards/chosen": 0.434548556804657, "rewards/margins": 0.1627674400806427, "rewards/rejected": 0.27178114652633667, "step": 3570 }, { "epoch": 0.94, "learning_rate": 1.207552970230885e-08, "logits/chosen": -1.4331698417663574, "logits/rejected": -1.414426326751709, "logps/chosen": -2461.943115234375, "logps/rejected": -2080.1328125, "loss": 0.6443, "rewards/accuracies": 0.625, "rewards/chosen": 0.4848058819770813, "rewards/margins": 0.4457271099090576, "rewards/rejected": 0.0390787310898304, "step": 3580 }, { "epoch": 0.94, "learning_rate": 1.1097849044655494e-08, "logits/chosen": -1.362441897392273, "logits/rejected": -1.3546955585479736, "logps/chosen": -2819.646728515625, "logps/rejected": -2652.50439453125, "loss": 0.6953, "rewards/accuracies": 0.574999988079071, "rewards/chosen": 0.5813437700271606, "rewards/margins": 0.29619497060775757, "rewards/rejected": 0.28514885902404785, "step": 3590 }, { "epoch": 0.94, "learning_rate": 1.0160991575911382e-08, "logits/chosen": -1.226768970489502, "logits/rejected": -1.2340877056121826, "logps/chosen": -2563.304931640625, "logps/rejected": -2343.0185546875, "loss": 0.6597, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": 0.4969327449798584, "rewards/margins": 0.1737329661846161, "rewards/rejected": 0.3231998085975647, "step": 3600 }, { "epoch": 0.94, "eval_logits/chosen": -1.334847331047058, "eval_logits/rejected": -1.3028244972229004, "eval_logps/chosen": -2617.37744140625, "eval_logps/rejected": -2216.108154296875, "eval_loss": 0.6294077038764954, "eval_rewards/accuracies": 0.6579999923706055, "eval_rewards/chosen": 0.5539795756340027, "eval_rewards/margins": 0.40989428758621216, "eval_rewards/rejected": 0.14408528804779053, "eval_runtime": 270.7461, "eval_samples_per_second": 7.387, "eval_steps_per_second": 0.462, "step": 3600 }, { "epoch": 0.94, "learning_rate": 9.265035523405628e-09, "logits/chosen": -1.4237889051437378, "logits/rejected": -1.3327428102493286, "logps/chosen": -2901.97412109375, "logps/rejected": -2182.808349609375, "loss": 0.6106, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": 0.6185927391052246, "rewards/margins": 0.4138878285884857, "rewards/rejected": 0.20470492541790009, "step": 3610 }, { "epoch": 0.95, "learning_rate": 8.410055699210716e-09, "logits/chosen": -1.3188023567199707, "logits/rejected": -1.3039883375167847, "logps/chosen": -2117.862060546875, "logps/rejected": -2145.378662109375, "loss": 0.6161, "rewards/accuracies": 0.574999988079071, "rewards/chosen": 0.2947086691856384, "rewards/margins": 0.19353394210338593, "rewards/rejected": 0.1011747345328331, "step": 3620 }, { "epoch": 0.95, "learning_rate": 7.59612349389599e-09, "logits/chosen": -1.3702406883239746, "logits/rejected": -1.3394161462783813, "logps/chosen": -2715.30615234375, "logps/rejected": -2510.065185546875, "loss": 0.6207, "rewards/accuracies": 0.675000011920929, "rewards/chosen": 0.7481818199157715, "rewards/margins": 0.404646635055542, "rewards/rejected": 0.3435351848602295, "step": 3630 }, { "epoch": 0.95, "learning_rate": 6.823306870566314e-09, "logits/chosen": -1.3175442218780518, "logits/rejected": -1.3016589879989624, "logps/chosen": -2922.791259765625, "logps/rejected": -2657.24951171875, "loss": 0.632, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": 0.4680928587913513, "rewards/margins": 0.19783690571784973, "rewards/rejected": 0.270255982875824, "step": 3640 }, { "epoch": 0.96, "learning_rate": 6.0916703591873396e-09, "logits/chosen": -1.373708724975586, "logits/rejected": -1.3260128498077393, "logps/chosen": -2837.0703125, "logps/rejected": -2214.2490234375, "loss": 0.5756, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": 0.7062289118766785, "rewards/margins": 0.4836703836917877, "rewards/rejected": 0.22255854308605194, "step": 3650 }, { "epoch": 0.96, "learning_rate": 5.401275051197196e-09, "logits/chosen": -1.2850544452667236, "logits/rejected": -1.304872989654541, "logps/chosen": -2000.0989990234375, "logps/rejected": -1932.3131103515625, "loss": 0.6367, "rewards/accuracies": 0.675000011920929, "rewards/chosen": 0.4161587357521057, "rewards/margins": 0.26545530557632446, "rewards/rejected": 0.15070338547229767, "step": 3660 }, { "epoch": 0.96, "learning_rate": 4.752178594405465e-09, "logits/chosen": -1.2426577806472778, "logits/rejected": -1.2828842401504517, "logps/chosen": -2575.1416015625, "logps/rejected": -2630.06201171875, "loss": 0.6169, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": 0.5871464610099792, "rewards/margins": 0.4162687659263611, "rewards/rejected": 0.17087773978710175, "step": 3670 }, { "epoch": 0.96, "learning_rate": 4.144435188179529e-09, "logits/chosen": -1.380395531654358, "logits/rejected": -1.3460490703582764, "logps/chosen": -2178.73583984375, "logps/rejected": -1936.242919921875, "loss": 0.612, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": 0.585101306438446, "rewards/margins": 0.3356373906135559, "rewards/rejected": 0.24946394562721252, "step": 3680 }, { "epoch": 0.97, "learning_rate": 3.5780955789187497e-09, "logits/chosen": -1.3782151937484741, "logits/rejected": -1.345668911933899, "logps/chosen": -3300.29248046875, "logps/rejected": -2721.07958984375, "loss": 0.6427, "rewards/accuracies": 0.675000011920929, "rewards/chosen": 0.9547792673110962, "rewards/margins": 0.46429508924484253, "rewards/rejected": 0.49048417806625366, "step": 3690 }, { "epoch": 0.97, "learning_rate": 3.0532070558177415e-09, "logits/chosen": -1.3277114629745483, "logits/rejected": -1.3181272745132446, "logps/chosen": -2194.970947265625, "logps/rejected": -1831.0181884765625, "loss": 0.671, "rewards/accuracies": 0.675000011920929, "rewards/chosen": 0.3654665946960449, "rewards/margins": 0.46283870935440063, "rewards/rejected": -0.09737209975719452, "step": 3700 }, { "epoch": 0.97, "eval_logits/chosen": -1.3357877731323242, "eval_logits/rejected": -1.3033195734024048, "eval_logps/chosen": -2613.330322265625, "eval_logps/rejected": -2212.7783203125, "eval_loss": 0.6284996271133423, "eval_rewards/accuracies": 0.6600000262260437, "eval_rewards/chosen": 0.5944509506225586, "eval_rewards/margins": 0.41706666350364685, "eval_rewards/rejected": 0.17738424241542816, "eval_runtime": 273.8684, "eval_samples_per_second": 7.303, "eval_steps_per_second": 0.456, "step": 3700 }, { "epoch": 0.97, "learning_rate": 2.5698134469169243e-09, "logits/chosen": -1.3617829084396362, "logits/rejected": -1.394221544265747, "logps/chosen": -2001.156494140625, "logps/rejected": -1911.104736328125, "loss": 0.581, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": 0.5259832143783569, "rewards/margins": 0.3205917775630951, "rewards/rejected": 0.20539140701293945, "step": 3710 }, { "epoch": 0.97, "learning_rate": 2.127955115443725e-09, "logits/chosen": -1.38681960105896, "logits/rejected": -1.381144404411316, "logps/chosen": -2056.17724609375, "logps/rejected": -1763.3297119140625, "loss": 0.5337, "rewards/accuracies": 0.75, "rewards/chosen": 0.6707687377929688, "rewards/margins": 0.6110190153121948, "rewards/rejected": 0.0597497932612896, "step": 3720 }, { "epoch": 0.98, "learning_rate": 1.727668956441497e-09, "logits/chosen": -1.3605945110321045, "logits/rejected": -1.3855431079864502, "logps/chosen": -2391.19384765625, "logps/rejected": -2493.66259765625, "loss": 0.6363, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": 0.550477147102356, "rewards/margins": 0.11832934617996216, "rewards/rejected": 0.432147741317749, "step": 3730 }, { "epoch": 0.98, "learning_rate": 1.3689883936894298e-09, "logits/chosen": -1.4387106895446777, "logits/rejected": -1.3756752014160156, "logps/chosen": -2820.1337890625, "logps/rejected": -2417.27783203125, "loss": 0.6197, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": 0.8682245016098022, "rewards/margins": 0.6829289197921753, "rewards/rejected": 0.1852956861257553, "step": 3740 }, { "epoch": 0.98, "learning_rate": 1.051943376911224e-09, "logits/chosen": -1.4866057634353638, "logits/rejected": -1.391998529434204, "logps/chosen": -2453.385986328125, "logps/rejected": -1823.2886962890625, "loss": 0.5907, "rewards/accuracies": 0.699999988079071, "rewards/chosen": 0.4227697253227234, "rewards/margins": 0.3134082853794098, "rewards/rejected": 0.10936151444911957, "step": 3750 }, { "epoch": 0.98, "learning_rate": 7.765603792745934e-10, "logits/chosen": -1.4555871486663818, "logits/rejected": -1.450751543045044, "logps/chosen": -2521.285400390625, "logps/rejected": -2256.712890625, "loss": 0.6001, "rewards/accuracies": 0.675000011920929, "rewards/chosen": 0.5491132140159607, "rewards/margins": 0.3491145074367523, "rewards/rejected": 0.19999869167804718, "step": 3760 }, { "epoch": 0.99, "learning_rate": 5.428623951805322e-10, "logits/chosen": -1.2330322265625, "logits/rejected": -1.3336702585220337, "logps/chosen": -2287.126708984375, "logps/rejected": -2347.366455078125, "loss": 0.5997, "rewards/accuracies": 0.675000011920929, "rewards/chosen": 0.6386030316352844, "rewards/margins": 0.4102831780910492, "rewards/rejected": 0.22831980884075165, "step": 3770 }, { "epoch": 0.99, "learning_rate": 3.508689383435182e-10, "logits/chosen": -1.3702977895736694, "logits/rejected": -1.3696632385253906, "logps/chosen": -2384.869384765625, "logps/rejected": -2304.236083984375, "loss": 0.6022, "rewards/accuracies": 0.675000011920929, "rewards/chosen": 0.36002689599990845, "rewards/margins": 0.4558481276035309, "rewards/rejected": -0.09582126140594482, "step": 3780 }, { "epoch": 0.99, "learning_rate": 2.0059604016192665e-10, "logits/chosen": -1.4226309061050415, "logits/rejected": -1.3605704307556152, "logps/chosen": -2192.42529296875, "logps/rejected": -1818.756591796875, "loss": 0.6746, "rewards/accuracies": 0.550000011920929, "rewards/chosen": 0.3192385137081146, "rewards/margins": 0.25180768966674805, "rewards/rejected": 0.06743079423904419, "step": 3790 }, { "epoch": 0.99, "learning_rate": 9.205624837949066e-11, "logits/chosen": -1.3719966411590576, "logits/rejected": -1.394190788269043, "logps/chosen": -2545.56494140625, "logps/rejected": -2351.23876953125, "loss": 0.6328, "rewards/accuracies": 0.625, "rewards/chosen": 0.5859331488609314, "rewards/margins": 0.3292482793331146, "rewards/rejected": 0.25668492913246155, "step": 3800 }, { "epoch": 0.99, "eval_logits/chosen": -1.3356244564056396, "eval_logits/rejected": -1.3031599521636963, "eval_logps/chosen": -2612.92578125, "eval_logps/rejected": -2212.490234375, "eval_loss": 0.6283265948295593, "eval_rewards/accuracies": 0.6579999923706055, "eval_rewards/chosen": 0.5984972715377808, "eval_rewards/margins": 0.4182315766811371, "eval_rewards/rejected": 0.18026570975780487, "eval_runtime": 279.1253, "eval_samples_per_second": 7.165, "eval_steps_per_second": 0.448, "step": 3800 }, { "epoch": 1.0, "learning_rate": 2.5258626037638618e-11, "logits/chosen": -1.3538461923599243, "logits/rejected": -1.344422698020935, "logps/chosen": -2360.3203125, "logps/rejected": -2012.327880859375, "loss": 0.6754, "rewards/accuracies": 0.5, "rewards/chosen": 0.3601759374141693, "rewards/margins": 0.11125414073467255, "rewards/rejected": 0.24892178177833557, "step": 3810 }, { "epoch": 1.0, "learning_rate": 2.087507185999371e-13, "logits/chosen": -1.4853910207748413, "logits/rejected": -1.4589704275131226, "logps/chosen": -2143.158935546875, "logps/rejected": -2006.003173828125, "loss": 0.6016, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": 0.35302072763442993, "rewards/margins": 0.34298470616340637, "rewards/rejected": 0.010035954415798187, "step": 3820 }, { "epoch": 1.0, "step": 3821, "total_flos": 0.0, "train_loss": 0.6510803117330272, "train_runtime": 30377.0855, "train_samples_per_second": 2.013, "train_steps_per_second": 0.126 } ], "logging_steps": 10, "max_steps": 3821, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 10000000000, "total_flos": 0.0, "train_batch_size": 2, "trial_name": null, "trial_params": null }