{ "best_metric": null, "best_model_checkpoint": null, "epoch": 14.999015425008205, "global_step": 11415, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.13, "learning_rate": 3.7125e-06, "loss": 9.2501, "step": 100 }, { "epoch": 0.26, "learning_rate": 7.4625e-06, "loss": 6.0161, "step": 200 }, { "epoch": 0.39, "learning_rate": 1.1212499999999998e-05, "loss": 4.0333, "step": 300 }, { "epoch": 0.53, "learning_rate": 1.49625e-05, "loss": 3.449, "step": 400 }, { "epoch": 0.66, "learning_rate": 1.8712499999999997e-05, "loss": 3.1638, "step": 500 }, { "epoch": 0.66, "eval_loss": 3.0685720443725586, "eval_runtime": 399.7202, "eval_samples_per_second": 13.372, "eval_steps_per_second": 0.838, "eval_wer": 1.0, "step": 500 }, { "epoch": 0.79, "learning_rate": 2.2462499999999997e-05, "loss": 3.0226, "step": 600 }, { "epoch": 0.92, "learning_rate": 2.6212499999999997e-05, "loss": 2.9748, "step": 700 }, { "epoch": 1.05, "learning_rate": 2.99625e-05, "loss": 2.9583, "step": 800 }, { "epoch": 1.18, "learning_rate": 3.37125e-05, "loss": 2.9372, "step": 900 }, { "epoch": 1.31, "learning_rate": 3.7462499999999996e-05, "loss": 2.9311, "step": 1000 }, { "epoch": 1.31, "eval_loss": 2.920839309692383, "eval_runtime": 390.9897, "eval_samples_per_second": 13.67, "eval_steps_per_second": 0.857, "eval_wer": 1.0, "step": 1000 }, { "epoch": 1.45, "learning_rate": 4.12125e-05, "loss": 2.9082, "step": 1100 }, { "epoch": 1.58, "learning_rate": 4.4962499999999995e-05, "loss": 2.8906, "step": 1200 }, { "epoch": 1.71, "learning_rate": 4.871249999999999e-05, "loss": 2.8496, "step": 1300 }, { "epoch": 1.84, "learning_rate": 5.2462499999999994e-05, "loss": 2.7544, "step": 1400 }, { "epoch": 1.97, "learning_rate": 5.62125e-05, "loss": 2.4175, "step": 1500 }, { "epoch": 1.97, "eval_loss": 1.5009071826934814, "eval_runtime": 386.8095, "eval_samples_per_second": 13.818, "eval_steps_per_second": 0.866, "eval_wer": 0.904880074758592, "step": 1500 }, { "epoch": 2.1, "learning_rate": 5.9962499999999994e-05, "loss": 2.07, "step": 1600 }, { "epoch": 2.23, "learning_rate": 6.37125e-05, "loss": 1.7743, "step": 1700 }, { "epoch": 2.36, "learning_rate": 6.746249999999999e-05, "loss": 1.6002, "step": 1800 }, { "epoch": 2.5, "learning_rate": 7.121249999999999e-05, "loss": 1.5316, "step": 1900 }, { "epoch": 2.63, "learning_rate": 7.49625e-05, "loss": 1.4442, "step": 2000 }, { "epoch": 2.63, "eval_loss": 0.4426315724849701, "eval_runtime": 388.1138, "eval_samples_per_second": 13.772, "eval_steps_per_second": 0.863, "eval_wer": 0.37832000830651025, "step": 2000 }, { "epoch": 2.76, "learning_rate": 7.42113648433351e-05, "loss": 1.4054, "step": 2100 }, { "epoch": 2.89, "learning_rate": 7.341476367498672e-05, "loss": 1.3519, "step": 2200 }, { "epoch": 3.02, "learning_rate": 7.261816250663834e-05, "loss": 1.3285, "step": 2300 }, { "epoch": 3.15, "learning_rate": 7.182156133828995e-05, "loss": 1.2726, "step": 2400 }, { "epoch": 3.28, "learning_rate": 7.102496016994158e-05, "loss": 1.2624, "step": 2500 }, { "epoch": 3.28, "eval_loss": 0.3192683756351471, "eval_runtime": 387.478, "eval_samples_per_second": 13.794, "eval_steps_per_second": 0.865, "eval_wer": 0.29977157096874674, "step": 2500 }, { "epoch": 3.42, "learning_rate": 7.022835900159319e-05, "loss": 1.2737, "step": 2600 }, { "epoch": 3.55, "learning_rate": 6.943175783324481e-05, "loss": 1.2333, "step": 2700 }, { "epoch": 3.68, "learning_rate": 6.863515666489644e-05, "loss": 1.2028, "step": 2800 }, { "epoch": 3.81, "learning_rate": 6.783855549654805e-05, "loss": 1.1867, "step": 2900 }, { "epoch": 3.94, "learning_rate": 6.704992033988316e-05, "loss": 1.1889, "step": 3000 }, { "epoch": 3.94, "eval_loss": 0.2867003381252289, "eval_runtime": 384.7872, "eval_samples_per_second": 13.891, "eval_steps_per_second": 0.871, "eval_wer": 0.2630048800747586, "step": 3000 }, { "epoch": 4.07, "learning_rate": 6.625331917153477e-05, "loss": 1.1945, "step": 3100 }, { "epoch": 4.2, "learning_rate": 6.54567180031864e-05, "loss": 1.1473, "step": 3200 }, { "epoch": 4.34, "learning_rate": 6.466011683483802e-05, "loss": 1.1408, "step": 3300 }, { "epoch": 4.47, "learning_rate": 6.386351566648965e-05, "loss": 1.1376, "step": 3400 }, { "epoch": 4.6, "learning_rate": 6.306691449814126e-05, "loss": 1.1315, "step": 3500 }, { "epoch": 4.6, "eval_loss": 0.25655683875083923, "eval_runtime": 384.1782, "eval_samples_per_second": 13.913, "eval_steps_per_second": 0.872, "eval_wer": 0.2443775308898349, "step": 3500 }, { "epoch": 4.73, "learning_rate": 6.227031332979288e-05, "loss": 1.1225, "step": 3600 }, { "epoch": 4.86, "learning_rate": 6.14737121614445e-05, "loss": 1.1386, "step": 3700 }, { "epoch": 4.99, "learning_rate": 6.067711099309612e-05, "loss": 1.1098, "step": 3800 }, { "epoch": 5.12, "learning_rate": 5.9880509824747736e-05, "loss": 1.1149, "step": 3900 }, { "epoch": 5.26, "learning_rate": 5.908390865639936e-05, "loss": 1.0864, "step": 4000 }, { "epoch": 5.26, "eval_loss": 0.23677974939346313, "eval_runtime": 394.8394, "eval_samples_per_second": 13.537, "eval_steps_per_second": 0.848, "eval_wer": 0.2293946630671789, "step": 4000 }, { "epoch": 5.39, "learning_rate": 5.828730748805098e-05, "loss": 1.0996, "step": 4100 }, { "epoch": 5.52, "learning_rate": 5.74907063197026e-05, "loss": 1.0714, "step": 4200 }, { "epoch": 5.65, "learning_rate": 5.6694105151354214e-05, "loss": 1.0849, "step": 4300 }, { "epoch": 5.78, "learning_rate": 5.589750398300583e-05, "loss": 1.069, "step": 4400 }, { "epoch": 5.91, "learning_rate": 5.5100902814657456e-05, "loss": 1.093, "step": 4500 }, { "epoch": 5.91, "eval_loss": 0.2239702194929123, "eval_runtime": 385.949, "eval_samples_per_second": 13.849, "eval_steps_per_second": 0.868, "eval_wer": 0.21509708233828262, "step": 4500 }, { "epoch": 6.04, "learning_rate": 5.4304301646309074e-05, "loss": 1.0677, "step": 4600 }, { "epoch": 6.18, "learning_rate": 5.35077004779607e-05, "loss": 1.089, "step": 4700 }, { "epoch": 6.31, "learning_rate": 5.2711099309612316e-05, "loss": 1.0475, "step": 4800 }, { "epoch": 6.44, "learning_rate": 5.191449814126394e-05, "loss": 1.0455, "step": 4900 }, { "epoch": 6.57, "learning_rate": 5.111789697291556e-05, "loss": 1.0368, "step": 5000 }, { "epoch": 6.57, "eval_loss": 0.21173720061779022, "eval_runtime": 384.8447, "eval_samples_per_second": 13.889, "eval_steps_per_second": 0.87, "eval_wer": 0.205575744990136, "step": 5000 }, { "epoch": 6.7, "learning_rate": 5.032129580456717e-05, "loss": 1.0459, "step": 5100 }, { "epoch": 6.83, "learning_rate": 4.9524694636218794e-05, "loss": 1.0375, "step": 5200 }, { "epoch": 6.96, "learning_rate": 4.872809346787041e-05, "loss": 1.033, "step": 5300 }, { "epoch": 7.1, "learning_rate": 4.7931492299522036e-05, "loss": 1.0291, "step": 5400 }, { "epoch": 7.23, "learning_rate": 4.7134891131173654e-05, "loss": 1.0178, "step": 5500 }, { "epoch": 7.23, "eval_loss": 0.20199091732501984, "eval_runtime": 385.2062, "eval_samples_per_second": 13.876, "eval_steps_per_second": 0.87, "eval_wer": 0.19541065309936662, "step": 5500 }, { "epoch": 7.36, "learning_rate": 4.634625597450876e-05, "loss": 1.0258, "step": 5600 }, { "epoch": 7.49, "learning_rate": 4.554965480616038e-05, "loss": 1.0161, "step": 5700 }, { "epoch": 7.62, "learning_rate": 4.4753053637812e-05, "loss": 1.0443, "step": 5800 }, { "epoch": 7.75, "learning_rate": 4.395645246946362e-05, "loss": 1.0143, "step": 5900 }, { "epoch": 7.88, "learning_rate": 4.3159851301115245e-05, "loss": 1.0035, "step": 6000 }, { "epoch": 7.88, "eval_loss": 0.20046772062778473, "eval_runtime": 386.8691, "eval_samples_per_second": 13.816, "eval_steps_per_second": 0.866, "eval_wer": 0.192378776866369, "step": 6000 }, { "epoch": 8.02, "learning_rate": 4.2363250132766856e-05, "loss": 1.0233, "step": 6100 }, { "epoch": 8.15, "learning_rate": 4.1566648964418474e-05, "loss": 0.9893, "step": 6200 }, { "epoch": 8.28, "learning_rate": 4.07700477960701e-05, "loss": 0.9896, "step": 6300 }, { "epoch": 8.41, "learning_rate": 3.9973446627721716e-05, "loss": 0.9879, "step": 6400 }, { "epoch": 8.54, "learning_rate": 3.917684545937334e-05, "loss": 0.9759, "step": 6500 }, { "epoch": 8.54, "eval_loss": 0.1971476823091507, "eval_runtime": 383.9373, "eval_samples_per_second": 13.922, "eval_steps_per_second": 0.873, "eval_wer": 0.18630464126258955, "step": 6500 }, { "epoch": 8.67, "learning_rate": 3.838024429102496e-05, "loss": 0.9969, "step": 6600 }, { "epoch": 8.8, "learning_rate": 3.758364312267658e-05, "loss": 1.0003, "step": 6700 }, { "epoch": 8.93, "learning_rate": 3.6787041954328194e-05, "loss": 1.0125, "step": 6800 }, { "epoch": 9.07, "learning_rate": 3.599044078597982e-05, "loss": 0.9907, "step": 6900 }, { "epoch": 9.2, "learning_rate": 3.5193839617631436e-05, "loss": 0.9795, "step": 7000 }, { "epoch": 9.2, "eval_loss": 0.18917934596538544, "eval_runtime": 385.8026, "eval_samples_per_second": 13.854, "eval_steps_per_second": 0.868, "eval_wer": 0.18121690374831273, "step": 7000 }, { "epoch": 9.33, "learning_rate": 3.4397238449283054e-05, "loss": 0.9768, "step": 7100 }, { "epoch": 9.46, "learning_rate": 3.360063728093468e-05, "loss": 1.0074, "step": 7200 }, { "epoch": 9.59, "learning_rate": 3.2804036112586296e-05, "loss": 0.9729, "step": 7300 }, { "epoch": 9.72, "learning_rate": 3.2007434944237914e-05, "loss": 0.9584, "step": 7400 }, { "epoch": 9.85, "learning_rate": 3.121083377588953e-05, "loss": 0.9601, "step": 7500 }, { "epoch": 9.85, "eval_loss": 0.18627513945102692, "eval_runtime": 384.7444, "eval_samples_per_second": 13.892, "eval_steps_per_second": 0.871, "eval_wer": 0.17949330287612916, "step": 7500 }, { "epoch": 9.99, "learning_rate": 3.0414232607541153e-05, "loss": 0.9729, "step": 7600 }, { "epoch": 10.12, "learning_rate": 2.9617631439192774e-05, "loss": 0.9712, "step": 7700 }, { "epoch": 10.25, "learning_rate": 2.882899628252788e-05, "loss": 0.9746, "step": 7800 }, { "epoch": 10.38, "learning_rate": 2.80323951141795e-05, "loss": 0.9595, "step": 7900 }, { "epoch": 10.51, "learning_rate": 2.7235793945831116e-05, "loss": 0.9673, "step": 8000 }, { "epoch": 10.51, "eval_loss": 0.1808742731809616, "eval_runtime": 383.472, "eval_samples_per_second": 13.938, "eval_steps_per_second": 0.874, "eval_wer": 0.17611878309625167, "step": 8000 }, { "epoch": 10.64, "learning_rate": 2.6439192777482737e-05, "loss": 0.9465, "step": 8100 }, { "epoch": 10.77, "learning_rate": 2.564259160913436e-05, "loss": 0.9628, "step": 8200 }, { "epoch": 10.91, "learning_rate": 2.484599044078598e-05, "loss": 0.946, "step": 8300 }, { "epoch": 11.04, "learning_rate": 2.4049389272437594e-05, "loss": 0.9316, "step": 8400 }, { "epoch": 11.17, "learning_rate": 2.3252788104089215e-05, "loss": 0.9233, "step": 8500 }, { "epoch": 11.17, "eval_loss": 0.18183015286922455, "eval_runtime": 379.914, "eval_samples_per_second": 14.069, "eval_steps_per_second": 0.882, "eval_wer": 0.17553732738033434, "step": 8500 }, { "epoch": 11.3, "learning_rate": 2.2456186935740836e-05, "loss": 0.9521, "step": 8600 }, { "epoch": 11.43, "learning_rate": 2.1659585767392457e-05, "loss": 0.9411, "step": 8700 }, { "epoch": 11.56, "learning_rate": 2.0862984599044075e-05, "loss": 0.9333, "step": 8800 }, { "epoch": 11.69, "learning_rate": 2.0066383430695696e-05, "loss": 0.937, "step": 8900 }, { "epoch": 11.83, "learning_rate": 1.9269782262347317e-05, "loss": 0.9382, "step": 9000 }, { "epoch": 11.83, "eval_loss": 0.17674756050109863, "eval_runtime": 384.6492, "eval_samples_per_second": 13.896, "eval_steps_per_second": 0.871, "eval_wer": 0.1741459869172464, "step": 9000 }, { "epoch": 11.96, "learning_rate": 1.847318109399894e-05, "loss": 0.9453, "step": 9100 }, { "epoch": 12.09, "learning_rate": 1.7676579925650556e-05, "loss": 0.9626, "step": 9200 }, { "epoch": 12.22, "learning_rate": 1.6879978757302177e-05, "loss": 0.9289, "step": 9300 }, { "epoch": 12.35, "learning_rate": 1.6083377588953795e-05, "loss": 0.9282, "step": 9400 }, { "epoch": 12.48, "learning_rate": 1.5286776420605416e-05, "loss": 0.9242, "step": 9500 }, { "epoch": 12.48, "eval_loss": 0.17428240180015564, "eval_runtime": 381.3036, "eval_samples_per_second": 14.018, "eval_steps_per_second": 0.879, "eval_wer": 0.17025231024815698, "step": 9500 }, { "epoch": 12.61, "learning_rate": 1.4490175252257036e-05, "loss": 0.9273, "step": 9600 }, { "epoch": 12.75, "learning_rate": 1.3693574083908655e-05, "loss": 0.9254, "step": 9700 }, { "epoch": 12.88, "learning_rate": 1.2896972915560275e-05, "loss": 0.9209, "step": 9800 }, { "epoch": 13.01, "learning_rate": 1.2100371747211896e-05, "loss": 0.9292, "step": 9900 }, { "epoch": 13.14, "learning_rate": 1.1303770578863514e-05, "loss": 0.9703, "step": 10000 }, { "epoch": 13.14, "eval_loss": 0.17106124758720398, "eval_runtime": 382.5906, "eval_samples_per_second": 13.971, "eval_steps_per_second": 0.876, "eval_wer": 0.17105181185754334, "step": 10000 }, { "epoch": 13.27, "learning_rate": 1.0507169410515135e-05, "loss": 0.9186, "step": 10100 }, { "epoch": 13.4, "learning_rate": 9.710568242166754e-06, "loss": 0.9227, "step": 10200 }, { "epoch": 13.53, "learning_rate": 8.913967073818375e-06, "loss": 0.909, "step": 10300 }, { "epoch": 13.67, "learning_rate": 8.117365905469995e-06, "loss": 0.9028, "step": 10400 }, { "epoch": 13.8, "learning_rate": 7.328730748805097e-06, "loss": 0.9139, "step": 10500 }, { "epoch": 13.8, "eval_loss": 0.1718469262123108, "eval_runtime": 380.6631, "eval_samples_per_second": 14.041, "eval_steps_per_second": 0.88, "eval_wer": 0.1671996677395909, "step": 10500 }, { "epoch": 13.93, "learning_rate": 6.532129580456718e-06, "loss": 0.9146, "step": 10600 }, { "epoch": 14.06, "learning_rate": 5.735528412108338e-06, "loss": 0.9239, "step": 10700 }, { "epoch": 14.19, "learning_rate": 4.938927243759957e-06, "loss": 0.9351, "step": 10800 }, { "epoch": 14.32, "learning_rate": 4.142326075411577e-06, "loss": 0.9127, "step": 10900 }, { "epoch": 14.45, "learning_rate": 3.3457249070631965e-06, "loss": 0.9073, "step": 11000 }, { "epoch": 14.45, "eval_loss": 0.16996563971042633, "eval_runtime": 380.1012, "eval_samples_per_second": 14.062, "eval_steps_per_second": 0.881, "eval_wer": 0.1664728480946942, "step": 11000 }, { "epoch": 14.59, "learning_rate": 2.5491237387148164e-06, "loss": 0.9023, "step": 11100 }, { "epoch": 14.72, "learning_rate": 1.76048858204992e-06, "loss": 0.908, "step": 11200 }, { "epoch": 14.85, "learning_rate": 9.6388741370154e-07, "loss": 0.9096, "step": 11300 }, { "epoch": 14.98, "learning_rate": 1.6728624535315985e-07, "loss": 0.9436, "step": 11400 }, { "epoch": 15.0, "step": 11415, "total_flos": 1.4956070851005566e+20, "train_loss": 1.3962327468410898, "train_runtime": 63316.5045, "train_samples_per_second": 11.549, "train_steps_per_second": 0.18 } ], "max_steps": 11415, "num_train_epochs": 15, "total_flos": 1.4956070851005566e+20, "trial_name": null, "trial_params": null }