{ "best_metric": null, "best_model_checkpoint": null, "epoch": 36.72947939955286, "global_step": 115000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.03, "learning_rate": 2.8744809964867454e-07, "loss": 91.176, "step": 100 }, { "epoch": 0.06, "learning_rate": 6.068348770360907e-07, "loss": 64.0684, "step": 200 }, { "epoch": 0.1, "learning_rate": 9.26221654423507e-07, "loss": 27.1061, "step": 300 }, { "epoch": 0.13, "learning_rate": 1.242414564037049e-06, "loss": 8.0198, "step": 400 }, { "epoch": 0.16, "learning_rate": 1.561801341424465e-06, "loss": 4.7691, "step": 500 }, { "epoch": 0.19, "learning_rate": 1.8811881188118813e-06, "loss": 3.4387, "step": 600 }, { "epoch": 0.22, "learning_rate": 2.2005748961992977e-06, "loss": 2.7027, "step": 700 }, { "epoch": 0.26, "learning_rate": 2.5199616735867137e-06, "loss": 2.4431, "step": 800 }, { "epoch": 0.29, "learning_rate": 2.8393484509741297e-06, "loss": 2.0534, "step": 900 }, { "epoch": 0.32, "learning_rate": 3.1587352283615465e-06, "loss": 1.8537, "step": 1000 }, { "epoch": 0.35, "learning_rate": 3.4781220057489625e-06, "loss": 1.642, "step": 1100 }, { "epoch": 0.38, "learning_rate": 3.797508783136378e-06, "loss": 1.5092, "step": 1200 }, { "epoch": 0.42, "learning_rate": 4.1168955605237945e-06, "loss": 1.4302, "step": 1300 }, { "epoch": 0.45, "learning_rate": 4.436282337911211e-06, "loss": 1.3527, "step": 1400 }, { "epoch": 0.48, "learning_rate": 4.755669115298627e-06, "loss": 1.2495, "step": 1500 }, { "epoch": 0.51, "learning_rate": 5.075055892686043e-06, "loss": 1.2039, "step": 1600 }, { "epoch": 0.54, "learning_rate": 5.39444267007346e-06, "loss": 1.1438, "step": 1700 }, { "epoch": 0.57, "learning_rate": 5.713829447460876e-06, "loss": 1.1056, "step": 1800 }, { "epoch": 0.61, "learning_rate": 6.033216224848291e-06, "loss": 1.0436, "step": 1900 }, { "epoch": 0.64, "learning_rate": 6.352603002235708e-06, "loss": 1.0225, "step": 2000 }, { "epoch": 0.67, "learning_rate": 6.671989779623123e-06, "loss": 0.9856, "step": 2100 }, { "epoch": 0.7, "learning_rate": 6.9913765570105406e-06, "loss": 0.9412, "step": 2200 }, { "epoch": 0.73, "learning_rate": 7.310763334397957e-06, "loss": 0.9075, "step": 2300 }, { "epoch": 0.77, "learning_rate": 7.630150111785373e-06, "loss": 0.8752, "step": 2400 }, { "epoch": 0.8, "learning_rate": 7.949536889172788e-06, "loss": 0.854, "step": 2500 }, { "epoch": 0.83, "learning_rate": 8.268923666560205e-06, "loss": 0.8282, "step": 2600 }, { "epoch": 0.86, "learning_rate": 8.588310443947621e-06, "loss": 0.8074, "step": 2700 }, { "epoch": 0.89, "learning_rate": 8.907697221335038e-06, "loss": 0.7976, "step": 2800 }, { "epoch": 0.93, "learning_rate": 9.227083998722454e-06, "loss": 0.7955, "step": 2900 }, { "epoch": 0.96, "learning_rate": 9.54647077610987e-06, "loss": 0.7502, "step": 3000 }, { "epoch": 0.99, "learning_rate": 9.865857553497285e-06, "loss": 0.7455, "step": 3100 }, { "epoch": 1.02, "learning_rate": 1.0185244330884702e-05, "loss": 0.7177, "step": 3200 }, { "epoch": 1.05, "learning_rate": 1.050463110827212e-05, "loss": 0.724, "step": 3300 }, { "epoch": 1.09, "learning_rate": 1.0824017885659535e-05, "loss": 0.6975, "step": 3400 }, { "epoch": 1.12, "learning_rate": 1.114340466304695e-05, "loss": 0.7054, "step": 3500 }, { "epoch": 1.15, "learning_rate": 1.1462791440434366e-05, "loss": 0.71, "step": 3600 }, { "epoch": 1.18, "learning_rate": 1.1782178217821782e-05, "loss": 0.6825, "step": 3700 }, { "epoch": 1.21, "learning_rate": 1.2101564995209199e-05, "loss": 0.6792, "step": 3800 }, { "epoch": 1.25, "learning_rate": 1.2420951772596615e-05, "loss": 0.6794, "step": 3900 }, { "epoch": 1.28, "learning_rate": 1.2740338549984032e-05, "loss": 0.6672, "step": 4000 }, { "epoch": 1.31, "learning_rate": 1.3059725327371449e-05, "loss": 0.6356, "step": 4100 }, { "epoch": 1.34, "learning_rate": 1.3379112104758865e-05, "loss": 0.6503, "step": 4200 }, { "epoch": 1.37, "learning_rate": 1.369849888214628e-05, "loss": 0.6495, "step": 4300 }, { "epoch": 1.41, "learning_rate": 1.4017885659533696e-05, "loss": 0.6428, "step": 4400 }, { "epoch": 1.44, "learning_rate": 1.4337272436921113e-05, "loss": 0.6171, "step": 4500 }, { "epoch": 1.47, "learning_rate": 1.4656659214308529e-05, "loss": 0.6084, "step": 4600 }, { "epoch": 1.5, "learning_rate": 1.4976045991695944e-05, "loss": 0.6222, "step": 4700 }, { "epoch": 1.53, "learning_rate": 1.529543276908336e-05, "loss": 0.5971, "step": 4800 }, { "epoch": 1.56, "learning_rate": 1.5614819546470777e-05, "loss": 0.5745, "step": 4900 }, { "epoch": 1.6, "learning_rate": 1.5934206323858196e-05, "loss": 0.5753, "step": 5000 }, { "epoch": 1.63, "learning_rate": 1.625359310124561e-05, "loss": 0.6045, "step": 5100 }, { "epoch": 1.66, "learning_rate": 1.6572979878633027e-05, "loss": 0.6006, "step": 5200 }, { "epoch": 1.69, "learning_rate": 1.6892366656020443e-05, "loss": 0.6003, "step": 5300 }, { "epoch": 1.72, "learning_rate": 1.7211753433407858e-05, "loss": 0.5688, "step": 5400 }, { "epoch": 1.76, "learning_rate": 1.7531140210795274e-05, "loss": 0.5662, "step": 5500 }, { "epoch": 1.79, "learning_rate": 1.785052698818269e-05, "loss": 0.5788, "step": 5600 }, { "epoch": 1.82, "learning_rate": 1.8169913765570105e-05, "loss": 0.5541, "step": 5700 }, { "epoch": 1.85, "learning_rate": 1.8489300542957524e-05, "loss": 0.5656, "step": 5800 }, { "epoch": 1.88, "learning_rate": 1.880868732034494e-05, "loss": 0.5466, "step": 5900 }, { "epoch": 1.92, "learning_rate": 1.9128074097732355e-05, "loss": 0.563, "step": 6000 }, { "epoch": 1.95, "learning_rate": 1.9447460875119774e-05, "loss": 0.5603, "step": 6100 }, { "epoch": 1.98, "learning_rate": 1.976684765250719e-05, "loss": 0.5579, "step": 6200 }, { "epoch": 2.01, "learning_rate": 2.0086234429894605e-05, "loss": 0.5346, "step": 6300 }, { "epoch": 2.04, "learning_rate": 2.0405621207282017e-05, "loss": 0.5038, "step": 6400 }, { "epoch": 2.08, "learning_rate": 2.0725007984669436e-05, "loss": 0.5121, "step": 6500 }, { "epoch": 2.11, "learning_rate": 2.1044394762056855e-05, "loss": 0.5135, "step": 6600 }, { "epoch": 2.14, "learning_rate": 2.1363781539444267e-05, "loss": 0.504, "step": 6700 }, { "epoch": 2.17, "learning_rate": 2.1683168316831686e-05, "loss": 0.5119, "step": 6800 }, { "epoch": 2.2, "learning_rate": 2.20025550942191e-05, "loss": 0.4986, "step": 6900 }, { "epoch": 2.24, "learning_rate": 2.2321941871606518e-05, "loss": 0.504, "step": 7000 }, { "epoch": 2.27, "learning_rate": 2.2641328648993933e-05, "loss": 0.5024, "step": 7100 }, { "epoch": 2.3, "learning_rate": 2.296071542638135e-05, "loss": 0.479, "step": 7200 }, { "epoch": 2.33, "learning_rate": 2.3280102203768764e-05, "loss": 0.492, "step": 7300 }, { "epoch": 2.36, "learning_rate": 2.3599488981156183e-05, "loss": 0.4928, "step": 7400 }, { "epoch": 2.4, "learning_rate": 2.39188757585436e-05, "loss": 0.4862, "step": 7500 }, { "epoch": 2.43, "learning_rate": 2.4238262535931014e-05, "loss": 0.5148, "step": 7600 }, { "epoch": 2.46, "learning_rate": 2.4557649313318433e-05, "loss": 0.4922, "step": 7700 }, { "epoch": 2.49, "learning_rate": 2.4877036090705846e-05, "loss": 0.4882, "step": 7800 }, { "epoch": 2.52, "learning_rate": 2.5196422868093264e-05, "loss": 0.4871, "step": 7900 }, { "epoch": 2.56, "learning_rate": 2.5515809645480677e-05, "loss": 0.4967, "step": 8000 }, { "epoch": 2.59, "learning_rate": 2.5835196422868096e-05, "loss": 0.4834, "step": 8100 }, { "epoch": 2.62, "learning_rate": 2.615458320025551e-05, "loss": 0.4769, "step": 8200 }, { "epoch": 2.65, "learning_rate": 2.6473969977642927e-05, "loss": 0.4776, "step": 8300 }, { "epoch": 2.68, "learning_rate": 2.6793356755030342e-05, "loss": 0.4861, "step": 8400 }, { "epoch": 2.71, "learning_rate": 2.711274353241776e-05, "loss": 0.486, "step": 8500 }, { "epoch": 2.75, "learning_rate": 2.7432130309805177e-05, "loss": 0.4745, "step": 8600 }, { "epoch": 2.78, "learning_rate": 2.7751517087192592e-05, "loss": 0.4805, "step": 8700 }, { "epoch": 2.81, "learning_rate": 2.807090386458001e-05, "loss": 0.5003, "step": 8800 }, { "epoch": 2.84, "learning_rate": 2.8390290641967424e-05, "loss": 0.4639, "step": 8900 }, { "epoch": 2.87, "learning_rate": 2.8709677419354843e-05, "loss": 0.4763, "step": 9000 }, { "epoch": 2.91, "learning_rate": 2.9029064196742255e-05, "loss": 0.4727, "step": 9100 }, { "epoch": 2.94, "learning_rate": 2.9348450974129674e-05, "loss": 0.4839, "step": 9200 }, { "epoch": 2.97, "learning_rate": 2.966783775151709e-05, "loss": 0.4769, "step": 9300 }, { "epoch": 3.0, "learning_rate": 2.9987224528904505e-05, "loss": 0.4641, "step": 9400 }, { "epoch": 3.03, "learning_rate": 3.030661130629192e-05, "loss": 0.4282, "step": 9500 }, { "epoch": 3.07, "learning_rate": 3.0625998083679336e-05, "loss": 0.4057, "step": 9600 }, { "epoch": 3.1, "learning_rate": 3.094538486106676e-05, "loss": 0.4192, "step": 9700 }, { "epoch": 3.13, "learning_rate": 3.126477163845417e-05, "loss": 0.4186, "step": 9800 }, { "epoch": 3.16, "learning_rate": 3.158415841584159e-05, "loss": 0.4086, "step": 9900 }, { "epoch": 3.19, "learning_rate": 3.1903545193229005e-05, "loss": 0.4194, "step": 10000 }, { "epoch": 3.23, "learning_rate": 3.221973810284254e-05, "loss": 0.4443, "step": 10100 }, { "epoch": 3.26, "learning_rate": 3.2539124880229965e-05, "loss": 0.4217, "step": 10200 }, { "epoch": 3.29, "learning_rate": 3.2858511657617374e-05, "loss": 0.4275, "step": 10300 }, { "epoch": 3.32, "learning_rate": 3.3177898435004796e-05, "loss": 0.4422, "step": 10400 }, { "epoch": 3.35, "learning_rate": 3.3497285212392205e-05, "loss": 0.4227, "step": 10500 }, { "epoch": 3.39, "learning_rate": 3.381667198977963e-05, "loss": 0.4191, "step": 10600 }, { "epoch": 3.42, "learning_rate": 3.413605876716704e-05, "loss": 0.4309, "step": 10700 }, { "epoch": 3.45, "learning_rate": 3.445544554455446e-05, "loss": 0.4079, "step": 10800 }, { "epoch": 3.48, "learning_rate": 3.4774832321941874e-05, "loss": 0.4281, "step": 10900 }, { "epoch": 3.51, "learning_rate": 3.509421909932929e-05, "loss": 0.4137, "step": 11000 }, { "epoch": 3.55, "learning_rate": 3.5413605876716705e-05, "loss": 0.4168, "step": 11100 }, { "epoch": 3.58, "learning_rate": 3.573299265410412e-05, "loss": 0.4186, "step": 11200 }, { "epoch": 3.61, "learning_rate": 3.605237943149154e-05, "loss": 0.4325, "step": 11300 }, { "epoch": 3.64, "learning_rate": 3.637176620887895e-05, "loss": 0.4422, "step": 11400 }, { "epoch": 3.67, "learning_rate": 3.6691152986266374e-05, "loss": 0.4269, "step": 11500 }, { "epoch": 3.7, "learning_rate": 3.701053976365379e-05, "loss": 0.4368, "step": 11600 }, { "epoch": 3.74, "learning_rate": 3.7329926541041205e-05, "loss": 0.4207, "step": 11700 }, { "epoch": 3.77, "learning_rate": 3.764931331842862e-05, "loss": 0.4365, "step": 11800 }, { "epoch": 3.8, "learning_rate": 3.7968700095816036e-05, "loss": 0.4394, "step": 11900 }, { "epoch": 3.83, "learning_rate": 3.828808687320345e-05, "loss": 0.4351, "step": 12000 }, { "epoch": 3.86, "learning_rate": 3.860747365059087e-05, "loss": 0.4113, "step": 12100 }, { "epoch": 3.9, "learning_rate": 3.892686042797828e-05, "loss": 0.4261, "step": 12200 }, { "epoch": 3.93, "learning_rate": 3.92462472053657e-05, "loss": 0.4311, "step": 12300 }, { "epoch": 3.96, "learning_rate": 3.956563398275312e-05, "loss": 0.4246, "step": 12400 }, { "epoch": 3.99, "learning_rate": 3.988502076014053e-05, "loss": 0.4294, "step": 12500 }, { "epoch": 4.02, "learning_rate": 3.997728805138579e-05, "loss": 0.3854, "step": 12600 }, { "epoch": 4.06, "learning_rate": 3.994180063167608e-05, "loss": 0.3656, "step": 12700 }, { "epoch": 4.09, "learning_rate": 3.9906313211966366e-05, "loss": 0.3836, "step": 12800 }, { "epoch": 4.12, "learning_rate": 3.987082579225665e-05, "loss": 0.3664, "step": 12900 }, { "epoch": 4.15, "learning_rate": 3.9835338372546935e-05, "loss": 0.3884, "step": 13000 }, { "epoch": 4.18, "learning_rate": 3.979985095283722e-05, "loss": 0.3764, "step": 13100 }, { "epoch": 4.22, "learning_rate": 3.976436353312751e-05, "loss": 0.3733, "step": 13200 }, { "epoch": 4.25, "learning_rate": 3.97288761134178e-05, "loss": 0.3682, "step": 13300 }, { "epoch": 4.28, "learning_rate": 3.969338869370809e-05, "loss": 0.371, "step": 13400 }, { "epoch": 4.31, "learning_rate": 3.965790127399837e-05, "loss": 0.3741, "step": 13500 }, { "epoch": 4.34, "learning_rate": 3.9622413854288656e-05, "loss": 0.3757, "step": 13600 }, { "epoch": 4.38, "learning_rate": 3.9586926434578944e-05, "loss": 0.3652, "step": 13700 }, { "epoch": 4.41, "learning_rate": 3.955143901486923e-05, "loss": 0.3892, "step": 13800 }, { "epoch": 4.44, "learning_rate": 3.951630646935662e-05, "loss": 0.3893, "step": 13900 }, { "epoch": 4.47, "learning_rate": 3.948081904964691e-05, "loss": 0.3823, "step": 14000 }, { "epoch": 4.5, "learning_rate": 3.944533162993719e-05, "loss": 0.3654, "step": 14100 }, { "epoch": 4.54, "learning_rate": 3.9409844210227476e-05, "loss": 0.3791, "step": 14200 }, { "epoch": 4.57, "learning_rate": 3.9374356790517764e-05, "loss": 0.3918, "step": 14300 }, { "epoch": 4.6, "learning_rate": 3.933886937080805e-05, "loss": 0.3713, "step": 14400 }, { "epoch": 4.63, "learning_rate": 3.930338195109834e-05, "loss": 0.376, "step": 14500 }, { "epoch": 4.66, "learning_rate": 3.926789453138863e-05, "loss": 0.3682, "step": 14600 }, { "epoch": 4.69, "learning_rate": 3.9232407111678915e-05, "loss": 0.374, "step": 14700 }, { "epoch": 4.73, "learning_rate": 3.91969196919692e-05, "loss": 0.3774, "step": 14800 }, { "epoch": 4.76, "learning_rate": 3.9161432272259484e-05, "loss": 0.3806, "step": 14900 }, { "epoch": 4.79, "learning_rate": 3.912594485254977e-05, "loss": 0.3886, "step": 15000 }, { "epoch": 4.82, "learning_rate": 3.909045743284006e-05, "loss": 0.3742, "step": 15100 }, { "epoch": 4.85, "learning_rate": 3.905497001313035e-05, "loss": 0.3714, "step": 15200 }, { "epoch": 4.89, "learning_rate": 3.9019482593420636e-05, "loss": 0.3721, "step": 15300 }, { "epoch": 4.92, "learning_rate": 3.8983995173710924e-05, "loss": 0.3716, "step": 15400 }, { "epoch": 4.95, "learning_rate": 3.8948507754001205e-05, "loss": 0.3728, "step": 15500 }, { "epoch": 4.98, "learning_rate": 3.891302033429149e-05, "loss": 0.384, "step": 15600 }, { "epoch": 5.01, "learning_rate": 3.887753291458178e-05, "loss": 0.3415, "step": 15700 }, { "epoch": 5.05, "learning_rate": 3.884204549487207e-05, "loss": 0.3064, "step": 15800 }, { "epoch": 5.08, "learning_rate": 3.880655807516236e-05, "loss": 0.3164, "step": 15900 }, { "epoch": 5.11, "learning_rate": 3.8771070655452645e-05, "loss": 0.3215, "step": 16000 }, { "epoch": 5.14, "learning_rate": 3.873558323574293e-05, "loss": 0.3234, "step": 16100 }, { "epoch": 5.17, "learning_rate": 3.870009581603322e-05, "loss": 0.3119, "step": 16200 }, { "epoch": 5.21, "learning_rate": 3.866460839632351e-05, "loss": 0.3145, "step": 16300 }, { "epoch": 5.24, "learning_rate": 3.8629120976613796e-05, "loss": 0.3333, "step": 16400 }, { "epoch": 5.27, "learning_rate": 3.8593633556904084e-05, "loss": 0.3195, "step": 16500 }, { "epoch": 5.3, "learning_rate": 3.855814613719437e-05, "loss": 0.3159, "step": 16600 }, { "epoch": 5.33, "learning_rate": 3.852265871748465e-05, "loss": 0.3189, "step": 16700 }, { "epoch": 5.37, "learning_rate": 3.848717129777494e-05, "loss": 0.317, "step": 16800 }, { "epoch": 5.4, "learning_rate": 3.845168387806523e-05, "loss": 0.3231, "step": 16900 }, { "epoch": 5.43, "learning_rate": 3.841619645835552e-05, "loss": 0.3328, "step": 17000 }, { "epoch": 5.46, "learning_rate": 3.8380709038645805e-05, "loss": 0.3282, "step": 17100 }, { "epoch": 5.49, "learning_rate": 3.8345221618936086e-05, "loss": 0.3169, "step": 17200 }, { "epoch": 5.53, "learning_rate": 3.8309734199226374e-05, "loss": 0.3156, "step": 17300 }, { "epoch": 5.56, "learning_rate": 3.827424677951666e-05, "loss": 0.3313, "step": 17400 }, { "epoch": 5.59, "learning_rate": 3.823875935980695e-05, "loss": 0.3181, "step": 17500 }, { "epoch": 5.62, "learning_rate": 3.820327194009724e-05, "loss": 0.334, "step": 17600 }, { "epoch": 5.65, "learning_rate": 3.8167784520387526e-05, "loss": 0.3313, "step": 17700 }, { "epoch": 5.69, "learning_rate": 3.8132297100677814e-05, "loss": 0.3331, "step": 17800 }, { "epoch": 5.72, "learning_rate": 3.80968096809681e-05, "loss": 0.3405, "step": 17900 }, { "epoch": 5.75, "learning_rate": 3.806132226125839e-05, "loss": 0.3345, "step": 18000 }, { "epoch": 5.78, "learning_rate": 3.802583484154868e-05, "loss": 0.3295, "step": 18100 }, { "epoch": 5.81, "learning_rate": 3.7990347421838965e-05, "loss": 0.3244, "step": 18200 }, { "epoch": 5.84, "learning_rate": 3.795486000212925e-05, "loss": 0.3346, "step": 18300 }, { "epoch": 5.88, "learning_rate": 3.791972745661663e-05, "loss": 0.3317, "step": 18400 }, { "epoch": 5.91, "learning_rate": 3.788424003690692e-05, "loss": 0.3219, "step": 18500 }, { "epoch": 5.94, "learning_rate": 3.784875261719721e-05, "loss": 0.3167, "step": 18600 }, { "epoch": 5.97, "learning_rate": 3.781326519748749e-05, "loss": 0.33, "step": 18700 }, { "epoch": 6.0, "learning_rate": 3.777777777777778e-05, "loss": 0.3211, "step": 18800 }, { "epoch": 6.04, "learning_rate": 3.7742290358068066e-05, "loss": 0.2794, "step": 18900 }, { "epoch": 6.07, "learning_rate": 3.7706802938358354e-05, "loss": 0.2675, "step": 19000 }, { "epoch": 6.1, "learning_rate": 3.767131551864864e-05, "loss": 0.2676, "step": 19100 }, { "epoch": 6.13, "learning_rate": 3.763582809893893e-05, "loss": 0.2693, "step": 19200 }, { "epoch": 6.16, "learning_rate": 3.760034067922922e-05, "loss": 0.2669, "step": 19300 }, { "epoch": 6.2, "learning_rate": 3.7564853259519506e-05, "loss": 0.2744, "step": 19400 }, { "epoch": 6.23, "learning_rate": 3.7529365839809794e-05, "loss": 0.282, "step": 19500 }, { "epoch": 6.26, "learning_rate": 3.749387842010008e-05, "loss": 0.2848, "step": 19600 }, { "epoch": 6.29, "learning_rate": 3.745839100039037e-05, "loss": 0.2783, "step": 19700 }, { "epoch": 6.32, "learning_rate": 3.742290358068066e-05, "loss": 0.2843, "step": 19800 }, { "epoch": 6.36, "learning_rate": 3.738741616097094e-05, "loss": 0.2696, "step": 19900 }, { "epoch": 6.39, "learning_rate": 3.7351928741261226e-05, "loss": 0.2932, "step": 20000 }, { "epoch": 6.42, "learning_rate": 3.7316441321551514e-05, "loss": 0.2775, "step": 20100 }, { "epoch": 6.45, "learning_rate": 3.72809539018418e-05, "loss": 0.2893, "step": 20200 }, { "epoch": 6.48, "learning_rate": 3.724546648213209e-05, "loss": 0.286, "step": 20300 }, { "epoch": 6.52, "learning_rate": 3.720997906242237e-05, "loss": 0.2799, "step": 20400 }, { "epoch": 6.55, "learning_rate": 3.717449164271266e-05, "loss": 0.2827, "step": 20500 }, { "epoch": 6.58, "learning_rate": 3.713900422300295e-05, "loss": 0.2783, "step": 20600 }, { "epoch": 6.61, "learning_rate": 3.7103516803293235e-05, "loss": 0.2729, "step": 20700 }, { "epoch": 6.64, "learning_rate": 3.706802938358352e-05, "loss": 0.2747, "step": 20800 }, { "epoch": 6.68, "learning_rate": 3.703254196387381e-05, "loss": 0.2764, "step": 20900 }, { "epoch": 6.71, "learning_rate": 3.699705454416409e-05, "loss": 0.2804, "step": 21000 }, { "epoch": 6.74, "learning_rate": 3.696156712445438e-05, "loss": 0.2841, "step": 21100 }, { "epoch": 6.77, "learning_rate": 3.692607970474467e-05, "loss": 0.277, "step": 21200 }, { "epoch": 6.8, "learning_rate": 3.6890592285034956e-05, "loss": 0.284, "step": 21300 }, { "epoch": 6.83, "learning_rate": 3.6855104865325244e-05, "loss": 0.2778, "step": 21400 }, { "epoch": 6.87, "learning_rate": 3.681961744561553e-05, "loss": 0.2797, "step": 21500 }, { "epoch": 6.9, "learning_rate": 3.678413002590582e-05, "loss": 0.3036, "step": 21600 }, { "epoch": 6.93, "learning_rate": 3.674864260619611e-05, "loss": 0.2869, "step": 21700 }, { "epoch": 6.96, "learning_rate": 3.6713155186486395e-05, "loss": 0.2921, "step": 21800 }, { "epoch": 6.99, "learning_rate": 3.667766776677668e-05, "loss": 0.2844, "step": 21900 }, { "epoch": 7.03, "learning_rate": 3.664218034706697e-05, "loss": 0.2427, "step": 22000 }, { "epoch": 7.06, "learning_rate": 3.660669292735726e-05, "loss": 0.243, "step": 22100 }, { "epoch": 7.09, "learning_rate": 3.657120550764754e-05, "loss": 0.2311, "step": 22200 }, { "epoch": 7.12, "learning_rate": 3.653571808793783e-05, "loss": 0.2358, "step": 22300 }, { "epoch": 7.15, "learning_rate": 3.6500230668228116e-05, "loss": 0.2287, "step": 22400 }, { "epoch": 7.19, "learning_rate": 3.6464743248518404e-05, "loss": 0.2519, "step": 22500 }, { "epoch": 7.22, "learning_rate": 3.642925582880869e-05, "loss": 0.2528, "step": 22600 }, { "epoch": 7.25, "learning_rate": 3.639376840909898e-05, "loss": 0.2507, "step": 22700 }, { "epoch": 7.28, "learning_rate": 3.635828098938926e-05, "loss": 0.2496, "step": 22800 }, { "epoch": 7.31, "learning_rate": 3.632279356967955e-05, "loss": 0.2503, "step": 22900 }, { "epoch": 7.35, "learning_rate": 3.628730614996984e-05, "loss": 0.2383, "step": 23000 }, { "epoch": 7.38, "learning_rate": 3.6251818730260125e-05, "loss": 0.2513, "step": 23100 }, { "epoch": 7.41, "learning_rate": 3.621633131055041e-05, "loss": 0.2492, "step": 23200 }, { "epoch": 7.44, "learning_rate": 3.61808438908407e-05, "loss": 0.2473, "step": 23300 }, { "epoch": 7.47, "learning_rate": 3.614535647113099e-05, "loss": 0.2502, "step": 23400 }, { "epoch": 7.51, "learning_rate": 3.6109869051421276e-05, "loss": 0.2492, "step": 23500 }, { "epoch": 7.54, "learning_rate": 3.6074381631711564e-05, "loss": 0.2366, "step": 23600 }, { "epoch": 7.57, "learning_rate": 3.603889421200185e-05, "loss": 0.2393, "step": 23700 }, { "epoch": 7.6, "learning_rate": 3.600340679229214e-05, "loss": 0.2483, "step": 23800 }, { "epoch": 7.63, "learning_rate": 3.596791937258242e-05, "loss": 0.2428, "step": 23900 }, { "epoch": 7.67, "learning_rate": 3.593243195287271e-05, "loss": 0.2507, "step": 24000 }, { "epoch": 7.7, "learning_rate": 3.5896944533163e-05, "loss": 0.2491, "step": 24100 }, { "epoch": 7.73, "learning_rate": 3.5861457113453285e-05, "loss": 0.2549, "step": 24200 }, { "epoch": 7.76, "learning_rate": 3.582596969374357e-05, "loss": 0.251, "step": 24300 }, { "epoch": 7.79, "learning_rate": 3.579048227403386e-05, "loss": 0.2357, "step": 24400 }, { "epoch": 7.82, "learning_rate": 3.575499485432414e-05, "loss": 0.2441, "step": 24500 }, { "epoch": 7.86, "learning_rate": 3.571950743461443e-05, "loss": 0.2561, "step": 24600 }, { "epoch": 7.89, "learning_rate": 3.568402001490472e-05, "loss": 0.2524, "step": 24700 }, { "epoch": 7.92, "learning_rate": 3.5648532595195006e-05, "loss": 0.2467, "step": 24800 }, { "epoch": 7.95, "learning_rate": 3.5613045175485293e-05, "loss": 0.2416, "step": 24900 }, { "epoch": 7.98, "learning_rate": 3.557755775577558e-05, "loss": 0.2511, "step": 25000 }, { "epoch": 8.02, "learning_rate": 3.554207033606587e-05, "loss": 0.2323, "step": 25100 }, { "epoch": 8.05, "learning_rate": 3.550658291635615e-05, "loss": 0.2158, "step": 25200 }, { "epoch": 8.08, "learning_rate": 3.547109549664644e-05, "loss": 0.21, "step": 25300 }, { "epoch": 8.11, "learning_rate": 3.5435608076936726e-05, "loss": 0.2102, "step": 25400 }, { "epoch": 8.14, "learning_rate": 3.5400120657227014e-05, "loss": 0.2071, "step": 25500 }, { "epoch": 8.18, "learning_rate": 3.53646332375173e-05, "loss": 0.2198, "step": 25600 }, { "epoch": 8.21, "learning_rate": 3.532914581780759e-05, "loss": 0.2193, "step": 25700 }, { "epoch": 8.24, "learning_rate": 3.529365839809788e-05, "loss": 0.2166, "step": 25800 }, { "epoch": 8.27, "learning_rate": 3.5258170978388166e-05, "loss": 0.2237, "step": 25900 }, { "epoch": 8.3, "learning_rate": 3.5222683558678454e-05, "loss": 0.2241, "step": 26000 }, { "epoch": 8.34, "learning_rate": 3.518719613896874e-05, "loss": 0.2132, "step": 26100 }, { "epoch": 8.37, "learning_rate": 3.515170871925902e-05, "loss": 0.2101, "step": 26200 }, { "epoch": 8.4, "learning_rate": 3.511622129954931e-05, "loss": 0.227, "step": 26300 }, { "epoch": 8.43, "learning_rate": 3.50807338798396e-05, "loss": 0.2221, "step": 26400 }, { "epoch": 8.46, "learning_rate": 3.504560133432698e-05, "loss": 0.2181, "step": 26500 }, { "epoch": 8.5, "learning_rate": 3.501011391461727e-05, "loss": 0.2208, "step": 26600 }, { "epoch": 8.53, "learning_rate": 3.4974626494907555e-05, "loss": 0.2192, "step": 26700 }, { "epoch": 8.56, "learning_rate": 3.493913907519784e-05, "loss": 0.2227, "step": 26800 }, { "epoch": 8.59, "learning_rate": 3.490365165548813e-05, "loss": 0.2308, "step": 26900 }, { "epoch": 8.62, "learning_rate": 3.486816423577842e-05, "loss": 0.22, "step": 27000 }, { "epoch": 8.66, "learning_rate": 3.4832676816068706e-05, "loss": 0.2247, "step": 27100 }, { "epoch": 8.69, "learning_rate": 3.4797189396358994e-05, "loss": 0.2148, "step": 27200 }, { "epoch": 8.72, "learning_rate": 3.476170197664928e-05, "loss": 0.2136, "step": 27300 }, { "epoch": 8.75, "learning_rate": 3.472621455693957e-05, "loss": 0.2282, "step": 27400 }, { "epoch": 8.78, "learning_rate": 3.469072713722986e-05, "loss": 0.2266, "step": 27500 }, { "epoch": 8.82, "learning_rate": 3.4655239717520146e-05, "loss": 0.2198, "step": 27600 }, { "epoch": 8.85, "learning_rate": 3.461975229781043e-05, "loss": 0.2243, "step": 27700 }, { "epoch": 8.88, "learning_rate": 3.4584264878100715e-05, "loss": 0.2358, "step": 27800 }, { "epoch": 8.91, "learning_rate": 3.4548777458391e-05, "loss": 0.2255, "step": 27900 }, { "epoch": 8.94, "learning_rate": 3.451329003868129e-05, "loss": 0.2258, "step": 28000 }, { "epoch": 8.97, "learning_rate": 3.447780261897158e-05, "loss": 0.2228, "step": 28100 }, { "epoch": 9.01, "learning_rate": 3.4442315199261866e-05, "loss": 0.2183, "step": 28200 }, { "epoch": 9.04, "learning_rate": 3.440682777955215e-05, "loss": 0.1988, "step": 28300 }, { "epoch": 9.07, "learning_rate": 3.4371340359842436e-05, "loss": 0.188, "step": 28400 }, { "epoch": 9.1, "learning_rate": 3.4335852940132723e-05, "loss": 0.1963, "step": 28500 }, { "epoch": 9.13, "learning_rate": 3.430036552042301e-05, "loss": 0.1919, "step": 28600 }, { "epoch": 9.17, "learning_rate": 3.42648781007133e-05, "loss": 0.1906, "step": 28700 }, { "epoch": 9.2, "learning_rate": 3.422939068100359e-05, "loss": 0.1911, "step": 28800 }, { "epoch": 9.23, "learning_rate": 3.4193903261293875e-05, "loss": 0.1971, "step": 28900 }, { "epoch": 9.26, "learning_rate": 3.415841584158416e-05, "loss": 0.1931, "step": 29000 }, { "epoch": 9.29, "learning_rate": 3.412292842187445e-05, "loss": 0.1951, "step": 29100 }, { "epoch": 9.33, "learning_rate": 3.408744100216474e-05, "loss": 0.2008, "step": 29200 }, { "epoch": 9.36, "learning_rate": 3.405195358245503e-05, "loss": 0.2053, "step": 29300 }, { "epoch": 9.39, "learning_rate": 3.401646616274531e-05, "loss": 0.2015, "step": 29400 }, { "epoch": 9.42, "learning_rate": 3.3980978743035596e-05, "loss": 0.193, "step": 29500 }, { "epoch": 9.45, "learning_rate": 3.3945491323325884e-05, "loss": 0.1955, "step": 29600 }, { "epoch": 9.49, "learning_rate": 3.391000390361617e-05, "loss": 0.1993, "step": 29700 }, { "epoch": 9.52, "learning_rate": 3.387451648390646e-05, "loss": 0.2043, "step": 29800 }, { "epoch": 9.55, "learning_rate": 3.383902906419675e-05, "loss": 0.1955, "step": 29900 }, { "epoch": 9.58, "learning_rate": 3.380354164448703e-05, "loss": 0.1961, "step": 30000 }, { "epoch": 9.61, "learning_rate": 3.3768054224777316e-05, "loss": 0.1961, "step": 30100 }, { "epoch": 9.65, "learning_rate": 3.3732566805067604e-05, "loss": 0.196, "step": 30200 }, { "epoch": 9.68, "learning_rate": 3.369707938535789e-05, "loss": 0.2088, "step": 30300 }, { "epoch": 9.71, "learning_rate": 3.366159196564818e-05, "loss": 0.2031, "step": 30400 }, { "epoch": 9.74, "learning_rate": 3.362610454593847e-05, "loss": 0.1949, "step": 30500 }, { "epoch": 9.77, "learning_rate": 3.3590617126228756e-05, "loss": 0.1988, "step": 30600 }, { "epoch": 9.81, "learning_rate": 3.3555129706519044e-05, "loss": 0.2061, "step": 30700 }, { "epoch": 9.84, "learning_rate": 3.351964228680933e-05, "loss": 0.2022, "step": 30800 }, { "epoch": 9.87, "learning_rate": 3.348450974129671e-05, "loss": 0.1965, "step": 30900 }, { "epoch": 9.9, "learning_rate": 3.3449022321587e-05, "loss": 0.1999, "step": 31000 }, { "epoch": 9.93, "learning_rate": 3.341353490187729e-05, "loss": 0.2065, "step": 31100 }, { "epoch": 9.96, "learning_rate": 3.3378047482167576e-05, "loss": 0.2031, "step": 31200 }, { "epoch": 10.0, "learning_rate": 3.3342560062457864e-05, "loss": 0.2078, "step": 31300 }, { "epoch": 10.03, "learning_rate": 3.330707264274815e-05, "loss": 0.1733, "step": 31400 }, { "epoch": 10.06, "learning_rate": 3.327158522303843e-05, "loss": 0.1654, "step": 31500 }, { "epoch": 10.09, "learning_rate": 3.323609780332872e-05, "loss": 0.1674, "step": 31600 }, { "epoch": 10.12, "learning_rate": 3.320061038361901e-05, "loss": 0.1725, "step": 31700 }, { "epoch": 10.16, "learning_rate": 3.3165122963909297e-05, "loss": 0.1761, "step": 31800 }, { "epoch": 10.19, "learning_rate": 3.3129635544199584e-05, "loss": 0.1757, "step": 31900 }, { "epoch": 10.22, "learning_rate": 3.309414812448987e-05, "loss": 0.1781, "step": 32000 }, { "epoch": 10.25, "learning_rate": 3.3058660704780153e-05, "loss": 0.1716, "step": 32100 }, { "epoch": 10.28, "learning_rate": 3.302317328507044e-05, "loss": 0.1717, "step": 32200 }, { "epoch": 10.32, "learning_rate": 3.298768586536073e-05, "loss": 0.179, "step": 32300 }, { "epoch": 10.35, "learning_rate": 3.295219844565102e-05, "loss": 0.174, "step": 32400 }, { "epoch": 10.38, "learning_rate": 3.2916711025941305e-05, "loss": 0.178, "step": 32500 }, { "epoch": 10.41, "learning_rate": 3.288122360623159e-05, "loss": 0.1765, "step": 32600 }, { "epoch": 10.44, "learning_rate": 3.284573618652188e-05, "loss": 0.1824, "step": 32700 }, { "epoch": 10.48, "learning_rate": 3.281024876681217e-05, "loss": 0.1806, "step": 32800 }, { "epoch": 10.51, "learning_rate": 3.277511622129955e-05, "loss": 0.1785, "step": 32900 }, { "epoch": 10.54, "learning_rate": 3.2739983675786936e-05, "loss": 0.1698, "step": 33000 }, { "epoch": 10.57, "learning_rate": 3.2704496256077224e-05, "loss": 0.1734, "step": 33100 }, { "epoch": 10.6, "learning_rate": 3.2669008836367505e-05, "loss": 0.1761, "step": 33200 }, { "epoch": 10.64, "learning_rate": 3.263352141665779e-05, "loss": 0.1786, "step": 33300 }, { "epoch": 10.67, "learning_rate": 3.259803399694808e-05, "loss": 0.1767, "step": 33400 }, { "epoch": 10.7, "learning_rate": 3.256254657723837e-05, "loss": 0.1881, "step": 33500 }, { "epoch": 10.73, "learning_rate": 3.252705915752866e-05, "loss": 0.1792, "step": 33600 }, { "epoch": 10.76, "learning_rate": 3.2491571737818945e-05, "loss": 0.1832, "step": 33700 }, { "epoch": 10.8, "learning_rate": 3.245608431810923e-05, "loss": 0.1889, "step": 33800 }, { "epoch": 10.83, "learning_rate": 3.242059689839952e-05, "loss": 0.1824, "step": 33900 }, { "epoch": 10.86, "learning_rate": 3.238510947868981e-05, "loss": 0.1746, "step": 34000 }, { "epoch": 10.89, "learning_rate": 3.2349622058980096e-05, "loss": 0.1735, "step": 34100 }, { "epoch": 10.92, "learning_rate": 3.2314134639270384e-05, "loss": 0.1822, "step": 34200 }, { "epoch": 10.95, "learning_rate": 3.227864721956067e-05, "loss": 0.1768, "step": 34300 }, { "epoch": 10.99, "learning_rate": 3.224315979985095e-05, "loss": 0.1812, "step": 34400 }, { "epoch": 11.02, "learning_rate": 3.220767238014124e-05, "loss": 0.1645, "step": 34500 }, { "epoch": 11.05, "learning_rate": 3.217218496043153e-05, "loss": 0.1483, "step": 34600 }, { "epoch": 11.08, "learning_rate": 3.213669754072182e-05, "loss": 0.1615, "step": 34700 }, { "epoch": 11.11, "learning_rate": 3.2101210121012105e-05, "loss": 0.1637, "step": 34800 }, { "epoch": 11.15, "learning_rate": 3.206572270130239e-05, "loss": 0.1497, "step": 34900 }, { "epoch": 11.18, "learning_rate": 3.2030235281592674e-05, "loss": 0.1524, "step": 35000 }, { "epoch": 11.21, "learning_rate": 3.199474786188296e-05, "loss": 0.1545, "step": 35100 }, { "epoch": 11.24, "learning_rate": 3.195926044217325e-05, "loss": 0.1526, "step": 35200 }, { "epoch": 11.27, "learning_rate": 3.192377302246354e-05, "loss": 0.1499, "step": 35300 }, { "epoch": 11.31, "learning_rate": 3.1888285602753826e-05, "loss": 0.1553, "step": 35400 }, { "epoch": 11.34, "learning_rate": 3.1852798183044114e-05, "loss": 0.1583, "step": 35500 }, { "epoch": 11.37, "learning_rate": 3.18173107633344e-05, "loss": 0.156, "step": 35600 }, { "epoch": 11.4, "learning_rate": 3.178182334362469e-05, "loss": 0.1582, "step": 35700 }, { "epoch": 11.43, "learning_rate": 3.174633592391498e-05, "loss": 0.155, "step": 35800 }, { "epoch": 11.47, "learning_rate": 3.1710848504205265e-05, "loss": 0.1546, "step": 35900 }, { "epoch": 11.5, "learning_rate": 3.167536108449555e-05, "loss": 0.157, "step": 36000 }, { "epoch": 11.53, "learning_rate": 3.1639873664785834e-05, "loss": 0.1644, "step": 36100 }, { "epoch": 11.56, "learning_rate": 3.160438624507612e-05, "loss": 0.1627, "step": 36200 }, { "epoch": 11.59, "learning_rate": 3.156889882536641e-05, "loss": 0.165, "step": 36300 }, { "epoch": 11.63, "learning_rate": 3.15334114056567e-05, "loss": 0.1602, "step": 36400 }, { "epoch": 11.66, "learning_rate": 3.1497923985946986e-05, "loss": 0.1694, "step": 36500 }, { "epoch": 11.69, "learning_rate": 3.1462436566237274e-05, "loss": 0.1604, "step": 36600 }, { "epoch": 11.72, "learning_rate": 3.1426949146527555e-05, "loss": 0.1725, "step": 36700 }, { "epoch": 11.75, "learning_rate": 3.139146172681784e-05, "loss": 0.1577, "step": 36800 }, { "epoch": 11.79, "learning_rate": 3.135597430710813e-05, "loss": 0.1741, "step": 36900 }, { "epoch": 11.82, "learning_rate": 3.132048688739842e-05, "loss": 0.1547, "step": 37000 }, { "epoch": 11.85, "learning_rate": 3.1284999467688707e-05, "loss": 0.1621, "step": 37100 }, { "epoch": 11.88, "learning_rate": 3.1249512047978994e-05, "loss": 0.1663, "step": 37200 }, { "epoch": 11.91, "learning_rate": 3.121437950246638e-05, "loss": 0.1594, "step": 37300 }, { "epoch": 11.95, "learning_rate": 3.117889208275667e-05, "loss": 0.174, "step": 37400 }, { "epoch": 11.98, "learning_rate": 3.114340466304696e-05, "loss": 0.1755, "step": 37500 }, { "epoch": 12.01, "learning_rate": 3.110791724333724e-05, "loss": 0.1627, "step": 37600 }, { "epoch": 12.04, "learning_rate": 3.1072429823627526e-05, "loss": 0.1445, "step": 37700 }, { "epoch": 12.07, "learning_rate": 3.1036942403917814e-05, "loss": 0.1399, "step": 37800 }, { "epoch": 12.1, "learning_rate": 3.10014549842081e-05, "loss": 0.1475, "step": 37900 }, { "epoch": 12.14, "learning_rate": 3.096596756449839e-05, "loss": 0.1373, "step": 38000 }, { "epoch": 12.17, "learning_rate": 3.093048014478868e-05, "loss": 0.1379, "step": 38100 }, { "epoch": 12.2, "learning_rate": 3.089499272507896e-05, "loss": 0.1389, "step": 38200 }, { "epoch": 12.23, "learning_rate": 3.085950530536925e-05, "loss": 0.1394, "step": 38300 }, { "epoch": 12.26, "learning_rate": 3.0824017885659535e-05, "loss": 0.1434, "step": 38400 }, { "epoch": 12.3, "learning_rate": 3.078853046594982e-05, "loss": 0.1467, "step": 38500 }, { "epoch": 12.33, "learning_rate": 3.075339792043721e-05, "loss": 0.1272, "step": 38600 }, { "epoch": 12.36, "learning_rate": 3.07179105007275e-05, "loss": 0.1363, "step": 38700 }, { "epoch": 12.39, "learning_rate": 3.0682423081017786e-05, "loss": 0.1467, "step": 38800 }, { "epoch": 12.42, "learning_rate": 3.0646935661308074e-05, "loss": 0.1389, "step": 38900 }, { "epoch": 12.46, "learning_rate": 3.0611448241598355e-05, "loss": 0.1447, "step": 39000 }, { "epoch": 12.49, "learning_rate": 3.057596082188864e-05, "loss": 0.1524, "step": 39100 }, { "epoch": 12.52, "learning_rate": 3.054047340217893e-05, "loss": 0.1434, "step": 39200 }, { "epoch": 12.55, "learning_rate": 3.050498598246922e-05, "loss": 0.142, "step": 39300 }, { "epoch": 12.58, "learning_rate": 3.0469498562759506e-05, "loss": 0.1495, "step": 39400 }, { "epoch": 12.62, "learning_rate": 3.0434011143049794e-05, "loss": 0.1451, "step": 39500 }, { "epoch": 12.65, "learning_rate": 3.0398523723340075e-05, "loss": 0.1464, "step": 39600 }, { "epoch": 12.68, "learning_rate": 3.0363036303630363e-05, "loss": 0.1419, "step": 39700 }, { "epoch": 12.71, "learning_rate": 3.032754888392065e-05, "loss": 0.1451, "step": 39800 }, { "epoch": 12.74, "learning_rate": 3.029206146421094e-05, "loss": 0.151, "step": 39900 }, { "epoch": 12.78, "learning_rate": 3.0256574044501227e-05, "loss": 0.1442, "step": 40000 }, { "epoch": 12.81, "learning_rate": 3.0221086624791515e-05, "loss": 0.1553, "step": 40100 }, { "epoch": 12.84, "learning_rate": 3.01855992050818e-05, "loss": 0.1481, "step": 40200 }, { "epoch": 12.87, "learning_rate": 3.0150111785372087e-05, "loss": 0.1442, "step": 40300 }, { "epoch": 12.9, "learning_rate": 3.0114624365662375e-05, "loss": 0.1504, "step": 40400 }, { "epoch": 12.94, "learning_rate": 3.0079136945952663e-05, "loss": 0.1476, "step": 40500 }, { "epoch": 12.97, "learning_rate": 3.004364952624295e-05, "loss": 0.145, "step": 40600 }, { "epoch": 13.0, "learning_rate": 3.000816210653324e-05, "loss": 0.1549, "step": 40700 }, { "epoch": 13.03, "learning_rate": 2.997267468682352e-05, "loss": 0.1253, "step": 40800 }, { "epoch": 13.06, "learning_rate": 2.9937187267113808e-05, "loss": 0.1259, "step": 40900 }, { "epoch": 13.09, "learning_rate": 2.9901699847404096e-05, "loss": 0.1334, "step": 41000 }, { "epoch": 13.13, "learning_rate": 2.9866212427694384e-05, "loss": 0.1184, "step": 41100 }, { "epoch": 13.16, "learning_rate": 2.9830725007984672e-05, "loss": 0.1323, "step": 41200 }, { "epoch": 13.19, "learning_rate": 2.979523758827496e-05, "loss": 0.1294, "step": 41300 }, { "epoch": 13.22, "learning_rate": 2.9759750168565244e-05, "loss": 0.1283, "step": 41400 }, { "epoch": 13.25, "learning_rate": 2.9724262748855532e-05, "loss": 0.1313, "step": 41500 }, { "epoch": 13.29, "learning_rate": 2.968877532914582e-05, "loss": 0.1336, "step": 41600 }, { "epoch": 13.32, "learning_rate": 2.9653287909436108e-05, "loss": 0.1329, "step": 41700 }, { "epoch": 13.35, "learning_rate": 2.9617800489726396e-05, "loss": 0.1336, "step": 41800 }, { "epoch": 13.38, "learning_rate": 2.958231307001668e-05, "loss": 0.1305, "step": 41900 }, { "epoch": 13.41, "learning_rate": 2.954682565030697e-05, "loss": 0.1308, "step": 42000 }, { "epoch": 13.45, "learning_rate": 2.9511338230597256e-05, "loss": 0.1381, "step": 42100 }, { "epoch": 13.48, "learning_rate": 2.9475850810887544e-05, "loss": 0.1379, "step": 42200 }, { "epoch": 13.51, "learning_rate": 2.9440363391177832e-05, "loss": 0.1359, "step": 42300 }, { "epoch": 13.54, "learning_rate": 2.940487597146812e-05, "loss": 0.1273, "step": 42400 }, { "epoch": 13.57, "learning_rate": 2.93693885517584e-05, "loss": 0.1331, "step": 42500 }, { "epoch": 13.61, "learning_rate": 2.933390113204869e-05, "loss": 0.1315, "step": 42600 }, { "epoch": 13.64, "learning_rate": 2.9298413712338977e-05, "loss": 0.1335, "step": 42700 }, { "epoch": 13.67, "learning_rate": 2.9262926292629265e-05, "loss": 0.1344, "step": 42800 }, { "epoch": 13.7, "learning_rate": 2.9227438872919553e-05, "loss": 0.1411, "step": 42900 }, { "epoch": 13.73, "learning_rate": 2.919195145320984e-05, "loss": 0.132, "step": 43000 }, { "epoch": 13.77, "learning_rate": 2.9156464033500125e-05, "loss": 0.1338, "step": 43100 }, { "epoch": 13.8, "learning_rate": 2.9120976613790413e-05, "loss": 0.125, "step": 43200 }, { "epoch": 13.83, "learning_rate": 2.90858440682778e-05, "loss": 0.1389, "step": 43300 }, { "epoch": 13.86, "learning_rate": 2.9050356648568085e-05, "loss": 0.1386, "step": 43400 }, { "epoch": 13.89, "learning_rate": 2.9014869228858373e-05, "loss": 0.13, "step": 43500 }, { "epoch": 13.93, "learning_rate": 2.897938180914866e-05, "loss": 0.1351, "step": 43600 }, { "epoch": 13.96, "learning_rate": 2.894389438943895e-05, "loss": 0.1341, "step": 43700 }, { "epoch": 13.99, "learning_rate": 2.8908406969729236e-05, "loss": 0.1399, "step": 43800 }, { "epoch": 14.02, "learning_rate": 2.887291955001952e-05, "loss": 0.1186, "step": 43900 }, { "epoch": 14.05, "learning_rate": 2.8837432130309805e-05, "loss": 0.1124, "step": 44000 }, { "epoch": 14.08, "learning_rate": 2.8801944710600093e-05, "loss": 0.1156, "step": 44100 }, { "epoch": 14.12, "learning_rate": 2.876645729089038e-05, "loss": 0.1161, "step": 44200 }, { "epoch": 14.15, "learning_rate": 2.873096987118067e-05, "loss": 0.113, "step": 44300 }, { "epoch": 14.18, "learning_rate": 2.8695482451470957e-05, "loss": 0.1219, "step": 44400 }, { "epoch": 14.21, "learning_rate": 2.8659995031761245e-05, "loss": 0.1228, "step": 44500 }, { "epoch": 14.24, "learning_rate": 2.862450761205153e-05, "loss": 0.1208, "step": 44600 }, { "epoch": 14.28, "learning_rate": 2.8589020192341817e-05, "loss": 0.1213, "step": 44700 }, { "epoch": 14.31, "learning_rate": 2.8553532772632105e-05, "loss": 0.1147, "step": 44800 }, { "epoch": 14.34, "learning_rate": 2.8518045352922393e-05, "loss": 0.1198, "step": 44900 }, { "epoch": 14.37, "learning_rate": 2.848255793321268e-05, "loss": 0.1144, "step": 45000 }, { "epoch": 14.4, "learning_rate": 2.8447070513502962e-05, "loss": 0.1172, "step": 45100 }, { "epoch": 14.44, "learning_rate": 2.841158309379325e-05, "loss": 0.1158, "step": 45200 }, { "epoch": 14.47, "learning_rate": 2.8376095674083538e-05, "loss": 0.1188, "step": 45300 }, { "epoch": 14.5, "learning_rate": 2.8340608254373826e-05, "loss": 0.1249, "step": 45400 }, { "epoch": 14.53, "learning_rate": 2.8305120834664114e-05, "loss": 0.1208, "step": 45500 }, { "epoch": 14.56, "learning_rate": 2.8269633414954402e-05, "loss": 0.125, "step": 45600 }, { "epoch": 14.6, "learning_rate": 2.8234145995244686e-05, "loss": 0.1153, "step": 45700 }, { "epoch": 14.63, "learning_rate": 2.8199013449732073e-05, "loss": 0.1262, "step": 45800 }, { "epoch": 14.66, "learning_rate": 2.816352603002236e-05, "loss": 0.1242, "step": 45900 }, { "epoch": 14.69, "learning_rate": 2.8128038610312646e-05, "loss": 0.1286, "step": 46000 }, { "epoch": 14.72, "learning_rate": 2.8092551190602934e-05, "loss": 0.1238, "step": 46100 }, { "epoch": 14.76, "learning_rate": 2.805706377089322e-05, "loss": 0.1303, "step": 46200 }, { "epoch": 14.79, "learning_rate": 2.802157635118351e-05, "loss": 0.1249, "step": 46300 }, { "epoch": 14.82, "learning_rate": 2.7986088931473797e-05, "loss": 0.1301, "step": 46400 }, { "epoch": 14.85, "learning_rate": 2.7950601511764085e-05, "loss": 0.1276, "step": 46500 }, { "epoch": 14.88, "learning_rate": 2.7915114092054366e-05, "loss": 0.1271, "step": 46600 }, { "epoch": 14.92, "learning_rate": 2.7879626672344654e-05, "loss": 0.1275, "step": 46700 }, { "epoch": 14.95, "learning_rate": 2.7844139252634942e-05, "loss": 0.1239, "step": 46800 }, { "epoch": 14.98, "learning_rate": 2.780865183292523e-05, "loss": 0.1177, "step": 46900 }, { "epoch": 15.01, "learning_rate": 2.7773164413215518e-05, "loss": 0.1172, "step": 47000 }, { "epoch": 15.04, "learning_rate": 2.7737676993505806e-05, "loss": 0.105, "step": 47100 }, { "epoch": 15.08, "learning_rate": 2.770254444799319e-05, "loss": 0.1079, "step": 47200 }, { "epoch": 15.11, "learning_rate": 2.7667057028283478e-05, "loss": 0.1038, "step": 47300 }, { "epoch": 15.14, "learning_rate": 2.7631569608573765e-05, "loss": 0.1056, "step": 47400 }, { "epoch": 15.17, "learning_rate": 2.759608218886405e-05, "loss": 0.1102, "step": 47500 }, { "epoch": 15.2, "learning_rate": 2.7560594769154338e-05, "loss": 0.1105, "step": 47600 }, { "epoch": 15.23, "learning_rate": 2.7525107349444626e-05, "loss": 0.1076, "step": 47700 }, { "epoch": 15.27, "learning_rate": 2.748961992973491e-05, "loss": 0.1072, "step": 47800 }, { "epoch": 15.3, "learning_rate": 2.7454132510025198e-05, "loss": 0.109, "step": 47900 }, { "epoch": 15.33, "learning_rate": 2.7418645090315483e-05, "loss": 0.1042, "step": 48000 }, { "epoch": 15.36, "learning_rate": 2.738315767060577e-05, "loss": 0.1077, "step": 48100 }, { "epoch": 15.39, "learning_rate": 2.734767025089606e-05, "loss": 0.1127, "step": 48200 }, { "epoch": 15.43, "learning_rate": 2.7312182831186346e-05, "loss": 0.1048, "step": 48300 }, { "epoch": 15.46, "learning_rate": 2.7276695411476634e-05, "loss": 0.1131, "step": 48400 }, { "epoch": 15.49, "learning_rate": 2.7241207991766922e-05, "loss": 0.1096, "step": 48500 }, { "epoch": 15.52, "learning_rate": 2.7205720572057207e-05, "loss": 0.1134, "step": 48600 }, { "epoch": 15.55, "learning_rate": 2.7170233152347495e-05, "loss": 0.1117, "step": 48700 }, { "epoch": 15.59, "learning_rate": 2.7134745732637783e-05, "loss": 0.111, "step": 48800 }, { "epoch": 15.62, "learning_rate": 2.709925831292807e-05, "loss": 0.1171, "step": 48900 }, { "epoch": 15.65, "learning_rate": 2.706377089321836e-05, "loss": 0.1266, "step": 49000 }, { "epoch": 15.68, "learning_rate": 2.7028283473508646e-05, "loss": 0.114, "step": 49100 }, { "epoch": 15.71, "learning_rate": 2.6992796053798928e-05, "loss": 0.1158, "step": 49200 }, { "epoch": 15.75, "learning_rate": 2.6957663508286315e-05, "loss": 0.1097, "step": 49300 }, { "epoch": 15.78, "learning_rate": 2.6922176088576602e-05, "loss": 0.118, "step": 49400 }, { "epoch": 15.81, "learning_rate": 2.6886688668866887e-05, "loss": 0.1153, "step": 49500 }, { "epoch": 15.84, "learning_rate": 2.6851201249157175e-05, "loss": 0.111, "step": 49600 }, { "epoch": 15.87, "learning_rate": 2.6815713829447463e-05, "loss": 0.1213, "step": 49700 }, { "epoch": 15.91, "learning_rate": 2.678022640973775e-05, "loss": 0.1105, "step": 49800 }, { "epoch": 15.94, "learning_rate": 2.674473899002804e-05, "loss": 0.1128, "step": 49900 }, { "epoch": 15.97, "learning_rate": 2.6709251570318327e-05, "loss": 0.1198, "step": 50000 }, { "epoch": 16.0, "learning_rate": 2.667376415060861e-05, "loss": 0.1075, "step": 50100 }, { "epoch": 16.03, "learning_rate": 2.66382767308989e-05, "loss": 0.0971, "step": 50200 }, { "epoch": 16.07, "learning_rate": 2.6602789311189187e-05, "loss": 0.0958, "step": 50300 }, { "epoch": 16.1, "learning_rate": 2.6567301891479475e-05, "loss": 0.1015, "step": 50400 }, { "epoch": 16.13, "learning_rate": 2.6531814471769763e-05, "loss": 0.1073, "step": 50500 }, { "epoch": 16.16, "learning_rate": 2.649632705206005e-05, "loss": 0.0994, "step": 50600 }, { "epoch": 16.19, "learning_rate": 2.6460839632350332e-05, "loss": 0.0928, "step": 50700 }, { "epoch": 16.22, "learning_rate": 2.642535221264062e-05, "loss": 0.0924, "step": 50800 }, { "epoch": 16.26, "learning_rate": 2.6389864792930908e-05, "loss": 0.1044, "step": 50900 }, { "epoch": 16.29, "learning_rate": 2.6354377373221195e-05, "loss": 0.0998, "step": 51000 }, { "epoch": 16.32, "learning_rate": 2.6318889953511483e-05, "loss": 0.0946, "step": 51100 }, { "epoch": 16.35, "learning_rate": 2.6283402533801768e-05, "loss": 0.0924, "step": 51200 }, { "epoch": 16.38, "learning_rate": 2.6247915114092056e-05, "loss": 0.1029, "step": 51300 }, { "epoch": 16.42, "learning_rate": 2.6212427694382344e-05, "loss": 0.0986, "step": 51400 }, { "epoch": 16.45, "learning_rate": 2.617694027467263e-05, "loss": 0.095, "step": 51500 }, { "epoch": 16.48, "learning_rate": 2.614145285496292e-05, "loss": 0.0982, "step": 51600 }, { "epoch": 16.51, "learning_rate": 2.6105965435253207e-05, "loss": 0.0982, "step": 51700 }, { "epoch": 16.54, "learning_rate": 2.607047801554349e-05, "loss": 0.1013, "step": 51800 }, { "epoch": 16.58, "learning_rate": 2.6034990595833777e-05, "loss": 0.1001, "step": 51900 }, { "epoch": 16.61, "learning_rate": 2.5999503176124064e-05, "loss": 0.1047, "step": 52000 }, { "epoch": 16.64, "learning_rate": 2.5964015756414352e-05, "loss": 0.0965, "step": 52100 }, { "epoch": 16.67, "learning_rate": 2.592852833670464e-05, "loss": 0.0995, "step": 52200 }, { "epoch": 16.7, "learning_rate": 2.5893040916994928e-05, "loss": 0.1036, "step": 52300 }, { "epoch": 16.74, "learning_rate": 2.5857553497285213e-05, "loss": 0.1052, "step": 52400 }, { "epoch": 16.77, "learning_rate": 2.58220660775755e-05, "loss": 0.1067, "step": 52500 }, { "epoch": 16.8, "learning_rate": 2.578657865786579e-05, "loss": 0.1033, "step": 52600 }, { "epoch": 16.83, "learning_rate": 2.5751091238156076e-05, "loss": 0.0998, "step": 52700 }, { "epoch": 16.86, "learning_rate": 2.5715603818446364e-05, "loss": 0.1022, "step": 52800 }, { "epoch": 16.9, "learning_rate": 2.5680116398736652e-05, "loss": 0.1035, "step": 52900 }, { "epoch": 16.93, "learning_rate": 2.5644628979026937e-05, "loss": 0.1036, "step": 53000 }, { "epoch": 16.96, "learning_rate": 2.5609496433514324e-05, "loss": 0.098, "step": 53100 }, { "epoch": 16.99, "learning_rate": 2.557400901380461e-05, "loss": 0.0996, "step": 53200 }, { "epoch": 17.02, "learning_rate": 2.5538521594094893e-05, "loss": 0.0942, "step": 53300 }, { "epoch": 17.06, "learning_rate": 2.550303417438518e-05, "loss": 0.0881, "step": 53400 }, { "epoch": 17.09, "learning_rate": 2.546754675467547e-05, "loss": 0.0923, "step": 53500 }, { "epoch": 17.12, "learning_rate": 2.5432059334965757e-05, "loss": 0.0929, "step": 53600 }, { "epoch": 17.15, "learning_rate": 2.5396571915256044e-05, "loss": 0.0899, "step": 53700 }, { "epoch": 17.18, "learning_rate": 2.5361084495546332e-05, "loss": 0.0988, "step": 53800 }, { "epoch": 17.21, "learning_rate": 2.5325597075836617e-05, "loss": 0.0871, "step": 53900 }, { "epoch": 17.25, "learning_rate": 2.5290109656126905e-05, "loss": 0.0888, "step": 54000 }, { "epoch": 17.28, "learning_rate": 2.5254622236417193e-05, "loss": 0.0949, "step": 54100 }, { "epoch": 17.31, "learning_rate": 2.521913481670748e-05, "loss": 0.086, "step": 54200 }, { "epoch": 17.34, "learning_rate": 2.518364739699777e-05, "loss": 0.0898, "step": 54300 }, { "epoch": 17.37, "learning_rate": 2.514815997728805e-05, "loss": 0.087, "step": 54400 }, { "epoch": 17.41, "learning_rate": 2.5112672557578338e-05, "loss": 0.0967, "step": 54500 }, { "epoch": 17.44, "learning_rate": 2.5077185137868625e-05, "loss": 0.09, "step": 54600 }, { "epoch": 17.47, "learning_rate": 2.5041697718158913e-05, "loss": 0.0945, "step": 54700 }, { "epoch": 17.5, "learning_rate": 2.50062102984492e-05, "loss": 0.0932, "step": 54800 }, { "epoch": 17.53, "learning_rate": 2.497072287873949e-05, "loss": 0.0971, "step": 54900 }, { "epoch": 17.57, "learning_rate": 2.4935235459029774e-05, "loss": 0.0929, "step": 55000 }, { "epoch": 17.6, "learning_rate": 2.489974803932006e-05, "loss": 0.0891, "step": 55100 }, { "epoch": 17.63, "learning_rate": 2.486426061961035e-05, "loss": 0.0932, "step": 55200 }, { "epoch": 17.66, "learning_rate": 2.4828773199900637e-05, "loss": 0.0941, "step": 55300 }, { "epoch": 17.69, "learning_rate": 2.4793285780190925e-05, "loss": 0.093, "step": 55400 }, { "epoch": 17.73, "learning_rate": 2.4757798360481213e-05, "loss": 0.0972, "step": 55500 }, { "epoch": 17.76, "learning_rate": 2.4722310940771498e-05, "loss": 0.0934, "step": 55600 }, { "epoch": 17.79, "learning_rate": 2.4686823521061786e-05, "loss": 0.0997, "step": 55700 }, { "epoch": 17.82, "learning_rate": 2.4651336101352074e-05, "loss": 0.0958, "step": 55800 }, { "epoch": 17.85, "learning_rate": 2.461584868164236e-05, "loss": 0.1009, "step": 55900 }, { "epoch": 17.89, "learning_rate": 2.458036126193265e-05, "loss": 0.0987, "step": 56000 }, { "epoch": 17.92, "learning_rate": 2.4544873842222937e-05, "loss": 0.0919, "step": 56100 }, { "epoch": 17.95, "learning_rate": 2.450938642251322e-05, "loss": 0.0909, "step": 56200 }, { "epoch": 17.98, "learning_rate": 2.4473899002803506e-05, "loss": 0.1017, "step": 56300 }, { "epoch": 18.01, "learning_rate": 2.4438411583093794e-05, "loss": 0.0951, "step": 56400 }, { "epoch": 18.05, "learning_rate": 2.4402924163384082e-05, "loss": 0.0793, "step": 56500 }, { "epoch": 18.08, "learning_rate": 2.4367791617871466e-05, "loss": 0.0806, "step": 56600 }, { "epoch": 18.11, "learning_rate": 2.4332304198161754e-05, "loss": 0.0809, "step": 56700 }, { "epoch": 18.14, "learning_rate": 2.4296816778452042e-05, "loss": 0.0857, "step": 56800 }, { "epoch": 18.17, "learning_rate": 2.426132935874233e-05, "loss": 0.0825, "step": 56900 }, { "epoch": 18.21, "learning_rate": 2.4225841939032614e-05, "loss": 0.0807, "step": 57000 }, { "epoch": 18.24, "learning_rate": 2.4190354519322902e-05, "loss": 0.0856, "step": 57100 }, { "epoch": 18.27, "learning_rate": 2.415486709961319e-05, "loss": 0.0916, "step": 57200 }, { "epoch": 18.3, "learning_rate": 2.4119379679903478e-05, "loss": 0.0818, "step": 57300 }, { "epoch": 18.33, "learning_rate": 2.4083892260193766e-05, "loss": 0.0884, "step": 57400 }, { "epoch": 18.36, "learning_rate": 2.404840484048405e-05, "loss": 0.0807, "step": 57500 }, { "epoch": 18.4, "learning_rate": 2.4012917420774335e-05, "loss": 0.0875, "step": 57600 }, { "epoch": 18.43, "learning_rate": 2.3977430001064623e-05, "loss": 0.0908, "step": 57700 }, { "epoch": 18.46, "learning_rate": 2.394194258135491e-05, "loss": 0.0851, "step": 57800 }, { "epoch": 18.49, "learning_rate": 2.39064551616452e-05, "loss": 0.093, "step": 57900 }, { "epoch": 18.52, "learning_rate": 2.3870967741935486e-05, "loss": 0.0946, "step": 58000 }, { "epoch": 18.56, "learning_rate": 2.3835480322225774e-05, "loss": 0.0838, "step": 58100 }, { "epoch": 18.59, "learning_rate": 2.379999290251606e-05, "loss": 0.0833, "step": 58200 }, { "epoch": 18.62, "learning_rate": 2.3764505482806347e-05, "loss": 0.0884, "step": 58300 }, { "epoch": 18.65, "learning_rate": 2.3729018063096635e-05, "loss": 0.0891, "step": 58400 }, { "epoch": 18.68, "learning_rate": 2.3693530643386923e-05, "loss": 0.0876, "step": 58500 }, { "epoch": 18.72, "learning_rate": 2.365804322367721e-05, "loss": 0.0842, "step": 58600 }, { "epoch": 18.75, "learning_rate": 2.36225558039675e-05, "loss": 0.0893, "step": 58700 }, { "epoch": 18.78, "learning_rate": 2.358742325845488e-05, "loss": 0.0881, "step": 58800 }, { "epoch": 18.81, "learning_rate": 2.3551935838745167e-05, "loss": 0.0832, "step": 58900 }, { "epoch": 18.84, "learning_rate": 2.3516448419035455e-05, "loss": 0.0851, "step": 59000 }, { "epoch": 18.88, "learning_rate": 2.348096099932574e-05, "loss": 0.0879, "step": 59100 }, { "epoch": 18.91, "learning_rate": 2.3445473579616027e-05, "loss": 0.0836, "step": 59200 }, { "epoch": 18.94, "learning_rate": 2.3409986159906315e-05, "loss": 0.0899, "step": 59300 }, { "epoch": 18.97, "learning_rate": 2.3374498740196603e-05, "loss": 0.0896, "step": 59400 }, { "epoch": 19.0, "learning_rate": 2.333901132048689e-05, "loss": 0.0914, "step": 59500 }, { "epoch": 19.04, "learning_rate": 2.330352390077718e-05, "loss": 0.0786, "step": 59600 }, { "epoch": 19.07, "learning_rate": 2.3268036481067463e-05, "loss": 0.0783, "step": 59700 }, { "epoch": 19.1, "learning_rate": 2.323254906135775e-05, "loss": 0.0822, "step": 59800 }, { "epoch": 19.13, "learning_rate": 2.319706164164804e-05, "loss": 0.0743, "step": 59900 }, { "epoch": 19.16, "learning_rate": 2.3161574221938327e-05, "loss": 0.0748, "step": 60000 }, { "epoch": 19.2, "learning_rate": 2.3126086802228615e-05, "loss": 0.0751, "step": 60100 }, { "epoch": 19.23, "learning_rate": 2.3090599382518896e-05, "loss": 0.0787, "step": 60200 }, { "epoch": 19.26, "learning_rate": 2.3055111962809184e-05, "loss": 0.0744, "step": 60300 }, { "epoch": 19.29, "learning_rate": 2.3019624543099472e-05, "loss": 0.0801, "step": 60400 }, { "epoch": 19.32, "learning_rate": 2.298413712338976e-05, "loss": 0.0779, "step": 60500 }, { "epoch": 19.35, "learning_rate": 2.2948649703680048e-05, "loss": 0.0818, "step": 60600 }, { "epoch": 19.39, "learning_rate": 2.2913162283970335e-05, "loss": 0.0799, "step": 60700 }, { "epoch": 19.42, "learning_rate": 2.287767486426062e-05, "loss": 0.0787, "step": 60800 }, { "epoch": 19.45, "learning_rate": 2.2842187444550908e-05, "loss": 0.0819, "step": 60900 }, { "epoch": 19.48, "learning_rate": 2.2806700024841196e-05, "loss": 0.0657, "step": 61000 }, { "epoch": 19.51, "learning_rate": 2.2771212605131484e-05, "loss": 0.0781, "step": 61100 }, { "epoch": 19.55, "learning_rate": 2.273572518542177e-05, "loss": 0.088, "step": 61200 }, { "epoch": 19.58, "learning_rate": 2.270023776571206e-05, "loss": 0.0735, "step": 61300 }, { "epoch": 19.61, "learning_rate": 2.266475034600234e-05, "loss": 0.0819, "step": 61400 }, { "epoch": 19.64, "learning_rate": 2.2629617800489728e-05, "loss": 0.078, "step": 61500 }, { "epoch": 19.67, "learning_rate": 2.2594130380780016e-05, "loss": 0.0792, "step": 61600 }, { "epoch": 19.71, "learning_rate": 2.25586429610703e-05, "loss": 0.0808, "step": 61700 }, { "epoch": 19.74, "learning_rate": 2.2523155541360588e-05, "loss": 0.0801, "step": 61800 }, { "epoch": 19.77, "learning_rate": 2.2487668121650876e-05, "loss": 0.079, "step": 61900 }, { "epoch": 19.8, "learning_rate": 2.2452180701941164e-05, "loss": 0.0751, "step": 62000 }, { "epoch": 19.83, "learning_rate": 2.2416693282231452e-05, "loss": 0.0848, "step": 62100 }, { "epoch": 19.87, "learning_rate": 2.238120586252174e-05, "loss": 0.0834, "step": 62200 }, { "epoch": 19.9, "learning_rate": 2.2345718442812024e-05, "loss": 0.0818, "step": 62300 }, { "epoch": 19.93, "learning_rate": 2.2310231023102312e-05, "loss": 0.0817, "step": 62400 }, { "epoch": 19.96, "learning_rate": 2.22747436033926e-05, "loss": 0.0855, "step": 62500 }, { "epoch": 19.99, "learning_rate": 2.2239256183682888e-05, "loss": 0.0721, "step": 62600 }, { "epoch": 20.03, "learning_rate": 2.2203768763973176e-05, "loss": 0.067, "step": 62700 }, { "epoch": 20.06, "learning_rate": 2.2168281344263464e-05, "loss": 0.0752, "step": 62800 }, { "epoch": 20.09, "learning_rate": 2.2132793924553745e-05, "loss": 0.0729, "step": 62900 }, { "epoch": 20.12, "learning_rate": 2.2097306504844033e-05, "loss": 0.0716, "step": 63000 }, { "epoch": 20.15, "learning_rate": 2.206181908513432e-05, "loss": 0.0694, "step": 63100 }, { "epoch": 20.19, "learning_rate": 2.202633166542461e-05, "loss": 0.0726, "step": 63200 }, { "epoch": 20.22, "learning_rate": 2.1990844245714897e-05, "loss": 0.0726, "step": 63300 }, { "epoch": 20.25, "learning_rate": 2.195535682600518e-05, "loss": 0.0715, "step": 63400 }, { "epoch": 20.28, "learning_rate": 2.191986940629547e-05, "loss": 0.074, "step": 63500 }, { "epoch": 20.31, "learning_rate": 2.1884381986585757e-05, "loss": 0.0705, "step": 63600 }, { "epoch": 20.34, "learning_rate": 2.1848894566876045e-05, "loss": 0.0766, "step": 63700 }, { "epoch": 20.38, "learning_rate": 2.1813407147166333e-05, "loss": 0.0741, "step": 63800 }, { "epoch": 20.41, "learning_rate": 2.177791972745662e-05, "loss": 0.0678, "step": 63900 }, { "epoch": 20.44, "learning_rate": 2.1742432307746905e-05, "loss": 0.071, "step": 64000 }, { "epoch": 20.47, "learning_rate": 2.170694488803719e-05, "loss": 0.0758, "step": 64100 }, { "epoch": 20.5, "learning_rate": 2.1671457468327478e-05, "loss": 0.0768, "step": 64200 }, { "epoch": 20.54, "learning_rate": 2.1635970048617765e-05, "loss": 0.0768, "step": 64300 }, { "epoch": 20.57, "learning_rate": 2.1600482628908053e-05, "loss": 0.0751, "step": 64400 }, { "epoch": 20.6, "learning_rate": 2.156499520919834e-05, "loss": 0.066, "step": 64500 }, { "epoch": 20.63, "learning_rate": 2.1529507789488626e-05, "loss": 0.0737, "step": 64600 }, { "epoch": 20.66, "learning_rate": 2.1494020369778914e-05, "loss": 0.0797, "step": 64700 }, { "epoch": 20.7, "learning_rate": 2.14585329500692e-05, "loss": 0.0725, "step": 64800 }, { "epoch": 20.73, "learning_rate": 2.142304553035949e-05, "loss": 0.0729, "step": 64900 }, { "epoch": 20.76, "learning_rate": 2.1387912984846873e-05, "loss": 0.0732, "step": 65000 }, { "epoch": 20.79, "learning_rate": 2.135242556513716e-05, "loss": 0.0747, "step": 65100 }, { "epoch": 20.82, "learning_rate": 2.131693814542745e-05, "loss": 0.0737, "step": 65200 }, { "epoch": 20.86, "learning_rate": 2.1281450725717737e-05, "loss": 0.0756, "step": 65300 }, { "epoch": 20.89, "learning_rate": 2.1245963306008025e-05, "loss": 0.0692, "step": 65400 }, { "epoch": 20.92, "learning_rate": 2.1210475886298306e-05, "loss": 0.0778, "step": 65500 }, { "epoch": 20.95, "learning_rate": 2.1174988466588594e-05, "loss": 0.0683, "step": 65600 }, { "epoch": 20.98, "learning_rate": 2.1139501046878882e-05, "loss": 0.0748, "step": 65700 }, { "epoch": 21.02, "learning_rate": 2.110401362716917e-05, "loss": 0.0713, "step": 65800 }, { "epoch": 21.05, "learning_rate": 2.1068526207459458e-05, "loss": 0.0687, "step": 65900 }, { "epoch": 21.08, "learning_rate": 2.1033038787749746e-05, "loss": 0.0701, "step": 66000 }, { "epoch": 21.11, "learning_rate": 2.099755136804003e-05, "loss": 0.074, "step": 66100 }, { "epoch": 21.14, "learning_rate": 2.0962063948330318e-05, "loss": 0.0651, "step": 66200 }, { "epoch": 21.18, "learning_rate": 2.0926576528620606e-05, "loss": 0.0642, "step": 66300 }, { "epoch": 21.21, "learning_rate": 2.0891089108910894e-05, "loss": 0.0643, "step": 66400 }, { "epoch": 21.24, "learning_rate": 2.0855601689201182e-05, "loss": 0.067, "step": 66500 }, { "epoch": 21.27, "learning_rate": 2.0820114269491466e-05, "loss": 0.0631, "step": 66600 }, { "epoch": 21.3, "learning_rate": 2.0784626849781754e-05, "loss": 0.0644, "step": 66700 }, { "epoch": 21.34, "learning_rate": 2.0749139430072042e-05, "loss": 0.0696, "step": 66800 }, { "epoch": 21.37, "learning_rate": 2.071365201036233e-05, "loss": 0.0685, "step": 66900 }, { "epoch": 21.4, "learning_rate": 2.0678164590652618e-05, "loss": 0.0669, "step": 67000 }, { "epoch": 21.43, "learning_rate": 2.0642677170942906e-05, "loss": 0.0638, "step": 67100 }, { "epoch": 21.46, "learning_rate": 2.0607189751233187e-05, "loss": 0.0679, "step": 67200 }, { "epoch": 21.49, "learning_rate": 2.0571702331523475e-05, "loss": 0.0742, "step": 67300 }, { "epoch": 21.53, "learning_rate": 2.0536214911813763e-05, "loss": 0.0642, "step": 67400 }, { "epoch": 21.56, "learning_rate": 2.0501082366301146e-05, "loss": 0.0766, "step": 67500 }, { "epoch": 21.59, "learning_rate": 2.0465594946591434e-05, "loss": 0.0677, "step": 67600 }, { "epoch": 21.62, "learning_rate": 2.0430107526881722e-05, "loss": 0.0607, "step": 67700 }, { "epoch": 21.65, "learning_rate": 2.039462010717201e-05, "loss": 0.0721, "step": 67800 }, { "epoch": 21.69, "learning_rate": 2.0359132687462298e-05, "loss": 0.0717, "step": 67900 }, { "epoch": 21.72, "learning_rate": 2.0323645267752586e-05, "loss": 0.0725, "step": 68000 }, { "epoch": 21.75, "learning_rate": 2.0288157848042867e-05, "loss": 0.065, "step": 68100 }, { "epoch": 21.78, "learning_rate": 2.0252670428333155e-05, "loss": 0.0647, "step": 68200 }, { "epoch": 21.81, "learning_rate": 2.0217183008623443e-05, "loss": 0.0699, "step": 68300 }, { "epoch": 21.85, "learning_rate": 2.018169558891373e-05, "loss": 0.0723, "step": 68400 }, { "epoch": 21.88, "learning_rate": 2.014620816920402e-05, "loss": 0.0737, "step": 68500 }, { "epoch": 21.91, "learning_rate": 2.0110720749494307e-05, "loss": 0.074, "step": 68600 }, { "epoch": 21.94, "learning_rate": 2.007523332978459e-05, "loss": 0.0699, "step": 68700 }, { "epoch": 21.97, "learning_rate": 2.003974591007488e-05, "loss": 0.07, "step": 68800 }, { "epoch": 22.01, "learning_rate": 2.0004258490365167e-05, "loss": 0.0642, "step": 68900 }, { "epoch": 22.04, "learning_rate": 1.9968771070655455e-05, "loss": 0.0631, "step": 69000 }, { "epoch": 22.07, "learning_rate": 1.993328365094574e-05, "loss": 0.0608, "step": 69100 }, { "epoch": 22.1, "learning_rate": 1.9897796231236027e-05, "loss": 0.0616, "step": 69200 }, { "epoch": 22.13, "learning_rate": 1.9862308811526315e-05, "loss": 0.0604, "step": 69300 }, { "epoch": 22.17, "learning_rate": 1.9826821391816603e-05, "loss": 0.0563, "step": 69400 }, { "epoch": 22.2, "learning_rate": 1.979133397210689e-05, "loss": 0.0626, "step": 69500 }, { "epoch": 22.23, "learning_rate": 1.975584655239718e-05, "loss": 0.0664, "step": 69600 }, { "epoch": 22.26, "learning_rate": 1.9720359132687463e-05, "loss": 0.0636, "step": 69700 }, { "epoch": 22.29, "learning_rate": 1.968487171297775e-05, "loss": 0.0534, "step": 69800 }, { "epoch": 22.33, "learning_rate": 1.964938429326804e-05, "loss": 0.0633, "step": 69900 }, { "epoch": 22.36, "learning_rate": 1.9613896873558324e-05, "loss": 0.0605, "step": 70000 }, { "epoch": 22.39, "learning_rate": 1.9578409453848612e-05, "loss": 0.0629, "step": 70100 }, { "epoch": 22.42, "learning_rate": 1.95429220341389e-05, "loss": 0.0599, "step": 70200 }, { "epoch": 22.45, "learning_rate": 1.9507434614429188e-05, "loss": 0.064, "step": 70300 }, { "epoch": 22.48, "learning_rate": 1.9471947194719475e-05, "loss": 0.0621, "step": 70400 }, { "epoch": 22.52, "learning_rate": 1.943645977500976e-05, "loss": 0.0641, "step": 70500 }, { "epoch": 22.55, "learning_rate": 1.9400972355300048e-05, "loss": 0.062, "step": 70600 }, { "epoch": 22.58, "learning_rate": 1.9365484935590336e-05, "loss": 0.0626, "step": 70700 }, { "epoch": 22.61, "learning_rate": 1.9329997515880624e-05, "loss": 0.0631, "step": 70800 }, { "epoch": 22.64, "learning_rate": 1.9294510096170908e-05, "loss": 0.062, "step": 70900 }, { "epoch": 22.68, "learning_rate": 1.9259022676461196e-05, "loss": 0.067, "step": 71000 }, { "epoch": 22.71, "learning_rate": 1.922353525675148e-05, "loss": 0.0587, "step": 71100 }, { "epoch": 22.74, "learning_rate": 1.918804783704177e-05, "loss": 0.0557, "step": 71200 }, { "epoch": 22.77, "learning_rate": 1.9152560417332057e-05, "loss": 0.0655, "step": 71300 }, { "epoch": 22.8, "learning_rate": 1.9117072997622344e-05, "loss": 0.0658, "step": 71400 }, { "epoch": 22.84, "learning_rate": 1.9081585577912632e-05, "loss": 0.0645, "step": 71500 }, { "epoch": 22.87, "learning_rate": 1.904609815820292e-05, "loss": 0.0611, "step": 71600 }, { "epoch": 22.9, "learning_rate": 1.9010610738493205e-05, "loss": 0.0686, "step": 71700 }, { "epoch": 22.93, "learning_rate": 1.8975123318783493e-05, "loss": 0.0667, "step": 71800 }, { "epoch": 22.96, "learning_rate": 1.893963589907378e-05, "loss": 0.0614, "step": 71900 }, { "epoch": 23.0, "learning_rate": 1.8904148479364065e-05, "loss": 0.0606, "step": 72000 }, { "epoch": 23.03, "learning_rate": 1.8868661059654353e-05, "loss": 0.0602, "step": 72100 }, { "epoch": 23.06, "learning_rate": 1.883317363994464e-05, "loss": 0.0528, "step": 72200 }, { "epoch": 23.09, "learning_rate": 1.879768622023493e-05, "loss": 0.0566, "step": 72300 }, { "epoch": 23.12, "learning_rate": 1.8762198800525217e-05, "loss": 0.0569, "step": 72400 }, { "epoch": 23.16, "learning_rate": 1.8726711380815505e-05, "loss": 0.0586, "step": 72500 }, { "epoch": 23.19, "learning_rate": 1.869122396110579e-05, "loss": 0.0608, "step": 72600 }, { "epoch": 23.22, "learning_rate": 1.8655736541396077e-05, "loss": 0.0571, "step": 72700 }, { "epoch": 23.25, "learning_rate": 1.8620249121686365e-05, "loss": 0.0583, "step": 72800 }, { "epoch": 23.28, "learning_rate": 1.858511657617375e-05, "loss": 0.056, "step": 72900 }, { "epoch": 23.32, "learning_rate": 1.8549629156464037e-05, "loss": 0.0541, "step": 73000 }, { "epoch": 23.35, "learning_rate": 1.8514141736754324e-05, "loss": 0.0609, "step": 73100 }, { "epoch": 23.38, "learning_rate": 1.847865431704461e-05, "loss": 0.0511, "step": 73200 }, { "epoch": 23.41, "learning_rate": 1.8443166897334897e-05, "loss": 0.056, "step": 73300 }, { "epoch": 23.44, "learning_rate": 1.8407679477625185e-05, "loss": 0.0527, "step": 73400 }, { "epoch": 23.47, "learning_rate": 1.837219205791547e-05, "loss": 0.0529, "step": 73500 }, { "epoch": 23.51, "learning_rate": 1.8336704638205757e-05, "loss": 0.058, "step": 73600 }, { "epoch": 23.54, "learning_rate": 1.8301217218496045e-05, "loss": 0.0597, "step": 73700 }, { "epoch": 23.57, "learning_rate": 1.826572979878633e-05, "loss": 0.0595, "step": 73800 }, { "epoch": 23.6, "learning_rate": 1.8230242379076618e-05, "loss": 0.0593, "step": 73900 }, { "epoch": 23.63, "learning_rate": 1.8194754959366905e-05, "loss": 0.0545, "step": 74000 }, { "epoch": 23.67, "learning_rate": 1.8159267539657193e-05, "loss": 0.0577, "step": 74100 }, { "epoch": 23.7, "learning_rate": 1.812378011994748e-05, "loss": 0.0593, "step": 74200 }, { "epoch": 23.73, "learning_rate": 1.8088292700237766e-05, "loss": 0.053, "step": 74300 }, { "epoch": 23.76, "learning_rate": 1.8052805280528054e-05, "loss": 0.0585, "step": 74400 }, { "epoch": 23.79, "learning_rate": 1.801731786081834e-05, "loss": 0.0631, "step": 74500 }, { "epoch": 23.83, "learning_rate": 1.7981830441108626e-05, "loss": 0.0596, "step": 74600 }, { "epoch": 23.86, "learning_rate": 1.7946343021398914e-05, "loss": 0.062, "step": 74700 }, { "epoch": 23.89, "learning_rate": 1.7910855601689202e-05, "loss": 0.0596, "step": 74800 }, { "epoch": 23.92, "learning_rate": 1.787536818197949e-05, "loss": 0.0596, "step": 74900 }, { "epoch": 23.95, "learning_rate": 1.7839880762269778e-05, "loss": 0.0617, "step": 75000 }, { "epoch": 23.99, "learning_rate": 1.7804393342560066e-05, "loss": 0.0588, "step": 75100 }, { "epoch": 24.02, "learning_rate": 1.776890592285035e-05, "loss": 0.0535, "step": 75200 }, { "epoch": 24.05, "learning_rate": 1.7733418503140638e-05, "loss": 0.0601, "step": 75300 }, { "epoch": 24.08, "learning_rate": 1.7697931083430926e-05, "loss": 0.0552, "step": 75400 }, { "epoch": 24.11, "learning_rate": 1.766279853791831e-05, "loss": 0.0493, "step": 75500 }, { "epoch": 24.15, "learning_rate": 1.7627311118208598e-05, "loss": 0.0563, "step": 75600 }, { "epoch": 24.18, "learning_rate": 1.759217857269598e-05, "loss": 0.0516, "step": 75700 }, { "epoch": 24.21, "learning_rate": 1.7556691152986266e-05, "loss": 0.0503, "step": 75800 }, { "epoch": 24.24, "learning_rate": 1.7521203733276554e-05, "loss": 0.0528, "step": 75900 }, { "epoch": 24.27, "learning_rate": 1.748571631356684e-05, "loss": 0.0534, "step": 76000 }, { "epoch": 24.31, "learning_rate": 1.745022889385713e-05, "loss": 0.0544, "step": 76100 }, { "epoch": 24.34, "learning_rate": 1.7414741474147417e-05, "loss": 0.0555, "step": 76200 }, { "epoch": 24.37, "learning_rate": 1.7379254054437705e-05, "loss": 0.0497, "step": 76300 }, { "epoch": 24.4, "learning_rate": 1.7344121508925086e-05, "loss": 0.0542, "step": 76400 }, { "epoch": 24.43, "learning_rate": 1.7308634089215373e-05, "loss": 0.0572, "step": 76500 }, { "epoch": 24.47, "learning_rate": 1.727314666950566e-05, "loss": 0.0571, "step": 76600 }, { "epoch": 24.5, "learning_rate": 1.723765924979595e-05, "loss": 0.0488, "step": 76700 }, { "epoch": 24.53, "learning_rate": 1.7202171830086237e-05, "loss": 0.0549, "step": 76800 }, { "epoch": 24.56, "learning_rate": 1.7166684410376525e-05, "loss": 0.0511, "step": 76900 }, { "epoch": 24.59, "learning_rate": 1.713119699066681e-05, "loss": 0.0691, "step": 77000 }, { "epoch": 24.62, "learning_rate": 1.7095709570957098e-05, "loss": 0.055, "step": 77100 }, { "epoch": 24.66, "learning_rate": 1.7060222151247382e-05, "loss": 0.0581, "step": 77200 }, { "epoch": 24.69, "learning_rate": 1.702473473153767e-05, "loss": 0.051, "step": 77300 }, { "epoch": 24.72, "learning_rate": 1.6989247311827958e-05, "loss": 0.0521, "step": 77400 }, { "epoch": 24.75, "learning_rate": 1.6953759892118246e-05, "loss": 0.0543, "step": 77500 }, { "epoch": 24.78, "learning_rate": 1.6918272472408534e-05, "loss": 0.0587, "step": 77600 }, { "epoch": 24.82, "learning_rate": 1.688278505269882e-05, "loss": 0.0562, "step": 77700 }, { "epoch": 24.85, "learning_rate": 1.6847297632989106e-05, "loss": 0.0502, "step": 77800 }, { "epoch": 24.88, "learning_rate": 1.6811810213279394e-05, "loss": 0.0548, "step": 77900 }, { "epoch": 24.91, "learning_rate": 1.6776322793569682e-05, "loss": 0.0554, "step": 78000 }, { "epoch": 24.94, "learning_rate": 1.6740835373859966e-05, "loss": 0.0513, "step": 78100 }, { "epoch": 24.98, "learning_rate": 1.6705347954150254e-05, "loss": 0.0504, "step": 78200 }, { "epoch": 25.01, "learning_rate": 1.6669860534440542e-05, "loss": 0.0528, "step": 78300 }, { "epoch": 25.04, "learning_rate": 1.6634373114730827e-05, "loss": 0.0481, "step": 78400 }, { "epoch": 25.07, "learning_rate": 1.6598885695021115e-05, "loss": 0.0492, "step": 78500 }, { "epoch": 25.1, "learning_rate": 1.6563398275311403e-05, "loss": 0.0513, "step": 78600 }, { "epoch": 25.14, "learning_rate": 1.652791085560169e-05, "loss": 0.051, "step": 78700 }, { "epoch": 25.17, "learning_rate": 1.649242343589198e-05, "loss": 0.0529, "step": 78800 }, { "epoch": 25.2, "learning_rate": 1.6456936016182266e-05, "loss": 0.0536, "step": 78900 }, { "epoch": 25.23, "learning_rate": 1.642144859647255e-05, "loss": 0.0512, "step": 79000 }, { "epoch": 25.26, "learning_rate": 1.638596117676284e-05, "loss": 0.0482, "step": 79100 }, { "epoch": 25.3, "learning_rate": 1.6350473757053127e-05, "loss": 0.0448, "step": 79200 }, { "epoch": 25.33, "learning_rate": 1.631498633734341e-05, "loss": 0.0464, "step": 79300 }, { "epoch": 25.36, "learning_rate": 1.62794989176337e-05, "loss": 0.0546, "step": 79400 }, { "epoch": 25.39, "learning_rate": 1.6244011497923987e-05, "loss": 0.0554, "step": 79500 }, { "epoch": 25.42, "learning_rate": 1.6208524078214275e-05, "loss": 0.0488, "step": 79600 }, { "epoch": 25.46, "learning_rate": 1.6173036658504563e-05, "loss": 0.0527, "step": 79700 }, { "epoch": 25.49, "learning_rate": 1.613754923879485e-05, "loss": 0.0499, "step": 79800 }, { "epoch": 25.52, "learning_rate": 1.6102061819085135e-05, "loss": 0.0517, "step": 79900 }, { "epoch": 25.55, "learning_rate": 1.6066574399375423e-05, "loss": 0.0497, "step": 80000 }, { "epoch": 25.58, "learning_rate": 1.603108697966571e-05, "loss": 0.0454, "step": 80100 }, { "epoch": 25.61, "learning_rate": 1.5995599559955996e-05, "loss": 0.0512, "step": 80200 }, { "epoch": 25.65, "learning_rate": 1.5960112140246284e-05, "loss": 0.049, "step": 80300 }, { "epoch": 25.68, "learning_rate": 1.592462472053657e-05, "loss": 0.0542, "step": 80400 }, { "epoch": 25.71, "learning_rate": 1.588913730082686e-05, "loss": 0.0464, "step": 80500 }, { "epoch": 25.74, "learning_rate": 1.5853649881117147e-05, "loss": 0.0509, "step": 80600 }, { "epoch": 25.77, "learning_rate": 1.5818162461407432e-05, "loss": 0.0484, "step": 80700 }, { "epoch": 25.81, "learning_rate": 1.578267504169772e-05, "loss": 0.0504, "step": 80800 }, { "epoch": 25.84, "learning_rate": 1.5747187621988008e-05, "loss": 0.0483, "step": 80900 }, { "epoch": 25.87, "learning_rate": 1.5711700202278292e-05, "loss": 0.053, "step": 81000 }, { "epoch": 25.9, "learning_rate": 1.567621278256858e-05, "loss": 0.0486, "step": 81100 }, { "epoch": 25.93, "learning_rate": 1.5640725362858868e-05, "loss": 0.0471, "step": 81200 }, { "epoch": 25.97, "learning_rate": 1.5605237943149153e-05, "loss": 0.0487, "step": 81300 }, { "epoch": 26.0, "learning_rate": 1.556975052343944e-05, "loss": 0.0507, "step": 81400 }, { "epoch": 26.03, "learning_rate": 1.553426310372973e-05, "loss": 0.0423, "step": 81500 }, { "epoch": 26.06, "learning_rate": 1.5498775684020016e-05, "loss": 0.0442, "step": 81600 }, { "epoch": 26.09, "learning_rate": 1.5463288264310304e-05, "loss": 0.0499, "step": 81700 }, { "epoch": 26.13, "learning_rate": 1.5427800844600592e-05, "loss": 0.0454, "step": 81800 }, { "epoch": 26.16, "learning_rate": 1.5392313424890877e-05, "loss": 0.0476, "step": 81900 }, { "epoch": 26.19, "learning_rate": 1.5356826005181165e-05, "loss": 0.0516, "step": 82000 }, { "epoch": 26.22, "learning_rate": 1.5321693459668548e-05, "loss": 0.0451, "step": 82100 }, { "epoch": 26.25, "learning_rate": 1.5286206039958836e-05, "loss": 0.046, "step": 82200 }, { "epoch": 26.29, "learning_rate": 1.5250718620249122e-05, "loss": 0.0454, "step": 82300 }, { "epoch": 26.32, "learning_rate": 1.521523120053941e-05, "loss": 0.0466, "step": 82400 }, { "epoch": 26.35, "learning_rate": 1.5179743780829696e-05, "loss": 0.0519, "step": 82500 }, { "epoch": 26.38, "learning_rate": 1.5144256361119984e-05, "loss": 0.0449, "step": 82600 }, { "epoch": 26.41, "learning_rate": 1.5108768941410272e-05, "loss": 0.051, "step": 82700 }, { "epoch": 26.45, "learning_rate": 1.5073281521700558e-05, "loss": 0.0457, "step": 82800 }, { "epoch": 26.48, "learning_rate": 1.5037794101990846e-05, "loss": 0.0496, "step": 82900 }, { "epoch": 26.51, "learning_rate": 1.5002306682281134e-05, "loss": 0.047, "step": 83000 }, { "epoch": 26.54, "learning_rate": 1.4966819262571419e-05, "loss": 0.0454, "step": 83100 }, { "epoch": 26.57, "learning_rate": 1.4931331842861707e-05, "loss": 0.0443, "step": 83200 }, { "epoch": 26.6, "learning_rate": 1.4895844423151993e-05, "loss": 0.0464, "step": 83300 }, { "epoch": 26.64, "learning_rate": 1.486035700344228e-05, "loss": 0.048, "step": 83400 }, { "epoch": 26.67, "learning_rate": 1.4824869583732569e-05, "loss": 0.0426, "step": 83500 }, { "epoch": 26.7, "learning_rate": 1.4789382164022853e-05, "loss": 0.0461, "step": 83600 }, { "epoch": 26.73, "learning_rate": 1.4753894744313141e-05, "loss": 0.0445, "step": 83700 }, { "epoch": 26.76, "learning_rate": 1.4718762198800526e-05, "loss": 0.0486, "step": 83800 }, { "epoch": 26.8, "learning_rate": 1.4683274779090813e-05, "loss": 0.0467, "step": 83900 }, { "epoch": 26.83, "learning_rate": 1.46477873593811e-05, "loss": 0.0451, "step": 84000 }, { "epoch": 26.86, "learning_rate": 1.4612299939671389e-05, "loss": 0.0442, "step": 84100 }, { "epoch": 26.89, "learning_rate": 1.4576812519961673e-05, "loss": 0.0484, "step": 84200 }, { "epoch": 26.92, "learning_rate": 1.4541325100251961e-05, "loss": 0.0462, "step": 84300 }, { "epoch": 26.96, "learning_rate": 1.4505837680542249e-05, "loss": 0.0485, "step": 84400 }, { "epoch": 26.99, "learning_rate": 1.4470350260832535e-05, "loss": 0.0465, "step": 84500 }, { "epoch": 27.02, "learning_rate": 1.4434862841122823e-05, "loss": 0.0449, "step": 84600 }, { "epoch": 27.05, "learning_rate": 1.4399375421413111e-05, "loss": 0.0441, "step": 84700 }, { "epoch": 27.08, "learning_rate": 1.4363888001703397e-05, "loss": 0.0453, "step": 84800 }, { "epoch": 27.12, "learning_rate": 1.4328400581993685e-05, "loss": 0.0409, "step": 84900 }, { "epoch": 27.15, "learning_rate": 1.4292913162283973e-05, "loss": 0.0435, "step": 85000 }, { "epoch": 27.18, "learning_rate": 1.4257425742574257e-05, "loss": 0.0444, "step": 85100 }, { "epoch": 27.21, "learning_rate": 1.4221938322864545e-05, "loss": 0.04, "step": 85200 }, { "epoch": 27.24, "learning_rate": 1.4186450903154833e-05, "loss": 0.0426, "step": 85300 }, { "epoch": 27.28, "learning_rate": 1.415096348344512e-05, "loss": 0.0438, "step": 85400 }, { "epoch": 27.31, "learning_rate": 1.4115476063735407e-05, "loss": 0.044, "step": 85500 }, { "epoch": 27.34, "learning_rate": 1.4079988644025695e-05, "loss": 0.0436, "step": 85600 }, { "epoch": 27.37, "learning_rate": 1.404450122431598e-05, "loss": 0.0437, "step": 85700 }, { "epoch": 27.4, "learning_rate": 1.4009013804606268e-05, "loss": 0.0438, "step": 85800 }, { "epoch": 27.44, "learning_rate": 1.3973526384896556e-05, "loss": 0.0405, "step": 85900 }, { "epoch": 27.47, "learning_rate": 1.3938038965186842e-05, "loss": 0.0471, "step": 86000 }, { "epoch": 27.5, "learning_rate": 1.390255154547713e-05, "loss": 0.044, "step": 86100 }, { "epoch": 27.53, "learning_rate": 1.3867064125767418e-05, "loss": 0.0492, "step": 86200 }, { "epoch": 27.56, "learning_rate": 1.3831576706057704e-05, "loss": 0.0418, "step": 86300 }, { "epoch": 27.6, "learning_rate": 1.3796089286347992e-05, "loss": 0.0426, "step": 86400 }, { "epoch": 27.63, "learning_rate": 1.3760601866638276e-05, "loss": 0.0441, "step": 86500 }, { "epoch": 27.66, "learning_rate": 1.3725114446928564e-05, "loss": 0.0451, "step": 86600 }, { "epoch": 27.69, "learning_rate": 1.3689627027218852e-05, "loss": 0.0445, "step": 86700 }, { "epoch": 27.72, "learning_rate": 1.3654139607509138e-05, "loss": 0.0449, "step": 86800 }, { "epoch": 27.75, "learning_rate": 1.3618652187799426e-05, "loss": 0.0448, "step": 86900 }, { "epoch": 27.79, "learning_rate": 1.3583164768089714e-05, "loss": 0.0398, "step": 87000 }, { "epoch": 27.82, "learning_rate": 1.3547677348379999e-05, "loss": 0.0423, "step": 87100 }, { "epoch": 27.85, "learning_rate": 1.3512189928670287e-05, "loss": 0.0419, "step": 87200 }, { "epoch": 27.88, "learning_rate": 1.3477057383157672e-05, "loss": 0.0461, "step": 87300 }, { "epoch": 27.91, "learning_rate": 1.3441569963447958e-05, "loss": 0.041, "step": 87400 }, { "epoch": 27.95, "learning_rate": 1.3406082543738246e-05, "loss": 0.0429, "step": 87500 }, { "epoch": 27.98, "learning_rate": 1.3370595124028534e-05, "loss": 0.0409, "step": 87600 }, { "epoch": 28.01, "learning_rate": 1.3335107704318819e-05, "loss": 0.0431, "step": 87700 }, { "epoch": 28.04, "learning_rate": 1.3299620284609106e-05, "loss": 0.0431, "step": 87800 }, { "epoch": 28.07, "learning_rate": 1.3264132864899394e-05, "loss": 0.0403, "step": 87900 }, { "epoch": 28.11, "learning_rate": 1.322864544518968e-05, "loss": 0.0384, "step": 88000 }, { "epoch": 28.14, "learning_rate": 1.3193158025479969e-05, "loss": 0.0414, "step": 88100 }, { "epoch": 28.17, "learning_rate": 1.3157670605770256e-05, "loss": 0.0445, "step": 88200 }, { "epoch": 28.2, "learning_rate": 1.3122183186060543e-05, "loss": 0.0389, "step": 88300 }, { "epoch": 28.23, "learning_rate": 1.308669576635083e-05, "loss": 0.0395, "step": 88400 }, { "epoch": 28.27, "learning_rate": 1.3051208346641118e-05, "loss": 0.042, "step": 88500 }, { "epoch": 28.3, "learning_rate": 1.3015720926931403e-05, "loss": 0.0439, "step": 88600 }, { "epoch": 28.33, "learning_rate": 1.2980233507221691e-05, "loss": 0.0405, "step": 88700 }, { "epoch": 28.36, "learning_rate": 1.2944746087511979e-05, "loss": 0.0414, "step": 88800 }, { "epoch": 28.39, "learning_rate": 1.2909258667802265e-05, "loss": 0.0428, "step": 88900 }, { "epoch": 28.43, "learning_rate": 1.2873771248092553e-05, "loss": 0.0355, "step": 89000 }, { "epoch": 28.46, "learning_rate": 1.2838283828382841e-05, "loss": 0.0408, "step": 89100 }, { "epoch": 28.49, "learning_rate": 1.2802796408673125e-05, "loss": 0.0397, "step": 89200 }, { "epoch": 28.52, "learning_rate": 1.2767308988963413e-05, "loss": 0.0382, "step": 89300 }, { "epoch": 28.55, "learning_rate": 1.27318215692537e-05, "loss": 0.0409, "step": 89400 }, { "epoch": 28.59, "learning_rate": 1.2696334149543987e-05, "loss": 0.0457, "step": 89500 }, { "epoch": 28.62, "learning_rate": 1.2660846729834275e-05, "loss": 0.0432, "step": 89600 }, { "epoch": 28.65, "learning_rate": 1.2625359310124562e-05, "loss": 0.0376, "step": 89700 }, { "epoch": 28.68, "learning_rate": 1.258987189041485e-05, "loss": 0.0422, "step": 89800 }, { "epoch": 28.71, "learning_rate": 1.2554384470705137e-05, "loss": 0.0387, "step": 89900 }, { "epoch": 28.74, "learning_rate": 1.2518897050995422e-05, "loss": 0.0419, "step": 90000 }, { "epoch": 28.78, "learning_rate": 1.248340963128571e-05, "loss": 0.0393, "step": 90100 }, { "epoch": 28.81, "learning_rate": 1.2447922211575998e-05, "loss": 0.0402, "step": 90200 }, { "epoch": 28.84, "learning_rate": 1.2412434791866284e-05, "loss": 0.0437, "step": 90300 }, { "epoch": 28.87, "learning_rate": 1.2376947372156572e-05, "loss": 0.0434, "step": 90400 }, { "epoch": 28.9, "learning_rate": 1.234145995244686e-05, "loss": 0.0392, "step": 90500 }, { "epoch": 28.94, "learning_rate": 1.2305972532737144e-05, "loss": 0.0407, "step": 90600 }, { "epoch": 28.97, "learning_rate": 1.2270485113027432e-05, "loss": 0.0402, "step": 90700 }, { "epoch": 29.0, "learning_rate": 1.223499769331772e-05, "loss": 0.0384, "step": 90800 }, { "epoch": 29.03, "learning_rate": 1.2199510273608006e-05, "loss": 0.0319, "step": 90900 }, { "epoch": 29.06, "learning_rate": 1.2164022853898294e-05, "loss": 0.0373, "step": 91000 }, { "epoch": 29.1, "learning_rate": 1.2128535434188582e-05, "loss": 0.036, "step": 91100 }, { "epoch": 29.13, "learning_rate": 1.2093048014478868e-05, "loss": 0.0428, "step": 91200 }, { "epoch": 29.16, "learning_rate": 1.2057560594769155e-05, "loss": 0.0313, "step": 91300 }, { "epoch": 29.19, "learning_rate": 1.2022073175059442e-05, "loss": 0.0367, "step": 91400 }, { "epoch": 29.22, "learning_rate": 1.1986585755349729e-05, "loss": 0.0415, "step": 91500 }, { "epoch": 29.26, "learning_rate": 1.1951453209837114e-05, "loss": 0.0343, "step": 91600 }, { "epoch": 29.29, "learning_rate": 1.1915965790127402e-05, "loss": 0.0387, "step": 91700 }, { "epoch": 29.32, "learning_rate": 1.1880478370417688e-05, "loss": 0.0382, "step": 91800 }, { "epoch": 29.35, "learning_rate": 1.1844990950707976e-05, "loss": 0.04, "step": 91900 }, { "epoch": 29.38, "learning_rate": 1.1809503530998262e-05, "loss": 0.0358, "step": 92000 }, { "epoch": 29.42, "learning_rate": 1.1774016111288548e-05, "loss": 0.0414, "step": 92100 }, { "epoch": 29.45, "learning_rate": 1.1738528691578836e-05, "loss": 0.0409, "step": 92200 }, { "epoch": 29.48, "learning_rate": 1.1703041271869124e-05, "loss": 0.0373, "step": 92300 }, { "epoch": 29.51, "learning_rate": 1.166755385215941e-05, "loss": 0.0392, "step": 92400 }, { "epoch": 29.54, "learning_rate": 1.1632066432449698e-05, "loss": 0.0344, "step": 92500 }, { "epoch": 29.58, "learning_rate": 1.1596579012739983e-05, "loss": 0.0365, "step": 92600 }, { "epoch": 29.61, "learning_rate": 1.1561091593030271e-05, "loss": 0.036, "step": 92700 }, { "epoch": 29.64, "learning_rate": 1.1525604173320559e-05, "loss": 0.0401, "step": 92800 }, { "epoch": 29.67, "learning_rate": 1.1490116753610845e-05, "loss": 0.0367, "step": 92900 }, { "epoch": 29.7, "learning_rate": 1.1454629333901133e-05, "loss": 0.043, "step": 93000 }, { "epoch": 29.73, "learning_rate": 1.141914191419142e-05, "loss": 0.0379, "step": 93100 }, { "epoch": 29.77, "learning_rate": 1.1383654494481707e-05, "loss": 0.0343, "step": 93200 }, { "epoch": 29.8, "learning_rate": 1.1348167074771993e-05, "loss": 0.0378, "step": 93300 }, { "epoch": 29.83, "learning_rate": 1.1312679655062281e-05, "loss": 0.0348, "step": 93400 }, { "epoch": 29.86, "learning_rate": 1.1277192235352567e-05, "loss": 0.0415, "step": 93500 }, { "epoch": 29.89, "learning_rate": 1.1241704815642855e-05, "loss": 0.0357, "step": 93600 }, { "epoch": 29.93, "learning_rate": 1.1206217395933143e-05, "loss": 0.0321, "step": 93700 }, { "epoch": 29.96, "learning_rate": 1.117072997622343e-05, "loss": 0.0328, "step": 93800 }, { "epoch": 29.99, "learning_rate": 1.1135597430710813e-05, "loss": 0.0443, "step": 93900 }, { "epoch": 30.02, "learning_rate": 1.1100110011001101e-05, "loss": 0.031, "step": 94000 }, { "epoch": 30.05, "learning_rate": 1.1064622591291387e-05, "loss": 0.0299, "step": 94100 }, { "epoch": 30.09, "learning_rate": 1.1029135171581675e-05, "loss": 0.0364, "step": 94200 }, { "epoch": 30.12, "learning_rate": 1.0993647751871963e-05, "loss": 0.0328, "step": 94300 }, { "epoch": 30.15, "learning_rate": 1.095816033216225e-05, "loss": 0.0368, "step": 94400 }, { "epoch": 30.18, "learning_rate": 1.0922672912452537e-05, "loss": 0.0289, "step": 94500 }, { "epoch": 30.21, "learning_rate": 1.0887185492742825e-05, "loss": 0.032, "step": 94600 }, { "epoch": 30.25, "learning_rate": 1.085169807303311e-05, "loss": 0.0317, "step": 94700 }, { "epoch": 30.28, "learning_rate": 1.0816210653323397e-05, "loss": 0.0386, "step": 94800 }, { "epoch": 30.31, "learning_rate": 1.0780723233613685e-05, "loss": 0.0376, "step": 94900 }, { "epoch": 30.34, "learning_rate": 1.0745235813903972e-05, "loss": 0.0338, "step": 95000 }, { "epoch": 30.37, "learning_rate": 1.070974839419426e-05, "loss": 0.0378, "step": 95100 }, { "epoch": 30.41, "learning_rate": 1.0674260974484547e-05, "loss": 0.0357, "step": 95200 }, { "epoch": 30.44, "learning_rate": 1.0638773554774832e-05, "loss": 0.0295, "step": 95300 }, { "epoch": 30.47, "learning_rate": 1.060328613506512e-05, "loss": 0.0358, "step": 95400 }, { "epoch": 30.5, "learning_rate": 1.0567798715355408e-05, "loss": 0.0306, "step": 95500 }, { "epoch": 30.53, "learning_rate": 1.0532311295645694e-05, "loss": 0.0347, "step": 95600 }, { "epoch": 30.57, "learning_rate": 1.0496823875935982e-05, "loss": 0.0309, "step": 95700 }, { "epoch": 30.6, "learning_rate": 1.0461336456226268e-05, "loss": 0.0303, "step": 95800 }, { "epoch": 30.63, "learning_rate": 1.0425849036516556e-05, "loss": 0.0382, "step": 95900 }, { "epoch": 30.66, "learning_rate": 1.0390361616806844e-05, "loss": 0.0388, "step": 96000 }, { "epoch": 30.69, "learning_rate": 1.0355229071294226e-05, "loss": 0.0344, "step": 96100 }, { "epoch": 30.73, "learning_rate": 1.0319741651584514e-05, "loss": 0.0323, "step": 96200 }, { "epoch": 30.76, "learning_rate": 1.0284254231874802e-05, "loss": 0.035, "step": 96300 }, { "epoch": 30.79, "learning_rate": 1.0248766812165088e-05, "loss": 0.0334, "step": 96400 }, { "epoch": 30.82, "learning_rate": 1.0213279392455376e-05, "loss": 0.0405, "step": 96500 }, { "epoch": 30.85, "learning_rate": 1.0177791972745664e-05, "loss": 0.0298, "step": 96600 }, { "epoch": 30.88, "learning_rate": 1.0142304553035948e-05, "loss": 0.0311, "step": 96700 }, { "epoch": 30.92, "learning_rate": 1.0106817133326236e-05, "loss": 0.0357, "step": 96800 }, { "epoch": 30.95, "learning_rate": 1.0071329713616524e-05, "loss": 0.0332, "step": 96900 }, { "epoch": 30.98, "learning_rate": 1.003584229390681e-05, "loss": 0.0343, "step": 97000 }, { "epoch": 31.01, "learning_rate": 1.0000354874197098e-05, "loss": 0.0343, "step": 97100 }, { "epoch": 31.04, "learning_rate": 9.964867454487384e-06, "loss": 0.0328, "step": 97200 }, { "epoch": 31.08, "learning_rate": 9.929380034777672e-06, "loss": 0.0326, "step": 97300 }, { "epoch": 31.11, "learning_rate": 9.893892615067959e-06, "loss": 0.0334, "step": 97400 }, { "epoch": 31.14, "learning_rate": 9.858405195358246e-06, "loss": 0.0337, "step": 97500 }, { "epoch": 31.17, "learning_rate": 9.822917775648534e-06, "loss": 0.0302, "step": 97600 }, { "epoch": 31.2, "learning_rate": 9.78743035593882e-06, "loss": 0.0325, "step": 97700 }, { "epoch": 31.24, "learning_rate": 9.751942936229107e-06, "loss": 0.0318, "step": 97800 }, { "epoch": 31.27, "learning_rate": 9.716455516519395e-06, "loss": 0.0367, "step": 97900 }, { "epoch": 31.3, "learning_rate": 9.680968096809683e-06, "loss": 0.0308, "step": 98000 }, { "epoch": 31.33, "learning_rate": 9.645480677099969e-06, "loss": 0.032, "step": 98100 }, { "epoch": 31.36, "learning_rate": 9.609993257390255e-06, "loss": 0.0312, "step": 98200 }, { "epoch": 31.4, "learning_rate": 9.574505837680543e-06, "loss": 0.0297, "step": 98300 }, { "epoch": 31.43, "learning_rate": 9.539018417970831e-06, "loss": 0.0337, "step": 98400 }, { "epoch": 31.46, "learning_rate": 9.503530998261117e-06, "loss": 0.0365, "step": 98500 }, { "epoch": 31.49, "learning_rate": 9.468043578551405e-06, "loss": 0.031, "step": 98600 }, { "epoch": 31.52, "learning_rate": 9.432556158841691e-06, "loss": 0.0365, "step": 98700 }, { "epoch": 31.56, "learning_rate": 9.397068739131977e-06, "loss": 0.0333, "step": 98800 }, { "epoch": 31.59, "learning_rate": 9.361581319422265e-06, "loss": 0.0321, "step": 98900 }, { "epoch": 31.62, "learning_rate": 9.326093899712553e-06, "loss": 0.033, "step": 99000 }, { "epoch": 31.65, "learning_rate": 9.29060648000284e-06, "loss": 0.0329, "step": 99100 }, { "epoch": 31.68, "learning_rate": 9.255119060293127e-06, "loss": 0.0293, "step": 99200 }, { "epoch": 31.72, "learning_rate": 9.219986514780511e-06, "loss": 0.0312, "step": 99300 }, { "epoch": 31.75, "learning_rate": 9.184499095070797e-06, "loss": 0.0293, "step": 99400 }, { "epoch": 31.78, "learning_rate": 9.149011675361085e-06, "loss": 0.0295, "step": 99500 }, { "epoch": 31.81, "learning_rate": 9.113524255651373e-06, "loss": 0.0283, "step": 99600 }, { "epoch": 31.84, "learning_rate": 9.07803683594166e-06, "loss": 0.031, "step": 99700 }, { "epoch": 31.87, "learning_rate": 9.042549416231946e-06, "loss": 0.0354, "step": 99800 }, { "epoch": 31.91, "learning_rate": 9.007061996522233e-06, "loss": 0.0289, "step": 99900 }, { "epoch": 31.94, "learning_rate": 8.971574576812521e-06, "loss": 0.0364, "step": 100000 }, { "epoch": 31.97, "learning_rate": 8.936087157102808e-06, "loss": 0.0359, "step": 100100 }, { "epoch": 32.0, "learning_rate": 8.900599737393095e-06, "loss": 0.0302, "step": 100200 }, { "epoch": 32.03, "learning_rate": 8.865112317683382e-06, "loss": 0.0295, "step": 100300 }, { "epoch": 32.07, "learning_rate": 8.829624897973668e-06, "loss": 0.0305, "step": 100400 }, { "epoch": 32.1, "learning_rate": 8.794137478263956e-06, "loss": 0.0299, "step": 100500 }, { "epoch": 32.13, "learning_rate": 8.758650058554244e-06, "loss": 0.0257, "step": 100600 }, { "epoch": 32.16, "learning_rate": 8.72316263884453e-06, "loss": 0.0307, "step": 100700 }, { "epoch": 32.19, "learning_rate": 8.687675219134818e-06, "loss": 0.0333, "step": 100800 }, { "epoch": 32.23, "learning_rate": 8.652187799425104e-06, "loss": 0.0286, "step": 100900 }, { "epoch": 32.26, "learning_rate": 8.616700379715392e-06, "loss": 0.0268, "step": 101000 }, { "epoch": 32.29, "learning_rate": 8.581212960005678e-06, "loss": 0.0295, "step": 101100 }, { "epoch": 32.32, "learning_rate": 8.545725540295966e-06, "loss": 0.0323, "step": 101200 }, { "epoch": 32.35, "learning_rate": 8.51059299478335e-06, "loss": 0.0286, "step": 101300 }, { "epoch": 32.39, "learning_rate": 8.475105575073636e-06, "loss": 0.0275, "step": 101400 }, { "epoch": 32.42, "learning_rate": 8.439618155363924e-06, "loss": 0.0302, "step": 101500 }, { "epoch": 32.45, "learning_rate": 8.404130735654212e-06, "loss": 0.0318, "step": 101600 }, { "epoch": 32.48, "learning_rate": 8.368643315944498e-06, "loss": 0.0271, "step": 101700 }, { "epoch": 32.51, "learning_rate": 8.333155896234786e-06, "loss": 0.0297, "step": 101800 }, { "epoch": 32.55, "learning_rate": 8.297668476525072e-06, "loss": 0.0289, "step": 101900 }, { "epoch": 32.58, "learning_rate": 8.26218105681536e-06, "loss": 0.0278, "step": 102000 }, { "epoch": 32.61, "learning_rate": 8.226693637105648e-06, "loss": 0.0328, "step": 102100 }, { "epoch": 32.64, "learning_rate": 8.191206217395934e-06, "loss": 0.0271, "step": 102200 }, { "epoch": 32.67, "learning_rate": 8.15571879768622e-06, "loss": 0.0299, "step": 102300 }, { "epoch": 32.71, "learning_rate": 8.120231377976508e-06, "loss": 0.0292, "step": 102400 }, { "epoch": 32.74, "learning_rate": 8.084743958266794e-06, "loss": 0.0278, "step": 102500 }, { "epoch": 32.77, "learning_rate": 8.049256538557082e-06, "loss": 0.0279, "step": 102600 }, { "epoch": 32.8, "learning_rate": 8.013769118847369e-06, "loss": 0.0323, "step": 102700 }, { "epoch": 32.83, "learning_rate": 7.978281699137657e-06, "loss": 0.0294, "step": 102800 }, { "epoch": 32.86, "learning_rate": 7.942794279427943e-06, "loss": 0.0257, "step": 102900 }, { "epoch": 32.9, "learning_rate": 7.90730685971823e-06, "loss": 0.0317, "step": 103000 }, { "epoch": 32.93, "learning_rate": 7.871819440008519e-06, "loss": 0.0351, "step": 103100 }, { "epoch": 32.96, "learning_rate": 7.836332020298805e-06, "loss": 0.0255, "step": 103200 }, { "epoch": 32.99, "learning_rate": 7.800844600589091e-06, "loss": 0.0288, "step": 103300 }, { "epoch": 33.02, "learning_rate": 7.765357180879379e-06, "loss": 0.0281, "step": 103400 }, { "epoch": 33.06, "learning_rate": 7.729869761169667e-06, "loss": 0.0252, "step": 103500 }, { "epoch": 33.09, "learning_rate": 7.694382341459953e-06, "loss": 0.0246, "step": 103600 }, { "epoch": 33.12, "learning_rate": 7.658894921750241e-06, "loss": 0.0281, "step": 103700 }, { "epoch": 33.15, "learning_rate": 7.623407502040527e-06, "loss": 0.0237, "step": 103800 }, { "epoch": 33.18, "learning_rate": 7.587920082330814e-06, "loss": 0.0313, "step": 103900 }, { "epoch": 33.22, "learning_rate": 7.552432662621102e-06, "loss": 0.0261, "step": 104000 }, { "epoch": 33.25, "learning_rate": 7.516945242911388e-06, "loss": 0.0313, "step": 104100 }, { "epoch": 33.28, "learning_rate": 7.4814578232016754e-06, "loss": 0.0276, "step": 104200 }, { "epoch": 33.31, "learning_rate": 7.4459704034919625e-06, "loss": 0.0238, "step": 104300 }, { "epoch": 33.34, "learning_rate": 7.41048298378225e-06, "loss": 0.0254, "step": 104400 }, { "epoch": 33.38, "learning_rate": 7.374995564072537e-06, "loss": 0.0307, "step": 104500 }, { "epoch": 33.41, "learning_rate": 7.339508144362824e-06, "loss": 0.0287, "step": 104600 }, { "epoch": 33.44, "learning_rate": 7.304020724653112e-06, "loss": 0.0249, "step": 104700 }, { "epoch": 33.47, "learning_rate": 7.268533304943398e-06, "loss": 0.0256, "step": 104800 }, { "epoch": 33.5, "learning_rate": 7.233045885233685e-06, "loss": 0.0279, "step": 104900 }, { "epoch": 33.54, "learning_rate": 7.197558465523973e-06, "loss": 0.0269, "step": 105000 }, { "epoch": 33.57, "learning_rate": 7.16207104581426e-06, "loss": 0.0252, "step": 105100 }, { "epoch": 33.6, "learning_rate": 7.126583626104546e-06, "loss": 0.0272, "step": 105200 }, { "epoch": 33.63, "learning_rate": 7.091096206394834e-06, "loss": 0.0281, "step": 105300 }, { "epoch": 33.66, "learning_rate": 7.055608786685121e-06, "loss": 0.0234, "step": 105400 }, { "epoch": 33.7, "learning_rate": 7.020121366975407e-06, "loss": 0.0328, "step": 105500 }, { "epoch": 33.73, "learning_rate": 6.984988821462793e-06, "loss": 0.0286, "step": 105600 }, { "epoch": 33.76, "learning_rate": 6.94950140175308e-06, "loss": 0.0276, "step": 105700 }, { "epoch": 33.79, "learning_rate": 6.914013982043366e-06, "loss": 0.0239, "step": 105800 }, { "epoch": 33.82, "learning_rate": 6.878526562333653e-06, "loss": 0.0294, "step": 105900 }, { "epoch": 33.85, "learning_rate": 6.843039142623941e-06, "loss": 0.0311, "step": 106000 }, { "epoch": 33.89, "learning_rate": 6.807551722914227e-06, "loss": 0.0251, "step": 106100 }, { "epoch": 33.92, "learning_rate": 6.772064303204514e-06, "loss": 0.0297, "step": 106200 }, { "epoch": 33.95, "learning_rate": 6.736576883494802e-06, "loss": 0.0262, "step": 106300 }, { "epoch": 33.98, "learning_rate": 6.701089463785089e-06, "loss": 0.0283, "step": 106400 }, { "epoch": 34.01, "learning_rate": 6.665602044075375e-06, "loss": 0.0221, "step": 106500 }, { "epoch": 34.05, "learning_rate": 6.630114624365663e-06, "loss": 0.0232, "step": 106600 }, { "epoch": 34.08, "learning_rate": 6.59462720465595e-06, "loss": 0.0249, "step": 106700 }, { "epoch": 34.11, "learning_rate": 6.5591397849462365e-06, "loss": 0.0226, "step": 106800 }, { "epoch": 34.14, "learning_rate": 6.5236523652365244e-06, "loss": 0.0258, "step": 106900 }, { "epoch": 34.17, "learning_rate": 6.4881649455268115e-06, "loss": 0.0243, "step": 107000 }, { "epoch": 34.21, "learning_rate": 6.4526775258170986e-06, "loss": 0.022, "step": 107100 }, { "epoch": 34.24, "learning_rate": 6.417190106107385e-06, "loss": 0.0256, "step": 107200 }, { "epoch": 34.27, "learning_rate": 6.381702686397673e-06, "loss": 0.0303, "step": 107300 }, { "epoch": 34.3, "learning_rate": 6.34621526668796e-06, "loss": 0.0274, "step": 107400 }, { "epoch": 34.33, "learning_rate": 6.310727846978246e-06, "loss": 0.0262, "step": 107500 }, { "epoch": 34.37, "learning_rate": 6.275240427268534e-06, "loss": 0.0233, "step": 107600 }, { "epoch": 34.4, "learning_rate": 6.239753007558821e-06, "loss": 0.031, "step": 107700 }, { "epoch": 34.43, "learning_rate": 6.204265587849108e-06, "loss": 0.0287, "step": 107800 }, { "epoch": 34.46, "learning_rate": 6.168778168139395e-06, "loss": 0.0226, "step": 107900 }, { "epoch": 34.49, "learning_rate": 6.133290748429682e-06, "loss": 0.0212, "step": 108000 }, { "epoch": 34.53, "learning_rate": 6.097803328719969e-06, "loss": 0.0193, "step": 108100 }, { "epoch": 34.56, "learning_rate": 6.062315909010257e-06, "loss": 0.0231, "step": 108200 }, { "epoch": 34.59, "learning_rate": 6.026828489300543e-06, "loss": 0.0251, "step": 108300 }, { "epoch": 34.62, "learning_rate": 5.99134106959083e-06, "loss": 0.0266, "step": 108400 }, { "epoch": 34.65, "learning_rate": 5.955853649881118e-06, "loss": 0.0249, "step": 108500 }, { "epoch": 34.69, "learning_rate": 5.9203662301714045e-06, "loss": 0.0259, "step": 108600 }, { "epoch": 34.72, "learning_rate": 5.884878810461692e-06, "loss": 0.0248, "step": 108700 }, { "epoch": 34.75, "learning_rate": 5.849391390751979e-06, "loss": 0.0254, "step": 108800 }, { "epoch": 34.78, "learning_rate": 5.8139039710422666e-06, "loss": 0.0219, "step": 108900 }, { "epoch": 34.81, "learning_rate": 5.778416551332553e-06, "loss": 0.0266, "step": 109000 }, { "epoch": 34.85, "learning_rate": 5.74292913162284e-06, "loss": 0.0274, "step": 109100 }, { "epoch": 34.88, "learning_rate": 5.707441711913128e-06, "loss": 0.0239, "step": 109200 }, { "epoch": 34.91, "learning_rate": 5.671954292203414e-06, "loss": 0.0236, "step": 109300 }, { "epoch": 34.94, "learning_rate": 5.636466872493701e-06, "loss": 0.0257, "step": 109400 }, { "epoch": 34.97, "learning_rate": 5.601334326981086e-06, "loss": 0.0266, "step": 109500 }, { "epoch": 35.0, "learning_rate": 5.5658469072713726e-06, "loss": 0.0236, "step": 109600 }, { "epoch": 35.04, "learning_rate": 5.53035948756166e-06, "loss": 0.0217, "step": 109700 }, { "epoch": 35.07, "learning_rate": 5.4948720678519475e-06, "loss": 0.0261, "step": 109800 }, { "epoch": 35.1, "learning_rate": 5.459384648142234e-06, "loss": 0.0251, "step": 109900 }, { "epoch": 35.13, "learning_rate": 5.423897228432521e-06, "loss": 0.0229, "step": 110000 }, { "epoch": 35.16, "learning_rate": 5.388409808722809e-06, "loss": 0.0187, "step": 110100 }, { "epoch": 35.2, "learning_rate": 5.352922389013096e-06, "loss": 0.0283, "step": 110200 }, { "epoch": 35.23, "learning_rate": 5.317434969303382e-06, "loss": 0.0248, "step": 110300 }, { "epoch": 35.26, "learning_rate": 5.281947549593669e-06, "loss": 0.0239, "step": 110400 }, { "epoch": 35.29, "learning_rate": 5.246460129883957e-06, "loss": 0.0232, "step": 110500 }, { "epoch": 35.32, "learning_rate": 5.210972710174243e-06, "loss": 0.0206, "step": 110600 }, { "epoch": 35.36, "learning_rate": 5.17548529046453e-06, "loss": 0.0231, "step": 110700 }, { "epoch": 35.39, "learning_rate": 5.139997870754818e-06, "loss": 0.0221, "step": 110800 }, { "epoch": 35.42, "learning_rate": 5.104510451045105e-06, "loss": 0.0243, "step": 110900 }, { "epoch": 35.45, "learning_rate": 5.0690230313353915e-06, "loss": 0.0231, "step": 111000 }, { "epoch": 35.48, "learning_rate": 5.033535611625679e-06, "loss": 0.0241, "step": 111100 }, { "epoch": 35.52, "learning_rate": 4.9980481919159664e-06, "loss": 0.0275, "step": 111200 }, { "epoch": 35.55, "learning_rate": 4.9625607722062535e-06, "loss": 0.0248, "step": 111300 }, { "epoch": 35.58, "learning_rate": 4.927073352496541e-06, "loss": 0.0213, "step": 111400 }, { "epoch": 35.61, "learning_rate": 4.891585932786828e-06, "loss": 0.0221, "step": 111500 }, { "epoch": 35.64, "learning_rate": 4.856098513077115e-06, "loss": 0.021, "step": 111600 }, { "epoch": 35.68, "learning_rate": 4.820965967564499e-06, "loss": 0.0229, "step": 111700 }, { "epoch": 35.71, "learning_rate": 4.785478547854786e-06, "loss": 0.0255, "step": 111800 }, { "epoch": 35.74, "learning_rate": 4.7499911281450724e-06, "loss": 0.0223, "step": 111900 }, { "epoch": 35.77, "learning_rate": 4.71450370843536e-06, "loss": 0.0279, "step": 112000 }, { "epoch": 35.8, "learning_rate": 4.679016288725647e-06, "loss": 0.0176, "step": 112100 }, { "epoch": 35.84, "learning_rate": 4.6435288690159345e-06, "loss": 0.0221, "step": 112200 }, { "epoch": 35.87, "learning_rate": 4.6080414493062216e-06, "loss": 0.0243, "step": 112300 }, { "epoch": 35.9, "learning_rate": 4.572554029596508e-06, "loss": 0.0221, "step": 112400 }, { "epoch": 35.93, "learning_rate": 4.537421484083892e-06, "loss": 0.0258, "step": 112500 }, { "epoch": 35.96, "learning_rate": 4.50193406437418e-06, "loss": 0.022, "step": 112600 }, { "epoch": 35.99, "learning_rate": 4.466446644664466e-06, "loss": 0.0194, "step": 112700 }, { "epoch": 36.03, "learning_rate": 4.430959224954754e-06, "loss": 0.0192, "step": 112800 }, { "epoch": 36.06, "learning_rate": 4.395471805245041e-06, "loss": 0.0266, "step": 112900 }, { "epoch": 36.09, "learning_rate": 4.359984385535328e-06, "loss": 0.0206, "step": 113000 }, { "epoch": 36.12, "learning_rate": 4.3244969658256155e-06, "loss": 0.0234, "step": 113100 }, { "epoch": 36.15, "learning_rate": 4.289009546115902e-06, "loss": 0.0219, "step": 113200 }, { "epoch": 36.19, "learning_rate": 4.25352212640619e-06, "loss": 0.0308, "step": 113300 }, { "epoch": 36.22, "learning_rate": 4.218034706696477e-06, "loss": 0.0222, "step": 113400 }, { "epoch": 36.25, "learning_rate": 4.182547286986764e-06, "loss": 0.0222, "step": 113500 }, { "epoch": 36.28, "learning_rate": 4.147059867277051e-06, "loss": 0.0218, "step": 113600 }, { "epoch": 36.31, "learning_rate": 4.111572447567338e-06, "loss": 0.0255, "step": 113700 }, { "epoch": 36.35, "learning_rate": 4.076085027857625e-06, "loss": 0.0202, "step": 113800 }, { "epoch": 36.38, "learning_rate": 4.040597608147912e-06, "loss": 0.0161, "step": 113900 }, { "epoch": 36.41, "learning_rate": 4.005110188438199e-06, "loss": 0.0214, "step": 114000 }, { "epoch": 36.44, "learning_rate": 3.969622768728486e-06, "loss": 0.0183, "step": 114100 }, { "epoch": 36.47, "learning_rate": 3.934135349018773e-06, "loss": 0.021, "step": 114200 }, { "epoch": 36.51, "learning_rate": 3.89864792930906e-06, "loss": 0.0183, "step": 114300 }, { "epoch": 36.54, "learning_rate": 3.863160509599347e-06, "loss": 0.0181, "step": 114400 }, { "epoch": 36.57, "learning_rate": 3.827673089889634e-06, "loss": 0.0221, "step": 114500 }, { "epoch": 36.6, "learning_rate": 3.7925405443770184e-06, "loss": 0.0246, "step": 114600 }, { "epoch": 36.63, "learning_rate": 3.757053124667306e-06, "loss": 0.0245, "step": 114700 }, { "epoch": 36.67, "learning_rate": 3.7215657049575925e-06, "loss": 0.0222, "step": 114800 }, { "epoch": 36.7, "learning_rate": 3.68607828524788e-06, "loss": 0.0188, "step": 114900 }, { "epoch": 36.73, "learning_rate": 3.650590865538167e-06, "loss": 0.021, "step": 115000 } ], "max_steps": 125240, "num_train_epochs": 40, "total_flos": 0.0, "trial_name": null, "trial_params": null }